From 24e1d76a64c08c16478d274cff85d652c38634eb Mon Sep 17 00:00:00 2001 From: alex Date: Thu, 19 Aug 2004 09:32:26 +0000 Subject: [PATCH] - outdated patches have been removed --- .../patches/dynamic-locks-2.4.18-chaos.patch | 212 --- .../patches/ext3-2.4.18-ino_sb_macro-2.patch | 1478 --------------- .../patches/ext3-2.4.18-ino_sb_macro.patch | 1540 ---------------- .../patches/ext3-compat-2.4.18-chaos.patch | 46 - .../patches/ext3-delete_thread-2.4.18-2.patch | 474 ----- .../patches/ext3-delete_thread-2.4.18.patch | 517 ------ .../patches/ext3-ea-in-inode-2.4.18-chaos.patch | 761 -------- .../ext3-extents-2.4.18-chaos-pdirops.patch | 1891 ------------------- .../patches/ext3-extents-2.4.18-chaos.patch | 1895 -------------------- .../patches/ext3-extents-oflag-2.4.18-chaos.patch | 310 ---- .../patches/ext3-inode-reuse-2.4.18.patch | 350 ---- .../patches/ext3-trusted_ea-2.4.18.patch | 180 -- .../patches/gfp_memalloc-2.4.18-chaos.patch | 69 - .../kernel_patches/patches/htree-ext3-2.4.18.patch | 1233 ------------- lustre/kernel_patches/patches/iopen-2.4.18-2.patch | 493 ----- lustre/kernel_patches/patches/iopen-2.4.18.patch | 493 ----- .../patches/linux-2.4.18ea-0.8.26-2.patch | 1775 ------------------ .../patches/linux-2.4.18ea-0.8.26.patch | 1784 ------------------ lustre/kernel_patches/patches/listman-2.4.18.patch | 72 - .../patches/loop-sync-2.4.21-suse.patch | 4 +- .../patches/nfs_export_kernel-2.4.18.patch | 741 -------- .../patches/vfs-pdirops-2.4.18-chaos.patch | 265 --- .../patches/vfs_intent-2.4.18-18-chaos65.patch | 1788 ------------------ lustre/kernel_patches/series/chaos-2.4.18 | 41 - lustre/kernel_patches/series/chaos-2.4.18-pdirops | 36 - 25 files changed, 2 insertions(+), 18446 deletions(-) delete mode 100644 lustre/kernel_patches/patches/dynamic-locks-2.4.18-chaos.patch delete mode 100644 lustre/kernel_patches/patches/ext3-2.4.18-ino_sb_macro-2.patch delete mode 100644 lustre/kernel_patches/patches/ext3-2.4.18-ino_sb_macro.patch delete mode 100644 
lustre/kernel_patches/patches/ext3-compat-2.4.18-chaos.patch delete mode 100644 lustre/kernel_patches/patches/ext3-delete_thread-2.4.18-2.patch delete mode 100644 lustre/kernel_patches/patches/ext3-delete_thread-2.4.18.patch delete mode 100644 lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.18-chaos.patch delete mode 100644 lustre/kernel_patches/patches/ext3-extents-2.4.18-chaos-pdirops.patch delete mode 100644 lustre/kernel_patches/patches/ext3-extents-2.4.18-chaos.patch delete mode 100644 lustre/kernel_patches/patches/ext3-extents-oflag-2.4.18-chaos.patch delete mode 100644 lustre/kernel_patches/patches/ext3-inode-reuse-2.4.18.patch delete mode 100644 lustre/kernel_patches/patches/ext3-trusted_ea-2.4.18.patch delete mode 100644 lustre/kernel_patches/patches/gfp_memalloc-2.4.18-chaos.patch delete mode 100644 lustre/kernel_patches/patches/htree-ext3-2.4.18.patch delete mode 100644 lustre/kernel_patches/patches/iopen-2.4.18-2.patch delete mode 100644 lustre/kernel_patches/patches/iopen-2.4.18.patch delete mode 100644 lustre/kernel_patches/patches/linux-2.4.18ea-0.8.26-2.patch delete mode 100644 lustre/kernel_patches/patches/linux-2.4.18ea-0.8.26.patch delete mode 100644 lustre/kernel_patches/patches/listman-2.4.18.patch delete mode 100644 lustre/kernel_patches/patches/nfs_export_kernel-2.4.18.patch delete mode 100644 lustre/kernel_patches/patches/vfs-pdirops-2.4.18-chaos.patch delete mode 100644 lustre/kernel_patches/patches/vfs_intent-2.4.18-18-chaos65.patch delete mode 100644 lustre/kernel_patches/series/chaos-2.4.18 delete mode 100644 lustre/kernel_patches/series/chaos-2.4.18-pdirops diff --git a/lustre/kernel_patches/patches/dynamic-locks-2.4.18-chaos.patch b/lustre/kernel_patches/patches/dynamic-locks-2.4.18-chaos.patch deleted file mode 100644 index a1cef3e..0000000 --- a/lustre/kernel_patches/patches/dynamic-locks-2.4.18-chaos.patch +++ /dev/null @@ -1,212 +0,0 @@ - include/linux/dynlocks.h | 33 ++++++++++ - lib/Makefile | 4 - - lib/dynlocks.c | 152 
+++++++++++++++++++++++++++++++++++++++++++++++ - 3 files changed, 187 insertions(+), 2 deletions(-) - ---- /dev/null 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.18-alexey/include/linux/dynlocks.h 2003-09-01 16:33:25.000000000 +0400 -@@ -0,0 +1,33 @@ -+#ifndef _LINUX_DYNLOCKS_H -+#define _LINUX_DYNLOCKS_H -+ -+#include -+#include -+ -+struct dynlock_member { -+ struct list_head dl_list; -+ unsigned long dl_value; /* lock value */ -+ int dl_refcount; /* number of users */ -+ int dl_readers; -+ int dl_writers; -+ int dl_pid; /* holder of the lock */ -+ wait_queue_head_t dl_wait; -+}; -+ -+/* -+ * lock's namespace: -+ * - list of locks -+ * - lock to protect this list -+ */ -+struct dynlock { -+ struct list_head dl_list; -+ spinlock_t dl_list_lock; -+}; -+ -+void dynlock_init(struct dynlock *dl); -+void *dynlock_lock(struct dynlock *dl, unsigned long value, int rw, int gfp); -+void dynlock_unlock(struct dynlock *dl, void *lock); -+ -+ -+#endif -+ ---- /dev/null 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.18-alexey/lib/dynlocks.c 2003-09-01 16:36:00.000000000 +0400 -@@ -0,0 +1,152 @@ -+/* -+ * Dynamic Locks -+ * -+ * struct dynlock is lockspace -+ * one may request lock (exclusive or shared) for some value -+ * in that lockspace -+ * -+ */ -+ -+#include -+#include -+#include -+#include -+ -+/* -+ * dynlock_init -+ * -+ * initialize lockspace -+ * -+ */ -+void dynlock_init(struct dynlock *dl) -+{ -+ spin_lock_init(&dl->dl_list_lock); -+ INIT_LIST_HEAD(&dl->dl_list); -+} -+ -+/* -+ * dynlock_lock -+ * -+ * acquires lock (exclusive or shared) in specified lockspace -+ * each lock in lockspace is allocated separately, so user have -+ * to specify GFP flags. -+ * routine returns pointer to lock. 
this pointer is intended to -+ * be passed to dynlock_unlock -+ * -+ */ -+void *dynlock_lock(struct dynlock *dl, unsigned long value, int rw, int gfp) -+{ -+ struct dynlock_member *nhl = NULL; -+ struct dynlock_member *hl; -+ struct list_head *cur; -+ -+repeat: -+ /* find requested lock in lockspace */ -+ spin_lock(&dl->dl_list_lock); -+ list_for_each(cur, &dl->dl_list) { -+ hl = list_entry(cur, struct dynlock_member, dl_list); -+ if (hl->dl_value == value) { -+ /* lock is found */ -+ if (nhl) { -+ /* someone else just allocated -+ * lock we didn't find and just created -+ * so, we drop our lock -+ */ -+ kfree(nhl); -+ nhl = NULL; -+ } -+ hl->dl_refcount++; -+ goto found; -+ } -+ } -+ /* lock not found */ -+ if (nhl) { -+ /* we already have allocated lock. use it */ -+ hl = nhl; -+ nhl = NULL; -+ list_add(&hl->dl_list, &dl->dl_list); -+ goto found; -+ } -+ spin_unlock(&dl->dl_list_lock); -+ -+ /* lock not found and we haven't allocated lock yet. allocate it */ -+ nhl = kmalloc(sizeof(struct dynlock_member), gfp); -+ if (nhl == NULL) -+ return NULL; -+ nhl->dl_refcount = 1; -+ nhl->dl_value = value; -+ nhl->dl_readers = 0; -+ nhl->dl_writers = 0; -+ init_waitqueue_head(&nhl->dl_wait); -+ -+ /* while lock is being allocated, someone else may allocate it -+ * and put onto to list. 
check this situation -+ */ -+ goto repeat; -+ -+found: -+ if (rw) { -+ /* exclusive lock: user don't want to share lock at all -+ * NOTE: one process may take the same lock several times -+ * this functionaly is useful for rename operations */ -+ while ((hl->dl_writers && hl->dl_pid != current->pid) || -+ hl->dl_readers) { -+ spin_unlock(&dl->dl_list_lock); -+ wait_event(hl->dl_wait, -+ hl->dl_writers == 0 && hl->dl_readers == 0); -+ spin_lock(&dl->dl_list_lock); -+ } -+ hl->dl_writers++; -+ } else { -+ /* shared lock: user do not want to share lock with writer */ -+ while (hl->dl_writers) { -+ spin_unlock(&dl->dl_list_lock); -+ wait_event(hl->dl_wait, hl->dl_writers == 0); -+ spin_lock(&dl->dl_list_lock); -+ } -+ hl->dl_readers++; -+ } -+ hl->dl_pid = current->pid; -+ spin_unlock(&dl->dl_list_lock); -+ -+ return hl; -+} -+ -+ -+/* -+ * dynlock_unlock -+ * -+ * user have to specify lockspace (dl) and pointer to lock structure -+ * returned by dynlock_lock() -+ * -+ */ -+void dynlock_unlock(struct dynlock *dl, void *lock) -+{ -+ struct dynlock_member *hl = lock; -+ int wakeup = 0; -+ -+ spin_lock(&dl->dl_list_lock); -+ if (hl->dl_writers) { -+ hl->dl_writers--; -+ if (hl->dl_writers == 0) -+ wakeup = 1; -+ } else { -+ hl->dl_readers--; -+ if (hl->dl_readers == 0) -+ wakeup = 1; -+ } -+ if (wakeup) { -+ hl->dl_pid = 0; -+ wake_up(&hl->dl_wait); -+ } -+ if (--(hl->dl_refcount) == 0) -+ list_del(&hl->dl_list); -+ spin_unlock(&dl->dl_list_lock); -+ if (hl->dl_refcount == 0) -+ kfree(hl); -+} -+ -+EXPORT_SYMBOL(dynlock_init); -+EXPORT_SYMBOL(dynlock_lock); -+EXPORT_SYMBOL(dynlock_unlock); -+ ---- linux-2.4.18/lib/Makefile~dynamic-locks-2.4.18-chaos 2003-08-29 11:57:40.000000000 +0400 -+++ linux-2.4.18-alexey/lib/Makefile 2003-09-01 16:35:23.000000000 +0400 -@@ -8,9 +8,9 @@ - - L_TARGET := lib.a - --export-objs := cmdline.o dec_and_lock.o rwsem-spinlock.o rwsem.o rbtree.o -+export-objs := cmdline.o dec_and_lock.o rwsem-spinlock.o rwsem.o rbtree.o dynlocks.o - --obj-y := 
errno.o ctype.o string.o vsprintf.o brlock.o cmdline.o bust_spinlocks.o rbtree.o -+obj-y := errno.o ctype.o string.o vsprintf.o brlock.o cmdline.o bust_spinlocks.o rbtree.o dynlocks.o - - obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o - obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o - -_ diff --git a/lustre/kernel_patches/patches/ext3-2.4.18-ino_sb_macro-2.patch b/lustre/kernel_patches/patches/ext3-2.4.18-ino_sb_macro-2.patch deleted file mode 100644 index bbfe6a9..0000000 --- a/lustre/kernel_patches/patches/ext3-2.4.18-ino_sb_macro-2.patch +++ /dev/null @@ -1,1478 +0,0 @@ - fs/ext3/balloc.c | 134 +- - fs/ext3/dir.c | 2 - fs/ext3/ialloc.c | 102 - - fs/ext3/inode.c | 202 +-- - fs/ext3/ioctl.c | 13 - fs/ext3/namei.c | 9 - fs/ext3/super.c | 22 - fs/ext3/symlink.c | 8 - include/linux/ext3_fs.h | 64 - include/linux/ext3_jbd.h | 2 - 19 files changed, 5574 insertions(+), 290 deletions(-) - ---- linux-2.4.18-chaos/fs/ext3/balloc.c~ext3-2.4.18-ino_sb_macro-2 2003-07-28 17:52:04.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/fs/ext3/balloc.c 2003-09-16 23:34:40.000000000 +0400 -@@ -46,18 +46,18 @@ struct ext3_group_desc * ext3_get_group_ - unsigned long desc; - struct ext3_group_desc * gdp; - -- if (block_group >= sb->u.ext3_sb.s_groups_count) { -+ if (block_group >= EXT3_SB(sb)->s_groups_count) { - ext3_error (sb, "ext3_get_group_desc", - "block_group >= groups_count - " - "block_group = %d, groups_count = %lu", -- block_group, sb->u.ext3_sb.s_groups_count); -+ block_group, EXT3_SB(sb)->s_groups_count); - - return NULL; - } - - group_desc = block_group / EXT3_DESC_PER_BLOCK(sb); - desc = block_group % EXT3_DESC_PER_BLOCK(sb); -- if (!sb->u.ext3_sb.s_group_desc[group_desc]) { -+ if (!EXT3_SB(sb)->s_group_desc[group_desc]) { - ext3_error (sb, "ext3_get_group_desc", - "Group descriptor not loaded - " - "block_group = %d, group_desc = %lu, desc = %lu", -@@ -66,9 +66,9 @@ struct ext3_group_desc * ext3_get_group_ - } - - gdp = (struct ext3_group_desc *) -- 
sb->u.ext3_sb.s_group_desc[group_desc]->b_data; -+ EXT3_SB(sb)->s_group_desc[group_desc]->b_data; - if (bh) -- *bh = sb->u.ext3_sb.s_group_desc[group_desc]; -+ *bh = EXT3_SB(sb)->s_group_desc[group_desc]; - return gdp + desc; - } - -@@ -104,8 +104,8 @@ static int read_block_bitmap (struct sup - * this group. The IO will be retried next time. - */ - error_out: -- sb->u.ext3_sb.s_block_bitmap_number[bitmap_nr] = block_group; -- sb->u.ext3_sb.s_block_bitmap[bitmap_nr] = bh; -+ EXT3_SB(sb)->s_block_bitmap_number[bitmap_nr] = block_group; -+ EXT3_SB(sb)->s_block_bitmap[bitmap_nr] = bh; - return retval; - } - -@@ -128,16 +128,17 @@ static int __load_block_bitmap (struct s - int i, j, retval = 0; - unsigned long block_bitmap_number; - struct buffer_head * block_bitmap; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); - -- if (block_group >= sb->u.ext3_sb.s_groups_count) -+ if (block_group >= sbi->s_groups_count) - ext3_panic (sb, "load_block_bitmap", - "block_group >= groups_count - " - "block_group = %d, groups_count = %lu", -- block_group, sb->u.ext3_sb.s_groups_count); -+ block_group, EXT3_SB(sb)->s_groups_count); - -- if (sb->u.ext3_sb.s_groups_count <= EXT3_MAX_GROUP_LOADED) { -- if (sb->u.ext3_sb.s_block_bitmap[block_group]) { -- if (sb->u.ext3_sb.s_block_bitmap_number[block_group] == -+ if (sbi->s_groups_count <= EXT3_MAX_GROUP_LOADED) { -+ if (sbi->s_block_bitmap[block_group]) { -+ if (sbi->s_block_bitmap_number[block_group] == - block_group) - return block_group; - ext3_error (sb, "__load_block_bitmap", -@@ -149,21 +150,20 @@ static int __load_block_bitmap (struct s - return block_group; - } - -- for (i = 0; i < sb->u.ext3_sb.s_loaded_block_bitmaps && -- sb->u.ext3_sb.s_block_bitmap_number[i] != block_group; i++) -+ for (i = 0; i < sbi->s_loaded_block_bitmaps && -+ sbi->s_block_bitmap_number[i] != block_group; i++) - ; -- if (i < sb->u.ext3_sb.s_loaded_block_bitmaps && -- sb->u.ext3_sb.s_block_bitmap_number[i] == block_group) { -- block_bitmap_number = 
sb->u.ext3_sb.s_block_bitmap_number[i]; -- block_bitmap = sb->u.ext3_sb.s_block_bitmap[i]; -+ if (i < sbi->s_loaded_block_bitmaps && -+ sbi->s_block_bitmap_number[i] == block_group) { -+ block_bitmap_number = sbi->s_block_bitmap_number[i]; -+ block_bitmap = sbi->s_block_bitmap[i]; - for (j = i; j > 0; j--) { -- sb->u.ext3_sb.s_block_bitmap_number[j] = -- sb->u.ext3_sb.s_block_bitmap_number[j - 1]; -- sb->u.ext3_sb.s_block_bitmap[j] = -- sb->u.ext3_sb.s_block_bitmap[j - 1]; -+ sbi->s_block_bitmap_number[j] = -+ sbi->s_block_bitmap_number[j - 1]; -+ sbi->s_block_bitmap[j] = sbi->s_block_bitmap[j - 1]; - } -- sb->u.ext3_sb.s_block_bitmap_number[0] = block_bitmap_number; -- sb->u.ext3_sb.s_block_bitmap[0] = block_bitmap; -+ sbi->s_block_bitmap_number[0] = block_bitmap_number; -+ sbi->s_block_bitmap[0] = block_bitmap; - - /* - * There's still one special case here --- if block_bitmap == 0 -@@ -173,17 +173,14 @@ static int __load_block_bitmap (struct s - if (!block_bitmap) - retval = read_block_bitmap (sb, block_group, 0); - } else { -- if (sb->u.ext3_sb.s_loaded_block_bitmapsu.ext3_sb.s_loaded_block_bitmaps++; -+ if (sbi->s_loaded_block_bitmapss_loaded_block_bitmaps++; - else -- brelse (sb->u.ext3_sb.s_block_bitmap -- [EXT3_MAX_GROUP_LOADED - 1]); -- for (j = sb->u.ext3_sb.s_loaded_block_bitmaps - 1; -- j > 0; j--) { -- sb->u.ext3_sb.s_block_bitmap_number[j] = -- sb->u.ext3_sb.s_block_bitmap_number[j - 1]; -- sb->u.ext3_sb.s_block_bitmap[j] = -- sb->u.ext3_sb.s_block_bitmap[j - 1]; -+ brelse(sbi->s_block_bitmap[EXT3_MAX_GROUP_LOADED - 1]); -+ for (j = sbi->s_loaded_block_bitmaps - 1; j > 0; j--) { -+ sbi->s_block_bitmap_number[j] = -+ sbi->s_block_bitmap_number[j - 1]; -+ sbi->s_block_bitmap[j] = sbi->s_block_bitmap[j - 1]; - } - retval = read_block_bitmap (sb, block_group, 0); - } -@@ -206,24 +203,25 @@ static int __load_block_bitmap (struct s - static inline int load_block_bitmap (struct super_block * sb, - unsigned int block_group) - { -+ struct ext3_sb_info *sbi = 
EXT3_SB(sb); - int slot; -- -+ - /* - * Do the lookup for the slot. First of all, check if we're asking - * for the same slot as last time, and did we succeed that last time? - */ -- if (sb->u.ext3_sb.s_loaded_block_bitmaps > 0 && -- sb->u.ext3_sb.s_block_bitmap_number[0] == block_group && -- sb->u.ext3_sb.s_block_bitmap[0]) { -+ if (sbi->s_loaded_block_bitmaps > 0 && -+ sbi->s_block_bitmap_number[0] == block_group && -+ sbi->s_block_bitmap[0]) { - return 0; - } - /* - * Or can we do a fast lookup based on a loaded group on a filesystem - * small enough to be mapped directly into the superblock? - */ -- else if (sb->u.ext3_sb.s_groups_count <= EXT3_MAX_GROUP_LOADED && -- sb->u.ext3_sb.s_block_bitmap_number[block_group]==block_group -- && sb->u.ext3_sb.s_block_bitmap[block_group]) { -+ else if (sbi->s_groups_count <= EXT3_MAX_GROUP_LOADED && -+ sbi->s_block_bitmap_number[block_group] == block_group -+ && sbi->s_block_bitmap[block_group]) { - slot = block_group; - } - /* -@@ -243,7 +241,7 @@ static inline int load_block_bitmap (str - * If it's a valid slot, we may still have cached a previous IO error, - * in which case the bh in the superblock cache will be zero. 
- */ -- if (!sb->u.ext3_sb.s_block_bitmap[slot]) -+ if (!sbi->s_block_bitmap[slot]) - return -EIO; - - /* -@@ -275,7 +273,7 @@ void ext3_free_blocks (handle_t *handle, - return; - } - lock_super (sb); -- es = sb->u.ext3_sb.s_es; -+ es = EXT3_SB(sb)->s_es; - if (block < le32_to_cpu(es->s_first_data_block) || - block + count < block || - (block + count) > le32_to_cpu(es->s_blocks_count)) { -@@ -305,7 +303,7 @@ do_more: - if (bitmap_nr < 0) - goto error_return; - -- bitmap_bh = sb->u.ext3_sb.s_block_bitmap[bitmap_nr]; -+ bitmap_bh = EXT3_SB(sb)->s_block_bitmap[bitmap_nr]; - gdp = ext3_get_group_desc (sb, block_group, &gd_bh); - if (!gdp) - goto error_return; -@@ -330,8 +328,8 @@ do_more: - if (err) - goto error_return; - -- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access"); -- err = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh); -+ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); - if (err) - goto error_return; - -@@ -339,7 +337,7 @@ do_more: - if (block == le32_to_cpu(gdp->bg_block_bitmap) || - block == le32_to_cpu(gdp->bg_inode_bitmap) || - in_range(block, le32_to_cpu(gdp->bg_inode_table), -- sb->u.ext2_sb.s_itb_per_group)) { -+ EXT3_SB(sb)->s_itb_per_group)) { - ext3_error(sb, __FUNCTION__, - "Freeing block in system zone - block = %lu", - block); -@@ -412,8 +410,8 @@ do_more: - if (!err) err = ret; - - /* And the superblock */ -- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "dirtied superblock"); -- ret = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh); -+ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "dirtied superblock"); -+ ret = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); - if (!err) err = ret; - - if (overflow && !err) { -@@ -566,12 +564,12 @@ int ext3_new_block (handle_t *handle, st - } - - lock_super (sb); -- es = sb->u.ext3_sb.s_es; -+ es = EXT3_SB(sb)->s_es; - if (le32_to_cpu(es->s_free_blocks_count) <= - le32_to_cpu(es->s_r_blocks_count) && -- ((sb->u.ext3_sb.s_resuid 
!= current->fsuid) && -- (sb->u.ext3_sb.s_resgid == 0 || -- !in_group_p (sb->u.ext3_sb.s_resgid)) && -+ ((EXT3_SB(sb)->s_resuid != current->fsuid) && -+ (EXT3_SB(sb)->s_resgid == 0 || -+ !in_group_p (EXT3_SB(sb)->s_resgid)) && - !capable(CAP_SYS_RESOURCE))) - goto out; - -@@ -601,7 +599,7 @@ repeat: - if (bitmap_nr < 0) - goto io_error; - -- bh = sb->u.ext3_sb.s_block_bitmap[bitmap_nr]; -+ bh = EXT3_SB(sb)->s_block_bitmap[bitmap_nr]; - - ext3_debug ("goal is at %d:%d.\n", i, j); - -@@ -624,9 +622,9 @@ repeat: - * Now search the rest of the groups. We assume that - * i and gdp correctly point to the last group visited. - */ -- for (k = 0; k < sb->u.ext3_sb.s_groups_count; k++) { -+ for (k = 0; k < EXT3_SB(sb)->s_groups_count; k++) { - i++; -- if (i >= sb->u.ext3_sb.s_groups_count) -+ if (i >= EXT3_SB(sb)->s_groups_count) - i = 0; - gdp = ext3_get_group_desc (sb, i, &bh2); - if (!gdp) { -@@ -638,7 +636,7 @@ repeat: - if (bitmap_nr < 0) - goto io_error; - -- bh = sb->u.ext3_sb.s_block_bitmap[bitmap_nr]; -+ bh = EXT3_SB(sb)->s_block_bitmap[bitmap_nr]; - j = find_next_usable_block(-1, bh, - EXT3_BLOCKS_PER_GROUP(sb)); - if (j >= 0) -@@ -676,8 +674,8 @@ got_block: - fatal = ext3_journal_get_write_access(handle, bh2); - if (fatal) goto out; - -- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access"); -- fatal = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh); -+ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access"); -+ fatal = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); - if (fatal) goto out; - - tmp = j + i * EXT3_BLOCKS_PER_GROUP(sb) -@@ -810,7 +808,7 @@ got_block: - if (!fatal) fatal = err; - - BUFFER_TRACE(bh, "journal_dirty_metadata for superblock"); -- err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh); -+ err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); - if (!fatal) fatal = err; - - sb->s_dirt = 1; -@@ -848,11 +846,11 @@ unsigned long ext3_count_free_blocks (st - int i; - - lock_super (sb); -- es = 
sb->u.ext3_sb.s_es; -+ es = EXT3_SB(sb)->s_es; - desc_count = 0; - bitmap_count = 0; - gdp = NULL; -- for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) { -+ for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) { - gdp = ext3_get_group_desc (sb, i, NULL); - if (!gdp) - continue; -@@ -861,7 +859,7 @@ unsigned long ext3_count_free_blocks (st - if (bitmap_nr < 0) - continue; - -- x = ext3_count_free (sb->u.ext3_sb.s_block_bitmap[bitmap_nr], -+ x = ext3_count_free (EXT3_SB(sb)->s_block_bitmap[bitmap_nr], - sb->s_blocksize); - printk ("group %d: stored = %d, counted = %lu\n", - i, le16_to_cpu(gdp->bg_free_blocks_count), x); -@@ -872,7 +870,7 @@ unsigned long ext3_count_free_blocks (st - unlock_super (sb); - return bitmap_count; - #else -- return le32_to_cpu(sb->u.ext3_sb.s_es->s_free_blocks_count); -+ return le32_to_cpu(EXT3_SB(sb)->s_es->s_free_blocks_count); - #endif - } - -@@ -881,7 +879,7 @@ static inline int block_in_use (unsigned - unsigned char * map) - { - return ext3_test_bit ((block - -- le32_to_cpu(sb->u.ext3_sb.s_es->s_first_data_block)) % -+ le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) % - EXT3_BLOCKS_PER_GROUP(sb), map); - } - -@@ -949,11 +947,11 @@ void ext3_check_blocks_bitmap (struct su - struct ext3_group_desc * gdp; - int i; - -- es = sb->u.ext3_sb.s_es; -+ es = EXT3_SB(sb)->s_es; - desc_count = 0; - bitmap_count = 0; - gdp = NULL; -- for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) { -+ for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) { - gdp = ext3_get_group_desc (sb, i, NULL); - if (!gdp) - continue; -@@ -987,7 +985,7 @@ void ext3_check_blocks_bitmap (struct su - "Inode bitmap for group %d is marked free", - i); - -- for (j = 0; j < sb->u.ext3_sb.s_itb_per_group; j++) -+ for (j = 0; j < EXT3_SB(sb)->s_itb_per_group; j++) - if (!block_in_use (le32_to_cpu(gdp->bg_inode_table) + j, - sb, bh->b_data)) - ext3_error (sb, "ext3_check_blocks_bitmap", ---- linux-2.4.18-chaos/fs/ext3/dir.c~ext3-2.4.18-ino_sb_macro-2 2003-09-16 23:34:14.000000000 +0400 
-+++ linux-2.4.18-chaos-alexey/fs/ext3/dir.c 2003-09-16 23:34:40.000000000 +0400 -@@ -67,7 +67,7 @@ int ext3_check_dir_entry (const char * f - else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize) - error_msg = "directory entry across blocks"; - else if (le32_to_cpu(de->inode) > -- le32_to_cpu(dir->i_sb->u.ext3_sb.s_es->s_inodes_count)) -+ le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count)) - error_msg = "inode out of bounds"; - - if (error_msg != NULL) ---- linux-2.4.18-chaos/fs/ext3/ialloc.c~ext3-2.4.18-ino_sb_macro-2 2003-09-16 23:34:33.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/fs/ext3/ialloc.c 2003-09-16 23:34:40.000000000 +0400 -@@ -74,8 +74,8 @@ static int read_inode_bitmap (struct sup - * this group. The IO will be retried next time. - */ - error_out: -- sb->u.ext3_sb.s_inode_bitmap_number[bitmap_nr] = block_group; -- sb->u.ext3_sb.s_inode_bitmap[bitmap_nr] = bh; -+ EXT3_SB(sb)->s_inode_bitmap_number[bitmap_nr] = block_group; -+ EXT3_SB(sb)->s_inode_bitmap[bitmap_nr] = bh; - return retval; - } - -@@ -227,7 +227,7 @@ void ext3_free_inode (handle_t *handle, - clear_inode (inode); - - lock_super (sb); -- es = sb->u.ext3_sb.s_es; -+ es = EXT3_SB(sb)->s_es; - if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { - ext3_error (sb, "ext3_free_inode", - "reserved or nonexistent inode %lu", ino); -@@ -239,7 +239,7 @@ void ext3_free_inode (handle_t *handle, - if (bitmap_nr < 0) - goto error_return; - -- bh = sb->u.ext3_sb.s_inode_bitmap[bitmap_nr]; -+ bh = EXT3_SB(sb)->s_inode_bitmap[bitmap_nr]; - - BUFFER_TRACE(bh, "get_write_access"); - fatal = ext3_journal_get_write_access(handle, bh); -@@ -257,8 +257,8 @@ void ext3_free_inode (handle_t *handle, - fatal = ext3_journal_get_write_access(handle, bh2); - if (fatal) goto error_return; - -- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get write access"); -- fatal = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh); -+ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get write access"); -+ fatal 
= ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); - if (fatal) goto error_return; - - if (gdp) { -@@ -273,9 +273,9 @@ void ext3_free_inode (handle_t *handle, - if (!fatal) fatal = err; - es->s_free_inodes_count = - cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) + 1); -- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, -+ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, - "call ext3_journal_dirty_metadata"); -- err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh); -+ err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); - if (!fatal) fatal = err; - } - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); -@@ -307,6 +307,8 @@ struct inode * ext3_new_inode (handle_t - int i, j, avefreei; - struct inode * inode; - int bitmap_nr; -+ struct ext3_inode_info *ei; -+ struct ext3_sb_info *sbi; - struct ext3_group_desc * gdp; - struct ext3_group_desc * tmp; - struct ext3_super_block * es; -@@ -320,7 +322,9 @@ struct inode * ext3_new_inode (handle_t - inode = new_inode(sb); - if (!inode) - return ERR_PTR(-ENOMEM); -- init_rwsem(&inode->u.ext3_i.truncate_sem); -+ sbi = EXT3_SB(sb); -+ ei = EXT3_I(inode); -+ init_rwsem(&ei->truncate_sem); - - lock_super (sb); - es = sb->u.ext3_sb.s_es; -@@ -330,9 +334,9 @@ repeat: - - if (S_ISDIR(mode)) { - avefreei = le32_to_cpu(es->s_free_inodes_count) / -- sb->u.ext3_sb.s_groups_count; -+ sbi->s_groups_count; - if (!gdp) { -- for (j = 0; j < sb->u.ext3_sb.s_groups_count; j++) { -+ for (j = 0; j < sbi->s_groups_count; j++) { - struct buffer_head *temp_buffer; - tmp = ext3_get_group_desc (sb, j, &temp_buffer); - if (tmp && -@@ -352,7 +356,7 @@ repeat: - /* - * Try to place the inode in its parent directory - */ -- i = dir->u.ext3_i.i_block_group; -+ i = EXT3_I(dir)->i_block_group; - tmp = ext3_get_group_desc (sb, i, &bh2); - if (tmp && le16_to_cpu(tmp->bg_free_inodes_count)) - gdp = tmp; -@@ -362,10 +366,10 @@ repeat: - * Use a quadratic hash to find a group with a - * free inode - */ -- for (j = 1; j < sb->u.ext3_sb.s_groups_count; j <<= 1) 
{ -+ for (j = 1; j < sbi->s_groups_count; j <<= 1) { - i += j; -- if (i >= sb->u.ext3_sb.s_groups_count) -- i -= sb->u.ext3_sb.s_groups_count; -+ if (i >= sbi->s_groups_count) -+ i -= sbi->s_groups_count; - tmp = ext3_get_group_desc (sb, i, &bh2); - if (tmp && - le16_to_cpu(tmp->bg_free_inodes_count)) { -@@ -378,9 +382,9 @@ repeat: - /* - * That failed: try linear search for a free inode - */ -- i = dir->u.ext3_i.i_block_group + 1; -- for (j = 2; j < sb->u.ext3_sb.s_groups_count; j++) { -- if (++i >= sb->u.ext3_sb.s_groups_count) -+ i = EXT3_I(dir)->i_block_group + 1; -+ for (j = 2; j < sbi->s_groups_count; j++) { -+ if (++i >= sbi->s_groups_count) - i = 0; - tmp = ext3_get_group_desc (sb, i, &bh2); - if (tmp && -@@ -401,11 +405,11 @@ repeat: - if (bitmap_nr < 0) - goto fail; - -- bh = sb->u.ext3_sb.s_inode_bitmap[bitmap_nr]; -+ bh = sbi->s_inode_bitmap[bitmap_nr]; - - if ((j = ext3_find_first_zero_bit ((unsigned long *) bh->b_data, -- EXT3_INODES_PER_GROUP(sb))) < -- EXT3_INODES_PER_GROUP(sb)) { -+ sbi->s_inodes_per_group)) < -+ sbi->s_inodes_per_group) { - BUFFER_TRACE(bh, "get_write_access"); - err = ext3_journal_get_write_access(handle, bh); - if (err) goto fail; -@@ -459,13 +463,13 @@ repeat: - err = ext3_journal_dirty_metadata(handle, bh2); - if (err) goto fail; - -- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access"); -- err = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh); -+ BUFFER_TRACE(sbi->s_sbh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, sbi->s_sbh); - if (err) goto fail; - es->s_free_inodes_count = - cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) - 1); -- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "call ext3_journal_dirty_metadata"); -- err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh); -+ BUFFER_TRACE(sbi->s_sbh, "call ext3_journal_dirty_metadata"); -+ err = ext3_journal_dirty_metadata(handle, sbi->s_sbh); - sb->s_dirt = 1; - if (err) goto fail; - -@@ -485,31 +489,31 @@ repeat: - inode->i_blksize = 
PAGE_SIZE; - inode->i_blocks = 0; - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; -- inode->u.ext3_i.i_flags = dir->u.ext3_i.i_flags & ~EXT3_INDEX_FL; -+ ei->i_flags = EXT3_I(dir)->i_flags & ~EXT3_INDEX_FL; - if (S_ISLNK(mode)) -- inode->u.ext3_i.i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL); -+ ei->i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL); - #ifdef EXT3_FRAGMENTS -- inode->u.ext3_i.i_faddr = 0; -- inode->u.ext3_i.i_frag_no = 0; -- inode->u.ext3_i.i_frag_size = 0; -+ ei->i_faddr = 0; -+ ei->i_frag_no = 0; -+ ei->i_frag_size = 0; - #endif -- inode->u.ext3_i.i_file_acl = 0; -- inode->u.ext3_i.i_dir_acl = 0; -- inode->u.ext3_i.i_dtime = 0; -- INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan); -+ ei->i_file_acl = 0; -+ ei->i_dir_acl = 0; -+ ei->i_dtime = 0; -+ INIT_LIST_HEAD(&ei->i_orphan); - #ifdef EXT3_PREALLOCATE -- inode->u.ext3_i.i_prealloc_count = 0; -+ ei->i_prealloc_count = 0; - #endif -- inode->u.ext3_i.i_block_group = i; -+ ei->i_block_group = i; - -- if (inode->u.ext3_i.i_flags & EXT3_SYNC_FL) -+ if (ei->i_flags & EXT3_SYNC_FL) - inode->i_flags |= S_SYNC; - if (IS_SYNC(inode)) - handle->h_sync = 1; - insert_inode_hash(inode); -- inode->i_generation = sb->u.ext3_sb.s_next_generation++; -+ inode->i_generation = sbi->s_next_generation++; - -- inode->u.ext3_i.i_state = EXT3_STATE_NEW; -+ ei->i_state = EXT3_STATE_NEW; - err = ext3_mark_inode_dirty(handle, inode); - if (err) goto fail; - -@@ -588,19 +592,19 @@ struct inode *ext3_orphan_get(struct sup - - unsigned long ext3_count_free_inodes (struct super_block * sb) - { -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct ext3_super_block *es = sbi->s_es; - #ifdef EXT3FS_DEBUG -- struct ext3_super_block * es; - unsigned long desc_count, bitmap_count, x; - int bitmap_nr; - struct ext3_group_desc * gdp; - int i; - - lock_super (sb); -- es = sb->u.ext3_sb.s_es; - desc_count = 0; - bitmap_count = 0; - gdp = NULL; -- for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) { -+ for (i = 0; i < 
sbi->s_groups_count; i++) { - gdp = ext3_get_group_desc (sb, i, NULL); - if (!gdp) - continue; -@@ -609,8 +613,8 @@ unsigned long ext3_count_free_inodes (st - if (bitmap_nr < 0) - continue; - -- x = ext3_count_free (sb->u.ext3_sb.s_inode_bitmap[bitmap_nr], -- EXT3_INODES_PER_GROUP(sb) / 8); -+ x = ext3_count_free(sbi->s_inode_bitmap[bitmap_nr], -+ sbi->s_inodes_per_group / 8); - printk ("group %d: stored = %d, counted = %lu\n", - i, le16_to_cpu(gdp->bg_free_inodes_count), x); - bitmap_count += x; -@@ -620,7 +624,7 @@ unsigned long ext3_count_free_inodes (st - unlock_super (sb); - return desc_count; - #else -- return le32_to_cpu(sb->u.ext3_sb.s_es->s_free_inodes_count); -+ return le32_to_cpu(es->s_free_inodes_count); - #endif - } - -@@ -629,16 +633,18 @@ unsigned long ext3_count_free_inodes (st - void ext3_check_inodes_bitmap (struct super_block * sb) - { - struct ext3_super_block * es; -+ struct ext3_sb_info *sbi; - unsigned long desc_count, bitmap_count, x; - int bitmap_nr; - struct ext3_group_desc * gdp; - int i; - -- es = sb->u.ext3_sb.s_es; -+ sbi = EXT3_SB(sb); -+ es = sbi->s_es; - desc_count = 0; - bitmap_count = 0; - gdp = NULL; -- for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) { -+ for (i = 0; i < sbi->s_groups_count; i++) { - gdp = ext3_get_group_desc (sb, i, NULL); - if (!gdp) - continue; -@@ -647,7 +653,7 @@ void ext3_check_inodes_bitmap (struct su - if (bitmap_nr < 0) - continue; - -- x = ext3_count_free (sb->u.ext3_sb.s_inode_bitmap[bitmap_nr], -+ x = ext3_count_free (sbi->s_inode_bitmap[bitmap_nr], - EXT3_INODES_PER_GROUP(sb) / 8); - if (le16_to_cpu(gdp->bg_free_inodes_count) != x) - ext3_error (sb, "ext3_check_inodes_bitmap", ---- linux-2.4.18-chaos/fs/ext3/inode.c~ext3-2.4.18-ino_sb_macro-2 2003-09-16 23:34:16.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/fs/ext3/inode.c 2003-09-16 23:34:40.000000000 +0400 -@@ -206,7 +206,7 @@ void ext3_delete_inode (struct inode * i - * (Well, we could do this if we need to, but heck - it works) - */ - 
ext3_orphan_del(handle, inode); -- inode->u.ext3_i.i_dtime = CURRENT_TIME; -+ EXT3_I(inode)->i_dtime = CURRENT_TIME; - - /* - * One subtle ordering requirement: if anything has gone wrong -@@ -230,13 +230,14 @@ no_delete: - void ext3_discard_prealloc (struct inode * inode) - { - #ifdef EXT3_PREALLOCATE -+ struct ext3_inode_info *ei = EXT3_I(inode); - lock_kernel(); - /* Writer: ->i_prealloc* */ -- if (inode->u.ext3_i.i_prealloc_count) { -- unsigned short total = inode->u.ext3_i.i_prealloc_count; -- unsigned long block = inode->u.ext3_i.i_prealloc_block; -- inode->u.ext3_i.i_prealloc_count = 0; -- inode->u.ext3_i.i_prealloc_block = 0; -+ if (ei->i_prealloc_count) { -+ unsigned short total = ei->i_prealloc_count; -+ unsigned long block = ei->i_prealloc_block; -+ ei->i_prealloc_count = 0; -+ ei->i_prealloc_block = 0; - /* Writer: end */ - ext3_free_blocks (inode, block, total); - } -@@ -253,13 +254,15 @@ static int ext3_alloc_block (handle_t *h - unsigned long result; - - #ifdef EXT3_PREALLOCATE -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ - /* Writer: ->i_prealloc* */ -- if (inode->u.ext3_i.i_prealloc_count && -- (goal == inode->u.ext3_i.i_prealloc_block || -- goal + 1 == inode->u.ext3_i.i_prealloc_block)) -+ if (ei->i_prealloc_count && -+ (goal == ei->i_prealloc_block || -+ goal + 1 == ei->i_prealloc_block)) - { -- result = inode->u.ext3_i.i_prealloc_block++; -- inode->u.ext3_i.i_prealloc_count--; -+ result = ei->i_prealloc_block++; -+ ei->i_prealloc_count--; - /* Writer: end */ - ext3_debug ("preallocation hit (%lu/%lu).\n", - ++alloc_hits, ++alloc_attempts); -@@ -269,8 +272,8 @@ static int ext3_alloc_block (handle_t *h - alloc_hits, ++alloc_attempts); - if (S_ISREG(inode->i_mode)) - result = ext3_new_block (inode, goal, -- &inode->u.ext3_i.i_prealloc_count, -- &inode->u.ext3_i.i_prealloc_block, err); -+ &ei->i_prealloc_count, -+ &ei->i_prealloc_block, err); - else - result = ext3_new_block (inode, goal, 0, 0, err); - /* -@@ -404,7 +407,7 @@ static Indirect 
*ext3_get_branch(struct - - *err = 0; - /* i_data is not going away, no lock needed */ -- add_chain (chain, NULL, inode->u.ext3_i.i_data + *offsets); -+ add_chain (chain, NULL, EXT3_I(inode)->i_data + *offsets); - if (!p->key) - goto no_block; - while (--depth) { -@@ -448,7 +451,8 @@ no_block: - - static inline unsigned long ext3_find_near(struct inode *inode, Indirect *ind) - { -- u32 *start = ind->bh ? (u32*) ind->bh->b_data : inode->u.ext3_i.i_data; -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ u32 *start = ind->bh ? (u32*) ind->bh->b_data : ei->i_data; - u32 *p; - - /* Try to find previous block */ -@@ -464,9 +468,8 @@ static inline unsigned long ext3_find_ne - * It is going to be refered from inode itself? OK, just put it into - * the same cylinder group then. - */ -- return (inode->u.ext3_i.i_block_group * -- EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + -- le32_to_cpu(inode->i_sb->u.ext3_sb.s_es->s_first_data_block); -+ return (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + -+ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); - } - - /** -@@ -485,14 +488,15 @@ static inline unsigned long ext3_find_ne - static int ext3_find_goal(struct inode *inode, long block, Indirect chain[4], - Indirect *partial, unsigned long *goal) - { -+ struct ext3_inode_info *ei = EXT3_I(inode); - /* Writer: ->i_next_alloc* */ -- if (block == inode->u.ext3_i.i_next_alloc_block + 1) { -- inode->u.ext3_i.i_next_alloc_block++; -- inode->u.ext3_i.i_next_alloc_goal++; -+ if (block == ei->i_next_alloc_block + 1) { -+ ei->i_next_alloc_block++; -+ ei->i_next_alloc_goal++; - } - #ifdef SEARCH_FROM_ZERO -- inode->u.ext3_i.i_next_alloc_block = 0; -- inode->u.ext3_i.i_next_alloc_goal = 0; -+ ei->i_next_alloc_block = 0; -+ ei->i_next_alloc_goal = 0; - #endif - /* Writer: end */ - /* Reader: pointers, ->i_next_alloc* */ -@@ -501,8 +505,8 @@ static int ext3_find_goal(struct inode * - * try the heuristic for sequential allocation, - * failing that at least try to get decent 
locality. - */ -- if (block == inode->u.ext3_i.i_next_alloc_block) -- *goal = inode->u.ext3_i.i_next_alloc_goal; -+ if (block == ei->i_next_alloc_block) -+ *goal = ei->i_next_alloc_goal; - if (!*goal) - *goal = ext3_find_near(inode, partial); - #ifdef SEARCH_FROM_ZERO -@@ -628,6 +632,7 @@ static int ext3_splice_branch(handle_t * - { - int i; - int err = 0; -+ struct ext3_inode_info *ei = EXT3_I(inode); - - /* - * If we're splicing into a [td]indirect block (as opposed to the -@@ -650,11 +655,11 @@ static int ext3_splice_branch(handle_t * - /* That's it */ - - *where->p = where->key; -- inode->u.ext3_i.i_next_alloc_block = block; -- inode->u.ext3_i.i_next_alloc_goal = le32_to_cpu(where[num-1].key); -+ ei->i_next_alloc_block = block; -+ ei->i_next_alloc_goal = le32_to_cpu(where[num-1].key); - #ifdef SEARCH_FROM_ZERO -- inode->u.ext3_i.i_next_alloc_block = 0; -- inode->u.ext3_i.i_next_alloc_goal = 0; -+ ei->i_next_alloc_block = 0; -+ ei->i_next_alloc_goal = 0; - #endif - /* Writer: end */ - -@@ -738,6 +743,7 @@ static int ext3_get_block_handle(handle_ - unsigned long goal; - int left; - int depth = ext3_block_to_path(inode, iblock, offsets); -+ struct ext3_inode_info *ei = EXT3_I(inode); - loff_t new_size; - - J_ASSERT(handle != NULL || create == 0); -@@ -791,7 +797,7 @@ out: - /* - * Block out ext3_truncate while we alter the tree - */ -- down_read(&inode->u.ext3_i.truncate_sem); -+ down_read(&ei->truncate_sem); - err = ext3_alloc_branch(handle, inode, left, goal, - offsets+(partial-chain), partial); - -@@ -803,7 +809,7 @@ out: - if (!err) - err = ext3_splice_branch(handle, inode, iblock, chain, - partial, left); -- up_read(&inode->u.ext3_i.truncate_sem); -+ up_read(&ei->truncate_sem); - if (err == -EAGAIN) - goto changed; - if (err) -@@ -816,8 +822,8 @@ out: - * truncate is in progress. It is racy between multiple parallel - * instances of get_block, but we have the BKL. 
- */ -- if (new_size > inode->u.ext3_i.i_disksize) -- inode->u.ext3_i.i_disksize = new_size; -+ if (new_size > ei->i_disksize) -+ ei->i_disksize = new_size; - - bh_result->b_state |= (1UL << BH_New); - goto got_it; -@@ -932,7 +938,7 @@ struct buffer_head *ext3_bread(handle_t - struct buffer_head *tmp_bh; - - for (i = 1; -- inode->u.ext3_i.i_prealloc_count && -+ EXT3_I(inode)->i_prealloc_count && - i < EXT3_SB(inode->i_sb)->s_es->s_prealloc_dir_blocks; - i++) { - /* -@@ -1152,8 +1158,8 @@ static int ext3_commit_write(struct file - kunmap(page); - } - } -- if (inode->i_size > inode->u.ext3_i.i_disksize) { -- inode->u.ext3_i.i_disksize = inode->i_size; -+ if (inode->i_size > EXT3_I(inode)->i_disksize) { -+ EXT3_I(inode)->i_disksize = inode->i_size; - ret2 = ext3_mark_inode_dirty(handle, inode); - if (!ret) - ret = ret2; -@@ -1873,7 +1879,8 @@ static void ext3_free_branches(handle_t - void ext3_truncate(struct inode * inode) - { - handle_t *handle; -- u32 *i_data = inode->u.ext3_i.i_data; -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ u32 *i_data = EXT3_I(inode)->i_data; - int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb); - int offsets[4]; - Indirect chain[4]; -@@ -1934,13 +1941,13 @@ void ext3_truncate(struct inode * inode) - * on-disk inode. We do this via i_disksize, which is the value which - * ext3 *really* writes onto the disk inode. - */ -- inode->u.ext3_i.i_disksize = inode->i_size; -+ ei->i_disksize = inode->i_size; - - /* - * From here we block out all ext3_get_block() callers who want to - * modify the block allocation tree. 
- */ -- down_write(&inode->u.ext3_i.truncate_sem); -+ down_write(&ei->truncate_sem); - - if (n == 1) { /* direct blocks */ - ext3_free_data(handle, inode, NULL, i_data+offsets[0], -@@ -2004,7 +2011,7 @@ do_indirects: - case EXT3_TIND_BLOCK: - ; - } -- up_write(&inode->u.ext3_i.truncate_sem); -+ up_write(&ei->truncate_sem); - inode->i_mtime = inode->i_ctime = CURRENT_TIME; - ext3_mark_inode_dirty(handle, inode); - -@@ -2041,6 +2048,8 @@ out_unlock: - - int ext3_get_inode_loc (struct inode *inode, struct ext3_iloc *iloc) - { -+ struct super_block *sb = inode->i_sb; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); - struct buffer_head *bh = 0; - unsigned long block; - unsigned long block_group; -@@ -2051,25 +2060,21 @@ int ext3_get_inode_loc (struct inode *in - - if ((inode->i_ino != EXT3_ROOT_INO && - inode->i_ino != EXT3_JOURNAL_INO && -- inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) || -- inode->i_ino > le32_to_cpu( -- inode->i_sb->u.ext3_sb.s_es->s_inodes_count)) { -- ext3_error (inode->i_sb, "ext3_get_inode_loc", -- "bad inode number: %lu", inode->i_ino); -+ inode->i_ino < EXT3_FIRST_INO(sb)) || -+ inode->i_ino > le32_to_cpu(sbi->s_es->s_inodes_count)) { -+ ext3_error (sb, __FUNCTION__, "bad inode #%lu", inode->i_ino); - goto bad_inode; - } -- block_group = (inode->i_ino - 1) / EXT3_INODES_PER_GROUP(inode->i_sb); -- if (block_group >= inode->i_sb->u.ext3_sb.s_groups_count) { -- ext3_error (inode->i_sb, "ext3_get_inode_loc", -- "group >= groups count"); -+ block_group = (inode->i_ino - 1) / sbi->s_inodes_per_group; -+ if (block_group >= sbi->s_groups_count) { -+ ext3_error(sb, __FUNCTION__, "group >= groups count"); - goto bad_inode; - } -- group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(inode->i_sb); -- desc = block_group & (EXT3_DESC_PER_BLOCK(inode->i_sb) - 1); -- bh = inode->i_sb->u.ext3_sb.s_group_desc[group_desc]; -+ group_desc = block_group >> sbi->s_desc_per_block_bits; -+ desc = block_group & (sbi->s_desc_per_block - 1); -+ bh = 
sbi->s_group_desc[group_desc]; - if (!bh) { -- ext3_error (inode->i_sb, "ext3_get_inode_loc", -- "Descriptor not loaded"); -+ ext3_error(sb, __FUNCTION__, "Descriptor not loaded"); - goto bad_inode; - } - -@@ -2077,17 +2082,17 @@ int ext3_get_inode_loc (struct inode *in - /* - * Figure out the offset within the block group inode table - */ -- offset = ((inode->i_ino - 1) % EXT3_INODES_PER_GROUP(inode->i_sb)) * -- EXT3_INODE_SIZE(inode->i_sb); -+ offset = ((inode->i_ino - 1) % sbi->s_inodes_per_group) * -+ sbi->s_inode_size; - block = le32_to_cpu(gdp[desc].bg_inode_table) + -- (offset >> EXT3_BLOCK_SIZE_BITS(inode->i_sb)); -- if (!(bh = sb_bread(inode->i_sb, block))) { -- ext3_error (inode->i_sb, "ext3_get_inode_loc", -+ (offset >> EXT3_BLOCK_SIZE_BITS(sb)); -+ if (!(bh = sb_bread(sb, block))) { -+ ext3_error (sb, __FUNCTION__, - "unable to read inode block - " - "inode=%lu, block=%lu", inode->i_ino, block); - goto bad_inode; - } -- offset &= (EXT3_BLOCK_SIZE(inode->i_sb) - 1); -+ offset &= (EXT3_BLOCK_SIZE(sb) - 1); - - iloc->bh = bh; - iloc->raw_inode = (struct ext3_inode *) (bh->b_data + offset); -@@ -2103,6 +2108,7 @@ void ext3_read_inode(struct inode * inod - { - struct ext3_iloc iloc; - struct ext3_inode *raw_inode; -+ struct ext3_inode_info *ei = EXT3_I(inode); - struct buffer_head *bh; - int block; - -@@ -2110,7 +2116,7 @@ void ext3_read_inode(struct inode * inod - goto bad_inode; - bh = iloc.bh; - raw_inode = iloc.raw_inode; -- init_rwsem(&inode->u.ext3_i.truncate_sem); -+ init_rwsem(&ei->truncate_sem); - inode->i_mode = le16_to_cpu(raw_inode->i_mode); - inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); - inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); -@@ -2123,7 +2129,7 @@ void ext3_read_inode(struct inode * inod - inode->i_atime = le32_to_cpu(raw_inode->i_atime); - inode->i_ctime = le32_to_cpu(raw_inode->i_ctime); - inode->i_mtime = le32_to_cpu(raw_inode->i_mtime); -- inode->u.ext3_i.i_dtime = le32_to_cpu(raw_inode->i_dtime); -+ 
ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); - /* We now have enough fields to check if the inode was active or not. - * This is needed because nfsd might try to access dead inodes - * the test is that same one that e2fsck uses -@@ -2131,7 +2137,7 @@ void ext3_read_inode(struct inode * inod - */ - if (inode->i_nlink == 0) { - if (inode->i_mode == 0 || -- !(inode->i_sb->u.ext3_sb.s_mount_state & EXT3_ORPHAN_FS)) { -+ !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) { - /* this inode is deleted */ - brelse (bh); - goto bad_inode; -@@ -2146,33 +2152,33 @@ void ext3_read_inode(struct inode * inod - * size */ - inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); - inode->i_version = ++event; -- inode->u.ext3_i.i_flags = le32_to_cpu(raw_inode->i_flags); -+ ei->i_flags = le32_to_cpu(raw_inode->i_flags); - #ifdef EXT3_FRAGMENTS -- inode->u.ext3_i.i_faddr = le32_to_cpu(raw_inode->i_faddr); -- inode->u.ext3_i.i_frag_no = raw_inode->i_frag; -- inode->u.ext3_i.i_frag_size = raw_inode->i_fsize; -+ ei->i_faddr = le32_to_cpu(raw_inode->i_faddr); -+ ei->i_frag_no = raw_inode->i_frag; -+ ei->i_frag_size = raw_inode->i_fsize; - #endif -- inode->u.ext3_i.i_file_acl = le32_to_cpu(raw_inode->i_file_acl); -+ ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl); - if (!S_ISREG(inode->i_mode)) { -- inode->u.ext3_i.i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl); -+ ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl); - } else { - inode->i_size |= - ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32; - } -- inode->u.ext3_i.i_disksize = inode->i_size; -+ ei->i_disksize = inode->i_size; - inode->i_generation = le32_to_cpu(raw_inode->i_generation); - #ifdef EXT3_PREALLOCATE -- inode->u.ext3_i.i_prealloc_count = 0; -+ ei->i_prealloc_count = 0; - #endif -- inode->u.ext3_i.i_block_group = iloc.block_group; -+ ei->i_block_group = iloc.block_group; - - /* - * NOTE! The in-memory inode i_data array is in little-endian order - * even on big-endian machines: we do NOT byteswap the block numbers! 
- */ - for (block = 0; block < EXT3_N_BLOCKS; block++) -- inode->u.ext3_i.i_data[block] = iloc.raw_inode->i_block[block]; -- INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan); -+ ei->i_data[block] = iloc.raw_inode->i_block[block]; -+ INIT_LIST_HEAD(&ei->i_orphan); - - brelse (iloc.bh); - -@@ -2194,19 +2200,19 @@ void ext3_read_inode(struct inode * inod - init_special_inode(inode, inode->i_mode, - le32_to_cpu(iloc.raw_inode->i_block[0])); - /* inode->i_attr_flags = 0; unused */ -- if (inode->u.ext3_i.i_flags & EXT3_SYNC_FL) { -+ if (ei->i_flags & EXT3_SYNC_FL) { - /* inode->i_attr_flags |= ATTR_FLAG_SYNCRONOUS; unused */ - inode->i_flags |= S_SYNC; - } -- if (inode->u.ext3_i.i_flags & EXT3_APPEND_FL) { -+ if (ei->i_flags & EXT3_APPEND_FL) { - /* inode->i_attr_flags |= ATTR_FLAG_APPEND; unused */ - inode->i_flags |= S_APPEND; - } -- if (inode->u.ext3_i.i_flags & EXT3_IMMUTABLE_FL) { -+ if (ei->i_flags & EXT3_IMMUTABLE_FL) { - /* inode->i_attr_flags |= ATTR_FLAG_IMMUTABLE; unused */ - inode->i_flags |= S_IMMUTABLE; - } -- if (inode->u.ext3_i.i_flags & EXT3_NOATIME_FL) { -+ if (ei->i_flags & EXT3_NOATIME_FL) { - /* inode->i_attr_flags |= ATTR_FLAG_NOATIME; unused */ - inode->i_flags |= S_NOATIME; - } -@@ -2228,6 +2234,7 @@ static int ext3_do_update_inode(handle_t - struct ext3_iloc *iloc) - { - struct ext3_inode *raw_inode = iloc->raw_inode; -+ struct ext3_inode_info *ei = EXT3_I(inode); - struct buffer_head *bh = iloc->bh; - int err = 0, rc, block; - -@@ -2245,7 +2252,7 @@ static int ext3_do_update_inode(handle_t - * Fix up interoperability with old kernels. 
Otherwise, old inodes get - * re-used with the upper 16 bits of the uid/gid intact - */ -- if(!inode->u.ext3_i.i_dtime) { -+ if(!ei->i_dtime) { - raw_inode->i_uid_high = - cpu_to_le16(high_16_bits(inode->i_uid)); - raw_inode->i_gid_high = -@@ -2263,34 +2270,33 @@ static int ext3_do_update_inode(handle_t - raw_inode->i_gid_high = 0; - } - raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); -- raw_inode->i_size = cpu_to_le32(inode->u.ext3_i.i_disksize); -+ raw_inode->i_size = cpu_to_le32(ei->i_disksize); - raw_inode->i_atime = cpu_to_le32(inode->i_atime); - raw_inode->i_ctime = cpu_to_le32(inode->i_ctime); - raw_inode->i_mtime = cpu_to_le32(inode->i_mtime); - raw_inode->i_blocks = cpu_to_le32(inode->i_blocks); -- raw_inode->i_dtime = cpu_to_le32(inode->u.ext3_i.i_dtime); -- raw_inode->i_flags = cpu_to_le32(inode->u.ext3_i.i_flags); -+ raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); -+ raw_inode->i_flags = cpu_to_le32(ei->i_flags); - #ifdef EXT3_FRAGMENTS -- raw_inode->i_faddr = cpu_to_le32(inode->u.ext3_i.i_faddr); -- raw_inode->i_frag = inode->u.ext3_i.i_frag_no; -- raw_inode->i_fsize = inode->u.ext3_i.i_frag_size; -+ raw_inode->i_faddr = cpu_to_le32(ei->i_faddr); -+ raw_inode->i_frag = ei->i_frag_no; -+ raw_inode->i_fsize = ei->i_frag_size; - #else - /* If we are not tracking these fields in the in-memory inode, - * then preserve them on disk, but still initialise them to zero - * for new inodes. 
*/ -- if (EXT3_I(inode)->i_state & EXT3_STATE_NEW) { -+ if (ei->i_state & EXT3_STATE_NEW) { - raw_inode->i_faddr = 0; - raw_inode->i_frag = 0; - raw_inode->i_fsize = 0; - } - #endif -- raw_inode->i_file_acl = cpu_to_le32(inode->u.ext3_i.i_file_acl); -+ raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl); - if (!S_ISREG(inode->i_mode)) { -- raw_inode->i_dir_acl = cpu_to_le32(inode->u.ext3_i.i_dir_acl); -+ raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl); - } else { -- raw_inode->i_size_high = -- cpu_to_le32(inode->u.ext3_i.i_disksize >> 32); -- if (inode->u.ext3_i.i_disksize > 0x7fffffffULL) { -+ raw_inode->i_size_high = cpu_to_le32(ei->i_disksize >> 32); -+ if (ei->i_disksize > MAX_NON_LFS) { - struct super_block *sb = inode->i_sb; - if (!EXT3_HAS_RO_COMPAT_FEATURE(sb, - EXT3_FEATURE_RO_COMPAT_LARGE_FILE) || -@@ -2300,7 +2306,7 @@ static int ext3_do_update_inode(handle_t - * created, add a flag to the superblock. - */ - err = ext3_journal_get_write_access(handle, -- sb->u.ext3_sb.s_sbh); -+ EXT3_SB(sb)->s_sbh); - if (err) - goto out_brelse; - ext3_update_dynamic_rev(sb); -@@ -2309,7 +2315,7 @@ static int ext3_do_update_inode(handle_t - sb->s_dirt = 1; - handle->h_sync = 1; - err = ext3_journal_dirty_metadata(handle, -- sb->u.ext3_sb.s_sbh); -+ EXT3_SB(sb)->s_sbh); - } - } - } -@@ -2318,13 +2324,13 @@ static int ext3_do_update_inode(handle_t - raw_inode->i_block[0] = - cpu_to_le32(kdev_t_to_nr(inode->i_rdev)); - else for (block = 0; block < EXT3_N_BLOCKS; block++) -- raw_inode->i_block[block] = inode->u.ext3_i.i_data[block]; -+ raw_inode->i_block[block] = ei->i_data[block]; - - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); - rc = ext3_journal_dirty_metadata(handle, bh); - if (!err) - err = rc; -- EXT3_I(inode)->i_state &= ~EXT3_STATE_NEW; -+ ei->i_state &= ~EXT3_STATE_NEW; - - out_brelse: - brelse (bh); -@@ -2432,7 +2438,7 @@ int ext3_setattr(struct dentry *dentry, - } - - error = ext3_orphan_add(handle, inode); -- inode->u.ext3_i.i_disksize = 
attr->ia_size; -+ EXT3_I(inode)->i_disksize = attr->ia_size; - rc = ext3_mark_inode_dirty(handle, inode); - if (!error) - error = rc; -@@ -2675,9 +2681,9 @@ int ext3_change_inode_journal_flag(struc - */ - - if (val) -- inode->u.ext3_i.i_flags |= EXT3_JOURNAL_DATA_FL; -+ EXT3_I(inode)->i_flags |= EXT3_JOURNAL_DATA_FL; - else -- inode->u.ext3_i.i_flags &= ~EXT3_JOURNAL_DATA_FL; -+ EXT3_I(inode)->i_flags &= ~EXT3_JOURNAL_DATA_FL; - - journal_unlock_updates(journal); - ---- linux-2.4.18-chaos/fs/ext3/ioctl.c~ext3-2.4.18-ino_sb_macro-2 2001-11-10 01:25:04.000000000 +0300 -+++ linux-2.4.18-chaos-alexey/fs/ext3/ioctl.c 2003-09-16 23:34:40.000000000 +0400 -@@ -18,13 +18,14 @@ - int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, - unsigned long arg) - { -+ struct ext3_inode_info *ei = EXT3_I(inode); - unsigned int flags; - - ext3_debug ("cmd = %u, arg = %lu\n", cmd, arg); - - switch (cmd) { - case EXT3_IOC_GETFLAGS: -- flags = inode->u.ext3_i.i_flags & EXT3_FL_USER_VISIBLE; -+ flags = ei->i_flags & EXT3_FL_USER_VISIBLE; - return put_user(flags, (int *) arg); - case EXT3_IOC_SETFLAGS: { - handle_t *handle = NULL; -@@ -42,7 +43,7 @@ int ext3_ioctl (struct inode * inode, st - if (get_user(flags, (int *) arg)) - return -EFAULT; - -- oldflags = inode->u.ext3_i.i_flags; -+ oldflags = ei->i_flags; - - /* The JOURNAL_DATA flag is modifiable only by root */ - jflag = flags & EXT3_JOURNAL_DATA_FL; -@@ -79,7 +80,7 @@ int ext3_ioctl (struct inode * inode, st - - flags = flags & EXT3_FL_USER_MODIFIABLE; - flags |= oldflags & ~EXT3_FL_USER_MODIFIABLE; -- inode->u.ext3_i.i_flags = flags; -+ ei->i_flags = flags; - - if (flags & EXT3_SYNC_FL) - inode->i_flags |= S_SYNC; -@@ -155,12 +156,12 @@ flags_err: - int ret = 0; - - set_current_state(TASK_INTERRUPTIBLE); -- add_wait_queue(&sb->u.ext3_sb.ro_wait_queue, &wait); -- if (timer_pending(&sb->u.ext3_sb.turn_ro_timer)) { -+ add_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait); -+ if 
(timer_pending(&EXT3_SB(sb)->turn_ro_timer)) { - schedule(); - ret = 1; - } -- remove_wait_queue(&sb->u.ext3_sb.ro_wait_queue, &wait); -+ remove_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait); - return ret; - } - #endif ---- linux-2.4.18-chaos/fs/ext3/namei.c~ext3-2.4.18-ino_sb_macro-2 2003-09-16 23:34:33.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/fs/ext3/namei.c 2003-09-16 23:34:40.000000000 +0400 -@@ -1764,8 +1764,8 @@ int ext3_orphan_add(handle_t *handle, st - J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); - -- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access"); -- err = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh); -+ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); - if (err) - goto out_unlock; - -@@ -1776,7 +1776,7 @@ int ext3_orphan_add(handle_t *handle, st - /* Insert this inode at the head of the on-disk orphan list... */ - NEXT_ORPHAN(inode) = le32_to_cpu(EXT3_SB(sb)->s_es->s_last_orphan); - EXT3_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); -- err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh); -+ err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); - rc = ext3_mark_iloc_dirty(handle, inode, &iloc); - if (!err) - err = rc; -@@ -1850,8 +1850,7 @@ int ext3_orphan_del(handle_t *handle, st - err = ext3_journal_dirty_metadata(handle, sbi->s_sbh); - } else { - struct ext3_iloc iloc2; -- struct inode *i_prev = -- list_entry(prev, struct inode, u.ext3_i.i_orphan); -+ struct inode *i_prev = orphan_list_entry(prev); - - jbd_debug(4, "orphan inode %lu will point to %lu\n", - i_prev->i_ino, ino_next); ---- linux-2.4.18-chaos/fs/ext3/super.c~ext3-2.4.18-ino_sb_macro-2 2003-09-16 23:34:16.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/fs/ext3/super.c 2003-09-16 23:34:40.000000000 +0400 -@@ -124,7 +124,7 @@ static int ext3_error_behaviour(struct s - /* If no overrides were 
specified on the mount, then fall back - * to the default behaviour set in the filesystem's superblock - * on disk. */ -- switch (le16_to_cpu(sb->u.ext3_sb.s_es->s_errors)) { -+ switch (le16_to_cpu(EXT3_SB(sb)->s_es->s_errors)) { - case EXT3_ERRORS_PANIC: - return EXT3_ERRORS_PANIC; - case EXT3_ERRORS_RO: -@@ -272,9 +272,9 @@ void ext3_abort (struct super_block * sb - return; - - printk (KERN_CRIT "Remounting filesystem read-only\n"); -- sb->u.ext3_sb.s_mount_state |= EXT3_ERROR_FS; -+ EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; - sb->s_flags |= MS_RDONLY; -- sb->u.ext3_sb.s_mount_opt |= EXT3_MOUNT_ABORT; -+ EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT; - journal_abort(EXT3_SB(sb)->s_journal, -EIO); - } - -@@ -380,8 +380,6 @@ static int ext3_blkdev_remove(struct ext - return ret; - } - --#define orphan_list_entry(l) list_entry((l), struct inode, u.ext3_i.i_orphan) -- - static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi) - { - struct list_head *l; -@@ -825,7 +823,7 @@ static void ext3_orphan_cleanup (struct - sb->s_flags &= ~MS_RDONLY; - } - -- if (sb->u.ext3_sb.s_mount_state & EXT3_ERROR_FS) { -+ if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) { - if (es->s_last_orphan) - jbd_debug(1, "Errors on filesystem, " - "clearing orphan list.\n"); -@@ -1474,12 +1472,14 @@ static void ext3_commit_super (struct su - struct ext3_super_block * es, - int sync) - { -+ struct buffer_head *sbh = EXT3_SB(sb)->s_sbh; -+ - es->s_wtime = cpu_to_le32(CURRENT_TIME); -- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "marking dirty"); -- mark_buffer_dirty(sb->u.ext3_sb.s_sbh); -+ BUFFER_TRACE(sbh, "marking dirty"); -+ mark_buffer_dirty(sbh); - if (sync) { -- ll_rw_block(WRITE, 1, &sb->u.ext3_sb.s_sbh); -- wait_on_buffer(sb->u.ext3_sb.s_sbh); -+ ll_rw_block(WRITE, 1, &sbh); -+ wait_on_buffer(sbh); - } - } - -@@ -1530,7 +1530,7 @@ static void ext3_clear_journal_err(struc - ext3_warning(sb, __FUNCTION__, "Marking fs in need of " - "filesystem check."); - -- 
sb->u.ext3_sb.s_mount_state |= EXT3_ERROR_FS; -+ EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; - es->s_state |= cpu_to_le16(EXT3_ERROR_FS); - ext3_commit_super (sb, es, 1); - ---- linux-2.4.18-chaos/fs/ext3/symlink.c~ext3-2.4.18-ino_sb_macro-2 2001-11-10 01:25:04.000000000 +0300 -+++ linux-2.4.18-chaos-alexey/fs/ext3/symlink.c 2003-09-16 23:34:40.000000000 +0400 -@@ -23,14 +23,14 @@ - - static int ext3_readlink(struct dentry *dentry, char *buffer, int buflen) - { -- char *s = (char *)dentry->d_inode->u.ext3_i.i_data; -- return vfs_readlink(dentry, buffer, buflen, s); -+ struct ext3_inode_info *ei = EXT3_I(dentry->d_inode); -+ return vfs_readlink(dentry, buffer, buflen, (char *)ei->i_data); - } - - static int ext3_follow_link(struct dentry *dentry, struct nameidata *nd) - { -- char *s = (char *)dentry->d_inode->u.ext3_i.i_data; -- return vfs_follow_link(nd, s); -+ struct ext3_inode_info *ei = EXT3_I(dentry->d_inode); -+ return vfs_follow_link(nd, (char*)ei->i_data); - } - - struct inode_operations ext3_fast_symlink_inode_operations = { ---- linux-2.4.18-chaos/include/linux/ext3_fs.h~ext3-2.4.18-ino_sb_macro-2 2003-09-16 23:34:33.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/include/linux/ext3_fs.h 2003-09-16 23:34:40.000000000 +0400 -@@ -87,22 +87,25 @@ - #define EXT3_MIN_BLOCK_SIZE 1024 - #define EXT3_MAX_BLOCK_SIZE 4096 - #define EXT3_MIN_BLOCK_LOG_SIZE 10 -+ - #ifdef __KERNEL__ --# define EXT3_BLOCK_SIZE(s) ((s)->s_blocksize) --#else --# define EXT3_BLOCK_SIZE(s) (EXT3_MIN_BLOCK_SIZE << (s)->s_log_block_size) --#endif --#define EXT3_ADDR_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (__u32)) --#ifdef __KERNEL__ --# define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) --#else --# define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10) --#endif --#ifdef __KERNEL__ --#define EXT3_ADDR_PER_BLOCK_BITS(s) ((s)->u.ext3_sb.s_addr_per_block_bits) --#define EXT3_INODE_SIZE(s) ((s)->u.ext3_sb.s_inode_size) --#define EXT3_FIRST_INO(s) ((s)->u.ext3_sb.s_first_ino) -+#define 
EXT3_SB(sb) (&((sb)->u.ext3_sb)) -+#define EXT3_I(inode) (&((inode)->u.ext3_i)) -+ -+#define EXT3_BLOCK_SIZE(s) ((s)->s_blocksize) -+#define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) -+#define EXT3_ADDR_PER_BLOCK_BITS(s) (EXT3_SB(s)->s_addr_per_block_bits) -+#define EXT3_INODE_SIZE(s) (EXT3_SB(s)->s_inode_size) -+#define EXT3_FIRST_INO(s) (EXT3_SB(s)->s_first_ino) - #else -+ -+/* Assume that user mode programs are passing in an ext3fs superblock, not -+ * a kernel struct super_block. This will allow us to call the feature-test -+ * macros from user land. */ -+#define EXT3_SB(sb) (sb) -+ -+#define EXT3_BLOCK_SIZE(s) (EXT3_MIN_BLOCK_SIZE << (s)->s_log_block_size) -+#define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10) - #define EXT3_INODE_SIZE(s) (((s)->s_rev_level == EXT3_GOOD_OLD_REV) ? \ - EXT3_GOOD_OLD_INODE_SIZE : \ - (s)->s_inode_size) -@@ -110,6 +113,7 @@ - EXT3_GOOD_OLD_FIRST_INO : \ - (s)->s_first_ino) - #endif -+#define EXT3_ADDR_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (__u32)) - - /* - * Macro-instructions used to manage fragments -@@ -118,8 +122,8 @@ - #define EXT3_MAX_FRAG_SIZE 4096 - #define EXT3_MIN_FRAG_LOG_SIZE 10 - #ifdef __KERNEL__ --# define EXT3_FRAG_SIZE(s) ((s)->u.ext3_sb.s_frag_size) --# define EXT3_FRAGS_PER_BLOCK(s) ((s)->u.ext3_sb.s_frags_per_block) -+# define EXT3_FRAG_SIZE(s) (EXT3_SB(s)->s_frag_size) -+# define EXT3_FRAGS_PER_BLOCK(s) (EXT3_SB(s)->s_frags_per_block) - #else - # define EXT3_FRAG_SIZE(s) (EXT3_MIN_FRAG_SIZE << (s)->s_log_frag_size) - # define EXT3_FRAGS_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / EXT3_FRAG_SIZE(s)) -@@ -143,15 +147,13 @@ struct ext3_group_desc - /* - * Macro-instructions used to manage group descriptors - */ -+# define EXT3_BLOCKS_PER_GROUP(s) (EXT3_SB(s)->s_blocks_per_group) -+# define EXT3_INODES_PER_GROUP(s) (EXT3_SB(s)->s_inodes_per_group) - #ifdef __KERNEL__ --# define EXT3_BLOCKS_PER_GROUP(s) ((s)->u.ext3_sb.s_blocks_per_group) --# define EXT3_DESC_PER_BLOCK(s) ((s)->u.ext3_sb.s_desc_per_block) 
--# define EXT3_INODES_PER_GROUP(s) ((s)->u.ext3_sb.s_inodes_per_group) --# define EXT3_DESC_PER_BLOCK_BITS(s) ((s)->u.ext3_sb.s_desc_per_block_bits) -+# define EXT3_DESC_PER_BLOCK(s) (EXT3_SB(s)->s_desc_per_block) -+# define EXT3_DESC_PER_BLOCK_BITS(s) (EXT3_SB(s)->s_desc_per_block_bits) - #else --# define EXT3_BLOCKS_PER_GROUP(s) ((s)->s_blocks_per_group) - # define EXT3_DESC_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (struct ext3_group_desc)) --# define EXT3_INODES_PER_GROUP(s) ((s)->s_inodes_per_group) - #endif - - /* -@@ -325,7 +327,7 @@ struct ext3_inode { - #ifndef _LINUX_EXT2_FS_H - #define clear_opt(o, opt) o &= ~EXT3_MOUNT_##opt - #define set_opt(o, opt) o |= EXT3_MOUNT_##opt --#define test_opt(sb, opt) ((sb)->u.ext3_sb.s_mount_opt & \ -+#define test_opt(sb, opt) (EXT3_SB(sb)->s_mount_opt & \ - EXT3_MOUNT_##opt) - #else - #define EXT2_MOUNT_NOLOAD EXT3_MOUNT_NOLOAD -@@ -425,17 +427,11 @@ struct ext3_super_block { - __u32 s_reserved[192]; /* Padding to the end of the block */ - }; - --#ifdef __KERNEL__ --#define EXT3_SB(sb) (&((sb)->u.ext3_sb)) --#define EXT3_I(inode) (&((inode)->u.ext3_i)) --#else --/* Assume that user mode programs are passing in an ext3fs superblock, not -- * a kernel struct super_block. This will allow us to call the feature-test -- * macros from user land. 
*/ --#define EXT3_SB(sb) (sb) --#endif -- --#define NEXT_ORPHAN(inode) (inode)->u.ext3_i.i_dtime -+#define NEXT_ORPHAN(inode) EXT3_I(inode)->i_dtime -+static inline struct inode *orphan_list_entry(struct list_head *l) -+{ -+ return list_entry(l, struct inode, u.ext3_i.i_orphan); -+} - - /* - * Codes for operating systems ---- linux-2.4.18-chaos/include/linux/ext3_jbd.h~ext3-2.4.18-ino_sb_macro-2 2003-09-16 23:34:16.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/include/linux/ext3_jbd.h 2003-09-16 23:34:40.000000000 +0400 -@@ -297,7 +297,7 @@ static inline int ext3_should_journal_da - return 1; - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA) - return 1; -- if (inode->u.ext3_i.i_flags & EXT3_JOURNAL_DATA_FL) -+ if (EXT3_I(inode)->i_flags & EXT3_JOURNAL_DATA_FL) - return 1; - return 0; - } diff --git a/lustre/kernel_patches/patches/ext3-2.4.18-ino_sb_macro.patch b/lustre/kernel_patches/patches/ext3-2.4.18-ino_sb_macro.patch deleted file mode 100644 index 2ddff7d..0000000 --- a/lustre/kernel_patches/patches/ext3-2.4.18-ino_sb_macro.patch +++ /dev/null @@ -1,1540 +0,0 @@ ---- ./fs/ext3/balloc.c.orig Fri Apr 12 10:27:49 2002 -+++ ./fs/ext3/balloc.c Tue May 7 15:35:59 2002 -@@ -46,18 +46,18 @@ struct ext3_group_desc * ext3_get_group_ - unsigned long desc; - struct ext3_group_desc * gdp; - -- if (block_group >= sb->u.ext3_sb.s_groups_count) { -+ if (block_group >= EXT3_SB(sb)->s_groups_count) { - ext3_error (sb, "ext3_get_group_desc", - "block_group >= groups_count - " - "block_group = %d, groups_count = %lu", -- block_group, sb->u.ext3_sb.s_groups_count); -+ block_group, EXT3_SB(sb)->s_groups_count); - - return NULL; - } - - group_desc = block_group / EXT3_DESC_PER_BLOCK(sb); - desc = block_group % EXT3_DESC_PER_BLOCK(sb); -- if (!sb->u.ext3_sb.s_group_desc[group_desc]) { -+ if (!EXT3_SB(sb)->s_group_desc[group_desc]) { - ext3_error (sb, "ext3_get_group_desc", - "Group descriptor not loaded - " - "block_group = %d, group_desc = %lu, desc = %lu", -@@ 
-66,9 +66,9 @@ struct ext3_group_desc * ext3_get_group_ - } - - gdp = (struct ext3_group_desc *) -- sb->u.ext3_sb.s_group_desc[group_desc]->b_data; -+ EXT3_SB(sb)->s_group_desc[group_desc]->b_data; - if (bh) -- *bh = sb->u.ext3_sb.s_group_desc[group_desc]; -+ *bh = EXT3_SB(sb)->s_group_desc[group_desc]; - return gdp + desc; - } - -@@ -104,8 +104,8 @@ static int read_block_bitmap (struct sup - * this group. The IO will be retried next time. - */ - error_out: -- sb->u.ext3_sb.s_block_bitmap_number[bitmap_nr] = block_group; -- sb->u.ext3_sb.s_block_bitmap[bitmap_nr] = bh; -+ EXT3_SB(sb)->s_block_bitmap_number[bitmap_nr] = block_group; -+ EXT3_SB(sb)->s_block_bitmap[bitmap_nr] = bh; - return retval; - } - -@@ -128,16 +128,17 @@ static int __load_block_bitmap (struct s - int i, j, retval = 0; - unsigned long block_bitmap_number; - struct buffer_head * block_bitmap; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); - -- if (block_group >= sb->u.ext3_sb.s_groups_count) -+ if (block_group >= sbi->s_groups_count) - ext3_panic (sb, "load_block_bitmap", - "block_group >= groups_count - " - "block_group = %d, groups_count = %lu", -- block_group, sb->u.ext3_sb.s_groups_count); -+ block_group, EXT3_SB(sb)->s_groups_count); - -- if (sb->u.ext3_sb.s_groups_count <= EXT3_MAX_GROUP_LOADED) { -- if (sb->u.ext3_sb.s_block_bitmap[block_group]) { -- if (sb->u.ext3_sb.s_block_bitmap_number[block_group] == -+ if (sbi->s_groups_count <= EXT3_MAX_GROUP_LOADED) { -+ if (sbi->s_block_bitmap[block_group]) { -+ if (sbi->s_block_bitmap_number[block_group] == - block_group) - return block_group; - ext3_error (sb, "__load_block_bitmap", -@@ -149,21 +150,20 @@ static int __load_block_bitmap (struct s - return block_group; - } - -- for (i = 0; i < sb->u.ext3_sb.s_loaded_block_bitmaps && -- sb->u.ext3_sb.s_block_bitmap_number[i] != block_group; i++) -+ for (i = 0; i < sbi->s_loaded_block_bitmaps && -+ sbi->s_block_bitmap_number[i] != block_group; i++) - ; -- if (i < sb->u.ext3_sb.s_loaded_block_bitmaps && 
-- sb->u.ext3_sb.s_block_bitmap_number[i] == block_group) { -- block_bitmap_number = sb->u.ext3_sb.s_block_bitmap_number[i]; -- block_bitmap = sb->u.ext3_sb.s_block_bitmap[i]; -+ if (i < sbi->s_loaded_block_bitmaps && -+ sbi->s_block_bitmap_number[i] == block_group) { -+ block_bitmap_number = sbi->s_block_bitmap_number[i]; -+ block_bitmap = sbi->s_block_bitmap[i]; - for (j = i; j > 0; j--) { -- sb->u.ext3_sb.s_block_bitmap_number[j] = -- sb->u.ext3_sb.s_block_bitmap_number[j - 1]; -- sb->u.ext3_sb.s_block_bitmap[j] = -- sb->u.ext3_sb.s_block_bitmap[j - 1]; -+ sbi->s_block_bitmap_number[j] = -+ sbi->s_block_bitmap_number[j - 1]; -+ sbi->s_block_bitmap[j] = sbi->s_block_bitmap[j - 1]; - } -- sb->u.ext3_sb.s_block_bitmap_number[0] = block_bitmap_number; -- sb->u.ext3_sb.s_block_bitmap[0] = block_bitmap; -+ sbi->s_block_bitmap_number[0] = block_bitmap_number; -+ sbi->s_block_bitmap[0] = block_bitmap; - - /* - * There's still one special case here --- if block_bitmap == 0 -@@ -173,17 +173,14 @@ static int __load_block_bitmap (struct s - if (!block_bitmap) - retval = read_block_bitmap (sb, block_group, 0); - } else { -- if (sb->u.ext3_sb.s_loaded_block_bitmapsu.ext3_sb.s_loaded_block_bitmaps++; -+ if (sbi->s_loaded_block_bitmapss_loaded_block_bitmaps++; - else -- brelse (sb->u.ext3_sb.s_block_bitmap -- [EXT3_MAX_GROUP_LOADED - 1]); -- for (j = sb->u.ext3_sb.s_loaded_block_bitmaps - 1; -- j > 0; j--) { -- sb->u.ext3_sb.s_block_bitmap_number[j] = -- sb->u.ext3_sb.s_block_bitmap_number[j - 1]; -- sb->u.ext3_sb.s_block_bitmap[j] = -- sb->u.ext3_sb.s_block_bitmap[j - 1]; -+ brelse(sbi->s_block_bitmap[EXT3_MAX_GROUP_LOADED - 1]); -+ for (j = sbi->s_loaded_block_bitmaps - 1; j > 0; j--) { -+ sbi->s_block_bitmap_number[j] = -+ sbi->s_block_bitmap_number[j - 1]; -+ sbi->s_block_bitmap[j] = sbi->s_block_bitmap[j - 1]; - } - retval = read_block_bitmap (sb, block_group, 0); - } -@@ -206,24 +203,25 @@ static int __load_block_bitmap (struct s - static inline int load_block_bitmap 
(struct super_block * sb, - unsigned int block_group) - { -+ struct ext3_sb_info *sbi = EXT3_SB(sb); - int slot; -- -+ - /* - * Do the lookup for the slot. First of all, check if we're asking - * for the same slot as last time, and did we succeed that last time? - */ -- if (sb->u.ext3_sb.s_loaded_block_bitmaps > 0 && -- sb->u.ext3_sb.s_block_bitmap_number[0] == block_group && -- sb->u.ext3_sb.s_block_bitmap[0]) { -+ if (sbi->s_loaded_block_bitmaps > 0 && -+ sbi->s_block_bitmap_number[0] == block_group && -+ sbi->s_block_bitmap[0]) { - return 0; - } - /* - * Or can we do a fast lookup based on a loaded group on a filesystem - * small enough to be mapped directly into the superblock? - */ -- else if (sb->u.ext3_sb.s_groups_count <= EXT3_MAX_GROUP_LOADED && -- sb->u.ext3_sb.s_block_bitmap_number[block_group]==block_group -- && sb->u.ext3_sb.s_block_bitmap[block_group]) { -+ else if (sbi->s_groups_count <= EXT3_MAX_GROUP_LOADED && -+ sbi->s_block_bitmap_number[block_group] == block_group -+ && sbi->s_block_bitmap[block_group]) { - slot = block_group; - } - /* -@@ -243,7 +241,7 @@ static inline int load_block_bitmap (str - * If it's a valid slot, we may still have cached a previous IO error, - * in which case the bh in the superblock cache will be zero. 
- */ -- if (!sb->u.ext3_sb.s_block_bitmap[slot]) -+ if (!sbi->s_block_bitmap[slot]) - return -EIO; - - /* -@@ -275,7 +273,7 @@ void ext3_free_blocks (handle_t *handle, - return; - } - lock_super (sb); -- es = sb->u.ext3_sb.s_es; -+ es = EXT3_SB(sb)->s_es; - if (block < le32_to_cpu(es->s_first_data_block) || - block + count < block || - (block + count) > le32_to_cpu(es->s_blocks_count)) { -@@ -304,7 +302,7 @@ do_more: - if (bitmap_nr < 0) - goto error_return; - -- bitmap_bh = sb->u.ext3_sb.s_block_bitmap[bitmap_nr]; -+ bitmap_bh = EXT3_SB(sb)->s_block_bitmap[bitmap_nr]; - gdp = ext3_get_group_desc (sb, block_group, &gd_bh); - if (!gdp) - goto error_return; -@@ -330,8 +328,8 @@ do_more: - if (err) - goto error_return; - -- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access"); -- err = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh); -+ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); - if (err) - goto error_return; - -@@ -341,7 +339,7 @@ - if (block == le32_to_cpu(gdp->bg_block_bitmap) || - block == le32_to_cpu(gdp->bg_inode_bitmap) || - in_range(block, le32_to_cpu(gdp->bg_inode_table), -- sb->u.ext2_sb.s_itb_per_group)) { -+ EXT3_SB(sb)->s_itb_per_group)) { - ext3_error(sb, __FUNCTION__, - "Freeing block in system zone - block = %lu", - block); -@@ -410,8 +407,8 @@ do_more: - if (!err) err = ret; - - /* And the superblock */ -- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "dirtied superblock"); -- ret = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh); -+ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "dirtied superblock"); -+ ret = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); - if (!err) err = ret; - - if (overflow && !err) { -@@ -564,12 +560,12 @@ int ext3_new_block (handle_t *handle, st - } - - lock_super (sb); -- es = sb->u.ext3_sb.s_es; -+ es = EXT3_SB(sb)->s_es; - if (le32_to_cpu(es->s_free_blocks_count) <= - le32_to_cpu(es->s_r_blocks_count) && -- ((sb->u.ext3_sb.s_resuid != 
current->fsuid) && -- (sb->u.ext3_sb.s_resgid == 0 || -- !in_group_p (sb->u.ext3_sb.s_resgid)) && -+ ((EXT3_SB(sb)->s_resuid != current->fsuid) && -+ (EXT3_SB(sb)->s_resgid == 0 || -+ !in_group_p (EXT3_SB(sb)->s_resgid)) && - !capable(CAP_SYS_RESOURCE))) - goto out; - -@@ -598,7 +595,7 @@ int ext3_new_block (handle_t *handle, st - if (bitmap_nr < 0) - goto io_error; - -- bh = sb->u.ext3_sb.s_block_bitmap[bitmap_nr]; -+ bh = EXT3_SB(sb)->s_block_bitmap[bitmap_nr]; - - ext3_debug ("goal is at %d:%d.\n", i, j); - -@@ -621,9 +618,9 @@ int ext3_new_block (handle_t *handle, st - * Now search the rest of the groups. We assume that - * i and gdp correctly point to the last group visited. - */ -- for (k = 0; k < sb->u.ext3_sb.s_groups_count; k++) { -+ for (k = 0; k < EXT3_SB(sb)->s_groups_count; k++) { - i++; -- if (i >= sb->u.ext3_sb.s_groups_count) -+ if (i >= EXT3_SB(sb)->s_groups_count) - i = 0; - gdp = ext3_get_group_desc (sb, i, &bh2); - if (!gdp) { -@@ -635,7 +632,7 @@ int ext3_new_block (handle_t *handle, st - if (bitmap_nr < 0) - goto io_error; - -- bh = sb->u.ext3_sb.s_block_bitmap[bitmap_nr]; -+ bh = EXT3_SB(sb)->s_block_bitmap[bitmap_nr]; - j = find_next_usable_block(-1, bh, - EXT3_BLOCKS_PER_GROUP(sb)); - if (j >= 0) -@@ -674,8 +671,8 @@ got_block: - fatal = ext3_journal_get_write_access(handle, bh2); - if (fatal) goto out; - -- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access"); -- fatal = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh); -+ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access"); -+ fatal = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); - if (fatal) goto out; - - tmp = j + i * EXT3_BLOCKS_PER_GROUP(sb) -@@ -796,7 +804,7 @@ got_block: - if (!fatal) fatal = err; - - BUFFER_TRACE(bh, "journal_dirty_metadata for superblock"); -- err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh); -+ err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); - if (!fatal) fatal = err; - - sb->s_dirt = 1; -@@ -829,11 +837,11 @@ 
unsigned long ext3_count_free_blocks (st - int i; - - lock_super (sb); -- es = sb->u.ext3_sb.s_es; -+ es = EXT3_SB(sb)->s_es; - desc_count = 0; - bitmap_count = 0; - gdp = NULL; -- for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) { -+ for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) { - gdp = ext3_get_group_desc (sb, i, NULL); - if (!gdp) - continue; -@@ -842,7 +850,7 @@ unsigned long ext3_count_free_blocks (st - if (bitmap_nr < 0) - continue; - -- x = ext3_count_free (sb->u.ext3_sb.s_block_bitmap[bitmap_nr], -+ x = ext3_count_free (EXT3_SB(sb)->s_block_bitmap[bitmap_nr], - sb->s_blocksize); - printk ("group %d: stored = %d, counted = %lu\n", - i, le16_to_cpu(gdp->bg_free_blocks_count), x); -@@ -853,7 +861,7 @@ unsigned long ext3_count_free_blocks (st - unlock_super (sb); - return bitmap_count; - #else -- return le32_to_cpu(sb->u.ext3_sb.s_es->s_free_blocks_count); -+ return le32_to_cpu(EXT3_SB(sb)->s_es->s_free_blocks_count); - #endif - } - -@@ -862,7 +870,7 @@ static inline int block_in_use (unsigned - unsigned char * map) - { - return ext3_test_bit ((block - -- le32_to_cpu(sb->u.ext3_sb.s_es->s_first_data_block)) % -+ le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) % - EXT3_BLOCKS_PER_GROUP(sb), map); - } - -@@ -930,11 +938,11 @@ void ext3_check_blocks_bitmap (struct su - struct ext3_group_desc * gdp; - int i; - -- es = sb->u.ext3_sb.s_es; -+ es = EXT3_SB(sb)->s_es; - desc_count = 0; - bitmap_count = 0; - gdp = NULL; -- for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) { -+ for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) { - gdp = ext3_get_group_desc (sb, i, NULL); - if (!gdp) - continue; -@@ -968,7 +976,7 @@ void ext3_check_blocks_bitmap (struct su - "Inode bitmap for group %d is marked free", - i); - -- for (j = 0; j < sb->u.ext3_sb.s_itb_per_group; j++) -+ for (j = 0; j < EXT3_SB(sb)->s_itb_per_group; j++) - if (!block_in_use (le32_to_cpu(gdp->bg_inode_table) + j, - sb, bh->b_data)) - ext3_error (sb, "ext3_check_blocks_bitmap", ---- 
./fs/ext3/dir.c.orig Fri Apr 12 10:27:49 2002 -+++ ./fs/ext3/dir.c Tue May 7 14:54:13 2002 -@@ -52,7 +52,7 @@ int ext3_check_dir_entry (const char * f - else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize) - error_msg = "directory entry across blocks"; - else if (le32_to_cpu(de->inode) > -- le32_to_cpu(dir->i_sb->u.ext3_sb.s_es->s_inodes_count)) -+ le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count)) - error_msg = "inode out of bounds"; - - if (error_msg != NULL) ---- ./fs/ext3/ialloc.c.orig Fri Apr 12 10:27:49 2002 -+++ ./fs/ext3/ialloc.c Tue May 7 15:39:26 2002 -@@ -73,8 +73,8 @@ static int read_inode_bitmap (struct sup - * this group. The IO will be retried next time. - */ - error_out: -- sb->u.ext3_sb.s_inode_bitmap_number[bitmap_nr] = block_group; -- sb->u.ext3_sb.s_inode_bitmap[bitmap_nr] = bh; -+ EXT3_SB(sb)->s_inode_bitmap_number[bitmap_nr] = block_group; -+ EXT3_SB(sb)->s_inode_bitmap[bitmap_nr] = bh; - return retval; - } - -@@ -225,7 +225,7 @@ void ext3_free_inode (handle_t *handle, - clear_inode (inode); - - lock_super (sb); -- es = sb->u.ext3_sb.s_es; -+ es = EXT3_SB(sb)->s_es; - if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { - ext3_error (sb, "ext3_free_inode", - "reserved or nonexistent inode %lu", ino); -@@ -237,7 +237,7 @@ void ext3_free_inode (handle_t *handle, - if (bitmap_nr < 0) - goto error_return; - -- bh = sb->u.ext3_sb.s_inode_bitmap[bitmap_nr]; -+ bh = EXT3_SB(sb)->s_inode_bitmap[bitmap_nr]; - - BUFFER_TRACE(bh, "get_write_access"); - fatal = ext3_journal_get_write_access(handle, bh); -@@ -255,8 +255,8 @@ void ext3_free_inode (handle_t *handle, - fatal = ext3_journal_get_write_access(handle, bh2); - if (fatal) goto error_return; - -- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get write access"); -- fatal = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh); -+ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get write access"); -+ fatal = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); - if (fatal) 
goto error_return; - - if (gdp) { -@@ -271,9 +271,9 @@ void ext3_free_inode (handle_t *handle, - if (!fatal) fatal = err; - es->s_free_inodes_count = - cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) + 1); -- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, -+ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, - "call ext3_journal_dirty_metadata"); -- err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh); -+ err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); - if (!fatal) fatal = err; - } - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); -@@ -305,6 +305,8 @@ struct inode * ext3_new_inode (handle_t - int i, j, avefreei; - struct inode * inode; - int bitmap_nr; -+ struct ext3_inode_info *ei; -+ struct ext3_sb_info *sbi; - struct ext3_group_desc * gdp; - struct ext3_group_desc * tmp; - struct ext3_super_block * es; -@@ -318,7 +320,9 @@ struct inode * ext3_new_inode (handle_t - inode = new_inode(sb); - if (!inode) - return ERR_PTR(-ENOMEM); -- init_rwsem(&inode->u.ext3_i.truncate_sem); -+ sbi = EXT3_SB(sb); -+ ei = EXT3_I(inode); -+ init_rwsem(&ei->truncate_sem); - - lock_super (sb); - es = sb->u.ext3_sb.s_es; -@@ -328,9 +332,9 @@ struct inode * ext3_new_inode (handle_t - - if (S_ISDIR(mode)) { - avefreei = le32_to_cpu(es->s_free_inodes_count) / -- sb->u.ext3_sb.s_groups_count; -+ sbi->s_groups_count; - if (!gdp) { -- for (j = 0; j < sb->u.ext3_sb.s_groups_count; j++) { -+ for (j = 0; j < sbi->s_groups_count; j++) { - struct buffer_head *temp_buffer; - tmp = ext3_get_group_desc (sb, j, &temp_buffer); - if (tmp && -@@ -350,7 +354,7 @@ repeat: - /* - * Try to place the inode in its parent directory - */ -- i = dir->u.ext3_i.i_block_group; -+ i = EXT3_I(dir)->i_block_group; - tmp = ext3_get_group_desc (sb, i, &bh2); - if (tmp && le16_to_cpu(tmp->bg_free_inodes_count)) - gdp = tmp; -@@ -360,10 +364,10 @@ repeat: - * Use a quadratic hash to find a group with a - * free inode - */ -- for (j = 1; j < sb->u.ext3_sb.s_groups_count; j <<= 1) { -+ for (j = 1; j < sbi->s_groups_count; 
j <<= 1) { - i += j; -- if (i >= sb->u.ext3_sb.s_groups_count) -- i -= sb->u.ext3_sb.s_groups_count; -+ if (i >= sbi->s_groups_count) -+ i -= sbi->s_groups_count; - tmp = ext3_get_group_desc (sb, i, &bh2); - if (tmp && - le16_to_cpu(tmp->bg_free_inodes_count)) { -@@ -376,9 +380,9 @@ repeat: - /* - * That failed: try linear search for a free inode - */ -- i = dir->u.ext3_i.i_block_group + 1; -- for (j = 2; j < sb->u.ext3_sb.s_groups_count; j++) { -- if (++i >= sb->u.ext3_sb.s_groups_count) -+ i = EXT3_I(dir)->i_block_group + 1; -+ for (j = 2; j < sbi->s_groups_count; j++) { -+ if (++i >= sbi->s_groups_count) - i = 0; - tmp = ext3_get_group_desc (sb, i, &bh2); - if (tmp && -@@ -399,11 +403,11 @@ repeat: - if (bitmap_nr < 0) - goto fail; - -- bh = sb->u.ext3_sb.s_inode_bitmap[bitmap_nr]; -+ bh = sbi->s_inode_bitmap[bitmap_nr]; - - if ((j = ext3_find_first_zero_bit ((unsigned long *) bh->b_data, -- EXT3_INODES_PER_GROUP(sb))) < -- EXT3_INODES_PER_GROUP(sb)) { -+ sbi->s_inodes_per_group)) < -+ sbi->s_inodes_per_group) { - BUFFER_TRACE(bh, "get_write_access"); - err = ext3_journal_get_write_access(handle, bh); - if (err) goto fail; -@@ -457,13 +461,13 @@ repeat: - err = ext3_journal_dirty_metadata(handle, bh2); - if (err) goto fail; - -- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access"); -- err = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh); -+ BUFFER_TRACE(sbi->s_sbh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, sbi->s_sbh); - if (err) goto fail; - es->s_free_inodes_count = - cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) - 1); -- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "call ext3_journal_dirty_metadata"); -- err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh); -+ BUFFER_TRACE(sbi->s_sbh, "call ext3_journal_dirty_metadata"); -+ err = ext3_journal_dirty_metadata(handle, sbi->s_sbh); - sb->s_dirt = 1; - if (err) goto fail; - -@@ -483,31 +487,31 @@ repeat: - inode->i_blksize = PAGE_SIZE; - inode->i_blocks = 0; - 
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; -- inode->u.ext3_i.i_flags = dir->u.ext3_i.i_flags & ~EXT3_INDEX_FL; -+ ei->i_flags = EXT3_I(dir)->i_flags & ~EXT3_INDEX_FL; - if (S_ISLNK(mode)) -- inode->u.ext3_i.i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL); -+ ei->i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL); - #ifdef EXT3_FRAGMENTS -- inode->u.ext3_i.i_faddr = 0; -- inode->u.ext3_i.i_frag_no = 0; -- inode->u.ext3_i.i_frag_size = 0; -+ ei->i_faddr = 0; -+ ei->i_frag_no = 0; -+ ei->i_frag_size = 0; - #endif -- inode->u.ext3_i.i_file_acl = 0; -- inode->u.ext3_i.i_dir_acl = 0; -- inode->u.ext3_i.i_dtime = 0; -- INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan); -+ ei->i_file_acl = 0; -+ ei->i_dir_acl = 0; -+ ei->i_dtime = 0; -+ INIT_LIST_HEAD(&ei->i_orphan); - #ifdef EXT3_PREALLOCATE -- inode->u.ext3_i.i_prealloc_count = 0; -+ ei->i_prealloc_count = 0; - #endif -- inode->u.ext3_i.i_block_group = i; -+ ei->i_block_group = i; - -- if (inode->u.ext3_i.i_flags & EXT3_SYNC_FL) -+ if (ei->i_flags & EXT3_SYNC_FL) - inode->i_flags |= S_SYNC; - if (IS_SYNC(inode)) - handle->h_sync = 1; - insert_inode_hash(inode); -- inode->i_generation = sb->u.ext3_sb.s_next_generation++; -+ inode->i_generation = sbi->s_next_generation++; - -- inode->u.ext3_i.i_state = EXT3_STATE_NEW; -+ ei->i_state = EXT3_STATE_NEW; - err = ext3_mark_inode_dirty(handle, inode); - if (err) goto fail; - -@@ -585,19 +589,19 @@ struct inode *ext3_orphan_get (struct su - - unsigned long ext3_count_free_inodes (struct super_block * sb) - { -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct ext3_super_block *es = sbi->s_es; - #ifdef EXT3FS_DEBUG -- struct ext3_super_block * es; - unsigned long desc_count, bitmap_count, x; - int bitmap_nr; - struct ext3_group_desc * gdp; - int i; - - lock_super (sb); -- es = sb->u.ext3_sb.s_es; - desc_count = 0; - bitmap_count = 0; - gdp = NULL; -- for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) { -+ for (i = 0; i < sbi->s_groups_count; i++) { - gdp = 
ext3_get_group_desc (sb, i, NULL); - if (!gdp) - continue; -@@ -606,8 +610,8 @@ unsigned long ext3_count_free_inodes (st - if (bitmap_nr < 0) - continue; - -- x = ext3_count_free (sb->u.ext3_sb.s_inode_bitmap[bitmap_nr], -- EXT3_INODES_PER_GROUP(sb) / 8); -+ x = ext3_count_free(sbi->s_inode_bitmap[bitmap_nr], -+ sbi->s_inodes_per_group / 8); - printk ("group %d: stored = %d, counted = %lu\n", - i, le16_to_cpu(gdp->bg_free_inodes_count), x); - bitmap_count += x; -@@ -617,7 +621,7 @@ unsigned long ext3_count_free_inodes (st - unlock_super (sb); - return desc_count; - #else -- return le32_to_cpu(sb->u.ext3_sb.s_es->s_free_inodes_count); -+ return le32_to_cpu(es->s_free_inodes_count); - #endif - } - -@@ -626,16 +630,18 @@ unsigned long ext3_count_free_inodes (st - void ext3_check_inodes_bitmap (struct super_block * sb) - { - struct ext3_super_block * es; -+ struct ext3_sb_info *sbi; - unsigned long desc_count, bitmap_count, x; - int bitmap_nr; - struct ext3_group_desc * gdp; - int i; - -- es = sb->u.ext3_sb.s_es; -+ sbi = EXT3_SB(sb); -+ es = sbi->s_es; - desc_count = 0; - bitmap_count = 0; - gdp = NULL; -- for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) { -+ for (i = 0; i < sbi->s_groups_count; i++) { - gdp = ext3_get_group_desc (sb, i, NULL); - if (!gdp) - continue; -@@ -644,7 +650,7 @@ void ext3_check_inodes_bitmap (struct su - if (bitmap_nr < 0) - continue; - -- x = ext3_count_free (sb->u.ext3_sb.s_inode_bitmap[bitmap_nr], -+ x = ext3_count_free (sbi->s_inode_bitmap[bitmap_nr], - EXT3_INODES_PER_GROUP(sb) / 8); - if (le16_to_cpu(gdp->bg_free_inodes_count) != x) - ext3_error (sb, "ext3_check_inodes_bitmap", ---- ./fs/ext3/inode.c.orig Fri Apr 12 10:27:49 2002 -+++ ./fs/ext3/inode.c Tue May 7 15:41:23 2002 -@@ -196,7 +196,7 @@ void ext3_delete_inode (struct inode * i - * (Well, we could do this if we need to, but heck - it works) - */ - ext3_orphan_del(handle, inode); -- inode->u.ext3_i.i_dtime = CURRENT_TIME; -+ EXT3_I(inode)->i_dtime = CURRENT_TIME; - - /* - * 
One subtle ordering requirement: if anything has gone wrong -@@ -220,13 +220,14 @@ no_delete: - void ext3_discard_prealloc (struct inode * inode) - { - #ifdef EXT3_PREALLOCATE -+ struct ext3_inode_info *ei = EXT3_I(inode); - lock_kernel(); - /* Writer: ->i_prealloc* */ -- if (inode->u.ext3_i.i_prealloc_count) { -- unsigned short total = inode->u.ext3_i.i_prealloc_count; -- unsigned long block = inode->u.ext3_i.i_prealloc_block; -- inode->u.ext3_i.i_prealloc_count = 0; -- inode->u.ext3_i.i_prealloc_block = 0; -+ if (ei->i_prealloc_count) { -+ unsigned short total = ei->i_prealloc_count; -+ unsigned long block = ei->i_prealloc_block; -+ ei->i_prealloc_count = 0; -+ ei->i_prealloc_block = 0; - /* Writer: end */ - ext3_free_blocks (inode, block, total); - } -@@ -243,13 +244,15 @@ static int ext3_alloc_block (handle_t *h - unsigned long result; - - #ifdef EXT3_PREALLOCATE -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ - /* Writer: ->i_prealloc* */ -- if (inode->u.ext3_i.i_prealloc_count && -- (goal == inode->u.ext3_i.i_prealloc_block || -- goal + 1 == inode->u.ext3_i.i_prealloc_block)) -+ if (ei->i_prealloc_count && -+ (goal == ei->i_prealloc_block || -+ goal + 1 == ei->i_prealloc_block)) - { -- result = inode->u.ext3_i.i_prealloc_block++; -- inode->u.ext3_i.i_prealloc_count--; -+ result = ei->i_prealloc_block++; -+ ei->i_prealloc_count--; - /* Writer: end */ - ext3_debug ("preallocation hit (%lu/%lu).\n", - ++alloc_hits, ++alloc_attempts); -@@ -259,8 +262,8 @@ static int ext3_alloc_block (handle_t *h - alloc_hits, ++alloc_attempts); - if (S_ISREG(inode->i_mode)) - result = ext3_new_block (inode, goal, -- &inode->u.ext3_i.i_prealloc_count, -- &inode->u.ext3_i.i_prealloc_block, err); -+ &ei->i_prealloc_count, -+ &ei->i_prealloc_block, err); - else - result = ext3_new_block (inode, goal, 0, 0, err); - /* -@@ -394,7 +397,7 @@ static Indirect *ext3_get_branch(struct - - *err = 0; - /* i_data is not going away, no lock needed */ -- add_chain (chain, NULL, 
inode->u.ext3_i.i_data + *offsets); -+ add_chain (chain, NULL, EXT3_I(inode)->i_data + *offsets); - if (!p->key) - goto no_block; - while (--depth) { -@@ -437,7 +440,8 @@ no_block: - - static inline unsigned long ext3_find_near(struct inode *inode, Indirect *ind) - { -- u32 *start = ind->bh ? (u32*) ind->bh->b_data : inode->u.ext3_i.i_data; -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ u32 *start = ind->bh ? (u32*) ind->bh->b_data : ei->i_data; - u32 *p; - - /* Try to find previous block */ -@@ -453,9 +456,8 @@ static inline unsigned long ext3_find_ne - * It is going to be refered from inode itself? OK, just put it into - * the same cylinder group then. - */ -- return (inode->u.ext3_i.i_block_group * -- EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + -- le32_to_cpu(inode->i_sb->u.ext3_sb.s_es->s_first_data_block); -+ return (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + -+ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); - } - - /** -@@ -474,14 +477,15 @@ - static int ext3_find_goal(struct inode *inode, long block, Indirect chain[4], - Indirect *partial, unsigned long *goal) - { -+ struct ext3_inode_info *ei = EXT3_I(inode); - /* Writer: ->i_next_alloc* */ -- if (block == inode->u.ext3_i.i_next_alloc_block + 1) { -- inode->u.ext3_i.i_next_alloc_block++; -- inode->u.ext3_i.i_next_alloc_goal++; -+ if (block == ei->i_next_alloc_block + 1) { -+ ei->i_next_alloc_block++; -+ ei->i_next_alloc_goal++; - } - #ifdef SEARCH_FROM_ZERO -- inode->u.ext3_i.i_next_alloc_block = 0; -- inode->u.ext3_i.i_next_alloc_goal = 0; -+ ei->i_next_alloc_block = 0; -+ ei->i_next_alloc_goal = 0; - #endif - /* Writer: end */ - /* Reader: pointers, ->i_next_alloc* */ -@@ -490,8 +493,8 @@ static int ext3_find_goal(struct inode * - * try the heuristic for sequential allocation, - * failing that at least try to get decent locality. 
- */ -- if (block == inode->u.ext3_i.i_next_alloc_block) -- *goal = inode->u.ext3_i.i_next_alloc_goal; -+ if (block == ei->i_next_alloc_block) -+ *goal = ei->i_next_alloc_goal; - if (!*goal) - *goal = ext3_find_near(inode, partial); - #ifdef SEARCH_FROM_ZERO -@@ -619,6 +621,7 @@ - { - int i; - int err = 0; -+ struct ext3_inode_info *ei = EXT3_I(inode); - - /* - * If we're splicing into a [td]indirect block (as opposed to the -@@ -641,11 +644,11 @@ static int ext3_splice_branch(handle_t * - /* That's it */ - - *where->p = where->key; -- inode->u.ext3_i.i_next_alloc_block = block; -- inode->u.ext3_i.i_next_alloc_goal = le32_to_cpu(where[num-1].key); -+ ei->i_next_alloc_block = block; -+ ei->i_next_alloc_goal = le32_to_cpu(where[num-1].key); - #ifdef SEARCH_FROM_ZERO -- inode->u.ext3_i.i_next_alloc_block = 0; -- inode->u.ext3_i.i_next_alloc_goal = 0; -+ ei->i_next_alloc_block = 0; -+ ei->i_next_alloc_goal = 0; - #endif - /* Writer: end */ - -@@ -729,6 +732,7 @@ - unsigned long goal; - int left; - int depth = ext3_block_to_path(inode, iblock, offsets); -+ struct ext3_inode_info *ei = EXT3_I(inode); - loff_t new_size; - - J_ASSERT(handle != NULL || create == 0); -@@ -782,7 +785,7 @@ out: - /* - * Block out ext3_truncate while we alter the tree - */ -- down_read(&inode->u.ext3_i.truncate_sem); -+ down_read(&ei->truncate_sem); - err = ext3_alloc_branch(handle, inode, left, goal, - offsets+(partial-chain), partial); - -@@ -794,7 +797,7 @@ out: - if (!err) - err = ext3_splice_branch(handle, inode, iblock, chain, - partial, left); -- up_read(&inode->u.ext3_i.truncate_sem); -+ up_read(&ei->truncate_sem); - if (err == -EAGAIN) - goto changed; - if (err) -@@ -807,8 +810,8 @@ out: - * truncate is in progress. It is racy between multiple parallel - * instances of get_block, but we have the BKL. 
- */ -- if (new_size > inode->u.ext3_i.i_disksize) -- inode->u.ext3_i.i_disksize = new_size; -+ if (new_size > ei->i_disksize) -+ ei->i_disksize = new_size; - - bh_result->b_state |= (1UL << BH_New); - goto got_it; -@@ -921,7 +924,7 @@ struct buffer_head *ext3_bread(handle_t - struct buffer_head *tmp_bh; - - for (i = 1; -- inode->u.ext3_i.i_prealloc_count && -+ EXT3_I(inode)->i_prealloc_count && - i < EXT3_SB(inode->i_sb)->s_es->s_prealloc_dir_blocks; - i++) { - /* -@@ -1131,8 +1134,8 @@ static int ext3_commit_write(struct file - kunmap(page); - } - } -- if (inode->i_size > inode->u.ext3_i.i_disksize) { -- inode->u.ext3_i.i_disksize = inode->i_size; -+ if (inode->i_size > EXT3_I(inode)->i_disksize) { -+ EXT3_I(inode)->i_disksize = inode->i_size; - ret2 = ext3_mark_inode_dirty(handle, inode); - if (!ret) - ret = ret2; -@@ -1832,7 +1835,8 @@ static void ext3_free_branches(handle_t - void ext3_truncate(struct inode * inode) - { - handle_t *handle; -- u32 *i_data = inode->u.ext3_i.i_data; -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ u32 *i_data = EXT3_I(inode)->i_data; - int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb); - int offsets[4]; - Indirect chain[4]; -@@ -1884,13 +1887,13 @@ void ext3_truncate(struct inode * inode) - * on-disk inode. We do this via i_disksize, which is the value which - * ext3 *really* writes onto the disk inode. - */ -- inode->u.ext3_i.i_disksize = inode->i_size; -+ ei->i_disksize = inode->i_size; - - /* - * From here we block out all ext3_get_block() callers who want to - * modify the block allocation tree. 
- */ -- down_write(&inode->u.ext3_i.truncate_sem); -+ down_write(&ei->truncate_sem); - - if (n == 1) { /* direct blocks */ - ext3_free_data(handle, inode, NULL, i_data+offsets[0], -@@ -1954,7 +1957,7 @@ do_indirects: - case EXT3_TIND_BLOCK: - ; - } -- up_write(&inode->u.ext3_i.truncate_sem); -+ up_write(&ei->truncate_sem); - inode->i_mtime = inode->i_ctime = CURRENT_TIME; - ext3_mark_inode_dirty(handle, inode); - -@@ -1983,6 +1986,8 @@ out_stop: - - int ext3_get_inode_loc (struct inode *inode, struct ext3_iloc *iloc) - { -+ struct super_block *sb = inode->i_sb; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); - struct buffer_head *bh = 0; - unsigned long block; - unsigned long block_group; -@@ -1997,23 +2010,19 @@ int ext3_get_inode_loc (struct inode *in - inode->i_ino != EXT3_JOURNAL_INO && -- inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) || -- inode->i_ino > le32_to_cpu( -- inode->i_sb->u.ext3_sb.s_es->s_inodes_count)) { -- ext3_error (inode->i_sb, "ext3_get_inode_loc", -- "bad inode number: %lu", inode->i_ino); -+ inode->i_ino < EXT3_FIRST_INO(sb)) || -+ inode->i_ino > le32_to_cpu(sbi->s_es->s_inodes_count)) { -+ ext3_error (sb, __FUNCTION__, "bad inode #%lu", inode->i_ino); - goto bad_inode; - } -- block_group = (inode->i_ino - 1) / EXT3_INODES_PER_GROUP(inode->i_sb); -- if (block_group >= inode->i_sb->u.ext3_sb.s_groups_count) { -- ext3_error (inode->i_sb, "ext3_get_inode_loc", -- "group >= groups count"); -+ block_group = (inode->i_ino - 1) / sbi->s_inodes_per_group; -+ if (block_group >= sbi->s_groups_count) { -+ ext3_error(sb, __FUNCTION__, "group >= groups count"); - goto bad_inode; - } -- group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(inode->i_sb); -- desc = block_group & (EXT3_DESC_PER_BLOCK(inode->i_sb) - 1); -- bh = inode->i_sb->u.ext3_sb.s_group_desc[group_desc]; -+ group_desc = block_group >> sbi->s_desc_per_block_bits; -+ desc = block_group & (sbi->s_desc_per_block - 1); -+ bh = sbi->s_group_desc[group_desc]; - if (!bh) { -- ext3_error (inode->i_sb, 
"ext3_get_inode_loc", -- "Descriptor not loaded"); -+ ext3_error(sb, __FUNCTION__, "Descriptor not loaded"); - goto bad_inode; - } - -@@ -2021,17 +2022,17 @@ int ext3_get_inode_loc (struct inode *in - /* - * Figure out the offset within the block group inode table - */ -- offset = ((inode->i_ino - 1) % EXT3_INODES_PER_GROUP(inode->i_sb)) * -- EXT3_INODE_SIZE(inode->i_sb); -+ offset = ((inode->i_ino - 1) % sbi->s_inodes_per_group) * -+ sbi->s_inode_size; - block = le32_to_cpu(gdp[desc].bg_inode_table) + -- (offset >> EXT3_BLOCK_SIZE_BITS(inode->i_sb)); -- if (!(bh = sb_bread(inode->i_sb, block))) { -- ext3_error (inode->i_sb, "ext3_get_inode_loc", -+ (offset >> EXT3_BLOCK_SIZE_BITS(sb)); -+ if (!(bh = sb_bread(sb, block))) { -+ ext3_error (sb, __FUNCTION__, - "unable to read inode block - " - "inode=%lu, block=%lu", inode->i_ino, block); - goto bad_inode; - } -- offset &= (EXT3_BLOCK_SIZE(inode->i_sb) - 1); -+ offset &= (EXT3_BLOCK_SIZE(sb) - 1); - - iloc->bh = bh; - iloc->raw_inode = (struct ext3_inode *) (bh->b_data + offset); -@@ -2047,6 +2048,7 @@ void ext3_read_inode(struct inode * inod - { - struct ext3_iloc iloc; - struct ext3_inode *raw_inode; -+ struct ext3_inode_info *ei = EXT3_I(inode); - struct buffer_head *bh; - int block; - -@@ -2054,7 +2056,7 @@ void ext3_read_inode(struct inode * inod - goto bad_inode; - bh = iloc.bh; - raw_inode = iloc.raw_inode; -- init_rwsem(&inode->u.ext3_i.truncate_sem); -+ init_rwsem(&ei->truncate_sem); - inode->i_mode = le16_to_cpu(raw_inode->i_mode); - inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); - inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); -@@ -2067,7 +2069,7 @@ void ext3_read_inode(struct inode * inod - inode->i_atime = le32_to_cpu(raw_inode->i_atime); - inode->i_ctime = le32_to_cpu(raw_inode->i_ctime); - inode->i_mtime = le32_to_cpu(raw_inode->i_mtime); -- inode->u.ext3_i.i_dtime = le32_to_cpu(raw_inode->i_dtime); -+ ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); - /* We now have enough fields to 
check if the inode was active or not. - * This is needed because nfsd might try to access dead inodes - * the test is that same one that e2fsck uses -@@ -2075,7 +2077,7 @@ void ext3_read_inode(struct inode * inod - */ - if (inode->i_nlink == 0) { - if (inode->i_mode == 0 || -- !(inode->i_sb->u.ext3_sb.s_mount_state & EXT3_ORPHAN_FS)) { -+ !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) { - /* this inode is deleted */ - brelse (bh); - goto bad_inode; -@@ -2090,33 +2092,33 @@ void ext3_read_inode(struct inode * inod - * size */ - inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); - inode->i_version = ++event; -- inode->u.ext3_i.i_flags = le32_to_cpu(raw_inode->i_flags); -+ ei->i_flags = le32_to_cpu(raw_inode->i_flags); - #ifdef EXT3_FRAGMENTS -- inode->u.ext3_i.i_faddr = le32_to_cpu(raw_inode->i_faddr); -- inode->u.ext3_i.i_frag_no = raw_inode->i_frag; -- inode->u.ext3_i.i_frag_size = raw_inode->i_fsize; -+ ei->i_faddr = le32_to_cpu(raw_inode->i_faddr); -+ ei->i_frag_no = raw_inode->i_frag; -+ ei->i_frag_size = raw_inode->i_fsize; - #endif -- inode->u.ext3_i.i_file_acl = le32_to_cpu(raw_inode->i_file_acl); -+ ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl); - if (!S_ISREG(inode->i_mode)) { -- inode->u.ext3_i.i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl); -+ ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl); - } else { - inode->i_size |= - ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32; - } -- inode->u.ext3_i.i_disksize = inode->i_size; -+ ei->i_disksize = inode->i_size; - inode->i_generation = le32_to_cpu(raw_inode->i_generation); - #ifdef EXT3_PREALLOCATE -- inode->u.ext3_i.i_prealloc_count = 0; -+ ei->i_prealloc_count = 0; - #endif -- inode->u.ext3_i.i_block_group = iloc.block_group; -+ ei->i_block_group = iloc.block_group; - - /* - * NOTE! The in-memory inode i_data array is in little-endian order - * even on big-endian machines: we do NOT byteswap the block numbers! 
- */ - for (block = 0; block < EXT3_N_BLOCKS; block++) -- inode->u.ext3_i.i_data[block] = iloc.raw_inode->i_block[block]; -- INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan); -+ ei->i_data[block] = iloc.raw_inode->i_block[block]; -+ INIT_LIST_HEAD(&ei->i_orphan); - - brelse (iloc.bh); - -@@ -2143,17 +2145,17 @@ void ext3_read_inode(struct inode * inod - /* inode->i_attr_flags = 0; unused */ -- if (inode->u.ext3_i.i_flags & EXT3_SYNC_FL) { -+ if (ei->i_flags & EXT3_SYNC_FL) { - /* inode->i_attr_flags |= ATTR_FLAG_SYNCRONOUS; unused */ - inode->i_flags |= S_SYNC; - } -- if (inode->u.ext3_i.i_flags & EXT3_APPEND_FL) { -+ if (ei->i_flags & EXT3_APPEND_FL) { - /* inode->i_attr_flags |= ATTR_FLAG_APPEND; unused */ - inode->i_flags |= S_APPEND; - } -- if (inode->u.ext3_i.i_flags & EXT3_IMMUTABLE_FL) { -+ if (ei->i_flags & EXT3_IMMUTABLE_FL) { - /* inode->i_attr_flags |= ATTR_FLAG_IMMUTABLE; unused */ - inode->i_flags |= S_IMMUTABLE; - } -- if (inode->u.ext3_i.i_flags & EXT3_NOATIME_FL) { -+ if (ei->i_flags & EXT3_NOATIME_FL) { - /* inode->i_attr_flags |= ATTR_FLAG_NOATIME; unused */ - inode->i_flags |= S_NOATIME; - } -@@ -2175,6 +2177,7 @@ static int ext3_do_update_inode(handle_t - struct ext3_iloc *iloc) - { - struct ext3_inode *raw_inode = iloc->raw_inode; -+ struct ext3_inode_info *ei = EXT3_I(inode); - struct buffer_head *bh = iloc->bh; - int err = 0, rc, block; - -@@ -2192,7 +2195,7 @@ static int ext3_do_update_inode(handle_t - * Fix up interoperability with old kernels. 
Otherwise, old inodes get - * re-used with the upper 16 bits of the uid/gid intact - */ -- if(!inode->u.ext3_i.i_dtime) { -+ if(!ei->i_dtime) { - raw_inode->i_uid_high = - cpu_to_le16(high_16_bits(inode->i_uid)); - raw_inode->i_gid_high = -@@ -2210,34 +2213,33 @@ static int ext3_do_update_inode(handle_t - raw_inode->i_gid_high = 0; - } - raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); -- raw_inode->i_size = cpu_to_le32(inode->u.ext3_i.i_disksize); -+ raw_inode->i_size = cpu_to_le32(ei->i_disksize); - raw_inode->i_atime = cpu_to_le32(inode->i_atime); - raw_inode->i_ctime = cpu_to_le32(inode->i_ctime); - raw_inode->i_mtime = cpu_to_le32(inode->i_mtime); - raw_inode->i_blocks = cpu_to_le32(inode->i_blocks); -- raw_inode->i_dtime = cpu_to_le32(inode->u.ext3_i.i_dtime); -- raw_inode->i_flags = cpu_to_le32(inode->u.ext3_i.i_flags); -+ raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); -+ raw_inode->i_flags = cpu_to_le32(ei->i_flags); - #ifdef EXT3_FRAGMENTS -- raw_inode->i_faddr = cpu_to_le32(inode->u.ext3_i.i_faddr); -- raw_inode->i_frag = inode->u.ext3_i.i_frag_no; -- raw_inode->i_fsize = inode->u.ext3_i.i_frag_size; -+ raw_inode->i_faddr = cpu_to_le32(ei->i_faddr); -+ raw_inode->i_frag = ei->i_frag_no; -+ raw_inode->i_fsize = ei->i_frag_size; - #else - /* If we are not tracking these fields in the in-memory inode, - * then preserve them on disk, but still initialise them to zero - * for new inodes. 
*/ -- if (EXT3_I(inode)->i_state & EXT3_STATE_NEW) { -+ if (ei->i_state & EXT3_STATE_NEW) { - raw_inode->i_faddr = 0; - raw_inode->i_frag = 0; - raw_inode->i_fsize = 0; - } - #endif -- raw_inode->i_file_acl = cpu_to_le32(inode->u.ext3_i.i_file_acl); -+ raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl); - if (!S_ISREG(inode->i_mode)) { -- raw_inode->i_dir_acl = cpu_to_le32(inode->u.ext3_i.i_dir_acl); -+ raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl); - } else { -- raw_inode->i_size_high = -- cpu_to_le32(inode->u.ext3_i.i_disksize >> 32); -- if (inode->u.ext3_i.i_disksize > 0x7fffffffULL) { -+ raw_inode->i_size_high = cpu_to_le32(ei->i_disksize >> 32); -+ if (ei->i_disksize > MAX_NON_LFS) { - struct super_block *sb = inode->i_sb; - if (!EXT3_HAS_RO_COMPAT_FEATURE(sb, - EXT3_FEATURE_RO_COMPAT_LARGE_FILE) || -@@ -2247,7 +2249,7 @@ static int ext3_do_update_inode(handle_t - * created, add a flag to the superblock. - */ - err = ext3_journal_get_write_access(handle, -- sb->u.ext3_sb.s_sbh); -+ EXT3_SB(sb)->s_sbh); - if (err) - goto out_brelse; - ext3_update_dynamic_rev(sb); -@@ -2256,7 +2258,7 @@ static int ext3_do_update_inode(handle_t - sb->s_dirt = 1; - handle->h_sync = 1; - err = ext3_journal_dirty_metadata(handle, -- sb->u.ext3_sb.s_sbh); -+ EXT3_SB(sb)->s_sbh); - } - } - } -@@ -2265,13 +2267,13 @@ static int ext3_do_update_inode(handle_t - raw_inode->i_block[0] = - cpu_to_le32(kdev_t_to_nr(inode->i_rdev)); - else for (block = 0; block < EXT3_N_BLOCKS; block++) -- raw_inode->i_block[block] = inode->u.ext3_i.i_data[block]; -+ raw_inode->i_block[block] = ei->i_data[block]; - - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); - rc = ext3_journal_dirty_metadata(handle, bh); - if (!err) - err = rc; -- EXT3_I(inode)->i_state &= ~EXT3_STATE_NEW; -+ ei->i_state &= ~EXT3_STATE_NEW; - - out_brelse: - brelse (bh); -@@ -2379,7 +2381,7 @@ int ext3_setattr(struct dentry *dentry, - } - - error = ext3_orphan_add(handle, inode); -- inode->u.ext3_i.i_disksize = 
attr->ia_size; -+ EXT3_I(inode)->i_disksize = attr->ia_size; - rc = ext3_mark_inode_dirty(handle, inode); - if (!error) - error = rc; -@@ -2622,9 +2624,9 @@ int ext3_change_inode_journal_flag(struc - */ - - if (val) -- inode->u.ext3_i.i_flags |= EXT3_JOURNAL_DATA_FL; -+ EXT3_I(inode)->i_flags |= EXT3_JOURNAL_DATA_FL; - else -- inode->u.ext3_i.i_flags &= ~EXT3_JOURNAL_DATA_FL; -+ EXT3_I(inode)->i_flags &= ~EXT3_JOURNAL_DATA_FL; - - journal_unlock_updates(journal); - ---- ./fs/ext3/ioctl.c.orig Fri Apr 12 10:27:49 2002 -+++ ./fs/ext3/ioctl.c Tue May 7 15:20:52 2002 -@@ -18,13 +18,14 @@ - int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, - unsigned long arg) - { -+ struct ext3_inode_info *ei = EXT3_I(inode); - unsigned int flags; - - ext3_debug ("cmd = %u, arg = %lu\n", cmd, arg); - - switch (cmd) { - case EXT3_IOC_GETFLAGS: -- flags = inode->u.ext3_i.i_flags & EXT3_FL_USER_VISIBLE; -+ flags = ei->i_flags & EXT3_FL_USER_VISIBLE; - return put_user(flags, (int *) arg); - case EXT3_IOC_SETFLAGS: { - handle_t *handle = NULL; -@@ -42,7 +42,7 @@ int ext3_ioctl (struct inode * inode, st - if (get_user(flags, (int *) arg)) - return -EFAULT; - -- oldflags = inode->u.ext3_i.i_flags; -+ oldflags = ei->i_flags; - - /* The JOURNAL_DATA flag is modifiable only by root */ - jflag = flags & EXT3_JOURNAL_DATA_FL; -@@ -79,7 +79,7 @@ int ext3_ioctl (struct inode * inode, st - - flags = flags & EXT3_FL_USER_MODIFIABLE; - flags |= oldflags & ~EXT3_FL_USER_MODIFIABLE; -- inode->u.ext3_i.i_flags = flags; -+ ei->i_flags = flags; - - if (flags & EXT3_SYNC_FL) - inode->i_flags |= S_SYNC; -@@ -155,12 +155,12 @@ flags_err: - int ret = 0; - - set_current_state(TASK_INTERRUPTIBLE); -- add_wait_queue(&sb->u.ext3_sb.ro_wait_queue, &wait); -- if (timer_pending(&sb->u.ext3_sb.turn_ro_timer)) { -+ add_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait); -+ if (timer_pending(&EXT3_SB(sb)->turn_ro_timer)) { - schedule(); - ret = 1; - } -- 
remove_wait_queue(&sb->u.ext3_sb.ro_wait_queue, &wait); -+ remove_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait); - return ret; - } - #endif ---- ./fs/ext3/namei.c.orig Fri Apr 12 10:27:49 2002 -+++ ./fs/ext3/namei.c Tue May 7 16:05:51 2002 -@@ -636,7 +636,7 @@ static struct buffer_head * ext3_find_en - } - - nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb); -- start = dir->u.ext3_i.i_dir_start_lookup; -+ start = EXT3_I(dir)->i_dir_start_lookup; - if (start >= nblocks) - start = 0; - block = start; -@@ -677,7 +677,7 @@ restart: - i = search_dirblock(bh, dir, dentry, - block << EXT3_BLOCK_SIZE_BITS(sb), res_dir); - if (i == 1) { -- dir->u.ext3_i.i_dir_start_lookup = block; -+ EXT3_I(dir)->i_dir_start_lookup = block; - ret = bh; - goto cleanup_and_exit; - } else { -@@ -1419,7 +1419,7 @@ int ext3_orphan_add(handle_t *handle, st - int err = 0, rc; - - lock_super(sb); -- if (!list_empty(&inode->u.ext3_i.i_orphan)) -+ if (!list_empty(&EXT3_I(inode)->i_orphan)) - goto out_unlock; - - /* Orphan handling is only valid for files with data blocks -@@ -1430,8 +1430,8 @@ int ext3_orphan_add(handle_t *handle, st - J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); - -- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access"); -- err = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh); -+ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); - if (err) - goto out_unlock; - -@@ -1442,7 +1442,7 @@ int ext3_orphan_add(handle_t *handle, st - /* Insert this inode at the head of the on-disk orphan list... 
*/ - NEXT_ORPHAN(inode) = le32_to_cpu(EXT3_SB(sb)->s_es->s_last_orphan); - EXT3_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); -- err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh); -+ err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); - rc = ext3_mark_iloc_dirty(handle, inode, &iloc); - if (!err) - err = rc; -@@ -1456,7 +1456,7 @@ int ext3_orphan_add(handle_t *handle, st - * This is safe: on error we're going to ignore the orphan list - * anyway on the next recovery. */ - if (!err) -- list_add(&inode->u.ext3_i.i_orphan, &EXT3_SB(sb)->s_orphan); -+ list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan); - - jbd_debug(4, "superblock will point to %ld\n", inode->i_ino); - jbd_debug(4, "orphan inode %ld will point to %d\n", -@@ -714,25 +770,25 @@ - int ext3_orphan_del(handle_t *handle, struct inode *inode) - { - struct list_head *prev; -+ struct ext3_inode_info *ei = EXT3_I(inode); - struct ext3_sb_info *sbi; - unsigned long ino_next; - struct ext3_iloc iloc; - int err = 0; - - lock_super(inode->i_sb); -- if (list_empty(&inode->u.ext3_i.i_orphan)) { -+ if (list_empty(&ei->i_orphan)) { - unlock_super(inode->i_sb); - return 0; - } - - ino_next = NEXT_ORPHAN(inode); -- prev = inode->u.ext3_i.i_orphan.prev; -+ prev = ei->i_orphan.prev; - sbi = EXT3_SB(inode->i_sb); - - jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino); - -- list_del(&inode->u.ext3_i.i_orphan); -- INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan); -+ list_del_init(&ei->i_orphan); - - /* If we're on an error path, we may not have a valid - * transaction handle with which to update the orphan list on -@@ -1520,8 +1520,7 @@ int ext3_orphan_del(handle_t *handle, st - err = ext3_journal_dirty_metadata(handle, sbi->s_sbh); - } else { - struct ext3_iloc iloc2; -- struct inode *i_prev = -- list_entry(prev, struct inode, u.ext3_i.i_orphan); -+ struct inode *i_prev = orphan_list_entry(prev); - - jbd_debug(4, "orphan inode %lu will point to %lu\n", - i_prev->i_ino, 
ino_next); -@@ -1695,10 +1695,10 @@ static int ext3_symlink (struct inode * - goto out_no_entry; - } else { - inode->i_op = &ext3_fast_symlink_inode_operations; -- memcpy((char*)&inode->u.ext3_i.i_data,symname,l); -+ memcpy((char*)&EXT3_I(inode)->i_data,symname,l); - inode->i_size = l-1; - } -- inode->u.ext3_i.i_disksize = inode->i_size; -+ EXT3_I(inode)->i_disksize = inode->i_size; - err = ext3_add_nondir(handle, dentry, inode); - ext3_mark_inode_dirty(handle, inode); - out_stop: ---- ./fs/ext3/super.c.orig Fri Apr 12 10:27:49 2002 -+++ ./fs/ext3/super.c Tue May 7 16:05:44 2002 -@@ -121,7 +121,7 @@ static int ext3_error_behaviour(struct s - /* If no overrides were specified on the mount, then fall back - * to the default behaviour set in the filesystem's superblock - * on disk. */ -- switch (le16_to_cpu(sb->u.ext3_sb.s_es->s_errors)) { -+ switch (le16_to_cpu(EXT3_SB(sb)->s_es->s_errors)) { - case EXT3_ERRORS_PANIC: - return EXT3_ERRORS_PANIC; - case EXT3_ERRORS_RO: -@@ -269,9 +269,9 @@ void ext3_abort (struct super_block * sb - return; - - printk (KERN_CRIT "Remounting filesystem read-only\n"); -- sb->u.ext3_sb.s_mount_state |= EXT3_ERROR_FS; -+ EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; - sb->s_flags |= MS_RDONLY; -- sb->u.ext3_sb.s_mount_opt |= EXT3_MOUNT_ABORT; -+ EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT; - journal_abort(EXT3_SB(sb)->s_journal, -EIO); - } - -@@ -377,8 +377,6 @@ static int ext3_blkdev_remove(struct ext3 - return ret; - } - --#define orphan_list_entry(l) list_entry((l), struct inode, u.ext3_i.i_orphan) -- - static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi) - { - struct list_head *l; -@@ -818,7 +818,7 @@ static void ext3_orphan_cleanup (struct - sb->s_flags &= ~MS_RDONLY; - } - -- if (sb->u.ext3_sb.s_mount_state & EXT3_ERROR_FS) { -+ if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) { - if (es->s_last_orphan) - jbd_debug(1, "Errors on filesystem, " - "clearing orphan list.\n"); -@@ -1463,12 +1463,14 @@ static void 
ext3_commit_super (struct su - struct ext3_super_block * es, - int sync) - { -+ struct buffer_head *sbh = EXT3_SB(sb)->s_sbh; -+ - es->s_wtime = cpu_to_le32(CURRENT_TIME); -- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "marking dirty"); -- mark_buffer_dirty(sb->u.ext3_sb.s_sbh); -+ BUFFER_TRACE(sbh, "marking dirty"); -+ mark_buffer_dirty(sbh); - if (sync) { -- ll_rw_block(WRITE, 1, &sb->u.ext3_sb.s_sbh); -- wait_on_buffer(sb->u.ext3_sb.s_sbh); -+ ll_rw_block(WRITE, 1, &sbh); -+ wait_on_buffer(sbh); - } - } - -@@ -1519,7 +1521,7 @@ static void ext3_clear_journal_err(struc - ext3_warning(sb, __FUNCTION__, "Marking fs in need of " - "filesystem check."); - -- sb->u.ext3_sb.s_mount_state |= EXT3_ERROR_FS; -+ EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; - es->s_state |= cpu_to_le16(EXT3_ERROR_FS); - ext3_commit_super (sb, es, 1); - ---- ./fs/ext3/symlink.c.orig Fri Apr 12 10:27:49 2002 -+++ ./fs/ext3/symlink.c Tue May 7 15:25:39 2002 -@@ -23,13 +23,13 @@ - - static int ext3_readlink(struct dentry *dentry, char *buffer, int buflen) - { -- char *s = (char *)dentry->d_inode->u.ext3_i.i_data; -- return vfs_readlink(dentry, buffer, buflen, s); -+ struct ext3_inode_info *ei = EXT3_I(dentry->d_inode); -+ return vfs_readlink(dentry, buffer, buflen, (char *)ei->i_data); - } - - static int ext3_follow_link(struct dentry *dentry, struct nameidata *nd) - { -- char *s = (char *)dentry->d_inode->u.ext3_i.i_data; -- return vfs_follow_link(nd, s); -+ struct ext3_inode_info *ei = EXT3_I(dentry->d_inode); -+ return vfs_follow_link(nd, (char*)ei->i_data); - } - ---- ./include/linux/ext3_fs.h.orig Tue Apr 16 14:27:25 2002 -+++ ./include/linux/ext3_fs.h Tue May 7 16:47:36 2002 -@@ -84,22 +84,25 @@ - #define EXT3_MIN_BLOCK_SIZE 1024 - #define EXT3_MAX_BLOCK_SIZE 4096 - #define EXT3_MIN_BLOCK_LOG_SIZE 10 -+ - #ifdef __KERNEL__ --# define EXT3_BLOCK_SIZE(s) ((s)->s_blocksize) --#else --# define EXT3_BLOCK_SIZE(s) (EXT3_MIN_BLOCK_SIZE << (s)->s_log_block_size) --#endif --#define EXT3_ADDR_PER_BLOCK(s) 
(EXT3_BLOCK_SIZE(s) / sizeof (__u32)) --#ifdef __KERNEL__ --# define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) --#else --# define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10) --#endif --#ifdef __KERNEL__ --#define EXT3_ADDR_PER_BLOCK_BITS(s) ((s)->u.ext3_sb.s_addr_per_block_bits) --#define EXT3_INODE_SIZE(s) ((s)->u.ext3_sb.s_inode_size) --#define EXT3_FIRST_INO(s) ((s)->u.ext3_sb.s_first_ino) -+#define EXT3_SB(sb) (&((sb)->u.ext3_sb)) -+#define EXT3_I(inode) (&((inode)->u.ext3_i)) -+ -+#define EXT3_BLOCK_SIZE(s) ((s)->s_blocksize) -+#define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) -+#define EXT3_ADDR_PER_BLOCK_BITS(s) (EXT3_SB(s)->s_addr_per_block_bits) -+#define EXT3_INODE_SIZE(s) (EXT3_SB(s)->s_inode_size) -+#define EXT3_FIRST_INO(s) (EXT3_SB(s)->s_first_ino) - #else -+ -+/* Assume that user mode programs are passing in an ext3fs superblock, not -+ * a kernel struct super_block. This will allow us to call the feature-test -+ * macros from user land. */ -+#define EXT3_SB(sb) (sb) -+ -+#define EXT3_BLOCK_SIZE(s) (EXT3_MIN_BLOCK_SIZE << (s)->s_log_block_size) -+#define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10) - #define EXT3_INODE_SIZE(s) (((s)->s_rev_level == EXT3_GOOD_OLD_REV) ? 
\ - EXT3_GOOD_OLD_INODE_SIZE : \ - (s)->s_inode_size) -@@ -108,6 +110,7 @@ - EXT3_GOOD_OLD_FIRST_INO : \ - (s)->s_first_ino) - #endif -+#define EXT3_ADDR_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (__u32)) - - /* - * Macro-instructions used to manage fragments -@@ -116,8 +120,8 @@ - #define EXT3_MAX_FRAG_SIZE 4096 - #define EXT3_MIN_FRAG_LOG_SIZE 10 - #ifdef __KERNEL__ --# define EXT3_FRAG_SIZE(s) ((s)->u.ext3_sb.s_frag_size) --# define EXT3_FRAGS_PER_BLOCK(s) ((s)->u.ext3_sb.s_frags_per_block) -+# define EXT3_FRAG_SIZE(s) (EXT3_SB(s)->s_frag_size) -+# define EXT3_FRAGS_PER_BLOCK(s) (EXT3_SB(s)->s_frags_per_block) - #else - # define EXT3_FRAG_SIZE(s) (EXT3_MIN_FRAG_SIZE << (s)->s_log_frag_size) - # define EXT3_FRAGS_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / EXT3_FRAG_SIZE(s)) -@@ -163,15 +167,13 @@ - /* - * Macro-instructions used to manage group descriptors - */ -+# define EXT3_BLOCKS_PER_GROUP(s) (EXT3_SB(s)->s_blocks_per_group) -+# define EXT3_INODES_PER_GROUP(s) (EXT3_SB(s)->s_inodes_per_group) - #ifdef __KERNEL__ --# define EXT3_BLOCKS_PER_GROUP(s) ((s)->u.ext3_sb.s_blocks_per_group) --# define EXT3_DESC_PER_BLOCK(s) ((s)->u.ext3_sb.s_desc_per_block) --# define EXT3_INODES_PER_GROUP(s) ((s)->u.ext3_sb.s_inodes_per_group) --# define EXT3_DESC_PER_BLOCK_BITS(s) ((s)->u.ext3_sb.s_desc_per_block_bits) -+# define EXT3_DESC_PER_BLOCK(s) (EXT3_SB(s)->s_desc_per_block) -+# define EXT3_DESC_PER_BLOCK_BITS(s) (EXT3_SB(s)->s_desc_per_block_bits) - #else --# define EXT3_BLOCKS_PER_GROUP(s) ((s)->s_blocks_per_group) - # define EXT3_DESC_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (struct ext3_group_desc)) --# define EXT3_INODES_PER_GROUP(s) ((s)->s_inodes_per_group) - #endif - - /* -@@ -344,7 +347,7 @@ - #ifndef _LINUX_EXT2_FS_H - #define clear_opt(o, opt) o &= ~EXT3_MOUNT_##opt - #define set_opt(o, opt) o |= EXT3_MOUNT_##opt --#define test_opt(sb, opt) ((sb)->u.ext3_sb.s_mount_opt & \ -+#define test_opt(sb, opt) (EXT3_SB(sb)->s_mount_opt & \ - EXT3_MOUNT_##opt) - #else - #define 
EXT2_MOUNT_NOLOAD EXT3_MOUNT_NOLOAD -@@ -441,17 +443,11 @@ - /*EC*/ __u32 s_reserved[197]; /* Padding to the end of the block */ - }; - --#ifdef __KERNEL__ --#define EXT3_SB(sb) (&((sb)->u.ext3_sb)) --#define EXT3_I(inode) (&((inode)->u.ext3_i)) --#else --/* Assume that user mode programs are passing in an ext3fs superblock, not -- * a kernel struct super_block. This will allow us to call the feature-test -- * macros from user land. */ --#define EXT3_SB(sb) (sb) --#endif -- --#define NEXT_ORPHAN(inode) (inode)->u.ext3_i.i_dtime -+#define NEXT_ORPHAN(inode) EXT3_I(inode)->i_dtime -+static inline struct inode *orphan_list_entry(struct list_head *l) -+{ -+ return list_entry(l, struct inode, u.ext3_i.i_orphan); -+} - - /* - * Codes for operating systems ---- ./include/linux/ext3_jbd.h.orig Tue May 7 14:44:08 2002 -+++ ./include/linux/ext3_jbd.h Tue May 7 14:44:43 2002 -@@ -291,7 +291,7 @@ - return 1; - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA) - return 1; -- if (inode->u.ext3_i.i_flags & EXT3_JOURNAL_DATA_FL) -+ if (EXT3_I(inode)->i_flags & EXT3_JOURNAL_DATA_FL) - return 1; - return 0; - } diff --git a/lustre/kernel_patches/patches/ext3-compat-2.4.18-chaos.patch b/lustre/kernel_patches/patches/ext3-compat-2.4.18-chaos.patch deleted file mode 100644 index 68a2244..0000000 --- a/lustre/kernel_patches/patches/ext3-compat-2.4.18-chaos.patch +++ /dev/null @@ -1,46 +0,0 @@ - fs/ext3/namei.c | 3 ++- - lib/rbtree.c | 6 +++--- - 2 files changed, 5 insertions(+), 4 deletions(-) - ---- linux-2.4.18-chaos-pdirops/fs/ext3/namei.c~ext3-compat-2.4.18-chaos 2003-09-23 13:13:10.000000000 +0400 -+++ linux-2.4.18-chaos-pdirops-alexey/fs/ext3/namei.c 2003-09-23 13:13:28.000000000 +0400 -@@ -28,6 +28,7 @@ - #include - #include - #include -+#include - - - /* -@@ -830,9 +831,9 @@ static int ext3_rmdir (struct inode * di - * recovery. 
*/ - inode->i_size = 0; - ext3_orphan_add(handle, inode); -- ext3_mark_inode_dirty(handle, inode); - dir->i_nlink--; - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; -+ ext3_mark_inode_dirty(handle, inode); - dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; - ext3_mark_inode_dirty(handle, dir); - ---- linux-2.4.18-chaos-pdirops/lib/rbtree.c~ext3-compat-2.4.18-chaos 2003-07-28 17:52:20.000000000 +0400 -+++ linux-2.4.18-chaos-pdirops-alexey/lib/rbtree.c 2003-09-23 13:13:15.000000000 +0400 -@@ -219,6 +219,8 @@ static void __rb_erase_color(rb_node_t * - node->rb_color = RB_BLACK; - } - -+EXPORT_SYMBOL_GPL(rb_insert_color); -+ - void rb_erase(rb_node_t * node, rb_root_t * root) - { - rb_node_t * child, * parent; -@@ -292,6 +294,4 @@ void rb_erase(rb_node_t * node, rb_root_ - if (color == RB_BLACK) - __rb_erase_color(child, parent, root); - } -- --EXPORT_SYMBOL_GPL(rb_insert_color); --EXPORT_SYMBOL_GPL(rb_erase); -+EXPORT_SYMBOL(rb_erase); - -_ diff --git a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.18-2.patch b/lustre/kernel_patches/patches/ext3-delete_thread-2.4.18-2.patch deleted file mode 100644 index 1e2295c..0000000 --- a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.18-2.patch +++ /dev/null @@ -1,474 +0,0 @@ - fs/ext3/file.c | 4 - fs/ext3/inode.c | 112 +++++++++++++++++++++ - fs/ext3/super.c | 231 +++++++++++++++++++++++++++++++++++++++++++++ - include/linux/ext3_fs.h | 5 - include/linux/ext3_fs_sb.h | 10 + - 5 files changed, 362 insertions(+) - ---- linux-2.4.18-chaos/fs/ext3/file.c~ext3-delete_thread-2.4.18-2 2003-09-16 23:34:07.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/fs/ext3/file.c 2003-09-16 23:42:34.000000000 +0400 -@@ -124,7 +124,11 @@ struct file_operations ext3_file_operati - }; - - struct inode_operations ext3_file_inode_operations = { -+#ifdef EXT3_DELETE_THREAD -+ truncate: ext3_truncate_thread, /* BKL held */ -+#else - truncate: ext3_truncate, /* BKL held */ -+#endif - setattr: ext3_setattr, /* BKL held */ - }; - ---- 
linux-2.4.18-chaos/fs/ext3/inode.c~ext3-delete_thread-2.4.18-2 2003-09-16 23:39:37.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/fs/ext3/inode.c 2003-09-16 23:42:34.000000000 +0400 -@@ -2041,6 +2041,118 @@ out_unlock: - return; /* AKPM: return what? */ - } - -+#ifdef EXT3_DELETE_THREAD -+/* Move blocks from to-be-truncated inode over to a new inode, and delete -+ * that one from the delete thread instead. This avoids a lot of latency -+ * when truncating large files. -+ * -+ * If we have any problem deferring the truncate, just truncate it right away. -+ * If we defer it, we also mark how many blocks it would free, so that we -+ * can keep the statfs data correct, and we know if we should sleep on the -+ * delete thread when we run out of space. -+ */ -+void ext3_truncate_thread(struct inode *old_inode) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb); -+ struct ext3_inode_info *nei, *oei = EXT3_I(old_inode); -+ struct inode *new_inode; -+ handle_t *handle; -+ unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9); -+ -+ if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next) -+ goto out_truncate; -+ -+ /* XXX This is a temporary limitation for code simplicity. -+ * We could truncate to arbitrary sizes at some later time. -+ */ -+ if (old_inode->i_size != 0) -+ goto out_truncate; -+ -+ /* We may want to truncate the inode immediately and not defer it */ -+ if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS || -+ old_inode->i_size > oei->i_disksize) -+ goto out_truncate; -+ -+ /* We can't use the delete thread as-is during real orphan recovery, -+ * as we add to the orphan list here, causing ext3_orphan_cleanup() -+ * to loop endlessly. It would be nice to do so, but needs work. 
-+ */ -+ if (oei->i_state & EXT3_STATE_DELETE || -+ sbi->s_mount_state & EXT3_ORPHAN_FS) { -+ ext3_debug("doing deferred inode %lu delete (%lu blocks)\n", -+ old_inode->i_ino, blocks); -+ goto out_truncate; -+ } -+ -+ ext3_discard_prealloc(old_inode); -+ -+ /* old_inode = 1 -+ * new_inode = sb + GDT + ibitmap -+ * orphan list = 1 inode/superblock for add, 2 inodes for del -+ * quota files = 2 * EXT3_SINGLEDATA_TRANS_BLOCKS -+ */ -+ handle = ext3_journal_start(old_inode, 7); -+ if (IS_ERR(handle)) -+ goto out_truncate; -+ -+ new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode); -+ if (IS_ERR(new_inode)) { -+ ext3_debug("truncate inode %lu directly (no new inodes)\n", -+ old_inode->i_ino); -+ goto out_journal; -+ } -+ -+ nei = EXT3_I(new_inode); -+ -+ down_write(&oei->truncate_sem); -+ new_inode->i_size = old_inode->i_size; -+ new_inode->i_blocks = old_inode->i_blocks; -+ new_inode->i_uid = old_inode->i_uid; -+ new_inode->i_gid = old_inode->i_gid; -+ new_inode->i_nlink = 0; -+ -+ /* FIXME when we do arbitrary truncates */ -+ old_inode->i_blocks = oei->i_file_acl ? 
old_inode->i_blksize / 512 : 0; -+ old_inode->i_mtime = old_inode->i_ctime = CURRENT_TIME; -+ -+ memcpy(nei->i_data, oei->i_data, sizeof(nei->i_data)); -+ memset(oei->i_data, 0, sizeof(oei->i_data)); -+ -+ nei->i_disksize = oei->i_disksize; -+ nei->i_state |= EXT3_STATE_DELETE; -+ up_write(&oei->truncate_sem); -+ -+ if (ext3_orphan_add(handle, new_inode) < 0) -+ goto out_journal; -+ -+ if (ext3_orphan_del(handle, old_inode) < 0) { -+ ext3_orphan_del(handle, new_inode); -+ iput(new_inode); -+ goto out_journal; -+ } -+ -+ ext3_journal_stop(handle, old_inode); -+ -+ spin_lock(&sbi->s_delete_lock); -+ J_ASSERT(list_empty(&new_inode->i_dentry)); -+ list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list); -+ sbi->s_delete_blocks += blocks; -+ sbi->s_delete_inodes++; -+ spin_unlock(&sbi->s_delete_lock); -+ -+ ext3_debug("delete inode %lu (%lu blocks) by thread\n", -+ new_inode->i_ino, blocks); -+ -+ wake_up(&sbi->s_delete_thread_queue); -+ return; -+ -+out_journal: -+ ext3_journal_stop(handle, old_inode); -+out_truncate: -+ ext3_truncate(old_inode); -+} -+#endif /* EXT3_DELETE_THREAD */ -+ - /* - * ext3_get_inode_loc returns with an extra refcount against the - * inode's underlying buffer_head on success. ---- linux-2.4.18-chaos/fs/ext3/super.c~ext3-delete_thread-2.4.18-2 2003-09-16 23:42:33.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/fs/ext3/super.c 2003-09-16 23:42:34.000000000 +0400 -@@ -398,6 +398,220 @@ static void dump_orphan_list(struct supe - } - } - -+#ifdef EXT3_DELETE_THREAD -+/* -+ * Delete inodes in a loop until there are no more to be deleted. -+ * Normally, we run in the background doing the deletes and sleeping again, -+ * and clients just add new inodes to be deleted onto the end of the list. -+ * If someone is concerned about free space (e.g. block allocation or similar) -+ * then they can sleep on s_delete_waiter_queue and be woken up when space -+ * has been freed. 
-+ */ -+int ext3_delete_thread(void *data) -+{ -+ struct super_block *sb = data; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct task_struct *tsk = current; -+ -+ /* Almost like daemonize, but not quite */ -+ exit_mm(current); -+ tsk->session = 1; -+ tsk->pgrp = 1; -+ tsk->tty = NULL; -+ exit_files(current); -+ reparent_to_init(); -+ -+ sprintf(tsk->comm, "kdelext3-%s", kdevname(sb->s_dev)); -+ sigfillset(&tsk->blocked); -+ -+ /*tsk->flags |= PF_KERNTHREAD;*/ -+ -+ INIT_LIST_HEAD(&sbi->s_delete_list); -+ wake_up(&sbi->s_delete_waiter_queue); -+ ext3_debug("delete thread on %s started\n", kdevname(sb->s_dev)); -+ -+ /* main loop */ -+ for (;;) { -+ wait_event_interruptible(sbi->s_delete_thread_queue, -+ !list_empty(&sbi->s_delete_list) || -+ !test_opt(sb, ASYNCDEL)); -+ ext3_debug("%s woken up: %lu inodes, %lu blocks\n", -+ tsk->comm,sbi->s_delete_inodes,sbi->s_delete_blocks); -+ -+ spin_lock(&sbi->s_delete_lock); -+ if (list_empty(&sbi->s_delete_list)) { -+ clear_opt(sbi->s_mount_opt, ASYNCDEL); -+ memset(&sbi->s_delete_list, 0, -+ sizeof(sbi->s_delete_list)); -+ spin_unlock(&sbi->s_delete_lock); -+ ext3_debug("delete thread on %s exiting\n", -+ kdevname(sb->s_dev)); -+ wake_up(&sbi->s_delete_waiter_queue); -+ break; -+ } -+ -+ while (!list_empty(&sbi->s_delete_list)) { -+ struct inode *inode=list_entry(sbi->s_delete_list.next, -+ struct inode, i_dentry); -+ unsigned long blocks = inode->i_blocks >> -+ (inode->i_blkbits - 9); -+ -+ list_del_init(&inode->i_dentry); -+ spin_unlock(&sbi->s_delete_lock); -+ ext3_debug("%s delete ino %lu blk %lu\n", -+ tsk->comm, inode->i_ino, blocks); -+ -+ iput(inode); -+ -+ spin_lock(&sbi->s_delete_lock); -+ sbi->s_delete_blocks -= blocks; -+ sbi->s_delete_inodes--; -+ } -+ if (sbi->s_delete_blocks != 0 || sbi->s_delete_inodes != 0) { -+ ext3_warning(sb, __FUNCTION__, -+ "%lu blocks, %lu inodes on list?\n", -+ sbi->s_delete_blocks,sbi->s_delete_inodes); -+ sbi->s_delete_blocks = 0; -+ sbi->s_delete_inodes = 0; -+ } -+ 
spin_unlock(&sbi->s_delete_lock); -+ wake_up(&sbi->s_delete_waiter_queue); -+ } -+ -+ return 0; -+} -+ -+static void ext3_start_delete_thread(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int rc; -+ -+ spin_lock_init(&sbi->s_delete_lock); -+ init_waitqueue_head(&sbi->s_delete_thread_queue); -+ init_waitqueue_head(&sbi->s_delete_waiter_queue); -+ -+ if (!test_opt(sb, ASYNCDEL)) -+ return; -+ -+ rc = kernel_thread(ext3_delete_thread, sb, CLONE_VM | CLONE_FILES); -+ if (rc < 0) -+ printk(KERN_ERR "EXT3-fs: cannot start delete thread: rc %d\n", -+ rc); -+ else -+ wait_event(sbi->s_delete_waiter_queue, sbi->s_delete_list.next); -+} -+ -+static void ext3_stop_delete_thread(struct ext3_sb_info *sbi) -+{ -+ if (sbi->s_delete_list.next == 0) /* thread never started */ -+ return; -+ -+ clear_opt(sbi->s_mount_opt, ASYNCDEL); -+ wake_up(&sbi->s_delete_thread_queue); -+ wait_event(sbi->s_delete_waiter_queue, list_empty(&sbi->s_delete_list)); -+} -+ -+/* Instead of playing games with the inode flags, destruction, etc we just -+ * create a new inode locally and put it on a list for the truncate thread. -+ * We need large parts of the inode struct in order to complete the -+ * truncate and unlink, so we may as well just have a real inode to do it. -+ * -+ * If we have any problem deferring the delete, just delete it right away. -+ * If we defer it, we also mark how many blocks it would free, so that we -+ * can keep the statfs data correct, and we know if we should sleep on the -+ * delete thread when we run out of space. 
-+ */ -+static void ext3_delete_inode_thread(struct inode *old_inode) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb); -+ struct ext3_inode_info *nei, *oei = EXT3_I(old_inode); -+ struct inode *new_inode; -+ unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9); -+ -+ if (is_bad_inode(old_inode)) { -+ clear_inode(old_inode); -+ return; -+ } -+ -+ if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next) -+ goto out_delete; -+ -+ /* We may want to delete the inode immediately and not defer it */ -+ if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS) -+ goto out_delete; -+ -+ /* We can't use the delete thread as-is during real orphan recovery, -+ * as we add to the orphan list here, causing ext3_orphan_cleanup() -+ * to loop endlessly. It would be nice to do so, but needs work. -+ */ -+ if (oei->i_state & EXT3_STATE_DELETE || -+ sbi->s_mount_state & EXT3_ORPHAN_FS) { -+ ext3_debug("doing deferred inode %lu delete (%lu blocks)\n", -+ old_inode->i_ino, blocks); -+ goto out_delete; -+ } -+ -+ /* We can iget this inode again here, because our caller has unhashed -+ * old_inode, so new_inode will be in a different inode struct. -+ * -+ * We need to ensure that the i_orphan pointers in the other inodes -+ * point at the new inode copy instead of the old one so the orphan -+ * list doesn't get corrupted when the old orphan inode is freed. -+ */ -+ down(&sbi->s_orphan_lock); -+ -+ sbi->s_mount_state |= EXT3_ORPHAN_FS; -+ new_inode = iget(old_inode->i_sb, old_inode->i_ino); -+ sbi->s_mount_state &= ~EXT3_ORPHAN_FS; -+ if (is_bad_inode(new_inode)) { -+ printk(KERN_WARNING "read bad inode %lu\n", old_inode->i_ino); -+ iput(new_inode); -+ new_inode = NULL; -+ } -+ if (!new_inode) { -+ up(&sbi->s_orphan_lock); -+ ext3_debug("delete inode %lu directly (bad read)\n", -+ old_inode->i_ino); -+ goto out_delete; -+ } -+ J_ASSERT(new_inode != old_inode); -+ -+ J_ASSERT(!list_empty(&oei->i_orphan)); -+ -+ nei = EXT3_I(new_inode); -+ /* Ugh. 
We need to insert new_inode into the same spot on the list -+ * as old_inode was, to ensure the in-memory orphan list is still -+ * in the same order as the on-disk orphan list (badness otherwise). -+ */ -+ nei->i_orphan = oei->i_orphan; -+ nei->i_orphan.next->prev = &nei->i_orphan; -+ nei->i_orphan.prev->next = &nei->i_orphan; -+ nei->i_state |= EXT3_STATE_DELETE; -+ up(&sbi->s_orphan_lock); -+ -+ clear_inode(old_inode); -+ -+ spin_lock(&sbi->s_delete_lock); -+ J_ASSERT(list_empty(&new_inode->i_dentry)); -+ list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list); -+ sbi->s_delete_blocks += blocks; -+ sbi->s_delete_inodes++; -+ spin_unlock(&sbi->s_delete_lock); -+ -+ ext3_debug("delete inode %lu (%lu blocks) by thread\n", -+ new_inode->i_ino, blocks); -+ -+ wake_up(&sbi->s_delete_thread_queue); -+ return; -+ -+out_delete: -+ ext3_delete_inode(old_inode); -+} -+#else -+#define ext3_start_delete_thread(sbi) do {} while(0) -+#define ext3_stop_delete_thread(sbi) do {} while(0) -+#endif /* EXT3_DELETE_THREAD */ -+ - void ext3_put_super (struct super_block * sb) - { - struct ext3_sb_info *sbi = EXT3_SB(sb); -@@ -405,6 +619,7 @@ void ext3_put_super (struct super_block - kdev_t j_dev = sbi->s_journal->j_dev; - int i; - -+ ext3_stop_delete_thread(sbi); - ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); - if (!(sb->s_flags & MS_RDONLY)) { -@@ -453,7 +668,11 @@ static struct super_operations ext3_sops - write_inode: ext3_write_inode, /* BKL not held. Don't need */ - dirty_inode: ext3_dirty_inode, /* BKL not held. We take it */ - put_inode: ext3_put_inode, /* BKL not held. Don't need */ -+#ifdef EXT3_DELETE_THREAD -+ delete_inode: ext3_delete_inode_thread,/* BKL not held. We take it */ -+#else - delete_inode: ext3_delete_inode, /* BKL not held. 
We take it */ -+#endif - put_super: ext3_put_super, /* BKL held */ - write_super: ext3_write_super, /* BKL held */ - sync_fs: ext3_sync_fs, -@@ -514,6 +733,14 @@ static int parse_options (char * options - this_char = strtok (NULL, ",")) { - if ((value = strchr (this_char, '=')) != NULL) - *value++ = 0; -+#ifdef EXT3_DELETE_THREAD -+ if (!strcmp(this_char, "asyncdel")) -+ set_opt(*mount_options, ASYNCDEL); -+ else if (!strcmp(this_char, "noasyncdel")) -+ clear_opt(*mount_options, ASYNCDEL); -+ else -+#endif -+ - if (!strcmp (this_char, "bsddf")) - clear_opt (*mount_options, MINIX_DF); - else if (!strcmp (this_char, "nouid32")) { -@@ -1203,6 +1430,7 @@ struct super_block * ext3_read_super (st - } - - ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); -+ ext3_start_delete_thread(sb); - /* - * akpm: core read_super() calls in here with the superblock locked. - * That deadlocks, because orphan cleanup needs to lock the superblock -@@ -1643,6 +1871,9 @@ int ext3_remount (struct super_block * s - if (!parse_options(data, &tmp, sbi, &tmp, 1)) - return -EINVAL; - -+ if (!test_opt(sb, ASYNCDEL) || (*flags & MS_RDONLY)) -+ ext3_stop_delete_thread(sbi); -+ - if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) - ext3_abort(sb, __FUNCTION__, "Abort forced by user"); - ---- linux-2.4.18-chaos/include/linux/ext3_fs.h~ext3-delete_thread-2.4.18-2 2003-09-16 23:39:37.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/include/linux/ext3_fs.h 2003-09-16 23:42:34.000000000 +0400 -@@ -195,6 +195,7 @@ struct ext3_group_desc - */ - #define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */ - #define EXT3_STATE_NEW 0x00000002 /* inode is newly created */ -+#define EXT3_STATE_DELETE 0x00000010 /* deferred delete inode */ - - /* - * ioctl commands -@@ -322,6 +323,7 @@ struct ext3_inode { - #define EXT3_MOUNT_WRITEBACK_DATA 0x0C00 /* No data ordering */ - #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */ - #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ -+#define 
EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef _LINUX_EXT2_FS_H -@@ -708,6 +710,9 @@ extern void ext3_discard_prealloc (struc - extern void ext3_dirty_inode(struct inode *); - extern int ext3_change_inode_journal_flag(struct inode *, int); - extern void ext3_truncate (struct inode *); -+#ifdef EXT3_DELETE_THREAD -+extern void ext3_truncate_thread(struct inode *inode); -+#endif - - /* ioctl.c */ - extern int ext3_ioctl (struct inode *, struct file *, unsigned int, ---- linux-2.4.18-chaos/include/linux/ext3_fs_sb.h~ext3-delete_thread-2.4.18-2 2003-09-16 23:42:33.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/include/linux/ext3_fs_sb.h 2003-09-16 23:42:34.000000000 +0400 -@@ -29,6 +29,8 @@ - - #define EXT3_MAX_GROUP_LOADED 32 - -+#define EXT3_DELETE_THREAD -+ - /* - * third extended-fs super-block data in memory - */ -@@ -76,6 +78,14 @@ struct ext3_sb_info { - struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ - wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ - #endif -+#ifdef EXT3_DELETE_THREAD -+ spinlock_t s_delete_lock; -+ struct list_head s_delete_list; -+ unsigned long s_delete_blocks; -+ unsigned long s_delete_inodes; -+ wait_queue_head_t s_delete_thread_queue; -+ wait_queue_head_t s_delete_waiter_queue; -+#endif - }; - - #endif /* _LINUX_EXT3_FS_SB */ - -_ diff --git a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.18.patch b/lustre/kernel_patches/patches/ext3-delete_thread-2.4.18.patch deleted file mode 100644 index a6a64de..0000000 --- a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.18.patch +++ /dev/null @@ -1,517 +0,0 @@ - -Create a service thread to handle delete and truncate of inodes, to avoid -long latency while truncating very large files. 
- - - fs/ext3/inode.c | 116 ++++++++++++++++++++++ - fs/ext3/super.c | 231 +++++++++++++++++++++++++++++++++++++++++++++ - include/linux/ext3_fs.h | 5 - include/linux/ext3_fs_sb.h | 10 + - 4 files changed, 362 insertions(+) - -Index: linux-2.4.18-chaos/fs/ext3/super.c -=================================================================== ---- linux-2.4.18-chaos.orig/fs/ext3/super.c 2004-01-13 15:39:03.000000000 +0300 -+++ linux-2.4.18-chaos/fs/ext3/super.c 2004-01-13 16:35:05.000000000 +0300 -@@ -398,6 +398,221 @@ - } - } - -+#ifdef EXT3_DELETE_THREAD -+/* -+ * Delete inodes in a loop until there are no more to be deleted. -+ * Normally, we run in the background doing the deletes and sleeping again, -+ * and clients just add new inodes to be deleted onto the end of the list. -+ * If someone is concerned about free space (e.g. block allocation or similar) -+ * then they can sleep on s_delete_waiter_queue and be woken up when space -+ * has been freed. -+ */ -+int ext3_delete_thread(void *data) -+{ -+ struct super_block *sb = data; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct task_struct *tsk = current; -+ -+ /* Almost like daemonize, but not quite */ -+ exit_mm(current); -+ tsk->session = 1; -+ tsk->pgrp = 1; -+ tsk->tty = NULL; -+ exit_files(current); -+ reparent_to_init(); -+ -+ sprintf(tsk->comm, "kdelext3-%s", kdevname(sb->s_dev)); -+ sigfillset(&tsk->blocked); -+ -+ /*tsk->flags |= PF_KERNTHREAD;*/ -+ -+ INIT_LIST_HEAD(&sbi->s_delete_list); -+ wake_up(&sbi->s_delete_waiter_queue); -+ ext3_debug("delete thread on %s started\n", kdevname(sb->s_dev)); -+ -+ /* main loop */ -+ for (;;) { -+ wait_event_interruptible(sbi->s_delete_thread_queue, -+ !list_empty(&sbi->s_delete_list) || -+ !test_opt(sb, ASYNCDEL)); -+ ext3_debug("%s woken up: %lu inodes, %lu blocks\n", -+ tsk->comm,sbi->s_delete_inodes,sbi->s_delete_blocks); -+ -+ spin_lock(&sbi->s_delete_lock); -+ if (list_empty(&sbi->s_delete_list)) { -+ clear_opt(sbi->s_mount_opt, ASYNCDEL); -+ 
memset(&sbi->s_delete_list, 0, -+ sizeof(sbi->s_delete_list)); -+ spin_unlock(&sbi->s_delete_lock); -+ ext3_debug("delete thread on %s exiting\n", -+ kdevname(sb->s_dev)); -+ wake_up(&sbi->s_delete_waiter_queue); -+ break; -+ } -+ -+ while (!list_empty(&sbi->s_delete_list)) { -+ struct inode *inode=list_entry(sbi->s_delete_list.next, -+ struct inode, i_dentry); -+ unsigned long blocks = inode->i_blocks >> -+ (inode->i_blkbits - 9); -+ -+ list_del_init(&inode->i_dentry); -+ spin_unlock(&sbi->s_delete_lock); -+ ext3_debug("%s delete ino %lu blk %lu\n", -+ tsk->comm, inode->i_ino, blocks); -+ -+ iput(inode); -+ -+ spin_lock(&sbi->s_delete_lock); -+ sbi->s_delete_blocks -= blocks; -+ sbi->s_delete_inodes--; -+ } -+ if (sbi->s_delete_blocks != 0 || sbi->s_delete_inodes != 0) { -+ ext3_warning(sb, __FUNCTION__, -+ "%lu blocks, %lu inodes on list?\n", -+ sbi->s_delete_blocks,sbi->s_delete_inodes); -+ sbi->s_delete_blocks = 0; -+ sbi->s_delete_inodes = 0; -+ } -+ spin_unlock(&sbi->s_delete_lock); -+ wake_up(&sbi->s_delete_waiter_queue); -+ } -+ -+ return 0; -+} -+ -+static void ext3_start_delete_thread(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int rc; -+ -+ spin_lock_init(&sbi->s_delete_lock); -+ init_waitqueue_head(&sbi->s_delete_thread_queue); -+ init_waitqueue_head(&sbi->s_delete_waiter_queue); -+ -+ if (!test_opt(sb, ASYNCDEL)) -+ return; -+ -+ rc = kernel_thread(ext3_delete_thread, sb, CLONE_VM | CLONE_FILES); -+ if (rc < 0) -+ printk(KERN_ERR "EXT3-fs: cannot start delete thread: rc %d\n", -+ rc); -+ else -+ wait_event(sbi->s_delete_waiter_queue, sbi->s_delete_list.next); -+} -+ -+static void ext3_stop_delete_thread(struct ext3_sb_info *sbi) -+{ -+ if (sbi->s_delete_list.next == 0) /* thread never started */ -+ return; -+ -+ clear_opt(sbi->s_mount_opt, ASYNCDEL); -+ wake_up(&sbi->s_delete_thread_queue); -+ wait_event(sbi->s_delete_waiter_queue, -+ sbi->s_delete_list.next == 0 && sbi->s_delete_inodes == 0); -+} -+ -+/* Instead of 
playing games with the inode flags, destruction, etc we just -+ * create a new inode locally and put it on a list for the truncate thread. -+ * We need large parts of the inode struct in order to complete the -+ * truncate and unlink, so we may as well just have a real inode to do it. -+ * -+ * If we have any problem deferring the delete, just delete it right away. -+ * If we defer it, we also mark how many blocks it would free, so that we -+ * can keep the statfs data correct, and we know if we should sleep on the -+ * delete thread when we run out of space. -+ */ -+static void ext3_delete_inode_thread(struct inode *old_inode) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb); -+ struct ext3_inode_info *nei, *oei = EXT3_I(old_inode); -+ struct inode *new_inode; -+ unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9); -+ -+ if (is_bad_inode(old_inode)) { -+ clear_inode(old_inode); -+ return; -+ } -+ -+ if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next) -+ goto out_delete; -+ -+ /* We may want to delete the inode immediately and not defer it */ -+ if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS) -+ goto out_delete; -+ -+ /* We can't use the delete thread as-is during real orphan recovery, -+ * as we add to the orphan list here, causing ext3_orphan_cleanup() -+ * to loop endlessly. It would be nice to do so, but needs work. -+ */ -+ if (oei->i_state & EXT3_STATE_DELETE || -+ sbi->s_mount_state & EXT3_ORPHAN_FS) { -+ ext3_debug("doing deferred inode %lu delete (%lu blocks)\n", -+ old_inode->i_ino, blocks); -+ goto out_delete; -+ } -+ -+ /* We can iget this inode again here, because our caller has unhashed -+ * old_inode, so new_inode will be in a different inode struct. -+ * -+ * We need to ensure that the i_orphan pointers in the other inodes -+ * point at the new inode copy instead of the old one so the orphan -+ * list doesn't get corrupted when the old orphan inode is freed. 
-+ */ -+ down(&sbi->s_orphan_lock); -+ -+ sbi->s_mount_state |= EXT3_ORPHAN_FS; -+ new_inode = iget(old_inode->i_sb, old_inode->i_ino); -+ sbi->s_mount_state &= ~EXT3_ORPHAN_FS; -+ if (is_bad_inode(new_inode)) { -+ printk(KERN_WARNING "read bad inode %lu\n", old_inode->i_ino); -+ iput(new_inode); -+ new_inode = NULL; -+ } -+ if (!new_inode) { -+ up(&sbi->s_orphan_lock); -+ ext3_debug("delete inode %lu directly (bad read)\n", -+ old_inode->i_ino); -+ goto out_delete; -+ } -+ J_ASSERT(new_inode != old_inode); -+ -+ J_ASSERT(!list_empty(&oei->i_orphan)); -+ -+ nei = EXT3_I(new_inode); -+ /* Ugh. We need to insert new_inode into the same spot on the list -+ * as old_inode was, to ensure the in-memory orphan list is still -+ * in the same order as the on-disk orphan list (badness otherwise). -+ */ -+ nei->i_orphan = oei->i_orphan; -+ nei->i_orphan.next->prev = &nei->i_orphan; -+ nei->i_orphan.prev->next = &nei->i_orphan; -+ nei->i_state |= EXT3_STATE_DELETE; -+ up(&sbi->s_orphan_lock); -+ -+ clear_inode(old_inode); -+ -+ spin_lock(&sbi->s_delete_lock); -+ J_ASSERT(list_empty(&new_inode->i_dentry)); -+ list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list); -+ sbi->s_delete_blocks += blocks; -+ sbi->s_delete_inodes++; -+ spin_unlock(&sbi->s_delete_lock); -+ -+ ext3_debug("delete inode %lu (%lu blocks) by thread\n", -+ new_inode->i_ino, blocks); -+ -+ wake_up(&sbi->s_delete_thread_queue); -+ return; -+ -+out_delete: -+ ext3_delete_inode(old_inode); -+} -+#else -+#define ext3_start_delete_thread(sbi) do {} while(0) -+#define ext3_stop_delete_thread(sbi) do {} while(0) -+#endif /* EXT3_DELETE_THREAD */ -+ - void ext3_put_super (struct super_block * sb) - { - struct ext3_sb_info *sbi = EXT3_SB(sb); -@@ -405,6 +620,8 @@ - kdev_t j_dev = sbi->s_journal->j_dev; - int i; - -+ J_ASSERT(sbi->s_delete_inodes == 0); -+ - ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); - if (!(sb->s_flags & MS_RDONLY)) { -@@ -453,7 +670,11 @@ - write_inode: ext3_write_inode, /* BKL not 
held. Don't need */ - dirty_inode: ext3_dirty_inode, /* BKL not held. We take it */ - put_inode: ext3_put_inode, /* BKL not held. Don't need */ -+#ifdef EXT3_DELETE_THREAD -+ delete_inode: ext3_delete_inode_thread,/* BKL not held. We take it */ -+#else - delete_inode: ext3_delete_inode, /* BKL not held. We take it */ -+#endif - put_super: ext3_put_super, /* BKL held */ - write_super: ext3_write_super, /* BKL held */ - sync_fs: ext3_sync_fs, -@@ -514,6 +735,14 @@ - this_char = strtok (NULL, ",")) { - if ((value = strchr (this_char, '=')) != NULL) - *value++ = 0; -+#ifdef EXT3_DELETE_THREAD -+ if (!strcmp(this_char, "asyncdel")) -+ set_opt(*mount_options, ASYNCDEL); -+ else if (!strcmp(this_char, "noasyncdel")) -+ clear_opt(*mount_options, ASYNCDEL); -+ else -+#endif -+ - if (!strcmp (this_char, "bsddf")) - clear_opt (*mount_options, MINIX_DF); - else if (!strcmp (this_char, "nouid32")) { -@@ -1209,6 +1438,7 @@ - } - - ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); -+ ext3_start_delete_thread(sb); - /* - * akpm: core read_super() calls in here with the superblock locked. 
- * That deadlocks, because orphan cleanup needs to lock the superblock -@@ -1585,7 +1815,12 @@ - static int ext3_sync_fs(struct super_block *sb) - { - tid_t target; -- -+ -+ if (atomic_read(&sb->s_active) == 0) { -+ /* fs is being umounted: time to stop delete thread */ -+ ext3_stop_delete_thread(EXT3_SB(sb)); -+ } -+ - sb->s_dirt = 0; - target = log_start_commit(EXT3_SB(sb)->s_journal, NULL); - log_wait_commit(EXT3_SB(sb)->s_journal, target); -@@ -1649,6 +1884,9 @@ - if (!parse_options(data, &tmp, sbi, &tmp, 1)) - return -EINVAL; - -+ if (!test_opt(sb, ASYNCDEL) || (*flags & MS_RDONLY)) -+ ext3_stop_delete_thread(sbi); -+ - if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) - ext3_abort(sb, __FUNCTION__, "Abort forced by user"); - -Index: linux-2.4.18-chaos/fs/ext3/file.c -=================================================================== ---- linux-2.4.18-chaos.orig/fs/ext3/file.c 2003-07-28 17:52:04.000000000 +0400 -+++ linux-2.4.18-chaos/fs/ext3/file.c 2004-01-13 16:26:01.000000000 +0300 -@@ -121,7 +121,11 @@ - }; - - struct inode_operations ext3_file_inode_operations = { -+#ifdef EXT3_DELETE_THREAD -+ truncate: ext3_truncate_thread, /* BKL held */ -+#else - truncate: ext3_truncate, /* BKL held */ -+#endif - setattr: ext3_setattr, /* BKL held */ - }; - -Index: linux-2.4.18-chaos/fs/ext3/inode.c -=================================================================== ---- linux-2.4.18-chaos.orig/fs/ext3/inode.c 2004-01-13 15:39:03.000000000 +0300 -+++ linux-2.4.18-chaos/fs/ext3/inode.c 2004-01-13 16:26:01.000000000 +0300 -@@ -2041,6 +2041,118 @@ - return; /* AKPM: return what? */ - } - -+#ifdef EXT3_DELETE_THREAD -+/* Move blocks from to-be-truncated inode over to a new inode, and delete -+ * that one from the delete thread instead. This avoids a lot of latency -+ * when truncating large files. -+ * -+ * If we have any problem deferring the truncate, just truncate it right away. 
-+ * If we defer it, we also mark how many blocks it would free, so that we -+ * can keep the statfs data correct, and we know if we should sleep on the -+ * delete thread when we run out of space. -+ */ -+void ext3_truncate_thread(struct inode *old_inode) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb); -+ struct ext3_inode_info *nei, *oei = EXT3_I(old_inode); -+ struct inode *new_inode; -+ handle_t *handle; -+ unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9); -+ -+ if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next) -+ goto out_truncate; -+ -+ /* XXX This is a temporary limitation for code simplicity. -+ * We could truncate to arbitrary sizes at some later time. -+ */ -+ if (old_inode->i_size != 0) -+ goto out_truncate; -+ -+ /* We may want to truncate the inode immediately and not defer it */ -+ if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS || -+ old_inode->i_size > oei->i_disksize) -+ goto out_truncate; -+ -+ /* We can't use the delete thread as-is during real orphan recovery, -+ * as we add to the orphan list here, causing ext3_orphan_cleanup() -+ * to loop endlessly. It would be nice to do so, but needs work. 
-+ */ -+ if (oei->i_state & EXT3_STATE_DELETE || -+ sbi->s_mount_state & EXT3_ORPHAN_FS) { -+ ext3_debug("doing deferred inode %lu delete (%lu blocks)\n", -+ old_inode->i_ino, blocks); -+ goto out_truncate; -+ } -+ -+ ext3_discard_prealloc(old_inode); -+ -+ /* old_inode = 1 -+ * new_inode = sb + GDT + ibitmap -+ * orphan list = 1 inode/superblock for add, 2 inodes for del -+ * quota files = 2 * EXT3_SINGLEDATA_TRANS_BLOCKS -+ */ -+ handle = ext3_journal_start(old_inode, 7); -+ if (IS_ERR(handle)) -+ goto out_truncate; -+ -+ new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode); -+ if (IS_ERR(new_inode)) { -+ ext3_debug("truncate inode %lu directly (no new inodes)\n", -+ old_inode->i_ino); -+ goto out_journal; -+ } -+ -+ nei = EXT3_I(new_inode); -+ -+ down_write(&oei->truncate_sem); -+ new_inode->i_size = old_inode->i_size; -+ new_inode->i_blocks = old_inode->i_blocks; -+ new_inode->i_uid = old_inode->i_uid; -+ new_inode->i_gid = old_inode->i_gid; -+ new_inode->i_nlink = 0; -+ -+ /* FIXME when we do arbitrary truncates */ -+ old_inode->i_blocks = oei->i_file_acl ? 
old_inode->i_blksize / 512 : 0; -+ old_inode->i_mtime = old_inode->i_ctime = CURRENT_TIME; -+ -+ memcpy(nei->i_data, oei->i_data, sizeof(nei->i_data)); -+ memset(oei->i_data, 0, sizeof(oei->i_data)); -+ -+ nei->i_disksize = oei->i_disksize; -+ nei->i_state |= EXT3_STATE_DELETE; -+ up_write(&oei->truncate_sem); -+ -+ if (ext3_orphan_add(handle, new_inode) < 0) -+ goto out_journal; -+ -+ if (ext3_orphan_del(handle, old_inode) < 0) { -+ ext3_orphan_del(handle, new_inode); -+ iput(new_inode); -+ goto out_journal; -+ } -+ -+ ext3_journal_stop(handle, old_inode); -+ -+ spin_lock(&sbi->s_delete_lock); -+ J_ASSERT(list_empty(&new_inode->i_dentry)); -+ list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list); -+ sbi->s_delete_blocks += blocks; -+ sbi->s_delete_inodes++; -+ spin_unlock(&sbi->s_delete_lock); -+ -+ ext3_debug("delete inode %lu (%lu blocks) by thread\n", -+ new_inode->i_ino, blocks); -+ -+ wake_up(&sbi->s_delete_thread_queue); -+ return; -+ -+out_journal: -+ ext3_journal_stop(handle, old_inode); -+out_truncate: -+ ext3_truncate(old_inode); -+} -+#endif /* EXT3_DELETE_THREAD */ -+ - /* - * ext3_get_inode_loc returns with an extra refcount against the - * inode's underlying buffer_head on success. 
-Index: linux-2.4.18-chaos/fs/buffer.c -=================================================================== ---- linux-2.4.18-chaos.orig/fs/buffer.c 2003-07-28 17:52:03.000000000 +0400 -+++ linux-2.4.18-chaos/fs/buffer.c 2004-01-13 16:34:43.000000000 +0300 -@@ -352,9 +352,9 @@ - lock_super(sb); - if (sb->s_dirt && sb->s_op && sb->s_op->write_super) - sb->s_op->write_super(sb); -+ unlock_super(sb); - if (sb->s_op && sb->s_op->sync_fs) - sb->s_op->sync_fs(sb); -- unlock_super(sb); - unlock_kernel(); - - return sync_buffers(dev, 1); -Index: linux-2.4.18-chaos/include/linux/ext3_fs.h -=================================================================== ---- linux-2.4.18-chaos.orig/include/linux/ext3_fs.h 2004-01-13 15:39:03.000000000 +0300 -+++ linux-2.4.18-chaos/include/linux/ext3_fs.h 2004-01-13 16:26:01.000000000 +0300 -@@ -190,6 +190,7 @@ - */ - #define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */ - #define EXT3_STATE_NEW 0x00000002 /* inode is newly created */ -+#define EXT3_STATE_DELETE 0x00000010 /* deferred delete inode */ - - /* - * ioctl commands -@@ -317,6 +318,7 @@ - #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */ - #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ - #define EXT3_MOUNT_INDEX 0x4000 /* Enable directory index */ -+#define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef _LINUX_EXT2_FS_H -@@ -651,6 +653,9 @@ - extern void ext3_dirty_inode(struct inode *); - extern int ext3_change_inode_journal_flag(struct inode *, int); - extern void ext3_truncate (struct inode *); -+#ifdef EXT3_DELETE_THREAD -+extern void ext3_truncate_thread(struct inode *inode); -+#endif - - /* ioctl.c */ - extern int ext3_ioctl (struct inode *, struct file *, unsigned int, -Index: linux-2.4.18-chaos/include/linux/ext3_fs_sb.h -=================================================================== ---- 
linux-2.4.18-chaos.orig/include/linux/ext3_fs_sb.h 2004-01-13 15:39:03.000000000 +0300 -+++ linux-2.4.18-chaos/include/linux/ext3_fs_sb.h 2004-01-13 16:26:01.000000000 +0300 -@@ -29,6 +29,8 @@ - - #define EXT3_MAX_GROUP_LOADED 32 - -+#define EXT3_DELETE_THREAD -+ - /* - * third extended-fs super-block data in memory - */ -@@ -74,6 +76,14 @@ - struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ - wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ - #endif -+#ifdef EXT3_DELETE_THREAD -+ spinlock_t s_delete_lock; -+ struct list_head s_delete_list; -+ unsigned long s_delete_blocks; -+ unsigned long s_delete_inodes; -+ wait_queue_head_t s_delete_thread_queue; -+ wait_queue_head_t s_delete_waiter_queue; -+#endif - }; - - #endif /* _LINUX_EXT3_FS_SB */ diff --git a/lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.18-chaos.patch b/lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.18-chaos.patch deleted file mode 100644 index b6dc0dd..0000000 --- a/lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.18-chaos.patch +++ /dev/null @@ -1,761 +0,0 @@ - fs/ext3/ialloc.c | 6 - fs/ext3/inode.c | 10 - fs/ext3/super.c | 4 - fs/ext3/xattr.c | 598 +++++++++++++++++++++++++++++++++++++++++++++- - include/linux/ext3_fs.h | 2 - include/linux/ext3_fs_i.h | 3 - 6 files changed, 613 insertions(+), 10 deletions(-) - ---- linux-2.4.18-chaos/fs/ext3/ialloc.c~ext3-ea-in-inode-2.4.18-chaos 2003-10-08 10:38:03.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/fs/ext3/ialloc.c 2003-10-08 15:08:45.000000000 +0400 -@@ -586,6 +586,12 @@ repeat: - insert_inode_hash(inode); - inode->i_generation = sbi->s_next_generation++; - -+ if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) { -+ ei->i_extra_isize = sizeof(__u16) /* i_extra_isize */ -+ + sizeof(__u16); /* i_pad1 */ -+ } else -+ ei->i_extra_isize = 0; -+ - ei->i_state = EXT3_STATE_NEW; - err = ext3_get_inode_loc_new(inode, &iloc, 1); - if (err) goto fail; ---- 
linux-2.4.18-chaos/fs/ext3/inode.c~ext3-ea-in-inode-2.4.18-chaos 2003-10-08 10:38:03.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/fs/ext3/inode.c 2003-10-08 15:08:45.000000000 +0400 -@@ -2459,6 +2459,11 @@ void ext3_read_inode(struct inode * inod - ei->i_data[block] = iloc.raw_inode->i_block[block]; - INIT_LIST_HEAD(&ei->i_orphan); - -+ if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) -+ ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); -+ else -+ ei->i_extra_isize = 0; -+ - brelse (iloc.bh); - - if (S_ISREG(inode->i_mode)) { -@@ -2523,6 +2528,8 @@ static int ext3_do_update_inode(handle_t - if (err) - goto out_brelse; - } -+ if (ei->i_state & EXT3_STATE_NEW) -+ memset(raw_inode, 0, EXT3_INODE_SIZE(inode->i_sb)); - raw_inode->i_mode = cpu_to_le16(inode->i_mode); - if(!(test_opt(inode->i_sb, NO_UID32))) { - raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); -@@ -2606,6 +2613,9 @@ static int ext3_do_update_inode(handle_t - else for (block = 0; block < EXT3_N_BLOCKS; block++) - raw_inode->i_block[block] = ei->i_data[block]; - -+ if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) -+ raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); -+ - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); - rc = ext3_journal_dirty_metadata(handle, bh); - if (!err) ---- linux-2.4.18-chaos/fs/ext3/xattr.c~ext3-ea-in-inode-2.4.18-chaos 2003-10-08 10:38:01.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/fs/ext3/xattr.c 2003-10-12 16:16:44.000000000 +0400 -@@ -102,6 +102,9 @@ - static int ext3_xattr_set2(handle_t *, struct inode *, struct buffer_head *, - struct ext3_xattr_header *); - -+int ext3_xattr_block_set(handle_t *, struct inode *, int, const char *, -+ void *, size_t, int); -+ - #ifdef CONFIG_EXT3_FS_XATTR_SHARING - - static int ext3_xattr_cache_insert(struct buffer_head *); -@@ -362,17 +365,12 @@ ext3_removexattr(struct dentry *dentry, - } - - /* -- * ext3_xattr_get() -- * -- * Copy an extended attribute into the buffer -- * 
provided, or compute the buffer size required. -- * Buffer is NULL to compute the size of the buffer required. -+ * ext3_xattr_block_get() - * -- * Returns a negative error number on failure, or the number of bytes -- * used / required on success. -+ * routine looks for attribute in EA block and returns it's value and size - */ - int --ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ext3_xattr_block_get(struct inode *inode, int name_index, const char *name, - void *buffer, size_t buffer_size) - { - struct buffer_head *bh = NULL; -@@ -461,6 +459,94 @@ cleanup: - } - - /* -+ * ext3_xattr_ibode_get() -+ * -+ * routine looks for attribute in inode body and returns it's value and size -+ */ -+int -+ext3_xattr_ibody_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ int size, name_len = strlen(name), storage_size; -+ struct ext3_xattr_entry *last; -+ struct ext3_inode *raw_inode; -+ struct ext3_iloc iloc; -+ char *start, *end; -+ int ret = -ENOENT; -+ -+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) -+ return -ENOENT; -+ -+ ret = ext3_get_inode_loc(inode, &iloc); -+ if (ret) -+ return ret; -+ raw_inode = iloc.raw_inode; -+ -+ storage_size = EXT3_SB(inode->i_sb)->s_inode_size - -+ EXT3_GOOD_OLD_INODE_SIZE - -+ EXT3_I(inode)->i_extra_isize - -+ sizeof(__u32); -+ start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE + -+ EXT3_I(inode)->i_extra_isize; -+ if (le32_to_cpu((*(__u32*) start)) != EXT3_XATTR_MAGIC) { -+ brelse(iloc.bh); -+ return -ENOENT; -+ } -+ start += sizeof(__u32); -+ end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ -+ last = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ if (le32_to_cpu(last->e_value_size) > storage_size || -+ (char *) next >= end) { -+ ext3_error(inode->i_sb, "ext3_xattr_ibody_get", -+ "inode %ld", inode->i_ino); -+ brelse(iloc.bh); -+ return 
-EIO; -+ } -+ if (name_index == last->e_name_index && -+ name_len == last->e_name_len && -+ !memcmp(name, last->e_name, name_len)) -+ goto found; -+ last = next; -+ } -+ -+ /* can't find EA */ -+ brelse(iloc.bh); -+ return -ENOENT; -+ -+found: -+ size = le32_to_cpu(last->e_value_size); -+ if (buffer) { -+ ret = -ERANGE; -+ if (buffer_size >= size) { -+ memcpy(buffer, start + le16_to_cpu(last->e_value_offs), -+ size); -+ ret = size; -+ } -+ } else -+ ret = size; -+ brelse(iloc.bh); -+ return ret; -+} -+ -+int ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ int err; -+ -+ /* try to find attribute in inode body */ -+ err = ext3_xattr_ibody_get(inode, name_index, name, -+ buffer, buffer_size); -+ if (err < 0) -+ /* search was unsuccessful, try to find EA in dedicated block */ -+ err = ext3_xattr_block_get(inode, name_index, name, -+ buffer, buffer_size); -+ return err; -+} -+ -+/* - * ext3_xattr_list() - * - * Copy a list of attribute names into the buffer -@@ -471,7 +557,7 @@ cleanup: - * used / required on success. 
- */ - int --ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) -+ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) - { - struct buffer_head *bh = NULL; - struct ext3_xattr_entry *entry; -@@ -547,6 +633,131 @@ cleanup: - return error; - } - -+/* ext3_xattr_ibody_list() -+ * -+ * generate list of attributes stored in inode body -+ */ -+int -+ext3_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ struct ext3_xattr_entry *last; -+ struct ext3_inode *raw_inode; -+ char *start, *end, *buf; -+ struct ext3_iloc iloc; -+ int storage_size; -+ int ret; -+ int size = 0; -+ -+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) -+ return 0; -+ -+ ret = ext3_get_inode_loc(inode, &iloc); -+ if (ret) -+ return ret; -+ raw_inode = iloc.raw_inode; -+ -+ storage_size = EXT3_SB(inode->i_sb)->s_inode_size - -+ EXT3_GOOD_OLD_INODE_SIZE - -+ EXT3_I(inode)->i_extra_isize - -+ sizeof(__u32); -+ start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE + -+ EXT3_I(inode)->i_extra_isize; -+ if (le32_to_cpu((*(__u32*) start)) != EXT3_XATTR_MAGIC) { -+ brelse(iloc.bh); -+ return 0; -+ } -+ start += sizeof(__u32); -+ end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ -+ last = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ struct ext3_xattr_handler *handler; -+ if (le32_to_cpu(last->e_value_size) > storage_size || -+ (char *) next >= end) { -+ ext3_error(inode->i_sb, "ext3_xattr_ibody_list", -+ "inode %ld", inode->i_ino); -+ brelse(iloc.bh); -+ return -EIO; -+ } -+ handler = ext3_xattr_handler(last->e_name_index); -+ if (handler) -+ size += handler->list(NULL, inode, last->e_name, -+ last->e_name_len); -+ last = next; -+ } -+ -+ if (!buffer) { -+ ret = size; -+ goto cleanup; -+ } else { -+ ret = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ } -+ -+ last = (struct ext3_xattr_entry *) start; -+ buf = 
buffer; -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ struct ext3_xattr_handler *handler; -+ handler = ext3_xattr_handler(last->e_name_index); -+ if (handler) -+ buf += handler->list(buf, inode, last->e_name, -+ last->e_name_len); -+ last = next; -+ } -+ ret = size; -+cleanup: -+ brelse(iloc.bh); -+ return ret; -+} -+ -+/* -+ * ext3_xattr_list() -+ * -+ * Copy a list of attribute names into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. -+ */ -+int -+ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ int error; -+ int size = buffer_size; -+ -+ /* get list of attributes stored in inode body */ -+ error = ext3_xattr_ibody_list(inode, buffer, buffer_size); -+ if (error < 0) { -+ /* some error occured while collecting -+ * attributes in inode body */ -+ size = 0; -+ goto cleanup; -+ } -+ size = error; -+ -+ /* get list of attributes stored in dedicated block */ -+ if (buffer) { -+ buffer_size -= error; -+ if (buffer_size <= 0) { -+ buffer = NULL; -+ buffer_size = 0; -+ } else -+ buffer += error; -+ } -+ -+ error = ext3_xattr_block_list(inode, buffer, buffer_size); -+ if (error < 0) -+ /* listing was successful, so we return len */ -+ size = 0; -+ -+cleanup: -+ return error + size; -+} -+ - /* - * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is - * not set, set it. 
-@@ -570,6 +781,279 @@ static void ext3_xattr_update_super_bloc - } - - /* -+ * ext3_xattr_ibody_find() -+ * -+ * search attribute and calculate free space in inode body -+ * NOTE: free space includes space our attribute hold -+ */ -+int -+ext3_xattr_ibody_find(struct inode *inode, int name_index, -+ const char *name, struct ext3_xattr_entry *rentry, int *free) -+{ -+ struct ext3_xattr_entry *last; -+ struct ext3_inode *raw_inode; -+ int name_len = strlen(name); -+ int err, storage_size; -+ struct ext3_iloc iloc; -+ char *start, *end; -+ int ret = -ENOENT; -+ -+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) -+ return ret; -+ -+ err = ext3_get_inode_loc(inode, &iloc); -+ if (err) -+ return -EIO; -+ raw_inode = iloc.raw_inode; -+ -+ storage_size = EXT3_SB(inode->i_sb)->s_inode_size - -+ EXT3_GOOD_OLD_INODE_SIZE - -+ EXT3_I(inode)->i_extra_isize - -+ sizeof(__u32); -+ *free = storage_size - sizeof(__u32); -+ start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE + -+ EXT3_I(inode)->i_extra_isize; -+ if (le32_to_cpu((*(__u32*) start)) != EXT3_XATTR_MAGIC) { -+ brelse(iloc.bh); -+ return -ENOENT; -+ } -+ start += sizeof(__u32); -+ end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ -+ last = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ if (le32_to_cpu(last->e_value_size) > storage_size || -+ (char *) next >= end) { -+ ext3_error(inode->i_sb, "ext3_xattr_ibody_find", -+ "inode %ld", inode->i_ino); -+ brelse(iloc.bh); -+ return -EIO; -+ } -+ -+ if (name_index == last->e_name_index && -+ name_len == last->e_name_len && -+ !memcmp(name, last->e_name, name_len)) { -+ memcpy(rentry, last, sizeof(struct ext3_xattr_entry)); -+ ret = 0; -+ } else { -+ *free -= EXT3_XATTR_LEN(last->e_name_len); -+ *free -= le32_to_cpu(last->e_value_size); -+ } -+ last = next; -+ } -+ -+ brelse(iloc.bh); -+ return ret; -+} -+ -+/* -+ * ext3_xattr_block_find() -+ * -+ * search 
attribute and calculate free space in EA block (if it allocated) -+ * NOTE: free space includes space our attribute hold -+ */ -+int -+ext3_xattr_block_find(struct inode *inode, int name_index, const char *name, -+ struct ext3_xattr_entry *rentry, int *free) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_entry *entry; -+ char *end; -+ int name_len, error = -ENOENT; -+ -+ if (!EXT3_I(inode)->i_file_acl) { -+ *free = inode->i_sb->s_blocksize - -+ sizeof(struct ext3_xattr_header) - -+ sizeof(__u32); -+ return -ENOENT; -+ } -+ ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl); -+ bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(inode->i_sb, "ext3_xattr_get", -+ "inode %ld: bad block %d", inode->i_ino, -+ EXT3_I(inode)->i_file_acl); -+ brelse(bh); -+ return -EIO; -+ } -+ /* find named attribute */ -+ name_len = strlen(name); -+ *free = bh->b_size - sizeof(__u32); -+ -+ entry = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (name_index == entry->e_name_index && -+ name_len == entry->e_name_len && -+ memcmp(name, entry->e_name, name_len) == 0) { -+ memcpy(rentry, entry, sizeof(struct ext3_xattr_entry)); -+ error = 0; -+ } else { -+ *free -= EXT3_XATTR_LEN(entry->e_name_len); -+ *free -= le32_to_cpu(entry->e_value_size); -+ } -+ entry = next; -+ } -+ brelse(bh); -+ -+ return error; -+} -+ -+/* -+ * ext3_xattr_inode_set() -+ * -+ * this routine add/remove/replace attribute in inode body -+ */ -+int -+ext3_xattr_ibody_set(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, size_t value_len, 
-+ int flags) -+{ -+ struct ext3_xattr_entry *last, *next, *here = NULL; -+ struct ext3_inode *raw_inode; -+ int name_len = strlen(name); -+ int esize = EXT3_XATTR_LEN(name_len); -+ struct buffer_head *bh; -+ int err, storage_size; -+ struct ext3_iloc iloc; -+ int free, min_offs; -+ char *start, *end; -+ -+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) -+ return -ENOSPC; -+ -+ err = ext3_get_inode_loc(inode, &iloc); -+ if (err) -+ return err; -+ raw_inode = iloc.raw_inode; -+ bh = iloc.bh; -+ -+ storage_size = EXT3_SB(inode->i_sb)->s_inode_size - -+ EXT3_GOOD_OLD_INODE_SIZE - -+ EXT3_I(inode)->i_extra_isize - -+ sizeof(__u32); -+ start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE + -+ EXT3_I(inode)->i_extra_isize; -+ if ((*(__u32*) start) != EXT3_XATTR_MAGIC) { -+ /* inode had no attributes before */ -+ *((__u32*) start) = cpu_to_le32(EXT3_XATTR_MAGIC); -+ } -+ start += sizeof(__u32); -+ end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ min_offs = storage_size; -+ free = storage_size - sizeof(__u32); -+ -+ last = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(last)) { -+ next = EXT3_XATTR_NEXT(last); -+ if (le32_to_cpu(last->e_value_size) > storage_size || -+ (char *) next >= end) { -+ ext3_error(inode->i_sb, "ext3_xattr_ibody_set", -+ "inode %ld", inode->i_ino); -+ brelse(bh); -+ return -EIO; -+ } -+ -+ if (last->e_value_size) { -+ int offs = le16_to_cpu(last->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ if (name_index == last->e_name_index && -+ name_len == last->e_name_len && -+ !memcmp(name, last->e_name, name_len)) -+ here = last; -+ else { -+ /* we calculate all but our attribute -+ * because it will be removed before changing */ -+ free -= EXT3_XATTR_LEN(last->e_name_len); -+ free -= le32_to_cpu(last->e_value_size); -+ } -+ last = next; -+ } -+ -+ if (value && (esize + value_len > free)) { -+ brelse(bh); -+ return -ENOSPC; -+ } -+ -+ err = ext3_reserve_inode_write(handle, inode, 
&iloc); -+ if (err) { -+ brelse(bh); -+ return err; -+ } -+ -+ if (here) { -+ /* time to remove old value */ -+ struct ext3_xattr_entry *e; -+ int size = le32_to_cpu(here->e_value_size); -+ int border = le16_to_cpu(here->e_value_offs); -+ char *src; -+ -+ /* move tail */ -+ memmove(start + min_offs + size, start + min_offs, -+ border - min_offs); -+ -+ /* recalculate offsets */ -+ e = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(e)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(e); -+ int offs = le16_to_cpu(e->e_value_offs); -+ if (offs < border) -+ e->e_value_offs = -+ cpu_to_le16(offs + size); -+ e = next; -+ } -+ min_offs += size; -+ -+ /* remove entry */ -+ border = EXT3_XATTR_LEN(here->e_name_len); -+ src = (char *) here + EXT3_XATTR_LEN(here->e_name_len); -+ size = (char *) last - src; -+ if ((char *) here + size > end) -+ printk("ALERT at %s:%d: 0x%p + %d > 0x%p\n", -+ __FILE__, __LINE__, here, size, end); -+ memmove(here, src, size); -+ last = (struct ext3_xattr_entry *) ((char *) last - border); -+ *((__u32 *) last) = 0; -+ } -+ -+ if (value) { -+ int offs = min_offs - value_len; -+ /* use last to create new entry */ -+ last->e_name_len = strlen(name); -+ last->e_name_index = name_index; -+ last->e_value_offs = cpu_to_le16(offs); -+ last->e_value_size = cpu_to_le32(value_len); -+ last->e_hash = last->e_value_block = 0; -+ memset(last->e_name, 0, esize); -+ memcpy(last->e_name, name, last->e_name_len); -+ if (start + offs + value_len > end) -+ printk("ALERT at %s:%d: 0x%p + %d + %d > 0x%p\n", -+ __FILE__, __LINE__, start, offs, -+ value_len, end); -+ memcpy(start + offs, value, value_len); -+ last = EXT3_XATTR_NEXT(last); -+ *((__u32 *) last) = 0; -+ } -+ -+ ext3_mark_iloc_dirty(handle, inode, &iloc); -+ brelse(bh); -+ -+ return 0; -+} -+ -+/* - * ext3_xattr_set() - * - * Create, replace or remove an extended attribute for this inode. 
Buffer -@@ -583,6 +1067,101 @@ static void ext3_xattr_update_super_bloc - */ - int - ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, void *value, size_t value_len, int flags) -+{ -+ struct ext3_xattr_entry entry; -+ int err, where = 0, found = 0, total; -+ int free1 = -1, free2 = -1; -+ int name_len; -+ -+ ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", -+ name_index, name, value, (long)value_len); -+ -+ if (IS_RDONLY(inode)) -+ return -EROFS; -+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -+ return -EPERM; -+ if (value == NULL) -+ value_len = 0; -+ if (name == NULL) -+ return -EINVAL; -+ name_len = strlen(name); -+ if (name_len > 255 || value_len > inode->i_sb->s_blocksize) -+ return -ERANGE; -+ -+ /* try to find attribute in inode body */ -+ err = ext3_xattr_ibody_find(inode, name_index, name, &entry, &free1); -+ if (err == 0) { -+ /* found EA in inode */ -+ found = 1; -+ where = 0; -+ } else if (err == -ENOENT) { -+ /* there is no such attribute in inode body */ -+ /* try to find attribute in dedicated block */ -+ err = ext3_xattr_block_find(inode, name_index, name, -+ &entry, &free2); -+ if (err != 0 && err != -ENOENT) { -+ /* not found EA in block */ -+ goto finish; -+ } else if (err == 0) { -+ /* found EA in block */ -+ where = 1; -+ found = 1; -+ } -+ } else -+ goto finish; -+ -+ /* check flags: may replace? may create ? 
*/ -+ if (found && (flags & XATTR_CREATE)) { -+ err = -EEXIST; -+ goto finish; -+ } else if (!found && (flags & XATTR_REPLACE)) { -+ err = -ENODATA; -+ goto finish; -+ } -+ -+ /* check if we have enough space to store attribute */ -+ total = EXT3_XATTR_LEN(strlen(name)) + value_len; -+ if (free1 >= 0 && total > free1 && free2 >= 0 && total > free2) { -+ /* have no enough space */ -+ err = -ENOSPC; -+ goto finish; -+ } -+ -+ /* time to remove attribute */ -+ if (found) { -+ if (where == 0) { -+ /* EA is stored in inode body */ -+ ext3_xattr_ibody_set(handle, inode, name_index, name, -+ NULL, 0, flags); -+ } else { -+ /* EA is stored in separated block */ -+ ext3_xattr_block_set(handle, inode, name_index, name, -+ NULL, 0, flags); -+ } -+ } -+ -+ /* try to store EA in inode body */ -+ err = ext3_xattr_ibody_set(handle, inode, name_index, name, -+ value, value_len, flags); -+ if (err) { -+ /* can't store EA in inode body */ -+ /* try to store in block */ -+ err = ext3_xattr_block_set(handle, inode, name_index, -+ name, value, value_len, flags); -+ } -+ -+finish: -+ return err; -+} -+ -+/* -+ * ext3_xattr_block_set() -+ * -+ * this routine add/remove/replace attribute in EA block -+ */ -+int -+ext3_xattr_block_set(handle_t *handle, struct inode *inode, int name_index, - const char *name, void *value, size_t value_len, int flags) - { - struct super_block *sb = inode->i_sb; -@@ -619,6 +1197,7 @@ ext3_xattr_set(handle_t *handle, struct - name_len = strlen(name); - if (name_len > 255 || value_len > sb->s_blocksize) - return -ERANGE; -+ - ext3_xattr_lock(); - - if (EXT3_I(inode)->i_file_acl) { -@@ -819,6 +1398,7 @@ cleanup: - brelse(bh); - if (!(bh && header == HDR(bh))) - kfree(header); -+ - ext3_xattr_unlock(); - - return error; ---- linux-2.4.18-chaos/include/linux/ext3_fs.h~ext3-ea-in-inode-2.4.18-chaos 2003-10-08 10:38:03.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/include/linux/ext3_fs.h 2003-10-08 15:08:45.000000000 +0400 -@@ -264,6 +264,8 @@ struct ext3_inode { - 
__u32 m_i_reserved2[2]; - } masix2; - } osd2; /* OS dependent 2 */ -+ __u16 i_extra_isize; -+ __u16 i_pad1; - }; - - #define i_size_high i_dir_acl ---- linux-2.4.18-chaos/include/linux/ext3_fs_i.h~ext3-ea-in-inode-2.4.18-chaos 2003-10-08 10:38:03.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/include/linux/ext3_fs_i.h 2003-10-08 15:08:45.000000000 +0400 -@@ -62,6 +62,9 @@ struct ext3_inode_info { - */ - loff_t i_disksize; - -+ /* on-disk additional length */ -+ __u16 i_extra_isize; -+ - /* - * truncate_sem is for serialising ext3_truncate() against - * ext3_getblock(). In the 2.4 ext2 design, great chunks of inode's ---- linux-2.4.18-chaos/fs/ext3/super.c~ext3-ea-in-inode-2.4.18-chaos 2003-10-08 10:38:03.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/fs/ext3/super.c 2003-10-08 15:08:45.000000000 +0400 -@@ -1292,7 +1292,9 @@ struct super_block * ext3_read_super (st - } else { - sbi->s_inode_size = le16_to_cpu(es->s_inode_size); - sbi->s_first_ino = le32_to_cpu(es->s_first_ino); -- if (sbi->s_inode_size != EXT3_GOOD_OLD_INODE_SIZE) { -+ if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) || -+ (sbi->s_inode_size & (sbi->s_inode_size - 1)) || -+ (sbi->s_inode_size > blocksize)) { - printk (KERN_ERR - "EXT3-fs: unsupported inode size: %d\n", - sbi->s_inode_size); - -_ diff --git a/lustre/kernel_patches/patches/ext3-extents-2.4.18-chaos-pdirops.patch b/lustre/kernel_patches/patches/ext3-extents-2.4.18-chaos-pdirops.patch deleted file mode 100644 index 592af93..0000000 --- a/lustre/kernel_patches/patches/ext3-extents-2.4.18-chaos-pdirops.patch +++ /dev/null @@ -1,1891 +0,0 @@ - fs/ext3/Makefile | 3 - fs/ext3/extents.c | 1624 +++++++++++++++++++++++++++++++++++++++++++++ - fs/ext3/ialloc.c | 4 - fs/ext3/inode.c | 30 - fs/ext3/super.c | 8 - include/linux/ext3_fs.h | 18 - include/linux/ext3_fs_i.h | 4 - include/linux/ext3_fs_sb.h | 10 - 8 files changed, 1693 insertions(+), 8 deletions(-) - ---- /dev/null 2003-01-30 13:24:37.000000000 +0300 -+++ 
linux-2.4.18-chaos-pdirops-alexey/fs/ext3/extents.c 2003-09-23 18:09:30.000000000 +0400 -@@ -0,0 +1,1624 @@ -+/* -+ * -+ * linux/fs/ext3/extents.c -+ * -+ * Extents support for EXT3 -+ * -+ * 07/08/2003 Alex Tomas -+ * -+ * TODO: -+ * - ext3*_error() should be used in some situations -+ * - find_goal() [to be tested and improved] -+ * - error handling -+ * - we could leak allocated block in some error cases -+ * - quick search for index/leaf in ext3_ext_find_extent() -+ * - tree reduction -+ * - cache last found extent -+ * - arch-independent -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* -+ * with AGRESSIVE_TEST defined capacity of index/leaf blocks -+ * become very little, so index split, in-depth growing and -+ * other hard changes happens much more often -+ * this is for debug purposes only -+ */ -+#define AGRESSIVE_TEST_ -+ -+/* -+ * if EXT_DEBUG defined you can use 'extdebug' mount option -+ * to get lots of info what's going on -+ */ -+#define EXT_DEBUG -+#ifdef EXT_DEBUG -+#define ext_debug(inode,fmt,a...) \ -+do { \ -+ if (test_opt((inode)->i_sb, EXTDEBUG)) \ -+ printk(fmt, ##a); \ -+} while (0); -+#else -+#define ext_debug(inode,fmt,a...) -+#endif -+ -+#define EXT3_ALLOC_NEEDED 2 /* block bitmap + group descriptor */ -+ -+/* -+ * ext3_inode has i_block array (total 60 bytes) -+ * first 4 bytes are used to store: -+ * - tree depth (0 mean there is no tree yet. 
all extents in the inode) -+ * - number of alive extents in the inode -+ */ -+ -+/* -+ * this is extent on-disk structure -+ * it's used at the bottom of the tree -+ */ -+struct ext3_extent { -+ __u32 e_block; /* first logical block extent covers */ -+ __u32 e_start; /* first physical block extents lives */ -+ __u32 e_num; /* number of blocks covered by extent */ -+}; -+ -+/* -+ * this is index on-disk structure -+ * it's used at all the levels, but the bottom -+ */ -+struct ext3_extent_idx { -+ __u32 e_block; /* index covers logical blocks from 'block' */ -+ __u32 e_leaf; /* pointer to the physical block of the next * -+ * level. leaf or next index could bet here */ -+}; -+ -+/* -+ * each block (leaves and indexes), even inode-stored has header -+ */ -+struct ext3_extent_header { -+ __u16 e_num; /* number of valid entries */ -+ __u16 e_max; /* capacity of store in entries */ -+}; -+ -+/* -+ * array of ext3_ext_path contains path to some extent -+ * creation/lookup routines use it for traversal/splitting/etc -+ * truncate uses it to simulate recursive walking -+ */ -+struct ext3_ext_path { -+ __u32 p_block; -+ __u16 p_depth; -+ struct ext3_extent *p_ext; -+ struct ext3_extent_idx *p_idx; -+ struct ext3_extent_header *p_hdr; -+ struct buffer_head *p_bh; -+}; -+ -+#define EXT_FIRST_EXTENT(__hdr__) \ -+ ((struct ext3_extent *) (((char *) (__hdr__)) + \ -+ sizeof(struct ext3_extent_header))) -+#define EXT_FIRST_INDEX(__hdr__) \ -+ ((struct ext3_extent_idx *) (((char *) (__hdr__)) + \ -+ sizeof(struct ext3_extent_header))) -+#define EXT_HAS_FREE_INDEX(__path__) \ -+ ((__path__)->p_hdr->e_num < (__path__)->p_hdr->e_max) -+#define EXT_LAST_EXTENT(__hdr__) \ -+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->e_num - 1) -+#define EXT_LAST_INDEX(__hdr__) \ -+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->e_num - 1) -+#define EXT_MAX_EXTENT(__hdr__) \ -+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->e_max - 1) -+#define EXT_MAX_INDEX(__hdr__) \ -+ (EXT_FIRST_INDEX((__hdr__)) + 
(__hdr__)->e_max - 1) -+ -+ -+#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); -+ -+/* -+ * could return: -+ * - EROFS -+ * - ENOMEM -+ */ -+static int ext3_ext_get_access(handle_t *handle, struct inode *inode, -+ struct ext3_ext_path *path) -+{ -+ if (path->p_bh) { -+ /* path points to block */ -+ return ext3_journal_get_write_access(handle, path->p_bh); -+ } -+ -+ /* path points to leaf/index in inode body */ -+ return 0; -+} -+ -+/* -+ * could return: -+ * - EROFS -+ * - ENOMEM -+ * - EIO -+ */ -+static int ext3_ext_dirty(handle_t *handle, struct inode *inode, -+ struct ext3_ext_path *path) -+{ -+ if (path->p_bh) { -+ /* path points to block */ -+ return ext3_journal_dirty_metadata(handle, path->p_bh); -+ } -+ -+ /* path points to leaf/index in inode body */ -+ return ext3_mark_inode_dirty(handle, inode); -+} -+ -+static inline int ext3_ext_space_block(struct inode *inode) -+{ -+ int size; -+ -+ size = (inode->i_sb->s_blocksize - sizeof(struct ext3_extent_header)) -+ / sizeof(struct ext3_extent); -+#ifdef AGRESSIVE_TEST -+ size = 6; /* FIXME: for debug, remove this line */ -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_inode(struct inode *inode) -+{ -+ int size; -+ -+ size = (sizeof(EXT3_I(inode)->i_data) - -+ sizeof(struct ext3_extent_header)) -+ / sizeof(struct ext3_extent); -+#ifdef AGRESSIVE_TEST -+ size = 3; /* FIXME: for debug, remove this line */ -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_inode_idx(struct inode *inode) -+{ -+ int size; -+ -+ size = (sizeof(EXT3_I(inode)->i_data) - -+ sizeof(struct ext3_extent_header)) -+ / sizeof(struct ext3_extent_idx); -+#ifdef AGRESSIVE_TEST -+ size = 4; /* FIXME: for debug, remove this line */ -+#endif -+ return size; -+} -+ -+static void ext3_ext_show_path(struct inode *inode, struct ext3_ext_path *path) -+{ -+ int k, l = path->p_depth; -+ -+ ext_debug(inode, "path:"); -+ for (k = 0; k <= l; k++, path++) { -+ if (path->p_idx) { -+ ext_debug(inode, " %d->%d", 
path->p_idx->e_block, -+ path->p_idx->e_leaf); -+ } else if (path->p_ext) { -+ ext_debug(inode, " %d:%d:%d", -+ path->p_ext->e_block, -+ path->p_ext->e_start, -+ path->p_ext->e_num); -+ } else -+ ext_debug(inode, " []"); -+ } -+ ext_debug(inode, "\n"); -+} -+ -+static void ext3_ext_show_leaf(struct inode *inode, struct ext3_ext_path *path) -+{ -+ int depth = EXT3_I(inode)->i_depth; -+ struct ext3_extent_header *eh = path[depth].p_hdr; -+ struct ext3_extent *ex = EXT_FIRST_EXTENT(eh); -+ int i; -+ -+ for (i = 0; i < eh->e_num; i++, ex++) { -+ ext_debug(inode, "%d:%d:%d ", -+ ex->e_block, ex->e_start, ex->e_num); -+ } -+ ext_debug(inode, "\n"); -+} -+ -+static void ext3_ext_drop_refs(struct inode *inode, struct ext3_ext_path *path) -+{ -+ int depth = path->p_depth; -+ int i; -+ -+ for (i = 0; i <= depth; i++, path++) -+ if (path->p_bh) { -+ brelse(path->p_bh); -+ path->p_bh = NULL; -+ } -+} -+ -+static int ext3_ext_find_goal(struct inode *inode, struct ext3_ext_path *path) -+{ -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ unsigned long bg_start; -+ unsigned long colour; -+ int depth; -+ -+ if (path) { -+ depth = path->p_depth; -+ /* try to find previous block */ -+ if (path[depth].p_ext) -+ return path[depth].p_ext->e_start + -+ path[depth].p_ext->e_num - 1; -+ -+ /* it looks index is empty -+ * try to find starting from index itself */ -+ if (path[depth].p_bh) -+ return path[depth].p_bh->b_blocknr; -+ } -+ -+ /* OK. 
use inode's group */ -+ bg_start = (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + -+ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); -+ colour = (current->pid % 16) * -+ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); -+ return bg_start + colour; -+} -+ -+static struct ext3_ext_path * -+ext3_ext_find_extent(struct inode *inode, int block, struct ext3_ext_path *path) -+{ -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ struct ext3_extent_header *eh = (void *) ei->i_data; -+ struct ext3_extent_idx *ix; -+ struct buffer_head *bh; -+ struct ext3_extent *ex; -+ int depth, i, k, ppos = 0, prev = 0; -+ -+ eh = (struct ext3_extent_header *) ei->i_data; -+ -+ /* initialize capacity of leaf in inode for first time */ -+ if (eh->e_max == 0) -+ eh->e_max = ext3_ext_space_inode(inode); -+ i = depth = ei->i_depth; -+ EXT_ASSERT(i == 0 || eh->e_num > 0); -+ -+ /* account possible depth increase */ -+ if (!path) { -+ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 2), -+ GFP_NOFS); -+ if (!path) -+ return ERR_PTR(-ENOMEM); -+ } -+ memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1)); -+ -+ /* walk through the tree */ -+ while (i) { -+ ext_debug(inode, "depth %d: num %d, max %d\n", -+ ppos, eh->e_num, eh->e_max); -+ ix = EXT_FIRST_INDEX(eh); -+ if (eh->e_num) { -+ EXT_ASSERT(prev == 0 || ix->e_block == prev); -+ path[ppos].p_idx = ix; -+ } -+ EXT_ASSERT(eh->e_num <= eh->e_max); -+ for (k = 0; k < eh->e_num; k++, ix++) { -+ ext_debug(inode, "index: %d -> %d\n", -+ ix->e_block, ix->e_leaf); -+ EXT_ASSERT((k == 0 && prev <= (int)ix->e_block) || -+ (k > 0 && prev < (int)ix->e_block)); -+ if (block < ix->e_block) -+ break; -+ prev = ix->e_block; -+ path[ppos].p_idx = ix; -+ } -+ path[ppos].p_block = path[ppos].p_idx->e_leaf; -+ path[ppos].p_depth = i; -+ path[ppos].p_hdr = eh; -+ path[ppos].p_ext = NULL; -+ -+ bh = sb_bread(inode->i_sb, path[ppos].p_block); -+ if (!bh) { -+ ext3_ext_drop_refs(inode, path); -+ kfree(path); -+ return ERR_PTR(-EIO); -+ } 
-+ eh = (struct ext3_extent_header *) bh->b_data; -+ ppos++; -+ EXT_ASSERT(ppos <= depth); -+ path[ppos].p_bh = bh; -+ i--; -+ } -+ -+ path[ppos].p_depth = i; -+ path[ppos].p_hdr = eh; -+ path[ppos].p_ext = NULL; -+ -+ /* find extent */ -+ ex = EXT_FIRST_EXTENT(eh); -+ if (eh->e_num) -+ path[ppos].p_ext = ex; -+ EXT_ASSERT(eh->e_num <= eh->e_max); -+ for (k = 0; k < eh->e_num; k++, ex++) { -+ EXT_ASSERT(ex->e_num < EXT3_BLOCKS_PER_GROUP(inode->i_sb)); -+ EXT_ASSERT((k == 0 && prev <= (int)ex->e_block) || -+ (k > 0 && prev < (int)ex->e_block)); -+ if (block < ex->e_block) -+ break; -+ prev = ex->e_block; -+ path[ppos].p_ext = ex; -+ } -+ -+ ext3_ext_show_path(inode, path); -+ -+ return path; -+} -+ -+static void ext3_ext_check_boundary(struct inode *inode, -+ struct ext3_ext_path *curp, -+ void *addr, int len) -+{ -+ void *end; -+ -+ if (!len) -+ return; -+ if (curp->p_bh) -+ end = (void *) curp->p_hdr + inode->i_sb->s_blocksize; -+ else -+ end = (void *) curp->p_hdr + sizeof(EXT3_I(inode)->i_data); -+ if (((unsigned long) addr) + len > (unsigned long) end) { -+ printk("overflow! 0x%p > 0x%p\n", addr + len, end); -+ BUG(); -+ } -+ if ((unsigned long) addr < (unsigned long) curp->p_hdr) { -+ printk("underflow! 0x%p < 0x%p\n", addr, curp->p_hdr); -+ BUG(); -+ } -+} -+ -+/* -+ * insert new index [logical;ptr] into the block at cupr -+ * it check where to insert: before curp or after curp -+ */ -+static int ext3_ext_insert_index(handle_t *handle, struct inode *inode, -+ struct ext3_ext_path *curp, int logical, -+ int ptr) -+{ -+ struct ext3_extent_idx *ix; -+ int len, err; -+ -+ if ((err = ext3_ext_get_access(handle, inode, curp))) -+ return err; -+ -+ EXT_ASSERT(logical != curp->p_idx->e_block); -+ len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx; -+ if (logical > curp->p_idx->e_block) { -+ /* insert after */ -+ len = (len - 1) * sizeof(struct ext3_extent_idx); -+ len = len < 0 ? 0 : len; -+ ext_debug(inode, "insert new index %d after: %d. 
" -+ "move %d from 0x%p to 0x%p\n", -+ logical, ptr, len, -+ (curp->p_idx + 1), (curp->p_idx + 2)); -+ -+ ext3_ext_check_boundary(inode, curp, curp->p_idx + 2, len); -+ memmove(curp->p_idx + 2, curp->p_idx + 1, len); -+ ix = curp->p_idx + 1; -+ } else { -+ /* insert before */ -+ len = len * sizeof(struct ext3_extent_idx); -+ len = len < 0 ? 0 : len; -+ ext_debug(inode, "insert new index %d before: %d. " -+ "move %d from 0x%p to 0x%p\n", -+ logical, ptr, len, -+ curp->p_idx, (curp->p_idx + 1)); -+ -+ ext3_ext_check_boundary(inode, curp, curp->p_idx + 1, len); -+ memmove(curp->p_idx + 1, curp->p_idx, len); -+ ix = curp->p_idx; -+ } -+ -+ ix->e_block = logical; -+ ix->e_leaf = ptr; -+ curp->p_hdr->e_num++; -+ -+ err = ext3_ext_dirty(handle, inode, curp); -+ ext3_std_error(inode->i_sb, err); -+ -+ return err; -+} -+ -+/* -+ * routine inserts new subtree into the path, using free index entry -+ * at depth 'at: -+ * - allocates all needed blocks (new leaf and all intermediate index blocks) -+ * - makes decision where to split -+ * - moves remaining extens and index entries (right to the split point) -+ * into the newly allocated blocks -+ * - initialize subtree -+ */ -+static int ext3_ext_split(handle_t *handle, struct inode *inode, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext, int at) -+{ -+ struct buffer_head *bh = NULL; -+ int depth = EXT3_I(inode)->i_depth; -+ struct ext3_extent_header *neh; -+ struct ext3_extent_idx *fidx; -+ struct ext3_extent *ex; -+ int i = at, k, m, a; -+ long newblock, oldblock, border; -+ int *ablocks = NULL; /* array of allocated blocks */ -+ int err = 0; -+ -+ /* make decision: where to split? */ -+ /* FIXME: now desicion is simplest: at current extent */ -+ -+ /* if current leaf will be splitted, then we should use -+ * border from split point */ -+ -+ if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { -+ border = path[depth].p_ext[1].e_block; -+ ext_debug(inode, "leaf will be splitted." 
-+ " next leaf starts at %d\n", -+ (int)border); -+ } else { -+ border = newext->e_block; -+ ext_debug(inode, "leaf will be added." -+ " next leaf starts at %d\n", -+ (int)border); -+ } -+ -+ /* -+ * if error occurs, then we break processing -+ * and turn filesystem read-only. so, index won't -+ * be inserted and tree will be in consistent -+ * state. next mount will repair buffers too -+ */ -+ -+ /* -+ * get array to track all allocated blocks -+ * we need this to handle errors and free blocks -+ * upon them -+ */ -+ ablocks = kmalloc(sizeof(long) * depth, GFP_NOFS); -+ if (!ablocks) -+ return -ENOMEM; -+ memset(ablocks, 0, sizeof(long) * depth); -+ -+ /* allocate all needed blocks */ -+ ext_debug(inode, "allocate %d blocks for indexes/leaf\n", depth - at); -+ newblock = 0; /* FIXME: something more sophisticated needed here */ -+ for (a = 0; newext->e_num > 0 && a < depth - at; a++) { -+ newblock = ablocks[a] = newext->e_start++; -+ newext->e_num--; -+ } -+ for (; a < depth - at; a++) { -+ newblock = ext3_new_block(handle, inode, -+ newblock + 1, 0, 0, &err); -+ if (newblock == 0) -+ goto cleanup; -+ ablocks[a] = newblock; -+ } -+ -+ /* initialize new leaf */ -+ newblock = ablocks[--a]; -+ EXT_ASSERT(newblock); -+ bh = sb_getblk(inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) -+ goto cleanup; -+ -+ neh = (struct ext3_extent_header *) bh->b_data; -+ neh->e_num = 0; -+ neh->e_max = ext3_ext_space_block(inode); -+ ex = EXT_FIRST_EXTENT(neh); -+ -+ /* move remain of path[depth] to the new leaf */ -+ EXT_ASSERT(path[depth].p_hdr->e_num == -+ path[depth].p_hdr->e_max); -+ /* start copy from next extent */ -+ /* TODO: we could do it by single memmove */ -+ m = 0; -+ path[depth].p_ext++; -+ while (path[depth].p_ext <= -+ EXT_MAX_EXTENT(path[depth].p_hdr)) { -+ ext_debug(inode, "move %d:%d:%d in new leaf\n", -+ path[depth].p_ext->e_block, -+ 
path[depth].p_ext->e_start, -+ path[depth].p_ext->e_num); -+ memmove(ex++, path[depth].p_ext++, -+ sizeof(struct ext3_extent)); -+ neh->e_num++; -+ m++; -+ } -+ mark_buffer_uptodate(bh, 1); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto cleanup; -+ brelse(bh); -+ bh = NULL; -+ -+ /* correct old leaf */ -+ if (m) { -+ if ((err = ext3_ext_get_access(handle, inode, path))) -+ goto cleanup; -+ path[depth].p_hdr->e_num -= m; -+ if ((err = ext3_ext_dirty(handle, inode, path))) -+ goto cleanup; -+ -+ } -+ -+ /* create intermediate indexes */ -+ k = depth - at - 1; -+ EXT_ASSERT(k >= 0); -+ if (k) -+ ext_debug(inode, -+ "create %d intermediate indices\n", k); -+ /* insert new index into current index block */ -+ /* current depth stored in i var */ -+ i = depth - 1; -+ while (k--) { -+ oldblock = newblock; -+ newblock = ablocks[--a]; -+ bh = sb_getblk(inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) -+ goto cleanup; -+ -+ neh = (struct ext3_extent_header *) bh->b_data; -+ neh->e_num = 1; -+ neh->e_max = ext3_ext_space_block(inode); -+ fidx = EXT_FIRST_INDEX(neh); -+ fidx->e_block = border; -+ fidx->e_leaf = oldblock; -+ -+ ext_debug(inode, -+ "int.index at %d (block %u): %d -> %d\n", -+ i, (unsigned) newblock, -+ (int) border, -+ (int) oldblock); -+ /* copy indexes */ -+ m = 0; -+ path[i].p_idx++; -+ ext_debug(inode, "cur 0x%p, last 0x%p\n", path[i].p_idx, -+ EXT_MAX_INDEX(path[i].p_hdr)); -+ EXT_ASSERT(EXT_MAX_INDEX(path[i].p_hdr) == -+ EXT_LAST_INDEX(path[i].p_hdr)); -+ while (path[i].p_idx <= -+ EXT_MAX_INDEX(path[i].p_hdr)) { -+ ext_debug(inode, "%d: move %d:%d in new index\n", -+ i, path[i].p_idx->e_block, -+ path[i].p_idx->e_leaf); -+ memmove(++fidx, path[i].p_idx++, -+ sizeof(struct ext3_extent_idx)); -+ neh->e_num++; -+ m++; -+ } -+ -+ mark_buffer_uptodate(bh, 1); -+ unlock_buffer(bh); -+ -+ if ((err = 
ext3_journal_dirty_metadata(handle, bh))) -+ goto cleanup; -+ brelse(bh); -+ bh = NULL; -+ -+ /* correct old index */ -+ if (m) { -+ err = ext3_ext_get_access(handle,inode,path+i); -+ if (err) -+ goto cleanup; -+ path[i].p_hdr->e_num -= m; -+ err = ext3_ext_dirty(handle, inode, path + i); -+ if (err) -+ goto cleanup; -+ } -+ -+ i--; -+ } -+ -+ /* insert new index */ -+ if (!err) -+ err = ext3_ext_insert_index(handle, inode, path + at, -+ border, newblock); -+ -+cleanup: -+ if (bh) { -+ if (buffer_locked(bh)) -+ unlock_buffer(bh); -+ brelse(bh); -+ } -+ -+ if (err) { -+ /* free all allocated blocks in error case */ -+ for (i = 0; i < depth; i++) -+ if (!ablocks[i]) -+ continue; -+ ext3_free_blocks(handle, inode, ablocks[i], 1); -+ } -+ kfree(ablocks); -+ -+ return err; -+} -+ -+/* -+ * routine implements tree growing procedure: -+ * - allocates new block -+ * - moves top-level data (index block or leaf) into the new block -+ * - initialize new top-level, creating index that points to the -+ * just created block -+ */ -+static int ext3_ext_grow_indepth(handle_t *handle, struct inode *inode, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ struct buffer_head *bh; -+ struct ext3_ext_path *curp = path; -+ struct ext3_extent_header *neh; -+ struct ext3_extent_idx *fidx; -+ int len, err = 0; -+ long newblock; -+ -+ /* -+ * use already allocated by the called block for new root block -+ */ -+ newblock = newext->e_start++; -+ if (newext->e_num == 0) { -+ /* -+ * FIXME: if this may happen, then we have to handle -+ * possible error and free allocated block -+ */ -+ printk("grow_indepth with zero blocks\n"); -+ newblock = ext3_new_block(handle, inode, -+ newblock, 0, 0, &err); -+ } else -+ newext->e_num--; -+ -+ bh = sb_getblk(inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ ext3_std_error(inode->i_sb, err); -+ return err; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) { -+ unlock_buffer(bh); -+ goto out; -+ } 
-+ -+ /* move top-level index/leaf into new block */ -+ len = sizeof(struct ext3_extent_header) + -+ sizeof(struct ext3_extent) * curp->p_hdr->e_max; -+ EXT_ASSERT(len >= 0 && len < 4096); -+ memmove(bh->b_data, curp->p_hdr, len); -+ -+ /* set size of new block */ -+ neh = (struct ext3_extent_header *) bh->b_data; -+ neh->e_max = ext3_ext_space_block(inode); -+ mark_buffer_uptodate(bh, 1); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto out; -+ -+ /* create index in new top-level index: num,max,pointer */ -+ if ((err = ext3_ext_get_access(handle, inode, curp))) -+ goto out; -+ -+ curp->p_hdr->e_max = ext3_ext_space_inode_idx(inode); -+ curp->p_hdr->e_num = 1; -+ curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); -+ curp->p_idx->e_block = EXT_FIRST_EXTENT(path[0].p_hdr)->e_block; -+ curp->p_idx->e_leaf = newblock; -+ -+ neh = (struct ext3_extent_header *) EXT3_I(inode)->i_data; -+ fidx = EXT_FIRST_INDEX(neh); -+ ext_debug(inode, "new root: num %d(%d), lblock %d, ptr %d\n", -+ neh->e_num, neh->e_max, fidx->e_block, fidx->e_leaf); -+ -+ EXT3_I(inode)->i_depth++; -+ err = ext3_ext_dirty(handle, inode, curp); -+out: -+ brelse(bh); -+ -+ return err; -+} -+ -+/* -+ * routine finds empty index and adds new leaf. 
if no free index found -+ * then it requests in-depth growing -+ */ -+static int ext3_ext_create_new_leaf(handle_t *handle, struct inode *inode, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ long newblock = newext->e_start; -+ struct ext3_ext_path *curp; -+ int depth, i, err = 0; -+ -+repeat: -+ i = depth = EXT3_I(inode)->i_depth; -+ -+ /* walk up to the tree and look for free index entry */ -+ curp = path + depth; -+ while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) { -+ i--; -+ curp--; -+ } -+ -+ /* we use already allocated block for index block -+ * so, subsequent data blocks should be contigoues */ -+ if (EXT_HAS_FREE_INDEX(curp)) { -+ /* if we found index with free entry, then use that -+ * entry: create all needed subtree and add new leaf */ -+ err = ext3_ext_split(handle, inode, path, newext, i); -+ -+ /* refill path */ -+ ext3_ext_drop_refs(inode, path); -+ path = ext3_ext_find_extent(inode, newext->e_block, path); -+ if (IS_ERR(path)) -+ err = PTR_ERR(path); -+ } else { -+ /* tree is full, time to grow in depth */ -+ err = ext3_ext_grow_indepth(handle, inode, path, newext); -+ -+ /* refill path */ -+ ext3_ext_drop_refs(inode, path); -+ path = ext3_ext_find_extent(inode, newext->e_block, path); -+ if (IS_ERR(path)) -+ err = PTR_ERR(path); -+ -+ /* -+ * only first (depth 0 -> 1) produces free space -+ * in all other cases we have to split growed tree -+ */ -+ depth = EXT3_I(inode)->i_depth; -+ if (path[depth].p_hdr->e_num == path[depth].p_hdr->e_max) { -+ /* now we need split */ -+ goto repeat; -+ } -+ } -+ -+ if (err) -+ return err; -+ -+ /* -+ * probably we've used some blocks from extent -+ * let's allocate new block for it -+ */ -+ if (newext->e_num == 0 && !err) { -+ newext->e_start = -+ ext3_new_block(handle, inode, newblock, -+ 0, 0, &err); -+ if (newext->e_start != 0) -+ newext->e_num = 1; -+ } -+ -+ return 0; -+} -+ -+/* -+ * returns next allocated block or 0xffffffff -+ * NOTE: it consider block number from index entry as -+ * 
allocated block. thus, index entries have to be consistent -+ * with leafs -+ */ -+static inline unsigned ext3_ext_next_allocated_block(struct inode *inode, -+ struct ext3_ext_path *path) -+{ -+ int depth; -+ -+ EXT_ASSERT(path != NULL); -+ depth = path->p_depth; -+ -+ if (depth == 0 && path->p_ext == NULL) -+ return 0xffffffff; -+ -+ /* FIXME: what if index isn't full ?! */ -+ while (depth >= 0) { -+ if (depth == path->p_depth) { -+ /* leaf */ -+ if (path[depth].p_ext != -+ EXT_LAST_EXTENT(path[depth].p_hdr)) -+ return path[depth].p_ext[1].e_block; -+ } else { -+ /* index */ -+ if (path[depth].p_idx != -+ EXT_LAST_INDEX(path[depth].p_hdr)) -+ return path[depth].p_idx[1].e_block; -+ } -+ depth--; -+ } -+ -+ return 0xffffffff; -+} -+ -+/* -+ * returns first allocated block from next leaf or 0xffffffff -+ */ -+static unsigned ext3_ext_next_leaf_block(struct inode *inode, -+ struct ext3_ext_path *path) -+{ -+ int depth; -+ -+ EXT_ASSERT(path != NULL); -+ depth = path->p_depth; -+ -+ /* zero-tree has no leaf blocks at all */ -+ if (depth == 0) -+ return 0xffffffff; -+ -+ /* go to index block */ -+ depth--; -+ -+ while (depth >= 0) { -+ if (path[depth].p_idx != -+ EXT_LAST_INDEX(path[depth].p_hdr)) -+ return path[depth].p_idx[1].e_block; -+ depth--; -+ } -+ -+ return 0xffffffff; -+} -+ -+/* -+ * if leaf gets modified and modified extent is first in the leaf -+ * then we have to correct all indexes above -+ * TODO: do we need to correct tree in all cases? 
-+ */ -+int ext3_ext_correct_indexes(handle_t *handle, struct inode *inode, -+ struct ext3_ext_path *path) -+{ -+ int depth = EXT3_I(inode)->i_depth; -+ struct ext3_extent_header *eh; -+ struct ext3_extent *ex; -+ long border; -+ int k, err = 0; -+ -+ eh = path[depth].p_hdr; -+ ex = path[depth].p_ext; -+ -+ EXT_ASSERT(ex); -+ EXT_ASSERT(eh); -+ -+ if (depth == 0) { -+ /* there is no tree at all */ -+ return 0; -+ } -+ -+ if (ex != EXT_FIRST_EXTENT(eh)) { -+ /* we correct tree if first leaf got modified only */ -+ return 0; -+ } -+ -+ /* -+ * TODO: we need correction if border is smaller then current one -+ */ -+ k = depth - 1; -+ border = path[depth].p_ext->e_block; -+ if ((err = ext3_ext_get_access(handle, inode, path + k))) -+ return err; -+ path[k].p_idx->e_block = border; -+ if ((err = ext3_ext_dirty(handle, inode, path + k))) -+ return err; -+ -+ while (k--) { -+ /* change all left-side indexes */ -+ if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr)) -+ break; -+ if ((err = ext3_ext_get_access(handle, inode, path + k))) -+ break; -+ path[k].p_idx->e_block = border; -+ if ((err = ext3_ext_dirty(handle, inode, path + k))) -+ break; -+ } -+ -+ return err; -+} -+ -+/* -+ * this routine tries to merge requsted extent into the existing -+ * extent or inserts requested extent as new one into the tree, -+ * creating new leaf in no-space case -+ */ -+int ext3_ext_insert_extent(handle_t *handle, struct inode *inode, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ int depth, len; -+ struct ext3_extent_header * eh; -+ struct ext3_extent *ex; -+ struct ext3_extent *nearex; /* nearest extent */ -+ struct ext3_ext_path *npath = NULL; -+ int err; -+ -+ depth = EXT3_I(inode)->i_depth; -+ if ((ex = path[depth].p_ext)) { -+ /* try to insert block into found extent and return */ -+ if (ex->e_block + ex->e_num == newext->e_block && -+ ex->e_start + ex->e_num == newext->e_start) { -+#ifdef AGRESSIVE_TEST -+ if (ex->e_num >= 2) -+ goto repeat; -+#endif -+ 
if ((err = ext3_ext_get_access(handle, inode, -+ path + depth))) -+ return err; -+ ext_debug(inode, "append %d block to %d:%d (from %d)\n", -+ newext->e_num, ex->e_block, ex->e_num, -+ ex->e_start); -+ ex->e_num += newext->e_num; -+ err = ext3_ext_dirty(handle, inode, path + depth); -+ return err; -+ } -+ } -+ -+repeat: -+ depth = EXT3_I(inode)->i_depth; -+ eh = path[depth].p_hdr; -+ if (eh->e_num == eh->e_max) { -+ /* probably next leaf has space for us? */ -+ int next = ext3_ext_next_leaf_block(inode, path); -+ if (next != 0xffffffff) { -+ ext_debug(inode, "next leaf block - %d\n", next); -+ EXT_ASSERT(!npath); -+ npath = ext3_ext_find_extent(inode, next, NULL); -+ if (IS_ERR(npath)) -+ return PTR_ERR(npath); -+ EXT_ASSERT(npath->p_depth == path->p_depth); -+ eh = npath[depth].p_hdr; -+ if (eh->e_num < eh->e_max) { -+ ext_debug(inode, -+ "next leaf has free ext(%d)\n", -+ eh->e_num); -+ path = npath; -+ goto repeat; -+ } -+ ext_debug(inode, "next leaf hasno free space(%d,%d)\n", -+ eh->e_num, eh->e_max); -+ } -+ /* -+ * there is no free space in found leaf -+ * we're gonna add new leaf in the tree -+ */ -+ err = ext3_ext_create_new_leaf(handle, inode, path, newext); -+ if (err) -+ goto cleanup; -+ goto repeat; -+ } -+ -+ nearex = path[depth].p_ext; -+ -+ if ((err = ext3_ext_get_access(handle, inode, path + depth))) -+ goto cleanup; -+ -+ if (!nearex) { -+ /* there is no extent in this leaf, create first one */ -+ ext_debug(inode, "first extent in the leaf: %d:%d:%d\n", -+ newext->e_block, newext->e_start, -+ newext->e_num); -+ eh->e_num++; -+ path[depth].p_ext = EXT_FIRST_EXTENT(eh); -+ -+ } else if (newext->e_block > nearex->e_block) { -+ EXT_ASSERT(newext->e_block != nearex->e_block); -+ len = EXT_MAX_EXTENT(eh) - nearex; -+ len = (len - 1) * sizeof(struct ext3_extent); -+ len = len < 0 ? 
0 : len; -+ ext_debug(inode, "insert %d:%d:%d after: nearest 0x%p, " -+ "move %d from 0x%p to 0x%p\n", -+ newext->e_block, newext->e_start, newext->e_num, -+ nearex, len, nearex + 1, nearex + 2); -+ ext3_ext_check_boundary(inode, path + depth, nearex + 2, len); -+ memmove(nearex + 2, nearex + 1, len); -+ path[depth].p_ext = nearex + 1; -+ eh->e_num++; -+ } else { -+ EXT_ASSERT(newext->e_block != nearex->e_block); -+ len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext3_extent); -+ len = len < 0 ? 0 : len; -+ ext_debug(inode, "insert %d:%d:%d before: nearest 0x%p, " -+ "move %d from 0x%p to 0x%p\n", -+ newext->e_block, newext->e_start, newext->e_num, -+ nearex, len, nearex + 1, nearex + 2); -+ -+ memmove(nearex + 1, nearex, len); -+ path[depth].p_ext = nearex; -+ } -+ -+ if (!err) { -+ eh->e_num++; -+ nearex = path[depth].p_ext; -+ nearex->e_block = newext->e_block; -+ nearex->e_start = newext->e_start; -+ nearex->e_num = newext->e_num; -+ EXT_ASSERT(nearex->e_num < EXT3_BLOCKS_PER_GROUP(inode->i_sb) && -+ nearex->e_num > 0); -+ -+ /* time to correct all indexes above */ -+ err = ext3_ext_correct_indexes(handle, inode, path); -+ } -+ -+ err = ext3_ext_dirty(handle, inode, path + depth); -+ -+cleanup: -+ if (npath) { -+ ext3_ext_drop_refs(inode, npath); -+ kfree(npath); -+ } -+ -+ return err; -+} -+ -+int ext3_ext_get_block(handle_t *handle, struct inode *inode, long iblock, -+ struct buffer_head *bh_result, int create, -+ int extend_disksize) -+{ -+ struct ext3_ext_path *path; -+ int depth = EXT3_I(inode)->i_depth; -+ struct ext3_extent newex; -+ struct ext3_extent *ex; -+ int goal, newblock, err = 0; -+ -+ ext_debug(inode, "block %d requested for inode %u, bh_result 0x%p\n", -+ (int) iblock, (unsigned) inode->i_ino, bh_result); -+ bh_result->b_state &= ~(1UL << BH_New); -+ -+ down(&EXT3_I(inode)->i_ext_sem); -+ -+ /* find extent for this block */ -+ path = ext3_ext_find_extent(inode, iblock, NULL); -+ if (IS_ERR(path)) { -+ err = PTR_ERR(path); -+ goto out2; -+ 
} -+ -+ if ((ex = path[depth].p_ext)) { -+ /* if found exent covers block, simple return it */ -+ if (iblock >= ex->e_block && iblock < ex->e_block + ex->e_num) { -+ newblock = iblock - ex->e_block + ex->e_start; -+ ext_debug(inode, "%d fit into %d:%d -> %d\n", -+ (int) iblock, ex->e_block, ex->e_num, -+ newblock); -+ goto out; -+ } -+ } -+ -+ /* -+ * we couldn't try to create block if create flag is zero -+ */ -+ if (!create) -+ goto out2; -+ -+ /* allocate new block */ -+ goal = ext3_ext_find_goal(inode, path); -+ newblock = ext3_new_block(handle, inode, goal, 0, 0, &err); -+ if (!newblock) -+ goto out2; -+ ext_debug(inode, "allocate new block: goal %d, found %d\n", -+ goal, newblock); -+ -+ /* try to insert new extent into found leaf and return */ -+ newex.e_block = iblock; -+ newex.e_start = newblock; -+ newex.e_num = 1; -+ err = ext3_ext_insert_extent(handle, inode, path, &newex); -+ if (err) -+ goto out2; -+ -+ /* previous routine could use block we allocated */ -+ newblock = newex.e_start; -+ bh_result->b_state |= (1UL << BH_New); -+ -+out: -+ ext3_ext_show_leaf(inode, path); -+ bh_result->b_dev = inode->i_dev; -+ bh_result->b_blocknr = newblock; -+ bh_result->b_state |= (1UL << BH_Mapped); -+out2: -+ ext3_ext_drop_refs(inode, path); -+ kfree(path); -+ up(&EXT3_I(inode)->i_ext_sem); -+ -+ return err; -+} -+ -+/* -+ * returns 1 if current index have to be freed (even partial) -+ */ -+static int ext3_ext_more_to_truncate(struct inode *inode, -+ struct ext3_ext_path *path) -+{ -+ EXT_ASSERT(path->p_idx); -+ -+ if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr)) -+ return 0; -+ -+ /* -+ * if truncate on deeper level happened it it wasn't partial -+ * so we have to consider current index for truncation -+ */ -+ if (path->p_hdr->e_num == path->p_block) -+ return 0; -+ -+ /* -+ * put actual number of indexes to know is this number got -+ * changed at the next iteration -+ */ -+ path->p_block = path->p_hdr->e_num; -+ -+ return 1; -+} -+ -+/* -+ * routine removes index 
from the index block -+ * it's used in truncate case only. thus all requests are for -+ * last index in the block only -+ */ -+int ext3_ext_remove_index(handle_t *handle, struct inode *inode, -+ struct ext3_ext_path *path) -+{ -+ struct buffer_head *bh; -+ int err; -+ -+ /* free index block */ -+ path--; -+ EXT_ASSERT(path->p_hdr->e_num); -+ if ((err = ext3_ext_get_access(handle, inode, path))) -+ return err; -+ path->p_hdr->e_num--; -+ if ((err = ext3_ext_dirty(handle, inode, path))) -+ return err; -+ bh = sb_get_hash_table(inode->i_sb, path->p_idx->e_leaf); -+ ext3_forget(handle, 0, inode, bh, path->p_idx->e_leaf); -+ ext3_free_blocks(handle, inode, path->p_idx->e_leaf, 1); -+ -+ ext_debug(inode, "index is empty, remove it, free block %d\n", -+ path->p_idx->e_leaf); -+ return err; -+} -+ -+/* -+ * returns 1 if current extent needs to be freed (even partial) -+ * instead, returns 0 -+ */ -+int ext3_ext_more_leaves_to_truncate(struct inode *inode, -+ struct ext3_ext_path *path) -+{ -+ unsigned blocksize = inode->i_sb->s_blocksize; -+ struct ext3_extent *ex = path->p_ext; -+ int last_block; -+ -+ EXT_ASSERT(ex); -+ -+ /* is there leave in the current leaf? 
*/ -+ if (ex < EXT_FIRST_EXTENT(path->p_hdr)) -+ return 0; -+ -+ last_block = (inode->i_size + blocksize-1) -+ >> EXT3_BLOCK_SIZE_BITS(inode->i_sb); -+ -+ if (last_block >= ex->e_block + ex->e_num) -+ return 0; -+ -+ /* seems it extent have to be freed */ -+ return 1; -+} -+ -+handle_t *ext3_ext_journal_restart(handle_t *handle, int needed) -+{ -+ int err; -+ -+ if (handle->h_buffer_credits > needed) -+ return handle; -+ if (!ext3_journal_extend(handle, needed)) -+ return handle; -+ err = ext3_journal_restart(handle, needed); -+ -+ return handle; -+} -+ -+/* -+ * this routine calculate max number of blocks to be modified -+ * while freeing extent and is intended to be used in truncate path -+ */ -+static int ext3_ext_calc_credits(struct inode *inode, -+ struct ext3_ext_path *path, -+ int num) -+{ -+ int depth = EXT3_I(inode)->i_depth; -+ int needed; -+ -+ /* -+ * extent couldn't cross group, so we will modify -+ * single bitmap block and single group descriptor -+ */ -+ needed = 2; -+ -+ /* -+ * if this is last extent in a leaf, then we have to -+ * free leaf block and remove pointer from index above. -+ * that pointer could be last in index block, so we'll -+ * have to remove it too. 
this way we could modify/free -+ * the whole path + root index (inode stored) will be -+ * modified -+ */ -+ if (!path || (num == path->p_ext->e_num && -+ path->p_ext == EXT_FIRST_EXTENT(path->p_hdr))) -+ needed += (depth * EXT3_ALLOC_NEEDED) + 1; -+ -+ /* -+ * it seems current calculation has bug -+ * this is workaround -bzzz -+ */ -+ needed += 10; -+ -+ return needed; -+} -+ -+/* -+ * core of the truncate procedure: -+ * - calculated what part of each extent in the requested leaf -+ * need to be freed -+ * - frees and forgets these blocks -+ * -+ * TODO: we could optimize and free several extents during -+ * single journal_restart()-journal_restart() cycle -+ */ -+static int ext3_ext_truncate_leaf(handle_t *handle, -+ struct inode *inode, -+ struct ext3_ext_path *path, -+ int depth) -+{ -+ unsigned blocksize = inode->i_sb->s_blocksize; -+ int last_block; -+ int i, err = 0, sf, num; -+ -+ ext_debug(inode, "level %d - leaf\n", depth); -+ if (!path->p_hdr) -+ path->p_hdr = -+ (struct ext3_extent_header *) path->p_bh->b_data; -+ -+ EXT_ASSERT(path->p_hdr->e_num <= path->p_hdr->e_max); -+ -+ last_block = (inode->i_size + blocksize-1) -+ >> EXT3_BLOCK_SIZE_BITS(inode->i_sb); -+ path->p_ext = EXT_LAST_EXTENT(path->p_hdr); -+ while (ext3_ext_more_leaves_to_truncate(inode, path)) { -+ -+ /* what part of extent have to be freed? */ -+ sf = last_block > path->p_ext->e_block ? 
-+ last_block : path->p_ext->e_block; -+ -+ /* number of blocks from extent to be freed */ -+ num = path->p_ext->e_block + path->p_ext->e_num - sf; -+ -+ /* calc physical first physical block to be freed */ -+ sf = path->p_ext->e_start + (sf - path->p_ext->e_block); -+ -+ i = ext3_ext_calc_credits(inode, path, num); -+ handle = ext3_ext_journal_restart(handle, i); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ ext_debug(inode, "free extent %d:%d:%d -> free %d:%d\n", -+ path->p_ext->e_block, path->p_ext->e_start, -+ path->p_ext->e_num, sf, num); -+ for (i = 0; i < num; i++) { -+ struct buffer_head *bh = -+ sb_get_hash_table(inode->i_sb, sf + i); -+ ext3_forget(handle, 0, inode, bh, sf + i); -+ } -+ ext3_free_blocks(handle, inode, sf, num); -+ -+ /* collect extents usage stats */ -+ spin_lock(&EXT3_SB(inode->i_sb)->s_ext_lock); -+ EXT3_SB(inode->i_sb)->s_ext_extents++; -+ EXT3_SB(inode->i_sb)->s_ext_blocks += num; -+ spin_unlock(&EXT3_SB(inode->i_sb)->s_ext_lock); -+ -+ /* reduce extent */ -+ if ((err = ext3_ext_get_access(handle, inode, path))) -+ return err; -+ path->p_ext->e_num -= num; -+ if (path->p_ext->e_num == 0) -+ path->p_hdr->e_num--; -+ if ((err = ext3_ext_dirty(handle, inode, path))) -+ return err; -+ -+ path->p_ext--; -+ } -+ -+ /* if this leaf is free, then we should -+ * remove it from index block above */ -+ if (path->p_hdr->e_num == 0 && depth > 0) -+ err = ext3_ext_remove_index(handle, inode, path); -+ -+ return err; -+} -+ -+static void ext3_ext_collect_stats(struct inode *inode) -+{ -+ int depth; -+ -+ /* skip inodes with old good bitmap */ -+ if (!(EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)) -+ return; -+ -+ /* collect on full truncate only */ -+ if (inode->i_size) -+ return; -+ -+ depth = EXT3_I(inode)->i_depth; -+ if (depth < EXT3_SB(inode->i_sb)->s_ext_mindepth) -+ EXT3_SB(inode->i_sb)->s_ext_mindepth = depth; -+ if (depth > EXT3_SB(inode->i_sb)->s_ext_maxdepth) -+ EXT3_SB(inode->i_sb)->s_ext_maxdepth = depth; -+ 
EXT3_SB(inode->i_sb)->s_ext_sum += depth; -+ EXT3_SB(inode->i_sb)->s_ext_count++; -+ -+} -+ -+void ext3_ext_truncate(struct inode * inode) -+{ -+ struct address_space *mapping = inode->i_mapping; -+ struct ext3_ext_path *path; -+ struct page * page; -+ handle_t *handle; -+ int i, depth, err = 0; -+ -+ ext3_ext_collect_stats(inode); -+ -+ /* -+ * We have to lock the EOF page here, because lock_page() nests -+ * outside journal_start(). -+ */ -+ if ((inode->i_size & (inode->i_sb->s_blocksize - 1)) == 0) { -+ /* Block boundary? Nothing to do */ -+ page = NULL; -+ } else { -+ page = grab_cache_page(mapping, -+ inode->i_size >> PAGE_CACHE_SHIFT); -+ if (!page) -+ return; -+ } -+ -+ /* -+ * probably first extent we're gonna free will be last in block -+ */ -+ i = ext3_ext_calc_credits(inode, NULL, 0); -+ handle = ext3_journal_start(inode, i); -+ if (IS_ERR(handle)) { -+ if (page) { -+ clear_highpage(page); -+ flush_dcache_page(page); -+ unlock_page(page); -+ page_cache_release(page); -+ } -+ return; -+ } -+ -+ if (page) -+ ext3_block_truncate_page(handle, mapping, inode->i_size, page, -+ inode->i_sb->s_blocksize); -+ -+ down(&EXT3_I(inode)->i_ext_sem); -+ -+ /* -+ * TODO: optimization is possible here -+ * probably we need not scaning at all, -+ * because page truncation is enough -+ */ -+ if (ext3_orphan_add(handle, inode)) -+ goto out_stop; -+ -+ /* we have to know where to truncate from in crash case */ -+ EXT3_I(inode)->i_disksize = inode->i_size; -+ ext3_mark_inode_dirty(handle, inode); -+ -+ /* -+ * we start scanning from right side freeing all the blocks -+ * after i_size and walking into the deep -+ */ -+ i = 0; -+ depth = EXT3_I(inode)->i_depth; -+ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 1), GFP_KERNEL); -+ if (IS_ERR(path)) { -+ ext3_error(inode->i_sb, "ext3_ext_truncate", -+ "Can't allocate path array"); -+ goto out_stop; -+ } -+ memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1)); -+ -+ path[i].p_hdr = (struct ext3_extent_header *) 
EXT3_I(inode)->i_data; -+ while (i >= 0 && err == 0) { -+ if (i == depth) { -+ /* this is leaf block */ -+ err = ext3_ext_truncate_leaf(handle, inode, -+ path + i, i); -+ /* root level have p_bh == NULL, brelse() eats this */ -+ brelse(path[i].p_bh); -+ i--; -+ continue; -+ } -+ -+ /* this is index block */ -+ if (!path[i].p_hdr) { -+ path[i].p_hdr = -+ (struct ext3_extent_header *) path[i].p_bh->b_data; -+ ext_debug(inode, "initialize header\n"); -+ } -+ -+ EXT_ASSERT(path[i].p_hdr->e_num <= path[i].p_hdr->e_max); -+ -+ if (!path[i].p_idx) { -+ /* this level hasn't touched yet */ -+ path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr); -+ path[i].p_block = path[i].p_hdr->e_num + 1; -+ ext_debug(inode, "init index ptr: hdr 0x%p, num %d\n", -+ path[i].p_hdr, path[i].p_hdr->e_num); -+ } else { -+ /* we've already was here, see at next index */ -+ path[i].p_idx--; -+ } -+ -+ ext_debug(inode, "level %d - index, first 0x%p, cur 0x%p\n", -+ i, EXT_FIRST_INDEX(path[i].p_hdr), -+ path[i].p_idx); -+ if (ext3_ext_more_to_truncate(inode, path + i)) { -+ /* go to the next level */ -+ ext_debug(inode, "move to level %d (block %d)\n", i+1, -+ path[i].p_idx->e_leaf); -+ memset(path + i + 1, 0, sizeof(*path)); -+ path[i+1].p_bh = sb_bread(inode->i_sb, -+ path[i].p_idx->e_leaf); -+ if (!path[i+1].p_bh) { -+ /* should we reset i_size? 
*/ -+ err = -EIO; -+ break; -+ } -+ i++; -+ } else { -+ /* we finish processing this index, go up */ -+ if (path[i].p_hdr->e_num == 0 && i > 0) { -+ /* index is empty, remove it -+ * handle must be already prepared by the -+ * truncate_leaf() -+ */ -+ err = ext3_ext_remove_index(handle, inode, -+ path + i); -+ } -+ /* root level have p_bh == NULL, brelse() eats this */ -+ brelse(path[i].p_bh); -+ i--; -+ ext_debug(inode, "return to level %d\n", i); -+ } -+ } -+ -+ /* TODO: flexible tree reduction should be here */ -+ if (path->p_hdr->e_num == 0) { -+ /* -+ * truncate to zero freed all the tree -+ * so, we need to correct i_depth -+ */ -+ EXT3_I(inode)->i_depth = 0; -+ path->p_hdr->e_max = 0; -+ ext3_mark_inode_dirty(handle, inode); -+ } -+ -+ kfree(path); -+ -+ /* In a multi-transaction truncate, we only make the final -+ * transaction synchronous */ -+ if (IS_SYNC(inode)) -+ handle->h_sync = 1; -+ -+out_stop: -+ /* -+ * If this was a simple ftruncate(), and the file will remain alive -+ * then we need to clear up the orphan record which we created above. -+ * However, if this was a real unlink then we were called by -+ * ext3_delete_inode(), and we allow that function to clean up the -+ * orphan info for us. 
-+ */ -+ if (inode->i_nlink) -+ ext3_orphan_del(handle, inode); -+ -+ up(&EXT3_I(inode)->i_ext_sem); -+ ext3_journal_stop(handle, inode); -+} -+ -+/* -+ * this routine calculate max number of blocks we could modify -+ * in order to allocate new block for an inode -+ */ -+int ext3_ext_writepage_trans_blocks(struct inode *inode, int num) -+{ -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ int depth = ei->i_depth + 1; -+ int needed; -+ -+ /* -+ * the worste case we're expecting is creation of the -+ * new root (growing in depth) with index splitting -+ * for splitting we have to consider depth + 1 because -+ * previous growing could increase it -+ */ -+ -+ /* -+ * growing in depth: -+ * block allocation + new root + old root -+ */ -+ needed = EXT3_ALLOC_NEEDED + 2; -+ -+ /* index split. we may need: -+ * allocate intermediate indexes and new leaf -+ * change two blocks at each level, but root -+ * modify root block (inode) -+ */ -+ needed += (depth * EXT3_ALLOC_NEEDED) + (2 * depth) + 1; -+ -+ /* caller want to allocate num blocks */ -+ needed *= num; -+ -+#ifdef CONFIG_QUOTA -+ /* -+ * FIXME: real calculation should be here -+ * it depends on blockmap format of qouta file -+ */ -+ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; -+#endif -+ -+ return needed; -+} -+ -+/* -+ * called at mount time -+ */ -+void ext3_ext_init(struct super_block *sb) -+{ -+ /* -+ * possible initialization would be here -+ */ -+ -+ if (test_opt(sb, EXTENTS)) -+ printk("EXT3-fs: file extents enabled\n"); -+ spin_lock_init(&EXT3_SB(sb)->s_ext_lock); -+} -+ -+/* -+ * called at umount time -+ */ -+void ext3_ext_release(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ -+ /* show collected stats */ -+ if (sbi->s_ext_count && sbi->s_ext_extents) -+ printk("EXT3-fs: min depth - %d, max depth - %d, " -+ "ave. depth - %d, ave. 
blocks/extent - %d\n", -+ sbi->s_ext_mindepth, -+ sbi->s_ext_maxdepth, -+ sbi->s_ext_sum / sbi->s_ext_count, -+ sbi->s_ext_blocks / sbi->s_ext_extents); -+} -+ ---- linux-2.4.18-chaos-pdirops/fs/ext3/ialloc.c~ext3-extents-2.4.18-chaos-pdirops 2003-09-23 13:44:53.000000000 +0400 -+++ linux-2.4.18-chaos-pdirops-alexey/fs/ext3/ialloc.c 2003-09-23 14:29:32.000000000 +0400 -@@ -573,6 +573,10 @@ repeat: - ei->i_prealloc_count = 0; - #endif - ei->i_block_group = i; -+ if (test_opt(sb, EXTENTS)) -+ EXT3_I(inode)->i_flags |= EXT3_EXTENTS_FL; -+ ei->i_depth = 0; -+ sema_init(&ei->i_ext_sem, 1); - - if (ei->i_flags & EXT3_SYNC_FL) - inode->i_flags |= S_SYNC; ---- linux-2.4.18-chaos-pdirops/fs/ext3/inode.c~ext3-extents-2.4.18-chaos-pdirops 2003-09-23 13:44:53.000000000 +0400 -+++ linux-2.4.18-chaos-pdirops-alexey/fs/ext3/inode.c 2003-09-23 14:29:32.000000000 +0400 -@@ -842,6 +842,15 @@ changed: - goto reread; - } - -+static inline int -+ext3_get_block_wrap(handle_t *handle, struct inode *inode, long block, -+ struct buffer_head *bh, int create, int extend_disksize) -+{ -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_get_block(handle, inode, block, bh, create, 1); -+ return ext3_get_block_handle(handle, inode, block, bh, create, 1); -+} -+ - /* - * The BKL is not held on entry here. 
- */ -@@ -855,7 +864,7 @@ static int ext3_get_block(struct inode * - handle = ext3_journal_current_handle(); - J_ASSERT(handle != 0); - } -- ret = ext3_get_block_handle(handle, inode, iblock, -+ ret = ext3_get_block_wrap(handle, inode, iblock, - bh_result, create, 1); - return ret; - } -@@ -882,7 +891,7 @@ ext3_direct_io_get_block(struct inode *i - } - } - if (ret == 0) -- ret = ext3_get_block_handle(handle, inode, iblock, -+ ret = ext3_get_block_wrap(handle, inode, iblock, - bh_result, create, 0); - if (ret == 0) - bh_result->b_size = (1 << inode->i_blkbits); -@@ -904,7 +913,7 @@ struct buffer_head *ext3_getblk(handle_t - dummy.b_state = 0; - dummy.b_blocknr = -1000; - buffer_trace_init(&dummy.b_history); -- *errp = ext3_get_block_handle(handle, inode, block, &dummy, create, 1); -+ *errp = ext3_get_block_wrap(handle, inode, block, &dummy, create, 1); - if (!*errp && buffer_mapped(&dummy)) { - struct buffer_head *bh; - bh = sb_getblk(inode->i_sb, dummy.b_blocknr); -@@ -1520,7 +1529,7 @@ ext3_block_truncate_page_prepare(struct - * This required during truncate. We need to physically zero the tail end - * of that block so it doesn't yield old data if the file is later grown. - */ --static int ext3_block_truncate_page(handle_t *handle, -+int ext3_block_truncate_page(handle_t *handle, - struct address_space *mapping, loff_t from, - struct page *page, unsigned blocksize) - { -@@ -1998,6 +2007,9 @@ void ext3_truncate(struct inode * inode) - - ext3_discard_prealloc(inode); - -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_truncate(inode); -+ - blocksize = inode->i_sb->s_blocksize; - last_block = (inode->i_size + blocksize-1) - >> EXT3_BLOCK_SIZE_BITS(inode->i_sb); -@@ -2436,6 +2448,8 @@ void ext3_read_inode(struct inode * inod - ei->i_prealloc_count = 0; - #endif - ei->i_block_group = iloc.block_group; -+ ei->i_depth = raw_inode->osd2.linux2.l_i_depth; -+ sema_init(&ei->i_ext_sem, 1); - - /* - * NOTE! 
The in-memory inode i_data array is in little-endian order -@@ -2559,6 +2573,7 @@ static int ext3_do_update_inode(handle_t - raw_inode->i_fsize = 0; - } - #endif -+ raw_inode->osd2.linux2.l_i_depth = ei->i_depth; - raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl); - if (!S_ISREG(inode->i_mode)) { - raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl); -@@ -2762,6 +2777,9 @@ int ext3_writepage_trans_blocks(struct i - int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3; - int ret; - -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_writepage_trans_blocks(inode, bpp); -+ - if (ext3_should_journal_data(inode)) - ret = 3 * (bpp + indirects) + 2; - else -@@ -3085,7 +3103,7 @@ int ext3_prep_san_write(struct inode *in - - /* alloc blocks one by one */ - for (i = 0; i < nblocks; i++) { -- ret = ext3_get_block_handle(handle, inode, blocks[i], -+ ret = ext3_get_block_wrap(handle, inode, blocks[i], - &bh_tmp, 1, 1); - if (ret) - break; -@@ -3146,7 +3164,7 @@ int ext3_map_inode_page(struct inode *in - if (blocks[i] != 0) - continue; - -- rc = ext3_get_block_handle(handle, inode, iblock, &dummy, 1, 1); -+ rc = ext3_get_block_wrap(handle, inode, iblock, &dummy, 1, 1); - if (rc) { - printk(KERN_INFO "ext3_map_inode_page: error reading " - "block %ld\n", iblock); ---- linux-2.4.18-chaos-pdirops/fs/ext3/Makefile~ext3-extents-2.4.18-chaos-pdirops 2003-09-23 13:44:48.000000000 +0400 -+++ linux-2.4.18-chaos-pdirops-alexey/fs/ext3/Makefile 2003-09-23 14:29:32.000000000 +0400 -@@ -12,7 +12,8 @@ O_TARGET := ext3.o - export-objs := ext3-exports.o - - obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ -- ioctl.o namei.o super.o symlink.o xattr.o hash.o ext3-exports.o -+ ioctl.o namei.o super.o symlink.o xattr.o hash.o ext3-exports.o \ -+ extents.o - obj-m := $(O_TARGET) - - include $(TOPDIR)/Rules.make ---- linux-2.4.18-chaos-pdirops/fs/ext3/super.c~ext3-extents-2.4.18-chaos-pdirops 2003-09-23 13:44:53.000000000 +0400 -+++ 
linux-2.4.18-chaos-pdirops-alexey/fs/ext3/super.c 2003-09-23 14:29:33.000000000 +0400 -@@ -619,6 +619,7 @@ void ext3_put_super (struct super_block - kdev_t j_dev = sbi->s_journal->j_dev; - int i; - -+ ext3_ext_release(sb); - ext3_stop_delete_thread(sbi); - ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); -@@ -741,6 +742,12 @@ static int parse_options (char * options - else - #endif - -+ if (!strcmp (this_char, "extents")) -+ set_opt (sbi->s_mount_opt, EXTENTS); -+ else -+ if (!strcmp (this_char, "extdebug")) -+ set_opt (sbi->s_mount_opt, EXTDEBUG); -+ else - if (!strcmp (this_char, "bsddf")) - clear_opt (*mount_options, MINIX_DF); - else if (!strcmp (this_char, "nouid32")) { -@@ -1471,6 +1478,7 @@ struct super_block * ext3_read_super (st - test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal": - test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered": - "writeback"); -+ ext3_ext_init(sb); - - if (test_opt(sb, PDIROPS)) { - printk (KERN_INFO "EXT3-fs: mounted filesystem with parallel dirops\n"); ---- linux-2.4.18-chaos-pdirops/include/linux/ext3_fs.h~ext3-extents-2.4.18-chaos-pdirops 2003-09-23 13:44:53.000000000 +0400 -+++ linux-2.4.18-chaos-pdirops-alexey/include/linux/ext3_fs.h 2003-09-23 14:29:33.000000000 +0400 -@@ -188,6 +188,7 @@ struct ext3_group_desc - #define EXT3_IMAGIC_FL 0x00002000 /* AFS directory */ - #define EXT3_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */ - #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */ -+#define EXT3_EXTENTS_FL 0x00080000 /* Inode uses extents */ - - #define EXT3_FL_USER_VISIBLE 0x00005FFF /* User visible flags */ - #define EXT3_FL_USER_MODIFIABLE 0x000000FF /* User modifiable flags */ -@@ -248,7 +249,7 @@ struct ext3_inode { - struct { - __u8 l_i_frag; /* Fragment number */ - __u8 l_i_fsize; /* Fragment size */ -- __u16 i_pad1; -+ __u16 l_i_depth; - __u16 l_i_uid_high; /* these 2 fields */ - __u16 l_i_gid_high; /* were reserved2[0] */ - __u32 l_i_reserved2; -@@ -329,6 
+330,8 @@ struct ext3_inode { - #define EXT3_MOUNT_IOPEN 0x8000 /* Allow access via iopen */ - #define EXT3_MOUNT_IOPEN_NOPRIV 0x10000 /* Make iopen world-readable */ - #define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */ -+#define EXT3_MOUNT_EXTENTS 0x40000 /* Extents support */ -+#define EXT3_MOUNT_EXTDEBUG 0x80000 /* Extents debug */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef _LINUX_EXT2_FS_H -@@ -720,6 +723,12 @@ extern void ext3_discard_prealloc (struc - extern void ext3_dirty_inode(struct inode *); - extern int ext3_change_inode_journal_flag(struct inode *, int); - extern void ext3_truncate (struct inode *); -+extern int ext3_block_truncate_page(handle_t *handle, -+ struct address_space *mapping, loff_t from, -+ struct page *page, unsigned blocksize); -+extern int ext3_forget(handle_t *handle, int is_metadata, -+ struct inode *inode, struct buffer_head *bh, -+ int blocknr); - #ifdef EXT3_DELETE_THREAD - extern void ext3_truncate_thread(struct inode *inode); - #endif -@@ -781,6 +790,13 @@ extern struct inode_operations ext3_dir_ - /* symlink.c */ - extern struct inode_operations ext3_fast_symlink_inode_operations; - -+/* extents.c */ -+extern int ext3_ext_writepage_trans_blocks(struct inode *, int); -+extern int ext3_ext_get_block(handle_t *, struct inode *, long, -+ struct buffer_head *, int, int); -+extern void ext3_ext_truncate(struct inode *); -+extern void ext3_ext_init(struct super_block *); -+extern void ext3_ext_release(struct super_block *); - - #endif /* __KERNEL__ */ - ---- linux-2.4.18-chaos-pdirops/include/linux/ext3_fs_i.h~ext3-extents-2.4.18-chaos-pdirops 2003-09-23 13:44:53.000000000 +0400 -+++ linux-2.4.18-chaos-pdirops-alexey/include/linux/ext3_fs_i.h 2003-09-23 14:29:33.000000000 +0400 -@@ -79,6 +79,10 @@ struct ext3_inode_info { - struct dynlock i_htree_lock; - struct semaphore i_append_sem; - struct semaphore i_rename_sem; -+ -+ /* extents-related data */ -+ struct semaphore i_ext_sem; 
-+ __u16 i_depth; - }; - - #endif /* _LINUX_EXT3_FS_I */ ---- linux-2.4.18-chaos-pdirops/include/linux/ext3_fs_sb.h~ext3-extents-2.4.18-chaos-pdirops 2003-09-23 13:34:51.000000000 +0400 -+++ linux-2.4.18-chaos-pdirops-alexey/include/linux/ext3_fs_sb.h 2003-09-23 14:29:33.000000000 +0400 -@@ -86,6 +86,16 @@ struct ext3_sb_info { - wait_queue_head_t s_delete_thread_queue; - wait_queue_head_t s_delete_waiter_queue; - #endif -+ -+ /* extents */ -+ int s_ext_debug; -+ int s_ext_mindepth; -+ int s_ext_maxdepth; -+ int s_ext_sum; -+ int s_ext_count; -+ spinlock_t s_ext_lock; -+ int s_ext_extents; -+ int s_ext_blocks; - }; - - #endif /* _LINUX_EXT3_FS_SB */ - -_ diff --git a/lustre/kernel_patches/patches/ext3-extents-2.4.18-chaos.patch b/lustre/kernel_patches/patches/ext3-extents-2.4.18-chaos.patch deleted file mode 100644 index a0b4230..0000000 --- a/lustre/kernel_patches/patches/ext3-extents-2.4.18-chaos.patch +++ /dev/null @@ -1,1895 +0,0 @@ - fs/ext3/Makefile | 3 - fs/ext3/extents.c | 1615 +++++++++++++++++++++++++++++++++++++++++++++ - fs/ext3/ialloc.c | 4 - fs/ext3/inode.c | 30 - fs/ext3/super.c | 8 - include/linux/ext3_fs.h | 18 - include/linux/ext3_fs_i.h | 4 - include/linux/ext3_fs_sb.h | 10 - 8 files changed, 1684 insertions(+), 8 deletions(-) - -Index: linux-2.4.18-chaos/fs/ext3/extents.c -=================================================================== ---- linux-2.4.18-chaos.orig/fs/ext3/extents.c 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.18-chaos/fs/ext3/extents.c 2004-01-13 16:11:00.000000000 +0300 -@@ -0,0 +1,1614 @@ -+/* -+ * -+ * linux/fs/ext3/extents.c -+ * -+ * Extents support for EXT3 -+ * -+ * 07/08/2003 Alex Tomas -+ * -+ * TODO: -+ * - ext3*_error() should be used in some situations -+ * - find_goal() [to be tested and improved] -+ * - error handling -+ * - we could leak allocated block in some error cases -+ * - quick search for index/leaf in ext3_ext_find_extent() -+ * - tree reduction -+ * - cache last found extent -+ * - 
arch-independent -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* -+ * with AGRESSIVE_TEST defined capacity of index/leaf blocks -+ * become very little, so index split, in-depth growing and -+ * other hard changes happens much more often -+ * this is for debug purposes only -+ */ -+#define AGRESSIVE_TEST_ -+ -+/* -+ * if EXT_DEBUG defined you can use 'extdebug' mount option -+ * to get lots of info what's going on -+ */ -+#define EXT_DEBUG -+#ifdef EXT_DEBUG -+#define ext_debug(inode,fmt,a...) \ -+do { \ -+ if (test_opt((inode)->i_sb, EXTDEBUG)) \ -+ printk(fmt, ##a); \ -+} while (0); -+#else -+#define ext_debug(inode,fmt,a...) -+#endif -+ -+#define EXT3_ALLOC_NEEDED 2 /* block bitmap + group descriptor */ -+ -+/* -+ * ext3_inode has i_block array (total 60 bytes) -+ * first 4 bytes are used to store: -+ * - tree depth (0 mean there is no tree yet. all extents in the inode) -+ * - number of alive extents in the inode -+ */ -+ -+/* -+ * this is extent on-disk structure -+ * it's used at the bottom of the tree -+ */ -+struct ext3_extent { -+ __u32 e_block; /* first logical block extent covers */ -+ __u32 e_start; /* first physical block extents lives */ -+ __u32 e_num; /* number of blocks covered by extent */ -+}; -+ -+/* -+ * this is index on-disk structure -+ * it's used at all the levels, but the bottom -+ */ -+struct ext3_extent_idx { -+ __u32 e_block; /* index covers logical blocks from 'block' */ -+ __u32 e_leaf; /* pointer to the physical block of the next * -+ * level. 
leaf or next index could bet here */ -+}; -+ -+/* -+ * each block (leaves and indexes), even inode-stored has header -+ */ -+struct ext3_extent_header { -+ __u16 e_num; /* number of valid entries */ -+ __u16 e_max; /* capacity of store in entries */ -+}; -+ -+/* -+ * array of ext3_ext_path contains path to some extent -+ * creation/lookup routines use it for traversal/splitting/etc -+ * truncate uses it to simulate recursive walking -+ */ -+struct ext3_ext_path { -+ __u32 p_block; -+ __u16 p_depth; -+ struct ext3_extent *p_ext; -+ struct ext3_extent_idx *p_idx; -+ struct ext3_extent_header *p_hdr; -+ struct buffer_head *p_bh; -+}; -+ -+#define EXT_FIRST_EXTENT(__hdr__) \ -+ ((struct ext3_extent *) (((char *) (__hdr__)) + \ -+ sizeof(struct ext3_extent_header))) -+#define EXT_FIRST_INDEX(__hdr__) \ -+ ((struct ext3_extent_idx *) (((char *) (__hdr__)) + \ -+ sizeof(struct ext3_extent_header))) -+#define EXT_HAS_FREE_INDEX(__path__) \ -+ ((__path__)->p_hdr->e_num < (__path__)->p_hdr->e_max) -+#define EXT_LAST_EXTENT(__hdr__) \ -+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->e_num - 1) -+#define EXT_LAST_INDEX(__hdr__) \ -+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->e_num - 1) -+#define EXT_MAX_EXTENT(__hdr__) \ -+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->e_max - 1) -+#define EXT_MAX_INDEX(__hdr__) \ -+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->e_max - 1) -+ -+ -+#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); -+ -+/* -+ * could return: -+ * - EROFS -+ * - ENOMEM -+ */ -+static int ext3_ext_get_access(handle_t *handle, struct inode *inode, -+ struct ext3_ext_path *path) -+{ -+ if (path->p_bh) { -+ /* path points to block */ -+ return ext3_journal_get_write_access(handle, path->p_bh); -+ } -+ -+ /* path points to leaf/index in inode body */ -+ return 0; -+} -+ -+/* -+ * could return: -+ * - EROFS -+ * - ENOMEM -+ * - EIO -+ */ -+static int ext3_ext_dirty(handle_t *handle, struct inode *inode, -+ struct ext3_ext_path *path) -+{ -+ if (path->p_bh) { -+ /* path points to block 
*/ -+ return ext3_journal_dirty_metadata(handle, path->p_bh); -+ } -+ -+ /* path points to leaf/index in inode body */ -+ return ext3_mark_inode_dirty(handle, inode); -+} -+ -+static inline int ext3_ext_space_block(struct inode *inode) -+{ -+ int size; -+ -+ size = (inode->i_sb->s_blocksize - sizeof(struct ext3_extent_header)) -+ / sizeof(struct ext3_extent); -+#ifdef AGRESSIVE_TEST -+ size = 6; /* FIXME: for debug, remove this line */ -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_inode(struct inode *inode) -+{ -+ int size; -+ -+ size = (sizeof(EXT3_I(inode)->i_data) - -+ sizeof(struct ext3_extent_header)) -+ / sizeof(struct ext3_extent); -+#ifdef AGRESSIVE_TEST -+ size = 3; /* FIXME: for debug, remove this line */ -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_inode_idx(struct inode *inode) -+{ -+ int size; -+ -+ size = (sizeof(EXT3_I(inode)->i_data) - -+ sizeof(struct ext3_extent_header)) -+ / sizeof(struct ext3_extent_idx); -+#ifdef AGRESSIVE_TEST -+ size = 4; /* FIXME: for debug, remove this line */ -+#endif -+ return size; -+} -+ -+static void ext3_ext_show_path(struct inode *inode, struct ext3_ext_path *path) -+{ -+ int k, l = path->p_depth; -+ -+ ext_debug(inode, "path:"); -+ for (k = 0; k <= l; k++, path++) { -+ if (path->p_idx) { -+ ext_debug(inode, " %d->%d", path->p_idx->e_block, -+ path->p_idx->e_leaf); -+ } else if (path->p_ext) { -+ ext_debug(inode, " %d:%d:%d", -+ path->p_ext->e_block, -+ path->p_ext->e_start, -+ path->p_ext->e_num); -+ } else -+ ext_debug(inode, " []"); -+ } -+ ext_debug(inode, "\n"); -+} -+ -+static void ext3_ext_show_leaf(struct inode *inode, struct ext3_ext_path *path) -+{ -+ int depth = EXT3_I(inode)->i_depth; -+ struct ext3_extent_header *eh = path[depth].p_hdr; -+ struct ext3_extent *ex = EXT_FIRST_EXTENT(eh); -+ int i; -+ -+ for (i = 0; i < eh->e_num; i++, ex++) { -+ ext_debug(inode, "%d:%d:%d ", -+ ex->e_block, ex->e_start, ex->e_num); -+ } -+ ext_debug(inode, "\n"); -+} -+ -+static 
void ext3_ext_drop_refs(struct inode *inode, struct ext3_ext_path *path) -+{ -+ int depth = path->p_depth; -+ int i; -+ -+ for (i = 0; i <= depth; i++, path++) -+ if (path->p_bh) { -+ brelse(path->p_bh); -+ path->p_bh = NULL; -+ } -+} -+ -+static int ext3_ext_find_goal(struct inode *inode, struct ext3_ext_path *path) -+{ -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ unsigned long bg_start; -+ unsigned long colour; -+ int depth; -+ -+ if (path) { -+ depth = path->p_depth; -+ /* try to find previous block */ -+ if (path[depth].p_ext) -+ return path[depth].p_ext->e_start + -+ path[depth].p_ext->e_num - 1; -+ -+ /* it looks index is empty -+ * try to find starting from index itself */ -+ if (path[depth].p_bh) -+ return path[depth].p_bh->b_blocknr; -+ } -+ -+ /* OK. use inode's group */ -+ bg_start = (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + -+ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); -+ colour = (current->pid % 16) * -+ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); -+ return bg_start + colour; -+} -+ -+static struct ext3_ext_path * -+ext3_ext_find_extent(struct inode *inode, int block, struct ext3_ext_path *path) -+{ -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ struct ext3_extent_header *eh = (void *) ei->i_data; -+ struct ext3_extent_idx *ix; -+ struct buffer_head *bh; -+ struct ext3_extent *ex; -+ int depth, i, k, ppos = 0, prev = 0; -+ -+ eh = (struct ext3_extent_header *) ei->i_data; -+ -+ /* initialize capacity of leaf in inode for first time */ -+ if (eh->e_max == 0) -+ eh->e_max = ext3_ext_space_inode(inode); -+ i = depth = ei->i_depth; -+ EXT_ASSERT(i == 0 || eh->e_num > 0); -+ -+ /* account possible depth increase */ -+ if (!path) { -+ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 2), -+ GFP_NOFS); -+ if (!path) -+ return ERR_PTR(-ENOMEM); -+ } -+ memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1)); -+ -+ /* walk through the tree */ -+ while (i) { -+ ext_debug(inode, "depth %d: num %d, max %d\n", -+ 
ppos, eh->e_num, eh->e_max); -+ ix = EXT_FIRST_INDEX(eh); -+ if (eh->e_num) { -+ EXT_ASSERT(prev == 0 || ix->e_block == prev); -+ path[ppos].p_idx = ix; -+ } -+ EXT_ASSERT(eh->e_num <= eh->e_max); -+ for (k = 0; k < eh->e_num; k++, ix++) { -+ ext_debug(inode, "index: %d -> %d\n", -+ ix->e_block, ix->e_leaf); -+ EXT_ASSERT((k == 0 && prev <= (int)ix->e_block) || -+ (k > 0 && prev < (int)ix->e_block)); -+ if (block < ix->e_block) -+ break; -+ prev = ix->e_block; -+ path[ppos].p_idx = ix; -+ } -+ path[ppos].p_block = path[ppos].p_idx->e_leaf; -+ path[ppos].p_depth = i; -+ path[ppos].p_hdr = eh; -+ path[ppos].p_ext = NULL; -+ -+ bh = sb_bread(inode->i_sb, path[ppos].p_block); -+ if (!bh) { -+ ext3_ext_drop_refs(inode, path); -+ kfree(path); -+ return ERR_PTR(-EIO); -+ } -+ eh = (struct ext3_extent_header *) bh->b_data; -+ ppos++; -+ EXT_ASSERT(ppos <= depth); -+ path[ppos].p_bh = bh; -+ i--; -+ } -+ -+ path[ppos].p_depth = i; -+ path[ppos].p_hdr = eh; -+ path[ppos].p_ext = NULL; -+ -+ /* find extent */ -+ ex = EXT_FIRST_EXTENT(eh); -+ if (eh->e_num) -+ path[ppos].p_ext = ex; -+ EXT_ASSERT(eh->e_num <= eh->e_max); -+ for (k = 0; k < eh->e_num; k++, ex++) { -+ EXT_ASSERT(ex->e_num < EXT3_BLOCKS_PER_GROUP(inode->i_sb)); -+ EXT_ASSERT((k == 0 && prev <= (int)ex->e_block) || -+ (k > 0 && prev < (int)ex->e_block)); -+ if (block < ex->e_block) -+ break; -+ prev = ex->e_block; -+ path[ppos].p_ext = ex; -+ } -+ -+ ext3_ext_show_path(inode, path); -+ -+ return path; -+} -+ -+static void ext3_ext_check_boundary(struct inode *inode, -+ struct ext3_ext_path *curp, -+ void *addr, int len) -+{ -+ void *end; -+ -+ if (!len) -+ return; -+ if (curp->p_bh) -+ end = (void *) curp->p_hdr + inode->i_sb->s_blocksize; -+ else -+ end = (void *) curp->p_hdr + sizeof(EXT3_I(inode)->i_data); -+ if (((unsigned long) addr) + len > (unsigned long) end) { -+ printk("overflow! 
0x%p > 0x%p\n", addr + len, end); -+ BUG(); -+ } -+ if ((unsigned long) addr < (unsigned long) curp->p_hdr) { -+ printk("underflow! 0x%p < 0x%p\n", addr, curp->p_hdr); -+ BUG(); -+ } -+} -+ -+/* -+ * insert new index [logical;ptr] into the block at cupr -+ * it check where to insert: before curp or after curp -+ */ -+static int ext3_ext_insert_index(handle_t *handle, struct inode *inode, -+ struct ext3_ext_path *curp, int logical, -+ int ptr) -+{ -+ struct ext3_extent_idx *ix; -+ int len, err; -+ -+ if ((err = ext3_ext_get_access(handle, inode, curp))) -+ return err; -+ -+ EXT_ASSERT(logical != curp->p_idx->e_block); -+ len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx; -+ if (logical > curp->p_idx->e_block) { -+ /* insert after */ -+ len = (len - 1) * sizeof(struct ext3_extent_idx); -+ len = len < 0 ? 0 : len; -+ ext_debug(inode, "insert new index %d after: %d. " -+ "move %d from 0x%p to 0x%p\n", -+ logical, ptr, len, -+ (curp->p_idx + 1), (curp->p_idx + 2)); -+ -+ ext3_ext_check_boundary(inode, curp, curp->p_idx + 2, len); -+ memmove(curp->p_idx + 2, curp->p_idx + 1, len); -+ ix = curp->p_idx + 1; -+ } else { -+ /* insert before */ -+ len = len * sizeof(struct ext3_extent_idx); -+ len = len < 0 ? 0 : len; -+ ext_debug(inode, "insert new index %d before: %d. 
" -+ "move %d from 0x%p to 0x%p\n", -+ logical, ptr, len, -+ curp->p_idx, (curp->p_idx + 1)); -+ -+ ext3_ext_check_boundary(inode, curp, curp->p_idx + 1, len); -+ memmove(curp->p_idx + 1, curp->p_idx, len); -+ ix = curp->p_idx; -+ } -+ -+ ix->e_block = logical; -+ ix->e_leaf = ptr; -+ curp->p_hdr->e_num++; -+ -+ err = ext3_ext_dirty(handle, inode, curp); -+ ext3_std_error(inode->i_sb, err); -+ -+ return err; -+} -+ -+/* -+ * routine inserts new subtree into the path, using free index entry -+ * at depth 'at: -+ * - allocates all needed blocks (new leaf and all intermediate index blocks) -+ * - makes decision where to split -+ * - moves remaining extens and index entries (right to the split point) -+ * into the newly allocated blocks -+ * - initialize subtree -+ */ -+static int ext3_ext_split(handle_t *handle, struct inode *inode, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext, int at) -+{ -+ struct buffer_head *bh = NULL; -+ int depth = EXT3_I(inode)->i_depth; -+ struct ext3_extent_header *neh; -+ struct ext3_extent_idx *fidx; -+ struct ext3_extent *ex; -+ int i = at, k, m, a; -+ long newblock, oldblock, border; -+ int *ablocks = NULL; /* array of allocated blocks */ -+ int err = 0; -+ -+ /* make decision: where to split? */ -+ /* FIXME: now desicion is simplest: at current extent */ -+ -+ /* if current leaf will be splitted, then we should use -+ * border from split point */ -+ -+ if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { -+ border = path[depth].p_ext[1].e_block; -+ ext_debug(inode, "leaf will be splitted." -+ " next leaf starts at %d\n", -+ (int)border); -+ } else { -+ border = newext->e_block; -+ ext_debug(inode, "leaf will be added." -+ " next leaf starts at %d\n", -+ (int)border); -+ } -+ -+ /* -+ * if error occurs, then we break processing -+ * and turn filesystem read-only. so, index won't -+ * be inserted and tree will be in consistent -+ * state. 
next mount will repair buffers too -+ */ -+ -+ /* -+ * get array to track all allocated blocks -+ * we need this to handle errors and free blocks -+ * upon them -+ */ -+ ablocks = kmalloc(sizeof(long) * depth, GFP_NOFS); -+ if (!ablocks) -+ return -ENOMEM; -+ memset(ablocks, 0, sizeof(long) * depth); -+ -+ /* allocate all needed blocks */ -+ ext_debug(inode, "allocate %d blocks for indexes/leaf\n", depth - at); -+ newblock = 0; /* FIXME: something more sophisticated needed here */ -+ for (a = 0; newext->e_num > 0 && a < depth - at; a++) { -+ newblock = ablocks[a] = newext->e_start++; -+ newext->e_num--; -+ } -+ for (; a < depth - at; a++) { -+ newblock = ext3_new_block(handle, inode, -+ newblock + 1, 0, 0, &err); -+ if (newblock == 0) -+ goto cleanup; -+ ablocks[a] = newblock; -+ } -+ -+ /* initialize new leaf */ -+ newblock = ablocks[--a]; -+ EXT_ASSERT(newblock); -+ bh = sb_getblk(inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) -+ goto cleanup; -+ -+ neh = (struct ext3_extent_header *) bh->b_data; -+ neh->e_num = 0; -+ neh->e_max = ext3_ext_space_block(inode); -+ ex = EXT_FIRST_EXTENT(neh); -+ -+ /* move remain of path[depth] to the new leaf */ -+ EXT_ASSERT(path[depth].p_hdr->e_num == -+ path[depth].p_hdr->e_max); -+ /* start copy from next extent */ -+ /* TODO: we could do it by single memmove */ -+ m = 0; -+ path[depth].p_ext++; -+ while (path[depth].p_ext <= -+ EXT_MAX_EXTENT(path[depth].p_hdr)) { -+ ext_debug(inode, "move %d:%d:%d in new leaf\n", -+ path[depth].p_ext->e_block, -+ path[depth].p_ext->e_start, -+ path[depth].p_ext->e_num); -+ memmove(ex++, path[depth].p_ext++, -+ sizeof(struct ext3_extent)); -+ neh->e_num++; -+ m++; -+ } -+ mark_buffer_uptodate(bh, 1); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto cleanup; -+ brelse(bh); -+ bh = NULL; -+ -+ /* correct old leaf */ -+ if (m) { -+ if ((err = 
ext3_ext_get_access(handle, inode, path))) -+ goto cleanup; -+ path[depth].p_hdr->e_num -= m; -+ if ((err = ext3_ext_dirty(handle, inode, path))) -+ goto cleanup; -+ -+ } -+ -+ /* create intermediate indexes */ -+ k = depth - at - 1; -+ EXT_ASSERT(k >= 0); -+ if (k) -+ ext_debug(inode, -+ "create %d intermediate indices\n", k); -+ /* insert new index into current index block */ -+ /* current depth stored in i var */ -+ i = depth - 1; -+ while (k--) { -+ oldblock = newblock; -+ newblock = ablocks[--a]; -+ bh = sb_getblk(inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) -+ goto cleanup; -+ -+ neh = (struct ext3_extent_header *) bh->b_data; -+ neh->e_num = 1; -+ neh->e_max = ext3_ext_space_block(inode); -+ fidx = EXT_FIRST_INDEX(neh); -+ fidx->e_block = border; -+ fidx->e_leaf = oldblock; -+ -+ ext_debug(inode, -+ "int.index at %d (block %u): %d -> %d\n", -+ i, (unsigned) newblock, -+ (int) border, -+ (int) oldblock); -+ /* copy indexes */ -+ m = 0; -+ path[i].p_idx++; -+ ext_debug(inode, "cur 0x%p, last 0x%p\n", path[i].p_idx, -+ EXT_MAX_INDEX(path[i].p_hdr)); -+ EXT_ASSERT(EXT_MAX_INDEX(path[i].p_hdr) == -+ EXT_LAST_INDEX(path[i].p_hdr)); -+ while (path[i].p_idx <= -+ EXT_MAX_INDEX(path[i].p_hdr)) { -+ ext_debug(inode, "%d: move %d:%d in new index\n", -+ i, path[i].p_idx->e_block, -+ path[i].p_idx->e_leaf); -+ memmove(++fidx, path[i].p_idx++, -+ sizeof(struct ext3_extent_idx)); -+ neh->e_num++; -+ m++; -+ } -+ -+ mark_buffer_uptodate(bh, 1); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto cleanup; -+ brelse(bh); -+ bh = NULL; -+ -+ /* correct old index */ -+ if (m) { -+ err = ext3_ext_get_access(handle,inode,path+i); -+ if (err) -+ goto cleanup; -+ path[i].p_hdr->e_num -= m; -+ err = ext3_ext_dirty(handle, inode, path + i); -+ if (err) -+ goto cleanup; -+ } -+ -+ i--; -+ } -+ -+ /* insert new index */ -+ if (!err) -+ err 
= ext3_ext_insert_index(handle, inode, path + at, -+ border, newblock); -+ -+cleanup: -+ if (bh) { -+ if (buffer_locked(bh)) -+ unlock_buffer(bh); -+ brelse(bh); -+ } -+ -+ if (err) { -+ /* free all allocated blocks in error case */ -+ for (i = 0; i < depth; i++) -+ if (!ablocks[i]) -+ continue; -+ ext3_free_blocks(handle, inode, ablocks[i], 1); -+ } -+ kfree(ablocks); -+ -+ return err; -+} -+ -+/* -+ * routine implements tree growing procedure: -+ * - allocates new block -+ * - moves top-level data (index block or leaf) into the new block -+ * - initialize new top-level, creating index that points to the -+ * just created block -+ */ -+static int ext3_ext_grow_indepth(handle_t *handle, struct inode *inode, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ struct buffer_head *bh; -+ struct ext3_ext_path *curp = path; -+ struct ext3_extent_header *neh; -+ struct ext3_extent_idx *fidx; -+ int len, err = 0; -+ long newblock; -+ -+ /* -+ * use already allocated by the called block for new root block -+ */ -+ newblock = newext->e_start++; -+ if (newext->e_num == 0) { -+ /* -+ * FIXME: if this may happen, then we have to handle -+ * possible error and free allocated block -+ */ -+ printk("grow_indepth with zero blocks\n"); -+ newblock = ext3_new_block(handle, inode, -+ newblock, 0, 0, &err); -+ } else -+ newext->e_num--; -+ -+ bh = sb_getblk(inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ ext3_std_error(inode->i_sb, err); -+ return err; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) { -+ unlock_buffer(bh); -+ goto out; -+ } -+ -+ /* move top-level index/leaf into new block */ -+ len = sizeof(struct ext3_extent_header) + -+ sizeof(struct ext3_extent) * curp->p_hdr->e_max; -+ EXT_ASSERT(len >= 0 && len < 4096); -+ memmove(bh->b_data, curp->p_hdr, len); -+ -+ /* set size of new block */ -+ neh = (struct ext3_extent_header *) bh->b_data; -+ neh->e_max = ext3_ext_space_block(inode); -+ mark_buffer_uptodate(bh, 
1); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto out; -+ -+ /* create index in new top-level index: num,max,pointer */ -+ if ((err = ext3_ext_get_access(handle, inode, curp))) -+ goto out; -+ -+ curp->p_hdr->e_max = ext3_ext_space_inode_idx(inode); -+ curp->p_hdr->e_num = 1; -+ curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); -+ curp->p_idx->e_block = EXT_FIRST_EXTENT(path[0].p_hdr)->e_block; -+ curp->p_idx->e_leaf = newblock; -+ -+ neh = (struct ext3_extent_header *) EXT3_I(inode)->i_data; -+ fidx = EXT_FIRST_INDEX(neh); -+ ext_debug(inode, "new root: num %d(%d), lblock %d, ptr %d\n", -+ neh->e_num, neh->e_max, fidx->e_block, fidx->e_leaf); -+ -+ EXT3_I(inode)->i_depth++; -+ err = ext3_ext_dirty(handle, inode, curp); -+out: -+ brelse(bh); -+ -+ return err; -+} -+ -+/* -+ * routine finds empty index and adds new leaf. if no free index found -+ * then it requests in-depth growing -+ */ -+static int ext3_ext_create_new_leaf(handle_t *handle, struct inode *inode, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ long newblock = newext->e_start; -+ struct ext3_ext_path *curp; -+ int depth, i, err = 0; -+ -+repeat: -+ i = depth = EXT3_I(inode)->i_depth; -+ -+ /* walk up to the tree and look for free index entry */ -+ curp = path + depth; -+ while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) { -+ i--; -+ curp--; -+ } -+ -+ /* we use already allocated block for index block -+ * so, subsequent data blocks should be contigoues */ -+ if (EXT_HAS_FREE_INDEX(curp)) { -+ /* if we found index with free entry, then use that -+ * entry: create all needed subtree and add new leaf */ -+ err = ext3_ext_split(handle, inode, path, newext, i); -+ -+ /* refill path */ -+ ext3_ext_drop_refs(inode, path); -+ path = ext3_ext_find_extent(inode, newext->e_block, path); -+ if (IS_ERR(path)) -+ err = PTR_ERR(path); -+ } else { -+ /* tree is full, time to grow in depth */ -+ err = ext3_ext_grow_indepth(handle, inode, path, newext); -+ -+ /* 
refill path */ -+ ext3_ext_drop_refs(inode, path); -+ path = ext3_ext_find_extent(inode, newext->e_block, path); -+ if (IS_ERR(path)) -+ err = PTR_ERR(path); -+ -+ /* -+ * only first (depth 0 -> 1) produces free space -+ * in all other cases we have to split growed tree -+ */ -+ depth = EXT3_I(inode)->i_depth; -+ if (path[depth].p_hdr->e_num == path[depth].p_hdr->e_max) { -+ /* now we need split */ -+ goto repeat; -+ } -+ } -+ -+ if (err) -+ return err; -+ -+ /* -+ * probably we've used some blocks from extent -+ * let's allocate new block for it -+ */ -+ if (newext->e_num == 0 && !err) { -+ newext->e_start = -+ ext3_new_block(handle, inode, newblock, -+ 0, 0, &err); -+ if (newext->e_start != 0) -+ newext->e_num = 1; -+ } -+ -+ return 0; -+} -+ -+/* -+ * returns next allocated block or 0xffffffff -+ * NOTE: it consider block number from index entry as -+ * allocated block. thus, index entries have to be consistent -+ * with leafs -+ */ -+static inline unsigned ext3_ext_next_allocated_block(struct inode *inode, -+ struct ext3_ext_path *path) -+{ -+ int depth; -+ -+ EXT_ASSERT(path != NULL); -+ depth = path->p_depth; -+ -+ if (depth == 0 && path->p_ext == NULL) -+ return 0xffffffff; -+ -+ /* FIXME: what if index isn't full ?! 
*/ -+ while (depth >= 0) { -+ if (depth == path->p_depth) { -+ /* leaf */ -+ if (path[depth].p_ext != -+ EXT_LAST_EXTENT(path[depth].p_hdr)) -+ return path[depth].p_ext[1].e_block; -+ } else { -+ /* index */ -+ if (path[depth].p_idx != -+ EXT_LAST_INDEX(path[depth].p_hdr)) -+ return path[depth].p_idx[1].e_block; -+ } -+ depth--; -+ } -+ -+ return 0xffffffff; -+} -+ -+/* -+ * returns first allocated block from next leaf or 0xffffffff -+ */ -+static unsigned ext3_ext_next_leaf_block(struct inode *inode, -+ struct ext3_ext_path *path) -+{ -+ int depth; -+ -+ EXT_ASSERT(path != NULL); -+ depth = path->p_depth; -+ -+ /* zero-tree has no leaf blocks at all */ -+ if (depth == 0) -+ return 0xffffffff; -+ -+ /* go to index block */ -+ depth--; -+ -+ while (depth >= 0) { -+ if (path[depth].p_idx != -+ EXT_LAST_INDEX(path[depth].p_hdr)) -+ return path[depth].p_idx[1].e_block; -+ depth--; -+ } -+ -+ return 0xffffffff; -+} -+ -+/* -+ * if leaf gets modified and modified extent is first in the leaf -+ * then we have to correct all indexes above -+ * TODO: do we need to correct tree in all cases? 
-+ */ -+int ext3_ext_correct_indexes(handle_t *handle, struct inode *inode, -+ struct ext3_ext_path *path) -+{ -+ int depth = EXT3_I(inode)->i_depth; -+ struct ext3_extent_header *eh; -+ struct ext3_extent *ex; -+ long border; -+ int k, err = 0; -+ -+ eh = path[depth].p_hdr; -+ ex = path[depth].p_ext; -+ -+ EXT_ASSERT(ex); -+ EXT_ASSERT(eh); -+ -+ if (depth == 0) { -+ /* there is no tree at all */ -+ return 0; -+ } -+ -+ if (ex != EXT_FIRST_EXTENT(eh)) { -+ /* we correct tree if first leaf got modified only */ -+ return 0; -+ } -+ -+ /* -+ * TODO: we need correction if border is smaller then current one -+ */ -+ k = depth - 1; -+ border = path[depth].p_ext->e_block; -+ if ((err = ext3_ext_get_access(handle, inode, path + k))) -+ return err; -+ path[k].p_idx->e_block = border; -+ if ((err = ext3_ext_dirty(handle, inode, path + k))) -+ return err; -+ -+ while (k--) { -+ /* change all left-side indexes */ -+ if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr)) -+ break; -+ if ((err = ext3_ext_get_access(handle, inode, path + k))) -+ break; -+ path[k].p_idx->e_block = border; -+ if ((err = ext3_ext_dirty(handle, inode, path + k))) -+ break; -+ } -+ -+ return err; -+} -+ -+/* -+ * this routine tries to merge requsted extent into the existing -+ * extent or inserts requested extent as new one into the tree, -+ * creating new leaf in no-space case -+ */ -+int ext3_ext_insert_extent(handle_t *handle, struct inode *inode, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ int depth, len; -+ struct ext3_extent_header * eh; -+ struct ext3_extent *ex; -+ struct ext3_extent *nearex; /* nearest extent */ -+ struct ext3_ext_path *npath = NULL; -+ int err; -+ -+ depth = EXT3_I(inode)->i_depth; -+ if ((ex = path[depth].p_ext)) { -+ /* try to insert block into found extent and return */ -+ if (ex->e_block + ex->e_num == newext->e_block && -+ ex->e_start + ex->e_num == newext->e_start) { -+#ifdef AGRESSIVE_TEST -+ if (ex->e_num >= 2) -+ goto repeat; -+#endif -+ 
if ((err = ext3_ext_get_access(handle, inode, -+ path + depth))) -+ return err; -+ ext_debug(inode, "append %d block to %d:%d (from %d)\n", -+ newext->e_num, ex->e_block, ex->e_num, -+ ex->e_start); -+ ex->e_num += newext->e_num; -+ err = ext3_ext_dirty(handle, inode, path + depth); -+ return err; -+ } -+ } -+ -+repeat: -+ depth = EXT3_I(inode)->i_depth; -+ eh = path[depth].p_hdr; -+ if (eh->e_num == eh->e_max) { -+ /* probably next leaf has space for us? */ -+ int next = ext3_ext_next_leaf_block(inode, path); -+ if (next != 0xffffffff) { -+ ext_debug(inode, "next leaf block - %d\n", next); -+ EXT_ASSERT(!npath); -+ npath = ext3_ext_find_extent(inode, next, NULL); -+ if (IS_ERR(npath)) -+ return PTR_ERR(npath); -+ EXT_ASSERT(npath->p_depth == path->p_depth); -+ eh = npath[depth].p_hdr; -+ if (eh->e_num < eh->e_max) { -+ ext_debug(inode, -+ "next leaf has free ext(%d)\n", -+ eh->e_num); -+ path = npath; -+ goto repeat; -+ } -+ ext_debug(inode, "next leaf hasno free space(%d,%d)\n", -+ eh->e_num, eh->e_max); -+ } -+ /* -+ * there is no free space in found leaf -+ * we're gonna add new leaf in the tree -+ */ -+ err = ext3_ext_create_new_leaf(handle, inode, path, newext); -+ if (err) -+ goto cleanup; -+ goto repeat; -+ } -+ -+ nearex = path[depth].p_ext; -+ -+ if ((err = ext3_ext_get_access(handle, inode, path + depth))) -+ goto cleanup; -+ -+ if (!nearex) { -+ /* there is no extent in this leaf, create first one */ -+ ext_debug(inode, "first extent in the leaf: %d:%d:%d\n", -+ newext->e_block, newext->e_start, -+ newext->e_num); -+ path[depth].p_ext = EXT_FIRST_EXTENT(eh); -+ } else if (newext->e_block > nearex->e_block) { -+ EXT_ASSERT(newext->e_block != nearex->e_block); -+ len = EXT_MAX_EXTENT(eh) - nearex; -+ len = (len - 1) * sizeof(struct ext3_extent); -+ len = len < 0 ? 
0 : len; -+ ext_debug(inode, "insert %d:%d:%d after: nearest 0x%p, " -+ "move %d from 0x%p to 0x%p\n", -+ newext->e_block, newext->e_start, newext->e_num, -+ nearex, len, nearex + 1, nearex + 2); -+ ext3_ext_check_boundary(inode, path + depth, nearex + 2, len); -+ memmove(nearex + 2, nearex + 1, len); -+ path[depth].p_ext = nearex + 1; -+ } else { -+ EXT_ASSERT(newext->e_block != nearex->e_block); -+ len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext3_extent); -+ len = len < 0 ? 0 : len; -+ ext_debug(inode, "insert %d:%d:%d before: nearest 0x%p, " -+ "move %d from 0x%p to 0x%p\n", -+ newext->e_block, newext->e_start, newext->e_num, -+ nearex, len, nearex + 1, nearex + 2); -+ -+ memmove(nearex + 1, nearex, len); -+ path[depth].p_ext = nearex; -+ } -+ -+ if (!err) { -+ eh->e_num++; -+ nearex = path[depth].p_ext; -+ nearex->e_block = newext->e_block; -+ nearex->e_start = newext->e_start; -+ nearex->e_num = newext->e_num; -+ EXT_ASSERT(nearex->e_num < EXT3_BLOCKS_PER_GROUP(inode->i_sb) && -+ nearex->e_num > 0); -+ -+ /* time to correct all indexes above */ -+ err = ext3_ext_correct_indexes(handle, inode, path); -+ } -+ -+ err = ext3_ext_dirty(handle, inode, path + depth); -+ -+cleanup: -+ if (npath) { -+ ext3_ext_drop_refs(inode, npath); -+ kfree(npath); -+ } -+ -+ return err; -+} -+ -+int ext3_ext_get_block(handle_t *handle, struct inode *inode, long iblock, -+ struct buffer_head *bh_result, int create, -+ int extend_disksize) -+{ -+ struct ext3_ext_path *path; -+ int depth = EXT3_I(inode)->i_depth; -+ struct ext3_extent newex; -+ struct ext3_extent *ex; -+ int goal, newblock, err = 0; -+ -+ ext_debug(inode, "block %d requested for inode %u, bh_result 0x%p\n", -+ (int) iblock, (unsigned) inode->i_ino, bh_result); -+ bh_result->b_state &= ~(1UL << BH_New); -+ -+ down(&EXT3_I(inode)->i_ext_sem); -+ -+ /* find extent for this block */ -+ path = ext3_ext_find_extent(inode, iblock, NULL); -+ if (IS_ERR(path)) { -+ err = PTR_ERR(path); -+ goto out2; -+ } -+ -+ if ((ex 
= path[depth].p_ext)) { -+ /* if found exent covers block, simple return it */ -+ if (iblock >= ex->e_block && iblock < ex->e_block + ex->e_num) { -+ newblock = iblock - ex->e_block + ex->e_start; -+ ext_debug(inode, "%d fit into %d:%d -> %d\n", -+ (int) iblock, ex->e_block, ex->e_num, -+ newblock); -+ goto out; -+ } -+ } -+ -+ /* -+ * we couldn't try to create block if create flag is zero -+ */ -+ if (!create) -+ goto out2; -+ -+ /* allocate new block */ -+ goal = ext3_ext_find_goal(inode, path); -+ newblock = ext3_new_block(handle, inode, goal, 0, 0, &err); -+ if (!newblock) -+ goto out2; -+ ext_debug(inode, "allocate new block: goal %d, found %d\n", -+ goal, newblock); -+ -+ /* try to insert new extent into found leaf and return */ -+ newex.e_block = iblock; -+ newex.e_start = newblock; -+ newex.e_num = 1; -+ err = ext3_ext_insert_extent(handle, inode, path, &newex); -+ if (err) -+ goto out2; -+ -+ /* previous routine could use block we allocated */ -+ newblock = newex.e_start; -+ bh_result->b_state |= (1UL << BH_New); -+ -+out: -+ ext3_ext_show_leaf(inode, path); -+ bh_result->b_dev = inode->i_dev; -+ bh_result->b_blocknr = newblock; -+ bh_result->b_state |= (1UL << BH_Mapped); -+out2: -+ ext3_ext_drop_refs(inode, path); -+ kfree(path); -+ up(&EXT3_I(inode)->i_ext_sem); -+ -+ return err; -+} -+ -+/* -+ * returns 1 if current index have to be freed (even partial) -+ */ -+static int ext3_ext_more_to_truncate(struct inode *inode, -+ struct ext3_ext_path *path) -+{ -+ EXT_ASSERT(path->p_idx); -+ -+ if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr)) -+ return 0; -+ -+ /* -+ * if truncate on deeper level happened it it wasn't partial -+ * so we have to consider current index for truncation -+ */ -+ if (path->p_hdr->e_num == path->p_block) -+ return 0; -+ -+ /* -+ * put actual number of indexes to know is this number got -+ * changed at the next iteration -+ */ -+ path->p_block = path->p_hdr->e_num; -+ -+ return 1; -+} -+ -+/* -+ * routine removes index from the index 
block -+ * it's used in truncate case only. thus all requests are for -+ * last index in the block only -+ */ -+int ext3_ext_remove_index(handle_t *handle, struct inode *inode, -+ struct ext3_ext_path *path) -+{ -+ struct buffer_head *bh; -+ int err; -+ -+ /* free index block */ -+ path--; -+ EXT_ASSERT(path->p_hdr->e_num); -+ if ((err = ext3_ext_get_access(handle, inode, path))) -+ return err; -+ path->p_hdr->e_num--; -+ if ((err = ext3_ext_dirty(handle, inode, path))) -+ return err; -+ bh = sb_get_hash_table(inode->i_sb, path->p_idx->e_leaf); -+ ext3_forget(handle, 0, inode, bh, path->p_idx->e_leaf); -+ ext3_free_blocks(handle, inode, path->p_idx->e_leaf, 1); -+ -+ ext_debug(inode, "index is empty, remove it, free block %d\n", -+ path->p_idx->e_leaf); -+ return err; -+} -+ -+/* -+ * returns 1 if current extent needs to be freed (even partial) -+ * instead, returns 0 -+ */ -+int ext3_ext_more_leaves_to_truncate(struct inode *inode, -+ struct ext3_ext_path *path) -+{ -+ unsigned blocksize = inode->i_sb->s_blocksize; -+ struct ext3_extent *ex = path->p_ext; -+ int last_block; -+ -+ EXT_ASSERT(ex); -+ -+ /* is there leave in the current leaf? 
*/ -+ if (ex < EXT_FIRST_EXTENT(path->p_hdr)) -+ return 0; -+ -+ last_block = (inode->i_size + blocksize-1) -+ >> EXT3_BLOCK_SIZE_BITS(inode->i_sb); -+ -+ if (last_block >= ex->e_block + ex->e_num) -+ return 0; -+ -+ /* seems it extent have to be freed */ -+ return 1; -+} -+ -+handle_t *ext3_ext_journal_restart(handle_t *handle, int needed) -+{ -+ int err; -+ -+ if (handle->h_buffer_credits > needed) -+ return handle; -+ if (!ext3_journal_extend(handle, needed)) -+ return handle; -+ err = ext3_journal_restart(handle, needed); -+ -+ return handle; -+} -+ -+/* -+ * this routine calculate max number of blocks to be modified -+ * while freeing extent and is intended to be used in truncate path -+ */ -+static int ext3_ext_calc_credits(struct inode *inode, -+ struct ext3_ext_path *path, -+ int num) -+{ -+ int depth = EXT3_I(inode)->i_depth; -+ int needed; -+ -+ /* -+ * extent couldn't cross group, so we will modify -+ * single bitmap block and single group descriptor -+ */ -+ needed = 3; -+ -+ /* -+ * if this is last extent in a leaf, then we have to -+ * free leaf block and remove pointer from index above. -+ * that pointer could be last in index block, so we'll -+ * have to remove it too. 
this way we could modify/free -+ * the whole path + root index (inode stored) will be -+ * modified -+ */ -+ if (!path || (num == path->p_ext->e_num && -+ path->p_ext == EXT_FIRST_EXTENT(path->p_hdr))) -+ needed += (depth * (EXT3_ALLOC_NEEDED + 1)) + 1; -+ -+ return needed; -+} -+ -+/* -+ * core of the truncate procedure: -+ * - calculated what part of each extent in the requested leaf -+ * need to be freed -+ * - frees and forgets these blocks -+ * -+ * TODO: we could optimize and free several extents during -+ * single journal_restart()-journal_restart() cycle -+ */ -+static int ext3_ext_truncate_leaf(handle_t *handle, -+ struct inode *inode, -+ struct ext3_ext_path *path, -+ int depth) -+{ -+ unsigned blocksize = inode->i_sb->s_blocksize; -+ int last_block; -+ int i, err = 0, sf, num; -+ -+ ext_debug(inode, "level %d - leaf\n", depth); -+ if (!path->p_hdr) -+ path->p_hdr = -+ (struct ext3_extent_header *) path->p_bh->b_data; -+ -+ EXT_ASSERT(path->p_hdr->e_num <= path->p_hdr->e_max); -+ -+ last_block = (inode->i_size + blocksize-1) -+ >> EXT3_BLOCK_SIZE_BITS(inode->i_sb); -+ path->p_ext = EXT_LAST_EXTENT(path->p_hdr); -+ while (ext3_ext_more_leaves_to_truncate(inode, path)) { -+ -+ /* what part of extent have to be freed? */ -+ sf = last_block > path->p_ext->e_block ? 
-+ last_block : path->p_ext->e_block; -+ -+ /* number of blocks from extent to be freed */ -+ num = path->p_ext->e_block + path->p_ext->e_num - sf; -+ -+ /* calc physical first physical block to be freed */ -+ sf = path->p_ext->e_start + (sf - path->p_ext->e_block); -+ -+ i = ext3_ext_calc_credits(inode, path, num); -+ handle = ext3_ext_journal_restart(handle, i); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ ext_debug(inode, "free extent %d:%d:%d -> free %d:%d\n", -+ path->p_ext->e_block, path->p_ext->e_start, -+ path->p_ext->e_num, sf, num); -+ for (i = 0; i < num; i++) { -+ struct buffer_head *bh = -+ sb_get_hash_table(inode->i_sb, sf + i); -+ ext3_forget(handle, 0, inode, bh, sf + i); -+ } -+ ext3_free_blocks(handle, inode, sf, num); -+ -+ /* collect extents usage stats */ -+ spin_lock(&EXT3_SB(inode->i_sb)->s_ext_lock); -+ EXT3_SB(inode->i_sb)->s_ext_extents++; -+ EXT3_SB(inode->i_sb)->s_ext_blocks += num; -+ spin_unlock(&EXT3_SB(inode->i_sb)->s_ext_lock); -+ -+ /* reduce extent */ -+ if ((err = ext3_ext_get_access(handle, inode, path))) -+ return err; -+ path->p_ext->e_num -= num; -+ if (path->p_ext->e_num == 0) -+ path->p_hdr->e_num--; -+ if ((err = ext3_ext_dirty(handle, inode, path))) -+ return err; -+ -+ path->p_ext--; -+ } -+ -+ /* if this leaf is free, then we should -+ * remove it from index block above */ -+ if (path->p_hdr->e_num == 0 && depth > 0) -+ err = ext3_ext_remove_index(handle, inode, path); -+ -+ return err; -+} -+ -+static void ext3_ext_collect_stats(struct inode *inode) -+{ -+ int depth; -+ -+ /* skip inodes with old good bitmap */ -+ if (!(EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)) -+ return; -+ -+ /* collect on full truncate only */ -+ if (inode->i_size) -+ return; -+ -+ depth = EXT3_I(inode)->i_depth; -+ if (depth < EXT3_SB(inode->i_sb)->s_ext_mindepth) -+ EXT3_SB(inode->i_sb)->s_ext_mindepth = depth; -+ if (depth > EXT3_SB(inode->i_sb)->s_ext_maxdepth) -+ EXT3_SB(inode->i_sb)->s_ext_maxdepth = depth; -+ 
EXT3_SB(inode->i_sb)->s_ext_sum += depth; -+ EXT3_SB(inode->i_sb)->s_ext_count++; -+ -+} -+ -+void ext3_ext_truncate(struct inode * inode) -+{ -+ struct address_space *mapping = inode->i_mapping; -+ struct ext3_ext_path *path; -+ struct page * page; -+ handle_t *handle; -+ int i, depth, err = 0; -+ -+ ext3_ext_collect_stats(inode); -+ -+ /* -+ * We have to lock the EOF page here, because lock_page() nests -+ * outside journal_start(). -+ */ -+ if ((inode->i_size & (inode->i_sb->s_blocksize - 1)) == 0) { -+ /* Block boundary? Nothing to do */ -+ page = NULL; -+ } else { -+ page = grab_cache_page(mapping, -+ inode->i_size >> PAGE_CACHE_SHIFT); -+ if (!page) -+ return; -+ } -+ -+ /* -+ * probably first extent we're gonna free will be last in block -+ */ -+ i = ext3_ext_calc_credits(inode, NULL, 0); -+ handle = ext3_journal_start(inode, i); -+ if (IS_ERR(handle)) { -+ if (page) { -+ clear_highpage(page); -+ flush_dcache_page(page); -+ unlock_page(page); -+ page_cache_release(page); -+ } -+ return; -+ } -+ -+ if (page) -+ ext3_block_truncate_page(handle, mapping, inode->i_size, page, -+ inode->i_sb->s_blocksize); -+ -+ down(&EXT3_I(inode)->i_ext_sem); -+ -+ /* -+ * TODO: optimization is possible here -+ * probably we need not scaning at all, -+ * because page truncation is enough -+ */ -+ if (ext3_orphan_add(handle, inode)) -+ goto out_stop; -+ -+ /* we have to know where to truncate from in crash case */ -+ EXT3_I(inode)->i_disksize = inode->i_size; -+ ext3_mark_inode_dirty(handle, inode); -+ -+ /* -+ * we start scanning from right side freeing all the blocks -+ * after i_size and walking into the deep -+ */ -+ i = 0; -+ depth = EXT3_I(inode)->i_depth; -+ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 1), GFP_KERNEL); -+ if (IS_ERR(path)) { -+ ext3_error(inode->i_sb, "ext3_ext_truncate", -+ "Can't allocate path array"); -+ goto out_stop; -+ } -+ memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1)); -+ -+ path[i].p_hdr = (struct ext3_extent_header *) 
EXT3_I(inode)->i_data; -+ while (i >= 0 && err == 0) { -+ if (i == depth) { -+ /* this is leaf block */ -+ err = ext3_ext_truncate_leaf(handle, inode, -+ path + i, i); -+ /* root level have p_bh == NULL, brelse() eats this */ -+ brelse(path[i].p_bh); -+ i--; -+ continue; -+ } -+ -+ /* this is index block */ -+ if (!path[i].p_hdr) { -+ path[i].p_hdr = -+ (struct ext3_extent_header *) path[i].p_bh->b_data; -+ ext_debug(inode, "initialize header\n"); -+ } -+ -+ EXT_ASSERT(path[i].p_hdr->e_num <= path[i].p_hdr->e_max); -+ -+ if (!path[i].p_idx) { -+ /* this level hasn't touched yet */ -+ path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr); -+ path[i].p_block = path[i].p_hdr->e_num + 1; -+ ext_debug(inode, "init index ptr: hdr 0x%p, num %d\n", -+ path[i].p_hdr, path[i].p_hdr->e_num); -+ } else { -+ /* we've already was here, see at next index */ -+ path[i].p_idx--; -+ } -+ -+ ext_debug(inode, "level %d - index, first 0x%p, cur 0x%p\n", -+ i, EXT_FIRST_INDEX(path[i].p_hdr), -+ path[i].p_idx); -+ if (ext3_ext_more_to_truncate(inode, path + i)) { -+ /* go to the next level */ -+ ext_debug(inode, "move to level %d (block %d)\n", i+1, -+ path[i].p_idx->e_leaf); -+ memset(path + i + 1, 0, sizeof(*path)); -+ path[i+1].p_bh = sb_bread(inode->i_sb, -+ path[i].p_idx->e_leaf); -+ if (!path[i+1].p_bh) { -+ /* should we reset i_size? 
*/ -+ err = -EIO; -+ break; -+ } -+ i++; -+ } else { -+ /* we finish processing this index, go up */ -+ if (path[i].p_hdr->e_num == 0 && i > 0) { -+ /* index is empty, remove it -+ * handle must be already prepared by the -+ * truncate_leaf() -+ */ -+ err = ext3_ext_remove_index(handle, inode, -+ path + i); -+ } -+ /* root level have p_bh == NULL, brelse() eats this */ -+ brelse(path[i].p_bh); -+ i--; -+ ext_debug(inode, "return to level %d\n", i); -+ } -+ } -+ -+ /* TODO: flexible tree reduction should be here */ -+ if (path->p_hdr->e_num == 0) { -+ /* -+ * truncate to zero freed all the tree -+ * so, we need to correct i_depth -+ */ -+ EXT3_I(inode)->i_depth = 0; -+ path->p_hdr->e_max = 0; -+ ext3_mark_inode_dirty(handle, inode); -+ } -+ -+ kfree(path); -+ -+ /* In a multi-transaction truncate, we only make the final -+ * transaction synchronous */ -+ if (IS_SYNC(inode)) -+ handle->h_sync = 1; -+ -+out_stop: -+ /* -+ * If this was a simple ftruncate(), and the file will remain alive -+ * then we need to clear up the orphan record which we created above. -+ * However, if this was a real unlink then we were called by -+ * ext3_delete_inode(), and we allow that function to clean up the -+ * orphan info for us. 
-+ */ -+ if (inode->i_nlink) -+ ext3_orphan_del(handle, inode); -+ -+ up(&EXT3_I(inode)->i_ext_sem); -+ ext3_journal_stop(handle, inode); -+} -+ -+/* -+ * this routine calculate max number of blocks we could modify -+ * in order to allocate new block for an inode -+ */ -+int ext3_ext_writepage_trans_blocks(struct inode *inode, int num) -+{ -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ int depth = ei->i_depth + 1; -+ int needed; -+ -+ /* -+ * the worste case we're expecting is creation of the -+ * new root (growing in depth) with index splitting -+ * for splitting we have to consider depth + 1 because -+ * previous growing could increase it -+ */ -+ -+ /* -+ * growing in depth: -+ * block allocation + new root + old root -+ */ -+ needed = EXT3_ALLOC_NEEDED + 2; -+ -+ /* index split. we may need: -+ * allocate intermediate indexes and new leaf -+ * change two blocks at each level, but root -+ * modify root block (inode) -+ */ -+ needed += (depth * EXT3_ALLOC_NEEDED) + (2 * depth) + 1; -+ -+ /* caller want to allocate num blocks */ -+ needed *= num; -+ -+#ifdef CONFIG_QUOTA -+ /* -+ * FIXME: real calculation should be here -+ * it depends on blockmap format of qouta file -+ */ -+ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; -+#endif -+ -+ return needed; -+} -+ -+/* -+ * called at mount time -+ */ -+void ext3_ext_init(struct super_block *sb) -+{ -+ /* -+ * possible initialization would be here -+ */ -+ -+ if (test_opt(sb, EXTENTS)) -+ printk("EXT3-fs: file extents enabled\n"); -+ spin_lock_init(&EXT3_SB(sb)->s_ext_lock); -+} -+ -+/* -+ * called at umount time -+ */ -+void ext3_ext_release(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ -+ /* show collected stats */ -+ if (sbi->s_ext_count && sbi->s_ext_extents) -+ printk("EXT3-fs: min depth - %d, max depth - %d, " -+ "ave. depth - %d, ave. 
blocks/extent - %d\n", -+ sbi->s_ext_mindepth, -+ sbi->s_ext_maxdepth, -+ sbi->s_ext_sum / sbi->s_ext_count, -+ sbi->s_ext_blocks / sbi->s_ext_extents); -+} -Index: linux-2.4.18-chaos/fs/ext3/ialloc.c -=================================================================== ---- linux-2.4.18-chaos.orig/fs/ext3/ialloc.c 2004-01-13 16:10:23.000000000 +0300 -+++ linux-2.4.18-chaos/fs/ext3/ialloc.c 2004-01-13 16:11:00.000000000 +0300 -@@ -573,6 +573,10 @@ - ei->i_prealloc_count = 0; - #endif - ei->i_block_group = i; -+ if (test_opt(sb, EXTENTS)) -+ EXT3_I(inode)->i_flags |= EXT3_EXTENTS_FL; -+ ei->i_depth = 0; -+ sema_init(&ei->i_ext_sem, 1); - - if (ei->i_flags & EXT3_SYNC_FL) - inode->i_flags |= S_SYNC; -Index: linux-2.4.18-chaos/fs/ext3/inode.c -=================================================================== ---- linux-2.4.18-chaos.orig/fs/ext3/inode.c 2004-01-13 16:10:23.000000000 +0300 -+++ linux-2.4.18-chaos/fs/ext3/inode.c 2004-01-13 16:11:00.000000000 +0300 -@@ -842,6 +842,15 @@ - goto reread; - } - -+static inline int -+ext3_get_block_wrap(handle_t *handle, struct inode *inode, long block, -+ struct buffer_head *bh, int create, int extend_disksize) -+{ -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_get_block(handle, inode, block, bh, create, 1); -+ return ext3_get_block_handle(handle, inode, block, bh, create, 1); -+} -+ - /* - * The BKL is not held on entry here. 
- */ -@@ -855,7 +864,7 @@ - handle = ext3_journal_current_handle(); - J_ASSERT(handle != 0); - } -- ret = ext3_get_block_handle(handle, inode, iblock, -+ ret = ext3_get_block_wrap(handle, inode, iblock, - bh_result, create, 1); - return ret; - } -@@ -882,7 +891,7 @@ - } - } - if (ret == 0) -- ret = ext3_get_block_handle(handle, inode, iblock, -+ ret = ext3_get_block_wrap(handle, inode, iblock, - bh_result, create, 0); - if (ret == 0) - bh_result->b_size = (1 << inode->i_blkbits); -@@ -904,7 +913,7 @@ - dummy.b_state = 0; - dummy.b_blocknr = -1000; - buffer_trace_init(&dummy.b_history); -- *errp = ext3_get_block_handle(handle, inode, block, &dummy, create, 1); -+ *errp = ext3_get_block_wrap(handle, inode, block, &dummy, create, 1); - if (!*errp && buffer_mapped(&dummy)) { - struct buffer_head *bh; - bh = sb_getblk(inode->i_sb, dummy.b_blocknr); -@@ -1520,7 +1529,7 @@ - * This required during truncate. We need to physically zero the tail end - * of that block so it doesn't yield old data if the file is later grown. - */ --static int ext3_block_truncate_page(handle_t *handle, -+int ext3_block_truncate_page(handle_t *handle, - struct address_space *mapping, loff_t from, - struct page *page, unsigned blocksize) - { -@@ -1998,6 +2007,9 @@ - - ext3_discard_prealloc(inode); - -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_truncate(inode); -+ - blocksize = inode->i_sb->s_blocksize; - last_block = (inode->i_size + blocksize-1) - >> EXT3_BLOCK_SIZE_BITS(inode->i_sb); -@@ -2436,6 +2448,8 @@ - ei->i_prealloc_count = 0; - #endif - ei->i_block_group = iloc.block_group; -+ ei->i_depth = raw_inode->osd2.linux2.l_i_depth; -+ sema_init(&ei->i_ext_sem, 1); - - /* - * NOTE! 
The in-memory inode i_data array is in little-endian order -@@ -2556,6 +2570,7 @@ - raw_inode->i_fsize = 0; - } - #endif -+ raw_inode->osd2.linux2.l_i_depth = ei->i_depth; - raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl); - if (!S_ISREG(inode->i_mode)) { - raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl); -@@ -2759,6 +2774,9 @@ - int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3; - int ret; - -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_writepage_trans_blocks(inode, bpp); -+ - if (ext3_should_journal_data(inode)) - ret = 3 * (bpp + indirects) + 2; - else -@@ -3082,7 +3100,7 @@ - - /* alloc blocks one by one */ - for (i = 0; i < nblocks; i++) { -- ret = ext3_get_block_handle(handle, inode, blocks[i], -+ ret = ext3_get_block_wrap(handle, inode, blocks[i], - &bh_tmp, 1, 1); - if (ret) - break; -@@ -3158,7 +3176,7 @@ - if (blocks[i] != 0) - continue; - -- rc = ext3_get_block_handle(handle, inode, iblock, &bh, 1, 1); -+ rc = ext3_get_block_wrap(handle, inode, iblock, &bh, 1, 1); - if (rc) { - printk(KERN_INFO "ext3_map_inode_page: error %d " - "allocating block %ld\n", rc, iblock); -Index: linux-2.4.18-chaos/fs/ext3/Makefile -=================================================================== ---- linux-2.4.18-chaos.orig/fs/ext3/Makefile 2004-01-13 16:10:23.000000000 +0300 -+++ linux-2.4.18-chaos/fs/ext3/Makefile 2004-01-13 16:11:00.000000000 +0300 -@@ -12,7 +12,8 @@ - export-objs := ext3-exports.o - - obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ -- ioctl.o namei.o super.o symlink.o xattr.o ext3-exports.o -+ ioctl.o namei.o super.o symlink.o xattr.o ext3-exports.o \ -+ extents.o - obj-m := $(O_TARGET) - - include $(TOPDIR)/Rules.make -Index: linux-2.4.18-chaos/fs/ext3/super.c -=================================================================== ---- linux-2.4.18-chaos.orig/fs/ext3/super.c 2004-01-13 16:10:23.000000000 +0300 -+++ linux-2.4.18-chaos/fs/ext3/super.c 2004-01-13 16:11:23.000000000 +0300 -@@ -622,6 
+622,7 @@ - - J_ASSERT(sbi->s_delete_inodes == 0); - -+ ext3_ext_release(sb); - ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); - if (!(sb->s_flags & MS_RDONLY)) { -@@ -743,6 +744,12 @@ - else - #endif - -+ if (!strcmp (this_char, "extents")) -+ set_opt (sbi->s_mount_opt, EXTENTS); -+ else -+ if (!strcmp (this_char, "extdebug")) -+ set_opt (sbi->s_mount_opt, EXTDEBUG); -+ else - if (!strcmp (this_char, "bsddf")) - clear_opt (*mount_options, MINIX_DF); - else if (!strcmp (this_char, "nouid32")) { -@@ -1470,6 +1477,7 @@ - test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal": - test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered": - "writeback"); -+ ext3_ext_init(sb); - - return sb; - -Index: linux-2.4.18-chaos/include/linux/ext3_fs.h -=================================================================== ---- linux-2.4.18-chaos.orig/include/linux/ext3_fs.h 2004-01-13 16:10:23.000000000 +0300 -+++ linux-2.4.18-chaos/include/linux/ext3_fs.h 2004-01-13 16:11:00.000000000 +0300 -@@ -183,6 +183,7 @@ - #define EXT3_IMAGIC_FL 0x00002000 /* AFS directory */ - #define EXT3_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */ - #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */ -+#define EXT3_EXTENTS_FL 0x00080000 /* Inode uses extents */ - - #define EXT3_FL_USER_VISIBLE 0x00005FFF /* User visible flags */ - #define EXT3_FL_USER_MODIFIABLE 0x000000FF /* User modifiable flags */ -@@ -243,7 +244,7 @@ - struct { - __u8 l_i_frag; /* Fragment number */ - __u8 l_i_fsize; /* Fragment size */ -- __u16 i_pad1; -+ __u16 l_i_depth; - __u16 l_i_uid_high; /* these 2 fields */ - __u16 l_i_gid_high; /* were reserved2[0] */ - __u32 l_i_reserved2; -@@ -324,6 +325,8 @@ - #define EXT3_MOUNT_IOPEN 0x8000 /* Allow access via iopen */ - #define EXT3_MOUNT_IOPEN_NOPRIV 0x10000 /* Make iopen world-readable */ - #define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */ -+#define EXT3_MOUNT_EXTENTS 0x40000 /* Extents support */ -+#define 
EXT3_MOUNT_EXTDEBUG 0x80000 /* Extents debug */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef _LINUX_EXT2_FS_H -@@ -663,6 +666,12 @@ - extern void ext3_dirty_inode(struct inode *); - extern int ext3_change_inode_journal_flag(struct inode *, int); - extern void ext3_truncate (struct inode *); -+extern int ext3_block_truncate_page(handle_t *handle, -+ struct address_space *mapping, loff_t from, -+ struct page *page, unsigned blocksize); -+extern int ext3_forget(handle_t *handle, int is_metadata, -+ struct inode *inode, struct buffer_head *bh, -+ int blocknr); - #ifdef EXT3_DELETE_THREAD - extern void ext3_truncate_thread(struct inode *inode); - #endif -@@ -722,6 +731,13 @@ - /* symlink.c */ - extern struct inode_operations ext3_fast_symlink_inode_operations; - -+/* extents.c */ -+extern int ext3_ext_writepage_trans_blocks(struct inode *, int); -+extern int ext3_ext_get_block(handle_t *, struct inode *, long, -+ struct buffer_head *, int, int); -+extern void ext3_ext_truncate(struct inode *); -+extern void ext3_ext_init(struct super_block *); -+extern void ext3_ext_release(struct super_block *); - - #endif /* __KERNEL__ */ - -Index: linux-2.4.18-chaos/include/linux/ext3_fs_i.h -=================================================================== ---- linux-2.4.18-chaos.orig/include/linux/ext3_fs_i.h 2001-11-22 22:46:19.000000000 +0300 -+++ linux-2.4.18-chaos/include/linux/ext3_fs_i.h 2004-01-13 16:11:00.000000000 +0300 -@@ -73,6 +73,10 @@ - * by other means, so we have truncate_sem. 
- */ - struct rw_semaphore truncate_sem; -+ -+ /* extents-related data */ -+ struct semaphore i_ext_sem; -+ __u16 i_depth; - }; - - #endif /* _LINUX_EXT3_FS_I */ -Index: linux-2.4.18-chaos/include/linux/ext3_fs_sb.h -=================================================================== ---- linux-2.4.18-chaos.orig/include/linux/ext3_fs_sb.h 2004-01-13 16:10:21.000000000 +0300 -+++ linux-2.4.18-chaos/include/linux/ext3_fs_sb.h 2004-01-13 16:11:00.000000000 +0300 -@@ -84,6 +84,16 @@ - wait_queue_head_t s_delete_thread_queue; - wait_queue_head_t s_delete_waiter_queue; - #endif -+ -+ /* extents */ -+ int s_ext_debug; -+ int s_ext_mindepth; -+ int s_ext_maxdepth; -+ int s_ext_sum; -+ int s_ext_count; -+ spinlock_t s_ext_lock; -+ int s_ext_extents; -+ int s_ext_blocks; - }; - - #endif /* _LINUX_EXT3_FS_SB */ diff --git a/lustre/kernel_patches/patches/ext3-extents-oflag-2.4.18-chaos.patch b/lustre/kernel_patches/patches/ext3-extents-oflag-2.4.18-chaos.patch deleted file mode 100644 index 53e00a4..0000000 --- a/lustre/kernel_patches/patches/ext3-extents-oflag-2.4.18-chaos.patch +++ /dev/null @@ -1,310 +0,0 @@ - fs/ext3/ialloc.c | 5 +++-- - fs/ext3/inode.c | 2 +- - fs/ext3/namei.c | 43 +++++++++++++++++++++++++++++++++++++------ - include/asm-alpha/fcntl.h | 1 + - include/asm-arm/fcntl.h | 1 + - include/asm-cris/fcntl.h | 1 + - include/asm-i386/fcntl.h | 1 + - include/asm-ia64/fcntl.h | 1 + - include/asm-m68k/fcntl.h | 1 + - include/asm-mips/fcntl.h | 1 + - include/asm-mips64/fcntl.h | 1 + - include/asm-parisc/fcntl.h | 1 + - include/asm-ppc/fcntl.h | 1 + - include/asm-s390/fcntl.h | 1 + - include/asm-s390x/fcntl.h | 1 + - include/asm-sh/fcntl.h | 1 + - include/asm-sparc/fcntl.h | 1 + - include/asm-sparc64/fcntl.h | 1 + - include/linux/ext3_fs.h | 2 +- - 19 files changed, 57 insertions(+), 10 deletions(-) - ---- linux-2.4.18-chaos/fs/ext3/ialloc.c~ext3-extents-oflag-2.4.18-chaos 2003-10-08 10:29:12.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/fs/ext3/ialloc.c 2003-10-08 
10:29:30.000000000 +0400 -@@ -331,7 +331,8 @@ int ext3_itable_block_used(struct super_ - */ - struct inode * ext3_new_inode (handle_t *handle, - const struct inode * dir, int mode, -- unsigned long goal) -+ unsigned long goal, -+ struct lookup_intent *it) - { - struct super_block * sb; - struct buffer_head * bh; -@@ -573,7 +574,7 @@ repeat: - ei->i_prealloc_count = 0; - #endif - ei->i_block_group = i; -- if (test_opt(sb, EXTENTS)) -+ if (test_opt(sb, EXTENTS) && it && (it->it_flags & O_EXTENTS)) - EXT3_I(inode)->i_flags |= EXT3_EXTENTS_FL; - ei->i_depth = 0; - sema_init(&ei->i_ext_sem, 1); ---- linux-2.4.18-chaos/fs/ext3/namei.c~ext3-extents-oflag-2.4.18-chaos 2003-10-08 10:29:11.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/fs/ext3/namei.c 2003-10-08 10:36:33.000000000 +0400 -@@ -1204,7 +1204,9 @@ static int ext3_add_nondir(handle_t *han - } - - static struct inode * ext3_new_inode_wantedi(handle_t *handle, struct inode *dir, -- int mode, struct dentry *dentry) -+ int mode, struct dentry *dentry, -+ struct lookup_intent *it) -+ - { - unsigned long inum = 0; - -@@ -1213,7 +1215,7 @@ static struct inode * ext3_new_inode_wan - (struct dentry_params *) dentry->d_fsdata; - inum = param->p_inum; - } -- return ext3_new_inode(handle, dir, mode, inum); -+ return ext3_new_inode(handle, dir, mode, inum, it); - } - - /* -@@ -1238,7 +1240,35 @@ static int ext3_create (struct inode * d - if (IS_SYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode_wantedi (handle, dir, mode, dentry); -+ inode = ext3_new_inode_wantedi (handle, dir, mode, dentry, NULL); -+ err = PTR_ERR(inode); -+ if (!IS_ERR(inode)) { -+ inode->i_op = &ext3_file_inode_operations; -+ inode->i_fop = &ext3_file_operations; -+ inode->i_mapping->a_ops = &ext3_aops; -+ err = ext3_add_nondir(handle, dentry, inode); -+ ext3_mark_inode_dirty(handle, inode); -+ } -+ ext3_journal_stop(handle, dir); -+ return err; -+} -+ -+static int ext3_create_it (struct inode * dir, struct dentry * dentry, int mode, -+ struct 
lookup_intent *it) -+{ -+ handle_t *handle; -+ struct inode * inode; -+ int err; -+ -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ if (IS_SYNC(dir)) -+ handle->h_sync = 1; -+ -+ inode = ext3_new_inode_wantedi (handle, dir, mode, dentry, it); - err = PTR_ERR(inode); - if (!IS_ERR(inode)) { - inode->i_op = &ext3_file_inode_operations; -@@ -1266,7 +1296,7 @@ static int ext3_mknod (struct inode * di - if (IS_SYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode_wantedi (handle, dir, mode, dentry); -+ inode = ext3_new_inode_wantedi (handle, dir, mode, dentry, NULL); - err = PTR_ERR(inode); - if (!IS_ERR(inode)) { - init_special_inode(inode, mode, rdev); -@@ -1296,7 +1326,7 @@ static int ext3_mkdir(struct inode * dir - if (IS_SYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode_wantedi (handle, dir, S_IFDIR | mode, dentry); -+ inode = ext3_new_inode_wantedi (handle, dir, S_IFDIR|mode, dentry, NULL); - err = PTR_ERR(inode); - if (IS_ERR(inode)) - goto out_stop; -@@ -1688,7 +1718,7 @@ static int ext3_symlink (struct inode * - if (IS_SYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode_wantedi (handle, dir, S_IFLNK|S_IRWXUGO, dentry); -+ inode = ext3_new_inode_wantedi (handle, dir, S_IFLNK|S_IRWXUGO, dentry,NULL); - err = PTR_ERR(inode); - if (IS_ERR(inode)) - goto out_stop; -@@ -1892,6 +1922,7 @@ end_rename: - * directories can handle most operations... 
- */ - struct inode_operations ext3_dir_inode_operations = { -+ create_it: ext3_create_it, /* BKL held */ - create: ext3_create, /* BKL held */ - lookup: ext3_lookup, /* BKL held */ - link: ext3_link, /* BKL held */ ---- linux-2.4.18-chaos/include/asm-alpha/fcntl.h~ext3-extents-oflag-2.4.18-chaos 2003-07-28 17:52:07.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/include/asm-alpha/fcntl.h 2003-10-08 10:29:30.000000000 +0400 -@@ -22,6 +22,7 @@ - #define O_LARGEFILE 0400000 /* will be set by the kernel on every open */ - #define O_ATOMICLOOKUP 01000000 /* do atomic file lookup */ - #define O_DIRECT 02000000 /* direct disk access - should check with OSF/1 */ -+#define O_EXTENTS 04000000 /* create file with extents if possible */ - - #define F_DUPFD 0 /* dup */ - #define F_GETFD 1 /* get close_on_exec */ ---- linux-2.4.18-chaos/include/asm-arm/fcntl.h~ext3-extents-oflag-2.4.18-chaos 2003-07-28 17:52:07.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/include/asm-arm/fcntl.h 2003-10-08 10:29:30.000000000 +0400 -@@ -21,6 +21,7 @@ - #define O_DIRECT 0200000 /* direct disk access hint - currently ignored */ - #define O_LARGEFILE 0400000 - #define O_ATOMICLOOKUP 01000000 -+#define O_EXTENTS 02000000 /* create file with extents if possible */ - - #define F_DUPFD 0 /* dup */ - #define F_GETFD 1 /* get close_on_exec */ ---- linux-2.4.18-chaos/include/asm-cris/fcntl.h~ext3-extents-oflag-2.4.18-chaos 2001-02-09 03:32:44.000000000 +0300 -+++ linux-2.4.18-chaos-alexey/include/asm-cris/fcntl.h 2003-10-08 10:29:30.000000000 +0400 -@@ -22,6 +22,7 @@ - #define O_LARGEFILE 0100000 - #define O_DIRECTORY 0200000 /* must be a directory */ - #define O_NOFOLLOW 0400000 /* don't follow links */ -+#define O_EXTENTS 01000000 /* create file with extents if possible */ - - #define F_DUPFD 0 /* dup */ - #define F_GETFD 1 /* get f_flags */ ---- linux-2.4.18-chaos/include/asm-i386/fcntl.h~ext3-extents-oflag-2.4.18-chaos 2003-07-28 17:52:09.000000000 +0400 -+++ 
linux-2.4.18-chaos-alexey/include/asm-i386/fcntl.h 2003-10-08 10:29:30.000000000 +0400 -@@ -21,6 +21,7 @@ - #define O_DIRECTORY 0200000 /* must be a directory */ - #define O_NOFOLLOW 0400000 /* don't follow links */ - #define O_ATOMICLOOKUP 01000000 /* do atomic file lookup */ -+#define O_EXTENTS 02000000 /* create file with extents if possible */ - - #define F_DUPFD 0 /* dup */ - #define F_GETFD 1 /* get close_on_exec */ ---- linux-2.4.18-chaos/include/asm-ia64/fcntl.h~ext3-extents-oflag-2.4.18-chaos 2003-07-28 17:52:09.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/include/asm-ia64/fcntl.h 2003-10-08 10:29:30.000000000 +0400 -@@ -29,6 +29,7 @@ - #define O_DIRECTORY 0200000 /* must be a directory */ - #define O_NOFOLLOW 0400000 /* don't follow links */ - #define O_ATOMICLOOKUP 01000000 /* do atomic file lookup */ -+#define O_EXTENTS 02000000 /* create file with extents if possible */ - - #define F_DUPFD 0 /* dup */ - #define F_GETFD 1 /* get close_on_exec */ ---- linux-2.4.18-chaos/include/asm-m68k/fcntl.h~ext3-extents-oflag-2.4.18-chaos 2000-11-28 05:00:49.000000000 +0300 -+++ linux-2.4.18-chaos-alexey/include/asm-m68k/fcntl.h 2003-10-08 10:29:30.000000000 +0400 -@@ -20,6 +20,7 @@ - #define O_NOFOLLOW 0100000 /* don't follow links */ - #define O_DIRECT 0200000 /* direct disk access hint - currently ignored */ - #define O_LARGEFILE 0400000 -+#define O_EXTENTS 01000000 /* create file with extents if possible */ - - #define F_DUPFD 0 /* dup */ - #define F_GETFD 1 /* get close_on_exec */ ---- linux-2.4.18-chaos/include/asm-mips64/fcntl.h~ext3-extents-oflag-2.4.18-chaos 2003-07-28 17:52:15.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/include/asm-mips64/fcntl.h 2003-10-08 10:29:30.000000000 +0400 -@@ -27,6 +27,7 @@ - #define O_DIRECTORY 0x10000 /* must be a directory */ - #define O_NOFOLLOW 0x20000 /* don't follow links */ - #define O_ATOMICLOOKUP 0x40000 -+#define O_EXTENTS 0x80000 /* create file with extents if possible */ - - #define O_NDELAY O_NONBLOCK - ---- 
linux-2.4.18-chaos/include/asm-mips/fcntl.h~ext3-extents-oflag-2.4.18-chaos 2003-07-28 17:52:14.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/include/asm-mips/fcntl.h 2003-10-08 10:29:30.000000000 +0400 -@@ -27,6 +27,7 @@ - #define O_DIRECTORY 0x10000 /* must be a directory */ - #define O_NOFOLLOW 0x20000 /* don't follow links */ - #define O_ATOMICLOOKUP 0x40000 -+#define O_EXTENTS 02000000 /* create file with extents if possible */ - - #define O_NDELAY O_NONBLOCK - ---- linux-2.4.18-chaos/include/asm-parisc/fcntl.h~ext3-extents-oflag-2.4.18-chaos 2000-12-05 23:29:39.000000000 +0300 -+++ linux-2.4.18-chaos-alexey/include/asm-parisc/fcntl.h 2003-10-08 10:29:30.000000000 +0400 -@@ -19,6 +19,7 @@ - #define O_NOCTTY 00400000 /* not fcntl */ - #define O_DSYNC 01000000 /* HPUX only */ - #define O_RSYNC 02000000 /* HPUX only */ -+#define O_EXTENTS 04000000 /* create file with extents if possible */ - - #define FASYNC 00020000 /* fcntl, for BSD compatibility */ - #define O_DIRECT 00040000 /* direct disk access hint - currently ignored */ ---- linux-2.4.18-chaos/include/asm-ppc/fcntl.h~ext3-extents-oflag-2.4.18-chaos 2003-07-28 17:52:15.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/include/asm-ppc/fcntl.h 2003-10-08 10:29:30.000000000 +0400 -@@ -24,6 +24,7 @@ - #define O_LARGEFILE 0200000 - #define O_DIRECT 0400000 /* direct disk access hint */ - #define O_ATOMICLOOKUP 01000000 /* do atomic file lookup */ -+#define O_EXTENT 02000000 /* create file with extents if possible */ - - #define F_DUPFD 0 /* dup */ - #define F_GETFD 1 /* get close_on_exec */ ---- linux-2.4.18-chaos/include/asm-s390/fcntl.h~ext3-extents-oflag-2.4.18-chaos 2003-07-28 17:52:15.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/include/asm-s390/fcntl.h 2003-10-08 10:29:30.000000000 +0400 -@@ -28,6 +28,7 @@ - #define O_DIRECTORY 0200000 /* must be a directory */ - #define O_NOFOLLOW 0400000 /* don't follow links */ - #define O_ATOMICLOOKUP 01000000 /* do atomic file lookup */ -+#define O_EXTENTS 02000000 
/* create file with extents if possible */ - - #define F_DUPFD 0 /* dup */ - #define F_GETFD 1 /* get close_on_exec */ ---- linux-2.4.18-chaos/include/asm-s390x/fcntl.h~ext3-extents-oflag-2.4.18-chaos 2003-07-28 17:52:15.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/include/asm-s390x/fcntl.h 2003-10-08 10:29:30.000000000 +0400 -@@ -28,6 +28,7 @@ - #define O_DIRECTORY 0200000 /* must be a directory */ - #define O_NOFOLLOW 0400000 /* don't follow links */ - #define O_ATOMICLOOKUP 01000000 /* do atomic file lookup */ -+#define O_EXTENTS 02000000 /* create file with extents if possible */ - - #define F_DUPFD 0 /* dup */ - #define F_GETFD 1 /* get close_on_exec */ ---- linux-2.4.18-chaos/include/asm-sh/fcntl.h~ext3-extents-oflag-2.4.18-chaos 2003-07-28 17:52:15.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/include/asm-sh/fcntl.h 2003-10-08 10:29:30.000000000 +0400 -@@ -21,6 +21,7 @@ - #define O_DIRECTORY 0200000 /* must be a directory */ - #define O_NOFOLLOW 0400000 /* don't follow links */ - #define O_ATOMICLOOKUP 01000000 -+#define O_EXTENTS 02000000 /* create file with extents if possible */ - - #define F_DUPFD 0 /* dup */ - #define F_GETFD 1 /* get close_on_exec */ ---- linux-2.4.18-chaos/include/asm-sparc64/fcntl.h~ext3-extents-oflag-2.4.18-chaos 2003-07-28 17:52:16.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/include/asm-sparc64/fcntl.h 2003-10-08 10:29:30.000000000 +0400 -@@ -22,6 +22,7 @@ - #define O_LARGEFILE 0x40000 - #define O_ATOMICLOOKUP 0x80000 /* do atomic file lookup */ - #define O_DIRECT 0x100000 /* direct disk access hint */ -+#define O_EXTENTS 0x200000 /* create file with extents if possible */ - - - #define F_DUPFD 0 /* dup */ ---- linux-2.4.18-chaos/include/asm-sparc/fcntl.h~ext3-extents-oflag-2.4.18-chaos 2003-07-28 17:52:16.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/include/asm-sparc/fcntl.h 2003-10-08 10:29:30.000000000 +0400 -@@ -22,6 +22,7 @@ - #define O_LARGEFILE 0x40000 - #define O_ATOMICLOOKUP 0x80000 /* do atomic file lookup */ - 
#define O_DIRECT 0x100000 /* direct disk access hint */ -+#define O_EXTENTS 0x200000 /* create file with extents if possible */ - - #define F_DUPFD 0 /* dup */ - #define F_GETFD 1 /* get close_on_exec */ ---- linux-2.4.18-chaos/include/linux/ext3_fs.h~ext3-extents-oflag-2.4.18-chaos 2003-10-08 10:29:12.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/include/linux/ext3_fs.h 2003-10-08 10:29:30.000000000 +0400 -@@ -641,7 +641,7 @@ extern int ext3_sync_file (struct file * - - /* ialloc.c */ - extern struct inode * ext3_new_inode (handle_t *, const struct inode *, int, -- unsigned long); -+ unsigned long, struct lookup_intent *); - extern void ext3_free_inode (handle_t *, struct inode *); - extern struct inode * ext3_orphan_get (struct super_block *, unsigned long); - extern unsigned long ext3_count_free_inodes (struct super_block *); ---- linux-2.4.18-chaos/fs/ext3/inode.c~ext3-extents-oflag-2.4.18-chaos 2003-10-08 10:29:12.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/fs/ext3/inode.c 2003-10-08 10:29:30.000000000 +0400 -@@ -2204,7 +2204,7 @@ void ext3_truncate_thread(struct inode * - if (IS_ERR(handle)) - goto out_truncate; - -- new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode, 0); -+ new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode, 0, 0); - if (IS_ERR(new_inode)) { - ext3_debug("truncate inode %lu directly (no new inodes)\n", - old_inode->i_ino); - -_ diff --git a/lustre/kernel_patches/patches/ext3-inode-reuse-2.4.18.patch b/lustre/kernel_patches/patches/ext3-inode-reuse-2.4.18.patch deleted file mode 100644 index 85e12b8..0000000 --- a/lustre/kernel_patches/patches/ext3-inode-reuse-2.4.18.patch +++ /dev/null @@ -1,350 +0,0 @@ -Index: linux-2.4.18-chaos/fs/ext3/ialloc.c -=================================================================== ---- linux-2.4.18-chaos.orig/fs/ext3/ialloc.c 2003-10-22 14:23:53.000000000 +0400 -+++ linux-2.4.18-chaos/fs/ext3/ialloc.c 2003-10-29 20:42:04.000000000 +0300 -@@ -241,11 +241,16 @@ - - bh = 
EXT3_SB(sb)->s_inode_bitmap[bitmap_nr]; - -- BUFFER_TRACE(bh, "get_write_access"); -- fatal = ext3_journal_get_write_access(handle, bh); -+ BUFFER_TRACE(bh, "get_undo_access"); -+ fatal = ext3_journal_get_undo_access(handle, bh); - if (fatal) - goto error_return; - -+ /* to prevent inode reusing within single transaction -bzzz */ -+ BUFFER_TRACE(bh, "clear in b_committed_data"); -+ J_ASSERT_BH(bh, bh2jh(bh)->b_committed_data != NULL); -+ ext3_set_bit(bit, bh2jh(bh)->b_committed_data); -+ - /* Ok, now we can actually update the inode bitmaps.. */ - if (!ext3_clear_bit (bit, bh->b_data)) - ext3_error (sb, "ext3_free_inode", -@@ -319,6 +324,131 @@ - return 0; - } - -+static int ext3_test_allocatable(int nr, struct buffer_head *bh) -+{ -+ if (ext3_test_bit(nr, bh->b_data)) -+ return 0; -+ if (!buffer_jbd(bh) || !bh2jh(bh)->b_committed_data) -+ return 1; -+#if 0 -+ if (!ext3_test_bit(nr, bh2jh(bh)->b_committed_data)) -+ printk("EXT3-fs: inode %d was used\n", nr); -+#endif -+ return !ext3_test_bit(nr, bh2jh(bh)->b_committed_data); -+} -+ -+int ext3_find_group_dir(const struct inode *dir, -+ struct ext3_group_desc **gdp, -+ struct buffer_head **bh) -+{ -+ struct super_block *sb = dir->i_sb; -+ struct ext3_super_block *es; -+ struct ext3_group_desc *tmp; -+ int i = 0, j, avefreei; -+ -+ es = EXT3_SB(sb)->s_es; -+ avefreei = le32_to_cpu(es->s_free_inodes_count) / -+ EXT3_SB(sb)->s_groups_count; -+ for (j = 0; j < EXT3_SB(sb)->s_groups_count; j++) { -+ struct buffer_head *temp_buffer; -+ tmp = ext3_get_group_desc(sb, j, &temp_buffer); -+ if (tmp && le16_to_cpu(tmp->bg_free_inodes_count) && -+ le16_to_cpu(tmp->bg_free_inodes_count) >= avefreei) { -+ if (!*gdp || (le16_to_cpu(tmp->bg_free_blocks_count) > -+ le16_to_cpu((*gdp)->bg_free_blocks_count))) { -+ i = j; -+ *gdp = tmp; -+ *bh = temp_buffer; -+ } -+ } -+ } -+ -+ return i; -+} -+ -+int ext3_find_group_other(const struct inode *dir, -+ struct ext3_group_desc **gdp, -+ struct buffer_head **bh) -+{ -+ struct super_block *sb 
= dir->i_sb; -+ struct ext3_group_desc *tmp; -+ int i, j; -+ -+ /* -+ * Try to place the inode in its parent directory -+ */ -+ i = EXT3_I(dir)->i_block_group; -+ tmp = ext3_get_group_desc(sb, i, bh); -+ if (tmp && le16_to_cpu(tmp->bg_free_inodes_count)) -+ *gdp = tmp; -+ else { -+ /* -+ * Use a quadratic hash to find a group with a -+ * free inode -+ */ -+ for (j = 1; j < EXT3_SB(sb)->s_groups_count; j <<= 1) { -+ i += j; -+ if (i >= EXT3_SB(sb)->s_groups_count) -+ i -= EXT3_SB(sb)->s_groups_count; -+ tmp = ext3_get_group_desc (sb, i, bh); -+ if (tmp && le16_to_cpu(tmp->bg_free_inodes_count)) { -+ *gdp = tmp; -+ break; -+ } -+ } -+ } -+ if (!*gdp) { -+ /* -+ * That failed: try linear search for a free inode -+ */ -+ i = EXT3_I(dir)->i_block_group + 1; -+ for (j = 2; j < EXT3_SB(sb)->s_groups_count; j++) { -+ if (++i >= EXT3_SB(sb)->s_groups_count) -+ i = 0; -+ tmp = ext3_get_group_desc (sb, i, bh); -+ if (tmp && le16_to_cpu(tmp->bg_free_inodes_count)) { -+ *gdp = tmp; -+ break; -+ } -+ } -+ } -+ -+ return i; -+} -+ -+static int ext3_find_group(const struct inode *dir, int mode, -+ struct ext3_group_desc **gdp, -+ struct buffer_head **bh) -+{ -+ if (S_ISDIR(mode)) -+ return ext3_find_group_dir(dir, gdp, bh); -+ return ext3_find_group_other(dir, gdp, bh); -+} -+ -+static int ext3_find_usable_inode(struct super_block *sb, -+ struct buffer_head *bh) -+{ -+ int here, maxinodes, next; -+ -+ maxinodes = EXT3_INODES_PER_GROUP(sb); -+ here = 0; -+ -+ while (here < maxinodes) { -+ next = ext3_find_next_zero_bit((unsigned long *) bh->b_data, -+ maxinodes, here); -+ if (next >= maxinodes) -+ return -1; -+ if (ext3_test_allocatable(next, bh)) -+ return next; -+ -+ J_ASSERT_BH(bh, bh2jh(bh)->b_committed_data); -+ here = ext3_find_next_zero_bit -+ ((unsigned long *) bh2jh(bh)->b_committed_data, -+ maxinodes, next); -+ } -+ return -1; -+} -+ - /* - * There are two policies for allocating an inode. 
If the new inode is - * a directory, then a forward search is made for a block group with both -@@ -337,7 +467,7 @@ - struct super_block * sb; - struct buffer_head * bh; - struct buffer_head * bh2; -- int i, j, avefreei; -+ int i, j, k; - struct inode * inode; - int bitmap_nr; - struct ext3_inode_info *ei; -@@ -376,11 +506,12 @@ - - bh = EXT3_SB(sb)->s_inode_bitmap[bitmap_nr]; - -- BUFFER_TRACE(bh, "get_write_access"); -- err = ext3_journal_get_write_access(handle, bh); -+ BUFFER_TRACE(bh, "get_undo_access"); -+ err = ext3_journal_get_undo_access(handle, bh); - if (err) goto fail; - -- if (ext3_set_bit(j, bh->b_data)) { -+ if (!ext3_test_allocatable(j, bh) || -+ ext3_set_bit(j, bh->b_data)) { - printk(KERN_ERR "goal inode %lu unavailable\n", goal); - /* Oh well, we tried. */ - goto repeat; -@@ -398,119 +529,68 @@ - - repeat: - gdp = NULL; -- i = 0; -- -- if (S_ISDIR(mode)) { -- avefreei = le32_to_cpu(es->s_free_inodes_count) / -- sbi->s_groups_count; -- if (!gdp) { -- for (j = 0; j < sbi->s_groups_count; j++) { -- struct buffer_head *temp_buffer; -- tmp = ext3_get_group_desc (sb, j, &temp_buffer); -- if (tmp && -- le16_to_cpu(tmp->bg_free_inodes_count) && -- le16_to_cpu(tmp->bg_free_inodes_count) >= -- avefreei) { -- if (!gdp || (le16_to_cpu(tmp->bg_free_blocks_count) > -- le16_to_cpu(gdp->bg_free_blocks_count))) { -- i = j; -- gdp = tmp; -- bh2 = temp_buffer; -- } -- } -- } -- } -- } else { -- /* -- * Try to place the inode in its parent directory -- */ -- i = EXT3_I(dir)->i_block_group; -- tmp = ext3_get_group_desc (sb, i, &bh2); -- if (tmp && le16_to_cpu(tmp->bg_free_inodes_count)) -- gdp = tmp; -- else -- { -- /* -- * Use a quadratic hash to find a group with a -- * free inode -- */ -- for (j = 1; j < sbi->s_groups_count; j <<= 1) { -- i += j; -- if (i >= sbi->s_groups_count) -- i -= sbi->s_groups_count; -- tmp = ext3_get_group_desc (sb, i, &bh2); -- if (tmp && -- le16_to_cpu(tmp->bg_free_inodes_count)) { -- gdp = tmp; -- break; -- } -- } -- } -- if (!gdp) { -- 
/* -- * That failed: try linear search for a free inode -- */ -- i = EXT3_I(dir)->i_block_group + 1; -- for (j = 2; j < sbi->s_groups_count; j++) { -- if (++i >= sbi->s_groups_count) -- i = 0; -- tmp = ext3_get_group_desc (sb, i, &bh2); -- if (tmp && -- le16_to_cpu(tmp->bg_free_inodes_count)) { -- gdp = tmp; -- break; -- } -- } -- } -- } - -+ /* choose group */ -+ i = ext3_find_group(dir, mode, &gdp, &bh2); - err = -ENOSPC; - if (!gdp) - goto out; -- -+ - err = -EIO; -- bitmap_nr = load_inode_bitmap (sb, i); -+ bitmap_nr = load_inode_bitmap(sb, i); - if (bitmap_nr < 0) - goto fail; -- - bh = sbi->s_inode_bitmap[bitmap_nr]; - -- if ((j = ext3_find_first_zero_bit ((unsigned long *) bh->b_data, -- sbi->s_inodes_per_group)) < -- sbi->s_inodes_per_group) { -- BUFFER_TRACE(bh, "get_write_access"); -- err = ext3_journal_get_write_access(handle, bh); -- if (err) goto fail; -- -- if (ext3_set_bit (j, bh->b_data)) { -- ext3_error (sb, "ext3_new_inode", -- "bit already set for inode %d", j); -- goto repeat; -- } -- BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); -- err = ext3_journal_dirty_metadata(handle, bh); -- if (err) goto fail; -- } else { -- if (le16_to_cpu(gdp->bg_free_inodes_count) != 0) { -- ext3_error (sb, "ext3_new_inode", -- "Free inodes count corrupted in group %d", -- i); -- /* Is it really ENOSPC? 
*/ -- err = -ENOSPC; -- if (sb->s_flags & MS_RDONLY) -- goto fail; -- -- BUFFER_TRACE(bh2, "get_write_access"); -- err = ext3_journal_get_write_access(handle, bh2); -- if (err) goto fail; -- gdp->bg_free_inodes_count = 0; -- BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata"); -- err = ext3_journal_dirty_metadata(handle, bh2); -- if (err) goto fail; -+ /* try to allocate in selected group */ -+ if ((j = ext3_find_usable_inode(sb, bh)) >= 0) -+ goto find_free; -+ -+ /* can't allocate: try to allocate in ANY another groups */ -+ k = i; -+ err = -EIO; -+ for (i = i + 1; i != k; i++) { -+ if (i >= sbi->s_groups_count) -+ i = 0; -+ tmp = ext3_get_group_desc(sb, i, &bh2); -+ if (le16_to_cpu(tmp->bg_free_inodes_count) == 0) -+ continue; -+ -+ bitmap_nr = load_inode_bitmap(sb, i); -+ if (bitmap_nr < 0) -+ goto fail; -+ bh = sbi->s_inode_bitmap[bitmap_nr]; -+ -+ /* try to allocate in selected group */ -+ if ((j = ext3_find_usable_inode(sb, bh)) >= 0) { -+ gdp = tmp; -+ break; - } -- goto repeat; - } -+ err = -ENOSPC; -+ if (!gdp) -+ goto out; -+ -+ find_free: -+ BUFFER_TRACE(bh, "get_undo_access"); -+ err = ext3_journal_get_undo_access(handle, bh); -+ if (err) -+ goto fail; -+ -+ if (ext3_set_bit(j, bh->b_data)) { -+ ext3_error (sb, "ext3_new_inode", -+ "bit already set for inode %d", j); -+ goto fail; -+ } -+ BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); -+ err = ext3_journal_dirty_metadata(handle, bh); -+ if (err) -+ goto fail; -+ - have_bit_and_group: -+ if (buffer_jbd(bh) && bh2jh(bh)->b_committed_data) -+ J_ASSERT_BH(bh, !ext3_test_bit(j, bh2jh(bh)->b_committed_data)); -+ - j += i * EXT3_INODES_PER_GROUP(sb) + 1; - if (j < EXT3_FIRST_INO(sb) || j > le32_to_cpu(es->s_inodes_count)) { - ext3_error (sb, "ext3_new_inode", diff --git a/lustre/kernel_patches/patches/ext3-trusted_ea-2.4.18.patch b/lustre/kernel_patches/patches/ext3-trusted_ea-2.4.18.patch deleted file mode 100644 index 4ee1e2c..0000000 --- a/lustre/kernel_patches/patches/ext3-trusted_ea-2.4.18.patch 
+++ /dev/null @@ -1,180 +0,0 @@ - fs/ext3/xattr.c | 12 +++++- - fs/ext3/xattr_trusted.c | 86 +++++++++++++++++++++++++++++++++++++++++++++ - include/linux/ext3_xattr.h | 6 +++ - 3 files changed, 102 insertions(+), 2 deletions(-) - -Index: linux-2.4.18-chaos/fs/ext3/xattr.c -=================================================================== ---- linux-2.4.18-chaos.orig/fs/ext3/xattr.c 2003-10-22 14:23:53.000000000 +0400 -+++ linux-2.4.18-chaos/fs/ext3/xattr.c 2003-10-24 01:01:03.000000000 +0400 -@@ -1789,18 +1789,25 @@ - int __init - init_ext3_xattr(void) - { -+ int error; -+ - ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL, - sizeof(struct mb_cache_entry) + - sizeof(struct mb_cache_entry_index), 1, 61); - if (!ext3_xattr_cache) - return -ENOMEM; - -- return 0; -+ error = init_ext3_xattr_trusted(); -+ if (error) -+ mb_cache_destroy(ext3_xattr_cache); -+ -+ return error; - } - - void - exit_ext3_xattr(void) - { -+ exit_ext3_xattr_trusted(); - if (ext3_xattr_cache) - mb_cache_destroy(ext3_xattr_cache); - ext3_xattr_cache = NULL; -@@ -1811,12 +1818,13 @@ - int __init - init_ext3_xattr(void) - { -- return 0; -+ return init_ext3_xattr_trusted(); - } - - void - exit_ext3_xattr(void) - { -+ exit_ext3_xattr_trusted(); - } - - #endif /* CONFIG_EXT3_FS_XATTR_SHARING */ -Index: linux-2.4.18-chaos/fs/ext3/xattr_trusted.c -=================================================================== ---- linux-2.4.18-chaos.orig/fs/ext3/xattr_trusted.c 2003-10-24 01:01:03.000000000 +0400 -+++ linux-2.4.18-chaos/fs/ext3/xattr_trusted.c 2003-10-24 01:01:03.000000000 +0400 -@@ -0,0 +1,86 @@ -+/* -+ * linux/fs/ext3/xattr_trusted.c -+ * Handler for trusted extended attributes. -+ * -+ * Copyright (C) 2003 by Andreas Gruenbacher, -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define XATTR_TRUSTED_PREFIX "trusted." 
-+ -+static size_t -+ext3_xattr_trusted_list(char *list, struct inode *inode, -+ const char *name, int name_len) -+{ -+ const int prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1; -+ -+ if (!capable(CAP_SYS_ADMIN)) -+ return 0; -+ -+ if (list) { -+ memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len); -+ memcpy(list+prefix_len, name, name_len); -+ list[prefix_len + name_len] = '\0'; -+ } -+ return prefix_len + name_len + 1; -+} -+ -+static int -+ext3_xattr_trusted_get(struct inode *inode, const char *name, -+ void *buffer, size_t size) -+{ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ if (!capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ return ext3_xattr_get(inode, EXT3_XATTR_INDEX_TRUSTED, name, -+ buffer, size); -+} -+ -+static int -+ext3_xattr_trusted_set(struct inode *inode, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ handle_t *handle; -+ int error; -+ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ if (!capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ handle = ext3_journal_start(inode, EXT3_XATTR_TRANS_BLOCKS); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ error = ext3_xattr_set(handle, inode, EXT3_XATTR_INDEX_TRUSTED, name, -+ value, size, flags); -+ ext3_journal_stop(handle, inode); -+ -+ return error; -+} -+ -+struct ext3_xattr_handler ext3_xattr_trusted_handler = { -+ .prefix = XATTR_TRUSTED_PREFIX, -+ .list = ext3_xattr_trusted_list, -+ .get = ext3_xattr_trusted_get, -+ .set = ext3_xattr_trusted_set, -+}; -+ -+int __init -+init_ext3_xattr_trusted(void) -+{ -+ return ext3_xattr_register(EXT3_XATTR_INDEX_TRUSTED, -+ &ext3_xattr_trusted_handler); -+} -+ -+void -+exit_ext3_xattr_trusted(void) -+{ -+ ext3_xattr_unregister(EXT3_XATTR_INDEX_TRUSTED, -+ &ext3_xattr_trusted_handler); -+} -Index: linux-2.4.18-chaos/include/linux/ext3_xattr.h -=================================================================== ---- linux-2.4.18-chaos.orig/include/linux/ext3_xattr.h 2003-10-22 14:23:51.000000000 +0400 -+++ 
linux-2.4.18-chaos/include/linux/ext3_xattr.h 2003-10-24 01:01:03.000000000 +0400 -@@ -19,6 +19,10 @@ - /* Name indexes */ - #define EXT3_XATTR_INDEX_MAX 10 - #define EXT3_XATTR_INDEX_USER 1 -+#define EXT3_XATTR_INDEX_TRUSTED 4 -+#define EXT3_XATTR_INDEX_LUSTRE 5 -+#define EXT3_XATTR_INDEX_SECURITY 6 -+ - - struct ext3_xattr_header { - __u32 h_magic; /* magic number for identification */ -@@ -82,6 +86,9 @@ - extern int init_ext3_xattr(void) __init; - extern void exit_ext3_xattr(void); - -+extern int init_ext3_xattr_trusted(void) __init; -+extern void exit_ext3_xattr_trusted(void); -+ - # else /* CONFIG_EXT3_FS_XATTR */ - # define ext3_setxattr NULL - # define ext3_getxattr NULL -Index: linux-2.4.18-chaos/fs/ext3/Makefile -=================================================================== ---- linux-2.4.18-chaos.orig/fs/ext3/Makefile 2003-10-22 14:23:53.000000000 +0400 -+++ linux-2.4.18-chaos/fs/ext3/Makefile 2003-10-24 01:02:28.000000000 +0400 -@@ -13,7 +13,7 @@ - - obj-y := balloc.o iopen.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ - ioctl.o namei.o super.o symlink.o xattr.o ext3-exports.o \ -- extents.o -+ extents.o xattr_trusted.o - obj-m := $(O_TARGET) - - include $(TOPDIR)/Rules.make diff --git a/lustre/kernel_patches/patches/gfp_memalloc-2.4.18-chaos.patch b/lustre/kernel_patches/patches/gfp_memalloc-2.4.18-chaos.patch deleted file mode 100644 index 97cd9dc..0000000 --- a/lustre/kernel_patches/patches/gfp_memalloc-2.4.18-chaos.patch +++ /dev/null @@ -1,69 +0,0 @@ -Index: linux-2.4.18-chaos/include/linux/mm.h -=================================================================== ---- linux-2.4.18-chaos.orig/include/linux/mm.h 2003-11-23 00:07:20.000000000 +0300 -+++ linux-2.4.18-chaos/include/linux/mm.h 2003-11-23 00:07:23.000000000 +0300 -@@ -677,6 +677,7 @@ - #define __GFP_IO 0x40 /* Can start low memory physical IO? */ - #define __GFP_HIGHIO 0x80 /* Can start high mem physical IO? */ - #define __GFP_FS 0x100 /* Can call down to low-level FS? 
*/ -+#define __GFP_MEMALLOC 0x200 /* like PF_MEMALLOC: see __alloc_pages */ - - #define GFP_NOHIGHIO (__GFP_HIGH | __GFP_WAIT | __GFP_IO) - #define GFP_NOIO (__GFP_HIGH | __GFP_WAIT) -@@ -687,6 +688,7 @@ - #define GFP_KERNEL (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS) - #define GFP_NFS (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS) - #define GFP_KSWAPD ( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS) -+#define GFP_MEMALLOC __GFP_MEMALLOC - - /* Flag - indicates that the buffer will be suitable for DMA. Ignored on some - platforms, used as appropriate on others */ -Index: linux-2.4.18-chaos/mm/page_alloc.c -=================================================================== ---- linux-2.4.18-chaos.orig/mm/page_alloc.c 2003-11-23 00:07:20.000000000 +0300 -+++ linux-2.4.18-chaos/mm/page_alloc.c 2003-12-02 23:12:31.000000000 +0300 -@@ -554,7 +554,7 @@ - /* - * Oh well, we didn't succeed. - */ -- if (!(current->flags & PF_MEMALLOC)) { -+ if (!(current->flags & PF_MEMALLOC) && !(gfp_mask & __GFP_MEMALLOC)) { - /* - * Are we dealing with a higher order allocation? - * -@@ -628,7 +628,9 @@ - - /* XXX: is pages_min/4 a good amount to reserve for this? 
*/ - min += z->pages_min / 4; -- if (z->free_pages > min || ((current->flags & PF_MEMALLOC) && !in_interrupt())) { -+ if (z->free_pages > min || -+ (((current->flags & PF_MEMALLOC) || (gfp_mask & __GFP_MEMALLOC)) -+ && !in_interrupt())) { - page = rmqueue(z, order); - if (page) - return page; -Index: linux-2.4.18-chaos/include/linux/slab.h -=================================================================== ---- linux-2.4.18-chaos.orig/include/linux/slab.h 2003-07-28 17:52:18.000000000 +0400 -+++ linux-2.4.18-chaos/include/linux/slab.h 2003-11-23 00:07:23.000000000 +0300 -@@ -23,6 +23,7 @@ - #define SLAB_KERNEL GFP_KERNEL - #define SLAB_NFS GFP_NFS - #define SLAB_DMA GFP_DMA -+#define SLAB_MEMALLOC GFP_MEMALLOC - - #define SLAB_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_HIGHIO|__GFP_FS) - #define SLAB_NO_GROW 0x00001000UL /* don't grow a cache */ -Index: linux-2.4.18-chaos/mm/slab.c -=================================================================== ---- linux-2.4.18-chaos.orig/mm/slab.c 2003-07-28 17:52:20.000000000 +0400 -+++ linux-2.4.18-chaos/mm/slab.c 2003-11-23 00:07:23.000000000 +0300 -@@ -1116,7 +1116,7 @@ - /* Be lazy and only check for valid flags here, - * keeping it out of the critical path in kmem_cache_alloc(). 
- */ -- if (flags & ~(SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW)) -+ if (flags & ~(SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW|SLAB_MEMALLOC)) - BUG(); - if (flags & SLAB_NO_GROW) - return 0; diff --git a/lustre/kernel_patches/patches/htree-ext3-2.4.18.patch b/lustre/kernel_patches/patches/htree-ext3-2.4.18.patch deleted file mode 100644 index 2100f53..0000000 --- a/lustre/kernel_patches/patches/htree-ext3-2.4.18.patch +++ /dev/null @@ -1,1233 +0,0 @@ ---- ./fs/ext3/super.c 2002/03/05 06:18:59 2.1 -+++ ./fs/ext3/super.c 2002/03/05 06:26:56 -@@ -529,6 +529,12 @@ - "EXT3 Check option not supported\n"); - #endif - } -+ else if (!strcmp (this_char, "index")) -+#ifdef CONFIG_EXT3_INDEX -+ set_opt (*mount_options, INDEX); -+#else -+ printk("EXT3 index option not supported\n"); -+#endif - else if (!strcmp (this_char, "debug")) - set_opt (*mount_options, DEBUG); - else if (!strcmp (this_char, "errors")) { -@@ -702,6 +708,12 @@ - es->s_mtime = cpu_to_le32(CURRENT_TIME); - ext3_update_dynamic_rev(sb); - EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); -+ -+ if (test_opt(sb, INDEX)) -+ EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX); -+ else if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX)) -+ set_opt (EXT3_SB(sb)->s_mount_opt, INDEX); -+ - ext3_commit_super (sb, es, 1); - if (test_opt (sb, DEBUG)) - printk (KERN_INFO ---- ./fs/ext3/namei.c 2002/03/05 06:18:59 2.1 -+++ ./fs/ext3/namei.c 2002/03/06 00:13:18 -@@ -16,6 +16,10 @@ - * David S. Miller (davem@caip.rutgers.edu), 1995 - * Directory entry file type support and forward compatibility hooks - * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998 -+ * Hash Tree Directory indexing (c) -+ * Daniel Phillips, 2001 -+ * Hash Tree Directory indexing porting -+ * Christopher Li, 2002 - */ - - #include -@@ -33,7 +33,7 @@ - #include - #include - #include -- -+#include - - /* - * define how far ahead to read directories while searching them. 
-@@ -38,6 +42,437 @@ - #define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) - #define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b)) - -+static struct buffer_head *ext3_append(handle_t *handle, -+ struct inode *inode, -+ u32 *block, int *err) -+{ -+ struct buffer_head *bh; -+ -+ *block = inode->i_size >> inode->i_sb->s_blocksize_bits; -+ -+ if ((bh = ext3_bread(handle, inode, *block, 1, err))) { -+ inode->i_size += inode->i_sb->s_blocksize; -+ EXT3_I(inode)->i_disksize = inode->i_size; -+ ext3_journal_get_write_access(handle,bh); -+ } -+ return bh; -+} -+ -+#ifndef assert -+#define assert(test) J_ASSERT(test) -+#endif -+ -+#ifndef swap -+#define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0) -+#endif -+ -+typedef struct { u32 v; } le_u32; -+typedef struct { u16 v; } le_u16; -+ -+#define dxtrace_on(command) command -+#define dxtrace_off(command) -+ -+struct fake_dirent -+{ -+ /*le*/u32 inode; -+ /*le*/u16 rec_len; -+ u8 name_len; -+ u8 file_type; -+}; -+ -+struct dx_countlimit -+{ -+ le_u16 limit; -+ le_u16 count; -+}; -+ -+struct dx_entry -+{ -+ le_u32 hash; -+ le_u32 block; -+}; -+ -+/* -+ * dx_root_info is laid out so that if it should somehow get overlaid by a -+ * dirent the two low bits of the hash version will be zero. Therefore, the -+ * hash version mod 4 should never be 0. Sincerely, the paranoia department. 
-+ */ -+ -+struct dx_root -+{ -+ struct fake_dirent dot; -+ char dot_name[4]; -+ struct fake_dirent dotdot; -+ char dotdot_name[4]; -+ struct dx_root_info -+ { -+ le_u32 reserved_zero; -+ u8 hash_version; /* 0 now, 1 at release */ -+ u8 info_length; /* 8 */ -+ u8 indirect_levels; -+ u8 unused_flags; -+ } -+ info; -+ struct dx_entry entries[0]; -+}; -+ -+struct dx_node -+{ -+ struct fake_dirent fake; -+ struct dx_entry entries[0]; -+}; -+ -+ -+struct dx_frame -+{ -+ struct buffer_head *bh; -+ struct dx_entry *entries; -+ struct dx_entry *at; -+}; -+ -+struct dx_map_entry -+{ -+ u32 hash; -+ u32 offs; -+}; -+ -+typedef struct ext3_dir_entry_2 ext3_dirent; -+static inline unsigned dx_get_block (struct dx_entry *entry); -+static void dx_set_block (struct dx_entry *entry, unsigned value); -+static inline unsigned dx_get_hash (struct dx_entry *entry); -+static void dx_set_hash (struct dx_entry *entry, unsigned value); -+static unsigned dx_get_count (struct dx_entry *entries); -+static unsigned dx_get_limit (struct dx_entry *entries); -+static void dx_set_count (struct dx_entry *entries, unsigned value); -+static void dx_set_limit (struct dx_entry *entries, unsigned value); -+static unsigned dx_root_limit (struct inode *dir, unsigned infosize); -+static unsigned dx_node_limit (struct inode *dir); -+static unsigned dx_hack_hash (const u8 *name, int len); -+static struct dx_frame *dx_probe (struct inode *dir, u32 hash, struct dx_frame *frame); -+static void dx_release (struct dx_frame *frames); -+static int dx_make_map (ext3_dirent *de, int size, struct dx_map_entry map[]); -+static void dx_sort_map(struct dx_map_entry *map, unsigned count); -+static ext3_dirent *dx_copy_dirents (char *from, char *to, -+ struct dx_map_entry *map, int count); -+static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block); -+ -+ -+#ifdef CONFIG_EXT3_INDEX -+/* -+ * Future: use high four bits of block for coalesce-on-delete flags -+ * Mask them off for now. 
-+ */ -+ -+static inline unsigned dx_get_block (struct dx_entry *entry) -+{ -+ return le32_to_cpu(entry->block.v) & 0x00ffffff; -+} -+ -+static inline void dx_set_block (struct dx_entry *entry, unsigned value) -+{ -+ entry->block.v = cpu_to_le32(value); -+} -+ -+static inline unsigned dx_get_hash (struct dx_entry *entry) -+{ -+ return le32_to_cpu(entry->hash.v); -+} -+ -+static inline void dx_set_hash (struct dx_entry *entry, unsigned value) -+{ -+ entry->hash.v = cpu_to_le32(value); -+} -+ -+static inline unsigned dx_get_count (struct dx_entry *entries) -+{ -+ return le16_to_cpu(((struct dx_countlimit *) entries)->count.v); -+} -+ -+static inline unsigned dx_get_limit (struct dx_entry *entries) -+{ -+ return le16_to_cpu(((struct dx_countlimit *) entries)->limit.v); -+} -+ -+static inline void dx_set_count (struct dx_entry *entries, unsigned value) -+{ -+ ((struct dx_countlimit *) entries)->count.v = cpu_to_le16(value); -+} -+ -+static inline void dx_set_limit (struct dx_entry *entries, unsigned value) -+{ -+ ((struct dx_countlimit *) entries)->limit.v = cpu_to_le16(value); -+} -+ -+static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize) -+{ -+ unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) - -+ EXT3_DIR_REC_LEN(2) - infosize; -+ return 0? 20: entry_space / sizeof(struct dx_entry); -+} -+ -+static inline unsigned dx_node_limit (struct inode *dir) -+{ -+ unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0); -+ return 0? 
22: entry_space / sizeof(struct dx_entry); -+} -+ -+/* Hash function - not bad, but still looking for an ideal default */ -+ -+static unsigned dx_hack_hash (const u8 *name, int len) -+{ -+ u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; -+ while (len--) -+ { -+ u32 hash = hash1 + (hash0 ^ (*name++ * 7152373)); -+ if (hash & 0x80000000) hash -= 0x7fffffff; -+ hash1 = hash0; -+ hash0 = hash; -+ } -+ return hash0; -+} -+ -+#define dx_hash(s,n) (dx_hack_hash(s,n) << 1) -+ -+/* -+ * Debug -+ */ -+#ifdef DX_DEBUG -+#define dxtrace dxtrace_on -+static void dx_show_index (char * label, struct dx_entry *entries) -+{ -+ int i, n = dx_get_count (entries); -+ printk("%s index ", label); -+ for (i = 0; i < n; i++) -+ { -+ printk("%x->%u ", i? dx_get_hash(entries + i): 0, dx_get_block(entries + i)); -+ } -+ printk("\n"); -+} -+ -+struct stats -+{ -+ unsigned names; -+ unsigned space; -+ unsigned bcount; -+}; -+ -+static struct stats dx_show_leaf (ext3_dirent *de, int size, int show_names) -+{ -+ unsigned names = 0, space = 0; -+ char *base = (char *) de; -+ printk("names: "); -+ while ((char *) de < base + size) -+ { -+ if (de->inode) -+ { -+ if (show_names) -+ { -+ int len = de->name_len; -+ char *name = de->name; -+ while (len--) printk("%c", *name++); -+ printk(":%x.%u ", dx_hash (de->name, de->name_len), ((char *) de - base)); -+ } -+ space += EXT3_DIR_REC_LEN(de->name_len); -+ names++; -+ } -+ de = (ext3_dirent *) ((char *) de + le16_to_cpu(de->rec_len)); -+ } -+ printk("(%i)\n", names); -+ return (struct stats) { names, space, 1 }; -+} -+ -+struct stats dx_show_entries (struct inode *dir, struct dx_entry *entries, int levels) -+{ -+ unsigned blocksize = dir->i_sb->s_blocksize; -+ unsigned count = dx_get_count (entries), names = 0, space = 0, i; -+ unsigned bcount = 0; -+ struct buffer_head *bh; -+ int err; -+ printk("%i indexed blocks...\n", count); -+ for (i = 0; i < count; i++, entries++) -+ { -+ u32 block = dx_get_block(entries), hash = i? 
dx_get_hash(entries): 0; -+ u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash; -+ struct stats stats; -+ printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range); -+ if (!(bh = ext3_bread (NULL,dir, block, 0,&err))) continue; -+ stats = levels? -+ dx_show_entries (dir, ((struct dx_node *) bh->b_data)->entries, levels - 1): -+ dx_show_leaf ((ext3_dirent *) bh->b_data, blocksize, 0); -+ names += stats.names; -+ space += stats.space; -+ bcount += stats.bcount; -+ brelse (bh); -+ } -+ if (bcount) -+ printk("%snames %u, fullness %u (%u%%)\n", levels?"":" ", -+ names, space/bcount,(space/bcount)*100/blocksize); -+ return (struct stats) { names, space, bcount}; -+} -+#else -+#define dxtrace dxtrace_off -+#endif -+ -+/* -+ * Probe for a directory leaf block to search -+ */ -+ -+static struct dx_frame * -+dx_probe(struct inode *dir, u32 hash, struct dx_frame *frame_in) -+{ -+ unsigned count, indirect; -+ struct dx_entry *at, *entries, *p, *q, *m; -+ struct dx_root *root; -+ struct buffer_head *bh; -+ struct dx_frame *frame = frame_in; -+ int err; -+ -+ frame->bh = NULL; -+ if (!(bh = ext3_bread(NULL, dir, 0, 0, &err))) -+ goto fail; -+ root = (struct dx_root *) bh->b_data; -+ if (root->info.hash_version > 0 || root->info.unused_flags & 1) { -+ brelse(bh); -+ goto fail; -+ } -+ if ((indirect = root->info.indirect_levels) > 1) { -+ brelse(bh); -+ goto fail; -+ } -+ entries = (struct dx_entry *) (((char *) &root->info) + root->info.info_length); -+ assert (dx_get_limit(entries) == dx_root_limit(dir, root->info.info_length)); -+ dxtrace (printk("Look up %x", hash)); -+ while (1) -+ { -+ count = dx_get_count(entries); -+ assert (count && count <= dx_get_limit(entries)); -+ p = entries + 1; -+ q = entries + count - 1; -+ while (p <= q) -+ { -+ m = p + (q - p)/2; -+ dxtrace(printk(".")); -+ if (dx_get_hash(m) > hash) -+ q = m - 1; -+ else -+ p = m + 1; -+ } -+ -+ if (0) // linear search cross check -+ { -+ unsigned n = count - 1; -+ at = entries; 
-+ while (n--) -+ { -+ dxtrace(printk(",")); -+ if (dx_get_hash(++at) > hash) -+ { -+ at--; -+ break; -+ } -+ } -+ assert (at == p - 1); -+ } -+ -+ at = p - 1; -+ dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at))); -+ frame->bh = bh; -+ frame->entries = entries; -+ frame->at = at; -+ if (!indirect--) return frame; -+ if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0,&err))) -+ goto fail2; -+ at = entries = ((struct dx_node *) bh->b_data)->entries; -+ assert (dx_get_limit(entries) == dx_node_limit (dir)); -+ frame++; -+ } -+fail2: -+ while (frame >= frame_in) { -+ brelse(frame->bh); -+ frame--; -+ } -+fail: -+ return NULL; -+} -+ -+static void dx_release (struct dx_frame *frames) -+{ -+ if (frames[0].bh == NULL) -+ return; -+ -+ if (((struct dx_root *)frames[0].bh->b_data)->info.indirect_levels) -+ brelse (frames[1].bh); -+ brelse (frames[0].bh); -+} -+ -+/* -+ * Directory block splitting, compacting -+ */ -+ -+static int dx_make_map (ext3_dirent *de, int size, struct dx_map_entry map[]) -+{ -+ int count = 0; -+ char *base = (char *) de; -+ while ((char *) de < base + size) { -+ if (de->name_len && de->inode) { -+ map[count].hash = dx_hash (de->name, de->name_len); -+ map[count].offs = (u32) ((char *) de - base); -+ count++; -+ } -+ de = (ext3_dirent *) ((char *) de + le16_to_cpu(de->rec_len)); -+ } -+ return count; -+} -+ -+static void dx_sort_map (struct dx_map_entry *map, unsigned count) -+{ -+ struct dx_map_entry *p, *q, *top = map + count - 1; -+ int more; -+ /* Combsort until bubble sort doesn't suck */ -+ while (count > 2) -+ { -+ count = count*10/13; -+ if (count - 9 < 2) /* 9, 10 -> 11 */ -+ count = 11; -+ for (p = top, q = p - count; q >= map; p--, q--) -+ if (p->hash < q->hash) -+ swap(*p, *q); -+ } -+ /* Garden variety bubble sort */ -+ do { -+ more = 0; -+ q = top; -+ while (q-- > map) -+ { -+ if (q[1].hash >= q[0].hash) -+ continue; -+ swap(*(q+1), *q); -+ more = 1; -+ } -+ } while(more); -+} -+ -+static void 
dx_insert_block(struct dx_frame *frame, u32 hash, u32 block) -+{ -+ struct dx_entry *entries = frame->entries; -+ struct dx_entry *old = frame->at, *new = old + 1; -+ int count = dx_get_count(entries); -+ -+ assert(count < dx_get_limit(entries)); -+ assert(old < entries + count); -+ memmove(new + 1, new, (char *)(entries + count) - (char *)(new)); -+ dx_set_hash(new, hash); -+ dx_set_block(new, block); -+ dx_set_count(entries, count + 1); -+} -+#endif -+ -+static void ext3_update_dx_flag(struct inode *inode) -+{ -+ if (!test_opt(inode->i_sb, INDEX)) -+ EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL; -+} -+ - /* - * NOTE! unlike strncmp, ext3_match returns 1 for success, 0 for failure. - * -@@ -95,6 +529,15 @@ - } - - /* -+ * p is at least 6 bytes before the end of page -+ */ -+static inline ext3_dirent *ext3_next_entry(ext3_dirent *p) -+{ -+ return (ext3_dirent *)((char*)p + le16_to_cpu(p->rec_len)); -+} -+ -+ -+/* - * ext3_find_entry() - * - * finds an entry in the specified directory with the wanted name. It -@@ -105,6 +548,8 @@ - * The returned buffer_head has ->b_count elevated. The caller is expected - * to brelse() it when appropriate. 
- */ -+ -+ - static struct buffer_head * ext3_find_entry (struct dentry *dentry, - struct ext3_dir_entry_2 ** res_dir) - { -@@ -119,10 +564,70 @@ - int num = 0; - int nblocks, i, err; - struct inode *dir = dentry->d_parent->d_inode; -+ ext3_dirent *de, *top; - - *res_dir = NULL; - sb = dir->i_sb; -+ if (dentry->d_name.len > EXT3_NAME_LEN) -+ return NULL; -+ if (ext3_dx && is_dx(dir)) { -+ u32 hash = dx_hash(dentry->d_name.name, dentry->d_name.len); -+ struct dx_frame frames[2], *frame; -+ if (!(frame = dx_probe (dir, hash, frames))) -+ return NULL; -+dxnext: -+ block = dx_get_block(frame->at); -+ if (!(bh = ext3_bread (NULL,dir, block, 0, &err))) -+ goto dxfail; -+ de = (ext3_dirent *) bh->b_data; -+ top = (ext3_dirent *) ((char *) de + sb->s_blocksize - -+ EXT3_DIR_REC_LEN(0)); -+ for (; de < top; de = ext3_next_entry(de)) -+ if (ext3_match(dentry->d_name.len, dentry->d_name.name, de)) { -+ if (!ext3_check_dir_entry("ext3_find_entry", -+ dir, de, bh, -+ (block<b_data))) { -+ brelse (bh); -+ goto dxfail; -+ } -+ *res_dir = de; -+ goto dxfound; -+ } -+ brelse (bh); -+ /* Same hash continues in next block? Search on. 
*/ -+ if (++(frame->at) == frame->entries + dx_get_count(frame->entries)) -+ { -+ struct buffer_head *bh2; -+ if (frame == frames) -+ goto dxfail; -+ if (++(frames->at) == frames->entries + dx_get_count(frames->entries)) -+ goto dxfail; -+ /* should omit read if not continued */ -+ if (!(bh2 = ext3_bread (NULL, dir, -+ dx_get_block(frames->at), -+ 0, &err))) -+ goto dxfail; -+ brelse (frame->bh); -+ frame->bh = bh2; -+ frame->at = frame->entries = ((struct dx_node *) bh2->b_data)->entries; -+ /* Subtle: the 0th entry has the count, find the hash in frame above */ -+ if ((dx_get_hash(frames->at) & -2) == hash) -+ goto dxnext; -+ goto dxfail; -+ } -+ if ((dx_get_hash(frame->at) & -2) == hash) -+ goto dxnext; -+dxfail: -+ dxtrace(printk("%s not found\n", name)); -+ dx_release (frames); -+ return NULL; -+dxfound: -+ dx_release (frames); -+ return bh; - -+ } -+ - nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb); - start = dir->u.ext3_i.i_dir_start_lookup; - if (start >= nblocks) -@@ -237,6 +748,90 @@ - de->file_type = ext3_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; - } - -+static ext3_dirent * -+dx_copy_dirents (char *from, char *to, struct dx_map_entry *map, int count) -+{ -+ unsigned rec_len = 0; -+ -+ while (count--) { -+ ext3_dirent *de = (ext3_dirent *) (from + map->offs); -+ rec_len = EXT3_DIR_REC_LEN(de->name_len); -+ memcpy (to, de, rec_len); -+ ((ext3_dirent *)to)->rec_len = le16_to_cpu(rec_len); -+ to += rec_len; -+ map++; -+ } -+ return (ext3_dirent *) (to - rec_len); -+} -+ -+#ifdef CONFIG_EXT3_INDEX -+static ext3_dirent *do_split(handle_t *handle, struct inode *dir, -+ struct buffer_head **bh,struct dx_frame *frame, -+ u32 hash, int *error) -+{ -+ unsigned count; -+ struct buffer_head *bh2; -+ u32 newblock; -+ u32 hash2; -+ struct dx_map_entry *map; -+ char *data1 = (*bh)->b_data, *data2, *data3; -+ unsigned split; -+ ext3_dirent *de, *de2; -+ -+ bh2 = ext3_append (handle, dir, &newblock, error); -+ if (!(bh2)) -+ { -+ brelse(*bh); -+ *bh = NULL; -+ return 
(ext3_dirent *)bh2; -+ } -+ -+ BUFFER_TRACE(*bh, "get_write_access"); -+ ext3_journal_get_write_access(handle, *bh); -+ BUFFER_TRACE(frame->bh, "get_write_access"); -+ ext3_journal_get_write_access(handle, frame->bh); -+ -+ data2 = bh2->b_data; -+ -+ map = kmalloc(sizeof(*map) * PAGE_CACHE_SIZE/EXT3_DIR_REC_LEN(1) + 1, -+ GFP_KERNEL); -+ if (!map) -+ panic("no memory for do_split\n"); -+ count = dx_make_map((ext3_dirent *)data1, dir->i_sb->s_blocksize, map); -+ split = count/2; // need to adjust to actual middle -+ dx_sort_map (map, count); -+ hash2 = map[split].hash; -+ dxtrace(printk("Split block %i at %x, %i/%i\n", -+ dx_get_block(frame->at), hash2, split, count-split)); -+ -+ /* Fancy dance to stay within two buffers */ -+ de2 = dx_copy_dirents (data1, data2, map + split, count - split); -+ data3 = (char *) de2 + le16_to_cpu(de2->rec_len); -+ de = dx_copy_dirents (data1, data3, map, split); -+ memcpy(data1, data3, (char *) de + le16_to_cpu(de->rec_len) - data3); -+ de = (ext3_dirent *) ((char *) de - data3 + data1); // relocate de -+ de->rec_len = cpu_to_le16(data1 + dir->i_sb->s_blocksize - (char *)de); -+ de2->rec_len = cpu_to_le16(data2 + dir->i_sb->s_blocksize-(char *)de2); -+ dxtrace(dx_show_leaf((ext3_dirent *)data1, dir->i_sb->s_blocksize, 1)); -+ dxtrace(dx_show_leaf((ext3_dirent *)data2, dir->i_sb->s_blocksize, 1)); -+ -+ /* Which block gets the new entry? 
*/ -+ if (hash >= hash2) -+ { -+ swap(*bh, bh2); -+ de = de2; -+ } -+ dx_insert_block(frame, hash2 + (hash2 == map[split-1].hash), newblock); -+ ext3_journal_dirty_metadata (handle, bh2); -+ brelse (bh2); -+ ext3_journal_dirty_metadata (handle, frame->bh); -+ dxtrace(dx_show_index ("frame", frame->entries)); -+ kfree(map); -+ return de; -+} -+#endif -+ -+ - /* - * ext3_add_entry() - * -@@ -255,118 +849,279 @@ - struct inode *inode) - { - struct inode *dir = dentry->d_parent->d_inode; -- const char *name = dentry->d_name.name; -- int namelen = dentry->d_name.len; - unsigned long offset; -- unsigned short rec_len; - struct buffer_head * bh; -- struct ext3_dir_entry_2 * de, * de1; -- struct super_block * sb; -+ ext3_dirent *de; -+ struct super_block * sb = dir->i_sb; - int retval; -+ unsigned short reclen = EXT3_DIR_REC_LEN(dentry->d_name.len); - -- sb = dir->i_sb; -+ unsigned nlen, rlen; -+ u32 block, blocks; -+ char *top; - -- if (!namelen) -+ if (!dentry->d_name.len) - return -EINVAL; -- bh = ext3_bread (handle, dir, 0, 0, &retval); -- if (!bh) -- return retval; -- rec_len = EXT3_DIR_REC_LEN(namelen); -- offset = 0; -- de = (struct ext3_dir_entry_2 *) bh->b_data; -- while (1) { -- if ((char *)de >= sb->s_blocksize + bh->b_data) { -- brelse (bh); -- bh = NULL; -- bh = ext3_bread (handle, dir, -- offset >> EXT3_BLOCK_SIZE_BITS(sb), 1, &retval); -- if (!bh) -- return retval; -- if (dir->i_size <= offset) { -- if (dir->i_size == 0) { -- brelse(bh); -- return -ENOENT; -+ if (ext3_dx && is_dx(dir)) { -+ struct dx_frame frames[2], *frame; -+ struct dx_entry *entries, *at; -+ u32 hash; -+ char *data1; -+ -+ hash = dx_hash(dentry->d_name.name, dentry->d_name.len); -+ /* FIXME: do something if dx_probe() fails here */ -+ frame = dx_probe(dir, hash, frames); -+ entries = frame->entries; -+ at = frame->at; -+ -+ if (!(bh = ext3_bread(handle,dir, dx_get_block(at), 0,&retval))) -+ goto dxfail1; -+ -+ BUFFER_TRACE(bh, "get_write_access"); -+ ext3_journal_get_write_access(handle, 
bh); -+ -+ data1 = bh->b_data; -+ de = (ext3_dirent *) data1; -+ top = data1 + (0? 200: sb->s_blocksize); -+ while ((char *) de < top) -+ { -+ /* FIXME: check EEXIST and dir */ -+ nlen = EXT3_DIR_REC_LEN(de->name_len); -+ rlen = le16_to_cpu(de->rec_len); -+ if ((de->inode? rlen - nlen: rlen) >= reclen) -+ goto dx_add; -+ de = (ext3_dirent *) ((char *) de + rlen); -+ } -+ /* Block full, should compress but for now just split */ -+ dxtrace(printk("using %u of %u node entries\n", -+ dx_get_count(entries), dx_get_limit(entries))); -+ /* Need to split index? */ -+ if (dx_get_count(entries) == dx_get_limit(entries)) -+ { -+ u32 newblock; -+ unsigned icount = dx_get_count(entries); -+ int levels = frame - frames; -+ struct dx_entry *entries2; -+ struct dx_node *node2; -+ struct buffer_head *bh2; -+ if (levels && dx_get_count(frames->entries) == dx_get_limit(frames->entries)) -+ goto dxfull; -+ bh2 = ext3_append (handle, dir, &newblock, &retval); -+ if (!(bh2)) -+ goto dxfail2; -+ node2 = (struct dx_node *)(bh2->b_data); -+ entries2 = node2->entries; -+ node2->fake.rec_len = cpu_to_le16(sb->s_blocksize); -+ node2->fake.inode = 0; -+ BUFFER_TRACE(frame->bh, "get_write_access"); -+ ext3_journal_get_write_access(handle, frame->bh); -+ if (levels) -+ { -+ unsigned icount1 = icount/2, icount2 = icount - icount1; -+ unsigned hash2 = dx_get_hash(entries + icount1); -+ dxtrace(printk("Split index %i/%i\n", icount1, icount2)); -+ -+ BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */ -+ ext3_journal_get_write_access(handle, frames[0].bh); -+ -+ memcpy ((char *) entries2, (char *) (entries + icount1), -+ icount2 * sizeof(struct dx_entry)); -+ dx_set_count (entries, icount1); -+ dx_set_count (entries2, icount2); -+ dx_set_limit (entries2, dx_node_limit(dir)); -+ -+ /* Which index block gets the new entry? 
*/ -+ if (at - entries >= icount1) { -+ frame->at = at = at - entries - icount1 + entries2; -+ frame->entries = entries = entries2; -+ swap(frame->bh, bh2); - } -- -- ext3_debug ("creating next block\n"); -- -- BUFFER_TRACE(bh, "get_write_access"); -- ext3_journal_get_write_access(handle, bh); -- de = (struct ext3_dir_entry_2 *) bh->b_data; -- de->inode = 0; -- de->rec_len = le16_to_cpu(sb->s_blocksize); -- dir->u.ext3_i.i_disksize = -- dir->i_size = offset + sb->s_blocksize; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -- ext3_mark_inode_dirty(handle, dir); -+ dx_insert_block (frames + 0, hash2, newblock); -+ dxtrace(dx_show_index ("node", frames[1].entries)); -+ dxtrace(dx_show_index ("node", -+ ((struct dx_node *) bh2->b_data)->entries)); -+ ext3_journal_dirty_metadata(handle, bh2); -+ brelse (bh2); - } else { -- -- ext3_debug ("skipping to next block\n"); -- -- de = (struct ext3_dir_entry_2 *) bh->b_data; -+ dxtrace(printk("Creating second level index...\n")); -+ memcpy((char *) entries2, (char *) entries, -+ icount * sizeof(struct dx_entry)); -+ dx_set_limit(entries2, dx_node_limit(dir)); -+ -+ /* Set up root */ -+ dx_set_count(entries, 1); -+ dx_set_block(entries + 0, newblock); -+ ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1; -+ -+ /* Add new access path frame */ -+ frame = frames + 1; -+ frame->at = at = at - entries + entries2; -+ frame->entries = entries = entries2; -+ frame->bh = bh2; -+ ext3_journal_get_write_access(handle, frame->bh); - } -+ ext3_journal_dirty_metadata(handle, frames[0].bh); - } -- if (!ext3_check_dir_entry ("ext3_add_entry", dir, de, bh, -- offset)) { -- brelse (bh); -- return -ENOENT; -- } -- if (ext3_match (namelen, name, de)) { -+ de = do_split(handle, dir, &bh, frame, hash, &retval); -+ dx_release (frames); -+ if (!(de)) -+ goto fail; -+ nlen = EXT3_DIR_REC_LEN(de->name_len); -+ rlen = le16_to_cpu(de->rec_len); -+ goto add; -+ -+dx_add: -+ dx_release (frames); -+ goto add; -+ -+dxfull: -+ ext3_warning(sb, 
__FUNCTION__, "Directory index full!\n"); -+ retval = -ENOSPC; -+dxfail2: -+ brelse(bh); -+dxfail1: -+ dx_release (frames); -+ goto fail1; -+ } -+ -+ blocks = dir->i_size >> sb->s_blocksize_bits; -+ for (block = 0, offset = 0; block < blocks; block++) { -+ bh = ext3_bread(handle, dir, block, 0, &retval); -+ if(!bh) -+ return retval; -+ de = (ext3_dirent *)bh->b_data; -+ top = bh->b_data + sb->s_blocksize - reclen; -+ while ((char *) de <= top) { -+ if (!ext3_check_dir_entry("ext3_add_entry", dir, de, -+ bh, offset)) { -+ brelse (bh); -+ return -EIO; -+ } -+ if (ext3_match(dentry->d_name.len,dentry->d_name.name,de)) { - brelse (bh); - return -EEXIST; -- } -- if ((le32_to_cpu(de->inode) == 0 && -- le16_to_cpu(de->rec_len) >= rec_len) || -- (le16_to_cpu(de->rec_len) >= -- EXT3_DIR_REC_LEN(de->name_len) + rec_len)) { -- BUFFER_TRACE(bh, "get_write_access"); -- ext3_journal_get_write_access(handle, bh); -- /* By now the buffer is marked for journaling */ -- offset += le16_to_cpu(de->rec_len); -- if (le32_to_cpu(de->inode)) { -- de1 = (struct ext3_dir_entry_2 *) ((char *) de + -- EXT3_DIR_REC_LEN(de->name_len)); -- de1->rec_len = -- cpu_to_le16(le16_to_cpu(de->rec_len) - -- EXT3_DIR_REC_LEN(de->name_len)); -- de->rec_len = cpu_to_le16( -- EXT3_DIR_REC_LEN(de->name_len)); -- de = de1; - } -- de->file_type = EXT3_FT_UNKNOWN; -- if (inode) { -- de->inode = cpu_to_le32(inode->i_ino); -- ext3_set_de_type(dir->i_sb, de, inode->i_mode); -- } else -- de->inode = 0; -- de->name_len = namelen; -- memcpy (de->name, name, namelen); -- /* -- * XXX shouldn't update any times until successful -- * completion of syscall, but too many callers depend -- * on this. -- * -- * XXX similarly, too many callers depend on -- * ext3_new_inode() setting the times, but error -- * recovery deletes the inode, so the worst that can -- * happen is that the times are slightly out of date -- * and/or different from the directory change time. 
-- */ -- dir->i_mtime = dir->i_ctime = CURRENT_TIME; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -- dir->i_version = ++event; -- ext3_mark_inode_dirty(handle, dir); -- BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); -- ext3_journal_dirty_metadata(handle, bh); -+ nlen = EXT3_DIR_REC_LEN(de->name_len); -+ rlen = le16_to_cpu(de->rec_len); -+ if ((de->inode ? rlen - nlen: rlen) >= reclen) -+ goto add; -+ de = (ext3_dirent *)((char *)de + rlen); -+ offset += rlen; -+ } -+ if (ext3_dx && blocks == 1 && test_opt(sb, INDEX)) -+ goto dx_make_index; -+ brelse(bh); -+ } -+ bh = ext3_append(handle, dir, &block, &retval); -+ if (!bh) -+ return retval; -+ de = (ext3_dirent *) bh->b_data; -+ de->inode = 0; -+ de->rec_len = cpu_to_le16(rlen = sb->s_blocksize); -+ nlen = 0; -+ goto add; -+ -+add: -+ BUFFER_TRACE(bh, "get_write_access"); -+ ext3_journal_get_write_access(handle, bh); -+ /* By now the buffer is marked for journaling */ -+ if (de->inode) { -+ ext3_dirent *de1 = (ext3_dirent *)((char *)de + nlen); -+ de1->rec_len = cpu_to_le16(rlen - nlen); -+ de->rec_len = cpu_to_le16(nlen); -+ de = de1; -+ } -+ de->file_type = EXT3_FT_UNKNOWN; -+ if (inode) { -+ de->inode = cpu_to_le32(inode->i_ino); -+ ext3_set_de_type(dir->i_sb, de, inode->i_mode); -+ } else -+ de->inode = 0; -+ de->name_len = dentry->d_name.len; -+ memcpy (de->name, dentry->d_name.name, dentry->d_name.len); -+ /* -+ * XXX shouldn't update any times until successful -+ * completion of syscall, but too many callers depend -+ * on this. -+ * -+ * XXX similarly, too many callers depend on -+ * ext3_new_inode() setting the times, but error -+ * recovery deletes the inode, so the worst that can -+ * happen is that the times are slightly out of date -+ * and/or different from the directory change time. 
-+ */ -+ dir->i_mtime = dir->i_ctime = CURRENT_TIME; -+ ext3_update_dx_flag(dir); -+ dir->i_version = ++event; -+ ext3_mark_inode_dirty(handle, dir); -+ BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); -+ ext3_journal_dirty_metadata(handle, bh); -+ brelse(bh); -+ return 0; -+ -+dx_make_index: -+ { -+ struct buffer_head *bh2; -+ struct dx_root *root; -+ struct dx_frame frames[2], *frame; -+ struct dx_entry *entries; -+ ext3_dirent *de2; -+ char *data1; -+ unsigned len; -+ u32 hash; -+ -+ dxtrace(printk("Creating index\n")); -+ ext3_journal_get_write_access(handle, bh); -+ root = (struct dx_root *) bh->b_data; -+ -+ EXT3_I(dir)->i_flags |= EXT3_INDEX_FL; -+ bh2 = ext3_append (handle, dir, &block, &retval); -+ if (!(bh2)) -+ { - brelse(bh); -- return 0; -+ return retval; - } -- offset += le16_to_cpu(de->rec_len); -- de = (struct ext3_dir_entry_2 *) -- ((char *) de + le16_to_cpu(de->rec_len)); -+ data1 = bh2->b_data; -+ -+ /* The 0th block becomes the root, move the dirents out */ -+ de = (struct ext3_dir_entry_2 *)&root->dotdot; -+ de = (struct ext3_dir_entry_2 *)((char *)de + le16_to_cpu(de->rec_len)); -+ len = ((char *) root) + sb->s_blocksize - (char *) de; -+ memcpy (data1, de, len); -+ de = (ext3_dirent *) data1; -+ top = data1 + len; -+ while (((char *) de2=(char*)de+le16_to_cpu(de->rec_len)) < top) -+ de = de2; -+ de->rec_len = cpu_to_le16(data1 + sb->s_blocksize - (char *)de); -+ /* Initialize the root; the dot dirents already exist */ -+ de = (ext3_dirent *) (&root->dotdot); -+ de->rec_len = cpu_to_le16(sb->s_blocksize-EXT3_DIR_REC_LEN(2)); -+ memset (&root->info, 0, sizeof(root->info)); -+ root->info.info_length = sizeof(root->info); -+ entries = root->entries; -+ dx_set_block (entries, 1); -+ dx_set_count (entries, 1); -+ dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info))); -+ -+ /* Initialize as for dx_probe */ -+ hash = dx_hash (dentry->d_name.name, dentry->d_name.len); -+ frame = frames; -+ frame->entries = entries; -+ frame->at = 
entries; -+ frame->bh = bh; -+ bh = bh2; -+ de = do_split(handle,dir, &bh, frame, hash, &retval); -+ dx_release (frames); -+ if (!(de)) -+ return retval; -+ nlen = EXT3_DIR_REC_LEN(de->name_len); -+ rlen = le16_to_cpu(de->rec_len); -+ goto add; - } -- brelse (bh); -- return -ENOSPC; -+fail1: -+ return retval; -+fail: -+ return -ENOENT; - } - - /* -@@ -451,7 +1212,8 @@ - struct inode * inode; - int err; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3); -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); - if (IS_ERR(handle)) - return PTR_ERR(handle); - -@@ -478,7 +1240,8 @@ - struct inode *inode; - int err; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3); -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); - if (IS_ERR(handle)) - return PTR_ERR(handle); - -@@ -507,7 +1270,8 @@ - if (dir->i_nlink >= EXT3_LINK_MAX) - return -EMLINK; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3); -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); - if (IS_ERR(handle)) - return PTR_ERR(handle); - -@@ -550,7 +1320,7 @@ - if (err) - goto out_no_entry; - dir->i_nlink++; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - d_instantiate(dentry, inode); - out_stop: -@@ -832,7 +1596,7 @@ - ext3_mark_inode_dirty(handle, inode); - dir->i_nlink--; - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - - end_rmdir: -@@ -878,7 +1642,7 @@ - if (retval) - goto end_unlink; - dir->i_ctime = dir->i_mtime = CURRENT_TIME; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - inode->i_nlink--; - if (!inode->i_nlink) -@@ -904,7 +1668,8 @@ - if (l > dir->i_sb->s_blocksize) - 
return -ENAMETOOLONG; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 5); -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5); - if (IS_ERR(handle)) - return PTR_ERR(handle); - -@@ -959,7 +1724,8 @@ - if (inode->i_nlink >= EXT3_LINK_MAX) - return -EMLINK; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS); -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS); - if (IS_ERR(handle)) - return PTR_ERR(handle); - -@@ -995,7 +1761,8 @@ - - old_bh = new_bh = dir_bh = NULL; - -- handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + 2); -+ handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2); - if (IS_ERR(handle)) - return PTR_ERR(handle); - -@@ -1069,14 +1837,37 @@ - /* - * ok, that's it - */ -- ext3_delete_entry(handle, old_dir, old_de, old_bh); -+ if (le32_to_cpu(old_de->inode) != old_inode->i_ino || -+ old_de->name_len != old_dentry->d_name.len || -+ strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) || -+ (retval = ext3_delete_entry(handle, old_dir, -+ old_de, old_bh)) == -ENOENT) { -+ /* old_de could have moved from under us during htree split, so -+ * make sure that we are deleting the right entry. We might -+ * also be pointing to a stale entry in the unused part of -+ * old_bh so just checking inum and the name isn't enough. 
*/ -+ struct buffer_head *old_bh2; -+ struct ext3_dir_entry_2 *old_de2; -+ -+ old_bh2 = ext3_find_entry(old_dentry, &old_de2); -+ if (old_bh2) { -+ retval = ext3_delete_entry(handle, old_dir, -+ old_de2, old_bh2); -+ brelse(old_bh2); -+ } -+ } -+ if (retval) { -+ ext3_warning(old_dir->i_sb, "ext3_rename", -+ "Deleting old file (%lu), %d, error=%d", -+ old_dir->i_ino, old_dir->i_nlink, retval); -+ } - - if (new_inode) { - new_inode->i_nlink--; - new_inode->i_ctime = CURRENT_TIME; - } - old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; -- old_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(old_dir); - if (dir_bh) { - BUFFER_TRACE(dir_bh, "get_write_access"); - ext3_journal_get_write_access(handle, dir_bh); -@@ -1089,7 +1856,7 @@ - new_inode->i_nlink--; - } else { - new_dir->i_nlink++; -- new_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(new_dir); - ext3_mark_inode_dirty(handle, new_dir); - } - } ---- ./include/linux/ext3_fs.h 2002/03/05 06:18:59 2.1 -+++ ./include/linux/ext3_fs.h 2002/03/05 06:26:56 -@@ -339,6 +339,7 @@ - #define EXT3_MOUNT_WRITEBACK_DATA 0x0C00 /* No data ordering */ - #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */ - #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ -+#define EXT3_MOUNT_INDEX 0x4000 /* Enable directory index */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef _LINUX_EXT2_FS_H -@@ -575,6 +576,24 @@ - #define EXT3_DIR_ROUND (EXT3_DIR_PAD - 1) - #define EXT3_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT3_DIR_ROUND) & \ - ~EXT3_DIR_ROUND) -+/* -+ * Hash Tree Directory indexing -+ * (c) Daniel Phillips, 2001 -+ */ -+ -+#define CONFIG_EXT3_INDEX -+ -+#ifdef CONFIG_EXT3_INDEX -+ enum {ext3_dx = 1}; -+ #define is_dx(dir) (EXT3_I(dir)->i_flags & EXT3_INDEX_FL) -+#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX) -+#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) -+#else 
-+ enum {ext3_dx = 0}; -+ #define is_dx(dir) 0 -+#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX) -+#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2) -+#endif - - #ifdef __KERNEL__ - /* ---- ./include/linux/ext3_jbd.h 2002/03/05 06:18:59 2.1 -+++ ./include/linux/ext3_jbd.h 2002/03/05 06:33:54 -@@ -63,6 +63,8 @@ - - #define EXT3_RESERVE_TRANS_BLOCKS 12 - -+#define EXT3_INDEX_EXTRA_TRANS_BLOCKS 8 -+ - int - ext3_mark_iloc_dirty(handle_t *handle, - struct inode *inode, diff --git a/lustre/kernel_patches/patches/iopen-2.4.18-2.patch b/lustre/kernel_patches/patches/iopen-2.4.18-2.patch deleted file mode 100644 index 4af67bc..0000000 --- a/lustre/kernel_patches/patches/iopen-2.4.18-2.patch +++ /dev/null @@ -1,493 +0,0 @@ - Documentation/filesystems/ext2.txt | 16 ++ - fs/ext3/Makefile | 2 - fs/ext3/inode.c | 4 - fs/ext3/iopen.c | 259 +++++++++++++++++++++++++++++++++++++ - fs/ext3/iopen.h | 13 + - fs/ext3/namei.c | 12 + - fs/ext3/super.c | 11 + - include/linux/ext3_fs.h | 2 - 8 files changed, 318 insertions(+), 1 deletion(-) - -Index: linux-aed/Documentation/filesystems/ext2.txt -=================================================================== ---- linux-aed.orig/Documentation/filesystems/ext2.txt Tue May 4 13:14:35 2004 -+++ linux-aed/Documentation/filesystems/ext2.txt Tue May 4 19:17:12 2004 -@@ -35,6 +35,22 @@ - - sb=n Use alternate superblock at this location. - -+iopen Makes an invisible pseudo-directory called -+ __iopen__ available in the root directory -+ of the filesystem. Allows open-by-inode- -+ number. i.e., inode 3145 can be accessed -+ via /mntpt/__iopen__/3145 -+ -+iopen_nopriv This option makes the iopen directory be -+ world-readable. 
This may be safer since it -+ allows daemons to run as an unprivileged user, -+ however it significantly changes the security -+ model of a Unix filesystem, since previously -+ all files under a mode 700 directory were not -+ generally avilable even if the -+ permissions on the file itself is -+ world-readable. -+ - grpquota,noquota,quota,usrquota Quota options are silently ignored by ext2. - - -Index: linux-aed/fs/ext3/Makefile -=================================================================== ---- linux-aed.orig/fs/ext3/Makefile Tue May 4 19:16:51 2004 -+++ linux-aed/fs/ext3/Makefile Tue May 4 19:17:12 2004 -@@ -11,7 +11,7 @@ - - export-objs := ext3-exports.o - --obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ -+obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ - ioctl.o namei.o super.o symlink.o xattr.o hash.o ext3-exports.o - obj-m := $(O_TARGET) - -Index: linux-aed/fs/ext3/inode.c -=================================================================== ---- linux-aed.orig/fs/ext3/inode.c Tue May 4 19:17:09 2004 -+++ linux-aed/fs/ext3/inode.c Tue May 4 19:17:12 2004 -@@ -31,6 +31,7 @@ - #include - #include - #include -+#include "iopen.h" - - /* - * SEARCH_FROM_ZERO forces each block allocation to search from the start -@@ -2277,6 +2278,9 @@ - struct buffer_head *bh; - int block; - -+ if (ext3_iopen_get_inode(inode)) -+ return; -+ - if(ext3_get_inode_loc(inode, &iloc)) - goto bad_inode; - bh = iloc.bh; -Index: linux-aed/fs/ext3/iopen.c -=================================================================== ---- linux-aed.orig/fs/ext3/iopen.c Tue May 4 13:14:35 2004 -+++ linux-aed/fs/ext3/iopen.c Tue May 4 19:17:12 2004 -@@ -0,0 +1,282 @@ -+/* -+ * linux/fs/ext3/iopen.c -+ * -+ * Special support for open by inode number -+ * -+ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). -+ * -+ * This file may be redistributed under the terms of the GNU General -+ * Public License. 
-+ * -+ * -+ * Invariants: -+ * - there is only ever a single DCACHE_NFSD_DISCONNECTED dentry alias -+ * for an inode at one time. -+ * - there are never both connected and DCACHE_NFSD_DISCONNECTED dentry -+ * aliases on an inode at the same time. -+ * -+ * If we have any connected dentry aliases for an inode, use one of those -+ * in iopen_lookup(). Otherwise, we instantiate a single NFSD_DISCONNECTED -+ * dentry for this inode, which thereafter will be found by the dcache -+ * when looking up this inode number in __iopen__, so we don't return here -+ * until it is gone. -+ * -+ * If we get an inode via a regular name lookup, then we "rename" the -+ * NFSD_DISCONNECTED dentry to the proper name and parent. This ensures -+ * existing users of the disconnected dentry will continue to use the same -+ * dentry as the connected users, and there will never be both kinds of -+ * dentry aliases at one time. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "iopen.h" -+ -+#ifndef assert -+#define assert(test) J_ASSERT(test) -+#endif -+ -+#define IOPEN_NAME_LEN 32 -+ -+/* -+ * This implements looking up an inode by number. 
-+ */ -+static struct dentry *iopen_lookup(struct inode *dir, struct dentry *dentry) -+{ -+ struct inode *inode; -+ unsigned long ino; -+ struct list_head *lp; -+ struct dentry *alternate; -+ char buf[IOPEN_NAME_LEN]; -+ -+ if (dentry->d_name.len >= IOPEN_NAME_LEN) -+ return ERR_PTR(-ENAMETOOLONG); -+ -+ memcpy(buf, dentry->d_name.name, dentry->d_name.len); -+ buf[dentry->d_name.len] = 0; -+ -+ if (strcmp(buf, ".") == 0) -+ ino = dir->i_ino; -+ else if (strcmp(buf, "..") == 0) -+ ino = EXT3_ROOT_INO; -+ else -+ ino = simple_strtoul(buf, 0, 0); -+ -+ if ((ino != EXT3_ROOT_INO && -+ //ino != EXT3_ACL_IDX_INO && -+ //ino != EXT3_ACL_DATA_INO && -+ ino < EXT3_FIRST_INO(dir->i_sb)) || -+ ino > le32_to_cpu(dir->i_sb->u.ext3_sb.s_es->s_inodes_count)) -+ return ERR_PTR(-ENOENT); -+ -+ inode = iget(dir->i_sb, ino); -+ if (!inode) -+ return ERR_PTR(-EACCES); -+ if (is_bad_inode(inode)) { -+ iput(inode); -+ return ERR_PTR(-ENOENT); -+ } -+ -+ assert(list_empty(&dentry->d_alias)); /* d_instantiate */ -+ assert(list_empty(&dentry->d_hash)); /* d_rehash */ -+ -+ /* preferrably return a connected dentry */ -+ spin_lock(&dcache_lock); -+ list_for_each(lp, &inode->i_dentry) { -+ alternate = list_entry(lp, struct dentry, d_alias); -+ assert(!(alternate->d_flags & DCACHE_NFSD_DISCONNECTED)); -+ } -+ -+ if (!list_empty(&inode->i_dentry)) { -+ alternate = list_entry(inode->i_dentry.next, -+ struct dentry, d_alias); -+ dget_locked(alternate); -+ alternate->d_vfs_flags |= DCACHE_REFERENCED; -+ iput(inode); -+ spin_unlock(&dcache_lock); -+ return alternate; -+ } -+ dentry->d_flags |= DCACHE_NFSD_DISCONNECTED; -+ -+ /* d_add(), but don't drop dcache_lock before adding dentry to inode */ -+ list_add(&dentry->d_alias, &inode->i_dentry); /* d_instantiate */ -+ dentry->d_inode = inode; -+ -+ __d_rehash(dentry, 0); /* d_rehash */ -+ spin_unlock(&dcache_lock); -+ -+ return NULL; -+} -+ -+#define do_switch(x,y) do { \ -+ __typeof__ (x) __tmp = x; \ -+ x = y; y = __tmp; } while (0) -+ -+static 
inline void switch_names(struct dentry *dentry, struct dentry *target) -+{ -+ const unsigned char *old_name, *new_name; -+ -+ memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN); -+ old_name = target->d_name.name; -+ new_name = dentry->d_name.name; -+ if (old_name == target->d_iname) -+ old_name = dentry->d_iname; -+ if (new_name == dentry->d_iname) -+ new_name = target->d_iname; -+ target->d_name.name = new_name; -+ dentry->d_name.name = old_name; -+} -+ -+/* This function is spliced into ext3_lookup and does the move of a -+ * disconnected dentry (if it exists) to a connected dentry. -+ */ -+struct dentry *iopen_connect_dentry(struct dentry *dentry, struct inode *inode, -+ int rehash) -+{ -+ struct dentry *tmp, *goal = NULL; -+ struct list_head *lp; -+ -+ /* verify this dentry is really new */ -+ assert(dentry->d_inode == NULL); -+ assert(list_empty(&dentry->d_alias)); /* d_instantiate */ -+ if (rehash) -+ assert(list_empty(&dentry->d_hash)); /* d_rehash */ -+ assert(list_empty(&dentry->d_subdirs)); -+ -+ spin_lock(&dcache_lock); -+ if (!inode) -+ goto do_rehash; -+ -+ /* preferrably return a connected dentry */ -+ list_for_each(lp, &inode->i_dentry) { -+ tmp = list_entry(lp, struct dentry, d_alias); -+ if (tmp->d_flags & DCACHE_NFSD_DISCONNECTED) { -+ assert(tmp->d_alias.next == &inode->i_dentry); -+ assert(tmp->d_alias.prev == &inode->i_dentry); -+ goal = tmp; -+ dget_locked(goal); -+ break; -+ } -+ } -+ -+ if (!goal) -+ goto do_instantiate; -+ -+ /* Move the goal to the de hash queue - like d_move() */ -+ goal->d_flags &= ~DCACHE_NFSD_DISCONNECTED; -+ list_del_init(&goal->d_hash); -+ -+ list_del(&goal->d_child); -+ list_del(&dentry->d_child); -+ -+ /* Switch the parents and the names.. 
*/ -+ switch_names(goal, dentry); -+ do_switch(goal->d_parent, dentry->d_parent); -+ do_switch(goal->d_name.len, dentry->d_name.len); -+ do_switch(goal->d_name.hash, dentry->d_name.hash); -+ -+ /* And add them back to the (new) parent lists */ -+ list_add(&goal->d_child, &goal->d_parent->d_subdirs); -+ list_add(&dentry->d_child, &dentry->d_parent->d_subdirs); -+ __d_rehash(goal, 0); -+ spin_unlock(&dcache_lock); -+ iput(inode); -+ -+ return goal; -+ -+ /* d_add(), but don't drop dcache_lock before adding dentry to inode */ -+do_instantiate: -+ list_add(&dentry->d_alias, &inode->i_dentry); /* d_instantiate */ -+ dentry->d_inode = inode; -+do_rehash: -+ if (rehash) -+ __d_rehash(dentry, 0); /* d_rehash */ -+ spin_unlock(&dcache_lock); -+ -+ return NULL; -+} -+ -+/* -+ * These are the special structures for the iopen pseudo directory. -+ */ -+ -+static struct inode_operations iopen_inode_operations = { -+ lookup: iopen_lookup, /* BKL held */ -+}; -+ -+static struct file_operations iopen_file_operations = { -+ read: generic_read_dir, -+}; -+ -+static int match_dentry(struct dentry *dentry, const char *name) -+{ -+ int len; -+ -+ len = strlen(name); -+ if (dentry->d_name.len != len) -+ return 0; -+ if (strncmp(dentry->d_name.name, name, len)) -+ return 0; -+ return 1; -+} -+ -+/* -+ * This function is spliced into ext3_lookup and returns 1 the file -+ * name is __iopen__ and dentry has been filled in appropriately. -+ */ -+int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry) -+{ -+ struct inode *inode; -+ -+ if (dir->i_ino != EXT3_ROOT_INO || -+ !test_opt(dir->i_sb, IOPEN) || -+ !match_dentry(dentry, "__iopen__")) -+ return 0; -+ -+ inode = iget(dir->i_sb, EXT3_BAD_INO); -+ -+ if (!inode) -+ return 0; -+ d_add(dentry, inode); -+ return 1; -+} -+ -+/* -+ * This function is spliced into read_inode; it returns 1 if inode -+ * number is the one for /__iopen__, in which case the inode is filled -+ * in appropriately. Otherwise, this fuction returns 0. 
-+ */ -+int ext3_iopen_get_inode(struct inode *inode) -+{ -+ if (inode->i_ino != EXT3_BAD_INO) -+ return 0; -+ -+ inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR; -+ if (test_opt(inode->i_sb, IOPEN_NOPRIV)) -+ inode->i_mode |= 0777; -+ inode->i_uid = 0; -+ inode->i_gid = 0; -+ inode->i_nlink = 1; -+ inode->i_size = 4096; -+ inode->i_atime = CURRENT_TIME; -+ inode->i_ctime = CURRENT_TIME; -+ inode->i_mtime = CURRENT_TIME; -+ inode->u.ext3_i.i_dtime = 0; -+ inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size -+ * (for stat), not the fs block -+ * size */ -+ inode->i_blocks = 0; -+ inode->i_version = 1; -+ inode->i_generation = 0; -+ -+ inode->i_op = &iopen_inode_operations; -+ inode->i_fop = &iopen_file_operations; -+ inode->i_mapping->a_ops = 0; -+ -+ return 1; -+} -Index: linux-aed/fs/ext3/iopen.h -=================================================================== ---- linux-aed.orig/fs/ext3/iopen.h Tue May 4 13:14:35 2004 -+++ linux-aed/fs/ext3/iopen.h Tue May 4 19:17:12 2004 -@@ -0,0 +1,15 @@ -+/* -+ * iopen.h -+ * -+ * Special support for opening files by inode number. -+ * -+ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). -+ * -+ * This file may be redistributed under the terms of the GNU General -+ * Public License. -+ */ -+ -+extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry); -+extern int ext3_iopen_get_inode(struct inode *inode); -+extern struct dentry *iopen_connect_dentry(struct dentry *dentry, -+ struct inode *inode, int rehash); -Index: linux-aed/fs/ext3/namei.c -=================================================================== ---- linux-aed.orig/fs/ext3/namei.c Tue May 4 19:17:05 2004 -+++ linux-aed/fs/ext3/namei.c Tue May 4 19:17:12 2004 -@@ -34,6 +34,7 @@ - #include - #include - #include -+#include "iopen.h" - - /* - * define how far ahead to read directories while searching them. 
-@@ -713,6 +714,9 @@ - if (dentry->d_name.len > EXT3_NAME_LEN) - return ERR_PTR(-ENAMETOOLONG); - -+ if (ext3_check_for_iopen(dir, dentry)) -+ return NULL; -+ - bh = ext3_find_entry(dentry, &de); - inode = NULL; - if (bh) { -@@ -723,8 +727,8 @@ - if (!inode) - return ERR_PTR(-EACCES); - } -- d_add(dentry, inode); -- return NULL; -+ -+ return iopen_connect_dentry(dentry, inode, 1); - } - - #define S_SHIFT 12 -@@ -1588,10 +1592,6 @@ - inode->i_nlink); - inode->i_version = ++event; - inode->i_nlink = 0; -- /* There's no need to set i_disksize: the fact that i_nlink is -- * zero will ensure that the right thing happens during any -- * recovery. */ -- inode->i_size = 0; - ext3_orphan_add(handle, inode); - ext3_mark_inode_dirty(handle, inode); - dir->i_nlink--; -@@ -1711,6 +1711,23 @@ - goto out_stop; - } - -+/* Like ext3_add_nondir() except for call to iopen_connect_dentry */ -+static int ext3_add_link(handle_t *handle, struct dentry *dentry, -+ struct inode *inode) -+{ -+ int err = ext3_add_entry(handle, dentry, inode); -+ if (!err) { -+ err = ext3_mark_inode_dirty(handle, inode); -+ if (err == 0) { -+ (void)iopen_connect_dentry(dentry, inode, 0); -+ return 0; -+ } -+ } -+ ext3_dec_count(handle, inode); -+ iput(inode); -+ return err; -+} -+ - static int ext3_link (struct dentry * old_dentry, - struct inode * dir, struct dentry *dentry) - { -@@ -1736,7 +1753,8 @@ - ext3_inc_count(handle, inode); - atomic_inc(&inode->i_count); - -- err = ext3_add_nondir(handle, dentry, inode); -+ err = ext3_add_link(handle, dentry, inode); -+ ext3_orphan_del(handle, inode); - ext3_mark_inode_dirty(handle, inode); - ext3_journal_stop(handle, dir); - return err; -Index: linux-aed/fs/ext3/super.c -=================================================================== ---- linux-aed.orig/fs/ext3/super.c Tue May 4 19:17:01 2004 -+++ linux-aed/fs/ext3/super.c Tue May 4 19:17:12 2004 -@@ -834,6 +834,18 @@ - || !strcmp (this_char, "quota") - || !strcmp (this_char, "usrquota")) - /* Don't do 
anything ;-) */ ; -+ else if (!strcmp (this_char, "iopen")) { -+ set_opt (sbi->s_mount_opt, IOPEN); -+ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); -+ } -+ else if (!strcmp (this_char, "noiopen")) { -+ clear_opt (sbi->s_mount_opt, IOPEN); -+ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); -+ } -+ else if (!strcmp (this_char, "iopen_nopriv")) { -+ set_opt (sbi->s_mount_opt, IOPEN); -+ set_opt (sbi->s_mount_opt, IOPEN_NOPRIV); -+ } - else if (!strcmp (this_char, "journal")) { - /* @@@ FIXME */ - /* Eventually we will want to be able to create -Index: linux-aed/include/linux/ext3_fs.h -=================================================================== ---- linux-aed.orig/include/linux/ext3_fs.h Tue May 4 19:17:08 2004 -+++ linux-aed/include/linux/ext3_fs.h Tue May 4 19:17:12 2004 -@@ -321,6 +321,8 @@ - #define EXT3_MOUNT_WRITEBACK_DATA 0x0C00 /* No data ordering */ - #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */ - #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ -+#define EXT3_MOUNT_IOPEN 0x8000 /* Allow access via iopen */ -+#define EXT3_MOUNT_IOPEN_NOPRIV 0x10000 /* Make iopen world-readable */ - #define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ diff --git a/lustre/kernel_patches/patches/iopen-2.4.18.patch b/lustre/kernel_patches/patches/iopen-2.4.18.patch deleted file mode 100644 index 7c56b03..0000000 --- a/lustre/kernel_patches/patches/iopen-2.4.18.patch +++ /dev/null @@ -1,493 +0,0 @@ - Documentation/filesystems/ext2.txt | 16 ++ - fs/ext3/Makefile | 2 - fs/ext3/inode.c | 4 - fs/ext3/iopen.c | 259 +++++++++++++++++++++++++++++++++++++ - fs/ext3/iopen.h | 13 + - fs/ext3/namei.c | 12 + - fs/ext3/super.c | 11 + - include/linux/ext3_fs.h | 2 - 8 files changed, 318 insertions(+), 1 deletion(-) - -Index: linux-aed/Documentation/filesystems/ext2.txt -=================================================================== ---- 
linux-aed.orig/Documentation/filesystems/ext2.txt Tue May 4 13:14:35 2004 -+++ linux-aed/Documentation/filesystems/ext2.txt Tue May 4 19:17:12 2004 -@@ -35,6 +35,22 @@ - - sb=n Use alternate superblock at this location. - -+iopen Makes an invisible pseudo-directory called -+ __iopen__ available in the root directory -+ of the filesystem. Allows open-by-inode- -+ number. i.e., inode 3145 can be accessed -+ via /mntpt/__iopen__/3145 -+ -+iopen_nopriv This option makes the iopen directory be -+ world-readable. This may be safer since it -+ allows daemons to run as an unprivileged user, -+ however it significantly changes the security -+ model of a Unix filesystem, since previously -+ all files under a mode 700 directory were not -+ generally avilable even if the -+ permissions on the file itself is -+ world-readable. -+ - grpquota,noquota,quota,usrquota Quota options are silently ignored by ext2. - - -Index: linux-aed/fs/ext3/Makefile -=================================================================== ---- linux-aed.orig/fs/ext3/Makefile Tue May 4 19:16:51 2004 -+++ linux-aed/fs/ext3/Makefile Tue May 4 19:17:12 2004 -@@ -11,7 +11,7 @@ - - export-objs := ext3-exports.o - --obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ -+obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ - ioctl.o namei.o super.o symlink.o xattr.o ext3-exports.o - obj-m := $(O_TARGET) - -Index: linux-aed/fs/ext3/inode.c -=================================================================== ---- linux-aed.orig/fs/ext3/inode.c Tue May 4 19:17:09 2004 -+++ linux-aed/fs/ext3/inode.c Tue May 4 19:17:12 2004 -@@ -31,6 +31,7 @@ - #include - #include - #include -+#include "iopen.h" - - /* - * SEARCH_FROM_ZERO forces each block allocation to search from the start -@@ -2277,6 +2278,9 @@ - struct buffer_head *bh; - int block; - -+ if (ext3_iopen_get_inode(inode)) -+ return; -+ - if(ext3_get_inode_loc(inode, &iloc)) - goto bad_inode; - bh = iloc.bh; -Index: 
linux-aed/fs/ext3/iopen.c -=================================================================== ---- linux-aed.orig/fs/ext3/iopen.c Tue May 4 13:14:35 2004 -+++ linux-aed/fs/ext3/iopen.c Tue May 4 19:17:12 2004 -@@ -0,0 +1,282 @@ -+/* -+ * linux/fs/ext3/iopen.c -+ * -+ * Special support for open by inode number -+ * -+ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). -+ * -+ * This file may be redistributed under the terms of the GNU General -+ * Public License. -+ * -+ * -+ * Invariants: -+ * - there is only ever a single DCACHE_NFSD_DISCONNECTED dentry alias -+ * for an inode at one time. -+ * - there are never both connected and DCACHE_NFSD_DISCONNECTED dentry -+ * aliases on an inode at the same time. -+ * -+ * If we have any connected dentry aliases for an inode, use one of those -+ * in iopen_lookup(). Otherwise, we instantiate a single NFSD_DISCONNECTED -+ * dentry for this inode, which thereafter will be found by the dcache -+ * when looking up this inode number in __iopen__, so we don't return here -+ * until it is gone. -+ * -+ * If we get an inode via a regular name lookup, then we "rename" the -+ * NFSD_DISCONNECTED dentry to the proper name and parent. This ensures -+ * existing users of the disconnected dentry will continue to use the same -+ * dentry as the connected users, and there will never be both kinds of -+ * dentry aliases at one time. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "iopen.h" -+ -+#ifndef assert -+#define assert(test) J_ASSERT(test) -+#endif -+ -+#define IOPEN_NAME_LEN 32 -+ -+/* -+ * This implements looking up an inode by number. 
-+ */ -+static struct dentry *iopen_lookup(struct inode *dir, struct dentry *dentry) -+{ -+ struct inode *inode; -+ unsigned long ino; -+ struct list_head *lp; -+ struct dentry *alternate; -+ char buf[IOPEN_NAME_LEN]; -+ -+ if (dentry->d_name.len >= IOPEN_NAME_LEN) -+ return ERR_PTR(-ENAMETOOLONG); -+ -+ memcpy(buf, dentry->d_name.name, dentry->d_name.len); -+ buf[dentry->d_name.len] = 0; -+ -+ if (strcmp(buf, ".") == 0) -+ ino = dir->i_ino; -+ else if (strcmp(buf, "..") == 0) -+ ino = EXT3_ROOT_INO; -+ else -+ ino = simple_strtoul(buf, 0, 0); -+ -+ if ((ino != EXT3_ROOT_INO && -+ //ino != EXT3_ACL_IDX_INO && -+ //ino != EXT3_ACL_DATA_INO && -+ ino < EXT3_FIRST_INO(dir->i_sb)) || -+ ino > le32_to_cpu(dir->i_sb->u.ext3_sb.s_es->s_inodes_count)) -+ return ERR_PTR(-ENOENT); -+ -+ inode = iget(dir->i_sb, ino); -+ if (!inode) -+ return ERR_PTR(-EACCES); -+ if (is_bad_inode(inode)) { -+ iput(inode); -+ return ERR_PTR(-ENOENT); -+ } -+ -+ assert(list_empty(&dentry->d_alias)); /* d_instantiate */ -+ assert(list_empty(&dentry->d_hash)); /* d_rehash */ -+ -+ /* preferrably return a connected dentry */ -+ spin_lock(&dcache_lock); -+ list_for_each(lp, &inode->i_dentry) { -+ alternate = list_entry(lp, struct dentry, d_alias); -+ assert(!(alternate->d_flags & DCACHE_NFSD_DISCONNECTED)); -+ } -+ -+ if (!list_empty(&inode->i_dentry)) { -+ alternate = list_entry(inode->i_dentry.next, -+ struct dentry, d_alias); -+ dget_locked(alternate); -+ alternate->d_vfs_flags |= DCACHE_REFERENCED; -+ iput(inode); -+ spin_unlock(&dcache_lock); -+ return alternate; -+ } -+ dentry->d_flags |= DCACHE_NFSD_DISCONNECTED; -+ -+ /* d_add(), but don't drop dcache_lock before adding dentry to inode */ -+ list_add(&dentry->d_alias, &inode->i_dentry); /* d_instantiate */ -+ dentry->d_inode = inode; -+ -+ __d_rehash(dentry, 0); /* d_rehash */ -+ spin_unlock(&dcache_lock); -+ -+ return NULL; -+} -+ -+#define do_switch(x,y) do { \ -+ __typeof__ (x) __tmp = x; \ -+ x = y; y = __tmp; } while (0) -+ -+static 
inline void switch_names(struct dentry *dentry, struct dentry *target) -+{ -+ const unsigned char *old_name, *new_name; -+ -+ memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN); -+ old_name = target->d_name.name; -+ new_name = dentry->d_name.name; -+ if (old_name == target->d_iname) -+ old_name = dentry->d_iname; -+ if (new_name == dentry->d_iname) -+ new_name = target->d_iname; -+ target->d_name.name = new_name; -+ dentry->d_name.name = old_name; -+} -+ -+/* This function is spliced into ext3_lookup and does the move of a -+ * disconnected dentry (if it exists) to a connected dentry. -+ */ -+struct dentry *iopen_connect_dentry(struct dentry *dentry, struct inode *inode, -+ int rehash) -+{ -+ struct dentry *tmp, *goal = NULL; -+ struct list_head *lp; -+ -+ /* verify this dentry is really new */ -+ assert(dentry->d_inode == NULL); -+ assert(list_empty(&dentry->d_alias)); /* d_instantiate */ -+ if (rehash) -+ assert(list_empty(&dentry->d_hash)); /* d_rehash */ -+ assert(list_empty(&dentry->d_subdirs)); -+ -+ spin_lock(&dcache_lock); -+ if (!inode) -+ goto do_rehash; -+ -+ /* preferrably return a connected dentry */ -+ list_for_each(lp, &inode->i_dentry) { -+ tmp = list_entry(lp, struct dentry, d_alias); -+ if (tmp->d_flags & DCACHE_NFSD_DISCONNECTED) { -+ assert(tmp->d_alias.next == &inode->i_dentry); -+ assert(tmp->d_alias.prev == &inode->i_dentry); -+ goal = tmp; -+ dget_locked(goal); -+ break; -+ } -+ } -+ -+ if (!goal) -+ goto do_instantiate; -+ -+ /* Move the goal to the de hash queue - like d_move() */ -+ goal->d_flags &= ~DCACHE_NFSD_DISCONNECTED; -+ list_del_init(&goal->d_hash); -+ -+ list_del(&goal->d_child); -+ list_del(&dentry->d_child); -+ -+ /* Switch the parents and the names.. 
*/ -+ switch_names(goal, dentry); -+ do_switch(goal->d_parent, dentry->d_parent); -+ do_switch(goal->d_name.len, dentry->d_name.len); -+ do_switch(goal->d_name.hash, dentry->d_name.hash); -+ -+ /* And add them back to the (new) parent lists */ -+ list_add(&goal->d_child, &goal->d_parent->d_subdirs); -+ list_add(&dentry->d_child, &dentry->d_parent->d_subdirs); -+ __d_rehash(goal, 0); -+ spin_unlock(&dcache_lock); -+ iput(inode); -+ -+ return goal; -+ -+ /* d_add(), but don't drop dcache_lock before adding dentry to inode */ -+do_instantiate: -+ list_add(&dentry->d_alias, &inode->i_dentry); /* d_instantiate */ -+ dentry->d_inode = inode; -+do_rehash: -+ if (rehash) -+ __d_rehash(dentry, 0); /* d_rehash */ -+ spin_unlock(&dcache_lock); -+ -+ return NULL; -+} -+ -+/* -+ * These are the special structures for the iopen pseudo directory. -+ */ -+ -+static struct inode_operations iopen_inode_operations = { -+ lookup: iopen_lookup, /* BKL held */ -+}; -+ -+static struct file_operations iopen_file_operations = { -+ read: generic_read_dir, -+}; -+ -+static int match_dentry(struct dentry *dentry, const char *name) -+{ -+ int len; -+ -+ len = strlen(name); -+ if (dentry->d_name.len != len) -+ return 0; -+ if (strncmp(dentry->d_name.name, name, len)) -+ return 0; -+ return 1; -+} -+ -+/* -+ * This function is spliced into ext3_lookup and returns 1 the file -+ * name is __iopen__ and dentry has been filled in appropriately. -+ */ -+int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry) -+{ -+ struct inode *inode; -+ -+ if (dir->i_ino != EXT3_ROOT_INO || -+ !test_opt(dir->i_sb, IOPEN) || -+ !match_dentry(dentry, "__iopen__")) -+ return 0; -+ -+ inode = iget(dir->i_sb, EXT3_BAD_INO); -+ -+ if (!inode) -+ return 0; -+ d_add(dentry, inode); -+ return 1; -+} -+ -+/* -+ * This function is spliced into read_inode; it returns 1 if inode -+ * number is the one for /__iopen__, in which case the inode is filled -+ * in appropriately. Otherwise, this fuction returns 0. 
-+ */ -+int ext3_iopen_get_inode(struct inode *inode) -+{ -+ if (inode->i_ino != EXT3_BAD_INO) -+ return 0; -+ -+ inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR; -+ if (test_opt(inode->i_sb, IOPEN_NOPRIV)) -+ inode->i_mode |= 0777; -+ inode->i_uid = 0; -+ inode->i_gid = 0; -+ inode->i_nlink = 1; -+ inode->i_size = 4096; -+ inode->i_atime = CURRENT_TIME; -+ inode->i_ctime = CURRENT_TIME; -+ inode->i_mtime = CURRENT_TIME; -+ inode->u.ext3_i.i_dtime = 0; -+ inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size -+ * (for stat), not the fs block -+ * size */ -+ inode->i_blocks = 0; -+ inode->i_version = 1; -+ inode->i_generation = 0; -+ -+ inode->i_op = &iopen_inode_operations; -+ inode->i_fop = &iopen_file_operations; -+ inode->i_mapping->a_ops = 0; -+ -+ return 1; -+} -Index: linux-aed/fs/ext3/iopen.h -=================================================================== ---- linux-aed.orig/fs/ext3/iopen.h Tue May 4 13:14:35 2004 -+++ linux-aed/fs/ext3/iopen.h Tue May 4 19:17:12 2004 -@@ -0,0 +1,15 @@ -+/* -+ * iopen.h -+ * -+ * Special support for opening files by inode number. -+ * -+ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). -+ * -+ * This file may be redistributed under the terms of the GNU General -+ * Public License. -+ */ -+ -+extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry); -+extern int ext3_iopen_get_inode(struct inode *inode); -+extern struct dentry *iopen_connect_dentry(struct dentry *dentry, -+ struct inode *inode, int rehash); -Index: linux-aed/fs/ext3/namei.c -=================================================================== ---- linux-aed.orig/fs/ext3/namei.c Tue May 4 19:17:05 2004 -+++ linux-aed/fs/ext3/namei.c Tue May 4 19:17:12 2004 -@@ -34,6 +34,7 @@ - #include - #include - #include -+#include "iopen.h" - - /* - * define how far ahead to read directories while searching them. 
-@@ -713,6 +714,9 @@ - if (dentry->d_name.len > EXT3_NAME_LEN) - return ERR_PTR(-ENAMETOOLONG); - -+ if (ext3_check_for_iopen(dir, dentry)) -+ return NULL; -+ - bh = ext3_find_entry(dentry, &de); - inode = NULL; - if (bh) { -@@ -723,8 +727,8 @@ - if (!inode) - return ERR_PTR(-EACCES); - } -- d_add(dentry, inode); -- return NULL; -+ -+ return iopen_connect_dentry(dentry, inode, 1); - } - - #define S_SHIFT 12 -@@ -1588,10 +1592,6 @@ - inode->i_nlink); - inode->i_version = ++event; - inode->i_nlink = 0; -- /* There's no need to set i_disksize: the fact that i_nlink is -- * zero will ensure that the right thing happens during any -- * recovery. */ -- inode->i_size = 0; - ext3_orphan_add(handle, inode); - ext3_mark_inode_dirty(handle, inode); - dir->i_nlink--; -@@ -1711,6 +1711,23 @@ - goto out_stop; - } - -+/* Like ext3_add_nondir() except for call to iopen_connect_dentry */ -+static int ext3_add_link(handle_t *handle, struct dentry *dentry, -+ struct inode *inode) -+{ -+ int err = ext3_add_entry(handle, dentry, inode); -+ if (!err) { -+ err = ext3_mark_inode_dirty(handle, inode); -+ if (err == 0) { -+ (void)iopen_connect_dentry(dentry, inode, 0); -+ return 0; -+ } -+ } -+ ext3_dec_count(handle, inode); -+ iput(inode); -+ return err; -+} -+ - static int ext3_link (struct dentry * old_dentry, - struct inode * dir, struct dentry *dentry) - { -@@ -1736,7 +1753,8 @@ - ext3_inc_count(handle, inode); - atomic_inc(&inode->i_count); - -- err = ext3_add_nondir(handle, dentry, inode); -+ err = ext3_add_link(handle, dentry, inode); -+ ext3_orphan_del(handle, inode); - ext3_mark_inode_dirty(handle, inode); - ext3_journal_stop(handle, dir); - return err; -Index: linux-aed/fs/ext3/super.c -=================================================================== ---- linux-aed.orig/fs/ext3/super.c Tue May 4 19:17:01 2004 -+++ linux-aed/fs/ext3/super.c Tue May 4 19:17:12 2004 -@@ -834,6 +834,18 @@ - || !strcmp (this_char, "quota") - || !strcmp (this_char, "usrquota")) - /* Don't do 
anything ;-) */ ; -+ else if (!strcmp (this_char, "iopen")) { -+ set_opt (sbi->s_mount_opt, IOPEN); -+ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); -+ } -+ else if (!strcmp (this_char, "noiopen")) { -+ clear_opt (sbi->s_mount_opt, IOPEN); -+ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); -+ } -+ else if (!strcmp (this_char, "iopen_nopriv")) { -+ set_opt (sbi->s_mount_opt, IOPEN); -+ set_opt (sbi->s_mount_opt, IOPEN_NOPRIV); -+ } - else if (!strcmp (this_char, "journal")) { - /* @@@ FIXME */ - /* Eventually we will want to be able to create -Index: linux-aed/include/linux/ext3_fs.h -=================================================================== ---- linux-aed.orig/include/linux/ext3_fs.h Tue May 4 19:17:08 2004 -+++ linux-aed/include/linux/ext3_fs.h Tue May 4 19:17:12 2004 -@@ -321,6 +321,8 @@ - #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */ - #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ - #define EXT3_MOUNT_INDEX 0x4000 /* Enable directory index */ -+#define EXT3_MOUNT_IOPEN 0x8000 /* Allow access via iopen */ -+#define EXT3_MOUNT_IOPEN_NOPRIV 0x10000 /* Make iopen world-readable */ - #define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ diff --git a/lustre/kernel_patches/patches/linux-2.4.18ea-0.8.26-2.patch b/lustre/kernel_patches/patches/linux-2.4.18ea-0.8.26-2.patch deleted file mode 100644 index c7d06a8..0000000 --- a/lustre/kernel_patches/patches/linux-2.4.18ea-0.8.26-2.patch +++ /dev/null @@ -1,1775 +0,0 @@ - fs/ext3/Makefile | 4 - fs/ext3/ext3-exports.c | 13 - fs/ext3/ialloc.c | 2 - fs/ext3/inode.c | 29 - - fs/ext3/namei.c | 8 - fs/ext3/super.c | 23 - fs/ext3/xattr.c | 1242 +++++++++++++++++++++++++++++++++++++++++++++ - include/linux/ext3_fs.h | 46 - - include/linux/ext3_jbd.h | 8 - include/linux/ext3_xattr.h | 155 +++++ - include/linux/xattr.h | 15 - 11 files changed, 1494 insertions(+), 51 deletions(-) - ---- /dev/null 
2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.18-alexey/fs/ext3/ext3-exports.c 2003-09-01 14:55:39.000000000 +0400 -@@ -0,0 +1,13 @@ -+#include -+#include -+#include -+#include -+#include -+ -+EXPORT_SYMBOL(ext3_force_commit); -+EXPORT_SYMBOL(ext3_bread); -+EXPORT_SYMBOL(ext3_xattr_register); -+EXPORT_SYMBOL(ext3_xattr_unregister); -+EXPORT_SYMBOL(ext3_xattr_get); -+EXPORT_SYMBOL(ext3_xattr_list); -+EXPORT_SYMBOL(ext3_xattr_set); ---- linux-2.4.18/fs/ext3/ialloc.c~linux-2.4.18ea-0.8.26-2 2003-07-28 17:52:04.000000000 +0400 -+++ linux-2.4.18-alexey/fs/ext3/ialloc.c 2003-09-01 14:55:39.000000000 +0400 -@@ -17,6 +17,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -216,6 +217,7 @@ void ext3_free_inode (handle_t *handle, - * as writing the quota to disk may need the lock as well. - */ - DQUOT_INIT(inode); -+ ext3_xattr_drop_inode(handle, inode); - DQUOT_FREE_INODE(inode); - DQUOT_DROP(inode); - ---- linux-2.4.18/fs/ext3/inode.c~linux-2.4.18ea-0.8.26-2 2003-07-28 17:52:04.000000000 +0400 -+++ linux-2.4.18-alexey/fs/ext3/inode.c 2003-09-01 14:55:39.000000000 +0400 -@@ -39,6 +39,18 @@ - */ - #undef SEARCH_FROM_ZERO - -+/* -+ * Test whether an inode is a fast symlink. -+ */ -+static inline int ext3_inode_is_fast_symlink(struct inode *inode) -+{ -+ int ea_blocks = EXT3_I(inode)->i_file_acl ? -+ (inode->i_sb->s_blocksize >> 9) : 0; -+ -+ return (S_ISLNK(inode->i_mode) && -+ inode->i_blocks - ea_blocks == 0); -+} -+ - /* The ext3 forget function must perform a revoke if we are freeing data - * which has been journaled. Metadata (eg. indirect blocks) must be - * revoked in all cases. -@@ -48,7 +60,7 @@ - * still needs to be revoked. 
- */ - --static int ext3_forget(handle_t *handle, int is_metadata, -+int ext3_forget(handle_t *handle, int is_metadata, - struct inode *inode, struct buffer_head *bh, - int blocknr) - { -@@ -164,9 +176,7 @@ void ext3_delete_inode (struct inode * i - { - handle_t *handle; - -- if (is_bad_inode(inode) || -- inode->i_ino == EXT3_ACL_IDX_INO || -- inode->i_ino == EXT3_ACL_DATA_INO) -+ if (is_bad_inode(inode)) - goto no_delete; - - lock_kernel(); -@@ -1877,6 +1887,8 @@ void ext3_truncate(struct inode * inode) - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode))) - return; -+ if (ext3_inode_is_fast_symlink(inode)) -+ return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; - -@@ -2038,8 +2050,6 @@ int ext3_get_inode_loc (struct inode *in - struct ext3_group_desc * gdp; - - if ((inode->i_ino != EXT3_ROOT_INO && -- inode->i_ino != EXT3_ACL_IDX_INO && -- inode->i_ino != EXT3_ACL_DATA_INO && - inode->i_ino != EXT3_JOURNAL_INO && - inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) || - inode->i_ino > le32_to_cpu( -@@ -2166,10 +2176,7 @@ void ext3_read_inode(struct inode * inod - - brelse (iloc.bh); - -- if (inode->i_ino == EXT3_ACL_IDX_INO || -- inode->i_ino == EXT3_ACL_DATA_INO) -- /* Nothing to do */ ; -- else if (S_ISREG(inode->i_mode)) { -+ if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext3_file_inode_operations; - inode->i_fop = &ext3_file_operations; - inode->i_mapping->a_ops = &ext3_aops; -@@ -2177,7 +2184,7 @@ void ext3_read_inode(struct inode * inod - inode->i_op = &ext3_dir_inode_operations; - inode->i_fop = &ext3_dir_operations; - } else if (S_ISLNK(inode->i_mode)) { -- if (!inode->i_blocks) -+ if (ext3_inode_is_fast_symlink(inode)) - inode->i_op = &ext3_fast_symlink_inode_operations; - else { - inode->i_op = &page_symlink_inode_operations; ---- linux-2.4.18/fs/ext3/Makefile~linux-2.4.18ea-0.8.26-2 2003-08-29 16:53:17.000000000 +0400 -+++ linux-2.4.18-alexey/fs/ext3/Makefile 2003-09-01 14:55:50.000000000 +0400 -@@ -9,10 +9,10 @@ 
- - O_TARGET := ext3.o - --export-objs := super.o inode.o -+export-objs := ext3-exports.o - - obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ -- ioctl.o namei.o super.o symlink.o hash.o -+ ioctl.o namei.o super.o symlink.o xattr.o hash.o ext3-exports.o - obj-m := $(O_TARGET) - - include $(TOPDIR)/Rules.make ---- linux-2.4.18/fs/ext3/namei.c~linux-2.4.18ea-0.8.26-2 2003-09-01 11:50:59.000000000 +0400 -+++ linux-2.4.18-alexey/fs/ext3/namei.c 2003-09-01 14:55:39.000000000 +0400 -@@ -29,6 +29,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -1524,6 +1525,7 @@ static int ext3_add_nondir(handle_t *han - d_instantiate(dentry, inode); - return 0; - } -+ ext3_xattr_drop_inode(handle, inode); - ext3_dec_count(handle, inode); - iput(inode); - return err; -@@ -1612,7 +1614,7 @@ static int ext3_mkdir(struct inode * dir - if (IS_SYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, S_IFDIR); -+ inode = ext3_new_inode (handle, dir, S_IFDIR | mode); - err = PTR_ERR(inode); - if (IS_ERR(inode)) - goto out_stop; -@@ -1620,7 +1622,6 @@ static int ext3_mkdir(struct inode * dir - inode->i_op = &ext3_dir_inode_operations; - inode->i_fop = &ext3_dir_operations; - inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; -- inode->i_blocks = 0; - dir_block = ext3_bread (handle, inode, 0, 1, &err); - if (!dir_block) { - inode->i_nlink--; /* is this nlink == 0? 
*/ -@@ -1647,9 +1648,6 @@ static int ext3_mkdir(struct inode * dir - BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata"); - ext3_journal_dirty_metadata(handle, dir_block); - brelse (dir_block); -- inode->i_mode = S_IFDIR | mode; -- if (dir->i_mode & S_ISGID) -- inode->i_mode |= S_ISGID; - ext3_mark_inode_dirty(handle, inode); - err = ext3_add_entry (handle, dentry, inode); - if (err) { ---- linux-2.4.18/fs/ext3/super.c~linux-2.4.18ea-0.8.26-2 2003-08-29 16:53:17.000000000 +0400 -+++ linux-2.4.18-alexey/fs/ext3/super.c 2003-09-01 14:55:39.000000000 +0400 -@@ -24,6 +24,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -406,6 +407,7 @@ void ext3_put_super (struct super_block - kdev_t j_dev = sbi->s_journal->j_dev; - int i; - -+ ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); - if (!(sb->s_flags & MS_RDONLY)) { - EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); -@@ -1743,18 +1745,27 @@ int ext3_statfs (struct super_block * sb - - static DECLARE_FSTYPE_DEV(ext3_fs_type, "ext3", ext3_read_super); - --static int __init init_ext3_fs(void) -+static void exit_ext3_fs(void) - { -- return register_filesystem(&ext3_fs_type); -+ unregister_filesystem(&ext3_fs_type); -+ exit_ext3_xattr_user(); -+ exit_ext3_xattr(); - } - --static void __exit exit_ext3_fs(void) -+static int __init init_ext3_fs(void) - { -- unregister_filesystem(&ext3_fs_type); -+ int error = init_ext3_xattr(); -+ if (!error) -+ error = init_ext3_xattr_user(); -+ if (!error) -+ error = register_filesystem(&ext3_fs_type); -+ if (!error) -+ return 0; -+ -+ exit_ext3_fs(); -+ return error; - } - --EXPORT_SYMBOL(ext3_force_commit); --EXPORT_SYMBOL(ext3_bread); - - MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); - MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); ---- /dev/null 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.18-alexey/fs/ext3/xattr.c 2003-09-01 
14:55:39.000000000 +0400 -@@ -0,0 +1,1242 @@ -+/* -+ * linux/fs/ext3/xattr.c -+ * -+ * Copyright (C) 2001 by Andreas Gruenbacher, -+ * -+ * Fix by Harrison Xing . -+ * Ext3 code with a lot of help from Eric Jarman . -+ * Extended attributes for symlinks and special files added per -+ * suggestion of Luka Renko . -+ */ -+ -+/* -+ * Extended attributes are stored on disk blocks allocated outside of -+ * any inode. The i_file_acl field is then made to point to this allocated -+ * block. If all extended attributes of an inode are identical, these -+ * inodes may share the same extended attribute block. Such situations -+ * are automatically detected by keeping a cache of recent attribute block -+ * numbers and hashes over the block's contents in memory. -+ * -+ * -+ * Extended attribute block layout: -+ * -+ * +------------------+ -+ * | header | -+ * ¦ entry 1 | | -+ * | entry 2 | | growing downwards -+ * | entry 3 | v -+ * | four null bytes | -+ * | . . . | -+ * | value 1 | ^ -+ * | value 3 | | growing upwards -+ * | value 2 | | -+ * +------------------+ -+ * -+ * The block header is followed by multiple entry descriptors. These entry -+ * descriptors are variable in size, and alligned to EXT3_XATTR_PAD -+ * byte boundaries. The entry descriptors are sorted by attribute name, -+ * so that two extended attribute blocks can be compared efficiently. -+ * -+ * Attribute values are aligned to the end of the block, stored in -+ * no specific order. They are also padded to EXT3_XATTR_PAD byte -+ * boundaries. No additional gaps are left between them. -+ * -+ * Locking strategy -+ * ---------------- -+ * The VFS already holds the BKL and the inode->i_sem semaphore when any of -+ * the xattr inode operations are called, so we are guaranteed that only one -+ * processes accesses extended attributes of an inode at any time. -+ * -+ * For writing we also grab the ext3_xattr_sem semaphore. 
This ensures that -+ * only a single process is modifying an extended attribute block, even -+ * if the block is shared among inodes. -+ * -+ * Note for porting to 2.5 -+ * ----------------------- -+ * The BKL will no longer be held in the xattr inode operations. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#ifdef CONFIG_EXT3_FS_XATTR_SHARING -+#include -+#endif -+#include -+#include -+#include -+#include -+ -+/* These symbols may be needed by a module. */ -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+# define mark_buffer_dirty(bh) mark_buffer_dirty(bh, 1) -+#endif -+ -+#define HDR(bh) ((struct ext3_xattr_header *)((bh)->b_data)) -+#define ENTRY(ptr) ((struct ext3_xattr_entry *)(ptr)) -+#define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1) -+#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) -+ -+#ifdef EXT3_XATTR_DEBUG -+# define ea_idebug(inode, f...) do { \ -+ printk(KERN_DEBUG "inode %s:%ld: ", \ -+ kdevname(inode->i_dev), inode->i_ino); \ -+ printk(f); \ -+ printk("\n"); \ -+ } while (0) -+# define ea_bdebug(bh, f...) do { \ -+ printk(KERN_DEBUG "block %s:%ld: ", \ -+ kdevname(bh->b_dev), bh->b_blocknr); \ -+ printk(f); \ -+ printk("\n"); \ -+ } while (0) -+#else -+# define ea_idebug(f...) -+# define ea_bdebug(f...) 
-+#endif -+ -+static int ext3_xattr_set2(handle_t *, struct inode *, struct buffer_head *, -+ struct ext3_xattr_header *); -+ -+#ifdef CONFIG_EXT3_FS_XATTR_SHARING -+ -+static int ext3_xattr_cache_insert(struct buffer_head *); -+static struct buffer_head *ext3_xattr_cache_find(struct inode *, -+ struct ext3_xattr_header *); -+static void ext3_xattr_cache_remove(struct buffer_head *); -+static void ext3_xattr_rehash(struct ext3_xattr_header *, -+ struct ext3_xattr_entry *); -+ -+static struct mb_cache *ext3_xattr_cache; -+ -+#else -+# define ext3_xattr_cache_insert(bh) 0 -+# define ext3_xattr_cache_find(inode, header) NULL -+# define ext3_xattr_cache_remove(bh) do {} while(0) -+# define ext3_xattr_rehash(header, entry) do {} while(0) -+#endif -+ -+/* -+ * If a file system does not share extended attributes among inodes, -+ * we should not need the ext3_xattr_sem semaphore. However, the -+ * filesystem may still contain shared blocks, so we always take -+ * the lock. -+ */ -+ -+DECLARE_MUTEX(ext3_xattr_sem); -+ -+static inline void -+ext3_xattr_lock(void) -+{ -+ down(&ext3_xattr_sem); -+} -+ -+static inline void -+ext3_xattr_unlock(void) -+{ -+ up(&ext3_xattr_sem); -+} -+ -+static inline int -+ext3_xattr_new_block(handle_t *handle, struct inode *inode, -+ int * errp, int force) -+{ -+ struct super_block *sb = inode->i_sb; -+ int goal = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) + -+ EXT3_I(inode)->i_block_group * EXT3_BLOCKS_PER_GROUP(sb); -+ -+ /* How can we enforce the allocation? */ -+ int block = ext3_new_block(handle, inode, goal, 0, 0, errp); -+#ifdef OLD_QUOTAS -+ if (!*errp) -+ inode->i_blocks += inode->i_sb->s_blocksize >> 9; -+#endif -+ return block; -+} -+ -+static inline int -+ext3_xattr_quota_alloc(struct inode *inode, int force) -+{ -+ /* How can we enforce the allocation? 
*/ -+#ifdef OLD_QUOTAS -+ int error = DQUOT_ALLOC_BLOCK(inode->i_sb, inode, 1); -+ if (!error) -+ inode->i_blocks += inode->i_sb->s_blocksize >> 9; -+#else -+ int error = DQUOT_ALLOC_BLOCK(inode, 1); -+#endif -+ return error; -+} -+ -+#ifdef OLD_QUOTAS -+ -+static inline void -+ext3_xattr_quota_free(struct inode *inode) -+{ -+ DQUOT_FREE_BLOCK(inode->i_sb, inode, 1); -+ inode->i_blocks -= inode->i_sb->s_blocksize >> 9; -+} -+ -+static inline void -+ext3_xattr_free_block(handle_t *handle, struct inode * inode, -+ unsigned long block) -+{ -+ ext3_free_blocks(handle, inode, block, 1); -+ inode->i_blocks -= inode->i_sb->s_blocksize >> 9; -+} -+ -+#else -+# define ext3_xattr_quota_free(inode) \ -+ DQUOT_FREE_BLOCK(inode, 1) -+# define ext3_xattr_free_block(handle, inode, block) \ -+ ext3_free_blocks(handle, inode, block, 1) -+#endif -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18) -+ -+static inline struct buffer_head * -+sb_bread(struct super_block *sb, int block) -+{ -+ return bread(sb->s_dev, block, sb->s_blocksize); -+} -+ -+static inline struct buffer_head * -+sb_getblk(struct super_block *sb, int block) -+{ -+ return getblk(sb->s_dev, block, sb->s_blocksize); -+} -+ -+#endif -+ -+struct ext3_xattr_handler *ext3_xattr_handlers[EXT3_XATTR_INDEX_MAX]; -+rwlock_t ext3_handler_lock = RW_LOCK_UNLOCKED; -+ -+int -+ext3_xattr_register(int name_index, struct ext3_xattr_handler *handler) -+{ -+ int error = -EINVAL; -+ -+ if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) { -+ write_lock(&ext3_handler_lock); -+ if (!ext3_xattr_handlers[name_index-1]) { -+ ext3_xattr_handlers[name_index-1] = handler; -+ error = 0; -+ } -+ write_unlock(&ext3_handler_lock); -+ } -+ return error; -+} -+ -+void -+ext3_xattr_unregister(int name_index, struct ext3_xattr_handler *handler) -+{ -+ if (name_index > 0 || name_index <= EXT3_XATTR_INDEX_MAX) { -+ write_lock(&ext3_handler_lock); -+ ext3_xattr_handlers[name_index-1] = NULL; -+ write_unlock(&ext3_handler_lock); -+ } -+} -+ 
-+static inline const char * -+strcmp_prefix(const char *a, const char *a_prefix) -+{ -+ while (*a_prefix && *a == *a_prefix) { -+ a++; -+ a_prefix++; -+ } -+ return *a_prefix ? NULL : a; -+} -+ -+/* -+ * Decode the extended attribute name, and translate it into -+ * the name_index and name suffix. -+ */ -+static inline struct ext3_xattr_handler * -+ext3_xattr_resolve_name(const char **name) -+{ -+ struct ext3_xattr_handler *handler = NULL; -+ int i; -+ -+ if (!*name) -+ return NULL; -+ read_lock(&ext3_handler_lock); -+ for (i=0; iprefix); -+ if (n) { -+ handler = ext3_xattr_handlers[i]; -+ *name = n; -+ break; -+ } -+ } -+ } -+ read_unlock(&ext3_handler_lock); -+ return handler; -+} -+ -+static inline struct ext3_xattr_handler * -+ext3_xattr_handler(int name_index) -+{ -+ struct ext3_xattr_handler *handler = NULL; -+ if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) { -+ read_lock(&ext3_handler_lock); -+ handler = ext3_xattr_handlers[name_index-1]; -+ read_unlock(&ext3_handler_lock); -+ } -+ return handler; -+} -+ -+/* -+ * Inode operation getxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+ssize_t -+ext3_getxattr(struct dentry *dentry, const char *name, -+ void *buffer, size_t size) -+{ -+ struct ext3_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ handler = ext3_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->get(inode, name, buffer, size); -+} -+ -+/* -+ * Inode operation listxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+ssize_t -+ext3_listxattr(struct dentry *dentry, char *buffer, size_t size) -+{ -+ return ext3_xattr_list(dentry->d_inode, buffer, size); -+} -+ -+/* -+ * Inode operation setxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+int -+ext3_setxattr(struct dentry *dentry, const char *name, -+ void *value, size_t size, int flags) -+{ -+ struct ext3_xattr_handler *handler; -+ struct 
inode *inode = dentry->d_inode; -+ -+ if (size == 0) -+ value = ""; /* empty EA, do not remove */ -+ handler = ext3_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->set(inode, name, value, size, flags); -+} -+ -+/* -+ * Inode operation removexattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+int -+ext3_removexattr(struct dentry *dentry, const char *name) -+{ -+ struct ext3_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ handler = ext3_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->set(inode, name, NULL, 0, XATTR_REPLACE); -+} -+ -+/* -+ * ext3_xattr_get() -+ * -+ * Copy an extended attribute into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. -+ */ -+int -+ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_entry *entry; -+ unsigned int block, size; -+ char *end; -+ int name_len, error; -+ -+ ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", -+ name_index, name, buffer, (long)buffer_size); -+ -+ if (name == NULL) -+ return -EINVAL; -+ if (!EXT3_I(inode)->i_file_acl) -+ return -ENOATTR; -+ block = EXT3_I(inode)->i_file_acl; -+ ea_idebug(inode, "reading block %d", block); -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(inode->i_sb, "ext3_xattr_get", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* find 
named attribute */ -+ name_len = strlen(name); -+ -+ error = -ERANGE; -+ if (name_len > 255) -+ goto cleanup; -+ entry = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (name_index == entry->e_name_index && -+ name_len == entry->e_name_len && -+ memcmp(name, entry->e_name, name_len) == 0) -+ goto found; -+ entry = next; -+ } -+ /* Check the remaining name entries */ -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ entry = next; -+ } -+ if (ext3_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ error = -ENOATTR; -+ goto cleanup; -+found: -+ /* check the buffer size */ -+ if (entry->e_value_block != 0) -+ goto bad_block; -+ size = le32_to_cpu(entry->e_value_size); -+ if (size > inode->i_sb->s_blocksize || -+ le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize) -+ goto bad_block; -+ -+ if (ext3_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ if (buffer) { -+ error = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ /* return value of attribute */ -+ memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs), -+ size); -+ } -+ error = size; -+ -+cleanup: -+ brelse(bh); -+ -+ return error; -+} -+ -+/* -+ * ext3_xattr_list() -+ * -+ * Copy a list of attribute names into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. 
-+ */ -+int -+ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_entry *entry; -+ unsigned int block, size = 0; -+ char *buf, *end; -+ int error; -+ -+ ea_idebug(inode, "buffer=%p, buffer_size=%ld", -+ buffer, (long)buffer_size); -+ -+ if (!EXT3_I(inode)->i_file_acl) -+ return 0; -+ block = EXT3_I(inode)->i_file_acl; -+ ea_idebug(inode, "reading block %d", block); -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(inode->i_sb, "ext3_xattr_list", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* compute the size required for the list of attribute names */ -+ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); -+ entry = EXT3_XATTR_NEXT(entry)) { -+ struct ext3_xattr_handler *handler; -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ -+ handler = ext3_xattr_handler(entry->e_name_index); -+ if (handler) { -+ size += handler->list(NULL, inode, entry->e_name, -+ entry->e_name_len) + 1; -+ } -+ } -+ -+ if (ext3_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ if (!buffer) { -+ error = size; -+ goto cleanup; -+ } else { -+ error = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ } -+ -+ /* list the attribute names */ -+ buf = buffer; -+ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); -+ entry = EXT3_XATTR_NEXT(entry)) { -+ struct ext3_xattr_handler *handler; -+ -+ handler = ext3_xattr_handler(entry->e_name_index); -+ if (handler) { -+ buf += handler->list(buf, inode, entry->e_name, -+ entry->e_name_len); -+ *buf++ = '\0'; -+ } -+ } -+ error = size; -+ -+cleanup: -+ 
brelse(bh); -+ -+ return error; -+} -+ -+/* -+ * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is -+ * not set, set it. -+ */ -+static void ext3_xattr_update_super_block(handle_t *handle, -+ struct super_block *sb) -+{ -+ if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR)) -+ return; -+ -+ lock_super(sb); -+ ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+ EXT3_SB(sb)->s_feature_compat |= EXT3_FEATURE_COMPAT_EXT_ATTR; -+#endif -+ EXT3_SB(sb)->s_es->s_feature_compat |= -+ cpu_to_le32(EXT3_FEATURE_COMPAT_EXT_ATTR); -+ sb->s_dirt = 1; -+ ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); -+ unlock_super(sb); -+} -+ -+/* -+ * ext3_xattr_set() -+ * -+ * Create, replace or remove an extended attribute for this inode. Buffer -+ * is NULL to remove an existing extended attribute, and non-NULL to -+ * either replace an existing extended attribute, or create a new extended -+ * attribute. The flags XATTR_REPLACE and XATTR_CREATE -+ * specify that an extended attribute must exist and must not exist -+ * previous to the call, respectively. -+ * -+ * Returns 0, or a negative error number on failure. -+ */ -+int -+ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, void *value, size_t value_len, int flags) -+{ -+ struct super_block *sb = inode->i_sb; -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_header *header = NULL; -+ struct ext3_xattr_entry *here, *last; -+ unsigned int name_len; -+ int min_offs = sb->s_blocksize, not_found = 1, free, error; -+ char *end; -+ -+ /* -+ * header -- Points either into bh, or to a temporarily -+ * allocated buffer. -+ * here -- The named entry found, or the place for inserting, within -+ * the block pointed to by header. -+ * last -- Points right after the last named entry within the block -+ * pointed to by header. 
-+ * min_offs -- The offset of the first value (values are aligned -+ * towards the end of the block). -+ * end -- Points right after the block pointed to by header. -+ */ -+ -+ ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", -+ name_index, name, value, (long)value_len); -+ -+ if (IS_RDONLY(inode)) -+ return -EROFS; -+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -+ return -EPERM; -+ if (value == NULL) -+ value_len = 0; -+ if (name == NULL) -+ return -EINVAL; -+ name_len = strlen(name); -+ if (name_len > 255 || value_len > sb->s_blocksize) -+ return -ERANGE; -+ ext3_xattr_lock(); -+ -+ if (EXT3_I(inode)->i_file_acl) { -+ /* The inode already has an extended attribute block. */ -+ int block = EXT3_I(inode)->i_file_acl; -+ -+ bh = sb_bread(sb, block); -+ error = -EIO; -+ if (!bh) -+ goto cleanup; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), -+ le32_to_cpu(HDR(bh)->h_refcount)); -+ header = HDR(bh); -+ end = bh->b_data + bh->b_size; -+ if (header->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ header->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(sb, "ext3_xattr_set", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* Find the named attribute. */ -+ here = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(here)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(here); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (!here->e_value_block && here->e_value_size) { -+ int offs = le16_to_cpu(here->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ not_found = name_index - here->e_name_index; -+ if (!not_found) -+ not_found = name_len - here->e_name_len; -+ if (!not_found) -+ not_found = memcmp(name, here->e_name,name_len); -+ if (not_found <= 0) -+ break; -+ here = next; -+ } -+ last = here; -+ /* We still need to compute min_offs and last. 
*/ -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (!last->e_value_block && last->e_value_size) { -+ int offs = le16_to_cpu(last->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ last = next; -+ } -+ -+ /* Check whether we have enough space left. */ -+ free = min_offs - ((char*)last - (char*)header) - sizeof(__u32); -+ } else { -+ /* We will use a new extended attribute block. */ -+ free = sb->s_blocksize - -+ sizeof(struct ext3_xattr_header) - sizeof(__u32); -+ here = last = NULL; /* avoid gcc uninitialized warning. */ -+ } -+ -+ if (not_found) { -+ /* Request to remove a nonexistent attribute? */ -+ error = -ENOATTR; -+ if (flags & XATTR_REPLACE) -+ goto cleanup; -+ error = 0; -+ if (value == NULL) -+ goto cleanup; -+ else -+ free -= EXT3_XATTR_LEN(name_len); -+ } else { -+ /* Request to create an existing attribute? */ -+ error = -EEXIST; -+ if (flags & XATTR_CREATE) -+ goto cleanup; -+ if (!here->e_value_block && here->e_value_size) { -+ unsigned int size = le32_to_cpu(here->e_value_size); -+ -+ if (le16_to_cpu(here->e_value_offs) + size > -+ sb->s_blocksize || size > sb->s_blocksize) -+ goto bad_block; -+ free += EXT3_XATTR_SIZE(size); -+ } -+ } -+ free -= EXT3_XATTR_SIZE(value_len); -+ error = -ENOSPC; -+ if (free < 0) -+ goto cleanup; -+ -+ /* Here we know that we can set the new attribute. 
*/ -+ -+ if (header) { -+ if (header->h_refcount == cpu_to_le32(1)) { -+ ea_bdebug(bh, "modifying in-place"); -+ ext3_xattr_cache_remove(bh); -+ error = ext3_journal_get_write_access(handle, bh); -+ if (error) -+ goto cleanup; -+ } else { -+ int offset; -+ -+ ea_bdebug(bh, "cloning"); -+ header = kmalloc(bh->b_size, GFP_KERNEL); -+ error = -ENOMEM; -+ if (header == NULL) -+ goto cleanup; -+ memcpy(header, HDR(bh), bh->b_size); -+ header->h_refcount = cpu_to_le32(1); -+ offset = (char *)header - bh->b_data; -+ here = ENTRY((char *)here + offset); -+ last = ENTRY((char *)last + offset); -+ } -+ } else { -+ /* Allocate a buffer where we construct the new block. */ -+ header = kmalloc(sb->s_blocksize, GFP_KERNEL); -+ error = -ENOMEM; -+ if (header == NULL) -+ goto cleanup; -+ memset(header, 0, sb->s_blocksize); -+ end = (char *)header + sb->s_blocksize; -+ header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC); -+ header->h_blocks = header->h_refcount = cpu_to_le32(1); -+ last = here = ENTRY(header+1); -+ } -+ -+ if (not_found) { -+ /* Insert the new name. */ -+ int size = EXT3_XATTR_LEN(name_len); -+ int rest = (char *)last - (char *)here; -+ memmove((char *)here + size, here, rest); -+ memset(here, 0, size); -+ here->e_name_index = name_index; -+ here->e_name_len = name_len; -+ memcpy(here->e_name, name, name_len); -+ } else { -+ /* Remove the old value. */ -+ if (!here->e_value_block && here->e_value_size) { -+ char *first_val = (char *)header + min_offs; -+ int offs = le16_to_cpu(here->e_value_offs); -+ char *val = (char *)header + offs; -+ size_t size = EXT3_XATTR_SIZE( -+ le32_to_cpu(here->e_value_size)); -+ memmove(first_val + size, first_val, val - first_val); -+ memset(first_val, 0, size); -+ here->e_value_offs = 0; -+ min_offs += size; -+ -+ /* Adjust all value offsets. 
*/ -+ last = ENTRY(header+1); -+ while (!IS_LAST_ENTRY(last)) { -+ int o = le16_to_cpu(last->e_value_offs); -+ if (!last->e_value_block && o < offs) -+ last->e_value_offs = -+ cpu_to_le16(o + size); -+ last = EXT3_XATTR_NEXT(last); -+ } -+ } -+ if (value == NULL) { -+ /* Remove this attribute. */ -+ if (EXT3_XATTR_NEXT(ENTRY(header+1)) == last) { -+ /* This block is now empty. */ -+ error = ext3_xattr_set2(handle, inode, bh,NULL); -+ goto cleanup; -+ } else { -+ /* Remove the old name. */ -+ int size = EXT3_XATTR_LEN(name_len); -+ last = ENTRY((char *)last - size); -+ memmove(here, (char*)here + size, -+ (char*)last - (char*)here); -+ memset(last, 0, size); -+ } -+ } -+ } -+ -+ if (value != NULL) { -+ /* Insert the new value. */ -+ here->e_value_size = cpu_to_le32(value_len); -+ if (value_len) { -+ size_t size = EXT3_XATTR_SIZE(value_len); -+ char *val = (char *)header + min_offs - size; -+ here->e_value_offs = -+ cpu_to_le16((char *)val - (char *)header); -+ memset(val + size - EXT3_XATTR_PAD, 0, -+ EXT3_XATTR_PAD); /* Clear the pad bytes. */ -+ memcpy(val, value, value_len); -+ } -+ } -+ ext3_xattr_rehash(header, here); -+ -+ error = ext3_xattr_set2(handle, inode, bh, header); -+ -+cleanup: -+ brelse(bh); -+ if (!(bh && header == HDR(bh))) -+ kfree(header); -+ ext3_xattr_unlock(); -+ -+ return error; -+} -+ -+/* -+ * Second half of ext3_xattr_set(): Update the file system. -+ */ -+static int -+ext3_xattr_set2(handle_t *handle, struct inode *inode, -+ struct buffer_head *old_bh, struct ext3_xattr_header *header) -+{ -+ struct super_block *sb = inode->i_sb; -+ struct buffer_head *new_bh = NULL; -+ int error; -+ -+ if (header) { -+ new_bh = ext3_xattr_cache_find(inode, header); -+ if (new_bh) { -+ /* -+ * We found an identical block in the cache. -+ * The old block will be released after updating -+ * the inode. 
-+ */ -+ ea_bdebug(old_bh, "reusing block %ld", -+ new_bh->b_blocknr); -+ -+ error = -EDQUOT; -+ if (ext3_xattr_quota_alloc(inode, 1)) -+ goto cleanup; -+ -+ error = ext3_journal_get_write_access(handle, new_bh); -+ if (error) -+ goto cleanup; -+ HDR(new_bh)->h_refcount = cpu_to_le32( -+ le32_to_cpu(HDR(new_bh)->h_refcount) + 1); -+ ea_bdebug(new_bh, "refcount now=%d", -+ le32_to_cpu(HDR(new_bh)->h_refcount)); -+ } else if (old_bh && header == HDR(old_bh)) { -+ /* Keep this block. */ -+ new_bh = old_bh; -+ (void)ext3_xattr_cache_insert(new_bh); -+ } else { -+ /* We need to allocate a new block */ -+ int force = EXT3_I(inode)->i_file_acl != 0; -+ int block = ext3_xattr_new_block(handle, inode, -+ &error, force); -+ if (error) -+ goto cleanup; -+ ea_idebug(inode, "creating block %d", block); -+ -+ new_bh = sb_getblk(sb, block); -+ if (!new_bh) { -+getblk_failed: ext3_xattr_free_block(handle, inode, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(new_bh); -+ error = ext3_journal_get_create_access(handle, new_bh); -+ if (error) { -+ unlock_buffer(new_bh); -+ goto getblk_failed; -+ } -+ memcpy(new_bh->b_data, header, new_bh->b_size); -+ mark_buffer_uptodate(new_bh, 1); -+ unlock_buffer(new_bh); -+ (void)ext3_xattr_cache_insert(new_bh); -+ ext3_xattr_update_super_block(handle, sb); -+ } -+ error = ext3_journal_dirty_metadata(handle, new_bh); -+ if (error) -+ goto cleanup; -+ } -+ -+ /* Update the inode. */ -+ EXT3_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; -+ inode->i_ctime = CURRENT_TIME; -+ ext3_mark_inode_dirty(handle, inode); -+ if (IS_SYNC(inode)) -+ handle->h_sync = 1; -+ -+ error = 0; -+ if (old_bh && old_bh != new_bh) { -+ /* -+ * If there was an old block, and we are not still using it, -+ * we now release the old block. -+ */ -+ unsigned int refcount = le32_to_cpu(HDR(old_bh)->h_refcount); -+ -+ error = ext3_journal_get_write_access(handle, old_bh); -+ if (error) -+ goto cleanup; -+ if (refcount == 1) { -+ /* Free the old block. 
*/ -+ ea_bdebug(old_bh, "freeing"); -+ ext3_xattr_free_block(handle, inode, old_bh->b_blocknr); -+ -+ /* ext3_forget() calls bforget() for us, but we -+ let our caller release old_bh, so we need to -+ duplicate the handle before. */ -+ get_bh(old_bh); -+ ext3_forget(handle, 1, inode, old_bh,old_bh->b_blocknr); -+ } else { -+ /* Decrement the refcount only. */ -+ refcount--; -+ HDR(old_bh)->h_refcount = cpu_to_le32(refcount); -+ ext3_xattr_quota_free(inode); -+ ext3_journal_dirty_metadata(handle, old_bh); -+ ea_bdebug(old_bh, "refcount now=%d", refcount); -+ } -+ } -+ -+cleanup: -+ if (old_bh != new_bh) -+ brelse(new_bh); -+ -+ return error; -+} -+ -+/* -+ * ext3_xattr_drop_inode() -+ * -+ * Free extended attribute resources associated with this inode. This -+ * is called immediately before an inode is freed. -+ */ -+void -+ext3_xattr_drop_inode(handle_t *handle, struct inode *inode) -+{ -+ struct buffer_head *bh; -+ unsigned int block = EXT3_I(inode)->i_file_acl; -+ -+ if (!block) -+ return; -+ ext3_xattr_lock(); -+ -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) { -+ ext3_error(inode->i_sb, "ext3_xattr_drop_inode", -+ "inode %ld: block %d read error", inode->i_ino, block); -+ goto cleanup; -+ } -+ ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count))); -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+ ext3_error(inode->i_sb, "ext3_xattr_drop_inode", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ goto cleanup; -+ } -+ ext3_journal_get_write_access(handle, bh); -+ ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); -+ if (HDR(bh)->h_refcount == cpu_to_le32(1)) { -+ ext3_xattr_cache_remove(bh); -+ ext3_xattr_free_block(handle, inode, block); -+ ext3_forget(handle, 1, inode, bh, block); -+ bh = NULL; -+ } else { -+ HDR(bh)->h_refcount = cpu_to_le32( -+ le32_to_cpu(HDR(bh)->h_refcount) - 1); -+ ext3_journal_dirty_metadata(handle, bh); -+ if (IS_SYNC(inode)) -+ handle->h_sync = 1; -+ 
ext3_xattr_quota_free(inode); -+ } -+ EXT3_I(inode)->i_file_acl = 0; -+ -+cleanup: -+ brelse(bh); -+ ext3_xattr_unlock(); -+} -+ -+/* -+ * ext3_xattr_put_super() -+ * -+ * This is called when a file system is unmounted. -+ */ -+void -+ext3_xattr_put_super(struct super_block *sb) -+{ -+#ifdef CONFIG_EXT3_FS_XATTR_SHARING -+ mb_cache_shrink(ext3_xattr_cache, sb->s_dev); -+#endif -+} -+ -+#ifdef CONFIG_EXT3_FS_XATTR_SHARING -+ -+/* -+ * ext3_xattr_cache_insert() -+ * -+ * Create a new entry in the extended attribute cache, and insert -+ * it unless such an entry is already in the cache. -+ * -+ * Returns 0, or a negative error number on failure. -+ */ -+static int -+ext3_xattr_cache_insert(struct buffer_head *bh) -+{ -+ __u32 hash = le32_to_cpu(HDR(bh)->h_hash); -+ struct mb_cache_entry *ce; -+ int error; -+ -+ ce = mb_cache_entry_alloc(ext3_xattr_cache); -+ if (!ce) -+ return -ENOMEM; -+ error = mb_cache_entry_insert(ce, bh->b_dev, bh->b_blocknr, &hash); -+ if (error) { -+ mb_cache_entry_free(ce); -+ if (error == -EBUSY) { -+ ea_bdebug(bh, "already in cache (%d cache entries)", -+ atomic_read(&ext3_xattr_cache->c_entry_count)); -+ error = 0; -+ } -+ } else { -+ ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash, -+ atomic_read(&ext3_xattr_cache->c_entry_count)); -+ mb_cache_entry_release(ce); -+ } -+ return error; -+} -+ -+/* -+ * ext3_xattr_cmp() -+ * -+ * Compare two extended attribute blocks for equality. -+ * -+ * Returns 0 if the blocks are equal, 1 if they differ, and -+ * a negative error number on errors. 
-+ */ -+static int -+ext3_xattr_cmp(struct ext3_xattr_header *header1, -+ struct ext3_xattr_header *header2) -+{ -+ struct ext3_xattr_entry *entry1, *entry2; -+ -+ entry1 = ENTRY(header1+1); -+ entry2 = ENTRY(header2+1); -+ while (!IS_LAST_ENTRY(entry1)) { -+ if (IS_LAST_ENTRY(entry2)) -+ return 1; -+ if (entry1->e_hash != entry2->e_hash || -+ entry1->e_name_len != entry2->e_name_len || -+ entry1->e_value_size != entry2->e_value_size || -+ memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len)) -+ return 1; -+ if (entry1->e_value_block != 0 || entry2->e_value_block != 0) -+ return -EIO; -+ if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs), -+ (char *)header2 + le16_to_cpu(entry2->e_value_offs), -+ le32_to_cpu(entry1->e_value_size))) -+ return 1; -+ -+ entry1 = EXT3_XATTR_NEXT(entry1); -+ entry2 = EXT3_XATTR_NEXT(entry2); -+ } -+ if (!IS_LAST_ENTRY(entry2)) -+ return 1; -+ return 0; -+} -+ -+/* -+ * ext3_xattr_cache_find() -+ * -+ * Find an identical extended attribute block. -+ * -+ * Returns a pointer to the block found, or NULL if such a block was -+ * not found or an error occurred. 
-+ */ -+static struct buffer_head * -+ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header) -+{ -+ __u32 hash = le32_to_cpu(header->h_hash); -+ struct mb_cache_entry *ce; -+ -+ if (!header->h_hash) -+ return NULL; /* never share */ -+ ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); -+ ce = mb_cache_entry_find_first(ext3_xattr_cache, 0, inode->i_dev, hash); -+ while (ce) { -+ struct buffer_head *bh = sb_bread(inode->i_sb, ce->e_block); -+ -+ if (!bh) { -+ ext3_error(inode->i_sb, "ext3_xattr_cache_find", -+ "inode %ld: block %ld read error", -+ inode->i_ino, ce->e_block); -+ } else if (le32_to_cpu(HDR(bh)->h_refcount) > -+ EXT3_XATTR_REFCOUNT_MAX) { -+ ea_idebug(inode, "block %ld refcount %d>%d",ce->e_block, -+ le32_to_cpu(HDR(bh)->h_refcount), -+ EXT3_XATTR_REFCOUNT_MAX); -+ } else if (!ext3_xattr_cmp(header, HDR(bh))) { -+ ea_bdebug(bh, "b_count=%d",atomic_read(&(bh->b_count))); -+ mb_cache_entry_release(ce); -+ return bh; -+ } -+ brelse(bh); -+ ce = mb_cache_entry_find_next(ce, 0, inode->i_dev, hash); -+ } -+ return NULL; -+} -+ -+/* -+ * ext3_xattr_cache_remove() -+ * -+ * Remove the cache entry of a block from the cache. Called when a -+ * block becomes invalid. -+ */ -+static void -+ext3_xattr_cache_remove(struct buffer_head *bh) -+{ -+ struct mb_cache_entry *ce; -+ -+ ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_dev, bh->b_blocknr); -+ if (ce) { -+ ea_bdebug(bh, "removing (%d cache entries remaining)", -+ atomic_read(&ext3_xattr_cache->c_entry_count)-1); -+ mb_cache_entry_free(ce); -+ } else -+ ea_bdebug(bh, "no cache entry"); -+} -+ -+#define NAME_HASH_SHIFT 5 -+#define VALUE_HASH_SHIFT 16 -+ -+/* -+ * ext3_xattr_hash_entry() -+ * -+ * Compute the hash of an extended attribute. 
-+ */ -+static inline void ext3_xattr_hash_entry(struct ext3_xattr_header *header, -+ struct ext3_xattr_entry *entry) -+{ -+ __u32 hash = 0; -+ char *name = entry->e_name; -+ int n; -+ -+ for (n=0; n < entry->e_name_len; n++) { -+ hash = (hash << NAME_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ -+ *name++; -+ } -+ -+ if (entry->e_value_block == 0 && entry->e_value_size != 0) { -+ __u32 *value = (__u32 *)((char *)header + -+ le16_to_cpu(entry->e_value_offs)); -+ for (n = (le32_to_cpu(entry->e_value_size) + -+ EXT3_XATTR_ROUND) >> EXT3_XATTR_PAD_BITS; n; n--) { -+ hash = (hash << VALUE_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^ -+ le32_to_cpu(*value++); -+ } -+ } -+ entry->e_hash = cpu_to_le32(hash); -+} -+ -+#undef NAME_HASH_SHIFT -+#undef VALUE_HASH_SHIFT -+ -+#define BLOCK_HASH_SHIFT 16 -+ -+/* -+ * ext3_xattr_rehash() -+ * -+ * Re-compute the extended attribute hash value after an entry has changed. -+ */ -+static void ext3_xattr_rehash(struct ext3_xattr_header *header, -+ struct ext3_xattr_entry *entry) -+{ -+ struct ext3_xattr_entry *here; -+ __u32 hash = 0; -+ -+ ext3_xattr_hash_entry(header, entry); -+ here = ENTRY(header+1); -+ while (!IS_LAST_ENTRY(here)) { -+ if (!here->e_hash) { -+ /* Block is not shared if an entry's hash value == 0 */ -+ hash = 0; -+ break; -+ } -+ hash = (hash << BLOCK_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^ -+ le32_to_cpu(here->e_hash); -+ here = EXT3_XATTR_NEXT(here); -+ } -+ header->h_hash = cpu_to_le32(hash); -+} -+ -+#undef BLOCK_HASH_SHIFT -+ -+int __init -+init_ext3_xattr(void) -+{ -+ ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL, -+ sizeof(struct mb_cache_entry) + -+ sizeof(struct mb_cache_entry_index), 1, 61); -+ if (!ext3_xattr_cache) -+ return -ENOMEM; -+ -+ return 0; -+} -+ -+void -+exit_ext3_xattr(void) -+{ -+ if (ext3_xattr_cache) -+ mb_cache_destroy(ext3_xattr_cache); -+ ext3_xattr_cache = NULL; -+} -+ -+#else /* CONFIG_EXT3_FS_XATTR_SHARING */ -+ 
-+int __init -+init_ext3_xattr(void) -+{ -+ return 0; -+} -+ -+void -+exit_ext3_xattr(void) -+{ -+} -+ -+#endif /* CONFIG_EXT3_FS_XATTR_SHARING */ ---- linux-2.4.18/include/linux/ext3_fs.h~linux-2.4.18ea-0.8.26-2 2003-09-01 11:51:00.000000000 +0400 -+++ linux-2.4.18-alexey/include/linux/ext3_fs.h 2003-09-01 14:55:39.000000000 +0400 -@@ -63,8 +63,6 @@ - */ - #define EXT3_BAD_INO 1 /* Bad blocks inode */ - #define EXT3_ROOT_INO 2 /* Root inode */ --#define EXT3_ACL_IDX_INO 3 /* ACL inode */ --#define EXT3_ACL_DATA_INO 4 /* ACL inode */ - #define EXT3_BOOT_LOADER_INO 5 /* Boot loader inode */ - #define EXT3_UNDEL_DIR_INO 6 /* Undelete directory inode */ - #define EXT3_RESIZE_INO 7 /* Reserved group descriptors inode */ -@@ -94,7 +92,6 @@ - #else - # define EXT3_BLOCK_SIZE(s) (EXT3_MIN_BLOCK_SIZE << (s)->s_log_block_size) - #endif --#define EXT3_ACLE_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (struct ext3_acl_entry)) - #define EXT3_ADDR_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (__u32)) - #ifdef __KERNEL__ - # define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) -@@ -129,28 +126,6 @@ - #endif - - /* -- * ACL structures -- */ --struct ext3_acl_header /* Header of Access Control Lists */ --{ -- __u32 aclh_size; -- __u32 aclh_file_count; -- __u32 aclh_acle_count; -- __u32 aclh_first_acle; --}; -- --struct ext3_acl_entry /* Access Control List Entry */ --{ -- __u32 acle_size; -- __u16 acle_perms; /* Access permissions */ -- __u16 acle_type; /* Type of entry */ -- __u16 acle_tag; /* User or group identity */ -- __u16 acle_pad1; -- __u32 acle_next; /* Pointer on next entry for the */ -- /* same inode or on next free entry */ --}; -- --/* - * Structure of a blocks group descriptor - */ - struct ext3_group_desc -@@ -521,7 +496,7 @@ struct ext3_super_block { - #define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ - #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ - --#define EXT3_FEATURE_COMPAT_SUPP 0 -+#define EXT3_FEATURE_COMPAT_SUPP 
EXT3_FEATURE_COMPAT_EXT_ATTR - #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ - EXT3_FEATURE_INCOMPAT_RECOVER) - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ -@@ -623,6 +598,24 @@ struct dx_hash_info - #define HASH_NB_ALWAYS 1 - - -+/* Defined for extended attributes */ -+#define CONFIG_EXT3_FS_XATTR y -+#ifndef ENOATTR -+#define ENOATTR ENODATA /* No such attribute */ -+#endif -+#ifndef ENOTSUP -+#define ENOTSUP EOPNOTSUPP /* Operation not supported */ -+#endif -+#ifndef XATTR_NAME_MAX -+#define XATTR_NAME_MAX 255 /* # chars in an extended attribute name */ -+#define XATTR_SIZE_MAX 65536 /* size of an extended attribute value (64k) */ -+#define XATTR_LIST_MAX 65536 /* size of extended attribute namelist (64k) */ -+#endif -+#ifndef XATTR_CREATE -+#define XATTR_CREATE 1 /* set value, fail if attr already exists */ -+#define XATTR_REPLACE 2 /* set value, fail if attr does not exist */ -+#endif -+ - /* - * Describe an inode's exact location on disk and in memory - */ -@@ -704,6 +697,7 @@ extern void ext3_check_inodes_bitmap (st - extern unsigned long ext3_count_free (struct buffer_head *, unsigned); - - /* inode.c */ -+extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); - extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); - extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); - ---- linux-2.4.18/include/linux/ext3_jbd.h~linux-2.4.18ea-0.8.26-2 2003-08-29 16:53:17.000000000 +0400 -+++ linux-2.4.18-alexey/include/linux/ext3_jbd.h 2003-09-01 14:55:39.000000000 +0400 -@@ -30,13 +30,19 @@ - - #define EXT3_SINGLEDATA_TRANS_BLOCKS 8 - -+/* Extended attributes may touch two data buffers, two bitmap buffers, -+ * and two group and summaries. */ -+ -+#define EXT3_XATTR_TRANS_BLOCKS 8 -+ - /* Define the minimum size for a transaction which modifies data. 
This - * needs to take into account the fact that we may end up modifying two - * quota files too (one for the group, one for the user quota). The - * superblock only gets updated once, of course, so don't bother - * counting that again for the quota updates. */ - --#define EXT3_DATA_TRANS_BLOCKS (3 * EXT3_SINGLEDATA_TRANS_BLOCKS - 2) -+#define EXT3_DATA_TRANS_BLOCKS (3 * EXT3_SINGLEDATA_TRANS_BLOCKS + \ -+ EXT3_XATTR_TRANS_BLOCKS - 2) - - extern int ext3_writepage_trans_blocks(struct inode *inode); - ---- /dev/null 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.18-alexey/include/linux/ext3_xattr.h 2003-09-01 14:55:39.000000000 +0400 -@@ -0,0 +1,155 @@ -+/* -+ File: linux/ext3_xattr.h -+ -+ On-disk format of extended attributes for the ext3 filesystem. -+ -+ (C) 2001 Andreas Gruenbacher, -+*/ -+ -+#include -+#include -+#include -+ -+/* Magic value in attribute blocks */ -+#define EXT3_XATTR_MAGIC 0xEA020000 -+ -+/* Maximum number of references to one attribute block */ -+#define EXT3_XATTR_REFCOUNT_MAX 1024 -+ -+/* Name indexes */ -+#define EXT3_XATTR_INDEX_MAX 10 -+#define EXT3_XATTR_INDEX_USER 1 -+ -+struct ext3_xattr_header { -+ __u32 h_magic; /* magic number for identification */ -+ __u32 h_refcount; /* reference count */ -+ __u32 h_blocks; /* number of disk blocks used */ -+ __u32 h_hash; /* hash value of all attributes */ -+ __u32 h_reserved[4]; /* zero right now */ -+}; -+ -+struct ext3_xattr_entry { -+ __u8 e_name_len; /* length of name */ -+ __u8 e_name_index; /* attribute name index */ -+ __u16 e_value_offs; /* offset in disk block of value */ -+ __u32 e_value_block; /* disk block attribute is stored on (n/i) */ -+ __u32 e_value_size; /* size of attribute value */ -+ __u32 e_hash; /* hash value of name and value */ -+ char e_name[0]; /* attribute name */ -+}; -+ -+#define EXT3_XATTR_PAD_BITS 2 -+#define EXT3_XATTR_PAD (1<e_name_len)) ) -+#define EXT3_XATTR_SIZE(size) \ -+ (((size) + EXT3_XATTR_ROUND) & ~EXT3_XATTR_ROUND) -+ -+#ifdef __KERNEL__ -+ -+# 
ifdef CONFIG_EXT3_FS_XATTR -+ -+struct ext3_xattr_handler { -+ char *prefix; -+ size_t (*list)(char *list, struct inode *inode, const char *name, -+ int name_len); -+ int (*get)(struct inode *inode, const char *name, void *buffer, -+ size_t size); -+ int (*set)(struct inode *inode, const char *name, void *buffer, -+ size_t size, int flags); -+}; -+ -+extern int ext3_xattr_register(int, struct ext3_xattr_handler *); -+extern void ext3_xattr_unregister(int, struct ext3_xattr_handler *); -+ -+extern int ext3_setxattr(struct dentry *, const char *, void *, size_t, int); -+extern ssize_t ext3_getxattr(struct dentry *, const char *, void *, size_t); -+extern ssize_t ext3_listxattr(struct dentry *, char *, size_t); -+extern int ext3_removexattr(struct dentry *, const char *); -+ -+extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t); -+extern int ext3_xattr_list(struct inode *, char *, size_t); -+extern int ext3_xattr_set(handle_t *handle, struct inode *, int, const char *, void *, size_t, int); -+ -+extern void ext3_xattr_drop_inode(handle_t *, struct inode *); -+extern void ext3_xattr_put_super(struct super_block *); -+ -+extern int init_ext3_xattr(void) __init; -+extern void exit_ext3_xattr(void); -+ -+# else /* CONFIG_EXT3_FS_XATTR */ -+# define ext3_setxattr NULL -+# define ext3_getxattr NULL -+# define ext3_listxattr NULL -+# define ext3_removexattr NULL -+ -+static inline int -+ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t size, int flags) -+{ -+ return -ENOTSUP; -+} -+ -+static inline int -+ext3_xattr_list(struct inode *inode, void *buffer, size_t size, int flags) -+{ -+ return -ENOTSUP; -+} -+ -+static inline int -+ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, void *value, size_t size, int flags) -+{ -+ return -ENOTSUP; -+} -+ -+static inline void -+ext3_xattr_drop_inode(handle_t *handle, struct inode *inode) -+{ -+} -+ -+static inline void 
-+ext3_xattr_put_super(struct super_block *sb) -+{ -+} -+ -+static inline int -+init_ext3_xattr(void) -+{ -+ return 0; -+} -+ -+static inline void -+exit_ext3_xattr(void) -+{ -+} -+ -+# endif /* CONFIG_EXT3_FS_XATTR */ -+ -+# ifdef CONFIG_EXT3_FS_XATTR_USER -+ -+extern int init_ext3_xattr_user(void) __init; -+extern void exit_ext3_xattr_user(void); -+ -+# else /* CONFIG_EXT3_FS_XATTR_USER */ -+ -+static inline int -+init_ext3_xattr_user(void) -+{ -+ return 0; -+} -+ -+static inline void -+exit_ext3_xattr_user(void) -+{ -+} -+ -+#endif /* CONFIG_EXT3_FS_XATTR_USER */ -+ -+#endif /* __KERNEL__ */ -+ ---- /dev/null 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.18-alexey/include/linux/xattr.h 2003-09-01 14:55:39.000000000 +0400 -@@ -0,0 +1,15 @@ -+/* -+ File: linux/xattr.h -+ -+ Extended attributes handling. -+ -+ Copyright (C) 2001 by Andreas Gruenbacher -+ Copyright (C) 2001 SGI - Silicon Graphics, Inc -+*/ -+#ifndef _LINUX_XATTR_H -+#define _LINUX_XATTR_H -+ -+#define XATTR_CREATE 1 /* set value, fail if attr already exists */ -+#define XATTR_REPLACE 2 /* set value, fail if attr does not exist */ -+ -+#endif /* _LINUX_XATTR_H */ - -_ diff --git a/lustre/kernel_patches/patches/linux-2.4.18ea-0.8.26.patch b/lustre/kernel_patches/patches/linux-2.4.18ea-0.8.26.patch deleted file mode 100644 index 15f1b2a..0000000 --- a/lustre/kernel_patches/patches/linux-2.4.18ea-0.8.26.patch +++ /dev/null @@ -1,1784 +0,0 @@ - fs/ext3/Makefile | 4 - fs/ext3/ext3-exports.c | 13 - fs/ext3/ialloc.c | 2 - fs/ext3/inode.c | 29 - - fs/ext3/namei.c | 12 - fs/ext3/super.c | 22 - fs/ext3/xattr.c | 1242 +++++++++++++++++++++++++++++++++++++++++++++ - include/linux/ext3_fs.h | 46 - - include/linux/ext3_jbd.h | 8 - include/linux/ext3_xattr.h | 155 +++++ - include/linux/xattr.h | 15 - 11 files changed, 1496 insertions(+), 52 deletions(-) - ---- linux-2.4.18-p4smp/fs/ext3/ialloc.c~linux-2.4.18ea-0.8.26 2003-07-20 17:12:43.000000000 -0600 -+++ linux-2.4.18-p4smp-braam/fs/ext3/ialloc.c 2003-07-21 
22:49:05.000000000 -0600 -@@ -17,6 +17,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -216,6 +217,7 @@ void ext3_free_inode (handle_t *handle, - * as writing the quota to disk may need the lock as well. - */ - DQUOT_INIT(inode); -+ ext3_xattr_drop_inode(handle, inode); - DQUOT_FREE_INODE(inode); - DQUOT_DROP(inode); - ---- linux-2.4.18-p4smp/fs/ext3/inode.c~linux-2.4.18ea-0.8.26 2003-07-20 17:12:43.000000000 -0600 -+++ linux-2.4.18-p4smp-braam/fs/ext3/inode.c 2003-07-21 22:49:05.000000000 -0600 -@@ -39,6 +39,18 @@ - */ - #undef SEARCH_FROM_ZERO - -+/* -+ * Test whether an inode is a fast symlink. -+ */ -+static inline int ext3_inode_is_fast_symlink(struct inode *inode) -+{ -+ int ea_blocks = EXT3_I(inode)->i_file_acl ? -+ (inode->i_sb->s_blocksize >> 9) : 0; -+ -+ return (S_ISLNK(inode->i_mode) && -+ inode->i_blocks - ea_blocks == 0); -+} -+ - /* The ext3 forget function must perform a revoke if we are freeing data - * which has been journaled. Metadata (eg. indirect blocks) must be - * revoked in all cases. -@@ -48,7 +60,7 @@ - * still needs to be revoked. 
- */ - --static int ext3_forget(handle_t *handle, int is_metadata, -+int ext3_forget(handle_t *handle, int is_metadata, - struct inode *inode, struct buffer_head *bh, - int blocknr) - { -@@ -164,9 +176,7 @@ void ext3_delete_inode (struct inode * i - { - handle_t *handle; - -- if (is_bad_inode(inode) || -- inode->i_ino == EXT3_ACL_IDX_INO || -- inode->i_ino == EXT3_ACL_DATA_INO) -+ if (is_bad_inode(inode)) - goto no_delete; - - lock_kernel(); -@@ -1877,6 +1887,8 @@ void ext3_truncate(struct inode * inode) - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode))) - return; -+ if (ext3_inode_is_fast_symlink(inode)) -+ return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; - -@@ -2038,8 +2050,6 @@ int ext3_get_inode_loc (struct inode *in - struct ext3_group_desc * gdp; - - if ((inode->i_ino != EXT3_ROOT_INO && -- inode->i_ino != EXT3_ACL_IDX_INO && -- inode->i_ino != EXT3_ACL_DATA_INO && - inode->i_ino != EXT3_JOURNAL_INO && - inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) || - inode->i_ino > le32_to_cpu( -@@ -2166,10 +2176,7 @@ void ext3_read_inode(struct inode * inod - - brelse (iloc.bh); - -- if (inode->i_ino == EXT3_ACL_IDX_INO || -- inode->i_ino == EXT3_ACL_DATA_INO) -- /* Nothing to do */ ; -- else if (S_ISREG(inode->i_mode)) { -+ if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext3_file_inode_operations; - inode->i_fop = &ext3_file_operations; - inode->i_mapping->a_ops = &ext3_aops; -@@ -2177,7 +2184,7 @@ void ext3_read_inode(struct inode * inod - inode->i_op = &ext3_dir_inode_operations; - inode->i_fop = &ext3_dir_operations; - } else if (S_ISLNK(inode->i_mode)) { -- if (!inode->i_blocks) -+ if (ext3_inode_is_fast_symlink(inode)) - inode->i_op = &ext3_fast_symlink_inode_operations; - else { - inode->i_op = &page_symlink_inode_operations; ---- linux-2.4.18-p4smp/fs/ext3/namei.c~linux-2.4.18ea-0.8.26 2003-07-21 22:29:27.000000000 -0600 -+++ linux-2.4.18-p4smp-braam/fs/ext3/namei.c 2003-07-21 22:49:05.000000000 -0600 -@@ -27,6 
+27,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -1183,6 +1184,7 @@ static int ext3_add_nondir(handle_t *han - d_instantiate(dentry, inode); - return 0; - } -+ ext3_xattr_drop_inode(handle, inode); - ext3_dec_count(handle, inode); - iput(inode); - return err; -@@ -1268,15 +1270,14 @@ static int ext3_mkdir(struct inode * dir - if (IS_SYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, S_IFDIR); -+ inode = ext3_new_inode (handle, dir, S_IFDIR | mode); - err = PTR_ERR(inode); - if (IS_ERR(inode)) - goto out_stop; - - inode->i_op = &ext3_dir_inode_operations; - inode->i_fop = &ext3_dir_operations; -- inode->i_size = inode->u.ext3_i.i_disksize = inode->i_sb->s_blocksize; -- inode->i_blocks = 0; -+ inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; - dir_block = ext3_bread (handle, inode, 0, 1, &err); - if (!dir_block) { - inode->i_nlink--; /* is this nlink == 0? */ -@@ -1303,9 +1304,6 @@ static int ext3_mkdir(struct inode * dir - BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata"); - ext3_journal_dirty_metadata(handle, dir_block); - brelse (dir_block); -- inode->i_mode = S_IFDIR | mode; -- if (dir->i_mode & S_ISGID) -- inode->i_mode |= S_ISGID; - ext3_mark_inode_dirty(handle, inode); - err = ext3_add_entry (handle, dentry, inode); - if (err) -@@ -1671,7 +1669,7 @@ static int ext3_symlink (struct inode * - if (IS_ERR(inode)) - goto out_stop; - -- if (l > sizeof (inode->u.ext3_i.i_data)) { -+ if (l > sizeof(EXT3_I(inode)->i_data)) { - inode->i_op = &page_symlink_inode_operations; - inode->i_mapping->a_ops = &ext3_aops; - /* ---- linux-2.4.18-p4smp/fs/ext3/super.c~linux-2.4.18ea-0.8.26 2003-07-21 22:29:27.000000000 -0600 -+++ linux-2.4.18-p4smp-braam/fs/ext3/super.c 2003-07-21 22:50:28.000000000 -0600 -@@ -24,6 +24,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -406,6 +407,7 @@ void ext3_put_super (struct super_block - kdev_t j_dev = 
sbi->s_journal->j_dev; - int i; - -+ ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); - if (!(sb->s_flags & MS_RDONLY)) { - EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); -@@ -1749,17 +1751,27 @@ int ext3_statfs (struct super_block * sb - - static DECLARE_FSTYPE_DEV(ext3_fs_type, "ext3", ext3_read_super); - --static int __init init_ext3_fs(void) -+static void exit_ext3_fs(void) - { -- return register_filesystem(&ext3_fs_type); -+ unregister_filesystem(&ext3_fs_type); -+ exit_ext3_xattr_user(); -+ exit_ext3_xattr(); - } - --static void __exit exit_ext3_fs(void) -+static int __init init_ext3_fs(void) - { -- unregister_filesystem(&ext3_fs_type); -+ int error = init_ext3_xattr(); -+ if (!error) -+ error = init_ext3_xattr_user(); -+ if (!error) -+ error = register_filesystem(&ext3_fs_type); -+ if (!error) -+ return 0; -+ -+ exit_ext3_fs(); -+ return error; - } - --EXPORT_SYMBOL(ext3_bread); - - MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); - MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); ---- /dev/null 2003-01-30 03:24:37.000000000 -0700 -+++ linux-2.4.18-p4smp-braam/fs/ext3/ext3-exports.c 2003-07-21 22:49:05.000000000 -0600 -@@ -0,0 +1,13 @@ -+#include -+#include -+#include -+#include -+#include -+ -+EXPORT_SYMBOL(ext3_force_commit); -+EXPORT_SYMBOL(ext3_bread); -+EXPORT_SYMBOL(ext3_xattr_register); -+EXPORT_SYMBOL(ext3_xattr_unregister); -+EXPORT_SYMBOL(ext3_xattr_get); -+EXPORT_SYMBOL(ext3_xattr_list); -+EXPORT_SYMBOL(ext3_xattr_set); ---- /dev/null 2003-01-30 03:24:37.000000000 -0700 -+++ linux-2.4.18-p4smp-braam/fs/ext3/xattr.c 2003-07-21 22:50:40.000000000 -0600 -@@ -0,0 +1,1242 @@ -+/* -+ * linux/fs/ext3/xattr.c -+ * -+ * Copyright (C) 2001 by Andreas Gruenbacher, -+ * -+ * Fix by Harrison Xing . -+ * Ext3 code with a lot of help from Eric Jarman . -+ * Extended attributes for symlinks and special files added per -+ * suggestion of Luka Renko . 
-+ */ -+ -+/* -+ * Extended attributes are stored on disk blocks allocated outside of -+ * any inode. The i_file_acl field is then made to point to this allocated -+ * block. If all extended attributes of an inode are identical, these -+ * inodes may share the same extended attribute block. Such situations -+ * are automatically detected by keeping a cache of recent attribute block -+ * numbers and hashes over the block's contents in memory. -+ * -+ * -+ * Extended attribute block layout: -+ * -+ * +------------------+ -+ * | header | -+ * ¦ entry 1 | | -+ * | entry 2 | | growing downwards -+ * | entry 3 | v -+ * | four null bytes | -+ * | . . . | -+ * | value 1 | ^ -+ * | value 3 | | growing upwards -+ * | value 2 | | -+ * +------------------+ -+ * -+ * The block header is followed by multiple entry descriptors. These entry -+ * descriptors are variable in size, and alligned to EXT3_XATTR_PAD -+ * byte boundaries. The entry descriptors are sorted by attribute name, -+ * so that two extended attribute blocks can be compared efficiently. -+ * -+ * Attribute values are aligned to the end of the block, stored in -+ * no specific order. They are also padded to EXT3_XATTR_PAD byte -+ * boundaries. No additional gaps are left between them. -+ * -+ * Locking strategy -+ * ---------------- -+ * The VFS already holds the BKL and the inode->i_sem semaphore when any of -+ * the xattr inode operations are called, so we are guaranteed that only one -+ * processes accesses extended attributes of an inode at any time. -+ * -+ * For writing we also grab the ext3_xattr_sem semaphore. This ensures that -+ * only a single process is modifying an extended attribute block, even -+ * if the block is shared among inodes. -+ * -+ * Note for porting to 2.5 -+ * ----------------------- -+ * The BKL will no longer be held in the xattr inode operations. 
-+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#ifdef CONFIG_EXT3_FS_XATTR_SHARING -+#include -+#endif -+#include -+#include -+#include -+#include -+ -+/* These symbols may be needed by a module. */ -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+# define mark_buffer_dirty(bh) mark_buffer_dirty(bh, 1) -+#endif -+ -+#define HDR(bh) ((struct ext3_xattr_header *)((bh)->b_data)) -+#define ENTRY(ptr) ((struct ext3_xattr_entry *)(ptr)) -+#define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1) -+#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) -+ -+#ifdef EXT3_XATTR_DEBUG -+# define ea_idebug(inode, f...) do { \ -+ printk(KERN_DEBUG "inode %s:%ld: ", \ -+ kdevname(inode->i_dev), inode->i_ino); \ -+ printk(f); \ -+ printk("\n"); \ -+ } while (0) -+# define ea_bdebug(bh, f...) do { \ -+ printk(KERN_DEBUG "block %s:%ld: ", \ -+ kdevname(bh->b_dev), bh->b_blocknr); \ -+ printk(f); \ -+ printk("\n"); \ -+ } while (0) -+#else -+# define ea_idebug(f...) -+# define ea_bdebug(f...) -+#endif -+ -+static int ext3_xattr_set2(handle_t *, struct inode *, struct buffer_head *, -+ struct ext3_xattr_header *); -+ -+#ifdef CONFIG_EXT3_FS_XATTR_SHARING -+ -+static int ext3_xattr_cache_insert(struct buffer_head *); -+static struct buffer_head *ext3_xattr_cache_find(struct inode *, -+ struct ext3_xattr_header *); -+static void ext3_xattr_cache_remove(struct buffer_head *); -+static void ext3_xattr_rehash(struct ext3_xattr_header *, -+ struct ext3_xattr_entry *); -+ -+static struct mb_cache *ext3_xattr_cache; -+ -+#else -+# define ext3_xattr_cache_insert(bh) 0 -+# define ext3_xattr_cache_find(inode, header) NULL -+# define ext3_xattr_cache_remove(bh) do {} while(0) -+# define ext3_xattr_rehash(header, entry) do {} while(0) -+#endif -+ -+/* -+ * If a file system does not share extended attributes among inodes, -+ * we should not need the ext3_xattr_sem semaphore. However, the -+ * filesystem may still contain shared blocks, so we always take -+ * the lock. 
-+ */ -+ -+DECLARE_MUTEX(ext3_xattr_sem); -+ -+static inline void -+ext3_xattr_lock(void) -+{ -+ down(&ext3_xattr_sem); -+} -+ -+static inline void -+ext3_xattr_unlock(void) -+{ -+ up(&ext3_xattr_sem); -+} -+ -+static inline int -+ext3_xattr_new_block(handle_t *handle, struct inode *inode, -+ int * errp, int force) -+{ -+ struct super_block *sb = inode->i_sb; -+ int goal = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) + -+ EXT3_I(inode)->i_block_group * EXT3_BLOCKS_PER_GROUP(sb); -+ -+ /* How can we enforce the allocation? */ -+ int block = ext3_new_block(handle, inode, goal, 0, 0, errp); -+#ifdef OLD_QUOTAS -+ if (!*errp) -+ inode->i_blocks += inode->i_sb->s_blocksize >> 9; -+#endif -+ return block; -+} -+ -+static inline int -+ext3_xattr_quota_alloc(struct inode *inode, int force) -+{ -+ /* How can we enforce the allocation? */ -+#ifdef OLD_QUOTAS -+ int error = DQUOT_ALLOC_BLOCK(inode->i_sb, inode, 1); -+ if (!error) -+ inode->i_blocks += inode->i_sb->s_blocksize >> 9; -+#else -+ int error = DQUOT_ALLOC_BLOCK(inode, 1); -+#endif -+ return error; -+} -+ -+#ifdef OLD_QUOTAS -+ -+static inline void -+ext3_xattr_quota_free(struct inode *inode) -+{ -+ DQUOT_FREE_BLOCK(inode->i_sb, inode, 1); -+ inode->i_blocks -= inode->i_sb->s_blocksize >> 9; -+} -+ -+static inline void -+ext3_xattr_free_block(handle_t *handle, struct inode * inode, -+ unsigned long block) -+{ -+ ext3_free_blocks(handle, inode, block, 1); -+ inode->i_blocks -= inode->i_sb->s_blocksize >> 9; -+} -+ -+#else -+# define ext3_xattr_quota_free(inode) \ -+ DQUOT_FREE_BLOCK(inode, 1) -+# define ext3_xattr_free_block(handle, inode, block) \ -+ ext3_free_blocks(handle, inode, block, 1) -+#endif -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18) -+ -+static inline struct buffer_head * -+sb_bread(struct super_block *sb, int block) -+{ -+ return bread(sb->s_dev, block, sb->s_blocksize); -+} -+ -+static inline struct buffer_head * -+sb_getblk(struct super_block *sb, int block) -+{ -+ return 
getblk(sb->s_dev, block, sb->s_blocksize); -+} -+ -+#endif -+ -+struct ext3_xattr_handler *ext3_xattr_handlers[EXT3_XATTR_INDEX_MAX]; -+rwlock_t ext3_handler_lock = RW_LOCK_UNLOCKED; -+ -+int -+ext3_xattr_register(int name_index, struct ext3_xattr_handler *handler) -+{ -+ int error = -EINVAL; -+ -+ if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) { -+ write_lock(&ext3_handler_lock); -+ if (!ext3_xattr_handlers[name_index-1]) { -+ ext3_xattr_handlers[name_index-1] = handler; -+ error = 0; -+ } -+ write_unlock(&ext3_handler_lock); -+ } -+ return error; -+} -+ -+void -+ext3_xattr_unregister(int name_index, struct ext3_xattr_handler *handler) -+{ -+ if (name_index > 0 || name_index <= EXT3_XATTR_INDEX_MAX) { -+ write_lock(&ext3_handler_lock); -+ ext3_xattr_handlers[name_index-1] = NULL; -+ write_unlock(&ext3_handler_lock); -+ } -+} -+ -+static inline const char * -+strcmp_prefix(const char *a, const char *a_prefix) -+{ -+ while (*a_prefix && *a == *a_prefix) { -+ a++; -+ a_prefix++; -+ } -+ return *a_prefix ? NULL : a; -+} -+ -+/* -+ * Decode the extended attribute name, and translate it into -+ * the name_index and name suffix. 
-+ */ -+static inline struct ext3_xattr_handler * -+ext3_xattr_resolve_name(const char **name) -+{ -+ struct ext3_xattr_handler *handler = NULL; -+ int i; -+ -+ if (!*name) -+ return NULL; -+ read_lock(&ext3_handler_lock); -+ for (i=0; iprefix); -+ if (n) { -+ handler = ext3_xattr_handlers[i]; -+ *name = n; -+ break; -+ } -+ } -+ } -+ read_unlock(&ext3_handler_lock); -+ return handler; -+} -+ -+static inline struct ext3_xattr_handler * -+ext3_xattr_handler(int name_index) -+{ -+ struct ext3_xattr_handler *handler = NULL; -+ if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) { -+ read_lock(&ext3_handler_lock); -+ handler = ext3_xattr_handlers[name_index-1]; -+ read_unlock(&ext3_handler_lock); -+ } -+ return handler; -+} -+ -+/* -+ * Inode operation getxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+ssize_t -+ext3_getxattr(struct dentry *dentry, const char *name, -+ void *buffer, size_t size) -+{ -+ struct ext3_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ handler = ext3_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->get(inode, name, buffer, size); -+} -+ -+/* -+ * Inode operation listxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+ssize_t -+ext3_listxattr(struct dentry *dentry, char *buffer, size_t size) -+{ -+ return ext3_xattr_list(dentry->d_inode, buffer, size); -+} -+ -+/* -+ * Inode operation setxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+int -+ext3_setxattr(struct dentry *dentry, const char *name, -+ void *value, size_t size, int flags) -+{ -+ struct ext3_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ if (size == 0) -+ value = ""; /* empty EA, do not remove */ -+ handler = ext3_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->set(inode, name, value, size, flags); -+} -+ -+/* -+ * Inode operation removexattr() -+ * -+ * 
dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+int -+ext3_removexattr(struct dentry *dentry, const char *name) -+{ -+ struct ext3_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ handler = ext3_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->set(inode, name, NULL, 0, XATTR_REPLACE); -+} -+ -+/* -+ * ext3_xattr_get() -+ * -+ * Copy an extended attribute into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. -+ */ -+int -+ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_entry *entry; -+ unsigned int block, size; -+ char *end; -+ int name_len, error; -+ -+ ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", -+ name_index, name, buffer, (long)buffer_size); -+ -+ if (name == NULL) -+ return -EINVAL; -+ if (!EXT3_I(inode)->i_file_acl) -+ return -ENOATTR; -+ block = EXT3_I(inode)->i_file_acl; -+ ea_idebug(inode, "reading block %d", block); -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(inode->i_sb, "ext3_xattr_get", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* find named attribute */ -+ name_len = strlen(name); -+ -+ error = -ERANGE; -+ if (name_len > 255) -+ goto cleanup; -+ entry = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ if 
(name_index == entry->e_name_index && -+ name_len == entry->e_name_len && -+ memcmp(name, entry->e_name, name_len) == 0) -+ goto found; -+ entry = next; -+ } -+ /* Check the remaining name entries */ -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ entry = next; -+ } -+ if (ext3_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ error = -ENOATTR; -+ goto cleanup; -+found: -+ /* check the buffer size */ -+ if (entry->e_value_block != 0) -+ goto bad_block; -+ size = le32_to_cpu(entry->e_value_size); -+ if (size > inode->i_sb->s_blocksize || -+ le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize) -+ goto bad_block; -+ -+ if (ext3_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ if (buffer) { -+ error = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ /* return value of attribute */ -+ memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs), -+ size); -+ } -+ error = size; -+ -+cleanup: -+ brelse(bh); -+ -+ return error; -+} -+ -+/* -+ * ext3_xattr_list() -+ * -+ * Copy a list of attribute names into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. 
-+ */ -+int -+ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_entry *entry; -+ unsigned int block, size = 0; -+ char *buf, *end; -+ int error; -+ -+ ea_idebug(inode, "buffer=%p, buffer_size=%ld", -+ buffer, (long)buffer_size); -+ -+ if (!EXT3_I(inode)->i_file_acl) -+ return 0; -+ block = EXT3_I(inode)->i_file_acl; -+ ea_idebug(inode, "reading block %d", block); -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(inode->i_sb, "ext3_xattr_list", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* compute the size required for the list of attribute names */ -+ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); -+ entry = EXT3_XATTR_NEXT(entry)) { -+ struct ext3_xattr_handler *handler; -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ -+ handler = ext3_xattr_handler(entry->e_name_index); -+ if (handler) { -+ size += handler->list(NULL, inode, entry->e_name, -+ entry->e_name_len) + 1; -+ } -+ } -+ -+ if (ext3_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ if (!buffer) { -+ error = size; -+ goto cleanup; -+ } else { -+ error = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ } -+ -+ /* list the attribute names */ -+ buf = buffer; -+ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); -+ entry = EXT3_XATTR_NEXT(entry)) { -+ struct ext3_xattr_handler *handler; -+ -+ handler = ext3_xattr_handler(entry->e_name_index); -+ if (handler) { -+ buf += handler->list(buf, inode, entry->e_name, -+ entry->e_name_len); -+ *buf++ = '\0'; -+ } -+ } -+ error = size; -+ -+cleanup: -+ 
brelse(bh); -+ -+ return error; -+} -+ -+/* -+ * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is -+ * not set, set it. -+ */ -+static void ext3_xattr_update_super_block(handle_t *handle, -+ struct super_block *sb) -+{ -+ if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR)) -+ return; -+ -+ lock_super(sb); -+ ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+ EXT3_SB(sb)->s_feature_compat |= EXT3_FEATURE_COMPAT_EXT_ATTR; -+#endif -+ EXT3_SB(sb)->s_es->s_feature_compat |= -+ cpu_to_le32(EXT3_FEATURE_COMPAT_EXT_ATTR); -+ sb->s_dirt = 1; -+ ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); -+ unlock_super(sb); -+} -+ -+/* -+ * ext3_xattr_set() -+ * -+ * Create, replace or remove an extended attribute for this inode. Buffer -+ * is NULL to remove an existing extended attribute, and non-NULL to -+ * either replace an existing extended attribute, or create a new extended -+ * attribute. The flags XATTR_REPLACE and XATTR_CREATE -+ * specify that an extended attribute must exist and must not exist -+ * previous to the call, respectively. -+ * -+ * Returns 0, or a negative error number on failure. -+ */ -+int -+ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, void *value, size_t value_len, int flags) -+{ -+ struct super_block *sb = inode->i_sb; -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_header *header = NULL; -+ struct ext3_xattr_entry *here, *last; -+ unsigned int name_len; -+ int min_offs = sb->s_blocksize, not_found = 1, free, error; -+ char *end; -+ -+ /* -+ * header -- Points either into bh, or to a temporarily -+ * allocated buffer. -+ * here -- The named entry found, or the place for inserting, within -+ * the block pointed to by header. -+ * last -- Points right after the last named entry within the block -+ * pointed to by header. 
-+ * min_offs -- The offset of the first value (values are aligned -+ * towards the end of the block). -+ * end -- Points right after the block pointed to by header. -+ */ -+ -+ ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", -+ name_index, name, value, (long)value_len); -+ -+ if (IS_RDONLY(inode)) -+ return -EROFS; -+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -+ return -EPERM; -+ if (value == NULL) -+ value_len = 0; -+ if (name == NULL) -+ return -EINVAL; -+ name_len = strlen(name); -+ if (name_len > 255 || value_len > sb->s_blocksize) -+ return -ERANGE; -+ ext3_xattr_lock(); -+ -+ if (EXT3_I(inode)->i_file_acl) { -+ /* The inode already has an extended attribute block. */ -+ int block = EXT3_I(inode)->i_file_acl; -+ -+ bh = sb_bread(sb, block); -+ error = -EIO; -+ if (!bh) -+ goto cleanup; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), -+ le32_to_cpu(HDR(bh)->h_refcount)); -+ header = HDR(bh); -+ end = bh->b_data + bh->b_size; -+ if (header->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ header->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(sb, "ext3_xattr_set", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* Find the named attribute. */ -+ here = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(here)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(here); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (!here->e_value_block && here->e_value_size) { -+ int offs = le16_to_cpu(here->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ not_found = name_index - here->e_name_index; -+ if (!not_found) -+ not_found = name_len - here->e_name_len; -+ if (!not_found) -+ not_found = memcmp(name, here->e_name,name_len); -+ if (not_found <= 0) -+ break; -+ here = next; -+ } -+ last = here; -+ /* We still need to compute min_offs and last. 
*/ -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (!last->e_value_block && last->e_value_size) { -+ int offs = le16_to_cpu(last->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ last = next; -+ } -+ -+ /* Check whether we have enough space left. */ -+ free = min_offs - ((char*)last - (char*)header) - sizeof(__u32); -+ } else { -+ /* We will use a new extended attribute block. */ -+ free = sb->s_blocksize - -+ sizeof(struct ext3_xattr_header) - sizeof(__u32); -+ here = last = NULL; /* avoid gcc uninitialized warning. */ -+ } -+ -+ if (not_found) { -+ /* Request to remove a nonexistent attribute? */ -+ error = -ENOATTR; -+ if (flags & XATTR_REPLACE) -+ goto cleanup; -+ error = 0; -+ if (value == NULL) -+ goto cleanup; -+ else -+ free -= EXT3_XATTR_LEN(name_len); -+ } else { -+ /* Request to create an existing attribute? */ -+ error = -EEXIST; -+ if (flags & XATTR_CREATE) -+ goto cleanup; -+ if (!here->e_value_block && here->e_value_size) { -+ unsigned int size = le32_to_cpu(here->e_value_size); -+ -+ if (le16_to_cpu(here->e_value_offs) + size > -+ sb->s_blocksize || size > sb->s_blocksize) -+ goto bad_block; -+ free += EXT3_XATTR_SIZE(size); -+ } -+ } -+ free -= EXT3_XATTR_SIZE(value_len); -+ error = -ENOSPC; -+ if (free < 0) -+ goto cleanup; -+ -+ /* Here we know that we can set the new attribute. 
*/ -+ -+ if (header) { -+ if (header->h_refcount == cpu_to_le32(1)) { -+ ea_bdebug(bh, "modifying in-place"); -+ ext3_xattr_cache_remove(bh); -+ error = ext3_journal_get_write_access(handle, bh); -+ if (error) -+ goto cleanup; -+ } else { -+ int offset; -+ -+ ea_bdebug(bh, "cloning"); -+ header = kmalloc(bh->b_size, GFP_KERNEL); -+ error = -ENOMEM; -+ if (header == NULL) -+ goto cleanup; -+ memcpy(header, HDR(bh), bh->b_size); -+ header->h_refcount = cpu_to_le32(1); -+ offset = (char *)header - bh->b_data; -+ here = ENTRY((char *)here + offset); -+ last = ENTRY((char *)last + offset); -+ } -+ } else { -+ /* Allocate a buffer where we construct the new block. */ -+ header = kmalloc(sb->s_blocksize, GFP_KERNEL); -+ error = -ENOMEM; -+ if (header == NULL) -+ goto cleanup; -+ memset(header, 0, sb->s_blocksize); -+ end = (char *)header + sb->s_blocksize; -+ header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC); -+ header->h_blocks = header->h_refcount = cpu_to_le32(1); -+ last = here = ENTRY(header+1); -+ } -+ -+ if (not_found) { -+ /* Insert the new name. */ -+ int size = EXT3_XATTR_LEN(name_len); -+ int rest = (char *)last - (char *)here; -+ memmove((char *)here + size, here, rest); -+ memset(here, 0, size); -+ here->e_name_index = name_index; -+ here->e_name_len = name_len; -+ memcpy(here->e_name, name, name_len); -+ } else { -+ /* Remove the old value. */ -+ if (!here->e_value_block && here->e_value_size) { -+ char *first_val = (char *)header + min_offs; -+ int offs = le16_to_cpu(here->e_value_offs); -+ char *val = (char *)header + offs; -+ size_t size = EXT3_XATTR_SIZE( -+ le32_to_cpu(here->e_value_size)); -+ memmove(first_val + size, first_val, val - first_val); -+ memset(first_val, 0, size); -+ here->e_value_offs = 0; -+ min_offs += size; -+ -+ /* Adjust all value offsets. 
*/ -+ last = ENTRY(header+1); -+ while (!IS_LAST_ENTRY(last)) { -+ int o = le16_to_cpu(last->e_value_offs); -+ if (!last->e_value_block && o < offs) -+ last->e_value_offs = -+ cpu_to_le16(o + size); -+ last = EXT3_XATTR_NEXT(last); -+ } -+ } -+ if (value == NULL) { -+ /* Remove this attribute. */ -+ if (EXT3_XATTR_NEXT(ENTRY(header+1)) == last) { -+ /* This block is now empty. */ -+ error = ext3_xattr_set2(handle, inode, bh,NULL); -+ goto cleanup; -+ } else { -+ /* Remove the old name. */ -+ int size = EXT3_XATTR_LEN(name_len); -+ last = ENTRY((char *)last - size); -+ memmove(here, (char*)here + size, -+ (char*)last - (char*)here); -+ memset(last, 0, size); -+ } -+ } -+ } -+ -+ if (value != NULL) { -+ /* Insert the new value. */ -+ here->e_value_size = cpu_to_le32(value_len); -+ if (value_len) { -+ size_t size = EXT3_XATTR_SIZE(value_len); -+ char *val = (char *)header + min_offs - size; -+ here->e_value_offs = -+ cpu_to_le16((char *)val - (char *)header); -+ memset(val + size - EXT3_XATTR_PAD, 0, -+ EXT3_XATTR_PAD); /* Clear the pad bytes. */ -+ memcpy(val, value, value_len); -+ } -+ } -+ ext3_xattr_rehash(header, here); -+ -+ error = ext3_xattr_set2(handle, inode, bh, header); -+ -+cleanup: -+ brelse(bh); -+ if (!(bh && header == HDR(bh))) -+ kfree(header); -+ ext3_xattr_unlock(); -+ -+ return error; -+} -+ -+/* -+ * Second half of ext3_xattr_set(): Update the file system. -+ */ -+static int -+ext3_xattr_set2(handle_t *handle, struct inode *inode, -+ struct buffer_head *old_bh, struct ext3_xattr_header *header) -+{ -+ struct super_block *sb = inode->i_sb; -+ struct buffer_head *new_bh = NULL; -+ int error; -+ -+ if (header) { -+ new_bh = ext3_xattr_cache_find(inode, header); -+ if (new_bh) { -+ /* -+ * We found an identical block in the cache. -+ * The old block will be released after updating -+ * the inode. 
-+ */ -+ ea_bdebug(old_bh, "reusing block %ld", -+ new_bh->b_blocknr); -+ -+ error = -EDQUOT; -+ if (ext3_xattr_quota_alloc(inode, 1)) -+ goto cleanup; -+ -+ error = ext3_journal_get_write_access(handle, new_bh); -+ if (error) -+ goto cleanup; -+ HDR(new_bh)->h_refcount = cpu_to_le32( -+ le32_to_cpu(HDR(new_bh)->h_refcount) + 1); -+ ea_bdebug(new_bh, "refcount now=%d", -+ le32_to_cpu(HDR(new_bh)->h_refcount)); -+ } else if (old_bh && header == HDR(old_bh)) { -+ /* Keep this block. */ -+ new_bh = old_bh; -+ (void)ext3_xattr_cache_insert(new_bh); -+ } else { -+ /* We need to allocate a new block */ -+ int force = EXT3_I(inode)->i_file_acl != 0; -+ int block = ext3_xattr_new_block(handle, inode, -+ &error, force); -+ if (error) -+ goto cleanup; -+ ea_idebug(inode, "creating block %d", block); -+ -+ new_bh = sb_getblk(sb, block); -+ if (!new_bh) { -+getblk_failed: ext3_xattr_free_block(handle, inode, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(new_bh); -+ error = ext3_journal_get_create_access(handle, new_bh); -+ if (error) { -+ unlock_buffer(new_bh); -+ goto getblk_failed; -+ } -+ memcpy(new_bh->b_data, header, new_bh->b_size); -+ mark_buffer_uptodate(new_bh, 1); -+ unlock_buffer(new_bh); -+ (void)ext3_xattr_cache_insert(new_bh); -+ ext3_xattr_update_super_block(handle, sb); -+ } -+ error = ext3_journal_dirty_metadata(handle, new_bh); -+ if (error) -+ goto cleanup; -+ } -+ -+ /* Update the inode. */ -+ EXT3_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; -+ inode->i_ctime = CURRENT_TIME; -+ ext3_mark_inode_dirty(handle, inode); -+ if (IS_SYNC(inode)) -+ handle->h_sync = 1; -+ -+ error = 0; -+ if (old_bh && old_bh != new_bh) { -+ /* -+ * If there was an old block, and we are not still using it, -+ * we now release the old block. -+ */ -+ unsigned int refcount = le32_to_cpu(HDR(old_bh)->h_refcount); -+ -+ error = ext3_journal_get_write_access(handle, old_bh); -+ if (error) -+ goto cleanup; -+ if (refcount == 1) { -+ /* Free the old block. 
*/ -+ ea_bdebug(old_bh, "freeing"); -+ ext3_xattr_free_block(handle, inode, old_bh->b_blocknr); -+ -+ /* ext3_forget() calls bforget() for us, but we -+ let our caller release old_bh, so we need to -+ duplicate the handle before. */ -+ get_bh(old_bh); -+ ext3_forget(handle, 1, inode, old_bh,old_bh->b_blocknr); -+ } else { -+ /* Decrement the refcount only. */ -+ refcount--; -+ HDR(old_bh)->h_refcount = cpu_to_le32(refcount); -+ ext3_xattr_quota_free(inode); -+ ext3_journal_dirty_metadata(handle, old_bh); -+ ea_bdebug(old_bh, "refcount now=%d", refcount); -+ } -+ } -+ -+cleanup: -+ if (old_bh != new_bh) -+ brelse(new_bh); -+ -+ return error; -+} -+ -+/* -+ * ext3_xattr_drop_inode() -+ * -+ * Free extended attribute resources associated with this inode. This -+ * is called immediately before an inode is freed. -+ */ -+void -+ext3_xattr_drop_inode(handle_t *handle, struct inode *inode) -+{ -+ struct buffer_head *bh; -+ unsigned int block = EXT3_I(inode)->i_file_acl; -+ -+ if (!block) -+ return; -+ ext3_xattr_lock(); -+ -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) { -+ ext3_error(inode->i_sb, "ext3_xattr_drop_inode", -+ "inode %ld: block %d read error", inode->i_ino, block); -+ goto cleanup; -+ } -+ ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count))); -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+ ext3_error(inode->i_sb, "ext3_xattr_drop_inode", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ goto cleanup; -+ } -+ ext3_journal_get_write_access(handle, bh); -+ ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); -+ if (HDR(bh)->h_refcount == cpu_to_le32(1)) { -+ ext3_xattr_cache_remove(bh); -+ ext3_xattr_free_block(handle, inode, block); -+ ext3_forget(handle, 1, inode, bh, block); -+ bh = NULL; -+ } else { -+ HDR(bh)->h_refcount = cpu_to_le32( -+ le32_to_cpu(HDR(bh)->h_refcount) - 1); -+ ext3_journal_dirty_metadata(handle, bh); -+ if (IS_SYNC(inode)) -+ handle->h_sync = 1; -+ 
ext3_xattr_quota_free(inode); -+ } -+ EXT3_I(inode)->i_file_acl = 0; -+ -+cleanup: -+ brelse(bh); -+ ext3_xattr_unlock(); -+} -+ -+/* -+ * ext3_xattr_put_super() -+ * -+ * This is called when a file system is unmounted. -+ */ -+void -+ext3_xattr_put_super(struct super_block *sb) -+{ -+#ifdef CONFIG_EXT3_FS_XATTR_SHARING -+ mb_cache_shrink(ext3_xattr_cache, sb->s_dev); -+#endif -+} -+ -+#ifdef CONFIG_EXT3_FS_XATTR_SHARING -+ -+/* -+ * ext3_xattr_cache_insert() -+ * -+ * Create a new entry in the extended attribute cache, and insert -+ * it unless such an entry is already in the cache. -+ * -+ * Returns 0, or a negative error number on failure. -+ */ -+static int -+ext3_xattr_cache_insert(struct buffer_head *bh) -+{ -+ __u32 hash = le32_to_cpu(HDR(bh)->h_hash); -+ struct mb_cache_entry *ce; -+ int error; -+ -+ ce = mb_cache_entry_alloc(ext3_xattr_cache); -+ if (!ce) -+ return -ENOMEM; -+ error = mb_cache_entry_insert(ce, bh->b_dev, bh->b_blocknr, &hash); -+ if (error) { -+ mb_cache_entry_free(ce); -+ if (error == -EBUSY) { -+ ea_bdebug(bh, "already in cache (%d cache entries)", -+ atomic_read(&ext3_xattr_cache->c_entry_count)); -+ error = 0; -+ } -+ } else { -+ ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash, -+ atomic_read(&ext3_xattr_cache->c_entry_count)); -+ mb_cache_entry_release(ce); -+ } -+ return error; -+} -+ -+/* -+ * ext3_xattr_cmp() -+ * -+ * Compare two extended attribute blocks for equality. -+ * -+ * Returns 0 if the blocks are equal, 1 if they differ, and -+ * a negative error number on errors. 
-+ */ -+static int -+ext3_xattr_cmp(struct ext3_xattr_header *header1, -+ struct ext3_xattr_header *header2) -+{ -+ struct ext3_xattr_entry *entry1, *entry2; -+ -+ entry1 = ENTRY(header1+1); -+ entry2 = ENTRY(header2+1); -+ while (!IS_LAST_ENTRY(entry1)) { -+ if (IS_LAST_ENTRY(entry2)) -+ return 1; -+ if (entry1->e_hash != entry2->e_hash || -+ entry1->e_name_len != entry2->e_name_len || -+ entry1->e_value_size != entry2->e_value_size || -+ memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len)) -+ return 1; -+ if (entry1->e_value_block != 0 || entry2->e_value_block != 0) -+ return -EIO; -+ if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs), -+ (char *)header2 + le16_to_cpu(entry2->e_value_offs), -+ le32_to_cpu(entry1->e_value_size))) -+ return 1; -+ -+ entry1 = EXT3_XATTR_NEXT(entry1); -+ entry2 = EXT3_XATTR_NEXT(entry2); -+ } -+ if (!IS_LAST_ENTRY(entry2)) -+ return 1; -+ return 0; -+} -+ -+/* -+ * ext3_xattr_cache_find() -+ * -+ * Find an identical extended attribute block. -+ * -+ * Returns a pointer to the block found, or NULL if such a block was -+ * not found or an error occurred. 
-+ */ -+static struct buffer_head * -+ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header) -+{ -+ __u32 hash = le32_to_cpu(header->h_hash); -+ struct mb_cache_entry *ce; -+ -+ if (!header->h_hash) -+ return NULL; /* never share */ -+ ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); -+ ce = mb_cache_entry_find_first(ext3_xattr_cache, 0, inode->i_dev, hash); -+ while (ce) { -+ struct buffer_head *bh = sb_bread(inode->i_sb, ce->e_block); -+ -+ if (!bh) { -+ ext3_error(inode->i_sb, "ext3_xattr_cache_find", -+ "inode %ld: block %ld read error", -+ inode->i_ino, ce->e_block); -+ } else if (le32_to_cpu(HDR(bh)->h_refcount) > -+ EXT3_XATTR_REFCOUNT_MAX) { -+ ea_idebug(inode, "block %ld refcount %d>%d",ce->e_block, -+ le32_to_cpu(HDR(bh)->h_refcount), -+ EXT3_XATTR_REFCOUNT_MAX); -+ } else if (!ext3_xattr_cmp(header, HDR(bh))) { -+ ea_bdebug(bh, "b_count=%d",atomic_read(&(bh->b_count))); -+ mb_cache_entry_release(ce); -+ return bh; -+ } -+ brelse(bh); -+ ce = mb_cache_entry_find_next(ce, 0, inode->i_dev, hash); -+ } -+ return NULL; -+} -+ -+/* -+ * ext3_xattr_cache_remove() -+ * -+ * Remove the cache entry of a block from the cache. Called when a -+ * block becomes invalid. -+ */ -+static void -+ext3_xattr_cache_remove(struct buffer_head *bh) -+{ -+ struct mb_cache_entry *ce; -+ -+ ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_dev, bh->b_blocknr); -+ if (ce) { -+ ea_bdebug(bh, "removing (%d cache entries remaining)", -+ atomic_read(&ext3_xattr_cache->c_entry_count)-1); -+ mb_cache_entry_free(ce); -+ } else -+ ea_bdebug(bh, "no cache entry"); -+} -+ -+#define NAME_HASH_SHIFT 5 -+#define VALUE_HASH_SHIFT 16 -+ -+/* -+ * ext3_xattr_hash_entry() -+ * -+ * Compute the hash of an extended attribute. 
-+ */ -+static inline void ext3_xattr_hash_entry(struct ext3_xattr_header *header, -+ struct ext3_xattr_entry *entry) -+{ -+ __u32 hash = 0; -+ char *name = entry->e_name; -+ int n; -+ -+ for (n=0; n < entry->e_name_len; n++) { -+ hash = (hash << NAME_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ -+ *name++; -+ } -+ -+ if (entry->e_value_block == 0 && entry->e_value_size != 0) { -+ __u32 *value = (__u32 *)((char *)header + -+ le16_to_cpu(entry->e_value_offs)); -+ for (n = (le32_to_cpu(entry->e_value_size) + -+ EXT3_XATTR_ROUND) >> EXT3_XATTR_PAD_BITS; n; n--) { -+ hash = (hash << VALUE_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^ -+ le32_to_cpu(*value++); -+ } -+ } -+ entry->e_hash = cpu_to_le32(hash); -+} -+ -+#undef NAME_HASH_SHIFT -+#undef VALUE_HASH_SHIFT -+ -+#define BLOCK_HASH_SHIFT 16 -+ -+/* -+ * ext3_xattr_rehash() -+ * -+ * Re-compute the extended attribute hash value after an entry has changed. -+ */ -+static void ext3_xattr_rehash(struct ext3_xattr_header *header, -+ struct ext3_xattr_entry *entry) -+{ -+ struct ext3_xattr_entry *here; -+ __u32 hash = 0; -+ -+ ext3_xattr_hash_entry(header, entry); -+ here = ENTRY(header+1); -+ while (!IS_LAST_ENTRY(here)) { -+ if (!here->e_hash) { -+ /* Block is not shared if an entry's hash value == 0 */ -+ hash = 0; -+ break; -+ } -+ hash = (hash << BLOCK_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^ -+ le32_to_cpu(here->e_hash); -+ here = EXT3_XATTR_NEXT(here); -+ } -+ header->h_hash = cpu_to_le32(hash); -+} -+ -+#undef BLOCK_HASH_SHIFT -+ -+int __init -+init_ext3_xattr(void) -+{ -+ ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL, -+ sizeof(struct mb_cache_entry) + -+ sizeof(struct mb_cache_entry_index), 1, 61); -+ if (!ext3_xattr_cache) -+ return -ENOMEM; -+ -+ return 0; -+} -+ -+void -+exit_ext3_xattr(void) -+{ -+ if (ext3_xattr_cache) -+ mb_cache_destroy(ext3_xattr_cache); -+ ext3_xattr_cache = NULL; -+} -+ -+#else /* CONFIG_EXT3_FS_XATTR_SHARING */ -+ 
-+int __init -+init_ext3_xattr(void) -+{ -+ return 0; -+} -+ -+void -+exit_ext3_xattr(void) -+{ -+} -+ -+#endif /* CONFIG_EXT3_FS_XATTR_SHARING */ ---- linux-2.4.18-p4smp/include/linux/ext3_fs.h~linux-2.4.18ea-0.8.26 2003-07-21 22:29:27.000000000 -0600 -+++ linux-2.4.18-p4smp-braam/include/linux/ext3_fs.h 2003-07-21 22:49:05.000000000 -0600 -@@ -58,8 +58,6 @@ - */ - #define EXT3_BAD_INO 1 /* Bad blocks inode */ - #define EXT3_ROOT_INO 2 /* Root inode */ --#define EXT3_ACL_IDX_INO 3 /* ACL inode */ --#define EXT3_ACL_DATA_INO 4 /* ACL inode */ - #define EXT3_BOOT_LOADER_INO 5 /* Boot loader inode */ - #define EXT3_UNDEL_DIR_INO 6 /* Undelete directory inode */ - #define EXT3_RESIZE_INO 7 /* Reserved group descriptors inode */ -@@ -89,7 +87,6 @@ - #else - # define EXT3_BLOCK_SIZE(s) (EXT3_MIN_BLOCK_SIZE << (s)->s_log_block_size) - #endif --#define EXT3_ACLE_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (struct ext3_acl_entry)) - #define EXT3_ADDR_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (__u32)) - #ifdef __KERNEL__ - # define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) -@@ -124,28 +121,6 @@ - #endif - - /* -- * ACL structures -- */ --struct ext3_acl_header /* Header of Access Control Lists */ --{ -- __u32 aclh_size; -- __u32 aclh_file_count; -- __u32 aclh_acle_count; -- __u32 aclh_first_acle; --}; -- --struct ext3_acl_entry /* Access Control List Entry */ --{ -- __u32 acle_size; -- __u16 acle_perms; /* Access permissions */ -- __u16 acle_type; /* Type of entry */ -- __u16 acle_tag; /* User or group identity */ -- __u16 acle_pad1; -- __u32 acle_next; /* Pointer on next entry for the */ -- /* same inode or on next free entry */ --}; -- --/* - * Structure of a blocks group descriptor - */ - struct ext3_group_desc -@@ -513,7 +488,7 @@ struct ext3_super_block { - #define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ - #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ - --#define EXT3_FEATURE_COMPAT_SUPP 0 -+#define EXT3_FEATURE_COMPAT_SUPP 
EXT3_FEATURE_COMPAT_EXT_ATTR - #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ - EXT3_FEATURE_INCOMPAT_RECOVER) - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ -@@ -606,6 +581,24 @@ struct ext3_iloc - unsigned long block_group; - }; - -+/* Defined for extended attributes */ -+#define CONFIG_EXT3_FS_XATTR y -+#ifndef ENOATTR -+#define ENOATTR ENODATA /* No such attribute */ -+#endif -+#ifndef ENOTSUP -+#define ENOTSUP EOPNOTSUPP /* Operation not supported */ -+#endif -+#ifndef XATTR_NAME_MAX -+#define XATTR_NAME_MAX 255 /* # chars in an extended attribute name */ -+#define XATTR_SIZE_MAX 65536 /* size of an extended attribute value (64k) */ -+#define XATTR_LIST_MAX 65536 /* size of extended attribute namelist (64k) */ -+#endif -+#ifndef XATTR_CREATE -+#define XATTR_CREATE 1 /* set value, fail if attr already exists */ -+#define XATTR_REPLACE 2 /* set value, fail if attr does not exist */ -+#endif -+ - /* - * Function prototypes - */ -@@ -647,6 +640,7 @@ extern void ext3_check_inodes_bitmap (st - extern unsigned long ext3_count_free (struct buffer_head *, unsigned); - - /* inode.c */ -+extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); - extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); - extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); - ---- linux-2.4.18-p4smp/include/linux/ext3_jbd.h~linux-2.4.18ea-0.8.26 2003-07-21 22:29:27.000000000 -0600 -+++ linux-2.4.18-p4smp-braam/include/linux/ext3_jbd.h 2003-07-21 22:49:05.000000000 -0600 -@@ -30,13 +30,19 @@ - - #define EXT3_SINGLEDATA_TRANS_BLOCKS 8 - -+/* Extended attributes may touch two data buffers, two bitmap buffers, -+ * and two group and summaries. */ -+ -+#define EXT3_XATTR_TRANS_BLOCKS 8 -+ - /* Define the minimum size for a transaction which modifies data. 
This - * needs to take into account the fact that we may end up modifying two - * quota files too (one for the group, one for the user quota). The - * superblock only gets updated once, of course, so don't bother - * counting that again for the quota updates. */ - --#define EXT3_DATA_TRANS_BLOCKS (3 * EXT3_SINGLEDATA_TRANS_BLOCKS - 2) -+#define EXT3_DATA_TRANS_BLOCKS (3 * EXT3_SINGLEDATA_TRANS_BLOCKS + \ -+ EXT3_XATTR_TRANS_BLOCKS - 2) - - extern int ext3_writepage_trans_blocks(struct inode *inode); - ---- /dev/null 2003-01-30 03:24:37.000000000 -0700 -+++ linux-2.4.18-p4smp-braam/include/linux/ext3_xattr.h 2003-07-21 22:49:05.000000000 -0600 -@@ -0,0 +1,155 @@ -+/* -+ File: linux/ext3_xattr.h -+ -+ On-disk format of extended attributes for the ext3 filesystem. -+ -+ (C) 2001 Andreas Gruenbacher, -+*/ -+ -+#include -+#include -+#include -+ -+/* Magic value in attribute blocks */ -+#define EXT3_XATTR_MAGIC 0xEA020000 -+ -+/* Maximum number of references to one attribute block */ -+#define EXT3_XATTR_REFCOUNT_MAX 1024 -+ -+/* Name indexes */ -+#define EXT3_XATTR_INDEX_MAX 10 -+#define EXT3_XATTR_INDEX_USER 1 -+ -+struct ext3_xattr_header { -+ __u32 h_magic; /* magic number for identification */ -+ __u32 h_refcount; /* reference count */ -+ __u32 h_blocks; /* number of disk blocks used */ -+ __u32 h_hash; /* hash value of all attributes */ -+ __u32 h_reserved[4]; /* zero right now */ -+}; -+ -+struct ext3_xattr_entry { -+ __u8 e_name_len; /* length of name */ -+ __u8 e_name_index; /* attribute name index */ -+ __u16 e_value_offs; /* offset in disk block of value */ -+ __u32 e_value_block; /* disk block attribute is stored on (n/i) */ -+ __u32 e_value_size; /* size of attribute value */ -+ __u32 e_hash; /* hash value of name and value */ -+ char e_name[0]; /* attribute name */ -+}; -+ -+#define EXT3_XATTR_PAD_BITS 2 -+#define EXT3_XATTR_PAD (1<e_name_len)) ) -+#define EXT3_XATTR_SIZE(size) \ -+ (((size) + EXT3_XATTR_ROUND) & ~EXT3_XATTR_ROUND) -+ -+#ifdef __KERNEL__ -+ 
-+# ifdef CONFIG_EXT3_FS_XATTR -+ -+struct ext3_xattr_handler { -+ char *prefix; -+ size_t (*list)(char *list, struct inode *inode, const char *name, -+ int name_len); -+ int (*get)(struct inode *inode, const char *name, void *buffer, -+ size_t size); -+ int (*set)(struct inode *inode, const char *name, void *buffer, -+ size_t size, int flags); -+}; -+ -+extern int ext3_xattr_register(int, struct ext3_xattr_handler *); -+extern void ext3_xattr_unregister(int, struct ext3_xattr_handler *); -+ -+extern int ext3_setxattr(struct dentry *, const char *, void *, size_t, int); -+extern ssize_t ext3_getxattr(struct dentry *, const char *, void *, size_t); -+extern ssize_t ext3_listxattr(struct dentry *, char *, size_t); -+extern int ext3_removexattr(struct dentry *, const char *); -+ -+extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t); -+extern int ext3_xattr_list(struct inode *, char *, size_t); -+extern int ext3_xattr_set(handle_t *handle, struct inode *, int, const char *, void *, size_t, int); -+ -+extern void ext3_xattr_drop_inode(handle_t *, struct inode *); -+extern void ext3_xattr_put_super(struct super_block *); -+ -+extern int init_ext3_xattr(void) __init; -+extern void exit_ext3_xattr(void); -+ -+# else /* CONFIG_EXT3_FS_XATTR */ -+# define ext3_setxattr NULL -+# define ext3_getxattr NULL -+# define ext3_listxattr NULL -+# define ext3_removexattr NULL -+ -+static inline int -+ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t size, int flags) -+{ -+ return -ENOTSUP; -+} -+ -+static inline int -+ext3_xattr_list(struct inode *inode, void *buffer, size_t size, int flags) -+{ -+ return -ENOTSUP; -+} -+ -+static inline int -+ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, void *value, size_t size, int flags) -+{ -+ return -ENOTSUP; -+} -+ -+static inline void -+ext3_xattr_drop_inode(handle_t *handle, struct inode *inode) -+{ -+} -+ -+static inline void 
-+ext3_xattr_put_super(struct super_block *sb) -+{ -+} -+ -+static inline int -+init_ext3_xattr(void) -+{ -+ return 0; -+} -+ -+static inline void -+exit_ext3_xattr(void) -+{ -+} -+ -+# endif /* CONFIG_EXT3_FS_XATTR */ -+ -+# ifdef CONFIG_EXT3_FS_XATTR_USER -+ -+extern int init_ext3_xattr_user(void) __init; -+extern void exit_ext3_xattr_user(void); -+ -+# else /* CONFIG_EXT3_FS_XATTR_USER */ -+ -+static inline int -+init_ext3_xattr_user(void) -+{ -+ return 0; -+} -+ -+static inline void -+exit_ext3_xattr_user(void) -+{ -+} -+ -+#endif /* CONFIG_EXT3_FS_XATTR_USER */ -+ -+#endif /* __KERNEL__ */ -+ ---- /dev/null 2003-01-30 03:24:37.000000000 -0700 -+++ linux-2.4.18-p4smp-braam/include/linux/xattr.h 2003-07-21 22:49:05.000000000 -0600 -@@ -0,0 +1,15 @@ -+/* -+ File: linux/xattr.h -+ -+ Extended attributes handling. -+ -+ Copyright (C) 2001 by Andreas Gruenbacher -+ Copyright (C) 2001 SGI - Silicon Graphics, Inc -+*/ -+#ifndef _LINUX_XATTR_H -+#define _LINUX_XATTR_H -+ -+#define XATTR_CREATE 1 /* set value, fail if attr already exists */ -+#define XATTR_REPLACE 2 /* set value, fail if attr does not exist */ -+ -+#endif /* _LINUX_XATTR_H */ ---- linux-2.4.18-p4smp/fs/ext3/Makefile~linux-2.4.18ea-0.8.26 2003-07-21 22:27:37.000000000 -0600 -+++ linux-2.4.18-p4smp-braam/fs/ext3/Makefile 2003-07-21 22:51:23.000000000 -0600 -@@ -9,10 +9,10 @@ - - O_TARGET := ext3.o - --export-objs := super.o inode.o -+export-objs := ext3-exports.o - - obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ -- ioctl.o namei.o super.o symlink.o -+ ioctl.o namei.o super.o symlink.o xattr.o ext3-exports.o - obj-m := $(O_TARGET) - - include $(TOPDIR)/Rules.make - -_ diff --git a/lustre/kernel_patches/patches/listman-2.4.18.patch b/lustre/kernel_patches/patches/listman-2.4.18.patch deleted file mode 100644 index 19ad959..0000000 --- a/lustre/kernel_patches/patches/listman-2.4.18.patch +++ /dev/null @@ -1,72 +0,0 @@ -Index: linux-2.4.18-chaos/include/linux/list.h 
-=================================================================== ---- linux-2.4.18-chaos.orig/include/linux/list.h 2003-11-23 00:07:05.000000000 +0300 -+++ linux-2.4.18-chaos/include/linux/list.h 2003-12-11 00:25:15.000000000 +0300 -@@ -173,6 +173,67 @@ - for (pos = (head)->prev, prefetch(pos->prev); pos != (head); \ - pos = pos->prev, prefetch(pos->prev)) - -+/** -+ * list_for_each_entry - iterate over list of given type -+ * @pos: the type * to use as a loop counter. -+ * @head: the head for your list. -+ * @member: the name of the list_struct within the struct. -+ */ -+#define list_for_each_entry(pos, head, member) \ -+ for (pos = list_entry((head)->next, typeof(*pos), member), \ -+ prefetch(pos->member.next); \ -+ &pos->member != (head); \ -+ pos = list_entry(pos->member.next, typeof(*pos), member), \ -+ prefetch(pos->member.next)) -+ -+#ifndef list_for_each_entry_safe -+/** -+ * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry -+ * @pos: the type * to use as a loop counter. -+ * @n: another type * to use as temporary storage -+ * @head: the head for your list. -+ * @member: the name of the list_struct within the struct. 
-+ */ -+#define list_for_each_entry_safe(pos, n, head, member) \ -+ for (pos = list_entry((head)->next, typeof(*pos), member), \ -+ n = list_entry(pos->member.next, typeof(*pos), member); \ -+ &pos->member != (head); \ -+ pos = n, n = list_entry(n->member.next, typeof(*n), member)) -+#endif -+ -+/** -+ * list_move - delete from one list and add as another's head -+ * @list: the entry to move -+ * @head: the head that will precede our entry -+ */ -+static inline void list_move(struct list_head *list, struct list_head *head) -+{ -+ __list_del(list->prev, list->next); -+ list_add(list, head); -+} -+ -+/** -+ * list_move_tail - delete from one list and add as another's tail -+ * @list: the entry to move -+ * @head: the head that will follow our entry -+ */ -+static inline void list_move_tail(struct list_head *list, -+ struct list_head *head) -+{ -+ __list_del(list->prev, list->next); -+ list_add_tail(list, head); -+} -+ -+/* 2.5 uses hlists for some things, like the d_hash. we'll treat them -+ * as 2.5 and let macros drop back.. */ -+#define hlist_entry list_entry -+#define hlist_head list_head -+#define hlist_node list_head -+#define HLIST_HEAD LIST_HEAD -+#define INIT_HLIST_HEAD INIT_LIST_HEAD -+#define hlist_del_init list_del_init -+#define hlist_add_head list_add -+#define hlist_for_each_safe list_for_each_safe - - #endif /* __KERNEL__ || _LVM_H_INCLUDE */ - diff --git a/lustre/kernel_patches/patches/loop-sync-2.4.21-suse.patch b/lustre/kernel_patches/patches/loop-sync-2.4.21-suse.patch index 73372b9..c0f0f1d 100644 --- a/lustre/kernel_patches/patches/loop-sync-2.4.21-suse.patch +++ b/lustre/kernel_patches/patches/loop-sync-2.4.21-suse.patch @@ -1,5 +1,5 @@ ---- drivers/block/loop.c.bu 2004-05-11 16:27:23.000000000 -0700 -+++ drivers/block/loop.c 2004-05-11 16:28:50.000000000 -0700 +--- linux/drivers/block/loop.c. 
2004-05-11 16:27:23.000000000 -0700 ++++ linux/drivers/block/loop.c 2004-05-11 16:28:50.000000000 -0700 @@ -978,7 +978,7 @@ static int lo_release(struct inode *inod lo = &loop_dev[dev]; diff --git a/lustre/kernel_patches/patches/nfs_export_kernel-2.4.18.patch b/lustre/kernel_patches/patches/nfs_export_kernel-2.4.18.patch deleted file mode 100644 index eae4623..0000000 --- a/lustre/kernel_patches/patches/nfs_export_kernel-2.4.18.patch +++ /dev/null @@ -1,741 +0,0 @@ - fs/Makefile | 3 - fs/file_table.c | 11 ++ - fs/inode.c | 23 ++++- - fs/namei.c | 12 ++ - fs/nfsd/export.c | 5 + - fs/nfsd/nfsfh.c | 65 +++++++++++++- - fs/nfsd/vfs.c | 240 ++++++++++++++++++++++++++++++++++++++++++++++++----- - include/linux/fs.h | 10 ++ - kernel/ksyms.c | 2 - 9 files changed, 337 insertions(+), 34 deletions(-) - ---- linux-2.4.18-chaos/fs/file_table.c~nfs_export_kernel-2.4.18 2003-07-28 17:52:04.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/fs/file_table.c 2003-10-08 01:10:58.000000000 +0400 -@@ -82,7 +82,8 @@ struct file * get_empty_filp(void) - * and call the open function (if any). The caller must verify that - * inode->i_fop is not NULL. 
- */ --int init_private_file(struct file *filp, struct dentry *dentry, int mode) -+int init_private_file_it(struct file *filp, struct dentry *dentry, int mode, -+ struct lookup_intent *it) - { - memset(filp, 0, sizeof(*filp)); - filp->f_mode = mode; -@@ -90,12 +91,20 @@ int init_private_file(struct file *filp, - filp->f_dentry = dentry; - filp->f_uid = current->fsuid; - filp->f_gid = current->fsgid; -+ if (it) -+ filp->f_it = it; - filp->f_op = dentry->d_inode->i_fop; - if (filp->f_op->open) - return filp->f_op->open(dentry->d_inode, filp); - else - return 0; - } -+EXPORT_SYMBOL(init_private_file_it); -+ -+int init_private_file(struct file *filp, struct dentry *dentry, int mode) -+{ -+ return init_private_file_it(filp, dentry, mode, NULL); -+} - - void fput(struct file * file) - { ---- linux-2.4.18-chaos/fs/inode.c~nfs_export_kernel-2.4.18 2003-10-08 00:58:35.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/fs/inode.c 2003-10-08 01:10:58.000000000 +0400 -@@ -987,9 +987,10 @@ struct inode *igrab(struct inode *inode) - } - - --struct inode *iget4(struct super_block *sb, unsigned long ino, find_inode_t find_actor, void *opaque) -+static inline struct inode *ifind(struct super_block *sb, unsigned long ino, -+ struct list_head *head, -+ find_inode_t find_actor, void *opaque) - { -- struct list_head * head = inode_hashtable + hash(sb,ino); - struct inode * inode; - - spin_lock(&inode_lock); -@@ -1002,6 +1003,24 @@ struct inode *iget4(struct super_block * - } - spin_unlock(&inode_lock); - -+ return NULL; -+} -+ -+struct inode *ilookup4(struct super_block *sb, unsigned long ino, -+ find_inode_t find_actor, void *opaque) -+{ -+ struct list_head * head = inode_hashtable + hash(sb,ino); -+ return ifind(sb, ino, head, find_actor, opaque); -+} -+ -+struct inode *iget4(struct super_block *sb, unsigned long ino, -+ find_inode_t find_actor, void *opaque) -+{ -+ struct list_head * head = inode_hashtable + hash(sb,ino); -+ struct inode *inode = ifind(sb, ino, head, find_actor, 
opaque); -+ if (inode) -+ return inode; -+ - /* - * get_new_inode() will do the right thing, re-trying the search - * in case it had to block at any point. ---- linux-2.4.18-chaos/fs/Makefile~nfs_export_kernel-2.4.18 2003-10-08 00:58:35.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/fs/Makefile 2003-10-08 01:10:58.000000000 +0400 -@@ -9,7 +9,8 @@ - - O_TARGET := fs.o - --export-objs := filesystems.o open.o dcache.o buffer.o inode.o -+export-objs := filesystems.o open.o dcache.o buffer.o inode.o namei.o \ -+ file_table.o - mod-subdirs := nls - - obj-y := open.o read_write.o devices.o file_table.o buffer.o \ ---- linux-2.4.18-chaos/fs/namei.c~nfs_export_kernel-2.4.18 2003-10-08 00:58:34.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/fs/namei.c 2003-10-08 01:10:58.000000000 +0400 -@@ -22,6 +22,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -100,6 +101,7 @@ void intent_release(struct lookup_intent - it->it_op_release(it); - - } -+EXPORT_SYMBOL(intent_release); - - /* In order to reduce some races, while at the same time doing additional - * checking and hopefully speeding things up, we copy filenames to the -@@ -914,7 +916,8 @@ struct dentry * lookup_hash(struct qstr - - - /* SMP-safe */ --struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) -+struct dentry * lookup_one_len_it(const char * name, struct dentry * base, -+ int len, struct lookup_intent *it) - { - unsigned long hash; - struct qstr this; -@@ -934,11 +937,16 @@ struct dentry * lookup_one_len(const cha - } - this.hash = end_name_hash(hash); - -- return lookup_hash_it(&this, base, NULL); -+ return lookup_hash_it(&this, base, it); - access: - return ERR_PTR(-EACCES); - } - -+struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) -+{ -+ return lookup_one_len_it(name, base, len, NULL); -+} -+ - /* - * namei() - * ---- linux-2.4.18-chaos/fs/nfsd/export.c~nfs_export_kernel-2.4.18 2003-07-28 17:52:06.000000000 +0400 -+++ 
linux-2.4.18-chaos-alexey/fs/nfsd/export.c 2003-10-08 01:10:58.000000000 +0400 -@@ -250,6 +250,11 @@ exp_export(struct nfsctl_export *nxp) - inode = nd.dentry->d_inode; - dev = inode->i_dev; - ino = inode->i_ino; -+ if ((inode->i_sb->s_type->fs_flags & FS_NFSEXP_FSID) && -+ !(nxp->ex_flags & NFSEXP_FSID)) { -+ nxp->ex_dev = inode->i_sb->s_dev; -+ nxp->ex_flags |= NFSEXP_FSID; -+ } - err = -EINVAL; - - exp = exp_get(clp, dev, ino); ---- linux-2.4.18-chaos/fs/nfsd/nfsfh.c~nfs_export_kernel-2.4.18 2003-07-28 17:52:06.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/fs/nfsd/nfsfh.c 2003-10-08 01:10:58.000000000 +0400 -@@ -37,6 +37,15 @@ struct nfsd_getdents_callback { - int sequence; /* sequence counter */ - }; - -+static struct dentry *lookup_it(struct inode *inode, struct dentry * dentry) -+{ -+ if (inode->i_op->lookup_it) -+ return inode->i_op->lookup_it(inode, dentry, NULL, NULL, 0); -+ else -+ return inode->i_op->lookup(inode, dentry); -+ -+} -+ - /* - * A rather strange filldir function to capture - * the name matching the specified inode number. -@@ -76,6 +85,8 @@ static int nfsd_get_name(struct dentry * - int error; - struct file file; - struct nfsd_getdents_callback buffer; -+ struct lookup_intent it; -+ struct file *filp = NULL; - - error = -ENOTDIR; - if (!dir || !S_ISDIR(dir->i_mode)) -@@ -86,9 +97,37 @@ static int nfsd_get_name(struct dentry * - /* - * Open the directory ... 
- */ -- error = init_private_file(&file, dentry, FMODE_READ); -- if (error) -+ if (dentry->d_op && dentry->d_op->d_revalidate_it) { -+ if ((dentry->d_flags & DCACHE_NFSD_DISCONNECTED) && -+ (dentry->d_parent == dentry) ) { -+ it.it_op_release = NULL; -+ /* -+ * XXX Temporary Hack: Simulating init_private_file without -+ * f_op->open for disconnected dentry Since we don't have actual -+ * dentry->d_name to revalidate in revalidate_it() -+ */ -+ filp = &file; -+ memset(filp, 0, sizeof(*filp)); -+ filp->f_mode = FMODE_READ; -+ atomic_set(&filp->f_count, 1); -+ filp->f_dentry = dentry; -+ filp->f_uid = current->fsuid; -+ filp->f_gid = current->fsgid; -+ filp->f_op = dentry->d_inode->i_fop; -+ error = 0; -+ } else { -+ intent_init(&it, IT_OPEN, 0); -+ error = revalidate_it(dentry, &it); -+ if (error) -+ goto out; -+ error = init_private_file_it(&file, dentry, FMODE_READ, &it); -+ } -+ } else { -+ error = init_private_file_it(&file, dentry, FMODE_READ, NULL); -+ } -+ if (error) - goto out; -+ - error = -EINVAL; - if (!file.f_op->readdir) - goto out_close; -@@ -114,9 +153,13 @@ static int nfsd_get_name(struct dentry * - } - - out_close: -- if (file.f_op->release) -+ if (file.f_op->release && !filp) - file.f_op->release(dir, &file); - out: -+ if (dentry->d_op && -+ dentry->d_op->d_revalidate_it && -+ it.it_op_release && !filp) -+ intent_release(&it); - return error; - } - -@@ -275,7 +318,7 @@ struct dentry *nfsd_findparent(struct de - * it is well connected. But nobody returns different dentrys do they? - */ - down(&child->d_inode->i_sem); -- pdentry = child->d_inode->i_op->lookup(child->d_inode, tdentry); -+ pdentry = lookup_it(child->d_inode, tdentry); - up(&child->d_inode->i_sem); - d_drop(tdentry); /* we never want ".." 
hashed */ - if (!pdentry && tdentry->d_inode == NULL) { -@@ -307,6 +350,8 @@ struct dentry *nfsd_findparent(struct de - igrab(tdentry->d_inode); - pdentry->d_flags |= DCACHE_NFSD_DISCONNECTED; - } -+ if (child->d_op && child->d_op->d_revalidate_it) -+ pdentry->d_op = child->d_op; - } - if (pdentry == NULL) - pdentry = ERR_PTR(-ENOMEM); -@@ -465,6 +510,8 @@ find_fh_dentry(struct super_block *sb, _ - struct dentry *pdentry; - struct inode *parent; - -+ if (result->d_op && result->d_op->d_revalidate_it) -+ dentry->d_op = result->d_op; - pdentry = nfsd_findparent(dentry); - err = PTR_ERR(pdentry); - if (IS_ERR(pdentry)) -@@ -667,6 +714,11 @@ fh_verify(struct svc_rqst *rqstp, struct - - inode = dentry->d_inode; - -+ /* cache coherency for non-device filesystems */ -+ if (inode->i_op && inode->i_op->revalidate_it) { -+ inode->i_op->revalidate_it(dentry, NULL); -+ } -+ - /* Type check. The correct error return for type mismatches - * does not seem to be generally agreed upon. SunOS seems to - * use EISDIR if file isn't S_IFREG; a comment in the NFSv3 -@@ -905,8 +957,9 @@ out_negative: - dentry->d_parent->d_name.name, dentry->d_name.name); - goto out; - out_uptodate: -- printk(KERN_ERR "fh_update: %s/%s already up-to-date!\n", -- dentry->d_parent->d_name.name, dentry->d_name.name); -+ if(!dentry->d_parent->d_inode->i_op->mkdir_raw) -+ printk(KERN_ERR "fh_update: %s/%s already up-to-date!\n", -+ dentry->d_parent->d_name.name, dentry->d_name.name); - goto out; - } - ---- linux-2.4.18-chaos/fs/nfsd/vfs.c~nfs_export_kernel-2.4.18 2003-07-28 17:52:06.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/fs/nfsd/vfs.c 2003-10-08 01:11:59.000000000 +0400 -@@ -77,6 +77,128 @@ struct raparms { - static struct raparms * raparml; - static struct raparms * raparm_cache; - -+static int link_raw(struct dentry *dold, struct dentry *ddir, -+ struct dentry *dnew) -+{ -+ int err; -+ -+ struct nameidata old_nd = { .dentry = dold }; -+ struct nameidata nd = { .dentry = ddir, .last = dnew->d_name }; 
-+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->link_raw(&old_nd, &nd); -+ d_instantiate(dnew, dold->d_inode); -+ if(dold->d_inode->i_op && dold->d_inode->i_op->revalidate_it) -+ dold->d_inode->i_op->revalidate_it(dnew, NULL); -+ -+ return err; -+} -+ -+static int unlink_raw(struct dentry *dentry, char *fname, int flen, -+ struct dentry *rdentry) -+{ -+ int err; -+ struct qstr last = { .name = fname, .len = flen }; -+ struct nameidata nd = { .dentry = dentry, .last = last }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->unlink_raw(&nd); -+ if (!err) -+ d_delete(rdentry); -+ -+ return err; -+} -+ -+static int rmdir_raw(struct dentry *dentry, char *fname, int flen, -+ struct dentry *rdentry) -+{ -+ int err; -+ struct qstr last = { .name = fname, .len = flen }; -+ struct nameidata nd = { .dentry = dentry, .last = last }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->rmdir_raw(&nd); -+ if(!err) { -+ rdentry->d_inode->i_flags |= S_DEAD; -+ d_delete(rdentry); -+ } -+ -+ return err; -+} -+ -+static int symlink_raw(struct dentry *dentry, char *fname, int flen, -+ char *path) -+{ -+ int err; -+ struct qstr last = { .name = fname, .len = flen }; -+ struct nameidata nd = { .dentry = dentry, .last = last }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->symlink_raw(&nd, path); -+ -+ return err; -+} -+ -+static int mkdir_raw(struct dentry *dentry, char *fname, int flen, int mode) -+{ -+ int err; -+ struct qstr last = { .name = fname, .len = flen }; -+ struct nameidata nd = { .dentry = dentry, .last = last }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->mkdir_raw(&nd, mode); -+ -+ return err; -+} -+ -+static int mknod_raw(struct dentry *dentry, char *fname, int flen, int mode, -+ dev_t dev) -+{ -+ int err; -+ struct qstr last = { .name = fname, .len = flen }; -+ struct nameidata nd = { .dentry = dentry, .last = last }; -+ struct inode_operations *op = 
nd.dentry->d_inode->i_op; -+ err = op->mknod_raw(&nd, mode, dev); -+ -+ return err; -+} -+ -+static int rename_raw(struct dentry *fdentry, struct dentry *tdentry, -+ struct dentry *odentry, struct dentry *ndentry) -+{ -+ int err; -+ -+ struct nameidata old_nd = { .dentry = fdentry, .last = odentry->d_name}; -+ struct nameidata new_nd = { .dentry = tdentry, .last = ndentry->d_name}; -+ struct inode_operations *op = old_nd.dentry->d_inode->i_op; -+ err = op->rename_raw(&old_nd, &new_nd); -+ d_move(odentry, ndentry); -+ -+ return err; -+} -+ -+static int setattr_raw(struct inode *inode, struct iattr *iap) -+{ -+ int err; -+ -+ iap->ia_valid |= ATTR_RAW; -+ err = inode->i_op->setattr_raw(inode, iap); -+ -+ return err; -+} -+ -+int revalidate_it(struct dentry *dentry, struct lookup_intent *it) -+{ -+ int err = 0; -+ -+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) { -+ if (!dentry->d_op->d_revalidate_it(dentry, 0, NULL, it) && -+ !d_invalidate(dentry)) { -+ dput(dentry); -+ err = -EINVAL; -+ dentry = NULL; -+ return err; -+ } -+ } -+ -+ return err; -+} -+ - /* - * Look up one component of a pathname. - * N.B. 
After this call _both_ fhp and resfh need an fh_put -@@ -300,7 +422,10 @@ nfsd_setattr(struct svc_rqst *rqstp, str - } - err = nfserr_notsync; - if (!check_guard || guardtime == inode->i_ctime) { -- err = notify_change(dentry, iap); -+ if ( dentry->d_inode->i_op && dentry->d_inode->i_op->setattr_raw) -+ err = setattr_raw(dentry->d_inode, iap); -+ else -+ err = notify_change(dentry, iap); - err = nfserrno(err); - } - if (size_change) { -@@ -427,6 +552,7 @@ nfsd_open(struct svc_rqst *rqstp, struct - { - struct dentry *dentry; - struct inode *inode; -+ struct lookup_intent it; - int err; - - /* If we get here, then the client has already done an "open", and (hopefully) -@@ -473,6 +599,14 @@ nfsd_open(struct svc_rqst *rqstp, struct - filp->f_mode = FMODE_READ; - } - -+ intent_init(&it, IT_OPEN, (filp->f_flags & ~O_ACCMODE) | filp->f_mode); -+ -+ err = revalidate_it(dentry, &it); -+ if (err) -+ goto out_nfserr; -+ -+ filp->f_it = &it; -+ - err = 0; - if (filp->f_op && filp->f_op->open) { - err = filp->f_op->open(inode, filp); -@@ -487,7 +621,11 @@ nfsd_open(struct svc_rqst *rqstp, struct - atomic_dec(&filp->f_count); - } - } -+ - out_nfserr: -+ if (it.it_op_release) -+ intent_release(&it); -+ - if (err) - err = nfserrno(err); - out: -@@ -824,7 +962,7 @@ nfsd_create(struct svc_rqst *rqstp, stru - { - struct dentry *dentry, *dchild; - struct inode *dirp; -- int err; -+ int err, error = -EOPNOTSUPP; - - err = nfserr_perm; - if (!flen) -@@ -840,20 +978,44 @@ nfsd_create(struct svc_rqst *rqstp, stru - dentry = fhp->fh_dentry; - dirp = dentry->d_inode; - -+ switch (type) { -+ case S_IFDIR: -+ if (dirp->i_op->mkdir_raw) -+ error = mkdir_raw(dentry, fname, flen, iap->ia_mode); -+ break; -+ case S_IFCHR: -+ case S_IFBLK: -+ case S_IFIFO: -+ case S_IFSOCK: -+ case S_IFREG: -+ if (dirp->i_op->mknod_raw) { -+ if (type == S_IFREG) -+ rdev = 0; -+ error = mknod_raw(dentry, fname, flen, iap->ia_mode, rdev); -+ } -+ break; -+ default: -+ printk("nfsd: bad file type %o in nfsd_create\n",
type); -+ } -+ - err = nfserr_notdir; -- if(!dirp->i_op || !dirp->i_op->lookup) -+ if(!dirp->i_op || !(dirp->i_op->lookup || dirp->i_op->lookup_it)) - goto out; - /* - * Check whether the response file handle has been verified yet. - * If it has, the parent directory should already be locked. - */ -- if (!resfhp->fh_dentry) { -- /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */ -- fh_lock(fhp); -+ if (!resfhp->fh_dentry || dirp->i_op->lookup_it) { -+ /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create -+ and nfsd_proc_create in case of lustre -+ */ -+ if (!resfhp->fh_dentry) -+ fh_lock(fhp); - dchild = lookup_one_len(fname, dentry, flen); - err = PTR_ERR(dchild); - if (IS_ERR(dchild)) - goto out_nfserr; -+ resfhp->fh_dentry = NULL; - err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); - if (err) - goto out; -@@ -874,10 +1036,12 @@ nfsd_create(struct svc_rqst *rqstp, stru - * Make sure the child dentry is still negative ... - */ - err = nfserr_exist; -- if (dchild->d_inode) { -- dprintk("nfsd_create: dentry %s/%s not negative!\n", -- dentry->d_name.name, dchild->d_name.name); -- goto out; -+ if ( error == -EOPNOTSUPP) { -+ if (dchild->d_inode) { -+ dprintk("nfsd_create: dentry %s/%s not negative!\n", -+ dentry->d_name.name, dchild->d_name.name); -+ goto out; -+ } - } - - if (!(iap->ia_valid & ATTR_MODE)) -@@ -890,16 +1054,19 @@ nfsd_create(struct svc_rqst *rqstp, stru - err = nfserr_perm; - switch (type) { - case S_IFREG: -- err = vfs_create(dirp, dchild, iap->ia_mode); -+ if (error == -EOPNOTSUPP) -+ err = vfs_create(dirp, dchild, iap->ia_mode); - break; - case S_IFDIR: -- err = vfs_mkdir(dirp, dchild, iap->ia_mode); -+ if (error == -EOPNOTSUPP) -+ err = vfs_mkdir(dirp, dchild, iap->ia_mode); - break; - case S_IFCHR: - case S_IFBLK: - case S_IFIFO: - case S_IFSOCK: -- err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); -+ if (error == -EOPNOTSUPP) -+ err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); - break; - default: - printk("nfsd: 
bad file type %o in nfsd_create\n", type); -@@ -968,7 +1135,13 @@ nfsd_create_v3(struct svc_rqst *rqstp, s - /* Get all the sanity checks out of the way before - * we lock the parent. */ - err = nfserr_notdir; -- if(!dirp->i_op || !dirp->i_op->lookup) -+ if (dirp->i_op->mknod_raw) { -+ err = mknod_raw(dentry, fname, flen, iap->ia_mode, 0); -+ if (err && err != -EOPNOTSUPP) -+ goto out; -+ } -+ -+ if(!dirp->i_op || !(dirp->i_op->lookup || dirp->i_op->lookup_it)) - goto out; - fh_lock(fhp); - -@@ -1019,6 +1192,8 @@ nfsd_create_v3(struct svc_rqst *rqstp, s - case NFS3_CREATE_GUARDED: - err = nfserr_exist; - } -+ if(dirp->i_op->mknod_raw) -+ err = 0; - goto out; - } - -@@ -1125,7 +1300,7 @@ nfsd_symlink(struct svc_rqst *rqstp, str - struct iattr *iap) - { - struct dentry *dentry, *dnew; -- int err, cerr; -+ int err, cerr, error = -EOPNOTSUPP; - - err = nfserr_noent; - if (!flen || !plen) -@@ -1139,12 +1314,18 @@ nfsd_symlink(struct svc_rqst *rqstp, str - goto out; - fh_lock(fhp); - dentry = fhp->fh_dentry; -+ -+ if (dentry->d_inode->i_op->symlink_raw) -+ error = symlink_raw(dentry, fname, flen, path); -+ - dnew = lookup_one_len(fname, dentry, flen); - err = PTR_ERR(dnew); - if (IS_ERR(dnew)) - goto out_nfserr; - -- err = vfs_symlink(dentry->d_inode, dnew, path); -+ err = error; -+ if (err == -EOPNOTSUPP || !dentry->d_inode->i_op->symlink_raw) -+ err = vfs_symlink(dentry->d_inode, dnew, path); - if (!err) { - if (EX_ISSYNC(fhp->fh_export)) - nfsd_sync_dir(dentry); -@@ -1154,7 +1335,10 @@ nfsd_symlink(struct svc_rqst *rqstp, str - iap->ia_valid |= ATTR_CTIME; - iap->ia_mode = (iap->ia_mode&S_IALLUGO) - | S_IFLNK; -- err = notify_change(dnew, iap); -+ if (dnew->d_inode->i_op && dnew->d_inode->i_op->setattr_raw) -+ err = setattr_raw(dnew->d_inode, iap); -+ else -+ err = notify_change(dnew, iap); - if (!err && EX_ISSYNC(fhp->fh_export)) - write_inode_now(dentry->d_inode, 1); - } -@@ -1212,7 +1396,10 @@ nfsd_link(struct svc_rqst *rqstp, struct - dold = tfhp->fh_dentry; - 
dest = dold->d_inode; - -- err = vfs_link(dold, dirp, dnew); -+ if (dirp->i_op->link_raw) -+ err = link_raw(dold, ddir, dnew); -+ else -+ err = vfs_link(dold, dirp, dnew); - if (!err) { - if (EX_ISSYNC(ffhp->fh_export)) { - nfsd_sync_dir(ddir); -@@ -1298,7 +1485,10 @@ nfsd_rename(struct svc_rqst *rqstp, stru - err = nfserr_perm; - } else - #endif -- err = vfs_rename(fdir, odentry, tdir, ndentry); -+ if(fdir->i_op->rename_raw) -+ err = rename_raw(fdentry, tdentry, odentry, ndentry); -+ else -+ err = vfs_rename(fdir, odentry, tdir, ndentry); - unlock_kernel(); - if (!err && EX_ISSYNC(tfhp->fh_export)) { - nfsd_sync_dir(tdentry); -@@ -1320,7 +1510,7 @@ nfsd_rename(struct svc_rqst *rqstp, stru - fill_post_wcc(tfhp); - double_up(&tdir->i_sem, &fdir->i_sem); - ffhp->fh_locked = tfhp->fh_locked = 0; -- -+ - out: - return err; - } -@@ -1366,9 +1556,15 @@ nfsd_unlink(struct svc_rqst *rqstp, stru - err = nfserr_perm; - } else - #endif -- err = vfs_unlink(dirp, rdentry); -+ if (dirp->i_op->unlink_raw) -+ err = unlink_raw(dentry, fname, flen, rdentry); -+ else -+ err = vfs_unlink(dirp, rdentry); - } else { /* It's RMDIR */ -- err = vfs_rmdir(dirp, rdentry); -+ if (dirp->i_op->rmdir_raw) -+ err = rmdir_raw(dentry, fname, flen, rdentry); -+ else -+ err = vfs_rmdir(dirp, rdentry); - } - - dput(rdentry); ---- linux-2.4.18-chaos/include/linux/fs.h~nfs_export_kernel-2.4.18 2003-10-08 01:09:51.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/include/linux/fs.h 2003-10-08 01:10:58.000000000 +0400 -@@ -93,6 +93,9 @@ extern int leases_enable, dir_notify_ena - #define FS_SINGLE 8 /* Filesystem that can have only one superblock */ - #define FS_NOMOUNT 16 /* Never mount from userland */ - #define FS_LITTER 32 /* Keeps the tree in dcache */ -+#define FS_NFSEXP_FSID 64 /* Use file system specific fsid for -+ * exporting non device filesystems. 
-+ */ - #define FS_ODD_RENAME 32768 /* Temporary stuff; will go away as soon - * as nfs_rename() will be cleaned up - */ -@@ -1147,6 +1150,9 @@ extern struct file *filp_open(const char - extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); - extern struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, - int flags, struct lookup_intent *it); -+extern int revalidate_it(struct dentry *dentry, struct lookup_intent *it); -+extern int init_private_file_it(struct file *, struct dentry *dentry, int mode, -+ struct lookup_intent *it); - extern int filp_close(struct file *, fl_owner_t id); - extern char * getname(const char *); - -@@ -1423,6 +1429,8 @@ extern void path_release(struct nameidat - extern int follow_down(struct vfsmount **, struct dentry **); - extern int follow_up(struct vfsmount **, struct dentry **); - extern struct dentry * lookup_one_len(const char *, struct dentry *, int); -+extern struct dentry * lookup_one_len_it(const char *, struct dentry *, int, -+ struct lookup_intent *); - extern struct dentry * lookup_hash(struct qstr *, struct dentry *); - #define user_path_walk(name,nd) __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd) - #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd) -@@ -1437,6 +1445,8 @@ extern ino_t iunique(struct super_block - - typedef int (*find_inode_t)(struct inode *, unsigned long, void *); - extern struct inode * iget4(struct super_block *, unsigned long, find_inode_t, void *); -+extern struct inode * ilookup4(struct super_block *, unsigned long, -+ find_inode_t, void *); - static inline struct inode *iget(struct super_block *sb, unsigned long ino) - { - return iget4(sb, ino, NULL, NULL); ---- linux-2.4.18-chaos/kernel/ksyms.c~nfs_export_kernel-2.4.18 2003-10-08 00:58:34.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/kernel/ksyms.c 2003-10-08 01:10:58.000000000 +0400 -@@ -156,6 +156,7 @@ EXPORT_SYMBOL(fget); - EXPORT_SYMBOL(igrab); - EXPORT_SYMBOL(iunique); - 
EXPORT_SYMBOL(iget4); -+EXPORT_SYMBOL(ilookup4); - EXPORT_SYMBOL(iput); - EXPORT_SYMBOL(inode_init_once); - EXPORT_SYMBOL(force_delete); -@@ -167,6 +168,7 @@ EXPORT_SYMBOL(path_walk); - EXPORT_SYMBOL(path_release); - EXPORT_SYMBOL(__user_walk); - EXPORT_SYMBOL(lookup_one_len); -+EXPORT_SYMBOL(lookup_one_len_it); - EXPORT_SYMBOL(lookup_hash); - EXPORT_SYMBOL(sys_close); - EXPORT_SYMBOL(sys_read); - -_ diff --git a/lustre/kernel_patches/patches/vfs-pdirops-2.4.18-chaos.patch b/lustre/kernel_patches/patches/vfs-pdirops-2.4.18-chaos.patch deleted file mode 100644 index 0f3070b..0000000 --- a/lustre/kernel_patches/patches/vfs-pdirops-2.4.18-chaos.patch +++ /dev/null @@ -1,265 +0,0 @@ - fs/inode.c | 1 - fs/namei.c | 65 +++++++++++++++++++++++++++++++++++++++-------------- - include/linux/fs.h | 11 ++++---- - 3 files changed, 54 insertions(+), 23 deletions(-) - ---- linux-2.4.18-chaos/fs/namei.c~vfs-pdirops-2.4.18-chaos 2003-09-16 23:33:47.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/fs/namei.c 2003-09-17 00:18:45.000000000 +0400 -@@ -101,6 +101,36 @@ void intent_release(struct lookup_intent - - } - -+static void *lock_dir(struct inode *dir, struct qstr *name) -+{ -+ unsigned long hash; -+ -+ if (!IS_PDIROPS(dir)) { -+ down(&dir->i_sem); -+ return 0; -+ } -+ -+ /* OK. fs understands parallel directory operations. 
-+ * so, we try to acquire lock for hash of requested -+ * filename in order to prevent any operations with -+ * same name in same time -bzzz */ -+ -+ /* calculate name hash */ -+ hash = full_name_hash(name->name, name->len); -+ -+ /* lock this hash */ -+ return dynlock_lock(&dir->i_dcache_lock, hash, 1, GFP_ATOMIC); -+} -+ -+static void unlock_dir(struct inode *dir, void *lock) -+{ -+ if (!IS_PDIROPS(dir)) { -+ up(&dir->i_sem); -+ return; -+ } -+ dynlock_unlock(&dir->i_dcache_lock, lock); -+} -+ - /* In order to reduce some races, while at the same time doing additional - * checking and hopefully speeding things up, we copy filenames to the - * kernel data space before using them.. -@@ -303,10 +333,11 @@ static struct dentry *real_lookup(struct - struct dentry * result; - struct inode *dir = parent->d_inode; - int counter = 0; -+ void *lock; - - again: -+ lock = lock_dir(dir, name); - counter++; -- down(&dir->i_sem); - /* - * First re-do the cached lookup just in case it was created - * while we waited for the directory semaphore.. -@@ -330,7 +361,7 @@ again: - else - result = dentry; - } -- up(&dir->i_sem); -+ unlock_dir(dir, lock); - return result; - } - -@@ -338,7 +369,7 @@ again: - * Uhhuh! Nasty case: the cache was re-populated while - * we waited on the semaphore. Need to revalidate. 
- */ -- up(&dir->i_sem); -+ unlock_dir(dir, lock); - if (result->d_op && result->d_op->d_revalidate) { - if (!result->d_op->d_revalidate(result, flags) && !d_invalidate(result)) { - dput(result); -@@ -1240,13 +1271,13 @@ struct file *filp_open(const char * path - goto exit; - - dir = nd.dentry; -- down(&dir->d_inode->i_sem); -+ nd.lock = lock_dir(dir->d_inode, &nd.last); - dentry = lookup_hash_it(&nd.last, nd.dentry, &it); - - do_last: - error = PTR_ERR(dentry); - if (IS_ERR(dentry)) { -- up(&dir->d_inode->i_sem); -+ unlock_dir(dir->d_inode, nd.lock); - goto exit; - } - -@@ -1255,7 +1286,7 @@ do_last: - if (!dentry->d_inode) { - error = vfs_create_it(dir->d_inode, dentry, - mode & ~current->fs->umask, &it); -- up(&dir->d_inode->i_sem); -+ unlock_dir(dir->d_inode, nd.lock); - dput(nd.dentry); - nd.dentry = dentry; - if (error) -@@ -1270,7 +1301,7 @@ do_last: - /* - * It already exists. - */ -- up(&dir->d_inode->i_sem); -+ unlock_dir(dir->d_inode, nd.lock); - - error = -EEXIST; - if (flag & O_EXCL) -@@ -1350,7 +1381,7 @@ do_link: - goto exit; - } - dir = nd.dentry; -- down(&dir->d_inode->i_sem); -+ nd.lock = lock_dir(dir->d_inode, &nd.last); - dentry = lookup_hash_it(&nd.last, nd.dentry, &it); - putname(nd.last.name); - goto do_last; -@@ -1363,7 +1394,7 @@ static struct dentry *lookup_create(stru - { - struct dentry *dentry; - -- down(&nd->dentry->d_inode->i_sem); -+ nd->lock = lock_dir(nd->dentry->d_inode, &nd->last); - dentry = ERR_PTR(-EEXIST); - if (nd->last_type != LAST_NORM) - goto fail; -@@ -1452,7 +1483,7 @@ asmlinkage long sys_mknod(const char * f - } - dput(dentry); - } -- up(&nd.dentry->d_inode->i_sem); -+ unlock_dir(nd.dentry->d_inode, nd.lock); - out2: - path_release(&nd); - out: -@@ -1515,7 +1546,7 @@ asmlinkage long sys_mkdir(const char * p - mode & ~current->fs->umask); - dput(dentry); - } -- up(&nd.dentry->d_inode->i_sem); -+ unlock_dir(nd.dentry->d_inode, nd.lock); - out2: - path_release(&nd); - out: -@@ -1625,14 +1656,14 @@ asmlinkage long 
sys_rmdir(const char * p - if (error != -EOPNOTSUPP) - goto exit1; - } -- down(&nd.dentry->d_inode->i_sem); -+ nd.lock = lock_dir(nd.dentry->d_inode, &nd.last); - dentry = lookup_hash_it(&nd.last, nd.dentry, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = vfs_rmdir(nd.dentry->d_inode, dentry); - dput(dentry); - } -- up(&nd.dentry->d_inode->i_sem); -+ unlock_dir(nd.dentry->d_inode, nd.lock); - exit1: - path_release(&nd); - exit: -@@ -1691,7 +1722,7 @@ asmlinkage long sys_unlink(const char * - if (error != -EOPNOTSUPP) - goto exit1; - } -- down(&nd.dentry->d_inode->i_sem); -+ nd.lock = lock_dir(nd.dentry->d_inode, &nd.last); - dentry = lookup_hash_it(&nd.last, nd.dentry, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { -@@ -1702,7 +1733,7 @@ asmlinkage long sys_unlink(const char * - exit2: - dput(dentry); - } -- up(&nd.dentry->d_inode->i_sem); -+ unlock_dir(nd.dentry->d_inode, nd.lock); - exit1: - path_release(&nd); - exit: -@@ -1772,7 +1803,7 @@ asmlinkage long sys_symlink(const char * - error = vfs_symlink(nd.dentry->d_inode, dentry, from); - dput(dentry); - } -- up(&nd.dentry->d_inode->i_sem); -+ unlock_dir(nd.dentry->d_inode, nd.lock); - out2: - path_release(&nd); - out: -@@ -1864,7 +1895,7 @@ asmlinkage long sys_link(const char * ol - error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry); - dput(new_dentry); - } -- up(&nd.dentry->d_inode->i_sem); -+ unlock_dir(nd.dentry->d_inode, nd.lock); - out_release: - path_release(&nd); - out: ---- linux-2.4.18-chaos/include/linux/fs.h~vfs-pdirops-2.4.18-chaos 2003-09-16 23:33:47.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/include/linux/fs.h 2003-09-17 00:16:08.000000000 +0400 -@@ -21,6 +21,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -136,6 +137,7 @@ extern int leases_enable, dir_notify_ena - #define S_IMMUTABLE 16 /* Immutable file */ - #define S_DEAD 32 /* removed, but still open directory */ - #define S_NOQUOTA 64 /* Inode is not counted to 
quota */ -+#define S_PDIROPS 256 /* Parallel directory operations */ - - /* - * Note that nosuid etc flags are inode-specific: setting some file-system -@@ -162,6 +164,7 @@ extern int leases_enable, dir_notify_ena - #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE) - #define IS_NOATIME(inode) (__IS_FLG(inode, MS_NOATIME) || ((inode)->i_flags & S_NOATIME)) - #define IS_NODIRATIME(inode) __IS_FLG(inode, MS_NODIRATIME) -+#define IS_PDIROPS(inode) __IS_FLG(inode, S_PDIROPS) - - #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) - -@@ -491,6 +494,7 @@ struct inode { - atomic_t i_writecount; - unsigned int i_attr_flags; - __u32 i_generation; -+ struct dynlock i_dcache_lock; /* for parallel directory ops */ - union { - struct minix_inode_info minix_i; - struct ext2_inode_info ext2_i; -@@ -714,6 +718,7 @@ struct nameidata { - unsigned int flags; - int last_type; - struct lookup_intent *intent; -+ void *lock; - }; - - #define DQUOT_USR_ENABLED 0x01 /* User diskquotas enabled */ -@@ -1611,12 +1616,6 @@ static inline struct dentry *get_parent( - return dget(dentry->d_parent); - } - --static inline void unlock_dir(struct dentry *dir) --{ -- up(&dir->d_inode->i_sem); -- dput(dir); --} -- - /* - * Whee.. Deadlock country. Happily there are only two VFS - * operations that does this.. 
---- linux-2.4.18-chaos/fs/inode.c~vfs-pdirops-2.4.18-chaos 2003-09-16 23:33:48.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/fs/inode.c 2003-09-16 23:47:45.000000000 +0400 -@@ -119,6 +119,7 @@ static struct inode *alloc_inode(struct - mapping->host = inode; - mapping->gfp_mask = GFP_HIGHUSER; - inode->i_mapping = mapping; -+ dynlock_init(&inode->i_dcache_lock); - } - return inode; - } - -_ diff --git a/lustre/kernel_patches/patches/vfs_intent-2.4.18-18-chaos65.patch b/lustre/kernel_patches/patches/vfs_intent-2.4.18-18-chaos65.patch deleted file mode 100644 index b7185b9..0000000 --- a/lustre/kernel_patches/patches/vfs_intent-2.4.18-18-chaos65.patch +++ /dev/null @@ -1,1788 +0,0 @@ - fs/dcache.c | 19 ++ - fs/exec.c | 18 +- - fs/namei.c | 328 +++++++++++++++++++++++++++++++++++++++++-------- - fs/namespace.c | 28 +++- - fs/open.c | 167 ++++++++++++++++++------ - fs/stat.c | 29 ++-- - include/linux/dcache.h | 60 ++++++++ - include/linux/fs.h | 34 ++++- - kernel/exit.c | 3 - kernel/fork.c | 3 - kernel/ksyms.c | 1 - 11 files changed, 564 insertions(+), 126 deletions(-) - -Index: linux-2.4.18-p4smp/fs/dcache.c -=================================================================== ---- linux-2.4.18-p4smp.orig/fs/dcache.c 2004-02-03 01:00:10.000000000 -0500 -+++ linux-2.4.18-p4smp/fs/dcache.c 2004-03-19 16:05:42.000000000 -0500 -@@ -186,6 +186,13 @@ - spin_unlock(&dcache_lock); - return 0; - } -+ -+ /* network invalidation by Lustre */ -+ if (dentry->d_flags & DCACHE_LUSTRE_INVALID) { -+ spin_unlock(&dcache_lock); -+ return 0; -+ } -+ - /* - * Check whether to do a partial shrink_dcache - * to get rid of unused child entries. -@@ -859,13 +866,19 @@ void d_delete(struct dentry * dentry) - * Adds a dentry to the hash according to its name. 
- */ - --void d_rehash(struct dentry * entry) -+void __d_rehash(struct dentry * entry, int lock) - { - struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash); - if (!list_empty(&entry->d_hash)) BUG(); -- spin_lock(&dcache_lock); -+ if (lock) spin_lock(&dcache_lock); - list_add(&entry->d_hash, list); -- spin_unlock(&dcache_lock); -+ if (lock) spin_unlock(&dcache_lock); -+} -+EXPORT_SYMBOL(__d_rehash); -+ -+void d_rehash(struct dentry * entry) -+{ -+ __d_rehash(entry, 1); - } - - #define do_switch(x,y) do { \ -Index: linux-2.4.18-p4smp/fs/exec.c -=================================================================== ---- linux-2.4.18-p4smp.orig/fs/exec.c 2004-02-03 01:00:10.000000000 -0500 -+++ linux-2.4.18-p4smp/fs/exec.c 2004-03-19 16:05:42.000000000 -0500 -@@ -117,8 +117,10 @@ - struct file * file; - struct nameidata nd; - int error; -+ struct lookup_intent it = { .it_op = IT_OPEN, -+ .it_flags = FMODE_READ|FMODE_EXEC }; - -- error = user_path_walk(library, &nd); -+ error = user_path_walk_it(library, &nd, &it); - if (error) - goto out; - -@@ -130,7 +132,8 @@ asmlinkage long sys_uselib(const char * - if (error) - goto exit; - -- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); -+ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it); -+ intent_release(&it); - error = PTR_ERR(file); - if (IS_ERR(file)) - goto out; -@@ -359,8 +362,10 @@ struct file *open_exec(const char *name) - struct inode *inode; - struct file *file; - int err = 0; -+ struct lookup_intent it = { .it_op = IT_OPEN, -+ .it_flags = FMODE_READ|FMODE_EXEC }; - -- err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd); -+ err = path_lookup_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it); - file = ERR_PTR(err); - if (!err) { - inode = nd.dentry->d_inode; -@@ -372,7 +376,8 @@ struct file *open_exec(const char *name) - err = -EACCES; - file = ERR_PTR(err); - if (!err) { -- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); -+ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it); -+ 
intent_release(&it); - if (!IS_ERR(file)) { - err = deny_write_access(file); - if (err) { -@@ -384,6 +389,7 @@ out: - return file; - } - } -+ intent_release(&it); - path_release(&nd); - } - goto out; -@@ -1104,7 +1110,7 @@ int do_coredump(long signr, struct pt_re - goto close_fail; - if (!file->f_op->write) - goto close_fail; -- if (do_truncate(file->f_dentry, 0) != 0) -+ if (do_truncate(file->f_dentry, 0, 0) != 0) - goto close_fail; - - retval = binfmt->core_dump(signr, regs, file); -Index: linux-2.4.18-p4smp/fs/namei.c -=================================================================== ---- linux-2.4.18-p4smp.orig/fs/namei.c 2004-02-03 01:00:10.000000000 -0500 -+++ linux-2.4.18-p4smp/fs/namei.c 2004-03-19 16:06:19.000000000 -0500 -@@ -94,6 +94,13 @@ - * XEmacs seems to be relying on it... - */ - -+void intent_release(struct lookup_intent *it) -+{ -+ if (it && it->it_op_release) -+ it->it_op_release(it); -+ -+} -+ - /* In order to reduce some races, while at the same time doing additional - * checking and hopefully speeding things up, we copy filenames to the - * kernel data space before using them.. -@@ -260,10 +267,19 @@ void path_release(struct nameidata *nd) - * Internal lookup() using the new generic dcache. 
- * SMP-safe - */ --static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags) -+static struct dentry *cached_lookup(struct dentry *parent, struct qstr *name, -+ int flags, struct lookup_intent *it) - { - struct dentry * dentry = d_lookup(parent, name); - -+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) { -+ if (!dentry->d_op->d_revalidate_it(dentry, flags, it) && -+ !d_invalidate(dentry)) { -+ dput(dentry); -+ dentry = NULL; -+ } -+ return dentry; -+ } else - if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { - if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) { - dput(dentry); -@@ -281,11 +297,15 @@ static struct dentry * cached_lookup(str - * make sure that nobody added the entry to the dcache in the meantime.. - * SMP-safe - */ --static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags) -+static struct dentry *real_lookup(struct dentry *parent, struct qstr *name, -+ int flags, struct lookup_intent *it) - { - struct dentry * result; - struct inode *dir = parent->d_inode; -+ int counter = 0; - -+again: -+ counter++; - down(&dir->i_sem); - /* - * First re-do the cached lookup just in case it was created -@@ -300,6 +320,9 @@ static struct dentry * real_lookup(struc - result = ERR_PTR(-ENOMEM); - if (dentry) { - lock_kernel(); -+ if (dir->i_op->lookup_it) -+ result = dir->i_op->lookup_it(dir, dentry, it, flags); -+ else - result = dir->i_op->lookup(dir, dentry); - unlock_kernel(); - if (result) -@@ -321,6 +344,15 @@ static struct dentry * real_lookup(struc - dput(result); - result = ERR_PTR(-ENOENT); - } -+ } else if (result->d_op && result->d_op->d_revalidate_it) { -+ if (!result->d_op->d_revalidate_it(result, flags, it) && -+ !d_invalidate(result)) { -+ dput(result); -+ if (counter > 10) -+ result = ERR_PTR(-ESTALE); -+ if (!IS_ERR(result)) -+ goto again; -+ } - } - return result; - } -@@ -334,7 +366,8 @@ int max_recursive_link = 5; - * Without that kind 
of total limit, nasty chains of consecutive - * symlinks can cause almost arbitrarily long lookups. - */ --static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd) -+static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd, -+ struct lookup_intent *it) - { - int err; - if (current->link_count >= max_recursive_link) -@@ -348,10 +381,12 @@ static inline int do_follow_link(struct - current->link_count++; - current->total_link_count++; - UPDATE_ATIME(dentry->d_inode); -+ nd->intent = it; - err = dentry->d_inode->i_op->follow_link(dentry, nd); - current->link_count--; - return err; - loop: -+ intent_release(it); - path_release(nd); - return -ELOOP; - } -@@ -381,7 +416,8 @@ int follow_up(struct vfsmount **mnt, str - return __follow_up(mnt, dentry); - } - --static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry) -+static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry, -+ struct lookup_intent *it) - { - struct vfsmount *mounted; - -@@ -401,7 +437,7 @@ static inline int __follow_down(struct v - - int follow_down(struct vfsmount **mnt, struct dentry **dentry) - { -- return __follow_down(mnt,dentry); -+ return __follow_down(mnt,dentry,NULL); - } - - static inline void follow_dotdot(struct nameidata *nd) -@@ -437,7 +473,7 @@ static inline void follow_dotdot(struct - mntput(nd->mnt); - nd->mnt = parent; - } -- while (d_mountpoint(nd->dentry) && __follow_down(&nd->mnt, &nd->dentry)) -+ while (d_mountpoint(nd->dentry) && __follow_down(&nd->mnt, &nd->dentry, NULL)) - ; - } - -@@ -449,7 +485,8 @@ static inline void follow_dotdot(struct - * - * We expect 'base' to be positive and a directory. 
- */ --int link_path_walk(const char * name, struct nameidata *nd) -+int link_path_walk_it(const char *name, struct nameidata *nd, -+ struct lookup_intent *it) - { - struct dentry *dentry; - struct inode *inode; -@@ -526,18 +563,18 @@ int link_path_walk(const char * name, st - break; - } - /* This does the actual lookups.. */ -- dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE); -+ dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL); - if (!dentry) { - err = -EWOULDBLOCKIO; - if (atomic) - break; -- dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE); -+ dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL); - err = PTR_ERR(dentry); - if (IS_ERR(dentry)) - break; - } - /* Check mountpoints.. */ -- while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry)) -+ while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, NULL)) - ; - - err = -ENOENT; -@@ -549,7 +586,7 @@ int link_path_walk(const char * name, st - goto out_dput; - - if (inode->i_op->follow_link) { -- err = do_follow_link(dentry, nd); -+ err = do_follow_link(dentry, nd, NULL); - dput(dentry); - if (err) - goto return_err; -@@ -565,7 +602,7 @@ int link_path_walk(const char * name, st - nd->dentry = dentry; - } - err = -ENOTDIR; -- if (!inode->i_op->lookup) -+ if (!inode->i_op->lookup && !inode->i_op->lookup_it) - break; - continue; - /* here ends the main loop */ -@@ -592,22 +629,22 @@ last_component: - if (err < 0) - break; - } -- dentry = cached_lookup(nd->dentry, &this, 0); -+ dentry = cached_lookup(nd->dentry, &this, 0, it); - if (!dentry) { - err = -EWOULDBLOCKIO; - if (atomic) - break; -- dentry = real_lookup(nd->dentry, &this, 0); -+ dentry = real_lookup(nd->dentry, &this, 0, it); - err = PTR_ERR(dentry); - if (IS_ERR(dentry)) - break; - } -- while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry)) -+ while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, it)) - ; - inode = dentry->d_inode; - if ((lookup_flags & LOOKUP_FOLLOW) - && 
inode && inode->i_op && inode->i_op->follow_link) { -- err = do_follow_link(dentry, nd); -+ err = do_follow_link(dentry, nd, it); - dput(dentry); - if (err) - goto return_err; -@@ -621,7 +659,8 @@ last_component: - goto no_inode; - if (lookup_flags & LOOKUP_DIRECTORY) { - err = -ENOTDIR; -- if (!inode->i_op || !inode->i_op->lookup) -+ if (!inode->i_op || -+ (!inode->i_op->lookup && !inode->i_op->lookup_it)) - break; - } - goto return_base; -@@ -645,6 +684,27 @@ return_reval: - * Check the cached dentry for staleness. - */ - dentry = nd->dentry; -+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) { -+ err = -ESTALE; -+ if (!dentry->d_op->d_revalidate_it(dentry, 0, it)) { -+ struct dentry *new; -+ err = permission(dentry->d_parent->d_inode, -+ MAY_EXEC); -+ if (err) -+ break; -+ new = real_lookup(dentry->d_parent, -+ &dentry->d_name, 0, it); -+ if (IS_ERR(new)) { -+ err = PTR_ERR(new); -+ break; -+ } -+ d_invalidate(dentry); -+ dput(dentry); -+ nd->dentry = new; -+ } -+ if (!nd->dentry->d_inode) -+ goto no_inode; -+ } else - if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { - err = -ESTALE; - if (!dentry->d_op->d_revalidate(dentry, 0)) { -@@ -658,15 +716,28 @@ out_dput: - dput(dentry); - break; - } -+ if (err) -+ intent_release(it); - path_release(nd); - return_err: - return err; - } - -+int link_path_walk(const char * name, struct nameidata *nd) -+{ -+ return link_path_walk_it(name, nd, NULL); -+} -+ -+int path_walk_it(const char * name, struct nameidata *nd, struct lookup_intent *it) -+{ -+ current->total_link_count = 0; -+ return link_path_walk_it(name, nd, it); -+} -+ - int path_walk(const char * name, struct nameidata *nd) - { - current->total_link_count = 0; -- return link_path_walk(name, nd); -+ return link_path_walk_it(name, nd, NULL); - } - - /* SMP-safe */ -@@ -751,6 +822,17 @@ walk_init_root(const char *name, struct - } - - /* SMP-safe */ -+int path_lookup_it(const char *path, unsigned flags, struct nameidata *nd, -+ struct 
lookup_intent *it) -+{ -+ int error = 0; -+ if (path_init(path, flags, nd)) -+ error = path_walk_it(path, nd, it); -+ return error; -+} -+ -+ -+/* SMP-safe */ - int path_lookup(const char *path, unsigned flags, struct nameidata *nd) - { - int error = 0; -@@ -765,6 +847,7 @@ int path_init(const char *name, unsigned - { - nd->last_type = LAST_ROOT; /* if there are only slashes... */ - nd->flags = flags; -+ nd->intent = NULL; - if (*name=='/') - return walk_init_root(name,nd); - read_lock(&current->fs->lock); -@@ -779,7 +862,8 @@ int path_init(const char *name, unsigned - * needs parent already locked. Doesn't follow mounts. - * SMP-safe. - */ --struct dentry * lookup_hash(struct qstr *name, struct dentry * base) -+struct dentry * lookup_hash_it(struct qstr *name, struct dentry * base, -+ struct lookup_intent *it) - { - struct dentry * dentry; - struct inode *inode; -@@ -802,13 +886,16 @@ struct dentry * lookup_hash(struct qstr - goto out; - } - -- dentry = cached_lookup(base, name, 0); -+ dentry = cached_lookup(base, name, 0, it); - if (!dentry) { - struct dentry *new = d_alloc(base, name); - dentry = ERR_PTR(-ENOMEM); - if (!new) - goto out; - lock_kernel(); -+ if (inode->i_op->lookup_it) -+ dentry = inode->i_op->lookup_it(inode, new, it, 0); -+ else - dentry = inode->i_op->lookup(inode, new); - unlock_kernel(); - if (!dentry) -@@ -820,6 +907,12 @@ out: - return dentry; - } - -+struct dentry * lookup_hash(struct qstr *name, struct dentry * base) -+{ -+ return lookup_hash_it(name, base, NULL); -+} -+ -+ - /* SMP-safe */ - struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) - { -@@ -841,7 +934,7 @@ struct dentry * lookup_one_len(const cha - } - this.hash = end_name_hash(hash); - -- return lookup_hash(&this, base); -+ return lookup_hash_it(&this, base, NULL); - access: - return ERR_PTR(-EACCES); - } -@@ -872,6 +965,23 @@ int __user_walk(const char *name, unsign - return err; - } - -+int __user_walk_it(const char *name, unsigned flags, struct
nameidata *nd, -+ struct lookup_intent *it) -+{ -+ char *tmp; -+ int err; -+ -+ tmp = getname(name); -+ err = PTR_ERR(tmp); -+ if (!IS_ERR(tmp)) { -+ err = 0; -+ if (path_init(tmp, flags, nd)) -+ err = path_walk_it(tmp, nd, it); -+ putname(tmp); -+ } -+ return err; -+} -+ - /* - * It's inline, so penalty for filesystems that don't use sticky bit is - * minimal. -@@ -969,6 +1079,37 @@ static inline int lookup_flags(unsigned - return retval; - } - -+static int vfs_create_it(struct inode *dir, struct dentry *dentry, int mode, -+ struct lookup_intent *it) -+{ -+ int error; -+ -+ mode &= S_IALLUGO; -+ mode |= S_IFREG; -+ -+ down(&dir->i_zombie); -+ error = may_create(dir, dentry); -+ if (error) -+ goto exit_lock; -+ -+ error = -EACCES; /* shouldn't it be ENOSYS? */ -+ if (!dir->i_op || (!dir->i_op->create && !dir->i_op->create_it)) -+ goto exit_lock; -+ -+ DQUOT_INIT(dir); -+ lock_kernel(); -+ if (dir->i_op->create_it) -+ error = dir->i_op->create_it(dir, dentry, mode, it); -+ else -+ error = dir->i_op->create(dir, dentry, mode); -+ unlock_kernel(); -+exit_lock: -+ up(&dir->i_zombie); -+ if (!error) -+ inode_dir_notify(dir, DN_CREATE); -+ return error; -+} -+ - int vfs_create(struct inode *dir, struct dentry *dentry, int mode) - { - int error; -@@ -1045,14 +1186,17 @@ int may_open(struct nameidata *nd, int a - return get_lease(inode, flag); - } - -+extern struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, -+ int flags, struct lookup_intent *it); -+ - struct file *filp_open(const char * pathname, int open_flags, int mode) - { - int acc_mode, error = 0; -- struct inode *inode; - struct dentry *dentry; - struct dentry *dir; - int flag = open_flags; - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_OPEN}; - int count = 0; - - if (!capable(CAP_SYS_ADMIN)) -@@ -1063,13 +1207,14 @@ struct file *filp_open(const char * path - if (flag & O_TRUNC) - flag |= 2; - -+ it.it_flags = flag; - acc_mode = ACC_MODE(flag); - - /* - * The simplest case - 
just a plain lookup. - */ - if (!(flag & O_CREAT)) { -- error = path_lookup(pathname, lookup_flags(flag), &nd); -+ error = path_lookup_it(pathname, lookup_flags(flag), &nd, &it); - if (error) - return ERR_PTR(error); - dentry = nd.dentry; -@@ -1079,6 +1224,8 @@ struct file *filp_open(const char * path - /* - * Create - we need to know the parent. - */ -+ it.it_create_mode = mode; -+ it.it_op |= IT_CREAT; - error = path_lookup(pathname, LOOKUP_PARENT, &nd); - if (error) - return ERR_PTR(error); -@@ -1094,7 +1241,7 @@ struct file *filp_open(const char * path - - dir = nd.dentry; - down(&dir->d_inode->i_sem); -- dentry = lookup_hash(&nd.last, nd.dentry); -+ dentry = lookup_hash_it(&nd.last, nd.dentry, &it); - - do_last: - error = PTR_ERR(dentry); -@@ -1103,10 +1250,11 @@ do_last: - goto exit; - } - -+ it.it_create_mode = mode; - /* Negative dentry, just create the file */ - if (!dentry->d_inode) { -- error = vfs_create(dir->d_inode, dentry, -- mode & ~current->fs->umask); -+ error = vfs_create_it(dir->d_inode, dentry, -+ mode & ~current->fs->umask, &it); - up(&dir->d_inode->i_sem); - dput(nd.dentry); - nd.dentry = dentry; -@@ -1132,12 +1280,12 @@ do_last: - error = -ELOOP; - if (flag & O_NOFOLLOW) - goto exit_dput; -- while (__follow_down(&nd.mnt,&dentry) && d_mountpoint(dentry)); -+ while (__follow_down(&nd.mnt,&dentry, &it) && d_mountpoint(dentry)); - } - error = -ENOENT; - if (!dentry->d_inode) - goto exit_dput; -- if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link) -+ if (dentry->d_inode->i_op && (dentry->d_inode->i_op->follow_link)) - goto do_link; - - dput(nd.dentry); -@@ -1152,11 +1300,13 @@ ok: - if (!S_ISREG(nd.dentry->d_inode->i_mode)) - open_flags &= ~O_TRUNC; - -- return dentry_open(nd.dentry, nd.mnt, open_flags); -+ return dentry_open_it(nd.dentry, nd.mnt, open_flags, &it); - - exit_dput: -+ intent_release(&it); - dput(dentry); - exit: -+ intent_release(&it); - path_release(&nd); - return ERR_PTR(error); - -@@ -1175,10 +1325,14 @@ do_link: - 
* are done. Procfs-like symlinks just set LAST_BIND. - */ - UPDATE_ATIME(dentry->d_inode); -+ nd.intent = &it; - error = dentry->d_inode->i_op->follow_link(dentry, &nd); -+ if (error) { -+ intent_release(&it); -+ } - dput(dentry); - if (error) -- return error; -+ return ERR_PTR(error); - if (nd.last_type == LAST_BIND) { - dentry = nd.dentry; - goto ok; -@@ -1197,13 +1351,15 @@ do_link: - } - dir = nd.dentry; - down(&dir->d_inode->i_sem); -- dentry = lookup_hash(&nd.last, nd.dentry); -+ dentry = lookup_hash_it(&nd.last, nd.dentry, &it); - putname(nd.last.name); - goto do_last; - } - -+ - /* SMP-safe */ --static struct dentry *lookup_create(struct nameidata *nd, int is_dir) -+static struct dentry *lookup_create(struct nameidata *nd, int is_dir, -+ struct lookup_intent *it) - { - struct dentry *dentry; - -@@ -1211,7 +1367,7 @@ static struct dentry *lookup_create(stru - dentry = ERR_PTR(-EEXIST); - if (nd->last_type != LAST_NORM) - goto fail; -- dentry = lookup_hash(&nd->last, nd->dentry); -+ dentry = lookup_hash_it(&nd->last, nd->dentry, it); - if (IS_ERR(dentry)) - goto fail; - if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode) -@@ -1267,7 +1423,20 @@ asmlinkage long sys_mknod(const char * f - error = path_lookup(tmp, LOOKUP_PARENT, &nd); - if (error) - goto out; -- dentry = lookup_create(&nd, 0); -+ -+ if (nd.last_type != LAST_NORM) { -+ error = -EEXIST; -+ goto out2; -+ } -+ if (nd.dentry->d_inode->i_op->mknod_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->mknod_raw(&nd, mode, dev); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } -+ -+ dentry = lookup_create(&nd, 0, NULL); - error = PTR_ERR(dentry); - - mode &= ~current->fs->umask; -@@ -1288,6 +1453,7 @@ asmlinkage long sys_mknod(const char * f - dput(dentry); - } - up(&nd.dentry->d_inode->i_sem); -+out2: - path_release(&nd); - out: - putname(tmp); -@@ -1335,7 +1501,18 @@ asmlinkage long sys_mkdir(const char * p -
error = path_lookup(tmp, LOOKUP_PARENT, &nd); - if (error) - goto out; -- dentry = lookup_create(&nd, 1); -+ if (nd.last_type != LAST_NORM) { -+ error = -EEXIST; -+ goto out2; -+ } -+ if (nd.dentry->d_inode->i_op->mkdir_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->mkdir_raw(&nd, mode); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } -+ dentry = lookup_create(&nd, 1, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = vfs_mkdir(nd.dentry->d_inode, dentry, -@@ -1343,6 +1516,7 @@ asmlinkage long sys_mkdir(const char * p - dput(dentry); - } - up(&nd.dentry->d_inode->i_sem); -+out2: - path_release(&nd); - out: - putname(tmp); -@@ -1443,8 +1617,16 @@ asmlinkage long sys_rmdir(const char * p - error = -EBUSY; - goto exit1; - } -+ if (nd.dentry->d_inode->i_op->rmdir_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ error = op->rmdir_raw(&nd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit1; -+ } - down(&nd.dentry->d_inode->i_sem); -- dentry = lookup_hash(&nd.last, nd.dentry); -+ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = vfs_rmdir(nd.dentry->d_inode, dentry); -@@ -1502,8 +1684,15 @@ asmlinkage long sys_unlink(const char * - error = -EISDIR; - if (nd.last_type != LAST_NORM) - goto exit1; -+ if (nd.dentry->d_inode->i_op->unlink_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->unlink_raw(&nd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit1; -+ } - down(&nd.dentry->d_inode->i_sem); -- dentry = lookup_hash(&nd.last, nd.dentry); -+ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - /* Why not before? 
Because we want correct error value */ -@@ -1570,15 +1759,27 @@ asmlinkage long sys_symlink(const char * - error = path_lookup(to, LOOKUP_PARENT, &nd); - if (error) - goto out; -- dentry = lookup_create(&nd, 0); -+ if (nd.last_type != LAST_NORM) { -+ error = -EEXIST; -+ goto out2; -+ } -+ if (nd.dentry->d_inode->i_op->symlink_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->symlink_raw(&nd, from); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } -+ dentry = lookup_create(&nd, 0, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = vfs_symlink(nd.dentry->d_inode, dentry, from); - dput(dentry); - } - up(&nd.dentry->d_inode->i_sem); -+ out2: - path_release(&nd); --out: -+ out: - putname(to); - } - putname(from); -@@ -1654,7 +1851,18 @@ asmlinkage long sys_link(const char * ol - error = -EXDEV; - if (old_nd.mnt != nd.mnt) - goto out_release; -- new_dentry = lookup_create(&nd, 0); -+ if (nd.last_type != LAST_NORM) { -+ error = -EEXIST; -+ goto out_release; -+ } -+ if (nd.dentry->d_inode->i_op->link_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->link_raw(&old_nd, &nd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out_release; -+ } -+ new_dentry = lookup_create(&nd, 0, NULL); - error = PTR_ERR(new_dentry); - if (!IS_ERR(new_dentry)) { - error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry); -@@ -1698,7 +1902,7 @@ exit: - * locking]. 
- */ - int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry) - { - int error; - struct inode *target; -@@ -1777,7 +1981,7 @@ out_unlock: - } - - int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry) - { - int error; - -@@ -1865,9 +2069,18 @@ static inline int do_rename(const char * - if (newnd.last_type != LAST_NORM) - goto exit2; - -+ if (old_dir->d_inode->i_op->rename_raw) { -+ lock_kernel(); -+ error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd); -+ unlock_kernel(); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit2; -+ } -+ - double_lock(new_dir, old_dir); - -- old_dentry = lookup_hash(&oldnd.last, old_dir); -+ old_dentry = lookup_hash_it(&oldnd.last, old_dir, NULL); - error = PTR_ERR(old_dentry); - if (IS_ERR(old_dentry)) - goto exit3; -@@ -1883,16 +2096,16 @@ static inline int do_rename(const char * - if (newnd.last.name[newnd.last.len]) - goto exit4; - } -- new_dentry = lookup_hash(&newnd.last, new_dir); -+ new_dentry = lookup_hash_it(&newnd.last, new_dir, NULL); - error = PTR_ERR(new_dentry); - if (IS_ERR(new_dentry)) - goto exit4; - -+ - lock_kernel(); - error = vfs_rename(old_dir->d_inode, old_dentry, - new_dir->d_inode, new_dentry); - unlock_kernel(); -- - dput(new_dentry); - exit4: - dput(old_dentry); -@@ -1943,12 +2156,17 @@ out: - } - - static inline int --__vfs_follow_link(struct nameidata *nd, const char *link) -+__vfs_follow_link(struct nameidata *nd, const char *link, -+ struct lookup_intent *it) - { - int res = 0; - char *name; - if (IS_ERR(link)) - goto fail; -+ if (it == NULL) -+ it = nd->intent; -+ else if (it != nd->intent) -+ printk("it != nd->intent: tell phil@clusterfs.com\n"); - - if (*link == '/') { - path_release(nd); -@@ -1956,7 +2174,7 @@ 
__vfs_follow_link(struct nameidata *nd, - /* weird __emul_prefix() stuff did it */ - goto out; - } -- res = link_path_walk(link, nd); -+ res = link_path_walk_it(link, nd, it); - out: - if (current->link_count || res || nd->last_type!=LAST_NORM) - return res; -@@ -1978,7 +2196,13 @@ fail: - - int vfs_follow_link(struct nameidata *nd, const char *link) - { -- return __vfs_follow_link(nd, link); -+ return __vfs_follow_link(nd, link, NULL); -+} -+ -+int vfs_follow_link_it(struct nameidata *nd, const char *link, -+ struct lookup_intent *it) -+{ -+ return __vfs_follow_link(nd, link, it); - } - - /* get the link contents into pagecache */ -@@ -2020,7 +2244,7 @@ int page_follow_link(struct dentry *dent - { - struct page *page = NULL; - char *s = page_getlink(dentry, &page); -- int res = __vfs_follow_link(nd, s); -+ int res = __vfs_follow_link(nd, s, NULL); - if (page) { - kunmap(page); - page_cache_release(page); -Index: linux-2.4.18-p4smp/fs/namespace.c -=================================================================== ---- linux-2.4.18-p4smp.orig/fs/namespace.c 2004-02-03 01:00:10.000000000 -0500 -+++ linux-2.4.18-p4smp/fs/namespace.c 2004-03-19 16:05:42.000000000 -0500 -@@ -99,6 +99,7 @@ - { - old_nd->dentry = mnt->mnt_mountpoint; - old_nd->mnt = mnt->mnt_parent; -+ UNPIN(old_nd->dentry, old_nd->mnt, 1); - mnt->mnt_parent = mnt; - mnt->mnt_mountpoint = mnt->mnt_root; - list_del_init(&mnt->mnt_child); -@@ -110,6 +111,7 @@ static void attach_mnt(struct vfsmount * - { - mnt->mnt_parent = mntget(nd->mnt); - mnt->mnt_mountpoint = dget(nd->dentry); -+ PIN(nd->dentry, nd->mnt, 1); - list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry)); - list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts); - nd->dentry->d_mounted++; -@@ -485,14 +487,17 @@ static int do_loopback(struct nameidata - { - struct nameidata old_nd; - struct vfsmount *mnt = NULL; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int err = mount_is_safe(nd); - if (err) - return err; - if (!old_name 
|| !*old_name) - return -EINVAL; -- err = path_lookup(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd); -- if (err) -+ err = path_lookup_it(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd, &it); -+ if (err) { -+ intent_release(&it); - return err; -+ } - - down_write(&current->namespace->sem); - err = -EINVAL; -@@ -515,6 +520,7 @@ static int do_loopback(struct nameidata - } - - up_write(&current->namespace->sem); -+ intent_release(&it); - path_release(&old_nd); - return err; - } -@@ -698,6 +704,7 @@ long do_mount(char * dev_name, char * di - unsigned long flags, void *data_page) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int retval = 0; - int mnt_flags = 0; - -@@ -722,9 +729,11 @@ long do_mount(char * dev_name, char * di - flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV); - - /* ... and get the mountpoint */ -- retval = path_lookup(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd); -- if (retval) -+ retval = path_lookup_it(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it); -+ if (retval) { -+ intent_release(&it); - return retval; -+ } - - if (flags & MS_REMOUNT) - retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags, -@@ -736,6 +744,8 @@ long do_mount(char * dev_name, char * di - else - retval = do_add_mount(&nd, type_page, flags, mnt_flags, - dev_name, data_page); -+ -+ intent_release(&it); - path_release(&nd); - return retval; - } -@@ -901,6 +911,8 @@ asmlinkage long sys_pivot_root(const cha - { - struct vfsmount *tmp; - struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd; -+ struct lookup_intent new_it = { .it_op = IT_GETATTR }; -+ struct lookup_intent old_it = { .it_op = IT_GETATTR }; - int error; - - if (!capable(CAP_SYS_ADMIN)) -@@ -908,14 +920,14 @@ asmlinkage long sys_pivot_root(const cha - - lock_kernel(); - -- error = __user_walk(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd); -+ error = __user_walk_it(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd, &new_it); - if (error) - goto
out0; - error = -EINVAL; - if (!check_mnt(new_nd.mnt)) - goto out1; - -- error = __user_walk(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd); -+ error = __user_walk_it(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd, &old_it); - if (error) - goto out1; - -@@ -970,8 +982,10 @@ out2: - up(&old_nd.dentry->d_inode->i_zombie); - up_write(&current->namespace->sem); - path_release(&user_nd); -+ intent_release(&old_it); - path_release(&old_nd); - out1: -+ intent_release(&new_it); - path_release(&new_nd); - out0: - unlock_kernel(); -Index: linux-2.4.18-p4smp/fs/open.c -=================================================================== ---- linux-2.4.18-p4smp.orig/fs/open.c 2004-02-03 01:00:10.000000000 -0500 -+++ linux-2.4.18-p4smp/fs/open.c 2004-03-19 16:05:42.000000000 -0500 -@@ -19,6 +19,8 @@ - #include - - #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) -+extern int path_walk_it(const char *name, struct nameidata *nd, -+ struct lookup_intent *it); - - int vfs_statfs(struct super_block *sb, struct statfs *buf) - { -@@ -95,9 +97,10 @@ void fd_install(unsigned int fd, struct - write_unlock(&files->file_lock); - } - --int do_truncate(struct dentry *dentry, loff_t length) -+int do_truncate(struct dentry *dentry, loff_t length, int called_from_open) - { - struct inode *inode = dentry->d_inode; -+ struct inode_operations *op = dentry->d_inode->i_op; - int error; - struct iattr newattrs; - -@@ -108,7 +111,13 @@ int do_truncate(struct dentry *dentry, l - down(&inode->i_sem); - newattrs.ia_size = length; - newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; -- error = notify_change(dentry, &newattrs); -+ if (called_from_open) -+ newattrs.ia_valid |= ATTR_FROM_OPEN; -+ if (op->setattr_raw) { -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ } else -+ error = notify_change(dentry, &newattrs); - up(&inode->i_sem); - return error; - } -@@ -118,12 +127,13 @@ static inline long do_sys_truncate(const -
struct nameidata nd; - struct inode * inode; - int error; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - - error = -EINVAL; - if (length < 0) /* sorry, but loff_t says... */ - goto out; - -- error = user_path_walk(path, &nd); -+ error = user_path_walk_it(path, &nd, &it); - if (error) - goto out; - inode = nd.dentry->d_inode; -@@ -163,11 +173,13 @@ static inline long do_sys_truncate(const - error = locks_verify_truncate(inode, NULL, length); - if (!error) { - DQUOT_INIT(inode); -- error = do_truncate(nd.dentry, length); -+ intent_release(&it); -+ error = do_truncate(nd.dentry, length, 0); - } - put_write_access(inode); - - dput_and_out: -+ intent_release(&it); - path_release(&nd); - out: - return error; -@@ -215,7 +227,7 @@ static inline long do_sys_ftruncate(unsi - - error = locks_verify_truncate(inode, file, length); - if (!error) -- error = do_truncate(dentry, length); -+ error = do_truncate(dentry, length, 0); - out_putf: - fput(file); - out: -@@ -260,11 +272,13 @@ asmlinkage long sys_utime(char * filenam - struct inode * inode; - struct iattr newattrs; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, NULL); - if (error) - goto out; - inode = nd.dentry->d_inode; - -+ /* this is safe without a Lustre lock because it only depends -+ on the super block */ - error = -EROFS; - if (IS_RDONLY(inode)) - goto dput_and_out; -@@ -279,11 +293,25 @@ asmlinkage long sys_utime(char * filenam - goto dput_and_out; - - newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; -- } else { -+ } -+ -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto dput_and_out; -+ } -+ -+ error = -EPERM; -+ if (!times) { - if (current->fsuid != inode->i_uid && - (error = permission(inode,MAY_WRITE)) != 0) - goto dput_and_out; - } -+ - 
error = notify_change(nd.dentry, &newattrs); - dput_and_out: - path_release(&nd); -@@ -304,12 +332,14 @@ asmlinkage long sys_utimes(char * filena - struct inode * inode; - struct iattr newattrs; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, NULL); - - if (error) - goto out; - inode = nd.dentry->d_inode; - -+ /* this is safe without a Lustre lock because it only depends -+ on the super block */ - error = -EROFS; - if (IS_RDONLY(inode)) - goto dput_and_out; -@@ -324,7 +354,20 @@ asmlinkage long sys_utimes(char * filena - newattrs.ia_atime = times[0].tv_sec; - newattrs.ia_mtime = times[1].tv_sec; - newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; -- } else { -+ } -+ -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto dput_and_out; -+ } -+ -+ error = -EPERM; -+ if (!utimes) { - if (current->fsuid != inode->i_uid && - (error = permission(inode,MAY_WRITE)) != 0) - goto dput_and_out; -@@ -347,6 +390,7 @@ asmlinkage long sys_access(const char * - int old_fsuid, old_fsgid; - kernel_cap_t old_cap; - int res; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - - if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? 
*/ - return -EINVAL; -@@ -364,13 +408,14 @@ asmlinkage long sys_access(const char * - else - current->cap_effective = current->cap_permitted; - -- res = user_path_walk(filename, &nd); -+ res = user_path_walk_it(filename, &nd, &it); - if (!res) { - res = permission(nd.dentry->d_inode, mode); - /* SuS v2 requires we report a read only fs too */ - if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode) - && !special_file(nd.dentry->d_inode->i_mode)) - res = -EROFS; -+ intent_release(&it); - path_release(&nd); - } - -@@ -385,8 +430,9 @@ asmlinkage long sys_chdir(const char * f - { - int error; - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - -- error = __user_walk(filename,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd); -+ error = __user_walk_it(filename,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd, &it); - if (error) - goto out; - -@@ -397,6 +445,7 @@ asmlinkage long sys_chdir(const char * f - set_fs_pwd(current->fs, nd.mnt, nd.dentry); - - dput_and_out: -+ intent_release(&it); - path_release(&nd); - out: - return error; -@@ -436,9 +485,10 @@ asmlinkage long sys_chroot(const char * - { - int error; - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - -- error = __user_walk(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW | -- LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); -+ error = __user_walk_it(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW | -+ LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd, &it); - if (error) - goto out; - -@@ -454,39 +504,56 @@ asmlinkage long sys_chroot(const char * - set_fs_altroot(); - error = 0; - dput_and_out: -+ intent_release(&it); - path_release(&nd); - out: - return error; - } - --asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) -+int chmod_common(struct dentry *dentry, mode_t mode) - { -- struct inode * inode; -- struct dentry * dentry; -- struct file * file; -- int err = -EBADF; -+ struct inode *inode = dentry->d_inode; - struct iattr newattrs; -+ int err = -EROFS; - -- file = 
fget(fd); -- if (!file) -+ if (IS_RDONLY(inode)) - goto out; - -- dentry = file->f_dentry; -- inode = dentry->d_inode; -+ if (inode->i_op->setattr_raw) { -+ newattrs.ia_mode = mode; -+ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; -+ newattrs.ia_valid |= ATTR_RAW; -+ err = inode->i_op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (err != -EOPNOTSUPP) -+ goto out; -+ } - -- err = -EROFS; -- if (IS_RDONLY(inode)) -- goto out_putf; - err = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -- goto out_putf; -+ goto out; -+ - if (mode == (mode_t) -1) - mode = inode->i_mode; - newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); - newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - err = notify_change(dentry, &newattrs); - --out_putf: -+out: -+ return err; -+} -+ -+asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) -+{ -+ struct file * file; -+ int err = -EBADF; -+ -+ file = fget(fd); -+ if (!file) -+ goto out; -+ -+ err = chmod_common(file->f_dentry, mode); -+ - fput(file); - out: - return err; -@@ -495,30 +562,14 @@ out: - asmlinkage long sys_chmod(const char * filename, mode_t mode) - { - struct nameidata nd; -- struct inode * inode; - int error; -- struct iattr newattrs; - - error = user_path_walk(filename, &nd); - if (error) - goto out; -- inode = nd.dentry->d_inode; -- -- error = -EROFS; -- if (IS_RDONLY(inode)) -- goto dput_and_out; - -- error = -EPERM; -- if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -- goto dput_and_out; -- -- if (mode == (mode_t) -1) -- mode = inode->i_mode; -- newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); -- newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; -- error = notify_change(nd.dentry, &newattrs); -+ error = chmod_common(nd.dentry, mode); - --dput_and_out: - path_release(&nd); - out: - return error; -@@ -538,6 +589,20 @@ static int chown_common(struct dentry * - error = -EROFS; - if (IS_RDONLY(inode)) - goto out; -+ -+ if (inode->i_op->setattr_raw) { 
-+ struct inode_operations *op = dentry->d_inode->i_op; -+ -+ newattrs.ia_uid = user; -+ newattrs.ia_gid = group; -+ newattrs.ia_valid = ATTR_UID | ATTR_GID | ATTR_CTIME; -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ return error; -+ } -+ - error = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - goto out; -@@ -628,7 +693,8 @@ extern ssize_t do_readahead(struct file - /* for files over a certains size it doesn't pay to do readahead on open */ - #define READAHEAD_CUTOFF 48000 - --struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) -+struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, -+ int flags, struct lookup_intent *it) - { - struct file * f; - struct inode *inode; -@@ -649,7 +715,7 @@ struct file *dentry_open(struct dentry * - error = locks_verify_locked(inode); - if (!error) { - DQUOT_INIT(inode); -- error = do_truncate(dentry, 0); -+ error = do_truncate(dentry, 0, 1); - } - if (error || !(f->f_mode & FMODE_WRITE)) - put_write_access(inode); -@@ -679,7 +745,9 @@ struct file *dentry_open(struct dentry * - } - - if (f->f_op && f->f_op->open) { -+ f->f_it = it; - error = f->f_op->open(inode,f); -+ f->f_it = NULL; - if (error) - goto cleanup_all; - } -@@ -693,6 +761,7 @@ struct file *dentry_open(struct dentry * - do_readahead(f, 0, (48 * 1024) >> PAGE_SHIFT); - - -+ intent_release(it); - return f; - - cleanup_all: -@@ -707,11 +776,17 @@ cleanup_all: - cleanup_file: - put_filp(f); - cleanup_dentry: -+ intent_release(it); - dput(dentry); - mntput(mnt); - return ERR_PTR(error); - } - -+struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) -+{ -+ return dentry_open_it(dentry, mnt, flags, NULL); -+} -+ - /* - * Find an empty file descriptor entry, and mark it busy. 
- */ -Index: linux-2.4.18-p4smp/fs/stat.c -=================================================================== ---- linux-2.4.18-p4smp.orig/fs/stat.c 2004-02-03 01:00:10.000000000 -0500 -+++ linux-2.4.18-p4smp/fs/stat.c 2004-03-19 16:06:19.000000000 -0500 -@@ -17,21 +17,24 @@ - * Revalidate the inode. This is required for proper NFS attribute caching. - */ - static __inline__ int --do_revalidate(struct dentry *dentry) -+do_revalidate(struct dentry *dentry, struct lookup_intent *it) - { - struct inode * inode = dentry->d_inode; -- if (inode->i_op && inode->i_op->revalidate) -+ if (inode->i_op && inode->i_op->revalidate_it) -+ return inode->i_op->revalidate_it(dentry, it); -+ else if (inode->i_op && inode->i_op->revalidate) - return inode->i_op->revalidate(dentry); - return 0; - } - --static int do_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) -+static int do_getattr(struct vfsmount *mnt, struct dentry *dentry, -+ struct kstat *stat, struct lookup_intent *it) - { - int res = 0; - unsigned int blocks, indirect; - struct inode *inode = dentry->d_inode; - -- res = do_revalidate(dentry); -+ res = do_revalidate(dentry, it); - if (res) - return res; - -@@ -104,10 +109,12 @@ int vfs_stat(char *name, struct kstat *s - { - struct nameidata nd; - int error; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - -- error = user_path_walk(name, &nd); -+ error = user_path_walk_it(name, &nd, &it); - if (!error) { -- error = do_getattr(nd.mnt, nd.dentry, stat); -+ error = do_getattr(nd.mnt, nd.dentry, stat, &it); -+ intent_release(&it); - path_release(&nd); - } - return error; -@@ -117,10 +124,12 @@ int vfs_lstat(char *name, struct kstat * - { - struct nameidata nd; - int error; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - -- error = user_path_walk_link(name, &nd); -+ error = user_path_walk_link_it(name, &nd, &it); - if (!error) { -- error = do_getattr(nd.mnt, nd.dentry, stat); -+ error = do_getattr(nd.mnt, nd.dentry, stat, &it); -+ 
intent_release(&it); - path_release(&nd); - } - return error; -@@ -132,7 +141,7 @@ int vfs_fstat(unsigned int fd, struct ks - int error = -EBADF; - - if (f) { -- error = do_getattr(f->f_vfsmnt, f->f_dentry, stat); -+ error = do_getattr(f->f_vfsmnt, f->f_dentry, stat, NULL); - fput(f); - } - return error; -@@ -279,7 +288,7 @@ asmlinkage long sys_readlink(const char - - error = -EINVAL; - if (inode->i_op && inode->i_op->readlink && -- !(error = do_revalidate(nd.dentry))) { -+ !(error = do_revalidate(nd.dentry, NULL))) { - UPDATE_ATIME(inode); - error = inode->i_op->readlink(nd.dentry, buf, bufsiz); - } -Index: linux-2.4.18-p4smp/include/linux/dcache.h -=================================================================== ---- linux-2.4.18-p4smp.orig/include/linux/dcache.h 2004-02-03 01:00:10.000000000 -0500 -+++ linux-2.4.18-p4smp/include/linux/dcache.h 2004-03-19 16:05:42.000000000 -0500 -@@ -5,6 +5,51 @@ - - #include - #include -+#include -+ -+#define IT_OPEN 0x0001 -+#define IT_CREAT 0x0002 -+#define IT_READDIR 0x0004 -+#define IT_GETATTR 0x0008 -+#define IT_LOOKUP 0x0010 -+#define IT_UNLINK 0x0020 -+#define IT_GETXATTR 0x0040 -+#define IT_EXEC 0x0080 -+#define IT_PIN 0x0100 -+ -+#define IT_FL_LOCKED 0x0001 -+#define IT_FL_FOLLOWED 0x0002 /* set by vfs_follow_link */ -+ -+#define INTENT_MAGIC 0x19620323 -+ -+ -+struct lustre_intent_data { -+ int it_disposition; -+ int it_status; -+ __u64 it_lock_handle; -+ void *it_data; -+ int it_lock_mode; -+ int it_int_flags; -+}; -+struct lookup_intent { -+ int it_magic; -+ void (*it_op_release)(struct lookup_intent *); -+ int it_op; -+ int it_flags; -+ int it_create_mode; -+ union { -+ struct lustre_intent_data lustre; -+ } d; -+}; -+ -+static inline void intent_init(struct lookup_intent *it, int op, int flags) -+{ -+ memset(it, 0, sizeof(*it)); -+ it->it_magic = INTENT_MAGIC; -+ it->it_op = op; -+ it->it_flags = flags; -+} -+ - - /* - * linux/include/linux/dcache.h -@@ -91,8 +136,22 @@ struct dentry_operations { - int 
(*d_delete)(struct dentry *); - void (*d_release)(struct dentry *); - void (*d_iput)(struct dentry *, struct inode *); -+ int (*d_revalidate_it)(struct dentry *, int, struct lookup_intent *); -+ void (*d_pin)(struct dentry *, struct vfsmount * , int); -+ void (*d_unpin)(struct dentry *, struct vfsmount *, int); - }; - -+#define PIN(de,mnt,flag) if (de && de->d_op && de->d_op->d_pin) \ -+ de->d_op->d_pin(de, mnt, flag); -+#define UNPIN(de,mnt,flag) if (de && de->d_op && de->d_op->d_unpin) \ -+ de->d_op->d_unpin(de, mnt, flag); -+ -+ -+/* defined in fs/namei.c */ -+extern void intent_release(struct lookup_intent *it); -+/* defined in fs/dcache.c */ -+extern void __d_rehash(struct dentry * entry, int lock); -+ - /* the dentry parameter passed to d_hash and d_compare is the parent - * directory of the entries to be compared. It is used in case these - * functions need any directory specific information for determining -@@ -124,6 +183,7 @@ d_iput: no no yes - * s_nfsd_free_path semaphore will be down - */ - #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. 
*/ -+#define DCACHE_LUSTRE_INVALID 0x0010 /* Lustre invalidated */ - - extern spinlock_t dcache_lock; - -Index: linux-2.4.18-p4smp/include/linux/fs.h -=================================================================== ---- linux-2.4.18-p4smp.orig/include/linux/fs.h 2004-03-19 16:05:40.000000000 -0500 -+++ linux-2.4.18-p4smp/include/linux/fs.h 2004-03-19 16:05:42.000000000 -0500 -@@ -73,6 +73,7 @@ - - #define FMODE_READ 1 - #define FMODE_WRITE 2 -+#define FMODE_EXEC 4 - - #define READ 0 - #define WRITE 1 -@@ -339,6 +340,9 @@ extern void set_bh_page(struct buffer_he - #define ATTR_MTIME_SET 256 - #define ATTR_FORCE 512 /* Not a change, but a change it */ - #define ATTR_ATTR_FLAG 1024 -+#define ATTR_RAW 0x0800 /* file system, not vfs will massage attrs */ -+#define ATTR_FROM_OPEN 0x1000 /* called from open path, ie O_TRUNC */ -+#define ATTR_CTIME_SET 0x2000 - - /* - * This is the Inode Attributes structure, used for notify_change(). It -@@ -474,6 +478,7 @@ struct inode { - struct pipe_inode_info *i_pipe; - struct block_device *i_bdev; - struct char_device *i_cdev; -+ void *i_filterdata; - - unsigned long i_dnotify_mask; /* Directory notify events */ - struct dnotify_struct *i_dnotify; /* for directory notifications */ -@@ -578,6 +583,7 @@ struct file { - - /* needed for tty driver, and maybe others */ - void *private_data; -+ struct lookup_intent *f_it; - - /* preallocated helper kiobuf to speedup O_DIRECT */ - struct kiobuf *f_iobuf; -@@ -707,6 +713,7 @@ struct nameidata { - struct qstr last; - unsigned int flags; - int last_type; -+ struct lookup_intent *intent; - }; - - #define DQUOT_USR_ENABLED 0x01 /* User diskquotas enabled */ -@@ -840,7 +847,8 @@ extern int vfs_symlink(struct inode *, s - extern int vfs_link(struct dentry *, struct inode *, struct dentry *); - extern int vfs_rmdir(struct inode *, struct dentry *); - extern int vfs_unlink(struct inode *, struct dentry *); --extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); 
-+int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, -+ struct inode *new_dir, struct dentry *new_dentry); - - /* - * File types -@@ -900,21 +908,32 @@ struct file_operations { - - struct inode_operations { - int (*create) (struct inode *,struct dentry *,int); -+ int (*create_it) (struct inode *,struct dentry *,int, struct lookup_intent *); - struct dentry * (*lookup) (struct inode *,struct dentry *); -+ struct dentry * (*lookup_it) (struct inode *,struct dentry *, struct lookup_intent *, int flags); - int (*link) (struct dentry *,struct inode *,struct dentry *); -+ int (*link_raw) (struct nameidata *,struct nameidata *); - int (*unlink) (struct inode *,struct dentry *); -+ int (*unlink_raw) (struct nameidata *); - int (*symlink) (struct inode *,struct dentry *,const char *); -+ int (*symlink_raw) (struct nameidata *,const char *); - int (*mkdir) (struct inode *,struct dentry *,int); -+ int (*mkdir_raw) (struct nameidata *,int); - int (*rmdir) (struct inode *,struct dentry *); -+ int (*rmdir_raw) (struct nameidata *); - int (*mknod) (struct inode *,struct dentry *,int,int); -+ int (*mknod_raw) (struct nameidata *,int,dev_t); - int (*rename) (struct inode *, struct dentry *, - struct inode *, struct dentry *); -+ int (*rename_raw) (struct nameidata *, struct nameidata *); - int (*readlink) (struct dentry *, char *,int); - int (*follow_link) (struct dentry *, struct nameidata *); - void (*truncate) (struct inode *); - int (*permission) (struct inode *, int); - int (*revalidate) (struct dentry *); -+ int (*revalidate_it) (struct dentry *, struct lookup_intent *); - int (*setattr) (struct dentry *, struct iattr *); -+ int (*setattr_raw) (struct inode *, struct iattr *); - int (*getattr) (struct dentry *, struct iattr *); - }; - -@@ -1119,10 +1140,12 @@ static inline int get_lease(struct inode - - asmlinkage long sys_open(const char *, int, int); - asmlinkage long sys_close(unsigned int); /* yes, it's really unsigned */ --extern int do_truncate(struct 
dentry *, loff_t start); -+extern int do_truncate(struct dentry *, loff_t start, int called_from_open); - - extern struct file *filp_open(const char *, int, int); - extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); -+extern struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, -+ int flags, struct lookup_intent *it); - extern int filp_close(struct file *, fl_owner_t id); - extern char * getname(const char *); - -@@ -1388,9 +1411,12 @@ typedef int (*read_actor_t)(read_descrip - extern loff_t default_llseek(struct file *file, loff_t offset, int origin); - - extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *)); -+extern int FASTCALL(__user_walk_it(const char *, unsigned, struct nameidata *, struct lookup_intent *it)); - extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *)); - extern int FASTCALL(path_walk(const char *, struct nameidata *)); - extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *)); -+extern int FASTCALL(path_lookup_it(const char *path, unsigned flags, struct nameidata *nd, -+ struct lookup_intent *it)); - extern int FASTCALL(link_path_walk(const char *, struct nameidata *)); - extern void path_release(struct nameidata *); - extern int follow_down(struct vfsmount **, struct dentry **); -@@ -1399,6 +1425,8 @@ extern struct dentry * lookup_one_len(co - extern struct dentry * lookup_hash(struct qstr *, struct dentry *); - #define user_path_walk(name,nd) __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd) - #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd) -+#define user_path_walk_it(name,nd,it) __user_walk_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd, it) -+#define user_path_walk_link_it(name,nd,it) __user_walk_it(name, LOOKUP_POSITIVE, nd, it) - - extern void inode_init_once(struct inode *); - extern void iput(struct inode *); -@@ -1499,6 +1527,8 @@ extern struct file_operations generic_ro - - extern int 
vfs_readlink(struct dentry *, char *, int, const char *); - extern int vfs_follow_link(struct nameidata *, const char *); -+extern int vfs_follow_link_it(struct nameidata *, const char *, -+ struct lookup_intent *it); - extern int page_readlink(struct dentry *, char *, int); - extern int page_follow_link(struct dentry *, struct nameidata *); - extern struct inode_operations page_symlink_inode_operations; -Index: linux-2.4.18-p4smp/kernel/exit.c -=================================================================== ---- linux-2.4.18-p4smp.orig/kernel/exit.c 2004-02-03 01:00:10.000000000 -0500 -+++ linux-2.4.18-p4smp/kernel/exit.c 2004-03-19 16:05:42.000000000 -0500 -@@ -303,11 +303,14 @@ - { - /* No need to hold fs->lock if we are killing it */ - if (atomic_dec_and_test(&fs->count)) { -+ UNPIN(fs->pwd, fs->pwdmnt, 0); -+ UNPIN(fs->root, fs->rootmnt, 1); - dput(fs->root); - mntput(fs->rootmnt); - dput(fs->pwd); - mntput(fs->pwdmnt); - if (fs->altroot) { -+ UNPIN(fs->altroot, fs->altrootmnt, 1); - dput(fs->altroot); - mntput(fs->altrootmnt); - } -Index: linux-2.4.18-p4smp/kernel/fork.c -=================================================================== ---- linux-2.4.18-p4smp.orig/kernel/fork.c 2004-02-03 01:00:10.000000000 -0500 -+++ linux-2.4.18-p4smp/kernel/fork.c 2004-03-19 16:05:42.000000000 -0500 -@@ -399,10 +399,13 @@ - fs->umask = old->umask; - read_lock(&old->lock); - fs->rootmnt = mntget(old->rootmnt); -+ PIN(old->pwd, old->pwdmnt, 0); -+ PIN(old->root, old->rootmnt, 1); - fs->root = dget(old->root); - fs->pwdmnt = mntget(old->pwdmnt); - fs->pwd = dget(old->pwd); - if (old->altroot) { -+ PIN(old->altroot, old->altrootmnt, 1); - fs->altrootmnt = mntget(old->altrootmnt); - fs->altroot = dget(old->altroot); - } else { -Index: linux-2.4.18-p4smp/kernel/ksyms.c -=================================================================== ---- linux-2.4.18-p4smp.orig/kernel/ksyms.c 2004-03-19 16:05:40.000000000 -0500 -+++ linux-2.4.18-p4smp/kernel/ksyms.c 2004-03-19 
16:05:42.000000000 -0500 -@@ -293,6 +293,7 @@ - EXPORT_SYMBOL(set_page_dirty); - EXPORT_SYMBOL(vfs_readlink); - EXPORT_SYMBOL(vfs_follow_link); -+EXPORT_SYMBOL(vfs_follow_link_it); - EXPORT_SYMBOL(page_readlink); - EXPORT_SYMBOL(page_follow_link); - EXPORT_SYMBOL(page_symlink_inode_operations); diff --git a/lustre/kernel_patches/series/chaos-2.4.18 b/lustre/kernel_patches/series/chaos-2.4.18 deleted file mode 100644 index 99cdf04..0000000 --- a/lustre/kernel_patches/series/chaos-2.4.18 +++ /dev/null @@ -1,41 +0,0 @@ -dev_read_only.patch -exports.patch -lustre_version.patch -vfs_intent-2.4.18-18-chaos65.patch -invalidate_show.patch -iod-rmap-exports.patch -export-truncate.patch -htree-ext3-2.4.18.patch -linux-2.4.18ea-0.8.26.patch -ext3-2.4-ino_t.patch -ext3-2.4.18-ino_sb_macro.patch -ext3-orphan_lock.patch -ext3-delete_thread-2.4.18.patch -extN-misc-fixup.patch -extN-noread.patch -extN-wantedi.patch -ext3-san-2.4.20.patch -extN-2.4.18-ino_sb_fixup.patch -ext3-map_inode_page_2.4.18.patch -ext3-error-export.patch -iopen-2.4.18.patch -jbd-dont-account-blocks-twice.patch -jbd-commit-tricks.patch -ext3-o_direct-1-2.4.18-chaos.patch -ext3-no-write-super-chaos.patch -add_page_private.patch -ext3-extents-2.4.18-chaos.patch -ext3-extents-oflag-2.4.18-chaos.patch -ext3-raw-lookup.patch -nfs_export_kernel-2.4.18.patch -ext3-ea-in-inode-2.4.18-chaos.patch -listman-2.4.18.patch -ext3-trusted_ea-2.4.18.patch -gfp_memalloc-2.4.18-chaos.patch -ext3-xattr-ptr-arith-fix.patch -kernel_text_address-2.4.18-chaos.patch -procfs-ndynamic-2.4.patch -ext3-truncate-buffer-head.patch -inode-max-readahead-2.4.24.patch -dcache_refcount_debug.patch -mkdep-revert-rh-2.4.patch diff --git a/lustre/kernel_patches/series/chaos-2.4.18-pdirops b/lustre/kernel_patches/series/chaos-2.4.18-pdirops deleted file mode 100644 index 456c2eb..0000000 --- a/lustre/kernel_patches/series/chaos-2.4.18-pdirops +++ /dev/null @@ -1,36 +0,0 @@ -dev_read_only.patch -exports.patch -kmem_cache_validate.patch 
-lustre_version.patch -vfs_intent-2.4.18-18-chaos65.patch -invalidate_show.patch -iod-rmap-exports.patch -export-truncate.patch -ext3-compat-2.4.18-chaos.patch -ext3-htree.patch -linux-2.4.18ea-0.8.26-2.patch -ext3-2.4-ino_t.patch -ext3-2.4.18-ino_sb_macro-2.patch -ext3-orphan_lock.patch -ext3-delete_thread-2.4.18-2.patch -extN-misc-fixup.patch -extN-noread.patch -extN-wantedi.patch -ext3-san-2.4.20.patch -extN-2.4.18-ino_sb_fixup.patch -ext3-map_inode_page_2.4.18.patch -ext3-error-export.patch -iopen-2.4.18-2.patch -jbd-dont-account-blocks-twice.patch -jbd-commit-tricks.patch -ext3-o_direct-1-2.4.18-chaos.patch -ext3-no-write-super-chaos.patch -add_page_private.patch -dynamic-locks-2.4.18-chaos.patch -vfs-pdirops-2.4.18-chaos.patch -ext3-pdirops-2.4.18-chaos.patch -ext3-extents-2.4.18-chaos-pdirops.patch -nfs_export_kernel-2.4.18.patch -ext3-raw-lookup-pdirops.patch -ext3-truncate-buffer-head.patch -mkdep-revert-rh-2.4.patch -- 1.8.3.1