Whamcloud - gitweb
Land b_smallfix onto HEAD (20040428_2142)
authoradilger <adilger>
Thu, 29 Apr 2004 08:54:18 +0000 (08:54 +0000)
committeradilger <adilger>
Thu, 29 Apr 2004 08:54:18 +0000 (08:54 +0000)
b=3284, b=3285, b=3043, b=3236, revert liblustre group changes

97 files changed:
lnet/include/linux/kp30.h
lustre/ChangeLog
lustre/include/liblustre.h
lustre/include/linux/lustre_fsfilt.h
lustre/include/linux/lustre_lib.h
lustre/include/linux/lustre_net.h
lustre/kernel_patches/patches/configurable-x86-stack-2.4.21-sles8sp3.patch [new file with mode: 0644]
lustre/kernel_patches/patches/dev_read_only-suse-2.4.19.patch [new file with mode: 0644]
lustre/kernel_patches/patches/exports_2.4.19-suse.patch
lustre/kernel_patches/patches/ext-2.4-patch-1-suse-2.4.19.patch [new file with mode: 0644]
lustre/kernel_patches/patches/ext3-orphan_lock-2.4.19-suse.patch
lustre/kernel_patches/patches/ext3-trusted_ea-suse-2.4.19.patch [new file with mode: 0644]
lustre/kernel_patches/patches/ext3-use-after-free-suse.patch
lustre/kernel_patches/patches/invalidate_show-2.4.21-sles8sp3.patch [new file with mode: 0644]
lustre/kernel_patches/patches/iopen-2.4.19-suse.patch
lustre/kernel_patches/patches/iopen-2.4.21-sles8sp3.patch [new file with mode: 0644]
lustre/kernel_patches/patches/kernel_text_address-2.4.21-sles8sp3.patch [new file with mode: 0644]
lustre/kernel_patches/patches/linux-2.4.19-pre1-xattr-0.8.54.patch
lustre/kernel_patches/patches/linux-2.4.19-xattr-0.8.54-suse.patch
lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54-chaos.patch
lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54-hp.patch
lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54.patch
lustre/kernel_patches/patches/linux-2.4.21-xattr-0.8.54-chaos.patch
lustre/kernel_patches/patches/linux-2.4.21-xattr-0.8.54-suse.patch
lustre/kernel_patches/patches/linux-2.4.22-xattr-0.8.54.patch
lustre/kernel_patches/patches/linux-2.4.24-xattr-0.8.54.patch
lustre/kernel_patches/patches/lustre_version.patch
lustre/kernel_patches/patches/mkdep-revert-rh-2.4.patch [new file with mode: 0644]
lustre/kernel_patches/patches/tcp-zero-copy-2.4.21-sles8sp3.patch [new file with mode: 0644]
lustre/kernel_patches/patches/vfs_intent-2.4.18-18-chaos65.patch
lustre/kernel_patches/patches/vfs_intent-2.4.19-pre1.patch
lustre/kernel_patches/patches/vfs_intent-2.4.19-suse.patch
lustre/kernel_patches/patches/vfs_intent-2.4.20-hp.patch
lustre/kernel_patches/patches/vfs_intent-2.4.20-rh.patch
lustre/kernel_patches/patches/vfs_intent-2.4.20-vanilla.patch
lustre/kernel_patches/patches/vfs_intent-2.4.21-chaos.patch
lustre/kernel_patches/patches/vfs_intent-2.4.21-sles8sp3.patch [new file with mode: 0644]
lustre/kernel_patches/patches/vfs_intent-2.4.21-suse2.patch
lustre/kernel_patches/patches/vfs_intent-2.4.22-rh.patch
lustre/kernel_patches/patches/vfs_nointent-2.6-suse.patch
lustre/kernel_patches/patches/xattr-0.8.54-2.4.22-rh.patch
lustre/kernel_patches/series/chaos-2.4.18
lustre/kernel_patches/series/chaos-2.4.18-pdirops
lustre/kernel_patches/series/rh-2.4.20
lustre/kernel_patches/series/suse-2.4.19
lustre/kernel_patches/series/suse-sles8sp3-2.4.21 [new file with mode: 0644]
lustre/kernel_patches/which_patch
lustre/ldlm/ldlm_resource.c
lustre/liblustre/genlib.sh
lustre/liblustre/llite_lib.c
lustre/liblustre/namei.c
lustre/liblustre/super.c
lustre/liblustre/tests/Makefile.am
lustre/liblustre/tests/echo_test.c
lustre/llite/dir.c
lustre/llite/file.c
lustre/llite/llite_internal.h
lustre/llite/llite_lib.c
lustre/llite/lproc_llite.c
lustre/llite/rw.c
lustre/mdc/mdc_locks.c
lustre/mdc/mdc_request.c
lustre/mds/mds_open.c
lustre/obdclass/class_obd.c
lustre/obdclass/genops.c
lustre/obdecho/echo_client.c
lustre/obdfilter/filter.c
lustre/obdfilter/filter_io.c
lustre/osc/lproc_osc.c
lustre/osc/osc_create.c
lustre/osc/osc_request.c
lustre/portals/include/linux/kp30.h
lustre/ptlrpc/client.c
lustre/ptlrpc/pinger.c
lustre/ptlrpc/recover.c
lustre/ptlrpc/service.c
lustre/scripts/version_tag.pl.in
lustre/tests/acceptance-small.sh
lustre/tests/cfg/insanity-local.sh
lustre/tests/insanity.sh
lustre/tests/llmountcleanup.sh
lustre/tests/multiop.c
lustre/tests/oos.sh
lustre/tests/oos2.sh
lustre/tests/recovery-small.sh
lustre/tests/replay-dual.sh
lustre/tests/replay-ost-single.sh
lustre/tests/replay-single.sh
lustre/tests/run-llog.sh
lustre/tests/runas.c
lustre/tests/runregression-brw.sh
lustre/tests/runregression-mds.sh
lustre/tests/runregression-net.sh
lustre/tests/runtests
lustre/tests/sanity.sh
lustre/tests/sanityN.sh
lustre/tests/test-framework.sh

index 181594f..8a56b55 100644 (file)
@@ -689,27 +689,30 @@ typedef int (*cfg_record_cb_t)(enum cfg_record_type, int len, void *data);
 # endif
 #endif
 
+#ifndef LP_POISON
+# define LI_POISON ((int)0x5a5a5a5a5a5a5a5a)
+# define LL_POISON ((long)0x5a5a5a5a5a5a5a5a)
+# define LP_POISON ((void *)(long)0x5a5a5a5a5a5a5a5a)
+#endif
+
 #if defined(__x86_64__)
 # define LPU64 "%Lu"
 # define LPD64 "%Ld"
 # define LPX64 "%#Lx"
 # define LPSZ  "%lu"
 # define LPSSZ "%ld"
-# define LP_POISON ((void *)0x5a5a5a5a5a5a5a5a)
 #elif (BITS_PER_LONG == 32 || __WORDSIZE == 32)
 # define LPU64 "%Lu"
 # define LPD64 "%Ld"
 # define LPX64 "%#Lx"
 # define LPSZ  "%u"
 # define LPSSZ "%d"
-# define LP_POISON ((void *)0x5a5a5a5a)
 #elif (BITS_PER_LONG == 64 || __WORDSIZE == 64)
 # define LPU64 "%lu"
 # define LPD64 "%ld"
 # define LPX64 "%#lx"
 # define LPSZ  "%lu"
 # define LPSSZ "%ld"
-# define LP_POISON ((void *)0x5a5a5a5a5a5a5a5a)
 #endif
 #ifndef LPU64
 # error "No word size defined"
index 3693d4c..b2cf2ca 100644 (file)
@@ -5,7 +5,9 @@ tbd  Cluster File Systems, Inc. <info@clusterfs.com>
        - deal with strange write() on x86-64 (3043)
        - don't dereference NULL peer_ni in ldlm_handle_ast_error (3258)
        - clear page->private before handing to FS (3119)
-    - drop scimac NAL
+       - tune the read pipeline (3236)
+       * miscellania
+       - drop scimac NAL (unmaintained)
 
 tbd  Cluster File Systems, Inc. <info@clusterfs.com>
        * version 1.2.2
@@ -30,6 +32,9 @@ tbd  Cluster File Systems, Inc. <info@clusterfs.com>
        - initialize RPC timeout timer earlier for 2.6 (3219)
        - don't dereference NULL reply buffer if mdc_close was never sent (2410)
        - print nal/nid for unknown nid (3258)
+       - additional checks for oscc recovery before doing precreate (3284)
+       - fix ll_extent_lock() error return code for 64-bit systems (3043)
+       - don't crash in mdc_close for bad permissions on open (3285)
        * miscellania
        - allow default OST striping configuration per directory (1414)
        - increase maximum number of MDS request buffers for large systems
index 30d9574..af80f44 100644 (file)
@@ -116,6 +116,9 @@ static inline void *kmalloc(int size, int prot)
 #define PTR_ERR(a) ((long)(a))
 #define ERR_PTR(a) ((void*)((long)(a)))
 
+#define capable(foo) 1
+#define CAP_SYS_ADMIN 1
+
 typedef struct {
         void *cwd;
 }mm_segment_t;
@@ -575,23 +578,12 @@ struct task_struct {
         int pid;
         int fsuid;
         int fsgid;
-        int max_groups;
-        int ngroups;
-        gid_t *groups;
         __u32 cap_effective;
-
-        struct fs_struct __fs;
 };
 
 extern struct task_struct *current;
-int in_group_p(gid_t gid);
-static inline int capable(int cap)
-{
-        if (current->cap_effective & (1 << cap))
-                return 1;
-        else
-                return 0;
-}
+
+#define in_group_p(a) 0 /* FIXME */
 
 #define set_current_state(foo) do { current->state = foo; } while (0)
 
index 40e9914..72f3817 100644 (file)
@@ -100,7 +100,7 @@ static inline void *fsfilt_start_log(struct obd_device *obd,
         unsigned long now = jiffies;
         void *parent_handle = oti ? oti->oti_handle : NULL;
         void *handle = obd->obd_fsops->fs_start(inode, op, parent_handle, logs);
-        CDEBUG(D_HA, "started handle %p (%p)\n", handle, parent_handle);
+        CDEBUG(D_INFO, "started handle %p (%p)\n", handle, parent_handle);
 
         if (oti != NULL) {
                 if (parent_handle == NULL) {
@@ -162,7 +162,7 @@ static inline int fsfilt_commit(struct obd_device *obd, struct inode *inode,
 {
         unsigned long now = jiffies;
         int rc = obd->obd_fsops->fs_commit(inode, handle, force_sync);
-        CDEBUG(D_HA, "committing handle %p\n", handle);
+        CDEBUG(D_INFO, "committing handle %p\n", handle);
 
         if (time_after(jiffies, now + 15 * HZ))
                 CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
index ebdfdf6..0bb5f0b 100644 (file)
 #include <linux/lustre_idl.h>
 #include <linux/lustre_cfg.h>
 
+#define LI_POISON ((int)0x5a5a5a5a5a5a5a5a)
+#define LP_POISON ((void *)(long)0x5a5a5a5a5a5a5a5a)
+
 #ifndef LPU64
 /* x86_64 has 64bit longs and defines u64 as long long */
 #if BITS_PER_LONG > 32 && !defined(__x86_64__)
 #define LPU64 "%lu"
 #define LPD64 "%ld"
 #define LPX64 "%#lx"
-#define LP_POISON ((void *)0x5a5a5a5a5a5a5a5a)
 #else
 #define LPU64 "%Lu"
 #define LPD64 "%Ld"
 #define LPX64 "%#Lx"
-#define LP_POISON ((void *)0x5a5a5a5a)
 #endif
 #endif
 
index 84062e2..3c75a8b 100644 (file)
@@ -79,6 +79,9 @@
 # define PTLRPC_MAX_BRW_PAGES  (PTLRPC_MAX_BRW_SIZE / PAGE_SIZE)
 #endif
 
+#if ((PTLRPC_MAX_BRW_PAGES & (PTLRPC_MAX_BRW_PAGES - 1)) != 0)
+#error "PTLRPC_MAX_BRW_PAGES isn't a power of two"
+#endif
 
 /* Size over which to OBD_VMALLOC() rather than OBD_ALLOC() service request
  * buffers */
diff --git a/lustre/kernel_patches/patches/configurable-x86-stack-2.4.21-sles8sp3.patch b/lustre/kernel_patches/patches/configurable-x86-stack-2.4.21-sles8sp3.patch
new file mode 100644 (file)
index 0000000..bc0a1b7
--- /dev/null
@@ -0,0 +1,330 @@
+Index: linux-2.4.21/arch/i386/kernel/entry.S
+===================================================================
+--- linux-2.4.21.orig/arch/i386/kernel/entry.S 2004-04-24 02:39:01.000000000 -0400
++++ linux-2.4.21/arch/i386/kernel/entry.S      2004-04-24 02:42:58.000000000 -0400
+@@ -45,6 +45,7 @@
+ #include <linux/linkage.h>
+ #include <asm/segment.h>
+ #include <asm/smp.h>
++#include <asm/current.h>
+ EBX           = 0x00
+ ECX           = 0x04
+@@ -130,10 +131,6 @@
+       .long 3b,6b;    \
+ .previous
+-#define GET_CURRENT(reg) \
+-      movl $-8192, reg; \
+-      andl %esp, reg
+-
+ ENTRY(lcall7)
+       pushfl                  # We get a different stack layout with call gates,
+       pushl %eax              # which has to be cleaned up later..
+@@ -149,7 +146,7 @@
+       movl %ecx,CS(%esp)      #
+       movl %esp,%ebx
+       pushl %ebx
+-      andl $-8192,%ebx        # GET_CURRENT
++      andl $-THREAD_SIZE,%ebx # GET_CURRENT
+       movl exec_domain(%ebx),%edx     # Get the execution domain
+       movl 4(%edx),%edx       # Get the lcall7 handler for the domain
+       pushl $0x7
+@@ -173,7 +170,7 @@
+       movl %ecx,CS(%esp)      #
+       movl %esp,%ebx
+       pushl %ebx
+-      andl $-8192,%ebx        # GET_CURRENT
++      andl $-THREAD_SIZE,%ebx # GET_CURRENT
+       movl exec_domain(%ebx),%edx     # Get the execution domain
+       movl 4(%edx),%edx       # Get the lcall7 handler for the domain
+       pushl $0x27
+Index: linux-2.4.21/arch/i386/kernel/smpboot.c
+===================================================================
+--- linux-2.4.21.orig/arch/i386/kernel/smpboot.c       2004-04-24 02:39:05.000000000 -0400
++++ linux-2.4.21/arch/i386/kernel/smpboot.c    2004-04-24 02:42:58.000000000 -0400
+@@ -837,7 +837,7 @@
+       /* So we see what's up   */
+       printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
+-      stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle);
++      stack_start.esp = (void *)idle->thread.esp;
+       /*
+        * This grunge runs the startup process for
+@@ -918,7 +918,7 @@
+                       Dprintk("CPU has booted.\n");
+               } else {
+                       boot_error= 1;
+-                      if (*((volatile unsigned char *)phys_to_virt(8192))
++                      if (*((volatile unsigned char *)phys_to_virt(THREAD_SIZE))
+                                       == 0xA5)
+                               /* trampoline started but...? */
+                               printk("Stuck ??\n");
+@@ -941,7 +941,7 @@
+       }
+       /* mark "stuck" area as not stuck */
+-      *((volatile unsigned long *)phys_to_virt(8192)) = 0;
++      *((volatile unsigned long *)phys_to_virt(THREAD_SIZE)) = 0;
+ #ifdef CONFIG_ES7000
+       if (!es7000_plat)
+Index: linux-2.4.21/arch/i386/kernel/traps.c
+===================================================================
+--- linux-2.4.21.orig/arch/i386/kernel/traps.c 2004-04-24 02:39:18.000000000 -0400
++++ linux-2.4.21/arch/i386/kernel/traps.c      2004-04-24 02:42:58.000000000 -0400
+@@ -304,7 +304,7 @@
+       unsigned long esp = tsk->thread.esp;
+       /* User space on another CPU? */
+-      if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1))
++      if ((esp ^ (unsigned long)tsk) & ~(THREAD_SIZE - 1))
+               return;
+       show_trace((unsigned long *)esp);
+ }
+Index: linux-2.4.21/arch/i386/kernel/head.S
+===================================================================
+--- linux-2.4.21.orig/arch/i386/kernel/head.S  2004-04-24 02:38:42.000000000 -0400
++++ linux-2.4.21/arch/i386/kernel/head.S       2004-04-24 02:42:58.000000000 -0400
+@@ -15,6 +15,7 @@
+ #include <asm/page.h>
+ #include <asm/pgtable.h>
+ #include <asm/desc.h>
++#include <asm/current.h>
+ #define OLD_CL_MAGIC_ADDR     0x90020
+ #define OLD_CL_MAGIC          0xA33F
+@@ -326,7 +327,7 @@
+       ret
+ ENTRY(stack_start)
+-      .long SYMBOL_NAME(init_task_union)+8192
++      .long SYMBOL_NAME(init_task_union)+THREAD_SIZE
+       .long __KERNEL_DS
+ /* This is the default interrupt "handler" :-) */
+Index: linux-2.4.21/arch/i386/kernel/irq.c
+===================================================================
+--- linux-2.4.21.orig/arch/i386/kernel/irq.c   2004-04-24 02:39:18.000000000 -0400
++++ linux-2.4.21/arch/i386/kernel/irq.c        2004-04-24 02:44:26.000000000 -0400
+@@ -602,7 +602,10 @@
+       long esp;
+       /* Debugging check for stack overflow: is there less than 1KB free? */
+-      __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (8191));
++        __asm__ __volatile__(
++                "andl %%esp,%0"
++                : "=r" (esp) : "0" (THREAD_SIZE-1));
++
+       if (unlikely(esp < (sizeof(struct task_struct) + sysctl_stackwarn))) {
+               static unsigned long next_jiffies;      /* ratelimiting */
+               static long least_esp = THREAD_SIZE;
+Index: linux-2.4.21/arch/i386/lib/getuser.S
+===================================================================
+--- linux-2.4.21.orig/arch/i386/lib/getuser.S  1998-01-12 16:42:52.000000000 -0500
++++ linux-2.4.21/arch/i386/lib/getuser.S       2004-04-24 02:42:58.000000000 -0400
+@@ -21,6 +21,10 @@
+  * as they get called from within inline assembly.
+  */
++/* Duplicated from asm/processor.h */
++#include <asm/current.h>
++#include <linux/config.h>
++
+ addr_limit = 12
+ .text
+@@ -28,7 +32,7 @@
+ .globl __get_user_1
+ __get_user_1:
+       movl %esp,%edx
+-      andl $0xffffe000,%edx
++      andl $~(THREAD_SIZE - 1),%edx
+       cmpl addr_limit(%edx),%eax
+       jae bad_get_user
+ 1:    movzbl (%eax),%edx
+@@ -41,7 +45,7 @@
+       addl $1,%eax
+       movl %esp,%edx
+       jc bad_get_user
+-      andl $0xffffe000,%edx
++      andl $~(THREAD_SIZE - 1),%edx
+       cmpl addr_limit(%edx),%eax
+       jae bad_get_user
+ 2:    movzwl -1(%eax),%edx
+@@ -54,7 +58,7 @@
+       addl $3,%eax
+       movl %esp,%edx
+       jc bad_get_user
+-      andl $0xffffe000,%edx
++      andl $~(THREAD_SIZE - 1),%edx
+       cmpl addr_limit(%edx),%eax
+       jae bad_get_user
+ 3:    movl -3(%eax),%edx
+Index: linux-2.4.21/arch/i386/config.in
+===================================================================
+--- linux-2.4.21.orig/arch/i386/config.in      2004-04-24 02:39:21.000000000 -0400
++++ linux-2.4.21/arch/i386/config.in   2004-04-24 02:42:58.000000000 -0400
+@@ -326,6 +326,29 @@
+ if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then
+    define_bool CONFIG_HAVE_DEC_LOCK y
+ fi
++
++choice 'Bigger Stack Size Support' \
++     "off    CONFIG_NOBIGSTACK \
++      16KB   CONFIG_STACK_SIZE_16KB \
++      32KB   CONFIG_STACK_SIZE_32KB \
++      64KB   CONFIG_STACK_SIZE_64KB" off
++
++if [ "$CONFIG_NOBIGSTACK" = "y" ]; then
++   define_int CONFIG_STACK_SIZE_SHIFT 1
++else
++  if [ "$CONFIG_STACK_SIZE_16KB" = "y" ]; then
++     define_int CONFIG_STACK_SIZE_SHIFT 2
++  else
++    if [ "$CONFIG_STACK_SIZE_32KB" = "y" ]; then
++      define_int CONFIG_STACK_SIZE_SHIFT 3
++    else
++      if [ "$CONFIG_STACK_SIZE_64KB" = "y" ]; then
++        define_int CONFIG_STACK_SIZE_SHIFT 4
++      fi
++    fi
++  fi
++fi
++ 
+ endmenu
+ mainmenu_option next_comment
+Index: linux-2.4.21/arch/i386/vmlinux.lds.S
+===================================================================
+--- linux-2.4.21.orig/arch/i386/vmlinux.lds.S  2004-04-24 02:38:06.000000000 -0400
++++ linux-2.4.21/arch/i386/vmlinux.lds.S       2004-04-24 02:42:58.000000000 -0400
+@@ -39,7 +39,8 @@
+   _edata = .;                 /* End of data section */
+-  . = ALIGN(8192);            /* init_task */
++/* chose the biggest of the possible stack sizes here? */
++  . = ALIGN(65536);           /* init_task */
+   .data.init_task : { *(.data.init_task) }
+   . = ALIGN(4096);            /* Init code and data */
+Index: linux-2.4.21/include/asm-i386/current.h
+===================================================================
+--- linux-2.4.21.orig/include/asm-i386/current.h       1998-08-14 19:35:22.000000000 -0400
++++ linux-2.4.21/include/asm-i386/current.h    2004-04-24 02:42:58.000000000 -0400
+@@ -1,15 +1,43 @@
+ #ifndef _I386_CURRENT_H
+ #define _I386_CURRENT_H
++#include <asm/page.h>
++
++/*
++ * Configurable page sizes on i386, mainly for debugging purposes.
++ * (c) Balbir Singh
++ */
++
++#ifdef __ASSEMBLY__
++
++#define PAGE_SIZE      4096    /* as cannot handle 1UL << 12 */
++#define THREAD_SIZE ((1 << CONFIG_STACK_SIZE_SHIFT) * PAGE_SIZE)
++
++#define GET_CURRENT(reg) \
++        movl $-THREAD_SIZE, reg; \
++        andl %esp, reg
++
++#else  /* __ASSEMBLY__ */
++
++#define THREAD_SIZE ((1 << CONFIG_STACK_SIZE_SHIFT) * PAGE_SIZE)
++#define alloc_task_struct() \
++  ((struct task_struct *) __get_free_pages(GFP_KERNEL,CONFIG_STACK_SIZE_SHIFT))
++
++#define free_task_struct(p) \
++  free_pages((unsigned long) (p), CONFIG_STACK_SIZE_SHIFT)
++
++#define INIT_TASK_SIZE THREAD_SIZE
+ struct task_struct;
+ static inline struct task_struct * get_current(void)
+ {
+       struct task_struct *current;
+-      __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~8191UL));
++      __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~(THREAD_SIZE - 1)));
+       return current;
+  }
+  
+ #define current get_current()
++#endif /* __ASSEMBLY__ */
++
+ #endif /* !(_I386_CURRENT_H) */
+Index: linux-2.4.21/include/asm-i386/hw_irq.h
+===================================================================
+--- linux-2.4.21.orig/include/asm-i386/hw_irq.h        2004-04-24 02:39:05.000000000 -0400
++++ linux-2.4.21/include/asm-i386/hw_irq.h     2004-04-24 02:42:58.000000000 -0400
+@@ -16,6 +16,7 @@
+ #include <linux/sched.h>
+ #include <asm/atomic.h>
+ #include <asm/irq.h>
++#include <asm/current.h>
+ /*
+  * IDT vectors usable for external interrupt sources start
+@@ -120,10 +121,6 @@
+ #define IRQ_NAME2(nr) nr##_interrupt(void)
+ #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
+-#define GET_CURRENT \
+-      "movl %esp, %ebx\n\t" \
+-      "andl $-8192, %ebx\n\t"
+-
+ /*
+  *    SMP has a few special interrupts for IPI messages
+  */
+Index: linux-2.4.21/include/asm-i386/processor.h
+===================================================================
+--- linux-2.4.21.orig/include/asm-i386/processor.h     2004-04-24 02:39:21.000000000 -0400
++++ linux-2.4.21/include/asm-i386/processor.h  2004-04-24 02:42:58.000000000 -0400
+@@ -14,6 +14,7 @@
+ #include <asm/types.h>
+ #include <asm/sigcontext.h>
+ #include <asm/cpufeature.h>
++#include <asm/current.h>
+ #include <linux/cache.h>
+ #include <linux/config.h>
+ #include <linux/threads.h>
+@@ -466,9 +467,6 @@
+ #define KSTK_EIP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1019])
+ #define KSTK_ESP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1022])
+-#define THREAD_SIZE (2*PAGE_SIZE)
+-#define alloc_task_struct() ((struct task_struct *) __get_free_pages(GFP_KERNEL,1))
+-#define free_task_struct(p) free_pages((unsigned long) (p), 1)
+ #define get_task_struct(tsk)      atomic_inc(&virt_to_page(tsk)->count)
+ #define init_task     (init_task_union.task)
+Index: linux-2.4.21/include/linux/sched.h
+===================================================================
+--- linux-2.4.21.orig/include/linux/sched.h    2004-04-24 02:39:20.000000000 -0400
++++ linux-2.4.21/include/linux/sched.h 2004-04-24 02:42:58.000000000 -0400
+@@ -2,6 +2,7 @@
+ #define _LINUX_SCHED_H
+ #include <asm/param.h>        /* for HZ */
++#include <asm/current.h>      /* maybe for INIT_TASK_SIZE */
+ extern unsigned long event;
+Index: linux-2.4.21/include/asm-x86_64/current.h
+===================================================================
+--- linux-2.4.21.orig/include/asm-x86_64/current.h     2002-11-28 18:53:15.000000000 -0500
++++ linux-2.4.21/include/asm-x86_64/current.h  2004-04-24 02:42:58.000000000 -0400
+@@ -5,6 +5,7 @@
+ struct task_struct;
+ #include <asm/pda.h>
++#include <asm/page.h>
+ static inline struct task_struct *get_current(void) 
+ { 
diff --git a/lustre/kernel_patches/patches/dev_read_only-suse-2.4.19.patch b/lustre/kernel_patches/patches/dev_read_only-suse-2.4.19.patch
new file mode 100644 (file)
index 0000000..f2eb39a
--- /dev/null
@@ -0,0 +1,76 @@
+ drivers/block/blkpg.c  |   36 ++++++++++++++++++++++++++++++++++++
+ drivers/block/loop.c   |    3 +++
+ drivers/ide/ide-disk.c |    4 ++++
+ 3 files changed, 43 insertions(+)
+
+Index: linux-2.4.19/drivers/block/blkpg.c
+===================================================================
+--- linux-2.4.19.orig/drivers/block/blkpg.c    2002-08-02 20:39:43.000000000 -0400
++++ linux-2.4.19/drivers/block/blkpg.c 2004-04-23 18:24:40.000000000 -0400
+@@ -296,3 +296,37 @@
+ }
+ EXPORT_SYMBOL(blk_ioctl);
++
++#define NUM_DEV_NO_WRITE 16
++static int dev_no_write[NUM_DEV_NO_WRITE];
++/*
++ * Debug code for turning block devices "read-only" (will discard writes
++ * silently).  This is for filesystem crash/recovery testing.
++ */
++void dev_set_rdonly(kdev_t dev, int no_write)
++{
++      if (dev) {
++              printk(KERN_WARNING "Turning device %s read-only\n",
++                     bdevname(dev));
++              dev_no_write[no_write] = 0xdead0000 + dev;
++      }
++}
++
++int dev_check_rdonly(kdev_t dev) {
++      int i;
++
++      for (i = 0; i < NUM_DEV_NO_WRITE; i++) {
++      if ((dev_no_write[i] & 0xffff0000) == 0xdead0000 &&
++              dev == (dev_no_write[i] & 0xffff))
++              return 1;
++      }
++      return 0;
++}
++
++void dev_clear_rdonly(int no_write) {
++      dev_no_write[no_write] = 0;
++}
++
++EXPORT_SYMBOL(dev_set_rdonly);
++EXPORT_SYMBOL(dev_check_rdonly);
++EXPORT_SYMBOL(dev_clear_rdonly);
+Index: linux-2.4.19/drivers/block/loop.c
+===================================================================
+--- linux-2.4.19.orig/drivers/block/loop.c     2004-04-23 17:53:56.000000000 -0400
++++ linux-2.4.19/drivers/block/loop.c  2004-04-23 18:23:16.000000000 -0400
+@@ -478,6 +478,9 @@
+       spin_unlock_irq(&lo->lo_lock);
+       if (rw == WRITE) {
++              if (dev_check_rdonly(rbh->b_rdev))
++                      goto err;
++
+               if (lo->lo_flags & LO_FLAGS_READ_ONLY)
+                       goto err;
+       } else if (rw == READA) {
+Index: linux-2.4.19/drivers/ide/ide-disk.c
+===================================================================
+--- linux-2.4.19.orig/drivers/ide/ide-disk.c   2004-04-23 17:53:51.000000000 -0400
++++ linux-2.4.19/drivers/ide/ide-disk.c        2004-04-23 18:23:16.000000000 -0400
+@@ -558,6 +558,10 @@
+  */
+ static ide_startstop_t do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block)
+ {
++      if (rq->cmd == WRITE && dev_check_rdonly(rq->rq_dev)) {
++              ide_end_request(1, HWGROUP(drive));
++              return ide_stopped;
++      }
+       if (IDE_CONTROL_REG)
+               OUT_BYTE(drive->ctl,IDE_CONTROL_REG);
index feaeec6..769f411 100644 (file)
@@ -4,8 +4,10 @@
  kernel/ksyms.c     |    4 ++++
  4 files changed, 8 insertions(+), 1 deletion(-)
 
---- linux/fs/ext3/Makefile~exports_2.4.20      Wed Apr  9 10:07:14 2003
-+++ linux-mmonroe/fs/ext3/Makefile     Wed Apr  9 10:19:53 2003
+Index: linux-2.4.19/fs/ext3/Makefile
+===================================================================
+--- linux-2.4.19.orig/fs/ext3/Makefile 2004-04-23 17:53:55.000000000 -0400
++++ linux-2.4.19/fs/ext3/Makefile      2004-04-23 18:25:03.000000000 -0400
 @@ -9,6 +9,8 @@
  
  O_TARGET := ext3.o
  obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
                ioctl.o namei.o super.o symlink.o
  obj-m    := $(O_TARGET)
---- linux/fs/ext3/super.c~exports_2.4.20       Wed Apr  9 10:07:14 2003
-+++ linux-mmonroe/fs/ext3/super.c      Wed Apr  9 10:19:53 2003
-@@ -1769,7 +1769,7 @@ static void __exit exit_ext3_fs(void)
-       unregister_filesystem(&ext3_fs_type);
+Index: linux-2.4.19/fs/ext3/super.c
+===================================================================
+--- linux-2.4.19.orig/fs/ext3/super.c  2004-04-23 17:53:55.000000000 -0400
++++ linux-2.4.19/fs/ext3/super.c       2004-04-23 18:25:03.000000000 -0400
+@@ -1821,7 +1821,7 @@
+       exit_ext3_xattr();
  }
  
 -EXPORT_NO_SYMBOLS;
  
  MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
  MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
---- linux/include/linux/fs.h~exports_2.4.20    Wed Apr  9 10:07:14 2003
-+++ linux-mmonroe/include/linux/fs.h   Wed Apr  9 10:19:53 2003
-@@ -1020,6 +1020,7 @@ extern int unregister_filesystem(struct 
+Index: linux-2.4.19/include/linux/fs.h
+===================================================================
+--- linux-2.4.19.orig/include/linux/fs.h       2004-04-23 17:54:14.000000000 -0400
++++ linux-2.4.19/include/linux/fs.h    2004-04-23 18:25:27.000000000 -0400
+@@ -1183,6 +1183,7 @@
  extern struct vfsmount *kern_mount(struct file_system_type *);
  extern int may_umount(struct vfsmount *);
  extern long do_mount(char *, char *, char *, unsigned long, void *);
 +struct vfsmount *do_kern_mount(const char *type, int flags, char *name, void *data);
+ extern void umount_tree(struct vfsmount *);
  
  #define kern_umount mntput
---- linux/kernel/ksyms.c~exports_2.4.20        Wed Apr  9 10:07:14 2003
-+++ linux-mmonroe/kernel/ksyms.c       Wed Apr  9 10:19:53 2003
-@@ -308,6 +308,10 @@ EXPORT_SYMBOL(dcache_dir_fsync);
+Index: linux-2.4.19/kernel/ksyms.c
+===================================================================
+--- linux-2.4.19.orig/kernel/ksyms.c   2004-04-23 17:54:14.000000000 -0400
++++ linux-2.4.19/kernel/ksyms.c        2004-04-23 18:25:03.000000000 -0400
+@@ -330,6 +330,10 @@
  EXPORT_SYMBOL(dcache_readdir);
  EXPORT_SYMBOL(dcache_dir_ops);
  
@@ -49,5 +57,3 @@
  /* for stackable file systems (lofs, wrapfs, cryptfs, etc.) */
  EXPORT_SYMBOL(default_llseek);
  EXPORT_SYMBOL(dentry_open);
-
-_
diff --git a/lustre/kernel_patches/patches/ext-2.4-patch-1-suse-2.4.19.patch b/lustre/kernel_patches/patches/ext-2.4-patch-1-suse-2.4.19.patch
new file mode 100644 (file)
index 0000000..e937932
--- /dev/null
@@ -0,0 +1,2551 @@
+ fs/ext3/Makefile           |    2 
+ fs/ext3/dir.c              |  299 +++++++++
+ fs/ext3/file.c             |    3 
+ fs/ext3/hash.c             |  215 ++++++
+ fs/ext3/namei.c            | 1388 ++++++++++++++++++++++++++++++++++++++++-----
+ fs/ext3/super.c            |    7 
+ include/linux/ext3_fs.h    |   85 ++
+ include/linux/ext3_fs_sb.h |    2 
+ include/linux/ext3_jbd.h   |    2 
+ include/linux/rbtree.h     |    2 
+ lib/rbtree.c               |   42 +
+ 11 files changed, 1887 insertions(+), 160 deletions(-)
+
+Index: linux-2.4.19/fs/ext3/Makefile
+===================================================================
+--- linux-2.4.19.orig/fs/ext3/Makefile 2004-04-23 18:25:03.000000000 -0400
++++ linux-2.4.19/fs/ext3/Makefile      2004-04-23 18:26:27.000000000 -0400
+@@ -12,7 +12,7 @@
+ export-objs :=        super.o inode.o
+ obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
+-              ioctl.o namei.o super.o symlink.o
++              ioctl.o namei.o super.o symlink.o hash.o
+ obj-m    := $(O_TARGET)
+ obj-$(CONFIG_EXT3_FS_XATTR) += xattr.o
+Index: linux-2.4.19/fs/ext3/dir.c
+===================================================================
+--- linux-2.4.19.orig/fs/ext3/dir.c    2001-11-09 17:25:04.000000000 -0500
++++ linux-2.4.19/fs/ext3/dir.c 2004-04-23 18:26:27.000000000 -0400
+@@ -21,12 +21,16 @@
+ #include <linux/fs.h>
+ #include <linux/jbd.h>
+ #include <linux/ext3_fs.h>
++#include <linux/slab.h>
++#include <linux/rbtree.h>
+ static unsigned char ext3_filetype_table[] = {
+       DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
+ };
+ static int ext3_readdir(struct file *, void *, filldir_t);
++static int ext3_dx_readdir(struct file * filp,
++                         void * dirent, filldir_t filldir);
+ struct file_operations ext3_dir_operations = {
+       read:           generic_read_dir,
+@@ -35,6 +39,17 @@
+       fsync:          ext3_sync_file,         /* BKL held */
+ };
++
++static unsigned char get_dtype(struct super_block *sb, int filetype)
++{
++      if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE) ||
++          (filetype >= EXT3_FT_MAX))
++              return DT_UNKNOWN;
++
++      return (ext3_filetype_table[filetype]);
++}
++                             
++
+ int ext3_check_dir_entry (const char * function, struct inode * dir,
+                         struct ext3_dir_entry_2 * de,
+                         struct buffer_head * bh,
+@@ -79,6 +94,16 @@
+       sb = inode->i_sb;
++      if (is_dx(inode)) {
++              err = ext3_dx_readdir(filp, dirent, filldir);
++              if (err != ERR_BAD_DX_DIR)
++                      return err;
++              /*
++               * We don't set the inode dirty flag since it's not
++               * critical that it get flushed back to the disk.
++               */
++              EXT3_I(filp->f_dentry->d_inode)->i_flags &= ~EXT3_INDEX_FL;
++      }
+       stored = 0;
+       bh = NULL;
+       offset = filp->f_pos & (sb->s_blocksize - 1);
+@@ -162,18 +187,12 @@
+                                * during the copy operation.
+                                */
+                               unsigned long version = filp->f_version;
+-                              unsigned char d_type = DT_UNKNOWN;
+-                              if (EXT3_HAS_INCOMPAT_FEATURE(sb,
+-                                              EXT3_FEATURE_INCOMPAT_FILETYPE)
+-                                              && de->file_type < EXT3_FT_MAX)
+-                                      d_type =
+-                                        ext3_filetype_table[de->file_type];
+                               error = filldir(dirent, de->name,
+                                               de->name_len,
+                                               filp->f_pos,
+                                               le32_to_cpu(de->inode),
+-                                              d_type);
++                                              get_dtype(sb, de->file_type));
+                               if (error)
+                                       break;
+                               if (version != filp->f_version)
+@@ -188,3 +207,269 @@
+       UPDATE_ATIME(inode);
+       return 0;
+ }
++
++#ifdef CONFIG_EXT3_INDEX
++/*
++ * These functions convert from the major/minor hash to an f_pos
++ * value.
++ * 
++ * Currently we only use major hash numer.  This is unfortunate, but
++ * on 32-bit machines, the same VFS interface is used for lseek and
++ * llseek, so if we use the 64 bit offset, then the 32-bit versions of
++ * lseek/telldir/seekdir will blow out spectacularly, and from within
++ * the ext2 low-level routine, we don't know if we're being called by
++ * a 64-bit version of the system call or the 32-bit version of the
++ * system call.  Worse yet, NFSv2 only allows for a 32-bit readdir
++ * cookie.  Sigh.
++ */
++#define hash2pos(major, minor)        (major >> 1)
++#define pos2maj_hash(pos)     ((pos << 1) & 0xffffffff)
++#define pos2min_hash(pos)     (0)
++
++/*
++ * This structure holds the nodes of the red-black tree used to store
++ * the directory entry in hash order.
++ */
++struct fname {
++      __u32           hash;
++      __u32           minor_hash;
++      rb_node_t       rb_hash; 
++      struct fname    *next;
++      __u32           inode;
++      __u8            name_len;
++      __u8            file_type;
++      char            name[0];
++};
++
++/*
++ * This functoin implements a non-recursive way of freeing all of the
++ * nodes in the red-black tree.
++ */
++static void free_rb_tree_fname(rb_root_t *root)
++{
++      rb_node_t       *n = root->rb_node;
++      rb_node_t       *parent;
++      struct fname    *fname;
++
++      while (n) {
++              /* Do the node's children first */
++              if ((n)->rb_left) {
++                      n = n->rb_left;
++                      continue;
++              }
++              if (n->rb_right) {
++                      n = n->rb_right;
++                      continue;
++              }
++              /*
++               * The node has no children; free it, and then zero
++               * out parent's link to it.  Finally go to the
++               * beginning of the loop and try to free the parent
++               * node.
++               */
++              parent = n->rb_parent;
++              fname = rb_entry(n, struct fname, rb_hash);
++              kfree(fname);
++              if (!parent)
++                      root->rb_node = 0;
++              else if (parent->rb_left == n)
++                      parent->rb_left = 0;
++              else if (parent->rb_right == n)
++                      parent->rb_right = 0;
++              n = parent;
++      }
++      root->rb_node = 0;
++}
++
++
++struct dir_private_info *create_dir_info(loff_t pos)
++{
++      struct dir_private_info *p;
++
++      p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL);
++      if (!p)
++              return NULL;
++      p->root.rb_node = 0;
++      p->curr_node = 0;
++      p->extra_fname = 0;
++      p->last_pos = 0;
++      p->curr_hash = pos2maj_hash(pos);
++      p->curr_minor_hash = pos2min_hash(pos);
++      p->next_hash = 0;
++      return p;
++}
++
++void ext3_htree_free_dir_info(struct dir_private_info *p)
++{
++      free_rb_tree_fname(&p->root);
++      kfree(p);
++}
++              
++/*
++ * Given a directory entry, enter it into the fname rb tree.
++ */
++void ext3_htree_store_dirent(struct file *dir_file, __u32 hash,
++                           __u32 minor_hash,
++                           struct ext3_dir_entry_2 *dirent)
++{
++      rb_node_t **p, *parent = NULL;
++      struct fname * fname, *new_fn;
++      struct dir_private_info *info;
++      int len;
++
++      info = (struct dir_private_info *) dir_file->private_data;
++      p = &info->root.rb_node;
++
++      /* Create and allocate the fname structure */
++      len = sizeof(struct fname) + dirent->name_len + 1;
++      new_fn = kmalloc(len, GFP_KERNEL);
++      memset(new_fn, 0, len);
++      new_fn->hash = hash;
++      new_fn->minor_hash = minor_hash;
++      new_fn->inode = le32_to_cpu(dirent->inode);
++      new_fn->name_len = dirent->name_len;
++      new_fn->file_type = dirent->file_type;
++      memcpy(new_fn->name, dirent->name, dirent->name_len);
++      new_fn->name[dirent->name_len] = 0;
++      
++      while (*p) {
++              parent = *p;
++              fname = rb_entry(parent, struct fname, rb_hash);
++
++              /*
++               * If the hash and minor hash match up, then we put
++               * them on a linked list.  This rarely happens...
++               */
++              if ((new_fn->hash == fname->hash) &&
++                  (new_fn->minor_hash == fname->minor_hash)) {
++                      new_fn->next = fname->next;
++                      fname->next = new_fn;
++                      return;
++              }
++                      
++              if (new_fn->hash < fname->hash)
++                      p = &(*p)->rb_left;
++              else if (new_fn->hash > fname->hash)
++                      p = &(*p)->rb_right;
++              else if (new_fn->minor_hash < fname->minor_hash)
++                      p = &(*p)->rb_left;
++              else /* if (new_fn->minor_hash > fname->minor_hash) */
++                      p = &(*p)->rb_right;
++      }
++
++      rb_link_node(&new_fn->rb_hash, parent, p);
++      rb_insert_color(&new_fn->rb_hash, &info->root);
++}
++
++
++
++/*
++ * This is a helper function for ext3_dx_readdir.  It calls filldir
++ * for all entres on the fname linked list.  (Normally there is only
++ * one entry on the linked list, unless there are 62 bit hash collisions.)
++ */
++static int call_filldir(struct file * filp, void * dirent,
++                      filldir_t filldir, struct fname *fname)
++{
++      struct dir_private_info *info = filp->private_data;
++      loff_t  curr_pos;
++      struct inode *inode = filp->f_dentry->d_inode;
++      struct super_block * sb;
++      int error;
++
++      sb = inode->i_sb;
++      
++      if (!fname) {
++              printk("call_filldir: called with null fname?!?\n");
++              return 0;
++      }
++      curr_pos = hash2pos(fname->hash, fname->minor_hash);
++      while (fname) {
++              error = filldir(dirent, fname->name,
++                              fname->name_len, curr_pos, 
++                              fname->inode,
++                              get_dtype(sb, fname->file_type));
++              if (error) {
++                      filp->f_pos = curr_pos;
++                      info->extra_fname = fname->next;
++                      return error;
++              }
++              fname = fname->next;
++      }
++      return 0;
++}
++
++static int ext3_dx_readdir(struct file * filp,
++                       void * dirent, filldir_t filldir)
++{
++      struct dir_private_info *info = filp->private_data;
++      struct inode *inode = filp->f_dentry->d_inode;
++      struct fname *fname;
++      int     ret;
++
++      if (!info) {
++              info = create_dir_info(filp->f_pos);
++              if (!info)
++                      return -ENOMEM;
++              filp->private_data = info;
++      }
++
++      /* Some one has messed with f_pos; reset the world */
++      if (info->last_pos != filp->f_pos) {
++              free_rb_tree_fname(&info->root);
++              info->curr_node = 0;
++              info->extra_fname = 0;
++              info->curr_hash = pos2maj_hash(filp->f_pos);
++              info->curr_minor_hash = pos2min_hash(filp->f_pos);
++      }
++
++      /*
++       * If there are any leftover names on the hash collision
++       * chain, return them first.
++       */
++      if (info->extra_fname &&
++          call_filldir(filp, dirent, filldir, info->extra_fname))
++              goto finished;
++
++      if (!info->curr_node)
++              info->curr_node = rb_get_first(&info->root);
++
++      while (1) {
++              /*
++               * Fill the rbtree if we have no more entries,
++               * or the inode has changed since we last read in the
++               * cached entries. 
++               */
++              if ((!info->curr_node) ||
++                  (filp->f_version != inode->i_version)) {
++                      info->curr_node = 0;
++                      free_rb_tree_fname(&info->root);
++                      filp->f_version = inode->i_version;
++                      ret = ext3_htree_fill_tree(filp, info->curr_hash,
++                                                 info->curr_minor_hash,
++                                                 &info->next_hash);
++                      if (ret < 0)
++                              return ret;
++                      if (ret == 0)
++                              break;
++                      info->curr_node = rb_get_first(&info->root);
++              }
++
++              fname = rb_entry(info->curr_node, struct fname, rb_hash);
++              info->curr_hash = fname->hash;
++              info->curr_minor_hash = fname->minor_hash;
++              if (call_filldir(filp, dirent, filldir, fname))
++                      break;
++
++              info->curr_node = rb_get_next(info->curr_node);
++              if (!info->curr_node) {
++                      info->curr_hash = info->next_hash;
++                      info->curr_minor_hash = 0;
++              }
++      }
++finished:
++      info->last_pos = filp->f_pos;
++      UPDATE_ATIME(inode);
++      return 0;
++}
++#endif
+Index: linux-2.4.19/fs/ext3/file.c
+===================================================================
+--- linux-2.4.19.orig/fs/ext3/file.c   2004-04-23 17:54:02.000000000 -0400
++++ linux-2.4.19/fs/ext3/file.c        2004-04-23 18:26:27.000000000 -0400
+@@ -38,6 +38,9 @@
+ {
+       if (filp->f_mode & FMODE_WRITE)
+               ext3_discard_prealloc (inode);
++      if (is_dx(inode) && filp->private_data)
++              ext3_htree_free_dir_info(filp->private_data);
++
+       return 0;
+ }
+Index: linux-2.4.19/fs/ext3/hash.c
+===================================================================
+--- linux-2.4.19.orig/fs/ext3/hash.c   2003-01-30 05:24:37.000000000 -0500
++++ linux-2.4.19/fs/ext3/hash.c        2004-04-23 18:26:27.000000000 -0400
+@@ -0,0 +1,215 @@
++/*
++ *  linux/fs/ext3/hash.c
++ *
++ * Copyright (C) 2002 by Theodore Ts'o
++ *
++ * This file is released under the GPL v2.
++ * 
++ * This file may be redistributed under the terms of the GNU Public
++ * License.
++ */
++
++#include <linux/fs.h>
++#include <linux/jbd.h>
++#include <linux/sched.h>
++#include <linux/ext3_fs.h>
++
++#define DELTA 0x9E3779B9
++
++static void TEA_transform(__u32 buf[4], __u32 const in[])
++{
++      __u32   sum = 0;
++      __u32   b0 = buf[0], b1 = buf[1];
++      __u32   a = in[0], b = in[1], c = in[2], d = in[3];
++      int     n = 16;
++
++      do {                                                    
++              sum += DELTA;                                   
++              b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); 
++              b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); 
++      } while(--n);
++
++      buf[0] += b0;
++      buf[1] += b1;
++}
++
++/* F, G and H are basic MD4 functions: selection, majority, parity */
++#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
++#define G(x, y, z) (((x) & (y)) + (((x) ^ (y)) & (z)))
++#define H(x, y, z) ((x) ^ (y) ^ (z))
++
++/*
++ * The generic round function.  The application is so specific that
++ * we don't bother protecting all the arguments with parens, as is generally
++ * good macro practice, in favor of extra legibility.
++ * Rotation is separate from addition to prevent recomputation
++ */
++#define ROUND(f, a, b, c, d, x, s)    \
++      (a += f(b, c, d) + x, a = (a << s) | (a >> (32-s)))
++#define K1 0
++#define K2 013240474631UL
++#define K3 015666365641UL
++
++/*
++ * Basic cut-down MD4 transform.  Returns only 32 bits of result.
++ */
++static void halfMD4Transform (__u32 buf[4], __u32 const in[])
++{
++      __u32   a = buf[0], b = buf[1], c = buf[2], d = buf[3];
++
++      /* Round 1 */
++      ROUND(F, a, b, c, d, in[0] + K1,  3);
++      ROUND(F, d, a, b, c, in[1] + K1,  7);
++      ROUND(F, c, d, a, b, in[2] + K1, 11);
++      ROUND(F, b, c, d, a, in[3] + K1, 19);
++      ROUND(F, a, b, c, d, in[4] + K1,  3);
++      ROUND(F, d, a, b, c, in[5] + K1,  7);
++      ROUND(F, c, d, a, b, in[6] + K1, 11);
++      ROUND(F, b, c, d, a, in[7] + K1, 19);
++
++      /* Round 2 */
++      ROUND(G, a, b, c, d, in[1] + K2,  3);
++      ROUND(G, d, a, b, c, in[3] + K2,  5);
++      ROUND(G, c, d, a, b, in[5] + K2,  9);
++      ROUND(G, b, c, d, a, in[7] + K2, 13);
++      ROUND(G, a, b, c, d, in[0] + K2,  3);
++      ROUND(G, d, a, b, c, in[2] + K2,  5);
++      ROUND(G, c, d, a, b, in[4] + K2,  9);
++      ROUND(G, b, c, d, a, in[6] + K2, 13);
++
++      /* Round 3 */
++      ROUND(H, a, b, c, d, in[3] + K3,  3);
++      ROUND(H, d, a, b, c, in[7] + K3,  9);
++      ROUND(H, c, d, a, b, in[2] + K3, 11);
++      ROUND(H, b, c, d, a, in[6] + K3, 15);
++      ROUND(H, a, b, c, d, in[1] + K3,  3);
++      ROUND(H, d, a, b, c, in[5] + K3,  9);
++      ROUND(H, c, d, a, b, in[0] + K3, 11);
++      ROUND(H, b, c, d, a, in[4] + K3, 15);
++
++      buf[0] += a;
++      buf[1] += b;
++      buf[2] += c;
++      buf[3] += d;
++}
++
++#undef ROUND
++#undef F
++#undef G
++#undef H
++#undef K1
++#undef K2
++#undef K3
++
++/* The old legacy hash */
++static __u32 dx_hack_hash (const char *name, int len)
++{
++      __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
++      while (len--) {
++              __u32 hash = hash1 + (hash0 ^ (*name++ * 7152373));
++              
++              if (hash & 0x80000000) hash -= 0x7fffffff;
++              hash1 = hash0;
++              hash0 = hash;
++      }
++      return (hash0 << 1);
++}
++
++static void str2hashbuf(const char *msg, int len, __u32 *buf, int num)
++{
++      __u32   pad, val;
++      int     i;
++
++      pad = (__u32)len | ((__u32)len << 8);
++      pad |= pad << 16;
++
++      val = pad;
++      if (len > num*4)
++              len = num * 4;
++      for (i=0; i < len; i++) {
++              if ((i % 4) == 0)
++                      val = pad;
++              val = msg[i] + (val << 8);
++              if ((i % 4) == 3) {
++                      *buf++ = val;
++                      val = pad;
++                      num--;
++              }
++      }
++      if (--num >= 0)
++              *buf++ = val;
++      while (--num >= 0)
++              *buf++ = pad;
++}
++
++/*
++ * Returns the hash of a filename.  If len is 0 and name is NULL, then
++ * this function can be used to test whether or not a hash version is
++ * supported.
++ * 
++ * The seed is an 4 longword (32 bits) "secret" which can be used to
++ * uniquify a hash.  If the seed is all zero's, then some default seed
++ * may be used.
++ * 
++ * A particular hash version specifies whether or not the seed is
++ * represented, and whether or not the returned hash is 32 bits or 64
++ * bits.  32 bit hashes will return 0 for the minor hash.
++ */
++int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
++{
++      __u32   hash;
++      __u32   minor_hash = 0;
++      const char      *p;
++      int             i;
++      __u32           in[8], buf[4];
++
++      /* Initialize the default seed for the hash checksum functions */
++      buf[0] = 0x67452301;
++      buf[1] = 0xefcdab89;
++      buf[2] = 0x98badcfe;
++      buf[3] = 0x10325476;
++
++      /* Check to see if the seed is all zero's */
++      if (hinfo->seed) {
++              for (i=0; i < 4; i++) {
++                      if (hinfo->seed[i])
++                              break;
++              }
++              if (i < 4)
++                      memcpy(buf, hinfo->seed, sizeof(buf));
++      }
++              
++      switch (hinfo->hash_version) {
++      case DX_HASH_LEGACY:
++              hash = dx_hack_hash(name, len);
++              break;
++      case DX_HASH_HALF_MD4:
++              p = name;
++              while (len > 0) {
++                      str2hashbuf(p, len, in, 8);
++                      halfMD4Transform(buf, in);
++                      len -= 32;
++                      p += 32;
++              }
++              minor_hash = buf[2];
++              hash = buf[1];
++              break;
++      case DX_HASH_TEA:
++              p = name;
++              while (len > 0) {
++                      str2hashbuf(p, len, in, 4);
++                      TEA_transform(buf, in);
++                      len -= 16;
++                      p += 16;
++              }
++              hash = buf[0];
++              minor_hash = buf[1];
++              break;
++      default:
++              hinfo->hash = 0;
++              return -1;
++      }
++      hinfo->hash = hash & ~1;
++      hinfo->minor_hash = minor_hash;
++      return 0;
++}
+Index: linux-2.4.19/fs/ext3/namei.c
+===================================================================
+--- linux-2.4.19.orig/fs/ext3/namei.c  2004-04-23 17:53:55.000000000 -0400
++++ linux-2.4.19/fs/ext3/namei.c       2004-04-23 22:24:05.000000000 -0400
+@@ -16,6 +16,12 @@
+  *        David S. Miller (davem@caip.rutgers.edu), 1995
+  *  Directory entry file type support and forward compatibility hooks
+  *    for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
++ *  Hash Tree Directory indexing (c)
++ *    Daniel Phillips, 2001
++ *  Hash Tree Directory indexing porting
++ *    Christopher Li, 2002
++ *  Hash Tree Directory indexing cleanup
++ *    Theodore Ts'o, 2002
+  */
+ #include <linux/fs.h>
+@@ -40,6 +46,630 @@
+ #define NAMEI_RA_SIZE        (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
+ #define NAMEI_RA_INDEX(c,b)  (((c) * NAMEI_RA_BLOCKS) + (b))
++static struct buffer_head *ext3_append(handle_t *handle,
++                                      struct inode *inode,
++                                      u32 *block, int *err)
++{
++      struct buffer_head *bh;
++
++      *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
++
++      if ((bh = ext3_bread(handle, inode, *block, 1, err))) {
++              inode->i_size += inode->i_sb->s_blocksize;
++              EXT3_I(inode)->i_disksize = inode->i_size;
++              ext3_journal_get_write_access(handle,bh);
++      }
++      return bh;
++}
++
++#ifndef assert
++#define assert(test) J_ASSERT(test)
++#endif
++
++#ifndef swap
++#define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0)
++#endif
++
++typedef struct { u32 v; } le_u32;
++typedef struct { u16 v; } le_u16;
++
++#ifdef DX_DEBUG
++#define dxtrace(command) command
++#else
++#define dxtrace(command) 
++#endif
++
++struct fake_dirent
++{
++      /*le*/u32 inode;
++      /*le*/u16 rec_len;
++      u8 name_len;
++      u8 file_type;
++};
++
++struct dx_countlimit
++{
++      le_u16 limit;
++      le_u16 count;
++};
++
++struct dx_entry
++{
++      le_u32 hash;
++      le_u32 block;
++};
++
++/*
++ * dx_root_info is laid out so that if it should somehow get overlaid by a
++ * dirent the two low bits of the hash version will be zero.  Therefore, the
++ * hash version mod 4 should never be 0.  Sincerely, the paranoia department.
++ */
++
++struct dx_root
++{
++      struct fake_dirent dot;
++      char dot_name[4];
++      struct fake_dirent dotdot;
++      char dotdot_name[4];
++      struct dx_root_info
++      {
++              le_u32 reserved_zero;
++              u8 hash_version;
++              u8 info_length; /* 8 */
++              u8 indirect_levels;
++              u8 unused_flags;
++      }
++      info;
++      struct dx_entry entries[0];
++};
++
++struct dx_node
++{
++      struct fake_dirent fake;
++      struct dx_entry entries[0];
++};
++
++
++struct dx_frame
++{
++      struct buffer_head *bh;
++      struct dx_entry *entries;
++      struct dx_entry *at;
++};
++
++struct dx_map_entry
++{
++      u32 hash;
++      u32 offs;
++};
++
++#ifdef CONFIG_EXT3_INDEX
++static inline unsigned dx_get_block (struct dx_entry *entry);
++static void dx_set_block (struct dx_entry *entry, unsigned value);
++static inline unsigned dx_get_hash (struct dx_entry *entry);
++static void dx_set_hash (struct dx_entry *entry, unsigned value);
++static unsigned dx_get_count (struct dx_entry *entries);
++static unsigned dx_get_limit (struct dx_entry *entries);
++static void dx_set_count (struct dx_entry *entries, unsigned value);
++static void dx_set_limit (struct dx_entry *entries, unsigned value);
++static unsigned dx_root_limit (struct inode *dir, unsigned infosize);
++static unsigned dx_node_limit (struct inode *dir);
++static struct dx_frame *dx_probe(struct dentry *dentry,
++                               struct inode *dir,
++                               struct dx_hash_info *hinfo,
++                               struct dx_frame *frame,
++                               int *err);
++static void dx_release (struct dx_frame *frames);
++static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
++                      struct dx_hash_info *hinfo, struct dx_map_entry map[]);
++static void dx_sort_map(struct dx_map_entry *map, unsigned count);
++static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to,
++              struct dx_map_entry *offsets, int count);
++static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size);
++static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block);
++static int ext3_htree_next_block(struct inode *dir, __u32 hash,
++                               struct dx_frame *frame,
++                               struct dx_frame *frames, int *err,
++                               __u32 *start_hash);
++static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
++                     struct ext3_dir_entry_2 **res_dir, int *err);
++static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
++                           struct inode *inode);
++
++/*
++ * Future: use high four bits of block for coalesce-on-delete flags
++ * Mask them off for now.
++ */
++
++static inline unsigned dx_get_block (struct dx_entry *entry)
++{
++      return le32_to_cpu(entry->block.v) & 0x00ffffff;
++}
++
++static inline void dx_set_block (struct dx_entry *entry, unsigned value)
++{
++      entry->block.v = cpu_to_le32(value);
++}
++
++static inline unsigned dx_get_hash (struct dx_entry *entry)
++{
++      return le32_to_cpu(entry->hash.v);
++}
++
++static inline void dx_set_hash (struct dx_entry *entry, unsigned value)
++{
++      entry->hash.v = cpu_to_le32(value);
++}
++
++static inline unsigned dx_get_count (struct dx_entry *entries)
++{
++      return le16_to_cpu(((struct dx_countlimit *) entries)->count.v);
++}
++
++static inline unsigned dx_get_limit (struct dx_entry *entries)
++{
++      return le16_to_cpu(((struct dx_countlimit *) entries)->limit.v);
++}
++
++static inline void dx_set_count (struct dx_entry *entries, unsigned value)
++{
++      ((struct dx_countlimit *) entries)->count.v = cpu_to_le16(value);
++}
++
++static inline void dx_set_limit (struct dx_entry *entries, unsigned value)
++{
++      ((struct dx_countlimit *) entries)->limit.v = cpu_to_le16(value);
++}
++
++static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize)
++{
++      unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) -
++              EXT3_DIR_REC_LEN(2) - infosize;
++      return 0? 20: entry_space / sizeof(struct dx_entry);
++}
++
++static inline unsigned dx_node_limit (struct inode *dir)
++{
++      unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0);
++      return 0? 22: entry_space / sizeof(struct dx_entry);
++}
++
++/*
++ * Debug
++ */
++#ifdef DX_DEBUG
++struct stats
++{ 
++      unsigned names;
++      unsigned space;
++      unsigned bcount;
++};
++
++static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_entry_2 *de,
++                               int size, int show_names)
++{
++      unsigned names = 0, space = 0;
++      char *base = (char *) de;
++      struct dx_hash_info h = *hinfo;
++      
++      printk("names: ");
++      while ((char *) de < base + size)
++      {
++              if (de->inode)
++              {
++                      if (show_names)
++                      {
++                              int len = de->name_len;
++                              char *name = de->name;
++                              while (len--) printk("%c", *name++);
++                              ext3fs_dirhash(de->name, de->name_len, &h);
++                              printk(":%x.%u ", h.hash,
++                                     ((char *) de - base));
++                      }
++                      space += EXT3_DIR_REC_LEN(de->name_len);
++                      names++;
++              }
++              de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
++      }
++      printk("(%i)\n", names);
++      return (struct stats) { names, space, 1 };
++}
++
++struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
++                           struct dx_entry *entries, int levels)
++{
++      unsigned blocksize = dir->i_sb->s_blocksize;
++      unsigned count = dx_get_count (entries), names = 0, space = 0, i;
++      unsigned bcount = 0;
++      struct buffer_head *bh;
++      int err;
++      printk("%i indexed blocks...\n", count);
++      for (i = 0; i < count; i++, entries++)
++      {
++              u32 block = dx_get_block(entries), hash = i? dx_get_hash(entries): 0;
++              u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
++              struct stats stats;
++              printk("%s%3u:%03u hash %8x/%8x ",levels?"":"   ", i, block, hash, range);
++              if (!(bh = ext3_bread (NULL,dir, block, 0,&err))) continue;
++              stats = levels?
++                 dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
++                 dx_show_leaf(hinfo, (struct ext3_dir_entry_2 *) bh->b_data, blocksize, 0);
++              names += stats.names;
++              space += stats.space;
++              bcount += stats.bcount;
++              brelse (bh);
++      }
++      if (bcount)
++              printk("%snames %u, fullness %u (%u%%)\n", levels?"":"   ",
++                      names, space/bcount,(space/bcount)*100/blocksize);
++      return (struct stats) { names, space, bcount};
++}
++#endif /* DX_DEBUG */
++
++/*
++ * Probe for a directory leaf block to search.
++ *
++ * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
++ * error in the directory index, and the caller should fall back to
++ * searching the directory normally.  The callers of dx_probe **MUST**
++ * check for this error code, and make sure it never gets reflected
++ * back to userspace.
++ */
++static struct dx_frame *
++dx_probe(struct dentry *dentry, struct inode *dir,
++       struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
++{
++      unsigned count, indirect;
++      struct dx_entry *at, *entries, *p, *q, *m;
++      struct dx_root *root;
++      struct buffer_head *bh;
++      struct dx_frame *frame = frame_in;
++      u32 hash;
++
++      frame->bh = NULL;
++      if (dentry)
++              dir = dentry->d_parent->d_inode;
++      if (!(bh = ext3_bread (NULL,dir, 0, 0, err)))
++              goto fail;
++      root = (struct dx_root *) bh->b_data;
++      if (root->info.hash_version != DX_HASH_TEA &&
++          root->info.hash_version != DX_HASH_HALF_MD4 &&
++          root->info.hash_version != DX_HASH_LEGACY) {
++              ext3_warning(dir->i_sb, __FUNCTION__,
++                           "Unrecognised inode hash code %d",
++                           root->info.hash_version);
++              brelse(bh);
++              *err = ERR_BAD_DX_DIR;
++              goto fail;
++      }
++      hinfo->hash_version = root->info.hash_version;
++      hinfo->seed = dir->i_sb->u.ext3_sb.s_hash_seed;
++      if (dentry)
++              ext3fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo);
++      hash = hinfo->hash;
++
++      if (root->info.unused_flags & 1) {
++              ext3_warning(dir->i_sb, __FUNCTION__,
++                           "Unimplemented inode hash flags: %#06x",
++                           root->info.unused_flags);
++              brelse(bh);
++              *err = ERR_BAD_DX_DIR;
++              goto fail;
++      }
++
++      if ((indirect = root->info.indirect_levels) > 1) {
++              ext3_warning(dir->i_sb, __FUNCTION__,
++                           "Unimplemented inode hash depth: %#06x",
++                           root->info.indirect_levels);
++              brelse(bh);
++              *err = ERR_BAD_DX_DIR;
++              goto fail;
++      }
++
++      entries = (struct dx_entry *) (((char *)&root->info) +
++                                     root->info.info_length);
++      assert(dx_get_limit(entries) == dx_root_limit(dir,
++                                                    root->info.info_length));
++      dxtrace (printk("Look up %x", hash));
++      while (1)
++      {
++              count = dx_get_count(entries);
++              assert (count && count <= dx_get_limit(entries));
++              p = entries + 1;
++              q = entries + count - 1;
++              while (p <= q)
++              {
++                      m = p + (q - p)/2;
++                      dxtrace(printk("."));
++                      if (dx_get_hash(m) > hash)
++                              q = m - 1;
++                      else
++                              p = m + 1;
++              }
++
++              if (0) // linear search cross check
++              {
++                      unsigned n = count - 1;
++                      at = entries;
++                      while (n--)
++                      {
++                              dxtrace(printk(","));
++                              if (dx_get_hash(++at) > hash)
++                              {
++                                      at--;
++                                      break;
++                              }
++                      }
++                      assert (at == p - 1);
++              }
++
++              at = p - 1;
++              dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at)));
++              frame->bh = bh;
++              frame->entries = entries;
++              frame->at = at;
++              if (!indirect--) return frame;
++              if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err)))
++                      goto fail2;
++              at = entries = ((struct dx_node *) bh->b_data)->entries;
++              assert (dx_get_limit(entries) == dx_node_limit (dir));
++              frame++;
++      }
++fail2:
++      while (frame >= frame_in) {
++              brelse(frame->bh);
++              frame--;
++      }
++fail:
++      return NULL;
++}
++
++static void dx_release (struct dx_frame *frames)
++{
++      if (frames[0].bh == NULL)
++              return;
++
++      if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels)
++              brelse(frames[1].bh);
++      brelse(frames[0].bh);
++}
++
++/*
++ * This function increments the frame pointer to search the next leaf
++ * block, and reads in the necessary intervening nodes if the search
++ * should be necessary.  Whether or not the search is necessary is
++ * controlled by the hash parameter.  If the hash value is even, then
++ * the search is only continued if the next block starts with that
++ * hash value.  This is used if we are searching for a specific file.
++ *
++ * If the hash value is HASH_NB_ALWAYS, then always go to the next block.
++ *
++ * This function returns 1 if the caller should continue to search,
++ * or 0 if it should not.  If there is an error reading one of the
++ * index blocks, it will return -1.
++ *
++ * If start_hash is non-null, it will be filled in with the starting
++ * hash of the next page.
++ */
++static int ext3_htree_next_block(struct inode *dir, __u32 hash,
++                               struct dx_frame *frame,
++                               struct dx_frame *frames, int *err,
++                               __u32 *start_hash)
++{
++      struct dx_frame *p;
++      struct buffer_head *bh;
++      int num_frames = 0;
++      __u32 bhash;
++
++      *err = ENOENT;
++      p = frame;
++      /*
++       * Find the next leaf page by incrementing the frame pointer.
++       * If we run out of entries in the interior node, loop around and
++       * increment pointer in the parent node.  When we break out of
++       * this loop, num_frames indicates the number of interior
++       * nodes need to be read.
++       */
++      while (1) {
++              if (++(p->at) < p->entries + dx_get_count(p->entries))
++                      break;
++              if (p == frames)
++                      return 0;
++              num_frames++;
++              p--;
++      }
++
++      /*
++       * If the hash is 1, then continue only if the next page has a
++       * continuation hash of any value.  This is used for readdir
++       * handling.  Otherwise, check to see if the hash matches the
++       * desired contiuation hash.  If it doesn't, return since
++       * there's no point to read in the successive index pages.
++       */
++      bhash = dx_get_hash(p->at);
++      if (start_hash)
++              *start_hash = bhash;
++      if ((hash & 1) == 0) {
++              if ((bhash & ~1) != hash)
++                      return 0;
++      }
++      /*
++       * If the hash is HASH_NB_ALWAYS, we always go to the next
++       * block so no check is necessary
++       */
++      while (num_frames--) {
++              if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at),
++                                    0, err)))
++                      return -1; /* Failure */
++              p++;
++              brelse (p->bh);
++              p->bh = bh;
++              p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
++      }
++      return 1;
++}
++
++
++/*
++ * p is at least 6 bytes before the end of page
++ */
++static inline struct ext3_dir_entry_2 *ext3_next_entry(struct ext3_dir_entry_2 *p)
++{
++      return (struct ext3_dir_entry_2 *)((char*)p + le16_to_cpu(p->rec_len));
++}
++
++/*
++ * This function fills a red-black tree with information from a
++ * directory.  We start scanning the directory in hash order, starting
++ * at start_hash and start_minor_hash.
++ *
++ * This function returns the number of entries inserted into the tree,
++ * or a negative error code.
++ */
++int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
++                       __u32 start_minor_hash, __u32 *next_hash)
++{
++      struct dx_hash_info hinfo;
++      struct buffer_head *bh;
++      struct ext3_dir_entry_2 *de, *top;
++      static struct dx_frame frames[2], *frame;
++      struct inode *dir;
++      int block, err;
++      int count = 0;
++      int ret;
++      __u32 hashval;
++      
++      dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash,
++                     start_minor_hash));
++      dir = dir_file->f_dentry->d_inode;
++      hinfo.hash = start_hash;
++      hinfo.minor_hash = 0;
++      frame = dx_probe(0, dir_file->f_dentry->d_inode, &hinfo, frames, &err);
++      if (!frame)
++              return err;
++
++      while (1) {
++              block = dx_get_block(frame->at);
++              dxtrace(printk("Reading block %d\n", block));
++              if (!(bh = ext3_bread (NULL, dir, block, 0, &err)))
++                      goto errout;
++      
++              de = (struct ext3_dir_entry_2 *) bh->b_data;
++              top = (struct ext3_dir_entry_2 *) ((char *) de + dir->i_sb->s_blocksize -
++                                     EXT3_DIR_REC_LEN(0));
++              for (; de < top; de = ext3_next_entry(de)) {
++                      ext3fs_dirhash(de->name, de->name_len, &hinfo);
++                      if ((hinfo.hash < start_hash) ||
++                          ((hinfo.hash == start_hash) &&
++                           (hinfo.minor_hash < start_minor_hash)))
++                              continue;
++                      ext3_htree_store_dirent(dir_file, hinfo.hash,
++                                              hinfo.minor_hash, de);
++                      count++;
++              }
++              brelse (bh);
++              hashval = ~1;
++              ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS, 
++                                          frame, frames, &err, &hashval);
++              if (next_hash)
++                      *next_hash = hashval;
++              if (ret == -1)
++                      goto errout;
++              /*
++               * Stop if:  (a) there are no more entries, or
++               * (b) we have inserted at least one entry and the
++               * next hash value is not a continuation
++               */
++              if ((ret == 0) ||
++                  (count && ((hashval & 1) == 0)))
++                      break;
++      }
++      dx_release(frames);
++      dxtrace(printk("Fill tree: returned %d entries\n", count));
++      return count;
++errout:
++      dx_release(frames);
++      return (err);
++}
++
++
++/*
++ * Directory block splitting, compacting
++ */
++
++static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
++                      struct dx_hash_info *hinfo, struct dx_map_entry *map_tail)
++{
++      int count = 0;
++      char *base = (char *) de;
++      struct dx_hash_info h = *hinfo;
++      
++      while ((char *) de < base + size)
++      {
++              if (de->name_len && de->inode) {
++                      ext3fs_dirhash(de->name, de->name_len, &h);
++                      map_tail--;
++                      map_tail->hash = h.hash;
++                      map_tail->offs = (u32) ((char *) de - base);
++                      count++;
++              }
++              /* XXX: do we need to check rec_len == 0 case? -Chris */
++              de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
++      }
++      return count;
++}
++
++static void dx_sort_map (struct dx_map_entry *map, unsigned count)
++{
++      struct dx_map_entry *p, *q, *top = map + count - 1;
++      int more;
++      /* Combsort until bubble sort doesn't suck */
++      while (count > 2)
++      {
++              count = count*10/13;
++              if (count - 9 < 2) /* 9, 10 -> 11 */
++                      count = 11;
++              for (p = top, q = p - count; q >= map; p--, q--)
++                      if (p->hash < q->hash)
++                              swap(*p, *q);
++      }
++      /* Garden variety bubble sort */
++      do {
++              more = 0;
++              q = top;
++              while (q-- > map)
++              {
++                      if (q[1].hash >= q[0].hash)
++                              continue;
++                      swap(*(q+1), *q);
++                      more = 1;
++              }
++      } while(more);
++}
++
++static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block)
++{
++      struct dx_entry *entries = frame->entries;
++      struct dx_entry *old = frame->at, *new = old + 1;
++      int count = dx_get_count(entries);
++
++      assert(count < dx_get_limit(entries));
++      assert(old < entries + count);
++      memmove(new + 1, new, (char *)(entries + count) - (char *)(new));
++      dx_set_hash(new, hash);
++      dx_set_block(new, block);
++      dx_set_count(entries, count + 1);
++}
++#endif
++
++
++static void ext3_update_dx_flag(struct inode *inode)
++{
++      if (!EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
++                                   EXT3_FEATURE_COMPAT_DIR_INDEX))
++              EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL;
++}
++
+ /*
+  * NOTE! unlike strncmp, ext3_match returns 1 for success, 0 for failure.
+  *
+@@ -96,6 +726,7 @@
+       return 0;
+ }
++
+ /*
+  *    ext3_find_entry()
+  *
+@@ -107,6 +738,8 @@
+  * The returned buffer_head has ->b_count elevated.  The caller is expected
+  * to brelse() it when appropriate.
+  */
++
++      
+ static struct buffer_head * ext3_find_entry (struct dentry *dentry,
+                                       struct ext3_dir_entry_2 ** res_dir)
+ {
+@@ -121,12 +754,32 @@
+       int num = 0;
+       int nblocks, i, err;
+       struct inode *dir = dentry->d_parent->d_inode;
++      int namelen;
++      const u8 *name;
++      unsigned blocksize;
+       *res_dir = NULL;
+       sb = dir->i_sb;
+-
++      blocksize = sb->s_blocksize;
++      namelen = dentry->d_name.len;
++      name = dentry->d_name.name;
++      if (namelen > EXT3_NAME_LEN)
++              return NULL;
++#ifdef CONFIG_EXT3_INDEX
++      if (is_dx(dir)) {
++              bh = ext3_dx_find_entry(dentry, res_dir, &err);
++              /*
++               * On success, or if the error was file not found,
++               * return.  Otherwise, fall back to doing a search the
++               * old fashioned way.
++               */
++              if (bh || (err != ERR_BAD_DX_DIR))
++                      return bh;
++              dxtrace(printk("ext3_find_entry: dx failed, falling back\n"));
++      }
++#endif
+       nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb);
+-      start = dir->u.ext3_i.i_dir_start_lookup;
++      start = EXT3_I(dir)->i_dir_start_lookup;
+       if (start >= nblocks)
+               start = 0;
+       block = start;
+@@ -167,7 +820,7 @@
+               i = search_dirblock(bh, dir, dentry,
+                           block << EXT3_BLOCK_SIZE_BITS(sb), res_dir);
+               if (i == 1) {
+-                      dir->u.ext3_i.i_dir_start_lookup = block;
++                      EXT3_I(dir)->i_dir_start_lookup = block;
+                       ret = bh;
+                       goto cleanup_and_exit;
+               } else {
+@@ -198,6 +851,66 @@
+       return ret;
+ }
++#ifdef CONFIG_EXT3_INDEX
++static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
++                     struct ext3_dir_entry_2 **res_dir, int *err)
++{
++      struct super_block * sb;
++      struct dx_hash_info     hinfo;
++      u32 hash;
++      struct dx_frame frames[2], *frame;
++      struct ext3_dir_entry_2 *de, *top;
++      struct buffer_head *bh;
++      unsigned long block;
++      int retval;
++      int namelen = dentry->d_name.len;
++      const u8 *name = dentry->d_name.name;
++      struct inode *dir = dentry->d_parent->d_inode;
++      
++      sb = dir->i_sb;
++      if (!(frame = dx_probe (dentry, 0, &hinfo, frames, err)))
++              return NULL;
++      hash = hinfo.hash;
++      do {
++              block = dx_get_block(frame->at);
++              if (!(bh = ext3_bread (NULL,dir, block, 0, err)))
++                      goto errout;
++              de = (struct ext3_dir_entry_2 *) bh->b_data;
++              top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize -
++                                     EXT3_DIR_REC_LEN(0));
++              for (; de < top; de = ext3_next_entry(de))
++              if (ext3_match (namelen, name, de)) {
++                      if (!ext3_check_dir_entry("ext3_find_entry",
++                                                dir, de, bh,
++                                (block<<EXT3_BLOCK_SIZE_BITS(sb))
++                                        +((char *)de - bh->b_data))) {
++                              brelse (bh);
++                              goto errout;
++                      }
++                      *res_dir = de;
++                      dx_release (frames);
++                      return bh;
++              }
++              brelse (bh);
++              /* Check to see if we should continue to search */
++              retval = ext3_htree_next_block(dir, hash, frame,
++                                             frames, err, 0);
++              if (retval == -1) {
++                      ext3_warning(sb, __FUNCTION__,
++                           "error reading index page in directory #%lu",
++                           dir->i_ino);
++                      goto errout;
++              }
++      } while (retval == 1);
++      
++      *err = -ENOENT;
++errout:
++      dxtrace(printk("%s not found\n", name));
++      dx_release (frames);
++      return NULL;
++}
++#endif
++
+ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry)
+ {
+       struct inode * inode;
+@@ -214,8 +927,9 @@
+               brelse (bh);
+               inode = iget(dir->i_sb, ino);
+-              if (!inode)
++              if (!inode) {
+                       return ERR_PTR(-EACCES);
++              }
+       }
+       d_add(dentry, inode);
+       return NULL;
+@@ -239,6 +953,301 @@
+               de->file_type = ext3_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
+ }
++#ifdef CONFIG_EXT3_INDEX
++static struct ext3_dir_entry_2 *
++dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
++{
++      unsigned rec_len = 0;
++
++      while (count--) {
++              struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs);
++              rec_len = EXT3_DIR_REC_LEN(de->name_len);
++              memcpy (to, de, rec_len);
++              ((struct ext3_dir_entry_2 *) to)->rec_len = rec_len;
++              de->inode = 0;
++              map++;
++              to += rec_len;
++      }
++      return (struct ext3_dir_entry_2 *) (to - rec_len);
++}
++
++static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
++{
++      struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base;
++      unsigned rec_len = 0;
++
++      prev = to = de;
++      while ((char*)de < base + size) {
++              next = (struct ext3_dir_entry_2 *) ((char *) de +
++                                                  le16_to_cpu(de->rec_len));
++              if (de->inode && de->name_len) {
++                      rec_len = EXT3_DIR_REC_LEN(de->name_len);
++                      if (de > to)
++                              memmove(to, de, rec_len);
++                      to->rec_len = rec_len;
++                      prev = to;
++                      to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len);
++              }
++              de = next;
++      }
++      return prev;
++}
++
++static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
++                      struct buffer_head **bh,struct dx_frame *frame,
++                      struct dx_hash_info *hinfo, int *error)
++{
++      unsigned blocksize = dir->i_sb->s_blocksize;
++      unsigned count, continued;
++      struct buffer_head *bh2;
++      u32 newblock;
++      u32 hash2;
++      struct dx_map_entry *map;
++      char *data1 = (*bh)->b_data, *data2;
++      unsigned split;
++      struct ext3_dir_entry_2 *de = NULL, *de2;
++      int     err;
++
++      bh2 = ext3_append (handle, dir, &newblock, error);
++      if (!(bh2)) {
++              brelse(*bh);
++              *bh = NULL;
++              goto errout;
++      }
++
++      BUFFER_TRACE(*bh, "get_write_access");
++      err = ext3_journal_get_write_access(handle, *bh);
++      if (err) {
++      journal_error:
++              brelse(*bh);
++              brelse(bh2);
++              *bh = NULL;
++              ext3_std_error(dir->i_sb, err);
++              goto errout;
++      }
++      BUFFER_TRACE(frame->bh, "get_write_access");
++      err = ext3_journal_get_write_access(handle, frame->bh);
++      if (err)
++              goto journal_error;
++
++      data2 = bh2->b_data;
++
++      /* create map in the end of data2 block */
++      map = (struct dx_map_entry *) (data2 + blocksize);
++      count = dx_make_map ((struct ext3_dir_entry_2 *) data1,
++                           blocksize, hinfo, map);
++      map -= count;
++      split = count/2; // need to adjust to actual middle
++      dx_sort_map (map, count);
++      hash2 = map[split].hash;
++      continued = hash2 == map[split - 1].hash;
++      dxtrace(printk("Split block %i at %x, %i/%i\n",
++              dx_get_block(frame->at), hash2, split, count-split));
++
++      /* Fancy dance to stay within two buffers */
++      de2 = dx_move_dirents(data1, data2, map + split, count - split);
++      de = dx_pack_dirents(data1,blocksize);
++      de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
++      de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2);
++      dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data1, blocksize, 1));
++      dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data2, blocksize, 1));
++
++      /* Which block gets the new entry? */
++      if (hinfo->hash >= hash2)
++      {
++              swap(*bh, bh2);
++              de = de2;
++      }
++      dx_insert_block (frame, hash2 + continued, newblock);
++      err = ext3_journal_dirty_metadata (handle, bh2);
++      if (err)
++              goto journal_error;
++      err = ext3_journal_dirty_metadata (handle, frame->bh);
++      if (err)
++              goto journal_error;
++      brelse (bh2);
++      dxtrace(dx_show_index ("frame", frame->entries));
++errout:
++      return de;
++}
++#endif
++
++
++/*
++ * Add a new entry into a directory (leaf) block.  If de is non-NULL,
++ * it points to a directory entry which is guaranteed to be large
++ * enough for new directory entry.  If de is NULL, then
++ * add_dirent_to_buf will attempt search the directory block for
++ * space.  It will return -ENOSPC if no space is available, and -EIO
++ * and -EEXIST if directory entry already exists.
++ * 
++ * NOTE!  bh is NOT released in the case where ENOSPC is returned.  In
++ * all other cases bh is released.
++ */
++static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
++                           struct inode *inode, struct ext3_dir_entry_2 *de,
++                           struct buffer_head * bh)
++{
++      struct inode    *dir = dentry->d_parent->d_inode;
++      const char      *name = dentry->d_name.name;
++      int             namelen = dentry->d_name.len;
++      unsigned long   offset = 0;
++      unsigned short  reclen;
++      int             nlen, rlen, err;
++      char            *top;
++      
++      reclen = EXT3_DIR_REC_LEN(namelen);
++      if (!de) {
++              de = (struct ext3_dir_entry_2 *)bh->b_data;
++              top = bh->b_data + dir->i_sb->s_blocksize - reclen;
++              while ((char *) de <= top) {
++                      if (!ext3_check_dir_entry("ext3_add_entry", dir, de,
++                                                bh, offset)) {
++                              brelse (bh);
++                              return -EIO;
++                      }
++                      if (ext3_match (namelen, name, de)) {
++                              brelse (bh);
++                              return -EEXIST;
++                      }
++                      nlen = EXT3_DIR_REC_LEN(de->name_len);
++                      rlen = le16_to_cpu(de->rec_len);
++                      if ((de->inode? rlen - nlen: rlen) >= reclen)
++                              break;
++                      de = (struct ext3_dir_entry_2 *)((char *)de + rlen);
++                      offset += rlen;
++              }
++              if ((char *) de > top)
++                      return -ENOSPC;
++      }
++      BUFFER_TRACE(bh, "get_write_access");
++      err = ext3_journal_get_write_access(handle, bh);
++      if (err) {
++              ext3_std_error(dir->i_sb, err);
++              brelse(bh);
++              return err;
++      }
++      
++      /* By now the buffer is marked for journaling */
++      nlen = EXT3_DIR_REC_LEN(de->name_len);
++      rlen = le16_to_cpu(de->rec_len);
++      if (de->inode) {
++              struct ext3_dir_entry_2 *de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen);
++              de1->rec_len = cpu_to_le16(rlen - nlen);
++              de->rec_len = cpu_to_le16(nlen);
++              de = de1;
++      }
++      de->file_type = EXT3_FT_UNKNOWN;
++      if (inode) {
++              de->inode = cpu_to_le32(inode->i_ino);
++              ext3_set_de_type(dir->i_sb, de, inode->i_mode);
++      } else
++              de->inode = 0;
++      de->name_len = namelen;
++      memcpy (de->name, name, namelen);
++      /*
++       * XXX shouldn't update any times until successful
++       * completion of syscall, but too many callers depend
++       * on this.
++       *
++       * XXX similarly, too many callers depend on
++       * ext3_new_inode() setting the times, but error
++       * recovery deletes the inode, so the worst that can
++       * happen is that the times are slightly out of date
++       * and/or different from the directory change time.
++       */
++      dir->i_mtime = dir->i_ctime = CURRENT_TIME;
++      ext3_update_dx_flag(dir);
++      dir->i_version = ++event;
++      ext3_mark_inode_dirty(handle, dir);
++      BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
++      err = ext3_journal_dirty_metadata(handle, bh);
++      if (err)
++              ext3_std_error(dir->i_sb, err);
++      brelse(bh);
++      return 0;
++}
++
++#ifdef CONFIG_EXT3_INDEX
++/*
++ * This converts a one block unindexed directory to a 3 block indexed
++ * directory, and adds the dentry to the indexed directory.
++ */
++static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
++                          struct inode *inode, struct buffer_head *bh)
++{
++      struct inode    *dir = dentry->d_parent->d_inode;
++      const char      *name = dentry->d_name.name;
++      int             namelen = dentry->d_name.len;
++      struct buffer_head *bh2;
++      struct dx_root  *root;
++      struct dx_frame frames[2], *frame;
++      struct dx_entry *entries;
++      struct ext3_dir_entry_2 *de, *de2;
++      char            *data1, *top;
++      unsigned        len;
++      int             retval;
++      unsigned        blocksize;
++      struct dx_hash_info hinfo;
++      u32             block;
++              
++      blocksize =  dir->i_sb->s_blocksize;
++      dxtrace(printk("Creating index\n"));
++      retval = ext3_journal_get_write_access(handle, bh);
++      if (retval) {
++              ext3_std_error(dir->i_sb, retval);
++              brelse(bh);
++              return retval;
++      }
++      root = (struct dx_root *) bh->b_data;
++              
++      EXT3_I(dir)->i_flags |= EXT3_INDEX_FL;
++      bh2 = ext3_append (handle, dir, &block, &retval);
++      if (!(bh2)) {
++              brelse(bh);
++              return retval;
++      }
++      data1 = bh2->b_data;
++
++      /* The 0th block becomes the root, move the dirents out */
++      de = (struct ext3_dir_entry_2 *) &root->dotdot;
++      de = (struct ext3_dir_entry_2 *) ((char *)de + de->rec_len);
++      len = ((char *) root) + blocksize - (char *) de;
++      memcpy (data1, de, len);
++      de = (struct ext3_dir_entry_2 *) data1;
++      top = data1 + len;
++      while (((char *) de2=(char*)de+le16_to_cpu(de->rec_len)) < top)
++              de = de2;
++      de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
++      /* Initialize the root; the dot dirents already exist */
++      de = (struct ext3_dir_entry_2 *) (&root->dotdot);
++      de->rec_len = cpu_to_le16(blocksize - EXT3_DIR_REC_LEN(2));
++      memset (&root->info, 0, sizeof(root->info));
++      root->info.info_length = sizeof(root->info);
++      root->info.hash_version = dir->i_sb->u.ext3_sb.s_def_hash_version;
++      entries = root->entries;
++      dx_set_block (entries, 1);
++      dx_set_count (entries, 1);
++      dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info)));
++
++      /* Initialize as for dx_probe */
++      hinfo.hash_version = root->info.hash_version;
++      hinfo.seed = dir->i_sb->u.ext3_sb.s_hash_seed;
++      ext3fs_dirhash(name, namelen, &hinfo);
++      frame = frames;
++      frame->entries = entries;
++      frame->at = entries;
++      frame->bh = bh;
++      bh = bh2;
++      de = do_split(handle,dir, &bh, frame, &hinfo, &retval);
++      dx_release (frames);
++      if (!(de))
++              return retval;
++
++      return add_dirent_to_buf(handle, dentry, inode, de, bh);
++}
++#endif
++
+ /*
+  *    ext3_add_entry()
+  *
+@@ -249,127 +1258,197 @@
+  * may not sleep between calling this and putting something into
+  * the entry, as someone else might have used it while you slept.
+  */
+-
+-/*
+- * AKPM: the journalling code here looks wrong on the error paths
+- */
+ static int ext3_add_entry (handle_t *handle, struct dentry *dentry,
+       struct inode *inode)
+ {
+       struct inode *dir = dentry->d_parent->d_inode;
+-      const char *name = dentry->d_name.name;
+-      int namelen = dentry->d_name.len;
+       unsigned long offset;
+-      unsigned short rec_len;
+       struct buffer_head * bh;
+-      struct ext3_dir_entry_2 * de, * de1;
++      struct ext3_dir_entry_2 *de;
+       struct super_block * sb;
+       int     retval;
++#ifdef CONFIG_EXT3_INDEX
++        int     dx_fallback=0;
++#endif
++        unsigned blocksize;
++        unsigned nlen, rlen;
++        u32 block, blocks;
+       sb = dir->i_sb;
+-
+-      if (!namelen)
++        blocksize = sb->s_blocksize;
++      if (!dentry->d_name.len)
+               return -EINVAL;
+-      bh = ext3_bread (handle, dir, 0, 0, &retval);
++#ifdef CONFIG_EXT3_INDEX
++        if (is_dx(dir)) {
++                retval = ext3_dx_add_entry(handle, dentry, inode);
++                if (!retval || (retval != ERR_BAD_DX_DIR))
++                        return retval;
++                EXT3_I(dir)->i_flags &= ~EXT3_INDEX_FL;
++                dx_fallback++;
++                ext3_mark_inode_dirty(handle, dir);
++        }
++#endif
++        blocks = dir->i_size >> sb->s_blocksize_bits;
++        for (block = 0, offset = 0; block < blocks; block++) {
++                bh = ext3_bread(handle, dir, block, 0, &retval);
++                if(!bh)
++                        return retval;
++                retval = add_dirent_to_buf(handle, dentry, inode, 0, bh);
++                if (retval != -ENOSPC)
++                        return retval;
++
++#ifdef CONFIG_EXT3_INDEX
++                if (blocks == 1 && !dx_fallback &&
++                    EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX))
++                        return make_indexed_dir(handle, dentry, inode, bh);
++#endif
++                brelse(bh);
++        }
++        bh = ext3_append(handle, dir, &block, &retval);
+       if (!bh)
+               return retval;
+-      rec_len = EXT3_DIR_REC_LEN(namelen);
+-      offset = 0;
+       de = (struct ext3_dir_entry_2 *) bh->b_data;
+-      while (1) {
+-              if ((char *)de >= sb->s_blocksize + bh->b_data) {
+-                      brelse (bh);
+-                      bh = NULL;
+-                      bh = ext3_bread (handle, dir,
+-                              offset >> EXT3_BLOCK_SIZE_BITS(sb), 1, &retval);
+-                      if (!bh)
+-                              return retval;
+-                      if (dir->i_size <= offset) {
+-                              if (dir->i_size == 0) {
+-                                      brelse(bh);
+-                                      return -ENOENT;
+-                              }
+-
+-                              ext3_debug ("creating next block\n");
+-
+-                              BUFFER_TRACE(bh, "get_write_access");
+-                              ext3_journal_get_write_access(handle, bh);
+-                              de = (struct ext3_dir_entry_2 *) bh->b_data;
+-                              de->inode = 0;
+-                              de->rec_len = le16_to_cpu(sb->s_blocksize);
+-                              dir->u.ext3_i.i_disksize =
+-                                      dir->i_size = offset + sb->s_blocksize;
+-                              dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
+-                              ext3_mark_inode_dirty(handle, dir);
+-                      } else {
+-
+-                              ext3_debug ("skipping to next block\n");
++        de->inode = 0;
++        de->rec_len = cpu_to_le16(rlen = blocksize);
++        nlen = 0;
++        return add_dirent_to_buf(handle, dentry, inode, de, bh);
++}
+-                              de = (struct ext3_dir_entry_2 *) bh->b_data;
+-                      }
+-              }
+-              if (!ext3_check_dir_entry ("ext3_add_entry", dir, de, bh,
+-                                         offset)) {
+-                      brelse (bh);
+-                      return -ENOENT;
+-              }
+-              if (ext3_match (namelen, name, de)) {
+-                              brelse (bh);
+-                              return -EEXIST;
++#ifdef CONFIG_EXT3_INDEX
++/*
++ * Returns 0 for success, or a negative error value
++ */
++static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
++                             struct inode *inode)
++{
++        struct dx_frame frames[2], *frame;
++        struct dx_entry *entries, *at;
++        struct dx_hash_info hinfo;
++        struct buffer_head * bh;
++        struct inode *dir = dentry->d_parent->d_inode;
++        struct super_block * sb = dir->i_sb;
++        struct ext3_dir_entry_2 *de;
++        int err;
++
++        frame = dx_probe(dentry, 0, &hinfo, frames, &err);
++        if (!frame)
++                return err;
++        entries = frame->entries;
++        at = frame->at;
++
++        if (!(bh = ext3_bread(handle,dir, dx_get_block(frame->at), 0, &err)))
++                goto cleanup;
++
++        BUFFER_TRACE(bh, "get_write_access");
++        err = ext3_journal_get_write_access(handle, bh);
++        if (err)
++                goto journal_error;
++
++        err = add_dirent_to_buf(handle, dentry, inode, 0, bh);
++        if (err != -ENOSPC) {
++                bh = 0;
++                goto cleanup;
++        }
++
++        /* Block full, should compress but for now just split */
++        dxtrace(printk("using %u of %u node entries\n",
++                       dx_get_count(entries), dx_get_limit(entries)));
++        /* Need to split index? */
++        if (dx_get_count(entries) == dx_get_limit(entries)) {
++                u32 newblock;
++                unsigned icount = dx_get_count(entries);
++                int levels = frame - frames;
++                struct dx_entry *entries2;
++                struct dx_node *node2;
++                struct buffer_head *bh2;
++
++                if (levels && (dx_get_count(frames->entries) ==
++                               dx_get_limit(frames->entries))) {
++                        ext3_warning(sb, __FUNCTION__,
++                                     "Directory index full!\n");
++                        err = -ENOSPC;
++                        goto cleanup;
+               }
+-              if ((le32_to_cpu(de->inode) == 0 &&
+-                              le16_to_cpu(de->rec_len) >= rec_len) ||
+-                  (le16_to_cpu(de->rec_len) >=
+-                              EXT3_DIR_REC_LEN(de->name_len) + rec_len)) {
+-                      BUFFER_TRACE(bh, "get_write_access");
+-                      ext3_journal_get_write_access(handle, bh);
+-                      /* By now the buffer is marked for journaling */
+-                      offset += le16_to_cpu(de->rec_len);
+-                      if (le32_to_cpu(de->inode)) {
+-                              de1 = (struct ext3_dir_entry_2 *) ((char *) de +
+-                                      EXT3_DIR_REC_LEN(de->name_len));
+-                              de1->rec_len =
+-                                      cpu_to_le16(le16_to_cpu(de->rec_len) -
+-                                      EXT3_DIR_REC_LEN(de->name_len));
+-                              de->rec_len = cpu_to_le16(
+-                                              EXT3_DIR_REC_LEN(de->name_len));
+-                              de = de1;
++
++                bh2 = ext3_append (handle, dir, &newblock, &err);
++                if (!(bh2))
++                        goto cleanup;
++                node2 = (struct dx_node *)(bh2->b_data);
++                entries2 = node2->entries;
++                node2->fake.rec_len = cpu_to_le16(sb->s_blocksize);
++                node2->fake.inode = 0;
++                BUFFER_TRACE(frame->bh, "get_write_access");
++                err = ext3_journal_get_write_access(handle, frame->bh);
++                if (err)
++                        goto journal_error;
++                if (levels) {
++                        unsigned icount1 = icount/2, icount2 = icount - icount1;+                       unsigned hash2 = dx_get_hash(entries + icount1);
++                        dxtrace(printk("Split index %i/%i\n", icount1, icount2));
++
++                        BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
++                        err = ext3_journal_get_write_access(handle,
++                                                            frames[0].bh);
++                        if (err)
++                                goto journal_error;
++
++                        memcpy ((char *) entries2, (char *) (entries + icount1),+                               icount2 * sizeof(struct dx_entry));
++                        dx_set_count (entries, icount1);
++                        dx_set_count (entries2, icount2);
++                        dx_set_limit (entries2, dx_node_limit(dir));
++
++                        /* Which index block gets the new entry? */
++                        if (at - entries >= icount1) {
++                                frame->at = at = at - entries - icount1 + entries2;
++                                frame->entries = entries = entries2;
++                                swap(frame->bh, bh2);
+                       }
+-                      de->file_type = EXT3_FT_UNKNOWN;
+-                      if (inode) {
+-                              de->inode = cpu_to_le32(inode->i_ino);
+-                              ext3_set_de_type(dir->i_sb, de, inode->i_mode);
+-                      } else
+-                              de->inode = 0;
+-                      de->name_len = namelen;
+-                      memcpy (de->name, name, namelen);
+-                      /*
+-                       * XXX shouldn't update any times until successful
+-                       * completion of syscall, but too many callers depend
+-                       * on this.
+-                       *
+-                       * XXX similarly, too many callers depend on
+-                       * ext3_new_inode() setting the times, but error
+-                       * recovery deletes the inode, so the worst that can
+-                       * happen is that the times are slightly out of date
+-                       * and/or different from the directory change time.
+-                       */
+-                      dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+-                      dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
+-                      ext3_mark_inode_dirty(handle, dir);
+-                      dir->i_version = ++event;
+-                      BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
+-                      ext3_journal_dirty_metadata(handle, bh);
+-                      brelse(bh);
+-                      return 0;
++                        dx_insert_block (frames + 0, hash2, newblock);
++                        dxtrace(dx_show_index ("node", frames[1].entries));
++                        dxtrace(dx_show_index ("node",
++                               ((struct dx_node *) bh2->b_data)->entries));
++                        err = ext3_journal_dirty_metadata(handle, bh2);
++                        if (err)
++                                goto journal_error;
++                        brelse (bh2);
++                } else {
++                        dxtrace(printk("Creating second level index...\n"));
++                        memcpy((char *) entries2, (char *) entries,
++                               icount * sizeof(struct dx_entry));
++                        dx_set_limit(entries2, dx_node_limit(dir));
++
++                        /* Set up root */
++                        dx_set_count(entries, 1);
++                        dx_set_block(entries + 0, newblock);
++                        ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1;
++
++                        /* Add new access path frame */
++                        frame = frames + 1;
++                        frame->at = at = at - entries + entries2;
++                        frame->entries = entries = entries2;
++                        frame->bh = bh2;
++                        err = ext3_journal_get_write_access(handle,
++                                                            frame->bh);
++                        if (err)
++                                goto journal_error;
+               }
+-              offset += le16_to_cpu(de->rec_len);
+-              de = (struct ext3_dir_entry_2 *)
+-                      ((char *) de + le16_to_cpu(de->rec_len));
++                ext3_journal_dirty_metadata(handle, frames[0].bh);
+       }
+-      brelse (bh);
+-      return -ENOSPC;
++        de = do_split(handle, dir, &bh, frame, &hinfo, &err);
++        if (!de)
++                goto cleanup;
++        err = add_dirent_to_buf(handle, dentry, inode, de, bh);
++        bh = 0;
++        goto cleanup;
++
++journal_error:
++        ext3_std_error(dir->i_sb, err);
++cleanup:
++        if (bh)
++                brelse(bh);
++        dx_release(frames);
++        return err;
+ }
++#endif
+ /*
+  * ext3_delete_entry deletes a directory entry by merging it with the
+@@ -453,9 +1532,11 @@
+       struct inode * inode;
+       int err;
+-      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3);
+-      if (IS_ERR(handle))
++      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
++                                      EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3);
++      if (IS_ERR(handle)) {
+               return PTR_ERR(handle);
++      }
+       if (IS_SYNC(dir))
+               handle->h_sync = 1;
+@@ -480,9 +1561,11 @@
+       struct inode *inode;
+       int err;
+-      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3);
+-      if (IS_ERR(handle))
++      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
++                                      EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3);
++      if (IS_ERR(handle)) {
+               return PTR_ERR(handle);
++      }
+       if (IS_SYNC(dir))
+               handle->h_sync = 1;
+@@ -512,9 +1595,11 @@
+       if (dir->i_nlink >= EXT3_LINK_MAX)
+               return -EMLINK;
+-      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3);
+-      if (IS_ERR(handle))
++      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
++                                      EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3);
++      if (IS_ERR(handle)) {
+               return PTR_ERR(handle);
++      }
+       if (IS_SYNC(dir))
+               handle->h_sync = 1;
+@@ -526,7 +1611,8 @@
+       inode->i_op = &ext3_dir_inode_operations;
+       inode->i_fop = &ext3_dir_operations;
+-      inode->i_size = inode->u.ext3_i.i_disksize = inode->i_sb->s_blocksize;
++        inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize;
++        inode->i_blocks = 0;
+       dir_block = ext3_bread (handle, inode, 0, 1, &err);
+       if (!dir_block) {
+               inode->i_nlink--; /* is this nlink == 0? */
+@@ -555,21 +1641,19 @@
+       brelse (dir_block);
+       ext3_mark_inode_dirty(handle, inode);
+       err = ext3_add_entry (handle, dentry, inode);
+-      if (err)
+-              goto out_no_entry;
++      if (err) {
++              inode->i_nlink = 0;
++              ext3_mark_inode_dirty(handle, inode);
++              iput (inode);
++              goto out_stop;
++      }
+       dir->i_nlink++;
+-      dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
++      ext3_update_dx_flag(dir);
+       ext3_mark_inode_dirty(handle, dir);
+       d_instantiate(dentry, inode);
+ out_stop:
+       ext3_journal_stop(handle, dir);
+       return err;
+-
+-out_no_entry:
+-      inode->i_nlink = 0;
+-      ext3_mark_inode_dirty(handle, inode);
+-      iput (inode);
+-      goto out_stop;
+ }
+ /*
+@@ -656,7 +1740,7 @@
+       int err = 0, rc;
+       
+       lock_super(sb);
+-      if (!list_empty(&inode->u.ext3_i.i_orphan))
++      if (!list_empty(&EXT3_I(inode)->i_orphan))
+               goto out_unlock;
+       /* Orphan handling is only valid for files with data blocks
+@@ -697,7 +1781,7 @@
+        * This is safe: on error we're going to ignore the orphan list
+        * anyway on the next recovery. */
+       if (!err)
+-              list_add(&inode->u.ext3_i.i_orphan, &EXT3_SB(sb)->s_orphan);
++              list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan);
+       jbd_debug(4, "superblock will point to %ld\n", inode->i_ino);
+       jbd_debug(4, "orphan inode %ld will point to %d\n",
+@@ -715,25 +1799,26 @@
+ int ext3_orphan_del(handle_t *handle, struct inode *inode)
+ {
+       struct list_head *prev;
++        struct ext3_inode_info *ei = EXT3_I(inode);
+       struct ext3_sb_info *sbi;
+       ino_t ino_next; 
+       struct ext3_iloc iloc;
+       int err = 0;
+       
+       lock_super(inode->i_sb);
+-      if (list_empty(&inode->u.ext3_i.i_orphan)) {
++        if (list_empty(&ei->i_orphan)) {
+               unlock_super(inode->i_sb);
+               return 0;
+       }
+       ino_next = NEXT_ORPHAN(inode);
+-      prev = inode->u.ext3_i.i_orphan.prev;
++        prev = ei->i_orphan.prev;
+       sbi = EXT3_SB(inode->i_sb);
+       jbd_debug(4, "remove inode %ld from orphan list\n", inode->i_ino);
+-      list_del(&inode->u.ext3_i.i_orphan);
+-      INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan);
++        list_del(&ei->i_orphan);
++        INIT_LIST_HEAD(&ei->i_orphan);
+       /* If we're on an error path, we may not have a valid
+        * transaction handle with which to update the orphan list on
+@@ -794,8 +1879,9 @@
+       handle_t *handle;
+       handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS);
+-      if (IS_ERR(handle))
++      if (IS_ERR(handle)) {
+               return PTR_ERR(handle);
++      }
+       retval = -ENOENT;
+       bh = ext3_find_entry (dentry, &de);
+@@ -833,7 +1919,7 @@
+       ext3_mark_inode_dirty(handle, inode);
+       dir->i_nlink--;
+       inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+-      dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
++        ext3_update_dx_flag(dir);
+       ext3_mark_inode_dirty(handle, dir);
+ end_rmdir:
+@@ -851,8 +1937,9 @@
+       handle_t *handle;
+       handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS);
+-      if (IS_ERR(handle))
++      if (IS_ERR(handle)) {
+               return PTR_ERR(handle);
++      }
+       if (IS_SYNC(dir))
+               handle->h_sync = 1;
+@@ -879,7 +1966,7 @@
+       if (retval)
+               goto end_unlink;
+       dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+-      dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
++      ext3_update_dx_flag(dir);
+       ext3_mark_inode_dirty(handle, dir);
+       inode->i_nlink--;
+       if (!inode->i_nlink)
+@@ -905,9 +1992,11 @@
+       if (l > dir->i_sb->s_blocksize)
+               return -ENAMETOOLONG;
+-      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 5);
+-      if (IS_ERR(handle))
++      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
++                                      EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5);
++      if (IS_ERR(handle)) {
+               return PTR_ERR(handle);
++      }
+       if (IS_SYNC(dir))
+               handle->h_sync = 1;
+@@ -917,7 +2006,7 @@
+       if (IS_ERR(inode))
+               goto out_stop;
+-      if (l > sizeof (inode->u.ext3_i.i_data)) {
++        if (l > sizeof (EXT3_I(inode)->i_data)) {
+               inode->i_op = &ext3_symlink_inode_operations;
+               inode->i_mapping->a_ops = &ext3_aops;
+               /*
+@@ -926,25 +2015,23 @@
+                * i_size in generic_commit_write().
+                */
+               err = block_symlink(inode, symname, l);
+-              if (err)
+-                      goto out_no_entry;
++                if (err) {
++                        ext3_dec_count(handle, inode);
++                        ext3_mark_inode_dirty(handle, inode);
++                        iput (inode);
++                        goto out_stop;
++                }
+       } else {
+               inode->i_op = &ext3_fast_symlink_inode_operations;
+-              memcpy((char*)&inode->u.ext3_i.i_data,symname,l);
++                memcpy((char*)&EXT3_I(inode)->i_data,symname,l);
+               inode->i_size = l-1;
+       }
+-      inode->u.ext3_i.i_disksize = inode->i_size;
++        EXT3_I(inode)->i_disksize = inode->i_size;
+       ext3_mark_inode_dirty(handle, inode);
+       err = ext3_add_nondir(handle, dentry, inode);
+ out_stop:
+       ext3_journal_stop(handle, dir);
+       return err;
+-
+-out_no_entry:
+-      ext3_dec_count(handle, inode);
+-      ext3_mark_inode_dirty(handle, inode);
+-      iput (inode);
+-      goto out_stop;
+ }
+ static int ext3_link (struct dentry * old_dentry,
+@@ -957,12 +2044,15 @@
+       if (S_ISDIR(inode->i_mode))
+               return -EPERM;
+-      if (inode->i_nlink >= EXT3_LINK_MAX)
++      if (inode->i_nlink >= EXT3_LINK_MAX) {
+               return -EMLINK;
++      }
+-      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS);
+-      if (IS_ERR(handle))
++      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
++                                      EXT3_INDEX_EXTRA_TRANS_BLOCKS);
++      if (IS_ERR(handle)) {
+               return PTR_ERR(handle);
++      }
+       if (IS_SYNC(dir))
+               handle->h_sync = 1;
+@@ -996,9 +2086,11 @@
+       old_bh = new_bh = dir_bh = NULL;
+-      handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + 2);
+-      if (IS_ERR(handle))
++      handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS +
++                                      EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2);
++      if (IS_ERR(handle)) {
+               return PTR_ERR(handle);
++      }
+       if (IS_SYNC(old_dir) || IS_SYNC(new_dir))
+               handle->h_sync = 1;
+@@ -1078,7 +2170,7 @@
+               new_inode->i_ctime = CURRENT_TIME;
+       }
+       old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
+-      old_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
++      ext3_update_dx_flag(old_dir);
+       if (dir_bh) {
+               BUFFER_TRACE(dir_bh, "get_write_access");
+               ext3_journal_get_write_access(handle, dir_bh);
+@@ -1090,7 +2182,7 @@
+                       new_inode->i_nlink--;
+               } else {
+                       new_dir->i_nlink++;
+-                      new_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
++                      ext3_update_dx_flag(new_dir);
+                       ext3_mark_inode_dirty(handle, new_dir);
+               }
+       }
+Index: linux-2.4.19/fs/ext3/super.c
+===================================================================
+--- linux-2.4.19.orig/fs/ext3/super.c  2004-04-23 18:25:03.000000000 -0400
++++ linux-2.4.19/fs/ext3/super.c       2004-04-23 18:26:27.000000000 -0400
+@@ -741,6 +741,7 @@
+       es->s_mtime = cpu_to_le32(CURRENT_TIME);
+       ext3_update_dynamic_rev(sb);
+       EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
++
+       ext3_commit_super (sb, es, 1);
+       if (test_opt (sb, DEBUG))
+               printk (KERN_INFO
+@@ -751,6 +752,7 @@
+                       EXT3_BLOCKS_PER_GROUP(sb),
+                       EXT3_INODES_PER_GROUP(sb),
+                       sbi->s_mount_opt);
++
+       printk(KERN_INFO "EXT3 FS " EXT3FS_VERSION ", " EXT3FS_DATE " on %s, ",
+                               bdevname(sb->s_dev));
+       if (EXT3_SB(sb)->s_journal->j_inode == NULL) {
+@@ -925,6 +927,7 @@
+       return res;
+ }
++
+ struct super_block * ext3_read_super (struct super_block * sb, void * data,
+                                     int silent)
+ {
+@@ -1113,6 +1116,9 @@
+       sbi->s_mount_state = le16_to_cpu(es->s_state);
+       sbi->s_addr_per_block_bits = log2(EXT3_ADDR_PER_BLOCK(sb));
+       sbi->s_desc_per_block_bits = log2(EXT3_DESC_PER_BLOCK(sb));
++      for (i=0; i < 4; i++)
++              sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
++      sbi->s_def_hash_version = es->s_def_hash_version;
+       if (sbi->s_blocks_per_group > blocksize * 8) {
+               printk (KERN_ERR
+@@ -1821,6 +1827,7 @@
+       exit_ext3_xattr();
+ }
++EXPORT_SYMBOL(ext3_force_commit);
+ EXPORT_SYMBOL(ext3_bread);
+ MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
+Index: linux-2.4.19/include/linux/ext3_fs.h
+===================================================================
+--- linux-2.4.19.orig/include/linux/ext3_fs.h  2004-04-23 17:53:55.000000000 -0400
++++ linux-2.4.19/include/linux/ext3_fs.h       2004-04-23 18:26:27.000000000 -0400
+@@ -40,6 +40,11 @@
+ #define EXT3FS_VERSION                "2.4-0.9.18"
+ /*
++ * Always enable hashed directories
++ */
++#define CONFIG_EXT3_INDEX
++
++/*
+  * Debug code
+  */
+ #ifdef EXT3FS_DEBUG
+@@ -414,8 +419,11 @@
+ /*E0*/        __u32   s_journal_inum;         /* inode number of journal file */
+       __u32   s_journal_dev;          /* device number of journal file */
+       __u32   s_last_orphan;          /* start of list of inodes to delete */
+-
+-/*EC*/        __u32   s_reserved[197];        /* Padding to the end of the block */
++      __u32   s_hash_seed[4];         /* HTREE hash seed */
++      __u8    s_def_hash_version;     /* Default hash version to use */
++      __u8    s_reserved_char_pad;
++      __u16   s_reserved_word_pad;
++      __u32   s_reserved[192];        /* Padding to the end of the block */
+ };
+ #ifdef __KERNEL__
+@@ -552,9 +560,46 @@
+ #define EXT3_DIR_ROUND                        (EXT3_DIR_PAD - 1)
+ #define EXT3_DIR_REC_LEN(name_len)    (((name_len) + 8 + EXT3_DIR_ROUND) & \
+                                        ~EXT3_DIR_ROUND)
++/*
++ * Hash Tree Directory indexing
++ * (c) Daniel Phillips, 2001
++ */
++
++#ifdef CONFIG_EXT3_INDEX
++  #define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \
++                                            EXT3_FEATURE_COMPAT_DIR_INDEX) && \
++                    (EXT3_I(dir)->i_flags & EXT3_INDEX_FL))
++#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX)
++#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
++#else
++  #define is_dx(dir) 0
++#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX)
++#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2)
++#endif
++
++/* Legal values for the dx_root hash_version field: */
++
++#define DX_HASH_LEGACY                0
++#define DX_HASH_HALF_MD4      1
++#define DX_HASH_TEA           2
++
++/* hash info structure used by the directory hash */
++struct dx_hash_info
++{
++      u32             hash;
++      u32             minor_hash;
++      int             hash_version;
++      u32             *seed;
++};
+ #ifdef __KERNEL__
+ /*
++ * Control parameters used by ext3_htree_next_block
++ */
++#define HASH_NB_ALWAYS                1
++
++
++/*
+  * Describe an inode's exact location on disk and in memory
+  */
+ struct ext3_iloc
+@@ -564,6 +609,27 @@
+       unsigned long block_group;
+ };
++
++/*
++ * This structure is stuffed into the struct file's private_data field
++ * for directories.  It is where we put information so that we can do
++ * readdir operations in hash tree order.
++ */
++struct dir_private_info {
++      rb_root_t       root;
++      rb_node_t       *curr_node;
++      struct fname    *extra_fname;
++      loff_t          last_pos;
++      __u32           curr_hash;
++      __u32           curr_minor_hash;
++      __u32           next_hash;
++};
++
++/*
++ * Special error return code only used by dx_probe() and its callers.
++ */
++#define ERR_BAD_DX_DIR        -75000
++
+ /*
+  * Function prototypes
+  */
+@@ -591,11 +657,20 @@
+ /* dir.c */
+ extern int ext3_check_dir_entry(const char *, struct inode *,
+-                              struct ext3_dir_entry_2 *, struct buffer_head *,
+-                              unsigned long);
++                              struct ext3_dir_entry_2 *,
++                              struct buffer_head *, unsigned long);
++extern void ext3_htree_store_dirent(struct file *dir_file, __u32 hash,
++                                  __u32 minor_hash,
++                                  struct ext3_dir_entry_2 *dirent);
++extern void ext3_htree_free_dir_info(struct dir_private_info *p);
++
+ /* fsync.c */
+ extern int ext3_sync_file (struct file *, struct dentry *, int);
++/* hash.c */
++extern int ext3fs_dirhash(const char *name, int len, struct
++                        dx_hash_info *hinfo);
++
+ /* ialloc.c */
+ extern struct inode * ext3_new_inode (handle_t *, struct inode *, int);
+ extern void ext3_free_inode (handle_t *, struct inode *);
+@@ -628,6 +703,8 @@
+ /* namei.c */
+ extern int ext3_orphan_add(handle_t *, struct inode *);
+ extern int ext3_orphan_del(handle_t *, struct inode *);
++extern int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
++                              __u32 start_minor_hash, __u32 *next_hash);
+ /* super.c */
+ extern void ext3_error (struct super_block *, const char *, const char *, ...)
+Index: linux-2.4.19/include/linux/ext3_fs_sb.h
+===================================================================
+--- linux-2.4.19.orig/include/linux/ext3_fs_sb.h       2004-04-23 17:53:54.000000000 -0400
++++ linux-2.4.19/include/linux/ext3_fs_sb.h    2004-04-23 18:26:27.000000000 -0400
+@@ -62,6 +62,8 @@
+       int s_inode_size;
+       int s_first_ino;
+       u32 s_next_generation;
++      u32 s_hash_seed[4];
++      int s_def_hash_version;
+       /* Journaling */
+       struct inode * s_journal_inode;
+Index: linux-2.4.19/include/linux/ext3_jbd.h
+===================================================================
+--- linux-2.4.19.orig/include/linux/ext3_jbd.h 2004-04-23 17:53:54.000000000 -0400
++++ linux-2.4.19/include/linux/ext3_jbd.h      2004-04-23 18:26:27.000000000 -0400
+@@ -69,6 +69,8 @@
+ #define EXT3_RESERVE_TRANS_BLOCKS     12
++#define EXT3_INDEX_EXTRA_TRANS_BLOCKS 8
++
+ int
+ ext3_mark_iloc_dirty(handle_t *handle, 
+                    struct inode *inode,
+Index: linux-2.4.19/include/linux/rbtree.h
+===================================================================
+--- linux-2.4.19.orig/include/linux/rbtree.h   2001-11-22 14:46:18.000000000 -0500
++++ linux-2.4.19/include/linux/rbtree.h        2004-04-23 18:26:27.000000000 -0400
+@@ -120,6 +120,8 @@
+ extern void rb_insert_color(rb_node_t *, rb_root_t *);
+ extern void rb_erase(rb_node_t *, rb_root_t *);
++extern rb_node_t *rb_get_first(rb_root_t *root);
++extern rb_node_t *rb_get_next(rb_node_t *n);
+ static inline void rb_link_node(rb_node_t * node, rb_node_t * parent, rb_node_t ** rb_link)
+ {
+Index: linux-2.4.19/lib/rbtree.c
+===================================================================
+--- linux-2.4.19.orig/lib/rbtree.c     2002-08-02 20:39:46.000000000 -0400
++++ linux-2.4.19/lib/rbtree.c  2004-04-23 18:26:27.000000000 -0400
+@@ -17,6 +17,8 @@
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+   linux/lib/rbtree.c
++
++  rb_get_first and rb_get_next written by Theodore Ts'o, 9/8/2002
+ */
+ #include <linux/rbtree.h>
+@@ -294,3 +296,43 @@
+               __rb_erase_color(child, parent, root);
+ }
+ EXPORT_SYMBOL(rb_erase);
++
++/*
++ * This function returns the first node (in sort order) of the tree.
++ */
++rb_node_t *rb_get_first(rb_root_t *root)
++{
++      rb_node_t       *n;
++
++      n = root->rb_node;
++      if (!n)
++              return 0;
++      while (n->rb_left)
++              n = n->rb_left;
++      return n;
++}
++EXPORT_SYMBOL(rb_get_first);
++
++/*
++ * Given a node, this function will return the next node in the tree.
++ */
++rb_node_t *rb_get_next(rb_node_t *n)
++{
++      rb_node_t       *parent;
++
++      if (n->rb_right) {
++              n = n->rb_right;
++              while (n->rb_left)
++                      n = n->rb_left;
++              return n;
++      } else {
++              while ((parent = n->rb_parent)) {
++                      if (n == parent->rb_left)
++                              return parent;
++                      n = parent;
++              }
++              return 0;
++      }
++}
++EXPORT_SYMBOL(rb_get_next);
++
index 3c6b5e5..1e26b8c 100644 (file)
@@ -1,7 +1,7 @@
-Index: linux-2.4.19.SuSE/fs/ext3/namei.c
+Index: linux-2.4.19/fs/ext3/namei.c
 ===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/namei.c     Sun Nov 16 01:14:50 2003
-+++ linux-2.4.19.SuSE/fs/ext3/namei.c  Sun Nov 16 01:18:04 2003
+--- linux-2.4.19.orig/fs/ext3/namei.c  2004-04-23 22:36:03.000000000 -0400
++++ linux-2.4.19/fs/ext3/namei.c       2004-04-23 22:37:37.000000000 -0400
 @@ -1751,8 +1751,8 @@
        struct super_block *sb = inode->i_sb;
        struct ext3_iloc iloc;
@@ -25,7 +25,7 @@ Index: linux-2.4.19.SuSE/fs/ext3/namei.c
 @@ -1813,20 +1813,19 @@
  {
        struct list_head *prev;
-       struct ext3_inode_info *ei = EXT3_I(inode);
+         struct ext3_inode_info *ei = EXT3_I(inode);
 -      struct ext3_sb_info *sbi;
 +      struct ext3_sb_info *sbi = EXT3_SB(inode->i_sb);
        unsigned long ino_next;
@@ -33,15 +33,15 @@ Index: linux-2.4.19.SuSE/fs/ext3/namei.c
        int err = 0;
  
 -      lock_super(inode->i_sb);
-+      down(&sbi->s_orphan_lock);
-       if (list_empty(&ei->i_orphan)) {
++        down(&sbi->s_orphan_lock);
+         if (list_empty(&ei->i_orphan)) {
 -              unlock_super(inode->i_sb);
-+              up(&sbi->s_orphan_lock);
++                up(&sbi->s_orphan_lock);
                return 0;
        }
  
        ino_next = NEXT_ORPHAN(inode);
-       prev = ei->i_orphan.prev;
+         prev = ei->i_orphan.prev;
 -      sbi = EXT3_SB(inode->i_sb);
  
        jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino);
@@ -59,11 +59,11 @@ Index: linux-2.4.19.SuSE/fs/ext3/namei.c
        return err;
  
  out_brelse:
-Index: linux-2.4.19.SuSE/fs/ext3/super.c
+Index: linux-2.4.19/fs/ext3/super.c
 ===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/super.c     Sun Nov 16 00:40:59 2003
-+++ linux-2.4.19.SuSE/fs/ext3/super.c  Sun Nov 16 01:18:04 2003
-@@ -1182,6 +1182,7 @@
+--- linux-2.4.19.orig/fs/ext3/super.c  2004-04-23 22:30:41.000000000 -0400
++++ linux-2.4.19/fs/ext3/super.c       2004-04-23 22:36:22.000000000 -0400
+@@ -1179,6 +1179,7 @@
         */
        sb->s_op = &ext3_sops;
        INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
@@ -71,10 +71,10 @@ Index: linux-2.4.19.SuSE/fs/ext3/super.c
  
        sb->s_root = 0;
  
-Index: linux-2.4.19.SuSE/include/linux/ext3_fs_sb.h
+Index: linux-2.4.19/include/linux/ext3_fs_sb.h
 ===================================================================
---- linux-2.4.19.SuSE.orig/include/linux/ext3_fs_sb.h  Sat Nov 15 23:58:28 2003
-+++ linux-2.4.19.SuSE/include/linux/ext3_fs_sb.h       Sun Nov 16 01:18:41 2003
+--- linux-2.4.19.orig/include/linux/ext3_fs_sb.h       2004-04-23 18:26:27.000000000 -0400
++++ linux-2.4.19/include/linux/ext3_fs_sb.h    2004-04-23 22:36:22.000000000 -0400
 @@ -69,6 +69,7 @@
        struct inode * s_journal_inode;
        struct journal_s * s_journal;
diff --git a/lustre/kernel_patches/patches/ext3-trusted_ea-suse-2.4.19.patch b/lustre/kernel_patches/patches/ext3-trusted_ea-suse-2.4.19.patch
new file mode 100644 (file)
index 0000000..1c31052
--- /dev/null
@@ -0,0 +1,179 @@
+ fs/ext3/xattr.c            |   12 +++++-
+ fs/ext3/xattr_trusted.c    |   86 +++++++++++++++++++++++++++++++++++++++++++++
+ include/linux/ext3_xattr.h |    6 +++
+ 3 files changed, 102 insertions(+), 2 deletions(-)
+
+Index: linux-2.4.19/fs/ext3/xattr.c
+===================================================================
+--- linux-2.4.19.orig/fs/ext3/xattr.c  2004-04-23 22:44:57.000000000 -0400
++++ linux-2.4.19/fs/ext3/xattr.c       2004-04-23 22:45:20.000000000 -0400
+@@ -1785,18 +1785,25 @@
+ int __init
+ init_ext3_xattr(void)
+ {
++      int error;
++
+       ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL,
+               sizeof(struct mb_cache_entry) +
+               sizeof(struct mb_cache_entry_index), 1, 61);
+       if (!ext3_xattr_cache)
+               return -ENOMEM;
+-      return 0;
++      error = init_ext3_xattr_trusted();
++      if (error)
++              mb_cache_destroy(ext3_xattr_cache);
++
++      return error;
+ }
+ void
+ exit_ext3_xattr(void)
+ {
++      exit_ext3_xattr_trusted();
+       if (ext3_xattr_cache)
+               mb_cache_destroy(ext3_xattr_cache);
+       ext3_xattr_cache = NULL;
+@@ -1807,12 +1814,13 @@
+ int __init
+ init_ext3_xattr(void)
+ {
+-      return 0;
++      return init_ext3_xattr_trusted();
+ }
+ void
+ exit_ext3_xattr(void)
+ {
++      exit_ext3_xattr_trusted();
+ }
+ #endif  /* CONFIG_EXT3_FS_XATTR_SHARING */
+Index: linux-2.4.19/fs/ext3/xattr_trusted.c
+===================================================================
+--- linux-2.4.19.orig/fs/ext3/xattr_trusted.c  2003-01-30 05:24:37.000000000 -0500
++++ linux-2.4.19/fs/ext3/xattr_trusted.c       2004-04-23 22:45:20.000000000 -0400
+@@ -0,0 +1,86 @@
++/*
++ * linux/fs/ext3/xattr_trusted.c
++ * Handler for trusted extended attributes.
++ *
++ * Copyright (C) 2003 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
++ */
++
++#include <linux/module.h>
++#include <linux/string.h>
++#include <linux/fs.h>
++#include <linux/ext3_jbd.h>
++#include <linux/ext3_fs.h>
++#include <linux/ext3_xattr.h>
++
++#define XATTR_TRUSTED_PREFIX "trusted."
++
++static size_t
++ext3_xattr_trusted_list(char *list, struct inode *inode,
++                      const char *name, int name_len)
++{
++      const int prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1;
++
++      if (!capable(CAP_SYS_ADMIN))
++              return 0;
++
++      if (list) {
++              memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
++              memcpy(list+prefix_len, name, name_len);
++              list[prefix_len + name_len] = '\0';
++      }
++      return prefix_len + name_len + 1;
++}
++
++static int
++ext3_xattr_trusted_get(struct inode *inode, const char *name,
++                     void *buffer, size_t size)
++{
++      if (strcmp(name, "") == 0)
++              return -EINVAL;
++      if (!capable(CAP_SYS_ADMIN))
++              return -EPERM;
++      return ext3_xattr_get(inode, EXT3_XATTR_INDEX_TRUSTED, name,
++                            buffer, size);
++}
++
++static int
++ext3_xattr_trusted_set(struct inode *inode, const char *name,
++                     const void *value, size_t size, int flags)
++{
++      handle_t *handle;
++      int error;
++
++      if (strcmp(name, "") == 0)
++              return -EINVAL;
++      if (!capable(CAP_SYS_ADMIN))
++              return -EPERM;
++      handle = ext3_journal_start(inode, EXT3_XATTR_TRANS_BLOCKS);
++      if (IS_ERR(handle))
++              return PTR_ERR(handle);
++      error = ext3_xattr_set(handle, inode, EXT3_XATTR_INDEX_TRUSTED, name,
++                             value, size, flags);
++      ext3_journal_stop(handle, inode);
++
++      return error;
++}
++
++struct ext3_xattr_handler ext3_xattr_trusted_handler = {
++      .prefix = XATTR_TRUSTED_PREFIX,
++      .list   = ext3_xattr_trusted_list,
++      .get    = ext3_xattr_trusted_get,
++      .set    = ext3_xattr_trusted_set,
++};
++
++int __init
++init_ext3_xattr_trusted(void)
++{
++      return ext3_xattr_register(EXT3_XATTR_INDEX_TRUSTED,
++                                 &ext3_xattr_trusted_handler);
++}
++
++void
++exit_ext3_xattr_trusted(void)
++{
++      ext3_xattr_unregister(EXT3_XATTR_INDEX_TRUSTED,
++                            &ext3_xattr_trusted_handler);
++}
+Index: linux-2.4.19/fs/ext3/Makefile
+===================================================================
+--- linux-2.4.19.orig/fs/ext3/Makefile 2004-04-23 22:38:38.000000000 -0400
++++ linux-2.4.19/fs/ext3/Makefile      2004-04-23 22:49:23.000000000 -0400
+@@ -12,7 +12,7 @@
+ export-objs :=        super.o inode.o
+ obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
+-              ioctl.o namei.o super.o symlink.o hash.o
++              ioctl.o namei.o super.o symlink.o hash.o xattr_trusted.o
+ obj-m    := $(O_TARGET)
+ obj-$(CONFIG_EXT3_FS_XATTR) += xattr.o
+Index: linux-2.4.19/include/linux/ext3_xattr.h
+===================================================================
+--- linux-2.4.19.orig/include/linux/ext3_xattr.h       2004-04-23 17:53:54.000000000 -0400
++++ linux-2.4.19/include/linux/ext3_xattr.h    2004-04-23 22:45:20.000000000 -0400
+@@ -21,6 +21,9 @@
+ #define EXT3_XATTR_INDEX_USER                 1
+ #define EXT3_XATTR_INDEX_POSIX_ACL_ACCESS     2
+ #define EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT    3
++#define EXT3_XATTR_INDEX_TRUSTED              4
++#define EXT3_XATTR_INDEX_LUSTRE                       5
++#define EXT3_XATTR_INDEX_SECURITY             6
+ struct ext3_xattr_header {
+       __u32   h_magic;        /* magic number for identification */
+@@ -84,6 +87,9 @@
+ extern int init_ext3_xattr(void) __init;
+ extern void exit_ext3_xattr(void);
++extern int init_ext3_xattr_trusted(void) __init;
++extern void exit_ext3_xattr_trusted(void);
++
+ # else  /* CONFIG_EXT3_FS_XATTR */
+ #  define ext3_setxattr               NULL
+ #  define ext3_getxattr               NULL
index 1dab6d8..5a5dc5a 100644 (file)
@@ -1,11 +1,11 @@
  ./fs/ext3/namei.c |   11 +++++------
  1 files changed, 5 insertions(+), 6 deletions(-)
 
-Index: linux-2.4.19.SuSE/./fs/ext3/namei.c
+Index: linux-2.4.19/fs/ext3/namei.c
 ===================================================================
---- linux-2.4.19.SuSE.orig/./fs/ext3/namei.c   Sun Nov 16 01:02:51 2003
-+++ linux-2.4.19.SuSE/./fs/ext3/namei.c        Sun Nov 16 01:14:50 2003
-@@ -1523,8 +1523,11 @@
+--- linux-2.4.19.orig/fs/ext3/namei.c  2004-04-23 22:30:41.000000000 -0400
++++ linux-2.4.19/fs/ext3/namei.c       2004-04-23 22:36:03.000000000 -0400
+@@ -1522,8 +1522,11 @@
  {
        int err = ext3_add_entry(handle, dentry, inode);
        if (!err) {
@@ -19,7 +19,7 @@ Index: linux-2.4.19.SuSE/./fs/ext3/namei.c
        }
        ext3_dec_count(handle, inode);
        iput(inode);
-@@ -1560,7 +1563,6 @@
+@@ -1559,7 +1562,6 @@
                inode->i_op = &ext3_file_inode_operations;
                inode->i_fop = &ext3_file_operations;
                inode->i_mapping->a_ops = &ext3_aops;
@@ -27,7 +27,7 @@ Index: linux-2.4.19.SuSE/./fs/ext3/namei.c
                err = ext3_add_nondir(handle, dentry, inode);
        }
        ext3_journal_stop(handle, dir);
-@@ -1590,7 +1592,6 @@
+@@ -1589,7 +1591,6 @@
  #ifdef CONFIG_EXT3_FS_XATTR
                inode->i_op = &ext3_special_inode_operations;
  #endif
@@ -38,7 +38,7 @@ Index: linux-2.4.19.SuSE/./fs/ext3/namei.c
 @@ -2039,7 +2040,6 @@
                inode->i_size = l-1;
        }
-       EXT3_I(inode)->i_disksize = inode->i_size;
+         EXT3_I(inode)->i_disksize = inode->i_size;
 -      ext3_mark_inode_dirty(handle, inode);
        err = ext3_add_nondir(handle, dentry, inode);
  out_stop:
diff --git a/lustre/kernel_patches/patches/invalidate_show-2.4.21-sles8sp3.patch b/lustre/kernel_patches/patches/invalidate_show-2.4.21-sles8sp3.patch
new file mode 100644 (file)
index 0000000..36e59d3
--- /dev/null
@@ -0,0 +1,134 @@
+
+
+
+ fs/inode.c         |   21 ++++++++++++++-------
+ fs/smbfs/inode.c   |    2 +-
+ fs/super.c         |    4 ++--
+ include/linux/fs.h |    2 +-
+ 4 files changed, 18 insertions(+), 11 deletions(-)
+
+Index: linux-2.4.21/fs/inode.c
+===================================================================
+--- linux-2.4.21.orig/fs/inode.c       2004-04-24 02:38:51.000000000 -0400
++++ linux-2.4.21/fs/inode.c    2004-04-26 19:41:58.000000000 -0400
+@@ -651,7 +651,8 @@
+ /*
+  * Invalidate all inodes for a device.
+  */
+-static int invalidate_list(struct list_head *head, struct super_block * sb, struct list_head * dispose)
++static int invalidate_list(struct list_head *head, struct super_block * sb,
++                         struct list_head * dispose, int show)
+ {
+       struct list_head *next;
+       int busy = 0, count = 0;
+@@ -676,6 +677,11 @@
+                       count++;
+                       continue;
+               }
++              if (show)
++                      printk(KERN_ERR
++                             "inode busy: dev %s:%lu (%p) mode %o count %u\n",
++                             kdevname(sb->s_dev), inode->i_ino, inode,
++                             inode->i_mode, atomic_read(&inode->i_count));
+               busy = 1;
+       }
+       /* only unused inodes may be cached with i_count zero */
+@@ -694,22 +700,23 @@
+ /**
+  *    invalidate_inodes       - discard the inodes on a device
+  *    @sb: superblock
++ *    @show: whether we should display any busy inodes found
+  *
+  *    Discard all of the inodes for a given superblock. If the discard
+  *    fails because there are busy inodes then a non zero value is returned.
+  *    If the discard is successful all the inodes have been discarded.
+  */
+  
+-int invalidate_inodes(struct super_block * sb)
++int invalidate_inodes(struct super_block * sb, int show)
+ {
+       int busy;
+       LIST_HEAD(throw_away);
+       spin_lock(&inode_lock);
+-      busy = invalidate_list(&inode_in_use, sb, &throw_away);
+-      busy |= invalidate_list(&inode_unused, sb, &throw_away);
+-      busy |= invalidate_list(&sb->s_dirty, sb, &throw_away);
+-      busy |= invalidate_list(&sb->s_locked_inodes, sb, &throw_away);
++      busy = invalidate_list(&inode_in_use, sb, &throw_away, show);
++      busy |= invalidate_list(&inode_unused, sb, &throw_away, show);
++      busy |= invalidate_list(&sb->s_dirty, sb, &throw_away, show);
++      busy |= invalidate_list(&sb->s_locked_inodes, sb, &throw_away, show);
+       spin_unlock(&inode_lock);
+       dispose_list(&throw_away);
+@@ -735,7 +742,7 @@
+                * hold).
+                */
+               shrink_dcache_sb(sb);
+-              res = invalidate_inodes(sb);
++              res = invalidate_inodes(sb, 0);
+               drop_super(sb);
+       }
+       invalidate_buffers(dev);
+Index: linux-2.4.21/fs/super.c
+===================================================================
+--- linux-2.4.21.orig/fs/super.c       2004-04-24 02:38:51.000000000 -0400
++++ linux-2.4.21/fs/super.c    2004-04-26 19:41:58.000000000 -0400
+@@ -932,7 +932,7 @@
+       lock_super(sb);
+       lock_kernel();
+       sb->s_flags &= ~MS_ACTIVE;
+-      invalidate_inodes(sb);  /* bad name - it should be evict_inodes() */
++      invalidate_inodes(sb, 0);  /* bad name - it should be evict_inodes() */
+       if (sop) {
+               if (sop->write_super && sb->s_dirt)
+                       sop->write_super(sb);
+@@ -941,7 +941,7 @@
+       }
+       /* Forget any remaining inodes */
+-      if (invalidate_inodes(sb)) {
++      if (invalidate_inodes(sb, 1)) {
+               printk(KERN_ERR "VFS: Busy inodes after unmount. "
+                       "Self-destruct in 5 seconds.  Have a nice day...\n");
+       }
+Index: linux-2.4.21/fs/smbfs/inode.c
+===================================================================
+--- linux-2.4.21.orig/fs/smbfs/inode.c 2004-04-24 02:38:44.000000000 -0400
++++ linux-2.4.21/fs/smbfs/inode.c      2004-04-26 19:41:58.000000000 -0400
+@@ -167,7 +167,7 @@
+ {
+       VERBOSE("\n");
+       shrink_dcache_sb(SB_of(server));
+-      invalidate_inodes(SB_of(server));
++      invalidate_inodes(SB_of(server), 0);
+ }
+ /*
+Index: linux-2.4.21/fs/ntfs/super.c
+===================================================================
+--- linux-2.4.21.orig/fs/ntfs/super.c  2004-04-24 02:38:38.000000000 -0400
++++ linux-2.4.21/fs/ntfs/super.c       2004-04-26 19:42:12.000000000 -0400
+@@ -1604,7 +1604,7 @@
+        * method again... FIXME: Do we need to do this twice now because of
+        * attribute inodes? I think not, so leave as is for now... (AIA)
+        */
+-      if (invalidate_inodes(sb)) {
++      if (invalidate_inodes(sb, 0)) {
+               ntfs_error(sb, "Busy inodes left. This is most likely a NTFS "
+                               "driver bug.");
+               /* Copied from fs/super.c. I just love this message. (-; */
+Index: linux-2.4.21/include/linux/fs.h
+===================================================================
+--- linux-2.4.21.orig/include/linux/fs.h       2004-04-26 19:06:32.000000000 -0400
++++ linux-2.4.21/include/linux/fs.h    2004-04-26 19:41:58.000000000 -0400
+@@ -1401,7 +1401,7 @@
+ extern void set_buffer_flushtime(struct buffer_head *);
+ extern void balance_dirty(void);
+ extern int check_disk_change(kdev_t);
+-extern int invalidate_inodes(struct super_block *);
++extern int invalidate_inodes(struct super_block *, int);
+ extern int invalidate_device(kdev_t, int);
+ extern void invalidate_inode_pages(struct inode *);
+ extern void invalidate_inode_pages2(struct address_space *);
index ad213c9..01c040c 100644 (file)
@@ -8,10 +8,10 @@
  include/linux/ext3_fs.h            |    2 
  8 files changed, 318 insertions(+), 2 deletions(-)
 
-Index: linux-2.4.19.SuSE/Documentation/filesystems/ext2.txt
+Index: linux-2.4.19/Documentation/filesystems/ext2.txt
 ===================================================================
---- linux-2.4.19.SuSE.orig/Documentation/filesystems/ext2.txt  Wed Jul 11 15:44:45 2001
-+++ linux-2.4.19.SuSE/Documentation/filesystems/ext2.txt       Sun Nov 16 01:27:31 2003
+--- linux-2.4.19.orig/Documentation/filesystems/ext2.txt       2001-07-11 18:44:45.000000000 -0400
++++ linux-2.4.19/Documentation/filesystems/ext2.txt    2004-04-23 22:37:48.000000000 -0400
 @@ -35,6 +35,22 @@
  
  sb=n                          Use alternate superblock at this location.
@@ -35,23 +35,23 @@ Index: linux-2.4.19.SuSE/Documentation/filesystems/ext2.txt
  grpquota,noquota,quota,usrquota       Quota options are silently ignored by ext2.
  
  
-Index: linux-2.4.19.SuSE/fs/ext3/Makefile
+Index: linux-2.4.19/fs/ext3/Makefile
 ===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/Makefile    Sun Nov 16 00:40:59 2003
-+++ linux-2.4.19.SuSE/fs/ext3/Makefile Sun Nov 16 01:27:31 2003
+--- linux-2.4.19.orig/fs/ext3/Makefile 2004-04-23 18:26:27.000000000 -0400
++++ linux-2.4.19/fs/ext3/Makefile      2004-04-23 22:38:38.000000000 -0400
 @@ -11,7 +11,7 @@
  
- export-objs := ext3-exports.o
+ export-objs :=        super.o inode.o
  
 -obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
 +obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
-               ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o
+               ioctl.o namei.o super.o symlink.o hash.o
  obj-m    := $(O_TARGET)
  
-Index: linux-2.4.19.SuSE/fs/ext3/inode.c
+Index: linux-2.4.19/fs/ext3/inode.c
 ===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/inode.c     Sun Nov 16 01:26:04 2003
-+++ linux-2.4.19.SuSE/fs/ext3/inode.c  Sun Nov 16 01:27:31 2003
+--- linux-2.4.19.orig/fs/ext3/inode.c  2004-04-23 22:37:42.000000000 -0400
++++ linux-2.4.19/fs/ext3/inode.c       2004-04-23 22:37:48.000000000 -0400
 @@ -34,6 +34,7 @@
  #include <linux/highuid.h>
  #include <linux/quotaops.h>
@@ -70,10 +70,10 @@ Index: linux-2.4.19.SuSE/fs/ext3/inode.c
        if(ext3_get_inode_loc(inode, &iloc))
                goto bad_inode;
        bh = iloc.bh;
-Index: linux-2.4.19.SuSE/fs/ext3/iopen.c
+Index: linux-2.4.19/fs/ext3/iopen.c
 ===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/iopen.c     Sun Nov 16 01:27:31 2003
-+++ linux-2.4.19.SuSE/fs/ext3/iopen.c  Sun Nov 16 01:27:31 2003
+--- linux-2.4.19.orig/fs/ext3/iopen.c  2003-01-30 05:24:37.000000000 -0500
++++ linux-2.4.19/fs/ext3/iopen.c       2004-04-23 22:37:48.000000000 -0400
 @@ -0,0 +1,258 @@
 +/*
 + * linux/fs/ext3/iopen.c
@@ -333,10 +333,10 @@ Index: linux-2.4.19.SuSE/fs/ext3/iopen.c
 +
 +      return 1;
 +}
-Index: linux-2.4.19.SuSE/fs/ext3/iopen.h
+Index: linux-2.4.19/fs/ext3/iopen.h
 ===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/iopen.h     Sun Nov 16 01:27:31 2003
-+++ linux-2.4.19.SuSE/fs/ext3/iopen.h  Sun Nov 16 01:27:31 2003
+--- linux-2.4.19.orig/fs/ext3/iopen.h  2003-01-30 05:24:37.000000000 -0500
++++ linux-2.4.19/fs/ext3/iopen.h       2004-04-23 22:37:48.000000000 -0400
 @@ -0,0 +1,15 @@
 +/*
 + * iopen.h
@@ -353,10 +353,10 @@ Index: linux-2.4.19.SuSE/fs/ext3/iopen.h
 +extern int ext3_iopen_get_inode(struct inode *inode);
 +extern struct dentry *iopen_connect_dentry(struct dentry *de,
 +                                         struct inode *inode);
-Index: linux-2.4.19.SuSE/fs/ext3/namei.c
+Index: linux-2.4.19/fs/ext3/namei.c
 ===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/namei.c     Sun Nov 16 01:23:20 2003
-+++ linux-2.4.19.SuSE/fs/ext3/namei.c  Sun Nov 16 01:27:31 2003
+--- linux-2.4.19.orig/fs/ext3/namei.c  2004-04-23 22:37:42.000000000 -0400
++++ linux-2.4.19/fs/ext3/namei.c       2004-04-23 22:37:48.000000000 -0400
 @@ -36,7 +36,7 @@
  #include <linux/string.h>
  #include <linux/locks.h>
@@ -366,7 +366,7 @@ Index: linux-2.4.19.SuSE/fs/ext3/namei.c
  
  /*
   * define how far ahead to read directories while searching them.
-@@ -922,10 +922,14 @@
+@@ -928,10 +928,14 @@
        struct inode * inode;
        struct ext3_dir_entry_2 * de;
        struct buffer_head * bh;
@@ -381,7 +381,7 @@ Index: linux-2.4.19.SuSE/fs/ext3/namei.c
        bh = ext3_find_entry(dentry, &de);
        inode = NULL;
        if (bh) {
-@@ -943,7 +948,28 @@
+@@ -943,7 +947,28 @@
                        return ERR_PTR(-EACCES);
                }
        }
@@ -411,11 +411,11 @@ Index: linux-2.4.19.SuSE/fs/ext3/namei.c
        return NULL;
  }
  
-Index: linux-2.4.19.SuSE/fs/ext3/super.c
+Index: linux-2.4.19/fs/ext3/super.c
 ===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/super.c     Sun Nov 16 01:19:22 2003
-+++ linux-2.4.19.SuSE/fs/ext3/super.c  Sun Nov 16 01:27:31 2003
-@@ -864,6 +864,18 @@
+--- linux-2.4.19.orig/fs/ext3/super.c  2004-04-23 22:37:42.000000000 -0400
++++ linux-2.4.19/fs/ext3/super.c       2004-04-23 22:37:48.000000000 -0400
+@@ -861,6 +861,18 @@
                         || !strcmp (this_char, "quota")
                         || !strcmp (this_char, "usrquota"))
                        /* Don't do anything ;-) */ ;
@@ -434,10 +434,10 @@ Index: linux-2.4.19.SuSE/fs/ext3/super.c
                else if (!strcmp (this_char, "journal")) {
                        /* @@@ FIXME */
                        /* Eventually we will want to be able to create
-Index: linux-2.4.19.SuSE/include/linux/ext3_fs.h
+Index: linux-2.4.19/include/linux/ext3_fs.h
 ===================================================================
---- linux-2.4.19.SuSE.orig/include/linux/ext3_fs.h     Sun Nov 16 01:25:42 2003
-+++ linux-2.4.19.SuSE/include/linux/ext3_fs.h  Sun Nov 16 01:30:05 2003
+--- linux-2.4.19.orig/include/linux/ext3_fs.h  2004-04-23 22:37:42.000000000 -0400
++++ linux-2.4.19/include/linux/ext3_fs.h       2004-04-23 22:37:48.000000000 -0400
 @@ -324,6 +324,8 @@
  #define EXT3_MOUNT_XATTR_USER         0x4000  /* Extended user attributes */
  #define EXT3_MOUNT_POSIX_ACL          0x8000  /* POSIX Access Control Lists */
diff --git a/lustre/kernel_patches/patches/iopen-2.4.21-sles8sp3.patch b/lustre/kernel_patches/patches/iopen-2.4.21-sles8sp3.patch
new file mode 100644 (file)
index 0000000..9258544
--- /dev/null
@@ -0,0 +1,449 @@
+ Documentation/filesystems/ext2.txt |   16 ++
+ fs/ext3/Makefile                   |    2 
+ fs/ext3/inode.c                    |    4 
+ fs/ext3/iopen.c                    |  259 +++++++++++++++++++++++++++++++++++++
+ fs/ext3/iopen.h                    |   13 +
+ fs/ext3/namei.c                    |   13 +
+ fs/ext3/super.c                    |   11 +
+ include/linux/ext3_fs.h            |    2 
+ 8 files changed, 318 insertions(+), 2 deletions(-)
+
+Index: linux-2.4.21/Documentation/filesystems/ext2.txt
+===================================================================
+--- linux-2.4.21.orig/Documentation/filesystems/ext2.txt       2001-07-11 18:44:45.000000000 -0400
++++ linux-2.4.21/Documentation/filesystems/ext2.txt    2004-04-24 02:46:32.000000000 -0400
+@@ -35,6 +35,22 @@
+ sb=n                          Use alternate superblock at this location.
++iopen                         Makes an invisible pseudo-directory called
++                              __iopen__ available in the root directory
++                              of the filesystem.  Allows open-by-inode-
++                              number.  i.e., inode 3145 can be accessed
++                              via /mntpt/__iopen__/3145
++
++iopen_nopriv                  This option makes the iopen directory be
++                              world-readable.  This may be safer since it
++                              allows daemons to run as an unprivileged user,
++                              however it significantly changes the security
++                              model of a Unix filesystem, since previously
++                              all files under a mode 700 directory were not
++                              generally avilable even if the
++                              permissions on the file itself is
++                              world-readable.
++
+ grpquota,noquota,quota,usrquota       Quota options are silently ignored by ext2.
+Index: linux-2.4.21/fs/ext3/Makefile
+===================================================================
+--- linux-2.4.21.orig/fs/ext3/Makefile 2004-04-24 02:46:18.000000000 -0400
++++ linux-2.4.21/fs/ext3/Makefile      2004-04-24 02:47:02.000000000 -0400
+@@ -11,7 +11,7 @@
+ export-objs := ext3-exports.o
+-obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
++obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
+               ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o
+ obj-m    := $(O_TARGET)
+Index: linux-2.4.21/fs/ext3/inode.c
+===================================================================
+--- linux-2.4.21.orig/fs/ext3/inode.c  2004-04-24 02:46:19.000000000 -0400
++++ linux-2.4.21/fs/ext3/inode.c       2004-04-24 02:46:32.000000000 -0400
+@@ -34,6 +34,7 @@
+ #include <linux/highuid.h>
+ #include <linux/quotaops.h>
+ #include <linux/module.h>
++#include "iopen.h"
+ /*
+  * SEARCH_FROM_ZERO forces each block allocation to search from the start
+@@ -2252,6 +2253,9 @@
+       struct buffer_head *bh;
+       int block;
+       
++      if (ext3_iopen_get_inode(inode))
++              return;
++
+       if(ext3_get_inode_loc(inode, &iloc))
+               goto bad_inode;
+       bh = iloc.bh;
+Index: linux-2.4.21/fs/ext3/iopen.c
+===================================================================
+--- linux-2.4.21.orig/fs/ext3/iopen.c  2003-01-30 05:24:37.000000000 -0500
++++ linux-2.4.21/fs/ext3/iopen.c       2004-04-24 02:46:32.000000000 -0400
+@@ -0,0 +1,258 @@
++/*
++ * linux/fs/ext3/iopen.c
++ *
++ * Special support for open by inode number
++ *
++ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu).
++ *
++ * This file may be redistributed under the terms of the GNU General
++ * Public License.
++ *
++ *
++ * Invariants:
++ *   - there is only ever a single DCACHE_NFSD_DISCONNECTED dentry alias
++ *     for an inode at one time.
++ *   - there are never both connected and DCACHE_NFSD_DISCONNECTED dentry
++ *     aliases on an inode at the same time.
++ *
++ * If we have any connected dentry aliases for an inode, use one of those
++ * in iopen_lookup().  Otherwise, we instantiate a single NFSD_DISCONNECTED
++ * dentry for this inode, which thereafter will be found by the dcache
++ * when looking up this inode number in __iopen__, so we don't return here
++ * until it is gone.
++ *
++ * If we get an inode via a regular name lookup, then we "rename" the
++ * NFSD_DISCONNECTED dentry to the proper name and parent.  This ensures
++ * existing users of the disconnected dentry will continue to use the same
++ * dentry as the connected users, and there will never be both kinds of
++ * dentry aliases at one time.
++ */
++
++#include <linux/sched.h>
++#include <linux/fs.h>
++#include <linux/locks.h>
++#include <linux/ext3_jbd.h>
++#include <linux/jbd.h>
++#include <linux/ext3_fs.h>
++#include <linux/smp_lock.h>
++#include "iopen.h"
++
++#ifndef assert
++#define assert(test) J_ASSERT(test)
++#endif
++
++#define IOPEN_NAME_LEN        32
++
++/*
++ * This implements looking up an inode by number.
++ */
++static struct dentry *iopen_lookup(struct inode *dir, struct dentry *dentry)
++{
++      struct inode *inode;
++      unsigned long ino;
++      struct list_head *lp;
++      struct dentry *alternate;
++      char buf[IOPEN_NAME_LEN];
++
++      if (dentry->d_name.len >= IOPEN_NAME_LEN)
++              return ERR_PTR(-ENAMETOOLONG);
++
++      memcpy(buf, dentry->d_name.name, dentry->d_name.len);
++      buf[dentry->d_name.len] = 0;
++
++      if (strcmp(buf, ".") == 0)
++              ino = dir->i_ino;
++      else if (strcmp(buf, "..") == 0)
++              ino = EXT3_ROOT_INO;
++      else
++              ino = simple_strtoul(buf, 0, 0);
++
++      if ((ino != EXT3_ROOT_INO &&
++           //ino != EXT3_ACL_IDX_INO &&
++           //ino != EXT3_ACL_DATA_INO &&
++           ino < EXT3_FIRST_INO(dir->i_sb)) ||
++          ino > le32_to_cpu(dir->i_sb->u.ext3_sb.s_es->s_inodes_count))
++              return ERR_PTR(-ENOENT);
++
++      inode = iget(dir->i_sb, ino);
++      if (!inode)
++              return ERR_PTR(-EACCES);
++      if (is_bad_inode(inode)) {
++              iput(inode);
++              return ERR_PTR(-ENOENT);
++      }
++
++      assert(list_empty(&dentry->d_alias));           /* d_instantiate */
++      assert(list_empty(&dentry->d_hash));            /* d_rehash */
++
++      /* preferrably return a connected dentry */
++      spin_lock(&dcache_lock);
++      list_for_each(lp, &inode->i_dentry) {
++              alternate = list_entry(lp, struct dentry, d_alias);
++              assert(!(alternate->d_flags & DCACHE_NFSD_DISCONNECTED));
++      }
++
++      if (!list_empty(&inode->i_dentry)) {
++              alternate = list_entry(inode->i_dentry.next,
++                                     struct dentry, d_alias);
++              dget_locked(alternate);
++              alternate->d_vfs_flags |= DCACHE_REFERENCED;
++              iput(inode);
++              spin_unlock(&dcache_lock);
++              return alternate;
++      }
++      dentry->d_flags |= DCACHE_NFSD_DISCONNECTED;
++
++      /* d_add(), but don't drop dcache_lock before adding dentry to inode */
++      list_add(&dentry->d_alias, &inode->i_dentry);   /* d_instantiate */
++      dentry->d_inode = inode;
++
++      __d_rehash(dentry, 0);                          /* d_rehash */
++      spin_unlock(&dcache_lock);
++
++      return NULL;
++}
++
++#define do_switch(x,y) do { \
++      __typeof__ (x) __tmp = x; \
++      x = y; y = __tmp; } while (0)
++
++static inline void switch_names(struct dentry *dentry, struct dentry *target)
++{
++      const unsigned char *old_name, *new_name;
++
++      memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN);
++      old_name = target->d_name.name;
++      new_name = dentry->d_name.name;
++      if (old_name == target->d_iname)
++              old_name = dentry->d_iname;
++      if (new_name == dentry->d_iname)
++              new_name = target->d_iname;
++      target->d_name.name = new_name;
++      dentry->d_name.name = old_name;
++}
++
++/* This function is spliced into ext3_lookup and does the move of a
++ * disconnected dentry (if it exists) to a connected dentry.
++ * Caller must hold dcache_lock.
++ */
++struct dentry *iopen_connect_dentry(struct dentry *de, struct inode *inode)
++{
++      struct dentry *tmp, *goal = NULL;
++      struct list_head *lp;
++
++      /* preferrably return a connected dentry */
++      list_for_each(lp, &inode->i_dentry) {
++              tmp = list_entry(lp, struct dentry, d_alias);
++              if (tmp->d_flags & DCACHE_NFSD_DISCONNECTED) {
++                      assert(tmp->d_alias.next == &inode->i_dentry);
++                      assert(tmp->d_alias.prev == &inode->i_dentry);
++                      goal = tmp;
++                      dget_locked(goal);
++                      break;
++              }
++      }
++
++      if (!goal)
++              return NULL;
++
++      /* Move the goal to the de hash queue - like d_move() */
++      goal->d_flags &= ~DCACHE_NFSD_DISCONNECTED;
++      list_del_init(&goal->d_hash);
++
++      list_del(&goal->d_child);
++      list_del(&de->d_child);
++
++      /* Switch the parents and the names.. */
++      switch_names(goal, de);
++      do_switch(goal->d_parent, de->d_parent);
++      do_switch(goal->d_name.len, de->d_name.len);
++      do_switch(goal->d_name.hash, de->d_name.hash);
++
++      /* And add them back to the (new) parent lists */
++      list_add(&goal->d_child, &goal->d_parent->d_subdirs);
++      list_add(&de->d_child, &de->d_parent->d_subdirs);
++      __d_rehash(goal, 0);
++
++      return goal;
++}
++
++/*
++ * These are the special structures for the iopen pseudo directory.
++ */
++
++static struct inode_operations iopen_inode_operations = {
++      lookup:         iopen_lookup,           /* BKL held */
++};
++
++static struct file_operations iopen_file_operations = {
++      read:           generic_read_dir,
++};
++
++static int match_dentry(struct dentry *dentry, const char *name)
++{
++      int     len;
++
++      len = strlen(name);
++      if (dentry->d_name.len != len)
++              return 0;
++      if (strncmp(dentry->d_name.name, name, len))
++              return 0;
++      return 1;
++}
++
++/*
++ * This function is spliced into ext3_lookup and returns 1 the file
++ * name is __iopen__ and dentry has been filled in appropriately.
++ */
++int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry)
++{
++      struct inode *inode;
++
++      if (dir->i_ino != EXT3_ROOT_INO ||
++          !test_opt(dir->i_sb, IOPEN) ||
++          !match_dentry(dentry, "__iopen__"))
++              return 0;
++
++      inode = iget(dir->i_sb, EXT3_BAD_INO);
++
++      if (!inode)
++              return 0;
++      d_add(dentry, inode);
++      return 1;
++}
++
++/*
++ * This function is spliced into read_inode; it returns 1 if inode
++ * number is the one for /__iopen__, in which case the inode is filled
++ * in appropriately.  Otherwise, this fuction returns 0.
++ */
++int ext3_iopen_get_inode(struct inode *inode)
++{
++      if (inode->i_ino != EXT3_BAD_INO)
++              return 0;
++
++      inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR;
++      if (test_opt(inode->i_sb, IOPEN_NOPRIV))
++              inode->i_mode |= 0777;
++      inode->i_uid = 0;
++      inode->i_gid = 0;
++      inode->i_nlink = 1;
++      inode->i_size = 4096;
++      inode->i_atime = CURRENT_TIME;
++      inode->i_ctime = CURRENT_TIME;
++      inode->i_mtime = CURRENT_TIME;
++      inode->u.ext3_i.i_dtime = 0;
++      inode->i_blksize = PAGE_SIZE;   /* This is the optimal IO size
++                                       * (for stat), not the fs block
++                                       * size */
++      inode->i_blocks = 0;
++      inode->i_version = 1;
++      inode->i_generation = 0;
++
++      inode->i_op = &iopen_inode_operations;
++      inode->i_fop = &iopen_file_operations;
++      inode->i_mapping->a_ops = 0;
++
++      return 1;
++}
+Index: linux-2.4.21/fs/ext3/iopen.h
+===================================================================
+--- linux-2.4.21.orig/fs/ext3/iopen.h  2003-01-30 05:24:37.000000000 -0500
++++ linux-2.4.21/fs/ext3/iopen.h       2004-04-24 02:46:32.000000000 -0400
+@@ -0,0 +1,15 @@
++/*
++ * iopen.h
++ *
++ * Special support for opening files by inode number.
++ *
++ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu).
++ *
++ * This file may be redistributed under the terms of the GNU General
++ * Public License.
++ */
++
++extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry);
++extern int ext3_iopen_get_inode(struct inode *inode);
++extern struct dentry *iopen_connect_dentry(struct dentry *de,
++                                         struct inode *inode);
+Index: linux-2.4.21/fs/ext3/namei.c
+===================================================================
+--- linux-2.4.21.orig/fs/ext3/namei.c  2004-04-24 02:46:19.000000000 -0400
++++ linux-2.4.21/fs/ext3/namei.c       2004-04-24 02:46:32.000000000 -0400
+@@ -36,7 +36,7 @@
+ #include <linux/string.h>
+ #include <linux/locks.h>
+ #include <linux/quotaops.h>
+-
++#include "iopen.h"
+ /*
+  * define how far ahead to read directories while searching them.
+@@ -928,10 +928,14 @@
+       struct inode * inode;
+       struct ext3_dir_entry_2 * de;
+       struct buffer_head * bh;
++      struct dentry *alternate = NULL;
+       if (dentry->d_name.len > EXT3_NAME_LEN)
+               return ERR_PTR(-ENAMETOOLONG);
++      if (ext3_check_for_iopen(dir, dentry))
++              return NULL;
++
+       bh = ext3_find_entry(dentry, &de);
+       inode = NULL;
+       if (bh) {
+@@ -943,7 +947,28 @@
+                       return ERR_PTR(-EACCES);
+               }
+       }
+-      d_add(dentry, inode);
++
++      /* verify this dentry is really new */
++      assert(!dentry->d_inode);
++      assert(list_empty(&dentry->d_alias));           /* d_instantiate */
++      assert(list_empty(&dentry->d_hash));            /* d_rehash */
++      assert(list_empty(&dentry->d_subdirs));
++
++      spin_lock(&dcache_lock);
++      if (inode && (alternate = iopen_connect_dentry(dentry, inode))) {
++              spin_unlock(&dcache_lock);
++              iput(inode);
++              return alternate;
++      }
++
++      /* d_add(), but don't drop dcache_lock before adding dentry to inode */
++      if (inode)                                      /* d_instantiate */
++              list_add(&dentry->d_alias, &inode->i_dentry);
++      dentry->d_inode = inode;
++
++      __d_rehash(dentry, 0);                          /* d_rehash */
++      spin_unlock(&dcache_lock);
++
+       return NULL;
+ }
+Index: linux-2.4.21/fs/ext3/super.c
+===================================================================
+--- linux-2.4.21.orig/fs/ext3/super.c  2004-04-24 02:46:19.000000000 -0400
++++ linux-2.4.21/fs/ext3/super.c       2004-04-24 02:46:32.000000000 -0400
+@@ -869,6 +869,18 @@
+                        || !strcmp (this_char, "quota")
+                        || !strcmp (this_char, "usrquota"))
+                       /* Don't do anything ;-) */ ;
++              else if (!strcmp (this_char, "iopen")) {
++                      set_opt (sbi->s_mount_opt, IOPEN);
++                      clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
++              }
++              else if (!strcmp (this_char, "noiopen")) {
++                      clear_opt (sbi->s_mount_opt, IOPEN);
++                      clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
++              }
++              else if (!strcmp (this_char, "iopen_nopriv")) {
++                      set_opt (sbi->s_mount_opt, IOPEN);
++                      set_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
++              }
+               else if (!strcmp (this_char, "journal")) {
+                       /* @@@ FIXME */
+                       /* Eventually we will want to be able to create
+Index: linux-2.4.21/include/linux/ext3_fs.h
+===================================================================
+--- linux-2.4.21.orig/include/linux/ext3_fs.h  2004-04-24 02:46:19.000000000 -0400
++++ linux-2.4.21/include/linux/ext3_fs.h       2004-04-24 02:46:32.000000000 -0400
+@@ -324,6 +324,8 @@
+ #define EXT3_MOUNT_XATTR_USER         0x4000  /* Extended user attributes */
+ #define EXT3_MOUNT_POSIX_ACL          0x8000  /* POSIX Access Control Lists */
+ #define EXT3_MOUNT_ASYNCDEL           0x20000 /* Delayed deletion */
++#define EXT3_MOUNT_IOPEN              0x40000 /* Allow access via iopen */
++#define EXT3_MOUNT_IOPEN_NOPRIV               0x80000 /* Make iopen world-readable */
+ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
+ #ifndef _LINUX_EXT2_FS_H
diff --git a/lustre/kernel_patches/patches/kernel_text_address-2.4.21-sles8sp3.patch b/lustre/kernel_patches/patches/kernel_text_address-2.4.21-sles8sp3.patch
new file mode 100644 (file)
index 0000000..0541a48
--- /dev/null
@@ -0,0 +1,115 @@
+Index: linux-2.4.21/arch/um/kernel/Makefile
+===================================================================
+--- linux-2.4.21.orig/arch/um/kernel/Makefile  2004-04-24 02:37:58.000000000 -0400
++++ linux-2.4.21/arch/um/kernel/Makefile       2004-04-24 02:51:03.000000000 -0400
+@@ -37,7 +37,8 @@
+ export-objs-$(CONFIG_GPROF) += gprof_syms.o
+ export-objs-$(CONFIG_GCOV) += gmon_syms.o
+-export-objs = ksyms.o process_kern.o signal_kern.o user_syms.o $(export-objs-y)
++export-objs = ksyms.o process_kern.o signal_kern.o user_syms.o sysrq.o \
++      $(export-objs-y)
+ CFLAGS_user_syms.o = -D__AUTOCONF_INCLUDED__ $(DMODULES-y) $(DMODVERSIONS-y) \
+       -I/usr/include -I../include
+Index: linux-2.4.21/arch/um/kernel/sysrq.c
+===================================================================
+--- linux-2.4.21.orig/arch/um/kernel/sysrq.c   2004-04-24 02:37:58.000000000 -0400
++++ linux-2.4.21/arch/um/kernel/sysrq.c        2004-04-24 02:51:03.000000000 -0400
+@@ -86,6 +86,37 @@
+       show_trace((unsigned long *)esp);
+ }
++#ifdef CONFIG_MODULES
++extern struct module *module_list;
++extern struct module kernel_module;
++#endif
++
++int is_kernel_text_address(unsigned long addr)
++{
++      int retval = 0;
++#ifdef CONFIG_MODULES
++      struct module *mod;
++#endif
++      if (addr >= (unsigned long) &_stext &&
++          addr <= (unsigned long) &_etext)
++              return 1;
++
++#ifdef CONFIG_MODULES
++      for (mod = module_list; mod != &kernel_module; mod = mod->next) {
++              /* mod_bound tests for addr being inside the vmalloc'ed
++               * module area. Of course it'd be better to test only
++               * for the .text subset... */
++              if (mod_bound(addr, 0, mod)) {
++                      retval = 1;
++                      break;
++              }
++      }
++#endif
++      return retval;
++}
++
++EXPORT_SYMBOL(is_kernel_text_address);
++
+ /*
+  * Overrides for Emacs so that we follow Linus's tabbing style.
+  * Emacs will notice this stuff at the end of the file and automatically
+Index: linux-2.4.21/arch/i386/kernel/Makefile
+===================================================================
+--- linux-2.4.21.orig/arch/i386/kernel/Makefile        2004-04-24 02:39:05.000000000 -0400
++++ linux-2.4.21/arch/i386/kernel/Makefile     2004-04-24 02:51:26.000000000 -0400
+@@ -20,7 +20,7 @@
+ O_TARGET := kernel.o
+-export-objs     := mca.o mtrr.o msr.o cpuid.o microcode.o i386_ksyms.o time.o traps.o dr_alloc.o
++export-objs     := mca.o mtrr.o msr.o cpuid.o microcode.o i386_ksyms.o time.o traps.o dr_alloc.o traps.o
+ ifdef CONFIG_X86_SPEEDSTEP_ICH
+ export-objs   += speedstep-lib.o
+Index: linux-2.4.21/arch/i386/kernel/traps.c
+===================================================================
+--- linux-2.4.21.orig/arch/i386/kernel/traps.c 2004-04-24 02:42:58.000000000 -0400
++++ linux-2.4.21/arch/i386/kernel/traps.c      2004-04-24 02:51:03.000000000 -0400
+@@ -1339,3 +1339,41 @@
+       cobalt_init();
+ #endif
+ }
++
++#ifdef CONFIG_MODULES
++extern struct module *module_list;
++extern struct module kernel_module;
++#endif
++                                                                                
++int is_kernel_text_address(unsigned long addr)
++{
++       int retval = 0;
++#ifdef CONFIG_MODULES
++       struct module *mod;
++#endif
++       if (addr >= (unsigned long) &_stext &&
++           addr <= (unsigned long) &_etext);
++               return 1;
++                                                                                
++#ifdef CONFIG_MODULES
++       for (mod = module_list; mod != &kernel_module; mod = mod->next) {
++               /* mod_bound tests for addr being inside the vmalloc'ed
++                * module area. Of course it'd be better to test only
++                * for the .text subset... */
++               if (mod_bound(addr, 0, mod)) {
++                       retval = 1;
++                       break;
++               }
++       }
++#endif
++                                                                                
++       return retval;
++}
++
++int lookup_symbol(unsigned long address, char *buf, int buflen)
++{
++      return -ENOSYS;
++}
++
++EXPORT_SYMBOL_GPL(is_kernel_text_address);
++EXPORT_SYMBOL_GPL(lookup_symbol);
index 4cf7592..e694068 100644 (file)
@@ -1881,7 +1881,7 @@ Index: linux-2.4.19-pre1/fs/ext2/xattr.c
 +              } else if (old_bh && header == HDR(old_bh)) {
 +                      /* Keep this block. */
 +                      new_bh = old_bh;
-+                      ext2_xattr_cache_insert(new_bh);
++                      (void)ext2_xattr_cache_insert(new_bh);
 +              } else {
 +                      /* We need to allocate a new block */
 +                      int force = EXT2_I(inode)->i_file_acl != 0;
@@ -1900,7 +1900,7 @@ Index: linux-2.4.19-pre1/fs/ext2/xattr.c
 +                      memcpy(new_bh->b_data, header, new_bh->b_size);
 +                      mark_buffer_uptodate(new_bh, 1);
 +                      unlock_buffer(new_bh);
-+                      ext2_xattr_cache_insert(new_bh);
++                      (void)ext2_xattr_cache_insert(new_bh);
 +                      
 +                      ext2_xattr_update_super_block(sb);
 +              }
@@ -3563,7 +3563,7 @@ Index: linux-2.4.19-pre1/fs/ext3/xattr.c
 +              } else if (old_bh && header == HDR(old_bh)) {
 +                      /* Keep this block. */
 +                      new_bh = old_bh;
-+                      ext3_xattr_cache_insert(new_bh);
++                      (void)ext3_xattr_cache_insert(new_bh);
 +              } else {
 +                      /* We need to allocate a new block */
 +                      int force = EXT3_I(inode)->i_file_acl != 0;
@@ -3588,7 +3588,7 @@ Index: linux-2.4.19-pre1/fs/ext3/xattr.c
 +                      memcpy(new_bh->b_data, header, new_bh->b_size);
 +                      mark_buffer_uptodate(new_bh, 1);
 +                      unlock_buffer(new_bh);
-+                      ext3_xattr_cache_insert(new_bh);
++                      (void)ext3_xattr_cache_insert(new_bh);
 +                      
 +                      ext3_xattr_update_super_block(handle, sb);
 +              }
index 26d3af9..79c48fb 100644 (file)
@@ -2,10 +2,10 @@
  ext3/ext3-exports.c |   13 +++++++++++++
  2 files changed, 14 insertions(+), 2 deletions(-)
 
-Index: linux-2.4.19.SuSE/fs/ext2/super.c
+Index: linux-2.4.19/fs/ext2/super.c
 ===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext2/super.c     Mon Jan 27 05:08:00 2003
-+++ linux-2.4.19.SuSE/fs/ext2/super.c  Sun Nov 16 00:40:59 2003
+--- linux-2.4.19.orig/fs/ext2/super.c  2004-04-23 17:53:55.000000000 -0400
++++ linux-2.4.19/fs/ext2/super.c       2004-04-23 22:30:41.000000000 -0400
 @@ -70,6 +70,7 @@
  {
        va_list args;
@@ -14,11 +14,11 @@ Index: linux-2.4.19.SuSE/fs/ext2/super.c
        if (!(sb->s_flags & MS_RDONLY)) {
                sb->u.ext2_sb.s_mount_state |= EXT2_ERROR_FS;
                sb->u.ext2_sb.s_es->s_state =
-Index: linux-2.4.19.SuSE/fs/ext3/super.c
+Index: linux-2.4.19/fs/ext3/super.c
 ===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/super.c     Mon Jan 27 05:08:00 2003
-+++ linux-2.4.19.SuSE/fs/ext3/super.c  Sun Nov 16 00:40:59 2003
-@@ -1822,8 +1828,6 @@
+--- linux-2.4.19.orig/fs/ext3/super.c  2004-04-23 18:26:27.000000000 -0400
++++ linux-2.4.19/fs/ext3/super.c       2004-04-23 22:30:41.000000000 -0400
+@@ -1827,8 +1827,6 @@
        exit_ext3_xattr();
  }
  
@@ -27,10 +27,10 @@ Index: linux-2.4.19.SuSE/fs/ext3/super.c
  
  MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
  MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
-Index: linux-2.4.19.SuSE/fs/ext3/ext3-exports.c
+Index: linux-2.4.19/fs/ext3/ext3-exports.c
 ===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/ext3-exports.c      Sun Nov 16 00:40:58 2003
-+++ linux-2.4.19.SuSE/fs/ext3/ext3-exports.c   Sun Nov 16 00:40:59 2003
+--- linux-2.4.19.orig/fs/ext3/ext3-exports.c   2003-01-30 05:24:37.000000000 -0500
++++ linux-2.4.19/fs/ext3/ext3-exports.c        2004-04-23 22:30:41.000000000 -0400
 @@ -0,0 +1,13 @@
 +#include <linux/config.h>
 +#include <linux/module.h>
@@ -45,3 +45,16 @@ Index: linux-2.4.19.SuSE/fs/ext3/ext3-exports.c
 +EXPORT_SYMBOL(ext3_xattr_get);
 +EXPORT_SYMBOL(ext3_xattr_list);
 +EXPORT_SYMBOL(ext3_xattr_set);
+Index: linux-2.4.19/fs/ext3/Makefile
+===================================================================
+--- linux-2.4.19.orig/fs/ext3/Makefile 2004-04-23 18:26:27.000000000 -0400
++++ linux-2.4.19/fs/ext3/Makefile      2004-04-23 23:05:20.000000000 -0400
+@@ -9,7 +9,7 @@
+ O_TARGET := ext3.o
+-export-objs :=        super.o inode.o
++export-objs :=        ext3-exports.c
+ obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
+               ioctl.o namei.o super.o symlink.o hash.o
index 811c40f..5bdfaff 100644 (file)
 +              } else if (old_bh && header == HDR(old_bh)) {
 +                      /* Keep this block. */
 +                      new_bh = old_bh;
-+                      ext2_xattr_cache_insert(new_bh);
++                      (void)ext2_xattr_cache_insert(new_bh);
 +              } else {
 +                      /* We need to allocate a new block */
 +                      int force = EXT2_I(inode)->i_file_acl != 0;
 +                      memcpy(new_bh->b_data, header, new_bh->b_size);
 +                      mark_buffer_uptodate(new_bh, 1);
 +                      unlock_buffer(new_bh);
-+                      ext2_xattr_cache_insert(new_bh);
++                      (void)ext2_xattr_cache_insert(new_bh);
 +                      
 +                      ext2_xattr_update_super_block(sb);
 +              }
 +              } else if (old_bh && header == HDR(old_bh)) {
 +                      /* Keep this block. */
 +                      new_bh = old_bh;
-+                      ext3_xattr_cache_insert(new_bh);
++                      (void)ext3_xattr_cache_insert(new_bh);
 +              } else {
 +                      /* We need to allocate a new block */
 +                      int force = EXT3_I(inode)->i_file_acl != 0;
 +                      memcpy(new_bh->b_data, header, new_bh->b_size);
 +                      mark_buffer_uptodate(new_bh, 1);
 +                      unlock_buffer(new_bh);
-+                      ext3_xattr_cache_insert(new_bh);
++                      (void)ext3_xattr_cache_insert(new_bh);
 +                      
 +                      ext3_xattr_update_super_block(handle, sb);
 +              }
index c9fb126..f1365d7 100644 (file)
 +              } else if (old_bh && header == HDR(old_bh)) {
 +                      /* Keep this block. */
 +                      new_bh = old_bh;
-+                      ext2_xattr_cache_insert(new_bh);
++                      (void)ext2_xattr_cache_insert(new_bh);
 +              } else {
 +                      /* We need to allocate a new block */
 +                      int force = EXT2_I(inode)->i_file_acl != 0;
 +                      memcpy(new_bh->b_data, header, new_bh->b_size);
 +                      mark_buffer_uptodate(new_bh, 1);
 +                      unlock_buffer(new_bh);
-+                      ext2_xattr_cache_insert(new_bh);
++                      (void)ext2_xattr_cache_insert(new_bh);
 +                      
 +                      ext2_xattr_update_super_block(sb);
 +              }
 +              } else if (old_bh && header == HDR(old_bh)) {
 +                      /* Keep this block. */
 +                      new_bh = old_bh;
-+                      ext3_xattr_cache_insert(new_bh);
++                      (void)ext3_xattr_cache_insert(new_bh);
 +              } else {
 +                      /* We need to allocate a new block */
 +                      int force = EXT3_I(inode)->i_file_acl != 0;
 +                      memcpy(new_bh->b_data, header, new_bh->b_size);
 +                      mark_buffer_uptodate(new_bh, 1);
 +                      unlock_buffer(new_bh);
-+                      ext3_xattr_cache_insert(new_bh);
++                      (void)ext3_xattr_cache_insert(new_bh);
 +                      
 +                      ext3_xattr_update_super_block(handle, sb);
 +              }
index 2e4750b..f078ebe 100644 (file)
 +              } else if (old_bh && header == HDR(old_bh)) {
 +                      /* Keep this block. */
 +                      new_bh = old_bh;
-+                      ext2_xattr_cache_insert(new_bh);
++                      (void)ext2_xattr_cache_insert(new_bh);
 +              } else {
 +                      /* We need to allocate a new block */
 +                      int force = EXT2_I(inode)->i_file_acl != 0;
 +                      memcpy(new_bh->b_data, header, new_bh->b_size);
 +                      mark_buffer_uptodate(new_bh, 1);
 +                      unlock_buffer(new_bh);
-+                      ext2_xattr_cache_insert(new_bh);
++                      (void)ext2_xattr_cache_insert(new_bh);
 +                      
 +                      ext2_xattr_update_super_block(sb);
 +              }
 +              } else if (old_bh && header == HDR(old_bh)) {
 +                      /* Keep this block. */
 +                      new_bh = old_bh;
-+                      ext3_xattr_cache_insert(new_bh);
++                      (void)ext3_xattr_cache_insert(new_bh);
 +              } else {
 +                      /* We need to allocate a new block */
 +                      int force = EXT3_I(inode)->i_file_acl != 0;
 +                      memcpy(new_bh->b_data, header, new_bh->b_size);
 +                      mark_buffer_uptodate(new_bh, 1);
 +                      unlock_buffer(new_bh);
-+                      ext3_xattr_cache_insert(new_bh);
++                      (void)ext3_xattr_cache_insert(new_bh);
 +                      
 +                      ext3_xattr_update_super_block(handle, sb);
 +              }
index e18ac9d..8e198f8 100644 (file)
@@ -1315,7 +1315,7 @@ Index: linux-2.4.21-chaos/fs/ext2/xattr.c
 +              } else if (old_bh && header == HDR(old_bh)) {
 +                      /* Keep this block. */
 +                      new_bh = old_bh;
-+                      ext2_xattr_cache_insert(new_bh);
++                      (void)ext2_xattr_cache_insert(new_bh);
 +              } else {
 +                      /* We need to allocate a new block */
 +                      int force = EXT2_I(inode)->i_file_acl != 0;
@@ -1334,7 +1334,7 @@ Index: linux-2.4.21-chaos/fs/ext2/xattr.c
 +                      memcpy(new_bh->b_data, header, new_bh->b_size);
 +                      mark_buffer_uptodate(new_bh, 1);
 +                      unlock_buffer(new_bh);
-+                      ext2_xattr_cache_insert(new_bh);
++                      (void)ext2_xattr_cache_insert(new_bh);
 +                      
 +                      ext2_xattr_update_super_block(sb);
 +              }
index 22dad3c..da8c15c 100644 (file)
 +              } else if (old_bh && header == HDR(old_bh)) {
 +                      /* Keep this block. */
 +                      new_bh = old_bh;
-+                      ext2_xattr_cache_insert(new_bh);
++                      (void)ext2_xattr_cache_insert(new_bh);
 +              } else {
 +                      /* We need to allocate a new block */
 +                      int force = EXT2_I(inode)->i_file_acl != 0;
 +                      memcpy(new_bh->b_data, header, new_bh->b_size);
 +                      mark_buffer_uptodate(new_bh, 1);
 +                      unlock_buffer(new_bh);
-+                      ext2_xattr_cache_insert(new_bh);
++                      (void)ext2_xattr_cache_insert(new_bh);
 +                      
 +                      ext2_xattr_update_super_block(sb);
 +              }
 +              } else if (old_bh && header == HDR(old_bh)) {
 +                      /* Keep this block. */
 +                      new_bh = old_bh;
-+                      ext3_xattr_cache_insert(new_bh);
++                      (void)ext3_xattr_cache_insert(new_bh);
 +              } else {
 +                      /* We need to allocate a new block */
 +                      int force = EXT3_I(inode)->i_file_acl != 0;
 +                      memcpy(new_bh->b_data, header, new_bh->b_size);
 +                      mark_buffer_uptodate(new_bh, 1);
 +                      unlock_buffer(new_bh);
-+                      ext3_xattr_cache_insert(new_bh);
++                      (void)ext3_xattr_cache_insert(new_bh);
 +                      
 +                      ext3_xattr_update_super_block(handle, sb);
 +              }
index b63cc2e..fd5f0c2 100644 (file)
@@ -1738,7 +1738,7 @@ Index: linux-2.4.22-vanilla/fs/ext2/xattr.c
 +              } else if (old_bh && header == HDR(old_bh)) {
 +                      /* Keep this block. */
 +                      new_bh = old_bh;
-+                      ext2_xattr_cache_insert(new_bh);
++                      (void)ext2_xattr_cache_insert(new_bh);
 +              } else {
 +                      /* We need to allocate a new block */
 +                      int force = EXT2_I(inode)->i_file_acl != 0;
@@ -1757,7 +1757,7 @@ Index: linux-2.4.22-vanilla/fs/ext2/xattr.c
 +                      memcpy(new_bh->b_data, header, new_bh->b_size);
 +                      mark_buffer_uptodate(new_bh, 1);
 +                      unlock_buffer(new_bh);
-+                      ext2_xattr_cache_insert(new_bh);
++                      (void)ext2_xattr_cache_insert(new_bh);
 +                      
 +                      ext2_xattr_update_super_block(sb);
 +              }
@@ -3426,7 +3426,7 @@ Index: linux-2.4.22-vanilla/fs/ext3/xattr.c
 +              } else if (old_bh && header == HDR(old_bh)) {
 +                      /* Keep this block. */
 +                      new_bh = old_bh;
-+                      ext3_xattr_cache_insert(new_bh);
++                      (void)ext3_xattr_cache_insert(new_bh);
 +              } else {
 +                      /* We need to allocate a new block */
 +                      int force = EXT3_I(inode)->i_file_acl != 0;
@@ -3451,7 +3451,7 @@ Index: linux-2.4.22-vanilla/fs/ext3/xattr.c
 +                      memcpy(new_bh->b_data, header, new_bh->b_size);
 +                      mark_buffer_uptodate(new_bh, 1);
 +                      unlock_buffer(new_bh);
-+                      ext3_xattr_cache_insert(new_bh);
++                      (void)ext3_xattr_cache_insert(new_bh);
 +                      
 +                      ext3_xattr_update_super_block(handle, sb);
 +              }
index 0109dd9..4cf5c2c 100644 (file)
@@ -1738,7 +1738,7 @@ Index: linux-2.4.24-vanilla/fs/ext2/xattr.c
 +              } else if (old_bh && header == HDR(old_bh)) {
 +                      /* Keep this block. */
 +                      new_bh = old_bh;
-+                      ext2_xattr_cache_insert(new_bh);
++                      (void)ext2_xattr_cache_insert(new_bh);
 +              } else {
 +                      /* We need to allocate a new block */
 +                      int force = EXT2_I(inode)->i_file_acl != 0;
@@ -1757,7 +1757,7 @@ Index: linux-2.4.24-vanilla/fs/ext2/xattr.c
 +                      memcpy(new_bh->b_data, header, new_bh->b_size);
 +                      mark_buffer_uptodate(new_bh, 1);
 +                      unlock_buffer(new_bh);
-+                      ext2_xattr_cache_insert(new_bh);
++                      (void)ext2_xattr_cache_insert(new_bh);
 +                      
 +                      ext2_xattr_update_super_block(sb);
 +              }
@@ -3426,7 +3426,7 @@ Index: linux-2.4.24-vanilla/fs/ext3/xattr.c
 +              } else if (old_bh && header == HDR(old_bh)) {
 +                      /* Keep this block. */
 +                      new_bh = old_bh;
-+                      ext3_xattr_cache_insert(new_bh);
++                      (void)ext3_xattr_cache_insert(new_bh);
 +              } else {
 +                      /* We need to allocate a new block */
 +                      int force = EXT3_I(inode)->i_file_acl != 0;
@@ -3451,7 +3451,7 @@ Index: linux-2.4.24-vanilla/fs/ext3/xattr.c
 +                      memcpy(new_bh->b_data, header, new_bh->b_size);
 +                      mark_buffer_uptodate(new_bh, 1);
 +                      unlock_buffer(new_bh);
-+                      ext3_xattr_cache_insert(new_bh);
++                      (void)ext3_xattr_cache_insert(new_bh);
 +                      
 +                      ext3_xattr_update_super_block(handle, sb);
 +              }
index 7ebb838..77c5531 100644 (file)
@@ -7,6 +7,6 @@
 --- /dev/null  Fri Aug 30 17:31:37 2002
 +++ linux-2.4.18-18.8.0-l12-braam/include/linux/lustre_version.h       Thu Feb 13 07:58:33 2003
 @@ -0,0 +1 @@
-+#define LUSTRE_KERNEL_VERSION 34
++#define LUSTRE_KERNEL_VERSION 35
 
 _
diff --git a/lustre/kernel_patches/patches/mkdep-revert-rh-2.4.patch b/lustre/kernel_patches/patches/mkdep-revert-rh-2.4.patch
new file mode 100644 (file)
index 0000000..5cc34b8
--- /dev/null
@@ -0,0 +1,50 @@
+Index: linux-2.4.20-30.9/scripts/mkdep.c
+===================================================================
+--- linux-2.4.20-30.9.orig/scripts/mkdep.c     2004-02-19 19:40:51.000000000 -0500
++++ linux-2.4.20-30.9/scripts/mkdep.c  2004-04-28 17:24:54.000000000 -0400
+@@ -48,8 +48,6 @@
+ char __depname[512] = "\n\t@touch ";
+ #define depname (__depname+9)
+ int hasdep;
+-char cwd[PATH_MAX];
+-int lcwd;
+ struct path_struct {
+       int len;
+@@ -204,22 +202,8 @@
+               memcpy(path->buffer+path->len, name, len);
+               path->buffer[path->len+len] = '\0';
+               if (access(path->buffer, F_OK) == 0) {
+-                      int l = lcwd + strlen(path->buffer);
+-                      char name2[l+2], *p;
+-                      if (path->buffer[0] == '/') {
+-                              memcpy(name2, path->buffer, l+1);
+-                      }
+-                      else {
+-                              memcpy(name2, cwd, lcwd);
+-                              name2[lcwd] = '/';
+-                              memcpy(name2+lcwd+1, path->buffer, path->len+len+1);
+-                      }
+-                      while ((p = strstr(name2, "/../"))) {
+-                              *p = '\0';
+-                              strcpy(strrchr(name2, '/'), p+3);
+-                      }
+                       do_depname();
+-                      printf(" \\\n   %s", name2);
++                      printf(" \\\n   %s", path->buffer);
+                       return;
+               }
+       }
+@@ -601,12 +585,6 @@
+               return 1;
+       }
+-      if (!getcwd(cwd, sizeof(cwd))) {
+-              fprintf(stderr, "mkdep: getcwd() failed %m\n");
+-              return 1;
+-      }
+-      lcwd = strlen(cwd);
+-
+       add_path(".");          /* for #include "..." */
+       while (++argv, --argc > 0) {
diff --git a/lustre/kernel_patches/patches/tcp-zero-copy-2.4.21-sles8sp3.patch b/lustre/kernel_patches/patches/tcp-zero-copy-2.4.21-sles8sp3.patch
new file mode 100644 (file)
index 0000000..a7859bd
--- /dev/null
@@ -0,0 +1,458 @@
+Index: linux-2.4.21/include/linux/skbuff.h
+===================================================================
+--- linux-2.4.21.orig/include/linux/skbuff.h   2004-04-24 02:38:40.000000000 -0400
++++ linux-2.4.21/include/linux/skbuff.h        2004-04-24 02:47:46.000000000 -0400
+@@ -116,6 +116,30 @@
+       __u16 size;
+ };
++/* Support for callback when skb data has been released */
++typedef struct zccd                           /* Zero Copy Callback Descriptor */
++{                                             /* (embed as first member of custom struct) */
++      atomic_t        zccd_count;             /* reference count */
++      void           (*zccd_destructor)(struct zccd *); /* callback when refcount reaches zero */
++} zccd_t;
++
++static inline void zccd_init (zccd_t *d, void (*callback)(zccd_t *))
++{
++      atomic_set (&d->zccd_count, 1);
++      d->zccd_destructor = callback;
++}
++
++static inline void zccd_get (zccd_t *d)               /* take a reference */
++{
++      atomic_inc (&d->zccd_count);
++}
++
++static inline void zccd_put (zccd_t *d)               /* release a reference */
++{
++      if (atomic_dec_and_test (&d->zccd_count))
++              (d->zccd_destructor)(d);
++}
++
+ /* This data is invariant across clones and lives at
+  * the end of the header data, ie. at skb->end.
+  */
+@@ -123,6 +147,12 @@
+       atomic_t        dataref;
+       unsigned int    nr_frags;
+       struct sk_buff  *frag_list;
++      zccd_t          *zccd;                  /* zero copy descriptor */
++      zccd_t          *zccd2;                 /* 2nd zero copy descriptor */
++      /* NB we expect zero-copy data to be at least 1 packet, so
++       * having 2 zccds means we don't unneccessarily split the packet
++       * where consecutive zero-copy sends abutt.
++       */
+       skb_frag_t      frags[MAX_SKB_FRAGS];
+ };
+Index: linux-2.4.21/include/net/tcp.h
+===================================================================
+--- linux-2.4.21.orig/include/net/tcp.h        2004-04-24 02:39:20.000000000 -0400
++++ linux-2.4.21/include/net/tcp.h     2004-04-24 02:48:27.000000000 -0400
+@@ -646,6 +646,8 @@
+ extern int                    tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size);
+ extern ssize_t                        tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags);
++extern ssize_t                        tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, size_t size,
++                                                int flags, zccd_t *zccd);
+ extern int                    tcp_ioctl(struct sock *sk, 
+                                         int cmd, 
+@@ -742,6 +744,10 @@
+                                           struct msghdr *msg,
+                                           int len, int nonblock, 
+                                           int flags, int *addr_len);
++extern int                      tcp_recvpackets(struct sock *sk,
++                                                struct sk_buff_head *packets,
++                                                int len, int nonblock);
++
+ extern int                    tcp_listen_start(struct sock *sk);
+Index: linux-2.4.21/net/netsyms.c
+===================================================================
+--- linux-2.4.21.orig/net/netsyms.c    2004-04-24 02:39:13.000000000 -0400
++++ linux-2.4.21/net/netsyms.c 2004-04-24 02:47:46.000000000 -0400
+@@ -403,6 +403,8 @@
+ #endif
++EXPORT_SYMBOL(tcp_sendpage_zccd);
++EXPORT_SYMBOL(tcp_recvpackets);
+ EXPORT_SYMBOL(tcp_read_sock);
+ EXPORT_SYMBOL(netlink_set_err);
+Index: linux-2.4.21/net/core/skbuff.c
+===================================================================
+--- linux-2.4.21.orig/net/core/skbuff.c        2004-04-24 02:38:40.000000000 -0400
++++ linux-2.4.21/net/core/skbuff.c     2004-04-24 02:47:46.000000000 -0400
+@@ -208,6 +208,8 @@
+       atomic_set(&(skb_shinfo(skb)->dataref), 1);
+       skb_shinfo(skb)->nr_frags = 0;
+       skb_shinfo(skb)->frag_list = NULL;
++      skb_shinfo(skb)->zccd = NULL;           /* skbuffs kick off with NO user zero copy descriptors */
++      skb_shinfo(skb)->zccd2 = NULL;
+       return skb;
+ nodata:
+@@ -277,6 +279,10 @@
+ {
+       if (!skb->cloned ||
+           atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
++              if (skb_shinfo(skb)->zccd != NULL) /* zero copy callback descriptor? */
++                      zccd_put (skb_shinfo(skb)->zccd); /* release hold */
++              if (skb_shinfo(skb)->zccd2 != NULL) /* 2nd zero copy callback descriptor? */
++                      zccd_put (skb_shinfo(skb)->zccd2); /* release hold */
+               if (skb_shinfo(skb)->nr_frags) {
+                       int i;
+                       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+@@ -535,6 +541,8 @@
+       atomic_set(&(skb_shinfo(skb)->dataref), 1);
+       skb_shinfo(skb)->nr_frags = 0;
+       skb_shinfo(skb)->frag_list = NULL;
++      skb_shinfo(skb)->zccd = NULL;           /* copied data => no user zero copy descriptor */
++      skb_shinfo(skb)->zccd2 = NULL;
+       /* We are no longer a clone, even if we were. */
+       skb->cloned = 0;
+@@ -581,6 +589,14 @@
+       n->data_len = skb->data_len;
+       n->len = skb->len;
++      if (skb_shinfo(skb)->zccd != NULL)      /* user zero copy descriptor? */
++              zccd_get (skb_shinfo(skb)->zccd); /* 1 more ref (pages are shared) */
++      skb_shinfo(n)->zccd = skb_shinfo(skb)->zccd;
++
++      if (skb_shinfo(skb)->zccd2 != NULL)     /* 2nd user zero copy descriptor? */
++              zccd_get (skb_shinfo(skb)->zccd2); /* 1 more ref (pages are shared) */
++      skb_shinfo(n)->zccd2 = skb_shinfo(skb)->zccd2;
++
+       if (skb_shinfo(skb)->nr_frags) {
+               int i;
+@@ -623,6 +639,8 @@
+       u8 *data;
+       int size = nhead + (skb->end - skb->head) + ntail;
+       long off;
++      zccd_t *zccd = skb_shinfo(skb)->zccd;   /* stash user zero copy descriptor */
++      zccd_t *zccd2 = skb_shinfo(skb)->zccd2; /* stash 2nd user zero copy descriptor */
+       if (skb_shared(skb))
+               BUG();
+@@ -644,6 +662,11 @@
+       if (skb_shinfo(skb)->frag_list)
+               skb_clone_fraglist(skb);
++      if (zccd != NULL)                       /* user zero copy descriptor? */
++              zccd_get (zccd);                /* extra ref (pages are shared) */
++      if (zccd2 != NULL)                      /* 2nd user zero copy descriptor? */
++              zccd_get (zccd2);               /* extra ref (pages are shared) */
++
+       skb_release_data(skb);
+       off = (data+nhead) - skb->head;
+@@ -658,6 +681,8 @@
+       skb->nh.raw += off;
+       skb->cloned = 0;
+       atomic_set(&skb_shinfo(skb)->dataref, 1);
++      skb_shinfo(skb)->zccd = zccd;
++      skb_shinfo(skb)->zccd2 = zccd2;
+       return 0;
+ nodata:
+Index: linux-2.4.21/net/ipv4/tcp.c
+===================================================================
+--- linux-2.4.21.orig/net/ipv4/tcp.c   2004-04-24 02:39:21.000000000 -0400
++++ linux-2.4.21/net/ipv4/tcp.c        2004-04-24 02:50:40.000000000 -0400
+@@ -748,7 +748,7 @@
+       goto out;
+ }
+-ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags);
++ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags, zccd_t *zccd);
+ static inline int
+ can_coalesce(struct sk_buff *skb, int i, struct page *page, int off)
+@@ -827,7 +827,7 @@
+       return err;
+ }
+-ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags)
++ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags, zccd_t *zccd)
+ {
+       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+       int mss_now;
+@@ -875,6 +875,17 @@
+                       copy = size;
+               i = skb_shinfo(skb)->nr_frags;
++
++              if (zccd != NULL &&             /* this is a zcc I/O */
++                  skb_shinfo(skb)->zccd != NULL && /* skb is part of a zcc I/O */
++                  skb_shinfo(skb)->zccd2 != NULL &&
++                  skb_shinfo(skb)->zccd != zccd && /* not the same one */
++                  skb_shinfo(skb)->zccd2 != zccd)
++              {
++                      tcp_mark_push (tp, skb);
++                      goto new_segment;
++              }
++
+               if (can_coalesce(skb, i, page, offset)) {
+                       skb_shinfo(skb)->frags[i-1].size += copy;
+               } else if (i < MAX_SKB_FRAGS) {
+@@ -885,6 +896,20 @@
+                       goto new_segment;
+               }
++              if (zccd != NULL &&     /* this is a zcc I/O */
++                  skb_shinfo(skb)->zccd != zccd && /* not already referencing this zccd */
++                  skb_shinfo(skb)->zccd2 != zccd)
++              {
++                      zccd_get (zccd);        /* bump ref count */
++
++                      BUG_TRAP (skb_shinfo(skb)->zccd2 == NULL);
++
++                      if (skb_shinfo(skb)->zccd == NULL) /* reference this zccd */
++                              skb_shinfo(skb)->zccd = zccd;
++                      else
++                              skb_shinfo(skb)->zccd2 = zccd;
++              }
++
+               skb->len += copy;
+               skb->data_len += copy;
+               skb->ip_summed = CHECKSUM_HW;
+@@ -948,7 +973,29 @@
+       lock_sock(sk);
+       TCP_CHECK_TIMER(sk);
+-      res = do_tcp_sendpages(sk, &page, offset, size, flags);
++      res = do_tcp_sendpages(sk, &page, offset, size, flags, NULL);
++      TCP_CHECK_TIMER(sk);
++      release_sock(sk);
++      return res;
++}
++
++ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, size_t size,
++                          int flags, zccd_t *zccd)
++{
++        ssize_t res;
++        struct sock *sk = sock->sk;
++
++#define TCP_ZC_CSUM_FLAGS (NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM)
++
++        if (!(sk->route_caps & NETIF_F_SG) ||   /* caller shouldn't waste her time */
++            !(sk->route_caps & TCP_ZC_CSUM_FLAGS)) /* on double mapping */
++                BUG ();
++
++        lock_sock(sk);
++        TCP_CHECK_TIMER(sk);
++
++        res = do_tcp_sendpages(sk, &page, 0, size, flags, zccd);
++
+       TCP_CHECK_TIMER(sk);
+       release_sock(sk);
+       return res;
+@@ -1772,6 +1819,202 @@
+       goto out;
+ }
++int tcp_recvpackets (struct sock *sk, struct sk_buff_head *packets,
++                   int len, int nonblock)
++{
++      struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
++      int copied;
++      long timeo;
++
++      BUG_TRAP (len > 0);
++      /*BUG_TRAP ((flags & (MSG_OOB | MSG_PEEK | MSG_TRUNC)) == 0);*/
++
++      lock_sock(sk);
++
++      TCP_CHECK_TIMER(sk);
++
++      copied = -ENOTCONN;
++      if (sk->state == TCP_LISTEN)
++              goto out;
++
++      copied = 0;
++      timeo = sock_rcvtimeo(sk, nonblock);
++
++      do {
++              struct sk_buff * skb;
++              u32 offset;
++              unsigned long used;
++              int exhausted;
++              int eaten;
++
++              /* Are we at urgent data? Stop if we have read anything. */
++              if (copied && tp->urg_data && tp->urg_seq == tp->copied_seq)
++                      break;
++
++              /* We need to check signals first, to get correct SIGURG
++               * handling. FIXME: Need to check this doesnt impact 1003.1g
++               * and move it down to the bottom of the loop
++               */
++              if (signal_pending(current)) {
++                      if (copied)
++                              break;
++                      copied = timeo ? sock_intr_errno(timeo) : -EAGAIN;
++                      break;
++              }
++
++              /* Next get a buffer. */
++
++              skb = skb_peek(&sk->receive_queue);
++
++              if (skb == NULL)                /* nothing ready */
++              {
++                      if (copied) {
++                              if (sk->err ||
++                                  sk->state == TCP_CLOSE ||
++                                  (sk->shutdown & RCV_SHUTDOWN) ||
++                                  !timeo ||
++                                  (0))
++                                      break;
++                      } else {
++                              if (sk->done)
++                                      break;
++
++                              if (sk->err) {
++                                      copied = sock_error(sk);
++                                      break;
++                              }
++
++                              if (sk->shutdown & RCV_SHUTDOWN)
++                                      break;
++
++                              if (sk->state == TCP_CLOSE) {
++                                      if (!sk->done) {
++                                              /* This occurs when user tries to read
++                                               * from never connected socket.
++                                               */
++                                              copied = -ENOTCONN;
++                                              break;
++                                      }
++                                      break;
++                              }
++
++                              if (!timeo) {
++                                      copied = -EAGAIN;
++                                      break;
++                              }
++                      }
++
++                      cleanup_rbuf(sk, copied);
++                      timeo = tcp_data_wait(sk, timeo);
++                      continue;
++              }
++
++              BUG_TRAP (atomic_read (&skb->users) == 1);
++
++              exhausted = eaten = 0;
++
++              offset = tp->copied_seq - TCP_SKB_CB(skb)->seq;
++              if (skb->h.th->syn)
++                      offset--;
++
++              used = skb->len - offset;
++
++              if (tp->urg_data) {
++                      u32 urg_offset = tp->urg_seq - tp->copied_seq;
++                      if (urg_offset < used) {
++                              if (!urg_offset) { /* at urgent date */
++                                      if (!sk->urginline) {
++                                              tp->copied_seq++; /* discard the single byte of urgent data */
++                                              offset++;
++                                              used--;
++                                      }
++                              } else          /* truncate read */
++                                      used = urg_offset;
++                      }
++              }
++
++              BUG_TRAP (used >= 0);
++              if (len < used)
++                      used = len;
++
++              if (used == 0)
++                      exhausted = 1;
++              else
++              {
++                      if (skb_is_nonlinear (skb))
++                      {
++                              int   rc = skb_linearize (skb, GFP_KERNEL);
++
++                              printk ("tcp_recvpackets(): linearising: %d\n", rc);
++
++                              if (rc)
++                              {
++                                      if (!copied)
++                                              copied = rc;
++                                      break;
++                              }
++                      }
++
++                      if ((offset + used) == skb->len) /* consuming the whole packet */
++                      {
++                              __skb_unlink (skb, &sk->receive_queue);
++                              dst_release (skb->dst);
++                              skb_orphan (skb);
++                              __skb_pull (skb, offset);
++                              __skb_queue_tail (packets, skb);
++                              exhausted = eaten = 1;
++                      }
++                      else                    /* consuming only part of the packet */
++                      {
++                              struct sk_buff *skb2 = skb_clone (skb, GFP_KERNEL);
++
++                              if (skb2 == NULL)
++                              {
++                                      if (!copied)
++                                              copied = -ENOMEM;
++                                      break;
++                              }
++
++                              dst_release (skb2->dst);
++                              __skb_pull (skb2, offset);
++                              __skb_trim (skb2, used);
++                              __skb_queue_tail (packets, skb2);
++                      }
++
++                      tp->copied_seq += used;
++                      copied += used;
++                      len -= used;
++              }
++
++              if (tp->urg_data && after(tp->copied_seq,tp->urg_seq)) {
++                      tp->urg_data = 0;
++                      tcp_fast_path_check(sk, tp);
++              }
++
++              if (!exhausted)
++                      continue;
++
++              if (skb->h.th->fin)
++              {
++                      tp->copied_seq++;
++                      if (!eaten)
++                              tcp_eat_skb (sk, skb);
++                      break;
++              }
++
++              if (!eaten)
++                      tcp_eat_skb (sk, skb);
++
++      } while (len > 0);
++
++ out:
++      /* Clean up data we have read: This will do ACK frames. */
++      cleanup_rbuf(sk, copied);
++      TCP_CHECK_TIMER(sk);
++      release_sock(sk);
++      return copied;
++}
++
+ /*
+  *    State processing on a close. This implements the state shift for
+  *    sending our FIN frame. Note that we only send a FIN for some
index dd07ef3..91dc15b 100644 (file)
  kernel/ksyms.c         |    1 
  11 files changed, 564 insertions(+), 126 deletions(-)
 
+Index: linux-2.4.18-p4smp/fs/dcache.c
+===================================================================
+--- linux-2.4.18-p4smp.orig/fs/dcache.c        2004-02-03 01:00:10.000000000 -0500
++++ linux-2.4.18-p4smp/fs/dcache.c     2004-03-19 16:05:42.000000000 -0500
+@@ -186,6 +186,13 @@
+               spin_unlock(&dcache_lock);
+               return 0;
+       }
++
++      /* network invalidation by Lustre */
++      if (dentry->d_flags & DCACHE_LUSTRE_INVALID) {
++              spin_unlock(&dcache_lock);
++              return 0;
++      }
++
+       /*
+        * Check whether to do a partial shrink_dcache
+        * to get rid of unused child entries.
+@@ -859,13 +866,19 @@ void d_delete(struct dentry * dentry)
+  * Adds a dentry to the hash according to its name.
+  */
+  
+-void d_rehash(struct dentry * entry)
++void __d_rehash(struct dentry * entry, int lock)
+ {
+       struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
+       if (!list_empty(&entry->d_hash)) BUG();
+-      spin_lock(&dcache_lock);
++      if (lock) spin_lock(&dcache_lock);
+       list_add(&entry->d_hash, list);
+-      spin_unlock(&dcache_lock);
++      if (lock) spin_unlock(&dcache_lock);
++}
++EXPORT_SYMBOL(__d_rehash);
++
++void d_rehash(struct dentry * entry)
++{
++      __d_rehash(entry, 1);
+ }
+ #define do_switch(x,y) do { \
 Index: linux-2.4.18-p4smp/fs/exec.c
 ===================================================================
 --- linux-2.4.18-p4smp.orig/fs/exec.c  2004-02-03 01:00:10.000000000 -0500
@@ -20,7 +61,7 @@ Index: linux-2.4.18-p4smp/fs/exec.c
        struct nameidata nd;
        int error;
 +      struct lookup_intent it = { .it_op = IT_OPEN,
-+                                           .it_flags = FMODE_READ|FMODE_EXEC };
++                                  .it_flags = FMODE_READ|FMODE_EXEC };
  
 -      error = user_path_walk(library, &nd);
 +      error = user_path_walk_it(library, &nd, &it);
@@ -37,14 +78,14 @@ Index: linux-2.4.18-p4smp/fs/exec.c
        error = PTR_ERR(file);
        if (IS_ERR(file))
                goto out;
-@@ -359,8 +362,9 @@ struct file *open_exec(const char *name)
+@@ -359,8 +362,10 @@ struct file *open_exec(const char *name)
        struct inode *inode;
        struct file *file;
        int err = 0;
--
--      err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
 +      struct lookup_intent it = { .it_op = IT_OPEN,
-+                                           .it_flags = FMODE_READ|FMODE_EXEC };
++                                  .it_flags = FMODE_READ|FMODE_EXEC };
+-      err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
 +      err = path_lookup_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it);
        file = ERR_PTR(err);
        if (!err) {
@@ -76,164 +117,6 @@ Index: linux-2.4.18-p4smp/fs/exec.c
                goto close_fail;
  
        retval = binfmt->core_dump(signr, regs, file);
-Index: linux-2.4.18-p4smp/fs/dcache.c
-===================================================================
---- linux-2.4.18-p4smp.orig/fs/dcache.c        2004-02-03 01:00:10.000000000 -0500
-+++ linux-2.4.18-p4smp/fs/dcache.c     2004-03-19 16:05:42.000000000 -0500
-@@ -186,6 +186,13 @@
-               spin_unlock(&dcache_lock);
-               return 0;
-       }
-+
-+      /* network invalidation by Lustre */
-+      if (dentry->d_flags & DCACHE_LUSTRE_INVALID) {
-+              spin_unlock(&dcache_lock);
-+              return 0;
-+      }
-+
-       /*
-        * Check whether to do a partial shrink_dcache
-        * to get rid of unused child entries.
-@@ -859,13 +866,19 @@ void d_delete(struct dentry * dentry)
-  * Adds a dentry to the hash according to its name.
-  */
-  
--void d_rehash(struct dentry * entry)
-+void __d_rehash(struct dentry * entry, int lock)
- {
-       struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
-       if (!list_empty(&entry->d_hash)) BUG();
--      spin_lock(&dcache_lock);
-+      if (lock) spin_lock(&dcache_lock);
-       list_add(&entry->d_hash, list);
--      spin_unlock(&dcache_lock);
-+      if (lock) spin_unlock(&dcache_lock);
-+}
-+EXPORT_SYMBOL(__d_rehash);
-+
-+void d_rehash(struct dentry * entry)
-+{
-+      __d_rehash(entry, 1);
- }
- #define do_switch(x,y) do { \
-Index: linux-2.4.18-p4smp/fs/namespace.c
-===================================================================
---- linux-2.4.18-p4smp.orig/fs/namespace.c     2004-02-03 01:00:10.000000000 -0500
-+++ linux-2.4.18-p4smp/fs/namespace.c  2004-03-19 16:05:42.000000000 -0500
-@@ -99,6 +99,7 @@
- {
-       old_nd->dentry = mnt->mnt_mountpoint;
-       old_nd->mnt = mnt->mnt_parent;
-+      UNPIN(old_nd->dentry, old_nd->mnt, 1);
-       mnt->mnt_parent = mnt;
-       mnt->mnt_mountpoint = mnt->mnt_root;
-       list_del_init(&mnt->mnt_child);
-@@ -110,6 +111,7 @@ static void attach_mnt(struct vfsmount *
- {
-       mnt->mnt_parent = mntget(nd->mnt);
-       mnt->mnt_mountpoint = dget(nd->dentry);
-+      PIN(nd->dentry, nd->mnt, 1);
-       list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry));
-       list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts);
-       nd->dentry->d_mounted++;
-@@ -485,14 +487,17 @@ static int do_loopback(struct nameidata 
- {
-       struct nameidata old_nd;
-       struct vfsmount *mnt = NULL;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       int err = mount_is_safe(nd);
-       if (err)
-               return err;
-       if (!old_name || !*old_name)
-               return -EINVAL;
--      err = path_lookup(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd);
--      if (err)
-+      err = path_lookup_it(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd, &it);
-+      if (err) {
-+              intent_release(&it);
-               return err;
-+      }
-       down_write(&current->namespace->sem);
-       err = -EINVAL;
-@@ -515,6 +520,7 @@ static int do_loopback(struct nameidata 
-       }
-       up_write(&current->namespace->sem);
-+      intent_release(&it);
-       path_release(&old_nd);
-       return err;
- }
-@@ -698,6 +704,7 @@ long do_mount(char * dev_name, char * di
-                 unsigned long flags, void *data_page)
- {
-       struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       int retval = 0;
-       int mnt_flags = 0;
-@@ -722,10 +729,11 @@ long do_mount(char * dev_name, char * di
-       flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV);
-       /* ... and get the mountpoint */
--      retval = path_lookup(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
--      if (retval)
-+      retval = path_lookup_it(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it);
-+      if (retval) {
-+              intent_release(&it);
-               return retval;
--
-+      }
-       if (flags & MS_REMOUNT)
-               retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
-                                   data_page);
-@@ -736,6 +744,8 @@ long do_mount(char * dev_name, char * di
-       else
-               retval = do_add_mount(&nd, type_page, flags, mnt_flags,
-                                     dev_name, data_page);
-+
-+      intent_release(&it);
-       path_release(&nd);
-       return retval;
- }
-@@ -901,6 +911,8 @@ asmlinkage long sys_pivot_root(const cha
- {
-       struct vfsmount *tmp;
-       struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd;
-+      struct lookup_intent new_it = { .it_op = IT_GETATTR };
-+      struct lookup_intent old_it = { .it_op = IT_GETATTR };
-       int error;
-       if (!capable(CAP_SYS_ADMIN))
-@@ -908,14 +920,14 @@ asmlinkage long sys_pivot_root(const cha
-       lock_kernel();
--      error = __user_walk(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd);
-+      error = __user_walk_it(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd, &new_it);
-       if (error)
-               goto out0;
-       error = -EINVAL;
-       if (!check_mnt(new_nd.mnt))
-               goto out1;
--      error = __user_walk(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd);
-+      error = __user_walk_it(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd, &old_it);
-       if (error)
-               goto out1;
-@@ -970,8 +982,10 @@ out2:
-       up(&old_nd.dentry->d_inode->i_zombie);
-       up_write(&current->namespace->sem);
-       path_release(&user_nd);
-+      intent_release(&old_it);
-       path_release(&old_nd);
- out1:
-+      intent_release(&new_it);
-       path_release(&new_nd);
- out0:
-       unlock_kernel();
 Index: linux-2.4.18-p4smp/fs/namei.c
 ===================================================================
 --- linux-2.4.18-p4smp.orig/fs/namei.c 2004-02-03 01:00:10.000000000 -0500
@@ -399,13 +282,11 @@ Index: linux-2.4.18-p4smp/fs/namei.c
                        ;
  
                err = -ENOENT;
-@@ -548,8 +585,8 @@ int link_path_walk(const char * name, st
-               if (!inode->i_op)
+@@ -549,7 +586,7 @@ int link_path_walk(const char * name, st
                        goto out_dput;
  
--              if (inode->i_op->follow_link) {
+               if (inode->i_op->follow_link) {
 -                      err = do_follow_link(dentry, nd);
-+              if (inode->i_op->follow_link || inode->i_op->follow_link2) {
 +                      err = do_follow_link(dentry, nd, NULL);
                        dput(dentry);
                        if (err)
@@ -419,7 +300,7 @@ Index: linux-2.4.18-p4smp/fs/namei.c
                        break;
                continue;
                /* here ends the main loop */
-@@ -592,22 +629,23 @@ last_component:
+@@ -592,22 +629,22 @@ last_component:
                        if (err < 0)
                                break;
                }
@@ -440,10 +321,8 @@ Index: linux-2.4.18-p4smp/fs/namei.c
                        ;
                inode = dentry->d_inode;
                if ((lookup_flags & LOOKUP_FOLLOW)
--                  && inode && inode->i_op && inode->i_op->follow_link) {
+                   && inode && inode->i_op && inode->i_op->follow_link) {
 -                      err = do_follow_link(dentry, nd);
-+                  && inode && inode->i_op &&
-+                  (inode->i_op->follow_link || inode->i_op->follow_link2)) {
 +                      err = do_follow_link(dentry, nd, it);
                        dput(dentry);
                        if (err)
@@ -471,7 +350,7 @@ Index: linux-2.4.18-p4smp/fs/namei.c
 +                              if (err)
 +                                      break;
 +                              new = real_lookup(dentry->d_parent,
-+                                                &dentry->d_name, 0, NULL);
++                                                &dentry->d_name, 0, it);
 +                              d_invalidate(dentry);
 +                              dput(dentry);
 +                              if (IS_ERR(new)) {
@@ -1046,6 +925,122 @@ Index: linux-2.4.18-p4smp/fs/namei.c
        if (page) {
                kunmap(page);
                page_cache_release(page);
+Index: linux-2.4.18-p4smp/fs/namespace.c
+===================================================================
+--- linux-2.4.18-p4smp.orig/fs/namespace.c     2004-02-03 01:00:10.000000000 -0500
++++ linux-2.4.18-p4smp/fs/namespace.c  2004-03-19 16:05:42.000000000 -0500
+@@ -99,6 +99,7 @@
+ {
+       old_nd->dentry = mnt->mnt_mountpoint;
+       old_nd->mnt = mnt->mnt_parent;
++      UNPIN(old_nd->dentry, old_nd->mnt, 1);
+       mnt->mnt_parent = mnt;
+       mnt->mnt_mountpoint = mnt->mnt_root;
+       list_del_init(&mnt->mnt_child);
+@@ -110,6 +111,7 @@ static void attach_mnt(struct vfsmount *
+ {
+       mnt->mnt_parent = mntget(nd->mnt);
+       mnt->mnt_mountpoint = dget(nd->dentry);
++      PIN(nd->dentry, nd->mnt, 1);
+       list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry));
+       list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts);
+       nd->dentry->d_mounted++;
+@@ -485,14 +487,17 @@ static int do_loopback(struct nameidata 
+ {
+       struct nameidata old_nd;
+       struct vfsmount *mnt = NULL;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+       int err = mount_is_safe(nd);
+       if (err)
+               return err;
+       if (!old_name || !*old_name)
+               return -EINVAL;
+-      err = path_lookup(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd);
+-      if (err)
++      err = path_lookup_it(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd, &it);
++      if (err) {
++              intent_release(&it);
+               return err;
++      }
+       down_write(&current->namespace->sem);
+       err = -EINVAL;
+@@ -515,6 +520,7 @@ static int do_loopback(struct nameidata 
+       }
+       up_write(&current->namespace->sem);
++      intent_release(&it);
+       path_release(&old_nd);
+       return err;
+ }
+@@ -698,6 +704,7 @@ long do_mount(char * dev_name, char * di
+                 unsigned long flags, void *data_page)
+ {
+       struct nameidata nd;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+       int retval = 0;
+       int mnt_flags = 0;
+@@ -722,9 +729,11 @@ long do_mount(char * dev_name, char * di
+       flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV);
+       /* ... and get the mountpoint */
+-      retval = path_lookup(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
+-      if (retval)
++      retval = path_lookup_it(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it);
++      if (retval) {
++              intent_release(&it);
+               return retval;
++      }
+       if (flags & MS_REMOUNT)
+               retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
+@@ -736,6 +744,8 @@ long do_mount(char * dev_name, char * di
+       else
+               retval = do_add_mount(&nd, type_page, flags, mnt_flags,
+                                     dev_name, data_page);
++
++      intent_release(&it);
+       path_release(&nd);
+       return retval;
+ }
+@@ -901,6 +911,8 @@ asmlinkage long sys_pivot_root(const cha
+ {
+       struct vfsmount *tmp;
+       struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd;
++      struct lookup_intent new_it = { .it_op = IT_GETATTR };
++      struct lookup_intent old_it = { .it_op = IT_GETATTR };
+       int error;
+       if (!capable(CAP_SYS_ADMIN))
+@@ -908,14 +920,14 @@ asmlinkage long sys_pivot_root(const cha
+       lock_kernel();
+-      error = __user_walk(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd);
++      error = __user_walk_it(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd, &new_it);
+       if (error)
+               goto out0;
+       error = -EINVAL;
+       if (!check_mnt(new_nd.mnt))
+               goto out1;
+-      error = __user_walk(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd);
++      error = __user_walk_it(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd, &old_it);
+       if (error)
+               goto out1;
+@@ -970,8 +982,10 @@ out2:
+       up(&old_nd.dentry->d_inode->i_zombie);
+       up_write(&current->namespace->sem);
+       path_release(&user_nd);
++      intent_release(&old_it);
+       path_release(&old_nd);
+ out1:
++      intent_release(&new_it);
+       path_release(&new_nd);
+ out0:
+       unlock_kernel();
 Index: linux-2.4.18-p4smp/fs/open.c
 ===================================================================
 --- linux-2.4.18-p4smp.orig/fs/open.c  2004-02-03 01:00:10.000000000 -0500
@@ -1229,16 +1224,14 @@ Index: linux-2.4.18-p4smp/fs/open.c
                path_release(&nd);
        }
  
-@@ -385,8 +430,11 @@ asmlinkage long sys_chdir(const char * f
+@@ -385,8 +430,9 @@ asmlinkage long sys_chdir(const char * f
  {
        int error;
        struct nameidata nd;
 +      struct lookup_intent it = { .it_op = IT_GETATTR };
  
 -      error = __user_walk(filename,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd);
-+      error = __user_walk_it(filename,
-+                             LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,
-+                             &nd, &it);
++      error = __user_walk_it(filename,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd, &it);
        if (error)
                goto out;
  
@@ -1588,9 +1581,9 @@ Index: linux-2.4.18-p4smp/include/linux/dcache.h
 +      void (*d_unpin)(struct dentry *, struct vfsmount *, int);
  };
  
-+#define PIN(de,mnt,flag)  if (de->d_op && de->d_op->d_pin) \
++#define PIN(de,mnt,flag)  if (de && de->d_op && de->d_op->d_pin) \
 +                              de->d_op->d_pin(de, mnt, flag);
-+#define UNPIN(de,mnt,flag)  if (de->d_op && de->d_op->d_unpin) \
++#define UNPIN(de,mnt,flag)  if (de && de->d_op && de->d_op->d_unpin) \
 +                              de->d_op->d_unpin(de, mnt, flag);
 +
 +
@@ -1628,7 +1621,7 @@ Index: linux-2.4.18-p4smp/include/linux/fs.h
  #define ATTR_ATTR_FLAG        1024
 +#define ATTR_RAW      0x0800  /* file system, not vfs will massage attrs */
 +#define ATTR_FROM_OPEN        0x1000  /* called from open path, ie O_TRUNC */
-+#define ATTR_CTIME_SET        0x2000  /* called from open path, ie O_TRUNC */
++#define ATTR_CTIME_SET        0x2000
  
  /*
   * This is the Inode Attributes structure, used for notify_change().  It
@@ -1666,7 +1659,7 @@ Index: linux-2.4.18-p4smp/include/linux/fs.h
  
  /*
   * File types
-@@ -900,21 +908,34 @@ struct file_operations {
+@@ -900,21 +908,32 @@ struct file_operations {
  
  struct inode_operations {
        int (*create) (struct inode *,struct dentry *,int);
@@ -1690,8 +1683,6 @@ Index: linux-2.4.18-p4smp/include/linux/fs.h
 +      int (*rename_raw) (struct nameidata *, struct nameidata *);
        int (*readlink) (struct dentry *, char *,int);
        int (*follow_link) (struct dentry *, struct nameidata *);
-+      int (*follow_link2) (struct dentry *, struct nameidata *,
-+                           struct lookup_intent *it);
        void (*truncate) (struct inode *);
        int (*permission) (struct inode *, int);
        int (*revalidate) (struct dentry *);
@@ -1746,24 +1737,6 @@ Index: linux-2.4.18-p4smp/include/linux/fs.h
  extern int page_readlink(struct dentry *, char *, int);
  extern int page_follow_link(struct dentry *, struct nameidata *);
  extern struct inode_operations page_symlink_inode_operations;
-Index: linux-2.4.18-p4smp/kernel/fork.c
-===================================================================
---- linux-2.4.18-p4smp.orig/kernel/fork.c      2004-02-03 01:00:10.000000000 -0500
-+++ linux-2.4.18-p4smp/kernel/fork.c   2004-03-19 16:05:42.000000000 -0500
-@@ -399,10 +399,13 @@
-               fs->umask = old->umask;
-               read_lock(&old->lock);
-               fs->rootmnt = mntget(old->rootmnt);
-+              PIN(old->pwd, old->pwdmnt, 0);
-+              PIN(old->root, old->rootmnt, 1);
-               fs->root = dget(old->root);
-               fs->pwdmnt = mntget(old->pwdmnt);
-               fs->pwd = dget(old->pwd);
-               if (old->altroot) {
-+                      PIN(old->altroot, old->altrootmnt, 1);
-                       fs->altrootmnt = mntget(old->altrootmnt);
-                       fs->altroot = dget(old->altroot);
-               } else {
 Index: linux-2.4.18-p4smp/kernel/exit.c
 ===================================================================
 --- linux-2.4.18-p4smp.orig/kernel/exit.c      2004-02-03 01:00:10.000000000 -0500
@@ -1783,6 +1756,24 @@ Index: linux-2.4.18-p4smp/kernel/exit.c
                        dput(fs->altroot);
                        mntput(fs->altrootmnt);
                }
+Index: linux-2.4.18-p4smp/kernel/fork.c
+===================================================================
+--- linux-2.4.18-p4smp.orig/kernel/fork.c      2004-02-03 01:00:10.000000000 -0500
++++ linux-2.4.18-p4smp/kernel/fork.c   2004-03-19 16:05:42.000000000 -0500
+@@ -399,10 +399,13 @@
+               fs->umask = old->umask;
+               read_lock(&old->lock);
+               fs->rootmnt = mntget(old->rootmnt);
++              PIN(old->pwd, old->pwdmnt, 0);
++              PIN(old->root, old->rootmnt, 1);
+               fs->root = dget(old->root);
+               fs->pwdmnt = mntget(old->pwdmnt);
+               fs->pwd = dget(old->pwd);
+               if (old->altroot) {
++                      PIN(old->altroot, old->altrootmnt, 1);
+                       fs->altrootmnt = mntget(old->altrootmnt);
+                       fs->altroot = dget(old->altroot);
+               } else {
 Index: linux-2.4.18-p4smp/kernel/ksyms.c
 ===================================================================
 --- linux-2.4.18-p4smp.orig/kernel/ksyms.c     2004-03-19 16:05:40.000000000 -0500
index 7c4ea56..4ccfa4d 100644 (file)
  kernel/ksyms.c            |    1 
  12 files changed, 558 insertions(+), 128 deletions(-)
 
+Index: linux-2.4.19-pre1/fs/dcache.c
+===================================================================
+--- linux-2.4.19-pre1.orig/fs/dcache.c 2003-11-21 02:41:00.000000000 +0300
++++ linux-2.4.19-pre1/fs/dcache.c      2003-11-21 02:51:38.000000000 +0300
+@@ -181,6 +181,13 @@
+               spin_unlock(&dcache_lock);
+               return 0;
+       }
++
++      /* network invalidation by Lustre */
++      if (dentry->d_flags & DCACHE_LUSTRE_INVALID) {
++              spin_unlock(&dcache_lock);
++              return 0;
++      }
++
+       /*
+        * Check whether to do a partial shrink_dcache
+        * to get rid of unused child entries.
+@@ -831,13 +838,19 @@
+  * Adds a dentry to the hash according to its name.
+  */
+  
+-void d_rehash(struct dentry * entry)
++void __d_rehash(struct dentry * entry, int lock)
+ {
+       struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
+       if (!list_empty(&entry->d_hash)) BUG();
+-      spin_lock(&dcache_lock);
++      if (lock) spin_lock(&dcache_lock);
+       list_add(&entry->d_hash, list);
+-      spin_unlock(&dcache_lock);
++      if (lock) spin_unlock(&dcache_lock);
++}
++EXPORT_SYMBOL(__d_rehash);
++
++void d_rehash(struct dentry * entry)
++{
++      __d_rehash(entry, 1);
+ }
+ #define do_switch(x,y) do { \
 Index: linux-2.4.19-pre1/fs/exec.c
 ===================================================================
 --- linux-2.4.19-pre1.orig/fs/exec.c   2003-11-21 02:41:00.000000000 +0300
@@ -78,165 +119,6 @@ Index: linux-2.4.19-pre1/fs/exec.c
                goto close_fail;
  
        retval = binfmt->core_dump(signr, regs, file);
-Index: linux-2.4.19-pre1/fs/dcache.c
-===================================================================
---- linux-2.4.19-pre1.orig/fs/dcache.c 2003-11-21 02:41:00.000000000 +0300
-+++ linux-2.4.19-pre1/fs/dcache.c      2003-11-21 02:51:38.000000000 +0300
-@@ -181,6 +181,13 @@
-               spin_unlock(&dcache_lock);
-               return 0;
-       }
-+
-+      /* network invalidation by Lustre */
-+      if (dentry->d_flags & DCACHE_LUSTRE_INVALID) {
-+              spin_unlock(&dcache_lock);
-+              return 0;
-+      }
-+
-       /*
-        * Check whether to do a partial shrink_dcache
-        * to get rid of unused child entries.
-@@ -831,13 +838,19 @@
-  * Adds a dentry to the hash according to its name.
-  */
-  
--void d_rehash(struct dentry * entry)
-+void __d_rehash(struct dentry * entry, int lock)
- {
-       struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
-       if (!list_empty(&entry->d_hash)) BUG();
--      spin_lock(&dcache_lock);
-+      if (lock) spin_lock(&dcache_lock);
-       list_add(&entry->d_hash, list);
--      spin_unlock(&dcache_lock);
-+      if (lock) spin_unlock(&dcache_lock);
-+}
-+EXPORT_SYMBOL(__d_rehash);
-+
-+void d_rehash(struct dentry * entry)
-+{
-+      __d_rehash(entry, 1);
- }
- #define do_switch(x,y) do { \
-Index: linux-2.4.19-pre1/fs/namespace.c
-===================================================================
---- linux-2.4.19-pre1.orig/fs/namespace.c      2003-11-21 02:41:00.000000000 +0300
-+++ linux-2.4.19-pre1/fs/namespace.c   2003-11-21 02:51:38.000000000 +0300
-@@ -107,6 +107,7 @@
- {
-       old_nd->dentry = mnt->mnt_mountpoint;
-       old_nd->mnt = mnt->mnt_parent;
-+      UNPIN(old_nd->dentry, old_nd->mnt, 1);
-       mnt->mnt_parent = mnt;
-       mnt->mnt_mountpoint = mnt->mnt_root;
-       list_del_init(&mnt->mnt_child);
-@@ -118,6 +119,7 @@
- {
-       mnt->mnt_parent = mntget(nd->mnt);
-       mnt->mnt_mountpoint = dget(nd->dentry);
-+      PIN(nd->dentry, nd->mnt, 1);
-       list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry));
-       list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts);
-       nd->dentry->d_mounted++;
-@@ -500,15 +502,18 @@
- {
-       struct nameidata old_nd;
-       struct vfsmount *mnt = NULL;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       int err = mount_is_safe(nd);
-       if (err)
-               return err;
-       if (!old_name || !*old_name)
-               return -EINVAL;
-       if (path_init(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd))
--              err = path_walk(old_name, &old_nd);
--      if (err)
-+              err = path_walk_it(old_name, &old_nd, &it);
-+      if (err) {
-+              intent_release(&it);
-               return err;
-+      }
-       down(&mount_sem);
-       err = -EINVAL;
-@@ -531,6 +536,7 @@
-       }
-       up(&mount_sem);
-+      intent_release(&it);
-       path_release(&old_nd);
-       return err;
- }
-@@ -706,6 +712,7 @@
-                 unsigned long flags, void *data_page)
- {
-       struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       int retval = 0;
-       int mnt_flags = 0;
-@@ -731,9 +738,11 @@
-       /* ... and get the mountpoint */
-       if (path_init(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd))
--              retval = path_walk(dir_name, &nd);
--      if (retval)
-+              retval = path_walk_it(dir_name, &nd, &it);
-+      if (retval) {
-+              intent_release(&it);
-               return retval;
-+      }
-       if (flags & MS_REMOUNT)
-               retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
-@@ -745,6 +754,8 @@
-       else
-               retval = do_add_mount(&nd, type_page, flags, mnt_flags,
-                                     dev_name, data_page);
-+
-+      intent_release(&it);
-       path_release(&nd);
-       return retval;
- }
-@@ -830,6 +841,8 @@
- {
-       struct vfsmount *tmp;
-       struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd;
-+      struct lookup_intent new_it = { .it_op = IT_GETATTR };
-+      struct lookup_intent old_it = { .it_op = IT_GETATTR };
-       char *name;
-       int error;
-@@ -844,7 +857,7 @@
-               goto out0;
-       error = 0;
-       if (path_init(name, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd))
--              error = path_walk(name, &new_nd);
-+              error = path_walk_it(name, &new_nd, &new_it);
-       putname(name);
-       if (error)
-               goto out0;
-@@ -858,7 +871,7 @@
-               goto out1;
-       error = 0;
-       if (path_init(name, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd))
--              error = path_walk(name, &old_nd);
-+              error = path_walk_it(name, &old_nd, &old_it);
-       putname(name);
-       if (error)
-               goto out1;
-@@ -914,8 +927,10 @@
-       up(&old_nd.dentry->d_inode->i_zombie);
-       up(&mount_sem);
-       path_release(&user_nd);
-+      intent_release(&old_it);
-       path_release(&old_nd);
- out1:
-+      intent_release(&new_it);
-       path_release(&new_nd);
- out0:
-       unlock_kernel();
 Index: linux-2.4.19-pre1/fs/namei.c
 ===================================================================
 --- linux-2.4.19-pre1.orig/fs/namei.c  2003-11-21 02:41:00.000000000 +0300
@@ -370,7 +252,7 @@ Index: linux-2.4.19-pre1/fs/namei.c
                if (!dentry) {
 -                      dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
 +                      dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE,
-+                                              NULL);
++                                           NULL);
                        err = PTR_ERR(dentry);
                        if (IS_ERR(dentry))
                                break;
@@ -431,7 +313,7 @@ Index: linux-2.4.19-pre1/fs/namei.c
                                break;
                }
                goto return_base;
-@@ -625,21 +663,66 @@
+@@ -625,21 +663,68 @@
                        nd->last_type = LAST_DOT;
                else if (this.len == 2 && this.name[1] == '.')
                        nd->last_type = LAST_DOTDOT;
@@ -450,7 +332,7 @@ Index: linux-2.4.19-pre1/fs/namei.c
 +                              if (err)
 +                                      break;
 +                              new = real_lookup(dentry->d_parent,
-+                                                &dentry->d_name, 0, NULL);
++                                                &dentry->d_name, 0, it);
 +                              d_invalidate(dentry);
 +                              dput(dentry);
 +                              if (IS_ERR(new)) {
@@ -459,6 +341,8 @@ Index: linux-2.4.19-pre1/fs/namei.c
 +                              }
 +                              nd->dentry = new;
 +                      }
++                      if (!nd->dentry->d_inode)
++                              goto no_inode;
 +              } else
 +              if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
 +                      err = -ESTALE;
@@ -744,12 +628,16 @@ Index: linux-2.4.19-pre1/fs/namei.c
        if (IS_ERR(dentry))
                goto fail;
        if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
-@@ -1252,7 +1392,16 @@
+@@ -1252,7 +1392,20 @@
                error = path_walk(tmp, &nd);
        if (error)
                goto out;
 -      dentry = lookup_create(&nd, 0);
 +
++      if (nd.last_type != LAST_NORM) {
++              error = -EEXIST;
++              goto out2;
++      }
 +      if (nd.dentry->d_inode->i_op->mknod_raw) {
 +              struct inode_operations *op = nd.dentry->d_inode->i_op;
 +              error = op->mknod_raw(&nd, mode, dev);
@@ -770,11 +658,15 @@ Index: linux-2.4.19-pre1/fs/namei.c
        path_release(&nd);
  out:
        putname(tmp);
-@@ -1321,7 +1471,14 @@
+@@ -1321,7 +1471,18 @@
                        error = path_walk(tmp, &nd);
                if (error)
                        goto out;
 -              dentry = lookup_create(&nd, 1);
++              if (nd.last_type != LAST_NORM) {
++                      error = -EEXIST;
++                      goto out2;
++              }
 +              if (nd.dentry->d_inode->i_op->mkdir_raw) {
 +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
 +                      error = op->mkdir_raw(&nd, mode);
@@ -829,11 +721,15 @@ Index: linux-2.4.19-pre1/fs/namei.c
        error = PTR_ERR(dentry);
        if (!IS_ERR(dentry)) {
                /* Why not before? Because we want correct error value */
-@@ -1557,15 +1730,23 @@
+@@ -1557,15 +1730,27 @@
                        error = path_walk(to, &nd);
                if (error)
                        goto out;
 -              dentry = lookup_create(&nd, 0);
++              if (nd.last_type != LAST_NORM) {
++                      error = -EEXIST;
++                      goto out2;
++              }
 +              if (nd.dentry->d_inode->i_op->symlink_raw) {
 +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
 +                      error = op->symlink_raw(&nd, from);
@@ -855,11 +751,15 @@ Index: linux-2.4.19-pre1/fs/namei.c
                putname(to);
        }
        putname(from);
-@@ -1648,7 +1829,14 @@
+@@ -1648,7 +1829,18 @@
                error = -EXDEV;
                if (old_nd.mnt != nd.mnt)
                        goto out_release;
 -              new_dentry = lookup_create(&nd, 0);
++              if (nd.last_type != LAST_NORM) {
++                      error = -EEXIST;
++                      goto out_release;
++              }
 +              if (nd.dentry->d_inode->i_op->link_raw) {
 +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
 +                      error = op->link_raw(&old_nd, &nd);
@@ -981,6 +881,124 @@ Index: linux-2.4.19-pre1/fs/namei.c
        if (page) {
                kunmap(page);
                page_cache_release(page);
+Index: linux-2.4.19-pre1/fs/namespace.c
+===================================================================
+--- linux-2.4.19-pre1.orig/fs/namespace.c      2003-11-21 02:41:00.000000000 +0300
++++ linux-2.4.19-pre1/fs/namespace.c   2003-11-21 02:51:38.000000000 +0300
+@@ -107,6 +107,7 @@
+ {
+       old_nd->dentry = mnt->mnt_mountpoint;
+       old_nd->mnt = mnt->mnt_parent;
++      UNPIN(old_nd->dentry, old_nd->mnt, 1);
+       mnt->mnt_parent = mnt;
+       mnt->mnt_mountpoint = mnt->mnt_root;
+       list_del_init(&mnt->mnt_child);
+@@ -118,6 +119,7 @@
+ {
+       mnt->mnt_parent = mntget(nd->mnt);
+       mnt->mnt_mountpoint = dget(nd->dentry);
++      PIN(nd->dentry, nd->mnt, 1);
+       list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry));
+       list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts);
+       nd->dentry->d_mounted++;
+@@ -500,15 +502,18 @@
+ {
+       struct nameidata old_nd;
+       struct vfsmount *mnt = NULL;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+       int err = mount_is_safe(nd);
+       if (err)
+               return err;
+       if (!old_name || !*old_name)
+               return -EINVAL;
+       if (path_init(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd))
+-              err = path_walk(old_name, &old_nd);
+-      if (err)
++              err = path_walk_it(old_name, &old_nd, &it);
++      if (err) {
++              intent_release(&it);
+               return err;
++      }
+       down(&mount_sem);
+       err = -EINVAL;
+@@ -531,6 +536,7 @@
+       }
+       up(&mount_sem);
++      intent_release(&it);
+       path_release(&old_nd);
+       return err;
+ }
+@@ -706,6 +712,7 @@
+                 unsigned long flags, void *data_page)
+ {
+       struct nameidata nd;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+       int retval = 0;
+       int mnt_flags = 0;
+@@ -731,9 +738,11 @@
+       /* ... and get the mountpoint */
+       if (path_init(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd))
+-              retval = path_walk(dir_name, &nd);
+-      if (retval)
++              retval = path_walk_it(dir_name, &nd, &it);
++      if (retval) {
++              intent_release(&it);
+               return retval;
++      }
+       if (flags & MS_REMOUNT)
+               retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
+@@ -745,6 +754,8 @@
+       else
+               retval = do_add_mount(&nd, type_page, flags, mnt_flags,
+                                     dev_name, data_page);
++
++      intent_release(&it);
+       path_release(&nd);
+       return retval;
+ }
+@@ -830,6 +841,8 @@
+ {
+       struct vfsmount *tmp;
+       struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd;
++      struct lookup_intent new_it = { .it_op = IT_GETATTR };
++      struct lookup_intent old_it = { .it_op = IT_GETATTR };
+       char *name;
+       int error;
+@@ -844,7 +857,7 @@
+               goto out0;
+       error = 0;
+       if (path_init(name, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd))
+-              error = path_walk(name, &new_nd);
++              error = path_walk_it(name, &new_nd, &new_it);
+       putname(name);
+       if (error)
+               goto out0;
+@@ -858,7 +871,7 @@
+               goto out1;
+       error = 0;
+       if (path_init(name, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd))
+-              error = path_walk(name, &old_nd);
++              error = path_walk_it(name, &old_nd, &old_it);
+       putname(name);
+       if (error)
+               goto out1;
+@@ -914,8 +927,10 @@
+       up(&old_nd.dentry->d_inode->i_zombie);
+       up(&mount_sem);
+       path_release(&user_nd);
++      intent_release(&old_it);
+       path_release(&old_nd);
+ out1:
++      intent_release(&new_it);
+       path_release(&new_nd);
+ out0:
+       unlock_kernel();
 Index: linux-2.4.19-pre1/fs/open.c
 ===================================================================
 --- linux-2.4.19-pre1.orig/fs/open.c   2003-11-21 02:41:00.000000000 +0300
@@ -1397,7 +1415,7 @@ Index: linux-2.4.19-pre1/fs/stat.c
 ===================================================================
 --- linux-2.4.19-pre1.orig/fs/stat.c   2003-11-21 02:41:00.000000000 +0300
 +++ linux-2.4.19-pre1/fs/stat.c        2003-11-21 02:51:38.000000000 +0300
-@@ -17,10 +17,14 @@
+@@ -17,10 +17,12 @@
   * Revalidate the inode. This is required for proper NFS attribute caching.
   */
  static __inline__ int
@@ -1406,8 +1424,6 @@ Index: linux-2.4.19-pre1/fs/stat.c
  {
        struct inode * inode = dentry->d_inode;
 -      if (inode->i_op && inode->i_op->revalidate)
-+      if (!inode)
-+              return -ENOENT;
 +      if (inode->i_op && inode->i_op->revalidate_it)
 +              return inode->i_op->revalidate_it(dentry, it);
 +      else if (inode->i_op && inode->i_op->revalidate)
@@ -1661,7 +1677,7 @@ Index: linux-2.4.19-pre1/include/linux/fs.h
  #define ATTR_ATTR_FLAG        1024
 +#define ATTR_RAW      0x0800  /* file system, not vfs will massage attrs */
 +#define ATTR_FROM_OPEN        0x1000  /* called from open path, ie O_TRUNC */
-+#define ATTR_CTIME_SET 0x2000
++#define ATTR_CTIME_SET        0x2000
  
  /*
   * This is the Inode Attributes structure, used for notify_change().  It
@@ -1804,18 +1820,25 @@ Index: linux-2.4.19-pre1/include/linux/fs_struct.h
                dput(old_pwd);
                mntput(old_pwdmnt);
        }
-Index: linux-2.4.19-pre1/kernel/ksyms.c
+Index: linux-2.4.19-pre1/kernel/exit.c
 ===================================================================
---- linux-2.4.19-pre1.orig/kernel/ksyms.c      2003-11-21 02:51:37.000000000 +0300
-+++ linux-2.4.19-pre1/kernel/ksyms.c   2003-11-21 02:51:38.000000000 +0300
-@@ -260,6 +260,7 @@
- EXPORT_SYMBOL(set_page_dirty);
- EXPORT_SYMBOL(vfs_readlink);
- EXPORT_SYMBOL(vfs_follow_link);
-+EXPORT_SYMBOL(vfs_follow_link_it);
- EXPORT_SYMBOL(page_readlink);
- EXPORT_SYMBOL(page_follow_link);
- EXPORT_SYMBOL(page_symlink_inode_operations);
+--- linux-2.4.19-pre1.orig/kernel/exit.c       2003-11-21 02:41:00.000000000 +0300
++++ linux-2.4.19-pre1/kernel/exit.c    2003-11-21 02:51:38.000000000 +0300
+@@ -245,11 +245,14 @@
+ {
+       /* No need to hold fs->lock if we are killing it */
+       if (atomic_dec_and_test(&fs->count)) {
++              UNPIN(fs->pwd, fs->pwdmnt, 0);
++              UNPIN(fs->root, fs->rootmnt, 1);
+               dput(fs->root);
+               mntput(fs->rootmnt);
+               dput(fs->pwd);
+               mntput(fs->pwdmnt);
+               if (fs->altroot) {
++                      UNPIN(fs->altroot, fs->altrootmnt, 1);
+                       dput(fs->altroot);
+                       mntput(fs->altrootmnt);
+               }
 Index: linux-2.4.19-pre1/kernel/fork.c
 ===================================================================
 --- linux-2.4.19-pre1.orig/kernel/fork.c       2003-11-21 02:41:00.000000000 +0300
@@ -1834,22 +1857,15 @@ Index: linux-2.4.19-pre1/kernel/fork.c
                        fs->altrootmnt = mntget(old->altrootmnt);
                        fs->altroot = dget(old->altroot);
                } else {
-Index: linux-2.4.19-pre1/kernel/exit.c
+Index: linux-2.4.19-pre1/kernel/ksyms.c
 ===================================================================
---- linux-2.4.19-pre1.orig/kernel/exit.c       2003-11-21 02:41:00.000000000 +0300
-+++ linux-2.4.19-pre1/kernel/exit.c    2003-11-21 02:51:38.000000000 +0300
-@@ -245,11 +245,14 @@
- {
-       /* No need to hold fs->lock if we are killing it */
-       if (atomic_dec_and_test(&fs->count)) {
-+              UNPIN(fs->pwd, fs->pwdmnt, 0);
-+              UNPIN(fs->root, fs->rootmnt, 1);
-               dput(fs->root);
-               mntput(fs->rootmnt);
-               dput(fs->pwd);
-               mntput(fs->pwdmnt);
-               if (fs->altroot) {
-+                      UNPIN(fs->altroot, fs->altrootmnt, 1);
-                       dput(fs->altroot);
-                       mntput(fs->altrootmnt);
-               }
+--- linux-2.4.19-pre1.orig/kernel/ksyms.c      2003-11-21 02:51:37.000000000 +0300
++++ linux-2.4.19-pre1/kernel/ksyms.c   2003-11-21 02:51:38.000000000 +0300
+@@ -260,6 +260,7 @@
+ EXPORT_SYMBOL(set_page_dirty);
+ EXPORT_SYMBOL(vfs_readlink);
+ EXPORT_SYMBOL(vfs_follow_link);
++EXPORT_SYMBOL(vfs_follow_link_it);
+ EXPORT_SYMBOL(page_readlink);
+ EXPORT_SYMBOL(page_follow_link);
+ EXPORT_SYMBOL(page_symlink_inode_operations);
index 7741be4..b6ab3b6 100644 (file)
  kernel/ksyms.c            |    1 
  12 files changed, 558 insertions(+), 128 deletions(-)
 
+Index: linux-2.4.19.SuSE/fs/dcache.c
+===================================================================
+--- linux-2.4.19.SuSE.orig/fs/dcache.c Mon Jan 27 05:08:04 2003
++++ linux-2.4.19.SuSE/fs/dcache.c      Sat Nov 15 17:29:03 2003
+@@ -186,6 +186,13 @@
+               spin_unlock(&dcache_lock);
+               return 0;
+       }
++
++      /* network invalidation by Lustre */
++      if (dentry->d_flags & DCACHE_LUSTRE_INVALID) {
++              spin_unlock(&dcache_lock);
++              return 0;
++      }
++
+       /*
+        * Check whether to do a partial shrink_dcache
+        * to get rid of unused child entries.
+@@ -838,13 +845,19 @@
+  * Adds a dentry to the hash according to its name.
+  */
+  
+-void d_rehash(struct dentry * entry)
++void __d_rehash(struct dentry * entry, int lock)
+ {
+       struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
+       if (!list_empty(&entry->d_hash)) BUG();
+-      spin_lock(&dcache_lock);
++      if (lock) spin_lock(&dcache_lock);
+       list_add(&entry->d_hash, list);
+-      spin_unlock(&dcache_lock);
++      if (lock) spin_unlock(&dcache_lock);
++}
++EXPORT_SYMBOL(__d_rehash);
++
++void d_rehash(struct dentry * entry)
++{
++      __d_rehash(entry, 1);
+ }
+ #define do_switch(x,y) do { \
 Index: linux-2.4.19.SuSE/fs/exec.c
 ===================================================================
 --- linux-2.4.19.SuSE.orig/fs/exec.c   Mon Jan 27 05:08:35 2003
@@ -78,165 +119,6 @@ Index: linux-2.4.19.SuSE/fs/exec.c
                goto close_fail;
  
        retval = binfmt->core_dump(signr, regs, file);
-Index: linux-2.4.19.SuSE/fs/dcache.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/dcache.c Mon Jan 27 05:08:04 2003
-+++ linux-2.4.19.SuSE/fs/dcache.c      Sat Nov 15 17:29:03 2003
-@@ -186,6 +186,13 @@
-               spin_unlock(&dcache_lock);
-               return 0;
-       }
-+
-+      /* network invalidation by Lustre */
-+      if (dentry->d_flags & DCACHE_LUSTRE_INVALID) {
-+              spin_unlock(&dcache_lock);
-+              return 0;
-+      }
-+
-       /*
-        * Check whether to do a partial shrink_dcache
-        * to get rid of unused child entries.
-@@ -838,13 +845,19 @@
-  * Adds a dentry to the hash according to its name.
-  */
-  
--void d_rehash(struct dentry * entry)
-+void __d_rehash(struct dentry * entry, int lock)
- {
-       struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
-       if (!list_empty(&entry->d_hash)) BUG();
--      spin_lock(&dcache_lock);
-+      if (lock) spin_lock(&dcache_lock);
-       list_add(&entry->d_hash, list);
--      spin_unlock(&dcache_lock);
-+      if (lock) spin_unlock(&dcache_lock);
-+}
-+EXPORT_SYMBOL(__d_rehash);
-+
-+void d_rehash(struct dentry * entry)
-+{
-+      __d_rehash(entry, 1);
- }
- #define do_switch(x,y) do { \
-Index: linux-2.4.19.SuSE/fs/namespace.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/namespace.c      Mon Jan 27 05:08:07 2003
-+++ linux-2.4.19.SuSE/fs/namespace.c   Sat Nov 15 17:56:42 2003
-@@ -97,6 +97,7 @@
- {
-       old_nd->dentry = mnt->mnt_mountpoint;
-       old_nd->mnt = mnt->mnt_parent;
-+      UNPIN(old_nd->dentry, old_nd->mnt, 1);
-       mnt->mnt_parent = mnt;
-       mnt->mnt_mountpoint = mnt->mnt_root;
-       list_del_init(&mnt->mnt_child);
-@@ -108,6 +109,7 @@
- {
-       mnt->mnt_parent = mntget(nd->mnt);
-       mnt->mnt_mountpoint = dget(nd->dentry);
-+      PIN(nd->dentry, nd->mnt, 1);
-       list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry));
-       list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts);
-       nd->dentry->d_mounted++;
-@@ -491,15 +493,18 @@
- {
-       struct nameidata old_nd;
-       struct vfsmount *mnt = NULL;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       int err = mount_is_safe(nd);
-       if (err)
-               return err;
-       if (!old_name || !*old_name)
-               return -EINVAL;
-       if (path_init(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd))
--              err = path_walk(old_name, &old_nd);
--      if (err)
-+              err = path_walk_it(old_name, &old_nd, &it);
-+      if (err) {
-+              intent_release(&it);
-               return err;
-+      }
-       down_write(&current->namespace->sem);
-       err = -EINVAL;
-@@ -522,6 +527,7 @@
-       }
-       up_write(&current->namespace->sem);
-+      intent_release(&it);
-       path_release(&old_nd);
-       return err;
- }
-@@ -725,6 +731,7 @@
-                 unsigned long flags, void *data_page)
- {
-       struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       int retval = 0;
-       int mnt_flags = 0;
-@@ -750,9 +757,11 @@
-       /* ... and get the mountpoint */
-       if (path_init(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd))
--              retval = path_walk(dir_name, &nd);
--      if (retval)
-+              retval = path_walk_it(dir_name, &nd, &it);
-+      if (retval) {
-+              intent_release(&it);
-               return retval;
-+      }
-       if (flags & MS_REMOUNT)
-               retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
-@@ -764,6 +773,8 @@
-       else
-               retval = do_add_mount(&nd, type_page, flags, mnt_flags,
-                                     dev_name, data_page);
-+
-+      intent_release(&it);
-       path_release(&nd);
-       return retval;
- }
-@@ -929,6 +940,8 @@
- {
-       struct vfsmount *tmp;
-       struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd;
-+      struct lookup_intent new_it = { .it_op = IT_GETATTR };
-+      struct lookup_intent old_it = { .it_op = IT_GETATTR };
-       char *name;
-       int error;
-@@ -943,7 +956,7 @@
-               goto out0;
-       error = 0;
-       if (path_init(name, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd))
--              error = path_walk(name, &new_nd);
-+              error = path_walk_it(name, &new_nd, &new_it);
-       putname(name);
-       if (error)
-               goto out0;
-@@ -957,7 +970,7 @@
-               goto out1;
-       error = 0;
-       if (path_init(name, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd))
--              error = path_walk(name, &old_nd);
-+              error = path_walk_it(name, &old_nd, &old_it);
-       putname(name);
-       if (error)
-               goto out1;
-@@ -1013,8 +1026,10 @@
-       up(&old_nd.dentry->d_inode->i_zombie);
-       up_write(&current->namespace->sem);
-       path_release(&user_nd);
-+      intent_release(&old_it);
-       path_release(&old_nd);
- out1:
-+      intent_release(&new_it);
-       path_release(&new_nd);
- out0:
-       unlock_kernel();
 Index: linux-2.4.19.SuSE/fs/namei.c
 ===================================================================
 --- linux-2.4.19.SuSE.orig/fs/namei.c  Mon Jan 27 05:08:07 2003
@@ -357,7 +239,7 @@ Index: linux-2.4.19.SuSE/fs/namei.c
                }
                /* This does the actual lookups.. */
 -              dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
-+              dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE,NULL);
++              dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL);
                if (!dentry) {
                        err = -EWOULDBLOCKIO;
                        if (atomic)
@@ -433,7 +315,7 @@ Index: linux-2.4.19.SuSE/fs/namei.c
 +                              if (err)
 +                                      break;
 +                              new = real_lookup(dentry->d_parent,
-+                                                &dentry->d_name, 0, NULL);
++                                                &dentry->d_name, 0, it);
 +                              d_invalidate(dentry);
 +                              dput(dentry);
 +                              if (IS_ERR(new)) {
@@ -442,6 +324,8 @@ Index: linux-2.4.19.SuSE/fs/namei.c
 +                              }
 +                              nd->dentry = new;
 +                      }
++                      if (!nd->dentry->d_inode)
++                              goto no_inode;
 +              } else
                if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
                        err = -ESTALE;
@@ -722,12 +606,16 @@ Index: linux-2.4.19.SuSE/fs/namei.c
        if (IS_ERR(dentry))
                goto fail;
        if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
-@@ -1286,7 +1414,16 @@
+@@ -1286,7 +1414,20 @@
                error = path_walk(tmp, &nd);
        if (error)
                goto out;
 -      dentry = lookup_create(&nd, 0);
 +
++      if (nd.last_type != LAST_NORM) {
++              error = -EEXIST;
++              goto out2;
++      }
 +      if (nd.dentry->d_inode->i_op->mknod_raw) {
 +              struct inode_operations *op = nd.dentry->d_inode->i_op;
 +              error = op->mknod_raw(&nd, mode, dev);
@@ -748,11 +636,15 @@ Index: linux-2.4.19.SuSE/fs/namei.c
        path_release(&nd);
  out:
        putname(tmp);
-@@ -1356,7 +1494,14 @@
+@@ -1356,7 +1494,18 @@
                        error = path_walk(tmp, &nd);
                if (error)
                        goto out;
 -              dentry = lookup_create(&nd, 1);
++              if (nd.last_type != LAST_NORM) {
++                      error = -EEXIST;
++                      goto out2;
++              }
 +              if (nd.dentry->d_inode->i_op->mkdir_raw) {
 +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
 +                      error = op->mkdir_raw(&nd, mode);
@@ -807,11 +699,15 @@ Index: linux-2.4.19.SuSE/fs/namei.c
        error = PTR_ERR(dentry);
        if (!IS_ERR(dentry)) {
                /* Why not before? Because we want correct error value */
-@@ -1595,15 +1756,23 @@
+@@ -1595,15 +1756,27 @@
                        error = path_walk(to, &nd);
                if (error)
                        goto out;
 -              dentry = lookup_create(&nd, 0);
++              if (nd.last_type != LAST_NORM) {
++                      error = -EEXIST;
++                      goto out2;
++              }
 +              if (nd.dentry->d_inode->i_op->symlink_raw) {
 +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
 +                      error = op->symlink_raw(&nd, from);
@@ -838,6 +734,10 @@ Index: linux-2.4.19.SuSE/fs/namei.c
                if (old_nd.mnt != nd.mnt)
                        goto out_release;
 -              new_dentry = lookup_create(&nd, 0);
++              if (nd.last_type != LAST_NORM) {
++                      error = -EEXIST;
++                      goto out_release;
++              }
 +              if (nd.dentry->d_inode->i_op->link_raw) {
 +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
 +                      error = op->link_raw(&old_nd, &nd);
@@ -959,6 +859,124 @@ Index: linux-2.4.19.SuSE/fs/namei.c
        if (page) {
                kunmap(page);
                page_cache_release(page);
+Index: linux-2.4.19.SuSE/fs/namespace.c
+===================================================================
+--- linux-2.4.19.SuSE.orig/fs/namespace.c      Mon Jan 27 05:08:07 2003
++++ linux-2.4.19.SuSE/fs/namespace.c   Sat Nov 15 17:56:42 2003
+@@ -97,6 +97,7 @@
+ {
+       old_nd->dentry = mnt->mnt_mountpoint;
+       old_nd->mnt = mnt->mnt_parent;
++      UNPIN(old_nd->dentry, old_nd->mnt, 1);
+       mnt->mnt_parent = mnt;
+       mnt->mnt_mountpoint = mnt->mnt_root;
+       list_del_init(&mnt->mnt_child);
+@@ -108,6 +109,7 @@
+ {
+       mnt->mnt_parent = mntget(nd->mnt);
+       mnt->mnt_mountpoint = dget(nd->dentry);
++      PIN(nd->dentry, nd->mnt, 1);
+       list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry));
+       list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts);
+       nd->dentry->d_mounted++;
+@@ -491,15 +493,18 @@
+ {
+       struct nameidata old_nd;
+       struct vfsmount *mnt = NULL;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+       int err = mount_is_safe(nd);
+       if (err)
+               return err;
+       if (!old_name || !*old_name)
+               return -EINVAL;
+       if (path_init(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd))
+-              err = path_walk(old_name, &old_nd);
+-      if (err)
++              err = path_walk_it(old_name, &old_nd, &it);
++      if (err) {
++              intent_release(&it);
+               return err;
++      }
+       down_write(&current->namespace->sem);
+       err = -EINVAL;
+@@ -522,6 +527,7 @@
+       }
+       up_write(&current->namespace->sem);
++      intent_release(&it);
+       path_release(&old_nd);
+       return err;
+ }
+@@ -725,6 +731,7 @@
+                 unsigned long flags, void *data_page)
+ {
+       struct nameidata nd;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+       int retval = 0;
+       int mnt_flags = 0;
+@@ -750,9 +757,11 @@
+       /* ... and get the mountpoint */
+       if (path_init(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd))
+-              retval = path_walk(dir_name, &nd);
+-      if (retval)
++              retval = path_walk_it(dir_name, &nd, &it);
++      if (retval) {
++              intent_release(&it);
+               return retval;
++      }
+       if (flags & MS_REMOUNT)
+               retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
+@@ -764,6 +773,8 @@
+       else
+               retval = do_add_mount(&nd, type_page, flags, mnt_flags,
+                                     dev_name, data_page);
++
++      intent_release(&it);
+       path_release(&nd);
+       return retval;
+ }
+@@ -929,6 +940,8 @@
+ {
+       struct vfsmount *tmp;
+       struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd;
++      struct lookup_intent new_it = { .it_op = IT_GETATTR };
++      struct lookup_intent old_it = { .it_op = IT_GETATTR };
+       char *name;
+       int error;
+@@ -943,7 +956,7 @@
+               goto out0;
+       error = 0;
+       if (path_init(name, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd))
+-              error = path_walk(name, &new_nd);
++              error = path_walk_it(name, &new_nd, &new_it);
+       putname(name);
+       if (error)
+               goto out0;
+@@ -957,7 +970,7 @@
+               goto out1;
+       error = 0;
+       if (path_init(name, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd))
+-              error = path_walk(name, &old_nd);
++              error = path_walk_it(name, &old_nd, &old_it);
+       putname(name);
+       if (error)
+               goto out1;
+@@ -1013,8 +1026,10 @@
+       up(&old_nd.dentry->d_inode->i_zombie);
+       up_write(&current->namespace->sem);
+       path_release(&user_nd);
++      intent_release(&old_it);
+       path_release(&old_nd);
+ out1:
++      intent_release(&new_it);
+       path_release(&new_nd);
+ out0:
+       unlock_kernel();
 Index: linux-2.4.19.SuSE/fs/open.c
 ===================================================================
 --- linux-2.4.19.SuSE.orig/fs/open.c   Mon Jan 27 05:08:00 2003
@@ -1377,7 +1395,7 @@ Index: linux-2.4.19.SuSE/fs/stat.c
 ===================================================================
 --- linux-2.4.19.SuSE.orig/fs/stat.c   Mon Jan 27 05:08:00 2003
 +++ linux-2.4.19.SuSE/fs/stat.c        Sat Nov 15 17:29:03 2003
-@@ -17,10 +17,14 @@
+@@ -17,10 +17,16 @@
   * Revalidate the inode. This is required for proper NFS attribute caching.
   */
  static __inline__ int
@@ -1386,8 +1404,6 @@ Index: linux-2.4.19.SuSE/fs/stat.c
  {
        struct inode * inode = dentry->d_inode;
 -      if (inode->i_op && inode->i_op->revalidate)
-+      if (!inode)
-+              return -ENOENT;
 +      if (inode->i_op && inode->i_op->revalidate_it)
 +              return inode->i_op->revalidate_it(dentry, it);
 +      else if (inode->i_op && inode->i_op->revalidate)
@@ -1641,7 +1657,7 @@ Index: linux-2.4.19.SuSE/include/linux/fs.h
  #define ATTR_ATTR_FLAG        1024
 +#define ATTR_RAW      0x0800  /* file system, not vfs will massage attrs */
 +#define ATTR_FROM_OPEN        0x1000  /* called from open path, ie O_TRUNC */
-+#define ATTR_CTIME_SET 0x2000
++#define ATTR_CTIME_SET        0x2000
  
  /*
   * This is the Inode Attributes structure, used for notify_change().  It
@@ -1784,18 +1800,25 @@ Index: linux-2.4.19.SuSE/include/linux/fs_struct.h
                dput(old_pwd);
                mntput(old_pwdmnt);
        }
-Index: linux-2.4.19.SuSE/kernel/ksyms.c
+Index: linux-2.4.19.SuSE/kernel/exit.c
 ===================================================================
---- linux-2.4.19.SuSE.orig/kernel/ksyms.c      Sat Nov 15 17:24:46 2003
-+++ linux-2.4.19.SuSE/kernel/ksyms.c   Sat Nov 15 17:29:03 2003
-@@ -315,6 +315,7 @@
- EXPORT_SYMBOL(set_page_dirty);
- EXPORT_SYMBOL(vfs_readlink);
- EXPORT_SYMBOL(vfs_follow_link);
-+EXPORT_SYMBOL(vfs_follow_link_it);
- EXPORT_SYMBOL(page_readlink);
- EXPORT_SYMBOL(page_follow_link);
- EXPORT_SYMBOL(page_symlink_inode_operations);
+--- linux-2.4.19.SuSE.orig/kernel/exit.c       Mon Jan 27 05:08:16 2003
++++ linux-2.4.19.SuSE/kernel/exit.c    Sat Nov 15 17:29:03 2003
+@@ -288,11 +288,14 @@
+ {
+       /* No need to hold fs->lock if we are killing it */
+       if (atomic_dec_and_test(&fs->count)) {
++              UNPIN(fs->pwd, fs->pwdmnt, 0);
++              UNPIN(fs->root, fs->rootmnt, 1);
+               dput(fs->root);
+               mntput(fs->rootmnt);
+               dput(fs->pwd);
+               mntput(fs->pwdmnt);
+               if (fs->altroot) {
++                      UNPIN(fs->altroot, fs->altrootmnt, 1);
+                       dput(fs->altroot);
+                       mntput(fs->altrootmnt);
+               }
 Index: linux-2.4.19.SuSE/kernel/fork.c
 ===================================================================
 --- linux-2.4.19.SuSE.orig/kernel/fork.c       Mon Jan 27 05:08:56 2003
@@ -1814,22 +1837,15 @@ Index: linux-2.4.19.SuSE/kernel/fork.c
                        fs->altrootmnt = mntget(old->altrootmnt);
                        fs->altroot = dget(old->altroot);
                } else {
-Index: linux-2.4.19.SuSE/kernel/exit.c
+Index: linux-2.4.19.SuSE/kernel/ksyms.c
 ===================================================================
---- linux-2.4.19.SuSE.orig/kernel/exit.c       Mon Jan 27 05:08:16 2003
-+++ linux-2.4.19.SuSE/kernel/exit.c    Sat Nov 15 17:29:03 2003
-@@ -288,11 +288,14 @@
- {
-       /* No need to hold fs->lock if we are killing it */
-       if (atomic_dec_and_test(&fs->count)) {
-+              UNPIN(fs->pwd, fs->pwdmnt, 0);
-+              UNPIN(fs->root, fs->rootmnt, 1);
-               dput(fs->root);
-               mntput(fs->rootmnt);
-               dput(fs->pwd);
-               mntput(fs->pwdmnt);
-               if (fs->altroot) {
-+                      UNPIN(fs->altroot, fs->altrootmnt, 1);
-                       dput(fs->altroot);
-                       mntput(fs->altrootmnt);
-               }
+--- linux-2.4.19.SuSE.orig/kernel/ksyms.c      Sat Nov 15 17:24:46 2003
++++ linux-2.4.19.SuSE/kernel/ksyms.c   Sat Nov 15 17:29:03 2003
+@@ -315,6 +315,7 @@
+ EXPORT_SYMBOL(set_page_dirty);
+ EXPORT_SYMBOL(vfs_readlink);
+ EXPORT_SYMBOL(vfs_follow_link);
++EXPORT_SYMBOL(vfs_follow_link_it);
+ EXPORT_SYMBOL(page_readlink);
+ EXPORT_SYMBOL(page_follow_link);
+ EXPORT_SYMBOL(page_symlink_inode_operations);
index 8585701..424d90e 100644 (file)
  kernel/ksyms.c            |    1 
  13 files changed, 591 insertions(+), 133 deletions(-)
 
+Index: linux/fs/dcache.c
+===================================================================
+--- linux.orig/fs/dcache.c     Thu Nov 28 18:53:15 2002
++++ linux/fs/dcache.c  Wed Mar 17 13:11:25 2004
+@@ -181,6 +181,13 @@
+               spin_unlock(&dcache_lock);
+               return 0;
+       }
++
++      /* network invalidation by Lustre */
++      if (dentry->d_flags & DCACHE_LUSTRE_INVALID) {
++              spin_unlock(&dcache_lock);
++              return 0;
++      }
++
+       /*
+        * Check whether to do a partial shrink_dcache
+        * to get rid of unused child entries.
+@@ -830,13 +837,19 @@
+  * Adds a dentry to the hash according to its name.
+  */
+  
+-void d_rehash(struct dentry * entry)
++void __d_rehash(struct dentry * entry, int lock)
+ {
+       struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
+       if (!list_empty(&entry->d_hash)) BUG();
+-      spin_lock(&dcache_lock);
++      if (lock) spin_lock(&dcache_lock);
+       list_add(&entry->d_hash, list);
+-      spin_unlock(&dcache_lock);
++      if (lock) spin_unlock(&dcache_lock);
++}
++EXPORT_SYMBOL(__d_rehash);
++
++void d_rehash(struct dentry * entry)
++{
++      __d_rehash(entry, 1);
+ }
+ #define do_switch(x,y) do { \
 Index: linux/fs/exec.c
 ===================================================================
 --- linux.orig/fs/exec.c       Wed Mar 17 13:00:38 2004
@@ -78,164 +119,6 @@ Index: linux/fs/exec.c
                goto close_fail;
  
        retval = binfmt->core_dump(signr, regs, file);
-Index: linux/fs/dcache.c
-===================================================================
---- linux.orig/fs/dcache.c     Thu Nov 28 18:53:15 2002
-+++ linux/fs/dcache.c  Wed Mar 17 13:11:25 2004
-@@ -181,6 +181,13 @@
-               spin_unlock(&dcache_lock);
-               return 0;
-       }
-+
-+      /* network invalidation by Lustre */
-+      if (dentry->d_flags & DCACHE_LUSTRE_INVALID) {
-+              spin_unlock(&dcache_lock);
-+              return 0;
-+      }
-+
-       /*
-        * Check whether to do a partial shrink_dcache
-        * to get rid of unused child entries.
-@@ -830,13 +837,19 @@
-  * Adds a dentry to the hash according to its name.
-  */
-  
--void d_rehash(struct dentry * entry)
-+void __d_rehash(struct dentry * entry, int lock)
- {
-       struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
-       if (!list_empty(&entry->d_hash)) BUG();
--      spin_lock(&dcache_lock);
-+      if (lock) spin_lock(&dcache_lock);
-       list_add(&entry->d_hash, list);
--      spin_unlock(&dcache_lock);
-+      if (lock) spin_unlock(&dcache_lock);
-+}
-+EXPORT_SYMBOL(__d_rehash);
-+
-+void d_rehash(struct dentry * entry)
-+{
-+      __d_rehash(entry, 1);
- }
- #define do_switch(x,y) do { \
-Index: linux/fs/namespace.c
-===================================================================
---- linux.orig/fs/namespace.c  Thu Nov 28 18:53:15 2002
-+++ linux/fs/namespace.c       Wed Mar 17 13:11:25 2004
-@@ -99,6 +99,7 @@
- {
-       old_nd->dentry = mnt->mnt_mountpoint;
-       old_nd->mnt = mnt->mnt_parent;
-+      UNPIN(old_nd->dentry, old_nd->mnt, 1);
-       mnt->mnt_parent = mnt;
-       mnt->mnt_mountpoint = mnt->mnt_root;
-       list_del_init(&mnt->mnt_child);
-@@ -110,6 +111,7 @@
- {
-       mnt->mnt_parent = mntget(nd->mnt);
-       mnt->mnt_mountpoint = dget(nd->dentry);
-+      PIN(nd->dentry, nd->mnt, 1);
-       list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry));
-       list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts);
-       nd->dentry->d_mounted++;
-@@ -485,14 +487,17 @@
- {
-       struct nameidata old_nd;
-       struct vfsmount *mnt = NULL;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       int err = mount_is_safe(nd);
-       if (err)
-               return err;
-       if (!old_name || !*old_name)
-               return -EINVAL;
--      err = path_lookup(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd);
--      if (err)
-+      err = path_lookup_it(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd, &it);
-+      if (err) {
-+              intent_release(&it);
-               return err;
-+      }
-       down_write(&current->namespace->sem);
-       err = -EINVAL;
-@@ -515,6 +520,7 @@
-       }
-       up_write(&current->namespace->sem);
-+      intent_release(&it);
-       path_release(&old_nd);
-       return err;
- }
-@@ -698,6 +704,7 @@
-                 unsigned long flags, void *data_page)
- {
-       struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       int retval = 0;
-       int mnt_flags = 0;
-@@ -722,10 +729,11 @@
-       flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV);
-       /* ... and get the mountpoint */
--      retval = path_lookup(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
--      if (retval)
-+      retval = path_lookup_it(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it);
-+      if (retval) {
-+              intent_release(&it);
-               return retval;
--
-+      }
-       if (flags & MS_REMOUNT)
-               retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
-                                   data_page);
-@@ -736,6 +744,8 @@
-       else
-               retval = do_add_mount(&nd, type_page, flags, mnt_flags,
-                                     dev_name, data_page);
-+
-+      intent_release(&it);
-       path_release(&nd);
-       return retval;
- }
-@@ -901,6 +911,8 @@
- {
-       struct vfsmount *tmp;
-       struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd;
-+      struct lookup_intent new_it = { .it_op = IT_GETATTR };
-+      struct lookup_intent old_it = { .it_op = IT_GETATTR };
-       int error;
-       if (!capable(CAP_SYS_ADMIN))
-@@ -908,14 +920,14 @@
-       lock_kernel();
--      error = __user_walk(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd);
-+      error = __user_walk_it(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd, &new_it);
-       if (error)
-               goto out0;
-       error = -EINVAL;
-       if (!check_mnt(new_nd.mnt))
-               goto out1;
--      error = __user_walk(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd);
-+      error = __user_walk_it(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd, &old_it);
-       if (error)
-               goto out1;
-@@ -970,8 +982,10 @@
-       up(&old_nd.dentry->d_inode->i_zombie);
-       up_write(&current->namespace->sem);
-       path_release(&user_nd);
-+      intent_release(&old_it);
-       path_release(&old_nd);
- out1:
-+      intent_release(&new_it);
-       path_release(&new_nd);
- out0:
-       unlock_kernel();
 Index: linux/fs/namei.c
 ===================================================================
 --- linux.orig/fs/namei.c      Wed Mar 17 13:00:37 2004
@@ -487,7 +370,7 @@ Index: linux/fs/namei.c
 +                              if (err)
 +                                      break;
 +                              new = real_lookup(dentry->d_parent,
-+                                                &dentry->d_name, 0, NULL);
++                                                &dentry->d_name, 0, it);
 +                              d_invalidate(dentry);
 +                              dput(dentry);
 +                              if (IS_ERR(new)) {
@@ -1065,6 +948,123 @@ Index: linux/fs/namei.c
        if (page) {
                kunmap(page);
                page_cache_release(page);
+Index: linux/fs/namespace.c
+===================================================================
+--- linux.orig/fs/namespace.c  Thu Nov 28 18:53:15 2002
++++ linux/fs/namespace.c       Wed Mar 17 13:11:25 2004
+@@ -99,6 +99,7 @@
+ {
+       old_nd->dentry = mnt->mnt_mountpoint;
+       old_nd->mnt = mnt->mnt_parent;
++      UNPIN(old_nd->dentry, old_nd->mnt, 1);
+       mnt->mnt_parent = mnt;
+       mnt->mnt_mountpoint = mnt->mnt_root;
+       list_del_init(&mnt->mnt_child);
+@@ -110,6 +111,7 @@
+ {
+       mnt->mnt_parent = mntget(nd->mnt);
+       mnt->mnt_mountpoint = dget(nd->dentry);
++      PIN(nd->dentry, nd->mnt, 1);
+       list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry));
+       list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts);
+       nd->dentry->d_mounted++;
+@@ -485,14 +487,17 @@
+ {
+       struct nameidata old_nd;
+       struct vfsmount *mnt = NULL;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+       int err = mount_is_safe(nd);
+       if (err)
+               return err;
+       if (!old_name || !*old_name)
+               return -EINVAL;
+-      err = path_lookup(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd);
+-      if (err)
++      err = path_lookup_it(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd, &it);
++      if (err) {
++              intent_release(&it);
+               return err;
++      }
+       down_write(&current->namespace->sem);
+       err = -EINVAL;
+@@ -515,6 +520,7 @@
+       }
+       up_write(&current->namespace->sem);
++      intent_release(&it);
+       path_release(&old_nd);
+       return err;
+ }
+@@ -698,6 +704,7 @@
+                 unsigned long flags, void *data_page)
+ {
+       struct nameidata nd;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+       int retval = 0;
+       int mnt_flags = 0;
+@@ -722,10 +729,11 @@
+       flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV);
+       /* ... and get the mountpoint */
+-      retval = path_lookup(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
+-      if (retval)
++      retval = path_lookup_it(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it);
++      if (retval) {
++              intent_release(&it);
+               return retval;
+-
++      }
+       if (flags & MS_REMOUNT)
+               retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
+                                   data_page);
+@@ -736,6 +744,8 @@
+       else
+               retval = do_add_mount(&nd, type_page, flags, mnt_flags,
+                                     dev_name, data_page);
++
++      intent_release(&it);
+       path_release(&nd);
+       return retval;
+ }
+@@ -901,6 +911,8 @@
+ {
+       struct vfsmount *tmp;
+       struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd;
++      struct lookup_intent new_it = { .it_op = IT_GETATTR };
++      struct lookup_intent old_it = { .it_op = IT_GETATTR };
+       int error;
+       if (!capable(CAP_SYS_ADMIN))
+@@ -908,14 +920,14 @@
+       lock_kernel();
+-      error = __user_walk(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd);
++      error = __user_walk_it(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd, &new_it);
+       if (error)
+               goto out0;
+       error = -EINVAL;
+       if (!check_mnt(new_nd.mnt))
+               goto out1;
+-      error = __user_walk(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd);
++      error = __user_walk_it(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd, &old_it);
+       if (error)
+               goto out1;
+@@ -970,8 +982,10 @@
+       up(&old_nd.dentry->d_inode->i_zombie);
+       up_write(&current->namespace->sem);
+       path_release(&user_nd);
++      intent_release(&old_it);
+       path_release(&old_nd);
+ out1:
++      intent_release(&new_it);
+       path_release(&new_nd);
+ out0:
+       unlock_kernel();
 Index: linux/fs/open.c
 ===================================================================
 --- linux.orig/fs/open.c       Thu Nov 28 18:53:15 2002
@@ -1467,6 +1467,20 @@ Index: linux/fs/open.c
  /*
   * Find an empty file descriptor entry, and mark it busy.
   */
+Index: linux/fs/proc/base.c
+===================================================================
+--- linux.orig/fs/proc/base.c  Wed Mar 17 13:00:35 2004
++++ linux/fs/proc/base.c       Wed Mar 17 13:11:25 2004
+@@ -481,6 +481,9 @@
+       error = inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt);
+       nd->last_type = LAST_BIND;
++
++      if (nd->intent != NULL)
++              nd->intent->d.lustre.it_int_flags |= IT_FL_FOLLOWED;
+ out:
+       return error;
+ }
 Index: linux/fs/stat.c
 ===================================================================
 --- linux.orig/fs/stat.c       Thu Sep 13 19:04:43 2001
@@ -1628,20 +1642,6 @@ Index: linux/fs/stat.c
                if (!err)
                        err = cp_new_stat64(dentry->d_inode, statbuf);
                fput(f);
-Index: linux/fs/proc/base.c
-===================================================================
---- linux.orig/fs/proc/base.c  Wed Mar 17 13:00:35 2004
-+++ linux/fs/proc/base.c       Wed Mar 17 13:11:25 2004
-@@ -481,6 +481,9 @@
-       error = inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt);
-       nd->last_type = LAST_BIND;
-+
-+      if (nd->intent != NULL)
-+              nd->intent->d.lustre.it_int_flags |= IT_FL_FOLLOWED;
- out:
-       return error;
- }
 Index: linux/include/linux/dcache.h
 ===================================================================
 --- linux.orig/include/linux/dcache.h  Thu Nov 28 18:53:15 2002
@@ -1747,7 +1747,7 @@ Index: linux/include/linux/fs.h
  #define ATTR_ATTR_FLAG        1024
 +#define ATTR_RAW      0x0800  /* file system, not vfs will massage attrs */
 +#define ATTR_FROM_OPEN        0x1000  /* called from open path, ie O_TRUNC */
-+#define ATTR_CTIME_SET 0x2000
++#define ATTR_CTIME_SET        0x2000
  
  /*
   * This is the Inode Attributes structure, used for notify_change().  It
@@ -1890,18 +1890,25 @@ Index: linux/include/linux/fs_struct.h
                dput(old_pwd);
                mntput(old_pwdmnt);
        }
-Index: linux/kernel/ksyms.c
+Index: linux/kernel/exit.c
 ===================================================================
---- linux.orig/kernel/ksyms.c  Wed Mar 17 13:11:23 2004
-+++ linux/kernel/ksyms.c       Wed Mar 17 13:11:25 2004
-@@ -315,6 +315,7 @@
- EXPORT_SYMBOL(set_page_dirty);
- EXPORT_SYMBOL(vfs_readlink);
- EXPORT_SYMBOL(vfs_follow_link);
-+EXPORT_SYMBOL(vfs_follow_link_it);
- EXPORT_SYMBOL(page_readlink);
- EXPORT_SYMBOL(page_follow_link);
- EXPORT_SYMBOL(page_symlink_inode_operations);
+--- linux.orig/kernel/exit.c   Wed Mar 17 13:00:38 2004
++++ linux/kernel/exit.c        Wed Mar 17 13:11:25 2004
+@@ -239,11 +239,14 @@
+ {
+       /* No need to hold fs->lock if we are killing it */
+       if (atomic_dec_and_test(&fs->count)) {
++              UNPIN(fs->pwd, fs->pwdmnt, 0);
++              UNPIN(fs->root, fs->rootmnt, 1);
+               dput(fs->root);
+               mntput(fs->rootmnt);
+               dput(fs->pwd);
+               mntput(fs->pwdmnt);
+               if (fs->altroot) {
++                      UNPIN(fs->altroot, fs->altrootmnt, 1);
+                       dput(fs->altroot);
+                       mntput(fs->altrootmnt);
+         &