Land b_smallfix onto HEAD (20040428_2142)

author adilger <adilger>

Thu, 29 Apr 2004 08:54:18 +0000 (08:54 +0000)

committer adilger <adilger>

Thu, 29 Apr 2004 08:54:18 +0000 (08:54 +0000)
author adilger <adilger>
Thu, 29 Apr 2004 08:54:18 +0000 (08:54 +0000)
committer adilger <adilger>
Thu, 29 Apr 2004 08:54:18 +0000 (08:54 +0000)
diff --git a/lnet/include/linux/kp30.h b/lnet/include/linux/kp30.h

index 181594f..8a56b55 100644 (file)
--- a/lnet/include/linux/kp30.h
+++ b/lnet/include/linux/kp30.h
@@ -689,27 +689,30 @@ typedef int (*cfg_record_cb_t)(enum cfg_record_type, int len, void *data);
  # endif
  #endif
  
+#ifndef LP_POISON
+# define LI_POISON ((int)0x5a5a5a5a5a5a5a5a)
+# define LL_POISON ((long)0x5a5a5a5a5a5a5a5a)
+# define LP_POISON ((void *)(long)0x5a5a5a5a5a5a5a5a)
+#endif
+
  #if defined(__x86_64__)
  # define LPU64 "%Lu"
  # define LPD64 "%Ld"
  # define LPX64 "%#Lx"
  # define LPSZ  "%lu"
  # define LPSSZ "%ld"
-# define LP_POISON ((void *)0x5a5a5a5a5a5a5a5a)
  #elif (BITS_PER_LONG == 32 || __WORDSIZE == 32)
  # define LPU64 "%Lu"
  # define LPD64 "%Ld"
  # define LPX64 "%#Lx"
  # define LPSZ  "%u"
  # define LPSSZ "%d"
-# define LP_POISON ((void *)0x5a5a5a5a)
  #elif (BITS_PER_LONG == 64 || __WORDSIZE == 64)
  # define LPU64 "%lu"
  # define LPD64 "%ld"
  # define LPX64 "%#lx"
  # define LPSZ  "%lu"
  # define LPSSZ "%ld"
-# define LP_POISON ((void *)0x5a5a5a5a5a5a5a5a)
  #endif
  #ifndef LPU64
  # error "No word size defined"
diff --git a/lustre/ChangeLog b/lustre/ChangeLog

index 3693d4c..b2cf2ca 100644 (file)
--- a/lustre/ChangeLog
+++ b/lustre/ChangeLog
@@ -5,7 +5,9 @@ tbd  Cluster File Systems, Inc. <info@clusterfs.com>
         - deal with strange write() on x86-64 (3043)
         - don't dereference NULL peer_ni in ldlm_handle_ast_error (3258)
         - clear page->private before handing to FS (3119)
-    - drop scimac NAL
+       - tune the read pipeline (3236)
+       * miscellania
+       - drop scimac NAL (unmaintained)
  
  tbd  Cluster File Systems, Inc. <info@clusterfs.com>
         * version 1.2.2
@@ -30,6 +32,9 @@ tbd  Cluster File Systems, Inc. <info@clusterfs.com>
         - initialize RPC timeout timer earlier for 2.6 (3219)
         - don't dereference NULL reply buffer if mdc_close was never sent (2410)
         - print nal/nid for unknown nid (3258)
+       - additional checks for oscc recovery before doing precreate (3284)
+       - fix ll_extent_lock() error return code for 64-bit systems (3043)
+       - don't crash in mdc_close for bad permissions on open (3285)
         * miscellania
         - allow default OST striping configuration per directory (1414)
         - increase maximum number of MDS request buffers for large systems
diff --git a/lustre/include/liblustre.h b/lustre/include/liblustre.h

index 30d9574..af80f44 100644 (file)
--- a/lustre/include/liblustre.h
+++ b/lustre/include/liblustre.h
@@ -116,6 +116,9 @@ static inline void *kmalloc(int size, int prot)
  #define PTR_ERR(a) ((long)(a))
  #define ERR_PTR(a) ((void*)((long)(a)))
  
+#define capable(foo) 1
+#define CAP_SYS_ADMIN 1
+
  typedef struct {
          void *cwd;
  }mm_segment_t;
@@ -575,23 +578,12 @@ struct task_struct {
          int pid;
          int fsuid;
          int fsgid;
-        int max_groups;
-        int ngroups;
-        gid_t *groups;
          __u32 cap_effective;
-
-        struct fs_struct __fs;
  };
  
  extern struct task_struct *current;
-int in_group_p(gid_t gid);
-static inline int capable(int cap)
-{
-        if (current->cap_effective & (1 << cap))
-                return 1;
-        else
-                return 0;
-}
+
+#define in_group_p(a) 0 /* FIXME */
  
  #define set_current_state(foo) do { current->state = foo; } while (0)
  
diff --git a/lustre/include/linux/lustre_fsfilt.h b/lustre/include/linux/lustre_fsfilt.h

index 40e9914..72f3817 100644 (file)
--- a/lustre/include/linux/lustre_fsfilt.h
+++ b/lustre/include/linux/lustre_fsfilt.h
@@ -100,7 +100,7 @@ static inline void *fsfilt_start_log(struct obd_device *obd,
          unsigned long now = jiffies;
          void *parent_handle = oti ? oti->oti_handle : NULL;
          void *handle = obd->obd_fsops->fs_start(inode, op, parent_handle, logs);
-        CDEBUG(D_HA, "started handle %p (%p)\n", handle, parent_handle);
+        CDEBUG(D_INFO, "started handle %p (%p)\n", handle, parent_handle);
  
          if (oti != NULL) {
                  if (parent_handle == NULL) {
@@ -162,7 +162,7 @@ static inline int fsfilt_commit(struct obd_device *obd, struct inode *inode,
  {
          unsigned long now = jiffies;
          int rc = obd->obd_fsops->fs_commit(inode, handle, force_sync);
-        CDEBUG(D_HA, "committing handle %p\n", handle);
+        CDEBUG(D_INFO, "committing handle %p\n", handle);
  
          if (time_after(jiffies, now + 15 * HZ))
                  CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
diff --git a/lustre/include/linux/lustre_lib.h b/lustre/include/linux/lustre_lib.h

index ebdfdf6..0bb5f0b 100644 (file)
--- a/lustre/include/linux/lustre_lib.h
+++ b/lustre/include/linux/lustre_lib.h
@@ -40,18 +40,19 @@
  #include <linux/lustre_idl.h>
  #include <linux/lustre_cfg.h>
  
+#define LI_POISON ((int)0x5a5a5a5a5a5a5a5a)
+#define LP_POISON ((void *)(long)0x5a5a5a5a5a5a5a5a)
+
  #ifndef LPU64
  /* x86_64 has 64bit longs and defines u64 as long long */
  #if BITS_PER_LONG > 32 && !defined(__x86_64__)
  #define LPU64 "%lu"
  #define LPD64 "%ld"
  #define LPX64 "%#lx"
-#define LP_POISON ((void *)0x5a5a5a5a5a5a5a5a)
  #else
  #define LPU64 "%Lu"
  #define LPD64 "%Ld"
  #define LPX64 "%#Lx"
-#define LP_POISON ((void *)0x5a5a5a5a)
  #endif
  #endif
  
diff --git a/lustre/include/linux/lustre_net.h b/lustre/include/linux/lustre_net.h

index 84062e2..3c75a8b 100644 (file)
--- a/lustre/include/linux/lustre_net.h
+++ b/lustre/include/linux/lustre_net.h
@@ -79,6 +79,9 @@
  # define PTLRPC_MAX_BRW_PAGES  (PTLRPC_MAX_BRW_SIZE / PAGE_SIZE)
  #endif
  
+#if ((PTLRPC_MAX_BRW_PAGES & (PTLRPC_MAX_BRW_PAGES - 1)) != 0)
+#error "PTLRPC_MAX_BRW_PAGES isn't a power of two"
+#endif
  
  /* Size over which to OBD_VMALLOC() rather than OBD_ALLOC() service request
   * buffers */
diff --git a/lustre/kernel_patches/patches/configurable-x86-stack-2.4.21-sles8sp3.patch b/lustre/kernel_patches/patches/configurable-x86-stack-2.4.21-sles8sp3.patch

new file mode 100644 (file)

index 0000000..bc0a1b7
--- /dev/null
+++ b/lustre/kernel_patches/patches/configurable-x86-stack-2.4.21-sles8sp3.patch
@@ -0,0 +1,330 @@
+Index: linux-2.4.21/arch/i386/kernel/entry.S
+===================================================================
+--- linux-2.4.21.orig/arch/i386/kernel/entry.S 2004-04-24 02:39:01.000000000 -0400
++++ linux-2.4.21/arch/i386/kernel/entry.S      2004-04-24 02:42:58.000000000 -0400
+@@ -45,6 +45,7 @@
+ #include <linux/linkage.h>
+ #include <asm/segment.h>
+ #include <asm/smp.h>
++#include <asm/current.h>
+ 
+ EBX           = 0x00
+ ECX           = 0x04
+@@ -130,10 +131,6 @@
+       .long 3b,6b;    \
+ .previous
+ 
+-#define GET_CURRENT(reg) \
+-      movl $-8192, reg; \
+-      andl %esp, reg
+-
+ ENTRY(lcall7)
+       pushfl                  # We get a different stack layout with call gates,
+       pushl %eax              # which has to be cleaned up later..
+@@ -149,7 +146,7 @@
+       movl %ecx,CS(%esp)      #
+       movl %esp,%ebx
+       pushl %ebx
+-      andl $-8192,%ebx        # GET_CURRENT
++      andl $-THREAD_SIZE,%ebx # GET_CURRENT
+       movl exec_domain(%ebx),%edx     # Get the execution domain
+       movl 4(%edx),%edx       # Get the lcall7 handler for the domain
+       pushl $0x7
+@@ -173,7 +170,7 @@
+       movl %ecx,CS(%esp)      #
+       movl %esp,%ebx
+       pushl %ebx
+-      andl $-8192,%ebx        # GET_CURRENT
++      andl $-THREAD_SIZE,%ebx # GET_CURRENT
+       movl exec_domain(%ebx),%edx     # Get the execution domain
+       movl 4(%edx),%edx       # Get the lcall7 handler for the domain
+       pushl $0x27
+Index: linux-2.4.21/arch/i386/kernel/smpboot.c
+===================================================================
+--- linux-2.4.21.orig/arch/i386/kernel/smpboot.c       2004-04-24 02:39:05.000000000 -0400
++++ linux-2.4.21/arch/i386/kernel/smpboot.c    2004-04-24 02:42:58.000000000 -0400
+@@ -837,7 +837,7 @@
+ 
+       /* So we see what's up   */
+       printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
+-      stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle);
++      stack_start.esp = (void *)idle->thread.esp;
+ 
+       /*
+        * This grunge runs the startup process for
+@@ -918,7 +918,7 @@
+                       Dprintk("CPU has booted.\n");
+               } else {
+                       boot_error= 1;
+-                      if (*((volatile unsigned char *)phys_to_virt(8192))
++                      if (*((volatile unsigned char *)phys_to_virt(THREAD_SIZE))
+                                       == 0xA5)
+                               /* trampoline started but...? */
+                               printk("Stuck ??\n");
+@@ -941,7 +941,7 @@
+       }
+ 
+       /* mark "stuck" area as not stuck */
+-      *((volatile unsigned long *)phys_to_virt(8192)) = 0;
++      *((volatile unsigned long *)phys_to_virt(THREAD_SIZE)) = 0;
+ 
+ #ifdef CONFIG_ES7000
+       if (!es7000_plat)
+Index: linux-2.4.21/arch/i386/kernel/traps.c
+===================================================================
+--- linux-2.4.21.orig/arch/i386/kernel/traps.c 2004-04-24 02:39:18.000000000 -0400
++++ linux-2.4.21/arch/i386/kernel/traps.c      2004-04-24 02:42:58.000000000 -0400
+@@ -304,7 +304,7 @@
+       unsigned long esp = tsk->thread.esp;
+ 
+       /* User space on another CPU? */
+-      if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1))
++      if ((esp ^ (unsigned long)tsk) & ~(THREAD_SIZE - 1))
+               return;
+       show_trace((unsigned long *)esp);
+ }
+Index: linux-2.4.21/arch/i386/kernel/head.S
+===================================================================
+--- linux-2.4.21.orig/arch/i386/kernel/head.S  2004-04-24 02:38:42.000000000 -0400
++++ linux-2.4.21/arch/i386/kernel/head.S       2004-04-24 02:42:58.000000000 -0400
+@@ -15,6 +15,7 @@
+ #include <asm/page.h>
+ #include <asm/pgtable.h>
+ #include <asm/desc.h>
++#include <asm/current.h>
+ 
+ #define OLD_CL_MAGIC_ADDR     0x90020
+ #define OLD_CL_MAGIC          0xA33F
+@@ -326,7 +327,7 @@
+       ret
+ 
+ ENTRY(stack_start)
+-      .long SYMBOL_NAME(init_task_union)+8192
++      .long SYMBOL_NAME(init_task_union)+THREAD_SIZE
+       .long __KERNEL_DS
+ 
+ /* This is the default interrupt "handler" :-) */
+Index: linux-2.4.21/arch/i386/kernel/irq.c
+===================================================================
+--- linux-2.4.21.orig/arch/i386/kernel/irq.c   2004-04-24 02:39:18.000000000 -0400
++++ linux-2.4.21/arch/i386/kernel/irq.c        2004-04-24 02:44:26.000000000 -0400
+@@ -602,7 +602,10 @@
+       long esp;
+ 
+       /* Debugging check for stack overflow: is there less than 1KB free? */
+-      __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (8191));
++        __asm__ __volatile__(
++                "andl %%esp,%0"
++                : "=r" (esp) : "0" (THREAD_SIZE-1));
++
+       if (unlikely(esp < (sizeof(struct task_struct) + sysctl_stackwarn))) {
+               static unsigned long next_jiffies;      /* ratelimiting */
+               static long least_esp = THREAD_SIZE;
+Index: linux-2.4.21/arch/i386/lib/getuser.S
+===================================================================
+--- linux-2.4.21.orig/arch/i386/lib/getuser.S  1998-01-12 16:42:52.000000000 -0500
++++ linux-2.4.21/arch/i386/lib/getuser.S       2004-04-24 02:42:58.000000000 -0400
+@@ -21,6 +21,10 @@
+  * as they get called from within inline assembly.
+  */
+ 
++/* Duplicated from asm/processor.h */
++#include <asm/current.h>
++#include <linux/config.h>
++
+ addr_limit = 12
+ 
+ .text
+@@ -28,7 +32,7 @@
+ .globl __get_user_1
+ __get_user_1:
+       movl %esp,%edx
+-      andl $0xffffe000,%edx
++      andl $~(THREAD_SIZE - 1),%edx
+       cmpl addr_limit(%edx),%eax
+       jae bad_get_user
+ 1:    movzbl (%eax),%edx
+@@ -41,7 +45,7 @@
+       addl $1,%eax
+       movl %esp,%edx
+       jc bad_get_user
+-      andl $0xffffe000,%edx
++      andl $~(THREAD_SIZE - 1),%edx
+       cmpl addr_limit(%edx),%eax
+       jae bad_get_user
+ 2:    movzwl -1(%eax),%edx
+@@ -54,7 +58,7 @@
+       addl $3,%eax
+       movl %esp,%edx
+       jc bad_get_user
+-      andl $0xffffe000,%edx
++      andl $~(THREAD_SIZE - 1),%edx
+       cmpl addr_limit(%edx),%eax
+       jae bad_get_user
+ 3:    movl -3(%eax),%edx
+Index: linux-2.4.21/arch/i386/config.in
+===================================================================
+--- linux-2.4.21.orig/arch/i386/config.in      2004-04-24 02:39:21.000000000 -0400
++++ linux-2.4.21/arch/i386/config.in   2004-04-24 02:42:58.000000000 -0400
+@@ -326,6 +326,29 @@
+ if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then
+    define_bool CONFIG_HAVE_DEC_LOCK y
+ fi
++
++choice 'Bigger Stack Size Support' \
++     "off    CONFIG_NOBIGSTACK \
++      16KB   CONFIG_STACK_SIZE_16KB \
++      32KB   CONFIG_STACK_SIZE_32KB \
++      64KB   CONFIG_STACK_SIZE_64KB" off
++
++if [ "$CONFIG_NOBIGSTACK" = "y" ]; then
++   define_int CONFIG_STACK_SIZE_SHIFT 1
++else
++  if [ "$CONFIG_STACK_SIZE_16KB" = "y" ]; then
++     define_int CONFIG_STACK_SIZE_SHIFT 2
++  else
++    if [ "$CONFIG_STACK_SIZE_32KB" = "y" ]; then
++      define_int CONFIG_STACK_SIZE_SHIFT 3
++    else
++      if [ "$CONFIG_STACK_SIZE_64KB" = "y" ]; then
++        define_int CONFIG_STACK_SIZE_SHIFT 4
++      fi
++    fi
++  fi
++fi
++ 
+ endmenu
+ 
+ mainmenu_option next_comment
+Index: linux-2.4.21/arch/i386/vmlinux.lds.S
+===================================================================
+--- linux-2.4.21.orig/arch/i386/vmlinux.lds.S  2004-04-24 02:38:06.000000000 -0400
++++ linux-2.4.21/arch/i386/vmlinux.lds.S       2004-04-24 02:42:58.000000000 -0400
+@@ -39,7 +39,8 @@
+ 
+   _edata = .;                 /* End of data section */
+ 
+-  . = ALIGN(8192);            /* init_task */
++/* choose the biggest of the possible stack sizes here? */
++  . = ALIGN(65536);           /* init_task */
+   .data.init_task : { *(.data.init_task) }
+ 
+   . = ALIGN(4096);            /* Init code and data */
+Index: linux-2.4.21/include/asm-i386/current.h
+===================================================================
+--- linux-2.4.21.orig/include/asm-i386/current.h       1998-08-14 19:35:22.000000000 -0400
++++ linux-2.4.21/include/asm-i386/current.h    2004-04-24 02:42:58.000000000 -0400
+@@ -1,15 +1,43 @@
+ #ifndef _I386_CURRENT_H
+ #define _I386_CURRENT_H
++#include <asm/page.h>
++
++/*
++ * Configurable stack sizes on i386, mainly for debugging purposes.
++ * (c) Balbir Singh
++ */
++
++#ifdef __ASSEMBLY__
++
++#define PAGE_SIZE      4096    /* as cannot handle 1UL << 12 */
++#define THREAD_SIZE ((1 << CONFIG_STACK_SIZE_SHIFT) * PAGE_SIZE)
++
++#define GET_CURRENT(reg) \
++        movl $-THREAD_SIZE, reg; \
++        andl %esp, reg
++
++#else  /* __ASSEMBLY__ */
++
++#define THREAD_SIZE ((1 << CONFIG_STACK_SIZE_SHIFT) * PAGE_SIZE)
++#define alloc_task_struct() \
++  ((struct task_struct *) __get_free_pages(GFP_KERNEL,CONFIG_STACK_SIZE_SHIFT))
++
++#define free_task_struct(p) \
++  free_pages((unsigned long) (p), CONFIG_STACK_SIZE_SHIFT)
++
++#define INIT_TASK_SIZE THREAD_SIZE
+ 
+ struct task_struct;
+ 
+ static inline struct task_struct * get_current(void)
+ {
+       struct task_struct *current;
+-      __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~8191UL));
++      __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~(THREAD_SIZE - 1)));
+       return current;
+  }
+  
+ #define current get_current()
+ 
++#endif /* __ASSEMBLY__ */
++
+ #endif /* !(_I386_CURRENT_H) */
+Index: linux-2.4.21/include/asm-i386/hw_irq.h
+===================================================================
+--- linux-2.4.21.orig/include/asm-i386/hw_irq.h        2004-04-24 02:39:05.000000000 -0400
++++ linux-2.4.21/include/asm-i386/hw_irq.h     2004-04-24 02:42:58.000000000 -0400
+@@ -16,6 +16,7 @@
+ #include <linux/sched.h>
+ #include <asm/atomic.h>
+ #include <asm/irq.h>
++#include <asm/current.h>
+ 
+ /*
+  * IDT vectors usable for external interrupt sources start
+@@ -120,10 +121,6 @@
+ #define IRQ_NAME2(nr) nr##_interrupt(void)
+ #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
+ 
+-#define GET_CURRENT \
+-      "movl %esp, %ebx\n\t" \
+-      "andl $-8192, %ebx\n\t"
+-
+ /*
+  *    SMP has a few special interrupts for IPI messages
+  */
+Index: linux-2.4.21/include/asm-i386/processor.h
+===================================================================
+--- linux-2.4.21.orig/include/asm-i386/processor.h     2004-04-24 02:39:21.000000000 -0400
++++ linux-2.4.21/include/asm-i386/processor.h  2004-04-24 02:42:58.000000000 -0400
+@@ -14,6 +14,7 @@
+ #include <asm/types.h>
+ #include <asm/sigcontext.h>
+ #include <asm/cpufeature.h>
++#include <asm/current.h>
+ #include <linux/cache.h>
+ #include <linux/config.h>
+ #include <linux/threads.h>
+@@ -466,9 +467,6 @@
+ #define KSTK_EIP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1019])
+ #define KSTK_ESP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1022])
+ 
+-#define THREAD_SIZE (2*PAGE_SIZE)
+-#define alloc_task_struct() ((struct task_struct *) __get_free_pages(GFP_KERNEL,1))
+-#define free_task_struct(p) free_pages((unsigned long) (p), 1)
+ #define get_task_struct(tsk)      atomic_inc(&virt_to_page(tsk)->count)
+ 
+ #define init_task     (init_task_union.task)
+Index: linux-2.4.21/include/linux/sched.h
+===================================================================
+--- linux-2.4.21.orig/include/linux/sched.h    2004-04-24 02:39:20.000000000 -0400
++++ linux-2.4.21/include/linux/sched.h 2004-04-24 02:42:58.000000000 -0400
+@@ -2,6 +2,7 @@
+ #define _LINUX_SCHED_H
+ 
+ #include <asm/param.h>        /* for HZ */
++#include <asm/current.h>      /* maybe for INIT_TASK_SIZE */
+ 
+ extern unsigned long event;
+ 
+Index: linux-2.4.21/include/asm-x86_64/current.h
+===================================================================
+--- linux-2.4.21.orig/include/asm-x86_64/current.h     2002-11-28 18:53:15.000000000 -0500
++++ linux-2.4.21/include/asm-x86_64/current.h  2004-04-24 02:42:58.000000000 -0400
+@@ -5,6 +5,7 @@
+ struct task_struct;
+ 
+ #include <asm/pda.h>
++#include <asm/page.h>
+ 
+ static inline struct task_struct *get_current(void) 
+ { 
diff --git a/lustre/kernel_patches/patches/dev_read_only-suse-2.4.19.patch b/lustre/kernel_patches/patches/dev_read_only-suse-2.4.19.patch

new file mode 100644 (file)

index 0000000..f2eb39a
--- /dev/null
+++ b/lustre/kernel_patches/patches/dev_read_only-suse-2.4.19.patch
@@ -0,0 +1,76 @@
+ drivers/block/blkpg.c  |   36 ++++++++++++++++++++++++++++++++++++
+ drivers/block/loop.c   |    3 +++
+ drivers/ide/ide-disk.c |    4 ++++
+ 3 files changed, 43 insertions(+)
+
+Index: linux-2.4.19/drivers/block/blkpg.c
+===================================================================
+--- linux-2.4.19.orig/drivers/block/blkpg.c    2002-08-02 20:39:43.000000000 -0400
++++ linux-2.4.19/drivers/block/blkpg.c 2004-04-23 18:24:40.000000000 -0400
+@@ -296,3 +296,37 @@
+ }
+ 
+ EXPORT_SYMBOL(blk_ioctl);
++
++#define NUM_DEV_NO_WRITE 16
++static int dev_no_write[NUM_DEV_NO_WRITE];
++/*
++ * Debug code for turning block devices "read-only" (will discard writes
++ * silently).  This is for filesystem crash/recovery testing.
++ */
++void dev_set_rdonly(kdev_t dev, int no_write)
++{
++      if (dev) {
++              printk(KERN_WARNING "Turning device %s read-only\n",
++                     bdevname(dev));
++              dev_no_write[no_write] = 0xdead0000 + dev;
++      }
++}
++
++int dev_check_rdonly(kdev_t dev) {
++      int i;
++
++      for (i = 0; i < NUM_DEV_NO_WRITE; i++) {
++      if ((dev_no_write[i] & 0xffff0000) == 0xdead0000 &&
++              dev == (dev_no_write[i] & 0xffff))
++              return 1;
++      }
++      return 0;
++}
++
++void dev_clear_rdonly(int no_write) {
++      dev_no_write[no_write] = 0;
++}
++
++EXPORT_SYMBOL(dev_set_rdonly);
++EXPORT_SYMBOL(dev_check_rdonly);
++EXPORT_SYMBOL(dev_clear_rdonly);
+Index: linux-2.4.19/drivers/block/loop.c
+===================================================================
+--- linux-2.4.19.orig/drivers/block/loop.c     2004-04-23 17:53:56.000000000 -0400
++++ linux-2.4.19/drivers/block/loop.c  2004-04-23 18:23:16.000000000 -0400
+@@ -478,6 +478,9 @@
+       spin_unlock_irq(&lo->lo_lock);
+ 
+       if (rw == WRITE) {
++              if (dev_check_rdonly(rbh->b_rdev))
++                      goto err;
++
+               if (lo->lo_flags & LO_FLAGS_READ_ONLY)
+                       goto err;
+       } else if (rw == READA) {
+Index: linux-2.4.19/drivers/ide/ide-disk.c
+===================================================================
+--- linux-2.4.19.orig/drivers/ide/ide-disk.c   2004-04-23 17:53:51.000000000 -0400
++++ linux-2.4.19/drivers/ide/ide-disk.c        2004-04-23 18:23:16.000000000 -0400
+@@ -558,6 +558,10 @@
+  */
+ static ide_startstop_t do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block)
+ {
++      if (rq->cmd == WRITE && dev_check_rdonly(rq->rq_dev)) {
++              ide_end_request(1, HWGROUP(drive));
++              return ide_stopped;
++      }
+       if (IDE_CONTROL_REG)
+               OUT_BYTE(drive->ctl,IDE_CONTROL_REG);
+ 
diff --git a/lustre/kernel_patches/patches/exports_2.4.19-suse.patch b/lustre/kernel_patches/patches/exports_2.4.19-suse.patch

index feaeec6..769f411 100644 (file)
--- a/lustre/kernel_patches/patches/exports_2.4.19-suse.patch
+++ b/lustre/kernel_patches/patches/exports_2.4.19-suse.patch
@@ -4,8 +4,10 @@
   kernel/ksyms.c     |    4 ++++
   4 files changed, 8 insertions(+), 1 deletion(-)
  
---- linux/fs/ext3/Makefile~exports_2.4.20      Wed Apr  9 10:07:14 2003
-+++ linux-mmonroe/fs/ext3/Makefile     Wed Apr  9 10:19:53 2003
+Index: linux-2.4.19/fs/ext3/Makefile
+===================================================================
+--- linux-2.4.19.orig/fs/ext3/Makefile 2004-04-23 17:53:55.000000000 -0400
++++ linux-2.4.19/fs/ext3/Makefile      2004-04-23 18:25:03.000000000 -0400
  @@ -9,6 +9,8 @@
   
   O_TARGET := ext3.o
@@ -15,10 +17,12 @@
   obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
                 ioctl.o namei.o super.o symlink.o
   obj-m    := $(O_TARGET)
---- linux/fs/ext3/super.c~exports_2.4.20       Wed Apr  9 10:07:14 2003
-+++ linux-mmonroe/fs/ext3/super.c      Wed Apr  9 10:19:53 2003
-@@ -1769,7 +1769,7 @@ static void __exit exit_ext3_fs(void)
-       unregister_filesystem(&ext3_fs_type);
+Index: linux-2.4.19/fs/ext3/super.c
+===================================================================
+--- linux-2.4.19.orig/fs/ext3/super.c  2004-04-23 17:53:55.000000000 -0400
++++ linux-2.4.19/fs/ext3/super.c       2004-04-23 18:25:03.000000000 -0400
+@@ -1821,7 +1821,7 @@
+       exit_ext3_xattr();
   }
   
  -EXPORT_NO_SYMBOLS;
@@ -26,19 +30,23 @@
   
   MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
   MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
---- linux/include/linux/fs.h~exports_2.4.20    Wed Apr  9 10:07:14 2003
-+++ linux-mmonroe/include/linux/fs.h   Wed Apr  9 10:19:53 2003
-@@ -1020,6 +1020,7 @@ extern int unregister_filesystem(struct 
+Index: linux-2.4.19/include/linux/fs.h
+===================================================================
+--- linux-2.4.19.orig/include/linux/fs.h       2004-04-23 17:54:14.000000000 -0400
++++ linux-2.4.19/include/linux/fs.h    2004-04-23 18:25:27.000000000 -0400
+@@ -1183,6 +1183,7 @@
   extern struct vfsmount *kern_mount(struct file_system_type *);
   extern int may_umount(struct vfsmount *);
   extern long do_mount(char *, char *, char *, unsigned long, void *);
  +struct vfsmount *do_kern_mount(const char *type, int flags, char *name, void *data);
+ extern void umount_tree(struct vfsmount *);
   
   #define kern_umount mntput
- 
---- linux/kernel/ksyms.c~exports_2.4.20        Wed Apr  9 10:07:14 2003
-+++ linux-mmonroe/kernel/ksyms.c       Wed Apr  9 10:19:53 2003
-@@ -308,6 +308,10 @@ EXPORT_SYMBOL(dcache_dir_fsync);
+Index: linux-2.4.19/kernel/ksyms.c
+===================================================================
+--- linux-2.4.19.orig/kernel/ksyms.c   2004-04-23 17:54:14.000000000 -0400
++++ linux-2.4.19/kernel/ksyms.c        2004-04-23 18:25:03.000000000 -0400
+@@ -330,6 +330,10 @@
   EXPORT_SYMBOL(dcache_readdir);
   EXPORT_SYMBOL(dcache_dir_ops);
   
@@ -49,5 +57,3 @@
   /* for stackable file systems (lofs, wrapfs, cryptfs, etc.) */
   EXPORT_SYMBOL(default_llseek);
   EXPORT_SYMBOL(dentry_open);
-
-_
diff --git a/lustre/kernel_patches/patches/ext-2.4-patch-1-suse-2.4.19.patch b/lustre/kernel_patches/patches/ext-2.4-patch-1-suse-2.4.19.patch

new file mode 100644 (file)

index 0000000..e937932
--- /dev/null
+++ b/lustre/kernel_patches/patches/ext-2.4-patch-1-suse-2.4.19.patch
@@ -0,0 +1,2551 @@
+ fs/ext3/Makefile           |    2 
+ fs/ext3/dir.c              |  299 +++++++++
+ fs/ext3/file.c             |    3 
+ fs/ext3/hash.c             |  215 ++++++
+ fs/ext3/namei.c            | 1388 ++++++++++++++++++++++++++++++++++++++++-----
+ fs/ext3/super.c            |    7 
+ include/linux/ext3_fs.h    |   85 ++
+ include/linux/ext3_fs_sb.h |    2 
+ include/linux/ext3_jbd.h   |    2 
+ include/linux/rbtree.h     |    2 
+ lib/rbtree.c               |   42 +
+ 11 files changed, 1887 insertions(+), 160 deletions(-)
+
+Index: linux-2.4.19/fs/ext3/Makefile
+===================================================================
+--- linux-2.4.19.orig/fs/ext3/Makefile 2004-04-23 18:25:03.000000000 -0400
++++ linux-2.4.19/fs/ext3/Makefile      2004-04-23 18:26:27.000000000 -0400
+@@ -12,7 +12,7 @@
+ export-objs :=        super.o inode.o
+ 
+ obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
+-              ioctl.o namei.o super.o symlink.o
++              ioctl.o namei.o super.o symlink.o hash.o
+ obj-m    := $(O_TARGET)
+ 
+ obj-$(CONFIG_EXT3_FS_XATTR) += xattr.o
+Index: linux-2.4.19/fs/ext3/dir.c
+===================================================================
+--- linux-2.4.19.orig/fs/ext3/dir.c    2001-11-09 17:25:04.000000000 -0500
++++ linux-2.4.19/fs/ext3/dir.c 2004-04-23 18:26:27.000000000 -0400
+@@ -21,12 +21,16 @@
+ #include <linux/fs.h>
+ #include <linux/jbd.h>
+ #include <linux/ext3_fs.h>
++#include <linux/slab.h>
++#include <linux/rbtree.h>
+ 
+ static unsigned char ext3_filetype_table[] = {
+       DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
+ };
+ 
+ static int ext3_readdir(struct file *, void *, filldir_t);
++static int ext3_dx_readdir(struct file * filp,
++                         void * dirent, filldir_t filldir);
+ 
+ struct file_operations ext3_dir_operations = {
+       read:           generic_read_dir,
+@@ -35,6 +39,17 @@
+       fsync:          ext3_sync_file,         /* BKL held */
+ };
+ 
++
++static unsigned char get_dtype(struct super_block *sb, int filetype)
++{
++      if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE) ||
++          (filetype >= EXT3_FT_MAX))
++              return DT_UNKNOWN;
++
++      return (ext3_filetype_table[filetype]);
++}
++                             
++
+ int ext3_check_dir_entry (const char * function, struct inode * dir,
+                         struct ext3_dir_entry_2 * de,
+                         struct buffer_head * bh,
+@@ -79,6 +94,16 @@
+ 
+       sb = inode->i_sb;
+ 
++      if (is_dx(inode)) {
++              err = ext3_dx_readdir(filp, dirent, filldir);
++              if (err != ERR_BAD_DX_DIR)
++                      return err;
++              /*
++               * We don't set the inode dirty flag since it's not
++               * critical that it get flushed back to the disk.
++               */
++              EXT3_I(filp->f_dentry->d_inode)->i_flags &= ~EXT3_INDEX_FL;
++      }
+       stored = 0;
+       bh = NULL;
+       offset = filp->f_pos & (sb->s_blocksize - 1);
+@@ -162,18 +187,12 @@
+                                * during the copy operation.
+                                */
+                               unsigned long version = filp->f_version;
+-                              unsigned char d_type = DT_UNKNOWN;
+ 
+-                              if (EXT3_HAS_INCOMPAT_FEATURE(sb,
+-                                              EXT3_FEATURE_INCOMPAT_FILETYPE)
+-                                              && de->file_type < EXT3_FT_MAX)
+-                                      d_type =
+-                                        ext3_filetype_table[de->file_type];
+                               error = filldir(dirent, de->name,
+                                               de->name_len,
+                                               filp->f_pos,
+                                               le32_to_cpu(de->inode),
+-                                              d_type);
++                                              get_dtype(sb, de->file_type));
+                               if (error)
+                                       break;
+                               if (version != filp->f_version)
+@@ -188,3 +207,269 @@
+       UPDATE_ATIME(inode);
+       return 0;
+ }
++
++#ifdef CONFIG_EXT3_INDEX
++/*
++ * These functions convert from the major/minor hash to an f_pos
++ * value.
++ * 
++ * Currently we only use major hash number.  This is unfortunate, but
++ * on 32-bit machines, the same VFS interface is used for lseek and
++ * llseek, so if we use the 64 bit offset, then the 32-bit versions of
++ * lseek/telldir/seekdir will blow out spectacularly, and from within
++ * the ext2 low-level routine, we don't know if we're being called by
++ * a 64-bit version of the system call or the 32-bit version of the
++ * system call.  Worse yet, NFSv2 only allows for a 32-bit readdir
++ * cookie.  Sigh.
++ */
++#define hash2pos(major, minor)        (major >> 1)
++#define pos2maj_hash(pos)     ((pos << 1) & 0xffffffff)
++#define pos2min_hash(pos)     (0)
++
++/*
++ * This structure holds the nodes of the red-black tree used to store
++ * the directory entry in hash order.
++ */
++struct fname {
++      __u32           hash;
++      __u32           minor_hash;
++      rb_node_t       rb_hash; 
++      struct fname    *next;
++      __u32           inode;
++      __u8            name_len;
++      __u8            file_type;
++      char            name[0];
++};
++
++/*
++ * This function implements a non-recursive way of freeing all of the
++ * nodes in the red-black tree.
++ */
++static void free_rb_tree_fname(rb_root_t *root)
++{
++      rb_node_t       *n = root->rb_node;
++      rb_node_t       *parent;
++      struct fname    *fname;
++
++      while (n) {
++              /* Do the node's children first */
++              if ((n)->rb_left) {
++                      n = n->rb_left;
++                      continue;
++              }
++              if (n->rb_right) {
++                      n = n->rb_right;
++                      continue;
++              }
++              /*
++               * The node has no children; free it, and then zero
++               * out parent's link to it.  Finally go to the
++               * beginning of the loop and try to free the parent
++               * node.
++               */
++              parent = n->rb_parent;
++              fname = rb_entry(n, struct fname, rb_hash);
++              kfree(fname);
++              if (!parent)
++                      root->rb_node = 0;
++              else if (parent->rb_left == n)
++                      parent->rb_left = 0;
++              else if (parent->rb_right == n)
++                      parent->rb_right = 0;
++              n = parent;
++      }
++      root->rb_node = 0;
++}
++
++/* Allocate readdir state whose start hash comes from pos; NULL if kmalloc fails */
++struct dir_private_info *create_dir_info(loff_t pos)
++{
++      struct dir_private_info *p;
++
++      p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL);
++      if (!p)
++              return NULL;
++      p->root.rb_node = 0;
++      p->curr_node = 0;
++      p->extra_fname = 0;
++      p->last_pos = 0;
++      p->curr_hash = pos2maj_hash(pos);
++      p->curr_minor_hash = pos2min_hash(pos);
++      p->next_hash = 0;
++      return p;
++}
++/* Free the cached name tree, then the dir_private_info itself. */
++void ext3_htree_free_dir_info(struct dir_private_info *p)
++{
++      free_rb_tree_fname(&p->root);
++      kfree(p);
++}
++              
++/*
++ * Given a directory entry, enter it into the fname rb tree.
++ * The entry is skipped if memory for it cannot be allocated.
++ */
++void ext3_htree_store_dirent(struct file *dir_file, __u32 hash,
++                           __u32 minor_hash,
++                           struct ext3_dir_entry_2 *dirent)
++{
++      rb_node_t **p, *parent = NULL;
++      struct fname * fname, *new_fn;
++      struct dir_private_info *info;
++      int len;
++
++      info = (struct dir_private_info *) dir_file->private_data;
++      p = &info->root.rb_node;
++
++      /* Create and allocate the fname structure */
++      len = sizeof(struct fname) + dirent->name_len + 1;
++      new_fn = kmalloc(len, GFP_KERNEL);
++      if (!new_fn)
++              return; /* void interface: -ENOMEM cannot be reported */
++      memset(new_fn, 0, len);
++      new_fn->hash = hash;
++      new_fn->minor_hash = minor_hash;
++      new_fn->inode = le32_to_cpu(dirent->inode);
++      new_fn->name_len = dirent->name_len;
++      new_fn->file_type = dirent->file_type;
++      memcpy(new_fn->name, dirent->name, dirent->name_len);
++      new_fn->name[dirent->name_len] = 0;
++      
++      while (*p) {
++              parent = *p;
++              fname = rb_entry(parent, struct fname, rb_hash);
++
++              /* Same hash and minor hash (rare): chain it on a linked list */
++              if ((new_fn->hash == fname->hash) &&
++                  (new_fn->minor_hash == fname->minor_hash)) {
++                      new_fn->next = fname->next;
++                      fname->next = new_fn;
++                      return;
++              }
++                      
++              if (new_fn->hash < fname->hash)
++                      p = &(*p)->rb_left;
++              else if (new_fn->hash > fname->hash)
++                      p = &(*p)->rb_right;
++              else if (new_fn->minor_hash < fname->minor_hash)
++                      p = &(*p)->rb_left;
++              else /* if (new_fn->minor_hash > fname->minor_hash) */
++                      p = &(*p)->rb_right;
++      }
++
++      rb_link_node(&new_fn->rb_hash, parent, p);
++      rb_insert_color(&new_fn->rb_hash, &info->root);
++}
++
++
++
++/*
++ * This is a helper function for ext3_dx_readdir.  It calls filldir
++ * for all entries on the fname linked list.  (Normally there is only
++ * one entry on the linked list, unless there are 62 bit hash collisions.)
++ * If filldir refuses an entry, that entry is saved in info->extra_fname
++ * and the filldir error is returned; otherwise the result is 0.
++ */
++static int call_filldir(struct file * filp, void * dirent,
++                      filldir_t filldir, struct fname *fname)
++{
++      struct dir_private_info *info = filp->private_data;
++      loff_t  curr_pos;
++      struct inode *inode = filp->f_dentry->d_inode;
++      int error;
++
++      if (!fname) {
++              printk("call_filldir: called with null fname?!?\n");
++              return 0;
++      }
++      curr_pos = hash2pos(fname->hash, fname->minor_hash);
++      while (fname) {
++              error = filldir(dirent, fname->name,
++                              fname->name_len, curr_pos, 
++                              fname->inode,
++                              get_dtype(inode->i_sb, fname->file_type));
++              if (error) {
++                      /* taken as "fname not stored": resume with it later */
++                      filp->f_pos = curr_pos;
++                      info->extra_fname = fname;
++                      return error;
++              }
++              fname = fname->next;
++      }
++      return 0;
++}
++
++/* Hash-order readdir served from the rb tree cached in filp->private_data */
++static int ext3_dx_readdir(struct file * filp,
++                       void * dirent, filldir_t filldir)
++{
++      struct dir_private_info *info = filp->private_data;
++      struct inode *inode = filp->f_dentry->d_inode;
++      struct fname *fname;
++      int     ret;
++
++      if (!info) {
++              info = create_dir_info(filp->f_pos);
++              if (!info)
++                      return -ENOMEM;
++              filp->private_data = info;
++      }
++
++      /* Some one has messed with f_pos; reset the world */
++      if (info->last_pos != filp->f_pos) {
++              free_rb_tree_fname(&info->root);
++              info->curr_node = 0;
++              info->extra_fname = 0;
++              info->curr_hash = pos2maj_hash(filp->f_pos);
++              info->curr_minor_hash = pos2min_hash(filp->f_pos);
++      }
++
++      /* Finish a collision chain left over from an earlier call, once only */
++      if (info->extra_fname) {
++              if (call_filldir(filp, dirent, filldir, info->extra_fname))
++                      goto finished;
++              info->extra_fname = 0;
++              goto next_node;
++      }
++      if (!info->curr_node)
++              info->curr_node = rb_get_first(&info->root);
++
++      while (1) {
++              /*
++               * Fill the rbtree if we have no more entries,
++               * or the inode has changed since we last read in the
++               * cached entries. 
++               */
++              if ((!info->curr_node) ||
++                  (filp->f_version != inode->i_version)) {
++                      info->curr_node = 0;
++                      free_rb_tree_fname(&info->root);
++                      filp->f_version = inode->i_version;
++                      ret = ext3_htree_fill_tree(filp, info->curr_hash,
++                                                 info->curr_minor_hash,
++                                                 &info->next_hash);
++                      if (ret < 0)
++                              return ret;
++                      if (ret == 0)
++                              break;
++                      info->curr_node = rb_get_first(&info->root);
++              }
++
++              fname = rb_entry(info->curr_node, struct fname, rb_hash);
++              info->curr_hash = fname->hash;
++              info->curr_minor_hash = fname->minor_hash;
++              if (call_filldir(filp, dirent, filldir, fname))
++                      break;
++next_node:
++              info->curr_node = rb_get_next(info->curr_node);
++              if (!info->curr_node) {
++                      info->curr_hash = info->next_hash;
++                      info->curr_minor_hash = 0;
++              }
++      }
++finished:
++      info->last_pos = filp->f_pos;
++      UPDATE_ATIME(inode);
++      return 0;
++}
++#endif
+Index: linux-2.4.19/fs/ext3/file.c
+===================================================================
+--- linux-2.4.19.orig/fs/ext3/file.c   2004-04-23 17:54:02.000000000 -0400
++++ linux-2.4.19/fs/ext3/file.c        2004-04-23 18:26:27.000000000 -0400
+@@ -38,6 +38,9 @@
+ {
+       if (filp->f_mode & FMODE_WRITE)
+               ext3_discard_prealloc (inode);
++      if (is_dx(inode) && filp->private_data)
++              ext3_htree_free_dir_info(filp->private_data);
++
+       return 0;
+ }
+ 
+Index: linux-2.4.19/fs/ext3/hash.c
+===================================================================
+--- linux-2.4.19.orig/fs/ext3/hash.c   2003-01-30 05:24:37.000000000 -0500
++++ linux-2.4.19/fs/ext3/hash.c        2004-04-23 18:26:27.000000000 -0400
+@@ -0,0 +1,215 @@
++/*
++ *  linux/fs/ext3/hash.c
++ *
++ * Copyright (C) 2002 by Theodore Ts'o
++ *
++ * This file is released under the GPL v2.
++ * 
++ * This file may be redistributed under the terms of the GNU Public
++ * License.
++ */
++
++#include <linux/fs.h>
++#include <linux/jbd.h>
++#include <linux/sched.h>
++#include <linux/ext3_fs.h>
++
++#define DELTA 0x9E3779B9
++/* 16 mixing rounds keyed by in[0..3]; only buf[0] and buf[1] are updated. */
++static void TEA_transform(__u32 buf[4], __u32 const in[])
++{
++      __u32   sum = 0;
++      __u32   b0 = buf[0], b1 = buf[1];
++      __u32   a = in[0], b = in[1], c = in[2], d = in[3];
++      int     n = 16;
++
++      do {                                                    
++              sum += DELTA;                                   
++              b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); 
++              b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); 
++      } while(--n);
++
++      buf[0] += b0;
++      buf[1] += b1;
++}
++
++/* F, G and H are basic MD4 functions: selection, majority, parity */
++#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
++#define G(x, y, z) (((x) & (y)) + (((x) ^ (y)) & (z)))
++#define H(x, y, z) ((x) ^ (y) ^ (z))
++
++/*
++ * The generic round function.  The application is so specific that
++ * we don't bother protecting all the arguments with parens, as is generally
++ * good macro practice, in favor of extra legibility.
++ * Rotation is separate from addition to prevent recomputation
++ */
++#define ROUND(f, a, b, c, d, x, s)    \
++      (a += f(b, c, d) + x, a = (a << s) | (a >> (32-s)))
++#define K1 0
++#define K2 013240474631UL
++#define K3 015666365641UL
++
++/*
++ * Basic cut-down MD4 transform.  The result is added into buf[0..3].
++ */
++static void halfMD4Transform (__u32 buf[4], __u32 const in[])
++{
++      __u32   a = buf[0], b = buf[1], c = buf[2], d = buf[3];
++
++      /* Round 1 */
++      ROUND(F, a, b, c, d, in[0] + K1,  3);
++      ROUND(F, d, a, b, c, in[1] + K1,  7);
++      ROUND(F, c, d, a, b, in[2] + K1, 11);
++      ROUND(F, b, c, d, a, in[3] + K1, 19);
++      ROUND(F, a, b, c, d, in[4] + K1,  3);
++      ROUND(F, d, a, b, c, in[5] + K1,  7);
++      ROUND(F, c, d, a, b, in[6] + K1, 11);
++      ROUND(F, b, c, d, a, in[7] + K1, 19);
++
++      /* Round 2 */
++      ROUND(G, a, b, c, d, in[1] + K2,  3);
++      ROUND(G, d, a, b, c, in[3] + K2,  5);
++      ROUND(G, c, d, a, b, in[5] + K2,  9);
++      ROUND(G, b, c, d, a, in[7] + K2, 13);
++      ROUND(G, a, b, c, d, in[0] + K2,  3);
++      ROUND(G, d, a, b, c, in[2] + K2,  5);
++      ROUND(G, c, d, a, b, in[4] + K2,  9);
++      ROUND(G, b, c, d, a, in[6] + K2, 13);
++
++      /* Round 3 */
++      ROUND(H, a, b, c, d, in[3] + K3,  3);
++      ROUND(H, d, a, b, c, in[7] + K3,  9);
++      ROUND(H, c, d, a, b, in[2] + K3, 11);
++      ROUND(H, b, c, d, a, in[6] + K3, 15);
++      ROUND(H, a, b, c, d, in[1] + K3,  3);
++      ROUND(H, d, a, b, c, in[5] + K3,  9);
++      ROUND(H, c, d, a, b, in[0] + K3, 11);
++      ROUND(H, b, c, d, a, in[4] + K3, 15);
++
++      buf[0] += a;
++      buf[1] += b;
++      buf[2] += c;
++      buf[3] += d;
++}
++
++#undef ROUND
++#undef F
++#undef G
++#undef H
++#undef K1
++#undef K2
++#undef K3
++
++/* The old legacy hash; the returned value always has its low bit clear */
++static __u32 dx_hack_hash (const char *name, int len)
++{
++      __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
++      while (len--) {
++              __u32 hash = hash1 + (hash0 ^ (*name++ * 7152373));
++              
++              if (hash & 0x80000000) hash -= 0x7fffffff;
++              hash1 = hash0;
++              hash0 = hash;
++      }
++      return (hash0 << 1);
++}
++/* Pack msg into num 32-bit words of buf; unused bytes/words carry a len pad */
++static void str2hashbuf(const char *msg, int len, __u32 *buf, int num)
++{
++      __u32   pad, val;
++      int     i;
++
++      pad = (__u32)len | ((__u32)len << 8);   /* low byte of len, replicated */
++      pad |= pad << 16;
++
++      val = pad;
++      if (len > num*4)        /* consume at most num words of input */
++              len = num * 4;
++      for (i=0; i < len; i++) {
++              if ((i % 4) == 0)
++                      val = pad;
++              val = msg[i] + (val << 8);
++              if ((i % 4) == 3) {
++                      *buf++ = val;
++                      val = pad;
++                      num--;
++              }
++      }
++      if (--num >= 0)         /* partial (or pure pad) word */
++              *buf++ = val;
++      while (--num >= 0)      /* fill the remaining words with pad */
++              *buf++ = pad;
++}
++
++/*
++ * Returns the hash of a filename.  If len is 0 and name is NULL, then
++ * this function can be used to test whether or not a hash version is
++ * supported.
++ * 
++ * The seed is an 4 longword (32 bits) "secret" which can be used to
++ * uniquify a hash.  If the seed is all zero's, then some default seed
++ * may be used.
++ * 
++ * A particular hash version specifies whether or not the seed is
++ * represented, and whether or not the returned hash is 32 bits or 64
++ * bits.  32 bit hashes will return 0 for the minor hash.
++ */
++int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
++{
++      __u32   hash;
++      __u32   minor_hash = 0;
++      const char      *p;
++      int             i;
++      __u32           in[8], buf[4];
++
++      /* Initialize the default seed for the hash checksum functions */
++      buf[0] = 0x67452301;
++      buf[1] = 0xefcdab89;
++      buf[2] = 0x98badcfe;
++      buf[3] = 0x10325476;
++
++      /* Check to see if the seed is all zero's */
++      if (hinfo->seed) {
++              for (i=0; i < 4; i++) {
++                      if (hinfo->seed[i])
++                              break;
++              }
++              if (i < 4)      /* some word is non-zero: use the seed */
++                      memcpy(buf, hinfo->seed, sizeof(buf));
++      }
++              
++      switch (hinfo->hash_version) {
++      case DX_HASH_LEGACY:
++              hash = dx_hack_hash(name, len);
++              break;
++      case DX_HASH_HALF_MD4:
++              p = name;
++              while (len > 0) {       /* 32 bytes of name per transform */
++                      str2hashbuf(p, len, in, 8);
++                      halfMD4Transform(buf, in);
++                      len -= 32;
++                      p += 32;
++              }
++              minor_hash = buf[2];
++              hash = buf[1];
++              break;
++      case DX_HASH_TEA:
++              p = name;
++              while (len > 0) {       /* 16 bytes of name per transform */
++                      str2hashbuf(p, len, in, 4);
++                      TEA_transform(buf, in);
++                      len -= 16;
++                      p += 16;
++              }
++              hash = buf[0];
++              minor_hash = buf[1];
++              break;
++      default:
++              hinfo->hash = 0;
++              return -1;      /* unrecognised hash_version */
++      }
++      hinfo->hash = hash & ~1;        /* low bit of the hash is always cleared */
++      hinfo->minor_hash = minor_hash;
++      return 0;
++}
+Index: linux-2.4.19/fs/ext3/namei.c
+===================================================================
+--- linux-2.4.19.orig/fs/ext3/namei.c  2004-04-23 17:53:55.000000000 -0400
++++ linux-2.4.19/fs/ext3/namei.c       2004-04-23 22:24:05.000000000 -0400
+@@ -16,6 +16,12 @@
+  *        David S. Miller (davem@caip.rutgers.edu), 1995
+  *  Directory entry file type support and forward compatibility hooks
+  *    for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
++ *  Hash Tree Directory indexing (c)
++ *    Daniel Phillips, 2001
++ *  Hash Tree Directory indexing porting
++ *    Christopher Li, 2002
++ *  Hash Tree Directory indexing cleanup
++ *    Theodore Ts'o, 2002
+  */
+ 
+ #include <linux/fs.h>
+@@ -40,6 +46,630 @@
+ #define NAMEI_RA_SIZE        (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
+ #define NAMEI_RA_INDEX(c,b)  (((c) * NAMEI_RA_BLOCKS) + (b))
+ 
++static struct buffer_head *ext3_append(handle_t *handle,
++                                      struct inode *inode,
++                                      u32 *block, int *err)
++{
++      struct buffer_head *bh;
++
++      *block = inode->i_size >> inode->i_sb->s_blocksize_bits; /* i_size in blocks */
++
++      if ((bh = ext3_bread(handle, inode, *block, 1, err))) {
++              inode->i_size += inode->i_sb->s_blocksize; /* grow by one block */
++              EXT3_I(inode)->i_disksize = inode->i_size;
++              ext3_journal_get_write_access(handle,bh); /* NOTE(review): result ignored */
++      }
++      return bh;      /* NULL when ext3_bread() returned no buffer */
++}
++
++#ifndef assert
++#define assert(test) J_ASSERT(test)
++#endif
++
++#ifndef swap
++#define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0)
++#endif
++
++typedef struct { u32 v; } le_u32;
++typedef struct { u16 v; } le_u16;
++
++#ifdef DX_DEBUG
++#define dxtrace(command) command
++#else
++#define dxtrace(command) 
++#endif
++
++struct fake_dirent
++{
++      /*le*/u32 inode;
++      /*le*/u16 rec_len;
++      u8 name_len;
++      u8 file_type;
++};
++
++struct dx_countlimit
++{
++      le_u16 limit;
++      le_u16 count;
++};
++
++struct dx_entry
++{
++      le_u32 hash;
++      le_u32 block;
++};
++
++/*
++ * dx_root_info is laid out so that if it should somehow get overlaid by a
++ * dirent the two low bits of the hash version will be zero.  Therefore, the
++ * hash version mod 4 should never be 0.  Sincerely, the paranoia department.
++ */
++
++struct dx_root
++{
++      struct fake_dirent dot;
++      char dot_name[4];
++      struct fake_dirent dotdot;
++      char dotdot_name[4];
++      struct dx_root_info
++      {
++              le_u32 reserved_zero;
++              u8 hash_version;
++              u8 info_length; /* 8 */
++              u8 indirect_levels;
++              u8 unused_flags;
++      }
++      info;
++      struct dx_entry entries[0];
++};
++
++struct dx_node
++{
++      struct fake_dirent fake;
++      struct dx_entry entries[0];
++};
++
++
++struct dx_frame
++{
++      struct buffer_head *bh;
++      struct dx_entry *entries;
++      struct dx_entry *at;
++};
++
++struct dx_map_entry
++{
++      u32 hash;
++      u32 offs;
++};
++
++#ifdef CONFIG_EXT3_INDEX
++static inline unsigned dx_get_block (struct dx_entry *entry);
++static void dx_set_block (struct dx_entry *entry, unsigned value);
++static inline unsigned dx_get_hash (struct dx_entry *entry);
++static void dx_set_hash (struct dx_entry *entry, unsigned value);
++static unsigned dx_get_count (struct dx_entry *entries);
++static unsigned dx_get_limit (struct dx_entry *entries);
++static void dx_set_count (struct dx_entry *entries, unsigned value);
++static void dx_set_limit (struct dx_entry *entries, unsigned value);
++static unsigned dx_root_limit (struct inode *dir, unsigned infosize);
++static unsigned dx_node_limit (struct inode *dir);
++static struct dx_frame *dx_probe(struct dentry *dentry,
++                               struct inode *dir,
++                               struct dx_hash_info *hinfo,
++                               struct dx_frame *frame,
++                               int *err);
++static void dx_release (struct dx_frame *frames);
++static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
++                      struct dx_hash_info *hinfo, struct dx_map_entry map[]);
++static void dx_sort_map(struct dx_map_entry *map, unsigned count);
++static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to,
++              struct dx_map_entry *offsets, int count);
++static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size);
++static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block);
++static int ext3_htree_next_block(struct inode *dir, __u32 hash,
++                               struct dx_frame *frame,
++                               struct dx_frame *frames, int *err,
++                               __u32 *start_hash);
++static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
++                     struct ext3_dir_entry_2 **res_dir, int *err);
++static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
++                           struct inode *inode);
++
++/*
++ * Future: use high four bits of block for coalesce-on-delete flags.
++ * For now dx_get_block() masks off the whole top byte (0x00ffffff).
++ */
++
++static inline unsigned dx_get_block (struct dx_entry *entry)
++{
++      return le32_to_cpu(entry->block.v) & 0x00ffffff;
++}
++
++static inline void dx_set_block (struct dx_entry *entry, unsigned value)
++{
++      entry->block.v = cpu_to_le32(value);
++}
++
++static inline unsigned dx_get_hash (struct dx_entry *entry)
++{
++      return le32_to_cpu(entry->hash.v);
++}
++
++static inline void dx_set_hash (struct dx_entry *entry, unsigned value)
++{
++      entry->hash.v = cpu_to_le32(value);
++}
++
++static inline unsigned dx_get_count (struct dx_entry *entries)
++{
++      return le16_to_cpu(((struct dx_countlimit *) entries)->count.v);
++}
++
++static inline unsigned dx_get_limit (struct dx_entry *entries)
++{
++      return le16_to_cpu(((struct dx_countlimit *) entries)->limit.v);
++}
++
++static inline void dx_set_count (struct dx_entry *entries, unsigned value)
++{
++      ((struct dx_countlimit *) entries)->count.v = cpu_to_le16(value);
++}
++
++static inline void dx_set_limit (struct dx_entry *entries, unsigned value)
++{
++      ((struct dx_countlimit *) entries)->limit.v = cpu_to_le16(value);
++}
++
++static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize)
++{
++      unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) -
++              EXT3_DIR_REC_LEN(2) - infosize; /* block minus two dirents and info */
++      return 0? 20: entry_space / sizeof(struct dx_entry); /* constant 0: 20 is never used */
++}
++
++static inline unsigned dx_node_limit (struct inode *dir)
++{
++      unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0);
++      return 0? 22: entry_space / sizeof(struct dx_entry); /* constant 0: 22 is never used */
++}
++
++/*
++ * Debug
++ */
++#ifdef DX_DEBUG
++struct stats
++{ 
++      unsigned names;
++      unsigned space;
++      unsigned bcount;
++};
++
++static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_entry_2 *de,
++                               int size, int show_names)
++{
++      unsigned names = 0, space = 0;
++      char *base = (char *) de;
++      struct dx_hash_info h = *hinfo;
++      
++      printk("names: ");
++      while ((char *) de < base + size)
++      {
++              if (de->inode)
++              {
++                      if (show_names)
++                      {
++                              int len = de->name_len;
++                              char *name = de->name;
++                              while (len--) printk("%c", *name++);
++                              ext3fs_dirhash(de->name, de->name_len, &h);
++                              printk(":%x.%u ", h.hash,
++                                     ((char *) de - base));
++                      }
++                      space += EXT3_DIR_REC_LEN(de->name_len);
++                      names++;
++              }
++              de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
++      }
++      printk("(%i)\n", names);
++      return (struct stats) { names, space, 1 };
++}
++
++struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
++                           struct dx_entry *entries, int levels)
++{
++      unsigned blocksize = dir->i_sb->s_blocksize;
++      unsigned count = dx_get_count (entries), names = 0, space = 0, i;
++      unsigned bcount = 0;
++      struct buffer_head *bh;
++      int err;
++      printk("%i indexed blocks...\n", count);
++      for (i = 0; i < count; i++, entries++)
++      {
++              u32 block = dx_get_block(entries), hash = i? dx_get_hash(entries): 0;
++              u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
++              struct stats stats;
++              printk("%s%3u:%03u hash %8x/%8x ",levels?"":"   ", i, block, hash, range);
++              if (!(bh = ext3_bread (NULL,dir, block, 0,&err))) continue;
++              stats = levels?
++                 dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
++                 dx_show_leaf(hinfo, (struct ext3_dir_entry_2 *) bh->b_data, blocksize, 0);
++              names += stats.names;
++              space += stats.space;
++              bcount += stats.bcount;
++              brelse (bh);
++      }
++      if (bcount)
++              printk("%snames %u, fullness %u (%u%%)\n", levels?"":"   ",
++                      names, space/bcount,(space/bcount)*100/blocksize);
++      return (struct stats) { names, space, bcount};
++}
++#endif /* DX_DEBUG */
++
++/*
++ * Probe for a directory leaf block to search.
++ *
++ * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
++ * error in the directory index, and the caller should fall back to
++ * searching the directory normally.  The callers of dx_probe **MUST**
++ * check for this error code, and make sure it never gets reflected
++ * back to userspace.
++ */
++static struct dx_frame *
++dx_probe(struct dentry *dentry, struct inode *dir,
++       struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
++{
++      unsigned count, indirect;
++      struct dx_entry *at, *entries, *p, *q, *m;
++      struct dx_root *root;
++      struct buffer_head *bh;
++      struct dx_frame *frame = frame_in;
++      u32 hash;
++
++      frame->bh = NULL;
++      if (dentry)     /* a dentry overrides the dir argument */
++              dir = dentry->d_parent->d_inode;
++      if (!(bh = ext3_bread (NULL,dir, 0, 0, err)))
++              goto fail;
++      root = (struct dx_root *) bh->b_data;
++      if (root->info.hash_version != DX_HASH_TEA &&
++          root->info.hash_version != DX_HASH_HALF_MD4 &&
++          root->info.hash_version != DX_HASH_LEGACY) {
++              ext3_warning(dir->i_sb, __FUNCTION__,
++                           "Unrecognised inode hash code %d",
++                           root->info.hash_version);
++              brelse(bh);
++              *err = ERR_BAD_DX_DIR;
++              goto fail;
++      }
++      hinfo->hash_version = root->info.hash_version;
++      hinfo->seed = dir->i_sb->u.ext3_sb.s_hash_seed;
++      if (dentry)     /* without a dentry the caller's hinfo->hash is used */
++              ext3fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo);
++      hash = hinfo->hash;
++
++      if (root->info.unused_flags & 1) {
++              ext3_warning(dir->i_sb, __FUNCTION__,
++                           "Unimplemented inode hash flags: %#06x",
++                           root->info.unused_flags);
++              brelse(bh);
++              *err = ERR_BAD_DX_DIR;
++              goto fail;
++      }
++
++      if ((indirect = root->info.indirect_levels) > 1) {
++              ext3_warning(dir->i_sb, __FUNCTION__,
++                           "Unimplemented inode hash depth: %#06x",
++                           root->info.indirect_levels);
++              brelse(bh);
++              *err = ERR_BAD_DX_DIR;
++              goto fail;
++      }
++
++      entries = (struct dx_entry *) (((char *)&root->info) +
++                                     root->info.info_length);
++      assert(dx_get_limit(entries) == dx_root_limit(dir,
++                                                    root->info.info_length));
++      dxtrace (printk("Look up %x", hash));
++      while (1)
++      {
++              count = dx_get_count(entries);
++              assert (count && count <= dx_get_limit(entries));
++              p = entries + 1;        /* entry 0 is not compared by the search */
++              q = entries + count - 1;
++              while (p <= q)  /* binary search; ends with p - 1 = last hash <= target */
++              {
++                      m = p + (q - p)/2;
++                      dxtrace(printk("."));
++                      if (dx_get_hash(m) > hash)
++                              q = m - 1;
++                      else
++                              p = m + 1;
++              }
++
++              if (0) // linear search cross check
++              {
++                      unsigned n = count - 1;
++                      at = entries;
++                      while (n--)
++                      {
++                              dxtrace(printk(","));
++                              if (dx_get_hash(++at) > hash)
++                              {
++                                      at--;
++                                      break;
++                              }
++                      }
++                      assert (at == p - 1);
++              }
++
++              at = p - 1;
++              dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at)));
++              frame->bh = bh;
++              frame->entries = entries;
++              frame->at = at;
++              if (!indirect--) return frame;  /* no further index level to descend */
++              if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err)))
++                      goto fail2;
++              at = entries = ((struct dx_node *) bh->b_data)->entries;
++              assert (dx_get_limit(entries) == dx_node_limit (dir));
++              frame++;
++      }
++fail2:
++      while (frame >= frame_in) {     /* drop every buffer recorded so far */
++              brelse(frame->bh);
++              frame--;
++      }
++fail:
++      return NULL;
++}
++/* Release the buffers held in frames[0] and, if used, frames[1]. */
++static void dx_release (struct dx_frame *frames)
++{
++      if (frames[0].bh == NULL)
++              return;
++
++      if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels)
++              brelse(frames[1].bh);   /* second frame exists only with an indirect level */
++      brelse(frames[0].bh);
++}
++
++/*
++ * This function increments the frame pointer to search the next leaf
++ * block, and reads in the necessary intervening nodes if the search
++ * should be necessary.  Whether or not the search is necessary is
++ * controlled by the hash parameter.  If the hash value is even, then
++ * the search is only continued if the next block starts with that
++ * hash value.  This is used if we are searching for a specific file.
++ *
++ * If the hash value is HASH_NB_ALWAYS, then always go to the next block.
++ *
++ * This function returns 1 if the caller should continue to search,
++ * or 0 if it should not.  If there is an error reading one of the
++ * index blocks, it will return -1.
++ *
++ * If start_hash is non-null, it will be filled in with the starting
++ * hash of the next page.
++ */
++static int ext3_htree_next_block(struct inode *dir, __u32 hash,
++                               struct dx_frame *frame,
++                               struct dx_frame *frames, int *err,
++                               __u32 *start_hash)
++{
++      struct dx_frame *p;
++      struct buffer_head *bh;
++      int num_frames = 0;
++      __u32 bhash;
++
++      *err = ENOENT;  /* NOTE(review): positive value; confirm intended sign */
++      p = frame;
++      /*
++       * Find the next leaf page by incrementing the frame pointer.
++       * If we run out of entries in the interior node, loop around and
++       * increment pointer in the parent node.  When we break out of
++       * this loop, num_frames indicates the number of interior
++       * nodes need to be read.
++       */
++      while (1) {
++              if (++(p->at) < p->entries + dx_get_count(p->entries))
++                      break;
++              if (p == frames)        /* root level exhausted: nothing follows */
++                      return 0;
++              num_frames++;
++              p--;
++      }
++
++      /*
++       * If the hash is 1, then continue only if the next page has a
++       * continuation hash of any value.  This is used for readdir
++       * handling.  Otherwise, check to see if the hash matches the
++       * desired continuation hash.  If it doesn't, return since
++       * there's no point to read in the successive index pages.
++       */
++      bhash = dx_get_hash(p->at);
++      if (start_hash)
++              *start_hash = bhash;
++      if ((hash & 1) == 0) {
++              if ((bhash & ~1) != hash)
++                      return 0;
++      }
++      /*
++       * If the hash is HASH_NB_ALWAYS, we always go to the next
++       * block so no check is necessary
++       */
++      while (num_frames--) {
++              if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at),
++                                    0, err)))
++                      return -1; /* Failure */
++              p++;
++              brelse (p->bh); /* replace the lower frame's old buffer */
++              p->bh = bh;
++              p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
++      }
++      return 1;
++}
++
++/*
++ * Return the entry that starts rec_len bytes after p.
++ * p is at least 6 bytes before the end of page
++ */
++static inline struct ext3_dir_entry_2 *ext3_next_entry(struct ext3_dir_entry_2 *p)
++{
++      return (struct ext3_dir_entry_2 *)((char*)p + le16_to_cpu(p->rec_len));
++}
++
++/*
++ * This function fills a red-black tree with information from a
++ * directory, scanning in hash order from start_hash/start_minor_hash.
++ * If next_hash is non-NULL, the hash from ext3_htree_next_block() is stored.
++ *
++ * This function returns the number of entries inserted into the tree,
++ * or a negative error code.
++ */
++int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
++                       __u32 start_minor_hash, __u32 *next_hash)
++{
++      struct dx_hash_info hinfo;
++      struct buffer_head *bh;
++      struct ext3_dir_entry_2 *de, *top;
++      struct dx_frame frames[2], *frame;      /* on stack so concurrent scans don't share it */
++      struct inode *dir;
++      int block, err;
++      int count = 0;
++      int ret;
++      __u32 hashval;
++      
++      dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash,
++                     start_minor_hash));
++      dir = dir_file->f_dentry->d_inode;
++      hinfo.hash = start_hash;
++      hinfo.minor_hash = 0;
++      frame = dx_probe(0, dir_file->f_dentry->d_inode, &hinfo, frames, &err);
++      if (!frame)
++              return err;
++
++      while (1) {
++              block = dx_get_block(frame->at);
++              dxtrace(printk("Reading block %d\n", block));
++              if (!(bh = ext3_bread (NULL, dir, block, 0, &err)))
++                      goto errout;
++      
++              de = (struct ext3_dir_entry_2 *) bh->b_data;
++              top = (struct ext3_dir_entry_2 *) ((char *) de + dir->i_sb->s_blocksize -
++                                     EXT3_DIR_REC_LEN(0));
++              for (; de < top; de = ext3_next_entry(de)) {
++                      ext3fs_dirhash(de->name, de->name_len, &hinfo);
++                      if ((hinfo.hash < start_hash) ||
++                          ((hinfo.hash == start_hash) &&
++                           (hinfo.minor_hash < start_minor_hash)))
++                              continue;       /* sorts before the requested start position */
++                      ext3_htree_store_dirent(dir_file, hinfo.hash,
++                                              hinfo.minor_hash, de);
++                      count++;
++              }
++              brelse (bh);
++              hashval = ~1;
++              ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS, 
++                                          frame, frames, &err, &hashval);
++              if (next_hash)
++                      *next_hash = hashval;
++              if (ret == -1)
++                      goto errout;
++              /*
++               * Stop if:  (a) there are no more entries, or
++               * (b) we have inserted at least one entry and the
++               * next hash value is not a continuation
++               */
++              if ((ret == 0) ||
++                  (count && ((hashval & 1) == 0)))
++                      break;
++      }
++      dx_release(frames);
++      dxtrace(printk("Fill tree: returned %d entries\n", count));
++      return count;
++errout:
++      dx_release(frames);
++      return (err);
++}
++
++
++/*
++ * Directory block splitting, compacting
++ */
++/* Map each in-use dirent to (hash, offset), filling downwards from map_tail. */
++static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
++                      struct dx_hash_info *hinfo, struct dx_map_entry *map_tail)
++{
++      int count = 0;
++      char *base = (char *) de;
++      struct dx_hash_info h = *hinfo; /* work on a copy; *hinfo itself is not written */
++      
++      while ((char *) de < base + size)
++      {
++              if (de->name_len && de->inode) {        /* entries without a name or inode are skipped */
++                      ext3fs_dirhash(de->name, de->name_len, &h);
++                      map_tail--;
++                      map_tail->hash = h.hash;
++                      map_tail->offs = (u32) ((char *) de - base);    /* byte offset inside the block */
++                      count++;
++              }
++              /* XXX: do we need to check rec_len == 0 case? -Chris */
++              de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
++      }
++      return count;   /* number of map entries written below the original map_tail */
++}
++/* Sort map[0..count-1] in place by ascending hash: comb passes, then bubble sort. */
++static void dx_sort_map (struct dx_map_entry *map, unsigned count)
++{
++      struct dx_map_entry *p, *q, *top = map + count - 1;     /* top: last element, fixed before count is reused */
++      int more;
++      /* Combsort until bubble sort doesn't suck */
++      while (count > 2)
++      {
++              count = count*10/13;    /* from here on count is the comb gap */
++              if (count - 9 < 2) /* 9, 10 -> 11 */
++                      count = 11;
++              for (p = top, q = p - count; q >= map; p--, q--)
++                      if (p->hash < q->hash)
++                              swap(*p, *q);
++      }
++      /* Garden variety bubble sort */
++      do {
++              more = 0;
++              q = top;
++              while (q-- > map)
++              {
++                      if (q[1].hash >= q[0].hash)
++                              continue;       /* this adjacent pair is already in order */
++                      swap(*(q+1), *q);
++                      more = 1;       /* something moved, so another pass is needed */
++              }
++      } while(more);
++}
++/* Insert a (hash, block) index entry right after frame->at and bump the count. */
++static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block)
++{
++      struct dx_entry *entries = frame->entries;
++      struct dx_entry *old = frame->at, *new = old + 1;
++      int count = dx_get_count(entries);
++
++      assert(count < dx_get_limit(entries));  /* caller must have left room for one more */
++      assert(old < entries + count);
++      memmove(new + 1, new, (char *)(entries + count) - (char *)(new));       /* shift the tail up one slot */
++      dx_set_hash(new, hash);
++      dx_set_block(new, block);
++      dx_set_count(entries, count + 1);
++}
++#endif
++
++/* Clear EXT3_INDEX_FL on the inode when the sb lacks the DIR_INDEX feature. */
++static void ext3_update_dx_flag(struct inode *inode)
++{
++      if (!EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
++                                   EXT3_FEATURE_COMPAT_DIR_INDEX))
++              EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL;
++}
++
+ /*
+  * NOTE! unlike strncmp, ext3_match returns 1 for success, 0 for failure.
+  *
+@@ -96,6 +726,7 @@
+       return 0;
+ }
+ 
++
+ /*
+  *    ext3_find_entry()
+  *
+@@ -107,6 +738,8 @@
+  * The returned buffer_head has ->b_count elevated.  The caller is expected
+  * to brelse() it when appropriate.
+  */
++
++      
+ static struct buffer_head * ext3_find_entry (struct dentry *dentry,
+                                       struct ext3_dir_entry_2 ** res_dir)
+ {
+@@ -121,12 +754,32 @@
+       int num = 0;
+       int nblocks, i, err;
+       struct inode *dir = dentry->d_parent->d_inode;
++      int namelen;
++      const u8 *name;
++      unsigned blocksize;
+ 
+       *res_dir = NULL;
+       sb = dir->i_sb;
+-
++      blocksize = sb->s_blocksize;
++      namelen = dentry->d_name.len;
++      name = dentry->d_name.name;
++      if (namelen > EXT3_NAME_LEN)
++              return NULL;
++#ifdef CONFIG_EXT3_INDEX
++      if (is_dx(dir)) {
++              bh = ext3_dx_find_entry(dentry, res_dir, &err);
++              /*
++               * On success, or if the error was file not found,
++               * return.  Otherwise, fall back to doing a search the
++               * old fashioned way.
++               */
++              if (bh || (err != ERR_BAD_DX_DIR))
++                      return bh;
++              dxtrace(printk("ext3_find_entry: dx failed, falling back\n"));
++      }
++#endif
+       nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb);
+-      start = dir->u.ext3_i.i_dir_start_lookup;
++      start = EXT3_I(dir)->i_dir_start_lookup;
+       if (start >= nblocks)
+               start = 0;
+       block = start;
+@@ -167,7 +820,7 @@
+               i = search_dirblock(bh, dir, dentry,
+                           block << EXT3_BLOCK_SIZE_BITS(sb), res_dir);
+               if (i == 1) {
+-                      dir->u.ext3_i.i_dir_start_lookup = block;
++                      EXT3_I(dir)->i_dir_start_lookup = block;
+                       ret = bh;
+                       goto cleanup_and_exit;
+               } else {
+@@ -198,6 +851,66 @@
+       return ret;
+ }
+ 
++#ifdef CONFIG_EXT3_INDEX
++static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
++                     struct ext3_dir_entry_2 **res_dir, int *err)
++{
++      struct super_block * sb;
++      struct dx_hash_info     hinfo;
++      u32 hash;
++      struct dx_frame frames[2], *frame;
++      struct ext3_dir_entry_2 *de, *top;
++      struct buffer_head *bh;
++      unsigned long block;
++      int retval;
++      int namelen = dentry->d_name.len;
++      const u8 *name = dentry->d_name.name;
++      struct inode *dir = dentry->d_parent->d_inode;
++      /* Indexed lookup: returns the bh holding the match (*res_dir set) or NULL. */
++      sb = dir->i_sb;
++      if (!(frame = dx_probe (dentry, 0, &hinfo, frames, err)))
++              return NULL;
++      hash = hinfo.hash;
++      do {
++              block = dx_get_block(frame->at);
++              if (!(bh = ext3_bread (NULL,dir, block, 0, err)))
++                      goto errout;
++              de = (struct ext3_dir_entry_2 *) bh->b_data;
++              top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize -
++                                     EXT3_DIR_REC_LEN(0));
++              for (; de < top; de = ext3_next_entry(de))
++              if (ext3_match (namelen, name, de)) {
++                      if (!ext3_check_dir_entry("ext3_find_entry",
++                                                dir, de, bh,
++                                (block<<EXT3_BLOCK_SIZE_BITS(sb))
++                                        +((char *)de - bh->b_data))) {
++                              brelse (bh);
++                              goto errout;    /* NOTE(review): *err is not assigned on this path */
++                      }
++                      *res_dir = de;
++                      dx_release (frames);
++                      return bh;      /* bh is returned still referenced; it is not brelse'd here */
++              }
++              brelse (bh);
++              /* Check to see if we should continue to search */
++              retval = ext3_htree_next_block(dir, hash, frame,
++                                             frames, err, 0);
++              if (retval == -1) {
++                      ext3_warning(sb, __FUNCTION__,
++                           "error reading index page in directory #%lu",
++                           dir->i_ino);
++                      goto errout;
++              }
++      } while (retval == 1);
++      /* Every candidate block was searched without finding the name */
++      *err = -ENOENT;
++errout:
++      dxtrace(printk("%s not found\n", name));
++      dx_release (frames);
++      return NULL;
++}
++#endif
++
+ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry)
+ {
+       struct inode * inode;
+@@ -214,8 +927,9 @@
+               brelse (bh);
+               inode = iget(dir->i_sb, ino);
+ 
+-              if (!inode)
++              if (!inode) {
+                       return ERR_PTR(-EACCES);
++              }
+       }
+       d_add(dentry, inode);
+       return NULL;
+@@ -239,6 +953,301 @@
+               de->file_type = ext3_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
+ }
+ 
++#ifdef CONFIG_EXT3_INDEX
++static struct ext3_dir_entry_2 *
++dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
++{
++      unsigned rec_len = 0;
++      /* Copy the mapped entries, packed, into 'to'; returns the last one copied. */
++      while (count--) {
++              struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs);
++              rec_len = EXT3_DIR_REC_LEN(de->name_len);
++              memcpy (to, de, rec_len);
++              ((struct ext3_dir_entry_2 *) to)->rec_len = cpu_to_le16(rec_len);       /* rec_len is little-endian on disk */
++              de->inode = 0;  /* source slot no longer holds a live entry */
++              map++;
++              to += rec_len;
++      }
++      return (struct ext3_dir_entry_2 *) (to - rec_len);
++}
++/* Slide the in-use entries to the front of the block; returns the last one kept. */
++static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
++{
++      struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base;
++      unsigned rec_len = 0;
++
++      prev = to = de;
++      while ((char*)de < base + size) {
++              next = (struct ext3_dir_entry_2 *) ((char *) de +
++                                                  le16_to_cpu(de->rec_len)); /* read before de is overwritten */
++              if (de->inode && de->name_len) {
++                      rec_len = EXT3_DIR_REC_LEN(de->name_len);
++                      if (de > to)
++                              memmove(to, de, rec_len);
++                      to->rec_len = cpu_to_le16(rec_len);     /* rec_len is little-endian on disk */
++                      prev = to;
++                      to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len);
++              }
++              de = next;
++      }
++      return prev;
++}
++/* Split leaf *bh by hash into itself and a new block; returns NULL on failure. */
++static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
++                      struct buffer_head **bh,struct dx_frame *frame,
++                      struct dx_hash_info *hinfo, int *error)
++{
++      unsigned blocksize = dir->i_sb->s_blocksize;
++      unsigned count, continued;
++      struct buffer_head *bh2;
++      u32 newblock;
++      u32 hash2;
++      struct dx_map_entry *map;
++      char *data1 = (*bh)->b_data, *data2;
++      unsigned split;
++      struct ext3_dir_entry_2 *de = NULL, *de2;
++      int     err;
++
++      bh2 = ext3_append (handle, dir, &newblock, error);
++      if (!(bh2)) {
++              brelse(*bh);
++              *bh = NULL;
++              goto errout;
++      }
++      BUFFER_TRACE(*bh, "get_write_access");
++      err = ext3_journal_get_write_access(handle, *bh);
++      if (err) {
++      journal_error:
++              brelse(*bh);
++              brelse(bh2);
++              *bh = NULL;
++              ext3_std_error(dir->i_sb, err);
++              *error = err;   /* callers read the failure code from *error */
++              return NULL;    /* never hand back an entry inside a released buffer */
++      }
++      BUFFER_TRACE(frame->bh, "get_write_access");
++      err = ext3_journal_get_write_access(handle, frame->bh);
++      if (err)
++              goto journal_error;
++
++      data2 = bh2->b_data;
++
++      /* create map in the end of data2 block */
++      map = (struct dx_map_entry *) (data2 + blocksize);
++      count = dx_make_map ((struct ext3_dir_entry_2 *) data1,
++                           blocksize, hinfo, map);
++      map -= count;
++      split = count/2; // need to adjust to actual middle
++      dx_sort_map (map, count);
++      hash2 = map[split].hash;
++      continued = hash2 == map[split - 1].hash;
++      dxtrace(printk("Split block %i at %x, %i/%i\n",
++              dx_get_block(frame->at), hash2, split, count-split));
++
++      /* Fancy dance to stay within two buffers */
++      de2 = dx_move_dirents(data1, data2, map + split, count - split);
++      de = dx_pack_dirents(data1,blocksize);
++      de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
++      de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2);
++      dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data1, blocksize, 1));
++      dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data2, blocksize, 1));
++
++      /* Which block gets the new entry? */
++      if (hinfo->hash >= hash2)
++      {
++              swap(*bh, bh2); /* *bh now names the new block, bh2 the old one */
++              de = de2;
++      }
++      dx_insert_block (frame, hash2 + continued, newblock);
++      err = ext3_journal_dirty_metadata (handle, bh2);
++      if (err)
++              goto journal_error;
++      err = ext3_journal_dirty_metadata (handle, frame->bh);
++      if (err)
++              goto journal_error;
++      brelse (bh2);
++      dxtrace(dx_show_index ("frame", frame->entries));
++errout:
++      return de;      /* NULL when reached via the ext3_append failure above */
++}
++#endif
++
++
++/*
++ * Add a new entry into a directory (leaf) block.  If de is non-NULL,
++ * it points to a directory entry which is guaranteed to be large
++ * enough for the new directory entry.  If de is NULL, then
++ * add_dirent_to_buf will search the directory block for space.  It
++ * returns -ENOSPC if no space is available, -EIO if an entry fails
++ * ext3_check_dir_entry, and -EEXIST if the entry already exists.
++ * 
++ * NOTE!  bh is NOT released in the case where ENOSPC is returned.  In
++ * all other cases bh is released.
++ */
++static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
++                           struct inode *inode, struct ext3_dir_entry_2 *de,
++                           struct buffer_head * bh)
++{
++      struct inode    *dir = dentry->d_parent->d_inode;
++      const char      *name = dentry->d_name.name;
++      int             namelen = dentry->d_name.len;
++      unsigned long   offset = 0;
++      unsigned short  reclen;
++      int             nlen, rlen, err;
++      char            *top;
++      
++      reclen = EXT3_DIR_REC_LEN(namelen);
++      if (!de) {
++              de = (struct ext3_dir_entry_2 *)bh->b_data;
++              top = bh->b_data + dir->i_sb->s_blocksize - reclen;
++              while ((char *) de <= top) {
++                      if (!ext3_check_dir_entry("ext3_add_entry", dir, de,
++                                                bh, offset)) {
++                              brelse (bh);
++                              return -EIO;
++                      }
++                      if (ext3_match (namelen, name, de)) {
++                              brelse (bh);
++                              return -EEXIST;
++                      }
++                      nlen = EXT3_DIR_REC_LEN(de->name_len);
++                      rlen = le16_to_cpu(de->rec_len);
++                      if ((de->inode? rlen - nlen: rlen) >= reclen)   /* slack after a live entry, or a whole free one */
++                              break;
++                      de = (struct ext3_dir_entry_2 *)((char *)de + rlen);
++                      offset += rlen;
++              }
++              if ((char *) de > top)
++                      return -ENOSPC; /* bh is left held for the caller on this path */
++      }
++      BUFFER_TRACE(bh, "get_write_access");
++      err = ext3_journal_get_write_access(handle, bh);
++      if (err) {
++              ext3_std_error(dir->i_sb, err);
++              brelse(bh);
++              return err;
++      }
++      
++      /* By now the buffer is marked for journaling */
++      nlen = EXT3_DIR_REC_LEN(de->name_len);
++      rlen = le16_to_cpu(de->rec_len);
++      if (de->inode) {        /* live entry: carve the new one out of its trailing slack */
++              struct ext3_dir_entry_2 *de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen);
++              de1->rec_len = cpu_to_le16(rlen - nlen);
++              de->rec_len = cpu_to_le16(nlen);
++              de = de1;
++      }
++      de->file_type = EXT3_FT_UNKNOWN;
++      if (inode) {
++              de->inode = cpu_to_le32(inode->i_ino);
++              ext3_set_de_type(dir->i_sb, de, inode->i_mode);
++      } else
++              de->inode = 0;
++      de->name_len = namelen;
++      memcpy (de->name, name, namelen);
++      /*
++       * XXX shouldn't update any times until successful
++       * completion of syscall, but too many callers depend
++       * on this.
++       *
++       * XXX similarly, too many callers depend on
++       * ext3_new_inode() setting the times, but error
++       * recovery deletes the inode, so the worst that can
++       * happen is that the times are slightly out of date
++       * and/or different from the directory change time.
++       */
++      dir->i_mtime = dir->i_ctime = CURRENT_TIME;
++      ext3_update_dx_flag(dir);
++      dir->i_version = ++event;
++      ext3_mark_inode_dirty(handle, dir);
++      BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
++      err = ext3_journal_dirty_metadata(handle, bh);
++      if (err)
++              ext3_std_error(dir->i_sb, err);
++      brelse(bh);
++      return 0;       /* NOTE(review): a dirty_metadata error is reported above but not returned */
++}
++
++#ifdef CONFIG_EXT3_INDEX
++/*
++ * This converts a one block unindexed directory to a 3 block indexed
++ * directory, and adds the dentry to the indexed directory.
++ */
++static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
++                          struct inode *inode, struct buffer_head *bh)
++{
++      struct inode    *dir = dentry->d_parent->d_inode;
++      const char      *name = dentry->d_name.name;
++      int             namelen = dentry->d_name.len;
++      struct buffer_head *bh2;
++      struct dx_root  *root;
++      struct dx_frame frames[2], *frame;
++      struct dx_entry *entries;
++      struct ext3_dir_entry_2 *de, *de2;
++      char            *data1, *top;
++      unsigned        len;
++      int             retval;
++      unsigned        blocksize;
++      struct dx_hash_info hinfo;
++      u32             block;
++              
++      blocksize =  dir->i_sb->s_blocksize;
++      dxtrace(printk("Creating index\n"));
++      retval = ext3_journal_get_write_access(handle, bh);
++      if (retval) {
++              ext3_std_error(dir->i_sb, retval);
++              brelse(bh);
++              return retval;
++      }
++      root = (struct dx_root *) bh->b_data;
++              
++      EXT3_I(dir)->i_flags |= EXT3_INDEX_FL;
++      bh2 = ext3_append (handle, dir, &block, &retval);
++      if (!(bh2)) {
++              brelse(bh);
++              return retval;
++      }
++      data1 = bh2->b_data;
++
++      /* The 0th block becomes the root, move the dirents out */
++      de = (struct ext3_dir_entry_2 *) &root->dotdot;
++      de = (struct ext3_dir_entry_2 *) ((char *)de + le16_to_cpu(de->rec_len));       /* rec_len is little-endian on disk */
++      len = ((char *) root) + blocksize - (char *) de;
++      memcpy (data1, de, len);
++      de = (struct ext3_dir_entry_2 *) data1;
++      top = data1 + len;
++      while ((char *)(de2 = (struct ext3_dir_entry_2 *)((char *)de + le16_to_cpu(de->rec_len))) < top)        /* walk to the last copied entry */
++              de = de2;
++      de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
++      /* Initialize the root; the dot dirents already exist */
++      de = (struct ext3_dir_entry_2 *) (&root->dotdot);
++      de->rec_len = cpu_to_le16(blocksize - EXT3_DIR_REC_LEN(2));
++      memset (&root->info, 0, sizeof(root->info));
++      root->info.info_length = sizeof(root->info);
++      root->info.hash_version = dir->i_sb->u.ext3_sb.s_def_hash_version;
++      entries = root->entries;
++      dx_set_block (entries, 1);
++      dx_set_count (entries, 1);
++      dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info)));
++
++      /* Initialize as for dx_probe */
++      hinfo.hash_version = root->info.hash_version;
++      hinfo.seed = dir->i_sb->u.ext3_sb.s_hash_seed;
++      ext3fs_dirhash(name, namelen, &hinfo);
++      frame = frames;
++      frame->entries = entries;
++      frame->at = entries;
++      frame->bh = bh; /* the root block now belongs to the frame */
++      bh = bh2;       /* from here on bh is the new leaf block */
++      de = do_split(handle,dir, &bh, frame, &hinfo, &retval);
++      dx_release (frames);
++      if (!(de))
++              return retval;
++
++      return add_dirent_to_buf(handle, dentry, inode, de, bh);
++}
++#endif
++
+ /*
+  *    ext3_add_entry()
+  *
+@@ -249,127 +1258,197 @@
+  * may not sleep between calling this and putting something into
+  * the entry, as someone else might have used it while you slept.
+  */
+-
+-/*
+- * AKPM: the journalling code here looks wrong on the error paths
+- */
+ static int ext3_add_entry (handle_t *handle, struct dentry *dentry,
+       struct inode *inode)
+ {
+       struct inode *dir = dentry->d_parent->d_inode;
+-      const char *name = dentry->d_name.name;
+-      int namelen = dentry->d_name.len;
+       unsigned long offset;
+-      unsigned short rec_len;
+       struct buffer_head * bh;
+-      struct ext3_dir_entry_2 * de, * de1;
++      struct ext3_dir_entry_2 *de;
+       struct super_block * sb;
+       int     retval;
++#ifdef CONFIG_EXT3_INDEX
++        int     dx_fallback=0;
++#endif
++        unsigned blocksize;
++        unsigned nlen, rlen;
++        u32 block, blocks;
+ 
+       sb = dir->i_sb;
+-
+-      if (!namelen)
++        blocksize = sb->s_blocksize;
++      if (!dentry->d_name.len)
+               return -EINVAL;
+-      bh = ext3_bread (handle, dir, 0, 0, &retval);
++#ifdef CONFIG_EXT3_INDEX
++        if (is_dx(dir)) {
++                retval = ext3_dx_add_entry(handle, dentry, inode);
++                if (!retval || (retval != ERR_BAD_DX_DIR))
++                        return retval;
++                EXT3_I(dir)->i_flags &= ~EXT3_INDEX_FL;
++                dx_fallback++;
++                ext3_mark_inode_dirty(handle, dir);
++        }
++#endif
++        blocks = dir->i_size >> sb->s_blocksize_bits;
++        for (block = 0, offset = 0; block < blocks; block++) {
++                bh = ext3_bread(handle, dir, block, 0, &retval);
++                if(!bh)
++                        return retval;
++                retval = add_dirent_to_buf(handle, dentry, inode, 0, bh);
++                if (retval != -ENOSPC)
++                        return retval;
++
++#ifdef CONFIG_EXT3_INDEX
++                if (blocks == 1 && !dx_fallback &&
++                    EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX))
++                        return make_indexed_dir(handle, dentry, inode, bh);
++#endif
++                brelse(bh);
++        }
++        bh = ext3_append(handle, dir, &block, &retval);
+       if (!bh)
+               return retval;
+-      rec_len = EXT3_DIR_REC_LEN(namelen);
+-      offset = 0;
+       de = (struct ext3_dir_entry_2 *) bh->b_data;
+-      while (1) {
+-              if ((char *)de >= sb->s_blocksize + bh->b_data) {
+-                      brelse (bh);
+-                      bh = NULL;
+-                      bh = ext3_bread (handle, dir,
+-                              offset >> EXT3_BLOCK_SIZE_BITS(sb), 1, &retval);
+-                      if (!bh)
+-                              return retval;
+-                      if (dir->i_size <= offset) {
+-                              if (dir->i_size == 0) {
+-                                      brelse(bh);
+-                                      return -ENOENT;
+-                              }
+-
+-                              ext3_debug ("creating next block\n");
+-
+-                              BUFFER_TRACE(bh, "get_write_access");
+-                              ext3_journal_get_write_access(handle, bh);
+-                              de = (struct ext3_dir_entry_2 *) bh->b_data;
+-                              de->inode = 0;
+-                              de->rec_len = le16_to_cpu(sb->s_blocksize);
+-                              dir->u.ext3_i.i_disksize =
+-                                      dir->i_size = offset + sb->s_blocksize;
+-                              dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
+-                              ext3_mark_inode_dirty(handle, dir);
+-                      } else {
+-
+-                              ext3_debug ("skipping to next block\n");
++        de->inode = 0;
++        de->rec_len = cpu_to_le16(rlen = blocksize);
++        nlen = 0;
++        return add_dirent_to_buf(handle, dentry, inode, de, bh);
++}
+ 
+-                              de = (struct ext3_dir_entry_2 *) bh->b_data;
+-                      }
+-              }
+-              if (!ext3_check_dir_entry ("ext3_add_entry", dir, de, bh,
+-                                         offset)) {
+-                      brelse (bh);
+-                      return -ENOENT;
+-              }
+-              if (ext3_match (namelen, name, de)) {
+-                              brelse (bh);
+-                              return -EEXIST;
++#ifdef CONFIG_EXT3_INDEX
++/*
++ * Returns 0 for success, or a negative error value
++ */
++static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
++                             struct inode *inode)
++{
++        struct dx_frame frames[2], *frame;
++        struct dx_entry *entries, *at;
++        struct dx_hash_info hinfo;
++        struct buffer_head * bh;
++        struct inode *dir = dentry->d_parent->d_inode;
++        struct super_block * sb = dir->i_sb;
++        struct ext3_dir_entry_2 *de;
++        int err;
++
++        frame = dx_probe(dentry, 0, &hinfo, frames, &err);
++        if (!frame)
++                return err;
++        entries = frame->entries;
++        at = frame->at;
++
++        if (!(bh = ext3_bread(handle,dir, dx_get_block(frame->at), 0, &err)))
++                goto cleanup;
++
++        BUFFER_TRACE(bh, "get_write_access");
++        err = ext3_journal_get_write_access(handle, bh);
++        if (err)
++                goto journal_error;
++
++        err = add_dirent_to_buf(handle, dentry, inode, 0, bh);
++        if (err != -ENOSPC) {
++                bh = 0;
++                goto cleanup;
++        }
++
++        /* Block full, should compress but for now just split */
++        dxtrace(printk("using %u of %u node entries\n",
++                       dx_get_count(entries), dx_get_limit(entries)));
++        /* Need to split index? */
++        if (dx_get_count(entries) == dx_get_limit(entries)) {
++                u32 newblock;
++                unsigned icount = dx_get_count(entries);
++                int levels = frame - frames;
++                struct dx_entry *entries2;
++                struct dx_node *node2;
++                struct buffer_head *bh2;
++
++                if (levels && (dx_get_count(frames->entries) ==
++                               dx_get_limit(frames->entries))) {
++                        ext3_warning(sb, __FUNCTION__,
++                                     "Directory index full!\n");
++                        err = -ENOSPC;
++                        goto cleanup;
+               }
+-              if ((le32_to_cpu(de->inode) == 0 &&
+-                              le16_to_cpu(de->rec_len) >= rec_len) ||
+-                  (le16_to_cpu(de->rec_len) >=
+-                              EXT3_DIR_REC_LEN(de->name_len) + rec_len)) {
+-                      BUFFER_TRACE(bh, "get_write_access");
+-                      ext3_journal_get_write_access(handle, bh);
+-                      /* By now the buffer is marked for journaling */
+-                      offset += le16_to_cpu(de->rec_len);
+-                      if (le32_to_cpu(de->inode)) {
+-                              de1 = (struct ext3_dir_entry_2 *) ((char *) de +
+-                                      EXT3_DIR_REC_LEN(de->name_len));
+-                              de1->rec_len =
+-                                      cpu_to_le16(le16_to_cpu(de->rec_len) -
+-                                      EXT3_DIR_REC_LEN(de->name_len));
+-                              de->rec_len = cpu_to_le16(
+-                                              EXT3_DIR_REC_LEN(de->name_len));
+-                              de = de1;
++
++                bh2 = ext3_append (handle, dir, &newblock, &err);
++                if (!(bh2))
++                        goto cleanup;
++                node2 = (struct dx_node *)(bh2->b_data);
++                entries2 = node2->entries;
++                node2->fake.rec_len = cpu_to_le16(sb->s_blocksize);
++                node2->fake.inode = 0;
++                BUFFER_TRACE(frame->bh, "get_write_access");
++                err = ext3_journal_get_write_access(handle, frame->bh);
++                if (err)
++                        goto journal_error;
++                if (levels) {
++                        unsigned icount1 = icount/2, icount2 = icount - icount1;+                       unsigned hash2 = dx_get_hash(entries + icount1);
++                        dxtrace(printk("Split index %i/%i\n", icount1, icount2));
++
++                        BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
++                        err = ext3_journal_get_write_access(handle,
++                                                            frames[0].bh);
++                        if (err)
++                                goto journal_error;
++
++                        memcpy ((char *) entries2, (char *) (entries + icount1),+                               icount2 * sizeof(struct dx_entry));
++                        dx_set_count (entries, icount1);
++                        dx_set_count (entries2, icount2);
++                        dx_set_limit (entries2, dx_node_limit(dir));
++
++                        /* Which index block gets the new entry? */
++                        if (at - entries >= icount1) {
++                                frame->at = at = at - entries - icount1 + entries2;
++                                frame->entries = entries = entries2;
++                                swap(frame->bh, bh2);
+                       }
+-                      de->file_type = EXT3_FT_UNKNOWN;
+-                      if (inode) {
+-                              de->inode = cpu_to_le32(inode->i_ino);
+-                              ext3_set_de_type(dir->i_sb, de, inode->i_mode);
+-                      } else
+-                              de->inode = 0;
+-                      de->name_len = namelen;
+-                      memcpy (de->name, name, namelen);
+-                      /*
+-                       * XXX shouldn't update any times until successful
+-                       * completion of syscall, but too many callers depend
+-                       * on this.
+-                       *
+-                       * XXX similarly, too many callers depend on
+-                       * ext3_new_inode() setting the times, but error
+-                       * recovery deletes the inode, so the worst that can
+-                       * happen is that the times are slightly out of date
+-                       * and/or different from the directory change time.
+-                       */
+-                      dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+-                      dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
+-                      ext3_mark_inode_dirty(handle, dir);
+-                      dir->i_version = ++event;
+-                      BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
+-                      ext3_journal_dirty_metadata(handle, bh);
+-                      brelse(bh);
+-                      return 0;
++                        dx_insert_block (frames + 0, hash2, newblock);
++                        dxtrace(dx_show_index ("node", frames[1].entries));
++                        dxtrace(dx_show_index ("node",
++                               ((struct dx_node *) bh2->b_data)->entries));
++                        err = ext3_journal_dirty_metadata(handle, bh2);
++                        if (err)
++                                goto journal_error;
++                        brelse (bh2);
++                } else {
++                        dxtrace(printk("Creating second level index...\n"));
++                        memcpy((char *) entries2, (char *) entries,
++                               icount * sizeof(struct dx_entry));
++                        dx_set_limit(entries2, dx_node_limit(dir));
++
++                        /* Set up root */
++                        dx_set_count(entries, 1);
++                        dx_set_block(entries + 0, newblock);
++                        ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1;
++
++                        /* Add new access path frame */
++                        frame = frames + 1;
++                        frame->at = at = at - entries + entries2;
++                        frame->entries = entries = entries2;
++                        frame->bh = bh2;
++                        err = ext3_journal_get_write_access(handle,
++                                                            frame->bh);
++                        if (err)
++                                goto journal_error;
+               }
+-              offset += le16_to_cpu(de->rec_len);
+-              de = (struct ext3_dir_entry_2 *)
+-                      ((char *) de + le16_to_cpu(de->rec_len));
++                ext3_journal_dirty_metadata(handle, frames[0].bh);
+       }
+-      brelse (bh);
+-      return -ENOSPC;
++        de = do_split(handle, dir, &bh, frame, &hinfo, &err);
++        if (!de)
++                goto cleanup;
++        err = add_dirent_to_buf(handle, dentry, inode, de, bh);
++        bh = 0;
++        goto cleanup;
++
++journal_error:
++        ext3_std_error(dir->i_sb, err);
++cleanup:
++        if (bh)
++                brelse(bh);
++        dx_release(frames);
++        return err;
+ }
++#endif
+ 
+ /*
+  * ext3_delete_entry deletes a directory entry by merging it with the
+@@ -453,9 +1532,11 @@
+       struct inode * inode;
+       int err;
+ 
+-      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3);
+-      if (IS_ERR(handle))
++      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
++                                      EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3);
++      if (IS_ERR(handle)) {
+               return PTR_ERR(handle);
++      }
+ 
+       if (IS_SYNC(dir))
+               handle->h_sync = 1;
+@@ -480,9 +1561,11 @@
+       struct inode *inode;
+       int err;
+ 
+-      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3);
+-      if (IS_ERR(handle))
++      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
++                                      EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3);
++      if (IS_ERR(handle)) {
+               return PTR_ERR(handle);
++      }
+ 
+       if (IS_SYNC(dir))
+               handle->h_sync = 1;
+@@ -512,9 +1595,11 @@
+       if (dir->i_nlink >= EXT3_LINK_MAX)
+               return -EMLINK;
+ 
+-      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3);
+-      if (IS_ERR(handle))
++      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
++                                      EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3);
++      if (IS_ERR(handle)) {
+               return PTR_ERR(handle);
++      }
+ 
+       if (IS_SYNC(dir))
+               handle->h_sync = 1;
+@@ -526,7 +1611,8 @@
+ 
+       inode->i_op = &ext3_dir_inode_operations;
+       inode->i_fop = &ext3_dir_operations;
+-      inode->i_size = inode->u.ext3_i.i_disksize = inode->i_sb->s_blocksize;
++        inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize;
++        inode->i_blocks = 0;
+       dir_block = ext3_bread (handle, inode, 0, 1, &err);
+       if (!dir_block) {
+               inode->i_nlink--; /* is this nlink == 0? */
+@@ -555,21 +1641,19 @@
+       brelse (dir_block);
+       ext3_mark_inode_dirty(handle, inode);
+       err = ext3_add_entry (handle, dentry, inode);
+-      if (err)
+-              goto out_no_entry;
++      if (err) {
++              inode->i_nlink = 0;
++              ext3_mark_inode_dirty(handle, inode);
++              iput (inode);
++              goto out_stop;
++      }
+       dir->i_nlink++;
+-      dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
++      ext3_update_dx_flag(dir);
+       ext3_mark_inode_dirty(handle, dir);
+       d_instantiate(dentry, inode);
+ out_stop:
+       ext3_journal_stop(handle, dir);
+       return err;
+-
+-out_no_entry:
+-      inode->i_nlink = 0;
+-      ext3_mark_inode_dirty(handle, inode);
+-      iput (inode);
+-      goto out_stop;
+ }
+ 
+ /*
+@@ -656,7 +1740,7 @@
+       int err = 0, rc;
+       
+       lock_super(sb);
+-      if (!list_empty(&inode->u.ext3_i.i_orphan))
++      if (!list_empty(&EXT3_I(inode)->i_orphan))
+               goto out_unlock;
+ 
+       /* Orphan handling is only valid for files with data blocks
+@@ -697,7 +1781,7 @@
+        * This is safe: on error we're going to ignore the orphan list
+        * anyway on the next recovery. */
+       if (!err)
+-              list_add(&inode->u.ext3_i.i_orphan, &EXT3_SB(sb)->s_orphan);
++              list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan);
+ 
+       jbd_debug(4, "superblock will point to %ld\n", inode->i_ino);
+       jbd_debug(4, "orphan inode %ld will point to %d\n",
+@@ -715,25 +1799,26 @@
+ int ext3_orphan_del(handle_t *handle, struct inode *inode)
+ {
+       struct list_head *prev;
++        struct ext3_inode_info *ei = EXT3_I(inode);
+       struct ext3_sb_info *sbi;
+       ino_t ino_next; 
+       struct ext3_iloc iloc;
+       int err = 0;
+       
+       lock_super(inode->i_sb);
+-      if (list_empty(&inode->u.ext3_i.i_orphan)) {
++        if (list_empty(&ei->i_orphan)) {
+               unlock_super(inode->i_sb);
+               return 0;
+       }
+ 
+       ino_next = NEXT_ORPHAN(inode);
+-      prev = inode->u.ext3_i.i_orphan.prev;
++        prev = ei->i_orphan.prev;
+       sbi = EXT3_SB(inode->i_sb);
+ 
+       jbd_debug(4, "remove inode %ld from orphan list\n", inode->i_ino);
+ 
+-      list_del(&inode->u.ext3_i.i_orphan);
+-      INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan);
++        list_del(&ei->i_orphan);
++        INIT_LIST_HEAD(&ei->i_orphan);
+ 
+       /* If we're on an error path, we may not have a valid
+        * transaction handle with which to update the orphan list on
+@@ -794,8 +1879,9 @@
+       handle_t *handle;
+ 
+       handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS);
+-      if (IS_ERR(handle))
++      if (IS_ERR(handle)) {
+               return PTR_ERR(handle);
++      }
+ 
+       retval = -ENOENT;
+       bh = ext3_find_entry (dentry, &de);
+@@ -833,7 +1919,7 @@
+       ext3_mark_inode_dirty(handle, inode);
+       dir->i_nlink--;
+       inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+-      dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
++        ext3_update_dx_flag(dir);
+       ext3_mark_inode_dirty(handle, dir);
+ 
+ end_rmdir:
+@@ -851,8 +1937,9 @@
+       handle_t *handle;
+ 
+       handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS);
+-      if (IS_ERR(handle))
++      if (IS_ERR(handle)) {
+               return PTR_ERR(handle);
++      }
+ 
+       if (IS_SYNC(dir))
+               handle->h_sync = 1;
+@@ -879,7 +1966,7 @@
+       if (retval)
+               goto end_unlink;
+       dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+-      dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
++      ext3_update_dx_flag(dir);
+       ext3_mark_inode_dirty(handle, dir);
+       inode->i_nlink--;
+       if (!inode->i_nlink)
+@@ -905,9 +1992,11 @@
+       if (l > dir->i_sb->s_blocksize)
+               return -ENAMETOOLONG;
+ 
+-      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 5);
+-      if (IS_ERR(handle))
++      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
++                                      EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5);
++      if (IS_ERR(handle)) {
+               return PTR_ERR(handle);
++      }
+ 
+       if (IS_SYNC(dir))
+               handle->h_sync = 1;
+@@ -917,7 +2006,7 @@
+       if (IS_ERR(inode))
+               goto out_stop;
+ 
+-      if (l > sizeof (inode->u.ext3_i.i_data)) {
++        if (l > sizeof (EXT3_I(inode)->i_data)) {
+               inode->i_op = &ext3_symlink_inode_operations;
+               inode->i_mapping->a_ops = &ext3_aops;
+               /*
+@@ -926,25 +2015,23 @@
+                * i_size in generic_commit_write().
+                */
+               err = block_symlink(inode, symname, l);
+-              if (err)
+-                      goto out_no_entry;
++                if (err) {
++                        ext3_dec_count(handle, inode);
++                        ext3_mark_inode_dirty(handle, inode);
++                        iput (inode);
++                        goto out_stop;
++                }
+       } else {
+               inode->i_op = &ext3_fast_symlink_inode_operations;
+-              memcpy((char*)&inode->u.ext3_i.i_data,symname,l);
++                memcpy((char*)&EXT3_I(inode)->i_data,symname,l);
+               inode->i_size = l-1;
+       }
+-      inode->u.ext3_i.i_disksize = inode->i_size;
++        EXT3_I(inode)->i_disksize = inode->i_size;
+       ext3_mark_inode_dirty(handle, inode);
+       err = ext3_add_nondir(handle, dentry, inode);
+ out_stop:
+       ext3_journal_stop(handle, dir);
+       return err;
+-
+-out_no_entry:
+-      ext3_dec_count(handle, inode);
+-      ext3_mark_inode_dirty(handle, inode);
+-      iput (inode);
+-      goto out_stop;
+ }
+ 
+ static int ext3_link (struct dentry * old_dentry,
+@@ -957,12 +2044,15 @@
+       if (S_ISDIR(inode->i_mode))
+               return -EPERM;
+ 
+-      if (inode->i_nlink >= EXT3_LINK_MAX)
++      if (inode->i_nlink >= EXT3_LINK_MAX) {
+               return -EMLINK;
++      }
+ 
+-      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS);
+-      if (IS_ERR(handle))
++      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
++                                      EXT3_INDEX_EXTRA_TRANS_BLOCKS);
++      if (IS_ERR(handle)) {
+               return PTR_ERR(handle);
++      }
+ 
+       if (IS_SYNC(dir))
+               handle->h_sync = 1;
+@@ -996,9 +2086,11 @@
+ 
+       old_bh = new_bh = dir_bh = NULL;
+ 
+-      handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + 2);
+-      if (IS_ERR(handle))
++      handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS +
++                                      EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2);
++      if (IS_ERR(handle)) {
+               return PTR_ERR(handle);
++      }
+ 
+       if (IS_SYNC(old_dir) || IS_SYNC(new_dir))
+               handle->h_sync = 1;
+@@ -1078,7 +2170,7 @@
+               new_inode->i_ctime = CURRENT_TIME;
+       }
+       old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
+-      old_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
++      ext3_update_dx_flag(old_dir);
+       if (dir_bh) {
+               BUFFER_TRACE(dir_bh, "get_write_access");
+               ext3_journal_get_write_access(handle, dir_bh);
+@@ -1090,7 +2182,7 @@
+                       new_inode->i_nlink--;
+               } else {
+                       new_dir->i_nlink++;
+-                      new_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
++                      ext3_update_dx_flag(new_dir);
+                       ext3_mark_inode_dirty(handle, new_dir);
+               }
+       }
+Index: linux-2.4.19/fs/ext3/super.c
+===================================================================
+--- linux-2.4.19.orig/fs/ext3/super.c  2004-04-23 18:25:03.000000000 -0400
++++ linux-2.4.19/fs/ext3/super.c       2004-04-23 18:26:27.000000000 -0400
+@@ -741,6 +741,7 @@
+       es->s_mtime = cpu_to_le32(CURRENT_TIME);
+       ext3_update_dynamic_rev(sb);
+       EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
++
+       ext3_commit_super (sb, es, 1);
+       if (test_opt (sb, DEBUG))
+               printk (KERN_INFO
+@@ -751,6 +752,7 @@
+                       EXT3_BLOCKS_PER_GROUP(sb),
+                       EXT3_INODES_PER_GROUP(sb),
+                       sbi->s_mount_opt);
++
+       printk(KERN_INFO "EXT3 FS " EXT3FS_VERSION ", " EXT3FS_DATE " on %s, ",
+                               bdevname(sb->s_dev));
+       if (EXT3_SB(sb)->s_journal->j_inode == NULL) {
+@@ -925,6 +927,7 @@
+       return res;
+ }
+ 
++
+ struct super_block * ext3_read_super (struct super_block * sb, void * data,
+                                     int silent)
+ {
+@@ -1113,6 +1116,9 @@
+       sbi->s_mount_state = le16_to_cpu(es->s_state);
+       sbi->s_addr_per_block_bits = log2(EXT3_ADDR_PER_BLOCK(sb));
+       sbi->s_desc_per_block_bits = log2(EXT3_DESC_PER_BLOCK(sb));
++      for (i=0; i < 4; i++)
++              sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
++      sbi->s_def_hash_version = es->s_def_hash_version;
+ 
+       if (sbi->s_blocks_per_group > blocksize * 8) {
+               printk (KERN_ERR
+@@ -1821,6 +1827,7 @@
+       exit_ext3_xattr();
+ }
+ 
++EXPORT_SYMBOL(ext3_force_commit);
+ EXPORT_SYMBOL(ext3_bread);
+ 
+ MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
+Index: linux-2.4.19/include/linux/ext3_fs.h
+===================================================================
+--- linux-2.4.19.orig/include/linux/ext3_fs.h  2004-04-23 17:53:55.000000000 -0400
++++ linux-2.4.19/include/linux/ext3_fs.h       2004-04-23 18:26:27.000000000 -0400
+@@ -40,6 +40,11 @@
+ #define EXT3FS_VERSION                "2.4-0.9.18"
+ 
+ /*
++ * Always enable hashed directories
++ */
++#define CONFIG_EXT3_INDEX
++
++/*
+  * Debug code
+  */
+ #ifdef EXT3FS_DEBUG
+@@ -414,8 +419,11 @@
+ /*E0*/        __u32   s_journal_inum;         /* inode number of journal file */
+       __u32   s_journal_dev;          /* device number of journal file */
+       __u32   s_last_orphan;          /* start of list of inodes to delete */
+-
+-/*EC*/        __u32   s_reserved[197];        /* Padding to the end of the block */
++      __u32   s_hash_seed[4];         /* HTREE hash seed */
++      __u8    s_def_hash_version;     /* Default hash version to use */
++      __u8    s_reserved_char_pad;
++      __u16   s_reserved_word_pad;
++      __u32   s_reserved[192];        /* Padding to the end of the block */
+ };
+ 
+ #ifdef __KERNEL__
+@@ -552,9 +560,46 @@
+ #define EXT3_DIR_ROUND                        (EXT3_DIR_PAD - 1)
+ #define EXT3_DIR_REC_LEN(name_len)    (((name_len) + 8 + EXT3_DIR_ROUND) & \
+                                        ~EXT3_DIR_ROUND)
++/*
++ * Hash Tree Directory indexing
++ * (c) Daniel Phillips, 2001
++ */
++
++#ifdef CONFIG_EXT3_INDEX /* is_dx(): DIR_INDEX feature set on the sb and EXT3_INDEX_FL set on the inode */
++  #define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE((dir)->i_sb, \
++                                            EXT3_FEATURE_COMPAT_DIR_INDEX) && \
++                    (EXT3_I(dir)->i_flags & EXT3_INDEX_FL))
++#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX) /* limit only applies to non-indexed dirs */
++#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) /* nlink of 1 is also accepted as "empty" here */
++#else /* !CONFIG_EXT3_INDEX: no directory is ever treated as indexed */
++  #define is_dx(dir) 0
++#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX)
++#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2)
++#endif /* CONFIG_EXT3_INDEX */
++
++/* Legal values for the dx_root hash_version field: */
++
++#define DX_HASH_LEGACY                0
++#define DX_HASH_HALF_MD4      1
++#define DX_HASH_TEA           2
++
++/* hash info structure used by the directory hash (passed to ext3fs_dirhash()) */
++struct dx_hash_info
++{
++      u32             hash;
++      u32             minor_hash;
++      int             hash_version; /* NOTE(review): presumably one of the DX_HASH_* values; confirm */
++      u32             *seed; /* NOTE(review): likely the 4-word s_hash_seed; confirm in namei.c */
++};
+ 
+ #ifdef __KERNEL__
+ /*
++ * Control parameters used by ext3_htree_next_block
++ */
++#define HASH_NB_ALWAYS                1
++
++
++/*
+  * Describe an inode's exact location on disk and in memory
+  */
+ struct ext3_iloc
+@@ -564,6 +609,27 @@
+       unsigned long block_group;
+ };
+ 
++
++/*
++ * This structure is stuffed into the struct file's private_data field
++ * for directories.  It is where we put information so that we can do
++ * readdir operations in hash tree order.
++ */
++struct dir_private_info {
++      rb_root_t       root; /* red-black tree root (linux/rbtree.h) */
++      rb_node_t       *curr_node; /* NOTE(review): presumably the readdir cursor within root; confirm in dir.c */
++      struct fname    *extra_fname;
++      loff_t          last_pos;
++      __u32           curr_hash;
++      __u32           curr_minor_hash;
++      __u32           next_hash; /* NOTE(review): presumably the *next_hash result of ext3_htree_fill_tree(); confirm */
++};
++
++/*
++ * Special error return code only used by dx_probe() and its callers.
++ */
++#define ERR_BAD_DX_DIR        -75000
++
+ /*
+  * Function prototypes
+  */
+@@ -591,11 +657,20 @@
+ 
+ /* dir.c */
+ extern int ext3_check_dir_entry(const char *, struct inode *,
+-                              struct ext3_dir_entry_2 *, struct buffer_head *,
+-                              unsigned long);
++                              struct ext3_dir_entry_2 *,
++                              struct buffer_head *, unsigned long);
++extern void ext3_htree_store_dirent(struct file *dir_file, __u32 hash,
++                                  __u32 minor_hash,
++                                  struct ext3_dir_entry_2 *dirent);
++extern void ext3_htree_free_dir_info(struct dir_private_info *p);
++
+ /* fsync.c */
+ extern int ext3_sync_file (struct file *, struct dentry *, int);
+ 
++/* hash.c */
++extern int ext3fs_dirhash(const char *name, int len, struct
++                        dx_hash_info *hinfo);
++
+ /* ialloc.c */
+ extern struct inode * ext3_new_inode (handle_t *, struct inode *, int);
+ extern void ext3_free_inode (handle_t *, struct inode *);
+@@ -628,6 +703,8 @@
+ /* namei.c */
+ extern int ext3_orphan_add(handle_t *, struct inode *);
+ extern int ext3_orphan_del(handle_t *, struct inode *);
++extern int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
++                              __u32 start_minor_hash, __u32 *next_hash);
+ 
+ /* super.c */
+ extern void ext3_error (struct super_block *, const char *, const char *, ...)
+Index: linux-2.4.19/include/linux/ext3_fs_sb.h
+===================================================================
+--- linux-2.4.19.orig/include/linux/ext3_fs_sb.h       2004-04-23 17:53:54.000000000 -0400
++++ linux-2.4.19/include/linux/ext3_fs_sb.h    2004-04-23 18:26:27.000000000 -0400
+@@ -62,6 +62,8 @@
+       int s_inode_size;
+       int s_first_ino;
+       u32 s_next_generation;
++      u32 s_hash_seed[4];
++      int s_def_hash_version;
+ 
+       /* Journaling */
+       struct inode * s_journal_inode;
+Index: linux-2.4.19/include/linux/ext3_jbd.h
+===================================================================
+--- linux-2.4.19.orig/include/linux/ext3_jbd.h 2004-04-23 17:53:54.000000000 -0400
++++ linux-2.4.19/include/linux/ext3_jbd.h      2004-04-23 18:26:27.000000000 -0400
+@@ -69,6 +69,8 @@
+ 
+ #define EXT3_RESERVE_TRANS_BLOCKS     12
+ 
++#define EXT3_INDEX_EXTRA_TRANS_BLOCKS 8
++
+ int
+ ext3_mark_iloc_dirty(handle_t *handle, 
+                    struct inode *inode,
+Index: linux-2.4.19/include/linux/rbtree.h
+===================================================================
+--- linux-2.4.19.orig/include/linux/rbtree.h   2001-11-22 14:46:18.000000000 -0500
++++ linux-2.4.19/include/linux/rbtree.h        2004-04-23 18:26:27.000000000 -0400
+@@ -120,6 +120,8 @@
+ 
+ extern void rb_insert_color(rb_node_t *, rb_root_t *);
+ extern void rb_erase(rb_node_t *, rb_root_t *);
++extern rb_node_t *rb_get_first(rb_root_t *root);
++extern rb_node_t *rb_get_next(rb_node_t *n);
+ 
+ static inline void rb_link_node(rb_node_t * node, rb_node_t * parent, rb_node_t ** rb_link)
+ {
+Index: linux-2.4.19/lib/rbtree.c
+===================================================================
+--- linux-2.4.19.orig/lib/rbtree.c     2002-08-02 20:39:46.000000000 -0400
++++ linux-2.4.19/lib/rbtree.c  2004-04-23 18:26:27.000000000 -0400
+@@ -17,6 +17,8 @@
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ 
+   linux/lib/rbtree.c
++
++  rb_get_first and rb_get_next written by Theodore Ts'o, 9/8/2002
+ */
+ 
+ #include <linux/rbtree.h>
+@@ -294,3 +296,43 @@
+               __rb_erase_color(child, parent, root);
+ }
+ EXPORT_SYMBOL(rb_erase);
++
++/*
++ * Return the first (leftmost) node in sort order, or NULL for an empty tree.
++ */
++rb_node_t *rb_get_first(rb_root_t *root)
++{
++      rb_node_t       *n;
++
++      n = root->rb_node;
++      if (!n)
++              return NULL; /* empty tree: there is no first node */
++      while (n->rb_left) /* keep descending left until there is no left child */
++              n = n->rb_left;
++      return n;
++}
++EXPORT_SYMBOL(rb_get_first);
++
++/*
++ * Return the in-order successor of n (n must not be NULL), or NULL if n is last.
++ */
++rb_node_t *rb_get_next(rb_node_t *n)
++{
++      rb_node_t       *parent;
++
++      if (n->rb_right) { /* successor is the leftmost node of the right subtree */
++              n = n->rb_right;
++              while (n->rb_left)
++                      n = n->rb_left;
++              return n;
++      } else { /* climb until we arrive at a parent from its left child */
++              while ((parent = n->rb_parent)) {
++                      if (n == parent->rb_left)
++                              return parent;
++                      n = parent;
++              }
++              return NULL; /* reached the root from the right: n was the last node */
++      }
++}
++EXPORT_SYMBOL(rb_get_next);
++
diff --git a/lustre/kernel_patches/patches/ext3-orphan_lock-2.4.19-suse.patch b/lustre/kernel_patches/patches/ext3-orphan_lock-2.4.19-suse.patch

index 3c6b5e5..1e26b8c 100644 (file)
--- a/lustre/kernel_patches/patches/ext3-orphan_lock-2.4.19-suse.patch
+++ b/lustre/kernel_patches/patches/ext3-orphan_lock-2.4.19-suse.patch
@@ -1,7 +1,7 @@
-Index: linux-2.4.19.SuSE/fs/ext3/namei.c
+Index: linux-2.4.19/fs/ext3/namei.c
  ===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/namei.c     Sun Nov 16 01:14:50 2003
-+++ linux-2.4.19.SuSE/fs/ext3/namei.c  Sun Nov 16 01:18:04 2003
+--- linux-2.4.19.orig/fs/ext3/namei.c  2004-04-23 22:36:03.000000000 -0400
++++ linux-2.4.19/fs/ext3/namei.c       2004-04-23 22:37:37.000000000 -0400
  @@ -1751,8 +1751,8 @@
         struct super_block *sb = inode->i_sb;
         struct ext3_iloc iloc;
@@ -25,7 +25,7 @@ Index: linux-2.4.19.SuSE/fs/ext3/namei.c
  @@ -1813,20 +1813,19 @@
   {
         struct list_head *prev;
-       struct ext3_inode_info *ei = EXT3_I(inode);
+         struct ext3_inode_info *ei = EXT3_I(inode);
  -      struct ext3_sb_info *sbi;
  +      struct ext3_sb_info *sbi = EXT3_SB(inode->i_sb);
         unsigned long ino_next;
@@ -33,15 +33,15 @@ Index: linux-2.4.19.SuSE/fs/ext3/namei.c
         int err = 0;
   
  -      lock_super(inode->i_sb);
-+      down(&sbi->s_orphan_lock);
-       if (list_empty(&ei->i_orphan)) {
++        down(&sbi->s_orphan_lock);
+         if (list_empty(&ei->i_orphan)) {
  -              unlock_super(inode->i_sb);
-+              up(&sbi->s_orphan_lock);
++                up(&sbi->s_orphan_lock);
                 return 0;
         }
   
         ino_next = NEXT_ORPHAN(inode);
-       prev = ei->i_orphan.prev;
+         prev = ei->i_orphan.prev;
  -      sbi = EXT3_SB(inode->i_sb);
   
         jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino);
@@ -59,11 +59,11 @@ Index: linux-2.4.19.SuSE/fs/ext3/namei.c
         return err;
   
   out_brelse:
-Index: linux-2.4.19.SuSE/fs/ext3/super.c
+Index: linux-2.4.19/fs/ext3/super.c
  ===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/super.c     Sun Nov 16 00:40:59 2003
-+++ linux-2.4.19.SuSE/fs/ext3/super.c  Sun Nov 16 01:18:04 2003
-@@ -1182,6 +1182,7 @@
+--- linux-2.4.19.orig/fs/ext3/super.c  2004-04-23 22:30:41.000000000 -0400
++++ linux-2.4.19/fs/ext3/super.c       2004-04-23 22:36:22.000000000 -0400
+@@ -1179,6 +1179,7 @@
          */
         sb->s_op = &ext3_sops;
         INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
@@ -71,10 +71,10 @@ Index: linux-2.4.19.SuSE/fs/ext3/super.c
   
         sb->s_root = 0;
   
-Index: linux-2.4.19.SuSE/include/linux/ext3_fs_sb.h
+Index: linux-2.4.19/include/linux/ext3_fs_sb.h
  ===================================================================
---- linux-2.4.19.SuSE.orig/include/linux/ext3_fs_sb.h  Sat Nov 15 23:58:28 2003
-+++ linux-2.4.19.SuSE/include/linux/ext3_fs_sb.h       Sun Nov 16 01:18:41 2003
+--- linux-2.4.19.orig/include/linux/ext3_fs_sb.h       2004-04-23 18:26:27.000000000 -0400
++++ linux-2.4.19/include/linux/ext3_fs_sb.h    2004-04-23 22:36:22.000000000 -0400
  @@ -69,6 +69,7 @@
         struct inode * s_journal_inode;
         struct journal_s * s_journal;
diff --git a/lustre/kernel_patches/patches/ext3-trusted_ea-suse-2.4.19.patch b/lustre/kernel_patches/patches/ext3-trusted_ea-suse-2.4.19.patch

new file mode 100644 (file)

index 0000000..1c31052
--- /dev/null
+++ b/lustre/kernel_patches/patches/ext3-trusted_ea-suse-2.4.19.patch
@@ -0,0 +1,179 @@
+ fs/ext3/xattr.c            |   12 +++++-
+ fs/ext3/xattr_trusted.c    |   86 +++++++++++++++++++++++++++++++++++++++++++++
+ include/linux/ext3_xattr.h |    6 +++
+ 3 files changed, 102 insertions(+), 2 deletions(-)
+
+Index: linux-2.4.19/fs/ext3/xattr.c
+===================================================================
+--- linux-2.4.19.orig/fs/ext3/xattr.c  2004-04-23 22:44:57.000000000 -0400
++++ linux-2.4.19/fs/ext3/xattr.c       2004-04-23 22:45:20.000000000 -0400
+@@ -1785,18 +1785,25 @@
+ int __init
+ init_ext3_xattr(void)
+ {
++      int error;
++
+       ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL,
+               sizeof(struct mb_cache_entry) +
+               sizeof(struct mb_cache_entry_index), 1, 61);
+       if (!ext3_xattr_cache)
+               return -ENOMEM;
+ 
+-      return 0;
++      error = init_ext3_xattr_trusted();
++      if (error)
++              mb_cache_destroy(ext3_xattr_cache);
++
++      return error;
+ }
+ 
+ void
+ exit_ext3_xattr(void)
+ {
++      exit_ext3_xattr_trusted();
+       if (ext3_xattr_cache)
+               mb_cache_destroy(ext3_xattr_cache);
+       ext3_xattr_cache = NULL;
+@@ -1807,12 +1814,13 @@
+ int __init
+ init_ext3_xattr(void)
+ {
+-      return 0;
++      return init_ext3_xattr_trusted();
+ }
+ 
+ void
+ exit_ext3_xattr(void)
+ {
++      exit_ext3_xattr_trusted();
+ }
+ 
+ #endif  /* CONFIG_EXT3_FS_XATTR_SHARING */
+Index: linux-2.4.19/fs/ext3/xattr_trusted.c
+===================================================================
+--- linux-2.4.19.orig/fs/ext3/xattr_trusted.c  2003-01-30 05:24:37.000000000 -0500
++++ linux-2.4.19/fs/ext3/xattr_trusted.c       2004-04-23 22:45:20.000000000 -0400
+@@ -0,0 +1,86 @@
++/*
++ * linux/fs/ext3/xattr_trusted.c
++ * Handler for trusted extended attributes.
++ *
++ * Copyright (C) 2003 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
++ */
++
++#include <linux/module.h>
++#include <linux/string.h>
++#include <linux/fs.h>
++#include <linux/ext3_jbd.h>
++#include <linux/ext3_fs.h>
++#include <linux/ext3_xattr.h>
++
++#define XATTR_TRUSTED_PREFIX "trusted."
++
++static size_t /* .list handler: writes "trusted.<name>\0" into list, returns the bytes needed */
++ext3_xattr_trusted_list(char *list, struct inode *inode,
++                      const char *name, int name_len)
++{
++      const int prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1; /* prefix length without its NUL */
++
++      if (!capable(CAP_SYS_ADMIN)) /* without CAP_SYS_ADMIN the entry contributes 0 bytes */
++              return 0;
++
++      if (list) { /* list == NULL: skip the copy and only report the size */
++              memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
++              memcpy(list+prefix_len, name, name_len);
++              list[prefix_len + name_len] = '\0';
++      }
++      return prefix_len + name_len + 1; /* prefix + name + terminating NUL */
++}
++
++static int /* .get handler: forwards to ext3_xattr_get() under EXT3_XATTR_INDEX_TRUSTED */
++ext3_xattr_trusted_get(struct inode *inode, const char *name,
++                     void *buffer, size_t size)
++{
++      if (strcmp(name, "") == 0) /* an empty attribute name is rejected */
++              return -EINVAL;
++      if (!capable(CAP_SYS_ADMIN)) /* reading requires CAP_SYS_ADMIN */
++              return -EPERM;
++      return ext3_xattr_get(inode, EXT3_XATTR_INDEX_TRUSTED, name,
++                            buffer, size);
++}
++
++static int /* .set handler: runs ext3_xattr_set() inside its own journal handle */
++ext3_xattr_trusted_set(struct inode *inode, const char *name,
++                     const void *value, size_t size, int flags)
++{
++      handle_t *handle;
++      int error;
++
++      if (strcmp(name, "") == 0) /* an empty attribute name is rejected */
++              return -EINVAL;
++      if (!capable(CAP_SYS_ADMIN)) /* writing requires CAP_SYS_ADMIN */
++              return -EPERM;
++      handle = ext3_journal_start(inode, EXT3_XATTR_TRANS_BLOCKS);
++      if (IS_ERR(handle)) /* could not start a transaction: pass the error up */
++              return PTR_ERR(handle);
++      error = ext3_xattr_set(handle, inode, EXT3_XATTR_INDEX_TRUSTED, name,
++                             value, size, flags);
++      ext3_journal_stop(handle, inode); /* return value ignored; only the set result is reported */
++
++      return error;
++}
++
++struct ext3_xattr_handler ext3_xattr_trusted_handler = { /* (un)registered under EXT3_XATTR_INDEX_TRUSTED below */
++      .prefix = XATTR_TRUSTED_PREFIX, /* "trusted." */
++      .list   = ext3_xattr_trusted_list,
++      .get    = ext3_xattr_trusted_get,
++      .set    = ext3_xattr_trusted_set,
++};
++
++int __init /* register the trusted.* handler; returns ext3_xattr_register()'s result */
++init_ext3_xattr_trusted(void)
++{
++      return ext3_xattr_register(EXT3_XATTR_INDEX_TRUSTED,
++                                 &ext3_xattr_trusted_handler);
++}
++
++void /* unregister the trusted.* handler installed by init_ext3_xattr_trusted() */
++exit_ext3_xattr_trusted(void)
++{
++      ext3_xattr_unregister(EXT3_XATTR_INDEX_TRUSTED,
++                            &ext3_xattr_trusted_handler);
++}
+Index: linux-2.4.19/fs/ext3/Makefile
+===================================================================
+--- linux-2.4.19.orig/fs/ext3/Makefile 2004-04-23 22:38:38.000000000 -0400
++++ linux-2.4.19/fs/ext3/Makefile      2004-04-23 22:49:23.000000000 -0400
+@@ -12,7 +12,7 @@
+ export-objs :=        super.o inode.o
+ 
+ obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
+-              ioctl.o namei.o super.o symlink.o hash.o
++              ioctl.o namei.o super.o symlink.o hash.o xattr_trusted.o
+ obj-m    := $(O_TARGET)
+ 
+ obj-$(CONFIG_EXT3_FS_XATTR) += xattr.o
+Index: linux-2.4.19/include/linux/ext3_xattr.h
+===================================================================
+--- linux-2.4.19.orig/include/linux/ext3_xattr.h       2004-04-23 17:53:54.000000000 -0400
++++ linux-2.4.19/include/linux/ext3_xattr.h    2004-04-23 22:45:20.000000000 -0400
+@@ -21,6 +21,9 @@
+ #define EXT3_XATTR_INDEX_USER                 1
+ #define EXT3_XATTR_INDEX_POSIX_ACL_ACCESS     2
+ #define EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT    3
++#define EXT3_XATTR_INDEX_TRUSTED              4
++#define EXT3_XATTR_INDEX_LUSTRE                       5
++#define EXT3_XATTR_INDEX_SECURITY             6
+ 
+ struct ext3_xattr_header {
+       __u32   h_magic;        /* magic number for identification */
+@@ -84,6 +87,9 @@
+ extern int init_ext3_xattr(void) __init;
+ extern void exit_ext3_xattr(void);
+ 
++extern int init_ext3_xattr_trusted(void) __init;
++extern void exit_ext3_xattr_trusted(void);
++
+ # else  /* CONFIG_EXT3_FS_XATTR */
+ #  define ext3_setxattr               NULL
+ #  define ext3_getxattr               NULL
diff --git a/lustre/kernel_patches/patches/ext3-use-after-free-suse.patch b/lustre/kernel_patches/patches/ext3-use-after-free-suse.patch

index 1dab6d8..5a5dc5a 100644 (file)
--- a/lustre/kernel_patches/patches/ext3-use-after-free-suse.patch
+++ b/lustre/kernel_patches/patches/ext3-use-after-free-suse.patch
@@ -1,11 +1,11 @@
   ./fs/ext3/namei.c |   11 +++++------
   1 files changed, 5 insertions(+), 6 deletions(-)
  
-Index: linux-2.4.19.SuSE/./fs/ext3/namei.c
+Index: linux-2.4.19/fs/ext3/namei.c
  ===================================================================
---- linux-2.4.19.SuSE.orig/./fs/ext3/namei.c   Sun Nov 16 01:02:51 2003
-+++ linux-2.4.19.SuSE/./fs/ext3/namei.c        Sun Nov 16 01:14:50 2003
-@@ -1523,8 +1523,11 @@
+--- linux-2.4.19.orig/fs/ext3/namei.c  2004-04-23 22:30:41.000000000 -0400
++++ linux-2.4.19/fs/ext3/namei.c       2004-04-23 22:36:03.000000000 -0400
+@@ -1522,8 +1522,11 @@
   {
         int err = ext3_add_entry(handle, dentry, inode);
         if (!err) {
@@ -19,7 +19,7 @@ Index: linux-2.4.19.SuSE/./fs/ext3/namei.c
         }
         ext3_dec_count(handle, inode);
         iput(inode);
-@@ -1560,7 +1563,6 @@
+@@ -1559,7 +1562,6 @@
                 inode->i_op = &ext3_file_inode_operations;
                 inode->i_fop = &ext3_file_operations;
                 inode->i_mapping->a_ops = &ext3_aops;
@@ -27,7 +27,7 @@ Index: linux-2.4.19.SuSE/./fs/ext3/namei.c
                 err = ext3_add_nondir(handle, dentry, inode);
         }
         ext3_journal_stop(handle, dir);
-@@ -1590,7 +1592,6 @@
+@@ -1589,7 +1591,6 @@
   #ifdef CONFIG_EXT3_FS_XATTR
                 inode->i_op = &ext3_special_inode_operations;
   #endif
@@ -38,7 +38,7 @@ Index: linux-2.4.19.SuSE/./fs/ext3/namei.c
  @@ -2039,7 +2040,6 @@
                 inode->i_size = l-1;
         }
-       EXT3_I(inode)->i_disksize = inode->i_size;
+         EXT3_I(inode)->i_disksize = inode->i_size;
  -      ext3_mark_inode_dirty(handle, inode);
         err = ext3_add_nondir(handle, dentry, inode);
   out_stop:
diff --git a/lustre/kernel_patches/patches/invalidate_show-2.4.21-sles8sp3.patch b/lustre/kernel_patches/patches/invalidate_show-2.4.21-sles8sp3.patch

new file mode 100644 (file)

index 0000000..36e59d3
--- /dev/null
+++ b/lustre/kernel_patches/patches/invalidate_show-2.4.21-sles8sp3.patch
@@ -0,0 +1,134 @@
+
+
+
+ fs/inode.c         |   21 ++++++++++++++-------
+ fs/smbfs/inode.c   |    2 +-
+ fs/super.c         |    4 ++--
+ include/linux/fs.h |    2 +-
+ 4 files changed, 18 insertions(+), 11 deletions(-)
+
+Index: linux-2.4.21/fs/inode.c
+===================================================================
+--- linux-2.4.21.orig/fs/inode.c       2004-04-24 02:38:51.000000000 -0400
++++ linux-2.4.21/fs/inode.c    2004-04-26 19:41:58.000000000 -0400
+@@ -651,7 +651,8 @@
+ /*
+  * Invalidate all inodes for a device.
+  */
+-static int invalidate_list(struct list_head *head, struct super_block * sb, struct list_head * dispose)
++static int invalidate_list(struct list_head *head, struct super_block * sb,
++                         struct list_head * dispose, int show)
+ {
+       struct list_head *next;
+       int busy = 0, count = 0;
+@@ -676,6 +677,11 @@
+                       count++;
+                       continue;
+               }
++              if (show)
++                      printk(KERN_ERR
++                             "inode busy: dev %s:%lu (%p) mode %o count %u\n",
++                             kdevname(sb->s_dev), inode->i_ino, inode,
++                             inode->i_mode, atomic_read(&inode->i_count));
+               busy = 1;
+       }
+       /* only unused inodes may be cached with i_count zero */
+@@ -694,22 +700,23 @@
+ /**
+  *    invalidate_inodes       - discard the inodes on a device
+  *    @sb: superblock
++ *    @show: whether we should display any busy inodes found
+  *
+  *    Discard all of the inodes for a given superblock. If the discard
+  *    fails because there are busy inodes then a non zero value is returned.
+  *    If the discard is successful all the inodes have been discarded.
+  */
+  
+-int invalidate_inodes(struct super_block * sb)
++int invalidate_inodes(struct super_block * sb, int show)
+ {
+       int busy;
+       LIST_HEAD(throw_away);
+ 
+       spin_lock(&inode_lock);
+-      busy = invalidate_list(&inode_in_use, sb, &throw_away);
+-      busy |= invalidate_list(&inode_unused, sb, &throw_away);
+-      busy |= invalidate_list(&sb->s_dirty, sb, &throw_away);
+-      busy |= invalidate_list(&sb->s_locked_inodes, sb, &throw_away);
++      busy = invalidate_list(&inode_in_use, sb, &throw_away, show);
++      busy |= invalidate_list(&inode_unused, sb, &throw_away, show);
++      busy |= invalidate_list(&sb->s_dirty, sb, &throw_away, show);
++      busy |= invalidate_list(&sb->s_locked_inodes, sb, &throw_away, show);
+       spin_unlock(&inode_lock);
+ 
+       dispose_list(&throw_away);
+@@ -735,7 +742,7 @@
+                * hold).
+                */
+               shrink_dcache_sb(sb);
+-              res = invalidate_inodes(sb);
++              res = invalidate_inodes(sb, 0);
+               drop_super(sb);
+       }
+       invalidate_buffers(dev);
+Index: linux-2.4.21/fs/super.c
+===================================================================
+--- linux-2.4.21.orig/fs/super.c       2004-04-24 02:38:51.000000000 -0400
++++ linux-2.4.21/fs/super.c    2004-04-26 19:41:58.000000000 -0400
+@@ -932,7 +932,7 @@
+       lock_super(sb);
+       lock_kernel();
+       sb->s_flags &= ~MS_ACTIVE;
+-      invalidate_inodes(sb);  /* bad name - it should be evict_inodes() */
++      invalidate_inodes(sb, 0);  /* bad name - it should be evict_inodes() */
+       if (sop) {
+               if (sop->write_super && sb->s_dirt)
+                       sop->write_super(sb);
+@@ -941,7 +941,7 @@
+       }
+ 
+       /* Forget any remaining inodes */
+-      if (invalidate_inodes(sb)) {
++      if (invalidate_inodes(sb, 1)) {
+               printk(KERN_ERR "VFS: Busy inodes after unmount. "
+                       "Self-destruct in 5 seconds.  Have a nice day...\n");
+       }
+Index: linux-2.4.21/fs/smbfs/inode.c
+===================================================================
+--- linux-2.4.21.orig/fs/smbfs/inode.c 2004-04-24 02:38:44.000000000 -0400
++++ linux-2.4.21/fs/smbfs/inode.c      2004-04-26 19:41:58.000000000 -0400
+@@ -167,7 +167,7 @@
+ {
+       VERBOSE("\n");
+       shrink_dcache_sb(SB_of(server));
+-      invalidate_inodes(SB_of(server));
++      invalidate_inodes(SB_of(server), 0);
+ }
+ 
+ /*
+Index: linux-2.4.21/fs/ntfs/super.c
+===================================================================
+--- linux-2.4.21.orig/fs/ntfs/super.c  2004-04-24 02:38:38.000000000 -0400
++++ linux-2.4.21/fs/ntfs/super.c       2004-04-26 19:42:12.000000000 -0400
+@@ -1604,7 +1604,7 @@
+        * method again... FIXME: Do we need to do this twice now because of
+        * attribute inodes? I think not, so leave as is for now... (AIA)
+        */
+-      if (invalidate_inodes(sb)) {
++      if (invalidate_inodes(sb, 0)) {
+               ntfs_error(sb, "Busy inodes left. This is most likely a NTFS "
+                               "driver bug.");
+               /* Copied from fs/super.c. I just love this message. (-; */
+Index: linux-2.4.21/include/linux/fs.h
+===================================================================
+--- linux-2.4.21.orig/include/linux/fs.h       2004-04-26 19:06:32.000000000 -0400
++++ linux-2.4.21/include/linux/fs.h    2004-04-26 19:41:58.000000000 -0400
+@@ -1401,7 +1401,7 @@
+ extern void set_buffer_flushtime(struct buffer_head *);
+ extern void balance_dirty(void);
+ extern int check_disk_change(kdev_t);
+-extern int invalidate_inodes(struct super_block *);
++extern int invalidate_inodes(struct super_block *, int);
+ extern int invalidate_device(kdev_t, int);
+ extern void invalidate_inode_pages(struct inode *);
+ extern void invalidate_inode_pages2(struct address_space *);
diff --git a/lustre/kernel_patches/patches/iopen-2.4.19-suse.patch b/lustre/kernel_patches/patches/iopen-2.4.19-suse.patch

index ad213c9..01c040c 100644 (file)
--- a/lustre/kernel_patches/patches/iopen-2.4.19-suse.patch
+++ b/lustre/kernel_patches/patches/iopen-2.4.19-suse.patch
@@ -8,10 +8,10 @@
   include/linux/ext3_fs.h            |    2 
   8 files changed, 318 insertions(+), 2 deletions(-)
  
-Index: linux-2.4.19.SuSE/Documentation/filesystems/ext2.txt
+Index: linux-2.4.19/Documentation/filesystems/ext2.txt
  ===================================================================
---- linux-2.4.19.SuSE.orig/Documentation/filesystems/ext2.txt  Wed Jul 11 15:44:45 2001
-+++ linux-2.4.19.SuSE/Documentation/filesystems/ext2.txt       Sun Nov 16 01:27:31 2003
+--- linux-2.4.19.orig/Documentation/filesystems/ext2.txt       2001-07-11 18:44:45.000000000 -0400
++++ linux-2.4.19/Documentation/filesystems/ext2.txt    2004-04-23 22:37:48.000000000 -0400
  @@ -35,6 +35,22 @@
   
   sb=n                          Use alternate superblock at this location.
@@ -35,23 +35,23 @@ Index: linux-2.4.19.SuSE/Documentation/filesystems/ext2.txt
   grpquota,noquota,quota,usrquota       Quota options are silently ignored by ext2.
   
   
-Index: linux-2.4.19.SuSE/fs/ext3/Makefile
+Index: linux-2.4.19/fs/ext3/Makefile
  ===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/Makefile    Sun Nov 16 00:40:59 2003
-+++ linux-2.4.19.SuSE/fs/ext3/Makefile Sun Nov 16 01:27:31 2003
+--- linux-2.4.19.orig/fs/ext3/Makefile 2004-04-23 18:26:27.000000000 -0400
++++ linux-2.4.19/fs/ext3/Makefile      2004-04-23 22:38:38.000000000 -0400
  @@ -11,7 +11,7 @@
   
- export-objs := ext3-exports.o
+ export-objs :=        super.o inode.o
   
  -obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
  +obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
-               ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o
+               ioctl.o namei.o super.o symlink.o hash.o
   obj-m    := $(O_TARGET)
   
-Index: linux-2.4.19.SuSE/fs/ext3/inode.c
+Index: linux-2.4.19/fs/ext3/inode.c
  ===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/inode.c     Sun Nov 16 01:26:04 2003
-+++ linux-2.4.19.SuSE/fs/ext3/inode.c  Sun Nov 16 01:27:31 2003
+--- linux-2.4.19.orig/fs/ext3/inode.c  2004-04-23 22:37:42.000000000 -0400
++++ linux-2.4.19/fs/ext3/inode.c       2004-04-23 22:37:48.000000000 -0400
  @@ -34,6 +34,7 @@
   #include <linux/highuid.h>
   #include <linux/quotaops.h>
@@ -70,10 +70,10 @@ Index: linux-2.4.19.SuSE/fs/ext3/inode.c
         if(ext3_get_inode_loc(inode, &iloc))
                 goto bad_inode;
         bh = iloc.bh;
-Index: linux-2.4.19.SuSE/fs/ext3/iopen.c
+Index: linux-2.4.19/fs/ext3/iopen.c
  ===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/iopen.c     Sun Nov 16 01:27:31 2003
-+++ linux-2.4.19.SuSE/fs/ext3/iopen.c  Sun Nov 16 01:27:31 2003
+--- linux-2.4.19.orig/fs/ext3/iopen.c  2003-01-30 05:24:37.000000000 -0500
++++ linux-2.4.19/fs/ext3/iopen.c       2004-04-23 22:37:48.000000000 -0400
  @@ -0,0 +1,258 @@
  +/*
  + * linux/fs/ext3/iopen.c
@@ -333,10 +333,10 @@ Index: linux-2.4.19.SuSE/fs/ext3/iopen.c
  +
  +      return 1;
  +}
-Index: linux-2.4.19.SuSE/fs/ext3/iopen.h
+Index: linux-2.4.19/fs/ext3/iopen.h
  ===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/iopen.h     Sun Nov 16 01:27:31 2003
-+++ linux-2.4.19.SuSE/fs/ext3/iopen.h  Sun Nov 16 01:27:31 2003
+--- linux-2.4.19.orig/fs/ext3/iopen.h  2003-01-30 05:24:37.000000000 -0500
++++ linux-2.4.19/fs/ext3/iopen.h       2004-04-23 22:37:48.000000000 -0400
  @@ -0,0 +1,15 @@
  +/*
  + * iopen.h
@@ -353,10 +353,10 @@ Index: linux-2.4.19.SuSE/fs/ext3/iopen.h
  +extern int ext3_iopen_get_inode(struct inode *inode);
  +extern struct dentry *iopen_connect_dentry(struct dentry *de,
  +                                         struct inode *inode);
-Index: linux-2.4.19.SuSE/fs/ext3/namei.c
+Index: linux-2.4.19/fs/ext3/namei.c
  ===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/namei.c     Sun Nov 16 01:23:20 2003
-+++ linux-2.4.19.SuSE/fs/ext3/namei.c  Sun Nov 16 01:27:31 2003
+--- linux-2.4.19.orig/fs/ext3/namei.c  2004-04-23 22:37:42.000000000 -0400
++++ linux-2.4.19/fs/ext3/namei.c       2004-04-23 22:37:48.000000000 -0400
  @@ -36,7 +36,7 @@
   #include <linux/string.h>
   #include <linux/locks.h>
@@ -366,7 +366,7 @@ Index: linux-2.4.19.SuSE/fs/ext3/namei.c
   
   /*
    * define how far ahead to read directories while searching them.
-@@ -922,10 +922,14 @@
+@@ -928,10 +928,14 @@
         struct inode * inode;
         struct ext3_dir_entry_2 * de;
         struct buffer_head * bh;
@@ -381,7 +381,7 @@ Index: linux-2.4.19.SuSE/fs/ext3/namei.c
         bh = ext3_find_entry(dentry, &de);
         inode = NULL;
         if (bh) {
-@@ -943,7 +948,28 @@
+@@ -943,7 +947,28 @@
                         return ERR_PTR(-EACCES);
                 }
         }
@@ -411,11 +411,11 @@ Index: linux-2.4.19.SuSE/fs/ext3/namei.c
         return NULL;
   }
   
-Index: linux-2.4.19.SuSE/fs/ext3/super.c
+Index: linux-2.4.19/fs/ext3/super.c
  ===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/super.c     Sun Nov 16 01:19:22 2003
-+++ linux-2.4.19.SuSE/fs/ext3/super.c  Sun Nov 16 01:27:31 2003
-@@ -864,6 +864,18 @@
+--- linux-2.4.19.orig/fs/ext3/super.c  2004-04-23 22:37:42.000000000 -0400
++++ linux-2.4.19/fs/ext3/super.c       2004-04-23 22:37:48.000000000 -0400
+@@ -861,6 +861,18 @@
                          || !strcmp (this_char, "quota")
                          || !strcmp (this_char, "usrquota"))
                         /* Don't do anything ;-) */ ;
@@ -434,10 +434,10 @@ Index: linux-2.4.19.SuSE/fs/ext3/super.c
                 else if (!strcmp (this_char, "journal")) {
                         /* @@@ FIXME */
                         /* Eventually we will want to be able to create
-Index: linux-2.4.19.SuSE/include/linux/ext3_fs.h
+Index: linux-2.4.19/include/linux/ext3_fs.h
  ===================================================================
---- linux-2.4.19.SuSE.orig/include/linux/ext3_fs.h     Sun Nov 16 01:25:42 2003
-+++ linux-2.4.19.SuSE/include/linux/ext3_fs.h  Sun Nov 16 01:30:05 2003
+--- linux-2.4.19.orig/include/linux/ext3_fs.h  2004-04-23 22:37:42.000000000 -0400
++++ linux-2.4.19/include/linux/ext3_fs.h       2004-04-23 22:37:48.000000000 -0400
  @@ -324,6 +324,8 @@
   #define EXT3_MOUNT_XATTR_USER         0x4000  /* Extended user attributes */
   #define EXT3_MOUNT_POSIX_ACL          0x8000  /* POSIX Access Control Lists */
diff --git a/lustre/kernel_patches/patches/iopen-2.4.21-sles8sp3.patch b/lustre/kernel_patches/patches/iopen-2.4.21-sles8sp3.patch

new file mode 100644 (file)

index 0000000..9258544
--- /dev/null
+++ b/lustre/kernel_patches/patches/iopen-2.4.21-sles8sp3.patch
@@ -0,0 +1,449 @@
+ Documentation/filesystems/ext2.txt |   16 ++
+ fs/ext3/Makefile                   |    2 
+ fs/ext3/inode.c                    |    4 
+ fs/ext3/iopen.c                    |  259 +++++++++++++++++++++++++++++++++++++
+ fs/ext3/iopen.h                    |   13 +
+ fs/ext3/namei.c                    |   13 +
+ fs/ext3/super.c                    |   11 +
+ include/linux/ext3_fs.h            |    2 
+ 8 files changed, 318 insertions(+), 2 deletions(-)
+
+Index: linux-2.4.21/Documentation/filesystems/ext2.txt
+===================================================================
+--- linux-2.4.21.orig/Documentation/filesystems/ext2.txt       2001-07-11 18:44:45.000000000 -0400
++++ linux-2.4.21/Documentation/filesystems/ext2.txt    2004-04-24 02:46:32.000000000 -0400
+@@ -35,6 +35,22 @@
+ 
+ sb=n                          Use alternate superblock at this location.
+ 
++iopen                         Makes an invisible pseudo-directory called
++                              __iopen__ available in the root directory
++                              of the filesystem.  Allows open-by-inode-
++                              number.  i.e., inode 3145 can be accessed
++                              via /mntpt/__iopen__/3145
++
++iopen_nopriv                  This option makes the iopen directory be
++                              world-readable.  This may be safer since it
++                              allows daemons to run as an unprivileged user,
++                              however it significantly changes the security
++                              model of a Unix filesystem, since previously
++                              all files under a mode 700 directory were not
++                              generally available even if the
++                              permissions on the file itself are
++                              world-readable.
++
+ grpquota,noquota,quota,usrquota       Quota options are silently ignored by ext2.
+ 
+ 
+Index: linux-2.4.21/fs/ext3/Makefile
+===================================================================
+--- linux-2.4.21.orig/fs/ext3/Makefile 2004-04-24 02:46:18.000000000 -0400
++++ linux-2.4.21/fs/ext3/Makefile      2004-04-24 02:47:02.000000000 -0400
+@@ -11,7 +11,7 @@
+ 
+ export-objs := ext3-exports.o
+ 
+-obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
++obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
+               ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o
+ obj-m    := $(O_TARGET)
+ 
+Index: linux-2.4.21/fs/ext3/inode.c
+===================================================================
+--- linux-2.4.21.orig/fs/ext3/inode.c  2004-04-24 02:46:19.000000000 -0400
++++ linux-2.4.21/fs/ext3/inode.c       2004-04-24 02:46:32.000000000 -0400
+@@ -34,6 +34,7 @@
+ #include <linux/highuid.h>
+ #include <linux/quotaops.h>
+ #include <linux/module.h>
++#include "iopen.h"
+ 
+ /*
+  * SEARCH_FROM_ZERO forces each block allocation to search from the start
+@@ -2252,6 +2253,9 @@
+       struct buffer_head *bh;
+       int block;
+       
++      if (ext3_iopen_get_inode(inode))
++              return;
++
+       if(ext3_get_inode_loc(inode, &iloc))
+               goto bad_inode;
+       bh = iloc.bh;
+Index: linux-2.4.21/fs/ext3/iopen.c
+===================================================================
+--- linux-2.4.21.orig/fs/ext3/iopen.c  2003-01-30 05:24:37.000000000 -0500
++++ linux-2.4.21/fs/ext3/iopen.c       2004-04-24 02:46:32.000000000 -0400
+@@ -0,0 +1,258 @@
++/*
++ * linux/fs/ext3/iopen.c
++ *
++ * Special support for open by inode number
++ *
++ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu).
++ *
++ * This file may be redistributed under the terms of the GNU General
++ * Public License.
++ *
++ *
++ * Invariants:
++ *   - there is only ever a single DCACHE_NFSD_DISCONNECTED dentry alias
++ *     for an inode at one time.
++ *   - there are never both connected and DCACHE_NFSD_DISCONNECTED dentry
++ *     aliases on an inode at the same time.
++ *
++ * If we have any connected dentry aliases for an inode, use one of those
++ * in iopen_lookup().  Otherwise, we instantiate a single NFSD_DISCONNECTED
++ * dentry for this inode, which thereafter will be found by the dcache
++ * when looking up this inode number in __iopen__, so we don't return here
++ * until it is gone.
++ *
++ * If we get an inode via a regular name lookup, then we "rename" the
++ * NFSD_DISCONNECTED dentry to the proper name and parent.  This ensures
++ * existing users of the disconnected dentry will continue to use the same
++ * dentry as the connected users, and there will never be both kinds of
++ * dentry aliases at one time.
++ */
++
++#include <linux/sched.h>
++#include <linux/fs.h>
++#include <linux/locks.h>
++#include <linux/ext3_jbd.h>
++#include <linux/jbd.h>
++#include <linux/ext3_fs.h>
++#include <linux/smp_lock.h>
++#include "iopen.h"
++
++#ifndef assert
++#define assert(test) J_ASSERT(test)
++#endif
++
++#define IOPEN_NAME_LEN        32
++
++/*
++ * This implements looking up an inode by number.
++ */
++static struct dentry *iopen_lookup(struct inode *dir, struct dentry *dentry)
++{
++      struct inode *inode;
++      unsigned long ino;
++      struct list_head *lp;
++      struct dentry *alternate;
++      char buf[IOPEN_NAME_LEN];
++
++      if (dentry->d_name.len >= IOPEN_NAME_LEN)
++              return ERR_PTR(-ENAMETOOLONG);
++
++      memcpy(buf, dentry->d_name.name, dentry->d_name.len);
++      buf[dentry->d_name.len] = 0;
++
++      if (strcmp(buf, ".") == 0)
++              ino = dir->i_ino;
++      else if (strcmp(buf, "..") == 0)
++              ino = EXT3_ROOT_INO;
++      else
++              ino = simple_strtoul(buf, 0, 0);
++
++      if ((ino != EXT3_ROOT_INO &&
++           //ino != EXT3_ACL_IDX_INO &&
++           //ino != EXT3_ACL_DATA_INO &&
++           ino < EXT3_FIRST_INO(dir->i_sb)) ||
++          ino > le32_to_cpu(dir->i_sb->u.ext3_sb.s_es->s_inodes_count))
++              return ERR_PTR(-ENOENT);
++
++      inode = iget(dir->i_sb, ino);
++      if (!inode)
++              return ERR_PTR(-EACCES);
++      if (is_bad_inode(inode)) {
++              iput(inode);
++              return ERR_PTR(-ENOENT);
++      }
++
++      assert(list_empty(&dentry->d_alias));           /* d_instantiate */
++      assert(list_empty(&dentry->d_hash));            /* d_rehash */
++
++      /* preferably return a connected dentry */
++      spin_lock(&dcache_lock);
++      list_for_each(lp, &inode->i_dentry) {
++              alternate = list_entry(lp, struct dentry, d_alias);
++              assert(!(alternate->d_flags & DCACHE_NFSD_DISCONNECTED));
++      }
++
++      if (!list_empty(&inode->i_dentry)) {
++              alternate = list_entry(inode->i_dentry.next,
++                                     struct dentry, d_alias);
++              dget_locked(alternate);
++              alternate->d_vfs_flags |= DCACHE_REFERENCED;
++              iput(inode);
++              spin_unlock(&dcache_lock);
++              return alternate;
++      }
++      dentry->d_flags |= DCACHE_NFSD_DISCONNECTED;
++
++      /* d_add(), but don't drop dcache_lock before adding dentry to inode */
++      list_add(&dentry->d_alias, &inode->i_dentry);   /* d_instantiate */
++      dentry->d_inode = inode;
++
++      __d_rehash(dentry, 0);                          /* d_rehash */
++      spin_unlock(&dcache_lock);
++
++      return NULL;
++}
++
++#define do_switch(x,y) do { \
++      __typeof__ (x) __tmp = x; \
++      x = y; y = __tmp; } while (0)
++
++static inline void switch_names(struct dentry *dentry, struct dentry *target)
++{
++      const unsigned char *old_name, *new_name;
++
++      memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN);
++      old_name = target->d_name.name;
++      new_name = dentry->d_name.name;
++      if (old_name == target->d_iname)
++              old_name = dentry->d_iname;
++      if (new_name == dentry->d_iname)
++              new_name = target->d_iname;
++      target->d_name.name = new_name;
++      dentry->d_name.name = old_name;
++}
++
++/* This function is spliced into ext3_lookup and does the move of a
++ * disconnected dentry (if it exists) to a connected dentry.
++ * Caller must hold dcache_lock.
++ */
++struct dentry *iopen_connect_dentry(struct dentry *de, struct inode *inode)
++{
++      struct dentry *tmp, *goal = NULL;
++      struct list_head *lp;
++
++      /* preferably return a connected dentry */
++      list_for_each(lp, &inode->i_dentry) {
++              tmp = list_entry(lp, struct dentry, d_alias);
++              if (tmp->d_flags & DCACHE_NFSD_DISCONNECTED) {
++                      assert(tmp->d_alias.next == &inode->i_dentry);
++                      assert(tmp->d_alias.prev == &inode->i_dentry);
++                      goal = tmp;
++                      dget_locked(goal);
++                      break;
++              }
++      }
++
++      if (!goal)
++              return NULL;
++
++      /* Move the goal to the de hash queue - like d_move() */
++      goal->d_flags &= ~DCACHE_NFSD_DISCONNECTED;
++      list_del_init(&goal->d_hash);
++
++      list_del(&goal->d_child);
++      list_del(&de->d_child);
++
++      /* Switch the parents and the names.. */
++      switch_names(goal, de);
++      do_switch(goal->d_parent, de->d_parent);
++      do_switch(goal->d_name.len, de->d_name.len);
++      do_switch(goal->d_name.hash, de->d_name.hash);
++
++      /* And add them back to the (new) parent lists */
++      list_add(&goal->d_child, &goal->d_parent->d_subdirs);
++      list_add(&de->d_child, &de->d_parent->d_subdirs);
++      __d_rehash(goal, 0);
++
++      return goal;
++}
++
++/*
++ * These are the special structures for the iopen pseudo directory.
++ */
++
++static struct inode_operations iopen_inode_operations = {
++      lookup:         iopen_lookup,           /* BKL held */
++};
++
++static struct file_operations iopen_file_operations = {
++      read:           generic_read_dir,
++};
++
++static int match_dentry(struct dentry *dentry, const char *name)
++{
++      int     len;
++
++      len = strlen(name);
++      if (dentry->d_name.len != len)
++              return 0;
++      if (strncmp(dentry->d_name.name, name, len))
++              return 0;
++      return 1;
++}
++
++/*
++ * This function is spliced into ext3_lookup and returns 1 if the file
++ * name is __iopen__ and dentry has been filled in appropriately.
++ */
++int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry)
++{
++      struct inode *inode;
++
++      if (dir->i_ino != EXT3_ROOT_INO ||
++          !test_opt(dir->i_sb, IOPEN) ||
++          !match_dentry(dentry, "__iopen__"))
++              return 0;
++
++      inode = iget(dir->i_sb, EXT3_BAD_INO);
++
++      if (!inode)
++              return 0;
++      d_add(dentry, inode);
++      return 1;
++}
++
++/*
++ * This function is spliced into read_inode; it returns 1 if inode
++ * number is the one for /__iopen__, in which case the inode is filled
++ * in appropriately.  Otherwise, this function returns 0.
++ */
++int ext3_iopen_get_inode(struct inode *inode)
++{
++      if (inode->i_ino != EXT3_BAD_INO)
++              return 0;
++
++      inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR;
++      if (test_opt(inode->i_sb, IOPEN_NOPRIV))
++              inode->i_mode |= 0777;
++      inode->i_uid = 0;
++      inode->i_gid = 0;
++      inode->i_nlink = 1;
++      inode->i_size = 4096;
++      inode->i_atime = CURRENT_TIME;
++      inode->i_ctime = CURRENT_TIME;
++      inode->i_mtime = CURRENT_TIME;
++      inode->u.ext3_i.i_dtime = 0;
++      inode->i_blksize = PAGE_SIZE;   /* This is the optimal IO size
++                                       * (for stat), not the fs block
++                                       * size */
++      inode->i_blocks = 0;
++      inode->i_version = 1;
++      inode->i_generation = 0;
++
++      inode->i_op = &iopen_inode_operations;
++      inode->i_fop = &iopen_file_operations;
++      inode->i_mapping->a_ops = 0;
++
++      return 1;
++}
+Index: linux-2.4.21/fs/ext3/iopen.h
+===================================================================
+--- linux-2.4.21.orig/fs/ext3/iopen.h  2003-01-30 05:24:37.000000000 -0500
++++ linux-2.4.21/fs/ext3/iopen.h       2004-04-24 02:46:32.000000000 -0400
+@@ -0,0 +1,15 @@
++/*
++ * iopen.h
++ *
++ * Special support for opening files by inode number.
++ *
++ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu).
++ *
++ * This file may be redistributed under the terms of the GNU General
++ * Public License.
++ */
++
++extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry);
++extern int ext3_iopen_get_inode(struct inode *inode);
++extern struct dentry *iopen_connect_dentry(struct dentry *de,
++                                         struct inode *inode);
+Index: linux-2.4.21/fs/ext3/namei.c
+===================================================================
+--- linux-2.4.21.orig/fs/ext3/namei.c  2004-04-24 02:46:19.000000000 -0400
++++ linux-2.4.21/fs/ext3/namei.c       2004-04-24 02:46:32.000000000 -0400
+@@ -36,7 +36,7 @@
+ #include <linux/string.h>
+ #include <linux/locks.h>
+ #include <linux/quotaops.h>
+-
++#include "iopen.h"
+ 
+ /*
+  * define how far ahead to read directories while searching them.
+@@ -928,10 +928,14 @@
+       struct inode * inode;
+       struct ext3_dir_entry_2 * de;
+       struct buffer_head * bh;
++      struct dentry *alternate = NULL;
+ 
+       if (dentry->d_name.len > EXT3_NAME_LEN)
+               return ERR_PTR(-ENAMETOOLONG);
+ 
++      if (ext3_check_for_iopen(dir, dentry))
++              return NULL;
++
+       bh = ext3_find_entry(dentry, &de);
+       inode = NULL;
+       if (bh) {
+@@ -943,7 +947,28 @@
+                       return ERR_PTR(-EACCES);
+               }
+       }
+-      d_add(dentry, inode);
++
++      /* verify this dentry is really new */
++      assert(!dentry->d_inode);
++      assert(list_empty(&dentry->d_alias));           /* d_instantiate */
++      assert(list_empty(&dentry->d_hash));            /* d_rehash */
++      assert(list_empty(&dentry->d_subdirs));
++
++      spin_lock(&dcache_lock);
++      if (inode && (alternate = iopen_connect_dentry(dentry, inode))) {
++              spin_unlock(&dcache_lock);
++              iput(inode);
++              return alternate;
++      }
++
++      /* d_add(), but don't drop dcache_lock before adding dentry to inode */
++      if (inode)                                      /* d_instantiate */
++              list_add(&dentry->d_alias, &inode->i_dentry);
++      dentry->d_inode = inode;
++
++      __d_rehash(dentry, 0);                          /* d_rehash */
++      spin_unlock(&dcache_lock);
++
+       return NULL;
+ }
+ 
+Index: linux-2.4.21/fs/ext3/super.c
+===================================================================
+--- linux-2.4.21.orig/fs/ext3/super.c  2004-04-24 02:46:19.000000000 -0400
++++ linux-2.4.21/fs/ext3/super.c       2004-04-24 02:46:32.000000000 -0400
+@@ -869,6 +869,18 @@
+                        || !strcmp (this_char, "quota")
+                        || !strcmp (this_char, "usrquota"))
+                       /* Don't do anything ;-) */ ;
++              else if (!strcmp (this_char, "iopen")) {
++                      set_opt (sbi->s_mount_opt, IOPEN);
++                      clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
++              }
++              else if (!strcmp (this_char, "noiopen")) {
++                      clear_opt (sbi->s_mount_opt, IOPEN);
++                      clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
++              }
++              else if (!strcmp (this_char, "iopen_nopriv")) {
++                      set_opt (sbi->s_mount_opt, IOPEN);
++                      set_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
++              }
+               else if (!strcmp (this_char, "journal")) {
+                       /* @@@ FIXME */
+                       /* Eventually we will want to be able to create
+Index: linux-2.4.21/include/linux/ext3_fs.h
+===================================================================
+--- linux-2.4.21.orig/include/linux/ext3_fs.h  2004-04-24 02:46:19.000000000 -0400
++++ linux-2.4.21/include/linux/ext3_fs.h       2004-04-24 02:46:32.000000000 -0400
+@@ -324,6 +324,8 @@
+ #define EXT3_MOUNT_XATTR_USER         0x4000  /* Extended user attributes */
+ #define EXT3_MOUNT_POSIX_ACL          0x8000  /* POSIX Access Control Lists */
+ #define EXT3_MOUNT_ASYNCDEL           0x20000 /* Delayed deletion */
++#define EXT3_MOUNT_IOPEN              0x40000 /* Allow access via iopen */
++#define EXT3_MOUNT_IOPEN_NOPRIV               0x80000 /* Make iopen world-readable */
+ 
+ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
+ #ifndef _LINUX_EXT2_FS_H
diff --git a/lustre/kernel_patches/patches/kernel_text_address-2.4.21-sles8sp3.patch b/lustre/kernel_patches/patches/kernel_text_address-2.4.21-sles8sp3.patch

new file mode 100644 (file)

index 0000000..0541a48
--- /dev/null
+++ b/lustre/kernel_patches/patches/kernel_text_address-2.4.21-sles8sp3.patch
@@ -0,0 +1,115 @@
+Index: linux-2.4.21/arch/um/kernel/Makefile
+===================================================================
+--- linux-2.4.21.orig/arch/um/kernel/Makefile  2004-04-24 02:37:58.000000000 -0400
++++ linux-2.4.21/arch/um/kernel/Makefile       2004-04-24 02:51:03.000000000 -0400
+@@ -37,7 +37,8 @@
+ export-objs-$(CONFIG_GPROF) += gprof_syms.o
+ export-objs-$(CONFIG_GCOV) += gmon_syms.o
+ 
+-export-objs = ksyms.o process_kern.o signal_kern.o user_syms.o $(export-objs-y)
++export-objs = ksyms.o process_kern.o signal_kern.o user_syms.o sysrq.o \
++      $(export-objs-y)
+ 
+ CFLAGS_user_syms.o = -D__AUTOCONF_INCLUDED__ $(DMODULES-y) $(DMODVERSIONS-y) \
+       -I/usr/include -I../include
+Index: linux-2.4.21/arch/um/kernel/sysrq.c
+===================================================================
+--- linux-2.4.21.orig/arch/um/kernel/sysrq.c   2004-04-24 02:37:58.000000000 -0400
++++ linux-2.4.21/arch/um/kernel/sysrq.c        2004-04-24 02:51:03.000000000 -0400
+@@ -86,6 +86,37 @@
+       show_trace((unsigned long *)esp);
+ }
+ 
++#ifdef CONFIG_MODULES
++extern struct module *module_list;
++extern struct module kernel_module;
++#endif
++
++int is_kernel_text_address(unsigned long addr)
++{
++      int retval = 0;
++#ifdef CONFIG_MODULES
++      struct module *mod;
++#endif
++      if (addr >= (unsigned long) &_stext &&
++          addr <= (unsigned long) &_etext)
++              return 1;
++
++#ifdef CONFIG_MODULES
++      for (mod = module_list; mod != &kernel_module; mod = mod->next) {
++              /* mod_bound tests for addr being inside the vmalloc'ed
++               * module area. Of course it'd be better to test only
++               * for the .text subset... */
++              if (mod_bound(addr, 0, mod)) {
++                      retval = 1;
++                      break;
++              }
++      }
++#endif
++      return retval;
++}
++
++EXPORT_SYMBOL(is_kernel_text_address);
++
+ /*
+  * Overrides for Emacs so that we follow Linus's tabbing style.
+  * Emacs will notice this stuff at the end of the file and automatically
+Index: linux-2.4.21/arch/i386/kernel/Makefile
+===================================================================
+--- linux-2.4.21.orig/arch/i386/kernel/Makefile        2004-04-24 02:39:05.000000000 -0400
++++ linux-2.4.21/arch/i386/kernel/Makefile     2004-04-24 02:51:26.000000000 -0400
+@@ -20,7 +20,7 @@
+ 
+ O_TARGET := kernel.o
+ 
+-export-objs     := mca.o mtrr.o msr.o cpuid.o microcode.o i386_ksyms.o time.o traps.o dr_alloc.o
++export-objs     := mca.o mtrr.o msr.o cpuid.o microcode.o i386_ksyms.o time.o traps.o dr_alloc.o traps.o
+ 
+ ifdef CONFIG_X86_SPEEDSTEP_ICH
+ export-objs   += speedstep-lib.o
+Index: linux-2.4.21/arch/i386/kernel/traps.c
+===================================================================
+--- linux-2.4.21.orig/arch/i386/kernel/traps.c 2004-04-24 02:42:58.000000000 -0400
++++ linux-2.4.21/arch/i386/kernel/traps.c      2004-04-24 02:51:03.000000000 -0400
+@@ -1339,3 +1339,41 @@
+       cobalt_init();
+ #endif
+ }
++
++#ifdef CONFIG_MODULES
++extern struct module *module_list;
++extern struct module kernel_module;
++#endif
++/* Return 1 if addr lies in [_stext, _etext] or in a module's area, else 0. */
++int is_kernel_text_address(unsigned long addr)
++{
++       int retval = 0;
++#ifdef CONFIG_MODULES
++       struct module *mod;
++#endif
++       if (addr >= (unsigned long) &_stext &&
++           addr <= (unsigned long) &_etext)
++               return 1;
++
++#ifdef CONFIG_MODULES
++       for (mod = module_list; mod != &kernel_module; mod = mod->next) {
++               /* mod_bound tests for addr being inside the vmalloc'ed
++                * module area. Of course it'd be better to test only
++                * for the .text subset... */
++               if (mod_bound(addr, 0, mod)) {
++                       retval = 1;
++                       break;
++               }
++       }
++#endif
++
++       return retval;
++}
++
++int lookup_symbol(unsigned long address, char *buf, int buflen)
++{
++      return -ENOSYS;
++}
++
++EXPORT_SYMBOL_GPL(is_kernel_text_address);
++EXPORT_SYMBOL_GPL(lookup_symbol);
diff --git a/lustre/kernel_patches/patches/linux-2.4.19-pre1-xattr-0.8.54.patch b/lustre/kernel_patches/patches/linux-2.4.19-pre1-xattr-0.8.54.patch

index 4cf7592..e694068 100644 (file)
--- a/lustre/kernel_patches/patches/linux-2.4.19-pre1-xattr-0.8.54.patch
+++ b/lustre/kernel_patches/patches/linux-2.4.19-pre1-xattr-0.8.54.patch
@@ -1881,7 +1881,7 @@ Index: linux-2.4.19-pre1/fs/ext2/xattr.c
  +              } else if (old_bh && header == HDR(old_bh)) {
  +                      /* Keep this block. */
  +                      new_bh = old_bh;
-+                      ext2_xattr_cache_insert(new_bh);
++                      (void)ext2_xattr_cache_insert(new_bh);
  +              } else {
  +                      /* We need to allocate a new block */
  +                      int force = EXT2_I(inode)->i_file_acl != 0;
@@ -1900,7 +1900,7 @@ Index: linux-2.4.19-pre1/fs/ext2/xattr.c
  +                      memcpy(new_bh->b_data, header, new_bh->b_size);
  +                      mark_buffer_uptodate(new_bh, 1);
  +                      unlock_buffer(new_bh);
-+                      ext2_xattr_cache_insert(new_bh);
++                      (void)ext2_xattr_cache_insert(new_bh);
  +                      
  +                      ext2_xattr_update_super_block(sb);
  +              }
@@ -3563,7 +3563,7 @@ Index: linux-2.4.19-pre1/fs/ext3/xattr.c
  +              } else if (old_bh && header == HDR(old_bh)) {
  +                      /* Keep this block. */
  +                      new_bh = old_bh;
-+                      ext3_xattr_cache_insert(new_bh);
++                      (void)ext3_xattr_cache_insert(new_bh);
  +              } else {
  +                      /* We need to allocate a new block */
  +                      int force = EXT3_I(inode)->i_file_acl != 0;
@@ -3588,7 +3588,7 @@ Index: linux-2.4.19-pre1/fs/ext3/xattr.c
  +                      memcpy(new_bh->b_data, header, new_bh->b_size);
  +                      mark_buffer_uptodate(new_bh, 1);
  +                      unlock_buffer(new_bh);
-+                      ext3_xattr_cache_insert(new_bh);
++                      (void)ext3_xattr_cache_insert(new_bh);
  +                      
  +                      ext3_xattr_update_super_block(handle, sb);
  +              }
diff --git a/lustre/kernel_patches/patches/linux-2.4.19-xattr-0.8.54-suse.patch b/lustre/kernel_patches/patches/linux-2.4.19-xattr-0.8.54-suse.patch

index 26d3af9..79c48fb 100644 (file)
--- a/lustre/kernel_patches/patches/linux-2.4.19-xattr-0.8.54-suse.patch
+++ b/lustre/kernel_patches/patches/linux-2.4.19-xattr-0.8.54-suse.patch
@@ -2,10 +2,10 @@
   ext3/ext3-exports.c |   13 +++++++++++++
   2 files changed, 14 insertions(+), 2 deletions(-)
  
-Index: linux-2.4.19.SuSE/fs/ext2/super.c
+Index: linux-2.4.19/fs/ext2/super.c
  ===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext2/super.c     Mon Jan 27 05:08:00 2003
-+++ linux-2.4.19.SuSE/fs/ext2/super.c  Sun Nov 16 00:40:59 2003
+--- linux-2.4.19.orig/fs/ext2/super.c  2004-04-23 17:53:55.000000000 -0400
++++ linux-2.4.19/fs/ext2/super.c       2004-04-23 22:30:41.000000000 -0400
  @@ -70,6 +70,7 @@
   {
         va_list args;
@@ -14,11 +14,11 @@ Index: linux-2.4.19.SuSE/fs/ext2/super.c
         if (!(sb->s_flags & MS_RDONLY)) {
                 sb->u.ext2_sb.s_mount_state |= EXT2_ERROR_FS;
                 sb->u.ext2_sb.s_es->s_state =
-Index: linux-2.4.19.SuSE/fs/ext3/super.c
+Index: linux-2.4.19/fs/ext3/super.c
  ===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/super.c     Mon Jan 27 05:08:00 2003
-+++ linux-2.4.19.SuSE/fs/ext3/super.c  Sun Nov 16 00:40:59 2003
-@@ -1822,8 +1828,6 @@
+--- linux-2.4.19.orig/fs/ext3/super.c  2004-04-23 18:26:27.000000000 -0400
++++ linux-2.4.19/fs/ext3/super.c       2004-04-23 22:30:41.000000000 -0400
+@@ -1827,8 +1827,6 @@
         exit_ext3_xattr();
   }
   
@@ -27,10 +27,10 @@ Index: linux-2.4.19.SuSE/fs/ext3/super.c
   
   MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
   MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
-Index: linux-2.4.19.SuSE/fs/ext3/ext3-exports.c
+Index: linux-2.4.19/fs/ext3/ext3-exports.c
  ===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/ext3-exports.c      Sun Nov 16 00:40:58 2003
-+++ linux-2.4.19.SuSE/fs/ext3/ext3-exports.c   Sun Nov 16 00:40:59 2003
+--- linux-2.4.19.orig/fs/ext3/ext3-exports.c   2003-01-30 05:24:37.000000000 -0500
++++ linux-2.4.19/fs/ext3/ext3-exports.c        2004-04-23 22:30:41.000000000 -0400
  @@ -0,0 +1,13 @@
  +#include <linux/config.h>
  +#include <linux/module.h>
@@ -45,3 +45,16 @@ Index: linux-2.4.19.SuSE/fs/ext3/ext3-exports.c
  +EXPORT_SYMBOL(ext3_xattr_get);
  +EXPORT_SYMBOL(ext3_xattr_list);
  +EXPORT_SYMBOL(ext3_xattr_set);
+Index: linux-2.4.19/fs/ext3/Makefile
+===================================================================
+--- linux-2.4.19.orig/fs/ext3/Makefile 2004-04-23 18:26:27.000000000 -0400
++++ linux-2.4.19/fs/ext3/Makefile      2004-04-23 23:05:20.000000000 -0400
+@@ -9,7 +9,7 @@
+ 
+ O_TARGET := ext3.o
+ 
+-export-objs :=        super.o inode.o
++export-objs :=        ext3-exports.o
+ 
+ obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
+               ioctl.o namei.o super.o symlink.o hash.o
diff --git a/lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54-chaos.patch b/lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54-chaos.patch

index 811c40f..5bdfaff 100644 (file)
--- a/lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54-chaos.patch
+++ b/lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54-chaos.patch
@@ -1786,7 +1786,7 @@
  +              } else if (old_bh && header == HDR(old_bh)) {
  +                      /* Keep this block. */
  +                      new_bh = old_bh;
-+                      ext2_xattr_cache_insert(new_bh);
++                      (void)ext2_xattr_cache_insert(new_bh);
  +              } else {
  +                      /* We need to allocate a new block */
  +                      int force = EXT2_I(inode)->i_file_acl != 0;
@@ -1805,7 +1805,7 @@
  +                      memcpy(new_bh->b_data, header, new_bh->b_size);
  +                      mark_buffer_uptodate(new_bh, 1);
  +                      unlock_buffer(new_bh);
-+                      ext2_xattr_cache_insert(new_bh);
++                      (void)ext2_xattr_cache_insert(new_bh);
  +                      
  +                      ext2_xattr_update_super_block(sb);
  +              }
@@ -3450,7 +3450,7 @@
  +              } else if (old_bh && header == HDR(old_bh)) {
  +                      /* Keep this block. */
  +                      new_bh = old_bh;
-+                      ext3_xattr_cache_insert(new_bh);
++                      (void)ext3_xattr_cache_insert(new_bh);
  +              } else {
  +                      /* We need to allocate a new block */
  +                      int force = EXT3_I(inode)->i_file_acl != 0;
@@ -3475,7 +3475,7 @@
  +                      memcpy(new_bh->b_data, header, new_bh->b_size);
  +                      mark_buffer_uptodate(new_bh, 1);
  +                      unlock_buffer(new_bh);
-+                      ext3_xattr_cache_insert(new_bh);
++                      (void)ext3_xattr_cache_insert(new_bh);
  +                      
  +                      ext3_xattr_update_super_block(handle, sb);
  +              }
diff --git a/lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54-hp.patch b/lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54-hp.patch

index c9fb126..f1365d7 100644 (file)
--- a/lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54-hp.patch
+++ b/lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54-hp.patch
@@ -1294,7 +1294,7 @@
  +              } else if (old_bh && header == HDR(old_bh)) {
  +                      /* Keep this block. */
  +                      new_bh = old_bh;
-+                      ext2_xattr_cache_insert(new_bh);
++                      (void)ext2_xattr_cache_insert(new_bh);
  +              } else {
  +                      /* We need to allocate a new block */
  +                      int force = EXT2_I(inode)->i_file_acl != 0;
@@ -1313,7 +1313,7 @@
  +                      memcpy(new_bh->b_data, header, new_bh->b_size);
  +                      mark_buffer_uptodate(new_bh, 1);
  +                      unlock_buffer(new_bh);
-+                      ext2_xattr_cache_insert(new_bh);
++                      (void)ext2_xattr_cache_insert(new_bh);
  +                      
  +                      ext2_xattr_update_super_block(sb);
  +              }
@@ -2957,7 +2957,7 @@
  +              } else if (old_bh && header == HDR(old_bh)) {
  +                      /* Keep this block. */
  +                      new_bh = old_bh;
-+                      ext3_xattr_cache_insert(new_bh);
++                      (void)ext3_xattr_cache_insert(new_bh);
  +              } else {
  +                      /* We need to allocate a new block */
  +                      int force = EXT3_I(inode)->i_file_acl != 0;
@@ -2982,7 +2982,7 @@
  +                      memcpy(new_bh->b_data, header, new_bh->b_size);
  +                      mark_buffer_uptodate(new_bh, 1);
  +                      unlock_buffer(new_bh);
-+                      ext3_xattr_cache_insert(new_bh);
++                      (void)ext3_xattr_cache_insert(new_bh);
  +                      
  +                      ext3_xattr_update_super_block(handle, sb);
  +              }
diff --git a/lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54.patch b/lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54.patch

index 2e4750b..f078ebe 100644 (file)
--- a/lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54.patch
+++ b/lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54.patch
@@ -1821,7 +1821,7 @@
  +              } else if (old_bh && header == HDR(old_bh)) {
  +                      /* Keep this block. */
  +                      new_bh = old_bh;
-+                      ext2_xattr_cache_insert(new_bh);
++                      (void)ext2_xattr_cache_insert(new_bh);
  +              } else {
  +                      /* We need to allocate a new block */
  +                      int force = EXT2_I(inode)->i_file_acl != 0;
@@ -1840,7 +1840,7 @@
  +                      memcpy(new_bh->b_data, header, new_bh->b_size);
  +                      mark_buffer_uptodate(new_bh, 1);
  +                      unlock_buffer(new_bh);
-+                      ext2_xattr_cache_insert(new_bh);
++                      (void)ext2_xattr_cache_insert(new_bh);
  +                      
  +                      ext2_xattr_update_super_block(sb);
  +              }
@@ -3485,7 +3485,7 @@
  +              } else if (old_bh && header == HDR(old_bh)) {
  +                      /* Keep this block. */
  +                      new_bh = old_bh;
-+                      ext3_xattr_cache_insert(new_bh);
++                      (void)ext3_xattr_cache_insert(new_bh);
  +              } else {
  +                      /* We need to allocate a new block */
  +                      int force = EXT3_I(inode)->i_file_acl != 0;
@@ -3510,7 +3510,7 @@
  +                      memcpy(new_bh->b_data, header, new_bh->b_size);
  +                      mark_buffer_uptodate(new_bh, 1);
  +                      unlock_buffer(new_bh);
-+                      ext3_xattr_cache_insert(new_bh);
++                      (void)ext3_xattr_cache_insert(new_bh);
  +                      
  +                      ext3_xattr_update_super_block(handle, sb);
  +              }
diff --git a/lustre/kernel_patches/patches/linux-2.4.21-xattr-0.8.54-chaos.patch b/lustre/kernel_patches/patches/linux-2.4.21-xattr-0.8.54-chaos.patch

index e18ac9d..8e198f8 100644 (file)
--- a/lustre/kernel_patches/patches/linux-2.4.21-xattr-0.8.54-chaos.patch
+++ b/lustre/kernel_patches/patches/linux-2.4.21-xattr-0.8.54-chaos.patch
@@ -1315,7 +1315,7 @@ Index: linux-2.4.21-chaos/fs/ext2/xattr.c
  +              } else if (old_bh && header == HDR(old_bh)) {
  +                      /* Keep this block. */
  +                      new_bh = old_bh;
-+                      ext2_xattr_cache_insert(new_bh);
++                      (void)ext2_xattr_cache_insert(new_bh);
  +              } else {
  +                      /* We need to allocate a new block */
  +                      int force = EXT2_I(inode)->i_file_acl != 0;
@@ -1334,7 +1334,7 @@ Index: linux-2.4.21-chaos/fs/ext2/xattr.c
  +                      memcpy(new_bh->b_data, header, new_bh->b_size);
  +                      mark_buffer_uptodate(new_bh, 1);
  +                      unlock_buffer(new_bh);
-+                      ext2_xattr_cache_insert(new_bh);
++                      (void)ext2_xattr_cache_insert(new_bh);
  +                      
  +                      ext2_xattr_update_super_block(sb);
  +              }
diff --git a/lustre/kernel_patches/patches/linux-2.4.21-xattr-0.8.54-suse.patch b/lustre/kernel_patches/patches/linux-2.4.21-xattr-0.8.54-suse.patch

index 22dad3c..da8c15c 100644 (file)
--- a/lustre/kernel_patches/patches/linux-2.4.21-xattr-0.8.54-suse.patch
+++ b/lustre/kernel_patches/patches/linux-2.4.21-xattr-0.8.54-suse.patch
@@ -1683,7 +1683,7 @@
  +              } else if (old_bh && header == HDR(old_bh)) {
  +                      /* Keep this block. */
  +                      new_bh = old_bh;
-+                      ext2_xattr_cache_insert(new_bh);
++                      (void)ext2_xattr_cache_insert(new_bh);
  +              } else {
  +                      /* We need to allocate a new block */
  +                      int force = EXT2_I(inode)->i_file_acl != 0;
@@ -1702,7 +1702,7 @@
  +                      memcpy(new_bh->b_data, header, new_bh->b_size);
  +                      mark_buffer_uptodate(new_bh, 1);
  +                      unlock_buffer(new_bh);
-+                      ext2_xattr_cache_insert(new_bh);
++                      (void)ext2_xattr_cache_insert(new_bh);
  +                      
  +                      ext2_xattr_update_super_block(sb);
  +              }
@@ -3347,7 +3347,7 @@
  +              } else if (old_bh && header == HDR(old_bh)) {
  +                      /* Keep this block. */
  +                      new_bh = old_bh;
-+                      ext3_xattr_cache_insert(new_bh);
++                      (void)ext3_xattr_cache_insert(new_bh);
  +              } else {
  +                      /* We need to allocate a new block */
  +                      int force = EXT3_I(inode)->i_file_acl != 0;
@@ -3372,7 +3372,7 @@
  +                      memcpy(new_bh->b_data, header, new_bh->b_size);
  +                      mark_buffer_uptodate(new_bh, 1);
  +                      unlock_buffer(new_bh);
-+                      ext3_xattr_cache_insert(new_bh);
++                      (void)ext3_xattr_cache_insert(new_bh);
  +                      
  +                      ext3_xattr_update_super_block(handle, sb);
  +              }
diff --git a/lustre/kernel_patches/patches/linux-2.4.22-xattr-0.8.54.patch b/lustre/kernel_patches/patches/linux-2.4.22-xattr-0.8.54.patch

index b63cc2e..fd5f0c2 100644 (file)
--- a/lustre/kernel_patches/patches/linux-2.4.22-xattr-0.8.54.patch
+++ b/lustre/kernel_patches/patches/linux-2.4.22-xattr-0.8.54.patch
@@ -1738,7 +1738,7 @@ Index: linux-2.4.22-vanilla/fs/ext2/xattr.c
  +              } else if (old_bh && header == HDR(old_bh)) {
  +                      /* Keep this block. */
  +                      new_bh = old_bh;
-+                      ext2_xattr_cache_insert(new_bh);
++                      (void)ext2_xattr_cache_insert(new_bh);
  +              } else {
  +                      /* We need to allocate a new block */
  +                      int force = EXT2_I(inode)->i_file_acl != 0;
@@ -1757,7 +1757,7 @@ Index: linux-2.4.22-vanilla/fs/ext2/xattr.c
  +                      memcpy(new_bh->b_data, header, new_bh->b_size);
  +                      mark_buffer_uptodate(new_bh, 1);
  +                      unlock_buffer(new_bh);
-+                      ext2_xattr_cache_insert(new_bh);
++                      (void)ext2_xattr_cache_insert(new_bh);
  +                      
  +                      ext2_xattr_update_super_block(sb);
  +              }
@@ -3426,7 +3426,7 @@ Index: linux-2.4.22-vanilla/fs/ext3/xattr.c
  +              } else if (old_bh && header == HDR(old_bh)) {
  +                      /* Keep this block. */
  +                      new_bh = old_bh;
-+                      ext3_xattr_cache_insert(new_bh);
++                      (void)ext3_xattr_cache_insert(new_bh);
  +              } else {
  +                      /* We need to allocate a new block */
  +                      int force = EXT3_I(inode)->i_file_acl != 0;
@@ -3451,7 +3451,7 @@ Index: linux-2.4.22-vanilla/fs/ext3/xattr.c
  +                      memcpy(new_bh->b_data, header, new_bh->b_size);
  +                      mark_buffer_uptodate(new_bh, 1);
  +                      unlock_buffer(new_bh);
-+                      ext3_xattr_cache_insert(new_bh);
++                      (void)ext3_xattr_cache_insert(new_bh);
  +                      
  +                      ext3_xattr_update_super_block(handle, sb);
  +              }
diff --git a/lustre/kernel_patches/patches/linux-2.4.24-xattr-0.8.54.patch b/lustre/kernel_patches/patches/linux-2.4.24-xattr-0.8.54.patch

index 0109dd9..4cf5c2c 100644 (file)
--- a/lustre/kernel_patches/patches/linux-2.4.24-xattr-0.8.54.patch
+++ b/lustre/kernel_patches/patches/linux-2.4.24-xattr-0.8.54.patch
@@ -1738,7 +1738,7 @@ Index: linux-2.4.24-vanilla/fs/ext2/xattr.c
  +              } else if (old_bh && header == HDR(old_bh)) {
  +                      /* Keep this block. */
  +                      new_bh = old_bh;
-+                      ext2_xattr_cache_insert(new_bh);
++                      (void)ext2_xattr_cache_insert(new_bh);
  +              } else {
  +                      /* We need to allocate a new block */
  +                      int force = EXT2_I(inode)->i_file_acl != 0;
@@ -1757,7 +1757,7 @@ Index: linux-2.4.24-vanilla/fs/ext2/xattr.c
  +                      memcpy(new_bh->b_data, header, new_bh->b_size);
  +                      mark_buffer_uptodate(new_bh, 1);
  +                      unlock_buffer(new_bh);
-+                      ext2_xattr_cache_insert(new_bh);
++                      (void)ext2_xattr_cache_insert(new_bh);
  +                      
  +                      ext2_xattr_update_super_block(sb);
  +              }
@@ -3426,7 +3426,7 @@ Index: linux-2.4.24-vanilla/fs/ext3/xattr.c
  +              } else if (old_bh && header == HDR(old_bh)) {
  +                      /* Keep this block. */
  +                      new_bh = old_bh;
-+                      ext3_xattr_cache_insert(new_bh);
++                      (void)ext3_xattr_cache_insert(new_bh);
  +              } else {
  +                      /* We need to allocate a new block */
  +                      int force = EXT3_I(inode)->i_file_acl != 0;
@@ -3451,7 +3451,7 @@ Index: linux-2.4.24-vanilla/fs/ext3/xattr.c
  +                      memcpy(new_bh->b_data, header, new_bh->b_size);
  +                      mark_buffer_uptodate(new_bh, 1);
  +                      unlock_buffer(new_bh);
-+                      ext3_xattr_cache_insert(new_bh);
++                      (void)ext3_xattr_cache_insert(new_bh);
  +                      
  +                      ext3_xattr_update_super_block(handle, sb);
  +              }
diff --git a/lustre/kernel_patches/patches/lustre_version.patch b/lustre/kernel_patches/patches/lustre_version.patch

index 7ebb838..77c5531 100644 (file)
--- a/lustre/kernel_patches/patches/lustre_version.patch
+++ b/lustre/kernel_patches/patches/lustre_version.patch
@@ -7,6 +7,6 @@
  --- /dev/null  Fri Aug 30 17:31:37 2002
  +++ linux-2.4.18-18.8.0-l12-braam/include/linux/lustre_version.h       Thu Feb 13 07:58:33 2003
  @@ -0,0 +1 @@
-+#define LUSTRE_KERNEL_VERSION 34
++#define LUSTRE_KERNEL_VERSION 35
  
  _
diff --git a/lustre/kernel_patches/patches/mkdep-revert-rh-2.4.patch b/lustre/kernel_patches/patches/mkdep-revert-rh-2.4.patch

new file mode 100644 (file)

index 0000000..5cc34b8
--- /dev/null
+++ b/lustre/kernel_patches/patches/mkdep-revert-rh-2.4.patch
@@ -0,0 +1,50 @@
+Index: linux-2.4.20-30.9/scripts/mkdep.c
+===================================================================
+--- linux-2.4.20-30.9.orig/scripts/mkdep.c     2004-02-19 19:40:51.000000000 -0500
++++ linux-2.4.20-30.9/scripts/mkdep.c  2004-04-28 17:24:54.000000000 -0400
+@@ -48,8 +48,6 @@
+ char __depname[512] = "\n\t@touch ";
+ #define depname (__depname+9)
+ int hasdep;
+-char cwd[PATH_MAX];
+-int lcwd;
+ 
+ struct path_struct {
+       int len;
+@@ -204,22 +202,8 @@
+               memcpy(path->buffer+path->len, name, len);
+               path->buffer[path->len+len] = '\0';
+               if (access(path->buffer, F_OK) == 0) {
+-                      int l = lcwd + strlen(path->buffer);
+-                      char name2[l+2], *p;
+-                      if (path->buffer[0] == '/') {
+-                              memcpy(name2, path->buffer, l+1);
+-                      }
+-                      else {
+-                              memcpy(name2, cwd, lcwd);
+-                              name2[lcwd] = '/';
+-                              memcpy(name2+lcwd+1, path->buffer, path->len+len+1);
+-                      }
+-                      while ((p = strstr(name2, "/../"))) {
+-                              *p = '\0';
+-                              strcpy(strrchr(name2, '/'), p+3);
+-                      }
+                       do_depname();
+-                      printf(" \\\n   %s", name2);
++                      printf(" \\\n   %s", path->buffer);
+                       return;
+               }
+       }
+@@ -601,12 +585,6 @@
+               return 1;
+       }
+ 
+-      if (!getcwd(cwd, sizeof(cwd))) {
+-              fprintf(stderr, "mkdep: getcwd() failed %m\n");
+-              return 1;
+-      }
+-      lcwd = strlen(cwd);
+-
+       add_path(".");          /* for #include "..." */
+ 
+       while (++argv, --argc > 0) {
diff --git a/lustre/kernel_patches/patches/tcp-zero-copy-2.4.21-sles8sp3.patch b/lustre/kernel_patches/patches/tcp-zero-copy-2.4.21-sles8sp3.patch

new file mode 100644 (file)

index 0000000..a7859bd
--- /dev/null
+++ b/lustre/kernel_patches/patches/tcp-zero-copy-2.4.21-sles8sp3.patch
@@ -0,0 +1,458 @@
+Index: linux-2.4.21/include/linux/skbuff.h
+===================================================================
+--- linux-2.4.21.orig/include/linux/skbuff.h   2004-04-24 02:38:40.000000000 -0400
++++ linux-2.4.21/include/linux/skbuff.h        2004-04-24 02:47:46.000000000 -0400
+@@ -116,6 +116,30 @@
+       __u16 size;
+ };
+ 
++/* Support for callback when skb data has been released */
++typedef struct zccd                           /* Zero Copy Callback Descriptor */
++{                                             /* (embed as first member of custom struct) */
++      atomic_t        zccd_count;             /* reference count */
++      void           (*zccd_destructor)(struct zccd *); /* callback when refcount reaches zero */
++} zccd_t;
++
++static inline void zccd_init (zccd_t *d, void (*callback)(zccd_t *)) /* set up a descriptor */
++{
++      atomic_set (&d->zccd_count, 1);         /* starts with a single reference */
++      d->zccd_destructor = callback;          /* run by zccd_put() when the count hits zero */
++}
++
++static inline void zccd_get (zccd_t *d)               /* take a reference */
++{
++      atomic_inc (&d->zccd_count);
++}
++
++static inline void zccd_put (zccd_t *d)               /* release a reference */
++{
++      if (atomic_dec_and_test (&d->zccd_count))       /* that was the last reference */
++              (d->zccd_destructor)(d);                /* hand the descriptor to its callback */
++}
++
+ /* This data is invariant across clones and lives at
+  * the end of the header data, ie. at skb->end.
+  */
+@@ -123,6 +147,12 @@
+       atomic_t        dataref;
+       unsigned int    nr_frags;
+       struct sk_buff  *frag_list;
++      zccd_t          *zccd;                  /* zero copy descriptor */
++      zccd_t          *zccd2;                 /* 2nd zero copy descriptor */
++      /* NB we expect zero-copy data to be at least 1 packet, so
++       * having 2 zccds means we don't unnecessarily split the packet
++       * where consecutive zero-copy sends abut.
++       */
+       skb_frag_t      frags[MAX_SKB_FRAGS];
+ };
+ 
+Index: linux-2.4.21/include/net/tcp.h
+===================================================================
+--- linux-2.4.21.orig/include/net/tcp.h        2004-04-24 02:39:20.000000000 -0400
++++ linux-2.4.21/include/net/tcp.h     2004-04-24 02:48:27.000000000 -0400
+@@ -646,6 +646,8 @@
+ 
+ extern int                    tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size);
+ extern ssize_t                        tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags);
++extern ssize_t                        tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, size_t size,
++                                                int flags, zccd_t *zccd);
+ 
+ extern int                    tcp_ioctl(struct sock *sk, 
+                                         int cmd, 
+@@ -742,6 +744,10 @@
+                                           struct msghdr *msg,
+                                           int len, int nonblock, 
+                                           int flags, int *addr_len);
++extern int                      tcp_recvpackets(struct sock *sk,
++                                                struct sk_buff_head *packets,
++                                                int len, int nonblock);
++
+ 
+ extern int                    tcp_listen_start(struct sock *sk);
+ 
+Index: linux-2.4.21/net/netsyms.c
+===================================================================
+--- linux-2.4.21.orig/net/netsyms.c    2004-04-24 02:39:13.000000000 -0400
++++ linux-2.4.21/net/netsyms.c 2004-04-24 02:47:46.000000000 -0400
+@@ -403,6 +403,8 @@
+ 
+ #endif
+ 
++EXPORT_SYMBOL(tcp_sendpage_zccd);
++EXPORT_SYMBOL(tcp_recvpackets);
+ EXPORT_SYMBOL(tcp_read_sock);
+ 
+ EXPORT_SYMBOL(netlink_set_err);
+Index: linux-2.4.21/net/core/skbuff.c
+===================================================================
+--- linux-2.4.21.orig/net/core/skbuff.c        2004-04-24 02:38:40.000000000 -0400
++++ linux-2.4.21/net/core/skbuff.c     2004-04-24 02:47:46.000000000 -0400
+@@ -208,6 +208,8 @@
+       atomic_set(&(skb_shinfo(skb)->dataref), 1);
+       skb_shinfo(skb)->nr_frags = 0;
+       skb_shinfo(skb)->frag_list = NULL;
++      skb_shinfo(skb)->zccd = NULL;           /* skbuffs kick off with NO user zero copy descriptors */
++      skb_shinfo(skb)->zccd2 = NULL;
+       return skb;
+ 
+ nodata:
+@@ -277,6 +279,10 @@
+ {
+       if (!skb->cloned ||
+           atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
++              if (skb_shinfo(skb)->zccd != NULL) /* zero copy callback descriptor? */
++                      zccd_put (skb_shinfo(skb)->zccd); /* release hold */
++              if (skb_shinfo(skb)->zccd2 != NULL) /* 2nd zero copy callback descriptor? */
++                      zccd_put (skb_shinfo(skb)->zccd2); /* release hold */
+               if (skb_shinfo(skb)->nr_frags) {
+                       int i;
+                       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+@@ -535,6 +541,8 @@
+       atomic_set(&(skb_shinfo(skb)->dataref), 1);
+       skb_shinfo(skb)->nr_frags = 0;
+       skb_shinfo(skb)->frag_list = NULL;
++      skb_shinfo(skb)->zccd = NULL;           /* copied data => no user zero copy descriptor */
++      skb_shinfo(skb)->zccd2 = NULL;
+ 
+       /* We are no longer a clone, even if we were. */
+       skb->cloned = 0;
+@@ -581,6 +589,14 @@
+       n->data_len = skb->data_len;
+       n->len = skb->len;
+ 
++      if (skb_shinfo(skb)->zccd != NULL)      /* user zero copy descriptor? */
++              zccd_get (skb_shinfo(skb)->zccd); /* 1 more ref (pages are shared) */
++      skb_shinfo(n)->zccd = skb_shinfo(skb)->zccd;
++
++      if (skb_shinfo(skb)->zccd2 != NULL)     /* 2nd user zero copy descriptor? */
++              zccd_get (skb_shinfo(skb)->zccd2); /* 1 more ref (pages are shared) */
++      skb_shinfo(n)->zccd2 = skb_shinfo(skb)->zccd2;
++
+       if (skb_shinfo(skb)->nr_frags) {
+               int i;
+ 
+@@ -623,6 +639,8 @@
+       u8 *data;
+       int size = nhead + (skb->end - skb->head) + ntail;
+       long off;
++      zccd_t *zccd = skb_shinfo(skb)->zccd;   /* stash user zero copy descriptor */
++      zccd_t *zccd2 = skb_shinfo(skb)->zccd2; /* stash 2nd user zero copy descriptor */
+ 
+       if (skb_shared(skb))
+               BUG();
+@@ -644,6 +662,11 @@
+       if (skb_shinfo(skb)->frag_list)
+               skb_clone_fraglist(skb);
+ 
++      if (zccd != NULL)                       /* user zero copy descriptor? */
++              zccd_get (zccd);                /* extra ref (pages are shared) */
++      if (zccd2 != NULL)                      /* 2nd user zero copy descriptor? */
++              zccd_get (zccd2);               /* extra ref (pages are shared) */
++
+       skb_release_data(skb);
+ 
+       off = (data+nhead) - skb->head;
+@@ -658,6 +681,8 @@
+       skb->nh.raw += off;
+       skb->cloned = 0;
+       atomic_set(&skb_shinfo(skb)->dataref, 1);
++      skb_shinfo(skb)->zccd = zccd;
++      skb_shinfo(skb)->zccd2 = zccd2;
+       return 0;
+ 
+ nodata:
+Index: linux-2.4.21/net/ipv4/tcp.c
+===================================================================
+--- linux-2.4.21.orig/net/ipv4/tcp.c   2004-04-24 02:39:21.000000000 -0400
++++ linux-2.4.21/net/ipv4/tcp.c        2004-04-24 02:50:40.000000000 -0400
+@@ -748,7 +748,7 @@
+       goto out;
+ }
+ 
+-ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags);
++ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags, zccd_t *zccd);
+ 
+ static inline int
+ can_coalesce(struct sk_buff *skb, int i, struct page *page, int off)
+@@ -827,7 +827,7 @@
+       return err;
+ }
+ 
+-ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags)
++ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags, zccd_t *zccd)
+ {
+       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+       int mss_now;
+@@ -875,6 +875,17 @@
+                       copy = size;
+ 
+               i = skb_shinfo(skb)->nr_frags;
++
++              if (zccd != NULL &&             /* this is a zcc I/O */
++                  skb_shinfo(skb)->zccd != NULL && /* skb is part of a zcc I/O */
++                  skb_shinfo(skb)->zccd2 != NULL &&
++                  skb_shinfo(skb)->zccd != zccd && /* not the same one */
++                  skb_shinfo(skb)->zccd2 != zccd)
++              {
++                      tcp_mark_push (tp, skb);
++                      goto new_segment;
++              }
++
+               if (can_coalesce(skb, i, page, offset)) {
+                       skb_shinfo(skb)->frags[i-1].size += copy;
+               } else if (i < MAX_SKB_FRAGS) {
+@@ -885,6 +896,20 @@
+                       goto new_segment;
+               }
+ 
++              if (zccd != NULL &&     /* this is a zcc I/O */
++                  skb_shinfo(skb)->zccd != zccd && /* not already referencing this zccd */
++                  skb_shinfo(skb)->zccd2 != zccd)
++              {
++                      zccd_get (zccd);        /* bump ref count */
++
++                      BUG_TRAP (skb_shinfo(skb)->zccd2 == NULL);
++
++                      if (skb_shinfo(skb)->zccd == NULL) /* reference this zccd */
++                              skb_shinfo(skb)->zccd = zccd;
++                      else
++                              skb_shinfo(skb)->zccd2 = zccd;
++              }
++
+               skb->len += copy;
+               skb->data_len += copy;
+               skb->ip_summed = CHECKSUM_HW;
+@@ -948,7 +973,29 @@
+ 
+       lock_sock(sk);
+       TCP_CHECK_TIMER(sk);
+-      res = do_tcp_sendpages(sk, &page, offset, size, flags);
++      res = do_tcp_sendpages(sk, &page, offset, size, flags, NULL);
++      TCP_CHECK_TIMER(sk);
++      release_sock(sk);
++      return res;
++}
++
++ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, size_t size,
++                          int flags, zccd_t *zccd)
++{
++        ssize_t res;
++        struct sock *sk = sock->sk;
++
++#define TCP_ZC_CSUM_FLAGS (NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM)
++
++        if (!(sk->route_caps & NETIF_F_SG) ||   /* caller shouldn't waste her time */
++            !(sk->route_caps & TCP_ZC_CSUM_FLAGS)) /* on double mapping */
++                BUG ();
++
++        lock_sock(sk);
++        TCP_CHECK_TIMER(sk);
++
++        res = do_tcp_sendpages(sk, &page, 0, size, flags, zccd);
++
+       TCP_CHECK_TIMER(sk);
+       release_sock(sk);
+       return res;
+@@ -1772,6 +1819,202 @@
+       goto out;
+ }
+ 
++int tcp_recvpackets (struct sock *sk, struct sk_buff_head *packets,
++                   int len, int nonblock)
++{       /* queue up to 'len' bytes from sk->receive_queue onto 'packets' as skbs; returns bytes queued or a negative errno */
++      struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
++      int copied;
++      long timeo;
++
++      BUG_TRAP (len > 0);
++      /*BUG_TRAP ((flags & (MSG_OOB | MSG_PEEK | MSG_TRUNC)) == 0);*/
++
++      lock_sock(sk);
++
++      TCP_CHECK_TIMER(sk);
++
++      copied = -ENOTCONN;
++      if (sk->state == TCP_LISTEN)
++              goto out;
++
++      copied = 0;
++      timeo = sock_rcvtimeo(sk, nonblock);
++
++      do {
++              struct sk_buff * skb;
++              u32 offset;
++              unsigned long used;
++              int exhausted;
++              int eaten;
++
++              /* Are we at urgent data? Stop if we have read anything. */
++              if (copied && tp->urg_data && tp->urg_seq == tp->copied_seq)
++                      break;
++
++              /* We need to check signals first, to get correct SIGURG
++               * handling. FIXME: Need to check this doesn't impact 1003.1g
++               * and move it down to the bottom of the loop
++               */
++              if (signal_pending(current)) {
++                      if (copied)
++                              break;
++                      copied = timeo ? sock_intr_errno(timeo) : -EAGAIN;
++                      break;
++              }
++
++              /* Next get a buffer. */
++
++              skb = skb_peek(&sk->receive_queue);
++
++              if (skb == NULL)                /* nothing ready */
++              {
++                      if (copied) {
++                              if (sk->err ||
++                                  sk->state == TCP_CLOSE ||
++                                  (sk->shutdown & RCV_SHUTDOWN) ||
++                                  !timeo ||
++                                  (0))
++                                      break;
++                      } else {
++                              if (sk->done)
++                                      break;
++
++                              if (sk->err) {
++                                      copied = sock_error(sk);
++                                      break;
++                              }
++
++                              if (sk->shutdown & RCV_SHUTDOWN)
++                                      break;
++
++                              if (sk->state == TCP_CLOSE) {
++                                      if (!sk->done) {
++                                              /* This occurs when user tries to read
++                                               * from never connected socket.
++                                               */
++                                              copied = -ENOTCONN;
++                                              break;
++                                      }
++                                      break;
++                              }
++
++                              if (!timeo) {
++                                      copied = -EAGAIN;
++                                      break;
++                              }
++                      }
++
++                      cleanup_rbuf(sk, copied);
++                      timeo = tcp_data_wait(sk, timeo);
++                      continue;
++              }
++
++              BUG_TRAP (atomic_read (&skb->users) == 1);
++
++              exhausted = eaten = 0;
++
++              offset = tp->copied_seq - TCP_SKB_CB(skb)->seq;
++              if (skb->h.th->syn)
++                      offset--;
++
++              used = skb->len - offset;
++
++              if (tp->urg_data) {
++                      u32 urg_offset = tp->urg_seq - tp->copied_seq;
++                      if (urg_offset < used) {
++                              if (!urg_offset) { /* at urgent data */
++                                      if (!sk->urginline) {
++                                              tp->copied_seq++; /* discard the single byte of urgent data */
++                                              offset++;
++                                              used--;
++                                      }
++                              } else          /* truncate read */
++                                      used = urg_offset;
++                      }
++              }
++
++              BUG_TRAP (used >= 0); /* NOTE(review): 'used' is unsigned long, so this can never fire */
++              if (len < used)
++                      used = len;
++
++              if (used == 0)
++                      exhausted = 1;
++              else
++              {
++                      if (skb_is_nonlinear (skb))
++                      {
++                              int   rc = skb_linearize (skb, GFP_KERNEL);
++
++                              printk ("tcp_recvpackets(): linearising: %d\n", rc); /* NOTE(review): unconditional, no log level - looks like leftover debug output */
++
++                              if (rc)
++                              {
++                                      if (!copied)
++                                              copied = rc;
++                                      break;
++                              }
++                      }
++
++                      if ((offset + used) == skb->len) /* consuming the whole packet */
++                      {
++                              __skb_unlink (skb, &sk->receive_queue);
++                              dst_release (skb->dst); /* NOTE(review): skb->dst is not cleared afterwards - confirm the later skb free does not release it again */
++                              skb_orphan (skb);
++                              __skb_pull (skb, offset);
++                              __skb_queue_tail (packets, skb);
++                              exhausted = eaten = 1;
++                      }
++                      else                    /* consuming only part of the packet */
++                      {
++                              struct sk_buff *skb2 = skb_clone (skb, GFP_KERNEL); /* original stays on the receive queue; the clone goes to the caller */
++
++                              if (skb2 == NULL)
++                              {
++                                      if (!copied)
++                                              copied = -ENOMEM;
++                                      break;
++                              }
++
++                              dst_release (skb2->dst);
++                              __skb_pull (skb2, offset);
++                              __skb_trim (skb2, used);
++                              __skb_queue_tail (packets, skb2);
++                      }
++
++                      tp->copied_seq += used;
++                      copied += used;
++                      len -= used;
++              }
++
++              if (tp->urg_data && after(tp->copied_seq,tp->urg_seq)) {
++                      tp->urg_data = 0;
++                      tcp_fast_path_check(sk, tp);
++              }
++
++              if (!exhausted)
++                      continue;
++
++              if (skb->h.th->fin)
++              {
++                      tp->copied_seq++;
++                      if (!eaten)
++                              tcp_eat_skb (sk, skb);
++                      break;
++              }
++
++              if (!eaten)
++                      tcp_eat_skb (sk, skb);
++
++      } while (len > 0);
++
++ out:
++      /* Clean up data we have read: This will do ACK frames. */
++      cleanup_rbuf(sk, copied);
++      TCP_CHECK_TIMER(sk);
++      release_sock(sk);
++      return copied;
++}
++
+ /*
+  *    State processing on a close. This implements the state shift for
+  *    sending our FIN frame. Note that we only send a FIN for some
diff --git a/lustre/kernel_patches/patches/vfs_intent-2.4.18-18-chaos65.patch b/lustre/kernel_patches/patches/vfs_intent-2.4.18-18-chaos65.patch

index dd07ef3..91dc15b 100644 (file)
--- a/lustre/kernel_patches/patches/vfs_intent-2.4.18-18-chaos65.patch
+++ b/lustre/kernel_patches/patches/vfs_intent-2.4.18-18-chaos65.patch
@@ -11,6 +11,47 @@
   kernel/ksyms.c         |    1 
   11 files changed, 564 insertions(+), 126 deletions(-)
  
+Index: linux-2.4.18-p4smp/fs/dcache.c
+===================================================================
+--- linux-2.4.18-p4smp.orig/fs/dcache.c        2004-02-03 01:00:10.000000000 -0500
++++ linux-2.4.18-p4smp/fs/dcache.c     2004-03-19 16:05:42.000000000 -0500
+@@ -186,6 +186,13 @@
+               spin_unlock(&dcache_lock);
+               return 0;
+       }
++
++      /* network invalidation by Lustre */
++      if (dentry->d_flags & DCACHE_LUSTRE_INVALID) {
++              spin_unlock(&dcache_lock);
++              return 0;
++      }
++
+       /*
+        * Check whether to do a partial shrink_dcache
+        * to get rid of unused child entries.
+@@ -859,13 +866,19 @@ void d_delete(struct dentry * dentry)
+  * Adds a dentry to the hash according to its name.
+  */
+  
+-void d_rehash(struct dentry * entry)
++void __d_rehash(struct dentry * entry, int lock)
+ {
+       struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
+       if (!list_empty(&entry->d_hash)) BUG();
+-      spin_lock(&dcache_lock);
++      if (lock) spin_lock(&dcache_lock);
+       list_add(&entry->d_hash, list);
+-      spin_unlock(&dcache_lock);
++      if (lock) spin_unlock(&dcache_lock);
++}
++EXPORT_SYMBOL(__d_rehash);
++
++void d_rehash(struct dentry * entry)
++{
++      __d_rehash(entry, 1);
+ }
+ 
+ #define do_switch(x,y) do { \
  Index: linux-2.4.18-p4smp/fs/exec.c
  ===================================================================
  --- linux-2.4.18-p4smp.orig/fs/exec.c  2004-02-03 01:00:10.000000000 -0500
@@ -20,7 +61,7 @@ Index: linux-2.4.18-p4smp/fs/exec.c
         struct nameidata nd;
         int error;
  +      struct lookup_intent it = { .it_op = IT_OPEN,
-+                                           .it_flags = FMODE_READ|FMODE_EXEC };
++                                  .it_flags = FMODE_READ|FMODE_EXEC };
   
  -      error = user_path_walk(library, &nd);
  +      error = user_path_walk_it(library, &nd, &it);
@@ -37,14 +78,14 @@ Index: linux-2.4.18-p4smp/fs/exec.c
         error = PTR_ERR(file);
         if (IS_ERR(file))
                 goto out;
-@@ -359,8 +362,9 @@ struct file *open_exec(const char *name)
+@@ -359,8 +362,10 @@ struct file *open_exec(const char *name)
         struct inode *inode;
         struct file *file;
         int err = 0;
--
--      err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
  +      struct lookup_intent it = { .it_op = IT_OPEN,
-+                                           .it_flags = FMODE_READ|FMODE_EXEC };
++                                  .it_flags = FMODE_READ|FMODE_EXEC };
+ 
+-      err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
  +      err = path_lookup_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it);
         file = ERR_PTR(err);
         if (!err) {
@@ -76,164 +117,6 @@ Index: linux-2.4.18-p4smp/fs/exec.c
                 goto close_fail;
   
         retval = binfmt->core_dump(signr, regs, file);
-Index: linux-2.4.18-p4smp/fs/dcache.c
-===================================================================
---- linux-2.4.18-p4smp.orig/fs/dcache.c        2004-02-03 01:00:10.000000000 -0500
-+++ linux-2.4.18-p4smp/fs/dcache.c     2004-03-19 16:05:42.000000000 -0500
-@@ -186,6 +186,13 @@
-               spin_unlock(&dcache_lock);
-               return 0;
-       }
-+
-+      /* network invalidation by Lustre */
-+      if (dentry->d_flags & DCACHE_LUSTRE_INVALID) {
-+              spin_unlock(&dcache_lock);
-+              return 0;
-+      }
-+
-       /*
-        * Check whether to do a partial shrink_dcache
-        * to get rid of unused child entries.
-@@ -859,13 +866,19 @@ void d_delete(struct dentry * dentry)
-  * Adds a dentry to the hash according to its name.
-  */
-  
--void d_rehash(struct dentry * entry)
-+void __d_rehash(struct dentry * entry, int lock)
- {
-       struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
-       if (!list_empty(&entry->d_hash)) BUG();
--      spin_lock(&dcache_lock);
-+      if (lock) spin_lock(&dcache_lock);
-       list_add(&entry->d_hash, list);
--      spin_unlock(&dcache_lock);
-+      if (lock) spin_unlock(&dcache_lock);
-+}
-+EXPORT_SYMBOL(__d_rehash);
-+
-+void d_rehash(struct dentry * entry)
-+{
-+      __d_rehash(entry, 1);
- }
- 
- #define do_switch(x,y) do { \
-Index: linux-2.4.18-p4smp/fs/namespace.c
-===================================================================
---- linux-2.4.18-p4smp.orig/fs/namespace.c     2004-02-03 01:00:10.000000000 -0500
-+++ linux-2.4.18-p4smp/fs/namespace.c  2004-03-19 16:05:42.000000000 -0500
-@@ -99,6 +99,7 @@
- {
-       old_nd->dentry = mnt->mnt_mountpoint;
-       old_nd->mnt = mnt->mnt_parent;
-+      UNPIN(old_nd->dentry, old_nd->mnt, 1);
-       mnt->mnt_parent = mnt;
-       mnt->mnt_mountpoint = mnt->mnt_root;
-       list_del_init(&mnt->mnt_child);
-@@ -110,6 +111,7 @@ static void attach_mnt(struct vfsmount *
- {
-       mnt->mnt_parent = mntget(nd->mnt);
-       mnt->mnt_mountpoint = dget(nd->dentry);
-+      PIN(nd->dentry, nd->mnt, 1);
-       list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry));
-       list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts);
-       nd->dentry->d_mounted++;
-@@ -485,14 +487,17 @@ static int do_loopback(struct nameidata 
- {
-       struct nameidata old_nd;
-       struct vfsmount *mnt = NULL;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       int err = mount_is_safe(nd);
-       if (err)
-               return err;
-       if (!old_name || !*old_name)
-               return -EINVAL;
--      err = path_lookup(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd);
--      if (err)
-+      err = path_lookup_it(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd, &it);
-+      if (err) {
-+              intent_release(&it);
-               return err;
-+      }
- 
-       down_write(&current->namespace->sem);
-       err = -EINVAL;
-@@ -515,6 +520,7 @@ static int do_loopback(struct nameidata 
-       }
- 
-       up_write(&current->namespace->sem);
-+      intent_release(&it);
-       path_release(&old_nd);
-       return err;
- }
-@@ -698,6 +704,7 @@ long do_mount(char * dev_name, char * di
-                 unsigned long flags, void *data_page)
- {
-       struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       int retval = 0;
-       int mnt_flags = 0;
- 
-@@ -722,10 +729,11 @@ long do_mount(char * dev_name, char * di
-       flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV);
- 
-       /* ... and get the mountpoint */
--      retval = path_lookup(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
--      if (retval)
-+      retval = path_lookup_it(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it);
-+      if (retval) {
-+              intent_release(&it);
-               return retval;
--
-+      }
-       if (flags & MS_REMOUNT)
-               retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
-                                   data_page);
-@@ -736,6 +744,8 @@ long do_mount(char * dev_name, char * di
-       else
-               retval = do_add_mount(&nd, type_page, flags, mnt_flags,
-                                     dev_name, data_page);
-+
-+      intent_release(&it);
-       path_release(&nd);
-       return retval;
- }
-@@ -901,6 +911,8 @@ asmlinkage long sys_pivot_root(const cha
- {
-       struct vfsmount *tmp;
-       struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd;
-+      struct lookup_intent new_it = { .it_op = IT_GETATTR };
-+      struct lookup_intent old_it = { .it_op = IT_GETATTR };
-       int error;
- 
-       if (!capable(CAP_SYS_ADMIN))
-@@ -908,14 +920,14 @@ asmlinkage long sys_pivot_root(const cha
- 
-       lock_kernel();
- 
--      error = __user_walk(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd);
-+      error = __user_walk_it(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd, &new_it);
-       if (error)
-               goto out0;
-       error = -EINVAL;
-       if (!check_mnt(new_nd.mnt))
-               goto out1;
- 
--      error = __user_walk(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd);
-+      error = __user_walk_it(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd, &old_it);
-       if (error)
-               goto out1;
- 
-@@ -970,8 +982,10 @@ out2:
-       up(&old_nd.dentry->d_inode->i_zombie);
-       up_write(&current->namespace->sem);
-       path_release(&user_nd);
-+      intent_release(&old_it);
-       path_release(&old_nd);
- out1:
-+      intent_release(&new_it);
-       path_release(&new_nd);
- out0:
-       unlock_kernel();
  Index: linux-2.4.18-p4smp/fs/namei.c
  ===================================================================
  --- linux-2.4.18-p4smp.orig/fs/namei.c 2004-02-03 01:00:10.000000000 -0500
@@ -399,13 +282,11 @@ Index: linux-2.4.18-p4smp/fs/namei.c
                         ;
   
                 err = -ENOENT;
-@@ -548,8 +585,8 @@ int link_path_walk(const char * name, st
-               if (!inode->i_op)
+@@ -549,7 +586,7 @@ int link_path_walk(const char * name, st
                         goto out_dput;
   
--              if (inode->i_op->follow_link) {
+               if (inode->i_op->follow_link) {
  -                      err = do_follow_link(dentry, nd);
-+              if (inode->i_op->follow_link || inode->i_op->follow_link2) {
  +                      err = do_follow_link(dentry, nd, NULL);
                         dput(dentry);
                         if (err)
@@ -419,7 +300,7 @@ Index: linux-2.4.18-p4smp/fs/namei.c
                         break;
                 continue;
                 /* here ends the main loop */
-@@ -592,22 +629,23 @@ last_component:
+@@ -592,22 +629,22 @@ last_component:
                         if (err < 0)
                                 break;
                 }
@@ -440,10 +321,8 @@ Index: linux-2.4.18-p4smp/fs/namei.c
                         ;
                 inode = dentry->d_inode;
                 if ((lookup_flags & LOOKUP_FOLLOW)
--                  && inode && inode->i_op && inode->i_op->follow_link) {
+                   && inode && inode->i_op && inode->i_op->follow_link) {
  -                      err = do_follow_link(dentry, nd);
-+                  && inode && inode->i_op &&
-+                  (inode->i_op->follow_link || inode->i_op->follow_link2)) {
  +                      err = do_follow_link(dentry, nd, it);
                         dput(dentry);
                         if (err)
@@ -471,7 +350,7 @@ Index: linux-2.4.18-p4smp/fs/namei.c
  +                              if (err)
  +                                      break;
  +                              new = real_lookup(dentry->d_parent,
-+                                                &dentry->d_name, 0, NULL);
++                                                &dentry->d_name, 0, it);
  +                              d_invalidate(dentry);
  +                              dput(dentry);
  +                              if (IS_ERR(new)) {
@@ -1046,6 +925,122 @@ Index: linux-2.4.18-p4smp/fs/namei.c
         if (page) {
                 kunmap(page);
                 page_cache_release(page);
+Index: linux-2.4.18-p4smp/fs/namespace.c
+===================================================================
+--- linux-2.4.18-p4smp.orig/fs/namespace.c     2004-02-03 01:00:10.000000000 -0500
++++ linux-2.4.18-p4smp/fs/namespace.c  2004-03-19 16:05:42.000000000 -0500
+@@ -99,6 +99,7 @@
+ {
+       old_nd->dentry = mnt->mnt_mountpoint;
+       old_nd->mnt = mnt->mnt_parent;
++      UNPIN(old_nd->dentry, old_nd->mnt, 1);
+       mnt->mnt_parent = mnt;
+       mnt->mnt_mountpoint = mnt->mnt_root;
+       list_del_init(&mnt->mnt_child);
+@@ -110,6 +111,7 @@ static void attach_mnt(struct vfsmount *
+ {
+       mnt->mnt_parent = mntget(nd->mnt);
+       mnt->mnt_mountpoint = dget(nd->dentry);
++      PIN(nd->dentry, nd->mnt, 1);
+       list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry));
+       list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts);
+       nd->dentry->d_mounted++;
+@@ -485,14 +487,17 @@ static int do_loopback(struct nameidata 
+ {
+       struct nameidata old_nd;
+       struct vfsmount *mnt = NULL;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+       int err = mount_is_safe(nd);
+       if (err)
+               return err;
+       if (!old_name || !*old_name)
+               return -EINVAL;
+-      err = path_lookup(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd);
+-      if (err)
++      err = path_lookup_it(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd, &it);
++      if (err) {
++              intent_release(&it);
+               return err;
++      }
+ 
+       down_write(&current->namespace->sem);
+       err = -EINVAL;
+@@ -515,6 +520,7 @@ static int do_loopback(struct nameidata 
+       }
+ 
+       up_write(&current->namespace->sem);
++      intent_release(&it);
+       path_release(&old_nd);
+       return err;
+ }
+@@ -698,6 +704,7 @@ long do_mount(char * dev_name, char * di
+                 unsigned long flags, void *data_page)
+ {
+       struct nameidata nd;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+       int retval = 0;
+       int mnt_flags = 0;
+ 
+@@ -722,9 +729,11 @@ long do_mount(char * dev_name, char * di
+       flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV);
+ 
+       /* ... and get the mountpoint */
+-      retval = path_lookup(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
+-      if (retval)
++      retval = path_lookup_it(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it);
++      if (retval) {
++              intent_release(&it);
+               return retval;
++      }
+ 
+       if (flags & MS_REMOUNT)
+               retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
+@@ -736,6 +744,8 @@ long do_mount(char * dev_name, char * di
+       else
+               retval = do_add_mount(&nd, type_page, flags, mnt_flags,
+                                     dev_name, data_page);
++
++      intent_release(&it);
+       path_release(&nd);
+       return retval;
+ }
+@@ -901,6 +911,8 @@ asmlinkage long sys_pivot_root(const cha
+ {
+       struct vfsmount *tmp;
+       struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd;
++      struct lookup_intent new_it = { .it_op = IT_GETATTR };
++      struct lookup_intent old_it = { .it_op = IT_GETATTR };
+       int error;
+ 
+       if (!capable(CAP_SYS_ADMIN))
+@@ -908,14 +920,14 @@ asmlinkage long sys_pivot_root(const cha
+ 
+       lock_kernel();
+ 
+-      error = __user_walk(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd);
++      error = __user_walk_it(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd, &new_it);
+       if (error)
+               goto out0;
+       error = -EINVAL;
+       if (!check_mnt(new_nd.mnt))
+               goto out1;
+ 
+-      error = __user_walk(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd);
++      error = __user_walk_it(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd, &old_it);
+       if (error)
+               goto out1;
+ 
+@@ -970,8 +982,10 @@ out2:
+       up(&old_nd.dentry->d_inode->i_zombie);
+       up_write(&current->namespace->sem);
+       path_release(&user_nd);
++      intent_release(&old_it);
+       path_release(&old_nd);
+ out1:
++      intent_release(&new_it);
+       path_release(&new_nd);
+ out0:
+       unlock_kernel();
  Index: linux-2.4.18-p4smp/fs/open.c
  ===================================================================
  --- linux-2.4.18-p4smp.orig/fs/open.c  2004-02-03 01:00:10.000000000 -0500
@@ -1229,16 +1224,14 @@ Index: linux-2.4.18-p4smp/fs/open.c
                 path_release(&nd);
         }
   
-@@ -385,8 +430,11 @@ asmlinkage long sys_chdir(const char * f
+@@ -385,8 +430,9 @@ asmlinkage long sys_chdir(const char * f
   {
         int error;
         struct nameidata nd;
  +      struct lookup_intent it = { .it_op = IT_GETATTR };
   
  -      error = __user_walk(filename,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd);
-+      error = __user_walk_it(filename,
-+                             LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,
-+                             &nd, &it);
++      error = __user_walk_it(filename,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd, &it);
         if (error)
                 goto out;
   
@@ -1588,9 +1581,9 @@ Index: linux-2.4.18-p4smp/include/linux/dcache.h
  +      void (*d_unpin)(struct dentry *, struct vfsmount *, int);
   };
   
-+#define PIN(de,mnt,flag)  if (de->d_op && de->d_op->d_pin) \
++#define PIN(de,mnt,flag)  if (de && de->d_op && de->d_op->d_pin) \
  +                              de->d_op->d_pin(de, mnt, flag);
-+#define UNPIN(de,mnt,flag)  if (de->d_op && de->d_op->d_unpin) \
++#define UNPIN(de,mnt,flag)  if (de && de->d_op && de->d_op->d_unpin) \
  +                              de->d_op->d_unpin(de, mnt, flag);
  +
  +
@@ -1628,7 +1621,7 @@ Index: linux-2.4.18-p4smp/include/linux/fs.h
   #define ATTR_ATTR_FLAG        1024
  +#define ATTR_RAW      0x0800  /* file system, not vfs will massage attrs */
  +#define ATTR_FROM_OPEN        0x1000  /* called from open path, ie O_TRUNC */
-+#define ATTR_CTIME_SET        0x2000  /* called from open path, ie O_TRUNC */
++#define ATTR_CTIME_SET        0x2000
   
   /*
    * This is the Inode Attributes structure, used for notify_change().  It
@@ -1666,7 +1659,7 @@ Index: linux-2.4.18-p4smp/include/linux/fs.h
   
   /*
    * File types
-@@ -900,21 +908,34 @@ struct file_operations {
+@@ -900,21 +908,32 @@ struct file_operations {
   
   struct inode_operations {
         int (*create) (struct inode *,struct dentry *,int);
@@ -1690,8 +1683,6 @@ Index: linux-2.4.18-p4smp/include/linux/fs.h
  +      int (*rename_raw) (struct nameidata *, struct nameidata *);
         int (*readlink) (struct dentry *, char *,int);
         int (*follow_link) (struct dentry *, struct nameidata *);
-+      int (*follow_link2) (struct dentry *, struct nameidata *,
-+                           struct lookup_intent *it);
         void (*truncate) (struct inode *);
         int (*permission) (struct inode *, int);
         int (*revalidate) (struct dentry *);
@@ -1746,24 +1737,6 @@ Index: linux-2.4.18-p4smp/include/linux/fs.h
   extern int page_readlink(struct dentry *, char *, int);
   extern int page_follow_link(struct dentry *, struct nameidata *);
   extern struct inode_operations page_symlink_inode_operations;
-Index: linux-2.4.18-p4smp/kernel/fork.c
-===================================================================
---- linux-2.4.18-p4smp.orig/kernel/fork.c      2004-02-03 01:00:10.000000000 -0500
-+++ linux-2.4.18-p4smp/kernel/fork.c   2004-03-19 16:05:42.000000000 -0500
-@@ -399,10 +399,13 @@
-               fs->umask = old->umask;
-               read_lock(&old->lock);
-               fs->rootmnt = mntget(old->rootmnt);
-+              PIN(old->pwd, old->pwdmnt, 0);
-+              PIN(old->root, old->rootmnt, 1);
-               fs->root = dget(old->root);
-               fs->pwdmnt = mntget(old->pwdmnt);
-               fs->pwd = dget(old->pwd);
-               if (old->altroot) {
-+                      PIN(old->altroot, old->altrootmnt, 1);
-                       fs->altrootmnt = mntget(old->altrootmnt);
-                       fs->altroot = dget(old->altroot);
-               } else {
  Index: linux-2.4.18-p4smp/kernel/exit.c
  ===================================================================
  --- linux-2.4.18-p4smp.orig/kernel/exit.c      2004-02-03 01:00:10.000000000 -0500
@@ -1783,6 +1756,24 @@ Index: linux-2.4.18-p4smp/kernel/exit.c
                         dput(fs->altroot);
                         mntput(fs->altrootmnt);
                 }
+Index: linux-2.4.18-p4smp/kernel/fork.c
+===================================================================
+--- linux-2.4.18-p4smp.orig/kernel/fork.c      2004-02-03 01:00:10.000000000 -0500
++++ linux-2.4.18-p4smp/kernel/fork.c   2004-03-19 16:05:42.000000000 -0500
+@@ -399,10 +399,13 @@
+               fs->umask = old->umask;
+               read_lock(&old->lock);
+               fs->rootmnt = mntget(old->rootmnt);
++              PIN(old->pwd, old->pwdmnt, 0);
++              PIN(old->root, old->rootmnt, 1);
+               fs->root = dget(old->root);
+               fs->pwdmnt = mntget(old->pwdmnt);
+               fs->pwd = dget(old->pwd);
+               if (old->altroot) {
++                      PIN(old->altroot, old->altrootmnt, 1);
+                       fs->altrootmnt = mntget(old->altrootmnt);
+                       fs->altroot = dget(old->altroot);
+               } else {
  Index: linux-2.4.18-p4smp/kernel/ksyms.c
  ===================================================================
  --- linux-2.4.18-p4smp.orig/kernel/ksyms.c     2004-03-19 16:05:40.000000000 -0500
diff --git a/lustre/kernel_patches/patches/vfs_intent-2.4.19-pre1.patch b/lustre/kernel_patches/patches/vfs_intent-2.4.19-pre1.patch

index 7c4ea56..4ccfa4d 100644 (file)
--- a/lustre/kernel_patches/patches/vfs_intent-2.4.19-pre1.patch
+++ b/lustre/kernel_patches/patches/vfs_intent-2.4.19-pre1.patch
@@ -12,6 +12,47 @@
   kernel/ksyms.c            |    1 
   12 files changed, 558 insertions(+), 128 deletions(-)
  
+Index: linux-2.4.19-pre1/fs/dcache.c
+===================================================================
+--- linux-2.4.19-pre1.orig/fs/dcache.c 2003-11-21 02:41:00.000000000 +0300
++++ linux-2.4.19-pre1/fs/dcache.c      2003-11-21 02:51:38.000000000 +0300
+@@ -181,6 +181,13 @@
+               spin_unlock(&dcache_lock);
+               return 0;
+       }
++
++      /* network invalidation by Lustre */
++      if (dentry->d_flags & DCACHE_LUSTRE_INVALID) {
++              spin_unlock(&dcache_lock);
++              return 0;
++      }
++
+       /*
+        * Check whether to do a partial shrink_dcache
+        * to get rid of unused child entries.
+@@ -831,13 +838,19 @@
+  * Adds a dentry to the hash according to its name.
+  */
+  
+-void d_rehash(struct dentry * entry)
++void __d_rehash(struct dentry * entry, int lock)
+ {
+       struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
+       if (!list_empty(&entry->d_hash)) BUG();
+-      spin_lock(&dcache_lock);
++      if (lock) spin_lock(&dcache_lock);
+       list_add(&entry->d_hash, list);
+-      spin_unlock(&dcache_lock);
++      if (lock) spin_unlock(&dcache_lock);
++}
++EXPORT_SYMBOL(__d_rehash);
++
++void d_rehash(struct dentry * entry)
++{
++      __d_rehash(entry, 1);
+ }
+ 
+ #define do_switch(x,y) do { \
  Index: linux-2.4.19-pre1/fs/exec.c
  ===================================================================
  --- linux-2.4.19-pre1.orig/fs/exec.c   2003-11-21 02:41:00.000000000 +0300
@@ -78,165 +119,6 @@ Index: linux-2.4.19-pre1/fs/exec.c
                 goto close_fail;
   
         retval = binfmt->core_dump(signr, regs, file);
-Index: linux-2.4.19-pre1/fs/dcache.c
-===================================================================
---- linux-2.4.19-pre1.orig/fs/dcache.c 2003-11-21 02:41:00.000000000 +0300
-+++ linux-2.4.19-pre1/fs/dcache.c      2003-11-21 02:51:38.000000000 +0300
-@@ -181,6 +181,13 @@
-               spin_unlock(&dcache_lock);
-               return 0;
-       }
-+
-+      /* network invalidation by Lustre */
-+      if (dentry->d_flags & DCACHE_LUSTRE_INVALID) {
-+              spin_unlock(&dcache_lock);
-+              return 0;
-+      }
-+
-       /*
-        * Check whether to do a partial shrink_dcache
-        * to get rid of unused child entries.
-@@ -831,13 +838,19 @@
-  * Adds a dentry to the hash according to its name.
-  */
-  
--void d_rehash(struct dentry * entry)
-+void __d_rehash(struct dentry * entry, int lock)
- {
-       struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
-       if (!list_empty(&entry->d_hash)) BUG();
--      spin_lock(&dcache_lock);
-+      if (lock) spin_lock(&dcache_lock);
-       list_add(&entry->d_hash, list);
--      spin_unlock(&dcache_lock);
-+      if (lock) spin_unlock(&dcache_lock);
-+}
-+EXPORT_SYMBOL(__d_rehash);
-+
-+void d_rehash(struct dentry * entry)
-+{
-+      __d_rehash(entry, 1);
- }
- 
- #define do_switch(x,y) do { \
-Index: linux-2.4.19-pre1/fs/namespace.c
-===================================================================
---- linux-2.4.19-pre1.orig/fs/namespace.c      2003-11-21 02:41:00.000000000 +0300
-+++ linux-2.4.19-pre1/fs/namespace.c   2003-11-21 02:51:38.000000000 +0300
-@@ -107,6 +107,7 @@
- {
-       old_nd->dentry = mnt->mnt_mountpoint;
-       old_nd->mnt = mnt->mnt_parent;
-+      UNPIN(old_nd->dentry, old_nd->mnt, 1);
-       mnt->mnt_parent = mnt;
-       mnt->mnt_mountpoint = mnt->mnt_root;
-       list_del_init(&mnt->mnt_child);
-@@ -118,6 +119,7 @@
- {
-       mnt->mnt_parent = mntget(nd->mnt);
-       mnt->mnt_mountpoint = dget(nd->dentry);
-+      PIN(nd->dentry, nd->mnt, 1);
-       list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry));
-       list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts);
-       nd->dentry->d_mounted++;
-@@ -500,15 +502,18 @@
- {
-       struct nameidata old_nd;
-       struct vfsmount *mnt = NULL;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       int err = mount_is_safe(nd);
-       if (err)
-               return err;
-       if (!old_name || !*old_name)
-               return -EINVAL;
-       if (path_init(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd))
--              err = path_walk(old_name, &old_nd);
--      if (err)
-+              err = path_walk_it(old_name, &old_nd, &it);
-+      if (err) {
-+              intent_release(&it);
-               return err;
-+      }
- 
-       down(&mount_sem);
-       err = -EINVAL;
-@@ -531,6 +536,7 @@
-       }
- 
-       up(&mount_sem);
-+      intent_release(&it);
-       path_release(&old_nd);
-       return err;
- }
-@@ -706,6 +712,7 @@
-                 unsigned long flags, void *data_page)
- {
-       struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       int retval = 0;
-       int mnt_flags = 0;
- 
-@@ -731,9 +738,11 @@
- 
-       /* ... and get the mountpoint */
-       if (path_init(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd))
--              retval = path_walk(dir_name, &nd);
--      if (retval)
-+              retval = path_walk_it(dir_name, &nd, &it);
-+      if (retval) {
-+              intent_release(&it);
-               return retval;
-+      }
- 
-       if (flags & MS_REMOUNT)
-               retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
-@@ -745,6 +754,8 @@
-       else
-               retval = do_add_mount(&nd, type_page, flags, mnt_flags,
-                                     dev_name, data_page);
-+
-+      intent_release(&it);
-       path_release(&nd);
-       return retval;
- }
-@@ -830,6 +841,8 @@
- {
-       struct vfsmount *tmp;
-       struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd;
-+      struct lookup_intent new_it = { .it_op = IT_GETATTR };
-+      struct lookup_intent old_it = { .it_op = IT_GETATTR };
-       char *name;
-       int error;
- 
-@@ -844,7 +857,7 @@
-               goto out0;
-       error = 0;
-       if (path_init(name, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd))
--              error = path_walk(name, &new_nd);
-+              error = path_walk_it(name, &new_nd, &new_it);
-       putname(name);
-       if (error)
-               goto out0;
-@@ -858,7 +871,7 @@
-               goto out1;
-       error = 0;
-       if (path_init(name, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd))
--              error = path_walk(name, &old_nd);
-+              error = path_walk_it(name, &old_nd, &old_it);
-       putname(name);
-       if (error)
-               goto out1;
-@@ -914,8 +927,10 @@
-       up(&old_nd.dentry->d_inode->i_zombie);
-       up(&mount_sem);
-       path_release(&user_nd);
-+      intent_release(&old_it);
-       path_release(&old_nd);
- out1:
-+      intent_release(&new_it);
-       path_release(&new_nd);
- out0:
-       unlock_kernel();
  Index: linux-2.4.19-pre1/fs/namei.c
  ===================================================================
  --- linux-2.4.19-pre1.orig/fs/namei.c  2003-11-21 02:41:00.000000000 +0300
@@ -370,7 +252,7 @@ Index: linux-2.4.19-pre1/fs/namei.c
                 if (!dentry) {
  -                      dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
  +                      dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE,
-+                                              NULL);
++                                           NULL);
                         err = PTR_ERR(dentry);
                         if (IS_ERR(dentry))
                                 break;
@@ -431,7 +313,7 @@ Index: linux-2.4.19-pre1/fs/namei.c
                                 break;
                 }
                 goto return_base;
-@@ -625,21 +663,66 @@
+@@ -625,21 +663,68 @@
                         nd->last_type = LAST_DOT;
                 else if (this.len == 2 && this.name[1] == '.')
                         nd->last_type = LAST_DOTDOT;
@@ -450,7 +332,7 @@ Index: linux-2.4.19-pre1/fs/namei.c
  +                              if (err)
  +                                      break;
  +                              new = real_lookup(dentry->d_parent,
-+                                                &dentry->d_name, 0, NULL);
++                                                &dentry->d_name, 0, it);
  +                              d_invalidate(dentry);
  +                              dput(dentry);
  +                              if (IS_ERR(new)) {
@@ -459,6 +341,8 @@ Index: linux-2.4.19-pre1/fs/namei.c
  +                              }
  +                              nd->dentry = new;
  +                      }
++                      if (!nd->dentry->d_inode)
++                              goto no_inode;
  +              } else
  +              if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
  +                      err = -ESTALE;
@@ -744,12 +628,16 @@ Index: linux-2.4.19-pre1/fs/namei.c
         if (IS_ERR(dentry))
                 goto fail;
         if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
-@@ -1252,7 +1392,16 @@
+@@ -1252,7 +1392,20 @@
                 error = path_walk(tmp, &nd);
         if (error)
                 goto out;
  -      dentry = lookup_create(&nd, 0);
  +
++      if (nd.last_type != LAST_NORM) {
++              error = -EEXIST;
++              goto out2;
++      }
  +      if (nd.dentry->d_inode->i_op->mknod_raw) {
  +              struct inode_operations *op = nd.dentry->d_inode->i_op;
  +              error = op->mknod_raw(&nd, mode, dev);
@@ -770,11 +658,15 @@ Index: linux-2.4.19-pre1/fs/namei.c
         path_release(&nd);
   out:
         putname(tmp);
-@@ -1321,7 +1471,14 @@
+@@ -1321,7 +1471,18 @@
                         error = path_walk(tmp, &nd);
                 if (error)
                         goto out;
  -              dentry = lookup_create(&nd, 1);
++              if (nd.last_type != LAST_NORM) {
++                      error = -EEXIST;
++                      goto out2;
++              }
  +              if (nd.dentry->d_inode->i_op->mkdir_raw) {
  +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
  +                      error = op->mkdir_raw(&nd, mode);
@@ -829,11 +721,15 @@ Index: linux-2.4.19-pre1/fs/namei.c
         error = PTR_ERR(dentry);
         if (!IS_ERR(dentry)) {
                 /* Why not before? Because we want correct error value */
-@@ -1557,15 +1730,23 @@
+@@ -1557,15 +1730,27 @@
                         error = path_walk(to, &nd);
                 if (error)
                         goto out;
  -              dentry = lookup_create(&nd, 0);
++              if (nd.last_type != LAST_NORM) {
++                      error = -EEXIST;
++                      goto out2;
++              }
  +              if (nd.dentry->d_inode->i_op->symlink_raw) {
  +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
  +                      error = op->symlink_raw(&nd, from);
@@ -855,11 +751,15 @@ Index: linux-2.4.19-pre1/fs/namei.c
                 putname(to);
         }
         putname(from);
-@@ -1648,7 +1829,14 @@
+@@ -1648,7 +1829,18 @@
                 error = -EXDEV;
                 if (old_nd.mnt != nd.mnt)
                         goto out_release;
  -              new_dentry = lookup_create(&nd, 0);
++              if (nd.last_type != LAST_NORM) {
++                      error = -EEXIST;
++                      goto out_release;
++              }
  +              if (nd.dentry->d_inode->i_op->link_raw) {
  +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
  +                      error = op->link_raw(&old_nd, &nd);
@@ -981,6 +881,124 @@ Index: linux-2.4.19-pre1/fs/namei.c
         if (page) {
                 kunmap(page);
                 page_cache_release(page);
+Index: linux-2.4.19-pre1/fs/namespace.c
+===================================================================
+--- linux-2.4.19-pre1.orig/fs/namespace.c      2003-11-21 02:41:00.000000000 +0300
++++ linux-2.4.19-pre1/fs/namespace.c   2003-11-21 02:51:38.000000000 +0300
+@@ -107,6 +107,7 @@
+ {
+       old_nd->dentry = mnt->mnt_mountpoint;
+       old_nd->mnt = mnt->mnt_parent;
++      UNPIN(old_nd->dentry, old_nd->mnt, 1);
+       mnt->mnt_parent = mnt;
+       mnt->mnt_mountpoint = mnt->mnt_root;
+       list_del_init(&mnt->mnt_child);
+@@ -118,6 +119,7 @@
+ {
+       mnt->mnt_parent = mntget(nd->mnt);
+       mnt->mnt_mountpoint = dget(nd->dentry);
++      PIN(nd->dentry, nd->mnt, 1);
+       list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry));
+       list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts);
+       nd->dentry->d_mounted++;
+@@ -500,15 +502,18 @@
+ {
+       struct nameidata old_nd;
+       struct vfsmount *mnt = NULL;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+       int err = mount_is_safe(nd);
+       if (err)
+               return err;
+       if (!old_name || !*old_name)
+               return -EINVAL;
+       if (path_init(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd))
+-              err = path_walk(old_name, &old_nd);
+-      if (err)
++              err = path_walk_it(old_name, &old_nd, &it);
++      if (err) {
++              intent_release(&it);
+               return err;
++      }
+ 
+       down(&mount_sem);
+       err = -EINVAL;
+@@ -531,6 +536,7 @@
+       }
+ 
+       up(&mount_sem);
++      intent_release(&it);
+       path_release(&old_nd);
+       return err;
+ }
+@@ -706,6 +712,7 @@
+                 unsigned long flags, void *data_page)
+ {
+       struct nameidata nd;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+       int retval = 0;
+       int mnt_flags = 0;
+ 
+@@ -731,9 +738,11 @@
+ 
+       /* ... and get the mountpoint */
+       if (path_init(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd))
+-              retval = path_walk(dir_name, &nd);
+-      if (retval)
++              retval = path_walk_it(dir_name, &nd, &it);
++      if (retval) {
++              intent_release(&it);
+               return retval;
++      }
+ 
+       if (flags & MS_REMOUNT)
+               retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
+@@ -745,6 +754,8 @@
+       else
+               retval = do_add_mount(&nd, type_page, flags, mnt_flags,
+                                     dev_name, data_page);
++
++      intent_release(&it);
+       path_release(&nd);
+       return retval;
+ }
+@@ -830,6 +841,8 @@
+ {
+       struct vfsmount *tmp;
+       struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd;
++      struct lookup_intent new_it = { .it_op = IT_GETATTR };
++      struct lookup_intent old_it = { .it_op = IT_GETATTR };
+       char *name;
+       int error;
+ 
+@@ -844,7 +857,7 @@
+               goto out0;
+       error = 0;
+       if (path_init(name, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd))
+-              error = path_walk(name, &new_nd);
++              error = path_walk_it(name, &new_nd, &new_it);
+       putname(name);
+       if (error)
+               goto out0;
+@@ -858,7 +871,7 @@
+               goto out1;
+       error = 0;
+       if (path_init(name, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd))
+-              error = path_walk(name, &old_nd);
++              error = path_walk_it(name, &old_nd, &old_it);
+       putname(name);
+       if (error)
+               goto out1;
+@@ -914,8 +927,10 @@
+       up(&old_nd.dentry->d_inode->i_zombie);
+       up(&mount_sem);
+       path_release(&user_nd);
++      intent_release(&old_it);
+       path_release(&old_nd);
+ out1:
++      intent_release(&new_it);
+       path_release(&new_nd);
+ out0:
+       unlock_kernel();
  Index: linux-2.4.19-pre1/fs/open.c
  ===================================================================
  --- linux-2.4.19-pre1.orig/fs/open.c   2003-11-21 02:41:00.000000000 +0300
@@ -1397,7 +1415,7 @@ Index: linux-2.4.19-pre1/fs/stat.c
  ===================================================================
  --- linux-2.4.19-pre1.orig/fs/stat.c   2003-11-21 02:41:00.000000000 +0300
  +++ linux-2.4.19-pre1/fs/stat.c        2003-11-21 02:51:38.000000000 +0300
-@@ -17,10 +17,14 @@
+@@ -17,10 +17,12 @@
    * Revalidate the inode. This is required for proper NFS attribute caching.
    */
   static __inline__ int
@@ -1406,8 +1424,6 @@ Index: linux-2.4.19-pre1/fs/stat.c
   {
         struct inode * inode = dentry->d_inode;
  -      if (inode->i_op && inode->i_op->revalidate)
-+      if (!inode)
-+              return -ENOENT;
  +      if (inode->i_op && inode->i_op->revalidate_it)
  +              return inode->i_op->revalidate_it(dentry, it);
  +      else if (inode->i_op && inode->i_op->revalidate)
@@ -1661,7 +1677,7 @@ Index: linux-2.4.19-pre1/include/linux/fs.h
   #define ATTR_ATTR_FLAG        1024
  +#define ATTR_RAW      0x0800  /* file system, not vfs will massage attrs */
  +#define ATTR_FROM_OPEN        0x1000  /* called from open path, ie O_TRUNC */
-+#define ATTR_CTIME_SET 0x2000
++#define ATTR_CTIME_SET        0x2000
   
   /*
    * This is the Inode Attributes structure, used for notify_change().  It
@@ -1804,18 +1820,25 @@ Index: linux-2.4.19-pre1/include/linux/fs_struct.h
                 dput(old_pwd);
                 mntput(old_pwdmnt);
         }
-Index: linux-2.4.19-pre1/kernel/ksyms.c
+Index: linux-2.4.19-pre1/kernel/exit.c
  ===================================================================
---- linux-2.4.19-pre1.orig/kernel/ksyms.c      2003-11-21 02:51:37.000000000 +0300
-+++ linux-2.4.19-pre1/kernel/ksyms.c   2003-11-21 02:51:38.000000000 +0300
-@@ -260,6 +260,7 @@
- EXPORT_SYMBOL(set_page_dirty);
- EXPORT_SYMBOL(vfs_readlink);
- EXPORT_SYMBOL(vfs_follow_link);
-+EXPORT_SYMBOL(vfs_follow_link_it);
- EXPORT_SYMBOL(page_readlink);
- EXPORT_SYMBOL(page_follow_link);
- EXPORT_SYMBOL(page_symlink_inode_operations);
+--- linux-2.4.19-pre1.orig/kernel/exit.c       2003-11-21 02:41:00.000000000 +0300
++++ linux-2.4.19-pre1/kernel/exit.c    2003-11-21 02:51:38.000000000 +0300
+@@ -245,11 +245,14 @@
+ {
+       /* No need to hold fs->lock if we are killing it */
+       if (atomic_dec_and_test(&fs->count)) {
++              UNPIN(fs->pwd, fs->pwdmnt, 0);
++              UNPIN(fs->root, fs->rootmnt, 1);
+               dput(fs->root);
+               mntput(fs->rootmnt);
+               dput(fs->pwd);
+               mntput(fs->pwdmnt);
+               if (fs->altroot) {
++                      UNPIN(fs->altroot, fs->altrootmnt, 1);
+                       dput(fs->altroot);
+                       mntput(fs->altrootmnt);
+               }
  Index: linux-2.4.19-pre1/kernel/fork.c
  ===================================================================
  --- linux-2.4.19-pre1.orig/kernel/fork.c       2003-11-21 02:41:00.000000000 +0300
@@ -1834,22 +1857,15 @@ Index: linux-2.4.19-pre1/kernel/fork.c
                         fs->altrootmnt = mntget(old->altrootmnt);
                         fs->altroot = dget(old->altroot);
                 } else {
-Index: linux-2.4.19-pre1/kernel/exit.c
+Index: linux-2.4.19-pre1/kernel/ksyms.c
  ===================================================================
---- linux-2.4.19-pre1.orig/kernel/exit.c       2003-11-21 02:41:00.000000000 +0300
-+++ linux-2.4.19-pre1/kernel/exit.c    2003-11-21 02:51:38.000000000 +0300
-@@ -245,11 +245,14 @@
- {
-       /* No need to hold fs->lock if we are killing it */
-       if (atomic_dec_and_test(&fs->count)) {
-+              UNPIN(fs->pwd, fs->pwdmnt, 0);
-+              UNPIN(fs->root, fs->rootmnt, 1);
-               dput(fs->root);
-               mntput(fs->rootmnt);
-               dput(fs->pwd);
-               mntput(fs->pwdmnt);
-               if (fs->altroot) {
-+                      UNPIN(fs->altroot, fs->altrootmnt, 1);
-                       dput(fs->altroot);
-                       mntput(fs->altrootmnt);
-               }
+--- linux-2.4.19-pre1.orig/kernel/ksyms.c      2003-11-21 02:51:37.000000000 +0300
++++ linux-2.4.19-pre1/kernel/ksyms.c   2003-11-21 02:51:38.000000000 +0300
+@@ -260,6 +260,7 @@
+ EXPORT_SYMBOL(set_page_dirty);
+ EXPORT_SYMBOL(vfs_readlink);
+ EXPORT_SYMBOL(vfs_follow_link);
++EXPORT_SYMBOL(vfs_follow_link_it);
+ EXPORT_SYMBOL(page_readlink);
+ EXPORT_SYMBOL(page_follow_link);
+ EXPORT_SYMBOL(page_symlink_inode_operations);
diff --git a/lustre/kernel_patches/patches/vfs_intent-2.4.19-suse.patch b/lustre/kernel_patches/patches/vfs_intent-2.4.19-suse.patch

index 7741be4..b6ab3b6 100644 (file)
--- a/lustre/kernel_patches/patches/vfs_intent-2.4.19-suse.patch
+++ b/lustre/kernel_patches/patches/vfs_intent-2.4.19-suse.patch
@@ -12,6 +12,47 @@
   kernel/ksyms.c            |    1 
   12 files changed, 558 insertions(+), 128 deletions(-)
  
+Index: linux-2.4.19.SuSE/fs/dcache.c
+===================================================================
+--- linux-2.4.19.SuSE.orig/fs/dcache.c Mon Jan 27 05:08:04 2003
++++ linux-2.4.19.SuSE/fs/dcache.c      Sat Nov 15 17:29:03 2003
+@@ -186,6 +186,13 @@
+               spin_unlock(&dcache_lock);
+               return 0;
+       }
++
++      /* network invalidation by Lustre */
++      if (dentry->d_flags & DCACHE_LUSTRE_INVALID) {
++              spin_unlock(&dcache_lock);
++              return 0;
++      }
++
+       /*
+        * Check whether to do a partial shrink_dcache
+        * to get rid of unused child entries.
+@@ -838,13 +845,19 @@
+  * Adds a dentry to the hash according to its name.
+  */
+  
+-void d_rehash(struct dentry * entry)
++void __d_rehash(struct dentry * entry, int lock)
+ {
+       struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
+       if (!list_empty(&entry->d_hash)) BUG();
+-      spin_lock(&dcache_lock);
++      if (lock) spin_lock(&dcache_lock);
+       list_add(&entry->d_hash, list);
+-      spin_unlock(&dcache_lock);
++      if (lock) spin_unlock(&dcache_lock);
++}
++EXPORT_SYMBOL(__d_rehash);
++
++void d_rehash(struct dentry * entry)
++{
++      __d_rehash(entry, 1);
+ }
+ 
+ #define do_switch(x,y) do { \
  Index: linux-2.4.19.SuSE/fs/exec.c
  ===================================================================
  --- linux-2.4.19.SuSE.orig/fs/exec.c   Mon Jan 27 05:08:35 2003
@@ -78,165 +119,6 @@ Index: linux-2.4.19.SuSE/fs/exec.c
                 goto close_fail;
   
         retval = binfmt->core_dump(signr, regs, file);
-Index: linux-2.4.19.SuSE/fs/dcache.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/dcache.c Mon Jan 27 05:08:04 2003
-+++ linux-2.4.19.SuSE/fs/dcache.c      Sat Nov 15 17:29:03 2003
-@@ -186,6 +186,13 @@
-               spin_unlock(&dcache_lock);
-               return 0;
-       }
-+
-+      /* network invalidation by Lustre */
-+      if (dentry->d_flags & DCACHE_LUSTRE_INVALID) {
-+              spin_unlock(&dcache_lock);
-+              return 0;
-+      }
-+
-       /*
-        * Check whether to do a partial shrink_dcache
-        * to get rid of unused child entries.
-@@ -838,13 +845,19 @@
-  * Adds a dentry to the hash according to its name.
-  */
-  
--void d_rehash(struct dentry * entry)
-+void __d_rehash(struct dentry * entry, int lock)
- {
-       struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
-       if (!list_empty(&entry->d_hash)) BUG();
--      spin_lock(&dcache_lock);
-+      if (lock) spin_lock(&dcache_lock);
-       list_add(&entry->d_hash, list);
--      spin_unlock(&dcache_lock);
-+      if (lock) spin_unlock(&dcache_lock);
-+}
-+EXPORT_SYMBOL(__d_rehash);
-+
-+void d_rehash(struct dentry * entry)
-+{
-+      __d_rehash(entry, 1);
- }
- 
- #define do_switch(x,y) do { \
-Index: linux-2.4.19.SuSE/fs/namespace.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/namespace.c      Mon Jan 27 05:08:07 2003
-+++ linux-2.4.19.SuSE/fs/namespace.c   Sat Nov 15 17:56:42 2003
-@@ -97,6 +97,7 @@
- {
-       old_nd->dentry = mnt->mnt_mountpoint;
-       old_nd->mnt = mnt->mnt_parent;
-+      UNPIN(old_nd->dentry, old_nd->mnt, 1);
-       mnt->mnt_parent = mnt;
-       mnt->mnt_mountpoint = mnt->mnt_root;
-       list_del_init(&mnt->mnt_child);
-@@ -108,6 +109,7 @@
- {
-       mnt->mnt_parent = mntget(nd->mnt);
-       mnt->mnt_mountpoint = dget(nd->dentry);
-+      PIN(nd->dentry, nd->mnt, 1);
-       list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry));
-       list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts);
-       nd->dentry->d_mounted++;
-@@ -491,15 +493,18 @@
- {
-       struct nameidata old_nd;
-       struct vfsmount *mnt = NULL;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       int err = mount_is_safe(nd);
-       if (err)
-               return err;
-       if (!old_name || !*old_name)
-               return -EINVAL;
-       if (path_init(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd))
--              err = path_walk(old_name, &old_nd);
--      if (err)
-+              err = path_walk_it(old_name, &old_nd, &it);
-+      if (err) {
-+              intent_release(&it);
-               return err;
-+      }
- 
-       down_write(&current->namespace->sem);
-       err = -EINVAL;
-@@ -522,6 +527,7 @@
-       }
- 
-       up_write(&current->namespace->sem);
-+      intent_release(&it);
-       path_release(&old_nd);
-       return err;
- }
-@@ -725,6 +731,7 @@
-                 unsigned long flags, void *data_page)
- {
-       struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       int retval = 0;
-       int mnt_flags = 0;
- 
-@@ -750,9 +757,11 @@
- 
-       /* ... and get the mountpoint */
-       if (path_init(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd))
--              retval = path_walk(dir_name, &nd);
--      if (retval)
-+              retval = path_walk_it(dir_name, &nd, &it);
-+      if (retval) {
-+              intent_release(&it);
-               return retval;
-+      }
- 
-       if (flags & MS_REMOUNT)
-               retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
-@@ -764,6 +773,8 @@
-       else
-               retval = do_add_mount(&nd, type_page, flags, mnt_flags,
-                                     dev_name, data_page);
-+
-+      intent_release(&it);
-       path_release(&nd);
-       return retval;
- }
-@@ -929,6 +940,8 @@
- {
-       struct vfsmount *tmp;
-       struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd;
-+      struct lookup_intent new_it = { .it_op = IT_GETATTR };
-+      struct lookup_intent old_it = { .it_op = IT_GETATTR };
-       char *name;
-       int error;
- 
-@@ -943,7 +956,7 @@
-               goto out0;
-       error = 0;
-       if (path_init(name, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd))
--              error = path_walk(name, &new_nd);
-+              error = path_walk_it(name, &new_nd, &new_it);
-       putname(name);
-       if (error)
-               goto out0;
-@@ -957,7 +970,7 @@
-               goto out1;
-       error = 0;
-       if (path_init(name, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd))
--              error = path_walk(name, &old_nd);
-+              error = path_walk_it(name, &old_nd, &old_it);
-       putname(name);
-       if (error)
-               goto out1;
-@@ -1013,8 +1026,10 @@
-       up(&old_nd.dentry->d_inode->i_zombie);
-       up_write(&current->namespace->sem);
-       path_release(&user_nd);
-+      intent_release(&old_it);
-       path_release(&old_nd);
- out1:
-+      intent_release(&new_it);
-       path_release(&new_nd);
- out0:
-       unlock_kernel();
  Index: linux-2.4.19.SuSE/fs/namei.c
  ===================================================================
  --- linux-2.4.19.SuSE.orig/fs/namei.c  Mon Jan 27 05:08:07 2003
@@ -357,7 +239,7 @@ Index: linux-2.4.19.SuSE/fs/namei.c
                 }
                 /* This does the actual lookups.. */
  -              dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
-+              dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE,NULL);
++              dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL);
                 if (!dentry) {
                         err = -EWOULDBLOCKIO;
                         if (atomic)
@@ -433,7 +315,7 @@ Index: linux-2.4.19.SuSE/fs/namei.c
  +                              if (err)
  +                                      break;
  +                              new = real_lookup(dentry->d_parent,
-+                                                &dentry->d_name, 0, NULL);
++                                                &dentry->d_name, 0, it);
  +                              d_invalidate(dentry);
  +                              dput(dentry);
  +                              if (IS_ERR(new)) {
@@ -442,6 +324,8 @@ Index: linux-2.4.19.SuSE/fs/namei.c
  +                              }
  +                              nd->dentry = new;
  +                      }
++                      if (!nd->dentry->d_inode)
++                              goto no_inode;
  +              } else
                 if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
                         err = -ESTALE;
@@ -722,12 +606,16 @@ Index: linux-2.4.19.SuSE/fs/namei.c
         if (IS_ERR(dentry))
                 goto fail;
         if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
-@@ -1286,7 +1414,16 @@
+@@ -1286,7 +1414,20 @@
                 error = path_walk(tmp, &nd);
         if (error)
                 goto out;
  -      dentry = lookup_create(&nd, 0);
  +
++      if (nd.last_type != LAST_NORM) {
++              error = -EEXIST;
++              goto out2;
++      }
  +      if (nd.dentry->d_inode->i_op->mknod_raw) {
  +              struct inode_operations *op = nd.dentry->d_inode->i_op;
  +              error = op->mknod_raw(&nd, mode, dev);
@@ -748,11 +636,15 @@ Index: linux-2.4.19.SuSE/fs/namei.c
         path_release(&nd);
   out:
         putname(tmp);
-@@ -1356,7 +1494,14 @@
+@@ -1356,7 +1494,18 @@
                         error = path_walk(tmp, &nd);
                 if (error)
                         goto out;
  -              dentry = lookup_create(&nd, 1);
++              if (nd.last_type != LAST_NORM) {
++                      error = -EEXIST;
++                      goto out2;
++              }
  +              if (nd.dentry->d_inode->i_op->mkdir_raw) {
  +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
  +                      error = op->mkdir_raw(&nd, mode);
@@ -807,11 +699,15 @@ Index: linux-2.4.19.SuSE/fs/namei.c
         error = PTR_ERR(dentry);
         if (!IS_ERR(dentry)) {
                 /* Why not before? Because we want correct error value */
-@@ -1595,15 +1756,23 @@
+@@ -1595,15 +1756,27 @@
                         error = path_walk(to, &nd);
                 if (error)
                         goto out;
  -              dentry = lookup_create(&nd, 0);
++              if (nd.last_type != LAST_NORM) {
++                      error = -EEXIST;
++                      goto out2;
++              }
  +              if (nd.dentry->d_inode->i_op->symlink_raw) {
  +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
  +                      error = op->symlink_raw(&nd, from);
@@ -838,6 +734,10 @@ Index: linux-2.4.19.SuSE/fs/namei.c
                 if (old_nd.mnt != nd.mnt)
                         goto out_release;
  -              new_dentry = lookup_create(&nd, 0);
++              if (nd.last_type != LAST_NORM) {
++                      error = -EEXIST;
++                      goto out_release;
++              }
  +              if (nd.dentry->d_inode->i_op->link_raw) {
  +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
  +                      error = op->link_raw(&old_nd, &nd);
@@ -959,6 +859,124 @@ Index: linux-2.4.19.SuSE/fs/namei.c
         if (page) {
                 kunmap(page);
                 page_cache_release(page);
+Index: linux-2.4.19.SuSE/fs/namespace.c
+===================================================================
+--- linux-2.4.19.SuSE.orig/fs/namespace.c      Mon Jan 27 05:08:07 2003
++++ linux-2.4.19.SuSE/fs/namespace.c   Sat Nov 15 17:56:42 2003
+@@ -97,6 +97,7 @@
+ {
+       old_nd->dentry = mnt->mnt_mountpoint;
+       old_nd->mnt = mnt->mnt_parent;
++      UNPIN(old_nd->dentry, old_nd->mnt, 1);
+       mnt->mnt_parent = mnt;
+       mnt->mnt_mountpoint = mnt->mnt_root;
+       list_del_init(&mnt->mnt_child);
+@@ -108,6 +109,7 @@
+ {
+       mnt->mnt_parent = mntget(nd->mnt);
+       mnt->mnt_mountpoint = dget(nd->dentry);
++      PIN(nd->dentry, nd->mnt, 1);
+       list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry));
+       list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts);
+       nd->dentry->d_mounted++;
+@@ -491,15 +493,18 @@
+ {
+       struct nameidata old_nd;
+       struct vfsmount *mnt = NULL;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+       int err = mount_is_safe(nd);
+       if (err)
+               return err;
+       if (!old_name || !*old_name)
+               return -EINVAL;
+       if (path_init(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd))
+-              err = path_walk(old_name, &old_nd);
+-      if (err)
++              err = path_walk_it(old_name, &old_nd, &it);
++      if (err) {
++              intent_release(&it);
+               return err;
++      }
+ 
+       down_write(&current->namespace->sem);
+       err = -EINVAL;
+@@ -522,6 +527,7 @@
+       }
+ 
+       up_write(&current->namespace->sem);
++      intent_release(&it);
+       path_release(&old_nd);
+       return err;
+ }
+@@ -725,6 +731,7 @@
+                 unsigned long flags, void *data_page)
+ {
+       struct nameidata nd;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+       int retval = 0;
+       int mnt_flags = 0;
+ 
+@@ -750,9 +757,11 @@
+ 
+       /* ... and get the mountpoint */
+       if (path_init(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd))
+-              retval = path_walk(dir_name, &nd);
+-      if (retval)
++              retval = path_walk_it(dir_name, &nd, &it);
++      if (retval) {
++              intent_release(&it);
+               return retval;
++      }
+ 
+       if (flags & MS_REMOUNT)
+               retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
+@@ -764,6 +773,8 @@
+       else
+               retval = do_add_mount(&nd, type_page, flags, mnt_flags,
+                                     dev_name, data_page);
++
++      intent_release(&it);
+       path_release(&nd);
+       return retval;
+ }
+@@ -929,6 +940,8 @@
+ {
+       struct vfsmount *tmp;
+       struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd;
++      struct lookup_intent new_it = { .it_op = IT_GETATTR };
++      struct lookup_intent old_it = { .it_op = IT_GETATTR };
+       char *name;
+       int error;
+ 
+@@ -943,7 +956,7 @@
+               goto out0;
+       error = 0;
+       if (path_init(name, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd))
+-              error = path_walk(name, &new_nd);
++              error = path_walk_it(name, &new_nd, &new_it);
+       putname(name);
+       if (error)
+               goto out0;
+@@ -957,7 +970,7 @@
+               goto out1;
+       error = 0;
+       if (path_init(name, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd))
+-              error = path_walk(name, &old_nd);
++              error = path_walk_it(name, &old_nd, &old_it);
+       putname(name);
+       if (error)
+               goto out1;
+@@ -1013,8 +1026,10 @@
+       up(&old_nd.dentry->d_inode->i_zombie);
+       up_write(&current->namespace->sem);
+       path_release(&user_nd);
++      intent_release(&old_it);
+       path_release(&old_nd);
+ out1:
++      intent_release(&new_it);
+       path_release(&new_nd);
+ out0:
+       unlock_kernel();
  Index: linux-2.4.19.SuSE/fs/open.c
  ===================================================================
  --- linux-2.4.19.SuSE.orig/fs/open.c   Mon Jan 27 05:08:00 2003
@@ -1377,7 +1395,7 @@ Index: linux-2.4.19.SuSE/fs/stat.c
  ===================================================================
  --- linux-2.4.19.SuSE.orig/fs/stat.c   Mon Jan 27 05:08:00 2003
  +++ linux-2.4.19.SuSE/fs/stat.c        Sat Nov 15 17:29:03 2003
-@@ -17,10 +17,14 @@
+@@ -17,10 +17,16 @@
    * Revalidate the inode. This is required for proper NFS attribute caching.
    */
   static __inline__ int
@@ -1386,8 +1404,6 @@ Index: linux-2.4.19.SuSE/fs/stat.c
   {
         struct inode * inode = dentry->d_inode;
  -      if (inode->i_op && inode->i_op->revalidate)
-+      if (!inode)
-+              return -ENOENT;
  +      if (inode->i_op && inode->i_op->revalidate_it)
  +              return inode->i_op->revalidate_it(dentry, it);
  +      else if (inode->i_op && inode->i_op->revalidate)
@@ -1641,7 +1657,7 @@ Index: linux-2.4.19.SuSE/include/linux/fs.h
   #define ATTR_ATTR_FLAG        1024
  +#define ATTR_RAW      0x0800  /* file system, not vfs will massage attrs */
  +#define ATTR_FROM_OPEN        0x1000  /* called from open path, ie O_TRUNC */
-+#define ATTR_CTIME_SET 0x2000
++#define ATTR_CTIME_SET        0x2000
   
   /*
    * This is the Inode Attributes structure, used for notify_change().  It
@@ -1784,18 +1800,25 @@ Index: linux-2.4.19.SuSE/include/linux/fs_struct.h
                 dput(old_pwd);
                 mntput(old_pwdmnt);
         }
-Index: linux-2.4.19.SuSE/kernel/ksyms.c
+Index: linux-2.4.19.SuSE/kernel/exit.c
  ===================================================================
---- linux-2.4.19.SuSE.orig/kernel/ksyms.c      Sat Nov 15 17:24:46 2003
-+++ linux-2.4.19.SuSE/kernel/ksyms.c   Sat Nov 15 17:29:03 2003
-@@ -315,6 +315,7 @@
- EXPORT_SYMBOL(set_page_dirty);
- EXPORT_SYMBOL(vfs_readlink);
- EXPORT_SYMBOL(vfs_follow_link);
-+EXPORT_SYMBOL(vfs_follow_link_it);
- EXPORT_SYMBOL(page_readlink);
- EXPORT_SYMBOL(page_follow_link);
- EXPORT_SYMBOL(page_symlink_inode_operations);
+--- linux-2.4.19.SuSE.orig/kernel/exit.c       Mon Jan 27 05:08:16 2003
++++ linux-2.4.19.SuSE/kernel/exit.c    Sat Nov 15 17:29:03 2003
+@@ -288,11 +288,14 @@
+ {
+       /* No need to hold fs->lock if we are killing it */
+       if (atomic_dec_and_test(&fs->count)) {
++              UNPIN(fs->pwd, fs->pwdmnt, 0);
++              UNPIN(fs->root, fs->rootmnt, 1);
+               dput(fs->root);
+               mntput(fs->rootmnt);
+               dput(fs->pwd);
+               mntput(fs->pwdmnt);
+               if (fs->altroot) {
++                      UNPIN(fs->altroot, fs->altrootmnt, 1);
+                       dput(fs->altroot);
+                       mntput(fs->altrootmnt);
+               }
  Index: linux-2.4.19.SuSE/kernel/fork.c
  ===================================================================
  --- linux-2.4.19.SuSE.orig/kernel/fork.c       Mon Jan 27 05:08:56 2003
@@ -1814,22 +1837,15 @@ Index: linux-2.4.19.SuSE/kernel/fork.c
                         fs->altrootmnt = mntget(old->altrootmnt);
                         fs->altroot = dget(old->altroot);
                 } else {
-Index: linux-2.4.19.SuSE/kernel/exit.c
+Index: linux-2.4.19.SuSE/kernel/ksyms.c
  ===================================================================
---- linux-2.4.19.SuSE.orig/kernel/exit.c       Mon Jan 27 05:08:16 2003
-+++ linux-2.4.19.SuSE/kernel/exit.c    Sat Nov 15 17:29:03 2003
-@@ -288,11 +288,14 @@
- {
-       /* No need to hold fs->lock if we are killing it */
-       if (atomic_dec_and_test(&fs->count)) {
-+              UNPIN(fs->pwd, fs->pwdmnt, 0);
-+              UNPIN(fs->root, fs->rootmnt, 1);
-               dput(fs->root);
-               mntput(fs->rootmnt);
-               dput(fs->pwd);
-               mntput(fs->pwdmnt);
-               if (fs->altroot) {
-+                      UNPIN(fs->altroot, fs->altrootmnt, 1);
-                       dput(fs->altroot);
-                       mntput(fs->altrootmnt);
-               }
+--- linux-2.4.19.SuSE.orig/kernel/ksyms.c      Sat Nov 15 17:24:46 2003
++++ linux-2.4.19.SuSE/kernel/ksyms.c   Sat Nov 15 17:29:03 2003
+@@ -315,6 +315,7 @@
+ EXPORT_SYMBOL(set_page_dirty);
+ EXPORT_SYMBOL(vfs_readlink);
+ EXPORT_SYMBOL(vfs_follow_link);
++EXPORT_SYMBOL(vfs_follow_link_it);
+ EXPORT_SYMBOL(page_readlink);
+ EXPORT_SYMBOL(page_follow_link);
+ EXPORT_SYMBOL(page_symlink_inode_operations);
diff --git a/lustre/kernel_patches/patches/vfs_intent-2.4.20-hp.patch b/lustre/kernel_patches/patches/vfs_intent-2.4.20-hp.patch

index 8585701..424d90e 100644 (file)
--- a/lustre/kernel_patches/patches/vfs_intent-2.4.20-hp.patch
+++ b/lustre/kernel_patches/patches/vfs_intent-2.4.20-hp.patch
@@ -13,6 +13,47 @@
   kernel/ksyms.c            |    1 
   13 files changed, 591 insertions(+), 133 deletions(-)
  
+Index: linux/fs/dcache.c
+===================================================================
+--- linux.orig/fs/dcache.c     Thu Nov 28 18:53:15 2002
++++ linux/fs/dcache.c  Wed Mar 17 13:11:25 2004
+@@ -181,6 +181,13 @@
+               spin_unlock(&dcache_lock);
+               return 0;
+       }
++
++      /* network invalidation by Lustre */
++      if (dentry->d_flags & DCACHE_LUSTRE_INVALID) {
++              spin_unlock(&dcache_lock);
++              return 0;
++      }
++
+       /*
+        * Check whether to do a partial shrink_dcache
+        * to get rid of unused child entries.
+@@ -830,13 +837,19 @@
+  * Adds a dentry to the hash according to its name.
+  */
+  
+-void d_rehash(struct dentry * entry)
++void __d_rehash(struct dentry * entry, int lock)
+ {
+       struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
+       if (!list_empty(&entry->d_hash)) BUG();
+-      spin_lock(&dcache_lock);
++      if (lock) spin_lock(&dcache_lock);
+       list_add(&entry->d_hash, list);
+-      spin_unlock(&dcache_lock);
++      if (lock) spin_unlock(&dcache_lock);
++}
++EXPORT_SYMBOL(__d_rehash);
++
++void d_rehash(struct dentry * entry)
++{
++      __d_rehash(entry, 1);
+ }
+ 
+ #define do_switch(x,y) do { \
  Index: linux/fs/exec.c
  ===================================================================
  --- linux.orig/fs/exec.c       Wed Mar 17 13:00:38 2004
@@ -78,164 +119,6 @@ Index: linux/fs/exec.c
                 goto close_fail;
   
         retval = binfmt->core_dump(signr, regs, file);
-Index: linux/fs/dcache.c
-===================================================================
---- linux.orig/fs/dcache.c     Thu Nov 28 18:53:15 2002
-+++ linux/fs/dcache.c  Wed Mar 17 13:11:25 2004
-@@ -181,6 +181,13 @@
-               spin_unlock(&dcache_lock);
-               return 0;
-       }
-+
-+      /* network invalidation by Lustre */
-+      if (dentry->d_flags & DCACHE_LUSTRE_INVALID) {
-+              spin_unlock(&dcache_lock);
-+              return 0;
-+      }
-+
-       /*
-        * Check whether to do a partial shrink_dcache
-        * to get rid of unused child entries.
-@@ -830,13 +837,19 @@
-  * Adds a dentry to the hash according to its name.
-  */
-  
--void d_rehash(struct dentry * entry)
-+void __d_rehash(struct dentry * entry, int lock)
- {
-       struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
-       if (!list_empty(&entry->d_hash)) BUG();
--      spin_lock(&dcache_lock);
-+      if (lock) spin_lock(&dcache_lock);
-       list_add(&entry->d_hash, list);
--      spin_unlock(&dcache_lock);
-+      if (lock) spin_unlock(&dcache_lock);
-+}
-+EXPORT_SYMBOL(__d_rehash);
-+
-+void d_rehash(struct dentry * entry)
-+{
-+      __d_rehash(entry, 1);
- }
- 
- #define do_switch(x,y) do { \
-Index: linux/fs/namespace.c
-===================================================================
---- linux.orig/fs/namespace.c  Thu Nov 28 18:53:15 2002
-+++ linux/fs/namespace.c       Wed Mar 17 13:11:25 2004
-@@ -99,6 +99,7 @@
- {
-       old_nd->dentry = mnt->mnt_mountpoint;
-       old_nd->mnt = mnt->mnt_parent;
-+      UNPIN(old_nd->dentry, old_nd->mnt, 1);
-       mnt->mnt_parent = mnt;
-       mnt->mnt_mountpoint = mnt->mnt_root;
-       list_del_init(&mnt->mnt_child);
-@@ -110,6 +111,7 @@
- {
-       mnt->mnt_parent = mntget(nd->mnt);
-       mnt->mnt_mountpoint = dget(nd->dentry);
-+      PIN(nd->dentry, nd->mnt, 1);
-       list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry));
-       list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts);
-       nd->dentry->d_mounted++;
-@@ -485,14 +487,17 @@
- {
-       struct nameidata old_nd;
-       struct vfsmount *mnt = NULL;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       int err = mount_is_safe(nd);
-       if (err)
-               return err;
-       if (!old_name || !*old_name)
-               return -EINVAL;
--      err = path_lookup(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd);
--      if (err)
-+      err = path_lookup_it(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd, &it);
-+      if (err) {
-+              intent_release(&it);
-               return err;
-+      }
- 
-       down_write(&current->namespace->sem);
-       err = -EINVAL;
-@@ -515,6 +520,7 @@
-       }
- 
-       up_write(&current->namespace->sem);
-+      intent_release(&it);
-       path_release(&old_nd);
-       return err;
- }
-@@ -698,6 +704,7 @@
-                 unsigned long flags, void *data_page)
- {
-       struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       int retval = 0;
-       int mnt_flags = 0;
- 
-@@ -722,10 +729,11 @@
-       flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV);
- 
-       /* ... and get the mountpoint */
--      retval = path_lookup(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
--      if (retval)
-+      retval = path_lookup_it(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it);
-+      if (retval) {
-+              intent_release(&it);
-               return retval;
--
-+      }
-       if (flags & MS_REMOUNT)
-               retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
-                                   data_page);
-@@ -736,6 +744,8 @@
-       else
-               retval = do_add_mount(&nd, type_page, flags, mnt_flags,
-                                     dev_name, data_page);
-+
-+      intent_release(&it);
-       path_release(&nd);
-       return retval;
- }
-@@ -901,6 +911,8 @@
- {
-       struct vfsmount *tmp;
-       struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd;
-+      struct lookup_intent new_it = { .it_op = IT_GETATTR };
-+      struct lookup_intent old_it = { .it_op = IT_GETATTR };
-       int error;
- 
-       if (!capable(CAP_SYS_ADMIN))
-@@ -908,14 +920,14 @@
- 
-       lock_kernel();
- 
--      error = __user_walk(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd);
-+      error = __user_walk_it(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd, &new_it);
-       if (error)
-               goto out0;
-       error = -EINVAL;
-       if (!check_mnt(new_nd.mnt))
-               goto out1;
- 
--      error = __user_walk(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd);
-+      error = __user_walk_it(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd, &old_it);
-       if (error)
-               goto out1;
- 
-@@ -970,8 +982,10 @@
-       up(&old_nd.dentry->d_inode->i_zombie);
-       up_write(&current->namespace->sem);
-       path_release(&user_nd);
-+      intent_release(&old_it);
-       path_release(&old_nd);
- out1:
-+      intent_release(&new_it);
-       path_release(&new_nd);
- out0:
-       unlock_kernel();
  Index: linux/fs/namei.c
  ===================================================================
  --- linux.orig/fs/namei.c      Wed Mar 17 13:00:37 2004
@@ -487,7 +370,7 @@ Index: linux/fs/namei.c
  +                              if (err)
  +                                      break;
  +                              new = real_lookup(dentry->d_parent,
-+                                                &dentry->d_name, 0, NULL);
++                                                &dentry->d_name, 0, it);
  +                              d_invalidate(dentry);
  +                              dput(dentry);
  +                              if (IS_ERR(new)) {
@@ -1065,6 +948,123 @@ Index: linux/fs/namei.c
         if (page) {
                 kunmap(page);
                 page_cache_release(page);
+Index: linux/fs/namespace.c
+===================================================================
+--- linux.orig/fs/namespace.c  Thu Nov 28 18:53:15 2002
++++ linux/fs/namespace.c       Wed Mar 17 13:11:25 2004
+@@ -99,6 +99,7 @@
+ {
+       old_nd->dentry = mnt->mnt_mountpoint;
+       old_nd->mnt = mnt->mnt_parent;
++      UNPIN(old_nd->dentry, old_nd->mnt, 1);
+       mnt->mnt_parent = mnt;
+       mnt->mnt_mountpoint = mnt->mnt_root;
+       list_del_init(&mnt->mnt_child);
+@@ -110,6 +111,7 @@
+ {
+       mnt->mnt_parent = mntget(nd->mnt);
+       mnt->mnt_mountpoint = dget(nd->dentry);
++      PIN(nd->dentry, nd->mnt, 1);
+       list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry));
+       list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts);
+       nd->dentry->d_mounted++;
+@@ -485,14 +487,17 @@
+ {
+       struct nameidata old_nd;
+       struct vfsmount *mnt = NULL;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+       int err = mount_is_safe(nd);
+       if (err)
+               return err;
+       if (!old_name || !*old_name)
+               return -EINVAL;
+-      err = path_lookup(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd);
+-      if (err)
++      err = path_lookup_it(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd, &it);
++      if (err) {
++              intent_release(&it);
+               return err;
++      }
+ 
+       down_write(&current->namespace->sem);
+       err = -EINVAL;
+@@ -515,6 +520,7 @@
+       }
+ 
+       up_write(&current->namespace->sem);
++      intent_release(&it);
+       path_release(&old_nd);
+       return err;
+ }
+@@ -698,6 +704,7 @@
+                 unsigned long flags, void *data_page)
+ {
+       struct nameidata nd;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+       int retval = 0;
+       int mnt_flags = 0;
+ 
+@@ -722,10 +729,11 @@
+       flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV);
+ 
+       /* ... and get the mountpoint */
+-      retval = path_lookup(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
+-      if (retval)
++      retval = path_lookup_it(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it);
++      if (retval) {
++              intent_release(&it);
+               return retval;
+-
++      }
+       if (flags & MS_REMOUNT)
+               retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
+                                   data_page);
+@@ -736,6 +744,8 @@
+       else
+               retval = do_add_mount(&nd, type_page, flags, mnt_flags,
+                                     dev_name, data_page);
++
++      intent_release(&it);
+       path_release(&nd);
+       return retval;
+ }
+@@ -901,6 +911,8 @@
+ {
+       struct vfsmount *tmp;
+       struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd;
++      struct lookup_intent new_it = { .it_op = IT_GETATTR };
++      struct lookup_intent old_it = { .it_op = IT_GETATTR };
+       int error;
+ 
+       if (!capable(CAP_SYS_ADMIN))
+@@ -908,14 +920,14 @@
+ 
+       lock_kernel();
+ 
+-      error = __user_walk(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd);
++      error = __user_walk_it(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd, &new_it);
+       if (error)
+               goto out0;
+       error = -EINVAL;
+       if (!check_mnt(new_nd.mnt))
+               goto out1;
+ 
+-      error = __user_walk(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd);
++      error = __user_walk_it(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd, &old_it);
+       if (error)
+               goto out1;
+ 
+@@ -970,8 +982,10 @@
+       up(&old_nd.dentry->d_inode->i_zombie);
+       up_write(&current->namespace->sem);
+       path_release(&user_nd);
++      intent_release(&old_it);
+       path_release(&old_nd);
+ out1:
++      intent_release(&new_it);
+       path_release(&new_nd);
+ out0:
+       unlock_kernel();
  Index: linux/fs/open.c
  ===================================================================
  --- linux.orig/fs/open.c       Thu Nov 28 18:53:15 2002
@@ -1467,6 +1467,20 @@ Index: linux/fs/open.c
   /*
    * Find an empty file descriptor entry, and mark it busy.
    */
+Index: linux/fs/proc/base.c
+===================================================================
+--- linux.orig/fs/proc/base.c  Wed Mar 17 13:00:35 2004
++++ linux/fs/proc/base.c       Wed Mar 17 13:11:25 2004
+@@ -481,6 +481,9 @@
+ 
+       error = inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt);
+       nd->last_type = LAST_BIND;
++
++      if (nd->intent != NULL)
++              nd->intent->d.lustre.it_int_flags |= IT_FL_FOLLOWED;
+ out:
+       return error;
+ }
  Index: linux/fs/stat.c
  ===================================================================
  --- linux.orig/fs/stat.c       Thu Sep 13 19:04:43 2001
@@ -1628,20 +1642,6 @@ Index: linux/fs/stat.c
                 if (!err)
                         err = cp_new_stat64(dentry->d_inode, statbuf);
                 fput(f);
-Index: linux/fs/proc/base.c
-===================================================================
---- linux.orig/fs/proc/base.c  Wed Mar 17 13:00:35 2004
-+++ linux/fs/proc/base.c       Wed Mar 17 13:11:25 2004
-@@ -481,6 +481,9 @@
- 
-       error = inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt);
-       nd->last_type = LAST_BIND;
-+
-+      if (nd->intent != NULL)
-+              nd->intent->d.lustre.it_int_flags |= IT_FL_FOLLOWED;
- out:
-       return error;
- }
  Index: linux/include/linux/dcache.h
  ===================================================================
  --- linux.orig/include/linux/dcache.h  Thu Nov 28 18:53:15 2002
@@ -1747,7 +1747,7 @@ Index: linux/include/linux/fs.h
   #define ATTR_ATTR_FLAG        1024
  +#define ATTR_RAW      0x0800  /* file system, not vfs will massage attrs */
  +#define ATTR_FROM_OPEN        0x1000  /* called from open path, ie O_TRUNC */
-+#define ATTR_CTIME_SET 0x2000
++#define ATTR_CTIME_SET        0x2000
   
   /*
    * This is the Inode Attributes structure, used for notify_change().  It
@@ -1890,18 +1890,25 @@ Index: linux/include/linux/fs_struct.h
                 dput(old_pwd);
                 mntput(old_pwdmnt);
         }
-Index: linux/kernel/ksyms.c
+Index: linux/kernel/exit.c
  ===================================================================
---- linux.orig/kernel/ksyms.c  Wed Mar 17 13:11:23 2004
-+++ linux/kernel/ksyms.c       Wed Mar 17 13:11:25 2004
-@@ -315,6 +315,7 @@
- EXPORT_SYMBOL(set_page_dirty);
- EXPORT_SYMBOL(vfs_readlink);
- EXPORT_SYMBOL(vfs_follow_link);
-+EXPORT_SYMBOL(vfs_follow_link_it);
- EXPORT_SYMBOL(page_readlink);
- EXPORT_SYMBOL(page_follow_link);
- EXPORT_SYMBOL(page_symlink_inode_operations);
+--- linux.orig/kernel/exit.c   Wed Mar 17 13:00:38 2004
++++ linux/kernel/exit.c        Wed Mar 17 13:11:25 2004
+@@ -239,11 +239,14 @@
+ {
+       /* No need to hold fs->lock if we are killing it */
+       if (atomic_dec_and_test(&fs->count)) {
++              UNPIN(fs->pwd, fs->pwdmnt, 0);
++              UNPIN(fs->root, fs->rootmnt, 1);
+               dput(fs->root);
+               mntput(fs->rootmnt);
+               dput(fs->pwd);
+               mntput(fs->pwdmnt);
+               if (fs->altroot) {
++                      UNPIN(fs->altroot, fs->altrootmnt, 1);
+                       dput(fs->altroot);
+                       mntput(fs->altrootmnt);
+               }
  Index: linux/kernel/fork.c
  ===================================================================
  --- linux.orig/kernel/fork.c   Wed Mar 17 13:00:38 2004
@@ -1920,22 +1927,15 @@ Index: linux/kernel/fork.c
                         fs->altrootmnt = mntget(old->altrootmnt);
                         fs->altroot = dget(old->altroot);
                 } else {
-Index: linux/kernel/exit.c
+Index: linux/kernel/ksyms.c
  ===================================================================
---- linux.orig/kernel/exit.c   Wed Mar 17 13:00:38 2004
-+++ linux/kernel/exit.c        Wed Mar 17 13:11:25 2004
-@@ -239,11 +239,14 @@
- {
-       /* No need to hold fs->lock if we are killing it */
-       if (atomic_dec_and_test(&fs->count)) {
-+              UNPIN(fs->pwd, fs->pwdmnt, 0);
-+              UNPIN(fs->root, fs->rootmnt, 1);
-               dput(fs->root);
-               mntput(fs->rootmnt);
-               dput(fs->pwd);
-               mntput(fs->pwdmnt);
-               if (fs->altroot) {
-+                      UNPIN(fs->altroot, fs->altrootmnt, 1);
-                       dput(fs->altroot);
-                       mntput(fs->altrootmnt);
-               }
+--- linux.orig/kernel/ksyms.c  Wed Mar 17 13:11:23 2004
++++ linux/kernel/ksyms.c       Wed Mar 17 13:11:25 2004
+@@ -315,6 +315,7 @@
+ EXPORT_SYMBOL(set_page_dirty);
+ EXPORT_SYMBOL(vfs_readlink);
+ EXPORT_SYMBOL(vfs_follow_link);
++EXPORT_SYMBOL(vfs_follow_link_it);
+ EXPORT_SYMBOL(page_readlink);
+ EXPORT_SYMBOL(page_follow_link);
+ EXPORT_SYMBOL(page_symlink_inode_operations);
diff --git a/lustre/kernel_patches/patches/vfs_intent-2.4.20-rh.patch b/lustre/kernel_patches/patches/vfs_intent-2.4.20-rh.patch

index 409c8f0..37bf227 100644 (file)
--- a/lustre/kernel_patches/patches/vfs_intent-2.4.20-rh.patch
+++ b/lustre/kernel_patches/patches/vfs_intent-2.4.20-rh.patch
@@ -286,7 +286,7 @@ Index: linux-2.4.20/fs/namei.c
   {
         struct dentry *dentry;
         struct inode *inode;
-@@ -526,19 +579,18 @@
+@@ -526,18 +579,18 @@
                                 break;
                 }
                 /* This does the actual lookups.. */
@@ -304,11 +304,10 @@ Index: linux-2.4.20/fs/namei.c
                 }
                 /* Check mountpoints.. */
  -              while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
--                      ;
-+              while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, NULL));
++              while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, NULL))
+                       ;
   
                 err = -ENOENT;
-               inode = dentry->d_inode;
  @@ -549,7 +601,7 @@
                         goto out_dput;
   
@@ -377,7 +376,7 @@ Index: linux-2.4.20/fs/namei.c
  +                              if (err)
  +                                      break;
  +                              new = real_lookup(dentry->d_parent,
-+                                                &dentry->d_name, 0, NULL);
++                                                &dentry->d_name, 0, it);
  +                              d_invalidate(dentry);
  +                              dput(dentry);
  +                              if (IS_ERR(new)) {
@@ -1011,7 +1010,7 @@ Index: linux-2.4.20/fs/namespace.c
         int retval = 0;
         int mnt_flags = 0;
   
-@@ -722,10 +729,11 @@
+@@ -722,9 +729,11 @@
         flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV);
   
         /* ... and get the mountpoint */
@@ -1021,11 +1020,10 @@ Index: linux-2.4.20/fs/namespace.c
  +      if (retval) {
  +              intent_release(&it);
                 return retval;
--
  +      }
+ 
         if (flags & MS_REMOUNT)
                 retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
-                                   data_page);
  @@ -736,6 +744,8 @@
         else
                 retval = do_add_mount(&nd, type_page, flags, mnt_flags,
@@ -1283,7 +1281,7 @@ Index: linux-2.4.20/fs/open.c
  -      error = __user_walk(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW |
  -                    LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd);
  +      error = __user_walk_it(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW |
-+                    LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd, &it);
++                             LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd, &it);
         if (error)
                 goto out;
   
@@ -1680,7 +1678,7 @@ Index: linux-2.4.20/include/linux/fs.h
   #define ATTR_ATTR_FLAG        1024
  +#define ATTR_RAW      0x0800  /* file system, not vfs will massage attrs */
  +#define ATTR_FROM_OPEN        0x1000  /* called from open path, ie O_TRUNC */
-+#define ATTR_CTIME_SET 0x2000
++#define ATTR_CTIME_SET        0x2000
   
   /*
    * This is the Inode Attributes structure, used for notify_change().  It
@@ -1823,18 +1821,25 @@ Index: linux-2.4.20/include/linux/fs_struct.h
                 dput(old_pwd);
                 mntput(old_pwdmnt);
         }
-Index: linux-2.4.20/kernel/ksyms.c
+Index: linux-2.4.20/kernel/exit.c
  ===================================================================
---- linux-2.4.20.orig/kernel/ksyms.c   Wed Mar 17 13:57:11 2004
-+++ linux-2.4.20/kernel/ksyms.c        Wed Mar 17 13:57:11 2004
-@@ -297,6 +297,7 @@
- EXPORT_SYMBOL(set_page_dirty);
- EXPORT_SYMBOL(vfs_readlink);
- EXPORT_SYMBOL(vfs_follow_link);
-+EXPORT_SYMBOL(vfs_follow_link_it);
- EXPORT_SYMBOL(page_readlink);
- EXPORT_SYMBOL(page_follow_link);
- EXPORT_SYMBOL(page_symlink_inode_operations);
+--- linux-2.4.20.orig/kernel/exit.c    Wed Mar 17 13:57:05 2004
++++ linux-2.4.20/kernel/exit.c Wed Mar 17 13:57:11 2004
+@@ -345,11 +345,14 @@
+ {
+       /* No need to hold fs->lock if we are killing it */
+       if (atomic_dec_and_test(&fs->count)) {
++              UNPIN(fs->pwd, fs->pwdmnt, 0);
++              UNPIN(fs->root, fs->rootmnt, 1);
+               dput(fs->root);
+               mntput(fs->rootmnt);
+               dput(fs->pwd);
+               mntput(fs->pwdmnt);
+               if (fs->altroot) {
++                      UNPIN(fs->altroot, fs->altrootmnt, 1);
+                       dput(fs->altroot);
+                       mntput(fs->altrootmnt);
+               }
  Index: linux-2.4.20/kernel/fork.c
  ===================================================================
  --- linux-2.4.20.orig/kernel/fork.c    Wed Mar 17 13:57:05 2004
@@ -1853,22 +1858,15 @@ Index: linux-2.4.20/kernel/fork.c
                         fs->altrootmnt = mntget(old->altrootmnt);
                         fs->altroot = dget(old->altroot);
                 } else {
-Index: linux-2.4.20/kernel/exit.c
+Index: linux-2.4.20/kernel/ksyms.c
  ===================================================================
---- linux-2.4.20.orig/kernel/exit.c    Wed Mar 17 13:57:05 2004
-+++ linux-2.4.20/kernel/exit.c Wed Mar 17 13:57:11 2004
-@@ -345,11 +345,14 @@
- {
-       /* No need to hold fs->lock if we are killing it */
-       if (atomic_dec_and_test(&fs->count)) {
-+              UNPIN(fs->pwd, fs->pwdmnt, 0);
-+              UNPIN(fs->root, fs->rootmnt, 1);
-               dput(fs->root);
-               mntput(fs->rootmnt);
-               dput(fs->pwd);
-               mntput(fs->pwdmnt);
-               if (fs->altroot) {
-+                      UNPIN(fs->altroot, fs->altrootmnt, 1);
-                       dput(fs->altroot);
-                       mntput(fs->altrootmnt);
-               }
+--- linux-2.4.20.orig/kernel/ksyms.c   Wed Mar 17 13:57:11 2004
++++ linux-2.4.20/kernel/ksyms.c        Wed Mar 17 13:57:11 2004
+@@ -297,6 +297,7 @@
+ EXPORT_SYMBOL(set_page_dirty);
+ EXPORT_SYMBOL(vfs_readlink);
+ EXPORT_SYMBOL(vfs_follow_link);
++EXPORT_SYMBOL(vfs_follow_link_it);
+ EXPORT_SYMBOL(page_readlink);
+ EXPORT_SYMBOL(page_follow_link);
+ EXPORT_SYMBOL(page_symlink_inode_operations);
diff --git a/lustre/kernel_patches/patches/vfs_intent-2.4.20-vanilla.patch b/lustre/kernel_patches/patches/vfs_intent-2.4.20-vanilla.patch

index 1ef0b01..dd293f1 100644 (file)
--- a/lustre/kernel_patches/patches/vfs_intent-2.4.20-vanilla.patch
+++ b/lustre/kernel_patches/patches/vfs_intent-2.4.20-vanilla.patch
@@ -12,6 +12,47 @@
   kernel/ksyms.c            |    1 
   12 files changed, 558 insertions(+), 128 deletions(-)
  
+Index: linux-2.4.24/fs/dcache.c
+===================================================================
+--- linux-2.4.24.orig/fs/dcache.c      Fri Jun 13 07:51:37 2003
++++ linux-2.4.24/fs/dcache.c   Wed Mar 17 17:36:14 2004
+@@ -181,6 +181,13 @@ int d_invalidate(struct dentry * dentry)
+               spin_unlock(&dcache_lock);
+               return 0;
+       }
++
++      /* network invalidation by Lustre */
++      if (dentry->d_flags & DCACHE_LUSTRE_INVALID) {
++              spin_unlock(&dcache_lock);
++              return 0;
++      }
++
+       /*
+        * Check whether to do a partial shrink_dcache
+        * to get rid of unused child entries.
+@@ -830,13 +837,19 @@ void d_delete(struct dentry * dentry)
+  * Adds a dentry to the hash according to its name.
+  */
+  
+-void d_rehash(struct dentry * entry)
++void __d_rehash(struct dentry * entry, int lock)
+ {
+       struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
+       if (!list_empty(&entry->d_hash)) BUG();
+-      spin_lock(&dcache_lock);
++      if (lock) spin_lock(&dcache_lock);
+       list_add(&entry->d_hash, list);
+-      spin_unlock(&dcache_lock);
++      if (lock) spin_unlock(&dcache_lock);
++}
++EXPORT_SYMBOL(__d_rehash);
++
++void d_rehash(struct dentry * entry)
++{
++      __d_rehash(entry, 1);
+ }
+ 
+ #define do_switch(x,y) do { \
  Index: linux-2.4.24/fs/exec.c
  ===================================================================
  --- linux-2.4.24.orig/fs/exec.c        Fri Nov 28 10:26:21 2003
@@ -77,164 +118,6 @@ Index: linux-2.4.24/fs/exec.c
                 goto close_fail;
   
         retval = binfmt->core_dump(signr, regs, file);
-Index: linux-2.4.24/fs/dcache.c
-===================================================================
---- linux-2.4.24.orig/fs/dcache.c      Fri Jun 13 07:51:37 2003
-+++ linux-2.4.24/fs/dcache.c   Wed Mar 17 17:36:14 2004
-@@ -181,6 +181,13 @@ int d_invalidate(struct dentry * dentry)
-               spin_unlock(&dcache_lock);
-               return 0;
-       }
-+
-+      /* network invalidation by Lustre */
-+      if (dentry->d_flags & DCACHE_LUSTRE_INVALID) {
-+              spin_unlock(&dcache_lock);
-+              return 0;
-+      }
-+
-       /*
-        * Check whether to do a partial shrink_dcache
-        * to get rid of unused child entries.
-@@ -830,13 +837,19 @@ void d_delete(struct dentry * dentry)
-  * Adds a dentry to the hash according to its name.
-  */
-  
--void d_rehash(struct dentry * entry)
-+void __d_rehash(struct dentry * entry, int lock)
- {
-       struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
-       if (!list_empty(&entry->d_hash)) BUG();
--      spin_lock(&dcache_lock);
-+      if (lock) spin_lock(&dcache_lock);
-       list_add(&entry->d_hash, list);
--      spin_unlock(&dcache_lock);
-+      if (lock) spin_unlock(&dcache_lock);
-+}
-+EXPORT_SYMBOL(__d_rehash);
-+
-+void d_rehash(struct dentry * entry)
-+{
-+      __d_rehash(entry, 1);
- }
- 
- #define do_switch(x,y) do { \
-Index: linux-2.4.24/fs/namespace.c
-===================================================================
---- linux-2.4.24.orig/fs/namespace.c   Fri Nov 28 10:26:21 2003
-+++ linux-2.4.24/fs/namespace.c        Wed Mar 17 17:36:14 2004
-@@ -98,6 +98,7 @@ static void detach_mnt(struct vfsmount *
- {
-       old_nd->dentry = mnt->mnt_mountpoint;
-       old_nd->mnt = mnt->mnt_parent;
-+      UNPIN(old_nd->dentry, old_nd->mnt, 1);
-       mnt->mnt_parent = mnt;
-       mnt->mnt_mountpoint = mnt->mnt_root;
-       list_del_init(&mnt->mnt_child);
-@@ -109,6 +110,7 @@ static void attach_mnt(struct vfsmount *
- {
-       mnt->mnt_parent = mntget(nd->mnt);
-       mnt->mnt_mountpoint = dget(nd->dentry);
-+      PIN(nd->dentry, nd->mnt, 1);
-       list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry));
-       list_add_tail(&mnt->mnt_child, &nd->mnt->mnt_mounts);
-       nd->dentry->d_mounted++;
-@@ -488,14 +490,17 @@ static int do_loopback(struct nameidata 
- {
-       struct nameidata old_nd;
-       struct vfsmount *mnt = NULL;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       int err = mount_is_safe(nd);
-       if (err)
-               return err;
-       if (!old_name || !*old_name)
-               return -EINVAL;
--      err = path_lookup(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd);
--      if (err)
-+      err = path_lookup_it(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd, &it);
-+      if (err) {
-+              intent_release(&it);
-               return err;
-+      }
- 
-       down_write(&current->namespace->sem);
-       err = -EINVAL;
-@@ -518,6 +523,7 @@ static int do_loopback(struct nameidata 
-       }
- 
-       up_write(&current->namespace->sem);
-+      intent_release(&it);
-       path_release(&old_nd);
-       return err;
- }
-@@ -701,6 +707,7 @@ long do_mount(char * dev_name, char * di
-                 unsigned long flags, void *data_page)
- {
-       struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       int retval = 0;
-       int mnt_flags = 0;
- 
-@@ -725,10 +732,11 @@ long do_mount(char * dev_name, char * di
-       flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV);
- 
-       /* ... and get the mountpoint */
--      retval = path_lookup(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
--      if (retval)
-+      retval = path_lookup_it(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it);
-+      if (retval) {
-+              intent_release(&it);
-               return retval;
--
-+      }
-       if (flags & MS_REMOUNT)
-               retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
-                                   data_page);
-@@ -739,6 +747,8 @@ long do_mount(char * dev_name, char * di
-       else
-               retval = do_add_mount(&nd, type_page, flags, mnt_flags,
-                                     dev_name, data_page);
-+
-+      intent_release(&it);
-       path_release(&nd);
-       return retval;
- }
-@@ -904,6 +914,8 @@ asmlinkage long sys_pivot_root(const cha
- {
-       struct vfsmount *tmp;
-       struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd;
-+      struct lookup_intent new_it = { .it_op = IT_GETATTR };
-+      struct lookup_intent old_it = { .it_op = IT_GETATTR };
-       int error;
- 
-       if (!capable(CAP_SYS_ADMIN))
-@@ -911,14 +923,14 @@ asmlinkage long sys_pivot_root(const cha
- 
-       lock_kernel();
- 
--      error = __user_walk(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd);
-+      error = __user_walk_it(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd, &new_it);
-       if (error)
-               goto out0;
-       error = -EINVAL;
-       if (!check_mnt(new_nd.mnt))
-               goto out1;
- 
--      error = __user_walk(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd);
-+      error = __user_walk_it(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd, &old_it);
-       if (error)
-               goto out1;
- 
-@@ -973,8 +985,10 @@ out2:
-       up(&old_nd.dentry->d_inode->i_zombie);
-       up_write(&current->namespace->sem);
-       path_release(&user_nd);
-+      intent_release(&old_it);
-       path_release(&old_nd);
- out1:
-+      intent_release(&new_it);
-       path_release(&new_nd);
- out0:
-       unlock_kernel();
  Index: linux-2.4.24/fs/namei.c
  ===================================================================
  --- linux-2.4.24.orig/fs/namei.c       Mon Aug 25 04:44:43 2003
@@ -424,7 +307,7 @@ Index: linux-2.4.24/fs/namei.c
  +                              if (err)
  +                                      break;
  +                              new = real_lookup(dentry->d_parent,
-+                                                &dentry->d_name, 0, NULL);
++                                                &dentry->d_name, 0, it);
  +                              d_invalidate(dentry);
  +                              dput(dentry);
  +                              if (IS_ERR(new)) {
@@ -985,6 +868,122 @@ Index: linux-2.4.24/fs/namei.c
         if (page) {
                 kunmap(page);
                 page_cache_release(page);
+Index: linux-2.4.24/fs/namespace.c
+===================================================================
+--- linux-2.4.24.orig/fs/namespace.c   Fri Nov 28 10:26:21 2003
++++ linux-2.4.24/fs/namespace.c        Wed Mar 17 17:36:14 2004
+@@ -98,6 +98,7 @@ static void detach_mnt(struct vfsmount *
+ {
+       old_nd->dentry = mnt->mnt_mountpoint;
+       old_nd->mnt = mnt->mnt_parent;
++      UNPIN(old_nd->dentry, old_nd->mnt, 1);
+       mnt->mnt_parent = mnt;
+       mnt->mnt_mountpoint = mnt->mnt_root;
+       list_del_init(&mnt->mnt_child);
+@@ -109,6 +110,7 @@ static void attach_mnt(struct vfsmount *
+ {
+       mnt->mnt_parent = mntget(nd->mnt);
+       mnt->mnt_mountpoint = dget(nd->dentry);
++      PIN(nd->dentry, nd->mnt, 1);
+       list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry));
+       list_add_tail(&mnt->mnt_child, &nd->mnt->mnt_mounts);
+       nd->dentry->d_mounted++;
+@@ -488,14 +490,17 @@ static int do_loopback(struct nameidata 
+ {
+       struct nameidata old_nd;
+       struct vfsmount *mnt = NULL;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+       int err = mount_is_safe(nd);
+       if (err)
+               return err;
+       if (!old_name || !*old_name)
+               return -EINVAL;
+-      err = path_lookup(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd);
+-      if (err)
++      err = path_lookup_it(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd, &it);
++      if (err) {
++              intent_release(&it);
+               return err;
++      }
+ 
+       down_write(&current->namespace->sem);
+       err = -EINVAL;
+@@ -518,6 +523,7 @@ static int do_loopback(struct nameidata 
+       }
+ 
+       up_write(&current->namespace->sem);
++      intent_release(&it);
+       path_release(&old_nd);
+       return err;
+ }
+@@ -701,6 +707,7 @@ long do_mount(char * dev_name, char * di
+                 unsigned long flags, void *data_page)
+ {
+       struct nameidata nd;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+       int retval = 0;
+       int mnt_flags = 0;
+ 
+@@ -725,9 +732,11 @@ long do_mount(char * dev_name, char * di
+       flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV);
+ 
+       /* ... and get the mountpoint */
+-      retval = path_lookup(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
+-      if (retval)
++      retval = path_lookup_it(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it);
++      if (retval) {
++              intent_release(&it);
+               return retval;
++      }
+ 
+       if (flags & MS_REMOUNT)
+               retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
+@@ -739,6 +747,8 @@ long do_mount(char * dev_name, char * di
+       else
+               retval = do_add_mount(&nd, type_page, flags, mnt_flags,
+                                     dev_name, data_page);
++
++      intent_release(&it);
+       path_release(&nd);
+       return retval;
+ }
+@@ -904,6 +914,8 @@ asmlinkage long sys_pivot_root(const cha
+ {
+       struct vfsmount *tmp;
+       struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd;
++      struct lookup_intent new_it = { .it_op = IT_GETATTR };
++      struct lookup_intent old_it = { .it_op = IT_GETATTR };
+       int error;
+ 
+       if (!capable(CAP_SYS_ADMIN))
+@@ -911,14 +923,14 @@ asmlinkage long sys_pivot_root(const cha
+ 
+       lock_kernel();
+ 
+-      error = __user_walk(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd);
++      error = __user_walk_it(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd, &new_it);
+       if (error)
+               goto out0;
+       error = -EINVAL;
+       if (!check_mnt(new_nd.mnt))
+               goto out1;
+ 
+-      error = __user_walk(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd);
++      error = __user_walk_it(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd, &old_it);
+       if (error)
+               goto out1;
+ 
+@@ -973,8 +985,10 @@ out2:
+       up(&old_nd.dentry->d_inode->i_zombie);
+       up_write(&current->namespace->sem);
+       path_release(&user_nd);
++      intent_release(&old_it);
+       path_release(&old_nd);
+ out1:
++      intent_release(&new_it);
+       path_release(&new_nd);
+ out0:
+       unlock_kernel();
  Index: linux-2.4.24/fs/open.c
  ===================================================================
  --- linux-2.4.24.orig/fs/open.c        Mon Aug 25 04:44:43 2003
@@ -1196,7 +1195,7 @@ Index: linux-2.4.24/fs/open.c
  -      error = __user_walk(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW |
  -                    LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd);
  +      error = __user_walk_it(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW |
-+                    LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd, &it);
++                             LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd, &it);
         if (error)
                 goto out;
   
@@ -1653,7 +1652,7 @@ Index: linux-2.4.24/include/linux/fs.h
   #define ATTR_ATTR_FLAG        1024
  +#define ATTR_RAW      0x0800  /* file system, not vfs will massage attrs */
  +#define ATTR_FROM_OPEN        0x1000  /* called from open path, ie O_TRUNC */
-+#define ATTR_CTIME_SET 0x2000
++#define ATTR_CTIME_SET        0x2000
   
   /*
    * This is the Inode Attributes structure, used for notify_change().  It
@@ -1796,18 +1795,25 @@ Index: linux-2.4.24/include/linux/fs_struct.h
                 dput(old_pwd);
                 mntput(old_pwdmnt);
         }
-Index: linux-2.4.24/kernel/ksyms.c
+Index: linux-2.4.24/kernel/exit.c
  ===================================================================
---- linux-2.4.24.orig/kernel/ksyms.c   Wed Mar 17 17:36:14 2004
-+++ linux-2.4.24/kernel/ksyms.c        Wed Mar 17 17:36:14 2004
-@@ -275,6 +275,7 @@ EXPORT_SYMBOL(set_page_dirty);
- EXPORT_SYMBOL(mark_page_accessed);
- EXPORT_SYMBOL(vfs_readlink);
- EXPORT_SYMBOL(vfs_follow_link);
-+EXPORT_SYMBOL(vfs_follow_link_it);
- EXPORT_SYMBOL(page_readlink);
- EXPORT_SYMBOL(page_follow_link);
- EXPORT_SYMBOL(page_symlink_inode_operations);
+--- linux-2.4.24.orig/kernel/exit.c    Thu Nov 28 15:53:15 2002
++++ linux-2.4.24/kernel/exit.c Wed Mar 17 17:36:14 2004
+@@ -238,11 +238,14 @@ static inline void __put_fs_struct(struc
+ {
+       /* No need to hold fs->lock if we are killing it */
+       if (atomic_dec_and_test(&fs->count)) {
++              UNPIN(fs->pwd, fs->pwdmnt, 0);
++              UNPIN(fs->root, fs->rootmnt, 1);
+               dput(fs->root);
+               mntput(fs->rootmnt);
+               dput(fs->pwd);
+               mntput(fs->pwdmnt);
+               if (fs->altroot) {
++                      UNPIN(fs->altroot, fs->altrootmnt, 1);
+                       dput(fs->altroot);
+                       mntput(fs->altrootmnt);
+               }
  Index: linux-2.4.24/kernel/fork.c
  ===================================================================
  --- linux-2.4.24.orig/kernel/fork.c    Fri Nov 28 10:26:21 2003
@@ -1826,22 +1832,15 @@ Index: linux-2.4.24/kernel/fork.c
                         fs->altrootmnt = mntget(old->altrootmnt);
                         fs->altroot = dget(old->altroot);
                 } else {
-Index: linux-2.4.24/kernel/exit.c
+Index: linux-2.4.24/kernel/ksyms.c
  ===================================================================
---- linux-2.4.24.orig/kernel/exit.c    Thu Nov 28 15:53:15 2002
-+++ linux-2.4.24/kernel/exit.c Wed Mar 17 17:36:14 2004
-@@ -238,11 +238,14 @@ static inline void __put_fs_struct(struc
- {
-       /* No need to hold fs->lock if we are killing it */
-       if (atomic_dec_and_test(&fs->count)) {
-+              UNPIN(fs->pwd, fs->pwdmnt, 0);
-+              UNPIN(fs->root, fs->rootmnt, 1);
-               dput(fs->root);
-               mntput(fs->rootmnt);
-               dput(fs->pwd);
-               mntput(fs->pwdmnt);
-               if (fs->altroot) {
-+                      UNPIN(fs->altroot, fs->altrootmnt, 1);
-                       dput(fs->altroot);
-                       mntput(fs->altrootmnt);
-               }
+--- linux-2.4.24.orig/kernel/ksyms.c   Wed Mar 17 17:36:14 2004
++++ linux-2.4.24/kernel/ksyms.c        Wed Mar 17 17:36:14 2004
+@@ -275,6 +275,7 @@ EXPORT_SYMBOL(set_page_dirty);
+ EXPORT_SYMBOL(mark_page_accessed);
+ EXPORT_SYMBOL(vfs_readlink);
+ EXPORT_SYMBOL(vfs_follow_link);
++EXPORT_SYMBOL(vfs_follow_link_it);
+ EXPORT_SYMBOL(page_readlink);
+ EXPORT_SYMBOL(page_follow_link);
+ EXPORT_SYMBOL(page_symlink_inode_operations);
diff --git a/lustre/kernel_patches/patches/vfs_intent-2.4.21-chaos.patch b/lustre/kernel_patches/patches/vfs_intent-2.4.21-chaos.patch

index 09ef2f9..0026514 100644 (file)
--- a/lustre/kernel_patches/patches/vfs_intent-2.4.21-chaos.patch
+++ b/lustre/kernel_patches/patches/vfs_intent-2.4.21-chaos.patch
@@ -309,11 +309,11 @@ Index: linux-ia64/fs/namei.c
  +                      if (!dentry->d_op->d_revalidate_it(dentry, 0, it)) {
  +                              struct dentry *new;
  +                              err = permission(dentry->d_parent->d_inode,
-+                                              MAY_EXEC);
++                                               MAY_EXEC);
  +                              if (err)
  +                                      break;
  +                              new = real_lookup(dentry->d_parent,
-+                                              &dentry->d_name, 0, NULL);
++                                                &dentry->d_name, 0, it);
  +                              d_invalidate(dentry);
  +                              dput(dentry);
  +                              if (IS_ERR(new)) {
@@ -554,7 +554,7 @@ Index: linux-ia64/fs/namei.c
                 if (!IS_POSIXACL(dir->d_inode))
                         mode &= ~current->fs->umask;
  -              error = vfs_create(dir->d_inode, dentry, mode);
-+              error = vfs_create_it(dir->d_inode, dentry, mode, it);
++              error = vfs_create_it(dir->d_inode, dentry, mode, it);
                 up(&dir->d_inode->i_sem);
                 dput(nd->dentry);
                 nd->dentry = dentry;
@@ -930,7 +930,7 @@ Index: linux-ia64/fs/namespace.c
         int retval = 0;
         int mnt_flags = 0;
   
-@@ -725,10 +732,11 @@ long do_mount(char * dev_name, char * di
+@@ -725,9 +732,11 @@ long do_mount(char * dev_name, char * di
         flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV);
   
         /* ... and get the mountpoint */
@@ -940,11 +940,10 @@ Index: linux-ia64/fs/namespace.c
  +      if (retval) {
  +              intent_release(&it);
                 return retval;
--
  +      }
+ 
         if (flags & MS_REMOUNT)
                 retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
-                                   data_page);
  @@ -739,6 +747,8 @@ long do_mount(char * dev_name, char * di
         else
                 retval = do_add_mount(&nd, type_page, flags, mnt_flags,
@@ -1202,7 +1201,7 @@ Index: linux-ia64/fs/open.c
  -      error = __user_walk(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW |
  -                    LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd);
  +      error = __user_walk_it(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW |
-+                    LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd, &it);
++                             LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd, &it);
         if (error)
                 goto out;
   
@@ -1661,7 +1660,7 @@ Index: linux-ia64/include/linux/fs.h
   #define ATTR_ATTR_FLAG        1024
  +#define ATTR_RAW      0x0800  /* file system, not vfs will massage attrs */
  +#define ATTR_FROM_OPEN        0x1000  /* called from open path, ie O_TRUNC */
-+#define ATTR_CTIME_SET 0x2000
++#define ATTR_CTIME_SET        0x2000
   
   /*
    * This is the Inode Attributes structure, used for notify_change().  It
diff --git a/lustre/kernel_patches/patches/vfs_intent-2.4.21-sles8sp3.patch b/lustre/kernel_patches/patches/vfs_intent-2.4.21-sles8sp3.patch

new file mode 100644 (file)

index 0000000..2ff2de8
--- /dev/null
+++ b/lustre/kernel_patches/patches/vfs_intent-2.4.21-sles8sp3.patch
@@ -0,0 +1,1862 @@
+ fs/dcache.c               |   19 ++
+ fs/exec.c                 |   17 +-
+ fs/namei.c                |  295 +++++++++++++++++++++++++++++++++++++++-------
+ fs/namespace.c            |   28 +++-
+ fs/open.c                 |  172 +++++++++++++++++++-------
+ fs/stat.c                 |   52 +++++---
+ include/linux/dcache.h    |   60 +++++++++
+ include/linux/fs.h        |   32 ++++
+ include/linux/fs_struct.h |    4 
+ kernel/exit.c             |    3 
+ kernel/fork.c             |    3 
+ kernel/ksyms.c            |    1 
+ 12 files changed, 558 insertions(+), 128 deletions(-)
+
+Index: linux-2.4.21/fs/dcache.c
+===================================================================
+--- linux-2.4.21.orig/fs/dcache.c      2004-04-24 02:38:00.000000000 -0400
++++ linux-2.4.21/fs/dcache.c   2004-04-26 19:06:31.000000000 -0400
+@@ -186,6 +186,13 @@
+               spin_unlock(&dcache_lock);
+               return 0;
+       }
++
++      /* network invalidation by Lustre */
++      if (dentry->d_flags & DCACHE_LUSTRE_INVALID) {
++              spin_unlock(&dcache_lock);
++              return 0;
++      }
++
+       /*
+        * Check whether to do a partial shrink_dcache
+        * to get rid of unused child entries.
+@@ -838,13 +845,19 @@
+  * Adds a dentry to the hash according to its name.
+  */
+  
+-void d_rehash(struct dentry * entry)
++void __d_rehash(struct dentry * entry, int lock)
+ {
+       struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
+       if (!list_empty(&entry->d_hash)) BUG();
+-      spin_lock(&dcache_lock);
++      if (lock) spin_lock(&dcache_lock);
+       list_add(&entry->d_hash, list);
+-      spin_unlock(&dcache_lock);
++      if (lock) spin_unlock(&dcache_lock);
++}
++EXPORT_SYMBOL(__d_rehash);
++
++void d_rehash(struct dentry * entry)
++{
++      __d_rehash(entry, 1);
+ }
+ 
+ #define do_switch(x,y) do { \
+Index: linux-2.4.21/fs/exec.c
+===================================================================
+--- linux-2.4.21.orig/fs/exec.c        2004-04-24 02:39:01.000000000 -0400
++++ linux-2.4.21/fs/exec.c     2004-04-26 19:06:31.000000000 -0400
+@@ -113,8 +113,10 @@
+       struct file * file;
+       struct nameidata nd;
+       int error;
++      struct lookup_intent it = { .it_op = IT_OPEN,
++                                  .it_flags = FMODE_READ|FMODE_EXEC };
+ 
+-      error = user_path_walk(library, &nd);
++      error = user_path_walk_it(library, &nd, &it);
+       if (error)
+               goto out;
+ 
+@@ -126,7 +128,8 @@
+       if (error)
+               goto exit;
+ 
+-      file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
++      file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
++      intent_release(&it);
+       error = PTR_ERR(file);
+       if (IS_ERR(file))
+               goto out;
+@@ -383,8 +386,10 @@
+       struct inode *inode;
+       struct file *file;
+       int err = 0;
++      struct lookup_intent it = { .it_op = IT_OPEN,
++                                  .it_flags = FMODE_READ|FMODE_EXEC };
+ 
+-      err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
++      err = path_lookup_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it);
+       file = ERR_PTR(err);
+       if (!err) {
+               inode = nd.dentry->d_inode;
+@@ -396,7 +401,8 @@
+                               err = -EACCES;
+                       file = ERR_PTR(err);
+                       if (!err) {
+-                              file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
++                              file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
++                              intent_release(&it);
+                               if (!IS_ERR(file)) {
+                                       err = deny_write_access(file);
+                                       if (err) {
+@@ -408,6 +414,7 @@
+                               return file;
+                       }
+               }
++              intent_release(&it);
+               path_release(&nd);
+       }
+       goto out;
+@@ -1147,7 +1154,7 @@
+               goto close_fail;
+       if (!file->f_op->write)
+               goto close_fail;
+-      if (do_truncate(file->f_dentry, 0) != 0)
++      if (do_truncate(file->f_dentry, 0, 0) != 0)
+               goto close_fail;
+ 
+       retval = binfmt->core_dump(signr, regs, file);
+Index: linux-2.4.21/fs/namei.c
+===================================================================
+--- linux-2.4.21.orig/fs/namei.c       2004-04-24 02:39:02.000000000 -0400
++++ linux-2.4.21/fs/namei.c    2004-04-26 19:06:38.000000000 -0400
+@@ -94,6 +94,13 @@
+  * XEmacs seems to be relying on it...
+  */
+ 
++void intent_release(struct lookup_intent *it)
++{
++      if (it && it->it_op_release)
++              it->it_op_release(it);
++
++}
++
+ /* In order to reduce some races, while at the same time doing additional
+  * checking and hopefully speeding things up, we copy filenames to the
+  * kernel data space before using them..
+@@ -274,10 +281,19 @@
+  * Internal lookup() using the new generic dcache.
+  * SMP-safe
+  */
+-static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags)
++static struct dentry *cached_lookup(struct dentry *parent, struct qstr *name,
++                                  int flags, struct lookup_intent *it)
+ {
+       struct dentry * dentry = d_lookup(parent, name);
+ 
++      if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) {
++              if (!dentry->d_op->d_revalidate_it(dentry, flags, it) &&
++                  !d_invalidate(dentry)) {
++                      dput(dentry);
++                      dentry = NULL;
++              }
++              return dentry;
++      } else
+       if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
+               if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
+                       dput(dentry);
+@@ -295,11 +311,15 @@
+  * make sure that nobody added the entry to the dcache in the meantime..
+  * SMP-safe
+  */
+-static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags)
++static struct dentry *real_lookup(struct dentry *parent, struct qstr *name,
++                                int flags, struct lookup_intent *it)
+ {
+       struct dentry * result;
+       struct inode *dir = parent->d_inode;
++      int counter = 0;
+ 
++again:
++      counter++;
+       down(&dir->i_sem);
+       /*
+        * First re-do the cached lookup just in case it was created
+@@ -314,6 +334,9 @@
+               result = ERR_PTR(-ENOMEM);
+               if (dentry) {
+                       lock_kernel();
++                      if (dir->i_op->lookup_it)
++                              result = dir->i_op->lookup_it(dir, dentry, it, flags);
++                      else
+                       result = dir->i_op->lookup(dir, dentry);
+                       unlock_kernel();
+                       if (result)
+@@ -335,6 +358,15 @@
+                       dput(result);
+                       result = ERR_PTR(-ENOENT);
+               }
++      } else if (result->d_op && result->d_op->d_revalidate_it) {
++              if (!result->d_op->d_revalidate_it(result, flags, it) &&
++                  !d_invalidate(result)) {
++                      dput(result);
++                      if (counter > 10)
++                              result = ERR_PTR(-ESTALE);
++                      if (!IS_ERR(result))
++                              goto again;
++              }
+       }
+       return result;
+ }
+@@ -346,7 +378,8 @@
+  * Without that kind of total limit, nasty chains of consecutive
+  * symlinks can cause almost arbitrarily long lookups. 
+  */
+-static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
++static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd,
++                               struct lookup_intent *it)
+ {
+       int err;
+       if (current->link_count >= 8)
+@@ -360,10 +393,12 @@
+       current->link_count++;
+       current->total_link_count++;
+       UPDATE_ATIME(dentry->d_inode);
++      nd->intent = it;
+       err = dentry->d_inode->i_op->follow_link(dentry, nd);
+       current->link_count--;
+       return err;
+ loop:
++      intent_release(it);
+       path_release(nd);
+       return -ELOOP;
+ }
+@@ -462,7 +497,8 @@
+  * We expect 'base' to be positive and a directory.
+  */
+ static inline int __attribute__((always_inline))
+-__link_path_walk(const char * name, struct nameidata *nd)
++__link_path_walk_it(const char * name, struct nameidata *nd,
++                  struct lookup_intent *it)
+ {
+       struct dentry *dentry;
+       struct inode *inode;
+@@ -539,12 +575,12 @@
+                               break;
+               }
+               /* This does the actual lookups.. */
+-              dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
++              dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL);
+               if (!dentry) {
+                       err = -EWOULDBLOCKIO;
+                       if (atomic)
+                               break;
+-                      dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
++                      dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL);
+                       err = PTR_ERR(dentry);
+                       if (IS_ERR(dentry))
+                               break;
+@@ -562,7 +598,7 @@
+                       goto out_dput;
+ 
+               if (inode->i_op->follow_link) {
+-                      err = do_follow_link(dentry, nd);
++                      err = do_follow_link(dentry, nd, NULL);
+                       dput(dentry);
+                       if (err)
+                               goto return_err;
+@@ -578,7 +614,7 @@
+                       nd->dentry = dentry;
+               }
+               err = -ENOTDIR; 
+-              if (!inode->i_op->lookup)
++              if (!inode->i_op->lookup && !inode->i_op->lookup_it)
+                       break;
+               continue;
+               /* here ends the main loop */
+@@ -605,12 +641,12 @@
+                       if (err < 0)
+                               break;
+               }
+-              dentry = cached_lookup(nd->dentry, &this, 0);
++              dentry = cached_lookup(nd->dentry, &this, 0, it);
+               if (!dentry) {
+                       err = -EWOULDBLOCKIO;
+                       if (atomic)
+                               break;
+-                      dentry = real_lookup(nd->dentry, &this, 0);
++                      dentry = real_lookup(nd->dentry, &this, 0, it);
+                       err = PTR_ERR(dentry);
+                       if (IS_ERR(dentry))
+                               break;
+@@ -620,7 +656,7 @@
+               inode = dentry->d_inode;
+               if ((lookup_flags & LOOKUP_FOLLOW)
+                   && inode && inode->i_op && inode->i_op->follow_link) {
+-                      err = do_follow_link(dentry, nd);
++                      err = do_follow_link(dentry, nd, it);
+                       dput(dentry);
+                       if (err)
+                               goto return_err;
+@@ -634,7 +670,8 @@
+                       goto no_inode;
+               if (lookup_flags & LOOKUP_DIRECTORY) {
+                       err = -ENOTDIR; 
+-                      if (!inode->i_op || !inode->i_op->lookup)
++                      if (!inode->i_op ||
++                          (!inode->i_op->lookup && !inode->i_op->lookup_it))
+                               break;
+               }
+               goto return_base;
+@@ -658,6 +695,27 @@
+                * Check the cached dentry for staleness.
+                */
+               dentry = nd->dentry;
++              if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) {
++                      err = -ESTALE;
++                      if (!dentry->d_op->d_revalidate_it(dentry, 0, it)) {
++                              struct dentry *new;
++                              err = permission(dentry->d_parent->d_inode,
++                                               MAY_EXEC);
++                              if (err)
++                                      break;
++                              new = real_lookup(dentry->d_parent,
++                                                &dentry->d_name, 0, it);
++                              d_invalidate(dentry);
++                              if (IS_ERR(new)) {
++                                      err = PTR_ERR(new);
++                                      break;
++                              }
++                              dput(dentry); /* not on error: path_release() puts it */
++                              nd->dentry = new;
++                      }
++                      if (!nd->dentry->d_inode)
++                              goto no_inode;
++              } else
+               if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
+                       err = -ESTALE;
+                       if (!dentry->d_op->d_revalidate(dentry, lookup_flags & LOOKUP_PARENT)) {
+               dput(dentry);
+               break;
+       }
++      if (err)
++              intent_release(it);
+       path_release(nd);
+ return_err:
+       return err;
+@@ -678,13 +738,13 @@
+ 
+ int link_path_walk(const char * name, struct nameidata *nd)
+ {
+-      return __link_path_walk(name,nd);
++      return __link_path_walk_it(name, nd, NULL);
+ }
+ 
+ static inline int __path_walk(const char * name, struct nameidata *nd)
+ {
+       current->total_link_count = 0;
+-      return __link_path_walk(name, nd);
++      return __link_path_walk_it(name, nd, NULL);
+ }
+ 
+ int path_walk(const char * name, struct nameidata *nd)
+@@ -692,6 +752,12 @@
+       return __path_walk(name, nd);
+ }
+ 
++int path_walk_it(const char * name, struct nameidata *nd, struct lookup_intent *it)
++{
++      current->total_link_count = 0;
++      return __link_path_walk_it(name, nd, it);
++}
++
+ /* SMP-safe */
+ /* returns 1 if everything is done */
+ static int __emul_lookup_dentry(const char *name, struct nameidata *nd)
+@@ -774,6 +840,17 @@
+ }
+ 
+ /* SMP-safe */
++int path_lookup_it(const char *path, unsigned flags, struct nameidata *nd,
++                 struct lookup_intent *it)
++{
++      int error = 0;
++      if (path_init(path, flags, nd))
++              error = path_walk_it(path, nd, it);
++      return error;
++}
++
++
++/* SMP-safe */
+ int path_lookup(const char *path, unsigned flags, struct nameidata *nd)
+ {
+       int error = 0;
+@@ -788,6 +865,7 @@
+ {
+       nd->last_type = LAST_ROOT; /* if there are only slashes... */
+       nd->flags = flags;
++      nd->intent = NULL;
+       if (*name=='/')
+               return walk_init_root(name,nd);
+       read_lock(&current->fs->lock);
+@@ -802,7 +880,8 @@
+  * needs parent already locked. Doesn't follow mounts.
+  * SMP-safe.
+  */
+-struct dentry * lookup_hash(struct qstr *name, struct dentry * base)
++struct dentry * lookup_hash_it(struct qstr *name, struct dentry * base,
++                             struct lookup_intent *it)
+ {
+       struct dentry * dentry;
+       struct inode *inode;
+@@ -825,13 +904,16 @@
+                       goto out;
+       }
+ 
+-      dentry = cached_lookup(base, name, 0);
++      dentry = cached_lookup(base, name, 0, it);
+       if (!dentry) {
+               struct dentry *new = d_alloc(base, name);
+               dentry = ERR_PTR(-ENOMEM);
+               if (!new)
+                       goto out;
+               lock_kernel();
++              if (inode->i_op->lookup_it)
++                      dentry = inode->i_op->lookup_it(inode, new, it, 0);
++              else
+               dentry = inode->i_op->lookup(inode, new);
+               unlock_kernel();
+               if (!dentry)
+@@ -843,6 +925,12 @@
+       return dentry;
+ }
+ 
++struct dentry * lookup_hash(struct qstr *name, struct dentry * base)
++{
++      return lookup_hash_it(name, base, NULL);
++}
++
++
+ /* SMP-safe */
+ struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
+ {
+@@ -864,7 +952,7 @@
+       }
+       this.hash = end_name_hash(hash);
+ 
+-      return lookup_hash(&this, base);
++      return lookup_hash_it(&this, base, NULL);
+ access:
+       return ERR_PTR(-EACCES);
+ }
+@@ -895,6 +983,23 @@
+       return err;
+ }
+ 
++int __user_walk_it(const char *name, unsigned flags, struct nameidata *nd,
++                 struct lookup_intent *it)
++{
++      char *tmp;
++      int err;
++
++      tmp = getname(name);
++      err = PTR_ERR(tmp);
++      if (!IS_ERR(tmp)) {
++              err = 0;
++              if (path_init(tmp, flags, nd))
++                      err = path_walk_it(tmp, nd, it);
++              putname(tmp);
++      }
++      return err;
++}
++
+ /*
+  * It's inline, so penalty for filesystems that don't use sticky bit is
+  * minimal.
+@@ -992,7 +1097,8 @@
+       return retval;
+ }
+ 
+-int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
++static int vfs_create_it(struct inode *dir, struct dentry *dentry, int mode,
++                       struct lookup_intent *it)
+ {
+       int error;
+ 
+@@ -1005,12 +1111,15 @@
+               goto exit_lock;
+ 
+       error = -EACCES;        /* shouldn't it be ENOSYS? */
+-      if (!dir->i_op || !dir->i_op->create)
++      if (!dir->i_op || (!dir->i_op->create && !dir->i_op->create_it))
+               goto exit_lock;
+ 
+       DQUOT_INIT(dir);
+       lock_kernel();
+-      error = dir->i_op->create(dir, dentry, mode);
++      if (dir->i_op->create_it)
++              error = dir->i_op->create_it(dir, dentry, mode, it);
++      else
++              error = dir->i_op->create(dir, dentry, mode);
+       unlock_kernel();
+ exit_lock:
+       up(&dir->i_zombie);
+@@ -1019,6 +1128,11 @@
+       return error;
+ }
+ 
++int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
++{
++      return vfs_create_it(dir, dentry, mode, NULL);
++}
++
+ /*
+  *    open_namei()
+  *
+@@ -1033,7 +1147,8 @@
+  * for symlinks (where the permissions are checked later).
+  * SMP-safe
+  */
+-int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
++int open_namei_it(const char *pathname, int flag, int mode,
++                struct nameidata *nd, struct lookup_intent *it)
+ {
+       int acc_mode, error = 0;
+       struct inode *inode;
+@@ -1043,11 +1158,14 @@
+ 
+       acc_mode = ACC_MODE(flag);
+ 
++      if (it)
++              it->it_flags = flag;
++
+       /*
+        * The simplest case - just a plain lookup.
+        */
+       if (!(flag & O_CREAT)) {
+-              error = path_lookup(pathname, lookup_flags(flag), nd);
++              error = path_lookup_it(pathname, lookup_flags(flag), nd, it);
+               if (error)
+                       return error;
+               dentry = nd->dentry;
+@@ -1057,6 +1175,10 @@
+       /*
+        * Create - we need to know the parent.
+        */
++      if (it) {
++              it->it_create_mode = mode;
++              it->it_op |= IT_CREAT;
++      }
+       error = path_lookup(pathname, LOOKUP_PARENT, nd);
+       if (error)
+               return error;
+@@ -1072,7 +1194,7 @@
+ 
+       dir = nd->dentry;
+       down(&dir->d_inode->i_sem);
+-      dentry = lookup_hash(&nd->last, nd->dentry);
++      dentry = lookup_hash_it(&nd->last, nd->dentry, it);
+ 
+ do_last:
+       error = PTR_ERR(dentry);
+@@ -1081,11 +1203,12 @@
+               goto exit;
+       }
+ 
++      if (it) it->it_create_mode = mode;      /* it may be NULL */
+       /* Negative dentry, just create the file */
+       if (!dentry->d_inode) {
+               if (!IS_POSIXACL(dir->d_inode))
+                       mode &= ~current->fs->umask;
+-              error = vfs_create(dir->d_inode, dentry, mode);
++              error = vfs_create_it(dir->d_inode, dentry, mode, it);
+               up(&dir->d_inode->i_sem);
+ #ifndef DENTRY_WASTE_RAM
+               if (error)
+@@ -1193,7 +1316,7 @@
+               if (!error) {
+                       DQUOT_INIT(inode);
+                       
+-                      error = do_truncate(dentry, 0);
++                      error = do_truncate(dentry, 0, 1);
+               }
+               put_write_access(inode);
+               if (error)
+@@ -1205,8 +1328,10 @@
+       return 0;
+ 
+ exit_dput:
++      intent_release(it);
+       dput(dentry);
+ exit:
++      intent_release(it);
+       path_release(nd);
+       return error;
+ 
+@@ -1225,7 +1350,10 @@
+        * are done. Procfs-like symlinks just set LAST_BIND.
+        */
+       UPDATE_ATIME(dentry->d_inode);
++      nd->intent = it;
+       error = dentry->d_inode->i_op->follow_link(dentry, nd);
++      if (error)
++              intent_release(it);
+       dput(dentry);
+       if (error)
+               return error;
+@@ -1247,13 +1375,20 @@
+       }
+       dir = nd->dentry;
+       down(&dir->d_inode->i_sem);
+-      dentry = lookup_hash(&nd->last, nd->dentry);
++      dentry = lookup_hash_it(&nd->last, nd->dentry, it);
+       putname(nd->last.name);
+       goto do_last;
+ }
+ 
++int open_namei(const char *pathname, int flag, int mode, struct nameidata *nd)
++{
++      return open_namei_it(pathname, flag, mode, nd, NULL);
++}
++
++
+ /* SMP-safe */
+-struct dentry *lookup_create(struct nameidata *nd, int is_dir)
++struct dentry *lookup_create(struct nameidata *nd, int is_dir,
++                                  struct lookup_intent *it)
+ {
+       struct dentry *dentry;
+ 
+@@ -1261,7 +1396,7 @@
+       dentry = ERR_PTR(-EEXIST);
+       if (nd->last_type != LAST_NORM)
+               goto fail;
+-      dentry = lookup_hash(&nd->last, nd->dentry);
++      dentry = lookup_hash_it(&nd->last, nd->dentry, it);
+       if (IS_ERR(dentry))
+               goto fail;
+       if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
+@@ -1317,7 +1452,20 @@
+       error = path_lookup(tmp, LOOKUP_PARENT, &nd);
+       if (error)
+               goto out;
+-      dentry = lookup_create(&nd, 0);
++
++      if (nd.last_type != LAST_NORM) {
++              error = -EEXIST;
++              goto out2;
++      }
++      if (nd.dentry->d_inode->i_op->mknod_raw) {
++              struct inode_operations *op = nd.dentry->d_inode->i_op;
++              error = op->mknod_raw(&nd, mode, dev);
++              /* the file system wants to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto out2;
++      }
++
++      dentry = lookup_create(&nd, 0, NULL);
+       error = PTR_ERR(dentry);
+ 
+       if (!IS_POSIXACL(nd.dentry->d_inode))
+@@ -1339,6 +1487,7 @@
+               dput(dentry);
+       }
+       up(&nd.dentry->d_inode->i_sem);
++out2:
+       path_release(&nd);
+ out:
+       putname(tmp);
+@@ -1386,7 +1535,18 @@
+               error = path_lookup(tmp, LOOKUP_PARENT, &nd);
+               if (error)
+                       goto out;
+-              dentry = lookup_create(&nd, 1);
++              if (nd.last_type != LAST_NORM) {
++                      error = -EEXIST;
++                      goto out2;
++              }
++              if (nd.dentry->d_inode->i_op->mkdir_raw) {
++                      struct inode_operations *op = nd.dentry->d_inode->i_op;
++                      error = op->mkdir_raw(&nd, mode);
++                      /* the file system wants to use normal vfs path now */
++                      if (error != -EOPNOTSUPP)
++                              goto out2;
++              }
++              dentry = lookup_create(&nd, 1, NULL);
+               error = PTR_ERR(dentry);
+               if (!IS_ERR(dentry)) {
+                       if (!IS_POSIXACL(nd.dentry->d_inode))
+@@ -1395,6 +1555,7 @@
+                       dput(dentry);
+               }
+               up(&nd.dentry->d_inode->i_sem);
++out2:
+               path_release(&nd);
+ out:
+               putname(tmp);
+@@ -1495,8 +1656,16 @@
+                       error = -EBUSY;
+                       goto exit1;
+       }
++      if (nd.dentry->d_inode->i_op->rmdir_raw) {
++              struct inode_operations *op = nd.dentry->d_inode->i_op;
++
++              error = op->rmdir_raw(&nd);
++              /* the file system wants to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto exit1;
++      }
+       down(&nd.dentry->d_inode->i_sem);
+-      dentry = lookup_hash(&nd.last, nd.dentry);
++      dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
+       error = PTR_ERR(dentry);
+       if (!IS_ERR(dentry)) {
+               error = vfs_rmdir(nd.dentry->d_inode, dentry);
+@@ -1554,8 +1723,15 @@
+       error = -EISDIR;
+       if (nd.last_type != LAST_NORM)
+               goto exit1;
++      if (nd.dentry->d_inode->i_op->unlink_raw) {
++              struct inode_operations *op = nd.dentry->d_inode->i_op;
++              error = op->unlink_raw(&nd);
++              /* the file system wants to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto exit1;
++      }
+       down(&nd.dentry->d_inode->i_sem);
+-      dentry = lookup_hash(&nd.last, nd.dentry);
++      dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
+       error = PTR_ERR(dentry);
+       if (!IS_ERR(dentry)) {
+               /* Why not before? Because we want correct error value */
+@@ -1622,15 +1798,27 @@
+               error = path_lookup(to, LOOKUP_PARENT, &nd);
+               if (error)
+                       goto out;
+-              dentry = lookup_create(&nd, 0);
++              if (nd.last_type != LAST_NORM) {
++                      error = -EEXIST;
++                      goto out2;
++              }
++              if (nd.dentry->d_inode->i_op->symlink_raw) {
++                      struct inode_operations *op = nd.dentry->d_inode->i_op;
++                      error = op->symlink_raw(&nd, from);
++                      /* the file system wants to use normal vfs path now */
++                      if (error != -EOPNOTSUPP)
++                              goto out2;
++              }
++              dentry = lookup_create(&nd, 0, NULL);
+               error = PTR_ERR(dentry);
+               if (!IS_ERR(dentry)) {
+                       error = vfs_symlink(nd.dentry->d_inode, dentry, from);
+                       dput(dentry);
+               }
+               up(&nd.dentry->d_inode->i_sem);
++      out2:
+               path_release(&nd);
+-out:
++      out:
+               putname(to);
+       }
+       putname(from);
+@@ -1706,7 +1894,18 @@
+               error = -EXDEV;
+               if (old_nd.mnt != nd.mnt)
+                       goto out_release;
+-              new_dentry = lookup_create(&nd, 0);
++              if (nd.last_type != LAST_NORM) {
++                      error = -EEXIST;
++                      goto out_release;
++              }
++              if (nd.dentry->d_inode->i_op->link_raw) {
++                      struct inode_operations *op = nd.dentry->d_inode->i_op;
++                      error = op->link_raw(&old_nd, &nd);
++                      /* the file system wants to use normal vfs path now */
++                      if (error != -EOPNOTSUPP)
++                              goto out_release;
++              }
++              new_dentry = lookup_create(&nd, 0, NULL);
+               error = PTR_ERR(new_dentry);
+               if (!IS_ERR(new_dentry)) {
+                       error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
+@@ -1750,7 +1949,7 @@
+  *       locking].
+  */
+ int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
+-             struct inode *new_dir, struct dentry *new_dentry)
++                 struct inode *new_dir, struct dentry *new_dentry)
+ {
+       int error;
+       struct inode *target;
+@@ -1829,7 +2028,7 @@
+ }
+ 
+ int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
+-             struct inode *new_dir, struct dentry *new_dentry)
++                   struct inode *new_dir, struct dentry *new_dentry)
+ {
+       int error;
+ 
+@@ -1917,9 +2116,18 @@
+       if (newnd.last_type != LAST_NORM)
+               goto exit2;
+ 
++      if (old_dir->d_inode->i_op->rename_raw) {
++              lock_kernel();
++              error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd);
++              unlock_kernel();
++              /* the file system wants to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto exit2;
++      }
++
+       double_lock(new_dir, old_dir);
+ 
+-      old_dentry = lookup_hash(&oldnd.last, old_dir);
++      old_dentry = lookup_hash_it(&oldnd.last, old_dir, NULL);
+       error = PTR_ERR(old_dentry);
+       if (IS_ERR(old_dentry))
+               goto exit3;
+@@ -1935,16 +2143,16 @@
+               if (newnd.last.name[newnd.last.len])
+                       goto exit4;
+       }
+-      new_dentry = lookup_hash(&newnd.last, new_dir);
++      new_dentry = lookup_hash_it(&newnd.last, new_dir, NULL);
+       error = PTR_ERR(new_dentry);
+       if (IS_ERR(new_dentry))
+               goto exit4;
+ 
++
+       lock_kernel();
+       error = vfs_rename(old_dir->d_inode, old_dentry,
+                                  new_dir->d_inode, new_dentry);
+       unlock_kernel();
+-
+       dput(new_dentry);
+ exit4:
+       dput(old_dentry);
+@@ -1995,20 +2203,26 @@
+ }
+ 
+ static inline int __attribute__((always_inline))
+-__vfs_follow_link(struct nameidata *nd, const char *link)
++__vfs_follow_link(struct nameidata *nd, const char *link,
++                struct lookup_intent *it)
+ {
+       int res = 0;
+       char *name;
+       if (IS_ERR(link))
+               goto fail;
+ 
++      if (it == NULL)
++              it = nd->intent;
++      else if (it != nd->intent)
++              printk("it != nd->intent: tell phil@clusterfs.com\n");
++
+       if (*link == '/') {
+               path_release(nd);
+               if (!walk_init_root(link, nd))
+                       /* weird __emul_prefix() stuff did it */
+                       goto out;
+       }
+-      res = __link_path_walk(link, nd);
++      res = __link_path_walk_it(link, nd, it);
+ out:
+       if (current->link_count || res || nd->last_type!=LAST_NORM)
+               return res;
+@@ -2032,7 +2246,13 @@
+ 
+ int vfs_follow_link(struct nameidata *nd, const char *link)
+ {
+-      return __vfs_follow_link(nd, link);
++      return __vfs_follow_link(nd, link, NULL);
++}
++
++int vfs_follow_link_it(struct nameidata *nd, const char *link,
++                     struct lookup_intent *it)
++{
++      return __vfs_follow_link(nd, link, it);
+ }
+ 
+ /* get the link contents into pagecache */
+@@ -2074,7 +2294,7 @@
+ {
+       struct page *page = NULL;
+       char *s = page_getlink(dentry, &page);
+-      int res = __vfs_follow_link(nd, s);
++      int res = __vfs_follow_link(nd, s, NULL);
+       if (page) {
+               kunmap(page);
+               page_cache_release(page);
+Index: linux-2.4.21/fs/namespace.c
+===================================================================
+--- linux-2.4.21.orig/fs/namespace.c   2004-04-24 02:38:41.000000000 -0400
++++ linux-2.4.21/fs/namespace.c        2004-04-26 19:06:32.000000000 -0400
+@@ -98,6 +98,7 @@
+ {
+       old_nd->dentry = mnt->mnt_mountpoint;
+       old_nd->mnt = mnt->mnt_parent;
++      UNPIN(old_nd->dentry, old_nd->mnt, 1);
+       mnt->mnt_parent = mnt;
+       mnt->mnt_mountpoint = mnt->mnt_root;
+       list_del_init(&mnt->mnt_child);
+@@ -109,6 +110,7 @@
+ {
+       mnt->mnt_parent = mntget(nd->mnt);
+       mnt->mnt_mountpoint = dget(nd->dentry);
++      PIN(nd->dentry, nd->mnt, 1);
+       list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry));
+       list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts);
+       nd->dentry->d_mounted++;
+@@ -488,14 +490,17 @@
+ {
+       struct nameidata old_nd;
+       struct vfsmount *mnt = NULL;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+       int err = mount_is_safe(nd);
+       if (err)
+               return err;
+       if (!old_name || !*old_name)
+               return -EINVAL;
+-      err = path_lookup(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd);
+-      if (err)
++      err = path_lookup_it(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd, &it);
++      if (err) {
++              intent_release(&it);
+               return err;
++      }
+ 
+       down_write(&current->namespace->sem);
+       err = -EINVAL;
+@@ -518,6 +523,7 @@
+       }
+ 
+       up_write(&current->namespace->sem);
++      intent_release(&it);
+       path_release(&old_nd);
+       return err;
+ }
+@@ -701,6 +707,7 @@
+                 unsigned long flags, void *data_page)
+ {
+       struct nameidata nd;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+       int retval = 0;
+       int mnt_flags = 0;
+ 
+@@ -725,9 +732,11 @@
+       flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV);
+ 
+       /* ... and get the mountpoint */
+-      retval = path_lookup(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
+-      if (retval)
++      retval = path_lookup_it(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it);
++      if (retval) {
++              intent_release(&it);
+               return retval;
++      }
+ 
+       if (flags & MS_REMOUNT)
+               retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
+@@ -739,6 +748,8 @@
+       else
+               retval = do_add_mount(&nd, type_page, flags, mnt_flags,
+                                     dev_name, data_page);
++
++      intent_release(&it);
+       path_release(&nd);
+       return retval;
+ }
+@@ -904,6 +915,8 @@
+ {
+       struct vfsmount *tmp;
+       struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd;
++      struct lookup_intent new_it = { .it_op = IT_GETATTR };
++      struct lookup_intent old_it = { .it_op = IT_GETATTR };
+       int error;
+ 
+       if (!capable(CAP_SYS_ADMIN))
+@@ -911,14 +924,14 @@
+ 
+       lock_kernel();
+ 
+-      error = __user_walk(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd);
++      error = __user_walk_it(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd, &new_it);
+       if (error)
+               goto out0;
+       error = -EINVAL;
+       if (!check_mnt(new_nd.mnt))
+               goto out1;
+ 
+-      error = __user_walk(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd);
++      error = __user_walk_it(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd, &old_it);
+       if (error)
+               goto out1;
+ 
+@@ -973,8 +986,10 @@
+       up(&old_nd.dentry->d_inode->i_zombie);
+       up_write(&current->namespace->sem);
+       path_release(&user_nd);
++      intent_release(&old_it);
+       path_release(&old_nd);
+ out1:
++      intent_release(&new_it);
+       path_release(&new_nd);
+ out0:
+       unlock_kernel();
+Index: linux-2.4.21/fs/open.c
+===================================================================
+--- linux-2.4.21.orig/fs/open.c        2004-04-24 02:39:01.000000000 -0400
++++ linux-2.4.21/fs/open.c     2004-04-26 19:06:32.000000000 -0400
+@@ -20,6 +20,8 @@
+ #include <asm/uaccess.h>
+ 
+ #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
++extern int path_walk_it(const char *name, struct nameidata *nd,
++                      struct lookup_intent *it);
+ 
+ int vfs_statfs(struct super_block *sb, struct statfs *buf)
+ {
+@@ -96,9 +98,10 @@
+       write_unlock(&files->file_lock);
+ }
+ 
+-int do_truncate(struct dentry *dentry, loff_t length)
++int do_truncate(struct dentry *dentry, loff_t length, int called_from_open)
+ {
+       struct inode *inode = dentry->d_inode;
++      struct inode_operations *op = dentry->d_inode->i_op;
+       int error;
+       struct iattr newattrs;
+ 
+@@ -110,7 +113,13 @@
+       down(&inode->i_sem);
+       newattrs.ia_size = length;
+       newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
+-      error = notify_change(dentry, &newattrs);
++      if (called_from_open)
++              newattrs.ia_valid |= ATTR_FROM_OPEN;
++      if (op->setattr_raw) {
++              newattrs.ia_valid |= ATTR_RAW;
++              error = op->setattr_raw(inode, &newattrs);
++      } else
++              error = notify_change(dentry, &newattrs);
+       up(&inode->i_sem);
+       up_write(&inode->i_alloc_sem);
+       return error;
+@@ -121,12 +130,13 @@
+       struct nameidata nd;
+       struct inode * inode;
+       int error;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+ 
+       error = -EINVAL;
+       if (length < 0) /* sorry, but loff_t says... */
+               goto out;
+ 
+-      error = user_path_walk(path, &nd);
++      error = user_path_walk_it(path, &nd, &it);
+       if (error)
+               goto out;
+       inode = nd.dentry->d_inode;
+@@ -166,11 +176,13 @@
+       error = locks_verify_truncate(inode, NULL, length);
+       if (!error) {
+               DQUOT_INIT(inode);
+-              error = do_truncate(nd.dentry, length);
++              intent_release(&it);
++              error = do_truncate(nd.dentry, length, 0);
+       }
+       put_write_access(inode);
+ 
+ dput_and_out:
++      intent_release(&it);
+       path_release(&nd);
+ out:
+       return error;
+@@ -218,7 +230,7 @@
+ 
+       error = locks_verify_truncate(inode, file, length);
+       if (!error)
+-              error = do_truncate(dentry, length);
++              error = do_truncate(dentry, length, 0);
+ out_putf:
+       fput(file);
+ out:
+@@ -263,11 +275,13 @@
+       struct inode * inode;
+       struct iattr newattrs;
+ 
+-      error = user_path_walk(filename, &nd);
++      error = user_path_walk_it(filename, &nd, NULL);
+       if (error)
+               goto out;
+       inode = nd.dentry->d_inode;
+ 
++      /* this is safe without a Lustre lock because it only depends
++         on the super block */
+       error = -EROFS;
+       if (IS_RDONLY(inode))
+               goto dput_and_out;
+@@ -282,11 +296,25 @@
+                       goto dput_and_out;
+ 
+               newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
+-      } else {
++      }
++
++      if (inode->i_op->setattr_raw) {
++              struct inode_operations *op = nd.dentry->d_inode->i_op;
++
++              newattrs.ia_valid |= ATTR_RAW;
++              error = op->setattr_raw(inode, &newattrs);
++              /* the file system wants to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto dput_and_out;
++      }
++
++      error = -EPERM;
++      if (!times) {
+               if (current->fsuid != inode->i_uid &&
+                   (error = permission(inode,MAY_WRITE)) != 0)
+                       goto dput_and_out;
+       }
++
+       error = notify_change(nd.dentry, &newattrs);
+ dput_and_out:
+       path_release(&nd);
+@@ -307,12 +335,14 @@
+       struct inode * inode;
+       struct iattr newattrs;
+ 
+-      error = user_path_walk(filename, &nd);
++      error = user_path_walk_it(filename, &nd, NULL);
+ 
+       if (error)
+               goto out;
+       inode = nd.dentry->d_inode;
+ 
++      /* this is safe without a Lustre lock because it only depends
++         on the super block */
+       error = -EROFS;
+       if (IS_RDONLY(inode))
+               goto dput_and_out;
+@@ -327,7 +357,20 @@
+               newattrs.ia_atime = times[0].tv_sec;
+               newattrs.ia_mtime = times[1].tv_sec;
+               newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
+-      } else {
++      }
++
++      if (inode->i_op->setattr_raw) {
++              struct inode_operations *op = nd.dentry->d_inode->i_op;
++
++              newattrs.ia_valid |= ATTR_RAW;
++              error = op->setattr_raw(inode, &newattrs);
++              /* -EOPNOTSUPP: the file system wants the normal vfs path */
++              if (error != -EOPNOTSUPP)
++                      goto dput_and_out;
++      }
++
++      error = -EPERM;
++      if (!utimes) {
+               if (current->fsuid != inode->i_uid &&
+                   (error = permission(inode,MAY_WRITE)) != 0)
+                       goto dput_and_out;
+@@ -350,6 +393,7 @@
+       int old_fsuid, old_fsgid;
+       kernel_cap_t old_cap;
+       int res;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+ 
+       if (mode & ~S_IRWXO)    /* where's F_OK, X_OK, W_OK, R_OK? */
+               return -EINVAL;
+@@ -367,13 +411,14 @@
+       else
+               current->cap_effective = current->cap_permitted;
+ 
+-      res = user_path_walk(filename, &nd);
++      res = user_path_walk_it(filename, &nd, &it);
+       if (!res) {
+               res = permission(nd.dentry->d_inode, mode);
+               /* SuS v2 requires we report a read only fs too */
+               if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
+                  && !special_file(nd.dentry->d_inode->i_mode))
+                       res = -EROFS;
++              intent_release(&it);
+               path_release(&nd);
+       }
+ 
+@@ -388,8 +433,9 @@
+ {
+       int error;
+       struct nameidata nd;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+ 
+-      error = __user_walk(filename,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd);
++      error = __user_walk_it(filename,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd, &it);
+       if (error)
+               goto out;
+ 
+@@ -400,6 +446,7 @@
+       set_fs_pwd(current->fs, nd.mnt, nd.dentry);
+ 
+ dput_and_out:
++      intent_release(&it);
+       path_release(&nd);
+ out:
+       return error;
+@@ -439,9 +486,10 @@
+ {
+       int error;
+       struct nameidata nd;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+ 
+-      error = __user_walk(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW |
+-                    LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd);
++      error = __user_walk_it(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW |
++                             LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd, &it);
+       if (error)
+               goto out;
+ 
+@@ -457,39 +505,56 @@
+       set_fs_altroot();
+       error = 0;
+ dput_and_out:
++      intent_release(&it);
+       path_release(&nd);
+ out:
+       return error;
+ }
+ 
+-asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
++int chmod_common(struct dentry *dentry, mode_t mode)
+ {
+-      struct inode * inode;
+-      struct dentry * dentry;
+-      struct file * file;
+-      int err = -EBADF;
++      struct inode *inode = dentry->d_inode;
+       struct iattr newattrs;
++      int err = -EROFS;
+ 
+-      file = fget(fd);
+-      if (!file)
++      if (IS_RDONLY(inode))
+               goto out;
+ 
+-      dentry = file->f_dentry;
+-      inode = dentry->d_inode;
++      if (inode->i_op->setattr_raw) {
++              newattrs.ia_mode = mode;
++              newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
++              newattrs.ia_valid |= ATTR_RAW;
++              err = inode->i_op->setattr_raw(inode, &newattrs);
++              /* -EOPNOTSUPP: the file system wants the normal vfs path */
++              if (err != -EOPNOTSUPP)
++                      goto out;
++      }
+ 
+-      err = -EROFS;
+-      if (IS_RDONLY(inode))
+-              goto out_putf;
+       err = -EPERM;
+       if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+-              goto out_putf;
++              goto out;
++
+       if (mode == (mode_t) -1)
+               mode = inode->i_mode;
+       newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
+       newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
+       err = notify_change(dentry, &newattrs);
+ 
+-out_putf:
++out:
++      return err;
++}
++
++asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
++{
++      struct file * file;
++      int err = -EBADF;
++
++      file = fget(fd);
++      if (!file)
++              goto out;
++
++      err = chmod_common(file->f_dentry, mode);
++
+       fput(file);
+ out:
+       return err;
+@@ -498,30 +563,14 @@
+ asmlinkage long sys_chmod(const char * filename, mode_t mode)
+ {
+       struct nameidata nd;
+-      struct inode * inode;
+       int error;
+-      struct iattr newattrs;
+ 
+       error = user_path_walk(filename, &nd);
+       if (error)
+               goto out;
+-      inode = nd.dentry->d_inode;
+-
+-      error = -EROFS;
+-      if (IS_RDONLY(inode))
+-              goto dput_and_out;
+ 
+-      error = -EPERM;
+-      if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+-              goto dput_and_out;
++      error = chmod_common(nd.dentry, mode);
+ 
+-      if (mode == (mode_t) -1)
+-              mode = inode->i_mode;
+-      newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
+-      newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
+-      error = notify_change(nd.dentry, &newattrs);
+-
+-dput_and_out:
+       path_release(&nd);
+ out:
+       return error;
+@@ -541,6 +590,20 @@
+       error = -EROFS;
+       if (IS_RDONLY(inode))
+               goto out;
++
++      if (inode->i_op->setattr_raw) {
++              struct inode_operations *op = dentry->d_inode->i_op;
++
++              newattrs.ia_uid = user;
++              newattrs.ia_gid = group;
++              newattrs.ia_valid = ATTR_UID | ATTR_GID | ATTR_CTIME;
++              newattrs.ia_valid |= ATTR_RAW;
++              error = op->setattr_raw(inode, &newattrs);
++              /* -EOPNOTSUPP: the file system wants the normal vfs path */
++              if (error != -EOPNOTSUPP)
++                      return error;
++      }
++
+       error = -EPERM;
+       if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+               goto out;
+@@ -645,6 +708,7 @@
+ {
+       int namei_flags, error;
+       struct nameidata nd;
++      struct lookup_intent it = { .it_op = IT_OPEN };
+ 
+       namei_flags = flags;
+       if ((namei_flags+1) & O_ACCMODE)
+@@ -652,14 +716,15 @@
+       if (namei_flags & O_TRUNC)
+               namei_flags |= 2;
+ 
+-      error = open_namei(filename, namei_flags, mode, &nd);
+-      if (!error)
+-              return dentry_open(nd.dentry, nd.mnt, flags);
++      error = open_namei_it(filename, namei_flags, mode, &nd, &it);
++      if (error)
++              return ERR_PTR(error);
+ 
+-      return ERR_PTR(error);
++      return dentry_open_it(nd.dentry, nd.mnt, flags, &it);
+ }
+ 
+-struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
++struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt,
++                          int flags, struct lookup_intent *it)
+ {
+       struct file * f;
+       struct inode *inode;
+@@ -687,7 +752,9 @@
+       file_move(f, &inode->i_sb->s_files);
+ 
+       if (f->f_op && f->f_op->open) {
++              f->f_it = it;
+               error = f->f_op->open(inode,f);
++              f->f_it = NULL;
+               if (error)
+                       goto cleanup_all;
+       }
+@@ -699,6 +766,7 @@
+                                     !inode->i_mapping->a_ops->direct_IO))
+               goto cleanup_all;
+ 
++      intent_release(it);
+       return f;
+ 
+ cleanup_all:
+@@ -711,11 +779,17 @@
+ cleanup_file:
+       put_filp(f);
+ cleanup_dentry:
++      intent_release(it);
+       dput(dentry);
+       mntput(mnt);
+       return ERR_PTR(error);
+ }
+ 
++struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
++{
++      return dentry_open_it(dentry, mnt, flags, NULL);
++}
++
+ /*
+  * Find an empty file descriptor entry, and mark it busy.
+  */
+Index: linux-2.4.21/fs/stat.c
+===================================================================
+--- linux-2.4.21.orig/fs/stat.c        2004-04-24 02:37:58.000000000 -0400
++++ linux-2.4.21/fs/stat.c     2004-04-26 19:06:32.000000000 -0400
+@@ -17,10 +17,12 @@
+  * Revalidate the inode. This is required for proper NFS attribute caching.
+  */
+ static __inline__ int
+-do_revalidate(struct dentry *dentry)
++do_revalidate(struct dentry *dentry, struct lookup_intent *it)
+ {
+       struct inode * inode = dentry->d_inode;
+-      if (inode->i_op && inode->i_op->revalidate)
++      if (inode->i_op && inode->i_op->revalidate_it)
++              return inode->i_op->revalidate_it(dentry, it);
++      else if (inode->i_op && inode->i_op->revalidate)
+               return inode->i_op->revalidate(dentry);
+       return 0;
+ }
+@@ -141,13 +143,15 @@
+ asmlinkage long sys_stat(char * filename, struct __old_kernel_stat * statbuf)
+ {
+       struct nameidata nd;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+       int error;
+ 
+-      error = user_path_walk(filename, &nd);
++      error = user_path_walk_it(filename, &nd, &it);
+       if (!error) {
+-              error = do_revalidate(nd.dentry);
++              error = do_revalidate(nd.dentry, &it);
+               if (!error)
+                       error = cp_old_stat(nd.dentry->d_inode, statbuf);
++              intent_release(&it);
+               path_release(&nd);
+       }
+       return error;
+@@ -157,13 +161,15 @@
+ asmlinkage long sys_newstat(char * filename, struct stat * statbuf)
+ {
+       struct nameidata nd;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+       int error;
+ 
+-      error = user_path_walk(filename, &nd);
++      error = user_path_walk_it(filename, &nd, &it);
+       if (!error) {
+-              error = do_revalidate(nd.dentry);
++              error = do_revalidate(nd.dentry, &it);
+               if (!error)
+                       error = cp_new_stat(nd.dentry->d_inode, statbuf);
++              intent_release(&it);
+               path_release(&nd);
+       }
+       return error;
+@@ -178,13 +184,15 @@
+ asmlinkage long sys_lstat(char * filename, struct __old_kernel_stat * statbuf)
+ {
+       struct nameidata nd;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+       int error;
+ 
+-      error = user_path_walk_link(filename, &nd);
++      error = user_path_walk_link_it(filename, &nd, &it);
+       if (!error) {
+-              error = do_revalidate(nd.dentry);
++              error = do_revalidate(nd.dentry, &it);
+               if (!error)
+                       error = cp_old_stat(nd.dentry->d_inode, statbuf);
++              intent_release(&it);
+               path_release(&nd);
+       }
+       return error;
+@@ -195,13 +203,15 @@
+ asmlinkage long sys_newlstat(char * filename, struct stat * statbuf)
+ {
+       struct nameidata nd;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+       int error;
+ 
+-      error = user_path_walk_link(filename, &nd);
++      error = user_path_walk_link_it(filename, &nd, &it);
+       if (!error) {
+-              error = do_revalidate(nd.dentry);
++              error = do_revalidate(nd.dentry, &it);
+               if (!error)
+                       error = cp_new_stat(nd.dentry->d_inode, statbuf);
++              intent_release(&it);
+               path_release(&nd);
+       }
+       return error;
+@@ -222,7 +232,7 @@
+       if (f) {
+               struct dentry * dentry = f->f_dentry;
+ 
+-              err = do_revalidate(dentry);
++              err = do_revalidate(dentry, NULL);
+               if (!err)
+                       err = cp_old_stat(dentry->d_inode, statbuf);
+               fput(f);
+@@ -241,7 +251,7 @@
+       if (f) {
+               struct dentry * dentry = f->f_dentry;
+ 
+-              err = do_revalidate(dentry);
++              err = do_revalidate(dentry, NULL);
+               if (!err)
+                       err = cp_new_stat(dentry->d_inode, statbuf);
+               fput(f);
+@@ -263,7 +273,7 @@
+ 
+               error = -EINVAL;
+               if (inode->i_op && inode->i_op->readlink &&
+-                  !(error = do_revalidate(nd.dentry))) {
++                  !(error = do_revalidate(nd.dentry, NULL))) {
+                       UPDATE_ATIME(inode);
+                       error = inode->i_op->readlink(nd.dentry, buf, bufsiz);
+               }
+@@ -339,12 +349,14 @@
+ {
+       struct nameidata nd;
+       int error;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+ 
+-      error = user_path_walk(filename, &nd);
++      error = user_path_walk_it(filename, &nd, &it);
+       if (!error) {
+-              error = do_revalidate(nd.dentry);
++              error = do_revalidate(nd.dentry, &it);
+               if (!error)
+                       error = cp_new_stat64(nd.dentry->d_inode, statbuf);
++              intent_release(&it);
+               path_release(&nd);
+       }
+       return error;
+@@ -354,12 +366,14 @@
+ {
+       struct nameidata nd;
+       int error;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+ 
+-      error = user_path_walk_link(filename, &nd);
++      error = user_path_walk_link_it(filename, &nd, &it);
+       if (!error) {
+-              error = do_revalidate(nd.dentry);
++              error = do_revalidate(nd.dentry, &it);
+               if (!error)
+                       error = cp_new_stat64(nd.dentry->d_inode, statbuf);
++              intent_release(&it);
+               path_release(&nd);
+       }
+       return error;
+@@ -374,7 +388,7 @@
+       if (f) {
+               struct dentry * dentry = f->f_dentry;
+ 
+-              err = do_revalidate(dentry);
++              err = do_revalidate(dentry, NULL);
+               if (!err)
+                       err = cp_new_stat64(dentry->d_inode, statbuf);
+               fput(f);
+Index: linux-2.4.21/include/linux/dcache.h
+===================================================================
+--- linux-2.4.21.orig/include/linux/dcache.h   2004-04-24 02:37:59.000000000 -0400
++++ linux-2.4.21/include/linux/dcache.h        2004-04-26 19:06:32.000000000 -0400
+@@ -7,6 +7,51 @@
+ #include <linux/gdb.h>
+ #include <linux/mount.h>
+ #include <linux/kernel.h>
++#include <linux/string.h>
++
++#define IT_OPEN     0x0001
++#define IT_CREAT    0x0002
++#define IT_READDIR  0x0004
++#define IT_GETATTR  0x0008
++#define IT_LOOKUP   0x0010
++#define IT_UNLINK   0x0020
++#define IT_GETXATTR 0x0040
++#define IT_EXEC     0x0080
++#define IT_PIN      0x0100
++
++#define IT_FL_LOCKED   0x0001
++#define IT_FL_FOLLOWED 0x0002 /* set by vfs_follow_link */
++
++#define INTENT_MAGIC 0x19620323
++
++
++struct lustre_intent_data {   /* Lustre member of the lookup_intent.d union */
++      int       it_disposition;
++      int       it_status;
++      __u64     it_lock_handle;
++      void     *it_data;
++      int       it_lock_mode;
++      int it_int_flags;
++};
++struct lookup_intent {        /* intent handed to the *_it lookup/open paths */
++      int     it_magic;       /* INTENT_MAGIC once intent_init() has run */
++      void    (*it_op_release)(struct lookup_intent *); /* release hook; presumably called by intent_release() - confirm */
++      int     it_op;          /* requested operation, an IT_* value */
++      int     it_flags;       /* presumably IT_FL_* bits - confirm */
++      int     it_create_mode;
++      union {                 /* filesystem-specific payload; only Lustre's is defined here */
++              struct lustre_intent_data lustre;
++      } d;
++};
++
++static inline void intent_init(struct lookup_intent *it, int op, int flags)
++{
++      memset(it, 0, sizeof(*it));
++      it->it_magic = INTENT_MAGIC;
++      it->it_op = op;
++      it->it_flags = flags;
++}
++
+ 
+ /*
+  * linux/include/linux/dcache.h
+@@ -94,8 +139,22 @@
+       int (*d_delete)(struct dentry *);
+       void (*d_release)(struct dentry *);
+       void (*d_iput)(struct dentry *, struct inode *);
++      int (*d_revalidate_it)(struct dentry *, int, struct lookup_intent *);
++      void (*d_pin)(struct dentry *, struct vfsmount * , int);
++      void (*d_unpin)(struct dentry *, struct vfsmount *, int);
+ };
+ 
++#define PIN(de,mnt,flag)  if (de && de->d_op && de->d_op->d_pin) \
++                              de->d_op->d_pin(de, mnt, flag);
++#define UNPIN(de,mnt,flag)  if (de && de->d_op && de->d_op->d_unpin) \
++                              de->d_op->d_unpin(de, mnt, flag);
++
++
++/* defined in fs/namei.c */
++extern void intent_release(struct lookup_intent *it);
++/* defined in fs/dcache.c */
++extern void __d_rehash(struct dentry * entry, int lock);
++
+ /* the dentry parameter passed to d_hash and d_compare is the parent
+  * directory of the entries to be compared. It is used in case these
+  * functions need any directory specific information for determining
+@@ -127,6 +186,7 @@
+                                        * s_nfsd_free_path semaphore will be down
+                                        */
+ #define DCACHE_REFERENCED     0x0008  /* Recently used, don't discard. */
++#define DCACHE_LUSTRE_INVALID 0x0010  /* Lustre invalidated */
+ 
+ extern spinlock_t dcache_lock;
+ 
+Index: linux-2.4.21/include/linux/fs.h
+===================================================================
+--- linux-2.4.21.orig/include/linux/fs.h       2004-04-26 18:56:44.000000000 -0400
++++ linux-2.4.21/include/linux/fs.h    2004-04-26 19:06:32.000000000 -0400
+@@ -74,6 +74,7 @@
+ 
+ #define FMODE_READ 1
+ #define FMODE_WRITE 2
++#define FMODE_EXEC 4
+ 
+ #define READ 0
+ #define WRITE 1
+@@ -360,6 +361,9 @@
+ #define ATTR_MTIME_SET        256
+ #define ATTR_FORCE    512     /* Not a change, but a change it */
+ #define ATTR_ATTR_FLAG        1024
++#define ATTR_RAW      0x0800  /* file system, not vfs will massage attrs */
++#define ATTR_FROM_OPEN        0x1000  /* called from open path, ie O_TRUNC */
++#define ATTR_CTIME_SET        0x2000
+ 
+ /*
+  * This is the Inode Attributes structure, used for notify_change().  It
+@@ -504,6 +508,7 @@
+       struct pipe_inode_info  *i_pipe;
+       struct block_device     *i_bdev;
+       struct char_device      *i_cdev;
++      void                    *i_filterdata;
+ 
+       unsigned long           i_dnotify_mask; /* Directory notify events */
+       struct dnotify_struct   *i_dnotify; /* for directory notifications */
+@@ -666,6 +671,7 @@
+ 
+       /* needed for tty driver, and maybe others */
+       void                    *private_data;
++      struct lookup_intent    *f_it;
+       struct list_head        f_ep_links;
+       spinlock_t              f_ep_lock;
+ };
+@@ -795,6 +801,7 @@
+       struct qstr last;
+       unsigned int flags;
+       int last_type;
++      struct lookup_intent *intent;
+ };
+ 
+ /*
+@@ -916,7 +923,8 @@
+ extern int __vfs_rmdir(struct inode *, struct dentry *);
+ extern int vfs_rmdir(struct inode *, struct dentry *);
+ extern int vfs_unlink(struct inode *, struct dentry *);
+-extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
++int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
++             struct inode *new_dir, struct dentry *new_dentry);
+ 
+ /*
+  * File types
+@@ -991,21 +999,32 @@
+ 
+ struct inode_operations {
+       int (*create) (struct inode *,struct dentry *,int);
++      int (*create_it) (struct inode *,struct dentry *,int, struct lookup_intent *);
+       struct dentry * (*lookup) (struct inode *,struct dentry *);
++      struct dentry * (*lookup_it) (struct inode *,struct dentry *, struct lookup_intent *, int flags);
+       int (*link) (struct dentry *,struct inode *,struct dentry *);
++      int (*link_raw) (struct nameidata *,struct nameidata *);
+       int (*unlink) (struct inode *,struct dentry *);
++      int (*unlink_raw) (struct nameidata *);
+       int (*symlink) (struct inode *,struct dentry *,const char *);
++      int (*symlink_raw) (struct nameidata *,const char *);
+       int (*mkdir) (struct inode *,struct dentry *,int);
++      int (*mkdir_raw) (struct nameidata *,int);
+       int (*rmdir) (struct inode *,struct dentry *);
++      int (*rmdir_raw) (struct nameidata *);
+       int (*mknod) (struct inode *,struct dentry *,int,int);
++      int (*mknod_raw) (struct nameidata *,int,dev_t);
+       int (*rename) (struct inode *, struct dentry *,
+                       struct inode *, struct dentry *);
++      int (*rename_raw) (struct nameidata *, struct nameidata *);
+       int (*readlink) (struct dentry *, char *,int);
+       int (*follow_link) (struct dentry *, struct nameidata *);
+       void (*truncate) (struct inode *);
+       int (*permission) (struct inode *, int);
+       int (*revalidate) (struct dentry *);
++      int (*revalidate_it) (struct dentry *, struct lookup_intent *);
+       int (*setattr) (struct dentry *, struct iattr *);
++      int (*setattr_raw) (struct inode *, struct iattr *);
+       int (*getattr) (struct dentry *, struct iattr *);
+       int (*setxattr) (struct dentry *, const char *, const void *, size_t, int);
+       ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
+@@ -1204,10 +1223,14 @@
+ 
+ asmlinkage long sys_open(const char *, int, int);
+ asmlinkage long sys_close(unsigned int);      /* yes, it's really unsigned */
+-extern int do_truncate(struct dentry *, loff_t start);
++extern int do_truncate(struct dentry *, loff_t start, int called_from_open);
+ 
+ extern struct file *filp_open(const char *, int, int);
+ extern struct file * dentry_open(struct dentry *, struct vfsmount *, int);
++extern int open_namei_it(const char *filename, int namei_flags, int mode,
++                       struct nameidata *nd, struct lookup_intent *it);
++extern struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt,
++                          int flags, struct lookup_intent *it);
+ extern int filp_close(struct file *, fl_owner_t id);
+ extern char * getname(const char *);
+ 
+@@ -1503,6 +1526,7 @@
+ extern loff_t default_llseek(struct file *file, loff_t offset, int origin);
+ 
+ extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *));
++extern int FASTCALL(__user_walk_it(const char *, unsigned, struct nameidata *, struct lookup_intent *it));
+ extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *));
+ extern int FASTCALL(path_walk(const char *, struct nameidata *));
+ extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *));
+@@ -1515,6 +1539,8 @@
+ extern struct dentry * lookup_hash(struct qstr *, struct dentry *);
+ #define user_path_walk(name,nd)        __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd)
+ #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd)
++#define user_path_walk_it(name,nd,it)  __user_walk_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd, it)
++#define user_path_walk_link_it(name,nd,it) __user_walk_it(name, LOOKUP_POSITIVE, nd, it)
+ 
+ extern void inode_init_once(struct inode *);
+ extern void _inode_init_once(struct inode *);
+@@ -1666,6 +1692,8 @@
+ 
+ extern int vfs_readlink(struct dentry *, char *, int, const char *);
+ extern int vfs_follow_link(struct nameidata *, const char *);
++extern int vfs_follow_link_it(struct nameidata *, const char *,
++                            struct lookup_intent *it);
+ extern int page_readlink(struct dentry *, char *, int);
+ extern int page_follow_link(struct dentry *, struct nameidata *);
+ extern struct inode_operations page_symlink_inode_operations;
+Index: linux-2.4.21/include/linux/fs_struct.h
+===================================================================
+--- linux-2.4.21.orig/include/linux/fs_struct.h        2001-07-13 18:10:44.000000000 -0400
++++ linux-2.4.21/include/linux/fs_struct.h     2004-04-26 19:06:32.000000000 -0400
+@@ -34,10 +34,12 @@
+       write_lock(&fs->lock);
+       old_root = fs->root;
+       old_rootmnt = fs->rootmnt;
++      PIN(dentry, mnt, 1);
+       fs->rootmnt = mntget(mnt);
+       fs->root = dget(dentry);
+       write_unlock(&fs->lock);
+       if (old_root) {
++              UNPIN(old_root, old_rootmnt, 1);
+               dput(old_root);
+               mntput(old_rootmnt);
+       }
+@@ -57,10 +59,12 @@
+       write_lock(&fs->lock);
+       old_pwd = fs->pwd;
+       old_pwdmnt = fs->pwdmnt;
++      PIN(dentry, mnt, 0);
+       fs->pwdmnt = mntget(mnt);
+       fs->pwd = dget(dentry);
+       write_unlock(&fs->lock);
+       if (old_pwd) {
++              UNPIN(old_pwd, old_pwdmnt, 0);
+               dput(old_pwd);
+               mntput(old_pwdmnt);
+       }
+Index: linux-2.4.21/kernel/exit.c
+===================================================================
+--- linux-2.4.21.orig/kernel/exit.c    2004-04-24 02:39:01.000000000 -0400
++++ linux-2.4.21/kernel/exit.c 2004-04-26 19:06:32.000000000 -0400
+@@ -292,11 +292,14 @@
+ {
+       /* No need to hold fs->lock if we are killing it */
+       if (atomic_dec_and_test(&fs->count)) {
++              UNPIN(fs->pwd, fs->pwdmnt, 0);
++              UNPIN(fs->root, fs->rootmnt, 1);
+               dput(fs->root);
+               mntput(fs->rootmnt);
+               dput(fs->pwd);
+               mntput(fs->pwdmnt);
+               if (fs->altroot) {
++                      UNPIN(fs->altroot, fs->altrootmnt, 1);
+                       dput(fs->altroot);
+                       mntput(fs->altrootmnt);
+               }
+Index: linux-2.4.21/kernel/fork.c
+===================================================================
+--- linux-2.4.21.orig/kernel/fork.c    2004-04-24 02:39:01.000000000 -0400
++++ linux-2.4.21/kernel/fork.c 2004-04-26 19:06:32.000000000 -0400
+@@ -466,10 +466,13 @@
+               fs->umask = old->umask;
+               read_lock(&old->lock);
+               fs->rootmnt = mntget(old->rootmnt);
++              PIN(old->pwd, old->pwdmnt, 0);
++              PIN(old->root, old->rootmnt, 1);
+               fs->root = dget(old->root);
+               fs->pwdmnt = mntget(old->pwdmnt);
+               fs->pwd = dget(old->pwd);
+               if (old->altroot) {
++                      PIN(old->altroot, old->altrootmnt, 1);
+                       fs->altrootmnt = mntget(old->altrootmnt);
+                       fs->altroot = dget(old->altroot);
+               } else {
+Index: linux-2.4.21/kernel/ksyms.c
+===================================================================
+--- linux-2.4.21.orig/kernel/ksyms.c   2004-04-26 18:56:44.000000000 -0400
++++ linux-2.4.21/kernel/ksyms.c        2004-04-26 19:06:32.000000000 -0400
+@@ -329,6 +329,7 @@
+ EXPORT_SYMBOL(set_page_dirty);
+ EXPORT_SYMBOL(vfs_readlink);
+ EXPORT_SYMBOL(vfs_follow_link);
++EXPORT_SYMBOL(vfs_follow_link_it);
+ EXPORT_SYMBOL(page_readlink);
+ EXPORT_SYMBOL(page_follow_link);
+ EXPORT_SYMBOL(page_symlink_inode_operations);
diff --git a/lustre/kernel_patches/patches/vfs_intent-2.4.21-suse2.patch b/lustre/kernel_patches/patches/vfs_intent-2.4.21-suse2.patch

index ec3b64c..71b46e5 100644 (file)
--- a/lustre/kernel_patches/patches/vfs_intent-2.4.21-suse2.patch
+++ b/lustre/kernel_patches/patches/vfs_intent-2.4.21-suse2.patch
@@ -309,11 +309,11 @@ Index: linux-2.4.21-x86_64/fs/namei.c
  +                      if (!dentry->d_op->d_revalidate_it(dentry, 0, it)) {
  +                              struct dentry *new;
  +                              err = permission(dentry->d_parent->d_inode,
-+                                              MAY_EXEC);
++                                               MAY_EXEC);
  +                              if (err)
  +                                      break;
  +                              new = real_lookup(dentry->d_parent,
-+                                              &dentry->d_name, 0, NULL);
++                                                &dentry->d_name, 0, it);
  +                              d_invalidate(dentry);
  +                              dput(dentry);
  +                              if (IS_ERR(new)) {
@@ -332,8 +332,8 @@ Index: linux-2.4.21-x86_64/fs/namei.c
                 dput(dentry);
                 break;
         }
-+      if (err)
-+              intent_release(it);
++      if (err)
++              intent_release(it);
         path_release(nd);
   return_err:
         return err;
@@ -938,7 +938,7 @@ Index: linux-2.4.21-x86_64/fs/namespace.c
         int retval = 0;
         int mnt_flags = 0;
   
-@@ -725,10 +732,11 @@
+@@ -725,9 +732,11 @@
         flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV);
   
         /* ... and get the mountpoint */
@@ -948,11 +948,10 @@ Index: linux-2.4.21-x86_64/fs/namespace.c
  +      if (retval) {
  +              intent_release(&it);
                 return retval;
--
  +      }
+ 
         if (flags & MS_REMOUNT)
                 retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
-                                   data_page);
  @@ -739,6 +747,8 @@
         else
                 retval = do_add_mount(&nd, type_page, flags, mnt_flags,
@@ -1210,7 +1209,7 @@ Index: linux-2.4.21-x86_64/fs/open.c
  -      error = __user_walk(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW |
  -                    LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd);
  +      error = __user_walk_it(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW |
-+                    LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd, &it);
++                             LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd, &it);
         if (error)
                 goto out;
   
@@ -1669,7 +1668,7 @@ Index: linux-2.4.21-x86_64/include/linux/fs.h
   #define ATTR_ATTR_FLAG        1024
  +#define ATTR_RAW      0x0800  /* file system, not vfs will massage attrs */
  +#define ATTR_FROM_OPEN        0x1000  /* called from open path, ie O_TRUNC */
-+#define ATTR_CTIME_SET 0x2000
++#define ATTR_CTIME_SET        0x2000
   
   /*
    * This is the Inode Attributes structure, used for notify_change().  It
diff --git a/lustre/kernel_patches/patches/vfs_intent-2.4.22-rh.patch b/lustre/kernel_patches/patches/vfs_intent-2.4.22-rh.patch

index 29ad74f..7758b2c 100644 (file)
--- a/lustre/kernel_patches/patches/vfs_intent-2.4.22-rh.patch
+++ b/lustre/kernel_patches/patches/vfs_intent-2.4.22-rh.patch
@@ -297,11 +297,11 @@
  +                      if (!dentry->d_op->d_revalidate_it(dentry, 0, it)) {
  +                              struct dentry *new;
  +                              err = permission(dentry->d_parent->d_inode,
-+                                              MAY_EXEC);
++                                               MAY_EXEC);
  +                              if (err)
  +                                      break;
  +                              new = real_lookup(dentry->d_parent,
-+                                              &dentry->d_name, 0, NULL);
++                                                &dentry->d_name, 0, it);
  +                              d_invalidate(dentry);
  +                              dput(dentry);
  +                              if (IS_ERR(new)) {
@@ -607,12 +607,16 @@
         if (IS_ERR(dentry))
                 goto fail;
         if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
-@@ -1278,7 +1416,16 @@ asmlinkage long sys_mknod(const char * f
+@@ -1278,7 +1416,20 @@ asmlinkage long sys_mknod(const char * f
         error = path_lookup(tmp, LOOKUP_PARENT, &nd);
         if (error)
                 goto out;
  -      dentry = lookup_create(&nd, 0);
  +
++      if (nd.last_type != LAST_NORM) {
++              error = -EEXIST;
++              goto out2;
++      }
  +      if (nd.dentry->d_inode->i_op->mknod_raw) {
  +              struct inode_operations *op = nd.dentry->d_inode->i_op;
  +              error = op->mknod_raw(&nd, mode, dev);
@@ -633,11 +637,15 @@
         path_release(&nd);
   out:
         putname(tmp);
-@@ -1346,7 +1494,14 @@ asmlinkage long sys_mkdir(const char * p
+@@ -1346,7 +1494,20 @@ asmlinkage long sys_mkdir(const char * p
                 error = path_lookup(tmp, LOOKUP_PARENT, &nd);
                 if (error)
                         goto out;
  -              dentry = lookup_create(&nd, 1);
++              if (nd.last_type != LAST_NORM) {
++                      error = -EEXIST;
++                      goto out2;
++              }
  +              if (nd.dentry->d_inode->i_op->mkdir_raw) {
  +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
  +                      error = op->mkdir_raw(&nd, mode);
@@ -692,11 +700,15 @@
         error = PTR_ERR(dentry);
         if (!IS_ERR(dentry)) {
                 /* Why not before? Because we want correct error value */
-@@ -1581,15 +1752,23 @@ asmlinkage long sys_symlink(const char *
+@@ -1581,15 +1752,27 @@ asmlinkage long sys_symlink(const char *
                 error = path_lookup(to, LOOKUP_PARENT, &nd);
                 if (error)
                         goto out;
  -              dentry = lookup_create(&nd, 0);
++              if (nd.last_type != LAST_NORM) {
++                      error = -EEXIST;
++                      goto out2;
++              }
  +              if (nd.dentry->d_inode->i_op->symlink_raw) {
  +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
  +                      error = op->symlink_raw(&nd, from);
@@ -718,11 +730,15 @@
                 putname(to);
         }
         putname(from);
-@@ -1665,7 +1844,14 @@ asmlinkage long sys_link(const char * ol
+@@ -1665,7 +1844,18 @@ asmlinkage long sys_link(const char * ol
                 error = -EXDEV;
                 if (old_nd.mnt != nd.mnt)
                         goto out_release;
  -              new_dentry = lookup_create(&nd, 0);
++              if (nd.last_type != LAST_NORM) {
++                      error = -EEXIST;
++                      goto out2;
++              }
  +              if (nd.dentry->d_inode->i_op->link_raw) {
  +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
  +                      error = op->link_raw(&old_nd, &nd);
@@ -898,7 +914,7 @@
         int retval = 0;
         int mnt_flags = 0;
   
-@@ -725,10 +732,11 @@ long do_mount(char * dev_name, char * di
+@@ -725,9 +732,11 @@ long do_mount(char * dev_name, char * di
         flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV);
   
         /* ... and get the mountpoint */
@@ -908,11 +924,10 @@
  +      if (retval) {
  +              intent_release(&it);
                 return retval;
--
  +      }
+
         if (flags & MS_REMOUNT)
                 retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
-                                   data_page);
  @@ -739,6 +747,8 @@ long do_mount(char * dev_name, char * di
         else
                 retval = do_add_mount(&nd, type_page, flags, mnt_flags,
@@ -1168,7 +1183,7 @@
  -      error = __user_walk(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW |
  -                    LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd);
  +      error = __user_walk_it(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW |
-+                    LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd, &it);
++                             LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd, &it);
         if (error)
                 goto out;
   
@@ -1361,7 +1376,7 @@
    */
  --- linux-2.4.22-ac1/fs/stat.c~vfs_intent-2.4.22-rh    2003-09-25 14:16:27.000000000 +0400
  +++ linux-2.4.22-ac1-alexey/fs/stat.c  2003-09-25 14:42:46.000000000 +0400
-@@ -17,10 +17,14 @@
+@@ -17,10 +17,12 @@
    * Revalidate the inode. This is required for proper NFS attribute caching.
    */
   static __inline__ int
@@ -1370,8 +1385,6 @@
   {
         struct inode * inode = dentry->d_inode;
  -      if (inode->i_op && inode->i_op->revalidate)
-+      if (!inode)
-+              return -ENOENT;
  +      if (inode->i_op && inode->i_op->revalidate_it)
  +              return inode->i_op->revalidate_it(dentry, it);
  +      else if (inode->i_op && inode->i_op->revalidate)
@@ -1621,7 +1634,7 @@
   #define ATTR_ATTR_FLAG        1024
  +#define ATTR_RAW      0x0800  /* file system, not vfs will massage attrs */
  +#define ATTR_FROM_OPEN        0x1000  /* called from open path, ie O_TRUNC */
-+#define ATTR_CTIME_SET 0x2000
++#define ATTR_CTIME_SET        0x2000
   
   /*
    * This is the Inode Attributes structure, used for notify_change().  It
diff --git a/lustre/kernel_patches/patches/vfs_nointent-2.6-suse.patch b/lustre/kernel_patches/patches/vfs_nointent-2.6-suse.patch

index 9fcec3f..2bd3c6d 100644 (file)
--- a/lustre/kernel_patches/patches/vfs_nointent-2.6-suse.patch
+++ b/lustre/kernel_patches/patches/vfs_nointent-2.6-suse.patch
@@ -82,20 +82,18 @@ Index: linux-2.6.4-51.0/fs/namei.c
   
         name = getname(pathname);
         if(IS_ERR(name))
-@@ -1716,6 +1737,16 @@
+@@ -1716,6 +1737,14 @@
                         error = -EBUSY;
                         goto exit1;
         }
-+ 
  +      if (nd.dentry->d_inode->i_op->rmdir_raw) {
  +              struct inode_operations *op = nd.dentry->d_inode->i_op;
-+ 
++
  +              error = op->rmdir_raw(&nd);
  +              /* the file system wants to use normal vfs path now */
  +              if (error != -EOPNOTSUPP)
  +                      goto exit1;
  +      }
-+ 
         down(&nd.dentry->d_inode->i_sem);
         dentry = lookup_hash(&nd.last, nd.dentry);
         error = PTR_ERR(dentry);
diff --git a/lustre/kernel_patches/patches/xattr-0.8.54-2.4.22-rh.patch b/lustre/kernel_patches/patches/xattr-0.8.54-2.4.22-rh.patch

index 9d6bc19..b221045 100644 (file)
--- a/lustre/kernel_patches/patches/xattr-0.8.54-2.4.22-rh.patch
+++ b/lustre/kernel_patches/patches/xattr-0.8.54-2.4.22-rh.patch
@@ -1716,7 +1716,7 @@
  +              } else if (old_bh && header == HDR(old_bh)) {
  +                      /* Keep this block. */
  +                      new_bh = old_bh;
-+                      ext2_xattr_cache_insert(new_bh);
++                      (void)ext2_xattr_cache_insert(new_bh);
  +              } else {
  +                      /* We need to allocate a new block */
  +                      int force = EXT2_I(inode)->i_file_acl != 0;
@@ -1735,7 +1735,7 @@
  +                      memcpy(new_bh->b_data, header, new_bh->b_size);
  +                      mark_buffer_uptodate(new_bh, 1);
  +                      unlock_buffer(new_bh);
-+                      ext2_xattr_cache_insert(new_bh);
++                      (void)ext2_xattr_cache_insert(new_bh);
  +                      
  +                      ext2_xattr_update_super_block(sb);
  +              }
@@ -3402,7 +3402,7 @@
  +              } else if (old_bh && header == HDR(old_bh)) {
  +                      /* Keep this block. */
  +                      new_bh = old_bh;
-+                      ext3_xattr_cache_insert(new_bh);
++                      (void)ext3_xattr_cache_insert(new_bh);
  +              } else {
  +                      /* We need to allocate a new block */
  +                      int force = EXT3_I(inode)->i_file_acl != 0;
@@ -3427,7 +3427,7 @@
  +                      memcpy(new_bh->b_data, header, new_bh->b_size);
  +                      mark_buffer_uptodate(new_bh, 1);
  +                      unlock_buffer(new_bh);
-+                      ext3_xattr_cache_insert(new_bh);
++                      (void)ext3_xattr_cache_insert(new_bh);
  +                      
  +                      ext3_xattr_update_super_block(handle, sb);
  +              }
diff --git a/lustre/kernel_patches/series/chaos-2.4.18 b/lustre/kernel_patches/series/chaos-2.4.18

index c69f42f..99cdf04 100644 (file)
--- a/lustre/kernel_patches/series/chaos-2.4.18
+++ b/lustre/kernel_patches/series/chaos-2.4.18
@@ -38,3 +38,4 @@ procfs-ndynamic-2.4.patch
  ext3-truncate-buffer-head.patch
  inode-max-readahead-2.4.24.patch
  dcache_refcount_debug.patch
+mkdep-revert-rh-2.4.patch
diff --git a/lustre/kernel_patches/series/chaos-2.4.18-pdirops b/lustre/kernel_patches/series/chaos-2.4.18-pdirops

index c180a5b..456c2eb 100644 (file)
--- a/lustre/kernel_patches/series/chaos-2.4.18-pdirops
+++ b/lustre/kernel_patches/series/chaos-2.4.18-pdirops
@@ -33,3 +33,4 @@ ext3-extents-2.4.18-chaos-pdirops.patch
  nfs_export_kernel-2.4.18.patch
  ext3-raw-lookup-pdirops.patch
  ext3-truncate-buffer-head.patch
+mkdep-revert-rh-2.4.patch
diff --git a/lustre/kernel_patches/series/rh-2.4.20 b/lustre/kernel_patches/series/rh-2.4.20

index 06b2642..0ad83a9 100644 (file)
--- a/lustre/kernel_patches/series/rh-2.4.20
+++ b/lustre/kernel_patches/series/rh-2.4.20
@@ -45,3 +45,4 @@ ext3-xattr-ptr-arith-fix.patch
  procfs-ndynamic-2.4.patch
  ext3-truncate-buffer-head.patch
  inode-max-readahead-2.4.24.patch
+mkdep-revert-rh-2.4.patch
diff --git a/lustre/kernel_patches/series/suse-2.4.19 b/lustre/kernel_patches/series/suse-2.4.19

index 9905491..12eab3d 100644 (file)
--- a/lustre/kernel_patches/series/suse-2.4.19
+++ b/lustre/kernel_patches/series/suse-2.4.19
@@ -1,4 +1,4 @@
-dev_read_only_hp_2.4.20.patch
+dev_read_only-suse-2.4.19.patch
  exports_2.4.19-suse.patch 
  lustre_version.patch
  vfs_intent-2.4.19-suse.patch 
@@ -6,11 +6,10 @@ invalidate_show.patch
  export-truncate.patch
  iod-stock-24-exports-2.4.19-suse.patch 
  jbd-2.4.18-jcberr.patch
-ext-2.4-patch-1-chaos.patch 
+ext-2.4-patch-1-suse-2.4.19.patch 
  ext-2.4-patch-2.patch
  ext-2.4-patch-3.patch
  ext-2.4-patch-4.patch
-linux-2.4.20-xattr-0.8.54-hp.patch 
  linux-2.4.19-xattr-0.8.54-suse.patch 
  ext3-2.4-ino_t.patch
  ext3-largefile.patch
@@ -32,5 +31,5 @@ jbd-flushtime-2.4.19-suse.patch
  jbd-get_write_access.patch
  ext3-ea-in-inode-2.4.20.patch 
  listman-2.4.20.patch
-ext3-trusted_ea-2.4.20.patch 
+ext3-trusted_ea-suse-2.4.19.patch
  ext3-truncate-buffer-head.patch
diff --git a/lustre/kernel_patches/series/suse-sles8sp3-2.4.21 b/lustre/kernel_patches/series/suse-sles8sp3-2.4.21

new file mode 100644 (file)

index 0000000..5bc8822
--- /dev/null
+++ b/lustre/kernel_patches/series/suse-sles8sp3-2.4.21
@@ -0,0 +1,31 @@
+configurable-x86-stack-2.4.21-sles8sp3.patch 
+dev_read_only_2.4.20-rh.patch
+exports_2.4.20-rh-hp.patch
+lustre_version.patch
+vfs_intent-2.4.21-sles8sp3.patch 
+invalidate_show-2.4.21-sles8sp3.patch 
+iod-stock-24-exports_hp.patch
+ext3-htree-2.4.21-chaos.patch
+linux-2.4.21-xattr-0.8.54-suse2.patch
+ext3-orphan_lock-2.4.22-rh.patch 
+ext3-noread-2.4.21-suse2.patch
+ext3-delete_thread-2.4.21-chaos.patch 
+extN-wantedi-2.4.21-suse2.patch
+ext3-san-2.4.20.patch
+ext3-map_inode_page-2.4.21-suse2.patch 
+ext3-error-export.patch
+iopen-2.4.21-sles8sp3.patch 
+tcp-zero-copy-2.4.21-sles8sp3.patch 
+jbd-dont-account-blocks-twice.patch
+jbd-commit-tricks.patch
+ext3-no-write-super-chaos.patch
+add_page_private.patch
+nfs_export_kernel-2.4.21-suse2.patch
+ext3-raw-lookup.patch
+ext3-ea-in-inode-2.4.21-suse2.patch
+listman-2.4.20.patch
+gfp_memalloc-2.4.24.patch 
+ext3-xattr-ptr-arith-fix.patch
+kernel_text_address-2.4.21-sles8sp3.patch 
+ext3-truncate-buffer-head.patch
+export-truncate.patch
diff --git a/lustre/kernel_patches/which_patch b/lustre/kernel_patches/which_patch

index f86c76d..93f3411 100644 (file)
--- a/lustre/kernel_patches/which_patch
+++ b/lustre/kernel_patches/which_patch
@@ -6,3 +6,4 @@ vanilla-2.4.20         linux-2.4.20             patch with uml-2.4.20-6     um
  chaos-2.4.20           linux-chaos-2.4.20       same as rh-2.4.20-8         i386
  kgdb-2.5.73            linux-2.5.73             vanilla 2.5.73 with kgdb    i386
  bproc-2.4.20-hp-pnnl   linux-2.4.20-hp4_pnnl9   hp-pnnl + bproc             i386
+suse-2.4.19                                     SUSE ES 8
diff --git a/lustre/ldlm/ldlm_resource.c b/lustre/ldlm/ldlm_resource.c

index bc174d8..7e75089 100644 (file)
--- a/lustre/ldlm/ldlm_resource.c
+++ b/lustre/ldlm/ldlm_resource.c
@@ -534,7 +534,7 @@ ldlm_resource_get(struct ldlm_namespace *ns, struct ldlm_resource *parent,
  struct ldlm_resource *ldlm_resource_getref(struct ldlm_resource *res)
  {
          LASSERT(res != NULL);
-        LASSERT(res != (void *)0x5a5a5a5a);
+        LASSERT(res != LP_POISON);
          atomic_inc(&res->lr_refcount);
          CDEBUG(D_INFO, "getref res: %p count: %d\n", res,
                 atomic_read(&res->lr_refcount));
@@ -550,7 +550,7 @@ int ldlm_resource_putref(struct ldlm_resource *res)
          CDEBUG(D_INFO, "putref res: %p count: %d\n", res,
                 atomic_read(&res->lr_refcount) - 1);
          LASSERT(atomic_read(&res->lr_refcount) > 0);
-        LASSERT(atomic_read(&res->lr_refcount) < 0x5a5a5a5a);
+        LASSERT(atomic_read(&res->lr_refcount) < LI_POISON);
  
          if (atomic_dec_and_test(&res->lr_refcount)) {
                  struct ldlm_namespace *ns = res->lr_namespace;
diff --git a/lustre/liblustre/genlib.sh b/lustre/liblustre/genlib.sh

index c31ea2f..f371650 100755 (executable)
--- a/lustre/liblustre/genlib.sh
+++ b/lustre/liblustre/genlib.sh
@@ -83,6 +83,6 @@ $RANLIB $CWD/liblustre.a
  # create shared lib lustre
  rm -f $CWD/liblustre.so
  $LD -shared -o $CWD/liblustre.so -init __liblustre_setup_ -fini __liblustre_cleanup_ \
-       $ALL_OBJS -lcap -lpthread
+       $ALL_OBJS -lpthread
  
  #rm -rf $sysio_tmp
diff --git a/lustre/liblustre/llite_lib.c b/lustre/liblustre/llite_lib.c

index a61e368..fbd199b 100644 (file)
--- a/lustre/liblustre/llite_lib.c
+++ b/lustre/liblustre/llite_lib.c
@@ -27,7 +27,6 @@
  #include <signal.h>
  #include <sys/types.h>
  #include <sys/queue.h>
-#include <sys/capability.h>
  
  #include <netinet/in.h>
  #include <sys/socket.h>
@@ -99,112 +98,18 @@ char *portals_nid2str(int nal, ptl_nid_t nid, char *str)
          return str;
  }
  
-int in_group_p(gid_t gid)
-{
-        int i;
-
-        if (gid == current->fsgid)
-                return 1;
-
-        for (i = 0; i < current->ngroups; i++) {
-                if (gid == current->groups[i])
-                        return 1;
-        }
-
-        return 0;
-}
-
-static void init_capability(int *res)
-{
-        cap_value_t cap_types[] = {
-                CAP_CHOWN,
-                CAP_DAC_OVERRIDE,
-                CAP_DAC_READ_SEARCH,
-                CAP_FOWNER,
-                CAP_FSETID,
-                CAP_KILL,
-                CAP_SETGID,
-                CAP_SETUID,
-                /* following are linux specific, we could simply
-                 * remove them I think */
-                CAP_SETPCAP,
-                CAP_LINUX_IMMUTABLE,
-                CAP_NET_BIND_SERVICE,
-                CAP_NET_BROADCAST,
-                CAP_NET_ADMIN,
-                CAP_NET_RAW,
-                CAP_IPC_LOCK,
-                CAP_IPC_OWNER,
-                CAP_SYS_MODULE,
-                CAP_SYS_RAWIO,
-                CAP_SYS_CHROOT,
-                CAP_SYS_PTRACE,
-                CAP_SYS_PACCT,
-                CAP_SYS_ADMIN,
-                CAP_SYS_BOOT,
-                CAP_SYS_NICE,
-                CAP_SYS_RESOURCE,
-                CAP_SYS_TIME,
-                CAP_SYS_TTY_CONFIG,
-                CAP_MKNOD,
-                CAP_LEASE,
-        };
-        cap_t syscap;
-        cap_flag_value_t capval;
-        int i;
-
-        *res = 0;
-
-        syscap = cap_get_proc();
-        if (!syscap) {
-                printf("Liblustre: Warning: failed to get system capability, "
-                       "set to minimal\n");
-                return;
-        }
-
-        for (i = 0; i < sizeof(cap_types)/sizeof(cap_t); i++) {
-                LASSERT(cap_types[i] < 32);
-                if (!cap_get_flag(syscap, cap_types[i],
-                     CAP_EFFECTIVE, &capval)) {
-                        if (capval == CAP_SET) {
-                                *res |= 1 << cap_types[i];
-                        }
-                }
-        }
-}
-
-static int init_current(char *comm)
+void init_current(char *comm)
  {
          current = malloc(sizeof(*current));
-        if (!current) {
-                CERROR("Not enough memory\n");
-                return -ENOMEM;
-        }
-        current->fs = &current->__fs;
+        current->fs = malloc(sizeof(*current->fs));
          current->fs->umask = umask(0777);
          umask(current->fs->umask);
-
          strncpy(current->comm, comm, sizeof(current->comm));
          current->pid = getpid();
-        current->fsuid = geteuid();
-        current->fsgid = getegid();
+        current->fsuid = 0;
+        current->fsgid = 0;
+        current->cap_effective = -1;
          memset(&current->pending, 0, sizeof(current->pending));
-
-        current->max_groups = sysconf(_SC_NGROUPS_MAX);
-        current->groups = malloc(sizeof(gid_t) * current->max_groups);
-        if (!current->groups) {
-                CERROR("Not enough memory\n");
-                return -ENOMEM;
-        }
-        current->ngroups = getgroups(current->max_groups, current->groups);
-        if (current->ngroups < 0) {
-                perror("Error getgroups");
-                return -EINVAL;
-        }
-
-        init_capability(&current->cap_effective);
-
-        return 0;
  }
  
  /* FIXME */
@@ -298,8 +203,8 @@ int lllib_init(char *dumpfile)
                  printf("LibLustre: TCPNAL NID: %016llx\n", tcpnal_mynid);
          }
  
-        if (init_current("dummy") ||
-            init_obdclass() ||
+        init_current("dummy");
+        if (init_obdclass() ||
              init_lib_portals() ||
              ptlrpc_init() ||
              mdc_init() ||
@@ -426,6 +331,11 @@ out:
          RETURN(rc);
  }
  
+static void sighandler_USR1(int signum)
+{
+        /* do nothing */
+}
+
  /* parse host:/mdsname/profile string */
  int ll_parse_mount_target(const char *target, char **mdsnid,
                            char **mdsname, char **profile)
@@ -488,6 +398,8 @@ void __liblustre_setup_(void)
           */
          srand(time(NULL) + getpid());
  
+        signal(SIGUSR1, sighandler_USR1);
+
         lustre_path = getenv(ENV_LUSTRE_MNTPNT);
         if (!lustre_path) {
                  lustre_path = "/mnt/lustre";
diff --git a/lustre/liblustre/namei.c b/lustre/liblustre/namei.c

index 6e596d2..0403ad5 100644 (file)
--- a/lustre/liblustre/namei.c
+++ b/lustre/liblustre/namei.c
@@ -319,11 +319,7 @@ static int lookup_it_finish(struct ptlrpc_request *request, int offset,
  
          /* NB 1 request reference will be taken away by ll_intent_lock()
           * when I return
-         */
-        /* FIXME: for CREAT, libsysio require the inode must be generated here
-         * currently here we don't know the whether the create is successful
-         * or failed on mds. thus blinded return -EPERM in llu_iget(). need
-         * a fix later.
+         * Note: libsysio require the inode must be generated here
           */
          if ((it->it_op & IT_CREAT) || !it_disposition(it, DISP_LOOKUP_NEG)) {
                  struct lustre_md md;
@@ -335,11 +331,11 @@ static int lookup_it_finish(struct ptlrpc_request *request, int offset,
                          RETURN(rc);
  
                  inode = llu_iget(parent->i_fs, &md);
-                if (!inode || IS_ERR(inode)) {
+                if (!inode) {
                          /* free the lsm if we allocated one above */
                          if (md.lsm != NULL)
                                  obd_free_memmd(sbi->ll_osc_exp, &md.lsm);
-                        RETURN(inode ? PTR_ERR(inode) : -ENOMEM);
+                        RETURN(-ENOMEM);
                  } else if (md.lsm != NULL &&
                             llu_i2info(inode)->lli_smd != md.lsm) {
                          obd_free_memmd(sbi->ll_osc_exp, &md.lsm);
diff --git a/lustre/liblustre/super.c b/lustre/liblustre/super.c

index de8cf3b..86048e6 100644 (file)
--- a/lustre/liblustre/super.c
+++ b/lustre/liblustre/super.c
@@ -31,7 +31,6 @@
  #include <sys/stat.h>
  #include <sys/fcntl.h>
  #include <sys/queue.h>
-#include <sys/capability.h>
  #ifndef __CYGWIN__
  # include <sys/statvfs.h>
  #else
@@ -1291,11 +1290,8 @@ struct inode *llu_iget(struct filesys *fs, struct lustre_md *md)
  
          if ((md->body->valid &
               (OBD_MD_FLGENER | OBD_MD_FLID | OBD_MD_FLTYPE)) !=
-            (OBD_MD_FLGENER | OBD_MD_FLID | OBD_MD_FLTYPE)) {
-                /* FIXME this is workaround for for open(O_CREAT),
-                 * see lookup_it_finish(). */
-                return ERR_PTR(-EPERM);
-        }
+            (OBD_MD_FLGENER | OBD_MD_FLID | OBD_MD_FLTYPE))
+                CERROR("invalide fields!\n");
  
          /* try to find existing inode */
          fid.id = md->body->ino;
@@ -1494,7 +1490,7 @@ llu_fsswop_mount(const char *source,
          LASSERT(sbi->ll_rootino != 0);
  
          root = llu_iget(fs, &md);
-        if (!root || IS_ERR(root)) {
+        if (root == NULL) {
                  CERROR("fail to generate root inode\n");
                  GOTO(out_request, err = -EBADF);
          }
diff --git a/lustre/liblustre/tests/Makefile.am b/lustre/liblustre/tests/Makefile.am

index ff73edf..81e7058 100644 (file)
--- a/lustre/liblustre/tests/Makefile.am
+++ b/lustre/liblustre/tests/Makefile.am
@@ -4,7 +4,7 @@ AM_CPPFLAGS = -I$(SYSIO)/include -I/opt/lam/include $(LLCPPFLAGS) -I$(top_srcdir
  AM_CFLAGS = $(LLCFLAGS)
  LIBS = $(LIBEFENCE) $(LIBREADLINE)
  
-LLIB_EXEC= ../liblustre.a -lcap -lpthread
+LLIB_EXEC= ../liblustre.a -lpthread
  
  if LIBLUSTRE
  noinst_LIBRARIES = libtestcommon.a
@@ -21,7 +21,7 @@ libtestcommon_a_SOURCES = test_common.c test_common.h
  
  echo_test_SOURCES = echo_test.c  ../../utils/parser.c ../../utils/obd.c ../../utils/lustre_cfg.c
  echo_test_CFLAGS = $(LL_CFLAGS)
-echo_test_LDADD = ../liblsupport.a $(LIBREADLINE) -lcap -lpthread 
+echo_test_LDADD = ../liblsupport.a $(LIBREADLINE) -lpthread 
  echo_test_DEPENDENCIES=$(top_builddir)/liblustre/liblsupport.a
  
  sanity_SOURCES = sanity.c
diff --git a/lustre/liblustre/tests/echo_test.c b/lustre/liblustre/tests/echo_test.c

index 48d0e6f..f2230ab 100644 (file)
--- a/lustre/liblustre/tests/echo_test.c
+++ b/lustre/liblustre/tests/echo_test.c
@@ -81,11 +81,6 @@ libcfs_nal_cmd(struct portals_cfg *pcfg)
          return 0;
  }
  
-int in_group_p(gid_t gid)
-{
-        return 0;
-}
-
  int init_current(int argc, char **argv)
  { 
          current = malloc(sizeof(*current));
diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c

index 5307cb1..05f6573 100644 (file)
--- a/lustre/llite/dir.c
+++ b/lustre/llite/dir.c
@@ -201,7 +201,6 @@ Eend:
  fail:
          SetPageChecked(page);
          SetPageError(page);
-        LBUG();
  }
  
  static struct page *ll_get_dir_page(struct inode *dir, unsigned long n)
diff --git a/lustre/llite/file.c b/lustre/llite/file.c

index f9d58f7..61bb36d 100644 (file)
--- a/lustre/llite/file.c
+++ b/lustre/llite/file.c
@@ -699,7 +699,7 @@ static ssize_t ll_file_read(struct file *filp, char *buf, size_t count,
          struct lov_stripe_md *lsm = lli->lli_smd;
          struct lustre_handle lockh = { 0 };
          ldlm_policy_data_t policy;
-        ldlm_error_t err;
+        int rc;
          ssize_t retval;
          __u64 kms;
          ENTRY;
@@ -720,11 +720,11 @@ static ssize_t ll_file_read(struct file *filp, char *buf, size_t count,
          policy.l_extent.start = *ppos;
          policy.l_extent.end = *ppos + count - 1;
  
-        err = ll_extent_lock(fd, inode, lsm, LCK_PR, &policy, &lockh,
-                             (filp->f_flags & O_NONBLOCK)?LDLM_FL_BLOCK_NOWAIT:
-                                                          0);
-        if (err != ELDLM_OK)
-                RETURN(err);
+        rc = ll_extent_lock(fd, inode, lsm, LCK_PR, &policy, &lockh,
+                                (filp->f_flags & O_NONBLOCK) ?
+                                        LDLM_FL_BLOCK_NOWAIT: 0);
+        if (rc != 0)
+                RETURN(rc);
  
          kms = lov_merge_size(lsm, 1);
          if (*ppos + count - 1 > kms) {
@@ -767,9 +767,8 @@ static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
          struct lustre_handle lockh = { 0 };
          ldlm_policy_data_t policy;
          loff_t maxbytes = ll_file_maxbytes(inode);
-        ldlm_error_t err;
          ssize_t retval;
-        int nonblock = 0;
+        int nonblock = 0, rc;
          ENTRY;
          if (file->f_flags & O_NONBLOCK)
                  nonblock = LDLM_FL_BLOCK_NOWAIT;
@@ -797,9 +796,9 @@ static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
                  policy.l_extent.end = *ppos + count - 1;
          }
  
-        err = ll_extent_lock(fd, inode, lsm, LCK_PW, &policy, &lockh, nonblock);
-        if (err != ELDLM_OK)
-                RETURN(err);
+        rc = ll_extent_lock(fd, inode, lsm, LCK_PW, &policy, &lockh, nonblock);
+        if (rc != 0)
+                RETURN(rc);
  
          /* this is ok, g_f_w will overwrite this under i_sem if it races
           * with a local truncate, it just makes our maxbyte checking easier */
@@ -1018,8 +1017,7 @@ static int ll_get_grouplock(struct inode *inode, struct file *file,
          struct lustre_handle lockh = { 0 };
          struct ll_inode_info *lli = ll_i2info(inode);
          struct lov_stripe_md *lsm = lli->lli_smd;
-        ldlm_error_t err;
-        int flags = 0;
+        int flags = 0, rc;
          ENTRY;
  
          if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
@@ -1030,9 +1028,9 @@ static int ll_get_grouplock(struct inode *inode, struct file *file,
          if (file->f_flags & O_NONBLOCK)
                  flags = LDLM_FL_BLOCK_NOWAIT;
  
-        err = ll_extent_lock(fd, inode, lsm, LCK_GROUP, &policy, &lockh, flags);
-        if (err)
-                RETURN(err);
+        rc = ll_extent_lock(fd, inode, lsm, LCK_GROUP, &policy, &lockh, flags);
+        if (rc != 0)
+                RETURN(rc);
  
          fd->fd_flags |= LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK;
          fd->fd_gid = arg;
@@ -1047,7 +1045,7 @@ static int ll_put_grouplock(struct inode *inode, struct file *file,
          struct ll_file_data *fd = file->private_data;
          struct ll_inode_info *lli = ll_i2info(inode);
          struct lov_stripe_md *lsm = lli->lli_smd;
-        ldlm_error_t err;
+        int rc;
          ENTRY;
  
          if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
@@ -1057,18 +1055,18 @@ static int ll_put_grouplock(struct inode *inode, struct file *file,
  
          if (fd->fd_gid != arg) /* Ugh? Unlocking with different gid? */
                  RETURN(-EINVAL);
-        
+
          fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK);
  
-        err = ll_extent_unlock(fd, inode, lsm, LCK_GROUP, &fd->fd_cwlockh);
-        if (err)
-                RETURN(err);
+        rc = ll_extent_unlock(fd, inode, lsm, LCK_GROUP, &fd->fd_cwlockh);
+        if (rc)
+                RETURN(rc);
  
          fd->fd_gid = 0;
          memset(&fd->fd_cwlockh, 0, sizeof(fd->fd_cwlockh));
  
          RETURN(0);
-}       
+}
  
  int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
                    unsigned long arg)
@@ -1145,17 +1143,16 @@ loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
  
          lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_LLSEEK);
          if (origin == 2) { /* SEEK_END */
-                ldlm_error_t err;
-                int nonblock = 0;
+                int nonblock = 0, rc;
                  ldlm_policy_data_t policy = { .l_extent = {0, OBD_OBJECT_EOF }};
  
                  if (file->f_flags & O_NONBLOCK)
                          nonblock = LDLM_FL_BLOCK_NOWAIT;
  
-                err = ll_extent_lock(fd, inode, lsm, LCK_PR, &policy, &lockh,
+                rc = ll_extent_lock(fd, inode, lsm, LCK_PR, &policy, &lockh,
                                       nonblock);
-                if (err != ELDLM_OK)
-                        RETURN(err);
+                if (rc != 0)
+                        RETURN(rc);
  
                  offset += inode->i_size;
          } else if (origin == 1) { /* SEEK_CUR */
diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h

index 5ff6eb3..5031b20 100644 (file)
--- a/lustre/llite/llite_internal.h
+++ b/lustre/llite/llite_internal.h
@@ -10,27 +10,24 @@
  #ifndef LLITE_INTERNAL_H
  #define LLITE_INTERNAL_H
  
+/* default to about 40meg of readahead on a given system.  That much tied
+ * up in 512k readahead requests serviced at 40ms each is about 1GB/s. */
+#define SBI_DEFAULT_RA_MAX ((40 << 20) >> PAGE_CACHE_SHIFT)
+
  struct ll_sb_info {
+        /* this protects pglist and max_r_a_pages.  It isn't safe to
+         * grab from interrupt contexts */
+        spinlock_t                ll_lock;
          struct obd_uuid           ll_sb_uuid;
-//        struct lustre_handle      ll_mdc_conn;
          struct obd_export        *ll_mdc_exp;
          struct obd_export        *ll_osc_exp;
          struct proc_dir_entry*    ll_proc_root;
          obd_id                    ll_rootino; /* number of root inode */
  
-        struct obd_uuid           ll_mds_uuid;
-        struct obd_uuid           ll_mds_peer_uuid;
          struct lustre_mount_data *ll_lmd;
          char                     *ll_instance;
  
          int                       ll_flags;
-        wait_queue_head_t         ll_commitcbd_waitq;
-        wait_queue_head_t         ll_commitcbd_ctl_waitq;
-        int                       ll_commitcbd_flags;
-        struct task_struct       *ll_commitcbd_thread;
-        time_t                    ll_commitcbd_waketime;
-        time_t                    ll_commitcbd_timeout;
-        spinlock_t                ll_commitcbd_lock;
          struct list_head          ll_conn_chain; /* per-conn chain of SBs */
  
          struct hlist_head         ll_orphan_dentry_list; /*please don't ask -p*/
@@ -38,14 +35,20 @@ struct ll_sb_info {
  
          struct lprocfs_stats     *ll_stats; /* lprocfs stats counter */
  
-        spinlock_t                ll_pglist_lock;
          unsigned long             ll_pglist_gen;
          struct list_head          ll_pglist;
+
+        unsigned long             ll_read_ahead_pages;
+        unsigned long             ll_max_read_ahead_pages;
+
  };
  
  struct ll_readahead_state {
          spinlock_t      ras_lock;
-        unsigned long   ras_last, ras_window, ras_next_index;
+        unsigned long   ras_last_readpage, ras_consecutive;
+        unsigned long   ras_window_start, ras_window_len;
+        unsigned long   ras_next_readahead;
+
  };
  
  extern kmem_cache_t *ll_file_data_slab;
@@ -149,7 +152,6 @@ int ll_commit_write(struct file *, struct page *, unsigned from, unsigned to);
  void ll_inode_fill_obdo(struct inode *inode, int cmd, struct obdo *oa);
  void ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc);
  void ll_removepage(struct page *page);
-int ll_sync_page(struct page *page);
  int ll_readpage(struct file *file, struct page *page);
  struct ll_async_page *llap_from_cookie(void *cookie);
  struct ll_async_page *llap_from_page(struct page *page);
diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c

index bf8fb4c..4c68ca7 100644 (file)
--- a/lustre/llite/llite_lib.c
+++ b/lustre/llite/llite_lib.c
@@ -53,9 +53,10 @@ struct ll_sb_info *lustre_init_sbi(struct super_block *sb)
          if (!sbi)
                  RETURN(NULL);
  
-        spin_lock_init(&sbi->ll_pglist_lock);
+        spin_lock_init(&sbi->ll_lock);
          INIT_LIST_HEAD(&sbi->ll_pglist);
          sbi->ll_pglist_gen = 0;
+        sbi->ll_max_read_ahead_pages = SBI_DEFAULT_RA_MAX;
          INIT_LIST_HEAD(&sbi->ll_conn_chain);
          INIT_HLIST_HEAD(&sbi->ll_orphan_dentry_list);
          ll_s2sbi(sb) = sbi;
@@ -905,7 +906,7 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr)
                  rc = ll_extent_lock(NULL, inode, lsm, LCK_PW, &policy, &lockh,
                                      ast_flags);
                  down(&inode->i_sem);
-                if (rc != ELDLM_OK)
+                if (rc != 0)
                          RETURN(rc);
  
                  rc = vmtruncate(inode, attr->ia_size);
diff --git a/lustre/llite/lproc_llite.c b/lustre/llite/lproc_llite.c

index 9f1987c..35676f2 100644 (file)
--- a/lustre/llite/lproc_llite.c
+++ b/lustre/llite/lproc_llite.c
@@ -161,14 +161,6 @@ static int ll_rd_filesfree(char *page, char **start, off_t off, int count,
  
  }
  
-#if 0
-static int ll_rd_path(char *page, char **start, off_t off, int count, int *eof,
-                      void *data)
-{
-        return 0;
-}
-#endif
-
  static int ll_rd_fstype(char *page, char **start, off_t off, int count,
                          int *eof, void *data)
  {
@@ -222,6 +214,41 @@ static int ll_wr_read_ahead(struct file *file, const char *buffer,
          RETURN(count);
  }
  
+/* lprocfs read handler for "max_read_ahead_mb": print the page limit in MB */
+static int ll_rd_max_read_ahead_mb(char *page, char **start, off_t off,
+                                   int count, int *eof, void *data)
+{
+        struct super_block *sb = data;
+        struct ll_sb_info *sbi = ll_s2sbi(sb);
+        unsigned val;
+        spin_lock(&sbi->ll_lock);
+        /* pages -> MB in a single shift; shifting up to bytes first can wrap */
+        val = sbi->ll_max_read_ahead_pages >> (20 - PAGE_CACHE_SHIFT);
+        spin_unlock(&sbi->ll_lock);
+        return snprintf(page, count, "%u\n", val);
+}
+
+/* lprocfs write handler for "max_read_ahead_mb": set the page limit from MB */
+static int ll_wr_max_read_ahead_mb(struct file *file, const char *buffer,
+                                   unsigned long count, void *data)
+{
+        struct super_block *sb = data;
+        struct ll_sb_info *sbi = ll_s2sbi(sb);
+        int val, rc;
+
+        rc = lprocfs_write_helper(buffer, count, &val);
+        if (rc)
+                return rc;
+        /* cap at RAM size; single-shift MB<->page math cannot overflow */
+        if (val < 0 || val > (num_physpages >> (20 - PAGE_SHIFT)))
+                return -ERANGE;
+        spin_lock(&sbi->ll_lock);
+        sbi->ll_max_read_ahead_pages =
+                (unsigned long)val << (20 - PAGE_CACHE_SHIFT);
+        spin_unlock(&sbi->ll_lock);
+        return count;
+}
+
  static struct lprocfs_vars lprocfs_obd_vars[] = {
          { "uuid",         ll_rd_sb_uuid,          0, 0 },
          //{ "mntpt_path",   ll_rd_path,             0, 0 },
@@ -234,6 +261,8 @@ static struct lprocfs_vars lprocfs_obd_vars[] = {
          { "filesfree",    ll_rd_filesfree,        0, 0 },
          //{ "filegroups",   lprocfs_rd_filegroups,  0, 0 },
          { "read_ahead",   ll_rd_read_ahead, ll_wr_read_ahead, 0 },
+        { "max_read_ahead_mb",   ll_rd_max_read_ahead_mb, 
+                                 ll_wr_max_read_ahead_mb, 0 },
          { 0 }
  };
  
@@ -463,7 +492,7 @@ static int llite_dump_pgcache_seq_show(struct seq_file *seq, void *v)
                  return 0;
          }
  
-        spin_lock(&sbi->ll_pglist_lock);
+        spin_lock(&sbi->ll_lock);
  
          llap = llite_pglist_next_llap(sbi, &dummy_llap->llap_proc_item);
          if (llap != NULL)  {
@@ -486,7 +515,7 @@ static int llite_dump_pgcache_seq_show(struct seq_file *seq, void *v)
                          seq_puts(seq, "]\n");
          }
  
-        spin_unlock(&sbi->ll_pglist_lock);
+        spin_unlock(&sbi->ll_lock);
  
          return 0;
  }
@@ -516,14 +545,14 @@ static void *llite_dump_pgcache_seq_next(struct seq_file *seq, void *v,
          /* we've just displayed the llap that is after us in the list.
           * we advance to a position beyond it, returning null if there
           * isn't another llap in the list beyond that new position. */
-        spin_lock(&sbi->ll_pglist_lock);
+        spin_lock(&sbi->ll_lock);
          llap = llite_pglist_next_llap(sbi, &dummy_llap->llap_proc_item);
          list_del_init(&dummy_llap->llap_proc_item);
          if (llap) {
                  list_add(&dummy_llap->llap_proc_item, &llap->llap_proc_item);
                  llap = llite_pglist_next_llap(sbi, &dummy_llap->llap_proc_item);
          }
-        spin_unlock(&sbi->ll_pglist_lock);
+        spin_unlock(&sbi->ll_lock);
  
          ++*pos;
          if (llap == NULL) {
@@ -578,9 +607,9 @@ static int llite_dump_pgcache_seq_open(struct inode *inode, struct file *file)
          seq = file->private_data;
          seq->private = llap;
  
-        spin_lock(&sbi->ll_pglist_lock);
+        spin_lock(&sbi->ll_lock);
          list_add(&llap->llap_proc_item, &sbi->ll_pglist);
-        spin_unlock(&sbi->ll_pglist_lock);
+        spin_unlock(&sbi->ll_lock);
  
          return 0;
  }
@@ -592,10 +621,10 @@ static int llite_dump_pgcache_seq_release(struct inode *inode,
          struct ll_async_page *llap = seq->private;
          struct ll_sb_info *sbi = llap->llap_cookie;
  
-        spin_lock(&sbi->ll_pglist_lock);
+        spin_lock(&sbi->ll_lock);
          if (!list_empty(&llap->llap_proc_item))
                  list_del_init(&llap->llap_proc_item);
-        spin_unlock(&sbi->ll_pglist_lock);
+        spin_unlock(&sbi->ll_lock);
          OBD_FREE(llap, sizeof(*llap));
  
          return seq_release(inode, file);
diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c

index 2e71def..18551bc 100644 (file)
--- a/lustre/llite/rw.c
+++ b/lustre/llite/rw.c
@@ -228,8 +228,7 @@ static int ll_ap_make_ready(void *data, int cmd)
  
          page = llap->llap_page;
  
-        if (cmd == OBD_BRW_READ)
-                RETURN(0);
+        LASSERT(cmd != OBD_BRW_READ);
  
          /* we're trying to write, but the page is locked.. come back later */
          if (TryLockPage(page))
@@ -375,10 +374,10 @@ struct ll_async_page *llap_from_page(struct page *page)
          page->private = (unsigned long)llap;
          llap->llap_page = page;
  
-        spin_lock(&sbi->ll_pglist_lock);
+        spin_lock(&sbi->ll_lock);
          sbi->ll_pglist_gen++;
          list_add_tail(&llap->llap_proc_item, &sbi->ll_pglist);
-        spin_unlock(&sbi->ll_pglist_lock);
+        spin_unlock(&sbi->ll_lock);
  
          RETURN(llap);
  }
@@ -475,6 +474,29 @@ out:
          RETURN(rc);
  }
  
+/* reserve up to @len read-ahead pages; 0 if the limit is already exceeded */
+static unsigned long ll_ra_count_get(struct ll_sb_info *sbi, unsigned long len)
+{
+        unsigned long ret = 0;
+        ENTRY;
+        spin_lock(&sbi->ll_lock);
+        if (sbi->ll_max_read_ahead_pages > sbi->ll_read_ahead_pages)
+                ret = min(sbi->ll_max_read_ahead_pages -
+                          sbi->ll_read_ahead_pages, len);
+        sbi->ll_read_ahead_pages += ret;
+        spin_unlock(&sbi->ll_lock);
+        RETURN(ret);
+}
+
+static void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len)
+{
+        spin_lock(&sbi->ll_lock);
+        LASSERTF(sbi->ll_read_ahead_pages >= len, "r_a_p %lu len %lu\n",
+                 sbi->ll_read_ahead_pages, len);
+        sbi->ll_read_ahead_pages -= len; /* give back @len reserved pages */
+        spin_unlock(&sbi->ll_lock);
+}
+
  /* called for each page in a completed rpc.*/
  void ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc)
  {
@@ -493,6 +515,9 @@ void ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc)
  
          LL_CDEBUG_PAGE(D_PAGE, page, "completing cmd %d with %d\n", cmd, rc);
  
+        if (cmd == OBD_BRW_READ && llap->llap_defer_uptodate)
+                ll_ra_count_put(ll_i2sbi(page->mapping->host), 1);
+
          if (rc == 0)  {
                  if (cmd == OBD_BRW_READ) {
                          if (!llap->llap_defer_uptodate)
@@ -507,7 +532,6 @@ void ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc)
                  SetPageError(page);
          }
  
-
          unlock_page(page);
  
          if (0 && cmd == OBD_BRW_WRITE) {
@@ -568,11 +592,11 @@ void ll_removepage(struct page *page)
           * is providing exclusivity to memory pressure/truncate/writeback..*/
          page->private = 0;
  
-        spin_lock(&sbi->ll_pglist_lock);
+        spin_lock(&sbi->ll_lock);
          if (!list_empty(&llap->llap_proc_item))
                  list_del_init(&llap->llap_proc_item);
          sbi->ll_pglist_gen++;
-        spin_unlock(&sbi->ll_pglist_lock);
+        spin_unlock(&sbi->ll_lock);
          OBD_FREE(llap, sizeof(*llap));
          EXIT;
  }
@@ -610,7 +634,8 @@ static int ll_issue_page_read(struct obd_export *exp,
          llap->llap_defer_uptodate = defer;
          rc = obd_queue_group_io(exp, ll_i2info(page->mapping->host)->lli_smd,
                                  NULL, oig, llap->llap_cookie, OBD_BRW_READ, 0,
-                                PAGE_SIZE, 0, ASYNC_COUNT_STABLE);
+                                PAGE_SIZE, 0, ASYNC_COUNT_STABLE | ASYNC_READY
+                                              | ASYNC_URGENT);
          if (rc) {
                  LL_CDEBUG_PAGE(D_ERROR, page, "read queue failed: rc %d\n", rc);
                  page_cache_release(page);
@@ -618,171 +643,155 @@ static int ll_issue_page_read(struct obd_export *exp,
          RETURN(rc);
  }
  
-#define LL_RA_MIN(inode) ((unsigned long)PTLRPC_MAX_BRW_PAGES / 2)
-#define LL_RA_MAX(inode) ((ll_i2info(inode)->lli_smd->lsm_xfersize * 3) >> \
-                          PAGE_CACHE_SHIFT)
+/* log every ll_readahead_state field at D_READA; @ras is a pointer expression */
+#define RAS_CDEBUG(ras) CDEBUG(D_READA,                                     \
+        "lrp %lu c %lu ws %lu wl %lu nra %lu\n", (ras)->ras_last_readpage,  \
+        (ras)->ras_consecutive, (ras)->ras_window_start,                    \
+        (ras)->ras_window_len, (ras)->ras_next_readahead);
  
-static void ll_readahead(struct ll_readahead_state *ras,
+static int ll_readahead(struct ll_readahead_state *ras,
                           struct obd_export *exp, struct address_space *mapping,
                           struct obd_io_group *oig, int flags)
  {
-        unsigned long i, start, end;
+        unsigned long i, start = 0, end = 0, reserved;
          struct ll_async_page *llap;
          struct page *page;
-        int rc;
+        int rc, ret = 0;
+        __u64 kms;
+        ENTRY;
  
-        if (mapping->host->i_size == 0)
-                return;
+        kms = lov_merge_size(ll_i2info(mapping->host)->lli_smd, 1);
+        if (kms == 0)
+                RETURN(0);
  
          spin_lock(&ras->ras_lock);
  
-        /* make sure to issue a window's worth of read-ahead pages */
-        end = ras->ras_last;
-        start = end - ras->ras_window;
-        if (start > end)
-                start = 0;
-
-        /* but don't iterate over pages that we've already issued.  this
-         * will set start to end + 1 if we've already read-ahead up to
-         * ras_last sothe for() won't be entered */
-        if (ras->ras_next_index > start)
-                start = ras->ras_next_index;
-        if (end != ~0UL)
-                ras->ras_next_index = end + 1;
+        if (ras->ras_window_len) {
+                start = ras->ras_next_readahead;
+                end = ras->ras_window_start + ras->ras_window_len - 1;
+                end = min(end, (unsigned long)(kms >> PAGE_CACHE_SHIFT));
+                ras->ras_next_readahead = max(end, end + 1);
  
-        CDEBUG(D_READA, "ni %lu last %lu win %lu: reading from %lu to %lu\n",
-               ras->ras_next_index, ras->ras_last, ras->ras_window,
-               start, end);
+                RAS_CDEBUG(ras);
+        }
  
          spin_unlock(&ras->ras_lock);
  
-        /* clamp to filesize */
-        i = (mapping->host->i_size - 1) >> PAGE_CACHE_SHIFT;
-        end = min(end, i);
+        if (end == 0)
+                RETURN(0);
+
+        reserved = ll_ra_count_get(ll_i2sbi(mapping->host), end - start + 1);
  
-        for (i = start; i <= end; i++) {
-                /* grab_cache_page_nowait returns null if this races with
-                 * truncating the page (page->mapping == NULL) */
+        for (i = start; reserved > 0 && i <= end; i++) {
+                /* skip locked pages from previous readpage calls */
                  page = grab_cache_page_nowait(mapping, i);
-                if (page == NULL)
-                       break;
+                if (page == NULL) {
+                        CDEBUG(D_READA, "g_c_p_n failed\n");
+                        continue;
+                }
+                
+                /* we do this first so that we can see the page in the /proc
+                 * accounting */
+                llap = llap_from_page(page);
+                if (IS_ERR(llap) || llap->llap_defer_uptodate)
+                        goto next_page;
  
-                /* the book-keeping above promises that we've tried
-                 * all the indices from start to end, so we don't
-                 * stop if anyone returns an error. This may not be good. */
+                /* skip completed pages */
                  if (Page_Uptodate(page))
                          goto next_page;
  
+                /* bail when we hit the end of the lock. */
                  if ((rc = ll_page_matches(page, flags)) <= 0) {
                          LL_CDEBUG_PAGE(D_READA | D_PAGE, page,
                                         "lock match failed: rc %d\n", rc);
+                        i = end;
                          goto next_page;
                  }
  
-                llap = llap_from_page(page);
-                if (IS_ERR(llap) || llap->llap_defer_uptodate)
-                        goto next_page;
-
                  rc = ll_issue_page_read(exp, llap, oig, 1);
-                if (rc == 0)
-                        LL_CDEBUG_PAGE(D_PAGE, page, "started read-ahead\n");
+                if (rc == 0) {
+                        reserved--;
+                        ret++;
+                        LL_CDEBUG_PAGE(D_READA| D_PAGE, page, 
+                                       "started read-ahead\n");
+                }
                  if (rc) {
          next_page:
-                        LL_CDEBUG_PAGE(D_PAGE, page, "skipping read-ahead\n");
+                        LL_CDEBUG_PAGE(D_READA | D_PAGE, page, 
+                                       "skipping read-ahead\n");
  
                          unlock_page(page);
                  }
                  page_cache_release(page);
          }
+
+        LASSERTF(reserved >= 0, "reserved %lu\n", reserved);
+        if (reserved != 0)
+                ll_ra_count_put(ll_i2sbi(mapping->host), reserved);
+        RETURN(ret);
+}
+
+/* mask window start down to PTLRPC_MAX_BRW_PAGES; never rewind next_readahead */
+static void ras_set_start(struct ll_readahead_state *ras, unsigned long index)
+{
+        ras->ras_window_start = index & ~(PTLRPC_MAX_BRW_PAGES - 1);
+        if (ras->ras_next_readahead < ras->ras_window_start)
+                ras->ras_next_readahead = ras->ras_window_start;
+}
  
  /* called with the ras_lock held or from places where it doesn't matter */
-static void ll_readahead_set(struct inode *inode,
-                             struct ll_readahead_state *ras,
-                             unsigned long index)
+static void ras_reset(struct ll_readahead_state *ras,
+                      unsigned long index)
  {
-        ras->ras_next_index = index;
-        if (ras->ras_next_index != ~0UL)
-                ras->ras_next_index++;
-        ras->ras_window = LL_RA_MIN(inode);
-        ras->ras_last = ras->ras_next_index + ras->ras_window;
-        if (ras->ras_last < ras->ras_next_index)
-                ras->ras_last = ~0UL;
-        CDEBUG(D_READA, "ni %lu last %lu win %lu: set %lu\n",
-               ras->ras_next_index, ras->ras_last, ras->ras_window,
-               index);
+        ras->ras_last_readpage = index;
+        ras->ras_consecutive = 1;
+        ras->ras_window_len = 0;
+        ras_set_start(ras, index);
+        ras->ras_next_readahead = ras->ras_window_start;
+
+        RAS_CDEBUG(ras);
  }
  
  void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras)
  {
          spin_lock_init(&ras->ras_lock);
-        ll_readahead_set(inode, ras, 0);
+        ras_reset(ras, 0);
  }
  
-static void ll_readahead_update(struct inode *inode,
-                                struct ll_readahead_state *ras,
-                                unsigned long index, int hit)
+static void ras_update(struct ll_readahead_state *ras,
+                       unsigned long index, unsigned long max)
  {
-        unsigned long issued_start, new_last;
+        ENTRY;
  
          spin_lock(&ras->ras_lock);
  
-        /* we're interested in noticing the index's relation to the
-         * previously issued read-ahead pages */
-        issued_start = ras->ras_next_index - ras->ras_window - 1;
-        if (issued_start > ras->ras_next_index)
-                issued_start = 0;
-
-        CDEBUG(D_READA, "ni %lu last %lu win %lu: %s ind %lu start %lu\n",
-               ras->ras_next_index, ras->ras_last, ras->ras_window,
-               hit ? "hit" : "miss", index, issued_start);
-        if (!hit &&
-            index == ras->ras_next_index && index == ras->ras_last + 1) {
-                /* special case the kernel's read-ahead running into the
-                 * page just beyond our read-ahead window as an extension
-                 * of our read-ahead.  sigh.  wishing it was easier to
-                 * turn off 2.4's read-ahead. */
-                ras->ras_window = min(LL_RA_MAX(inode), ras->ras_window + 1);
-                if (index != ~0UL)
-                        ras->ras_next_index = index + 1;
-                ras->ras_last = index;
-        } else if (!hit &&
-                   (index > issued_start || ras->ras_next_index >= index)) {
-                /* deal with a miss way out of the window.  we interpret
-                 * this as a seek and restart the window */
-                ll_readahead_set(inode, ras, index);
-
-        } else if (!hit &&
-                   issued_start <= index && index < ras->ras_next_index) {
-                /* a miss inside the window?  surely its memory pressure
-                 * evicting our read pages before the app can see them.
-                 * we shrink the window aggressively */
-                unsigned long old_window = ras->ras_window;
-
-                ras->ras_window = max(ras->ras_window / 2, LL_RA_MIN(inode));
-                ras->ras_last -= old_window - ras->ras_window;
-                if (ras->ras_next_index > ras->ras_last)
-                        ras->ras_next_index = ras->ras_last + 1;
-                CDEBUG(D_READA, "ni %lu last %lu win %lu: miss inside\n",
-                       ras->ras_next_index, ras->ras_last, ras->ras_window);
-
-        } else if (hit &&
-                   issued_start <= index && index < ras->ras_next_index) {
-                /* a hit inside the window.  grow the window by twice the
-                 * number of pages that are satisified within the window.  */
-                ras->ras_window = min(LL_RA_MAX(inode), ras->ras_window + 2);
-
-                /* we want the next readahead pass to issue a windows worth
-                 * beyond where the app currently is */
-                new_last = index + ras->ras_window;
-                if (new_last > ras->ras_last)
-                        ras->ras_last = new_last;
-
-                CDEBUG(D_READA, "ni %lu last %lu win %lu: extended window/last\n",
-                       ras->ras_next_index, ras->ras_last, ras->ras_window);
+        if (index != ras->ras_last_readpage + 1) {
+                ras_reset(ras, index);
+                GOTO(out_unlock, 0);
          }
  
+        ras->ras_last_readpage = index;
+        ras->ras_consecutive++;
+        ras_set_start(ras, index);
+
+        if (ras->ras_consecutive == 2) {
+                ras->ras_window_len = PTLRPC_MAX_BRW_PAGES;
+                GOTO(out_unlock, 0);
+        }
+
+        /* we need to increase the window sometimes.  we'll arbitrarily
+         * do it half-way through the pages in an rpc */
+        if ((index & (PTLRPC_MAX_BRW_PAGES - 1)) == 
+            (PTLRPC_MAX_BRW_PAGES >> 1)) {
+                ras->ras_window_len += PTLRPC_MAX_BRW_PAGES;
+                ras->ras_window_len = min(ras->ras_window_len, max);
+        }
+
+        EXIT;
+out_unlock:
+        RAS_CDEBUG(ras);
          spin_unlock(&ras->ras_lock);
+        return;
  }
  
  /*
@@ -823,19 +832,22 @@ int ll_readpage(struct file *filp, struct page *page)
          if (IS_ERR(llap))
                  GOTO(out, rc = PTR_ERR(llap));
  
+        if (ll_i2sbi(inode)->ll_flags & LL_SBI_READAHEAD)
+                ras_update(&fd->fd_ras, page->index, 
+                           ll_i2sbi(inode)->ll_max_read_ahead_pages);
+
          if (llap->llap_defer_uptodate) {
-                ll_readahead_update(inode, &fd->fd_ras, page->index, 1);
-                ll_readahead(&fd->fd_ras, exp, page->mapping, oig,fd->fd_flags);
-                obd_trigger_group_io(exp, ll_i2info(inode)->lli_smd, NULL,
-                                     oig);
+                rc = ll_readahead(&fd->fd_ras, exp, page->mapping, oig,
+                                  fd->fd_flags);
+                if (rc > 0)
+                        obd_trigger_group_io(exp, ll_i2info(inode)->lli_smd, 
+                                             NULL, oig);
                  LL_CDEBUG_PAGE(D_PAGE, page, "marking uptodate from defer\n");
                  SetPageUptodate(page);
                  unlock_page(page);
                  GOTO(out_oig, rc = 0);
          }
  
-        ll_readahead_update(inode, &fd->fd_ras, page->index, 0);
-
          rc = ll_page_matches(page, fd->fd_flags);
          if (rc < 0) {
                  LL_CDEBUG_PAGE(D_ERROR, page, "lock match failed: rc %d\n", rc);
@@ -861,8 +873,9 @@ int ll_readpage(struct file *filp, struct page *page)
                  GOTO(out, rc);
  
          LL_CDEBUG_PAGE(D_PAGE, page, "queued readpage\n");
-        if ((ll_i2sbi(inode)->ll_flags & LL_SBI_READAHEAD))
-                ll_readahead(&fd->fd_ras, exp, page->mapping, oig,fd->fd_flags);
+        if (ll_i2sbi(inode)->ll_flags & LL_SBI_READAHEAD)
+                ll_readahead(&fd->fd_ras, exp, page->mapping, oig,
+                             fd->fd_flags);
  
          rc = obd_trigger_group_io(exp, ll_i2info(inode)->lli_smd, NULL, oig);
  
@@ -874,38 +887,3 @@ out_oig:
                  oig_release(oig);
          RETURN(rc);
  }
-
-#if 0
-/* this is for read pages.  we issue them as ready but not urgent.  when
- * someone waits on them we fire them off, hopefully merged with adjacent
- * reads that were queued by read-ahead.  */
-int ll_sync_page(struct page *page)
-{
-        struct obd_export *exp;
-        struct ll_async_page *llap;
-        int rc;
-        ENTRY;
-
-        /* we're using a low bit flag to signify that a queued read should
-         * be issued once someone goes to lock it.  it is also cleared
-         * as the page is built into an RPC */
-        if (!test_and_clear_bit(LL_PRIVBITS_READ, &page->private))
-                RETURN(0);
-
-        /* careful to only deref page->mapping after checking the bit */
-        exp = ll_i2obdexp(page->mapping->host);
-        if (exp == NULL)
-                RETURN(-EINVAL);
-
-        llap = llap_from_page(page);
-        if (IS_ERR(llap))
-                RETURN(PTR_ERR(llap));
-
-        LL_CDEBUG_PAGE(D_PAGE, page, "setting ready|urgent\n");
-
-        rc = obd_set_async_flags(exp, ll_i2info(page->mapping->host)->lli_smd,
-                                 NULL, llap->llap_cookie,
-                                 ASYNC_READY|ASYNC_URGENT);
-        return rc;
-}
-#endif
diff --git a/lustre/mdc/mdc_locks.c b/lustre/mdc/mdc_locks.c

index 8f7b3b6..a2bbab9 100644 (file)
--- a/lustre/mdc/mdc_locks.c
+++ b/lustre/mdc/mdc_locks.c
@@ -346,6 +346,9 @@ int mdc_enqueue(struct obd_export *exp,
                  spin_unlock(&req->rq_lock);
          }
  
+        DEBUG_REQ(D_RPCTRACE, req, "disposition: %x, status: %d",
+                  it->d.lustre.it_disposition, it->d.lustre.it_status);
+
          /* We know what to expect, so we do any byte flipping required here */
          LASSERT(reply_buffers == 4 || reply_buffers == 3 || reply_buffers == 1);
          if (reply_buffers >= 3) {
diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c

index afa8ec0..2cb67a2 100644 (file)
--- a/lustre/mdc/mdc_request.c
+++ b/lustre/mdc/mdc_request.c
@@ -366,7 +366,7 @@ void mdc_clear_open_replay_data(struct obd_client_handle *och)
           * we're sure we won't need to fix up the close request in the future),
           * but make sure that replay doesn't poke at the och, which is about to
           * be freed. */
-        LASSERT(mod != (void *)0x5a5a5a5a);
+        LASSERT(mod != LP_POISON);
          if (mod != NULL)
                  mod->mod_och = NULL;
          och->och_mod = NULL;
@@ -388,7 +388,8 @@ static void mdc_commit_close(struct ptlrpc_request *req)
  
          open_req = mod->mod_open_req;
          LASSERT(open_req != NULL);
-        LASSERT(open_req != (void *)0x5a5a5a5a);
+        LASSERT(open_req != LP_POISON);
+        LASSERT(open_req->rq_type != LI_POISON);
  
          DEBUG_REQ(D_HA, open_req, "open req balanced");
          LASSERT(open_req->rq_transno != 0);
@@ -465,6 +466,7 @@ int mdc_close(struct obd_export *exp, struct obdo *obdo,
          mod = och->och_mod;
          if (likely(mod != NULL)) {
                  mod->mod_close_req = req;
+                LASSERT(mod->mod_open_req->rq_type != LI_POISON);
                  DEBUG_REQ(D_HA, mod->mod_open_req, "matched open req %p",
                            mod->mod_open_req);
          } else {
@@ -499,7 +501,7 @@ int mdc_close(struct obd_export *exp, struct obdo *obdo,
          if (req->rq_repmsg == NULL) {
                  CDEBUG(D_HA, "request failed to send: %p, %d\n", req,
                         req->rq_status);
-                rc = req->rq_status;
+                rc = req->rq_status ? req->rq_status : -EIO;
          } else if (rc == 0) {
                  rc = req->rq_repmsg->status;
                  if (req->rq_repmsg->type == PTL_RPC_MSG_ERR) {
diff --git a/lustre/mds/mds_open.c b/lustre/mds/mds_open.c

index d62f50e..8954cb5 100644 (file)
--- a/lustre/mds/mds_open.c
+++ b/lustre/mds/mds_open.c
@@ -981,6 +981,7 @@ int mds_open(struct mds_update_record *rec, int offset,
                          GOTO(cleanup, rc = -EISDIR);
                  }
                  if (ll_permission(dchild->d_inode, acc_mode, NULL)) {
+                        intent_set_disposition(rep, DISP_OPEN_OPEN);
                          GOTO(cleanup, rc = -EACCES);
                  }
          }
diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c

index 3978cb8..43be2ae 100644 (file)
--- a/lustre/obdclass/class_obd.c
+++ b/lustre/obdclass/class_obd.c
@@ -647,7 +647,7 @@ static void cleanup_obdclass(void)
   * kernel patch */
  #include <linux/lustre_version.h>
  #define LUSTRE_MIN_VERSION 28
-#define LUSTRE_MAX_VERSION 34
+#define LUSTRE_MAX_VERSION 35
  #if (LUSTRE_KERNEL_VERSION < LUSTRE_MIN_VERSION)
  # error Cannot continue: Your Lustre kernel patch is older than the sources
  #elif (LUSTRE_KERNEL_VERSION > LUSTRE_MAX_VERSION)
diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c

index aee57a7..8df70b3 100644 (file)
--- a/lustre/obdclass/genops.c
+++ b/lustre/obdclass/genops.c
@@ -537,7 +537,7 @@ struct obd_import *class_new_import(void)
  void class_destroy_import(struct obd_import *import)
  {
          LASSERT(import != NULL);
-        LASSERT((unsigned long)import != 0x5a5a5a5a);
+        LASSERT(import != LP_POISON);
  
          class_handle_unhash(&import->imp_handle);
  
diff --git a/lustre/obdecho/echo_client.c b/lustre/obdecho/echo_client.c

index e6471c8..67935cb 100644 (file)
--- a/lustre/obdecho/echo_client.c
+++ b/lustre/obdecho/echo_client.c
@@ -31,7 +31,6 @@
  #include <asm/div64.h>
  #else
  #include <liblustre.h>
-#include <sys/capability.h>
  #endif
  
  #include <linux/obd.h>
diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c

index 4809e22..2ab2652 100644 (file)
--- a/lustre/obdfilter/filter.c
+++ b/lustre/obdfilter/filter.c
@@ -320,8 +320,8 @@ int filter_update_last_objid(struct obd_device *obd, obd_gr group,
          int rc;
          ENTRY;
  
-        CDEBUG(D_INODE, "server last_objid for group "LPU64": "LPU64"\n",
-               group, filter->fo_last_objids[group]);
+        CDEBUG(D_INODE, "%s: server last_objid for group "LPU64": "LPU64"\n",
+               obd->obd_name, group, filter->fo_last_objids[group]);
  
          tmp = cpu_to_le64(filter->fo_last_objids[group]);
          rc = fsfilt_write_record(obd, filter->fo_last_objid_files[group],
@@ -696,7 +696,7 @@ static int filter_prep_groups(struct obd_device *obd)
                  }
                  filter->fo_last_objids[i] =
                          le64_to_cpu(filter->fo_last_objids[i]);
-                CDEBUG(D_INODE, "%s: server last_objid group %d: "LPU64"\n",
+                CDEBUG(D_HA, "%s: server last_objid group %d: "LPU64"\n",
                         obd->obd_name, i, filter->fo_last_objids[i]);
          }
  
@@ -1755,12 +1755,16 @@ static void filter_destroy_precreated(struct obd_export *exp, struct obdo *oa,
          doa.o_mode = S_IFREG;
  
          last = filter_last_id(filter, &doa);
-        CWARN("deleting orphan objects from "LPU64" to "LPU64"\n",
-               oa->o_id + 1, last);
+        CWARN("%s: deleting orphan objects from "LPU64" to "LPU64"\n",
+               exp->exp_obd->obd_name, oa->o_id + 1, last);
          for (id = oa->o_id + 1; id <= last; id++) {
                  doa.o_id = id;
                  filter_destroy(exp, &doa, NULL, NULL);
          }
+
+        CDEBUG(D_HA, "%s: after destroy: set last_objids["LPU64"] = "LPU64"\n",
+               exp->exp_obd->obd_name, doa.o_gr, oa->o_id);
+
          spin_lock(&filter->fo_objidlock);
          filter->fo_last_objids[doa.o_gr] = oa->o_id;
          spin_unlock(&filter->fo_objidlock);
@@ -1836,6 +1840,8 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa,
                  recreate_obj = 1;
          }
  
+        CDEBUG(D_HA, "%s: precreating %d objects\n", obd->obd_name, *num); 
+
          for (i = 0; i < *num && err == 0; i++) {
                  int cleanup_phase = 0;
  
@@ -1870,13 +1876,17 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa,
                           * already exists
                           */
                          if (recreate_obj) {
-                                CERROR("Serious error: recreating obj %*s but "
-                                       "obj already exists \n",
-                                       dchild->d_name.len, dchild->d_name.name);
+                                CERROR("%s: Serious error: recreating obj %*s "
+                                       "but obj already exists \n",
+                                       obd->obd_name, dchild->d_name.len, 
+                                       dchild->d_name.name);
+                                LBUG();
                          } else {
-                                CERROR("Serious error: objid %*s already "
+                                CERROR("%s: Serious error: objid %*s already "
                                         "exists; is this filesystem corrupt?\n",
-                                       dchild->d_name.len, dchild->d_name.name);
+                                       obd->obd_name, dchild->d_name.len, 
+                                       dchild->d_name.name);
+                                LBUG();
                          }
                          GOTO(cleanup, rc = -EEXIST);
                  }
@@ -1923,7 +1933,11 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa,
          }
          *num = i;
  
-        CDEBUG(D_INFO, "filter_precreate() created %d objects\n", i);
+        CDEBUG(D_HA, "%s: server last_objid for group "LPU64": "LPU64"\n",
+               obd->obd_name, group, filter->fo_last_objids[group]);
+
+        CDEBUG(D_HA, "%s: filter_precreate() created %d objects\n", 
+               obd->obd_name, i);
          RETURN(rc);
  }
  
diff --git a/lustre/obdfilter/filter_io.c b/lustre/obdfilter/filter_io.c

index b9b7ab3..ccbbc74 100644 (file)
--- a/lustre/obdfilter/filter_io.c
+++ b/lustre/obdfilter/filter_io.c
@@ -628,7 +628,7 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa,
               i++, lnb++, rnb++) {
                  /* We still set up for ungranted pages so that granted pages
                   * can be written to disk as they were promised, and portals
-                 * needs to keep the pages all aligned properly. */ 
+                 * needs to keep the pages all aligned properly. */
                  lnb->dentry = dentry;
                  lnb->offset = rnb->offset;
                  lnb->len    = rnb->len;
diff --git a/lustre/osc/lproc_osc.c b/lustre/osc/lproc_osc.c

index 88b4d2a..078d0d1 100644 (file)
--- a/lustre/osc/lproc_osc.c
+++ b/lustre/osc/lproc_osc.c
@@ -108,14 +108,13 @@ int osc_rd_max_dirty_mb(char *page, char **start, off_t off, int count,
  {
          struct obd_device *dev = data;
          struct client_obd *cli = &dev->u.cli;
-        int val;
-        int rc;
+        unsigned val;
  
          spin_lock(&cli->cl_loi_list_lock);
          val = cli->cl_dirty_max >> 20;
-        rc = snprintf(page, count, "%d\n", val);
          spin_unlock(&cli->cl_loi_list_lock);
-        return rc;
+
+        return snprintf(page, count, "%u\n", val);
  }
  
  int osc_wr_max_dirty_mb(struct file *file, const char *buffer,
diff --git a/lustre/osc/osc_create.c b/lustre/osc/osc_create.c

index d4db2c7..50b4d12 100644 (file)
--- a/lustre/osc/osc_create.c
+++ b/lustre/osc/osc_create.c
@@ -87,7 +87,7 @@ static int osc_interpret_create(struct ptlrpc_request *req, void *data,
          oscc->oscc_flags &= ~OSCC_FLAG_CREATING;
          spin_unlock(&oscc->oscc_lock);
  
-        CDEBUG(D_INFO, "preallocated through id "LPU64" (last used "LPU64")\n",
+        CDEBUG(D_HA, "preallocated through id "LPU64" (last used "LPU64")\n",
                 oscc->oscc_last_id, oscc->oscc_next_id);
  
          wake_up(&oscc->oscc_waitq);
@@ -102,7 +102,8 @@ static int oscc_internal_create(struct osc_creator *oscc)
          ENTRY;
  
          spin_lock(&oscc->oscc_lock);
-        if (oscc->oscc_flags & OSCC_FLAG_CREATING) {
+        if (oscc->oscc_flags & OSCC_FLAG_CREATING ||
+            oscc->oscc_flags & OSCC_FLAG_RECOVERING) {
                  spin_unlock(&oscc->oscc_lock);
                  RETURN(0);
          }
@@ -124,7 +125,7 @@ static int oscc_internal_create(struct osc_creator *oscc)
          spin_lock(&oscc->oscc_lock);
          body->oa.o_id = oscc->oscc_last_id + oscc->oscc_grow_count;
          body->oa.o_valid |= OBD_MD_FLID;
-        CDEBUG(D_INFO, "preallocating through id "LPU64" (last used "LPU64")\n",
+        CDEBUG(D_HA, "preallocating through id "LPU64" (last used "LPU64")\n",
                 body->oa.o_id, oscc->oscc_next_id);
          spin_unlock(&oscc->oscc_lock);
  
@@ -235,6 +236,10 @@ int osc_create(struct obd_export *exp, struct obdo *oa,
                  oa->o_valid |= OBD_MD_FLID;
                  oa->o_id = oscc->oscc_next_id - 1;
  
+                CDEBUG(D_HA, "%s: deleting to next_id: "LPU64"\n", 
+                       oscc->oscc_obd->u.cli.cl_import->imp_target_uuid.uuid, 
+                       oa->o_id);
+
                  rc = osc_real_create(exp, oa, ea, NULL);
  
                  spin_lock(&oscc->oscc_lock);
@@ -250,26 +255,28 @@ int osc_create(struct obd_export *exp, struct obdo *oa,
                 RETURN(rc);
         }
  
-        /* If orphans are being recovered, then we must wait until it is 
-           finished before we can continue with create. */
-        if (oscc_recovering(oscc)) {
-                struct l_wait_info lwi;
-
-                CDEBUG(D_HA, "%p: oscc recovery in progress, waiting\n", oscc);
-
-                lwi = LWI_TIMEOUT(MAX(obd_timeout * HZ, 1), NULL, NULL);
-                rc = l_wait_event(oscc->oscc_waitq, !oscc_recovering(oscc),
-                                  &lwi);
-                LASSERT(rc == 0 || rc == -ETIMEDOUT);
-                if (rc == -ETIMEDOUT) {
-                        CDEBUG(D_HA, "%p: timed out waiting for recovery\n", oscc);
-                        RETURN(rc);
-                }
-                CDEBUG(D_HA, "%p: oscc recovery over, waking up\n", oscc);
-        }
-        
-        
          while (try_again) {
+                /* If orphans are being recovered, then we must wait until 
+                   it is finished before we can continue with create. */
+                if (oscc_recovering(oscc)) {
+                        struct l_wait_info lwi;
+                        
+                        CDEBUG(D_HA,"%p: oscc recovery in progress, waiting\n", 
+                               oscc);
+                        
+                        lwi = LWI_TIMEOUT(MAX(obd_timeout * HZ, 1), NULL, NULL);
+                        rc = l_wait_event(oscc->oscc_waitq, 
+                                          !oscc_recovering(oscc), &lwi);
+                        LASSERT(rc == 0 || rc == -ETIMEDOUT);
+                        if (rc == -ETIMEDOUT) {
+                                CDEBUG(D_HA, "%p: timed out waiting for "
+                                       "recovery\n", oscc);
+                                RETURN(rc);
+                        }
+                        CDEBUG(D_HA, "%p: oscc recovery over, waking up\n", 
+                               oscc);
+                }
+                
                  spin_lock(&oscc->oscc_lock);
                  if (oscc->oscc_last_id >= oscc->oscc_next_id) {
                          memcpy(oa, &oscc->oscc_oa, sizeof(*oa));
@@ -290,7 +297,9 @@ int osc_create(struct obd_export *exp, struct obdo *oa,
          }
  
          if (rc == 0)
-                CDEBUG(D_INFO, "returning objid "LPU64"\n", lsm->lsm_object_id);
+                CDEBUG(D_HA, "%s: returning objid "LPU64"\n", 
+                       oscc->oscc_obd->u.cli.cl_import->imp_target_uuid.uuid, 
+                       lsm->lsm_object_id);
          else if (*ea == NULL)
                  obd_free_memmd(exp, &lsm);
          RETURN(rc);
diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c

index 85f33d5..458529d 100644 (file)
--- a/lustre/osc/osc_request.c
+++ b/lustre/osc/osc_request.c
@@ -2739,7 +2739,7 @@ static int osc_set_info(struct obd_export *exp, obd_count keylen,
                  if (vallen != sizeof(obd_id))
                          RETURN(-EINVAL);
                 obd->u.cli.cl_oscc.oscc_next_id = *((obd_id*)val) + 1;
-                CDEBUG(D_INODE, "%s: set oscc_next_id = "LPU64"\n",
+                CDEBUG(D_HA, "%s: set oscc_next_id = "LPU64"\n",
                         exp->exp_obd->obd_name,
                         obd->u.cli.cl_oscc.oscc_next_id);
  
diff --git a/lustre/portals/include/linux/kp30.h b/lustre/portals/include/linux/kp30.h

index 181594f..8a56b55 100644 (file)
--- a/lustre/portals/include/linux/kp30.h
+++ b/lustre/portals/include/linux/kp30.h
@@ -689,27 +689,30 @@ typedef int (*cfg_record_cb_t)(enum cfg_record_type, int len, void *data);
  # endif
  #endif
  
+#ifndef LP_POISON
+# define LI_POISON ((int)0x5a5a5a5a5a5a5a5a)
+# define LL_POISON ((long)0x5a5a5a5a5a5a5a5a)
+# define LP_POISON ((void *)(long)0x5a5a5a5a5a5a5a5a)
+#endif
+
  #if defined(__x86_64__)
  # define LPU64 "%Lu"
  # define LPD64 "%Ld"
  # define LPX64 "%#Lx"
  # define LPSZ  "%lu"
  # define LPSSZ "%ld"
-# define LP_POISON ((void *)0x5a5a5a5a5a5a5a5a)
  #elif (BITS_PER_LONG == 32 || __WORDSIZE == 32)
  # define LPU64 "%Lu"
  # define LPD64 "%Ld"
  # define LPX64 "%#Lx"
  # define LPSZ  "%u"
  # define LPSSZ "%d"
-# define LP_POISON ((void *)0x5a5a5a5a)
  #elif (BITS_PER_LONG == 64 || __WORDSIZE == 64)
  # define LPU64 "%lu"
  # define LPD64 "%ld"
  # define LPX64 "%#lx"
  # define LPSZ  "%lu"
  # define LPSSZ "%ld"
-# define LP_POISON ((void *)0x5a5a5a5a5a5a5a5a)
  #endif
  #ifndef LPU64
  # error "No word size defined"
diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c

index d17540c..e78176b 100644 (file)
--- a/lustre/ptlrpc/client.c
+++ b/lustre/ptlrpc/client.c
@@ -168,7 +168,7 @@ void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc)
          ENTRY;
  
          LASSERT(desc != NULL);
-        LASSERT(desc->bd_iov_count != 0x5a5a5a5a); /* not freed already */
+        LASSERT(desc->bd_iov_count != LI_POISON); /* not freed already */
          LASSERT(!desc->bd_network_rw);         /* network hands off or */
          LASSERT((desc->bd_export != NULL) ^ (desc->bd_import != NULL));
          if (desc->bd_export)
diff --git a/lustre/ptlrpc/pinger.c b/lustre/ptlrpc/pinger.c

index c6d5c84..bc137f3 100644 (file)
--- a/lustre/ptlrpc/pinger.c
+++ b/lustre/ptlrpc/pinger.c
@@ -81,7 +81,8 @@ static int ptlrpc_pinger_main(void *arg)
          SIGNAL_MASK_UNLOCK(current, flags);
  
          LASSERTF(strlen(data->name) < sizeof(current->comm),
-                 "name %d > len %d\n",strlen(data->name),sizeof(current->comm));
+                 "name %d > len %d\n",
+                 (int)strlen(data->name), (int)sizeof(current->comm));
          THREAD_NAME(current->comm, sizeof(current->comm) - 1, "%s", data->name);
          unlock_kernel();
  
diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c

index 8db67c7..14c9d60 100644 (file)
--- a/lustre/ptlrpc/recover.c
+++ b/lustre/ptlrpc/recover.c
@@ -275,8 +275,7 @@ void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req)
          /* Wait for recovery to complete and resend. If evicted, then
             this request will be errored out later.*/
          spin_lock_irqsave(&failed_req->rq_lock, flags);
-        if (!failed_req->rq_no_resend)
-                failed_req->rq_resend = 1;
+        failed_req->rq_resend = 1;
          spin_unlock_irqrestore(&failed_req->rq_lock, flags);
          
          EXIT;
diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c

index bd0bb45..8a7474f 100644 (file)
--- a/lustre/ptlrpc/service.c
+++ b/lustre/ptlrpc/service.c
@@ -668,7 +668,8 @@ static int ptlrpc_main(void *arg)
          SIGNAL_MASK_UNLOCK(current, flags);
  
          LASSERTF(strlen(data->name) < sizeof(current->comm),
-                 "name %d > len %d\n",strlen(data->name),sizeof(current->comm));
+                 "name %d > len %d\n",
+                 (int)strlen(data->name), (int)sizeof(current->comm));
          THREAD_NAME(current->comm, sizeof(current->comm) - 1, "%s", data->name);
          unlock_kernel();
  
diff --git a/lustre/scripts/version_tag.pl.in b/lustre/scripts/version_tag.pl.in

index 1212441..0e31be6 100644 (file)
--- a/lustre/scripts/version_tag.pl.in
+++ b/lustre/scripts/version_tag.pl.in
@@ -18,7 +18,7 @@ sub get_tag()
      my $tagfile = new IO::File;
      if (!$tagfile->open("CVS/Tag")) {
          my $verfile = new IO::File;
-        if (!$verfile->open("portals/include/config.h")) {
+        if (!$verfile->open("include/config.h")) {
            return "UNKNOWN";
          }
          while(defined($line = <$verfile>)) {
diff --git a/lustre/tests/acceptance-small.sh b/lustre/tests/acceptance-small.sh

index f10da1d..1387cf3 100755 (executable)
--- a/lustre/tests/acceptance-small.sh
+++ b/lustre/tests/acceptance-small.sh
@@ -40,7 +40,7 @@ for NAME in $CONFIGS; do
  
         if [ "$DBENCH" != "no" ]; then
                 mount | grep $MOUNT || sh llmount.sh
-               SPACE=`df $MOUNT | tail -1 | awk '{ print $4 }'`
+               SPACE=`df $MOUNT | tail -n 1 | awk '{ print $4 }'`
                 DB_THREADS=`expr $SPACE / 50000`
                 [ $THREADS -lt $DB_THREADS ] && DB_THREADS=$THREADS
  
@@ -82,7 +82,7 @@ for NAME in $CONFIGS; do
         fi
         if [ "$IOZONE_DIR" != "no" ]; then
                 mount | grep $MOUNT || sh llmount.sh
-               SPACE=`df $MOUNT | tail -1 | awk '{ print $4 }'`
+               SPACE=`df $MOUNT | tail -n 1 | awk '{ print $4 }'`
                 IOZ_THREADS=`expr $SPACE / \( $SIZE + $SIZE / 512 \)`
                 [ $THREADS -lt $IOZ_THREADS ] && IOZ_THREADS=$THREADS
  
diff --git a/lustre/tests/cfg/insanity-local.sh b/lustre/tests/cfg/insanity-local.sh

index c6de54c..2ca1485 100644 (file)
--- a/lustre/tests/cfg/insanity-local.sh
+++ b/lustre/tests/cfg/insanity-local.sh
@@ -10,8 +10,8 @@ FAIL_CLIENTS=${FAIL_CLIENTS:-""}
  
  NETTYPE=${NETTYPE:-tcp}
  TIMEOUT=${TIMEOUT:-30}
-PTLDEBUG=${PTLDEBUG:-0}
-SUBSYSTEM=${SUBSYSTEM:-0}
+PTLDEBUG=${PTLDEBUG:-0x3f0400}
+SUBSYSTEM=${SUBSYSTEM:- 0xffb7e3ff}
  MOUNT=${MOUNT:-"/mnt/lustre"}
  #CLIENT_UPCALL=${CLIENT_UPCALL:-`pwd`/client-upcall-mdev.sh}
  UPCALL=${CLIENT_UPCALL:-`pwd`/replay-single-upcall.sh}
diff --git a/lustre/tests/insanity.sh b/lustre/tests/insanity.sh

index 7ad2c1c..68d0ff9 100755 (executable)
--- a/lustre/tests/insanity.sh
+++ b/lustre/tests/insanity.sh
@@ -137,7 +137,7 @@ setup() {
      wait_for mds
      start mds $MDSLCONFARGS ${REFORMAT}
      while ! do_node $CLIENTS "ls -d $LUSTRE" > /dev/null; do sleep 5; done
-    zconf_mount $CLIENTS $MOUNT
+    grep " $MOUNT " /proc/mounts || zconf_mount $CLIENTS $MOUNT
  
  }
  
@@ -156,7 +156,7 @@ client_touch() {
      file=$1
      for c in $LIVE_CLIENT $FAIL_CLIENTS;  do
         if echo $DOWN_CLIENTS | grep -q $c; then continue; fi
-       $PDSH $c touch $MOUNT/${c}_$file
+       $PDSH $c touch $MOUNT/${c}_$file || return 1
      done
  }
  
diff --git a/lustre/tests/llmountcleanup.sh b/lustre/tests/llmountcleanup.sh

index 15277d6..35eca9b 100755 (executable)
--- a/lustre/tests/llmountcleanup.sh
+++ b/lustre/tests/llmountcleanup.sh
@@ -39,8 +39,8 @@ if [ "$BUSY" ]; then
         mv $TMP/debug $TMP/debug-busy.`date +%s`
         exit 255
  fi
-LEAK_LUSTRE=`dmesg | tail -30 | grep "obd mem.*leaked"`
-LEAK_PORTALS=`dmesg | tail -20 | grep "Portals memory leaked"`
+LEAK_LUSTRE=`dmesg | tail -n 30 | grep "obd mem.*leaked"`
+LEAK_PORTALS=`dmesg | tail -n 20 | grep "Portals memory leaked"`
  if [ "$LEAK_LUSTRE" -o "$LEAK_PORTALS" ]; then
         echo "$LEAK_LUSTRE" 1>&2
         echo "$LEAK_PORTALS" 1>&2
diff --git a/lustre/tests/multiop.c b/lustre/tests/multiop.c

index c8ac394..7ab0208 100755 (executable)
--- a/lustre/tests/multiop.c
+++ b/lustre/tests/multiop.c
@@ -210,14 +210,6 @@ int main(int argc, char **argv)
                                  perror("write");
                                  exit(1);
                          }
-                        /* b=3043 write() on Suse x86-64 is returning -errno 
-                           instead of -1, and not setting errno. */
-                        if (rc < 0) {
-                                fprintf(stderr, "MULTIOP: broken write() "
-                                        "returned %d, errno %d\n",
-                                        rc, errno);
-                                exit(1);
-                        }
                          break;
                  }
                  case 'W':
diff --git a/lustre/tests/oos.sh b/lustre/tests/oos.sh

index 4e6b261..5894c4f 100755 (executable)
--- a/lustre/tests/oos.sh
+++ b/lustre/tests/oos.sh
@@ -22,8 +22,8 @@ sleep 1       # to ensure we get up-to-date statfs info
  #lctl clear
  #lctl debug_daemon start /r/tmp/debug 1024
  
-STRIPECOUNT=`cat /proc/fs/lustre/lov/*/activeobd | head -1`
-ORIGFREE=`cat /proc/fs/lustre/llite/*/kbytesavail | head -1`
+STRIPECOUNT=`cat /proc/fs/lustre/lov/*/activeobd | head -n 1`
+ORIGFREE=`cat /proc/fs/lustre/llite/*/kbytesavail | head -n 1`
  MAXFREE=${MAXFREE:-$((200000 * $STRIPECOUNT))}
  if [ $ORIGFREE -gt $MAXFREE ]; then
         echo "skipping out-of-space test on $OSC"
diff --git a/lustre/tests/oos2.sh b/lustre/tests/oos2.sh

index c9755cb..84c87f1 100644 (file)
--- a/lustre/tests/oos2.sh
+++ b/lustre/tests/oos2.sh
@@ -20,8 +20,8 @@ rm -f $OOS $OOS2 $LOG $LOG2
  
  sleep 1        # to ensure we get up-to-date statfs info
  
-STRIPECOUNT=`cat /proc/fs/lustre/lov/*/activeobd | head -1`
-ORIGFREE=`cat /proc/fs/lustre/llite/*/kbytesavail | head -1`
+STRIPECOUNT=`cat /proc/fs/lustre/lov/*/activeobd | head -n 1`
+ORIGFREE=`cat /proc/fs/lustre/llite/*/kbytesavail | head -n 1`
  MAXFREE=${MAXFREE:-$((200000 * $STRIPECOUNT))}
  if [ $ORIGFREE -gt $MAXFREE ]; then
         echo "skipping out-of-space test on $OSC"
diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh

index d0dd6f0..47d77ef 100755 (executable)
--- a/lustre/tests/recovery-small.sh
+++ b/lustre/tests/recovery-small.sh
@@ -39,7 +39,7 @@ setup() {
      start ost2 --reformat $OSTLCONFARGS 
      [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
      start mds $MDSLCONFARGS --reformat
-    zconf_mount `hostname`  $MOUNT
+    grep " $MOUNT " /proc/mounts || zconf_mount `hostname`  $MOUNT
  }
  
  cleanup() {
@@ -218,7 +218,7 @@ test_16() {
      do_facet client "cmp /etc/termcap $MOUNT/termcap"  && return 1
      sysctl -w lustre.fail_loc=0
      # give recovery a chance to finish (shouldn't take long)
-    sleep 1 
+    sleep $TIMEOUT
      do_facet client "cmp /etc/termcap $MOUNT/termcap"  || return 2
  }
  run_test 16 "timeout bulk put, evict client (2732)"
diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh

index 8e10631..9c1f1e1 100755 (executable)
--- a/lustre/tests/replay-dual.sh
+++ b/lustre/tests/replay-dual.sh
@@ -62,8 +62,8 @@ fi
  start ost2 --reformat $OSTLCONFARGS 
  [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
  start mds $MDSLCONFARGS --reformat
-zconf_mount `hostname` $MOUNT
-zconf_mount `hostname` $MOUNT2
+grep " $MOUNT " /proc/mounts || zconf_mount `hostname` $MOUNT
+grep " $MOUNT2 " /proc/mounts || zconf_mount `hostname` $MOUNT2
  
  echo $TIMEOUT > /proc/sys/lustre/timeout
  echo $UPCALL > /proc/sys/lustre/upcall
diff --git a/lustre/tests/replay-ost-single.sh b/lustre/tests/replay-ost-single.sh

index 4331be5..f1523bb 100755 (executable)
--- a/lustre/tests/replay-ost-single.sh
+++ b/lustre/tests/replay-ost-single.sh
@@ -56,7 +56,7 @@ setup() {
      start ost --reformat $OSTLCONFARGS
      [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
      start mds --reformat $MDSLCONFARGS
-    zconf_mount `hostname` $MOUNT
+    grep " $MOUNT " /proc/mounts || zconf_mount `hostname` $MOUNT
  }
  
  mkdir -p $DIR
diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh

index fbf8d19..76ce388 100755 (executable)
--- a/lustre/tests/replay-single.sh
+++ b/lustre/tests/replay-single.sh
@@ -62,8 +62,7 @@ setup() {
      start ost2 --reformat $OSTLCONFARGS 
      [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
      start mds $MDSLCONFARGS --reformat
-    zconf_mount `hostname` $MOUNT
-    echo 0x3f0410 > /proc/sys/portals/debug
+    grep " $MOUNT " /proc/mounts || zconf_mount `hostname` $MOUNT
  }
  
  $SETUP
@@ -80,6 +79,15 @@ test_0() {
  }
  run_test 0 "empty replay"
  
+test_0b() {
+    # this test attempts to trigger a race in the precreation code, 
+    # and must run before any other objects are created on the filesystem
+    fail ost
+    createmany -o $DIR/$tfile 20 || return 1
+    unlinkmany $DIR/$tfile 20 || return 2
+}
+run_test 0b "ensure object created after recover exists. (3284)"
+
  test_1() {
      replay_barrier mds
      mcreate $DIR/$tfile
@@ -825,14 +833,14 @@ run_test 41 "read from a valid osc while other oscs are invalid"
  
  # test MDS recovery after ost failure
  test_42() {
-    blocks=`df $MOUNT | tail -1 | awk '{ print $1 }'`
+    blocks=`df $MOUNT | tail -n 1 | awk '{ print $1 }'`
      createmany -o $DIR/$tfile-%d 800
      replay_barrier ost
      unlinkmany $DIR/$tfile-%d 0 400
      facet_failover ost
      
      # osc is evicted, fs is smaller
-    blocks_after=`df $MOUNT | tail -1 | awk '{ print $1 }'`
+    blocks_after=`df $MOUNT | tail -n 1 | awk '{ print $1 }'`
      [ $blocks_after -lt $blocks ] || return 1
      echo wait for MDS to timeout and recover
      sleep $((TIMEOUT * 2))
diff --git a/lustre/tests/run-llog.sh b/lustre/tests/run-llog.sh

index 5d46e2b..b7201f2 100644 (file)
--- a/lustre/tests/run-llog.sh
+++ b/lustre/tests/run-llog.sh
@@ -2,7 +2,7 @@
  PATH=`dirname $0`:`dirname $0`/../utils:$PATH
  TMP=${TMP:-/tmp}
  
-MDS=`ls /proc/fs/lustre/mds | grep -v num_refs | head -1`
+MDS=`ls /proc/fs/lustre/mds | grep -v num_refs | head -n 1`
  [ -z "$MDS" ] && echo "no MDS available, skipping llog test" && exit 0
  
  insmod ../obdclass/llog_test.o || exit 1
diff --git a/lustre/tests/runas.c b/lustre/tests/runas.c

index f1df775..91c9c7f 100644 (file)
--- a/lustre/tests/runas.c
+++ b/lustre/tests/runas.c
@@ -6,17 +6,22 @@
  #include <unistd.h>
  #include <string.h>
  #include <errno.h>
+#include <ctype.h>
  #include <sys/types.h>
  #include <grp.h>
  #include <sys/wait.h>
  
  #define DEBUG 0
  
+#ifndef NGROUPS_MAX
+#define NGROUPS_MAX 32
+#endif
+
  static const char usage[] =
  "Usage: %s -u user_id [-g grp_id ] [ -G ] command\n"
-"  -u user_id      switch to UID user_id\n"
-"  -g grp_id       switch to GID grp_id\n"
-"  -G              clear supplementary groups\n";
+"  -u user_id           switch to UID user_id\n"
+"  -g grp_id            switch to GID grp_id\n"
+"  -G[gid0,gid1,...]    set supplementary groups\n";
  
  void Usage_and_abort(const char *name)
  {
@@ -26,20 +31,17 @@ void Usage_and_abort(const char *name)
  
  int main(int argc, char **argv)
  {
-        char **my_argv, *name = argv[0];
-        int status;
-        int c,i;
-        int gid_is_set = 0;
-        int uid_is_set = 0;
-        int clear_supp_groups = 0;
+        char **my_argv, *name = argv[0], *grp;
+        int status, c, i;
+        int gid_is_set = 0, uid_is_set = 0, num_supp = -1;
          uid_t user_id = 0;
-        gid_t grp_id = 0;
+        gid_t grp_id = 0, supp_groups[NGROUPS_MAX] = { 0 };
  
          if (argc == 1)
                  Usage_and_abort(name);
  
          // get UID and GID
-        while ((c = getopt (argc, argv, "+u:g:hG")) != -1) {
+        while ((c = getopt(argc, argv, "+u:g:hG::")) != -1) {
                  switch (c) {
                  case 'u':
                          user_id = (uid_t)atoi(optarg);
@@ -54,7 +56,15 @@ int main(int argc, char **argv)
                          break;
  
                  case 'G':
-                        clear_supp_groups = 1;
+                        num_supp = 0;
+                        if (optarg == NULL || !isdigit(optarg[0]))
+                                break;
+                        while ((grp = strsep(&optarg, ",")) != NULL) {
+                                printf("adding supp group %d\n", atoi(grp));
+                                supp_groups[num_supp++] = atoi(grp);
+                                if (num_supp >= NGROUPS_MAX)
+                                        break;
+                        }
                          break;
  
                  default:
@@ -98,14 +108,14 @@ int main(int argc, char **argv)
                   exit(-1);
          }
  
-        if (clear_supp_groups) {
-                status = setgroups(0, NULL);
+        if (num_supp >= 0) {
+                status = setgroups(num_supp, supp_groups);
                  if (status == -1) {
-                        perror("clearing supplementary groups");
+                        perror("setting supplementary groups");
                          exit(-1);
                  }
          }
-        
+
          // set UID
          status = setreuid(user_id, user_id );
          if(status == -1) {
@@ -114,8 +124,10 @@ int main(int argc, char **argv)
                    exit(-1);
          }
  
-        fprintf(stderr, "running as UID %d, GID %d%s:", user_id, grp_id,
-                clear_supp_groups ? ", cleared groups" : "");
+        fprintf(stderr, "running as UID %d, GID %d", user_id, grp_id);
+        for (i = 0; i < num_supp; i++)
+                fprintf(stderr, ":%d", supp_groups[i]);
+        fprintf(stderr, "\n");
  
          for (i = 0; i < argc - optind; i++)
                   fprintf(stderr, " [%s]", my_argv[i]);
diff --git a/lustre/tests/runregression-brw.sh b/lustre/tests/runregression-brw.sh

index 34a55b0..666b253 100644 (file)
--- a/lustre/tests/runregression-brw.sh
+++ b/lustre/tests/runregression-brw.sh
@@ -9,7 +9,7 @@ COUNT_100=`expr $COUNT / 100`
  
  ENDRUN=endrun-`hostname`
  
-ECHONAME="`lctl device_list 2> /dev/null | awk '/ echo_client / { print $4 }' | tail -1`"
+ECHONAME="`lctl device_list 2> /dev/null | awk '/ echo_client / { print $4 }' | tail -n 1`"
  
  if [ -z "$ECHONAME" ]; then
         echo "$0: needs an ECHO_CLIENT set up first" 1>&2
diff --git a/lustre/tests/runregression-mds.sh b/lustre/tests/runregression-mds.sh

index ecfe0d9..7167d2d 100755 (executable)
--- a/lustre/tests/runregression-mds.sh
+++ b/lustre/tests/runregression-mds.sh
@@ -23,11 +23,11 @@ cleanup() {
  
  [ -z "$*" ] && fail "usage: $0 [--reformat] <conf>.xml" 1
  
-OSCMT="`mount | awk '/ lustre_lite / { print $3 }' | tail -1`"
+OSCMT="`mount | awk '/ lustre_lite / { print $3 }' | tail -n 1`"
  if [ -z "$OSCMT" ]; then
         $LCONF $@ || exit 1
          trap cleanup 0
-       OSCMT="`mount | awk '/ lustre_lite / { print $3 }' | tail -1`"
+       OSCMT="`mount | awk '/ lustre_lite / { print $3 }' | tail -n 1`"
         [ -z "$OSCMT" ] && fail "no lustre filesystem mounted" 1
  fi
  
@@ -42,7 +42,7 @@ while [ "$1" ]; do
  done
  
  OSCTMP=`echo $OSCMT | tr "/" "."`
-USED=`df | awk "/$OSCTMP/ { print \\$3 }" | tail -1`
+USED=`df | awk "/$OSCTMP/ { print \\$3 }" | tail -n 1`
  USED=`expr $USED + 16` # Some space for the status file
  
  THREADS=1
@@ -58,7 +58,7 @@ done
  
  rm -f $ENDRUN
  
-NOWUSED=`df | awk "/$OSCTMP/ { print \\$3 }" | tail -1`
+NOWUSED=`df | awk "/$OSCTMP/ { print \\$3 }" | tail -n 1`
  if [ $NOWUSED -gt $USED ]; then
         echo "Space not all freed: now ${NOWUSED}kB, was ${USED}kB." 1>&2
         echo "This is normal on BA OSTs, because of subdirectories." 1>&2
diff --git a/lustre/tests/runregression-net.sh b/lustre/tests/runregression-net.sh

index 73e81ca..77d6768 100644 (file)
--- a/lustre/tests/runregression-net.sh
+++ b/lustre/tests/runregression-net.sh
@@ -9,7 +9,7 @@ COUNT_1000=`expr $COUNT / 1000`
  
  ENDRUN=endrun-`hostname`
  
-ECHONAME="`lctl device_list 2> /dev/null | awk '/ echo_client / { print $4 }' | tail -1`"
+ECHONAME="`lctl device_list 2> /dev/null | awk '/ echo_client / { print $4 }' | tail -n 1`"
  
  if [ -z "$ECHONAME" ]; then
         echo "$0: needs an ECHO_CLIENT set up first" 1>&2
diff --git a/lustre/tests/runtests b/lustre/tests/runtests

index 6a8aac8..d97bdea 100755 (executable)
--- a/lustre/tests/runtests
+++ b/lustre/tests/runtests
@@ -35,16 +35,16 @@ while [ "$1" ]; do
         shift
  done
  
-MOUNT="`mount | awk '/ lustre_lite / { print $3 }' | tail -1`"
+MOUNT="`mount | awk '/ lustre_lite / { print $3 }' | tail -n 1`"
  if [ -z "$MOUNT" ]; then
         sh llmount.sh
-       MOUNT="`mount | awk '/ lustre_lite / { print $3 }' | tail -1`"
+       MOUNT="`mount | awk '/ lustre_lite / { print $3 }' | tail -n 1`"
         [ -z "$MOUNT" ] && fail "no lustre filesystem mounted" 1
         I_MOUNTED="yes"
  fi
  
  OSCTMP=`echo $MOUNT | tr "/" "."`
-USED=`df | awk "/$OSCTMP/ { print \\$3 }" | tail -1`
+USED=`df | awk "/$OSCTMP/ { print \\$3 }" | tail -n 1`
  USED=`expr $USED + 16` # Some space for the status file
  
  # let's start slowly here...
@@ -77,7 +77,7 @@ mkdir $DST || fail "can't mkdir $DST" 10
  
  # ok, that hopefully worked, so let's do a little more, with files that
  # haven't changed in the last day (hopefully they don't change during test)
-FILES=`find $SRC -type f -mtime +1 -ctime +1 | head -$COUNT`
+FILES=`find $SRC -type f -mtime +1 -ctime +1 | head -n $COUNT`
  log "copying files from $SRC to $DST$SRC"
  tar cf - $FILES | tar xvf - -C $DST || fail "copying $SRC" 11
  
@@ -123,7 +123,7 @@ rmdir $MOUNT/base$$* || fail "mkdirmany cleanup failed"
  
  log "done"
  
-NOWUSED=`df | awk "/$OSCTMP/ { print \\$3 }" | tail -1`
+NOWUSED=`df | awk "/$OSCTMP/ { print \\$3 }" | tail -n 1`
  if [ `expr $NOWUSED - $USED` -gt 1024 ]; then
         echo "Space not all freed: now ${NOWUSED}kB, was ${USED}kB." 1>&2
  fi
diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh

index 6b15107..201de89 100644 (file)
--- a/lustre/tests/sanity.sh
+++ b/lustre/tests/sanity.sh
@@ -1192,7 +1192,7 @@ test_36d() {
  run_test 36d "non-root OST utime check (open, utime) ==========="
  
  test_36e() {
-       [ $RUNAS_ID -eq $UID ] && return
+       [ $RUNAS_ID -eq $UID ] && echo "skipping test 36e" && return
         [ ! -d $DIR/d36 ] && mkdir $DIR/d36
         touch $DIR/d36/f36e
         $RUNAS utime $DIR/d36/f36e && error "utime worked, want failure" || true
@@ -1832,7 +1832,7 @@ run_test 62 "verify obd_match failure doesn't LBUG (should -EIO)"
  
  # bug 2319 - oig_wait() interrupted causes crash because of invalid waitq.
  test_63() {
-       MAX_DIRTY_MB=`cat /proc/fs/lustre/osc/*/max_dirty_mb | head -1`
+       MAX_DIRTY_MB=`cat /proc/fs/lustre/osc/*/max_dirty_mb | head -n 1`
         for i in /proc/fs/lustre/osc/*/max_dirty_mb ; do
                 echo 0 > $i
         done
@@ -1923,6 +1923,15 @@ test_66() {
  }
  run_test 66 "update inode blocks count on client ==============="
  
+test_67() { # bug 3285 - supplementary group fails on MDS, passes on client
+       [ "$RUNAS_ID" = "$UID" ] && echo "skipping test 67" && return
+       mkdir $DIR/d67
+       chmod 771 $DIR/d67
+       chgrp $RUNAS_ID $DIR/d67
+       $RUNAS -g $((RUNAS_ID + 1)) -G1,2,$RUNAS_ID ls $DIR/d67 && error || true
+}
+run_test 67 "supplementary group failure (should return error) ="
+
  # on the LLNL clusters, runas will still pick up root's $TMP settings,
  # which will not be writable for the runas user, and then you get a CVS
  # error message with a corrupt path string (CVS bug) and panic.
diff --git a/lustre/tests/sanityN.sh b/lustre/tests/sanityN.sh

index e884b73..0c34d6b 100644 (file)
--- a/lustre/tests/sanityN.sh
+++ b/lustre/tests/sanityN.sh
@@ -103,8 +103,8 @@ pass() {
         echo PASS
  }
  
-export MOUNT1=`mount| awk '/ lustre/ { print $3 }'| head -1`
-export MOUNT2=`mount| awk '/ lustre/ { print $3 }'| tail -1`
+export MOUNT1=`mount| awk '/ lustre/ { print $3 }'| head -n 1`
+export MOUNT2=`mount| awk '/ lustre/ { print $3 }'| tail -n 1`
  [ -z "$MOUNT1" ] && error "NAME=$NAME not mounted once"
  [ "$MOUNT1" = "$MOUNT2" ] && error "NAME=$NAME not mounted twice"
  [ `mount| awk '/ lustre/ { print $3 }'| wc -l` -ne 2 ] && \
diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh

index 791b523..33f9786 100644 (file)
--- a/lustre/tests/test-framework.sh
+++ b/lustre/tests/test-framework.sh
@@ -27,7 +27,7 @@ usage() {
  init_test_env() {
      export LUSTRE=`absolute_path $LUSTRE`
      export TESTSUITE=`basename $0 .sh`
-    export XMLCONFIG="${TESTSUITE}.xml"
+    export XMLCONFIG=${XMLCONFIG:-${TESTSUITE}.xml}
      export LTESTDIR=${LTESTDIR:-$LUSTRE/../ltest}
  
      [ -d /r ] && export ROOT=/r
author	adilger <adilger>
	Thu, 29 Apr 2004 08:54:18 +0000 (08:54 +0000)
committer	adilger <adilger>
	Thu, 29 Apr 2004 08:54:18 +0000 (08:54 +0000)
lnet/include/linux/kp30.h		patch \| blob \| history
lustre/ChangeLog		patch \| blob \| history
lustre/include/liblustre.h		patch \| blob \| history
lustre/include/linux/lustre_fsfilt.h		patch \| blob \| history
lustre/include/linux/lustre_lib.h		patch \| blob \| history
lustre/include/linux/lustre_net.h		patch \| blob \| history
lustre/kernel_patches/patches/configurable-x86-stack-2.4.21-sles8sp3.patch	[new file with mode: 0644]	patch \| blob
lustre/kernel_patches/patches/dev_read_only-suse-2.4.19.patch	[new file with mode: 0644]	patch \| blob
lustre/kernel_patches/patches/exports_2.4.19-suse.patch		patch \| blob \| history
lustre/kernel_patches/patches/ext-2.4-patch-1-suse-2.4.19.patch	[new file with mode: 0644]	patch \| blob
lustre/kernel_patches/patches/ext3-orphan_lock-2.4.19-suse.patch		patch \| blob \| history
lustre/kernel_patches/patches/ext3-trusted_ea-suse-2.4.19.patch	[new file with mode: 0644]	patch \| blob
lustre/kernel_patches/patches/ext3-use-after-free-suse.patch		patch \| blob \| history
lustre/kernel_patches/patches/invalidate_show-2.4.21-sles8sp3.patch	[new file with mode: 0644]	patch \| blob
lustre/kernel_patches/patches/iopen-2.4.19-suse.patch		patch \| blob \| history
lustre/kernel_patches/patches/iopen-2.4.21-sles8sp3.patch	[new file with mode: 0644]	patch \| blob
lustre/kernel_patches/patches/kernel_text_address-2.4.21-sles8sp3.patch	[new file with mode: 0644]	patch \| blob
lustre/kernel_patches/patches/linux-2.4.19-pre1-xattr-0.8.54.patch		patch \| blob \| history
lustre/kernel_patches/patches/linux-2.4.19-xattr-0.8.54-suse.patch		patch \| blob \| history
lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54-chaos.patch		patch \| blob \| history
lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54-hp.patch		patch \| blob \| history
lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54.patch		patch \| blob \| history
lustre/kernel_patches/patches/linux-2.4.21-xattr-0.8.54-chaos.patch		patch \| blob \| history
lustre/kernel_patches/patches/linux-2.4.21-xattr-0.8.54-suse.patch		patch \| blob \| history
lustre/kernel_patches/patches/linux-2.4.22-xattr-0.8.54.patch		patch \| blob \| history
lustre/kernel_patches/patches/linux-2.4.24-xattr-0.8.54.patch		patch \| blob \| history
lustre/kernel_patches/patches/lustre_version.patch		patch \| blob \| history
lustre/kernel_patches/patches/mkdep-revert-rh-2.4.patch	[new file with mode: 0644]	patch \| blob
lustre/kernel_patches/patches/tcp-zero-copy-2.4.21-sles8sp3.patch	[new file with mode: 0644]	patch \| blob
lustre/kernel_patches/patches/vfs_intent-2.4.18-18-chaos65.patch		patch \| blob \| history
lustre/kernel_patches/patches/vfs_intent-2.4.19-pre1.patch		patch \| blob \| history
lustre/kernel_patches/patches/vfs_intent-2.4.19-suse.patch		patch \| blob \| history
lustre/kernel_patches/patches/vfs_intent-2.4.20-hp.patch		patch \| blob \| history
lustre/kernel_patches/patches/vfs_intent-2.4.20-rh.patch		patch \| blob \| history
lustre/kernel_patches/patches/vfs_intent-2.4.20-vanilla.patch		patch \| blob \| history
lustre/kernel_patches/patches/vfs_intent-2.4.21-chaos.patch		patch \| blob \| history
lustre/kernel_patches/patches/vfs_intent-2.4.21-sles8sp3.patch	[new file with mode: 0644]	patch \| blob
lustre/kernel_patches/patches/vfs_intent-2.4.21-suse2.patch		patch \| blob \| history
lustre/kernel_patches/patches/vfs_intent-2.4.22-rh.patch		patch \| blob \| history
lustre/kernel_patches/patches/vfs_nointent-2.6-suse.patch		patch \| blob \| history
lustre/kernel_patches/patches/xattr-0.8.54-2.4.22-rh.patch		patch \| blob \| history
lustre/kernel_patches/series/chaos-2.4.18		patch \| blob \| history
lustre/kernel_patches/series/chaos-2.4.18-pdirops		patch \| blob \| history
lustre/kernel_patches/series/rh-2.4.20		patch \| blob \| history
lustre/kernel_patches/series/suse-2.4.19		patch \| blob \| history
lustre/kernel_patches/series/suse-sles8sp3-2.4.21	[new file with mode: 0644]	patch \| blob
lustre/kernel_patches/which_patch		patch \| blob \| history
lustre/ldlm/ldlm_resource.c		patch \| blob \| history
lustre/liblustre/genlib.sh		patch \| blob \| history
lustre/liblustre/llite_lib.c		patch \| blob \| history
lustre/liblustre/namei.c		patch \| blob \| history
lustre/liblustre/super.c		patch \| blob \| history
lustre/liblustre/tests/Makefile.am		patch \| blob \| history
lustre/liblustre/tests/echo_test.c		patch \| blob \| history
lustre/llite/dir.c		patch \| blob \| history
lustre/llite/file.c		patch \| blob \| history
lustre/llite/llite_internal.h		patch \| blob \| history
lustre/llite/llite_lib.c		patch \| blob \| history
lustre/llite/lproc_llite.c		patch \| blob \| history
lustre/llite/rw.c		patch \| blob \| history
lustre/mdc/mdc_locks.c		patch \| blob \| history
lustre/mdc/mdc_request.c		patch \| blob \| history
lustre/mds/mds_open.c		patch \| blob \| history
lustre/obdclass/class_obd.c		patch \| blob \| history
lustre/obdclass/genops.c		patch \| blob \| history
lustre/obdecho/echo_client.c		patch \| blob \| history
lustre/obdfilter/filter.c		patch \| blob \| history
lustre/obdfilter/filter_io.c		patch \| blob \| history
lustre/osc/lproc_osc.c		patch \| blob \| history
lustre/osc/osc_create.c		patch \| blob \| history
lustre/osc/osc_request.c		patch \| blob \| history
lustre/portals/include/linux/kp30.h		patch \| blob \| history
lustre/ptlrpc/client.c		patch \| blob \| history
lustre/ptlrpc/pinger.c		patch \| blob \| history
lustre/ptlrpc/recover.c		patch \| blob \| history
lustre/ptlrpc/service.c		patch \| blob \| history
lustre/scripts/version_tag.pl.in		patch \| blob \| history
lustre/tests/acceptance-small.sh		patch \| blob \| history
lustre/tests/cfg/insanity-local.sh		patch \| blob \| history
lustre/tests/insanity.sh		patch \| blob \| history
lustre/tests/llmountcleanup.sh		patch \| blob \| history
lustre/tests/multiop.c		patch \| blob \| history
lustre/tests/oos.sh		patch \| blob \| history
lustre/tests/oos2.sh		patch \| blob \| history
lustre/tests/recovery-small.sh		patch \| blob \| history
lustre/tests/replay-dual.sh		patch \| blob \| history
lustre/tests/replay-ost-single.sh		patch \| blob \| history
lustre/tests/replay-single.sh		patch \| blob \| history
lustre/tests/run-llog.sh		patch \| blob \| history
lustre/tests/runas.c		patch \| blob \| history
lustre/tests/runregression-brw.sh		patch \| blob \| history
lustre/tests/runregression-mds.sh		patch \| blob \| history
lustre/tests/runregression-net.sh		patch \| blob \| history
lustre/tests/runtests		patch \| blob \| history
lustre/tests/sanity.sh		patch \| blob \| history
lustre/tests/sanityN.sh		patch \| blob \| history
lustre/tests/test-framework.sh		patch \| blob \| history