merge b_devel into HEAD, which will become 0.7.3

author phil <phil>

Fri, 25 Jul 2003 17:58:07 +0000 (17:58 +0000)

committer phil <phil>

Fri, 25 Jul 2003 17:58:07 +0000 (17:58 +0000)
author phil <phil>
Fri, 25 Jul 2003 17:58:07 +0000 (17:58 +0000)
committer phil <phil>
Fri, 25 Jul 2003 17:58:07 +0000 (17:58 +0000)
diff --git a/lnet/.cvsignore b/lnet/.cvsignore

index 99ac885..c1a9bdf 100644 (file)
--- a/lnet/.cvsignore
+++ b/lnet/.cvsignore
@@ -6,3 +6,4 @@ autom4te.cache
  config.log
  config.status
  configure
+.*.o.cmd
diff --git a/lnet/Kernelenv.in b/lnet/Kernelenv.in

index 29a713f..7a48c58 100644 (file)
--- a/lnet/Kernelenv.in
+++ b/lnet/Kernelenv.in
@@ -1 +1,6 @@
-EXTRA_CFLAGS= -Ifs/lustre/include -Ifs/lustre/portals/include
+EXTRA_CFLAGS := -Ifs/lustre/include -Ifs/lustre/portals/include
+# portals/utils/debug.c wants <linux/version.h> from userspace.  sigh.
+HOSTCFLAGS := -I@LINUX@/include $(EXTRA_CFLAGS)
+LIBREADLINE := @LIBREADLINE@
+# 2.5's makefiles aren't nice to cross-directory libraries in host programs
+PTLCTLOBJS := debug.o l_ioctl.o parser.o portals.o
diff --git a/lnet/Kernelenv.mk b/lnet/Kernelenv.mk

index 29a713f..7c66dfa 100644 (file)
--- a/lnet/Kernelenv.mk
+++ b/lnet/Kernelenv.mk
@@ -1 +1,4 @@
-EXTRA_CFLAGS= -Ifs/lustre/include -Ifs/lustre/portals/include
+EXTRA_CFLAGS := -Ifs/lustre/include -Ifs/lustre/portals/include
+HOSTCFLAGS := $(EXTRA_CFLAGS)
+# the kernel doesn't want us to build archives for host binaries :/
+PTLCTLOBJS := debug.o l_ioctl.o parser.o portals.o
diff --git a/lnet/Makefile.mk b/lnet/Makefile.mk

index be0e51a..73a19df 100644 (file)
--- a/lnet/Makefile.mk
+++ b/lnet/Makefile.mk
@@ -1,6 +1,12 @@
-include fs/lustre/portals/Kernelenv
+include $(src)/Kernelenv
  
-obj-y += portals/
+# The ordering of these determines the order in which each subsystem's
+# module_init() functions are called.  If these are changed, make sure
+# they reflect the dependencies between each subsystem's _init functions.
  obj-y += libcfs/
-obj-y += knals/
+obj-y += portals/
  obj-y += router/
+obj-y += knals/
+obj-y += tests/
+
+obj-m += utils/
diff --git a/lnet/archdep.m4 b/lnet/archdep.m4

index 7a4e05c..1a7741b 100644 (file)
--- a/lnet/archdep.m4
+++ b/lnet/archdep.m4
@@ -11,8 +11,13 @@ AC_ARG_WITH(lib, [  --with-lib compile lustre library], host_cpu="lib")
  
  AC_ARG_WITH(linux, [  --with-linux=[path] set path to Linux source (default=/usr/src/linux)],LINUX=$with_linux,LINUX=/usr/src/linux)
  AC_SUBST(LINUX)
+if test x$enable_inkernel = xyes ; then
+        echo ln -s `pwd` $LINUX/fs/lustre
+        rm $LINUX/fs/lustre
+        ln -s `pwd` $LINUX/fs/lustre
+fi
  
-# --------- UML?  --------------------
+# --------- user mode linux? --------------------
  AC_MSG_CHECKING(if you are running user mode linux for $host_cpu ...)
  if test $host_cpu = "lib" ; then 
          host_cpu="lib"
@@ -111,6 +116,13 @@ case ${host_cpu} in
          MOD_LINK=elf64_ia64
  ;;
  
+       x86_64 )
+       AC_MSG_RESULT($host_cpu)
+        KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -fomit-frame-pointer -mno-red-zone -mcmodel=kernel -pipe -fno-reorder-blocks -finline-limit=2000 -fno-strength-reduce -fno-asynchronous-unwind-tables'
+       KCPPFLAGS='-D__KERNEL__ -DMODULE'
+        MOD_LINK=elf_x86_64
+;;
+
         sparc64 )
         AC_MSG_RESULT($host_cpu)
          KCFLAGS='-O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -Wno-unused -m64 -pipe -mno-fpu -mcpu=ultrasparc -mcmodel=medlow -ffixed-g4 -fcall-used-g5 -fcall-used-g7 -Wno-sign-compare -Wa,--undeclared-regs'
@@ -160,21 +172,33 @@ if test $host_cpu != "lib" ; then
        AC_MSG_ERROR(** cannot find $LINUX/include/linux/autoconf.h. Run make config in $LINUX.)
    fi
  
-# ------------ RELEASE and moduledir ------------------
+# ------------ LINUXRELEASE and moduledir ------------------
    AC_MSG_CHECKING(for Linux release)
    
    dnl We need to rid ourselves of the nasty [ ] quotes.
    changequote(, )
    dnl Get release from version.h
-  RELEASE="`sed -ne 's/.*UTS_RELEASE[ \"]*\([0-9.a-zA-Z_-]*\).*/\1/p' $LINUX/include/linux/version.h`"
+  LINUXRELEASE="`sed -ne 's/.*UTS_RELEASE[ \"]*\([0-9.a-zA-Z_-]*\).*/\1/p' $LINUX/include/linux/version.h`"
    changequote([, ])
    
-  moduledir='$(libdir)/modules/'$RELEASE/kernel
+  moduledir='$(libdir)/modules/'$LINUXRELEASE/kernel
    AC_SUBST(moduledir)
    
    modulefsdir='$(moduledir)/fs/$(PACKAGE)'
    AC_SUBST(modulefsdir)
    
+  AC_MSG_RESULT($LINUXRELEASE)
+  AC_SUBST(LINUXRELEASE)
+
+# ------------ RELEASE --------------------------------
+  AC_MSG_CHECKING(lustre release)
+  
+  dnl We need to rid ourselves of the nasty [ ] quotes.
+  changequote(, )
+  dnl Build RELEASE from the kernel's UTS_RELEASE in version.h (dashes become underscores) plus a build timestamp
+  RELEASE="`sed -ne 's/-/_/g' -e 's/.*UTS_RELEASE[ \"]*\([0-9.a-zA-Z_]*\).*/\1/p' $LINUX/include/linux/version.h`_`date +%Y%m%d%H%M`"
+  changequote([, ])
+
    AC_MSG_RESULT($RELEASE)
    AC_SUBST(RELEASE)
  
@@ -302,7 +326,7 @@ AM_CONDITIONAL(LIBLUSTRE, test x$host_cpu = xlib)
  # This needs to run after we've defined the KCPPFLAGS
  
  AC_MSG_CHECKING(for kernel version)
-AC_TRY_LINK([#define __KERNEL__
+AC_TRY_COMPILE([#define __KERNEL__
               #include <linux/sched.h>],
              [struct task_struct p;
               p.sighand = NULL;],
@@ -313,5 +337,5 @@ if test $RH_2_4_20 = 1; then
         AC_MSG_RESULT(redhat-2.4.20)
         CPPFLAGS="$CPPFLAGS -DCONFIG_RH_2_4_20"
  else
-       AC_MSG_RESULT($RELEASE)
+       AC_MSG_RESULT($LINUXRELEASE)
  fi 
diff --git a/lnet/include/config.h.in b/lnet/include/config.h.in

index 3aa6909..f9605ab 100644 (file)
--- a/lnet/include/config.h.in
+++ b/lnet/include/config.h.in
@@ -1,5 +1,11 @@
  /* portals/include/config.h.in.  Generated from configure.in by autoheader.  */
  
+/* Compile with orphan support */
+#undef ENABLE_ORPHANS
+
+/* Use the Pinger */
+#undef ENABLE_PINGER
+
  /* Define to 1 if you have the <inttypes.h> header file. */
  #undef HAVE_INTTYPES_H
  
diff --git a/lnet/include/linux/kp30.h b/lnet/include/linux/kp30.h

index ee3b9fc..2133391 100644 (file)
--- a/lnet/include/linux/kp30.h
+++ b/lnet/include/linux/kp30.h
@@ -4,7 +4,6 @@
  #ifndef _KP30_INCLUDED
  #define _KP30_INCLUDED
  
-
  #define PORTAL_DEBUG
  
  #ifndef offsetof
@@ -13,10 +12,6 @@
  
  #define LOWEST_BIT_SET(x)      ((x) & ~((x) - 1))
  
-#ifndef CONFIG_SMP
-# define smp_processor_id() 0
-#endif
-
  /*
   *  Debugging
   */
@@ -24,39 +19,34 @@ extern unsigned int portal_subsystem_debug;
  extern unsigned int portal_stack;
  extern unsigned int portal_debug;
  extern unsigned int portal_printk;
-/* Debugging subsystems  (8 bit ID)
- *
- * If you add debug subsystem #32, you need to send email to phil, because
- * you're going to break kernel subsystem debug filtering. */
-#define S_UNDEFINED    (0 << 24)
-#define S_MDC          (1 << 24)
-#define S_MDS          (2 << 24)
-#define S_OSC          (3 << 24)
-#define S_OST          (4 << 24)
-#define S_CLASS        (5 << 24)
-#define S_OBDFS        (6 << 24) /* obsolete */
-#define S_LLITE        (7 << 24)
-#define S_RPC          (8 << 24)
-#define S_EXT2OBD      (9 << 24) /* obsolete */
-#define S_PORTALS     (10 << 24)
-#define S_SOCKNAL     (11 << 24)
-#define S_QSWNAL      (12 << 24)
-#define S_PINGER      (13 << 24)
-#define S_FILTER      (14 << 24)
-#define S_TRACE       (15 << 24) /* obsolete */
-#define S_ECHO        (16 << 24)
-#define S_LDLM        (17 << 24)
-#define S_LOV         (18 << 24)
-#define S_GMNAL       (19 << 24)
-#define S_PTLROUTER   (20 << 24)
-#define S_COBD        (21 << 24)
-#define S_PTLBD       (22 << 24)
-#define S_LOG         (23 << 24)
-
-/* If you change these values, please keep portals/linux/utils/debug.c
+/* Debugging subsystems (32 bits, non-overlapping) */
+#define S_UNDEFINED    (1 << 0)
+#define S_MDC          (1 << 1)
+#define S_MDS          (1 << 2)
+#define S_OSC          (1 << 3)
+#define S_OST          (1 << 4)
+#define S_CLASS        (1 << 5)
+#define S_LOG          (1 << 6)
+#define S_LLITE        (1 << 7)
+#define S_RPC          (1 << 8)
+#define S_MGMT         (1 << 9)
+#define S_PORTALS     (1 << 10)
+#define S_SOCKNAL     (1 << 11)
+#define S_QSWNAL      (1 << 12)
+#define S_PINGER      (1 << 13)
+#define S_FILTER      (1 << 14)
+#define S_PTLBD       (1 << 15)
+#define S_ECHO        (1 << 16)
+#define S_LDLM        (1 << 17)
+#define S_LOV         (1 << 18)
+#define S_GMNAL       (1 << 19)
+#define S_PTLROUTER   (1 << 20)
+#define S_COBD        (1 << 21)
+
+/* If you change these values, please keep portals/utils/debug.c
   * up to date! */
  
-/* Debugging masks (24 bits, non-overlapping) */
+/* Debugging masks (32 bits, non-overlapping) */
  #define D_TRACE     (1 << 0) /* ENTRY/EXIT markers */
  #define D_INODE     (1 << 1)
  #define D_SUPER     (1 << 2)
@@ -80,20 +70,23 @@ extern unsigned int portal_printk;
  #define D_RPCTRACE  (1 << 20) /* for distributed debugging */
  #define D_VFSTRACE  (1 << 21)
  
-#ifndef __KERNEL__
-#define THREAD_SIZE 8192
+#ifdef __KERNEL__
+# include <linux/sched.h> /* THREAD_SIZE */
+#else
+# define THREAD_SIZE 8192
  #endif
-#ifdef  __ia64__
-#define CDEBUG_STACK() (THREAD_SIZE -                                      \
+
+#ifdef __KERNEL__
+# ifdef  __ia64__
+#  define CDEBUG_STACK (THREAD_SIZE -                                      \
                          ((unsigned long)__builtin_dwarf_cfa() &            \
                           (THREAD_SIZE - 1)))
-#else
-#define CDEBUG_STACK() (THREAD_SIZE -                                      \
+# else
+#  define CDEBUG_STACK (THREAD_SIZE -                                      \
                          ((unsigned long)__builtin_frame_address(0) &       \
                           (THREAD_SIZE - 1)))
-#endif
+# endif
  
-#ifdef __KERNEL__
  #define CHECK_STACK(stack)                                                    \
          do {                                                                  \
                  if ((stack) > 3*THREAD_SIZE/4 && (stack) > portal_stack) {    \
@@ -105,20 +98,21 @@ extern unsigned int portal_printk;
                        /*panic("LBUG");*/                                      \
                  }                                                             \
          } while (0)
-#else
+#else /* __KERNEL__ */
  #define CHECK_STACK(stack) do { } while(0)
-#endif
+#define CDEBUG_STACK (0L)
+#endif /* __KERNEL__ */
  
  #if 1
  #define CDEBUG(mask, format, a...)                                            \
  do {                                                                          \
-        CHECK_STACK(CDEBUG_STACK());                                          \
+        CHECK_STACK(CDEBUG_STACK);                                            \
          if (!(mask) || ((mask) & (D_ERROR | D_EMERG)) ||                      \
              (portal_debug & (mask) &&                                         \
-             portal_subsystem_debug & (1 << (DEBUG_SUBSYSTEM >> 24))))        \
+             portal_subsystem_debug & DEBUG_SUBSYSTEM))                       \
                  portals_debug_msg(DEBUG_SUBSYSTEM, mask,                      \
                                    __FILE__, __FUNCTION__, __LINE__,           \
-                                  CDEBUG_STACK(), format , ## a);             \
+                                  CDEBUG_STACK, format, ## a);                \
  } while (0)
  
  #define CWARN(format, a...) CDEBUG(D_WARNING, format, ## a)
@@ -162,7 +156,6 @@ do {                                                                    \
  #define EXIT                            do { } while (0)
  #endif
  
-
  #ifdef __KERNEL__
  # include <linux/vmalloc.h>
  # include <linux/time.h>
@@ -210,7 +203,8 @@ static inline void our_cond_resched(void)
  #endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) */
  
  #ifdef PORTAL_DEBUG
-extern void kportal_assertion_failed(char *expr,char *file,char *func,int line);
+extern void kportal_assertion_failed(char *expr, char *file, const char *func,
+                                     const int line);
  #define LASSERT(e) ((e) ? 0 : kportal_assertion_failed( #e , __FILE__,  \
                                                          __FUNCTION__, __LINE__))
  #else
@@ -560,14 +554,14 @@ extern struct prof_ent prof_ents[MAX_PROFS];
  #endif /* PORTALS_PROFILING */
  
  /* debug.c */
-void portals_run_lbug_upcall(char * file, char *fn, int line);
+void portals_run_lbug_upcall(char * file, const char *fn, const int line);
  void portals_debug_dumplog(void);
  int portals_debug_init(unsigned long bufsize);
  int portals_debug_cleanup(void);
  int portals_debug_clear_buffer(void);
  int portals_debug_mark_buffer(char *text);
  int portals_debug_set_daemon(unsigned int cmd, unsigned int length,
-                char *file, unsigned int size);
+                             char *file, unsigned int size);
  __s32 portals_debug_copy_to_user(char *buf, unsigned long len);
  #if (__GNUC__)
  /* Use the special GNU C __attribute__ hack to have the compiler check the
@@ -578,13 +572,14 @@ __s32 portals_debug_copy_to_user(char *buf, unsigned long len);
  # warning printf has been defined as a macro...
  # undef printf
  #endif
-void portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
-                        unsigned long stack, const char *format, ...)
+void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
+                       const int line, unsigned long stack,
+                       const char *format, ...)
          __attribute__ ((format (printf, 7, 8)));
  #else
-void portals_debug_msg (int subsys, int mask, char *file, char *fn,
-                        int line, unsigned long stack,
-                        const char *format, ...);
+void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
+                       const int line, unsigned long stack,
+                       const char *format, ...);
  #endif /* __GNUC__ */
  void portals_debug_set_level(unsigned int debug_level);
  
@@ -618,9 +613,9 @@ extern void kportal_blockallsigs (void);
  # define PORTAL_ALLOC(ptr, size) do { (ptr) = malloc(size); } while (0);
  # define PORTAL_FREE(a, b) do { free(a); } while (0);
  # define portals_debug_msg(subsys, mask, file, fn, line, stack, format, a...) \
-    printf ("%02x:%06x (@%lu %s:%s,l. %d %d %lu): " format,                    \
-            (subsys) >> 24, (mask), (long)time(0), file, fn, line,            \
-            getpid() , stack, ## a);
+    printf("%02x:%06x (@%lu %s:%s,l. %d %d %lu): " format,                    \
+           (subsys), (mask), (long)time(0), file, fn, line,                   \
+           getpid() , stack, ## a);
  #endif
  
  #ifndef CURRENT_TIME
@@ -911,13 +906,13 @@ ptl_handle_ni_t *kportal_get_ni (int nal);
  void kportal_put_ni (int nal);
  
  #ifdef __CYGWIN__
-#ifndef BITS_PER_LONG
-#if (~0UL) == 0xffffffffUL
-#define BITS_PER_LONG 32
-#else
-#define BITS_PER_LONG 64
-#endif
-#endif
+# ifndef BITS_PER_LONG
+#  if (~0UL) == 0xffffffffUL
+#   define BITS_PER_LONG 32
+#  else
+#   define BITS_PER_LONG 64
+#  endif
+# endif
  #endif
  
  #if (BITS_PER_LONG == 32 || __WORDSIZE == 32)
diff --git a/lnet/include/linux/portals_compat25.h b/lnet/include/linux/portals_compat25.h

index e28fbac..a7cb4d1 100644 (file)
--- a/lnet/include/linux/portals_compat25.h
+++ b/lnet/include/linux/portals_compat25.h
@@ -1,13 +1,56 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef _PORTALS_COMPAT_H
+#define _PORTALS_COMPAT_H
+
+// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved
+#if SPINLOCK_DEBUG
+# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) || defined(CONFIG_RH_2_4_20)
+#  define SIGNAL_MASK_ASSERT() \
+   LASSERT(current->sighand->siglock.magic == SPINLOCK_MAGIC)
+# else
+#  define SIGNAL_MASK_ASSERT() \
+   LASSERT(current->sigmask_lock.magic == SPINLOCK_MAGIC)
+# endif
+#else
+# define SIGNAL_MASK_ASSERT()
+#endif
+// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved
+
  #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) || defined(CONFIG_RH_2_4_20)
-# define SIGNAL_MASK_LOCK(task, flags)                              \
+
+# define SIGNAL_MASK_LOCK(task, flags)                                  \
    spin_lock_irqsave(&task->sighand->siglock, flags)
-# define SIGNAL_MASK_UNLOCK(task, flags)                            \
+# define SIGNAL_MASK_UNLOCK(task, flags)                                \
    spin_unlock_irqrestore(&task->sighand->siglock, flags)
+# define USERMODEHELPER(path, argv, envp)                               \
+  call_usermodehelper(path, argv, envp, 1)
  # define RECALC_SIGPENDING         recalc_sigpending()
-#else
-# define SIGNAL_MASK_LOCK(task, flags)                              \
+# define CURRENT_SECONDS           get_seconds()
+
+#else /* 2.4.x */
+
+# define SIGNAL_MASK_LOCK(task, flags)                                  \
    spin_lock_irqsave(&task->sigmask_lock, flags)
-# define SIGNAL_MASK_UNLOCK(task, flags)                            \
+# define SIGNAL_MASK_UNLOCK(task, flags)                                \
    spin_unlock_irqrestore(&task->sigmask_lock, flags)
+# define USERMODEHELPER(path, argv, envp)                               \
+  call_usermodehelper(path, argv, envp)
  # define RECALC_SIGPENDING         recalc_sigpending(current)
+# define CURRENT_SECONDS           CURRENT_TIME
+
+#endif
+
+#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
+# define THREAD_NAME(comm, fmt, a...)                                   \
+        sprintf(comm, fmt "|%d", ## a, current->thread.extern_pid)
+#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+# define THREAD_NAME(comm, fmt, a...)                                   \
+        sprintf(comm, fmt "|%d", ## a, current->thread.mode.tt.extern_pid)
+#else
+# define THREAD_NAME(comm, fmt, a...)                                   \
+        sprintf(comm, fmt, ## a)
  #endif
+
+#endif /* _PORTALS_COMPAT_H */
diff --git a/lnet/include/lnet/internal.h b/lnet/include/lnet/internal.h

index d78cad4..a70b465 100644 (file)
--- a/lnet/include/lnet/internal.h
+++ b/lnet/include/lnet/internal.h
@@ -1,5 +1,3 @@
-/*
-*/
  #ifndef _P30_INTERNAL_H_
  #define _P30_INTERNAL_H_
  
diff --git a/lnet/include/lnet/list.h b/lnet/include/lnet/list.h

index 2b63312..78a1e2d 100644 (file)
--- a/lnet/include/lnet/list.h
+++ b/lnet/include/lnet/list.h
@@ -1,6 +1,4 @@
  #ifndef _LINUX_LIST_H
-#define _LINUX_LIST_H
-
  
  /*
   * Simple doubly linked list implementation.
@@ -101,7 +99,9 @@ static inline void list_del_init(struct list_head *entry)
         __list_del(entry->prev, entry->next);
         INIT_LIST_HEAD(entry);
  }
+#endif
  
+#ifndef list_for_each_entry
  /**
   * list_move - delete from one list and add as another's head
   * @list: the entry to move
@@ -124,7 +124,10 @@ static inline void list_move_tail(struct list_head *list,
         __list_del(list->prev, list->next);
         list_add_tail(list, head);
  }
+#endif
  
+#ifndef _LINUX_LIST_H
+#define _LINUX_LIST_H
  /**
   * list_empty - tests whether a list is empty
   * @head: the list to test.
diff --git a/lnet/include/lnet/lltrace.h b/lnet/include/lnet/lltrace.h

index 7d1b304..d389aab 100644 (file)
--- a/lnet/include/lnet/lltrace.h
+++ b/lnet/include/lnet/lltrace.h
@@ -2,7 +2,7 @@
   * vim:expandtab:shiftwidth=8:tabstop=8:
   *
   * Compile with:
- * cc -I../../portals/include -o fio fio.c -L../../portals/linux/utils -lptlctl 
+ * cc -I../../portals/include -o fio fio.c -L../../portals/linux/utils -lptlctl
   */
  #ifndef __LTRACE_H_
  #define __LTRACE_H_
@@ -31,20 +31,20 @@ static inline int ltrace_write_file(char* fname)
          argv[0] = "debug_kernel";
          argv[1] = fname;
          argv[2] = "1";
-        
+
          fprintf(stderr, "[ptlctl] %s %s %s\n", argv[0], argv[1], argv[2]);
-        
+
          return jt_dbg_debug_kernel(3, argv);
  }
  
  static inline int ltrace_clear()
  {
          char* argv[1];
-        
+
          argv[0] = "clear";
-        
+
          fprintf(stderr, "[ptlctl] %s\n", argv[0]);
-        
+
          return jt_dbg_clear_debug_buf(1, argv);
  }
  
@@ -52,9 +52,9 @@ static inline int ltrace_mark(int indent_level, char* text)
  {
          char* argv[2];
          char mark_buf[PATH_MAX];
-        
+
          snprintf(mark_buf, PATH_MAX, "====%d=%s", indent_level, text);
-        
+
          argv[0] = "mark";
          argv[1] = mark_buf;
          return jt_dbg_mark_debug_buf(2, argv);
@@ -65,9 +65,9 @@ static inline int ltrace_applymasks()
          char* argv[2];
          argv[0] = "list";
          argv[1] = "applymasks";
-        
+
          fprintf(stderr, "[ptlctl] %s %s\n", argv[0], argv[1]);
-        
+
          return jt_dbg_list(2, argv);
  }
  
@@ -95,19 +95,19 @@ static inline int ltrace_start()
  #ifdef PORTALS_DEV_ID
          rc = register_ioc_dev(PORTALS_DEV_ID, PORTALS_DEV_PATH);
  #endif
-        ltrace_filter("class"); 
+        ltrace_filter("class");
          ltrace_filter("socknal");
-        ltrace_filter("qswnal"); 
-        ltrace_filter("gmnal");  
-        ltrace_filter("portals");  
-        
-        ltrace_show("all_types");  
-        ltrace_filter("trace");  
-        ltrace_filter("malloc"); 
-        ltrace_filter("net"); 
-        ltrace_filter("page"); 
-        ltrace_filter("other"); 
-        ltrace_filter("info"); 
+        ltrace_filter("qswnal");
+        ltrace_filter("gmnal");
+        ltrace_filter("portals");
+
+        ltrace_show("all_types");
+        ltrace_filter("trace");
+        ltrace_filter("malloc");
+        ltrace_filter("net");
+        ltrace_filter("page");
+        ltrace_filter("other");
+        ltrace_filter("info");
          ltrace_applymasks();
  
          return rc;
@@ -146,7 +146,7 @@ static inline void ltrace_add_processnames(char* fname)
          struct timezone tz;
          int nob;
          int underuml = !not_uml();
-        
+
          gettimeofday(&tv, &tz);
  
          nob = snprintf(cmdbuf, LTRACE_MAX_NOB, "ps --no-headers -eo \"");
@@ -167,7 +167,7 @@ static inline void ltrace_add_processnames(char* fname)
                                   "(%s:%d:%s() %d+%lu): ",
                                   "lltrace.h", __LINE__, __FUNCTION__, 0, 0L);
          }
-         
+
          nob += snprintf(cmdbuf+nob, LTRACE_MAX_NOB, " %%p %%c\" >> %s", fname);
          system(cmdbuf);
  }
diff --git a/lnet/include/lnet/myrnal.h b/lnet/include/lnet/myrnal.h

index 12b1925..13790f7 100644 (file)
--- a/lnet/include/lnet/myrnal.h
+++ b/lnet/include/lnet/myrnal.h
@@ -1,6 +1,3 @@
-/*
-*/
-
  #ifndef MYRNAL_H
  #define MYRNAL_H
  
diff --git a/lnet/include/lnet/nal.h b/lnet/include/lnet/nal.h

index 88be63c..7cb3ab7 100644 (file)
--- a/lnet/include/lnet/nal.h
+++ b/lnet/include/lnet/nal.h
@@ -1,5 +1,3 @@
-/*
-*/
  #ifndef _NAL_H_
  #define _NAL_H_
  
diff --git a/lnet/include/lnet/ppid.h b/lnet/include/lnet/ppid.h

index 4727599..760f465 100644 (file)
--- a/lnet/include/lnet/ppid.h
+++ b/lnet/include/lnet/ppid.h
@@ -1,6 +1,3 @@
-/*
- */
-
  #ifndef _INCppidh_
  #define _INCppidh_
  
diff --git a/lnet/include/lnet/stringtab.h b/lnet/include/lnet/stringtab.h

index c9683f7..33e4375 100644 (file)
--- a/lnet/include/lnet/stringtab.h
+++ b/lnet/include/lnet/stringtab.h
@@ -1,5 +1,3 @@
  /*
-*/
-/*
   * stringtab.h
   */
diff --git a/lnet/include/lnet/types.h b/lnet/include/lnet/types.h

index d4038b6..0269290 100644 (file)
--- a/lnet/include/lnet/types.h
+++ b/lnet/include/lnet/types.h
@@ -2,14 +2,19 @@
  #define _P30_TYPES_H_
  
  #ifdef __linux__
-#include <asm/types.h>
-#include <asm/timex.h>
+# include <asm/types.h>
+# include <asm/timex.h>
  #else
-#include <sys/types.h>
+# include <sys/types.h>
  typedef u_int32_t __u32;
  typedef u_int64_t __u64;
-typedef unsigned long long cycles_t;
-static inline cycles_t get_cycles(void) { return 0; }
+#endif
+
+#ifdef __KERNEL__
+# include <linux/time.h>
+#else
+# include <sys/time.h>
+# define do_gettimeofday(tv) gettimeofday(tv, NULL)
  #endif
  
  typedef __u64 ptl_nid_t;
@@ -31,7 +36,7 @@ typedef ptl_handle_any_t ptl_handle_md_t;
  typedef ptl_handle_any_t ptl_handle_me_t;
  
  #define PTL_HANDLE_NONE \
-((const ptl_handle_any_t){.nal_idx = -1, .cookie = -1})
+    ((const ptl_handle_any_t){.nal_idx = -1, .cookie = -1})
  #define PTL_EQ_NONE PTL_HANDLE_NONE
  
  static inline int PtlHandleEqual (ptl_handle_any_t h1, ptl_handle_any_t h2)
@@ -108,17 +113,15 @@ typedef struct {
          ptl_handle_me_t unlinked_me;
          ptl_md_t mem_desc;
          ptl_hdr_data_t hdr_data;
-        cycles_t  arrival_time;
+        struct timeval arrival_time;
          volatile ptl_seq_t sequence;
  } ptl_event_t;
  
-
  typedef enum {
          PTL_ACK_REQ,
          PTL_NOACK_REQ
  } ptl_ack_req_t;
  
-
  typedef struct {
          volatile ptl_seq_t sequence;
          ptl_size_t size;
@@ -130,7 +133,6 @@ typedef struct {
          ptl_eq_t *eq;
  } ptl_ni_t;
  
-
  typedef struct {
          int max_match_entries;    /* max number of match entries */
          int max_mem_descriptors;  /* max number of memory descriptors */
diff --git a/lnet/klnds/.cvsignore b/lnet/klnds/.cvsignore

index 282522d..89a4aa6 100644 (file)
--- a/lnet/klnds/.cvsignore
+++ b/lnet/klnds/.cvsignore
@@ -1,2 +1,3 @@
  Makefile
  Makefile.in
+.*.o.cmd
diff --git a/lnet/klnds/Makefile.mk b/lnet/klnds/Makefile.mk

index ce40a60..cd5d9d6 100644 (file)
--- a/lnet/klnds/Makefile.mk
+++ b/lnet/klnds/Makefile.mk
@@ -1,4 +1,4 @@
-include ../Kernelenv
+include $(obj)/../Kernelenv
  
  obj-y = socknal/
-# more coming...
-\ No newline at end of file
+# more coming...
diff --git a/lnet/klnds/gmlnd/gmnal.c b/lnet/klnds/gmlnd/gmnal.c

index ceeea2a..0cffc15 100644 (file)
--- a/lnet/klnds/gmlnd/gmnal.c
+++ b/lnet/klnds/gmlnd/gmnal.c
@@ -124,7 +124,7 @@ static nal_t *kgmnal_init(int interface, ptl_pt_index_t ptl_size,
          return &kgmnal_api;
  }
  
-static void __exit
+static void /*__exit*/
  kgmnal_finalize(void)
  {
          struct list_head *tmp;
diff --git a/lnet/klnds/scimaclnd/scimacnal.c b/lnet/klnds/scimaclnd/scimacnal.c

index 1066d69..479cc2c 100644 (file)
--- a/lnet/klnds/scimaclnd/scimacnal.c
+++ b/lnet/klnds/scimaclnd/scimacnal.c
@@ -112,7 +112,7 @@ static nal_t *kscimacnal_init(int interface, ptl_pt_index_t  ptl_size,
  
  
  /* Called by kernel at module unload time */
-static void __exit 
+static void /*__exit*/ 
  kscimacnal_finalize(void)
  {
          /* FIXME: How should the shutdown procedure really look? */
diff --git a/lnet/klnds/socklnd/.cvsignore b/lnet/klnds/socklnd/.cvsignore

index e995588..95973d6 100644 (file)
--- a/lnet/klnds/socklnd/.cvsignore
+++ b/lnet/klnds/socklnd/.cvsignore
@@ -1,3 +1,4 @@
  .deps
  Makefile
  Makefile.in
+.*.o.cmd
diff --git a/lnet/klnds/socklnd/Makefile.mk b/lnet/klnds/socklnd/Makefile.mk

index 46edf01..5c1b366 100644 (file)
--- a/lnet/klnds/socklnd/Makefile.mk
+++ b/lnet/klnds/socklnd/Makefile.mk
@@ -3,7 +3,7 @@
  # This code is issued under the GNU General Public License.
  # See the file COPYING in this distribution
  
-include ../../Kernelenv
+include $(src)/../../Kernelenv
  
  obj-y += ksocknal.o
  ksocknal-objs    := socknal.o socknal_cb.o
diff --git a/lnet/klnds/toelnd/toenal.c b/lnet/klnds/toelnd/toenal.c

index 1f5dc38..77ee473 100644 (file)
--- a/lnet/klnds/toelnd/toenal.c
+++ b/lnet/klnds/toelnd/toenal.c
@@ -379,7 +379,7 @@ ktoenal_cmd(struct portal_ioctl_data * data, void * private)
  }
  
  
-void __exit
+void /*__exit*/
  ktoenal_module_fini (void)
  {
          CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
diff --git a/lnet/klnds/toelnd/toenal_cb.c b/lnet/klnds/toelnd/toenal_cb.c

index ec37f6f..abd0731 100644 (file)
--- a/lnet/klnds/toelnd/toenal_cb.c
+++ b/lnet/klnds/toelnd/toenal_cb.c
@@ -893,6 +893,7 @@ ktoenal_process_receive (ksock_conn_t *conn, long *irq_flags)
                                  spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags);
                                  goto get_fmb;   /* => go get a fwd msg buffer */
                          default:
+                                break;
                          }
                          /* Not Reached */
                          LBUG ();
@@ -934,6 +935,7 @@ ktoenal_process_receive (ksock_conn_t *conn, long *irq_flags)
                  goto out;                       /* (later) */
  
          default:
+                break;
          }
  
          /* Not Reached */
diff --git a/lnet/libcfs/.cvsignore b/lnet/libcfs/.cvsignore

index 67d1a3d..7fa686f 100644 (file)
--- a/lnet/libcfs/.cvsignore
+++ b/lnet/libcfs/.cvsignore
@@ -2,3 +2,4 @@
  Makefile
  Makefile.in
  link-stamp
+.*.o.cmd
diff --git a/lnet/libcfs/Makefile.mk b/lnet/libcfs/Makefile.mk

index 3196ea2..9aa838f 100644 (file)
--- a/lnet/libcfs/Makefile.mk
+++ b/lnet/libcfs/Makefile.mk
@@ -6,4 +6,4 @@
  include fs/lustre/portals/Kernelenv
  
  obj-y += libcfs.o
-licfs-objs    := module.o proc.o debug.o
-\ No newline at end of file
+libcfs-objs    := module.o proc.o debug.o
diff --git a/lnet/libcfs/debug.c b/lnet/libcfs/debug.c

index 8d26dbb..f37cd96 100644 (file)
--- a/lnet/libcfs/debug.c
+++ b/lnet/libcfs/debug.c
@@ -571,8 +571,8 @@ int portals_debug_init(unsigned long bufsize)
          memset(debug_buf, 0, debug_size);
          debug_wrapped = 0;
  
-        printk(KERN_INFO "Portals: allocated %lu byte debug buffer at %p.\n",
-               bufsize, debug_buf);
+        //printk(KERN_INFO "Portals: allocated %lu byte debug buffer at %p.\n",
+               //bufsize, debug_buf);
          atomic_set(&debug_off_a, debug_off);
          notifier_chain_register(&panic_notifier_list, &lustre_panic_notifier);
          debug_size = bufsize;
@@ -632,9 +632,9 @@ int portals_debug_mark_buffer(char *text)
          if (debug_buf == NULL)
                  return -EINVAL;
  
-        CDEBUG(0, "*******************************************************************************\n");
+        CDEBUG(0, "********************************************************\n");
          CDEBUG(0, "DEBUG MARKER: %s\n", text);
-        CDEBUG(0, "*******************************************************************************\n");
+        CDEBUG(0, "********************************************************\n");
  
          return 0;
  }
@@ -672,8 +672,8 @@ __s32 portals_debug_copy_to_user(char *buf, unsigned long len)
  
  /* FIXME: I'm not very smart; someone smarter should make this better. */
  void
-portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
-                   unsigned long stack, const char *format, ...)
+portals_debug_msg(int subsys, int mask, char *file, const char *fn,
+                  const int line, unsigned long stack, const char *format, ...)
  {
          va_list       ap;
          unsigned long flags;
@@ -728,8 +728,8 @@ portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
          do_gettimeofday(&tv);
  
          prefix_nob = snprintf(debug_buf + debug_off, max_nob,
-                              "%02x:%06x:%d:%lu.%06lu ",
-                              subsys >> 24, mask, smp_processor_id(),
+                              "%06x:%06x:%d:%lu.%06lu ",
+                              subsys, mask, smp_processor_id(),
                                tv.tv_sec, tv.tv_usec);
          max_nob -= prefix_nob;
  
@@ -752,7 +752,7 @@ portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
  
          va_start(ap, format);
          msg_nob += vsnprintf(debug_buf + debug_off + prefix_nob + msg_nob,
-                            max_nob, format, ap);
+                             max_nob, format, ap);
          max_nob -= msg_nob;
          va_end(ap);
  
@@ -790,7 +790,7 @@ void portals_debug_set_level(unsigned int debug_level)
          portal_debug = debug_level;
  }
  
-void portals_run_lbug_upcall(char * file, char *fn, int line)
+void portals_run_lbug_upcall(char *file, const char *fn, const int line)
  {
          char *argv[6];
          char *envp[3];
@@ -803,7 +803,7 @@ void portals_run_lbug_upcall(char * file, char *fn, int line)
          argv[0] = portals_upcall;
          argv[1] = "LBUG";
          argv[2] = file;
-        argv[3] = fn;
+        argv[3] = (char *)fn;
          argv[4] = buf;
          argv[5] = NULL;
  
diff --git a/lnet/libcfs/module.c b/lnet/libcfs/module.c

index 14cc325..e8eb290 100644 (file)
--- a/lnet/libcfs/module.c
+++ b/lnet/libcfs/module.c
@@ -62,10 +62,10 @@ static struct nal_cmd_handler nal_cmd[NAL_MAX_NR + 1];
  struct semaphore nal_cmd_sem;
  
  #ifdef PORTAL_DEBUG
-void
-kportal_assertion_failed (char *expr, char *file, char *func, int line)
+void kportal_assertion_failed(char *expr, char *file, const char *func,
+                              const int line)
  {
-        portals_debug_msg(0, D_EMERG, file, func, line, CDEBUG_STACK(),
+        portals_debug_msg(0, D_EMERG, file, func, line, CDEBUG_STACK,
                            "ASSERTION(%s) failed\n", expr);
          LBUG_WITH_LOC(file, func, line);
  }
diff --git a/lnet/lnet/.cvsignore b/lnet/lnet/.cvsignore

index e995588..95973d6 100644 (file)
--- a/lnet/lnet/.cvsignore
+++ b/lnet/lnet/.cvsignore
@@ -1,3 +1,4 @@
  .deps
  Makefile
  Makefile.in
+.*.o.cmd
diff --git a/lnet/lnet/Makefile.mk b/lnet/lnet/Makefile.mk

index 5627ef7..7822846 100644 (file)
--- a/lnet/lnet/Makefile.mk
+++ b/lnet/lnet/Makefile.mk
@@ -3,7 +3,10 @@
  # This code is issued under the GNU General Public License.
  # See the file COPYING in this distribution
  
-include ../Kernelenv
+include $(src)/../Kernelenv
  
  obj-y += portals.o
-portals-objs    := lib-dispatch.o lib-eq.o lib-init.o lib-md.o lib-me.o lib-move.o lib-msg.o lib-ni.o lib-not-impl.o lib-pid.o api-eq.o api-errno.o api-init.o api-md.o api-me.o api-ni.o api-wrap.o
+portals-objs    :=     lib-dispatch.o lib-eq.o lib-init.o lib-md.o lib-me.o \
+                       lib-move.o lib-msg.o lib-ni.o lib-pid.o \
+                       api-eq.o api-errno.o api-init.o api-me.o api-ni.o \
+                       api-wrap.o
diff --git a/lnet/lnet/api-init.c b/lnet/lnet/api-init.c

index e59c922..dc1fead 100644 (file)
--- a/lnet/lnet/api-init.c
+++ b/lnet/lnet/api-init.c
@@ -26,7 +26,7 @@
  #include <portals/api-support.h>
  
  int ptl_init;
-unsigned int portal_subsystem_debug = 0xfff7e3ff;
+unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_QSWNAL | S_SOCKNAL | S_GMNAL);
  unsigned int portal_debug = ~0;
  unsigned int portal_printk;
  unsigned int portal_stack;
diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c

index fde4f16..02f8b60 100644 (file)
--- a/lnet/lnet/lib-move.c
+++ b/lnet/lnet/lib-move.c
@@ -544,7 +544,7 @@ get_new_msg (nal_cb_t *nal, lib_md_t *md)
          msg->send_ack = 0;
  
          msg->md = md;
-        msg->ev.arrival_time = get_cycles();
+        do_gettimeofday(&msg->ev.arrival_time);
          md->pending++;
          if (md->threshold != PTL_MD_THRESH_INF) {
                  LASSERT (md->threshold > 0);
diff --git a/lnet/router/.cvsignore b/lnet/router/.cvsignore

index e995588..95973d6 100644 (file)
--- a/lnet/router/.cvsignore
+++ b/lnet/router/.cvsignore
@@ -1,3 +1,4 @@
  .deps
  Makefile
  Makefile.in
+.*.o.cmd
diff --git a/lnet/router/Makefile.mk b/lnet/router/Makefile.mk

index 64bd09b..9b02c03 100644 (file)
--- a/lnet/router/Makefile.mk
+++ b/lnet/router/Makefile.mk
@@ -3,7 +3,7 @@
  # This code is issued under the GNU General Public License.
  # See the file COPYING in this distribution
  
-include ../Kernelenv
+include $(src)/../Kernelenv
  
  obj-y += kptlrouter.o
  kptlrouter-objs    := router.o proc.o
diff --git a/lnet/router/router.c b/lnet/router/router.c

index 6074c3c..27a7fba 100644 (file)
--- a/lnet/router/router.c
+++ b/lnet/router/router.c
@@ -23,8 +23,8 @@
  
  #include "router.h"
  
-struct list_head kpr_routes;
-struct list_head kpr_nals;
+LIST_HEAD(kpr_routes);
+LIST_HEAD(kpr_nals);
  
  unsigned long long kpr_fwd_bytes;
  unsigned long      kpr_fwd_packets;
@@ -35,7 +35,7 @@ atomic_t           kpr_queue_depth;
   *
   * Once in a blue moon we register/deregister NALs and add/remove routing
   * entries (thread context only)... */
-rwlock_t         kpr_rwlock;
+rwlock_t         kpr_rwlock = RW_LOCK_UNLOCKED;
  
  kpr_router_interface_t kpr_router_interface = {
         kprri_register:         kpr_register_nal,
@@ -55,7 +55,7 @@ kpr_control_interface_t kpr_control_interface = {
  int
  kpr_register_nal (kpr_nal_interface_t *nalif, void **argp)
  {
-       long               flags;
+       unsigned long      flags;
         struct list_head  *e;
         kpr_nal_entry_t   *ne;
  
@@ -98,7 +98,7 @@ kpr_register_nal (kpr_nal_interface_t *nalif, void **argp)
  void
  kpr_shutdown_nal (void *arg)
  {
-       long             flags;
+       unsigned long    flags;
         kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg;
  
          CDEBUG (D_OTHER, "Shutting down NAL %d\n", ne->kpne_interface.kprni_nalid);
@@ -123,7 +123,7 @@ kpr_shutdown_nal (void *arg)
  void
  kpr_deregister_nal (void *arg)
  {
-       long              flags;
+       unsigned long     flags;
         kpr_nal_entry_t  *ne = (kpr_nal_entry_t *)arg;
  
          CDEBUG (D_OTHER, "Deregister NAL %d\n", ne->kpne_interface.kprni_nalid);
@@ -296,7 +296,7 @@ int
  kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, ptl_nid_t lo_nid,
                 ptl_nid_t hi_nid)
  {
-       long               flags;
+       unsigned long      flags;
         struct list_head  *e;
         kpr_route_entry_t *re;
  
@@ -345,7 +345,7 @@ kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, ptl_nid_t lo_nid,
  int
  kpr_del_route (ptl_nid_t nid)
  {
-       long               flags;
+       unsigned long      flags;
         struct list_head  *e;
  
          CDEBUG(D_OTHER, "Del route "LPX64"\n", nid);
@@ -398,7 +398,7 @@ kpr_get_route(int idx, int *gateway_nalid, ptl_nid_t *gateway_nid,
          return (-ENOENT);
  }
  
-static void __exit
+static void /*__exit*/
  kpr_finalise (void)
  {
          LASSERT (list_empty (&kpr_nals));
@@ -427,10 +427,6 @@ kpr_initialise (void)
          CDEBUG(D_MALLOC, "kpr_initialise: kmem %d\n",
                 atomic_read(&portal_kmemory));
  
-       rwlock_init(&kpr_rwlock);
-       INIT_LIST_HEAD(&kpr_routes);
-       INIT_LIST_HEAD(&kpr_nals);
-
          kpr_proc_init();
  
          PORTAL_SYMBOL_REGISTER(kpr_router_interface);
diff --git a/lnet/tests/.cvsignore b/lnet/tests/.cvsignore

index 051d1bd..d0c4c88 100644 (file)
--- a/lnet/tests/.cvsignore
+++ b/lnet/tests/.cvsignore
@@ -1,3 +1,4 @@
  Makefile
  Makefile.in
  .deps
+.*.o.cmd
diff --git a/lnet/tests/ping_cli.c b/lnet/tests/ping_cli.c

index 389ffbb..4d04ffb 100644 (file)
--- a/lnet/tests/ping_cli.c
+++ b/lnet/tests/ping_cli.c
@@ -260,7 +260,7 @@ pingcli_start(struct portal_ioctl_data *args)
  
  
  /* called by the portals_ioctl for ping requests */
-static int kping_client(struct portal_ioctl_data *args)
+int kping_client(struct portal_ioctl_data *args)
  {
          PORTAL_ALLOC (client, sizeof(struct pingcli_data));
          if (client == NULL)
@@ -282,7 +282,7 @@ static int __init pingcli_init(void)
  } /* pingcli_init() */
  
  
-static void __exit pingcli_cleanup(void)
+static void /*__exit*/ pingcli_cleanup(void)
  {
          PORTAL_SYMBOL_UNREGISTER (kping_client);
  } /* pingcli_cleanup() */
diff --git a/lnet/tests/ping_srv.c b/lnet/tests/ping_srv.c

index 1037d09..873e11c 100644 (file)
--- a/lnet/tests/ping_srv.c
+++ b/lnet/tests/ping_srv.c
@@ -47,11 +47,11 @@
  #include <asm/semaphore.h>
  
  #define STDSIZE (sizeof(int) + sizeof(int) + sizeof(struct timeval))
-#define MAXSIZE (16*1024*1024)
+#define MAXSIZE (16*1024)
  
  static unsigned ping_head_magic;
  static unsigned ping_bulk_magic;
-static int nal  = 0;                            // Your NAL,
+static int nal  = SOCKNAL;                            // Your NAL,
  static unsigned long packets_valid = 0;         // Valid packets 
  static int running = 1;
  atomic_t pkt;
@@ -282,7 +282,7 @@ static int __init pingsrv_init(void)
  } /* pingsrv_init() */
  
  
-static void __exit pingsrv_cleanup(void)
+static void /*__exit*/ pingsrv_cleanup(void)
  {
          remove_proc_entry ("net/pingsrv", NULL);
          
diff --git a/lnet/tests/sping_cli.c b/lnet/tests/sping_cli.c

index 4cef08b..35e114b 100644 (file)
--- a/lnet/tests/sping_cli.c
+++ b/lnet/tests/sping_cli.c
@@ -235,7 +235,7 @@ pingcli_start(struct portal_ioctl_data *args)
  
  
  /* called by the portals_ioctl for ping requests */
-static int kping_client(struct portal_ioctl_data *args)
+int kping_client(struct portal_ioctl_data *args)
  {
  
          PORTAL_ALLOC (client, sizeof(struct pingcli_data));
@@ -258,7 +258,7 @@ static int __init pingcli_init(void)
  } /* pingcli_init() */
  
  
-static void __exit pingcli_cleanup(void)
+static void /*__exit*/ pingcli_cleanup(void)
  {
          PORTAL_SYMBOL_UNREGISTER (kping_client);
  } /* pingcli_cleanup() */
diff --git a/lnet/tests/sping_srv.c b/lnet/tests/sping_srv.c

index a18ea35..2b45a46 100644 (file)
--- a/lnet/tests/sping_srv.c
+++ b/lnet/tests/sping_srv.c
@@ -269,7 +269,7 @@ static int __init pingsrv_init(void)
  } /* pingsrv_init() */
  
  
-static void __exit pingsrv_cleanup(void)
+static void /*__exit*/ pingsrv_cleanup(void)
  {
          remove_proc_entry ("net/pingsrv", NULL);
          
diff --git a/lnet/ulnds/debug.c b/lnet/ulnds/debug.c

index 529bb2d..b73f042 100644 (file)
--- a/lnet/ulnds/debug.c
+++ b/lnet/ulnds/debug.c
@@ -84,8 +84,8 @@ int portals_debug_copy_to_user(char *buf, unsigned long len)
  
  /* FIXME: I'm not very smart; someone smarter should make this better. */
  void
-portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
-                   const char *format, ...)
+portals_debug_msg (int subsys, int mask, char *file, const char *fn, 
+                   const int line, const char *format, ...)
  {
          va_list       ap;
          unsigned long flags;
diff --git a/lnet/ulnds/socklnd/debug.c b/lnet/ulnds/socklnd/debug.c

index 529bb2d..b73f042 100644 (file)
--- a/lnet/ulnds/socklnd/debug.c
+++ b/lnet/ulnds/socklnd/debug.c
@@ -84,8 +84,8 @@ int portals_debug_copy_to_user(char *buf, unsigned long len)
  
  /* FIXME: I'm not very smart; someone smarter should make this better. */
  void
-portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
-                   const char *format, ...)
+portals_debug_msg (int subsys, int mask, char *file, const char *fn, 
+                   const int line, const char *format, ...)
  {
          va_list       ap;
          unsigned long flags;
diff --git a/lnet/utils/.cvsignore b/lnet/utils/.cvsignore

index 148310a..8e474ad 100644 (file)
--- a/lnet/utils/.cvsignore
+++ b/lnet/utils/.cvsignore
@@ -5,4 +5,5 @@ debugctl
  ptlctl
  .deps
  routerstat
-wirecheck
-\ No newline at end of file
+wirecheck
+.*.cmd
diff --git a/lnet/utils/debug.c b/lnet/utils/debug.c

index 9ab1c73..0a009d2 100644 (file)
--- a/lnet/utils/debug.c
+++ b/lnet/utils/debug.c
@@ -53,17 +53,18 @@ static char rawbuf[8192];
  static char *buf = rawbuf;
  static int max = 8192;
  //static int g_pfd = -1;
-static int subsystem_array[1 << 8];
+static int subsystem_mask = ~0;
  static int debug_mask = ~0;
  
  static const char *portal_debug_subsystems[] =
-        {"undefined", "mdc", "mds", "osc", "ost", "class", "obdfs", "llite",
-         "rpc", "ext2obd", "portals", "socknal", "qswnal", "pinger", "filter",
-         "obdtrace", "echo", "ldlm", "lov", "gmnal", "router", "ptldb", NULL};
+        {"undefined", "mdc", "mds", "osc", "ost", "class", "log", "llite",
+         "rpc", "mgmt", "portals", "socknal", "qswnal", "pinger", "filter",
+         "ptlbd", "echo", "ldlm", "lov", "gmnal", "router", "cobd", NULL};
  static const char *portal_debug_masks[] =
          {"trace", "inode", "super", "ext2", "malloc", "cache", "info", "ioctl",
           "blocks", "net", "warning", "buffs", "other", "dentry", "portals",
-         "page", "dlmtrace", "error", "emerg", "ha", "rpctrace", "vfstrace", NULL};
+         "page", "dlmtrace", "error", "emerg", "ha", "rpctrace", "vfstrace",
+         NULL};
  
  struct debug_daemon_cmd {
          char *cmd;
@@ -88,7 +89,10 @@ static int do_debug_mask(char *name, int enable)
                          printf("%s output from subsystem \"%s\"\n",
                                  enable ? "Enabling" : "Disabling",
                                  portal_debug_subsystems[i]);
-                        subsystem_array[i] = enable;
+                        if (enable)
+                                subsystem_mask |= (1 << i);
+                        else
+                                subsystem_mask &= ~(1 << i);
                          found = 1;
                  }
          }
@@ -111,7 +115,6 @@ static int do_debug_mask(char *name, int enable)
  
  int dbg_initialize(int argc, char **argv)
  {
-        memset(subsystem_array, 1, sizeof(subsystem_array));
          return 0;
  }
  
@@ -213,12 +216,7 @@ int jt_dbg_list(int argc, char **argv)
                  for (i = 0; portal_debug_masks[i] != NULL; i++)
                          printf(", %s", portal_debug_masks[i]);
                  printf("\n");
-        }
-        else if (strcasecmp(argv[1], "applymasks") == 0) {
-                unsigned int subsystem_mask = 0;
-                for (i = 0; portal_debug_subsystems[i] != NULL; i++) {
-                        if (subsystem_array[i]) subsystem_mask |= (1 << i);
-                }
+        } else if (strcasecmp(argv[1], "applymasks") == 0) {
                  applymask_all(subsystem_mask, debug_mask);
          }
          return 0;
@@ -230,12 +228,6 @@ static void dump_buffer(FILE *fd, char *buf, int size, int raw)
  {
          char *p, *z;
          unsigned long subsystem, debug, dropped = 0, kept = 0;
-        int max_sub, max_type;
-
-        for (max_sub = 0; portal_debug_subsystems[max_sub] != NULL; max_sub++)
-                ;
-        for (max_type = 0; portal_debug_masks[max_type] != NULL; max_type++)
-                ;
  
          while (size) {
                  p = memchr(buf, '\n', size);
@@ -247,8 +239,7 @@ static void dump_buffer(FILE *fd, char *buf, int size, int raw)
                  z++;
                  /* for some reason %*s isn't working. */
                  *p = '\0';
-                if (subsystem < max_sub &&
-                    subsystem_array[subsystem] &&
+                if ((subsystem_mask & subsystem) &&
                      (!debug || (debug_mask & debug))) {
                          if (raw)
                                  fprintf(fd, "%s\n", buf);
@@ -551,6 +542,8 @@ int jt_dbg_modules(int argc, char **argv)
                  {"mds_ext3", "lustre/mds"},
                  {"mds_extN", "lustre/mds"},
                  {"ptlbd", "lustre/ptlbd"},
+                {"mgmt_svc", "lustre/mgmt"},
+                {"mgmt_cli", "lustre/mgmt"},
                  {NULL, NULL}
          };
          char *path = "..";
diff --git a/lnet/utils/portals.c b/lnet/utils/portals.c

index 90d66f5..a89f4f7 100644 (file)
--- a/lnet/utils/portals.c
+++ b/lnet/utils/portals.c
@@ -22,6 +22,7 @@
  
  #include <stdio.h>
  #include <sys/types.h>
+#include <netdb.h>
  #include <sys/socket.h>
  #include <netinet/tcp.h>
  #include <netdb.h>
@@ -106,6 +107,27 @@ nal2name (int nal)
          return ((e == NULL) ? "???" : e->name);
  }
  
+/* gethostbyname() wrapper: reports a failed lookup on stderr, returns NULL. */
+static struct hostent *
+ptl_gethostbyname(char * hname) {
+        struct hostent *he = gethostbyname(hname);
+
+        if (he != NULL)
+                return he;
+        switch (h_errno) {
+        case HOST_NOT_FOUND:
+        case NO_ADDRESS:
+                fprintf(stderr, "Unable to resolve hostname: %s\n", hname);
+                break;
+        default:
+                /* resolver failures are reported in h_errno, not errno */
+                fprintf(stderr, "gethostbyname error: %s\n",
+                        hstrerror(h_errno));
+                break;
+        }
+        return NULL;
+}
+
  int
  ptl_parse_nid (ptl_nid_t *nidp, char *str)
  {
@@ -127,7 +149,7 @@ ptl_parse_nid (ptl_nid_t *nidp, char *str)
          
          if ((('a' <= str[0] && str[0] <= 'z') ||
               ('A' <= str[0] && str[0] <= 'Z')) &&
-             (he = gethostbyname (str)) != NULL)
+             (he = ptl_gethostbyname (str)) != NULL)
          {
                  __u32 addr = *(__u32 *)he->h_addr;
  
@@ -351,12 +373,9 @@ int jt_ptl_connect(int argc, char **argv)
                          goto usage;
                  }
  
-                he = gethostbyname(argv[1]);
-                if (!he) {
-                        fprintf(stderr, "gethostbyname error: %s\n",
-                                strerror(errno));
+                he = ptl_gethostbyname(argv[1]);
+                if (!he)
                          return -1;
-                }
  
                  g_port = atol(argv[2]);
  
@@ -525,12 +544,9 @@ int jt_ptl_disconnect(int argc, char **argv)
  
                  PORTAL_IOC_INIT(data);
                  if (argc == 2) {
-                        he = gethostbyname(argv[1]);
-                        if (!he) {
-                                fprintf(stderr, "gethostbyname error: %s\n",
-                                        strerror(errno));
+                        he = ptl_gethostbyname(argv[1]);
+                        if (!he) 
                                  return -1;
-                        }
                          
                          data.ioc_nid = ntohl (*(__u32 *)he->h_addr); /* HOST byte order */
  
@@ -582,12 +598,9 @@ int jt_ptl_push_connection (int argc, char **argv)
  
                  PORTAL_IOC_INIT(data);
                  if (argc == 2) {
-                        he = gethostbyname(argv[1]);
-                        if (!he) {
-                                fprintf(stderr, "gethostbyname error: %s\n",
-                                        strerror(errno));
+                        he = ptl_gethostbyname(argv[1]);
+                        if (!he)
                                  return -1;
-                        }
                          
                          data.ioc_nid = ntohl (*(__u32 *)he->h_addr); /* HOST byte order */
  
diff --git a/lustre/.cvsignore b/lustre/.cvsignore

index 776ef36..a8a5356 100644 (file)
--- a/lustre/.cvsignore
+++ b/lustre/.cvsignore
@@ -15,4 +15,4 @@ cscope.files
  cscope.out
  autom4te-2.53.cache
  autom4te.cache
-
+.*.o.cmd
diff --git a/lustre/ChangeLog b/lustre/ChangeLog

index 89eaef7..17c08c6 100644 (file)
--- a/lustre/ChangeLog
+++ b/lustre/ChangeLog
@@ -1,4 +1,14 @@
  tbd
+       * version v0_8
+       * bug fixes
+        - orphans are moved into the PENDING directory for possible recovery
+        - replayed opens now open by fid for orphan/rename safety (1042)
+        - last close of an orphan inode generates a transno (683)
+       - chdir() and mount() now pin the directory entry (1020)
+       - avoid CERROR in normal ll_setattr_raw() error case (1500)
+       - discard very old requests without processing them (1502)
+
+2003-06-15  Phil Schwan  <phil@clusterfs.com>
         * version v0_7
         * bug fixes
         - imports and exports cleanup too early, need refcounts (349, 879, 1045)
diff --git a/lustre/Makefile.mk b/lustre/Makefile.mk

index e540148..59178a4 100644 (file)
--- a/lustre/Makefile.mk
+++ b/lustre/Makefile.mk
@@ -1,4 +1,22 @@
-include fs/lustre/portals/Kernelenv
+include $(src)/portals/Kernelenv
+
+# for scripts/version_tag.pl
+LINUX = @LINUX@
  
  obj-y += portals/
+# obdclass has to come before anything that does class_register..
+obj-y += obdclass/
+obj-y += ptlrpc/
+obj-y += ldlm/
+obj-y += obdfilter/
+obj-y += mdc/
  obj-y += mds/
+obj-y += obdecho/
+obj-y += osc/
+obj-y += ost/
+obj-y += lov/
+obj-y += llite/
+
+# portals needs to be before utils/, which pulls in ptlctl objects
+obj-m += utils/
+obj-m += tests/ 
diff --git a/lustre/cobd/cache_obd.c b/lustre/cobd/cache_obd.c

index 5efb545..2d3549b 100644 (file)
--- a/lustre/cobd/cache_obd.c
+++ b/lustre/cobd/cache_obd.c
@@ -36,13 +36,13 @@ static int cobd_attach(struct obd_device *dev, obd_count len, void *data)
  {
          struct lprocfs_static_vars lvars;
  
-        lprocfs_init_vars(&lvars);
-       return lprocfs_obd_attach(dev, lvars.obd_vars);
+        lprocfs_init_vars(cobd, &lvars);
+        return lprocfs_obd_attach(dev, lvars.obd_vars);
  }
  
  static int cobd_detach(struct obd_device *dev)
  {
-       return lprocfs_obd_detach(dev);
+        return lprocfs_obd_detach(dev);
  }
  
  static int
@@ -82,24 +82,23 @@ cobd_setup (struct obd_device *dev, obd_count len, void *buf)
          return (0);
  
   fail_0:
-        obd_disconnect (&cobd->cobd_target, 0 );
+        obd_disconnect(&cobd->cobd_target, 0);
          return (rc);
  }
  
-static int
-cobd_cleanup (struct obd_device *dev, int force, int failover)
+static int cobd_cleanup(struct obd_device *dev, int flags)
  {
          struct cache_obd  *cobd = &dev->u.cobd;
          int                rc;
  
-        if (!list_empty (&dev->obd_exports))
+        if (!list_empty(&dev->obd_exports))
                  return (-EBUSY);
  
-        rc = obd_disconnect (&cobd->cobd_cache, failover);
+        rc = obd_disconnect(&cobd->cobd_cache, flags);
          if (rc != 0)
                  CERROR ("error %d disconnecting cache\n", rc);
  
-        rc = obd_disconnect (&cobd->cobd_target, failover);
+        rc = obd_disconnect(&cobd->cobd_target, flags);
          if (rc != 0)
                  CERROR ("error %d disconnecting target\n", rc);
  
@@ -116,13 +115,12 @@ cobd_connect (struct lustre_handle *conn, struct obd_device *obd,
          return (rc);
  }
  
-static int
-cobd_disconnect (struct lustre_handle *conn, int failover)
+static int cobd_disconnect(struct lustre_handle *conn, int flags)
  {
-       int rc = class_disconnect (conn, failover);
+        int rc = class_disconnect(conn, flags);
  
          CERROR ("rc %d\n", rc);
-       return (rc);
+        return (rc);
  }
  
  static int
@@ -144,23 +142,15 @@ cobd_get_info(struct lustre_handle *conn, obd_count keylen,
          return obd_get_info(&cobd->cobd_target, keylen, key, vallen, val);
  }
  
-static int cobd_statfs(struct obd_export *exp, struct obd_statfs *osfs)
+static int cobd_statfs(struct obd_device *obd, struct obd_statfs *osfs,
+                       unsigned long max_age)
  {
-        struct obd_export *cobd_exp;
-        int rc;
-
-        if (exp->exp_obd == NULL)
-                return -EINVAL;
-
-        cobd_exp = class_conn2export(&exp->exp_obd->u.cobd.cobd_target);
-        rc = obd_statfs(cobd_exp, osfs);
-        class_export_put(cobd_exp);
-        return rc;
+        return obd_statfs(class_conn2obd(&obd->u.cobd.cobd_target), osfs,
+                          max_age);
  }
  
-static int
-cobd_getattr(struct lustre_handle *conn, struct obdo *oa,
-             struct lov_stripe_md *lsm)
+static int cobd_getattr(struct lustre_handle *conn, struct obdo *oa,
+                        struct lov_stripe_md *lsm)
  {
          struct obd_device *obd = class_conn2obd(conn);
          struct cache_obd  *cobd;
@@ -207,11 +197,10 @@ cobd_close(struct lustre_handle *conn, struct obdo *oa,
          return (obd_close (&cobd->cobd_target, oa, lsm, oti));
  }
  
-static int cobd_preprw(int cmd, struct obd_export *exp, struct obdo *obdo,
+static int cobd_preprw(int cmd, struct obd_export *exp, struct obdo *oa,
                         int objcount, struct obd_ioobj *obj,
                         int niocount, struct niobuf_remote *nb,
-                       struct niobuf_local *res, void **desc_private,
-                       struct obd_trans_info *oti)
+                       struct niobuf_local *res, struct obd_trans_info *oti)
  {
          struct obd_export *cobd_exp;
          int rc;
@@ -223,16 +212,17 @@ static int cobd_preprw(int cmd, struct obd_export *exp, struct obdo *obdo,
                  return -EOPNOTSUPP;
  
          cobd_exp = class_conn2export(&exp->exp_obd->u.cobd.cobd_target);
-        rc = obd_preprw(cmd, cobd_exp, obdo, objcount, obj, niocount, nb, res,
-                        desc_private, oti);
+        rc = obd_preprw(cmd, cobd_exp, oa, objcount, obj, niocount, nb, res,
+                        oti);
          class_export_put(cobd_exp);
+
          return rc;
  }
  
-static int cobd_commitrw(int cmd, struct obd_export *exp,
+static int cobd_commitrw(int cmd, struct obd_export *exp, struct obdo *oa,
                           int objcount, struct obd_ioobj *obj,
                           int niocount, struct niobuf_local *local,
-                         void *desc_private, struct obd_trans_info *oti)
+                         struct obd_trans_info *oti)
  {
          struct obd_export *cobd_exp;
          int rc;
@@ -244,16 +234,14 @@ static int cobd_commitrw(int cmd, struct obd_export *exp,
                  return -EOPNOTSUPP;
  
          cobd_exp = class_conn2export(&exp->exp_obd->u.cobd.cobd_target);
-        rc = obd_commitrw(cmd, cobd_exp, objcount, obj, niocount, local,
-                          desc_private, oti);
+        rc = obd_commitrw(cmd, cobd_exp, oa, objcount, obj,niocount,local,oti);
          class_export_put(cobd_exp);
          return rc;
  }
  
-static inline int
-cobd_brw(int cmd, struct lustre_handle *conn,
-         struct lov_stripe_md *lsm, obd_count oa_bufs,
-         struct brw_page *pga, struct obd_trans_info *oti)
+static int cobd_brw(int cmd, struct lustre_handle *conn, struct obdo *oa,
+                    struct lov_stripe_md *lsm, obd_count oa_bufs,
+                    struct brw_page *pga, struct obd_trans_info *oti)
  {
          struct obd_device *obd = class_conn2obd(conn);
          struct cache_obd  *cobd;
@@ -267,13 +255,11 @@ cobd_brw(int cmd, struct lustre_handle *conn,
                  return -EOPNOTSUPP;
  
          cobd = &obd->u.cobd;
-        return (obd_brw (cmd, &cobd->cobd_target,
-                         lsm, oa_bufs, pga, oti));
+        return (obd_brw(cmd, &cobd->cobd_target, oa, lsm, oa_bufs, pga, oti));
  }
  
-static int
-cobd_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
-               void *karg, void *uarg)
+static int cobd_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
+                          void *karg, void *uarg)
  {
          struct obd_device *obd = class_conn2obd(conn);
          struct cache_obd  *cobd;
@@ -286,7 +272,7 @@ cobd_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
          /* intercept? */
  
          cobd = &obd->u.cobd;
-        return (obd_iocontrol (cmd, &cobd->cobd_target, len, karg, uarg));
+        return (obd_iocontrol(cmd, &cobd->cobd_target, len, karg, uarg));
  }
  
  static struct obd_ops cobd_ops = {
@@ -317,16 +303,16 @@ static int __init cobd_init(void)
          struct lprocfs_static_vars lvars;
          ENTRY;
  
-       printk(KERN_INFO "Lustre Caching OBD driver; info@clusterfs.com\n");
+        printk(KERN_INFO "Lustre Caching OBD driver; info@clusterfs.com\n");
  
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(cobd, &lvars);
          RETURN(class_register_type(&cobd_ops, lvars.module_vars,
                                     OBD_CACHE_DEVICENAME));
  }
  
-static void __exit cobd_exit(void)
+static void /*__exit*/ cobd_exit(void)
  {
-       class_unregister_type(OBD_CACHE_DEVICENAME);
+        class_unregister_type(OBD_CACHE_DEVICENAME);
  }
  
  MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
diff --git a/lustre/cobd/lproc_cache.c b/lustre/cobd/lproc_cache.c

index fd7474b..ba9b9cf 100644 (file)
--- a/lustre/cobd/lproc_cache.c
+++ b/lustre/cobd/lproc_cache.c
@@ -25,67 +25,59 @@
  #include <linux/lprocfs_status.h>
  
  #ifndef LPROCFS
-struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
-struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_module_vars[] = { {0} };
  #else
  /* Common STATUS namespace */
-static int rd_target(char *page, char **start, off_t off, int count,
-                     int *eof, void *data)
+static int cobd_rd_target(char *page, char **start, off_t off, int count,
+                          int *eof, void *data)
  {
-        struct obd_device    *dev = (struct obd_device*)data;
-       struct lustre_handle *conn;
-       struct obd_export    *exp;
-       int    rc;
+        struct obd_device *cobd = (struct obd_device *)data;
+        int    rc;
  
-        LASSERT(dev != NULL);
-        conn = &dev->u.cobd.cobd_target;
+        LASSERT(cobd != NULL);
  
-       if (!dev->obd_set_up) {
-               rc = snprintf (page, count, "not set up\n");
-       } else {
-               exp = class_conn2export(conn);
-               LASSERT(exp != NULL);
-               rc = snprintf(page, count, "%s\n", 
-                              exp->exp_obd->obd_uuid.uuid);
-                class_export_put(exp);
-       }
-       return (rc);
+        if (!cobd->obd_set_up) {
+                rc = snprintf(page, count, "not set up\n");
+        } else {
+                struct obd_device *tgt =
+                        class_conn2obd(&cobd->u.cobd.cobd_target);
+                LASSERT(tgt != NULL);
+                rc = snprintf(page, count, "%s\n", tgt->obd_uuid.uuid);
+        }
+        return rc;
  }
  
-static int rd_cache(char *page, char **start, off_t off, int count,
-                    int *eof, void *data)
+static int cobd_rd_cache(char *page, char **start, off_t off, int count,
+                         int *eof, void *data)
  {
-        struct obd_device    *dev = (struct obd_device*)data;
-       struct lustre_handle *conn;
-       struct obd_export    *exp;
-       int    rc;
+        struct obd_device *cobd = (struct obd_device*)data;
+        int    rc;
  
-        LASSERT(dev != NULL);
-        conn = &dev->u.cobd.cobd_cache;
+        LASSERT(cobd != NULL);
  
-       if (!dev->obd_set_up) {
-               rc = snprintf (page, count, "not set up\n");
+        if (!cobd->obd_set_up) {
+                rc = snprintf(page, count, "not set up\n");
          } else {
-               exp = class_conn2export(conn);
-               LASSERT (exp != NULL);
-               rc = snprintf(page, count, "%s\n", 
-                              exp->exp_obd->obd_uuid.uuid);
-                class_export_put(exp);
-       }
-       return (rc);
+                struct obd_device *cache =
+                        class_conn2obd(&cobd->u.cobd.cobd_cache);
+                LASSERT(cache != NULL);
+                rc = snprintf(page, count, "%s\n", cache->obd_uuid.uuid);
+        }
+        return rc;
  }
  
-struct lprocfs_vars lprocfs_obd_vars[] = {
-        { "uuid",        lprocfs_rd_uuid,    0, 0 },
-        { "target_uuid", rd_target,          0, 0 },
-        { "cache_uuid",  rd_cache,           0, 0 },
+static struct lprocfs_vars lprocfs_obd_vars[] = {
+        { "uuid",         lprocfs_rd_uuid,        0, 0 },
+        { "target_uuid",  cobd_rd_target,         0, 0 },
+        { "cache_uuid",   cobd_rd_cache,          0, 0 },
          { 0 }
  };
  
  struct lprocfs_vars lprocfs_module_vars[] = {
-        { "num_refs",    lprocfs_rd_numrefs, 0, 0 },
+        { "num_refs",     lprocfs_rd_numrefs,     0, 0 },
          { 0 }
  };
  #endif /* LPROCFS */
  
-LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
+LPROCFS_INIT_VARS(cobd, lprocfs_module_vars, lprocfs_obd_vars)
diff --git a/lustre/conf/lustre.dtd b/lustre/conf/lustre.dtd

index 51d1d1a..de4d653 100644 (file)
--- a/lustre/conf/lustre.dtd
+++ b/lustre/conf/lustre.dtd
@@ -33,10 +33,11 @@
  
  <!ELEMENT profile (ldlm_ref | ptlrpc_ref | network_ref | routetbl_ref |
                     osd_ref | mdsdev_ref | lovconfig_ref|
-                   echoclient_ref | mountpoint_ref)*>
+                   echoclient_ref | mountpoint_ref | mgmt_ref)*>
  <!ATTLIST profile %object.attr;>
  
-<!ELEMENT mountpoint (path | fileset | mds_ref | obd_ref)*>
+<!ELEMENT mountpoint ((path, filesystem_ref) |
+                      (filesystem_ref, path))>
  <!ATTLIST mountpoint %object.attr;>
  
  <!ELEMENT echoclient (obd_ref)>
@@ -45,6 +46,9 @@
  <!ELEMENT ldlm EMPTY>
  <!ATTLIST ldlm %object.attr;>
  
+<!ELEMENT mgmt (active_ref)*>
+<!ATTLIST mgmt %object.attr;>
+
  <!ELEMENT ptlrpc EMPTY>
  <!ATTLIST ptlrpc %object.attr;>
  
@@ -57,6 +61,11 @@
  <!ATTLIST ost %object.attr;
                failover ( 1 | 0 ) #IMPLIED>
  
+<!ELEMENT filesystem (mds_ref,
+                      obd_ref,
+                      mgmt_ref*)>
+<!ATTLIST filesystem %object.attr;>
+
  <!ELEMENT mds (active_ref | lovconfig_ref | group)*>
  <!ATTLIST mds %object.attr;
                failover ( 1 | 0 ) #IMPLIED>
@@ -110,16 +119,20 @@
  <!ATTLIST obd_ref         %objref.attr;>
  <!ELEMENT ost_ref         %objref.content;>
  <!ATTLIST ost_ref         %objref.attr;>
-<!ELEMENT active_ref         %objref.content;>
-<!ATTLIST active_ref         %objref.attr;>
-<!ELEMENT target_ref         %objref.content;>
-<!ATTLIST target_ref         %objref.attr;>
+<!ELEMENT active_ref      %objref.content;>
+<!ATTLIST active_ref      %objref.attr;>
+<!ELEMENT target_ref      %objref.content;>
+<!ATTLIST target_ref      %objref.attr;>
  <!ELEMENT lov_ref         %objref.content;>
  <!ATTLIST lov_ref         %objref.attr;>
  <!ELEMENT lovconfig_ref   %objref.content;>
  <!ATTLIST lovconfig_ref   %objref.attr;>
+<!ELEMENT mgmt_ref        %objref.content;>
+<!ATTLIST mgmt_ref        %objref.attr;>
  <!ELEMENT mountpoint_ref  %objref.content;>
  <!ATTLIST mountpoint_ref  %objref.attr;>
+<!ELEMENT filesystem_ref  %objref.content;>
+<!ATTLIST filesystem_ref  %objref.attr;>
  <!ELEMENT echoclient_ref  %objref.content;>
  <!ATTLIST echoclient_ref  %objref.attr;>
  <!ELEMENT failover_ref    %objref.content;>
diff --git a/lustre/configure.in b/lustre/configure.in

index 8e12135..50f82c8 100644 (file)
--- a/lustre/configure.in
+++ b/lustre/configure.in
@@ -14,6 +14,18 @@ AM_INIT_AUTOMAKE(lustre, builtin([esyscmd], [sed -ne '/^%define version /{ s/.*v
  AC_ARG_ENABLE(extN, [  --enable-extN use extN instead of ext3 for lustre backend])
  AM_CONDITIONAL(EXTN, test x$enable_extN = xyes)
  
+# the pinger is temporary, until we have the recovery node in place
+AC_ARG_ENABLE(pinger, [  --enable-pinger recovery pinger support])
+if test x$enable_pinger = xyes ; then
+  AC_DEFINE(ENABLE_PINGER, 1, Use the Pinger)
+fi
+
+# very experimental orphan support
+AC_ARG_ENABLE(orphans, [  --enable-orphans very experimental orphan recovery support])
+if test x$enable_orphans = xyes ; then
+  AC_DEFINE(ENABLE_ORPHANS, 1, Compile with orphan support)
+fi
+
  AC_ARG_WITH(obd-buffer-size, [  --with-obd-buffer-size=[size] set lctl ioctl maximum (default=8K)],OBD_BUFFER_SIZE=$with_obd_buffer_size,OBD_BUFFER_SIZE=8192)
  AC_DEFINE_UNQUOTED(OBD_MAX_IOCTL_BUFFER, $OBD_BUFFER_SIZE, [IOCTL Buffer Size])
  
@@ -21,15 +33,8 @@ sinclude(portals/build.m4)
  sinclude(portals/archdep.m4)
  
  if test x$enable_inkernel = xyes ; then
-cp Makefile.mk Makefile.in
-cp mds/Makefile.mk mds/Makefile.in
-cp portals/Kernelenv.mk portals/Kernelenv.in
-cp portals/Makefile.mk portals/Makefile.in
-cp portals/libcfs/Makefile.mk portals/libcfs/Makefile.in
-cp portals/portals/Makefile.mk portals/portals/Makefile.in
-cp portals/knals/Makefile.mk portals/knals/Makefile.in
-cp portals/knals/socknal/Makefile.mk portals/knals/socknal/Makefile.in
-cp portals/router/Makefile.mk portals/router/Makefile.in
+       find . -name Makefile.mk | sed 's/.mk$//' | xargs -n 1 \
+               sh -e -x -c '(cp -f $0.mk $0.in)'
  fi
  
  AM_CONFIG_HEADER(portals/include/config.h)
diff --git a/lustre/include/liblustre.h b/lustre/include/liblustre.h

index 202a761..6b94901 100644 (file)
--- a/lustre/include/liblustre.h
+++ b/lustre/include/liblustre.h
@@ -363,16 +363,16 @@ struct page {
  #define kmap(page) (page)->addr
  #define kunmap(a) do { int foo = 1; foo++; } while (0)
  
-static inline struct page *alloc_pages(int mask, unsigned long foo)
+static inline struct page *alloc_pages(int mask, unsigned long order)
  {
          struct page *pg = malloc(sizeof(*pg));
  
          if (!pg)
                  return NULL;
  #ifdef MAP_ANONYMOUS
-        pg->addr = mmap(0, PAGE_SIZE, PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
+        pg->addr = mmap(0, PAGE_SIZE << order, PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
  #else
-        pg->addr = malloc(PAGE_SIZE);
+        pg->addr = malloc(PAGE_SIZE << order);
  #endif
  
          if (!pg->addr) {
@@ -407,26 +407,27 @@ static inline struct page* __grab_cache_page(int index)
  /* arithmetic */
  #define do_div(a,b)                     \
          ({                              \
-                unsigned long ret;      \
-                ret = (a)%(b);          \
-                (a) = (a)/(b);          \
-                (ret);                  \
+                unsigned long remainder;\
+                remainder = (a) % (b);  \
+                (a) = (a) / (b);        \
+                (remainder);            \
          })
  
  /* VFS stuff */
-#define ATTR_MODE       1
-#define ATTR_UID        2
-#define ATTR_GID        4
-#define ATTR_SIZE       8
-#define ATTR_ATIME      16
-#define ATTR_MTIME      32
-#define ATTR_CTIME      64
-#define ATTR_ATIME_SET  128
-#define ATTR_MTIME_SET  256
-#define ATTR_FORCE      512     /* Not a change, but a change it */
-#define ATTR_ATTR_FLAG  1024
-#define ATTR_RAW        2048    /* file system, not vfs will massage attrs */
-#define ATTR_FROM_OPEN  4096    /* called from open path, ie O_TRUNC */
+#define ATTR_MODE       0x0001
+#define ATTR_UID        0x0002
+#define ATTR_GID        0x0004
+#define ATTR_SIZE       0x0008
+#define ATTR_ATIME      0x0010
+#define ATTR_MTIME      0x0020
+#define ATTR_CTIME      0x0040
+#define ATTR_ATIME_SET  0x0080
+#define ATTR_MTIME_SET  0x0100
+#define ATTR_FORCE      0x0200  /* Not a change, but a change it */
+#define ATTR_ATTR_FLAG  0x0400
+#define ATTR_RAW        0x0800  /* file system, not vfs will massage attrs */
+#define ATTR_FROM_OPEN  0x1000  /* called from open path, ie O_TRUNC */
+#define ATTR_CTIME_SET  0x2000
  
  struct iattr {
          unsigned int    ia_valid;
diff --git a/lustre/include/linux/lprocfs_status.h b/lustre/include/linux/lprocfs_status.h

index fb96bde..e6678f8 100644 (file)
--- a/lustre/include/linux/lprocfs_status.h
+++ b/lustre/include/linux/lprocfs_status.h
@@ -24,14 +24,25 @@
  #ifndef _LPROCFS_SNMP_H
  #define _LPROCFS_SNMP_H
  
+
  #ifdef __KERNEL__
  #include <linux/config.h>
  #include <linux/autoconf.h>
  #include <linux/proc_fs.h>
+#include <linux/version.h>
  #include <linux/smp.h>
+#include <linux/kp30.h>
+
+# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#  include <linux/statfs.h>
+# else 
+#  define kstatfs statfs
+# endif
+
+#else 
+#  define kstatfs statfs
  #endif
  
-#include <linux/kp30.h>
  
  #ifndef LPROCFS
  #ifdef  CONFIG_PROC_FS  /* Ensure that /proc is configured */
@@ -116,9 +127,8 @@ struct lprocfs_stats {
  /* class_obd.c */
  extern struct proc_dir_entry *proc_lustre_root;
  
-/* lproc_lov.c */
-extern struct file_operations ll_proc_target_fops;
  struct obd_device;
+struct file;
  
  #ifdef LPROCFS
  
@@ -184,14 +194,18 @@ void lprocfs_init_multi_vars(unsigned int idx,                            \
     x->obd_vars = glob[idx].obd_vars;                                      \
  }                                                                         \
  
-#define LPROCFS_INIT_VARS(vclass, vinstance)           \
-void lprocfs_init_vars(struct lprocfs_static_vars *x)  \
+#define LPROCFS_INIT_VARS(name, vclass, vinstance)           \
+void lprocfs_##name##_init_vars(struct lprocfs_static_vars *x)  \
  {                                                      \
          x->module_vars = vclass;                       \
          x->obd_vars = vinstance;                       \
  }                                                      \
  
-extern void lprocfs_init_vars(struct lprocfs_static_vars *var);
+#define lprocfs_init_vars(NAME, VAR)     \
+do {      \
+        extern void lprocfs_##NAME##_init_vars(struct lprocfs_static_vars *);  \
+        lprocfs_##NAME##_init_vars(VAR);                                       \
+} while (0)
  extern void lprocfs_init_multi_vars(unsigned int idx,
                                      struct lprocfs_static_vars *var);
  /* lprocfs_status.c */
@@ -220,6 +234,8 @@ extern int lprocfs_rd_uuid(char *page, char **start, off_t off,
                             int count, int *eof, void *data);
  extern int lprocfs_rd_name(char *page, char **start, off_t off,
                             int count, int *eof, void *data);
+extern int lprocfs_rd_fstype(char *page, char **start, off_t off,
+                             int count, int *eof, void *data);
  extern int lprocfs_rd_server_uuid(char *page, char **start, off_t off,
                                    int count, int *eof, void *data);
  extern int lprocfs_rd_conn_uuid(char *page, char **start, off_t off,
@@ -228,38 +244,24 @@ extern int lprocfs_rd_numrefs(char *page, char **start, off_t off,
                                int count, int *eof, void *data);
  
  /* Statfs helpers */
-struct statfs;
  extern int lprocfs_rd_blksize(char *page, char **start, off_t off,
-                              int count, int *eof, struct statfs *sfs);
+                              int count, int *eof, void *data);
  extern int lprocfs_rd_kbytestotal(char *page, char **start, off_t off,
-                                  int count, int *eof, struct statfs *sfs);
+                                  int count, int *eof, void *data);
  extern int lprocfs_rd_kbytesfree(char *page, char **start, off_t off,
-                                 int count, int *eof, struct statfs *sfs);
+                                 int count, int *eof, void *data);
  extern int lprocfs_rd_filestotal(char *page, char **start, off_t off,
-                                 int count, int *eof, struct statfs *sfs);
+                                 int count, int *eof, void *data);
  extern int lprocfs_rd_filesfree(char *page, char **start, off_t off,
-                                int count, int *eof, struct statfs *sfs);
+                                int count, int *eof, void *data);
  extern int lprocfs_rd_filegroups(char *page, char **start, off_t off,
-                                 int count, int *eof, struct statfs *sfs);
+                                 int count, int *eof, void *data);
  
  /* lprocfs_status.c: counter read/write functions */
-struct file;
  extern int lprocfs_counter_read(char *page, char **start, off_t off,
                                  int count, int *eof, void *data);
  extern int lprocfs_counter_write(struct file *file, const char *buffer,
                                   unsigned long count, void *data);
-
-#define DEFINE_LPROCFS_STATFS_FCT(fct_name, get_statfs_fct)               \
-int fct_name(char *page, char **start, off_t off,                         \
-             int count, int *eof, void *data)                             \
-{                                                                         \
-        struct statfs sfs;                                                \
-        int rc = get_statfs_fct((struct obd_device*)data, &sfs);          \
-        return (rc == 0 ?                                                 \
-                lprocfs_##fct_name (page, start, off, count, eof, &sfs) : \
-                rc);                                                      \
-}
-
  #else
  /* LPROCFS is not defined */
  static inline void lprocfs_counter_add(struct lprocfs_stats *stats,
@@ -289,18 +291,17 @@ static inline void lprocfs_free_obd_stats(struct obd_device *obddev)
  static inline struct proc_dir_entry *
  lprocfs_register(const char *name, struct proc_dir_entry *parent,
                   struct lprocfs_vars *list, void *data) { return NULL; }
-#define LPROCFS_INIT_MULTI_VARS(array, size)
+#define LPROCFS_INIT_MULTI_VARS(array, size) /* used at file scope */
  static inline void lprocfs_init_multi_vars(unsigned int idx,
                                             struct lprocfs_static_vars *x) { return; }
-#define LPROCFS_INIT_VARS(vclass, vinstance)
-static inline void lprocfs_init_vars(struct lprocfs_static_vars *x) { return; }
+#define LPROCFS_INIT_VARS(name, vclass, vinstance) /* used at file scope */
+#define lprocfs_init_vars(...) do {} while (0)
  static inline int lprocfs_add_vars(struct proc_dir_entry *root,
                                     struct lprocfs_vars *var,
                                     void *data) { return 0; }
  static inline void lprocfs_remove(struct proc_dir_entry *root) {};
  static inline struct proc_dir_entry *lprocfs_srch(struct proc_dir_entry *head,
                                      const char *name) {return 0;}
-struct obd_device;
  static inline int lprocfs_obd_attach(struct obd_device *dev,
                                       struct lprocfs_vars *list) { return 0; }
  static inline int lprocfs_obd_detach(struct obd_device *dev)  { return 0; }
@@ -318,37 +319,30 @@ static inline int lprocfs_rd_numrefs(char *page, char **start, off_t off,
                                       int count, int *eof, void *data) { return 0; }
  
  /* Statfs helpers */
-struct statfs;
  static inline
  int lprocfs_rd_blksize(char *page, char **start, off_t off,
-                       int count, int *eof, struct statfs *sfs) { return 0; }
+                       int count, int *eof, void *data) { return 0; }
  static inline
  int lprocfs_rd_kbytestotal(char *page, char **start, off_t off,
-                           int count, int *eof, struct statfs *sfs) { return 0; }
+                           int count, int *eof, void *data) { return 0; }
  static inline
  int lprocfs_rd_kbytesfree(char *page, char **start, off_t off,
-                          int count, int *eof, struct statfs *sfs) { return 0; }
+                          int count, int *eof, void *data) { return 0; }
  static inline
  int lprocfs_rd_filestotal(char *page, char **start, off_t off,
-                          int count, int *eof, struct statfs *sfs) { return 0; }
+                          int count, int *eof, void *data) { return 0; }
  static inline
  int lprocfs_rd_filesfree(char *page, char **start, off_t off,
-                         int count, int *eof, struct statfs *sfs)  { return 0; }
+                         int count, int *eof, void *data)  { return 0; }
  static inline
  int lprocfs_rd_filegroups(char *page, char **start, off_t off,
-                          int count, int *eof, struct statfs *sfs) { return 0; }
+                          int count, int *eof, void *data) { return 0; }
  static inline
  int lprocfs_counter_read(char *page, char **start, off_t off,
                           int count, int *eof, void *data) { return 0; }
-struct file;
  static inline
  int lprocfs_counter_write(struct file *file, const char *buffer,
                            unsigned long count, void *data) { return 0; }
-
-#define DEFINE_LPROCFS_STATFS_FCT(fct_name, get_statfs_fct)  \
-int fct_name(char *page, char **start, off_t off,            \
-             int count, int *eof, void *data) { *eof = 1; return 0; }
-
  #endif /* LPROCFS */
  
  #endif /* LPROCFS_SNMP_H */
diff --git a/lustre/include/linux/lustre_compat25.h b/lustre/include/linux/lustre_compat25.h

index 4275a10..3609d52 100644 (file)
--- a/lustre/include/linux/lustre_compat25.h
+++ b/lustre/include/linux/lustre_compat25.h
@@ -23,22 +23,67 @@
  #ifndef _COMPAT25_H
  #define _COMPAT25_H
  
-#include <linux/portals_compat25.h>
+#ifdef __KERNEL__
  
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
-#define KDEVT_VAL(dev, val)         dev.value = 0               
-#else
-#define KDEVT_VAL(dev, val)         dev = 0               
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) && LINUX_VERSION_CODE < KERNEL_VERSION(2,5,69)
+#error sorry, lustre requires at least 2.5.69
  #endif
  
+#include <linux/portals_compat25.h>
+
  #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
  # define PGCACHE_WRLOCK(mapping)          write_lock(&mapping->page_lock)
  # define PGCACHE_WRUNLOCK(mapping)        write_unlock(&mapping->page_lock)
-#else
+
+#define KDEVT_INIT(val)                 { .value = val }
+#define LTIME_S(time)                   (time.tv_sec)
+#define USERMODEHELPER(path, argv, envp) \
+                                        call_usermodehelper(path, argv, envp, 1)
+#define ll_path_lookup                  path_lookup
+
+
+#define ll_pgcache_lock(mapping)          spin_lock(&mapping->page_lock)
+#define ll_pgcache_unlock(mapping)        spin_unlock(&mapping->page_lock)
+
+#else /* 2.4.. */
+
  # define PGCACHE_WRLOCK(mapping)          spin_lock(&pagecache_lock)
  # define PGCACHE_WRUNLOCK(mapping)        spin_unlock(&pagecache_lock)
+
+/* 2.5 uses hlists for some things, like the d_hash.  we'll treat them
+ * as 2.5 and let macros drop back.. */
+#define hlist_entry                     list_entry
+#define hlist_head                      list_head
+#define hlist_node                      list_head
+#define HLIST_HEAD                      LIST_HEAD
+#define INIT_HLIST_HEAD                 INIT_LIST_HEAD
+#define hlist_del_init                  list_del_init
+#define hlist_add_head                  list_add
+#define hlist_for_each_safe             list_for_each_safe
+#define KDEVT_INIT(val)                 (val)
+#define ext3_xattr_set_handle           ext3_xattr_set
+#define try_module_get                  __MOD_INC_USE_COUNT
+#define module_put                      __MOD_DEC_USE_COUNT
+#define LTIME_S(time)                   (time)
+#ifndef CONFIG_RH_2_4_20
+#define cpu_online(cpu)                 (cpu_online_map & (1UL << (cpu)))
  #endif
+#define USERMODEHELPER(path, argv, envp) \
+                                        call_usermodehelper(path, argv, envp)
+static inline int ll_path_lookup(const char *path, unsigned flags, 
+                              struct nameidata *nd)
+{
+        int error = 0;
+        if (path_init(path, flags, nd))
+                error = path_walk(path, nd);
+        return error;
+}
+typedef long sector_t;
+
+#define ll_pgcache_lock(mapping)        spin_lock(&pagecache_lock)
+#define ll_pgcache_unlock(mapping)      spin_unlock(&pagecache_lock)
  
+#endif /* end of 2.4 compat macros */
  
  #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
  # define filemap_fdatasync(mapping)       filemap_fdatawrite(mapping)
@@ -54,18 +99,6 @@
  # define Page_Uptodate(page)              PageUptodate(page)
  #endif
  
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
-# define USERMODEHELPER(path, argv, envp) call_usermodehelper(path, argv, envp, 0)
-#else
-# define USERMODEHELPER(path, argv, envp) call_usermodehelper(path, argv, envp)
-#endif
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
-# define LL_CHECK_DIRTY(sb)              do { }while(0)
-#else
-# define LL_CHECK_DIRTY(sb)              ll_check_dirty(sb)
-#endif
-
  #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
  #define  rb_node_s rb_node
  #define  rb_root_s rb_root
@@ -73,4 +106,5 @@ typedef struct rb_root_s rb_root_t;
  typedef struct rb_node_s rb_node_t;
  #endif
  
+#endif /* __KERNEL__ */
  #endif /* _COMPAT25_H */
diff --git a/lustre/include/linux/lustre_dlm.h b/lustre/include/linux/lustre_dlm.h

index 2db4196..8fc90ae 100644 (file)
--- a/lustre/include/linux/lustre_dlm.h
+++ b/lustre/include/linux/lustre_dlm.h
@@ -188,6 +188,7 @@ struct ldlm_lock {
           * it's no longer in use.  If the lock is not granted, a process sleeps
           * on this waitq to learn when it becomes granted. */
          wait_queue_head_t     l_waitq;
+        struct timeval        l_enqueued_time;
  };
  
  typedef int (*ldlm_res_compat)(struct ldlm_lock *child, struct ldlm_lock *new);
@@ -316,6 +317,8 @@ int ldlm_namespace_foreach_res(struct ldlm_namespace *ns,
                                 ldlm_res_iterator_t iter, void *closure);
  
  int ldlm_replay_locks(struct obd_import *imp);
+void ldlm_change_cbdata(struct ldlm_namespace *, struct ldlm_res_id *,
+                        ldlm_iterator_t iter, void *data);
  
  /* ldlm_extent.c */
  int ldlm_extent_compat(struct ldlm_lock *, struct ldlm_lock *);
@@ -450,6 +453,8 @@ int ldlm_cli_cancel_unused(struct ldlm_namespace *, struct ldlm_res_id *,
  
  /* mds/handler.c */
  /* This has to be here because recurisve inclusion sucks. */
+int intent_disposition(struct ldlm_reply *rep, int flag);
+void intent_set_disposition(struct ldlm_reply *rep, int flag);
  int mds_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
                       void *data, int flag);
  
diff --git a/lustre/include/linux/lustre_export.h b/lustre/include/linux/lustre_export.h

index 6939a95..677ddc6 100644 (file)
--- a/lustre/include/linux/lustre_export.h
+++ b/lustre/include/linux/lustre_export.h
@@ -11,7 +11,7 @@
  #define __EXPORT_H
  
  #include <linux/lustre_idl.h>
-#include <linux/obd_filter.h>
+#include <linux/lustre_dlm.h>
  
  struct mds_client_data;
  
@@ -19,7 +19,8 @@ struct mds_export_data {
          struct list_head        med_open_head;
          spinlock_t              med_open_lock;
          struct mds_client_data *med_mcd;
-        int                     med_off;
+        loff_t                  med_off;
+        int                     med_idx;
  };
  
  struct ldlm_export_data {
@@ -37,6 +38,16 @@ struct ec_export_data { /* echo client */
          struct list_head eced_locks;
  };
  
+/* In-memory access to client data from OST struct */
+struct filter_client_data;
+struct filter_export_data {
+        struct list_head           fed_open_head; //files to close on disconnect
+        spinlock_t                 fed_lock;      /* protects fed_open_head */
+        struct filter_client_data *fed_fcd;
+        loff_t                     fed_lr_off;
+        int                        fed_lr_idx;
+};
+
  struct obd_export {
          struct portals_handle     exp_handle;
          atomic_t                  exp_refcount;
@@ -48,7 +59,8 @@ struct obd_export {
          struct ptlrpc_request    *exp_outstanding_reply;
          time_t                    exp_last_request_time;
          spinlock_t                exp_lock; /* protects flags int below */
-        int                       exp_failed:1, exp_failover:1;
+        int                       exp_failed:1;
+        int                       exp_flags;
          union {
                  struct mds_export_data    eu_mds_data;
                  struct filter_export_data eu_filter_data;
diff --git a/lustre/include/linux/lustre_fsfilt.h b/lustre/include/linux/lustre_fsfilt.h

index fc00fe1..37ffc4f 100644 (file)
--- a/lustre/include/linux/lustre_fsfilt.h
+++ b/lustre/include/linux/lustre_fsfilt.h
@@ -30,7 +30,8 @@
  #include <linux/obd.h>
  #include <linux/fs.h>
  
-typedef void (*fsfilt_cb_t)(struct obd_device *obd, __u64 last_rcvd, int error);
+typedef void (*fsfilt_cb_t)(struct obd_device *obd, __u64 last_rcvd,
+                            void *data, int error);
  
  struct fsfilt_objinfo {
          struct dentry *fso_dentry;
@@ -41,9 +42,9 @@ struct fsfilt_operations {
          struct list_head fs_list;
          struct module *fs_owner;
          char   *fs_type;
-        void   *(* fs_start)(struct inode *inode, int op);
+        void   *(* fs_start)(struct inode *inode, int op, void *desc_private);
          void   *(* fs_brw_start)(int objcount, struct fsfilt_objinfo *fso,
-                                 int niocount, struct niobuf_remote *nb);
+                                 int niocount, void *desc_private);
          int     (* fs_commit)(struct inode *inode, void *handle,int force_sync);
          int     (* fs_setattr)(struct dentry *dentry, void *handle,
                                 struct iattr *iattr, int do_trunc);
@@ -54,16 +55,19 @@ struct fsfilt_operations {
                                  loff_t *offset);
          int     (* fs_journal_data)(struct file *file);
          int     (* fs_set_last_rcvd)(struct obd_device *obd, __u64 last_rcvd,
-                                     void *handle, fsfilt_cb_t cb_func);
+                                     void *handle, fsfilt_cb_t cb_func,
+                                     void *cb_data);
          int     (* fs_statfs)(struct super_block *sb, struct obd_statfs *osfs);
          int     (* fs_sync)(struct super_block *sb);
          int     (* fs_prep_san_write)(struct inode *inode, long *blocks,
                                        int nblocks, loff_t newsize);
+        int     (* fs_write_record)(struct file *, char *, int size, loff_t *);
+        int     (* fs_read_record)(struct file *, char *, int size, loff_t *);
  };
  
  extern int fsfilt_register_ops(struct fsfilt_operations *fs_ops);
  extern void fsfilt_unregister_ops(struct fsfilt_operations *fs_ops);
-extern struct fsfilt_operations *fsfilt_get_ops(char *type);
+extern struct fsfilt_operations *fsfilt_get_ops(const char *type);
  extern void fsfilt_put_ops(struct fsfilt_operations *fs_ops);
  
  #define FSFILT_OP_UNLINK         1
@@ -75,26 +79,53 @@ extern void fsfilt_put_ops(struct fsfilt_operations *fs_ops);
  #define FSFILT_OP_MKNOD          7
  #define FSFILT_OP_SETATTR        8
  #define FSFILT_OP_LINK           9
+#define FSFILT_OP_CREATE_LOG    10
+#define FSFILT_OP_UNLINK_LOG    11
  
-static inline void *fsfilt_start(struct obd_device *obd,
-                                 struct inode *inode, int op)
+static inline void *fsfilt_start(struct obd_device *obd, struct inode *inode,
+                                 int op, struct obd_trans_info *oti)
  {
          unsigned long now = jiffies;
-        void *handle = obd->obd_fsops->fs_start(inode, op);
-        CDEBUG(D_HA, "started handle %p\n", handle);
-        if (time_after(jiffies, now + 15*HZ))
+        void *parent_handle = oti ? oti->oti_handle : NULL;
+        void *handle = obd->obd_fsops->fs_start(inode, op, parent_handle);
+        CDEBUG(D_HA, "started handle %p (%p)\n", handle, parent_handle);
+
+        if (oti != NULL) {
+                if (parent_handle == NULL) {
+                        oti->oti_handle = handle;
+                } else if (handle != parent_handle) {
+                        CERROR("mismatch: parent %p, handle %p, oti %p\n",
+                               parent_handle, handle, oti->oti_handle);
+                        LBUG();
+                }
+        }
+        if (time_after(jiffies, now + 15 * HZ))
                  CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
          return handle;
  }
  
  static inline void *fsfilt_brw_start(struct obd_device *obd, int objcount,
                                       struct fsfilt_objinfo *fso, int niocount,
-                                     struct niobuf_remote *nb)
+                                     struct obd_trans_info *oti)
  {
          unsigned long now = jiffies;
-        void *handle = obd->obd_fsops->fs_brw_start(objcount, fso, niocount,nb);
-        CDEBUG(D_HA, "started handle %p\n", handle);
-        if (time_after(jiffies, now + 15*HZ))
+        void *parent_handle = oti ? oti->oti_handle : NULL;
+        void *handle;
+
+        handle = obd->obd_fsops->fs_brw_start(objcount, fso, niocount,
+                                              parent_handle);
+        CDEBUG(D_HA, "started handle %p (%p)\n", handle, parent_handle);
+
+        if (oti != NULL) {
+                if (parent_handle == NULL) {
+                        oti->oti_handle = handle;
+                } else if (handle != parent_handle) {
+                        CERROR("mismatch: parent %p, handle %p, oti %p\n",
+                               parent_handle, handle, oti->oti_handle);
+                        LBUG();
+                }
+        }
+        if (time_after(jiffies, now + 15 * HZ))
                  CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
          return handle;
  }
@@ -105,7 +136,7 @@ static inline int fsfilt_commit(struct obd_device *obd, struct inode *inode,
          unsigned long now = jiffies;
          int rc = obd->obd_fsops->fs_commit(inode, handle, force_sync);
          CDEBUG(D_HA, "committing handle %p\n", handle);
-        if (time_after(jiffies, now + 15*HZ))
+        if (time_after(jiffies, now + 15 * HZ))
                  CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
          return rc;
  }
@@ -116,9 +147,8 @@ static inline int fsfilt_setattr(struct obd_device *obd, struct dentry *dentry,
          unsigned long now = jiffies;
          int rc;
          rc = obd->obd_fsops->fs_setattr(dentry, handle, iattr, do_trunc);
-        if (time_after(jiffies, now + 15*HZ))
+        if (time_after(jiffies, now + 15 * HZ))
                  CERROR("long setattr time %lus\n", (jiffies - now) / HZ);
-
          return rc;
  }
  
@@ -147,9 +177,11 @@ static inline int fsfilt_journal_data(struct obd_device *obd, struct file *file)
  }
  
  static inline int fsfilt_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
-                                       void *handle, fsfilt_cb_t cb_func)
+                                       void *handle, fsfilt_cb_t cb_func,
+                                       void *cb_data)
  {
-        return obd->obd_fsops->fs_set_last_rcvd(obd, last_rcvd,handle,cb_func);
+        return obd->obd_fsops->fs_set_last_rcvd(obd, last_rcvd, handle,
+                                                cb_func, cb_data);
  }
  
  static inline int fsfilt_statfs(struct obd_device *obd, struct super_block *fs,
@@ -172,6 +204,19 @@ static inline int fs_prep_san_write(struct obd_device *obd,
          return obd->obd_fsops->fs_prep_san_write(inode, blocks,
                                                   nblocks, newsize);
  }
+
+static inline int fsfilt_read_record(struct obd_device *obd, struct file *file,
+                                     char *buf, loff_t size, loff_t *offs)
+{
+        return obd->obd_fsops->fs_read_record(file, buf, size, offs);
+}
+
+static inline int fsfilt_write_record(struct obd_device *obd, struct file *file,
+                                      char *buf, loff_t size, loff_t *offs)
+{
+        return obd->obd_fsops->fs_write_record(file, buf, size, offs);
+}
+
  #endif /* __KERNEL__ */
  
  #endif
diff --git a/lustre/include/linux/lustre_idl.h b/lustre/include/linux/lustre_idl.h

index f4a5f2d..055b7a4 100644 (file)
--- a/lustre/include/linux/lustre_idl.h
+++ b/lustre/include/linux/lustre_idl.h
@@ -18,7 +18,7 @@
   *   along with Lustre; if not, write to the Free Software
   *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   *
- * (Un)packing of OST requests
+ * Lustre wire protocol definitions.
   *
   * We assume all nodes are either little-endian or big-endian, and we
   * always send messages in the sender's native format.  The receiver
@@ -29,9 +29,9 @@
   * implemented either here, inline (trivial implementations) or in
   * ptlrpc/pack_generic.c.  These 'swabbers' convert the type from "other"
   * endian, in-place in the message buffer.
- * 
+ *
   * A swabber takes a single pointer argument.  The caller must already have
- * verified that the length of the message buffer >= sizeof (type).  
+ * verified that the length of the message buffer >= sizeof (type).
   *
   * For variable length types, a second 'lustre_swab_v_xxxtypexxx()' routine
   * may be defined that swabs just the variable part, after the caller has
@@ -90,29 +90,33 @@ extern struct obd_uuid lctl_fake_uuid;
   * FOO_BULK_PORTAL    is for incoming bulk on the FOO
   */
  
-#define CONNMGR_REQUEST_PORTAL  1
-#define CONNMGR_REPLY_PORTAL    2
-//#define OSC_REQUEST_PORTAL      3
-#define OSC_REPLY_PORTAL        4
-//#define OSC_BULK_PORTAL         5
-#define OST_REQUEST_PORTAL      6
-//#define OST_REPLY_PORTAL        7
-#define OST_BULK_PORTAL         8
-//#define MDC_REQUEST_PORTAL      9
-#define MDC_REPLY_PORTAL        10
-//#define MDC_BULK_PORTAL         11
-#define MDS_REQUEST_PORTAL      12
-//#define MDS_REPLY_PORTAL        13
-#define MDS_BULK_PORTAL         14
-#define LDLM_CB_REQUEST_PORTAL     15
-#define LDLM_CB_REPLY_PORTAL       16
+#define CONNMGR_REQUEST_PORTAL          1
+#define CONNMGR_REPLY_PORTAL            2
+//#define OSC_REQUEST_PORTAL            3
+#define OSC_REPLY_PORTAL                4
+//#define OSC_BULK_PORTAL               5
+#define OST_REQUEST_PORTAL              6
+//#define OST_REPLY_PORTAL              7
+#define OST_BULK_PORTAL                 8
+//#define MDC_REQUEST_PORTAL            9
+#define MDC_REPLY_PORTAL               10
+//#define MDC_BULK_PORTAL              11
+#define MDS_REQUEST_PORTAL             12
+//#define MDS_REPLY_PORTAL             13
+#define MDS_BULK_PORTAL                14
+#define LDLM_CB_REQUEST_PORTAL         15
+#define LDLM_CB_REPLY_PORTAL           16
  #define LDLM_CANCEL_REQUEST_PORTAL     17
  #define LDLM_CANCEL_REPLY_PORTAL       18
  #define PTLBD_REQUEST_PORTAL           19
  #define PTLBD_REPLY_PORTAL             20
  #define PTLBD_BULK_PORTAL              21
-#define MDS_SETATTR_PORTAL      22
-#define MDS_READPAGE_PORTAL     23
+#define MDS_SETATTR_PORTAL             22
+#define MDS_READPAGE_PORTAL            23
+#define MGMT_REQUEST_PORTAL            24
+#define MGMT_REPLY_PORTAL              25
+#define MGMT_CLI_REQUEST_PORTAL        26
+#define MGMT_CLI_REPLY_PORTAL          27
  
  #define SVC_KILLED               1
  #define SVC_EVENT                2
@@ -159,7 +163,7 @@ struct lustre_msg {
  
  static inline int lustre_msg_swabbed (struct lustre_msg *msg)
  {
-        return (msg->magic == __swab32 (PTLRPC_MSG_MAGIC));
+        return (msg->magic == __swab32(PTLRPC_MSG_MAGIC));
  }
  
  /* Flags that are operation-specific go in the top 16 bits. */
@@ -207,9 +211,10 @@ static inline void lustre_msg_set_op_flags(struct lustre_msg *msg, int flags)
   * Flags for all connect opcodes (MDS_CONNECT, OST_CONNECT)
   */
  
-#define MSG_CONNECT_RECOVERING 0x1
-#define MSG_CONNECT_RECONNECT  0x2
+#define MSG_CONNECT_RECOVERING  0x1
+#define MSG_CONNECT_RECONNECT   0x2
  #define MSG_CONNECT_REPLAYABLE  0x4
+#define MSG_CONNECT_PEER        0x8
  
  /*
   *   OST requests: OBDO & OBD request records
@@ -234,13 +239,13 @@ typedef enum {
          OST_SAN_READ   = 14,
          OST_SAN_WRITE  = 15,
          OST_SYNCFS     = 16,
+        OST_SET_INFO   = 17,
          OST_LAST_OPC
  } ost_cmd_t;
  #define OST_FIRST_OPC  OST_REPLY
  /* When adding OST RPC opcodes, please update 
   * LAST/FIRST macros used in ptlrpc/ptlrpc_internals.h */
  
-
  typedef uint64_t        obd_id;
  typedef uint64_t        obd_gr;
  typedef uint64_t        obd_time;
@@ -324,8 +329,23 @@ struct lov_mds_md {
  #define OBD_MD_LINKNAME (0x00040000)    /* symbolic link target */
  #define OBD_MD_FLHANDLE (0x00080000)    /* file handle */
  #define OBD_MD_FLCKSUM  (0x00100000)    /* bulk data checksum */
+#define OBD_MD_FLQOS    (0x00200000)    /* quality of service stats */
+#define OBD_MD_FLOSCOPQ (0x00400000)    /* osc opaque data */
+#define OBD_MD_FLCOOKIE (0x00800000)    /* log cancellation cookie */
  #define OBD_MD_FLNOTOBD (~(OBD_MD_FLOBDFLG | OBD_MD_FLBLOCKS | OBD_MD_LINKNAME|\
-                           OBD_MD_FLEASIZE | OBD_MD_FLHANDLE | OBD_MD_FLCKSUM))
+                           OBD_MD_FLEASIZE | OBD_MD_FLHANDLE | OBD_MD_FLCKSUM|\
+                           OBD_MD_FLQOS | OBD_MD_FLOSCOPQ | OBD_MD_FLCOOKIE))
+
+static inline struct lustre_handle *obdo_handle(struct obdo *oa)
+{       /* view the start of oa->o_inline as a lustre_handle */
+        return (struct lustre_handle *)oa->o_inline;
+}
+
+static inline struct llog_cookie *obdo_logcookie(struct obdo *oa)
+{       /* skips sizeof(struct lustre_handle) past the start of o_inline */
+        return (struct llog_cookie *)(oa->o_inline +
+                                      sizeof(struct lustre_handle));
+}
  
  struct obd_statfs {
          __u64           os_type;
@@ -399,6 +419,8 @@ typedef enum {
          MDS_GETSTATUS    = 40,
          MDS_STATFS       = 41,
          MDS_GETLOVINFO   = 42,
+        MDS_PIN          = 43,
+        MDS_UNPIN        = 44,
          MDS_LAST_OPC
  } mds_cmd_t;
  #define MDS_FIRST_OPC    MDS_GETATTR
@@ -417,12 +439,20 @@ typedef enum {
  #define REINT_OPEN     6
  #define REINT_MAX      6
  
-#define IT_INTENT_EXEC   1
-#define IT_OPEN_LOOKUP  (1 << 1)
-#define IT_OPEN_NEG     (1 << 2)
-#define IT_OPEN_POS     (1 << 3)
-#define IT_OPEN_CREATE  (1 << 4)
-#define IT_OPEN_OPEN    (1 << 5)
+/* the disposition of the intent outlines what was executed */
+#define DISP_IT_EXECD   1
+#define DISP_LOOKUP_EXECD  (1 << 1)
+#define DISP_LOOKUP_NEG     (1 << 2)
+#define DISP_LOOKUP_POS     (1 << 3)
+#define DISP_OPEN_CREATE  (1 << 4)
+#define DISP_OPEN_OPEN    (1 << 5)
+#define DISP_ENQ_COMPLETE (1<<6)
+
+
+struct ll_uctxt {
+        __u32 gid1;
+        __u32 gid2;
+};
  
  struct ll_fid {
          __u64 id;
@@ -504,6 +534,11 @@ struct mds_rec_setattr {
          __u32           sa_suppgid;
  };
  
+/* Remove this once we declare it in include/linux/fs.h (v21 kernel patch?) */
+#ifndef ATTR_CTIME_SET
+#define ATTR_CTIME_SET 0x2000
+#endif
+
  extern void lustre_swab_mds_rec_setattr (struct mds_rec_setattr *sa);
  
  struct mds_rec_create {
@@ -720,9 +755,109 @@ struct ptlbd_rsp {
  extern void lustre_swab_ptlbd_rsp (struct ptlbd_rsp *r);
  
  /*
+ * Opcodes for management/monitoring node.
+ */
+#define MGMT_CONNECT    250
+#define MGMT_DISCONNECT 251
+#define MGMT_EXCEPTION  252 /* node died, etc. */
+
+/*
   * Opcodes for multiple servers.
   */
  
-#define OBD_PING 400
+#define OBD_PING       400
+#define OBD_LOG_CANCEL 401
+#define OBD_LAST_OPC  (OBD_LOG_CANCEL + 1)
+#define OBD_FIRST_OPC OBD_PING
+
+/* catalog of log objects */
+
+/* Identifier for a single log object */
+struct llog_logid {
+        __u64                   lgl_oid;
+        __u32                   lgl_ogen;
+};
+
+/* Log data record types - there is no specific reason that these need to
+ * be related to the RPC opcodes, but no reason not to (may be handy later?)
+ */
+typedef enum {
+        OST_CREATE_REC = 0x10600000 | (OST_CREATE << 8),
+        OST_ORPHAN_REC = 0x10600000 | (OST_DESTROY << 8),
+        MDS_UNLINK_REC = 0x10610000 | (MDS_REINT << 8) | REINT_UNLINK,
+        LLOG_CATALOG_MAGIC = 0x1062e67d,
+        LLOG_OBJECT_MAGIC = 0x10645539,
+} llog_op_type;
+
+/* Log record header - stored in originating host endian order (use magic to
+ * check order).
+ * Each record must start with this struct, end with a __u32 for the struct
+ * length, and be a multiple of 64 bits in size.
+ */
+struct llog_trans_hdr {
+        __u32                   lth_len;
+        __u32                   lth_type;
+};
+
+struct llog_create_rec {
+        struct llog_trans_hdr   lcr_hdr;
+        struct ll_fid           lcr_fid;
+        obd_id                  lcr_oid;
+        obd_count               lcr_ogen;
+        __u32                   lcr_end_len;
+} __attribute__((packed));
+
+struct llog_orphan_rec {
+        struct llog_trans_hdr   lor_hdr;
+        obd_id                  lor_oid;
+        obd_count               lor_ogen;
+        __u32                   lor_end_len;
+} __attribute__((packed));
+
+struct llog_unlink_rec {
+        struct llog_trans_hdr   lur_hdr;
+        obd_id                  lur_oid;
+        obd_count               lur_ogen;
+        __u32                   lur_end_len;
+} __attribute__((packed));
+
+/* On-disk header structure of each log object - stored in creating host
+ * endian order, with the exception of the bitmap - stored in little endian
+ * order so that we can use ext2_{clear,set,test}_bit() for proper/optimized
+ * little-endian handling of bitmaps (which are otherwise a pain to handle).
+ */
+#define LLOG_CHUNK_SIZE         4096
+#define LLOG_HEADER_SIZE        (96)
+#define LLOG_BITMAP_BYTES       (LLOG_CHUNK_SIZE - LLOG_HEADER_SIZE)
+
+#define LLOG_MIN_REC_SIZE       (16) /* round(struct llog_trans_hdr+end_len) */
+
+struct llog_object_hdr {
+        struct llog_trans_hdr   llh_hdr;
+        __u64                   llh_timestamp;
+        __u32                   llh_count;
+        __u16                   llh_bitmap_offset;
+        __u16                   llh_unused;
+        struct obd_uuid         llh_tgtuuid;
+        __u8                    llh_padding[3];
+        __u32                   llh_reserved[LLOG_HEADER_SIZE/sizeof(__u32)-17];
+        __u32                   llh_bitmap[LLOG_BITMAP_BYTES/sizeof(__u32)];
+        __u32                   llh_hdr_end_len;
+};
+
+static inline int llog_log_swabbed(struct llog_object_hdr *hdr)
+{
+        if (hdr->llh_hdr.lth_type == __swab32(LLOG_OBJECT_MAGIC))
+                return 1;       /* magic matches only after byte-swapping */
+        if (hdr->llh_hdr.lth_type == LLOG_OBJECT_MAGIC)
+                return 0;       /* magic matches as stored: no swab needed */
+        return -1;              /* magic not recognised in either order */
+}
+
+/* log cookies are used to reference a specific log file and a record therein */
+struct llog_cookie {
+        struct llog_logid       lgc_lgl;
+        __u32                   lgc_index;
+};
  
  #endif
diff --git a/lustre/include/linux/lustre_lib.h b/lustre/include/linux/lustre_lib.h

index b18e2d2..467132b 100644 (file)
--- a/lustre/include/linux/lustre_lib.h
+++ b/lustre/include/linux/lustre_lib.h
@@ -80,7 +80,7 @@ void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id);
  
  int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf);
  int client_sanobd_setup(struct obd_device *obddev, obd_count len, void *buf);
-int client_obd_cleanup(struct obd_device * obddev, int force, int failover);
+int client_obd_cleanup(struct obd_device * obddev, int flags);
  struct client_obd *client_conn2cli(struct lustre_handle *conn);
  struct obd_device *client_tgtuuid2obd(struct obd_uuid *tgtuuid);
  
@@ -89,13 +89,16 @@ struct obd_device *client_tgtuuid2obd(struct obd_uuid *tgtuuid);
   * the server, we can just send the whole struct unaltered. */
  struct obd_client_handle {
          struct lustre_handle och_fh;
+        struct llog_cookie och_cookie;
          struct ptlrpc_request *och_req;
          __u32 och_magic;
  };
  #define OBD_CLIENT_HANDLE_MAGIC 0xd15ea5ed
  
  /* statfs_pack.c */
-int obd_self_statfs(struct obd_device *dev, struct statfs *sfs);
+struct statfs;
+void statfs_pack(struct obd_statfs *osfs, struct kstatfs *sfs);
+void statfs_unpack(struct kstatfs *sfs, struct obd_statfs *osfs);
  
  /* l_lock.c */
  struct lustre_lock {
diff --git a/lustre/include/linux/lustre_lite.h b/lustre/include/linux/lustre_lite.h

index 81184e7..fa83fb2 100644 (file)
--- a/lustre/include/linux/lustre_lite.h
+++ b/lustre/include/linux/lustre_lite.h
@@ -16,7 +16,14 @@
  
  #ifdef __KERNEL__
  
+#include <linux/version.h>
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#include <asm/statfs.h>
+#endif
+
  #include <linux/fs.h>
+#include <linux/dcache.h>
  #include <linux/ext2_fs.h>
  #include <linux/proc_fs.h>
  
@@ -46,20 +53,62 @@ struct lustre_intent_data {
          __u32 it_lock_mode;
  };
  
+#define LL_IT2STR(it) ((it) ? ldlm_it2str((it)->it_op) : "0")
+
+static inline struct lookup_intent *ll_nd2it(struct nameidata *nd)
+{
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+        return &nd->it;         /* 2.5+: take the address of nd's 'it' member */
+#else
+        return nd->it;          /* 2.4: nd's 'it' member is already a pointer */
+#endif
+}
+
  struct ll_dentry_data {
-        struct semaphore      lld_it_sem;
+        int                      lld_cwd_count;
+        int                      lld_mnt_count;
+        struct obd_client_handle lld_cwd_och;
+        struct obd_client_handle lld_mnt_och;
  };
  
-#define ll_d2d(dentry) ((struct ll_dentry_data*) dentry->d_fsdata)
+#define ll_d2d(de) ((struct ll_dentry_data *)(de)->d_fsdata) /* d_fsdata cast */
  
  extern struct file_operations ll_pgcache_seq_fops;
  
+/*
+ * XXX used in obdecho/echo_client.c; must move (pjb)
+ * 'p' list, as it's a list of pages linked together
+ * by ->private.
+ */
+struct plist {
+        struct page *pl_head;
+        struct page *pl_tail;
+        int pl_num;
+};
+
+struct ll_dirty_offsets {
+        rb_root_t       do_root;
+        spinlock_t      do_lock;
+        unsigned long   do_num_dirty;
+};
+
+struct ll_writeback_pages {
+        obd_count npgs, max;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+        int rw;
+        struct inode *inode;
+        struct brw_page pga[0];         /* 2.5+: zero-length trailing array */
+#else
+        struct brw_page *pga;           /* 2.4: separate array, by pointer */
+#endif
+};
+
  struct ll_inode_info {
          struct lov_stripe_md   *lli_smd;
          char                   *lli_symlink_name;
          struct semaphore        lli_open_sem;
          struct list_head        lli_read_extents;
-        loff_t                  lli_maxbytes;
+        __u64                   lli_maxbytes;
          spinlock_t              lli_read_extent_lock;
          unsigned long           lli_flags;
  #define LLI_F_HAVE_SIZE_LOCK    0
@@ -81,13 +130,6 @@ struct ll_read_extent {
          struct ldlm_extent re_extent;
  };
  
-int ll_check_dirty( struct super_block *sb );
-int ll_batch_writepage( struct inode *inode, struct page *page );
-
-/* interpet return codes from intent lookup */
-#define LL_LOOKUP_POSITIVE 1
-#define LL_LOOKUP_NEGATIVE 2
-
  #define LL_SUPER_MAGIC 0x0BD00BD0
  
  #define LL_COMMITCBD_STOPPING  0x1
@@ -118,14 +160,22 @@ struct ll_sb_info {
          struct lprocfs_stats     *ll_stats; /* lprocfs stats counter */
  };
  
-static inline struct ll_sb_info *ll_s2sbi(struct super_block *sb)
-{
+
  #if  (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-        return (struct ll_sb_info *)(sb->s_fs_info);
-#else
-        return (struct ll_sb_info *)(sb->u.generic_sbp);
-#endif
+#define    ll_s2sbi(sb)     ((struct ll_sb_info *)((sb)->s_fs_info))
+void __d_rehash(struct dentry * entry, int lock);
+static inline __u64 ll_ts2u64(struct timespec *time)
+{
+        __u64 t = time->tv_sec;
+        return t;
+}
+#else  /* 2.4 here */
+#define    ll_s2sbi(sb)     ((struct ll_sb_info *)((sb)->u.generic_sbp))
+static inline __u64 ll_ts2u64(time_t *time)
+{
+        return *time;
  }
+#endif 
  
  static inline struct lustre_handle *ll_s2obdconn(struct super_block *sb)
  {
@@ -146,29 +196,6 @@ static inline struct ll_sb_info *ll_i2sbi(struct inode *inode)
          return ll_s2sbi(inode->i_sb);
  }
  
-static inline void d_unhash_aliases(struct inode *inode)
-{
-        struct dentry *dentry = NULL;
-        struct list_head *tmp;
-        struct ll_sb_info *sbi = ll_i2sbi(inode);
-        ENTRY;
-
-        CDEBUG(D_INODE, "marking dentries for ino %lx/%x invalid\n",
-               inode->i_ino, inode->i_generation);
-
-        spin_lock(&dcache_lock);
-        list_for_each(tmp, &inode->i_dentry) {
-                dentry = list_entry(tmp, struct dentry, d_alias);
-
-                list_del_init(&dentry->d_hash);
-                dentry->d_flags |= DCACHE_LUSTRE_INVALID;
-                list_add(&dentry->d_hash, &sbi->ll_orphan_dentry_list);
-        }
-
-        spin_unlock(&dcache_lock);
-        EXIT;
-}
-
  // FIXME: replace the name of this with LL_I to conform to kernel stuff
  // static inline struct ll_inode_info *LL_I(struct inode *inode)
  static inline struct ll_inode_info *ll_i2info(struct inode *inode)
@@ -199,21 +226,17 @@ static inline int ll_mds_max_easize(struct super_block *sb)
          return sbi2mdc(ll_s2sbi(sb))->cl_max_mds_easize;
  }
  
-static inline loff_t ll_file_maxbytes(struct inode *inode)
+static inline __u64 ll_file_maxbytes(struct inode *inode)
  {
          return ll_i2info(inode)->lli_maxbytes;
  }
  
  /* namei.c */
-int ll_lock(struct inode *dir, struct dentry *dentry,
-            struct lookup_intent *it, struct lustre_handle *lockh);
-int ll_unlock(__u32 mode, struct lustre_handle *lockh);
-
-typedef int (*intent_finish_cb)(int flag, struct ptlrpc_request *,
+typedef int (*intent_finish_cb)(struct ptlrpc_request *,
                                  struct inode *parent, struct dentry **, 
                                  struct lookup_intent *, int offset, obd_id ino);
  int ll_intent_lock(struct inode *parent, struct dentry **,
-                   struct lookup_intent *, intent_finish_cb);
+                   struct lookup_intent *, int, intent_finish_cb);
  int ll_mdc_blocking_ast(struct ldlm_lock *lock,
                          struct ldlm_lock_desc *desc,
                          void *data, int flag);
@@ -222,51 +245,7 @@ void ll_prepare_mdc_op_data(struct mdc_op_data *data,
                              struct inode *i1, struct inode *i2,
                              const char *name, int namelen, int mode);
  
-/* dcache.c */
-void ll_intent_release(struct dentry *, struct lookup_intent *);
-
-/****
-
-I originally implmented these as functions, then realized a macro
-would be more helpful for debugging, so the CDEBUG messages show
-the current calling function.  The orignal functions are in llite/dcache.c
-
-int ll_save_intent(struct dentry * de, struct lookup_intent * it);
-struct lookup_intent * ll_get_intent(struct dentry * de);
-****/
-
-#define IT_RELEASED_MAGIC 0xDEADCAFE
-
-#define LL_SAVE_INTENT(de, it)                                                 \
-do {                                                                           \
-        LASSERT(ll_d2d(de) != NULL);                                           \
-                                                                               \
-        down(&ll_d2d(de)->lld_it_sem);                                         \
-        LASSERT(de->d_it == NULL);                                             \
-        de->d_it = it;                                                         \
-        CDEBUG(D_DENTRY,                                                       \
-               "D_IT DOWN dentry %p fsdata %p intent: %p %s sem %d\n",         \
-               de, ll_d2d(de), de->d_it, ldlm_it2str(de->d_it->it_op),         \
-               atomic_read(&(ll_d2d(de)->lld_it_sem.count)));                  \
-} while(0)
-
-#define LL_GET_INTENT(de, it)                                                  \
-do {                                                                           \
-        it = de->d_it;                                                         \
-                                                                               \
-        LASSERT(ll_d2d(de) != NULL);                                           \
-        LASSERT(it);                                                           \
-        LASSERT(it->it_op != IT_RELEASED_MAGIC);                               \
-                                                                               \
-        CDEBUG(D_DENTRY, "D_IT UP dentry %p fsdata %p intent: %p %s\n",        \
-               de, ll_d2d(de), de->d_it, ldlm_it2str(de->d_it->it_op));        \
-        de->d_it = NULL;                                                       \
-        it->it_op = IT_RELEASED_MAGIC;                                         \
-        up(&ll_d2d(de)->lld_it_sem);                                           \
-} while(0)
-
-#define LL_IT2STR(it) ((it) ? ldlm_it2str((it)->it_op) : "0")
-
+/* lprocfs.c */
  enum {
           LPROC_LL_DIRTY_HITS = 0,
           LPROC_LL_DIRTY_MISSES,
@@ -312,8 +291,6 @@ extern struct file_operations ll_file_operations;
  extern struct inode_operations ll_file_inode_operations;
  extern struct inode_operations ll_special_inode_operations;
  struct ldlm_lock;
-int ll_extent_lock_callback(struct ldlm_lock *, struct ldlm_lock_desc *,
-                            void *data, int flag);
  int ll_extent_lock_no_validate(struct ll_file_data *fd, struct inode *inode,
                     struct lov_stripe_md *lsm, int mode,
                     struct ldlm_extent *extent, struct lustre_handle *lockh);
@@ -329,30 +306,22 @@ int ll_file_open(struct inode *inode, struct file *file);
  int ll_file_release(struct inode *inode, struct file *file);
  
  
-/* rw.c */
-struct page *ll_getpage(struct inode *inode, unsigned long offset,
-                        int create, int locked);
-void ll_truncate(struct inode *inode);
  
  /* super.c */
  void ll_update_inode(struct inode *, struct mds_body *, struct lov_stripe_md *);
  int ll_setattr_raw(struct inode *inode, struct iattr *attr);
+int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs,
+                       unsigned long maxage);
  
  /* symlink.c */
  extern struct inode_operations ll_fast_symlink_inode_operations;
  extern struct inode_operations ll_symlink_inode_operations;
  
-/* sysctl.c */
-void ll_sysctl_init(void);
-void ll_sysctl_clean(void);
-
  #else
  #include <linux/lustre_idl.h>
  #endif /* __KERNEL__ */
  
-static inline void ll_ino2fid(struct ll_fid *fid,
-                              obd_id ino,
-                              __u32 generation,
+static inline void ll_ino2fid(struct ll_fid *fid, obd_id ino, __u32 generation,
                                int type)
  {
          fid->id = ino;
@@ -360,11 +329,6 @@ static inline void ll_ino2fid(struct ll_fid *fid,
          fid->f_type = type;
  }
  
-struct ll_read_inode2_cookie {
-        struct mds_body      *lic_body;
-        struct lov_stripe_md *lic_lsm;
-};
-
  #include <asm/types.h>
  
  #define LL_IOC_GETFLAGS                 _IOR ('f', 151, long)
diff --git a/lustre/include/linux/lustre_mds.h b/lustre/include/linux/lustre_mds.h

index 683d78d..e7ee6f0 100644 (file)
--- a/lustre/include/linux/lustre_mds.h
+++ b/lustre/include/linux/lustre_mds.h
@@ -35,6 +35,8 @@
  #include <linux/lustre_idl.h>
  #include <linux/lustre_lib.h>
  #include <linux/lustre_dlm.h>
+#include <linux/lustre_log.h>
+#include <linux/lustre_export.h>
  
  struct ldlm_lock_desc;
  struct mds_obd;
@@ -49,6 +51,11 @@ struct ll_file_data;
  #define LUSTRE_MDT_NAME "mdt"
  #define LUSTRE_MDC_NAME "mdc"
  
+struct lustre_md {
+        struct mds_body *body;
+        struct lov_stripe_md *lsm;
+};
+
  struct mdc_rpc_lock {
          struct semaphore rpcl_sem;
          struct lookup_intent *rpcl_it;
@@ -144,6 +151,8 @@ struct mds_update_record {
          char *ur_tgt;
          int ur_eadatalen;
          void *ur_eadata;
+        int ur_cookielen;
+        struct llog_cookie *ur_logcookies;
          struct iattr ur_iattr;
          struct obd_ucred ur_uc;
          __u64 ur_rdev;
@@ -160,8 +169,31 @@ struct mds_update_record {
  #define ur_suppgid1 ur_uc.ouc_suppgid1
  #define ur_suppgid2 ur_uc.ouc_suppgid2
  
-#define MDS_LR_CLIENT  8192
-#define MDS_LR_SIZE     128
+/* On 2.4 the open count is kept in inode->i_attr_flags, used as an atomic_t */
+/* XXX Alex: find a solution for 2.5; until then its variants are stubs */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+# define mds_open_orphan_count(inode)   (0)
+# define mds_open_orphan_inc(inode)  do { } while (0)
+# define mds_open_orphan_dec_test(inode)  (0)
+#else
+# define mds_inode_oatomic(inode)    ((atomic_t *)&(inode)->i_attr_flags)
+# define mds_open_orphan_count(inode)                          \
+  atomic_read(mds_inode_oatomic(inode))
+# define mds_open_orphan_inc(inode)                            \
+  atomic_inc(mds_inode_oatomic(inode))
+# define mds_open_orphan_dec_test(inode)                       \
+  atomic_dec_and_test(mds_inode_oatomic(inode))
+#endif
+#define mds_inode_is_orphan(inode)  ((inode)->i_flags & 0x4000000)
+#define mds_inode_set_orphan(inode) ((inode)->i_flags |= 0x4000000)
+
+#define MDS_LR_SERVER_SIZE    512
+
+#define MDS_LR_CLIENT_START  8192
+#define MDS_LR_CLIENT_SIZE    128
+#if MDS_LR_CLIENT_START < MDS_LR_SERVER_SIZE
+#error "Can't have MDS_LR_CLIENT_START < MDS_LR_SERVER_SIZE"
+#endif
  
  #define MDS_CLIENT_SLOTS 17
  
@@ -169,11 +201,24 @@ struct mds_update_record {
  
  /* Data stored per server at the head of the last_rcvd file.  In le32 order. */
  struct mds_server_data {
-        __u8 msd_uuid[37];      /* server UUID */
-        __u8 uuid_padding[3];   /* unused */
-        __u64 msd_last_transno; /* last completed transaction ID */
-        __u64 msd_mount_count;  /* MDS incarnation number */
-        __u8 padding[512 - 56];
+        __u8  msd_uuid[37];        /* server UUID */
+        __u8  uuid_padding[3];     /* unused */
+//      __u64 msd_last_objid;      /* last created object ID */
+        __u64 msd_last_transno;    /* last completed transaction ID */
+        __u64 msd_mount_count;     /* MDS incarnation number */
+        __u64 msd_padding_until_last_objid_is_enabled;
+        __u32 msd_feature_compat;  /* compatible feature flags */
+        __u32 msd_feature_rocompat;/* read-only compatible feature flags */
+        __u32 msd_feature_incompat;/* incompatible feature flags */
+        __u32 msd_server_size;     /* size of server data area */
+        __u32 msd_client_start;    /* start of per-client data area */
+        __u16 msd_client_size;     /* size of per-client data area */
+        __u16 msd_subdir_count;    /* number of subdirectories for objects */
+        __u64 msd_catalog_oid;     /* recovery catalog object id */
+        __u32 msd_catalog_ogen;    /* recovery catalog inode generation */
+        __u8  msd_peeruuid[37];    /* UUID of LOV/OSC associated with MDS */
+        __u8  peer_padding[3];     /* unused */
+        __u8  msd_padding[MDS_LR_SERVER_SIZE - 140];
  };
  
  /* Data stored per client in the last_rcvd file.  In le32 order. */
@@ -185,7 +230,7 @@ struct mds_client_data {
          __u64 mcd_last_xid;     /* xid for the last transaction */
          __u32 mcd_last_result;  /* result from last RPC */
          __u32 mcd_last_data;    /* per-op data (disposition for open &c.) */
-        __u8 padding[MDS_LR_SIZE - 74];
+        __u8 mcd_padding[MDS_LR_CLIENT_SIZE - 72];
  };
  
  /* file data for open files on MDS */
@@ -202,10 +247,6 @@ struct mds_file_data {
  int mds_reint_rec(struct mds_update_record *r, int offset,
                    struct ptlrpc_request *req, struct lustre_handle *);
  
-/* mds/mds_open.c */
-int mds_open(struct mds_update_record *rec, int offset,
-             struct ptlrpc_request *req, struct lustre_handle *);
-
  /* mds/handler.c */
  #ifdef __KERNEL__
  struct dentry *mds_name2locked_dentry(struct obd_device *, struct dentry *dir,
@@ -223,13 +264,22 @@ int mds_pack_md(struct obd_device *mds, struct lustre_msg *msg,
                  int offset, struct mds_body *body, struct inode *inode);
  void mds_steal_ack_locks(struct obd_export *exp,
                           struct ptlrpc_request *req);
+int mds_update_server_data(struct obd_device *);
  
  /* mds/mds_fs.c */
  int mds_fs_setup(struct obd_device *obddev, struct vfsmount *mnt);
  int mds_fs_cleanup(struct obd_device *obddev, int failover);
  #endif
  
+/* mds/mds_lov.c */
+extern int mds_get_lovtgts(struct mds_obd *obd, int tgt_count,
+                           struct obd_uuid *uuidarray);
+extern int mds_get_lovdesc(struct mds_obd  *obd, struct lov_desc *desc);
+
  /* mdc/mdc_request.c */
+int mdc_req2lustre_md(struct ptlrpc_request *req, int offset,
+                      struct lustre_handle *obd_import,
+                      struct lustre_md *md);
  int mdc_enqueue(struct lustre_handle *conn, int lock_type,
                  struct lookup_intent *it, int lock_mode,
                  struct mdc_op_data *enq_data,
@@ -248,7 +298,7 @@ int mdc_getattr_name(struct lustre_handle *conn, struct ll_fid *fid,
                       unsigned int ea_size, struct ptlrpc_request **request);
  int mdc_setattr(struct lustre_handle *conn,
                  struct mdc_op_data *data,
-                struct iattr *iattr, void *ea, int ealen,
+                struct iattr *iattr, void *ea, int ealen, void *ea2, int ea2len,
                  struct ptlrpc_request **request);
  int mdc_open(struct lustre_handle *conn, obd_id ino, int type, int flags,
               struct lov_mds_md *lmm, int lmm_size, struct lustre_handle *fh,
diff --git a/lustre/include/linux/lustre_net.h b/lustre/include/linux/lustre_net.h

index ac87d7f..bc70b9a 100644 (file)
--- a/lustre/include/linux/lustre_net.h
+++ b/lustre/include/linux/lustre_net.h
@@ -96,9 +96,10 @@
  /* OST_MAXREQSIZE ~= 1640 bytes =
   * lustre_msg + obdo + 16 * obd_ioobj + 64 * niobuf_remote
   *
- * single object with 16 pages is 512 bytes
+ * - single object with 16 pages is 512 bytes
+ * - OST_MAXREQSIZE must be at least 1 page of cookies plus some spillover
   */
-#define OST_MAXREQSIZE  (2 * 1024)
+#define OST_MAXREQSIZE  (5 * 1024)
  
  #define PTLBD_NUM_THREADS        4
  #define PTLBD_NEVENTS    1024
@@ -188,15 +189,19 @@ union ptlrpc_async_args {
           * big enough.  For _tons_ of context, OBD_ALLOC a struct and store
           * a pointer to it here.  The pointer_arg ensures this struct is at
           * least big enough for that. */
-        void      *pointer_arg[4];
+        void      *pointer_arg[5];
          __u64      space[4];
  };
  
+struct ptlrpc_request_set;
+typedef int (*set_interpreter_func)(struct ptlrpc_request_set *, void *, int);
+
  struct ptlrpc_request_set {
          int               set_remaining; /* # uncompleted requests */
          wait_queue_head_t set_waitq;
+        wait_queue_head_t *set_wakeup_ptr;
          struct list_head  set_requests;
-        void             *set_interpret; /* completion callback */
+        set_interpreter_func    set_interpret; /* completion callback */
          union ptlrpc_async_args set_args; /* completion context */
  };
  
diff --git a/lustre/include/linux/obd.h b/lustre/include/linux/obd.h

index ba848a9..f30cbb2 100644 (file)
--- a/lustre/include/linux/obd.h
+++ b/lustre/include/linux/obd.h
@@ -10,6 +10,31 @@
  #ifndef __OBD_H
  #define __OBD_H
  
+#define IOC_OSC_TYPE         'h'
+#define IOC_OSC_MIN_NR       20
+#define IOC_OSC_REGISTER_LOV _IOWR(IOC_OSC_TYPE, 20, struct obd_device *)
+#define IOC_OSC_SET_ACTIVE   _IOWR(IOC_OSC_TYPE, 21, struct obd_device *)
+#define IOC_OSC_MAX_NR       50
+
+#define IOC_MDC_TYPE         'i'
+#define IOC_MDC_MIN_NR       20
+#define IOC_MDC_LOOKUP       _IOWR(IOC_MDC_TYPE, 20, struct obd_device *)
+#define IOC_MDC_GETSTRIPE    _IOWR(IOC_MDC_TYPE, 21, struct lov_mds_md *)
+#define IOC_MDC_MAX_NR       50
+
+#ifdef __KERNEL__
+# include <linux/fs.h>
+# include <linux/list.h>
+# include <linux/sched.h> /* for struct task_struct, for current.h */
+# include <asm/current.h> /* for smp_lock.h */
+# include <linux/smp_lock.h>
+# include <linux/proc_fs.h>
+# include <linux/mount.h>
+#endif
+
+#include <linux/lustre_lib.h>
+#include <linux/lustre_idl.h>
+#include <linux/lustre_export.h>
  #include <linux/lustre_otree.h>
  
  struct lov_oinfo { /* per-child structure */
@@ -34,31 +59,6 @@ struct lov_stripe_md {
          struct lov_oinfo lsm_oinfo[0];
  };
  
-#define IOC_OSC_TYPE         'h'
-#define IOC_OSC_MIN_NR       20
-#define IOC_OSC_REGISTER_LOV _IOWR(IOC_OSC_TYPE, 20, struct obd_device *)
-#define IOC_OSC_SET_ACTIVE   _IOWR(IOC_OSC_TYPE, 21, struct obd_device *)
-#define IOC_OSC_MAX_NR       50
-
-#define IOC_MDC_TYPE         'i'
-#define IOC_MDC_MIN_NR       20
-#define IOC_MDC_LOOKUP       _IOWR(IOC_MDC_TYPE, 20, struct obd_device *)
-#define IOC_MDC_MAX_NR       50
-
-#ifdef __KERNEL__
-# include <linux/fs.h>
-# include <linux/list.h>
-# include <linux/sched.h> /* for struct task_struct, for current.h */
-# include <asm/current.h> /* for smp_lock.h */
-# include <linux/smp_lock.h>
-# include <linux/proc_fs.h>
-
-# include <linux/lustre_lib.h>
-# include <linux/lustre_idl.h>
-# include <linux/lustre_mds.h>
-# include <linux/lustre_export.h>
-#endif
-
  struct obd_type {
          struct list_head typ_chain;
          struct obd_ops *typ_ops;
@@ -80,7 +80,7 @@ struct ost_server_data;
  
  struct filter_obd {
          const char          *fo_fstype;
-        char *fo_nspath;
+        char                *fo_nspath;
          struct super_block  *fo_sb;
          struct vfsmount     *fo_vfsmnt;
          struct obd_run_ctxt  fo_ctxt;
@@ -103,28 +103,57 @@ struct filter_obd {
          spinlock_t           fo_grant_lock;       /* protects tot_granted */
          obd_size             fo_tot_granted;
          obd_size             fo_tot_cached;
+
+        struct llog_handle  *fo_catalog;
+        struct obd_import   *fo_mdc_imp;
+        struct obd_uuid      fo_mdc_uuid;
+        struct lustre_handle fo_mdc_conn;
+        struct ptlrpc_client fo_mdc_client;
+        struct llog_commit_data *fo_llcd;
+        struct semaphore     fo_sem; /* protects fo_llcd */
  };
  
  struct mds_server_data;
  
  struct client_obd {
-        struct obd_import   *cl_import;
-        struct semaphore     cl_sem;
-        int                  cl_conn_count;
+        struct obd_import       *cl_import;
+        struct semaphore         cl_sem;
+        int                      cl_conn_count;
          /* max_mds_easize is purely a performance thing so we don't have to
           * call obd_size_wiremd() all the time. */
-        int                  cl_max_mds_easize;
-        struct obd_device   *cl_containing_lov;
-        kdev_t               cl_sandev;
-        struct semaphore     cl_dirty_sem;
-        obd_size             cl_dirty;  /* both in bytes */
-        obd_size             cl_dirty_granted;
-        /* this is just to keep existing infinitely caching behaviour between 
-         * clients and OSTs that don't have the grant code in yet.. it can 
+        int                      cl_max_mds_easize;
+        int                      cl_max_mds_cookiesize;
+        /* XXX can we replace cl_containing_lov with mgmt-events? */
+        struct obd_device       *cl_containing_lov;
+        kdev_t                   cl_sandev;
+
+        struct llog_commit_data *cl_llcd;
+        void                    *cl_llcd_offset;
+
+        struct semaphore         cl_dirty_sem;
+        obd_size                 cl_dirty;  /* both in bytes */
+        obd_size                 cl_dirty_granted;
+
+        struct obd_device       *cl_mgmtcli_obd;
+
+        /* this is just to keep existing infinitely caching behaviour between
+         * clients and OSTs that don't have the grant code in yet.. it can
           * be yanked once everything speaks grants */
-        char                 cl_ost_can_grant;
+        char                     cl_ost_can_grant;
  };
  
+/* Like a client, with some hangers-on.  Keep mc_client_obd first so that we
+ * can reuse the various client setup/connect functions.
+ *
+ * NOTE(review): the roles of mc_registered and mc_hammer are not described
+ * here; document them next to the code that uses them. */
+struct mgmtcli_obd {
+        struct client_obd        mc_client_obd; /* nested */
+        struct ptlrpc_thread    *mc_ping_thread;
+        struct lustre_handle     mc_ping_handle; /* XXX single-target */
+        struct list_head         mc_registered;
+        void                    *mc_hammer;
+};
+
+#define mc_import mc_client_obd.cl_import /* shorthand for the nested client's cl_import */
+
  struct mds_obd {
          struct ptlrpc_service           *mds_service;
          struct ptlrpc_service           *mds_setattr_service;
@@ -139,12 +168,20 @@ struct mds_obd {
          struct address_space_operations *mds_aops;
  
          int                              mds_max_mdsize;
+        int                              mds_max_cookiesize;
          struct file                     *mds_rcvd_filp;
          spinlock_t                       mds_transno_lock;
          __u64                            mds_last_transno;
          __u64                            mds_mount_count;
          struct ll_fid                    mds_rootfid;
          struct mds_server_data          *mds_server_data;
+        struct dentry                   *mds_pending_dir;
+        struct dentry                   *mds_logs_dir;
+
+        struct llog_handle              *mds_catalog;
+        struct obd_device               *mds_osc_obd;
+        struct obd_uuid                  mds_osc_uuid;
+        struct lustre_handle             mds_osc_conn;
  
          int                              mds_has_lov_desc;
          struct lov_desc                  mds_lov_desc;
@@ -159,7 +196,6 @@ struct ldlm_obd {
  };
  
  struct echo_obd {
-        char *eo_fstype;
          struct obdo oa;
          spinlock_t eo_lock;
          __u64 eo_lastino;
@@ -221,6 +257,7 @@ struct cache_obd {
  struct lov_tgt_desc {
          struct obd_uuid uuid;
          struct lustre_handle conn;
+        struct llog_handle *ltd_cathandle;
          int active; /* is this target available for requests, etc */
  };
  
@@ -230,6 +267,7 @@ struct lov_obd {
          struct lov_desc desc;
          int bufsize;
          int refcount;
+        int lo_catalog_loaded:1;
          struct lov_tgt_desc *tgts;
  };
  
@@ -247,14 +285,46 @@ struct niobuf_local {
  #define N_LOCAL_TEMP_PAGE 0x10000000
  
  struct obd_trans_info {
-        __u64     oti_transno;
+        __u64                   oti_transno;
          /* Only used on the server side for tracking acks. */
          struct oti_req_ack_lock {
                  struct lustre_handle lock;
                  __u32                mode;
          } oti_ack_locks[4];
+        void                    *oti_handle;
+        struct llog_cookie       oti_onecookie;  /* storage for the 1-cookie case */
+        struct llog_cookie      *oti_logcookies; /* &oti_onecookie or OBD_ALLOCed */
+        int                      oti_numcookies; /* entries at oti_logcookies */
  };
  
+/* Point oti->oti_logcookies at storage for @num_cookies llog cookies.
+ *
+ * A single cookie lives in the embedded oti_onecookie; any other count is
+ * obtained with OBD_ALLOC.  A NULL @oti is ignored.  When no storage could
+ * be obtained oti_logcookies is NULL and oti_numcookies is recorded as 0, so
+ * the count never advertises cookies that do not exist.  Callers detect
+ * failure by checking oti_logcookies; release with oti_free_cookies(). */
+static inline void oti_alloc_cookies(struct obd_trans_info *oti,
+                                     int num_cookies)
+{
+        if (!oti)
+                return;
+
+        if (num_cookies == 1)
+                oti->oti_logcookies = &oti->oti_onecookie;
+        else
+                OBD_ALLOC(oti->oti_logcookies,
+                          num_cookies * sizeof(oti->oti_onecookie));
+
+        /* assumes OBD_ALLOC leaves the pointer NULL on failure, the same
+         * convention oti_free_cookies() tests for - TODO confirm */
+        oti->oti_numcookies = oti->oti_logcookies ? num_cookies : 0;
+}
+
+/* Undo oti_alloc_cookies(): release the cookie storage and reset the
+ * bookkeeping.  Does nothing for a NULL @oti or when no cookies are attached.
+ * The embedded single cookie is never freed, only sanity-checked. */
+static inline void oti_free_cookies(struct obd_trans_info *oti)
+{
+        if (oti == NULL || oti->oti_logcookies == NULL)
+                return;
+
+        if (oti->oti_logcookies != &oti->oti_onecookie) {
+                OBD_FREE(oti->oti_logcookies,
+                         oti->oti_numcookies * sizeof(oti->oti_onecookie));
+        } else {
+                LASSERT(oti->oti_numcookies == 1);
+        }
+
+        oti->oti_logcookies = NULL;
+        oti->oti_numcookies = 0;
+}
+
  /* corresponds to one of the obd's */
  struct obd_device {
          struct obd_type *obd_type;
@@ -276,9 +346,11 @@ struct obd_device {
          struct ldlm_namespace *obd_namespace;
          struct ptlrpc_client   obd_ldlm_client; /* XXX OST/MDS only */
          /* a spinlock is OK for what we do now, may need a semaphore later */
-        spinlock_t obd_dev_lock;
+        spinlock_t             obd_dev_lock;
          __u64                  obd_last_committed;
          struct fsfilt_operations *obd_fsops;
+        struct obd_statfs      obd_osfs;
+        unsigned long          obd_osfs_age;
  
          /* XXX encapsulate all this recovery data into one struct */
          svc_handler_t                    obd_recovery_handler;
@@ -297,19 +369,25 @@ struct obd_device {
                  struct mds_obd mds;
                  struct client_obd cli;
                  struct ost_obd ost;
-                struct echo_client_obd echo_client;;
+                struct echo_client_obd echo_client;
                  struct ldlm_obd ldlm;
                  struct echo_obd echo;
                  struct recovd_obd recovd;
                  struct lov_obd lov;
                  struct cache_obd cobd;
                  struct ptlbd_obd ptlbd;
+                struct mgmtcli_obd mgmtcli;
          } u;
         /* Fields used by LProcFS */
          unsigned int           obd_cntr_base;
          struct lprocfs_stats  *obd_stats;
  };
  
+#define OBD_OPT_FORCE           0x0001
+#define OBD_OPT_FAILOVER        0x0002
+
+#define OBD_LLOG_FL_SENDNOW     0x0001
+
  struct obd_ops {
          struct module *o_owner;
          int (*o_iocontrol)(unsigned int cmd, struct lustre_handle *, int len,
@@ -321,16 +399,17 @@ struct obd_ops {
          int (*o_attach)(struct obd_device *dev, obd_count len, void *data);
          int (*o_detach)(struct obd_device *dev);
          int (*o_setup) (struct obd_device *dev, obd_count len, void *data);
-        int (*o_cleanup)(struct obd_device *dev, int force, int failover);
+        int (*o_cleanup)(struct obd_device *dev, int flags);
          int (*o_connect)(struct lustre_handle *conn, struct obd_device *src,
                           struct obd_uuid *cluuid);
-        int (*o_disconnect)(struct lustre_handle *conn, int failover);
+        int (*o_disconnect)(struct lustre_handle *conn, int flags);
  
-        int (*o_statfs)(struct obd_export *exp, struct obd_statfs *osfs);
-        int (*o_syncfs)(struct obd_export *);
+        int (*o_statfs)(struct obd_device *obd, struct obd_statfs *osfs,
+                        unsigned long max_age);
+        int (*o_syncfs)(struct obd_export *exp);
          int (*o_packmd)(struct lustre_handle *, struct lov_mds_md **disk_tgt,
                          struct lov_stripe_md *mem_src);
-        int (*o_unpackmd)(struct lustre_handle *,
+        int (*o_unpackmd)(struct lustre_handle *conn,
                            struct lov_stripe_md **mem_tgt,
                            struct lov_mds_md *disk_src, int disk_len);
          int (*o_preallocate)(struct lustre_handle *, obd_count *req,
@@ -344,42 +423,42 @@ struct obd_ops {
          int (*o_getattr)(struct lustre_handle *conn, struct obdo *oa,
                           struct lov_stripe_md *ea);
          int (*o_getattr_async)(struct lustre_handle *conn, struct obdo *oa,
-                               struct lov_stripe_md *ea, 
+                               struct lov_stripe_md *ea,
                                 struct ptlrpc_request_set *set);
          int (*o_open)(struct lustre_handle *conn, struct obdo *oa,
                        struct lov_stripe_md *ea, struct obd_trans_info *oti,
                        struct obd_client_handle *och);
          int (*o_close)(struct lustre_handle *conn, struct obdo *oa,
                         struct lov_stripe_md *ea, struct obd_trans_info *oti);
-        int (*o_brw)(int rw, struct lustre_handle *conn,
+        int (*o_brw)(int rw, struct lustre_handle *conn, struct obdo *oa,
                       struct lov_stripe_md *ea, obd_count oa_bufs,
                       struct brw_page *pgarr, struct obd_trans_info *oti);
-        int (*o_brw_async)(int rw, struct lustre_handle *conn,
+        int (*o_brw_async)(int rw, struct lustre_handle *conn, struct obdo *oa,
                             struct lov_stripe_md *ea, obd_count oa_bufs,
                             struct brw_page *pgarr, struct ptlrpc_request_set *,
                             struct obd_trans_info *oti);
-        int (*o_punch)(struct lustre_handle *conn, struct obdo *tgt,
+        int (*o_punch)(struct lustre_handle *conn, struct obdo *oa,
                         struct lov_stripe_md *ea, obd_size count,
                         obd_off offset, struct obd_trans_info *oti);
-        int (*o_sync)(struct lustre_handle *conn, struct obdo *tgt,
+        int (*o_sync)(struct lustre_handle *conn, struct obdo *oa,
                        obd_size count, obd_off offset);
-        int (*o_migrate)(struct lustre_handle *conn, struct obdo *dst,
-                         struct obdo *src, obd_size count, obd_off offset);
-        int (*o_copy)(struct lustre_handle *dstconn, struct obdo *dst,
-                      struct lustre_handle *srconn, struct obdo *src,
+        int (*o_migrate)(struct lustre_handle *conn, struct lov_stripe_md *dst,
+                         struct lov_stripe_md *src, obd_size count,
+                         obd_off offset);
+        int (*o_copy)(struct lustre_handle *dstconn, struct lov_stripe_md *dst,
+                      struct lustre_handle *srconn, struct lov_stripe_md *src,
                        obd_size count, obd_off offset, struct obd_trans_info *);
          int (*o_iterate)(struct lustre_handle *conn,
                           int (*)(obd_id, obd_gr, void *),
                           obd_id *startid, obd_gr group, void *data);
-        int (*o_preprw)(int cmd, struct obd_export *, struct obdo *obdo,
+        int (*o_preprw)(int cmd, struct obd_export *exp, struct obdo *oa,
                          int objcount, struct obd_ioobj *obj,
                          int niocount, struct niobuf_remote *remote,
-                        struct niobuf_local *local, void **desc_private, 
-                        struct obd_trans_info *oti);
-        int (*o_commitrw)(int cmd, struct obd_export *,
+                        struct niobuf_local *local, struct obd_trans_info *oti);
+        int (*o_commitrw)(int cmd, struct obd_export *exp, struct obdo *oa,
                            int objcount, struct obd_ioobj *obj,
                            int niocount, struct niobuf_local *local,
-                          void *desc_private, struct obd_trans_info *oti);
+                          struct obd_trans_info *oti);
          int (*o_enqueue)(struct lustre_handle *conn, struct lov_stripe_md *md,
                           struct lustre_handle *parent_lock,
                           __u32 type, void *cookie, int cookielen, __u32 mode,
@@ -391,10 +470,17 @@ struct obd_ops {
          int (*o_cancel)(struct lustre_handle *, struct lov_stripe_md *md,
                          __u32 mode, struct lustre_handle *);
          int (*o_cancel_unused)(struct lustre_handle *, struct lov_stripe_md *,
-                               int local_only, void *opaque);
-        int (*o_san_preprw)(int cmd, struct lustre_handle *conn,
-                            int objcount, struct obd_ioobj *obj,
-                            int niocount, struct niobuf_remote *remote);
+                               int flags, void *opaque);
+        int (*o_log_add)(struct lustre_handle *conn,
+                         struct llog_handle *cathandle,
+                         struct llog_trans_hdr *rec, struct lov_stripe_md *lsm,
+                         struct llog_cookie *logcookies, int numcookies);
+        int (*o_log_cancel)(struct lustre_handle *, struct lov_stripe_md *,
+                            int count, struct llog_cookie *, int flags);
+        int (*o_san_preprw)(int cmd, struct obd_export *exp,
+                            struct obdo *oa, int objcount,
+                            struct obd_ioobj *obj, int niocount,
+                            struct niobuf_remote *remote);
          int (*o_mark_page_dirty)(struct lustre_handle *conn,
                                   struct lov_stripe_md *ea,
                                   unsigned long offset);
@@ -406,14 +492,22 @@ struct obd_ops {
          int (*o_last_dirty_offset)(struct lustre_handle *conn,
                                     struct lov_stripe_md *ea,
                                     unsigned long *offset);
-        void (*o_destroy_export)(struct obd_export *export);
+        void (*o_destroy_export)(struct obd_export *exp);
+
+        /* metadata-only methods */
+        int (*o_pin)(struct lustre_handle *, obd_id ino, __u32 gen, int type,
+                     struct obd_client_handle *, int flag);
+        int (*o_unpin)(struct lustre_handle *, struct obd_client_handle *, int);
+
+        /* If adding ops, also update obdclass/lprocfs_status.c,
+         * and include/linux/obd_class.h */
  };
  
  static inline void obd_transno_commit_cb(struct obd_device *obd, __u64 transno,
                                           int error)
  {
          if (error) {
-                CDEBUG(D_ERROR, "%s: transno "LPD64" commit error: %d\n",
+                CERROR("%s: transno "LPD64" commit error: %d\n",
                         obd->obd_name, transno, error);
                  return;
          }
@@ -425,8 +519,4 @@ static inline void obd_transno_commit_cb(struct obd_device *obd, __u64 transno,
          }
  }
  
-/* When adding a function pointer to struct obd_ops, please update 
- * function lprocfs_alloc_obd_counters() in obdclass/lprocfs_status.c
- * accordingly. */
-
  #endif /* __OBD_H */
diff --git a/lustre/include/linux/obd_class.h b/lustre/include/linux/obd_class.h

index 0c33ceb..2e57d2f 100644 (file)
--- a/lustre/include/linux/obd_class.h
+++ b/lustre/include/linux/obd_class.h
@@ -32,6 +32,7 @@
  #include <linux/types.h>
  #include <linux/fs.h>
  #include <linux/time.h>
+#include <linux/timer.h>
  #endif
  
  #include <linux/obd_support.h>
@@ -81,6 +82,17 @@ void class_disconnect_exports(struct obd_device *obddev, int failover);
  int class_multi_setup(struct obd_device *obddev, uint32_t len, void *data);
  int class_multi_cleanup(struct obd_device *obddev);
  
+/* obdo.c */
+#ifdef __KERNEL__
+void obdo_from_iattr(struct obdo *oa, struct iattr *attr, unsigned ia_valid);
+void iattr_from_obdo(struct iattr *attr, struct obdo *oa, obd_flag valid);
+void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid);
+void obdo_refresh_inode(struct inode *dst, struct obdo *src, obd_flag valid);
+void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid);
+#endif
+void obdo_cpy_md(struct obdo *dst, struct obdo *src, obd_flag valid);
+int obdo_cmp_md(struct obdo *dst, struct obdo *src, obd_flag compare);
+
  static inline int obd_check_conn(struct lustre_handle *conn)
  {
          struct obd_device *obd;
@@ -277,7 +289,7 @@ static inline int obd_setup(struct obd_device *obd, int datalen, void *data)
          RETURN(rc);
  }
  
-static inline int obd_cleanup(struct obd_device *obd, int force, int failover)
+static inline int obd_cleanup(struct obd_device *obd, int flags)
  {
          int rc;
          ENTRY;
@@ -286,7 +298,7 @@ static inline int obd_cleanup(struct obd_device *obd, int force, int failover)
          OBD_CHECK_OP(obd, cleanup);
          OBD_COUNTER_INCREMENT(obd, cleanup);
  
-        rc = OBP(obd, cleanup)(obd, force, failover);
+        rc = OBP(obd, cleanup)(obd, flags);
          RETURN(rc);
  }
  
@@ -518,7 +530,7 @@ static inline int obd_connect(struct lustre_handle *conn,
          RETURN(rc);
  }
  
-static inline int obd_disconnect(struct lustre_handle *conn, int failover)
+static inline int obd_disconnect(struct lustre_handle *conn, int flags)
  {
          struct obd_export *exp;
          int rc;
@@ -528,7 +540,7 @@ static inline int obd_disconnect(struct lustre_handle *conn, int failover)
          OBD_CHECK_OP(exp->exp_obd, disconnect);
          OBD_COUNTER_INCREMENT(exp->exp_obd, disconnect);
  
-        rc = OBP(exp->exp_obd, disconnect)(conn, failover);
+        rc = OBP(exp->exp_obd, disconnect)(conn, flags);
          class_export_put(exp);
          RETURN(rc);
  }
@@ -541,15 +553,35 @@ static inline void obd_destroy_export(struct obd_export *exp)
          EXIT;
  }
  
-static inline int obd_statfs(struct obd_export *exp, struct obd_statfs *osfs)
+/* Fallback for environments that do not already provide time_before():
+ * true when t1 is earlier than t2, decided by the sign of their difference.
+ * Arguments are parenthesized so expressions can be passed safely. */
+#ifndef time_before
+#define time_before(t1, t2) ((long)(t2) - (long)(t1) > 0)
+#endif
+
+/* Fetch filesystem statistics for @obd into @osfs, with a per-device cache.
+ *
+ * The o_statfs method is called when nothing has been cached yet
+ * (obd_osfs_age == 0) or the cached copy is older than @max_age (compared
+ * against the jiffies value stored at caching time); otherwise @osfs is
+ * filled from obd->obd_osfs.  The cache is read and written under
+ * obd_dev_lock.  Only a successful reply is cached, so the result of a
+ * failed call is never handed to later callers as valid data.
+ *
+ * Returns -EINVAL for a NULL @obd, otherwise the method's return code
+ * (0 when served from the cache). */
+static inline int obd_statfs(struct obd_device *obd, struct obd_statfs *osfs,
+                             unsigned long max_age)
  {
-        int rc;
+        int rc = 0;
          ENTRY;
  
-        OBD_CHECK_OP(exp->exp_obd, statfs);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, statfs);
-
-        rc = OBP(exp->exp_obd, statfs)(exp, osfs);
+        if (obd == NULL)
+                RETURN(-EINVAL);
+
+        OBD_CHECK_OP(obd, statfs);
+        OBD_COUNTER_INCREMENT(obd, statfs);
+
+        CDEBUG(D_SUPER, "osfs %lu, max_age %lu\n", obd->obd_osfs_age, max_age);
+        if (obd->obd_osfs_age == 0 || time_before(obd->obd_osfs_age, max_age)) {
+                rc = OBP(obd, statfs)(obd, osfs, max_age);
+                /* do not cache (or time-stamp) the result of a failed call */
+                if (rc == 0) {
+                        spin_lock(&obd->obd_dev_lock);
+                        memcpy(&obd->obd_osfs, osfs, sizeof(obd->obd_osfs));
+                        obd->obd_osfs_age = jiffies;
+                        spin_unlock(&obd->obd_dev_lock);
+                }
+        } else {
+                CDEBUG(D_SUPER, "using cached obd_statfs data\n");
+                spin_lock(&obd->obd_dev_lock);
+                memcpy(osfs, &obd->obd_osfs, sizeof(*osfs));
+                spin_unlock(&obd->obd_dev_lock);
+        }
          RETURN(rc);
  }
  
@@ -582,7 +614,7 @@ static inline int obd_punch(struct lustre_handle *conn, struct obdo *oa,
          RETURN(rc);
  }
  
-static inline int obd_brw(int cmd, struct lustre_handle *conn,
+static inline int obd_brw(int cmd, struct lustre_handle *conn, struct obdo *oa,
                            struct lov_stripe_md *ea, obd_count oa_bufs,
                            struct brw_page *pg, struct obd_trans_info *oti)
  {
@@ -600,14 +632,14 @@ static inline int obd_brw(int cmd, struct lustre_handle *conn,
                  LBUG();
          }
  
-        rc = OBP(exp->exp_obd, brw)(cmd, conn, ea, oa_bufs, pg, oti);
+        rc = OBP(exp->exp_obd, brw)(cmd, conn, oa, ea, oa_bufs, pg, oti);
          class_export_put(exp);
          RETURN(rc);
  }
  
  static inline int obd_brw_async(int cmd, struct lustre_handle *conn,
-                                struct lov_stripe_md *ea, obd_count oa_bufs,
-                                struct brw_page *pg,
+                                struct obdo *oa, struct lov_stripe_md *ea,
+                                obd_count oa_bufs, struct brw_page *pg,
                                  struct ptlrpc_request_set *set,
                                  struct obd_trans_info *oti)
  {
@@ -624,15 +656,16 @@ static inline int obd_brw_async(int cmd, struct lustre_handle *conn,
                  LBUG();
          }
  
-        rc = OBP(exp->exp_obd, brw_async)(cmd, conn, ea, oa_bufs, pg, set, oti);
+        rc = OBP(exp->exp_obd, brw_async)(cmd, conn, oa, ea, oa_bufs, pg, set,
+                                          oti);
          class_export_put(exp);
          RETURN(rc);
  }
  
-static inline int obd_preprw(int cmd, struct obd_export *exp, struct obdo *obdo,
+static inline int obd_preprw(int cmd, struct obd_export *exp, struct obdo *oa,
                               int objcount, struct obd_ioobj *obj,
                               int niocount, struct niobuf_remote *remote,
-                             struct niobuf_local *local, void **desc_private,
+                             struct niobuf_local *local,
                               struct obd_trans_info *oti)
  {
          int rc;
@@ -641,15 +674,15 @@ static inline int obd_preprw(int cmd, struct obd_export *exp, struct obdo *obdo,
          OBD_CHECK_OP(exp->exp_obd, preprw);
          OBD_COUNTER_INCREMENT(exp->exp_obd, preprw);
  
-        rc = OBP(exp->exp_obd, preprw)(cmd, exp, obdo, objcount, obj, niocount,
-                                       remote, local, desc_private, oti);
+        rc = OBP(exp->exp_obd, preprw)(cmd, exp, oa, objcount, obj, niocount,
+                                       remote, local, oti);
          RETURN(rc);
  }
  
-static inline int obd_commitrw(int cmd, struct obd_export *exp,
+static inline int obd_commitrw(int cmd, struct obd_export *exp, struct obdo *oa,
                                 int objcount, struct obd_ioobj *obj,
                                 int niocount, struct niobuf_local *local,
-                               void *desc_private, struct obd_trans_info *oti)
+                               struct obd_trans_info *oti)
  {
          int rc;
          ENTRY;
@@ -657,8 +690,8 @@ static inline int obd_commitrw(int cmd, struct obd_export *exp,
          OBD_CHECK_OP(exp->exp_obd, commitrw);
          OBD_COUNTER_INCREMENT(exp->exp_obd, commitrw);
  
-        rc = OBP(exp->exp_obd, commitrw)(cmd, exp, objcount, obj, niocount,
-                                         local, desc_private, oti);
+        rc = OBP(exp->exp_obd, commitrw)(cmd, exp, oa, objcount, obj, niocount,
+                                         local, oti);
          RETURN(rc);
  }
  
@@ -754,25 +787,92 @@ static inline int obd_cancel_unused(struct lustre_handle *conn,
          RETURN(rc);
  }
  
-static inline int obd_san_preprw(int cmd, struct lustre_handle *conn,
+/* Dispatch to the o_log_add method of the device behind @conn.
+ *
+ * OBD_CHECK_SETUP() is relied on to fill in exp from @conn.  The method is
+ * checked and counted, then called with the caller's arguments unchanged;
+ * the export is released with class_export_put() before the method's return
+ * code is handed back. */
+static inline int obd_log_add(struct lustre_handle *conn,
+                              struct llog_handle *cathandle,
+                              struct llog_trans_hdr *rec,
+                              struct lov_stripe_md *lsm,
+                              struct llog_cookie *logcookies, int numcookies)
+{
+        struct obd_export *exp;
+        struct obd_device *obd;
+        int rc;
+        ENTRY;
+
+        OBD_CHECK_SETUP(conn, exp);
+        obd = exp->exp_obd;
+        OBD_CHECK_OP(obd, log_add);
+        OBD_COUNTER_INCREMENT(obd, log_add);
+
+        rc = OBP(obd, log_add)(conn, cathandle, rec, lsm, logcookies,
+                               numcookies);
+        class_export_put(exp);
+        RETURN(rc);
+}
+
+/* Dispatch to the o_log_cancel method of the device behind @conn.
+ *
+ * Same shape as obd_log_add(): OBD_CHECK_SETUP() is relied on to fill in exp,
+ * the method is checked and counted, called with @lsm, @count, @cookies and
+ * @flags as given, and the export is released before returning its code. */
+static inline int obd_log_cancel(struct lustre_handle *conn,
+                                 struct lov_stripe_md *lsm, int count,
+                                 struct llog_cookie *cookies, int flags)
+{
+        struct obd_export *exp;
+        struct obd_device *obd;
+        int rc;
+        ENTRY;
+
+        OBD_CHECK_SETUP(conn, exp);
+        obd = exp->exp_obd;
+        OBD_CHECK_OP(obd, log_cancel);
+        OBD_COUNTER_INCREMENT(obd, log_cancel);
+
+        rc = OBP(obd, log_cancel)(conn, lsm, count, cookies, flags);
+        class_export_put(exp);
+        RETURN(rc);
+}
+
+/* Dispatch to the o_san_preprw method of @exp's device and return its code.
+ *
+ * The export is now supplied by the caller instead of being looked up from
+ * a connection handle.  The presence check guards san_preprw itself, the
+ * method that is actually invoked, so a device lacking it gets the
+ * OBD_CHECK_OP() failure path rather than a call through a NULL pointer. */
+static inline int obd_san_preprw(int cmd, struct obd_export *exp,
+                                 struct obdo *oa,
                                   int objcount, struct obd_ioobj *obj,
                                   int niocount, struct niobuf_remote *remote)
  {
-        struct obd_export *exp;
          int rc;
  
-        OBD_CHECK_ACTIVE(conn, exp);
-        OBD_CHECK_OP(exp->exp_obd, preprw);
+        OBD_CHECK_OP(exp->exp_obd, san_preprw);
+        /* NOTE(review): still accounted under the preprw counter; switch to
+         * san_preprw once lprocfs is confirmed to register that counter */
          OBD_COUNTER_INCREMENT(exp->exp_obd, preprw);
  
-        rc = OBP(exp->exp_obd, san_preprw)(cmd, conn, objcount, obj,
+        rc = OBP(exp->exp_obd, san_preprw)(cmd, exp, oa, objcount, obj,
                                              niocount, remote);
+        /* NOTE(review): exp is no longer acquired in this function, yet its
+         * reference is dropped here - confirm callers expect that */
          class_export_put(exp);
-        RETURN(rc);
+        return(rc);
+}
+
+/* Dispatch to the o_pin method of the device behind @conn.
+ *
+ * OBD_CHECK_ACTIVE() is relied on to fill in exp from @conn.  The method is
+ * checked and counted, called with @ino, @gen, @type, @handle and @flag as
+ * given, and the export is released before the method's code is returned. */
+static inline int obd_pin(struct lustre_handle *conn, obd_id ino, __u32 gen,
+                          int type, struct obd_client_handle *handle, int flag)
+{
+        struct obd_export *exp;
+        struct obd_device *obd;
+        int rc;
+
+        OBD_CHECK_ACTIVE(conn, exp);
+        obd = exp->exp_obd;
+        OBD_CHECK_OP(obd, pin);
+        OBD_COUNTER_INCREMENT(obd, pin);
+
+        rc = OBP(obd, pin)(conn, ino, gen, type, handle, flag);
+        class_export_put(exp);
+        return(rc);
+}
+
+/* Dispatch to the o_unpin method of the device behind @conn.
+ *
+ * Counterpart of obd_pin() with the same shape: OBD_CHECK_ACTIVE() is relied
+ * on to fill in exp, the method is checked and counted, called with @handle
+ * and @flag, and the export is released before its code is returned. */
+static inline int obd_unpin(struct lustre_handle *conn,
+                            struct obd_client_handle *handle, int flag)
+{
+        struct obd_export *exp;
+        struct obd_device *obd;
+        int rc;
+
+        OBD_CHECK_ACTIVE(conn, exp);
+        obd = exp->exp_obd;
+        OBD_CHECK_OP(obd, unpin);
+        OBD_COUNTER_INCREMENT(obd, unpin);
+
+        rc = OBP(obd, unpin)(conn, handle, flag);
+        class_export_put(exp);
+        return(rc);
  }
  
  static inline int obd_mark_page_dirty(struct lustre_handle *conn,
-                                      struct lov_stripe_md *lsm,  
+                                      struct lov_stripe_md *lsm,
                                        unsigned long offset)
  {
          struct obd_export *exp;
@@ -780,14 +880,15 @@ static inline int obd_mark_page_dirty(struct lustre_handle *conn,
  
          OBD_CHECK_SETUP(conn, exp);
          OBD_CHECK_OP(exp->exp_obd, mark_page_dirty);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, mark_page_dirty);
  
          rc = OBP(exp->exp_obd, mark_page_dirty)(conn, lsm, offset);
          class_export_put(exp);
-        RETURN(rc);
+        return(rc);
  }
  
  static inline int obd_clear_dirty_pages(struct lustre_handle *conn,
-                                        struct lov_stripe_md *lsm,  
+                                        struct lov_stripe_md *lsm,
                                          unsigned long start,
                                          unsigned long end,
                                          unsigned long *cleared)
@@ -797,11 +898,12 @@ static inline int obd_clear_dirty_pages(struct lustre_handle *conn,
  
          OBD_CHECK_SETUP(conn, exp);
          OBD_CHECK_OP(exp->exp_obd, clear_dirty_pages);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, clear_dirty_pages);
  
          rc = OBP(exp->exp_obd, clear_dirty_pages)(conn, lsm, start, end,
                                                    cleared);
          class_export_put(exp);
-        RETURN(rc);
+        return(rc);
  }
  
  static inline int obd_last_dirty_offset(struct lustre_handle *conn,
@@ -813,10 +915,11 @@ static inline int obd_last_dirty_offset(struct lustre_handle *conn,
  
          OBD_CHECK_SETUP(conn, exp);
          OBD_CHECK_OP(exp->exp_obd, last_dirty_offset);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, last_dirty_offset);
  
          rc = OBP(exp->exp_obd, last_dirty_offset)(conn, lsm, offset);
          class_export_put(exp);
-        RETURN(rc);
+        return(rc);
  }
  
  /* OBD Metadata Support */
@@ -824,11 +927,6 @@ static inline int obd_last_dirty_offset(struct lustre_handle *conn,
  extern int obd_init_caches(void);
  extern void obd_cleanup_caches(void);
  
-static inline struct lustre_handle *obdo_handle(struct obdo *oa)
-{
-        return (struct lustre_handle *)&oa->o_inline;
-}
-
  /* support routines */
  extern kmem_cache_t *obdo_cachep;
  static inline struct obdo *obdo_alloc(void)
@@ -838,6 +936,7 @@ static inline struct obdo *obdo_alloc(void)
          oa = kmem_cache_alloc(obdo_cachep, SLAB_KERNEL);
          if (oa == NULL)
                  LBUG();
+        CDEBUG(D_MALLOC, "kmem_cache_alloced oa at %p\n", oa);
          memset(oa, 0, sizeof (*oa));
  
          return oa;
@@ -847,6 +946,7 @@ static inline void obdo_free(struct obdo *oa)
  {
          if (!oa)
                  return;
+        CDEBUG(D_MALLOC, "kmem_cache_freed oa at %p\n", oa);
          kmem_cache_free(obdo_cachep, oa);
  }
  
@@ -855,268 +955,6 @@ static inline void obdo_free(struct obdo *oa)
  #define kdev_t_to_nr(dev) dev
  #endif
  
-#ifdef __KERNEL__
-static inline void obdo_from_iattr(struct obdo *oa, struct iattr *attr)
-{
-        unsigned int ia_valid = attr->ia_valid;
-
-        if (ia_valid & ATTR_ATIME) {
-                oa->o_atime = LTIME_S(attr->ia_atime);
-                oa->o_valid |= OBD_MD_FLATIME;
-        }
-        if (ia_valid & ATTR_MTIME) {
-                oa->o_mtime = LTIME_S(attr->ia_mtime);
-                oa->o_valid |= OBD_MD_FLMTIME;
-        }
-        if (ia_valid & ATTR_CTIME) {
-                oa->o_ctime = LTIME_S(attr->ia_ctime);
-                oa->o_valid |= OBD_MD_FLCTIME;
-        }
-        if (ia_valid & ATTR_SIZE) {
-                oa->o_size = attr->ia_size;
-                oa->o_valid |= OBD_MD_FLSIZE;
-        }
-        if (ia_valid & ATTR_MODE) {
-                oa->o_mode = attr->ia_mode;
-                oa->o_valid |= OBD_MD_FLTYPE | OBD_MD_FLMODE;
-                if (!in_group_p(oa->o_gid) && !capable(CAP_FSETID))
-                        oa->o_mode &= ~S_ISGID;
-        }
-        if (ia_valid & ATTR_UID) {
-                oa->o_uid = attr->ia_uid;
-                oa->o_valid |= OBD_MD_FLUID;
-        }
-        if (ia_valid & ATTR_GID) {
-                oa->o_gid = attr->ia_gid;
-                oa->o_valid |= OBD_MD_FLGID;
-        }
-}
-
-
-static inline void iattr_from_obdo(struct iattr *attr, struct obdo *oa,
-                                   obd_flag valid)
-{
-        memset(attr, 0, sizeof(*attr));
-        if (valid & OBD_MD_FLATIME) {
-                LTIME_S(attr->ia_atime) = oa->o_atime;
-                attr->ia_valid |= ATTR_ATIME;
-        }
-        if (valid & OBD_MD_FLMTIME) {
-                LTIME_S(attr->ia_mtime) = oa->o_mtime;
-                attr->ia_valid |= ATTR_MTIME;
-        }
-        if (valid & OBD_MD_FLCTIME) {
-                LTIME_S(attr->ia_ctime) = oa->o_ctime;
-                attr->ia_valid |= ATTR_CTIME;
-        }
-        if (valid & OBD_MD_FLSIZE) {
-                attr->ia_size = oa->o_size;
-                attr->ia_valid |= ATTR_SIZE;
-        }
-        if (valid & OBD_MD_FLTYPE) {
-                attr->ia_mode = (attr->ia_mode & ~S_IFMT)|(oa->o_mode & S_IFMT);
-                attr->ia_valid |= ATTR_MODE;
-        }
-        if (valid & OBD_MD_FLMODE) {
-                attr->ia_mode = (attr->ia_mode & S_IFMT)|(oa->o_mode & ~S_IFMT);
-                attr->ia_valid |= ATTR_MODE;
-                if (!in_group_p(oa->o_gid) && !capable(CAP_FSETID))
-                        attr->ia_mode &= ~S_ISGID;
-        }
-        if (valid & OBD_MD_FLUID)
-        {
-                attr->ia_uid = oa->o_uid;
-                attr->ia_valid |= ATTR_UID;
-        }
-        if (valid & OBD_MD_FLGID) {
-                attr->ia_gid = oa->o_gid;
-                attr->ia_valid |= ATTR_GID;
-        }
-}
-
-
-/* WARNING: the file systems must take care not to tinker with
-   attributes they don't manage (such as blocks). */
-
-
-static inline void obdo_from_inode(struct obdo *dst, struct inode *src,
-                                   obd_flag valid)
-{
-        if (valid & OBD_MD_FLATIME)
-                dst->o_atime = LTIME_S(src->i_atime);
-        if (valid & OBD_MD_FLMTIME)
-                dst->o_mtime = LTIME_S(src->i_mtime);
-        if (valid & OBD_MD_FLCTIME)
-                dst->o_ctime = LTIME_S(src->i_ctime);
-        if (valid & OBD_MD_FLSIZE)
-                dst->o_size = src->i_size;
-        if (valid & OBD_MD_FLBLOCKS)   /* allocation of space */
-                dst->o_blocks = src->i_blocks;
-        if (valid & OBD_MD_FLBLKSZ)
-                dst->o_blksize = src->i_blksize;
-        if (valid & OBD_MD_FLTYPE)
-                dst->o_mode = (dst->o_mode & ~S_IFMT) | (src->i_mode & S_IFMT);
-        if (valid & OBD_MD_FLMODE)
-                dst->o_mode = (dst->o_mode & S_IFMT) | (src->i_mode & ~S_IFMT);
-        if (valid & OBD_MD_FLUID)
-                dst->o_uid = src->i_uid;
-        if (valid & OBD_MD_FLGID)
-                dst->o_gid = src->i_gid;
-        if (valid & OBD_MD_FLFLAGS)
-                dst->o_flags = src->i_flags;
-        if (valid & OBD_MD_FLNLINK)
-                dst->o_nlink = src->i_nlink;
-        if (valid & OBD_MD_FLGENER)
-                dst->o_generation = src->i_generation;
-        if (valid & OBD_MD_FLRDEV)
-                dst->o_rdev = (__u32)kdev_t_to_nr(src->i_rdev);
-
-        dst->o_valid |= (valid & ~OBD_MD_FLID);
-}
-
-static inline void obdo_refresh_inode(struct inode *dst, struct obdo *src,
-                                      obd_flag valid)
-{
-        valid &= src->o_valid;
-
-        if (valid & OBD_MD_FLATIME && src->o_atime > LTIME_S(dst->i_atime))
-                LTIME_S(dst->i_atime) = src->o_atime;
-        if (valid & OBD_MD_FLMTIME && src->o_mtime > LTIME_S(dst->i_mtime))
-                LTIME_S(dst->i_mtime) = src->o_mtime;
-        if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(dst->i_ctime))
-                LTIME_S(dst->i_ctime) = src->o_ctime;
-        if (valid & OBD_MD_FLSIZE && src->o_size > dst->i_size)
-                dst->i_size = src->o_size;
-        /* allocation of space */
-        if (valid & OBD_MD_FLBLOCKS && src->o_blocks > dst->i_blocks)
-                dst->i_blocks = src->o_blocks;
-}
-
-static inline void obdo_to_inode(struct inode *dst, struct obdo *src,
-                                 obd_flag valid)
-{
-        valid &= src->o_valid;
-
-        if (valid & OBD_MD_FLATIME)
-                LTIME_S(dst->i_atime) = src->o_atime;
-        if (valid & OBD_MD_FLMTIME)
-                LTIME_S(dst->i_mtime) = src->o_mtime;
-        if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(dst->i_ctime))
-                LTIME_S(dst->i_ctime) = src->o_ctime;
-        if (valid & OBD_MD_FLSIZE)
-                dst->i_size = src->o_size;
-        if (valid & OBD_MD_FLBLOCKS) /* allocation of space */
-                dst->i_blocks = src->o_blocks;
-        if (valid & OBD_MD_FLBLKSZ)
-                dst->i_blksize = src->o_blksize;
-        if (valid & OBD_MD_FLTYPE)
-                dst->i_mode = (dst->i_mode & ~S_IFMT) | (src->o_mode & S_IFMT);
-        if (valid & OBD_MD_FLMODE)
-                dst->i_mode = (dst->i_mode & S_IFMT) | (src->o_mode & ~S_IFMT);
-        if (valid & OBD_MD_FLUID)
-                dst->i_uid = src->o_uid;
-        if (valid & OBD_MD_FLGID)
-                dst->i_gid = src->o_gid;
-        if (valid & OBD_MD_FLFLAGS)
-                dst->i_flags = src->o_flags;
-        if (valid & OBD_MD_FLNLINK)
-                dst->i_nlink = src->o_nlink;
-        if (valid & OBD_MD_FLGENER)
-                dst->i_generation = src->o_generation;
-        if (valid & OBD_MD_FLRDEV)
-                dst->i_rdev = to_kdev_t(src->o_rdev);
-}
-#endif
-
-static inline void obdo_cpy_md(struct obdo *dst, struct obdo *src,
-                               obd_flag valid)
-{
-#ifdef __KERNEL__
-        CDEBUG(D_INODE, "src obdo %Ld valid 0x%x, dst obdo %Ld\n",
-               (unsigned long long)src->o_id, src->o_valid,
-               (unsigned long long)dst->o_id);
-#endif
-        if (valid & OBD_MD_FLATIME)
-                dst->o_atime = src->o_atime;
-        if (valid & OBD_MD_FLMTIME)
-                dst->o_mtime = src->o_mtime;
-        if (valid & OBD_MD_FLCTIME)
-                dst->o_ctime = src->o_ctime;
-        if (valid & OBD_MD_FLSIZE)
-                dst->o_size = src->o_size;
-        if (valid & OBD_MD_FLBLOCKS) /* allocation of space */
-                dst->o_blocks = src->o_blocks;
-        if (valid & OBD_MD_FLBLKSZ)
-                dst->o_blksize = src->o_blksize;
-        if (valid & OBD_MD_FLTYPE)
-                dst->o_mode = (dst->o_mode & ~S_IFMT) | (src->o_mode & S_IFMT);
-        if (valid & OBD_MD_FLMODE)
-                dst->o_mode = (dst->o_mode & S_IFMT) | (src->o_mode & ~S_IFMT);
-        if (valid & OBD_MD_FLUID)
-                dst->o_uid = src->o_uid;
-        if (valid & OBD_MD_FLGID)
-                dst->o_gid = src->o_gid;
-        if (valid & OBD_MD_FLFLAGS)
-                dst->o_flags = src->o_flags;
-        /*
-        if (valid & OBD_MD_FLOBDFLG)
-                dst->o_obdflags = src->o_obdflags;
-        */
-        if (valid & OBD_MD_FLNLINK)
-                dst->o_nlink = src->o_nlink;
-        if (valid & OBD_MD_FLGENER)
-                dst->o_generation = src->o_generation;
-        if (valid & OBD_MD_FLRDEV)
-                dst->o_rdev = src->o_rdev;
-        if (valid & OBD_MD_FLINLINE &&
-             src->o_obdflags & OBD_FL_INLINEDATA) {
-                memcpy(dst->o_inline, src->o_inline, sizeof(src->o_inline));
-                dst->o_obdflags |= OBD_FL_INLINEDATA;
-        }
-
-        dst->o_valid |= valid;
-}
-
-
-/* returns FALSE if comparison (by flags) is same, TRUE if changed */
-static inline int obdo_cmp_md(struct obdo *dst, struct obdo *src,
-                              obd_flag compare)
-{
-        int res = 0;
-
-        if ( compare & OBD_MD_FLATIME )
-                res = (res || (dst->o_atime != src->o_atime));
-        if ( compare & OBD_MD_FLMTIME )
-                res = (res || (dst->o_mtime != src->o_mtime));
-        if ( compare & OBD_MD_FLCTIME )
-                res = (res || (dst->o_ctime != src->o_ctime));
-        if ( compare & OBD_MD_FLSIZE )
-                res = (res || (dst->o_size != src->o_size));
-        if ( compare & OBD_MD_FLBLOCKS ) /* allocation of space */
-                res = (res || (dst->o_blocks != src->o_blocks));
-        if ( compare & OBD_MD_FLBLKSZ )
-                res = (res || (dst->o_blksize != src->o_blksize));
-        if ( compare & OBD_MD_FLTYPE )
-                res = (res || (((dst->o_mode ^ src->o_mode) & S_IFMT) != 0));
-        if ( compare & OBD_MD_FLMODE )
-                res = (res || (((dst->o_mode ^ src->o_mode) & ~S_IFMT) != 0));
-        if ( compare & OBD_MD_FLUID )
-                res = (res || (dst->o_uid != src->o_uid));
-        if ( compare & OBD_MD_FLGID )
-                res = (res || (dst->o_gid != src->o_gid));
-        if ( compare & OBD_MD_FLFLAGS )
-                res = (res || (dst->o_flags != src->o_flags));
-        if ( compare & OBD_MD_FLNLINK )
-                res = (res || (dst->o_nlink != src->o_nlink));
-        if ( compare & OBD_MD_FLGENER )
-                res = (res || (dst->o_generation != src->o_generation));
-        /* XXX Don't know if thses should be included here - wasn't previously
-        if ( compare & OBD_MD_FLINLINE )
-                res = (res || memcmp(dst->o_inline, src->o_inline));
-        */
-        return res;
-}
-
  /* I'm as embarrassed about this as you are.
   *
   * <shaver> // XXX do not look into _superhack with remaining eye
@@ -1124,11 +962,6 @@ static inline int obdo_cmp_md(struct obdo *dst, struct obdo *src,
  extern int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c);
  extern void (*ptlrpc_abort_inflight_superhack)(struct obd_import *imp);
  
-struct obd_statfs;
-struct statfs;
-void statfs_pack(struct obd_statfs *osfs, struct statfs *sfs);
-void statfs_unpack(struct statfs *sfs, struct obd_statfs *osfs);
-
  struct obd_class_user_state {
          struct obd_device     *ocus_current_obd;
          struct list_head       ocus_conns;
diff --git a/lustre/include/linux/obd_lov.h b/lustre/include/linux/obd_lov.h

index b12a062..6d68ae9 100644 (file)
--- a/lustre/include/linux/obd_lov.h
+++ b/lustre/include/linux/obd_lov.h
@@ -8,14 +8,17 @@
  #define OBD_LOV_DEVICENAME "lov"
  
  struct lov_brw_async_args {
-        obd_count        aa_oa_bufs;
-        struct brw_page *aa_ioarr;
+        struct lov_stripe_md  *aa_lsm;
+        struct obdo           *aa_obdos;
+        struct obdo           *aa_oa;
+        struct brw_page       *aa_ioarr;
+        obd_count              aa_oa_bufs;
  };
  
  struct lov_getattr_async_args {
          struct lov_stripe_md  *aa_lsm;
          struct obdo           *aa_oa;
-        struct obdo           *aa_stripe_oas;
+        struct obdo           *aa_obdos;
  };
  
  static inline int lov_stripe_md_size(int stripes)
@@ -28,15 +31,6 @@ static inline int lov_mds_md_size(int stripes)
          return sizeof(struct lov_mds_md) + stripes*sizeof(struct lov_object_id);
  }
  
-extern int lov_packmd(struct lustre_handle *conn, struct lov_mds_md **lmm,
-                       struct lov_stripe_md *lsm);
-extern int lov_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsm,
-                         struct lov_mds_md *lmm, int lmmsize);
-extern int lov_setstripe(struct lustre_handle *conn,
-                         struct lov_stripe_md **lsmp, struct lov_mds_md *lmmu);
-extern int lov_getstripe(struct lustre_handle *conn, 
-                         struct lov_stripe_md *lsm, struct lov_mds_md *lmmu);
-
  #define IOC_LOV_TYPE                   'g'
  #define IOC_LOV_MIN_NR                 50
  #define IOC_LOV_SET_OSC_ACTIVE         _IOWR('g', 50, long)
diff --git a/lustre/include/linux/obd_ost.h b/lustre/include/linux/obd_ost.h

index 22fe694..ac2e24b 100644 (file)
--- a/lustre/include/linux/obd_ost.h
+++ b/lustre/include/linux/obd_ost.h
@@ -35,6 +35,7 @@
  #define LUSTRE_SANOST_NAME "sanost"
  
  struct osc_brw_async_args {
+        struct obdo     *aa_oa;
          int              aa_requested_nob;
          int              aa_nio_count;
          obd_count        aa_page_count;
diff --git a/lustre/include/linux/obd_support.h b/lustre/include/linux/obd_support.h

index 2a76905..28a9a3d 100644 (file)
--- a/lustre/include/linux/obd_support.h
+++ b/lustre/include/linux/obd_support.h
@@ -74,8 +74,10 @@ extern unsigned long obd_sync_filter;
  #define OBD_FAIL_MDS_STATFS_PACK         0x11d
  #define OBD_FAIL_MDS_STATFS_NET          0x11e
  #define OBD_FAIL_MDS_GETATTR_NAME_NET    0x11f
-#define OBD_FAIL_MDS_ALL_REPLY_NET       0x120
-#define OBD_FAIL_MDS_ALL_REQUEST_NET     0x121
+#define OBD_FAIL_MDS_PIN_NET             0x120
+#define OBD_FAIL_MDS_UNPIN_NET           0x121
+#define OBD_FAIL_MDS_ALL_REPLY_NET       0x122
+#define OBD_FAIL_MDS_ALL_REQUEST_NET     0x123
  
  #define OBD_FAIL_OST                     0x200
  #define OBD_FAIL_OST_CONNECT_NET         0x201
@@ -116,6 +118,9 @@ extern unsigned long obd_sync_filter;
  #define OBD_FAIL_PTLRPC                  0x500
  #define OBD_FAIL_PTLRPC_ACK              0x501
  
+#define OBD_FAIL_OBD_PING_NET            0x600
+#define OBD_FAIL_OBD_LOG_CANCEL_NET      0x601
+
  /* preparation for a more advanced failure testbed (not functional yet) */
  #define OBD_FAIL_MASK_SYS    0x0000FF00
  #define OBD_FAIL_MASK_LOC    (0x000000FF | OBD_FAIL_MASK_SYS)
@@ -169,37 +174,27 @@ do {                                                                         \
  
  
  #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#define ll_bdevname(a) __bdevname((a))
+#define BDEVNAME_DECLARE_STORAGE(foo) char foo[BDEVNAME_SIZE]
+#define ll_bdevname(DEV, STORAGE) __bdevname(DEV, STORAGE)
  #define ll_lock_kernel lock_kernel()
-#define LTIME_S(time) (time.tv_sec)
  #else
+#define BDEVNAME_DECLARE_STORAGE(foo) char __unused_##foo
+#define ll_bdevname(DEV, STORAGE) ((void)__unused_##STORAGE, bdevname((DEV)))
  #define ll_lock_kernel
-#define ll_bdevname(a) bdevname((a))
-#define LTIME_S(time) (time)
  #endif
  
  
  static inline void OBD_FAIL_WRITE(int id, kdev_t dev)
  {
          if (OBD_FAIL_CHECK(id)) {
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+                BDEVNAME_DECLARE_STORAGE(tmp);
  #ifdef CONFIG_DEV_RDONLY
                  CERROR("obd_fail_loc=%x, fail write operation on %s\n",
-                       id, ll_bdevname(dev));
+                       id, ll_bdevname(kdev_t_to_nr(dev), tmp));
                  dev_set_rdonly(dev, 2);
  #else
                  CERROR("obd_fail_loc=%x, can't fail write operation on %s\n",
-                       id, ll_bdevname(dev));
-#endif
-#else
-#ifdef CONFIG_DEV_RDONLY
-                CERROR("obd_fail_loc=%x, fail write operation on %s\n",
-                       id, ll_bdevname(dev.value));
-                dev_set_rdonly(dev, 2);
-#else
-                CERROR("obd_fail_loc=%x, can't fail write operation on %s\n",
-                       id, ll_bdevname(dev.value));
-#endif
+                       id, ll_bdevname(kdev_t_to_nr(dev), tmp));
  #endif
                  /* We set FAIL_ONCE because we never "un-fail" a device */
                  obd_fail_loc |= OBD_FAILED | OBD_FAIL_ONCE;
@@ -209,9 +204,9 @@ static inline void OBD_FAIL_WRITE(int id, kdev_t dev)
  #define LTIME_S(time) (time)
  #endif  /* __KERNEL__ */
  
-#define OBD_ALLOC(ptr, size)                                                  \
+#define OBD_ALLOC_GFP(ptr, size, gfp_mask)                                    \
  do {                                                                          \
-        (ptr) = kmalloc(size, GFP_KERNEL);                                    \
+        (ptr) = kmalloc(size, gfp_mask);                                      \
          if ((ptr) == NULL) {                                                  \
                  CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n",  \
                         (int)(size), __FILE__, __LINE__);                      \
@@ -225,6 +220,12 @@ do {                                                                          \
          }                                                                     \
  } while (0)
  
+#ifndef OBD_GFP_MASK
+# define OBD_GFP_MASK GFP_KERNEL
+#endif
+
+#define OBD_ALLOC(ptr, size) OBD_ALLOC_GFP(ptr, size, OBD_GFP_MASK)
+
  #ifdef __arch_um__
  # define OBD_VMALLOC(ptr, size) OBD_ALLOC(ptr, size)
  #else
@@ -246,9 +247,9 @@ do {                                                                          \
  #endif
  
  #ifdef CONFIG_DEBUG_SLAB
-#define POISON(lptr, c, s) do {} while (0)
+#define POISON(ptr, c, s) do {} while (0)
  #else
-#define POISON(lptr, c, s) memset(lptr, c, s)
+#define POISON(ptr, c, s) memset(ptr, c, s)
  #endif
  
  #define OBD_FREE(ptr, size)                                                   \
@@ -277,9 +278,12 @@ do {                                                                          \
  } while (0)
  #endif
  
+/* we memset() the slab object to 0 when allocation succeeds, so DO NOT
+ * HAVE A CTOR THAT DOES ANYTHING.  its work will be cleared here.  we'd
+ * love to assert on that, but slab.c keeps kmem_cache_s all to itself. */
  #define OBD_SLAB_ALLOC(ptr, slab, type, size)                                 \
  do {                                                                          \
-        LASSERT (!in_interrupt());                                            \
+        LASSERT(!in_interrupt());                                             \
          (ptr) = kmem_cache_alloc(slab, type);                                 \
          if ((ptr) == NULL) {                                                  \
                  CERROR("slab-alloc of '"#ptr"' (%d bytes) failed at %s:%d\n", \
diff --git a/lustre/kernel_patches/patches/dev_read_only_2.4.20-rh.patch b/lustre/kernel_patches/patches/dev_read_only_2.4.20-rh.patch

index 55057d9..1b589b9 100644 (file)
--- a/lustre/kernel_patches/patches/dev_read_only_2.4.20-rh.patch
+++ b/lustre/kernel_patches/patches/dev_read_only_2.4.20-rh.patch
@@ -1,13 +1,10 @@
-
-
-
   drivers/block/blkpg.c  |   35 +++++++++++++++++++++++++++++++++++
   drivers/block/loop.c   |    3 +++
- drivers/ide/ide-disk.c |    5 ++++-
- 3 files changed, 42 insertions(+), 1 deletion(-)
+ drivers/ide/ide-disk.c |    5 +++++
+ 3 files changed, 43 insertions(+)
  
---- rh-2.4.20/drivers/block/blkpg.c~dev_read_only_2.4.20       2003-04-11 14:05:03.000000000 +0800
-+++ rh-2.4.20-root/drivers/block/blkpg.c       2003-04-12 13:11:31.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/drivers/block/blkpg.c~dev_read_only_2.4.20-rh    2003-05-15 21:12:48.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/drivers/block/blkpg.c      2003-07-12 15:10:31.000000000 -0600
  @@ -297,3 +297,38 @@ int blk_ioctl(kdev_t dev, unsigned int c
   }
   
@@ -47,8 +44,8 @@
  +EXPORT_SYMBOL(dev_set_rdonly);
  +EXPORT_SYMBOL(dev_check_rdonly);
  +EXPORT_SYMBOL(dev_clear_rdonly);
---- rh-2.4.20/drivers/block/loop.c~dev_read_only_2.4.20        2003-04-11 14:05:08.000000000 +0800
-+++ rh-2.4.20-root/drivers/block/loop.c        2003-04-12 13:11:31.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/drivers/block/loop.c~dev_read_only_2.4.20-rh     2003-05-15 21:12:50.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/drivers/block/loop.c       2003-07-12 15:10:31.000000000 -0600
  @@ -491,6 +491,9 @@ static int loop_make_request(request_que
         spin_unlock_irq(&lo->lo_lock);
   
@@ -59,17 +56,17 @@
                 if (lo->lo_flags & LO_FLAGS_READ_ONLY)
                         goto err;
         } else if (rw == READA) {
---- rh-2.4.20/drivers/ide/ide-disk.c~dev_read_only_2.4.20      2003-04-11 14:04:53.000000000 +0800
-+++ rh-2.4.20-root/drivers/ide/ide-disk.c      2003-04-12 13:14:48.000000000 +0800
-@@ -381,7 +381,10 @@ static ide_startstop_t do_rw_disk (ide_d
-       if (IS_PDC4030_DRIVE)
-               return promise_rw_disk(drive, rq, block);
- #endif /* CONFIG_BLK_DEV_PDC4030 */
--
-+      if (rq->cmd == WRITE && dev_check_rdonly(rq->rq_dev)) {
-+              ide_end_request(1, HWGROUP(drive));
-+              return ide_stopped;
-+      }
+--- kernel-2.4.20-6chaos_18_7/drivers/ide/ide-disk.c~dev_read_only_2.4.20-rh   2003-05-15 21:13:09.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/drivers/ide/ide-disk.c     2003-07-12 15:12:03.000000000 -0600
+@@ -371,6 +371,11 @@ ide_startstop_t __ide_do_rw_disk (ide_dr
+       if (driver_blocked)
+               panic("Request while ide driver is blocked?");
+ 
++      if (rq->cmd == WRITE && dev_check_rdonly(rq->rq_dev)) {
++              ide_end_request(1, HWGROUP(drive));
++              return ide_stopped;
++      }
++
         if (IDE_CONTROL_REG)
                 hwif->OUTB(drive->ctl, IDE_CONTROL_REG);
   
diff --git a/lustre/kernel_patches/patches/export-truncate-2.5.63.patch b/lustre/kernel_patches/patches/export-truncate-2.5.63.patch

index 3d82572..3063be4 100644 (file)
--- a/lustre/kernel_patches/patches/export-truncate-2.5.63.patch
+++ b/lustre/kernel_patches/patches/export-truncate-2.5.63.patch
@@ -9,7 +9,7 @@
                 return 0;
   }
  +/* truncate.c */
-+extern void truncate_complete_page(struct page *);
++extern void truncate_complete_page(struct address_space *mapping,struct page *);
   
   /* filemap.c */
   extern unsigned long page_unuse(struct page *);
diff --git a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.18.patch b/lustre/kernel_patches/patches/ext3-delete_thread-2.4.18.patch

index e01feca..a173981 100644 (file)
--- a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.18.patch
+++ b/lustre/kernel_patches/patches/ext3-delete_thread-2.4.18.patch
@@ -1,11 +1,17 @@
- fs/ext3/super.c            |  229 +++++++++++++++++++++++++++++++++++++++++++++
- include/linux/ext3_fs.h    |    2 
+
+Create a service thread to handle delete and truncate of inodes, to avoid
+long latency while truncating very large files.
+
+
+ fs/ext3/inode.c            |  116 ++++++++++++++++++++++
+ fs/ext3/super.c            |  231 +++++++++++++++++++++++++++++++++++++++++++++
+ include/linux/ext3_fs.h    |    5 
   include/linux/ext3_fs_sb.h |   10 +
- 3 files changed, 241 insertions(+)
+ 4 files changed, 362 insertions(+)
  
  --- linux-2.4.18-18.8.0-l15/fs/ext3/super.c~ext3-delete_thread-2.4.18  Tue Jun  3 17:26:21 2003
-+++ linux-2.4.18-18.8.0-l15-adilger/fs/ext3/super.c    Wed Jun 18 11:59:14 2003
-@@ -396,6 +396,219 @@ static void dump_orphan_list(struct supe
++++ linux-2.4.18-18.8.0-l15-adilger/fs/ext3/super.c    Wed Jul  2 23:49:40 2003
+@@ -396,6 +396,220 @@ static void dump_orphan_list(struct supe
         }
   }
   
@@ -130,14 +136,12 @@
  + * If we have any problem deferring the delete, just delete it right away.
  + * If we defer it, we also mark how many blocks it would free, so that we
  + * can keep the statfs data correct, and we know if we should sleep on the
-+ * truncate thread when we run out of space.
-+ *
-+ * In 2.5 this can be done much more cleanly by just registering a "drop"
-+ * method in the super_operations struct.
++ * delete thread when we run out of space.
  + */
  +static void ext3_delete_inode_thread(struct inode *old_inode)
  +{
  +      struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
++      struct ext3_inode_info *nei, *oei = EXT3_I(old_inode);
  +      struct inode *new_inode;
  +      unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
  +
@@ -146,24 +150,22 @@
  +              return;
  +      }
  +
-+      if (!test_opt(old_inode->i_sb, ASYNCDEL)) {
-+              ext3_delete_inode(old_inode);
-+              return;
-+      }
++      if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next)
++              goto out_delete;
  +
  +      /* We may want to delete the inode immediately and not defer it */
-+      if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS ||
-+          !sbi->s_delete_list.next) {
-+              ext3_delete_inode(old_inode);
-+              return;
-+      }
++      if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS)
++              goto out_delete;
  +
-+      if ((EXT3_I(old_inode)->i_state & EXT3_STATE_DELETE) ||
-+          (EXT3_SB(old_inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) {
++      /* We can't use the delete thread as-is during real orphan recovery,
++       * as we add to the orphan list here, causing ext3_orphan_cleanup()
++       * to loop endlessly.  It would be nice to do so, but needs work.
++       */
++      if (oei->i_state & EXT3_STATE_DELETE ||
++          sbi->s_mount_state & EXT3_ORPHAN_FS) {
  +              ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
  +                         old_inode->i_ino, blocks);
-+              ext3_delete_inode(old_inode);
-+              return;
++              goto out_delete;
  +      }
  +
  +      /* We can iget this inode again here, because our caller has unhashed
@@ -175,9 +177,9 @@
  +       */
  +      down(&sbi->s_orphan_lock);
  +
-+      EXT3_SB(old_inode->i_sb)->s_mount_state |= EXT3_ORPHAN_FS;
++      sbi->s_mount_state |= EXT3_ORPHAN_FS;
  +      new_inode = iget(old_inode->i_sb, old_inode->i_ino);
-+      EXT3_SB(old_inode->i_sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
++      sbi->s_mount_state &= ~EXT3_ORPHAN_FS;
  +      if (is_bad_inode(new_inode)) {
  +              printk(KERN_WARNING "read bad inode %lu\n", old_inode->i_ino);
  +              iput(new_inode);
@@ -187,20 +189,21 @@
  +              up(&sbi->s_orphan_lock);
  +              ext3_debug("delete inode %lu directly (bad read)\n",
  +                         old_inode->i_ino);
-+              ext3_delete_inode(old_inode);
-+              return;
++              goto out_delete;
  +      }
  +      J_ASSERT(new_inode != old_inode);
  +
-+      J_ASSERT(!list_empty(&EXT3_I(old_inode)->i_orphan));
++      J_ASSERT(!list_empty(&oei->i_orphan));
++
++      nei = EXT3_I(new_inode);
  +      /* Ugh.  We need to insert new_inode into the same spot on the list
  +       * as old_inode was, to ensure the in-memory orphan list is still
  +       * in the same order as the on-disk orphan list (badness otherwise).
  +       */
-+      EXT3_I(new_inode)->i_orphan = EXT3_I(old_inode)->i_orphan;
-+      EXT3_I(new_inode)->i_orphan.next->prev = &EXT3_I(new_inode)->i_orphan;
-+      EXT3_I(new_inode)->i_orphan.prev->next = &EXT3_I(new_inode)->i_orphan;
-+      EXT3_I(new_inode)->i_state |= EXT3_STATE_DELETE;
++      nei->i_orphan = oei->i_orphan;
++      nei->i_orphan.next->prev = &nei->i_orphan;
++      nei->i_orphan.prev->next = &nei->i_orphan;
++      nei->i_state |= EXT3_STATE_DELETE;
  +      up(&sbi->s_orphan_lock);
  +
  +      clear_inode(old_inode);
@@ -216,6 +219,10 @@
  +                 new_inode->i_ino, blocks);
  +
  +      wake_up(&sbi->s_delete_thread_queue);
++      return;
++
++out_delete:
++      ext3_delete_inode(old_inode);
  +}
  +#else
  +#define ext3_start_delete_thread(sbi) do {} while(0)
@@ -225,7 +232,7 @@
   void ext3_put_super (struct super_block * sb)
   {
         struct ext3_sb_info *sbi = EXT3_SB(sb);
-@@ -403,6 +615,7 @@ void ext3_put_super (struct super_block 
+@@ -403,6 +617,7 @@ void ext3_put_super (struct super_block 
         kdev_t j_dev = sbi->s_journal->j_dev;
         int i;
   
@@ -233,7 +240,7 @@
         ext3_xattr_put_super(sb);
         journal_destroy(sbi->s_journal);
         if (!(sb->s_flags & MS_RDONLY)) {
-@@ -451,7 +664,11 @@ static struct super_operations ext3_sops
+@@ -451,7 +666,11 @@ static struct super_operations ext3_sops
         write_inode:    ext3_write_inode,       /* BKL not held.  Don't need */
         dirty_inode:    ext3_dirty_inode,       /* BKL not held.  We take it */
         put_inode:      ext3_put_inode,         /* BKL not held.  Don't need */
@@ -245,7 +252,7 @@
         put_super:      ext3_put_super,         /* BKL held */
         write_super:    ext3_write_super,       /* BKL held */
         write_super_lockfs: ext3_write_super_lockfs, /* BKL not held. Take it */
-@@ -511,6 +728,14 @@ static int parse_options (char * options
+@@ -511,6 +730,14 @@ static int parse_options (char * options
              this_char = strtok (NULL, ",")) {
                 if ((value = strchr (this_char, '=')) != NULL)
                         *value++ = 0;
@@ -260,7 +267,7 @@
                 if (!strcmp (this_char, "bsddf"))
                         clear_opt (*mount_options, MINIX_DF);
                 else if (!strcmp (this_char, "nouid32")) {
-@@ -1206,6 +1431,7 @@ struct super_block * ext3_read_super (st
+@@ -1206,6 +1433,7 @@ struct super_block * ext3_read_super (st
         }
   
         ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
@@ -268,7 +275,7 @@
         /*
          * akpm: core read_super() calls in here with the superblock locked.
          * That deadlocks, because orphan cleanup needs to lock the superblock
-@@ -1648,6 +1874,9 @@ int ext3_remount (struct super_block * s
+@@ -1648,6 +1876,9 @@ int ext3_remount (struct super_block * s
         if (!parse_options(data, &tmp, sbi, &tmp, 1))
                 return -EINVAL;
   
@@ -278,8 +285,143 @@
         if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
                 ext3_abort(sb, __FUNCTION__, "Abort forced by user");
   
+--- linux/fs/ext3/file.c.orig  Fri Jan 17 10:57:31 2003
++++ linux/fs/ext3/file.c       Mon Jun 30 13:28:52 2003
+@@ -121,7 +121,11 @@ struct file_operations ext3_file_operati
+ };
+ 
+ struct inode_operations ext3_file_inode_operations = {
++#ifdef EXT3_DELETE_THREAD
++      truncate:       ext3_truncate_thread,   /* BKL held */
++#else
+       truncate:       ext3_truncate,          /* BKL held */
++#endif
+       setattr:        ext3_setattr,           /* BKL held */
+ };
+ 
+--- linux-2.4.18-18.8.0-l15/fs/ext3/inode.c~ext3-delete_thread-2.4.18  Wed Jul  2 23:13:58 2003
++++ linux-2.4.18-18.8.0-l15-adilger/fs/ext3/inode.c    Wed Jul  2 23:50:29 2003
+@@ -2004,6 +2004,118 @@ out_stop:
+       ext3_journal_stop(handle, inode);
+ }
+ 
++#ifdef EXT3_DELETE_THREAD
++/* Move blocks from to-be-truncated inode over to a new inode, and delete
++ * that one from the delete thread instead.  This avoids a lot of latency
++ * when truncating large files.
++ *
++ * If we have any problem deferring the truncate, just truncate it right away.
++ * If we defer it, we also mark how many blocks it would free, so that we
++ * can keep the statfs data correct, and we know if we should sleep on the
++ * delete thread when we run out of space.
++ */
++void ext3_truncate_thread(struct inode *old_inode)
++{
++      struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
++      struct ext3_inode_info *nei, *oei = EXT3_I(old_inode);
++      struct inode *new_inode;
++      handle_t *handle;
++      unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
++
++      if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next)
++              goto out_truncate;
++
++      /* XXX This is a temporary limitation for code simplicity.
++       *     We could truncate to arbitrary sizes at some later time.
++       */
++      if (old_inode->i_size != 0)
++              goto out_truncate;
++
++      /* We may want to truncate the inode immediately and not defer it */
++      if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS ||
++          old_inode->i_size > oei->i_disksize)
++              goto out_truncate;
++
++      /* We can't use the delete thread as-is during real orphan recovery,
++       * as we add to the orphan list here, causing ext3_orphan_cleanup()
++       * to loop endlessly.  It would be nice to do so, but needs work.
++       */
++      if (oei->i_state & EXT3_STATE_DELETE ||
++          sbi->s_mount_state & EXT3_ORPHAN_FS) {
++              ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
++                         old_inode->i_ino, blocks);
++              goto out_truncate;
++      }
++
++      ext3_discard_prealloc(old_inode);
++
++      /* old_inode   = 1
++       * new_inode   = sb + GDT + ibitmap
++       * orphan list = 1 inode/superblock for add, 2 inodes for del
++       * quota files = 2 * EXT3_SINGLEDATA_TRANS_BLOCKS
++       */
++      handle = ext3_journal_start(old_inode, 7);
++      if (IS_ERR(handle))
++              goto out_truncate;
++
++      new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode);
++      if (IS_ERR(new_inode)) {
++              ext3_debug("truncate inode %lu directly (no new inodes)\n",
++                         old_inode->i_ino);
++              goto out_journal;
++      }
++
++      nei = EXT3_I(new_inode);
++
++      down_write(&oei->truncate_sem);
++      new_inode->i_size = old_inode->i_size;
++      new_inode->i_blocks = old_inode->i_blocks;
++      new_inode->i_uid = old_inode->i_uid;
++      new_inode->i_gid = old_inode->i_gid;
++      new_inode->i_nlink = 0;
++
++      /* FIXME when we do arbitrary truncates */
++      old_inode->i_blocks = oei->i_file_acl ? old_inode->i_blksize / 512 : 0;
++      old_inode->i_mtime = old_inode->i_ctime = CURRENT_TIME;
++
++      memcpy(nei->i_data, oei->i_data, sizeof(nei->i_data));
++      memset(oei->i_data, 0, sizeof(oei->i_data));
++
++      nei->i_disksize = oei->i_disksize;
++      nei->i_state |= EXT3_STATE_DELETE;
++      up_write(&oei->truncate_sem);
++
++      if (ext3_orphan_add(handle, new_inode) < 0)
++              goto out_journal;
++
++      if (ext3_orphan_del(handle, old_inode) < 0) {
++              ext3_orphan_del(handle, new_inode);
++              iput(new_inode);
++              goto out_journal;
++      }
++
++      ext3_journal_stop(handle, old_inode);
++
++      spin_lock(&sbi->s_delete_lock);
++      J_ASSERT(list_empty(&new_inode->i_dentry));
++      list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list);
++      sbi->s_delete_blocks += blocks;
++      sbi->s_delete_inodes++;
++      spin_unlock(&sbi->s_delete_lock);
++
++      ext3_debug("delete inode %lu (%lu blocks) by thread\n",
++                 new_inode->i_ino, blocks);
++
++      wake_up(&sbi->s_delete_thread_queue);
++      return;
++
++out_journal:
++      ext3_journal_stop(handle, old_inode);
++out_truncate:
++      ext3_truncate(old_inode);
++}
++#endif /* EXT3_DELETE_THREAD */
++
+ /* 
+  * ext3_get_inode_loc returns with an extra refcount against the
+  * inode's underlying buffer_head on success. 
  --- linux-2.4.18-18.8.0-l15/include/linux/ext3_fs.h~ext3-delete_thread-2.4.18  Tue Jun  3 17:26:20 2003
-+++ linux-2.4.18-18.8.0-l15-adilger/include/linux/ext3_fs.h    Tue Jun 17 12:36:56 2003
++++ linux-2.4.18-18.8.0-l15-adilger/include/linux/ext3_fs.h    Wed Jul  2 23:19:09 2003
  @@ -190,6 +190,7 @@ struct ext3_group_desc
    */
   #define EXT3_STATE_JDATA              0x00000001 /* journaled data exists */
@@ -296,8 +438,18 @@
   
   /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
   #ifndef _LINUX_EXT2_FS_H
+@@ -651,6 +653,9 @@ extern void ext3_discard_prealloc (struc
+ extern void ext3_dirty_inode(struct inode *);
+ extern int ext3_change_inode_journal_flag(struct inode *, int);
+ extern void ext3_truncate (struct inode *);
++#ifdef EXT3_DELETE_THREAD
++extern void ext3_truncate_thread(struct inode *inode);
++#endif
+ 
+ /* ioctl.c */
+ extern int ext3_ioctl (struct inode *, struct file *, unsigned int,
  --- linux-2.4.18-18.8.0-l15/include/linux/ext3_fs_sb.h~ext3-delete_thread-2.4.18       Tue Jun  3 17:26:21 2003
-+++ linux-2.4.18-18.8.0-l15-adilger/include/linux/ext3_fs_sb.h Tue Jun 17 12:36:56 2003
++++ linux-2.4.18-18.8.0-l15-adilger/include/linux/ext3_fs_sb.h Wed Jul  2 23:19:09 2003
  @@ -29,6 +29,8 @@
   
   #define EXT3_MAX_GROUP_LOADED 32
diff --git a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.20.patch b/lustre/kernel_patches/patches/ext3-delete_thread-2.4.20.patch

index 34c5158..a8816ec 100644 (file)
--- a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.20.patch
+++ b/lustre/kernel_patches/patches/ext3-delete_thread-2.4.20.patch
@@ -1,7 +1,13 @@
-diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
---- origin/fs/ext3/super.c     2003-05-04 17:23:52.000000000 +0400
-+++ linux/fs/ext3/super.c      2003-05-04 17:09:20.000000000 +0400
-@@ -398,6 +398,219 @@ static void dump_orphan_list(struct supe
+ fs/ext3/file.c             |    4 
+ fs/ext3/inode.c            |  116 ++++++++++++++++++++++
+ fs/ext3/super.c            |  230 +++++++++++++++++++++++++++++++++++++++++++++
+ include/linux/ext3_fs.h    |    5 
+ include/linux/ext3_fs_sb.h |   10 +
+ 5 files changed, 365 insertions(+)
+
+--- linux/fs/ext3/super.c~ext3-delete_thread-2.4.20    Thu Jul 10 14:11:32 2003
++++ linux-mmonroe/fs/ext3/super.c      Thu Jul 10 14:11:33 2003
+@@ -400,6 +400,220 @@ static void dump_orphan_list(struct supe
         }
   }
   
@@ -126,14 +132,12 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
  + * If we have any problem deferring the delete, just delete it right away.
  + * If we defer it, we also mark how many blocks it would free, so that we
  + * can keep the statfs data correct, and we know if we should sleep on the
-+ * truncate thread when we run out of space.
-+ *
-+ * In 2.5 this can be done much more cleanly by just registering a "drop"
-+ * method in the super_operations struct.
++ * delete thread when we run out of space.
  + */
  +static void ext3_delete_inode_thread(struct inode *old_inode)
  +{
  +      struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
++      struct ext3_inode_info *nei, *oei = EXT3_I(old_inode);
  +      struct inode *new_inode;
  +      unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
  +
@@ -142,24 +146,22 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
  +              return;
  +      }
  +
-+      if (!test_opt(old_inode->i_sb, ASYNCDEL)) {
-+              ext3_delete_inode(old_inode);
-+              return;
-+      }
++      if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next)
++              goto out_delete;
  +
  +      /* We may want to delete the inode immediately and not defer it */
-+      if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS ||
-+          !sbi->s_delete_list.next) {
-+              ext3_delete_inode(old_inode);
-+              return;
-+      }
++      if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS)
++              goto out_delete;
  +
-+      if ((EXT3_I(old_inode)->i_state & EXT3_STATE_DELETE) ||
-+          (EXT3_SB(old_inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) {
++      /* We can't use the delete thread as-is during real orphan recovery,
++       * as we add to the orphan list here, causing ext3_orphan_cleanup()
++       * to loop endlessly.  It would be nice to do so, but needs work.
++       */
++      if (oei->i_state & EXT3_STATE_DELETE ||
++          sbi->s_mount_state & EXT3_ORPHAN_FS) {
  +              ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
  +                         old_inode->i_ino, blocks);
-+              ext3_delete_inode(old_inode);
-+              return;
++              goto out_delete;
  +      }
  +
  +      /* We can iget this inode again here, because our caller has unhashed
@@ -171,9 +173,9 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
  +       */
  +      down(&sbi->s_orphan_lock);
  +
-+      EXT3_SB(old_inode->i_sb)->s_mount_state |= EXT3_ORPHAN_FS;
++      sbi->s_mount_state |= EXT3_ORPHAN_FS;
  +      new_inode = iget(old_inode->i_sb, old_inode->i_ino);
-+      EXT3_SB(old_inode->i_sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
++      sbi->s_mount_state &= ~EXT3_ORPHAN_FS;
  +      if (is_bad_inode(new_inode)) {
  +              printk(KERN_WARNING "read bad inode %lu\n", old_inode->i_ino);
  +              iput(new_inode);
@@ -183,20 +185,21 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
  +              up(&sbi->s_orphan_lock);
  +              ext3_debug("delete inode %lu directly (bad read)\n",
  +                         old_inode->i_ino);
-+              ext3_delete_inode(old_inode);
-+              return;
++              goto out_delete;
  +      }
  +      J_ASSERT(new_inode != old_inode);
  +
-+      J_ASSERT(!list_empty(&EXT3_I(old_inode)->i_orphan));
++      J_ASSERT(!list_empty(&oei->i_orphan));
++
++      nei = EXT3_I(new_inode);
  +      /* Ugh.  We need to insert new_inode into the same spot on the list
  +       * as old_inode was, to ensure the in-memory orphan list is still
  +       * in the same order as the on-disk orphan list (badness otherwise).
  +       */
-+      EXT3_I(new_inode)->i_orphan = EXT3_I(old_inode)->i_orphan;
-+      EXT3_I(new_inode)->i_orphan.next->prev = &EXT3_I(new_inode)->i_orphan;
-+      EXT3_I(new_inode)->i_orphan.prev->next = &EXT3_I(new_inode)->i_orphan;
-+      EXT3_I(new_inode)->i_state |= EXT3_STATE_DELETE;
++      nei->i_orphan = oei->i_orphan;
++      nei->i_orphan.next->prev = &nei->i_orphan;
++      nei->i_orphan.prev->next = &nei->i_orphan;
++      nei->i_state |= EXT3_STATE_DELETE;
  +      up(&sbi->s_orphan_lock);
  +
  +      clear_inode(old_inode);
@@ -212,6 +215,10 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
  +                 new_inode->i_ino, blocks);
  +
  +      wake_up(&sbi->s_delete_thread_queue);
++      return;
++
++out_delete:
++      ext3_delete_inode(old_inode);
  +}
  +#else
  +#define ext3_start_delete_thread(sbi) do {} while(0)
@@ -221,7 +228,7 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
   void ext3_put_super (struct super_block * sb)
   {
         struct ext3_sb_info *sbi = EXT3_SB(sb);
-@@ -405,6 +611,7 @@ void ext3_put_super (struct super_block 
+@@ -407,6 +621,7 @@ void ext3_put_super (struct super_block 
         kdev_t j_dev = sbi->s_journal->j_dev;
         int i;
   
@@ -229,7 +236,7 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
         ext3_xattr_put_super(sb);
         journal_destroy(sbi->s_journal);
         if (!(sb->s_flags & MS_RDONLY)) {
-@@ -453,7 +660,11 @@ static struct super_operations ext3_sops
+@@ -455,7 +670,11 @@ static struct super_operations ext3_sops
         write_inode:    ext3_write_inode,       /* BKL not held.  Don't need */
         dirty_inode:    ext3_dirty_inode,       /* BKL not held.  We take it */
         put_inode:      ext3_put_inode,         /* BKL not held.  Don't need */
@@ -240,11 +247,11 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
  +#endif
         put_super:      ext3_put_super,         /* BKL held */
         write_super:    ext3_write_super,       /* BKL held */
-       write_super_lockfs: ext3_write_super_lockfs, /* BKL not held. Take it */
-@@ -514,6 +725,13 @@ static int parse_options (char * options
-            this_char = strtok (NULL, ",")) {
-               if ((value = strchr (this_char, '=')) != NULL)
-                       *value++ = 0;
+       sync_fs:        ext3_sync_fs,
+@@ -524,6 +743,13 @@ static int parse_options (char * options
+                       clear_opt (*mount_options, XATTR_USER);
+               else
+ #endif
  +#ifdef EXT3_DELETE_THREAD
  +              if (!strcmp(this_char, "asyncdel"))
  +                      set_opt(*mount_options, ASYNCDEL);
@@ -252,10 +259,10 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
  +                      clear_opt(*mount_options, ASYNCDEL);
  +              else
  +#endif
- #ifdef CONFIG_EXT3_FS_XATTR_USER
-               if (!strcmp (this_char, "user_xattr"))
-                       set_opt (*mount_options, XATTR_USER);
-@@ -1220,6 +1436,7 @@ struct super_block * ext3_read_super (st
+               if (!strcmp (this_char, "bsddf"))
+                       clear_opt (*mount_options, MINIX_DF);
+               else if (!strcmp (this_char, "nouid32")) {
+@@ -1223,6 +1449,7 @@ struct super_block * ext3_read_super (st
         }
   
         ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
@@ -263,7 +270,7 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
         /*
          * akpm: core read_super() calls in here with the superblock locked.
          * That deadlocks, because orphan cleanup needs to lock the superblock
-@@ -1648,6 +1874,9 @@ int ext3_remount (struct super_block * s
+@@ -1678,6 +1905,9 @@ int ext3_remount (struct super_block * s
         if (!parse_options(data, &tmp, sbi, &tmp, 1))
                 return -EINVAL;
   
@@ -273,9 +280,143 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
         if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
                 ext3_abort(sb, __FUNCTION__, "Abort forced by user");
   
-diff -puNr origin/include/linux/ext3_fs.h linux/include/linux/ext3_fs.h
---- origin/include/linux/ext3_fs.h     2003-05-04 17:22:49.000000000 +0400
-+++ linux/include/linux/ext3_fs.h      2003-05-04 15:06:10.000000000 +0400
+--- linux/fs/ext3/inode.c~ext3-delete_thread-2.4.20    Thu Jul 10 14:11:29 2003
++++ linux-mmonroe/fs/ext3/inode.c      Thu Jul 10 14:11:33 2003
+@@ -2013,6 +2013,118 @@ out_stop:
+       ext3_journal_stop(handle, inode);
+ }
+ 
++#ifdef EXT3_DELETE_THREAD
++/* Move blocks from to-be-truncated inode over to a new inode, and delete
++ * that one from the delete thread instead.  This avoids a lot of latency
++ * when truncating large files.
++ *
++ * If we have any problem deferring the truncate, just truncate it right away.
++ * If we defer it, we also mark how many blocks it would free, so that we
++ * can keep the statfs data correct, and we know if we should sleep on the
++ * delete thread when we run out of space.
++ */
++void ext3_truncate_thread(struct inode *old_inode)
++{
++      struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
++      struct ext3_inode_info *nei, *oei = EXT3_I(old_inode);
++      struct inode *new_inode;
++      handle_t *handle;
++      unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
++
++      if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next)
++              goto out_truncate;
++
++      /* XXX This is a temporary limitation for code simplicity.
++       *     We could truncate to arbitrary sizes at some later time.
++       */
++      if (old_inode->i_size != 0)
++              goto out_truncate;
++
++      /* We may want to truncate the inode immediately and not defer it */
++      if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS ||
++          old_inode->i_size > oei->i_disksize)
++              goto out_truncate;
++
++      /* We can't use the delete thread as-is during real orphan recovery,
++       * as we add to the orphan list here, causing ext3_orphan_cleanup()
++       * to loop endlessly.  It would be nice to do so, but needs work.
++       */
++      if (oei->i_state & EXT3_STATE_DELETE ||
++          sbi->s_mount_state & EXT3_ORPHAN_FS) {
++              ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
++                         old_inode->i_ino, blocks);
++              goto out_truncate;
++      }
++
++      ext3_discard_prealloc(old_inode);
++
++      /* old_inode   = 1
++       * new_inode   = sb + GDT + ibitmap
++       * orphan list = 1 inode/superblock for add, 2 inodes for del
++       * quota files = 2 * EXT3_SINGLEDATA_TRANS_BLOCKS
++       */
++      handle = ext3_journal_start(old_inode, 7);
++      if (IS_ERR(handle))
++              goto out_truncate;
++
++      new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode);
++      if (IS_ERR(new_inode)) {
++              ext3_debug("truncate inode %lu directly (no new inodes)\n",
++                         old_inode->i_ino);
++              goto out_journal;
++      }
++
++      nei = EXT3_I(new_inode);
++
++      down_write(&oei->truncate_sem);
++      new_inode->i_size = old_inode->i_size;
++      new_inode->i_blocks = old_inode->i_blocks;
++      new_inode->i_uid = old_inode->i_uid;
++      new_inode->i_gid = old_inode->i_gid;
++      new_inode->i_nlink = 0;
++
++      /* FIXME when we do arbitrary truncates */
++      old_inode->i_blocks = oei->i_file_acl ? old_inode->i_blksize / 512 : 0;
++      old_inode->i_mtime = old_inode->i_ctime = CURRENT_TIME;
++
++      memcpy(nei->i_data, oei->i_data, sizeof(nei->i_data));
++      memset(oei->i_data, 0, sizeof(oei->i_data));
++
++      nei->i_disksize = oei->i_disksize;
++      nei->i_state |= EXT3_STATE_DELETE;
++      up_write(&oei->truncate_sem);
++
++      if (ext3_orphan_add(handle, new_inode) < 0)
++              goto out_journal;
++
++      if (ext3_orphan_del(handle, old_inode) < 0) {
++              ext3_orphan_del(handle, new_inode);
++              iput(new_inode);
++              goto out_journal;
++      }
++
++      ext3_journal_stop(handle, old_inode);
++
++      spin_lock(&sbi->s_delete_lock);
++      J_ASSERT(list_empty(&new_inode->i_dentry));
++      list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list);
++      sbi->s_delete_blocks += blocks;
++      sbi->s_delete_inodes++;
++      spin_unlock(&sbi->s_delete_lock);
++
++      ext3_debug("delete inode %lu (%lu blocks) by thread\n",
++                 new_inode->i_ino, blocks);
++
++      wake_up(&sbi->s_delete_thread_queue);
++      return;
++
++out_journal:
++      ext3_journal_stop(handle, old_inode);
++out_truncate:
++      ext3_truncate(old_inode);
++}
++#endif /* EXT3_DELETE_THREAD */
++
+ /* 
+  * ext3_get_inode_loc returns with an extra refcount against the
+  * inode's underlying buffer_head on success. 
+--- linux/fs/ext3/file.c~ext3-delete_thread-2.4.20     Thu Jul 10 14:11:21 2003
++++ linux-mmonroe/fs/ext3/file.c       Thu Jul 10 14:12:17 2003
+@@ -125,7 +125,11 @@ struct file_operations ext3_file_operati
+ };
+ 
+ struct inode_operations ext3_file_inode_operations = {
++#ifdef EXT3_DELETE_THREAD
++      truncate:       ext3_truncate_thread,   /* BKL held */
++#else
+       truncate:       ext3_truncate,          /* BKL held */
++#endif
+       setattr:        ext3_setattr,           /* BKL held */
+       setxattr:       ext3_setxattr,          /* BKL held */
+       getxattr:       ext3_getxattr,          /* BKL held */
+--- linux/include/linux/ext3_fs.h~ext3-delete_thread-2.4.20    Thu Jul 10 14:11:26 2003
++++ linux-mmonroe/include/linux/ext3_fs.h      Thu Jul 10 14:11:33 2003
  @@ -193,6 +193,7 @@ struct ext3_group_desc
    */
   #define EXT3_STATE_JDATA              0x00000001 /* journaled data exists */
@@ -284,17 +425,26 @@ diff -puNr origin/include/linux/ext3_fs.h linux/include/linux/ext3_fs.h
   
   /*
    * ioctl commands
-@@ -321,6 +322,7 @@ struct ext3_inode {
+@@ -320,6 +321,7 @@ struct ext3_inode {
   #define EXT3_MOUNT_UPDATE_JOURNAL     0x1000  /* Update the journal format */
   #define EXT3_MOUNT_NO_UID32           0x2000  /* Disable 32-bit UIDs */
   #define EXT3_MOUNT_XATTR_USER         0x4000  /* Extended user attributes */
-+#define EXT3_MOUNT_ASYNCDEL           0x20000 /* Delayed deletion */
++#define EXT3_MOUNT_ASYNCDEL           0x20000 /* Delayed deletion */
   
   /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
   #ifndef _LINUX_EXT2_FS_H
-diff -puNr origin/include/linux/ext3_fs_sb.h linux/include/linux/ext3_fs_sb.h
---- origin/include/linux/ext3_fs_sb.h  2003-05-04 17:23:52.000000000 +0400
-+++ linux/include/linux/ext3_fs_sb.h   2003-05-04 11:37:04.000000000 +0400
+@@ -694,6 +696,9 @@ extern void ext3_discard_prealloc (struc
+ extern void ext3_dirty_inode(struct inode *);
+ extern int ext3_change_inode_journal_flag(struct inode *, int);
+ extern void ext3_truncate (struct inode *);
++#ifdef EXT3_DELETE_THREAD
++extern void ext3_truncate_thread(struct inode *inode);
++#endif
+ 
+ /* ioctl.c */
+ extern int ext3_ioctl (struct inode *, struct file *, unsigned int,
+--- linux/include/linux/ext3_fs_sb.h~ext3-delete_thread-2.4.20 Thu Jul 10 14:11:32 2003
++++ linux-mmonroe/include/linux/ext3_fs_sb.h   Thu Jul 10 14:11:33 2003
  @@ -29,6 +29,8 @@
   
   #define EXT3_MAX_GROUP_LOADED 8
@@ -319,3 +469,5 @@ diff -puNr origin/include/linux/ext3_fs_sb.h linux/include/linux/ext3_fs_sb.h
   };
   
   #endif        /* _LINUX_EXT3_FS_SB */
+
+_
diff --git a/lustre/kernel_patches/patches/extN-misc-fixup.patch b/lustre/kernel_patches/patches/extN-misc-fixup.patch

index 06ea72a..65d9347 100644 (file)
--- a/lustre/kernel_patches/patches/extN-misc-fixup.patch
+++ b/lustre/kernel_patches/patches/extN-misc-fixup.patch
@@ -1,6 +1,9 @@
---- linux-2.4.17/fs/ext3/super.c.orig  Fri Dec 21 10:41:55 2001
-+++ linux-2.4.17/fs/ext3/super.c       Fri Mar 22 11:00:41 2002
-@@ -1344,10 +1342,10 @@
+ fs/ext3/super.c |    4 ++--
+ 1 files changed, 2 insertions(+), 2 deletions(-)
+
+--- linux-2.4.18-p4smp/fs/ext3/super.c~extN-misc-fixup 2003-07-21 23:07:50.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/super.c   2003-07-21 23:08:06.000000000 -0600
+@@ -1578,10 +1578,10 @@ static journal_t *ext3_get_dev_journal(s
                 printk(KERN_ERR "EXT3-fs: I/O error on journal device\n");
                 goto out_journal;
         }
@@ -13,11 +16,5 @@
                 goto out_journal;
         }
         EXT3_SB(sb)->journal_bdev = bdev;
-@@ -1560,6 +1560,7 @@
-       unlock_kernel();
-       return ret;
- }
-+EXPORT_SYMBOL(ext3_force_commit); /* here to avoid potential patch collisions */
- 
- /*
-  * Ext3 always journals updates to the superblock itself, so we don't
+
+_
diff --git a/lustre/kernel_patches/patches/extN-noread.patch b/lustre/kernel_patches/patches/extN-noread.patch

index 63f4463..305f6fd 100644 (file)
--- a/lustre/kernel_patches/patches/extN-noread.patch
+++ b/lustre/kernel_patches/patches/extN-noread.patch
@@ -83,9 +83,7 @@
                 DQUOT_DROP(inode);
  --- linux-2.4.18-chaos52/fs/ext3/inode.c~extN-noread   2003-05-16 12:26:29.000000000 +0800
  +++ linux-2.4.18-chaos52-root/fs/ext3/inode.c  2003-05-16 12:27:06.000000000 +0800
-@@ -2011,23 +2011,28 @@ out_stop:
-       ext3_journal_stop(handle, inode);
- }
+@@ -2013,21 +2013,26 @@ out_stop:
   
  -/* 
  - * ext3_get_inode_loc returns with an extra refcount against the
diff --git a/lustre/kernel_patches/patches/extN-wantedi.patch b/lustre/kernel_patches/patches/extN-wantedi.patch

index fc74c6b..d40d678 100644 (file)
--- a/lustre/kernel_patches/patches/extN-wantedi.patch
+++ b/lustre/kernel_patches/patches/extN-wantedi.patch
@@ -107,6 +107,17 @@
         j += i * EXT3_INODES_PER_GROUP(sb) + 1;
         if (j < EXT3_FIRST_INO(sb) || j > le32_to_cpu(es->s_inodes_count)) {
                 ext3_error (sb, "ext3_new_inode",
+--- linux-2.4.18-18.8.0-l15/fs/ext3/inode.c~extN-wantedi       Thu Jul  3 00:15:41 2003
++++ linux-2.4.18-18.8.0-l15-adilger/fs/ext3/inode.c    Thu Jul  3 00:17:28 2003
+@@ -2070,7 +2070,7 @@ void ext3_truncate_thread(struct inode *
+       if (IS_ERR(handle))
+               goto out_truncate;
+ 
+-      new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode);
++      new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode, 0);
+       if (IS_ERR(new_inode)) {
+               ext3_debug("truncate inode %lu directly (no new inodes)\n",
+                          old_inode->i_ino);
  --- linux-2.4.20/fs/ext3/ioctl.c~extN-wantedi  2003-04-08 23:35:55.000000000 -0600
  +++ linux-2.4.20-braam/fs/ext3/ioctl.c 2003-04-08 23:35:55.000000000 -0600
  @@ -23,6 +23,31 @@ int ext3_ioctl (struct inode * inode, st
diff --git a/lustre/kernel_patches/patches/iopen-2.4.18.patch b/lustre/kernel_patches/patches/iopen-2.4.18.patch

index 6eabe85..b983b33 100644 (file)
--- a/lustre/kernel_patches/patches/iopen-2.4.18.patch
+++ b/lustre/kernel_patches/patches/iopen-2.4.18.patch
@@ -1,7 +1,15 @@
- 0 files changed
+ Documentation/filesystems/ext2.txt |   16 ++
+ fs/ext3/Makefile                   |    2 
+ fs/ext3/inode.c                    |    4 
+ fs/ext3/iopen.c                    |  259 +++++++++++++++++++++++++++++++++++++
+ fs/ext3/iopen.h                    |   13 +
+ fs/ext3/namei.c                    |   12 +
+ fs/ext3/super.c                    |   11 +
+ include/linux/ext3_fs.h            |    2 
+ 8 files changed, 318 insertions(+), 1 deletion(-)
  
---- linux-2.4.18-chaos52/Documentation/filesystems/ext2.txt~iopen-2.4.18       2003-04-13 15:21:33.000000000 +0800
-+++ linux-2.4.18-chaos52-root/Documentation/filesystems/ext2.txt       2003-06-03 17:10:55.000000000 +0800
+--- linux-2.4.18-p4smp/Documentation/filesystems/ext2.txt~iopen-2.4.18 2003-07-09 12:17:30.000000000 -0600
++++ linux-2.4.18-p4smp-braam/Documentation/filesystems/ext2.txt        2003-07-09 17:13:02.000000000 -0600
  @@ -35,6 +35,22 @@ resgid=n                    The group ID which may use th
   
   sb=n                          Use alternate superblock at this location.
@@ -25,19 +33,19 @@
   grpquota,noquota,quota,usrquota       Quota options are silently ignored by ext2.
   
   
---- linux-2.4.18-chaos52/fs/ext3/Makefile~iopen-2.4.18 2003-06-01 03:24:07.000000000 +0800
-+++ linux-2.4.18-chaos52-root/fs/ext3/Makefile 2003-06-03 17:10:55.000000000 +0800
+--- linux-2.4.18-p4smp/fs/ext3/Makefile~iopen-2.4.18   2003-07-09 17:12:12.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/Makefile  2003-07-09 17:13:15.000000000 -0600
  @@ -11,7 +11,7 @@ O_TARGET := ext3.o
   
- export-objs :=        super.o inode.o xattr.o
+ export-objs :=        super.o inode.o xattr.o ext3-exports.o
   
  -obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
-+obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
-               ioctl.o namei.o super.o symlink.o xattr.o
++obj-y    := balloc.o iopen.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
+               ioctl.o namei.o super.o symlink.o xattr.o ext3-exports.o
   obj-m    := $(O_TARGET)
   
---- linux-2.4.18-chaos52/fs/ext3/inode.c~iopen-2.4.18  2003-06-03 17:10:21.000000000 +0800
-+++ linux-2.4.18-chaos52-root/fs/ext3/inode.c  2003-06-03 17:10:55.000000000 +0800
+--- linux-2.4.18-p4smp/fs/ext3/inode.c~iopen-2.4.18    2003-07-09 17:11:19.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/inode.c   2003-07-09 17:13:02.000000000 -0600
  @@ -31,6 +31,7 @@
   #include <linux/highuid.h>
   #include <linux/quotaops.h>
@@ -46,7 +54,7 @@
   
   /*
    * SEARCH_FROM_ZERO forces each block allocation to search from the start
-@@ -2135,6 +2136,9 @@ void ext3_read_inode(struct inode * inod
+@@ -2165,6 +2166,9 @@ void ext3_read_inode(struct inode * inod
         struct buffer_head *bh;
         int block;
         
@@ -56,8 +64,8 @@
         if(ext3_get_inode_loc(inode, &iloc))
                 goto bad_inode;
         bh = iloc.bh;
---- /dev/null  2002-08-31 07:31:37.000000000 +0800
-+++ linux-2.4.18-chaos52-root/fs/ext3/iopen.c  2003-06-03 17:10:55.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ linux-2.4.18-p4smp-braam/fs/ext3/iopen.c   2003-07-09 17:13:02.000000000 -0600
  @@ -0,0 +1,259 @@
  +/*
  + * linux/fs/ext3/iopen.c
@@ -318,8 +326,8 @@
  +
  +      return 1;
  +}
---- /dev/null  2002-08-31 07:31:37.000000000 +0800
-+++ linux-2.4.18-chaos52-root/fs/ext3/iopen.h  2003-06-03 17:10:55.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ linux-2.4.18-p4smp-braam/fs/ext3/iopen.h   2003-07-09 17:13:02.000000000 -0600
  @@ -0,0 +1,13 @@
  +/*
  + * iopen.h
@@ -334,8 +342,8 @@
  +
  +extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry);
  +extern int ext3_iopen_get_inode(struct inode *inode);
---- linux-2.4.18-chaos52/fs/ext3/namei.c~iopen-2.4.18  2003-06-03 17:10:20.000000000 +0800
-+++ linux-2.4.18-chaos52-root/fs/ext3/namei.c  2003-06-03 17:10:55.000000000 +0800
+--- linux-2.4.18-p4smp/fs/ext3/namei.c~iopen-2.4.18    2003-07-09 13:32:38.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/namei.c   2003-07-09 17:13:02.000000000 -0600
  @@ -34,6 +34,7 @@
   #include <linux/locks.h>
   #include <linux/quotaops.h>
@@ -379,9 +387,9 @@
         d_add(dentry, inode);
         return NULL;
   }
---- linux-2.4.18-chaos52/fs/ext3/super.c~iopen-2.4.18  2003-06-03 17:10:21.000000000 +0800
-+++ linux-2.4.18-chaos52-root/fs/ext3/super.c  2003-06-03 17:10:55.000000000 +0800
-@@ -820,6 +820,17 @@ static int parse_options (char * options
+--- linux-2.4.18-p4smp/fs/ext3/super.c~iopen-2.4.18    2003-07-09 13:32:38.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/super.c   2003-07-09 17:13:02.000000000 -0600
+@@ -831,6 +831,17 @@ static int parse_options (char * options
                          || !strcmp (this_char, "quota")
                          || !strcmp (this_char, "usrquota"))
                         /* Don't do anything ;-) */ ;
@@ -399,8 +407,8 @@
                 else if (!strcmp (this_char, "journal")) {
                         /* @@@ FIXME */
                         /* Eventually we will want to be able to create
---- linux-2.4.18-chaos52/include/linux/ext3_fs.h~iopen-2.4.18  2003-06-03 17:10:22.000000000 +0800
-+++ linux-2.4.18-chaos52-root/include/linux/ext3_fs.h  2003-06-03 17:12:08.000000000 +0800
+--- linux-2.4.18-p4smp/include/linux/ext3_fs.h~iopen-2.4.18    2003-07-09 13:32:38.000000000 -0600
++++ linux-2.4.18-p4smp-braam/include/linux/ext3_fs.h   2003-07-09 17:13:02.000000000 -0600
  @@ -321,6 +321,8 @@ struct ext3_inode {
   #define EXT3_MOUNT_UPDATE_JOURNAL     0x1000  /* Update the journal format */
   #define EXT3_MOUNT_NO_UID32           0x2000  /* Disable 32-bit UIDs */
diff --git a/lustre/kernel_patches/patches/iopen-2.4.20.patch b/lustre/kernel_patches/patches/iopen-2.4.20.patch

index 3038cc8..ec48814 100644 (file)
--- a/lustre/kernel_patches/patches/iopen-2.4.20.patch
+++ b/lustre/kernel_patches/patches/iopen-2.4.20.patch
@@ -1,15 +1,15 @@
   Documentation/filesystems/ext2.txt |   16 ++
   fs/ext3/Makefile                   |    2 
   fs/ext3/inode.c                    |    4 
- fs/ext3/iopen.c                    |  240 +++++++++++++++++++++++++++++++++++++
- fs/ext3/iopen.h                    |   15 ++
- fs/ext3/namei.c                    |   13 +-
+ fs/ext3/iopen.c                    |  259 +++++++++++++++++++++++++++++++++++++
+ fs/ext3/iopen.h                    |   13 +
+ fs/ext3/namei.c                    |   13 +
   fs/ext3/super.c                    |   11 +
   include/linux/ext3_fs.h            |    2 
- 8 files changed, 301 insertions(+), 2 deletions(-)
+ 8 files changed, 318 insertions(+), 2 deletions(-)
  
---- linux-2.4.20/Documentation/filesystems/ext2.txt~iopen      2001-07-11 16:44:45.000000000 -0600
-+++ linux-2.4.20-braam/Documentation/filesystems/ext2.txt      2003-05-17 14:06:00.000000000 -0600
+--- linux/Documentation/filesystems/ext2.txt~iopen-2.4.20      Wed Jul 11 15:44:45 2001
++++ linux-mmonroe/Documentation/filesystems/ext2.txt   Thu Jul 10 12:28:54 2003
  @@ -35,6 +35,22 @@ resgid=n                    The group ID which may use th
   
   sb=n                          Use alternate superblock at this location.
@@ -33,8 +33,8 @@
   grpquota,noquota,quota,usrquota       Quota options are silently ignored by ext2.
   
   
---- linux-2.4.20/fs/ext3/Makefile~iopen        2003-05-17 14:05:57.000000000 -0600
-+++ linux-2.4.20-braam/fs/ext3/Makefile        2003-05-17 14:06:00.000000000 -0600
+--- linux/fs/ext3/Makefile~iopen-2.4.20        Thu Jul 10 12:28:44 2003
++++ linux-mmonroe/fs/ext3/Makefile     Thu Jul 10 12:28:54 2003
  @@ -11,7 +11,7 @@ O_TARGET := ext3.o
   
   export-objs := ext3-exports.o
@@ -44,8 +44,8 @@
                 ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o
   obj-m    := $(O_TARGET)
   
---- linux-2.4.20/fs/ext3/inode.c~iopen 2003-05-17 14:06:00.000000000 -0600
-+++ linux-2.4.20-braam/fs/ext3/inode.c 2003-05-17 14:06:00.000000000 -0600
+--- linux/fs/ext3/inode.c~iopen-2.4.20 Thu Jul 10 12:28:46 2003
++++ linux-mmonroe/fs/ext3/inode.c      Thu Jul 10 12:28:54 2003
  @@ -31,6 +31,7 @@
   #include <linux/highuid.h>
   #include <linux/quotaops.h>
@@ -54,7 +54,7 @@
   
   /*
    * SEARCH_FROM_ZERO forces each block allocation to search from the start
-@@ -2137,6 +2138,9 @@ void ext3_read_inode(struct inode * inod
+@@ -2253,6 +2254,9 @@ void ext3_read_inode(struct inode * inod
         struct buffer_head *bh;
         int block;
         
@@ -64,8 +64,8 @@
         if(ext3_get_inode_loc(inode, &iloc))
                 goto bad_inode;
         bh = iloc.bh;
---- /dev/null  2003-01-30 03:24:37.000000000 -0700
-+++ linux-2.4.20-braam/fs/ext3/iopen.c 2003-05-17 22:18:55.000000000 -0600
+--- /dev/null  Tue Jan 28 04:00:01 2003
++++ linux-mmonroe/fs/ext3/iopen.c      Thu Jul 10 12:28:54 2003
  @@ -0,0 +1,259 @@
  +/*
  + * linux/fs/ext3/iopen.c
@@ -326,8 +326,8 @@
  +
  +      return 1;
  +}
---- /dev/null  2003-01-30 03:24:37.000000000 -0700
-+++ linux-2.4.20-braam/fs/ext3/iopen.h 2003-05-17 14:06:00.000000000 -0600
+--- /dev/null  Tue Jan 28 04:00:01 2003
++++ linux-mmonroe/fs/ext3/iopen.h      Thu Jul 10 12:28:54 2003
  @@ -0,0 +1,13 @@
  +/*
  + * iopen.h
@@ -342,8 +342,8 @@
  +
  +extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry);
  +extern int ext3_iopen_get_inode(struct inode *inode);
---- linux-2.4.20/fs/ext3/namei.c~iopen 2003-05-17 14:05:59.000000000 -0600
-+++ linux-2.4.20-braam/fs/ext3/namei.c 2003-05-17 22:23:08.000000000 -0600
+--- linux/fs/ext3/namei.c~iopen-2.4.20 Thu Jul 10 12:28:46 2003
++++ linux-mmonroe/fs/ext3/namei.c      Thu Jul 10 12:28:54 2003
  @@ -35,7 +35,7 @@
   #include <linux/string.h>
   #include <linux/locks.h>
@@ -388,9 +388,9 @@
         d_add(dentry, inode);
         return NULL;
   }
---- linux-2.4.20/fs/ext3/super.c~iopen 2003-05-17 14:05:59.000000000 -0600
-+++ linux-2.4.20-braam/fs/ext3/super.c 2003-05-17 14:06:00.000000000 -0600
-@@ -820,6 +820,17 @@ static int parse_options (char * options
+--- linux/fs/ext3/super.c~iopen-2.4.20 Thu Jul 10 12:28:45 2003
++++ linux-mmonroe/fs/ext3/super.c      Thu Jul 10 12:28:54 2003
+@@ -835,6 +835,17 @@ static int parse_options (char * options
                          || !strcmp (this_char, "quota")
                          || !strcmp (this_char, "usrquota"))
                         /* Don't do anything ;-) */ ;
@@ -408,15 +408,15 @@
                 else if (!strcmp (this_char, "journal")) {
                         /* @@@ FIXME */
                         /* Eventually we will want to be able to create
---- linux-2.4.20/include/linux/ext3_fs.h~iopen 2003-05-17 14:05:59.000000000 -0600
-+++ linux-2.4.20-braam/include/linux/ext3_fs.h 2003-05-17 14:06:29.000000000 -0600
+--- linux/include/linux/ext3_fs.h~iopen-2.4.20 Thu Jul 10 12:28:46 2003
++++ linux-mmonroe/include/linux/ext3_fs.h      Thu Jul 10 12:30:12 2003
  @@ -322,6 +322,8 @@ struct ext3_inode {
   #define EXT3_MOUNT_UPDATE_JOURNAL     0x1000  /* Update the journal format */
   #define EXT3_MOUNT_NO_UID32           0x2000  /* Disable 32-bit UIDs */
   #define EXT3_MOUNT_XATTR_USER         0x4000  /* Extended user attributes */
  +#define EXT3_MOUNT_IOPEN              0x8000  /* Allow access via iopen */
  +#define EXT3_MOUNT_IOPEN_NOPRIV               0x10000 /* Make iopen world-readable */
- #define EXT3_MOUNT_ASYNCDEL           0x20000 /* Delayed deletion */
+ #define EXT3_MOUNT_ASYNCDEL           0x20000 /* Delayed deletion */
   
   /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
  
diff --git a/lustre/kernel_patches/patches/linux-2.4.18ea-0.8.26.patch b/lustre/kernel_patches/patches/linux-2.4.18ea-0.8.26.patch

index 75ebcd0..15f1b2a 100644 (file)
--- a/lustre/kernel_patches/patches/linux-2.4.18ea-0.8.26.patch
+++ b/lustre/kernel_patches/patches/linux-2.4.18ea-0.8.26.patch
@@ -1,7 +1,18 @@
- 0 files changed
+ fs/ext3/Makefile           |    4 
+ fs/ext3/ext3-exports.c     |   13 
+ fs/ext3/ialloc.c           |    2 
+ fs/ext3/inode.c            |   29 -
+ fs/ext3/namei.c            |   12 
+ fs/ext3/super.c            |   22 
+ fs/ext3/xattr.c            | 1242 +++++++++++++++++++++++++++++++++++++++++++++
+ include/linux/ext3_fs.h    |   46 -
+ include/linux/ext3_jbd.h   |    8 
+ include/linux/ext3_xattr.h |  155 +++++
+ include/linux/xattr.h      |   15 
+ 11 files changed, 1496 insertions(+), 52 deletions(-)
  
---- linux-2.4.18-18/fs/ext3/ialloc.c~linux-2.4.18ea-0.8.26     2003-04-20 16:14:31.000000000 +0800
-+++ linux-2.4.18-18-root/fs/ext3/ialloc.c      2003-04-20 16:14:31.000000000 +0800
+--- linux-2.4.18-p4smp/fs/ext3/ialloc.c~linux-2.4.18ea-0.8.26  2003-07-20 17:12:43.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/ialloc.c  2003-07-21 22:49:05.000000000 -0600
  @@ -17,6 +17,7 @@
   #include <linux/jbd.h>
   #include <linux/ext3_fs.h>
@@ -18,8 +29,8 @@
         DQUOT_FREE_INODE(inode);
         DQUOT_DROP(inode);
   
---- linux-2.4.18-18/fs/ext3/inode.c~linux-2.4.18ea-0.8.26      2003-04-20 16:14:31.000000000 +0800
-+++ linux-2.4.18-18-root/fs/ext3/inode.c       2003-04-20 16:14:31.000000000 +0800
+--- linux-2.4.18-p4smp/fs/ext3/inode.c~linux-2.4.18ea-0.8.26   2003-07-20 17:12:43.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/inode.c   2003-07-21 22:49:05.000000000 -0600
  @@ -39,6 +39,18 @@
    */
   #undef SEARCH_FROM_ZERO
@@ -59,7 +70,7 @@
                 goto no_delete;
   
         lock_kernel();
-@@ -1861,6 +1871,8 @@ void ext3_truncate(struct inode * inode)
+@@ -1877,6 +1887,8 @@ void ext3_truncate(struct inode * inode)
         if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
             S_ISLNK(inode->i_mode)))
                 return;
@@ -68,7 +79,7 @@
         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
                 return;
   
-@@ -2008,8 +2020,6 @@ int ext3_get_inode_loc (struct inode *in
+@@ -2038,8 +2050,6 @@ int ext3_get_inode_loc (struct inode *in
         struct ext3_group_desc * gdp;
                 
         if ((inode->i_ino != EXT3_ROOT_INO &&
@@ -77,7 +88,7 @@
                 inode->i_ino != EXT3_JOURNAL_INO &&
                 inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) ||
                 inode->i_ino > le32_to_cpu(
-@@ -2136,10 +2146,7 @@ void ext3_read_inode(struct inode * inod
+@@ -2166,10 +2176,7 @@ void ext3_read_inode(struct inode * inod
   
         brelse (iloc.bh);
   
@@ -89,7 +100,7 @@
                 inode->i_op = &ext3_file_inode_operations;
                 inode->i_fop = &ext3_file_operations;
                 inode->i_mapping->a_ops = &ext3_aops;
-@@ -2147,7 +2154,7 @@ void ext3_read_inode(struct inode * inod
+@@ -2177,7 +2184,7 @@ void ext3_read_inode(struct inode * inod
                 inode->i_op = &ext3_dir_inode_operations;
                 inode->i_fop = &ext3_dir_operations;
         } else if (S_ISLNK(inode->i_mode)) {
@@ -98,8 +109,8 @@
                         inode->i_op = &ext3_fast_symlink_inode_operations;
                 else {
                         inode->i_op = &page_symlink_inode_operations;
---- linux-2.4.18-18/fs/ext3/namei.c~linux-2.4.18ea-0.8.26      2003-04-20 16:14:31.000000000 +0800
-+++ linux-2.4.18-18-root/fs/ext3/namei.c       2003-04-20 16:14:31.000000000 +0800
+--- linux-2.4.18-p4smp/fs/ext3/namei.c~linux-2.4.18ea-0.8.26   2003-07-21 22:29:27.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/namei.c   2003-07-21 22:49:05.000000000 -0600
  @@ -27,6 +27,7 @@
   #include <linux/sched.h>
   #include <linux/ext3_fs.h>
@@ -153,8 +164,8 @@
                 inode->i_op = &page_symlink_inode_operations;
                 inode->i_mapping->a_ops = &ext3_aops;
                 /*
---- linux-2.4.18-18/fs/ext3/super.c~linux-2.4.18ea-0.8.26      2003-04-20 16:14:31.000000000 +0800
-+++ linux-2.4.18-18-root/fs/ext3/super.c       2003-04-20 16:14:31.000000000 +0800
+--- linux-2.4.18-p4smp/fs/ext3/super.c~linux-2.4.18ea-0.8.26   2003-07-21 22:29:27.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/super.c   2003-07-21 22:50:28.000000000 -0600
  @@ -24,6 +24,7 @@
   #include <linux/jbd.h>
   #include <linux/ext3_fs.h>
@@ -163,7 +174,7 @@
   #include <linux/slab.h>
   #include <linux/init.h>
   #include <linux/locks.h>
-@@ -404,6 +405,7 @@ void ext3_put_super (struct super_block 
+@@ -406,6 +407,7 @@ void ext3_put_super (struct super_block 
         kdev_t j_dev = sbi->s_journal->j_dev;
         int i;
   
@@ -171,7 +182,7 @@
         journal_destroy(sbi->s_journal);
         if (!(sb->s_flags & MS_RDONLY)) {
                 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
-@@ -1748,14 +1750,25 @@ int ext3_statfs (struct super_block * sb
+@@ -1749,17 +1751,27 @@ int ext3_statfs (struct super_block * sb
   
   static DECLARE_FSTYPE_DEV(ext3_fs_type, "ext3", ext3_read_super);
   
@@ -200,10 +211,29 @@
  +      return error;
   }
   
- EXPORT_SYMBOL(ext3_bread);
---- /dev/null  2002-08-31 07:31:37.000000000 +0800
-+++ linux-2.4.18-18-root/fs/ext3/xattr.c       2003-04-20 16:14:31.000000000 +0800
-@@ -0,0 +1,1247 @@
+-EXPORT_SYMBOL(ext3_bread);
+ 
+ MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
+ MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ linux-2.4.18-p4smp-braam/fs/ext3/ext3-exports.c    2003-07-21 22:49:05.000000000 -0600
+@@ -0,0 +1,13 @@
++#include <linux/config.h>
++#include <linux/module.h>
++#include <linux/ext3_fs.h>
++#include <linux/ext3_jbd.h>
++#include <linux/ext3_xattr.h>
++
++EXPORT_SYMBOL(ext3_force_commit);
++EXPORT_SYMBOL(ext3_bread);
++EXPORT_SYMBOL(ext3_xattr_register);
++EXPORT_SYMBOL(ext3_xattr_unregister);
++EXPORT_SYMBOL(ext3_xattr_get);
++EXPORT_SYMBOL(ext3_xattr_list);
++EXPORT_SYMBOL(ext3_xattr_set);
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ linux-2.4.18-p4smp-braam/fs/ext3/xattr.c   2003-07-21 22:50:40.000000000 -0600
+@@ -0,0 +1,1242 @@
  +/*
  + * linux/fs/ext3/xattr.c
  + *
@@ -277,11 +307,6 @@
  +#include <linux/module.h>
  +
  +/* These symbols may be needed by a module. */
-+EXPORT_SYMBOL(ext3_xattr_register);
-+EXPORT_SYMBOL(ext3_xattr_unregister);
-+EXPORT_SYMBOL(ext3_xattr_get);
-+EXPORT_SYMBOL(ext3_xattr_list);
-+EXPORT_SYMBOL(ext3_xattr_set);
  +
  +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0)
  +# define mark_buffer_dirty(bh) mark_buffer_dirty(bh, 1)
@@ -1451,8 +1476,8 @@
  +}
  +
  +#endif  /* CONFIG_EXT3_FS_XATTR_SHARING */
---- linux-2.4.18-18/include/linux/ext3_fs.h~linux-2.4.18ea-0.8.26      2003-04-20 16:14:31.000000000 +0800
-+++ linux-2.4.18-18-root/include/linux/ext3_fs.h       2003-04-20 16:14:31.000000000 +0800
+--- linux-2.4.18-p4smp/include/linux/ext3_fs.h~linux-2.4.18ea-0.8.26   2003-07-21 22:29:27.000000000 -0600
++++ linux-2.4.18-p4smp-braam/include/linux/ext3_fs.h   2003-07-21 22:49:05.000000000 -0600
  @@ -58,8 +58,6 @@
    */
   #define       EXT3_BAD_INO             1      /* Bad blocks inode */
@@ -1541,8 +1566,8 @@
   extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
   extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
   
---- linux-2.4.18-18/include/linux/ext3_jbd.h~linux-2.4.18ea-0.8.26     2003-04-20 16:14:31.000000000 +0800
-+++ linux-2.4.18-18-root/include/linux/ext3_jbd.h      2003-04-20 16:14:31.000000000 +0800
+--- linux-2.4.18-p4smp/include/linux/ext3_jbd.h~linux-2.4.18ea-0.8.26  2003-07-21 22:29:27.000000000 -0600
++++ linux-2.4.18-p4smp-braam/include/linux/ext3_jbd.h  2003-07-21 22:49:05.000000000 -0600
  @@ -30,13 +30,19 @@
   
   #define EXT3_SINGLEDATA_TRANS_BLOCKS  8
@@ -1564,8 +1589,8 @@
   
   extern int ext3_writepage_trans_blocks(struct inode *inode);
   
---- /dev/null  2002-08-31 07:31:37.000000000 +0800
-+++ linux-2.4.18-18-root/include/linux/ext3_xattr.h    2003-04-20 16:14:31.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ linux-2.4.18-p4smp-braam/include/linux/ext3_xattr.h        2003-07-21 22:49:05.000000000 -0600
  @@ -0,0 +1,155 @@
  +/*
  +  File: linux/ext3_xattr.h
@@ -1722,8 +1747,8 @@
  +
  +#endif  /* __KERNEL__ */
  +
---- /dev/null  2002-08-31 07:31:37.000000000 +0800
-+++ linux-2.4.18-18-root/include/linux/xattr.h 2003-04-20 16:14:31.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ linux-2.4.18-p4smp-braam/include/linux/xattr.h     2003-07-21 22:49:05.000000000 -0600
  @@ -0,0 +1,15 @@
  +/*
  +  File: linux/xattr.h
@@ -1740,18 +1765,18 @@
  +#define XATTR_REPLACE 2       /* set value, fail if attr does not exist */
  +
  +#endif        /* _LINUX_XATTR_H */
---- linux-2.4.18-18/fs/ext3/Makefile~linux-2.4.18ea-0.8.26     2003-04-20 16:14:54.000000000 +0800
-+++ linux-2.4.18-18-root/fs/ext3/Makefile      2003-04-20 16:15:15.000000000 +0800
+--- linux-2.4.18-p4smp/fs/ext3/Makefile~linux-2.4.18ea-0.8.26  2003-07-21 22:27:37.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/Makefile  2003-07-21 22:51:23.000000000 -0600
  @@ -9,10 +9,10 @@
   
   O_TARGET := ext3.o
   
  -export-objs :=        super.o inode.o
-+export-objs :=        super.o inode.o xattr.o
++export-objs :=        ext3-exports.o
   
   obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
  -              ioctl.o namei.o super.o symlink.o
-+              ioctl.o namei.o super.o symlink.o xattr.o
++              ioctl.o namei.o super.o symlink.o xattr.o ext3-exports.o
   obj-m    := $(O_TARGET)
   
   include $(TOPDIR)/Rules.make
diff --git a/lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54-chaos.patch b/lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54-chaos.patch

index 5c6c6a9..6d8eac6 100644 (file)
--- a/lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54-chaos.patch
+++ b/lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54-chaos.patch
@@ -31,6 +31,7 @@
   fs/ext2/xattr.c               | 1212 +++++++++++++++++++++++++++++++++++++++++
   fs/ext2/xattr_user.c          |  103 +++
   fs/ext3/Makefile              |   10 
+ fs/ext3/ext3-exports.c        |   13 
   fs/ext3/file.c                |    5 
   fs/ext3/ialloc.c              |    2 
   fs/ext3/inode.c               |   35 -
@@ -59,12 +60,11 @@
   include/linux/mbcache.h       |   69 ++
   kernel/ksyms.c                |    4 
   mm/vmscan.c                   |   36 +
- fs/ext3/ext3-exports.c        |   14 +  
- 62 files changed, 4331 insertions(+), 197 deletions(-)
+ 62 files changed, 4344 insertions(+), 183 deletions(-)
  
---- linux-rh-2.4.20-8/Documentation/Configure.help~linux-2.4.20-xattr-0.8.54-chaos     2003-05-07 17:33:50.000000000 +0800
-+++ linux-rh-2.4.20-8-root/Documentation/Configure.help        2003-05-07 17:34:25.000000000 +0800
-@@ -15226,6 +15226,39 @@ CONFIG_EXT2_FS
+--- kernel-2.4.20-6chaos_18_7/Documentation/Configure.help~linux-2.4.20-xattr-0.8.54-chaos     2003-06-23 10:39:21.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/Documentation/Configure.help       2003-07-12 15:34:44.000000000 -0600
+@@ -15253,6 +15253,39 @@ CONFIG_EXT2_FS
     be compiled as a module, and so this could be dangerous.  Most
     everyone wants to say Y here.
   
@@ -104,7 +104,7 @@
   Ext3 journalling file system support (EXPERIMENTAL)
   CONFIG_EXT3_FS
     This is the journalling version of the Second extended file system
-@@ -15258,6 +15291,39 @@ CONFIG_EXT3_FS
+@@ -15285,6 +15318,39 @@ CONFIG_EXT3_FS
     of your root partition (the one containing the directory /) cannot
     be compiled as a module, and so this may be dangerous.
   
@@ -144,8 +144,8 @@
   Journal Block Device support (JBD for ext3) (EXPERIMENTAL)
   CONFIG_JBD
     This is a generic journalling layer for block devices.  It is
---- linux-rh-2.4.20-8/arch/alpha/defconfig~linux-2.4.20-xattr-0.8.54-chaos     2001-11-20 07:19:42.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/alpha/defconfig        2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/alpha/defconfig~linux-2.4.20-xattr-0.8.54-chaos     2002-05-07 15:53:54.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/alpha/defconfig       2003-07-12 15:34:44.000000000 -0600
  @@ -1,6 +1,13 @@
   #
   # Automatically generated make config: don't edit
@@ -160,8 +160,8 @@
   CONFIG_ALPHA=y
   # CONFIG_UID16 is not set
   # CONFIG_RWSEM_GENERIC_SPINLOCK is not set
---- linux-rh-2.4.20-8/arch/alpha/kernel/entry.S~linux-2.4.20-xattr-0.8.54-chaos        2003-04-11 14:04:53.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/alpha/kernel/entry.S   2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/alpha/kernel/entry.S~linux-2.4.20-xattr-0.8.54-chaos        2003-05-15 21:11:53.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/alpha/kernel/entry.S  2003-07-12 15:34:44.000000000 -0600
  @@ -1162,6 +1162,18 @@ sys_call_table:
         .quad sys_readahead
         .quad sys_ni_syscall                    /* 380, sys_security */
@@ -181,8 +181,8 @@
   
   /* Remember to update everything, kids.  */
   .ifne (. - sys_call_table) - (NR_SYSCALLS * 8)
---- linux-rh-2.4.20-8/arch/arm/defconfig~linux-2.4.20-xattr-0.8.54-chaos       2001-05-20 08:43:05.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/arm/defconfig  2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/arm/defconfig~linux-2.4.20-xattr-0.8.54-chaos       2002-05-07 15:53:56.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/arm/defconfig 2003-07-12 15:34:44.000000000 -0600
  @@ -1,6 +1,13 @@
   #
   # Automatically generated make config: don't edit
@@ -197,8 +197,8 @@
   CONFIG_ARM=y
   # CONFIG_EISA is not set
   # CONFIG_SBUS is not set
---- linux-rh-2.4.20-8/arch/arm/kernel/calls.S~linux-2.4.20-xattr-0.8.54-chaos  2002-08-03 08:39:42.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/arm/kernel/calls.S     2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/arm/kernel/calls.S~linux-2.4.20-xattr-0.8.54-chaos  2002-09-25 11:09:16.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/arm/kernel/calls.S    2003-07-12 15:34:44.000000000 -0600
  @@ -240,18 +240,18 @@ __syscall_start:
                 .long   SYMBOL_NAME(sys_ni_syscall) /* Security */
                 .long   SYMBOL_NAME(sys_gettid)
@@ -230,8 +230,8 @@
                 .long   SYMBOL_NAME(sys_tkill)
                 /*
                  * Please check 2.5 _before_ adding calls here,
---- linux-rh-2.4.20-8/arch/i386/defconfig~linux-2.4.20-xattr-0.8.54-chaos      2003-04-11 14:04:53.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/i386/defconfig 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/i386/defconfig~linux-2.4.20-xattr-0.8.54-chaos      2003-05-15 21:12:00.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/i386/defconfig        2003-07-12 15:34:44.000000000 -0600
  @@ -1,6 +1,13 @@
   #
   # Automatically generated make config: don't edit
@@ -246,8 +246,8 @@
   CONFIG_X86=y
   CONFIG_ISA=y
   # CONFIG_SBUS is not set
---- linux-rh-2.4.20-8/arch/ia64/defconfig~linux-2.4.20-xattr-0.8.54-chaos      2003-04-11 14:04:43.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/ia64/defconfig 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/ia64/defconfig~linux-2.4.20-xattr-0.8.54-chaos      2003-05-15 21:12:04.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/ia64/defconfig        2003-07-12 15:34:44.000000000 -0600
  @@ -1,6 +1,13 @@
   #
   # Automatically generated make config: don't edit
@@ -262,8 +262,8 @@
   
   #
   # Code maturity level options
---- linux-rh-2.4.20-8/arch/m68k/defconfig~linux-2.4.20-xattr-0.8.54-chaos      2000-06-20 03:56:08.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/m68k/defconfig 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/m68k/defconfig~linux-2.4.20-xattr-0.8.54-chaos      2002-05-07 15:53:55.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/m68k/defconfig        2003-07-12 15:34:44.000000000 -0600
  @@ -1,6 +1,13 @@
   #
   # Automatically generated make config: don't edit
@@ -278,8 +278,8 @@
   CONFIG_UID16=y
   
   #
---- linux-rh-2.4.20-8/arch/mips/defconfig~linux-2.4.20-xattr-0.8.54-chaos      2002-11-29 07:53:10.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/mips/defconfig 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/mips/defconfig~linux-2.4.20-xattr-0.8.54-chaos      2003-02-14 15:58:06.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/arch/mips/defconfig        2003-07-12 15:34:44.000000000 -0600
  @@ -1,6 +1,13 @@
   #
   # Automatically generated make config: don't edit
@@ -294,8 +294,8 @@
   CONFIG_MIPS=y
   CONFIG_MIPS32=y
   # CONFIG_MIPS64 is not set
---- linux-rh-2.4.20-8/arch/mips64/defconfig~linux-2.4.20-xattr-0.8.54-chaos    2002-11-29 07:53:10.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/mips64/defconfig       2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/mips64/defconfig~linux-2.4.20-xattr-0.8.54-chaos    2003-02-14 15:58:11.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/arch/mips64/defconfig      2003-07-12 15:34:44.000000000 -0600
  @@ -1,6 +1,13 @@
   #
   # Automatically generated make config: don't edit
@@ -310,8 +310,8 @@
   CONFIG_MIPS=y
   # CONFIG_MIPS32 is not set
   CONFIG_MIPS64=y
---- linux-rh-2.4.20-8/arch/ppc/defconfig~linux-2.4.20-xattr-0.8.54-chaos       2003-04-11 14:04:43.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/ppc/defconfig  2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/ppc/defconfig~linux-2.4.20-xattr-0.8.54-chaos       2003-05-15 21:12:20.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/ppc/defconfig 2003-07-12 15:34:44.000000000 -0600
  @@ -1,6 +1,20 @@
   #
   # Automatically generated make config: don't edit
@@ -333,8 +333,8 @@
   # CONFIG_UID16 is not set
   # CONFIG_RWSEM_GENERIC_SPINLOCK is not set
   CONFIG_RWSEM_XCHGADD_ALGORITHM=y
---- linux-rh-2.4.20-8/arch/ppc64/kernel/misc.S~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:11.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/ppc64/kernel/misc.S    2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/ppc64/kernel/misc.S~linux-2.4.20-xattr-0.8.54-chaos 2003-02-14 15:58:20.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/arch/ppc64/kernel/misc.S   2003-07-12 15:34:44.000000000 -0600
  @@ -731,6 +731,7 @@ _GLOBAL(sys_call_table32)
         .llong .sys_gettid              /* 207 */
   #if 0 /* Reserved syscalls */
@@ -351,8 +351,8 @@
         .llong .sys_futex
   #endif
         .llong .sys_perfmonctl   /* Put this here for now ... */
---- linux-rh-2.4.20-8/arch/s390/defconfig~linux-2.4.20-xattr-0.8.54-chaos      2002-11-29 07:53:11.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/s390/defconfig 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/s390/defconfig~linux-2.4.20-xattr-0.8.54-chaos      2003-02-14 15:58:20.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/arch/s390/defconfig        2003-07-12 15:34:44.000000000 -0600
  @@ -1,6 +1,13 @@
   #
   # Automatically generated make config: don't edit
@@ -367,8 +367,8 @@
   # CONFIG_ISA is not set
   # CONFIG_EISA is not set
   # CONFIG_MCA is not set
---- linux-rh-2.4.20-8/arch/s390/kernel/entry.S~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:11.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/s390/kernel/entry.S    2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/s390/kernel/entry.S~linux-2.4.20-xattr-0.8.54-chaos 2003-02-14 15:58:20.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/arch/s390/kernel/entry.S   2003-07-12 15:34:44.000000000 -0600
  @@ -558,18 +558,18 @@ sys_call_table:
           .long  sys_fcntl64 
         .long  sys_ni_syscall
@@ -400,8 +400,8 @@
         .long  sys_gettid
         .long  sys_tkill
         .rept  255-237
---- linux-rh-2.4.20-8/arch/s390x/defconfig~linux-2.4.20-xattr-0.8.54-chaos     2002-11-29 07:53:11.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/s390x/defconfig        2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/s390x/defconfig~linux-2.4.20-xattr-0.8.54-chaos     2003-02-14 15:58:21.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/arch/s390x/defconfig       2003-07-12 15:34:44.000000000 -0600
  @@ -1,6 +1,13 @@
   #
   # Automatically generated make config: don't edit
@@ -416,8 +416,8 @@
   # CONFIG_ISA is not set
   # CONFIG_EISA is not set
   # CONFIG_MCA is not set
---- linux-rh-2.4.20-8/arch/s390x/kernel/entry.S~linux-2.4.20-xattr-0.8.54-chaos        2002-11-29 07:53:11.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/s390x/kernel/entry.S   2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/s390x/kernel/entry.S~linux-2.4.20-xattr-0.8.54-chaos        2003-02-14 15:58:21.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/arch/s390x/kernel/entry.S  2003-07-12 15:34:44.000000000 -0600
  @@ -591,18 +591,18 @@ sys_call_table:
         .long  SYSCALL(sys_ni_syscall,sys32_fcntl64_wrapper)
         .long  SYSCALL(sys_ni_syscall,sys_ni_syscall)
@@ -449,8 +449,8 @@
         .long  SYSCALL(sys_gettid,sys_gettid)
         .long  SYSCALL(sys_tkill,sys_tkill)
         .rept  255-237
---- linux-rh-2.4.20-8/arch/s390x/kernel/wrapper32.S~linux-2.4.20-xattr-0.8.54-chaos    2002-02-26 03:37:56.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/s390x/kernel/wrapper32.S       2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/s390x/kernel/wrapper32.S~linux-2.4.20-xattr-0.8.54-chaos    2002-05-07 15:53:59.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/s390x/kernel/wrapper32.S      2003-07-12 15:34:44.000000000 -0600
  @@ -1091,3 +1091,95 @@ sys32_fstat64_wrapper:
         llgtr   %r3,%r3                 # struct stat64 *
         llgfr   %r4,%r4                 # long
@@ -547,8 +547,8 @@
  +      jg      sys_fremovexattr
  +
  +
---- linux-rh-2.4.20-8/arch/sparc/defconfig~linux-2.4.20-xattr-0.8.54-chaos     2002-08-03 08:39:43.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/sparc/defconfig        2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/sparc/defconfig~linux-2.4.20-xattr-0.8.54-chaos     2002-09-25 11:10:50.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/sparc/defconfig       2003-07-12 15:34:44.000000000 -0600
  @@ -1,6 +1,13 @@
   #
   # Automatically generated make config: don't edit
@@ -563,8 +563,8 @@
   CONFIG_UID16=y
   CONFIG_HIGHMEM=y
   
---- linux-rh-2.4.20-8/arch/sparc/kernel/systbls.S~linux-2.4.20-xattr-0.8.54-chaos      2002-08-03 08:39:43.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/sparc/kernel/systbls.S 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/sparc/kernel/systbls.S~linux-2.4.20-xattr-0.8.54-chaos      2002-09-25 11:10:52.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/sparc/kernel/systbls.S        2003-07-12 15:34:44.000000000 -0600
  @@ -51,11 +51,11 @@ sys_call_table:
   /*150*/       .long sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_poll, sys_getdents64
   /*155*/       .long sys_fcntl64, sys_nis_syscall, sys_statfs, sys_fstatfs, sys_oldumount
@@ -582,8 +582,8 @@
   /*190*/       .long sys_init_module, sys_personality, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall
   /*195*/       .long sys_nis_syscall, sys_nis_syscall, sys_getppid, sparc_sigaction, sys_sgetmask
   /*200*/       .long sys_ssetmask, sys_sigsuspend, sys_newlstat, sys_uselib, old_readdir
---- linux-rh-2.4.20-8/arch/sparc64/defconfig~linux-2.4.20-xattr-0.8.54-chaos   2003-04-11 14:04:43.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/sparc64/defconfig      2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/sparc64/defconfig~linux-2.4.20-xattr-0.8.54-chaos   2003-05-15 21:12:29.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/sparc64/defconfig     2003-07-12 15:34:44.000000000 -0600
  @@ -1,6 +1,13 @@
   #
   # Automatically generated make config: don't edit
@@ -598,8 +598,8 @@
   
   #
   # Code maturity level options
---- linux-rh-2.4.20-8/arch/sparc64/kernel/systbls.S~linux-2.4.20-xattr-0.8.54-chaos    2002-08-03 08:39:43.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/sparc64/kernel/systbls.S       2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/sparc64/kernel/systbls.S~linux-2.4.20-xattr-0.8.54-chaos    2002-09-25 11:10:55.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/sparc64/kernel/systbls.S      2003-07-12 15:34:44.000000000 -0600
  @@ -52,11 +52,11 @@ sys_call_table32:
   /*150*/       .word sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_poll, sys_getdents64
         .word sys32_fcntl64, sys_nis_syscall, sys32_statfs, sys32_fstatfs, sys_oldumount
@@ -634,8 +634,8 @@
   /*190*/       .word sys_init_module, sparc64_personality, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall
         .word sys_nis_syscall, sys_nis_syscall, sys_getppid, sys_nis_syscall, sys_sgetmask
   /*200*/       .word sys_ssetmask, sys_nis_syscall, sys_newlstat, sys_uselib, sys_nis_syscall
---- linux-rh-2.4.20-8/fs/Config.in~linux-2.4.20-xattr-0.8.54-chaos     2003-04-11 14:05:03.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/Config.in        2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/Config.in~linux-2.4.20-xattr-0.8.54-chaos     2003-05-15 21:14:24.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/Config.in       2003-07-12 15:34:44.000000000 -0600
  @@ -34,6 +34,11 @@ dep_mbool '  Debug Befs' CONFIG_BEFS_DEB
   dep_tristate 'BFS file system support (EXPERIMENTAL)' CONFIG_BFS_FS $CONFIG_EXPERIMENTAL
   
@@ -671,8 +671,8 @@
   mainmenu_option next_comment
   comment 'Partition Types'
   source fs/partitions/Config.in
---- linux-rh-2.4.20-8/fs/Makefile~linux-2.4.20-xattr-0.8.54-chaos      2003-05-07 17:33:58.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/Makefile 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/Makefile~linux-2.4.20-xattr-0.8.54-chaos      2003-07-12 15:33:34.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/Makefile        2003-07-12 15:34:44.000000000 -0600
  @@ -84,6 +84,9 @@ obj-y                                += binfmt_script.o
   
   obj-$(CONFIG_BINFMT_ELF)      += binfmt_elf.o
@@ -683,8 +683,8 @@
   # persistent filesystems
   obj-y += $(join $(subdir-y),$(subdir-y:%=/%.o))
   
---- linux-rh-2.4.20-8/fs/ext2/Makefile~linux-2.4.20-xattr-0.8.54-chaos 2001-10-11 23:05:18.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext2/Makefile    2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext2/Makefile~linux-2.4.20-xattr-0.8.54-chaos 2002-05-07 15:53:46.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/Makefile   2003-07-12 15:34:44.000000000 -0600
  @@ -13,4 +13,8 @@ obj-y    := balloc.o bitmap.o dir.o file
                 ioctl.o namei.o super.o symlink.o
   obj-m    := $(O_TARGET)
@@ -694,8 +694,8 @@
  +obj-$(CONFIG_EXT2_FS_XATTR_USER) += xattr_user.o
  +
   include $(TOPDIR)/Rules.make
---- linux-rh-2.4.20-8/fs/ext2/file.c~linux-2.4.20-xattr-0.8.54-chaos   2001-10-11 23:05:18.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext2/file.c      2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext2/file.c~linux-2.4.20-xattr-0.8.54-chaos   2002-05-07 15:53:46.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/file.c     2003-07-12 15:34:44.000000000 -0600
  @@ -20,6 +20,7 @@
   
   #include <linux/fs.h>
@@ -713,8 +713,8 @@
  +      listxattr:      ext2_listxattr,
  +      removexattr:    ext2_removexattr,
   };
---- linux-rh-2.4.20-8/fs/ext2/ialloc.c~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:15.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext2/ialloc.c    2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext2/ialloc.c~linux-2.4.20-xattr-0.8.54-chaos 2003-02-14 15:59:09.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/ialloc.c   2003-07-12 15:34:44.000000000 -0600
  @@ -15,6 +15,7 @@
   #include <linux/config.h>
   #include <linux/fs.h>
@@ -731,8 +731,8 @@
                 DQUOT_FREE_INODE(inode);
                 DQUOT_DROP(inode);
         }
---- linux-rh-2.4.20-8/fs/ext2/inode.c~linux-2.4.20-xattr-0.8.54-chaos  2002-11-29 07:53:15.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext2/inode.c     2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext2/inode.c~linux-2.4.20-xattr-0.8.54-chaos  2003-02-14 15:59:09.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/inode.c    2003-07-12 15:34:44.000000000 -0600
  @@ -39,6 +39,18 @@ MODULE_LICENSE("GPL");
   static int ext2_update_inode(struct inode * inode, int do_sync);
   
@@ -815,8 +815,8 @@
         brelse (bh);
         inode->i_attr_flags = 0;
         if (inode->u.ext2_i.i_flags & EXT2_SYNC_FL) {
---- linux-rh-2.4.20-8/fs/ext2/namei.c~linux-2.4.20-xattr-0.8.54-chaos  2001-10-04 13:57:36.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext2/namei.c     2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext2/namei.c~linux-2.4.20-xattr-0.8.54-chaos  2002-05-07 15:53:46.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/namei.c    2003-07-12 15:34:44.000000000 -0600
  @@ -31,6 +31,7 @@
   
   #include <linux/fs.h>
@@ -850,8 +850,8 @@
  +      listxattr:      ext2_listxattr,
  +      removexattr:    ext2_removexattr,
   };
---- linux-rh-2.4.20-8/fs/ext2/super.c~linux-2.4.20-xattr-0.8.54-chaos  2002-11-29 07:53:15.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext2/super.c     2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext2/super.c~linux-2.4.20-xattr-0.8.54-chaos  2003-02-14 15:59:09.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/super.c    2003-07-12 15:34:44.000000000 -0600
  @@ -21,6 +21,7 @@
   #include <linux/string.h>
   #include <linux/fs.h>
@@ -921,8 +921,8 @@
   }
   
   EXPORT_NO_SYMBOLS;
---- linux-rh-2.4.20-8/fs/ext2/symlink.c~linux-2.4.20-xattr-0.8.54-chaos        2000-09-28 04:41:33.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext2/symlink.c   2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext2/symlink.c~linux-2.4.20-xattr-0.8.54-chaos        2002-05-07 15:53:46.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/symlink.c  2003-07-12 15:34:44.000000000 -0600
  @@ -19,6 +19,7 @@
   
   #include <linux/fs.h>
@@ -952,8 +952,8 @@
  +      listxattr:      ext2_listxattr,
  +      removexattr:    ext2_removexattr,
   };
---- /dev/null  2003-01-30 18:24:37.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext2/xattr.c     2003-05-07 17:34:25.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/xattr.c    2003-07-12 15:34:44.000000000 -0600
  @@ -0,0 +1,1212 @@
  +/*
  + * linux/fs/ext2/xattr.c
@@ -2167,8 +2167,8 @@
  +}
  +
  +#endif  /* CONFIG_EXT2_FS_XATTR_SHARING */
---- /dev/null  2003-01-30 18:24:37.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext2/xattr_user.c        2003-05-07 17:34:25.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/xattr_user.c       2003-07-12 15:34:44.000000000 -0600
  @@ -0,0 +1,103 @@
  +/*
  + * linux/fs/ext2/xattr_user.c
@@ -2273,8 +2273,8 @@
  +      ext2_xattr_unregister(EXT2_XATTR_INDEX_USER,
  +                            &ext2_xattr_user_handler);
  +}
---- linux-rh-2.4.20-8/fs/ext3/Makefile~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:59.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext3/Makefile    2003-05-07 17:45:13.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext3/Makefile~linux-2.4.20-xattr-0.8.54-chaos 2003-07-12 15:33:38.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/Makefile   2003-07-12 15:34:44.000000000 -0600
  @@ -1,5 +1,5 @@
   #
  -# Makefile for the linux ext2-filesystem routines.
@@ -2299,8 +2299,8 @@
  +obj-$(CONFIG_EXT3_FS_XATTR_USER) += xattr_user.o
  +
   include $(TOPDIR)/Rules.make
---- linux-rh-2.4.20-8/fs/ext3/file.c~linux-2.4.20-xattr-0.8.54-chaos   2003-05-07 17:33:59.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext3/file.c      2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext3/file.c~linux-2.4.20-xattr-0.8.54-chaos   2003-07-12 15:33:38.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/file.c     2003-07-12 15:34:44.000000000 -0600
  @@ -23,6 +23,7 @@
   #include <linux/locks.h>
   #include <linux/jbd.h>
@@ -2319,8 +2319,8 @@
  +      removexattr:    ext3_removexattr,       /* BKL held */
   };
   
---- linux-rh-2.4.20-8/fs/ext3/ialloc.c~linux-2.4.20-xattr-0.8.54-chaos 2003-04-11 14:04:48.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext3/ialloc.c    2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext3/ialloc.c~linux-2.4.20-xattr-0.8.54-chaos 2003-05-15 21:14:30.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/ialloc.c   2003-07-12 15:34:44.000000000 -0600
  @@ -17,6 +17,7 @@
   #include <linux/jbd.h>
   #include <linux/ext3_fs.h>
@@ -2337,8 +2337,8 @@
         DQUOT_FREE_INODE(inode);
         DQUOT_DROP(inode);
   
---- linux-rh-2.4.20-8/fs/ext3/inode.c~linux-2.4.20-xattr-0.8.54-chaos  2003-04-11 14:04:58.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext3/inode.c     2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext3/inode.c~linux-2.4.20-xattr-0.8.54-chaos  2003-05-15 21:14:30.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/inode.c    2003-07-12 15:34:44.000000000 -0600
  @@ -39,6 +39,18 @@
    */
   #undef SEARCH_FROM_ZERO
@@ -2429,8 +2429,8 @@
         /* inode->i_attr_flags = 0;                             unused */
         if (inode->u.ext3_i.i_flags & EXT3_SYNC_FL) {
                 /* inode->i_attr_flags |= ATTR_FLAG_SYNCRONOUS; unused */
---- linux-rh-2.4.20-8/fs/ext3/namei.c~linux-2.4.20-xattr-0.8.54-chaos  2003-05-07 17:33:59.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext3/namei.c     2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext3/namei.c~linux-2.4.20-xattr-0.8.54-chaos  2003-07-12 15:33:43.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/namei.c    2003-07-12 15:34:44.000000000 -0600
  @@ -29,6 +29,7 @@
   #include <linux/sched.h>
   #include <linux/ext3_fs.h>
@@ -2492,8 +2492,8 @@
  +      removexattr:    ext3_removexattr,       /* BKL held */
  +};
  +
---- linux-rh-2.4.20-8/fs/ext3/super.c~linux-2.4.20-xattr-0.8.54-chaos  2003-05-07 17:33:59.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext3/super.c     2003-05-07 17:40:45.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext3/super.c~linux-2.4.20-xattr-0.8.54-chaos  2003-07-12 15:33:38.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/super.c    2003-07-12 15:34:44.000000000 -0600
  @@ -24,6 +24,7 @@
   #include <linux/jbd.h>
   #include <linux/ext3_fs.h>
@@ -2579,8 +2579,8 @@
   MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
   MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
   MODULE_LICENSE("GPL");
---- linux-rh-2.4.20-8/fs/ext3/symlink.c~linux-2.4.20-xattr-0.8.54-chaos        2001-11-10 06:25:04.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext3/symlink.c   2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext3/symlink.c~linux-2.4.20-xattr-0.8.54-chaos        2002-05-07 15:53:46.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/symlink.c  2003-07-12 15:34:44.000000000 -0600
  @@ -20,6 +20,7 @@
   #include <linux/fs.h>
   #include <linux/jbd.h>
@@ -2610,8 +2610,8 @@
  +      listxattr:      ext3_listxattr,         /* BKL held */
  +      removexattr:    ext3_removexattr,       /* BKL held */
   };
---- /dev/null  2003-01-30 18:24:37.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext3/xattr.c     2003-05-07 17:42:06.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/xattr.c    2003-07-12 15:34:44.000000000 -0600
  @@ -0,0 +1,1225 @@
  +/*
  + * linux/fs/ext3/xattr.c
@@ -3838,8 +3838,8 @@
  +}
  +
  +#endif  /* CONFIG_EXT3_FS_XATTR_SHARING */
---- /dev/null  2003-01-30 18:24:37.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext3/xattr_user.c        2003-05-07 17:34:25.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/xattr_user.c       2003-07-12 15:34:44.000000000 -0600
  @@ -0,0 +1,111 @@
  +/*
  + * linux/fs/ext3/xattr_user.c
@@ -3952,8 +3952,8 @@
  +      ext3_xattr_unregister(EXT3_XATTR_INDEX_USER,
  +                            &ext3_xattr_user_handler);
  +}
---- linux-rh-2.4.20-8/fs/jfs/jfs_xattr.h~linux-2.4.20-xattr-0.8.54-chaos       2002-11-29 07:53:15.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/jfs/jfs_xattr.h  2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/jfs/jfs_xattr.h~linux-2.4.20-xattr-0.8.54-chaos       2003-02-14 15:59:11.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/jfs/jfs_xattr.h 2003-07-12 15:34:44.000000000 -0600
  @@ -52,8 +52,10 @@ struct jfs_ea_list {
   #define       END_EALIST(ealist) \
         ((struct jfs_ea *) (((char *) (ealist)) + EALIST_SIZE(ealist)))
@@ -3967,8 +3967,8 @@
   extern ssize_t __jfs_getxattr(struct inode *, const char *, void *, size_t);
   extern ssize_t jfs_getxattr(struct dentry *, const char *, void *, size_t);
   extern ssize_t jfs_listxattr(struct dentry *, char *, size_t);
---- linux-rh-2.4.20-8/fs/jfs/xattr.c~linux-2.4.20-xattr-0.8.54-chaos   2002-11-29 07:53:15.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/jfs/xattr.c      2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/jfs/xattr.c~linux-2.4.20-xattr-0.8.54-chaos   2003-02-14 15:59:11.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/jfs/xattr.c     2003-07-12 15:34:44.000000000 -0600
  @@ -641,7 +641,7 @@ static int ea_put(struct inode *inode, s
   }
   
@@ -3996,8 +3996,8 @@
                  size_t value_len, int flags)
   {
         if (value == NULL) {    /* empty EA, do not remove */
---- /dev/null  2003-01-30 18:24:37.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/mbcache.c        2003-05-07 17:34:25.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/mbcache.c       2003-07-12 15:34:44.000000000 -0600
  @@ -0,0 +1,648 @@
  +/*
  + * linux/fs/mbcache.c
@@ -4647,8 +4647,8 @@
  +module_init(init_mbcache)
  +module_exit(exit_mbcache)
  +
---- linux-rh-2.4.20-8/include/asm-arm/unistd.h~linux-2.4.20-xattr-0.8.54-chaos 2003-04-11 14:04:53.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/asm-arm/unistd.h    2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/asm-arm/unistd.h~linux-2.4.20-xattr-0.8.54-chaos 2003-05-15 21:14:42.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/asm-arm/unistd.h   2003-07-12 15:34:44.000000000 -0600
  @@ -244,7 +244,6 @@
   #define __NR_security                 (__NR_SYSCALL_BASE+223)
   #define __NR_gettid                   (__NR_SYSCALL_BASE+224)
@@ -4665,8 +4665,8 @@
   #define __NR_tkill                    (__NR_SYSCALL_BASE+238)
   /*
    * Please check 2.5 _before_ adding calls here,
---- linux-rh-2.4.20-8/include/asm-ppc64/unistd.h~linux-2.4.20-xattr-0.8.54-chaos       2002-08-03 08:39:45.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/asm-ppc64/unistd.h  2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/asm-ppc64/unistd.h~linux-2.4.20-xattr-0.8.54-chaos       2002-09-25 11:13:42.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/asm-ppc64/unistd.h 2003-07-12 15:34:44.000000000 -0600
  @@ -218,6 +218,7 @@
   #define __NR_gettid           207
   #if 0 /* Reserved syscalls */
@@ -4683,8 +4683,8 @@
   #define __NR_futex            221
   #endif
   
---- linux-rh-2.4.20-8/include/asm-s390/unistd.h~linux-2.4.20-xattr-0.8.54-chaos        2002-08-03 08:39:45.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/asm-s390/unistd.h   2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/asm-s390/unistd.h~linux-2.4.20-xattr-0.8.54-chaos        2002-09-25 11:13:44.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/asm-s390/unistd.h  2003-07-12 15:34:44.000000000 -0600
  @@ -212,9 +212,18 @@
   #define __NR_madvise            219
   #define __NR_getdents64               220
@@ -4707,8 +4707,8 @@
   #define __NR_gettid           236
   #define __NR_tkill            237
   
---- linux-rh-2.4.20-8/include/asm-s390x/unistd.h~linux-2.4.20-xattr-0.8.54-chaos       2002-08-03 08:39:45.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/asm-s390x/unistd.h  2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/asm-s390x/unistd.h~linux-2.4.20-xattr-0.8.54-chaos       2002-09-25 11:13:45.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/asm-s390x/unistd.h 2003-07-12 15:34:44.000000000 -0600
  @@ -180,9 +180,18 @@
   #define __NR_pivot_root         217
   #define __NR_mincore            218
@@ -4731,8 +4731,8 @@
   #define __NR_gettid           236
   #define __NR_tkill            237
   
---- linux-rh-2.4.20-8/include/asm-sparc/unistd.h~linux-2.4.20-xattr-0.8.54-chaos       2002-08-03 08:39:45.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/asm-sparc/unistd.h  2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/asm-sparc/unistd.h~linux-2.4.20-xattr-0.8.54-chaos       2002-09-25 11:13:46.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/asm-sparc/unistd.h 2003-07-12 15:34:44.000000000 -0600
  @@ -184,24 +184,24 @@
   /* #define __NR_exportfs        166    SunOS Specific                              */
   #define __NR_mount              167 /* Common                                      */
@@ -4770,8 +4770,8 @@
   #define __NR_tkill              187 /* SunOS: fpathconf                            */
   /* #define __NR_sysconf         188    SunOS Specific                              */
   #define __NR_uname              189 /* Linux Specific                              */
---- linux-rh-2.4.20-8/include/asm-sparc64/unistd.h~linux-2.4.20-xattr-0.8.54-chaos     2002-08-03 08:39:45.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/asm-sparc64/unistd.h        2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/asm-sparc64/unistd.h~linux-2.4.20-xattr-0.8.54-chaos     2002-09-25 11:13:48.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/asm-sparc64/unistd.h       2003-07-12 15:34:44.000000000 -0600
  @@ -184,24 +184,24 @@
   /* #define __NR_exportfs        166    SunOS Specific                              */
   #define __NR_mount              167 /* Common                                      */
@@ -4809,8 +4809,8 @@
   #define __NR_tkill              187 /* SunOS: fpathconf                            */
   /* #define __NR_sysconf         188    SunOS Specific                              */
   #define __NR_uname              189 /* Linux Specific                              */
---- /dev/null  2003-01-30 18:24:37.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/linux/cache_def.h   2003-05-07 17:34:25.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/cache_def.h  2003-07-12 15:34:44.000000000 -0600
  @@ -0,0 +1,15 @@
  +/*
  + * linux/cache_def.h
@@ -4827,8 +4827,8 @@
  +
  +extern void register_cache(struct cache_definition *);
  +extern void unregister_cache(struct cache_definition *);
---- linux-rh-2.4.20-8/include/linux/errno.h~linux-2.4.20-xattr-0.8.54-chaos    2003-04-11 14:04:53.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/linux/errno.h       2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/linux/errno.h~linux-2.4.20-xattr-0.8.54-chaos    2003-05-15 21:15:06.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/errno.h      2003-07-12 15:34:44.000000000 -0600
  @@ -26,4 +26,8 @@
   
   #endif
@@ -4838,8 +4838,8 @@
  +#define ENOTSUP EOPNOTSUPP    /* Operation not supported */
  +
   #endif
---- linux-rh-2.4.20-8/include/linux/ext2_fs.h~linux-2.4.20-xattr-0.8.54-chaos  2003-04-12 15:46:42.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/linux/ext2_fs.h     2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/linux/ext2_fs.h~linux-2.4.20-xattr-0.8.54-chaos  2003-06-24 11:31:16.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/ext2_fs.h    2003-07-12 15:34:44.000000000 -0600
  @@ -57,8 +57,6 @@
    */
   #define       EXT2_BAD_INO             1      /* Bad blocks inode */
@@ -4911,7 +4911,7 @@
   #define EXT2_FEATURE_INCOMPAT_SUPP    EXT2_FEATURE_INCOMPAT_FILETYPE
   #define EXT2_FEATURE_RO_COMPAT_SUPP   (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \
                                          EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \
-@@ -623,8 +600,10 @@ extern struct address_space_operations e
+@@ -624,8 +601,10 @@ extern struct address_space_operations e
   
   /* namei.c */
   extern struct inode_operations ext2_dir_inode_operations;
@@ -4922,8 +4922,8 @@
   extern struct inode_operations ext2_fast_symlink_inode_operations;
   
   #endif        /* __KERNEL__ */
---- /dev/null  2003-01-30 18:24:37.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/linux/ext2_xattr.h  2003-05-07 17:34:25.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/ext2_xattr.h 2003-07-12 15:34:44.000000000 -0600
  @@ -0,0 +1,157 @@
  +/*
  +  File: linux/ext2_xattr.h
@@ -5082,8 +5082,8 @@
  +
  +#endif  /* __KERNEL__ */
  +
---- linux-rh-2.4.20-8/include/linux/ext3_fs.h~linux-2.4.20-xattr-0.8.54-chaos  2003-05-07 17:33:59.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/linux/ext3_fs.h     2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/linux/ext3_fs.h~linux-2.4.20-xattr-0.8.54-chaos  2003-07-12 15:33:41.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/ext3_fs.h    2003-07-12 15:34:44.000000000 -0600
  @@ -63,8 +63,6 @@
    */
   #define       EXT3_BAD_INO             1      /* Bad blocks inode */
@@ -5138,7 +5138,7 @@
   
   /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
   #ifndef _LINUX_EXT2_FS_H
-@@ -520,7 +496,7 @@ struct ext3_super_block {
+@@ -521,7 +497,7 @@ struct ext3_super_block {
   #define EXT3_FEATURE_INCOMPAT_RECOVER         0x0004 /* Needs recovery */
   #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV     0x0008 /* Journal device */
   
@@ -5147,7 +5147,7 @@
   #define EXT3_FEATURE_INCOMPAT_SUPP    (EXT3_FEATURE_INCOMPAT_FILETYPE| \
                                          EXT3_FEATURE_INCOMPAT_RECOVER)
   #define EXT3_FEATURE_RO_COMPAT_SUPP   (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \
-@@ -703,6 +679,7 @@ extern void ext3_check_inodes_bitmap (st
+@@ -704,6 +680,7 @@ extern void ext3_check_inodes_bitmap (st
   extern unsigned long ext3_count_free (struct buffer_head *, unsigned);
   
   /* inode.c */
@@ -5155,7 +5155,7 @@
   extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
   extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
   
-@@ -771,8 +748,10 @@ extern struct address_space_operations e
+@@ -773,8 +750,10 @@ extern struct address_space_operations e
   
   /* namei.c */
   extern struct inode_operations ext3_dir_inode_operations;
@@ -5166,8 +5166,8 @@
   extern struct inode_operations ext3_fast_symlink_inode_operations;
   
   
---- linux-rh-2.4.20-8/include/linux/ext3_jbd.h~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:59.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/linux/ext3_jbd.h    2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/linux/ext3_jbd.h~linux-2.4.20-xattr-0.8.54-chaos 2003-07-12 15:33:38.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/ext3_jbd.h   2003-07-12 15:34:44.000000000 -0600
  @@ -30,13 +30,19 @@
   
   #define EXT3_SINGLEDATA_TRANS_BLOCKS  8U
@@ -5189,8 +5189,8 @@
   
   extern int ext3_writepage_trans_blocks(struct inode *inode);
   
---- /dev/null  2003-01-30 18:24:37.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/linux/ext3_xattr.h  2003-05-07 17:34:25.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/ext3_xattr.h 2003-07-12 15:34:44.000000000 -0600
  @@ -0,0 +1,157 @@
  +/*
  +  File: linux/ext3_xattr.h
@@ -5349,19 +5349,19 @@
  +
  +#endif  /* __KERNEL__ */
  +
---- linux-rh-2.4.20-8/include/linux/fs.h~linux-2.4.20-xattr-0.8.54-chaos       2003-05-07 17:33:58.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/linux/fs.h  2003-05-07 17:34:25.000000000 +0800
-@@ -915,7 +915,7 @@ struct inode_operations {
+--- kernel-2.4.20-6chaos_18_7/include/linux/fs.h~linux-2.4.20-xattr-0.8.54-chaos       2003-07-12 15:31:35.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/fs.h 2003-07-12 15:34:44.000000000 -0600
+@@ -914,7 +914,7 @@ struct inode_operations {
         int (*setattr) (struct dentry *, struct iattr *);
-       int (*setattr_raw) (struct inode *, struct iattr *);
+       int (*setattr_raw) (struct inode *, struct iattr *);
         int (*getattr) (struct dentry *, struct iattr *);
  -      int (*setxattr) (struct dentry *, const char *, void *, size_t, int);
  +      int (*setxattr) (struct dentry *, const char *, const void *, size_t, int);
         ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
         ssize_t (*listxattr) (struct dentry *, char *, size_t);
         int (*removexattr) (struct dentry *, const char *);
---- /dev/null  2003-01-30 18:24:37.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/linux/mbcache.h     2003-05-07 17:34:25.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/mbcache.h    2003-07-12 15:34:44.000000000 -0600
  @@ -0,0 +1,69 @@
  +/*
  +  File: linux/mbcache.h
@@ -5432,8 +5432,8 @@
  +struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache_entry *, int,
  +                                              kdev_t, unsigned int);
  +#endif
---- linux-rh-2.4.20-8/kernel/ksyms.c~linux-2.4.20-xattr-0.8.54-chaos   2003-05-07 17:33:58.000000000 +0800
-+++ linux-rh-2.4.20-8-root/kernel/ksyms.c      2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/kernel/ksyms.c~linux-2.4.20-xattr-0.8.54-chaos   2003-07-12 15:14:02.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/kernel/ksyms.c     2003-07-12 15:35:19.000000000 -0600
  @@ -12,6 +12,7 @@
   #define __KERNEL_SYSCALLS__
   #include <linux/config.h>
@@ -5442,15 +5442,15 @@
   #include <linux/smp.h>
   #include <linux/module.h>
   #include <linux/blkdev.h>
-@@ -107,6 +108,7 @@ EXPORT_SYMBOL(exit_mm);
+@@ -106,6 +107,7 @@ EXPORT_SYMBOL(do_brk);
+ EXPORT_SYMBOL(exit_mm);
   EXPORT_SYMBOL(exit_files);
   EXPORT_SYMBOL(exit_fs);
- EXPORT_SYMBOL(exit_sighand);
  +EXPORT_SYMBOL(copy_fs_struct);
+ EXPORT_SYMBOL(exit_sighand);
+ EXPORT_SYMBOL_GPL(make_pages_present);
   
- /* internal kernel memory management */
- EXPORT_SYMBOL(_alloc_pages);
-@@ -125,6 +127,8 @@ EXPORT_SYMBOL(kmem_cache_alloc);
+@@ -126,6 +128,8 @@ EXPORT_SYMBOL(kmem_cache_alloc);
   EXPORT_SYMBOL(kmem_cache_free);
   EXPORT_SYMBOL(kmem_cache_validate);
   EXPORT_SYMBOL(kmem_cache_size);
@@ -5459,8 +5459,8 @@
   EXPORT_SYMBOL(kmalloc);
   EXPORT_SYMBOL(kfree);
   EXPORT_SYMBOL(vfree);
---- linux-rh-2.4.20-8/mm/vmscan.c~linux-2.4.20-xattr-0.8.54-chaos      2003-05-07 17:33:58.000000000 +0800
-+++ linux-rh-2.4.20-8-root/mm/vmscan.c 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/mm/vmscan.c~linux-2.4.20-xattr-0.8.54-chaos      2003-07-12 15:33:34.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/mm/vmscan.c        2003-07-12 15:34:44.000000000 -0600
  @@ -21,6 +21,7 @@
   #include <linux/kernel_stat.h>
   #include <linux/swap.h>
@@ -5518,8 +5518,8 @@
   #ifdef CONFIG_QUOTA
         ret += shrink_dqcache_memory(DEF_PRIORITY, gfp_mask);
   #endif
---- /dev/null  2003-01-30 18:24:37.000000000 +0800
-+++ linux-root/fs/ext3/ext3-exports.c  2003-05-05 18:19:11.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/ext3-exports.c     2003-07-12 15:34:44.000000000 -0600
  @@ -0,0 +1,13 @@
  +#include <linux/config.h>
  +#include <linux/module.h>
diff --git a/lustre/kernel_patches/patches/lustre_version.patch b/lustre/kernel_patches/patches/lustre_version.patch

index 78855ac..c987485 100644 (file)
--- a/lustre/kernel_patches/patches/lustre_version.patch
+++ b/lustre/kernel_patches/patches/lustre_version.patch
@@ -7,6 +7,6 @@
  --- /dev/null  Fri Aug 30 17:31:37 2002
  +++ linux-2.4.18-18.8.0-l12-braam/include/linux/lustre_version.h       Thu Feb 13 07:58:33 2003
  @@ -0,0 +1 @@
-+#define LUSTRE_KERNEL_VERSION 19
++#define LUSTRE_KERNEL_VERSION 21
  
  _
diff --git a/lustre/kernel_patches/patches/vfs_intent-2.4.20-rh.patch b/lustre/kernel_patches/patches/vfs_intent-2.4.20-rh.patch

index 710cdc9..7aa5941 100644 (file)
--- a/lustre/kernel_patches/patches/vfs_intent-2.4.20-rh.patch
+++ b/lustre/kernel_patches/patches/vfs_intent-2.4.20-rh.patch
@@ -1,7 +1,7 @@
   0 files changed
  
---- linux-2.4.20-rh/fs/dcache.c~vfs_intent-2.4.20-rh   2003-04-11 14:04:58.000000000 +0800
-+++ linux-2.4.20-rh-root/fs/dcache.c   2003-06-09 23:18:07.000000000 +0800
+--- linux-2.4.20/fs/dcache.c~vfs_intent-2.4.20-rh      2003-07-17 08:32:59.000000000 -0700
++++ linux-2.4.20-mmonroe/fs/dcache.c   2003-07-17 08:35:22.000000000 -0700
  @@ -186,6 +186,13 @@ int d_invalidate(struct dentry * dentry)
                 spin_unlock(&dcache_lock);
                 return 0;
@@ -16,15 +16,7 @@
         /*
          * Check whether to do a partial shrink_dcache
          * to get rid of unused child entries.
-@@ -624,6 +631,7 @@ struct dentry * d_alloc(struct dentry * 
-       dentry->d_fsdata = NULL;
-       dentry->d_extra_attributes = NULL;
-       dentry->d_mounted = 0;
-+      dentry->d_it = NULL;
-       dentry->d_cookie = NULL;
-       INIT_LIST_HEAD(&dentry->d_hash);
-       INIT_LIST_HEAD(&dentry->d_lru);
-@@ -839,13 +847,19 @@ void d_delete(struct dentry * dentry)
+@@ -839,13 +846,19 @@ void d_delete(struct dentry * dentry)
    * Adds a dentry to the hash according to its name.
    */
    
@@ -47,16 +39,16 @@
   }
   
   #define do_switch(x,y) do { \
---- linux-2.4.20-rh/fs/namei.c~vfs_intent-2.4.20-rh    2003-04-11 14:04:57.000000000 +0800
-+++ linux-2.4.20-rh-root/fs/namei.c    2003-06-09 23:18:07.000000000 +0800
+--- linux-2.4.20/fs/namei.c~vfs_intent-2.4.20-rh       2003-07-17 08:32:47.000000000 -0700
++++ linux-2.4.20-mmonroe/fs/namei.c    2003-07-17 08:35:22.000000000 -0700
  @@ -94,6 +94,13 @@
    * XEmacs seems to be relying on it...
    */
   
-+void intent_release(struct dentry *de, struct lookup_intent *it)
++void intent_release(struct lookup_intent *it)
  +{
-+      if (it && de->d_op && de->d_op->d_intent_release)
-+              de->d_op->d_intent_release(de, it);
++      if (it && it->it_op_release)
++              it->it_op_release(it);
  +
  +}
  +
@@ -73,8 +65,8 @@
   {
         struct dentry * dentry = d_lookup(parent, name);
   
-+      if (dentry && dentry->d_op && dentry->d_op->d_revalidate2) {
-+              if (!dentry->d_op->d_revalidate2(dentry, flags, it) &&
++      if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) {
++              if (!dentry->d_op->d_revalidate_it(dentry, flags, it) &&
  +                  !d_invalidate(dentry)) {
  +                      dput(dentry);
  +                      dentry = NULL;
@@ -104,8 +96,8 @@
                 result = ERR_PTR(-ENOMEM);
                 if (dentry) {
                         lock_kernel();
-+                      if (dir->i_op->lookup2)
-+                              result = dir->i_op->lookup2(dir, dentry, it);
++                      if (dir->i_op->lookup_it)
++                              result = dir->i_op->lookup_it(dir, dentry, it, flags);
  +                      else
                         result = dir->i_op->lookup(dir, dentry);
                         unlock_kernel();
@@ -114,8 +106,8 @@
                         dput(result);
                         result = ERR_PTR(-ENOENT);
                 }
-+      } else if (result->d_op && result->d_op->d_revalidate2) {
-+              if (!result->d_op->d_revalidate2(result, flags, it) &&
++      } else if (result->d_op && result->d_op->d_revalidate_it) {
++              if (!result->d_op->d_revalidate_it(result, flags, it) &&
  +                  !d_invalidate(result)) {
  +                      dput(result);
  +                      goto again;
@@ -133,30 +125,26 @@
   {
         int err;
         if (current->link_count >= max_recursive_link)
-@@ -348,10 +377,21 @@ static inline int do_follow_link(struct 
+@@ -348,10 +377,18 @@ static inline int do_follow_link(struct 
         current->link_count++;
         current->total_link_count++;
         UPDATE_ATIME(dentry->d_inode);
--      err = dentry->d_inode->i_op->follow_link(dentry, nd);
  +      nd->it = it;
-+      if (dentry->d_inode->i_op->follow_link2)
-+              err = dentry->d_inode->i_op->follow_link2(dentry, nd, it);
-+      else
-+              err = dentry->d_inode->i_op->follow_link(dentry, nd);
+       err = dentry->d_inode->i_op->follow_link(dentry, nd);
  +      if (!err && it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) {
  +              /* vfs_follow_link was never called */
-+              intent_release(dentry, it);
++              intent_release(it);
  +              path_release(nd);
  +              err = -ENOLINK;
  +      }
         current->link_count--;
         return err;
   loop:
-+      intent_release(dentry, it);
++      intent_release(it);
         path_release(nd);
         return -ELOOP;
   }
-@@ -381,15 +421,26 @@ int follow_up(struct vfsmount **mnt, str
+@@ -381,15 +418,26 @@ int follow_up(struct vfsmount **mnt, str
         return __follow_up(mnt, dentry);
   }
   
@@ -176,7 +164,7 @@
  +                      opc = it->it_op;
  +                      mode = it->it_mode;
  +              }
-+              intent_release(*dentry, it);
++              intent_release(it);
  +              if (it) {
  +                      it->it_op = opc;
  +                      it->it_mode = mode;
@@ -184,7 +172,7 @@
                 dput(*dentry);
                 mntput(mounted->mnt_parent);
                 *dentry = dget(mounted->mnt_root);
-@@ -401,7 +452,7 @@ static inline int __follow_down(struct v
+@@ -401,7 +449,7 @@ static inline int __follow_down(struct v
   
   int follow_down(struct vfsmount **mnt, struct dentry **dentry)
   {
@@ -193,7 +181,7 @@
   }
    
   static inline void follow_dotdot(struct nameidata *nd)
-@@ -437,7 +488,7 @@ static inline void follow_dotdot(struct 
+@@ -437,7 +485,7 @@ static inline void follow_dotdot(struct 
                 mntput(nd->mnt);
                 nd->mnt = parent;
         }
@@ -202,7 +190,7 @@
                 ;
   }
   
-@@ -449,7 +500,8 @@ static inline void follow_dotdot(struct 
+@@ -449,7 +497,8 @@ static inline void follow_dotdot(struct 
    *
    * We expect 'base' to be positive and a directory.
    */
@@ -212,117 +200,114 @@
   {
         struct dentry *dentry;
         struct inode *inode;
-@@ -526,18 +578,18 @@ int link_path_walk(const char * name, st
+@@ -526,19 +575,18 @@ int link_path_walk(const char * name, st
                                 break;
                 }
                 /* This does the actual lookups.. */
  -              dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
-+              dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL);
++              dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL);
                 if (!dentry) {
                         err = -EWOULDBLOCKIO;
                         if (atomic)
                                 break;
  -                      dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
-+                      dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL);
++                      dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL);
                         err = PTR_ERR(dentry);
                         if (IS_ERR(dentry))
                                 break;
                 }
                 /* Check mountpoints.. */
  -              while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
-+              while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, NULL))
-                       ;
+-                      ;
++              while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, NULL));
   
                 err = -ENOENT;
-@@ -548,8 +600,8 @@ int link_path_walk(const char * name, st
-               if (!inode->i_op)
+               inode = dentry->d_inode;
+@@ -549,7 +597,7 @@ int link_path_walk(const char * name, st
                         goto out_dput;
   
--              if (inode->i_op->follow_link) {
+               if (inode->i_op->follow_link) {
  -                      err = do_follow_link(dentry, nd);
-+              if (inode->i_op->follow_link || inode->i_op->follow_link2) {
  +                      err = do_follow_link(dentry, nd, NULL);
                         dput(dentry);
                         if (err)
                                 goto return_err;
-@@ -565,7 +617,7 @@ int link_path_walk(const char * name, st
+@@ -565,7 +613,7 @@ int link_path_walk(const char * name, st
                         nd->dentry = dentry;
                 }
                 err = -ENOTDIR; 
  -              if (!inode->i_op->lookup)
-+              if (!inode->i_op->lookup && !inode->i_op->lookup2)
++              if (!inode->i_op->lookup && !inode->i_op->lookup_it)
                         break;
                 continue;
                 /* here ends the main loop */
-@@ -592,22 +644,23 @@ last_component:
+@@ -592,22 +640,22 @@ last_component:
                         if (err < 0)
                                 break;
                 }
  -              dentry = cached_lookup(nd->dentry, &this, 0);
-+              dentry = cached_lookup(nd->dentry, &this, 0, it);
++              dentry = cached_lookup(nd->dentry, &this, 0, it);
                 if (!dentry) {
                         err = -EWOULDBLOCKIO;
                         if (atomic)
                                 break;
  -                      dentry = real_lookup(nd->dentry, &this, 0);
-+                      dentry = real_lookup(nd->dentry, &this, 0, it);
++                      dentry = real_lookup(nd->dentry, &this, 0, it);
                         err = PTR_ERR(dentry);
                         if (IS_ERR(dentry))
                                 break;
                 }
  -              while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
-+              while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, it))
++              while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, it))
                         ;
                 inode = dentry->d_inode;
                 if ((lookup_flags & LOOKUP_FOLLOW)
--                  && inode && inode->i_op && inode->i_op->follow_link) {
+                   && inode && inode->i_op && inode->i_op->follow_link) {
  -                      err = do_follow_link(dentry, nd);
-+                  && inode && inode->i_op &&
-+                  (inode->i_op->follow_link || inode->i_op->follow_link2)) {
-+                      err = do_follow_link(dentry, nd, it);
++                      err = do_follow_link(dentry, nd, it);
                         dput(dentry);
                         if (err)
                                 goto return_err;
-@@ -621,7 +674,8 @@ last_component:
+@@ -621,7 +669,8 @@ last_component:
                         goto no_inode;
                 if (lookup_flags & LOOKUP_DIRECTORY) {
                         err = -ENOTDIR; 
  -                      if (!inode->i_op || !inode->i_op->lookup)
  +                      if (!inode->i_op ||
-+                          (!inode->i_op->lookup && !inode->i_op->lookup2))
++                          (!inode->i_op->lookup && !inode->i_op->lookup_it))
                                 break;
                 }
                 goto return_base;
-@@ -645,6 +699,23 @@ return_reval:
+@@ -645,6 +694,23 @@ return_reval:
                  * Check the cached dentry for staleness.
                  */
                 dentry = nd->dentry;
-+        revalidate_again:
-+              if (dentry && dentry->d_op && dentry->d_op->d_revalidate2) {
++      revalidate_again:
++              if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) {
  +                      err = -ESTALE;
-+                      if (!dentry->d_op->d_revalidate2(dentry, 0, it)) {
-+                                struct dentry *new;
-+                                err = permission(dentry->d_parent->d_inode, 
-+                                                 MAY_EXEC);
-+                                if (err)
-+                                        break;
-+                                new = real_lookup(dentry->d_parent,
-+                                                  &dentry->d_name, 0, NULL);
++                      if (!dentry->d_op->d_revalidate_it(dentry, 0, it)) {
++                              struct dentry *new;
++                              err = permission(dentry->d_parent->d_inode,
++                                               MAY_EXEC);
++                              if (err)
++                                      break;
++                              new = real_lookup(dentry->d_parent,
++                                                &dentry->d_name, 0, NULL);
  +                              d_invalidate(dentry);
-+                                dput(dentry);
-+                                dentry = new;
-+                                goto revalidate_again;
-+                        }
++                              dput(dentry);
++                              dentry = new;
++                              goto revalidate_again;
++                      }
  +              } else
                 if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
                         err = -ESTALE;
                         if (!dentry->d_op->d_revalidate(dentry, 0)) {
-@@ -658,15 +729,28 @@ out_dput:
+@@ -658,15 +724,28 @@ out_dput:
                 dput(dentry);
                 break;
         }
  +      if (err)
-+              intent_release(nd->dentry, it);
++              intent_release(it);
         path_release(nd);
   return_err:
         return err;
@@ -347,7 +332,7 @@
   }
   
   /* SMP-safe */
-@@ -751,6 +835,17 @@ walk_init_root(const char *name, struct 
+@@ -751,6 +830,17 @@ walk_init_root(const char *name, struct 
   }
   
   /* SMP-safe */
@@ -365,7 +350,7 @@
   int path_lookup(const char *path, unsigned flags, struct nameidata *nd)
   {
         int error = 0;
-@@ -765,6 +860,7 @@ int path_init(const char *name, unsigned
+@@ -765,6 +855,7 @@ int path_init(const char *name, unsigned
   {
         nd->last_type = LAST_ROOT; /* if there are only slashes... */
         nd->flags = flags;
@@ -373,7 +358,7 @@
         if (*name=='/')
                 return walk_init_root(name,nd);
         read_lock(&current->fs->lock);
-@@ -779,7 +875,8 @@ int path_init(const char *name, unsigned
+@@ -779,7 +870,8 @@ int path_init(const char *name, unsigned
    * needs parent already locked. Doesn't follow mounts.
    * SMP-safe.
    */
@@ -383,7 +368,7 @@
   {
         struct dentry * dentry;
         struct inode *inode;
-@@ -802,13 +899,16 @@ struct dentry * lookup_hash(struct qstr 
+@@ -802,13 +894,16 @@ struct dentry * lookup_hash(struct qstr 
                         goto out;
         }
   
@@ -395,13 +380,13 @@
                 if (!new)
                         goto out;
                 lock_kernel();
-+              if (inode->i_op->lookup2)
-+                      dentry = inode->i_op->lookup2(inode, new, it);
++              if (inode->i_op->lookup_it)
++                      dentry = inode->i_op->lookup_it(inode, new, it, 0);
  +              else
                 dentry = inode->i_op->lookup(inode, new);
                 unlock_kernel();
                 if (!dentry)
-@@ -820,6 +920,12 @@ out:
+@@ -820,6 +915,12 @@ out:
         return dentry;
   }
   
@@ -414,7 +399,7 @@
   /* SMP-safe */
   struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
   {
-@@ -841,7 +947,7 @@ struct dentry * lookup_one_len(const cha
+@@ -841,7 +942,7 @@ struct dentry * lookup_one_len(const cha
         }
         this.hash = end_name_hash(hash);
   
@@ -423,7 +408,7 @@
   access:
         return ERR_PTR(-EACCES);
   }
-@@ -872,6 +978,23 @@ int __user_walk(const char *name, unsign
+@@ -872,6 +973,23 @@ int __user_walk(const char *name, unsign
         return err;
   }
   
@@ -447,7 +432,47 @@
   /*
    * It's inline, so penalty for filesystems that don't use sticky bit is
    * minimal.
-@@ -1010,7 +1133,8 @@ exit_lock:
+@@ -969,7 +1087,8 @@ static inline int lookup_flags(unsigned 
+       return retval;
+ }
+ 
+-int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
++static int vfs_create_it(struct inode *dir, struct dentry *dentry, int mode,
++                       struct lookup_intent *it)
+ {
+       int error;
+ 
+@@ -982,12 +1101,15 @@ int vfs_create(struct inode *dir, struct
+               goto exit_lock;
+ 
+       error = -EACCES;        /* shouldn't it be ENOSYS? */
+-      if (!dir->i_op || !dir->i_op->create)
++      if (!dir->i_op || (!dir->i_op->create && !dir->i_op->create_it))
+               goto exit_lock;
+ 
+       DQUOT_INIT(dir);
+       lock_kernel();
+-      error = dir->i_op->create(dir, dentry, mode);
++      if (dir->i_op->create_it)
++              error = dir->i_op->create_it(dir, dentry, mode, it);
++      else
++              error = dir->i_op->create(dir, dentry, mode);
+       unlock_kernel();
+ exit_lock:
+       up(&dir->i_zombie);
+@@ -996,6 +1118,11 @@ exit_lock:
+       return error;
+ }
+ 
++int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
++{
++      return vfs_create_it(dir, dentry, mode, NULL);
++}
++
+ /*
+  *    open_namei()
+  *
+@@ -1010,7 +1137,8 @@ exit_lock:
    * for symlinks (where the permissions are checked later).
    * SMP-safe
    */
@@ -457,7 +482,7 @@
   {
         int acc_mode, error = 0;
         struct inode *inode;
-@@ -1024,7 +1148,7 @@ int open_namei(const char * pathname, in
+@@ -1024,7 +1152,7 @@ int open_namei(const char * pathname, in
          * The simplest case - just a plain lookup.
          */
         if (!(flag & O_CREAT)) {
@@ -466,7 +491,7 @@
                 if (error)
                         return error;
                 dentry = nd->dentry;
-@@ -1034,6 +1158,10 @@ int open_namei(const char * pathname, in
+@@ -1034,6 +1162,10 @@ int open_namei(const char * pathname, in
         /*
          * Create - we need to know the parent.
          */
@@ -477,7 +502,7 @@
         error = path_lookup(pathname, LOOKUP_PARENT, nd);
         if (error)
                 return error;
-@@ -1049,7 +1177,7 @@ int open_namei(const char * pathname, in
+@@ -1049,7 +1181,7 @@ int open_namei(const char * pathname, in
   
         dir = nd->dentry;
         down(&dir->d_inode->i_sem);
@@ -486,15 +511,21 @@
   
   do_last:
         error = PTR_ERR(dentry);
-@@ -1058,6 +1186,7 @@ do_last:
+@@ -1058,10 +1190,11 @@ do_last:
                 goto exit;
         }
   
  +      it->it_mode = mode;
         /* Negative dentry, just create the file */
         if (!dentry->d_inode) {
-               error = vfs_create(dir->d_inode, dentry,
-@@ -1086,12 +1215,13 @@ do_last:
+-              error = vfs_create(dir->d_inode, dentry,
+-                                 mode & ~current->fs->umask);
++              error = vfs_create_it(dir->d_inode, dentry,
++                                 mode & ~current->fs->umask, it);
+               up(&dir->d_inode->i_sem);
+               dput(nd->dentry);
+               nd->dentry = dentry;
+@@ -1086,7 +1219,7 @@ do_last:
                 error = -ELOOP;
                 if (flag & O_NOFOLLOW)
                         goto exit_dput;
@@ -503,14 +534,7 @@
         }
         error = -ENOENT;
         if (!dentry->d_inode)
-               goto exit_dput;
--      if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)
-+      if (dentry->d_inode->i_op && (dentry->d_inode->i_op->follow_link ||
-+                                    dentry->d_inode->i_op->follow_link2))
-               goto do_link;
- 
-       dput(nd->dentry);
-@@ -1165,7 +1295,7 @@ ok:
+@@ -1165,7 +1298,7 @@ ok:
                 if (!error) {
                         DQUOT_INIT(inode);
                         
@@ -519,32 +543,28 @@
                 }
                 put_write_access(inode);
                 if (error)
-@@ -1177,8 +1307,10 @@ ok:
+@@ -1177,8 +1310,10 @@ ok:
         return 0;
   
   exit_dput:
-+      intent_release(dentry, it);
++      intent_release(it);
         dput(dentry);
   exit:
-+      intent_release(nd->dentry, it);
++      intent_release(it);
         path_release(nd);
         return error;
   
-@@ -1197,7 +1329,19 @@ do_link:
+@@ -1197,7 +1332,16 @@ do_link:
          * are done. Procfs-like symlinks just set LAST_BIND.
          */
         UPDATE_ATIME(dentry->d_inode);
--      error = dentry->d_inode->i_op->follow_link(dentry, nd);
  +      nd->it = it;
-+      if (dentry->d_inode->i_op->follow_link2)
-+              error = dentry->d_inode->i_op->follow_link2(dentry, nd, it);
-+      else
-+              error = dentry->d_inode->i_op->follow_link(dentry, nd);
+       error = dentry->d_inode->i_op->follow_link(dentry, nd);
  +      if (error) {
-+              intent_release(dentry, it);
++              intent_release(it);
  +      } else if (it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) {
  +              /* vfs_follow_link was never called */
-+              intent_release(dentry, it);
++              intent_release(it);
  +              path_release(nd);
  +              error = -ENOLINK;
  +      }
@@ -583,18 +603,15 @@
         if (IS_ERR(dentry))
                 goto fail;
         if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
-@@ -1289,7 +1440,19 @@ asmlinkage long sys_mknod(const char * f
+@@ -1289,7 +1440,16 @@ asmlinkage long sys_mknod(const char * f
         error = path_lookup(tmp, LOOKUP_PARENT, &nd);
         if (error)
                 goto out;
  -      dentry = lookup_create(&nd, 0);
  +
-+      if (nd.dentry->d_inode->i_op->mknod2) {
++      if (nd.dentry->d_inode->i_op->mknod_raw) {
  +              struct inode_operations *op = nd.dentry->d_inode->i_op;
-+              error = op->mknod2(nd.dentry->d_inode,
-+                                 nd.last.name,
-+                                 nd.last.len,
-+                                 mode, dev);
++              error = op->mknod_raw(&nd, mode, dev);
  +              /* the file system wants to use normal vfs path now */
  +              if (error != -EOPNOTSUPP)
  +                      goto out2;
@@ -604,7 +621,7 @@
         error = PTR_ERR(dentry);
   
         mode &= ~current->fs->umask;
-@@ -1310,6 +1473,7 @@ asmlinkage long sys_mknod(const char * f
+@@ -1310,6 +1470,7 @@ asmlinkage long sys_mknod(const char * f
                 dput(dentry);
         }
         up(&nd.dentry->d_inode->i_sem);
@@ -612,17 +629,14 @@
         path_release(&nd);
   out:
         putname(tmp);
-@@ -1357,7 +1521,17 @@ asmlinkage long sys_mkdir(const char * p
+@@ -1357,7 +1518,14 @@ asmlinkage long sys_mkdir(const char * p
                 error = path_lookup(tmp, LOOKUP_PARENT, &nd);
                 if (error)
                         goto out;
  -              dentry = lookup_create(&nd, 1);
-+              if (nd.dentry->d_inode->i_op->mkdir2) {
++              if (nd.dentry->d_inode->i_op->mkdir_raw) {
  +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
-+                      error = op->mkdir2(nd.dentry->d_inode,
-+                                         nd.last.name,
-+                                         nd.last.len,
-+                                         mode);
++                      error = op->mkdir_raw(&nd, mode);
  +                      /* the file system wants to use normal vfs path now */
  +                      if (error != -EOPNOTSUPP)
  +                              goto out2;
@@ -631,7 +645,7 @@
                 error = PTR_ERR(dentry);
                 if (!IS_ERR(dentry)) {
                         error = vfs_mkdir(nd.dentry->d_inode, dentry,
-@@ -1365,6 +1539,7 @@ asmlinkage long sys_mkdir(const char * p
+@@ -1365,6 +1533,7 @@ asmlinkage long sys_mkdir(const char * p
                         dput(dentry);
                 }
                 up(&nd.dentry->d_inode->i_sem);
@@ -639,71 +653,49 @@
                 path_release(&nd);
   out:
                 putname(tmp);
-@@ -1465,8 +1640,33 @@ asmlinkage long sys_rmdir(const char * p
+@@ -1465,8 +1634,16 @@ asmlinkage long sys_rmdir(const char * p
                         error = -EBUSY;
                         goto exit1;
         }
-+      if (nd.dentry->d_inode->i_op->rmdir2) {
-+              struct inode_operations *op = nd.dentry->d_inode->i_op;
-+              struct dentry *last;
-+
-+              down(&nd.dentry->d_inode->i_sem);
-+              last = lookup_hash_it(&nd.last, nd.dentry, NULL);
-+              up(&nd.dentry->d_inode->i_sem);
-+              if (IS_ERR(last)) {
-+                      error = PTR_ERR(last);
-+                      goto exit1;
-+              }
-+              if (d_mountpoint(last)) {
-+                      dput(last);
-+                      error = -EBUSY;
-+                      goto exit1;
-+              }
-+              dput(last);
++      if (nd.dentry->d_inode->i_op->rmdir_raw) {
++              struct inode_operations *op = nd.dentry->d_inode->i_op;
  +
-+              error = op->rmdir2(nd.dentry->d_inode,
-+                                 nd.last.name,
-+                                 nd.last.len);
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto exit1;
-+      }
++              error = op->rmdir_raw(&nd);
++              /* the file system wants to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto exit1;
++      }
         down(&nd.dentry->d_inode->i_sem);
  -      dentry = lookup_hash(&nd.last, nd.dentry);
  +      dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
         error = PTR_ERR(dentry);
         if (!IS_ERR(dentry)) {
                 error = vfs_rmdir(nd.dentry->d_inode, dentry);
-@@ -1524,8 +1724,17 @@ asmlinkage long sys_unlink(const char * 
+@@ -1524,8 +1701,15 @@ asmlinkage long sys_unlink(const char * 
         error = -EISDIR;
         if (nd.last_type != LAST_NORM)
                 goto exit1;
-+      if (nd.dentry->d_inode->i_op->unlink2) {
-+              struct inode_operations *op = nd.dentry->d_inode->i_op;
-+              error = op->unlink2(nd.dentry->d_inode,
-+                                  nd.last.name,
-+                                  nd.last.len);
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto exit1;
-+      }
++      if (nd.dentry->d_inode->i_op->unlink_raw) {
++              struct inode_operations *op = nd.dentry->d_inode->i_op;
++              error = op->unlink_raw(&nd);
++              /* the file system wants to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto exit1;
++      }
         down(&nd.dentry->d_inode->i_sem);
  -      dentry = lookup_hash(&nd.last, nd.dentry);
  +      dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
         error = PTR_ERR(dentry);
         if (!IS_ERR(dentry)) {
                 /* Why not before? Because we want correct error value */
-@@ -1592,15 +1801,26 @@ asmlinkage long sys_symlink(const char *
+@@ -1592,15 +1776,23 @@ asmlinkage long sys_symlink(const char *
                 error = path_lookup(to, LOOKUP_PARENT, &nd);
                 if (error)
                         goto out;
  -              dentry = lookup_create(&nd, 0);
-+              if (nd.dentry->d_inode->i_op->symlink2) {
++              if (nd.dentry->d_inode->i_op->symlink_raw) {
  +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
-+                      error = op->symlink2(nd.dentry->d_inode,
-+                                           nd.last.name,
-+                                           nd.last.len,
-+                                           from);
++                      error = op->symlink_raw(&nd, from);
  +                      /* the file system wants to use normal vfs path now */
  +                      if (error != -EOPNOTSUPP)
  +                              goto out2;
@@ -722,17 +714,14 @@
                 putname(to);
         }
         putname(from);
-@@ -1676,7 +1896,17 @@ asmlinkage long sys_link(const char * ol
+@@ -1676,7 +1868,14 @@ asmlinkage long sys_link(const char * ol
                 error = -EXDEV;
                 if (old_nd.mnt != nd.mnt)
                         goto out_release;
  -              new_dentry = lookup_create(&nd, 0);
-+              if (nd.dentry->d_inode->i_op->link2) {
++              if (nd.dentry->d_inode->i_op->link_raw) {
  +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
-+                      error = op->link2(old_nd.dentry->d_inode,
-+                                        nd.dentry->d_inode,
-+                                        nd.last.name,
-+                                        nd.last.len);
++                      error = op->link_raw(&old_nd, &nd);
  +                      /* the file system wants to use normal vfs path now */
  +                      if (error != -EOPNOTSUPP)
  +                              goto out_release;
@@ -741,62 +730,37 @@
                 error = PTR_ERR(new_dentry);
                 if (!IS_ERR(new_dentry)) {
                         error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
-@@ -1720,7 +1950,8 @@ exit:
+@@ -1720,7 +1919,7 @@ exit:
    *       locking].
    */
   int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
  -             struct inode *new_dir, struct dentry *new_dentry)
-+                 struct inode *new_dir, struct dentry *new_dentry,
-+                 struct lookup_intent *it)
++                 struct inode *new_dir, struct dentry *new_dentry)
   {
         int error;
         struct inode *target;
-@@ -1778,6 +2009,7 @@ int vfs_rename_dir(struct inode *old_dir
-               error = -EBUSY;
-       else 
-               error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
-+      intent_release(new_dentry, it);
-       if (target) {
-               if (!error)
-                       target->i_flags |= S_DEAD;
-@@ -1799,7 +2031,8 @@ out_unlock:
+@@ -1799,7 +1998,7 @@ out_unlock:
   }
   
   int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
  -             struct inode *new_dir, struct dentry *new_dentry)
-+                   struct inode *new_dir, struct dentry *new_dentry,
-+                   struct lookup_intent *it)
++                   struct inode *new_dir, struct dentry *new_dentry)
   {
         int error;
   
-@@ -1830,6 +2063,7 @@ int vfs_rename_other(struct inode *old_d
-               error = -EBUSY;
-       else
-               error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
-+      intent_release(new_dentry, it);
-       double_up(&old_dir->i_zombie, &new_dir->i_zombie);
-       if (error)
-               return error;
-@@ -1841,13 +2075,14 @@ int vfs_rename_other(struct inode *old_d
- }
- 
- int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
--             struct inode *new_dir, struct dentry *new_dentry)
-+             struct inode *new_dir, struct dentry *new_dentry,
-+             struct lookup_intent *it)
- {
-       int error;
-       if (S_ISDIR(old_dentry->d_inode->i_mode))
--              error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
-+              error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry,it);
-       else
--              error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
-+              error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,it);
-       if (!error) {
-               if (old_dir == new_dir)
-                       inode_dir_notify(old_dir, DN_RENAME);
-@@ -1889,7 +2124,7 @@ static inline int do_rename(const char *
+@@ -1887,9 +2086,18 @@ static inline int do_rename(const char *
+       if (newnd.last_type != LAST_NORM)
+               goto exit2;
   
++      if (old_dir->d_inode->i_op->rename_raw) {
++              lock_kernel();
++              error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd);
++              unlock_kernel();
++              /* the file system wants to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto exit2;
++      }
++
         double_lock(new_dir, old_dir);
   
  -      old_dentry = lookup_hash(&oldnd.last, old_dir);
@@ -804,7 +768,7 @@
         error = PTR_ERR(old_dentry);
         if (IS_ERR(old_dentry))
                 goto exit3;
-@@ -1905,16 +2140,37 @@ static inline int do_rename(const char *
+@@ -1905,16 +2113,16 @@ static inline int do_rename(const char *
                 if (newnd.last.name[newnd.last.len])
                         goto exit4;
         }
@@ -814,38 +778,16 @@
         if (IS_ERR(new_dentry))
                 goto exit4;
   
-+      if (old_dir->d_inode->i_op->rename2) {
-+              lock_kernel();
-+              /* don't rename mount point. mds will take care of
-+               * the rest sanity checking */
-+              if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) {
-+                      error = -EBUSY;
-+                      goto exit5;
-+              }
-+
-+              error = old_dir->d_inode->i_op->rename2(old_dir->d_inode,
-+                                                      new_dir->d_inode,
-+                                                      oldnd.last.name,
-+                                                      oldnd.last.len,
-+                                                      newnd.last.name,
-+                                                      newnd.last.len);
-+              unlock_kernel();
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto exit5;
-+      }
  +
         lock_kernel();
         error = vfs_rename(old_dir->d_inode, old_dentry,
--                                 new_dir->d_inode, new_dentry);
-+                                 new_dir->d_inode, new_dentry, NULL);
+                                  new_dir->d_inode, new_dentry);
         unlock_kernel();
  -
-+exit5:
         dput(new_dentry);
   exit4:
         dput(old_dentry);
-@@ -1965,20 +2221,28 @@ out:
+@@ -1965,20 +2173,28 @@ out:
   }
   
   static inline int
@@ -876,7 +818,7 @@
   out:
         if (current->link_count || res || nd->last_type!=LAST_NORM)
                 return res;
-@@ -2002,7 +2266,13 @@ fail:
+@@ -2002,7 +2218,13 @@ fail:
   
   int vfs_follow_link(struct nameidata *nd, const char *link)
   {
@@ -891,7 +833,7 @@
   }
   
   /* get the link contents into pagecache */
-@@ -2044,7 +2314,7 @@ int page_follow_link(struct dentry *dent
+@@ -2044,7 +2266,7 @@ int page_follow_link(struct dentry *dent
   {
         struct page *page = NULL;
         char *s = page_getlink(dentry, &page);
@@ -900,19 +842,8 @@
         if (page) {
                 kunmap(page);
                 page_cache_release(page);
---- linux-2.4.20-rh/fs/nfsd/vfs.c~vfs_intent-2.4.20-rh 2003-04-11 14:04:48.000000000 +0800
-+++ linux-2.4.20-rh-root/fs/nfsd/vfs.c 2003-06-09 23:18:07.000000000 +0800
-@@ -1293,7 +1293,7 @@ nfsd_rename(struct svc_rqst *rqstp, stru
-                       err = nfserr_perm;
-       } else
- #endif
--      err = vfs_rename(fdir, odentry, tdir, ndentry);
-+      err = vfs_rename(fdir, odentry, tdir, ndentry, NULL);
-       if (!err && EX_ISSYNC(tfhp->fh_export)) {
-               nfsd_sync_dir(tdentry);
-               nfsd_sync_dir(fdentry);
---- linux-2.4.20-rh/fs/open.c~vfs_intent-2.4.20-rh     2003-04-11 14:04:57.000000000 +0800
-+++ linux-2.4.20-rh-root/fs/open.c     2003-06-09 23:18:07.000000000 +0800
+--- linux-2.4.20/fs/open.c~vfs_intent-2.4.20-rh        2003-07-17 08:32:45.000000000 -0700
++++ linux-2.4.20-mmonroe/fs/open.c     2003-07-17 08:35:22.000000000 -0700
  @@ -19,6 +19,8 @@
   #include <asm/uaccess.h>
   
@@ -934,7 +865,7 @@
         int error;
         struct iattr newattrs;
   
-@@ -108,7 +111,14 @@ int do_truncate(struct dentry *dentry, l
+@@ -108,7 +111,13 @@ int do_truncate(struct dentry *dentry, l
         down(&inode->i_sem);
         newattrs.ia_size = length;
         newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
@@ -943,14 +874,13 @@
  +              newattrs.ia_valid |= ATTR_FROM_OPEN;
  +      if (op->setattr_raw) {
  +              newattrs.ia_valid |= ATTR_RAW;
-+              newattrs.ia_ctime = CURRENT_TIME;
  +              error = op->setattr_raw(inode, &newattrs);
-+      } else 
++      } else
  +              error = notify_change(dentry, &newattrs);
         up(&inode->i_sem);
         return error;
   }
-@@ -118,12 +128,13 @@ static inline long do_sys_truncate(const
+@@ -118,12 +127,13 @@ static inline long do_sys_truncate(const
         struct nameidata nd;
         struct inode * inode;
         int error;
@@ -965,22 +895,22 @@
         if (error)
                 goto out;
         inode = nd.dentry->d_inode;
-@@ -163,11 +174,13 @@ static inline long do_sys_truncate(const
+@@ -163,11 +173,13 @@ static inline long do_sys_truncate(const
         error = locks_verify_truncate(inode, NULL, length);
         if (!error) {
                 DQUOT_INIT(inode);
  -              error = do_truncate(nd.dentry, length);
-+              intent_release(nd.dentry, &it);
++              intent_release(&it);
  +              error = do_truncate(nd.dentry, length, 0);
         }
         put_write_access(inode);
   
   dput_and_out:
-+      intent_release(nd.dentry, &it);
++      intent_release(&it);
         path_release(&nd);
   out:
         return error;
-@@ -215,7 +228,7 @@ static inline long do_sys_ftruncate(unsi
+@@ -215,7 +227,7 @@ static inline long do_sys_ftruncate(unsi
   
         error = locks_verify_truncate(inode, file, length);
         if (!error)
@@ -989,7 +919,7 @@
   out_putf:
         fput(file);
   out:
-@@ -260,11 +273,13 @@ asmlinkage long sys_utime(char * filenam
+@@ -260,11 +272,13 @@ asmlinkage long sys_utime(char * filenam
         struct inode * inode;
         struct iattr newattrs;
   
@@ -1004,7 +934,7 @@
         error = -EROFS;
         if (IS_RDONLY(inode))
                 goto dput_and_out;
-@@ -279,11 +294,29 @@ asmlinkage long sys_utime(char * filenam
+@@ -279,11 +293,25 @@ asmlinkage long sys_utime(char * filenam
                         goto dput_and_out;
   
                 newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
@@ -1021,10 +951,6 @@
  +                      goto dput_and_out;
  +      }
  +
-+      error = -EROFS;
-+      if (IS_RDONLY(inode))
-+              goto dput_and_out;
-+
  +      error = -EPERM;
  +      if (!times) {
                 if (current->fsuid != inode->i_uid &&
@@ -1035,7 +961,7 @@
         error = notify_change(nd.dentry, &newattrs);
   dput_and_out:
         path_release(&nd);
-@@ -304,12 +337,14 @@ asmlinkage long sys_utimes(char * filena
+@@ -304,12 +332,14 @@ asmlinkage long sys_utimes(char * filena
         struct inode * inode;
         struct iattr newattrs;
   
@@ -1051,7 +977,7 @@
         error = -EROFS;
         if (IS_RDONLY(inode))
                 goto dput_and_out;
-@@ -324,7 +359,20 @@ asmlinkage long sys_utimes(char * filena
+@@ -324,7 +354,20 @@ asmlinkage long sys_utimes(char * filena
                 newattrs.ia_atime = times[0].tv_sec;
                 newattrs.ia_mtime = times[1].tv_sec;
                 newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
@@ -1073,7 +999,7 @@
                 if (current->fsuid != inode->i_uid &&
                     (error = permission(inode,MAY_WRITE)) != 0)
                         goto dput_and_out;
-@@ -347,6 +395,7 @@ asmlinkage long sys_access(const char * 
+@@ -347,6 +390,7 @@ asmlinkage long sys_access(const char * 
         int old_fsuid, old_fsgid;
         kernel_cap_t old_cap;
         int res;
@@ -1081,7 +1007,7 @@
   
         if (mode & ~S_IRWXO)    /* where's F_OK, X_OK, W_OK, R_OK? */
                 return -EINVAL;
-@@ -364,13 +413,14 @@ asmlinkage long sys_access(const char * 
+@@ -364,13 +408,14 @@ asmlinkage long sys_access(const char * 
         else
                 current->cap_effective = current->cap_permitted;
   
@@ -1093,11 +1019,11 @@
                 if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
                    && !special_file(nd.dentry->d_inode->i_mode))
                         res = -EROFS;
-+              intent_release(nd.dentry, &it);
++              intent_release(&it);
                 path_release(&nd);
         }
   
-@@ -385,8 +435,9 @@ asmlinkage long sys_chdir(const char * f
+@@ -385,8 +430,9 @@ asmlinkage long sys_chdir(const char * f
   {
         int error;
         struct nameidata nd;
@@ -1108,15 +1034,15 @@
         if (error)
                 goto out;
   
-@@ -397,6 +448,7 @@ asmlinkage long sys_chdir(const char * f
+@@ -397,6 +443,7 @@ asmlinkage long sys_chdir(const char * f
         set_fs_pwd(current->fs, nd.mnt, nd.dentry);
   
   dput_and_out:
-+      intent_release(nd.dentry, &it);
++      intent_release(&it);
         path_release(&nd);
   out:
         return error;
-@@ -436,9 +488,10 @@ asmlinkage long sys_chroot(const char * 
+@@ -436,9 +483,10 @@ asmlinkage long sys_chroot(const char * 
   {
         int error;
         struct nameidata nd;
@@ -1129,15 +1055,15 @@
         if (error)
                 goto out;
   
-@@ -454,6 +507,7 @@ asmlinkage long sys_chroot(const char * 
+@@ -454,6 +502,7 @@ asmlinkage long sys_chroot(const char * 
         set_fs_altroot();
         error = 0;
   dput_and_out:
-+      intent_release(nd.dentry, &it);
++      intent_release(&it);
         path_release(&nd);
   out:
         return error;
-@@ -508,6 +562,18 @@ asmlinkage long sys_chmod(const char * f
+@@ -508,6 +557,18 @@ asmlinkage long sys_chmod(const char * f
         if (IS_RDONLY(inode))
                 goto dput_and_out;
   
@@ -1156,7 +1082,7 @@
         error = -EPERM;
         if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
                 goto dput_and_out;
-@@ -538,6 +604,20 @@ static int chown_common(struct dentry * 
+@@ -538,6 +599,20 @@ static int chown_common(struct dentry * 
         error = -EROFS;
         if (IS_RDONLY(inode))
                 goto out;
@@ -1166,7 +1092,7 @@
  +
  +              newattrs.ia_uid = user;
  +              newattrs.ia_gid = group;
-+              newattrs.ia_valid = ATTR_UID | ATTR_GID;
++              newattrs.ia_valid = ATTR_UID | ATTR_GID | ATTR_CTIME;
  +              newattrs.ia_valid |= ATTR_RAW;
  +              error = op->setattr_raw(inode, &newattrs);
  +              /* the file system wants to use normal vfs path now */
@@ -1177,15 +1103,19 @@
         error = -EPERM;
         if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
                 goto out;
-@@ -642,6 +722,7 @@ struct file *filp_open(const char * file
+@@ -642,8 +717,9 @@ struct file *filp_open(const char * file
   {
         int namei_flags, error;
         struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = flags };
-       
-       flags &= ~O_DIRECT;
+-      
+-      flags &= ~O_DIRECT;
++      struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = flags };
++
++      //flags &= ~O_DIRECT;
   
-@@ -651,14 +732,15 @@ struct file *filp_open(const char * file
+       namei_flags = flags;
+       if ((namei_flags+1) & O_ACCMODE)
+@@ -651,14 +727,15 @@ struct file *filp_open(const char * file
         if (namei_flags & O_TRUNC)
                 namei_flags |= 2;
   
@@ -1206,19 +1136,27 @@
   {
         struct file * f;
         struct inode *inode;
-@@ -701,6 +783,7 @@ struct file *dentry_open(struct dentry *
+@@ -695,12 +772,15 @@ struct file *dentry_open(struct dentry *
+       }
+ 
+       if (f->f_op && f->f_op->open) {
++              f->f_it = it;
+               error = f->f_op->open(inode,f);
++              f->f_it = NULL;
+               if (error)
+                       goto cleanup_all;
         }
         f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
   
-+      intent_release(dentry, it);
++      intent_release(it);
         return f;
   
   cleanup_all:
-@@ -715,11 +798,17 @@ cleanup_all:
+@@ -715,11 +795,17 @@ cleanup_all:
   cleanup_file:
         put_filp(f);
   cleanup_dentry:
-+      intent_release(dentry, it);
++      intent_release(it);
         dput(dentry);
         mntput(mnt);
         return ERR_PTR(error);
@@ -1232,56 +1170,114 @@
   /*
    * Find an empty file descriptor entry, and mark it busy.
    */
---- linux-2.4.20-rh/fs/stat.c~vfs_intent-2.4.20-rh     2003-04-11 14:05:08.000000000 +0800
-+++ linux-2.4.20-rh-root/fs/stat.c     2003-06-09 23:18:07.000000000 +0800
-@@ -110,11 +110,13 @@ static int do_getattr(struct vfsmount *m
- int vfs_stat(char *name, struct kstat *stat)
+--- linux-2.4.20/fs/stat.c~vfs_intent-2.4.20-rh        2003-07-17 08:33:05.000000000 -0700
++++ linux-2.4.20-mmonroe/fs/stat.c     2003-07-17 08:51:33.000000000 -0700
+@@ -17,10 +17,12 @@
+  * Revalidate the inode. This is required for proper NFS attribute caching.
+  */
+ static __inline__ int
+-do_revalidate(struct dentry *dentry)
++do_revalidate(struct dentry *dentry, struct lookup_intent *it)
+ {
+       struct inode * inode = dentry->d_inode;
+-      if (inode->i_op && inode->i_op->revalidate)
++      if (inode->i_op && inode->i_op->revalidate_it)
++              return inode->i_op->revalidate_it(dentry, it);
++      else if (inode->i_op && inode->i_op->revalidate)
+               return inode->i_op->revalidate(dentry);
+       return 0;
+ }
+@@ -32,13 +34,13 @@ static inline nlink_t user_nlink(struct 
+       return inode->i_nlink;
+ }
+ 
+-static int do_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
++static int do_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat, struct lookup_intent *it)
+ {
+       int res = 0;
+       unsigned int blocks, indirect;
+       struct inode *inode = dentry->d_inode;
+ 
+-      res = do_revalidate(dentry);
++      res = do_revalidate(dentry, it);
+       if (res)
+               return res;
+ 
+@@ -111,10 +113,12 @@ int vfs_stat(char *name, struct kstat *s
   {
         struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
         int error;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
   
  -      error = user_path_walk(name, &nd);
-+      error = user_path_walk_it(name, &nd, &it);
++      error = user_path_walk_it(name, &nd, &it);
         if (!error) {
-               error = do_getattr(nd.mnt, nd.dentry, stat);
-+              intent_release(nd.dentry, &it);
+-              error = do_getattr(nd.mnt, nd.dentry, stat);
++              error = do_getattr(nd.mnt, nd.dentry, stat, &it);
++              intent_release(&it);
                 path_release(&nd);
         }
         return error;
-@@ -123,11 +125,13 @@ int vfs_stat(char *name, struct kstat *s
- int vfs_lstat(char *name, struct kstat *stat)
+@@ -124,10 +128,12 @@ int vfs_lstat(char *name, struct kstat *
   {
         struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
         int error;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
   
  -      error = user_path_walk_link(name, &nd);
-+      error = user_path_walk_link_it(name, &nd, &it);
++      error = user_path_walk_link_it(name, &nd, &it);
         if (!error) {
-               error = do_getattr(nd.mnt, nd.dentry, stat);
-+              intent_release(nd.dentry, &it);
+-              error = do_getattr(nd.mnt, nd.dentry, stat);
++              error = do_getattr(nd.mnt, nd.dentry, stat, &it);
++              intent_release(&it);
                 path_release(&nd);
         }
         return error;
---- linux-2.4.20-rh/include/linux/dcache.h~vfs_intent-2.4.20-rh        2003-04-12 15:46:39.000000000 +0800
-+++ linux-2.4.20-rh-root/include/linux/dcache.h        2003-06-09 23:18:07.000000000 +0800
-@@ -7,6 +7,28 @@
+@@ -139,7 +145,7 @@ int vfs_fstat(unsigned int fd, struct ks
+       int error = -EBADF;
+ 
+       if (f) {
+-              error = do_getattr(f->f_vfsmnt, f->f_dentry, stat);
++              error = do_getattr(f->f_vfsmnt, f->f_dentry, stat, NULL);
+               fput(f);
+       }
+       return error;
+@@ -286,7 +292,7 @@ asmlinkage long sys_readlink(const char 
+ 
+               error = -EINVAL;
+               if (inode->i_op && inode->i_op->readlink &&
+-                  !(error = do_revalidate(nd.dentry))) {
++                  !(error = do_revalidate(nd.dentry, NULL))) {
+                       UPDATE_ATIME(inode);
+                       error = inode->i_op->readlink(nd.dentry, buf, bufsiz);
+               }
+--- linux-2.4.20/include/linux/dcache.h~vfs_intent-2.4.20-rh   2003-07-17 08:32:48.000000000 -0700
++++ linux-2.4.20-mmonroe/include/linux/dcache.h        2003-07-17 08:35:22.000000000 -0700
+@@ -6,6 +6,45 @@
+ #include <asm/atomic.h>
   #include <linux/mount.h>
   #include <linux/kernel.h>
- 
-+#define IT_OPEN     (1)
-+#define IT_CREAT    (1<<1)
-+#define IT_READDIR  (1<<2)
-+#define IT_GETATTR  (1<<3)
-+#define IT_LOOKUP   (1<<4)
-+#define IT_UNLINK   (1<<5)
++#include <linux/string.h>
++
++#define IT_OPEN     0x0001
++#define IT_CREAT    0x0002
++#define IT_READDIR  0x0004
++#define IT_GETATTR  0x0008
++#define IT_LOOKUP   0x0010
++#define IT_UNLINK   0x0020
++#define IT_GETXATTR 0x0040
++#define IT_EXEC     0x0080
++#define IT_PIN      0x0100
  +
-+#define IT_FL_LOCKED   (1)
-+#define IT_FL_FOLLOWED (1<<1) /* set by vfs_follow_link */
++#define IT_FL_LOCKED   0x0001
++#define IT_FL_FOLLOWED 0x0002 /* set by vfs_follow_link */
++
++#define INTENT_MAGIC 0x19620323
  +
  +struct lookup_intent {
  +      int it_op;
++      void (*it_op_release)(struct lookup_intent *);
++      int it_magic;
  +      int it_mode;
  +      int it_flags;
  +      int it_disposition;
@@ -1292,34 +1288,41 @@
  +      void *it_data;
  +};
  +
++static inline void intent_init(struct lookup_intent *it, int op, int flags)
++{
++      memset(it, 0, sizeof(*it));
++      it->it_magic = INTENT_MAGIC;
++      it->it_op = op;
++      it->it_flags = flags;
++}
++
+ 
   /*
    * linux/include/linux/dcache.h
-  *
-@@ -82,6 +104,7 @@ struct dentry {
-       unsigned long d_time;           /* used by d_revalidate */
-       struct dentry_operations  *d_op;
-       struct super_block * d_sb;      /* The root of the dentry tree */
-+      struct lookup_intent *d_it;
-       unsigned long d_vfs_flags;
-       void * d_fsdata;                /* fs-specific data */
-       void * d_extra_attributes;      /* TUX-specific data */
-@@ -96,8 +119,15 @@ struct dentry_operations {
+@@ -96,8 +135,22 @@ struct dentry_operations {
         int (*d_delete)(struct dentry *);
         void (*d_release)(struct dentry *);
         void (*d_iput)(struct dentry *, struct inode *);
-+      int (*d_revalidate2)(struct dentry *, int, struct lookup_intent *);
-+      void (*d_intent_release)(struct dentry *, struct lookup_intent *);
++      int (*d_revalidate_it)(struct dentry *, int, struct lookup_intent *);
++      void (*d_pin)(struct dentry *, struct vfsmount * , int);
++      void (*d_unpin)(struct dentry *, struct vfsmount *, int);
   };
   
++#define PIN(de,mnt,flag)  if (de->d_op && de->d_op->d_pin) \
++                              de->d_op->d_pin(de, mnt, flag);
++#define UNPIN(de,mnt,flag)  if (de->d_op && de->d_op->d_unpin) \
++                              de->d_op->d_unpin(de, mnt, flag);
++
++
  +/* defined in fs/namei.c */
-+extern void intent_release(struct dentry *de, struct lookup_intent *it);
++extern void intent_release(struct lookup_intent *it);
  +/* defined in fs/dcache.c */
  +extern void __d_rehash(struct dentry * entry, int lock);
  +
   /* the dentry parameter passed to d_hash and d_compare is the parent
    * directory of the entries to be compared. It is used in case these
    * functions need any directory specific information for determining
-@@ -129,6 +159,7 @@ d_iput:            no              no              yes
+@@ -129,6 +182,7 @@ d_iput:            no              no              yes
                                          * s_nfsd_free_path semaphore will be down
                                          */
   #define DCACHE_REFERENCED     0x0008  /* Recently used, don't discard. */
@@ -1327,26 +1330,27 @@
   
   extern spinlock_t dcache_lock;
   
---- linux-2.4.20-rh/include/linux/fs.h~vfs_intent-2.4.20-rh    2003-05-30 02:07:39.000000000 +0800
-+++ linux-2.4.20-rh-root/include/linux/fs.h    2003-06-09 23:18:07.000000000 +0800
-@@ -337,6 +337,8 @@ extern void set_bh_page(struct buffer_he
+--- linux-2.4.20/include/linux/fs.h~vfs_intent-2.4.20-rh       2003-07-17 08:34:44.000000000 -0700
++++ linux-2.4.20-mmonroe/include/linux/fs.h    2003-07-17 08:35:22.000000000 -0700
+@@ -337,6 +337,9 @@ extern void set_bh_page(struct buffer_he
   #define ATTR_MTIME_SET        256
   #define ATTR_FORCE    512     /* Not a change, but a change it */
   #define ATTR_ATTR_FLAG        1024
-+#define ATTR_RAW      2048    /* file system, not vfs will massage attrs */
-+#define ATTR_FROM_OPEN        4096    /* called from open path, ie O_TRUNC */
++#define ATTR_RAW      0x0800  /* file system, not vfs will massage attrs */
++#define ATTR_FROM_OPEN        0x1000  /* called from open path, ie O_TRUNC */
++#define ATTR_CTIME_SET 0x2000
   
   /*
    * This is the Inode Attributes structure, used for notify_change().  It
-@@ -574,6 +576,7 @@ struct file {
+@@ -574,6 +577,7 @@ struct file {
   
         /* needed for tty driver, and maybe others */
         void                    *private_data;
-+      struct lookup_intent    *f_intent;
++      struct lookup_intent    *f_it;
   
         /* preallocated helper kiobuf to speedup O_DIRECT */
         struct kiobuf           *f_iobuf;
-@@ -701,6 +704,7 @@ struct nameidata {
+@@ -701,6 +705,7 @@ struct nameidata {
         struct qstr last;
         unsigned int flags;
         int last_type;
@@ -1354,52 +1358,50 @@
   };
   
   /*
-@@ -821,7 +825,9 @@ extern int vfs_symlink(struct inode *, s
+@@ -821,7 +826,8 @@ extern int vfs_symlink(struct inode *, s
   extern int vfs_link(struct dentry *, struct inode *, struct dentry *);
   extern int vfs_rmdir(struct inode *, struct dentry *);
   extern int vfs_unlink(struct inode *, struct dentry *);
  -extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
  +int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
-+              struct inode *new_dir, struct dentry *new_dentry,
-+              struct lookup_intent *it);
++             struct inode *new_dir, struct dentry *new_dentry);
   
   /*
    * File types
-@@ -882,20 +888,33 @@ struct file_operations {
+@@ -881,21 +887,32 @@ struct file_operations {
+ 
   struct inode_operations {
         int (*create) (struct inode *,struct dentry *,int);
++      int (*create_it) (struct inode *,struct dentry *,int, struct lookup_intent *);
         struct dentry * (*lookup) (struct inode *,struct dentry *);
-+      struct dentry * (*lookup2) (struct inode *,struct dentry *, struct lookup_intent *);
++      struct dentry * (*lookup_it) (struct inode *,struct dentry *, struct lookup_intent *, int flags);
         int (*link) (struct dentry *,struct inode *,struct dentry *);
-+      int (*link2) (struct inode *,struct inode *, const char *, int);
++      int (*link_raw) (struct nameidata *,struct nameidata *);
         int (*unlink) (struct inode *,struct dentry *);
-+      int (*unlink2) (struct inode *, const char *, int);
++      int (*unlink_raw) (struct nameidata *);
         int (*symlink) (struct inode *,struct dentry *,const char *);
-+      int (*symlink2) (struct inode *, const char *, int, const char *);
++      int (*symlink_raw) (struct nameidata *,const char *);
         int (*mkdir) (struct inode *,struct dentry *,int);
-+      int (*mkdir2) (struct inode *, const char *, int,int);
++      int (*mkdir_raw) (struct nameidata *,int);
         int (*rmdir) (struct inode *,struct dentry *);
-+      int (*rmdir2) (struct inode *, const char *, int);
++      int (*rmdir_raw) (struct nameidata *);
         int (*mknod) (struct inode *,struct dentry *,int,int);
-+      int (*mknod2) (struct inode *, const char *, int,int,int);
++      int (*mknod_raw) (struct nameidata *,int,dev_t);
         int (*rename) (struct inode *, struct dentry *,
                         struct inode *, struct dentry *);
-+      int (*rename2) (struct inode *, struct inode *,
-+                      const char *oldname, int oldlen,
-+                      const char *newname, int newlen);
++      int (*rename_raw) (struct nameidata *, struct nameidata *);
         int (*readlink) (struct dentry *, char *,int);
         int (*follow_link) (struct dentry *, struct nameidata *);
-+      int (*follow_link2) (struct dentry *, struct nameidata *,
-+                           struct lookup_intent *it);
         void (*truncate) (struct inode *);
         int (*permission) (struct inode *, int);
         int (*revalidate) (struct dentry *);
++      int (*revalidate_it) (struct dentry *, struct lookup_intent *);
         int (*setattr) (struct dentry *, struct iattr *);
-+      int (*setattr_raw) (struct inode *, struct iattr *);
++      int (*setattr_raw) (struct inode *, struct iattr *);
         int (*getattr) (struct dentry *, struct iattr *);
         int (*setxattr) (struct dentry *, const char *, void *, size_t, int);
         ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
-@@ -1091,10 +1110,14 @@ static inline int get_lease(struct inode
+@@ -1091,10 +1108,14 @@ static inline int get_lease(struct inode
   
   asmlinkage long sys_open(const char *, int, int);
   asmlinkage long sys_close(unsigned int);      /* yes, it's really unsigned */
@@ -1415,7 +1417,7 @@
   extern int filp_close(struct file *, fl_owner_t id);
   extern char * getname(const char *);
   
-@@ -1385,6 +1408,7 @@ typedef int (*read_actor_t)(read_descrip
+@@ -1385,6 +1406,7 @@ typedef int (*read_actor_t)(read_descrip
   extern loff_t default_llseek(struct file *file, loff_t offset, int origin);
   
   extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *));
@@ -1423,7 +1425,7 @@
   extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *));
   extern int FASTCALL(path_walk(const char *, struct nameidata *));
   extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *));
-@@ -1396,6 +1420,8 @@ extern struct dentry * lookup_one_len(co
+@@ -1396,6 +1418,8 @@ extern struct dentry * lookup_one_len(co
   extern struct dentry * lookup_hash(struct qstr *, struct dentry *);
   #define user_path_walk(name,nd)        __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd)
   #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd)
@@ -1432,7 +1434,7 @@
   
   extern void inode_init_once(struct inode *);
   extern void iput(struct inode *);
-@@ -1495,6 +1521,8 @@ extern struct file_operations generic_ro
+@@ -1497,6 +1521,8 @@ extern struct file_operations generic_ro
   
   extern int vfs_readlink(struct dentry *, char *, int, const char *);
   extern int vfs_follow_link(struct nameidata *, const char *);
@@ -1441,8 +1443,8 @@
   extern int page_readlink(struct dentry *, char *, int);
   extern int page_follow_link(struct dentry *, struct nameidata *);
   extern struct inode_operations page_symlink_inode_operations;
---- linux-2.4.20-rh/kernel/ksyms.c~vfs_intent-2.4.20-rh        2003-05-30 02:07:42.000000000 +0800
-+++ linux-2.4.20-rh-root/kernel/ksyms.c        2003-06-09 23:18:07.000000000 +0800
+--- linux-2.4.20/kernel/ksyms.c~vfs_intent-2.4.20-rh   2003-07-17 08:34:45.000000000 -0700
++++ linux-2.4.20-mmonroe/kernel/ksyms.c        2003-07-17 08:35:22.000000000 -0700
  @@ -298,6 +298,7 @@ EXPORT_SYMBOL(read_cache_page);
   EXPORT_SYMBOL(set_page_dirty);
   EXPORT_SYMBOL(vfs_readlink);
@@ -1451,17 +1453,16 @@
   EXPORT_SYMBOL(page_readlink);
   EXPORT_SYMBOL(page_follow_link);
   EXPORT_SYMBOL(page_symlink_inode_operations);
---- linux-2.4.20-rh/fs/exec.c~vfs_intent-2.4.20-rh     2003-04-13 10:07:02.000000000 +0800
-+++ linux-2.4.20-rh-root/fs/exec.c     2003-06-09 23:18:07.000000000 +0800
+--- linux-2.4.20/fs/exec.c~vfs_intent-2.4.20-rh        2003-07-17 08:33:09.000000000 -0700
++++ linux-2.4.20-mmonroe/fs/exec.c     2003-07-17 08:35:22.000000000 -0700
  @@ -114,8 +114,9 @@ asmlinkage long sys_uselib(const char * 
         struct file * file;
         struct nameidata nd;
         int error;
--
++      struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = O_RDONLY };
+ 
  -      error = user_path_walk(library, &nd);
-+              struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = O_RDONLY };
-+                                                                                                                                             
-+        error = user_path_walk_it(library, &nd, &it);
++      error = user_path_walk_it(library, &nd, &it);
         if (error)
                 goto out;
   
@@ -1470,8 +1471,8 @@
                 goto exit;
   
  -      file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
-+      file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);        
-+      intent_release(nd.dentry, &it);
++      file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
++      intent_release(&it);
         error = PTR_ERR(file);
         if (IS_ERR(file))
                 goto out;
@@ -1479,32 +1480,32 @@
         struct inode *inode;
         struct file *file;
         int err = 0;
--
--      err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
  +      struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = O_RDONLY };
-+                                                                                                                                             
+ 
+-      err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
  +      err = path_lookup_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it);
         file = ERR_PTR(err);
         if (!err) {
                 inode = nd.dentry->d_inode;
-@@ -395,7 +398,7 @@ struct file *open_exec(const char *name)
+@@ -395,7 +398,8 @@ struct file *open_exec(const char *name)
                                 err = -EACCES;
                         file = ERR_PTR(err);
                         if (!err) {
  -                              file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
-+                                file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
++                              file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
++                              intent_release(&it);
                                 if (!IS_ERR(file)) {
                                         err = deny_write_access(file);
                                         if (err) {
-@@ -404,6 +407,7 @@ struct file *open_exec(const char *name)
-                                       }
-                               }
- out:
-+                              intent_release(nd.dentry, &it);
+@@ -407,6 +411,7 @@ out:
                                 return file;
                         }
                 }
-@@ -1283,7 +1287,7 @@ int do_coredump(long signr, int exit_cod
++              intent_release(&it);
+               path_release(&nd);
+       }
+       goto out;
+@@ -1283,7 +1288,7 @@ int do_coredump(long signr, int exit_cod
                 goto close_fail;
         if (!file->f_op->write)
                 goto close_fail;
@@ -1513,15 +1514,15 @@
                 goto close_fail;
   
         retval = binfmt->core_dump(signr, regs, file);
---- linux-2.4.20-rh/fs/proc/base.c~vfs_intent-2.4.20-rh        2003-06-09 23:16:51.000000000 +0800
-+++ linux-2.4.20-rh-root/fs/proc/base.c        2003-06-09 23:18:52.000000000 +0800
+--- linux-2.4.20/fs/proc/base.c~vfs_intent-2.4.20-rh   2003-07-17 08:33:05.000000000 -0700
++++ linux-2.4.20-mmonroe/fs/proc/base.c        2003-07-17 08:35:22.000000000 -0700
  @@ -464,6 +464,9 @@ static int proc_pid_follow_link(struct d
   
         error = inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt);
         nd->last_type = LAST_BIND;
  +
-+        if (nd->it != NULL)
-+                nd->it->it_int_flags |= IT_FL_FOLLOWED;
++      if (nd->it != NULL)
++              nd->it->it_int_flags |= IT_FL_FOLLOWED;
   out:
         return error;
   }
diff --git a/lustre/kernel_patches/patches/vfs_intent-2.4.20-vanilla.patch b/lustre/kernel_patches/patches/vfs_intent-2.4.20-vanilla.patch

index 09bcb22..e522896 100644 (file)
--- a/lustre/kernel_patches/patches/vfs_intent-2.4.20-vanilla.patch
+++ b/lustre/kernel_patches/patches/vfs_intent-2.4.20-vanilla.patch
@@ -1,17 +1,20 @@
- fs/dcache.c            |   20 ++
- fs/exec.c              |   15 +
- fs/namei.c             |  378 ++++++++++++++++++++++++++++++++++++++++++-------
- fs/nfsd/vfs.c          |    2 
- fs/open.c              |  126 ++++++++++++++--
- fs/proc/base.c         |    3 
- fs/stat.c              |   24 ++-
- include/linux/dcache.h |   31 ++++
- include/linux/fs.h     |   32 +++-
- kernel/ksyms.c         |    1 
- 10 files changed, 543 insertions(+), 89 deletions(-)
+ fs/dcache.c               |   19 ++
+ fs/exec.c                 |   15 +-
+ fs/namei.c                |  329 ++++++++++++++++++++++++++++++++++++++--------
+ fs/namespace.c            |   30 +++-
+ fs/open.c                 |  128 +++++++++++++++--
+ fs/proc/base.c            |    3 
+ fs/stat.c                 |   50 ++++--
+ include/linux/dcache.h    |   53 +++++++
+ include/linux/fs.h        |   29 +++-
+ include/linux/fs_struct.h |    4 
+ kernel/exit.c             |    3 
+ kernel/fork.c             |    3 
+ kernel/ksyms.c            |    1 
+ 13 files changed, 560 insertions(+), 107 deletions(-)
  
---- linux-2.4.20-l18/fs/exec.c~vfs_intent-2.4.20-vanilla       Thu Nov 28 18:53:15 2002
-+++ linux-2.4.20-l18-phil/fs/exec.c    Wed May 28 01:39:18 2003
+--- linux-2.4.20-ad/fs/exec.c~vfs_intent-2.4.20-vanilla        2002-11-28 16:53:15.000000000 -0700
++++ linux-2.4.20-ad-braam/fs/exec.c    2003-07-07 15:13:53.000000000 -0600
  @@ -107,8 +107,9 @@ asmlinkage long sys_uselib(const char * 
         struct file * file;
         struct nameidata nd;
@@ -29,7 +32,7 @@
   
  -      file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
  +      file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
-+      intent_release(nd.dentry, &it);
++      intent_release(&it);
         error = PTR_ERR(file);
         if (IS_ERR(file))
                 goto out;
@@ -50,7 +53,7 @@
                         if (!err) {
  -                              file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
  +                              file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
-+                                intent_release(nd.dentry, &it);
++                              intent_release(&it);
                                 if (!IS_ERR(file)) {
                                         err = deny_write_access(file);
                                         if (err) {
@@ -58,7 +61,7 @@
                                 return file;
                         }
                 }
-+                intent_release(nd.dentry, &it);
++              intent_release(&it);
                 path_release(&nd);
         }
         goto out;
@@ -71,8 +74,8 @@
                 goto close_fail;
   
         retval = binfmt->core_dump(signr, regs, file);
---- linux-2.4.20-l18/fs/dcache.c~vfs_intent-2.4.20-vanilla     Thu Nov 28 18:53:15 2002
-+++ linux-2.4.20-l18-phil/fs/dcache.c  Wed May 28 01:39:18 2003
+--- linux-2.4.20-ad/fs/dcache.c~vfs_intent-2.4.20-vanilla      2002-11-28 16:53:15.000000000 -0700
++++ linux-2.4.20-ad-braam/fs/dcache.c  2003-07-09 01:46:27.000000000 -0600
  @@ -181,6 +181,13 @@ int d_invalidate(struct dentry * dentry)
                 spin_unlock(&dcache_lock);
                 return 0;
@@ -87,15 +90,7 @@
         /*
          * Check whether to do a partial shrink_dcache
          * to get rid of unused child entries.
-@@ -616,6 +623,7 @@ struct dentry * d_alloc(struct dentry * 
-       dentry->d_op = NULL;
-       dentry->d_fsdata = NULL;
-       dentry->d_mounted = 0;
-+      dentry->d_it = NULL;
-       INIT_LIST_HEAD(&dentry->d_hash);
-       INIT_LIST_HEAD(&dentry->d_lru);
-       INIT_LIST_HEAD(&dentry->d_subdirs);
-@@ -830,13 +838,19 @@ void d_delete(struct dentry * dentry)
+@@ -830,13 +837,19 @@ void d_delete(struct dentry * dentry)
    * Adds a dentry to the hash according to its name.
    */
    
@@ -118,16 +113,133 @@
   }
   
   #define do_switch(x,y) do { \
---- linux-2.4.20-l18/fs/namei.c~vfs_intent-2.4.20-vanilla      Thu Nov 28 18:53:15 2002
-+++ linux-2.4.20-l18-phil/fs/namei.c   Sun Jun  1 23:41:35 2003
+--- linux-2.4.20-ad/fs/namespace.c~vfs_intent-2.4.20-vanilla   2002-11-28 16:53:15.000000000 -0700
++++ linux-2.4.20-ad-braam/fs/namespace.c       2003-07-07 15:13:53.000000000 -0600
+@@ -99,6 +99,7 @@ static void detach_mnt(struct vfsmount *
+ {
+       old_nd->dentry = mnt->mnt_mountpoint;
+       old_nd->mnt = mnt->mnt_parent;
++      UNPIN(old_nd->dentry, old_nd->mnt, 1);
+       mnt->mnt_parent = mnt;
+       mnt->mnt_mountpoint = mnt->mnt_root;
+       list_del_init(&mnt->mnt_child);
+@@ -110,6 +111,7 @@ static void attach_mnt(struct vfsmount *
+ {
+       mnt->mnt_parent = mntget(nd->mnt);
+       mnt->mnt_mountpoint = dget(nd->dentry);
++      PIN(nd->dentry, nd->mnt, 1);
+       list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry));
+       list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts);
+       nd->dentry->d_mounted++;
+@@ -485,14 +487,17 @@ static int do_loopback(struct nameidata 
+ {
+       struct nameidata old_nd;
+       struct vfsmount *mnt = NULL;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+       int err = mount_is_safe(nd);
+       if (err)
+               return err;
+       if (!old_name || !*old_name)
+               return -EINVAL;
+-      err = path_lookup(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd);
+-      if (err)
++      err = path_lookup_it(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd, &it);
++      if (err) {
++              intent_release(&it);
+               return err;
++      }
+ 
+       down_write(&current->namespace->sem);
+       err = -EINVAL;
+@@ -515,6 +520,7 @@ static int do_loopback(struct nameidata 
+       }
+ 
+       up_write(&current->namespace->sem);
++      intent_release(&it);
+       path_release(&old_nd);
+       return err;
+ }
+@@ -698,7 +704,8 @@ long do_mount(char * dev_name, char * di
+                 unsigned long flags, void *data_page)
+ {
+       struct nameidata nd;
+-      int retval = 0;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
++      int retval = 0;
+       int mnt_flags = 0;
+ 
+       /* Discard magic */
+@@ -722,10 +729,11 @@ long do_mount(char * dev_name, char * di
+       flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV);
+ 
+       /* ... and get the mountpoint */
+-      retval = path_lookup(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
+-      if (retval)
++      retval = path_lookup_it(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it);
++      if (retval) {
++              intent_release(&it);
+               return retval;
+-
++      }
+       if (flags & MS_REMOUNT)
+               retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
+                                   data_page);
+@@ -736,6 +744,8 @@ long do_mount(char * dev_name, char * di
+       else
+               retval = do_add_mount(&nd, type_page, flags, mnt_flags,
+                                     dev_name, data_page);
++
++      intent_release(&it);
+       path_release(&nd);
+       return retval;
+ }
+@@ -901,6 +911,8 @@ asmlinkage long sys_pivot_root(const cha
+ {
+       struct vfsmount *tmp;
+       struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd;
++      struct lookup_intent new_it = { .it_op = IT_GETATTR };
++      struct lookup_intent old_it = { .it_op = IT_GETATTR };
+       int error;
+ 
+       if (!capable(CAP_SYS_ADMIN))
+@@ -908,14 +920,14 @@ asmlinkage long sys_pivot_root(const cha
+ 
+       lock_kernel();
+ 
+-      error = __user_walk(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd);
++      error = __user_walk_it(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd, &new_it);
+       if (error)
+               goto out0;
+       error = -EINVAL;
+       if (!check_mnt(new_nd.mnt))
+               goto out1;
+ 
+-      error = __user_walk(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd);
++      error = __user_walk_it(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd, &old_it);
+       if (error)
+               goto out1;
+ 
+@@ -970,8 +982,10 @@ out2:
+       up(&old_nd.dentry->d_inode->i_zombie);
+       up_write(&current->namespace->sem);
+       path_release(&user_nd);
++      intent_release(&old_it);
+       path_release(&old_nd);
+ out1:
++      intent_release(&new_it);
+       path_release(&new_nd);
+ out0:
+       unlock_kernel();
+--- linux-2.4.20-ad/fs/namei.c~vfs_intent-2.4.20-vanilla       2002-11-28 16:53:15.000000000 -0700
++++ linux-2.4.20-ad-braam/fs/namei.c   2003-07-08 13:53:48.000000000 -0600
  @@ -94,6 +94,13 @@
    * XEmacs seems to be relying on it...
    */
   
-+void intent_release(struct dentry *de, struct lookup_intent *it)
++void intent_release(struct lookup_intent *it)
  +{
-+      if (it && de->d_op && de->d_op->d_intent_release)
-+              de->d_op->d_intent_release(de, it);
++      if (it && it->it_op_release)
++              it->it_op_release(it);
  +
  +}
  +
@@ -144,8 +256,8 @@
   {
         struct dentry * dentry = d_lookup(parent, name);
   
-+      if (dentry && dentry->d_op && dentry->d_op->d_revalidate2) {
-+              if (!dentry->d_op->d_revalidate2(dentry, flags, it) &&
++      if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) {
++              if (!dentry->d_op->d_revalidate_it(dentry, flags, it) &&
  +                  !d_invalidate(dentry)) {
  +                      dput(dentry);
  +                      dentry = NULL;
@@ -175,8 +287,8 @@
                 result = ERR_PTR(-ENOMEM);
                 if (dentry) {
                         lock_kernel();
-+                      if (dir->i_op->lookup2)
-+                              result = dir->i_op->lookup2(dir, dentry, it);
++                      if (dir->i_op->lookup_it)
++                              result = dir->i_op->lookup_it(dir, dentry, it, flags);
  +                      else
                         result = dir->i_op->lookup(dir, dentry);
                         unlock_kernel();
@@ -185,8 +297,8 @@
                         dput(result);
                         result = ERR_PTR(-ENOENT);
                 }
-+      } else if (result->d_op && result->d_op->d_revalidate2) {
-+              if (!result->d_op->d_revalidate2(result, flags, it) &&
++      } else if (result->d_op && result->d_op->d_revalidate_it) {
++              if (!result->d_op->d_revalidate_it(result, flags, it) &&
  +                  !d_invalidate(result)) {
  +                      dput(result);
  +                      goto again;
@@ -204,30 +316,27 @@
   {
         int err;
         if (current->link_count >= 5)
-@@ -346,10 +375,21 @@ static inline int do_follow_link(struct 
+@@ -346,10 +375,18 @@ static inline int do_follow_link(struct 
         current->link_count++;
         current->total_link_count++;
         UPDATE_ATIME(dentry->d_inode);
  -      err = dentry->d_inode->i_op->follow_link(dentry, nd);
-+        nd->it = it;
-+      if (dentry->d_inode->i_op->follow_link2)
-+              err = dentry->d_inode->i_op->follow_link2(dentry, nd, it);
-+        else
-+              err = dentry->d_inode->i_op->follow_link(dentry, nd);
-+        if (!err && it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) {
-+                /* vfs_follow_link was never called */
-+              intent_release(dentry, it);
-+                path_release(nd);
-+                err = -ENOLINK;
-+        }
++      nd->it = it;
++      err = dentry->d_inode->i_op->follow_link(dentry, nd);
++      if (!err && it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) {
++              /* vfs_follow_link was never called */
++              intent_release(it);
++              path_release(nd);
++              err = -ENOLINK;
++      }
         current->link_count--;
         return err;
   loop:
-+      intent_release(dentry, it);
++      intent_release(it);
         path_release(nd);
         return -ELOOP;
   }
-@@ -379,15 +419,26 @@ int follow_up(struct vfsmount **mnt, str
+@@ -379,15 +416,26 @@ int follow_up(struct vfsmount **mnt, str
         return __follow_up(mnt, dentry);
   }
   
@@ -247,7 +356,7 @@
  +                      opc = it->it_op;
  +                      mode = it->it_mode;
  +              }
-+              intent_release(*dentry, it);
++              intent_release(it);
  +              if (it) {
  +                      it->it_op = opc;
  +                      it->it_mode = mode;
@@ -255,7 +364,7 @@
                 dput(*dentry);
                 mntput(mounted->mnt_parent);
                 *dentry = dget(mounted->mnt_root);
-@@ -399,7 +450,7 @@ static inline int __follow_down(struct v
+@@ -399,7 +447,7 @@ static inline int __follow_down(struct v
   
   int follow_down(struct vfsmount **mnt, struct dentry **dentry)
   {
@@ -264,7 +373,7 @@
   }
    
   static inline void follow_dotdot(struct nameidata *nd)
-@@ -435,7 +486,7 @@ static inline void follow_dotdot(struct 
+@@ -435,7 +483,7 @@ static inline void follow_dotdot(struct 
                 mntput(nd->mnt);
                 nd->mnt = parent;
         }
@@ -273,7 +382,7 @@
                 ;
   }
   
-@@ -447,7 +498,8 @@ static inline void follow_dotdot(struct 
+@@ -447,7 +495,8 @@ static inline void follow_dotdot(struct 
    *
    * We expect 'base' to be positive and a directory.
    */
@@ -283,7 +392,7 @@
   {
         struct dentry *dentry;
         struct inode *inode;
-@@ -520,15 +572,15 @@ int link_path_walk(const char * name, st
+@@ -520,15 +569,15 @@ int link_path_walk(const char * name, st
                                 break;
                 }
                 /* This does the actual lookups.. */
@@ -302,27 +411,27 @@
                         ;
   
                 err = -ENOENT;
-@@ -539,8 +591,8 @@ int link_path_walk(const char * name, st
+@@ -539,8 +588,8 @@ int link_path_walk(const char * name, st
                 if (!inode->i_op)
                         goto out_dput;
   
  -              if (inode->i_op->follow_link) {
  -                      err = do_follow_link(dentry, nd);
-+              if (inode->i_op->follow_link || inode->i_op->follow_link2) {
++              if (inode->i_op->follow_link) {
  +                      err = do_follow_link(dentry, nd, NULL);
                         dput(dentry);
                         if (err)
                                 goto return_err;
-@@ -556,7 +608,7 @@ int link_path_walk(const char * name, st
+@@ -556,7 +605,7 @@ int link_path_walk(const char * name, st
                         nd->dentry = dentry;
                 }
                 err = -ENOTDIR; 
  -              if (!inode->i_op->lookup)
-+              if (!inode->i_op->lookup && !inode->i_op->lookup2)
++              if (!inode->i_op->lookup && !inode->i_op->lookup_it)
                         break;
                 continue;
                 /* here ends the main loop */
-@@ -583,19 +635,20 @@ last_component:
+@@ -583,19 +632,19 @@ last_component:
                         if (err < 0)
                                 break;
                 }
@@ -340,54 +449,52 @@
                         ;
                 inode = dentry->d_inode;
                 if ((lookup_flags & LOOKUP_FOLLOW)
--                  && inode && inode->i_op && inode->i_op->follow_link) {
+                   && inode && inode->i_op && inode->i_op->follow_link) {
  -                      err = do_follow_link(dentry, nd);
-+                  && inode && inode->i_op &&
-+                  (inode->i_op->follow_link || inode->i_op->follow_link2)) {
  +                      err = do_follow_link(dentry, nd, it);
                         dput(dentry);
                         if (err)
                                 goto return_err;
-@@ -609,7 +662,8 @@ last_component:
+@@ -609,7 +658,8 @@ last_component:
                         goto no_inode;
                 if (lookup_flags & LOOKUP_DIRECTORY) {
                         err = -ENOTDIR; 
  -                      if (!inode->i_op || !inode->i_op->lookup)
  +                      if (!inode->i_op ||
-+                          (!inode->i_op->lookup && !inode->i_op->lookup2))
++                          (!inode->i_op->lookup && !inode->i_op->lookup_it))
                                 break;
                 }
                 goto return_base;
-@@ -633,6 +687,23 @@ return_reval:
+@@ -633,6 +683,23 @@ return_reval:
                  * Check the cached dentry for staleness.
                  */
                 dentry = nd->dentry;
-+        revalidate_again:
-+              if (dentry && dentry->d_op && dentry->d_op->d_revalidate2) {
++      revalidate_again:
++              if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) {
  +                      err = -ESTALE;
-+                      if (!dentry->d_op->d_revalidate2(dentry, 0, it)) {
-+                                struct dentry *new;
-+                                err = permission(dentry->d_parent->d_inode, 
-+                                                 MAY_EXEC);
-+                                if (err)
-+                                        break;
-+                                new = real_lookup(dentry->d_parent,
-+                                                  &dentry->d_name, 0, NULL);
++                      if (!dentry->d_op->d_revalidate_it(dentry, 0, it)) {
++                              struct dentry *new;
++                              err = permission(dentry->d_parent->d_inode,
++                                               MAY_EXEC);
++                              if (err)
++                                      break;
++                              new = real_lookup(dentry->d_parent,
++                                                &dentry->d_name, 0, NULL);
  +                              d_invalidate(dentry);
-+                                dput(dentry);
-+                                dentry = new;
-+                                goto revalidate_again;
-+                        }
++                              dput(dentry);
++                              dentry = new;
++                              goto revalidate_again;
++                      }
  +              } else
                 if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
                         err = -ESTALE;
                         if (!dentry->d_op->d_revalidate(dentry, 0)) {
-@@ -646,15 +717,28 @@ out_dput:
+@@ -646,15 +713,28 @@ out_dput:
                 dput(dentry);
                 break;
         }
  +      if (err)
-+              intent_release(nd->dentry, it);
++              intent_release(it);
         path_release(nd);
   return_err:
         return err;
@@ -412,7 +519,7 @@
   }
   
   /* SMP-safe */
-@@ -739,6 +823,17 @@ walk_init_root(const char *name, struct 
+@@ -739,6 +819,17 @@ walk_init_root(const char *name, struct 
   }
   
   /* SMP-safe */
@@ -430,15 +537,15 @@
   int path_lookup(const char *path, unsigned flags, struct nameidata *nd)
   {
         int error = 0;
-@@ -753,6 +848,7 @@ int path_init(const char *name, unsigned
+@@ -753,6 +844,7 @@ int path_init(const char *name, unsigned
   {
         nd->last_type = LAST_ROOT; /* if there are only slashes... */
         nd->flags = flags;
-+        nd->it = NULL;
++      nd->it = NULL;
         if (*name=='/')
                 return walk_init_root(name,nd);
         read_lock(&current->fs->lock);
-@@ -767,7 +863,8 @@ int path_init(const char *name, unsigned
+@@ -767,7 +859,8 @@ int path_init(const char *name, unsigned
    * needs parent already locked. Doesn't follow mounts.
    * SMP-safe.
    */
@@ -448,7 +555,7 @@
   {
         struct dentry * dentry;
         struct inode *inode;
-@@ -790,13 +887,16 @@ struct dentry * lookup_hash(struct qstr 
+@@ -790,13 +883,16 @@ struct dentry * lookup_hash(struct qstr 
                         goto out;
         }
   
@@ -460,13 +567,13 @@
                 if (!new)
                         goto out;
                 lock_kernel();
-+              if (inode->i_op->lookup2)
-+                      dentry = inode->i_op->lookup2(inode, new, it);
++              if (inode->i_op->lookup_it)
++                      dentry = inode->i_op->lookup_it(inode, new, it, 0);
  +              else
                 dentry = inode->i_op->lookup(inode, new);
                 unlock_kernel();
                 if (!dentry)
-@@ -808,6 +908,12 @@ out:
+@@ -808,6 +904,12 @@ out:
         return dentry;
   }
   
@@ -479,7 +586,7 @@
   /* SMP-safe */
   struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
   {
-@@ -829,7 +935,7 @@ struct dentry * lookup_one_len(const cha
+@@ -829,7 +931,7 @@ struct dentry * lookup_one_len(const cha
         }
         this.hash = end_name_hash(hash);
   
@@ -488,7 +595,7 @@
   access:
         return ERR_PTR(-EACCES);
   }
-@@ -860,6 +966,23 @@ int __user_walk(const char *name, unsign
+@@ -860,6 +962,23 @@ int __user_walk(const char *name, unsign
         return err;
   }
   
@@ -512,7 +619,47 @@
   /*
    * It's inline, so penalty for filesystems that don't use sticky bit is
    * minimal.
-@@ -996,7 +1119,8 @@ exit_lock:
+@@ -955,7 +1074,8 @@ static inline int lookup_flags(unsigned 
+       return retval;
+ }
+ 
+-int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
++static int vfs_create_it(struct inode *dir, struct dentry *dentry, int mode,
++                       struct lookup_intent *it)
+ {
+       int error;
+ 
+@@ -968,12 +1088,15 @@ int vfs_create(struct inode *dir, struct
+               goto exit_lock;
+ 
+       error = -EACCES;        /* shouldn't it be ENOSYS? */
+-      if (!dir->i_op || !dir->i_op->create)
++      if (!dir->i_op || (!dir->i_op->create && !dir->i_op->create_it))
+               goto exit_lock;
+ 
+       DQUOT_INIT(dir);
+       lock_kernel();
+-      error = dir->i_op->create(dir, dentry, mode);
++      if (dir->i_op->create_it)
++              error = dir->i_op->create_it(dir, dentry, mode, it);
++      else
++              error = dir->i_op->create(dir, dentry, mode);
+       unlock_kernel();
+ exit_lock:
+       up(&dir->i_zombie);
+@@ -982,6 +1105,11 @@ exit_lock:
+       return error;
+ }
+ 
++int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
++{
++      return vfs_create_it(dir, dentry, mode, NULL);
++}
++
+ /*
+  *    open_namei()
+  *
+@@ -996,7 +1124,8 @@ exit_lock:
    * for symlinks (where the permissions are checked later).
    * SMP-safe
    */
@@ -522,7 +669,7 @@
   {
         int acc_mode, error = 0;
         struct inode *inode;
-@@ -1010,7 +1134,7 @@ int open_namei(const char * pathname, in
+@@ -1010,7 +1139,7 @@ int open_namei(const char * pathname, in
          * The simplest case - just a plain lookup.
          */
         if (!(flag & O_CREAT)) {
@@ -531,7 +678,7 @@
                 if (error)
                         return error;
                 dentry = nd->dentry;
-@@ -1020,6 +1144,10 @@ int open_namei(const char * pathname, in
+@@ -1020,6 +1149,10 @@ int open_namei(const char * pathname, in
         /*
          * Create - we need to know the parent.
          */
@@ -542,7 +689,7 @@
         error = path_lookup(pathname, LOOKUP_PARENT, nd);
         if (error)
                 return error;
-@@ -1035,7 +1163,7 @@ int open_namei(const char * pathname, in
+@@ -1035,7 +1168,7 @@ int open_namei(const char * pathname, in
   
         dir = nd->dentry;
         down(&dir->d_inode->i_sem);
@@ -551,15 +698,21 @@
   
   do_last:
         error = PTR_ERR(dentry);
-@@ -1044,6 +1172,7 @@ do_last:
+@@ -1044,10 +1177,11 @@ do_last:
                 goto exit;
         }
   
  +      it->it_mode = mode;
         /* Negative dentry, just create the file */
         if (!dentry->d_inode) {
-               error = vfs_create(dir->d_inode, dentry,
-@@ -1072,12 +1201,13 @@ do_last:
+-              error = vfs_create(dir->d_inode, dentry,
+-                                 mode & ~current->fs->umask);
++              error = vfs_create_it(dir->d_inode, dentry,
++                                 mode & ~current->fs->umask, it);
+               up(&dir->d_inode->i_sem);
+               dput(nd->dentry);
+               nd->dentry = dentry;
+@@ -1072,7 +1206,7 @@ do_last:
                 error = -ELOOP;
                 if (flag & O_NOFOLLOW)
                         goto exit_dput;
@@ -568,14 +721,7 @@
         }
         error = -ENOENT;
         if (!dentry->d_inode)
-               goto exit_dput;
--      if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)
-+      if (dentry->d_inode->i_op && (dentry->d_inode->i_op->follow_link ||
-+                                    dentry->d_inode->i_op->follow_link2))
-               goto do_link;
- 
-       dput(nd->dentry);
-@@ -1151,7 +1281,7 @@ ok:
+@@ -1151,7 +1285,7 @@ ok:
                 if (!error) {
                         DQUOT_INIT(inode);
                         
@@ -584,39 +730,36 @@
                 }
                 put_write_access(inode);
                 if (error)
-@@ -1163,8 +1293,10 @@ ok:
+@@ -1163,8 +1297,10 @@ ok:
         return 0;
   
   exit_dput:
-+      intent_release(dentry, it);
++      intent_release(it);
         dput(dentry);
   exit:
-+      intent_release(nd->dentry, it);
++      intent_release(it);
         path_release(nd);
         return error;
   
-@@ -1183,7 +1315,19 @@ do_link:
+@@ -1183,7 +1319,16 @@ do_link:
          * are done. Procfs-like symlinks just set LAST_BIND.
          */
         UPDATE_ATIME(dentry->d_inode);
  -      error = dentry->d_inode->i_op->follow_link(dentry, nd);
-+        nd->it = it;
-+      if (dentry->d_inode->i_op->follow_link2)
-+              error = dentry->d_inode->i_op->follow_link2(dentry, nd, it);
-+      else
-+              error = dentry->d_inode->i_op->follow_link(dentry, nd);
++      nd->it = it;
++      error = dentry->d_inode->i_op->follow_link(dentry, nd);
  +      if (error) {
-+              intent_release(dentry, it);
-+        } else if (it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) {
-+                /* vfs_follow_link was never called */
-+              intent_release(dentry, it);
-+                path_release(nd);
-+                error = -ENOLINK;
-+        }
++              intent_release(it);
++      } else if (it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) {
++              /* vfs_follow_link was never called */
++              intent_release(it);
++              path_release(nd);
++              error = -ENOLINK;
++      }
         dput(dentry);
         if (error)
                 return error;
-@@ -1205,13 +1349,20 @@ do_link:
+@@ -1205,13 +1350,20 @@ do_link:
         }
         dir = nd->dentry;
         down(&dir->d_inode->i_sem);
@@ -639,7 +782,7 @@
   {
         struct dentry *dentry;
   
-@@ -1219,7 +1370,7 @@ static struct dentry *lookup_create(stru
+@@ -1219,7 +1371,7 @@ static struct dentry *lookup_create(stru
         dentry = ERR_PTR(-EEXIST);
         if (nd->last_type != LAST_NORM)
                 goto fail;
@@ -648,18 +791,15 @@
         if (IS_ERR(dentry))
                 goto fail;
         if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
-@@ -1275,7 +1426,19 @@ asmlinkage long sys_mknod(const char * f
+@@ -1275,7 +1427,16 @@ asmlinkage long sys_mknod(const char * f
         error = path_lookup(tmp, LOOKUP_PARENT, &nd);
         if (error)
                 goto out;
  -      dentry = lookup_create(&nd, 0);
  +
-+      if (nd.dentry->d_inode->i_op->mknod2) {
++      if (nd.dentry->d_inode->i_op->mknod_raw) {
  +              struct inode_operations *op = nd.dentry->d_inode->i_op;
-+              error = op->mknod2(nd.dentry->d_inode,
-+                                 nd.last.name,
-+                                 nd.last.len,
-+                                 mode, dev);
++              error = op->mknod_raw(&nd, mode, dev);
  +              /* the file system wants to use normal vfs path now */
  +              if (error != -EOPNOTSUPP)
  +                      goto out2;
@@ -669,7 +809,7 @@
         error = PTR_ERR(dentry);
   
         mode &= ~current->fs->umask;
-@@ -1296,6 +1459,7 @@ asmlinkage long sys_mknod(const char * f
+@@ -1296,6 +1457,7 @@ asmlinkage long sys_mknod(const char * f
                 dput(dentry);
         }
         up(&nd.dentry->d_inode->i_sem);
@@ -677,17 +817,14 @@
         path_release(&nd);
   out:
         putname(tmp);
-@@ -1343,7 +1507,17 @@ asmlinkage long sys_mkdir(const char * p
+@@ -1343,7 +1505,14 @@ asmlinkage long sys_mkdir(const char * p
                 error = path_lookup(tmp, LOOKUP_PARENT, &nd);
                 if (error)
                         goto out;
  -              dentry = lookup_create(&nd, 1);
-+              if (nd.dentry->d_inode->i_op->mkdir2) {
++              if (nd.dentry->d_inode->i_op->mkdir_raw) {
  +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
-+                      error = op->mkdir2(nd.dentry->d_inode,
-+                                         nd.last.name,
-+                                         nd.last.len,
-+                                         mode);
++                      error = op->mkdir_raw(&nd, mode);
  +                      /* the file system wants to use normal vfs path now */
  +                      if (error != -EOPNOTSUPP)
  +                              goto out2;
@@ -696,7 +833,7 @@
                 error = PTR_ERR(dentry);
                 if (!IS_ERR(dentry)) {
                         error = vfs_mkdir(nd.dentry->d_inode, dentry,
-@@ -1351,6 +1525,7 @@ asmlinkage long sys_mkdir(const char * p
+@@ -1351,6 +1520,7 @@ asmlinkage long sys_mkdir(const char * p
                         dput(dentry);
                 }
                 up(&nd.dentry->d_inode->i_sem);
@@ -704,71 +841,49 @@
                 path_release(&nd);
   out:
                 putname(tmp);
-@@ -1451,8 +1626,33 @@ asmlinkage long sys_rmdir(const char * p
+@@ -1451,8 +1621,16 @@ asmlinkage long sys_rmdir(const char * p
                         error = -EBUSY;
                         goto exit1;
         }
-+      if (nd.dentry->d_inode->i_op->rmdir2) {
-+              struct inode_operations *op = nd.dentry->d_inode->i_op;
-+              struct dentry *last;
-+
-+              down(&nd.dentry->d_inode->i_sem);
-+              last = lookup_hash_it(&nd.last, nd.dentry, NULL);
-+              up(&nd.dentry->d_inode->i_sem);
-+              if (IS_ERR(last)) {
-+                      error = PTR_ERR(last);
-+                      goto exit1;
-+              }
-+              if (d_mountpoint(last)) {
-+                      dput(last);
-+                      error = -EBUSY;
-+                      goto exit1;
-+              }
-+              dput(last);
++      if (nd.dentry->d_inode->i_op->rmdir_raw) {
++              struct inode_operations *op = nd.dentry->d_inode->i_op;
  +
-+              error = op->rmdir2(nd.dentry->d_inode,
-+                                 nd.last.name,
-+                                 nd.last.len);
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto exit1;
-+      }
++              error = op->rmdir_raw(&nd);
++              /* the file system wants to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto exit1;
++      }
         down(&nd.dentry->d_inode->i_sem);
  -      dentry = lookup_hash(&nd.last, nd.dentry);
  +      dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
         error = PTR_ERR(dentry);
         if (!IS_ERR(dentry)) {
                 error = vfs_rmdir(nd.dentry->d_inode, dentry);
-@@ -1510,8 +1710,17 @@ asmlinkage long sys_unlink(const char * 
+@@ -1510,8 +1688,15 @@ asmlinkage long sys_unlink(const char * 
         error = -EISDIR;
         if (nd.last_type != LAST_NORM)
                 goto exit1;
-+      if (nd.dentry->d_inode->i_op->unlink2) {
-+              struct inode_operations *op = nd.dentry->d_inode->i_op;
-+              error = op->unlink2(nd.dentry->d_inode,
-+                                  nd.last.name,
-+                                  nd.last.len);
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto exit1;
-+      }
++      if (nd.dentry->d_inode->i_op->unlink_raw) {
++              struct inode_operations *op = nd.dentry->d_inode->i_op;
++              error = op->unlink_raw(&nd);
++              /* the file system wants to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto exit1;
++      }
         down(&nd.dentry->d_inode->i_sem);
  -      dentry = lookup_hash(&nd.last, nd.dentry);
  +      dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
         error = PTR_ERR(dentry);
         if (!IS_ERR(dentry)) {
                 /* Why not before? Because we want correct error value */
-@@ -1578,15 +1787,26 @@ asmlinkage long sys_symlink(const char *
+@@ -1578,15 +1763,23 @@ asmlinkage long sys_symlink(const char *
                 error = path_lookup(to, LOOKUP_PARENT, &nd);
                 if (error)
                         goto out;
  -              dentry = lookup_create(&nd, 0);
-+              if (nd.dentry->d_inode->i_op->symlink2) {
++              if (nd.dentry->d_inode->i_op->symlink_raw) {
  +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
-+                      error = op->symlink2(nd.dentry->d_inode,
-+                                           nd.last.name,
-+                                           nd.last.len,
-+                                           from);
++                      error = op->symlink_raw(&nd, from);
  +                      /* the file system wants to use normal vfs path now */
  +                      if (error != -EOPNOTSUPP)
  +                              goto out2;
@@ -787,17 +902,14 @@
                 putname(to);
         }
         putname(from);
-@@ -1662,7 +1882,17 @@ asmlinkage long sys_link(const char * ol
+@@ -1662,7 +1855,14 @@ asmlinkage long sys_link(const char * ol
                 error = -EXDEV;
                 if (old_nd.mnt != nd.mnt)
                         goto out_release;
  -              new_dentry = lookup_create(&nd, 0);
-+              if (nd.dentry->d_inode->i_op->link2) {
++              if (nd.dentry->d_inode->i_op->link_raw) {
  +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
-+                      error = op->link2(old_nd.dentry->d_inode,
-+                                        nd.dentry->d_inode,
-+                                        nd.last.name,
-+                                        nd.last.len);
++                      error = op->link_raw(&old_nd, &nd);
  +                      /* the file system wants to use normal vfs path now */
  +                      if (error != -EOPNOTSUPP)
  +                              goto out_release;
@@ -806,62 +918,37 @@
                 error = PTR_ERR(new_dentry);
                 if (!IS_ERR(new_dentry)) {
                         error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
-@@ -1706,7 +1936,8 @@ exit:
+@@ -1706,7 +1906,7 @@ exit:
    *       locking].
    */
   int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
  -             struct inode *new_dir, struct dentry *new_dentry)
-+                 struct inode *new_dir, struct dentry *new_dentry,
-+                 struct lookup_intent *it)
++                 struct inode *new_dir, struct dentry *new_dentry)
   {
         int error;
         struct inode *target;
-@@ -1764,6 +1995,7 @@ int vfs_rename_dir(struct inode *old_dir
-               error = -EBUSY;
-       else 
-               error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
-+      intent_release(new_dentry, it);
-       if (target) {
-               if (!error)
-                       target->i_flags |= S_DEAD;
-@@ -1785,7 +2017,8 @@ out_unlock:
+@@ -1785,7 +1985,7 @@ out_unlock:
   }
   
   int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
  -             struct inode *new_dir, struct dentry *new_dentry)
-+                   struct inode *new_dir, struct dentry *new_dentry,
-+                   struct lookup_intent *it)
++                   struct inode *new_dir, struct dentry *new_dentry)
   {
         int error;
   
-@@ -1816,6 +2049,7 @@ int vfs_rename_other(struct inode *old_d
-               error = -EBUSY;
-       else
-               error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
-+      intent_release(new_dentry, it);
-       double_up(&old_dir->i_zombie, &new_dir->i_zombie);
-       if (error)
-               return error;
-@@ -1827,13 +2061,14 @@ int vfs_rename_other(struct inode *old_d
- }
- 
- int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
--             struct inode *new_dir, struct dentry *new_dentry)
-+             struct inode *new_dir, struct dentry *new_dentry,
-+             struct lookup_intent *it)
- {
-       int error;
-       if (S_ISDIR(old_dentry->d_inode->i_mode))
--              error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
-+              error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry,it);
-       else
--              error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
-+              error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,it);
-       if (!error) {
-               if (old_dir == new_dir)
-                       inode_dir_notify(old_dir, DN_RENAME);
-@@ -1875,7 +2110,7 @@ static inline int do_rename(const char *
+@@ -1873,9 +2073,18 @@ static inline int do_rename(const char *
+       if (newnd.last_type != LAST_NORM)
+               goto exit2;
   
++      if (old_dir->d_inode->i_op->rename_raw) {
++              lock_kernel();
++              error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd);
++              unlock_kernel();
++              /* the file system wants to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto exit2;
++      }
++
         double_lock(new_dir, old_dir);
   
  -      old_dentry = lookup_hash(&oldnd.last, old_dir);
@@ -869,7 +956,7 @@
         error = PTR_ERR(old_dentry);
         if (IS_ERR(old_dentry))
                 goto exit3;
-@@ -1891,16 +2126,37 @@ static inline int do_rename(const char *
+@@ -1891,16 +2100,16 @@ static inline int do_rename(const char *
                 if (newnd.last.name[newnd.last.len])
                         goto exit4;
         }
@@ -879,38 +966,16 @@
         if (IS_ERR(new_dentry))
                 goto exit4;
   
-+      if (old_dir->d_inode->i_op->rename2) {
-+              lock_kernel();
-+              /* don't rename mount point. mds will take care of
-+               * the rest sanity checking */
-+              if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) {
-+                      error = -EBUSY;
-+                      goto exit5;
-+              }
-+
-+              error = old_dir->d_inode->i_op->rename2(old_dir->d_inode,
-+                                                      new_dir->d_inode,
-+                                                      oldnd.last.name,
-+                                                      oldnd.last.len,
-+                                                      newnd.last.name,
-+                                                      newnd.last.len);
-+              unlock_kernel();
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto exit5;
-+      }
  +
         lock_kernel();
         error = vfs_rename(old_dir->d_inode, old_dentry,
--                                 new_dir->d_inode, new_dentry);
-+                                 new_dir->d_inode, new_dentry, NULL);
+                                  new_dir->d_inode, new_dentry);
         unlock_kernel();
  -
-+exit5:
         dput(new_dentry);
   exit4:
         dput(old_dentry);
-@@ -1951,20 +2207,28 @@ out:
+@@ -1951,20 +2160,28 @@ out:
   }
   
   static inline int
@@ -923,12 +988,12 @@
         if (IS_ERR(link))
                 goto fail;
   
-+        if (it == NULL)
-+                it = nd->it;
-+        else if (it != nd->it)
-+                printk("it != nd->it: tell phil@clusterfs.com\n");
-+        if (it != NULL)
-+                it->it_int_flags |= IT_FL_FOLLOWED;
++      if (it == NULL)
++              it = nd->it;
++      else if (it != nd->it)
++              printk("it != nd->it: tell phil@clusterfs.com\n");
++      if (it != NULL)
++              it->it_int_flags |= IT_FL_FOLLOWED;
  +
         if (*link == '/') {
                 path_release(nd);
@@ -941,7 +1006,7 @@
   out:
         if (current->link_count || res || nd->last_type!=LAST_NORM)
                 return res;
-@@ -1986,7 +2250,13 @@ fail:
+@@ -1986,7 +2203,13 @@ fail:
   
   int vfs_follow_link(struct nameidata *nd, const char *link)
   {
@@ -956,7 +1021,7 @@
   }
   
   /* get the link contents into pagecache */
-@@ -2028,7 +2298,7 @@ int page_follow_link(struct dentry *dent
+@@ -2028,7 +2251,7 @@ int page_follow_link(struct dentry *dent
   {
         struct page *page = NULL;
         char *s = page_getlink(dentry, &page);
@@ -965,19 +1030,8 @@
         if (page) {
                 kunmap(page);
                 page_cache_release(page);
---- linux-2.4.20-l18/fs/nfsd/vfs.c~vfs_intent-2.4.20-vanilla   Thu Nov 28 18:53:15 2002
-+++ linux-2.4.20-l18-phil/fs/nfsd/vfs.c        Wed May 28 01:39:18 2003
-@@ -1291,7 +1291,7 @@ nfsd_rename(struct svc_rqst *rqstp, stru
-                       err = nfserr_perm;
-       } else
- #endif
--      err = vfs_rename(fdir, odentry, tdir, ndentry);
-+      err = vfs_rename(fdir, odentry, tdir, ndentry, NULL);
-       if (!err && EX_ISSYNC(tfhp->fh_export)) {
-               nfsd_sync_dir(tdentry);
-               nfsd_sync_dir(fdentry);
---- linux-2.4.20-l18/fs/open.c~vfs_intent-2.4.20-vanilla       Thu Nov 28 18:53:15 2002
-+++ linux-2.4.20-l18-phil/fs/open.c    Wed May 28 01:39:18 2003
+--- linux-2.4.20-ad/fs/open.c~vfs_intent-2.4.20-vanilla        2002-11-28 16:53:15.000000000 -0700
++++ linux-2.4.20-ad-braam/fs/open.c    2003-07-08 13:51:14.000000000 -0600
  @@ -19,6 +19,8 @@
   #include <asm/uaccess.h>
   
@@ -999,7 +1053,7 @@
         int error;
         struct iattr newattrs;
   
-@@ -108,7 +111,14 @@ int do_truncate(struct dentry *dentry, l
+@@ -108,7 +111,13 @@ int do_truncate(struct dentry *dentry, l
         down(&inode->i_sem);
         newattrs.ia_size = length;
         newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
@@ -1008,9 +1062,8 @@
  +              newattrs.ia_valid |= ATTR_FROM_OPEN;
  +      if (op->setattr_raw) {
  +              newattrs.ia_valid |= ATTR_RAW;
-+              newattrs.ia_ctime = CURRENT_TIME;
  +              error = op->setattr_raw(inode, &newattrs);
-+      } else 
++      } else
  +              error = notify_change(dentry, &newattrs);
         up(&inode->i_sem);
         return error;
@@ -1035,13 +1088,13 @@
         if (!error) {
                 DQUOT_INIT(inode);
  -              error = do_truncate(nd.dentry, length);
-+              intent_release(nd.dentry, &it);
++              intent_release(&it);
  +              error = do_truncate(nd.dentry, length, 0);
         }
         put_write_access(inode);
   
   dput_and_out:
-+      intent_release(nd.dentry, &it);
++      intent_release(&it);
         path_release(&nd);
   out:
         return error;
@@ -1069,7 +1122,7 @@
         error = -EROFS;
         if (IS_RDONLY(inode))
                 goto dput_and_out;
-@@ -279,11 +294,29 @@ asmlinkage long sys_utime(char * filenam
+@@ -279,11 +294,25 @@ asmlinkage long sys_utime(char * filenam
                         goto dput_and_out;
   
                 newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
@@ -1086,10 +1139,6 @@
  +                      goto dput_and_out;
  +      }
  +
-+      error = -EROFS;
-+      if (IS_RDONLY(inode))
-+              goto dput_and_out;
-+
  +      error = -EPERM;
  +      if (!times) {
                 if (current->fsuid != inode->i_uid &&
@@ -1158,7 +1207,7 @@
                 if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
                    && !special_file(nd.dentry->d_inode->i_mode))
                         res = -EROFS;
-+              intent_release(nd.dentry, &it);
++              intent_release(&it);
                 path_release(&nd);
         }
   
@@ -1177,7 +1226,7 @@
         set_fs_pwd(current->fs, nd.mnt, nd.dentry);
   
   dput_and_out:
-+      intent_release(nd.dentry, &it);
++      intent_release(&it);
         path_release(&nd);
   out:
         return error;
@@ -1198,7 +1247,7 @@
         set_fs_altroot();
         error = 0;
   dput_and_out:
-+      intent_release(nd.dentry, &it);
++      intent_release(&it);
         path_release(&nd);
   out:
         return error;
@@ -1231,7 +1280,7 @@
  +
  +              newattrs.ia_uid = user;
  +              newattrs.ia_gid = group;
-+              newattrs.ia_valid = ATTR_UID | ATTR_GID;
++              newattrs.ia_valid = ATTR_UID | ATTR_GID | ATTR_CTIME;
  +              newattrs.ia_valid |= ATTR_RAW;
  +              error = op->setattr_raw(inode, &newattrs);
  +              /* the file system wants to use normal vfs path now */
@@ -1276,19 +1325,27 @@
   {
         struct file * f;
         struct inode *inode;
-@@ -699,6 +782,7 @@ struct file *dentry_open(struct dentry *
+@@ -693,12 +776,15 @@ struct file *dentry_open(struct dentry *
+       }
+ 
+       if (f->f_op && f->f_op->open) {
++              f->f_it = it;
+               error = f->f_op->open(inode,f);
++              f->f_it = NULL;
+               if (error)
+                       goto cleanup_all;
         }
         f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
   
-+      intent_release(dentry, it);
++      intent_release(it);
         return f;
   
   cleanup_all:
-@@ -713,11 +797,17 @@ cleanup_all:
+@@ -713,11 +799,17 @@ cleanup_all:
   cleanup_file:
         put_filp(f);
   cleanup_dentry:
-+      intent_release(dentry, it);
++      intent_release(it);
         dput(dentry);
         mntput(mnt);
         return ERR_PTR(error);
@@ -1302,9 +1359,24 @@
   /*
    * Find an empty file descriptor entry, and mark it busy.
    */
---- linux-2.4.20-l18/fs/stat.c~vfs_intent-2.4.20-vanilla       Thu Sep 13 19:04:43 2001
-+++ linux-2.4.20-l18-phil/fs/stat.c    Wed May 28 01:39:18 2003
-@@ -135,13 +135,15 @@ static int cp_new_stat(struct inode * in
+--- linux-2.4.20-ad/fs/stat.c~vfs_intent-2.4.20-vanilla        2001-09-13 17:04:43.000000000 -0600
++++ linux-2.4.20-ad-braam/fs/stat.c    2003-07-07 15:13:53.000000000 -0600
+@@ -17,10 +17,12 @@
+  * Revalidate the inode. This is required for proper NFS attribute caching.
+  */
+ static __inline__ int
+-do_revalidate(struct dentry *dentry)
++do_revalidate(struct dentry *dentry, struct lookup_intent *it)
+ {
+       struct inode * inode = dentry->d_inode;
+-      if (inode->i_op && inode->i_op->revalidate)
++      if (inode->i_op && inode->i_op->revalidate_it)
++              return inode->i_op->revalidate_it(dentry, it);
++      else if (inode->i_op && inode->i_op->revalidate)
+               return inode->i_op->revalidate(dentry);
+       return 0;
+ }
+@@ -135,13 +137,15 @@ static int cp_new_stat(struct inode * in
   asmlinkage long sys_stat(char * filename, struct __old_kernel_stat * statbuf)
   {
         struct nameidata nd;
@@ -1314,14 +1386,15 @@
  -      error = user_path_walk(filename, &nd);
  +      error = user_path_walk_it(filename, &nd, &it);
         if (!error) {
-               error = do_revalidate(nd.dentry);
+-              error = do_revalidate(nd.dentry);
++              error = do_revalidate(nd.dentry, &it);
                 if (!error)
                         error = cp_old_stat(nd.dentry->d_inode, statbuf);
-+              intent_release(nd.dentry, &it);
++              intent_release(&it);
                 path_release(&nd);
         }
         return error;
-@@ -151,13 +153,15 @@ asmlinkage long sys_stat(char * filename
+@@ -151,13 +155,15 @@ asmlinkage long sys_stat(char * filename
   asmlinkage long sys_newstat(char * filename, struct stat * statbuf)
   {
         struct nameidata nd;
@@ -1331,14 +1404,15 @@
  -      error = user_path_walk(filename, &nd);
  +      error = user_path_walk_it(filename, &nd, &it);
         if (!error) {
-               error = do_revalidate(nd.dentry);
+-              error = do_revalidate(nd.dentry);
++              error = do_revalidate(nd.dentry, &it);
                 if (!error)
                         error = cp_new_stat(nd.dentry->d_inode, statbuf);
-+              intent_release(nd.dentry, &it);
++              intent_release(&it);
                 path_release(&nd);
         }
         return error;
-@@ -172,13 +176,15 @@ asmlinkage long sys_newstat(char * filen
+@@ -172,13 +178,15 @@ asmlinkage long sys_newstat(char * filen
   asmlinkage long sys_lstat(char * filename, struct __old_kernel_stat * statbuf)
   {
         struct nameidata nd;
@@ -1348,14 +1422,15 @@
  -      error = user_path_walk_link(filename, &nd);
  +      error = user_path_walk_link_it(filename, &nd, &it);
         if (!error) {
-               error = do_revalidate(nd.dentry);
+-              error = do_revalidate(nd.dentry);
++              error = do_revalidate(nd.dentry, &it);
                 if (!error)
                         error = cp_old_stat(nd.dentry->d_inode, statbuf);
-+              intent_release(nd.dentry, &it);
++              intent_release(&it);
                 path_release(&nd);
         }
         return error;
-@@ -189,13 +195,15 @@ asmlinkage long sys_lstat(char * filenam
+@@ -189,13 +197,15 @@ asmlinkage long sys_lstat(char * filenam
   asmlinkage long sys_newlstat(char * filename, struct stat * statbuf)
   {
         struct nameidata nd;
@@ -1365,14 +1440,42 @@
  -      error = user_path_walk_link(filename, &nd);
  +      error = user_path_walk_link_it(filename, &nd, &it);
         if (!error) {
-               error = do_revalidate(nd.dentry);
+-              error = do_revalidate(nd.dentry);
++              error = do_revalidate(nd.dentry, &it);
                 if (!error)
                         error = cp_new_stat(nd.dentry->d_inode, statbuf);
-+              intent_release(nd.dentry, &it);
++              intent_release(&it);
                 path_release(&nd);
         }
         return error;
-@@ -333,12 +341,14 @@ asmlinkage long sys_stat64(char * filena
+@@ -216,7 +226,7 @@ asmlinkage long sys_fstat(unsigned int f
+       if (f) {
+               struct dentry * dentry = f->f_dentry;
+ 
+-              err = do_revalidate(dentry);
++              err = do_revalidate(dentry, NULL);
+               if (!err)
+                       err = cp_old_stat(dentry->d_inode, statbuf);
+               fput(f);
+@@ -235,7 +245,7 @@ asmlinkage long sys_newfstat(unsigned in
+       if (f) {
+               struct dentry * dentry = f->f_dentry;
+ 
+-              err = do_revalidate(dentry);
++              err = do_revalidate(dentry, NULL);
+               if (!err)
+                       err = cp_new_stat(dentry->d_inode, statbuf);
+               fput(f);
+@@ -257,7 +267,7 @@ asmlinkage long sys_readlink(const char 
+ 
+               error = -EINVAL;
+               if (inode->i_op && inode->i_op->readlink &&
+-                  !(error = do_revalidate(nd.dentry))) {
++                  !(error = do_revalidate(nd.dentry, NULL))) {
+                       UPDATE_ATIME(inode);
+                       error = inode->i_op->readlink(nd.dentry, buf, bufsiz);
+               }
+@@ -333,12 +343,14 @@ asmlinkage long sys_stat64(char * filena
   {
         struct nameidata nd;
         int error;
@@ -1381,14 +1484,15 @@
  -      error = user_path_walk(filename, &nd);
  +      error = user_path_walk_it(filename, &nd, &it);
         if (!error) {
-               error = do_revalidate(nd.dentry);
+-              error = do_revalidate(nd.dentry);
++              error = do_revalidate(nd.dentry, &it);
                 if (!error)
                         error = cp_new_stat64(nd.dentry->d_inode, statbuf);
-+              intent_release(nd.dentry, &it);
++              intent_release(&it);
                 path_release(&nd);
         }
         return error;
-@@ -348,12 +358,14 @@ asmlinkage long sys_lstat64(char * filen
+@@ -348,12 +360,14 @@ asmlinkage long sys_lstat64(char * filen
   {
         struct nameidata nd;
         int error;
@@ -1397,43 +1501,60 @@
  -      error = user_path_walk_link(filename, &nd);
  +      error = user_path_walk_link_it(filename, &nd, &it);
         if (!error) {
-               error = do_revalidate(nd.dentry);
+-              error = do_revalidate(nd.dentry);
++              error = do_revalidate(nd.dentry, &it);
                 if (!error)
                         error = cp_new_stat64(nd.dentry->d_inode, statbuf);
-+              intent_release(nd.dentry, &it);
++              intent_release(&it);
                 path_release(&nd);
         }
         return error;
---- linux-2.4.20-l18/fs/proc/base.c~vfs_intent-2.4.20-vanilla  Wed Jun  4 22:53:14 2003
-+++ linux-2.4.20-l18-phil/fs/proc/base.c       Wed Jun  4 22:50:35 2003
+@@ -368,7 +382,7 @@ asmlinkage long sys_fstat64(unsigned lon
+       if (f) {
+               struct dentry * dentry = f->f_dentry;
+ 
+-              err = do_revalidate(dentry);
++              err = do_revalidate(dentry, NULL);
+               if (!err)
+                       err = cp_new_stat64(dentry->d_inode, statbuf);
+               fput(f);
+--- linux-2.4.20-ad/fs/proc/base.c~vfs_intent-2.4.20-vanilla   2002-08-02 18:39:45.000000000 -0600
++++ linux-2.4.20-ad-braam/fs/proc/base.c       2003-07-07 15:13:53.000000000 -0600
  @@ -464,6 +464,9 @@ static int proc_pid_follow_link(struct d
   
         error = inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt);
         nd->last_type = LAST_BIND;
  +
-+        if (nd->it != NULL)
-+                nd->it->it_int_flags |= IT_FL_FOLLOWED;
++      if (nd->it != NULL)
++              nd->it->it_int_flags |= IT_FL_FOLLOWED;
   out:
         return error;
   }
---- linux-2.4.20-l18/include/linux/dcache.h~vfs_intent-2.4.20-vanilla  Thu Nov 28 18:53:15 2002
-+++ linux-2.4.20-l18-phil/include/linux/dcache.h       Sun Jun  1 22:35:10 2003
-@@ -7,6 +7,28 @@
+--- linux-2.4.20-ad/include/linux/dcache.h~vfs_intent-2.4.20-vanilla   2002-11-28 16:53:15.000000000 -0700
++++ linux-2.4.20-ad-braam/include/linux/dcache.h       2003-07-09 01:40:11.000000000 -0600
+@@ -7,6 +7,44 @@
   #include <linux/mount.h>
   #include <linux/kernel.h>
   
-+#define IT_OPEN     (1)
-+#define IT_CREAT    (1<<1)
-+#define IT_READDIR  (1<<2)
-+#define IT_GETATTR  (1<<3)
-+#define IT_LOOKUP   (1<<4)
-+#define IT_UNLINK   (1<<5)
++#define IT_OPEN     0x0001
++#define IT_CREAT    0x0002
++#define IT_READDIR  0x0004
++#define IT_GETATTR  0x0008
++#define IT_LOOKUP   0x0010
++#define IT_UNLINK   0x0020
++#define IT_GETXATTR 0x0040
++#define IT_EXEC     0x0080
++#define IT_PIN      0x0100
++
++#define IT_FL_LOCKED   0x0001
++#define IT_FL_FOLLOWED 0x0002 /* set by vfs_follow_link */
  +
-+#define IT_FL_LOCKED   (1)
-+#define IT_FL_FOLLOWED (1<<1) /* set by vfs_follow_link */
++#define INTENT_MAGIC 0x19620323
  +
  +struct lookup_intent {
  +      int it_op;
++      void (*it_op_release)(struct lookup_intent *);
++      int it_magic;
  +      int it_mode;
  +      int it_flags;
  +      int it_disposition;
@@ -1444,34 +1565,42 @@
  +      void *it_data;
  +};
  +
++static inline void intent_init(struct lookup_intent *it, int op, int flags)
++{
++      memset(it, 0, sizeof(*it));
++      it->it_magic = INTENT_MAGIC;
++      it->it_op = op;
++      it->it_flags = flags;
++}
++
++
   /*
    * linux/include/linux/dcache.h
    *
-@@ -79,6 +101,7 @@ struct dentry {
-       unsigned long d_time;           /* used by d_revalidate */
-       struct dentry_operations  *d_op;
-       struct super_block * d_sb;      /* The root of the dentry tree */
-+      struct lookup_intent *d_it;
-       unsigned long d_vfs_flags;
-       void * d_fsdata;                /* fs-specific data */
-       unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */
-@@ -91,8 +114,15 @@ struct dentry_operations {
+@@ -91,8 +129,22 @@ struct dentry_operations {
         int (*d_delete)(struct dentry *);
         void (*d_release)(struct dentry *);
         void (*d_iput)(struct dentry *, struct inode *);
-+      int (*d_revalidate2)(struct dentry *, int, struct lookup_intent *);
-+      void (*d_intent_release)(struct dentry *, struct lookup_intent *);
++      int (*d_revalidate_it)(struct dentry *, int, struct lookup_intent *);
++      void (*d_pin)(struct dentry *, struct vfsmount * , int);
++      void (*d_unpin)(struct dentry *, struct vfsmount *, int);
   };
   
++#define PIN(de,mnt,flag)  if (de->d_op && de->d_op->d_pin) \
++                              de->d_op->d_pin(de, mnt, flag);
++#define UNPIN(de,mnt,flag)  if (de->d_op && de->d_op->d_unpin) \
++                              de->d_op->d_unpin(de, mnt, flag);
++
++
  +/* defined in fs/namei.c */
-+extern void intent_release(struct dentry *de, struct lookup_intent *it);
++extern void intent_release(struct lookup_intent *it);
  +/* defined in fs/dcache.c */
  +extern void __d_rehash(struct dentry * entry, int lock);
  +
   /* the dentry parameter passed to d_hash and d_compare is the parent
    * directory of the entries to be compared. It is used in case these
    * functions need any directory specific information for determining
-@@ -124,6 +154,7 @@ d_iput:            no              no              yes
+@@ -124,6 +176,7 @@ d_iput:            no              no              yes
                                          * s_nfsd_free_path semaphore will be down
                                          */
   #define DCACHE_REFERENCED     0x0008  /* Recently used, don't discard. */
@@ -1479,14 +1608,15 @@
   
   extern spinlock_t dcache_lock;
   
---- linux-2.4.20-l18/include/linux/fs.h~vfs_intent-2.4.20-vanilla      Wed May 28 01:39:17 2003
-+++ linux-2.4.20-l18-phil/include/linux/fs.h   Sun Jun  1 22:07:11 2003
-@@ -338,6 +338,8 @@ extern void set_bh_page(struct buffer_he
+--- linux-2.4.20/include/linux/fs.h~vfs_intent-2.4.20-vanilla  2003-06-12 03:24:59.000000000 -0600
++++ linux-2.4.20-braam/include/linux/fs.h      2003-06-12 03:25:00.000000000 -0600
+@@ -338,6 +338,9 @@ extern void set_bh_page(struct buffer_he
   #define ATTR_MTIME_SET        256
   #define ATTR_FORCE    512     /* Not a change, but a change it */
   #define ATTR_ATTR_FLAG        1024
-+#define ATTR_RAW      2048    /* file system, not vfs will massage attrs */
-+#define ATTR_FROM_OPEN        4096    /* called from open path, ie O_TRUNC */
++#define ATTR_RAW      0x0800  /* file system, not vfs will massage attrs */
++#define ATTR_FROM_OPEN        0x1000  /* called from open path, ie O_TRUNC */
++#define ATTR_CTIME_SET 0x2000
   
   /*
    * This is the Inode Attributes structure, used for notify_change().  It
@@ -1494,7 +1624,7 @@
   
         /* needed for tty driver, and maybe others */
         void                    *private_data;
-+      struct lookup_intent    *f_intent;
++      struct lookup_intent    *f_it;
   
         /* preallocated helper kiobuf to speedup O_DIRECT */
         struct kiobuf           *f_iobuf;
@@ -1502,56 +1632,54 @@
         struct qstr last;
         unsigned int flags;
         int last_type;
-+        struct lookup_intent *it;
++      struct lookup_intent *it;
   };
   
   #define DQUOT_USR_ENABLED     0x01            /* User diskquotas enabled */
-@@ -794,7 +798,9 @@ extern int vfs_symlink(struct inode *, s
+@@ -794,7 +798,8 @@ extern int vfs_symlink(struct inode *, s
   extern int vfs_link(struct dentry *, struct inode *, struct dentry *);
   extern int vfs_rmdir(struct inode *, struct dentry *);
   extern int vfs_unlink(struct inode *, struct dentry *);
  -extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
  +int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
-+              struct inode *new_dir, struct dentry *new_dentry,
-+              struct lookup_intent *it);
++             struct inode *new_dir, struct dentry *new_dentry);
   
   /*
    * File types
-@@ -855,20 +861,33 @@ struct file_operations {
+@@ -854,21 +859,32 @@ struct file_operations {
+ 
   struct inode_operations {
         int (*create) (struct inode *,struct dentry *,int);
++      int (*create_it) (struct inode *,struct dentry *,int, struct lookup_intent *);
         struct dentry * (*lookup) (struct inode *,struct dentry *);
-+      struct dentry * (*lookup2) (struct inode *,struct dentry *, struct lookup_intent *);
++      struct dentry * (*lookup_it) (struct inode *,struct dentry *, struct lookup_intent *, int flags);
         int (*link) (struct dentry *,struct inode *,struct dentry *);
-+      int (*link2) (struct inode *,struct inode *, const char *, int);
++      int (*link_raw) (struct nameidata *,struct nameidata *);
         int (*unlink) (struct inode *,struct dentry *);
-+      int (*unlink2) (struct inode *, const char *, int);
++      int (*unlink_raw) (struct nameidata *);
         int (*symlink) (struct inode *,struct dentry *,const char *);
-+      int (*symlink2) (struct inode *, const char *, int, const char *);
++      int (*symlink_raw) (struct nameidata *,const char *);
         int (*mkdir) (struct inode *,struct dentry *,int);
-+      int (*mkdir2) (struct inode *, const char *, int,int);
++      int (*mkdir_raw) (struct nameidata *,int);
         int (*rmdir) (struct inode *,struct dentry *);
-+      int (*rmdir2) (struct inode *, const char *, int);
++      int (*rmdir_raw) (struct nameidata *);
         int (*mknod) (struct inode *,struct dentry *,int,int);
-+      int (*mknod2) (struct inode *, const char *, int,int,int);
++      int (*mknod_raw) (struct nameidata *,int,dev_t);
         int (*rename) (struct inode *, struct dentry *,
                         struct inode *, struct dentry *);
-+      int (*rename2) (struct inode *, struct inode *,
-+                      const char *oldname, int oldlen,
-+                      const char *newname, int newlen);
++      int (*rename_raw) (struct nameidata *, struct nameidata *);
         int (*readlink) (struct dentry *, char *,int);
         int (*follow_link) (struct dentry *, struct nameidata *);
-+      int (*follow_link2) (struct dentry *, struct nameidata *,
-+                           struct lookup_intent *it);
         void (*truncate) (struct inode *);
         int (*permission) (struct inode *, int);
         int (*revalidate) (struct dentry *);
++      int (*revalidate_it) (struct dentry *, struct lookup_intent *);
         int (*setattr) (struct dentry *, struct iattr *);
-+      int (*setattr_raw) (struct inode *, struct iattr *);
++      int (*setattr_raw) (struct inode *, struct iattr *);
         int (*getattr) (struct dentry *, struct iattr *);
         int (*setxattr) (struct dentry *, const char *, void *, size_t, int);
         ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
-@@ -1070,10 +1089,14 @@ static inline int get_lease(struct inode
+@@ -1070,10 +1086,14 @@ static inline int get_lease(struct inode
   
   asmlinkage long sys_open(const char *, int, int);
   asmlinkage long sys_close(unsigned int);      /* yes, it's really unsigned */
@@ -1567,7 +1695,7 @@
   extern int filp_close(struct file *, fl_owner_t id);
   extern char * getname(const char *);
   
-@@ -1335,6 +1358,7 @@ typedef int (*read_actor_t)(read_descrip
+@@ -1335,6 +1355,7 @@ typedef int (*read_actor_t)(read_descrip
   extern loff_t default_llseek(struct file *file, loff_t offset, int origin);
   
   extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *));
@@ -1575,7 +1703,7 @@
   extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *));
   extern int FASTCALL(path_walk(const char *, struct nameidata *));
   extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *));
-@@ -1346,6 +1370,8 @@ extern struct dentry * lookup_one_len(co
+@@ -1346,6 +1367,8 @@ extern struct dentry * lookup_one_len(co
   extern struct dentry * lookup_hash(struct qstr *, struct dentry *);
   #define user_path_walk(name,nd)        __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd)
   #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd)
@@ -1584,7 +1712,7 @@
   
   extern void iput(struct inode *);
   extern void force_delete(struct inode *);
-@@ -1455,6 +1481,8 @@ extern struct file_operations generic_ro
+@@ -1455,6 +1478,8 @@ extern struct file_operations generic_ro
   
   extern int vfs_readlink(struct dentry *, char *, int, const char *);
   extern int vfs_follow_link(struct nameidata *, const char *);
@@ -1593,8 +1721,36 @@
   extern int page_readlink(struct dentry *, char *, int);
   extern int page_follow_link(struct dentry *, struct nameidata *);
   extern struct inode_operations page_symlink_inode_operations;
---- linux-2.4.20-l18/kernel/ksyms.c~vfs_intent-2.4.20-vanilla  Wed May 28 01:39:18 2003
-+++ linux-2.4.20-l18-phil/kernel/ksyms.c       Wed May 28 01:39:18 2003
+--- linux-2.4.20-ad/include/linux/fs_struct.h~vfs_intent-2.4.20-vanilla        2001-07-13 16:10:44.000000000 -0600
++++ linux-2.4.20-ad-braam/include/linux/fs_struct.h    2003-07-07 15:13:53.000000000 -0600
+@@ -34,10 +34,12 @@ static inline void set_fs_root(struct fs
+       write_lock(&fs->lock);
+       old_root = fs->root;
+       old_rootmnt = fs->rootmnt;
++      PIN(dentry, mnt, 1);
+       fs->rootmnt = mntget(mnt);
+       fs->root = dget(dentry);
+       write_unlock(&fs->lock);
+       if (old_root) {
++              UNPIN(old_root, old_rootmnt, 1);
+               dput(old_root);
+               mntput(old_rootmnt);
+       }
+@@ -57,10 +59,12 @@ static inline void set_fs_pwd(struct fs_
+       write_lock(&fs->lock);
+       old_pwd = fs->pwd;
+       old_pwdmnt = fs->pwdmnt;
++      PIN(dentry, mnt, 0);
+       fs->pwdmnt = mntget(mnt);
+       fs->pwd = dget(dentry);
+       write_unlock(&fs->lock);
+       if (old_pwd) {
++              UNPIN(old_pwd, old_pwdmnt, 0);
+               dput(old_pwd);
+               mntput(old_pwdmnt);
+       }
+--- linux-2.4.20-ad/kernel/ksyms.c~vfs_intent-2.4.20-vanilla   2003-07-07 15:13:52.000000000 -0600
++++ linux-2.4.20-ad-braam/kernel/ksyms.c       2003-07-07 15:13:53.000000000 -0600
  @@ -269,6 +269,7 @@ EXPORT_SYMBOL(read_cache_page);
   EXPORT_SYMBOL(set_page_dirty);
   EXPORT_SYMBOL(vfs_readlink);
@@ -1603,5 +1759,38 @@
   EXPORT_SYMBOL(page_readlink);
   EXPORT_SYMBOL(page_follow_link);
   EXPORT_SYMBOL(page_symlink_inode_operations);
+--- linux-2.4.20-ad/kernel/fork.c~vfs_intent-2.4.20-vanilla    2002-11-28 16:53:15.000000000 -0700
++++ linux-2.4.20-ad-braam/kernel/fork.c        2003-07-07 15:13:53.000000000 -0600
+@@ -384,10 +384,13 @@ static inline struct fs_struct *__copy_f
+               fs->umask = old->umask;
+               read_lock(&old->lock);
+               fs->rootmnt = mntget(old->rootmnt);
++              PIN(old->pwd, old->pwdmnt, 0);
++              PIN(old->root, old->rootmnt, 1);
+               fs->root = dget(old->root);
+               fs->pwdmnt = mntget(old->pwdmnt);
+               fs->pwd = dget(old->pwd);
+               if (old->altroot) {
++                      PIN(old->altroot, old->altrootmnt, 1);
+                       fs->altrootmnt = mntget(old->altrootmnt);
+                       fs->altroot = dget(old->altroot);
+               } else {
+--- linux-2.4.20-ad/kernel/exit.c~vfs_intent-2.4.20-vanilla    2002-11-28 16:53:15.000000000 -0700
++++ linux-2.4.20-ad-braam/kernel/exit.c        2003-07-07 15:13:53.000000000 -0600
+@@ -238,11 +238,14 @@ static inline void __put_fs_struct(struc
+ {
+       /* No need to hold fs->lock if we are killing it */
+       if (atomic_dec_and_test(&fs->count)) {
++              UNPIN(fs->pwd, fs->pwdmnt, 0);
++              UNPIN(fs->root, fs->rootmnt, 1);
+               dput(fs->root);
+               mntput(fs->rootmnt);
+               dput(fs->pwd);
+               mntput(fs->pwdmnt);
+               if (fs->altroot) {
++                      UNPIN(fs->altroot, fs->altrootmnt, 1);
+                       dput(fs->altroot);
+                       mntput(fs->altrootmnt);
+               }
  
  _
diff --git a/lustre/kernel_patches/pc/ext3-delete_thread-2.4.18.pc b/lustre/kernel_patches/pc/ext3-delete_thread-2.4.18.pc

index 5770132..1afa4d4 100644 (file)
--- a/lustre/kernel_patches/pc/ext3-delete_thread-2.4.18.pc
+++ b/lustre/kernel_patches/pc/ext3-delete_thread-2.4.18.pc
@@ -1,3 +1,5 @@
  fs/ext3/super.c
+fs/ext3/file.c
+fs/ext3/inode.c
  include/linux/ext3_fs.h
  include/linux/ext3_fs_sb.h
diff --git a/lustre/kernel_patches/pc/ext3-delete_thread-2.4.20.pc b/lustre/kernel_patches/pc/ext3-delete_thread-2.4.20.pc

index 5770132..a2c3109 100644 (file)
--- a/lustre/kernel_patches/pc/ext3-delete_thread-2.4.20.pc
+++ b/lustre/kernel_patches/pc/ext3-delete_thread-2.4.20.pc
@@ -1,3 +1,5 @@
  fs/ext3/super.c
+fs/ext3/inode.c
+fs/ext3/file.c
  include/linux/ext3_fs.h
  include/linux/ext3_fs_sb.h
diff --git a/lustre/kernel_patches/pc/extN-wantedi.pc b/lustre/kernel_patches/pc/extN-wantedi.pc

index 31901ee..6ad2589 100644 (file)
--- a/lustre/kernel_patches/pc/extN-wantedi.pc
+++ b/lustre/kernel_patches/pc/extN-wantedi.pc
@@ -1,4 +1,5 @@
  fs/ext3/namei.c
  fs/ext3/ialloc.c
+fs/ext3/inode.c
  fs/ext3/ioctl.c
  include/linux/ext3_fs.h
diff --git a/lustre/kernel_patches/pc/linux-2.4.18ea-0.8.26.pc b/lustre/kernel_patches/pc/linux-2.4.18ea-0.8.26.pc

index b647d5a..6c80106 100644 (file)
--- a/lustre/kernel_patches/pc/linux-2.4.18ea-0.8.26.pc
+++ b/lustre/kernel_patches/pc/linux-2.4.18ea-0.8.26.pc
@@ -2,6 +2,7 @@ fs/ext3/ialloc.c
  fs/ext3/inode.c
  fs/ext3/namei.c
  fs/ext3/super.c
+fs/ext3/ext3-exports.c
  fs/ext3/xattr.c
  include/linux/ext3_fs.h
  include/linux/ext3_jbd.h
diff --git a/lustre/kernel_patches/pc/vfs_intent-2.4.20-vanilla.pc b/lustre/kernel_patches/pc/vfs_intent-2.4.20-vanilla.pc

index f8a99ea..f3375a3 100644 (file)
--- a/lustre/kernel_patches/pc/vfs_intent-2.4.20-vanilla.pc
+++ b/lustre/kernel_patches/pc/vfs_intent-2.4.20-vanilla.pc
@@ -1,5 +1,6 @@
  fs/exec.c
  fs/dcache.c
+fs/namespace.c
  fs/namei.c
  fs/nfsd/vfs.c
  fs/open.c
@@ -7,4 +8,7 @@ fs/stat.c
  fs/proc/base.c
  include/linux/dcache.h
  include/linux/fs.h
+include/linux/fs_struct.h
  kernel/ksyms.c
+kernel/fork.c
+kernel/exit.c
diff --git a/lustre/kernel_patches/scripts/patchfns b/lustre/kernel_patches/scripts/patchfns

index 78e494b..8d3d4f0 100644 (file)
--- a/lustre/kernel_patches/scripts/patchfns
+++ b/lustre/kernel_patches/scripts/patchfns
@@ -78,7 +78,7 @@ check_pc_match()
                 if [ $? != 0 ]; then
                         echo " $1 do not match with $2 "
                         echo " $2 will be changed to match $2"
-                       cat $tmpfile > $P/pc/$PATCH_NAME.pc
+                       # cat $tmpfile > $P/pc/$PATCH_NAME.pc
                 fi
                 rm -rf $tmpfile
         fi
diff --git a/lustre/kernel_patches/series/hp-pnnl-2.4.20 b/lustre/kernel_patches/series/hp-pnnl-2.4.20

index b951209..c2cc2fa 100644 (file)
--- a/lustre/kernel_patches/series/hp-pnnl-2.4.20
+++ b/lustre/kernel_patches/series/hp-pnnl-2.4.20
@@ -2,7 +2,7 @@ dev_read_only_hp_2.4.20.patch
  exports_2.4.20-rh-hp.patch
  kmem_cache_validate_hp.patch
  lustre_version.patch
-vfs_intent-2.4.20-vanilla.patch
+vfs_intent-2.4.20-hp.patch
  invalidate_show.patch
  export-truncate.patch
  iod-stock-24-exports_hp.patch
@@ -21,5 +21,7 @@ ext3-delete_thread-2.4.20.patch
  ext3-noread-2.4.20.patch
  extN-wantedi.patch
  ext3-san-2.4.20.patch
+ext3-map_inode_page.patch
+ext3-error-export.patch
  iopen-2.4.20.patch
  tcp-zero-copy.patch
diff --git a/lustre/kernel_patches/series/rh-2.4.20 b/lustre/kernel_patches/series/rh-2.4.20

index a97c37c..970061d 100644 (file)
--- a/lustre/kernel_patches/series/rh-2.4.20
+++ b/lustre/kernel_patches/series/rh-2.4.20
@@ -15,9 +15,12 @@ ext-2.4-patch-4.patch
  linux-2.4.20-xattr-0.8.54-chaos.patch
  ext3-2.4.20-fixes.patch
  ext3_orphan_lock-2.4.20-rh.patch
-ext3-delete_thread-2.4.20.patch
+ext3_delete_thread_2.4.20_chaos.patch
  ext3-noread-2.4.20.patch
  extN-wantedi.patch
  ext3-san-2.4.20.patch
+ext3-map_inode_page.patch
+ext3-error-export.patch
  iopen-2.4.20.patch
-tcp-zero-copy.patch
+tcp_zero_copy_2.4.20_chaos.patch
+gpl_header-chaos-2.4.20.patch
diff --git a/lustre/kernel_patches/series/vanilla-2.4.20 b/lustre/kernel_patches/series/vanilla-2.4.20

index e56cac6..726a028 100644 (file)
--- a/lustre/kernel_patches/series/vanilla-2.4.20
+++ b/lustre/kernel_patches/series/vanilla-2.4.20
@@ -1,4 +1,4 @@
-uml-patch-2.4.20-4.patch
+uml-patch-2.4.20-6.patch
  dev_read_only_2.4.20.patch
  exports_2.4.20.patch
  kmem_cache_validate_2.4.20.patch
@@ -25,5 +25,7 @@ ext3-noread-2.4.20.patch
  ext3-delete_thread-2.4.20.patch
  extN-wantedi.patch
  ext3-san-2.4.20.patch
+ext3-map_inode_page.patch
+ext3-error-export.patch
  iopen-2.4.20.patch
  tcp-zero-copy.patch
diff --git a/lustre/kernel_patches/which_patch b/lustre/kernel_patches/which_patch

index 2ef001d..28e8648 100644 (file)
--- a/lustre/kernel_patches/which_patch
+++ b/lustre/kernel_patches/which_patch
@@ -1,13 +1,8 @@
-SERIES               MEMNONIC                  COMMENT
+SERIES            MNEMONIC                 COMMENT                     ARCH
  
-hp-pnnl-2.4.20       linux-2.4.20-hp4_pnnl1    same as vanilla but no uml
-vanilla-2.4.20       linux-2.4.20              patch includes uml
-chaos-2.4.20         linux-chaos-2.4.20        same as rh-2.4.20-8
-rh-2.4.20            linux-rh-2.4.20-8         same as chaos-2.4.20
-rh-2.4.18-18         linux-rh-2.4.18-18        same as chaos but includes uml
-chaos                linux-chaos-2.4.18        same as rh-2.4.18-18 but no uml
-
-REVIEW:
-
-vanilla-2.5          linux-2.5.63
-hp-pnnl              linux-2.4.19-hp2_pnnl6
+chaos-2.4.18      linux-chaos-2.4.18       LLNL 2.4.18 chaos ~65       i386
+hp-pnnl-2.4.20    linux-2.4.20-hp4_pnnl1   same as vanilla but no uml  ia64
+vanilla-2.4.20    linux-2.4.20             patch with uml-2.4.20-6     um
+chaos-2.4.20      linux-chaos-2.4.20       same as rh-2.4.20-8         i386
+rh-2.4.20         linux-rh-2.4.20-8        same as chaos-2.4.20        i386
+kgdb-2.5.73       linux-2.5.73             vanilla 2.5.73 with kgdb    i386
diff --git a/lustre/ldlm/.cvsignore b/lustre/ldlm/.cvsignore

index e995588..e69dc6d 100644 (file)
--- a/lustre/ldlm/.cvsignore
+++ b/lustre/ldlm/.cvsignore
@@ -1,3 +1,4 @@
  .deps
  Makefile
  Makefile.in
+.*.cmd
diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c

index 9b53b54..e3f8673 100644 (file)
--- a/lustre/ldlm/ldlm_lib.c
+++ b/lustre/ldlm/ldlm_lib.c
@@ -32,7 +32,7 @@
  #include <linux/lustre_mds.h>
  #include <linux/lustre_net.h>
  
-int client_import_connect(struct lustre_handle *dlm_handle, 
+int client_import_connect(struct lustre_handle *dlm_handle,
                            struct obd_device *obd,
                            struct obd_uuid *cluuid)
  {
@@ -47,7 +47,6 @@ int client_import_connect(struct lustre_handle *dlm_handle,
          char *tmp[] = {imp->imp_target_uuid.uuid,
                         obd->obd_uuid.uuid,
                         (char *)dlm_handle};
-        int rq_opc = (obd->obd_type->typ_ops->o_brw) ? OST_CONNECT :MDS_CONNECT;
          int msg_flags;
  
          ENTRY;
@@ -67,13 +66,15 @@ int client_import_connect(struct lustre_handle *dlm_handle,
          if (obd->obd_namespace == NULL)
                  GOTO(out_disco, rc = -ENOMEM);
  
-        request = ptlrpc_prep_req(imp, rq_opc, 3, size, tmp);
+        request = ptlrpc_prep_req(imp, imp->imp_connect_op, 3, size, tmp);
          if (!request)
                  GOTO(out_ldlm, rc = -ENOMEM);
  
          request->rq_level = LUSTRE_CONN_NEW;
          request->rq_replen = lustre_msg_size(0, NULL);
  
+        lustre_msg_add_op_flags(request->rq_reqmsg, MSG_CONNECT_PEER);
+
          imp->imp_dlm_handle = *dlm_handle;
  
          imp->imp_level = LUSTRE_CONN_CON;
@@ -88,7 +89,7 @@ int client_import_connect(struct lustre_handle *dlm_handle,
          class_export_put(exp);
  
          msg_flags = lustre_msg_get_op_flags(request->rq_repmsg);
-        if (rq_opc == MDS_CONNECT || msg_flags & MSG_CONNECT_REPLAYABLE) {
+        if (msg_flags & MSG_CONNECT_REPLAYABLE) {
                  imp->imp_replayable = 1;
                  CDEBUG(D_HA, "connected to replayable target: %s\n",
                         imp->imp_target_uuid.uuid);
@@ -130,7 +131,16 @@ int client_import_disconnect(struct lustre_handle *dlm_handle, int failover)
                  RETURN(-EINVAL);
          }
  
-        rq_opc = obd->obd_type->typ_ops->o_brw ? OST_DISCONNECT:MDS_DISCONNECT;
+        switch (imp->imp_connect_op) {
+        case OST_CONNECT: rq_opc = OST_DISCONNECT; break;
+        case MDS_CONNECT: rq_opc = MDS_DISCONNECT; break;
+        case MGMT_CONNECT:rq_opc = MGMT_DISCONNECT;break;
+        default:
+                CERROR("don't know how to disconnect from %s (connect_op %d)\n",
+                       imp->imp_target_uuid.uuid, imp->imp_connect_op);
+                RETURN(-EINVAL);
+        }
+
          down(&cli->cl_sem);
          if (!cli->cl_conn_count) {
                  CERROR("disconnecting disconnected device (%s)\n",
@@ -229,36 +239,31 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler)
          struct obd_uuid remote_uuid;
          struct list_head *p;
          char *str, *tmp;
-        int rc, i, abort_recovery;
+        int rc = 0, abort_recovery;
          ENTRY;
  
          LASSERT_REQSWAB (req, 0);
-        str = lustre_msg_string (req->rq_reqmsg, 0, sizeof (tgtuuid.uuid) - 1);
+        str = lustre_msg_string(req->rq_reqmsg, 0, sizeof(tgtuuid) - 1);
          if (str == NULL) {
                  CERROR("bad target UUID for connect\n");
                  GOTO(out, rc = -EINVAL);
          }
+
          obd_str2uuid (&tgtuuid, str);
+        target = class_uuid2obd(&tgtuuid);
+        if (!target || target->obd_stopping || !target->obd_set_up) {
+                CERROR("UUID '%s' is not available for connect\n", str);
+                GOTO(out, rc = -ENODEV);
+        }
  
          LASSERT_REQSWAB (req, 1);
-        str = lustre_msg_string (req->rq_reqmsg, 1, sizeof (cluuid.uuid) - 1);
+        str = lustre_msg_string(req->rq_reqmsg, 1, sizeof(cluuid) - 1);
          if (str == NULL) {
                  CERROR("bad client UUID for connect\n");
                  GOTO(out, rc = -EINVAL);
          }
-        obd_str2uuid (&cluuid, str);
  
-        i = class_uuid2dev(&tgtuuid);
-        if (i == -1) {
-                CERROR("UUID '%s' not found for connect\n", tgtuuid.uuid);
-                GOTO(out, rc = -ENODEV);
-        }
-
-        target = &obd_dev[i];
-        if (!target || target->obd_stopping || !target->obd_set_up) {
-                CERROR("UUID '%s' is not available for connect\n", str);
-                GOTO(out, rc = -ENODEV);
-        }
+        obd_str2uuid (&cluuid, str);
  
          /* XXX extract a nettype and format accordingly */
          snprintf(remote_uuid.uuid, sizeof remote_uuid,
@@ -491,8 +496,7 @@ static void reset_recovery_timer(struct obd_device *obd)
  
          if (!recovering)
                  return;
-        CDEBUG(D_ERROR, "timer will expire in %ld seconds\n",
-               OBD_RECOVERY_TIMEOUT / HZ);
+        CERROR("timer will expire in %ld seconds\n", OBD_RECOVERY_TIMEOUT / HZ);
          mod_timer(&obd->obd_recovery_timer, jiffies + OBD_RECOVERY_TIMEOUT);
  }
  
@@ -568,7 +572,8 @@ static void process_recovery_queue(struct obd_device *obd)
                  DEBUG_REQ(D_ERROR, req, "processing: ");
                  (void)obd->obd_recovery_handler(req);
                  reset_recovery_timer(obd);
-#warning FIXME: mds_fsync_super(mds->mds_sb);
+                /* bug 1580: decide how to properly sync() in recovery */
+                //mds_fsync_super(mds->mds_sb);
                  class_export_put(req->rq_export);
                  OBD_FREE(req->rq_reqmsg, req->rq_reqlen);
                  OBD_FREE(req, sizeof *req);
@@ -715,8 +720,7 @@ int target_queue_final_reply(struct ptlrpc_request *req, int rc)
          if (recovery_done) {
                  struct list_head *tmp, *n;
                  ldlm_reprocess_all_ns(req->rq_export->exp_obd->obd_namespace);
-                CDEBUG(D_ERROR,
-                       "%s: all clients recovered, sending delayed replies\n",
+                CERROR("%s: all clients recovered, sending delayed replies\n",
                         obd->obd_name);
                  obd->obd_recovering = 0;
                  list_for_each_safe(tmp, n, &obd->obd_delayed_reply_queue) {
diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c

index 2dc60cf..3995e95 100644 (file)
--- a/lustre/ldlm/ldlm_lock.c
+++ b/lustre/ldlm/ldlm_lock.c
@@ -71,6 +71,8 @@ char *ldlm_it2str(int it)
                  return "lookup";
          case IT_UNLINK:
                  return "unlink";
+        case IT_GETXATTR:
+                return "getxattr";
          default:
                  CERROR("Unknown intent %d\n", it);
                  return "UNKNOWN";
@@ -954,8 +956,8 @@ int ldlm_run_ast_work(struct list_head *rpc_list)
                  if (rc == -ERESTART)
                          retval = rc;
                  else if (rc)
-                        CERROR("Failed AST - should clean & disconnect "
-                               "client\n");
+                        CDEBUG(D_DLMTRACE, "Failed AST - should clean & "
+                               "disconnect client\n");
                  LDLM_LOCK_PUT(w->w_lock);
                  list_del(&w->w_list);
                  OBD_FREE(w, sizeof(*w));
diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c

index de304d4..50bc96a 100644 (file)
--- a/lustre/ldlm/ldlm_lockd.c
+++ b/lustre/ldlm/ldlm_lockd.c
@@ -243,8 +243,7 @@ int ldlm_del_waiting_lock(struct ldlm_lock *lock)
  
  #endif /* __KERNEL__ */
  
-static inline void ldlm_failed_ast(struct ldlm_lock *lock, int rc,
-                                   char *ast_type)
+static void ldlm_failed_ast(struct ldlm_lock *lock, int rc, char *ast_type)
  {
          CERROR("%s AST failed (%d) for res "LPU64"/"LPU64
                 ", mode %s: evicting client %s@%s NID "LPU64"\n",
@@ -347,10 +346,19 @@ int ldlm_server_blocking_ast(struct ldlm_lock *lock,
          RETURN(rc);
  }
  
+/* Return large - small in microseconds. XXX copied from ptlrpc/service.c */
+static long timeval_sub(struct timeval *large, struct timeval *small)
+{
+        return (large->tv_sec - small->tv_sec) * 1000000 +
+                (large->tv_usec - small->tv_usec);
+}
+
  int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data)
  {
          struct ldlm_request *body;
          struct ptlrpc_request *req;
+        struct timeval granted_time;
+        long total_enqueue_wait;
          int rc = 0, size = sizeof(*body);
          ENTRY;
  
@@ -359,6 +367,12 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data)
                  RETURN(-EINVAL);
          }
  
+        do_gettimeofday(&granted_time);
+        total_enqueue_wait = timeval_sub(&granted_time, &lock->l_enqueued_time);
+
+        if (total_enqueue_wait / 1000000 > obd_timeout)
+                LDLM_ERROR(lock, "enqueue wait took %ldus", total_enqueue_wait);
+
          req = ptlrpc_prep_req(lock->l_export->exp_ldlm_data.led_import,
                                LDLM_CP_CALLBACK, 1, &size, NULL);
          if (!req)
@@ -370,7 +384,8 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data)
          body->lock_flags = flags;
          ldlm_lock2desc(lock, &body->lock_desc);
  
-        LDLM_DEBUG(lock, "server preparing completion AST");
+        LDLM_DEBUG(lock, "server preparing completion AST (after %ldus wait)",
+                   total_enqueue_wait);
          req->rq_replen = lustre_msg_size(0, NULL);
  
          req->rq_level = LUSTRE_CONN_RECOVER;
@@ -447,6 +462,7 @@ int ldlm_handle_enqueue(struct ptlrpc_request *req,
          if (!lock)
                  GOTO(out, err = -ENOMEM);
  
+        do_gettimeofday(&lock->l_enqueued_time);
          memcpy(&lock->l_remote_handle, &dlm_req->lock_handle1,
                 sizeof(lock->l_remote_handle));
          LDLM_DEBUG(lock, "server-side enqueue handler, new lock created");
@@ -640,22 +656,10 @@ static void ldlm_handle_cp_callback(struct ptlrpc_request *req,
                  lock->l_req_mode = dlm_req->lock_desc.l_granted_mode;
                  LDLM_DEBUG(lock, "completion AST, new lock mode");
          }
-        if (lock->l_resource->lr_type == LDLM_EXTENT) {
+        if (lock->l_resource->lr_type == LDLM_EXTENT)
                  memcpy(&lock->l_extent, &dlm_req->lock_desc.l_extent,
                         sizeof(lock->l_extent));
  
-                if ((lock->l_extent.end & ~PAGE_MASK) != ~PAGE_MASK) {
-                        /* XXX Old versions of BA OST code have a fencepost bug
-                         * which will cause them to grant a lock that's one
-                         * byte too large.  This can be safely removed after BA
-                         * ships their next release -phik (02 Apr 2003) */
-                        lock->l_extent.end--;
-                } else if ((lock->l_extent.start & ~PAGE_MASK) ==
-                           ~PAGE_MASK) {
-                        lock->l_extent.start++;
-                }
-        }
-
          ldlm_resource_unlink_lock(lock);
          if (memcmp(&dlm_req->lock_desc.l_resource.lr_name,
                     &lock->l_resource->lr_name,
@@ -961,7 +965,7 @@ static int ldlm_setup(struct obd_device *obddev, obd_count len, void *buf)
          return rc;
  }
  
-static int ldlm_cleanup(struct obd_device *obddev, int force, int failover)
+static int ldlm_cleanup(struct obd_device *obddev, int flags)
  {
          struct ldlm_obd *ldlm = &obddev->u.ldlm;
          ENTRY;
@@ -973,7 +977,7 @@ static int ldlm_cleanup(struct obd_device *obddev, int force, int failover)
          }
  
  #ifdef __KERNEL__
-        if (force) {
+        if (flags & OBD_OPT_FORCE) {
                  ptlrpc_put_ldlm_hooks();
          } else if (ptlrpc_ldlm_hooks_referenced()) {
                  CERROR("Some connections weren't cleaned up; run lconf with "
@@ -1084,6 +1088,7 @@ EXPORT_SYMBOL(ldlm_replay_locks);
  EXPORT_SYMBOL(ldlm_resource_foreach);
  EXPORT_SYMBOL(ldlm_namespace_foreach);
  EXPORT_SYMBOL(ldlm_namespace_foreach_res);
+EXPORT_SYMBOL(ldlm_change_cbdata);
  
  /* ldlm_lockd.c */
  EXPORT_SYMBOL(ldlm_server_blocking_ast);
diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c

index e6a8229..75e6dbd 100644 (file)
--- a/lustre/ldlm/ldlm_request.c
+++ b/lustre/ldlm/ldlm_request.c
@@ -273,6 +273,7 @@ int ldlm_cli_enqueue(struct lustre_handle *connh,
                  /* Set a flag to prevent us from sending a CANCEL (bug 407) */
                  l_lock(&ns->ns_lock);
                  lock->l_flags |= LDLM_FL_LOCAL_ONLY;
+                LDLM_DEBUG(lock, "setting FL_LOCAL_ONLY");
                  l_unlock(&ns->ns_lock);
  
                  ldlm_lock_decref_and_cancel(lockh, mode);
@@ -295,7 +296,7 @@ int ldlm_cli_enqueue(struct lustre_handle *connh,
                  CERROR ("Can't unpack ldlm_reply\n");
                  GOTO (out_req, rc = -EPROTO);
          }
-        
+
          memcpy(&lock->l_remote_handle, &reply->lock_handle,
                 sizeof(lock->l_remote_handle));
          *flags = reply->lock_flags;
@@ -309,17 +310,6 @@ int ldlm_cli_enqueue(struct lustre_handle *connh,
                         body->lock_desc.l_extent.end,
                         reply->lock_extent.start, reply->lock_extent.end);
  
-                if ((reply->lock_extent.end & ~PAGE_MASK) != ~PAGE_MASK) {
-                        /* XXX Old versions of BA OST code have a fencepost bug
-                         * which will cause them to grant a lock that's one
-                         * byte too large.  This can be safely removed after BA
-                         * ships their next release -phik (02 Apr 2003) */
-                        reply->lock_extent.end--;
-                } else if ((reply->lock_extent.start & ~PAGE_MASK) ==
-                           ~PAGE_MASK) {
-                        reply->lock_extent.start++;
-                }
-
                  cookie = &reply->lock_extent; /* FIXME bug 267 */
                  cookielen = sizeof(reply->lock_extent);
          }
@@ -454,7 +444,7 @@ int ldlm_cli_convert(struct lustre_handle *lockh, int new_mode, int *flags)
                  CERROR ("Can't unpack ldlm_reply\n");
                  GOTO (out, rc = -EPROTO);
          }
-        
+
          res = ldlm_lock_convert(lock, new_mode, &reply->lock_flags);
          if (res != NULL)
                  ldlm_reprocess_all(res);
@@ -535,11 +525,11 @@ int ldlm_cli_cancel(struct lustre_handle *lockh)
          local_cancel:
                  ldlm_lock_cancel(lock);
          } else {
-                LDLM_DEBUG(lock, "client-side local cancel");
                  if (lock->l_resource->lr_namespace->ns_client) {
-                        CERROR("Trying to cancel local lock\n");
+                        LDLM_ERROR(lock, "Trying to cancel local lock\n");
                          LBUG();
                  }
+                LDLM_DEBUG(lock, "client-side local cancel");
                  ldlm_lock_cancel(lock);
                  ldlm_reprocess_all(lock->l_resource);
                  LDLM_DEBUG(lock, "client-side local cancel handler END");
@@ -631,9 +621,8 @@ static int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns,
                  lock = list_entry(tmp, struct ldlm_lock, l_res_link);
  
                  if (opaque != NULL && lock->l_data != opaque) {
-                        LDLM_ERROR(lock, "data %p doesn't match opaque %p res"
-                                  LPU64":"LPU64, lock->l_data, opaque,
-                                  res_id.name[0], res_id.name[1]);
+                        LDLM_ERROR(lock, "data %p doesn't match opaque %p",
+                                  lock->l_data, opaque);
                          //LBUG();
                          continue;
                  }
@@ -797,12 +786,12 @@ int ldlm_namespace_foreach_res(struct ldlm_namespace *ns,
                                 ldlm_res_iterator_t iter, void *closure)
  {
          int i, rc = LDLM_ITER_CONTINUE;
-        
+
          l_lock(&ns->ns_lock);
          for (i = 0; i < RES_HASH_SIZE; i++) {
                  struct list_head *tmp, *next;
                  list_for_each_safe(tmp, next, &(ns->ns_hash[i])) {
-                        struct ldlm_resource *res = 
+                        struct ldlm_resource *res =
                                  list_entry(tmp, struct ldlm_resource, lr_hash);
  
                          ldlm_resource_getref(res);
@@ -817,6 +806,34 @@ int ldlm_namespace_foreach_res(struct ldlm_namespace *ns,
          RETURN(rc);
  }
  
+/* Non-blocking function to manipulate a lock whose cb_data is being put away.
+ * Looks up the resource named by @res_id in @ns and, if one is found, calls
+ * ldlm_resource_foreach(res, iter, data) while holding ns->ns_lock.  The
+ * iterator result is discarded since nothing is returned to the caller. */
+void ldlm_change_cbdata(struct ldlm_namespace *ns, struct ldlm_res_id *res_id,
+                        ldlm_iterator_t iter, void *data)
+{
+        struct ldlm_resource *res;
+        ENTRY;
+
+        if (ns == NULL) {
+                CERROR("must pass in namespace\n");
+                LBUG();
+        }
+
+        res = ldlm_resource_get(ns, NULL, *res_id, 0, 0);
+        if (res == NULL) {
+                EXIT;
+                return;
+        }
+
+        l_lock(&ns->ns_lock);
+        ldlm_resource_foreach(res, iter, data);
+        l_unlock(&ns->ns_lock);
+        ldlm_resource_putref(res);
+        EXIT;
+}
+
  /* Lock replay */
  
  static int ldlm_chain_lock_for_replay(struct ldlm_lock *lock, void *closure)
@@ -858,7 +875,7 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock)
                  flags = LDLM_FL_REPLAY | LDLM_FL_BLOCK_WAIT;
          else
                  flags = LDLM_FL_REPLAY;
-                
+
          size = sizeof(*body);
          req = ptlrpc_prep_req(imp, LDLM_ENQUEUE, 1, &size, NULL);
          if (!req)
@@ -866,7 +883,7 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock)
  
          /* We're part of recovery, so don't wait for it. */
          req->rq_level = LUSTRE_CONN_RECOVER;
-        
+
          body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body));
          ldlm_lock2desc(lock, &body->lock_desc);
          body->lock_flags = flags;
@@ -879,14 +896,14 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock)
          rc = ptlrpc_queue_wait(req);
          if (rc != ELDLM_OK)
                  GOTO(out, rc);
-        
+
          reply = lustre_swab_repbuf(req, 0, sizeof (*reply),
                                     lustre_swab_ldlm_reply);
          if (reply == NULL) {
                  CERROR("Can't unpack ldlm_reply\n");
                  GOTO (out, rc = -EPROTO);
          }
-        
+
          memcpy(&lock->l_remote_handle, &reply->lock_handle,
                 sizeof(lock->l_remote_handle));
          LDLM_DEBUG(lock, "replayed lock:");
@@ -901,7 +918,7 @@ int ldlm_replay_locks(struct obd_import *imp)
          struct list_head list, *pos, *next;
          struct ldlm_lock *lock;
          int rc = 0;
-        
+
          ENTRY;
          INIT_LIST_HEAD(&list);
  
diff --git a/lustre/ldlm/ldlm_resource.c b/lustre/ldlm/ldlm_resource.c

index 84fdecc..4449c79 100644 (file)
--- a/lustre/ldlm/ldlm_resource.c
+++ b/lustre/ldlm/ldlm_resource.c
@@ -114,12 +114,10 @@ struct ldlm_namespace *ldlm_namespace_new(char *name, __u32 client)
          if (!ns)
                  RETURN(NULL);
  
-        ns->ns_hash = vmalloc(sizeof(*ns->ns_hash) * RES_HASH_SIZE);
+        OBD_VMALLOC(ns->ns_hash, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
          if (!ns->ns_hash)
                  GOTO(out_ns, NULL);
  
-        atomic_add(sizeof(*ns->ns_hash) * RES_HASH_SIZE, &obd_memory);
-
          OBD_ALLOC(ns->ns_name, strlen(name) + 1);
          if (!ns->ns_name)
                  GOTO(out_hash, NULL);
@@ -152,8 +150,7 @@ struct ldlm_namespace *ldlm_namespace_new(char *name, __u32 client)
  
  out_hash:
          POISON(ns->ns_hash, 0x5a, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
-        vfree(ns->ns_hash);
-        atomic_sub(sizeof(*ns->ns_hash) * RES_HASH_SIZE, &obd_memory);
+        OBD_VFREE(ns->ns_hash, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
  out_ns:
          OBD_FREE(ns, sizeof(*ns));
          return NULL;
@@ -186,6 +183,7 @@ static void cleanup_resource(struct ldlm_resource *res, struct list_head *q,
                          lock->l_flags |= LDLM_FL_CBPENDING;
                          /* ... without sending a CANCEL message. */
                          lock->l_flags |= LDLM_FL_LOCAL_ONLY;
+                        LDLM_DEBUG(lock, "setting FL_LOCAL_ONLY");
                          /* ... and without calling the cancellation callback */
                          lock->l_flags |= LDLM_FL_CANCEL;
                          LDLM_LOCK_PUT(lock);
@@ -272,8 +270,7 @@ int ldlm_namespace_free(struct ldlm_namespace *ns)
          ldlm_namespace_cleanup(ns, 0);
  
          POISON(ns->ns_hash, 0x5a, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
-        vfree(ns->ns_hash /* , sizeof(*ns->ns_hash) * RES_HASH_SIZE */);
-        atomic_sub(sizeof(*ns->ns_hash) * RES_HASH_SIZE, &obd_memory);
+        OBD_VFREE(ns->ns_hash, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
          OBD_FREE(ns->ns_name, strlen(ns->ns_name) + 1);
          OBD_FREE(ns, sizeof(*ns));
  
diff --git a/lustre/liblustre/file.c b/lustre/liblustre/file.c

index 8344af5..88af047 100644 (file)
--- a/lustre/liblustre/file.c
+++ b/lustre/liblustre/file.c
@@ -145,7 +145,7 @@ int llu_create(struct inode *dir, struct pnode_base *pnode, int mode)
  
          it = dentry->d_it;
  
-        rc = ll_it_open_error(IT_OPEN_CREATE, it);
+        rc = ll_it_open_error(DISP_OPEN_CREATE, it);
          if (rc) {
                  LL_GET_INTENT(dentry, it);
                  ptlrpc_req_finished(it->it_data);
@@ -317,7 +317,7 @@ static int llu_file_open(struct inode *inode)
  #if 0
          CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino);
          LL_GET_INTENT(file->f_dentry, it);
-        rc = ll_it_open_error(IT_OPEN_OPEN, it);
+        rc = ll_it_open_error(DISP_OPEN_OPEN, it);
          if (rc)
                  RETURN(rc);
  #endif
@@ -477,7 +477,7 @@ static int llu_file_release(struct inode *inode)
                  oa.o_id = lsm->lsm_object_id;
                  oa.o_mode = S_IFREG;
                  oa.o_valid = OBD_MD_FLTYPE | OBD_MD_FLID;
-                
+
                  memcpy(&oa.o_inline, &fd->fd_ost_och, FD_OSTDATA_SIZE);
                  oa.o_valid |= OBD_MD_FLHANDLE;
  
diff --git a/lustre/liblustre/super.c b/lustre/liblustre/super.c

index 0e88933..0939352 100644 (file)
--- a/lustre/liblustre/super.c
+++ b/lustre/liblustre/super.c
@@ -715,7 +715,7 @@ llu_fsswop_mount(const char *source,
  
  /* XXX do we need this??
          memset(&osfs, 0, sizeof(osfs));
-        rc = obd_statfs(&sbi->ll_mdc_conn, &osfs);
+        rc = obd_statfs(class_conn2obd(&sbi->ll_mdc_conn),&osfs,jiffies-100*HZ);
  */
          /* fetch attr of root inode */
          err = mdc_getattr(&sbi->ll_mdc_conn, &rootfid,
@@ -765,9 +765,9 @@ out_inode:
  out_request:
          ptlrpc_req_finished(request);
  out_osc:
-        obd_disconnect(&sbi->ll_osc_conn);
+        obd_disconnect(&sbi->ll_osc_conn, 0);
  out_mdc:
-        obd_disconnect(&sbi->ll_mdc_conn);
+        obd_disconnect(&sbi->ll_mdc_conn, 0);
  out_free:
          OBD_FREE(sbi, sizeof(*sbi));
          return err;
diff --git a/lustre/llite/.cvsignore b/lustre/llite/.cvsignore

index e530020..49c6100 100644 (file)
--- a/lustre/llite/.cvsignore
+++ b/lustre/llite/.cvsignore
@@ -6,3 +6,4 @@ Makefile
  Makefile.in
  .deps
  TAGS
+.*.cmd
diff --git a/lustre/llite/Makefile.am b/lustre/llite/Makefile.am

index b6fc501..9ef9b7f 100644 (file)
--- a/lustre/llite/Makefile.am
+++ b/lustre/llite/Makefile.am
@@ -9,8 +9,8 @@ MODULE = llite
  modulefs_DATA = llite.o
  EXTRA_PROGRAMS = llite
  
-llite_SOURCES = dcache.c commit_callback.c super.c rw.c iod.c super25.c
-llite_SOURCES += file.c dir.c sysctl.c symlink.c
-llite_SOURCES += namei.c lproc_llite.c llite_internal.h
+llite_SOURCES = dcache.c commit_callback.c  rw.c  super25.c
+llite_SOURCES += file.c dir.c sysctl.c symlink.c llite_lib.c
+llite_SOURCES += namei.c lproc_llite.c super.c iod.c llite_internal.h
  
  include $(top_srcdir)/Rules
diff --git a/lustre/llite/dcache.c b/lustre/llite/dcache.c

index 0684968..8c55b3d 100644 (file)
--- a/lustre/llite/dcache.c
+++ b/lustre/llite/dcache.c
@@ -31,22 +31,19 @@
  #include <linux/lustre_idl.h>
  #include <linux/lustre_dlm.h>
  
+#include "llite_internal.h"
+
  /* should NOT be called with the dcache lock, see fs/dcache.c */
-void ll_release(struct dentry *de)
+static void ll_release(struct dentry *de)
  {
+        struct ll_dentry_data *lld = ll_d2d(de);
          ENTRY;
+
+        LASSERT(lld->lld_cwd_count == 0);
+        LASSERT(lld->lld_mnt_count == 0);
          OBD_FREE(de->d_fsdata, sizeof(struct ll_dentry_data));
-        EXIT;
-}
  
-int ll_delete(struct dentry *de)
-{
-        if (de->d_it != 0) {
-                CERROR("%s put dentry %p+%p with d_it %p\n", current->comm,
-                       de, de->d_fsdata, de->d_it);
-                LBUG();
-        }
-        return 0;
+        EXIT;
  }
  
  void ll_set_dd(struct dentry *de)
@@ -55,23 +52,20 @@ void ll_set_dd(struct dentry *de)
          LASSERT(de != NULL);
  
          lock_kernel();
-
          if (de->d_fsdata == NULL) {
                  OBD_ALLOC(de->d_fsdata, sizeof(struct ll_dentry_data));
-                sema_init(&ll_d2d(de)->lld_it_sem, 1);
          }
-
          unlock_kernel();
  
          EXIT;
  }
  
-void ll_intent_release(struct dentry *de, struct lookup_intent *it)
+void ll_intent_release(struct lookup_intent *it)
  {
          struct lustre_handle *handle;
          ENTRY;
  
-        if (it->it_lock_mode) {
+        if (it->it_op && it->it_lock_mode) {
                  handle = (struct lustre_handle *)it->it_lock_handle;
                  CDEBUG(D_DLMTRACE, "releasing lock with cookie "LPX64
                         " from it %p\n",
@@ -83,84 +77,73 @@ void ll_intent_release(struct dentry *de, struct lookup_intent *it)
                     lock (see bug 494) */
                  it->it_lock_mode = 0;
          }
+        it->it_magic = 0;
+        it->it_op_release = 0;
+        EXIT;
+}
  
-        if (!de->d_it || it->it_op == IT_RELEASED_MAGIC) {
-                EXIT;
+void ll_unhash_aliases(struct inode *inode)
+{
+        struct dentry *dentry = NULL;
+        struct list_head *tmp;
+        struct ll_sb_info *sbi;
+        ENTRY;
+
+        if (inode == NULL) {
+                CERROR("unexpected NULL inode, tell phil\n");
                  return;
          }
  
-        if (de->d_it == it)
-                LL_GET_INTENT(de, it);
-        else
-                CDEBUG(D_INODE, "STRANGE intent release: %p %p\n",
-                       de->d_it, it);
+        sbi = ll_i2sbi(inode);
+
+        CDEBUG(D_INODE, "marking dentries for ino %lx/%x invalid\n",
+               inode->i_ino, inode->i_generation);
  
+        spin_lock(&dcache_lock);
+        list_for_each(tmp, &inode->i_dentry) {
+                dentry = list_entry(tmp, struct dentry, d_alias);
+
+                list_del_init(&dentry->d_hash);
+                dentry->d_flags |= DCACHE_LUSTRE_INVALID;
+                list_add(&dentry->d_hash, &sbi->ll_orphan_dentry_list);
+        }
+
+        spin_unlock(&dcache_lock);
          EXIT;
  }
  
  extern struct dentry *ll_find_alias(struct inode *, struct dentry *);
  
-static int revalidate2_finish(int flag, struct ptlrpc_request *request,
+static int revalidate_it_finish(struct ptlrpc_request *request,
                                struct inode *parent, struct dentry **de,
                                struct lookup_intent *it, int offset, obd_id ino)
  {
          struct ll_sb_info     *sbi = ll_i2sbi(parent);
-        struct mds_body       *body;
-        struct lov_stripe_md  *lsm = NULL;
-        struct lov_mds_md     *lmm;
-        int                    lmmsize;
+        struct lustre_md      md;
          int                    rc = 0;
          ENTRY;
  
          /* NB 1 request reference will be taken away by ll_intent_lock()
           * when I return */
  
-        if ((flag & LL_LOOKUP_NEGATIVE) != 0)
-                GOTO (out, rc = -ENOENT);
+        if (it_disposition(it, DISP_LOOKUP_NEG))
+                RETURN(-ENOENT);
  
-        /* We only get called if the mdc_enqueue() called from
-         * ll_intent_lock() was successful.  Therefore the mds_body is
-         * present and correct, and the eadata is present (but still
-         * opaque, so only obd_unpackmd() can check the size) */
-        body = lustre_msg_buf(request->rq_repmsg, offset, sizeof (*body));
-        LASSERT (body != NULL);
-        LASSERT_REPSWABBED (request, offset);
+        /* ll_intent_lock was successful, now prepare the lustre_md */
+        rc = mdc_req2lustre_md(request, offset, &sbi->ll_osc_conn, &md);
+        if (rc)
+                RETURN(rc);
  
-        if (body->valid & OBD_MD_FLEASIZE) {
-                /* Only bother with this if inodes's LSM not set? */
-
-                if (body->eadatasize == 0) {
-                        CERROR ("OBD_MD_FLEASIZE set, but eadatasize 0\n");
-                        GOTO (out, rc = -EPROTO);
-                }
-                lmmsize = body->eadatasize;
-                lmm = lustre_msg_buf (request->rq_repmsg, offset + 1, lmmsize);
-                LASSERT (lmm != NULL);
-                LASSERT_REPSWABBED (request, offset + 1);
-
-                rc = obd_unpackmd (&sbi->ll_osc_conn,
-                                   &lsm, lmm, lmmsize);
-                if (rc < 0) {
-                        CERROR ("Error %d unpacking eadata\n", rc);
-                        LBUG();
-                        /* XXX don't know if I should do this... */
-                        GOTO (out, rc);
-                        /* or skip the ll_update_inode but still do
-                         * mdc_lock_set_inode() */
-                }
-                LASSERT (rc >= sizeof (*lsm));
-                rc = 0;
-        }
+        ll_update_inode((*de)->d_inode, md.body, md.lsm);
  
-        ll_update_inode((*de)->d_inode, body, lsm);
+        if (md.lsm != NULL && ll_i2info((*de)->d_inode)->lli_smd != md.lsm)
+                obd_free_memmd (&sbi->ll_osc_conn, &md.lsm);
  
-        if (lsm != NULL &&
-            ll_i2info((*de)->d_inode)->lli_smd != lsm)
-                obd_free_memmd (&sbi->ll_osc_conn, &lsm);
-
-        ll_mdc_lock_set_inode((struct lustre_handle *)it->it_lock_handle,
-                              (*de)->d_inode);
- out:
+        CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
+               (*de)->d_inode, (*de)->d_inode->i_ino,
+               (*de)->d_inode->i_generation);
+        ldlm_lock_set_data((struct lustre_handle *)it->it_lock_handle,
+                           (*de)->d_inode);
          RETURN(rc);
  }
  
@@ -197,20 +180,26 @@ int ll_have_md_lock(struct dentry *de)
          RETURN(0);
  }
  
-int ll_revalidate2(struct dentry *de, int flags, struct lookup_intent *it)
+int ll_revalidate_it(struct dentry *de, int flags, struct lookup_intent *it)
  {
          int rc;
          ENTRY;
          CDEBUG(D_VFSTRACE, "VFS Op:name=%s,intent=%s\n", de->d_name.name,
                 LL_IT2STR(it));
  
-        /* We don't want to cache negative dentries, so return 0 immediately.
-         * We believe that this is safe, that negative dentries cannot be
-         * pinned by someone else */
-        if (de->d_inode == NULL) {
-                CDEBUG(D_INODE, "negative dentry: ret 0 to force lookup2\n");
+        /* Cached negative dentries are unsafe for now - look them up again */
+        if (de->d_inode == NULL)
                  RETURN(0);
-        }
+
+        /* 
+         * never execute intents for mount points
+         * - attrs will be fixed up in ll_revalidate_inode
+         */
+        if (d_mountpoint(de))
+                RETURN(1);
+
+        if (it)
+                it->it_op_release = ll_intent_release;
  
          if (it == NULL || it->it_op == IT_GETATTR) {
                  /* We could just return 1 immediately, but since we should only
@@ -233,7 +222,6 @@ int ll_revalidate2(struct dentry *de, int flags, struct lookup_intent *it)
                                  memcpy(it->it_lock_handle, &lockh,
                                         sizeof(lockh));
                                  it->it_lock_mode = LCK_PR;
-                                LL_SAVE_INTENT(de, it);
                          } else {
                                  ldlm_lock_decref(&lockh, LCK_PR);
                          }
@@ -248,7 +236,6 @@ int ll_revalidate2(struct dentry *de, int flags, struct lookup_intent *it)
                                  memcpy(it->it_lock_handle, &lockh,
                                         sizeof(lockh));
                                  it->it_lock_mode = LCK_PW;
-                                LL_SAVE_INTENT(de, it);
                          } else {
                                  ldlm_lock_decref(&lockh, LCK_PW);
                          }
@@ -256,31 +243,123 @@ int ll_revalidate2(struct dentry *de, int flags, struct lookup_intent *it)
                  }
                  if (S_ISDIR(de->d_inode->i_mode))
                          ll_invalidate_inode_pages(de->d_inode);
-                d_unhash_aliases(de->d_inode);
+                ll_unhash_aliases(de->d_inode);
                  RETURN(0);
          }
  
-        rc = ll_intent_lock(de->d_parent->d_inode, &de, it, revalidate2_finish);
+        rc = ll_intent_lock(de->d_parent->d_inode, &de, it, flags,
+                            revalidate_it_finish);
          if (rc < 0) {
                  if (rc != -ESTALE) {
                          CERROR("ll_intent_lock: rc %d : it->it_status %d\n", rc,
                                 it->it_status);
                  }
+                ll_unhash_aliases(de->d_inode);
                  RETURN(0);
          }
          /* unfortunately ll_intent_lock may cause a callback and revoke our
             dentry */
          spin_lock(&dcache_lock);
-        list_del_init(&de->d_hash);
+        hlist_del_init(&de->d_hash);
          __d_rehash(de, 0);
          spin_unlock(&dcache_lock);
  
          RETURN(1);
  }
  
+static void ll_pin(struct dentry *de, struct vfsmount *mnt, int flag)
+{
+        struct inode *inode= de->d_inode;
+        struct ll_sb_info *sbi = ll_i2sbi(inode);
+        struct ll_dentry_data *ldd = ll_d2d(de);
+        struct obd_client_handle *handle;
+        int rc = 0;
+        ENTRY;
+        LASSERT(ldd);
+
+        lock_kernel();
+        /* Count this pin; only the first pin of a kind (flag 1 = mnt,
+         * flag 0 = cwd) falls through to obd_pin().  Strictly speaking
+         * the increments should wait until the rpc has returned, but as
+         * the function is void at present, this issue is moot. */
+        if (flag == 1 && (++ldd->lld_mnt_count) > 1) {
+                unlock_kernel();
+                EXIT;
+                return;
+        }
+
+        if (flag == 0 && (++ldd->lld_cwd_count) > 1) {
+                unlock_kernel();
+                EXIT;
+                return;
+        }
+        unlock_kernel();
+
+        handle = (flag) ? &ldd->lld_mnt_och : &ldd->lld_cwd_och;
+        rc = obd_pin(&sbi->ll_mdc_conn, inode->i_ino, inode->i_generation,
+                     inode->i_mode & S_IFMT, handle, flag);
+
+        if (rc) {       /* pin failed: zero the handle, undo the count */
+                lock_kernel();
+                memset(handle, 0, sizeof(*handle));
+                if (flag == 0)
+                        ldd->lld_cwd_count--;
+                else
+                        ldd->lld_mnt_count--;
+                unlock_kernel();
+        }
+
+        EXIT;
+        return;
+}
+
+static void ll_unpin(struct dentry *de, struct vfsmount *mnt, int flag)
+{
+        struct ll_sb_info *sbi = ll_i2sbi(de->d_inode);
+        struct ll_dentry_data *ldd = ll_d2d(de);
+        struct obd_client_handle handle;
+        int count, rc = 0;
+        ENTRY;
+        LASSERT(ldd);
+
+        lock_kernel();
+        /* Copy the handle for this pin kind (nonzero flag = mnt, zero =
+         * cwd) and drop its count; obd_unpin() is only called once the
+         * count reaches zero.  Strictly speaking the decrement should wait
+         * until the rpc has returned, but as the function is void, moot. */
+        handle = (flag) ? ldd->lld_mnt_och : ldd->lld_cwd_och;
+        if (handle.och_magic != OBD_CLIENT_HANDLE_MAGIC) {
+                /* the "pin" failed */
+                unlock_kernel();
+                EXIT;
+                return;
+        }
+
+        if (flag)
+                count = --ldd->lld_mnt_count;
+        else
+                count = --ldd->lld_cwd_count;
+        unlock_kernel();
+
+        if (count != 0) {
+                EXIT;
+                return;
+        }
+
+        rc = obd_unpin(&sbi->ll_mdc_conn, &handle, flag);
+        EXIT;
+        return;
+}
+
  struct dentry_operations ll_d_ops = {
-        .d_revalidate2 = ll_revalidate2,
-        .d_intent_release = ll_intent_release,
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+        .d_revalidate_nd = ll_revalidate_nd,
+#else
+        .d_revalidate_it = ll_revalidate_it,
+#endif
          .d_release = ll_release,
-        .d_delete = ll_delete,
+#if 0
+        .d_pin = ll_pin,
+        .d_unpin = ll_unpin,
+#endif
  };
diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c

index 115ed4e..a81a7d4 100644 (file)
--- a/lustre/llite/dir.c
+++ b/lustre/llite/dir.c
@@ -54,14 +54,6 @@ typedef struct ext2_dir_entry_2 ext2_dirent;
  #define PageChecked(page)        test_bit(PG_checked, &(page)->flags)
  #define SetPageChecked(page)     set_bit(PG_checked, &(page)->flags)
  
-
-static int ll_dir_prepare_write(struct file *file, struct page *page,
-                                unsigned from, unsigned to)
-{
-        CDEBUG(D_VFSTRACE, "VFS Op:\n");
-        return 0;
-}
-
  /* returns the page unlocked, but with a reference */
  static int ll_dir_readpage(struct file *file, struct page *page)
  {
@@ -98,7 +90,7 @@ static int ll_dir_readpage(struct file *file, struct page *page)
                               &lockh);
          if (!rc) {
                  ll_prepare_mdc_op_data(&data, inode, NULL, NULL, 0, 0);
-                
+
                  rc = mdc_enqueue(&sbi->ll_mdc_conn, LDLM_PLAIN, &it, LCK_PR,
                                   &data, &lockh, NULL, 0,
                                   ldlm_completion_ast, ll_mdc_blocking_ast,
@@ -137,39 +129,14 @@ static int ll_dir_readpage(struct file *file, struct page *page)
                  SetPageUptodate(page);
  
          unlock_page(page);
-        ll_unlock(LCK_PR, &lockh);
-        if (rc != ELDLM_OK)
-                CERROR("ll_unlock: err: %d\n", rc);
+        ldlm_lock_decref(&lockh, LCK_PR);
          return rc;
  }
  
  struct address_space_operations ll_dir_aops = {
          readpage: ll_dir_readpage,
-        prepare_write: ll_dir_prepare_write
  };
  
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3))
-int waitfor_one_page(struct page *page)
-{
-        int error = 0;
-        struct buffer_head *bh, *head = page->buffers;
-
-        bh = head;
-        do {
-                wait_on_buffer(bh);
-                if (buffer_req(bh) && !buffer_uptodate(bh))
-                        error = -EIO;
-        } while ((bh = bh->b_this_page) != head);
-        return error;
-}
-#elif (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-int waitfor_one_page(struct page *page)
-{
-        wait_on_page_locked(page);
-        return 0;
-}
-#endif
-
  /*
   * ext2 uses block-sized chunks. Arguably, sector-sized ones would be
   * more robust, but we have what we have
@@ -190,27 +157,6 @@ static inline unsigned long dir_pages(struct inode *inode)
          return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT;
  }
  
-extern void set_page_clean(struct page *page);
-
-static int ext2_commit_chunk(struct page *page, unsigned from, unsigned to)
-{
-        struct inode *dir = page->mapping->host;
-        loff_t new_size = (page->index << PAGE_CACHE_SHIFT) + to;
-        int err = 0;
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        dir->i_version = ++event;
-#endif
-        if (new_size > dir->i_size)
-                dir->i_size = new_size;
-        SetPageUptodate(page);
-        set_page_clean(page);
-
-        //page->mapping->a_ops->commit_write(NULL, page, from, to);
-        //if (IS_SYNC(dir))
-        //      err = waitfor_one_page(page);
-        return err;
-}
  
  static void ext2_check_page(struct page *page)
  {
@@ -324,20 +270,6 @@ fail:
          return ERR_PTR(-EIO);
  }
  
-/*
- * NOTE! unlike strncmp, ext2_match returns 1 for success, 0 for failure.
- *
- * len <= EXT2_NAME_LEN and de != NULL are guaranteed by caller.
- */
-static inline int ext2_match (int len, const char * const name,
-                                        struct ext2_dir_entry_2 * de)
-{
-        if (len != de->name_len)
-                return 0;
-        if (!de->inode)
-                return 0;
-        return !memcmp(name, de->name, len);
-}
  
  /*
   * p is at least 6 bytes before the end of page
@@ -368,33 +300,6 @@ static unsigned char ext2_filetype_table[EXT2_FT_MAX] = {
          [EXT2_FT_SYMLINK]       DT_LNK,
  };
  
-static unsigned int ll_dt2fmt[DT_WHT + 1] = {
-        [EXT2_FT_UNKNOWN]       0,
-        [EXT2_FT_REG_FILE]      S_IFREG,
-        [EXT2_FT_DIR]           S_IFDIR,
-        [EXT2_FT_CHRDEV]        S_IFCHR,
-        [EXT2_FT_BLKDEV]        S_IFBLK,
-        [EXT2_FT_FIFO]          S_IFIFO,
-        [EXT2_FT_SOCK]          S_IFSOCK,
-        [EXT2_FT_SYMLINK]       S_IFLNK
-};
-
-#define S_SHIFT 12
-static unsigned char ext2_type_by_mode[S_IFMT >> S_SHIFT] = {
-        [S_IFREG >> S_SHIFT]    EXT2_FT_REG_FILE,
-        [S_IFDIR >> S_SHIFT]    EXT2_FT_DIR,
-        [S_IFCHR >> S_SHIFT]    EXT2_FT_CHRDEV,
-        [S_IFBLK >> S_SHIFT]    EXT2_FT_BLKDEV,
-        [S_IFIFO >> S_SHIFT]    EXT2_FT_FIFO,
-        [S_IFSOCK >> S_SHIFT]   EXT2_FT_SOCK,
-        [S_IFLNK >> S_SHIFT]    EXT2_FT_SYMLINK,
-};
-
-static inline void ext2_set_de_type(ext2_dirent *de, struct inode *inode)
-{
-        mode_t mode = inode->i_mode;
-        de->file_type = ext2_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
-}
  
  int ll_readdir(struct file * filp, void * dirent, filldir_t filldir)
  {
@@ -437,7 +342,7 @@ int ll_readdir(struct file * filp, void * dirent, filldir_t filldir)
                  }
                  de = (ext2_dirent *)(kaddr+offset);
                  limit = kaddr + PAGE_CACHE_SIZE - EXT2_DIR_REC_LEN(1);
-                for ( ;(char*)de <= limit; de = ext2_next_entry(de))
+                for ( ;(char*)de <= limit; de = ext2_next_entry(de)) {
                          if (de->inode) {
                                  int over;
                                  unsigned char d_type = DT_UNKNOWN;
@@ -454,334 +359,31 @@ int ll_readdir(struct file * filp, void * dirent, filldir_t filldir)
                                          GOTO(done,0);
                                  }
                          }
+                }
                  ext2_put_page(page);
          }
  
  done:
          filp->f_pos = (n << PAGE_CACHE_SHIFT) | offset;
          filp->f_version = inode->i_version;
-        UPDATE_ATIME(inode);
+        update_atime(inode);
          RETURN(0);
  }
  
-/*
- *      ext2_find_entry()
- *
- * finds an entry in the specified directory with the wanted name. It
- * returns the page in which the entry was found, and the entry itself
- * (as a parameter - res_dir). Page is returned mapped and unlocked.
- * Entry is guaranteed to be valid.
- */
-struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir,
-                        struct dentry *dentry, struct page ** res_page)
-{
-        const char *name = dentry->d_name.name;
-        int namelen = dentry->d_name.len;
-        unsigned reclen = EXT2_DIR_REC_LEN(namelen);
-        unsigned long start, n;
-        unsigned long npages = dir_pages(dir);
-        struct page *page = NULL;
-        ext2_dirent * de;
-
-        /* OFFSET_CACHE */
-        *res_page = NULL;
-
-        //      start = dir->u.ext2_i.i_dir_start_lookup;
-        start = 0;
-        if (start >= npages)
-                start = 0;
-        n = start;
-        do {
-                char *kaddr;
-                page = ll_get_dir_page(dir, n);
-                if (!IS_ERR(page)) {
-                        kaddr = page_address(page);
-                        de = (ext2_dirent *) kaddr;
-                        kaddr += PAGE_CACHE_SIZE - reclen;
-                        while ((char *) de <= kaddr) {
-                                if (ext2_match (namelen, name, de))
-                                        goto found;
-                                de = ext2_next_entry(de);
-                        }
-                        ext2_put_page(page);
-                }
-                if (++n >= npages)
-                        n = 0;
-        } while (n != start);
-        return NULL;
-
-found:
-        *res_page = page;
-        //      dir->u.ext2_i.i_dir_start_lookup = n;
-        return de;
-}
-
-struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p)
-{
-        struct page *page = ll_get_dir_page(dir, 0);
-        ext2_dirent *de = NULL;
-
-        if (!IS_ERR(page)) {
-                de = ext2_next_entry((ext2_dirent *) page_address(page));
-                *p = page;
-        }
-        return de;
-}
-
-obd_id ll_inode_by_name(struct inode * dir, struct dentry *dentry, int *type)
-{
-        obd_id res = 0;
-        struct ext2_dir_entry_2 * de;
-        struct page *page;
-
-        de = ext2_find_entry (dir, dentry, &page);
-        if (de) {
-                res = le32_to_cpu(de->inode);
-                *type = ll_dt2fmt[de->file_type];
-                kunmap(page);
-                page_cache_release(page);
-        }
-        return res;
-}
-
-/* Releases the page */
-void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
-                        struct page *page, struct inode *inode)
-{
-        unsigned from = (char *) de - (char *) page_address(page);
-        unsigned to = from + le16_to_cpu(de->rec_len);
-        int err;
-
-        lock_page(page);
-        err = page->mapping->a_ops->prepare_write(NULL, page, from, to);
-        if (err)
-                LBUG();
-        de->inode = cpu_to_le32(inode->i_ino);
-        ext2_set_de_type (de, inode);
-        dir->i_mtime = dir->i_ctime = CURRENT_TIME;
-        err = ext2_commit_chunk(page, from, to);
-        unlock_page(page);
-        ext2_put_page(page);
-}
-
-/*
- *      Parent is locked.
- */
-int ll_add_link (struct dentry *dentry, struct inode *inode)
-{
-        struct inode *dir = dentry->d_parent->d_inode;
-        const char *name = dentry->d_name.name;
-        int namelen = dentry->d_name.len;
-        unsigned reclen = EXT2_DIR_REC_LEN(namelen);
-        unsigned short rec_len, name_len;
-        struct page *page = NULL;
-        ext2_dirent * de;
-        unsigned long npages = dir_pages(dir);
-        unsigned long n;
-        char *kaddr;
-        unsigned from, to;
-        int err;
-
-        /* We take care of directory expansion in the same loop */
-        for (n = 0; n <= npages; n++) {
-                page = ll_get_dir_page(dir, n);
-                err = PTR_ERR(page);
-                if (IS_ERR(page))
-                        goto out;
-                kaddr = page_address(page);
-                de = (ext2_dirent *)kaddr;
-                kaddr += PAGE_CACHE_SIZE - reclen;
-                while ((char *)de <= kaddr) {
-                        err = -EEXIST;
-                        if (ext2_match (namelen, name, de))
-                                goto out_page;
-                        name_len = EXT2_DIR_REC_LEN(de->name_len);
-                        rec_len = le16_to_cpu(de->rec_len);
-                        if ( n==npages && rec_len == 0) {
-                                CERROR("Fatal dir behaviour\n");
-                                goto out_page;
-                        }
-                        if (!de->inode && rec_len >= reclen)
-                                goto got_it;
-                        if (rec_len >= name_len + reclen)
-                                goto got_it;
-                        de = (ext2_dirent *) ((char *) de + rec_len);
-                }
-                ext2_put_page(page);
-        }
-        LBUG();
-        return -EINVAL;
-
-got_it:
-        from = (char*)de - (char*)page_address(page);
-        to = from + rec_len;
-        lock_page(page);
-        err = page->mapping->a_ops->prepare_write(NULL, page, from, to);
-        if (err)
-                goto out_unlock;
-        if (de->inode) {
-                ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
-                de1->rec_len = cpu_to_le16(rec_len - name_len);
-                de->rec_len = cpu_to_le16(name_len);
-                de = de1;
-        }
-        de->name_len = namelen;
-        memcpy (de->name, name, namelen);
-        de->inode = cpu_to_le32(inode->i_ino);
-        ext2_set_de_type (de, inode);
-        CDEBUG(D_INODE, "type set to %o\n", de->file_type);
-        dir->i_mtime = dir->i_ctime = CURRENT_TIME;
-        err = ext2_commit_chunk(page, from, to);
-
-        // change_inode happens with the commit_chunk
-        /* XXX OFFSET_CACHE */
-
-out_unlock:
-        unlock_page(page);
-out_page:
-        ext2_put_page(page);
-out:
-        return err;
-}
-
-/*
- * ext2_delete_entry deletes a directory entry by merging it with the
- * previous entry. Page is up-to-date. Releases the page.
- */
-int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page )
-{
-        struct address_space *mapping = page->mapping;
-        struct inode *inode = mapping->host;
-        char *kaddr = page_address(page);
-        unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1);
-        unsigned to = ((char*)dir - kaddr) + le16_to_cpu(dir->rec_len);
-        ext2_dirent * pde = NULL;
-        ext2_dirent * de = (ext2_dirent *) (kaddr + from);
-        int err;
-
-        while ((char*)de < (char*)dir) {
-                pde = de;
-                de = ext2_next_entry(de);
-        }
-        if (pde)
-                from = (char*)pde - (char*)page_address(page);
-        lock_page(page);
-        err = mapping->a_ops->prepare_write(NULL, page, from, to);
-        if (err)
-                LBUG();
-        if (pde)
-                pde->rec_len = cpu_to_le16(to-from);
-        dir->inode = 0;
-        inode->i_ctime = inode->i_mtime = CURRENT_TIME;
-        err = ext2_commit_chunk(page, from, to);
-        unlock_page(page);
-        ext2_put_page(page);
-        return err;
-}
-
-/*
- * Set the first fragment of directory.
- */
-int ext2_make_empty(struct inode *inode, struct inode *parent)
-{
-        struct address_space *mapping = inode->i_mapping;
-        struct page *page = grab_cache_page(mapping, 0);
-        unsigned chunk_size = ext2_chunk_size(inode);
-        struct ext2_dir_entry_2 * de;
-        char *base;
-        int err;
-        ENTRY;
-
-        if (!page)
-                return -ENOMEM;
-        base = kmap(page);
-        if (!base)
-                return -ENOMEM;
-
-        err = mapping->a_ops->prepare_write(NULL, page, 0, chunk_size);
-        if (err)
-                goto fail;
-
-        de = (struct ext2_dir_entry_2 *) base;
-        de->name_len = 1;
-        de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1));
-        memcpy (de->name, ".\0\0", 4);
-        de->inode = cpu_to_le32(inode->i_ino);
-        ext2_set_de_type (de, inode);
-
-        de = (struct ext2_dir_entry_2 *) (base + EXT2_DIR_REC_LEN(1));
-        de->name_len = 2;
-        de->rec_len = cpu_to_le16(chunk_size - EXT2_DIR_REC_LEN(1));
-        de->inode = cpu_to_le32(parent->i_ino);
-        memcpy (de->name, "..\0", 4);
-        ext2_set_de_type (de, inode);
-
-        err = ext2_commit_chunk(page, 0, chunk_size);
-fail:
-        kunmap(page);
-        unlock_page(page);
-        page_cache_release(page);
-        ENTRY;
-        return err;
-}
-
-/*
- * routine to check that the specified directory is empty (for rmdir)
- */
-int ext2_empty_dir (struct inode * inode)
-{
-        struct page *page = NULL;
-        unsigned long i, npages = dir_pages(inode);
-
-        for (i = 0; i < npages; i++) {
-                char *kaddr;
-                ext2_dirent * de;
-                page = ll_get_dir_page(inode, i);
-
-                if (IS_ERR(page))
-                        continue;
-
-                kaddr = page_address(page);
-                de = (ext2_dirent *)kaddr;
-                kaddr += PAGE_CACHE_SIZE-EXT2_DIR_REC_LEN(1);
-
-                while ((char *)de <= kaddr) {
-                        if (de->inode != 0) {
-                                /* check for . and .. */
-                                if (de->name[0] != '.')
-                                        goto not_empty;
-                                if (de->name_len > 2)
-                                        goto not_empty;
-                                if (de->name_len < 2) {
-                                        if (de->inode !=
-                                            cpu_to_le32(inode->i_ino))
-                                                goto not_empty;
-                                } else if (de->name[1] != '.')
-                                        goto not_empty;
-                        }
-                        de = ext2_next_entry(de);
-                }
-                ext2_put_page(page);
-        }
-        return 1;
-
-not_empty:
-        ext2_put_page(page);
-        return 0;
-}
-
  static int ll_dir_ioctl(struct inode *inode, struct file *file,
                          unsigned int cmd, unsigned long arg)
  {
          struct ll_sb_info *sbi = ll_i2sbi(inode);
          struct obd_ioctl_data *data;
          ENTRY;
+
          CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%u\n", inode->i_ino,
                 inode->i_generation, inode, cmd);
  
          if (_IOC_TYPE(cmd) == 'T') /* tty ioctls */
                  return -ENOTTY;
  
+        lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_IOCTL);
          switch(cmd) {
          case IOC_MDC_LOOKUP: {
                  struct ptlrpc_request *request = NULL;
@@ -834,9 +436,61 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file,
                  obd_ioctl_freedata(buf, len);
                  return rc;
          }
-        default:
-                CERROR("unrecognized ioctl %#x\n", cmd);
+        case LL_IOC_LOV_SETSTRIPE:
+        case LL_IOC_LOV_GETSTRIPE:
                  RETURN(-ENOTTY);
+        case IOC_MDC_GETSTRIPE: {
+                struct ptlrpc_request *request = NULL;
+                struct ll_fid fid;
+                struct mds_body *body;
+                struct lov_mds_md *lmm;
+                char *filename;
+                int rc, lmmsize;
+
+                filename = getname((const char *)arg);
+                if (IS_ERR(filename))
+                        RETURN(PTR_ERR(filename));
+
+                ll_inode2fid(&fid, inode);
+                rc = mdc_getattr_name(&sbi->ll_mdc_conn, &fid, filename,
+                                      strlen(filename)+1, OBD_MD_FLEASIZE,
+                                      obd_size_diskmd(&sbi->ll_osc_conn, NULL),
+                                      &request);
+                if (rc < 0) {
+                        CERROR("mdc_getattr_name: failed on %s: rc %d\n",
+                               filename, rc);
+                        GOTO(out_name, rc);
+                }
+
+                body = lustre_msg_buf(request->rq_repmsg, 0, sizeof (*body));
+                LASSERT(body != NULL);         /* checked by mdc_getattr_name */
+                LASSERT_REPSWABBED(request, 0);/* swabbed by mdc_getattr_name */
+
+                lmmsize = body->eadatasize;
+
+                if (!(body->valid & OBD_MD_FLEASIZE) || lmmsize == 0)
+                        GOTO(out_req, rc = -ENODATA);
+
+                if (lmmsize > 4096)
+                        GOTO(out_req, rc = -EFBIG);
+
+                lmm = lustre_msg_buf(request->rq_repmsg, 1, lmmsize);
+                LASSERT(lmm != NULL);
+                LASSERT_REPSWABBED(request, 1);
+
+                rc = copy_to_user((struct lov_mds_md *)arg, lmm, lmmsize);
+                if (rc)
+                        GOTO(out_req, rc = -EFAULT);
+
+                EXIT;
+        out_req:
+                ptlrpc_req_finished(request);
+        out_name:
+                putname(filename);
+                return rc;
+        }
+        default:
+                return obd_iocontrol(cmd,&sbi->ll_osc_conn,0,NULL,(void *)arg);
          }
  }
  
diff --git a/lustre/llite/file.c b/lustre/llite/file.c

index 943ba1b..67d18fd 100644 (file)
--- a/lustre/llite/file.c
+++ b/lustre/llite/file.c
@@ -32,8 +32,7 @@
  #include <linux/lustre_compat25.h>
  #endif
  
-int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc);
-extern int ll_setattr(struct dentry *de, struct iattr *attr);
+#include "llite_internal.h"
  
  static int ll_mdc_close(struct lustre_handle *mdc_conn, struct inode *inode,
                          struct file *file)
@@ -135,28 +134,21 @@ int ll_file_release(struct inode *inode, struct file *file)
          lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_RELEASE);
          fd = (struct ll_file_data *)file->private_data;
          if (!fd) /* no process opened the file after an mcreate */
-                RETURN(rc = 0);
+                RETURN(0);
  
          /* we might not be able to get a valid handle on this file
           * again so we really want to flush our write cache.. */
-        if (S_ISREG(inode->i_mode)) {
-                filemap_fdatasync(inode->i_mapping);
-                filemap_fdatawait(inode->i_mapping);
-
-                if (lsm != NULL) {
-                        memset(&oa, 0, sizeof(oa));
-                        oa.o_id = lsm->lsm_object_id;
-                        oa.o_mode = S_IFREG;
-                        oa.o_valid = OBD_MD_FLTYPE | OBD_MD_FLID;
-
-                        memcpy(&oa.o_inline, &fd->fd_ost_och, FD_OSTDATA_SIZE);
-                        oa.o_valid |= OBD_MD_FLHANDLE;
+        if (S_ISREG(inode->i_mode) && lsm) {
+                write_inode_now(inode, 0);
+                obdo_from_inode(&oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
+                                            OBD_MD_FLMTIME | OBD_MD_FLCTIME);
+                memcpy(obdo_handle(&oa), &fd->fd_ost_och, FD_OSTDATA_SIZE);
+                oa.o_valid |= OBD_MD_FLHANDLE;
  
-                        rc = obd_close(&sbi->ll_osc_conn, &oa, lsm, NULL);
-                        if (rc)
-                                CERROR("inode %lu object close failed: rc = "
-                                       "%d\n", inode->i_ino, rc);
-                }
+                rc = obd_close(&sbi->ll_osc_conn, &oa, lsm, NULL);
+                if (rc)
+                        CERROR("inode %lu object close failed: rc %d\n",
+                               inode->i_ino, rc);
          }
  
          rc2 = ll_mdc_close(&sbi->ll_mdc_conn, inode, file);
@@ -206,16 +198,16 @@ static int ll_osc_open(struct lustre_handle *conn, struct inode *inode,
                  RETURN(-ENOMEM);
          oa->o_id = lsm->lsm_object_id;
          oa->o_mode = S_IFREG;
-        oa->o_valid = (OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLBLOCKS |
-                       OBD_MD_FLMTIME | OBD_MD_FLCTIME);
+        oa->o_valid = OBD_MD_FLID;
+        obdo_from_inode(oa, inode, OBD_MD_FLTYPE);
          rc = obd_open(conn, oa, lsm, NULL, &fd->fd_ost_och);
          if (rc)
                  GOTO(out, rc);
  
          file->f_flags &= ~O_LOV_DELAY_CREATE;
-        obdo_to_inode(inode, oa, OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
-                                 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
-
+        obdo_refresh_inode(inode, oa, OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
+                                      OBD_MD_FLATIME | OBD_MD_FLMTIME |
+                                      OBD_MD_FLCTIME);
          EXIT;
  out:
          obdo_free(oa);
@@ -236,24 +228,33 @@ static int ll_create_obj(struct lustre_handle *conn, struct inode *inode,
          struct obdo *oa;
          struct iattr iattr;
          struct mdc_op_data op_data;
-        int rc, err, lmm_size = 0;;
+        struct obd_trans_info oti = { 0 };
+        int rc, err, lmm_size = 0;
          ENTRY;
  
          oa = obdo_alloc();
          if (!oa)
                  RETURN(-ENOMEM);
  
+        LASSERT(S_ISREG(inode->i_mode));
          oa->o_mode = S_IFREG | 0600;
          oa->o_id = inode->i_ino;
+        oa->o_generation = inode->i_generation;
          /* Keep these 0 for now, because chown/chgrp does not change the
           * ownership on the OST, and we don't want to allow BA OST NFS
           * users to access these objects by mistake. */
          oa->o_uid = 0;
          oa->o_gid = 0;
-        oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE |
-                OBD_MD_FLUID | OBD_MD_FLGID;
+        oa->o_valid = OBD_MD_FLID | OBD_MD_FLGENER | OBD_MD_FLTYPE |
+                OBD_MD_FLMODE | OBD_MD_FLUID | OBD_MD_FLGID;
+#ifdef ENABLE_ORPHANS
+        oa->o_valid |= OBD_MD_FLCOOKIE;
+#endif
  
-        rc = obd_create(conn, oa, &lsm, NULL);
+        obdo_from_inode(oa, inode, OBD_MD_FLTYPE|OBD_MD_FLATIME|OBD_MD_FLMTIME|
+                        OBD_MD_FLCTIME | (inode->i_size ? OBD_MD_FLSIZE : 0));
+
+        rc = obd_create(conn, oa, &lsm, &oti);
          if (rc) {
                  CERROR("error creating objects for inode %lu: rc = %d\n",
                         inode->i_ino, rc);
@@ -263,7 +264,7 @@ static int ll_create_obj(struct lustre_handle *conn, struct inode *inode,
                  }
                  GOTO(out_oa, rc);
          }
-        obdo_to_inode(inode, oa, OBD_MD_FLBLKSZ);
+        obdo_refresh_inode(inode, oa, OBD_MD_FLBLKSZ);
  
          LASSERT(lsm && lsm->lsm_object_id);
          rc = obd_packmd(conn, &lmm, lsm);
@@ -278,11 +279,18 @@ static int ll_create_obj(struct lustre_handle *conn, struct inode *inode,
  
          ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0);
  
-        rc = mdc_setattr(&ll_i2sbi(inode)->ll_mdc_conn, &op_data,
-                         &iattr, lmm, lmm_size, &req);
+#if 0
+#warning FIXME: next line is for debugging purposes only
+        obd_log_cancel(&ll_i2sbi(inode)->ll_osc_conn, lsm, oti.oti_numcookies,
+                       oti.oti_logcookies, OBD_LLOG_FL_SENDNOW);
+#endif
+
+        rc = mdc_setattr(&ll_i2sbi(inode)->ll_mdc_conn, &op_data, &iattr,
+                         lmm, lmm_size, oti.oti_logcookies,
+                         oti.oti_numcookies * sizeof(oti.oti_onecookie), &req);
          ptlrpc_req_finished(req);
  
-        obd_free_diskmd (conn, &lmm);
+        obd_free_diskmd(conn, &lmm);
  
          /* If we couldn't complete mdc_open() and store the stripe MD on the
           * MDS, we need to destroy the objects now or they will be leaked.
@@ -297,13 +305,21 @@ static int ll_create_obj(struct lustre_handle *conn, struct inode *inode,
  
          EXIT;
  out_oa:
+        oti_free_cookies(&oti);
          obdo_free(oa);
          return rc;
  
  out_destroy:
-        obdo_from_inode(oa, inode, OBD_MD_FLTYPE);
          oa->o_id = lsm->lsm_object_id;
-        oa->o_valid |= OBD_MD_FLID;
+        oa->o_valid = OBD_MD_FLID;
+        obdo_from_inode(oa, inode, OBD_MD_FLTYPE);
+#if 0
+        err = obd_log_cancel(conn, lsm, oti.oti_numcookies, oti.oti_logcookies,
+                             OBD_LLOG_FL_SENDNOW);
+        if (err)
+                CERROR("error cancelling inode %lu log cookies: rc %d\n",
+                       inode->i_ino, err);
+#endif
          err = obd_destroy(conn, oa, lsm, NULL);
          obd_free_memmd(conn, &lsm);
          if (err)
@@ -327,8 +343,6 @@ out_destroy:
   * before returning in the O_LOV_DELAY_CREATE case and dropping it here
   * or in ll_file_release(), but I'm not sure that is desirable/necessary.
   */
-extern int ll_it_open_error(int phase, struct lookup_intent *it);
-
  int ll_file_open(struct inode *inode, struct file *file)
  {
          struct ll_sb_info *sbi = ll_i2sbi(inode);
@@ -346,9 +360,10 @@ int ll_file_open(struct inode *inode, struct file *file)
          if (inode->i_sb->s_root == file->f_dentry)
                  RETURN(0);
  
+        it = file->f_it;
          lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_OPEN);
-        LL_GET_INTENT(file->f_dentry, it);
-        rc = ll_it_open_error(IT_OPEN_OPEN, it);
+
+        rc = ll_it_open_error(DISP_OPEN_OPEN, it);
          if (rc)
                  RETURN(rc);
  
@@ -363,7 +378,8 @@ int ll_file_open(struct inode *inode, struct file *file)
  
          lsm = lli->lli_smd;
          if (lsm == NULL) {
-                if (file->f_flags & O_LOV_DELAY_CREATE) {
+                if (file->f_flags & O_LOV_DELAY_CREATE ||
+                    !(file->f_mode & FMODE_WRITE)) {
                          CDEBUG(D_INODE, "delaying object creation\n");
                          RETURN(0);
                  }
@@ -418,7 +434,7 @@ int ll_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm,
                  OBD_MD_FLCTIME;
  
          if (ostdata != NULL) {
-                memcpy(&oa.o_inline, ostdata, FD_OSTDATA_SIZE);
+                memcpy(obdo_handle(&oa), ostdata, FD_OSTDATA_SIZE);
                  oa.o_valid |= OBD_MD_FLHANDLE;
          }
  
@@ -455,8 +471,8 @@ int ll_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm,
                   (aft != 0 || after < before) &&
                   oa.o_size < ((u64)before + 1) << PAGE_CACHE_SHIFT);
  
-        obdo_to_inode(inode, &oa, (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
-                                   OBD_MD_FLMTIME | OBD_MD_FLCTIME));
+        obdo_refresh_inode(inode, &oa, OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
+                                       OBD_MD_FLMTIME | OBD_MD_FLCTIME);
          if (inode->i_blksize < PAGE_CACHE_SIZE)
                  inode->i_blksize = PAGE_CACHE_SIZE;
  
@@ -477,102 +493,6 @@ int ll_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm,
          RETURN(0);
  }
  
-/*
- * some callers, notably truncate, really don't want i_size set based
- * on the the size returned by the getattr, or lock acquisition in
- * the future.
- */
-int ll_extent_lock_no_validate(struct ll_file_data *fd, struct inode *inode,
-                   struct lov_stripe_md *lsm,
-                   int mode, struct ldlm_extent *extent,
-                   struct lustre_handle *lockh)
-{
-        struct ll_sb_info *sbi = ll_i2sbi(inode);
-        int rc, flags = 0;
-        ENTRY;
-
-        LASSERT(lockh->cookie == 0);
-
-        /* XXX phil: can we do this?  won't it screw the file size up? */
-        if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) ||
-            (sbi->ll_flags & LL_SBI_NOLCK))
-                RETURN(0);
-
-        CDEBUG(D_DLMTRACE, "Locking inode %lu, start "LPU64" end "LPU64"\n",
-               inode->i_ino, extent->start, extent->end);
-
-        rc = obd_enqueue(&sbi->ll_osc_conn, lsm, NULL, LDLM_EXTENT, extent,
-                         sizeof(extent), mode, &flags, ll_extent_lock_callback,
-                         inode, lockh);
-
-        RETURN(rc);
-}
-
-/*
- * this grabs a lock and manually implements behaviour that makes it look like
- * the OST is returning the file size with each lock acquisition.
- */
-int ll_extent_lock(struct ll_file_data *fd, struct inode *inode,
-                   struct lov_stripe_md *lsm, int mode,
-                   struct ldlm_extent *extent, struct lustre_handle *lockh)
-{
-        struct ll_inode_info *lli = ll_i2info(inode);
-        struct ldlm_extent size_lock;
-        struct lustre_handle match_lockh = {0};
-        int flags, rc, matched;
-        ENTRY;
-
-        rc = ll_extent_lock_no_validate(fd, inode, lsm, mode, extent, lockh);
-        if (rc != ELDLM_OK)
-                RETURN(rc);
-
-        if (test_bit(LLI_F_HAVE_SIZE_LOCK, &lli->lli_flags))
-                RETURN(0);
-
-        rc = ll_inode_getattr(inode, lsm, fd ? &fd->fd_ost_och : NULL);
-        if (rc) {
-                ll_extent_unlock(fd, inode, lsm, mode, lockh);
-                RETURN(rc);
-        }
-
-        size_lock.start = inode->i_size;
-        size_lock.end = OBD_OBJECT_EOF;
-
-        /* XXX I bet we should be checking the lock ignore flags.. */
-        flags = LDLM_FL_CBPENDING | LDLM_FL_BLOCK_GRANTED | LDLM_FL_MATCH_DATA;
-        matched = obd_match(&ll_i2sbi(inode)->ll_osc_conn, lsm, LDLM_EXTENT,
-                            &size_lock, sizeof(size_lock), LCK_PR, &flags,
-                            inode, &match_lockh);
-
-        /* hey, alright, we hold a size lock that covers the size we
-         * just found, its not going to change for a while.. */
-        if (matched == 1) {
-                set_bit(LLI_F_HAVE_SIZE_LOCK, &lli->lli_flags);
-                obd_cancel(&ll_i2sbi(inode)->ll_osc_conn, lsm, LCK_PR,
-                           &match_lockh);
-        }
-
-        RETURN(0);
-}
-
-int ll_extent_unlock(struct ll_file_data *fd, struct inode *inode,
-                struct lov_stripe_md *lsm, int mode,
-                struct lustre_handle *lockh)
-{
-        struct ll_sb_info *sbi = ll_i2sbi(inode);
-        int rc;
-        ENTRY;
-
-        /* XXX phil: can we do this?  won't it screw the file size up? */
-        if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) ||
-            (sbi->ll_flags & LL_SBI_NOLCK))
-                RETURN(0);
-
-        rc = obd_cancel(&sbi->ll_osc_conn, lsm, mode, lockh);
-
-        RETURN(rc);
-}
-
  static inline void ll_remove_suid(struct inode *inode)
  {
          unsigned int mode;
@@ -591,22 +511,10 @@ static inline void ll_remove_suid(struct inode *inode)
  #if 0
  static void ll_update_atime(struct inode *inode)
  {
-#ifdef USE_ATIME
-        struct iattr attr;
-
-        attr.ia_atime = LTIME_S(CURRENT_TIME);
-        attr.ia_valid = ATTR_ATIME;
-
-        if (inode->i_atime == attr.ia_atime) return;
          if (IS_RDONLY(inode)) return;
-        if (IS_NOATIME(inode)) return;
  
-        /* ll_inode_setattr() sets inode->i_atime from attr.ia_atime */
-        ll_inode_setattr(inode, &attr, 0);
-#else
          /* update atime, but don't explicitly write it out just this change */
          inode->i_atime = CURRENT_TIME;
-#endif
  }
  #endif
  
@@ -676,19 +584,19 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm,
  
          /* start writeback on dirty pages in the extent when its PW */
          for (i = start, j = start % count;
-                        lock->l_granted_mode == LCK_PW && i < end; j++, i++) {
+             lock->l_granted_mode == LCK_PW && i < end; j++, i++) {
                  if (j == count) {
                          i += skip;
                          j = 0;
                  }
                  /* its unlikely, but give us a chance to bail when we're out */
-                PGCACHE_WRLOCK(inode->i_mapping);
+                ll_pgcache_lock(inode->i_mapping);
                  if (list_empty(&inode->i_mapping->dirty_pages)) {
                          CDEBUG(D_INODE, "dirty list empty\n");
-                        PGCACHE_WRUNLOCK(inode->i_mapping);
+                        ll_pgcache_unlock(inode->i_mapping);
                          break;
                  }
-                PGCACHE_WRUNLOCK(inode->i_mapping);
+                ll_pgcache_unlock(inode->i_mapping);
  
                  if (need_resched())
                          schedule();
@@ -702,10 +610,10 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm,
                  }
                  if (PageDirty(page)) {
                          CDEBUG(D_INODE, "writing page %p\n", page);
-                        PGCACHE_WRLOCK(inode->i_mapping);
+                        ll_pgcache_lock(inode->i_mapping);
                          list_del(&page->list);
                          list_add(&page->list, &inode->i_mapping->locked_pages);
-                        PGCACHE_WRUNLOCK(inode->i_mapping);
+                        ll_pgcache_unlock(inode->i_mapping);
  
                          /* this writepage might write out pages outside
                           * this extent, but that's ok, the pages are only
@@ -730,19 +638,19 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm,
          LASSERT((extent->start & ~PAGE_CACHE_MASK) == 0);
          LASSERT(((extent->end+1) & ~PAGE_CACHE_MASK) == 0);
          for (i = start, j = start % count ; i < end ; j++, i++) {
-                if ( j == count ) {
+                if (j == count) {
                          i += skip;
                          j = 0;
                  }
-                PGCACHE_WRLOCK(inode->i_mapping);
+                ll_pgcache_lock(inode->i_mapping);
                  if (list_empty(&inode->i_mapping->dirty_pages) &&
                       list_empty(&inode->i_mapping->clean_pages) &&
                       list_empty(&inode->i_mapping->locked_pages)) {
                          CDEBUG(D_INODE, "nothing left\n");
-                        PGCACHE_WRUNLOCK(inode->i_mapping);
+                        ll_pgcache_unlock(inode->i_mapping);
                          break;
                  }
-                PGCACHE_WRUNLOCK(inode->i_mapping);
+                ll_pgcache_unlock(inode->i_mapping);
                  if (need_resched())
                          schedule();
                  page = find_get_page(inode->i_mapping, i);
@@ -755,15 +663,16 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm,
                          truncate_complete_page(page);
  #else
                          truncate_complete_page(page->mapping, page);
-#endif                
+#endif
                  unlock_page(page);
                  page_cache_release(page);
          }
          EXIT;
  }
  
-int ll_extent_lock_callback(struct ldlm_lock *lock, struct ldlm_lock_desc *new,
-                            void *data, int flag)
+static int ll_extent_lock_callback(struct ldlm_lock *lock,
+                                   struct ldlm_lock_desc *new, void *data,
+                                   int flag)
  {
          struct inode *inode = data;
          struct ll_inode_info *lli = ll_i2info(inode);
@@ -771,7 +680,10 @@ int ll_extent_lock_callback(struct ldlm_lock *lock, struct ldlm_lock_desc *new,
          int rc;
          ENTRY;
  
-        LASSERT(inode != NULL);
+        if ((unsigned long)inode < 0x1000) {
+                LDLM_ERROR(lock, "cancelling lock with bad data %p", data);
+                LBUG();
+        }
  
          switch (flag) {
          case LDLM_CB_BLOCKING:
@@ -785,9 +697,15 @@ int ll_extent_lock_callback(struct ldlm_lock *lock, struct ldlm_lock_desc *new,
                   * could know to write-back or simply throw away the pages
                   * based on if the cancel comes from a desire to, say,
                   * read or truncate.. */
-                LASSERT((unsigned long)inode > 0x1000);
-                LASSERT((unsigned long)lli > 0x1000);
-                LASSERT((unsigned long)lli->lli_smd > 0x1000);
+                if ((unsigned long)lli->lli_smd < 0x1000) {
+                        /* note that lli is part of the inode itself, so it
+                         * is valid if we checked the inode pointer above. */
+                        CERROR("inode %lu, sb %p, lli %p, lli_smd %p\n",
+                               inode->i_ino, inode->i_sb, lli, lli->lli_smd);
+                        LDLM_ERROR(lock, "cancel lock on bad inode %p", inode);
+                        LBUG();
+                }
+
                  ll_pgcache_remove_extent(inode, lli->lli_smd, lock);
                  break;
          default:
@@ -797,6 +715,102 @@ int ll_extent_lock_callback(struct ldlm_lock *lock, struct ldlm_lock_desc *new,
          RETURN(0);
  }
  
+/*
+ * Enqueue an LDLM_EXTENT lock covering @extent on the objects described by
+ * @lsm, without touching the inode attributes afterwards.  Some callers,
+ * notably truncate, really don't want i_size set based on the size returned
+ * by a getattr, or by lock acquisition in the future.
+ *
+ * @lockh must come in with a zero cookie (asserted).  When locking is
+ * disabled, either per-file (LL_FILE_IGNORE_LOCK on a non-NULL @fd) or
+ * per-mount (LL_SBI_NOLCK), nothing is enqueued and 0 is returned with
+ * @lockh untouched.  Otherwise the obd_enqueue() result is returned.
+ */
+int ll_extent_lock_no_validate(struct ll_file_data *fd, struct inode *inode,
+                   struct lov_stripe_md *lsm,
+                   int mode, struct ldlm_extent *extent,
+                   struct lustre_handle *lockh)
+{
+        struct ll_sb_info *sbi = ll_i2sbi(inode);
+        int rc, flags = 0;
+        ENTRY;
+
+        LASSERT(lockh->cookie == 0);
+
+        /* XXX phil: can we do this?  won't it screw the file size up? */
+        if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) ||
+            (sbi->ll_flags & LL_SBI_NOLCK))
+                RETURN(0);
+
+        CDEBUG(D_DLMTRACE, "Locking inode %lu, start "LPU64" end "LPU64"\n",
+               inode->i_ino, extent->start, extent->end);
+
+        /* the length must describe the extent itself, not the pointer to it
+         * (ll_extent_lock() likewise passes sizeof(size_lock) to obd_match) */
+        rc = obd_enqueue(&sbi->ll_osc_conn, lsm, NULL, LDLM_EXTENT, extent,
+                         sizeof(*extent), mode, &flags,
+                         ll_extent_lock_callback, inode, lockh);
+
+        RETURN(rc);
+}
+
+/*
+ * this grabs a lock and manually implements behaviour that makes it look like
+ * the OST is returning the file size with each lock acquisition.
+ *
+ * The extent lock itself is taken by ll_extent_lock_no_validate(); any result
+ * other than ELDLM_OK from it is returned unchanged.  Unless the inode is
+ * already flagged LLI_F_HAVE_SIZE_LOCK, the inode attributes are then
+ * refreshed with ll_inode_getattr(); if that fails the extent lock is
+ * released again and the error is returned.  @fd may be NULL.
+ *
+ * Returns 0 on success.
+ */
+int ll_extent_lock(struct ll_file_data *fd, struct inode *inode,
+                   struct lov_stripe_md *lsm, int mode,
+                   struct ldlm_extent *extent, struct lustre_handle *lockh)
+{
+        struct ll_inode_info *lli = ll_i2info(inode);
+        struct ldlm_extent size_lock;
+        struct lustre_handle match_lockh = {0};
+        int flags, rc, matched;
+        ENTRY;
+
+        rc = ll_extent_lock_no_validate(fd, inode, lsm, mode, extent, lockh);
+        if (rc != ELDLM_OK)
+                RETURN(rc);
+
+        /* a size lock is already known to be held: skip the getattr */
+        if (test_bit(LLI_F_HAVE_SIZE_LOCK, &lli->lli_flags))
+                RETURN(0);
+
+        rc = ll_inode_getattr(inode, lsm, fd ? &fd->fd_ost_och : NULL);
+        if (rc) {
+                ll_extent_unlock(fd, inode, lsm, mode, lockh);
+                RETURN(rc);
+        }
+
+        /* look for a lock covering everything from the size we were just
+         * told about through to the end of the object */
+        size_lock.start = inode->i_size;
+        size_lock.end = OBD_OBJECT_EOF;
+
+        /* XXX I bet we should be checking the lock ignore flags.. */
+        flags = LDLM_FL_CBPENDING | LDLM_FL_BLOCK_GRANTED | LDLM_FL_MATCH_DATA;
+        matched = obd_match(&ll_i2sbi(inode)->ll_osc_conn, lsm, LDLM_EXTENT,
+                            &size_lock, sizeof(size_lock), LCK_PR, &flags,
+                            inode, &match_lockh);
+
+        /* hey, alright, we hold a size lock that covers the size we
+         * just found, it's not going to change for a while.. */
+        if (matched == 1) {
+                set_bit(LLI_F_HAVE_SIZE_LOCK, &lli->lli_flags);
+                /* NOTE(review): presumably this drops the reference that the
+                 * match took on the lock -- confirm against obd_match() */
+                obd_cancel(&ll_i2sbi(inode)->ll_osc_conn, lsm, LCK_PR,
+                           &match_lockh);
+        }
+
+        RETURN(0);
+}
+
+/*
+ * Release an extent lock taken through ll_extent_lock() or
+ * ll_extent_lock_no_validate().
+ *
+ * When locking is switched off for this file (LL_FILE_IGNORE_LOCK on a
+ * non-NULL @fd) or for the whole mount (LL_SBI_NOLCK) there is nothing to
+ * cancel and 0 is returned; otherwise the obd_cancel() result is returned.
+ */
+int ll_extent_unlock(struct ll_file_data *fd, struct inode *inode,
+                struct lov_stripe_md *lsm, int mode,
+                struct lustre_handle *lockh)
+{
+        struct ll_sb_info *sbi = ll_i2sbi(inode);
+        int err;
+        ENTRY;
+
+        /* XXX phil: can we do this?  won't it screw the file size up? */
+        if (fd != NULL && (fd->fd_flags & LL_FILE_IGNORE_LOCK) != 0)
+                RETURN(0);
+        if ((sbi->ll_flags & LL_SBI_NOLCK) != 0)
+                RETURN(0);
+
+        err = obd_cancel(&sbi->ll_osc_conn, lsm, mode, lockh);
+
+        RETURN(err);
+}
+
  static ssize_t ll_file_read(struct file *filp, char *buf, size_t count,
                              loff_t *ppos)
  {
@@ -819,6 +833,10 @@ static ssize_t ll_file_read(struct file *filp, char *buf, size_t count,
  
          lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, LPROC_LL_READ_BYTES,
                              count);
+
+        if (!lsm)
+                RETURN(0);
+
          /* grab a -> eof extent to push extending writes out of node's caches
           * so we can see them at the getattr after lock acquisition.  this will
           * turn into a seperate [*ppos + count, EOF] 'size intent' lock attempt
@@ -852,8 +870,8 @@ static ssize_t ll_file_read(struct file *filp, char *buf, size_t count,
  /*
   * Write to a file (through the page cache).
   */
-static ssize_t
-ll_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
+static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
+                             loff_t *ppos)
  {
          struct ll_file_data *fd = file->private_data;
          struct inode *inode = file->f_dentry->d_inode;
@@ -868,6 +886,7 @@ ll_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
          CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n",
                 inode->i_ino, inode->i_generation, inode, count, *ppos);
  
+        SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */
          /*
           * sleep doing some writeback work of this mount's dirty data
           * if the VM thinks we're low on memory.. other dirtying code
@@ -875,12 +894,14 @@ ll_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
           * careful not to hold locked pages while they do so.  like
           * ll_prepare_write.  *cough*
           */
-        LL_CHECK_DIRTY(inode->i_sb);
+        ll_check_dirty(inode->i_sb);
  
          /* POSIX, but surprised the VFS doesn't check this already */
          if (count == 0)
                  RETURN(0);
  
+        LASSERT(lsm);
+
          if (file->f_flags & O_APPEND) {
                  extent.start = 0;
                  extent.end = OBD_OBJECT_EOF;
@@ -943,7 +964,8 @@ static int ll_lov_setstripe(struct inode *inode, struct file *file,
          lsm = lli->lli_smd;
          if (lsm) {
                  up(&lli->lli_open_sem);
-                CERROR("stripe already exists for ino %lu\n", inode->i_ino);
+                CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
+                       inode->i_ino);
                  /* If we haven't already done the open, do so now */
                  if (file->f_flags & O_LOV_DELAY_CREATE) {
                          int rc2 = ll_osc_open(conn, inode, file, lsm);
@@ -987,6 +1009,7 @@ int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
          struct ll_file_data *fd = file->private_data;
          struct lustre_handle *conn;
          int flags;
+
          CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%u\n", inode->i_ino,
                 inode->i_generation, inode, cmd);
  
@@ -1077,8 +1100,8 @@ loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
  
  int ll_fsync(struct file *file, struct dentry *dentry, int data)
  {
-        int ret;
          struct inode *inode = dentry->d_inode;
+        int rc;
          ENTRY;
          CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
                 inode->i_generation, inode);
@@ -1090,17 +1113,17 @@ int ll_fsync(struct file *file, struct dentry *dentry, int data)
           * still holding the PW lock that covered the dirty pages.  XXX we
           * should probably get a reference on it, though, just to be clear.
           */
-        ret = filemap_fdatasync(dentry->d_inode->i_mapping);
-        if ( ret == 0 )
-                ret = filemap_fdatawait(dentry->d_inode->i_mapping);
+        rc = filemap_fdatasync(inode->i_mapping);
+        if (rc == 0)
+                rc = filemap_fdatawait(inode->i_mapping);
  
-        RETURN(ret);
+        RETURN(rc);
  }
  
-int ll_inode_revalidate(struct dentry *dentry)
+int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
  {
          struct inode *inode = dentry->d_inode;
-        struct lov_stripe_md *lsm = NULL;
+        struct lov_stripe_md *lsm;
          ENTRY;
  
          if (!inode) {
@@ -1118,70 +1141,41 @@ int ll_inode_revalidate(struct dentry *dentry)
             below when the lock is marked CB_PENDING.  That RPC may not
             go out because someone else may be in another RPC waiting for
             that lock*/
-        if (!(dentry->d_it && dentry->d_it->it_lock_mode) &&
-            !ll_have_md_lock(dentry)) {
+        if (!(it && it->it_lock_mode) && !ll_have_md_lock(dentry)) {
+                struct lustre_md md;
                  struct ptlrpc_request *req = NULL;
                  struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
                  struct ll_fid fid;
-                struct mds_body *body;
-                struct lov_mds_md *lmm;
                  unsigned long valid = 0;
-                int eadatalen = 0, rc;
+                int rc;
+                int ealen = 0;
  
-                /* Why don't we update all valid MDS fields here, if we're
-                 * doing an RPC anyways?  -phil */
                  if (S_ISREG(inode->i_mode)) {
-                        eadatalen = obd_size_diskmd(&sbi->ll_osc_conn, NULL);
+                        ealen = obd_size_diskmd(&sbi->ll_osc_conn, NULL);
                          valid |= OBD_MD_FLEASIZE;
                  }
                  ll_inode2fid(&fid, inode);
-                rc = mdc_getattr(&sbi->ll_mdc_conn, &fid,
-                                 valid, eadatalen, &req);
+                rc = mdc_getattr(&sbi->ll_mdc_conn, &fid, valid, ealen, &req);
                  if (rc) {
                          CERROR("failure %d inode %lu\n", rc, inode->i_ino);
                          RETURN(-abs(rc));
                  }
-
-                body = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*body));
-                LASSERT (body != NULL);         /* checked by mdc_getattr() */
-                LASSERT_REPSWABBED (req, 0);    /* swabbed by mdc_getattr() */
-
-                if (S_ISREG(inode->i_mode) &&
-                    (body->valid & (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS))) {
-                        CERROR("MDS sent back size for regular file\n");
-                        body->valid &= ~(OBD_MD_FLSIZE | OBD_MD_FLBLOCKS);
-                }
+                rc = mdc_req2lustre_md(req, 0, &sbi->ll_osc_conn, &md);
  
                  /* XXX Too paranoid? */
-                if ((body->valid ^ valid) & OBD_MD_FLEASIZE)
+                if ((md.body->valid ^ valid) & OBD_MD_FLEASIZE)
                          CERROR("Asked for %s eadata but got %s\n",
                                 (valid & OBD_MD_FLEASIZE) ? "some" : "no",
-                               (body->valid & OBD_MD_FLEASIZE) ? "some":"none");
-
-                if (S_ISREG(inode->i_mode) &&
-                    (body->valid & OBD_MD_FLEASIZE)) {
-                        if (body->eadatasize == 0) { /* no EA data */
-                                CERROR("OBD_MD_FLEASIZE set but no data\n");
-                                RETURN(-EPROTO);
-                        }
-                        /* Only bother with this if inode's lsm not set? */
-                        lmm = lustre_msg_buf(req->rq_repmsg,1,body->eadatasize);
-                        LASSERT(lmm != NULL);       /* mdc_getattr() checked */
-                        LASSERT_REPSWABBED(req, 1); /* mdc_getattr() swabbed */
-
-                        rc = obd_unpackmd (&sbi->ll_osc_conn,
-                                           &lsm, lmm, body->eadatasize);
-                        if (rc < 0) {
-                                CERROR("Error %d unpacking eadata\n", rc);
-                                ptlrpc_req_finished(req);
-                                RETURN(rc);
-                        }
-                        LASSERT(rc >= sizeof(*lsm));
+                               (md.body->valid & OBD_MD_FLEASIZE) ? "some":
+                               "none");
+                if (rc) {
+                        ptlrpc_req_finished(req);
+                        RETURN(rc);
                  }
  
-                ll_update_inode(inode, body, lsm);
-                if (lsm != NULL && ll_i2info(inode)->lli_smd != lsm)
-                        obd_free_memmd(&sbi->ll_osc_conn, &lsm);
+                ll_update_inode(inode, md.body, md.lsm);
+                if (md.lsm != NULL && ll_i2info(inode)->lli_smd != md.lsm)
+                        obd_free_memmd(&sbi->ll_osc_conn, &md.lsm);
  
                  ptlrpc_req_finished(req);
          }
@@ -1211,19 +1205,20 @@ int ll_inode_revalidate(struct dentry *dentry)
  }
  
  #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-static int ll_getattr(struct vfsmount *mnt, struct dentry *de,
+int ll_getattr(struct vfsmount *mnt, struct dentry *de,
+                      struct lookup_intent *it, 
                        struct kstat *stat)
  {
          int res = 0;
          struct inode *inode = de->d_inode;
  
+        res = ll_inode_revalidate_it(de, it);
          lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_GETATTR);
-        res = ll_inode_revalidate(de);
+
          if (res)
                  return res;
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        stat->dev = inode->i_dev;
-#endif
+
+        stat->dev = inode->i_sb->s_dev;
          stat->ino = inode->i_ino;
          stat->mode = inode->i_mode;
          stat->nlink = inode->i_nlink;
@@ -1234,6 +1229,8 @@ static int ll_getattr(struct vfsmount *mnt, struct dentry *de,
          stat->mtime = inode->i_mtime;
          stat->ctime = inode->i_ctime;
          stat->size = inode->i_size;
+        stat->blksize = inode->i_blksize;
+        stat->blocks = inode->i_blocks;
          return 0;
  }
  #endif
@@ -1254,9 +1251,9 @@ struct inode_operations ll_file_inode_operations = {
          setattr:    ll_setattr,
          truncate:   ll_truncate,
  #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-        getattr: ll_getattr,
+        getattr_it: ll_getattr,
  #else
-        revalidate: ll_inode_revalidate,
+        revalidate_it: ll_inode_revalidate_it,
  #endif
  };
  
@@ -1264,8 +1261,8 @@ struct inode_operations ll_special_inode_operations = {
          setattr_raw:    ll_setattr_raw,
          setattr:    ll_setattr,
  #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-        getattr:    ll_getattr,
+        getattr_it:    ll_getattr,
  #else
-        revalidate: ll_inode_revalidate,
+        revalidate_it: ll_inode_revalidate_it,
  #endif
  };
diff --git a/lustre/llite/iod.c b/lustre/llite/iod.c

index e3fabe6..c30ef8a 100644 (file)
--- a/lustre/llite/iod.c
+++ b/lustre/llite/iod.c
@@ -38,7 +38,6 @@
  #include <linux/rbtree.h>
  #include <linux/seq_file.h>
  #include <linux/time.h>
-#include "llite_internal.h"
  
  /* PG_inactive_clean is shorthand for rmap, we want free_high/low here.. */
  #ifdef PG_inactive_clean
@@ -47,6 +46,7 @@
  
  #define DEBUG_SUBSYSTEM S_LLITE
  #include <linux/lustre_lite.h>
+#include "llite_internal.h"
  
  #ifndef list_for_each_prev_safe
  #define list_for_each_prev_safe(pos, n, head) \
@@ -56,11 +56,6 @@
  
  extern spinlock_t inode_lock;
  
-struct ll_writeback_pages {
-        obd_count npgs, max;
-        struct brw_page *pga;
-};
-
  /*
   * check to see if we're racing with truncate and put the page in
   * the brw_page array.  returns 0 if there is more room and 1
@@ -139,13 +134,13 @@ static void ll_get_dirty_pages(struct inode *inode,
                  list_del(&page->list);
                  list_add(&page->list, &mapping->locked_pages);
  
-                if ( ! PageDirty(page) ) {
+                if (!PageDirty(page)) {
                          unlock_page(page);
                          continue;
                  }
                  ClearPageDirty(page);
  
-                if ( llwp_consume_page(llwp, inode, page) != 0)
+                if (llwp_consume_page(llwp, inode, page) != 0)
                          break;
          }
  
@@ -153,26 +148,31 @@ static void ll_get_dirty_pages(struct inode *inode,
          EXIT;
  }
  
-static void ll_writeback(struct inode *inode, struct ll_writeback_pages *llwp)
+static void ll_writeback(struct inode *inode, struct obdo *oa,
+                         struct ll_writeback_pages *llwp)
  {
-        int rc, i;
          struct ptlrpc_request_set *set;
+        int rc, i;
          ENTRY;
  
          CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),bytes=%u\n",
                 inode->i_ino, inode->i_generation, inode,
                 ((llwp->npgs-1) << PAGE_SHIFT) + llwp->pga[llwp->npgs-1].count);
  
+        SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */
          set = ptlrpc_prep_set();
          if (set == NULL) {
                  CERROR ("Can't create request set\n");
                  rc = -ENOMEM;
          } else {
-                rc = obd_brw_async(OBD_BRW_WRITE, ll_i2obdconn(inode),
+                rc = obd_brw_async(OBD_BRW_WRITE, ll_i2obdconn(inode), oa,
                                     ll_i2info(inode)->lli_smd, llwp->npgs,
                                     llwp->pga, set, NULL);
                  if (rc == 0)
-                        rc = ptlrpc_set_wait (set);
+                        rc = ptlrpc_set_wait(set);
+                if (rc == 0)
+                        obdo_refresh_inode(inode, oa,
+                                           oa->o_valid & ~OBD_MD_FLSIZE);
                  ptlrpc_set_destroy (set);
          }
          /*
@@ -278,6 +278,7 @@ int ll_check_dirty(struct super_block *sb)
          unsigned long old_flags; /* hack? */
          int making_progress;
          struct inode *inode;
+        struct obdo oa;
          int rc = 0;
          ENTRY;
  
@@ -328,12 +329,18 @@ int ll_check_dirty(struct super_block *sb)
                          llwp.npgs = 0;
                          ll_get_dirty_pages(inode, &llwp);
                          if (llwp.npgs) {
-                               lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
-                                                   LPROC_LL_WB_PRESSURE,
-                                                   llwp.npgs);
-                               ll_writeback(inode, &llwp);
-                               rc += llwp.npgs;
-                               making_progress = 1;
+                                oa.o_id =
+                                      ll_i2info(inode)->lli_smd->lsm_object_id;
+                                oa.o_valid = OBD_MD_FLID;
+                                obdo_from_inode(&oa, inode,
+                                                OBD_MD_FLTYPE | OBD_MD_FLATIME|
+                                                OBD_MD_FLMTIME| OBD_MD_FLCTIME);
+                                lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
+                                                    LPROC_LL_WB_PRESSURE,
+                                                    llwp.npgs);
+                                ll_writeback(inode, &oa, &llwp);
+                                rc += llwp.npgs;
+                                making_progress = 1;
                          }
                  } while (llwp.npgs && should_writeback());
  
@@ -382,13 +389,14 @@ cleanup:
  }
  #endif /* linux 2.5 */
  
-int ll_batch_writepage(struct inode *inode, struct page *page)
+int ll_batch_writepage(struct inode *inode, struct obdo *oa, struct page *page)
  {
          unsigned long old_flags; /* hack? */
          struct ll_writeback_pages llwp;
          int rc = 0;
          ENTRY;
  
+        SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */
          old_flags = current->flags;
          current->flags |= PF_MEMALLOC;
          rc = ll_alloc_brw(inode, &llwp);
@@ -401,7 +409,7 @@ int ll_batch_writepage(struct inode *inode, struct page *page)
          if (llwp.npgs) {
                  lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
                                      LPROC_LL_WB_WRITEPAGE, llwp.npgs);
-                ll_writeback(inode, &llwp);
+                ll_writeback(inode, oa, &llwp);
          }
          kfree(llwp.pga);
  
diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h

index 4684383..fd37709 100644 (file)
--- a/lustre/llite/llite_internal.h
+++ b/lustre/llite/llite_internal.h
@@ -10,11 +10,49 @@
  #ifndef LLITE_INTERNAL_H
  #define LLITE_INTERNAL_H
  
+
+struct ll_sb_info;
  struct lustre_handle;
  struct lov_stripe_md;
  
+extern void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi);
+extern struct proc_dir_entry *proc_lustre_fs_root;
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+# define hlist_del_init list_del_init
+#endif 
+
+static inline struct inode *ll_info2i(struct ll_inode_info *lli)
+{
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+        return &lli->lli_vfs_inode;
+#else
+        return list_entry(lli, struct inode, u.generic_ip);
+#endif
+}
+
+/* llite/commit_callback.c */
+int ll_commitcbd_setup(struct ll_sb_info *);
+int ll_commitcbd_cleanup(struct ll_sb_info *);
+
+/* lproc_llite.c */
+int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
+                                struct super_block *sb, char *osc, char *mdc);
+void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi);
+
+/* llite/namei.c */
+struct inode *ll_iget(struct super_block *sb, ino_t hash,
+                      struct lustre_md *lic);
+struct dentry *ll_find_alias(struct inode *, struct dentry *);
+int ll_it_open_error(int phase, struct lookup_intent *it);
  int ll_mdc_cancel_unused(struct lustre_handle *conn, struct inode *inode,
                           int flags, void *opaque);
+
+/* llite/rw.c */
+void ll_end_writeback(struct inode *, struct page *);
+
+void ll_remove_dirty(struct inode *inode, unsigned long start,
+                     unsigned long end);
  int ll_rd_dirty_pages(char *page, char **start, off_t off, int count,
                        int *eof, void *data);
  int ll_rd_max_dirty_pages(char *page, char **start, off_t off, int count,
@@ -26,4 +64,96 @@ int ll_clear_dirty_pages(struct lustre_handle *conn, struct lov_stripe_md *lsm,
  int ll_mark_dirty_page(struct lustre_handle *conn, struct lov_stripe_md *lsm,
                         unsigned long index);
  
+/* llite/file.c */
+extern int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *);
+
+/* llite/super.c */
+int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc);
+int ll_setattr(struct dentry *de, struct iattr *attr);
+
+/* iod.c */
+#define IO_STAT_ADD(FIS, STAT, VAL) do {        \
+        struct file_io_stats *_fis_ = (FIS);    \
+        spin_lock(&_fis_->fis_lock);            \
+        _fis_->fis_##STAT += VAL;               \
+        spin_unlock(&_fis_->fis_lock);          \
+} while (0)
+
+#define INODE_IO_STAT_ADD(INODE, STAT, VAL)        \
+        IO_STAT_ADD(&ll_i2sbi(INODE)->ll_iostats, STAT, VAL)
+
+#define PAGE_IO_STAT_ADD(PAGE, STAT, VAL)               \
+        INODE_IO_STAT_ADD((PAGE)->mapping, STAT, VAL)
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+/* XXX lliod needs more work in 2.5 before being proven and brought back
+ * to 2.4, it'll at least require a patch to introduce page->private */
+int lliod_start(struct ll_sb_info *sbi, struct inode *inode);
+void lliod_stop(struct ll_sb_info *sbi);
+#else
+#define lliod_start(sbi, inode) ({int _ret = 0; (void)sbi, (void)inode; _ret;})
+#define lliod_stop(sbi) do { (void)sbi; } while (0)
+#endif
+void lliod_wakeup(struct inode *inode);
+void lliod_give_plist(struct inode *inode, struct plist *plist, int rw);
+void lliod_give_page(struct inode *inode, struct page *page, int rw);
+void plist_init(struct plist *plist); /* for lli initialization.. */
+
+void ll_lldo_init(struct ll_dirty_offsets *lldo);
+void ll_record_dirty(struct inode *inode, unsigned long offset);
+void ll_remove_dirty(struct inode *inode, unsigned long start,
+                     unsigned long end);
+int ll_find_dirty(struct ll_dirty_offsets *lldo, unsigned long *start,
+                  unsigned long *end);
+int ll_farthest_dirty(struct ll_dirty_offsets *lldo, unsigned long *farthest);
+
+
+/* llite/super25.c */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+int ll_getattr(struct vfsmount *mnt, struct dentry *de,
+               struct lookup_intent *it, 
+               struct kstat *stat);
+#endif
+
+
+/* llite/dcache.c */
+void ll_intent_release(struct lookup_intent *);
+extern void ll_set_dd(struct dentry *de);
+void ll_unhash_aliases(struct inode *);
+
+/* llite/rw.c */
+void ll_truncate(struct inode *inode);
+void ll_end_writeback(struct inode *inode, struct page *page);
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+int ll_check_dirty(struct super_block *sb);
+int ll_batch_writepage(struct inode *inode, struct obdo *oa, struct page *page);
+#else
+#define ll_check_dirty(SB) do { (void)SB; } while (0)
+#endif
+
+/* llite/llite_lib.c */
+
+extern struct super_operations ll_super_operations;
+
+char *ll_read_opt(const char *opt, char *data);
+int ll_set_opt(const char *opt, char *data, int fl);
+void ll_options(char *options, char **ost, char **mds, int *flags);
+void ll_lli_init(struct ll_inode_info *lli);
+int ll_fill_super(struct super_block *sb, void *data, int silent);
+void ll_put_super(struct super_block *sb);
+void ll_clear_inode(struct inode *inode);
+int ll_attr2inode(struct inode *inode, struct iattr *attr, int trunc);
+int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc);
+int ll_setattr_raw(struct inode *inode, struct iattr *attr);
+int ll_setattr(struct dentry *de, struct iattr *attr);
+int ll_statfs(struct super_block *sb, struct kstatfs *sfs);
+void ll_update_inode(struct inode *inode, struct mds_body *body,
+                     struct lov_stripe_md *lsm);
+int it_disposition(struct lookup_intent *it, int flag);
+void it_set_disposition(struct lookup_intent *it, int flag);
+void ll_read_inode2(struct inode *inode, void *opaque);
+void ll_umount_begin(struct super_block *sb);
+
+
+
  #endif /* LLITE_INTERNAL_H */
diff --git a/lustre/llite/lproc_llite.c b/lustre/llite/lproc_llite.c

index 42fea4b..8908d44 100644 (file)
--- a/lustre/llite/lproc_llite.c
+++ b/lustre/llite/lproc_llite.c
@@ -22,15 +22,13 @@
  #define DEBUG_SUBSYSTEM S_LLITE
  
  #include <linux/version.h>
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#include <asm/statfs.h>
-#endif
  #include <linux/lustre_lite.h>
  #include <linux/lprocfs_status.h>
  
  #include "llite_internal.h"
  
  /* /proc/lustre/llite mount point registration */
+struct proc_dir_entry *proc_lustre_fs_root;
  
  #ifndef LPROCFS
  int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
@@ -41,36 +39,113 @@ int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
  void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi){}
  #else
  
-#define LPROC_LLITE_STAT_FCT(fct_name, get_statfs_fct)                    \
-int fct_name(char *page, char **start, off_t off,                         \
-             int count, int *eof, void *data)                             \
-{                                                                         \
-        struct statfs sfs;                                                \
-        int rc;                                                           \
-        LASSERT(data != NULL);                                            \
-        rc = get_statfs_fct((struct super_block*)data, &sfs);             \
-        return (rc==0                                                     \
-                ? lprocfs_##fct_name (page, start, off, count, eof, &sfs) \
-                : rc);                                                    \
+long long mnt_instance;
+
+static int ll_rd_blksize(char *page, char **start, off_t off, int count,
+                         int *eof, void *data)
+{
+        struct super_block *sb = (struct super_block *)data;
+        struct obd_statfs osfs;
+        int rc;
+
+        LASSERT(sb != NULL);
+        rc = ll_statfs_internal(sb, &osfs, jiffies - HZ);
+        if (!rc) {
+              *eof = 1;
+              rc = snprintf(page, count, "%u\n", osfs.os_bsize);
+        }
+
+        return rc;
  }
  
-long long mnt_instance;
+static int ll_rd_kbytestotal(char *page, char **start, off_t off, int count,
+                             int *eof, void *data)
+{
+        struct super_block *sb = (struct super_block *)data;
+        struct obd_statfs osfs;
+        int rc;
+
+        LASSERT(sb != NULL);
+        rc = ll_statfs_internal(sb, &osfs, jiffies - HZ);
+        if (!rc) {
+                __u32 blk_size = osfs.os_bsize >> 10;
+                __u64 result = osfs.os_blocks;
+
+                while (blk_size >>= 1)
+                        result <<= 1;
+
+                *eof = 1;
+                rc = snprintf(page, count, LPU64"\n", result);
+        }
+        return rc;
+
+}
+
+static int ll_rd_kbytesfree(char *page, char **start, off_t off, int count,
+                            int *eof, void *data)
+{
+        struct super_block *sb = (struct super_block *)data;
+        struct obd_statfs osfs;
+        int rc;
  
-LPROC_LLITE_STAT_FCT(rd_blksize,     vfs_statfs);
-LPROC_LLITE_STAT_FCT(rd_kbytestotal, vfs_statfs);
-LPROC_LLITE_STAT_FCT(rd_kbytesfree,  vfs_statfs);
-LPROC_LLITE_STAT_FCT(rd_filestotal,  vfs_statfs);
-LPROC_LLITE_STAT_FCT(rd_filesfree,   vfs_statfs);
-LPROC_LLITE_STAT_FCT(rd_filegroups,  vfs_statfs);
+        LASSERT(sb != NULL);
+        rc = ll_statfs_internal(sb, &osfs, jiffies - HZ);
+        if (!rc) {
+                __u32 blk_size = osfs.os_bsize >> 10;
+                __u64 result = osfs.os_bfree;
+
+                while (blk_size >>= 1)
+                        result <<= 1;
+
+                *eof = 1;
+                rc = snprintf(page, count, LPU64"\n", result);
+        }
+        return rc;
+}
+
+static int ll_rd_filestotal(char *page, char **start, off_t off, int count,
+                            int *eof, void *data)
+{
+        struct super_block *sb = (struct super_block *)data;
+        struct obd_statfs osfs;
+        int rc;
+
+        LASSERT(sb != NULL);
+        rc = ll_statfs_internal(sb, &osfs, jiffies - HZ);
+        if (!rc) {
+                 *eof = 1;
+                 rc = snprintf(page, count, LPU64"\n", osfs.os_files);
+        }
+        return rc;
+}
  
-int rd_path(char *page, char **start, off_t off, int count, int *eof,
-            void *data)
+static int ll_rd_filesfree(char *page, char **start, off_t off, int count,
+                           int *eof, void *data)
+{
+        struct super_block *sb = (struct super_block *)data;
+        struct obd_statfs osfs;
+        int rc;
+
+        LASSERT(sb != NULL);
+        rc = ll_statfs_internal(sb, &osfs, jiffies - HZ);
+        if (!rc) {
+                 *eof = 1;
+                 rc = snprintf(page, count, LPU64"\n", osfs.os_ffree);
+        }
+        return rc;
+
+}
+
+#if 0
+static int ll_rd_path(char *page, char **start, off_t off, int count, int *eof,
+                      void *data)
  {
          return 0;
  }
+#endif
  
-int rd_fstype(char *page, char **start, off_t off, int count, int *eof,
-              void *data)
+static int ll_rd_fstype(char *page, char **start, off_t off, int count,
+                        int *eof, void *data)
  {
          struct super_block *sb = (struct super_block*)data;
  
@@ -79,8 +154,8 @@ int rd_fstype(char *page, char **start, off_t off, int count, int *eof,
          return snprintf(page, count, "%s\n", sb->s_type->name);
  }
  
-int rd_sb_uuid(char *page, char **start, off_t off, int count, int *eof,
-               void *data)
+static int ll_rd_sb_uuid(char *page, char **start, off_t off, int count,
+                         int *eof, void *data)
  {
          struct super_block *sb = (struct super_block *)data;
  
@@ -89,18 +164,20 @@ int rd_sb_uuid(char *page, char **start, off_t off, int count, int *eof,
          return snprintf(page, count, "%s\n", ll_s2sbi(sb)->ll_sb_uuid.uuid);
  }
  
-struct lprocfs_vars lprocfs_obd_vars[] = {
-        { "uuid",        rd_sb_uuid,     0, 0 },
-        { "mntpt_path",  rd_path,        0, 0 },
-        { "fstype",      rd_fstype,      0, 0 },
-        { "blocksize",   rd_blksize,     0, 0 },
-        { "kbytestotal", rd_kbytestotal, 0, 0 },
-        { "kbytesfree",  rd_kbytesfree,  0, 0 },
-        { "filestotal",  rd_filestotal,  0, 0 },
-        { "filesfree",   rd_filesfree,   0, 0 },
-        { "filegroups",  rd_filegroups,  0, 0 },
-        { "dirty_pages", ll_rd_dirty_pages, 0, 0},
+static struct lprocfs_vars lprocfs_obd_vars[] = {
+        { "uuid",         ll_rd_sb_uuid,          0, 0 },
+        //{ "mntpt_path",   ll_rd_path,             0, 0 },
+        { "fstype",       ll_rd_fstype,           0, 0 },
+        { "blocksize",    ll_rd_blksize,          0, 0 },
+        { "kbytestotal",  ll_rd_kbytestotal,      0, 0 },
+        { "kbytesfree",   ll_rd_kbytesfree,       0, 0 },
+        { "filestotal",   ll_rd_filestotal,       0, 0 },
+        { "filesfree",    ll_rd_filesfree,        0, 0 },
+        //{ "filegroups",   lprocfs_rd_filegroups,  0, 0 },
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+        { "dirty_pages",  ll_rd_dirty_pages,      0, 0},
          { "max_dirty_pages", ll_rd_max_dirty_pages, ll_wr_max_dirty_pages, 0},
+#endif
          { 0 }
  };
  
diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c

index da6e670..b9223e8 100644 (file)
--- a/lustre/llite/namei.c
+++ b/lustre/llite/namei.c
@@ -41,74 +41,29 @@
  #include <linux/obd_support.h>
  #include <linux/lustre_lite.h>
  #include <linux/lustre_dlm.h>
-
-/* from dcache.c */
-extern void ll_set_dd(struct dentry *de);
-
-/* from super.c */
-extern void ll_change_inode(struct inode *inode);
-extern int ll_setattr(struct dentry *de, struct iattr *attr);
-
-/* from dir.c */
-extern int ll_add_link (struct dentry *dentry, struct inode *inode);
-obd_id ll_inode_by_name(struct inode * dir, struct dentry *dentry, int *typ);
-int ext2_make_empty(struct inode *inode, struct inode *parent);
-struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir,
-                   struct dentry *dentry, struct page ** res_page);
-int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page );
-int ext2_empty_dir (struct inode * inode);
-struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p);
-void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
-                   struct page *page, struct inode *inode);
-
-/*
- * Couple of helper functions - make the code slightly cleaner.
- */
-static inline void ext2_inc_count(struct inode *inode)
-{
-        inode->i_nlink++;
-}
-
-/* postpone the disk update until the inode really goes away */
-static inline void ext2_dec_count(struct inode *inode)
-{
-        inode->i_nlink--;
-}
-static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode)
-{
-        int err;
-        err = ll_add_link(dentry, inode);
-        if (!err) {
-                d_instantiate(dentry, inode);
-                return 0;
-        }
-        ext2_dec_count(inode);
-        iput(inode);
-        return err;
-}
+#include "llite_internal.h"
  
  /* methods */
  
  #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-static int ll_find_inode(struct inode *inode, unsigned long ino, void *opaque)
+static int ll_test_inode(struct inode *inode, unsigned long ino, void *opaque)
  #else
  static int ll_test_inode(struct inode *inode, void *opaque)
  #endif
  {
-        struct ll_read_inode2_cookie *lic = opaque;
-        struct mds_body *body = lic->lic_body;
+        struct lustre_md *md = opaque;
  
-        if (!(lic->lic_body->valid & (OBD_MD_FLGENER | OBD_MD_FLID)))
+        if (!(md->body->valid & (OBD_MD_FLGENER | OBD_MD_FLID)))
                  CERROR("invalid generation\n");
-        CDEBUG(D_VFSTRACE, "comparing inode %p ino %lu/%u to body %lu/%u\n",
-               inode, inode->i_ino, inode->i_generation, ino,
-               lic->lic_body->generation);
+        CDEBUG(D_VFSTRACE, "comparing inode %p ino %lu/%u to body %u/%u\n",
+               inode, inode->i_ino, inode->i_generation, 
+               md->body->ino, md->body->generation);
  
-        if (inode->i_generation != lic->lic_body->generation)
+        if (inode->i_generation != md->body->generation)
                  return 0;
  
          /* Apply the attributes in 'opaque' to this inode */
-        ll_update_inode(inode, body, lic->lic_lsm);
+        ll_update_inode(inode, md->body, md->lsm);
          return 1;
  }
  
@@ -127,16 +82,21 @@ int ll_unlock(__u32 mode, struct lustre_handle *lockh)
   * Returns inode or NULL
   */
  #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-extern int ll_read_inode2(struct inode *inode, void *opaque);
+int ll_set_inode(struct inode *inode, void *opaque)
+{
+        ll_read_inode2(inode, opaque);
+        return 0;
+}
  struct inode *ll_iget(struct super_block *sb, ino_t hash,
-                      struct ll_read_inode2_cookie *lic)
+                      struct lustre_md *md)
  {
          struct inode *inode;
  
          LASSERT(hash != 0);
-        inode = iget5_locked(sb, hash, ll_test_inode, ll_read_inode2, lic);
-        if (inode == NULL)
-                return NULL;              /* removed ERR_PTR(-ENOMEM) -eeb */
+        inode = iget5_locked(sb, hash, ll_test_inode, ll_set_inode, md);
+
+        if (!inode)
+                return (NULL);              /* removed ERR_PTR(-ENOMEM) -eeb */
  
          if (inode->i_state & I_NEW)
                  unlock_new_inode(inode);
@@ -146,11 +106,11 @@ struct inode *ll_iget(struct super_block *sb, ino_t hash,
  }
  #else
  struct inode *ll_iget(struct super_block *sb, ino_t hash,
-                      struct ll_read_inode2_cookie *lic)
+                      struct lustre_md *md)
  {
          struct inode *inode;
          LASSERT(hash != 0);
-        inode = iget4(sb, hash, ll_find_inode, lic);
+        inode = iget4(sb, hash, ll_test_inode, md);
          CDEBUG(D_VFSTRACE, "inode: %lu/%u(%p)\n", inode->i_ino,
                 inode->i_generation, inode);
          return inode;
@@ -171,36 +131,37 @@ static int ll_intent_to_lock_mode(struct lookup_intent *it)
  
  int ll_it_open_error(int phase, struct lookup_intent *it)
  {
-        if (it->it_disposition & IT_OPEN_OPEN) {
-                if (phase == IT_OPEN_OPEN)
+        if (it_disposition(it, DISP_OPEN_OPEN)) {
+                if (phase == DISP_OPEN_OPEN)
                          return it->it_status;
                  else
                          return 0;
          }
  
-        if (it->it_disposition & IT_OPEN_CREATE) {
-                if (phase == IT_OPEN_CREATE)
+        if (it_disposition(it, DISP_OPEN_CREATE)) {
+                if (phase == DISP_OPEN_CREATE)
                          return it->it_status;
                  else
                          return 0;
          }
  
-        if (it->it_disposition & IT_OPEN_LOOKUP) {
-                if (phase == IT_OPEN_LOOKUP)
+        if (it_disposition(it, DISP_LOOKUP_EXECD)) {
+                if (phase == DISP_LOOKUP_EXECD)
                          return it->it_status;
                  else
                          return 0;
          }
+        CERROR("it disp: %X, status: %d\n", it->it_disposition, it->it_status);
          LBUG();
          return 0;
  }
  
-int ll_mdc_blocking_ast(struct ldlm_lock *lock,
-                        struct ldlm_lock_desc *desc,
+int ll_mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
                          void *data, int flag)
  {
          int rc;
          struct lustre_handle lockh;
+        struct inode *inode = lock->l_data;
          ENTRY;
  
          switch (flag) {
@@ -214,9 +175,13 @@ int ll_mdc_blocking_ast(struct ldlm_lock *lock,
                  break;
          case LDLM_CB_CANCELING: {
                  /* Invalidate all dentries associated with this inode */
-                struct inode *inode = lock->l_data;
-                LASSERT(inode != NULL);
-
+                if (inode == NULL)
+                        break;
+                if (lock->l_resource->lr_name.name[0] != inode->i_ino ||
+                    lock->l_resource->lr_name.name[1] != inode->i_generation) {
+                        LDLM_ERROR(lock, "data mismatch with ino %lu/%u",
+                                   inode->i_ino, inode->i_generation);
+                }
                  if (S_ISDIR(inode->i_mode)) {
                          CDEBUG(D_INODE, "invalidating inode %lu\n",
                                 inode->i_ino);
@@ -227,7 +192,7 @@ int ll_mdc_blocking_ast(struct ldlm_lock *lock,
  #warning FIXME: we should probably free this inode if there are no aliases
                  if (inode->i_sb->s_root &&
                      inode != inode->i_sb->s_root->d_inode)
-                        d_unhash_aliases(inode);
+                        ll_unhash_aliases(inode);
                  break;
          }
          default:
@@ -237,17 +202,6 @@ int ll_mdc_blocking_ast(struct ldlm_lock *lock,
          RETURN(0);
  }
  
-void ll_mdc_lock_set_inode(struct lustre_handle *lockh, struct inode *inode)
-{
-        struct ldlm_lock *lock = ldlm_handle2lock(lockh);
-        ENTRY;
-
-        LASSERT(lock != NULL);
-        lock->l_data = inode;
-        LDLM_LOCK_PUT(lock);
-        EXIT;
-}
-
  int ll_mdc_cancel_unused(struct lustre_handle *conn, struct inode *inode,
                           int flags, void *opaque)
  {
@@ -287,35 +241,74 @@ void ll_prepare_mdc_op_data(struct mdc_op_data *data,
          data->mode = mode;
  }
  
-#define IT_ENQ_COMPLETE (1<<16)
-
+/* 
+ * This long block is all about fixing up the local state so that it is
+ * correct as of the moment _before_ the operation was applied; that
+ * way, the VFS will think that everything is normal and call Lustre's
+ * regular VFS methods.
+ *
+ * If we're performing a creation, that means that unless the creation
+ * failed with EEXIST, we should fake up a negative dentry.
+ *
+ * For everything else, we want the lookup to succeed.
+ *
+ * One additional note: if CREATE or OPEN succeeded, we add an extra
+ * reference to the request because we need to keep it around until
+ * ll_create/ll_open gets called.
+ *
+ * The server will return to us, in it_disposition, an indication of
+ * exactly what it_status refers to.
+ *
+ * If DISP_OPEN_OPEN is set, then it_status refers to the open() call,
+ * otherwise if DISP_OPEN_CREATE is set, then it_status is the
+ * creation failure mode.  In either case, one of DISP_LOOKUP_NEG or
+ * DISP_LOOKUP_POS will be set, indicating whether the child lookup
+ * was successful.
+ *
+ * Else, if DISP_LOOKUP_EXECD then it_status is the rc of the child
+ * lookup.
+ */
  int ll_intent_lock(struct inode *parent, struct dentry **de,
-                   struct lookup_intent *it, intent_finish_cb intent_finish)
+                   struct lookup_intent *it, int flags, intent_finish_cb intent_finish)
  {
          struct dentry *dentry = *de;
          struct inode *inode = dentry->d_inode;
          struct ll_sb_info *sbi = ll_i2sbi(parent);
          struct lustre_handle lockh;
          struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
-        struct ptlrpc_request *request = NULL;
-        int rc = 0, offset, flag = 0;
+        struct ptlrpc_request *request;
+        int rc = 0;
+        struct mds_body *mds_body;
+        int mode;
          obd_id ino = 0;
          ENTRY;
  
  #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-        if (it && it->it_op == 0)
-                *it = lookup_it;
+        if (it && it->it_magic != INTENT_MAGIC) { 
+                CERROR("WARNING: uninitialized intent\n");
+                LBUG();
+                intent_init(it, IT_LOOKUP, 0);
+        }
+        if (it->it_op == IT_GETATTR || 
+            it->it_op == 0)
+                it->it_op = IT_LOOKUP;
+        
  #endif
-        if (it == NULL)
+        if (!it ||it->it_op == IT_GETXATTR)
                  it = &lookup_it;
  
+        it->it_op_release = ll_intent_release;
+
          CDEBUG(D_DLMTRACE, "name: %*s, intent: %s\n", dentry->d_name.len,
                 dentry->d_name.name, ldlm_it2str(it->it_op));
-
+        
          if (dentry->d_name.len > EXT2_NAME_LEN)
                  RETURN(-ENAMETOOLONG);
  
-        if (!(it->it_disposition & IT_ENQ_COMPLETE)) {
+        /* This function may be called twice; we want to execute the
+           request associated with the intent only once. If it was
+           done already, we skip past this and use the results. */
+        if (!it_disposition(it, DISP_ENQ_COMPLETE)) {
                  struct mdc_op_data op_data;
  
                  ll_prepare_mdc_op_data(&op_data, parent, dentry->d_inode,
@@ -325,174 +318,73 @@ int ll_intent_lock(struct inode *parent, struct dentry **de,
                  rc = mdc_enqueue(&sbi->ll_mdc_conn, LDLM_PLAIN, it,
                                   ll_intent_to_lock_mode(it), &op_data,
                                   &lockh, NULL, 0, ldlm_completion_ast,
-                                 ll_mdc_blocking_ast, parent);
+                                 ll_mdc_blocking_ast, NULL);
                  if (rc < 0)
                          RETURN(rc);
                  memcpy(it->it_lock_handle, &lockh, sizeof(lockh));
          }
-
-        request = (struct ptlrpc_request *)it->it_data;
+        request = it->it_data;
+        LASSERT(request != NULL);
  
          /* non-zero it_disposition indicates that the server performed the
           * intent on our behalf. */
-        if (it->it_disposition) {
-                struct mds_body *mds_body;
-                int mode;
-
-                /* This long block is all about fixing up the local
-                 * state so that it is correct as of the moment
-                 * _before_ the operation was applied; that way, the
-                 * VFS will think that everything is normal and call
-                 * Lustre's regular FS function.
-                 *
-                 * If we're performing a creation, that means that unless the
-                 * creation failed with EEXIST, we should fake up a negative
-                 * dentry.  Likewise for the target of a hard link.
-                 *
-                 * For everything else, we want to lookup to succeed. */
-
-                /* One additional note: if CREATE/MKDIR/etc succeeded,
-                 * we add an extra reference to the request because we
-                 * need to keep it around until ll_create gets called.
-                 * For anything else which results in
-                 * LL_LOOKUP_POSITIVE, we can do the iget()
-                 * immediately with the contents of the reply (in the
-                 * intent_finish callback).  In the create case,
-                 * however, we need to wait until ll_create_node to do
-                 * the iget() or the VFS will abort with -EEXISTS.
-                 */
-
-                offset = 1;
-                mds_body = lustre_msg_buf(request->rq_repmsg, offset,
-                                          sizeof(*mds_body));
-                LASSERT (mds_body != NULL);           /* mdc_enqueue checked */
-                LASSERT_REPSWABBED (request, offset); /* mdc_enqueue swabbed */
-
-                ino = mds_body->fid1.id;
-                mode = mds_body->mode;
-
-                /*We were called from revalidate2: did we find the same inode?*/
-                if (inode && (ino != inode->i_ino ||
-                    mds_body->fid1.generation != inode->i_generation)) {
-                        it->it_disposition |= IT_ENQ_COMPLETE;
-                        RETURN(-ESTALE);
-                }
+        LASSERT(it_disposition(it, DISP_IT_EXECD));
+
+                
+        mds_body = lustre_msg_buf(request->rq_repmsg, 1, sizeof(*mds_body));
+        LASSERT(mds_body != NULL);           /* mdc_enqueue checked */
+        LASSERT_REPSWABBED(request, 1); /* mdc_enqueue swabbed */
+
+        /* XXX everything with fids please, no ino's inode's etc */
+        ino = mds_body->fid1.id;
+        mode = mds_body->mode;
+
+        /*We were called from revalidate2: did we find the same inode?*/
+        if (inode && 
+            (ino != inode->i_ino ||
+             mds_body->fid1.generation != inode->i_generation)) {
+                it_set_disposition(it, DISP_ENQ_COMPLETE);
+                RETURN(-ESTALE);
+        }
  
-                /* If we're doing an IT_OPEN which did not result in an actual
-                 * successful open, then we need to remove the bit which saves
-                 * this request for unconditional replay. */
-                if (it->it_op & IT_OPEN &&
-                    (!(it->it_disposition & IT_OPEN_OPEN) ||
-                     it->it_status != 0)) {
+        /* If we're doing an IT_OPEN which did not result in an actual
+         * successful open, then we need to remove the bit which saves
+         * this request for unconditional replay. */
+        if (it->it_op & IT_OPEN) {
+                if (!it_disposition(it, DISP_OPEN_OPEN) ||
+                    it->it_status != 0) {
                          unsigned long flags;
-
+                
                          spin_lock_irqsave (&request->rq_lock, flags);
                          request->rq_replay = 0;
                          spin_unlock_irqrestore (&request->rq_lock, flags);
                  }
-
-                if (it->it_op & IT_CREAT) {
-                        mdc_store_inode_generation(request, 2, 1);
-                        /* The server will return to us, in it_disposition, an
-                         * indication of exactly what it_status refers to.
-                         *
-                         * If IT_OPEN_OPEN is set, then it_status refers to the
-                         * open() call, otherwise if IT_OPEN_CREATE is set, then
-                         * it status is the creation failure mode.  In either
-                         * case, one of IT_OPEN_NEG or IT_OPEN_POS will be set,
-                         * indicating whether the child lookup was successful.
-                         *
-                         * Else, if IT_OPEN_LOOKUP then it_status is the rc
-                         * of the child lookup.
-                         *
-                         * Finally, if none of the bits are set, then the
-                         * failure occurred while looking up the parent. */
-                        rc = ll_it_open_error(IT_OPEN_LOOKUP, it);
-                        if (rc)
-                                GOTO(drop_req, rc);
-
-                        if (it->it_disposition & IT_OPEN_CREATE)
-                                ptlrpc_request_addref(request);
-                        if (it->it_disposition & IT_OPEN_OPEN)
-                                ptlrpc_request_addref(request);
-
-                        if (it->it_disposition & IT_OPEN_NEG)
-                                flag = LL_LOOKUP_NEGATIVE;
-                        else
-                                flag = LL_LOOKUP_POSITIVE;
-                } else if (it->it_op == IT_OPEN) {
-                        LASSERT(!(it->it_disposition & IT_OPEN_CREATE));
-
-                        rc = ll_it_open_error(IT_OPEN_LOOKUP, it);
-                        if (rc)
-                                GOTO(drop_req, rc);
-
-                        if (it->it_disposition & IT_OPEN_OPEN)
-                                ptlrpc_request_addref(request);
-
-                        if (it->it_disposition & IT_OPEN_NEG)
-                                flag = LL_LOOKUP_NEGATIVE;
-                        else
-                                flag = LL_LOOKUP_POSITIVE;
-                } else if (it->it_op & (IT_GETATTR | IT_LOOKUP)) {
-                        /* For check ops, we want the lookup to succeed */
-                        it->it_data = NULL;
-                        if (it->it_status)
-                                flag = LL_LOOKUP_NEGATIVE;
-                        else
-                                flag = LL_LOOKUP_POSITIVE;
-                } else
-                        LBUG();
-        } else {
-                struct ll_fid fid;
-                obd_flag valid;
-                int eadatalen;
-                int mode;
-
-                LBUG(); /* For the moment, no non-intent locks */
-
-                /* it_disposition == 0 indicates that it just did a simple lock
-                 * request, for which we are very thankful.  move along with
-                 * the local lookup then. */
-
-                //memcpy(&lli->lli_intent_lock_handle, &lockh, sizeof(lockh));
-                offset = 0;
-
-                ino = ll_inode_by_name(parent, dentry, &mode);
-                if (!ino) {
-                        CERROR("inode %*s not found by name\n",
-                               dentry->d_name.len, dentry->d_name.name);
-                        GOTO(drop_lock, rc = -ENOENT);
-                }
-
-                valid = OBD_MD_FLNOTOBD;
-
-                if (S_ISREG(mode)) {
-                        eadatalen = obd_size_diskmd(&sbi->ll_osc_conn, NULL),
-                        valid |= OBD_MD_FLEASIZE;
-                } else {
-                        eadatalen = 0;
-                        valid |= OBD_MD_FLBLOCKS;
-                }
-
-                fid.id = ino;
-                fid.generation = 0;
-                fid.f_type = mode;
-                rc = mdc_getattr(&sbi->ll_mdc_conn, &fid, valid,
-                                 eadatalen, &request);
-                if (rc) {
-                        CERROR("failure %d inode "LPX64"\n", rc, ino);
-                        GOTO(drop_lock, rc = -abs(rc));
-                }
          }
  
-        LASSERT (request != NULL);
+        rc = ll_it_open_error(DISP_LOOKUP_EXECD, it);
+        if (rc)
+                GOTO(drop_req, rc);
+        
+        /* keep requests around for the multiple phases of the call
+         * this shows the DISP_XX must guarantee we make it into the call 
+         */ 
+        if (it_disposition(it, DISP_OPEN_CREATE))
+                ptlrpc_request_addref(request);
+        if (it_disposition(it, DISP_OPEN_OPEN))
+                ptlrpc_request_addref(request);
+        
+        if (it->it_op & IT_CREAT) {
+                /* XXX this belongs in ll_create_iit */
+        } else if (it->it_op == IT_OPEN) {
+                LASSERT(!it_disposition(it, DISP_OPEN_CREATE));
+        } else 
+                LASSERT(it->it_op & (IT_GETATTR | IT_LOOKUP));
  
          if (intent_finish != NULL) {
                  struct lustre_handle old_lock;
                  struct ldlm_lock *lock;
  
-                rc = intent_finish(flag, request, parent, de, it, offset, ino);
+                rc = intent_finish(request, parent, de, it, 1, ino);
                  dentry = *de; /* intent_finish may change *de */
                  inode = dentry->d_inode;
                  if (rc != 0)
@@ -525,29 +417,16 @@ int ll_intent_lock(struct inode *parent, struct dentry **de,
          }
          ptlrpc_req_finished(request);
  
-        /* This places the intent in the dentry so that the vfs_xxx
-         * operation can lay its hands on it; but that is not always
-         * needed...  (we need to save it in the GETATTR case for the
-         * benefit of ll_inode_revalidate -phil) */
-        /* Ignore trying to save the intent for "special" inodes as
-         * they have special semantics that can cause deadlocks on
-         * the intent semaphore. -mmex */
-        if ((!inode || S_ISDIR(inode->i_mode) || S_ISREG(inode->i_mode) ||
-             S_ISLNK(inode->i_mode)) && (it->it_op & (IT_OPEN | IT_GETATTR)))
-                LL_SAVE_INTENT(dentry, it);
-        else
-                CDEBUG(D_DENTRY,
-                       "D_IT dentry %p fsdata %p intent: %s status %d\n",
-                       dentry, ll_d2d(dentry), ldlm_it2str(it->it_op),
-                       it->it_status);
-
+        CDEBUG(D_DENTRY, "D_IT dentry %p intent: %s status %d disp %x\n",
+               dentry, ldlm_it2str(it->it_op), it->it_status, it->it_disposition);
+        
+        /* drop IT_LOOKUP locks */
          if (it->it_op == IT_LOOKUP)
-                ll_intent_release(dentry, it);
-
+                ll_intent_release(it);
          RETURN(rc);
  
   drop_lock:
-        ll_intent_release(dentry, it);
+        ll_intent_release(it);
   drop_req:
          ptlrpc_req_finished(request);
          RETURN(rc);
@@ -582,7 +461,7 @@ struct dentry *ll_find_alias(struct inode *inode, struct dentry *de)
                  if (!list_empty(&dentry->d_lru))
                          list_del_init(&dentry->d_lru);
  
-                list_del_init(&dentry->d_hash);
+                hlist_del_init(&dentry->d_hash);
                  __d_rehash(dentry, 0); /* avoid taking dcache_lock inside */
                  spin_unlock(&dcache_lock);
                  atomic_inc(&dentry->d_count);
@@ -597,68 +476,34 @@ struct dentry *ll_find_alias(struct inode *inode, struct dentry *de)
  }
  
  static int
-lookup2_finish(int flag, struct ptlrpc_request *request,
+lookup2_finish(struct ptlrpc_request *request,
                 struct inode *parent, struct dentry **de,
                 struct lookup_intent *it, int offset, obd_id ino)
  {
          struct ll_sb_info *sbi = ll_i2sbi(parent);
          struct dentry *dentry = *de, *saved = *de;
          struct inode *inode = NULL;
-        struct ll_read_inode2_cookie lic = {.lic_body = NULL, .lic_lsm = NULL};
+        int rc;
  
          /* NB 1 request reference will be taken away by ll_intent_lock()
           * when I return */
-
-        if (!(flag & LL_LOOKUP_NEGATIVE)) {
+        if (!it_disposition(it, DISP_LOOKUP_NEG)) {
+                struct lustre_md md;
                  ENTRY;
  
-                /* We only get called if the mdc_enqueue() called from
-                 * ll_intent_lock() was successful.  Therefore the mds_body
-                 * is present and correct, and the eadata is present if
-                 * body->eadatasize != 0 (but still opaque, so only
-                 * obd_unpackmd() can check the size) */
-                lic.lic_body = lustre_msg_buf(request->rq_repmsg, offset,
-                                              sizeof (*lic.lic_body));
-                LASSERT(lic.lic_body != NULL);
-                LASSERT_REPSWABBED(request, offset);
-
-                if (S_ISREG(lic.lic_body->mode) &&
-                    (lic.lic_body->valid & OBD_MD_FLEASIZE)) {
-                        struct lov_mds_md    *lmm;
-                        int                   lmm_size;
-                        int                   rc;
-
-                        lmm_size = lic.lic_body->eadatasize;
-                        if (lmm_size == 0) {
-                                CERROR("OBD_MD_FLEASIZE set but "
-                                       "eadatasize 0\n");
-                                RETURN(-EPROTO);
-                        }
-                        lmm = lustre_msg_buf(request->rq_repmsg, offset + 1,
-                                             lmm_size);
-                        LASSERT(lmm != NULL);
-                        LASSERT_REPSWABBED(request, offset + 1);
-
-                        rc = obd_unpackmd(&sbi->ll_osc_conn,
-                                          &lic.lic_lsm, lmm, lmm_size);
-                        if (rc < 0) {
-                                CERROR("Error %d unpacking eadata\n", rc);
-                                RETURN(rc);
-                        }
-                        LASSERT(rc >= sizeof(*lic.lic_lsm));
-                }
+                rc =mdc_req2lustre_md(request, offset, &sbi->ll_osc_conn, &md);
+                if (rc) 
+                        RETURN(rc);
  
-                /* Both ENOMEM and an RPC timeout are possible in ll_iget; which
-                 * to pick?  A more generic EIO?  -phik */
-                inode = ll_iget(dentry->d_sb, ino, &lic);
+                inode = ll_iget(dentry->d_sb, ino, &md);
                  if (!inode) {
                          /* free the lsm if we allocated one above */
-                        if (lic.lic_lsm != NULL)
-                                obd_free_memmd(&sbi->ll_osc_conn, &lic.lic_lsm);
+                        if (md.lsm != NULL)
+                                obd_free_memmd(&sbi->ll_osc_conn, &md.lsm);
                          RETURN(-ENOMEM);
-                } else if (lic.lic_lsm != NULL &&
-                           ll_i2info(inode)->lli_smd != lic.lic_lsm) {
-                        obd_free_memmd(&sbi->ll_osc_conn, &lic.lic_lsm);
+                } else if (md.lsm != NULL &&
+                           ll_i2info(inode)->lli_smd != md.lsm) {
+                        obd_free_memmd(&sbi->ll_osc_conn, &md.lsm);
                  }
  
                  /* If this is a stat, get the authoritative file size */
@@ -685,8 +530,10 @@ lookup2_finish(int flag, struct ptlrpc_request *request,
                  /* We asked for a lock on the directory, and may have been
                   * granted a lock on the inode.  Just in case, fixup the data
                   * pointer. */
-                ll_mdc_lock_set_inode((struct lustre_handle*)it->it_lock_handle,
-                                      inode);
+                CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
+                       inode, inode->i_ino, inode->i_generation);
+                ldlm_lock_set_data((struct lustre_handle*)it->it_lock_handle,
+                                   inode);
          } else {
                  ENTRY;
          }
@@ -700,8 +547,8 @@ lookup2_finish(int flag, struct ptlrpc_request *request,
          RETURN(0);
  }
  
-static struct dentry *ll_lookup2(struct inode *parent, struct dentry *dentry,
-                                 struct lookup_intent *it)
+static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
+                                   struct lookup_intent *it, int flags)
  {
          struct dentry *save = dentry, *retval;
          int rc;
@@ -711,7 +558,11 @@ static struct dentry *ll_lookup2(struct inode *parent, struct dentry *dentry,
                 dentry->d_name.name, parent->i_ino, parent->i_generation,
                 parent, LL_IT2STR(it));
  
-        rc = ll_intent_lock(parent, &dentry, it, lookup2_finish);
+        if (d_mountpoint(dentry)) { 
+                CERROR("Tell Peter, lookup on mtpt, it %s\n", LL_IT2STR(it));
+        }
+
+        rc = ll_intent_lock(parent, &dentry, it, flags, lookup2_finish);
          if (rc < 0) {
                  CDEBUG(D_INFO, "ll_intent_lock: %d\n", rc);
                  GOTO(out, retval = ERR_PTR(rc));
@@ -725,167 +576,136 @@ static struct dentry *ll_lookup2(struct inode *parent, struct dentry *dentry,
          return retval;
  }
  
-/* We depend on "mode" being set with the proper file type/umask by now */
-static struct inode *ll_create_node(struct inode *dir, const char *name,
-                                    int namelen, const void *data, int datalen,
-                                    int mode, __u64 extra,
-                                    struct lookup_intent *it)
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry, 
+                                   struct nameidata *nd)
  {
-        struct inode *inode;
-        struct ptlrpc_request *request = NULL;
-        struct mds_body *body;
-        time_t time = LTIME_S(CURRENT_TIME);
-        struct ll_sb_info *sbi = ll_i2sbi(dir);
-        struct ll_read_inode2_cookie lic;
+        struct dentry *de;
          ENTRY;
  
-        if (it && it->it_disposition) {
-                ll_invalidate_inode_pages(dir);
-                request = it->it_data;
-                body = lustre_msg_buf(request->rq_repmsg, 1, sizeof (*body));
-                LASSERT (body != NULL);         /* checked already */
-                LASSERT_REPSWABBED (request, 1); /* swabbed already */
-        } else {
-                struct mdc_op_data op_data;
-                int gid = current->fsgid;
-                int rc;
-
-                if (dir->i_mode & S_ISGID) {
-                        gid = dir->i_gid;
-                        if (S_ISDIR(mode))
-                                mode |= S_ISGID;
-                }
-
-                ll_prepare_mdc_op_data(&op_data, dir, NULL, name, namelen, 0);
-                rc = mdc_create(&sbi->ll_mdc_conn, &op_data,
-                                data, datalen, mode, current->fsuid, gid,
-                                time, extra, &request);
-                if (rc) {
-                        inode = ERR_PTR(rc);
-                        GOTO(out, rc);
-                }
-                body = lustre_swab_repbuf(request, 0, sizeof (*body),
-                                          lustre_swab_mds_body);
-                if (body == NULL) {
-                        CERROR ("Can't unpack mds_body\n");
-                        GOTO (out, inode = ERR_PTR(-EPROTO));
-                }
-        }
-
-        lic.lic_body = body;
-        lic.lic_lsm = NULL;
-
-        inode = ll_iget(dir->i_sb, body->ino, &lic);
-        if (!inode || is_bad_inode(inode)) {
-                /* XXX might need iput() for bad inode */
-                int rc = -EIO;
-                CERROR("new_inode -fatal: rc %d\n", rc);
-                LBUG();
-                GOTO(out, rc);
-        }
-
-        if (!list_empty(&inode->i_dentry)) {
-                CERROR("new_inode -fatal: inode %d, ct %d lnk %d\n",
-                       body->ino, atomic_read(&inode->i_count),
-                       inode->i_nlink);
-                iput(inode);
-                LBUG();
-                inode = ERR_PTR(-EIO);
-                GOTO(out, -EIO);
-        }
-
-        if (it && it->it_disposition) {
-                /* We asked for a lock on the directory, but were
-                 * granted a lock on the inode.  Since we finally have
-                 * an inode pointer, stuff it in the lock. */
-                ll_mdc_lock_set_inode((struct lustre_handle*)it->it_lock_handle,
-                                      inode);
-        }
+        if (nd->flags & LOOKUP_LAST && !(nd->flags & LOOKUP_LINK_NOTLAST))
+                de = ll_lookup_it(parent, dentry, &nd->it, nd->flags);
+        else 
+                de = ll_lookup_it(parent, dentry, NULL, 0);
  
-        EXIT;
- out:
-        ptlrpc_req_finished(request);
-        return inode;
+        RETURN(de);
  }
+#endif
  
  static int ll_mdc_unlink(struct inode *dir, struct inode *child, __u32 mode,
                           const char *name, int len)
  {
          struct ptlrpc_request *request = NULL;
-        struct ll_sb_info *sbi = ll_i2sbi(dir);
          struct mds_body *body;
          struct lov_mds_md *eadata;
          struct lov_stripe_md *lsm = NULL;
-        struct lustre_handle lockh;
-        struct lookup_intent it = { .it_op = IT_UNLINK };
-        struct obdo *oa;
-        int err;
+        struct obd_trans_info oti = { 0 };
          struct mdc_op_data op_data;
+        struct obdo *oa;
+        int rc;
          ENTRY;
  
          ll_prepare_mdc_op_data(&op_data, dir, child, name, len, mode);
-
-        err = mdc_enqueue(&sbi->ll_mdc_conn, LDLM_PLAIN, &it, LCK_EX,
-                         &op_data, &lockh, NULL, 0,
-                         ldlm_completion_ast, ll_mdc_blocking_ast,
-                         dir);
-        request = (struct ptlrpc_request *)it.it_data;
-        if (err < 0)
-                GOTO(out, err);
-        if (it.it_status)
-                GOTO(out, err = it.it_status);
-        err = 0;
-
-        body = lustre_msg_buf (request->rq_repmsg, 1, sizeof (*body));
-        LASSERT (body != NULL);                 /* checked by mdc_enqueue() */
-        LASSERT_REPSWABBED (request, 1);        /* swabbed by mdc_enqueue() */
+        rc = mdc_unlink(&ll_i2sbi(dir)->ll_mdc_conn, &op_data, &request);
+        if (rc)
+                GOTO(out, rc);
+        /* req is swabbed so this is safe */
+        body = lustre_msg_buf(request->rq_repmsg, 0, sizeof(*body));
  
          if (!(body->valid & OBD_MD_FLEASIZE))
-                GOTO(out, 0);
+                GOTO(out, rc = 0);
  
          if (body->eadatasize == 0) {
-                CERROR ("OBD_MD_FLEASIZE set but eadatasize zero\n");
-                GOTO (out, err = -EPROTO);
+                CERROR("OBD_MD_FLEASIZE set but eadatasize zero\n");
+                GOTO(out, rc = -EPROTO);
          }
  
          /* The MDS sent back the EA because we unlinked the last reference
           * to this file. Use this EA to unlink the objects on the OST.
-         * Note that mdc_enqueue() has already checked there _is_ some EA
-         * data, but this data is opaque to both mdc_enqueue() and the MDS.
-         * We have to leave it to obd_unpackmd() to check it is complete
-         * and sensible. */
-        eadata = lustre_msg_buf (request->rq_repmsg, 2, body->eadatasize);
-        LASSERT (eadata != NULL);
-        LASSERT_REPSWABBED (request, 2);
-
-        err = obd_unpackmd(ll_i2obdconn(dir), &lsm, eadata,
-                           body->eadatasize);
-        if (err < 0) {
-                CERROR("obd_unpackmd: %d\n", err);
-                GOTO (out_unlock, err);
+         * It's opaque so we don't swab here; we leave it to obd_unpackmd() to
+         * check it is complete and sensible. */
+        eadata = lustre_swab_repbuf(request, 1, body->eadatasize, NULL);
+        LASSERT(eadata != NULL);
+        if (eadata == NULL) {
+                CERROR("Can't unpack MDS EA data\n");
+                GOTO(out, rc = -EPROTO);
          }
-        LASSERT (err >= sizeof (*lsm));
+
+        rc = obd_unpackmd(ll_i2obdconn(dir), &lsm, eadata, body->eadatasize);
+        if (rc < 0) {
+                CERROR("obd_unpackmd: %d\n", rc);
+                GOTO(out, rc);
+        }
+        LASSERT(rc >= sizeof(*lsm));
  
          oa = obdo_alloc();
          if (oa == NULL)
-                GOTO(out_free_memmd, err = -ENOMEM);
+                GOTO(out_free_memmd, rc = -ENOMEM);
  
          oa->o_id = lsm->lsm_object_id;
          oa->o_mode = body->mode & S_IFMT;
          oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE;
  
-        err = obd_destroy(ll_i2obdconn(dir), oa, lsm, NULL);
+        if (body->valid & OBD_MD_FLCOOKIE) {
+                oa->o_valid |= OBD_MD_FLCOOKIE;
+                oti.oti_logcookies = lustre_msg_buf(request->rq_repmsg, 3,
+                                                    body->eadatasize);
+        }
+
+        rc = obd_destroy(ll_i2obdconn(dir), oa, lsm, &oti);
          obdo_free(oa);
-        if (err)
+        if (rc)
                  CERROR("obd destroy objid 0x"LPX64" error %d\n",
-                       lsm->lsm_object_id, err);
+                       lsm->lsm_object_id, rc);
   out_free_memmd:
          obd_free_memmd(ll_i2obdconn(dir), &lsm);
- out_unlock:
-        ldlm_lock_decref_and_cancel(&lockh, LCK_EX);
   out:
          ptlrpc_req_finished(request);
-        return err;
+        return rc;
+}
+
+/* We depend on "mode" being set with the proper file type/umask by now */
+static struct inode *ll_create_node(struct inode *dir, const char *name,
+                                    int namelen, const void *data, int datalen,
+                                    int mode, __u64 extra,
+                                    struct lookup_intent *it)
+{
+        struct inode *inode;
+        struct ptlrpc_request *request = NULL;
+        struct ll_sb_info *sbi = ll_i2sbi(dir);
+        struct lustre_md md;
+        int rc;
+        ENTRY;
+
+        LASSERT(it && it->it_disposition);
+
+        ll_invalidate_inode_pages(dir);
+
+        request = it->it_data;
+        rc = mdc_req2lustre_md(request, 1, &sbi->ll_osc_conn, &md);
+        if (rc) {
+                GOTO(out, inode = ERR_PTR(rc));
+        }
+
+        inode = ll_iget(dir->i_sb, md.body->ino, &md);
+        if (!inode || is_bad_inode(inode)) {
+                /* ll_create_it() only tests IS_ERR(); XXX bad inode may need iput() */
+                rc = -EIO;
+                CERROR("new_inode -fatal: rc %d\n", rc);
+                LBUG();
+                GOTO(out, inode = ERR_PTR(rc));
+        }
+        LASSERT(list_empty(&inode->i_dentry));
+
+        CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
+               inode, inode->i_ino, inode->i_generation);
+        ldlm_lock_set_data((struct lustre_handle*)it->it_lock_handle,
+                           inode);
+
+        EXIT;
+ out:
+        ptlrpc_req_finished(request);
+        return inode;
  }
  
  /*
@@ -902,54 +722,46 @@ static int ll_mdc_unlink(struct inode *dir, struct inode *child, __u32 mode,
   * If the create succeeds, we fill in the inode information
   * with d_instantiate().
   */
-static int ll_create(struct inode *dir, struct dentry *dentry, int mode)
+static int ll_create_it(struct inode *dir, struct dentry *dentry, int mode, struct lookup_intent *it)
  {
-        struct lookup_intent *it;
          struct inode *inode;
+        struct ptlrpc_request *request = it->it_data;
          int rc = 0;
          ENTRY;
  
          CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),intent=%s\n",
                 dentry->d_name.name, dir->i_ino, dir->i_generation, dir,
-               LL_IT2STR(dentry->d_it));
-
-        it = dentry->d_it;
+               LL_IT2STR(it));
  
-        rc = ll_it_open_error(IT_OPEN_CREATE, it);
+        rc = ll_it_open_error(DISP_OPEN_CREATE, it);
          if (rc) {
-                LL_GET_INTENT(dentry, it);
-                ptlrpc_req_finished(it->it_data);
+                ptlrpc_req_finished(request);
                  RETURN(rc);
          }
  
+        mdc_store_inode_generation(request, 2, 1);
          inode = ll_create_node(dir, dentry->d_name.name, dentry->d_name.len,
                                 NULL, 0, mode, 0, it);
-
          if (IS_ERR(inode)) {
-                LL_GET_INTENT(dentry, it);
                  RETURN(PTR_ERR(inode));
          }
  
-        /* no directory data updates when intents rule */
-        if (it && it->it_disposition) {
-                d_instantiate(dentry, inode);
-                RETURN(0);
-        }
-
-        rc = ext2_add_nondir(dentry, inode);
-        RETURN(rc);
+        d_instantiate(dentry, inode);
+        RETURN(0);
  }
  
-static int ll_mknod(struct inode *dir, struct dentry *dentry, int mode,
-                    int rdev)
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+static int ll_create_nd(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd)
  {
-        LBUG();
-        return -ENOSYS;
+        return ll_create_it(dir, dentry, mode, &nd->it);
  }
+#endif
  
-static int ll_mknod2(struct inode *dir, const char *name, int len, int mode,
-                     int rdev)
+static int ll_mknod_raw(struct nameidata *nd, int mode, dev_t rdev)
  {
+        struct inode *dir = nd->dentry->d_inode;
+        const char *name = nd->last.name;
+        int len = nd->last.len;
          struct ptlrpc_request *request = NULL;
          time_t time = LTIME_S(CURRENT_TIME);
          struct ll_sb_info *sbi = ll_i2sbi(dir);
@@ -966,10 +778,13 @@ static int ll_mknod2(struct inode *dir, const char *name, int len, int mode,
          mode &= ~current->fs->umask;
  
          switch (mode & S_IFMT) {
-        case 0: case S_IFREG:
+        case 0: 
+        case S_IFREG:
                  mode |= S_IFREG; /* for mode = 0 case, fallthrough */
-        case S_IFCHR: case S_IFBLK:
-        case S_IFIFO: case S_IFSOCK:
+        case S_IFCHR: 
+        case S_IFBLK:
+        case S_IFIFO: 
+        case S_IFSOCK:
                  ll_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
                  err = mdc_create(&sbi->ll_mdc_conn, &op_data, NULL, 0, mode,
                                   current->fsuid, current->fsgid, time,
@@ -985,16 +800,11 @@ static int ll_mknod2(struct inode *dir, const char *name, int len, int mode,
          RETURN(err);
  }
  
-static int ll_symlink(struct inode *dir, struct dentry *dentry,
-                      const char *symname)
-{
-        LBUG();
-        return -ENOSYS;
-}
-
-static int ll_symlink2(struct inode *dir, const char *name, int len,
-                       const char *tgt)
+static int ll_symlink_raw(struct nameidata *nd, const char *tgt)
  {
+        struct inode *dir = nd->dentry->d_inode;
+        const char *name = nd->last.name;
+        int len = nd->last.len;
          struct ptlrpc_request *request = NULL;
          time_t time = LTIME_S(CURRENT_TIME);
          struct ll_sb_info *sbi = ll_i2sbi(dir);
@@ -1016,16 +826,12 @@ static int ll_symlink2(struct inode *dir, const char *name, int len,
          RETURN(err);
  }
  
-static int ll_link(struct dentry *old_dentry, struct inode * dir,
-                   struct dentry *dentry)
-{
-        LBUG();
-        return -ENOSYS;
-}
-
-static int ll_link2(struct inode *src, struct inode *dir,
-                    const char *name, int len)
+static int ll_link_raw(struct nameidata *srcnd, struct nameidata *tgtnd)
  {
+        struct inode *src = srcnd->dentry->d_inode;
+        struct inode *dir = tgtnd->dentry->d_inode;
+        const char *name = tgtnd->last.name;
+        int len = tgtnd->last.len;
          struct ptlrpc_request *request = NULL;
          struct mdc_op_data op_data;
          int err;
@@ -1043,14 +849,12 @@ static int ll_link2(struct inode *src, struct inode *dir,
          RETURN(err);
  }
  
-static int ll_mkdir(struct inode *dir, struct dentry *dentry, int mode)
-{
-        LBUG();
-        return -ENOSYS;
-}
  
-static int ll_mkdir2(struct inode *dir, const char *name, int len, int mode)
+static int ll_mkdir_raw(struct nameidata *nd, int mode)
  {
+        struct inode *dir = nd->dentry->d_inode;
+        const char *name = nd->last.name;
+        int len = nd->last.len;
          struct ptlrpc_request *request = NULL;
          time_t time = LTIME_S(CURRENT_TIME);
          struct ll_sb_info *sbi = ll_i2sbi(dir);
@@ -1066,14 +870,16 @@ static int ll_mkdir2(struct inode *dir, const char *name, int len, int mode)
          mode = (mode & (S_IRWXUGO|S_ISVTX) & ~current->fs->umask) | S_IFDIR;
          ll_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
          err = mdc_create(&sbi->ll_mdc_conn, &op_data, NULL, 0, mode,
-                         current->fsuid, current->fsgid,
-                         time, 0, &request);
+                         current->fsuid, current->fsgid, time, 0, &request);
          ptlrpc_req_finished(request);
          RETURN(err);
  }
  
-static int ll_rmdir2(struct inode *dir, const char *name, int len)
+static int ll_rmdir_raw(struct nameidata *nd)
  {
+        struct inode *dir = nd->dentry->d_inode;
+        const char *name = nd->last.name;
+        int len = nd->last.len;
          int rc;
          ENTRY;
          CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n",
@@ -1083,8 +889,11 @@ static int ll_rmdir2(struct inode *dir, const char *name, int len)
          RETURN(rc);
  }
  
-static int ll_unlink2(struct inode *dir, const char *name, int len)
+static int ll_unlink_raw(struct nameidata *nd)
  {
+        struct inode *dir = nd->dentry->d_inode;
+        const char *name = nd->last.name;
+        int len = nd->last.len;
          int rc;
          ENTRY;
          CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n",
@@ -1094,29 +903,14 @@ static int ll_unlink2(struct inode *dir, const char *name, int len)
          RETURN(rc);
  }
  
-static int ll_unlink(struct inode *dir, struct dentry *dentry)
-{
-        LBUG();
-        return -ENOSYS;
-}
-
-static int ll_rmdir(struct inode *dir, struct dentry *dentry)
-{
-        LBUG();
-        return -ENOSYS;
-}
-
-static int ll_rename(struct inode * old_dir, struct dentry * old_dentry,
-                     struct inode * new_dir, struct dentry * new_dentry)
-{
-        LBUG();
-        return -ENOSYS;
-}
-
-static int ll_rename2(struct inode *src, struct inode *tgt,
-                      const char *oldname, int oldlen,
-                      const char *newname, int newlen)
+static int ll_rename_raw(struct nameidata *oldnd, struct nameidata *newnd)
  {
+        struct inode *src = oldnd->dentry->d_inode;
+        struct inode *tgt = newnd->dentry->d_inode;
+        const char *oldname = oldnd->last.name;
+        int oldlen  = oldnd->last.len;
+        const char *newname = newnd->last.name;
+        int newlen  = newnd->last.len;
          struct ptlrpc_request *request = NULL;
          struct ll_sb_info *sbi = ll_i2sbi(src);
          struct mdc_op_data op_data;
@@ -1134,27 +928,23 @@ static int ll_rename2(struct inode *src, struct inode *tgt,
          RETURN(err);
  }
  
-extern int ll_inode_revalidate(struct dentry *dentry);
  struct inode_operations ll_dir_inode_operations = {
-        create:          ll_create,
-        lookup2:         ll_lookup2,
-        link:            ll_link,          /* LBUG() */
-        link2:           ll_link2,
-        unlink:          ll_unlink,        /* LBUG() */
-        unlink2:         ll_unlink2,
-        symlink:         ll_symlink,       /* LBUG() */
-        symlink2:        ll_symlink2,
-        mkdir:           ll_mkdir,         /* LBUG() */
-        mkdir2:          ll_mkdir2,
-        rmdir:           ll_rmdir,         /* LBUG() */
-        rmdir2:          ll_rmdir2,
-        mknod:           ll_mknod,         /* LBUG() */
-        mknod2:          ll_mknod2,
-        rename:          ll_rename,        /* LBUG() */
-        rename2:         ll_rename2,
+        link_raw:           ll_link_raw,
+        unlink_raw:         ll_unlink_raw,
+        symlink_raw:        ll_symlink_raw,
+        mkdir_raw:          ll_mkdir_raw,
+        rmdir_raw:          ll_rmdir_raw,
+        mknod_raw:          ll_mknod_raw,
+        rename_raw:         ll_rename_raw,
          setattr:         ll_setattr,
          setattr_raw:     ll_setattr_raw,
  #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        revalidate:      ll_inode_revalidate,
+        create_it:          ll_create_it,
+        lookup_it:            ll_lookup_it,
+        revalidate_it:      ll_inode_revalidate_it,
+#else
+        lookup_it:          ll_lookup_nd,
+        create_nd:          ll_create_nd,
+        getattr_it:         ll_getattr,
  #endif
  };
diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c

index 98f6086..b4004b5 100644 (file)
--- a/lustre/llite/rw.c
+++ b/lustre/llite/rw.c
@@ -32,11 +32,12 @@
  #include <linux/version.h>
  #include <asm/system.h>
  #include <asm/uaccess.h>
-#include "llite_internal.h"
  
  #include <linux/fs.h>
  #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
  #include <linux/buffer_head.h>
+#include <linux/mpage.h>
+#include <linux/writeback.h>
  #else
  #include <linux/iobuf.h>
  #endif
@@ -51,7 +52,7 @@
  
  #include <linux/lustre_mds.h>
  #include <linux/lustre_lite.h>
-#include <linux/lustre_lib.h>
+#include "llite_internal.h"
  #include <linux/lustre_compat25.h>
  
  /*
@@ -90,7 +91,8 @@ void set_page_clean(struct page *page)
  }
  
  /* SYNCHRONOUS I/O to object storage for an inode */
-static int ll_brw(int cmd, struct inode *inode, struct page *page, int flags)
+static int ll_brw(int cmd, struct inode *inode, struct obdo *oa,
+                  struct page *page, int flags)
  {
          struct ll_inode_info *lli = ll_i2info(inode);
          struct lov_stripe_md *lsm = lli->lli_smd;
@@ -124,8 +126,8 @@ static int ll_brw(int cmd, struct inode *inode, struct page *page, int flags)
          else
                  lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
                                      LPROC_LL_BRW_READ, pg.count);
-        rc = obd_brw(cmd, ll_i2obdconn(inode), lsm, 1, &pg, NULL);
-        if (rc)
+        rc = obd_brw(cmd, ll_i2obdconn(inode), oa, lsm, 1, &pg, NULL);
+        if (rc != 0 && rc != -EIO)
                  CERROR("error from obd_brw: rc = %d\n", rc);
  
          RETURN(rc);
@@ -142,6 +144,7 @@ static int ll_readpage(struct file *file, struct page *first_page)
          struct page *page = first_page;
          struct list_head *pos;
          struct brw_page *pgs;
+        struct obdo *oa;
          unsigned long end_index, extent_end = 0;
          struct ptlrpc_request_set *set;
          int npgs = 0, rc = 0, max_pages;
@@ -276,19 +279,33 @@ static int ll_readpage(struct file *file, struct page *first_page)
  
          } while (page);
  
-        set = ptlrpc_prep_set();
-        if (set == NULL) {
+        if ((oa = obdo_alloc()) == NULL) {
+                CERROR("ENOMEM allocing obdo\n");
+                rc = -ENOMEM;
+        } else if ((set = ptlrpc_prep_set()) == NULL) {
                  CERROR("ENOMEM allocing request set\n");
+                obdo_free(oa);
                  rc = -ENOMEM;
          } else {
-                rc = obd_brw_async(OBD_BRW_READ, ll_i2obdconn(inode),
+                struct ll_file_data *fd = file->private_data;
+
+                oa->o_id = lli->lli_smd->lsm_object_id;
+                memcpy(obdo_handle(oa), &fd->fd_ost_och.och_fh,
+                       sizeof(fd->fd_ost_och.och_fh));
+                oa->o_valid = OBD_MD_FLID | OBD_MD_FLHANDLE;
+                obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME);
+
+                rc = obd_brw_async(OBD_BRW_READ, ll_i2obdconn(inode), oa,
                                     ll_i2info(inode)->lli_smd, npgs, pgs,
                                     set, NULL);
                  if (rc == 0)
                          rc = ptlrpc_set_wait(set);
                  ptlrpc_set_destroy(set);
+                if (rc == 0)
+                        obdo_refresh_inode(inode, oa, oa->o_valid);
                  if (rc && rc != -EIO)
                          CERROR("error from obd_brw_async: rc = %d\n", rc);
+                obdo_free(oa);
          }
  
          while (npgs-- > 0) {
@@ -310,15 +327,15 @@ static int ll_readpage(struct file *file, struct page *first_page)
  void ll_truncate(struct inode *inode)
  {
          struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-        struct obdo oa = {0};
+        struct obdo oa;
          int err;
          ENTRY;
          CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
                 inode->i_generation, inode);
  
+        /* object not yet allocated */
          if (!lsm) {
-                /* object not yet allocated */
-                inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+                CERROR("truncate on inode %lu with no objects\n", inode->i_ino);
                  EXIT;
                  return;
          }
@@ -331,8 +348,9 @@ void ll_truncate(struct inode *inode)
                          ~0);
  
          oa.o_id = lsm->lsm_object_id;
-        oa.o_mode = inode->i_mode;
-        oa.o_valid = OBD_MD_FLID | OBD_MD_FLMODE | OBD_MD_FLTYPE;
+        oa.o_valid = OBD_MD_FLID;
+        obdo_from_inode(&oa, inode, OBD_MD_FLTYPE|OBD_MD_FLMODE|OBD_MD_FLATIME|
+                                    OBD_MD_FLMTIME | OBD_MD_FLCTIME);
  
          CDEBUG(D_INFO, "calling punch for "LPX64" (all bytes after %Lu)\n",
                 oa.o_id, inode->i_size);
@@ -343,7 +361,9 @@ void ll_truncate(struct inode *inode)
          if (err)
                  CERROR("obd_truncate fails (%d) ino %lu\n", err, inode->i_ino);
          else
-                obdo_to_inode(inode, &oa, oa.o_valid);
+                obdo_to_inode(inode, &oa, OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
+                                          OBD_MD_FLATIME | OBD_MD_FLMTIME |
+                                          OBD_MD_FLCTIME);
  
          EXIT;
          return;
@@ -356,9 +376,11 @@ static int ll_prepare_write(struct file *file, struct page *page, unsigned from,
  {
          struct inode *inode = page->mapping->host;
          struct ll_inode_info *lli = ll_i2info(inode);
+        struct ll_file_data *fd = file->private_data;
          struct lov_stripe_md *lsm = lli->lli_smd;
          obd_off offset = ((obd_off)page->index) << PAGE_SHIFT;
          struct brw_page pg;
+        struct obdo oa;
          int rc = 0;
          ENTRY;
  
@@ -375,7 +397,7 @@ static int ll_prepare_write(struct file *file, struct page *page, unsigned from,
          pg.off = offset;
          pg.count = PAGE_SIZE;
          pg.flag = 0;
-        rc = obd_brw(OBD_BRW_CHECK, ll_i2obdconn(inode), lsm, 1, &pg, NULL);
+        rc = obd_brw(OBD_BRW_CHECK, ll_i2obdconn(inode), NULL, lsm, 1,&pg,NULL);
          if (rc)
                  RETURN(rc);
  
@@ -393,7 +415,15 @@ static int ll_prepare_write(struct file *file, struct page *page, unsigned from,
                  GOTO(prepare_done, rc = 0);
          }
  
-        rc = ll_brw(OBD_BRW_READ, inode, page, 0);
+        oa.o_id = lsm->lsm_object_id;
+        oa.o_mode = inode->i_mode;
+        memcpy(obdo_handle(&oa), &fd->fd_ost_och.och_fh,
+               sizeof(fd->fd_ost_och.och_fh));
+        oa.o_valid = OBD_MD_FLID |OBD_MD_FLMODE |OBD_MD_FLTYPE |OBD_MD_FLHANDLE;
+
+        rc = ll_brw(OBD_BRW_READ, inode, &oa, page, 0);
+        if (rc == 0)
+                obdo_refresh_inode(inode, &oa, oa.o_valid);
  
          EXIT;
   prepare_done:
@@ -544,15 +574,19 @@ int ll_mark_dirty_page(struct lustre_handle *conn, struct lov_stripe_md *lsm,
  static int ll_writepage(struct page *page)
  {
          struct inode *inode = page->mapping->host;
+        struct obdo oa;
          ENTRY;
  
          CDEBUG(D_CACHE, "page %p [lau %d] inode %p\n", page,
-                        PageLaunder(page), inode);
+               PageLaunder(page), inode);
          LASSERT(PageLocked(page));
  
-        /* XXX should obd_brw errors trickle up? */
-        ll_batch_writepage(inode, page);
-        RETURN(0);
+        oa.o_id = ll_i2info(inode)->lli_smd->lsm_object_id;
+        oa.o_valid = OBD_MD_FLID;
+        obdo_from_inode(&oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
+                                    OBD_MD_FLMTIME | OBD_MD_FLCTIME);
+
+        RETURN(ll_batch_writepage(inode, &oa, page));
  }
  
  /*
@@ -567,6 +601,7 @@ static int ll_commit_write(struct file *file, struct page *page,
          int rc = 0;
          ENTRY;
  
+        SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */
          LASSERT(inode == file->f_dentry->d_inode);
          LASSERT(PageLocked(page));
  
@@ -595,7 +630,18 @@ static int ll_commit_write(struct file *file, struct page *page,
          /* This means that we've hit either the local cache limit or the limit
           * of the OST's grant. */
          if (rc == -EDQUOT) {
-                int rc = ll_batch_writepage(inode, page);
+                struct ll_file_data *fd = file->private_data;
+                struct obdo oa;
+                int rc;
+
+                oa.o_id = ll_i2info(inode)->lli_smd->lsm_object_id;
+                memcpy(obdo_handle(&oa), &fd->fd_ost_och.och_fh,
+                       sizeof(fd->fd_ost_och.och_fh));
+                oa.o_valid = OBD_MD_FLID | OBD_MD_FLHANDLE;
+                obdo_from_inode(&oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
+                                            OBD_MD_FLMTIME | OBD_MD_FLCTIME);
+
+                rc = ll_batch_writepage(inode, &oa, page);
                  lock_page(page); /* caller expects to unlock */
                  RETURN(rc);
          }
@@ -624,12 +670,13 @@ static int ll_direct_IO(int rw, struct inode *inode, struct kiobuf *iobuf,
          struct lov_stripe_md *lsm = lli->lli_smd;
          struct brw_page *pga;
          struct ptlrpc_request_set *set;
+        struct obdo oa;
          int length, i, flags, rc = 0;
          loff_t offset;
          ENTRY;
  
          if (!lsm || !lsm->lsm_object_id)
-                RETURN(-ENOMEM);
+                RETURN(-EBADF);
  
          if ((iobuf->offset & (blocksize - 1)) ||
              (iobuf->length & (blocksize - 1)))
@@ -663,6 +710,11 @@ static int ll_direct_IO(int rw, struct inode *inode, struct kiobuf *iobuf,
                  }
          }
  
+        oa.o_id = lsm->lsm_object_id;
+        oa.o_valid = OBD_MD_FLID;
+        obdo_from_inode(&oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
+                                    OBD_MD_FLMTIME | OBD_MD_FLCTIME);
+
          if (rw == WRITE)
                  lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
                                      LPROC_LL_DIRECT_WRITE, iobuf->length);
@@ -670,8 +722,8 @@ static int ll_direct_IO(int rw, struct inode *inode, struct kiobuf *iobuf,
                  lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
                                      LPROC_LL_DIRECT_READ, iobuf->length);
          rc = obd_brw_async(rw == WRITE ? OBD_BRW_WRITE : OBD_BRW_READ,
-                           ll_i2obdconn(inode), lsm, iobuf->nr_pages, pga, set,
-                           NULL);
+                           ll_i2obdconn(inode), &oa, lsm, iobuf->nr_pages, pga,
+                           set, NULL);
          if (rc) {
                  CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR,
                         "error from obd_brw_async: rc = %d\n", rc);
diff --git a/lustre/llite/super.c b/lustre/llite/super.c

index 85532f0..9a3ffa1 100644 (file)
--- a/lustre/llite/super.c
+++ b/lustre/llite/super.c
@@ -35,770 +35,18 @@
  #include "llite_internal.h"
  
  #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-kmem_cache_t *ll_file_data_slab;
  extern struct address_space_operations ll_aops;
  extern struct address_space_operations ll_dir_aops;
-struct super_operations ll_super_operations;
-
-/* /proc/lustre/llite root that tracks llite mount points */
-struct proc_dir_entry *proc_lustre_fs_root = NULL;
-/* lproc_llite.c */
-extern void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi);
-extern int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
-                                       struct super_block *sb,
-                                       char *osc, char *mdc);
-
-extern int ll_recover(struct recovd_data *, int);
-extern int ll_commitcbd_setup(struct ll_sb_info *);
-extern int ll_commitcbd_cleanup(struct ll_sb_info *);
-
-static char *ll_read_opt(const char *opt, char *data)
-{
-        char *value;
-        char *retval;
-        ENTRY;
-
-        CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data);
-        if (strncmp(opt, data, strlen(opt)))
-                RETURN(NULL);
-        if ((value = strchr(data, '=')) == NULL)
-                RETURN(NULL);
-
-        value++;
-        OBD_ALLOC(retval, strlen(value) + 1);
-        if (!retval) {
-                CERROR("out of memory!\n");
-                RETURN(NULL);
-        }
-
-        memcpy(retval, value, strlen(value)+1);
-        CDEBUG(D_SUPER, "Assigned option: %s, value %s\n", opt, retval);
-        RETURN(retval);
-}
-
-static int ll_set_opt(const char *opt, char *data, int fl)
-{
-        ENTRY;
-
-        CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data);
-        if (strncmp(opt, data, strlen(opt)))
-                RETURN(0);
-        else
-                RETURN(fl);
-}
-
-static void ll_options(char *options, char **ost, char **mds, int *flags)
-{
-        char *this_char;
-        ENTRY;
-
-        if (!options) {
-                EXIT;
-                return;
-        }
-
-        for (this_char = strtok (options, ",");
-             this_char != NULL;
-             this_char = strtok (NULL, ",")) {
-                CDEBUG(D_SUPER, "this_char %s\n", this_char);
-                if ((!*ost && (*ost = ll_read_opt("osc", this_char)))||
-                    (!*mds && (*mds = ll_read_opt("mdc", this_char)))||
-                    (!(*flags & LL_SBI_NOLCK) &&
-                     ((*flags) = (*flags) |
-                      ll_set_opt("nolock", this_char, LL_SBI_NOLCK))))
-                        continue;
-        }
-        EXIT;
-}
-
-#ifndef log2
-#define log2(n) ffz(~(n))
-#endif
  
  static struct super_block *ll_read_super(struct super_block *sb,
                                           void *data, int silent)
  {
-        struct inode *root = 0;
-        struct obd_device *obd;
-        struct ll_sb_info *sbi;
-        struct obd_export *mdc_export;
-        char *osc = NULL;
-        char *mdc = NULL;
          int err;
-        struct ll_fid rootfid;
-        struct obd_statfs osfs;
-        struct ptlrpc_request *request = NULL;
-        struct ptlrpc_connection *mdc_conn;
-        struct ll_read_inode2_cookie lic;
-        class_uuid_t uuid;
-
          ENTRY;
-
-        CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb);
-        OBD_ALLOC(sbi, sizeof(*sbi));
-        if (!sbi)
+        err = ll_fill_super(sb, data, silent);
+        if (err)
                  RETURN(NULL);
-
-        INIT_LIST_HEAD(&sbi->ll_conn_chain);
-        INIT_LIST_HEAD(&sbi->ll_orphan_dentry_list);
-        generate_random_uuid(uuid);
-        class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
-
-        sb->u.generic_sbp = sbi;
-
-        ll_options(data, &osc, &mdc, &sbi->ll_flags);
-
-        if (!osc) {
-                CERROR("no osc\n");
-                GOTO(out_free, sb = NULL);
-        }
-
-        if (!mdc) {
-                CERROR("no mdc\n");
-                GOTO(out_free, sb = NULL);
-        }
-
-        obd = class_name2obd(mdc);
-        if (!obd) {
-                CERROR("MDC %s: not setup or attached\n", mdc);
-                GOTO(out_free, sb = NULL);
-        }
-
-        err = obd_connect(&sbi->ll_mdc_conn, obd, &sbi->ll_sb_uuid);
-        if (err) {
-                CERROR("cannot connect to %s: rc = %d\n", mdc, err);
-                GOTO(out_free, sb = NULL);
-        }
-
-        mdc_conn = sbi2mdc(sbi)->cl_import->imp_connection;
-
-        obd = class_name2obd(osc);
-        if (!obd) {
-                CERROR("OSC %s: not setup or attached\n", osc);
-                GOTO(out_mdc, sb = NULL);
-        }
-
-        err = obd_connect(&sbi->ll_osc_conn, obd, &sbi->ll_sb_uuid);
-        if (err) {
-                CERROR("cannot connect to %s: rc = %d\n", osc, err);
-                GOTO(out_mdc, sb = NULL);
-        }
-
-        err = mdc_getstatus(&sbi->ll_mdc_conn, &rootfid);
-        if (err) {
-                CERROR("cannot mds_connect: rc = %d\n", err);
-                GOTO(out_osc, sb = NULL);
-        }
-        CDEBUG(D_SUPER, "rootfid "LPU64"\n", rootfid.id);
-        sbi->ll_rootino = rootfid.id;
-
-        memset(&osfs, 0, sizeof(osfs));
-        mdc_export = class_conn2export(&sbi->ll_mdc_conn);
-        if (mdc_export == NULL) {
-                CERROR("null mdc_export\n");
-                GOTO(out_osc, sb = NULL);
-        }
-        err = obd_statfs(mdc_export, &osfs);
-        class_export_put(mdc_export);
-        sb->s_blocksize = osfs.os_bsize;
-        sb->s_blocksize_bits = log2(osfs.os_bsize);
-        sb->s_magic = LL_SUPER_MAGIC;
-        sb->s_maxbytes = PAGE_CACHE_MAXBYTES;
-
-        sb->s_op = &ll_super_operations;
-
-        /* make root inode 
-         * XXX: move this to after cbd setup? */
-        err = mdc_getattr(&sbi->ll_mdc_conn, &rootfid,
-                          OBD_MD_FLNOTOBD|OBD_MD_FLBLOCKS, 0, &request);
-        if (err) {
-                CERROR("mdc_getattr failed for root: rc = %d\n", err);
-                GOTO(out_osc, sb = NULL);
-        }
-
-        /* initialize committed transaction callback daemon */
-        spin_lock_init(&sbi->ll_commitcbd_lock);
-        init_waitqueue_head(&sbi->ll_commitcbd_waitq);
-        init_waitqueue_head(&sbi->ll_commitcbd_ctl_waitq);
-        sbi->ll_commitcbd_flags = 0;
-        err = ll_commitcbd_setup(sbi);
-        if (err) {
-                CERROR("failed to start commit callback daemon: rc = %d\n",err);
-                ptlrpc_req_finished (request);
-                GOTO(out_osc, sb = NULL);
-        }
-
-        lic.lic_body = lustre_msg_buf(request->rq_repmsg, 0,
-                                      sizeof(*lic.lic_body));
-        LASSERT (lic.lic_body != NULL);         /* checked by mdc_getattr() */
-        LASSERT_REPSWABBED (request, 0);        /* swabbed by mdc_getattr() */
-
-        lic.lic_lsm = NULL;
-
-        LASSERT(sbi->ll_rootino != 0);
-        root = iget4(sb, sbi->ll_rootino, NULL, &lic);
-
-        ptlrpc_req_finished(request);
-
-        if (root == NULL || is_bad_inode(root)) {
-                /* XXX might need iput() for bad inode */
-                CERROR("lustre_lite: bad iget4 for root\n");
-                GOTO(out_cbd, sb = NULL);
-        }
-
-        sb->s_root = d_alloc_root(root);
-
-        if (proc_lustre_fs_root) {
-                err = lprocfs_register_mountpoint(proc_lustre_fs_root, sb,
-                                                  osc, mdc);
-                if (err < 0)
-                        CERROR("could not register mount in /proc/lustre");
-        }
-
-out_dev:
-        if (mdc)
-                OBD_FREE(mdc, strlen(mdc) + 1);
-        if (osc)
-                OBD_FREE(osc, strlen(osc) + 1);
-
          RETURN(sb);
-
-out_cbd:
-        ll_commitcbd_cleanup(sbi);
-out_osc:
-        obd_disconnect(&sbi->ll_osc_conn, 0);
-out_mdc:
-        obd_disconnect(&sbi->ll_mdc_conn, 0);
-out_free:
-        lprocfs_unregister_mountpoint(sbi);
-        OBD_FREE(sbi, sizeof(*sbi));
-
-        goto out_dev;
-} /* ll_read_super */
-
-static void ll_put_super(struct super_block *sb)
-{
-        struct ll_sb_info *sbi = ll_s2sbi(sb);
-        struct list_head *tmp, *next;
-        struct ll_fid rootfid;
-        struct obd_device *obd = class_conn2obd(&sbi->ll_mdc_conn);
-        ENTRY;
-
-        CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb);
-        list_del(&sbi->ll_conn_chain);
-        ll_commitcbd_cleanup(sbi);
-        obd_disconnect(&sbi->ll_osc_conn, 0);
-
-        /* NULL request to force sync on the MDS, and get the last_committed
-         * value to flush remaining RPCs from the sending queue on client.
-         *
-         * XXX This should be an mdc_sync() call to sync the whole MDS fs,
-         *     which we can call for other reasons as well.
-         */
-        if (!obd->obd_no_recov)
-                mdc_getstatus(&sbi->ll_mdc_conn, &rootfid);
-
-        lprocfs_unregister_mountpoint(sbi);
-        if (sbi->ll_proc_root) {
-                lprocfs_remove(sbi->ll_proc_root);
-                sbi->ll_proc_root = NULL;
-        }
-
-        obd_disconnect(&sbi->ll_mdc_conn, 0);
-
-        spin_lock(&dcache_lock);
-        list_for_each_safe(tmp, next, &sbi->ll_orphan_dentry_list) {
-                struct dentry *dentry = list_entry(tmp, struct dentry, d_hash);
-                shrink_dcache_parent(dentry);
-        }
-        spin_unlock(&dcache_lock);
-
-        OBD_FREE(sbi, sizeof(*sbi));
-
-        EXIT;
-} /* ll_put_super */
-
-static void ll_clear_inode(struct inode *inode)
-{
-        struct ll_sb_info *sbi = ll_i2sbi(inode);
-        struct ll_inode_info *lli = ll_i2info(inode);
-        int rc;
-        ENTRY;
-
-        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
-               inode->i_generation, inode);
-        rc = ll_mdc_cancel_unused(&sbi->ll_mdc_conn, inode,
-                                  LDLM_FL_NO_CALLBACK, inode);
-        if (rc < 0) {
-                CERROR("ll_mdc_cancel_unused: %d\n", rc);
-                /* XXX FIXME do something dramatic */
-        }
-
-        if (atomic_read(&inode->i_count) != 0)
-                CERROR("clearing in-use inode %lu: count = %d\n",
-                       inode->i_ino, atomic_read(&inode->i_count));
-
-        if (lli->lli_smd) {
-                rc = obd_cancel_unused(&sbi->ll_osc_conn, lli->lli_smd,
-                                       LDLM_FL_WARN, inode);
-                if (rc < 0) {
-                        CERROR("obd_cancel_unused: %d\n", rc);
-                        /* XXX FIXME do something dramatic */
-                }
-                obd_free_memmd(&sbi->ll_osc_conn, &lli->lli_smd);
-                lli->lli_smd = NULL;
-        }
-
-        if (lli->lli_symlink_name) {
-                OBD_FREE(lli->lli_symlink_name,
-                         strlen(lli->lli_symlink_name) + 1);
-                lli->lli_symlink_name = NULL;
-        }
-
-        EXIT;
-}
-
-#if 0
-static void ll_delete_inode(struct inode *inode)
-{
-        ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
-               inode->i_generation, inode);
-        if (S_ISREG(inode->i_mode)) {
-                int err;
-                struct obdo *oa;
-                struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-
-                /* mcreate with no open */
-                if (!lsm)
-                        GOTO(out, 0);
-
-                if (lsm->lsm_object_id == 0) {
-                        CERROR("This really happens\n");
-                        /* No obdo was ever created */
-                        GOTO(out, 0);
-                }
-
-                oa = obdo_alloc();
-                if (oa == NULL)
-                        GOTO(out, -ENOMEM);
-
-                oa->o_id = lsm->lsm_object_id;
-                obdo_from_inode(oa, inode, OBD_MD_FLID | OBD_MD_FLTYPE);
-
-                err = obd_destroy(ll_i2obdconn(inode), oa, lsm, NULL);
-                obdo_free(oa);
-                if (err)
-                        CDEBUG(D_INODE,
-                               "inode %lu obd_destroy objid "LPX64" error %d\n",
-                               inode->i_ino, lsm->lsm_object_id, err);
-        }
-out:
-        clear_inode(inode);
-        EXIT;
-}
-#endif
-
-/* like inode_setattr, but doesn't mark the inode dirty */
-static int ll_attr2inode(struct inode *inode, struct iattr *attr, int trunc)
-{
-        unsigned int ia_valid = attr->ia_valid;
-        int error = 0;
-
-        if ((ia_valid & ATTR_SIZE) && trunc) {
-                if (attr->ia_size > ll_file_maxbytes(inode)) {
-                        error = -EFBIG;
-                        goto out;
-                }
-                error = vmtruncate(inode, attr->ia_size);
-                if (error)
-                        goto out;
-        } else if (ia_valid & ATTR_SIZE)
-                inode->i_size = attr->ia_size;
-
-        if (ia_valid & ATTR_UID)
-                inode->i_uid = attr->ia_uid;
-        if (ia_valid & ATTR_GID)
-                inode->i_gid = attr->ia_gid;
-        if (ia_valid & ATTR_ATIME)
-                inode->i_atime = attr->ia_atime;
-        if (ia_valid & ATTR_MTIME)
-                inode->i_mtime = attr->ia_mtime;
-        if (ia_valid & ATTR_CTIME)
-                inode->i_ctime = attr->ia_ctime;
-        if (ia_valid & ATTR_MODE) {
-                inode->i_mode = attr->ia_mode;
-                if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
-                        inode->i_mode &= ~S_ISGID;
-        }
-out:
-        return error;
-}
-
-int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc)
-{
-        struct ptlrpc_request *request = NULL;
-        struct ll_sb_info *sbi = ll_i2sbi(inode);
-        int err = 0;
-        ENTRY;
-
-        /* change incore inode */
-        err = ll_attr2inode(inode, attr, do_trunc);
-        if (err)
-                RETURN(err);
-
-        /* Don't send size changes to MDS to avoid "fast EA" problems, and
-         * also avoid a pointless RPC (we get file size from OST anyways).
-         */
-        attr->ia_valid &= ~ATTR_SIZE;
-        if (attr->ia_valid) {
-                struct mdc_op_data op_data;
-
-                ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0);
-                err = mdc_setattr(&sbi->ll_mdc_conn, &op_data,
-                                  attr, NULL, 0, &request);
-                if (err)
-                        CERROR("mdc_setattr fails: err = %d\n", err);
-
-                ptlrpc_req_finished(request);
-                if (S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_MTIME_SET) {
-                        struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-                        struct obdo oa;
-                        int err2;
-
-                        CDEBUG(D_INODE, "set mtime on OST inode %lu to %lu\n",
-                               inode->i_ino, attr->ia_mtime);
-                        oa.o_id = lsm->lsm_object_id;
-                        oa.o_mode = S_IFREG;
-                        oa.o_valid = OBD_MD_FLID |OBD_MD_FLTYPE |OBD_MD_FLMTIME;
-                        oa.o_mtime = attr->ia_mtime;
-                        err2 = obd_setattr(&sbi->ll_osc_conn, &oa, lsm, NULL);
-                        if (err2) {
-                                CERROR("obd_setattr fails: rc=%d\n", err);
-                                if (!err)
-                                        err = err2;
-                        }
-                }
-        }
-
-        RETURN(err);
-}
-
-int ll_setattr_raw(struct inode *inode, struct iattr *attr)
-{
-        struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-        struct ll_sb_info *sbi = ll_i2sbi(inode);
-        struct ptlrpc_request *request = NULL;
-        struct mdc_op_data op_data;
-        int rc = 0, err;
-        ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
-               inode->i_generation, inode);
-
-        if ((attr->ia_valid & ATTR_SIZE)) {
-                struct ldlm_extent extent = {attr->ia_size, OBD_OBJECT_EOF};
-                struct lustre_handle lockh = { 0 };
-
-                if (attr->ia_size > ll_file_maxbytes(inode))
-                        RETURN(-EFBIG);
-
-                /* writeback uses inode->i_size to determine how far out
-                 * its cached pages go.  ll_truncate gets a PW lock, canceling
-                 * our lock, _after_ it has updated i_size.  this can confuse
-                 *
-                 * If this file doesn't have stripes yet, it is already,
-                 * by definition, truncated. */
-                if ((attr->ia_valid & ATTR_FROM_OPEN) && lsm == NULL) {
-                        LASSERT(attr->ia_size == 0);
-                        GOTO(skip_extent_lock, rc = 0);
-                }
-
-                /* we really need to get our PW lock before we change
-                 * inode->i_size.  if we don't we can race with other
-                 * i_size updaters on our node, like ll_file_read.  we
-                 * can also race with i_size propogation to other
-                 * nodes through dirtying and writeback of final cached
-                 * pages.  this last one is especially bad for racing
-                 * o_append users on other nodes. */
-                rc = ll_extent_lock_no_validate(NULL, inode, lsm, LCK_PW,
-                                                &extent, &lockh);
-                if (rc != ELDLM_OK) {
-                        if (rc > 0)
-                                RETURN(-ENOLCK);
-                        RETURN(rc);
-                }
-
-                rc = vmtruncate(inode, attr->ia_size);
-                if (rc == 0)
-                        set_bit(LLI_F_HAVE_SIZE_LOCK,
-                                &ll_i2info(inode)->lli_flags);
-
-                /* unlock now as we don't mind others file lockers racing with
-                 * the mds updates below? */
-                err = ll_extent_unlock(NULL, inode, lsm, LCK_PW, &lockh);
-                if (err)
-                        CERROR("ll_extent_unlock failed: %d\n", err);
-                if (rc)
-                        RETURN(rc);
-        }
-
-skip_extent_lock:
-        /* Don't send size changes to MDS to avoid "fast EA" problems, and
-         * also avoid a pointless RPC (we get file size from OST anyways).
-         */
-        attr->ia_valid &= ~ATTR_SIZE;
-        if (!attr->ia_valid)
-                RETURN(0);
-
-        ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0);
-
-        err = mdc_setattr(&sbi->ll_mdc_conn, &op_data,
-                          attr, NULL, 0, &request);
-        if (err)
-                CERROR("mdc_setattr fails: err = %d\n", err);
-
-        ptlrpc_req_finished(request);
-
-        if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_MTIME_SET)) {
-                struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-                struct obdo oa;
-                int err2;
-
-                if (lsm == NULL) {
-                        CDEBUG(D_INODE, "no lsm: not setting mtime on OSTs\n");
-                        RETURN(err);
-                }
-
-                CDEBUG(D_INODE, "set mtime on OST inode %lu to %lu\n",
-                       inode->i_ino, attr->ia_mtime);
-                oa.o_id = lsm->lsm_object_id;
-                oa.o_mode = S_IFREG;
-                oa.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMTIME;
-                oa.o_mtime = attr->ia_mtime;
-                err2 = obd_setattr(&sbi->ll_osc_conn, &oa, lsm, NULL);
-                if (err2) {
-                        CERROR("obd_setattr fails: rc=%d\n", err);
-                        if (!err)
-                                err = err2;
-                }
-        }
-        RETURN(err);
-}
-
-int ll_setattr(struct dentry *de, struct iattr *attr)
-{
-        int rc = inode_change_ok(de->d_inode, attr);
-        CDEBUG(D_VFSTRACE, "VFS Op:name=%s\n", de->d_name.name);
-        if (rc)
-                return rc;
-        lprocfs_counter_incr(ll_i2sbi(de->d_inode)->ll_stats, LPROC_LL_SETATTR);
-
-        return ll_inode_setattr(de->d_inode, attr, 1);
-}
-
-static int ll_statfs(struct super_block *sb, struct statfs *sfs)
-{
-        struct ll_sb_info *sbi = ll_s2sbi(sb);
-        struct obd_export *mdc_exp = class_conn2export(&sbi->ll_mdc_conn);
-        struct obd_export *osc_exp;
-        struct obd_statfs osfs;
-        int rc;
-        ENTRY;
-
-        if (mdc_exp == NULL)
-                RETURN(-EINVAL);
-
-        CDEBUG(D_VFSTRACE, "VFS Op:\n");
-        lprocfs_counter_incr(sbi->ll_stats, LPROC_LL_STAFS);
-        memset(sfs, 0, sizeof(*sfs));
-        rc = obd_statfs(mdc_exp, &osfs);
-        statfs_unpack(sfs, &osfs);
-        if (rc)
-                CERROR("mdc_statfs fails: rc = %d\n", rc);
-        else
-                CDEBUG(D_SUPER, "mdc_statfs shows blocks "LPU64"/"LPU64
-                       " objects "LPU64"/"LPU64"\n",
-                       osfs.os_bavail, osfs.os_blocks,
-                       osfs.os_ffree, osfs.os_files);
-
-        /* temporary until mds_statfs returns statfs info for all OSTs */
-        if (!rc) {
-                osc_exp = class_conn2export(&sbi->ll_osc_conn);
-                if (osc_exp == NULL)
-                        GOTO(out, rc = -EINVAL);
-                rc = obd_statfs(osc_exp, &osfs);
-                class_export_put(osc_exp);
-                if (rc) {
-                        CERROR("obd_statfs fails: rc = %d\n", rc);
-                        GOTO(out, rc);
-                }
-                CDEBUG(D_SUPER, "obd_statfs shows blocks "LPU64"/"LPU64
-                       " objects "LPU64"/"LPU64"\n",
-                       osfs.os_bavail, osfs.os_blocks,
-                       osfs.os_ffree, osfs.os_files);
-
-                while (osfs.os_blocks > ~0UL) {
-                        sfs->f_bsize <<= 1;
-
-                        osfs.os_blocks >>= 1;
-                        osfs.os_bfree >>= 1;
-                        osfs.os_bavail >>= 1;
-                }
-
-                sfs->f_blocks = osfs.os_blocks;
-                sfs->f_bfree = osfs.os_bfree;
-                sfs->f_bavail = osfs.os_bavail;
-
-                /* If we don't have as many objects free on the OST as inodes
-                 * on the MDS, we reduce the total number of inodes to
-                 * compensate, so that the "inodes in use" number is correct.
-                 */
-                if (osfs.os_ffree < (__u64)sfs->f_ffree) {
-                        sfs->f_files = (sfs->f_files - sfs->f_ffree) +
-                                       osfs.os_ffree;
-                        sfs->f_ffree = osfs.os_ffree;
-                }
-        }
-
-out:
-        class_export_put(mdc_exp);
-        RETURN(rc);
-}
-
-void dump_lsm(int level, struct lov_stripe_md *lsm)
-{
-        CDEBUG(level, "objid "LPX64", maxbytes "LPX64", magic %#08x, "
-               "stripe_size %#08x, offset %u, stripe_count %u\n",
-               lsm->lsm_object_id, lsm->lsm_maxbytes, lsm->lsm_magic,
-               lsm->lsm_stripe_size, lsm->lsm_stripe_offset,
-               lsm->lsm_stripe_count);
-}
-
-void ll_update_inode(struct inode *inode, struct mds_body *body,
-                     struct lov_stripe_md *lsm)
-{
-        struct ll_inode_info *lli = ll_i2info(inode);
-
-        LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
-        if (lsm != NULL) {
-                if (lli->lli_smd == NULL) {
-                        lli->lli_maxbytes = lsm->lsm_maxbytes;
-                        if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES)
-                                lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
-                        lli->lli_smd = lsm;
-                } else {
-                        if (memcmp(lli->lli_smd, lsm, sizeof(*lsm))) {
-                                CERROR("lsm mismatch for inode %ld\n",
-                                       inode->i_ino);
-                                CERROR("lli_smd:\n");
-                                dump_lsm(D_ERROR, lli->lli_smd);
-                                CERROR("lsm:\n");
-                                dump_lsm(D_ERROR, lsm);
-                                LBUG();
-                        }
-                }
-        }
-
-        if (body->valid & OBD_MD_FLID)
-                inode->i_ino = body->ino;
-        if (body->valid & OBD_MD_FLATIME)
-                LTIME_S(inode->i_atime) = body->atime;
-        if (body->valid & OBD_MD_FLMTIME)
-                LTIME_S(inode->i_mtime) = body->mtime;
-        if (body->valid & OBD_MD_FLCTIME)
-                LTIME_S(inode->i_ctime) = body->ctime;
-        if (body->valid & OBD_MD_FLMODE)
-                inode->i_mode = (inode->i_mode & S_IFMT)|(body->mode & ~S_IFMT);
-        if (body->valid & OBD_MD_FLTYPE)
-                inode->i_mode = (inode->i_mode & ~S_IFMT)|(body->mode & S_IFMT);
-        if (body->valid & OBD_MD_FLUID)
-                inode->i_uid = body->uid;
-        if (body->valid & OBD_MD_FLGID)
-                inode->i_gid = body->gid;
-        if (body->valid & OBD_MD_FLFLAGS)
-                inode->i_flags = body->flags;
-        if (body->valid & OBD_MD_FLNLINK)
-                inode->i_nlink = body->nlink;
-        if (body->valid & OBD_MD_FLGENER)
-                inode->i_generation = body->generation;
-        if (body->valid & OBD_MD_FLRDEV)
-                inode->i_rdev = body->rdev;
-        if (body->valid & OBD_MD_FLSIZE)
-                inode->i_size = body->size;
-        if (body->valid & OBD_MD_FLBLOCKS)
-                inode->i_blocks = body->blocks;
-}
-
-static void ll_read_inode2(struct inode *inode, void *opaque)
-{
-        struct ll_read_inode2_cookie *lic = opaque;
-        struct mds_body *body = lic->lic_body;
-        struct ll_inode_info *lli = ll_i2info(inode);
-        ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
-               inode->i_generation, inode);
-
-        sema_init(&lli->lli_open_sem, 1);
-        spin_lock_init(&lli->lli_read_extent_lock);
-        INIT_LIST_HEAD(&lli->lli_read_extents);
-        lli->lli_flags = 0;
-        /* We default to 2T-4k until the LSM is created/read, at which point
-         * it'll be updated. */
-        lli->lli_maxbytes = LUSTRE_STRIPE_MAXBYTES;
-
-        LASSERT(!lli->lli_smd);
-
-        /* core attributes from the MDS first */
-        ll_update_inode(inode, body, lic->lic_lsm);
-
-        /* OIDEBUG(inode); */
-
-        if (S_ISREG(inode->i_mode)) {
-                inode->i_op = &ll_file_inode_operations;
-                inode->i_fop = &ll_file_operations;
-                inode->i_mapping->a_ops = &ll_aops;
-                EXIT;
-        } else if (S_ISDIR(inode->i_mode)) {
-                inode->i_op = &ll_dir_inode_operations;
-                inode->i_fop = &ll_dir_operations;
-                inode->i_mapping->a_ops = &ll_dir_aops;
-                EXIT;
-        } else if (S_ISLNK(inode->i_mode)) {
-                inode->i_op = &ll_fast_symlink_inode_operations;
-                EXIT;
-        } else {
-                inode->i_op = &ll_special_inode_operations;
-                init_special_inode(inode, inode->i_mode, inode->i_rdev);
-                EXIT;
-        }
-}
-
-void ll_umount_begin(struct super_block *sb)
-{
-        struct ll_sb_info *sbi = ll_s2sbi(sb);
-        struct obd_device *obd;
-        struct obd_ioctl_data ioc_data = { 0 };
-
-        ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:\n");
-
-        obd = class_conn2obd(&sbi->ll_mdc_conn);
-        obd->obd_no_recov = 1;
-        obd_iocontrol(IOC_OSC_SET_ACTIVE, &sbi->ll_mdc_conn, sizeof ioc_data,
-                      &ioc_data, NULL);
-
-        obd = class_conn2obd(&sbi->ll_osc_conn);
-        obd->obd_no_recov = 1;
-        obd_iocontrol(IOC_OSC_SET_ACTIVE, &sbi->ll_osc_conn, sizeof ioc_data,
-                      &ioc_data, NULL);
-
-        /* Really, we'd like to wait until there are no requests outstanding,
-         * and then continue.  For now, we just invalidate the requests,
-         * schedule, and hope.
-         */
-        schedule();
-
-        EXIT;
  }
  
  /* exported operations */
diff --git a/lustre/llite/super25.c b/lustre/llite/super25.c

index 980bfcd..5ab03ff 100644 (file)
--- a/lustre/llite/super25.c
+++ b/lustre/llite/super25.c
@@ -34,736 +34,35 @@
  #include <linux/lprocfs_status.h>
  #include "llite_internal.h"
  
-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-#include <asm/statfs.h>
-kmem_cache_t *ll_file_data_slab;
-extern struct address_space_operations ll_aops;
-extern struct address_space_operations ll_dir_aops;
-struct super_operations ll_super_operations;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
  
-/* /proc/lustre/llite root that tracks llite mount points */
-struct proc_dir_entry *proc_lustre_fs_root = NULL;
-/* lproc_llite.c */
-extern int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
-                                       struct super_block *sb,
-                                       char *osc, char *mdc);
-
-extern int ll_init_inodecache(void);
-extern void ll_destroy_inodecache(void);
-extern int ll_recover(struct recovd_data *, int);
-extern int ll_commitcbd_setup(struct ll_sb_info *);
-extern int ll_commitcbd_cleanup(struct ll_sb_info *);
-int ll_read_inode2(struct inode *inode, void *opaque);
-
-extern int ll_proc_namespace(struct super_block* sb, char* osc, char* mdc);
-
-static char *ll_read_opt(const char *opt, char *data)
-{
-        char *value;
-        char *retval;
-        ENTRY;
-
-        CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data);
-        if (strncmp(opt, data, strlen(opt)))
-                RETURN(NULL);
-        if ((value = strchr(data, '=')) == NULL)
-                RETURN(NULL);
-
-        value++;
-        OBD_ALLOC(retval, strlen(value) + 1);
-        if (!retval) {
-                CERROR("out of memory!\n");
-                RETURN(NULL);
-        }
-
-        memcpy(retval, value, strlen(value)+1);
-        CDEBUG(D_SUPER, "Assigned option: %s, value %s\n", opt, retval);
-        RETURN(retval);
-}
-
-static int ll_set_opt(const char *opt, char *data, int fl)
-{
-        ENTRY;
-
-        CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data);
-        if (strncmp(opt, data, strlen(opt)))
-                RETURN(0);
-        else
-                RETURN(fl);
-}
-
-static void ll_options(char *options, char **ost, char **mds, int *flags)
-{
-        char *opt_ptr = options;
-        char *this_char;
-        ENTRY;
-
-        if (!options) {
-                EXIT;
-                return;
-        }
-
-        while ((this_char = strsep (&opt_ptr, ",")) != NULL) {
-                CDEBUG(D_SUPER, "this_char %s\n", this_char);
-                if ((!*ost && (*ost = ll_read_opt("osc", this_char)))||
-                    (!*mds && (*mds = ll_read_opt("mdc", this_char)))||
-                    (!(*flags & LL_SBI_NOLCK) &&
-                     ((*flags) = (*flags) |
-                      ll_set_opt("nolock", this_char, LL_SBI_NOLCK))))
-                        continue;
-        }
-        EXIT;
-}
-
-#ifndef log2
-#define log2(n) ffz(~(n))
-#endif
-
-
-static int ll_fill_super(struct super_block *sb, void *data, int silent)
-{
-        struct inode *root = 0;
-        struct obd_device *obd;
-        struct ll_sb_info *sbi;
-        char *osc = NULL;
-        char *mdc = NULL;
-        int err;
-        struct ll_fid rootfid;
-        struct obd_statfs osfs;
-        struct ptlrpc_request *request = NULL;
-        struct ptlrpc_connection *mdc_conn;
-        struct ll_read_inode2_cookie lic;
-        class_uuid_t uuid;
-
-        ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:\n");
-
-        OBD_ALLOC(sbi, sizeof(*sbi));
-        if (!sbi)
-                RETURN(-ENOMEM);
-
-        INIT_LIST_HEAD(&sbi->ll_conn_chain);
-        INIT_LIST_HEAD(&sbi->ll_orphan_dentry_list);
-        generate_random_uuid(uuid);
-        class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
-
-        sb->s_fs_info = sbi;
-
-        ll_options(data, &osc, &mdc, &sbi->ll_flags);
-
-        if (!osc) {
-                CERROR("no osc\n");
-                GOTO(out_free, sb = NULL);
-        }
-
-        if (!mdc) {
-                CERROR("no mdc\n");
-                GOTO(out_free, sb = NULL);
-        }
-
-        obd = class_name2obd(mdc);
-        if (!obd) {
-                CERROR("MDC %s: not setup or attached\n", mdc);
-                GOTO(out_free, sb = NULL);
-        }
-
-        err = obd_connect(&sbi->ll_mdc_conn, obd, &sbi->ll_sb_uuid);
-        if (err) {
-                CERROR("cannot connect to %s: rc = %d\n", mdc, err);
-                GOTO(out_free, sb = NULL);
-        }
-
-        mdc_conn = sbi2mdc(sbi)->cl_import->imp_connection;
-
-        obd = class_name2obd(osc);
-        if (!obd) {
-                CERROR("OSC %s: not setup or attached\n", osc);
-                GOTO(out_mdc, sb = NULL);
-        }
-
-        err = obd_connect(&sbi->ll_osc_conn, obd, &sbi->ll_sb_uuid);
-        if (err) {
-                CERROR("cannot connect to %s: rc = %d\n", osc, err);
-                GOTO(out_mdc, sb = NULL);
-        }
-
-        err = mdc_getstatus(&sbi->ll_mdc_conn, &rootfid);
-        if (err) {
-                CERROR("cannot mds_connect: rc = %d\n", err);
-                GOTO(out_osc, sb = NULL);
-        }
-        CDEBUG(D_SUPER, "rootfid "LPU64"\n", rootfid.id);
-        sbi->ll_rootino = rootfid.id;
-
-        memset(&osfs, 0, sizeof(osfs));
-        err = obd_statfs(&sbi->ll_mdc_conn, &osfs);
-        sb->s_blocksize = osfs.os_bsize;
-        sb->s_blocksize_bits = log2(osfs.os_bsize);
-        sb->s_magic = LL_SUPER_MAGIC;
-        sb->s_maxbytes = PAGE_CACHE_MAXBYTES;
-
-        sb->s_op = &ll_super_operations;
-
-        /* make root inode 
-         * XXX: move this to after cbd setup? */
-        err = mdc_getattr(&sbi->ll_mdc_conn, &rootfid,
-                          OBD_MD_FLNOTOBD|OBD_MD_FLBLOCKS, 0, &request);
-        if (err) {
-                CERROR("mdc_getattr failed for root: rc = %d\n", err);
-                GOTO(out_osc, sb = NULL);
-        }
-
-        /* initialize committed transaction callback daemon */
-        spin_lock_init(&sbi->ll_commitcbd_lock);
-        init_waitqueue_head(&sbi->ll_commitcbd_waitq);
-        init_waitqueue_head(&sbi->ll_commitcbd_ctl_waitq);
-        sbi->ll_commitcbd_flags = 0;
-        err = ll_commitcbd_setup(sbi);
-        if (err) {
-                CERROR("failed to start commit callback daemon: rc = %d\n",err);
-                ptlrpc_req_finished (request);
-                GOTO(out_osc, sb = NULL);
-        }
-
-        lic.lic_body = lustre_msg_buf(request->rq_repmsg, 0, sizeof (*lic.lic_body));
-        LASSERT (lic.lic_body != NULL);         /* checked by mdc_getattr() */
-        LASSERT_REPSWABBED (request, 0);        /* swabbed by mdc_getattr() */
-
-        lic.lic_lsm = NULL;
-
-        root = iget5_locked(sb, sbi->ll_rootino, NULL,
-                            ll_read_inode2, &lic);
-
-        ptlrpc_req_finished(request);
-
-        if (root == NULL || is_bad_inode(root)) {
-                /* XXX might need iput() for bad inode */
-                CERROR("lustre_lite: bad iget5 for root\n");
-                GOTO(out_cbd, sb = NULL);
-        }
-
-        sb->s_root = d_alloc_root(root);
-        root->i_state &= ~(I_LOCK | I_NEW);
-        printk("AMRUT 1\n");
-        if (proc_lustre_fs_root) {
-                err = lprocfs_register_mountpoint(proc_lustre_fs_root, sb,
-                                                  osc, mdc);
-                if (err < 0)
-                        CERROR("could not register mount in /proc/lustre");
-        }
-
-out_dev:
-        if (mdc)
-                OBD_FREE(mdc, strlen(mdc) + 1);
-        if (osc)
-                OBD_FREE(osc, strlen(osc) + 1);
-        printk("AMRUT 2\n");
-
-        RETURN(0);
-
-out_cbd:
-        ll_commitcbd_cleanup(sbi);
-out_osc:
-        obd_disconnect(&sbi->ll_osc_conn, 0);
-out_mdc:
-        obd_disconnect(&sbi->ll_mdc_conn, 0);
-out_free:
-        lprocfs_unregister_mountpoint(sbi);
-        OBD_FREE(sbi, sizeof(*sbi));
-
-        goto out_dev;
-} /* ll_fill_super */
-
-
-int ll_setattr_raw(struct inode *inode, struct iattr *attr)
-{
-        struct ptlrpc_request *request = NULL;
-        struct ll_sb_info *sbi = ll_i2sbi(inode);
-        struct mdc_op_data op_data;
-        int err = 0;
-        ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino);
-
-        LPROC_COUNTER_INODE_INCBY1(inode, LPROC_LL_SETATTR);
-        if ((attr->ia_valid & ATTR_SIZE)) {
-                /* writeback uses inode->i_size to determine how far out
-                 * its cached pages go.  ll_truncate gets a PW lock, canceling
-                 * our lock, _after_ it has updated i_size.  this can confuse
-                 * us into zero extending the file to the newly truncated
-                 * size, and this has bad implications for a racing o_append.
-                 * if we're extending our size we need to flush the pages
-                 * with the correct i_size before vmtruncate stomps on
-                 * the new i_size.  again, this can only find pages to
-                 * purge if the PW lock that generated them is still held.
-                 */
-                if ( attr->ia_size > inode->i_size ) {
-                        filemap_fdatasync(inode->i_mapping);
-                        filemap_fdatawait(inode->i_mapping);
-                }
-                err = vmtruncate(inode, attr->ia_size);
-                if (err)
-                        RETURN(err);
-        }
-
-        /* Don't send size changes to MDS to avoid "fast EA" problems, and
-         * also avoid a pointless RPC (we get file size from OST anyways).
-         */
-        attr->ia_valid &= ~ATTR_SIZE;
-        if (!attr->ia_valid)
-                RETURN(0);
-
-        ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0);
-
-        err = mdc_setattr(&sbi->ll_mdc_conn, &op_data,
-                          attr, NULL, 0, &request);
-        if (err)
-                CERROR("mdc_setattr fails: err = %d\n", err);
-
-        ptlrpc_req_finished(request);
-
-        if (S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_MTIME_SET) {
-                struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-                struct obdo oa;
-                int err2;
-
-                CDEBUG(D_INODE, "set mtime on OST inode %lu to %lu\n",
-                       inode->i_ino, attr->ia_mtime);
-                oa.o_id = lsm->lsm_object_id;
-                oa.o_mode = S_IFREG;
-                oa.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMTIME;
-                oa.o_mtime = LTIME_S(attr->ia_mtime);
-                err2 = obd_setattr(&sbi->ll_osc_conn, &oa, lsm, NULL);
-                if (err2) {
-                        CERROR("obd_setattr fails: rc=%d\n", err);
-                        if (!err)
-                                err = err2;
-                }
-        }
-        RETURN(err);
-}
  struct super_block * ll_get_sb(struct file_system_type *fs_type,
-                               int flags, char *devname, void * data)
+                               int flags, const char *devname, void * data)
  {
+        /* calls back in fill super */
          return get_sb_nodev(fs_type, flags, data, ll_fill_super);
  }
  
-static void ll_put_super(struct super_block *sb)
-{
-        struct ll_sb_info *sbi = ll_s2sbi(sb);
-        struct list_head *tmp, *next;
-        struct ll_fid rootfid;
-        ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:\n");
-
-        list_del(&sbi->ll_conn_chain);
-        ll_commitcbd_cleanup(sbi);
-        obd_disconnect(&sbi->ll_osc_conn, 0);
-
-        /* NULL request to force sync on the MDS, and get the last_committed
-         * value to flush remaining RPCs from the pending queue on client.
-         *
-         * XXX This should be an mdc_sync() call to sync the whole MDS fs,
-         *     which we can call for other reasons as well.
-         */
-        mdc_getstatus(&sbi->ll_mdc_conn, &rootfid);
-
-        lprocfs_unregister_mountpoint(sbi);
-        if (sbi->ll_proc_root) {
-                lprocfs_remove(sbi->ll_proc_root);
-        sbi->ll_proc_root = NULL;
-        }
-
-        obd_disconnect(&sbi->ll_mdc_conn, 0);
-
-        spin_lock(&dcache_lock);
-        list_for_each_safe(tmp, next, &sbi->ll_orphan_dentry_list){
-                struct dentry *dentry = list_entry(tmp, struct dentry, d_hash);
-                shrink_dcache_parent(dentry);
-        }
-        spin_unlock(&dcache_lock);
-
-        OBD_FREE(sbi, sizeof(*sbi));
-
-        EXIT;
-} /* ll_put_super */
-
-static void ll_clear_inode(struct inode *inode)
-{
-        struct ll_sb_info *sbi = ll_i2sbi(inode);
-        struct ll_inode_info *lli = ll_i2info(inode);
-        int rc;
-        ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino);
-
-#warning "Is there a reason we don't do this in 2.5, but we do in 2.4?"
-#if 0
-        rc = ll_mdc_cancel_unused(&sbi->ll_mdc_conn, inode, LDLM_FL_NO_CALLBACK);
-        if (rc < 0) {
-                CERROR("ll_mdc_cancel_unused: %d\n", rc);
-                /* XXX FIXME do something dramatic */
-        }
-
-        if (lli->lli_smd) {
-                rc = obd_cancel_unused(&sbi->ll_osc_conn, lli->lli_smd, 0);
-                if (rc < 0) {
-                        CERROR("obd_cancel_unused: %d\n", rc);
-                        /* XXX FIXME do something dramatic */
-                }
-        }
-#endif
-
-        if (atomic_read(&inode->i_count) != 0)
-                CERROR("clearing in-use inode %lu: count = %d\n",
-                       inode->i_ino, atomic_read(&inode->i_count));
-
-        if (lli->lli_smd) {
-                obd_free_memmd(&sbi->ll_osc_conn, &lli->lli_smd);
-                lli->lli_smd = NULL;
-        }
-
-        if (lli->lli_symlink_name) {
-                OBD_FREE(lli->lli_symlink_name,strlen(lli->lli_symlink_name)+1);
-                lli->lli_symlink_name = NULL;
-        }
-
-        EXIT;
-}
-
-#if 0
-static void ll_delete_inode(struct inode *inode)
-{
-        ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino);
-        if (S_ISREG(inode->i_mode)) {
-                int err;
-                struct obdo *oa;
-                struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-
-                /* mcreate with no open */
-                if (!lsm)
-                        GOTO(out, 0);
-
-                if (lsm->lsm_object_id == 0) {
-                        CERROR("This really happens\n");
-                        /* No obdo was ever created */
-                        GOTO(out, 0);
-                }
-
-                oa = obdo_alloc();
-                if (oa == NULL)
-                        GOTO(out, -ENOMEM);
-
-                oa->o_id = lsm->lsm_object_id;
-                oa->o_mode = inode->i_mode;
-                oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE;
-
-                err = obd_destroy(ll_i2obdconn(inode), oa, lsm);
-                obdo_free(oa);
-                if (err)
-                        CDEBUG(D_SUPER, "obd destroy objid "LPX64" error %d\n",
-                               lsm->lsm_object_id, err);
-        }
-out:
-        clear_inode(inode);
-        EXIT;
-}
-#endif
-
-/* like inode_setattr, but doesn't mark the inode dirty */
-static int ll_attr2inode(struct inode * inode, struct iattr * attr, int trunc)
-{
-        unsigned int ia_valid = attr->ia_valid;
-        int error = 0;
-
-        if ((ia_valid & ATTR_SIZE) && trunc) {
-                if (attr->ia_size > ll_file_maxbytes(inode)) {
-                        error = -EFBIG;
-                        goto out;
-                }
-                error = vmtruncate(inode, attr->ia_size);
-                if (error)
-                        goto out;
-        } else if (ia_valid & ATTR_SIZE)
-                inode->i_size = attr->ia_size;
-
-        if (ia_valid & ATTR_UID)
-                inode->i_uid = attr->ia_uid;
-        if (ia_valid & ATTR_GID)
-                inode->i_gid = attr->ia_gid;
-        if (ia_valid & ATTR_ATIME)
-                inode->i_atime = attr->ia_atime;
-        if (ia_valid & ATTR_MTIME)
-                inode->i_mtime = attr->ia_mtime;
-        if (ia_valid & ATTR_CTIME)
-                inode->i_ctime = attr->ia_ctime;
-        if (ia_valid & ATTR_MODE) {
-                inode->i_mode = attr->ia_mode;
-                if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
-                        inode->i_mode &= ~S_ISGID;
-        }
-out:
-        return error;
-}
-
-int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc)
-{
-        struct ptlrpc_request *request = NULL;
-        struct ll_sb_info *sbi = ll_i2sbi(inode);
-        int err = 0;
-
-        ENTRY;
-
-        /* change incore inode */
-        err = ll_attr2inode(inode, attr, do_trunc);
-        if (err)
-                RETURN(err);
-
-        /* Don't send size changes to MDS to avoid "fast EA" problems, and
-         * also avoid a pointless RPC (we get file size from OST anyways).
-         */
-        attr->ia_valid &= ~ATTR_SIZE;
-        if (attr->ia_valid) {
-                struct mdc_op_data op_data;
-
-                ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0);
-
-                err = mdc_setattr(&sbi->ll_mdc_conn, &op_data,
-                                  attr, NULL, 0, &request);
-                if (err)
-                        CERROR("mdc_setattr fails: err = %d\n", err);
-
-                ptlrpc_req_finished(request);
-                if (S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_MTIME_SET) {
-                        struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-                        struct obdo oa;
-                        int err2;
-
-                        CDEBUG(D_ERROR, "setting mtime on OST\n");
-                        oa.o_id = lsm->lsm_object_id;
-                        oa.o_mode = S_IFREG;
-                        oa.o_valid = OBD_MD_FLID |OBD_MD_FLTYPE |OBD_MD_FLMTIME;
-                        oa.o_mtime = LTIME_S(attr->ia_mtime);
-                        err2 = obd_setattr(&sbi->ll_osc_conn, &oa, lsm, NULL);
-                        if (err2) {
-                                CERROR("obd_setattr fails: rc=%d\n", err);
-                                if (!err)
-                                        err = err2;
-                        }
-                }
-        }
-
-        RETURN(err);
-}
-
-int ll_setattr(struct dentry *de, struct iattr *attr)
-{
-        int rc = inode_change_ok(de->d_inode, attr);
-        CDEBUG(D_VFSTRACE, "VFS Op:name=%s\n", de->d_name.name);
-        if (rc)
-                return rc;
-
-        LPROC_COUNTER_INODE_INCBY1((de->d_inode), LPROC_LL_SETATTR);
-        return ll_inode_setattr(de->d_inode, attr, 1);
-}
-
-static int ll_statfs(struct super_block *sb, struct statfs *sfs)
-{
-        struct ll_sb_info *sbi = ll_s2sbi(sb);
-        struct obd_statfs osfs;
-        int rc;
-        ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:\n");
-
-        LPROC_COUNTER_SBI_INCBY1(sbi, LPROC_LL_STAFS);
-        memset(sfs, 0, sizeof(*sfs));
-        rc = obd_statfs(&sbi->ll_mdc_conn, &osfs);
-        statfs_unpack(sfs, &osfs);
-        if (rc)
-                CERROR("mdc_statfs fails: rc = %d\n", rc);
-        else
-                CDEBUG(D_SUPER, "mdc_statfs shows blocks "LPU64"/"LPU64
-                       " objects "LPU64"/"LPU64"\n",
-                       osfs.os_bavail, osfs.os_blocks,
-                       osfs.os_ffree, osfs.os_files);
-
-        /* temporary until mds_statfs returns statfs info for all OSTs */
-        if (!rc) {
-                rc = obd_statfs(&sbi->ll_osc_conn, &osfs);
-                if (rc) {
-                        CERROR("obd_statfs fails: rc = %d\n", rc);
-                        GOTO(out, rc);
-                }
-                CDEBUG(D_SUPER, "obd_statfs shows blocks "LPU64"/"LPU64
-                       " objects "LPU64"/"LPU64"\n",
-                       osfs.os_bavail, osfs.os_blocks,
-                       osfs.os_ffree, osfs.os_files);
-
-                while (osfs.os_blocks > ~0UL) {
-                        sfs->f_bsize <<= 1;
-
-                        osfs.os_blocks >>= 1;
-                        osfs.os_bfree >>= 1;
-                        osfs.os_bavail >>= 1;
-                }
-                sfs->f_blocks = osfs.os_blocks;
-                sfs->f_bfree = osfs.os_bfree;
-                sfs->f_bavail = osfs.os_bavail;
-                if (osfs.os_ffree < (__u64)sfs->f_ffree) {
-                        sfs->f_files = (sfs->f_files - sfs->f_ffree) +
-                                       osfs.os_ffree;
-                        sfs->f_ffree = osfs.os_ffree;
-                }
-        }
-
-out:
-        RETURN(rc);
-}
-
-void ll_update_inode(struct inode *inode, struct mds_body *body,
-                     struct lov_stripe_md *lsm)
-{
-        struct ll_inode_info *lli = ll_i2info(inode);
-
-        LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
-        if (lsm != NULL) {
-                if (lli->lli_smd == NULL) {
-                        lli->lli_smd = lsm;
-                        lli->lli_maxbytes = lsm->lsm_maxbytes;
-                        if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES)
-                                lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
-                } else {
-                        LASSERT (!memcmp (lli->lli_smd, lsm, sizeof (*lsm)));
-                }
-        }
-
-        if (body->valid & OBD_MD_FLID)
-                inode->i_ino = body->ino;
-        if (body->valid & OBD_MD_FLATIME)
-                LTIME_S(inode->i_atime) = body->atime;
-        if (body->valid & OBD_MD_FLMTIME)
-                LTIME_S(inode->i_mtime) = body->mtime;
-        if (body->valid & OBD_MD_FLCTIME)
-                LTIME_S(inode->i_ctime) = body->ctime;
-        if (body->valid & OBD_MD_FLMODE)
-                inode->i_mode = (inode->i_mode & S_IFMT)|(body->mode & ~S_IFMT);
-        if (body->valid & OBD_MD_FLTYPE)
-                inode->i_mode = (inode->i_mode & ~S_IFMT)|(body->mode & S_IFMT);
-        if (body->valid & OBD_MD_FLUID)
-                inode->i_uid = body->uid;
-        if (body->valid & OBD_MD_FLGID)
-                inode->i_gid = body->gid;
-        if (body->valid & OBD_MD_FLFLAGS)
-                inode->i_flags = body->flags;
-        if (body->valid & OBD_MD_FLNLINK)
-                inode->i_nlink = body->nlink;
-        if (body->valid & OBD_MD_FLGENER)
-                inode->i_generation = body->generation;
-        if (body->valid & OBD_MD_FLRDEV)
-                inode->i_rdev = to_kdev_t(body->rdev);
-        if (body->valid & OBD_MD_FLSIZE)
-                inode->i_size = body->size;
-        if (body->valid & OBD_MD_FLBLOCKS)
-                inode->i_blocks = body->blocks;
-}
-
-int ll_read_inode2(struct inode *inode, void *opaque)
-{
-        struct ll_read_inode2_cookie *lic = opaque;
-        struct mds_body *body = lic->lic_body;
-        struct ll_inode_info *lli = ll_i2info(inode);
-        int rc = 0;
-        ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino);
-
-        sema_init(&lli->lli_open_sem, 1);
-        /* these are 2.4 only, but putting them here for consistency.. */
-        spin_lock_init(&lli->lli_read_extent_lock);
-        INIT_LIST_HEAD(&lli->lli_read_extents);
-        ll_lldo_init(&lli->lli_dirty);
-        lli->lli_flags = 0;
-        lli->lli_maxbytes = LUSTRE_STRIPE_MAXBYTES;
-
-        LASSERT(!lli->lli_smd);
-
-        /* core attributes first */
-        ll_update_inode(inode, body, lic ? lic->lic_lsm : NULL);
-
-        /* OIDEBUG(inode); */
-
-        if (S_ISREG(inode->i_mode)) {
-                inode->i_op = &ll_file_inode_operations;
-                inode->i_fop = &ll_file_operations;
-                inode->i_mapping->a_ops = &ll_aops;
-                EXIT;
-        } else if (S_ISDIR(inode->i_mode)) {
-                inode->i_op = &ll_dir_inode_operations;
-                inode->i_fop = &ll_dir_operations;
-                inode->i_mapping->a_ops = &ll_dir_aops;
-                EXIT;
-        } else if (S_ISLNK(inode->i_mode)) {
-                inode->i_op = &ll_fast_symlink_inode_operations;
-                EXIT;
-        } else {
-                inode->i_op = &ll_special_inode_operations;
-                init_special_inode(inode, inode->i_mode,
-                                   kdev_t_to_nr(inode->i_rdev));
-                EXIT;
-        }
-
-        return rc;
-}
-
-
-void ll_umount_begin(struct super_block *sb)
-{
-        struct ll_sb_info *sbi = ll_s2sbi(sb);
-        struct obd_device *obd;
-        struct obd_ioctl_data ioc_data = { 0 };
-
-        ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:\n");
-
-        obd = class_conn2obd(&sbi->ll_mdc_conn);
-        obd->obd_no_recov = 1;
-        obd_iocontrol(IOC_OSC_SET_ACTIVE, &sbi->ll_mdc_conn, sizeof ioc_data,
-                      &ioc_data, NULL);
-
-        obd = class_conn2obd(&sbi->ll_osc_conn);
-        obd->obd_no_recov = 1;
-        obd_iocontrol(IOC_OSC_SET_ACTIVE, &sbi->ll_osc_conn, sizeof ioc_data,
-                      &ioc_data, NULL);
-        
-        /* Really, we'd like to wait until there are no requests outstanding,
-         * and then continue.  For now, we just invalidate the requests,
-         * schedule, and hope.
-         */
-        schedule();
-
-        EXIT;
-}
-
  static kmem_cache_t *ll_inode_cachep;
  
  static struct inode *ll_alloc_inode(struct super_block *sb)
  {
          struct ll_inode_info *lli;
-        LPROC_COUNTER_SBI_INCBY1((ll_s2sbi(sb)), LL_ALLOC_INODE);
+        lprocfs_counter_incr((ll_s2sbi(sb))->ll_stats, LPROC_LL_ALLOC_INODE);
          OBD_SLAB_ALLOC(lli, ll_inode_cachep, SLAB_KERNEL, sizeof *lli);
          if (lli == NULL)
                  return NULL;
  
-        memset(lli, 0, (char *)&lli->lli_vfs_inode - (char *)lli);
-        sema_init(&lli->lli_open_sem, 1);
-        init_MUTEX(&lli->lli_size_valid_sem);
-        lli->lli_maxbytes = LUSTRE_STRIPE_MAXBYTES;
+        inode_init_once(&lli->lli_vfs_inode);
+        ll_lli_init(lli);
  
          return &lli->lli_vfs_inode;
  }
  
  static void ll_destroy_inode(struct inode *inode)
  {
-        OBD_SLAB_FREE(ll_inode_cachep, ll_i2info(inode),
-                      sizeof(struct ll_inode_info));
+        struct ll_inode_info *ptr = ll_i2info(inode);
+        OBD_SLAB_FREE(ptr, ll_inode_cachep, sizeof(*ptr));
  }
  
  static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
@@ -792,15 +91,12 @@ void ll_destroy_inodecache(void)
                  CERROR("ll_inode_cache: not all structures were freed\n");
  }
  
-
-
  /* exported operations */
  struct super_operations ll_super_operations =
  {
          alloc_inode: ll_alloc_inode,
          destroy_inode: ll_destroy_inode,
          clear_inode: ll_clear_inode,
-//        delete_inode: ll_delete_inode,
          put_super: ll_put_super,
          statfs: ll_statfs,
          umount_begin: ll_umount_begin
diff --git a/lustre/llite/symlink.c b/lustre/llite/symlink.c

index 19d234e..427f7f0 100644 (file)
--- a/lustre/llite/symlink.c
+++ b/lustre/llite/symlink.c
@@ -24,12 +24,10 @@
  #include <linux/stat.h>
  #include <linux/smp_lock.h>
  #include <linux/version.h>
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#include <asm/statfs.h>
-#endif
  #define DEBUG_SUBSYSTEM S_LLITE
  
  #include <linux/lustre_lite.h>
+#include "llite_internal.h"
  
  static int ll_readlink_internal(struct inode *inode,
                                  struct ptlrpc_request **request, char **symname)
@@ -117,82 +115,46 @@ static int ll_readlink(struct dentry *dentry, char *buffer, int buflen)
          RETURN(rc);
  }
  
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-static int ll_follow_link(struct dentry *dentry, struct nameidata *nd,
-                          struct lookup_intent *it)
+static int ll_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
          struct inode *inode = dentry->d_inode;
          struct ll_inode_info *lli = ll_i2info(inode);
+        struct lookup_intent *it = ll_nd2it(nd);
          struct ptlrpc_request *request;
-        int op = 0, mode = 0, rc;
+        int rc;
          char *symname;
          ENTRY;
  
-        CDEBUG(D_VFSTRACE, "VFS Op\n");
          if (it != NULL) {
-                op = it->it_op;
-                mode = it->it_mode;
-
-                ll_intent_release(dentry, it);
-        }
-
-        down(&lli->lli_open_sem);
-        rc = ll_readlink_internal(inode, &request, &symname);
-        up(&lli->lli_open_sem);
-        if (rc)
-                GOTO(out, rc);
+                int op = it->it_op;
+                int mode = it->it_mode;
  
-        if (it != NULL) {
+                ll_intent_release(it);
                  it->it_op = op;
                  it->it_mode = mode;
          }
  
-        rc = vfs_follow_link_it(nd, symname, it);
-        ptlrpc_req_finished(request);
- out:
-        RETURN(rc);
-}
-#else
-static int ll_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
-        struct inode *inode = dentry->d_inode;
-        struct ll_inode_info *lli = ll_i2info(inode);
-        struct ptlrpc_request *request;
-        int op = 0, mode = 0, rc;
-        char *symname;
-        ENTRY;
-
-        op = nd->it.it_op;
-        mode = nd->it.it_mode;
-
-        ll_intent_release(dentry, &nd->it);
-
+        CDEBUG(D_VFSTRACE, "VFS Op\n");
          down(&lli->lli_open_sem);
-
          rc = ll_readlink_internal(inode, &request, &symname);
+        up(&lli->lli_open_sem);
          if (rc)
                  GOTO(out, rc);
  
-        nd->it.it_op = op;
-        nd->it.it_mode = mode;
-
          rc = vfs_follow_link(nd, symname);
          ptlrpc_req_finished(request);
   out:
-        up(&lli->lli_open_sem);
-
          RETURN(rc);
  }
-#endif
  
-extern int ll_inode_revalidate(struct dentry *dentry);
-extern int ll_setattr(struct dentry *de, struct iattr *attr);
  struct inode_operations ll_fast_symlink_inode_operations = {
          readlink:       ll_readlink,
          setattr:        ll_setattr,
          setattr_raw:    ll_setattr_raw,
-        follow_link2:   ll_follow_link,
+        follow_link:    ll_follow_link,
  #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        revalidate:     ll_inode_revalidate
+        revalidate_it:  ll_inode_revalidate_it
+#else 
+        getattr_it:     ll_getattr
  #endif
  };
diff --git a/lustre/lov/.cvsignore b/lustre/lov/.cvsignore

index e995588..e69dc6d 100644 (file)
--- a/lustre/lov/.cvsignore
+++ b/lustre/lov/.cvsignore
@@ -1,3 +1,4 @@
  .deps
  Makefile
  Makefile.in
+.*.cmd
diff --git a/lustre/lov/Makefile.am b/lustre/lov/Makefile.am

index 879e44d..83dba1a 100644 (file)
--- a/lustre/lov/Makefile.am
+++ b/lustre/lov/Makefile.am
@@ -7,12 +7,12 @@ DEFS=
  
  if LIBLUSTRE
  lib_LIBRARIES = liblov.a
-liblov_a_SOURCES = lov_obd.c lov_pack.c
+liblov_a_SOURCES = lov_obd.c lov_pack.c lov_internal.h
  else
  MODULE = lov
  modulefs_DATA = lov.o
  EXTRA_PROGRAMS = lov
-lov_SOURCES = lov_obd.c lov_pack.c lproc_lov.c
+lov_SOURCES = lov_obd.c lov_pack.c lproc_lov.c lov_internal.h
  endif
  
  include $(top_srcdir)/Rules
diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c

index 2974b2a..9562a4f 100644 (file)
--- a/lustre/lov/lov_obd.c
+++ b/lustre/lov/lov_obd.c
@@ -47,6 +47,11 @@
  #include <linux/seq_file.h>
  #include <linux/lprocfs_status.h>
  
+#include "lov_internal.h"
+
+static int lov_stripe_offset(struct lov_stripe_md *lsm, obd_off lov_off,
+                             int stripeno, obd_off *obd_off);
+
  struct lov_file_handles {
          struct portals_handle lfh_handle;
          atomic_t lfh_refcount;
@@ -68,7 +73,7 @@ static void lov_lfh_addref(void *lfhp)
          struct lov_file_handles *lfh = lfhp;
  
          atomic_inc(&lfh->lfh_refcount);
-        CDEBUG(D_INFO, "GETting lfh %p : new refcount %d\n", lfh,
+        CDEBUG(D_MALLOC, "GETting lfh %p : new refcount %d\n", lfh,
                 atomic_read(&lfh->lfh_refcount));
  }
  
@@ -99,7 +104,7 @@ static struct lov_file_handles *lov_handle2lfh(struct lustre_handle *handle)
  
  static void lov_lfh_put(struct lov_file_handles *lfh)
  {
-        CDEBUG(D_INFO, "PUTting lfh %p : new refcount %d\n", lfh,
+        CDEBUG(D_MALLOC, "PUTting lfh %p : new refcount %d\n", lfh,
                 atomic_read(&lfh->lfh_refcount) - 1);
          LASSERT(atomic_read(&lfh->lfh_refcount) > 0 &&
                  atomic_read(&lfh->lfh_refcount) < 0x5a5a);
@@ -174,19 +179,18 @@ int lov_attach(struct obd_device *dev, obd_count len, void *data)
          struct proc_dir_entry *entry;
          int rc;
  
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(lov, &lvars);
          rc = lprocfs_obd_attach(dev, lvars.obd_vars);
-        if (rc) 
+        if (rc)
                  return rc;
  
          entry = create_proc_entry("target_obd", 0444, dev->obd_proc_entry);
-        if (entry == NULL) 
+        if (entry == NULL)
                  RETURN(-ENOMEM);
-        entry->proc_fops = &ll_proc_target_fops;
+        entry->proc_fops = &lov_proc_target_fops;
          entry->data = dev;
-        
+
          return rc;
-        
  }
  
  int lov_detach(struct obd_device *dev)
@@ -214,15 +218,17 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd,
          if (rc)
                  RETURN(rc);
  
+        exp = class_conn2export(conn);
+        spin_lock_init(&exp->exp_lov_data.led_lock);
+        INIT_LIST_HEAD(&exp->exp_lov_data.led_open_head);
+
          /* We don't want to actually do the underlying connections more than
           * once, so keep track. */
          lov->refcount++;
-        if (lov->refcount > 1)
+        if (lov->refcount > 1) {
+                class_export_put(exp);
                  RETURN(0);
-
-        exp = class_conn2export(conn);
-        spin_lock_init(&exp->exp_lov_data.led_lock);
-        INIT_LIST_HEAD(&exp->exp_lov_data.led_open_head);
+        }
  
          /* retrieve LOV metadata from MDS */
          rc = obd_connect(&mdc_conn, lov->mdcobd, &lov_mds_uuid);
@@ -248,9 +254,9 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd,
           * array fits in LOV_MAX_UUID_BUFFER_SIZE and all uuids are
           * terminated), but I still need to verify it makes overall
           * sense */
-        mdesc = lustre_msg_buf (req->rq_repmsg, 0, sizeof (*mdesc));
-        LASSERT (mdesc != NULL);
-        LASSERT_REPSWABBED (req, 0);
+        mdesc = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*mdesc));
+        LASSERT(mdesc != NULL);
+        LASSERT_REPSWABBED(req, 0);
  
          *desc = *mdesc;
  
@@ -279,15 +285,15 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd,
           * demands on memory here. */
          lov->bufsize = sizeof(struct lov_tgt_desc) * desc->ld_tgt_count;
          OBD_ALLOC(lov->tgts, lov->bufsize);
-        if (!lov->tgts) {
+        if (lov->tgts == NULL) {
                  CERROR("Out of memory\n");
                  GOTO(out_req, rc = -ENOMEM);
          }
  
          uuids = lustre_msg_buf(req->rq_repmsg, 1,
                                 sizeof(*uuids) * desc->ld_tgt_count);
-        LASSERT (uuids != NULL);
-        LASSERT_REPSWABBED (req, 1);
+        LASSERT(uuids != NULL);
+        LASSERT_REPSWABBED(req, 1);
  
          for (i = 0, tgts = lov->tgts; i < desc->ld_tgt_count; i++, tgts++) {
                  struct obd_uuid *uuid = &tgts->uuid;
@@ -330,7 +336,9 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd,
          }
  
          mdc->cl_max_mds_easize = obd_size_diskmd(conn, NULL);
-        ptlrpc_req_finished (req);
+        mdc->cl_max_mds_cookiesize = desc->ld_tgt_count *
+                sizeof(struct llog_cookie);
+        ptlrpc_req_finished(req);
          class_export_put(exp);
          RETURN (0);
  
@@ -356,7 +364,7 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd,
          RETURN (rc);
  }
  
-static int lov_disconnect(struct lustre_handle *conn, int failover)
+static int lov_disconnect(struct lustre_handle *conn, int flags)
  {
          struct obd_device *obd = class_conn2obd(conn);
          struct lov_obd *lov = &obd->u.lov;
@@ -383,7 +391,7 @@ static int lov_disconnect(struct lustre_handle *conn, int failover)
                                  class_conn2obd(&lov->tgts[i].conn);
                          osc_obd->obd_no_recov = 1;
                  }
-                rc = obd_disconnect(&lov->tgts[i].conn, failover);
+                rc = obd_disconnect(&lov->tgts[i].conn, flags);
                  if (rc) {
                          if (lov->tgts[i].active) {
                                  CERROR("Target %s disconnect error %d\n",
@@ -400,6 +408,7 @@ static int lov_disconnect(struct lustre_handle *conn, int failover)
          lov->bufsize = 0;
          lov->tgts = NULL;
  
+ out_local:
          exp = class_conn2export(conn);
          if (exp == NULL) {
                  CERROR("export handle "LPU64" invalid!  If you can reproduce, "
@@ -421,7 +430,6 @@ static int lov_disconnect(struct lustre_handle *conn, int failover)
          spin_unlock(&exp->exp_lov_data.led_lock);
          class_export_put(exp);
  
- out_local:
          rc = class_disconnect(conn, 0);
          RETURN(rc);
  }
@@ -548,6 +556,8 @@ static obd_size lov_stripe_size(struct lov_stripe_md *lsm, obd_size ost_size,
  static void lov_merge_attrs(struct obdo *tgt, struct obdo *src, obd_flag valid,
                              struct lov_stripe_md *lsm, int stripeno, int *set)
  {
+        valid &= src->o_valid;
+
          if (*set) {
                  if (valid & OBD_MD_FLSIZE) {
                          /* this handles sparse files properly */
@@ -566,68 +576,102 @@ static void lov_merge_attrs(struct obdo *tgt, struct obdo *src, obd_flag valid,
                  if (valid & OBD_MD_FLMTIME && tgt->o_mtime < src->o_mtime)
                          tgt->o_mtime = src->o_mtime;
          } else {
-                obdo_cpy_md(tgt, src, valid);
+                memcpy(tgt, src, sizeof(*tgt));
+                tgt->o_id = lsm->lsm_object_id;
                  if (valid & OBD_MD_FLSIZE)
                          tgt->o_size = lov_stripe_size(lsm,src->o_size,stripeno);
                  *set = 1;
          }
  }
  
+#ifndef log2
+#define log2(n) ffz(~(n))
+#endif
+
  /* the LOV expects oa->o_id to be set to the LOV object id */
-static int lov_create(struct lustre_handle *conn, struct obdo *oa,
+static int lov_create(struct lustre_handle *conn, struct obdo *src_oa,
                        struct lov_stripe_md **ea, struct obd_trans_info *oti)
  {
          struct obd_export *export = class_conn2export(conn);
          struct lov_obd *lov;
          struct lov_stripe_md *lsm;
-        struct lov_oinfo *loi;
-        struct obdo *tmp;
+        struct lov_oinfo *loi = NULL;
+        struct obdo *tmp_oa, *ret_oa;
+        struct llog_cookie *cookies = NULL;
          unsigned ost_count, ost_idx;
-        int set = 0, obj_alloc = 0;
-        int rc = 0, i;
+        int set = 0, obj_alloc = 0, cookie_sent = 0, rc = 0, i;
          ENTRY;
  
          LASSERT(ea);
  
          if (!export)
-                GOTO(out_exp, rc = -EINVAL);
+                RETURN(-EINVAL);
  
          lov = &export->exp_obd->u.lov;
  
          if (!lov->desc.ld_active_tgt_count)
                  GOTO(out_exp, rc = -EIO);
  
-        tmp = obdo_alloc();
-        if (!tmp)
+        ret_oa = obdo_alloc();
+        if (!ret_oa)
                  GOTO(out_exp, rc = -ENOMEM);
  
+        tmp_oa = obdo_alloc();
+        if (!tmp_oa)
+                GOTO(out_oa, rc = -ENOMEM);
+
          lsm = *ea;
  
          if (!lsm) {
-                rc = obd_alloc_memmd(conn, &lsm);
+                __u64 stripes;
+                ost_count = lov_get_stripecnt(lov, 0);
+
+                /* If the MDS file was truncated up to some size, stripe over
+                 * enough OSTs to allow the file to be created at that size
+                 * (stripes is 64-bit for do_div; errors must free the oas) */
+                if (src_oa->o_valid & OBD_MD_FLSIZE) {
+                        stripes=((src_oa->o_size+LUSTRE_STRIPE_MAXBYTES)>>12)-1;
+                        do_div(stripes, (__u32)(LUSTRE_STRIPE_MAXBYTES >> 12));
+
+                        if (stripes > lov->desc.ld_active_tgt_count)
+                                GOTO(out_tmp, rc = -EFBIG);
+                        if (stripes < ost_count)
+                                stripes = ost_count;
+                } else
+                        stripes = ost_count;
+
+                rc = lov_alloc_memmd(&lsm, stripes);
                  if (rc < 0)
                          GOTO(out_tmp, rc);
  
                  rc = 0;
-                lsm->lsm_magic = LOV_MAGIC;
          }
  
          ost_count = lov->desc.ld_tgt_count;
  
-        LASSERT(oa->o_valid & OBD_MD_FLID);
-        lsm->lsm_object_id = oa->o_id;
+        LASSERT(src_oa->o_valid & OBD_MD_FLID);
+        lsm->lsm_object_id = src_oa->o_id;
          if (!lsm->lsm_stripe_size)
                  lsm->lsm_stripe_size = lov->desc.ld_default_stripe_size;
  
          if (!*ea || lsm->lsm_stripe_offset >= ost_count) {
                  get_random_bytes(&ost_idx, 2);
                  ost_idx %= ost_count;
-        } else
+        } else {
                  ost_idx = lsm->lsm_stripe_offset;
+        }
  
          CDEBUG(D_INODE, "allocating %d subobjs for objid "LPX64" at idx %d\n",
                 lsm->lsm_stripe_count, lsm->lsm_object_id, ost_idx);
  
+        /* XXX LOV STACKING: need to figure out how many real OSCs */
+        if (oti && (src_oa->o_valid & OBD_MD_FLCOOKIE)) {
+                oti_alloc_cookies(oti, lsm->lsm_stripe_count);
+                if (!oti->oti_logcookies)
+                        GOTO(out_cleanup, rc = -ENOMEM);
+                cookies = oti->oti_logcookies;
+        }
+
          loi = lsm->lsm_oinfo;
          for (i = 0; i < ost_count; i++, ost_idx = (ost_idx + 1) % ost_count) {
                  struct lov_stripe_md obj_md;
@@ -640,14 +684,30 @@ static int lov_create(struct lustre_handle *conn, struct obdo *oa,
                  }
  
                  /* create data objects with "parent" OA */
-                memcpy(tmp, oa, sizeof(*tmp));
+                memcpy(tmp_oa, src_oa, sizeof(*tmp_oa));
+
+                /* XXX When we start creating objects on demand, we need to
+                 *     make sure that we always create the object on the
+                 *     stripe which holds the existing file size.
+                 */
+                if (src_oa->o_valid & OBD_MD_FLSIZE) {
+                        if (lov_stripe_offset(lsm, src_oa->o_size, i,
+                                              &tmp_oa->o_size) < 0 &&
+                            tmp_oa->o_size)
+                                tmp_oa->o_size--;
+
+                        CDEBUG(D_INODE, "stripe %d has size "LPU64"/"LPU64"\n",
+                               i, tmp_oa->o_size, src_oa->o_size);
+                }
+
                  /* XXX: LOV STACKING: use real "obj_mdp" sub-data */
-                err = obd_create(&lov->tgts[ost_idx].conn, tmp, &obj_mdp, oti);
+                err = obd_create(&lov->tgts[ost_idx].conn, tmp_oa,&obj_mdp,oti);
                  if (err) {
                          if (lov->tgts[ost_idx].active) {
                                  CERROR("error creating objid "LPX64" sub-object"
-                                       " on OST idx %d/%d: rc = %d\n", oa->o_id,
-                                       ost_idx, lsm->lsm_stripe_count, err);
+                                       " on OST idx %d/%d: rc = %d\n",
+                                       src_oa->o_id, ost_idx,
+                                       lsm->lsm_stripe_count, err);
                                  if (err > 0) {
                                          CERROR("obd_create returned invalid "
                                                 "err %d\n", err);
@@ -658,17 +718,22 @@ static int lov_create(struct lustre_handle *conn, struct obdo *oa,
                                  rc = err;
                          continue;
                  }
-                loi->loi_id = tmp->o_id;
+                loi->loi_id = tmp_oa->o_id;
                  loi->loi_ost_idx = ost_idx;
                  CDEBUG(D_INODE, "objid "LPX64" has subobj "LPX64" at idx %d\n",
                         lsm->lsm_object_id, loi->loi_id, ost_idx);
  
                  if (set == 0)
                          lsm->lsm_stripe_offset = ost_idx;
-                lov_merge_attrs(oa, tmp, OBD_MD_FLBLKSZ, lsm, obj_alloc, &set);
-                ot_init(&loi->loi_dirty_ot_inline);
+                lov_merge_attrs(ret_oa, tmp_oa, tmp_oa->o_valid, lsm,
+                                obj_alloc, &set);
                  loi->loi_dirty_ot = &loi->loi_dirty_ot_inline;
+                ot_init(loi->loi_dirty_ot);
  
+                if (cookies)
+                        ++oti->oti_logcookies;
+                if (tmp_oa->o_valid & OBD_MD_FLCOOKIE)
+                        ++cookie_sent;
                  ++obj_alloc;
                  ++loi;
  
@@ -677,6 +742,12 @@ static int lov_create(struct lustre_handle *conn, struct obdo *oa,
                          GOTO(out_done, rc = 0);
          }
  
+        /* If we were passed specific striping params, then a failure to
+         * meet those requirements is an error, since we can't reallocate
+         * that memory (it might be part of a larger array or something).
+         *
+         * We can only get here if lsm_stripe_count was originally > 1.
+         */
          if (*ea != NULL) {
                  CERROR("can't lstripe objid "LPX64": have %u want %u, rc %d\n",
                         lsm->lsm_object_id, obj_alloc, lsm->lsm_stripe_count,rc);
@@ -686,27 +757,61 @@ static int lov_create(struct lustre_handle *conn, struct obdo *oa,
          } else {
                  struct lov_stripe_md *lsm_new;
                  /* XXX LOV STACKING call into osc for sizes */
-                unsigned size = lov_stripe_md_size(obj_alloc);
+                unsigned oldsize, newsize;
+
+                if (oti && cookies && cookie_sent) {
+                        oldsize = lsm->lsm_stripe_count * sizeof(*cookies);
+                        newsize = obj_alloc * sizeof(*cookies);
+
+                        oti_alloc_cookies(oti, obj_alloc);
+                        if (oti->oti_logcookies) {
+                                memcpy(oti->oti_logcookies, cookies, newsize);
+                                OBD_FREE(cookies, oldsize);
+                                cookies = oti->oti_logcookies;
+                        } else {
+                                CWARN("'leaking' %d bytes\n", oldsize-newsize);
+                        }
+                }
  
                  CERROR("reallocating LSM for objid "LPX64": old %u new %u\n",
                         lsm->lsm_object_id, obj_alloc, lsm->lsm_stripe_count);
-                OBD_ALLOC(lsm_new, size);
-                if (!lsm_new)
-                        GOTO(out_cleanup, rc = -ENOMEM);
-                memcpy(lsm_new, lsm, size);
-                lsm_new->lsm_stripe_count = obj_alloc;
-
-                /* XXX LOV STACKING call into osc for sizes */
-                OBD_FREE(lsm, lov_stripe_md_size(lsm->lsm_stripe_count));
-                lsm = lsm_new;
-
+                oldsize = lov_stripe_md_size(lsm->lsm_stripe_count);
+                newsize = lov_stripe_md_size(obj_alloc);
+                OBD_ALLOC(lsm_new, newsize);
+                if (lsm_new != NULL) {
+                        memcpy(lsm_new, lsm, newsize);
+                        lsm_new->lsm_stripe_count = obj_alloc;
+                        OBD_FREE(lsm, oldsize);
+                        lsm = lsm_new;
+                } else {
+                        CWARN("'leaking' %d bytes\n", oldsize - newsize);
+                }
                  rc = 0;
          }
   out_done:
          *ea = lsm;
+        if (src_oa->o_valid & OBD_MD_FLSIZE &&
+            ret_oa->o_size != src_oa->o_size) {
+                CERROR("original size "LPU64" isn't new object size "LPU64"\n",
+                       src_oa->o_size, ret_oa->o_size);
+                LBUG();
+        }
+        ret_oa->o_id = src_oa->o_id;
+        memcpy(src_oa, ret_oa, sizeof(*src_oa));
  
   out_tmp:
-        obdo_free(tmp);
+        obdo_free(tmp_oa);
+ out_oa:
+        obdo_free(ret_oa);
+        if (oti && cookies) {
+                oti->oti_logcookies = cookies;
+                if (!cookie_sent) {
+                        oti_free_cookies(oti);
+                        src_oa->o_valid &= ~OBD_MD_FLCOOKIE;
+                } else {
+                        src_oa->o_valid |= OBD_MD_FLCOOKIE;
+                }
+        }
   out_exp:
          class_export_put(export);
          return rc;
@@ -717,15 +822,26 @@ static int lov_create(struct lustre_handle *conn, struct obdo *oa,
  
                  --loi;
                  /* destroy already created objects here */
-                memcpy(tmp, oa, sizeof(*tmp));
-                tmp->o_id = loi->loi_id;
-                err = obd_destroy(&lov->tgts[loi->loi_ost_idx].conn, tmp, NULL,
-                                  NULL);
+                memcpy(tmp_oa, src_oa, sizeof(*tmp_oa));
+                tmp_oa->o_id = loi->loi_id;
+
+                if (oti && cookie_sent) {
+                        err = obd_log_cancel(&lov->tgts[loi->loi_ost_idx].conn,
+                                             NULL, 1, --oti->oti_logcookies,
+                                             OBD_LLOG_FL_SENDNOW);
+                        if (err)
+                                CERROR("Failed to cancel objid "LPX64" subobj "
+                                       LPX64" cookie on OST idx %d: rc = %d\n",
+                                       src_oa->o_id, loi->loi_id,
+                                       loi->loi_ost_idx, err);
+                }
+
+                err = obd_destroy(&lov->tgts[loi->loi_ost_idx].conn, tmp_oa,
+                                  NULL, oti);
                  if (err)
-                        CERROR("Failed to uncreate objid "LPX64" subobj "
-                               LPX64" on OST idx %d: rc = %d\n",
-                               oa->o_id, loi->loi_id, loi->loi_ost_idx,
-                               err);
+                        CERROR("Failed to uncreate objid "LPX64" subobj "LPX64
+                               " on OST idx %d: rc = %d\n", src_oa->o_id,
+                               loi->loi_id, loi->loi_ost_idx, err);
          }
          if (*ea == NULL)
                  obd_free_memmd(conn, &lsm);
@@ -779,12 +895,12 @@ static int lov_destroy(struct lustre_handle *conn, struct obdo *oa,
                  memcpy(&tmp, oa, sizeof(tmp));
                  tmp.o_id = loi->loi_id;
                  if (lfh)
-                        memcpy(obdo_handle(&tmp), lfh->lfh_och + i,
-                               FD_OSTDATA_SIZE);
+                        memcpy(obdo_handle(&tmp), &lfh->lfh_och[i].och_fh,
+                               sizeof(lfh->lfh_och[i].och_fh));
                  else
                          tmp.o_valid &= ~OBD_MD_FLHANDLE;
                  err = obd_destroy(&lov->tgts[loi->loi_ost_idx].conn, &tmp,
-                                  NULL, NULL);
+                                  NULL, oti);
                  if (err && lov->tgts[loi->loi_ost_idx].active) {
                          CERROR("error: destroying objid "LPX64" subobj "
                                 LPX64" on OST idx %d: rc = %d\n",
@@ -839,8 +955,8 @@ static int lov_getattr(struct lustre_handle *conn, struct obdo *oa,
                  memcpy(&tmp, oa, sizeof(tmp));
                  tmp.o_id = loi->loi_id;
                  if (lfh)
-                        memcpy(obdo_handle(&tmp), lfh->lfh_och + i,
-                               FD_OSTDATA_SIZE);
+                        memcpy(obdo_handle(&tmp), &lfh->lfh_och[i].och_fh,
+                               sizeof(lfh->lfh_och[i].och_fh));
                  else
                          tmp.o_valid &= ~OBD_MD_FLHANDLE;
  
@@ -867,12 +983,13 @@ static int lov_getattr(struct lustre_handle *conn, struct obdo *oa,
          return rc;
  }
  
-static int lov_getattr_interpret(struct ptlrpc_request_set *rqset,
-                                 struct lov_getattr_async_args *aa, int rc)
+static int lov_getattr_interpret(struct ptlrpc_request_set *rqset, void *data, 
+                                 int rc)
  {
+        struct lov_getattr_async_args *aa = data;
          struct lov_stripe_md *lsm = aa->aa_lsm;
          struct obdo          *oa = aa->aa_oa;
-        struct obdo          *obdos = aa->aa_stripe_oas;
+        struct obdo          *obdos = aa->aa_obdos;
          struct lov_oinfo     *loi;
          int                   i;
          int                   set = 0;
@@ -881,8 +998,8 @@ static int lov_getattr_interpret(struct ptlrpc_request_set *rqset,
          if (rc == 0) {
                  /* NB all stripe requests succeeded to get here */
  
-                for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count;
-                     i++,loi++) {
+                for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count;
+                     i++, loi++) {
                          if (obdos[i].o_valid == 0)      /* inactive stripe */
                                  continue;
  
@@ -955,8 +1072,8 @@ static int lov_getattr_async (struct lustre_handle *conn, struct obdo *oa,
                  memcpy(&obdos[i], oa, sizeof(obdos[i]));
                  obdos[i].o_id = loi->loi_id;
                  if (lfh)
-                        memcpy(obdo_handle(&obdos[i]), lfh->lfh_och + i,
-                               FD_OSTDATA_SIZE);
+                        memcpy(obdo_handle(&obdos[i]), &lfh->lfh_och[i].och_fh,
+                               sizeof(lfh->lfh_och[i].och_fh));
                  else
                          obdos[i].o_valid &= ~OBD_MD_FLHANDLE;
  
@@ -980,7 +1097,7 @@ static int lov_getattr_async (struct lustre_handle *conn, struct obdo *oa,
          aa = (struct lov_getattr_async_args *)&rqset->set_args;
          aa->aa_lsm = lsm;
          aa->aa_oa = oa;
-        aa->aa_stripe_oas = obdos;
+        aa->aa_obdos = obdos;
          GOTO (out, rc = 0);
  
   out_obdos:
@@ -992,10 +1109,10 @@ static int lov_getattr_async (struct lustre_handle *conn, struct obdo *oa,
          RETURN (rc);
  }
  
-static int lov_setattr(struct lustre_handle *conn, struct obdo *oa,
+static int lov_setattr(struct lustre_handle *conn, struct obdo *src_oa,
                         struct lov_stripe_md *lsm, struct obd_trans_info *oti)
  {
-        struct obdo *tmp;
+        struct obdo *tmp_oa, *ret_oa;
          struct obd_export *export = class_conn2export(conn);
          struct lov_obd *lov;
          struct lov_oinfo *loi;
@@ -1009,18 +1126,17 @@ static int lov_setattr(struct lustre_handle *conn, struct obdo *oa,
          if (!export || !export->exp_obd)
                  GOTO(out, rc = -ENODEV);
  
-        /* size changes should go through punch and not setattr */
-        LASSERT(!(oa->o_valid & OBD_MD_FLSIZE));
-
-        /* for now, we only expect mtime updates here */
-        LASSERT(!(oa->o_valid & ~(OBD_MD_FLID |OBD_MD_FLTYPE |OBD_MD_FLMTIME)));
-
-        tmp = obdo_alloc();
-        if (!tmp)
+        /* for now, we only expect time updates here */
+        LASSERT(!(src_oa->o_valid & ~(OBD_MD_FLID|OBD_MD_FLTYPE|OBD_MD_FLMODE|
+                                      OBD_MD_FLATIME | OBD_MD_FLMTIME |
+                                      OBD_MD_FLCTIME)));
+        ret_oa = obdo_alloc();
+        if (!ret_oa)
                  GOTO(out, rc = -ENOMEM);
  
-        if (oa->o_valid & OBD_MD_FLHANDLE)
-                lfh = lov_handle2lfh(obdo_handle(oa));
+        tmp_oa = obdo_alloc();
+        if (!tmp_oa)
+                GOTO(out_oa, rc = -ENOMEM);
  
          lov = &export->exp_obd->u.lov;
          for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
@@ -1031,46 +1147,54 @@ static int lov_setattr(struct lustre_handle *conn, struct obdo *oa,
                          continue;
                  }
  
-                obdo_cpy_md(tmp, oa, oa->o_valid);
+                memcpy(tmp_oa, src_oa, sizeof(*tmp_oa));
  
                  if (lfh)
-                        memcpy(obdo_handle(tmp), lfh->lfh_och + i,
-                               FD_OSTDATA_SIZE);
+                        memcpy(obdo_handle(tmp_oa), &lfh->lfh_och[i].och_fh,
+                               sizeof(lfh->lfh_och[i].och_fh));
                  else
-                        tmp->o_valid &= ~OBD_MD_FLHANDLE;
+                        tmp_oa->o_valid &= ~OBD_MD_FLHANDLE;
  
-                tmp->o_id = loi->loi_id;
+                tmp_oa->o_id = loi->loi_id;
  
-                err = obd_setattr(&lov->tgts[loi->loi_ost_idx].conn, tmp,
+                err = obd_setattr(&lov->tgts[loi->loi_ost_idx].conn, tmp_oa,
                                    NULL, NULL);
                  if (err) {
                          if (lov->tgts[loi->loi_ost_idx].active) {
                                  CERROR("error: setattr objid "LPX64" subobj "
                                         LPX64" on OST idx %d: rc = %d\n",
-                                       oa->o_id, loi->loi_id, loi->loi_ost_idx,
-                                       err);
+                                       src_oa->o_id, loi->loi_id,
+                                       loi->loi_ost_idx, err);
                                  if (!rc)
                                          rc = err;
                          }
-                } else
-                        set = 1;
+                        continue;
+                }
+
+                lov_merge_attrs(ret_oa, tmp_oa, tmp_oa->o_valid, lsm, i, &set);
          }
-        obdo_free(tmp);
          if (!set && !rc)
                  rc = -EIO;
          if (lfh != NULL)
                  lov_lfh_put(lfh);
-        GOTO(out, rc);
- out:
+
+        ret_oa->o_id = src_oa->o_id;
+        memcpy(src_oa, ret_oa, sizeof(*src_oa));
+        GOTO(out_tmp, rc);
+out_tmp:
+        obdo_free(tmp_oa);
+out_oa:
+        obdo_free(ret_oa);
+out:
          class_export_put(export);
          return rc;
  }
  
-static int lov_open(struct lustre_handle *conn, struct obdo *oa,
+static int lov_open(struct lustre_handle *conn, struct obdo *src_oa,
                      struct lov_stripe_md *lsm, struct obd_trans_info *oti,
                      struct obd_client_handle *och)
  {
-        struct obdo *tmp; /* on the heap here, on the stack in lov_close? */
+        struct obdo *tmp_oa, *ret_oa;
          struct obd_export *export = class_conn2export(conn);
          struct lov_obd *lov;
          struct lov_oinfo *loi;
@@ -1085,20 +1209,24 @@ static int lov_open(struct lustre_handle *conn, struct obdo *oa,
          if (!export || !export->exp_obd)
                  GOTO(out_exp, rc = -ENODEV);
  
-        tmp = obdo_alloc();
-        if (!tmp)
+        ret_oa = obdo_alloc();
+        if (!ret_oa)
                  GOTO(out_exp, rc = -ENOMEM);
  
+        tmp_oa = obdo_alloc();
+        if (!tmp_oa)
+                GOTO(out_oa, rc = -ENOMEM);
+
          lfh = lov_lfh_new();
          if (lfh == NULL)
                  GOTO(out_tmp, rc = -ENOMEM);
-        OBD_ALLOC(lfh->lfh_och, lsm->lsm_stripe_count * sizeof *och);
+        OBD_ALLOC(lfh->lfh_och, lsm->lsm_stripe_count * sizeof(*och));
          if (!lfh->lfh_och)
                  GOTO(out_lfh, rc = -ENOMEM);
  
          lov = &export->exp_obd->u.lov;
-        oa->o_size = 0;
-        oa->o_blocks = 0;
+        src_oa->o_size = 0;
+        src_oa->o_blocks = 0;
          for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
                  if (lov->tgts[loi->loi_ost_idx].active == 0) {
                          CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
@@ -1106,11 +1234,11 @@ static int lov_open(struct lustre_handle *conn, struct obdo *oa,
                  }
  
                  /* create data objects with "parent" OA */
-                memcpy(tmp, oa, sizeof(*tmp));
-                tmp->o_id = loi->loi_id;
+                memcpy(tmp_oa, src_oa, sizeof(*tmp_oa));
+                tmp_oa->o_id = loi->loi_id;
  
-                rc = obd_open(&lov->tgts[loi->loi_ost_idx].conn, tmp,
-                              NULL, NULL, lfh->lfh_och + i);
+                rc = obd_open(&lov->tgts[loi->loi_ost_idx].conn, tmp_oa,
+                              NULL, NULL, &lfh->lfh_och[i]);
                  if (rc) {
                          if (!lov->tgts[loi->loi_ost_idx].active) {
                                  rc = 0;
@@ -1118,27 +1246,31 @@ static int lov_open(struct lustre_handle *conn, struct obdo *oa,
                          }
                          CERROR("error: open objid "LPX64" subobj "LPX64
                                 " on OST idx %d: rc = %d\n",
-                               oa->o_id, lsm->lsm_oinfo[i].loi_id,
+                               src_oa->o_id, lsm->lsm_oinfo[i].loi_id,
                                 loi->loi_ost_idx, rc);
                          goto out_handles;
                  }
  
-                lov_merge_attrs(oa, tmp, tmp->o_valid, lsm, i, &set);
+                lov_merge_attrs(ret_oa, tmp_oa, tmp_oa->o_valid, lsm, i, &set);
          }
  
          lfh->lfh_count = lsm->lsm_stripe_count;
          och->och_fh.cookie = lfh->lfh_handle.h_cookie;
-        obdo_handle(oa)->cookie = lfh->lfh_handle.h_cookie;
-        oa->o_valid |= OBD_MD_FLHANDLE;
+        obdo_handle(ret_oa)->cookie = lfh->lfh_handle.h_cookie;
+        ret_oa->o_valid |= OBD_MD_FLHANDLE;
+        ret_oa->o_id = src_oa->o_id;
+        memcpy(src_oa, ret_oa, sizeof(*src_oa));
  
-        /* llfh refcount transfers to list */
+        /* lfh refcount transfers to list */
          spin_lock(&export->exp_lov_data.led_lock);
          list_add(&lfh->lfh_list, &export->exp_lov_data.led_open_head);
          spin_unlock(&export->exp_lov_data.led_lock);
  
          GOTO(out_tmp, rc);
   out_tmp:
-        obdo_free(tmp);
+        obdo_free(tmp_oa);
+ out_oa:
+        obdo_free(ret_oa);
   out_exp:
          class_export_put(export);
          return rc;
@@ -1150,16 +1282,16 @@ static int lov_open(struct lustre_handle *conn, struct obdo *oa,
                  if (lov->tgts[loi->loi_ost_idx].active == 0)
                          continue;
  
-                memcpy(tmp, oa, sizeof(*tmp));
-                tmp->o_id = loi->loi_id;
-                memcpy(obdo_handle(tmp), lfh->lfh_och + i, FD_OSTDATA_SIZE);
+                memcpy(tmp_oa, src_oa, sizeof(*tmp_oa));
+                tmp_oa->o_id = loi->loi_id;
+                memcpy(obdo_handle(tmp_oa), &lfh->lfh_och[i], FD_OSTDATA_SIZE);
  
-                err = obd_close(&lov->tgts[loi->loi_ost_idx].conn, tmp,
+                err = obd_close(&lov->tgts[loi->loi_ost_idx].conn, tmp_oa,
                                  NULL, NULL);
                  if (err && lov->tgts[loi->loi_ost_idx].active) {
                          CERROR("error: closing objid "LPX64" subobj "LPX64
                                 " on OST idx %d after open error: rc=%d\n",
-                               oa->o_id, loi->loi_id, loi->loi_ost_idx, err);
+                               src_oa->o_id, loi->loi_id, loi->loi_ost_idx,err);
                  }
          }
  
@@ -1189,6 +1321,8 @@ static int lov_close(struct lustre_handle *conn, struct obdo *oa,
  
          if (oa->o_valid & OBD_MD_FLHANDLE)
                  lfh = lov_handle2lfh(obdo_handle(oa));
+        if (!lfh)
+                LBUG();
  
          lov = &export->exp_obd->u.lov;
          for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
@@ -1198,7 +1332,7 @@ static int lov_close(struct lustre_handle *conn, struct obdo *oa,
                  memcpy(&tmp, oa, sizeof(tmp));
                  tmp.o_id = loi->loi_id;
                  if (lfh)
-                        memcpy(obdo_handle(&tmp), lfh->lfh_och + i,
+                        memcpy(obdo_handle(&tmp), &lfh->lfh_och[i],
                                 FD_OSTDATA_SIZE);
                  else
                          tmp.o_valid &= ~OBD_MD_FLHANDLE;
@@ -1223,18 +1357,16 @@ static int lov_close(struct lustre_handle *conn, struct obdo *oa,
  
                  OBD_FREE(lfh->lfh_och, lsm->lsm_stripe_count * FD_OSTDATA_SIZE);
                  lov_lfh_destroy(lfh);
+                LASSERT(atomic_read(&lfh->lfh_refcount) == 1);
                  lov_lfh_put(lfh); /* balance handle2lfh above */
-        }
+        } else
+                LBUG();
          GOTO(out, rc);
   out:
          class_export_put(export);
          return rc;
  }
  
-#ifndef log2
-#define log2(n) ffz(~(n))
-#endif
-
  /* we have an offset in file backed by an lov and want to find out where
   * that offset lands in our given stripe of the file.  for the easy
   * case where the offset is within the stripe, we just have to scale the
@@ -1404,8 +1536,8 @@ static int lov_punch(struct lustre_handle *conn, struct obdo *oa,
                  memcpy(&tmp, oa, sizeof(tmp));
                  tmp.o_id = loi->loi_id;
                  if (lfh)
-                        memcpy(obdo_handle(&tmp), lfh->lfh_och + i,
-                               FD_OSTDATA_SIZE);
+                        memcpy(obdo_handle(&tmp), &lfh->lfh_och[i].och_fh,
+                               sizeof(lfh->lfh_och[i].och_fh));
                  else
                          tmp.o_valid &= ~OBD_MD_FLHANDLE;
  
@@ -1455,7 +1587,7 @@ static int lov_brw_check(struct lov_obd *lov, struct lov_stripe_md *lsm,
          return 0;
  }
  
-static int lov_brw(int cmd, struct lustre_handle *conn,
+static int lov_brw(int cmd, struct lustre_handle *conn, struct obdo *src_oa,
                     struct lov_stripe_md *lsm, obd_count oa_bufs,
                     struct brw_page *pga, struct obd_trans_info *oti)
  {
@@ -1467,10 +1599,12 @@ static int lov_brw(int cmd, struct lustre_handle *conn,
                  int ost_idx;
          } *stripeinfo, *si, *si_last;
          struct obd_export *export = class_conn2export(conn);
+        struct obdo *ret_oa = NULL, *tmp_oa = NULL;
+        struct lov_file_handles *lfh = NULL;
          struct lov_obd *lov;
          struct brw_page *ioarr;
          struct lov_oinfo *loi;
-        int rc = 0, i, *where, stripe_count = lsm->lsm_stripe_count;
+        int rc = 0, i, *where, stripe_count = lsm->lsm_stripe_count, set = 0;
          ENTRY;
  
          if (lsm_bad_magic(lsm))
@@ -1495,6 +1629,21 @@ static int lov_brw(int cmd, struct lustre_handle *conn,
          if (!ioarr)
                  GOTO(out_where, rc = -ENOMEM);
  
+        if (src_oa) {
+                ret_oa = obdo_alloc();
+                if (!ret_oa)
+                        GOTO(out_ioarr, rc = -ENOMEM);
+
+                tmp_oa = obdo_alloc();
+                if (!tmp_oa)
+                        GOTO(out_oa, rc = -ENOMEM);
+
+                if (src_oa->o_valid & OBD_MD_FLHANDLE)
+                        lfh = lov_handle2lfh(obdo_handle(src_oa));
+                else
+                        src_oa->o_valid &= ~OBD_MD_FLHANDLE;
+        }
+
          for (i = 0; i < oa_bufs; i++) {
                  where[i] = lov_stripe_number(lsm, pga[i].off);
                  stripeinfo[where[i]].bufct++;
@@ -1524,23 +1673,46 @@ static int lov_brw(int cmd, struct lustre_handle *conn,
  
                  if (lov->tgts[si->ost_idx].active == 0) {
                          CDEBUG(D_HA, "lov idx %d inactive\n", si->ost_idx);
-                        GOTO(out_ioarr, rc = -EIO);
+                        GOTO(out_oa, rc = -EIO);
                  }
  
                  if (si->bufct) {
                          LASSERT(shift < oa_bufs);
-                        rc = obd_brw(cmd, &lov->tgts[si->ost_idx].conn,
+                        if (src_oa) {
+                                memcpy(tmp_oa, src_oa, sizeof(*tmp_oa));
+                                if (lfh)
+                                        memcpy(obdo_handle(tmp_oa),
+                                               &lfh->lfh_och[i].och_fh,
+                                               sizeof(lfh->lfh_och[i].och_fh));
+                        }
+
+                        tmp_oa->o_id = si->lsm.lsm_object_id;
+                        rc = obd_brw(cmd, &lov->tgts[si->ost_idx].conn, tmp_oa,
                                       &si->lsm, si->bufct, &ioarr[shift],
                                       oti);
                          if (rc)
                                  GOTO(out_ioarr, rc);
+
+                        lov_merge_attrs(ret_oa, tmp_oa, tmp_oa->o_valid, lsm,
+                                        i, &set);
                  }
          }
-        GOTO(out_ioarr, rc);
+
+        ret_oa->o_id = src_oa->o_id;
+        memcpy(src_oa, ret_oa, sizeof(*src_oa));
+
+        GOTO(out_oa, rc);
+ out_oa:
+        if (tmp_oa)
+                obdo_free(tmp_oa);
+        if (ret_oa)
+                obdo_free(ret_oa);
   out_ioarr:
          OBD_FREE(ioarr, sizeof(*ioarr) * oa_bufs);
   out_where:
          OBD_FREE(where, sizeof(*where) * oa_bufs);
+        if (lfh)
+                lov_lfh_put(lfh);
   out_sinfo:
          OBD_FREE(stripeinfo, stripe_count * sizeof(*stripeinfo));
   out_exp:
@@ -1548,18 +1720,43 @@ static int lov_brw(int cmd, struct lustre_handle *conn,
          return rc;
  }
  
-static int lov_brw_interpret (struct ptlrpc_request_set *set,
-                              struct lov_brw_async_args *aa, int rc)
+/* Request-set completion callback installed by lov_brw_async().
+ *
+ * If every stripe request succeeded and the caller supplied an obdo, the
+ * per-stripe obdos are merged back into it; -EIO is returned when no stripe
+ * produced valid attributes.  The per-stripe obdo array and the reordered
+ * page array handed over through 'aa' are always freed here.
+ *
+ * Returns 'rc' unchanged except for the -EIO case above. */
+static int lov_brw_interpret(struct ptlrpc_request_set *rqset,
+                             struct lov_brw_async_args *aa, int rc)
  {
-        obd_count        oa_bufs = aa->aa_oa_bufs;
-        struct brw_page *ioarr = aa->aa_ioarr;
+        struct lov_stripe_md *lsm = aa->aa_lsm;
+        obd_count             oa_bufs = aa->aa_oa_bufs;
+        struct obdo          *oa = aa->aa_oa;
+        struct obdo          *obdos = aa->aa_obdos;
+        struct brw_page      *ioarr = aa->aa_ioarr;
+        struct lov_oinfo     *loi;
+        int i, set = 0;
          ENTRY;
  
-        OBD_FREE (ioarr, sizeof (*ioarr) * oa_bufs);
-        RETURN (rc);
+        /* lov_brw_async() only allocates obdos when it was given an obdo, so
+         * both pointers are NULL for I/O that carries no attributes; there
+         * is nothing to merge in that case. */
+        if (rc == 0 && oa != NULL && obdos != NULL) {
+                /* NB all stripe requests succeeded to get here */
+
+                for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count;
+                     i++, loi++) {
+                        if (obdos[i].o_valid == 0)      /* inactive stripe */
+                                continue;
+
+                        lov_merge_attrs(oa, &obdos[i], obdos[i].o_valid, lsm,
+                                        i, &set);
+                }
+
+                if (!set) {
+                        CERROR("No stripes had valid attrs\n");
+                        rc = -EIO;
+                }
+        }
+
+        /* restore the LOV object id that the per-stripe copies overwrote */
+        if (oa != NULL)
+                oa->o_id = lsm->lsm_object_id;
+
+        if (obdos != NULL)
+                OBD_FREE(obdos, lsm->lsm_stripe_count * sizeof(*obdos));
+        OBD_FREE(ioarr, sizeof(*ioarr) * oa_bufs);
+        RETURN(rc);
  }
  
-static int lov_brw_async(int cmd, struct lustre_handle *conn,
+static int lov_brw_async(int cmd, struct lustre_handle *conn, struct obdo *oa,
                           struct lov_stripe_md *lsm, obd_count oa_bufs,
                           struct brw_page *pga, struct ptlrpc_request_set *set,
                           struct obd_trans_info *oti)
@@ -1573,7 +1770,9 @@ static int lov_brw_async(int cmd, struct lustre_handle *conn,
          } *stripeinfo, *si, *si_last;
          struct obd_export *export = class_conn2export(conn);
          struct lov_obd *lov;
+        struct lov_file_handles *lfh = NULL;
          struct brw_page *ioarr;
+        struct obdo *obdos = NULL;
          struct lov_oinfo *loi;
          struct lov_brw_async_args *aa;
          int rc = 0, i, *where, stripe_count = lsm->lsm_stripe_count;
@@ -1597,9 +1796,20 @@ static int lov_brw_async(int cmd, struct lustre_handle *conn,
          if (!where)
                  GOTO(out_sinfo, rc = -ENOMEM);
  
+        if (oa) {
+                OBD_ALLOC(obdos, sizeof(*obdos) * stripe_count);
+                if (!obdos)
+                        GOTO(out_where, rc = -ENOMEM);
+
+                if (oa->o_valid & OBD_MD_FLHANDLE)
+                        lfh = lov_handle2lfh(obdo_handle(oa));
+                else
+                        oa->o_valid &= ~OBD_MD_FLHANDLE;
+        }
+
          OBD_ALLOC(ioarr, sizeof(*ioarr) * oa_bufs);
          if (!ioarr)
-                GOTO(out_where, rc = -ENOMEM);
+                GOTO(out_obdos, rc = -ENOMEM);
  
          for (i = 0; i < oa_bufs; i++) {
                  where[i] = lov_stripe_number(lsm, pga[i].off);
@@ -1612,6 +1822,15 @@ static int lov_brw_async(int cmd, struct lustre_handle *conn,
                          si->index = si_last->index + si_last->bufct;
                  si->lsm.lsm_object_id = loi->loi_id;
                  si->ost_idx = loi->loi_ost_idx;
+
+                if (oa) {
+                        memcpy(&obdos[i], oa, sizeof(*obdos));
+                        obdos[i].o_id = si->lsm.lsm_object_id;
+                        if (lfh)
+                                memcpy(obdo_handle(&obdos[i]),
+                                       &lfh->lfh_och[i].och_fh,
+                                       sizeof(lfh->lfh_och[i].och_fh));
+                }
          }
  
          for (i = 0; i < oa_bufs; i++) {
@@ -1637,24 +1856,35 @@ static int lov_brw_async(int cmd, struct lustre_handle *conn,
                  }
  
                  LASSERT(shift < oa_bufs);
+
                  rc = obd_brw_async(cmd, &lov->tgts[si->ost_idx].conn,
-                                   &si->lsm, si->bufct, &ioarr[shift],
-                                   set, oti);
+                                   &obdos[i], &si->lsm, si->bufct,
+                                   &ioarr[shift], set, oti);
                  if (rc)
                          GOTO(out_ioarr, rc);
          }
-        LASSERT (rc == 0);
-        LASSERT (set->set_interpret == NULL);
-        set->set_interpret = lov_brw_interpret;
-        LASSERT (sizeof (set->set_args) >= sizeof (struct lov_brw_async_args));
+        LASSERT(rc == 0);
+        LASSERT(set->set_interpret == NULL);
+        set->set_interpret = (set_interpreter_func)lov_brw_interpret;
+        LASSERT(sizeof(set->set_args) >= sizeof(struct lov_brw_async_args));
          aa = (struct lov_brw_async_args *)&set->set_args;
-        aa->aa_oa_bufs = oa_bufs;
+        aa->aa_lsm = lsm;
+        aa->aa_obdos = obdos;
+        aa->aa_oa = oa;
          aa->aa_ioarr = ioarr;
+        aa->aa_oa_bufs = oa_bufs;
+
+        /* Don't free ioarr or obdos - that's done in lov_brw_interpret */
          GOTO(out_where, rc);
+
   out_ioarr:
          OBD_FREE(ioarr, sizeof(*ioarr) * oa_bufs);
+ out_obdos:
+        OBD_FREE(obdos, stripe_count * sizeof(*obdos));
   out_where:
          OBD_FREE(where, sizeof(*where) * oa_bufs);
+        if (lfh)
+                lov_lfh_put(lfh);
   out_sinfo:
          OBD_FREE(stripeinfo, stripe_count * sizeof(*stripeinfo));
   out_exp:
@@ -1980,20 +2210,16 @@ static int lov_cancel_unused(struct lustre_handle *conn,
                          (tot) += (add);                                 \
          } while(0)
  
-static int lov_statfs(struct obd_export *export, struct obd_statfs *osfs)
+static int lov_statfs(struct obd_device *obd, struct obd_statfs *osfs,
+                      unsigned long max_age)
  {
-        struct obd_export *tgt_export;
-        struct lov_obd *lov;
+        struct lov_obd *lov = &obd->u.lov;
          struct obd_statfs lov_sfs;
          int set = 0;
          int rc = 0;
          int i;
          ENTRY;
  
-        if (!export || !export->exp_obd)
-                RETURN(-ENODEV);
-
-        lov = &export->exp_obd->u.lov;
  
          /* We only get block data from the OBD */
          for (i = 0; i < lov->desc.ld_tgt_count; i++) {
@@ -2004,14 +2230,8 @@ static int lov_statfs(struct obd_export *export, struct obd_statfs *osfs)
                          continue;
                  }
  
-                tgt_export = class_conn2export(&lov->tgts[i].conn);
-                if (!tgt_export) {
-                        CDEBUG(D_HA, "lov idx %d NULL export\n", i);
-                        continue;
-                }
-
-                err = obd_statfs(tgt_export, &lov_sfs);
-                class_export_put(tgt_export);
+                err = obd_statfs(class_conn2obd(&lov->tgts[i].conn), &lov_sfs,
+                                 max_age);
                  if (err) {
                          if (lov->tgts[i].active) {
                                  CERROR("error: statfs OSC %s on OST idx %d: "
@@ -2022,6 +2242,7 @@ static int lov_statfs(struct obd_export *export, struct obd_statfs *osfs)
                          }
                          continue;
                  }
+
                  if (!set) {
                          memcpy(osfs, &lov_sfs, sizeof(lov_sfs));
                          set = 1;
@@ -2044,6 +2265,7 @@ static int lov_statfs(struct obd_export *export, struct obd_statfs *osfs)
                          LOV_SUM_MAX(osfs->os_ffree, lov_sfs.os_ffree);
                  }
          }
+
          if (set) {
                  __u32 expected_stripes = lov->desc.ld_default_stripe_count ?
                                           lov->desc.ld_default_stripe_count :
@@ -2055,6 +2277,7 @@ static int lov_statfs(struct obd_export *export, struct obd_statfs *osfs)
                          do_div(osfs->os_ffree, expected_stripes);
          } else if (!rc)
                  rc = -EIO;
+
          RETURN(rc);
  }
  
@@ -2191,7 +2414,28 @@ static int lov_get_info(struct lustre_handle *conn, __u32 keylen,
          RETURN(-EINVAL);
  }
  
-static int lov_mark_page_dirty(struct lustre_handle *conn, 
+/* Pass a set_info request on to every target of this LOV.
+ *
+ * Only keys that begin with "mds_conn" are accepted; anything else gets
+ * -EINVAL.  Every target is visited even if an earlier one fails, and the
+ * first non-zero result (if any) is what the caller sees. */
+static int lov_set_info(struct lustre_handle *conn, obd_count keylen,
+                        void *key, obd_count vallen, void *val)
+{
+        struct obd_device *obddev = class_conn2obd(conn);
+        struct lov_obd *lov = &obddev->u.lov;
+        int first_err = 0;
+        int idx;
+        ENTRY;
+
+        /* the key must be at least as long as, and start with, "mds_conn" */
+        if (keylen < strlen("mds_conn"))
+                RETURN(-EINVAL);
+        if (memcmp(key, "mds_conn", strlen("mds_conn")) != 0)
+                RETURN(-EINVAL);
+
+        for (idx = 0; idx < lov->desc.ld_tgt_count; idx++) {
+                int err = obd_set_info(&lov->tgts[idx].conn, keylen, key,
+                                       vallen, val);
+
+                /* keep going on failure, but remember the first result */
+                if (first_err == 0)
+                        first_err = err;
+        }
+        RETURN(first_err);
+}
+
+static int lov_mark_page_dirty(struct lustre_handle *conn,
                                 struct lov_stripe_md *lsm, unsigned long offset)
  {
          struct lov_obd *lov = &class_conn2obd(conn)->u.lov;
@@ -2209,12 +2453,12 @@ static int lov_mark_page_dirty(struct lustre_handle *conn,
                  RETURN(-ENOMEM);
  
          stripe = lov_stripe_number(lsm, (obd_off)offset << PAGE_CACHE_SHIFT);
-        lov_stripe_offset(lsm, (obd_off)offset << PAGE_CACHE_SHIFT, stripe, 
+        lov_stripe_offset(lsm, (obd_off)offset << PAGE_CACHE_SHIFT, stripe,
                            &off);
          off >>= PAGE_CACHE_SHIFT;
  
          loi = &lsm->lsm_oinfo[stripe];
-        CDEBUG(D_INODE, "off %lu => off %lu on stripe %d\n", offset, 
+        CDEBUG(D_INODE, "off %lu => off %lu on stripe %d\n", offset,
                 (unsigned long)off, stripe);
          submd->lsm_oinfo[0].loi_dirty_ot = &loi->loi_dirty_ot_inline;
  
@@ -2223,7 +2467,7 @@ static int lov_mark_page_dirty(struct lustre_handle *conn,
          RETURN(rc);
  }
  
-static int lov_clear_dirty_pages(struct lustre_handle *conn, 
+static int lov_clear_dirty_pages(struct lustre_handle *conn,
                                   struct lov_stripe_md *lsm, unsigned long start,
                                   unsigned long end, unsigned long *cleared)
  
@@ -2267,11 +2511,11 @@ static int lov_clear_dirty_pages(struct lustre_handle *conn,
                  obd_start >>= PAGE_CACHE_SHIFT;
                  obd_end >>= PAGE_CACHE_SHIFT;
  
-                CDEBUG(D_INODE, "offs [%lu,%lu] => offs [%lu,%lu] stripe %d\n", 
-                       start, end, (unsigned long)obd_start, 
+                CDEBUG(D_INODE, "offs [%lu,%lu] => offs [%lu,%lu] stripe %d\n",
+                       start, end, (unsigned long)obd_start,
                         (unsigned long)obd_end, loi->loi_ost_idx);
                  submd->lsm_oinfo[0].loi_dirty_ot = &loi->loi_dirty_ot_inline;
-                rc = obd_clear_dirty_pages(&lov->tgts[loi->loi_ost_idx].conn, 
+                rc = obd_clear_dirty_pages(&lov->tgts[loi->loi_ost_idx].conn,
                                             submd, obd_start, obd_end,
                                             &osc_cleared);
                  if (rc)
@@ -2310,15 +2554,14 @@ static int lov_last_dirty_offset(struct lustre_handle *conn,
          *offset = 0;
          lov = &export->exp_obd->u.lov;
          rc = -ENOENT;
-        for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; 
-                                          i++, loi++) {
  
+        for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++){
                  count = lsm->lsm_stripe_size >> PAGE_CACHE_SHIFT;
                  skip = (lsm->lsm_stripe_count - 1) * count;
  
                  submd->lsm_oinfo[0].loi_dirty_ot = &loi->loi_dirty_ot_inline;
  
-                err = obd_last_dirty_offset(&lov->tgts[loi->loi_ost_idx].conn, 
+                err = obd_last_dirty_offset(&lov->tgts[loi->loi_ost_idx].conn,
                                              submd, &tmp);
                  if (err == -ENOENT)
                          continue;
@@ -2326,7 +2569,7 @@ static int lov_last_dirty_offset(struct lustre_handle *conn,
                          GOTO(out_exp, rc = err);
  
                  rc = 0;
-                if (tmp != ~0) 
+                if (tmp != ~0)
                          tmp += (tmp/count * skip) + (i * count);
                  if (tmp > *offset)
                          *offset = tmp;
@@ -2338,6 +2581,100 @@ out_exp:
          RETURN(rc);
  }
  
+/* For LOV catalogs, we "nest" catalogs from the parent catalog.  What this
+ * means is that the parent catalog has a bunch of log cookies that are
+ * pointing at one catalog for each OSC.  The OSC catalogs in turn hold
+ * cookies for actual log files. */
+static int lov_get_catalogs(struct lov_obd *lov, struct llog_handle *cathandle)
+{
+        int i, rc;
+
+        ENTRY;
+        for (i = 0; i < lov->desc.ld_tgt_count; i++) {
+                lov->tgts[i].ltd_cathandle = llog_new_log(cathandle,
+                                                          &lov->tgts[i].uuid);
+                if (IS_ERR(lov->tgts[i].ltd_cathandle))
+                        continue;
+                rc = llog_init_catalog(cathandle, &lov->tgts[i].uuid);
+                if (rc)
+                        GOTO(err_logs, rc);
+        }
+        lov->lo_catalog_loaded = 1;
+        RETURN(0);
+err_logs:
+        while (i-- > 0) {
+                llog_delete_log(cathandle, lov->tgts[i].ltd_cathandle);
+                llog_close_log(cathandle, lov->tgts[i].ltd_cathandle);
+        }
+        return rc;
+}
+
+/* Add log records for each OSC that this object is striped over, and return
+ * cookies for each one.  We _would_ have nice abstraction here, except that
+ * we need to keep cookies in stripe order, even if some are NULL, so that
+ * the right cookies are passed back to the right OSTs at the client side.
+ * Unset cookies should be all-zero (which will never occur naturally). */
+static int lov_log_add(struct lustre_handle *conn,
+                       struct llog_handle *cathandle,
+                       struct llog_trans_hdr *rec, struct lov_stripe_md *lsm,
+                       struct llog_cookie *logcookies, int numcookies)
+{
+        struct obd_device *obd = class_conn2obd(conn);
+        struct lov_obd *lov = &obd->u.lov;
+        struct lov_oinfo *loi;
+        int i, rc = 0;
+        ENTRY;
+
+        LASSERT(logcookies && numcookies >= lsm->lsm_stripe_count);
+
+        if (unlikely(!lov->lo_catalog_loaded))
+                lov_get_catalogs(lov, cathandle);
+
+        for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
+                rc += obd_log_add(&lov->tgts[loi->loi_ost_idx].conn,
+                                  lov->tgts[loi->loi_ost_idx].ltd_cathandle,
+                                  rec, NULL, logcookies + rc, numcookies - rc);
+        }
+
+        RETURN(rc);
+}
+
+static int lov_log_cancel(struct lustre_handle *conn, struct lov_stripe_md *lsm,
+                          int count, struct llog_cookie *cookies, int flags)
+{
+        struct obd_export *export = class_conn2export(conn);
+        struct lov_obd *lov;
+        struct lov_oinfo *loi;
+        int rc = 0, i;
+        ENTRY;
+
+        LASSERT(lsm != NULL);
+        if (export == NULL || export->exp_obd == NULL)
+                GOTO(out, rc = -ENODEV);
+
+        LASSERT(count == lsm->lsm_stripe_count);
+
+        loi = lsm->lsm_oinfo;
+        lov = &export->exp_obd->u.lov;
+        for (i = 0; i < count; i++, cookies++, loi++) {
+                int err;
+
+                err = obd_log_cancel(&lov->tgts[loi->loi_ost_idx].conn,
+                                     NULL, 1, cookies, flags);
+                if (err && lov->tgts[loi->loi_ost_idx].active) {
+                        CERROR("error: objid "LPX64" subobj "LPX64
+                               " on OST idx %d: rc = %d\n", lsm->lsm_object_id,
+                               loi->loi_id, loi->loi_ost_idx, err);
+                        if (!rc)
+                                rc = err;
+                }
+        }
+        GOTO(out, rc);
+ out:
+        class_export_put(export);
+        return rc;
+}
+
  struct obd_ops lov_obd_ops = {
          o_owner:       THIS_MODULE,
          o_attach:      lov_attach,
@@ -2364,9 +2701,12 @@ struct obd_ops lov_obd_ops = {
          o_cancel_unused: lov_cancel_unused,
          o_iocontrol:   lov_iocontrol,
          o_get_info:    lov_get_info,
-        .o_mark_page_dirty =    lov_mark_page_dirty,
-        .o_clear_dirty_pages =    lov_clear_dirty_pages,
-        .o_last_dirty_offset =    lov_last_dirty_offset,
+        o_set_info:    lov_set_info,
+        o_log_add:     lov_log_add,
+        o_log_cancel:  lov_log_cancel,
+        o_mark_page_dirty:   lov_mark_page_dirty,
+        o_clear_dirty_pages: lov_clear_dirty_pages,
+        o_last_dirty_offset: lov_last_dirty_offset,
  };
  
  int __init lov_init(void)
@@ -2374,15 +2714,13 @@ int __init lov_init(void)
          struct lprocfs_static_vars lvars;
          int rc;
  
-        printk(KERN_INFO "Lustre Logical Object Volume driver; "
-               "info@clusterfs.com\n");
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(lov, &lvars);
          rc = class_register_type(&lov_obd_ops, lvars.module_vars,
                                   OBD_LOV_DEVICENAME);
          RETURN(rc);
  }
  
-static void __exit lov_exit(void)
+static void /*__exit*/ lov_exit(void)
  {
          class_unregister_type(OBD_LOV_DEVICENAME);
  }
diff --git a/lustre/lov/lov_pack.c b/lustre/lov/lov_pack.c

index bbb40de..a719aac 100644 (file)
--- a/lustre/lov/lov_pack.c
+++ b/lustre/lov/lov_pack.c
@@ -34,6 +34,8 @@
  #include <linux/obd_class.h>
  #include <linux/obd_support.h>
  
+#include "lov_internal.h"
+
  void lov_dump_lmm(int level, struct lov_mds_md *lmm)
  {
          struct lov_object_id *loi;
@@ -129,14 +131,14 @@ int lov_packmd(struct lustre_handle *conn, struct lov_mds_md **lmmp,
          for (i = 0, loi = lsm->lsm_oinfo; i < stripe_count; i++, loi++) {
                  /* XXX call down to osc_packmd() to do the packing */
                  LASSERT (loi->loi_id);
-                lmm->lmm_objects[loi->loi_ost_idx].l_object_id = 
+                lmm->lmm_objects[loi->loi_ost_idx].l_object_id =
                          cpu_to_le64 (loi->loi_id);
          }
  
          RETURN(lmm_size);
  }
  
-static int lov_get_stripecnt(struct lov_obd *lov, int stripe_count)
+int lov_get_stripecnt(struct lov_obd *lov, int stripe_count)
  {
          if (!stripe_count)
                  stripe_count = lov->desc.ld_default_stripe_count;
@@ -146,6 +148,90 @@ static int lov_get_stripecnt(struct lov_obd *lov, int stripe_count)
          return stripe_count;
  }
  
+static int lov_verify_lmm(struct lov_mds_md *lmm, int lmm_bytes,
+                          int *ost_count, int *stripe_count, int *ost_offset)
+{
+        if (lmm_bytes < sizeof(*lmm)) {
+                CERROR("lov_mds_md too small: %d, need at least %d\n",
+                       lmm_bytes, (int)sizeof(*lmm));
+                return -EINVAL;
+        }
+
+        if (le32_to_cpu(lmm->lmm_magic) != LOV_MAGIC) {
+                CERROR("bad disk LOV MAGIC: %#08x != %#08x\n",
+                       le32_to_cpu(lmm->lmm_magic), LOV_MAGIC);
+                lov_dump_lmm(D_WARNING, lmm);
+                return -EINVAL;
+        }
+
+        *ost_count = le16_to_cpu(lmm->lmm_ost_count);
+        *stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
+        *ost_offset = le32_to_cpu(lmm->lmm_stripe_offset);
+
+        if (*ost_count == 0 || *stripe_count == 0) {
+                CERROR("zero OST count %d or stripe count %d\n",
+                       *ost_count, *stripe_count);
+                lov_dump_lmm(D_WARNING, lmm);
+                return -EINVAL;
+        }
+
+        if (lmm_bytes < lov_mds_md_size(*ost_count)) {
+                CERROR("lov_mds_md too small: %d, need %d\n",
+                       lmm_bytes, lov_mds_md_size(*ost_count));
+                lov_dump_lmm(D_WARNING, lmm);
+                return -EINVAL;
+        }
+
+        if (*ost_offset > *ost_count) {
+                CERROR("starting OST offset %d > number of OSTs %d\n",
+                       *ost_offset, *ost_count);
+                lov_dump_lmm(D_WARNING, lmm);
+                return -EINVAL;
+        }
+
+        if (*stripe_count > *ost_count) {
+                CERROR("stripe count %d > number of OSTs %d\n",
+                       *stripe_count, *ost_count);
+                lov_dump_lmm(D_WARNING, lmm);
+                return -EINVAL;
+        }
+
+        if (lmm->lmm_object_id == 0) {
+                CERROR("zero object id\n");
+                lov_dump_lmm(D_WARNING, lmm);
+                return -EINVAL;
+        }
+
+        return 0;
+}
+
+int lov_alloc_memmd(struct lov_stripe_md **lsmp, int stripe_count)
+{
+        int lsm_size = lov_stripe_md_size(stripe_count);
+        struct lov_oinfo *loi;
+        int i;
+
+        OBD_ALLOC(*lsmp, lsm_size);
+        if (!*lsmp)
+                return -ENOMEM;
+
+        (*lsmp)->lsm_magic = LOV_MAGIC;
+        (*lsmp)->lsm_stripe_count = stripe_count;
+        (*lsmp)->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES * stripe_count;
+
+        for (i = 0, loi = (*lsmp)->lsm_oinfo; i < stripe_count; i++, loi++){
+                loi->loi_dirty_ot = &loi->loi_dirty_ot_inline;
+                ot_init(loi->loi_dirty_ot);
+        }
+        return lsm_size;
+}
+
+void lov_free_memmd(struct lov_stripe_md **lsmp)
+{
+        OBD_FREE(*lsmp, lov_stripe_md_size((*lsmp)->lsm_stripe_count));
+        *lsmp = NULL;
+}
+
  /* Unpack LOV object metadata from disk storage.  It is packed in LE byte
   * order and is opaque to the networking layer.
   */
@@ -156,75 +242,48 @@ int lov_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
          struct lov_obd *lov = &obd->u.lov;
          struct lov_stripe_md *lsm;
          struct lov_oinfo *loi;
-        int ost_count = 0;
-        int ost_offset = 0;
+        int ost_count;
+        int ost_offset;
          int stripe_count;
          int lsm_size;
          int i;
          ENTRY;
  
+        /* If passed an MDS struct use values from there, otherwise defaults */
          if (lmm) {
-                if (lmm_bytes < sizeof (*lmm)) {
-                        CERROR("lov_mds_md too small: %d, need %d\n",
-                                lmm_bytes, (int)sizeof(*lmm));
-                        RETURN(-EINVAL);
-                }
-                if (le32_to_cpu (lmm->lmm_magic) != LOV_MAGIC) {
-                        CERROR("bad disk LOV MAGIC: %#08x != %#08x\n",
-                               le32_to_cpu (lmm->lmm_magic), LOV_MAGIC);
-                        RETURN(-EINVAL);
-                }
-
-                ost_count = le16_to_cpu (lmm->lmm_ost_count);
-                stripe_count = le16_to_cpu (lmm->lmm_stripe_count);
-
-                if (ost_count == 0 || stripe_count == 0) {
-                        CERROR ("zero ost %d or stripe %d count\n",
-                                ost_count, stripe_count);
-                        RETURN (-EINVAL);
-                }
-
-                if (lmm_bytes < lov_mds_md_size (ost_count)) {
-                        CERROR ("lov_mds_md too small: %d, need %d\n",
-                                lmm_bytes, lov_mds_md_size (ost_count));
-                        RETURN (-EINVAL);
-                }
-        } else
+                i = lov_verify_lmm(lmm, lmm_bytes, &ost_count, &stripe_count,
+                                   &ost_offset);
+                if (i)
+                        RETURN(i);
+        } else {
+                ost_count = 0;
                  stripe_count = lov_get_stripecnt(lov, 0);
+                ost_offset = 0;
+        }
  
-        /* XXX LOV STACKING call into osc for sizes */
-        lsm_size = lov_stripe_md_size(stripe_count);
-
+        /* If we aren't passed an lsmp struct, we just want the size */
          if (!lsmp)
-                RETURN(lsm_size);
+                /* XXX LOV STACKING call into osc for sizes */
+                RETURN(lov_stripe_md_size(stripe_count));
  
+        /* If we are passed an allocated struct but nothing to unpack, free */
          if (*lsmp && !lmm) {
-                stripe_count = (*lsmp)->lsm_stripe_count;
-                OBD_FREE(*lsmp, lov_stripe_md_size(stripe_count));
-                *lsmp = NULL;
+                lov_free_memmd(lsmp);
                  RETURN(0);
          }
  
-        if (!*lsmp) {
-                OBD_ALLOC(*lsmp, lsm_size);
-                if (!*lsmp)
-                        RETURN(-ENOMEM);
-        }
-
-        lsm = *lsmp;
-        lsm->lsm_magic = LOV_MAGIC;
-        lsm->lsm_stripe_count = stripe_count;
-        lsm->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES * stripe_count;
+        lsm_size = lov_alloc_memmd(lsmp, stripe_count);
+        if (lsm_size < 0)
+                RETURN(lsm_size);
  
+        /* If we are passed a pointer but nothing to unpack, we only alloc */
          if (!lmm)
                  RETURN(lsm_size);
  
-        lsm->lsm_object_id = le64_to_cpu (lmm->lmm_object_id);
-        lsm->lsm_stripe_size = le32_to_cpu (lmm->lmm_stripe_size);
-        ost_offset = lsm->lsm_stripe_offset = le32_to_cpu (lmm->lmm_stripe_offset);
-
-        LMM_ASSERT(lsm->lsm_object_id);
-        LMM_ASSERT(ost_count);
+        lsm = *lsmp;
+        lsm->lsm_object_id = le64_to_cpu(lmm->lmm_object_id);
+        lsm->lsm_stripe_size = le32_to_cpu(lmm->lmm_stripe_size);
+        lsm->lsm_stripe_offset = ost_offset;
  
          for (i = 0, loi = lsm->lsm_oinfo; i < ost_count; i++, ost_offset++) {
                  ost_offset %= ost_count;
@@ -232,17 +291,20 @@ int lov_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
                  if (!lmm->lmm_objects[ost_offset].l_object_id)
                          continue;
  
-                LMM_ASSERT(loi - lsm->lsm_oinfo < stripe_count);
                  /* XXX LOV STACKING call down to osc_unpackmd() */
                  loi->loi_id =
-                        le64_to_cpu (lmm->lmm_objects[ost_offset].l_object_id);
+                        le64_to_cpu(lmm->lmm_objects[ost_offset].l_object_id);
                  loi->loi_ost_idx = ost_offset;
-                loi->loi_dirty_ot = &loi->loi_dirty_ot_inline;
-                ot_init(loi->loi_dirty_ot);
                  loi++;
          }
-        LMM_ASSERT(loi - lsm->lsm_oinfo > 0);
-        LMM_ASSERT(loi - lsm->lsm_oinfo == stripe_count);
+
+        if (loi - lsm->lsm_oinfo != stripe_count) {
+                CERROR("missing objects in lmm struct\n");
+                lov_dump_lmm(D_WARNING, lmm);
+                lov_free_memmd(lsmp);
+                RETURN(-EINVAL);
+        }
+
  
          RETURN(lsm_size);
  }
@@ -260,7 +322,6 @@ int lov_setstripe(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
          struct obd_device *obd = class_conn2obd(conn);
          struct lov_obd *lov = &obd->u.lov;
          struct lov_mds_md lmm;
-        struct lov_stripe_md *lsm;
          int stripe_count;
          int rc;
          ENTRY;
@@ -272,7 +333,7 @@ int lov_setstripe(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
          /* Bug 1185 FIXME: struct lov_mds_md is little-endian everywhere else */
  
          if (lmm.lmm_magic != LOV_MAGIC) {
-                CERROR("bad userland LOV MAGIC: %#08x != %#08x\n",
+                CDEBUG(D_IOCTL, "bad userland LOV MAGIC: %#08x != %#08x\n",
                         lmm.lmm_magic, LOV_MAGIC);
                  RETURN(-EINVAL);
          }
@@ -291,32 +352,27 @@ int lov_setstripe(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
          }
  #endif
          if (lmm.lmm_stripe_size & (PAGE_SIZE - 1)) {
-                CERROR("stripe size %u not multiple of %lu\n",
+                CDEBUG(D_IOCTL, "stripe size %u not multiple of %lu\n",
                         lmm.lmm_stripe_size, PAGE_SIZE);
                  RETURN(-EINVAL);
          }
          stripe_count = lov_get_stripecnt(lov, lmm.lmm_stripe_count);
  
          if ((__u64)lmm.lmm_stripe_size * stripe_count > ~0UL) {
-                CERROR("stripe width %ux%u > %lu on 32-bit system\n",
+                CDEBUG(D_IOCTL, "stripe width %ux%u > %lu on 32-bit system\n",
                         lmm.lmm_stripe_size, (int)lmm.lmm_stripe_count, ~0UL);
                  RETURN(-EINVAL);
          }
  
-        /* XXX LOV STACKING call into osc for sizes */
-        OBD_ALLOC(lsm, lov_stripe_md_size(stripe_count));
-        if (!lsm)
-                RETURN(-ENOMEM);
+        rc = lov_alloc_memmd(lsmp, stripe_count);
  
-        lsm->lsm_magic = LOV_MAGIC;
-        lsm->lsm_stripe_count = stripe_count;
-        lsm->lsm_stripe_offset = lmm.lmm_stripe_offset;
-        lsm->lsm_stripe_size = lmm.lmm_stripe_size;
-        lsm->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES * stripe_count;
+        if (rc < 0)
+                RETURN(rc);
  
-        *lsmp = lsm;
+        (*lsmp)->lsm_stripe_offset = lmm.lmm_stripe_offset;
+        (*lsmp)->lsm_stripe_size = lmm.lmm_stripe_size;
  
-        RETURN(rc);
+        RETURN(0);
  }
  
  /* Retrieve object striping information.
diff --git a/lustre/lov/lproc_lov.c b/lustre/lov/lproc_lov.c

index e0b3adb..7b7a00c 100644 (file)
--- a/lustre/lov/lproc_lov.c
+++ b/lustre/lov/lproc_lov.c
@@ -30,19 +30,12 @@
  #include <linux/seq_file.h>
  
  #ifndef LPROCFS
-struct lprocfs_vars lprocfs_module_vars[] = { {0} };
-struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
  #else
  
-DEFINE_LPROCFS_STATFS_FCT(rd_blksize,     obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree,  obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filestotal,  obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filesfree,   obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filegroups,  obd_self_statfs);
-
-int rd_stripesize(char *page, char **start, off_t off, int count, int *eof,
-                  void *data)
+static int lov_rd_stripesize(char *page, char **start, off_t off, int count,
+                             int *eof, void *data)
  {
          struct obd_device *dev = (struct obd_device *)data;
          struct lov_desc *desc;
@@ -53,8 +46,8 @@ int rd_stripesize(char *page, char **start, off_t off, int count, int *eof,
          return snprintf(page, count, LPU64"\n", desc->ld_default_stripe_size);
  }
  
-int rd_stripeoffset(char *page, char **start, off_t off, int count, int *eof,
-                    void *data)
+static int lov_rd_stripeoffset(char *page, char **start, off_t off, int count,
+                               int *eof, void *data)
  {
          struct obd_device *dev = (struct obd_device *)data;
          struct lov_desc *desc;
@@ -65,8 +58,8 @@ int rd_stripeoffset(char *page, char **start, off_t off, int count, int *eof,
          return snprintf(page, count, LPU64"\n", desc->ld_default_stripe_offset);
  }
  
-int rd_stripetype(char *page, char **start, off_t off, int count, int *eof,
-                  void *data)
+static int lov_rd_stripetype(char *page, char **start, off_t off, int count,
+                             int *eof, void *data)
  {
          struct obd_device* dev = (struct obd_device*)data;
          struct lov_desc *desc;
@@ -77,8 +70,8 @@ int rd_stripetype(char *page, char **start, off_t off, int count, int *eof,
          return snprintf(page, count, "%u\n", desc->ld_pattern);
  }
  
-int rd_stripecount(char *page, char **start, off_t off, int count, int *eof,
-                   void *data)
+static int lov_rd_stripecount(char *page, char **start, off_t off, int count,
+                              int *eof, void *data)
  {
          struct obd_device *dev = (struct obd_device *)data;
          struct lov_desc *desc;
@@ -89,8 +82,8 @@ int rd_stripecount(char *page, char **start, off_t off, int count, int *eof,
          return snprintf(page, count, "%u\n", desc->ld_default_stripe_count);
  }
  
-int rd_numobd(char *page, char **start, off_t off, int count, int *eof,
-              void *data)
+static int lov_rd_numobd(char *page, char **start, off_t off, int count,
+                         int *eof, void *data)
  {
          struct obd_device *dev = (struct obd_device*)data;
          struct lov_desc *desc;
@@ -102,8 +95,8 @@ int rd_numobd(char *page, char **start, off_t off, int count, int *eof,
  
  }
  
-int rd_activeobd(char *page, char **start, off_t off, int count, int *eof,
-                 void *data)
+static int lov_rd_activeobd(char *page, char **start, off_t off, int count,
+                            int *eof, void *data)
  {
          struct obd_device* dev = (struct obd_device*)data;
          struct lov_desc *desc;
@@ -114,7 +107,8 @@ int rd_activeobd(char *page, char **start, off_t off, int count, int *eof,
          return snprintf(page, count, "%u\n", desc->ld_active_tgt_count);
  }
  
-int rd_mdc(char *page, char **start, off_t off, int count, int *eof, void *data)
+static int lov_rd_mdc(char *page, char **start, off_t off, int count, int *eof,
+                      void *data)
  {
          struct obd_device *dev = (struct obd_device*) data;
          struct lov_obd *lov;
@@ -125,7 +119,7 @@ int rd_mdc(char *page, char **start, off_t off, int count, int *eof, void *data)
          return snprintf(page, count, "%s\n", lov->mdcobd->obd_uuid.uuid);
  }
  
-static void *ll_tgt_seq_start(struct seq_file *p, loff_t *pos)
+static void *lov_tgt_seq_start(struct seq_file *p, loff_t *pos)
  {
          struct obd_device *dev = p->private;
          struct lov_obd *lov = &dev->u.lov;
@@ -133,12 +127,12 @@ static void *ll_tgt_seq_start(struct seq_file *p, loff_t *pos)
          return (*pos >= lov->desc.ld_tgt_count) ? NULL : &(lov->tgts[*pos]);
  
  }
-static void ll_tgt_seq_stop(struct seq_file *p, void *v)
-{
  
+static void lov_tgt_seq_stop(struct seq_file *p, void *v)
+{
  }
  
-static void *ll_tgt_seq_next(struct seq_file *p, void *v, loff_t *pos)
+static void *lov_tgt_seq_next(struct seq_file *p, void *v, loff_t *pos)
  {
          struct obd_device *dev = p->private;
          struct lov_obd *lov = &dev->u.lov;
@@ -147,7 +141,7 @@ static void *ll_tgt_seq_next(struct seq_file *p, void *v, loff_t *pos)
          return (*pos >=lov->desc.ld_tgt_count) ? NULL : &(lov->tgts[*pos]);
  }
  
-static int ll_tgt_seq_show(struct seq_file *p, void *v)
+static int lov_tgt_seq_show(struct seq_file *p, void *v)
  {
          struct lov_tgt_desc *tgt = v;
          struct obd_device *dev = p->private;
@@ -157,18 +151,18 @@ static int ll_tgt_seq_show(struct seq_file *p, void *v)
                            tgt->active ? "" : "IN");
  }
  
-struct seq_operations ll_tgt_sops = {
-        .start = ll_tgt_seq_start,
-        .stop = ll_tgt_seq_stop,
-        .next = ll_tgt_seq_next,
-        .show = ll_tgt_seq_show,
+struct seq_operations lov_tgt_sops = {
+        .start = lov_tgt_seq_start,
+        .stop = lov_tgt_seq_stop,
+        .next = lov_tgt_seq_next,
+        .show = lov_tgt_seq_show,
  };
  
-static int ll_target_seq_open(struct inode *inode, struct file *file)
+static int lov_target_seq_open(struct inode *inode, struct file *file)
  {
          struct proc_dir_entry *dp = inode->u.generic_ip;
          struct seq_file *seq;
-        int rc = seq_open(file, &ll_tgt_sops);
+        int rc = seq_open(file, &lov_tgt_sops);
  
          if (rc)
                  return rc;
@@ -178,35 +172,36 @@ static int ll_target_seq_open(struct inode *inode, struct file *file)
  
          return 0;
  }
+
  struct lprocfs_vars lprocfs_obd_vars[] = {
-        { "uuid",         lprocfs_rd_uuid, 0, 0 },
-        { "stripesize",   rd_stripesize,   0, 0 },
-        { "stripeoffset", rd_stripeoffset, 0, 0 },
-        { "stripecount",  rd_stripecount,  0, 0 },
-        { "stripetype",   rd_stripetype,   0, 0 },
-        { "numobd",       rd_numobd,       0, 0 },
-        { "activeobd",    rd_activeobd,    0, 0 },
-        { "filestotal",   rd_filestotal,   0, 0 },
-        { "filesfree",    rd_filesfree,    0, 0 },
-        { "filegroups",   rd_filegroups,   0, 0 },
-        { "blocksize",    rd_blksize,      0, 0 },
-        { "kbytestotal",  rd_kbytestotal,  0, 0 },
-        { "kbytesfree",   rd_kbytesfree,   0, 0 },
-        { "target_mdc",   rd_mdc,          0, 0 },
+        { "uuid",         lprocfs_rd_uuid,        0, 0 },
+        { "stripesize",   lov_rd_stripesize,      0, 0 },
+        { "stripeoffset", lov_rd_stripeoffset,    0, 0 },
+        { "stripecount",  lov_rd_stripecount,     0, 0 },
+        { "stripetype",   lov_rd_stripetype,      0, 0 },
+        { "numobd",       lov_rd_numobd,          0, 0 },
+        { "activeobd",    lov_rd_activeobd,       0, 0 },
+        { "filestotal",   lprocfs_rd_filestotal,  0, 0 },
+        { "filesfree",    lprocfs_rd_filesfree,   0, 0 },
+        //{ "filegroups",   lprocfs_rd_filegroups,  0, 0 },
+        { "blocksize",    lprocfs_rd_blksize,     0, 0 },
+        { "kbytestotal",  lprocfs_rd_kbytestotal, 0, 0 },
+        { "kbytesfree",   lprocfs_rd_kbytesfree,  0, 0 },
+        { "target_mdc",   lov_rd_mdc,             0, 0 },
          { 0 }
  };
  
-struct lprocfs_vars lprocfs_module_vars[] = {
-        { "num_refs",     lprocfs_rd_numrefs, 0, 0 },
+static struct lprocfs_vars lprocfs_module_vars[] = {
+        { "num_refs",     lprocfs_rd_numrefs,     0, 0 },
          { 0 }
  };
  
-struct file_operations ll_proc_target_fops = {
-        .open = ll_target_seq_open,
+struct file_operations lov_proc_target_fops = {
+        .open = lov_target_seq_open,
          .read = seq_read,
          .llseek = seq_lseek,
          .release = seq_release,
  };
  
  #endif /* LPROCFS */
-LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
+LPROCFS_INIT_VARS(lov, lprocfs_module_vars, lprocfs_obd_vars)
diff --git a/lustre/mdc/.cvsignore b/lustre/mdc/.cvsignore

index e530020..49c6100 100644 (file)
--- a/lustre/mdc/.cvsignore
+++ b/lustre/mdc/.cvsignore
@@ -6,3 +6,4 @@ Makefile
  Makefile.in
  .deps
  TAGS
+.*.cmd
diff --git a/lustre/mdc/lproc_mdc.c b/lustre/mdc/lproc_mdc.c

index 3f81507..6dca228 100644 (file)
--- a/lustre/mdc/lproc_mdc.c
+++ b/lustre/mdc/lproc_mdc.c
@@ -22,42 +22,32 @@
  #define DEBUG_SUBSYSTEM S_CLASS
  
  #include <linux/version.h>
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#include <asm/statfs.h>
-#endif
+#include <linux/vfs.h>
  #include <linux/obd_class.h>
  #include <linux/lprocfs_status.h>
  
  #ifndef LPROCFS
-struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
-struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_module_vars[] = { {0} };
  #else
-
-DEFINE_LPROCFS_STATFS_FCT(rd_blksize,     obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree,  obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filestotal,  obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filesfree,   obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filegroups,  obd_self_statfs);
-
-struct lprocfs_vars lprocfs_obd_vars[] = {
+static struct lprocfs_vars lprocfs_obd_vars[] = {
          { "uuid",            lprocfs_rd_uuid,        0, 0 },
-        { "blocksize",       rd_blksize,             0, 0 },
-        { "kbytestotal",     rd_kbytestotal,         0, 0 },
-        { "kbytesfree",      rd_kbytesfree,          0, 0 },
-        { "filestotal",      rd_filestotal,          0, 0 },
-        { "filesfree",       rd_filesfree,           0, 0 },
-        { "filegroups",      rd_filegroups,          0, 0 },
+        { "blocksize",       lprocfs_rd_blksize,     0, 0 },
+        { "kbytestotal",     lprocfs_rd_kbytestotal, 0, 0 },
+        { "kbytesfree",      lprocfs_rd_kbytesfree,  0, 0 },
+        { "filestotal",      lprocfs_rd_filestotal,  0, 0 },
+        { "filesfree",       lprocfs_rd_filesfree,   0, 0 },
+        //{ "filegroups",      lprocfs_rd_filegroups,  0, 0 },
          { "mds_server_uuid", lprocfs_rd_server_uuid, 0, 0 },
          { "mds_conn_uuid",   lprocfs_rd_conn_uuid,   0, 0 },
          { 0 }
  };
  
-struct lprocfs_vars lprocfs_module_vars[] = {
+static struct lprocfs_vars lprocfs_module_vars[] = {
          { "num_refs",        lprocfs_rd_numrefs,     0, 0 },
          { 0 }
  };
  
  #endif /* LPROCFS */
  
-LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
+LPROCFS_INIT_VARS(mdc, lprocfs_module_vars, lprocfs_obd_vars)
diff --git a/lustre/mdc/mdc_internal.h b/lustre/mdc/mdc_internal.h

index e39a0aa..49d85ab 100644 (file)
--- a/lustre/mdc/mdc_internal.h
+++ b/lustre/mdc/mdc_internal.h
@@ -1,24 +1,25 @@
-void mds_pack_req_body(struct ptlrpc_request *);
-void mds_pack_rep_body(struct ptlrpc_request *);
-void mds_readdir_pack(struct ptlrpc_request *req, __u64 offset, __u32 size,
+void mdc_pack_req_body(struct ptlrpc_request *);
+void mdc_pack_rep_body(struct ptlrpc_request *);
+void mdc_readdir_pack(struct ptlrpc_request *req, __u64 offset, __u32 size,
                        obd_id ino, int type);
-void mds_getattr_pack(struct ptlrpc_request *req, int valid, int offset,
+void mdc_getattr_pack(struct ptlrpc_request *req, int valid, int offset,
                        int flags, struct mdc_op_data *data);
-void mds_setattr_pack(struct ptlrpc_request *req,
+void mdc_setattr_pack(struct ptlrpc_request *req,
                        struct mdc_op_data *data,
-                      struct iattr *iattr, void *ea, int ealen);
-void mds_create_pack(struct ptlrpc_request *req, int offset,
+                      struct iattr *iattr, void *ea, int ealen,
+                     void *ea2, int ea2len);
+void mdc_create_pack(struct ptlrpc_request *req, int offset,
                       struct mdc_op_data *op_data,
                       __u32 mode, __u64 rdev, __u32 uid, __u32 gid, __u64 time,
                       const void *data, int datalen);
-void mds_open_pack(struct ptlrpc_request *req, int offset,
+void mdc_open_pack(struct ptlrpc_request *req, int offset,
                     struct mdc_op_data *op_data,
                     __u32 mode, __u64 rdev, __u32 uid, __u32 gid, __u64 time,
                     __u32 flags, const void *data, int datalen);
-void mds_unlink_pack(struct ptlrpc_request *req, int offset,
+void mdc_unlink_pack(struct ptlrpc_request *req, int offset,
                       struct mdc_op_data *data);
-void mds_link_pack(struct ptlrpc_request *req, int offset,
+void mdc_link_pack(struct ptlrpc_request *req, int offset,
                     struct mdc_op_data *data);
-void mds_rename_pack(struct ptlrpc_request *req, int offset,
+void mdc_rename_pack(struct ptlrpc_request *req, int offset,
                       struct mdc_op_data *data,
                       const char *old, int oldlen, const char *new, int newlen);
diff --git a/lustre/mdc/mdc_lib.c b/lustre/mdc/mdc_lib.c

index 806a830..a17f7a1 100644 (file)
--- a/lustre/mdc/mdc_lib.c
+++ b/lustre/mdc/mdc_lib.c
@@ -28,7 +28,7 @@
  #include <linux/lustre_mds.h>
  #include <linux/lustre_lite.h>
  
-void mds_readdir_pack(struct ptlrpc_request *req, __u64 offset, __u32 size,
+void mdc_readdir_pack(struct ptlrpc_request *req, __u64 offset, __u32 size,
                        obd_id ino, int type, __u64 xid)
  {
          struct mds_body *b;
@@ -45,7 +45,7 @@ void mds_readdir_pack(struct ptlrpc_request *req, __u64 offset, __u32 size,
          b->nlink = size;                        /* !! */
  }
  
-static void mds_pack_body(struct mds_body *b)
+static void mdc_pack_body(struct mds_body *b)
  {
          LASSERT (b != NULL);
  
@@ -54,14 +54,14 @@ static void mds_pack_body(struct mds_body *b)
          b->capability = current->cap_effective;
  }
  
-void mds_pack_req_body(struct ptlrpc_request *req)
+void mdc_pack_req_body(struct ptlrpc_request *req)
  {
          struct mds_body *b = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*b));
-        mds_pack_body(b);
+        mdc_pack_body(b);
  }
  
  /* packing of MDS records */
-void mds_create_pack(struct ptlrpc_request *req, int offset,
+void mdc_create_pack(struct ptlrpc_request *req, int offset,
                       struct mdc_op_data *op_data,
                       __u32 mode, __u64 rdev, __u32 uid, __u32 gid, __u64 time,
                       const void *data, int datalen)
@@ -94,8 +94,9 @@ void mds_create_pack(struct ptlrpc_request *req, int offset,
                  memcpy (tmp, data, datalen);
          }
  }
+
  /* packing of MDS records */
-void mds_open_pack(struct ptlrpc_request *req, int offset,
+void mdc_open_pack(struct ptlrpc_request *req, int offset,
                     struct mdc_op_data *op_data,
                     __u32 mode, __u64 rdev, __u32 uid, __u32 gid, __u64 time,
                     __u32 flags, const void *data, int datalen)
@@ -109,8 +110,9 @@ void mds_open_pack(struct ptlrpc_request *req, int offset,
          rec->cr_fsuid = current->fsuid;
          rec->cr_fsgid = current->fsgid;
          rec->cr_cap = current->cap_effective;
-        ll_ino2fid(&rec->cr_fid, op_data->ino1,
-                   op_data->gen1, op_data->typ1);
+        if (op_data != NULL)
+                ll_ino2fid(&rec->cr_fid, op_data->ino1,
+                           op_data->gen1, op_data->typ1);
          memset(&rec->cr_replayfid, 0, sizeof(rec->cr_replayfid));
          rec->cr_mode = mode;
          rec->cr_flags = flags;
@@ -123,17 +125,22 @@ void mds_open_pack(struct ptlrpc_request *req, int offset,
          else
                  rec->cr_suppgid = -1;
  
-        tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, op_data->namelen + 1);
-        LOGL0(op_data->name, op_data->namelen, tmp);
+        if (op_data->name) {
+                tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1,
+                                     op_data->namelen + 1);
+                LOGL0(op_data->name, op_data->namelen, tmp);
+        }
  
          if (data) {
                  tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, datalen);
                  memcpy (tmp, data, datalen);
          }
  }
-void mds_setattr_pack(struct ptlrpc_request *req,
+
+void mdc_setattr_pack(struct ptlrpc_request *req,
                        struct mdc_op_data *data,
-                      struct iattr *iattr, void *ea, int ealen)
+                      struct iattr *iattr, void *ea, int ealen,
+                      void *ea2, int ea2len)
  {
          struct mds_rec_setattr *rec = lustre_msg_buf(req->rq_reqmsg, 0,
                                                       sizeof (*rec));
@@ -163,11 +170,18 @@ void mds_setattr_pack(struct ptlrpc_request *req,
                          rec->sa_suppgid = -1;
          }
  
-        if (ealen != 0)
-                memcpy(lustre_msg_buf(req->rq_reqmsg, 1, ealen), ea, ealen);
+        if (ealen == 0)
+                return;
+
+        memcpy(lustre_msg_buf(req->rq_reqmsg, 1, ealen), ea, ealen);
+
+        if (ea2len == 0)
+                return;
+
+        memcpy(lustre_msg_buf(req->rq_reqmsg, 2, ea2len), ea2, ea2len);
  }
  
-void mds_unlink_pack(struct ptlrpc_request *req, int offset,
+void mdc_unlink_pack(struct ptlrpc_request *req, int offset,
                       struct mdc_op_data *data)
  {
          struct mds_rec_unlink *rec;
@@ -194,7 +208,7 @@ void mds_unlink_pack(struct ptlrpc_request *req, int offset,
          LOGL0(data->name, data->namelen, tmp);
  }
  
-void mds_link_pack(struct ptlrpc_request *req, int offset,
+void mdc_link_pack(struct ptlrpc_request *req, int offset,
                     struct mdc_op_data *data)
  {
          struct mds_rec_link *rec;
@@ -221,7 +235,7 @@ void mds_link_pack(struct ptlrpc_request *req, int offset,
          LOGL0(data->name, data->namelen, tmp);
  }
  
-void mds_rename_pack(struct ptlrpc_request *req, int offset,
+void mdc_rename_pack(struct ptlrpc_request *req, int offset,
                       struct mdc_op_data *data,
                       const char *old, int oldlen, const char *new, int newlen)
  {
@@ -255,7 +269,7 @@ void mds_rename_pack(struct ptlrpc_request *req, int offset,
          }
  }
  
-void mds_getattr_pack(struct ptlrpc_request *req, int valid, int offset,
+void mdc_getattr_pack(struct ptlrpc_request *req, int valid, int offset,
                        int flags, struct mdc_op_data *data)
  {
          struct mds_body *b;
diff --git a/lustre/mdc/mdc_reint.c b/lustre/mdc/mdc_reint.c

index 2da2fdb..4f7443e 100644 (file)
--- a/lustre/mdc/mdc_reint.c
+++ b/lustre/mdc/mdc_reint.c
@@ -35,22 +35,24 @@
  #include "mdc_internal.h"
  
  /* mdc_setattr does its own semaphore handling */
-static int mdc_reint(struct ptlrpc_request *request, int level)
+static int mdc_reint(struct ptlrpc_request *request,
+                     struct mdc_rpc_lock *rpc_lock, int level)
  {
          int rc;
-        __u32 *opcodeptr;
+        
  
-        opcodeptr = lustre_msg_buf(request->rq_reqmsg, 0, sizeof (*opcodeptr));
          request->rq_level = level;
  
-        if (!(*opcodeptr == REINT_SETATTR))
-                mdc_get_rpc_lock(&mdc_rpc_lock, NULL);
+        mdc_get_rpc_lock(rpc_lock, NULL);
          rc = ptlrpc_queue_wait(request);
-        if (!(*opcodeptr == REINT_SETATTR))
-                mdc_put_rpc_lock(&mdc_rpc_lock, NULL);
-
+        mdc_put_rpc_lock(rpc_lock, NULL);
          if (rc)
                  CDEBUG(D_INFO, "error in handling %d\n", rc);
+        else if (!lustre_swab_repbuf(request, 0, sizeof(struct mds_body),
+                                     lustre_swab_mds_body)) {
+                CERROR ("Can't unpack mds_body\n");
+                rc = -EPROTO;
+        }
          return rc;
  }
  
@@ -60,42 +62,45 @@ static int mdc_reint(struct ptlrpc_request *request, int level)
   * If it is called with iattr->ia_valid & ATTR_FROM_OPEN, then it is a
   * magic open-path setattr that should take the setattr semaphore and
   * go to the setattr portal. */
-int mdc_setattr(struct lustre_handle *conn,
-                struct mdc_op_data *data,
-                struct iattr *iattr, void *ea, int ealen,
+int mdc_setattr(struct lustre_handle *conn, struct mdc_op_data *data,
+                struct iattr *iattr, void *ea, int ealen, void *ea2, int ea2len,
                  struct ptlrpc_request **request)
  {
          struct ptlrpc_request *req;
          struct mds_rec_setattr *rec;
          struct mdc_rpc_lock *rpc_lock;
-        int rc, bufcount = 1, size[2] = {sizeof(*rec), ealen};
+        int rc, bufcount = 1, size[3] = {sizeof(*rec), ealen, ea2len};
          ENTRY;
  
          LASSERT(iattr != NULL);
  
-        if (ealen > 0)
+        if (ealen > 0) {
                  bufcount = 2;
+                if (ea2len > 0)
+                        bufcount = 3;
+        }
  
          req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, bufcount,
                                size, NULL);
-        if (!req)
+        if (req == NULL)
                  RETURN(-ENOMEM);
  
          if (iattr->ia_valid & ATTR_FROM_OPEN) {
                  req->rq_request_portal = MDS_SETATTR_PORTAL; //XXX FIXME bug 249
                  rpc_lock = &mdc_setattr_lock;
-        } else
+        } else {
                  rpc_lock = &mdc_rpc_lock;
+        }
  
-        mds_setattr_pack(req, data, iattr, ea, ealen);
+        if (iattr->ia_valid & (ATTR_MTIME | ATTR_CTIME))
+                CDEBUG(D_INODE, "setting mtime %lu, ctime %lu\n",
+                       iattr->ia_mtime, iattr->ia_ctime);
+        mdc_setattr_pack(req, data, iattr, ea, ealen, ea2, ea2len);
  
          size[0] = sizeof(struct mds_body);
          req->rq_replen = lustre_msg_size(1, size);
  
-        mdc_get_rpc_lock(rpc_lock, NULL);
-        rc = mdc_reint(req, LUSTRE_CONN_FULL);
-        mdc_put_rpc_lock(rpc_lock, NULL);
-
+        rc = mdc_reint(req, rpc_lock, LUSTRE_CONN_FULL);
          *request = req;
          if (rc == -ERESTARTSYS)
                  rc = 0;
@@ -103,17 +108,14 @@ int mdc_setattr(struct lustre_handle *conn,
          RETURN(rc);
  }
  
-int mdc_create(struct lustre_handle *conn,
-               struct mdc_op_data *op_data,
-               const void *data, int datalen,
-               int mode, __u32 uid, __u32 gid, __u64 time, __u64 rdev,
-               struct ptlrpc_request **request)
+int mdc_create(struct lustre_handle *conn, struct mdc_op_data *op_data,
+               const void *data, int datalen, int mode, __u32 uid, __u32 gid,
+               __u64 time, __u64 rdev, struct ptlrpc_request **request)
  {
          struct ptlrpc_request *req;
-        int rc, size[3] = {sizeof(struct mds_rec_create),
-                           op_data->namelen + 1, 0};
+        int rc, size[3] = {sizeof(struct mds_rec_create), op_data->namelen + 1};
          int level, bufcount = 2;
-//        ENTRY;
+        ENTRY;
  
          if (data && datalen) {
                  size[bufcount] = datalen;
@@ -122,14 +124,12 @@ int mdc_create(struct lustre_handle *conn,
  
          req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, bufcount,
                                size, NULL);
-        if (!req)
-                return -ENOMEM;
-//                RETURN(-ENOMEM);
+        if (req == NULL)
+                RETURN(-ENOMEM);
  
-        /* mds_create_pack fills msg->bufs[1] with name
+        /* mdc_create_pack fills msg->bufs[1] with name
           * and msg->bufs[2] with tgt, for symlinks or lov MD data */
-        mds_create_pack(req, 0, op_data,
-                        mode, rdev, uid, gid, time,
+        mdc_create_pack(req, 0, op_data, mode, rdev, uid, gid, time,
                          data, datalen);
  
          size[0] = sizeof(struct mds_body);
@@ -137,7 +137,7 @@ int mdc_create(struct lustre_handle *conn,
  
          level = LUSTRE_CONN_FULL;
   resend:
-        rc = mdc_reint(req, level);
+        rc = mdc_reint(req, &mdc_rpc_lock, level);
          /* Resend if we were told to. */
          if (rc == -ERESTARTSYS) {
                  level = LUSTRE_CONN_RECOVER;
@@ -148,12 +148,10 @@ int mdc_create(struct lustre_handle *conn,
                  mdc_store_inode_generation(req, 0, 0);
  
          *request = req;
-        return rc;
-//        RETURN(rc);
+        RETURN(rc);
  }
  
-int mdc_unlink(struct lustre_handle *conn,
-               struct mdc_op_data *data,
+int mdc_unlink(struct lustre_handle *conn, struct mdc_op_data *data,
                 struct ptlrpc_request **request)
  {
          struct obd_device *obddev = class_conn2obd(conn);
@@ -162,27 +160,26 @@ int mdc_unlink(struct lustre_handle *conn,
          ENTRY;
  
          LASSERT(req == NULL);
-
          req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, 2, size,
                                NULL);
-        if (!req)
+        if (req == NULL)
                  RETURN(-ENOMEM);
          *request = req;
  
          size[0] = sizeof(struct mds_body);
          size[1] = obddev->u.cli.cl_max_mds_easize;
-        req->rq_replen = lustre_msg_size(2, size);
+        size[2] = obddev->u.cli.cl_max_mds_cookiesize;
+        req->rq_replen = lustre_msg_size(3, size);
  
-        mds_unlink_pack(req, 0, data);
+        mdc_unlink_pack(req, 0, data);
  
-        rc = mdc_reint(req, LUSTRE_CONN_FULL);
+        rc = mdc_reint(req, &mdc_rpc_lock, LUSTRE_CONN_FULL);
          if (rc == -ERESTARTSYS)
                  rc = 0;
          RETURN(rc);
  }
  
-int mdc_link(struct lustre_handle *conn,
-             struct mdc_op_data *data,
+int mdc_link(struct lustre_handle *conn, struct mdc_op_data *data,
               struct ptlrpc_request **request)
  {
          struct ptlrpc_request *req;
@@ -191,15 +188,15 @@ int mdc_link(struct lustre_handle *conn,
  
          req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, 2, size,
                                NULL);
-        if (!req)
+        if (req == NULL)
                  RETURN(-ENOMEM);
  
-        mds_link_pack(req, 0, data);
+        mdc_link_pack(req, 0, data);
  
          size[0] = sizeof(struct mds_body);
          req->rq_replen = lustre_msg_size(1, size);
  
-        rc = mdc_reint(req, LUSTRE_CONN_FULL);
+        rc = mdc_reint(req, &mdc_rpc_lock, LUSTRE_CONN_FULL);
          *request = req;
          if (rc == -ERESTARTSYS)
                  rc = 0;
@@ -207,10 +204,8 @@ int mdc_link(struct lustre_handle *conn,
          RETURN(rc);
  }
  
-int mdc_rename(struct lustre_handle *conn,
-               struct mdc_op_data *data,
-               const char *old, int oldlen,
-               const char *new, int newlen,
+int mdc_rename(struct lustre_handle *conn, struct mdc_op_data *data,
+               const char *old, int oldlen, const char *new, int newlen,
                 struct ptlrpc_request **request)
  {
          struct ptlrpc_request *req;
@@ -220,15 +215,15 @@ int mdc_rename(struct lustre_handle *conn,
  
          req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, 3, size,
                                NULL);
-        if (!req)
+        if (req == NULL)
                  RETURN(-ENOMEM);
  
-        mds_rename_pack(req, 0, data, old, oldlen, new, newlen);
+        mdc_rename_pack(req, 0, data, old, oldlen, new, newlen);
  
          size[0] = sizeof(struct mds_body);
          req->rq_replen = lustre_msg_size(1, size);
  
-        rc = mdc_reint(req, LUSTRE_CONN_FULL);
+        rc = mdc_reint(req, &mdc_rpc_lock, LUSTRE_CONN_FULL);
          *request = req;
          if (rc == -ERESTARTSYS)
                  rc = 0;
diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c

index 204a836..b205d21 100644 (file)
--- a/lustre/mdc/mdc_request.c
+++ b/lustre/mdc/mdc_request.c
@@ -46,6 +46,7 @@ struct mdc_rpc_lock mdc_setattr_lock;
  EXPORT_SYMBOL(mdc_rpc_lock);
  
  /* Helper that implements most of mdc_getstatus and signal_completed_replay. */
+/* XXX this should become mdc_get_info("key"), sending MDS_GET_INFO RPC */
  static int send_getstatus(struct obd_import *imp, struct ll_fid *rootfid,
                            int level, int msg_flags)
  {
@@ -62,7 +63,7 @@ static int send_getstatus(struct obd_import *imp, struct ll_fid *rootfid,
          req->rq_level = level;
          req->rq_replen = lustre_msg_size(1, &size);
  
-        mds_pack_req_body(req);
+        mdc_pack_req_body(req);
          req->rq_reqmsg->flags |= msg_flags;
          rc = ptlrpc_queue_wait(req);
  
@@ -88,13 +89,14 @@ static int send_getstatus(struct obd_import *imp, struct ll_fid *rootfid,
          return rc;
  }
  
-/* should become mdc_getinfo() */
+/* This should be mdc_get_info("rootfid") */
  int mdc_getstatus(struct lustre_handle *conn, struct ll_fid *rootfid)
  {
          return send_getstatus(class_conn2cliimp(conn), rootfid, LUSTRE_CONN_CON,
                                0);
  }
  
+/* should call mdc_get_info("lovdesc") and mdc_get_info("lovtgts") */
  int mdc_getlovinfo(struct obd_device *obd, struct lustre_handle *mdc_connh,
                     struct ptlrpc_request **request)
  {
@@ -233,7 +235,7 @@ int mdc_getattr(struct lustre_handle *conn, struct ll_fid *fid,
          memcpy(&body->fid1, fid, sizeof(*fid));
          body->valid = valid;
          body->eadatasize = ea_size;
-        mds_pack_req_body(req);
+        mdc_pack_req_body(req);
  
          rc = mdc_getattr_common (conn, ea_size, req);
          if (rc != 0) {
@@ -263,7 +265,7 @@ int mdc_getattr_name(struct lustre_handle *conn, struct ll_fid *fid,
          memcpy(&body->fid1, fid, sizeof(*fid));
          body->valid = valid;
          body->eadatasize = ea_size;
-        mds_pack_req_body(req);
+        mdc_pack_req_body(req);
  
          LASSERT (strnlen (filename, namelen) == namelen - 1);
          memcpy(lustre_msg_buf(req->rq_reqmsg, 1, namelen), filename, namelen);
@@ -283,9 +285,9 @@ void mdc_store_inode_generation(struct ptlrpc_request *req, int reqoff,
                                  int repoff)
  {
          struct mds_rec_create *rec =
-                lustre_msg_buf(req->rq_reqmsg, reqoff, sizeof (*rec));
+                lustre_msg_buf(req->rq_reqmsg, reqoff, sizeof(*rec));
          struct mds_body *body =
-                lustre_msg_buf(req->rq_repmsg, repoff, sizeof (*body));
+                lustre_msg_buf(req->rq_repmsg, repoff, sizeof(*body));
  
          LASSERT (rec != NULL);
          LASSERT (body != NULL);
@@ -295,11 +297,49 @@ void mdc_store_inode_generation(struct ptlrpc_request *req, int reqoff,
                    rec->cr_replayfid.generation, rec->cr_replayfid.id);
  }
  
+/* Fill in @md from the reply message of @req.
+ *
+ * @md is zeroed and md->body is pointed at reply buffer @offset.  When the
+ * body has OBD_MD_FLEASIZE set, reply buffer @offset + 1 is taken as the
+ * lov_mds_md data and passed to obd_unpackmd() to fill md->lsm.
+ *
+ * Every buffer that is used is asserted to be present and already swabbed.
+ * Returns 0, or -EPROTO if OBD_MD_FLEASIZE is set but eadatasize is 0.  A
+ * failing obd_unpackmd() ends in LBUG(). */
+int mdc_req2lustre_md(struct ptlrpc_request *req, int offset,
+                      struct lustre_handle *obd_import,
+                      struct lustre_md *md)
+{
+        struct lov_mds_md *lmm;
+        int ea_len;
+        int rc;
+        ENTRY;
+
+        LASSERT(md);
+        memset(md, 0, sizeof(*md));
+
+        md->body = lustre_msg_buf(req->rq_repmsg, offset, sizeof (*md->body));
+        LASSERT (md->body != NULL);
+        LASSERT_REPSWABBED (req, offset);
+
+        /* without the EA flag only the body is looked at */
+        if (!(md->body->valid & OBD_MD_FLEASIZE))
+                RETURN(0);
+
+        LASSERT(S_ISREG(md->body->mode));
+
+        /* the flag promises EA data, so a zero length is a protocol error */
+        if (md->body->eadatasize == 0) {
+                CERROR ("OBD_MD_FLEASIZE set, but eadatasize 0\n");
+                RETURN(-EPROTO);
+        }
+
+        ea_len = md->body->eadatasize;
+        lmm = lustre_msg_buf(req->rq_repmsg, offset + 1, ea_len);
+        LASSERT (lmm != NULL);
+        LASSERT_REPSWABBED (req, offset + 1);
+
+        rc = obd_unpackmd(obd_import, &md->lsm, lmm, ea_len);
+        if (rc < 0) {
+                /* XXX don't know if I should do this... */
+                CERROR ("Error %d unpacking eadata\n", rc);
+                LBUG();
+        }
+        LASSERT (rc >= sizeof (*md->lsm));
+
+        RETURN(0);
+}
+
+
  /* We always reserve enough space in the reply packet for a stripe MD, because
- * we don't know in advance the file type.
- *
- * XXX we could get that from ext2_dir_entry_2 file_type
- */
+ * we don't know in advance the file type. */
  int mdc_enqueue(struct lustre_handle *conn,
                  int lock_type,
                  struct lookup_intent *it,
@@ -318,9 +358,10 @@ int mdc_enqueue(struct lustre_handle *conn,
                  { .name = {data->ino1, data->gen1} };
          int size[6] = {sizeof(struct ldlm_request), sizeof(struct ldlm_intent)};
          int rc, flags = LDLM_FL_HAS_INTENT;
-        int repsize[3] = {sizeof(struct ldlm_reply),
+        int repsize[4] = {sizeof(struct ldlm_reply),
                            sizeof(struct mds_body),
-                          obddev->u.cli.cl_max_mds_easize};
+                          obddev->u.cli.cl_max_mds_easize,
+                          obddev->u.cli.cl_max_mds_cookiesize};
          struct ldlm_reply *dlm_rep;
          struct ldlm_intent *lit;
          struct ldlm_request *lockreq;
@@ -352,7 +393,7 @@ int mdc_enqueue(struct lustre_handle *conn,
                  lit->opc = (__u64)it->it_op;
  
                  /* pack the intended request */
-                mds_open_pack(req, 2, data, it->it_mode, 0, current->fsuid,
+                mdc_open_pack(req, 2, data, it->it_mode, 0, current->fsuid,
                                current->fsgid, LTIME_S(CURRENT_TIME),
                                it->it_flags, tgt, tgtlen);
                  /* get ready for the reply */
@@ -371,10 +412,10 @@ int mdc_enqueue(struct lustre_handle *conn,
                  lit->opc = (__u64)it->it_op;
  
                  /* pack the intended request */
-                mds_unlink_pack(req, 2, data);
+                mdc_unlink_pack(req, 2, data);
                  /* get ready for the reply */
-                reply_buffers = 3;
-                req->rq_replen = lustre_msg_size(3, repsize);
+                reply_buffers = 4;
+                req->rq_replen = lustre_msg_size(4, repsize);
          } else if (it->it_op & (IT_GETATTR | IT_LOOKUP)) {
                  int valid = OBD_MD_FLNOTOBD | OBD_MD_FLEASIZE;
                  size[2] = sizeof(struct mds_body);
@@ -390,7 +431,7 @@ int mdc_enqueue(struct lustre_handle *conn,
                  lit->opc = (__u64)it->it_op;
  
                  /* pack the intended request */
-                mds_getattr_pack(req, valid, 2, it->it_flags, data);
+                mdc_getattr_pack(req, valid, 2, it->it_flags, data);
                  /* get ready for the reply */
                  reply_buffers = 3;
                  req->rq_replen = lustre_msg_size(3, repsize);
@@ -447,8 +488,8 @@ int mdc_enqueue(struct lustre_handle *conn,
          }
  
          dlm_rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*dlm_rep));
-        LASSERT (dlm_rep != NULL);           /* checked by ldlm_cli_enqueue() */
-        LASSERT_REPSWABBED (req, 0);         /* swabbed by ldlm_cli_enqueue() */
+        LASSERT(dlm_rep != NULL);           /* checked by ldlm_cli_enqueue() */
+        LASSERT_REPSWABBED(req, 0);         /* swabbed by ldlm_cli_enqueue() */
  
          it->it_disposition = (int) dlm_rep->lock_policy_res1;
          it->it_status = (int) dlm_rep->lock_policy_res2;
@@ -456,8 +497,8 @@ int mdc_enqueue(struct lustre_handle *conn,
          it->it_data = req;
  
          /* We know what to expect, so we do any byte flipping required here */
-        LASSERT (reply_buffers == 3 || reply_buffers == 1);
-        if (reply_buffers == 3) {
+        LASSERT(reply_buffers == 4 || reply_buffers == 3 || reply_buffers == 1);
+        if (reply_buffers >= 3) {
                  struct mds_body *body;
  
                  body = lustre_swab_repbuf (req, 1, sizeof (*body),
@@ -471,8 +512,8 @@ int mdc_enqueue(struct lustre_handle *conn,
                          /* The eadata is opaque; just check that it is
                           * there.  Eventually, obd_unpackmd() will check
                           * the contents */
-                        eadata = lustre_swab_repbuf (req, 2, body->eadatasize,
-                                                     NULL);
+                        eadata = lustre_swab_repbuf(req, 2, body->eadatasize,
+                                                    NULL);
                          if (eadata == NULL) {
                                  CERROR ("Missing/short eadata\n");
                                  RETURN (-EPROTO);
@@ -490,8 +531,7 @@ static void mdc_replay_open(struct ptlrpc_request *req)
          struct list_head *tmp;
          struct mds_body *body;
  
-        body = lustre_swab_repbuf (req, 1, sizeof (*body),
-                                   lustre_swab_mds_body);
+        body = lustre_swab_repbuf(req, 1, sizeof(*body), lustre_swab_mds_body);
          LASSERT (body != NULL);
  
          memcpy(&old, file_fh, sizeof(old));
@@ -517,15 +557,15 @@ void mdc_set_open_replay_data(struct obd_client_handle *och)
  {
          struct ptlrpc_request *req = och->och_req;
          struct mds_rec_create *rec =
-                lustre_msg_buf(req->rq_reqmsg, 2, sizeof (*rec));
+                lustre_msg_buf(req->rq_reqmsg, 2, sizeof(*rec));
          struct mds_body *body =
-                lustre_msg_buf(req->rq_repmsg, 1, sizeof (*body));
+                lustre_msg_buf(req->rq_repmsg, 1, sizeof(*body));
  
-        LASSERT (rec != NULL);
+        LASSERT(rec != NULL);
          /* outgoing messages always in my byte order */
-        LASSERT (body != NULL);
+        LASSERT(body != NULL);
          /* incoming message in my byte order (it's been swabbed) */
-        LASSERT_REPSWABBED (req, 1);
+        LASSERT_REPSWABBED(req, 1);
  
          memcpy(&rec->cr_replayfid, &body->fid1, sizeof rec->cr_replayfid);
          req->rq_replay_cb = mdc_replay_open;
@@ -589,7 +629,7 @@ int mdc_readpage(struct lustre_handle *conn, obd_id ino, int type, __u64 offset,
          if (rc != 0)
                  GOTO(out, rc);
  
-        mds_readdir_pack(req, offset, PAGE_CACHE_SIZE, ino, type);
+        mdc_readdir_pack(req, offset, PAGE_CACHE_SIZE, ino, type);
  
          req->rq_replen = lustre_msg_size(1, &size);
          rc = ptlrpc_queue_wait(req);
@@ -622,27 +662,28 @@ static int mdc_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
          case OBD_IOC_CLIENT_RECOVER:
                  RETURN(ptlrpc_recover_import(imp, data->ioc_inlbuf1));
          case IOC_OSC_SET_ACTIVE:
-                if (data->ioc_offset) {
-                        CERROR("%s: can't reactivate MDC\n",
-                               obddev->obd_uuid.uuid);
-                        RETURN(-ENOTTY);
-                }
-                RETURN(ptlrpc_set_import_active(imp, 0));
+                RETURN(ptlrpc_set_import_active(imp, data->ioc_offset));
          default:
                  CERROR("osc_ioctl(): unrecognised ioctl %#x\n", cmd);
                  RETURN(-ENOTTY);
          }
  }
  
-static int mdc_statfs(struct obd_export *exp, struct obd_statfs *osfs)
+static int mdc_statfs(struct obd_device *obd, struct obd_statfs *osfs,
+                      unsigned long max_age)
  {
          struct ptlrpc_request *req;
          struct obd_statfs *msfs;
          int rc, size = sizeof(*msfs);
          ENTRY;
  
-        req = ptlrpc_prep_req(exp->exp_obd->u.cli.cl_import, MDS_STATFS, 0, 
-                              NULL, NULL);
+        /* We could possibly pass max_age in the request (as an absolute
+         * timestamp or a "seconds.usec ago") so the target can avoid doing
+         * extra calls into the filesystem if that isn't necessary (e.g.
+         * during mount that would help a bit).  Having relative timestamps
+         * is not so great if request processing is slow, while absolute
+         * timestamps are not ideal because they need time synchronization. */
+        req = ptlrpc_prep_req(obd->u.cli.cl_import, MDS_STATFS, 0, NULL, NULL);
          if (!req)
                  RETURN(-ENOMEM);
  
@@ -655,14 +696,13 @@ static int mdc_statfs(struct obd_export *exp, struct obd_statfs *osfs)
          if (rc)
                  GOTO(out, rc);
  
-        msfs = lustre_swab_repbuf (req, 0, sizeof (*msfs),
-                                   lustre_swab_obd_statfs);
+        msfs = lustre_swab_repbuf(req, 0, sizeof(*msfs),lustre_swab_obd_statfs);
          if (msfs == NULL) {
-                CERROR ("Can't unpack obd_statfs\n");
-                GOTO (out, rc = -EPROTO);
+                CERROR("Can't unpack obd_statfs\n");
+                GOTO(out, rc = -EPROTO);
          }
  
-        memcpy (osfs, msfs, sizeof (*msfs));
+        memcpy(osfs, msfs, sizeof (*msfs));
          EXIT;
  out:
          ptlrpc_req_finished(req);
@@ -670,11 +710,83 @@ out:
          return rc;
  }
  
+/* Send an MDS_PIN request for the inode described by (ino, gen, type),
+ * with @flag stored in the request body's flags field.
+ *
+ * On success the file handle from the reply body is copied into
+ * handle->och_fh, the request is kept in handle->och_req (mdc_unpin()
+ * finishes it later) and handle->och_magic is set to
+ * OBD_CLIENT_HANDLE_MAGIC, which is what mdc_unpin() checks for.
+ *
+ * Returns 0 on success, -ENOMEM if the request cannot be prepared, the
+ * ptlrpc_queue_wait() error if the RPC fails, or -EPROTO if the reply body
+ * cannot be unpacked.  On every failure the request is finished here and
+ * @handle is left untouched. */
+static int mdc_pin(struct lustre_handle *conn, obd_id ino, __u32 gen, int type,
+                   struct obd_client_handle *handle, int flag)
+{
+        struct ptlrpc_request *req;
+        struct mds_body *body;
+        int rc, size = sizeof(*body);
+        ENTRY;
+
+        req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_PIN, 1, &size, NULL);
+        if (req == NULL)
+                RETURN(-ENOMEM);
+
+        body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body));
+        ll_ino2fid(&body->fid1, ino, gen, type);
+        body->flags = flag;
+
+        /* the reply carries a single mds_body as well */
+        req->rq_replen = lustre_msg_size(1, &size);
+
+        mdc_get_rpc_lock(&mdc_rpc_lock, NULL);
+        rc = ptlrpc_queue_wait(req);
+        mdc_put_rpc_lock(&mdc_rpc_lock, NULL);
+        if (rc) {
+                CERROR("pin failed: %d\n", rc);
+                ptlrpc_req_finished(req);
+                RETURN(rc);
+        }
+
+        body = lustre_swab_repbuf(req, 0, sizeof(*body), lustre_swab_mds_body);
+        if (body == NULL) {
+                /* rc is 0 at this point; returning it would report a
+                 * successful pin although the handle was never filled in */
+                CERROR("Can't unpack mds_body\n");
+                ptlrpc_req_finished(req);
+                RETURN(-EPROTO);
+        }
+
+        memcpy(&handle->och_fh, &body->handle, sizeof(body->handle));
+        handle->och_req = req; /* will be dropped by unpin */
+        handle->och_magic = OBD_CLIENT_HANDLE_MAGIC;
+        RETURN(rc);
+}
+
+/* Counterpart of mdc_pin(): send an MDS_CLOSE request carrying the file
+ * handle stored in @handle and @flag, then finish the request that was kept
+ * in handle->och_req.
+ *
+ * A handle whose och_magic is not OBD_CLIENT_HANDLE_MAGIC is ignored and 0
+ * is returned.  Returns -ENOMEM if the request cannot be prepared,
+ * otherwise the ptlrpc_queue_wait() result.  Both requests are finished
+ * whether or not the RPC succeeded. */
+static int mdc_unpin(struct lustre_handle *conn,
+                     struct obd_client_handle *handle, int flag)
+{
+        struct ptlrpc_request *close_req;
+        struct mds_body *reqbody;
+        int bodysize = sizeof(*reqbody);
+        int rc;
+        ENTRY;
+
+        /* mdc_pin() sets this magic only once it has filled the handle */
+        if (handle->och_magic != OBD_CLIENT_HANDLE_MAGIC)
+                RETURN(0);
+
+        close_req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_CLOSE, 1,
+                                    &bodysize, NULL);
+        if (!close_req)
+                RETURN(-ENOMEM);
+
+        reqbody = lustre_msg_buf(close_req->rq_reqmsg, 0, sizeof(*reqbody));
+        reqbody->flags = flag;
+        memcpy(&reqbody->handle, &handle->och_fh, sizeof(reqbody->handle));
+
+        /* reply length is computed for zero buffers */
+        close_req->rq_replen = lustre_msg_size(0, NULL);
+
+        mdc_get_rpc_lock(&mdc_rpc_lock, NULL);
+        rc = ptlrpc_queue_wait(close_req);
+        mdc_put_rpc_lock(&mdc_rpc_lock, NULL);
+        if (rc)
+                CERROR("unpin failed: %d\n", rc);
+
+        /* drop the close request, then the one saved by mdc_pin() */
+        ptlrpc_req_finished(close_req);
+        ptlrpc_req_finished(handle->och_req);
+        RETURN(rc);
+}
+
  static int mdc_attach(struct obd_device *dev, obd_count len, void *data)
  {
          struct lprocfs_static_vars lvars;
  
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(mdc, &lvars);
          return lprocfs_obd_attach(dev, lvars.obd_vars);
  }
  
@@ -692,7 +804,9 @@ struct obd_ops mdc_obd_ops = {
          o_connect:     client_import_connect,
          o_disconnect:  client_import_disconnect,
          o_iocontrol:   mdc_iocontrol,
-        o_statfs:      mdc_statfs
+        o_statfs:      mdc_statfs,
+        o_pin:         mdc_pin,
+        o_unpin:       mdc_unpin,
  };
  
  int __init mdc_init(void)
@@ -700,12 +814,12 @@ int __init mdc_init(void)
          struct lprocfs_static_vars lvars;
          mdc_init_rpc_lock(&mdc_rpc_lock);
          mdc_init_rpc_lock(&mdc_setattr_lock);
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(mdc, &lvars);
          return class_register_type(&mdc_obd_ops, lvars.module_vars,
                                     LUSTRE_MDC_NAME);
  }
  
-static void __exit mdc_exit(void)
+static void /*__exit*/ mdc_exit(void)
  {
          class_unregister_type(LUSTRE_MDC_NAME);
  }
@@ -715,6 +829,7 @@ MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
  MODULE_DESCRIPTION("Lustre Metadata Client");
  MODULE_LICENSE("GPL");
  
+EXPORT_SYMBOL(mdc_req2lustre_md);
  EXPORT_SYMBOL(mdc_getstatus);
  EXPORT_SYMBOL(mdc_getlovinfo);
  EXPORT_SYMBOL(mdc_enqueue);
diff --git a/lustre/mds/.cvsignore b/lustre/mds/.cvsignore

index e530020..49c6100 100644 (file)
--- a/lustre/mds/.cvsignore
+++ b/lustre/mds/.cvsignore
@@ -6,3 +6,4 @@ Makefile
  Makefile.in
  .deps
  TAGS
+.*.cmd
diff --git a/lustre/mds/Makefile.mk b/lustre/mds/Makefile.mk

index 6b712fb..0696bd7 100644 (file)
--- a/lustre/mds/Makefile.mk
+++ b/lustre/mds/Makefile.mk
@@ -3,8 +3,9 @@
  # This code is issued under the GNU General Public License.
  # See the file COPYING in this distribution
  
-include fs/lustre/portals/Kernelenv
+include $(src)/../portals/Kernelenv
  
  obj-y += mds.o
-
-mds-objs    := mds_lov.o handler.o mds_reint.o mds_fs.o lproc_mds.o mds_internal.h mds_updates.o mds_open.o simple.o target.o
+mds-objs := mds_lov.o handler.o mds_reint.o mds_fs.o lproc_mds.o mds_open.o \
+               mds_lib.o
+       
diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c

index de3f2ed..756e290 100644 (file)
--- a/lustre/mds/handler.c
+++ b/lustre/mds/handler.c
@@ -50,19 +50,11 @@
  #include <linux/lustre_mds.h>
  #include <linux/lustre_fsfilt.h>
  #include <linux/lprocfs_status.h>
+#include <linux/lustre_commit_confd.h>
+
  #include "mds_internal.h"
  
-extern int mds_get_lovtgts(struct mds_obd *obd, int tgt_count,
-                           struct obd_uuid *uuidarray);
-extern int mds_get_lovdesc(struct mds_obd  *obd, struct lov_desc *desc);
-int mds_finish_transno(struct mds_obd *mds, struct inode *i, void *handle,
-                       struct ptlrpc_request *req, int rc, int disp);
-static int mds_cleanup(struct obd_device * obddev, int force, int failover);
-
-inline struct mds_obd *mds_req2mds(struct ptlrpc_request *req)
-{
-        return &req->rq_export->exp_obd->u.mds;
-}
+static int mds_cleanup(struct obd_device *obd, int flags);
  
  static int mds_bulk_timeout(void *data)
  {
@@ -188,6 +180,9 @@ struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid,
  
          snprintf(fid_name, sizeof(fid_name), "0x%lx", ino);
  
+        CDEBUG(D_DENTRY, "--> mds_fid2dentry: ino %lu, gen %u, sb %p\n",
+               ino, generation, mds->mds_sb);
+
          /* under ext3 this is neither supposed to return bad inodes
             nor NULL inodes. */
          result = ll_lookup_one_len(fid_name, mds->mds_fid_de, strlen(fid_name));
@@ -198,9 +193,6 @@ struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid,
          if (!inode)
                  RETURN(ERR_PTR(-ENOENT));
  
-        CDEBUG(D_DENTRY, "--> mds_fid2dentry: ino %lu, gen %u, sb %p\n",
-               inode->i_ino, inode->i_generation, inode->i_sb);
-
          if (generation && inode->i_generation != generation) {
                  /* we didn't find the right inode.. */
                  CERROR("bad inode %lu, link: %d ct: %d or generation %u/%u\n",
@@ -341,80 +333,125 @@ void mds_mfd_destroy(struct mds_file_data *mfd)
          mds_mfd_put(mfd);
  }
  
-/* Call with med->med_open_lock held, please. */
-static int mds_close_mfd(struct mds_file_data *mfd, struct mds_export_data *med)
+/* Close a "file descriptor" and possibly unlink an orphan from the
+ * PENDING directory.
+ *
+ * If we are being called from mds_disconnect() because the client has
+ * disappeared, then req == NULL and we do not update last_rcvd because
+ * there is nothing that could be recovered by the client at this stage
+ * (it will not even _have_ an entry in last_rcvd anymore).
+ */
+static int mds_mfd_close(struct ptlrpc_request *req, struct obd_device *obd,
+                         struct mds_file_data *mfd)
  {
-        struct dentry *de = NULL;
-
-#ifdef CONFIG_SMP
-        LASSERT(spin_is_locked(&med->med_open_lock));
-#endif
-        list_del(&mfd->mfd_list);
+        struct dentry *dparent = mfd->mfd_dentry->d_parent;
+        struct inode *child_inode = mfd->mfd_dentry->d_inode;
+        char fidname[LL_FID_NAMELEN];
+        int last_orphan, fidlen, rc = 0;
+        ENTRY;
  
-        if (mfd->mfd_dentry->d_parent) {
-                LASSERT(atomic_read(&mfd->mfd_dentry->d_parent->d_count));
-                de = dget(mfd->mfd_dentry->d_parent);
+        if (dparent) {
+                LASSERT(atomic_read(&dparent->d_count) > 0);
+                dparent = dget(dparent);
          }
  
-        /* this is the actual "close" */
-        l_dput(mfd->mfd_dentry);
+        fidlen = ll_fid2str(fidname, child_inode->i_ino,
+                            child_inode->i_generation);
  
-        if (de)
-                l_dput(de);
+        last_orphan = mds_open_orphan_dec_test(child_inode) &&
+                mds_inode_is_orphan(child_inode);
  
+        /* this is the actual "close" */
+        l_dput(mfd->mfd_dentry);
          mds_mfd_destroy(mfd);
-        RETURN(0);
-}
  
-static int mds_disconnect(struct lustre_handle *conn, int failover)
-{
-        struct obd_export *export = class_conn2export(conn);
-        int rc;
-        unsigned long flags;
-        ENTRY;
+        if (dparent)
+                l_dput(dparent);
  
-        ldlm_cancel_locks_for_export(export);
+        if (last_orphan) {
+                struct mds_obd *mds = &obd->u.mds;
+                struct inode *pending_dir = mds->mds_pending_dir->d_inode;
+                struct dentry *pending_child = NULL;
+                void *handle;
  
-        spin_lock_irqsave(&export->exp_lock, flags);
-        export->exp_failover = failover;
-        spin_unlock_irqrestore(&export->exp_lock, flags);
+                CDEBUG(D_ERROR, "destroying orphan object %s\n", fidname);
  
-        rc = class_disconnect(conn, failover);
-        class_export_put(export);
+                /* Sadly, there is no easy way to save pending_child from
+                 * mds_reint_unlink() into mfd, so we need to re-lookup,
+                 * but normally it will still be in the dcache.
+                 */
+                down(&pending_dir->i_sem);
+                pending_child = lookup_one_len(fidname, mds->mds_pending_dir,
+                                               fidlen);
+                if (IS_ERR(pending_child))
+                        GOTO(out_lock, rc = PTR_ERR(pending_child));
+                LASSERT(pending_child->d_inode != NULL);
+
+                handle = fsfilt_start(obd, pending_dir, FSFILT_OP_UNLINK, NULL);
+                if (IS_ERR(handle))
+                        GOTO(out_dput, rc = PTR_ERR(handle));
+                rc = vfs_unlink(pending_dir, pending_child);
+                if (rc)
+                        CERROR("error unlinking orphan %s: rc %d\n",fidname,rc);
+
+                if (req) {
+                        rc = mds_finish_transno(mds, pending_dir, handle, req,
+                                                rc, 0);
+                } else {
+                        int err = fsfilt_commit(obd, pending_dir, handle, 0);
+                        if (err) {
+                                CERROR("error committing orphan unlink: %d\n",
+                                       err);
+                                if (!rc)
+                                        rc = err;
+                        }
+                }
+        out_dput:
+                dput(pending_child);
+        out_lock:
+                up(&pending_dir->i_sem);
+        }
  
          RETURN(rc);
  }
  
-static void mds_destroy_export(struct obd_export *export)
+static int mds_disconnect(struct lustre_handle *conn, int flags)
  {
+        struct obd_export *export = class_conn2export(conn);
          struct mds_export_data *med = &export->exp_mds_data;
-        struct list_head *tmp, *n;
+        struct obd_device *obd = export->exp_obd;
+        struct obd_run_ctxt saved;
          int rc;
-
          ENTRY;
-        LASSERT(!strcmp(export->exp_obd->obd_type->typ_name,
-                        LUSTRE_MDS_NAME));
  
-        /*
-         * Close any open files.
-         */
+        push_ctxt(&saved, &obd->u.mds.mds_ctxt, NULL);
+        /* Close any open files (which may also cause orphan unlinking). */
          spin_lock(&med->med_open_lock);
-        list_for_each_safe(tmp, n, &med->med_open_head) {
+        while (!list_empty(&med->med_open_head)) {
+                struct list_head *tmp = med->med_open_head.next;
                  struct mds_file_data *mfd =
                          list_entry(tmp, struct mds_file_data, mfd_list);
  #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+                /* bug 1579: fix force-closing for 2.5 */
                  struct dentry *dentry = mfd->mfd_dentry;
+
+                list_del(&mfd->mfd_list);
+                spin_unlock(&med->med_open_lock);
+
                  CERROR("force closing client file handle for %*s (%s:%lu)\n",
                         dentry->d_name.len, dentry->d_name.name,
                         kdevname(dentry->d_inode->i_sb->s_dev),
                         dentry->d_inode->i_ino);
+                rc = mds_mfd_close(NULL, obd, mfd);
  #endif
-                rc = mds_close_mfd(mfd, med);
                  if (rc)
                          CDEBUG(D_INODE, "Error closing file: %d\n", rc);
+                spin_lock(&med->med_open_lock);
          }
          spin_unlock(&med->med_open_lock);
+        pop_ctxt(&saved, &obd->u.mds.mds_ctxt, NULL);
  
+        ldlm_cancel_locks_for_export(export);
          if (export->exp_outstanding_reply) {
                  struct ptlrpc_request *req = export->exp_outstanding_reply;
                  unsigned long          flags;
@@ -432,9 +469,13 @@ static void mds_destroy_export(struct obd_export *export)
                  export->exp_outstanding_reply = NULL;
          }
  
-        if (!export->exp_failover)
+        if (!(flags & OBD_OPT_FAILOVER))
                  mds_client_free(export);
-        EXIT;
+
+        rc = class_disconnect(conn, flags);
+        class_export_put(export);
+
+        RETURN(rc);
  }
  
  /*
@@ -448,14 +489,24 @@ static void mds_fsync_super(struct super_block *sb)
  {
          lock_kernel();
          lock_super(sb);
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
          if (sb->s_dirt && sb->s_op && sb->s_op->write_super)
                  sb->s_op->write_super(sb);
+#else
+        if (sb->s_dirt && sb->s_op) {
+                if (sb->s_op->sync_fs)
+                        sb->s_op->sync_fs(sb, 1);
+                else if (sb->s_op->write_super)
+                        sb->s_op->write_super(sb);
+        }
+#endif
          unlock_super(sb);
          unlock_kernel();
  }
  
  static int mds_getstatus(struct ptlrpc_request *req)
  {
+        struct obd_device *obd = req->rq_export->exp_obd;
          struct mds_obd *mds = mds_req2mds(req);
          struct mds_body *body;
          int rc, size = sizeof(*body);
@@ -473,7 +524,7 @@ static int mds_getstatus(struct ptlrpc_request *req)
           * requests if they have any.  This would be fsync_super() if it
           * was exported.
           */
-        mds_fsync_super(mds->mds_sb);
+        fsfilt_sync(obd, mds->mds_sb);
  
          body = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*body));
          memcpy(&body->fid1, &mds->mds_rootfid, sizeof(body->fid1));
@@ -525,8 +576,7 @@ static int mds_getlovinfo(struct ptlrpc_request *req)
          memcpy(desc, &mds->mds_lov_desc, sizeof (*desc));
  
          tgt_count = mds->mds_lov_desc.ld_tgt_count;
-        uuid0 = lustre_msg_buf (req->rq_repmsg, 1,
-                                tgt_count * sizeof (*uuid0));
+        uuid0 = lustre_msg_buf(req->rq_repmsg, 1, tgt_count * sizeof (*uuid0));
          if (uuid0 == NULL) {
                  CERROR("too many targets, enlarge client buffers\n");
                  req->rq_status = -ENOSPC;
@@ -539,6 +589,8 @@ static int mds_getlovinfo(struct ptlrpc_request *req)
                  req->rq_status = rc;
                  RETURN(0);
          }
+        memcpy(&mds->mds_osc_uuid, &mds->mds_lov_desc.ld_uuid,
+               sizeof(mds->mds_osc_uuid));
          RETURN(0);
  }
  
@@ -616,8 +668,8 @@ int mds_pack_md(struct obd_device *obd, struct lustre_msg *msg,
  
          rc = fsfilt_get_md(obd, inode, lmm, lmm_size);
          if (rc < 0) {
-                CERROR ("Error %d reading eadata for ino %lu\n",
-                        rc, inode->i_ino);
+                CERROR("Error %d reading eadata for ino %lu\n",
+                       rc, inode->i_ino);
          } else if (rc > 0) {
                  body->valid |= OBD_MD_FLEASIZE;
                  body->eadatasize = rc;
@@ -639,19 +691,22 @@ static int mds_getattr_internal(struct obd_device *obd, struct dentry *dentry,
          if (inode == NULL)
                  RETURN(-ENOENT);
  
-        body = lustre_msg_buf(req->rq_repmsg, reply_off, sizeof (*body));
-        LASSERT (body != NULL);                 /* caller prepped reply */
+        body = lustre_msg_buf(req->rq_repmsg, reply_off, sizeof(*body));
+        LASSERT(body != NULL);                 /* caller prepped reply */
  
          mds_pack_inode2fid(&body->fid1, inode);
          mds_pack_inode2body(body, inode);
  
-        if (S_ISREG(inode->i_mode) &&
-            (reqbody->valid & OBD_MD_FLEASIZE) != 0) {
-                rc = mds_pack_md(obd, req->rq_repmsg, reply_off + 1,
-                                 body, inode);
+        if (S_ISREG(inode->i_mode) && (reqbody->valid & OBD_MD_FLEASIZE) != 0) {
+                rc = mds_pack_md(obd, req->rq_repmsg, reply_off+1, body, inode);
+
+                /* If we have LOV EA data, the OST holds size, atime, mtime */
+                if (!(body->valid & OBD_MD_FLEASIZE))
+                        body->valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
+                                        OBD_MD_FLATIME | OBD_MD_FLMTIME);
          } else if (S_ISLNK(inode->i_mode) &&
                     (reqbody->valid & OBD_MD_LINKNAME) != 0) {
-                char *symname = lustre_msg_buf(req->rq_repmsg, reply_off + 1, 0);
+                char *symname = lustre_msg_buf(req->rq_repmsg, reply_off + 1,0);
                  int len;
  
                  LASSERT (symname != NULL);       /* caller prepped reply */
@@ -672,6 +727,7 @@ static int mds_getattr_internal(struct obd_device *obd, struct dentry *dentry,
                          rc = 0;
                  }
          }
+
          RETURN(rc);
  }
  
@@ -684,11 +740,10 @@ static int mds_getattr_pack_msg(struct ptlrpc_request *req, struct inode *inode,
          ENTRY;
  
          body = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*body));
-        LASSERT (body != NULL);                 /* checked by caller */
-        LASSERT_REQSWABBED (req, offset);       /* swabbed by caller */
+        LASSERT(body != NULL);                 /* checked by caller */
+        LASSERT_REQSWABBED(req, offset);       /* swabbed by caller */
  
-        if (S_ISREG(inode->i_mode) &&
-            (body->valid & OBD_MD_FLEASIZE) != 0) {
+        if (S_ISREG(inode->i_mode) && (body->valid & OBD_MD_FLEASIZE)) {
                  int rc = fsfilt_get_md(req->rq_export->exp_obd, inode, NULL, 0);
                  CDEBUG(D_INODE, "got %d bytes MD data for inode %lu\n",
                         rc, inode->i_ino);
@@ -701,14 +756,14 @@ static int mds_getattr_pack_msg(struct ptlrpc_request *req, struct inode *inode,
                          size[bufcount] = 0;
                          CERROR("MD size %d larger than maximum possible %u\n",
                                 rc, mds->mds_max_mdsize);
-                } else
+                } else {
                          size[bufcount] = rc;
+                }
                  bufcount++;
-        } else if (S_ISLNK (inode->i_mode) &&
-                   (body->valid & OBD_MD_LINKNAME) != 0) {
+        } else if (S_ISLNK(inode->i_mode) && (body->valid & OBD_MD_LINKNAME)) {
                  if (inode->i_size + 1 != body->eadatasize)
-                        CERROR ("symlink size: %Lu, reply space: %d\n",
-                                inode->i_size + 1, body->eadatasize);
+                        CERROR("symlink size: %Lu, reply space: %d\n",
+                               inode->i_size + 1, body->eadatasize);
                  size[bufcount] = MIN(inode->i_size + 1, body->eadatasize);
                  bufcount++;
                  CDEBUG(D_INODE, "symlink size: %Lu, reply space: %d\n",
@@ -724,9 +779,8 @@ static int mds_getattr_pack_msg(struct ptlrpc_request *req, struct inode *inode,
          rc = lustre_pack_msg(bufcount, size, NULL, &req->rq_replen,
                               &req->rq_repmsg);
          if (rc) {
-                CERROR("out of memoryK\n");
-                req->rq_status = rc;
-                GOTO(out, rc);
+                CERROR("out of memory\n");
+                GOTO(out, req->rq_status = rc);
          }
  
          EXIT;
@@ -738,6 +792,8 @@ static int mds_getattr_pack_msg(struct ptlrpc_request *req, struct inode *inode,
  static void reconstruct_getattr_name(int offset, struct ptlrpc_request *req,
                                       struct lustre_handle *client_lockh)
  {
+        struct mds_export_data *med = &req->rq_export->exp_mds_data;
+        struct mds_client_data *mcd = med->med_mcd;
          struct obd_device *obd = req->rq_export->exp_obd;
          struct mds_obd *mds = mds_req2mds(req);
          struct dentry *parent, *child;
@@ -748,8 +804,15 @@ static void reconstruct_getattr_name(int offset, struct ptlrpc_request *req,
          int namelen, rc = 0;
          char *name;
  
-        if (req->rq_export->exp_outstanding_reply)
-                mds_steal_ack_locks(req->rq_export, req);
+        req->rq_transno = mcd->mcd_last_transno;
+        req->rq_status = mcd->mcd_last_result;
+
+        LASSERT (req->rq_export->exp_outstanding_reply);
+
+        mds_steal_ack_locks(req->rq_export, req);
+
+        if (req->rq_status)
+                return;
  
          body = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*body));
          LASSERT (body != NULL);                 /* checked by caller */
@@ -770,6 +833,7 @@ static void reconstruct_getattr_name(int offset, struct ptlrpc_request *req,
          uc.ouc_cap = body->capability;
          uc.ouc_suppgid1 = body->suppgid;
          uc.ouc_suppgid2 = -1;
+
          push_ctxt(&saved, &mds->mds_ctxt, &uc);
          parent = mds_fid2dentry(mds, &body->fid1, NULL);
          LASSERT(!IS_ERR(parent));
@@ -785,7 +849,8 @@ static void reconstruct_getattr_name(int offset, struct ptlrpc_request *req,
          }
  
          rc = mds_getattr_internal(obd, child, req, body, offset);
-        req->rq_status = rc;
+        /* XXX need to handle error here */
+        LASSERT(!rc);
          l_dput(child);
          l_dput(parent);
  }
@@ -795,6 +860,7 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req,
  {
          struct mds_obd *mds = mds_req2mds(req);
          struct obd_device *obd = req->rq_export->exp_obd;
+        struct ldlm_reply *rep = NULL;
          struct obd_run_ctxt saved;
          struct mds_body *body;
          struct dentry *de = NULL, *dchild = NULL;
@@ -803,7 +869,7 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req,
          struct ldlm_res_id child_res_id = { .name = {0} };
          struct lustre_handle parent_lockh;
          int namesize;
-        int flags = 0, rc = 0, cleanup_phase = 0, req_was_resent;
+        int flags = 0, rc = 0, cleanup_phase = 0;
          char *name;
          ENTRY;
  
@@ -811,34 +877,39 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req,
  
          /* Swab now, before anyone looks inside the request */
  
-        body = lustre_swab_reqbuf (req, offset, sizeof (*body),
-                                   lustre_swab_mds_body);
+        body = lustre_swab_reqbuf(req, offset, sizeof(*body),
+                                  lustre_swab_mds_body);
          if (body == NULL) {
-                CERROR ("Can't swab mds_body\n");
-                GOTO (cleanup, rc = -EFAULT);
+                CERROR("Can't swab mds_body\n");
+                GOTO(cleanup, rc = -EFAULT);
          }
  
-        LASSERT_REQSWAB (req, offset + 1);
-        name = lustre_msg_string (req->rq_reqmsg, offset + 1, 0);
+        LASSERT_REQSWAB(req, offset + 1);
+        name = lustre_msg_string(req->rq_reqmsg, offset + 1, 0);
          if (name == NULL) {
-                CERROR ("Can't unpack name\n");
-                GOTO (cleanup, rc = -EFAULT);
+                CERROR("Can't unpack name\n");
+                GOTO(cleanup, rc = -EFAULT);
          }
          namesize = req->rq_reqmsg->buflens[offset + 1];
  
-        req_was_resent = lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT;
-        if (child_lockh->cookie) {
-                LASSERT(req_was_resent);
-                reconstruct_getattr_name(offset, req, child_lockh);
-                RETURN(0);
-        } else if (req_was_resent) {
-                DEBUG_REQ(D_HA, req, "no reply for RESENT req");
+        if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) {
+                struct obd_export *exp = req->rq_export;
+                if (exp->exp_outstanding_reply &&
+                    exp->exp_outstanding_reply->rq_xid == req->rq_xid) {
+                        reconstruct_getattr_name(offset, req, child_lockh);
+                        RETURN(0);
+                }
+                DEBUG_REQ(D_HA, req, "no reply for RESENT req (have "LPD64")",
+                          exp->exp_outstanding_reply ?
+                          exp->exp_outstanding_reply->rq_xid : (u64)0);
          }
  
          LASSERT (offset == 0 || offset == 2);
-        /* if requests were at offset 2, replies go back at 1 */
-        if (offset)
+        /* if requests were at offset 2, the getattr reply goes back at 1 */
+        if (offset) { 
+                rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*rep));
                  offset = 1;
+        }
  
          uc.ouc_fsuid = body->fsuid;
          uc.ouc_fsgid = body->fsgid;
@@ -847,6 +918,7 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req,
          uc.ouc_suppgid2 = -1;
          push_ctxt(&saved, &mds->mds_ctxt, &uc);
          /* Step 1: Lookup/lock parent */
+        intent_set_disposition(rep, DISP_LOOKUP_EXECD);
          de = mds_fid2locked_dentry(obd, &body->fid1, NULL, LCK_PR,
                                     &parent_lockh);
          if (IS_ERR(de))
@@ -868,7 +940,10 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req,
          cleanup_phase = 2; /* child dentry */
  
          if (dchild->d_inode == NULL) {
+                intent_set_disposition(rep, DISP_LOOKUP_NEG);
                  GOTO(cleanup, rc = -ENOENT);
+        } else {
+                intent_set_disposition(rep, DISP_LOOKUP_POS);
          }
  
          /* Step 3: Lock child */
@@ -963,11 +1038,17 @@ out_pop:
          return rc;
  }
  
+
+static int mds_obd_statfs(struct obd_device *obd, struct obd_statfs *osfs,
+                          unsigned long max_age)
+{
+        return fsfilt_statfs(obd, obd->u.mds.mds_sb, osfs);
+}
+
  static int mds_statfs(struct ptlrpc_request *req)
  {
          struct obd_device *obd = req->rq_export->exp_obd;
-        struct obd_statfs *osfs;
-        int rc, size = sizeof(*osfs);
+        int rc, size = sizeof(struct obd_statfs);
          ENTRY;
  
          rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
@@ -976,10 +1057,10 @@ static int mds_statfs(struct ptlrpc_request *req)
                  GOTO(out, rc);
          }
  
-        osfs = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*osfs));
-        rc = fsfilt_statfs(obd, obd->u.mds.mds_sb, osfs);
+        /* We call this so that we can cache a bit - 1 second (HZ jiffies) */
+        rc = obd_statfs(obd, lustre_msg_buf(req->rq_repmsg,0,size),jiffies-HZ);
          if (rc) {
-                CERROR("mds: statfs failed: rc %d\n", rc);
+                CERROR("mds_obd_statfs failed: rc %d\n", rc);
                  GOTO(out, rc);
          }
  
@@ -1006,8 +1087,10 @@ static void reconstruct_close(struct ptlrpc_request *req)
  static int mds_close(struct ptlrpc_request *req)
  {
          struct mds_export_data *med = &req->rq_export->exp_mds_data;
+        struct obd_device *obd = req->rq_export->exp_obd;
          struct mds_body *body;
          struct mds_file_data *mfd;
+        struct obd_run_ctxt saved;
          int rc;
          ENTRY;
  
@@ -1028,10 +1111,20 @@ static int mds_close(struct ptlrpc_request *req)
                  RETURN(-ESTALE);
          }
  
+        rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg);
+        if (rc) {
+                CERROR("lustre_pack_msg: rc = %d\n", rc);
+                req->rq_status = rc;
+        }
+
          spin_lock(&med->med_open_lock);
-        req->rq_status = mds_close_mfd(mfd, med);
+        list_del(&mfd->mfd_list);
          spin_unlock(&med->med_open_lock);
  
+        push_ctxt(&saved, &obd->u.mds.mds_ctxt, NULL);
+        req->rq_status = mds_mfd_close(rc ? NULL : req, obd, mfd);
+        pop_ctxt(&saved, &obd->u.mds.mds_ctxt, NULL);
+
          if (OBD_FAIL_CHECK(OBD_FAIL_MDS_CLOSE_PACK)) {
                  CERROR("test case OBD_FAIL_MDS_CLOSE_PACK\n");
                  req->rq_status = -ENOMEM;
@@ -1039,12 +1132,6 @@ static int mds_close(struct ptlrpc_request *req)
                  RETURN(-ENOMEM);
          }
  
-        rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg);
-        if (rc) {
-                CERROR("mds: lustre_pack_msg: rc = %d\n", rc);
-                req->rq_status = rc;
-        }
-
          mds_mfd_put(mfd);
          RETURN(0);
  }
@@ -1073,7 +1160,7 @@ static int mds_readpage(struct ptlrpc_request *req)
                  GOTO (out, rc = -EFAULT);
  
          /* body->size is actually the offset -eeb */
-        if ((body->size & (PAGE_SIZE - 1)) != 0) {
+        if ((body->size & ~PAGE_MASK) != 0) {
                  CERROR ("offset "LPU64"not on a page boundary\n", body->size);
                  GOTO (out, rc = -EFAULT);
          }
@@ -1306,9 +1393,10 @@ int mds_handle(struct ptlrpc_request *req)
                  break;
  
          case MDS_REINT: {
-                __u32 *opcp = lustre_msg_buf (req->rq_reqmsg, 0, sizeof (*opcp));
+                __u32 *opcp = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*opcp));
                  __u32  opc;
-                int size[2] = {sizeof(struct mds_body), mds->mds_max_mdsize};
+                int size[3] = {sizeof(struct mds_body), mds->mds_max_mdsize,
+                               mds->mds_max_cookiesize};
                  int bufcount;
  
                  /* NB only peek inside req now; mds_reint() will swab it */
@@ -1319,15 +1407,18 @@ int mds_handle(struct ptlrpc_request *req)
                  }
                  opc = *opcp;
                  if (lustre_msg_swabbed (req->rq_reqmsg))
-                        __swab32s (&opc);
+                        __swab32s(&opc);
  
                  DEBUG_REQ(D_INODE, req, "reint %d (%s)", opc,
-                          (opc < sizeof (reint_names) / sizeof (reint_names[0]) ||
-                           reint_names[opc] == NULL) ? reint_names[opc] : "unknown opcode");
+                          (opc < sizeof(reint_names) / sizeof(reint_names[0]) ||
+                           reint_names[opc] == NULL) ? reint_names[opc] :
+                                                       "unknown opcode");
  
                  OBD_FAIL_RETURN(OBD_FAIL_MDS_REINT_NET, 0);
  
                  if (opc == REINT_UNLINK)
+                        bufcount = 3;
+                else if (opc == REINT_OPEN)
                          bufcount = 2;
                  else
                          bufcount = 1;
@@ -1348,11 +1439,23 @@ int mds_handle(struct ptlrpc_request *req)
                  rc = mds_close(req);
                  break;
  
+        case MDS_PIN:
+                DEBUG_REQ(D_INODE, req, "pin");
+                OBD_FAIL_RETURN(OBD_FAIL_MDS_PIN_NET, 0);
+                rc = mds_pin(req);
+                break;
+
          case OBD_PING:
                  DEBUG_REQ(D_INODE, req, "ping");
                  rc = target_handle_ping(req);
                  break;
  
+        case OBD_LOG_CANCEL:
+                CDEBUG(D_INODE, "log cancel\n");
+                OBD_FAIL_RETURN(OBD_FAIL_OBD_LOG_CANCEL_NET, 0);
+                rc = -ENOTSUPP; /* la la la */
+                break;
+
          case LDLM_ENQUEUE:
                  DEBUG_REQ(D_INODE, req, "enqueue");
                  OBD_FAIL_RETURN(OBD_FAIL_LDLM_ENQUEUE, 0);
@@ -1385,7 +1488,7 @@ int mds_handle(struct ptlrpc_request *req)
                  struct obd_device *obd = list_entry(mds, struct obd_device,
                                                      u.mds);
                  req->rq_repmsg->last_xid =
-                        le64_to_cpu (med->med_mcd->mcd_last_xid);
+                        le64_to_cpu(med->med_mcd->mcd_last_xid);
  
                  if (!obd->obd_no_transno) {
                          req->rq_repmsg->last_committed =
@@ -1421,8 +1524,9 @@ int mds_handle(struct ptlrpc_request *req)
   *
   * Also assumes for mds_last_transno that we are not modifying it (no locking).
   */
-int mds_update_server_data(struct mds_obd *mds)
+int mds_update_server_data(struct obd_device *obd)
  {
+        struct mds_obd *mds = &obd->u.mds;
          struct mds_server_data *msd = mds->mds_server_data;
          struct file *filp = mds->mds_rcvd_filp;
          struct obd_run_ctxt saved;
@@ -1433,21 +1537,16 @@ int mds_update_server_data(struct mds_obd *mds)
          msd->msd_last_transno = cpu_to_le64(mds->mds_last_transno);
          msd->msd_mount_count = cpu_to_le64(mds->mds_mount_count);
  
-        CDEBUG(D_SUPER, "MDS mount_count is %Lu, last_transno is %Lu\n",
-               (unsigned long long)mds->mds_mount_count,
-               (unsigned long long)mds->mds_last_transno);
-        rc = lustre_fwrite(filp, (char *)msd, sizeof(*msd), &off);
+        CDEBUG(D_SUPER, "MDS mount_count is "LPU64", last_transno is "LPU64"\n",
+               mds->mds_mount_count, mds->mds_last_transno);
+        rc = fsfilt_write_record(obd, filp, (char *)msd, sizeof(*msd), &off);
          if (rc != sizeof(*msd)) {
                  CERROR("error writing MDS server data: rc = %d\n", rc);
                  if (rc > 0)
                          rc = -EIO;
                  GOTO(out, rc);
          }
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        rc = fsync_dev(filp->f_dentry->d_inode->i_rdev);
-#else
          rc = file_fsync(filp, filp->f_dentry, 1);
-#endif
          if (rc)
                  CERROR("error flushing MDS server data: rc = %d\n", rc);
  
@@ -1457,10 +1556,10 @@ out:
  }
  
  /* mount the file system (secretly) */
-static int mds_setup(struct obd_device *obddev, obd_count len, void *buf)
+static int mds_setup(struct obd_device *obd, obd_count len, void *buf)
  {
          struct obd_ioctl_data* data = buf;
-        struct mds_obd *mds = &obddev->u.mds;
+        struct mds_obd *mds = &obd->u.mds;
          struct vfsmount *mnt;
          int rc = 0;
          unsigned long page;
@@ -1473,9 +1572,12 @@ static int mds_setup(struct obd_device *obddev, obd_count len, void *buf)
          if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2)
                  RETURN(rc = -EINVAL);
  
-        obddev->obd_fsops = fsfilt_get_ops(data->ioc_inlbuf2);
-        if (IS_ERR(obddev->obd_fsops))
-                RETURN(rc = PTR_ERR(obddev->obd_fsops));
+        if (data->ioc_inlbuf4)
+                obd_str2uuid(&mds->mds_osc_uuid, data->ioc_inlbuf4);
+
+        obd->obd_fsops = fsfilt_get_ops(data->ioc_inlbuf2);
+        if (IS_ERR(obd->obd_fsops))
+                RETURN(rc = PTR_ERR(obd->obd_fsops));
  
  
          if (data->ioc_inllen3 > 0 && data->ioc_inlbuf3) {
@@ -1511,73 +1613,93 @@ static int mds_setup(struct obd_device *obddev, obd_count len, void *buf)
  
          spin_lock_init(&mds->mds_transno_lock);
          mds->mds_max_mdsize = sizeof(struct lov_mds_md);
-        rc = mds_fs_setup(obddev, mnt);
+        mds->mds_max_cookiesize = sizeof(struct llog_cookie);
+        rc = mds_fs_setup(obd, mnt);
          if (rc) {
                  CERROR("MDS filesystem method init failed: rc = %d\n", rc);
                  GOTO(err_put, rc);
          }
  
-        obddev->obd_namespace =
-                ldlm_namespace_new("mds_server", LDLM_NAMESPACE_SERVER);
-        if (obddev->obd_namespace == NULL) {
-                mds_cleanup(obddev, 0, 0);
-                GOTO(err_fs, rc = -ENOMEM);
+#ifdef ENABLE_ORPHANS
+        rc = llog_start_commit_thread();
+        if (rc < 0)
+                GOTO(err_fs, rc);
+#endif
+
+#ifdef ENABLE_ORPHANS
+        mds->mds_catalog = mds_get_catalog(obd);
+        if (IS_ERR(mds->mds_catalog))
+                GOTO(err_fs, rc = PTR_ERR(mds->mds_catalog));
+#endif
+
+        obd->obd_namespace = ldlm_namespace_new("mds_server",
+                                                LDLM_NAMESPACE_SERVER);
+        if (obd->obd_namespace == NULL) {
+                mds_cleanup(obd, 0);
+                GOTO(err_log, rc = -ENOMEM);
          }
  
          ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL,
-                           "mds_ldlm_client", &obddev->obd_ldlm_client);
+                           "mds_ldlm_client", &obd->obd_ldlm_client);
  
          mds->mds_has_lov_desc = 0;
+        obd->obd_replayable = 1;
  
          RETURN(0);
  
+err_log:
+#ifdef ENABLE_ORPHANS
+        mds_put_catalog(mds->mds_catalog);
+        /* No extra cleanup needed for llog_start_commit_thread() */
  err_fs:
-        mds_fs_cleanup(obddev, 0);
+#endif
+        mds_fs_cleanup(obd, 0);
  err_put:
          unlock_kernel();
          mntput(mds->mds_vfsmnt);
          mds->mds_sb = 0;
          lock_kernel();
  err_ops:
-        fsfilt_put_ops(obddev->obd_fsops);
+        fsfilt_put_ops(obd->obd_fsops);
          return rc;
  }
  
-static int mds_cleanup(struct obd_device *obddev, int force, int failover)
+static int mds_cleanup(struct obd_device *obd, int flags)
  {
-        struct super_block *sb;
-        struct mds_obd *mds = &obddev->u.mds;
+        struct mds_obd *mds = &obd->u.mds;
          ENTRY;
  
-        sb = mds->mds_sb;
-        if (!mds->mds_sb)
+        if (mds->mds_sb == NULL)
                  RETURN(0);
  
-        mds_update_server_data(mds);
-        mds_fs_cleanup(obddev, failover);
+#ifdef ENABLE_ORPHANS
+        mds_put_catalog(mds->mds_catalog);
+#endif
+        if (mds->mds_osc_obd)
+                obd_disconnect(&mds->mds_osc_conn, flags);
+        mds_update_server_data(obd);
+        mds_fs_cleanup(obd, flags);
  
          unlock_kernel();
  
          /* 2 seems normal on mds, (may_umount() also expects 2
            fwiw), but we only see 1 at this point in obdfilter. */
-        if (atomic_read(&obddev->u.mds.mds_vfsmnt->mnt_count) > 2){
-                CERROR("%s: mount point busy, mnt_count: %d\n",
-                       obddev->obd_name,
-                       atomic_read(&obddev->u.mds.mds_vfsmnt->mnt_count));
-        }
+        if (atomic_read(&obd->u.mds.mds_vfsmnt->mnt_count) > 2)
+                CERROR("%s: mount point busy, mnt_count: %d\n", obd->obd_name,
+                       atomic_read(&obd->u.mds.mds_vfsmnt->mnt_count));
  
          mntput(mds->mds_vfsmnt);
          mds->mds_sb = 0;
  
-        ldlm_namespace_free(obddev->obd_namespace);
+        ldlm_namespace_free(obd->obd_namespace);
  
-        if (obddev->obd_recovering)
-                target_cancel_recovery_timer(obddev);
+        if (obd->obd_recovering)
+                target_cancel_recovery_timer(obd);
          lock_kernel();
  #ifdef CONFIG_DEV_RDONLY
          dev_clear_rdonly(2);
  #endif
-        fsfilt_put_ops(obddev->obd_fsops);
+        fsfilt_put_ops(obd->obd_fsops);
  
          RETURN(0);
  }
@@ -1616,13 +1738,26 @@ static void fixup_handle_for_resent_req(struct ptlrpc_request *req,
                    remote_hdl.cookie);
  }
  
+int intent_disposition(struct ldlm_reply *rep, int flag)
+{
+        if (!rep)
+                return 0;
+        return (rep->lock_policy_res1 & flag);
+}
+
+void intent_set_disposition(struct ldlm_reply *rep, int flag)
+{
+        if (!rep)
+                return;
+        rep->lock_policy_res1 |= flag;
+}
+
  static int ldlm_intent_policy(struct ldlm_namespace *ns,
                                struct ldlm_lock **lockp, void *req_cookie,
                                ldlm_mode_t mode, int flags, void *data)
  {
          struct ptlrpc_request *req = req_cookie;
          struct ldlm_lock *lock = *lockp;
-        int rc = 0;
          ENTRY;
  
          if (!req_cookie)
@@ -1632,34 +1767,33 @@ static int ldlm_intent_policy(struct ldlm_namespace *ns,
                  /* an intent needs to be considered */
                  struct ldlm_intent *it;
                  struct mds_obd *mds = &req->rq_export->exp_obd->u.mds;
-                struct mds_body *mds_body;
                  struct ldlm_reply *rep;
-                struct lustre_handle lockh = { 0 };
+                struct lustre_handle lockh;
                  struct ldlm_lock *new_lock;
-                int rc, offset = 2, repsize[3] = {sizeof(struct ldlm_reply),
-                                                  sizeof(struct mds_body),
-                                                  mds->mds_max_mdsize};
+                int offset = 2, repsize[4] = {sizeof(struct ldlm_reply),
+                                              sizeof(struct mds_body),
+                                              mds->mds_max_mdsize,
+                                              mds->mds_max_cookiesize};
  
-                it = lustre_swab_reqbuf (req, 1, sizeof (*it),
-                                         lustre_swab_ldlm_intent);
+                it = lustre_swab_reqbuf(req, 1, sizeof (*it),
+                                        lustre_swab_ldlm_intent);
                  if (it == NULL) {
                          CERROR ("Intent missing\n");
-                        rc = req->rq_status = -EFAULT;
-                        RETURN (rc);
+                        req->rq_status = -EFAULT;
+                        RETURN(req->rq_status);
                  }
  
                  LDLM_DEBUG(lock, "intent policy, opc: %s",
                             ldlm_it2str(it->opc));
  
-                rc = lustre_pack_msg(3, repsize, NULL, &req->rq_replen,
-                                     &req->rq_repmsg);
-                if (rc) {
-                        rc = req->rq_status = -ENOMEM;
-                        RETURN(rc);
-                }
+                req->rq_status = lustre_pack_msg(it->opc == IT_UNLINK ? 4 : 3,
+                                                 repsize, NULL, &req->rq_replen,
+                                                 &req->rq_repmsg);
+                if (req->rq_status)
+                        RETURN(req->rq_status);
  
                  rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*rep));
-                rep->lock_policy_res1 = IT_INTENT_EXEC;
+                intent_set_disposition(rep, DISP_IT_EXECD);
  
                  fixup_handle_for_resent_req(req, lock, &lockh);
  
@@ -1667,45 +1801,28 @@ static int ldlm_intent_policy(struct ldlm_namespace *ns,
                  switch ((long)it->opc) {
                  case IT_OPEN:
                  case IT_CREAT|IT_OPEN:
-                        rc = mds_reint(req, offset, &lockh);
-                        /* We return a dentry to the client if IT_OPEN_POS is
-                         * set, or if we make it to the OPEN portion of the
-                         * programme (which implies that we created) */
-                        if (!(rep->lock_policy_res1 & IT_OPEN_POS ||
-                              rep->lock_policy_res1 & IT_OPEN_OPEN)) {
-                                rep->lock_policy_res2 = rc;
+                        /* XXX swab here to assert that an mds_open reint
+                         * packet is following */
+                        rep->lock_policy_res2 = mds_reint(req, offset, &lockh);
+                        /* We abort the lock if the lookup was negative and
+                         * we did not make it to the OPEN portion */
+                        if (intent_disposition(rep, DISP_LOOKUP_NEG) &&
+                            !intent_disposition(rep, DISP_OPEN_OPEN))
                                  RETURN(ELDLM_LOCK_ABORTED);
-                        }
-                        break;
-                case IT_UNLINK:
-                        rc = mds_reint(req, offset, &lockh);
-                        /* Don't return a lock if the unlink failed, or if we're
-                         * not sending back an EA */
-                        if (rc) {
-                                rep->lock_policy_res2 = rc;
-                                RETURN(ELDLM_LOCK_ABORTED);
-                        }
-                        if (req->rq_status != 0) {
-                                rep->lock_policy_res2 = req->rq_status;
-                                RETURN(ELDLM_LOCK_ABORTED);
-                        }
-                        mds_body = lustre_msg_buf(req->rq_repmsg, 1, sizeof (*mds_body));
-                        if (!(mds_body->valid & OBD_MD_FLEASIZE)) {
-                                rep->lock_policy_res2 = rc;
-                                RETURN(ELDLM_LOCK_ABORTED);
-                        }
                          break;
                  case IT_GETATTR:
                  case IT_LOOKUP:
                  case IT_READDIR:
-                        rc = mds_getattr_name(offset, req, &lockh);
+                        rep->lock_policy_res2 = mds_getattr_name(offset, req,
+                                                                 &lockh);
                          /* FIXME: we need to sit down and decide on who should
                           * set req->rq_status, who should return negative and
-                         * positive return values, and what they all mean. */
-                        if (rc) {
-                                rep->lock_policy_res2 = rc;
+                         * positive return values, and what they all mean. 
+                         * - replay: returns 0 & req->rq_status is old status
+                         * - otherwise: returns req->rq_status */
+                        if (!intent_disposition(rep, DISP_LOOKUP_POS) || 
+                            rep->lock_policy_res2)
                                  RETURN(ELDLM_LOCK_ABORTED);
-                        }
                          if (req->rq_status != 0) {
                                  rep->lock_policy_res2 = req->rq_status;
                                  RETURN(ELDLM_LOCK_ABORTED);
@@ -1717,10 +1834,17 @@ static int ldlm_intent_policy(struct ldlm_namespace *ns,
                  }
  
                  /* By this point, whatever function we called above must have
-                 * filled in 'lockh' or returned an error.  We want to give the
-                 * new lock to the client instead of whatever lock it was about
-                 * to get. */
+                 * either filled in 'lockh', been an intent replay, or returned
+                 * an error.  We want to allow replayed RPCs to not get a lock,
+                 * since we would just drop it below anyways because lock replay
+                 * is done separately by the client afterwards.  For regular
+                 * RPCs we want to give the new lock to the client instead of
+                 * whatever lock it was about to get.
+                 */
                  new_lock = ldlm_handle2lock(&lockh);
+                if (flags & LDLM_FL_INTENT_ONLY && !new_lock)
+                        RETURN(ELDLM_LOCK_ABORTED);
+
                  LASSERT(new_lock != NULL);
  
                  /* If we've already given this lock to a client once, then we
@@ -1785,14 +1909,13 @@ static int ldlm_intent_policy(struct ldlm_namespace *ns,
                  RETURN(ELDLM_LOCK_REPLACED);
          } else {
                  int size = sizeof(struct ldlm_reply);
-                rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen,
-                                     &req->rq_repmsg);
-                if (rc) {
+                if (lustre_pack_msg(1, &size, NULL, &req->rq_replen,
+                                    &req->rq_repmsg)) {
                          LBUG();
                          RETURN(-ENOMEM);
                  }
          }
-        RETURN(rc);
+        RETURN(0);
  }
  
  int mds_attach(struct obd_device *dev, obd_count len, void *data)
@@ -1906,7 +2029,7 @@ err_thread:
  }
  
  
-static int mdt_cleanup(struct obd_device *obddev, int force, int failover)
+static int mdt_cleanup(struct obd_device *obddev, int flags)
  {
          struct mds_obd *mds = &obddev->u.mds;
          ENTRY;
@@ -1928,15 +2051,15 @@ extern int mds_iocontrol(unsigned int cmd, struct lustre_handle *conn,
  
  /* use obd ops to offer management infrastructure */
  static struct obd_ops mds_obd_ops = {
-        o_owner:          THIS_MODULE,
-        o_attach:         mds_attach,
-        o_detach:         mds_detach,
-        o_connect:        mds_connect,
-        o_disconnect:     mds_disconnect,
-        o_setup:          mds_setup,
-        o_cleanup:        mds_cleanup,
-        o_iocontrol:      mds_iocontrol,
-        o_destroy_export: mds_destroy_export
+        o_owner:       THIS_MODULE,
+        o_attach:      mds_attach,
+        o_detach:      mds_detach,
+        o_connect:     mds_connect,
+        o_disconnect:  mds_disconnect,
+        o_setup:       mds_setup,
+        o_cleanup:     mds_cleanup,
+        o_statfs:      mds_obd_statfs,
+        o_iocontrol:   mds_iocontrol
  };
  
  static struct obd_ops mdt_obd_ops = {
@@ -1961,7 +2084,7 @@ static int __init mds_init(void)
          return 0;
  }
  
-static void __exit mds_exit(void)
+static void /*__exit*/ mds_exit(void)
  {
          ldlm_unregister_intent();
          class_unregister_type(LUSTRE_MDS_NAME);
diff --git a/lustre/mds/lproc_mds.c b/lustre/mds/lproc_mds.c

index 5d6fa57..e355415 100644 (file)
--- a/lustre/mds/lproc_mds.c
+++ b/lustre/mds/lproc_mds.c
@@ -37,71 +37,43 @@ struct lprocfs_vars lprocfs_mdt_module_vars[] = { {0} };
  
  #else
  
-static inline int lprocfs_mds_statfs(void *data, struct statfs *sfs)
-{
-        struct obd_device* dev = (struct obd_device*) data;
-        struct mds_obd *mds;
-
-        LASSERT(dev != NULL);
-        mds = &dev->u.mds;
-        return vfs_statfs(mds->mds_sb, sfs);
-}
-
-DEFINE_LPROCFS_STATFS_FCT(rd_blksize,     lprocfs_mds_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, lprocfs_mds_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree,  lprocfs_mds_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filestotal,  lprocfs_mds_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filesfree,   lprocfs_mds_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filegroups,  lprocfs_mds_statfs);
-
-int rd_fstype(char *page, char **start, off_t off, int count, int *eof,
-              void *data)
-{
-        struct obd_device *obd = (struct obd_device *)data;
-
-        LASSERT(obd != NULL);
-        LASSERT(obd->obd_fsops != NULL);
-        LASSERT(obd->obd_fsops->fs_type != NULL);
-        return snprintf(page, count, "%s\n", obd->obd_fsops->fs_type);
-}
-
-int lprocfs_mds_rd_mntdev(char *page, char **start, off_t off, int count,
-                          int *eof, void *data)
+static int lprocfs_mds_rd_mntdev(char *page, char **start, off_t off, int count,
+                                 int *eof, void *data)
  {
          struct obd_device* obd = (struct obd_device *)data;
  
          LASSERT(obd != NULL);
          LASSERT(obd->u.mds.mds_vfsmnt->mnt_devname);
          *eof = 1;
-        return snprintf(page, count, "%s\n",
-                        obd->u.mds.mds_vfsmnt->mnt_devname);
+
+        return snprintf(page, count, "%s\n",obd->u.mds.mds_vfsmnt->mnt_devname);
  }
  
  struct lprocfs_vars lprocfs_mds_obd_vars[] = {
-        { "uuid",       lprocfs_rd_uuid, 0, 0 },
-        { "blocksize",  rd_blksize,      0, 0 },
-        { "kbytestotal",rd_kbytestotal,  0, 0 },
-        { "kbytesfree", rd_kbytesfree,   0, 0 },
-        { "fstype",     rd_fstype,       0, 0 },
-        { "filestotal", rd_filestotal,   0, 0 },
-        { "filesfree",  rd_filesfree,    0, 0 },
-        { "filegroups", rd_filegroups,   0, 0 },
-        { "mntdev",     lprocfs_mds_rd_mntdev,    0, 0 },
+        { "uuid",         lprocfs_rd_uuid,        0, 0 },
+        { "blocksize",    lprocfs_rd_blksize,     0, 0 },
+        { "kbytestotal",  lprocfs_rd_kbytestotal, 0, 0 },
+        { "kbytesfree",   lprocfs_rd_kbytesfree,  0, 0 },
+        { "fstype",       lprocfs_rd_fstype,      0, 0 },
+        { "filestotal",   lprocfs_rd_filestotal,  0, 0 },
+        { "filesfree",    lprocfs_rd_filesfree,   0, 0 },
+        //{ "filegroups",   lprocfs_rd_filegroups,  0, 0 },
+        { "mntdev",       lprocfs_mds_rd_mntdev,  0, 0 },
          { 0 }
  };
  
  struct lprocfs_vars lprocfs_mds_module_vars[] = {
-        { "num_refs",   lprocfs_rd_numrefs, 0, 0 },
+        { "num_refs",     lprocfs_rd_numrefs,     0, 0 },
          { 0 }
  };
  
  struct lprocfs_vars lprocfs_mdt_obd_vars[] = {
-        { "uuid",       lprocfs_rd_uuid, 0, 0 },
+        { "uuid",         lprocfs_rd_uuid,        0, 0 },
          { 0 }
  };
  
  struct lprocfs_vars lprocfs_mdt_module_vars[] = {
-        { "num_refs",   lprocfs_rd_numrefs, 0, 0 },
+        { "num_refs",     lprocfs_rd_numrefs,     0, 0 },
          { 0 }
  };
  
diff --git a/lustre/mds/mds_fs.c b/lustre/mds/mds_fs.c

index cefc680..56346ca 100644 (file)
--- a/lustre/mds/mds_fs.c
+++ b/lustre/mds/mds_fs.c
@@ -37,6 +37,9 @@
  #include <linux/obd_support.h>
  #include <linux/lustre_lib.h>
  #include <linux/lustre_fsfilt.h>
+#include <portals/list.h>
+
+#include "mds_internal.h"
  
  /* This limit is arbitrary, but for now we fit it in 1 page (32k clients) */
  #define MDS_MAX_CLIENTS (PAGE_SIZE * 8)
@@ -50,10 +53,10 @@
   * we know its offset.
   */
  int mds_client_add(struct obd_device *obd, struct mds_obd *mds,
-                   struct mds_export_data *med, int cl_off)
+                   struct mds_export_data *med, int cl_idx)
  {
          unsigned long *bitmap = mds->mds_client_bitmap;
-        int new_client = (cl_off == -1);
+        int new_client = (cl_idx == -1);
  
          LASSERT(bitmap != NULL);
  
@@ -61,39 +64,40 @@ int mds_client_add(struct obd_device *obd, struct mds_obd *mds,
          if (!strcmp(med->med_mcd->mcd_uuid, "OBD_CLASS_UUID"))
                  RETURN(0);
  
-        /* the bitmap operations can handle cl_off > sizeof(long) * 8, so
+        /* the bitmap operations can handle cl_idx > sizeof(long) * 8, so
           * there's no need for extra complication here
           */
          if (new_client) {
-                cl_off = find_first_zero_bit(bitmap, MDS_MAX_CLIENTS);
+                cl_idx = find_first_zero_bit(bitmap, MDS_MAX_CLIENTS);
          repeat:
-                if (cl_off >= MDS_MAX_CLIENTS) {
+                if (cl_idx >= MDS_MAX_CLIENTS) {
                          CERROR("no room for clients - fix MDS_MAX_CLIENTS\n");
                          return -ENOMEM;
                  }
-                if (test_and_set_bit(cl_off, bitmap)) {
+                if (test_and_set_bit(cl_idx, bitmap)) {
                          CERROR("MDS client %d: found bit is set in bitmap\n",
-                               cl_off);
-                        cl_off = find_next_zero_bit(bitmap, MDS_MAX_CLIENTS,
-                                                    cl_off);
+                               cl_idx);
+                        cl_idx = find_next_zero_bit(bitmap, MDS_MAX_CLIENTS,
+                                                    cl_idx);
                          goto repeat;
                  }
          } else {
-                if (test_and_set_bit(cl_off, bitmap)) {
+                if (test_and_set_bit(cl_idx, bitmap)) {
                          CERROR("MDS client %d: bit already set in bitmap!!\n",
-                               cl_off);
+                               cl_idx);
                          LBUG();
                  }
          }
  
-        CDEBUG(D_INFO, "client at offset %d with UUID '%s' added\n",
-               cl_off, med->med_mcd->mcd_uuid);
+        CDEBUG(D_INFO, "client at index %d with UUID '%s' added\n",
+               cl_idx, med->med_mcd->mcd_uuid);
  
-        med->med_off = cl_off;
+        med->med_idx = cl_idx;
+        med->med_off = MDS_LR_CLIENT_START + (cl_idx * MDS_LR_CLIENT_SIZE);
  
          if (new_client) {
                  struct obd_run_ctxt saved;
-                loff_t off = MDS_LR_CLIENT + (cl_off * MDS_LR_SIZE);
+                loff_t off = med->med_off;
                  ssize_t written;
                  void *handle;
  
@@ -114,14 +118,16 @@ int mds_client_add(struct obd_device *obd, struct mds_obd *mds,
                   * could use any of them, or maybe an FSFILT_OP_NONE is best?
                   */
                  handle = fsfilt_start(obd,mds->mds_rcvd_filp->f_dentry->d_inode,
-                                      FSFILT_OP_SETATTR);
+                                      FSFILT_OP_SETATTR, NULL);
                  if (IS_ERR(handle)) {
                          written = PTR_ERR(handle);
                          CERROR("unable to start transaction: rc %d\n",
                                 (int)written);
                  } else {
-                        written = lustre_fwrite(mds->mds_rcvd_filp,med->med_mcd,
-                                                sizeof(*med->med_mcd), &off);
+                        written = fsfilt_write_record(obd, mds->mds_rcvd_filp,
+                                                      (char *)med->med_mcd,
+                                                      sizeof(*med->med_mcd),
+                                                      &off);
                          fsfilt_commit(obd,mds->mds_rcvd_filp->f_dentry->d_inode,
                                        handle, 0);
                  }
@@ -132,8 +138,8 @@ int mds_client_add(struct obd_device *obd, struct mds_obd *mds,
                                  RETURN(written);
                          RETURN(-EIO);
                  }
-                CDEBUG(D_INFO, "wrote client mcd at off %u (len %u)\n",
-                       MDS_LR_CLIENT + (cl_off * MDS_LR_SIZE),
+                CDEBUG(D_INFO, "wrote client mcd at idx %u off %llu (len %u)\n",
+                       med->med_idx, med->med_off,
                         (unsigned int)sizeof(*med->med_mcd));
          }
          return 0;
@@ -143,11 +149,11 @@ int mds_client_free(struct obd_export *exp)
  {
          struct mds_export_data *med = &exp->exp_mds_data;
          struct mds_obd *mds = &exp->exp_obd->u.mds;
+        struct obd_device *obd = exp->exp_obd;
          struct mds_client_data zero_mcd;
          struct obd_run_ctxt saved;
          int written;
          unsigned long *bitmap = mds->mds_client_bitmap;
-        loff_t off;
  
          LASSERT(bitmap);
          if (!med->med_mcd)
@@ -157,30 +163,29 @@ int mds_client_free(struct obd_export *exp)
          if (!strcmp(med->med_mcd->mcd_uuid, "OBD_CLASS_UUID"))
                  GOTO(free_and_out, 0);
  
-        off = MDS_LR_CLIENT + (med->med_off * MDS_LR_SIZE);
-
-        CDEBUG(D_INFO, "freeing client at offset %u (%lld)with UUID '%s'\n",
-               med->med_off, off, med->med_mcd->mcd_uuid);
+        CDEBUG(D_INFO, "freeing client at index %u (%lld)with UUID '%s'\n",
+               med->med_idx, med->med_off, med->med_mcd->mcd_uuid);
  
-        if (!test_and_clear_bit(med->med_off, bitmap)) {
+        if (!test_and_clear_bit(med->med_idx, bitmap)) {
                  CERROR("MDS client %u: bit already clear in bitmap!!\n",
-                       med->med_off);
+                       med->med_idx);
                  LBUG();
          }
  
          memset(&zero_mcd, 0, sizeof zero_mcd);
          push_ctxt(&saved, &mds->mds_ctxt, NULL);
-        written = lustre_fwrite(mds->mds_rcvd_filp, (const char *)&zero_mcd,
-                                sizeof(zero_mcd), &off);
+        written = fsfilt_write_record(obd, mds->mds_rcvd_filp,
+                                      (char *)&zero_mcd, sizeof(zero_mcd),
+                                      &med->med_off);
          pop_ctxt(&saved, &mds->mds_ctxt, NULL);
  
          if (written != sizeof(zero_mcd)) {
-                CERROR("error zeroing out client %s off %d in %s: %d\n",
-                       med->med_mcd->mcd_uuid, med->med_off, LAST_RCVD,
+                CERROR("error zeroing out client %s index %d in %s: %d\n",
+                       med->med_mcd->mcd_uuid, med->med_idx, LAST_RCVD,
                         written);
          } else {
                  CDEBUG(D_INFO, "zeroed out disconnecting client %s at off %d\n",
-                       med->med_mcd->mcd_uuid, med->med_off);
+                       med->med_mcd->mcd_uuid, med->med_idx);
          }
  
   free_and_out:
@@ -199,20 +204,20 @@ static int mds_server_free_data(struct mds_obd *mds)
          return 0;
  }
  
-static int mds_read_last_rcvd(struct obd_device *obddev, struct file *f)
+static int mds_read_last_rcvd(struct obd_device *obd, struct file *file)
  {
-        struct mds_obd *mds = &obddev->u.mds;
+        struct mds_obd *mds = &obd->u.mds;
          struct mds_server_data *msd;
          struct mds_client_data *mcd = NULL;
          loff_t off = 0;
-        int cl_off;
-        unsigned long last_rcvd_size = f->f_dentry->d_inode->i_size;
+        int cl_idx;
+        unsigned long last_rcvd_size = file->f_dentry->d_inode->i_size;
          __u64 last_transno = 0;
-        __u64 last_mount;
+        __u64 mount_count;
          int rc = 0;
  
-        LASSERT(sizeof(struct mds_client_data) == MDS_LR_SIZE);
-        LASSERT(sizeof(struct mds_server_data) <= MDS_LR_CLIENT);
+        LASSERT(sizeof(struct mds_client_data) == MDS_LR_CLIENT_SIZE);
+        LASSERT(sizeof(struct mds_server_data) <= MDS_LR_SERVER_SIZE);
  
          OBD_ALLOC(msd, sizeof(*msd));
          if (!msd)
@@ -225,40 +230,71 @@ static int mds_read_last_rcvd(struct obd_device *obddev, struct file *f)
                  RETURN(-ENOMEM);
          }
  
-        rc = lustre_fread(f, (char *)msd, sizeof(*msd), &off);
-
          mds->mds_server_data = msd;
-        if (rc == 0) {
-                CERROR("%s: empty MDS %s, new MDS?\n", obddev->obd_name,
-                       LAST_RCVD);
+
+        if (last_rcvd_size == 0) {
+                CWARN("%s: initializing new %s\n", obd->obd_name, LAST_RCVD);
+                memcpy(msd->msd_uuid, obd->obd_uuid.uuid,sizeof(msd->msd_uuid));
+                msd->msd_server_size = cpu_to_le32(MDS_LR_SERVER_SIZE);
+                msd->msd_client_start = cpu_to_le32(MDS_LR_CLIENT_START);
+                msd->msd_client_size = cpu_to_le16(MDS_LR_CLIENT_SIZE);
+
                  RETURN(0);
          }
  
+        rc = fsfilt_read_record(obd, file, (char *)msd, sizeof(*msd), &off);
+
          if (rc != sizeof(*msd)) {
-                CERROR("error reading MDS %s: rc = %d\n", LAST_RCVD, rc);
+                CERROR("error reading MDS %s: rc = %d\n", LAST_RCVD,rc);
                  if (rc > 0)
                          rc = -EIO;
                  GOTO(err_msd, rc);
          }
+        if (!msd->msd_server_size)
+                msd->msd_server_size = cpu_to_le32(MDS_LR_SERVER_SIZE);
+        if (!msd->msd_client_start)
+                msd->msd_client_start = cpu_to_le32(MDS_LR_CLIENT_START);
+        if (!msd->msd_client_size)
+                msd->msd_client_size = cpu_to_le16(MDS_LR_CLIENT_SIZE);
+
+        if (msd->msd_feature_incompat) {
+                CERROR("unsupported incompat feature %x\n",
+                       le32_to_cpu(msd->msd_feature_incompat));
+                GOTO(err_msd, rc = -EINVAL);
+        }
+        if (msd->msd_feature_rocompat) {
+                CERROR("unsupported read-only feature %x\n",
+                       le32_to_cpu(msd->msd_feature_rocompat));
+                /* Do something like remount filesystem read-only */
+                GOTO(err_msd, rc = -EINVAL);
+        }
  
-        CDEBUG(D_INODE, "last_rcvd has size %lu (msd + %lu clients)\n",
-               last_rcvd_size, (last_rcvd_size - MDS_LR_CLIENT)/MDS_LR_SIZE);
-
-        /*
-         * When we do a clean MDS shutdown, we save the last_transno into
-         * the header.
-         */
          last_transno = le64_to_cpu(msd->msd_last_transno);
          mds->mds_last_transno = last_transno;
-        CDEBUG(D_INODE, "got "LPU64" for server last_rcvd value\n",
-               last_transno);
-
-        last_mount = le64_to_cpu(msd->msd_mount_count);
-        mds->mds_mount_count = last_mount;
-        CDEBUG(D_INODE, "got "LPU64" for server last_mount value\n",last_mount);
  
-        /* off is adjusted by lustre_fread, so we don't adjust it in the loop */
-        for (off = MDS_LR_CLIENT, cl_off = 0; off < last_rcvd_size; cl_off++) {
+        mount_count = le64_to_cpu(msd->msd_mount_count);
+        mds->mds_mount_count = mount_count;
+
+        CDEBUG(D_INODE, "%s: server last_transno: "LPU64"\n",
+               obd->obd_name, last_transno);
+        CDEBUG(D_INODE, "%s: server mount_count: "LPU64"\n",
+               obd->obd_name, mount_count);
+        CDEBUG(D_INODE, "%s: server data size: %u\n",
+               obd->obd_name, le32_to_cpu(msd->msd_server_size));
+        CDEBUG(D_INODE, "%s: per-client data start: %u\n",
+               obd->obd_name, le32_to_cpu(msd->msd_client_start));
+        CDEBUG(D_INODE, "%s: per-client data size: %u\n",
+               obd->obd_name, le32_to_cpu(msd->msd_client_size));
+        CDEBUG(D_INODE, "%s: last_rcvd size: %lu\n",
+               obd->obd_name, last_rcvd_size);
+        CDEBUG(D_INODE, "%s: last_rcvd clients: %lu\n", obd->obd_name,
+               (last_rcvd_size - MDS_LR_CLIENT_START) / MDS_LR_CLIENT_SIZE);
+
+        /* When we do a clean MDS shutdown, we save the last_transno into
+         * the header.  If we find clients with higher last_transno values
+         * then those clients may need recovery done. */
+        for (cl_idx = 0; off < last_rcvd_size; cl_idx++) {
+                __u64 last_transno;
                  int mount_age;
  
                  if (!mcd) {
@@ -267,10 +303,16 @@ static int mds_read_last_rcvd(struct obd_device *obddev, struct file *f)
                                  GOTO(err_msd, rc = -ENOMEM);
                  }
  
-                rc = lustre_fread(f, (char *)mcd, sizeof(*mcd), &off);
+                /* Don't assume off is incremented properly, in case
+                 * sizeof(*mcd) isn't the same as msd->msd_client_size.
+                 */
+                off = le32_to_cpu(msd->msd_client_start) +
+                        cl_idx * le16_to_cpu(msd->msd_client_size);
+                rc = fsfilt_read_record(obd, file, (char *)mcd,
+                                        sizeof(*mcd), &off);
                  if (rc != sizeof(*mcd)) {
                          CERROR("error reading MDS %s offset %d: rc = %d\n",
-                               LAST_RCVD, cl_off, rc);
+                               LAST_RCVD, cl_idx, rc);
                          if (rc > 0) /* XXX fatal error or just abort reading? */
                                  rc = -EIO;
                          break;
@@ -278,7 +320,7 @@ static int mds_read_last_rcvd(struct obd_device *obddev, struct file *f)
  
                  if (mcd->mcd_uuid[0] == '\0') {
                          CDEBUG(D_INFO, "skipping zeroed client at offset %d\n",
-                               cl_off);
+                               cl_idx);
                          continue;
                  }
  
@@ -287,10 +329,15 @@ static int mds_read_last_rcvd(struct obd_device *obddev, struct file *f)
                  /* These exports are cleaned up by mds_disconnect(), so they
                   * need to be set up like real exports as mds_connect() does.
                   */
-                mount_age = last_mount - le64_to_cpu(mcd->mcd_mount_count);
+                mount_age = mount_count - le64_to_cpu(mcd->mcd_mount_count);
                  if (mount_age < MDS_MOUNT_RECOV) {
-                        struct obd_export *exp = class_new_export(obddev);
+                        struct obd_export *exp = class_new_export(obd);
                          struct mds_export_data *med;
+                        CERROR("RCVRNG CLIENT uuid: %s off: %d lr: "LPU64
+                               "srv lr: "LPU64" mnt: "LPU64" last mount: "LPU64
+                               "\n", mcd->mcd_uuid, cl_idx,
+                               last_transno, le64_to_cpu(msd->msd_last_transno),
+                               le64_to_cpu(mcd->mcd_mount_count), mount_count);
  
                          if (!exp) {
                                  rc = -ENOMEM;
@@ -301,35 +348,35 @@ static int mds_read_last_rcvd(struct obd_device *obddev, struct file *f)
                                 sizeof exp->exp_client_uuid.uuid);
                          med = &exp->exp_mds_data;
                          med->med_mcd = mcd;
-                        mds_client_add(obddev, mds, med, cl_off);
+                        mds_client_add(obd, mds, med, cl_idx);
                          /* create helper if export init gets more complex */
                          INIT_LIST_HEAD(&med->med_open_head);
                          spin_lock_init(&med->med_open_lock);
  
                          mcd = NULL;
-                        obddev->obd_recoverable_clients++;
+                        obd->obd_recoverable_clients++;
                          class_export_put(exp);
                  } else {
                          CDEBUG(D_INFO, "discarded client %d, UUID '%s', count "
-                               LPU64"\n", cl_off, mcd->mcd_uuid,
+                               LPU64"\n", cl_idx, mcd->mcd_uuid,
                                 le64_to_cpu(mcd->mcd_mount_count));
                  }
  
-                CDEBUG(D_OTHER, "client at offset %d has last_transno = %Lu\n",
-                       cl_off, (unsigned long long)last_transno);
+                CDEBUG(D_OTHER, "client at offset %d has last_transno = "
+                       LPU64"\n", cl_idx, last_transno);
  
                  if (last_transno > mds->mds_last_transno)
                          mds->mds_last_transno = last_transno;
          }
  
-        obddev->obd_last_committed = mds->mds_last_transno;
-        if (obddev->obd_recoverable_clients) {
+        obd->obd_last_committed = mds->mds_last_transno;
+        if (obd->obd_recoverable_clients) {
                  CERROR("RECOVERY: %d recoverable clients, last_transno "
                         LPU64"\n",
-                       obddev->obd_recoverable_clients, mds->mds_last_transno);
-                obddev->obd_next_recovery_transno = obddev->obd_last_committed
+                       obd->obd_recoverable_clients, mds->mds_last_transno);
+                obd->obd_next_recovery_transno = obd->obd_last_committed
                          + 1;
-                obddev->obd_recovering = 1;
+                obd->obd_recovering = 1;
          }
  
          if (mcd)
@@ -342,12 +389,12 @@ err_msd:
          return rc;
  }
  
-static int mds_fs_prep(struct obd_device *obddev)
+static int mds_fs_prep(struct obd_device *obd)
  {
-        struct mds_obd *mds = &obddev->u.mds;
+        struct mds_obd *mds = &obd->u.mds;
          struct obd_run_ctxt saved;
          struct dentry *dentry;
-        struct file *f;
+        struct file *file;
          int rc;
  
          push_ctxt(&saved, &mds->mds_ctxt, NULL);
@@ -373,46 +420,76 @@ static int mds_fs_prep(struct obd_device *obddev)
          }
          mds->mds_fid_de = dentry;
  
-        f = filp_open(LAST_RCVD, O_RDWR | O_CREAT, 0644);
-        if (IS_ERR(f)) {
-                rc = PTR_ERR(f);
+        dentry = simple_mkdir(current->fs->pwd, "PENDING", 0777);
+        if (IS_ERR(dentry)) {
+                rc = PTR_ERR(dentry);
+                CERROR("cannot create PENDING directory: rc = %d\n", rc);
+                GOTO(err_fid, rc);
+        }
+        mds->mds_pending_dir = dentry;
+
+        dentry = simple_mkdir(current->fs->pwd, "LOGS", 0700);
+        if (IS_ERR(dentry)) {
+                rc = PTR_ERR(dentry);
+                CERROR("cannot create LOGS directory: rc = %d\n", rc);
+                GOTO(err_pending, rc);
+        }
+        mds->mds_logs_dir = dentry;
+
+        file = filp_open(LAST_RCVD, O_RDWR | O_CREAT, 0644);
+        if (IS_ERR(file)) {
+                rc = PTR_ERR(file);
                  CERROR("cannot open/create %s file: rc = %d\n", LAST_RCVD, rc);
-                GOTO(err_pop, rc = PTR_ERR(f));
+
+                GOTO(err_logs, rc = PTR_ERR(file));
          }
-        if (!S_ISREG(f->f_dentry->d_inode->i_mode)) {
+        if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
                  CERROR("%s is not a regular file!: mode = %o\n", LAST_RCVD,
-                       f->f_dentry->d_inode->i_mode);
+                       file->f_dentry->d_inode->i_mode);
                  GOTO(err_filp, rc = -ENOENT);
          }
  
-        rc = fsfilt_journal_data(obddev, f);
+        rc = fsfilt_journal_data(obd, file);
          if (rc) {
                  CERROR("cannot journal data on %s: rc = %d\n", LAST_RCVD, rc);
                  GOTO(err_filp, rc);
          }
  
-        rc = mds_read_last_rcvd(obddev, f);
+        rc = mds_read_last_rcvd(obd, file);
          if (rc) {
                  CERROR("cannot read %s: rc = %d\n", LAST_RCVD, rc);
                  GOTO(err_client, rc);
          }
-        mds->mds_rcvd_filp = f;
+        mds->mds_rcvd_filp = file;
+#ifdef I_SKIP_PDFLUSH
+        /*
+         * we need this to protect from deadlock
+         * pdflush vs. lustre_fwrite()
+         */
+        file->f_dentry->d_inode->i_flags |= I_SKIP_PDFLUSH;
+#endif
  err_pop:
          pop_ctxt(&saved, &mds->mds_ctxt, NULL);
  
          return rc;
  
  err_client:
-        class_disconnect_exports(obddev, 0);
+        class_disconnect_exports(obd, 0);
  err_filp:
-        if (filp_close(f, 0))
+        if (filp_close(file, 0))
                  CERROR("can't close %s after error\n", LAST_RCVD);
+err_logs:
+        dput(mds->mds_logs_dir);
+err_pending:
+        dput(mds->mds_pending_dir);
+err_fid:
+        dput(mds->mds_fid_de);
          goto err_pop;
  }
  
-int mds_fs_setup(struct obd_device *obddev, struct vfsmount *mnt)
+int mds_fs_setup(struct obd_device *obd, struct vfsmount *mnt)
  {
-        struct mds_obd *mds = &obddev->u.mds;
+        struct mds_obd *mds = &obd->u.mds;
          ENTRY;
  
          mds->mds_vfsmnt = mnt;
@@ -421,21 +498,20 @@ int mds_fs_setup(struct obd_device *obddev, struct vfsmount *mnt)
          mds->mds_ctxt.pwdmnt = mnt;
          mds->mds_ctxt.pwd = mnt->mnt_root;
          mds->mds_ctxt.fs = get_ds();
-        RETURN(mds_fs_prep(obddev));
+        RETURN(mds_fs_prep(obd));
  }
  
-int mds_fs_cleanup(struct obd_device *obddev, int failover)
+int mds_fs_cleanup(struct obd_device *obd, int flags)
  {
-        struct mds_obd *mds = &obddev->u.mds;
+        struct mds_obd *mds = &obd->u.mds;
          struct obd_run_ctxt saved;
          int rc = 0;
  
-        if (failover)
+        if (flags & OBD_OPT_FAILOVER)
                  CERROR("%s: shutting down for failover; client state will"
-                       " be preserved.\n", obddev->obd_name);
+                       " be preserved.\n", obd->obd_name);
  
-        class_disconnect_exports(obddev, failover); /* this cleans up client
-                                                   info too */
+        class_disconnect_exports(obd, flags); /* cleans up client info too */
          mds_server_free_data(mds);
  
          push_ctxt(&saved, &mds->mds_ctxt, NULL);
@@ -443,7 +519,15 @@ int mds_fs_cleanup(struct obd_device *obddev, int failover)
                  rc = filp_close(mds->mds_rcvd_filp, 0);
                  mds->mds_rcvd_filp = NULL;
                  if (rc)
-                        CERROR("last_rcvd file won't close, rc=%d\n", rc);
+                        CERROR("%s file won't close, rc=%d\n", LAST_RCVD, rc);
+        }
+        if (mds->mds_logs_dir) {
+                l_dput(mds->mds_logs_dir);
+                mds->mds_logs_dir = NULL;
+        }
+        if (mds->mds_pending_dir) {
+                l_dput(mds->mds_pending_dir);
+                mds->mds_pending_dir = NULL;
          }
          pop_ctxt(&saved, &mds->mds_ctxt, NULL);
          shrink_dcache_parent(mds->mds_fid_de);
@@ -451,3 +535,233 @@ int mds_fs_cleanup(struct obd_device *obddev, int failover)
  
          return rc;
  }
+
+/* llog callback: close @loghandle, unlinking it if it is an empty non-catalog
+ * log.  Returns the first error.  Caller must be in the kernel context. */
+int mds_log_close(struct llog_handle *cathandle, struct llog_handle *loghandle)
+{
+        struct llog_object_hdr *llh = loghandle->lgh_hdr;
+        struct mds_obd *mds = &cathandle->lgh_obd->u.mds;
+        struct dentry *dchild = NULL;
+        int rc;
+        ENTRY;
+
+        /* If we are going to delete this log, take the LOGS dir i_sem and
+         * grab a dentry ref before we close it, so the unlink below does
+         * not need another lookup.  i_sem is released after the unlink. */
+        if (llh->llh_hdr.lth_type != LLOG_CATALOG_MAGIC && llh->llh_count == 0){
+                CDEBUG(D_INODE, "deleting log file "LPX64":%x\n",
+                       loghandle->lgh_cookie.lgc_lgl.lgl_oid,
+                       loghandle->lgh_cookie.lgc_lgl.lgl_ogen);
+                down(&mds->mds_logs_dir->d_inode->i_sem);
+                dchild = dget(loghandle->lgh_file->f_dentry);
+                llog_delete_log(cathandle, loghandle);
+        } else {
+                CDEBUG(D_INODE, "closing log file "LPX64":%x\n",
+                       loghandle->lgh_cookie.lgc_lgl.lgl_oid,
+                       loghandle->lgh_cookie.lgc_lgl.lgl_ogen);
+        }
+
+        rc = filp_close(loghandle->lgh_file, 0);
+
+        llog_free_handle(loghandle); /* also removes loghandle from list */
+
+        if (dchild) {
+                int err = vfs_unlink(mds->mds_logs_dir->d_inode, dchild);
+                if (err) {
+                        CERROR("error unlinking empty log %.*s: rc %d\n",
+                               dchild->d_name.len, dchild->d_name.name, err);
+                        if (!rc)
+                                rc = err;
+                }
+                l_dput(dchild);
+                up(&mds->mds_logs_dir->d_inode->i_sem);
+        }
+        RETURN(rc);
+}
+
+/* llog callback: open the existing log identified by @logcookie (oid + ogen).
+ * Returns the handle or ERR_PTR(); caller must be in the kernel context. */
+struct llog_handle *mds_log_open(struct obd_device *obd,
+                                 struct llog_cookie *logcookie)
+{
+        struct ll_fid fid = { .id = logcookie->lgc_lgl.lgl_oid,
+                              .generation = logcookie->lgc_lgl.lgl_ogen,
+                              .f_type = S_IFREG };
+        struct llog_handle *loghandle;
+        struct dentry *dchild;
+        int rc;
+        ENTRY;
+
+        loghandle = llog_alloc_handle();
+        if (loghandle == NULL)
+                RETURN(ERR_PTR(-ENOMEM));
+
+        down(&obd->u.mds.mds_logs_dir->d_inode->i_sem);
+        dchild = mds_fid2dentry(&obd->u.mds, &fid, NULL);
+        up(&obd->u.mds.mds_logs_dir->d_inode->i_sem);
+        if (IS_ERR(dchild)) {
+                rc = PTR_ERR(dchild);
+                CERROR("error looking up log file "LPX64":%x: rc %d\n",
+                       fid.id, fid.generation, rc);
+                GOTO(out, rc);
+        }
+
+        if (dchild->d_inode == NULL) {
+                rc = -ENOENT;
+                CERROR("nonexistent log file "LPX64":%x: rc %d\n",
+                       fid.id, fid.generation, rc);
+                GOTO(out_put, rc);
+        }
+
+        /* dentry_open does a dput(de) and mntput(mds->mds_vfsmnt) on error */
+        mntget(obd->u.mds.mds_vfsmnt);
+        loghandle->lgh_file = dentry_open(dchild, obd->u.mds.mds_vfsmnt,
+                                          O_RDWR | O_LARGEFILE);
+        if (IS_ERR(loghandle->lgh_file)) {
+                rc = PTR_ERR(loghandle->lgh_file);
+                CERROR("error opening logfile "LPX64":%x: rc %d\n",
+                       fid.id, fid.generation, rc);
+                GOTO(out, rc);
+        }
+        memcpy(&loghandle->lgh_cookie, logcookie, sizeof(*logcookie));
+        loghandle->lgh_log_create = mds_log_create;
+        loghandle->lgh_log_open = mds_log_open;
+        loghandle->lgh_log_close = mds_log_close;
+        loghandle->lgh_obd = obd;
+
+        RETURN(loghandle);
+
+out_put:
+        l_dput(dchild);
+out:
+        llog_free_handle(loghandle);
+        return ERR_PTR(rc);
+}
+
+/* llog callback: create "LOGS/catalog" if there is no catalog yet, else a
+ * fresh "LOGS/log<secs>.<index>" log.  Caller must be in kernel context. */
+struct llog_handle *mds_log_create(struct obd_device *obd)
+{
+        char logbuf[40], *logname; /* LOGS/log<secs>.<index> + NUL */
+        struct llog_handle *loghandle;
+        int rc, open_flags = O_RDWR | O_CREAT | O_LARGEFILE;
+        ENTRY;
+
+        loghandle = llog_alloc_handle();
+        if (!loghandle)
+                RETURN(ERR_PTR(-ENOMEM));
+
+retry:
+        if (!obd->u.mds.mds_catalog) {
+                logname = "LOGS/catalog";
+        } else {
+                snprintf(logbuf, sizeof(logbuf), "LOGS/log%lu.%u",
+                         CURRENT_SECONDS, obd->u.mds.mds_catalog->lgh_index++);
+                open_flags |= O_EXCL; /* collision => -EEXIST, retried */
+                logname = logbuf;
+        }
+        loghandle->lgh_file = filp_open(logname, open_flags, 0644);
+        if (IS_ERR(loghandle->lgh_file)) {
+                rc = PTR_ERR(loghandle->lgh_file);
+                if (rc == -EEXIST) {
+                        CDEBUG(D_HA, "collision in logfile %s creation\n",
+                               logname);
+                        obd->u.mds.mds_catalog->lgh_index++;
+                        goto retry;
+                }
+                CERROR("error opening/creating %s: rc %d\n", logname, rc);
+                GOTO(out_handle, rc);
+        }
+
+        loghandle->lgh_cookie.lgc_lgl.lgl_oid =
+                loghandle->lgh_file->f_dentry->d_inode->i_ino;
+        loghandle->lgh_cookie.lgc_lgl.lgl_ogen =
+                loghandle->lgh_file->f_dentry->d_inode->i_generation;
+        loghandle->lgh_log_create = mds_log_create;
+        loghandle->lgh_log_open = mds_log_open;
+        loghandle->lgh_log_close = mds_log_close;
+        loghandle->lgh_obd = obd;
+
+        RETURN(loghandle);
+
+out_handle:
+        llog_free_handle(loghandle);
+        return ERR_PTR(rc);
+}
+
+struct llog_handle *mds_get_catalog(struct obd_device *obd)
+{
+        struct mds_server_data *msd = obd->u.mds.mds_server_data;
+        struct obd_run_ctxt saved;
+        struct llog_handle *cathandle = NULL;
+        int rc = 0;
+        ENTRY;
+
+        push_ctxt(&saved, &obd->u.mds.mds_ctxt, NULL);
+        /* Reopen the catalog recorded in the server data, if there is one. */
+        if (msd->msd_catalog_oid) {
+                struct llog_cookie catcookie;
+
+                catcookie.lgc_lgl.lgl_oid = le64_to_cpu(msd->msd_catalog_oid);
+                catcookie.lgc_lgl.lgl_ogen = le32_to_cpu(msd->msd_catalog_ogen);
+                cathandle = mds_log_open(obd, &catcookie);
+                if (IS_ERR(cathandle)) {
+                        CERROR("error opening catalog "LPX64":%x: rc %d\n",
+                               catcookie.lgc_lgl.lgl_oid,
+                               catcookie.lgc_lgl.lgl_ogen,
+                               (int)PTR_ERR(cathandle));
+                        msd->msd_catalog_oid = 0;
+                        msd->msd_catalog_ogen = 0;
+                }
+                /* ORPHANS FIXME: compare catalog UUID to msd_peeruuid */
+        }
+        /* No usable catalog: create one and record its id in server data. */
+        if (!msd->msd_catalog_oid) {
+                struct llog_logid *lgl;
+
+                cathandle = mds_log_create(obd);
+                if (IS_ERR(cathandle)) {
+                        CERROR("error creating new catalog: rc %d\n",
+                               (int)PTR_ERR(cathandle));
+                        GOTO(out, cathandle);
+                }
+                lgl = &cathandle->lgh_cookie.lgc_lgl;
+                msd->msd_catalog_oid = cpu_to_le64(lgl->lgl_oid);
+                msd->msd_catalog_ogen = cpu_to_le32(lgl->lgl_ogen);
+                rc = mds_update_server_data(obd);
+                if (rc) {
+                        CERROR("error writing new catalog to disk: rc %d\n",rc);
+                        GOTO(out_handle, rc);
+                }
+        }
+
+        rc = llog_init_catalog(cathandle, &obd->u.mds.mds_osc_uuid);
+        if (rc)
+                CERROR("error initializing catalog: rc %d\n", rc);
+out:
+        pop_ctxt(&saved, &obd->u.mds.mds_ctxt, NULL);
+        RETURN(cathandle);
+
+out_handle:
+        mds_log_close(cathandle, cathandle);
+        cathandle = ERR_PTR(rc);
+        goto out;
+}
+
+void mds_put_catalog(struct llog_handle *cathandle)
+{
+        struct llog_handle *loghandle, *n;
+        int rc;
+        ENTRY;
+        /* _safe: mds_log_close() removes each handle from the list */
+        list_for_each_entry_safe(loghandle, n, &cathandle->lgh_list, lgh_list)
+                mds_log_close(cathandle, loghandle);
+
+        rc = filp_close(cathandle->lgh_file, 0);
+        if (rc)
+                CERROR("error closing catalog: rc %d\n", rc);
+
+        llog_free_handle(cathandle);
+        EXIT;
+}
diff --git a/lustre/mds/mds_internal.h b/lustre/mds/mds_internal.h

index 0b62a92..c2d3d77 100644 (file)
--- a/lustre/mds/mds_internal.h
+++ b/lustre/mds/mds_internal.h
@@ -1,9 +1,41 @@
+#ifndef _MDS_INTERNAL_H
+#define _MDS_INTERNAL_H
+static inline struct mds_obd *mds_req2mds(struct ptlrpc_request *req)
+{
+        return &req->rq_export->exp_obd->u.mds;
+}
+
+/* mds/mds_fs.c */
+struct llog_handle *mds_log_create(struct obd_device *obd);
+int mds_log_close(struct llog_handle *cathandle, struct llog_handle *loghandle);
+struct llog_handle *mds_log_open(struct obd_device *obd,
+                                 struct llog_cookie *logcookie);
+struct llog_handle *mds_get_catalog(struct obd_device *obd);
+void mds_put_catalog(struct llog_handle *cathandle);
+
+/* mds/handler.c */
  struct mds_file_data *mds_mfd_new(void);
  void mds_mfd_put(struct mds_file_data *mfd);
  void mds_mfd_destroy(struct mds_file_data *mfd);
+
+/* mds/mds_reint.c */
+void mds_commit_cb(struct obd_device *, __u64 last_rcvd, void *data, int error);
+int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle,
+                       struct ptlrpc_request *req, int rc, __u32 op_data);
+
+/* mds/mds_lib.c */
  int mds_update_unpack(struct ptlrpc_request *, int offset,
                        struct mds_update_record *);
  
+/* mds/mds_lov.c */
+int mds_get_lovtgts(struct mds_obd *mds, int tgt_count,
+                    struct obd_uuid *uuidarray);
+
+/* mds/mds_open.c */
+int mds_open(struct mds_update_record *rec, int offset,
+             struct ptlrpc_request *req, struct lustre_handle *);
+int mds_pin(struct ptlrpc_request *req);
+
  /* mds/mds_fs.c */
  int mds_client_add(struct obd_device *obd, struct mds_obd *mds,
                    struct mds_export_data *med, int cl_off);
@@ -13,3 +45,5 @@ int mds_client_free(struct obd_export *exp);
  void mds_pack_inode2fid(struct ll_fid *fid, struct inode *inode);
  void mds_pack_inode2body(struct mds_body *body, struct inode *inode);
  #endif
+
+#endif /* _MDS_INTERNAL_H */
diff --git a/lustre/mds/mds_lib.c b/lustre/mds/mds_lib.c

index 8f16795..93ac300 100644 (file)
--- a/lustre/mds/mds_lib.c
+++ b/lustre/mds/mds_lib.c
@@ -57,17 +57,15 @@ void mds_pack_inode2fid(struct ll_fid *fid, struct inode *inode)
          fid->f_type = (S_IFMT & inode->i_mode);
  }
  
+/* Note that we can copy all of the fields, just some will not be "valid" */
  void mds_pack_inode2body(struct mds_body *b, struct inode *inode)
  {
-        b->valid = OBD_MD_FLID | OBD_MD_FLATIME | OBD_MD_FLMTIME |
-                OBD_MD_FLCTIME | OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
-                OBD_MD_FLUID | OBD_MD_FLGID | OBD_MD_FLTYPE | OBD_MD_FLMODE |
-                OBD_MD_FLNLINK | OBD_MD_FLGENER;
+        b->valid = OBD_MD_FLID | OBD_MD_FLCTIME | OBD_MD_FLUID | OBD_MD_FLGID |
+                OBD_MD_FLTYPE | OBD_MD_FLMODE | OBD_MD_FLNLINK | OBD_MD_FLGENER;
  
-        /* The MDS file size isn't authoritative for regular files, so don't
-         * even pretend. */
-        if (S_ISREG(inode->i_mode))
-                b->valid &= ~(OBD_MD_FLSIZE | OBD_MD_FLBLOCKS);
+        if (!S_ISREG(inode->i_mode))
+                b->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | OBD_MD_FLATIME |
+                            OBD_MD_FLMTIME;
  
          b->ino = inode->i_ino;
          b->atime = LTIME_S(inode->i_atime);
@@ -80,10 +78,12 @@ void mds_pack_inode2body(struct mds_body *b, struct inode *inode)
          b->gid = inode->i_gid;
          b->flags = inode->i_flags;
          b->rdev = b->rdev;
-        b->nlink = inode->i_nlink;
+        /* Return the correct link count for orphan inodes */
+        b->nlink = mds_inode_is_orphan(inode) ? 0 : inode->i_nlink;
          b->generation = inode->i_generation;
          b->suppgid = -1;
  }
+
  /* unpacking */
  static int mds_setattr_unpack(struct ptlrpc_request *req, int offset,
                                struct mds_update_record *r)
@@ -92,8 +92,8 @@ static int mds_setattr_unpack(struct ptlrpc_request *req, int offset,
          struct mds_rec_setattr *rec;
          ENTRY;
  
-        rec = lustre_swab_reqbuf (req, offset, sizeof (*rec),
-                                  lustre_swab_mds_rec_setattr);
+        rec = lustre_swab_reqbuf(req, offset, sizeof(*rec),
+                                 lustre_swab_mds_rec_setattr);
          if (rec == NULL)
                  RETURN (-EFAULT);
  
@@ -120,9 +120,14 @@ static int mds_setattr_unpack(struct ptlrpc_request *req, int offset,
                  if (r->ur_eadata == NULL)
                          RETURN (-EFAULT);
                  r->ur_eadatalen = req->rq_reqmsg->buflens[offset + 1];
-        } else {
-                r->ur_eadata = NULL;
-                r->ur_eadatalen = 0;
+        }
+
+        if (req->rq_reqmsg->bufcount > offset + 2) {
+                r->ur_logcookies = lustre_msg_buf(req->rq_reqmsg, offset + 2,0);
+                if (r->ur_eadata == NULL)
+                        RETURN (-EFAULT);
+
+                r->ur_cookielen = req->rq_reqmsg->buflens[offset + 2];
          }
  
          RETURN(0);
@@ -172,9 +177,6 @@ static int mds_create_unpack(struct ptlrpc_request *req, int offset,
                  if (r->ur_tgt == NULL)
                          RETURN (-EFAULT);
                  r->ur_tgtlen = req->rq_reqmsg->buflens[offset + 2];
-        } else {
-                r->ur_tgt = NULL;
-                r->ur_tgtlen = 0;
          }
          RETURN(0);
  }
diff --git a/lustre/mds/mds_lov.c b/lustre/mds/mds_lov.c

index 02c53cc..ecca88c 100644 (file)
--- a/lustre/mds/mds_lov.c
+++ b/lustre/mds/mds_lov.c
@@ -32,6 +32,9 @@
  #include <linux/obd_class.h>
  #include <linux/obd_lov.h>
  #include <linux/lustre_lib.h>
+#include <linux/lustre_fsfilt.h>
+
+#include "mds_internal.h"
  
  void le_lov_desc_to_cpu (struct lov_desc *ld)
  {
@@ -141,6 +144,7 @@ int mds_set_lovdesc(struct obd_device *obd, struct lov_desc *desc,
          mds->mds_has_lov_desc = 1;
          /* XXX the MDS should not really know about this */
          mds->mds_max_mdsize = lov_mds_md_size(desc->ld_tgt_count);
+        mds->mds_max_cookiesize = desc->ld_tgt_count*sizeof(struct llog_cookie);
  
  out:
          pop_ctxt(&saved, &mds->mds_ctxt, NULL);
@@ -182,7 +186,8 @@ out:
          return rc;
  }
  
-int mds_get_lovtgts(struct mds_obd *mds, int tgt_count,struct obd_uuid *uuidarray)
+int mds_get_lovtgts(struct mds_obd *mds, int tgt_count,
+                    struct obd_uuid *uuidarray)
  {
          struct obd_run_ctxt saved;
          struct file *f;
@@ -266,13 +271,13 @@ int mds_iocontrol(unsigned int cmd, struct lustre_handle *conn,
  
                  RETURN(rc);
  
-        case OBD_IOC_SET_READONLY:
+        case OBD_IOC_SET_READONLY: {
+                BDEVNAME_DECLARE_STORAGE(tmp);
                  CERROR("setting device %s read-only\n",
-                       ll_bdevname(obd->u.mds.mds_sb->s_dev));
-#ifdef CONFIG_DEV_RDONLY
+                       ll_bdevname(obd->u.mds.mds_sb->s_dev, tmp));
                  dev_set_rdonly(obd->u.mds.mds_sb->s_dev, 2);
-#endif
                  RETURN(0);
+        }
  
          case OBD_IOC_ABORT_RECOVERY:
                  CERROR("aborting recovery for device %s\n", obd->obd_name);
diff --git a/lustre/mds/mds_open.c b/lustre/mds/mds_open.c

index 04d6ee9..2bd2f8c 100644 (file)
--- a/lustre/mds/mds_open.c
+++ b/lustre/mds/mds_open.c
@@ -45,19 +45,6 @@
  
  #include "mds_internal.h"
  
-extern inline struct mds_obd *mds_req2mds(struct ptlrpc_request *req);
-int mds_finish_transno(struct mds_obd *mds, struct inode *i, void *handle,
-                       struct ptlrpc_request *req, int rc, __u32 op_data);
-extern int enqueue_ordered_locks(int lock_mode, struct obd_device *obd,
-                                 struct ldlm_res_id *p1_res_id,
-                                 struct ldlm_res_id *p2_res_id,
-                                 struct ldlm_res_id *c1_res_id,
-                                 struct ldlm_res_id *c2_res_id,
-                                 struct lustre_handle *p1_lockh,
-                                 struct lustre_handle *p2_lockh,
-                                 struct lustre_handle *c1_lockh,
-                                 struct lustre_handle *c2_lockh);
-
  struct mds_file_data *mds_dentry_open(struct dentry *dentry,
                                        struct vfsmount *mnt,
                                        int flags,
@@ -65,17 +52,16 @@ struct mds_file_data *mds_dentry_open(struct dentry *dentry,
  {
          struct mds_export_data *med = &req->rq_export->exp_mds_data;
          struct inode *inode;
-        int mode;
          struct mds_file_data *mfd;
-        int error;
+        int mode, error;
  
          mfd = mds_mfd_new();
-        if (!mfd) {
+        if (mfd == NULL) {
                  CERROR("mds: out of memory\n");
                  GOTO(cleanup_dentry, error = -ENOMEM);
          }
  
-        mode = (flags+1) & O_ACCMODE;
+        mode = (flags + 1) & O_ACCMODE;
          inode = dentry->d_inode;
  
          if (mode & FMODE_WRITE) {
@@ -107,6 +93,7 @@ void reconstruct_open(struct mds_update_record *rec, int offset,
                        struct ptlrpc_request *req,
                        struct lustre_handle *child_lockh)
  {
+        struct ptlrpc_request *oldreq = req->rq_export->exp_outstanding_reply;
          struct mds_export_data *med = &req->rq_export->exp_mds_data;
          struct mds_client_data *mcd = med->med_mcd;
          struct mds_obd *mds = mds_req2mds(req);
@@ -115,7 +102,7 @@ void reconstruct_open(struct mds_update_record *rec, int offset,
          struct dentry *parent, *child;
          struct ldlm_reply *rep;
          struct mds_body *body;
-        int disp, rc;
+        int rc;
          struct list_head *t;
          int put_child = 1;
          ENTRY;
@@ -127,14 +114,13 @@ void reconstruct_open(struct mds_update_record *rec, int offset,
          /* copy rc, transno and disp; steal locks */
          req->rq_transno = mcd->mcd_last_transno;
          req->rq_status = mcd->mcd_last_result;
-        disp = rep->lock_policy_res1 = mcd->mcd_last_data;
+        intent_set_disposition(rep, mcd->mcd_last_data);
  
-        if (req->rq_export->exp_outstanding_reply)
+        if (oldreq)
                  mds_steal_ack_locks(req->rq_export, req);
  
-        /* We never care about these. */
-        disp &= ~(IT_OPEN_LOOKUP | IT_OPEN_POS | IT_OPEN_NEG);
-        if (!disp) {
+        /* Only replay if create or open actually happened. */
+        if (!intent_disposition(rep, DISP_OPEN_CREATE | DISP_OPEN_OPEN) ) {
                  EXIT;
                  return; /* error looking up parent or child */
          }
@@ -149,11 +135,11 @@ void reconstruct_open(struct mds_update_record *rec, int offset,
                  GOTO(out_dput, 0); /* child not present to open */
          }
  
-        /* At this point, we know we have a child, which means that we'll send
-         * it back _unless_ it was open failed, _and_ we didn't create the file.
-         * I love you guys.  No, really.
+        /* At this point, we know we have a child. We'll send
+         * it back _unless_ it was not created and the open failed.
           */
-        if (((disp & (IT_OPEN_OPEN | IT_OPEN_CREATE)) == IT_OPEN_OPEN) &&
+        if (intent_disposition(rep, DISP_OPEN_OPEN) &&
+            !intent_disposition(rep, DISP_OPEN_CREATE) &&
              req->rq_status) {
                  GOTO(out_dput, 0);
          }
@@ -165,8 +151,14 @@ void reconstruct_open(struct mds_update_record *rec, int offset,
          if (S_ISREG(child->d_inode->i_mode)) {
                  rc = mds_pack_md(obd, req->rq_repmsg, 2, body,
                                   child->d_inode);
+
                  if (rc)
                          LASSERT(rc == req->rq_status);
+
+                /* If we have LOV EA data, the OST holds size, mtime */
+                if (!(body->valid & OBD_MD_FLEASIZE))
+                        body->valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
+                                        OBD_MD_FLATIME | OBD_MD_FLMTIME);
          } else {
                  /* XXX need to check this case */
          }
@@ -185,7 +177,7 @@ void reconstruct_open(struct mds_update_record *rec, int offset,
          /* If we didn't get as far as trying to open, then some locking thing
           * probably went wrong, and we'll just bail here.
           */
-        if ((disp & IT_OPEN_OPEN) == 0)
+        if (!intent_disposition(rep, DISP_OPEN_OPEN))
                  GOTO(out_dput, 0);
  
          /* If we failed, then we must have failed opening, so don't look for
@@ -197,12 +189,12 @@ void reconstruct_open(struct mds_update_record *rec, int offset,
          mfd = NULL;
          list_for_each(t, &med->med_open_head) {
                  mfd = list_entry(t, struct mds_file_data, mfd_list);
-                if (mfd->mfd_xid == req->rq_xid) 
+                if (mfd->mfd_xid == req->rq_xid)
                          break;
                  mfd = NULL;
          }
  
-        if (req->rq_export->exp_outstanding_reply) {
+        if (oldreq) {
                  /* if we're not recovering, it had better be found */
                  LASSERT(mfd);
          } else if (mfd == NULL) {
@@ -226,35 +218,180 @@ void reconstruct_open(struct mds_update_record *rec, int offset,
          EXIT;
  }
  
+/* Pin an inode by FID on behalf of a client.
+ *
+ * The FID in the request body is looked up first in the PENDING directory
+ * (where it would be an orphan) and then as a regular inode.  The inode
+ * that is found is opened and the cookie of the resulting open-file handle
+ * is returned in the reply body.
+ *
+ * Returns 0 on success or a negative errno.  Lookup failures return
+ * directly; every later exit goes through mds_finish_transno(). */
+int mds_pin(struct ptlrpc_request *req)
+{
+        struct mds_obd *mds = mds_req2mds(req);
+        struct inode *pending_dir = mds->mds_pending_dir->d_inode;
+        struct mds_file_data *mfd = NULL;
+        struct mds_body *body;
+        struct dentry *dchild;
+        struct obd_run_ctxt saved;
+        char fidname[LL_FID_NAMELEN];
+        int fidlen = 0, rc, cleanup_phase = 0, size = sizeof(*body);
+        ENTRY;
+
+        body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body));
+
+        /* Look for the inode in PENDING; the directory semaphore is held
+         * across the lookup and dropped on every path out of it. */
+        down(&pending_dir->i_sem);
+        fidlen = ll_fid2str(fidname, body->fid1.id, body->fid1.generation);
+        dchild = lookup_one_len(fidname, mds->mds_pending_dir, fidlen);
+        if (IS_ERR(dchild)) {
+                up(&pending_dir->i_sem);
+                rc = PTR_ERR(dchild);
+                CERROR("error looking up %s in PENDING: rc = %d\n",
+                       fidname, rc);
+                RETURN(rc);
+        }
+
+        cleanup_phase = 2; /* child dentry */
+
+        if (dchild->d_inode) {
+                up(&pending_dir->i_sem);
+                mds_inode_set_orphan(dchild->d_inode);
+                mds_pack_inode2fid(&body->fid1, dchild->d_inode);
+                mds_pack_inode2body(body, dchild->d_inode);
+                GOTO(openit, rc = 0);
+        }
+        dput(dchild);
+        up(&pending_dir->i_sem);
+
+        /* We didn't find it in PENDING so it isn't an orphan.  See
+         * if it's a regular inode. */
+        dchild = mds_fid2dentry(mds, &body->fid1, NULL);
+        if (!IS_ERR(dchild)) {
+                mds_pack_inode2fid(&body->fid1, dchild->d_inode);
+                mds_pack_inode2body(body, dchild->d_inode);
+                GOTO(openit, rc = 0);
+        }
+
+        /* We didn't find this inode on disk, but we're trying to pin it.
+         * This should never happen. */
+        CERROR("ENOENT during mds_pin for fid "LPU64"/%u\n", body->fid1.id,
+               body->fid1.generation);
+        RETURN(-ENOENT);
+
+ openit:
+        /* dentry_open does a dput(de) and mntput(mds->mds_vfsmnt) on error */
+        mfd = mds_dentry_open(dchild, mds->mds_vfsmnt, body->flags, req);
+        if (IS_ERR(mfd)) {
+                dchild = NULL; /* prevent a double dput in cleanup phase 2 */
+                GOTO(cleanup, rc = PTR_ERR(mfd));
+        }
+
+        /* Mark the mfd as allocated before anything else can fail, so that
+         * a reply-packing error below still destroys it in cleanup. */
+        cleanup_phase = 4; /* mfd allocated */
+
+        rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
+        if (rc) {
+                CERROR("out of memory\n");
+                GOTO(cleanup, rc);
+        }
+        /* From here on "body" refers to the reply, not the request. */
+        body = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*body));
+
+        body->handle.cookie = mfd->mfd_handle.h_cookie;
+        CDEBUG(D_INODE, "mfd %p, cookie "LPX64"\n", mfd,
+               mfd->mfd_handle.h_cookie);
+        GOTO(cleanup, rc = 0);
+
+ cleanup:
+        push_ctxt(&saved, &mds->mds_ctxt, NULL);
+        rc = mds_finish_transno(mds, dchild ? dchild->d_inode : NULL, NULL,
+                                req, rc, 0);
+        pop_ctxt(&saved, &mds->mds_ctxt, NULL);
+        /* XXX what do we do here if mds_finish_transno itself failed? */
+        switch (cleanup_phase) {
+        case 4:
+                if (rc)
+                        mds_mfd_destroy(mfd);
+                /* fallthrough */
+        case 2:
+                if (rc || S_ISLNK(dchild->d_inode->i_mode))
+                        l_dput(dchild);
+        }
+        RETURN(rc);
+}
+
  int mds_open(struct mds_update_record *rec, int offset,
               struct ptlrpc_request *req, struct lustre_handle *child_lockh)
  {
+        /* XXX ALLOCATE _something_ - 464 bytes on stack here */
          static const char acc_table [] = {[O_RDONLY] MAY_READ,
                                            [O_WRONLY] MAY_WRITE,
                                            [O_RDWR]   MAY_READ | MAY_WRITE};
          struct mds_obd *mds = mds_req2mds(req);
          struct obd_device *obd = req->rq_export->exp_obd;
-        struct ldlm_reply *rep;
-        struct mds_body *body;
-        struct dentry *dchild = NULL, *parent;
+        struct ldlm_reply *rep = NULL;
+        struct mds_body *body = NULL;
+        struct dentry *dchild = NULL, *parent = NULL;
          struct mds_export_data *med;
          struct mds_file_data *mfd = NULL;
          struct ldlm_res_id child_res_id = { .name = {0} };
          struct lustre_handle parent_lockh;
          int rc = 0, parent_mode, child_mode = LCK_PR, lock_flags, created = 0;
-        int cleanup_phase = 0;
+        int cleanup_phase = 0, acc_mode;
          void *handle = NULL;
-        int acc_mode;
          ENTRY;
  
-        LASSERT(offset == 2);                  /* only called via intent */
-        rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*rep));
-        body = lustre_msg_buf(req->rq_repmsg, 1, sizeof (*body));
+        if (offset == 2) { /* intent */
+                rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*rep));
+                body = lustre_msg_buf(req->rq_repmsg, 1, sizeof (*body));
+        } else if (offset == 0) { /* non-intent reint */
+                body = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*body));
+        } else {
+                body = NULL;
+                LBUG();
+        }
  
          MDS_CHECK_RESENT(req, reconstruct_open(rec, offset, req, child_lockh));
  
+        /* Step 0: If we are passed a fid, then we assume the client already
+         * opened this file and is only replaying the RPC, so we open the
+         * inode by fid (at some large expense in security).
+         */
+        if (rec->ur_fid2->id) {
+                struct inode *pending_dir = mds->mds_pending_dir->d_inode;
+                char fidname[LL_FID_NAMELEN];
+                int fidlen = 0;
+
+                down(&pending_dir->i_sem);
+                fidlen = ll_fid2str(fidname, rec->ur_fid2->id,
+                                    rec->ur_fid2->generation);
+                dchild = lookup_one_len(fidname, mds->mds_pending_dir, fidlen);
+                if (IS_ERR(dchild)) {
+                        up(&pending_dir->i_sem);
+                        rc = PTR_ERR(dchild);
+                        CERROR("error looking up %s in PENDING: rc = %d\n",
+                               fidname, rc);
+                        RETURN(rc);
+                }
+
+                if (dchild->d_inode) {
+                        up(&pending_dir->i_sem);
+                        mds_inode_set_orphan(dchild->d_inode);
+                        mds_pack_inode2fid(&body->fid1, dchild->d_inode);
+                        mds_pack_inode2body(body, dchild->d_inode);
+                        cleanup_phase = 2;
+                        GOTO(openit, rc = 0);
+                }
+                dput(dchild);
+                up(&pending_dir->i_sem);
+
+                /* We didn't find it in PENDING so it isn't an orphan.  See
+                 * if it was a regular inode that was previously created.
+                 */
+                dchild = mds_fid2dentry(mds, rec->ur_fid2, NULL);
+                if (!IS_ERR(dchild)) {
+                        mds_pack_inode2fid(&body->fid1, dchild->d_inode);
+                        mds_pack_inode2body(body, dchild->d_inode);
+                        cleanup_phase = 2;
+                        GOTO(openit, rc = 0);
+                }
+
+                /* We didn't find the correct inode on disk either, so we
+                 * need to re-create it via a regular replay.  Do that below.
+                 */
+                LASSERT(rec->ur_flags & O_CREAT);
+        }
+        LASSERT(offset == 2); /* If we got here, we must be called via intent */
+
          med = &req->rq_export->exp_mds_data;
-        rep->lock_policy_res1 |= IT_OPEN_LOOKUP;
          if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OPEN_PACK)) {
                  CERROR("test case OBD_FAIL_MDS_OPEN_PACK\n");
                  req->rq_status = -ENOMEM;
@@ -263,11 +400,12 @@ int mds_open(struct mds_update_record *rec, int offset,
  
          if ((rec->ur_flags & O_ACCMODE) >= sizeof (acc_table))
                  RETURN(-EINVAL);
-        acc_mode = acc_table [rec->ur_flags & O_ACCMODE];
+        acc_mode = acc_table[rec->ur_flags & O_ACCMODE];
          if ((rec->ur_flags & O_TRUNC) != 0)
                  acc_mode |= MAY_WRITE;
  
          /* Step 1: Find and lock the parent */
+        intent_set_disposition(rep, DISP_LOOKUP_EXECD);
          parent_mode = (rec->ur_flags & O_CREAT) ? LCK_PW : LCK_PR;
          parent = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, parent_mode,
                                         &parent_lockh);
@@ -288,38 +426,88 @@ int mds_open(struct mds_update_record *rec, int offset,
          cleanup_phase = 2; /* child dentry */
  
          if (dchild->d_inode)
-                rep->lock_policy_res1 |= IT_OPEN_POS;
+                intent_set_disposition(rep, DISP_LOOKUP_POS);
          else
-                rep->lock_policy_res1 |= IT_OPEN_NEG;
+                intent_set_disposition(rep, DISP_LOOKUP_NEG);
  
          /* Step 3: If the child was negative, and we're supposed to,
           * create it. */
          if (!dchild->d_inode) {
+                unsigned long ino = rec->ur_fid2->id;
+
                  if (!(rec->ur_flags & O_CREAT)) {
                          /* It's negative and we weren't supposed to create it */
                          GOTO(cleanup, rc = -ENOENT);
                  }
  
-                rep->lock_policy_res1 |= IT_OPEN_CREATE;
-                handle = fsfilt_start(obd, parent->d_inode, FSFILT_OP_CREATE);
+                intent_set_disposition(rep, DISP_OPEN_CREATE);
+                handle = fsfilt_start(obd, parent->d_inode, FSFILT_OP_CREATE,
+                                      NULL);
                  if (IS_ERR(handle)) {
                          rc = PTR_ERR(handle);
                          handle = NULL;
                          GOTO(cleanup, rc);
                  }
+                if (ino)
+                        dchild->d_fsdata = (void *)(unsigned long)ino;
+
                  rc = vfs_create(parent->d_inode, dchild, rec->ur_mode);
-                if (rc)
+                if (dchild->d_fsdata == (void *)(unsigned long)ino)
+                        dchild->d_fsdata = NULL;
+
+                if (rc) {
+                        CDEBUG(D_INODE, "error during create: %d\n", rc);
                          GOTO(cleanup, rc);
-                created = 1;
+                } else {
+                        struct iattr iattr;
+                        struct inode *inode = dchild->d_inode;
+
+                        if (ino) {
+                                LASSERT(ino == inode->i_ino);
+                                /* Written as part of setattr */
+                                inode->i_generation = rec->ur_fid2->generation;
+                                CDEBUG(D_HA, "recreated ino %lu with gen %x\n",
+                                       inode->i_ino, inode->i_generation);
+                        }
+
+                        created = 1;
+                        LTIME_S(iattr.ia_atime) = rec->ur_time;
+                        LTIME_S(iattr.ia_ctime) = rec->ur_time;
+                        LTIME_S(iattr.ia_mtime) = rec->ur_time;
+
+                        iattr.ia_uid = rec->ur_uid;
+                        if (parent->d_inode->i_mode & S_ISGID) {
+                                iattr.ia_gid = parent->d_inode->i_gid;
+                        } else
+                                iattr.ia_gid = rec->ur_gid;
+
+                        iattr.ia_valid = ATTR_UID | ATTR_GID | ATTR_ATIME |
+                                ATTR_MTIME | ATTR_CTIME;
+
+                        rc = fsfilt_setattr(obd, dchild, handle, &iattr, 0);
+                        if (rc) {
+                                CERROR("error on setattr: rc = %d\n", rc);
+                                /* XXX should we abort here in case of error? */
+                        }
+                }
+
                  child_mode = LCK_PW;
                  acc_mode = 0;                  /* Don't check for permissions */
          }
  
+        LASSERT(!mds_inode_is_orphan(dchild->d_inode));
+
          /* Step 4: It's positive, so lock the child */
          child_res_id.name[0] = dchild->d_inode->i_ino;
          child_res_id.name[1] = dchild->d_inode->i_generation;
   reacquire:
          lock_flags = 0;
+        /* For the open(O_CREAT) case, this would technically be a lock
+         * inversion (getting a VFS lock after starting a transaction),
+         * but in that case we cannot possibly block on this lock because
+         * we just created the child and also hold a write lock on the
+         * parent, so nobody could be holding the lock yet.
+         */
          rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
                                child_res_id, LDLM_PLAIN, NULL, 0, child_mode,
                                &lock_flags, ldlm_completion_ast,
@@ -346,15 +534,19 @@ int mds_open(struct mds_update_record *rec, int offset,
  
                  /* An append-only file must be opened in append mode for
                   * writing */
-                if (IS_APPEND(dchild->d_inode) &&
-                    (acc_mode & MAY_WRITE) != 0 &&
+                if (IS_APPEND(dchild->d_inode) && (acc_mode & MAY_WRITE) != 0 &&
                      ((rec->ur_flags & O_APPEND) == 0 ||
                       (rec->ur_flags & O_TRUNC) != 0))
-                        GOTO (cleanup, rc = -EPERM);
+                        GOTO(cleanup, rc = -EPERM);
  
                  rc = mds_pack_md(obd, req->rq_repmsg, 2, body, dchild->d_inode);
                  if (rc)
                          GOTO(cleanup, rc);
+
+                /* If we have LOV EA data, the OST holds size, mtime */
+                if (!(body->valid & OBD_MD_FLEASIZE))
+                        body->valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
+                                        OBD_MD_FLATIME | OBD_MD_FLMTIME);
          }
  
          if (!created && (rec->ur_flags & O_CREAT) &&
@@ -364,9 +556,9 @@ int mds_open(struct mds_update_record *rec, int offset,
                  GOTO(cleanup, rc = -EEXIST); // returns a lock to the client
          }
  
-        /* If we're opening a file without an EA, the client needs a write
-         * lock. */
-        if (S_ISREG(dchild->d_inode->i_mode) &&
+        /* If we're opening a file without an EA for write, the client needs
+         * a write lock. */
+        if (S_ISREG(dchild->d_inode->i_mode) && (rec->ur_flags & O_ACCMODE) &&
              child_mode != LCK_PW && !(body->valid & OBD_MD_FLEASIZE)) {
                  ldlm_lock_decref(child_lockh, child_mode);
                  child_mode = LCK_PW;
@@ -381,15 +573,14 @@ int mds_open(struct mds_update_record *rec, int offset,
                  GOTO(cleanup, rc = -ENOTDIR);
  
          /* Step 5: mds_open it */
-        rep->lock_policy_res1 |= IT_OPEN_OPEN;
-
+        intent_set_disposition(rep, DISP_OPEN_OPEN);
+ openit:
          /* dentry_open does a dput(de) and mntput(mds->mds_vfsmnt) on error */
          mfd = mds_dentry_open(dchild, mds->mds_vfsmnt,
                                rec->ur_flags & ~(O_DIRECT | O_TRUNC), req);
-        if (!mfd) {
-                CERROR("mds: out of memory\n");
-                dchild = NULL; /* prevent a double dput in step 2 */
-                GOTO(cleanup, rc = -ENOMEM);
+        if (IS_ERR(mfd)) {
+                dchild = NULL; /* prevent a double dput in cleanup phase 2 */
+                GOTO(cleanup, rc = PTR_ERR(mfd));
          }
  
          cleanup_phase = 4; /* mfd allocated */
@@ -401,6 +592,7 @@ int mds_open(struct mds_update_record *rec, int offset,
   cleanup:
          rc = mds_finish_transno(mds, dchild ? dchild->d_inode : NULL, handle,
                                  req, rc, rep->lock_policy_res1);
+        /* XXX what do we do here if mds_finish_transno itself failed? */
          switch (cleanup_phase) {
          case 4:
                  if (rc && !S_ISLNK(dchild->d_inode->i_mode))
@@ -410,19 +602,22 @@ int mds_open(struct mds_update_record *rec, int offset,
                   * ldlm_intent_policy: if we found the dentry, or we tried to
                   * open it (meaning that we created, if it wasn't found), then
                   * we return the lock to the caller and client. */
-                if (!(rep->lock_policy_res1 & (IT_OPEN_OPEN | IT_OPEN_POS)))
+                if (intent_disposition(rep, DISP_LOOKUP_NEG) &&
+                    !intent_disposition(rep, DISP_OPEN_OPEN))
                          ldlm_lock_decref(child_lockh, child_mode);
          case 2:
                  if (rc || S_ISLNK(dchild->d_inode->i_mode))
                          l_dput(dchild);
          case 1:
-                l_dput(parent);
-                if (rc) {
-                        ldlm_lock_decref(&parent_lockh, parent_mode);
-                } else {
-                        memcpy(&req->rq_ack_locks[0].lock, &parent_lockh,
-                               sizeof(parent_lockh));
-                        req->rq_ack_locks[0].mode = parent_mode;
+                if (parent) {
+                        l_dput(parent);
+                        if (rc) {
+                                ldlm_lock_decref(&parent_lockh, parent_mode);
+                        } else {
+                                memcpy(&req->rq_ack_locks[0].lock,&parent_lockh,
+                                       sizeof(parent_lockh));
+                                req->rq_ack_locks[0].mode = parent_mode;
+                        }
                  }
          }
          RETURN(rc);
diff --git a/lustre/mds/mds_reint.c b/lustre/mds/mds_reint.c

index 50949dd..61871d7 100644 (file)
--- a/lustre/mds/mds_reint.c
+++ b/lustre/mds/mds_reint.c
@@ -37,19 +37,93 @@
  #include <linux/lustre_mds.h>
  #include <linux/lustre_dlm.h>
  #include <linux/lustre_fsfilt.h>
+
  #include "mds_internal.h"
  
-extern inline struct mds_obd *mds_req2mds(struct ptlrpc_request *req);
+/* Transaction commit callback: hands the committed transno and the error
+ * status on to obd_transno_commit_cb().  The opaque "data" argument is
+ * not used. */
+void mds_commit_cb(struct obd_device *obd,
+                   __u64 transno,
+                   void *data,
+                   int error)
+{
+        obd_transno_commit_cb(obd, transno, error);
+}
+
+struct mds_logcancel_data { /* context handed to mds_cancel_cookies_cb() */
+        struct lov_mds_md      *mlcd_lmm; /* EA copy, placed after the cookies */
+        int                     mlcd_size; /* total allocation size, for OBD_FREE */
+        int                     mlcd_cookielen; /* bytes used in mlcd_cookies */
+        int                     mlcd_eadatalen; /* bytes of EA data at mlcd_lmm */
+        struct llog_cookie      mlcd_cookies[0]; /* trailing variable-length data */
+};
+
+/* Establish a connection to the OSC when we first need it.  We don't do
+ * this during MDS setup because that would introduce setup ordering issues.
+ *
+ * mds->mds_osc_obd caches the outcome: NULL means no attempt has been made
+ * yet, an ERR_PTR() records a failed lookup or connect (the same error is
+ * returned again without retrying), and any other value means the connect
+ * already succeeded.  A failure of obd_set_info() is returned to the
+ * caller but is not cached.
+ *
+ * Returns 0 on success or a negative errno. */
+static int mds_osc_connect(struct obd_device *obd, struct mds_obd *mds)
+{
+        int rc;
+        ENTRY;
+
+        if (IS_ERR(mds->mds_osc_obd))
+                RETURN(PTR_ERR(mds->mds_osc_obd));
+
+        if (mds->mds_osc_obd)
+                RETURN(0);
+
+        mds->mds_osc_obd = class_uuid2obd(&mds->mds_osc_uuid);
+        if (!mds->mds_osc_obd) {
+                CERROR("MDS cannot locate OSC/LOV %s - no logging!\n",
+                       mds->mds_osc_uuid.uuid);
+                mds->mds_osc_obd = ERR_PTR(-ENOTCONN);
+                RETURN(-ENOTCONN);
+        }
+
+        rc = obd_connect(&mds->mds_osc_conn, mds->mds_osc_obd, &obd->obd_uuid);
+        if (rc) {
+                /* The device was found; it is the connect that failed. */
+                CERROR("MDS cannot connect to OSC/LOV %s: rc = %d - "
+                       "no logging!\n", mds->mds_osc_uuid.uuid, rc);
+                mds->mds_osc_obd = ERR_PTR(rc);
+                RETURN(rc);
+        }
+
+        rc = obd_set_info(&mds->mds_osc_conn, strlen("mds_conn"), "mds_conn",
+                          0, NULL);
+        RETURN(rc);
+}
  
-static void mds_commit_cb(struct obd_device *obd, __u64 transno, int error)
+static void mds_cancel_cookies_cb(struct obd_device *obd, __u64 transno,
+                                  void *cb_data, int error)
  {
+        struct mds_logcancel_data *mlcd = cb_data; /* freed before returning */
+        struct lov_stripe_md *lsm = NULL; /* NOTE(review): not released here; confirm ownership */
+        int rc;
+
          obd_transno_commit_cb(obd, transno, error);
+
+        CDEBUG(D_HA, "cancelling %d cookies\n",
+               (int)(mlcd->mlcd_cookielen / sizeof(*mlcd->mlcd_cookies))); /* bytes -> count */
+
+        rc = obd_unpackmd(&obd->u.mds.mds_osc_conn, &lsm, mlcd->mlcd_lmm,
+                          mlcd->mlcd_eadatalen);
+        if (rc < 0) { /* unpack failed: log it and skip the cancel */
+                CERROR("bad LSM cancelling %d log cookies: rc %d\n",
+                       (int)(mlcd->mlcd_cookielen/sizeof(*mlcd->mlcd_cookies)),
+                       rc);
+        } else {
+                rc = obd_log_cancel(&obd->u.mds.mds_osc_conn, lsm,
+                                    mlcd->mlcd_cookielen /
+                                    sizeof(*mlcd->mlcd_cookies),
+                                    mlcd->mlcd_cookies, OBD_LLOG_FL_SENDNOW);
+                /* XXX the flags should be 0 normally; SENDNOW is for debug */
+                if (rc)
+                        CERROR("error cancelling %d log cookies: rc %d\n",
+                               (int)(mlcd->mlcd_cookielen /
+                                     sizeof(*mlcd->mlcd_cookies)), rc);
+        }
+
+        OBD_FREE(mlcd, mlcd->mlcd_size); /* mlcd_size is the full allocation size */
  }
  
  /* Assumes caller has already pushed us into the kernel context. */
-int mds_finish_transno(struct mds_obd *mds, struct inode *i, void *handle,
-                       struct ptlrpc_request *req, int rc,
-                       __u32 op_data)
+int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle,
+                       struct ptlrpc_request *req, int rc, __u32 op_data)
  {
          struct mds_export_data *med = &req->rq_export->exp_mds_data;
          struct mds_client_data *mcd = med->med_mcd;
@@ -70,15 +144,15 @@ int mds_finish_transno(struct mds_obd *mds, struct inode *i, void *handle,
  
          if (!handle) {
                  /* if we're starting our own xaction, use our own inode */
-                i = mds->mds_rcvd_filp->f_dentry->d_inode;
-                handle = fsfilt_start(obd, i, FSFILT_OP_SETATTR);
+                inode = mds->mds_rcvd_filp->f_dentry->d_inode;
+                handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR, NULL);
                  if (IS_ERR(handle)) {
                          CERROR("fsfilt_start: %ld\n", PTR_ERR(handle));
                          GOTO(out, rc = PTR_ERR(handle));
                  }
          }
  
-        off = MDS_LR_CLIENT + med->med_off * MDS_LR_SIZE;
+        off = med->med_off;
  
          transno = req->rq_reqmsg->transno;
          if (transno == 0) {
@@ -94,10 +168,11 @@ int mds_finish_transno(struct mds_obd *mds, struct inode *i, void *handle,
          mcd->mcd_last_data = cpu_to_le32(op_data);
  
          fsfilt_set_last_rcvd(req->rq_export->exp_obd, transno, handle,
-                             mds_commit_cb);
-        written = lustre_fwrite(mds->mds_rcvd_filp, mcd, sizeof(*mcd), &off);
-        CDEBUG(D_INODE, "wrote trans "LPU64" client %s at #%u: written = "
-               LPSZ"\n", transno, mcd->mcd_uuid, med->med_off, written);
+                             mds_commit_cb, NULL);
+        written = fsfilt_write_record(obd, mds->mds_rcvd_filp,
+                                      (char *)mcd, sizeof(*mcd), &off);
+        CDEBUG(D_INODE, "wrote trans "LPU64" client %s at idx %u: written = "
+               LPSZ"\n", transno, mcd->mcd_uuid, med->med_idx, written);
  
          if (written != sizeof(*mcd)) {
                  CERROR("error writing to last_rcvd: rc = "LPSZ"\n", written);
@@ -110,7 +185,7 @@ int mds_finish_transno(struct mds_obd *mds, struct inode *i, void *handle,
          }
  
  commit:
-        err = fsfilt_commit(obd, i, handle, 0);
+        err = fsfilt_commit(obd, inode, handle, 0);
          if (err) {
                  CERROR("error committing transaction: %d\n", err);
                  if (!rc)
@@ -139,22 +214,29 @@ int mds_fix_attr(struct inode *inode, struct mds_update_record *rec)
          if (!(ia_valid & ATTR_RAW))
                  RETURN(0);
  
-        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-                RETURN(-EPERM);
-
-        LTIME_S(attr->ia_ctime) = now;
+        if (!(ia_valid & ATTR_CTIME_SET))
+                LTIME_S(attr->ia_ctime) = now;
          if (!(ia_valid & ATTR_ATIME_SET))
                  LTIME_S(attr->ia_atime) = now;
          if (!(ia_valid & ATTR_MTIME_SET))
                  LTIME_S(attr->ia_mtime) = now;
  
+        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+                RETURN(-EPERM);
+
          /* times */
-        if ((ia_valid & (ATTR_MTIME|ATTR_ATIME))==(ATTR_MTIME|ATTR_ATIME) &&
-             !(ia_valid & ATTR_ATIME_SET)) {
+        if ((ia_valid & (ATTR_MTIME|ATTR_ATIME))==(ATTR_MTIME|ATTR_ATIME)) {
                  if (rec->ur_fsuid != inode->i_uid &&
                      (error = permission(inode,MAY_WRITE)) != 0)
                          RETURN(error);
-        } else if (ia_valid & ATTR_UID) {
+        }
+
+        if (ia_valid & ATTR_SIZE) {
+                if ((error = permission(inode,MAY_WRITE)) != 0)
+                        RETURN(error);
+        }
+
+        if (ia_valid & ATTR_UID) {
                  /* chown */
                  error = -EPERM;
                  if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
@@ -164,7 +246,6 @@ int mds_fix_attr(struct inode *inode, struct mds_update_record *rec)
                  if (attr->ia_gid == (gid_t) -1)
                          attr->ia_gid = inode->i_gid;
                  attr->ia_mode = inode->i_mode;
-                attr->ia_valid =  ATTR_UID | ATTR_GID | ATTR_CTIME;
                  /*
                   * If the user or group of a non-directory has been
                   * changed by a non-root user, remove the setuid bit.
@@ -232,6 +313,14 @@ static void reconstruct_reint_setattr(struct mds_update_record *rec,
          mds_pack_inode2fid(&body->fid1, de->d_inode);
          mds_pack_inode2body(body, de->d_inode);
  
+        /* Don't return OST-specific attributes if we didn't just set them */
+        if (rec->ur_iattr.ia_valid & ATTR_SIZE)
+                body->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
+        if (rec->ur_iattr.ia_valid & (ATTR_MTIME | ATTR_MTIME_SET))
+                body->valid |= OBD_MD_FLMTIME;
+        if (rec->ur_iattr.ia_valid & (ATTR_ATIME | ATTR_ATIME_SET))
+                body->valid |= OBD_MD_FLATIME;
+
          l_dput(de);
  }
  
@@ -251,6 +340,7 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset,
          struct inode *inode = NULL;
          struct lustre_handle lockh;
          void *handle = NULL;
+        struct mds_logcancel_data *mlcd = NULL;
          int rc = 0, cleanup_phase = 0, err, locked = 0;
          ENTRY;
  
@@ -279,21 +369,28 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset,
          OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_SETATTR_WRITE,
                         to_kdev_t(inode->i_sb->s_dev));
  
-        handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR);
+#ifdef ENABLE_ORPHANS
+        if (unlikely(mds->mds_osc_obd == NULL))
+                mds_osc_connect(obd, mds);
+#endif
+
+        handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR, NULL);
          if (IS_ERR(handle)) {
                  rc = PTR_ERR(handle);
                  handle = NULL;
                  GOTO(cleanup, rc);
          }
  
+        if (rec->ur_iattr.ia_valid & (ATTR_MTIME | ATTR_CTIME))
+                CDEBUG(D_INODE, "setting mtime %lu, ctime %lu\n",
+                       LTIME_S(rec->ur_iattr.ia_mtime),
+                       LTIME_S(rec->ur_iattr.ia_ctime));
          rc = mds_fix_attr(inode, rec);
          if (rc)
                  GOTO(cleanup, rc);
  
          rc = fsfilt_setattr(obd, de, handle, &rec->ur_iattr, 0);
-        if (rc == 0 &&
-            S_ISREG(inode->i_mode) &&
-            rec->ur_eadata != NULL) {
+        if (rc == 0 && S_ISREG(inode->i_mode) && rec->ur_eadata != NULL) {
                  rc = fsfilt_set_md(obd, inode, handle,
                                     rec->ur_eadata, rec->ur_eadatalen);
          }
@@ -302,10 +399,39 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset,
          mds_pack_inode2fid(&body->fid1, inode);
          mds_pack_inode2body(body, inode);
  
+        /* Don't return OST-specific attributes if we didn't just set them */
+        if (rec->ur_iattr.ia_valid & ATTR_SIZE)
+                body->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
+        if (rec->ur_iattr.ia_valid & (ATTR_MTIME | ATTR_MTIME_SET))
+                body->valid |= OBD_MD_FLMTIME;
+        if (rec->ur_iattr.ia_valid & (ATTR_ATIME | ATTR_ATIME_SET))
+                body->valid |= OBD_MD_FLATIME;
+
+        if (rc == 0 && rec->ur_cookielen && !IS_ERR(mds->mds_osc_obd)) {
+                OBD_ALLOC(mlcd, sizeof(*mlcd) + rec->ur_cookielen +
+                          rec->ur_eadatalen);
+                if (mlcd) {
+                        mlcd->mlcd_size = sizeof(*mlcd) + rec->ur_cookielen +
+                                rec->ur_eadatalen;
+                        mlcd->mlcd_eadatalen = rec->ur_eadatalen;
+                        mlcd->mlcd_cookielen = rec->ur_cookielen;
+                        mlcd->mlcd_lmm = (void *)&mlcd->mlcd_cookies +
+                                mlcd->mlcd_cookielen;
+                        memcpy(&mlcd->mlcd_cookies, rec->ur_logcookies,
+                               mlcd->mlcd_cookielen);
+                        memcpy(mlcd->mlcd_lmm, rec->ur_eadata,
+                               mlcd->mlcd_eadatalen);
+                } else {
+                        CERROR("unable to allocate log cancel data\n");
+                }
+        }
          EXIT;
   cleanup:
+        if (mlcd != NULL)
+                fsfilt_set_last_rcvd(req->rq_export->exp_obd, 0, handle,
+                                     mds_cancel_cookies_cb, mlcd);
          err = mds_finish_transno(mds, inode, handle, req, rc, 0);
-        switch(cleanup_phase) {
+        switch (cleanup_phase) {
          case 1:
                  l_dput(de);
                  if (locked) {
@@ -418,7 +544,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset,
  
          switch (type) {
          case S_IFREG:{
-                handle = fsfilt_start(obd, dir, FSFILT_OP_CREATE);
+                handle = fsfilt_start(obd, dir, FSFILT_OP_CREATE, NULL);
                  if (IS_ERR(handle))
                          GOTO(cleanup, rc = PTR_ERR(handle));
                  rc = vfs_create(dir, dchild, rec->ur_mode);
@@ -426,7 +552,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset,
                  break;
          }
          case S_IFDIR:{
-                handle = fsfilt_start(obd, dir, FSFILT_OP_MKDIR);
+                handle = fsfilt_start(obd, dir, FSFILT_OP_MKDIR, NULL);
                  if (IS_ERR(handle))
                          GOTO(cleanup, rc = PTR_ERR(handle));
                  rc = vfs_mkdir(dir, dchild, rec->ur_mode);
@@ -434,7 +560,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset,
                  break;
          }
          case S_IFLNK:{
-                handle = fsfilt_start(obd, dir, FSFILT_OP_SYMLINK);
+                handle = fsfilt_start(obd, dir, FSFILT_OP_SYMLINK, NULL);
                  if (IS_ERR(handle))
                          GOTO(cleanup, rc = PTR_ERR(handle));
                  if (rec->ur_tgt == NULL)        /* no target supplied */
@@ -449,7 +575,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset,
          case S_IFIFO:
          case S_IFSOCK:{
                  int rdev = rec->ur_rdev;
-                handle = fsfilt_start(obd, dir, FSFILT_OP_MKNOD);
+                handle = fsfilt_start(obd, dir, FSFILT_OP_MKNOD, NULL);
                  if (IS_ERR(handle))
                          GOTO(cleanup, (handle = NULL, rc = PTR_ERR(handle)));
                  rc = vfs_mknod(dir, dchild, rec->ur_mode, rdev);
@@ -458,13 +584,13 @@ static int mds_reint_create(struct mds_update_record *rec, int offset,
          }
          default:
                  CERROR("bad file type %o creating %s\n", type, rec->ur_name);
+                dchild->d_fsdata = NULL;
                  GOTO(cleanup, rc = -EINVAL);
          }
  
-        /* In case we stored the desired inum in here, we want to clean up.
-         * We also do this in the cleanup block, for the error cases.
-         */
-        dchild->d_fsdata = NULL;
+        /* In case we stored the desired inum in here, we want to clean up. */
+        if (dchild->d_fsdata == (void *)(unsigned long)rec->ur_fid2->id)
+                dchild->d_fsdata = NULL;
  
          if (rc) {
                  CDEBUG(D_INODE, "error during create: %d\n", rc);
@@ -532,7 +658,6 @@ cleanup:
          }
          switch (cleanup_phase) {
          case 2: /* child dentry */
-                dchild->d_fsdata = NULL;
                  l_dput(dchild);
          case 1: /* locked parent dentry */
                  if (rc) {
@@ -634,43 +759,134 @@ static void reconstruct_reint_unlink(struct mds_update_record *rec, int offset,
                    "can't get EA for reconstructed unlink, leaking OST inodes");
  }
  
+/* If we are unlinking an open file/dir (i.e. creating an orphan) then
+ * we instead link the inode into the PENDING directory until it is
+ * finally released.  We can't simply call mds_reint_rename() or some
+ * part thereof, because we don't have the inode to check for link
+ * count/open status until after it is locked.
+ *
+ * For lock ordering, we always get the PENDING, then pending_child lock
+ * last to avoid deadlocks.
+ *
+ * The entry created in PENDING is named by ll_fid2str() of the child's
+ * inode number and generation.  The transaction started here is handed
+ * back through @handle and is NOT committed by this function; if
+ * fsfilt_start() fails, *handle holds its error pointer.
+ *
+ * Returns 0 on success or if the PENDING entry already exists, otherwise
+ * the error from lookup_one_len(), fsfilt_start() or vfs_rename().
+ */
+static int mds_unlink_orphan(struct mds_update_record *rec,
+                             struct obd_device *obd, struct dentry *dparent,
+                             struct dentry *dchild, void **handle)
+{
+        struct mds_obd *mds = &obd->u.mds;
+        struct inode *pending_dir = mds->mds_pending_dir->d_inode;
+        struct dentry *pending_child;
+        char fidname[LL_FID_NAMELEN];
+        int fidlen = 0, rc;
+        ENTRY;
+
+        LASSERT(!mds_inode_is_orphan(dchild->d_inode));
+
+        /* PENDING's i_sem is held across both the lookup and the rename */
+        down(&pending_dir->i_sem);
+        fidlen = ll_fid2str(fidname, dchild->d_inode->i_ino,
+                            dchild->d_inode->i_generation);
+
+        CDEBUG(D_ERROR, "pending destroy of %dx open file %s = %s\n",
+               mds_open_orphan_count(dchild->d_inode),
+               rec->ur_name, fidname);
+
+        pending_child = lookup_one_len(fidname, mds->mds_pending_dir, fidlen);
+        if (IS_ERR(pending_child))
+                GOTO(out_lock, rc = PTR_ERR(pending_child));
+
+        /* Name already present in PENDING: treated as success, and no
+         * transaction is started (*handle is left untouched). */
+        if (pending_child->d_inode != NULL) {
+                CERROR("re-destroying orphan file %s?\n", rec->ur_name);
+                LASSERT(pending_child->d_inode == dchild->d_inode);
+                GOTO(out_dput, rc = 0);
+        }
+
+        *handle = fsfilt_start(obd, pending_dir, FSFILT_OP_RENAME, NULL);
+        if (IS_ERR(*handle))
+                GOTO(out_dput, rc = PTR_ERR(*handle));
+
+        rc = vfs_rename(dparent->d_inode, dchild, pending_dir, pending_child);
+        if (rc)
+                CERROR("error renaming orphan %lu/%s to PENDING: rc = %d\n",
+                       dparent->d_inode->i_ino, rec->ur_name, rc);
+        else
+                /* only flag the inode once the rename has succeeded */
+                mds_inode_set_orphan(dchild->d_inode);
+out_dput:
+        dput(pending_child);
+out_lock:
+        up(&pending_dir->i_sem);
+        RETURN(rc);
+}
+
+/* Add an MDS_UNLINK_REC record for @inode to the MDS catalog
+ * (mds->mds_catalog) via obd_log_add().
+ *
+ * The striping EA is unpacked from reply buffer @offset of @repmsg; reply
+ * buffer @offset + 1 is handed to obd_log_add() as an array of
+ * struct llog_cookie (presumably filled in with the cookies of the new
+ * record -- confirm against obd_log_add()).
+ *
+ * Returns the result of obd_log_add(), or a negative errno if mds_osc_obd
+ * holds an error pointer, the EA cannot be unpacked, or allocation fails.
+ */
+static int mds_log_op_unlink(struct obd_device *obd, struct mds_obd *mds,
+                             struct inode *inode, struct lustre_msg *repmsg,
+                             int offset)
+{
+        struct lov_stripe_md *lsm = NULL;
+        struct llog_unlink_rec *lur;
+        int rc;
+        ENTRY;
+
+        if (IS_ERR(mds->mds_osc_obd))
+                RETURN(PTR_ERR(mds->mds_osc_obd));
+
+        rc = obd_unpackmd(&mds->mds_osc_conn, &lsm,
+                          lustre_msg_buf(repmsg, offset, 0),
+                          repmsg->buflens[offset]);
+        if (rc < 0)
+                RETURN(rc);
+
+        /* From here on lsm is owned by us and must be released with
+         * obd_free_memmd() on every exit path. */
+        OBD_ALLOC(lur, sizeof(*lur));
+        if (!lur)
+                GOTO(out_lsm, rc = -ENOMEM);
+        lur->lur_hdr.lth_len = lur->lur_end_len = sizeof(*lur);
+        lur->lur_hdr.lth_type = MDS_UNLINK_REC;
+        lur->lur_oid = inode->i_ino;
+        lur->lur_ogen = inode->i_generation;
+
+        rc = obd_log_add(&mds->mds_osc_conn, mds->mds_catalog, &lur->lur_hdr,
+                         lsm, lustre_msg_buf(repmsg, offset + 1, 0),
+                         repmsg->buflens[offset+1]/sizeof(struct llog_cookie));
+
+        OBD_FREE(lur, sizeof(*lur));
+out_lsm:
+        obd_free_memmd(&mds->mds_osc_conn, &lsm);
+
+        RETURN(rc);
+}
+
  static int mds_reint_unlink(struct mds_update_record *rec, int offset,
                              struct ptlrpc_request *req,
-                            struct lustre_handle *child_lockh)
+                            struct lustre_handle *lh)
  {
-        struct dentry *dir_de = NULL;
+        struct dentry *dparent = NULL;
          struct dentry *dchild = NULL;
          struct mds_obd *mds = mds_req2mds(req);
          struct obd_device *obd = req->rq_export->exp_obd;
          struct mds_body *body = NULL;
-        struct inode *dir_inode = NULL, *child_inode;
-        struct lustre_handle parent_lockh;
+        struct inode *child_inode;
+        struct lustre_handle parent_lockh, child_lockh;
          void *handle = NULL;
          struct ldlm_res_id child_res_id = { .name = {0} };
-        int rc = 0, flags = 0, return_lock = 0;
-        int cleanup_phase = 0;
+        int rc = 0, flags = 0, log_unlink = 0, cleanup_phase = 0;
          ENTRY;
  
          LASSERT(offset == 0 || offset == 2);
  
          MDS_CHECK_RESENT(req, reconstruct_reint_unlink(rec, offset, req,
-                                                       child_lockh));
+                                                       &child_lockh));
  
          if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_UNLINK))
                  GOTO(cleanup, rc = -ENOENT);
  
          /* Step 1: Lookup the parent by FID */
-        dir_de = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, LCK_PW,
-                                       &parent_lockh);
-        if (IS_ERR(dir_de))
-                GOTO(cleanup, rc = PTR_ERR(dir_de));
-        dir_inode = dir_de->d_inode;
-        LASSERT(dir_inode);
+        dparent = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, LCK_PW,
+                                        &parent_lockh);
+        if (IS_ERR(dparent))
+                GOTO(cleanup, rc = PTR_ERR(dparent));
+        LASSERT(dparent->d_inode);
  
          cleanup_phase = 1; /* Have parent dentry lock */
  
          /* Step 2: Lookup the child */
-        dchild = ll_lookup_one_len(rec->ur_name, dir_de, rec->ur_namelen - 1);
+        dchild = ll_lookup_one_len(rec->ur_name, dparent, rec->ur_namelen - 1);
          if (IS_ERR(dchild))
                  GOTO(cleanup, rc = PTR_ERR(dchild));
  
@@ -678,15 +894,13 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset,
  
          child_inode = dchild->d_inode;
          if (child_inode == NULL) {
-                CDEBUG(D_INODE,
-                       "child doesn't exist (dir %lu, name %s)\n",
-                       dir_inode->i_ino, rec->ur_name);
-                rc = -ENOENT;
-                GOTO(cleanup, rc);
+                CDEBUG(D_INODE, "child doesn't exist (dir %lu, name %s)\n",
+                       dparent->d_inode->i_ino, rec->ur_name);
+                GOTO(cleanup, rc = -ENOENT);
          }
  
          DEBUG_REQ(D_INODE, req, "parent ino %lu, child ino %lu",
-                  dir_inode->i_ino, child_inode->i_ino);
+                  dparent->d_inode->i_ino, child_inode->i_ino);
  
          /* Step 3: Get a lock on the child */
          child_res_id.name[0] = child_inode->i_ino;
@@ -695,14 +909,14 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset,
          rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
                                child_res_id, LDLM_PLAIN, NULL, 0, LCK_EX,
                                &flags, ldlm_completion_ast, mds_blocking_ast,
-                              NULL, child_lockh);
+                              NULL, &child_lockh);
          if (rc != ELDLM_OK)
                  GOTO(cleanup, rc);
  
          cleanup_phase = 3; /* child lock */
  
          OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_UNLINK_WRITE,
-                       to_kdev_t(dir_inode->i_sb->s_dev));
+                       to_kdev_t(dparent->d_inode->i_sb->s_dev));
  
          /* ldlm_reply in buf[0] if called via intent */
          if (offset)
@@ -711,43 +925,89 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset,
          body = lustre_msg_buf(req->rq_repmsg, offset, sizeof (*body));
          LASSERT(body != NULL);
  
-        /* Step 4: Do the unlink: client decides between rmdir/unlink!
-         * (bug 72) */
+#ifdef ENABLE_ORPHANS
+        if (unlikely(mds->mds_osc_obd == NULL))
+                mds_osc_connect(obd, mds);
+#endif
+
+        /* If this is the last reference to this inode, get the OBD EA
+         * data first so the client can destroy OST objects */
+        if (S_ISREG(child_inode->i_mode) && child_inode->i_nlink == 1) {
+                mds_pack_inode2fid(&body->fid1, child_inode);
+                mds_pack_inode2body(body, child_inode);
+                mds_pack_md(obd, req->rq_repmsg, offset + 1, body, child_inode);
+                if (!(body->valid & OBD_MD_FLEASIZE)) {
+                        body->valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
+                                        OBD_MD_FLATIME | OBD_MD_FLMTIME);
+                        log_unlink = 1;
+                }
+        }
+
+        /* We have to do these checks ourselves, in case we are making an
+         * orphan.  The client tells us whether rmdir() or unlink() was called,
+         * so we need to return appropriate errors (bug 72).
+         *
+         * We don't have to check permissions, because vfs_rename (called from
+         * mds_unlink_orphan) also calls may_delete. */
+        if ((rec->ur_mode & S_IFMT) == S_IFDIR) {
+                if (!S_ISDIR(child_inode->i_mode))
+                        GOTO(cleanup, rc = -ENOTDIR);
+        } else {
+                if (S_ISDIR(child_inode->i_mode))
+                        GOTO(cleanup, rc = -EISDIR);
+        }
+
+        if (mds_open_orphan_count(child_inode) > 0) {
+                rc = mds_unlink_orphan(rec, obd, dparent, dchild, &handle);
+#ifdef ENABLE_ORPHANS
+                if (!rc && mds_log_op_unlink(obd, mds, child_inode,
+                                             req->rq_repmsg, offset + 1) > 0)
+                        body->valid |= OBD_MD_FLCOOKIE;
+#endif
+                GOTO(cleanup, rc);
+        }
+
+        // Step 4: Do the unlink: client decides between rmdir/unlink! (bug 72)
          switch (rec->ur_mode & S_IFMT) {
          case S_IFDIR:
                  /* Drop any lingering child directories before we start our
                   * transaction, to avoid doing multiple inode dirty/delete
-                 * in our compound transaction (bug 1321).
-                 */
+                 * in our compound transaction (bug 1321). */
                  shrink_dcache_parent(dchild);
-                handle = fsfilt_start(obd, dir_inode, FSFILT_OP_RMDIR);
+                handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_RMDIR,
+                                      NULL);
                  if (IS_ERR(handle))
                          GOTO(cleanup, rc = PTR_ERR(handle));
                  cleanup_phase = 4;
-                rc = vfs_rmdir(dir_inode, dchild);
+                rc = vfs_rmdir(dparent->d_inode, dchild);
                  break;
-        case S_IFREG:
-                /* If this is the last reference to this inode, get the OBD EA
-                 * data first so the client can destroy OST objects */
-                if (S_ISREG(child_inode->i_mode) && child_inode->i_nlink == 1) {
-                        mds_pack_inode2fid(&body->fid1, child_inode);
-                        mds_pack_inode2body(body, child_inode);
-                        mds_pack_md(obd, req->rq_repmsg, offset + 1,
-                                    body, child_inode);
-                        if (body->valid & OBD_MD_FLEASIZE)
-                                return_lock = 1;
-                }
-                /* no break */
+        case S_IFREG: {
+                handle = fsfilt_start(obd, dparent->d_inode,
+                                      FSFILT_OP_UNLINK_LOG, NULL);
+                if (IS_ERR(handle))
+                        GOTO(cleanup, rc = PTR_ERR(handle));
+
+                cleanup_phase = 4;
+                rc = vfs_unlink(dparent->d_inode, dchild);
+#ifdef ENABLE_ORPHANS
+                if (!rc && log_unlink)
+                        if (mds_log_op_unlink(obd, mds, child_inode,
+                                              req->rq_repmsg, offset + 1) > 0)
+                                body->valid |= OBD_MD_FLCOOKIE;
+#endif
+                break;
+        }
          case S_IFLNK:
          case S_IFCHR:
          case S_IFBLK:
          case S_IFIFO:
          case S_IFSOCK:
-                handle = fsfilt_start(obd, dir_inode, FSFILT_OP_UNLINK);
+                handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_UNLINK,
+                                      NULL);
                  if (IS_ERR(handle))
                          GOTO(cleanup, rc = PTR_ERR(handle));
                  cleanup_phase = 4;
-                rc = vfs_unlink(dir_inode, dchild);
+                rc = vfs_unlink(dparent->d_inode, dchild);
                  break;
          default:
                  CERROR("bad file type %o unlinking %s\n", rec->ur_mode,
@@ -758,29 +1018,29 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset,
  
   cleanup:
          switch(cleanup_phase) {
-            case 4:
-                rc = mds_finish_transno(mds, dir_inode, handle, req, rc, 0);
-                if (rc && body) {
-                        /* Don't unlink the OST objects if the MDS unlink failed */
+        case 4:
+                rc = mds_finish_transno(mds, dparent->d_inode, handle, req,
+                                        rc, 0);
+                if (rc && body != NULL) {
+                        // Don't unlink the OST objects if the MDS unlink failed
                          body->valid = 0;
                  }
-            case 3: /* child lock */
-                if (rc != 0 || return_lock == 0)
-                        ldlm_lock_decref(child_lockh, LCK_EX);
-            case 2: /* child dentry */
+        case 3: /* child lock */
+                ldlm_lock_decref(&child_lockh, LCK_EX);
+        case 2: /* child dentry */
                  l_dput(dchild);
-            case 1: /* parent dentry and lock */
+        case 1: /* parent dentry and lock */
                  if (rc) {
-                        ldlm_lock_decref(&parent_lockh, LCK_EX);
+                        ldlm_lock_decref(&parent_lockh, LCK_PW);
                  } else {
                          memcpy(&req->rq_ack_locks[0].lock, &parent_lockh,
                                 sizeof(parent_lockh));
-                        req->rq_ack_locks[0].mode = LCK_EX;
+                        req->rq_ack_locks[0].mode = LCK_PW;
                  }
-                l_dput(dir_de);
-            case 0:
+                l_dput(dparent);
+        case 0:
                  break;
-            default:
+        default:
                  CERROR("invalid cleanup_phase %d\n", cleanup_phase);
                  LBUG();
          }
@@ -857,8 +1117,10 @@ static int mds_reint_link(struct mds_update_record *rec, int offset,
          /* Step 3: Lookup the child */
          dchild = ll_lookup_one_len(rec->ur_name, de_tgt_dir, rec->ur_namelen-1);
          if (IS_ERR(dchild)) {
-                CERROR("child lookup error %ld\n", PTR_ERR(dchild));
-                GOTO(cleanup, rc = PTR_ERR(dchild));
+                rc = PTR_ERR(dchild);
+                if (rc != -EPERM && rc != -EACCES)
+                        CERROR("child lookup error %d\n", rc);
+                GOTO(cleanup, rc);
          }
  
          cleanup_phase = 4; /* child dentry */
@@ -874,15 +1136,15 @@ static int mds_reint_link(struct mds_update_record *rec, int offset,
          OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_LINK_WRITE,
                         to_kdev_t(de_src->d_inode->i_sb->s_dev));
  
-        handle = fsfilt_start(obd, de_tgt_dir->d_inode, FSFILT_OP_LINK);
+        handle = fsfilt_start(obd, de_tgt_dir->d_inode, FSFILT_OP_LINK, NULL);
          if (IS_ERR(handle)) {
                  rc = PTR_ERR(handle);
                  GOTO(cleanup, rc);
          }
  
          rc = vfs_link(de_src, de_tgt_dir->d_inode, dchild);
-        if (rc)
-                CERROR("link error %d\n", rc);
+        if (rc && rc != -EPERM && rc != -EACCES)
+                CERROR("vfs_link error %d\n", rc);
  cleanup:
          rc = mds_finish_transno(mds, de_tgt_dir ? de_tgt_dir->d_inode : NULL,
                                  handle, req, rc, 0);
@@ -1057,13 +1319,12 @@ static int mds_reint_rename(struct mds_update_record *rec, int offset,
          OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_RENAME_WRITE,
                         to_kdev_t(de_srcdir->d_inode->i_sb->s_dev));
  
-        handle = fsfilt_start(obd, de_tgtdir->d_inode, FSFILT_OP_RENAME);
+        handle = fsfilt_start(obd, de_tgtdir->d_inode, FSFILT_OP_RENAME, NULL);
          if (IS_ERR(handle))
                  GOTO(cleanup, rc = PTR_ERR(handle));
  
          lock_kernel();
-        rc = vfs_rename(de_srcdir->d_inode, de_old, de_tgtdir->d_inode, de_new,
-                        NULL);
+        rc = vfs_rename(de_srcdir->d_inode, de_old, de_tgtdir->d_inode, de_new);
          unlock_kernel();
  
          EXIT;
diff --git a/lustre/obdclass/.cvsignore b/lustre/obdclass/.cvsignore

index e530020..49c6100 100644 (file)
--- a/lustre/obdclass/.cvsignore
+++ b/lustre/obdclass/.cvsignore
@@ -6,3 +6,4 @@ Makefile
  Makefile.in
  .deps
  TAGS
+.*.cmd
diff --git a/lustre/obdclass/Makefile.am b/lustre/obdclass/Makefile.am

index 61f4bc2..06d60d4 100644 (file)
--- a/lustre/obdclass/Makefile.am
+++ b/lustre/obdclass/Makefile.am
@@ -1,5 +1,3 @@
-# FIXME: we need to make it clear that obdclass.o depends on
-# lustre_build_version, or 'make -j2' breaks!
  DEFS=
  MODULE = obdclass
  
@@ -9,15 +7,13 @@ else
  FSMOD = fsfilt_ext3
  endif
  
+class_obd.o: lustre_build_version
+
  if LIBLUSTRE
  lib_LIBRARIES = liblustreclass.a
-liblustreclass_a_SOURCES = uuid.c statfs_pack.c genops.c debug.c class_obd.c lustre_handles.c lustre_peer.c lprocfs_status.c simple.c
-
-class_obd.o: lustre_version
-
-lustre_version:
-       echo '#define LUSTRE_VERSION 12' > $(top_builddir)/include/linux/lustre_build_version.h
-       echo '#define BUILD_VERSION "1"' >> $(top_builddir)/include/linux/lustre_build_version.h
+liblustreclass_a_SOURCES = uuid.c statfs_pack.c genops.c debug.c class_obd.c
+liblustreclass_a_SOURCES += lustre_handles.c lustre_peer.c lprocfs_status.c
+liblustreclass_a_SOURCES += simple.c recov_log.c obdo.c
  
  else
  modulefs_DATA = lustre_build_version obdclass.o $(FSMOD).o fsfilt_reiserfs.o
@@ -25,15 +21,16 @@ EXTRA_PROGRAMS = obdclass $(FSMOD) fsfilt_reiserfs
  
  obdclass_SOURCES = class_obd.c debug.c genops.c sysctl.c uuid.c simple.c
  obdclass_SOURCES += lprocfs_status.c lustre_handles.c lustre_peer.c
-obdclass_SOURCES += fsfilt.c statfs_pack.c otree.c
+obdclass_SOURCES += fsfilt.c statfs_pack.c otree.c recov_log.c obdo.c
  endif
  
  include $(top_srcdir)/Rules
  
-# XXX I'm sure there's some automake mv-if-different helper for this.
+# Generate the version header into tmpver and install it over
+# lustre_build_version.h only when the two differ (mv-if-different).
+# The "#define" text must be quoted: an unquoted '#' starts a shell comment,
+# which would swallow both the text and the ">> tmpver" redirection.
  lustre_build_version:
         perl $(top_srcdir)/scripts/version_tag.pl $(top_srcdir) $(top_builddir) > tmpver
+       echo '#define LUSTRE_RELEASE @RELEASE@' >> tmpver
         cmp -s $(top_builddir)/include/linux/lustre_build_version.h tmpver \
-               2> /dev/null &&                                            \
-               $(RM) tmpver ||                                            \
-               mv tmpver $(top_builddir)/include/linux/lustre_build_version.h
+                2> /dev/null &&                                            \
+                $(RM) tmpver ||                                            \
+                mv tmpver $(top_builddir)/include/linux/lustre_build_version.h
+
diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c

index b497aa3..8275ed8 100644 (file)
--- a/lustre/obdclass/class_obd.c
+++ b/lustre/obdclass/class_obd.c
@@ -53,9 +53,7 @@
  #include <linux/miscdevice.h>
  #include <linux/smp_lock.h>
  #else
-
  # include <liblustre.h>
-
  #endif
  
  #include <linux/obd_support.h>
@@ -64,6 +62,7 @@
  #include <linux/lprocfs_status.h>
  #include <portals/lib-types.h> /* for PTL_MD_MAX_IOV */
  #include <linux/lustre_build_version.h>
+#include <portals/list.h>
  
  struct semaphore obd_conf_sem;   /* serialize configuration commands */
  struct obd_device obd_dev[MAX_OBD_DEVICES];
@@ -181,12 +180,10 @@ static inline void obd_conn2data(struct obd_ioctl_data *data,
  
  static void dump_exports(struct obd_device *obd)
  {
-        struct list_head *tmp, *n;
+        struct obd_export *exp, *n;
  
-        list_for_each_safe(tmp, n, &obd->obd_exports) {
-                struct obd_export *exp = list_entry(tmp, struct obd_export,
-                                                    exp_obd_chain);
-                CDEBUG(D_ERROR, "%s: %p %s %d %d %p\n",
+        list_for_each_entry_safe(exp, n, &obd->obd_exports, exp_obd_chain) {
+                CERROR("%s: %p %s %d %d %p\n",
                         obd->obd_name, exp, exp->exp_client_uuid.uuid,
                         atomic_read(&exp->exp_refcount),
                         exp->exp_failed, exp->exp_outstanding_reply );
@@ -543,6 +540,7 @@ int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd,
                  obd->obd_type->typ_refcnt--;
                  class_put_type(obd->obd_type);
                  obd->obd_type = NULL;
+                memset(obd, 0, sizeof(*obd));
                  GOTO(out, err = 0);
          }
  
@@ -562,7 +560,7 @@ int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd,
  
                  atomic_set(&obd->obd_refcount, 0);
  
-                if ( OBT(obd) && OBP(obd, setup) )
+                if (OBT(obd) && OBP(obd, setup))
                          err = obd_setup(obd, sizeof(*data), data);
  
                  if (!err) {
@@ -574,8 +572,8 @@ int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd,
                  GOTO(out, err);
          }
          case OBD_IOC_CLEANUP: {
-                int force = 0, failover = 0;
-                char * flag;
+                int flags = 0;
+                char *flag;
  
                  if (!obd->obd_set_up) {
                          CERROR("Device %d not setup\n", obd->obd_minor);
@@ -586,18 +584,19 @@ int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd,
                          for (flag = data->ioc_inlbuf1; *flag != 0; flag++)
                                  switch (*flag) {
                                  case 'F':
-                                        force = 1;
+                                        flags |= OBD_OPT_FORCE;
                                          break;
                                  case 'A':
-                                        failover = 1;
+                                        flags |= OBD_OPT_FAILOVER;
                                          break;
                                  default:
-                                        CERROR("unrecognised flag '%c'\n", 
+                                        CERROR("unrecognised flag '%c'\n",
                                                 *flag);
                                  }
                  }
-                
-                if (atomic_read(&obd->obd_refcount) == 1 || force) {
+
+                if (atomic_read(&obd->obd_refcount) == 1 ||
+                    flags & OBD_OPT_FORCE) {
                          /* this will stop new connections, and need to
                             do it before class_disconnect_exports() */
                          obd->obd_stopping = 1;
@@ -607,19 +606,19 @@ int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd,
                          struct l_wait_info lwi = LWI_TIMEOUT_INTR(60 * HZ, NULL,
                                                                    NULL, NULL);
                          int rc;
-                        
-                        if (!force) {
+
+                        if (!(flags & OBD_OPT_FORCE)) {
                                  CERROR("OBD device %d (%p) has refcount %d\n",
-                                       obd->obd_minor, obd, 
+                                       obd->obd_minor, obd,
                                         atomic_read(&obd->obd_refcount));
                                  dump_exports(obd);
                                  GOTO(out, err = -EBUSY);
                          }
-                        class_disconnect_exports(obd, failover);
-                        CDEBUG(D_IOCTL, 
-                               "%s: waiting for obd refs to go away: %d\n", 
+                        class_disconnect_exports(obd, flags);
+                        CDEBUG(D_IOCTL,
+                               "%s: waiting for obd refs to go away: %d\n",
                                 obd->obd_name, atomic_read(&obd->obd_refcount));
-                
+
                          rc = l_wait_event(obd->obd_refcount_waitq,
                                       atomic_read(&obd->obd_refcount) < 2, &lwi);
                          if (rc == 0) {
@@ -630,12 +629,12 @@ int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd,
                                         atomic_read(&obd->obd_refcount));
                                  dump_exports(obd);
                          }
-                        CDEBUG(D_IOCTL, "%s: awake, now finishing cleanup\n", 
+                        CDEBUG(D_IOCTL, "%s: awake, now finishing cleanup\n",
                                 obd->obd_name);
                  }
  
                  if (OBT(obd) && OBP(obd, cleanup))
-                        err = obd_cleanup(obd, force, failover);
+                        err = obd_cleanup(obd, flags);
  
                  if (!err) {
                          obd->obd_set_up = obd->obd_stopping = 0;
@@ -807,10 +806,10 @@ EXPORT_SYMBOL(class_conn2cliimp);
  EXPORT_SYMBOL(class_conn2ldlmimp);
  EXPORT_SYMBOL(class_disconnect);
  EXPORT_SYMBOL(class_disconnect_exports);
-EXPORT_SYMBOL(lustre_uuid_to_peer);
  
  /* uuid.c */
  EXPORT_SYMBOL(class_uuid_unparse);
+EXPORT_SYMBOL(lustre_uuid_to_peer);
  EXPORT_SYMBOL(client_tgtuuid2obd);
  
  EXPORT_SYMBOL(class_handle_hash);
@@ -831,12 +830,15 @@ int init_obdclass(void)
                        ", info@clusterfs.com\n");
  
          class_init_uuidlist();
-        class_handle_init();
+        err = class_handle_init();
+        if (err)
+                return err;
  
          sema_init(&obd_conf_sem, 1);
          INIT_LIST_HEAD(&obd_types);
  
-        if ((err = misc_register(&obd_psdev))) {
+        err = misc_register(&obd_psdev);
+        if (err) {
                  CERROR("cannot register %d err %d\n", OBD_MINOR, err);
                  return err;
          }
@@ -875,7 +877,7 @@ int obd_proc_read_version(char *page, char **start, off_t off, int count, int *e
  #endif
  
  #ifdef __KERNEL__
-static void __exit cleanup_obdclass(void)
+static void /*__exit*/ cleanup_obdclass(void)
  #else
  static void cleanup_obdclass(void)
  #endif
@@ -914,8 +916,8 @@ static void cleanup_obdclass(void)
   * kernel patch */
  #ifdef __KERNEL__
  #include <linux/lustre_version.h>
-#define LUSTRE_MIN_VERSION 18
-#define LUSTRE_MAX_VERSION 19
+#define LUSTRE_MIN_VERSION 21
+#define LUSTRE_MAX_VERSION 21
  #if (LUSTRE_KERNEL_VERSION < LUSTRE_MIN_VERSION)
  # error Cannot continue: Your Lustre kernel patch is older than the sources
  #elif (LUSTRE_KERNEL_VERSION > LUSTRE_MAX_VERSION)
diff --git a/lustre/obdclass/fsfilt.c b/lustre/obdclass/fsfilt.c

index 4357b79..d0abdfe 100644 (file)
--- a/lustre/obdclass/fsfilt.c
+++ b/lustre/obdclass/fsfilt.c
@@ -64,7 +64,7 @@ void fsfilt_unregister_ops(struct fsfilt_operations *fs_ops)
          /* unlock fsfilt_types list */
  }
  
-struct fsfilt_operations *fsfilt_get_ops(char *type)
+struct fsfilt_operations *fsfilt_get_ops(const char *type)
  {
          struct fsfilt_operations *fs_ops;
  
@@ -89,7 +89,7 @@ struct fsfilt_operations *fsfilt_get_ops(char *type)
                         /* unlock fsfilt_types list */
                  }
          }
-        __MOD_INC_USE_COUNT(fs_ops->fs_owner);
+        try_module_get(fs_ops->fs_owner);
          /* unlock fsfilt_types list */
  
          return fs_ops;
@@ -97,7 +97,7 @@ struct fsfilt_operations *fsfilt_get_ops(char *type)
  
  void fsfilt_put_ops(struct fsfilt_operations *fs_ops)
  {
-        __MOD_DEC_USE_COUNT(fs_ops->fs_owner);
+        module_put(fs_ops->fs_owner);
  }
  
  
diff --git a/lustre/obdclass/fsfilt_ext3.c b/lustre/obdclass/fsfilt_ext3.c

index 5f6322f..5dd196d 100644 (file)
--- a/lustre/obdclass/fsfilt_ext3.c
+++ b/lustre/obdclass/fsfilt_ext3.c
@@ -32,7 +32,13 @@
  #include <linux/quotaops.h>
  #include <linux/ext3_fs.h>
  #include <linux/ext3_jbd.h>
-#include <linux/ext3_xattr.h>
+#include <linux/version.h>
+/* XXX ugh */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+ #include <linux/ext3_xattr.h>
+#else 
+ #include <linux/../../fs/ext3/xattr.h>
+#endif
  #include <linux/kp30.h>
  #include <linux/lustre_fsfilt.h>
  #include <linux/obd.h>
@@ -43,10 +49,11 @@ static kmem_cache_t *fcb_cache;
  static atomic_t fcb_cache_count = ATOMIC_INIT(0);
  
  struct fsfilt_cb_data {
-        struct journal_callback cb_jcb; /* data private to jbd */
+        struct journal_callback cb_jcb; /* jbd private data - MUST BE FIRST */
          fsfilt_cb_t cb_func;            /* MDS/OBD completion function */
          struct obd_device *cb_obd;      /* MDS/OBD completion device */
          __u64 cb_last_rcvd;             /* MDS/OST last committed operation */
+        void *cb_data;                  /* MDS/OST completion function data */
  };
  
  #define EXT3_XATTR_INDEX_LUSTRE         5
@@ -58,13 +65,24 @@ struct fsfilt_cb_data {
   * the inode (which we will be changing anyways as part of this
   * transaction).
   */
-static void *fsfilt_ext3_start(struct inode *inode, int op)
+static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private)
  {
          /* For updates to the last recieved file */
          int nblocks = EXT3_DATA_TRANS_BLOCKS;
          void *handle;
  
          switch(op) {
+        case FSFILT_OP_CREATE_LOG:
+                nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS+EXT3_DATA_TRANS_BLOCKS;
+                op = FSFILT_OP_CREATE;
+                break;
+        case FSFILT_OP_UNLINK_LOG:
+                nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS+EXT3_DATA_TRANS_BLOCKS;
+                op = FSFILT_OP_UNLINK;
+                break;
+        }
+
+        switch(op) {
          case FSFILT_OP_RMDIR:
          case FSFILT_OP_UNLINK:
                  nblocks += EXT3_DELETE_TRANS_BLOCKS;
@@ -95,7 +113,7 @@ static void *fsfilt_ext3_start(struct inode *inode, int op)
                   LBUG();
          }
  
-        LASSERT(!current->journal_info);
+        LASSERT(current->journal_info == desc_private);
          lock_kernel();
          handle = journal_start(EXT3_JOURNAL(inode), nblocks);
          unlock_kernel();
@@ -185,14 +203,14 @@ static int fsfilt_ext3_credits_needed(int objcount, struct fsfilt_objinfo *fso)
   * the pages have been written.
   */
  static void *fsfilt_ext3_brw_start(int objcount, struct fsfilt_objinfo *fso,
-                                   int niocount, struct niobuf_remote *nb)
+                                   int niocount, void *desc_private)
  {
          journal_t *journal;
          handle_t *handle;
          int needed;
          ENTRY;
  
-        LASSERT(!current->journal_info);
+        LASSERT(current->journal_info == desc_private);
          journal = EXT3_SB(fso->fso_dentry->d_inode->i_sb)->s_journal;
          needed = fsfilt_ext3_credits_needed(objcount, fso);
  
@@ -218,6 +236,8 @@ static void *fsfilt_ext3_brw_start(int objcount, struct fsfilt_objinfo *fso,
          if (IS_ERR(handle))
                  CERROR("can't get handle for %d credits: rc = %ld\n", needed,
                         PTR_ERR(handle));
+        else
+                LASSERT(handle->h_buffer_credits >= needed);
  
          RETURN(handle);
  }
@@ -249,24 +269,26 @@ static int fsfilt_ext3_setattr(struct dentry *dentry, void *handle,
           * in the block pointers; this is really the "small" stripe MD data.
           * We can avoid further hackery by virtue of the MDS file size being
           * zero all the time (which doesn't invoke block truncate at unlink
-         * time), so we assert we never change the MDS file size from zero.
-         */
+         * time), so we assert we never change the MDS file size from zero. */
          if (iattr->ia_valid & ATTR_SIZE && !do_trunc) {
                  /* ATTR_SIZE would invoke truncate: clear it */
                  iattr->ia_valid &= ~ATTR_SIZE;
-                inode->i_size = iattr->ia_size;
+                EXT3_I(inode)->i_disksize = inode->i_size = iattr->ia_size;
  
                  /* make sure _something_ gets set - so new inode
-                 * goes to disk (probably won't work over XFS
-                 */
-                if (!iattr->ia_valid & ATTR_MODE) {
+                 * goes to disk (probably won't work over XFS) */
+                if (!(iattr->ia_valid & (ATTR_MODE | ATTR_MTIME | ATTR_CTIME))){
                          iattr->ia_valid |= ATTR_MODE;
                          iattr->ia_mode = inode->i_mode;
                  }
          }
-        if (inode->i_op->setattr)
+
+        /* Don't allow setattr to change file type */
+        iattr->ia_mode = (inode->i_mode & S_IFMT)|(iattr->ia_mode & ~S_IFMT);
+
+        if (inode->i_op->setattr) {
                  rc = inode->i_op->setattr(dentry, iattr);
-        else{
+        } else {
                  rc = inode_change_ok(inode, iattr);
                  if (!rc)
                          rc = inode_setattr(inode, iattr);
@@ -286,8 +308,8 @@ static int fsfilt_ext3_set_md(struct inode *inode, void *handle,
           * it will fit, because putting it in an EA currently kills the MDS
           * performance.  We'll fix this with "fast EAs" in the future.
           */
-        if (lmm_size <= sizeof(EXT3_I(inode)->i_data) -
-                        sizeof(EXT3_I(inode)->i_data[0])) {
+        if (inode->i_blocks == 0 && lmm_size <= sizeof(EXT3_I(inode)->i_data) -
+                                            sizeof(EXT3_I(inode)->i_data[0])) {
                  /* XXX old_size is debugging only */
                  int old_size = EXT3_I(inode)->i_data[0];
                  if (old_size != 0) {
@@ -303,8 +325,15 @@ static int fsfilt_ext3_set_md(struct inode *inode, void *handle,
          } else {
                  down(&inode->i_sem);
                  lock_kernel();
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
                  rc = ext3_xattr_set(handle, inode, EXT3_XATTR_INDEX_LUSTRE,
                                      XATTR_LUSTRE_MDS_OBJID, lmm, lmm_size, 0);
+#else
+                rc = ext3_xattr_set_handle(handle, inode, 
+                                           EXT3_XATTR_INDEX_LUSTRE,
+                                           XATTR_LUSTRE_MDS_OBJID, lmm, 
+                                           lmm_size, 0);
+#endif
                  unlock_kernel();
                  up(&inode->i_sem);
          }
@@ -319,7 +348,7 @@ static int fsfilt_ext3_get_md(struct inode *inode, void *lmm, int lmm_size)
  {
          int rc;
  
-        if (EXT3_I(inode)->i_data[0]) {
+        if (inode->i_blocks == 0 && EXT3_I(inode)->i_data[0]) {
                  int size = le32_to_cpu(EXT3_I(inode)->i_data[0]);
                  LASSERT(size < sizeof(EXT3_I(inode)->i_data));
                  if (lmm) {
@@ -411,14 +440,15 @@ static void fsfilt_ext3_cb_func(struct journal_callback *jcb, int error)
  {
          struct fsfilt_cb_data *fcb = (struct fsfilt_cb_data *)jcb;
  
-        fcb->cb_func(fcb->cb_obd, fcb->cb_last_rcvd, error);
+        fcb->cb_func(fcb->cb_obd, fcb->cb_last_rcvd, fcb->cb_data, error);
  
          OBD_SLAB_FREE(fcb, fcb_cache, sizeof *fcb);
          atomic_dec(&fcb_cache_count);
  }
  
  static int fsfilt_ext3_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
-                                     void *handle, fsfilt_cb_t cb_func)
+                                     void *handle, fsfilt_cb_t cb_func,
+                                     void *cb_data)
  {
          struct fsfilt_cb_data *fcb;
  
@@ -430,10 +460,10 @@ static int fsfilt_ext3_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
          fcb->cb_func = cb_func;
          fcb->cb_obd = obd;
          fcb->cb_last_rcvd = last_rcvd;
+        fcb->cb_data = cb_data;
  
          CDEBUG(D_EXT2, "set callback for last_rcvd: "LPD64"\n", last_rcvd);
          lock_kernel();
-        /* Note that an "incompatible pointer" warning here is OK for now */
          journal_callback_set(handle, fsfilt_ext3_cb_func,
                               (struct journal_callback *)fcb);
          unlock_kernel();
@@ -443,10 +473,11 @@ static int fsfilt_ext3_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
  
  static int fsfilt_ext3_journal_data(struct file *filp)
  {
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+        /* bug 1576: enable data journaling on 2.5 when appropriate */
          struct inode *inode = filp->f_dentry->d_inode;
-
          EXT3_I(inode)->i_flags |= EXT3_JOURNAL_DATA_FL;
-
+#endif
          return 0;
  }
  
@@ -459,7 +490,7 @@ static int fsfilt_ext3_journal_data(struct file *filp)
   */
  static int fsfilt_ext3_statfs(struct super_block *sb, struct obd_statfs *osfs)
  {
-        struct statfs sfs;
+        struct kstatfs sfs;
          int rc = vfs_statfs(sb, &sfs);
  
          if (!rc && sfs.f_bfree < sfs.f_ffree) {
@@ -484,6 +515,110 @@ static int fsfilt_ext3_prep_san_write(struct inode *inode, long *blocks,
          return ext3_prep_san_write(inode, blocks, nblocks, newsize);
  }
  
+/* Read the @size-byte record at *@offs of @file into @buf.  The record must
+ * end at or below i_size and sit inside one block.  Returns @size and advances
+ * *@offs on success, else -EIO or the error code set by ext3_bread(). */
+static int fsfilt_ext3_read_record(struct file * file, char *buf,
+                                   int size, loff_t *offs)
+{
+        struct inode *inode = file->f_dentry->d_inode;
+        unsigned long block, boffs;
+        struct buffer_head *bh;
+        int err, rc = -EIO;
+
+        if (inode->i_size < *offs + size) {
+                CERROR("file size %llu is too short for read %u@%llu\n",
+                       inode->i_size, size, *offs);
+                return rc;
+        }
+        block = *offs >> inode->i_blkbits;
+        bh = ext3_bread(NULL, inode, block, 0, &err);
+        if (bh == NULL) {
+                CERROR("can't read block: %d\n", err);
+                return err;
+        }
+        boffs = (unsigned)*offs % bh->b_size;
+        if (boffs + size <= bh->b_size) {
+                memcpy(buf, bh->b_data + boffs, size);
+                *offs += size;
+                rc = size;
+        } else {
+                CERROR("request crosses block's border. offset %llu, size %u\n",
+                       *offs, size);
+        }
+        brelse(bh);
+        return rc;
+}
+
+/* Journal @size bytes from @buf into the block of @file that holds *@offs;
+ * a record ending past i_size raises i_size to the end of that block.  Returns
+ * @size and advances *@offs on success, an error code otherwise. */
+static int fsfilt_ext3_write_record(struct file * file, char *buf,
+                                    int size, loff_t *offs)
+{
+        struct buffer_head *bh;
+        unsigned long block, boffs;
+        struct inode *inode = file->f_dentry->d_inode;
+        journal_t *journal = EXT3_SB(inode->i_sb)->s_journal;
+        loff_t old_size = inode->i_size;
+        handle_t *handle;
+        int err;
+
+        handle = journal_start(journal, EXT3_DATA_TRANS_BLOCKS + 2);
+        if (IS_ERR(handle)) {   /* failure is an ERR_PTR(), never NULL */
+                CERROR("can't start transaction\n");
+                return PTR_ERR(handle);
+        }
+
+        block = *offs >> inode->i_blkbits;
+        if (*offs + size > inode->i_size) {
+                down(&inode->i_sem);
+                if (*offs + size > inode->i_size)
+                        inode->i_size = ((loff_t)block + 1) << inode->i_blkbits;
+                up(&inode->i_sem);
+        }
+
+        bh = ext3_bread(handle, inode, block, 1, &err);
+        if (!bh) {
+                CERROR("can't read/create block: %d\n", err);
+                goto out;
+        }
+
+        /* This is a hack only needed because ext3_get_block_handle() updates
+         * i_disksize after marking the inode dirty in ext3_splice_branch().
+         * We will fix that when we get a chance, as ext3_mark_inode_dirty()
+         * is not without cost, nor is it even exported. */
+        if (inode->i_size > old_size)
+                mark_inode_dirty(inode);
+
+        boffs = (unsigned)*offs % bh->b_size;
+        if (boffs + size > bh->b_size) {
+                CERROR("request crosses block's border. offset %llu, size %u\n",
+                       *offs, size);
+                err = -EIO;
+                goto out;
+        }
+
+        err = ext3_journal_get_write_access(handle, bh);
+        if (err) {
+                CERROR("journal_get_write_access() returned error %d\n", err);
+                goto out;
+        }
+        memcpy(bh->b_data + boffs, buf, size);
+        err = ext3_journal_dirty_metadata(handle, bh);
+        if (err) {
+                CERROR("journal_dirty_metadata() returned error %d\n", err);
+                goto out;
+        }
+        err = size;
+out:
+        brelse(bh);     /* brelse() ignores a NULL buffer head */
+        journal_stop(handle);
+        if (err > 0)
+                *offs += size;
+        return err;
+}
+
  static struct fsfilt_operations fsfilt_ext3_ops = {
          fs_type:                "ext3",
          fs_owner:               THIS_MODULE,
@@ -499,6 +634,8 @@ static struct fsfilt_operations fsfilt_ext3_ops = {
          fs_statfs:              fsfilt_ext3_statfs,
          fs_sync:                fsfilt_ext3_sync,
          fs_prep_san_write:      fsfilt_ext3_prep_san_write,
+        fs_write_record:        fsfilt_ext3_write_record,
+        fs_read_record:         fsfilt_ext3_read_record,
  };
  
  static int __init fsfilt_ext3_init(void)
diff --git a/lustre/obdclass/fsfilt_extN.c b/lustre/obdclass/fsfilt_extN.c

index 1fba0f4..80f7e50 100644 (file)
--- a/lustre/obdclass/fsfilt_extN.c
+++ b/lustre/obdclass/fsfilt_extN.c
@@ -43,10 +43,11 @@ static kmem_cache_t *fcb_cache;
  static atomic_t fcb_cache_count = ATOMIC_INIT(0);
  
  struct fsfilt_cb_data {
-        struct journal_callback cb_jcb; /* data private to jbd */
+        struct journal_callback cb_jcb; /* jbd private data - MUST BE FIRST */
          fsfilt_cb_t cb_func;            /* MDS/OBD completion function */
          struct obd_device *cb_obd;      /* MDS/OBD completion device */
          __u64 cb_last_rcvd;             /* MDS/OST last committed operation */
+        void *cb_data;                  /* MDS/OST completion function data */
  };
  
  #define EXTN_XATTR_INDEX_LUSTRE         5
@@ -58,13 +59,24 @@ struct fsfilt_cb_data {
   * the inode (which we will be changing anyways as part of this
   * transaction).
   */
-static void *fsfilt_extN_start(struct inode *inode, int op)
+static void *fsfilt_extN_start(struct inode *inode, int op, void *desc_private)
  {
          /* For updates to the last recieved file */
          int nblocks = EXTN_DATA_TRANS_BLOCKS;
          void *handle;
  
          switch(op) {
+        case FSFILT_OP_CREATE_LOG:
+                nblocks += EXTN_INDEX_EXTRA_TRANS_BLOCKS+EXTN_DATA_TRANS_BLOCKS;
+                op = FSFILT_OP_CREATE;
+                break;
+        case FSFILT_OP_UNLINK_LOG:
+                nblocks += EXTN_INDEX_EXTRA_TRANS_BLOCKS+EXTN_DATA_TRANS_BLOCKS;
+                op = FSFILT_OP_UNLINK;
+                break;
+        }
+
+        switch(op) {
          case FSFILT_OP_RMDIR:
          case FSFILT_OP_UNLINK:
                  nblocks += EXTN_DELETE_TRANS_BLOCKS;
@@ -95,7 +107,7 @@ static void *fsfilt_extN_start(struct inode *inode, int op)
                   LBUG();
          }
  
-        LASSERT(!current->journal_info);
+        LASSERT(current->journal_info == desc_private);
          lock_kernel();
          handle = journal_start(EXTN_JOURNAL(inode), nblocks);
          unlock_kernel();
@@ -124,7 +136,7 @@ static void *fsfilt_extN_start(struct inode *inode, int op)
   * objcount inode blocks
   * 1 superblock
   * 2 * EXTN_SINGLEDATA_TRANS_BLOCKS for the quota files
- * 
+ *
   * 1 EXTN_DATA_TRANS_BLOCKS for the last_rcvd update.
   */
  static int fsfilt_extN_credits_needed(int objcount, struct fsfilt_objinfo *fso)
@@ -155,7 +167,7 @@ static int fsfilt_extN_credits_needed(int objcount, struct fsfilt_objinfo *fso)
                  ngdblocks = EXTN_SB(sb)->s_gdb_count;
  
          needed += nbitmaps + ngdblocks;
-        
+
          /* last_rcvd update */
          needed += EXTN_DATA_TRANS_BLOCKS;
  
@@ -185,14 +197,14 @@ static int fsfilt_extN_credits_needed(int objcount, struct fsfilt_objinfo *fso)
   * the pages have been written.
   */
  static void *fsfilt_extN_brw_start(int objcount, struct fsfilt_objinfo *fso,
-                                   int niocount, struct niobuf_remote *nb)
+                                   int niocount, void *desc_private)
  {
          journal_t *journal;
          handle_t *handle;
          int needed;
          ENTRY;
  
-        LASSERT(!current->journal_info);
+        LASSERT(current->journal_info == desc_private);
          journal = EXTN_SB(fso->fso_dentry->d_inode->i_sb)->s_journal;
          needed = fsfilt_extN_credits_needed(objcount, fso);
  
@@ -218,6 +230,8 @@ static void *fsfilt_extN_brw_start(int objcount, struct fsfilt_objinfo *fso,
          if (IS_ERR(handle))
                  CERROR("can't get handle for %d credits: rc = %ld\n", needed,
                         PTR_ERR(handle));
+        else
+                LASSERT(handle->h_buffer_credits >= needed);
  
          RETURN(handle);
  }
@@ -249,24 +263,26 @@ static int fsfilt_extN_setattr(struct dentry *dentry, void *handle,
           * in the block pointers; this is really the "small" stripe MD data.
           * We can avoid further hackery by virtue of the MDS file size being
           * zero all the time (which doesn't invoke block truncate at unlink
-         * time), so we assert we never change the MDS file size from zero.
-         */
+         * time), so we assert we never change the MDS file size from zero. */
          if (iattr->ia_valid & ATTR_SIZE && !do_trunc) {
                  /* ATTR_SIZE would invoke truncate: clear it */
                  iattr->ia_valid &= ~ATTR_SIZE;
-                inode->i_size = iattr->ia_size;
+                EXTN_I(inode)->i_disksize = inode->i_size = iattr->ia_size;
  
                  /* make sure _something_ gets set - so new inode
-                 * goes to disk (probably won't work over XFS
-                 */
-                if (!iattr->ia_valid & ATTR_MODE) {
+                 * goes to disk (probably won't work over XFS) */
+                if (!(iattr->ia_valid & (ATTR_MODE | ATTR_MTIME | ATTR_CTIME))){
                          iattr->ia_valid |= ATTR_MODE;
                          iattr->ia_mode = inode->i_mode;
                  }
          }
-        if (inode->i_op->setattr)
+
+        /* Don't allow setattr to change file type */
+        iattr->ia_mode = (inode->i_mode & S_IFMT)|(iattr->ia_mode & ~S_IFMT);
+
+        if (inode->i_op->setattr) {
                  rc = inode->i_op->setattr(dentry, iattr);
-        else{
+        } else {
                  rc = inode_change_ok(inode, iattr);
                  if (!rc)
                          rc = inode_setattr(inode, iattr);
@@ -286,8 +302,8 @@ static int fsfilt_extN_set_md(struct inode *inode, void *handle,
           * it will fit, because putting it in an EA currently kills the MDS
           * performance.  We'll fix this with "fast EAs" in the future.
           */
-        if (lmm_size <= sizeof(EXTN_I(inode)->i_data) -
-                        sizeof(EXTN_I(inode)->i_data[0])) {
+        if (inode->i_blocks == 0 && lmm_size <= sizeof(EXTN_I(inode)->i_data) -
+                                            sizeof(EXTN_I(inode)->i_data[0])) {
                  /* XXX old_size is debugging only */
                  int old_size = EXTN_I(inode)->i_data[0];
                  if (old_size != 0) {
@@ -319,7 +335,7 @@ static int fsfilt_extN_get_md(struct inode *inode, void *lmm, int lmm_size)
  {
          int rc;
  
-        if (EXTN_I(inode)->i_data[0]) {
+        if (inode->i_blocks == 0 && EXTN_I(inode)->i_data[0]) {
                  int size = le32_to_cpu(EXTN_I(inode)->i_data[0]);
                  LASSERT(size < sizeof(EXTN_I(inode)->i_data));
                  if (lmm) {
@@ -411,14 +427,15 @@ static void fsfilt_extN_cb_func(struct journal_callback *jcb, int error)
  {
          struct fsfilt_cb_data *fcb = (struct fsfilt_cb_data *)jcb;
  
-        fcb->cb_func(fcb->cb_obd, fcb->cb_last_rcvd, error);
+        fcb->cb_func(fcb->cb_obd, fcb->cb_last_rcvd, fcb->cb_data, error);
  
          OBD_SLAB_FREE(fcb, fcb_cache, sizeof *fcb);
          atomic_dec(&fcb_cache_count);
  }
  
  static int fsfilt_extN_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
-                                     void *handle, fsfilt_cb_t cb_func)
+                                     void *handle, fsfilt_cb_t cb_func,
+                                     void *cb_data)
  {
          struct fsfilt_cb_data *fcb;
  
@@ -430,10 +447,10 @@ static int fsfilt_extN_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
          fcb->cb_func = cb_func;
          fcb->cb_obd = obd;
          fcb->cb_last_rcvd = last_rcvd;
+        fcb->cb_data = cb_data;
  
          CDEBUG(D_EXT2, "set callback for last_rcvd: "LPD64"\n", last_rcvd);
          lock_kernel();
-        /* Note that an "incompatible pointer" warning here is OK for now */
          journal_callback_set(handle, fsfilt_extN_cb_func,
                               (struct journal_callback *)fcb);
          unlock_kernel();
@@ -459,7 +476,7 @@ static int fsfilt_extN_journal_data(struct file *filp)
   */
  static int fsfilt_extN_statfs(struct super_block *sb, struct obd_statfs *osfs)
  {
-        struct statfs sfs;
+        struct kstatfs sfs;
          int rc = vfs_statfs(sb, &sfs);
  
          if (!rc && sfs.f_bfree < sfs.f_ffree) {
@@ -484,6 +501,110 @@ static int fsfilt_extN_prep_san_write(struct inode *inode, long *blocks,
          return extN_prep_san_write(inode, blocks, nblocks, newsize);
  }
  
+/* Read the @size-byte record at *@offs of @file into @buf.  The record must
+ * end at or below i_size and sit inside one block.  Returns @size and advances
+ * *@offs on success, else -EIO or the error code set by extN_bread(). */
+static int fsfilt_extN_read_record(struct file * file, char *buf,
+                                   int size, loff_t *offs)
+{
+        struct inode *inode = file->f_dentry->d_inode;
+        unsigned long block, boffs;
+        struct buffer_head *bh;
+        int err, rc = -EIO;
+
+        if (inode->i_size < *offs + size) {
+                CERROR("file size %llu is too short for read %u@%llu\n",
+                       inode->i_size, size, *offs);
+                return rc;
+        }
+        block = *offs >> inode->i_blkbits;
+        bh = extN_bread(NULL, inode, block, 0, &err);
+        if (bh == NULL) {
+                CERROR("can't read block: %d\n", err);
+                return err;
+        }
+        boffs = (unsigned)*offs % bh->b_size;
+        if (boffs + size <= bh->b_size) {
+                memcpy(buf, bh->b_data + boffs, size);
+                *offs += size;
+                rc = size;
+        } else {
+                CERROR("request crosses block's border. offset %llu, size %u\n",
+                       *offs, size);
+        }
+        brelse(bh);
+        return rc;
+}
+
+/* Journal @size bytes from @buf into the block of @file that holds *@offs;
+ * a record ending past i_size raises i_size to the end of that block.  Returns
+ * @size and advances *@offs on success, an error code otherwise. */
+static int fsfilt_extN_write_record(struct file * file, char *buf,
+                                    int size, loff_t *offs)
+{
+        struct buffer_head *bh;
+        unsigned long block, boffs;
+        struct inode *inode = file->f_dentry->d_inode;
+        journal_t *journal = EXTN_SB(inode->i_sb)->s_journal;
+        loff_t old_size = inode->i_size;
+        handle_t *handle;
+        int err;
+
+        handle = journal_start(journal, EXTN_DATA_TRANS_BLOCKS + 2);
+        if (IS_ERR(handle)) {   /* failure is an ERR_PTR(), never NULL */
+                CERROR("can't start transaction\n");
+                return PTR_ERR(handle);
+        }
+
+        block = *offs >> inode->i_blkbits;
+        if (*offs + size > inode->i_size) {
+                down(&inode->i_sem);
+                if (*offs + size > inode->i_size)
+                        inode->i_size = ((loff_t)block + 1) << inode->i_blkbits;
+                up(&inode->i_sem);
+        }
+
+        bh = extN_bread(handle, inode, block, 1, &err);
+        if (!bh) {
+                CERROR("can't read/create block: %d\n", err);
+                goto out;
+        }
+
+        /* This is a hack only needed because extN_get_block_handle() updates
+         * i_disksize after marking the inode dirty in extN_splice_branch().
+         * We will fix that when we get a chance, as extN_mark_inode_dirty()
+         * is not without cost, nor is it even exported. */
+        if (inode->i_size > old_size)
+                mark_inode_dirty(inode);
+
+        boffs = (unsigned)*offs % bh->b_size;
+        if (boffs + size > bh->b_size) {
+                CERROR("request crosses block's border. offset %llu, size %u\n",
+                       *offs, size);
+                err = -EIO;
+                goto out;
+        }
+
+        err = extN_journal_get_write_access(handle, bh);
+        if (err) {
+                CERROR("journal_get_write_access() returned error %d\n", err);
+                goto out;
+        }
+        memcpy(bh->b_data + boffs, buf, size);
+        err = extN_journal_dirty_metadata(handle, bh);
+        if (err) {
+                CERROR("journal_dirty_metadata() returned error %d\n", err);
+                goto out;
+        }
+        err = size;
+out:
+        brelse(bh);     /* brelse() ignores a NULL buffer head */
+        journal_stop(handle);
+        if (err > 0)
+                *offs += size;
+        return err;
+}
+
  static struct fsfilt_operations fsfilt_extN_ops = {
          fs_type:                "extN",
          fs_owner:               THIS_MODULE,
@@ -499,6 +620,8 @@ static struct fsfilt_operations fsfilt_extN_ops = {
          fs_statfs:              fsfilt_extN_statfs,
          fs_sync:                fsfilt_extN_sync,
          fs_prep_san_write:      fsfilt_extN_prep_san_write,
+        fs_write_record:        fsfilt_extN_write_record,
+        fs_read_record:         fsfilt_extN_read_record,
  };
  
  static int __init fsfilt_extN_init(void)
diff --git a/lustre/obdclass/fsfilt_reiserfs.c b/lustre/obdclass/fsfilt_reiserfs.c

index ccefb92..3d118fc 100644 (file)
--- a/lustre/obdclass/fsfilt_reiserfs.c
+++ b/lustre/obdclass/fsfilt_reiserfs.c
@@ -48,18 +48,19 @@
  #include <linux/obd_class.h>
  #include <linux/module.h>
  
-static void *fsfilt_reiserfs_start(struct inode *inode, int op)
+static void *fsfilt_reiserfs_start(struct inode *inode, int op,
+                                   void *desc_private)
  {
          return (void *)0xf00f00be;
  }
  
  static void *fsfilt_reiserfs_brw_start(int objcount, struct fsfilt_objinfo *fso,
-                                   int niocount, struct niobuf_remote *nb)
+                                       int niocount, void *desc_private)
  {
          return (void *)0xf00f00be;
  }
  
-static int fsfilt_reiserfs_commit(struct inode *inode, void *handle, 
+static int fsfilt_reiserfs_commit(struct inode *inode, void *handle,
                                    int force_sync)
  {
          if (handle != (void *)0xf00f00be) {
@@ -131,8 +132,9 @@ static ssize_t fsfilt_reiserfs_readpage(struct file *file, char *buf, size_t cou
          return file->f_op->read(file, buf, count, offset);
  }
  
-static int fsfilt_reiserfs_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
-                                         void *handle, fsfilt_cb_t cb_func)
+static int fsfilt_reiserfs_set_last_rcvd(struct obd_device *obd,
+                                         __u64 last_rcvd, void *handle,
+                                         fsfilt_cb_t cb_func, void *cb_data)
  {
          static long next = 0;
  
@@ -141,7 +143,7 @@ static int fsfilt_reiserfs_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd
                  next = jiffies + 300 * HZ;
          }
  
-        cb_func(obd, last_rcvd, 0);
+        cb_func(obd, last_rcvd, cb_data, 0);
  
          return 0;
  }
diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c

index 4862cf3..bb48e5d 100644 (file)
--- a/lustre/obdclass/lprocfs_status.c
+++ b/lustre/obdclass/lprocfs_status.c
@@ -22,37 +22,38 @@
  
  #define EXPORT_SYMTAB
  #define DEBUG_SUBSYSTEM S_CLASS
-#ifdef __KERNEL__
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/version.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#include <asm/statfs.h>
-#endif
-#include <linux/seq_file.h>
  
-#else
-#include <liblustre.h>
+#ifdef __KERNEL__
+# include <linux/config.h>
+# include <linux/module.h>
+# include <linux/version.h>
+# include <linux/slab.h>
+# include <linux/types.h>
+# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#  include <asm/statfs.h>
+# endif
+# include <linux/seq_file.h>
+#else /* __KERNEL__ */
+# include <liblustre.h>
  #endif
  
  #include <linux/obd_class.h>
  #include <linux/lprocfs_status.h>
+#include <linux/lustre_fsfilt.h>
  
  #ifdef LPROCFS
  
  struct proc_dir_entry *lprocfs_srch(struct proc_dir_entry *head,
                                      const char *name)
  {
-        struct proc_dir_entry* temp;
+        struct proc_dir_entry *temp;
  
-        if (!head)
+        if (head == NULL)
                  return NULL;
  
          temp = head->subdir;
          while (temp != NULL) {
-                if (!strcmp(temp->name, name))
+                if (strcmp(temp->name, name) == 0)
                          return temp;
  
                  temp = temp->next;
@@ -65,26 +66,30 @@ struct proc_dir_entry *lprocfs_srch(struct proc_dir_entry *head,
  int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *list,
                       void *data)
  {
-        if ((root == NULL) || (list == NULL))
+        if (root == NULL || list == NULL)
                  return -EINVAL;
  
-        while (list->name) {
+        while (list->name != NULL) {
                  struct proc_dir_entry *cur_root, *proc;
-                char *pathcopy, *cur, *next;
-                int pathsize = strlen(list->name)+1;
+                char *pathcopy, *cur, *next, pathbuf[64];
+                int pathsize = strlen(list->name) + 1;
  
                  proc = NULL;
                  cur_root = root;
  
                  /* need copy of path for strsep */
-                OBD_ALLOC(pathcopy, pathsize);
-                if (!pathcopy)
-                        return -ENOMEM;
+                if (strlen(list->name) > sizeof(pathbuf) - 1) {
+                        OBD_ALLOC(pathcopy, pathsize);
+                        if (pathcopy == NULL)
+                                return -ENOMEM;
+                } else {
+                        pathcopy = pathbuf;
+                }
  
                  next = pathcopy;
                  strcpy(pathcopy, list->name);
  
-                while (cur_root && (cur = strsep(&next, "/"))) {
+                while (cur_root != NULL && (cur = strsep(&next, "/"))) {
                          if (*cur =='\0') /* skip double/trailing "/" */
                                  continue;
  
@@ -92,10 +97,10 @@ int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *list,
                          CDEBUG(D_OTHER, "cur_root=%s, cur=%s, next=%s, (%s)\n",
                                 cur_root->name, cur, next,
                                 (proc ? "exists" : "new"));
-                        if (next)
+                        if (next != NULL) {
                                  cur_root = (proc ? proc :
-                                                   proc_mkdir(cur, cur_root));
-                        else if (!proc) {
+                                            proc_mkdir(cur, cur_root));
+                        } else if (proc == NULL) {
                                  mode_t mode = 0444;
                                  if (list->write_fptr)
                                          mode = 0644;
@@ -103,9 +108,10 @@ int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *list,
                          }
                  }
  
+                if (pathcopy != pathbuf)
                  OBD_FREE(pathcopy, pathsize);
  
-                if ((cur_root == NULL) || (proc == NULL)) {
+                if (cur_root == NULL || proc == NULL) {
                          CERROR("LprocFS: No memory to create /proc entry %s",
                                 list->name);
                          return -ENOMEM;
@@ -119,7 +125,7 @@ int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *list,
          return 0;
  }
  
-void lprocfs_remove(struct proc_dir_entry* root)
+void lprocfs_remove(struct proc_dir_entry *root)
  {
          struct proc_dir_entry *temp = root;
          struct proc_dir_entry *rm_entry;
@@ -130,7 +136,7 @@ void lprocfs_remove(struct proc_dir_entry* root)
          LASSERT(parent != NULL);
  
          while (1) {
-                while (temp->subdir)
+                while (temp->subdir != NULL)
                          temp = temp->subdir;
  
                  rm_entry = temp;
@@ -148,14 +154,14 @@ struct proc_dir_entry *lprocfs_register(const char *name,
          struct proc_dir_entry *newchild;
  
          newchild = lprocfs_srch(parent, name);
-        if (newchild) {
+        if (newchild != NULL) {
                  CERROR(" Lproc: Attempting to register %s more than once \n",
                         name);
                  return ERR_PTR(-EALREADY);
          }
  
          newchild = proc_mkdir(name, parent);
-        if (newchild && list) {
+        if (newchild != NULL && list != NULL) {
                  int rc = lprocfs_add_vars(newchild, list, data);
                  if (rc) {
                          lprocfs_remove(newchild);
@@ -175,10 +181,10 @@ int lprocfs_rd_u64(char *page, char **start, off_t off,
          return snprintf(page, count, LPU64"\n", *(__u64 *)data);
  }
  
-int lprocfs_rd_uuid(char* page, char **start, off_t off, int count,
+int lprocfs_rd_uuid(char *page, char **start, off_t off, int count,
                      int *eof, void *data)
  {
-        struct obd_device* dev = (struct obd_device*)data;
+        struct obd_device *dev = (struct obd_device*)data;
  
          LASSERT(dev != NULL);
          *eof = 1;
@@ -186,9 +192,9 @@ int lprocfs_rd_uuid(char* page, char **start, off_t off, int count,
  }
  
  int lprocfs_rd_name(char *page, char **start, off_t off, int count,
-                    int *eof, void *data)
+                    int *eof, void* data)
  {
-        struct obd_device* dev = (struct obd_device *)data;
+        struct obd_device *dev = (struct obd_device *)data;
  
          LASSERT(dev != NULL);
          LASSERT(dev->obd_name != NULL);
@@ -196,72 +202,98 @@ int lprocfs_rd_name(char *page, char **start, off_t off, int count,
          return snprintf(page, count, "%s\n", dev->obd_name);
  }
  
-int lprocfs_rd_blksize(char* page, char **start, off_t off, int count,
-                       int *eof, struct statfs *sfs)
+int lprocfs_rd_fstype(char *page, char **start, off_t off, int count, int *eof,
+                      void *data)
  {
-        LASSERT(sfs != NULL);
-        *eof = 1;
-        return snprintf(page, count, "%lu\n", sfs->f_bsize);
+        struct obd_device *obd = (struct obd_device *)data;
+
+        LASSERT(obd != NULL);
+        LASSERT(obd->obd_fsops != NULL);
+        LASSERT(obd->obd_fsops->fs_type != NULL);
+        return snprintf(page, count, "%s\n", obd->obd_fsops->fs_type);
  }
  
-int lprocfs_rd_kbytestotal(char* page, char **start, off_t off, int count,
-                           int *eof, struct statfs *sfs)
+int lprocfs_rd_blksize(char *page, char **start, off_t off, int count,
+                       int *eof, void *data)
  {
-        __u32 blk_size;
-        __u64 result;
+        struct obd_statfs osfs;
+        int rc = obd_statfs(data, &osfs, jiffies - HZ);
+        if (!rc) {
+                *eof = 1;
+                rc = snprintf(page, count, "%u\n", osfs.os_bsize);
+        }
+        return rc;
+}
  
-        LASSERT(sfs != NULL);
-        blk_size = sfs->f_bsize >> 10;
-        result = sfs->f_blocks;
+int lprocfs_rd_kbytestotal(char *page, char **start, off_t off, int count,
+                           int *eof, void *data)
+{
+        struct obd_statfs osfs;
+        int rc = obd_statfs(data, &osfs, jiffies - HZ);
+        if (!rc) {
+                __u32 blk_size = osfs.os_bsize >> 10;
+                __u64 result = osfs.os_blocks;
  
-        while (blk_size >>= 1)
-                result <<= 1;
+                while (blk_size >>= 1)
+                        result <<= 1;
  
-        *eof = 1;
-        return snprintf(page, count, LPU64"\n", result);
+                *eof = 1;
+                rc = snprintf(page, count, LPU64"\n", result);
+        }
+        return rc;
  }
  
-int lprocfs_rd_kbytesfree(char* page, char **start, off_t off, int count,
-                          int *eof, struct statfs *sfs)
+int lprocfs_rd_kbytesfree(char *page, char **start, off_t off, int count,
+                          int *eof, void *data)
  {
-        __u32 blk_size;
-        __u64 result;
+        struct obd_statfs osfs;
+        int rc = obd_statfs(data, &osfs, jiffies - HZ);
+        if (!rc) {
+                __u32 blk_size = osfs.os_bsize >> 10;
+                __u64 result = osfs.os_bfree;
  
-        LASSERT(sfs != NULL);
-        blk_size = sfs->f_bsize >> 10;
-        result = sfs->f_bfree;
+                while (blk_size >>= 1)
+                        result <<= 1;
  
-        while (blk_size >>= 1)
-                result <<= 1;
-
-        *eof = 1;
-        return snprintf(page, count, LPU64"\n", result);
+                *eof = 1;
+                rc = snprintf(page, count, LPU64"\n", result);
+        }
+        return rc;
  }
  
-int lprocfs_rd_filestotal(char* page, char **start, off_t off, int count,
-                          int *eof, struct statfs *sfs)
+int lprocfs_rd_filestotal(char *page, char **start, off_t off, int count,
+                          int *eof, void *data)
  {
-        LASSERT(sfs != NULL);
-        *eof = 1;
-        return snprintf(page, count, "%ld\n", sfs->f_files);
+        struct obd_statfs osfs;
+        int rc = obd_statfs(data, &osfs, jiffies - HZ);
+        if (!rc) {
+                *eof = 1;
+                rc = snprintf(page, count, LPU64"\n", osfs.os_files);
+        }
+
+        return rc;
  }
  
-int lprocfs_rd_filesfree(char* page, char **start, off_t off, int count,
-                         int *eof, struct statfs *sfs)
+int lprocfs_rd_filesfree(char *page, char **start, off_t off, int count,
+                         int *eof, void *data)
  {
-        LASSERT(sfs != NULL);
-        *eof = 1;
-        return snprintf(page, count, "%ld\n", sfs->f_ffree);
+        struct obd_statfs osfs;
+        int rc = obd_statfs(data, &osfs, jiffies - HZ);
+        if (!rc) {
+                *eof = 1;
+                rc = snprintf(page, count, LPU64"\n", osfs.os_ffree);
+        }
+        return rc;
  }
  
-int lprocfs_rd_filegroups(char* page, char **start, off_t off, int count,
-                          int *eof, struct statfs *sfs)
+int lprocfs_rd_filegroups(char *page, char **start, off_t off, int count,
+                          int *eof, void *data)
  {
          *eof = 1;
          return snprintf(page, count, "unimplemented\n");
  }
  
-int lprocfs_rd_server_uuid(char* page, char **start, off_t off, int count,
+int lprocfs_rd_server_uuid(char *page, char **start, off_t off, int count,
                             int *eof, void *data)
  {
          struct obd_device *obd = (struct obd_device *)data;
@@ -290,7 +322,7 @@ int lprocfs_rd_conn_uuid(char *page, char **start, off_t off, int count,
  int lprocfs_rd_numrefs(char *page, char **start, off_t off, int count,
                         int *eof, void *data)
  {
-        struct obd_type* class = (struct obd_type*) data;
+        struct obd_type *class = (struct obd_type*) data;
  
          LASSERT(class != NULL);
          *eof = 1;
@@ -334,21 +366,21 @@ struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num)
          if (num == 0)
                  return NULL;
  
-        OBD_ALLOC(stats, offsetof(typeof(*stats), ls_percpu[smp_num_cpus]));
+        OBD_ALLOC(stats, offsetof(typeof(*stats), ls_percpu[num_online_cpus()]));
          if (stats == NULL)
                  return NULL;
  
          percpusize = L1_CACHE_ALIGN(offsetof(typeof(*percpu), lp_cntr[num]));
-        stats->ls_percpu_size = smp_num_cpus * percpusize;
+        stats->ls_percpu_size = num_online_cpus() * percpusize;
          OBD_ALLOC(stats->ls_percpu[0], stats->ls_percpu_size);
          if (stats->ls_percpu[0] == NULL) {
                  OBD_FREE(stats, offsetof(typeof(*stats),
-                                         ls_percpu[smp_num_cpus]));
+                                         ls_percpu[num_online_cpus()]));
                  return NULL;
          }
  
          stats->ls_num = num;
-        for (i = 1; i < smp_num_cpus; i++)
+        for (i = 1; i < num_online_cpus(); i++)
                  stats->ls_percpu[i] = (void *)(stats->ls_percpu[i - 1]) +
                          percpusize;
  
@@ -361,7 +393,7 @@ void lprocfs_free_stats(struct lprocfs_stats *stats)
                  return;
  
          OBD_FREE(stats->ls_percpu[0], stats->ls_percpu_size);
-        OBD_FREE(stats, offsetof(typeof(*stats), ls_percpu[smp_num_cpus]));
+        OBD_FREE(stats, offsetof(typeof(*stats), ls_percpu[num_online_cpus()]));
  }
  
  /* Reset counter under lock */
@@ -410,17 +442,18 @@ static int lprocfs_stats_seq_show(struct seq_file *p, void *v)
         }
         idx = cntr - &(stats->ls_percpu[0])->lp_cntr[0];
  
-       for (i = 0; i < smp_num_cpus; i++) {
+       for (i = 0; i < num_online_cpus(); i++) {
                 struct lprocfs_counter *percpu_cntr =
                         &(stats->ls_percpu[i])->lp_cntr[idx];
                 int centry;
+
                 do {
-                        centry = atomic_read(&percpu_cntr->lc_cntl.la_entry);
-                        t.lc_count = percpu_cntr->lc_count;
-                        t.lc_sum = percpu_cntr->lc_sum;
-                        t.lc_min = percpu_cntr->lc_min;
-                        t.lc_max = percpu_cntr->lc_max;
-                        t.lc_sumsquare = percpu_cntr->lc_sumsquare;
+                       centry = atomic_read(&percpu_cntr->lc_cntl.la_entry);
+                       t.lc_count = percpu_cntr->lc_count;
+                       t.lc_sum = percpu_cntr->lc_sum;
+                       t.lc_min = percpu_cntr->lc_min;
+                       t.lc_max = percpu_cntr->lc_max;
+                       t.lc_sumsquare = percpu_cntr->lc_sumsquare;
                 } while (centry != atomic_read(&percpu_cntr->lc_cntl.la_entry) &&
                          centry != atomic_read(&percpu_cntr->lc_cntl.la_exit));
                 ret.lc_count += t.lc_count;
@@ -453,10 +486,10 @@ static int lprocfs_stats_seq_show(struct seq_file *p, void *v)
  }
  
  struct seq_operations lprocfs_stats_seq_sops = {
-        .start = lprocfs_stats_seq_start,
-        .stop = lprocfs_stats_seq_stop,
-        .next = lprocfs_stats_seq_next,
-        .show = lprocfs_stats_seq_show,
+        start: lprocfs_stats_seq_start,
+        stop:  lprocfs_stats_seq_stop,
+        next:  lprocfs_stats_seq_next,
+        show:  lprocfs_stats_seq_show,
  };
  
  static int lprocfs_stats_seq_open(struct inode *inode, struct file *file)
@@ -474,13 +507,13 @@ static int lprocfs_stats_seq_open(struct inode *inode, struct file *file)
  }
  
  struct file_operations lprocfs_stats_seq_fops = {
-        .open    = lprocfs_stats_seq_open,
-        .read    = seq_read,
-        .llseek  = seq_lseek,
-        .release = seq_release,
+        open:    lprocfs_stats_seq_open,
+        read:    seq_read,
+        llseek:  seq_lseek,
+        release: seq_release,
  };
  
-int lprocfs_register_stats(struct proc_dir_entry *root, const char* name,
+int lprocfs_register_stats(struct proc_dir_entry *root, const char *name,
                             struct lprocfs_stats *stats)
  {
          struct proc_dir_entry *entry;
@@ -502,7 +535,7 @@ void lprocfs_counter_init(struct lprocfs_stats *stats, int index,
          int i;
  
          LASSERT(stats != NULL);
-        for (i = 0; i < smp_num_cpus; i++) {
+        for (i = 0; i < num_online_cpus(); i++) {
                  c = &(stats->ls_percpu[i]->lp_cntr[index]);
                  c->lc_config = conf;
                  c->lc_min = ~(__u64)0;
@@ -515,7 +548,7 @@ EXPORT_SYMBOL(lprocfs_counter_init);
  #define LPROCFS_OBD_OP_INIT(base, stats, op)                               \
  do {                                                                       \
          unsigned int coffset = base + OBD_COUNTER_OFFSET(op);              \
-        LASSERT(coffset < stats->ls_num);                                     \
+        LASSERT(coffset < stats->ls_num);                                  \
          lprocfs_counter_init(stats, coffset, 0, #op, "reqs");              \
  } while (0)
  
@@ -529,10 +562,10 @@ int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats)
          LASSERT(obd->obd_proc_entry != NULL);
          LASSERT(obd->obd_cntr_base == 0);
  
-        num_stats = 1 + OBD_COUNTER_OFFSET(destroy_export) +
+        num_stats = 1 + OBD_COUNTER_OFFSET(unpin) +
                  num_private_stats;
          stats = lprocfs_alloc_stats(num_stats);
-        if (!stats)
+        if (stats == NULL)
                  return -ENOMEM;
  
          LPROCFS_OBD_OP_INIT(num_private_stats, stats, iocontrol);
@@ -569,16 +602,28 @@ int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats)
          LPROCFS_OBD_OP_INIT(num_private_stats, stats, match);
          LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel);
          LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel_unused);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, log_add);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, log_cancel);
          LPROCFS_OBD_OP_INIT(num_private_stats, stats, san_preprw);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, mark_page_dirty);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, clear_dirty_pages);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, last_dirty_offset);
          LPROCFS_OBD_OP_INIT(num_private_stats, stats, destroy_export);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, pin); 
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, unpin);
  
          for (i = num_private_stats; i < num_stats; i++) {
-                /* If this assertion failed, it is likely that an obd
+                /* If this LBUGs, it is likely that an obd
                   * operation was added to struct obd_ops in
                   * <linux/obd.h>, and that the corresponding line item
                   * LPROCFS_OBD_OP_INIT(.., .., opname)
                   * is missing from the list above. */
-                LASSERT(&(stats->ls_percpu[0])->lp_cntr[i].lc_name != NULL);
+                if (stats->ls_percpu[0]->lp_cntr[i].lc_name == NULL) {
+                        CERROR("Missing obd_stat initializer obd_op "
+                               "operation at offset %d. Aborting.\n",
+                               i - num_private_stats);
+                        LBUG();
+                }
          }
          rc = lprocfs_register_stats(obd->obd_proc_entry, "stats", stats);
          if (rc < 0) {
@@ -617,6 +662,7 @@ EXPORT_SYMBOL(lprocfs_free_obd_stats);
  EXPORT_SYMBOL(lprocfs_rd_u64);
  EXPORT_SYMBOL(lprocfs_rd_uuid);
  EXPORT_SYMBOL(lprocfs_rd_name);
+EXPORT_SYMBOL(lprocfs_rd_fstype);
  EXPORT_SYMBOL(lprocfs_rd_server_uuid);
  EXPORT_SYMBOL(lprocfs_rd_conn_uuid);
  EXPORT_SYMBOL(lprocfs_rd_numrefs);
diff --git a/lustre/obdclass/lustre_handles.c b/lustre/obdclass/lustre_handles.c

index 06f86ad..bc07df9 100644 (file)
--- a/lustre/obdclass/lustre_handles.c
+++ b/lustre/obdclass/lustre_handles.c
@@ -4,32 +4,31 @@
   * Copyright (C) 2002 Cluster File Systems, Inc.
   *   Author: Phil Schwan <phil@clusterfs.com>
   *
- *   This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *   This file is part of Lustre, http://www.lustre.org/
   *
- *   Portals is free software; you can redistribute it and/or
- *   modify it under the terms of version 2.1 of the GNU Lesser General
- *   Public License as published by the Free Software Foundation.
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
   *
- *   Portals is distributed in the hope that it will be useful,
+ *   Lustre is distributed in the hope that it will be useful,
   *   but WITHOUT ANY WARRANTY; without even the implied warranty of
   *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU Lesser General Public License for more details.
+ *   GNU General Public License for more details.
   *
- *   You should have received a copy of the GNU Lesser General Public
- *   License along with Portals; if not, write to the Free Software
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
   *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   */
  
  #define DEBUG_SUBSYSTEM S_CLASS
  #ifdef __KERNEL__
-#include <linux/types.h>
-#include <linux/random.h>
+# include <linux/types.h>
+# include <linux/random.h>
  #else 
-#include <liblustre.h>
+# include <liblustre.h>
  #endif 
  
-
-#include <linux/kp30.h>
+#include <linux/obd_support.h>
  #include <linux/lustre_handles.h>
  
  static spinlock_t handle_lock = SPIN_LOCK_UNLOCKED;
@@ -118,7 +117,7 @@ int class_handle_init(void)
  
          LASSERT(handle_hash == NULL);
  
-        PORTAL_ALLOC(handle_hash, sizeof(*handle_hash) * HANDLE_HASH_SIZE);
+        OBD_VMALLOC(handle_hash, sizeof(*handle_hash) * HANDLE_HASH_SIZE);
          if (handle_hash == NULL)
                  return -ENOMEM;
  
@@ -158,7 +157,7 @@ void class_handle_cleanup(void)
                  cleanup_all_handles();
          }
  
-        PORTAL_FREE(handle_hash, sizeof(*handle_hash) * HANDLE_HASH_SIZE);
+        OBD_VFREE(handle_hash, sizeof(*handle_hash) * HANDLE_HASH_SIZE);
          handle_hash = NULL;
  
          if (handle_count)
diff --git a/lustre/obdclass/lustre_peer.c b/lustre/obdclass/lustre_peer.c

index 016354c..5987d2e 100644 (file)
--- a/lustre/obdclass/lustre_peer.c
+++ b/lustre/obdclass/lustre_peer.c
@@ -64,8 +64,8 @@ void class_exit_uuidlist(void)
                  struct uuid_nid_data *data =
                          list_entry(tmp, struct uuid_nid_data, head);
  
-                PORTAL_FREE(data->uuid, strlen(data->uuid) + 1);
-                PORTAL_FREE(data, sizeof(*data));
+                OBD_FREE(data->uuid, strlen(data->uuid) + 1);
+                OBD_FREE(data, sizeof(*data));
          }
  }
  
@@ -109,11 +109,11 @@ int class_add_uuid(char *uuid, __u64 nid, __u32 nal)
          }
  
          rc = -ENOMEM;
-        PORTAL_ALLOC(data, sizeof(*data));
+        OBD_ALLOC(data, sizeof(*data));
          if (data == NULL)
                  goto fail_0;
  
-        PORTAL_ALLOC(data->uuid, nob);
+        OBD_ALLOC(data->uuid, nob);
          if (data == NULL)
                  goto fail_1;
  
@@ -131,7 +131,7 @@ int class_add_uuid(char *uuid, __u64 nid, __u32 nal)
          return 0;
  
   fail_1:
-        PORTAL_FREE (data, sizeof (*data));
+        OBD_FREE (data, sizeof (*data));
   fail_0:
          kportal_put_ni (nal);
          return (rc);
@@ -171,8 +171,8 @@ int class_del_uuid (char *uuid)
                  list_del (&data->head);
  
                  kportal_put_ni (data->nal);
-                PORTAL_FREE(data->uuid, strlen(data->uuid) + 1);
-                PORTAL_FREE(data, sizeof(*data));
+                OBD_FREE(data->uuid, strlen(data->uuid) + 1);
+                OBD_FREE(data, sizeof(*data));
          } while (!list_empty (&deathrow));
  
          return 0;
diff --git a/lustre/obdclass/simple.c b/lustre/obdclass/simple.c

index 0ce54a3..bd1363a 100644 (file)
--- a/lustre/obdclass/simple.c
+++ b/lustre/obdclass/simple.c
@@ -139,7 +139,6 @@ void pop_ctxt(struct obd_run_ctxt *saved, struct obd_run_ctxt *new_ctx,
                  current->fsgid = saved->ouc.ouc_fsgid;
                  current->cap_effective = saved->ouc.ouc_cap;
                  current->ngroups = saved->ngroups;
-
                  current->groups[0] = saved->ouc.ouc_suppgid1;
                  current->groups[1] = saved->ouc.ouc_suppgid2;
          }
@@ -167,7 +166,7 @@ struct dentry *simple_mknod(struct dentry *dir, char *name, int mode)
          ASSERT_KERNEL_CTXT("kernel doing mknod outside kernel context\n");
          CDEBUG(D_INODE, "creating file %*s\n", (int)strlen(name), name);
  
-        dchild = lookup_one_len(name, dir, strlen(name));
+        dchild = ll_lookup_one_len(name, dir, strlen(name));
          if (IS_ERR(dchild))
                  GOTO(out_up, dchild);
  
@@ -201,7 +200,7 @@ struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode)
  
          ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n");
          CDEBUG(D_INODE, "creating directory %*s\n", (int)strlen(name), name);
-        dchild = lookup_one_len(name, dir, strlen(name));
+        dchild = ll_lookup_one_len(name, dir, strlen(name));
          if (IS_ERR(dchild))
                  GOTO(out_up, dchild);
  
diff --git a/lustre/obdclass/statfs_pack.c b/lustre/obdclass/statfs_pack.c

index 786a768..8bb78cc 100644 (file)
--- a/lustre/obdclass/statfs_pack.c
+++ b/lustre/obdclass/statfs_pack.c
@@ -31,7 +31,7 @@
  #else
  #include <linux/version.h>
  #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#include <asm/statfs.h>
+#include <linux/statfs.h>
  #endif
  #endif
  
@@ -40,8 +40,9 @@
  #include <linux/obd_support.h>
  #include <linux/obd_class.h>
  
-void statfs_pack(struct obd_statfs *osfs, struct statfs *sfs)
+void statfs_pack(struct obd_statfs *osfs, struct kstatfs *sfs)
  {
+        memset(osfs, 0, sizeof(*osfs));
          osfs->os_type = sfs->f_type;
          osfs->os_blocks = sfs->f_blocks;
          osfs->os_bfree = sfs->f_bfree;
@@ -52,8 +53,9 @@ void statfs_pack(struct obd_statfs *osfs, struct statfs *sfs)
          osfs->os_namelen = sfs->f_namelen;
  }
  
-void statfs_unpack(struct statfs *sfs, struct obd_statfs *osfs)
+void statfs_unpack(struct kstatfs *sfs, struct obd_statfs *osfs)
  {
+        memset(sfs, 0, sizeof(*sfs));
          sfs->f_type = osfs->os_type;
          sfs->f_blocks = osfs->os_blocks;
          sfs->f_bfree = osfs->os_bfree;
@@ -64,39 +66,5 @@ void statfs_unpack(struct statfs *sfs, struct obd_statfs *osfs)
          sfs->f_namelen = osfs->os_namelen;
  }
  
-int obd_self_statfs(struct obd_device *obd, struct statfs *sfs)
-{
-        struct obd_export *export, *my_export = NULL;
-        struct obd_statfs osfs = { 0 };
-        int rc;
-        ENTRY;
-
-        LASSERT( obd != NULL );
-
-        spin_lock(&obd->obd_dev_lock);
-        if (list_empty(&obd->obd_exports)) {
-                spin_unlock(&obd->obd_dev_lock);
-                export = my_export = class_new_export(obd);
-                if (export == NULL)
-                        RETURN(-ENOMEM);
-        } else {
-                export = list_entry(obd->obd_exports.next, typeof(*export),
-                                    exp_obd_chain);
-                export = class_export_get(export);
-                spin_unlock(&obd->obd_dev_lock);
-        }
-
-        rc = obd_statfs(export, &osfs);
-        if (!rc)
-                statfs_unpack(sfs, &osfs);
-
-        if (my_export)
-                class_unlink_export(my_export);
-
-        class_export_put(export);
-        RETURN(rc);
-}
-
  EXPORT_SYMBOL(statfs_pack);
  EXPORT_SYMBOL(statfs_unpack);
-EXPORT_SYMBOL(obd_self_statfs);
diff --git a/lustre/obdecho/.cvsignore b/lustre/obdecho/.cvsignore

index e530020..49c6100 100644 (file)
--- a/lustre/obdecho/.cvsignore
+++ b/lustre/obdecho/.cvsignore
@@ -6,3 +6,4 @@ Makefile
  Makefile.in
  .deps
  TAGS
+.*.cmd
diff --git a/lustre/obdecho/echo.c b/lustre/obdecho/echo.c

index f89df07..887889a 100644 (file)
--- a/lustre/obdecho/echo.c
+++ b/lustre/obdecho/echo.c
@@ -64,7 +64,7 @@ static int echo_connect(struct lustre_handle *conn, struct obd_device *obd,
          return class_connect(conn, obd, cluuid);
  }
  
-static int echo_disconnect(struct lustre_handle *conn, int failover)
+static int echo_disconnect(struct lustre_handle *conn, int flags)
  {
          struct obd_export *exp = class_conn2export(conn);
  
@@ -72,7 +72,7 @@ static int echo_disconnect(struct lustre_handle *conn, int failover)
  
          ldlm_cancel_locks_for_export(exp);
          class_export_put(exp);
-        return (class_disconnect(conn, failover));
+        return class_disconnect(conn, flags);
  }
  
  static __u64 echo_next_id(struct obd_device *obddev)
@@ -235,7 +235,7 @@ static int echo_setattr(struct lustre_handle *conn, struct obdo *oa,
  int echo_preprw(int cmd, struct obd_export *export, struct obdo *oa,
                  int objcount, struct obd_ioobj *obj, int niocount,
                  struct niobuf_remote *nb, struct niobuf_local *res,
-                void **desc_private, struct obd_trans_info *oti)
+                struct obd_trans_info *oti)
  {
          struct obd_device *obd;
          struct niobuf_local *r = res;
@@ -253,7 +253,8 @@ int echo_preprw(int cmd, struct obd_export *export, struct obdo *oa,
          CDEBUG(D_PAGE, "%s %d obdos with %d IOs\n",
                 cmd == OBD_BRW_READ ? "reading" : "writing", objcount, niocount);
  
-        *desc_private = (void *)DESC_PRIV;
+        if (oti)
+                oti->oti_handle = (void *)DESC_PRIV;
  
          for (i = 0; i < objcount; i++, obj++) {
                  int gfp_mask = (obj->ioo_id & 1) ? GFP_HIGHUSER : GFP_KERNEL;
@@ -285,7 +286,7 @@ int echo_preprw(int cmd, struct obd_export *export, struct obdo *oa,
  
                          r->offset = nb->offset;
                          r->len = nb->len;
-                        LASSERT ((r->offset & (PAGE_SIZE - 1)) + r->len <= PAGE_SIZE);
+                        LASSERT((r->offset & ~PAGE_MASK) + r->len <= PAGE_SIZE);
  
                          CDEBUG(D_PAGE, "$$$$ get page %p @ "LPU64" for %d\n",
                                 r->page, r->offset, r->len);
@@ -339,9 +340,9 @@ preprw_cleanup:
          return rc;
  }
  
-int echo_commitrw(int cmd, struct obd_export *export, int objcount,
-                  struct obd_ioobj *obj, int niocount, struct niobuf_local *res,
-                  void *desc_private, struct obd_trans_info *oti)
+int echo_commitrw(int cmd, struct obd_export *export, struct obdo *oa,
+                  int objcount, struct obd_ioobj *obj, int niocount,
+                  struct niobuf_local *res, struct obd_trans_info *oti)
  {
          struct obd_device *obd;
          struct niobuf_local *r = res;
@@ -365,7 +366,7 @@ int echo_commitrw(int cmd, struct obd_export *export, int objcount,
                  RETURN(-EINVAL);
          }
  
-        LASSERT(desc_private == (void *)DESC_PRIV);
+        LASSERT(oti == NULL || oti->oti_handle == (void *)DESC_PRIV);
  
          for (i = 0; i < objcount; i++, obj++) {
                  int verify = obj->ioo_id != 0;
@@ -437,7 +438,7 @@ static int echo_setup(struct obd_device *obddev, obd_count len, void *buf)
          RETURN(0);
  }
  
-static int echo_cleanup(struct obd_device *obddev, int force, int failover)
+static int echo_cleanup(struct obd_device *obddev, int flags)
  {
          ENTRY;
  
@@ -453,7 +454,7 @@ int echo_attach(struct obd_device *obd, obd_count len, void *data)
          struct lprocfs_static_vars lvars;
          int rc;
  
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(echo, &lvars);
          rc = lprocfs_obd_attach(obd, lvars.obd_vars);
          if (rc != 0)
                  return rc;
@@ -539,7 +540,7 @@ static int __init obdecho_init(void)
  
          printk(KERN_INFO "Lustre Echo OBD driver; info@clusterfs.com\n");
  
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(echo, &lvars);
  
          rc = echo_object0_pages_init ();
          if (rc != 0)
@@ -561,7 +562,7 @@ static int __init obdecho_init(void)
          RETURN(rc);
  }
  
-static void __exit obdecho_exit(void)
+static void /*__exit*/ obdecho_exit(void)
  {
          echo_client_cleanup();
          class_unregister_type(OBD_ECHO_DEVICENAME);
diff --git a/lustre/obdecho/echo_client.c b/lustre/obdecho/echo_client.c

index 79da7ea..c010798 100644 (file)
--- a/lustre/obdecho/echo_client.c
+++ b/lustre/obdecho/echo_client.c
@@ -484,7 +484,7 @@ echo_client_kbrw (struct obd_device *obd, int rw,
                  }
          }
  
-        rc = obd_brw(rw, &ec->ec_conn, lsm, npages, pga, NULL);
+        rc = obd_brw(rw, &ec->ec_conn, oa, lsm, npages, pga, NULL);
  
   out:
          if (rc != 0)
@@ -568,7 +568,7 @@ static int echo_client_ubrw(struct obd_device *obd, int rw,
                  pgp->flag = 0;
          }
  
-        rc = obd_brw(rw, &ec->ec_conn, lsm, npages, pga, NULL);
+        rc = obd_brw(rw, &ec->ec_conn, oa, lsm, npages, pga, NULL);
  
          //        if (rw == OBD_BRW_READ)
          //                mark_dirty_kiobuf (kiobuf, count);
@@ -1009,7 +1009,7 @@ static int echo_setup(struct obd_device *obddev, obd_count len, void *buf)
          RETURN(rc);
  }
  
-static int echo_cleanup(struct obd_device * obddev, int force, int failover)
+static int echo_cleanup(struct obd_device *obddev, int flags)
  {
          struct list_head       *el;
          struct ec_object       *eco;
@@ -1023,21 +1023,21 @@ static int echo_cleanup(struct obd_device * obddev, int force, int failover)
          }
  
          /* XXX assuming sole access */
-        while (!list_empty (&ec->ec_objects)) {
+        while (!list_empty(&ec->ec_objects)) {
                  el = ec->ec_objects.next;
-                eco = list_entry (el, struct ec_object, eco_obj_chain);
+                eco = list_entry(el, struct ec_object, eco_obj_chain);
  
-                LASSERT (eco->eco_refcount == 0);
+                LASSERT(eco->eco_refcount == 0);
                  eco->eco_refcount = 1;
                  eco->eco_deleted = 1;
-                echo_put_object (eco);
+                echo_put_object(eco);
          }
  
-        rc = obd_disconnect (&ec->ec_conn, 0);
+        rc = obd_disconnect(&ec->ec_conn, 0);
          if (rc != 0)
                  CERROR("fail to disconnect device: %d\n", rc);
  
-        RETURN (rc);
+        RETURN(rc);
  }
  
  static int echo_connect(struct lustre_handle *conn, struct obd_device *src,
@@ -1057,7 +1057,7 @@ static int echo_connect(struct lustre_handle *conn, struct obd_device *src,
          RETURN (rc);
  }
  
-static int echo_disconnect(struct lustre_handle *conn, int failover)
+static int echo_disconnect(struct lustre_handle *conn, int flags)
  {
          struct obd_export      *exp = class_conn2export (conn);
          struct obd_device      *obd;
@@ -1128,7 +1128,7 @@ int echo_client_init(void)
  {
          struct lprocfs_static_vars lvars;
  
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(echo, &lvars);
          return class_register_type(&echo_obd_ops, lvars.module_vars,
                                     OBD_ECHO_CLIENT_DEVICENAME);
  }
diff --git a/lustre/obdecho/lproc_echo.c b/lustre/obdecho/lproc_echo.c

index 6a16001..c25d156 100644 (file)
--- a/lustre/obdecho/lproc_echo.c
+++ b/lustre/obdecho/lproc_echo.c
@@ -25,30 +25,19 @@
  #include <linux/obd_class.h>
  
  #ifndef LPROCFS
-struct lprocfs_vars lprocfs_obd_vars[]  = { {0} };
-struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_obd_vars[]  = { {0} };
+static struct lprocfs_vars lprocfs_module_vars[] = { {0} };
  #else
  
-int rd_fstype(char* page, char **start, off_t off, int count, int *eof,
-              void *data)
-{
-        struct obd_device* dev = (struct obd_device*)data;
-        
-        LASSERT(dev != NULL);
-        *eof = 1;
-        return snprintf(page, count, "%s\n", dev->u.echo.eo_fstype);
-}
-
-struct lprocfs_vars lprocfs_obd_vars[] = {
-        { "uuid",     lprocfs_rd_uuid,    0, 0 },
-        { "fstype",   rd_fstype,          0, 0 },
+static struct lprocfs_vars lprocfs_obd_vars[] = {
+        { "uuid",         lprocfs_rd_uuid,        0, 0 },
          { 0 }
  };
  
-struct lprocfs_vars lprocfs_module_vars[] = {
-        { "num_refs", lprocfs_rd_numrefs, 0, 0 },
+static struct lprocfs_vars lprocfs_module_vars[] = {
+        { "num_refs",     lprocfs_rd_numrefs,     0, 0 },
          { 0 }
  };
  
  #endif /* LPROCFS */
-LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
+LPROCFS_INIT_VARS(echo, lprocfs_module_vars, lprocfs_obd_vars)
diff --git a/lustre/obdfilter/.cvsignore b/lustre/obdfilter/.cvsignore

index e530020..49c6100 100644 (file)
--- a/lustre/obdfilter/.cvsignore
+++ b/lustre/obdfilter/.cvsignore
@@ -6,3 +6,4 @@ Makefile
  Makefile.in
  .deps
  TAGS
+.*.cmd
diff --git a/lustre/obdfilter/Makefile.am b/lustre/obdfilter/Makefile.am

index b9addf1..ed4ca1e 100644 (file)
--- a/lustre/obdfilter/Makefile.am
+++ b/lustre/obdfilter/Makefile.am
@@ -6,6 +6,7 @@
  MODULE = obdfilter
  modulefs_DATA = obdfilter.o
  EXTRA_PROGRAMS = obdfilter
-obdfilter_SOURCES = filter.c lproc_obdfilter.c
+obdfilter_SOURCES = filter.c filter_io.c filter_log.c filter_san.c \
+lproc_obdfilter.c filter_internal.h
  
  include $(top_srcdir)/Rules
diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c

index 6f2d96c..b6c1bd9 100644 (file)
--- a/lustre/obdfilter/filter.c
+++ b/lustre/obdfilter/filter.c
@@ -33,31 +33,27 @@
   *            threaded operation on the OST.
   */
  
-#define EXPORT_SYMTAB
  #define DEBUG_SUBSYSTEM S_FILTER
  
  #include <linux/config.h>
  #include <linux/module.h>
-#include <linux/pagemap.h> // XXX kill me soon
  #include <linux/fs.h>
  #include <linux/dcache.h>
-#include <linux/obd_class.h>
-#include <linux/lustre_dlm.h>
-#include <linux/obd_filter.h>
  #include <linux/init.h>
-#include <linux/random.h>
-#include <linux/lustre_fsfilt.h>
-#include <linux/lprocfs_status.h>
  #include <linux/version.h>
  #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-#include <linux/mount.h>
+# include <linux/mount.h>
+# include <linux/buffer_head.h>
  #endif
  
-enum {
-        LPROC_FILTER_READ_BYTES = 0,
-        LPROC_FILTER_WRITE_BYTES = 1,
-        LPROC_FILTER_LAST,
-};
+#include <linux/obd_class.h>
+#include <linux/lustre_dlm.h>
+#include <linux/lustre_fsfilt.h>
+#include <linux/lprocfs_status.h>
+#include <linux/lustre_log.h>
+#include <linux/lustre_commit_confd.h>
+
+#include "filter_internal.h"
  
  #define S_SHIFT 12
  static char *obd_type_by_mode[S_IFMT >> S_SHIFT] = {
@@ -132,19 +128,79 @@ static void filter_ffd_destroy(struct filter_file_data *ffd)
          filter_ffd_put(ffd);
  }
  
-static void filter_commit_cb(struct obd_device *obd, __u64 transno, int error)
+static void filter_commit_cb(struct obd_device *obd, __u64 transno,
+                             void *cb_data, int error)
  {
          obd_transno_commit_cb(obd, transno, error);
  }
-/* Assumes caller has already pushed us into the kernel context. */
-int filter_finish_transno(struct obd_export *export, void *handle,
-                          struct obd_trans_info *oti, int rc)
+
+/* Queue one llog cancel cookie for sending to the MDC, or flush the queue.
+ *
+ * If count == 0 or cookies == NULL nothing is queued: the pending llcd
+ * (filter->fo_llcd), if there is one, is sent only when OBD_LLOG_FL_SENDNOW
+ * is set in flags.  Otherwise a single cookie (*cookies) is appended to the
+ * pending llcd, grabbing a fresh one with llcd_grab() when none is pending.
+ * The llcd is passed to llcd_send() once a page has no room left for another
+ * cookie, or immediately when OBD_LLOG_FL_SENDNOW is set.
+ *
+ * lsm is not used.  Only the first cookie is queued, whatever count says.
+ * All access to filter->fo_llcd is done under filter->fo_sem.
+ *
+ * Returns 0 on success, or -ENOMEM if no llcd could be grabbed (the cookie
+ * is then dropped and reported via CERROR). */
+static int filter_client_log_cancel(struct lustre_handle *conn,
+                                    struct lov_stripe_md *lsm, int count,
+                                    struct llog_cookie *cookies, int flags)
+{
+        struct obd_device *obd = class_conn2obd(conn);
+        struct llog_commit_data *llcd;
+        struct filter_obd *filter = &obd->u.filter;
+        int rc = 0;
+        ENTRY;
+
+        if (count == 0 || cookies == NULL) {
+                /* Flush-only request: no cookie to add. */
+                down(&filter->fo_sem);
+                if (filter->fo_llcd == NULL || !(flags & OBD_LLOG_FL_SENDNOW))
+                        GOTO(out, rc);
+
+                llcd = filter->fo_llcd;
+                GOTO(send_now, rc);
+        }
+
+        down(&filter->fo_sem);
+        llcd = filter->fo_llcd;
+        if (llcd == NULL) {
+                llcd = llcd_grab();
+                if (llcd == NULL) {
+                        CERROR("couldn't get an llcd - dropped "LPX64":%x+%u\n",
+                               cookies->lgc_lgl.lgl_oid,
+                               cookies->lgc_lgl.lgl_ogen, cookies->lgc_index);
+                        GOTO(out, rc = -ENOMEM);
+                }
+                llcd->llcd_import = filter->fo_mdc_imp;
+                filter->fo_llcd = llcd;
+        }
+
+        /* NOTE(review): llcd_cookiebytes is used as a byte count below; if
+         * llcd_cookies is not a byte array this offset is scaled by the
+         * element size - confirm against struct llog_commit_data. */
+        memcpy(llcd->llcd_cookies + llcd->llcd_cookiebytes, cookies,
+               sizeof(*cookies));
+        llcd->llcd_cookiebytes += sizeof(*cookies);
+
+        GOTO(send_now, rc);
+send_now:
+        /* Send when the page cannot hold one more cookie, or on demand. */
+        if (PAGE_SIZE - llcd->llcd_cookiebytes < sizeof(*cookies) ||
+            (flags & OBD_LLOG_FL_SENDNOW)) {
+                filter->fo_llcd = NULL;
+                llcd_send(llcd);
+        }
+out:
+        up(&filter->fo_sem);
+
+        /* Pair the ENTRY above so the debug trace records the exit too. */
+        RETURN(rc);
+}
+
+/* Commit callback for a destroy: hand the llog cancel cookie carried in
+ * cb_data to filter_client_log_cancel() with OBD_LLOG_FL_SENDNOW, then free
+ * the cookie.  transno and error are not examined. */
+static void filter_cancel_cookies_cb(struct obd_device *obd, __u64 transno,
+                                     void *cb_data, int error)
+{
+        struct lustre_handle *mdc_conn = &obd->u.filter.fo_mdc_conn;
+        struct llog_cookie *cookie = cb_data;
+
+        filter_client_log_cancel(mdc_conn, NULL, 1, cookie,
+                                 OBD_LLOG_FL_SENDNOW);
+        OBD_FREE(cookie, sizeof(*cookie));
+}
+
+/* Assumes caller has already pushed us into the kernel context. */
+int filter_finish_transno(struct obd_export *exp, struct obd_trans_info *oti,
+                          int rc)
+{
+        struct filter_obd *filter = &exp->exp_obd->u.filter;
+        struct filter_export_data *fed = &exp->exp_filter_data;
          struct filter_client_data *fcd = fed->fed_fcd;
+        __u64 last_rcvd;
          loff_t off;
          ssize_t written;
  
@@ -152,14 +208,14 @@ int filter_finish_transno(struct obd_export *export, void *handle,
          if (rc)
                  RETURN(rc);
  
-        if (!obd->obd_replayable)
+        if (!exp->exp_obd->obd_replayable)
                  RETURN(rc);
  
          /* we don't allocate new transnos for replayed requests */
-        if (oti && oti->oti_transno == 0) {
+        if (oti != NULL && oti->oti_transno == 0) {
                  spin_lock(&filter->fo_translock);
-                last_rcvd = le64_to_cpu(filter->fo_fsd->fsd_last_rcvd) + 1;
-                filter->fo_fsd->fsd_last_rcvd = cpu_to_le64(last_rcvd);
+                last_rcvd = le64_to_cpu(filter->fo_fsd->fsd_last_transno) + 1;
+                filter->fo_fsd->fsd_last_transno = cpu_to_le64(last_rcvd);
                  spin_unlock(&filter->fo_translock);
                  oti->oti_transno = last_rcvd;
                  fcd->fcd_last_rcvd = cpu_to_le64(last_rcvd);
@@ -169,27 +225,28 @@ int filter_finish_transno(struct obd_export *export, void *handle,
                  fcd->fcd_last_xid = 0;
  
                  off = fed->fed_lr_off;
-                fsfilt_set_last_rcvd(obd, last_rcvd, handle, filter_commit_cb);
-                written = lustre_fwrite(filter->fo_rcvd_filp, (char *)fcd, 
-                                        sizeof(*fcd), &off);
+                fsfilt_set_last_rcvd(exp->exp_obd, last_rcvd, oti->oti_handle,
+                                     filter_commit_cb, NULL);
+                written = fsfilt_write_record(exp->exp_obd,
+                                              filter->fo_rcvd_filp, (char *)fcd,
+                                              sizeof(*fcd), &off);
                  CDEBUG(D_HA, "wrote trans #"LPD64" for client %s at #%d: "
-                       "written = "LPSZ"\n", last_rcvd, fcd->fcd_uuid, 
+                       "written = "LPSZ"\n", last_rcvd, fcd->fcd_uuid,
                         fed->fed_lr_idx, written);
  
                  if (written == sizeof(*fcd))
                          RETURN(0);
-                CERROR("error writing to last_rcvd file: rc = %d\n", 
+                CERROR("error writing to %s: rc = %d\n", LAST_RCVD,
                         (int)written);
                  if (written >= 0)
-                        RETURN(-EIO);
-
+                        RETURN(-ENOSPC);
                  RETURN(written);
-        }                 
+        }
  
          RETURN(0);
  }
  
-static inline void f_dput(struct dentry *dentry)
+void f_dput(struct dentry *dentry)
  {
          /* Can't go inside filter_ddelete because it can block */
          CDEBUG(D_INODE, "putting %s: %p, count = %d\n",
@@ -207,26 +264,19 @@ static void filter_drelease(struct dentry *dentry)
  }
  
  struct dentry_operations filter_dops = {
-        .d_release = filter_drelease,
+        d_release: filter_drelease,
  };
  
-#define LAST_RCVD "last_rcvd"
-#define INIT_OBJID 2
-
-/* This limit is arbitrary, but for now we fit it in 1 page (32k clients) */
-#define FILTER_LR_MAX_CLIENTS (PAGE_SIZE * 8)
-#define FILTER_LR_MAX_CLIENT_WORDS (FILTER_LR_MAX_CLIENTS/sizeof(unsigned long))
-
  /* Add client data to the FILTER.  We use a bitmap to locate a free space
   * in the last_rcvd file if cl_idx is -1 (i.e. a new client).
   * Otherwise, we have just read the data from the last_rcvd file and
- * we know its offset.
- */
-int filter_client_add(struct obd_device *obd, struct filter_obd *filter,
-                      struct filter_export_data *fed, int cl_idx)
+ * we know its offset. */
+static int filter_client_add(struct obd_device *obd, struct filter_obd *filter,
+                             struct filter_export_data *fed, int cl_idx)
  {
          unsigned long *bitmap = filter->fo_last_rcvd_slots;
          int new_client = (cl_idx == -1);
+        ENTRY;
  
          LASSERT(bitmap != NULL);
  
@@ -242,7 +292,7 @@ int filter_client_add(struct obd_device *obd, struct filter_obd *filter,
          repeat:
                  if (cl_idx >= FILTER_LR_MAX_CLIENTS) {
                          CERROR("no client slots - fix FILTER_LR_MAX_CLIENTS\n");
-                        return -ENOMEM;
+                        RETURN(-ENOMEM);
                  }
                  if (test_and_set_bit(cl_idx, bitmap)) {
                          CERROR("FILTER client %d: found bit is set in bitmap\n",
@@ -270,23 +320,23 @@ int filter_client_add(struct obd_device *obd, struct filter_obd *filter,
          if (new_client) {
                  struct obd_run_ctxt saved;
                  loff_t off = fed->fed_lr_off;
-                ssize_t written;
+                int written;
                  void *handle;
  
                  CDEBUG(D_INFO, "writing client fcd at idx %u (%llu) (len %u)\n",
                         fed->fed_lr_idx,off,(unsigned int)sizeof(*fed->fed_fcd));
  
                  push_ctxt(&saved, &filter->fo_ctxt, NULL);
-                /* Transaction eeded to fix for bug 1403 */
+                /* Transaction needed to fix bug 1403 */
                  handle = fsfilt_start(obd,
                                        filter->fo_rcvd_filp->f_dentry->d_inode,
-                                      FSFILT_OP_SETATTR);
+                                      FSFILT_OP_SETATTR, NULL);
                  if (IS_ERR(handle)) {
                          written = PTR_ERR(handle);
                          CERROR("unable to start transaction: rc %d\n",
                                 (int)written);
                  } else {
-                        written = lustre_fwrite(filter->fo_rcvd_filp,
+                        written = fsfilt_write_record(obd, filter->fo_rcvd_filp,
                                                  (char *)fed->fed_fcd,
                                                  sizeof(*fed->fed_fcd), &off);
                          fsfilt_commit(obd,
@@ -296,32 +346,35 @@ int filter_client_add(struct obd_device *obd, struct filter_obd *filter,
                  pop_ctxt(&saved, &filter->fo_ctxt, NULL);
  
                  if (written != sizeof(*fed->fed_fcd)) {
+                        CERROR("error writing %s client idx %u: rc %d\n",
+                               LAST_RCVD, fed->fed_lr_idx, written);
                          if (written < 0)
                                  RETURN(written);
-                        RETURN(-EIO);
+                        RETURN(-ENOSPC);
                  }
          }
-        return 0;
+        RETURN(0);
  }
  
-int filter_client_free(struct obd_export *exp, int failover)
+static int filter_client_free(struct obd_export *exp, int flags)
  {
          struct filter_export_data *fed = &exp->exp_filter_data;
          struct filter_obd *filter = &exp->exp_obd->u.filter;
+        struct obd_device *obd = exp->exp_obd;
          struct filter_client_data zero_fcd;
          struct obd_run_ctxt saved;
          int written;
          loff_t off;
          ENTRY;
  
-        if (!fed->fed_fcd)
+        if (fed->fed_fcd == NULL)
                  RETURN(0);
  
-        if (failover != 0)
+        if (flags & OBD_OPT_FAILOVER)
                  GOTO(free, 0);
  
          /* XXX if fcd_uuid were a real obd_uuid, I could use obd_uuid_equals */
-        if (!strcmp(fed->fed_fcd->fcd_uuid, "OBD_CLASS_UUID"))
+        if (strcmp(fed->fed_fcd->fcd_uuid, "OBD_CLASS_UUID") == 0)
                  GOTO(free, 0);
  
          LASSERT(filter->fo_last_rcvd_slots != NULL);
@@ -339,8 +392,9 @@ int filter_client_free(struct obd_export *exp, int failover)
  
          memset(&zero_fcd, 0, sizeof zero_fcd);
          push_ctxt(&saved, &filter->fo_ctxt, NULL);
-        written = lustre_fwrite(filter->fo_rcvd_filp, (const char *)&zero_fcd,
-                                sizeof(zero_fcd), &off);
+        written = fsfilt_write_record(obd, filter->fo_rcvd_filp,
+                                      (char *)&zero_fcd, sizeof(zero_fcd),
+                                      &off);
  
          /* XXX: this write gets lost sometimes, unless this sync is here. */
          if (written > 0)
@@ -374,29 +428,30 @@ static int filter_free_server_data(struct filter_obd *filter)
          return 0;
  }
  
-
  /* assumes caller is already in kernel ctxt */
-static int filter_update_server_data(struct file *filp,
-                                     struct filter_server_data *fsd)
+int filter_update_server_data(struct obd_device *obd,
+                              struct file *filp, struct filter_server_data *fsd)
  {
          loff_t off = 0;
          int rc;
+        ENTRY;
  
          CDEBUG(D_INODE, "server uuid      : %s\n", fsd->fsd_uuid);
          CDEBUG(D_INODE, "server last_objid: "LPU64"\n",
                 le64_to_cpu(fsd->fsd_last_objid));
          CDEBUG(D_INODE, "server last_rcvd : "LPU64"\n",
-               le64_to_cpu(fsd->fsd_last_rcvd));
+               le64_to_cpu(fsd->fsd_last_transno));
          CDEBUG(D_INODE, "server last_mount: "LPU64"\n",
                 le64_to_cpu(fsd->fsd_mount_count));
  
-        rc = lustre_fwrite(filp, (char *)fsd, sizeof(*fsd), &off);
-        if (rc != sizeof(*fsd)) {
-                CDEBUG(D_INODE, "error writing filter_server_data: rc = %d\n",
-                       rc);
-                RETURN(-EIO);
-        }
-        RETURN(0);
+        rc = fsfilt_write_record(obd, filp, (char *)fsd, sizeof(*fsd), &off);
+        if (rc == sizeof(*fsd))
+                RETURN(0);
+
+        CDEBUG(D_INODE, "error writing filter_server_data: rc = %d\n", rc);
+        if (rc >= 0)
+                RETURN(-ENOSPC);
+        RETURN(rc);
  }
  
  /* assumes caller has already in kernel ctxt */
@@ -432,11 +487,11 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp,
          }
  
          if (last_rcvd_size == 0) {
-                CERROR("%s: initializing new last_rcvd\n", obd->obd_name);
+                CWARN("%s: initializing new %s\n", obd->obd_name, LAST_RCVD);
  
                  memcpy(fsd->fsd_uuid, obd->obd_uuid.uuid,sizeof(fsd->fsd_uuid));
                  fsd->fsd_last_objid = cpu_to_le64(init_lastobjid);
-                fsd->fsd_last_rcvd = 0;
+                fsd->fsd_last_transno = 0;
                  mount_count = fsd->fsd_mount_count = 0;
                  fsd->fsd_server_size = cpu_to_le32(FILTER_LR_SERVER_SIZE);
                  fsd->fsd_client_start = cpu_to_le32(FILTER_LR_CLIENT_START);
@@ -444,15 +499,18 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp,
                  fsd->fsd_subdir_count = cpu_to_le16(FILTER_SUBDIR_COUNT);
                  filter->fo_subdir_count = FILTER_SUBDIR_COUNT;
          } else {
-                ssize_t retval = lustre_fread(filp, (char *)fsd, sizeof(*fsd),
-                                              &off);
+                int retval = fsfilt_read_record(obd, filp, (char *)fsd,
+                                                sizeof(*fsd), &off);
                  if (retval != sizeof(*fsd)) {
-                        CDEBUG(D_INODE,"OBD filter: error reading %s\n",
-                               LAST_RCVD);
+                        CDEBUG(D_INODE,"OBD filter: error reading %s: rc %d\n",
+                               LAST_RCVD, retval);
                          GOTO(err_fsd, rc = -EIO);
                  }
                  mount_count = le64_to_cpu(fsd->fsd_mount_count);
                  filter->fo_subdir_count = le16_to_cpu(fsd->fsd_subdir_count);
+                fsd->fsd_last_objid =
+                        cpu_to_le64(le64_to_cpu(fsd->fsd_last_objid) +
+                                    FILTER_SKIP_OBJID);
          }
  
          if (fsd->fsd_feature_incompat) {
@@ -470,7 +528,7 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp,
          CDEBUG(D_INODE, "%s: server last_objid: "LPU64"\n",
                 obd->obd_name, le64_to_cpu(fsd->fsd_last_objid));
          CDEBUG(D_INODE, "%s: server last_rcvd : "LPU64"\n",
-               obd->obd_name, le64_to_cpu(fsd->fsd_last_rcvd));
+               obd->obd_name, le64_to_cpu(fsd->fsd_last_transno));
          CDEBUG(D_INODE, "%s: server last_mount: "LPU64"\n",
                 obd->obd_name, mount_count);
          CDEBUG(D_INODE, "%s: server data size: %u\n",
@@ -482,13 +540,8 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp,
          CDEBUG(D_INODE, "%s: server subdir_count: %u\n",
                 obd->obd_name, le16_to_cpu(fsd->fsd_subdir_count));
  
-        /*
-         * When we do a clean FILTER shutdown, we save the last_rcvd into
-         * the header.  If we find clients with higher last_rcvd values
-         * then those clients may need recovery done.
-         */
          if (!obd->obd_replayable) {
-                CERROR("%s: recovery support OFF\n", obd->obd_name);
+                CWARN("%s: recovery support OFF\n", obd->obd_name);
                  GOTO(out, rc = 0);
          }
  
@@ -507,7 +560,8 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp,
                   */
                  off = le32_to_cpu(fsd->fsd_client_start) +
                          cl_idx * le16_to_cpu(fsd->fsd_client_size);
-                rc = lustre_fread(filp, (char *)fcd, sizeof(*fcd), &off);
+                rc = fsfilt_read_record(obd, filp, (char *)fcd, sizeof(*fcd),
+                                        &off);
                  if (rc != sizeof(*fcd)) {
                          CERROR("error reading FILTER %s offset %d: rc = %d\n",
                                 LAST_RCVD, cl_idx, rc);
@@ -534,7 +588,7 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp,
                          CERROR("RCVRNG CLIENT uuid: %s idx: %d lr: "LPU64
                                 " srv lr: "LPU64" mnt: "LPU64" last mount: "
                                 LPU64"\n", fcd->fcd_uuid, cl_idx,
-                               last_rcvd, le64_to_cpu(fsd->fsd_last_rcvd),
+                               last_rcvd, le64_to_cpu(fsd->fsd_last_transno),
                                 le64_to_cpu(fcd->fcd_mount_count), mount_count);
                          if (exp == NULL) {
                                  /* XXX this rc is ignored  */
@@ -563,15 +617,16 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp,
                  CDEBUG(D_OTHER, "client at idx %d has last_rcvd = "LPU64"\n",
                         cl_idx, last_rcvd);
  
-                if (last_rcvd > le64_to_cpu(filter->fo_fsd->fsd_last_rcvd))
-                        filter->fo_fsd->fsd_last_rcvd = cpu_to_le64(last_rcvd);
+                if (last_rcvd > le64_to_cpu(filter->fo_fsd->fsd_last_transno))
+                        filter->fo_fsd->fsd_last_transno=cpu_to_le64(last_rcvd);
  
                  obd->obd_last_committed =
-                        le64_to_cpu(filter->fo_fsd->fsd_last_rcvd);
+                        le64_to_cpu(filter->fo_fsd->fsd_last_transno);
+
                  if (obd->obd_recoverable_clients) {
                          CERROR("RECOVERY: %d recoverable clients, last_rcvd "
                                 LPU64"\n", obd->obd_recoverable_clients,
-                               le64_to_cpu(filter->fo_fsd->fsd_last_rcvd));
+                               le64_to_cpu(filter->fo_fsd->fsd_last_transno));
                          obd->obd_next_recovery_transno =
                                  obd->obd_last_committed + 1;
                          obd->obd_recovering = 1;
@@ -585,8 +640,8 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp,
  out:
          fsd->fsd_mount_count = cpu_to_le64(mount_count + 1);
  
-        /* save it,so mount count and last_recvd is current */
-        rc = filter_update_server_data(filp, filter->fo_fsd);
+        /* save it, so mount count and last_transno are current */
+        rc = filter_update_server_data(obd, filp, filter->fo_fsd);
  
          RETURN(rc);
  
@@ -639,7 +694,7 @@ static int filter_prep(struct obd_device *obd)
                  filter->fo_dentry_O_mode[mode] = dentry;
          }
  
-        file = filp_open(LAST_RCVD, O_RDWR | O_CREAT, 0700);
+        file = filp_open(LAST_RCVD, O_RDWR | O_CREAT | O_LARGEFILE, 0700);
          if (!file || IS_ERR(file)) {
                  rc = PTR_ERR(file);
                  CERROR("OBD filter: cannot open/create %s: rc = %d\n",
@@ -663,8 +718,15 @@ static int filter_prep(struct obd_device *obd)
          filter->fo_fop = file->f_op;
          filter->fo_iop = inode->i_op;
          filter->fo_aops = inode->i_mapping->a_ops;
+#ifdef I_SKIP_PDFLUSH
+        /*
+         * we need this to protect from deadlock
+         * pdflush vs. lustre_fwrite()
+         */
+        inode->i_flags |= I_SKIP_PDFLUSH;
+#endif
  
-        rc = filter_init_server_data(obd, file, INIT_OBJID);
+        rc = filter_init_server_data(obd, file, FILTER_INIT_OBJID);
          if (rc) {
                  CERROR("cannot read %s: rc = %d\n", LAST_RCVD, rc);
                  GOTO(err_client, rc);
@@ -740,9 +802,10 @@ static void filter_post(struct obd_device *obd)
           * from lastobjid */
  
          push_ctxt(&saved, &filter->fo_ctxt, NULL);
-        rc = filter_update_server_data(filter->fo_rcvd_filp, filter->fo_fsd);
+        rc = filter_update_server_data(obd, filter->fo_rcvd_filp,
+                                       filter->fo_fsd);
          if (rc)
-                CERROR("OBD filter: error writing lastobjid: rc = %ld\n", rc);
+                CERROR("error writing lastobjid: rc = %ld\n", rc);
  
  
          if (filter->fo_rcvd_filp) {
@@ -751,7 +814,7 @@ static void filter_post(struct obd_device *obd)
                  filp_close(filter->fo_rcvd_filp, 0);
                  filter->fo_rcvd_filp = NULL;
                  if (rc)
-                        CERROR("last_rcvd file won't closed rc = %ld\n", rc);
+                        CERROR("error closing %s: rc = %ld\n", LAST_RCVD, rc);
          }
  
          if (filter->fo_subdir_count) {
@@ -777,8 +840,7 @@ static void filter_post(struct obd_device *obd)
          pop_ctxt(&saved, &filter->fo_ctxt, NULL);
  }
  
-
-static __u64 filter_next_id(struct filter_obd *filter)
+__u64 filter_next_id(struct filter_obd *filter)
  {
          obd_id id;
          LASSERT(filter->fo_fsd != NULL);
@@ -792,8 +854,9 @@ static __u64 filter_next_id(struct filter_obd *filter)
  }
  
  /* direct cut-n-paste of mds_blocking_ast() */
-int filter_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
-                     void *data, int flag)
+static int filter_blocking_ast(struct ldlm_lock *lock,
+                               struct ldlm_lock_desc *desc,
+                               void *data, int flag)
  {
          int do_ast;
          ENTRY;
@@ -852,6 +915,7 @@ static int filter_lock_dentry(struct obd_device *obd, struct dentry *de,
          RETURN(rc == ELDLM_OK ? 0 : -ENOLCK);  /* XXX translate ldlm code */
  }
  
+/* We never dget the object parent, so DON'T dput it either */
  static void filter_parent_unlock(struct dentry *dparent,
                                   struct lustre_handle *lockh,
                                   ldlm_mode_t lock_mode)
@@ -860,8 +924,8 @@ static void filter_parent_unlock(struct dentry *dparent,
  }
  
  /* We never dget the object parent, so DON'T dput it either */
-static inline struct dentry *filter_parent(struct obd_device *obd,
-                                           obd_mode mode, obd_id objid)
+struct dentry *filter_parent(struct obd_device *obd, obd_mode mode,
+                             obd_id objid)
  {
          struct filter_obd *filter = &obd->u.filter;
  
@@ -873,10 +937,9 @@ static inline struct dentry *filter_parent(struct obd_device *obd,
  }
  
  /* We never dget the object parent, so DON'T dput it either */
-static inline struct dentry *filter_parent_lock(struct obd_device *obd,
-                                                obd_mode mode, obd_id objid,
-                                                ldlm_mode_t lock_mode,
-                                                struct lustre_handle *lockh)
+struct dentry *filter_parent_lock(struct obd_device *obd, obd_mode mode,
+                                  obd_id objid, ldlm_mode_t lock_mode,
+                                  struct lustre_handle *lockh)
  {
          unsigned long now = jiffies;
          struct dentry *de = filter_parent(obd, mode, objid);
@@ -886,7 +949,7 @@ static inline struct dentry *filter_parent_lock(struct obd_device *obd,
                  return de;
  
          rc = filter_lock_dentry(obd, de, lock_mode, lockh);
-        if (time_after(jiffies, now + 15*HZ))
+        if (time_after(jiffies, now + 15 * HZ))
                  CERROR("slow parent lock %lus\n", (jiffies - now) / HZ);
          return rc ? ERR_PTR(rc) : de;
  }
@@ -897,13 +960,11 @@ static inline struct dentry *filter_parent_lock(struct obd_device *obd,
   * appropriately for this operation (normally a write lock).  If
   * dir_dentry is NULL, we do a read lock while we do the lookup to
   * avoid races with create/destroy and such changing the directory
- * internal to the filesystem code.
- */
-static struct dentry *filter_fid2dentry(struct obd_device *obd,
-                                        struct dentry *dir_dentry,
-                                        obd_mode mode, obd_id id)
+ * internal to the filesystem code. */
+struct dentry *filter_fid2dentry(struct obd_device *obd,
+                                 struct dentry *dir_dentry,
+                                 obd_mode mode, obd_id id)
  {
-        struct super_block *sb = obd->u.filter.fo_sb;
          struct lustre_handle lockh;
          struct dentry *dparent = dir_dentry;
          struct dentry *dchild;
@@ -911,11 +972,6 @@ static struct dentry *filter_fid2dentry(struct obd_device *obd,
          int len;
          ENTRY;
  
-        if (!sb || !sb->s_dev) {
-                CERROR("device not initialized.\n");
-                RETURN(ERR_PTR(-ENXIO));
-        }
-
          if (id == 0) {
                  CERROR("fatal: invalid object id 0\n");
                  LBUG();
@@ -923,7 +979,7 @@ static struct dentry *filter_fid2dentry(struct obd_device *obd,
          }
  
          len = sprintf(name, LPU64, id);
-        if (!dir_dentry) {
+        if (dir_dentry == NULL) {
                  dparent = filter_parent_lock(obd, mode, id, LCK_PR, &lockh);
                  if (IS_ERR(dparent))
                          RETURN(dparent);
@@ -931,7 +987,7 @@ static struct dentry *filter_fid2dentry(struct obd_device *obd,
          CDEBUG(D_INODE, "looking up object O/%*s/%s\n",
                 dparent->d_name.len, dparent->d_name.name, name);
          dchild = ll_lookup_one_len(name, dparent, len);
-        if (!dir_dentry)
+        if (dir_dentry == NULL)
                  filter_parent_unlock(dparent, &lockh, LCK_PR);
          if (IS_ERR(dchild)) {
                  CERROR("child lookup error %ld\n", PTR_ERR(dchild));
@@ -947,13 +1003,12 @@ static struct dentry *filter_fid2dentry(struct obd_device *obd,
  }
  
  static struct file *filter_obj_open(struct obd_export *export,
-                                    __u64 id, __u32 type,
-                                    ldlm_mode_t parent_mode,
+                                    struct obd_trans_info *oti,
+                                    __u64 id, __u32 type, int parent_mode,
                                      struct lustre_handle *parent_lockh)
  {
          struct obd_device *obd = export->exp_obd;
          struct filter_obd *filter = &obd->u.filter;
-        struct super_block *sb = filter->fo_sb;
          struct dentry *dchild = NULL, *dparent = NULL;
          struct filter_export_data *fed = &export->exp_filter_data;
          struct filter_dentry_data *fdd = NULL;
@@ -966,11 +1021,6 @@ static struct file *filter_obj_open(struct obd_export *export,
  
          push_ctxt(&saved, &filter->fo_ctxt, NULL);
  
-        if (!sb || !sb->s_dev) {
-                CERROR("fatal: device not initialized.\n");
-                GOTO(cleanup, file = ERR_PTR(-ENXIO));
-        }
-
          if (!id) {
                  CERROR("fatal: invalid obdo "LPU64"\n", id);
                  GOTO(cleanup, file = ERR_PTR(-ESTALE));
@@ -1014,6 +1064,7 @@ static struct file *filter_obj_open(struct obd_export *export,
  
          if (dchild->d_inode == NULL) {
                  CERROR("opening non-existent object %s - O_CREAT?\n", name);
+                /* dput(dchild); call filter_create_internal here */
                  file = ERR_PTR(-ENOENT);
                  GOTO(cleanup, file);
          }
@@ -1083,9 +1134,8 @@ cleanup:
  }
  
  /* Caller must hold LCK_PW on parent and push us into kernel context.
- * Caller is also required to ensure that dchild->d_inode exists.
- */
-static int filter_destroy_internal(struct obd_device *obd,
+ * Caller is also required to ensure that dchild->d_inode exists. */
+static int filter_destroy_internal(struct obd_device *obd, obd_id objid,
                                     struct dentry *dparent,
                                     struct dentry *dchild)
  {
@@ -1099,6 +1149,39 @@ static int filter_destroy_internal(struct obd_device *obd,
                         inode->i_nlink, atomic_read(&inode->i_count));
          }
  
+        
+#if 0
+        /* Tell the clients that the object is gone now and that they should
+         * throw away any cached pages.  We don't need to wait until they're
+         * done, so just decref the lock right away and let ldlm_completion_ast
+         * clean up when it's all over. */
+        ldlm_cli_enqueue(..., LCK_PW, AST_INTENT_DESTROY, &lockh);
+        ldlm_lock_decref(&lockh, LCK_PW);
+#endif
+
+        if (0) {
+                struct lustre_handle lockh;
+                int flags = 0, rc;
+                struct ldlm_res_id res_id = { .name = { objid } };
+
+                /* This part is a wee bit iffy: we really only want to bust the
+                 * locks on our stripe, so that we don't end up bouncing
+                 * [0->EOF] locks around on each of the OSTs as the rest of the
+                 * destroys get processed.  Because we're only talking to
+                 * the local LDLM, though, we should only end up locking the 
+                 * whole of our stripe.  When bug 1425 (take all locks on OST
+                 * for stripe 0) is fixed, this code should be revisited. */
+                struct ldlm_extent extent = { 0, OBD_OBJECT_EOF };
+
+                rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
+                                      res_id, LDLM_EXTENT, &extent,
+                                      sizeof(extent), LCK_PW, &flags,
+                                      ldlm_completion_ast, filter_blocking_ast,
+                                      NULL, &lockh);
+                /* We only care about the side-effects, just drop the lock. */
+                ldlm_lock_decref(&lockh, LCK_PW);
+        }
+
          rc = vfs_unlink(dparent->d_inode, dchild);
  
          if (rc)
@@ -1113,8 +1196,7 @@ static int filter_destroy_internal(struct obd_device *obd,
  */
  static int filter_close_internal(struct obd_export *exp,
                                   struct filter_file_data *ffd,
-                                 struct obd_trans_info *oti,
-                                 int failover)
+                                 struct obd_trans_info *oti, int flags)
  {
          struct obd_device *obd = exp->exp_obd;
          struct filter_obd *filter = &obd->u.filter;
@@ -1128,13 +1210,14 @@ static int filter_close_internal(struct obd_export *exp,
          ENTRY;
  
          LASSERT(filp->private_data == ffd);
-        LASSERT(fdd);
+        LASSERT(fdd != NULL);
          LASSERT(fdd->fdd_magic == FILTER_DENTRY_MAGIC);
  
          rc = filp_close(filp, 0);
  
          if (atomic_dec_and_test(&fdd->fdd_open_count) &&
-            fdd->fdd_flags & FILTER_FLAG_DESTROY && !failover) {
+            (fdd->fdd_flags & FILTER_FLAG_DESTROY) &&
+            !(flags & OBD_OPT_FAILOVER)) {
                  void *handle;
  
                  push_ctxt(&saved, &filter->fo_ctxt, NULL);
@@ -1148,15 +1231,27 @@ static int filter_close_internal(struct obd_export *exp,
                  cleanup_phase = 2;
  
                  handle = fsfilt_start(obd, dparent->d_inode,
-                                      FSFILT_OP_UNLINK);
+                                      FSFILT_OP_UNLINK_LOG, oti);
                  if (IS_ERR(handle))
                          GOTO(cleanup, rc = PTR_ERR(handle));
  
+                if (oti != NULL) {
+                        if (oti->oti_handle == NULL)
+                                oti->oti_handle = handle;
+                        else
+                                LASSERT(oti->oti_handle == handle);
+                }
+
+#ifdef ENABLE_ORPHANS
+                /* Remove orphan unlink record from log */
+                llog_cancel_records(filter->fo_catalog, 1, &fdd->fdd_cookie);
+#endif
                  /* XXX unlink from PENDING directory now too */
-                rc2 = filter_destroy_internal(obd, dparent, dchild);
+                rc2 = filter_destroy_internal(obd, fdd->fdd_objid, dparent,
+                                              dchild);
                  if (rc2 && !rc)
                          rc = rc2;
-                rc = filter_finish_transno(exp, handle, oti, rc);
+                rc = filter_finish_transno(exp, oti, rc);
                  rc2 = fsfilt_commit(obd, dparent->d_inode, handle, 0);
                  if (rc2) {
                          CERROR("error on commit, err = %d\n", rc2);
@@ -1189,14 +1284,12 @@ cleanup:
          RETURN(rc);
  }
  
-/* obd methods */
  /* mount the file system (secretly) */
-static int filter_common_setup(struct obd_device *obd, obd_count len, void *buf,
-                               char *option)
+int filter_common_setup(struct obd_device *obd, obd_count len, void *buf,
+                        char *option)
  {
          struct obd_ioctl_data* data = buf;
          struct filter_obd *filter = &obd->u.filter;
-
          struct vfsmount *mnt;
          int rc = 0;
          ENTRY;
@@ -1208,7 +1301,8 @@ static int filter_common_setup(struct obd_device *obd, obd_count len, void *buf,
          if (IS_ERR(obd->obd_fsops))
                  RETURN(PTR_ERR(obd->obd_fsops));
  
-        mnt = do_kern_mount(data->ioc_inlbuf2, 0, data->ioc_inlbuf1, option);
+        mnt = do_kern_mount(data->ioc_inlbuf2, MS_NOATIME | MS_NODIRATIME,
+                            data->ioc_inlbuf1, option);
          rc = PTR_ERR(mnt);
          if (IS_ERR(mnt))
                  GOTO(err_ops, rc);
@@ -1257,14 +1351,27 @@ static int filter_common_setup(struct obd_device *obd, obd_count len, void *buf,
          spin_lock_init(&filter->fo_objidlock);
          INIT_LIST_HEAD(&filter->fo_export_list);
  
+        ptlrpc_init_client(MDS_REQUEST_PORTAL, MDC_REPLY_PORTAL,
+                           "filter_mdc", &filter->fo_mdc_client);
+        sema_init(&filter->fo_sem, 1);
+
          obd->obd_namespace = ldlm_namespace_new("filter-tgt",
                                                  LDLM_NAMESPACE_SERVER);
-        if (!obd->obd_namespace)
+        if (obd->obd_namespace == NULL)
                  GOTO(err_post, rc = -ENOMEM);
  
          ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL,
                             "filter_ldlm_cb_client", &obd->obd_ldlm_client);
  
+        /* Create a non-replaying connection for recovery logging, so that
+         * we don't create a client entry for this local connection, and do
+         * not log or assign transaction numbers for logging operations. */
+#ifdef ENABLE_ORPHANS
+        filter->fo_catalog = filter_get_catalog(obd);
+        if (IS_ERR(filter->fo_catalog))
+                GOTO(err_post, rc = PTR_ERR(filter->fo_catalog));
+#endif
+
          RETURN(0);
  
  err_post:
@@ -1284,82 +1391,67 @@ static int filter_setup(struct obd_device *obd, obd_count len, void *buf)
          struct obd_ioctl_data* data = buf;
          char *option = NULL;
  
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+        /* bug 1577: async-delete is not yet implemented for 2.5 kernels */
          if (!strcmp(data->ioc_inlbuf2, "ext3"))
                  option = "asyncdel";
+#endif
  
          return filter_common_setup(obd, len, buf, option);
  }
  
-/* sanobd setup methods - use a specific mount option */
-static int filter_san_setup(struct obd_device *obd, obd_count len, void *buf)
-{
-        struct obd_ioctl_data* data = buf;
-        char *option = NULL;
-
-        if (!data->ioc_inlbuf2)
-                RETURN(-EINVAL);
-
-        /* for extN/ext3 filesystem, we must mount it with 'writeback' mode */
-        if (!strcmp(data->ioc_inlbuf2, "extN"))
-                option = "data=writeback";
-        else if (!strcmp(data->ioc_inlbuf2, "ext3"))
-                option = "data=writeback,asyncdel";
-        else
-                LBUG(); /* just a reminder */
-
-        return filter_common_setup(obd, len, buf, option);
-}
-
-static int filter_cleanup(struct obd_device *obd, int force, int failover)
+static int filter_cleanup(struct obd_device *obd, int flags)
  {
-        struct super_block *sb;
+        struct filter_obd *filter = &obd->u.filter;
          ENTRY;
  
-        if (failover)
+        if (flags & OBD_OPT_FAILOVER)
                  CERROR("%s: shutting down for failover; client state will"
                         " be preserved.\n", obd->obd_name);
  
          if (!list_empty(&obd->obd_exports)) {
                  CERROR("%s: still has clients!\n", obd->obd_name);
-                class_disconnect_exports(obd, failover);
+                class_disconnect_exports(obd, flags);
                  if (!list_empty(&obd->obd_exports)) {
                          CERROR("still has exports after forced cleanup?\n");
                          RETURN(-EBUSY);
                  }
          }
  
+#ifdef ENABLE_ORPHANS
+        filter_put_catalog(filter->fo_catalog);
+#endif
+
          ldlm_namespace_free(obd->obd_namespace);
  
-        sb = obd->u.filter.fo_sb;
-        if (!sb)
+        if (filter->fo_sb == NULL)
                  RETURN(0);
  
          filter_post(obd);
  
-        shrink_dcache_parent(sb->s_root);
-        unlock_kernel();
+        shrink_dcache_parent(filter->fo_sb->s_root);
+        filter->fo_sb = 0;
  
-        if (atomic_read(&obd->u.filter.fo_vfsmnt->mnt_count) > 1){
+        if (atomic_read(&filter->fo_vfsmnt->mnt_count) > 1)
                  CERROR("%s: mount point busy, mnt_count: %d\n", obd->obd_name,
-                       atomic_read(&obd->u.filter.fo_vfsmnt->mnt_count));
-        }
-
-        mntput(obd->u.filter.fo_vfsmnt);
-        obd->u.filter.fo_sb = 0;
-/*        destroy_buffers(obd->u.filter.fo_sb->s_dev);*/
+                       atomic_read(&filter->fo_vfsmnt->mnt_count));
  
+        unlock_kernel();
+        mntput(filter->fo_vfsmnt);
+        //destroy_buffers(filter->fo_sb->s_dev);
+        filter->fo_sb = NULL;
          fsfilt_put_ops(obd->obd_fsops);
          lock_kernel();
  
          RETURN(0);
  }
  
-int filter_attach(struct obd_device *obd, obd_count len, void *data)
+static int filter_attach(struct obd_device *obd, obd_count len, void *data)
  {
          struct lprocfs_static_vars lvars;
          int rc;
  
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(filter, &lvars);
          rc = lprocfs_obd_attach(obd, lvars.obd_vars);
          if (rc != 0)
                  return rc;
@@ -1376,7 +1468,7 @@ int filter_attach(struct obd_device *obd, obd_count len, void *data)
          return rc;
  }
  
-int filter_detach(struct obd_device *dev)
+static int filter_detach(struct obd_device *dev)
  {
          lprocfs_free_obd_stats(dev);
          return lprocfs_obd_detach(dev);
@@ -1391,17 +1483,16 @@ static int filter_connect(struct lustre_handle *conn, struct obd_device *obd,
          struct filter_client_data *fcd;
          struct filter_obd *filter = &obd->u.filter;
          int rc;
-
          ENTRY;
  
-        if (!conn || !obd || !cluuid)
+        if (conn == NULL || obd == NULL || cluuid == NULL)
                  RETURN(-EINVAL);
  
          rc = class_connect(conn, obd, cluuid);
          if (rc)
                  RETURN(rc);
          exp = class_conn2export(conn);
-        LASSERT(exp);
+        LASSERT(exp != NULL);
  
          fed = &exp->exp_filter_data;
          class_export_put(exp);
@@ -1450,37 +1541,37 @@ static void filter_destroy_export(struct obd_export *exp)
                  list_del(&ffd->ffd_export_list);
                  spin_unlock(&fed->fed_lock);
  
-                CERROR("force close file %*s (hdl %p:"LPX64") on disconnect\n",
-                       ffd->ffd_file->f_dentry->d_name.len,
+                CDEBUG(D_INFO, "force close file %*s (hdl %p:"LPX64") on "
+                       "disconnect\n", ffd->ffd_file->f_dentry->d_name.len,
                         ffd->ffd_file->f_dentry->d_name.name,
                         ffd, ffd->ffd_handle.h_cookie);
  
-                filter_close_internal(exp, ffd, NULL, exp->exp_failover);
+                filter_close_internal(exp, ffd, NULL, exp->exp_flags);
                  spin_lock(&fed->fed_lock);
          }
          spin_unlock(&fed->fed_lock);
  
          if (exp->exp_obd->obd_replayable)
-                filter_client_free(exp, exp->exp_failover);
+                filter_client_free(exp, exp->exp_flags);
          EXIT;
  }
  
  /* also incredibly similar to mds_disconnect */
-static int filter_disconnect(struct lustre_handle *conn, int failover)
+static int filter_disconnect(struct lustre_handle *conn, int flags)
  {
          struct obd_export *exp = class_conn2export(conn);
+        unsigned long irqflags;
          int rc;
-        unsigned long flags;
          ENTRY;
  
          LASSERT(exp);
          ldlm_cancel_locks_for_export(exp);
  
-        spin_lock_irqsave(&exp->exp_lock, flags);
-        exp->exp_failover = failover;
-        spin_unlock_irqrestore(&exp->exp_lock, flags);
+        spin_lock_irqsave(&exp->exp_lock, irqflags);
+        exp->exp_flags = flags;
+        spin_unlock_irqrestore(&exp->exp_lock, irqflags);
  
-        rc = class_disconnect(conn, failover);
+        rc = class_disconnect(conn, flags);
  
          fsfilt_sync(exp->exp_obd, exp->exp_obd->u.filter.fo_sb);
          class_export_put(exp);
@@ -1488,29 +1579,8 @@ static int filter_disconnect(struct lustre_handle *conn, int failover)
          RETURN(rc);
  }
  
-static void filter_from_inode(struct obdo *oa, struct inode *inode, int valid)
-{
-        int type = oa->o_mode & S_IFMT;
-        ENTRY;
-
-        CDEBUG(D_INFO, "src inode %lu (%p), dst obdo "LPU64" valid 0x%08x\n",
-               inode->i_ino, inode, oa->o_id, valid);
-        /* Don't copy the inode number in place of the object ID */
-        obdo_from_inode(oa, inode, valid);
-        oa->o_mode &= ~S_IFMT;
-        oa->o_mode |= type;
-
-        if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
-                obd_rdev rdev = kdev_t_to_nr(inode->i_rdev);
-                oa->o_rdev = rdev;
-                oa->o_valid |= OBD_MD_FLRDEV;
-        }
-
-        EXIT;
-}
-
-static struct dentry *__filter_oa2dentry(struct lustre_handle *conn,
-                                         struct obdo *oa, char *what)
+struct dentry *__filter_oa2dentry(struct obd_device *obd,
+                                  struct obdo *oa, const char *what)
  {
          struct dentry *dchild = NULL;
  
@@ -1525,22 +1595,14 @@ static struct dentry *__filter_oa2dentry(struct lustre_handle *conn,
                          LASSERT(fdd->fdd_magic == FILTER_DENTRY_MAGIC);
                          filter_ffd_put(ffd);
  
-                        CDEBUG(D_INODE,
-                               "got child objid %*s: %p, count = %d\n",
-                               dchild->d_name.len, dchild->d_name.name,
+                        CDEBUG(D_INODE,"%s got child objid %*s: %p, count %d\n",
+                               what, dchild->d_name.len, dchild->d_name.name,
                                 dchild, atomic_read(&dchild->d_count));
                  }
          }
  
-        if (!dchild) {
-                struct obd_device *obd = class_conn2obd(conn);
-
-                if (!obd) {
-                        CERROR("invalid client cookie "LPX64"\n", conn->cookie);
-                        RETURN(ERR_PTR(-EINVAL));
-                }
+        if (!dchild)
                  dchild = filter_fid2dentry(obd, NULL, oa->o_mode, oa->o_id);
-        }
  
          if (IS_ERR(dchild)) {
                  CERROR("%s error looking up object: "LPU64"\n", what, oa->o_id);
@@ -1556,20 +1618,27 @@ static struct dentry *__filter_oa2dentry(struct lustre_handle *conn,
          return dchild;
  }
  
-#define filter_oa2dentry(conn, oa) __filter_oa2dentry(conn, oa, __FUNCTION__)
-
  static int filter_getattr(struct lustre_handle *conn, struct obdo *oa,
                            struct lov_stripe_md *md)
  {
          struct dentry *dentry = NULL;
+        struct obd_device *obd;
          int rc = 0;
          ENTRY;
  
-        dentry = filter_oa2dentry(conn, oa);
+        obd = class_conn2obd(conn);
+        if (obd == NULL) {
+                CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",conn->cookie);
+                RETURN(-EINVAL);
+        }
+
+        dentry = filter_oa2dentry(obd, oa);
          if (IS_ERR(dentry))
                  RETURN(PTR_ERR(dentry));
  
-        filter_from_inode(oa, dentry->d_inode, oa->o_valid);
+        /* Limit the valid bits in the return data to what we actually use */
+        oa->o_valid = OBD_MD_FLID;
+        obdo_from_inode(oa, dentry->d_inode, FILTER_VALID_FLAGS);
  
          f_dput(dentry);
          RETURN(rc);
@@ -1580,48 +1649,55 @@ static int filter_setattr(struct lustre_handle *conn, struct obdo *oa,
                            struct lov_stripe_md *md, struct obd_trans_info *oti)
  {
          struct obd_run_ctxt saved;
-        struct obd_export *export = class_conn2export(conn);
-        struct obd_device *obd = class_conn2obd(conn);
-        struct filter_obd *filter = &obd->u.filter;
+        struct obd_export *exp;
+        struct filter_obd *filter;
          struct dentry *dentry;
          struct iattr iattr;
-        struct inode *inode;
-        void * handle;
+        void *handle;
          int rc, rc2;
          ENTRY;
  
-        dentry = filter_oa2dentry(conn, oa);
+        LASSERT(oti != NULL);
+        exp = class_conn2export(conn);
+        if (!exp) {
+                CERROR("invalid client cookie "LPX64"\n", conn->cookie);
+                RETURN(-EINVAL);
+        }
  
+        dentry = filter_oa2dentry(exp->exp_obd, oa);
          if (IS_ERR(dentry))
                  GOTO(out_exp, rc = PTR_ERR(dentry));
  
+        filter = &exp->exp_obd->u.filter;
+
          iattr_from_obdo(&iattr, oa, oa->o_valid);
-        iattr.ia_mode = (iattr.ia_mode & ~S_IFMT) | S_IFREG;
-        inode = dentry->d_inode;
  
          push_ctxt(&saved, &filter->fo_ctxt, NULL);
          lock_kernel();
+
+        /* XXX this could be a rwsem instead, if filter_preprw played along */
          if (iattr.ia_valid & ATTR_SIZE)
-                down(&inode->i_sem);
+                down(&dentry->d_inode->i_sem);
  
-        handle = fsfilt_start(obd, dentry->d_inode, FSFILT_OP_SETATTR);
+        handle = fsfilt_start(exp->exp_obd, dentry->d_inode, FSFILT_OP_SETATTR,
+                              oti);
          if (IS_ERR(handle))
                  GOTO(out_unlock, rc = PTR_ERR(handle));
  
-        rc = fsfilt_setattr(obd, dentry, handle, &iattr, 1);
-        rc = filter_finish_transno(export, handle, oti, rc);
-        rc2 = fsfilt_commit(obd, dentry->d_inode, handle, 0);
+        rc = fsfilt_setattr(exp->exp_obd, dentry, handle, &iattr, 1);
+        rc = filter_finish_transno(exp, oti, rc);
+        rc2 = fsfilt_commit(exp->exp_obd, dentry->d_inode, handle, 0);
          if (rc2) {
                  CERROR("error on commit, err = %d\n", rc2);
                  if (!rc)
                          rc = rc2;
          }
  
-        if (iattr.ia_valid & ATTR_SIZE) {
-                up(&inode->i_sem);
-                oa->o_valid = OBD_MD_FLBLOCKS | OBD_MD_FLCTIME | OBD_MD_FLMTIME;
-                obdo_from_inode(oa, inode, oa->o_valid);
-        }
+        if (iattr.ia_valid & ATTR_SIZE)
+                up(&dentry->d_inode->i_sem);
+
+        oa->o_valid = OBD_MD_FLID;
+        obdo_from_inode(oa, dentry->d_inode, FILTER_VALID_FLAGS);
  
  out_unlock:
          unlock_kernel();
@@ -1629,7 +1705,7 @@ out_unlock:
  
          f_dput(dentry);
   out_exp:
-        class_export_put(export);
+        class_export_put(exp);
          RETURN(rc);
  }
  
@@ -1637,7 +1713,7 @@ static int filter_open(struct lustre_handle *conn, struct obdo *oa,
                         struct lov_stripe_md *ea, struct obd_trans_info *oti,
                         struct obd_client_handle *och)
  {
-        struct obd_export *export = NULL;
+        struct obd_export *exp;
          struct lustre_handle *handle;
          struct filter_file_data *ffd;
          struct file *filp;
@@ -1645,19 +1721,19 @@ static int filter_open(struct lustre_handle *conn, struct obdo *oa,
          int rc = 0;
          ENTRY;
  
-        export = class_conn2export(conn);
-        if (!export) {
-                CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
-                       conn->cookie);
-                GOTO(out, rc = -EINVAL);
+        exp = class_conn2export(conn);
+        if (exp == NULL) {
+                CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",conn->cookie);
+                RETURN(-EINVAL);
          }
  
-        filp = filter_obj_open(export, oa->o_id, oa->o_mode,
+        filp = filter_obj_open(exp, oti, oa->o_id, oa->o_mode,
                                 LCK_PR, &parent_lockh);
          if (IS_ERR(filp))
                  GOTO(out, rc = PTR_ERR(filp));
  
-        filter_from_inode(oa, filp->f_dentry->d_inode, oa->o_valid);
+        oa->o_valid = OBD_MD_FLID;
+        obdo_from_inode(oa, filp->f_dentry->d_inode, FILTER_VALID_FLAGS);
  
          ffd = filp->private_data;
          handle = obdo_handle(oa);
@@ -1665,7 +1741,7 @@ static int filter_open(struct lustre_handle *conn, struct obdo *oa,
          oa->o_valid |= OBD_MD_FLHANDLE;
  
  out:
-        class_export_put(export);
+        class_export_put(exp);
          if (!rc) {
                  memcpy(&oti->oti_ack_locks[0].lock, &parent_lockh,
                         sizeof(parent_lockh));
@@ -1677,15 +1753,16 @@ out:
  static int filter_close(struct lustre_handle *conn, struct obdo *oa,
                          struct lov_stripe_md *ea, struct obd_trans_info *oti)
  {
-        struct obd_export *exp = class_conn2export(conn);
+        struct obd_export *exp;
          struct filter_file_data *ffd;
          struct filter_export_data *fed;
          int rc;
          ENTRY;
  
-        if (!exp) {
-                CDEBUG(D_IOCTL, "invalid client cookie"LPX64"\n", conn->cookie);
-                GOTO(out, rc = -EINVAL);
+        exp = class_conn2export(conn);
+        if (exp == NULL) {
+                CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",conn->cookie);
+                RETURN(-EINVAL);
          }
  
          if (!(oa->o_valid & OBD_MD_FLHANDLE)) {
@@ -1705,6 +1782,9 @@ static int filter_close(struct lustre_handle *conn, struct obdo *oa,
          list_del(&ffd->ffd_export_list);
          spin_unlock(&fed->fed_lock);
  
+        oa->o_valid = OBD_MD_FLID;
+        obdo_from_inode(oa,ffd->ffd_file->f_dentry->d_inode,FILTER_VALID_FLAGS);
+
          rc = filter_close_internal(exp, ffd, oti, 0);
          filter_ffd_put(ffd);
          GOTO(out, rc);
@@ -1717,24 +1797,25 @@ static int filter_create(struct lustre_handle *conn, struct obdo *oa,
                           struct lov_stripe_md **ea, struct obd_trans_info *oti)
  {
          struct obd_export *exp;
-        struct obd_device *obd = class_conn2obd(conn);
-        struct filter_obd *filter = &obd->u.filter;
+        struct obd_device *obd;
+        struct filter_obd *filter;
          struct obd_run_ctxt saved;
          struct lustre_handle parent_lockh;
          struct dentry *dparent;
+        struct ll_fid mds_fid = { .id = 0 };
          struct dentry *dchild = NULL;
-        struct iattr;
          void *handle;
          int err, rc, cleanup_phase;
          ENTRY;
  
-        if (!obd) {
-                CERROR("invalid client cookie "LPX64"\n", conn->cookie);
+        exp = class_conn2export(conn);
+        if (exp == NULL) {
+                CDEBUG(D_IOCTL,"invalid client cookie "LPX64"\n", conn->cookie);
                  RETURN(-EINVAL);
          }
  
-        exp = class_conn2export(conn);
-
+        obd = exp->exp_obd;
+        filter = &obd->u.filter;
          push_ctxt(&saved, &filter->fo_ctxt, NULL);
   retry:
          oa->o_id = filter_next_id(filter);
@@ -1760,21 +1841,42 @@ static int filter_create(struct lustre_handle *conn, struct obdo *oa,
          }
  
          cleanup_phase = 2;
-        handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_CREATE);
+        handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_CREATE_LOG, oti);
          if (IS_ERR(handle))
                  GOTO(cleanup, rc = PTR_ERR(handle));
  
          rc = vfs_create(dparent->d_inode, dchild, oa->o_mode);
-        if (rc)
+        if (rc) {
                  CERROR("create failed rc = %d\n", rc);
+        } else if (oa->o_valid & (OBD_MD_FLCTIME|OBD_MD_FLMTIME|OBD_MD_FLSIZE)){
+                struct iattr attr;
  
-        rc = filter_finish_transno(exp, handle, oti, rc);
-        err = filter_update_server_data(filter->fo_rcvd_filp, filter->fo_fsd);
-        if (err) {
-                CERROR("unable to write lastobjid but file created\n");
-                if (!rc)
-                        rc = err;
+                iattr_from_obdo(&attr, oa, oa->o_valid);
+                rc = fsfilt_setattr(obd, dchild, handle, &attr, 1);
+                if (rc)
+                        CERROR("create setattr failed rc = %d\n", rc);
          }
+        rc = filter_finish_transno(exp, oti, rc);
+        err = filter_update_server_data(obd, filter->fo_rcvd_filp,
+                                        filter->fo_fsd);
+        if (err)
+                CERROR("unable to write lastobjid but file created\n");
+
+        /* Log the create record if asked to, and flag a returned cookie */
+        if (!rc && mds_fid.id && (oa->o_valid & OBD_MD_FLCOOKIE)) {
+                err = filter_log_op_create(obd->u.filter.fo_catalog, &mds_fid,
+                                           dchild->d_inode->i_ino,
+                                           dchild->d_inode->i_generation,
+                                           oti->oti_logcookies);
+                if (err) {
+                        CERROR("error logging create record: rc %d\n", err);
+                        oa->o_valid = OBD_MD_FLID;
+                } else {
+                        oa->o_valid = OBD_MD_FLID | OBD_MD_FLCOOKIE;
+                }
+        } else
+                oa->o_valid = OBD_MD_FLID;
+
          err = fsfilt_commit(obd, dparent->d_inode, handle, 0);
          if (err) {
                  CERROR("error on commit, err = %d\n", err);
@@ -1786,9 +1888,7 @@ static int filter_create(struct lustre_handle *conn, struct obdo *oa,
                  GOTO(cleanup, rc);
  
          /* Set flags for fields we have set in the inode struct */
-        oa->o_valid = OBD_MD_FLID | OBD_MD_FLBLKSZ | OBD_MD_FLBLOCKS |
-                 OBD_MD_FLMTIME | OBD_MD_FLATIME | OBD_MD_FLCTIME;
-        filter_from_inode(oa, dchild->d_inode, oa->o_valid);
+        obdo_from_inode(oa, dchild->d_inode, FILTER_VALID_FLAGS);
  
          EXIT;
  cleanup:
@@ -1819,24 +1919,25 @@ static int filter_destroy(struct lustre_handle *conn, struct obdo *oa,
                            struct lov_stripe_md *ea, struct obd_trans_info *oti)
  {
          struct obd_export *exp;
-        struct obd_device *obd = class_conn2obd(conn);
-        struct filter_obd *filter = &obd->u.filter;
-        struct dentry *dparent, *dchild = NULL;
+        struct obd_device *obd;
+        struct filter_obd *filter;
+        struct dentry *dchild = NULL, *dparent = NULL;
          struct filter_dentry_data *fdd;
          struct obd_run_ctxt saved;
          void *handle = NULL;
          struct lustre_handle parent_lockh;
+        struct llog_cookie *fcc = NULL;
          int rc, rc2, cleanup_phase = 0;
          ENTRY;
  
-        if (!obd) {
-                CERROR("invalid client cookie "LPX64"\n", conn->cookie);
+        exp = class_conn2export(conn);
+        if (exp == NULL) {
+                CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",conn->cookie);
                  RETURN(-EINVAL);
          }
  
-        exp = class_conn2export(conn);
-
-        CDEBUG(D_INODE, "destroying objid "LPU64"\n", oa->o_id);
+        obd = exp->exp_obd;
+        filter = &obd->u.filter;
  
          push_ctxt(&saved, &filter->fo_ctxt, NULL);
          dparent = filter_parent_lock(obd, oa->o_mode, oa->o_id,
@@ -1850,38 +1951,53 @@ static int filter_destroy(struct lustre_handle *conn, struct obdo *oa,
                  GOTO(cleanup, rc = -ENOENT);
          cleanup_phase = 2;
  
-        if (!dchild->d_inode) {
+        if (dchild->d_inode == NULL) {
                  CERROR("destroying non-existent object "LPU64"\n", oa->o_id);
                  GOTO(cleanup, rc = -ENOENT);
          }
-
-        handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_UNLINK);
+        handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_UNLINK_LOG, oti);
          if (IS_ERR(handle))
                  GOTO(cleanup, rc = PTR_ERR(handle));
          cleanup_phase = 3;
  
          fdd = dchild->d_fsdata;
-        if (fdd && atomic_read(&fdd->fdd_open_count)) {
-                LASSERT(fdd->fdd_magic = FILTER_DENTRY_MAGIC);
+
+        /* Our MDC connection is established by the MDS to us */
+        if ((oa->o_valid & OBD_MD_FLCOOKIE) && filter->fo_mdc_imp != NULL) {
+                OBD_ALLOC(fcc, sizeof(*fcc));
+                if (fcc != NULL)
+                        memcpy(fcc, obdo_logcookie(oa), sizeof(*fcc));
+        }
+
+        if (fdd != NULL && atomic_read(&fdd->fdd_open_count)) {
+                LASSERT(fdd->fdd_magic == FILTER_DENTRY_MAGIC);
                  if (!(fdd->fdd_flags & FILTER_FLAG_DESTROY)) {
                          fdd->fdd_flags |= FILTER_FLAG_DESTROY;
-                        /* XXX put into PENDING directory in case of crash */
+
+#ifdef ENABLE_ORPHANS
+                        filter_log_op_orphan(filter->fo_catalog, oa->o_id,
+                                             oa->o_generation,&fdd->fdd_cookie);
+#endif
                          CDEBUG(D_INODE,
                                 "defer destroy of %dx open objid "LPU64"\n",
                                 atomic_read(&fdd->fdd_open_count), oa->o_id);
-                } else
+                } else {
                          CDEBUG(D_INODE,
                                 "repeat destroy of %dx open objid "LPU64"\n",
                                 atomic_read(&fdd->fdd_open_count), oa->o_id);
+                }
                  GOTO(cleanup, rc = 0);
          }
  
-        rc = filter_destroy_internal(obd, dparent, dchild);
+        rc = filter_destroy_internal(obd, oa->o_id, dparent, dchild);
  
  cleanup:
          switch(cleanup_phase) {
          case 3:
-                rc = filter_finish_transno(exp, handle, oti, rc);
+                if (fcc != NULL)
+                        fsfilt_set_last_rcvd(obd, 0, oti->oti_handle,
+                                             filter_cancel_cookies_cb, fcc);
+                rc = filter_finish_transno(exp, oti, rc);
                  rc2 = fsfilt_commit(obd, dparent->d_inode, handle, 0);
                  if (rc2) {
                          CERROR("error on commit, err = %d\n", rc2);
@@ -1930,742 +2046,17 @@ static int filter_truncate(struct lustre_handle *conn, struct obdo *oa,
          RETURN(error);
  }
  
-static inline void lustre_put_page(struct page *page)
-{
-        page_cache_release(page);
-}
-
-static int filter_start_page_read(struct inode *inode, struct niobuf_local *lnb)
-{
-        struct address_space *mapping = inode->i_mapping;
-        struct page *page;
-        unsigned long index = lnb->offset >> PAGE_SHIFT;
-        int rc;
-
-        page = grab_cache_page(mapping, index); /* locked page */
-        if (IS_ERR(page))
-                return lnb->rc = PTR_ERR(page);
-
-        lnb->page = page;
-
-        if (inode->i_size < lnb->offset + lnb->len - 1)
-                lnb->rc = inode->i_size - lnb->offset;
-        else
-                lnb->rc = lnb->len;
-
-        if (PageUptodate(page)) {
-                unlock_page(page);
-                return 0;
-        }
-
-        rc = mapping->a_ops->readpage(NULL, page);
-        if (rc < 0) {
-                CERROR("page index %lu, rc = %d\n", index, rc);
-                lnb->page = NULL;
-                lustre_put_page(page);
-                return lnb->rc = rc;
-        }
-
-        return 0;
-}
-
-static int filter_finish_page_read(struct niobuf_local *lnb)
-{
-        if (lnb->page == NULL)
-                return 0;
-
-        if (PageUptodate(lnb->page))
-                return 0;
-
-        wait_on_page(lnb->page);
-        if (!PageUptodate(lnb->page)) {
-                CERROR("page index %lu/offset "LPX64" not uptodate\n",
-                       lnb->page->index, lnb->offset);
-                GOTO(err_page, lnb->rc = -EIO);
-        }
-        if (PageError(lnb->page)) {
-                CERROR("page index %lu/offset "LPX64" has error\n",
-                       lnb->page->index, lnb->offset);
-                GOTO(err_page, lnb->rc = -EIO);
-        }
-
-        return 0;
-
-err_page:
-        lustre_put_page(lnb->page);
-        lnb->page = NULL;
-        return lnb->rc;
-}
-
-static struct page *lustre_get_page_write(struct inode *inode,
-                                          unsigned long index)
-{
-        struct address_space *mapping = inode->i_mapping;
-        struct page *page;
-        int rc;
-
-        page = grab_cache_page(mapping, index); /* locked page */
-
-        if (!IS_ERR(page)) {
-                /* Note: Called with "O" and "PAGE_SIZE" this is essentially
-                 * a no-op for most filesystems, because we write the whole
-                 * page.  For partial-page I/O this will read in the page.
-                 */
-                rc = mapping->a_ops->prepare_write(NULL, page, 0, PAGE_SIZE);
-                if (rc) {
-                        CERROR("page index %lu, rc = %d\n", index, rc);
-                        if (rc != -ENOSPC)
-                                LBUG();
-                        GOTO(err_unlock, rc);
-                }
-                /* XXX not sure if we need this if we are overwriting page */
-                if (PageError(page)) {
-                        CERROR("error on page index %lu, rc = %d\n", index, rc);
-                        LBUG();
-                        GOTO(err_unlock, rc = -EIO);
-                }
-        }
-        return page;
-
-err_unlock:
-        unlock_page(page);
-        lustre_put_page(page);
-        return ERR_PTR(rc);
-}
-
-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-int waitfor_one_page(struct page *page)
-{
-        wait_on_page_locked(page);
-        return 0;
-}
-#endif
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-/* We should only change the file mtime (and not the ctime, like
- * update_inode_times() in generic_file_write()) when we only change data.
- */
-static inline void inode_update_time(struct inode *inode, int ctime_too)
-{
-        time_t now = CURRENT_TIME;
-        if (inode->i_mtime == now && (!ctime_too || inode->i_ctime == now))
-                return;
-        inode->i_mtime = now;
-        if (ctime_too)
-                inode->i_ctime = now;
-        mark_inode_dirty_sync(inode);
-}
-#endif
-
-static int lustre_commit_write(struct niobuf_local *lnb)
-{
-        struct page *page = lnb->page;
-        unsigned from = lnb->offset & ~PAGE_MASK;
-        unsigned to = from + lnb->len;
-        struct inode *inode = page->mapping->host;
-        int err;
-
-        LASSERT(to <= PAGE_SIZE);
-        err = page->mapping->a_ops->commit_write(NULL, page, from, to);
-        if (!err && IS_SYNC(inode))
-                err = waitfor_one_page(page);
-        //SetPageUptodate(page); // the client commit_write will do this
-
-        SetPageReferenced(page);
-        unlock_page(page);
-        lustre_put_page(page);
-        return err;
-}
-
-int filter_get_page_write(struct inode *inode, struct niobuf_local *lnb,
-                          int *pglocked)
-{
-        unsigned long index = lnb->offset >> PAGE_SHIFT;
-        struct address_space *mapping = inode->i_mapping;
-        struct page *page;
-        int rc;
-
-        //ASSERT_PAGE_INDEX(index, GOTO(err, rc = -EINVAL));
-        if (*pglocked)
-                page = grab_cache_page_nowait(mapping, index); /* locked page */
-        else
-                page = grab_cache_page(mapping, index); /* locked page */
-
-
-        /* This page is currently locked, so get a temporary page instead. */
-        if (!page) {
-                CDEBUG(D_ERROR,"ino %lu page %ld locked\n", inode->i_ino,index);
-                page = alloc_pages(GFP_KERNEL, 0); /* locked page */
-                if (!page) {
-                        CERROR("no memory for a temp page\n");
-                        GOTO(err, rc = -ENOMEM);
-                }
-                page->index = index;
-                lnb->page = page;
-                lnb->flags |= N_LOCAL_TEMP_PAGE;
-        } else if (!IS_ERR(page)) {
-                (*pglocked)++;
-
-                rc = mapping->a_ops->prepare_write(NULL, page,
-                                                   lnb->offset & ~PAGE_MASK,
-                                                   lnb->len);
-                if (rc) {
-                        if (rc != -ENOSPC)
-                                CERROR("page index %lu, rc = %d\n", index, rc);
-                        GOTO(err_unlock, rc);
-                }
-                /* XXX not sure if we need this if we are overwriting page */
-                if (PageError(page)) {
-                        CERROR("error on page index %lu, rc = %d\n", index, rc);
-                        LBUG();
-                        GOTO(err_unlock, rc = -EIO);
-                }
-                lnb->page = page;
-        }
-
-        return 0;
-
-err_unlock:
-        unlock_page(page);
-        lustre_put_page(page);
-err:
-        return lnb->rc = rc;
-}
-
-/*
- * We need to balance prepare_write() calls with commit_write() calls.
- * If the page has been prepared, but we have no data for it, we don't
- * want to overwrite valid data on disk, but we still need to zero out
- * data for space which was newly allocated.  Like part of what happens
- * in __block_prepare_write() for newly allocated blocks.
- *
- * XXX currently __block_prepare_write() creates buffers for all the
- *     pages, and the filesystems mark these buffers as BH_New if they
- *     were newly allocated from disk. We use the BH_New flag similarly.
- */
-static int filter_commit_write(struct niobuf_local *lnb, int err)
-{
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        if (err) {
-                unsigned block_start, block_end;
-                struct buffer_head *bh, *head = lnb->page->buffers;
-                unsigned blocksize = head->b_size;
-
-                /* debugging: just seeing if this ever happens */
-                CDEBUG(err == -ENOSPC ? D_INODE : D_ERROR,
-                       "called for ino %lu:%lu on err %d\n",
-                       lnb->page->mapping->host->i_ino, lnb->page->index, err);
-
-                /* Currently one buffer per page, but in the future... */
-                for (bh = head, block_start = 0; bh != head || !block_start;
-                     block_start = block_end, bh = bh->b_this_page) {
-                        block_end = block_start + blocksize;
-                        if (buffer_new(bh)) {
-                                memset(kmap(lnb->page) + block_start, 0,
-                                       blocksize);
-                                kunmap(lnb->page);
-                        }
-                }
-        }
-#endif
-        return lustre_commit_write(lnb);
-}
-
-static int filter_preprw(int cmd, struct obd_export *exp, struct obdo *obdo,
-                         int objcount, struct obd_ioobj *obj,
-                         int niocount, struct niobuf_remote *nb,
-                         struct niobuf_local *res, void **desc_private,
-                         struct obd_trans_info *oti)
-{
-        struct obd_run_ctxt saved;
-        struct obd_device *obd;
-        struct obd_ioobj *o;
-        struct niobuf_remote *rnb;
-        struct niobuf_local *lnb;
-        struct fsfilt_objinfo *fso;
-        struct dentry *dentry;
-        struct inode *inode;
-        int pglocked = 0, rc = 0, i, j, tot_bytes = 0;
-        unsigned long now = jiffies;
-        ENTRY;
-
-        memset(res, 0, niocount * sizeof(*res));
-
-        obd = exp->exp_obd;
-        if (obd == NULL)
-                RETURN(-EINVAL);
-
-        // theoretically we support multi-obj BRW RPCs, but until then...
-        LASSERT(objcount == 1);
-
-        OBD_ALLOC(fso, objcount * sizeof(*fso));
-        if (!fso)
-                RETURN(-ENOMEM);
-
-        push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
-
-        for (i = 0, o = obj; i < objcount; i++, o++) {
-                struct filter_dentry_data *fdd;
-
-                LASSERT(o->ioo_bufcnt);
-
-                dentry = filter_fid2dentry(obd, NULL, o->ioo_type, o->ioo_id);
-
-                if (IS_ERR(dentry))
-                        GOTO(out_objinfo, rc = PTR_ERR(dentry));
-
-                fso[i].fso_dentry = dentry;
-                fso[i].fso_bufcnt = o->ioo_bufcnt;
-
-                if (!dentry->d_inode) {
-                        CERROR("trying to BRW to non-existent file "LPU64"\n",
-                               o->ioo_id);
-                        f_dput(dentry);
-                        GOTO(out_objinfo, rc = -ENOENT);
-                }
-
-                /* If we ever start to support mutli-object BRW RPCs, we will
-                 * need to get locks on mulitple inodes (in order) or use the
-                 * DLM to do the locking for us (and use the same locking in
-                 * filter_setattr() for truncate).  That isn't all, because
-                 * there still exists the possibility of a truncate starting
-                 * a new transaction while holding the ext3 rwsem = write
-                 * while some writes (which have started their transactions
-                 * here) blocking on the ext3 rwsem = read => lock inversion.
-                 *
-                 * The handling gets very ugly when dealing with locked pages.
-                 * It may be easier to just get rid of the locked page code
-                 * (which has problems of its own) and either discover we do
-                 * not need it anymore (i.e. it was a symptom of another bug)
-                 * or ensure we get the page locks in an appropriate order.
-                 */
-                if (cmd & OBD_BRW_WRITE)
-                        down(&dentry->d_inode->i_sem);
-                fdd = dentry->d_fsdata;
-                if (!fdd || !atomic_read(&fdd->fdd_open_count))
-                        CDEBUG(D_PAGE, "I/O to unopened object "LPU64"\n",
-                               o->ioo_id);
-        }
-
-        if (time_after(jiffies, now + 15*HZ))
-                CERROR("slow prep setup %lus\n", (jiffies - now) / HZ);
-
-        if (cmd & OBD_BRW_WRITE) {
-                *desc_private = fsfilt_brw_start(obd, objcount, fso,
-                                                 niocount, nb);
-                if (IS_ERR(*desc_private)) {
-                        rc = PTR_ERR(*desc_private);
-                        CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR,
-                               "error starting transaction: rc = %d\n", rc);
-                        *desc_private = NULL;
-                        GOTO(out_objinfo, rc);
-                }
-        }
-
-        for (i = 0, o = obj, rnb = nb, lnb = res; i < objcount; i++, o++) {
-                dentry = fso[i].fso_dentry;
-                inode = dentry->d_inode;
-
-                for (j = 0; j < o->ioo_bufcnt; j++, rnb++, lnb++) {
-                        if (j == 0)
-                                lnb->dentry = dentry;
-                        else
-                                lnb->dentry = dget(dentry);
-
-                        lnb->offset = rnb->offset;
-                        lnb->len    = rnb->len;
-                        lnb->flags  = rnb->flags;
-                        lnb->start  = jiffies;
-
-                        if (cmd & OBD_BRW_WRITE) {
-                                rc = filter_get_page_write(inode,lnb,&pglocked);
-                                if (rc)
-                                        up(&dentry->d_inode->i_sem);
-                        } else if (inode->i_size <= rnb->offset) {
-                                /* If there's no more data, abort early.
-                                 * lnb->page == NULL and lnb->rc == 0, so it's
-                                 * easy to detect later. */
-                                f_dput(dentry);
-                                lnb->dentry = NULL;
-                                break;
-                        } else {
-                                rc = filter_start_page_read(inode, lnb);
-                        }
-
-                        if (rc) {
-                                CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR,
-                                       "page err %u@"LPU64" %u/%u %p: rc %d\n",
-                                       lnb->len, lnb->offset, j, o->ioo_bufcnt,
-                                       dentry, rc);
-                                f_dput(dentry);
-                                GOTO(out_pages, rc);
-                        }
-
-                        tot_bytes += lnb->len;
-
-                        if ((cmd & OBD_BRW_READ) && lnb->rc < lnb->len) {
-                                /* Likewise with a partial read */
-                                break;
-                        }
-                }
-        }
-
-        if (time_after(jiffies, now + 15*HZ))
-                CERROR("slow prep get page %lus\n", (jiffies - now) / HZ);
-
-        if (cmd & OBD_BRW_READ) {
-                lprocfs_counter_add(obd->obd_stats, LPROC_FILTER_READ_BYTES,
-                                    tot_bytes);
-                while (lnb-- > res) {
-                        rc = filter_finish_page_read(lnb);
-                        if (rc) {
-                                CERROR("error page %u@"LPU64" %u %p: rc %d\n",
-                                       lnb->len, lnb->offset, lnb - res,
-                                       lnb->dentry, rc);
-                                f_dput(lnb->dentry);
-                                GOTO(out_pages, rc);
-                        }
-                }
-        } else
-                lprocfs_counter_add(obd->obd_stats, LPROC_FILTER_WRITE_BYTES,
-                                    tot_bytes);
-
-        if (time_after(jiffies, now + 15*HZ))
-                CERROR("slow prep finish page %lus\n", (jiffies - now) / HZ);
-
-        EXIT;
-out:
-        OBD_FREE(fso, objcount * sizeof(*fso));
-        current->journal_info = NULL;
-        pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
-        return rc;
-
-out_pages:
-        while (lnb-- > res) {
-                if (cmd & OBD_BRW_WRITE) {
-                        filter_commit_write(lnb, rc);
-                        up(&lnb->dentry->d_inode->i_sem);
-                } else {
-                        lustre_put_page(lnb->page);
-                }
-                f_dput(lnb->dentry);
-        }
-        if (cmd & OBD_BRW_WRITE) {
-                filter_finish_transno(exp, *desc_private, oti, rc);
-                fsfilt_commit(obd,
-                              filter_parent(obd,S_IFREG,obj->ioo_id)->d_inode,
-                              *desc_private, 0);
-        }
-        goto out; /* dropped the dentry refs already (one per page) */
-
-out_objinfo:
-        for (i = 0; i < objcount && fso[i].fso_dentry; i++) {
-                if (cmd & OBD_BRW_WRITE)
-                        up(&fso[i].fso_dentry->d_inode->i_sem);
-                f_dput(fso[i].fso_dentry);
-        }
-        goto out;
-}
-
-static int filter_write_locked_page(struct niobuf_local *lnb)
-{
-        struct page *lpage;
-        void        *lpage_addr;
-        void        *lnb_addr;
-        int rc;
-        ENTRY;
-
-        lpage = lustre_get_page_write(lnb->dentry->d_inode, lnb->page->index);
-        if (IS_ERR(lpage)) {
-                /* It is highly unlikely that we would ever get an error here.
-                 * The page we want to get was previously locked, so it had to
-                 * have already allocated the space, and we were just writing
-                 * over the same data, so there would be no hole in the file.
-                 *
-                 * XXX: possibility of a race with truncate could exist, need
-                 *      to check that.  There are no guarantees w.r.t.
-                 *      write order even on a local filesystem, although the
-                 *      normal response would be to return the number of bytes
-                 *      successfully written and leave the rest to the app.
-                 */
-                rc = PTR_ERR(lpage);
-                CERROR("error getting locked page index %ld: rc = %d\n",
-                       lnb->page->index, rc);
-                LBUG();
-                lustre_commit_write(lnb);
-                RETURN(rc);
-        }
-
-        /* 2 kmaps == vanishingly small deadlock opportunity */
-        lpage_addr = kmap(lpage);
-        lnb_addr = kmap(lnb->page);
-
-        memcpy(lpage_addr, lnb_addr, PAGE_SIZE);
-
-        kunmap(lnb->page);
-        kunmap(lpage);
-
-        lustre_put_page(lnb->page);
-
-        lnb->page = lpage;
-        rc = lustre_commit_write(lnb);
-        if (rc)
-                CERROR("error committing locked page %ld: rc = %d\n",
-                       lnb->page->index, rc);
-
-        RETURN(rc);
-}
-
  static int filter_syncfs(struct obd_export *exp)
  {
-        struct obd_device *obd = exp->exp_obd;
          ENTRY;
  
-        RETURN(fsfilt_sync(obd, obd->u.filter.fo_sb));
-}
-
-static int filter_commitrw(int cmd, struct obd_export *exp,
-                           int objcount, struct obd_ioobj *obj,
-                           int niocount, struct niobuf_local *res,
-                           void *desc_private, struct obd_trans_info *oti)
-{
-        struct obd_run_ctxt saved;
-        struct obd_ioobj *o;
-        struct niobuf_local *lnb;
-        struct obd_device *obd = exp->exp_obd;
-        int found_locked = 0, rc = 0, i;
-        unsigned long now = jiffies;  /* DEBUGGING OST TIMEOUTS */
-        ENTRY;
-
-        push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
-
-        LASSERT(!current->journal_info);
-        current->journal_info = desc_private;
-
-        for (i = 0, o = obj, lnb = res; i < objcount; i++, o++) {
-                int j;
-
-                if (cmd & OBD_BRW_WRITE) {
-                        inode_update_time(lnb->dentry->d_inode, 1);
-                        up(&lnb->dentry->d_inode->i_sem);
-                }
-                for (j = 0 ; j < o->ioo_bufcnt ; j++, lnb++) {
-                        if (lnb->page == NULL) {
-                                continue;
-                        }
-
-                        if (lnb->flags & N_LOCAL_TEMP_PAGE) {
-                                found_locked++;
-                                continue;
-                        }
-
-                        if (time_after(jiffies, lnb->start + 15*HZ))
-                                CERROR("slow commitrw %lus\n",
-                                       (jiffies - lnb->start) / HZ);
-
-                        if (cmd & OBD_BRW_WRITE) {
-                                int err = filter_commit_write(lnb, 0);
-
-                                if (!rc)
-                                        rc = err;
-                        } else {
-                                lustre_put_page(lnb->page);
-                        }
-
-                        f_dput(lnb->dentry);
-                        if (time_after(jiffies, lnb->start + 15*HZ))
-                                CERROR("slow commit_write %lus\n",
-                                       (jiffies - lnb->start) / HZ);
-                }
-        }
-
-        for (i = 0, o = obj, lnb = res; found_locked > 0 && i < objcount;
-             i++, o++) {
-                int j;
-                for (j = 0 ; j < o->ioo_bufcnt ; j++, lnb++) {
-                        int err;
-                        if (!(lnb->flags & N_LOCAL_TEMP_PAGE))
-                                continue;
-
-                        if (time_after(jiffies, lnb->start + 15*HZ))
-                                CERROR("slow commitrw locked %lus\n",
-                                       (jiffies - lnb->start) / HZ);
-
-                        err = filter_write_locked_page(lnb);
-                        if (!rc)
-                                rc = err;
-                        f_dput(lnb->dentry);
-                        found_locked--;
-
-                        if (time_after(jiffies, lnb->start + 15*HZ))
-                                CERROR("slow commit_write locked %lus\n",
-                                       (jiffies - lnb->start) / HZ);
-                }
-        }
-
-        if (cmd & OBD_BRW_WRITE) {
-                /* We just want any dentry for the commit, for now */
-                struct dentry *dparent = filter_parent(obd, S_IFREG, 0);
-                int err;
-
-                rc = filter_finish_transno(exp, desc_private, oti, rc);
-                err = fsfilt_commit(obd, dparent->d_inode, desc_private,
-                                    obd_sync_filter);
-                if (err)
-                        rc = err;
-                if (obd_sync_filter)
-                        LASSERT(oti->oti_transno <= obd->obd_last_committed);
-
-                if (time_after(jiffies, now + 15*HZ))
-                        CERROR("slow commitrw commit %lus\n", (jiffies-now)/HZ);
-        }
-
-        LASSERT(!current->journal_info);
-
-        pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
-        RETURN(rc);
+        RETURN(fsfilt_sync(exp->exp_obd, exp->exp_obd->u.filter.fo_sb));
  }
  
-static int filter_brw(int cmd, struct lustre_handle *conn,
-                      struct lov_stripe_md *lsm, obd_count oa_bufs,
-                      struct brw_page *pga, struct obd_trans_info *oti)
+static int filter_statfs(struct obd_device *obd, struct obd_statfs *osfs,
+                         unsigned long max_age)
  {
-        struct obd_export *export = class_conn2export(conn);
-        struct obd_ioobj        ioo;
-        struct niobuf_local     *lnb;
-        struct niobuf_remote    *rnb;
-        obd_count               i;
-        void                    *desc_private;
-        int                     ret = 0;
          ENTRY;
-
-        if (export == NULL)
-                RETURN(-EINVAL);
-
-        OBD_ALLOC(lnb, oa_bufs * sizeof(struct niobuf_local));
-        OBD_ALLOC(rnb, oa_bufs * sizeof(struct niobuf_remote));
-
-        if (lnb == NULL || rnb == NULL)
-                GOTO(out, ret = -ENOMEM);
-
-        for (i = 0; i < oa_bufs; i++) {
-                rnb[i].offset = pga[i].off;
-                rnb[i].len = pga[i].count;
-        }
-
-        ioo.ioo_id = lsm->lsm_object_id;
-        ioo.ioo_gr = 0;
-        ioo.ioo_type = S_IFREG;
-        ioo.ioo_bufcnt = oa_bufs;
-
-        ret = filter_preprw(cmd, export, NULL, 1, &ioo, oa_bufs, rnb, lnb,
-                            &desc_private, oti);
-        if (ret != 0)
-                GOTO(out, ret);
-
-        for (i = 0; i < oa_bufs; i++) {
-                void *virt = kmap(pga[i].pg);
-                obd_off off = pga[i].off & ~PAGE_MASK;
-                void *addr = kmap(lnb[i].page);
-
-                /* 2 kmaps == vanishingly small deadlock opportunity */
-
-                if (cmd & OBD_BRW_WRITE)
-                        memcpy(addr + off, virt + off, pga[i].count);
-                else
-                        memcpy(virt + off, addr + off, pga[i].count);
-
-                kunmap(addr);
-                kunmap(virt);
-        }
-
-        ret = filter_commitrw(cmd, export, 1, &ioo, oa_bufs, lnb, desc_private,
-                              oti);
-
-out:
-        if (lnb)
-                OBD_FREE(lnb, oa_bufs * sizeof(struct niobuf_local));
-        if (rnb)
-                OBD_FREE(rnb, oa_bufs * sizeof(struct niobuf_remote));
-        class_export_put(export);
-        RETURN(ret);
-}
-
-static int filter_san_preprw(int cmd, struct lustre_handle *conn,
-                             int objcount, struct obd_ioobj *obj,
-                             int niocount, struct niobuf_remote *nb)
-{
-        struct obd_device *obd;
-        struct obd_ioobj *o = obj;
-        struct niobuf_remote *rnb = nb;
-        int rc = 0;
-        int i;
-        ENTRY;
-
-        obd = class_conn2obd(conn);
-        if (!obd) {
-                CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
-                       conn->cookie);
-                RETURN(-EINVAL);
-        }
-
-        for (i = 0; i < objcount; i++, o++) {
-                struct dentry *dentry;
-                struct inode *inode;
-                int (*fs_bmap)(struct address_space *, long);
-                int j;
-
-                dentry = filter_fid2dentry(obd, NULL, o->ioo_type, o->ioo_id);
-                if (IS_ERR(dentry))
-                        GOTO(out, rc = PTR_ERR(dentry));
-                inode = dentry->d_inode;
-                if (!inode) {
-                        CERROR("trying to BRW to non-existent file "LPU64"\n",
-                               o->ioo_id);
-                        f_dput(dentry);
-                        GOTO(out, rc = -ENOENT);
-                }
-                fs_bmap = inode->i_mapping->a_ops->bmap;
-
-                for (j = 0; j < o->ioo_bufcnt; j++, rnb++) {
-                        long block;
-
-                        block = rnb->offset >> inode->i_blkbits;
-
-                        if (cmd == OBD_BRW_READ) {
-                                block = fs_bmap(inode->i_mapping, block);
-                        } else {
-                                loff_t newsize = rnb->offset + rnb->len;
-                                /* fs_prep_san_write will also update inode
-                                 * size for us:
-                                 * (1) new alloced block
-                                 * (2) existed block but size extented
-                                 */
-                                /* FIXME We could call fs_prep_san_write()
-                                 * only once for all the blocks allocation.
-                                 * Now call it once for each block, for
-                                 * simplicity. And if error happens, we
-                                 * probably need to release previous alloced
-                                 * block */
-                                rc = fs_prep_san_write(obd, inode, &block,
-                                                       1, newsize);
-                                if (rc)
-                                        break;
-                        }
-
-                        rnb->offset = block;
-                }
-                f_dput(dentry);
-        }
-out:
-        RETURN(rc);
-}
-
-static int filter_statfs(struct obd_export *exp, struct obd_statfs *osfs)
-{
-        struct obd_device *obd = exp->exp_obd;
-        ENTRY;
-
          RETURN(fsfilt_statfs(obd, obd->u.filter.fo_sb, osfs));
  }
  
@@ -2676,7 +2067,7 @@ static int filter_get_info(struct lustre_handle *conn, __u32 keylen,
          ENTRY;
  
          obd = class_conn2obd(conn);
-        if (!obd) {
+        if (obd == NULL) {
                  CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
                         conn->cookie);
                  RETURN(-EINVAL);
@@ -2702,77 +2093,46 @@ static int filter_get_info(struct lustre_handle *conn, __u32 keylen,
          RETURN(-EINVAL);
  }
  
-int filter_copy_data(struct lustre_handle *dst_conn, struct obdo *dst,
-                  struct lustre_handle *src_conn, struct obdo *src,
-                  obd_size count, obd_off offset, struct obd_trans_info *oti)
+static int filter_set_info(struct lustre_handle *conn, __u32 keylen,
+                           void *key, __u32 vallen, void *val)
  {
-        struct page *page;
-        struct lov_stripe_md srcmd, dstmd;
-        unsigned long index = 0;
-        int err = 0;
-
-        LBUG(); /* THIS CODE IS NOT CORRECT -phil */
-
-        memset(&srcmd, 0, sizeof(srcmd));
-        memset(&dstmd, 0, sizeof(dstmd));
-        srcmd.lsm_object_id = src->o_id;
-        dstmd.lsm_object_id = dst->o_id;
-
+        struct obd_device *obd;
+        struct obd_export *exp;
+        struct obd_import *imp;
          ENTRY;
-        CDEBUG(D_INFO, "src: ino "LPU64" blocks "LPU64", size "LPU64
-               ", dst: ino "LPU64"\n",
-               src->o_id, src->o_blocks, src->o_size, dst->o_id);
-        page = alloc_page(GFP_USER);
-        if (page == NULL)
-                RETURN(-ENOMEM);
-
-        wait_on_page(page);
  
-        /* XXX with brw vector I/O, we could batch up reads and writes here,
-         *     all we need to do is allocate multiple pages to handle the I/Os
-         *     and arrays to handle the request parameters.
-         */
-        while (index < ((src->o_size + PAGE_SIZE - 1) >> PAGE_SHIFT)) {
-                struct brw_page pg;
-
-                pg.pg = page;
-                pg.count = PAGE_SIZE;
-                pg.off = (page->index) << PAGE_SHIFT;
-                pg.flag = 0;
-
-                page->index = index;
-                err = obd_brw(OBD_BRW_READ, src_conn, &srcmd, 1, &pg, NULL);
-                if (err) {
-                        EXIT;
-                        break;
-                }
+        obd = class_conn2obd(conn);
+        if (obd == NULL) {
+                CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
+                       conn->cookie);
+                RETURN(-EINVAL);
+        }
  
-                pg.flag = OBD_BRW_CREATE;
-                CDEBUG(D_INFO, "Read page %ld ...\n", page->index);
+        /* strncmp() never reads past the literal's NUL, unlike memcmp() */
+        if (keylen < strlen("mds_conn") || strncmp(key, "mds_conn", keylen))
+                RETURN(-EINVAL);
  
-                err = obd_brw(OBD_BRW_WRITE, dst_conn, &dstmd, 1, &pg, oti);
+        CERROR("Received MDS connection ("LPX64")\n", conn->cookie);
+        memcpy(&obd->u.filter.fo_mdc_conn, conn, sizeof(*conn));
  
-                /* XXX should handle dst->o_size, dst->o_blocks here */
-                if (err) {
-                        EXIT;
-                        break;
-                }
+        imp = obd->u.filter.fo_mdc_imp = class_new_import();
  
-                CDEBUG(D_INFO, "Wrote page %ld ...\n", page->index);
+        exp = class_conn2export(conn);
+        imp->imp_connection = ptlrpc_connection_addref(exp->exp_connection);
+        class_export_put(exp);
  
-                index++;
-        }
-        dst->o_size = src->o_size;
-        dst->o_blocks = src->o_blocks;
-        dst->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
-        unlock_page(page);
-        __free_page(page);
+        imp->imp_client = &obd->u.filter.fo_mdc_client;
+        imp->imp_remote_handle = *conn;
+        imp->imp_obd = obd;
+        imp->imp_dlm_fake = 1; /* XXX rename imp_dlm_fake to something else */
+        imp->imp_level = LUSTRE_CONN_FULL;
+        class_import_put(imp);
  
-        RETURN(err);
+        RETURN(0);
  }
  
  int filter_iocontrol(unsigned int cmd, struct lustre_handle *conn,
-                  int len, void *karg, void *uarg)
+                     int len, void *karg, void *uarg)
  {
          struct obd_device *obd = class_conn2obd(conn);
  
@@ -2788,12 +2148,12 @@ int filter_iocontrol(unsigned int cmd, struct lustre_handle *conn,
          RETURN(0);
  }
  
-
  static struct obd_ops filter_obd_ops = {
          o_owner:          THIS_MODULE,
          o_attach:         filter_attach,
          o_detach:         filter_detach,
          o_get_info:       filter_get_info,
+        o_set_info:       filter_set_info,
          o_setup:          filter_setup,
          o_cleanup:        filter_cleanup,
          o_connect:        filter_connect,
@@ -2810,15 +2170,9 @@ static struct obd_ops filter_obd_ops = {
          o_punch:          filter_truncate,
          o_preprw:         filter_preprw,
          o_commitrw:       filter_commitrw,
+        o_log_cancel:     filter_log_cancel,
          o_destroy_export: filter_destroy_export,
          o_iocontrol:      filter_iocontrol,
-#if 0
-        o_san_preprw:  filter_san_preprw,
-        o_preallocate: filter_preallocate_inodes,
-        o_migrate:     filter_migrate,
-        o_copy:        filter_copy_data,
-        o_iterate:     filter_iterate
-#endif
  };
  
  static struct obd_ops filter_sanobd_ops = {
@@ -2826,6 +2180,7 @@ static struct obd_ops filter_sanobd_ops = {
          o_attach:         filter_attach,
          o_detach:         filter_detach,
          o_get_info:       filter_get_info,
+        o_set_info:       filter_set_info,
          o_setup:          filter_san_setup,
          o_cleanup:        filter_cleanup,
          o_connect:        filter_connect,
@@ -2841,18 +2196,12 @@ static struct obd_ops filter_sanobd_ops = {
          o_punch:          filter_truncate,
          o_preprw:         filter_preprw,
          o_commitrw:       filter_commitrw,
+        o_log_cancel:     filter_log_cancel,
          o_san_preprw:     filter_san_preprw,
          o_destroy_export: filter_destroy_export,
          o_iocontrol:      filter_iocontrol,
-#if 0
-        o_preallocate:  filter_preallocate_inodes,
-        o_migrate:      filter_migrate,
-        o_copy:         filter_copy_data,
-        o_iterate:      filter_iterate
-#endif
  };
  
-
  static int __init obdfilter_init(void)
  {
          struct lprocfs_static_vars lvars;
@@ -2860,7 +2209,7 @@ static int __init obdfilter_init(void)
  
          printk(KERN_INFO "Lustre Filtering OBD driver; info@clusterfs.com\n");
  
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(filter, &lvars);
  
          rc = class_register_type(&filter_obd_ops, lvars.module_vars,
                                   OBD_FILTER_DEVICENAME);
diff --git a/lustre/obdfilter/lproc_obdfilter.c b/lustre/obdfilter/lproc_obdfilter.c
index 1319dbd..411a9fb 100644
--- a/lustre/obdfilter/lproc_obdfilter.c
+++ b/lustre/obdfilter/lproc_obdfilter.c
@@ -22,41 +22,16 @@
  #define DEBUG_SUBSYSTEM S_CLASS
  
  #include <linux/version.h>
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#include <asm/statfs.h>
-#endif
  #include <linux/lprocfs_status.h>
  #include <linux/obd.h>
  
  #ifndef LPROCFS
-struct lprocfs_vars lprocfs_obd_vars[]  = { {0} };
-struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_obd_vars[]  = { {0} };
+static struct lprocfs_vars lprocfs_module_vars[] = { {0} };
  #else
  
-static inline int lprocfs_filter_statfs(void *data, struct statfs *sfs)
-{
-        struct obd_device *dev = (struct obd_device *) data;
-        LASSERT(dev != NULL);
-        return vfs_statfs(dev->u.filter.fo_sb, sfs);
-}
-
-DEFINE_LPROCFS_STATFS_FCT(rd_blksize,     lprocfs_filter_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, lprocfs_filter_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree,  lprocfs_filter_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filestotal,  lprocfs_filter_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filesfree,   lprocfs_filter_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filegroups,  lprocfs_filter_statfs);
-
-int rd_fstype(char *page, char **start, off_t off, int count, int *eof,
-              void *data)
-{
-        struct obd_device *dev = (struct obd_device *)data;
-        LASSERT(dev != NULL);
-        return snprintf(page, count, "%s\n", dev->u.filter.fo_fstype);
-}
-
-int lprocfs_filter_rd_mntdev(char *page, char **start, off_t off, int count,
-                             int *eof, void *data)
+static int lprocfs_filter_rd_mntdev(char *page, char **start, off_t off,
+                                    int count, int *eof, void *data)
  {
          struct obd_device* obd = (struct obd_device *)data;
  
@@ -67,23 +42,23 @@ int lprocfs_filter_rd_mntdev(char *page, char **start, off_t off, int count,
                          obd->u.filter.fo_vfsmnt->mnt_devname);
  }
  
-struct lprocfs_vars lprocfs_obd_vars[] = {
-        { "uuid",        lprocfs_rd_uuid,    0, 0 },
-        { "blocksize",   rd_blksize,         0, 0 },
-        { "kbytestotal", rd_kbytestotal,     0, 0 },
-        { "kbytesfree",  rd_kbytesfree,      0, 0 },
-        { "filestotal",  rd_filestotal,      0, 0 },
-        { "filesfree",   rd_filesfree,       0, 0 },
-        { "filegroups",  rd_filegroups,      0, 0 },
-        { "fstype",      rd_fstype,          0, 0 },
-        { "mntdev",      lprocfs_filter_rd_mntdev,    0, 0 },
+static struct lprocfs_vars lprocfs_obd_vars[] = {
+        { "uuid",         lprocfs_rd_uuid,          0, 0 },
+        { "blocksize",    lprocfs_rd_blksize,       0, 0 },
+        { "kbytestotal",  lprocfs_rd_kbytestotal,   0, 0 },
+        { "kbytesfree",   lprocfs_rd_kbytesfree,    0, 0 },
+        { "filestotal",   lprocfs_rd_filestotal,    0, 0 },
+        { "filesfree",    lprocfs_rd_filesfree,     0, 0 },
+        //{ "filegroups",   lprocfs_rd_filegroups,    0, 0 },
+        { "fstype",       lprocfs_rd_fstype,        0, 0 },
+        { "mntdev",       lprocfs_filter_rd_mntdev, 0, 0 },
          { 0 }
  };
  
-struct lprocfs_vars lprocfs_module_vars[] = {
-        { "num_refs",    lprocfs_rd_numrefs, 0, 0 },
+static struct lprocfs_vars lprocfs_module_vars[] = {
+        { "num_refs",     lprocfs_rd_numrefs,       0, 0 },
          { 0 }
  };
  
  #endif /* LPROCFS */
-LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
+LPROCFS_INIT_VARS(filter,lprocfs_module_vars, lprocfs_obd_vars)
diff --git a/lustre/osc/.cvsignore b/lustre/osc/.cvsignore
index e530020..49c6100 100644
--- a/lustre/osc/.cvsignore
+++ b/lustre/osc/.cvsignore
@@ -6,3 +6,4 @@ Makefile
  Makefile.in
  .deps
  TAGS
+.*.cmd
diff --git a/lustre/osc/lproc_osc.c b/lustre/osc/lproc_osc.c
index d5e4ec1..e9affd0 100644
--- a/lustre/osc/lproc_osc.c
+++ b/lustre/osc/lproc_osc.c
@@ -29,34 +29,26 @@
  #include <linux/lprocfs_status.h>
  
  #ifndef LPROCFS
-struct lprocfs_vars lprocfs_obd_vars[]  = { {0} };
-struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_obd_vars[]  = { {0} };
+static struct lprocfs_vars lprocfs_module_vars[] = { {0} };
  #else
-
-DEFINE_LPROCFS_STATFS_FCT(rd_blksize,     obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree,  obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filestotal,  obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filesfree,   obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filegroups,  obd_self_statfs);
-
-struct lprocfs_vars lprocfs_obd_vars[] = {
-        { "uuid",            lprocfs_rd_uuid, 0, 0 },
-        { "blocksize",       rd_blksize, 0, 0 },
-        { "kbytestotal",     rd_kbytestotal, 0, 0 },
-        { "kbytesfree",      rd_kbytesfree, 0, 0 },
-        { "filestotal",      rd_filestotal, 0, 0 },
-        { "filesfree",       rd_filesfree, 0, 0   },
-        { "filegroups",      rd_filegroups, 0, 0 },
+static struct lprocfs_vars lprocfs_obd_vars[] = {
+        { "uuid",            lprocfs_rd_uuid,        0, 0 },
+        { "blocksize",       lprocfs_rd_blksize,     0, 0 },
+        { "kbytestotal",     lprocfs_rd_kbytestotal, 0, 0 },
+        { "kbytesfree",      lprocfs_rd_kbytesfree,  0, 0 },
+        { "filestotal",      lprocfs_rd_filestotal,  0, 0 },
+        { "filesfree",       lprocfs_rd_filesfree,   0, 0 },
+        //{ "filegroups",      lprocfs_rd_filegroups,  0, 0 },
          { "ost_server_uuid", lprocfs_rd_server_uuid, 0, 0 },
-        { "ost_conn_uuid",   lprocfs_rd_conn_uuid, 0, 0 },
+        { "ost_conn_uuid",   lprocfs_rd_conn_uuid,   0, 0 },
          { 0 }
  };
  
-struct lprocfs_vars lprocfs_module_vars[] = {
-        { "num_refs",        lprocfs_rd_numrefs, 0, 0 },
+static struct lprocfs_vars lprocfs_module_vars[] = {
+        { "num_refs",        lprocfs_rd_numrefs,     0, 0 },
          { 0 }
  };
  
  #endif /* LPROCFS */
-LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
+LPROCFS_INIT_VARS(osc,lprocfs_module_vars, lprocfs_obd_vars)
diff --git a/lustre/osc/osc_lib.c b/lustre/osc/osc_lib.c
index aa04a1a..c8cd6ad 100644
--- a/lustre/osc/osc_lib.c
+++ b/lustre/osc/osc_lib.c
@@ -34,8 +34,7 @@ static kdev_t path2dev(char *path)
  {
          struct dentry *dentry;
          struct nameidata nd;
-        kdev_t dev;
-        KDEVT_VAL(dev, 0);
+        kdev_t dev = KDEVT_INIT(0);
  
          if (!path_init(path, LOOKUP_FOLLOW, &nd))
                  return 0;
diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c
index 4bda8de..89061fd 100644
--- a/lustre/osc/osc_request.c
+++ b/lustre/osc/osc_request.c
@@ -30,32 +30,33 @@
  #define DEBUG_SUBSYSTEM S_OSC
  
  #ifdef __KERNEL__
-#include <linux/version.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/highmem.h>
-#include <linux/lustre_dlm.h>
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#include <linux/workqueue.h>
-#include <linux/smp_lock.h>
-#else
-#include <linux/locks.h>
-#endif
-#else
-#include <liblustre.h>
+# include <linux/version.h>
+# include <linux/module.h>
+# include <linux/mm.h>
+# include <linux/highmem.h>
+# include <linux/lustre_dlm.h>
+# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#  include <linux/workqueue.h>
+#  include <linux/smp_lock.h>
+# else
+#  include <linux/locks.h>
+# endif
+#else /* __KERNEL__ */
+# include <liblustre.h>
  #endif
  
  #include <linux/kp30.h>
  #include <linux/lustre_mds.h> /* for mds_objid */
  #include <linux/lustre_otree.h>
  #include <linux/obd_ost.h>
+#include <linux/lustre_commit_confd.h>
  #include <linux/obd_lov.h>
  
  #ifndef  __CYGWIN__
-#include <linux/ctype.h>
-#include <linux/init.h>
+# include <linux/ctype.h>
+# include <linux/init.h>
  #else
-#include <ctype.h>
+# include <ctype.h>
  #endif
  
  #include <linux/lustre_ha.h>
@@ -64,11 +65,13 @@
  #include <portals/lib-types.h> /* for PTL_MD_MAX_IOV */
  #include <linux/lprocfs_status.h>
  
+static struct llog_cookie zero_cookie = { { 0 } };
+
  static int osc_attach(struct obd_device *dev, obd_count len, void *data)
  {
          struct lprocfs_static_vars lvars;
  
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(osc,&lvars);
          return lprocfs_obd_attach(dev, lvars.obd_vars);
  }
  
@@ -119,29 +122,29 @@ static int osc_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
                  if (lmm_bytes < sizeof (*lmm)) {
                          CERROR("lov_mds_md too small: %d, need %d\n",
                                 lmm_bytes, (int)sizeof(*lmm));
-                        RETURN (-EINVAL);
+                        RETURN(-EINVAL);
                  }
                  /* XXX LOV_MAGIC etc check? */
  
-                if (lmm->lmm_object_id == cpu_to_le64 (0)) {
-                        CERROR ("lov_mds_md: zero lmm_object_id\n");
-                        RETURN (-EINVAL);
+                if (lmm->lmm_object_id == cpu_to_le64(0)) {
+                        CERROR("lov_mds_md: zero lmm_object_id\n");
+                        RETURN(-EINVAL);
                  }
          }
  
          lsm_size = lov_stripe_md_size(1);
-        if (!lsmp)
+        if (lsmp == NULL)
                  RETURN(lsm_size);
  
-        if (*lsmp && !lmm) {
+        if (*lsmp != NULL && lmm == NULL) {
                  OBD_FREE(*lsmp, lsm_size);
                  *lsmp = NULL;
                  RETURN(0);
          }
  
-        if (!*lsmp) {
+        if (*lsmp == NULL) {
                  OBD_ALLOC(*lsmp, lsm_size);
-                if (!*lsmp)
+                if (*lsmp == NULL)
                          RETURN(-ENOMEM);
  
                  (*lsmp)->lsm_oinfo[0].loi_dirty_ot =
@@ -149,7 +152,7 @@ static int osc_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
                  ot_init((*lsmp)->lsm_oinfo[0].loi_dirty_ot);
          }
  
-        if (lmm) {
+        if (lmm != NULL) {
                  /* XXX zero *lsmp? */
                  (*lsmp)->lsm_object_id = le64_to_cpu (lmm->lmm_object_id);
                  LASSERT((*lsmp)->lsm_object_id);
@@ -167,29 +170,27 @@ static int osc_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
  static int osc_getattr_interpret(struct ptlrpc_request *req,
                                   struct osc_getattr_async_args *aa, int rc)
  {
-        struct obdo     *oa = aa->aa_oa;
          struct ost_body *body;
          ENTRY;
  
-        if (rc != 0) {
-                CERROR("failed: rc = %d\n", rc);
-                RETURN (rc);
-        }
-
-        body = lustre_swab_repbuf(req, 0, sizeof (*body), lustre_swab_ost_body);
-        if (body == NULL) {
-                CERROR ("can't unpack ost_body\n");
-                RETURN (-EPROTO);
-        }
+        if (rc != 0)
+                RETURN(rc);
  
-        CDEBUG(D_INODE, "mode: %o\n", body->oa.o_mode);
-        memcpy(oa, &body->oa, sizeof(*oa));
+        body = lustre_swab_repbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
+        if (body) {
+                CDEBUG(D_INODE, "mode: %o\n", body->oa.o_mode);
+                memcpy(aa->aa_oa, &body->oa, sizeof(*aa->aa_oa));
  
-        /* This should really be sent by the OST */
-        oa->o_blksize = OSC_BRW_MAX_SIZE;
-        oa->o_valid |= OBD_MD_FLBLKSZ;
+                /* This should really be sent by the OST */
+                aa->aa_oa->o_blksize = OSC_BRW_MAX_SIZE;
+                aa->aa_oa->o_valid |= OBD_MD_FLBLKSZ;
+        } else {
+                CERROR("can't unpack ost_body\n");
+                rc = -EPROTO;
+                aa->aa_oa->o_valid = 0;
+        }
  
-        RETURN (0);
+        RETURN(rc);
  }
  
  static int osc_getattr_async(struct lustre_handle *conn, struct obdo *oa,
@@ -505,7 +506,7 @@ static int osc_create(struct lustre_handle *conn, struct obdo *oa,
                  GOTO(out, rc = -ENOMEM);
  
          body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof (*body));
-        memcpy(&body->oa, oa, sizeof(*oa));
+        memcpy(&body->oa, oa, sizeof(body->oa));
  
          request->rq_replen = lustre_msg_size(1, &size);
  
@@ -513,8 +514,8 @@ static int osc_create(struct lustre_handle *conn, struct obdo *oa,
          if (rc)
                  GOTO(out_req, rc);
  
-        body = lustre_swab_repbuf (request, 0, sizeof (*body),
-                                   lustre_swab_ost_body);
+        body = lustre_swab_repbuf(request, 0, sizeof(*body),
+                                  lustre_swab_ost_body);
          if (body == NULL) {
                  CERROR ("can't unpack ost_body\n");
                  GOTO (out_req, rc = -EPROTO);
@@ -531,13 +532,19 @@ static int osc_create(struct lustre_handle *conn, struct obdo *oa,
           * This needs to be fixed in a big way.
           */
          lsm->lsm_object_id = oa->o_id;
-        lsm->lsm_stripe_count = 0;
-        lsm->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES;
          *ea = lsm;
  
-        if (oti != NULL)
+        if (oti != NULL) {
                  oti->oti_transno = request->rq_repmsg->transno;
  
+                if (oa->o_valid & OBD_MD_FLCOOKIE) {
+                        if (!oti->oti_logcookies)
+                                oti_alloc_cookies(oti, 1);
+                        memcpy(oti->oti_logcookies, obdo_logcookie(oa),
+                               sizeof(oti->oti_onecookie));
+                }
+        }
+
          CDEBUG(D_HA, "transno: "LPD64"\n", request->rq_repmsg->transno);
          EXIT;
  out_req:
@@ -616,14 +623,20 @@ static int osc_destroy(struct lustre_handle *conn, struct obdo *oa,
          body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof (*body));
          memcpy(&body->oa, oa, sizeof(*oa));
  
+        if (oti && oa->o_valid & OBD_MD_FLCOOKIE) {
+                memcpy(obdo_logcookie(oa), oti->oti_logcookies,
+                       sizeof(*oti->oti_logcookies));
+                oti->oti_logcookies++;
+        }
+
          request->rq_replen = lustre_msg_size(1, &size);
  
          rc = ptlrpc_queue_wait(request);
          if (rc)
                  GOTO(out, rc);
  
-        body = lustre_swab_repbuf (request, 0, sizeof (*body),
-                                   lustre_swab_ost_body);
+        body = lustre_swab_repbuf(request, 0, sizeof(*body),
+                                  lustre_swab_ost_body);
          if (body == NULL) {
                  CERROR ("Can't unpack body\n");
                  GOTO (out, rc = -EPROTO);
@@ -663,7 +676,7 @@ static void osc_update_grant(struct client_obd *cli, struct ost_body *body)
                  return;
          }
  
-        CDEBUG(D_INODE, "got "LPU64" grant\n", body->oa.o_rdev);
+        CDEBUG(D_ERROR, "got "LPU64" grant\n", body->oa.o_rdev);
          down(&cli->cl_dirty_sem);
          cli->cl_dirty_granted = body->oa.o_rdev;
          /* XXX check for over-run and wake up the io thread that
@@ -708,9 +721,8 @@ static void handle_short_read(int nob_read, obd_count page_count,
          }
  }
  
-static int check_write_rcs (struct ptlrpc_request *request,
-                            int niocount, obd_count page_count,
-                            struct brw_page *pga)
+static int check_write_rcs(struct ptlrpc_request *request, int niocount,
+                           obd_count page_count, struct brw_page *pga)
  {
          int    i;
          __u32 *remote_rcs;
@@ -778,11 +790,10 @@ static obd_count cksum_pages(int nob, obd_count page_count,
  }
  #endif
  
-static int osc_brw_prep_request(struct obd_import *imp,
+static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa,
                                  struct lov_stripe_md *lsm, obd_count page_count,
-                                struct brw_page *pga, int cmd,
-                                int *requested_nobp, int *niocountp,
-                                struct ptlrpc_request **reqp)
+                                struct brw_page *pga, int *requested_nobp,
+                                int *niocountp, struct ptlrpc_request **reqp)
  {
          struct ptlrpc_request   *req;
          struct ptlrpc_bulk_desc *desc;
@@ -804,11 +815,11 @@ static int osc_brw_prep_request(struct obd_import *imp,
                  if (!can_merge_pages (&pga[i - 1], &pga[i]))
                          niocount++;
  
-        size[0] = sizeof (*body);
-        size[1] = sizeof (*ioobj);
-        size[2] = niocount * sizeof (*niobuf);
+        size[0] = sizeof(*body);
+        size[1] = sizeof(*ioobj);
+        size[2] = niocount * sizeof(*niobuf);
  
-        req = ptlrpc_prep_req (imp, opc, 3, size, NULL);
+        req = ptlrpc_prep_req(imp, opc, 3, size, NULL);
          if (req == NULL)
                  return (-ENOMEM);
  
@@ -819,16 +830,18 @@ static int osc_brw_prep_request(struct obd_import *imp,
                  desc = ptlrpc_prep_bulk_imp(req, BULK_PUT_SINK,
                                              OST_BULK_PORTAL);
          if (desc == NULL)
-                GOTO (out, rc = -ENOMEM);
+                GOTO(out, rc = -ENOMEM);
          /* NB request now owns desc and will free it when it gets freed */
  
          body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*body));
          ioobj = lustre_msg_buf(req->rq_reqmsg, 1, sizeof(*ioobj));
          niobuf = lustre_msg_buf(req->rq_reqmsg, 2, niocount * sizeof(*niobuf));
  
-        ioobj->ioo_id = lsm->lsm_object_id;
-        ioobj->ioo_gr = 0;
-        ioobj->ioo_type = S_IFREG;
+        memcpy(&body->oa, oa, sizeof(*oa));
+
+        ioobj->ioo_id = oa->o_id;
+        ioobj->ioo_gr = oa->o_valid & 0 ? oa->o_gr : 0;
+        ioobj->ioo_type = oa->o_mode;
          ioobj->ioo_bufcnt = niocount;
  
          LASSERT (page_count > 0);
@@ -836,19 +849,18 @@ static int osc_brw_prep_request(struct obd_import *imp,
                  struct brw_page *pg = &pga[i];
                  struct brw_page *pg_prev = pg - 1;
  
-                LASSERT (pg->count > 0);
-                LASSERT ((pg->off & (PAGE_SIZE - 1)) + pg->count <= PAGE_SIZE);
-                LASSERT (i == 0 || pg->off > pg_prev->off);
+                LASSERT(pg->count > 0);
+                LASSERT((pg->off & ~PAGE_MASK) + pg->count <= PAGE_SIZE);
+                LASSERT(i == 0 || pg->off > pg_prev->off);
  
-                rc = ptlrpc_prep_bulk_page (desc, pg->pg,
-                                            pg->off & (PAGE_SIZE - 1),
-                                            pg->count);
+                rc = ptlrpc_prep_bulk_page(desc, pg->pg, pg->off & ~PAGE_MASK,
+                                           pg->count);
                  if (rc != 0)
-                        GOTO (out, rc);
+                        GOTO(out, rc);
  
                  requested_nob += pg->count;
  
-                if (i > 0 && can_merge_pages (pg_prev, pg)) {
+                if (i > 0 && can_merge_pages(pg_prev, pg)) {
                          niobuf--;
                          niobuf->len += pg->count;
                  } else {
@@ -858,17 +870,17 @@ static int osc_brw_prep_request(struct obd_import *imp,
                  }
          }
  
-        LASSERT ((void *)(niobuf - niocount) ==
-                 lustre_msg_buf(req->rq_reqmsg, 2, niocount * sizeof(*niobuf)));
+        LASSERT((void *)(niobuf - niocount) ==
+                lustre_msg_buf(req->rq_reqmsg, 2, niocount * sizeof(*niobuf)));
  #if CHECKSUM_BULK
          body->oa.o_valid |= OBD_MD_FLCKSUM;
          if (opc == OST_BRW_WRITE)
-                body->oa.o_nlink = cksum_pages (requested_nob, page_count, pga);
+                body->oa.o_nlink = cksum_pages(requested_nob, page_count, pga);
  #endif
          osc_announce_cached(cli, body);
-        spin_lock_irqsave (&req->rq_lock, flags);
+        spin_lock_irqsave(&req->rq_lock, flags);
          req->rq_no_resend = 1;
-        spin_unlock_irqrestore (&req->rq_lock, flags);
+        spin_unlock_irqrestore(&req->rq_lock, flags);
  
          /* size[0] still sizeof (*body) */
          if (opc == OST_WRITE) {
@@ -890,21 +902,23 @@ static int osc_brw_prep_request(struct obd_import *imp,
          return (rc);
  }
  
-static int osc_brw_fini_request (struct ptlrpc_request *req,
-                                 int requested_nob, int niocount,
-                                 obd_count page_count, struct brw_page *pga,
-                                 int rc)
+static int osc_brw_fini_request(struct ptlrpc_request *req, struct obdo *oa,
+                                int requested_nob, int niocount,
+                                obd_count page_count, struct brw_page *pga,
+                                int rc)
  {
          struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
          struct ost_body *body;
+
          if (rc < 0)
                  return (rc);
  
-        body = lustre_swab_repbuf(req, 0, sizeof (*body), lustre_swab_ost_body);
+        body = lustre_swab_repbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
          if (body == NULL) {
                  CERROR ("Can't unpack body\n");
-                RETURN(-EPROTO);
+                return (-EPROTO);
          }
+
          osc_update_grant(cli, body);
  
          if (req->rq_reqmsg->opc == OST_WRITE) {
@@ -913,22 +927,23 @@ static int osc_brw_fini_request (struct ptlrpc_request *req,
                          return (-EPROTO);
                  }
  
-                return (check_write_rcs(req, niocount, page_count, pga));
+                return(check_write_rcs(req, niocount, page_count, pga));
          }
  
          if (rc > requested_nob) {
-                CERROR ("Unexpected rc %d (%d requested)\n",
-                        rc, requested_nob);
+                CERROR("Unexpected rc %d (%d requested)\n", rc, requested_nob);
                  return (-EPROTO);
          }
  
          if (rc < requested_nob)
                  handle_short_read(rc, page_count, pga);
  
+        memcpy(oa, &body->oa, sizeof(*oa));
+
  #if CHECKSUM_BULK
-        if (body->oa.o_valid & OBD_MD_FLCKSUM) {
+        if (oa->o_valid & OBD_MD_FLCKSUM) {
                  static int cksum_counter;
-                obd_count server_cksum = body->oa.o_nlink;
+                obd_count server_cksum = oa->o_nlink;
                  obd_count cksum = cksum_pages(rc, page_count, pga);
  
                  cksum_counter++;
@@ -937,6 +952,7 @@ static int osc_brw_fini_request (struct ptlrpc_request *req,
                                 ", server NID "LPX64"\n", server_cksum, cksum,
                                 imp->imp_connection->c_peer.peer_nid);
                          cksum_counter = 0;
+                        oa->o_rdev = cksum;
                  } else if ((cksum_counter & (-cksum_counter)) == cksum_counter)
                          CERROR("Checksum %u from "LPX64" OK: %x\n",
                                 cksum_counter,
@@ -953,9 +969,9 @@ static int osc_brw_fini_request (struct ptlrpc_request *req,
          return (0);
  }
  
-static int osc_brw_internal(struct lustre_handle *conn,
+static int osc_brw_internal(int cmd, struct lustre_handle *conn,struct obdo *oa,
                              struct lov_stripe_md *lsm,
-                            obd_count page_count, struct brw_page *pga, int cmd)
+                            obd_count page_count, struct brw_page *pga)
  {
          int                    requested_nob;
          int                    niocount;
@@ -964,8 +980,9 @@ static int osc_brw_internal(struct lustre_handle *conn,
          ENTRY;
  
  restart_bulk:
-        rc = osc_brw_prep_request(class_conn2cliimp(conn), lsm, page_count, pga,
-                                  cmd, &requested_nob, &niocount, &request);
+        rc = osc_brw_prep_request(cmd, class_conn2cliimp(conn), oa, lsm,
+                                  page_count, pga, &requested_nob, &niocount,
+                                  &request);
          /* NB ^ sets rq_no_resend */
  
          if (rc != 0)
@@ -979,8 +996,8 @@ restart_bulk:
                  goto restart_bulk;
          }
  
-        rc = osc_brw_fini_request (request, requested_nob, niocount,
-                                   page_count, pga, rc);
+        rc = osc_brw_fini_request(request, oa, requested_nob, niocount,
+                                  page_count, pga, rc);
  
          ptlrpc_req_finished(request);
          RETURN (rc);
@@ -989,6 +1006,7 @@ restart_bulk:
  static int brw_interpret(struct ptlrpc_request *request,
                           struct osc_brw_async_args *aa, int rc)
  {
+        struct obdo *oa      = aa->aa_oa;
          int requested_nob    = aa->aa_requested_nob;
          int niocount         = aa->aa_nio_count;
          obd_count page_count = aa->aa_page_count;
@@ -1002,14 +1020,14 @@ static int brw_interpret(struct ptlrpc_request *request,
                  //goto restart_bulk;
          }
  
-        rc = osc_brw_fini_request (request, requested_nob, niocount,
-                                   page_count, pga, rc);
+        rc = osc_brw_fini_request(request, oa, requested_nob, niocount,
+                                  page_count, pga, rc);
          RETURN (rc);
  }
  
-static int async_internal(struct lustre_handle *conn, struct lov_stripe_md *lsm,
-                          obd_count page_count, struct brw_page *pga,
-                          struct ptlrpc_request_set *set, int cmd)
+static int async_internal(int cmd, struct lustre_handle *conn, struct obdo *oa,
+                          struct lov_stripe_md *lsm, obd_count page_count,
+                          struct brw_page *pga, struct ptlrpc_request_set *set)
  {
          struct ptlrpc_request     *request;
          int                        requested_nob;
@@ -1018,14 +1036,15 @@ static int async_internal(struct lustre_handle *conn, struct lov_stripe_md *lsm,
          int                        rc;
          ENTRY;
  
-        rc = osc_brw_prep_request (class_conn2cliimp(conn),
-                                   lsm, page_count, pga, cmd,
-                                   &requested_nob, &nio_count, &request);
+        rc = osc_brw_prep_request(cmd, class_conn2cliimp(conn), oa, lsm,
+                                  page_count, pga, &requested_nob, &nio_count,
+                                  &request);
          /* NB ^ sets rq_no_resend */
  
          if (rc == 0) {
-                LASSERT (sizeof (*aa) <= sizeof (request->rq_async_args));
+                LASSERT(sizeof(*aa) <= sizeof(request->rq_async_args));
                  aa = (struct osc_brw_async_args *)&request->rq_async_args;
+                aa->aa_oa = oa;
                  aa->aa_requested_nob = requested_nob;
                  aa->aa_nio_count = nio_count;
                  aa->aa_page_count = page_count;
@@ -1096,7 +1115,7 @@ static obd_count check_elan_limit(struct brw_page *pg, obd_count pages)
          return i;
  }
  
-static int osc_brw(int cmd, struct lustre_handle *conn,
+static int osc_brw(int cmd, struct lustre_handle *conn, struct obdo *oa,
                     struct lov_stripe_md *md, obd_count page_count,
                     struct brw_page *pga, struct obd_trans_info *oti)
  {
@@ -1124,7 +1143,7 @@ static int osc_brw(int cmd, struct lustre_handle *conn,
                  sort_brw_pages(pga, pages_per_brw);
                  pages_per_brw = check_elan_limit(pga, pages_per_brw);
  
-                rc = osc_brw_internal(conn, md, pages_per_brw, pga, cmd);
+                rc = osc_brw_internal(cmd, conn, oa, md, pages_per_brw, pga);
  
                  if (rc != 0)
                          RETURN(rc);
@@ -1135,7 +1154,7 @@ static int osc_brw(int cmd, struct lustre_handle *conn,
          RETURN(0);
  }
  
-static int osc_brw_async(int cmd, struct lustre_handle *conn,
+static int osc_brw_async(int cmd, struct lustre_handle *conn, struct obdo *oa,
                           struct lov_stripe_md *md, obd_count page_count,
                           struct brw_page *pga, struct ptlrpc_request_set *set,
                           struct obd_trans_info *oti)
@@ -1164,7 +1183,7 @@ static int osc_brw_async(int cmd, struct lustre_handle *conn,
                  sort_brw_pages(pga, pages_per_brw);
                  pages_per_brw = check_elan_limit(pga, pages_per_brw);
  
-                rc = async_internal(conn, md, pages_per_brw, pga, set, cmd);
+                rc = async_internal(cmd, conn, oa, md, pages_per_brw, pga, set);
  
                  if (rc != 0)
                          RETURN(rc);
@@ -1178,9 +1197,8 @@ static int osc_brw_async(int cmd, struct lustre_handle *conn,
  #ifdef __KERNEL__
  /* Note: caller will lock/unlock, and set uptodate on the pages */
  #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-static int sanosc_brw_read(struct lustre_handle *conn,
-                           struct lov_stripe_md *lsm,
-                           obd_count page_count,
+static int sanosc_brw_read(struct lustre_handle *conn, struct obdo *oa,
+                           struct lov_stripe_md *lsm, obd_count page_count,
                             struct brw_page *pga)
  {
          struct ptlrpc_request *request = NULL;
@@ -1201,14 +1219,16 @@ static int sanosc_brw_read(struct lustre_handle *conn,
          if (!request)
                  RETURN(-ENOMEM);
  
-        body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof (*body));
-        iooptr = lustre_msg_buf(request->rq_reqmsg, 1, sizeof (*iooptr));
+        body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof(*body));
+        iooptr = lustre_msg_buf(request->rq_reqmsg, 1, sizeof(*iooptr));
          nioptr = lustre_msg_buf(request->rq_reqmsg, 2,
-                                sizeof (*nioptr) * page_count);
+                                sizeof(*nioptr) * page_count);
+
+        memcpy(&body->oa, oa, sizeof(body->oa));
  
-        iooptr->ioo_id = lsm->lsm_object_id;
-        iooptr->ioo_gr = 0;
-        iooptr->ioo_type = S_IFREG;
+        iooptr->ioo_id = oa->o_id;
+        iooptr->ioo_gr = oa->o_valid & 0 ? oa->o_gr : 0;
+        iooptr->ioo_type = oa->o_mode;
          iooptr->ioo_bufcnt = page_count;
  
          for (mapped = 0; mapped < page_count; mapped++, nioptr++) {
@@ -1227,8 +1247,17 @@ static int sanosc_brw_read(struct lustre_handle *conn,
          if (rc)
                  GOTO(out_req, rc);
  
-        swab = lustre_msg_swabbed (request->rq_repmsg);
-        LASSERT_REPSWAB (request, 1);
+        body = lustre_swab_repbuf(request, 0, sizeof(*body),
+                                  lustre_swab_ost_body);
+        if (body == NULL) {
+                CERROR("Can't unpack body\n");
+                GOTO(out_req, rc = -EPROTO);
+        }
+
+        memcpy(oa, &body->oa, sizeof(*oa));
+
+        swab = lustre_msg_swabbed(request->rq_repmsg);
+        LASSERT_REPSWAB(request, 1);
          nioptr = lustre_msg_buf(request->rq_repmsg, 1, size[1]);
          if (!nioptr) {
                  /* nioptr missing or short */
@@ -1300,9 +1329,8 @@ out_req:
          RETURN(rc);
  }
  
-static int sanosc_brw_write(struct lustre_handle *conn,
-                            struct lov_stripe_md *lsm,
-                            obd_count page_count,
+static int sanosc_brw_write(struct lustre_handle *conn, struct obdo *oa,
+                            struct lov_stripe_md *lsm, obd_count page_count,
                              struct brw_page *pga)
  {
          struct ptlrpc_request *request = NULL;
@@ -1326,9 +1354,11 @@ static int sanosc_brw_write(struct lustre_handle *conn,
          nioptr = lustre_msg_buf(request->rq_reqmsg, 2,
                                  sizeof (*nioptr) * page_count);
  
-        iooptr->ioo_id = lsm->lsm_object_id;
-        iooptr->ioo_gr = 0;
-        iooptr->ioo_type = S_IFREG;
+        memcpy(&body->oa, oa, sizeof(body->oa));
+
+        iooptr->ioo_id = oa->o_id;
+        iooptr->ioo_gr = oa->o_valid & 0 ? oa->o_gr : 0;
+        iooptr->ioo_type = oa->o_mode;
          iooptr->ioo_bufcnt = page_count;
  
          /* pack request */
@@ -1414,7 +1444,7 @@ out_req:
          RETURN(rc);
  }
  
-static int sanosc_brw(int cmd, struct lustre_handle *conn,
+static int sanosc_brw(int cmd, struct lustre_handle *conn, struct obdo *oa,
                        struct lov_stripe_md *lsm, obd_count page_count,
                        struct brw_page *pga, struct obd_trans_info *oti)
  {
@@ -1430,9 +1460,9 @@ static int sanosc_brw(int cmd, struct lustre_handle *conn,
                          pages_per_brw = page_count;
  
                  if (cmd & OBD_BRW_WRITE)
-                        rc = sanosc_brw_write(conn, lsm, pages_per_brw, pga);
+                        rc = sanosc_brw_write(conn, oa, lsm, pages_per_brw,pga);
                  else
-                        rc = sanosc_brw_read(conn, lsm, pages_per_brw, pga);
+                        rc = sanosc_brw_read(conn, oa, lsm, pages_per_brw, pga);
  
                  if (rc != 0)
                          RETURN(rc);
@@ -1445,7 +1475,7 @@ static int sanosc_brw(int cmd, struct lustre_handle *conn,
  #endif
  #endif
  
-static int osc_mark_page_dirty(struct lustre_handle *conn, 
+static int osc_mark_page_dirty(struct lustre_handle *conn,
                                 struct lov_stripe_md *lsm, unsigned long offset)
  {
          struct client_obd *cli = &class_conn2obd(conn)->u.cli;
@@ -1455,12 +1485,14 @@ static int osc_mark_page_dirty(struct lustre_handle *conn,
  
          down(&cli->cl_dirty_sem);
  
-        if (cli->cl_ost_can_grant && 
+#if 0
+        if (cli->cl_ost_can_grant &&
              (cli->cl_dirty + PAGE_CACHE_SIZE >= cli->cl_dirty_granted)) {
                  CDEBUG(D_INODE, "granted "LPU64" < "LPU64"\n",
                         cli->cl_dirty_granted, cli->cl_dirty + PAGE_CACHE_SIZE);
                  GOTO(out, rc = -EDQUOT);
          }
+#endif
  
          rc = ot_mark_offset(dirty_ot, offset);
          if (rc)
@@ -1474,7 +1506,7 @@ out:
          RETURN(rc);
  }
  
-static int osc_clear_dirty_pages(struct lustre_handle *conn, 
+static int osc_clear_dirty_pages(struct lustre_handle *conn,
                                   struct lov_stripe_md *lsm,
                                   unsigned long start, unsigned long end,
                                   unsigned long *cleared)
@@ -1526,7 +1558,7 @@ static int osc_enqueue(struct lustre_handle *connh, struct lov_stripe_md *lsm,
                         struct lustre_handle *lockh)
  {
          struct ldlm_res_id res_id = { .name = {lsm->lsm_object_id} };
-        struct obd_device *obddev = class_conn2obd(connh);
+        struct obd_device *obd = class_conn2obd(connh);
          struct ldlm_extent *extent = extentp;
          int rc;
          ENTRY;
@@ -1537,7 +1569,7 @@ static int osc_enqueue(struct lustre_handle *connh, struct lov_stripe_md *lsm,
          extent->end |= ~PAGE_MASK;
  
          /* Next, search for already existing extent locks that will cover us */
-        rc = ldlm_lock_match(obddev->obd_namespace, LDLM_FL_MATCH_DATA, &res_id,
+        rc = ldlm_lock_match(obd->obd_namespace, LDLM_FL_MATCH_DATA, &res_id,
                               type, extent, sizeof(extent), mode, data, lockh);
          if (rc == 1)
                  /* We already have a lock, and it's referenced */
@@ -1556,7 +1588,7 @@ static int osc_enqueue(struct lustre_handle *connh, struct lov_stripe_md *lsm,
           * locks out from other users right now, too. */
  
          if (mode == LCK_PR) {
-                rc = ldlm_lock_match(obddev->obd_namespace, LDLM_FL_MATCH_DATA,
+                rc = ldlm_lock_match(obd->obd_namespace, LDLM_FL_MATCH_DATA,
                                       &res_id, type, extent, sizeof(extent),
                                       LCK_PW, data, lockh);
                  if (rc == 1) {
@@ -1570,7 +1602,7 @@ static int osc_enqueue(struct lustre_handle *connh, struct lov_stripe_md *lsm,
                  }
          }
  
-        rc = ldlm_cli_enqueue(connh, NULL, obddev->obd_namespace, parent_lock,
+        rc = ldlm_cli_enqueue(connh, NULL, obd->obd_namespace, parent_lock,
                                res_id, type, extent, sizeof(extent), mode, flags,
                                ldlm_completion_ast, callback, data, lockh);
          RETURN(rc);
@@ -1581,7 +1613,7 @@ static int osc_match(struct lustre_handle *connh, struct lov_stripe_md *lsm,
                         int *flags, void *data, struct lustre_handle *lockh)
  {
          struct ldlm_res_id res_id = { .name = {lsm->lsm_object_id} };
-        struct obd_device *obddev = class_conn2obd(connh);
+        struct obd_device *obd = class_conn2obd(connh);
          struct ldlm_extent *extent = extentp;
          int rc;
          ENTRY;
@@ -1592,7 +1624,7 @@ static int osc_match(struct lustre_handle *connh, struct lov_stripe_md *lsm,
          extent->end |= ~PAGE_MASK;
  
          /* Next, search for already existing extent locks that will cover us */
-        rc = ldlm_lock_match(obddev->obd_namespace, *flags, &res_id, type,
+        rc = ldlm_lock_match(obd->obd_namespace, *flags, &res_id, type,
                               extent, sizeof(extent), mode, data, lockh);
          if (rc)
                  RETURN(rc);
@@ -1601,7 +1633,7 @@ static int osc_match(struct lustre_handle *connh, struct lov_stripe_md *lsm,
           * VFS and page cache already protect us locally, so lots of readers/
           * writers can share a single PW lock. */
          if (mode == LCK_PR) {
-                rc = ldlm_lock_match(obddev->obd_namespace, *flags, &res_id,
+                rc = ldlm_lock_match(obd->obd_namespace, *flags, &res_id,
                                       type, extent, sizeof(extent), LCK_PW,
                                       data, lockh);
                  if (rc == 1) {
@@ -1628,22 +1660,28 @@ static int osc_cancel(struct lustre_handle *oconn, struct lov_stripe_md *md,
  static int osc_cancel_unused(struct lustre_handle *connh,
                               struct lov_stripe_md *lsm, int flags, void *opaque)
  {
-        struct obd_device *obddev = class_conn2obd(connh);
+        struct obd_device *obd = class_conn2obd(connh);
          struct ldlm_res_id res_id = { .name = {lsm->lsm_object_id} };
  
-        return ldlm_cli_cancel_unused(obddev->obd_namespace, &res_id, flags,
+        return ldlm_cli_cancel_unused(obd->obd_namespace, &res_id, flags,
                                        opaque);
  }
  
-static int osc_statfs(struct obd_export *exp, struct obd_statfs *osfs)
+static int osc_statfs(struct obd_device *obd, struct obd_statfs *osfs,
+                      unsigned long max_age)
  {
          struct obd_statfs *msfs;
          struct ptlrpc_request *request;
          int rc, size = sizeof(*osfs);
          ENTRY;
  
-        request = ptlrpc_prep_req(exp->exp_obd->u.cli.cl_import, OST_STATFS, 0, 
-                                  NULL, NULL);
+        /* We could possibly pass max_age in the request (as an absolute
+         * timestamp or a "seconds.usec ago") so the target can avoid doing
+         * extra calls into the filesystem if that isn't necessary (e.g.
+         * during mount that would help a bit).  Having relative timestamps
+         * is not so great if request processing is slow, while absolute
+         * timestamps are not ideal because they need time synchronization. */
+        request = ptlrpc_prep_req(obd->u.cli.cl_import, OST_STATFS,0,NULL,NULL);
          if (!request)
                  RETURN(-ENOMEM);
  
@@ -1655,14 +1693,14 @@ static int osc_statfs(struct obd_export *exp, struct obd_statfs *osfs)
                  GOTO(out, rc);
          }
  
-        msfs = lustre_swab_repbuf (request, 0, sizeof (*msfs),
-                                   lustre_swab_obd_statfs);
+        msfs = lustre_swab_repbuf(request, 0, sizeof(*msfs),
+                                  lustre_swab_obd_statfs);
          if (msfs == NULL) {
-                CERROR ("Can't unpack obd_statfs\n");
-                GOTO (out, rc = -EPROTO);
+                CERROR("Can't unpack obd_statfs\n");
+                GOTO(out, rc = -EPROTO);
          }
  
-        memcpy (osfs, msfs, sizeof (*msfs));
+        memcpy(osfs, msfs, sizeof(*osfs));
  
          EXIT;
   out:
@@ -1717,16 +1755,16 @@ static int osc_getstripe(struct lustre_handle *conn, struct lov_stripe_md *lsm,
  static int osc_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
                           void *karg, void *uarg)
  {
-        struct obd_device *obddev = class_conn2obd(conn);
+        struct obd_device *obd = class_conn2obd(conn);
          struct obd_ioctl_data *data = karg;
          int err = 0;
          ENTRY;
  
          switch (cmd) {
          case IOC_OSC_REGISTER_LOV: {
-                if (obddev->u.cli.cl_containing_lov)
+                if (obd->u.cli.cl_containing_lov)
                          GOTO(out, err = -EALREADY);
-                obddev->u.cli.cl_containing_lov = (struct obd_device *)karg;
+                obd->u.cli.cl_containing_lov = (struct obd_device *)karg;
                  GOTO(out, err);
          }
          case OBD_IOC_LOV_GET_CONFIG: {
@@ -1758,9 +1796,9 @@ static int osc_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
                  desc->ld_default_stripe_size = 0;
                  desc->ld_default_stripe_offset = 0;
                  desc->ld_pattern = 0;
-                memcpy(&desc->ld_uuid, &obddev->obd_uuid, sizeof(uuid));
+                memcpy(&desc->ld_uuid, &obd->obd_uuid, sizeof(uuid));
  
-                memcpy(data->ioc_inlbuf2, &obddev->obd_uuid, sizeof(uuid));
+                memcpy(data->ioc_inlbuf2, &obd->obd_uuid, sizeof(uuid));
  
                  err = copy_to_user((void *)uarg, buf, len);
                  if (err)
@@ -1777,15 +1815,15 @@ static int osc_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
                  err = osc_getstripe(conn, karg, uarg);
                  GOTO(out, err);
          case OBD_IOC_CLIENT_RECOVER:
-                err = ptlrpc_recover_import(obddev->u.cli.cl_import,
+                err = ptlrpc_recover_import(obd->u.cli.cl_import,
                                              data->ioc_inlbuf1);
                  GOTO(out, err);
          case IOC_OSC_SET_ACTIVE:
-                err = ptlrpc_set_import_active(obddev->u.cli.cl_import,
+                err = ptlrpc_set_import_active(obd->u.cli.cl_import,
                                                 data->ioc_offset);
                  GOTO(out, err);
          default:
-                CERROR ("osc_ioctl(): unrecognised ioctl %#x\n", cmd);
+                CERROR("unrecognised ioctl %#x by %s\n", cmd, current->comm);
                  GOTO(out, err = -ENOTTY);
          }
  out:
@@ -1809,6 +1847,104 @@ static int osc_get_info(struct lustre_handle *conn, obd_count keylen,
          RETURN(-EINVAL);
  }
  
+/* Forward a "set info" key to the target as an OST_SET_INFO request.
+ *
+ * Only keys whose first bytes are "mds_conn" are accepted; anything else
+ * yields -EINVAL.  The request carries a single buffer holding the key
+ * (keylen bytes); vallen and val are not used by this function.
+ *
+ * Returns -ENOMEM if the request cannot be prepared, otherwise whatever
+ * ptlrpc_queue_wait() returns. */
+static int osc_set_info(struct lustre_handle *conn, obd_count keylen,
+                        void *key, obd_count vallen, void *val)
+{
+        struct ptlrpc_request *req;
+        char *bufs[1] = { key };
+        int size = keylen;
+        int rc;
+        ENTRY;
+
+        /* Reject keys that are too short to hold the prefix ... */
+        if (keylen < strlen("mds_conn"))
+                RETURN(-EINVAL);
+
+        /* ... and keys that do not start with it */
+        if (memcmp(key, "mds_conn", strlen("mds_conn")) != 0)
+                RETURN(-EINVAL);
+
+        req = ptlrpc_prep_req(class_conn2cliimp(conn), OST_SET_INFO, 1,
+                              &size, bufs);
+        if (req == NULL)
+                RETURN(-ENOMEM);
+
+        /* The expected reply has no buffers of its own */
+        req->rq_replen = lustre_msg_size(0, NULL);
+
+        rc = ptlrpc_queue_wait(req);
+        ptlrpc_req_finished(req);
+
+        RETURN(rc);
+}
+
+/* Queue one llog cancel cookie for the target, batching cookies in the
+ * client's pending llog_commit_data (cli->cl_llcd).
+ *
+ * - If count is 0, cookies is NULL, or the first cookie equals zero_cookie,
+ *   nothing is queued.  In that case a pending llcd is sent only when
+ *   OBD_LLOG_FL_SENDNOW is set in flags; otherwise the call is a no-op.
+ * - Otherwise exactly one cookie (*cookies) is appended to the pending llcd,
+ *   grabbing a new one with llcd_grab() when none is pending.  count is not
+ *   used beyond the zero test above.
+ * - The llcd is handed to llcd_send() when less than one more cookie fits
+ *   within PAGE_SIZE, or when OBD_LLOG_FL_SENDNOW is set.
+ *
+ * lsm is not used.  cli->cl_sem is held for the whole operation.
+ *
+ * Returns 0 on success, or -ENOMEM if no llcd could be obtained (the cookie
+ * is dropped and logged). */
+static int osc_log_cancel(struct lustre_handle *conn, struct lov_stripe_md *lsm,
+                          int count, struct llog_cookie *cookies, int flags)
+{
+        struct obd_device *obd = class_conn2obd(conn);
+        struct client_obd *cli = &obd->u.cli;
+        struct llog_commit_data *llcd;
+        int rc = 0;
+        ENTRY;
+
+        /* Taken once here; every path below leaves through "out" */
+        down(&cli->cl_sem);
+        llcd = cli->cl_llcd;
+
+        if (count == 0 || cookies == NULL ||
+            memcmp(cookies, &zero_cookie, sizeof(*cookies)) == 0) {
+                /* No cookie to add: only an explicit flush does anything */
+                if (llcd == NULL || !(flags & OBD_LLOG_FL_SENDNOW))
+                        GOTO(out, rc);
+
+                GOTO(send_now, rc);
+        }
+
+        if (llcd == NULL) {
+                llcd = llcd_grab();
+                if (llcd == NULL) {
+                        CERROR("couldn't get an llcd - dropped "LPX64":%x+%u\n",
+                               cookies->lgc_lgl.lgl_oid,
+                               cookies->lgc_lgl.lgl_ogen, cookies->lgc_index);
+                        GOTO(out, rc = -ENOMEM);
+                }
+                llcd->llcd_import = cli->cl_import;
+                cli->cl_llcd = llcd;
+        }
+
+        /* NOTE(review): llcd_cookiebytes is a byte count; if llcd_cookies is
+         * not a byte-typed array this offset is scaled by the element size.
+         * Confirm against the llog_commit_data declaration. */
+        memcpy(llcd->llcd_cookies + llcd->llcd_cookiebytes, cookies,
+               sizeof(*cookies));
+        llcd->llcd_cookiebytes += sizeof(*cookies);
+
+        /* If we can't fit any more cookies into the page, we need to send it */
+send_now:
+        if (PAGE_SIZE - llcd->llcd_cookiebytes < sizeof(*cookies) ||
+            (flags & OBD_LLOG_FL_SENDNOW)) {
+                /* Detach before sending so the next cookie starts a new llcd */
+                cli->cl_llcd = NULL;
+                llcd_send(llcd);
+        }
+out:
+        up(&cli->cl_sem);
+
+        /* Pair ENTRY with RETURN like the other traced functions here */
+        RETURN(rc);
+}
+
+/* Disconnect from the target, first pushing out any batched llog cancel
+ * cookies so they are not left behind in cl_llcd.
+ *
+ * Returns the result of client_import_disconnect(). */
+static int osc_disconnect(struct lustre_handle *conn, int flags)
+{
+        struct client_obd *cli = &class_conn2obd(conn)->u.cli;
+
+        /* A pending llcd means there are unsent cancels: ask
+         * osc_log_cancel() to send them now, with no new cookie */
+        if (cli->cl_llcd != NULL)
+                osc_log_cancel(conn, NULL, 0, NULL, OBD_LLOG_FL_SENDNOW);
+
+        return client_import_disconnect(conn, flags);
+}
+
+/* Add the record rec to the llog behind cathandle, passing logcookies to
+ * llog_add_record() for the record's cookie.
+ *
+ * Asserts that logcookies is non-NULL and numcookies is positive; conn and
+ * lsm are not used.  Always returns 1.
+ * NOTE(review): the outcome of llog_add_record() is not examined here, so a
+ * failure there (if it can report one) is not propagated - confirm. */
+static int osc_log_add(struct lustre_handle *conn,
+                       struct llog_handle *cathandle,
+                       struct llog_trans_hdr *rec, struct lov_stripe_md *lsm,
+                       struct llog_cookie *logcookies, int numcookies)
+{
+        ENTRY;
+        LASSERT(logcookies && numcookies > 0);
+
+        llog_add_record(cathandle, rec, logcookies);
+
+        RETURN(1);
+}
+
  struct obd_ops osc_obd_ops = {
          o_owner:        THIS_MODULE,
          o_attach:       osc_attach,
@@ -1816,14 +1952,14 @@ struct obd_ops osc_obd_ops = {
          o_setup:        client_obd_setup,
          o_cleanup:      client_obd_cleanup,
          o_connect:      client_import_connect,
-        o_disconnect:   client_import_disconnect,
+        o_disconnect:   osc_disconnect,
          o_statfs:       osc_statfs,
          o_packmd:       osc_packmd,
          o_unpackmd:     osc_unpackmd,
          o_create:       osc_create,
          o_destroy:      osc_destroy,
          o_getattr:      osc_getattr,
-        o_getattr_async: osc_getattr_async,
+        o_getattr_async:osc_getattr_async,
          o_setattr:      osc_setattr,
          o_open:         osc_open,
          o_close:        osc_close,
@@ -1833,14 +1969,18 @@ struct obd_ops osc_obd_ops = {
          o_enqueue:      osc_enqueue,
          o_match:        osc_match,
          o_cancel:       osc_cancel,
-        o_cancel_unused: osc_cancel_unused,
+        o_cancel_unused:osc_cancel_unused,
          o_iocontrol:    osc_iocontrol,
          o_get_info:     osc_get_info,
-        .o_mark_page_dirty =    osc_mark_page_dirty,
-        .o_clear_dirty_pages =  osc_clear_dirty_pages,
-        .o_last_dirty_offset =  osc_last_dirty_offset,
+        o_set_info:     osc_set_info,
+        o_log_cancel:   osc_log_cancel,
+        o_log_add:      osc_log_add,
+        o_mark_page_dirty:    osc_mark_page_dirty,
+        o_clear_dirty_pages:  osc_clear_dirty_pages,
+        o_last_dirty_offset:  osc_last_dirty_offset,
  };
  
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
  struct obd_ops sanosc_obd_ops = {
          o_owner:        THIS_MODULE,
          o_attach:       osc_attach,
@@ -1858,48 +1998,54 @@ struct obd_ops sanosc_obd_ops = {
          o_setattr:      osc_setattr,
          o_open:         osc_open,
          o_close:        osc_close,
-#ifdef __KERNEL__
          o_setup:        client_sanobd_setup,
          o_brw:          sanosc_brw,
-#endif
          o_punch:        osc_punch,
          o_enqueue:      osc_enqueue,
          o_match:        osc_match,
          o_cancel:       osc_cancel,
          o_cancel_unused: osc_cancel_unused,
          o_iocontrol:    osc_iocontrol,
-        .o_mark_page_dirty =    osc_mark_page_dirty,
-        .o_clear_dirty_pages =  osc_clear_dirty_pages,
-        .o_last_dirty_offset =  osc_last_dirty_offset,
+        o_log_cancel:   osc_log_cancel,
+        o_log_add:      osc_log_add,
+        o_mark_page_dirty:   osc_mark_page_dirty,
+        o_clear_dirty_pages: osc_clear_dirty_pages,
+        o_last_dirty_offset: osc_last_dirty_offset,
  };
+#endif
  
  int __init osc_init(void)
  {
-        struct lprocfs_static_vars lvars;
+        struct lprocfs_static_vars lvars, sanlvars;
          int rc;
          ENTRY;
  
          LASSERT(sizeof(struct obd_client_handle) <= FD_OSTDATA_SIZE);
          LASSERT(sizeof(struct obd_client_handle) <= OBD_INLINESZ);
  
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(osc,&lvars);
+        lprocfs_init_vars(osc,&sanlvars);
  
          rc = class_register_type(&osc_obd_ops, lvars.module_vars,
                                   LUSTRE_OSC_NAME);
          if (rc)
                  RETURN(rc);
  
-        rc = class_register_type(&sanosc_obd_ops, lvars.module_vars,
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+        rc = class_register_type(&sanosc_obd_ops, sanlvars.module_vars,
                                   LUSTRE_SANOSC_NAME);
          if (rc)
                  class_unregister_type(LUSTRE_OSC_NAME);
+#endif
  
          RETURN(rc);
  }
  
-static void __exit osc_exit(void)
+static void /*__exit*/ osc_exit(void)
  {
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
          class_unregister_type(LUSTRE_SANOSC_NAME);
+#endif
          class_unregister_type(LUSTRE_OSC_NAME);
  }
  
diff --git a/lustre/ost/.cvsignore b/lustre/ost/.cvsignore

index e530020..49c6100 100644 (file)
--- a/lustre/ost/.cvsignore
+++ b/lustre/ost/.cvsignore
@@ -6,3 +6,4 @@ Makefile
  Makefile.in
  .deps
  TAGS
+.*.cmd
diff --git a/lustre/ost/lproc_ost.c b/lustre/ost/lproc_ost.c

index c44093c..936706d 100644 (file)
--- a/lustre/ost/lproc_ost.c
+++ b/lustre/ost/lproc_ost.c
@@ -25,18 +25,18 @@
  #include <linux/lprocfs_status.h>
  
  #ifndef LPROCFS
-struct lprocfs_vars lprocfs_obd_vars[]  = { {0} };
-struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_obd_vars[]  = { {0} };
+static struct lprocfs_vars lprocfs_module_vars[] = { {0} };
  #else
-struct lprocfs_vars lprocfs_obd_vars[] = {
-        { "uuid",        lprocfs_rd_uuid,   0, 0 },
+static struct lprocfs_vars lprocfs_obd_vars[] = {
+        { "uuid",            lprocfs_rd_uuid,   0, 0 },
          { 0 }
  };
  
-struct lprocfs_vars lprocfs_module_vars[] = {
-        { "num_refs",   lprocfs_rd_numrefs, 0, 0 },
+static struct lprocfs_vars lprocfs_module_vars[] = {
+        { "num_refs",       lprocfs_rd_numrefs, 0, 0 },
          { 0 }
  };
  
  #endif /* LPROCFS */
-LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
+LPROCFS_INIT_VARS(ost, lprocfs_module_vars, lprocfs_obd_vars)
diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c

index 023deb2..6801e92 100644 (file)
--- a/lustre/ost/ost_handler.c
+++ b/lustre/ost/ost_handler.c
@@ -40,28 +40,25 @@
  #include <linux/lustre_export.h>
  #include <linux/init.h>
  #include <linux/lprocfs_status.h>
+#include <linux/lustre_commit_confd.h>
+#include <portals/list.h>
  
-inline void oti_init(struct obd_trans_info *oti,
-                           struct ptlrpc_request *req)
+void oti_init(struct obd_trans_info *oti, struct ptlrpc_request *req)
  {
-        if(oti == NULL)
+        if (oti == NULL)
                  return;
          memset(oti, 0, sizeof *oti);
  
-        
          if (req->rq_repmsg && req->rq_reqmsg != 0)
                  oti->oti_transno = req->rq_repmsg->transno;
-
-        EXIT;
  }
  
-inline void oti_to_request(struct obd_trans_info *oti,
-                           struct ptlrpc_request *req)
+void oti_to_request(struct obd_trans_info *oti, struct ptlrpc_request *req)
  {
-        int i;
          struct oti_req_ack_lock *ack_lock;
+        int i;
  
-        if(oti == NULL)
+        if (oti == NULL)
                  return;
  
          if (req->rq_repmsg)
@@ -75,7 +72,6 @@ inline void oti_to_request(struct obd_trans_info *oti,
                         sizeof(req->rq_ack_locks[i].lock));
                  req->rq_ack_locks[i].mode = ack_lock->mode;
          }
-        EXIT;
  }
  
  static int ost_destroy(struct ptlrpc_request *req, struct obd_trans_info *oti)
@@ -85,15 +81,16 @@ static int ost_destroy(struct ptlrpc_request *req, struct obd_trans_info *oti)
          int rc, size = sizeof(*body);
          ENTRY;
  
-        body = lustre_swab_reqbuf (req, 0, sizeof (*body),
-                                   lustre_swab_ost_body);
+        body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
          if (body == NULL)
-                RETURN (-EFAULT);
+                RETURN(-EFAULT);
  
          rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
          if (rc)
                  RETURN(rc);
  
+        if (body->oa.o_valid & OBD_MD_FLCOOKIE)
+                oti->oti_logcookies = obdo_logcookie(&body->oa);
          req->rq_status = obd_destroy(conn, &body->oa, NULL, oti);
          RETURN(0);
  }
@@ -105,16 +102,15 @@ static int ost_getattr(struct ptlrpc_request *req)
          int rc, size = sizeof(*body);
          ENTRY;
  
-        body = lustre_swab_reqbuf (req, 0, sizeof (*body),
-                                   lustre_swab_ost_body);
+        body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
          if (body == NULL)
-                RETURN (-EFAULT);
+                RETURN(-EFAULT);
  
          rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
          if (rc)
                  RETURN(rc);
  
-        repbody = lustre_msg_buf (req->rq_repmsg, 0, sizeof (*repbody));
+        repbody = lustre_msg_buf (req->rq_repmsg, 0, sizeof(*repbody));
          memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
          req->rq_status = obd_getattr(conn, &repbody->oa, NULL);
          RETURN(0);
@@ -130,10 +126,9 @@ static int ost_statfs(struct ptlrpc_request *req)
          if (rc)
                  RETURN(rc);
  
-        osfs = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*osfs));
-        memset(osfs, 0, size);
+        osfs = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*osfs));
  
-        req->rq_status = obd_statfs(req->rq_export, osfs);
+        req->rq_status = obd_statfs(req->rq_export->exp_obd, osfs, jiffies-HZ);
          if (req->rq_status != 0)
                  CERROR("ost: statfs failed: rc %d\n", req->rq_status);
  
@@ -167,16 +162,15 @@ static int ost_open(struct ptlrpc_request *req, struct obd_trans_info *oti)
          int rc, size = sizeof(*repbody);
          ENTRY;
  
-        body = lustre_swab_reqbuf (req, 0, sizeof (*body),
-                                   lustre_swab_ost_body);
+        body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
          if (body == NULL)
-                return (-EFAULT);
+                RETURN(-EFAULT);
  
          rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
          if (rc)
                  RETURN(rc);
  
-        repbody = lustre_msg_buf (req->rq_repmsg, 0, sizeof (*repbody));
+        repbody = lustre_msg_buf (req->rq_repmsg, 0, sizeof(*repbody));
          memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
          req->rq_status = obd_open(conn, &repbody->oa, NULL, oti, NULL);
          RETURN(0);
@@ -189,16 +183,15 @@ static int ost_close(struct ptlrpc_request *req, struct obd_trans_info *oti)
          int rc, size = sizeof(*repbody);
          ENTRY;
  
-        body = lustre_swab_reqbuf (req, 0, sizeof (*body),
-                                   lustre_swab_ost_body);
+        body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
          if (body == NULL)
-                RETURN (-EFAULT);
+                RETURN(-EFAULT);
  
          rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
          if (rc)
                  RETURN(rc);
  
-        repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*repbody));
+        repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*repbody));
          memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
          req->rq_status = obd_close(conn, &repbody->oa, NULL, oti);
          RETURN(0);
@@ -211,18 +204,19 @@ static int ost_create(struct ptlrpc_request *req, struct obd_trans_info *oti)
          int rc, size = sizeof(*repbody);
          ENTRY;
  
-        body = lustre_swab_reqbuf (req, 0, sizeof (*body),
-                                   lustre_swab_ost_body);
+        body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
          if (body == NULL)
-                RETURN (-EFAULT);
+                RETURN(-EFAULT);
  
          rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
          if (rc)
                  RETURN(rc);
  
-        repbody = lustre_msg_buf (req->rq_repmsg, 0, sizeof (*repbody));
+        repbody = lustre_msg_buf (req->rq_repmsg, 0, sizeof(*repbody));
          memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
+        oti->oti_logcookies = obdo_logcookie(&repbody->oa);
          req->rq_status = obd_create(conn, &repbody->oa, NULL, oti);
+        //obd_log_cancel(conn, NULL, 1, oti->oti_logcookies, 0);
          RETURN(0);
  }
  
@@ -233,10 +227,9 @@ static int ost_punch(struct ptlrpc_request *req, struct obd_trans_info *oti)
          int rc, size = sizeof(*repbody);
          ENTRY;
  
-        body = lustre_swab_reqbuf (req, 0, sizeof (*body),
-                                   lustre_swab_ost_body);
+        body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
          if (body == NULL)
-                RETURN (-EFAULT);
+                RETURN(-EFAULT);
  
          if ((body->oa.o_valid & (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS)) !=
              (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS))
@@ -246,7 +239,7 @@ static int ost_punch(struct ptlrpc_request *req, struct obd_trans_info *oti)
          if (rc)
                  RETURN(rc);
  
-        repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*repbody));
+        repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*repbody));
          memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
          req->rq_status = obd_punch(conn, &repbody->oa, NULL, repbody->oa.o_size,
                                     repbody->oa.o_blocks, oti);
@@ -260,16 +253,15 @@ static int ost_setattr(struct ptlrpc_request *req, struct obd_trans_info *oti)
          int rc, size = sizeof(*repbody);
          ENTRY;
  
-        body = lustre_swab_reqbuf (req, 0, sizeof (*body),
-                                   lustre_swab_ost_body);
+        body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
          if (body == NULL)
-                RETURN (-EFAULT);
+                RETURN(-EFAULT);
  
          rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
          if (rc)
                  RETURN(rc);
  
-        repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*repbody));
+        repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*repbody));
          memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
  
          req->rq_status = obd_setattr(conn, &repbody->oa, NULL, oti);
@@ -285,9 +277,9 @@ static int ost_bulk_timeout(void *data)
          RETURN(1);
  }
  
-static int get_per_page_niobufs (struct obd_ioobj *ioo, int nioo,
-                                 struct niobuf_remote *rnb, int nrnb,
-                                 struct niobuf_remote **pp_rnbp)
+static int get_per_page_niobufs(struct obd_ioobj *ioo, int nioo,
+                                struct niobuf_remote *rnb, int nrnb,
+                                struct niobuf_remote **pp_rnbp)
  {
          /* Copy a remote niobuf, splitting it into page-sized chunks
           * and setting ioo[i].ioo_bufcnt accordingly */
@@ -305,14 +297,14 @@ static int get_per_page_niobufs (struct obd_ioobj *ioo, int nioo,
                          obd_off p0 = offset >> PAGE_SHIFT;
                          obd_off pn = (offset + rnb[rnbidx].len - 1)>>PAGE_SHIFT;
  
-                        LASSERT (rnbidx < nrnb);
+                        LASSERT(rnbidx < nrnb);
  
                          npages += (pn + 1 - p0);
  
                          if (rnb[rnbidx].len == 0) {
                                  CERROR("zero len BRW: obj %d objid "LPX64
                                         " buf %u\n", i, ioo[i].ioo_id, j);
-                                return (-EINVAL);
+                                return -EINVAL;
                          }
                          if (j > 0 &&
                              rnb[rnbidx].offset <= rnb[rnbidx-1].offset) {
@@ -320,20 +312,20 @@ static int get_per_page_niobufs (struct obd_ioobj *ioo, int nioo,
                                         " buf %u offset "LPX64" <= "LPX64"\n",
                                         i, ioo[i].ioo_id, j, rnb[rnbidx].offset,
-                                       rnb[rnbidx].offset);
+                                       rnb[rnbidx-1].offset);
-                                return (-EINVAL);
+                                return -EINVAL;
                          }
                  }
  
-        LASSERT (rnbidx == nrnb);
+        LASSERT(rnbidx == nrnb);
  
          if (npages == nrnb) {       /* all niobufs are for single pages */
                  *pp_rnbp = rnb;
-                return (npages);
+                return npages;
          }
  
-        OBD_ALLOC (pp_rnb, sizeof (*pp_rnb) * npages);
+        OBD_ALLOC(pp_rnb, sizeof(*pp_rnb) * npages);
          if (pp_rnb == NULL)
-                return (-ENOMEM);
+                return -ENOMEM;
  
          /* now do the actual split */
          page = rnbidx = 0;
@@ -344,35 +336,37 @@ static int get_per_page_niobufs (struct obd_ioobj *ioo, int nioo,
                          obd_off off = rnb[rnbidx].offset;
                          int     nob = rnb[rnbidx].len;
  
-                        LASSERT (rnbidx < nrnb);
+                        LASSERT(rnbidx < nrnb);
                          do {
                                  obd_off  poff = off & (PAGE_SIZE - 1);
                                  int      pnob = (poff + nob > PAGE_SIZE) ?
                                                  PAGE_SIZE - poff : nob;
  
-                                LASSERT (page < npages);
+                                LASSERT(page < npages);
                                  pp_rnb[page].len = pnob;
                                  pp_rnb[page].offset = off;
-                                pp_rnb[page].flags = rnb->flags;
+                                /* each page inherits the flags of the niobuf
+                                 * being split, not those of rnb[0] */
+                                pp_rnb[page].flags = rnb[rnbidx].flags;
  
-                                CDEBUG (D_PAGE, "   obj %d id "LPX64
-                                        "page %d(%d) "LPX64" for %d\n",
-                                        i, ioo[i].ioo_id, obj_pages, page,
-                                        pp_rnb[page].offset, pp_rnb[page].len);
+                                CDEBUG(D_PAGE, "   obj %d id "LPX64
+                                       "page %d(%d) "LPX64" for %d\n",
+                                       i, ioo[i].ioo_id, obj_pages, page,
+                                       pp_rnb[page].offset, pp_rnb[page].len);
                                  page++;
                                  obj_pages++;
  
                                  off += pnob;
                                  nob -= pnob;
                          } while (nob > 0);
-                        LASSERT (nob == 0);
+                        LASSERT(nob == 0);
                  }
                  ioo[i].ioo_bufcnt = obj_pages;
          }
-        LASSERT (page == npages);
+        LASSERT(page == npages);
  
          *pp_rnbp = pp_rnb;
-        return (npages);
+        return npages;
  }
  
  static void free_per_page_niobufs (int npages, struct niobuf_remote *pp_rnb,
@@ -381,23 +373,23 @@ static void free_per_page_niobufs (int npages, struct niobuf_remote *pp_rnb,
          if (pp_rnb == rnb)                      /* didn't allocate above */
                  return;
  
-        OBD_FREE (pp_rnb, sizeof (*pp_rnb) * npages);
+        OBD_FREE(pp_rnb, sizeof(*pp_rnb) * npages);
  }
  
  #if CHECKSUM_BULK
+/* Run ost_checksum() over every page on desc->bd_page_list; returns the
+ * accumulated value. */
  __u64 ost_checksum_bulk (struct ptlrpc_bulk_desc *desc)
  {
          __u64             cksum = 0;
-        struct list_head *tmp;
-        char             *ptr;
+        struct ptlrpc_bulk_page *bp;
  
-        list_for_each (tmp, &desc->bd_page_list) {
-                struct ptlrpc_bulk_page *bp;
-
-                bp = list_entry (tmp, struct ptlrpc_bulk_page, bp_link);
-                ptr = kmap (bp->bp_page);
-                ost_checksum (&cksum, ptr + bp->bp_pageoffset, bp->bp_buflen);
-                kunmap (bp->bp_page);
+        list_for_each_entry(bp, &desc->bd_page_list, bp_link) {
+                ost_checksum(&cksum, kmap(bp->bp_page) + bp->bp_pageoffset,
+                             bp->bp_buflen);
+                kunmap(bp->bp_page);
          }
+
+        return cksum;
  }
  #endif
@@ -409,9 +397,9 @@ static int ost_brw_read(struct ptlrpc_request *req)
          struct niobuf_remote    *pp_rnb;
          struct niobuf_local     *local_nb;
          struct obd_ioobj        *ioo;
-        struct ost_body         *body;
+        struct ost_body         *body, *repbody;
          struct l_wait_info       lwi;
-        void                    *desc_priv = NULL;
+        struct obd_trans_info    oti = { 0 };
          int                      size[1] = { sizeof(*body) };
          int                      comms_error = 0;
          int                      niocount;
@@ -426,35 +414,36 @@ static int ost_brw_read(struct ptlrpc_request *req)
  
          body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
          if (body == NULL) {
-                CERROR ("Missing/short ost_body\n");
-                GOTO (out, rc = -EFAULT);
+                CERROR("Missing/short ost_body\n");
+                GOTO(out, rc = -EFAULT);
          }
  
-        ioo = lustre_swab_reqbuf (req, 1, sizeof (*ioo),
-                                  lustre_swab_obd_ioobj);
+        ioo = lustre_swab_reqbuf(req, 1, sizeof(*ioo), lustre_swab_obd_ioobj);
          if (ioo == NULL) {
-                CERROR ("Missing/short ioobj\n");
-                GOTO (out, rc = -EFAULT);
+                CERROR("Missing/short ioobj\n");
+                GOTO(out, rc = -EFAULT);
          }
  
          niocount = ioo->ioo_bufcnt;
-        remote_nb = lustre_swab_reqbuf(req, 2, niocount * sizeof (*remote_nb),
+        remote_nb = lustre_swab_reqbuf(req, 2, niocount * sizeof(*remote_nb),
                                         lustre_swab_niobuf_remote);
          if (remote_nb == NULL) {
-                CERROR ("Missing/short niobuf\n");
-                GOTO (out, rc = -EFAULT);
+                CERROR("Missing/short niobuf\n");
+                GOTO(out, rc = -EFAULT);
          }
-        if (lustre_msg_swabbed (req->rq_reqmsg)) { /* swab remaining niobufs */
+        if (lustre_msg_swabbed(req->rq_reqmsg)) { /* swab remaining niobufs */
                  for (i = 1; i < niocount; i++)
                          lustre_swab_niobuf_remote (&remote_nb[i]);
          }
  
+        size[0] = sizeof(*body);
          rc = lustre_pack_msg(1, size, NULL, &req->rq_replen, &req->rq_repmsg);
          if (rc)
                  GOTO(out, rc);
  
+        /* FIXME all niobuf splitting should be done in obdfilter if needed */
          /* CAVEAT EMPTOR this sets ioo->ioo_bufcnt to # pages */
-        npages = get_per_page_niobufs (ioo, 1, remote_nb, niocount, &pp_rnb);
+        npages = get_per_page_niobufs(ioo, 1, remote_nb, niocount, &pp_rnb);
          if (npages < 0)
                  GOTO(out, rc = npages);
  
@@ -462,12 +451,12 @@ static int ost_brw_read(struct ptlrpc_request *req)
          if (local_nb == NULL)
                  GOTO(out_pp_rnb, rc = -ENOMEM);
  
-        desc = ptlrpc_prep_bulk_exp (req, BULK_PUT_SOURCE, OST_BULK_PORTAL);
+        desc = ptlrpc_prep_bulk_exp(req, BULK_PUT_SOURCE, OST_BULK_PORTAL);
          if (desc == NULL)
                  GOTO(out_local, rc = -ENOMEM);
  
-        rc = obd_preprw(OBD_BRW_READ, req->rq_export, NULL, 1, ioo, npages,
-                        pp_rnb, local_nb, &desc_priv, NULL);
+        rc = obd_preprw(OBD_BRW_READ, req->rq_export, &body->oa, 1,
+                        ioo, npages, pp_rnb, local_nb, &oti);
          if (rc != 0)
                  GOTO(out_bulk, rc);
  
@@ -480,7 +469,7 @@ static int ost_brw_read(struct ptlrpc_request *req)
                          break;
                  }
  
-                LASSERT (page_rc <= pp_rnb[i].len);
+                LASSERT(page_rc <= pp_rnb[i].len);
                  nob += page_rc;
                  if (page_rc != 0) {             /* some data! */
                          LASSERT (local_nb[i].page != NULL);
@@ -493,8 +482,8 @@ static int ost_brw_read(struct ptlrpc_request *req)
  
                  if (page_rc != pp_rnb[i].len) { /* short read */
                          /* All subsequent pages should be 0 */
-                        while (++i < npages)
-                                LASSERT (local_nb[i].rc == 0);
+                        while(++i < npages)
+                                LASSERT(local_nb[i].rc == 0);
                          break;
                  }
          }
@@ -509,7 +498,7 @@ static int ost_brw_read(struct ptlrpc_request *req)
                          if (rc) {
                                  LASSERT(rc == -ETIMEDOUT);
                                  CERROR ("timeout waiting for bulk PUT\n");
-                                ptlrpc_abort_bulk (desc);
+                                ptlrpc_abort_bulk(desc);
                          }
                  } else {
                          CERROR("ptlrpc_bulk_put failed RC: %d\n", rc);
@@ -518,25 +507,27 @@ static int ost_brw_read(struct ptlrpc_request *req)
          }
  
          /* Must commit after prep above in all cases */
-        rc = obd_commitrw(OBD_BRW_READ, req->rq_export, 1, ioo, npages,
-                          local_nb, desc_priv, NULL);
+        rc = obd_commitrw(OBD_BRW_READ, req->rq_export, &body->oa, 1,
+                          ioo, npages, local_nb, &oti);
+
+        repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*repbody));
+        memcpy(&repbody->oa, &body->oa, sizeof(repbody->oa));
  
  #if CHECKSUM_BULK
          if (rc == 0) {
-                body = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*body));
-                body->oa.o_rdev = ost_checksum_bulk (desc);
-                body->oa.o_valid |= OBD_MD_FLCKSUM;
+                repbody->oa.o_rdev = ost_checksum_bulk(desc);
+                repbody->oa.o_valid |= OBD_MD_FLCKSUM;
          }
  #endif
  
   out_bulk:
-        ptlrpc_free_bulk (desc);
+        ptlrpc_free_bulk(desc);
   out_local:
          OBD_FREE(local_nb, sizeof(*local_nb) * npages);
   out_pp_rnb:
-        free_per_page_niobufs (npages, pp_rnb, remote_nb);
+        free_per_page_niobufs(npages, pp_rnb, remote_nb);
   out:
-        LASSERT (rc <= 0);
+        LASSERT(rc <= 0);
          if (rc == 0) {
                  req->rq_status = nob;
                  ptlrpc_reply(req);
@@ -547,7 +538,7 @@ static int ost_brw_read(struct ptlrpc_request *req)
          } else {
                  if (req->rq_repmsg != NULL) {
                          /* reply out callback would free */
-                        OBD_FREE (req->rq_repmsg, req->rq_replen);
+                        OBD_FREE(req->rq_repmsg, req->rq_replen);
                  }
                  CERROR("bulk IO comms error: evicting %s@%s nid "LPU64"\n",
                         req->rq_export->exp_client_uuid.uuid,
@@ -566,11 +557,10 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
          struct niobuf_remote    *pp_rnb;
          struct niobuf_local     *local_nb;
          struct obd_ioobj        *ioo;
-        struct ost_body         *body;
+        struct ost_body         *body, *repbody;
          struct l_wait_info       lwi;
-        void                    *desc_priv = NULL;
          __u32                   *rcs;
-        int                      size[2] = { sizeof (*body) };
+        int                      size[2] = { sizeof(*body) };
          int                      objcount, niocount, npages;
          int                      comms_error = 0;
          int                      rc, rc2, swab, i, j;
@@ -580,39 +570,38 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
                  GOTO(out, rc = -EIO);
  
          /* pause before transaction has been started */
-        OBD_FAIL_TIMEOUT(OBD_FAIL_OST_BRW_PAUSE_BULK | OBD_FAIL_ONCE, 
+        OBD_FAIL_TIMEOUT(OBD_FAIL_OST_BRW_PAUSE_BULK | OBD_FAIL_ONCE,
                           obd_timeout +1);
  
-        swab = lustre_msg_swabbed (req->rq_reqmsg);
-        body = lustre_swab_reqbuf (req, 0, sizeof (*body),
-                                   lustre_swab_ost_body);
+        swab = lustre_msg_swabbed(req->rq_reqmsg);
+        body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
          if (body == NULL) {
-                CERROR ("Missing/short ost_body\n");
+                CERROR("Missing/short ost_body\n");
                  GOTO(out, rc = -EFAULT);
          }
  
-        LASSERT_REQSWAB (req, 1);
+        LASSERT_REQSWAB(req, 1);
          objcount = req->rq_reqmsg->buflens[1] / sizeof(*ioo);
          if (objcount == 0) {
-                CERROR ("Missing/short ioobj\n");
-                GOTO (out, rc = -EFAULT);
+                CERROR("Missing/short ioobj\n");
+                GOTO(out, rc = -EFAULT);
          }
-        ioo = lustre_msg_buf (req->rq_reqmsg, 1, objcount * sizeof (*ioo));
+        ioo = lustre_msg_buf (req->rq_reqmsg, 1, objcount * sizeof(*ioo));
          LASSERT (ioo != NULL);
          for (niocount = i = 0; i < objcount; i++) {
                  if (swab)
                          lustre_swab_obd_ioobj (&ioo[i]);
                  if (ioo[i].ioo_bufcnt == 0) {
-                        CERROR ("ioo[%d] has zero bufcnt\n", i);
-                        GOTO (out, rc = -EFAULT);
+                        CERROR("ioo[%d] has zero bufcnt\n", i);
+                        GOTO(out, rc = -EFAULT);
                  }
                  niocount += ioo[i].ioo_bufcnt;
          }
  
-        remote_nb = lustre_swab_reqbuf(req, 2, niocount * sizeof (*remote_nb),
+        remote_nb = lustre_swab_reqbuf(req, 2, niocount * sizeof(*remote_nb),
                                         lustre_swab_niobuf_remote);
          if (remote_nb == NULL) {
-                CERROR ("Missing/short niobuf\n");
+                CERROR("Missing/short niobuf\n");
                  GOTO(out, rc = -EFAULT);
          }
          if (swab) {                             /* swab the remaining niobufs */
@@ -620,30 +609,31 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
                          lustre_swab_niobuf_remote (&remote_nb[i]);
          }
  
-        size[1] = niocount * sizeof (*rcs);
+        size[1] = niocount * sizeof(*rcs);
          rc = lustre_pack_msg(2, size, NULL, &req->rq_replen,
                               &req->rq_repmsg);
          if (rc != 0)
-                GOTO (out, rc);
-        rcs = lustre_msg_buf (req->rq_repmsg, 1, niocount * sizeof (*rcs));
+                GOTO(out, rc);
+        rcs = lustre_msg_buf(req->rq_repmsg, 1, niocount * sizeof(*rcs));
  
+        /* FIXME all niobuf splitting should be done in obdfilter if needed */
          /* CAVEAT EMPTOR this sets ioo->ioo_bufcnt to # pages */
          npages = get_per_page_niobufs(ioo, objcount,remote_nb,niocount,&pp_rnb);
          if (npages < 0)
-                GOTO (out, rc = npages);
+                GOTO(out, rc = npages);
  
          OBD_ALLOC(local_nb, sizeof(*local_nb) * npages);
          if (local_nb == NULL)
                  GOTO(out_pp_rnb, rc = -ENOMEM);
  
-        desc = ptlrpc_prep_bulk_exp (req, BULK_GET_SINK, OST_BULK_PORTAL);
+        desc = ptlrpc_prep_bulk_exp(req, BULK_GET_SINK, OST_BULK_PORTAL);
          if (desc == NULL)
                  GOTO(out_local, rc = -ENOMEM);
  
-        rc = obd_preprw(OBD_BRW_WRITE, req->rq_export, NULL, objcount, ioo,
-                        npages, pp_rnb, local_nb, &desc_priv, oti);
+        rc = obd_preprw(OBD_BRW_WRITE, req->rq_export, &body->oa, objcount,
+                        ioo, npages, pp_rnb, local_nb, oti);
          if (rc != 0)
-                GOTO (out_bulk, rc);
+                GOTO(out_bulk, rc);
  
          /* NB Having prepped, we must commit... */
  
@@ -664,8 +654,8 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
                                            ptlrpc_bulk_complete(desc), &lwi);
                          if (rc) {
                                  LASSERT(rc == -ETIMEDOUT);
-                                CERROR ("timeout waiting for bulk GET\n");
-                                ptlrpc_abort_bulk (desc);
+                                CERROR("timeout waiting for bulk GET\n");
+                                ptlrpc_abort_bulk(desc);
                          }
                  } else {
                         CERROR("ptlrpc_bulk_get failed RC: %d\n", rc);
@@ -673,17 +663,21 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
                 comms_error = rc != 0;
          }
  
+        repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*repbody));
+        memcpy(&repbody->oa, &body->oa, sizeof(repbody->oa));
+
  #if CHECKSUM_BULK
          if (rc == 0 && (body->oa.o_valid & OBD_MD_FLCKSUM) != 0) {
                  static int cksum_counter;
                  __u64 client_cksum = body->oa.o_rdev;
-                __u64 cksum = ost_checksum_bulk (desc);
+                __u64 cksum = ost_checksum_bulk(desc);
  
                  if (client_cksum != cksum) {
                          CERROR("Bad checksum: client "LPX64", server "LPX64
                                 ", client NID "LPX64"\n", client_cksum, cksum,
                                 req->rq_connection->c_peer.peer_nid);
                          cksum_counter = 1;
+                        repbody->oa.o_rdev = cksum;
                  } else {
                          cksum_counter++;
                          if ((cksum_counter & (-cksum_counter)) == cksum_counter)
@@ -695,8 +689,8 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
          }
  #endif
          /* Must commit after prep above in all cases */
-        rc2 = obd_commitrw(OBD_BRW_WRITE, req->rq_export, objcount, ioo,
-                           npages, local_nb, desc_priv, oti);
+        rc2 = obd_commitrw(OBD_BRW_WRITE, req->rq_export, &repbody->oa,
+                           objcount, ioo, npages, local_nb, oti);
  
          if (rc == 0) {
                  /* set per-requested niobuf return codes */
@@ -705,25 +699,25 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
  
                          rcs[i] = 0;
                          do {
-                                LASSERT (j < npages);
+                                LASSERT(j < npages);
                                  if (local_nb[j].rc < 0)
                                          rcs[i] = local_nb[j].rc;
                                  nob -= pp_rnb[j].len;
                                  j++;
                          } while (nob > 0);
-                        LASSERT (nob == 0);
+                        LASSERT(nob == 0);
                  }
-                LASSERT (j == npages);
+                LASSERT(j == npages);
          }
          if (rc == 0)
                  rc = rc2;
  
   out_bulk:
-        ptlrpc_free_bulk (desc);
+        ptlrpc_free_bulk(desc);
   out_local:
          OBD_FREE(local_nb, sizeof(*local_nb) * npages);
   out_pp_rnb:
-        free_per_page_niobufs (npages, pp_rnb, remote_nb);
+        free_per_page_niobufs(npages, pp_rnb, remote_nb);
   out:
          if (rc == 0) {
                  oti_to_request(oti, req);
@@ -748,10 +742,9 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
  
  static int ost_san_brw(struct ptlrpc_request *req, int cmd)
  {
-        struct lustre_handle *conn = &req->rq_reqmsg->handle;
          struct niobuf_remote *remote_nb, *res_nb;
          struct obd_ioobj *ioo;
-        struct ost_body *body;
+        struct ost_body *body, *repbody;
          int rc, i, j, objcount, niocount, size[2] = {sizeof(*body)};
          int n;
          int swab;
@@ -759,19 +752,17 @@ static int ost_san_brw(struct ptlrpc_request *req, int cmd)
  
          /* XXX not set to use latest protocol */
  
-        swab = lustre_msg_swabbed (req->rq_reqmsg);
-        body = lustre_swab_reqbuf (req, 0, sizeof (*body),
-                                   lustre_swab_ost_body);
+        swab = lustre_msg_swabbed(req->rq_reqmsg);
+        body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
          if (body == NULL) {
-                CERROR ("Missing/short ost_body\n");
-                GOTO (out, rc = -EFAULT);
+                CERROR("Missing/short ost_body\n");
+                GOTO(out, rc = -EFAULT);
          }
  
-        ioo = lustre_swab_reqbuf(req, 1, sizeof (*ioo),
-                                 lustre_swab_obd_ioobj);
+        ioo = lustre_swab_reqbuf(req, 1, sizeof(*ioo), lustre_swab_obd_ioobj);
          if (ioo == NULL) {
-                CERROR ("Missing/short ioobj\n");
-                GOTO (out, rc = -EFAULT);
+                CERROR("Missing/short ioobj\n");
+                GOTO(out, rc = -EFAULT);
          }
          objcount = req->rq_reqmsg->buflens[1] / sizeof(*ioo);
          niocount = ioo[0].ioo_bufcnt;
@@ -781,11 +772,11 @@ static int ost_san_brw(struct ptlrpc_request *req, int cmd)
                  niocount += ioo[i].ioo_bufcnt;
          }
  
-        remote_nb = lustre_swab_reqbuf(req, 2, niocount * sizeof (*remote_nb),
+        remote_nb = lustre_swab_reqbuf(req, 2, niocount * sizeof(*remote_nb),
                                         lustre_swab_niobuf_remote);
          if (remote_nb == NULL) {
-                CERROR ("Missing/short niobuf\n");
-                GOTO (out, rc = -EFAULT);
+                CERROR("Missing/short niobuf\n");
+                GOTO(out, rc = -EFAULT);
          }
          if (swab) {                             /* swab the remaining niobufs */
                  for (i = 1; i < niocount; i++)
@@ -814,14 +805,17 @@ static int ost_san_brw(struct ptlrpc_request *req, int cmd)
          if (rc)
                  GOTO(out, rc);
  
-        req->rq_status = obd_san_preprw(cmd, conn, objcount, ioo,
-                                        niocount, remote_nb);
+        req->rq_status = obd_san_preprw(cmd, req->rq_export, &body->oa,
+                                        objcount, ioo, niocount, remote_nb);
  
          if (req->rq_status)
-                GOTO (out, rc = 0);
+                GOTO(out, rc = 0);
+
+        repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*repbody));
+        memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
  
          res_nb = lustre_msg_buf(req->rq_repmsg, 1, size[1]);
-        memcpy (res_nb, remote_nb, size[1]);
+        memcpy(res_nb, remote_nb, size[1]);
          rc = 0;
  out:
          if (rc) {
@@ -835,6 +829,74 @@ out:
          return rc;
  }
  
+/*
+ * Handle OBD_LOG_CANCEL: request buffer 0 holds an array of llog cookies,
+ * which is handed to obd_log_cancel().  An empty reply is packed before the
+ * cancel is attempted.
+ *
+ * Returns -EFAULT if the cookie buffer is missing/short, the
+ * lustre_pack_msg() error if the reply cannot be packed, otherwise the
+ * result of obd_log_cancel().
+ */
+static int ost_log_cancel(struct ptlrpc_request *req)
+{
+        struct lustre_handle *conn;
+        struct llog_cookie *logcookies;
+        int num_cookies, rc;
+        ENTRY;
+
+        logcookies = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*logcookies));
+        if (logcookies == NULL) {
+                DEBUG_REQ(D_HA, req, "no cookies sent");
+                RETURN(-EFAULT);
+        }
+        /* the buffer length determines how many cookies were sent */
+        num_cookies = req->rq_reqmsg->buflens[0] / sizeof(*logcookies);
+
+        /* workaround until we don't need to send replies */
+        rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg);
+        if (rc)
+                RETURN(rc);
+        req->rq_repmsg->status = 0;
+        /* end workaround */
+
+        conn = &req->rq_reqmsg->handle;
+        rc = obd_log_cancel(conn, NULL, num_cookies, logcookies, 0);
+
+        RETURN(rc);
+}
+
+/*
+ * Handle OST_SET_INFO: request buffer 0 is the key and its buffer length the
+ * key length; both are passed to obd_set_info() together with 0 and NULL
+ * for its last two arguments.
+ *
+ * Returns -EFAULT if no key was sent, the lustre_pack_msg() error if the
+ * empty reply cannot be packed, otherwise the result of obd_set_info().
+ */
+static int ost_set_info(struct ptlrpc_request *req)
+{
+        struct lustre_handle *conn;
+        char *key;
+        int keylen, rc;
+        ENTRY;
+
+        key = lustre_msg_buf(req->rq_reqmsg, 0, 1);
+        if (key == NULL) {
+                DEBUG_REQ(D_HA, req, "no set_info key");
+                RETURN(-EFAULT);
+        }
+        keylen = req->rq_reqmsg->buflens[0];
+
+        rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg);
+        if (rc)
+                RETURN(rc);
+
+        conn = &req->rq_reqmsg->handle;
+        rc = obd_set_info(conn, keylen, key, 0, NULL);
+        req->rq_repmsg->status = 0;
+        RETURN(rc);
+}
+
  static int filter_recovery_request(struct ptlrpc_request *req,
                                     struct obd_device *obd, int *process)
  {
@@ -850,9 +895,10 @@ static int filter_recovery_request(struct ptlrpc_request *req,
          case OST_DESTROY:
          case OST_OPEN:
          case OST_PUNCH:
-        case OST_SETATTR: 
+        case OST_SETATTR:
          case OST_SYNCFS:
          case OST_WRITE:
+        case OBD_LOG_CANCEL:
          case LDLM_ENQUEUE:
                  *process = target_queue_recovery_request(req, obd);
                  RETURN(0);
@@ -881,7 +927,7 @@ static int ost_handle(struct ptlrpc_request *req)
                  int abort_recovery, recovering;
  
                  if (req->rq_export == NULL) {
-                        CERROR("lustre_ost: operation %d on unconnected OST\n",
+                        CDEBUG(D_HA, "operation %d on unconnected OST\n",
                                 req->rq_reqmsg->opc);
                          req->rq_status = -ENOTCONN;
                          GOTO(out, rc = -ENOTCONN);
@@ -901,7 +947,7 @@ static int ost_handle(struct ptlrpc_request *req)
                          if (rc || !should_process)
                                  RETURN(rc);
                  }
-        } 
+        }
  
          if (strcmp(req->rq_obd->obd_type->typ_name, "ost") != 0)
                  GOTO(out, rc = -EINVAL);
@@ -988,10 +1034,19 @@ static int ost_handle(struct ptlrpc_request *req)
                  OBD_FAIL_RETURN(OBD_FAIL_OST_SYNCFS_NET, 0);
                  rc = ost_syncfs(req);
                  break;
+        case OST_SET_INFO:
+                DEBUG_REQ(D_INODE, req, "set_info");
+                rc = ost_set_info(req);
+                break;
          case OBD_PING:
                  DEBUG_REQ(D_INODE, req, "ping");
                  rc = target_handle_ping(req);
                  break;
+        case OBD_LOG_CANCEL:
+                CDEBUG(D_INODE, "log cancel\n");
+                OBD_FAIL_RETURN(OBD_FAIL_OBD_LOG_CANCEL_NET, 0);
+                rc = ost_log_cancel(req);
+                break;
          case LDLM_ENQUEUE:
                  CDEBUG(D_INODE, "enqueue\n");
                  OBD_FAIL_RETURN(OBD_FAIL_LDLM_ENQUEUE, 0);
@@ -1058,17 +1112,22 @@ out:
  static int ost_setup(struct obd_device *obddev, obd_count len, void *buf)
  {
          struct ost_obd *ost = &obddev->u.ost;
-        int err;
-        int i;
+        int err, i;
          ENTRY;
  
+#ifdef ENABLE_ORPHANS
+        err = llog_start_commit_thread();
+        if (err < 0)
+                RETURN(err);
+#endif
+
          ost->ost_service = ptlrpc_init_svc(OST_NEVENTS, OST_NBUFS,
                                             OST_BUFSIZE, OST_MAXREQSIZE,
                                             OST_REQUEST_PORTAL, OSC_REPLY_PORTAL,
                                             ost_handle, "ost", obddev);
          if (!ost->ost_service) {
                  CERROR("failed to start service\n");
-                GOTO(error_disc, err = -ENOMEM);
+                RETURN(-ENOMEM);
          }
  
          for (i = 0; i < OST_NUM_THREADS; i++) {
@@ -1077,17 +1136,14 @@ static int ost_setup(struct obd_device *obddev, obd_count len, void *buf)
                  err = ptlrpc_start_thread(obddev, ost->ost_service, name);
                  if (err) {
                          CERROR("error starting thread #%d: rc %d\n", i, err);
-                        GOTO(error_disc, err = -EINVAL);
+                        RETURN(-EINVAL);
                  }
          }
  
          RETURN(0);
-
-error_disc:
-        RETURN(err);
  }
  
-static int ost_cleanup(struct obd_device *obddev, int force, int failover)
+static int ost_cleanup(struct obd_device *obddev, int flags)
  {
          struct ost_obd *ost = &obddev->u.ost;
          int err = 0;
@@ -1106,7 +1162,7 @@ int ost_attach(struct obd_device *dev, obd_count len, void *data)
  {
          struct lprocfs_static_vars lvars;
  
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(ost,&lvars);
          return lprocfs_obd_attach(dev, lvars.obd_vars);
  }
  
@@ -1115,7 +1171,7 @@ int ost_detach(struct obd_device *dev)
          return lprocfs_obd_detach(dev);
  }
  
-/* I don't think this function is ever used, since nothing 
+/* I don't think this function is ever used, since nothing
   * connects directly to this module.
   */
  static int ost_connect(struct lustre_handle *conn,
@@ -1153,12 +1209,12 @@ static int __init ost_init(void)
          struct lprocfs_static_vars lvars;
          ENTRY;
  
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(ost,&lvars);
          RETURN(class_register_type(&ost_obd_ops, lvars.module_vars,
                                     LUSTRE_OST_NAME));
  }
  
-static void __exit ost_exit(void)
+static void /*__exit*/ ost_exit(void)
  {
          class_unregister_type(LUSTRE_OST_NAME);
  }
diff --git a/lustre/portals/.cvsignore b/lustre/portals/.cvsignore

index 99ac885..c1a9bdf 100644 (file)
--- a/lustre/portals/.cvsignore
+++ b/lustre/portals/.cvsignore
@@ -6,3 +6,4 @@ autom4te.cache
  config.log
  config.status
  configure
+.*.o.cmd
diff --git a/lustre/portals/Kernelenv.in b/lustre/portals/Kernelenv.in

index 29a713f..7a48c58 100644 (file)
--- a/lustre/portals/Kernelenv.in
+++ b/lustre/portals/Kernelenv.in
@@ -1 +1,6 @@
-EXTRA_CFLAGS= -Ifs/lustre/include -Ifs/lustre/portals/include
+EXTRA_CFLAGS := -Ifs/lustre/include -Ifs/lustre/portals/include
+# portals/utils/debug.c wants <linux/version.h> from userspace.  sigh.
+HOSTCFLAGS := -I@LINUX@/include $(EXTRA_CFLAGS)
+LIBREADLINE := @LIBREADLINE@
+# 2.5's makefiles aren't nice to cross dir libraries in host programs
+PTLCTLOBJS := debug.o l_ioctl.o parser.o portals.o
diff --git a/lustre/portals/Kernelenv.mk b/lustre/portals/Kernelenv.mk

index 29a713f..7c66dfa 100644 (file)
--- a/lustre/portals/Kernelenv.mk
+++ b/lustre/portals/Kernelenv.mk
@@ -1 +1,4 @@
-EXTRA_CFLAGS= -Ifs/lustre/include -Ifs/lustre/portals/include
+EXTRA_CFLAGS := -Ifs/lustre/include -Ifs/lustre/portals/include
+HOSTCFLAGS := $(EXTRA_CFLAGS)
+# the kernel doesn't want us to build archives for host binaries :/
+PTLCTLOBJS := debug.o l_ioctl.o parser.o portals.o
diff --git a/lustre/portals/Makefile.mk b/lustre/portals/Makefile.mk

index be0e51a..73a19df 100644 (file)
--- a/lustre/portals/Makefile.mk
+++ b/lustre/portals/Makefile.mk
@@ -1,6 +1,12 @@
-include fs/lustre/portals/Kernelenv
+include $(src)/Kernelenv
  
-obj-y += portals/
+# The ordering of these determines the order in which each subsystem's
+# module_init() functions are called.  If these are changed, make sure
+# they reflect the dependencies between each subsystem's _init functions.
  obj-y += libcfs/
-obj-y += knals/
+obj-y += portals/
  obj-y += router/
+obj-y += knals/
+obj-y += tests/
+
+obj-m += utils/
diff --git a/lustre/portals/archdep.m4 b/lustre/portals/archdep.m4

index 7a4e05c..1a7741b 100644 (file)
--- a/lustre/portals/archdep.m4
+++ b/lustre/portals/archdep.m4
@@ -11,8 +11,14 @@ AC_ARG_WITH(lib, [  --with-lib compile lustre library], host_cpu="lib")
  
  AC_ARG_WITH(linux, [  --with-linux=[path] set path to Linux source (default=/usr/src/linux)],LINUX=$with_linux,LINUX=/usr/src/linux)
  AC_SUBST(LINUX)
+# For an in-kernel build, (re)point $LINUX/fs/lustre at this source tree.
+if test x$enable_inkernel = xyes ; then
+        echo ln -s `pwd` $LINUX/fs/lustre
+        rm -f $LINUX/fs/lustre
+        ln -s `pwd` $LINUX/fs/lustre
+fi
  
  # --------- UML?  --------------------
  AC_MSG_CHECKING(if you are running user mode linux for $host_cpu ...)
  if test $host_cpu = "lib" ; then 
          host_cpu="lib"
@@ -111,6 +116,13 @@ case ${host_cpu} in
          MOD_LINK=elf64_ia64
  ;;
  
+       x86_64 )
+       AC_MSG_RESULT($host_cpu)
+        KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -fomit-frame-pointer -mno-red-zone -mcmodel=kernel -pipe -fno-reorder-blocks -finline-limit=2000 -fno-strength-reduce -fno-asynchronous-unwind-tables'
+       KCPPFLAGS='-D__KERNEL__ -DMODULE'
+        MOD_LINK=elf_x86_64
+;;
+
         sparc64 )
         AC_MSG_RESULT($host_cpu)
          KCFLAGS='-O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -Wno-unused -m64 -pipe -mno-fpu -mcpu=ultrasparc -mcmodel=medlow -ffixed-g4 -fcall-used-g5 -fcall-used-g7 -Wno-sign-compare -Wa,--undeclared-regs'
@@ -160,21 +172,33 @@ if test $host_cpu != "lib" ; then
        AC_MSG_ERROR(** cannot find $LINUX/include/linux/autoconf.h. Run make config in $LINUX.)
    fi
  
-# ------------ RELEASE and moduledir ------------------
+# ------------ LINUXRELEASE and moduledir ------------------
    AC_MSG_CHECKING(for Linux release)
    
    dnl We need to rid ourselves of the nasty [ ] quotes.
    changequote(, )
    dnl Get release from version.h
-  RELEASE="`sed -ne 's/.*UTS_RELEASE[ \"]*\([0-9.a-zA-Z_-]*\).*/\1/p' $LINUX/include/linux/version.h`"
+  LINUXRELEASE="`sed -ne 's/.*UTS_RELEASE[ \"]*\([0-9.a-zA-Z_-]*\).*/\1/p' $LINUX/include/linux/version.h`"
    changequote([, ])
    
-  moduledir='$(libdir)/modules/'$RELEASE/kernel
+  moduledir='$(libdir)/modules/'$LINUXRELEASE/kernel
    AC_SUBST(moduledir)
    
    modulefsdir='$(moduledir)/fs/$(PACKAGE)'
    AC_SUBST(modulefsdir)
    
+  AC_MSG_RESULT($LINUXRELEASE)
+  AC_SUBST(LINUXRELEASE)
+
+# ------------ RELEASE --------------------------------
+  AC_MSG_CHECKING(lustre release)
+  
+  dnl We need to rid ourselves of the nasty [ ] quotes.
+  changequote(, )
+  dnl Lustre release: UTS_RELEASE from version.h with dashes mapped to underscores and a build timestamp appended
+  RELEASE="`sed -ne 's/-/_/g' -e 's/.*UTS_RELEASE[ \"]*\([0-9.a-zA-Z_]*\).*/\1/p' $LINUX/include/linux/version.h`_`date +%Y%m%d%H%M`"
+  changequote([, ])
+
    AC_MSG_RESULT($RELEASE)
    AC_SUBST(RELEASE)
  
@@ -302,7 +326,7 @@ AM_CONDITIONAL(LIBLUSTRE, test x$host_cpu = xlib)
  # This needs to run after we've defined the KCPPFLAGS
  
  AC_MSG_CHECKING(for kernel version)
-AC_TRY_LINK([#define __KERNEL__
+AC_TRY_COMPILE([#define __KERNEL__
               #include <linux/sched.h>],
              [struct task_struct p;
               p.sighand = NULL;],
@@ -313,5 +337,5 @@ if test $RH_2_4_20 = 1; then
         AC_MSG_RESULT(redhat-2.4.20)
         CPPFLAGS="$CPPFLAGS -DCONFIG_RH_2_4_20"
  else
-       AC_MSG_RESULT($RELEASE)
+       AC_MSG_RESULT($LINUXRELEASE)
  fi 
diff --git a/lustre/portals/include/config.h.in b/lustre/portals/include/config.h.in

index 3aa6909..f9605ab 100644 (file)
--- a/lustre/portals/include/config.h.in
+++ b/lustre/portals/include/config.h.in
@@ -1,5 +1,11 @@
  /* portals/include/config.h.in.  Generated from configure.in by autoheader.  */
  
+/* Compile with orphan support */
+#undef ENABLE_ORPHANS
+
+/* Use the Pinger */
+#undef ENABLE_PINGER
+
  /* Define to 1 if you have the <inttypes.h> header file. */
  #undef HAVE_INTTYPES_H
  
diff --git a/lustre/portals/include/linux/kp30.h b/lustre/portals/include/linux/kp30.h

index ee3b9fc..2133391 100644 (file)
--- a/lustre/portals/include/linux/kp30.h
+++ b/lustre/portals/include/linux/kp30.h
@@ -4,7 +4,6 @@
  #ifndef _KP30_INCLUDED
  #define _KP30_INCLUDED
  
-
  #define PORTAL_DEBUG
  
  #ifndef offsetof
@@ -13,10 +12,6 @@
  
  #define LOWEST_BIT_SET(x)      ((x) & ~((x) - 1))
  
-#ifndef CONFIG_SMP
-# define smp_processor_id() 0
-#endif
-
  /*
   *  Debugging
   */
@@ -24,39 +19,34 @@ extern unsigned int portal_subsystem_debug;
  extern unsigned int portal_stack;
  extern unsigned int portal_debug;
  extern unsigned int portal_printk;
-/* Debugging subsystems  (8 bit ID)
- *
- * If you add debug subsystem #32, you need to send email to phil, because
- * you're going to break kernel subsystem debug filtering. */
-#define S_UNDEFINED    (0 << 24)
-#define S_MDC          (1 << 24)
-#define S_MDS          (2 << 24)
-#define S_OSC          (3 << 24)
-#define S_OST          (4 << 24)
-#define S_CLASS        (5 << 24)
-#define S_OBDFS        (6 << 24) /* obsolete */
-#define S_LLITE        (7 << 24)
-#define S_RPC          (8 << 24)
-#define S_EXT2OBD      (9 << 24) /* obsolete */
-#define S_PORTALS     (10 << 24)
-#define S_SOCKNAL     (11 << 24)
-#define S_QSWNAL      (12 << 24)
-#define S_PINGER      (13 << 24)
-#define S_FILTER      (14 << 24)
-#define S_TRACE       (15 << 24) /* obsolete */
-#define S_ECHO        (16 << 24)
-#define S_LDLM        (17 << 24)
-#define S_LOV         (18 << 24)
-#define S_GMNAL       (19 << 24)
-#define S_PTLROUTER   (20 << 24)
-#define S_COBD        (21 << 24)
-#define S_PTLBD       (22 << 24)
-#define S_LOG         (23 << 24)
-
-/* If you change these values, please keep portals/linux/utils/debug.c
+/* Debugging subsystems (32 bits, non-overlapping) */
+#define S_UNDEFINED    (1 << 0)
+#define S_MDC          (1 << 1)
+#define S_MDS          (1 << 2)
+#define S_OSC          (1 << 3)
+#define S_OST          (1 << 4)
+#define S_CLASS        (1 << 5)
+#define S_LOG          (1 << 6)
+#define S_LLITE        (1 << 7)
+#define S_RPC          (1 << 8)
+#define S_MGMT         (1 << 9)
+#define S_PORTALS     (1 << 10)
+#define S_SOCKNAL     (1 << 11)
+#define S_QSWNAL      (1 << 12)
+#define S_PINGER      (1 << 13)
+#define S_FILTER      (1 << 14)
+#define S_PTLBD       (1 << 15)
+#define S_ECHO        (1 << 16)
+#define S_LDLM        (1 << 17)
+#define S_LOV         (1 << 18)
+#define S_GMNAL       (1 << 19)
+#define S_PTLROUTER   (1 << 20)
+#define S_COBD        (1 << 21)
+
+/* If you change these values, please keep portals/utils/debug.c
   * up to date! */
  
-/* Debugging masks (24 bits, non-overlapping) */
+/* Debugging masks (32 bits, non-overlapping) */
  #define D_TRACE     (1 << 0) /* ENTRY/EXIT markers */
  #define D_INODE     (1 << 1)
  #define D_SUPER     (1 << 2)
@@ -80,20 +70,23 @@ extern unsigned int portal_printk;
  #define D_RPCTRACE  (1 << 20) /* for distributed debugging */
  #define D_VFSTRACE  (1 << 21)
  
-#ifndef __KERNEL__
-#define THREAD_SIZE 8192
+#ifdef __KERNEL__
+# include <linux/sched.h> /* THREAD_SIZE */
+#else
+# define THREAD_SIZE 8192
  #endif
-#ifdef  __ia64__
-#define CDEBUG_STACK() (THREAD_SIZE -                                      \
+
+#ifdef __KERNEL__
+# ifdef  __ia64__
+#  define CDEBUG_STACK (THREAD_SIZE -                                      \
                          ((unsigned long)__builtin_dwarf_cfa() &            \
                           (THREAD_SIZE - 1)))
-#else
-#define CDEBUG_STACK() (THREAD_SIZE -                                      \
+# else
+#  define CDEBUG_STACK (THREAD_SIZE -                                      \
                          ((unsigned long)__builtin_frame_address(0) &       \
                           (THREAD_SIZE - 1)))
-#endif
+# endif
  
-#ifdef __KERNEL__
  #define CHECK_STACK(stack)                                                    \
          do {                                                                  \
                  if ((stack) > 3*THREAD_SIZE/4 && (stack) > portal_stack) {    \
@@ -105,20 +98,21 @@ extern unsigned int portal_printk;
                        /*panic("LBUG");*/                                      \
                  }                                                             \
          } while (0)
-#else
+#else /* __KERNEL__ */
  #define CHECK_STACK(stack) do { } while(0)
-#endif
+#define CDEBUG_STACK (0L)
+#endif /* __KERNEL__ */
  
  #if 1
  #define CDEBUG(mask, format, a...)                                            \
  do {                                                                          \
-        CHECK_STACK(CDEBUG_STACK());                                          \
+        CHECK_STACK(CDEBUG_STACK);                                            \
          if (!(mask) || ((mask) & (D_ERROR | D_EMERG)) ||                      \
              (portal_debug & (mask) &&                                         \
-             portal_subsystem_debug & (1 << (DEBUG_SUBSYSTEM >> 24))))        \
+             portal_subsystem_debug & DEBUG_SUBSYSTEM))                       \
                  portals_debug_msg(DEBUG_SUBSYSTEM, mask,                      \
                                    __FILE__, __FUNCTION__, __LINE__,           \
-                                  CDEBUG_STACK(), format , ## a);             \
+                                  CDEBUG_STACK, format, ## a);                \
  } while (0)
  
  #define CWARN(format, a...) CDEBUG(D_WARNING, format, ## a)
@@ -162,7 +156,6 @@ do {                                                                    \
  #define EXIT                            do { } while (0)
  #endif
  
-
  #ifdef __KERNEL__
  # include <linux/vmalloc.h>
  # include <linux/time.h>
@@ -210,7 +203,8 @@ static inline void our_cond_resched(void)
  #endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) */
  
  #ifdef PORTAL_DEBUG
-extern void kportal_assertion_failed(char *expr,char *file,char *func,int line);
+extern void kportal_assertion_failed(char *expr, char *file, const char *func,
+                                     const int line);
  #define LASSERT(e) ((e) ? 0 : kportal_assertion_failed( #e , __FILE__,  \
                                                          __FUNCTION__, __LINE__))
  #else
@@ -560,14 +554,14 @@ extern struct prof_ent prof_ents[MAX_PROFS];
  #endif /* PORTALS_PROFILING */
  
  /* debug.c */
-void portals_run_lbug_upcall(char * file, char *fn, int line);
+void portals_run_lbug_upcall(char * file, const char *fn, const int line);
  void portals_debug_dumplog(void);
  int portals_debug_init(unsigned long bufsize);
  int portals_debug_cleanup(void);
  int portals_debug_clear_buffer(void);
  int portals_debug_mark_buffer(char *text);
  int portals_debug_set_daemon(unsigned int cmd, unsigned int length,
-                char *file, unsigned int size);
+                             char *file, unsigned int size);
  __s32 portals_debug_copy_to_user(char *buf, unsigned long len);
  #if (__GNUC__)
  /* Use the special GNU C __attribute__ hack to have the compiler check the
@@ -578,13 +572,14 @@ __s32 portals_debug_copy_to_user(char *buf, unsigned long len);
  # warning printf has been defined as a macro...
  # undef printf
  #endif
-void portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
-                        unsigned long stack, const char *format, ...)
+void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
+                       const int line, unsigned long stack,
+                       const char *format, ...)
          __attribute__ ((format (printf, 7, 8)));
  #else
-void portals_debug_msg (int subsys, int mask, char *file, char *fn,
-                        int line, unsigned long stack,
-                        const char *format, ...);
+void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
+                       const int line, unsigned long stack,
+                       const char *format, ...);
  #endif /* __GNUC__ */
  void portals_debug_set_level(unsigned int debug_level);
  
@@ -618,9 +613,9 @@ extern void kportal_blockallsigs (void);
  # define PORTAL_ALLOC(ptr, size) do { (ptr) = malloc(size); } while (0);
  # define PORTAL_FREE(a, b) do { free(a); } while (0);
  # define portals_debug_msg(subsys, mask, file, fn, line, stack, format, a...) \
-    printf ("%02x:%06x (@%lu %s:%s,l. %d %d %lu): " format,                    \
-            (subsys) >> 24, (mask), (long)time(0), file, fn, line,            \
-            getpid() , stack, ## a);
+    printf("%02x:%06x (@%lu %s:%s,l. %d %d %lu): " format,                    \
+           (subsys), (mask), (long)time(0), file, fn, line,                   \
+           getpid() , stack, ## a);
  #endif
  
  #ifndef CURRENT_TIME
@@ -911,13 +906,13 @@ ptl_handle_ni_t *kportal_get_ni (int nal);
  void kportal_put_ni (int nal);
  
  #ifdef __CYGWIN__
-#ifndef BITS_PER_LONG
-#if (~0UL) == 0xffffffffUL
-#define BITS_PER_LONG 32
-#else
-#define BITS_PER_LONG 64
-#endif
-#endif
+# ifndef BITS_PER_LONG
+#  if (~0UL) == 0xffffffffUL
+#   define BITS_PER_LONG 32
+#  else
+#   define BITS_PER_LONG 64
+#  endif
+# endif
  #endif
  
  #if (BITS_PER_LONG == 32 || __WORDSIZE == 32)
diff --git a/lustre/portals/include/linux/portals_compat25.h b/lustre/portals/include/linux/portals_compat25.h

index e28fbac..a7cb4d1 100644 (file)
--- a/lustre/portals/include/linux/portals_compat25.h
+++ b/lustre/portals/include/linux/portals_compat25.h
@@ -1,13 +1,56 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef _PORTALS_COMPAT_H
+#define _PORTALS_COMPAT_H
+
+// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved
+#if SPINLOCK_DEBUG
+# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) || defined(CONFIG_RH_2_4_20)
+#  define SIGNAL_MASK_ASSERT() \
+   LASSERT(current->sighand->siglock.magic == SPINLOCK_MAGIC)
+# else
+#  define SIGNAL_MASK_ASSERT() \
+   LASSERT(current->sigmask_lock.magic == SPINLOCK_MAGIC)
+# endif
+#else
+# define SIGNAL_MASK_ASSERT()
+#endif
+// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved
+
  #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) || defined(CONFIG_RH_2_4_20)
-# define SIGNAL_MASK_LOCK(task, flags)                              \
+
+# define SIGNAL_MASK_LOCK(task, flags)                                  \
    spin_lock_irqsave(&task->sighand->siglock, flags)
-# define SIGNAL_MASK_UNLOCK(task, flags)                            \
+# define SIGNAL_MASK_UNLOCK(task, flags)                                \
    spin_unlock_irqrestore(&task->sighand->siglock, flags)
+# define USERMODEHELPER(path, argv, envp)                               \
+  call_usermodehelper(path, argv, envp, 1)
  # define RECALC_SIGPENDING         recalc_sigpending()
-#else
-# define SIGNAL_MASK_LOCK(task, flags)                              \
+# define CURRENT_SECONDS           get_seconds()
+
+#else /* 2.4.x */
+
+# define SIGNAL_MASK_LOCK(task, flags)                                  \
    spin_lock_irqsave(&task->sigmask_lock, flags)
-# define SIGNAL_MASK_UNLOCK(task, flags)                            \
+# define SIGNAL_MASK_UNLOCK(task, flags)                                \
    spin_unlock_irqrestore(&task->sigmask_lock, flags)
+# define USERMODEHELPER(path, argv, envp)                               \
+  call_usermodehelper(path, argv, envp)
  # define RECALC_SIGPENDING         recalc_sigpending(current)
+# define CURRENT_SECONDS           CURRENT_TIME
+
+#endif
+
+#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
+# define THREAD_NAME(comm, fmt, a...)                                   \
+        sprintf(comm, fmt "|%d", ## a, current->thread.extern_pid)
+#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+# define THREAD_NAME(comm, fmt, a...)                                   \
+        sprintf(comm, fmt "|%d", ## a, current->thread.mode.tt.extern_pid)
+#else
+# define THREAD_NAME(comm, fmt, a...)                                   \
+        sprintf(comm, fmt, ## a)
  #endif
+
+#endif /* _PORTALS_COMPAT_H */
diff --git a/lustre/portals/include/portals/list.h b/lustre/portals/include/portals/list.h

index 2b63312..78a1e2d 100644 (file)
--- a/lustre/portals/include/portals/list.h
+++ b/lustre/portals/include/portals/list.h
@@ -1,6 +1,4 @@
  #ifndef _LINUX_LIST_H
-#define _LINUX_LIST_H
-
  
  /*
   * Simple doubly linked list implementation.
@@ -101,7 +99,9 @@ static inline void list_del_init(struct list_head *entry)
         __list_del(entry->prev, entry->next);
         INIT_LIST_HEAD(entry);
  }
+#endif
  
+#ifndef list_for_each_entry
  /**
   * list_move - delete from one list and add as another's head
   * @list: the entry to move
@@ -124,7 +124,10 @@ static inline void list_move_tail(struct list_head *list,
         __list_del(list->prev, list->next);
         list_add_tail(list, head);
  }
+#endif
  
+#ifndef _LINUX_LIST_H
+#define _LINUX_LIST_H
  /**
   * list_empty - tests whether a list is empty
   * @head: the list to test.
diff --git a/lustre/portals/include/portals/lltrace.h b/lustre/portals/include/portals/lltrace.h

index 7d1b304..d389aab 100644 (file)
--- a/lustre/portals/include/portals/lltrace.h
+++ b/lustre/portals/include/portals/lltrace.h
@@ -2,7 +2,7 @@
   * vim:expandtab:shiftwidth=8:tabstop=8:
   *
   * Compile with:
- * cc -I../../portals/include -o fio fio.c -L../../portals/linux/utils -lptlctl 
+ * cc -I../../portals/include -o fio fio.c -L../../portals/linux/utils -lptlctl
   */
  #ifndef __LTRACE_H_
  #define __LTRACE_H_
@@ -31,20 +31,20 @@ static inline int ltrace_write_file(char* fname)
          argv[0] = "debug_kernel";
          argv[1] = fname;
          argv[2] = "1";
-        
+
          fprintf(stderr, "[ptlctl] %s %s %s\n", argv[0], argv[1], argv[2]);
-        
+
          return jt_dbg_debug_kernel(3, argv);
  }
  
  static inline int ltrace_clear()
  {
          char* argv[1];
-        
+
          argv[0] = "clear";
-        
+
          fprintf(stderr, "[ptlctl] %s\n", argv[0]);
-        
+
          return jt_dbg_clear_debug_buf(1, argv);
  }
  
@@ -52,9 +52,9 @@ static inline int ltrace_mark(int indent_level, char* text)
  {
          char* argv[2];
          char mark_buf[PATH_MAX];
-        
+
          snprintf(mark_buf, PATH_MAX, "====%d=%s", indent_level, text);
-        
+
          argv[0] = "mark";
          argv[1] = mark_buf;
          return jt_dbg_mark_debug_buf(2, argv);
@@ -65,9 +65,9 @@ static inline int ltrace_applymasks()
          char* argv[2];
          argv[0] = "list";
          argv[1] = "applymasks";
-        
+
          fprintf(stderr, "[ptlctl] %s %s\n", argv[0], argv[1]);
-        
+
          return jt_dbg_list(2, argv);
  }
  
@@ -95,19 +95,19 @@ static inline int ltrace_start()
  #ifdef PORTALS_DEV_ID
          rc = register_ioc_dev(PORTALS_DEV_ID, PORTALS_DEV_PATH);
  #endif
-        ltrace_filter("class"); 
+        ltrace_filter("class");
          ltrace_filter("socknal");
-        ltrace_filter("qswnal"); 
-        ltrace_filter("gmnal");  
-        ltrace_filter("portals");  
-        
-        ltrace_show("all_types");  
-        ltrace_filter("trace");  
-        ltrace_filter("malloc"); 
-        ltrace_filter("net"); 
-        ltrace_filter("page"); 
-        ltrace_filter("other"); 
-        ltrace_filter("info"); 
+        ltrace_filter("qswnal");
+        ltrace_filter("gmnal");
+        ltrace_filter("portals");
+
+        ltrace_show("all_types");
+        ltrace_filter("trace");
+        ltrace_filter("malloc");
+        ltrace_filter("net");
+        ltrace_filter("page");
+        ltrace_filter("other");
+        ltrace_filter("info");
          ltrace_applymasks();
  
          return rc;
@@ -146,7 +146,7 @@ static inline void ltrace_add_processnames(char* fname)
          struct timezone tz;
          int nob;
          int underuml = !not_uml();
-        
+
          gettimeofday(&tv, &tz);
  
          nob = snprintf(cmdbuf, LTRACE_MAX_NOB, "ps --no-headers -eo \"");
@@ -167,7 +167,7 @@ static inline void ltrace_add_processnames(char* fname)
                                   "(%s:%d:%s() %d+%lu): ",
                                   "lltrace.h", __LINE__, __FUNCTION__, 0, 0L);
          }
-         
+
          nob += snprintf(cmdbuf+nob, LTRACE_MAX_NOB, " %%p %%c\" >> %s", fname);
          system(cmdbuf);
  }
diff --git a/lustre/portals/include/portals/myrnal.h b/lustre/portals/include/portals/myrnal.h

index 12b1925..13790f7 100644 (file)
--- a/lustre/portals/include/portals/myrnal.h
+++ b/lustre/portals/include/portals/myrnal.h
@@ -1,6 +1,3 @@
-/*
-*/
-
  #ifndef MYRNAL_H
  #define MYRNAL_H
  
diff --git a/lustre/portals/include/portals/nal.h b/lustre/portals/include/portals/nal.h

index 88be63c..7cb3ab7 100644 (file)
--- a/lustre/portals/include/portals/nal.h
+++ b/lustre/portals/include/portals/nal.h
@@ -1,5 +1,3 @@
-/*
-*/
  #ifndef _NAL_H_
  #define _NAL_H_
  
diff --git a/lustre/portals/include/portals/ppid.h b/lustre/portals/include/portals/ppid.h

index 4727599..760f465 100644 (file)
--- a/lustre/portals/include/portals/ppid.h
+++ b/lustre/portals/include/portals/ppid.h
@@ -1,6 +1,3 @@
-/*
- */
-
  #ifndef _INCppidh_
  #define _INCppidh_
  
diff --git a/lustre/portals/include/portals/stringtab.h b/lustre/portals/include/portals/stringtab.h

index c9683f7..33e4375 100644 (file)
--- a/lustre/portals/include/portals/stringtab.h
+++ b/lustre/portals/include/portals/stringtab.h
@@ -1,5 +1,3 @@
  /*
-*/
-/*
   * stringtab.h
   */
diff --git a/lustre/portals/include/portals/types.h b/lustre/portals/include/portals/types.h

index d4038b6..0269290 100644 (file)
--- a/lustre/portals/include/portals/types.h
+++ b/lustre/portals/include/portals/types.h
@@ -2,14 +2,19 @@
  #define _P30_TYPES_H_
  
  #ifdef __linux__
-#include <asm/types.h>
-#include <asm/timex.h>
+# include <asm/types.h>
+# include <asm/timex.h>
  #else
-#include <sys/types.h>
+# include <sys/types.h>
  typedef u_int32_t __u32;
  typedef u_int64_t __u64;
-typedef unsigned long long cycles_t;
-static inline cycles_t get_cycles(void) { return 0; }
+#endif
+
+#ifdef __KERNEL__
+# include <linux/time.h>
+#else
+# include <sys/time.h>
+# define do_gettimeofday(tv) gettimeofday(tv, NULL)
  #endif
  
  typedef __u64 ptl_nid_t;
@@ -31,7 +36,7 @@ typedef ptl_handle_any_t ptl_handle_md_t;
  typedef ptl_handle_any_t ptl_handle_me_t;
  
  #define PTL_HANDLE_NONE \
-((const ptl_handle_any_t){.nal_idx = -1, .cookie = -1})
+    ((const ptl_handle_any_t){.nal_idx = -1, .cookie = -1})
  #define PTL_EQ_NONE PTL_HANDLE_NONE
  
  static inline int PtlHandleEqual (ptl_handle_any_t h1, ptl_handle_any_t h2)
@@ -108,17 +113,15 @@ typedef struct {
          ptl_handle_me_t unlinked_me;
          ptl_md_t mem_desc;
          ptl_hdr_data_t hdr_data;
-        cycles_t  arrival_time;
+        struct timeval arrival_time;
          volatile ptl_seq_t sequence;
  } ptl_event_t;
  
-
  typedef enum {
          PTL_ACK_REQ,
          PTL_NOACK_REQ
  } ptl_ack_req_t;
  
-
  typedef struct {
          volatile ptl_seq_t sequence;
          ptl_size_t size;
@@ -130,7 +133,6 @@ typedef struct {
          ptl_eq_t *eq;
  } ptl_ni_t;
  
-
  typedef struct {
          int max_match_entries;    /* max number of match entries */
          int max_mem_descriptors;  /* max number of memory descriptors */
diff --git a/lustre/portals/knals/.cvsignore b/lustre/portals/knals/.cvsignore

index 282522d..89a4aa6 100644 (file)
--- a/lustre/portals/knals/.cvsignore
+++ b/lustre/portals/knals/.cvsignore
@@ -1,2 +1,3 @@
  Makefile
  Makefile.in
+.*.o.cmd
diff --git a/lustre/portals/knals/Makefile.mk b/lustre/portals/knals/Makefile.mk

index ce40a60..cd5d9d6 100644 (file)
--- a/lustre/portals/knals/Makefile.mk
+++ b/lustre/portals/knals/Makefile.mk
@@ -1,4 +1,4 @@
-include ../Kernelenv
+include $(obj)/../Kernelenv
  
  obj-y = socknal/
-# more coming...
-\ No newline at end of file
+# more coming...
diff --git a/lustre/portals/knals/gmnal/gmnal.c b/lustre/portals/knals/gmnal/gmnal.c

index ceeea2a..0cffc15 100644 (file)
--- a/lustre/portals/knals/gmnal/gmnal.c
+++ b/lustre/portals/knals/gmnal/gmnal.c
@@ -124,7 +124,7 @@ static nal_t *kgmnal_init(int interface, ptl_pt_index_t ptl_size,
          return &kgmnal_api;
  }
  
-static void __exit
+static void /*__exit*/
  kgmnal_finalize(void)
  {
          struct list_head *tmp;
diff --git a/lustre/portals/knals/scimacnal/scimacnal.c b/lustre/portals/knals/scimacnal/scimacnal.c

index 1066d69..479cc2c 100644 (file)
--- a/lustre/portals/knals/scimacnal/scimacnal.c
+++ b/lustre/portals/knals/scimacnal/scimacnal.c
@@ -112,7 +112,7 @@ static nal_t *kscimacnal_init(int interface, ptl_pt_index_t  ptl_size,
  
  
  /* Called by kernel at module unload time */
-static void __exit 
+static void /*__exit*/ 
  kscimacnal_finalize(void)
  {
          /* FIXME: How should the shutdown procedure really look? */
diff --git a/lustre/portals/knals/socknal/.cvsignore b/lustre/portals/knals/socknal/.cvsignore

index e995588..95973d6 100644 (file)
--- a/lustre/portals/knals/socknal/.cvsignore
+++ b/lustre/portals/knals/socknal/.cvsignore
@@ -1,3 +1,4 @@
  .deps
  Makefile
  Makefile.in
+.*.o.cmd
diff --git a/lustre/portals/knals/socknal/Makefile.mk b/lustre/portals/knals/socknal/Makefile.mk

index 46edf01..5c1b366 100644 (file)
--- a/lustre/portals/knals/socknal/Makefile.mk
+++ b/lustre/portals/knals/socknal/Makefile.mk
@@ -3,7 +3,7 @@
  # This code is issued under the GNU General Public License.
  # See the file COPYING in this distribution
  
-include ../../Kernelenv
+include $(src)/../../Kernelenv
  
  obj-y += ksocknal.o
  ksocknal-objs    := socknal.o socknal_cb.o
diff --git a/lustre/portals/knals/toenal/toenal.c b/lustre/portals/knals/toenal/toenal.c

index 1f5dc38..77ee473 100644 (file)
--- a/lustre/portals/knals/toenal/toenal.c
+++ b/lustre/portals/knals/toenal/toenal.c
@@ -379,7 +379,7 @@ ktoenal_cmd(struct portal_ioctl_data * data, void * private)
  }
  
  
-void __exit
+void /*__exit*/
  ktoenal_module_fini (void)
  {
          CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
diff --git a/lustre/portals/knals/toenal/toenal_cb.c b/lustre/portals/knals/toenal/toenal_cb.c

index ec37f6f..abd0731 100644 (file)
--- a/lustre/portals/knals/toenal/toenal_cb.c
+++ b/lustre/portals/knals/toenal/toenal_cb.c
@@ -893,6 +893,7 @@ ktoenal_process_receive (ksock_conn_t *conn, long *irq_flags)
                                  spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags);
                                  goto get_fmb;   /* => go get a fwd msg buffer */
                          default:
+                                break;
                          }
                          /* Not Reached */
                          LBUG ();
@@ -934,6 +935,7 @@ ktoenal_process_receive (ksock_conn_t *conn, long *irq_flags)
                  goto out;                       /* (later) */
  
          default:
+                break;
          }
  
          /* Not Reached */
diff --git a/lustre/portals/libcfs/.cvsignore b/lustre/portals/libcfs/.cvsignore

index 67d1a3d..7fa686f 100644 (file)
--- a/lustre/portals/libcfs/.cvsignore
+++ b/lustre/portals/libcfs/.cvsignore
@@ -2,3 +2,4 @@
  Makefile
  Makefile.in
  link-stamp
+.*.o.cmd
diff --git a/lustre/portals/libcfs/Makefile.mk b/lustre/portals/libcfs/Makefile.mk

index 3196ea2..9aa838f 100644 (file)
--- a/lustre/portals/libcfs/Makefile.mk
+++ b/lustre/portals/libcfs/Makefile.mk
@@ -6,4 +6,4 @@
  include fs/lustre/portals/Kernelenv
  
  obj-y += libcfs.o
-licfs-objs    := module.o proc.o debug.o
-\ No newline at end of file
+libcfs-objs    := module.o proc.o debug.o
diff --git a/lustre/portals/libcfs/debug.c b/lustre/portals/libcfs/debug.c

index 8d26dbb..f37cd96 100644 (file)
--- a/lustre/portals/libcfs/debug.c
+++ b/lustre/portals/libcfs/debug.c
@@ -571,8 +571,8 @@ int portals_debug_init(unsigned long bufsize)
          memset(debug_buf, 0, debug_size);
          debug_wrapped = 0;
  
-        printk(KERN_INFO "Portals: allocated %lu byte debug buffer at %p.\n",
-               bufsize, debug_buf);
+        //printk(KERN_INFO "Portals: allocated %lu byte debug buffer at %p.\n",
+               //bufsize, debug_buf);
          atomic_set(&debug_off_a, debug_off);
          notifier_chain_register(&panic_notifier_list, &lustre_panic_notifier);
          debug_size = bufsize;
@@ -632,9 +632,9 @@ int portals_debug_mark_buffer(char *text)
          if (debug_buf == NULL)
                  return -EINVAL;
  
-        CDEBUG(0, "*******************************************************************************\n");
+        CDEBUG(0, "********************************************************\n");
          CDEBUG(0, "DEBUG MARKER: %s\n", text);
-        CDEBUG(0, "*******************************************************************************\n");
+        CDEBUG(0, "********************************************************\n");
  
          return 0;
  }
@@ -672,8 +672,8 @@ __s32 portals_debug_copy_to_user(char *buf, unsigned long len)
  
  /* FIXME: I'm not very smart; someone smarter should make this better. */
  void
-portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
-                   unsigned long stack, const char *format, ...)
+portals_debug_msg(int subsys, int mask, char *file, const char *fn,
+                  const int line, unsigned long stack, const char *format, ...)
  {
          va_list       ap;
          unsigned long flags;
@@ -728,8 +728,8 @@ portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
          do_gettimeofday(&tv);
  
          prefix_nob = snprintf(debug_buf + debug_off, max_nob,
-                              "%02x:%06x:%d:%lu.%06lu ",
-                              subsys >> 24, mask, smp_processor_id(),
+                              "%06x:%06x:%d:%lu.%06lu ",
+                              subsys, mask, smp_processor_id(),
                                tv.tv_sec, tv.tv_usec);
          max_nob -= prefix_nob;
  
@@ -752,7 +752,7 @@ portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
  
          va_start(ap, format);
          msg_nob += vsnprintf(debug_buf + debug_off + prefix_nob + msg_nob,
-                            max_nob, format, ap);
+                             max_nob, format, ap);
          max_nob -= msg_nob;
          va_end(ap);
  
@@ -790,7 +790,7 @@ void portals_debug_set_level(unsigned int debug_level)
          portal_debug = debug_level;
  }
  
-void portals_run_lbug_upcall(char * file, char *fn, int line)
+void portals_run_lbug_upcall(char *file, const char *fn, const int line)
  {
          char *argv[6];
          char *envp[3];
@@ -803,7 +803,7 @@ void portals_run_lbug_upcall(char * file, char *fn, int line)
          argv[0] = portals_upcall;
          argv[1] = "LBUG";
          argv[2] = file;
-        argv[3] = fn;
+        argv[3] = (char *)fn;
          argv[4] = buf;
          argv[5] = NULL;
  
diff --git a/lustre/portals/libcfs/module.c b/lustre/portals/libcfs/module.c

index 14cc325..e8eb290 100644 (file)
--- a/lustre/portals/libcfs/module.c
+++ b/lustre/portals/libcfs/module.c
@@ -62,10 +62,10 @@ static struct nal_cmd_handler nal_cmd[NAL_MAX_NR + 1];
  struct semaphore nal_cmd_sem;
  
  #ifdef PORTAL_DEBUG
-void
-kportal_assertion_failed (char *expr, char *file, char *func, int line)
+void kportal_assertion_failed(char *expr, char *file, const char *func,
+                              const int line)
  {
-        portals_debug_msg(0, D_EMERG, file, func, line, CDEBUG_STACK(),
+        portals_debug_msg(0, D_EMERG, file, func, line, CDEBUG_STACK,
                            "ASSERTION(%s) failed\n", expr);
          LBUG_WITH_LOC(file, func, line);
  }
diff --git a/lustre/portals/portals/.cvsignore b/lustre/portals/portals/.cvsignore

index e995588..95973d6 100644 (file)
--- a/lustre/portals/portals/.cvsignore
+++ b/lustre/portals/portals/.cvsignore
@@ -1,3 +1,4 @@
  .deps
  Makefile
  Makefile.in
+.*.o.cmd
diff --git a/lustre/portals/portals/Makefile.mk b/lustre/portals/portals/Makefile.mk

index 5627ef7..7822846 100644 (file)
--- a/lustre/portals/portals/Makefile.mk
+++ b/lustre/portals/portals/Makefile.mk
@@ -3,7 +3,10 @@
  # This code is issued under the GNU General Public License.
  # See the file COPYING in this distribution
  
-include ../Kernelenv
+include $(src)/../Kernelenv
  
  obj-y += portals.o
-portals-objs    := lib-dispatch.o lib-eq.o lib-init.o lib-md.o lib-me.o lib-move.o lib-msg.o lib-ni.o lib-not-impl.o lib-pid.o api-eq.o api-errno.o api-init.o api-md.o api-me.o api-ni.o api-wrap.o
+portals-objs    :=     lib-dispatch.o lib-eq.o lib-init.o lib-md.o lib-me.o \
+                       lib-move.o lib-msg.o lib-ni.o lib-pid.o \
+                       api-eq.o api-errno.o api-init.o api-me.o api-ni.o \
+                       api-wrap.o
diff --git a/lustre/portals/portals/api-init.c b/lustre/portals/portals/api-init.c

index e59c922..dc1fead 100644 (file)
--- a/lustre/portals/portals/api-init.c
+++ b/lustre/portals/portals/api-init.c
@@ -26,7 +26,7 @@
  #include <portals/api-support.h>
  
  int ptl_init;
-unsigned int portal_subsystem_debug = 0xfff7e3ff;
+unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_QSWNAL | S_SOCKNAL | S_GMNAL);
  unsigned int portal_debug = ~0;
  unsigned int portal_printk;
  unsigned int portal_stack;
diff --git a/lustre/portals/portals/lib-move.c b/lustre/portals/portals/lib-move.c

index fde4f16..02f8b60 100644 (file)
--- a/lustre/portals/portals/lib-move.c
+++ b/lustre/portals/portals/lib-move.c
@@ -544,7 +544,7 @@ get_new_msg (nal_cb_t *nal, lib_md_t *md)
          msg->send_ack = 0;
  
          msg->md = md;
-        msg->ev.arrival_time = get_cycles();
+        do_gettimeofday(&msg->ev.arrival_time);
          md->pending++;
          if (md->threshold != PTL_MD_THRESH_INF) {
                  LASSERT (md->threshold > 0);
diff --git a/lustre/portals/router/.cvsignore b/lustre/portals/router/.cvsignore

index e995588..95973d6 100644 (file)
--- a/lustre/portals/router/.cvsignore
+++ b/lustre/portals/router/.cvsignore
@@ -1,3 +1,4 @@
  .deps
  Makefile
  Makefile.in
+.*.o.cmd
diff --git a/lustre/portals/router/Makefile.mk b/lustre/portals/router/Makefile.mk

index 64bd09b..9b02c03 100644 (file)
--- a/lustre/portals/router/Makefile.mk
+++ b/lustre/portals/router/Makefile.mk
@@ -3,7 +3,7 @@
  # This code is issued under the GNU General Public License.
  # See the file COPYING in this distribution
  
-include ../Kernelenv
+include $(src)/../Kernelenv
  
  obj-y += kptlrouter.o
  kptlrouter-objs    := router.o proc.o
diff --git a/lustre/portals/router/router.c b/lustre/portals/router/router.c

index 6074c3c..27a7fba 100644 (file)
--- a/lustre/portals/router/router.c
+++ b/lustre/portals/router/router.c
@@ -23,8 +23,8 @@
  
  #include "router.h"
  
-struct list_head kpr_routes;
-struct list_head kpr_nals;
+LIST_HEAD(kpr_routes);
+LIST_HEAD(kpr_nals);
  
  unsigned long long kpr_fwd_bytes;
  unsigned long      kpr_fwd_packets;
@@ -35,7 +35,7 @@ atomic_t           kpr_queue_depth;
   *
   * Once in a blue moon we register/deregister NALs and add/remove routing
   * entries (thread context only)... */
-rwlock_t         kpr_rwlock;
+rwlock_t         kpr_rwlock = RW_LOCK_UNLOCKED;
  
  kpr_router_interface_t kpr_router_interface = {
         kprri_register:         kpr_register_nal,
@@ -55,7 +55,7 @@ kpr_control_interface_t kpr_control_interface = {
  int
  kpr_register_nal (kpr_nal_interface_t *nalif, void **argp)
  {
-       long               flags;
+       unsigned long      flags;
         struct list_head  *e;
         kpr_nal_entry_t   *ne;
  
@@ -98,7 +98,7 @@ kpr_register_nal (kpr_nal_interface_t *nalif, void **argp)
  void
  kpr_shutdown_nal (void *arg)
  {
-       long             flags;
+       unsigned long    flags;
         kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg;
  
          CDEBUG (D_OTHER, "Shutting down NAL %d\n", ne->kpne_interface.kprni_nalid);
@@ -123,7 +123,7 @@ kpr_shutdown_nal (void *arg)
  void
  kpr_deregister_nal (void *arg)
  {
-       long              flags;
+       unsigned long     flags;
         kpr_nal_entry_t  *ne = (kpr_nal_entry_t *)arg;
  
          CDEBUG (D_OTHER, "Deregister NAL %d\n", ne->kpne_interface.kprni_nalid);
@@ -296,7 +296,7 @@ int
  kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, ptl_nid_t lo_nid,
                 ptl_nid_t hi_nid)
  {
-       long               flags;
+       unsigned long      flags;
         struct list_head  *e;
         kpr_route_entry_t *re;
  
@@ -345,7 +345,7 @@ kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, ptl_nid_t lo_nid,
  int
  kpr_del_route (ptl_nid_t nid)
  {
-       long               flags;
+       unsigned long      flags;
         struct list_head  *e;
  
          CDEBUG(D_OTHER, "Del route "LPX64"\n", nid);
@@ -398,7 +398,7 @@ kpr_get_route(int idx, int *gateway_nalid, ptl_nid_t *gateway_nid,
          return (-ENOENT);
  }
  
-static void __exit
+static void /*__exit*/
  kpr_finalise (void)
  {
          LASSERT (list_empty (&kpr_nals));
@@ -427,10 +427,6 @@ kpr_initialise (void)
          CDEBUG(D_MALLOC, "kpr_initialise: kmem %d\n",
                 atomic_read(&portal_kmemory));
  
-       rwlock_init(&kpr_rwlock);
-       INIT_LIST_HEAD(&kpr_routes);
-       INIT_LIST_HEAD(&kpr_nals);
-
          kpr_proc_init();
  
          PORTAL_SYMBOL_REGISTER(kpr_router_interface);
diff --git a/lustre/portals/tests/.cvsignore b/lustre/portals/tests/.cvsignore

index 051d1bd..d0c4c88 100644 (file)
--- a/lustre/portals/tests/.cvsignore
+++ b/lustre/portals/tests/.cvsignore
@@ -1,3 +1,4 @@
  Makefile
  Makefile.in
  .deps
+.*.o.cmd
diff --git a/lustre/portals/tests/ping_cli.c b/lustre/portals/tests/ping_cli.c

index 389ffbb..4d04ffb 100644 (file)
--- a/lustre/portals/tests/ping_cli.c
+++ b/lustre/portals/tests/ping_cli.c
@@ -260,7 +260,7 @@ pingcli_start(struct portal_ioctl_data *args)
  
  
  /* called by the portals_ioctl for ping requests */
-static int kping_client(struct portal_ioctl_data *args)
+int kping_client(struct portal_ioctl_data *args)
  {
          PORTAL_ALLOC (client, sizeof(struct pingcli_data));
          if (client == NULL)
@@ -282,7 +282,7 @@ static int __init pingcli_init(void)
  } /* pingcli_init() */
  
  
-static void __exit pingcli_cleanup(void)
+static void /*__exit*/ pingcli_cleanup(void)
  {
          PORTAL_SYMBOL_UNREGISTER (kping_client);
  } /* pingcli_cleanup() */
diff --git a/lustre/portals/tests/ping_srv.c b/lustre/portals/tests/ping_srv.c

index 1037d09..873e11c 100644 (file)
--- a/lustre/portals/tests/ping_srv.c
+++ b/lustre/portals/tests/ping_srv.c
@@ -47,11 +47,11 @@
  #include <asm/semaphore.h>
  
  #define STDSIZE (sizeof(int) + sizeof(int) + sizeof(struct timeval))
-#define MAXSIZE (16*1024*1024)
+#define MAXSIZE (16*1024)
  
  static unsigned ping_head_magic;
  static unsigned ping_bulk_magic;
-static int nal  = 0;                            // Your NAL,
+static int nal  = SOCKNAL;                            // Your NAL,
  static unsigned long packets_valid = 0;         // Valid packets 
  static int running = 1;
  atomic_t pkt;
@@ -282,7 +282,7 @@ static int __init pingsrv_init(void)
  } /* pingsrv_init() */
  
  
-static void __exit pingsrv_cleanup(void)
+static void /*__exit*/ pingsrv_cleanup(void)
  {
          remove_proc_entry ("net/pingsrv", NULL);
          
diff --git a/lustre/portals/tests/sping_cli.c b/lustre/portals/tests/sping_cli.c

index 4cef08b..35e114b 100644 (file)
--- a/lustre/portals/tests/sping_cli.c
+++ b/lustre/portals/tests/sping_cli.c
@@ -235,7 +235,7 @@ pingcli_start(struct portal_ioctl_data *args)
  
  
  /* called by the portals_ioctl for ping requests */
-static int kping_client(struct portal_ioctl_data *args)
+int kping_client(struct portal_ioctl_data *args)
  {
  
          PORTAL_ALLOC (client, sizeof(struct pingcli_data));
@@ -258,7 +258,7 @@ static int __init pingcli_init(void)
  } /* pingcli_init() */
  
  
-static void __exit pingcli_cleanup(void)
+static void /*__exit*/ pingcli_cleanup(void)
  {
          PORTAL_SYMBOL_UNREGISTER (kping_client);
  } /* pingcli_cleanup() */
diff --git a/lustre/portals/tests/sping_srv.c b/lustre/portals/tests/sping_srv.c

index a18ea35..2b45a46 100644 (file)
--- a/lustre/portals/tests/sping_srv.c
+++ b/lustre/portals/tests/sping_srv.c
@@ -269,7 +269,7 @@ static int __init pingsrv_init(void)
  } /* pingsrv_init() */
  
  
-static void __exit pingsrv_cleanup(void)
+static void /*__exit*/ pingsrv_cleanup(void)
  {
          remove_proc_entry ("net/pingsrv", NULL);
          
diff --git a/lustre/portals/unals/debug.c b/lustre/portals/unals/debug.c

index 529bb2d..b73f042 100644 (file)
--- a/lustre/portals/unals/debug.c
+++ b/lustre/portals/unals/debug.c
@@ -84,8 +84,8 @@ int portals_debug_copy_to_user(char *buf, unsigned long len)
  
  /* FIXME: I'm not very smart; someone smarter should make this better. */
  void
-portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
-                   const char *format, ...)
+portals_debug_msg (int subsys, int mask, char *file, const char *fn, 
+                   const int line, const char *format, ...)
  {
          va_list       ap;
          unsigned long flags;
diff --git a/lustre/portals/utils/.cvsignore b/lustre/portals/utils/.cvsignore

index 148310a..8e474ad 100644 (file)
--- a/lustre/portals/utils/.cvsignore
+++ b/lustre/portals/utils/.cvsignore
@@ -5,4 +5,5 @@ debugctl
  ptlctl
  .deps
  routerstat
-wirecheck
-\ No newline at end of file
+wirecheck
+.*.cmd
diff --git a/lustre/portals/utils/debug.c b/lustre/portals/utils/debug.c

index 9ab1c73..0a009d2 100644 (file)
--- a/lustre/portals/utils/debug.c
+++ b/lustre/portals/utils/debug.c
@@ -53,17 +53,18 @@ static char rawbuf[8192];
  static char *buf = rawbuf;
  static int max = 8192;
  //static int g_pfd = -1;
-static int subsystem_array[1 << 8];
+static int subsystem_mask = ~0;
  static int debug_mask = ~0;
  
  static const char *portal_debug_subsystems[] =
-        {"undefined", "mdc", "mds", "osc", "ost", "class", "obdfs", "llite",
-         "rpc", "ext2obd", "portals", "socknal", "qswnal", "pinger", "filter",
-         "obdtrace", "echo", "ldlm", "lov", "gmnal", "router", "ptldb", NULL};
+        {"undefined", "mdc", "mds", "osc", "ost", "class", "log", "llite",
+         "rpc", "mgmt", "portals", "socknal", "qswnal", "pinger", "filter",
+         "ptlbd", "echo", "ldlm", "lov", "gmnal", "router", "cobd", NULL};
  static const char *portal_debug_masks[] =
          {"trace", "inode", "super", "ext2", "malloc", "cache", "info", "ioctl",
           "blocks", "net", "warning", "buffs", "other", "dentry", "portals",
-         "page", "dlmtrace", "error", "emerg", "ha", "rpctrace", "vfstrace", NULL};
+         "page", "dlmtrace", "error", "emerg", "ha", "rpctrace", "vfstrace",
+         NULL};
  
  struct debug_daemon_cmd {
          char *cmd;
@@ -88,7 +89,10 @@ static int do_debug_mask(char *name, int enable)
                          printf("%s output from subsystem \"%s\"\n",
                                  enable ? "Enabling" : "Disabling",
                                  portal_debug_subsystems[i]);
-                        subsystem_array[i] = enable;
+                        if (enable)
+                                subsystem_mask |= (1 << i);
+                        else
+                                subsystem_mask &= ~(1 << i);
                          found = 1;
                  }
          }
@@ -111,7 +115,6 @@ static int do_debug_mask(char *name, int enable)
  
  int dbg_initialize(int argc, char **argv)
  {
-        memset(subsystem_array, 1, sizeof(subsystem_array));
          return 0;
  }
  
@@ -213,12 +216,7 @@ int jt_dbg_list(int argc, char **argv)
                  for (i = 0; portal_debug_masks[i] != NULL; i++)
                          printf(", %s", portal_debug_masks[i]);
                  printf("\n");
-        }
-        else if (strcasecmp(argv[1], "applymasks") == 0) {
-                unsigned int subsystem_mask = 0;
-                for (i = 0; portal_debug_subsystems[i] != NULL; i++) {
-                        if (subsystem_array[i]) subsystem_mask |= (1 << i);
-                }
+        } else if (strcasecmp(argv[1], "applymasks") == 0) {
                  applymask_all(subsystem_mask, debug_mask);
          }
          return 0;
@@ -230,12 +228,6 @@ static void dump_buffer(FILE *fd, char *buf, int size, int raw)
  {
          char *p, *z;
          unsigned long subsystem, debug, dropped = 0, kept = 0;
-        int max_sub, max_type;
-
-        for (max_sub = 0; portal_debug_subsystems[max_sub] != NULL; max_sub++)
-                ;
-        for (max_type = 0; portal_debug_masks[max_type] != NULL; max_type++)
-                ;
  
          while (size) {
                  p = memchr(buf, '\n', size);
@@ -247,8 +239,7 @@ static void dump_buffer(FILE *fd, char *buf, int size, int raw)
                  z++;
                  /* for some reason %*s isn't working. */
                  *p = '\0';
-                if (subsystem < max_sub &&
-                    subsystem_array[subsystem] &&
+                if ((subsystem_mask & subsystem) &&
                      (!debug || (debug_mask & debug))) {
                          if (raw)
                                  fprintf(fd, "%s\n", buf);
@@ -551,6 +542,8 @@ int jt_dbg_modules(int argc, char **argv)
                  {"mds_ext3", "lustre/mds"},
                  {"mds_extN", "lustre/mds"},
                  {"ptlbd", "lustre/ptlbd"},
+                {"mgmt_svc", "lustre/mgmt"},
+                {"mgmt_cli", "lustre/mgmt"},
                  {NULL, NULL}
          };
          char *path = "..";
diff --git a/lustre/portals/utils/portals.c b/lustre/portals/utils/portals.c

index 90d66f5..a89f4f7 100644 (file)
--- a/lustre/portals/utils/portals.c
+++ b/lustre/portals/utils/portals.c
@@ -22,6 +22,7 @@
  
  #include <stdio.h>
  #include <sys/types.h>
+#include <netdb.h>
  #include <sys/socket.h>
  #include <netinet/tcp.h>
  #include <netdb.h>
@@ -106,6 +107,27 @@ nal2name (int nal)
          return ((e == NULL) ? "???" : e->name);
  }
  
+static struct hostent *
+ptl_gethostbyname(char * hname) {
+        struct hostent *he;
+        he = gethostbyname(hname);
+        if (!he) {
+                switch(h_errno) {
+                case HOST_NOT_FOUND:
+                case NO_ADDRESS:
+                        fprintf(stderr, "Unable to resolve hostname: %s\n",
+                                hname);
+                        break;
+                default:
+                        fprintf(stderr, "gethostbyname error: %s\n",
+                                strerror(errno));
+                        break;
+                }
+                return NULL;
+        }
+        return he;
+}
+
  int
  ptl_parse_nid (ptl_nid_t *nidp, char *str)
  {
@@ -127,7 +149,7 @@ ptl_parse_nid (ptl_nid_t *nidp, char *str)
          
          if ((('a' <= str[0] && str[0] <= 'z') ||
               ('A' <= str[0] && str[0] <= 'Z')) &&
-             (he = gethostbyname (str)) != NULL)
+             (he = ptl_gethostbyname (str)) != NULL)
          {
                  __u32 addr = *(__u32 *)he->h_addr;
  
@@ -351,12 +373,9 @@ int jt_ptl_connect(int argc, char **argv)
                          goto usage;
                  }
  
-                he = gethostbyname(argv[1]);
-                if (!he) {
-                        fprintf(stderr, "gethostbyname error: %s\n",
-                                strerror(errno));
+                he = ptl_gethostbyname(argv[1]);
+                if (!he)
                          return -1;
-                }
  
                  g_port = atol(argv[2]);
  
@@ -525,12 +544,9 @@ int jt_ptl_disconnect(int argc, char **argv)
  
                  PORTAL_IOC_INIT(data);
                  if (argc == 2) {
-                        he = gethostbyname(argv[1]);
-                        if (!he) {
-                                fprintf(stderr, "gethostbyname error: %s\n",
-                                        strerror(errno));
+                        he = ptl_gethostbyname(argv[1]);
+                        if (!he) 
                                  return -1;
-                        }
                          
                          data.ioc_nid = ntohl (*(__u32 *)he->h_addr); /* HOST byte order */
  
@@ -582,12 +598,9 @@ int jt_ptl_push_connection (int argc, char **argv)
  
                  PORTAL_IOC_INIT(data);
                  if (argc == 2) {
-                        he = gethostbyname(argv[1]);
-                        if (!he) {
-                                fprintf(stderr, "gethostbyname error: %s\n",
-                                        strerror(errno));
+                        he = ptl_gethostbyname(argv[1]);
+                        if (!he)
                                  return -1;
-                        }
                          
                          data.ioc_nid = ntohl (*(__u32 *)he->h_addr); /* HOST byte order */
  
diff --git a/lustre/ptlbd/client.c b/lustre/ptlbd/client.c

index af76523..0a6ad8f 100644 (file)
--- a/lustre/ptlbd/client.c
+++ b/lustre/ptlbd/client.c
@@ -76,7 +76,7 @@ static int ptlbd_cl_setup(struct obd_device *obd, obd_count len, void *buf)
          RETURN(0);
  }
  
-static int ptlbd_cl_cleanup(struct obd_device *obd, int force, int failover)
+static int ptlbd_cl_cleanup(struct obd_device *obd, int flags)
  {
          struct ptlbd_obd *ptlbd = &obd->u.ptlbd;
          struct obd_import *imp;
@@ -99,9 +99,8 @@ static int ptlbd_cl_cleanup(struct obd_device *obd, int force, int failover)
  
  
  /* modelled after ptlrpc_import_connect() */
-int ptlbd_cl_connect(struct lustre_handle *conn,
-                      struct obd_device *obd, 
-                      struct obd_uuid *target_uuid)
+int ptlbd_cl_connect(struct lustre_handle *conn, struct obd_device *obd,
+                     struct obd_uuid *target_uuid)
  {
          struct ptlbd_obd *ptlbd = &obd->u.ptlbd;
          struct obd_import *imp = ptlbd->bd_import;
@@ -196,7 +195,7 @@ int ptlbd_cl_init(void)
  {
          struct lprocfs_static_vars lvars;
  
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(ptlbd,&lvars);
          return class_register_type(&ptlbd_cl_obd_ops, lvars.module_vars,
                                     OBD_PTLBD_CL_DEVICENAME);
  }
diff --git a/lustre/ptlbd/main.c b/lustre/ptlbd/main.c

index e3fde99..dc591f4 100644 (file)
--- a/lustre/ptlbd/main.c
+++ b/lustre/ptlbd/main.c
@@ -57,7 +57,7 @@ out_cl:
          RETURN(ret);
  }
  
-static void __exit ptlbd_exit(void)
+static void /*__exit*/ ptlbd_exit(void)
  {
          ENTRY;
          ptlbd_cl_exit();
diff --git a/lustre/ptlbd/server.c b/lustre/ptlbd/server.c

index 34ec737..d293a86 100644 (file)
--- a/lustre/ptlbd/server.c
+++ b/lustre/ptlbd/server.c
@@ -74,7 +74,7 @@ out_filp:
          RETURN(rc);
  }
  
-static int ptlbd_sv_cleanup(struct obd_device *obddev, int force, int failover)
+static int ptlbd_sv_cleanup(struct obd_device *obddev, int flags)
  {
          struct ptlbd_obd *ptlbd = &obddev->u.ptlbd;
          ENTRY;
@@ -102,7 +102,7 @@ int ptlbd_sv_init(void)
  {
          struct lprocfs_static_vars lvars;
  
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(ptlbd,&lvars);
          return class_register_type(&ptlbd_sv_obd_ops, lvars.module_vars,
                                     OBD_PTLBD_SV_DEVICENAME);
  }
diff --git a/lustre/ptlrpc/.cvsignore b/lustre/ptlrpc/.cvsignore

index 067f05c..cf51f30 100644 (file)
--- a/lustre/ptlrpc/.cvsignore
+++ b/lustre/ptlrpc/.cvsignore
@@ -7,3 +7,4 @@ Makefile.in
  .deps
  tags
  TAGS
+.*.cmd
diff --git a/lustre/ptlrpc/Makefile.am b/lustre/ptlrpc/Makefile.am

index eb44329..355d48c 100644 (file)
--- a/lustre/ptlrpc/Makefile.am
+++ b/lustre/ptlrpc/Makefile.am
@@ -16,7 +16,7 @@ EXTRA_PROGRAMS = ptlrpc
  
  ptlrpc_SOURCES = recover.c connection.c ptlrpc_module.c events.c service.c \
  client.c niobuf.c pack_generic.c lproc_ptlrpc.c pinger.c ptlrpc_lib.c \
-ptlrpc_internal.h
+ptlrpc_internal.h recov_thread.c
  endif
  
  include $(top_srcdir)/Rules
diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c

index a98af3e..50ea587 100644 (file)
--- a/lustre/ptlrpc/client.c
+++ b/lustre/ptlrpc/client.c
@@ -78,13 +78,13 @@ void ptlrpc_readdress_connection(struct ptlrpc_connection *conn,
          struct ptlrpc_peer peer;
          int err;
  
-        err = ptlrpc_uuid_to_peer (uuid, &peer);
+        err = ptlrpc_uuid_to_peer(uuid, &peer);
          if (err != 0) {
                  CERROR("cannot find peer %s!\n", uuid->uuid);
                  return;
          }
  
-        memcpy (&conn->c_peer, &peer, sizeof (peer));
+        memcpy(&conn->c_peer, &peer, sizeof (peer));
          return;
  }
  
@@ -96,7 +96,7 @@ static inline struct ptlrpc_bulk_desc *new_bulk(void)
          if (!desc)
                  return NULL;
  
-        spin_lock_init (&desc->bd_lock);
+        spin_lock_init(&desc->bd_lock);
          init_waitqueue_head(&desc->bd_waitq);
          INIT_LIST_HEAD(&desc->bd_page_list);
          desc->bd_md_h = PTL_HANDLE_NONE;
@@ -108,10 +108,10 @@ static inline struct ptlrpc_bulk_desc *new_bulk(void)
  struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp (struct ptlrpc_request *req,
                                                 int type, int portal)
  {
-        struct obd_import       *imp = req->rq_import;
+        struct obd_import *imp = req->rq_import;
          struct ptlrpc_bulk_desc *desc;
  
-        LASSERT (type == BULK_PUT_SINK || type == BULK_GET_SOURCE);
+        LASSERT(type == BULK_PUT_SINK || type == BULK_GET_SOURCE);
  
          desc = new_bulk();
          if (desc == NULL)
@@ -132,10 +132,10 @@ struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp (struct ptlrpc_request *req,
  struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_exp (struct ptlrpc_request *req,
                                                 int type, int portal)
  {
-        struct obd_export       *exp = req->rq_export;
+        struct obd_export *exp = req->rq_export;
          struct ptlrpc_bulk_desc *desc;
  
-        LASSERT (type == BULK_PUT_SOURCE || type == BULK_GET_SINK);
+        LASSERT(type == BULK_PUT_SOURCE || type == BULK_GET_SINK);
  
          desc = new_bulk();
          if (desc == NULL)
@@ -159,12 +159,12 @@ int ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc,
  
          OBD_ALLOC(bulk, sizeof(*bulk));
          if (bulk == NULL)
-                return (-ENOMEM);
+                return -ENOMEM;
  
-        LASSERT (page != NULL);
-        LASSERT (pageoffset >= 0);
-        LASSERT (len > 0);
-        LASSERT (pageoffset + len <= PAGE_SIZE);
+        LASSERT(page != NULL);
+        LASSERT(pageoffset >= 0);
+        LASSERT(len > 0);
+        LASSERT(pageoffset + len <= PAGE_SIZE);
  
          bulk->bp_page = page;
          bulk->bp_pageoffset = pageoffset;
@@ -181,9 +181,9 @@ void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc)
          struct list_head *tmp, *next;
          ENTRY;
  
-        LASSERT (desc != NULL);
-        LASSERT (desc->bd_page_count != 0x5a5a5a5a); /* not freed already */
-        LASSERT (!desc->bd_network_rw);         /* network hands off or */
+        LASSERT(desc != NULL);
+        LASSERT(desc->bd_page_count != 0x5a5a5a5a); /* not freed already */
+        LASSERT(!desc->bd_network_rw);         /* network hands off or */
  
          list_for_each_safe(tmp, next, &desc->bd_page_list) {
                  struct ptlrpc_bulk_page *bulk;
@@ -191,7 +191,7 @@ void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc)
                  ptlrpc_free_bulk_page(bulk);
          }
  
-        LASSERT (desc->bd_page_count == 0);
+        LASSERT(desc->bd_page_count == 0);
          LASSERT((desc->bd_export != NULL) ^ (desc->bd_import != NULL));
  
          if (desc->bd_export)
@@ -205,7 +205,7 @@ void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc)
  
  void ptlrpc_free_bulk_page(struct ptlrpc_bulk_page *bulk)
  {
-        LASSERT (bulk != NULL);
+        LASSERT(bulk != NULL);
  
          list_del(&bulk->bp_link);
          bulk->bp_desc->bd_page_count--;
@@ -247,7 +247,7 @@ struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, int opcode,
  
          request->rq_connection = ptlrpc_connection_addref(imp->imp_connection);
  
-        spin_lock_init (&request->rq_lock);
+        spin_lock_init(&request->rq_lock);
          INIT_LIST_HEAD(&request->rq_list);
          init_waitqueue_head(&request->rq_wait_for_rep);
          request->rq_xid = ptlrpc_next_xid();
@@ -289,18 +289,18 @@ void ptlrpc_set_destroy(struct ptlrpc_request_set *set)
                  struct ptlrpc_request *req =
                          list_entry(tmp, struct ptlrpc_request, rq_set_chain);
  
-                LASSERT (req->rq_phase == expected_phase);
+                LASSERT(req->rq_phase == expected_phase);
                  n++;
          }
  
-        LASSERT (set->set_remaining == 0 || set->set_remaining == n);
+        LASSERT(set->set_remaining == 0 || set->set_remaining == n);
  
          list_for_each_safe(tmp, next, &set->set_requests) {
                  struct ptlrpc_request *req =
                          list_entry(tmp, struct ptlrpc_request, rq_set_chain);
                  list_del_init(&req->rq_set_chain);
  
-                LASSERT (req->rq_phase == expected_phase);
+                LASSERT(req->rq_phase == expected_phase);
  
                  if (req->rq_phase == RQ_PHASE_NEW) {
  
@@ -312,7 +312,8 @@ void ptlrpc_set_destroy(struct ptlrpc_request_set *set)
                                  /* higher level (i.e. LOV) failed;
                                   * let the sub reqs clean up */
                                  req->rq_status = -EBADR;
-                                interpreter(req, &req->rq_async_args, req->rq_status);
+                                interpreter(req, &req->rq_async_args,
+                                            req->rq_status);
                          }
                          set->set_remaining--;
                  }
@@ -402,8 +403,8 @@ static int after_reply(struct ptlrpc_request *req, int *restartp)
          int rc;
          ENTRY;
  
-        LASSERT (!req->rq_receiving_reply);
-        LASSERT (req->rq_replied);
+        LASSERT(!req->rq_receiving_reply);
+        LASSERT(req->rq_replied);
  
          if (restartp != NULL)
                  *restartp = 0;
@@ -418,14 +419,14 @@ static int after_reply(struct ptlrpc_request *req, int *restartp)
          rc = lustre_unpack_msg(req->rq_repmsg, req->rq_replen);
          if (rc) {
                  CERROR("unpack_rep failed: %d\n", rc);
-                RETURN (-EPROTO);
+                RETURN(-EPROTO);
          }
  
          if (req->rq_repmsg->type != PTL_RPC_MSG_REPLY &&
              req->rq_repmsg->type != PTL_RPC_MSG_ERR) {
                  CERROR("invalid packet type received (type=%u)\n",
                         req->rq_repmsg->type);
-                RETURN (-EPROTO);
+                RETURN(-EPROTO);
          }
  
          /* Store transno in reqmsg for replay. */
@@ -447,6 +448,9 @@ static int after_reply(struct ptlrpc_request *req, int *restartp)
                  if (req->rq_err)
                          RETURN(-EIO);
  
+                if (req->rq_no_resend)
+                        RETURN(rc); /* -ENOTCONN */
+
                  if (req->rq_resend) {
                          if (restartp == NULL)
                                  LBUG(); /* async resend not supported yet */
@@ -456,7 +460,7 @@ static int after_reply(struct ptlrpc_request *req, int *restartp)
                          *restartp = 1;
                          lustre_msg_add_flags(req->rq_reqmsg, MSG_RESENT);
                          DEBUG_REQ(D_HA, req, "resending: ");
-                        RETURN (0);
+                        RETURN(0);
                  }
  
                  CERROR("request should be err or resend: %p\n", req);
@@ -472,10 +476,9 @@ static int after_reply(struct ptlrpc_request *req, int *restartp)
                          imp->imp_max_transno = req->rq_transno;
  
                  /* Replay-enabled imports return commit-status information. */
-                if (req->rq_repmsg->last_committed) {
+                if (req->rq_repmsg->last_committed)
                          imp->imp_peer_committed_transno =
                                  req->rq_repmsg->last_committed;
-                }
                  ptlrpc_free_committed(imp);
                  spin_unlock_irqrestore(&imp->imp_lock, flags);
          }
@@ -510,8 +513,8 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set)
                          continue;
  
                  if (req->rq_phase == RQ_PHASE_INTERPRET)
-                        GOTO (interpret, req->rq_status);
-                
+                        GOTO(interpret, req->rq_status);
+
                  if (req->rq_err) {
                          ptlrpc_unregister_reply(req);
                          if (req->rq_status == 0)
@@ -522,7 +525,7 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set)
                          list_del_init(&req->rq_list);
                          spin_unlock_irqrestore(&imp->imp_lock, flags);
  
-                        GOTO (interpret, req->rq_status);
+                        GOTO(interpret, req->rq_status);
                  }
  
                  if (req->rq_intr) {
@@ -535,7 +538,7 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set)
                          list_del_init(&req->rq_list);
                          spin_unlock_irqrestore(&imp->imp_lock, flags);
  
-                        GOTO (interpret, req->rq_status);
+                        GOTO(interpret, req->rq_status);
                  }
  
                  if (req->rq_phase == RQ_PHASE_RPC) {
@@ -553,13 +556,13 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set)
                                  list_add_tail(&req->rq_list,
                                                &imp->imp_sending_list);
  
-                                if (req->rq_import_generation < 
+                                if (req->rq_import_generation <
                                      imp->imp_generation) {
                                          req->rq_status = -EIO;
                                          req->rq_phase = RQ_PHASE_INTERPRET;
-                                        spin_unlock_irqrestore(&imp->imp_lock, 
+                                        spin_unlock_irqrestore(&imp->imp_lock,
                                                                 flags);
-                                        GOTO (interpret, req->rq_status);
+                                        GOTO(interpret, req->rq_status);
                                  }
                                  spin_unlock_irqrestore(&imp->imp_lock, flags);
  
@@ -571,16 +574,17 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set)
                                          req->rq_resend = 0;
                                          spin_unlock_irqrestore(&req->rq_lock,
                                                                 flags);
+
                                          ptlrpc_unregister_reply(req);
                                          if (req->rq_bulk)
                                                  ptlrpc_unregister_bulk(req);
-                               }
+                                }
  
                                  rc = ptl_send_rpc(req);
                                  if (rc) {
                                          req->rq_status = rc;
                                          req->rq_phase = RQ_PHASE_INTERPRET;
-                                        GOTO (interpret, req->rq_status);
+                                        GOTO(interpret, req->rq_status);
                                  }
  
                          }
@@ -612,21 +616,21 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set)
                           */
                          if (req->rq_bulk == NULL || req->rq_status != 0) {
                                  req->rq_phase = RQ_PHASE_INTERPRET;
-                                GOTO (interpret, req->rq_status);
+                                GOTO(interpret, req->rq_status);
                          }
  
                          req->rq_phase = RQ_PHASE_BULK;
                  }
  
-                LASSERT (req->rq_phase == RQ_PHASE_BULK);
+                LASSERT(req->rq_phase == RQ_PHASE_BULK);
                  if (!ptlrpc_bulk_complete (req->rq_bulk))
                          continue;
  
                  req->rq_phase = RQ_PHASE_INTERPRET;
  
          interpret:
-                LASSERT (req->rq_phase == RQ_PHASE_INTERPRET);
-                LASSERT (!req->rq_receiving_reply);
+                LASSERT(req->rq_phase == RQ_PHASE_INTERPRET);
+                LASSERT(!req->rq_receiving_reply);
  
                  ptlrpc_unregister_reply(req);
                  if (req->rq_bulk != NULL)
@@ -651,7 +655,7 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set)
                  set->set_remaining--;
          }
  
-        RETURN (set->set_remaining == 0);
+        RETURN(set->set_remaining == 0);
  }
  
  int ptlrpc_expire_one_request(struct ptlrpc_request *req)
@@ -695,7 +699,7 @@ static int expired_set(void *data)
          time_t                     now = LTIME_S (CURRENT_TIME);
          ENTRY;
  
-        LASSERT (set != NULL);
+        LASSERT(set != NULL);
  
          /* A timeout expired; see which reqs it applies to... */
          list_for_each (tmp, &set->set_requests) {
@@ -728,7 +732,7 @@ static void interrupted_set(void *data)
          struct list_head *tmp;
          unsigned long flags;
  
-        LASSERT (set != NULL);
+        LASSERT(set != NULL);
          CERROR("INTERRUPTED SET %p\n", set);
  
          list_for_each(tmp, &set->set_requests) {
@@ -757,12 +761,13 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set)
          int                    timeout;
          ENTRY;
  
+        SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */
          LASSERT(!list_empty(&set->set_requests));
          list_for_each(tmp, &set->set_requests) {
                  req = list_entry(tmp, struct ptlrpc_request, rq_set_chain);
  
-                LASSERT (req->rq_level == LUSTRE_CONN_FULL);
-                LASSERT (req->rq_phase == RQ_PHASE_NEW);
+                LASSERT(req->rq_level == LUSTRE_CONN_FULL);
+                LASSERT(req->rq_phase == RQ_PHASE_NEW);
                  req->rq_phase = RQ_PHASE_RPC;
  
                  imp = req->rq_import;
@@ -789,7 +794,7 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set)
                          spin_lock (&req->rq_lock);
                          req->rq_waiting = 1;
                          spin_unlock (&req->rq_lock);
-                        LASSERT (list_empty (&req->rq_list));
+                        LASSERT(list_empty (&req->rq_list));
                          // list_del(&req->rq_list);
                          list_add_tail(&req->rq_list, &imp->imp_delayed_list);
                          spin_unlock_irqrestore(&imp->imp_lock, flags);
@@ -801,6 +806,7 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set)
                  list_add_tail(&req->rq_list, &imp->imp_sending_list);
                  spin_unlock_irqrestore(&imp->imp_lock, flags);
  
+                req->rq_reqmsg->status = current->pid;
                  CDEBUG(D_RPCTRACE, "Sending RPC pname:cluuid:pid:xid:ni:nid:opc"
                         " %s:%s:%d:"LPU64":%s:"LPX64":%d\n", current->comm,
                         imp->imp_obd->obd_uuid.uuid, req->rq_reqmsg->status,
@@ -820,7 +826,8 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set)
                  now = LTIME_S (CURRENT_TIME);
                  timeout = 0;
                  list_for_each (tmp, &set->set_requests) {
-                        req = list_entry(tmp, struct ptlrpc_request, rq_set_chain);
+                        req = list_entry(tmp, struct ptlrpc_request,
+                                         rq_set_chain);
  
                          /* request in-flight? */
                          if (!((req->rq_phase == RQ_PHASE_RPC &&
@@ -846,7 +853,7 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set)
                                         expired_set, interrupted_set, set);
                  rc = l_wait_event(set->set_waitq, ptlrpc_check_set(set), &lwi);
  
-                LASSERT (rc == 0 || rc == -EINTR || rc == -ETIMEDOUT);
+                LASSERT(rc == 0 || rc == -EINTR || rc == -ETIMEDOUT);
  
                  /* -EINTR => all requests have been flagged rq_intr so next
                   * check completes.
@@ -857,13 +864,13 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set)
                   * the error cases -eeb. */
          } while (rc != 0);
  
-        LASSERT (set->set_remaining == 0);
+        LASSERT(set->set_remaining == 0);
  
          rc = 0;
          list_for_each(tmp, &set->set_requests) {
                  req = list_entry(tmp, struct ptlrpc_request, rq_set_chain);
  
-                LASSERT (req->rq_phase == RQ_PHASE_COMPLETE);
+                LASSERT(req->rq_phase == RQ_PHASE_COMPLETE);
                  if (req->rq_status != 0)
                          rc = req->rq_status;
          }
@@ -885,7 +892,7 @@ static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked)
                  return;
          }
  
-        LASSERT (!request->rq_receiving_reply);
+        LASSERT(!request->rq_receiving_reply);
  
          /* We must take it off the imp_replay_list first.  Otherwise, we'll set
           * request->rq_reqmsg to NULL while osc_close is dereferencing it. */
@@ -940,7 +947,7 @@ static int __ptlrpc_req_finished(struct ptlrpc_request *request, int locked)
          if (request == NULL)
                  RETURN(1);
  
-        if (request == (void *)(long)(0x5a5a5a5a5a5a5a5a) || 
+        if (request == (void *)(long)(0x5a5a5a5a5a5a5a5a) ||
              request->rq_obd == (void *)(long)(0x5a5a5a5a5a5a5a5a)) {
                  CERROR("dereferencing freed request (bug 575)\n");
                  LBUG();
@@ -981,7 +988,7 @@ void ptlrpc_unregister_reply (struct ptlrpc_request *request)
          int           rc;
          ENTRY;
  
-        LASSERT (!in_interrupt ());             /* might sleep */
+        LASSERT(!in_interrupt ());             /* might sleep */
  
          spin_lock_irqsave (&request->rq_lock, flags);
          if (!request->rq_receiving_reply) {     /* not waiting for a reply */
@@ -991,7 +998,7 @@ void ptlrpc_unregister_reply (struct ptlrpc_request *request)
                  return;
          }
  
-        LASSERT (!request->rq_replied);         /* callback hasn't completed */
+        LASSERT(!request->rq_replied);         /* callback hasn't completed */
          spin_unlock_irqrestore (&request->rq_lock, flags);
  
          rc = PtlMDUnlink (request->rq_reply_md_h);
@@ -1000,8 +1007,8 @@ void ptlrpc_unregister_reply (struct ptlrpc_request *request)
                  LBUG ();
  
          case PTL_OK:                            /* unlinked before completion */
-                LASSERT (request->rq_receiving_reply);
-                LASSERT (!request->rq_replied);
+                LASSERT(request->rq_receiving_reply);
+                LASSERT(!request->rq_replied);
                  spin_lock_irqsave (&request->rq_lock, flags);
                  request->rq_receiving_reply = 0;
                  spin_unlock_irqrestore (&request->rq_lock, flags);
@@ -1018,7 +1025,7 @@ void ptlrpc_unregister_reply (struct ptlrpc_request *request)
  
                          rc = l_wait_event (request->rq_wait_for_rep,
                                             request->rq_replied, &lwi);
-                        LASSERT (rc == 0 || rc == -ETIMEDOUT);
+                        LASSERT(rc == 0 || rc == -ETIMEDOUT);
                          if (rc == 0) {
                                  spin_lock_irqsave (&request->rq_lock, flags);
                                  /* Ensure the callback has completed scheduling
@@ -1032,8 +1039,8 @@ void ptlrpc_unregister_reply (struct ptlrpc_request *request)
                  /* fall through */
  
          case PTL_INV_MD:                        /* callback completed */
-                LASSERT (!request->rq_receiving_reply);
-                LASSERT (request->rq_replied);
+                LASSERT(!request->rq_receiving_reply);
+                LASSERT(request->rq_replied);
                  EXIT;
                  return;
          }
@@ -1061,7 +1068,7 @@ void ptlrpc_free_committed(struct obd_import *imp)
                  req = list_entry(tmp, struct ptlrpc_request, rq_list);
  
                  /* XXX ok to remove when 1357 resolved - rread 05/29/03  */
-                LASSERT (req != last_req);
+                LASSERT(req != last_req);
                  last_req = req;
  
                  if (req->rq_import_generation < imp->imp_generation) {
@@ -1208,14 +1215,13 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req)
          struct l_wait_info lwi;
          struct obd_import *imp = req->rq_import;
          struct obd_device *obd = imp->imp_obd;
-        struct ptlrpc_connection *conn = imp->imp_connection;
-        unsigned int flags;
+        unsigned long flags;
          int do_restart = 0;
          int timeout = 0;
          ENTRY;
  
-        LASSERT (req->rq_set == NULL);
-        LASSERT (!req->rq_receiving_reply);
+        LASSERT(req->rq_set == NULL);
+        LASSERT(!req->rq_receiving_reply);
  
          /* for distributed debugging */
          req->rq_reqmsg->status = current->pid;
@@ -1224,7 +1230,8 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req)
                 "%s:%s:%d:"LPU64":%s:"LPX64":%d\n", current->comm,
                 imp->imp_obd->obd_uuid.uuid,
                 req->rq_reqmsg->status, req->rq_xid,
-               conn->c_peer.peer_ni->pni_name, conn->c_peer.peer_nid,
+               imp->imp_connection->c_peer.peer_ni->pni_name,
+               imp->imp_connection->c_peer.peer_nid,
                 req->rq_reqmsg->opc);
  
          /* Mark phase here for a little debug help */
@@ -1242,13 +1249,13 @@ restart:
          if (req->rq_import->imp_invalid && req->rq_level == LUSTRE_CONN_FULL) {
                  DEBUG_REQ(D_ERROR, req, "IMP_INVALID:");
                  spin_unlock_irqrestore(&imp->imp_lock, flags);
-                GOTO (out, rc = -EIO);
+                GOTO(out, rc = -EIO);
          }
  
          if (req->rq_import_generation < imp->imp_generation) {
                  DEBUG_REQ(D_ERROR, req, "req old gen:");
                  spin_unlock_irqrestore(&imp->imp_lock, flags);
-                GOTO (out, rc = -EIO);
+                GOTO(out, rc = -EIO);
          }
  
          if (req->rq_level > imp->imp_level) {
@@ -1256,7 +1263,7 @@ restart:
                  if (req->rq_no_recov || obd->obd_no_recov ||
                      imp->imp_dlm_fake) {
                          spin_unlock_irqrestore(&imp->imp_lock, flags);
-                        GOTO (out, rc = -EWOULDBLOCK);
+                        GOTO(out, rc = -EWOULDBLOCK);
                  }
  
                  list_add_tail(&req->rq_list, &imp->imp_delayed_list);
@@ -1269,23 +1276,24 @@ restart:
                                    (req->rq_level <= imp->imp_level ||
                                     req->rq_err),
                                    &lwi);
-                DEBUG_REQ(D_HA, req, "\"%s\" awake: (%d > %d)",
-                          current->comm, req->rq_level, imp->imp_level);
+                DEBUG_REQ(D_HA, req, "\"%s\" awake: (%d > %d or %d == 1)",
+                          current->comm, imp->imp_level, req->rq_level,
+                          req->rq_err);
  
                  spin_lock_irqsave(&imp->imp_lock, flags);
                  list_del_init(&req->rq_list);
  
-                if (req->rq_err || 
+                if (req->rq_err ||
                      req->rq_import_generation < imp->imp_generation)
                          rc = -EIO;
  
  
                  if (rc) {
                          spin_unlock_irqrestore(&imp->imp_lock, flags);
-                        GOTO (out, rc);
+                        GOTO(out, rc);
                  }
  
-                CERROR("process %d resumed\n", current->pid);
+                DEBUG_REQ(D_HA, req, "resumed");
          }
  
          /* XXX this is the same as ptlrpc_set_wait */
@@ -1335,7 +1343,7 @@ restart:
                            &reply_ev);
                  reply_in_callback(&reply_ev);
  
-                LASSERT (reply_ev.mem_desc.user_ptr == (void *)req);
+                LASSERT(reply_ev.mem_desc.user_ptr == (void *)req);
                  // ptlrpc_check_reply(req);
                  // not required now it only tests
          }
@@ -1347,7 +1355,8 @@ restart:
                 "%s:%s:%d:"LPU64":%s:"LPX64":%d\n", current->comm,
                 imp->imp_obd->obd_uuid.uuid,
                 req->rq_reqmsg->status, req->rq_xid,
-               conn->c_peer.peer_ni->pni_name, conn->c_peer.peer_nid,
+               imp->imp_connection->c_peer.peer_ni->pni_name,
+               imp->imp_connection->c_peer.peer_nid,
                 req->rq_reqmsg->opc);
  
          spin_lock_irqsave(&imp->imp_lock, flags);
@@ -1421,7 +1430,7 @@ restart:
                                             ptlrpc_bulk_complete(req->rq_bulk),
                                             &lwi);
                          if (brc != 0) {
-                                LASSERT (brc == -ETIMEDOUT);
+                                LASSERT(brc == -ETIMEDOUT);
                                  CERROR ("Timed out waiting for bulk\n");
                                  rc = brc;
                          }
@@ -1429,14 +1438,14 @@ restart:
                  if (rc < 0) {
                          /* MDS blocks for put ACKs before replying */
                          /* OSC sets rq_no_resend for the time being */
-                        LASSERT (req->rq_no_resend);
+                        LASSERT(req->rq_no_resend);
                          ptlrpc_unregister_bulk (req);
                  }
          }
  
-        LASSERT (!req->rq_receiving_reply);
+        LASSERT(!req->rq_receiving_reply);
          req->rq_phase = RQ_PHASE_INTERPRET;
-        RETURN (rc);
+        RETURN(rc);
  }
  
  int ptlrpc_replay_req(struct ptlrpc_request *req)
@@ -1450,7 +1459,7 @@ int ptlrpc_replay_req(struct ptlrpc_request *req)
           * state it was left in */
  
          /* Not handling automatic bulk replay yet (or ever?) */
-        LASSERT (req->rq_bulk == NULL);
+        LASSERT(req->rq_bulk == NULL);
  
          DEBUG_REQ(D_NET, req, "about to replay");
  
diff --git a/lustre/ptlrpc/lproc_ptlrpc.c b/lustre/ptlrpc/lproc_ptlrpc.c

index 07be1af..c4c47d3 100644 (file)
--- a/lustre/ptlrpc/lproc_ptlrpc.c
+++ b/lustre/ptlrpc/lproc_ptlrpc.c
@@ -50,6 +50,7 @@ struct ll_rpc_opcode {
          { OST_SAN_READ,     "ost_san_read" },
          { OST_SAN_WRITE,    "ost_san_write" },
          { OST_SYNCFS,       "ost_syncfs" },
+        { OST_SET_INFO,     "ost_set_info" },
          { MDS_GETATTR,      "mds_getattr" },
          { MDS_GETATTR_NAME, "mds_getattr_name" },
          { MDS_CLOSE,        "mds_close" },
@@ -60,6 +61,8 @@ struct ll_rpc_opcode {
          { MDS_GETSTATUS,    "mds_getstatus" },
          { MDS_STATFS,       "mds_statfs" },
          { MDS_GETLOVINFO,   "mds_getlovinfo" },
+        { MDS_PIN,          "mds_pin" },
+        { MDS_UNPIN,        "mds_unpin" },
          { LDLM_ENQUEUE,     "ldlm_enqueue" },
          { LDLM_CONVERT,     "ldlm_convert" },
          { LDLM_CANCEL,      "ldlm_cancel" },
@@ -71,7 +74,8 @@ struct ll_rpc_opcode {
          { PTLBD_FLUSH,      "ptlbd_flush" },
          { PTLBD_CONNECT,    "ptlbd_connect" },
          { PTLBD_DISCONNECT, "ptlbd_disconnect" },
-        { OBD_PING,         "obd_ping" }
+        { OBD_PING,         "obd_ping" },
+        { OBD_LOG_CANCEL,   "obd_log_cancel" },
  };
  
  const char* ll_opcode2str(__u32 opcode)
@@ -119,7 +123,7 @@ void ptlrpc_lprocfs_register_service(struct obd_device *obddev,
          }
  
          lprocfs_counter_init(svc_stats, PTLRPC_REQWAIT_CNTR,
-                             svc_counter_config, "req_waittime", "cycles");
+                             svc_counter_config, "req_waittime", "usec");
          /* Wait for b_eq branch
          lprocfs_counter_init(svc_stats, PTLRPC_SVCEQDEPTH_CNTR,
                               svc_counter_config, "svc_eqdepth", "reqs");
@@ -127,12 +131,12 @@ void ptlrpc_lprocfs_register_service(struct obd_device *obddev,
          /* no stddev on idletime */
          lprocfs_counter_init(svc_stats, PTLRPC_SVCIDLETIME_CNTR,
                               (LPROCFS_CNTR_EXTERNALLOCK|LPROCFS_CNTR_AVGMINMAX),
-                             "svc_idletime", "cycles");
+                             "svc_idletime", "usec");
          for (i = 0; i < LUSTRE_MAX_OPCODES; i++) {
                  __u32 opcode = ll_rpc_opcode_table[i].opcode;
                  lprocfs_counter_init(svc_stats, PTLRPC_LAST_CNTR + i,
                                       svc_counter_config, ll_opcode2str(opcode),
-                                     "cycles");
+                                     "usec");
          }
  
          rc = lprocfs_register_stats(svc_procroot, "stats", svc_stats);
diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c

index 3811d2a..0e2d651 100644 (file)
--- a/lustre/ptlrpc/pack_generic.c
+++ b/lustre/ptlrpc/pack_generic.c
@@ -187,14 +187,9 @@ void *lustre_msg_buf(struct lustre_msg *m, int n, int min_size)
          }
  
          buflen = m->buflens[n];
-        if (buflen == 0) {
-                CERROR("msg %p buffer[%d] is zero length\n", m, n);
-                return NULL;
-        }
-
          if (buflen < min_size) {
                  CERROR("msg %p buffer[%d] size %d too small (required %d)\n",
-                        m, n, buflen, min_size);
+                       m, n, buflen, min_size);
                  return NULL;
          }
  
@@ -249,17 +244,16 @@ void *lustre_swab_reqbuf (struct ptlrpc_request *req, int index, int min_size,
  {
          void *ptr;
  
-        LASSERT_REQSWAB (req, index);
+        LASSERT_REQSWAB(req, index);
  
          ptr = lustre_msg_buf(req->rq_reqmsg, index, min_size);
          if (ptr == NULL)
-                return (NULL);
+                return NULL;
  
-        if (swabber != NULL &&
-            lustre_msg_swabbed (req->rq_reqmsg))
+        if (swabber != NULL && lustre_msg_swabbed(req->rq_reqmsg))
                  ((void (*)(void *))swabber)(ptr);
  
-        return (ptr);
+        return ptr;
  }
  
  /* Wrap up the normal fixed length case */
@@ -268,17 +262,16 @@ void *lustre_swab_repbuf (struct ptlrpc_request *req, int index, int min_size,
  {
          void *ptr;
  
-        LASSERT_REPSWAB (req, index);
+        LASSERT_REPSWAB(req, index);
  
-        ptr = lustre_msg_buf (req->rq_repmsg, index, min_size);
+        ptr = lustre_msg_buf(req->rq_repmsg, index, min_size);
          if (ptr == NULL)
-                return (NULL);
+                return NULL;
  
-        if (swabber != NULL &&
-            lustre_msg_swabbed (req->rq_repmsg))
+        if (swabber != NULL && lustre_msg_swabbed(req->rq_repmsg))
                  ((void (*)(void *))swabber)(ptr);
  
-        return (ptr);
+        return ptr;
  }
  
  /* byte flipping routines for all wire types declared in
@@ -638,12 +631,12 @@ void lustre_assert_wire_constants (void)
          LASSERT (REINT_RENAME == 5);
          LASSERT (REINT_OPEN == 6);
          LASSERT (REINT_MAX == 6);
-        LASSERT (IT_INTENT_EXEC == 1);
-        LASSERT (IT_OPEN_LOOKUP == 2);
-        LASSERT (IT_OPEN_NEG == 4);
-        LASSERT (IT_OPEN_POS == 8);
-        LASSERT (IT_OPEN_CREATE == 16);
-        LASSERT (IT_OPEN_OPEN == 32);
+        LASSERT (DISP_IT_EXECD == 1);
+        LASSERT (DISP_LOOKUP_EXECD == 2);
+        LASSERT (DISP_LOOKUP_NEG == 4);
+        LASSERT (DISP_LOOKUP_POS == 8);
+        LASSERT (DISP_OPEN_CREATE == 16);
+        LASSERT (DISP_OPEN_OPEN == 32);
          LASSERT (MDS_STATUS_CONN == 1);
          LASSERT (MDS_STATUS_LOV == 2);
          LASSERT (MDS_OPEN_HAS_EA == 1);
diff --git a/lustre/ptlrpc/pinger.c b/lustre/ptlrpc/pinger.c

index ebc69e1..c81fb51 100644 (file)
--- a/lustre/ptlrpc/pinger.c
+++ b/lustre/ptlrpc/pinger.c
@@ -47,12 +47,12 @@ void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
  
  int ptlrpc_pinger_add_import(struct obd_import *imp)
  {
+#ifndef ENABLE_PINGER
+        return 0;
+#else
          int rc;
          ENTRY;
  
-#ifndef ENABLE_PINGER
-        RETURN(0);
-#else
          if (!list_empty(&imp->imp_pinger_chain))
                  RETURN(-EALREADY);
  
@@ -77,12 +77,12 @@ int ptlrpc_pinger_add_import(struct obd_import *imp)
  
  int ptlrpc_pinger_del_import(struct obd_import *imp)
  {
+#ifndef ENABLE_PINGER
+        return 0;
+#else
          int rc;
          ENTRY;
  
-#ifndef ENABLE_PINGER
-        RETURN(0);
-#else
          if (list_empty(&imp->imp_pinger_chain))
                  RETURN(-ENOENT);
  
@@ -118,14 +118,7 @@ static int ptlrpc_pinger_main(void *arg)
          RECALC_SIGPENDING;
          SIGNAL_MASK_UNLOCK(current, flags);
  
-#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
-        sprintf(current->comm, "%s|%d", data->name,current->thread.extern_pid);
-#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        sprintf(current->comm, "%s|%d", data->name,
-                current->thread.mode.tt.extern_pid);
-#else
-        strcpy(current->comm, data->name);
-#endif
+        THREAD_NAME(current->comm, "%s", data->name);
          unlock_kernel();
  
          /* Record that the thread is running */
@@ -147,7 +140,8 @@ static int ptlrpc_pinger_main(void *arg)
                  down(&pinger_sem);
                  list_for_each(iter, &pinger_imports) {
                          struct obd_import *imp =
-                                list_entry(iter, struct obd_import, imp_pinger_chain);
+                                list_entry(iter, struct obd_import,
+                                           imp_pinger_chain);
                          int generation, level;
                          unsigned long flags;
  
@@ -159,16 +153,19 @@ static int ptlrpc_pinger_main(void *arg)
                                  spin_unlock_irqrestore(&imp->imp_lock, flags);
  
                                  if (level != LUSTRE_CONN_FULL) {
-                                        CDEBUG(D_HA, "not pinging %s (in recovery)\n",
+                                        CDEBUG(D_HA,
+                                               "not pinging %s (in recovery)\n",
                                                 imp->imp_target_uuid.uuid);
                                          continue;
                                  }
  
-                                req = ptlrpc_prep_req(imp, OBD_PING, 0, NULL, NULL);
+                                req = ptlrpc_prep_req(imp, OBD_PING, 0, NULL,
+                                                      NULL);
                                  if (!req) {
                                          CERROR("OOM trying to ping\n");
                                          break;
                                  }
+                                req->rq_no_resend = 1;
                                  req->rq_replen = lustre_msg_size(0, NULL);
                                  req->rq_level = LUSTRE_CONN_FULL;
                                  req->rq_phase = RQ_PHASE_RPC;
diff --git a/lustre/ptlrpc/ptlrpc_internal.h b/lustre/ptlrpc/ptlrpc_internal.h

index cb96c3c..8d66c88 100644 (file)
--- a/lustre/ptlrpc/ptlrpc_internal.h
+++ b/lustre/ptlrpc/ptlrpc_internal.h
@@ -33,19 +33,22 @@ struct ptlrpc_request_set;
  /* ldlm hooks that we need, managed via inter_module_{get,put} */
  extern int (*ptlrpc_ldlm_namespace_cleanup)(struct ldlm_namespace *, int);
  extern int (*ptlrpc_ldlm_cli_cancel_unused)(struct ldlm_namespace *,
-                                     struct ldlm_res_id *, int);
+                                            struct ldlm_res_id *, int);
  extern int (*ptlrpc_ldlm_replay_locks)(struct obd_import *);
  
  int ptlrpc_get_ldlm_hooks(void);
  void ptlrpc_daemonize(void);
  
  void ptlrpc_request_handle_eviction(struct ptlrpc_request *);
-void lustre_assert_wire_constants (void);
+void lustre_assert_wire_constants(void);
  
  void ptlrpc_lprocfs_register_service(struct obd_device *obddev,
                                       struct ptlrpc_service *svc);
  void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service *svc);
  
+/* recovd_thread.c */
+int llog_init_commit_master(void);
+int llog_cleanup_commit_master(int force);
  
  static inline int opcode_offset(__u32 opc) {
          if (opc < OST_LAST_OPC) {
@@ -66,9 +69,9 @@ static inline int opcode_offset(__u32 opc) {
                          (LDLM_LAST_OPC - LDLM_FIRST_OPC) +
                          (MDS_LAST_OPC - MDS_FIRST_OPC) +
                          (OST_LAST_OPC - OST_FIRST_OPC));
-        } else if (opc == OBD_PING) {
+        } else if (opc < OBD_LAST_OPC) {
                  /* OBD Ping */
-                return (opc - OBD_PING +
+                return (opc - OBD_FIRST_OPC +
                          (PTLBD_LAST_OPC - PTLBD_FIRST_OPC) +
                          (LDLM_LAST_OPC - LDLM_FIRST_OPC) +
                          (MDS_LAST_OPC - MDS_FIRST_OPC) +
@@ -79,10 +82,11 @@ static inline int opcode_offset(__u32 opc) {
          }
  }
  
-#define LUSTRE_MAX_OPCODES (1 + (PTLBD_LAST_OPC - PTLBD_FIRST_OPC) \
-                              + (LDLM_LAST_OPC - LDLM_FIRST_OPC)   \
-                              + (MDS_LAST_OPC - MDS_FIRST_OPC)     \
-                              + (OST_LAST_OPC - OST_FIRST_OPC))
+#define LUSTRE_MAX_OPCODES ((PTLBD_LAST_OPC - PTLBD_FIRST_OPC) + \
+                            (LDLM_LAST_OPC - LDLM_FIRST_OPC)   + \
+                            (MDS_LAST_OPC - MDS_FIRST_OPC)     + \
+                            (OST_LAST_OPC - OST_FIRST_OPC)     + \
+                            (OBD_LAST_OPC - OBD_FIRST_OPC))
  
  enum {
          PTLRPC_REQWAIT_CNTR     = 0,
diff --git a/lustre/ptlrpc/ptlrpc_lib.c b/lustre/ptlrpc/ptlrpc_lib.c

index ccc05dc..3dfec9a 100644 (file)
--- a/lustre/ptlrpc/ptlrpc_lib.c
+++ b/lustre/ptlrpc/ptlrpc_lib.c
@@ -24,11 +24,12 @@
  
  #ifdef __KERNEL__
  # include <linux/module.h>
-#else 
+#else
  # include <liblustre.h>
  #endif
  #include <linux/obd.h>
  #include <linux/obd_ost.h>
+#include <linux/lustre_mgmt.h>
  #include <linux/lustre_net.h>
  #include <linux/lustre_dlm.h>
  
@@ -40,19 +41,27 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf)
          struct obd_import *imp;
          struct obd_uuid server_uuid;
          int rq_portal, rp_portal, connect_op;
-        char *name;
+        char *name = obddev->obd_type->typ_name;
          ENTRY;
  
-        if (obddev->obd_type->typ_ops->o_brw) {
+        /* In a more perfect world, we would hang a ptlrpc_client off of
+         * obd_type and just use the values from there. */
+        if (!strcmp(name, LUSTRE_OSC_NAME)) {
                  rq_portal = OST_REQUEST_PORTAL;
                  rp_portal = OSC_REPLY_PORTAL;
-                name = "osc";
                  connect_op = OST_CONNECT;
-        } else {
+        } else if (!strcmp(name, LUSTRE_MDC_NAME)) {
                  rq_portal = MDS_REQUEST_PORTAL;
                  rp_portal = MDC_REPLY_PORTAL;
-                name = "mdc";
                  connect_op = MDS_CONNECT;
+        } else if (!strcmp(name, LUSTRE_MGMTCLI_NAME)) {
+                rq_portal = MGMT_REQUEST_PORTAL;
+                rp_portal = MGMT_REPLY_PORTAL;
+                connect_op = MGMT_CONNECT;
+        } else {
+                CERROR("unknown client OBD type \"%s\", can't setup\n",
+                       name);
+                RETURN(-EINVAL);
          }
  
          if (data->ioc_inllen1 < 1) {
@@ -108,18 +117,60 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf)
  
          cli->cl_import = imp;
          cli->cl_max_mds_easize = sizeof(struct lov_mds_md);
+        cli->cl_max_mds_cookiesize = sizeof(struct llog_cookie);
          cli->cl_sandev = to_kdev_t(0);
  
+        /* Register with management client if we need to. */
+        if (data->ioc_inllen3 > 0) {
+                char *mgmt_name = data->ioc_inlbuf3;
+                int rc;
+                struct obd_device *mgmt_obd;
+                mgmtcli_register_for_events_t register_f;
+
+                CDEBUG(D_HA, "%s registering with %s for events about %s\n",
+                       obddev->obd_name, mgmt_name, server_uuid.uuid);
+
+                mgmt_obd = class_name2obd(mgmt_name);
+                if (!mgmt_obd) {
+                        CERROR("can't find mgmtcli %s to register\n",
+                               mgmt_name);
+                        class_destroy_import(imp);
+                        RETURN(-ENOENT);
+                }
+                
+                register_f = inter_module_get("mgmtcli_register_for_events");
+                if (!register_f) {
+                        CERROR("can't i_m_g mgmtcli_register_for_events\n");
+                        class_destroy_import(imp);
+                        RETURN(-ENOSYS);
+                }
+                
+                rc = register_f(mgmt_obd, obddev, &imp->imp_target_uuid);
+                inter_module_put("mgmtcli_register_for_events");
+
+                if (!rc)
+                        cli->cl_mgmtcli_obd = mgmt_obd;
+
+                RETURN(rc);
+        }
+
          RETURN(0);
  }
  
-int client_obd_cleanup(struct obd_device *obddev, int force, int failover)
+int client_obd_cleanup(struct obd_device *obddev, int flags)
  {
-        struct client_obd *client = &obddev->u.cli;
+        struct client_obd *cli = &obddev->u.cli;
  
-        if (!client->cl_import)
+        if (!cli->cl_import)
                  RETURN(-EINVAL);
-        class_destroy_import(client->cl_import);
-        client->cl_import = NULL;
+        if (cli->cl_mgmtcli_obd) {
+                mgmtcli_deregister_for_events_t dereg_f;
+                
+                dereg_f = inter_module_get("mgmtcli_deregister_for_events");
+                dereg_f(cli->cl_mgmtcli_obd, obddev);
+                inter_module_put("mgmtcli_deregister_for_events");
+        }
+        class_destroy_import(cli->cl_import);
+        cli->cl_import = NULL;
          RETURN(0);
  }
diff --git a/lustre/ptlrpc/ptlrpc_module.c b/lustre/ptlrpc/ptlrpc_module.c

index 57f3653..4b75026 100644 (file)
--- a/lustre/ptlrpc/ptlrpc_module.c
+++ b/lustre/ptlrpc/ptlrpc_module.c
@@ -100,13 +100,14 @@ __init int ptlrpc_init(void)
          int rc;
          ENTRY;
  
-        lustre_assert_wire_constants ();
-        
+        lustre_assert_wire_constants();
+
          rc = ptlrpc_init_portals();
          if (rc)
                  RETURN(rc);
  
          ptlrpc_init_connection();
+        llog_init_commit_master();
  
          ptlrpc_put_connection_superhack = ptlrpc_put_connection;
          ptlrpc_abort_inflight_superhack = ptlrpc_abort_inflight;
@@ -117,6 +118,9 @@ static void __exit ptlrpc_exit(void)
  {
          ptlrpc_exit_portals();
          ptlrpc_cleanup_connection();
+#ifdef ENABLE_ORPHANS
+        llog_cleanup_commit_master(0);
+#endif
  }
  
  /* connection.c */
diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c

index ca2afad..70e9b5c 100644 (file)
--- a/lustre/ptlrpc/recover.c
+++ b/lustre/ptlrpc/recover.c
@@ -24,11 +24,11 @@
  
  #define DEBUG_SUBSYSTEM S_RPC
  #ifdef __KERNEL__
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/kmod.h>
+# include <linux/config.h>
+# include <linux/module.h>
+# include <linux/kmod.h>
  #else
-#include <liblustre.h>
+# include <liblustre.h>
  #endif
  
  #include <linux/obd_support.h>
@@ -62,7 +62,7 @@ int ptlrpc_reconnect_import(struct obd_import *imp)
          struct lustre_handle old_hdl;
          __u64 committed_before_reconnect = imp->imp_peer_committed_transno;
  
-        CERROR("reconnect handle "LPX64"\n", 
+        CERROR("reconnect handle "LPX64"\n",
                 imp->imp_dlm_handle.cookie);
  
          req = ptlrpc_prep_req(imp, imp->imp_connect_op, 3, size, tmp);
@@ -89,7 +89,7 @@ int ptlrpc_reconnect_import(struct obd_import *imp)
                          GOTO(out_disc, rc = -ENOTCONN);
                  }
  
-                if (memcmp(&imp->imp_remote_handle, &req->rq_repmsg->handle, 
+                if (memcmp(&imp->imp_remote_handle, &req->rq_repmsg->handle,
                             sizeof(imp->imp_remote_handle))) {
                          CERROR("%s@%s changed handle from "LPX64" to "LPX64
                                 "; copying, but this may foreshadow disaster\n",
@@ -104,12 +104,13 @@ int ptlrpc_reconnect_import(struct obd_import *imp)
                  CERROR("reconnected to %s@%s after partition\n",
                         imp->imp_target_uuid.uuid, conn->c_remote_uuid.uuid);
                  GOTO(out_disc, rc = RECON_RESULT_RECONNECTED);
-        } else if (lustre_msg_get_op_flags(req->rq_repmsg) & MSG_CONNECT_RECOVERING) {
+        } else if (lustre_msg_get_op_flags(req->rq_repmsg) &
+                   MSG_CONNECT_RECOVERING) {
                  rc = RECON_RESULT_RECOVERING;
          } else {
                  rc = RECON_RESULT_EVICTED;
          }
-        
+
          old_hdl = imp->imp_remote_handle;
          imp->imp_remote_handle = req->rq_repmsg->handle;
          CERROR("reconnected to %s@%s ("LPX64", was "LPX64")!\n",
@@ -150,9 +151,9 @@ void ptlrpc_run_recovery_over_upcall(struct obd_device *obd)
          rc = USERMODEHELPER(argv[0], argv, envp);
          if (rc < 0) {
                  CERROR("Error invoking recovery upcall %s %s %s: %d; check "
-                       "/proc/sys/lustre/upcall\n",                
+                       "/proc/sys/lustre/upcall\n",
                         argv[0], argv[1], argv[2], rc);
-                
+
          } else {
                  CERROR("Invoked upcall %s %s %s",
                         argv[0], argv[1], argv[2]);
@@ -180,10 +181,10 @@ void ptlrpc_run_failed_import_upcall(struct obd_import* imp)
  
          rc = USERMODEHELPER(argv[0], argv, envp);
          if (rc < 0) {
-                CERROR("Error invoking recovery upcall %s %s %s %s %s: %d; check "
-                       "/proc/sys/lustre/lustre_upcall\n",                
+                CERROR("Error invoking recovery upcall %s %s %s %s %s: %d; "
+                       "check /proc/sys/lustre/lustre_upcall\n",
                         argv[0], argv[1], argv[2], argv[3], argv[4],rc);
-                
+
          } else {
                  CERROR("Invoked upcall %s %s %s %s %s\n",
                         argv[0], argv[1], argv[2], argv[3], argv[4]);
@@ -196,7 +197,6 @@ int ptlrpc_replay(struct obd_import *imp)
          struct list_head *tmp, *pos;
          struct ptlrpc_request *req;
          unsigned long flags;
-        __u64 committed = imp->imp_peer_committed_transno;
          ENTRY;
  
          /* It might have committed some after we last spoke, so make sure we
@@ -207,7 +207,7 @@ int ptlrpc_replay(struct obd_import *imp)
          spin_unlock_irqrestore(&imp->imp_lock, flags);
  
          CDEBUG(D_HA, "import %p from %s has committed "LPD64"\n",
-               imp, imp->imp_target_uuid.uuid, committed);
+               imp, imp->imp_target_uuid.uuid, imp->imp_peer_committed_transno);
  
          list_for_each(tmp, &imp->imp_replay_list) {
                  req = list_entry(tmp, struct ptlrpc_request, rq_list);
@@ -221,7 +221,7 @@ int ptlrpc_replay(struct obd_import *imp)
           * than the one we're replaying (it can't be committed until it's
           * replayed, and we're doing that here).  l_f_e_safe protects against
           * problems with the current request being committed, in the unlikely
-         * event of that race.  So, in conclusion, I think that it's safe to 
+         * event of that race.  So, in conclusion, I think that it's safe to
           * perform this list-walk without the imp_lock held.
           *
           * But, the {mdc,osc}_replay_open callbacks both iterate
@@ -235,7 +235,7 @@ int ptlrpc_replay(struct obd_import *imp)
                  DEBUG_REQ(D_HA, req, "REPLAY:");
  
                  rc = ptlrpc_replay_req(req);
-        
+
                  if (rc) {
                          CERROR("recovery replay error %d for req "LPD64"\n",
                                 rc, req->rq_xid);
@@ -307,7 +307,6 @@ inline void ptlrpc_invalidate_import_state(struct obd_import *imp)
          ptlrpc_abort_inflight(imp);
  }
  
-
  void ptlrpc_handle_failed_import(struct obd_import *imp)
  {
          ENTRY;
@@ -329,7 +328,6 @@ void ptlrpc_request_handle_eviction(struct ptlrpc_request *failed_req)
          int rc;
          struct obd_import *imp= failed_req->rq_import;
          unsigned long flags;
-        struct ptlrpc_request *req;
          ENTRY;
  
          CDEBUG(D_HA, "import %s of %s@%s evicted: reconnecting\n",
@@ -347,7 +345,6 @@ void ptlrpc_request_handle_eviction(struct ptlrpc_request *failed_req)
                  failed_req->rq_err = 1;
                  spin_unlock_irqrestore (&failed_req->rq_lock, flags);
          }
-        ptlrpc_req_finished(req);
          EXIT;
  }
  
@@ -361,17 +358,23 @@ int ptlrpc_set_import_active(struct obd_import *imp, int active)
  
          notify_obd = imp->imp_obd->u.cli.cl_containing_lov;
  
-        /* When deactivating, mark import invalid, and 
-           abort in-flight requests. */
+        /* When deactivating, mark import invalid, and abort in-flight
+         * requests. */
          if (!active) {
-                CDEBUG(D_ERROR, "setting import %s INVALID\n", imp->imp_target_uuid.uuid);
                  spin_lock_irqsave(&imp->imp_lock, flags);
-                imp->imp_invalid = 1;
+                /* This is a bit of a hack, but invalidating replayable
+                 * imports makes a temporary reconnect failure into a much more
+                 * ugly -- and hard to remedy -- situation. */
+                if (!imp->imp_replayable) {
+                        CDEBUG(D_HA, "setting import %s INVALID\n",
+                               imp->imp_target_uuid.uuid);
+                        imp->imp_invalid = 1;
+                }
                  imp->imp_generation++;
                  spin_unlock_irqrestore(&imp->imp_lock, flags);
                  ptlrpc_invalidate_import_state(imp);
-//                ptlrpc_abort_inflight(imp);
-        } 
+                //ptlrpc_abort_inflight(imp);
+        }
  
          if (notify_obd == NULL)
                  GOTO(out, rc = 0);
@@ -403,8 +406,9 @@ int ptlrpc_set_import_active(struct obd_import *imp, int active)
  
  out:
          /* When activating, mark import valid */
-        if (active) {
-                CDEBUG(D_ERROR, "setting import %s VALID\n", imp->imp_target_uuid.uuid);
+        if (active && !rc) {
+                CDEBUG(D_HA, "setting import %s VALID\n",
+                       imp->imp_target_uuid.uuid);
                  spin_lock_irqsave(&imp->imp_lock, flags);
                  imp->imp_invalid = 0;
                  spin_unlock_irqrestore(&imp->imp_lock, flags);
@@ -420,7 +424,7 @@ void ptlrpc_fail_import(struct obd_import *imp, int generation)
          ENTRY;
  
          LASSERT (!imp->imp_dlm_fake);
-        
+
          spin_lock_irqsave(&imp->imp_lock, flags);
          if (imp->imp_level != LUSTRE_CONN_FULL)
                  in_recovery = 1;
@@ -466,14 +470,14 @@ int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid)
          ENTRY;
  
          spin_lock_irqsave(&imp->imp_lock, flags);
-        if (imp->imp_level == LUSTRE_CONN_FULL || 
+        if (imp->imp_level == LUSTRE_CONN_FULL ||
              imp->imp_level == LUSTRE_CONN_NOTCONN)
                      imp->imp_level = LUSTRE_CONN_RECOVER;
          else
                  in_recover = 1;
          spin_unlock_irqrestore(&imp->imp_lock, flags);
  
-        if (in_recover == 1) 
+        if (in_recover == 1)
                  RETURN(-EALREADY);
  
          if (new_uuid) {
diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c

index f2a1089..22ccb09 100644 (file)
--- a/lustre/ptlrpc/service.c
+++ b/lustre/ptlrpc/service.c
@@ -289,18 +289,29 @@ void ptlrpc_daemonize(void)
          reparent_to_init();
  }
  
+/* Signed distance from *small to *large, in microseconds.
+ * NOTE(review): computed in 'long'; where long is 32 bits this wraps for
+ * gaps beyond roughly 2147 seconds -- confirm callers cannot see that. */
+static long timeval_sub(struct timeval *large, struct timeval *small)
+{
+        long secs = large->tv_sec - small->tv_sec;
+        long usecs = large->tv_usec - small->tv_usec;
+
+        return secs * 1000000 + usecs;
+}
+
  static int ptlrpc_main(void *arg)
  {
-        struct ptlrpc_svc_data *data = (struct ptlrpc_svc_data *)arg;
+        struct ptlrpc_svc_data *data = arg;
          struct obd_device *obddev = data->dev;
          struct ptlrpc_service *svc = data->svc;
          struct ptlrpc_thread *thread = data->thread;
          struct ptlrpc_request *request;
          ptl_event_t *event;
-        int rc = 0;
          unsigned long flags;
-        cycles_t workdone_time = -1;
-        cycles_t svc_workcycles = -1;
+        struct timeval start_time, finish_time;
+        long total;
+        int rc = 0;
          ENTRY;
  
          lock_kernel();
@@ -311,21 +317,14 @@ static int ptlrpc_main(void *arg)
          RECALC_SIGPENDING;
          SIGNAL_MASK_UNLOCK(current, flags);
  
-#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
-        sprintf(current->comm, "%s|%d", data->name,current->thread.extern_pid);
-#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        sprintf(current->comm, "%s|%d", data->name,
-                current->thread.mode.tt.extern_pid);
-#else
-        strcpy(current->comm, data->name);
-#endif
+        THREAD_NAME(current->comm, "%s", data->name);
          unlock_kernel();
  
          OBD_ALLOC(event, sizeof(*event));
-        if (!event)
+        if (event == NULL)
                  GOTO(out, rc = -ENOMEM);
          OBD_ALLOC(request, sizeof(*request));
-        if (!request)
+        if (request == NULL)
                  GOTO(out_event, rc = -ENOMEM);
  
          /* Record that the thread is running */
@@ -334,14 +333,15 @@ static int ptlrpc_main(void *arg)
  
          /* XXX maintain a list of all managed devices: insert here */
  
+        do_gettimeofday(&finish_time);
          /* And now, loop forever on requests */
          while (1) {
                  struct l_wait_info lwi = { 0 };
                  l_wait_event(svc->srv_waitq,
                               ptlrpc_check_event(svc, thread, event), &lwi);
  
+                spin_lock(&svc->srv_lock);
                  if (thread->t_flags & SVC_STOPPING) {
-                        spin_lock(&svc->srv_lock);
                          thread->t_flags &= ~SVC_STOPPING;
                          spin_unlock(&svc->srv_lock);
  
@@ -349,65 +349,69 @@ static int ptlrpc_main(void *arg)
                          break;
                  }
  
-                if (thread->t_flags & SVC_EVENT) {
-                        cycles_t  workstart_time;
-
-                        spin_lock(&svc->srv_lock);
-                        thread->t_flags &= ~SVC_EVENT;
-                        /* Update Service Statistics */
-                        workstart_time = get_cycles();
-                        if (workdone_time != -1 && svc->svc_stats != NULL) {
-                                /* Stats for req(n) are updated just before
-                                 * req(n+1) is executed. This avoids need to
-                                 * reacquire svc->srv_lock after
-                                 * call to handling_request().
-                                 */
-                                int opc;
-
-                                /* req_waittime */
-                                lprocfs_counter_add(svc->svc_stats,
-                                                    PTLRPC_REQWAIT_CNTR,
-                                                    (workstart_time -
-                                                     event->arrival_time));
-                                /* svc_eqdepth */
-                                /* Wait for b_eq branch
-                                lprocfs_counter_add(svc->svc_stats,
-                                                    PTLRPC_SVCEQDEPTH_CNTR,
-                                                    0);
-                                */
-                                /* svc_idletime */
-                                lprocfs_counter_add(svc->svc_stats,
-                                                    PTLRPC_SVCIDLETIME_CNTR,
-                                                    (workstart_time -
-                                                     workdone_time));
-                                /* previous request */
-                                opc = opcode_offset(request->rq_reqmsg->opc);
-                                if (opc > 0) {
-                                        LASSERT(opc < LUSTRE_MAX_OPCODES);
-                                        lprocfs_counter_add(svc->svc_stats, opc,
-                                                            PTLRPC_LAST_CNTR +
-                                                            svc_workcycles);
-                                }
-                        }
+                if (!(thread->t_flags & SVC_EVENT)) {
+                        CERROR("unknown flag in service\n");
                          spin_unlock(&svc->srv_lock);
+                        LBUG();
+                        EXIT;
+                        break;
+                }
+
+                thread->t_flags &= ~SVC_EVENT;
+                spin_unlock(&svc->srv_lock);
+
+                do_gettimeofday(&start_time);
+                total = timeval_sub(&start_time, &event->arrival_time);
+                if (svc->svc_stats != NULL) {
+                        lprocfs_counter_add(svc->svc_stats, PTLRPC_REQWAIT_CNTR,
+                                            total);
+                        lprocfs_counter_add(svc->svc_stats,
+                                            PTLRPC_SVCIDLETIME_CNTR,
+                                            timeval_sub(&start_time,
+                                                        &finish_time));
+#if 0 /* Wait for b_eq branch */
+                        lprocfs_counter_add(svc->svc_stats,
+                                            PTLRPC_SVCEQDEPTH_CNTR, 0);
+#endif
+                }
  
+                if (total / 1000000 > (long)obd_timeout) {
+                        CERROR("Dropping request from NID "LPX64" because it's "
+                               "%ld seconds old.\n", event->initiator.nid,
+                               total / 1000000); /* bug 1502 */
+                        /* handle_incoming_request() never ran for this event,
+                         * so 'request' does not describe it; skip the
+                         * per-request log line and opcode stats below. */
+                        do_gettimeofday(&finish_time);
+                        continue;
+                } else {
+                        CDEBUG(D_HA, "request from NID "LPX64" noticed after "
+                               "%ldus\n", event->initiator.nid, total);
                          rc = handle_incoming_request(obddev, svc, event,
                                                       request);
-                        workdone_time = get_cycles();
-                        svc_workcycles = workdone_time - workstart_time;
-                        continue;
                  }
-
-                CERROR("unknown break in service");
-                LBUG();
-                EXIT;
-                break;
+                do_gettimeofday(&finish_time);
+                total = timeval_sub(&finish_time, &start_time);
+
+                CDEBUG((total / 1000000 > (long)obd_timeout) ? D_ERROR : D_HA,
+                       "request "LPU64" from NID "LPX64" processed in %ldus "
+                       "(%ldus total)\n", request->rq_xid, event->initiator.nid,
+                       total, timeval_sub(&finish_time, &event->arrival_time));
+
+                if (svc->svc_stats != NULL) {
+                        int opc = opcode_offset(request->rq_reqmsg->opc);
+                        if (opc > 0) {
+                                LASSERT(opc < LUSTRE_MAX_OPCODES);
+                                lprocfs_counter_add(svc->svc_stats,
+                                                    opc + PTLRPC_LAST_CNTR,
+                                                    total);
+                        }
+                }
          }
  
          /* NB should wait for all SENT callbacks to complete before exiting
           * here.  Unfortunately at this time there is no way to track this
-         * state.
-         */
+         * state. */
          OBD_FREE(request, sizeof(*request));
  out_event:
          OBD_FREE(event, sizeof(*event));
diff --git a/lustre/scripts/lustre.spec.in b/lustre/scripts/lustre.spec.in

index a24a26a..1b2ba01 100644 (file)
--- a/lustre/scripts/lustre.spec.in
+++ b/lustre/scripts/lustre.spec.in
@@ -1,17 +1,17 @@
  # lustre.spec
  %define version b_devel
-%define kversion @RELEASE@
+%define kversion @LINUXRELEASE@
  %define linuxdir @LINUX@
-Release: 0306170928kernel
  
  Summary: Lustre Lite File System
  Name: lustre-lite
  Version: %{version}
+Release: @RELEASE@
  Copyright: GPL
  Group: Utilities/System
  Requires: lustre-modules, PyXML
-BuildRoot: /var/tmp/lustre-%{version}-root
  Source: ftp://ftp.lustre.com/pub/lustre/lustre-%{version}.tar.gz
+BuildRoot: /var/tmp/lustre-%{version}-root
  
  %description
  The Lustre Lite Cluster File System: kernel drivers for file system,
@@ -69,21 +69,10 @@ cd $RPM_BUILD_DIR/lustre-%{version}
  ./configure --with-linux='%{linuxdir}' 
  make
  
-#%ifarch i386
-#cd $RPM_BUILD_DIR/lustre-%{version}-lib/lustre-%{version}
-#./configure --with-lib 
-#make
-#%endif
-
  %install
  cd $RPM_BUILD_DIR/lustre-%{version}
  make install prefix=$RPM_BUILD_ROOT
  
-#%ifarch i386
-#cd $RPM_BUILD_DIR/lustre-%{version}-lib/lustre-%{version}
-#make install prefix=$RPM_BUILD_ROOT
-#%endif
-
  %ifarch alpha
  # this hurts me
    conf_flag=
@@ -226,20 +215,20 @@ if [ ! -e /dev/portals ]; then
  fi
  depmod -ae || exit 0
  
-grep -q obdclass /etc/modules.conf || \
-       echo 'alias char-major-10-241 obdclass' >> /etc/modules.conf
+#grep -q obdclass /etc/modules.conf || \
+#      echo 'alias char-major-10-241 obdclass' >> /etc/modules.conf
  
-grep -q '/dev/obd' /etc/modules.conf || \
-       echo 'alias /dev/obd obdclass' >> /etc/modules.conf
+#grep -q '/dev/obd' /etc/modules.conf || \
+#      echo 'alias /dev/obd obdclass' >> /etc/modules.conf
  
-grep -q '/dev/lustre' /etc/modules.conf || \
-       echo 'alias /dev/lustre obdclass' >> /etc/modules.conf
+#grep -q '/dev/lustre' /etc/modules.conf || \
+#      echo 'alias /dev/lustre obdclass' >> /etc/modules.conf
  
-grep -q portals /etc/modules.conf || \
-        echo 'alias char-major-10-240 portals' >> /etc/modules.conf
+#grep -q portals /etc/modules.conf || \
+#        echo 'alias char-major-10-240 portals' >> /etc/modules.conf
  
-grep -q '/dev/portals' /etc/modules.conf || \
-        echo 'alias /dev/portals portals' >> /etc/modules.conf
+#grep -q '/dev/portals' /etc/modules.conf || \
+#        echo 'alias /dev/portals portals' >> /etc/modules.conf
  
  %postun
  depmod -ae || exit 0
@@ -257,6 +246,7 @@ if grep -q slapd-lustre $slapd; then
     cp $tmp $slapd
     rm $tmp
  fi
+
  %clean
  #rm -rf $RPM_BUILD_ROOT
  
diff --git a/lustre/tests/.cvsignore b/lustre/tests/.cvsignore

index 2e5c1fe..21575d0 100644 (file)
--- a/lustre/tests/.cvsignore
+++ b/lustre/tests/.cvsignore
@@ -41,5 +41,9 @@ runas
  openfile
  unlinkmany
  fchdir_test
+*.cmd
  getdents
  o_directory
+mkdirdeep
+utime
+small_write
diff --git a/lustre/tests/Makefile.am b/lustre/tests/Makefile.am

index 064de98..6600962 100644 (file)
--- a/lustre/tests/Makefile.am
+++ b/lustre/tests/Makefile.am
@@ -6,18 +6,21 @@ CFLAGS := -g -Wall
  # LDADD := -lreadline -ltermcap # -lefence
  EXTRA_DIST = $(pkgexample_SCRIPTS) $(noinst_SCRIPTS) $(noinst_DATA) \
         sanity.sh          rundbench    mcreate
-pkgexample_SCRIPTS = llmount.sh llmountcleanup.sh llecho.sh llechocleanup.sh local.sh echo.sh uml.sh lov.sh
+pkgexample_SCRIPTS = llmount.sh llmountcleanup.sh llecho.sh llechocleanup.sh
+pkgexample_SCRIPTS += local.sh echo.sh uml.sh lov.sh
  noinst_DATA =
-noinst_SCRIPTS = leak_finder.pl llecho.sh llmount.sh llmountcleanup.sh tbox.sh \
-       llrmount.sh runfailure-mds runvmstat runfailure-net runfailure-ost \
-       runiozone runregression-net.sh runtests sanity.sh rundbench
+noinst_SCRIPTS = leak_finder.pl llecho.sh llmount.sh llmountcleanup.sh tbox.sh
+noinst_SCRIPTS += llrmount.sh runfailure-mds runvmstat runfailure-net
+noinst_SCRIPTS += runfailure-ost runiozone runregression-net.sh runtests
+noinst_SCRIPTS += sanity.sh rundbench
  noinst_PROGRAMS = openunlink testreq truncate directio openme writeme open_delay
-noinst_PROGRAMS += munlink tchmod toexcl fsx test_brw openclose createdestroy
-noinst_PROGRAMS += stat createmany statmany multifstat createtest mlink
+noinst_PROGRAMS += tchmod toexcl fsx test_brw openclose createdestroy
+noinst_PROGRAMS += stat createmany statmany multifstat createtest mlink utime
  noinst_PROGRAMS += opendirunlink opendevunlink unlinkmany fchdir_test checkstat
-noinst_PROGRAMS += wantedi statone runas openfile getdents o_directory
+noinst_PROGRAMS += wantedi statone runas openfile getdents mkdirdeep o_directory
+noinst_PROGRAMS += small_write
  # noinst_PROGRAMS += ldaptest
-sbin_PROGRAMS = mcreate mkdirmany
+sbin_PROGRAMS = mcreate munlink mkdirmany
  
  # ldaptest_SOURCES = ldaptest.c
  tchmod_SOURCES = tchmod.c
@@ -48,13 +51,15 @@ openfile_SOURCES = openfile.c
  wantedi_SOURCES = wantedi.c
  createtest_SOURCES = createtest.c
  open_delay_SOURCES = open_delay.c
-opendirunlink_SOURCES=opendirunlink.c
-opendevunlink_SOURCES=opendirunlink.c
-fchdir_test_SOURCES=fchdir_test.c
+opendirunlink_SOURCES = opendirunlink.c
+opendevunlink_SOURCES = opendevunlink.c
+fchdir_test_SOURCES = fchdir_test.c
  getdents_SOURCES=getdents.c
  o_directory_SOURCES = o_directory.c
-#mkdirdeep_SOURCES= mkdirdeep.c
-#mkdirdeep_LDADD=-L../portals/util -lptlctl
-#mkdirdeep_CPPFLAGS=-I$(top_srcdir)/portals/include
+utime_SOURCES = utime.c
+mkdirdeep_SOURCES = mkdirdeep.c
+mkdirdeep_LDADD=-L$(top_builddir)/portals/utils -lptlctl
+mkdirdeep_CPPFLAGS=-I$(top_srcdir)/portals/include
+small_write_SOURCES = small_write.c
  
  include $(top_srcdir)/Rules
diff --git a/lustre/tests/acceptance-metadata-double.sh b/lustre/tests/acceptance-metadata-double.sh

index f647a55..496f3b4 100644 (file)
--- a/lustre/tests/acceptance-metadata-double.sh
+++ b/lustre/tests/acceptance-metadata-double.sh
@@ -8,6 +8,7 @@ set -e
  
  SRCDIR="`dirname $0`"
  CREATE=$SRCDIR/create.pl
+RENAME=$SRCDIR/rename.pl
  
  debug_client_on()
  {
@@ -23,118 +24,70 @@ MNT=${MNT:-/mnt/lustre}
  
  debug_client_on
  echo "create.pl, 2 mounts, 1 thread, 10 ops, debug on"
-perl $CREATE -- $MNT 2 10
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=10
  echo "create.pl, 2 mounts, 1 thread, 100 ops, debug on"
-perl $CREATE --silent -- $MNT 2 100
-echo "create.pl --mcreate=0, 2 mounts, 1 thread, 10 ops, debug on"
-perl $CREATE --mcreate=0 -- $MNT 2 10
-echo "create.pl --mcreate=0, 2 mounts, 1 thread, 100 ops, debug on"
-perl $CREATE --mcreate=0 --silent -- $MNT 2 100
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=100 --silent
+echo "create.pl --use_mcreate=0, 2 mounts, 1 thread, 10 ops, debug on"
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=10 --use_mcreate=0
+echo "create.pl --use_mcreate=0, 2 mounts, 1 thread, 100 ops, debug on"
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=100 --use_mcreate=0 --silent
  echo "rename.pl, 2 mounts, 1 thread, 10 ops, debug on"
-perl rename.pl --count=2 $MNT 10
+perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=10
  echo "rename.pl, 2 mounts, 1 thread, 100 ops, debug on"
-perl rename.pl --count=2 --silent $MNT 100
+perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=100 --silent
  
  debug_client_off
  echo "create.pl, 2 mounts, 1 thread, 1000 ops, debug off"
-perl $CREATE --silent -- $MNT 2 1000
-echo "create.pl --mcreate=0, 2 mounts, 1 thread, 1000 ops, debug off"
-perl $CREATE --silent --mcreate=0 -- $MNT 2 1000
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=1000 --silent
+echo "create.pl --use_mcreate=0, 2 mounts, 1 thread, 1000 ops, debug off"
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=1000 --use_mcreate=0 --silent
  echo "rename.pl, 2 mounts, 1 thread, 1000 ops, debug off"
-perl rename.pl --count=2 --silent $MNT 1000
+perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=1000 --silent
  
  debug_client_on
  echo "create.pl, 2 mounts, 2 threads, 100 ops, debug on"
-perl $CREATE --silent -- $MNT 2 100 &
-perl $CREATE --silent -- $MNT 2 100 &
-wait
-echo "create.pl --mcreate=0, 2 mounts, 2 threads, 100 ops, debug on"
-perl $CREATE --silent --mcreate=0 -- $MNT 2 100 &
-perl $CREATE --silent --mcreate=0 -- $MNT 2 100 &
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=100 --num_threads=2 --silent
+echo "create.pl --use_mcreate=0, 2 mounts, 2 threads, 100 ops, debug on"
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=100 --num_threads=2 --use_mcreate=0 --silent
  echo "rename.pl, 2 mounts, 2 thread, 1000 ops, debug on"
-perl rename.pl --count=2 --silent $MNT 1000 &
-perl rename.pl --count=2 --silent $MNT 1000 &
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=1000 --num_threads=2 --silent
  
  debug_client_off
  echo "create.pl, 2 mounts, 2 threads, 2000 ops, debug off"
-perl $CREATE --silent -- $MNT 2 2000 &
-perl $CREATE --silent -- $MNT 2 2000 &
-wait
-echo "create.pl --mcreate=0, 2 mounts, 2 threads, 2000 ops, debug off"
-perl $CREATE --silent --mcreate=0 -- $MNT 2 2000 &
-perl $CREATE --silent --mcreate=0 -- $MNT 2 2000 &
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=2 --silent
+echo "create.pl --use_mcreate=0, 2 mounts, 2 threads, 2000 ops, debug off"
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=2 --use_mcreate=0 --silent
  echo "rename.pl, 2 mounts, 2 threads, 2000 ops, debug off"
-perl rename.pl --count=2 --silent $MNT 2000 &
-perl rename.pl --count=2 --silent $MNT 2000 &
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=2 --silent
  
  debug_client_on
  echo "create.pl, 2 mounts, 4 threads, 100 ops, debug on"
-for i in `seq 1 4`; do
-  perl $CREATE --silent -- $MNT 2 100 &
-done
-wait
-echo "create.pl --mcreate=0, 2 mounts, 4 threads, 100 ops, debug on"
-for i in `seq 1 4`; do
-  perl $CREATE --silent --mcreate=0 -- $MNT 2 100 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=100 --num_threads=4 --silent
+echo "create.pl --use_mcreate=0, 2 mounts, 4 threads, 100 ops, debug on"
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=100 --num_threads=4 --use_mcreate=0 --silent
  echo "rename.pl, 2 mounts, 4 threads, 2000 ops, debug on"
-for i in `seq 1 4`; do
-  perl rename.pl --count=2 --silent $MNT 2000 &
-done
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=4 --silent
  
  debug_client_off
  echo "create.pl, 2 mounts, 4 threads, 2000 ops, debug off"
-for i in `seq 1 4`; do
-  perl $CREATE --silent -- $MNT 2 2000 &
-done
-wait
-echo "create.pl --mcreate=0, 2 mounts, 4 threads, 2000 ops, debug off"
-for i in `seq 1 4`; do
-  perl $CREATE --silent --mcreate=0 -- $MNT 2 2000 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=4 --silent
+echo "create.pl --use_mcreate=0, 2 mounts, 4 threads, 2000 ops, debug off"
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=4 --use_mcreate=0 --silent
  echo "rename.pl, 2 mounts, 4 threads, 2000 ops, debug off"
-for i in `seq 1 4`; do
-  perl rename.pl --count=2 --silent $MNT 2000 &
-done
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=4 --silent
  
  debug_client_on
  echo "create.pl, 2 mounts, 8 threads, 500 ops, debug on"
-for i in `seq 1 8`; do
-  perl $CREATE --silent -- $MNT 2 500 &
-done
-wait
-echo "create.pl --mcreate=0, 2 mounts, 8 threads, 500 ops, debug on"
-for i in `seq 1 8`; do
-  perl $CREATE --silent --mcreate=0 -- $MNT 2 500 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=500 --num_threads=8 --silent
+echo "create.pl --use_mcreate=0, 2 mounts, 8 threads, 500 ops, debug on"
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=500 --num_threads=8 --use_mcreate=0 --silent
  echo "rename.pl, 2 mounts, 8 threads, 2000 ops, debug on"
-for i in `seq 1 8`; do
-  perl rename.pl --count=2 --silent $MNT 2000 &
-done
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=8 --silent
  
  debug_client_off
  echo "create.pl, 2 mounts, 8 threads, 2000 ops, debug off"
-for i in `seq 1 8`; do
-  perl $CREATE --silent -- $MNT 2 2000 &
-done
-wait
-echo "create.pl --mcreate=0, 2 mounts, 8 threads, 2000 ops, debug off"
-for i in `seq 1 8`; do
-  perl $CREATE --silent --mcreate=0 -- $MNT 2 2000 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=8 --silent
+echo "create.pl --use_mcreate=0, 2 mounts, 8 threads, 2000 ops, debug off"
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=8 --use_mcreate=0 --silent
  echo "rename.pl, 2 mounts, 8 threads, 2000 ops, debug off"
-for i in `seq 1 8`; do
-  perl rename.pl --count=2 --silent $MNT 2000 &
-done
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=8 --silent
diff --git a/lustre/tests/acceptance-metadata-single.sh b/lustre/tests/acceptance-metadata-single.sh

index 53774e5..2bf0a53 100644 (file)
--- a/lustre/tests/acceptance-metadata-single.sh
+++ b/lustre/tests/acceptance-metadata-single.sh
@@ -8,6 +8,7 @@ set -e
  
  SRCDIR="`dirname $0`"
  CREATE=$SRCDIR/create.pl
+RENAME=$SRCDIR/rename.pl
  
  debug_client_on()
  {
@@ -23,121 +24,75 @@ MNT=${MNT:-/mnt/lustre}
  
  debug_client_on
  echo "create.pl, 1 mount, 1 thread, 10 ops, debug on"
-perl $CREATE -- $MNT -1 10
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=10
  echo "create.pl, 1 mount, 1 thread, 100 ops, debug on"
-perl $CREATE --silent -- $MNT -1 100
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=100 --silent
  echo "create.pl --mcreate=0, 1 mount, 1 thread, 10 ops, debug on"
-perl $CREATE --mcreate=0 -- $MNT -1 10
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=10 --use_mcreate=0
  echo "create.pl --mcreate=0, 1 mount, 1 thread, 100 ops, debug on"
-perl $CREATE --mcreate=0 --silent -- $MNT -1 100
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=100 --use_mcreate=0 --silent
  echo "rename.pl, 1 mount, 1 thread, 10 ops, debug on"
-perl rename.pl $MNT 10
+perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=10
  echo "rename.pl, 1 mount, 1 thread, 100 ops, debug on"
-perl rename.pl --silent $MNT 100
+perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=100 --silent
  
  debug_client_off
  echo "create.pl, 1 mount, 1 thread, 1000 ops, debug off"
-perl $CREATE --silent -- $MNT -1 1000
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=1000 --silent
  echo "create.pl --mcreate=0, 1 mount, 1 thread, 1000 ops, debug off"
-perl $CREATE --silent --mcreate=0 -- $MNT -1 1000
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=1000 --use_mcreate=0 --silent
  echo "rename.pl, 1 mount, 1 thread, 1000 ops, debug off"
-perl rename.pl --silent $MNT 1000
+perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=1000 --silent
  
  debug_client_on
  echo "create.pl, 1 mount, 2 threads, 100 ops, debug on"
-perl $CREATE --silent -- $MNT -1 100 &
-perl $CREATE --silent -- $MNT -1 100 &
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=100 --num_threads=2 --silent
  echo "create.pl --mcreate=0, 1 mount, 2 threads, 100 ops, debug on"
-perl $CREATE --silent --mcreate=0 -- $MNT -1 100 &
-perl $CREATE --silent --mcreate=0 -- $MNT -1 100 &
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=100 --num_threads=2 --use_mcreate=0 --silent
  echo "rename.pl, 1 mount, 2 thread, 1000 ops, debug on"
-perl rename.pl --silent $MNT 1000 &
-perl rename.pl --silent $MNT 1000 &
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=1000 --num_threads=2 --silent
  
  debug_client_off
  echo "create.pl, 1 mount, 2 threads, 2000 ops, debug off"
-perl $CREATE --silent -- $MNT -1 2000 &
-perl $CREATE --silent -- $MNT -1 2000 &
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=2 --silent
  echo "create.pl --mcreate=0, 1 mount, 2 threads, 2000 ops, debug off"
-perl $CREATE --silent --mcreate=0 -- $MNT -1 2000 &
-perl $CREATE --silent --mcreate=0 -- $MNT -1 2000 &
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=2 --use_mcreate=0  --silent
  wait
  echo "rename.pl, 1 mount, 2 threads, 2000 ops, debug off"
-perl rename.pl --silent $MNT 2000 &
-perl rename.pl --silent $MNT 2000 &
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=2 --silent
  
  debug_client_on
  echo "create.pl, 1 mount, 4 threads, 100 ops, debug on"
-for i in `seq 1 4`; do
-  perl $CREATE --silent -- $MNT -1 100 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=100 --num_threads=4 --silent
  echo "create.pl --mcreate=0, 1 mount, 4 threads, 100 ops, debug on"
-for i in `seq 1 4`; do
-  perl $CREATE --silent --mcreate=0 -- $MNT -1 100 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=100 --num_threads=4 --use_mcreate=0 --silent
  echo "rename.pl, 1 mount, 4 threads, 2000 ops, debug on"
-for i in `seq 1 4`; do
-  perl rename.pl --silent $MNT 2000 &
-done
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=4 --silent
  
  debug_client_off
  echo "create.pl, 1 mount, 4 threads, 2000 ops, debug off"
-for i in `seq 1 4`; do
-  perl $CREATE --silent -- $MNT -1 2000 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=4  --silent
  echo "create.pl --mcreate=0, 1 mount, 4 threads, 2000 ops, debug off"
-for i in `seq 1 4`; do
-  perl $CREATE --silent --mcreate=0 -- $MNT -1 2000 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=4  --use_mcreate=0 --silent
  echo "rename.pl, 1 mount, 4 threads, 2000 ops, debug off"
-for i in `seq 1 4`; do
-  perl rename.pl --silent $MNT 2000 &
-done
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=4 --silent
  
  debug_client_on
  echo "create.pl, 1 mount, 8 threads, 500 ops, debug on"
-for i in `seq 1 8`; do
-  perl $CREATE --silent -- $MNT -1 500 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=500 --num_threads=8  --silent
  echo "create.pl --mcreate=0, 1 mount, 8 threads, 500 ops, debug on"
-for i in `seq 1 8`; do
-  perl $CREATE --silent --mcreate=0 -- $MNT -1 500 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=500 --num_threads=8  --use_mcreate=0 --silent
  echo "rename.pl, 1 mount, 8 threads, 2000 ops, debug on"
-for i in `seq 1 8`; do
-  perl rename.pl --silent $MNT 2000 &
-done
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=8 --silent
  
  debug_client_off
  echo "create.pl, 1 mount, 8 threads, 2000 ops, debug off"
-for i in `seq 1 8`; do
-  perl $CREATE --silent -- $MNT -1 2000 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=8  --silent
  echo "create.pl --mcreate=0, 1 mount, 8 threads, 2000 ops, debug off"
-for i in `seq 1 8`; do
-  perl $CREATE --silent --mcreate=0 -- $MNT -1 2000 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=8  --use_mcreate=0 --silent
  echo "rename.pl, 1 mount, 8 threads, 2000 ops, debug off"
-for i in `seq 1 8`; do
-  perl rename.pl --silent $MNT 2000 &
-done
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=8 --silent
+
  sh rundbench 1
  sh rundbench 2
  sh rundbench 4
diff --git a/lustre/tests/acceptance-small.sh b/lustre/tests/acceptance-small.sh

index 0d2d836..919ea1f 100755 (executable)
--- a/lustre/tests/acceptance-small.sh
+++ b/lustre/tests/acceptance-small.sh
@@ -5,7 +5,7 @@ set -vxe
  
  [ "$CONFIGS" -a -z "$SANITYN" ] && SANITYN=no
  [ "$CONFIGS" ] || CONFIGS="local lov"
-[ "$MAX_THREADS" ] || MAX_THREADS=50
+[ "$MAX_THREADS" ] || MAX_THREADS=10
  if [ -z "$THREADS" ]; then
         KB=`awk '/MemTotal:/ { print $2 }' /proc/meminfo`
         THREADS=`expr $KB / 16384`
@@ -76,7 +76,7 @@ for NAME in $CONFIGS; do
         if [ "$IOZONE_DIR" != "no" ]; then
                 mount | grep $MNT || sh llmount.sh
                 SPACE=`df $MNT | tail -1 | awk '{ print $4 }'`
-               IOZ_THREADS=`expr $SPACE / $SIZE`
+               IOZ_THREADS=`expr $SPACE / \( $SIZE + $SIZE / 1000 \)`
                 [ $THREADS -lt $IOZ_THREADS ] && IOZ_THREADS=$THREADS
  
                 $DEBUG_OFF
diff --git a/lustre/tests/cobd.sh b/lustre/tests/cobd.sh

index cb4f94d..983df93 100755 (executable)
--- a/lustre/tests/cobd.sh
+++ b/lustre/tests/cobd.sh
@@ -6,10 +6,11 @@ config=${1:-$(basename $0 .sh)}.xml
  LMC=${LMC:-../utils/lmc -m $config}
  TMP=${TMP:-/tmp}
  
-MDSDEV=$TMP/mds1
+MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
  MDSSIZE=50000
+FSTYPE=${FSTYPE:-ext3}
  
-OSTDEV=$TMP/ost1
+OSTDEV=${OSTDEV:-$TMP/ost1-`hostname`}
  OSTSIZE=200000
  
  rm -f $config
@@ -18,12 +19,12 @@ ${LMC} --add node --node localhost || exit 10
  ${LMC} --add net --node  localhost --nid localhost --nettype tcp || exit 11
  
  # configure mds server
-${LMC}  --add mds  --node localhost --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 20
+${LMC}  --add mds  --node localhost --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE || exit 20
  
  # configure ost
-${LMC} --add ost --node localhost --obd obd1 --obdtype obdecho || exit 30
+${LMC} --add ost --node localhost --obd obd1 --fstype $FSTYPE --obdtype obdecho || exit 30
  # configure ost
-${LMC} --add ost --node localhost --obd obd2 --obdtype obdecho || exit 30
+${LMC} --add ost --node localhost --obd obd2 --fstype $FSTYPE --obdtype obdecho || exit 30
  
  ${LMC} --add cobd --node localhost --real_obd obd1 --cache_obd obd2
  
diff --git a/lustre/tests/create.pl b/lustre/tests/create.pl

index 6156869..c5f3f12 100644 (file)
--- a/lustre/tests/create.pl
+++ b/lustre/tests/create.pl
@@ -1,32 +1,162 @@
-#!/usr/bin/perl
+#!/usr/bin/perl -w
+use strict;
+$|++;
+
+$ENV{PATH}="/bin:/usr/bin";
+$ENV{ENV}="";
+$ENV{BASH_ENV}="";
+use POSIX ":sys_wait_h";
+
+use diagnostics;
  use Getopt::Long;
  
+use vars qw(
+           $MAX_THREADS
+           );
+
+# Don't try to run more than this many threads concurrently.
+$MAX_THREADS = 16;
+
+# Initialize variables
  my $silent = 0;
-my $mcreate = 1; # should we use mcreate or open?
-my $files = 5;
+my $use_mcreate = 1; # should we use mcreate or open?
+my $num_files = 5;   # number of files to create
+my $iterations = 1;
+my $num_threads = 1;
+my $mountpt;
+my $num_mounts = -1;
  
+# Get options from the command line.
  GetOptions("silent!" => \$silent,
-           "mcreate=i" => \$mcreate,
-           "files=i" => \$files);
+           "use_mcreate=i" => \$use_mcreate,
+           "num_files=i" => \$num_files,
+          "mountpt=s" => \$mountpt,
+          "num_mounts=i" => \$num_mounts,
+          "iterations=i" => \$iterations,
+          "num_threads=i" => \$num_threads,
+          ) || die &usage;
+
+# Check for mandatory args.
+if (!$mountpt || 
+    !$num_mounts) {
+    die &usage;
+}
+
+if ($num_threads > $MAX_THREADS) {
+    print "\nMAX_THREADS is currently set to $MAX_THREADS.\n\n";
+    print "You will have to change this in the source\n";
+    print "if you really want to run with $num_threads threads.\n\n";
+    exit 1;
+}
  
-my $mtpt = shift || usage();
-my $mount_count = shift || usage();
-my $i = shift || usage();
-my $count = $i;
+# Initialize rand() function.
+srand (time ^ $$ ^ unpack "%L*", `ps axww | gzip`);
+
+#########################################################################
+### MAIN
+
+for (my $i=1; $i<=$num_threads; $i++) {
+    my $status = &fork_and_create($i);
+    last if ($status != 0);
+}
+
+# Reap every child: block in waitpid() until it returns -1 (no children left).
+my $child = 0;
+do {
+    $child = waitpid(-1, 0);
+} until $child == -1;
+sleep 1;
+
+exit 0;
+
+#########################################################################
+### SUBROUTINES
  
  sub usage () {
-    print "Usage: $0 [--silent] [--mcreate=n] [--files=n] <mnt prefix> <mnt count> <iterations>\n";
-    print "example: $0 /mnt/lustre 2 50\n";
-    print "         will test in /mnt/lustre1 and /mnt/lustre2\n";
-    print "         $0 /mnt/lustre -1 50\n";
-    print "         will test in /mnt/lustre only\n";
+    print "\nUsage: $0 [--silent] [--use_mcreate=n] [--num_files=n] [--iterations=n] [--num_threads=n] --mountpt=/path/to/lustre/mount --num_mounts=n\n\n";
+    print "\t--silent\tminimal output\n";
+    print "\t--use_mcreate=n\tuse mcreate to create files, default=1 (yes)\n";
+    print "\t--num_files=n\tnumber of files to create per iteration, default=5\n";
+    print "\t--iterations=n\tnumber of iterations to perform, default=1\n";
+    print "\t--num_threads=n\tnumber of threads to run, default=1\n";
+    print "\t--mountpt\tlocation of lustre mount\n";
+    print "\t--num_mounts=n\tnumber of lustre mounts to test across, default=-1 (single mount point without numeric suffix)\n\n";
+    print "example: $0 --mountpt=/mnt/lustre --num_mounts=2 --iterations=50\n";
+    print "         will perform 50 iterations in /mnt/lustre1 and /mnt/lustre2\n";
+    print "         $0 --mountpt=/mnt/lustre --num_mounts=-1 --iterations=50\n";
+    print "         will perform 50 iterations in /mnt/lustre only\n\n";
      exit;
  }
  
-sub do_open($) {
-    my $path = shift;
+#########################################################################
+sub fork_and_create ($) {
+    my ($thread_num) = @_;
+    
+  FORK: {
+      if (my $pid = fork) {
+         # parent here
+         # child process pid is available in $pid
+         return 0;
+      } elsif (defined $pid) { # $pid is zero here if defined
+         my $current_iteration=1;
+         while ($current_iteration <= $iterations) {
+             for (my $i=1; $i<=$num_files; $i++) {
+                 my $which = "";
+                 if ($num_mounts > 0) {
+                     $which = int(rand() * $num_mounts) + 1;
+                 }
+                 my $d = int(rand() * $num_files);
+                 do_open("${mountpt}${which}/thread${thread_num}.${d}");
+                 
+                 if ($num_mounts > 0) {
+                     $which = int(rand() * $num_mounts) + 1;
+                 }
+                 $d = int(rand() * $num_files);
+                 my $path = "${mountpt}${which}/thread${thread_num}.${d}";
+                 print  "Thread $thread_num: Unlink $path start [" . $$."]...\n" if !$silent;
+                 if (unlink($path)) {
+                     print "Thread $thread_num: Unlink done [$$] $path: Success\n" if !$silent;
+                 } else {
+                     print "Thread $thread_num: Unlink done [$$] $path: $!\n"if !$silent;
+                 }
+             }
+             if (($current_iteration) % 100 == 0) {
+                 print STDERR "Thread $thread_num: " . $current_iteration . " operations [" . $$ . "]\n";
+             }
+             $current_iteration++;
+         }
+         
+         my $which = "";
+         if ($num_mounts > 0) {
+             $which = int(rand() * $num_mounts) + 1;
+         }
+         for (my $d = 0; $d < $num_files; $d++) {
+             my $path = "${mountpt}${which}/thread${thread_num}.${d}";
+             unlink("$path") if (-e $path);
+         }
+         
+         print "Thread $thread_num: Done.\n";
+         
+         exit 0;
+
+      } elsif ($! =~ /No more process/) {
+          # EAGAIN, supposedly recoverable fork error
+         sleep 5;
+         redo FORK;
+      } else {
+          # weird fork error
+         die "Can't fork: $!\n";
+      }
+  }
+
+}
+
+#########################################################################
+
+sub do_open ($) {
+    my ($path) = @_;;
  
-    if ($mcreate) {
+    if ($use_mcreate) {
          my $tmp = `./mcreate $path`;
          if ($tmp) {
              print  "Creating $path [" . $$."]...\n" if !$silent;
@@ -37,42 +167,9 @@ sub do_open($) {
          }
      } else {
          print  "Opening $path [" . $$."]...\n"if !$silent;
-        open(FH, ">$path") || die "open($PATH): $!";
+        open(FH, ">$path") || die "open($path): $!";
          print  "Open done [$$] $path: Success\n"if !$silent;
          close(FH) || die;
      }
  }
  
-while ($i--) {
-    my $which = "";
-    if ($mount_count > 0) {
-        $which = int(rand() * $mount_count) + 1;
-    }
-    $d = int(rand() * $files);
-    do_open("$mtpt$which/$d");
-
-    if ($mount_count > 0) {
-        $which = int(rand() * $mount_count) + 1;
-    }
-    $d = int(rand() * $files);
-    $path = "$mtpt$which/$d";
-    print  "Unlink $path start [" . $$."]...\n"if !$silent;
-    if (unlink($path)) {
-        print  "Unlink done [$$] $path: Success\n"if !$silent;
-    } else {
-        print  "Unlink done [$$] $path: $!\n"if !$silent;
-    }
-    if (($count - $i) % 100 == 0) {
-        print STDERR ($count - $i) . " operations [" . $$ . "]\n";
-    }
-}
-
-my $which = "";
-if ($mount_count > 0) {
-    $which = int(rand() * $mount_count) + 1;
-}
-for ($d = 0; $d < $files; $d++) {
-    unlink("$mtpt$which/$d");
-}
-
-print "Done.\n";
diff --git a/lustre/tests/directio.c b/lustre/tests/directio.c

index e660ea4..cc92c80 100644 (file)
--- a/lustre/tests/directio.c
+++ b/lustre/tests/directio.c
@@ -41,7 +41,7 @@ int main(int argc, char **argv)
                  return 1;
          }
  
-        printf("directio on %s for %dx%lu blocks \n", argv[1], blocks,
+        printf("directio on %s for %dx%lu bytes \n", argv[1], blocks,
                 st.st_blksize);
  
          seek = (off64_t)seek_blocks * (off64_t)st.st_blksize;
@@ -75,5 +75,6 @@ int main(int argc, char **argv)
                  return 1;
          }
  
+       printf("PASS\n");
          return 0;
  }
diff --git a/lustre/tests/echo.sh b/lustre/tests/echo.sh

index 335db41..b4fe5a4 100755 (executable)
--- a/lustre/tests/echo.sh
+++ b/lustre/tests/echo.sh
@@ -21,8 +21,9 @@ CLIENTNID=${CLIENTNID:-$CLIENT}
  
  
  # FIXME: make LMC not require MDS for obdecho LOV
-MDSDEV=${MDSDEV:-$TMP/mds1}
+MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
  MDSSIZE=10000
+FSTYPE=${FSTYPE:-ext3}
  
  STRIPE_BYTES=65536
  STRIPES_PER_OBJ=2      # 0 means stripe over all OSTs
@@ -33,7 +34,7 @@ $LMC --add node --node $SERVER  || exit 1
  $LMC --add net --node $SERVER --nid $SERVERNID --nettype $NET || exit 2
  
  if (($LOV)); then
-    $LMC --add mds --node $SERVER --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 10
+    $LMC --add mds --node $SERVER --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE || exit 10
      $LMC --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPE_BYTES --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 || exit 11
      $LMC --add ost --node $SERVER --lov lov1 --osdtype=obdecho || exit 12
      $LMC --add ost --node $SERVER --lov lov1 --osdtype=obdecho || exit 13
diff --git a/lustre/tests/fsx.c b/lustre/tests/fsx.c

index a2b1d5e..92a2342 100644 (file)
--- a/lustre/tests/fsx.c
+++ b/lustre/tests/fsx.c
@@ -294,9 +294,10 @@ save_buffer(char *buffer, off_t bufferlength, int fd)
                 if (size_by_seek == (off_t)-1)
                         prterr("save_buffer: lseek eof");
                 else if (bufferlength > size_by_seek) {
-                       warn("save_buffer: .fsxgood file too short... will
-save 0x%llx bytes instead of 0x%llx\n", (unsigned long long)size_by_seek,
-                            (unsigned long long)bufferlength);
+                       warn("save_buffer: .fsxgood file too short... will "
+                               "save 0x%llx bytes instead of 0x%llx\n", 
+                               (unsigned long long)size_by_seek,
+                               (unsigned long long)bufferlength);
                         bufferlength = size_by_seek;
                 }
         }
@@ -310,8 +311,8 @@ save 0x%llx bytes instead of 0x%llx\n", (unsigned long long)size_by_seek,
                 if (byteswritten == -1)
                         prterr("save_buffer write");
                 else
-                       warn("save_buffer: short write, 0x%x bytes instead
-of 0x%llx\n",
+                       warn("save_buffer: short write, 0x%x bytes instead "
+                               "of 0x%llx\n",
                              (unsigned)byteswritten,
                              (unsigned long long)bufferlength);
         }
@@ -372,11 +373,11 @@ check_buffers(unsigned offset, unsigned size)
                 if (n) {
                         prt("\t0x%5x\n", n);
                         if (bad)
-                               prt("operation# (mod 256) for the bad data
-may be %u\n", ((unsigned)op & 0xff));
+                               prt("operation# (mod 256) for the bad data "
+                                       "may be %u\n", ((unsigned)op & 0xff));
                         else
-                               prt("operation# (mod 256) for the bad data
-unknown, check HOLE and EXTEND ops\n");
+                               prt("operation# (mod 256) for the bad data "
+                                       "unknown, check HOLE and EXTEND ops\n");
                 } else
                         prt("????????????????\n");
                 report_failure(110);
@@ -927,33 +928,33 @@ void
  usage(void)
  {
         fprintf(stdout, "usage: %s",
-               "fsx [-dnqLOW] [-b opnum] [-c Prob] [-l flen] [-m
-start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t
-truncbdy] [-w writebdy] [-D startingop] [-N numops] [-P dirpath] [-S seed]
-fname\n\
-       -b opnum: beginning operation number (default 1)\n\
-       -c P: 1 in P chance of file close+open at each op (default infinity)\n\
-       -d: debug output for all operations [-d -d = more debugging]\n\
-       -l flen: the upper bound on file size (default 262144)\n\
-       -m startop:endop: monitor (print debug output) specified byte range
-(default 0:infinity)\n\
-       -n: no verifications of file size\n\
-       -o oplen: the upper bound on operation size (default 65536)\n\
-       -p progressinterval: debug output at specified operation interval\n\
-       -q: quieter operation\n\
-       -r readbdy: 4096 would make reads page aligned (default 1)\n\
-       -s style: 1 gives smaller truncates (default 0)\n\
-       -t truncbdy: 4096 would make truncates page aligned (default 1)\n\
-       -w writebdy: 4096 would make writes page aligned (default 1)\n\
-       -D startingop: debug output starting at specified operation\n\
-       -L: fsxLite - no file creations & no file size changes\n\
-       -N numops: total # operations to do (default infinity)\n\
-       -O: use oplen (see -o flag) for every op (default random)\n\
-       -P: save .fsxlog and .fsxgood files in dirpath (default ./)\n\
-       -S seed: for random # generator (default 1) 0 gets timestamp\n\
-       -W: mapped write operations DISabled\n\
-        -R: read() system calls only (mapped reads disabled)\n\
-       fname: this filename is REQUIRED (no default)\n");
+               "fsx [-dnqLOW] [-b opnum] [-c Prob] [-l flen] [-m "
+"start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t "
+"truncbdy] [-w writebdy] [-D startingop] [-N numops] [-P dirpath] [-S seed] "
+"fname\n"
+"      -b opnum: beginning operation number (default 1)\n"
+"      -c P: 1 in P chance of file close+open at each op (default infinity)\n"
+"      -d: debug output for all operations [-d -d = more debugging]\n"
+"      -l flen: the upper bound on file size (default 262144)\n"
+"      -m startop:endop: monitor (print debug output) specified byte range "
+"(default 0:infinity)\n"
+"      -n: no verifications of file size\n"
+"      -o oplen: the upper bound on operation size (default 65536)\n"
+"      -p progressinterval: debug output at specified operation interval\n"
+"      -q: quieter operation\n"
+"      -r readbdy: 4096 would make reads page aligned (default 1)\n"
+"      -s style: 1 gives smaller truncates (default 0)\n"
+"      -t truncbdy: 4096 would make truncates page aligned (default 1)\n"
+"      -w writebdy: 4096 would make writes page aligned (default 1)\n"
+"      -D startingop: debug output starting at specified operation\n"
+"      -L: fsxLite - no file creations & no file size changes\n"
+"      -N numops: total # operations to do (default infinity)\n"
+"      -O: use oplen (see -o flag) for every op (default random)\n"
+"      -P: save .fsxlog and .fsxgood files in dirpath (default ./)\n"
+"      -S seed: for random # generator (default 1) 0 gets timestamp\n"
+"      -W: mapped write operations DISabled\n"
+"        -R: read() system calls only (mapped reads disabled)\n"
+"      fname: this filename is REQUIRED (no default)\n");
         exit(90);
  }
  
@@ -1020,8 +1021,8 @@ main(int argc, char **argv)
                 case 'b':
                         simulatedopcount = getnum(optarg, &endp);
                         if (!quiet)
-                               fprintf(stdout, "Will begin at operation
-%ld\n",
+                               fprintf(stdout, "Will begin at operation "
+                                       "%ld\n",
                                         simulatedopcount);
                         if (simulatedopcount == 0)
                                 usage();
@@ -1206,8 +1207,8 @@ main(int argc, char **argv)
                                 prterr(fname);
                                 warn("main: error on write");
                         } else
-                               warn("main: short write, 0x%x bytes instead
-of 0x%x\n",
+                               warn("main: short write, 0x%x bytes instead "
+                                       "of 0x%x\n",
                                      (unsigned)written, maxfilelen);
                         exit(98);
                 }
diff --git a/lustre/tests/leak_finder.pl b/lustre/tests/leak_finder.pl

index b8d234b..745f113 100644 (file)
--- a/lustre/tests/leak_finder.pl
+++ b/lustre/tests/leak_finder.pl
@@ -8,17 +8,21 @@ STDERR->autoflush(1);
  my ($line, $memory);
  my $debug_line = 0;
  
+my $total = 0;
+my $max = 0;
+
  while ($line = <>) {
      $debug_line++;
      my ($file, $func, $lno, $name, $size, $addr, $type);
-    if ($line =~ m/^.*\((.*):(\d+):(.*)\(\) (\d+ \| )?\d+\+\d+\): [vk](.*) '(.*)': (\d+) at (.*) \(tot .*$/) {
+    if ($line =~ m/^.*\((.*):(\d+):(.*)\(\) (\d+ \| )?\d+\+\d+\): (k|v|slab-)(.*) '(.*)': (\d+) at (.*) \(tot (.*)\).*$/) {
          $file = $1;
          $lno = $2;
          $func = $3;
-        $type = $5;
-        $name = $6;
-        $size = $7;
-        $addr = $8;
+        $type = $6;
+        $name = $7;
+        $size = $8;
+        $addr = $9;
+        $tot = $10;
  
         # we can't dump the log after portals has exited, so skip "leaks"
         # from memory freed in the portals module unloading.
@@ -31,13 +35,24 @@ while ($line = <>) {
          next;
      }
  
-    if ($type eq 'malloced') {
+    if (index($type, 'alloced') >= 0) {
+        if (defined($memory->{$addr})) {
+            print STDERR "*** Two allocs with the same address ($size bytes at $addr, $file:$func:$lno)\n";
+            print STDERR "    first malloc at $memory->{$addr}->{file}:$memory->{$addr}->{func}:$memory->{$addr}->{lno}, second at $file:$func:$lno\n";
+            next;
+        }
+
          $memory->{$addr}->{name} = $name;
          $memory->{$addr}->{size} = $size;
          $memory->{$addr}->{file} = $file;
          $memory->{$addr}->{func} = $func;
          $memory->{$addr}->{lno} = $lno;
          $memory->{$addr}->{debug_line} = $debug_line;
+
+        $total += $size;
+        if ($total > $max) {
+            $max = $total;
+        }
      } else {
          if (!defined($memory->{$addr})) {
              print STDERR "*** Free without malloc ($size bytes at $addr, $file:$func:$lno)\n";
@@ -52,6 +67,11 @@ while ($line = <>) {
          }
  
          delete $memory->{$addr};
+        $total -= $size;
+    }
+    if ($total != int($tot)) {
+        print "kernel total $tot != my total $total\n";
+        $total = $tot;
      }
  }
  
@@ -66,4 +86,4 @@ foreach $key (@sorted) {
      print STDERR "*** Leak: $memory->{$key}->{size} bytes allocated at $key ($memory->{$key}->{file}:$memory->{$key}->{func}:$memory->{$key}->{lno}, debug file line $memory->{$key}->{debug_line})\n";
  }
  
-print "Done.\n";
+print "maximum used: $max, amount leaked: $total\n";
diff --git a/lustre/tests/lkcdmap b/lustre/tests/lkcdmap

index 20c8c20..dbfd7f0 100755 (executable)
--- a/lustre/tests/lkcdmap
+++ b/lustre/tests/lkcdmap
@@ -4,10 +4,10 @@ LCMD=$TMP/lkcd-cmds-`hostname`
  echo "Storing LKCD module info in $LCMD"
  cat /tmp/ogdb-`hostname` | while read JUNK M JUNK; do
         MOD="../$M"
-       MAP=`echo $MOD | sed -e 's/\.o$/.map/'`
-       MODNAME=`basename $MOD | sed -e 's/\.o$//'`
+       MODNAME="`basename $MOD .o`"
+       MAP="$TMP/$MODNAME.map"
  
         nm $MOD > $MAP
         echo namelist -a $PWD/$MOD  | tee -a $LCMD
-       echo symtab -a $PWD/$MAP $MODNAME | tee -a $LCMD
+       echo symtab -a $MAP $MODNAME | tee -a $LCMD
  done
diff --git a/lustre/tests/llecho.sh b/lustre/tests/llecho.sh

index 5afade1..3e3e03b 100644 (file)
--- a/lustre/tests/llecho.sh
+++ b/lustre/tests/llecho.sh
@@ -1,6 +1,8 @@
  #!/bin/sh
  
-LCONF=${LCONF:-../utils/lconf}
+PATH=`dirname $0`/../utils:$PATH
+
+LCONF=${LCONF:-lconf}
  NAME=${NAME:-echo}
  
  config=$NAME.xml
@@ -17,5 +19,5 @@ $LCONF $lustre_opt --reformat --gdb $OPTS $config || exit 4
  cat <<EOF
  
  run getattr tests as:
-../utils/lctl --device '\$ECHO_$SERVER' test_getattr 1000000
+`dirname $0`/../utils/lctl --device '\$ECHO_$SERVER' test_getattr 1000000
  EOF
diff --git a/lustre/tests/llmount.sh b/lustre/tests/llmount.sh

index 8e3b37b..d8f37c5 100755 (executable)
--- a/lustre/tests/llmount.sh
+++ b/lustre/tests/llmount.sh
@@ -30,5 +30,5 @@ if [ "$1" = "-v" ]; then
    verbose="-v"
  fi
  
-${LCONF} $portals_opt $lustre_opt $node_opt ${REFORMAT:---reformat} --gdb \
-    $verbose $conf_opt  || exit 2
+${LCONF} $portals_opt $lustre_opt $node_opt ${REFORMAT:---reformat} \
+    ${GDB:---gdb} $verbose $conf_opt  || exit 2
diff --git a/lustre/tests/local.sh b/lustre/tests/local.sh

index 25d05d2..00f2391 100755 (executable)
--- a/lustre/tests/local.sh
+++ b/lustre/tests/local.sh
@@ -7,12 +7,12 @@ config=${1:-local.xml}
  LMC="${LMC:-lmc} -m $config"
  TMP=${TMP:-/tmp}
  
-MDSDEV=${MDSDEV:-$TMP/mds1}
+MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
  MDSSIZE=${MDSSIZE:-50000}
+FSTYPE=${FSTYPE:-ext3}
  
-OSTDEV=${OSTDEV:-$TMP/ost1}
+OSTDEV=${OSTDEV:-$TMP/ost1-`hostname`}
  OSTSIZE=${OSTSIZE:-200000}
-FSTYPE=${FSTYPE:-ext3}
  
  rm -f $config
  
@@ -21,7 +21,7 @@ ${LMC} --add node --node localhost || exit 10
  ${LMC} --add net --node  localhost --nid localhost --nettype tcp || exit 11
  
  # configure mds server
-${LMC} --add mds --nspath /mnt/mds_ns  --node localhost --mds mds1  --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE || exit 20
+${LMC} --add mds --nspath /mnt/mds_ns  --node localhost --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE || exit 20
  
  # configure ost
  ${LMC} --add ost --nspath /mnt/ost_ns --node localhost --ost ost1  --fstype $FSTYPE --dev $OSTDEV --size  $OSTSIZE || exit 30
diff --git a/lustre/tests/lov.sh b/lustre/tests/lov.sh

index 3956f9e..79e9590 100755 (executable)
--- a/lustre/tests/lov.sh
+++ b/lustre/tests/lov.sh
@@ -7,13 +7,16 @@ config=${1:-lov.xml}
  LMC=${LMC:-lmc}
  TMP=${TMP:-/tmp}
  
-MDSDEV=${MDSDEV:-$TMP/mds1}
+MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
  MDSSIZE=${MDSSIZE:-50000}
+FSTYPE=${FSTYPE:-ext3}
  
-OSTDEV1=${OSTDEV1:-$TMP/ost1}
-OSTDEV2=${OSTDEV2:-$TMP/ost2}
-OSTDEV3=${OSTDEV3:-$TMP/ost3}
+OSTDEV1=${OSTDEV1:-$TMP/ost1-`hostname`}
+OSTDEV2=${OSTDEV2:-$TMP/ost2-`hostname`}
+OSTDEV3=${OSTDEV3:-$TMP/ost3-`hostname`}
  OSTSIZE=${OSTSIZE:-100000}
+# 1 to config an echo client instead of llite
+ECHO_CLIENT=${ECHO_CLIENT:-}
  
  STRIPE_BYTES=65536
  STRIPES_PER_OBJ=2      # 0 means stripe over all OSTs
@@ -22,13 +25,17 @@ STRIPES_PER_OBJ=2   # 0 means stripe over all OSTs
  ${LMC} -o $config --add net --node localhost --nid localhost --nettype tcp || exit 1
  
  # configure mds server
-${LMC} -m $config --format --add mds --node localhost --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 10
+${LMC} -m $config --format --add mds --node localhost --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE || exit 10
  
  # configure ost
  ${LMC} -m $config --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPE_BYTES --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 || exit 20
-${LMC} -m $config --add ost --node localhost --lov lov1 --dev $OSTDEV1 --size $OSTSIZE || exit 21
-${LMC} -m $config --add ost --node localhost --lov lov1 --dev $OSTDEV2 --size $OSTSIZE || exit 22
-${LMC} -m $config --add ost --node localhost --lov lov1 --dev $OSTDEV3 --size $OSTSIZE || exit 23
-
-# create client config
-${LMC} -m $config  --add mtpt --node localhost --path /mnt/lustre --mds mds1 --lov lov1 || exit 30
+${LMC} -m $config --add ost --node localhost --lov lov1 --fstype $FSTYPE --dev $OSTDEV1 --size $OSTSIZE || exit 21
+${LMC} -m $config --add ost --node localhost --lov lov1 --fstype $FSTYPE --dev $OSTDEV2 --size $OSTSIZE || exit 22
+${LMC} -m $config --add ost --node localhost --lov lov1 --fstype $FSTYPE --dev $OSTDEV3 --size $OSTSIZE || exit 23
+
+if [ -z "$ECHO_CLIENT" ]; then
+       # create client config
+       ${LMC} -m $config  --add mtpt --node localhost --path /mnt/lustre --mds mds1 --lov lov1 || exit 30
+else
+       ${LMC} -m $config  --add echo_client --node localhost --ost lov1 || exit 31
+fi
diff --git a/lustre/tests/mount2.sh b/lustre/tests/mount2.sh

index 07de3ed..40ef46a 100644 (file)
--- a/lustre/tests/mount2.sh
+++ b/lustre/tests/mount2.sh
@@ -7,10 +7,11 @@ PATH=$SRCDIR:$SRCDIR/../utils:$PATH
  LMC="${LMC:-lmc} -m $config"
  TMP=${TMP:-/tmp}
  
-MDSDEV=${MDSDEV:-$TMP/mds1}
+MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
  MDSSIZE=${MDSSIZE:-50000}
+FSTYPE=${FSTYPE:-ext3}
  
-OSTDEV=${OSTDEV:-$TMP/ost1}
+OSTDEV=${OSTDEV:-$TMP/ost1-`hostname`}
  OSTSIZE=${OSTSIZE:-200000}
  
  rm -f $config
@@ -20,10 +21,10 @@ ${LMC} --add node --node localhost || exit 10
  ${LMC} --add net --node  localhost --nid localhost --nettype tcp || exit 11
  
  # configure mds server
-${LMC} --add mds  --node localhost --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 20
+${LMC} --add mds  --node localhost --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE || exit 20
  
  # configure ost
-${LMC} --add ost --node localhost --ost ost1 --dev $OSTDEV --size  $OSTSIZE || exit 30
+${LMC} --add ost --node localhost --ost ost1 --fstype $FSTYPE --dev $OSTDEV --size  $OSTSIZE || exit 30
  
  # create client config
  ${LMC} --add mtpt --node localhost --path /mnt/lustre1 --mds mds1 --ost ost1 || exit 40
diff --git a/lustre/tests/mount2fs.sh b/lustre/tests/mount2fs.sh

index 27b570d..6d94362 100644 (file)
--- a/lustre/tests/mount2fs.sh
+++ b/lustre/tests/mount2fs.sh
@@ -7,12 +7,13 @@ config=${1-mds-bug.xml}
  LMC=${LMC-../utils/lmc}
  TMP=${TMP:-/tmp}
  
-MDSDEV=$TMP/mds1
-MDSDEV2=$TMP/mds2
+MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
+MDSDEV2=${MDSDEV2:-$TMP/mds2-`hostname`}
  MDSSIZE=50000
+FSTYPE=${FSTYPE:-ext3}
  
-OSTDEV1=$TMP/ost1
-OSTDEV2=$TMP/ost2
+OSTDEV1=${OSTDEV1:-$TMP/ost1-`hostname`}
+OSTDEV2=${OSTDEV2:-$TMP/ost2-`hostname`}
  OSTSIZE=100000
  
  MDSNODE=uml1
@@ -25,19 +26,15 @@ ${LMC} -m $config --add net --node $OSTNODE --nid $OSTNODE --nettype tcp || exit
  ${LMC} -m $config --add net --node $CLIENT --nid $CLIENT --nettype tcp || exit 3
  
  # configure mds server
-${LMC} -m $config --format --add mds --node $MDSNODE --mds mds1 --dev $MDSDEV --size $MDSSIZE ||exit 10
-${LMC} -m $config --format --add mds --node $MDSNODE --mds mds2 --dev $MDSDEV2 --size $MDSSIZE ||exit 10
+${LMC} -m $config --format --add mds --node $MDSNODE --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE ||exit 10
+${LMC} -m $config --format --add mds --node $MDSNODE --mds mds2 --fstype $FSTYPE --dev $MDSDEV2 --size $MDSSIZE ||exit 10
  
  # configure ost
  ${LMC} -m $config --add lov --lov lov1 --mds mds1 --stripe_sz 65536 --stripe_cnt 0 --stripe_pattern 0 || exit 20
  ${LMC} -m $config --add lov --lov lov2 --mds mds2 --stripe_sz 65536 --stripe_cnt 0 --stripe_pattern 0 || exit 20
-${LMC} -m $config --add ost --node $OSTNODE --lov lov1 --dev $OSTDEV1 --size $OSTSIZE || exit 21
-${LMC} -m $config --add ost --node $OSTNODE --lov lov2 --dev $OSTDEV2 --size $OSTSIZE || exit 22
+${LMC} -m $config --add ost --node $OSTNODE --lov lov1 --fstype $FSTYPE --dev $OSTDEV1 --size $OSTSIZE || exit 21
+${LMC} -m $config --add ost --node $OSTNODE --lov lov2 --fstype $FSTYPE --dev $OSTDEV2 --size $OSTSIZE || exit 22
  
  # create client config
  ${LMC} -m $config --add mtpt --node $CLIENT --path /mnt/lustre --mds mds1 --lov lov1 || exit 30
  ${LMC} -m $config --add mtpt --node $CLIENT --path /mnt/lustre2 --mds mds2 --lov lov2 || exit 30
-
-
-
-
diff --git a/lustre/tests/opendevunlink.c b/lustre/tests/opendevunlink.c

index fde7d36..8250f96 100644 (file)
--- a/lustre/tests/opendevunlink.c
+++ b/lustre/tests/opendevunlink.c
@@ -11,6 +11,8 @@
  #include <sys/stat.h>
  #include <dirent.h>
  #include <string.h>
+#include <unistd.h>
+#include <stdlib.h>
  
  int main(int argc, char **argv)
  {
@@ -34,7 +36,7 @@ int main(int argc, char **argv)
          fprintf(stderr, "creating special file %s\n", dname1);
          rc = mknod(dname1, 0777|S_IFIFO, 0);
          if (rc == -1) {
-                fprintf(stderr, "creating %s fails: %s\n", 
+                fprintf(stderr, "creating %s fails: %s\n",
                          dname1, strerror(errno));
                  exit(1);
          }
@@ -47,7 +49,7 @@ int main(int argc, char **argv)
                          dname1, strerror(errno));
                  exit(1);
          }
-        
+
          // doesn't matter if the two dirs are the same??
          fddev2 = open(dname2, O_RDONLY | O_NONBLOCK);
          if (fddev2 == -1) {
@@ -55,40 +57,38 @@ int main(int argc, char **argv)
                          dname2, strerror(errno));
                  exit(1);
          }
-        
+
          // delete the special file
          fprintf (stderr, "unlinking %s\n", dname1);
          rc = unlink(dname1);
          if (rc) {
-                fprintf(stderr, "unlink %s error: %s\n", 
+                fprintf(stderr, "unlink %s error: %s\n",
                          dname1, strerror(errno));
                  exit(1);
          }
  
-        if (access(dname2, F_OK) == 0){
+        if (access(dname2, F_OK) == 0) {
                  fprintf(stderr, "%s still exists\n", dname2);
                  exit(1);
          }
  
-        if (access(dname1, F_OK) == 0){
+        if (access(dname1, F_OK) == 0) {
                  fprintf(stderr, "%s still exists\n", dname1);
                  exit(1);
          }
  
          // fchmod one special file
          rc = fchmod (fddev1, 0777);
-        if(rc == -1)
-        {
-                fprintf(stderr, "fchmod unlinked special file %s fails: %s\n", 
+        if (rc == -1) {
+                fprintf(stderr, "fchmod unlinked special file %s fails: %s\n",
                          dname1, strerror(errno));
                  exit(1);
          }
-                
+
          // fstat two files to check if they are the same
          rc = fstat(fddev1, &st1);
-        if(rc == -1)
-        {
-                fprintf(stderr, "fstat unlinked special file %s fails: %s\n", 
+        if (rc == -1) {
+                fprintf(stderr, "fstat unlinked special file %s fails: %s\n",
                          dname1, strerror(errno));
                  exit(1);
          }
@@ -103,7 +103,7 @@ int main(int argc, char **argv)
          if (st1.st_mode != st2.st_mode) {  // can we do this?
                  fprintf(stderr, "fstat different value on %s and %s\n",                                 dname1, dname2);
                  exit(1);
-        }        
+        }
  
          fprintf(stderr, "Ok, everything goes well.\n");
          return 0;
diff --git a/lustre/tests/openfile.c b/lustre/tests/openfile.c

index 7d8cc6b..7b97309 100644 (file)
--- a/lustre/tests/openfile.c
+++ b/lustre/tests/openfile.c
@@ -18,8 +18,8 @@
  #include <unistd.h>
  
  typedef struct flag_mapping {
-       char string[20];
-       int  flag;
+       const char *string;
+       const int  flag;
  } FLAG_MAPPING;
  
  FLAG_MAPPING flag_table[] = {
@@ -67,13 +67,13 @@ int main(int argc, char** argv)
                  case 'f': {
                          char *tmp;
  
-                        cloned_flags = (char *)malloc(strlen(optarg));
+                        cloned_flags = (char *)malloc(strlen(optarg)+1);
                          if (cloned_flags == NULL) {
                                  fprintf(stderr, "Insufficient memory.\n");
                                  exit(-1);
                          }
  
-                        strncpy(cloned_flags, optarg, strlen(optarg));
+                        strncpy(cloned_flags, optarg, strlen(optarg)+1);
                          for (tmp = strtok(optarg, ":|"); tmp;
                               tmp = strtok(NULL, ":|")) {
                                  int i = 0;
diff --git a/lustre/tests/openunlink.c b/lustre/tests/openunlink.c

index e7671c8..96632a9 100644 (file)
--- a/lustre/tests/openunlink.c
+++ b/lustre/tests/openunlink.c
@@ -3,16 +3,18 @@
  #include <string.h>
  #include <errno.h>
  #include <sys/types.h>
+#include <sys/stat.h>
  #include <stdlib.h>
  #include <unistd.h>
  
-#define T1 "write before unlink\n"
-#define T2 "write after unlink\n"
+#define T1 "write data before unlink\n"
+#define T2 "write data after unlink\n"
  char buf[128];
  
  int main(int argc, char **argv)
  {
-       char *fname, *fname2;
+        char *fname, *fname2;
+        struct stat st;
          int fd, rc;
  
          if (argc < 2 || argc > 3) {
@@ -20,11 +22,11 @@ int main(int argc, char **argv)
                  exit(1);
          }
  
-       fname = argv[1];
-       if (argc == 3)
-               fname2 = argv[2];
-       else
-               fname2 = argv[1];
+        fname = argv[1];
+        if (argc == 3)
+                fname2 = argv[2];
+        else
+                fname2 = argv[1];
  
          fprintf(stderr, "opening\n");
          fd = open(fname, O_RDWR | O_TRUNC | O_CREAT, 0644);
@@ -36,50 +38,67 @@ int main(int argc, char **argv)
          fprintf(stderr, "writing\n");
          rc = write(fd, T1, strlen(T1) + 1);
          if (rc != strlen(T1) + 1) {
-                fprintf(stderr, "write (normal) %s\n", strerror(errno));
+                fprintf(stderr, "write (normal) %s (rc %d)\n",
+                        strerror(errno), rc);
+                exit(1);
+        }
+
+        if (argc == 3) {
+                fprintf(stderr, "closing %s\n", fname);
+                rc = close(fd);
+                if (rc) {
+                        fprintf(stderr, "close (normal) %s\n", strerror(errno));
+                        exit(1);
+                }
+
+                fprintf(stderr, "opening %s\n", fname2);
+                fd = open(fname2, O_RDWR);
+                if (fd == -1) {
+                        fprintf(stderr, "open (unlink) %s\n", strerror(errno));
+                        exit(1);
+                }
+
+                fprintf (stderr, "unlinking %s\n", fname2);
+                rc = unlink(fname2);
+                if (rc) {
+                        fprintf(stderr, "unlink %s\n", strerror(errno));
+                        exit(1);
+                }
+
+                if (access(fname2, F_OK) == 0) {
+                        fprintf(stderr, "%s still exists\n", fname2);
+                        exit(1);
+                }
+        } else {
+                fprintf(stderr, "resetting fd offset\n");
+                rc = lseek(fd, 0, SEEK_SET);
+                if (rc) {
+                        fprintf(stderr, "seek %s\n", strerror(errno));
+                        exit(1);
+                }
+
+                printf("unlink %s and press enter\n", fname);
+                getc(stdin);
+        }
+
+        if (access(fname, F_OK) == 0) {
+                fprintf(stderr, "%s still exists\n", fname);
                  exit(1);
          }
  
-       if (argc == 3) {
-               fprintf(stderr, "closing %s\n", fname);
-               rc = close(fd);
-               if (rc) {
-                       fprintf(stderr, "close (normal) %s\n", strerror(errno));
-                       exit(1);
-               }
-
-               fprintf(stderr, "opening %s\n", fname2);
-               fd = open(fname2, O_RDWR);
-               if (fd == -1) {
-                       fprintf(stderr, "open (unlink) %s\n", strerror(errno));
-                       exit(1);
-               }
-
-               fprintf (stderr, "unlinking %s\n", fname2);
-               rc = unlink(fname2);
-               if (rc) {
-                       fprintf(stderr, "unlink %s\n", strerror(errno));
-                       exit(1);
-               }
-
-               if (access(fname2, F_OK) == 0) {
-                       fprintf(stderr, "%s still exists\n", fname2);
-                       exit(1);
-               }
-       } else {
-               printf("unlink %s and press enter\n", fname);
-               getc(stdin);
-       }
-
-       if (access(fname, F_OK) == 0) {
-               fprintf(stderr, "%s still exists\n", fname);
-               exit(1);
-       }
+        fprintf(stderr, "fstating\n");
+        rc = fstat(fd, &st);
+        if (rc) {
+                fprintf(stderr, "fstat (unlink) %s\n", strerror(errno));
+                exit(1);
+        }
+        if (st.st_nlink != 0)
+                fprintf(stderr, "st_nlink = %d\n", (int)st.st_nlink);
  
          fprintf(stderr, "reading\n");
          rc = read(fd, buf, strlen(T1) + 1);
          if (rc != strlen(T1) + 1) {
-                fprintf(stderr, "read (unlink) %s rc %d\n",
+                fprintf(stderr, "read (unlink) %s (rc %d)\n",
                          strerror(errno), rc);
                  exit(1);
          }
@@ -92,7 +111,7 @@ int main(int argc, char **argv)
  
          fprintf(stderr, "truncating\n");
          rc = ftruncate(fd, 0);
-        if (rc ) {
+        if (rc) {
                  fprintf(stderr, "truncate (unlink) %s\n", strerror(errno));
                  exit(1);
          }
@@ -124,8 +143,8 @@ int main(int argc, char **argv)
          fprintf(stderr, "reading again\n");
          rc = read(fd, buf, strlen(T2) + 1);
          if (rc != strlen(T2) + 1) {
-                fprintf(stderr, "read (after unlink rewrite) %s\n",
-                        strerror(errno));
+                fprintf(stderr, "read (after unlink rewrite) %s (rc %d)\n",
+                        strerror(errno), rc);
                  exit(1);
          }
  
@@ -135,7 +154,7 @@ int main(int argc, char **argv)
                  exit(1);
          }
  
-        fprintf(stderr, "closing again\n");
+        fprintf(stderr, "closing\n");
          rc = close(fd);
          if (rc) {
                  fprintf(stderr, "close (unlink) %s\n", strerror(errno));
diff --git a/lustre/tests/recovery-cleanup.sh b/lustre/tests/recovery-cleanup.sh

index c8f85ee..fefd2d6 100755 (executable)
--- a/lustre/tests/recovery-cleanup.sh
+++ b/lustre/tests/recovery-cleanup.sh
@@ -22,9 +22,10 @@ CLIENT=${CLIENT:-mdev8}
  NETWORKTYPE=${NETWORKTYPE:-tcp}
  MOUNTPT=${MOUNTPT:-/mnt/lustre}
  CONFIG=${CONFIG:-recovery-cleanup.xml}
-MDSDEV=${MDSDEV:-/tmp/mds}
-OSTDEV=${OSTDEV:-/tmp/ost}
+MDSDEV=${MDSDEV:-/tmp/mds-`hostname`}
  MDSSIZE=${MDSSIZE:-100000}
+FSTYPE=${FSTYPE:-ext3}
+OSTDEV=${OSTDEV:-/tmp/ost-`hostname`}
  OSTSIZE=${OSTSIZE:-100000}
  
  do_mds() {
@@ -51,10 +52,10 @@ make_config() {
         lmc -m $CONFIG --add net --node $NODE --nid `h2$NETWORKTYPE $NODE` \
             --nettype $NETWORKTYPE || exit 4
      done
-    lmc -m $CONFIG --add mds --node $MDSNODE --mds mds1 --dev $MDSDEV \
-        --size $MDSSIZE || exit 5
-    lmc -m $CONFIG --add ost --node $OSTNODE --ost ost1 --dev $OSTDEV \
-        --size $OSTSIZE || exit 6
+    lmc -m $CONFIG --add mds --node $MDSNODE --mds mds1 --fstype $FSTYPE \
+       --dev $MDSDEV --size $MDSSIZE || exit 5
+    lmc -m $CONFIG --add ost --node $OSTNODE --ost ost1 --fstype $FSTYPE \
+       --dev $OSTDEV --size $OSTSIZE || exit 6
      lmc -m $CONFIG --add mtpt --node $CLIENT --path $MOUNTPT --mds mds1 \
          --ost ost1 || exit 7
  }
diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh

index ebf0a0c..bc6a9c1 100755 (executable)
--- a/lustre/tests/recovery-small.sh
+++ b/lustre/tests/recovery-small.sh
@@ -25,9 +25,9 @@ CLIENT=${CLIENT:-mdev8}
  NETWORKTYPE=${NETWORKTYPE:-tcp}
  MOUNTPT=${MOUNTPT:-/mnt/lustre}
  CONFIG=${CONFIG:-recovery-small.xml}
-MDSDEV=${MDSDEV:-/tmp/mds}
-OSTDEV=${OSTDEV:-/tmp/ost}
+MDSDEV=${MDSDEV:-/tmp/mds-`hostname`}
  MDSSIZE=${MDSSIZE:-100000}
+OSTDEV=${OSTDEV:-/tmp/ost-`hostname`}
  OSTSIZE=${OSTSIZE:-100000}
  UPCALL=${UPCALL:-$RPWD/recovery-small-upcall.sh}
  FSTYPE=${FSTYPE:-ext3}
diff --git a/lustre/tests/rename.pl b/lustre/tests/rename.pl

index 3ba9368..4ea020f 100644 (file)
--- a/lustre/tests/rename.pl
+++ b/lustre/tests/rename.pl
@@ -1,78 +1,202 @@
-#!/usr/bin/perl
+#!/usr/bin/perl -w
  use strict;
+$|++;
+
+$ENV{PATH}="/bin:/usr/bin";
+$ENV{ENV}="";
+$ENV{BASH_ENV}="";
+
  use diagnostics;
  use Getopt::Long;
+use POSIX ":sys_wait_h";
  
-sub usage () {
-    print "Usage: $0 <mount point prefix> <iterations>\n";
-    print "example: $0 --count=2 /mnt/lustre 50\n";
-    print "         will test in /mnt/lustre1 and /mnt/lustre2\n";
-    print "         $0 --count=0 /mnt/lustre 50\n";
-    print "         will test in /mnt/lustre only\n";
-    exit;
-}
-my ($j, $k, $d, $f1, $f2, $path, $silent);
-my $count = 0;
-my $create = 10;
+use vars qw(
+            $MAX_THREADS
+            );
+ 
+# Don't try to run more than this many threads concurrently.
+$MAX_THREADS = 16;
+
+# Initialize variables
+my $silent = 0;
+my $create_files = 1; # should we create files or not?
+my $use_mcreate = 1;  # should we use mcreate or open?
+my $num_dirs = 3;     # number of directories to create
+my $num_files = 6;    # number of files to create
+my $iterations = 1;
+my $num_threads = 1;
+my $mountpt;
+my $num_mounts = -1;
  
  GetOptions("silent!"=> \$silent,
-           "count=i" => \$count,
-           "create=i" => \$create);
+          "use_mcreate=i" => \$use_mcreate,
+           "create_files=i" => \$create_files,
+          "use_mcreate=i" => \$use_mcreate,
+          "num_files=i" => \$num_files,
+          "num_dirs=i" => \$num_dirs,
+          "mountpt=s" => \$mountpt,
+           "num_mounts=i" => \$num_mounts,
+          "iterations=i" => \$iterations,
+           "num_threads=i" => \$num_threads,
+           ) || die &usage;
  
-my $mtpt = shift || usage();
-my $i = shift || usage();
-my $total = $i;
-my $files = 6;
-my $dirs = 3;
-my $mcreate = 0; # should we use mcreate or open?
+# Check for mandatory args.
+if (!$mountpt ||
+    !$num_mounts) {
+    die &usage;
+}
  
-my $which = "";
-if ($count > 0) {
-    $which = int(rand() * $count) + 1;
+if ($num_threads > $MAX_THREADS) {
+    print "\nMAX_THREADS is currently set to $MAX_THREADS.\n\n";
+    print "You will have to change this in the source\n";
+    print "if you really want to run with $num_threads threads.\n\n";
+    exit 1;
  }
  
-$k = $dirs;
-if ($create == 0) {
-    $k = 0;
+# Initialize rand() function.
+srand (time ^ $$ ^ unpack "%L*", `ps axww | gzip`);
+
+#########################################################################
+### MAIN
+
+my $which = "";
+if ($num_mounts > 0) {
+    $which = int(rand() * $num_mounts) + 1;
  }
-while ($k--) {
-    $path = "$mtpt$which/$k";
-    my $rc = mkdir $path, 0755;
-    print "mkdir $path failed: $!\n" if !$rc;
-    $j = $files;
-    while ($j--) {
-        `./mcreate $path/$j`;
+
+# Create files and directories (if necessary)
+if ($create_files) {
+    for (my $i=1; $i<=$num_threads;$i++) {
+       for (my $j=0; $j<$num_dirs;$j++) {
+           my $path = "${mountpt}${which}/${i}.${j}";
+           mkdir($path, 0755) || die "Can't mkdir $path: $!\n";
+           for (my $k=0; $k<$num_files; $k++) {
+               my $filepath = "${path}/${k}";
+               &create_file($filepath);
+               if (! -e $filepath) {
+                   die "Error creating $filepath\n";
+               }
+           }
+       }
      }
  }
  
-while ($i--) {
-    my $which = "";
-    if ($count > 0) {
-        $which = int(rand() * $count) + 1;
-    }
-    $d = int(rand() * $dirs);
-    $f1 = int(rand() * $files);
-    $f2 = int(rand() * $files);
-    print "[$$] $mtpt$which/$d/$f1 $mtpt$which/$d/$f2 ...\n" if !$silent;
-    my $rc = rename "$mtpt$which/$d/$f1", "$mtpt$which/$d/$f2";
-    print "[$$] done: $rc\n" if !$silent;
-    if (($total - $i) % 100 == 0) {
-        print STDERR "[" . $$ . "]" . ($total - $i) . " operations\n";
+for (my $i=1; $i<=$num_threads; $i++) {
+    my $status = &fork_and_rename($i);
+    last if ($status != 0);
+}
+
+# Wait for all our threads to finish.
+# (a blocking waitpid returns -1 once no children remain)
+my $child = 0;
+do {
+    $child = waitpid(-1, 0);
+} until $child == -1;
+sleep 1;
+
+# Unlink files and directories (if necessary)
+if ($create_files) {
+    for (my $i=1; $i<=$num_threads;$i++) {
+       for (my $j=0; $j<$num_dirs;$j++) {
+           my $path = "${mountpt}${which}/${i}.${j}";
+           for (my $k=0; $k<=$num_files; $k++) {
+               my $filepath = "${path}/${k}";
+               unlink("$filepath") if (-e $filepath);
+           }
+           my $rc = rmdir $path;
+           print "rmdir $path failed: $!\n" if !$rc;       
+       }
      }
  }
  
-$k = $dirs;
-if ($create == 0) {
-    $k = 0;
+exit 0;
+
+#########################################################################
+### SUBROUTINES
+
+sub usage () {
+    print "\nUsage: $0 [--silent] [--create_files=n] [--use_mcreate=n] [--num_dirs=n] [--num_files=n] [--iterations=n] [--num_threads=n] --num_mounts=n --mountpt=/path/to/lustre/mount\n\n";
+    print "\t--silent\tminimal output\n";
+    print "\t--create_files=n\tcreate files at start, default=1 (yes)\n";
+    print "\t--use_mcreate=n\tuse mcreate to create files, default=1 (yes)\n";
+    print "\t--num_dirs=n\tnumber of directories to create per iteration, default=3\n";
+    print "\t--num_files=n\tnumber of files to create per directory, default=6\n";
+    print "\t--iterations=n\tnumber of iterations to perform, default=1\n";
+    print "\t--num_threads=n\tnumber of threads to run, default=1\n";
+    print "\t--mountpt\tlocation of lustre mount\n";
+    print "\t--num_mounts=n\tnumber of lustre mounts to test across, default=-1 (single mount point without numeric suffix)\n\n";
+    print "example: $0 --mountpt=/mnt/lustre --num_mounts=2 --iterations=50\n";
+    print "         will perform 50 iterations in /mnt/lustre1 and /mnt/lustre2\n";
+    print "         $0 --mountpt=/mnt/lustre --num_mounts=-1 --iterations=50\n";
+    print "         will perform 50 iterations in /mnt/lustre only\n\n";
+    exit;
  }
-while ($k--) {
-    $path = "$mtpt$which/$k";
-    $j = $files;
-    while ($j--) {
-        unlink "$path/$j";
+
+
+#########################################################################
+sub create_file ($) {
+    my ($path) = @_;;
+    
+    if ($use_mcreate) {
+        my $tmp = `./mcreate $path`;
+       if ($tmp =~ /.*error: (.*)\n/) {
+           die "Error mcreating $path: $1\n";
+       }
+    } else {
+        open(FH, ">$path") || die "Error opening $path: $!\n";
+        close(FH) || die;
      }
-    my $rc = rmdir $path;
-    print "rmdir $path failed: $!\n" if !$rc;
+    return 0;
  }
  
-print "Done.\n";
+#########################################################################
+sub fork_and_rename ($) {
+    my ($thread_num) = @_;
+    
+  FORK: {
+      if (my $pid = fork) {
+          # parent here
+          # child process pid is available in $pid
+         return 0;
+      } elsif (defined $pid) { # $pid is zero here if defined
+         
+         my $current_iteration=1;
+          while ($current_iteration <= $iterations) {
+             for (my $i=0; $i<$num_files; $i++) {
+                 my $which = "";
+                 if ($num_mounts > 0) {
+                     $which = int(rand() * $num_mounts) + 1;
+                 }
+                 
+                 my $d = int(rand() * $num_dirs);
+                 my $f1 = int(rand() * $num_files);
+                 my $f2 = int(rand() * $num_files);
+                 my $path_f1 = "${mountpt}${which}/${thread_num}.${d}/${f1}";
+                 my $path_f2 = "${mountpt}${which}/${thread_num}.${d}/${f2}";
+                 
+                 print "Thread $thread_num: [$$] $path_f1 $path_f2 ...\n" if !$silent;
+                 my $rc = rename $path_f1, $path_f2;
+                 print "Thread $thread_num: [$$] done: $rc\n" if !$silent;
+             }
+             if (($current_iteration) % 100 == 0) {
+                 print STDERR "Thread $thread_num: " . $current_iteration . " operations [" . $$ . "]\n";
+                 
+             }
+             $current_iteration++;
+         }
+
+         print "Thread $thread_num: Done.\n";
+
+         exit 0;
+
+      } elsif ($! =~ /No more process/) {
+          # EAGAIN, supposedly recoverable fork error
+          sleep 5;
+          redo FORK;
+      } else {
+          # weird fork error
+          die "Can't fork: $!\n";
+      }
+  }
+    
+}
diff --git a/lustre/tests/runas.c b/lustre/tests/runas.c

index 20981e8..1e859aa 100644 (file)
--- a/lustre/tests/runas.c
+++ b/lustre/tests/runas.c
@@ -7,40 +7,39 @@
  #include <string.h>
  #include <errno.h>
  #include <sys/types.h>
+#include <grp.h>
  #include <sys/wait.h>
  
  #define DEBUG 0
  
-void Usage_and_abort(void)
+static const char usage[] =
+"Usage: %s -u user_id [-g grp_id ] [ -G ] command\n"
+"  -u user_id      switch to UID user_id\n"
+"  -g grp_id       switch to GID grp_id\n"
+"  -G              clear supplementary groups\n";
+
+void Usage_and_abort(const char *name)
  {
-       fprintf(stderr, "Usage: runas -u user_id [ -g grp_id ]"
-               " command_to_be_run \n");
-       exit(-1);
+        fprintf(stderr, usage, name);
+        exit(-1);
  }
  
-// Usage: runas -u user_id [ -g grp_id ] [--] command_to_be_run
-// return: the return value of "command_to_be_run"
-// NOTE: returning -1 might be the return code of this program itself or
-// the "command_to_be_run"
-
-// ROOT runs "runas" for free
-// Other users run "runas" requires  chmod 6755 "command_to_be_run"
-
  int main(int argc, char **argv)
  {
-        char **my_argv;
+        char **my_argv, *name = argv[0];
          int status;
          int c,i;
          int gid_is_set = 0;
          int uid_is_set = 0;
+        int clear_supp_groups = 0;
          uid_t user_id;
          gid_t grp_id;
  
          if (argc == 1)
-                Usage_and_abort();
+                Usage_and_abort(name);
  
          // get UID and GID
-        while ((c = getopt (argc, argv, "+u:g:h")) != -1) {
+        while ((c = getopt (argc, argv, "+u:g:hG")) != -1) {
                  switch (c) {
                  case 'u':
                          user_id = (uid_t)atoi(optarg);
@@ -54,23 +53,23 @@ int main(int argc, char **argv)
                          gid_is_set = 1;
                          break;
  
-                case 'h':
-                        Usage_and_abort();
+                case 'G':
+                        clear_supp_groups = 1;
                          break;
  
                  default:
-                        //fprintf(stderr, "Bad parameters.\n");
-                        //Usage_and_abort ();
+                case 'h':
+                        Usage_and_abort(name);
                          break;
                  }
          }
  
          if (!uid_is_set)
-                Usage_and_abort();
+                Usage_and_abort(name);
  
          if (optind == argc) {
-                fprintf(stderr, "Bad parameters.\n");
-                Usage_and_abort();
+                fputs("Must specify command to run.\n", stderr);
+                Usage_and_abort(name);
          }
  
          // assemble the command
@@ -99,6 +98,14 @@ int main(int argc, char **argv)
                   exit(-1);
          }
  
+        if (clear_supp_groups) {
+                status = setgroups(0, NULL);
+                if (status == -1) {
+                        perror("clearing supplementary groups");
+                        exit(-1);
+                }
+        }
+        
          // set UID
          status = setreuid(user_id, user_id );
          if(status == -1) {
@@ -107,8 +114,8 @@ int main(int argc, char **argv)
                    exit(-1);
          }
  
-
-        fprintf(stderr, "running as USER(%d), Grp (%d):  ", user_id, grp_id );
+        fprintf(stderr, "running as UID %d, GID %d%s:", user_id, grp_id,
+                clear_supp_groups ? ", cleared groups" : "");
  
          for (i = 0; i < argc - optind; i++)
                   fprintf(stderr, " [%s]", my_argv[i]);
diff --git a/lustre/tests/rundbench b/lustre/tests/rundbench

index cb417d2..821ac46 100755 (executable)
--- a/lustre/tests/rundbench
+++ b/lustre/tests/rundbench
@@ -1,6 +1,6 @@
  #!/bin/sh
-
-DIR=${DIR:-/mnt/lustre/`hostname`}
+MNT=${MNT:-/mnt/lustre}
+DIR=${DIR:-$MNT/`hostname`}
  #[ -e /proc/sys/portals/debug ] && echo 0 > /proc/sys/portals/debug 
  mkdir -p $DIR
  TGT=$DIR/client.txt
diff --git a/lustre/tests/runobdstat b/lustre/tests/runobdstat

index 886ce8f..ad60d6d 100644 (file)
--- a/lustre/tests/runobdstat
+++ b/lustre/tests/runobdstat
@@ -1,7 +1,7 @@
  #!/bin/sh
  PATH=`dirname $0`/../utils:$PATH
  
-obdstat filter 1 | while read LINE; do
+llobdstat.pl $1 1 | while read LINE; do
         echo "`date +s`: $LINE"
-       [ "$1" ] && echo "`date +s`: $LINE" >> $1
+       [ "$2" ] && echo "`date +s`: $LINE" >> $2
  done
diff --git a/lustre/tests/runregression-brw.sh b/lustre/tests/runregression-brw.sh

index 4d86248..395ceb5 100644 (file)
--- a/lustre/tests/runregression-brw.sh
+++ b/lustre/tests/runregression-brw.sh
@@ -1,6 +1,6 @@
  #!/bin/sh
  SRCDIR="`dirname $0`/"
-export PATH=/sbin:/usr/sbin:$SRCDIR:$PATH
+export PATH=/sbin:/usr/sbin:$SRCDIR/../utils:$PATH
  
  LOOPS=${LOOPS:-1}
  COUNT=${COUNT:-1000000}
diff --git a/lustre/tests/runtests b/lustre/tests/runtests

index e59f5f4..6a8aac8 100755 (executable)
--- a/lustre/tests/runtests
+++ b/lustre/tests/runtests
@@ -35,41 +35,42 @@ while [ "$1" ]; do
         shift
  done
  
-OSCMT="`mount | awk '/ lustre_lite / { print $3 }' | tail -1`"
-if [ -z "$OSCMT" ]; then
+MOUNT="`mount | awk '/ lustre_lite / { print $3 }' | tail -1`"
+if [ -z "$MOUNT" ]; then
         sh llmount.sh
-       OSCMT="`mount | awk '/ lustre_lite / { print $3 }' | tail -1`"
-       [ -z "$OSCMT" ] && fail "no lustre filesystem mounted" 1
+       MOUNT="`mount | awk '/ lustre_lite / { print $3 }' | tail -1`"
+       [ -z "$MOUNT" ] && fail "no lustre filesystem mounted" 1
         I_MOUNTED="yes"
  fi
  
-OSCTMP=`echo $OSCMT | tr "/" "."`
+OSCTMP=`echo $MOUNT | tr "/" "."`
  USED=`df | awk "/$OSCTMP/ { print \\$3 }" | tail -1`
  USED=`expr $USED + 16` # Some space for the status file
  
  # let's start slowly here...
-log "touching $OSCMT"
-touch $OSCMT || fail "can't touch $OSCMT" 2
-HOSTS=$OSCMT/hosts.$$
-
-# this will cause the following cp to trigger bug #620096
-log "create an empty file $HOSTS"
-mcreate $HOSTS
-
-log "copying /etc/hosts to $HOSTS"
-cp /etc/hosts $HOSTS || fail "can't cp /etc/hosts to $HOSTS" 3
-log "comparing /etc/hosts and $HOSTS"
-diff -u /etc/hosts $HOSTS || fail "$HOSTS different" 4
-log "renaming $HOSTS to $HOSTS.ren"
-mv $HOSTS $HOSTS.ren || fail "can't rename $HOSTS to $HOSTS.ren" 5
-log "copying /etc/hosts to $HOSTS again"
-cp /etc/hosts $HOSTS || fail "can't cp /etc/hosts to $HOSTS again" 6
-log "truncating $HOSTS"
-> $HOSTS || fail "can't truncate $HOSTS" 8
-log "removing $HOSTS"
-rm $HOSTS || fail "can't remove $HOSTS" 9
-
-DST=$OSCMT/runtest.$$
+log "touching $MOUNT"
+touch $MOUNT || fail "can't touch $MOUNT" 2
+HOSTS=$MOUNT/hosts.$$
+
+if [ $COUNT -gt 10 -o $COUNT -eq 0 ]; then
+       # this will cause the following cp to trigger bug #620096
+       log "create an empty file $HOSTS"
+       mcreate $HOSTS
+       log "copying /etc/hosts to $HOSTS"
+       cp /etc/hosts $HOSTS || fail "can't cp /etc/hosts to $HOSTS" 3
+       log "comparing /etc/hosts and $HOSTS"
+       diff -u /etc/hosts $HOSTS || fail "$HOSTS different" 4
+       log "renaming $HOSTS to $HOSTS.ren"
+       mv $HOSTS $HOSTS.ren || fail "can't rename $HOSTS to $HOSTS.ren" 5
+       log "copying /etc/hosts to $HOSTS again"
+       cp /etc/hosts $HOSTS || fail "can't cp /etc/hosts to $HOSTS again" 6
+       log "truncating $HOSTS"
+       > $HOSTS || fail "can't truncate $HOSTS" 8
+       log "removing $HOSTS"
+       rm $HOSTS || fail "can't remove $HOSTS" 9
+fi
+
+DST=$MOUNT/runtest.$$
  # let's start slowly here...
  log "creating $DST"
  mkdir $DST || fail "can't mkdir $DST" 10
@@ -102,27 +103,29 @@ done
  sh llmountcleanup.sh || exit 19
  sh llrmount.sh || exit 20
  
-log "renaming $HOSTS.ren to $HOSTS"
-mv $HOSTS.ren $HOSTS || fail "can't rename $HOSTS.ren to $HOSTS" 32
-log "truncating $HOSTS"
-> $HOSTS || fail "can't truncate $HOSTS" 34
-log "removing $HOSTS"
-rm $HOSTS || fail "can't remove $HOSTS again" 36
  log "removing $DST"
  rm -r $V $DST || fail "can't remove $DST" 37
  
+if [ $COUNT -gt 10 -o $COUNT -eq 0 ]; then
+       log "renaming $HOSTS.ren to $HOSTS"
+       mv $HOSTS.ren $HOSTS || fail "can't rename $HOSTS.ren to $HOSTS" 32
+       log "truncating $HOSTS"
+       > $HOSTS || fail "can't truncate $HOSTS" 34
+       log "removing $HOSTS"
+       rm $HOSTS || fail "can't remove $HOSTS again" 36
+fi
+
  # mkdirmany test (bug 589)
-log "running mkdirmany $OSCMT/base$$ 100"
-$MKDIRMANY $OSCMT/base$$ 100 || fail "mkdirmany failed"
+log "running mkdirmany $MOUNT/base$$ 100"
+$MKDIRMANY $MOUNT/base$$ 100 || fail "mkdirmany failed"
  log "removing mkdirmany directories"
-rmdir $OSCMT/base$$* || fail "mkdirmany cleanup failed"
+rmdir $MOUNT/base$$* || fail "mkdirmany cleanup failed"
  
  log "done"
  
  NOWUSED=`df | awk "/$OSCTMP/ { print \\$3 }" | tail -1`
-if [ $NOWUSED -gt $USED ]; then
+if [ `expr $NOWUSED - $USED` -gt 1024 ]; then
         echo "Space not all freed: now ${NOWUSED}kB, was ${USED}kB." 1>&2
-       echo "This is normal on BA OSTs, because of subdirectories." 1>&2
  fi
  
  if [ "$I_MOUNTED" = "yes" ]; then
diff --git a/lustre/tests/runvmstat b/lustre/tests/runvmstat

index b04d84c..f414ccc 100755 (executable)
--- a/lustre/tests/runvmstat
+++ b/lustre/tests/runvmstat
@@ -1,6 +1,6 @@
  #!/bin/sh
  vmstat 1 | while read LINE ; do
         LINE="`date +%s`: $LINE"
-       echo $LINE
-       [ "$1" ] && echo $LINE >> $1
+       echo "$LINE"
+       [ "$1" ] && echo "$LINE" >> $1
  done
diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh

index 46d0072..09eb8e9 100644 (file)
--- a/lustre/tests/sanity.sh
+++ b/lustre/tests/sanity.sh
@@ -7,17 +7,19 @@
  set -e
  
  ONLY=${ONLY:-"$*"}
-ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"34 35"}        # bugs 1365 and 1360 respectively
+ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"35 32q 37 39"} # bugs 1360, 1504
  
  SRCDIR=`dirname $0`
  PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH
  
-CHECKSTAT=${CHECKSTAT:-"./checkstat -v"}
+CHECKSTAT=${CHECKSTAT:-"checkstat -v"}
  CREATETEST=${CREATETEST:-createtest}
  LFIND=${LFIND:-lfind}
  LSTRIPE=${LSTRIPE:-lstripe}
  LCTL=${LCTL:-lctl}
  MCREATE=${MCREATE:-mcreate}
+OPENFILE=${OPENFILE:-openfile}
+OPENUNLINK=${OPENUNLINK:-openunlink}
  TOEXCL=${TOEXCL:-toexcl}
  TRUNCATE=${TRUNCATE:-truncate}
  
@@ -29,22 +31,20 @@ else
         RUNAS=${RUNAS:-"runas -u $RUNAS_ID"}
  fi
  
-MOUNT=${MOUNT:-/mnt/lustre}
-DIR=${DIR:-$MOUNT}
-export NAME=$NAME
+export NAME=${NAME:-local}
  
  SAVE_PWD=$PWD
  
  clean() {
-        echo -n "cln.."
-        sh llmountcleanup.sh > /dev/null || exit 20
+       echo -n "cln.."
+       sh llmountcleanup.sh > /dev/null || exit 20
  }
-
  CLEAN=${CLEAN:-clean}
+
  start() {
-        echo -n "mnt.."
-        sh llrmount.sh > /dev/null || exit 10
-        echo "done"
+       echo -n "mnt.."
+       sh llrmount.sh > /dev/null || exit 10
+       echo "done"
  }
  START=${START:-start}
  
@@ -54,7 +54,7 @@ log() {
  }
  
  run_one() {
-       if ! mount | grep -q $MOUNT; then
+       if ! mount | grep -q $DIR; then
                 $START
         fi
         log "== test $1: $2"
@@ -87,23 +87,33 @@ run_test() {
  }
  
  error() { 
-    echo FAIL
-    exit 1
+       echo "FAIL: $@"
+       exit 1
  }
  
  pass() { 
-    echo PASS
+       echo PASS
  }
  
-if ! mount | grep $MOUNT; then
+MOUNT="`mount | awk '/^'$NAME' .* lustre_lite / { print $3 }'`"
+if [ -z "$MOUNT" ]; then
         sh llmount.sh
+       MOUNT="`mount | awk '/^'$NAME' .* lustre_lite / { print $3 }'`"
+       [ -z "$MOUNT" ] && error "NAME=$NAME not mounted"
         I_MOUNTED=yes
  fi
  
+[ `echo $MOUNT | wc -w` -gt 1 ] && error "NAME=$NAME mounted more than once"
+
+DIR=${DIR:-$MOUNT}
+[ -z "`echo $DIR | grep $MOUNT`" ] && echo "$DIR not in $MOUNT" && exit 99
+
+rm -rf $DIR/[Rdfs][1-9]*
+
  echo preparing for tests involving mounts
-EXT2_DEV=/tmp/SANITY.LOOP
-dd if=/dev/zero of=$EXT2_DEV bs=1k seek=1000 count=1 > /dev/null
-mke2fs -F $EXT2_DEV > /dev/null
+EXT2_DEV=${EXT2_DEV:-/tmp/SANITY.LOOP}
+touch $EXT2_DEV
+mke2fs -F $EXT2_DEV 1000 > /dev/null
  
  test_0() {
         touch $DIR/f
@@ -178,12 +188,49 @@ test_5() {
  }
  run_test 5 "mkdir .../d5 .../d5/d2; chmod .../d5/d2 ============"
  
-test_6() {
-       touch $DIR/f6
-       chmod 0666 $DIR/f6
-       $CHECKSTAT -t file -p 0666 $DIR/f6 || error
+test_6a() {
+       touch $DIR/f6a
+       chmod 0666 $DIR/f6a || error
+       $CHECKSTAT -t file -p 0666 -u \#$UID $DIR/f6a || error
+}
+run_test 6a "touch .../f6a; chmod .../f6a ======================"
+
+test_6b() {
+       [ $RUNAS_ID -eq $UID ] && echo "skipping test 6b" && return
+       $RUNAS chmod 0444 $DIR/f6a && error
+       $CHECKSTAT -t file -p 0666 -u \#$UID $DIR/f6a || error
+}
+run_test 6b "$RUNAS chmod .../f6a (should return error) =="
+
+test_6c() {
+       [ $RUNAS_ID -eq $UID ] && echo "skipping test 6c" && return
+       touch $DIR/f6c
+       chown $RUNAS_ID $DIR/f6c || error
+       $CHECKSTAT -t file -u \#$RUNAS_ID $DIR/f6c || error
+}
+run_test 6c "touch .../f6c; chown .../f6c ======================"
+
+test_6d() {
+       [ $RUNAS_ID -eq $UID ] && echo "skipping test 6d" && return
+       $RUNAS chown $UID $DIR/f6c && error
+       $CHECKSTAT -t file -u \#$RUNAS_ID $DIR/f6c || error
  }
-run_test 6 "touch .../f6; chmod .../f6 ========================="
+run_test 6d "$RUNAS chown .../f6c (should return error) =="
+
+test_6e() {
+       [ $RUNAS_ID -eq $UID ] && echo "skipping test 6e" && return
+       touch $DIR/f6e
+       chgrp $RUNAS_ID $DIR/f6e || error
+       $CHECKSTAT -t file -u \#$UID -g \#$RUNAS_ID $DIR/f6e || error
+}
+run_test 6e "touch .../f6e; chgrp .../f6e ======================"
+
+test_6f() {
+       [ $RUNAS_ID -eq $UID ] && echo "skipping test 6f" && return
+       $RUNAS chgrp $UID $DIR/f6e && error
+       $CHECKSTAT -t file -u \#$UID -g \#$RUNAS_ID $DIR/f6e || error
+}
+run_test 6f "$RUNAS chgrp .../f6e (should return error) =="
  
  test_7a() {
         mkdir $DIR/d7
@@ -357,7 +404,7 @@ test_23() {
  run_test 23 "O_CREAT|O_EXCL in subdir =========================="
  
  test_24a() {
-       echo '============ rename sanity ================================='
+       echo '== rename sanity =============================================='
         echo '-- same directory rename'
         mkdir $DIR/R1
         touch $DIR/R1/f
@@ -440,7 +487,7 @@ test_24i() {
         $CHECKSTAT -t dir  $DIR/R9/a || error
         $CHECKSTAT -a file $DIR/R9/a/f || error
  }
-run_test 24i "rename file to dir error: touch f ; mkdir a ; rename f a ====="
+run_test 24i "rename file to dir error: touch f ; mkdir a ; rename f a"
  
  test_24j() {
         mkdir $DIR/R10
@@ -452,7 +499,7 @@ test_24j() {
  run_test 24j "source does not exist ============================" 
  
  test_25a() {
-       echo '== symlink sanity ======================================='
+       echo '== symlink sanity ============================================='
         mkdir $DIR/d25
         ln -s d25 $DIR/s25
         touch $DIR/s25/foo || error
@@ -473,7 +520,8 @@ test_26a() {
  run_test 26a "multiple component symlink ======================="
  
  test_26b() {
-       ln -s d26/d26-2/foo $DIR/s26-2
+       mkdir -p $DIR/d26b/d26-2
+       ln -s d26b/d26-2/foo $DIR/s26-2
         touch $DIR/s26-2 || error
  }
  run_test 26b "multiple component symlink at end of lookup ======"
@@ -500,12 +548,12 @@ test_26e() {
  run_test 26e "unlink multiple component recursive symlink ======"
  
  test_27a() {
-       echo '== stripe sanity ========================================'
+       echo '== stripe sanity =============================================='
         mkdir $DIR/d27
         $LSTRIPE $DIR/d27/f0 8192 0 1
         $CHECKSTAT -t file $DIR/d27/f0
         pass
-       log "test_27b: write to one stripe file ========================="
+       log "== test_27b: write to one stripe file ========================="
         cp /etc/hosts $DIR/d27/f0
  }
  run_test 27a "one stripe file =================================="
@@ -513,7 +561,7 @@ run_test 27a "one stripe file =================================="
  test_27c() {
         $LSTRIPE $DIR/d27/f01 8192 0 2
         pass
-       log "test_27d: write to two stripe file file f01 ================"
+       log "== test_27d: write to two stripe file file f01 ================"
         dd if=/dev/zero of=$DIR/d27/f01 bs=4k count=4
  }
  run_test 27c "create two stripe file f01 ======================="
@@ -537,14 +585,15 @@ run_test 27e "lstripe existing file (should return error) ======"
  test_27f() {
         $LSTRIPE $DIR/d27/fbad 100 1 2 || true
         dd if=/dev/zero of=$DIR/d27/f12 bs=4k count=4
+       $LFIND $DIR/d27/fbad
  }
  run_test 27f "lstripe with bad stripe size (should return error on LOV)"
  
  test_27g() {
         $MCREATE $DIR/d27/fnone || error
         pass
-       log "test 27.9: lfind ============================================"
-       $LFIND $DIR/d27
+       log "== test 27h: lfind ============================================"
+       $LFIND $DIR/d27/fnone | grep -q "Has no stripe info" || error
  }
  run_test 27g "mcreate file without objects to test lfind ======="
  
@@ -586,7 +635,7 @@ test_30() {
  run_test 30 "run binary from Lustre (execve) ==================="
  
  test_31() {
-       ./openunlink $DIR/f31 $DIR/f31 || error
+       $OPENUNLINK $DIR/f31 $DIR/f31 || error
  }
  run_test 31 "open-unlink file =================================="
  
@@ -627,7 +676,7 @@ test_32d() {
         ls -al $DIR/d32d/ext2-mountpoint/../d2/test_dir || error
         umount $DIR/d32d/ext2-mountpoint || error
  }
-run_test 32d "open d32d/ext2-mountpoint/../d2/test_dir =========="
+run_test 32d "open d32d/ext2-mountpoint/../d2/test_dir ========="
  
  test_32e() {
         [ -e $DIR/d32e ] && rm -fr $DIR/d32e
@@ -638,7 +687,7 @@ test_32e() {
         $CHECKSTAT -t link $DIR/d32e/tmp/symlink11 || error
         $CHECKSTAT -t link $DIR/d32e/symlink01 || error
  }
-run_test 32e "stat d32e/symlink->tmp/symlink->lustre-subdir ====="
+run_test 32e "stat d32e/symlink->tmp/symlink->lustre-subdir ===="
  
  test_32f() {
         [ -e $DIR/d32f ] && rm -fr $DIR/d32f
@@ -649,7 +698,7 @@ test_32f() {
         ls $DIR/d32f/tmp/symlink11  || error
         ls $DIR/d32f/symlink01 || error
  }
-run_test 32f "open d32f/symlink->tmp/symlink->lustre-subdir ====="
+run_test 32f "open d32f/symlink->tmp/symlink->lustre-subdir ===="
  
  test_32g() {
         [ -e $DIR/d32g ] && rm -fr $DIR/d32g
@@ -687,7 +736,7 @@ test_32i() {
         $CHECKSTAT -t file $DIR/d32i/ext2-mountpoint/../test_file || error  
         umount $DIR/d32i/ext2-mountpoint || error
  }
-run_test 32i "stat d32i/ext2-mountpoint/../test_file ============"
+run_test 32i "stat d32i/ext2-mountpoint/../test_file ==========="
  
  test_32j() {
         [ -e $DIR/d32j ] && rm -fr $DIR/d32j
@@ -697,10 +746,10 @@ test_32j() {
         cat $DIR/d32j/ext2-mountpoint/../test_file || error
         umount $DIR/d32j/ext2-mountpoint || error
  }
-run_test 32j "open d32j/ext2-mountpoint/../test_file ============"
+run_test 32j "open d32j/ext2-mountpoint/../test_file ==========="
  
  test_32k() {
-       [ -e $DIR/d32k ] && rm -fr $DIR/d32k
+       rm -fr $DIR/d32k
         mkdir -p $DIR/d32k/ext2-mountpoint 
         mount -t ext2 -o loop $EXT2_DEV $DIR/d32k/ext2-mountpoint  
         mkdir -p $DIR/d32k/d2
@@ -708,10 +757,10 @@ test_32k() {
         $CHECKSTAT -t file $DIR/d32k/ext2-mountpoint/../d2/test_file || error
         umount $DIR/d32k/ext2-mountpoint || error
  }
-run_test 32k "stat d32k/ext2-mountpoint/../d2/test_file ========="
+run_test 32k "stat d32k/ext2-mountpoint/../d2/test_file ========"
  
  test_32l() {
-       [ -e $DIR/d32l ] && rm -fr $DIR/d32l
+       rm -fr $DIR/d32l
         mkdir -p $DIR/d32l/ext2-mountpoint 
         mount -t ext2 -o loop $EXT2_DEV $DIR/d32l/ext2-mountpoint || error
         mkdir -p $DIR/d32l/d2
@@ -719,10 +768,10 @@ test_32l() {
         cat  $DIR/d32l/ext2-mountpoint/../d2/test_file || error
         umount $DIR/d32l/ext2-mountpoint || error
  }
-run_test 32l "open d32l/ext2-mountpoint/../d2/test_file ========="
+run_test 32l "open d32l/ext2-mountpoint/../d2/test_file ========"
  
  test_32m() {
-       [ -e $DIR/d32m ] && rm -fr $DIR/d32m
+       rm -fr $DIR/d32m
         mkdir -p $DIR/d32m/tmp    
         TMP_DIR=$DIR/d32m/tmp       
         ln -s $DIR $TMP_DIR/symlink11 
@@ -730,10 +779,10 @@ test_32m() {
         $CHECKSTAT -t link $DIR/d32m/tmp/symlink11 || error
         $CHECKSTAT -t link $DIR/d32m/symlink01 || error
  }
-run_test 32m "stat d32m/symlink->tmp/symlink->lustre-root ======="
+run_test 32m "stat d32m/symlink->tmp/symlink->lustre-root ======"
  
  test_32n() {
-       [ -e $DIR/d32n ] && rm -fr $DIR/d32n
+       rm -fr $DIR/d32n
         mkdir -p $DIR/d32n/tmp    
         TMP_DIR=$DIR/d32n/tmp       
         ln -s $DIR $TMP_DIR/symlink11 
@@ -741,11 +790,11 @@ test_32n() {
         ls -l $DIR/d32n/tmp/symlink11  || error
         ls -l $DIR/d32n/symlink01 || error
  }
-run_test 32n "open d32n/symlink->tmp/symlink->lustre-root ======="
+run_test 32n "open d32n/symlink->tmp/symlink->lustre-root ======"
  
  test_32o() {
-       [ -e $DIR/d32o ] && rm -fr $DIR/d32o
-       [ -e $DIR/test_file ] && rm -fr $DIR/test_file
+       rm -fr $DIR/d32o
+       rm -f $DIR/test_file
         touch $DIR/test_file 
         mkdir -p $DIR/d32o/tmp    
         TMP_DIR=$DIR/d32o/tmp       
@@ -759,8 +808,8 @@ test_32o() {
  run_test 32o "stat d32o/symlink->tmp/symlink->lustre-root/test_file"
  
  test_32p() {
-       [ -e $DIR/d32p ] && rm -fr $DIR/d32p
-       [ -e $DIR/test_file ] && rm -fr $DIR/test_file
+       rm -fr $DIR/d32p
+       rm -f $DIR/test_file
         touch $DIR/test_file 
         mkdir -p $DIR/d32p/tmp    
         TMP_DIR=$DIR/d32p/tmp       
@@ -771,109 +820,220 @@ test_32p() {
  }
  run_test 32p "open d32p/symlink->tmp/symlink->lustre-root/test_file"
  
+test_32q() {
+       [ -e $DIR/d32q ] && rm -fr $DIR/d32q
+       mkdir -p $DIR/d32q
+       mount -t ext2 -o loop $EXT2_DEV $DIR/d32q
+       ls $DIR/d32q || error
+       umount $DIR/d32q || error
+}
+run_test 32q "ls a mounted file system ========================="
+
  #   chmod 444 /mnt/lustre/somefile
  #   open(/mnt/lustre/somefile, O_RDWR)
  #   Should return -1
  test_33() {
-       [ -e $DIR/test_33_file ] && rm -fr $DIR/test_33_file
+       rm -f $DIR/test_33_file
         touch $DIR/test_33_file
         chmod 444 $DIR/test_33_file
         chown $RUNAS_ID $DIR/test_33_file
-       $RUNAS openfile -f O_RDWR $DIR/test_33_file && error || true
+       $RUNAS $OPENFILE -f O_RDWR $DIR/test_33_file && error || true
  }
  run_test 33 "write file with mode 444 (should return error) ===="
  
-test_34() {
-       $MCREATE $DIR/f
-       $TRUNCATE $DIR/f 100
-       rm $DIR/f
+TEST_34_SIZE=${TEST_34_SIZE:-2000000000000}
+test_34a() {
+       rm -f $DIR/test_34_file
+       $MCREATE $DIR/test_34_file || error
+       $LFIND $DIR/test_34_file | grep -q "Has no stripe information" || error
+       $TRUNCATE $DIR/test_34_file $TEST_34_SIZE || error
+       $LFIND $DIR/test_34_file | grep -q "Has no stripe information" || error
+       $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error
+}
+run_test 34a "truncate file that has not been opened ==========="
+
+test_34b() {
+       $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error
+       $OPENFILE -f O_RDONLY $DIR/test_34_file
+       $LFIND $DIR/test_34_file | grep -q "Has no stripe information" || error
+       $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error
  }
-run_test 34 "truncate file that has not been opened ============"
+run_test 34b "O_RDONLY opening file doesn't create objects ====="
+
+test_34c() {
+       $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error
+       $OPENFILE -f O_RDWR $DIR/test_34_file
+       $LFIND $DIR/test_34_file | grep -q "Has no stripe information" && error
+       $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error
+}
+run_test 34c "O_RDWR opening file-with-size works =============="
+
+test_34d() {
+       dd if=/dev/zero of=$DIR/test_34_file conv=notrunc bs=4k count=1 || error
+       $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error
+       rm $DIR/test_34_file
+}
+run_test 34d "write to sparse file ============================="
+
+test_34e() {
+       rm -f $DIR/test_34_file
+       $MCREATE $DIR/test_34_file || error
+       $TRUNCATE $DIR/test_34_file 1000 || error
+       $CHECKSTAT -s 1000 $DIR/test_34_file || error
+       $OPENFILE -f O_RDWR $DIR/test_34_file
+       $CHECKSTAT -s 1000 $DIR/test_34_file || error
+}
+run_test 34e "create objects, some with size and some without =="
  
  test_35() {
-       [ -e $DIR/test_35_file ] && rm -fr $DIR/test_35_file
         cp /bin/sh $DIR/test_35_file
         chmod 444 $DIR/test_35_file
         chown $RUNAS_ID $DIR/test_35_file
-       $DIR/test_35_file && error
-       return 0
+       $DIR/test_35_file && error || true
+       rm $DIR/test_35_file
  }
  run_test 35 "exec file with mode 444 (should return error) ====="
  
  test_36a() {
-       log 36  "cvs operations ===================================="
-       mkdir -p $DIR/cvsroot
-       chown $RUNAS_ID $DIR/cvsroot
-       $RUNAS cvs -d $DIR/cvsroot init 
+       sleep 1         # we need a rest, or UMLs clock becomes skewed
+       rm -f $DIR/test_36_file
+       utime $DIR/test_36_file || error
  }
-run_test 36a "cvs init ========================================="
+run_test 36a "MDS utime check (mknod, utime) ==================="
  
  test_36b() {
-       # on the LLNL clusters, runas will still pick up root's $TMP settings,
-        # which will not be writable for the runas user, and then you get a CVS
-       # error message with a corrupt path string (CVS bug) and panic.
-       # We're not using much space, so just stick it in /tmp, which is
-       # safe.
-       OLDTMPDIR=$TMPDIR
-       OLDTMP=$TMP
-       TMPDIR=/tmp
-       TMP=/tmp
-
-       cd /etc/init.d
-       $RUNAS cvs -d $DIR/cvsroot import -m "nomesg"  reposname vtag rtag
-
-       TMPDIR=$OLDTMPDIR
-       TMP=$OLDTMP
+       sleep 1
+       echo "" > $DIR/test_36_file
+       utime $DIR/test_36_file || error
  }
-run_test 36b "cvs import ======================================="
+run_test 36b "OST utime check (open, utime) ===================="
  
  test_36c() {
-       cd $DIR
-       mkdir -p $DIR/reposname
-       chown $RUNAS_ID $DIR/reposname
-       $RUNAS cvs -d $DIR/cvsroot co reposname
+       sleep 1
+       rm -f $DIR/d36/test_36_file
+       mkdir $DIR/d36
+       chown $RUNAS_ID $DIR/d36
+       $RUNAS utime $DIR/d36/test_36_file || error
  }
-run_test 36c "cvs checkout ====================================="
+run_test 36c "non-root MDS utime check (mknod, utime) =========="
  
  test_36d() {
-       cd $DIR/reposname
-       $RUNAS touch foo36
-       $RUNAS cvs add -m 'addmsg' foo36
+       sleep 1
+       echo "" > $DIR/d36/test_36_file
+       $RUNAS utime $DIR/d36/test_36_file || error
  }
-run_test 36d "cvs add =========================================="
+run_test 36d "non-root OST utime check (open, utime) ==========="
  
  test_36e() {
-       cd $DIR/reposname
-       $RUNAS cvs update
-}
-run_test 36e "cvs update ======================================="
-
-# XXX change this: use a non root user
-test_36f() {
-       cd $DIR/reposname
-       $RUNAS cvs commit -m 'nomsg' foo36
+       sleep 1
+       [ $RUNAS_ID -eq $UID ] && return
+       touch $DIR/d36/test_36_file2
+       $RUNAS utime $DIR/d36/test_36_file2 && error || true
  }
-run_test 36f "cvs commit ======================================="
+run_test 36e "utime on non-owned file (should return error) ===="
  
  test_37() {
         mkdir -p $DIR/dextra
         echo f > $DIR/dextra/fbugfile
-       mount -t ext2 -o loop /$EXT2_DEV $DIR/dextra
-       ls $DIR/dextra |grep "\<fbugfile\>" && error
-       umount /$EXT2_DEV
-       rm -f DIR/dextra/fbugfile
+       mount -t ext2 -o loop $EXT2_DEV $DIR/dextra
+       ls $DIR/dextra | grep "\<fbugfile\>" && error
+       umount $DIR/dextra || error
+       rm -f $DIR/dextra/fbugfile || error
  }
-run_test 37 "ls a mounted file system to check the old contents ====="
+run_test 37 "ls a mounted file system to check old content ====="
  
  # open(file, O_DIRECTORY) will leak a request and not cleanup (bug 1501)
  test_38() {
-        o_directory $DIR/test38
+       o_directory $DIR/test38
  }
  run_test 38 "open a regular file with O_DIRECTORY =============="
-        
+
+test_39() {
+       touch $DIR/test_39_file
+       touch $DIR/test_39_file2
+#      ls -l  $DIR/test_39_file $DIR/test_39_file2
+#      ls -lu  $DIR/test_39_file $DIR/test_39_file2
+#      ls -lc  $DIR/test_39_file $DIR/test_39_file2
+       sleep 2
+       $OPENFILE -f O_CREAT:O_TRUNC:O_WRONLY $DIR/test_39_file2
+#      ls -l  $DIR/test_39_file $DIR/test_39_file2
+#      ls -lu  $DIR/test_39_file $DIR/test_39_file2
+#      ls -lc  $DIR/test_39_file $DIR/test_39_file2
+       [ $DIR/test_39_file2 -nt $DIR/test_39_file ] || error
+}
+run_test 39 "mtime changed on create ==========================="
+
+test_40() {
+       dd if=/dev/zero of=$DIR/f40 bs=4096 count=1
+       $RUNAS $OPENFILE -f O_WRONLY:O_TRUNC $DIR/f40 && error
+       $CHECKSTAT -t file -s 4096 $DIR/f40 || error
+}
+run_test 40 "failed open(O_TRUNC) doesn't truncate ============="
+
+test_41() {
+       # bug 1553
+       small_write $DIR/f41 18
+}
+run_test 41 "test small file write + fstat ====================="
+
+# on the LLNL clusters, runas will still pick up root's $TMP settings,
+# which will not be writable for the runas user, and then you get a CVS
+# error message with a corrupt path string (CVS bug) and panic.
+# We're not using much space, so just stick it in /tmp, which is safe.
+OLDTMPDIR=$TMPDIR
+OLDTMP=$TMP
+TMPDIR=/tmp
+TMP=/tmp
+OLDHOME=$HOME
+[ $RUNAS_ID -ne $UID ] && HOME=/tmp
+
+test_99a() {
+       echo 99 "cvs operations ===================================="
+       mkdir -p $DIR/d99cvsroot
+       chown $RUNAS_ID $DIR/d99cvsroot
+       $RUNAS cvs -d $DIR/d99cvsroot init || error
+}
+run_test 99a "cvs init ========================================="
+
+test_99b() {
+       cd /etc/init.d
+       $RUNAS cvs -d $DIR/d99cvsroot import -m "nomesg" d99reposname vtag rtag
+}
+run_test 99b "cvs import ======================================="
+
+test_99c() {
+       cd $DIR
+       mkdir -p $DIR/d99reposname
+       chown $RUNAS_ID $DIR/d99reposname
+       $RUNAS cvs -d $DIR/d99cvsroot co d99reposname
+}
+run_test 99c "cvs checkout ====================================="
+
+test_99d() {
+       cd $DIR/d99reposname
+       $RUNAS touch foo99
+       $RUNAS cvs add -m 'addmsg' foo99
+}
+run_test 99d "cvs add =========================================="
+
+test_99e() {
+       cd $DIR/d99reposname
+       $RUNAS cvs update
+}
+run_test 99e "cvs update ======================================="
+
+test_99f() {
+       cd $DIR/d99reposname
+       $RUNAS cvs commit -m 'nomsg' foo99
+}
+run_test 99f "cvs commit ======================================="
+
+TMPDIR=$OLDTMPDIR
+TMP=$OLDTMP
+HOME=$OLDHOME
  
  log "cleanup: ======================================================"
-rm -r $DIR/[Rdfs][1-9]*
+rm -rf $DIR/[Rdfs][1-9]*
  if [ "$I_MOUNTED" = "yes" ]; then
         sh llmountcleanup.sh || error
  fi
diff --git a/lustre/tests/sanityN.sh b/lustre/tests/sanityN.sh

index 8145e63..1895c8a 100644 (file)
--- a/lustre/tests/sanityN.sh
+++ b/lustre/tests/sanityN.sh
@@ -2,130 +2,207 @@
  
  set -e
  
-PATH=$PATH:.
+ONLY=${ONLY:-"$*"}
+ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"8"} # bug 1557
+
+SRCDIR=`dirname $0`
+PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH
  
  CHECKSTAT=${CHECKSTAT:-"checkstat -v"}
-MOUNT1=${MOUNT1:-/mnt/lustre1}
-MOUNT2=${MOUNT2:-/mnt/lustre2}
+CREATETEST=${CREATETEST:-createtest}
+LFIND=${LFIND:-lfind}
+LSTRIPE=${LSTRIPE:-lstripe}
+LCTL=${LCTL:-lctl}
+MCREATE=${MCREATE:-mcreate}
+OPENFILE=${OPENFILE:-openfile}
+OPENUNLINK=${OPENUNLINK:-openunlink}
+TOEXCL=${TOEXCL:-toexcl}
+TRUNCATE=${TRUNCATE:-truncate}
+
+if [ $UID -ne 0 ]; then
+       RUNAS_ID="$UID"
+       RUNAS=""
+else
+       RUNAS_ID=${RUNAS_ID:-500}
+       RUNAS=${RUNAS:-"runas -u $RUNAS_ID"}
+fi
+
  export NAME=${NAME:-mount2}
  
+SAVE_PWD=$PWD
+
  clean() {
-        echo -n "cln.."
-        sh llmountcleanup.sh > /dev/null
+       echo -n "cln.."
+       sh llmountcleanup.sh > /dev/null || exit 20
  }
-
  CLEAN=${CLEAN:-clean}
+
  start() {
-        echo -n "mnt.."
-        sh llrmount.sh > /dev/null
-        echo -n "done"
+       echo -n "mnt.."
+       sh llrmount.sh > /dev/null || exit 10
+       echo "done"
  }
  START=${START:-start}
  
-error () { 
-    echo FAIL
-    exit 1
-}
-
-pass() { 
-    echo PASS
-}
-
-mkdir -p $MOUNT2
-mount | grep $MOUNT1 || sh llmount.sh
-
-echo -n "test 1: check create on 2 mtpt's..."
-touch $MOUNT1/f1
-[ -f $MOUNT2/f1 ] || error
-pass
-
-echo "test 2: check attribute updates on 2 mtpt's..."
-chmod 777 $MOUNT2/f1
-$CHECKSTAT -t file -p 0777 $MOUNT1/f1 || error
-pass
-
-echo "test 2b: check cached attribute updates on 2 mtpt's..."
-touch $MOUNT1/f2b
-ls -l $MOUNT2/f2b
-chmod 777 $MOUNT2/f2b
-$CHECKSTAT -t file -p 0777 $MOUNT1/f2b || error
-pass
-
-echo "test 2c: check cached attribute updates on 2 mtpt's..."
-touch $MOUNT1/f2c
-ls -l $MOUNT2/f2c
-chmod 777 $MOUNT1/f2c
-$CHECKSTAT -t file -p 0777 $MOUNT2/f2c || error
-pass
-
-echo "test 3: check after remount attribute updates on 2 mtpt's..."
-chmod a-x $MOUNT2/f1
-$CLEAN
-$START
-$CHECKSTAT -t file -p 0666 $MOUNT1/f1 || error
-pass
-
-echo "test 4: unlink on one mountpoint removes file on other..."
-rm $MOUNT2/f1
-$CHECKSTAT -a $MOUNT1/f1 || error
-pass
-
-echo -n "test 5: symlink on one mtpt, readlink on another..."
-( cd $MOUNT1 ; ln -s this/is/good lnk )
-
-[ "this/is/good" = "`perl -e 'print readlink("/mnt/lustre2/lnk");'`" ] || error
-pass
-
-echo -n "test 6: fstat validation on multiple mount points..."
-./multifstat $MOUNT1/f6 $MOUNT2/f6
-pass
-
-if [ -n "$BUG_1365" ]; then
-echo -n "test 7: create a file on one mount, truncate it on the other..."
-mcreate $MOUNT1/f1
-truncate $MOUNT2/f1 100
-rm $MOUNT1/f1
-pass
-else
-echo "Skipping test for 1365: set \$BUG_1365 to run it (and crash, likely)."
-fi
+log() {
+       echo "$*"
+       lctl mark "$*" || true
+}
+
+run_one() {
+       if ! mount | grep -q $DIR1; then
+               $START
+       fi
+       log "== test $1: $2"
+       test_$1 || error
+       pass
+       cd $SAVE_PWD
+       $CLEAN
+}
+
+run_test() {
+       for O in $ONLY; do
+               if [ "`echo $1 | grep '\<'$O'[a-z]*\>'`" ]; then
+                       echo ""
+                       run_one $1 "$2"
+                       return $?
+               else
+                       echo -n "."
+               fi
+       done
+       for X in $EXCEPT $ALWAYS_EXCEPT; do
+               if [ "`echo $1 | grep '\<'$X'[a-z]*\>'`" ]; then
+                       echo "skipping excluded test $1"
+                       return 0
+               fi
+       done
+       if [ -z "$ONLY" ]; then
+               run_one $1 "$2"
+               return $?
+       fi
+}
+
+error () {
+       echo "FAIL: $@"
+       exit 1
+}
+
+pass() {
+       echo PASS
+}
+
+MOUNT1=`mount| awk '/^'$NAME' .* lustre_lite / { print $3 }'| head -1`
+MOUNT2=`mount| awk '/^'$NAME' .* lustre_lite / { print $3 }'| tail -1`
+[ -z "$MOUNT1" ] && error "NAME=$NAME not mounted once"
+[ "$MOUNT1" = "$MOUNT2" ] && error "NAME=$NAME not mounted twice"
+[ `mount| awk '/^'$NAME' .* lustre_lite / { print $3 }'| wc -l` -ne 2 ] && \
+       error "NAME=$NAME mounted more than twice"
+
+DIR1=${DIR1:-$MOUNT1}
+DIR2=${DIR2:-$MOUNT2}
+[ -z "`echo $DIR1 | grep $MOUNT1`" ] && echo "$DIR1 not in $MOUNT1" && exit 96
+[ -z "`echo $DIR2 | grep $MOUNT2`" ] && echo "$DIR2 not in $MOUNT2" && exit 95
+
+rm -f $DIR1/[df][0-9]* $DIR1/lnk
+
+test_1a() {
+       touch $DIR1/f1
+       [ -f $DIR2/f1 ] || error
+}
+run_test 1a "check create on 2 mtpt's =========================="
+
+test_1b() {
+       chmod 777 $DIR2/f1
+       $CHECKSTAT -t file -p 0777 $DIR1/f1 || error
+       chmod a-x $DIR2/f1
+}
+run_test 1b "check attribute updates on 2 mtpt's ==============="
+
+test_1c() {
+       $CHECKSTAT -t file -p 0666 $DIR1/f1 || error
+}
+run_test 1c "check after remount attribute updates on 2 mtpt's ="
+
+test_1d() {
+       rm $DIR2/f1
+       $CHECKSTAT -a $DIR1/f1 || error
+}
+run_test 1d "unlink on one mountpoint removes file on other ===="
+
+test_2a() {
+       touch $DIR1/f2a
+       ls -l $DIR2/f2a
+       chmod 777 $DIR2/f2a
+       $CHECKSTAT -t file -p 0777 $DIR1/f2a || error
+}
+run_test 2a "check cached attribute updates on 2 mtpt's ========"
+
+test_2b() {
+       touch $DIR1/f2b
+       ls -l $DIR2/f2b
+       chmod 777 $DIR1/f2b
+       $CHECKSTAT -t file -p 0777 $DIR2/f2b || error
+}
+run_test 2b "check cached attribute updates on 2 mtpt's ========"
+
+test_3() {
+       ( cd $DIR1 ; ln -s this/is/good lnk )
+       [ "this/is/good" = "`perl -e 'print readlink("'$DIR2/lnk'");'`" ] || \
+               error
+}
+run_test 3 "symlink on one mtpt, readlink on another ==========="
+
+test_4() {
+       ./multifstat $DIR1/f6 $DIR2/f6
+}
+run_test 4 "fstat validation on multiple mount points =========="
+
+test_5() {
+       mcreate $DIR1/f5
+       truncate $DIR2/f5 100
+       rm $DIR1/f5
+}
+run_test 5 "create a file on one mount, truncate it on the other"
+
+test_6() {
+       ./openunlink $DIR1/f6 $DIR2/f6 || error
+}
+run_test 6 "remove of open file on other node =================="
+
+test_7() {
+       ./opendirunlink $DIR1/d7 $DIR2/d7 || error
+}
+run_test 7 "remove of open directory on other node ============="
+
+test_8() {
+       ./opendevunlink $DIR1/dev8 $DIR2/dev8 || error
+}
+run_test 8 "remove of open special file on other node =========="
+
+test_9() {
+       MTPT=1
+       > $DIR2/f9
+       for C in a b c d e f g h i j k l; do
+               DIR=`eval echo \\$DIR$MTPT`
+               echo -n $C >> $DIR/f9
+               [ "$MTPT" -eq 1 ] && MTPT=2 || MTPT=1
+       done
+       [ "`cat $DIR1/f9`" = "abcdefghijkl" ] || error
+}
+run_test 9 "append of file with sub-page size on multiple mounts"
+
+test_10() {
+       MTPT=1
+       OFFSET=0
+       > $DIR2/f10
+       for C in a b c d e f g h i j k l; do
+               DIR=`eval echo \\$DIR$MTPT`
+               echo -n $C | dd of=$DIR/f10 bs=1 seek=$OFFSET count=1
+               [ "$MTPT" -eq 1 ] && MTPT=2 || MTPT=1
+               OFFSET=`expr $OFFSET + 1`
+       done
+       [ "`cat $DIR1/f10`" = "abcdefghijkl" ] || error
+}
+run_test 10 "write of file with sub-page size on multiple mounts "
  
-echo "test 9: remove of open file on other node..."
-./openunlink $MOUNT1/f9 $MOUNT2/f9 || error
-pass
-
-echo "test 9b: remove of open directory on other node..."
-./opendirunlink $MOUNT1/dir1 $MOUNT2/dir1 || error
-pass
-
-#echo "test 9c: remove of open special file on other node..."
-#./opendevunlink $MOUNT1/dev1 $MOUNT2/dev1 || error
-#pass
-
-echo -n "test 10: append of file with sub-page size on multiple mounts..."
-MTPT=1
-> $MOUNT2/f10
-for C in a b c d e f g h i j k l; do
-       MOUNT=`eval echo \\$MOUNT$MTPT`
-       echo -n $C >> $MOUNT/f10
-       [ "$MTPT" -eq 1 ] && MTPT=2 || MTPT=1
-done
-[ "`cat $MOUNT1/f10`" = "abcdefghijkl" ] && pass || error
-       
-echo -n "test 11: write of file with sub-page size on multiple mounts..."
-MTPT=1
-OFFSET=0
-> $MOUNT2/f11
-for C in a b c d e f g h i j k l; do
-       MOUNT=`eval echo \\$MOUNT$MTPT`
-       echo -n $C | dd of=$MOUNT/f11 bs=1 seek=$OFFSET count=1
-       [ "$MTPT" -eq 1 ] && MTPT=2 || MTPT=1
-       OFFSET=`expr $OFFSET + 1`
-done
-[ "`cat $MOUNT1/f11`" = "abcdefghijkl" ] && pass || error
-       
-rm -f $MOUNT1/f[0-9]* $MOUNT1/lnk
-
-$CLEAN
-
-exit
+rm -f $DIR1/f[0-9]* $DIR1/lnk
diff --git a/lustre/tests/uml.sh b/lustre/tests/uml.sh

index 2b3adc3..f7a9241 100644 (file)
--- a/lustre/tests/uml.sh
+++ b/lustre/tests/uml.sh
@@ -6,7 +6,7 @@ config=${1:-uml.xml}
  LMC=${LMC:-lmc}
  TMP=${TMP:-/tmp}
  
-MDSDEV=${MDSDEV:-$TMP/mds1}
+MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
  MDSSIZE=${MDSSIZE:-50000}
  
  OSTDEVBASE=$TMP/ost
@@ -19,6 +19,7 @@ STRIPECNT=${STRIPECNT:-1}
  FSTYPE=${FSTYPE:-ext3}
  
  NETTYPE=${NETTYPE:-tcp}
+NIDTYPE=${NIDTYPE:-$NETTYPE}
  
  # NOTE - You can't have different MDS/OST nodes and also have clients on the
  #        MDS/OST nodes without using --endlevel and --startlevel during lconf.
@@ -50,6 +51,10 @@ CLIENTS=${CLIENTS:-"uml3"}
  
  rm -f $config
  
+h2localhost () {
+       echo localhost
+}
+       
  h2tcp () {
         case $1 in
         client) echo '\*' ;;
@@ -68,7 +73,7 @@ h2elan () {
  echo -n "adding NET for:"
  for NODE in `echo $MDSNODE $OSTNODES $CLIENTS | tr -s " " "\n" | sort -u`; do
         echo -n " $NODE"
-       ${LMC} -m $config --add net --node $NODE --nid `h2$NETTYPE $NODE` --nettype $NETTYPE || exit 1
+       ${LMC} -m $config --add net --node $NODE --nid `h2$NIDTYPE $NODE` --nettype $NETTYPE || exit 1
  done
  
  # configure mds server
@@ -82,7 +87,7 @@ echo -n "adding OST on:"
  for NODE in $OSTNODES; do
         eval OSTDEV=\$OSTDEV$COUNT
         echo -n " $NODE"
-       OSTDEV=${OSTDEV:-$OSTDEVBASE$COUNT}
+       OSTDEV=${OSTDEV:-$OSTDEVBASE$COUNT-`hostname`}
          ${LMC} -m $config --add ost --node $NODE --lov lov1 --fstype $FSTYPE --dev $OSTDEV --size $OSTSIZE || exit 21
         COUNT=`expr $COUNT + 1`
  done
diff --git a/lustre/tests/utime.c b/lustre/tests/utime.c

index c6a5d7d..9fe9f26 100644 (file)
--- a/lustre/tests/utime.c
+++ b/lustre/tests/utime.c
@@ -30,7 +30,14 @@ int main(int argc, char *argv[])
         if (argc != 2)
                 usage(argv[0]);
  
-       before_mknod = time(0);
+       /* Adjust the before time back one second, because the kernel's
+        * CURRENT_TIME (lockless clock reading, used to set inode times)
+        * may drift against the do_gettimeofday() time (TSC-corrected and
+        * locked clock reading, used to return timestamps to user space).
+        * This means that the mknod time could be a second older than the
+        * before time, even for a local filesystem such as ext3.
+        */
+       before_mknod = time(0) - 1;
         rc = mknod(filename, 0700, S_IFREG);
         after_mknod = time(0);
         if (rc && errno != EEXIST) {
@@ -52,13 +59,15 @@ int main(int argc, char *argv[])
                         return 4;
                 }
  
-               printf("%s: good mknod times %lu <= %lu <= %lu\n",
-                      prog, before_mknod, st.st_mtime, after_mknod);
+               printf("%s: good mknod times %lu%s <= %lu <= %lu\n",
+                      prog, before_mknod, before_mknod == st.st_mtime ? "*":"",
+                      st.st_mtime, after_mknod);
  
                 sleep(5);
         }
  
-       before_utime = time(0);
+       /* See above */
+       before_utime = time(0) - 1;
         rc = utime(filename, NULL);
         after_utime = time(0);
         if (rc) {
@@ -80,8 +89,9 @@ int main(int argc, char *argv[])
                 return 7;
         }
  
-       printf("%s: good utime times %lu <= %lu <= %lu\n",
-              prog, before_utime, st.st_mtime, after_utime);
+       printf("%s: good utime times %lu%s <= %lu <= %lu\n",
+              prog, before_utime, before_utime == st.st_mtime ? "*" : "",
+              st.st_mtime, after_utime);
  
         return 0;
  }
diff --git a/lustre/utils/.cvsignore b/lustre/utils/.cvsignore

index 06a1588..20f4185 100644 (file)
--- a/lustre/utils/.cvsignore
+++ b/lustre/utils/.cvsignore
@@ -15,4 +15,6 @@ obdstat
  obdio
  obdbarrier
  lload
-wirecheck
-\ No newline at end of file
+wirecheck
+.*.cmd
+.*.d
diff --git a/lustre/utils/Lustre/__init__.py b/lustre/utils/Lustre/__init__.py

index c1b93e6..7a21df3 100644 (file)
--- a/lustre/utils/Lustre/__init__.py
+++ b/lustre/utils/Lustre/__init__.py
@@ -4,4 +4,4 @@ from lustredb import LustreDB, LustreDB_XML, LustreDB_LDAP
  from error import LconfError, OptionError
  from cmdline import Options
  
-CONFIG_VERSION="2003060501"
+CONFIG_VERSION="2003070801"
diff --git a/lustre/utils/lactive b/lustre/utils/lactive

index a5e8580..04841eb 100644 (file)
--- a/lustre/utils/lactive
+++ b/lustre/utils/lactive
@@ -31,6 +31,7 @@
  import sys, getopt, types
  import string, os
  import ldap
+from stat import S_IROTH, S_IRGRP
  PYMOD_DIR = "/usr/lib/lustre/python"
  
  def development_mode():
@@ -43,13 +44,14 @@ if not development_mode():
      sys.path.append(PYMOD_DIR)
  
  import Lustre
+PARAM = Lustre.Options.PARAM
  
  lactive_options = [
-    ('ldapurl',"LDAP server URL", Lustre.Options.PARAM,
-     "ldap://localhost"),
-    ('config', "Cluster config name used for LDAP query", Lustre.Options.PARAM),
-    ('group', "The group of devices to update", Lustre.Options.PARAM),
-    ('active', "The active node name", Lustre.Options.PARAM),
+    ('ldapurl',"LDAP server URL", PARAM, "ldap://localhost"),
+    ('config', "Cluster config name used for LDAP query", PARAM),
+    ('group', "The group of devices to update", PARAM),
+    ('active', "The active node name", PARAM),
+    ('pwfile', "File containing password", PARAM),
      ]
  
  def fatal(*args):
@@ -57,7 +59,6 @@ def fatal(*args):
      print "! " + msg
      sys.exit(1)
  
-
  cl = Lustre.Options("lactive","", lactive_options)
  config, args = cl.parse(sys.argv[1:])
  
@@ -66,10 +67,32 @@ if not (config.group or config.active):
  
  if not config.config:
      fatal("Missing config")
-    
+
+if config.pwfile:
+    try:
+        pwperm = os.stat(config.pwfile)[0]
+        pwreadable = pwperm & (S_IRGRP | S_IROTH)
+        if pwreadable:
+            if pwreadable == (S_IRGRP | S_IROTH):
+                readable_by = "group and others"
+            elif pwreadable == S_IRGRP:
+                readable_by = "group"
+            else:
+                readable_by = "others"
+            print "WARNING: Password file %s is readable by %s" % (
+                config.pwfile, readable_by)
+                 
+        pwfile = open(config.pwfile, "r")
+        pw = string.strip(pwfile.readline())
+        pwfile.close()
+    except Exception, e:
+        fatal("Can't read secret from pwfile %s: %s" % (config.pwfile, e))
+else:
+    print "no pwfile specified, binding anonymously"
+    pw = ""
+
  base = "config=%s,fs=lustre" % (config.config,)
-db = Lustre.LustreDB_LDAP('', {}, base=base, pw = "secret",
-                          url = config.ldapurl)
+db = Lustre.LustreDB_LDAP('', {}, base=base, pw = pw, url = config.ldapurl)
  
  active_node = db.lookup_name(config.active)
  if not active_node:
diff --git a/lustre/utils/lconf b/lustre/utils/lconf

index 15e5a2c..92ec8e2 100755 (executable)
--- a/lustre/utils/lconf
+++ b/lustre/utils/lconf
@@ -1,7 +1,8 @@
  #!/usr/bin/env python
  #
-#  Copyright (C) 2002 Cluster File Systems, Inc.
-#   Author: Robert Read <rread@clusterfs.com>
+#  Copyright (C) 2002-2003 Cluster File Systems, Inc.
+#   Authors: Robert Read <rread@clusterfs.com>
+#            Mike Shaver <shaver@clusterfs.com>
  #   This file is part of Lustre, http://www.lustre.org.
  #
  #   Lustre is free software; you can redistribute it and/or
@@ -26,7 +27,7 @@
  
  import sys, getopt, types
  import string, os, stat, popen2, socket, time, random, fcntl, select
-import re, exceptions, signal
+import re, exceptions, signal, traceback
  import xml.dom.minidom
  
  if sys.version[0] == '1':
@@ -57,7 +58,7 @@ MAX_LOOP_DEVICES = 256
  PORTALS_DIR = 'portals'
  
  
-# Please keep these uptodate with the values in portals/kp30.h
+# Please keep these in sync with the values in portals/kp30.h
  ptldebug_names = { 
      "trace" :     (1 << 0),
      "inode" :     (1 << 1),
@@ -107,6 +108,8 @@ subsystem_names = {
      "ptlrouter" :   (20 << 24),
      "cobd" :        (21 << 24),
      "ptlbd" :       (22 << 24),
+    "log" :         (23 << 24),
+    "mgmt" :        (24 << 24),
      }
  
  
@@ -423,8 +426,11 @@ class LCTLInterface:
    add_route %s %s %s
    quit  """ % (net,
                 gw, lo, hi)
-        self.run(cmds)
-
+        try:
+            self.run(cmds)
+        except CommandError, e:
+            log ("ignore: ")
+            e.dump()
                  
      def del_route(self, net, gw, lo, hi):
          cmds =  """
@@ -443,7 +449,11 @@ class LCTLInterface:
    quit """ % (net,
                uuid, tgt, net,
                gw, tgt)
-        self.run(cmds)
+        try:
+            self.run(cmds)
+        except CommandError, e:
+            log ("ignore: ")
+            e.dump()
  
      # add a route to a range
      def del_route_host(self, net, uuid, gw, tgt):
@@ -795,7 +805,6 @@ def get_local_address(net_type, wildcard):
          local=string.rstrip(local[0])
  
      return local
-        
  
  # XXX: instead of device_list, ask for $name and see what we get
  def is_prepared(name):
@@ -1020,8 +1029,8 @@ class Network(Module):
                              self_nid = self.nid
                          if gw_nid < self_nid:
                              try:
-                                lctl.disconnect(router.net_type, router.nid, router.port,
-                                                router.uuid)
+                                lctl.disconnect(gw.net_type, gw.nid, gw.port,
+                                                gw.uuid)
                              except CommandError, e:
                                  print "disconnectAll failed: ", self.name
                                  e.dump()
@@ -1087,6 +1096,27 @@ class RouteTable(Module):
                  e.dump()
                  cleanup_error(e.rc)
  
+class Management(Module):
+    def __init__(self, db):
+        Module.__init__(self, 'MGMT', db)
+        self.add_lustre_module('obdclass', 'obdclass')
+        self.add_lustre_module('ptlrpc', 'ptlrpc')
+        self.add_lustre_module('ldlm', 'ldlm')
+        self.add_lustre_module('mgmt', 'mgmt_svc')
+
+    def prepare(self):
+        if is_prepared(self.name):
+            return
+        self.info()
+        lctl.newdev(attach="mgmt %s %s" % (self.name, self.uuid))
+
+    def safe_to_clean(self):
+        return 1
+
+    def cleanup(self):
+        if is_prepared(self.name):
+            Module.cleanup(self)
+
  class LDLM(Module):
      def __init__(self,db):
          Module.__init__(self, 'LDLM', db)
@@ -1109,7 +1139,7 @@ class LDLM(Module):
              Module.cleanup(self)
  
  class LOV(Module):
-    def __init__(self, db, uuid):
+    def __init__(self, db, uuid, fs_name):
          Module.__init__(self, 'LOV', db)
          self.add_lustre_module('mdc', 'mdc')
          self.add_lustre_module('lov', 'lov')
@@ -1123,11 +1153,12 @@ class LOV(Module):
          self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist))
          self.osclist = []
          self.client_uuid = generate_client_uuid(self.name)
+        self.fs_name = fs_name
          self.mdc_name = ''
-        self.mdc = get_mdc(db, self.client_uuid, self.name, self.mds_uuid)
+        self.mdc = get_mdc(db, self.client_uuid, fs_name, self.mds_uuid)
          for obd_uuid in self.devlist:
              obd = self.db.lookup(obd_uuid)
-            osc = get_osc(obd, self.client_uuid, self.name)
+            osc = get_osc(obd, self.client_uuid, fs_name)
              if osc:
                  self.osclist.append(osc)
              else:
@@ -1142,7 +1173,7 @@ class LOV(Module):
                  # isn't implemented here yet.
                  osc.prepare(ignore_connect_failure=0)
              except CommandError, e:
-                print "Error preparing OSC %s (inactive)\n" % osc.uuid
+                print "Error preparing OSC %s\n" % osc.uuid
                  raise e
          self.mdc.prepare()
          self.mdc_name = self.mdc.name
@@ -1156,7 +1187,7 @@ class LOV(Module):
              Module.cleanup(self)
          for osc in self.osclist:
              osc.cleanup()
-        mdc = get_mdc(self.db, self.client_uuid, self.name, self.mds_uuid)
+        mdc = get_mdc(self.db, self.client_uuid, self.fs_name, self.mds_uuid)
          mdc.cleanup()
  
      def load_module(self):
@@ -1172,12 +1203,12 @@ class LOV(Module):
              break
  
  class LOVConfig(Module):
-    def __init__(self,db):
+    def __init__(self, db):
          Module.__init__(self, 'LOVConfig', db)
  
          self.lov_uuid = self.db.get_first_ref('lov')
          l = self.db.lookup(self.lov_uuid)
-        self.lov = LOV(l, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
+        self.lov = LOV(l, "YOU_SHOULD_NEVER_SEE_THIS_UUID", '')
          
      def prepare(self):
          lov = self.lov
@@ -1410,9 +1441,20 @@ class OSD(Module):
          if not self.osdtype == 'obdecho':
              clean_loop(self.devpath)
  
+def mgmt_uuid_for_fs(mtpt_name):
+    if not mtpt_name:
+        return ''
+    mtpt_db = toplevel.lookup_name(mtpt_name)
+    fs_uuid = mtpt_db.get_first_ref('filesystem')
+    fs = toplevel.lookup(fs_uuid)
+    if not fs:
+        return ''
+    return fs.get_first_ref('mgmt')
+
  # Generic client module, used by OSC and MDC
  class Client(Module):
-    def __init__(self, tgtdb, uuid, module, owner):
+    def __init__(self, tgtdb, uuid, module, fs_name, self_name=None,
+                 module_dir=None):
          self.target_name = tgtdb.getName()
          self.target_uuid = tgtdb.getUUID()
          self.db = tgtdb
@@ -1427,11 +1469,22 @@ class Client(Module):
  
          self.module = module
          self.module_name = string.upper(module)
-        self.name = '%s_%s_%s_%s' % (self.module_name, socket.gethostname(),
-                                     self.target_name, owner)
+        if not self_name:
+            self.name = '%s_%s_%s_%s' % (self.module_name, socket.gethostname(),
+                                         self.target_name, fs_name)
+        else:
+            self.name = self_name
          self.uuid = uuid
          self.lookup_server(self.tgt_dev_uuid)
-        self.add_lustre_module(module, module)
+        mgmt_uuid = mgmt_uuid_for_fs(fs_name)
+        if mgmt_uuid:
+            self.mgmt_name = mgmtcli_name_for_uuid(mgmt_uuid)
+        else:
+            self.mgmt_name = ''
+        self.fs_name = fs_name
+        if not module_dir:
+            module_dir = module
+        self.add_lustre_module(module_dir, module)
  
      def lookup_server(self, srv_uuid):
          """ Lookup a server's network information """
@@ -1461,7 +1514,8 @@ class Client(Module):
                  raise e
          if srv:
              lctl.newdev(attach="%s %s %s" % (self.module, self.name, self.uuid),
-                        setup ="%s %s" %(self.target_uuid, srv.uuid))
+                        setup ="%s %s %s" % (self.target_uuid, srv.uuid,
+                                             self.mgmt_name))
  
      def cleanup(self):
          if is_prepared(self.name):
@@ -1473,7 +1527,7 @@ class Client(Module):
                  else:
                      srv, r =  find_route(self.get_servers())
                      if srv:
-                        lctl.del_route_host(r[0], srv.uuid, r[1], r[2])
+                        lctl.del_route_host(r[0], srv.uuid, r[1], r[3])
              except CommandError, e:
                  log(self.module_name, "cleanup failed: ", self.name)
                  e.dump()
@@ -1481,13 +1535,22 @@ class Client(Module):
  
  
  class MDC(Client):
-    def __init__(self, db, uuid, owner):
-         Client.__init__(self, db, uuid, 'mdc', owner)
+    def __init__(self, db, uuid, fs_name):
+         Client.__init__(self, db, uuid, 'mdc', fs_name)
+
  
  class OSC(Client):
-    def __init__(self, db, uuid, owner):
-         Client.__init__(self, db, uuid, 'osc', owner)
+    def __init__(self, db, uuid, fs_name):
+         Client.__init__(self, db, uuid, 'osc', fs_name)
  
+def mgmtcli_name_for_uuid(uuid):
+    return 'MGMTCLI_%s' % uuid
+
+class ManagementClient(Client):
+    def __init__(self, db, uuid):
+        Client.__init__(self, db, uuid, 'mgmt_cli', '',
+                        self_name = mgmtcli_name_for_uuid(db.getUUID()),
+                        module_dir = 'mgmt')
              
  class COBD(Module):
      def __init__(self, db):
@@ -1509,12 +1572,12 @@ class COBD(Module):
  
  # virtual interface for  OSC and LOV
  class VOSC(Module):
-    def __init__(self, db, uuid, owner):
+    def __init__(self, db, uuid, fs_name):
          Module.__init__(self, 'VOSC', db)
          if db.get_class() == 'lov':
-            self.osc = LOV(db, uuid)
+            self.osc = LOV(db, uuid, fs_name)
          else:
-            self.osc = get_osc(db, uuid, owner)
+            self.osc = get_osc(db, uuid, fs_name)
      def get_uuid(self):
          return self.osc.uuid
      def get_name(self):
@@ -1560,10 +1623,12 @@ class ECHO_CLIENT(Module):
      def load_module(self):
          self.osc.load_module()
          Module.load_module(self)
+
      def cleanup_module(self):
          Module.cleanup_module(self)
          self.osc.cleanup_module()
  
+
  def generate_client_uuid(name):
          client_uuid = '%05x_%.19s_%05x%05x' % (int(random.random() * 1048576),
                                                 name,
@@ -1571,6 +1636,7 @@ def generate_client_uuid(name):
                                                 int(random.random() * 1048576))
          return client_uuid[:36]
  
+
  class Mountpoint(Module):
      def __init__(self,db):
          Module.__init__(self, 'MTPT', db)
@@ -1579,6 +1645,7 @@ class Mountpoint(Module):
          fs = self.db.lookup(self.fs_uuid)
          self.mds_uuid = fs.get_first_ref('mds')
          self.obd_uuid = fs.get_first_ref('obd')
+        self.mgmt_uuid = fs.get_first_ref('mgmt')
          obd = self.db.lookup(self.obd_uuid)
          client_uuid = generate_client_uuid(self.name)
          self.vosc = VOSC(obd, client_uuid, self.name)
@@ -1586,12 +1653,18 @@ class Mountpoint(Module):
              self.add_lustre_module('mdc', 'mdc')
              self.mdc = get_mdc(db, client_uuid, self.name, self.mds_uuid)
          self.add_lustre_module('llite', 'llite')
-
+        if self.mgmt_uuid:
+            self.mgmtcli = ManagementClient(db.lookup(self.mgmt_uuid),
+                                            client_uuid)
+        else:
+            self.mgmtcli = None
  
      def prepare(self):
          if fs_is_mounted(self.path):
              log(self.path, "already mounted.")
              return
+        if self.mgmtcli:
+            self.mgmtcli.prepare()
          self.vosc.prepare()
          if self.vosc.need_mdc():
              self.mdc.prepare()
@@ -1632,13 +1705,20 @@ class Mountpoint(Module):
          self.vosc.cleanup()
          if self.vosc.need_mdc():
              self.mdc.cleanup()
+        if self.mgmtcli:
+            self.mgmtcli.cleanup()
  
      def load_module(self):
+        if self.mgmtcli:
+            self.mgmtcli.load_module()
          self.vosc.load_module()
          Module.load_module(self)
+
      def cleanup_module(self):
          Module.cleanup_module(self)
          self.vosc.cleanup_module()
+        if self.mgmtcli:
+            self.mgmtcli.cleanup_module()
  
  
  # ============================================================
@@ -1670,6 +1750,8 @@ def getServiceLevel(self):
          ret = 6
      elif type in ('ldlm',):
          ret = 20
+    elif type in ('mgmt',):
+        ret = 25
      elif type in ('osd', 'cobd'):
          ret = 30
      elif type in ('mdsdev',):
@@ -1707,15 +1789,15 @@ def getServices(self):
  #
  # OSC is no longer in the xml, so we have to fake it.
  # this is getting ugly and begging for another refactoring
-def get_osc(ost_db, uuid, owner):
-    osc = OSC(ost_db, uuid, owner)
+def get_osc(ost_db, uuid, fs_name):
+    osc = OSC(ost_db, uuid, fs_name)
      return osc
  
-def get_mdc(db, uuid, owner, mds_uuid):
+def get_mdc(db, uuid, fs_name, mds_uuid):
      mds_db = db.lookup(mds_uuid);
      if not mds_db:
          panic("no mds:", mds_uuid)
-    mdc = MDC(mds_db, uuid, owner)
+    mdc = MDC(mds_db, uuid, fs_name)
      return mdc
  
  ############################################################
@@ -1842,6 +1924,8 @@ def newService(db):
          n = Mountpoint(db)
      elif type == 'echoclient':
          n = ECHO_CLIENT(db)
+    elif type == 'mgmt':
+        n = Management(db)
      else:
          panic ("unknown service type:", type)
      return n
@@ -2060,7 +2144,7 @@ def sys_set_ptldebug():
  def sys_set_subsystem():
      if config.subsystem != None:
          try:
-            val = eval(config.ptldebug, ptldebug_names)
+            val = eval(config.subsystem, subsystem_names)
              val = "0x%x" % (val,)
              sysctl('portals/subsystem_debug', val)
          except NameError, e:
@@ -2191,7 +2275,7 @@ lconf_options = [
      ]      
  
  def main():
-    global lctl, config
+    global lctl, config, toplevel
  
      # in the upcall this is set to SIG_IGN
      signal.signal(signal.SIGCHLD, signal.SIG_DFL)
@@ -2241,9 +2325,12 @@ def main():
          dn = "config=%s,fs=lustre" % (config.config)
          db = Lustre.LustreDB_LDAP('', {}, base=dn, url = config.ldapurl)
      else:
-        cl.usage()
+        print 'Missing config file or ldap URL.'
+        print 'see lconf --help for command summary'
          sys.exit(1)
  
+    toplevel = db
+
      ver = db.get_version()
      if not ver:
          panic("No version found in config data, please recreate.")
@@ -2277,6 +2364,8 @@ if __name__ == "__main__":
          main()
      except Lustre.LconfError, e:
          print e
+#        traceback.print_exc(file=sys.stdout)
+        sys.exit(1)
      except CommandError, e:
          e.dump()
          sys.exit(e.rc)
diff --git a/lustre/utils/lctl.c b/lustre/utils/lctl.c

index 80cdcf2..a4681ec 100644 (file)
--- a/lustre/utils/lctl.c
+++ b/lustre/utils/lctl.c
@@ -205,6 +205,9 @@ command_t cmdlist[] = {
          {"debug_kernel", jt_dbg_debug_kernel, 0,
           "get debug buffer and dump to a file"
           "usage: debug_kernel [file] [raw]"},
+        {"dk", jt_dbg_debug_kernel, 0,
+         "get debug buffer and dump to a file\n"
+         "usage: dk [file] [raw]"},
          {"debug_file", jt_dbg_debug_file, 0,
           "read debug buffer from input and dump to output"
           "usage: debug_file <input> [output] [raw]"},
diff --git a/lustre/utils/lfind.c b/lustre/utils/lfind.c

index 847dd4f..45f837c 100644 (file)
--- a/lustre/utils/lfind.c
+++ b/lustre/utils/lfind.c
@@ -21,38 +21,38 @@
  #define MAX_LOV_UUID_COUNT     1000
  #define OBD_NOT_FOUND          (-1)
  
-char *         cmd;
-struct option  longOpts[] = {
+char           *cmd;
+struct option   longOpts[] = {
                         {"help", 0, 0, 'h'},
                         {"obd", 1, 0, 'o'},
                         {"query", 0, 0, 'q'},
                         {"verbose", 0, 0, 'v'},
                         {0, 0, 0, 0}
-               };
-int            query;
-int            verbose;
-char *         shortOpts = "ho:qv";
-char *         usageMsg = "[ --obd <obd uuid> | --query ] <dir|file> ...";
-
-int            max_ost_count = MAX_LOV_UUID_COUNT;
-struct obd_uuid *      obduuid;
-char *         buf;
-int            buflen;
-struct obd_uuid *      uuids;
+                };
+int             query;
+int             verbose;
+char            shortOpts[] = "ho:qv";
+char            usageMsg[] = "[ --obd <obd uuid> | --query ] <dir|file> ...";
+
+int             max_ost_count = MAX_LOV_UUID_COUNT;
+struct obd_uuid *obduuid;
+char           *buf;
+int             buflen;
+struct obd_uuid *uuids;
  struct obd_ioctl_data data;
-struct lov_desc desc;
-int            uuidslen;
-int            cfglen;
+struct lov_desc  desc;
+int             uuidslen;
+int             cfglen;
  struct lov_mds_md *lmm;
-int            lmmlen;
+int             lmmlen;
+int             printed_UUIDs;
  
  void   init();
  void   usage(FILE *stream);
  void   errMsg(char *fmt, ...);
-void   processPath(const char *path);
+void   processPath(char *path);
  
-int
-main (int argc, char **argv) {
+int main (int argc, char **argv) {
         int c;
  
         cmd = basename(argv[0]);
@@ -61,8 +61,8 @@ main (int argc, char **argv) {
                 switch (c) {
                 case 'o':
                         if (obduuid) {
-                               errMsg("obd '%s' already specified: '%s'.",
-                                       obduuid, optarg);
+                               printf("obd '%s' already specified: '%s'\n",
+                                       obduuid->uuid, optarg);
                                 exit(1);
                         }
  
@@ -81,7 +81,7 @@ main (int argc, char **argv) {
                         usage(stderr);
                         exit(1);
                 default:
-                       errMsg("Internal error. Valid '%s' unrecognized.",
+                       printf("Internal error. Valid '%s' unrecognized\n",
                                 argv[optind - 1]);
                         usage(stderr);
                         exit(1);
@@ -105,8 +105,7 @@ main (int argc, char **argv) {
         exit (0);
  }
  
-void
-init()
+void init()
  {
         int datalen, desclen;
  
@@ -141,8 +140,7 @@ init()
         }
  
         if ((buf = malloc(buflen)) == NULL) {
-               errMsg("Unable to allocate %d bytes of memory for ioctl's.",
-                       buflen);
+               errMsg("Unable to allocate %d bytes of memory for ioctl's", buflen);
                 exit(1);
         }
  
@@ -150,112 +148,120 @@ init()
         uuids = (struct obd_uuid *)buf;
  }
  
-void
-usage(FILE *stream)
+void usage(FILE *stream)
  {
         fprintf(stream, "usage: %s %s\n", cmd, usageMsg);
  }
  
-void
-errMsg(char *fmt, ...)
+void errMsg(char *fmt, ...)
  {
         va_list args;
+       int tmp_errno = errno;
  
         fprintf(stderr, "%s: ", cmd);
         va_start(args, fmt);
         vfprintf(stderr, fmt, args);
         va_end(args);
-       fprintf(stderr, "\n");
+       fprintf(stderr, ": %s (%d)\n", strerror(tmp_errno), tmp_errno);
  }
  
-void
-processPath(const char *path)
+void processPath(char *path)
  {
         int fd;
         int rc;
         int i;
-       int obdindex;
+       int obdindex = OBD_NOT_FOUND;
         int obdcount;
         struct obd_uuid *uuidp;
+       char *fname, *dirname;
  
-       if (query || verbose && !obduuid) {
+       if ((query || verbose) && !obduuid) {
                 printf("%s\n", path);
         }
  
-       if ((fd = open(path, O_RDONLY | O_LOV_DELAY_CREATE)) < 0) {
-               errMsg("open \"%.20s\" failed.", path);
-               perror("open");
+       fname = strrchr(path, '/');
+       if (fname != NULL && fname[1] != '\0') {
+               *fname = '\0';
+               fname++;
+               dirname = path;
+       } else if (fname != NULL && fname[1] == '\0') {
+               printf("need getdents support\n");
                 return;
+       } else {
+               dirname = ".";
+               fname = path;
         }
  
-       memset(&data, 0, sizeof(data));
-        data.ioc_inllen1 = sizeof(desc);
-        data.ioc_inlbuf1 = (char *)&desc;
-        data.ioc_inllen2 = uuidslen;
-        data.ioc_inlbuf2 = (char *)uuids;
+       if ((fd = open(dirname, O_RDONLY)) < 0) {
+               errMsg("open \"%.20s\" failed", dirname);
+               return;
+       }
  
-        memset(&desc, 0, sizeof(desc));
-        desc.ld_tgt_count = max_ost_count;
+       if (!printed_UUIDs) {
+               memset(&data, 0, sizeof(data));
+               data.ioc_inllen1 = sizeof(desc);
+               data.ioc_inlbuf1 = (char *)&desc;
+               data.ioc_inllen2 = uuidslen;
+               data.ioc_inlbuf2 = (char *)uuids;
  
-        if (obd_ioctl_pack(&data, &buf, buflen)) {
-                errMsg("internal buffering error.");
-               exit(1);
-        }
+               memset(&desc, 0, sizeof(desc));
+               desc.ld_tgt_count = max_ost_count;
+
+               if (obd_ioctl_pack(&data, &buf, buflen)) {
+                       errMsg("internal buffering error");
+                       exit(1);
+               }
  
-        rc = ioctl(fd, OBD_IOC_LOV_GET_CONFIG, buf);
-        if (rc) {
-               if (errno == ENOTTY) {
-                       if (!obduuid) {
-                               printf("Not a regular file or not Lustre file.\n\n");
+               rc = ioctl(fd, OBD_IOC_LOV_GET_CONFIG, buf);
+               if (rc) {
+                       if (errno == ENOTTY) {
+                               if (!obduuid) {
+                                       errMsg("error getting LOV config");
+                               }
+                               return;
                         }
-                       return;
+                       errMsg("OBD_IOC_LOV_GET_CONFIG ioctl failed");
+                       exit(1);
                 }
-               errMsg("OBD_IOC_LOV_GET_CONFIG ioctl failed: %d.", errno);
-               perror("ioctl");
-               exit(1);
-        }
  
-       if (obd_ioctl_unpack(&data, buf, buflen)) {
-               errMsg("Invalid reply from ioctl.");
-                exit(1);
-       }
+               if (obd_ioctl_unpack(&data, buf, buflen)) {
+                       errMsg("Invalid reply from ioctl");
+                       exit(1);
+               }
  
-        obdcount = desc.ld_tgt_count;
-       if (obdcount == 0)
-               return;
+               obdcount = desc.ld_tgt_count;
+               if (obdcount == 0)
+                       return;
  
-       obdindex = OBD_NOT_FOUND;
+               obdindex = OBD_NOT_FOUND;
  
-       if (obduuid) {
-               for (i = 0, uuidp = uuids; i < obdcount; i++, uuidp++) {
-                       if (strncmp((const char *)obduuid, (const char *)uuidp,
-                                   sizeof(*uuidp)) == 0) {
-                               obdindex = i;
+               if (obduuid) {
+                       for (i = 0, uuidp = uuids; i < obdcount; i++, uuidp++) {
+                               if (strncmp((char *)obduuid, (char *)uuidp,
+                                       sizeof(*uuidp)) == 0) {
+                                       obdindex = i;
+                               }
                         }
-               }
  
-               if (obdindex == OBD_NOT_FOUND)
-                       return;
-       } else  if (query || verbose) {
-               printf("OBDS:\n");
-               for (i = 0, uuidp = uuids; i < obdcount; i++, uuidp++)
-                       printf("%4d: %s\n", i, (char *)uuidp);
+                       if (obdindex == OBD_NOT_FOUND)
+                               return;
+               } else if (query || verbose) {
+                       printf("OBDS:\n");
+                       for (i = 0, uuidp = uuids; i < obdcount; i++, uuidp++)
+                               printf("%4d: %s\n", i, (char *)uuidp);
+               }
+               printed_UUIDs = 1;
         }
  
-       memset((void *)buf, 0, buflen);
-       lmm->lmm_magic = LOV_MAGIC;
-        lmm->lmm_ost_count = max_ost_count;
-
-       rc = ioctl(fd, LL_IOC_LOV_GETSTRIPE, (void *)lmm);
+       strcpy((char *)lmm, fname);
+       rc = ioctl(fd, IOC_MDC_GETSTRIPE, (void *)lmm);
         if (rc) {
                 if (errno == ENODATA) {
-                       if(!obduuid) {
-                               printf("Has no stripe information.\n\n");
-                       }
+                       if (!obduuid)
+                               printf("Has no stripe information.\n");
                 }
                 else {
-                       errMsg("LL_IOC_LOV_GETSTRIPE ioctl failed. %d", errno);
-                       perror("ioctl");
+                       errMsg("IOC_MDC_GETSTRIPE ioctl failed");
                 }
                 return;
         }
diff --git a/lustre/utils/lmc b/lustre/utils/lmc

index eaaed71..1a1bbc9 100755 (executable)
--- a/lustre/utils/lmc
+++ b/lustre/utils/lmc
@@ -19,9 +19,9 @@
  #
  
  """
-lmc - lustre configurtion data  manager
+lmc - lustre configuration data manager
  
-  See lustre book for documentation for lmc.
+  See Lustre book (http://www.lustre.org/docs/lustre.pdf) for documentation on lmc.
  
  """
  
@@ -98,6 +98,10 @@ Object creation command summary:
    --path /mnt/point
    --mds mds_name
    --ost ost_name OR --lov lov_name
+
+--add mgmt  - Management/monitoring service
+  --node node_name
+  --mgmt mgmt_service_name
  """
  
  PARAM = Lustre.Options.PARAM
@@ -123,10 +127,10 @@ lmc_options = [
      # network 
      ('nettype', "Specify the network type. This can be tcp/elan/gm/scimac.", PARAM),
      ('nid', "Give the network ID, e.g ElanID/IP Address as used by portals.", PARAM),
-    ('tcpbuf', "Optional arguement to specify the TCP buffer size.", PARAM, "0"),
-    ('port', "Optional arguement to specify the TCP port number.", PARAM, DEFAULT_PORT),
-    ('nid_exchange', "Optional arguement to indicate if nid exchange should be done.", PARAM, 0),
-    ('irq_affinity', "Optional arguement.", PARAM, 0),
+    ('tcpbuf', "Optional argument to specify the TCP buffer size.", PARAM, "0"),
+    ('port', "Optional argument to specify the TCP port number.", PARAM, DEFAULT_PORT),
+    ('nid_exchange', "Optional argument to indicate if nid exchange should be done.", PARAM, 0),
+    ('irq_affinity', "Optional argument.", PARAM, 0),
      ('hostaddr', "", PARAM,""),
      ('cluster_id', "Specify the cluster ID", PARAM, "0"),
  
@@ -143,12 +147,12 @@ lmc_options = [
      ('mds', "Specify MDS name.", PARAM),
      ('ost', "Specify the OST name.", PARAM,""),
      ('osdtype', "This could obdfilter or obdecho.", PARAM, "obdfilter"),
-    ('failover', ""),
+    ('failover', "Enable failover support on OSTs or MDS?"),
      ('group', "", PARAM),
      ('dev', "Path of the device on local system.", PARAM,""),
      ('size', "Specify the size of the device if needed.", PARAM,"0"),
      ('journal_size', "Specify new journal size for underlying ext3 file system.", PARAM,"0"),
-    ('fstype', "Optional arguement to specify the filesystem type.", PARAM, "ext3"),
+    ('fstype', "Optional argument to specify the filesystem type.", PARAM, "ext3"),
      ('ostuuid', "", PARAM,""),
      ('nspath', "Local mount point of server namespace.", PARAM,""),
      ('format', ""),
@@ -167,6 +171,8 @@ lmc_options = [
      # cobd
      ('real_obd', "", PARAM),
      ('cache_obd', "", PARAM),
+
+    ('mgmt', "Specify management/monitoring service name.", PARAM, ""),
      ]
  
  def error(*args):
@@ -393,16 +399,25 @@ class GenConfig:
          mdd.appendChild(self.ref("target", mds_uuid))
          return mdd
  
+    def mgmt(self, mgmt_name, mgmt_uuid, node_uuid):
+        mgmt = self.newService("mgmt", mgmt_name, mgmt_uuid)
+        mgmt.appendChild(self.ref("node", node_uuid))
+        # Placeholder until mgmt-service failover.
+        mgmt.appendChild(self.ref("active", mgmt_uuid))
+        return mgmt
+
      def mountpoint(self, name, uuid, fs_uuid, path):
          mtpt = self.newService("mountpoint", name, uuid)
          mtpt.appendChild(self.ref("filesystem", fs_uuid))
          self.addElement(mtpt, "path", path)
          return mtpt
  
-    def filesystem(self, name, uuid, mds_uuid, obd_uuid):
+    def filesystem(self, name, uuid, mds_uuid, obd_uuid, mgmt_uuid):
          fs = self.newService("filesystem", name, uuid)
          fs.appendChild(self.ref("mds", mds_uuid))
          fs.appendChild(self.ref("obd", obd_uuid))
+        if mgmt_uuid:
+            fs.appendChild(self.ref("mgmt", mgmt_uuid))
          return fs
          
      def echo_client(self, name, uuid, osc_uuid):
@@ -660,6 +675,23 @@ def add_mds(gen, lustre, options):
      lustre.appendChild(mdd)
                     
  
+def add_mgmt(gen, lustre, options):
+    node_name = get_option(options, 'node')
+    node_uuid = name2uuid(lustre, node_name)
+    mgmt_name = get_option(options, 'mgmt')
+    if not mgmt_name:
+        mgmt_name = new_name('MGMT_' + node_name)
+    mgmt_uuid = name2uuid(lustre, mgmt_name, fatal=0)
+    if not mgmt_uuid:
+        mgmt_uuid = new_uuid(mgmt_name)
+        mgmt = gen.mgmt(mgmt_name, mgmt_uuid, node_uuid)
+        lustre.appendChild(mgmt)
+    else:
+        mgmt = lookup(lustre, mgmt_uuid)
+
+    node = findByName(lustre, node_name, "node")
+    node_add_profile(gen, node, 'mgmt', mgmt_uuid)
+
  def add_ost(gen, lustre, options):
      node_name = get_option(options, 'node')
      lovname = get_option(options, 'lov')
@@ -793,23 +825,27 @@ def add_lov(gen, lustre, options):
      lovconfig = gen.lovconfig(lovconfig_name, lovconfig_uuid, uuid)
      lustre.appendChild(lovconfig)
  
-def new_filesystem(gen, lustre, mds_uuid, obd_uuid):
+def new_filesystem(gen, lustre, mds_uuid, obd_uuid, mgmt_uuid):
      fs_name = new_name("FS_fsname")
      fs_uuid = new_uuid(fs_name)
      mds = lookup(lustre, mds_uuid)
      mds.appendChild(gen.ref("filesystem", fs_uuid))
-    fs = gen.filesystem(fs_name, fs_uuid, mds_uuid, obd_uuid)
+    fs = gen.filesystem(fs_name, fs_uuid, mds_uuid, obd_uuid, mgmt_uuid)
      lustre.appendChild(fs)
      return fs_uuid
  
-def get_fs_uuid(gen, lustre, mds_name, obd_name):
+def get_fs_uuid(gen, lustre, mds_name, obd_name, mgmt_name):
      mds_uuid = name2uuid(lustre, mds_name, tag='mds')
      obd_uuid = name2uuid(lustre, obd_name, tag='lov', fatal=0)
      if not obd_uuid:
          obd_uuid = name2uuid(lustre, obd_name, tag='ost', fatal=1)
+    if mgmt_name:
+        mgmt_uuid = name2uuid(lustre, mgmt_name, tag='mgmt', fatal=1)
+    else:
+        mgmt_uuid = ''
      fs_uuid = lookup_filesystem(lustre, mds_uuid, obd_uuid)
      if not fs_uuid:
-        fs_uuid = new_filesystem(gen, lustre, mds_uuid, obd_uuid)
+        fs_uuid = new_filesystem(gen, lustre, mds_uuid, obd_uuid, mgmt_uuid)
      return fs_uuid
      
  def add_mtpt(gen, lustre, options):
@@ -825,7 +861,8 @@ def add_mtpt(gen, lustre, options):
              lov_name = get_option(options, 'ost')
              if lov_name == '':
                  error("--add mtpt requires either --filesystem or --mds with an  --lov lov_name or --ost ost_name")
-        fs_uuid = get_fs_uuid(gen, lustre, mds_name, lov_name)
+        mgmt_name = get_option(options, 'mgmt')
+        fs_uuid = get_fs_uuid(gen, lustre, mds_name, lov_name, mgmt_name)
      else:
          fs_uuid = name2uuid(lustre, fs_name, tag='filesystem')
  
@@ -910,6 +947,8 @@ def add(devtype, gen, lustre, options):
          add_echo_client(gen, lustre, options)
      elif devtype == 'cobd':
          add_cobd(gen, lustre, options)
+    elif devtype == 'mgmt':
+        add_mgmt(gen, lustre, options)
      else:
          error("unknown device type:", devtype)
      
diff --git a/lustre/utils/obdbarrier.c b/lustre/utils/obdbarrier.c

index 4373071..8774cef 100644 (file)
--- a/lustre/utils/obdbarrier.c
+++ b/lustre/utils/obdbarrier.c
@@ -214,9 +214,9 @@ main (int argc, char **argv)
                  }
          }
  
-        free (b);
+        free(b);
  
-        obdio_disconnect (conn);
+        obdio_disconnect(conn, 0);
  
          return (rc == 0 ? 0 : 1);
  }
diff --git a/lustre/utils/obdio.c b/lustre/utils/obdio.c

index 8264761..24b9e2d 100644 (file)
--- a/lustre/utils/obdio.c
+++ b/lustre/utils/obdio.c
@@ -294,10 +294,10 @@ main (int argc, char **argv)
          if (conn == NULL)
                  return (1);
  
-        rc = obdio_test_fixed_extent (conn, myhid, mypid, reps, locked,
-                                      oid, base_offset, size);
+        rc = obdio_test_fixed_extent(conn, myhid, mypid, reps, locked,
+                                     oid, base_offset, size);
  
-        obdio_disconnect (conn);
+        obdio_disconnect(conn, 0);
  
          return (rc == 0 ? 0 : 1);
  }
diff --git a/lustre/utils/obdiolib.c b/lustre/utils/obdiolib.c

index c871818..04dae88 100644 (file)
--- a/lustre/utils/obdiolib.c
+++ b/lustre/utils/obdiolib.c
@@ -116,7 +116,7 @@ obdio_connect (int device)
  }
  
  void
-obdio_disconnect (struct obdio_conn *conn)
+obdio_disconnect (struct obdio_conn *conn, int flags)
  {
          close (conn->oc_fd);
          /* obdclass will automatically close on last ref */
diff --git a/lustre/utils/obdiolib.h b/lustre/utils/obdiolib.h

index 3811b41..b2ec6b6 100644 (file)
--- a/lustre/utils/obdiolib.h
+++ b/lustre/utils/obdiolib.h
@@ -48,22 +48,24 @@ struct obdio_barrier {
  };
  
  extern struct obdio_conn * obdio_connect (int device);
-extern void obdio_disconnect (struct obdio_conn *conn);
-extern int obdio_open (struct obdio_conn *conn, uint64_t oid,
+extern void obdio_disconnect(struct obdio_conn *conn, int flags);
+extern int obdio_open(struct obdio_conn *conn, uint64_t oid,
+                      struct lustre_handle *fh);
+extern int obdio_close(struct obdio_conn *conn, uint64_t oid,
                         struct lustre_handle *fh);
-extern int obdio_close (struct obdio_conn *conn, uint64_t oid,
-                        struct lustre_handle *fh);
-extern int obdio_pread (struct obdio_conn *conn, uint64_t oid,
+extern int obdio_pread(struct obdio_conn *conn, uint64_t oid,
+                       char *buffer, uint32_t count, uint64_t offset);
+extern int obdio_pwrite(struct obdio_conn *conn, uint64_t oid,
                          char *buffer, uint32_t count, uint64_t offset);
-extern int obdio_pwrite (struct obdio_conn *conn, uint64_t oid,
-                         char *buffer, uint32_t count, uint64_t offset);
-extern int obdio_enqueue (struct obdio_conn *conn, uint64_t oid,
-                          int mode, uint64_t offset, uint32_t count,
-                          struct lustre_handle *lh);
-extern int obdio_cancel (struct obdio_conn *conn, struct lustre_handle *lh);
-extern void *obdio_alloc_aligned_buffer (void **spacep, int size);
-extern struct obdio_barrier *obdio_new_barrier (uint64_t oid, uint64_t id, int npeers) ;
-extern int obdio_setup_barrier (struct obdio_conn *conn, struct obdio_barrier *b);
-extern int obdio_barrier (struct obdio_conn *conn, struct obdio_barrier *b);
+extern int obdio_enqueue(struct obdio_conn *conn, uint64_t oid,
+                         int mode, uint64_t offset, uint32_t count,
+                         struct lustre_handle *lh);
+extern int obdio_cancel(struct obdio_conn *conn, struct lustre_handle *lh);
+extern void *obdio_alloc_aligned_buffer(void **spacep, int size);
+extern struct obdio_barrier *obdio_new_barrier(uint64_t oid, uint64_t id,
+                                               int npeers);
+extern int obdio_setup_barrier(struct obdio_conn *conn,
+                               struct obdio_barrier *b);
+extern int obdio_barrier(struct obdio_conn *conn, struct obdio_barrier *b);
  
  #endif
diff --git a/lustre/utils/wirecheck.c b/lustre/utils/wirecheck.c

index 5b6a589..86ae507 100644 (file)
--- a/lustre/utils/wirecheck.c
+++ b/lustre/utils/wirecheck.c
@@ -518,12 +518,12 @@ main (int argc, char **argv)
         CHECK_VALUE (REINT_OPEN);
         CHECK_VALUE (REINT_MAX);
  
-       CHECK_VALUE (IT_INTENT_EXEC);
-       CHECK_VALUE (IT_OPEN_LOOKUP);
-       CHECK_VALUE (IT_OPEN_NEG);
-       CHECK_VALUE (IT_OPEN_POS);
-       CHECK_VALUE (IT_OPEN_CREATE);
-       CHECK_VALUE (IT_OPEN_OPEN);
+       CHECK_VALUE (DISP_IT_EXECD);
+       CHECK_VALUE (DISP_LOOKUP_EXECD);
+       CHECK_VALUE (DISP_LOOKUP_NEG);
+       CHECK_VALUE (DISP_LOOKUP_POS);
+       CHECK_VALUE (DISP_OPEN_CREATE);
+       CHECK_VALUE (DISP_OPEN_OPEN);
  
         CHECK_VALUE (MDS_STATUS_CONN);
         CHECK_VALUE (MDS_STATUS_LOV);
author	phil <phil>
	Fri, 25 Jul 2003 17:58:07 +0000 (17:58 +0000)
committer	phil <phil>
	Fri, 25 Jul 2003 17:58:07 +0000 (17:58 +0000)
lnet/.cvsignore		patch \| blob \| history
lnet/Kernelenv.in		patch \| blob \| history
lnet/Kernelenv.mk		patch \| blob \| history
lnet/Makefile.mk		patch \| blob \| history
lnet/archdep.m4		patch \| blob \| history
lnet/include/config.h.in		patch \| blob \| history
lnet/include/linux/kp30.h		patch \| blob \| history
lnet/include/linux/portals_compat25.h		patch \| blob \| history
lnet/include/lnet/internal.h		patch \| blob \| history
lnet/include/lnet/list.h		patch \| blob \| history
lnet/include/lnet/lltrace.h		patch \| blob \| history
lnet/include/lnet/myrnal.h		patch \| blob \| history
lnet/include/lnet/nal.h		patch \| blob \| history
lnet/include/lnet/ppid.h		patch \| blob \| history
lnet/include/lnet/stringtab.h		patch \| blob \| history
lnet/include/lnet/types.h		patch \| blob \| history
lnet/klnds/.cvsignore		patch \| blob \| history
lnet/klnds/Makefile.mk		patch \| blob \| history
lnet/klnds/gmlnd/gmnal.c		patch \| blob \| history
lnet/klnds/scimaclnd/scimacnal.c		patch \| blob \| history
lnet/klnds/socklnd/.cvsignore		patch \| blob \| history
lnet/klnds/socklnd/Makefile.mk		patch \| blob \| history
lnet/klnds/toelnd/toenal.c		patch \| blob \| history
lnet/klnds/toelnd/toenal_cb.c		patch \| blob \| history
lnet/libcfs/.cvsignore		patch \| blob \| history
lnet/libcfs/Makefile.mk		patch \| blob \| history
lnet/libcfs/debug.c		patch \| blob \| history
lnet/libcfs/module.c		patch \| blob \| history
lnet/lnet/.cvsignore		patch \| blob \| history
lnet/lnet/Makefile.mk		patch \| blob \| history
lnet/lnet/api-init.c		patch \| blob \| history
lnet/lnet/lib-move.c		patch \| blob \| history
lnet/router/.cvsignore		patch \| blob \| history
lnet/router/Makefile.mk		patch \| blob \| history
lnet/router/router.c		patch \| blob \| history
lnet/tests/.cvsignore		patch \| blob \| history
lnet/tests/ping_cli.c		patch \| blob \| history
lnet/tests/ping_srv.c		patch \| blob \| history
lnet/tests/sping_cli.c		patch \| blob \| history
lnet/tests/sping_srv.c		patch \| blob \| history
lnet/ulnds/debug.c		patch \| blob \| history
lnet/ulnds/socklnd/debug.c		patch \| blob \| history
lnet/utils/.cvsignore		patch \| blob \| history
lnet/utils/debug.c		patch \| blob \| history
lnet/utils/portals.c		patch \| blob \| history
lustre/.cvsignore		patch \| blob \| history
lustre/ChangeLog		patch \| blob \| history
lustre/Makefile.mk		patch \| blob \| history
lustre/cobd/cache_obd.c		patch \| blob \| history
lustre/cobd/lproc_cache.c		patch \| blob \| history
lustre/conf/lustre.dtd		patch \| blob \| history
lustre/configure.in		patch \| blob \| history
lustre/include/liblustre.h		patch \| blob \| history
lustre/include/linux/lprocfs_status.h		patch \| blob \| history
lustre/include/linux/lustre_compat25.h		patch \| blob \| history
lustre/include/linux/lustre_dlm.h		patch \| blob \| history
lustre/include/linux/lustre_export.h		patch \| blob \| history
lustre/include/linux/lustre_fsfilt.h		patch \| blob \| history
lustre/include/linux/lustre_idl.h		patch \| blob \| history
lustre/include/linux/lustre_lib.h		patch \| blob \| history
lustre/include/linux/lustre_lite.h		patch \| blob \| history
lustre/include/linux/lustre_mds.h		patch \| blob \| history
lustre/include/linux/lustre_net.h		patch \| blob \| history
lustre/include/linux/obd.h		patch \| blob \| history
lustre/include/linux/obd_class.h		patch \| blob \| history
lustre/include/linux/obd_lov.h		patch \| blob \| history
lustre/include/linux/obd_ost.h		patch \| blob \| history
lustre/include/linux/obd_support.h		patch \| blob \| history
lustre/kernel_patches/patches/dev_read_only_2.4.20-rh.patch		patch \| blob \| history
lustre/kernel_patches/patches/export-truncate-2.5.63.patch		patch \| blob \| history
lustre/kernel_patches/patches/ext3-delete_thread-2.4.18.patch		patch \| blob \| history
lustre/kernel_patches/patches/ext3-delete_thread-2.4.20.patch		patch \| blob \| history
lustre/kernel_patches/patches/extN-misc-fixup.patch		patch \| blob \| history
lustre/kernel_patches/patches/extN-noread.patch		patch \| blob \| history
lustre/kernel_patches/patches/extN-wantedi.patch		patch \| blob \| history
lustre/kernel_patches/patches/iopen-2.4.18.patch		patch \| blob \| history
lustre/kernel_patches/patches/iopen-2.4.20.patch		patch \| blob \| history
lustre/kernel_patches/patches/linux-2.4.18ea-0.8.26.patch		patch \| blob \| history
lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54-chaos.patch		patch \| blob \| history
lustre/kernel_patches/patches/lustre_version.patch		patch \| blob \| history
lustre/kernel_patches/patches/vfs_intent-2.4.20-rh.patch		patch \| blob \| history
lustre/kernel_patches/patches/vfs_intent-2.4.20-vanilla.patch		patch \| blob \| history
lustre/kernel_patches/pc/ext3-delete_thread-2.4.18.pc		patch \| blob \| history
lustre/kernel_patches/pc/ext3-delete_thread-2.4.20.pc		patch \| blob \| history
lustre/kernel_patches/pc/extN-wantedi.pc		patch \| blob \| history
lustre/kernel_patches/pc/linux-2.4.18ea-0.8.26.pc		patch \| blob \| history
lustre/kernel_patches/pc/vfs_intent-2.4.20-vanilla.pc		patch \| blob \| history
lustre/kernel_patches/scripts/patchfns		patch \| blob \| history
lustre/kernel_patches/series/hp-pnnl-2.4.20		patch \| blob \| history
lustre/kernel_patches/series/rh-2.4.20		patch \| blob \| history
lustre/kernel_patches/series/vanilla-2.4.20		patch \| blob \| history
lustre/kernel_patches/which_patch		patch \| blob \| history
lustre/ldlm/.cvsignore		patch \| blob \| history
lustre/ldlm/ldlm_lib.c		patch \| blob \| history
lustre/ldlm/ldlm_lock.c		patch \| blob \| history
lustre/ldlm/ldlm_lockd.c		patch \| blob \| history
lustre/ldlm/ldlm_request.c		patch \| blob \| history
lustre/ldlm/ldlm_resource.c		patch \| blob \| history
lustre/liblustre/file.c		patch \| blob \| history
lustre/liblustre/super.c		patch \| blob \| history
lustre/llite/.cvsignore		patch \| blob \| history
lustre/llite/Makefile.am		patch \| blob \| history
lustre/llite/dcache.c		patch \| blob \| history
lustre/llite/dir.c		patch \| blob \| history
lustre/llite/file.c		patch \| blob \| history
lustre/llite/iod.c		patch \| blob \| history
lustre/llite/llite_internal.h		patch \| blob \| history
lustre/llite/lproc_llite.c		patch \| blob \| history
lustre/llite/namei.c		patch \| blob \| history
lustre/llite/rw.c		patch \| blob \| history
lustre/llite/super.c		patch \| blob \| history
lustre/llite/super25.c		patch \| blob \| history
lustre/llite/symlink.c		patch \| blob \| history
lustre/lov/.cvsignore		patch \| blob \| history
lustre/lov/Makefile.am		patch \| blob \| history
lustre/lov/lov_obd.c		patch \| blob \| history
lustre/lov/lov_pack.c		patch \| blob \| history
lustre/lov/lproc_lov.c		patch \| blob \| history
lustre/mdc/.cvsignore		patch \| blob \| history
lustre/mdc/lproc_mdc.c		patch \| blob \| history
lustre/mdc/mdc_internal.h		patch \| blob \| history
lustre/mdc/mdc_lib.c		patch \| blob \| history
lustre/mdc/mdc_reint.c		patch \| blob \| history
lustre/mdc/mdc_request.c		patch \| blob \| history
lustre/mds/.cvsignore		patch \| blob \| history
lustre/mds/Makefile.mk		patch \| blob \| history
lustre/mds/handler.c		patch \| blob \| history
lustre/mds/lproc_mds.c		patch \| blob \| history
lustre/mds/mds_fs.c		patch \| blob \| history
lustre/mds/mds_internal.h		patch \| blob \| history
lustre/mds/mds_lib.c		patch \| blob \| history
lustre/mds/mds_lov.c		patch \| blob \| history
lustre/mds/mds_open.c		patch \| blob \| history
lustre/mds/mds_reint.c		patch \| blob \| history
lustre/obdclass/.cvsignore		patch \| blob \| history
lustre/obdclass/Makefile.am		patch \| blob \| history
lustre/obdclass/class_obd.c		patch \| blob \| history
lustre/obdclass/fsfilt.c		patch \| blob \| history
lustre/obdclass/fsfilt_ext3.c		patch \| blob \| history
lustre/obdclass/fsfilt_extN.c		patch \| blob \| history
lustre/obdclass/fsfilt_reiserfs.c		patch \| blob \| history
lustre/obdclass/lprocfs_status.c		patch \| blob \| history
lustre/obdclass/lustre_handles.c		patch \| blob \| history
lustre/obdclass/lustre_peer.c		patch \| blob \| history
lustre/obdclass/simple.c		patch \| blob \| history
lustre/obdclass/statfs_pack.c		patch \| blob \| history
lustre/obdecho/.cvsignore		patch \| blob \| history
lustre/obdecho/echo.c		patch \| blob \| history
lustre/obdecho/echo_client.c		patch \| blob \| history
lustre/obdecho/lproc_echo.c		patch \| blob \| history
lustre/obdfilter/.cvsignore		patch \| blob \| history
lustre/obdfilter/Makefile.am		patch \| blob \| history
lustre/obdfilter/filter.c		patch \| blob \| history
lustre/obdfilter/lproc_obdfilter.c		patch \| blob \| history
lustre/osc/.cvsignore		patch \| blob \| history
lustre/osc/lproc_osc.c		patch \| blob \| history
lustre/osc/osc_lib.c		patch \| blob \| history
lustre/osc/osc_request.c		patch \| blob \| history
lustre/ost/.cvsignore		patch \| blob \| history
lustre/ost/lproc_ost.c		patch \| blob \| history
lustre/ost/ost_handler.c		patch \| blob \| history
lustre/portals/.cvsignore		patch \| blob \| history
lustre/portals/Kernelenv.in		patch \| blob \| history
lustre/portals/Kernelenv.mk		patch \| blob \| history
lustre/portals/Makefile.mk		patch \| blob \| history
lustre/portals/archdep.m4		patch \| blob \| history
lustre/portals/include/config.h.in		patch \| blob \| history
lustre/portals/include/linux/kp30.h		patch \| blob \| history
lustre/portals/include/linux/portals_compat25.h		patch \| blob \| history
lustre/portals/include/portals/list.h		patch \| blob \| history
lustre/portals/include/portals/lltrace.h		patch \| blob \| history
lustre/portals/include/portals/myrnal.h		patch \| blob \| history
lustre/portals/include/portals/nal.h		patch \| blob \| history
lustre/portals/include/portals/ppid.h		patch \| blob \| history
lustre/portals/include/portals/stringtab.h		patch \| blob \| history
lustre/portals/include/portals/types.h		patch \| blob \| history
lustre/portals/knals/.cvsignore		patch \| blob \| history
lustre/portals/knals/Makefile.mk		patch \| blob \| history
lustre/portals/knals/gmnal/gmnal.c		patch \| blob \| history
lustre/portals/knals/scimacnal/scimacnal.c		patch \| blob \| history
lustre/portals/knals/socknal/.cvsignore		patch \| blob \| history
lustre/portals/knals/socknal/Makefile.mk		patch \| blob \| history
lustre/portals/knals/toenal/toenal.c		patch \| blob \| history
lustre/portals/knals/toenal/toenal_cb.c		patch \| blob \| history
lustre/portals/libcfs/.cvsignore		patch \| blob \| history
lustre/portals/libcfs/Makefile.mk		patch \| blob \| history
lustre/portals/libcfs/debug.c		patch \| blob \| history
lustre/portals/libcfs/module.c		patch \| blob \| history
lustre/portals/portals/.cvsignore		patch \| blob \| history
lustre/portals/portals/Makefile.mk		patch \| blob \| history
lustre/portals/portals/api-init.c		patch \| blob \| history
lustre/portals/portals/lib-move.c		patch \| blob \| history
lustre/portals/router/.cvsignore		patch \| blob \| history
lustre/portals/router/Makefile.mk		patch \| blob \| history
lustre/portals/router/router.c		patch \| blob \| history
lustre/portals/tests/.cvsignore		patch \| blob \| history
lustre/portals/tests/ping_cli.c		patch \| blob \| history
lustre/portals/tests/ping_srv.c		patch \| blob \| history
lustre/portals/tests/sping_cli.c		patch \| blob \| history
lustre/portals/tests/sping_srv.c		patch \| blob \| history
lustre/portals/unals/debug.c		patch \| blob \| history
lustre/portals/utils/.cvsignore		patch \| blob \| history
lustre/portals/utils/debug.c		patch \| blob \| history
lustre/portals/utils/portals.c		patch \| blob \| history
lustre/ptlbd/client.c		patch \| blob \| history
lustre/ptlbd/main.c		patch \| blob \| history
lustre/ptlbd/server.c		patch \| blob \| history
lustre/ptlrpc/.cvsignore		patch \| blob \| history
lustre/ptlrpc/Makefile.am		patch \| blob \| history
lustre/ptlrpc/client.c		patch \| blob \| history
lustre/ptlrpc/lproc_ptlrpc.c		patch \| blob \| history
lustre/ptlrpc/pack_generic.c		patch \| blob \| history
lustre/ptlrpc/pinger.c		patch \| blob \| history
lustre/ptlrpc/ptlrpc_internal.h		patch \| blob \| history
lustre/ptlrpc/ptlrpc_lib.c		patch \| blob \| history
lustre/ptlrpc/ptlrpc_module.c		patch \| blob \| history
lustre/ptlrpc/recover.c		patch \| blob \| history
lustre/ptlrpc/service.c		patch \| blob \| history
lustre/scripts/lustre.spec.in		patch \| blob \| history
lustre/tests/.cvsignore		patch \| blob \| history
lustre/tests/Makefile.am		patch \| blob \| history
lustre/tests/acceptance-metadata-double.sh		patch \| blob \| history
lustre/tests/acceptance-metadata-single.sh		patch \| blob \| history
lustre/tests/acceptance-small.sh		patch \| blob \| history
lustre/tests/cobd.sh		patch \| blob \| history
lustre/tests/create.pl		patch \| blob \| history
lustre/tests/directio.c		patch \| blob \| history
lustre/tests/echo.sh		patch \| blob \| history
lustre/tests/fsx.c		patch \| blob \| history
lustre/tests/leak_finder.pl		patch \| blob \| history
lustre/tests/lkcdmap		patch \| blob \| history
lustre/tests/llecho.sh		patch \| blob \| history
lustre/tests/llmount.sh		patch \| blob \| history
lustre/tests/local.sh		patch \| blob \| history
lustre/tests/lov.sh		patch \| blob \| history
lustre/tests/mount2.sh		patch \| blob \| history
lustre/tests/mount2fs.sh		patch \| blob \| history
lustre/tests/opendevunlink.c		patch \| blob \| history
lustre/tests/openfile.c		patch \| blob \| history
lustre/tests/openunlink.c		patch \| blob \| history
lustre/tests/recovery-cleanup.sh		patch \| blob \| history
lustre/tests/recovery-small.sh		patch \| blob \| history
lustre/tests/rename.pl		patch \| blob \| history
lustre/tests/runas.c		patch \| blob \| history
lustre/tests/rundbench		patch \| blob \| history
lustre/tests/runobdstat		patch \| blob \| history
lustre/tests/runregression-brw.sh		patch \| blob \| history
lustre/tests/runtests		patch \| blob \| history
lustre/tests/runvmstat		patch \| blob \| history
lustre/tests/sanity.sh		patch \| blob \| history
lustre/tests/sanityN.sh		patch \| blob \| history
lustre/tests/uml.sh		patch \| blob \| history
lustre/tests/utime.c		patch \| blob \| history
lustre/utils/.cvsignore		patch \| blob \| history
lustre/utils/Lustre/__init__.py		patch \| blob \| history
lustre/utils/lactive		patch \| blob \| history
lustre/utils/lconf		patch \| blob \| history
lustre/utils/lctl.c		patch \| blob \| history
lustre/utils/lfind.c		patch \| blob \| history
lustre/utils/lmc		patch \| blob \| history
lustre/utils/obdbarrier.c		patch \| blob \| history
lustre/utils/obdio.c		patch \| blob \| history
lustre/utils/obdiolib.c		patch \| blob \| history
lustre/utils/obdiolib.h		patch \| blob \| history
lustre/utils/wirecheck.c		patch \| blob \| history