Whamcloud - gitweb
merge b_devel into HEAD, which will become 0.7.3 0.8.0
author: phil <phil>
Fri, 25 Jul 2003 17:58:07 +0000 (17:58 +0000)
committer: phil <phil>
Fri, 25 Jul 2003 17:58:07 +0000 (17:58 +0000)
 - dozens and dozens of fixes for working with 2.6 kernels
 - new 2.4 kernel APIs
 - uncountable bug fixes

265 files changed:
lnet/.cvsignore
lnet/Kernelenv.in
lnet/Kernelenv.mk
lnet/Makefile.mk
lnet/archdep.m4
lnet/include/config.h.in
lnet/include/linux/kp30.h
lnet/include/linux/portals_compat25.h
lnet/include/lnet/internal.h
lnet/include/lnet/list.h
lnet/include/lnet/lltrace.h
lnet/include/lnet/myrnal.h
lnet/include/lnet/nal.h
lnet/include/lnet/ppid.h
lnet/include/lnet/stringtab.h
lnet/include/lnet/types.h
lnet/klnds/.cvsignore
lnet/klnds/Makefile.mk
lnet/klnds/gmlnd/gmnal.c
lnet/klnds/scimaclnd/scimacnal.c
lnet/klnds/socklnd/.cvsignore
lnet/klnds/socklnd/Makefile.mk
lnet/klnds/toelnd/toenal.c
lnet/klnds/toelnd/toenal_cb.c
lnet/libcfs/.cvsignore
lnet/libcfs/Makefile.mk
lnet/libcfs/debug.c
lnet/libcfs/module.c
lnet/lnet/.cvsignore
lnet/lnet/Makefile.mk
lnet/lnet/api-init.c
lnet/lnet/lib-move.c
lnet/router/.cvsignore
lnet/router/Makefile.mk
lnet/router/router.c
lnet/tests/.cvsignore
lnet/tests/ping_cli.c
lnet/tests/ping_srv.c
lnet/tests/sping_cli.c
lnet/tests/sping_srv.c
lnet/ulnds/debug.c
lnet/ulnds/socklnd/debug.c
lnet/utils/.cvsignore
lnet/utils/debug.c
lnet/utils/portals.c
lustre/.cvsignore
lustre/ChangeLog
lustre/Makefile.mk
lustre/cobd/cache_obd.c
lustre/cobd/lproc_cache.c
lustre/conf/lustre.dtd
lustre/configure.in
lustre/include/liblustre.h
lustre/include/linux/lprocfs_status.h
lustre/include/linux/lustre_compat25.h
lustre/include/linux/lustre_dlm.h
lustre/include/linux/lustre_export.h
lustre/include/linux/lustre_fsfilt.h
lustre/include/linux/lustre_idl.h
lustre/include/linux/lustre_lib.h
lustre/include/linux/lustre_lite.h
lustre/include/linux/lustre_mds.h
lustre/include/linux/lustre_net.h
lustre/include/linux/obd.h
lustre/include/linux/obd_class.h
lustre/include/linux/obd_lov.h
lustre/include/linux/obd_ost.h
lustre/include/linux/obd_support.h
lustre/kernel_patches/patches/dev_read_only_2.4.20-rh.patch
lustre/kernel_patches/patches/export-truncate-2.5.63.patch
lustre/kernel_patches/patches/ext3-delete_thread-2.4.18.patch
lustre/kernel_patches/patches/ext3-delete_thread-2.4.20.patch
lustre/kernel_patches/patches/extN-misc-fixup.patch
lustre/kernel_patches/patches/extN-noread.patch
lustre/kernel_patches/patches/extN-wantedi.patch
lustre/kernel_patches/patches/iopen-2.4.18.patch
lustre/kernel_patches/patches/iopen-2.4.20.patch
lustre/kernel_patches/patches/linux-2.4.18ea-0.8.26.patch
lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54-chaos.patch
lustre/kernel_patches/patches/lustre_version.patch
lustre/kernel_patches/patches/vfs_intent-2.4.20-rh.patch
lustre/kernel_patches/patches/vfs_intent-2.4.20-vanilla.patch
lustre/kernel_patches/pc/ext3-delete_thread-2.4.18.pc
lustre/kernel_patches/pc/ext3-delete_thread-2.4.20.pc
lustre/kernel_patches/pc/extN-wantedi.pc
lustre/kernel_patches/pc/linux-2.4.18ea-0.8.26.pc
lustre/kernel_patches/pc/vfs_intent-2.4.20-vanilla.pc
lustre/kernel_patches/scripts/patchfns
lustre/kernel_patches/series/hp-pnnl-2.4.20
lustre/kernel_patches/series/rh-2.4.20
lustre/kernel_patches/series/vanilla-2.4.20
lustre/kernel_patches/which_patch
lustre/ldlm/.cvsignore
lustre/ldlm/ldlm_lib.c
lustre/ldlm/ldlm_lock.c
lustre/ldlm/ldlm_lockd.c
lustre/ldlm/ldlm_request.c
lustre/ldlm/ldlm_resource.c
lustre/liblustre/file.c
lustre/liblustre/super.c
lustre/llite/.cvsignore
lustre/llite/Makefile.am
lustre/llite/dcache.c
lustre/llite/dir.c
lustre/llite/file.c
lustre/llite/iod.c
lustre/llite/llite_internal.h
lustre/llite/lproc_llite.c
lustre/llite/namei.c
lustre/llite/rw.c
lustre/llite/super.c
lustre/llite/super25.c
lustre/llite/symlink.c
lustre/lov/.cvsignore
lustre/lov/Makefile.am
lustre/lov/lov_obd.c
lustre/lov/lov_pack.c
lustre/lov/lproc_lov.c
lustre/mdc/.cvsignore
lustre/mdc/lproc_mdc.c
lustre/mdc/mdc_internal.h
lustre/mdc/mdc_lib.c
lustre/mdc/mdc_reint.c
lustre/mdc/mdc_request.c
lustre/mds/.cvsignore
lustre/mds/Makefile.mk
lustre/mds/handler.c
lustre/mds/lproc_mds.c
lustre/mds/mds_fs.c
lustre/mds/mds_internal.h
lustre/mds/mds_lib.c
lustre/mds/mds_lov.c
lustre/mds/mds_open.c
lustre/mds/mds_reint.c
lustre/obdclass/.cvsignore
lustre/obdclass/Makefile.am
lustre/obdclass/class_obd.c
lustre/obdclass/fsfilt.c
lustre/obdclass/fsfilt_ext3.c
lustre/obdclass/fsfilt_extN.c
lustre/obdclass/fsfilt_reiserfs.c
lustre/obdclass/lprocfs_status.c
lustre/obdclass/lustre_handles.c
lustre/obdclass/lustre_peer.c
lustre/obdclass/simple.c
lustre/obdclass/statfs_pack.c
lustre/obdecho/.cvsignore
lustre/obdecho/echo.c
lustre/obdecho/echo_client.c
lustre/obdecho/lproc_echo.c
lustre/obdfilter/.cvsignore
lustre/obdfilter/Makefile.am
lustre/obdfilter/filter.c
lustre/obdfilter/lproc_obdfilter.c
lustre/osc/.cvsignore
lustre/osc/lproc_osc.c
lustre/osc/osc_lib.c
lustre/osc/osc_request.c
lustre/ost/.cvsignore
lustre/ost/lproc_ost.c
lustre/ost/ost_handler.c
lustre/portals/.cvsignore
lustre/portals/Kernelenv.in
lustre/portals/Kernelenv.mk
lustre/portals/Makefile.mk
lustre/portals/archdep.m4
lustre/portals/include/config.h.in
lustre/portals/include/linux/kp30.h
lustre/portals/include/linux/portals_compat25.h
lustre/portals/include/portals/list.h
lustre/portals/include/portals/lltrace.h
lustre/portals/include/portals/myrnal.h
lustre/portals/include/portals/nal.h
lustre/portals/include/portals/ppid.h
lustre/portals/include/portals/stringtab.h
lustre/portals/include/portals/types.h
lustre/portals/knals/.cvsignore
lustre/portals/knals/Makefile.mk
lustre/portals/knals/gmnal/gmnal.c
lustre/portals/knals/scimacnal/scimacnal.c
lustre/portals/knals/socknal/.cvsignore
lustre/portals/knals/socknal/Makefile.mk
lustre/portals/knals/toenal/toenal.c
lustre/portals/knals/toenal/toenal_cb.c
lustre/portals/libcfs/.cvsignore
lustre/portals/libcfs/Makefile.mk
lustre/portals/libcfs/debug.c
lustre/portals/libcfs/module.c
lustre/portals/portals/.cvsignore
lustre/portals/portals/Makefile.mk
lustre/portals/portals/api-init.c
lustre/portals/portals/lib-move.c
lustre/portals/router/.cvsignore
lustre/portals/router/Makefile.mk
lustre/portals/router/router.c
lustre/portals/tests/.cvsignore
lustre/portals/tests/ping_cli.c
lustre/portals/tests/ping_srv.c
lustre/portals/tests/sping_cli.c
lustre/portals/tests/sping_srv.c
lustre/portals/unals/debug.c
lustre/portals/utils/.cvsignore
lustre/portals/utils/debug.c
lustre/portals/utils/portals.c
lustre/ptlbd/client.c
lustre/ptlbd/main.c
lustre/ptlbd/server.c
lustre/ptlrpc/.cvsignore
lustre/ptlrpc/Makefile.am
lustre/ptlrpc/client.c
lustre/ptlrpc/lproc_ptlrpc.c
lustre/ptlrpc/pack_generic.c
lustre/ptlrpc/pinger.c
lustre/ptlrpc/ptlrpc_internal.h
lustre/ptlrpc/ptlrpc_lib.c
lustre/ptlrpc/ptlrpc_module.c
lustre/ptlrpc/recover.c
lustre/ptlrpc/service.c
lustre/scripts/lustre.spec.in
lustre/tests/.cvsignore
lustre/tests/Makefile.am
lustre/tests/acceptance-metadata-double.sh
lustre/tests/acceptance-metadata-single.sh
lustre/tests/acceptance-small.sh
lustre/tests/cobd.sh
lustre/tests/create.pl
lustre/tests/directio.c
lustre/tests/echo.sh
lustre/tests/fsx.c
lustre/tests/leak_finder.pl
lustre/tests/lkcdmap
lustre/tests/llecho.sh
lustre/tests/llmount.sh
lustre/tests/local.sh
lustre/tests/lov.sh
lustre/tests/mount2.sh
lustre/tests/mount2fs.sh
lustre/tests/opendevunlink.c
lustre/tests/openfile.c
lustre/tests/openunlink.c
lustre/tests/recovery-cleanup.sh
lustre/tests/recovery-small.sh
lustre/tests/rename.pl
lustre/tests/runas.c
lustre/tests/rundbench
lustre/tests/runobdstat
lustre/tests/runregression-brw.sh
lustre/tests/runtests
lustre/tests/runvmstat
lustre/tests/sanity.sh
lustre/tests/sanityN.sh
lustre/tests/uml.sh
lustre/tests/utime.c
lustre/utils/.cvsignore
lustre/utils/Lustre/__init__.py
lustre/utils/lactive
lustre/utils/lconf
lustre/utils/lctl.c
lustre/utils/lfind.c
lustre/utils/lmc
lustre/utils/obdbarrier.c
lustre/utils/obdio.c
lustre/utils/obdiolib.c
lustre/utils/obdiolib.h
lustre/utils/wirecheck.c

index 99ac885..c1a9bdf 100644 (file)
@@ -6,3 +6,4 @@ autom4te.cache
 config.log
 config.status
 configure
+.*.o.cmd
index 29a713f..7a48c58 100644 (file)
@@ -1 +1,6 @@
-EXTRA_CFLAGS= -Ifs/lustre/include -Ifs/lustre/portals/include
+EXTRA_CFLAGS := -Ifs/lustre/include -Ifs/lustre/portals/include
+# portals/utils/debug.c wants <linux/version.h> from userspace.  sigh.
+HOSTCFLAGS := -I@LINUX@/include $(EXTRA_CFLAGS)
+LIBREADLINE := @LIBREADLINE@
+# 2.5's makefiles aren't nice to cross dir libraries in host programs
+PTLCTLOBJS := debug.o l_ioctl.o parser.o portals.o
index 29a713f..7c66dfa 100644 (file)
@@ -1 +1,4 @@
-EXTRA_CFLAGS= -Ifs/lustre/include -Ifs/lustre/portals/include
+EXTRA_CFLAGS := -Ifs/lustre/include -Ifs/lustre/portals/include
+HOSTCFLAGS := $(EXTRA_CFLAGS)
+# the kernel doesn't want us to build archives for host binaries :/
+PTLCTLOBJS := debug.o l_ioctl.o parser.o portals.o
index be0e51a..73a19df 100644 (file)
@@ -1,6 +1,12 @@
-include fs/lustre/portals/Kernelenv
+include $(src)/Kernelenv
 
-obj-y += portals/
+# The ordering of these determines the order that each subsystem's 
+# module_init() functions are called in.  if these are changed make sure
+# they reflect the dependencies between each subsystem's _init functions.
 obj-y += libcfs/
-obj-y += knals/
+obj-y += portals/
 obj-y += router/
+obj-y += knals/
+obj-y += tests/
+
+obj-m += utils/
index 7a4e05c..1a7741b 100644 (file)
@@ -11,8 +11,13 @@ AC_ARG_WITH(lib, [  --with-lib compile lustre library], host_cpu="lib")
 
 AC_ARG_WITH(linux, [  --with-linux=[path] set path to Linux source (default=/usr/src/linux)],LINUX=$with_linux,LINUX=/usr/src/linux)
 AC_SUBST(LINUX)
+if test x$enable_inkernel = xyes ; then
+        echo ln -s `pwd` $LINUX/fs/lustre
+        rm $LINUX/fs/lustre
+        ln -s `pwd` $LINUX/fs/lustre
+fi
 
-# --------- UML?  --------------------
+#  --------------------
 AC_MSG_CHECKING(if you are running user mode linux for $host_cpu ...)
 if test $host_cpu = "lib" ; then 
         host_cpu="lib"
@@ -111,6 +116,13 @@ case ${host_cpu} in
         MOD_LINK=elf64_ia64
 ;;
 
+       x86_64 )
+       AC_MSG_RESULT($host_cpu)
+        KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -fomit-frame-pointer -mno-red-zone -mcmodel=kernel -pipe -fno-reorder-blocks -finline-limit=2000 -fno-strength-reduce -fno-asynchronous-unwind-tables'
+       KCPPFLAGS='-D__KERNEL__ -DMODULE'
+        MOD_LINK=elf_x86_64
+;;
+
        sparc64 )
        AC_MSG_RESULT($host_cpu)
         KCFLAGS='-O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -Wno-unused -m64 -pipe -mno-fpu -mcpu=ultrasparc -mcmodel=medlow -ffixed-g4 -fcall-used-g5 -fcall-used-g7 -Wno-sign-compare -Wa,--undeclared-regs'
@@ -160,21 +172,33 @@ if test $host_cpu != "lib" ; then
       AC_MSG_ERROR(** cannot find $LINUX/include/linux/autoconf.h. Run make config in $LINUX.)
   fi
 
-# ------------ RELEASE and moduledir ------------------
+# ------------ LINUXRELEASE and moduledir ------------------
   AC_MSG_CHECKING(for Linux release)
   
   dnl We need to rid ourselves of the nasty [ ] quotes.
   changequote(, )
   dnl Get release from version.h
-  RELEASE="`sed -ne 's/.*UTS_RELEASE[ \"]*\([0-9.a-zA-Z_-]*\).*/\1/p' $LINUX/include/linux/version.h`"
+  LINUXRELEASE="`sed -ne 's/.*UTS_RELEASE[ \"]*\([0-9.a-zA-Z_-]*\).*/\1/p' $LINUX/include/linux/version.h`"
   changequote([, ])
   
-  moduledir='$(libdir)/modules/'$RELEASE/kernel
+  moduledir='$(libdir)/modules/'$LINUXRELEASE/kernel
   AC_SUBST(moduledir)
   
   modulefsdir='$(moduledir)/fs/$(PACKAGE)'
   AC_SUBST(modulefsdir)
   
+  AC_MSG_RESULT($LINUXRELEASE)
+  AC_SUBST(LINUXRELEASE)
+
+# ------------ RELEASE --------------------------------
+  AC_MSG_CHECKING(lustre release)
+  
+  dnl We need to rid ourselves of the nasty [ ] quotes.
+  changequote(, )
+  dnl Get release from version.h
+  RELEASE="`sed -ne 's/-/_/g' -e 's/.*UTS_RELEASE[ \"]*\([0-9.a-zA-Z_]*\).*/\1/p' $LINUX/include/linux/version.h`_`date +%Y%m%d%H%M`"
+  changequote([, ])
+
   AC_MSG_RESULT($RELEASE)
   AC_SUBST(RELEASE)
 
@@ -302,7 +326,7 @@ AM_CONDITIONAL(LIBLUSTRE, test x$host_cpu = xlib)
 # This needs to run after we've defined the KCPPFLAGS
 
 AC_MSG_CHECKING(for kernel version)
-AC_TRY_LINK([#define __KERNEL__
+AC_TRY_COMPILE([#define __KERNEL__
              #include <linux/sched.h>],
             [struct task_struct p;
              p.sighand = NULL;],
@@ -313,5 +337,5 @@ if test $RH_2_4_20 = 1; then
        AC_MSG_RESULT(redhat-2.4.20)
        CPPFLAGS="$CPPFLAGS -DCONFIG_RH_2_4_20"
 else
-       AC_MSG_RESULT($RELEASE)
+       AC_MSG_RESULT($LINUXRELEASE)
 fi 
index 3aa6909..f9605ab 100644 (file)
@@ -1,5 +1,11 @@
 /* portals/include/config.h.in.  Generated from configure.in by autoheader.  */
 
+/* Compile with orphan support */
+#undef ENABLE_ORPHANS
+
+/* Use the Pinger */
+#undef ENABLE_PINGER
+
 /* Define to 1 if you have the <inttypes.h> header file. */
 #undef HAVE_INTTYPES_H
 
index ee3b9fc..2133391 100644 (file)
@@ -4,7 +4,6 @@
 #ifndef _KP30_INCLUDED
 #define _KP30_INCLUDED
 
-
 #define PORTAL_DEBUG
 
 #ifndef offsetof
 
 #define LOWEST_BIT_SET(x)      ((x) & ~((x) - 1))
 
-#ifndef CONFIG_SMP
-# define smp_processor_id() 0
-#endif
-
 /*
  *  Debugging
  */
@@ -24,39 +19,34 @@ extern unsigned int portal_subsystem_debug;
 extern unsigned int portal_stack;
 extern unsigned int portal_debug;
 extern unsigned int portal_printk;
-/* Debugging subsystems  (8 bit ID)
- *
- * If you add debug subsystem #32, you need to send email to phil, because
- * you're going to break kernel subsystem debug filtering. */
-#define S_UNDEFINED    (0 << 24)
-#define S_MDC          (1 << 24)
-#define S_MDS          (2 << 24)
-#define S_OSC          (3 << 24)
-#define S_OST          (4 << 24)
-#define S_CLASS        (5 << 24)
-#define S_OBDFS        (6 << 24) /* obsolete */
-#define S_LLITE        (7 << 24)
-#define S_RPC          (8 << 24)
-#define S_EXT2OBD      (9 << 24) /* obsolete */
-#define S_PORTALS     (10 << 24)
-#define S_SOCKNAL     (11 << 24)
-#define S_QSWNAL      (12 << 24)
-#define S_PINGER      (13 << 24)
-#define S_FILTER      (14 << 24)
-#define S_TRACE       (15 << 24) /* obsolete */
-#define S_ECHO        (16 << 24)
-#define S_LDLM        (17 << 24)
-#define S_LOV         (18 << 24)
-#define S_GMNAL       (19 << 24)
-#define S_PTLROUTER   (20 << 24)
-#define S_COBD        (21 << 24)
-#define S_PTLBD       (22 << 24)
-#define S_LOG         (23 << 24)
-
-/* If you change these values, please keep portals/linux/utils/debug.c
+/* Debugging subsystems (32 bits, non-overlapping) */
+#define S_UNDEFINED    (1 << 0)
+#define S_MDC          (1 << 1)
+#define S_MDS          (1 << 2)
+#define S_OSC          (1 << 3)
+#define S_OST          (1 << 4)
+#define S_CLASS        (1 << 5)
+#define S_LOG          (1 << 6)
+#define S_LLITE        (1 << 7)
+#define S_RPC          (1 << 8)
+#define S_MGMT         (1 << 9)
+#define S_PORTALS     (1 << 10)
+#define S_SOCKNAL     (1 << 11)
+#define S_QSWNAL      (1 << 12)
+#define S_PINGER      (1 << 13)
+#define S_FILTER      (1 << 14)
+#define S_PTLBD       (1 << 15)
+#define S_ECHO        (1 << 16)
+#define S_LDLM        (1 << 17)
+#define S_LOV         (1 << 18)
+#define S_GMNAL       (1 << 19)
+#define S_PTLROUTER   (1 << 20)
+#define S_COBD        (1 << 21)
+
+/* If you change these values, please keep portals/utils/debug.c
  * up to date! */
 
-/* Debugging masks (24 bits, non-overlapping) */
+/* Debugging masks (32 bits, non-overlapping) */
 #define D_TRACE     (1 << 0) /* ENTRY/EXIT markers */
 #define D_INODE     (1 << 1)
 #define D_SUPER     (1 << 2)
@@ -80,20 +70,23 @@ extern unsigned int portal_printk;
 #define D_RPCTRACE  (1 << 20) /* for distributed debugging */
 #define D_VFSTRACE  (1 << 21)
 
-#ifndef __KERNEL__
-#define THREAD_SIZE 8192
+#ifdef __KERNEL__
+# include <linux/sched.h> /* THREAD_SIZE */
+#else
+# define THREAD_SIZE 8192
 #endif
-#ifdef  __ia64__
-#define CDEBUG_STACK() (THREAD_SIZE -                                      \
+
+#ifdef __KERNEL__
+# ifdef  __ia64__
+#  define CDEBUG_STACK (THREAD_SIZE -                                      \
                         ((unsigned long)__builtin_dwarf_cfa() &            \
                          (THREAD_SIZE - 1)))
-#else
-#define CDEBUG_STACK() (THREAD_SIZE -                                      \
+# else
+#  define CDEBUG_STACK (THREAD_SIZE -                                      \
                         ((unsigned long)__builtin_frame_address(0) &       \
                          (THREAD_SIZE - 1)))
-#endif
+# endif
 
-#ifdef __KERNEL__
 #define CHECK_STACK(stack)                                                    \
         do {                                                                  \
                 if ((stack) > 3*THREAD_SIZE/4 && (stack) > portal_stack) {    \
@@ -105,20 +98,21 @@ extern unsigned int portal_printk;
                       /*panic("LBUG");*/                                      \
                 }                                                             \
         } while (0)
-#else
+#else /* __KERNEL__ */
 #define CHECK_STACK(stack) do { } while(0)
-#endif
+#define CDEBUG_STACK (0L)
+#endif /* __KERNEL__ */
 
 #if 1
 #define CDEBUG(mask, format, a...)                                            \
 do {                                                                          \
-        CHECK_STACK(CDEBUG_STACK());                                          \
+        CHECK_STACK(CDEBUG_STACK);                                            \
         if (!(mask) || ((mask) & (D_ERROR | D_EMERG)) ||                      \
             (portal_debug & (mask) &&                                         \
-             portal_subsystem_debug & (1 << (DEBUG_SUBSYSTEM >> 24))))        \
+             portal_subsystem_debug & DEBUG_SUBSYSTEM))                       \
                 portals_debug_msg(DEBUG_SUBSYSTEM, mask,                      \
                                   __FILE__, __FUNCTION__, __LINE__,           \
-                                  CDEBUG_STACK(), format , ## a);             \
+                                  CDEBUG_STACK, format, ## a);                \
 } while (0)
 
 #define CWARN(format, a...) CDEBUG(D_WARNING, format, ## a)
@@ -162,7 +156,6 @@ do {                                                                    \
 #define EXIT                            do { } while (0)
 #endif
 
-
 #ifdef __KERNEL__
 # include <linux/vmalloc.h>
 # include <linux/time.h>
@@ -210,7 +203,8 @@ static inline void our_cond_resched(void)
 #endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) */
 
 #ifdef PORTAL_DEBUG
-extern void kportal_assertion_failed(char *expr,char *file,char *func,int line);
+extern void kportal_assertion_failed(char *expr, char *file, const char *func,
+                                     const int line);
 #define LASSERT(e) ((e) ? 0 : kportal_assertion_failed( #e , __FILE__,  \
                                                         __FUNCTION__, __LINE__))
 #else
@@ -560,14 +554,14 @@ extern struct prof_ent prof_ents[MAX_PROFS];
 #endif /* PORTALS_PROFILING */
 
 /* debug.c */
-void portals_run_lbug_upcall(char * file, char *fn, int line);
+void portals_run_lbug_upcall(char * file, const char *fn, const int line);
 void portals_debug_dumplog(void);
 int portals_debug_init(unsigned long bufsize);
 int portals_debug_cleanup(void);
 int portals_debug_clear_buffer(void);
 int portals_debug_mark_buffer(char *text);
 int portals_debug_set_daemon(unsigned int cmd, unsigned int length,
-                char *file, unsigned int size);
+                             char *file, unsigned int size);
 __s32 portals_debug_copy_to_user(char *buf, unsigned long len);
 #if (__GNUC__)
 /* Use the special GNU C __attribute__ hack to have the compiler check the
@@ -578,13 +572,14 @@ __s32 portals_debug_copy_to_user(char *buf, unsigned long len);
 # warning printf has been defined as a macro...
 # undef printf
 #endif
-void portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
-                        unsigned long stack, const char *format, ...)
+void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
+                       const int line, unsigned long stack,
+                       const char *format, ...)
         __attribute__ ((format (printf, 7, 8)));
 #else
-void portals_debug_msg (int subsys, int mask, char *file, char *fn,
-                        int line, unsigned long stack,
-                        const char *format, ...);
+void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
+                       const int line, unsigned long stack,
+                       const char *format, ...);
 #endif /* __GNUC__ */
 void portals_debug_set_level(unsigned int debug_level);
 
@@ -618,9 +613,9 @@ extern void kportal_blockallsigs (void);
 # define PORTAL_ALLOC(ptr, size) do { (ptr) = malloc(size); } while (0);
 # define PORTAL_FREE(a, b) do { free(a); } while (0);
 # define portals_debug_msg(subsys, mask, file, fn, line, stack, format, a...) \
-    printf ("%02x:%06x (@%lu %s:%s,l. %d %d %lu): " format,                    \
-            (subsys) >> 24, (mask), (long)time(0), file, fn, line,            \
-            getpid() , stack, ## a);
+    printf("%02x:%06x (@%lu %s:%s,l. %d %d %lu): " format,                    \
+           (subsys), (mask), (long)time(0), file, fn, line,                   \
+           getpid() , stack, ## a);
 #endif
 
 #ifndef CURRENT_TIME
@@ -911,13 +906,13 @@ ptl_handle_ni_t *kportal_get_ni (int nal);
 void kportal_put_ni (int nal);
 
 #ifdef __CYGWIN__
-#ifndef BITS_PER_LONG
-#if (~0UL) == 0xffffffffUL
-#define BITS_PER_LONG 32
-#else
-#define BITS_PER_LONG 64
-#endif
-#endif
+# ifndef BITS_PER_LONG
+#  if (~0UL) == 0xffffffffUL
+#   define BITS_PER_LONG 32
+#  else
+#   define BITS_PER_LONG 64
+#  endif
+# endif
 #endif
 
 #if (BITS_PER_LONG == 32 || __WORDSIZE == 32)
index e28fbac..a7cb4d1 100644 (file)
@@ -1,13 +1,56 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef _PORTALS_COMPAT_H
+#define _PORTALS_COMPAT_H
+
+// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved
+#if SPINLOCK_DEBUG
+# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) || defined(CONFIG_RH_2_4_20)
+#  define SIGNAL_MASK_ASSERT() \
+   LASSERT(current->sighand->siglock.magic == SPINLOCK_MAGIC)
+# else
+#  define SIGNAL_MASK_ASSERT() \
+   LASSERT(current->sigmask_lock.magic == SPINLOCK_MAGIC)
+# endif
+#else
+# define SIGNAL_MASK_ASSERT()
+#endif
+// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved
+
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) || defined(CONFIG_RH_2_4_20)
-# define SIGNAL_MASK_LOCK(task, flags)                              \
+
+# define SIGNAL_MASK_LOCK(task, flags)                                  \
   spin_lock_irqsave(&task->sighand->siglock, flags)
-# define SIGNAL_MASK_UNLOCK(task, flags)                            \
+# define SIGNAL_MASK_UNLOCK(task, flags)                                \
   spin_unlock_irqrestore(&task->sighand->siglock, flags)
+# define USERMODEHELPER(path, argv, envp)                               \
+  call_usermodehelper(path, argv, envp, 1)
 # define RECALC_SIGPENDING         recalc_sigpending()
-#else
-# define SIGNAL_MASK_LOCK(task, flags)                              \
+# define CURRENT_SECONDS           get_seconds()
+
+#else /* 2.4.x */
+
+# define SIGNAL_MASK_LOCK(task, flags)                                  \
   spin_lock_irqsave(&task->sigmask_lock, flags)
-# define SIGNAL_MASK_UNLOCK(task, flags)                            \
+# define SIGNAL_MASK_UNLOCK(task, flags)                                \
   spin_unlock_irqrestore(&task->sigmask_lock, flags)
+# define USERMODEHELPER(path, argv, envp)                               \
+  call_usermodehelper(path, argv, envp)
 # define RECALC_SIGPENDING         recalc_sigpending(current)
+# define CURRENT_SECONDS           CURRENT_TIME
+
+#endif
+
+#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
+# define THREAD_NAME(comm, fmt, a...)                                   \
+        sprintf(comm, fmt "|%d", ## a, current->thread.extern_pid)
+#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+# define THREAD_NAME(comm, fmt, a...)                                   \
+        sprintf(comm, fmt "|%d", ## a, current->thread.mode.tt.extern_pid)
+#else
+# define THREAD_NAME(comm, fmt, a...)                                   \
+        sprintf(comm, fmt, ## a)
 #endif
+
+#endif /* _PORTALS_COMPAT_H */
index d78cad4..a70b465 100644 (file)
@@ -1,5 +1,3 @@
-/*
-*/
 #ifndef _P30_INTERNAL_H_
 #define _P30_INTERNAL_H_
 
index 2b63312..78a1e2d 100644 (file)
@@ -1,6 +1,4 @@
 #ifndef _LINUX_LIST_H
-#define _LINUX_LIST_H
-
 
 /*
  * Simple doubly linked list implementation.
@@ -101,7 +99,9 @@ static inline void list_del_init(struct list_head *entry)
        __list_del(entry->prev, entry->next);
        INIT_LIST_HEAD(entry);
 }
+#endif
 
+#ifndef list_for_each_entry
 /**
  * list_move - delete from one list and add as another's head
  * @list: the entry to move
@@ -124,7 +124,10 @@ static inline void list_move_tail(struct list_head *list,
        __list_del(list->prev, list->next);
        list_add_tail(list, head);
 }
+#endif
 
+#ifndef _LINUX_LIST_H
+#define _LINUX_LIST_H
 /**
  * list_empty - tests whether a list is empty
  * @head: the list to test.
index 7d1b304..d389aab 100644 (file)
@@ -2,7 +2,7 @@
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
  * Compile with:
- * cc -I../../portals/include -o fio fio.c -L../../portals/linux/utils -lptlctl 
+ * cc -I../../portals/include -o fio fio.c -L../../portals/linux/utils -lptlctl
  */
 #ifndef __LTRACE_H_
 #define __LTRACE_H_
@@ -31,20 +31,20 @@ static inline int ltrace_write_file(char* fname)
         argv[0] = "debug_kernel";
         argv[1] = fname;
         argv[2] = "1";
-        
+
         fprintf(stderr, "[ptlctl] %s %s %s\n", argv[0], argv[1], argv[2]);
-        
+
         return jt_dbg_debug_kernel(3, argv);
 }
 
 static inline int ltrace_clear()
 {
         char* argv[1];
-        
+
         argv[0] = "clear";
-        
+
         fprintf(stderr, "[ptlctl] %s\n", argv[0]);
-        
+
         return jt_dbg_clear_debug_buf(1, argv);
 }
 
@@ -52,9 +52,9 @@ static inline int ltrace_mark(int indent_level, char* text)
 {
         char* argv[2];
         char mark_buf[PATH_MAX];
-        
+
         snprintf(mark_buf, PATH_MAX, "====%d=%s", indent_level, text);
-        
+
         argv[0] = "mark";
         argv[1] = mark_buf;
         return jt_dbg_mark_debug_buf(2, argv);
@@ -65,9 +65,9 @@ static inline int ltrace_applymasks()
         char* argv[2];
         argv[0] = "list";
         argv[1] = "applymasks";
-        
+
         fprintf(stderr, "[ptlctl] %s %s\n", argv[0], argv[1]);
-        
+
         return jt_dbg_list(2, argv);
 }
 
@@ -95,19 +95,19 @@ static inline int ltrace_start()
 #ifdef PORTALS_DEV_ID
         rc = register_ioc_dev(PORTALS_DEV_ID, PORTALS_DEV_PATH);
 #endif
-        ltrace_filter("class"); 
+        ltrace_filter("class");
         ltrace_filter("socknal");
-        ltrace_filter("qswnal"); 
-        ltrace_filter("gmnal");  
-        ltrace_filter("portals");  
-        
-        ltrace_show("all_types");  
-        ltrace_filter("trace");  
-        ltrace_filter("malloc"); 
-        ltrace_filter("net"); 
-        ltrace_filter("page"); 
-        ltrace_filter("other"); 
-        ltrace_filter("info"); 
+        ltrace_filter("qswnal");
+        ltrace_filter("gmnal");
+        ltrace_filter("portals");
+
+        ltrace_show("all_types");
+        ltrace_filter("trace");
+        ltrace_filter("malloc");
+        ltrace_filter("net");
+        ltrace_filter("page");
+        ltrace_filter("other");
+        ltrace_filter("info");
         ltrace_applymasks();
 
         return rc;
@@ -146,7 +146,7 @@ static inline void ltrace_add_processnames(char* fname)
         struct timezone tz;
         int nob;
         int underuml = !not_uml();
-        
+
         gettimeofday(&tv, &tz);
 
         nob = snprintf(cmdbuf, LTRACE_MAX_NOB, "ps --no-headers -eo \"");
@@ -167,7 +167,7 @@ static inline void ltrace_add_processnames(char* fname)
                                  "(%s:%d:%s() %d+%lu): ",
                                  "lltrace.h", __LINE__, __FUNCTION__, 0, 0L);
         }
-         
+
         nob += snprintf(cmdbuf+nob, LTRACE_MAX_NOB, " %%p %%c\" >> %s", fname);
         system(cmdbuf);
 }
index 12b1925..13790f7 100644 (file)
@@ -1,6 +1,3 @@
-/*
-*/
-
 #ifndef MYRNAL_H
 #define MYRNAL_H
 
index 88be63c..7cb3ab7 100644 (file)
@@ -1,5 +1,3 @@
-/*
-*/
 #ifndef _NAL_H_
 #define _NAL_H_
 
index 4727599..760f465 100644 (file)
@@ -1,6 +1,3 @@
-/*
- */
-
 #ifndef _INCppidh_
 #define _INCppidh_
 
index c9683f7..33e4375 100644 (file)
@@ -1,5 +1,3 @@
 /*
-*/
-/*
  * stringtab.h
  */
index d4038b6..0269290 100644 (file)
@@ -2,14 +2,19 @@
 #define _P30_TYPES_H_
 
 #ifdef __linux__
-#include <asm/types.h>
-#include <asm/timex.h>
+# include <asm/types.h>
+# include <asm/timex.h>
 #else
-#include <sys/types.h>
+# include <sys/types.h>
 typedef u_int32_t __u32;
 typedef u_int64_t __u64;
-typedef unsigned long long cycles_t;
-static inline cycles_t get_cycles(void) { return 0; }
+#endif
+
+#ifdef __KERNEL__
+# include <linux/time.h>
+#else
+# include <sys/time.h>
+# define do_gettimeofday(tv) gettimeofday(tv, NULL)
 #endif
 
 typedef __u64 ptl_nid_t;
@@ -31,7 +36,7 @@ typedef ptl_handle_any_t ptl_handle_md_t;
 typedef ptl_handle_any_t ptl_handle_me_t;
 
 #define PTL_HANDLE_NONE \
-((const ptl_handle_any_t){.nal_idx = -1, .cookie = -1})
+    ((const ptl_handle_any_t){.nal_idx = -1, .cookie = -1})
 #define PTL_EQ_NONE PTL_HANDLE_NONE
 
 static inline int PtlHandleEqual (ptl_handle_any_t h1, ptl_handle_any_t h2)
@@ -108,17 +113,15 @@ typedef struct {
         ptl_handle_me_t unlinked_me;
         ptl_md_t mem_desc;
         ptl_hdr_data_t hdr_data;
-        cycles_t  arrival_time;
+        struct timeval arrival_time;
         volatile ptl_seq_t sequence;
 } ptl_event_t;
 
-
 typedef enum {
         PTL_ACK_REQ,
         PTL_NOACK_REQ
 } ptl_ack_req_t;
 
-
 typedef struct {
         volatile ptl_seq_t sequence;
         ptl_size_t size;
@@ -130,7 +133,6 @@ typedef struct {
         ptl_eq_t *eq;
 } ptl_ni_t;
 
-
 typedef struct {
         int max_match_entries;    /* max number of match entries */
         int max_mem_descriptors;  /* max number of memory descriptors */
index 282522d..89a4aa6 100644 (file)
@@ -1,2 +1,3 @@
 Makefile
 Makefile.in
+.*.o.cmd
index ce40a60..cd5d9d6 100644 (file)
@@ -1,4 +1,4 @@
-include ../Kernelenv
+include $(obj)/../Kernelenv
 
 obj-y = socknal/
-# more coming...
\ No newline at end of file
+# more coming...
index ceeea2a..0cffc15 100644 (file)
@@ -124,7 +124,7 @@ static nal_t *kgmnal_init(int interface, ptl_pt_index_t ptl_size,
         return &kgmnal_api;
 }
 
-static void __exit
+static void /*__exit*/
 kgmnal_finalize(void)
 {
         struct list_head *tmp;
index 1066d69..479cc2c 100644 (file)
@@ -112,7 +112,7 @@ static nal_t *kscimacnal_init(int interface, ptl_pt_index_t  ptl_size,
 
 
 /* Called by kernel at module unload time */
-static void __exit 
+static void /*__exit*/ 
 kscimacnal_finalize(void)
 {
         /* FIXME: How should the shutdown procedure really look? */
index e995588..95973d6 100644 (file)
@@ -1,3 +1,4 @@
 .deps
 Makefile
 Makefile.in
+.*.o.cmd
index 46edf01..5c1b366 100644 (file)
@@ -3,7 +3,7 @@
 # This code is issued under the GNU General Public License.
 # See the file COPYING in this distribution
 
-include ../../Kernelenv
+include $(src)/../../Kernelenv
 
 obj-y += ksocknal.o
 ksocknal-objs    := socknal.o socknal_cb.o
index 1f5dc38..77ee473 100644 (file)
@@ -379,7 +379,7 @@ ktoenal_cmd(struct portal_ioctl_data * data, void * private)
 }
 
 
-void __exit
+void /*__exit*/
 ktoenal_module_fini (void)
 {
         CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
index ec37f6f..abd0731 100644 (file)
@@ -893,6 +893,7 @@ ktoenal_process_receive (ksock_conn_t *conn, long *irq_flags)
                                 spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags);
                                 goto get_fmb;   /* => go get a fwd msg buffer */
                         default:
+                                break;
                         }
                         /* Not Reached */
                         LBUG ();
@@ -934,6 +935,7 @@ ktoenal_process_receive (ksock_conn_t *conn, long *irq_flags)
                 goto out;                       /* (later) */
 
         default:
+                break;
         }
 
         /* Not Reached */
index 67d1a3d..7fa686f 100644 (file)
@@ -2,3 +2,4 @@
 Makefile
 Makefile.in
 link-stamp
+.*.o.cmd
index 3196ea2..9aa838f 100644 (file)
@@ -6,4 +6,4 @@
 include fs/lustre/portals/Kernelenv
 
 obj-y += libcfs.o
-licfs-objs    := module.o proc.o debug.o
\ No newline at end of file
+libcfs-objs    := module.o proc.o debug.o
index 8d26dbb..f37cd96 100644 (file)
@@ -571,8 +571,8 @@ int portals_debug_init(unsigned long bufsize)
         memset(debug_buf, 0, debug_size);
         debug_wrapped = 0;
 
-        printk(KERN_INFO "Portals: allocated %lu byte debug buffer at %p.\n",
-               bufsize, debug_buf);
+        //printk(KERN_INFO "Portals: allocated %lu byte debug buffer at %p.\n",
+               //bufsize, debug_buf);
         atomic_set(&debug_off_a, debug_off);
         notifier_chain_register(&panic_notifier_list, &lustre_panic_notifier);
         debug_size = bufsize;
@@ -632,9 +632,9 @@ int portals_debug_mark_buffer(char *text)
         if (debug_buf == NULL)
                 return -EINVAL;
 
-        CDEBUG(0, "*******************************************************************************\n");
+        CDEBUG(0, "********************************************************\n");
         CDEBUG(0, "DEBUG MARKER: %s\n", text);
-        CDEBUG(0, "*******************************************************************************\n");
+        CDEBUG(0, "********************************************************\n");
 
         return 0;
 }
@@ -672,8 +672,8 @@ __s32 portals_debug_copy_to_user(char *buf, unsigned long len)
 
 /* FIXME: I'm not very smart; someone smarter should make this better. */
 void
-portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
-                   unsigned long stack, const char *format, ...)
+portals_debug_msg(int subsys, int mask, char *file, const char *fn,
+                  const int line, unsigned long stack, const char *format, ...)
 {
         va_list       ap;
         unsigned long flags;
@@ -728,8 +728,8 @@ portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
         do_gettimeofday(&tv);
 
         prefix_nob = snprintf(debug_buf + debug_off, max_nob,
-                              "%02x:%06x:%d:%lu.%06lu ",
-                              subsys >> 24, mask, smp_processor_id(),
+                              "%06x:%06x:%d:%lu.%06lu ",
+                              subsys, mask, smp_processor_id(),
                               tv.tv_sec, tv.tv_usec);
         max_nob -= prefix_nob;
 
@@ -752,7 +752,7 @@ portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
 
         va_start(ap, format);
         msg_nob += vsnprintf(debug_buf + debug_off + prefix_nob + msg_nob,
-                            max_nob, format, ap);
+                             max_nob, format, ap);
         max_nob -= msg_nob;
         va_end(ap);
 
@@ -790,7 +790,7 @@ void portals_debug_set_level(unsigned int debug_level)
         portal_debug = debug_level;
 }
 
-void portals_run_lbug_upcall(char * file, char *fn, int line)
+void portals_run_lbug_upcall(char *file, const char *fn, const int line)
 {
         char *argv[6];
         char *envp[3];
@@ -803,7 +803,7 @@ void portals_run_lbug_upcall(char * file, char *fn, int line)
         argv[0] = portals_upcall;
         argv[1] = "LBUG";
         argv[2] = file;
-        argv[3] = fn;
+        argv[3] = (char *)fn;
         argv[4] = buf;
         argv[5] = NULL;
 
index 14cc325..e8eb290 100644 (file)
@@ -62,10 +62,10 @@ static struct nal_cmd_handler nal_cmd[NAL_MAX_NR + 1];
 struct semaphore nal_cmd_sem;
 
 #ifdef PORTAL_DEBUG
-void
-kportal_assertion_failed (char *expr, char *file, char *func, int line)
+void kportal_assertion_failed(char *expr, char *file, const char *func,
+                              const int line)
 {
-        portals_debug_msg(0, D_EMERG, file, func, line, CDEBUG_STACK(),
+        portals_debug_msg(0, D_EMERG, file, func, line, CDEBUG_STACK,
                           "ASSERTION(%s) failed\n", expr);
         LBUG_WITH_LOC(file, func, line);
 }
index e995588..95973d6 100644 (file)
@@ -1,3 +1,4 @@
 .deps
 Makefile
 Makefile.in
+.*.o.cmd
index 5627ef7..7822846 100644 (file)
@@ -3,7 +3,10 @@
 # This code is issued under the GNU General Public License.
 # See the file COPYING in this distribution
 
-include ../Kernelenv
+include $(src)/../Kernelenv
 
 obj-y += portals.o
-portals-objs    := lib-dispatch.o lib-eq.o lib-init.o lib-md.o lib-me.o lib-move.o lib-msg.o lib-ni.o lib-not-impl.o lib-pid.o api-eq.o api-errno.o api-init.o api-md.o api-me.o api-ni.o api-wrap.o
+portals-objs    :=     lib-dispatch.o lib-eq.o lib-init.o lib-md.o lib-me.o \
+                       lib-move.o lib-msg.o lib-ni.o lib-pid.o \
+                       api-eq.o api-errno.o api-init.o api-me.o api-ni.o \
+                       api-wrap.o
index e59c922..dc1fead 100644 (file)
@@ -26,7 +26,7 @@
 #include <portals/api-support.h>
 
 int ptl_init;
-unsigned int portal_subsystem_debug = 0xfff7e3ff;
+unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_QSWNAL | S_SOCKNAL | S_GMNAL);
 unsigned int portal_debug = ~0;
 unsigned int portal_printk;
 unsigned int portal_stack;
index fde4f16..02f8b60 100644 (file)
@@ -544,7 +544,7 @@ get_new_msg (nal_cb_t *nal, lib_md_t *md)
         msg->send_ack = 0;
 
         msg->md = md;
-        msg->ev.arrival_time = get_cycles();
+        do_gettimeofday(&msg->ev.arrival_time);
         md->pending++;
         if (md->threshold != PTL_MD_THRESH_INF) {
                 LASSERT (md->threshold > 0);
index e995588..95973d6 100644 (file)
@@ -1,3 +1,4 @@
 .deps
 Makefile
 Makefile.in
+.*.o.cmd
index 64bd09b..9b02c03 100644 (file)
@@ -3,7 +3,7 @@
 # This code is issued under the GNU General Public License.
 # See the file COPYING in this distribution
 
-include ../Kernelenv
+include $(src)/../Kernelenv
 
 obj-y += kptlrouter.o
 kptlrouter-objs    := router.o proc.o
index 6074c3c..27a7fba 100644 (file)
@@ -23,8 +23,8 @@
 
 #include "router.h"
 
-struct list_head kpr_routes;
-struct list_head kpr_nals;
+LIST_HEAD(kpr_routes);
+LIST_HEAD(kpr_nals);
 
 unsigned long long kpr_fwd_bytes;
 unsigned long      kpr_fwd_packets;
@@ -35,7 +35,7 @@ atomic_t           kpr_queue_depth;
  *
  * Once in a blue moon we register/deregister NALs and add/remove routing
  * entries (thread context only)... */
-rwlock_t         kpr_rwlock;
+rwlock_t         kpr_rwlock = RW_LOCK_UNLOCKED;
 
 kpr_router_interface_t kpr_router_interface = {
        kprri_register:         kpr_register_nal,
@@ -55,7 +55,7 @@ kpr_control_interface_t kpr_control_interface = {
 int
 kpr_register_nal (kpr_nal_interface_t *nalif, void **argp)
 {
-       long               flags;
+       unsigned long      flags;
        struct list_head  *e;
        kpr_nal_entry_t   *ne;
 
@@ -98,7 +98,7 @@ kpr_register_nal (kpr_nal_interface_t *nalif, void **argp)
 void
 kpr_shutdown_nal (void *arg)
 {
-       long             flags;
+       unsigned long    flags;
        kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg;
 
         CDEBUG (D_OTHER, "Shutting down NAL %d\n", ne->kpne_interface.kprni_nalid);
@@ -123,7 +123,7 @@ kpr_shutdown_nal (void *arg)
 void
 kpr_deregister_nal (void *arg)
 {
-       long              flags;
+       unsigned long     flags;
        kpr_nal_entry_t  *ne = (kpr_nal_entry_t *)arg;
 
         CDEBUG (D_OTHER, "Deregister NAL %d\n", ne->kpne_interface.kprni_nalid);
@@ -296,7 +296,7 @@ int
 kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, ptl_nid_t lo_nid,
                ptl_nid_t hi_nid)
 {
-       long               flags;
+       unsigned long      flags;
        struct list_head  *e;
        kpr_route_entry_t *re;
 
@@ -345,7 +345,7 @@ kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, ptl_nid_t lo_nid,
 int
 kpr_del_route (ptl_nid_t nid)
 {
-       long               flags;
+       unsigned long      flags;
        struct list_head  *e;
 
         CDEBUG(D_OTHER, "Del route "LPX64"\n", nid);
@@ -398,7 +398,7 @@ kpr_get_route(int idx, int *gateway_nalid, ptl_nid_t *gateway_nid,
         return (-ENOENT);
 }
 
-static void __exit
+static void /*__exit*/
 kpr_finalise (void)
 {
         LASSERT (list_empty (&kpr_nals));
@@ -427,10 +427,6 @@ kpr_initialise (void)
         CDEBUG(D_MALLOC, "kpr_initialise: kmem %d\n",
                atomic_read(&portal_kmemory));
 
-       rwlock_init(&kpr_rwlock);
-       INIT_LIST_HEAD(&kpr_routes);
-       INIT_LIST_HEAD(&kpr_nals);
-
         kpr_proc_init();
 
         PORTAL_SYMBOL_REGISTER(kpr_router_interface);
index 051d1bd..d0c4c88 100644 (file)
@@ -1,3 +1,4 @@
 Makefile
 Makefile.in
 .deps
+.*.o.cmd
index 389ffbb..4d04ffb 100644 (file)
@@ -260,7 +260,7 @@ pingcli_start(struct portal_ioctl_data *args)
 
 
 /* called by the portals_ioctl for ping requests */
-static int kping_client(struct portal_ioctl_data *args)
+int kping_client(struct portal_ioctl_data *args)
 {
         PORTAL_ALLOC (client, sizeof(struct pingcli_data));
         if (client == NULL)
@@ -282,7 +282,7 @@ static int __init pingcli_init(void)
 } /* pingcli_init() */
 
 
-static void __exit pingcli_cleanup(void)
+static void /*__exit*/ pingcli_cleanup(void)
 {
         PORTAL_SYMBOL_UNREGISTER (kping_client);
 } /* pingcli_cleanup() */
index 1037d09..873e11c 100644 (file)
 #include <asm/semaphore.h>
 
 #define STDSIZE (sizeof(int) + sizeof(int) + sizeof(struct timeval))
-#define MAXSIZE (16*1024*1024)
+#define MAXSIZE (16*1024)
 
 static unsigned ping_head_magic;
 static unsigned ping_bulk_magic;
-static int nal  = 0;                            // Your NAL,
+static int nal  = SOCKNAL;                            // Your NAL,
 static unsigned long packets_valid = 0;         // Valid packets 
 static int running = 1;
 atomic_t pkt;
@@ -282,7 +282,7 @@ static int __init pingsrv_init(void)
 } /* pingsrv_init() */
 
 
-static void __exit pingsrv_cleanup(void)
+static void /*__exit*/ pingsrv_cleanup(void)
 {
         remove_proc_entry ("net/pingsrv", NULL);
         
index 4cef08b..35e114b 100644 (file)
@@ -235,7 +235,7 @@ pingcli_start(struct portal_ioctl_data *args)
 
 
 /* called by the portals_ioctl for ping requests */
-static int kping_client(struct portal_ioctl_data *args)
+int kping_client(struct portal_ioctl_data *args)
 {
 
         PORTAL_ALLOC (client, sizeof(struct pingcli_data));
@@ -258,7 +258,7 @@ static int __init pingcli_init(void)
 } /* pingcli_init() */
 
 
-static void __exit pingcli_cleanup(void)
+static void /*__exit*/ pingcli_cleanup(void)
 {
         PORTAL_SYMBOL_UNREGISTER (kping_client);
 } /* pingcli_cleanup() */
index a18ea35..2b45a46 100644 (file)
@@ -269,7 +269,7 @@ static int __init pingsrv_init(void)
 } /* pingsrv_init() */
 
 
-static void __exit pingsrv_cleanup(void)
+static void /*__exit*/ pingsrv_cleanup(void)
 {
         remove_proc_entry ("net/pingsrv", NULL);
         
index 529bb2d..b73f042 100644 (file)
@@ -84,8 +84,8 @@ int portals_debug_copy_to_user(char *buf, unsigned long len)
 
 /* FIXME: I'm not very smart; someone smarter should make this better. */
 void
-portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
-                   const char *format, ...)
+portals_debug_msg (int subsys, int mask, char *file, const char *fn, 
+                   const int line, const char *format, ...)
 {
         va_list       ap;
         unsigned long flags;
index 529bb2d..b73f042 100644 (file)
@@ -84,8 +84,8 @@ int portals_debug_copy_to_user(char *buf, unsigned long len)
 
 /* FIXME: I'm not very smart; someone smarter should make this better. */
 void
-portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
-                   const char *format, ...)
+portals_debug_msg (int subsys, int mask, char *file, const char *fn, 
+                   const int line, const char *format, ...)
 {
         va_list       ap;
         unsigned long flags;
index 148310a..8e474ad 100644 (file)
@@ -5,4 +5,5 @@ debugctl
 ptlctl
 .deps
 routerstat
-wirecheck
\ No newline at end of file
+wirecheck
+.*.cmd
index 9ab1c73..0a009d2 100644 (file)
@@ -53,17 +53,18 @@ static char rawbuf[8192];
 static char *buf = rawbuf;
 static int max = 8192;
 //static int g_pfd = -1;
-static int subsystem_array[1 << 8];
+static int subsystem_mask = ~0;
 static int debug_mask = ~0;
 
 static const char *portal_debug_subsystems[] =
-        {"undefined", "mdc", "mds", "osc", "ost", "class", "obdfs", "llite",
-         "rpc", "ext2obd", "portals", "socknal", "qswnal", "pinger", "filter",
-         "obdtrace", "echo", "ldlm", "lov", "gmnal", "router", "ptldb", NULL};
+        {"undefined", "mdc", "mds", "osc", "ost", "class", "log", "llite",
+         "rpc", "mgmt", "portals", "socknal", "qswnal", "pinger", "filter",
+         "ptlbd", "echo", "ldlm", "lov", "gmnal", "router", "cobd", NULL};
 static const char *portal_debug_masks[] =
         {"trace", "inode", "super", "ext2", "malloc", "cache", "info", "ioctl",
          "blocks", "net", "warning", "buffs", "other", "dentry", "portals",
-         "page", "dlmtrace", "error", "emerg", "ha", "rpctrace", "vfstrace", NULL};
+         "page", "dlmtrace", "error", "emerg", "ha", "rpctrace", "vfstrace",
+         NULL};
 
 struct debug_daemon_cmd {
         char *cmd;
@@ -88,7 +89,10 @@ static int do_debug_mask(char *name, int enable)
                         printf("%s output from subsystem \"%s\"\n",
                                 enable ? "Enabling" : "Disabling",
                                 portal_debug_subsystems[i]);
-                        subsystem_array[i] = enable;
+                        if (enable)
+                                subsystem_mask |= (1 << i);
+                        else
+                                subsystem_mask &= ~(1 << i);
                         found = 1;
                 }
         }
@@ -111,7 +115,6 @@ static int do_debug_mask(char *name, int enable)
 
 int dbg_initialize(int argc, char **argv)
 {
-        memset(subsystem_array, 1, sizeof(subsystem_array));
         return 0;
 }
 
@@ -213,12 +216,7 @@ int jt_dbg_list(int argc, char **argv)
                 for (i = 0; portal_debug_masks[i] != NULL; i++)
                         printf(", %s", portal_debug_masks[i]);
                 printf("\n");
-        }
-        else if (strcasecmp(argv[1], "applymasks") == 0) {
-                unsigned int subsystem_mask = 0;
-                for (i = 0; portal_debug_subsystems[i] != NULL; i++) {
-                        if (subsystem_array[i]) subsystem_mask |= (1 << i);
-                }
+        } else if (strcasecmp(argv[1], "applymasks") == 0) {
                 applymask_all(subsystem_mask, debug_mask);
         }
         return 0;
@@ -230,12 +228,6 @@ static void dump_buffer(FILE *fd, char *buf, int size, int raw)
 {
         char *p, *z;
         unsigned long subsystem, debug, dropped = 0, kept = 0;
-        int max_sub, max_type;
-
-        for (max_sub = 0; portal_debug_subsystems[max_sub] != NULL; max_sub++)
-                ;
-        for (max_type = 0; portal_debug_masks[max_type] != NULL; max_type++)
-                ;
 
         while (size) {
                 p = memchr(buf, '\n', size);
@@ -247,8 +239,7 @@ static void dump_buffer(FILE *fd, char *buf, int size, int raw)
                 z++;
                 /* for some reason %*s isn't working. */
                 *p = '\0';
-                if (subsystem < max_sub &&
-                    subsystem_array[subsystem] &&
+                if ((subsystem_mask & subsystem) &&
                     (!debug || (debug_mask & debug))) {
                         if (raw)
                                 fprintf(fd, "%s\n", buf);
@@ -551,6 +542,8 @@ int jt_dbg_modules(int argc, char **argv)
                 {"mds_ext3", "lustre/mds"},
                 {"mds_extN", "lustre/mds"},
                 {"ptlbd", "lustre/ptlbd"},
+                {"mgmt_svc", "lustre/mgmt"},
+                {"mgmt_cli", "lustre/mgmt"},
                 {NULL, NULL}
         };
         char *path = "..";
index 90d66f5..a89f4f7 100644 (file)
@@ -22,6 +22,7 @@
 
 #include <stdio.h>
 #include <sys/types.h>
+#include <netdb.h>
 #include <sys/socket.h>
 #include <netinet/tcp.h>
 #include <netdb.h>
@@ -106,6 +107,27 @@ nal2name (int nal)
         return ((e == NULL) ? "???" : e->name);
 }
 
+/*
+ * Resolve hname with gethostbyname() and print a diagnostic to stderr when
+ * the lookup fails.
+ *
+ * Returns the hostent handed back by gethostbyname() on success, or NULL
+ * after the failure has been reported.
+ */
+static struct hostent *
+ptl_gethostbyname(char * hname) {
+        struct hostent *he;
+        he = gethostbyname(hname);
+        if (!he) {
+                switch(h_errno) {
+                case HOST_NOT_FOUND:
+                case NO_ADDRESS:
+                        fprintf(stderr, "Unable to resolve hostname: %s\n",
+                                hname);
+                        break;
+                default:
+                        /* gethostbyname() reports failures through h_errno,
+                         * not errno, so decode h_errno with hstrerror(). */
+                        fprintf(stderr, "gethostbyname error: %s\n",
+                                hstrerror(h_errno));
+                        break;
+                }
+                return NULL;
+        }
+        return he;
+}
+
 int
 ptl_parse_nid (ptl_nid_t *nidp, char *str)
 {
@@ -127,7 +149,7 @@ ptl_parse_nid (ptl_nid_t *nidp, char *str)
         
         if ((('a' <= str[0] && str[0] <= 'z') ||
              ('A' <= str[0] && str[0] <= 'Z')) &&
-             (he = gethostbyname (str)) != NULL)
+             (he = ptl_gethostbyname (str)) != NULL)
         {
                 __u32 addr = *(__u32 *)he->h_addr;
 
@@ -351,12 +373,9 @@ int jt_ptl_connect(int argc, char **argv)
                         goto usage;
                 }
 
-                he = gethostbyname(argv[1]);
-                if (!he) {
-                        fprintf(stderr, "gethostbyname error: %s\n",
-                                strerror(errno));
+                he = ptl_gethostbyname(argv[1]);
+                if (!he)
                         return -1;
-                }
 
                 g_port = atol(argv[2]);
 
@@ -525,12 +544,9 @@ int jt_ptl_disconnect(int argc, char **argv)
 
                 PORTAL_IOC_INIT(data);
                 if (argc == 2) {
-                        he = gethostbyname(argv[1]);
-                        if (!he) {
-                                fprintf(stderr, "gethostbyname error: %s\n",
-                                        strerror(errno));
+                        he = ptl_gethostbyname(argv[1]);
+                        if (!he) 
                                 return -1;
-                        }
                         
                         data.ioc_nid = ntohl (*(__u32 *)he->h_addr); /* HOST byte order */
 
@@ -582,12 +598,9 @@ int jt_ptl_push_connection (int argc, char **argv)
 
                 PORTAL_IOC_INIT(data);
                 if (argc == 2) {
-                        he = gethostbyname(argv[1]);
-                        if (!he) {
-                                fprintf(stderr, "gethostbyname error: %s\n",
-                                        strerror(errno));
+                        he = ptl_gethostbyname(argv[1]);
+                        if (!he)
                                 return -1;
-                        }
                         
                         data.ioc_nid = ntohl (*(__u32 *)he->h_addr); /* HOST byte order */
 
index 776ef36..a8a5356 100644 (file)
@@ -15,4 +15,4 @@ cscope.files
 cscope.out
 autom4te-2.53.cache
 autom4te.cache
-
+.*.o.cmd
index 89eaef7..17c08c6 100644 (file)
@@ -1,4 +1,14 @@
 tbd
+       * version v0_8
+       * bug fixes
+        - orphans are moved into the PENDING directory for possible recovery
+        - replayed opens now open by fid for orphan/rename safety (1042)
+        - last close of an orphan inode generates a transno (683)
+       - chdir() and mount() now pin the directory entry (1020)
+       - avoid CERROR in normal ll_setattr_raw() error case (1500)
+       - discard very old requests without processing them (1502)
+
+2003-06-15  Phil Schwan  <phil@clusterfs.com>
        * version v0_7
        * bug fixes
        - imports and exports cleanup too early, need refcounts (349, 879, 1045)
index e540148..59178a4 100644 (file)
@@ -1,4 +1,22 @@
-include fs/lustre/portals/Kernelenv
+include $(src)/portals/Kernelenv
+
+# for scripts/version_tag.pl
+LINUX = @LINUX@
 
 obj-y += portals/
+# obdclass has to come before anything that does class_register..
+obj-y += obdclass/
+obj-y += ptlrpc/
+obj-y += ldlm/
+obj-y += obdfilter/
+obj-y += mdc/
 obj-y += mds/
+obj-y += obdecho/
+obj-y += osc/
+obj-y += ost/
+obj-y += lov/
+obj-y += llite/
+
+# portals needs to be before utils/, which pulls in ptlctl objects
+obj-m += utils/
+obj-m += tests/ 
index 5efb545..2d3549b 100644 (file)
@@ -36,13 +36,13 @@ static int cobd_attach(struct obd_device *dev, obd_count len, void *data)
 {
         struct lprocfs_static_vars lvars;
 
-        lprocfs_init_vars(&lvars);
-       return lprocfs_obd_attach(dev, lvars.obd_vars);
+        lprocfs_init_vars(cobd, &lvars);
+        return lprocfs_obd_attach(dev, lvars.obd_vars);
 }
 
 static int cobd_detach(struct obd_device *dev)
 {
-       return lprocfs_obd_detach(dev);
+        return lprocfs_obd_detach(dev);
 }
 
 static int
@@ -82,24 +82,23 @@ cobd_setup (struct obd_device *dev, obd_count len, void *buf)
         return (0);
 
  fail_0:
-        obd_disconnect (&cobd->cobd_target, 0 );
+        obd_disconnect(&cobd->cobd_target, 0);
         return (rc);
 }
 
-static int
-cobd_cleanup (struct obd_device *dev, int force, int failover)
+static int cobd_cleanup(struct obd_device *dev, int flags)
 {
         struct cache_obd  *cobd = &dev->u.cobd;
         int                rc;
 
-        if (!list_empty (&dev->obd_exports))
+        if (!list_empty(&dev->obd_exports))
                 return (-EBUSY);
 
-        rc = obd_disconnect (&cobd->cobd_cache, failover);
+        rc = obd_disconnect(&cobd->cobd_cache, flags);
         if (rc != 0)
                 CERROR ("error %d disconnecting cache\n", rc);
 
-        rc = obd_disconnect (&cobd->cobd_target, failover);
+        rc = obd_disconnect(&cobd->cobd_target, flags);
         if (rc != 0)
                 CERROR ("error %d disconnecting target\n", rc);
 
@@ -116,13 +115,12 @@ cobd_connect (struct lustre_handle *conn, struct obd_device *obd,
         return (rc);
 }
 
-static int
-cobd_disconnect (struct lustre_handle *conn, int failover)
+static int cobd_disconnect(struct lustre_handle *conn, int flags)
 {
-       int rc = class_disconnect (conn, failover);
+        int rc = class_disconnect(conn, flags);
 
         CERROR ("rc %d\n", rc);
-       return (rc);
+        return (rc);
 }
 
 static int
@@ -144,23 +142,15 @@ cobd_get_info(struct lustre_handle *conn, obd_count keylen,
         return obd_get_info(&cobd->cobd_target, keylen, key, vallen, val);
 }
 
-static int cobd_statfs(struct obd_export *exp, struct obd_statfs *osfs)
+static int cobd_statfs(struct obd_device *obd, struct obd_statfs *osfs,
+                       unsigned long max_age)
 {
-        struct obd_export *cobd_exp;
-        int rc;
-
-        if (exp->exp_obd == NULL)
-                return -EINVAL;
-
-        cobd_exp = class_conn2export(&exp->exp_obd->u.cobd.cobd_target);
-        rc = obd_statfs(cobd_exp, osfs);
-        class_export_put(cobd_exp);
-        return rc;
+        return obd_statfs(class_conn2obd(&obd->u.cobd.cobd_target), osfs,
+                          max_age);
 }
 
-static int
-cobd_getattr(struct lustre_handle *conn, struct obdo *oa,
-             struct lov_stripe_md *lsm)
+static int cobd_getattr(struct lustre_handle *conn, struct obdo *oa,
+                        struct lov_stripe_md *lsm)
 {
         struct obd_device *obd = class_conn2obd(conn);
         struct cache_obd  *cobd;
@@ -207,11 +197,10 @@ cobd_close(struct lustre_handle *conn, struct obdo *oa,
         return (obd_close (&cobd->cobd_target, oa, lsm, oti));
 }
 
-static int cobd_preprw(int cmd, struct obd_export *exp, struct obdo *obdo,
+static int cobd_preprw(int cmd, struct obd_export *exp, struct obdo *oa,
                        int objcount, struct obd_ioobj *obj,
                        int niocount, struct niobuf_remote *nb,
-                       struct niobuf_local *res, void **desc_private,
-                       struct obd_trans_info *oti)
+                       struct niobuf_local *res, struct obd_trans_info *oti)
 {
         struct obd_export *cobd_exp;
         int rc;
@@ -223,16 +212,17 @@ static int cobd_preprw(int cmd, struct obd_export *exp, struct obdo *obdo,
                 return -EOPNOTSUPP;
 
         cobd_exp = class_conn2export(&exp->exp_obd->u.cobd.cobd_target);
-        rc = obd_preprw(cmd, cobd_exp, obdo, objcount, obj, niocount, nb, res,
-                        desc_private, oti);
+        rc = obd_preprw(cmd, cobd_exp, oa, objcount, obj, niocount, nb, res,
+                        oti);
         class_export_put(cobd_exp);
+
         return rc;
 }
 
-static int cobd_commitrw(int cmd, struct obd_export *exp,
+static int cobd_commitrw(int cmd, struct obd_export *exp, struct obdo *oa,
                          int objcount, struct obd_ioobj *obj,
                          int niocount, struct niobuf_local *local,
-                         void *desc_private, struct obd_trans_info *oti)
+                         struct obd_trans_info *oti)
 {
         struct obd_export *cobd_exp;
         int rc;
@@ -244,16 +234,14 @@ static int cobd_commitrw(int cmd, struct obd_export *exp,
                 return -EOPNOTSUPP;
 
         cobd_exp = class_conn2export(&exp->exp_obd->u.cobd.cobd_target);
-        rc = obd_commitrw(cmd, cobd_exp, objcount, obj, niocount, local,
-                          desc_private, oti);
+        rc = obd_commitrw(cmd, cobd_exp, oa, objcount, obj,niocount,local,oti);
         class_export_put(cobd_exp);
         return rc;
 }
 
-static inline int
-cobd_brw(int cmd, struct lustre_handle *conn,
-         struct lov_stripe_md *lsm, obd_count oa_bufs,
-         struct brw_page *pga, struct obd_trans_info *oti)
+static int cobd_brw(int cmd, struct lustre_handle *conn, struct obdo *oa,
+                    struct lov_stripe_md *lsm, obd_count oa_bufs,
+                    struct brw_page *pga, struct obd_trans_info *oti)
 {
         struct obd_device *obd = class_conn2obd(conn);
         struct cache_obd  *cobd;
@@ -267,13 +255,11 @@ cobd_brw(int cmd, struct lustre_handle *conn,
                 return -EOPNOTSUPP;
 
         cobd = &obd->u.cobd;
-        return (obd_brw (cmd, &cobd->cobd_target,
-                         lsm, oa_bufs, pga, oti));
+        return (obd_brw(cmd, &cobd->cobd_target, oa, lsm, oa_bufs, pga, oti));
 }
 
-static int
-cobd_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
-               void *karg, void *uarg)
+static int cobd_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
+                          void *karg, void *uarg)
 {
         struct obd_device *obd = class_conn2obd(conn);
         struct cache_obd  *cobd;
@@ -286,7 +272,7 @@ cobd_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
         /* intercept? */
 
         cobd = &obd->u.cobd;
-        return (obd_iocontrol (cmd, &cobd->cobd_target, len, karg, uarg));
+        return (obd_iocontrol(cmd, &cobd->cobd_target, len, karg, uarg));
 }
 
 static struct obd_ops cobd_ops = {
@@ -317,16 +303,16 @@ static int __init cobd_init(void)
         struct lprocfs_static_vars lvars;
         ENTRY;
 
-       printk(KERN_INFO "Lustre Caching OBD driver; info@clusterfs.com\n");
+        printk(KERN_INFO "Lustre Caching OBD driver; info@clusterfs.com\n");
 
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(cobd, &lvars);
         RETURN(class_register_type(&cobd_ops, lvars.module_vars,
                                    OBD_CACHE_DEVICENAME));
 }
 
-static void __exit cobd_exit(void)
+static void /*__exit*/ cobd_exit(void)
 {
-       class_unregister_type(OBD_CACHE_DEVICENAME);
+        class_unregister_type(OBD_CACHE_DEVICENAME);
 }
 
 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
index fd7474b..ba9b9cf 100644 (file)
 #include <linux/lprocfs_status.h>
 
 #ifndef LPROCFS
-struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
-struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_module_vars[] = { {0} };
 #else
 /* Common STATUS namespace */
-static int rd_target(char *page, char **start, off_t off, int count,
-                     int *eof, void *data)
+static int cobd_rd_target(char *page, char **start, off_t off, int count,
+                          int *eof, void *data)
 {
-        struct obd_device    *dev = (struct obd_device*)data;
-       struct lustre_handle *conn;
-       struct obd_export    *exp;
-       int    rc;
+        struct obd_device *cobd = (struct obd_device *)data;
+        int    rc;
 
-        LASSERT(dev != NULL);
-        conn = &dev->u.cobd.cobd_target;
+        LASSERT(cobd != NULL);
 
-       if (!dev->obd_set_up) {
-               rc = snprintf (page, count, "not set up\n");
-       } else {
-               exp = class_conn2export(conn);
-               LASSERT(exp != NULL);
-               rc = snprintf(page, count, "%s\n", 
-                              exp->exp_obd->obd_uuid.uuid);
-                class_export_put(exp);
-       }
-       return (rc);
+        if (!cobd->obd_set_up) {
+                rc = snprintf(page, count, "not set up\n");
+        } else {
+                struct obd_device *tgt =
+                        class_conn2obd(&cobd->u.cobd.cobd_target);
+                LASSERT(tgt != NULL);
+                rc = snprintf(page, count, "%s\n", tgt->obd_uuid.uuid);
+        }
+        return rc;
 }
 
-static int rd_cache(char *page, char **start, off_t off, int count,
-                    int *eof, void *data)
+static int cobd_rd_cache(char *page, char **start, off_t off, int count,
+                         int *eof, void *data)
 {
-        struct obd_device    *dev = (struct obd_device*)data;
-       struct lustre_handle *conn;
-       struct obd_export    *exp;
-       int    rc;
+        struct obd_device *cobd = (struct obd_device*)data;
+        int    rc;
 
-        LASSERT(dev != NULL);
-        conn = &dev->u.cobd.cobd_cache;
+        LASSERT(cobd != NULL);
 
-       if (!dev->obd_set_up) {
-               rc = snprintf (page, count, "not set up\n");
+        if (!cobd->obd_set_up) {
+                rc = snprintf(page, count, "not set up\n");
         } else {
-               exp = class_conn2export(conn);
-               LASSERT (exp != NULL);
-               rc = snprintf(page, count, "%s\n", 
-                              exp->exp_obd->obd_uuid.uuid);
-                class_export_put(exp);
-       }
-       return (rc);
+                struct obd_device *cache =
+                        class_conn2obd(&cobd->u.cobd.cobd_cache);
+                LASSERT(cache != NULL);
+                rc = snprintf(page, count, "%s\n", cache->obd_uuid.uuid);
+        }
+        return rc;
 }
 
-struct lprocfs_vars lprocfs_obd_vars[] = {
-        { "uuid",        lprocfs_rd_uuid,    0, 0 },
-        { "target_uuid", rd_target,          0, 0 },
-        { "cache_uuid",  rd_cache,           0, 0 },
+static struct lprocfs_vars lprocfs_obd_vars[] = {
+        { "uuid",         lprocfs_rd_uuid,        0, 0 },
+        { "target_uuid",  cobd_rd_target,         0, 0 },
+        { "cache_uuid",   cobd_rd_cache,          0, 0 },
         { 0 }
 };
 
 struct lprocfs_vars lprocfs_module_vars[] = {
-        { "num_refs",    lprocfs_rd_numrefs, 0, 0 },
+        { "num_refs",     lprocfs_rd_numrefs,     0, 0 },
         { 0 }
 };
 #endif /* LPROCFS */
 
-LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
+LPROCFS_INIT_VARS(cobd, lprocfs_module_vars, lprocfs_obd_vars)
index 51d1d1a..de4d653 100644 (file)
 
 <!ELEMENT profile (ldlm_ref | ptlrpc_ref | network_ref | routetbl_ref |
                    osd_ref | mdsdev_ref | lovconfig_ref|
-                   echoclient_ref | mountpoint_ref)*>
+                   echoclient_ref | mountpoint_ref | mgmt_ref)*>
 <!ATTLIST profile %object.attr;>
 
-<!ELEMENT mountpoint (path | fileset | mds_ref | obd_ref)*>
+<!ELEMENT mountpoint path #REQUIRED
+                     filesystem_ref #REQUIRED >
 <!ATTLIST mountpoint %object.attr;>
 
 <!ELEMENT echoclient (obd_ref)>
@@ -45,6 +46,9 @@
 <!ELEMENT ldlm EMPTY>
 <!ATTLIST ldlm %object.attr;>
 
+<!ELEMENT mgmt (active_ref)*>
+<!ATTLIST mgmt %object.attr;>
+
 <!ELEMENT ptlrpc EMPTY>
 <!ATTLIST ptlrpc %object.attr;>
 
 <!ATTLIST ost %object.attr;
               failover ( 1 | 0 ) #IMPLIED>
 
+<!-- A filesystem is expected to reference an mds and an obd, plus any
+     number of mgmt services.  "#REQUIRED" is only legal in ATTLIST
+     declarations, so the children are listed as a content model in the
+     same style as the other elements (which does not enforce presence). -->
+<!ELEMENT filesystem (mds_ref | obd_ref | mgmt_ref)*>
+<!ATTLIST filesystem %object.attr;>
+
 <!ELEMENT mds (active_ref | lovconfig_ref | group)*>
 <!ATTLIST mds %object.attr;
               failover ( 1 | 0 ) #IMPLIED>
 <!ATTLIST obd_ref         %objref.attr;>
 <!ELEMENT ost_ref         %objref.content;>
 <!ATTLIST ost_ref         %objref.attr;>
-<!ELEMENT active_ref         %objref.content;>
-<!ATTLIST active_ref         %objref.attr;>
-<!ELEMENT target_ref         %objref.content;>
-<!ATTLIST target_ref         %objref.attr;>
+<!ELEMENT active_ref      %objref.content;>
+<!ATTLIST active_ref      %objref.attr;>
+<!ELEMENT target_ref      %objref.content;>
+<!ATTLIST target_ref      %objref.attr;>
 <!ELEMENT lov_ref         %objref.content;>
 <!ATTLIST lov_ref         %objref.attr;>
 <!ELEMENT lovconfig_ref   %objref.content;>
 <!ATTLIST lovconfig_ref   %objref.attr;>
+<!ELEMENT mgmt_ref        %objref.content;>
+<!ATTLIST mgmt_ref        %objref.attr;>
 <!ELEMENT mountpoint_ref  %objref.content;>
 <!ATTLIST mountpoint_ref  %objref.attr;>
+<!ELEMENT filesystem_ref  %objref.content;>
+<!ATTLIST filesystem_ref  %objref.attr;>
 <!ELEMENT echoclient_ref  %objref.content;>
 <!ATTLIST echoclient_ref  %objref.attr;>
 <!ELEMENT failover_ref    %objref.content;>
index 8e12135..50f82c8 100644 (file)
@@ -14,6 +14,18 @@ AM_INIT_AUTOMAKE(lustre, builtin([esyscmd], [sed -ne '/^%define version /{ s/.*v
 AC_ARG_ENABLE(extN, [  --enable-extN use extN instead of ext3 for lustre backend])
 AM_CONDITIONAL(EXTN, test x$enable_extN = xyes)
 
+# the pinger is temporary, until we have the recovery node in place
+AC_ARG_ENABLE(pinger, [  --enable-pinger recovery pinger support])
+if test x$enable_pinger = xyes ; then
+  AC_DEFINE(ENABLE_PINGER, 1, Use the Pinger)
+fi
+
+# very experimental orphan support
+AC_ARG_ENABLE(orphans, [  --enable-orphans very experimental orphan recovery support])
+if test x$enable_orphans = xyes ; then
+  AC_DEFINE(ENABLE_ORPHANS, 1, Compile with orphan support)
+fi
+
 AC_ARG_WITH(obd-buffer-size, [  --with-obd-buffer-size=[size] set lctl ioctl maximum (default=8K)],OBD_BUFFER_SIZE=$with_obd_buffer_size,OBD_BUFFER_SIZE=8192)
 AC_DEFINE_UNQUOTED(OBD_MAX_IOCTL_BUFFER, $OBD_BUFFER_SIZE, [IOCTL Buffer Size])
 
@@ -21,15 +33,8 @@ sinclude(portals/build.m4)
 sinclude(portals/archdep.m4)
 
 if test x$enable_inkernel = xyes ; then
-cp Makefile.mk Makefile.in
-cp mds/Makefile.mk mds/Makefile.in
-cp portals/Kernelenv.mk portals/Kernelenv.in
-cp portals/Makefile.mk portals/Makefile.in
-cp portals/libcfs/Makefile.mk portals/libcfs/Makefile.in
-cp portals/portals/Makefile.mk portals/portals/Makefile.in
-cp portals/knals/Makefile.mk portals/knals/Makefile.in
-cp portals/knals/socknal/Makefile.mk portals/knals/socknal/Makefile.in
-cp portals/router/Makefile.mk portals/router/Makefile.in
+       find . -name Makefile.mk | sed 's/.mk$//' | xargs -n 1 \
+               sh -e -x -c '(cp -f $0.mk $0.in)'
 fi
 
 AM_CONFIG_HEADER(portals/include/config.h)
index 202a761..6b94901 100644 (file)
@@ -363,16 +363,16 @@ struct page {
 #define kmap(page) (page)->addr
 #define kunmap(a) do { int foo = 1; foo++; } while (0)
 
-static inline struct page *alloc_pages(int mask, unsigned long foo)
+static inline struct page *alloc_pages(int mask, unsigned long order)
 {
         struct page *pg = malloc(sizeof(*pg));
 
         if (!pg)
                 return NULL;
 #ifdef MAP_ANONYMOUS
-        pg->addr = mmap(0, PAGE_SIZE, PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
+        pg->addr = mmap(0, PAGE_SIZE << order, PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
 #else
-        pg->addr = malloc(PAGE_SIZE);
+        pg->addr = malloc(PAGE_SIZE << order);
 #endif
 
         if (!pg->addr) {
@@ -407,26 +407,27 @@ static inline struct page* __grab_cache_page(int index)
 /* arithmetic */
 #define do_div(a,b)                     \
         ({                              \
-                unsigned long ret;      \
-                ret = (a)%(b);          \
-                (a) = (a)/(b);          \
-                (ret);                  \
+                unsigned long remainder;\
+                remainder = (a) % (b);  \
+                (a) = (a) / (b);        \
+                (remainder);            \
         })
 
 /* VFS stuff */
-#define ATTR_MODE       1
-#define ATTR_UID        2
-#define ATTR_GID        4
-#define ATTR_SIZE       8
-#define ATTR_ATIME      16
-#define ATTR_MTIME      32
-#define ATTR_CTIME      64
-#define ATTR_ATIME_SET  128
-#define ATTR_MTIME_SET  256
-#define ATTR_FORCE      512     /* Not a change, but a change it */
-#define ATTR_ATTR_FLAG  1024
-#define ATTR_RAW        2048    /* file system, not vfs will massage attrs */
-#define ATTR_FROM_OPEN  4096    /* called from open path, ie O_TRUNC */
+#define ATTR_MODE       0x0001
+#define ATTR_UID        0x0002
+#define ATTR_GID        0x0004
+#define ATTR_SIZE       0x0008
+#define ATTR_ATIME      0x0010
+#define ATTR_MTIME      0x0020
+#define ATTR_CTIME      0x0040
+#define ATTR_ATIME_SET  0x0080
+#define ATTR_MTIME_SET  0x0100
+#define ATTR_FORCE      0x0200  /* Not a change, but a change it */
+#define ATTR_ATTR_FLAG  0x0400
+#define ATTR_RAW        0x0800  /* file system, not vfs will massage attrs */
+#define ATTR_FROM_OPEN  0x1000  /* called from open path, ie O_TRUNC */
+#define ATTR_CTIME_SET  0x2000
 
 struct iattr {
         unsigned int    ia_valid;
index fb96bde..e6678f8 100644 (file)
 #ifndef _LPROCFS_SNMP_H
 #define _LPROCFS_SNMP_H
 
+
 #ifdef __KERNEL__
 #include <linux/config.h>
 #include <linux/autoconf.h>
 #include <linux/proc_fs.h>
+#include <linux/version.h>
 #include <linux/smp.h>
+#include <linux/kp30.h>
+
+# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#  include <linux/statfs.h>
+# else 
+#  define kstatfs statfs
+# endif
+
+#else 
+#  define kstatfs statfs
 #endif
 
-#include <linux/kp30.h>
 
 #ifndef LPROCFS
 #ifdef  CONFIG_PROC_FS  /* Ensure that /proc is configured */
@@ -116,9 +127,8 @@ struct lprocfs_stats {
 /* class_obd.c */
 extern struct proc_dir_entry *proc_lustre_root;
 
-/* lproc_lov.c */
-extern struct file_operations ll_proc_target_fops;
 struct obd_device;
+struct file;
 
 #ifdef LPROCFS
 
@@ -184,14 +194,18 @@ void lprocfs_init_multi_vars(unsigned int idx,                            \
    x->obd_vars = glob[idx].obd_vars;                                      \
 }                                                                         \
 
-#define LPROCFS_INIT_VARS(vclass, vinstance)           \
-void lprocfs_init_vars(struct lprocfs_static_vars *x)  \
+#define LPROCFS_INIT_VARS(name, vclass, vinstance)           \
+void lprocfs_##name##_init_vars(struct lprocfs_static_vars *x)  \
 {                                                      \
         x->module_vars = vclass;                       \
         x->obd_vars = vinstance;                       \
 }                                                      \
 
-extern void lprocfs_init_vars(struct lprocfs_static_vars *var);
+#define lprocfs_init_vars(NAME, VAR)     \
+do {      \
+        extern void lprocfs_##NAME##_init_vars(struct lprocfs_static_vars *);  \
+        lprocfs_##NAME##_init_vars(VAR);                                       \
+} while (0)
 extern void lprocfs_init_multi_vars(unsigned int idx,
                                     struct lprocfs_static_vars *var);
 /* lprocfs_status.c */
@@ -220,6 +234,8 @@ extern int lprocfs_rd_uuid(char *page, char **start, off_t off,
                            int count, int *eof, void *data);
 extern int lprocfs_rd_name(char *page, char **start, off_t off,
                            int count, int *eof, void *data);
+extern int lprocfs_rd_fstype(char *page, char **start, off_t off,
+                             int count, int *eof, void *data);
 extern int lprocfs_rd_server_uuid(char *page, char **start, off_t off,
                                   int count, int *eof, void *data);
 extern int lprocfs_rd_conn_uuid(char *page, char **start, off_t off,
@@ -228,38 +244,24 @@ extern int lprocfs_rd_numrefs(char *page, char **start, off_t off,
                               int count, int *eof, void *data);
 
 /* Statfs helpers */
-struct statfs;
 extern int lprocfs_rd_blksize(char *page, char **start, off_t off,
-                              int count, int *eof, struct statfs *sfs);
+                              int count, int *eof, void *data);
 extern int lprocfs_rd_kbytestotal(char *page, char **start, off_t off,
-                                  int count, int *eof, struct statfs *sfs);
+                                  int count, int *eof, void *data);
 extern int lprocfs_rd_kbytesfree(char *page, char **start, off_t off,
-                                 int count, int *eof, struct statfs *sfs);
+                                 int count, int *eof, void *data);
 extern int lprocfs_rd_filestotal(char *page, char **start, off_t off,
-                                 int count, int *eof, struct statfs *sfs);
+                                 int count, int *eof, void *data);
 extern int lprocfs_rd_filesfree(char *page, char **start, off_t off,
-                                int count, int *eof, struct statfs *sfs);
+                                int count, int *eof, void *data);
 extern int lprocfs_rd_filegroups(char *page, char **start, off_t off,
-                                 int count, int *eof, struct statfs *sfs);
+                                 int count, int *eof, void *data);
 
 /* lprocfs_status.c: counter read/write functions */
-struct file;
 extern int lprocfs_counter_read(char *page, char **start, off_t off,
                                 int count, int *eof, void *data);
 extern int lprocfs_counter_write(struct file *file, const char *buffer,
                                  unsigned long count, void *data);
-
-#define DEFINE_LPROCFS_STATFS_FCT(fct_name, get_statfs_fct)               \
-int fct_name(char *page, char **start, off_t off,                         \
-             int count, int *eof, void *data)                             \
-{                                                                         \
-        struct statfs sfs;                                                \
-        int rc = get_statfs_fct((struct obd_device*)data, &sfs);          \
-        return (rc == 0 ?                                                 \
-                lprocfs_##fct_name (page, start, off, count, eof, &sfs) : \
-                rc);                                                      \
-}
-
 #else
 /* LPROCFS is not defined */
 static inline void lprocfs_counter_add(struct lprocfs_stats *stats,
@@ -289,18 +291,17 @@ static inline void lprocfs_free_obd_stats(struct obd_device *obddev)
 static inline struct proc_dir_entry *
 lprocfs_register(const char *name, struct proc_dir_entry *parent,
                  struct lprocfs_vars *list, void *data) { return NULL; }
-#define LPROCFS_INIT_MULTI_VARS(array, size)
+/* Expands to nothing: the LPROCFS variant of this macro defines a function,
+ * so it is used at file scope, where "do {} while (0)" is not valid C. */
+#define LPROCFS_INIT_MULTI_VARS(array, size)
 static inline void lprocfs_init_multi_vars(unsigned int idx,
                                            struct lprocfs_static_vars *x) { return; }
-#define LPROCFS_INIT_VARS(vclass, vinstance)
-static inline void lprocfs_init_vars(struct lprocfs_static_vars *x) { return; }
+/* Expands to nothing: this macro is invoked at file scope (with LPROCFS it
+ * defines lprocfs_<name>_init_vars()), where "do {} while (0)" is not
+ * valid C. */
+#define LPROCFS_INIT_VARS(name, vclass, vinstance)
+#define lprocfs_init_vars(...) do {} while (0)
 static inline int lprocfs_add_vars(struct proc_dir_entry *root,
                                    struct lprocfs_vars *var,
                                    void *data) { return 0; }
 static inline void lprocfs_remove(struct proc_dir_entry *root) {};
 static inline struct proc_dir_entry *lprocfs_srch(struct proc_dir_entry *head,
                                     const char *name) {return 0;}
-struct obd_device;
 static inline int lprocfs_obd_attach(struct obd_device *dev,
                                      struct lprocfs_vars *list) { return 0; }
 static inline int lprocfs_obd_detach(struct obd_device *dev)  { return 0; }
@@ -318,37 +319,30 @@ static inline int lprocfs_rd_numrefs(char *page, char **start, off_t off,
                                      int count, int *eof, void *data) { return 0; }
 
 /* Statfs helpers */
-struct statfs;
 static inline
 int lprocfs_rd_blksize(char *page, char **start, off_t off,
-                       int count, int *eof, struct statfs *sfs) { return 0; }
+                       int count, int *eof, void *data) { return 0; }
 static inline
 int lprocfs_rd_kbytestotal(char *page, char **start, off_t off,
-                           int count, int *eof, struct statfs *sfs) { return 0; }
+                           int count, int *eof, void *data) { return 0; }
 static inline
 int lprocfs_rd_kbytesfree(char *page, char **start, off_t off,
-                          int count, int *eof, struct statfs *sfs) { return 0; }
+                          int count, int *eof, void *data) { return 0; }
 static inline
 int lprocfs_rd_filestotal(char *page, char **start, off_t off,
-                          int count, int *eof, struct statfs *sfs) { return 0; }
+                          int count, int *eof, void *data) { return 0; }
 static inline
 int lprocfs_rd_filesfree(char *page, char **start, off_t off,
-                         int count, int *eof, struct statfs *sfs)  { return 0; }
+                         int count, int *eof, void *data)  { return 0; }
 static inline
 int lprocfs_rd_filegroups(char *page, char **start, off_t off,
-                          int count, int *eof, struct statfs *sfs) { return 0; }
+                          int count, int *eof, void *data) { return 0; }
 static inline
 int lprocfs_counter_read(char *page, char **start, off_t off,
                          int count, int *eof, void *data) { return 0; }
-struct file;
 static inline
 int lprocfs_counter_write(struct file *file, const char *buffer,
                           unsigned long count, void *data) { return 0; }
-
-#define DEFINE_LPROCFS_STATFS_FCT(fct_name, get_statfs_fct)  \
-int fct_name(char *page, char **start, off_t off,            \
-             int count, int *eof, void *data) { *eof = 1; return 0; }
-
 #endif /* LPROCFS */
 
 #endif /* LPROCFS_SNMP_H */
index 4275a10..3609d52 100644 (file)
 #ifndef _COMPAT25_H
 #define _COMPAT25_H
 
-#include <linux/portals_compat25.h>
+#ifdef __KERNEL__
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
-#define KDEVT_VAL(dev, val)         dev.value = 0               
-#else
-#define KDEVT_VAL(dev, val)         dev = 0               
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) && LINUX_VERSION_CODE < KERNEL_VERSION(2,5,69)
+#error sorry, lustre requires at least 2.5.69
 #endif
 
+#include <linux/portals_compat25.h>
+
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
 # define PGCACHE_WRLOCK(mapping)          write_lock(&mapping->page_lock)
 # define PGCACHE_WRUNLOCK(mapping)        write_unlock(&mapping->page_lock)
-#else
+
+#define KDEVT_INIT(val)                 { .value = val }
+/* Seconds component of a time value that has a tv_sec member.  The argument
+ * is parenthesized so that expressions such as *ptr expand correctly. */
+#define LTIME_S(time)                   ((time).tv_sec)
+#define USERMODEHELPER(path, argv, envp) \
+                                        call_usermodehelper(path, argv, envp, 1)
+#define ll_path_lookup                  path_lookup
+
+
+#define ll_pgcache_lock(mapping)          spin_lock(&mapping->page_lock)
+#define ll_pgcache_unlock(mapping)        spin_unlock(&mapping->page_lock)
+
+#else /* 2.4.. */
+
 # define PGCACHE_WRLOCK(mapping)          spin_lock(&pagecache_lock)
 # define PGCACHE_WRUNLOCK(mapping)        spin_unlock(&pagecache_lock)
+
+/* 2.5 uses hlists for some things, like the d_hash.  we'll treat them
+ * as 2.5 and let macros drop back.. */
+#define hlist_entry                     list_entry
+#define hlist_head                      list_head
+#define hlist_node                      list_head
+#define HLIST_HEAD                      LIST_HEAD
+#define INIT_HLIST_HEAD                 INIT_LIST_HEAD
+#define hlist_del_init                  list_del_init
+#define hlist_add_head                  list_add
+#define hlist_for_each_safe             list_for_each_safe
+#define KDEVT_INIT(val)                 (val)
+#define ext3_xattr_set_handle           ext3_xattr_set
+#define try_module_get                  __MOD_INC_USE_COUNT
+#define module_put                      __MOD_DEC_USE_COUNT
+#define LTIME_S(time)                   (time)
+#ifndef CONFIG_RH_2_4_20
+/* Test bit "cpu" of cpu_online_map.  The argument is parenthesized and the
+ * mask is built from 1UL so the shift is not performed in (signed) int. */
+#define cpu_online(cpu)                 (cpu_online_map & (1UL << (cpu)))
 #endif
+#define USERMODEHELPER(path, argv, envp) \
+                                        call_usermodehelper(path, argv, envp)
+static inline int ll_path_lookup(const char *path, unsigned flags, 
+                              struct nameidata *nd)
+{
+        int error = 0;
+        if (path_init(path, flags, nd))
+                error = path_walk(path, nd);
+        return error;
+}
+typedef long sector_t;
+
+#define ll_pgcache_lock(mapping)        spin_lock(&pagecache_lock)
+#define ll_pgcache_unlock(mapping)      spin_unlock(&pagecache_lock)
 
+#endif /* end of 2.4 compat macros */
 
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
 # define filemap_fdatasync(mapping)       filemap_fdatawrite(mapping)
 # define Page_Uptodate(page)              PageUptodate(page)
 #endif
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
-# define USERMODEHELPER(path, argv, envp) call_usermodehelper(path, argv, envp, 0)
-#else
-# define USERMODEHELPER(path, argv, envp) call_usermodehelper(path, argv, envp)
-#endif
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
-# define LL_CHECK_DIRTY(sb)              do { }while(0)
-#else
-# define LL_CHECK_DIRTY(sb)              ll_check_dirty(sb)
-#endif
-
 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
 #define  rb_node_s rb_node
 #define  rb_root_s rb_root
@@ -73,4 +106,5 @@ typedef struct rb_root_s rb_root_t;
 typedef struct rb_node_s rb_node_t;
 #endif
 
+#endif /* __KERNEL__ */
 #endif /* _COMPAT25_H */
index 2db4196..8fc90ae 100644 (file)
@@ -188,6 +188,7 @@ struct ldlm_lock {
          * it's no longer in use.  If the lock is not granted, a process sleeps
          * on this waitq to learn when it becomes granted. */
         wait_queue_head_t     l_waitq;
+        struct timeval        l_enqueued_time;
 };
 
 typedef int (*ldlm_res_compat)(struct ldlm_lock *child, struct ldlm_lock *new);
@@ -316,6 +317,8 @@ int ldlm_namespace_foreach_res(struct ldlm_namespace *ns,
                                ldlm_res_iterator_t iter, void *closure);
 
 int ldlm_replay_locks(struct obd_import *imp);
+void ldlm_change_cbdata(struct ldlm_namespace *, struct ldlm_res_id *,
+                        ldlm_iterator_t iter, void *data);
 
 /* ldlm_extent.c */
 int ldlm_extent_compat(struct ldlm_lock *, struct ldlm_lock *);
@@ -450,6 +453,8 @@ int ldlm_cli_cancel_unused(struct ldlm_namespace *, struct ldlm_res_id *,
 
 /* mds/handler.c */
 /* This has to be here because recursive inclusion sucks. */
+int intent_disposition(struct ldlm_reply *rep, int flag);
+void intent_set_disposition(struct ldlm_reply *rep, int flag);
 int mds_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
                      void *data, int flag);
 
index 6939a95..677ddc6 100644 (file)
@@ -11,7 +11,7 @@
 #define __EXPORT_H
 
 #include <linux/lustre_idl.h>
-#include <linux/obd_filter.h>
+#include <linux/lustre_dlm.h>
 
 struct mds_client_data;
 
@@ -19,7 +19,8 @@ struct mds_export_data {
         struct list_head        med_open_head;
         spinlock_t              med_open_lock;
         struct mds_client_data *med_mcd;
-        int                     med_off;
+        loff_t                  med_off;
+        int                     med_idx;
 };
 
 struct ldlm_export_data {
@@ -37,6 +38,16 @@ struct ec_export_data { /* echo client */
         struct list_head eced_locks;
 };
 
+/* In-memory access to client data from OST struct */
+struct filter_client_data;
+struct filter_export_data {
+        struct list_head           fed_open_head; //files to close on disconnect
+        spinlock_t                 fed_lock;      /* protects fed_open_head */
+        struct filter_client_data *fed_fcd;
+        loff_t                     fed_lr_off;
+        int                        fed_lr_idx;
+};
+
 struct obd_export {
         struct portals_handle     exp_handle;
         atomic_t                  exp_refcount;
@@ -48,7 +59,8 @@ struct obd_export {
         struct ptlrpc_request    *exp_outstanding_reply;
         time_t                    exp_last_request_time;
         spinlock_t                exp_lock; /* protects flags int below */
-        int                       exp_failed:1, exp_failover:1;
+        int                       exp_failed:1;
+        int                       exp_flags;
         union {
                 struct mds_export_data    eu_mds_data;
                 struct filter_export_data eu_filter_data;
index fc00fe1..37ffc4f 100644 (file)
@@ -30,7 +30,8 @@
 #include <linux/obd.h>
 #include <linux/fs.h>
 
-typedef void (*fsfilt_cb_t)(struct obd_device *obd, __u64 last_rcvd, int error);
+typedef void (*fsfilt_cb_t)(struct obd_device *obd, __u64 last_rcvd,
+                            void *data, int error);
 
 struct fsfilt_objinfo {
         struct dentry *fso_dentry;
@@ -41,9 +42,9 @@ struct fsfilt_operations {
         struct list_head fs_list;
         struct module *fs_owner;
         char   *fs_type;
-        void   *(* fs_start)(struct inode *inode, int op);
+        void   *(* fs_start)(struct inode *inode, int op, void *desc_private);
         void   *(* fs_brw_start)(int objcount, struct fsfilt_objinfo *fso,
-                                 int niocount, struct niobuf_remote *nb);
+                                 int niocount, void *desc_private);
         int     (* fs_commit)(struct inode *inode, void *handle,int force_sync);
         int     (* fs_setattr)(struct dentry *dentry, void *handle,
                                struct iattr *iattr, int do_trunc);
@@ -54,16 +55,19 @@ struct fsfilt_operations {
                                 loff_t *offset);
         int     (* fs_journal_data)(struct file *file);
         int     (* fs_set_last_rcvd)(struct obd_device *obd, __u64 last_rcvd,
-                                     void *handle, fsfilt_cb_t cb_func);
+                                     void *handle, fsfilt_cb_t cb_func,
+                                     void *cb_data);
         int     (* fs_statfs)(struct super_block *sb, struct obd_statfs *osfs);
         int     (* fs_sync)(struct super_block *sb);
         int     (* fs_prep_san_write)(struct inode *inode, long *blocks,
                                       int nblocks, loff_t newsize);
+        int     (* fs_write_record)(struct file *, char *, int size, loff_t *);
+        int     (* fs_read_record)(struct file *, char *, int size, loff_t *);
 };
 
 extern int fsfilt_register_ops(struct fsfilt_operations *fs_ops);
 extern void fsfilt_unregister_ops(struct fsfilt_operations *fs_ops);
-extern struct fsfilt_operations *fsfilt_get_ops(char *type);
+extern struct fsfilt_operations *fsfilt_get_ops(const char *type);
 extern void fsfilt_put_ops(struct fsfilt_operations *fs_ops);
 
 #define FSFILT_OP_UNLINK         1
@@ -75,26 +79,53 @@ extern void fsfilt_put_ops(struct fsfilt_operations *fs_ops);
 #define FSFILT_OP_MKNOD          7
 #define FSFILT_OP_SETATTR        8
 #define FSFILT_OP_LINK           9
+#define FSFILT_OP_CREATE_LOG    10
+#define FSFILT_OP_UNLINK_LOG    11
 
-static inline void *fsfilt_start(struct obd_device *obd,
-                                 struct inode *inode, int op)
+static inline void *fsfilt_start(struct obd_device *obd, struct inode *inode,
+                                 int op, struct obd_trans_info *oti)
 {
         unsigned long now = jiffies;
-        void *handle = obd->obd_fsops->fs_start(inode, op);
-        CDEBUG(D_HA, "started handle %p\n", handle);
-        if (time_after(jiffies, now + 15*HZ))
+        void *parent_handle = oti ? oti->oti_handle : NULL;
+        void *handle = obd->obd_fsops->fs_start(inode, op, parent_handle);
+        CDEBUG(D_HA, "started handle %p (%p)\n", handle, parent_handle);
+
+        if (oti != NULL) {
+                if (parent_handle == NULL) {
+                        oti->oti_handle = handle;
+                } else if (handle != parent_handle) {
+                        CERROR("mismatch: parent %p, handle %p, oti %p\n",
+                               parent_handle, handle, oti->oti_handle);
+                        LBUG();
+                }
+        }
+        if (time_after(jiffies, now + 15 * HZ))
                 CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
         return handle;
 }
 
 static inline void *fsfilt_brw_start(struct obd_device *obd, int objcount,
                                      struct fsfilt_objinfo *fso, int niocount,
-                                     struct niobuf_remote *nb)
+                                     struct obd_trans_info *oti)
 {
         unsigned long now = jiffies;
-        void *handle = obd->obd_fsops->fs_brw_start(objcount, fso, niocount,nb);
-        CDEBUG(D_HA, "started handle %p\n", handle);
-        if (time_after(jiffies, now + 15*HZ))
+        void *parent_handle = oti ? oti->oti_handle : NULL;
+        void *handle;
+
+        handle = obd->obd_fsops->fs_brw_start(objcount, fso, niocount,
+                                              parent_handle);
+        CDEBUG(D_HA, "started handle %p (%p)\n", handle, parent_handle);
+
+        if (oti != NULL) {
+                if (parent_handle == NULL) {
+                        oti->oti_handle = handle;
+                } else if (handle != parent_handle) {
+                        CERROR("mismatch: parent %p, handle %p, oti %p\n",
+                               parent_handle, handle, oti->oti_handle);
+                        LBUG();
+                }
+        }
+        if (time_after(jiffies, now + 15 * HZ))
                 CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
         return handle;
 }
@@ -105,7 +136,7 @@ static inline int fsfilt_commit(struct obd_device *obd, struct inode *inode,
         unsigned long now = jiffies;
         int rc = obd->obd_fsops->fs_commit(inode, handle, force_sync);
         CDEBUG(D_HA, "committing handle %p\n", handle);
-        if (time_after(jiffies, now + 15*HZ))
+        if (time_after(jiffies, now + 15 * HZ))
                 CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
         return rc;
 }
@@ -116,9 +147,8 @@ static inline int fsfilt_setattr(struct obd_device *obd, struct dentry *dentry,
         unsigned long now = jiffies;
         int rc;
         rc = obd->obd_fsops->fs_setattr(dentry, handle, iattr, do_trunc);
-        if (time_after(jiffies, now + 15*HZ))
+        if (time_after(jiffies, now + 15 * HZ))
                 CERROR("long setattr time %lus\n", (jiffies - now) / HZ);
-
         return rc;
 }
 
@@ -147,9 +177,11 @@ static inline int fsfilt_journal_data(struct obd_device *obd, struct file *file)
 }
 
 static inline int fsfilt_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
-                                       void *handle, fsfilt_cb_t cb_func)
+                                       void *handle, fsfilt_cb_t cb_func,
+                                       void *cb_data)
 {
-        return obd->obd_fsops->fs_set_last_rcvd(obd, last_rcvd,handle,cb_func);
+        return obd->obd_fsops->fs_set_last_rcvd(obd, last_rcvd, handle,
+                                                cb_func, cb_data);
 }
 
 static inline int fsfilt_statfs(struct obd_device *obd, struct super_block *fs,
@@ -172,6 +204,19 @@ static inline int fs_prep_san_write(struct obd_device *obd,
         return obd->obd_fsops->fs_prep_san_write(inode, blocks,
                                                  nblocks, newsize);
 }
+
+static inline int fsfilt_read_record(struct obd_device *obd, struct file *file,
+                                     char *buf, loff_t size, loff_t *offs)
+{
+        return obd->obd_fsops->fs_read_record(file, buf, size, offs);
+}
+
+static inline int fsfilt_write_record(struct obd_device *obd, struct file *file,
+                                      char *buf, loff_t size, loff_t *offs)
+{
+        return obd->obd_fsops->fs_write_record(file, buf, size, offs);
+}
+
 #endif /* __KERNEL__ */
 
 #endif
index f4a5f2d..055b7a4 100644 (file)
@@ -18,7 +18,7 @@
  *   along with Lustre; if not, write to the Free Software
  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  *
- * (Un)packing of OST requests
+ * Lustre wire protocol definitions.
  *
  * We assume all nodes are either little-endian or big-endian, and we
  * always send messages in the sender's native format.  The receiver
@@ -29,9 +29,9 @@
  * implemented either here, inline (trivial implementations) or in
  * ptlrpc/pack_generic.c.  These 'swabbers' convert the type from "other"
  * endian, in-place in the message buffer.
- * 
+ *
  * A swabber takes a single pointer argument.  The caller must already have
- * verified that the length of the message buffer >= sizeof (type).  
+ * verified that the length of the message buffer >= sizeof (type).
  *
  * For variable length types, a second 'lustre_swab_v_xxxtypexxx()' routine
  * may be defined that swabs just the variable part, after the caller has
@@ -90,29 +90,33 @@ extern struct obd_uuid lctl_fake_uuid;
  * FOO_BULK_PORTAL    is for incoming bulk on the FOO
  */
 
-#define CONNMGR_REQUEST_PORTAL  1
-#define CONNMGR_REPLY_PORTAL    2
-//#define OSC_REQUEST_PORTAL      3
-#define OSC_REPLY_PORTAL        4
-//#define OSC_BULK_PORTAL         5
-#define OST_REQUEST_PORTAL      6
-//#define OST_REPLY_PORTAL        7
-#define OST_BULK_PORTAL         8
-//#define MDC_REQUEST_PORTAL      9
-#define MDC_REPLY_PORTAL        10
-//#define MDC_BULK_PORTAL         11
-#define MDS_REQUEST_PORTAL      12
-//#define MDS_REPLY_PORTAL        13
-#define MDS_BULK_PORTAL         14
-#define LDLM_CB_REQUEST_PORTAL     15
-#define LDLM_CB_REPLY_PORTAL       16
+#define CONNMGR_REQUEST_PORTAL          1
+#define CONNMGR_REPLY_PORTAL            2
+//#define OSC_REQUEST_PORTAL            3
+#define OSC_REPLY_PORTAL                4
+//#define OSC_BULK_PORTAL               5
+#define OST_REQUEST_PORTAL              6
+//#define OST_REPLY_PORTAL              7
+#define OST_BULK_PORTAL                 8
+//#define MDC_REQUEST_PORTAL            9
+#define MDC_REPLY_PORTAL               10
+//#define MDC_BULK_PORTAL              11
+#define MDS_REQUEST_PORTAL             12
+//#define MDS_REPLY_PORTAL             13
+#define MDS_BULK_PORTAL                14
+#define LDLM_CB_REQUEST_PORTAL         15
+#define LDLM_CB_REPLY_PORTAL           16
 #define LDLM_CANCEL_REQUEST_PORTAL     17
 #define LDLM_CANCEL_REPLY_PORTAL       18
 #define PTLBD_REQUEST_PORTAL           19
 #define PTLBD_REPLY_PORTAL             20
 #define PTLBD_BULK_PORTAL              21
-#define MDS_SETATTR_PORTAL      22
-#define MDS_READPAGE_PORTAL     23
+#define MDS_SETATTR_PORTAL             22
+#define MDS_READPAGE_PORTAL            23
+#define MGMT_REQUEST_PORTAL            24
+#define MGMT_REPLY_PORTAL              25
+#define MGMT_CLI_REQUEST_PORTAL        26
+#define MGMT_CLI_REPLY_PORTAL          27
 
 #define SVC_KILLED               1
 #define SVC_EVENT                2
@@ -159,7 +163,7 @@ struct lustre_msg {
 
 static inline int lustre_msg_swabbed (struct lustre_msg *msg)
 {
-        return (msg->magic == __swab32 (PTLRPC_MSG_MAGIC));
+        return (msg->magic == __swab32(PTLRPC_MSG_MAGIC));
 }
 
 /* Flags that are operation-specific go in the top 16 bits. */
@@ -207,9 +211,10 @@ static inline void lustre_msg_set_op_flags(struct lustre_msg *msg, int flags)
  * Flags for all connect opcodes (MDS_CONNECT, OST_CONNECT)
  */
 
-#define MSG_CONNECT_RECOVERING 0x1
-#define MSG_CONNECT_RECONNECT  0x2
+#define MSG_CONNECT_RECOVERING  0x1
+#define MSG_CONNECT_RECONNECT   0x2
 #define MSG_CONNECT_REPLAYABLE  0x4
+#define MSG_CONNECT_PEER        0x8
 
 /*
  *   OST requests: OBDO & OBD request records
@@ -234,13 +239,13 @@ typedef enum {
         OST_SAN_READ   = 14,
         OST_SAN_WRITE  = 15,
         OST_SYNCFS     = 16,
+        OST_SET_INFO   = 17,
         OST_LAST_OPC
 } ost_cmd_t;
 #define OST_FIRST_OPC  OST_REPLY
 /* When adding OST RPC opcodes, please update 
  * LAST/FIRST macros used in ptlrpc/ptlrpc_internals.h */
 
-
 typedef uint64_t        obd_id;
 typedef uint64_t        obd_gr;
 typedef uint64_t        obd_time;
@@ -324,8 +329,23 @@ struct lov_mds_md {
 #define OBD_MD_LINKNAME (0x00040000)    /* symbolic link target */
 #define OBD_MD_FLHANDLE (0x00080000)    /* file handle */
 #define OBD_MD_FLCKSUM  (0x00100000)    /* bulk data checksum */
+#define OBD_MD_FLQOS    (0x00200000)    /* quality of service stats */
+#define OBD_MD_FLOSCOPQ (0x00400000)    /* osc opaque data */
+#define OBD_MD_FLCOOKIE (0x00800000)    /* log cancellation cookie */
 #define OBD_MD_FLNOTOBD (~(OBD_MD_FLOBDFLG | OBD_MD_FLBLOCKS | OBD_MD_LINKNAME|\
-                           OBD_MD_FLEASIZE | OBD_MD_FLHANDLE | OBD_MD_FLCKSUM))
+                           OBD_MD_FLEASIZE | OBD_MD_FLHANDLE | OBD_MD_FLCKSUM|\
+                           OBD_MD_FLQOS | OBD_MD_FLOSCOPQ | OBD_MD_FLCOOKIE))
+
+/* View the start of the obdo's o_inline area as a struct lustre_handle. */
+static inline struct lustre_handle *obdo_handle(struct obdo *oa)
+{
+        struct lustre_handle *handle;
+
+        handle = (struct lustre_handle *)oa->o_inline;
+        return handle;
+}
+
+/* View the part of o_inline that starts sizeof(struct lustre_handle) past its
+ * beginning as a struct llog_cookie.
+ * NOTE(review): nothing here checks that o_inline is large enough, or suitably
+ * aligned, to hold both structures - confirm against struct obdo. */
+static inline struct llog_cookie *obdo_logcookie(struct obdo *oa)
+{
+        void *cookie_area = oa->o_inline + sizeof(struct lustre_handle);
+
+        return (struct llog_cookie *)cookie_area;
+}
 
 struct obd_statfs {
         __u64           os_type;
@@ -399,6 +419,8 @@ typedef enum {
         MDS_GETSTATUS    = 40,
         MDS_STATFS       = 41,
         MDS_GETLOVINFO   = 42,
+        MDS_PIN          = 43,
+        MDS_UNPIN        = 44,
         MDS_LAST_OPC
 } mds_cmd_t;
 #define MDS_FIRST_OPC    MDS_GETATTR
@@ -417,12 +439,20 @@ typedef enum {
 #define REINT_OPEN     6
 #define REINT_MAX      6
 
-#define IT_INTENT_EXEC   1
-#define IT_OPEN_LOOKUP  (1 << 1)
-#define IT_OPEN_NEG     (1 << 2)
-#define IT_OPEN_POS     (1 << 3)
-#define IT_OPEN_CREATE  (1 << 4)
-#define IT_OPEN_OPEN    (1 << 5)
+/* Disposition bits of an intent: each one marks a step that was executed. */
+#define DISP_IT_EXECD           1
+#define DISP_LOOKUP_EXECD       (1 << 1)
+#define DISP_LOOKUP_NEG         (1 << 2)
+#define DISP_LOOKUP_POS         (1 << 3)
+#define DISP_OPEN_CREATE        (1 << 4)
+#define DISP_OPEN_OPEN          (1 << 5)
+#define DISP_ENQ_COMPLETE       (1 << 6)
+
+
+struct ll_uctxt {
+        __u32 gid1;
+        __u32 gid2;
+};
 
 struct ll_fid {
         __u64 id;
@@ -504,6 +534,11 @@ struct mds_rec_setattr {
         __u32           sa_suppgid;
 };
 
+/* Remove this once we declare it in include/linux/fs.h (v21 kernel patch?) */
+#ifndef ATTR_CTIME_SET
+#define ATTR_CTIME_SET 0x2000
+#endif
+
 extern void lustre_swab_mds_rec_setattr (struct mds_rec_setattr *sa);
 
 struct mds_rec_create {
@@ -720,9 +755,109 @@ struct ptlbd_rsp {
 extern void lustre_swab_ptlbd_rsp (struct ptlbd_rsp *r);
 
 /*
+ * Opcodes for management/monitoring node.
+ */
+#define MGMT_CONNECT    250
+#define MGMT_DISCONNECT 251
+#define MGMT_EXCEPTION  252 /* node died, etc. */
+
+/*
  * Opcodes for multiple servers.
  */
 
-#define OBD_PING 400
+#define OBD_PING       400
+#define OBD_LOG_CANCEL 401
+#define OBD_LAST_OPC  (OBD_LOG_CANCEL + 1)
+#define OBD_FIRST_OPC OBD_PING
+
+/* catalog of log objects */
+
+/* Identifier for a single log object */
+struct llog_logid {
+        __u64                   lgl_oid;
+        __u32                   lgl_ogen; /* NOTE(review): the struct is not
+                                           * packed, so ABIs that align __u64
+                                           * to 8 bytes add 4 bytes of tail
+                                           * padding here - confirm whether
+                                           * this layout is shared between
+                                           * hosts or stored on disk */
+};
+
+/* Log data record types - there is no specific reason that these need to
+ * be related to the RPC opcodes, but no reason not to (may be handy later?)
+ */
+typedef enum {
+        OST_CREATE_REC = 0x10600000 | (OST_CREATE << 8), /* OST opcode placed in bits 8 and up */
+        OST_ORPHAN_REC = 0x10600000 | (OST_DESTROY << 8),
+        MDS_UNLINK_REC = 0x10610000 | (MDS_REINT << 8) | REINT_UNLINK, /* MDS opcode << 8, reint type in the low bits */
+        LLOG_CATALOG_MAGIC = 0x1062e67d,
+        LLOG_OBJECT_MAGIC = 0x10645539, /* the value llog_log_swabbed() tests lth_type against */
+} llog_op_type;
+
+/* Log record header - stored in originating host endian order (use magic to
+ * check order).
+ * Each record must start with this struct, end with a __u32 for the struct
+ * length, and be a multiple of 64 bits in size.
+ */
+struct llog_trans_hdr {
+        __u32                   lth_len;  /* presumably the record length - TODO confirm units */
+        __u32                   lth_type; /* llog_log_swabbed() compares this with LLOG_OBJECT_MAGIC */
+};
+
+struct llog_create_rec {
+        struct llog_trans_hdr   lcr_hdr;     /* common header every record must start with */
+        struct ll_fid           lcr_fid;
+        obd_id                  lcr_oid;
+        obd_count               lcr_ogen;
+        __u32                   lcr_end_len; /* trailing __u32 length every record must end with */
+} __attribute__((packed));                   /* packed: no compiler-inserted padding between fields */
+
+struct llog_orphan_rec {
+        struct llog_trans_hdr   lor_hdr;     /* common header every record must start with */
+        obd_id                  lor_oid;
+        obd_count               lor_ogen;
+        __u32                   lor_end_len; /* trailing __u32 length every record must end with */
+} __attribute__((packed));                   /* same field layout as struct llog_unlink_rec */
+
+struct llog_unlink_rec {
+        struct llog_trans_hdr   lur_hdr;     /* common header every record must start with */
+        obd_id                  lur_oid;
+        obd_count               lur_ogen;
+        __u32                   lur_end_len; /* trailing __u32 length every record must end with */
+} __attribute__((packed));                   /* same field layout as struct llog_orphan_rec */
+
+/* On-disk header structure of each log object - stored in creating host
+ * endian order, with the exception of the bitmap - stored in little endian
+ * order so that we can use ext2_{clear,set,test}_bit() for proper/optimized
+ * little-endian handling of bitmaps (which are otherwise a pain to handle).
+ */
+#define LLOG_CHUNK_SIZE         4096
+#define LLOG_HEADER_SIZE        (96)
+#define LLOG_BITMAP_BYTES       (LLOG_CHUNK_SIZE - LLOG_HEADER_SIZE)
+
+#define LLOG_MIN_REC_SIZE       (16) /* round(struct llog_trans_hdr+end_len) */
+
+struct llog_object_hdr {
+        struct llog_trans_hdr   llh_hdr; /* llog_log_swabbed() checks llh_hdr.lth_type against LLOG_OBJECT_MAGIC */
+        __u64                   llh_timestamp;
+        __u32                   llh_count;
+        __u16                   llh_bitmap_offset;
+        __u16                   llh_unused;
+        struct obd_uuid         llh_tgtuuid;
+        __u8                    llh_padding[3];
+        __u32                   llh_reserved[LLOG_HEADER_SIZE/sizeof(__u32)-17]; /* 96/4 - 17 = 7 words */
+        __u32                   llh_bitmap[LLOG_BITMAP_BYTES/sizeof(__u32)]; /* (4096 - 96)/4 = 1000 words */
+        __u32                   llh_hdr_end_len;
+}; /* NOTE(review): not packed, unlike the llog_*_rec structs; the total only
+    * equals LLOG_CHUNK_SIZE if struct obd_uuid plus llh_padding[3] fill
+    * exactly 40 bytes - confirm against the definition of struct obd_uuid */
+
+/* Classify the byte order of a log object header from its lth_type field:
+ * returns 1 when the field equals the 32-bit byte-swapped LLOG_OBJECT_MAGIC,
+ * 0 when it equals LLOG_OBJECT_MAGIC as-is, and -1 when it matches neither. */
+static inline int llog_log_swabbed(struct llog_object_hdr *hdr)
+{
+        __u32 magic = hdr->llh_hdr.lth_type;
+
+        if (magic == __swab32(LLOG_OBJECT_MAGIC))
+                return 1;
+        return magic == LLOG_OBJECT_MAGIC ? 0 : -1;
+}
+
+/* log cookies are used to reference a specific log file and a record therein */
+struct llog_cookie {
+        struct llog_logid       lgc_lgl;   /* presumably names the log file - TODO confirm */
+        __u32                   lgc_index; /* presumably the record's index in that log - TODO confirm */
+}; /* NOTE(review): not packed; sizeof depends on the ABI's __u64 alignment */
 
 #endif
index b18e2d2..467132b 100644 (file)
@@ -80,7 +80,7 @@ void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id);
 
 int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf);
 int client_sanobd_setup(struct obd_device *obddev, obd_count len, void *buf);
-int client_obd_cleanup(struct obd_device * obddev, int force, int failover);
+int client_obd_cleanup(struct obd_device * obddev, int flags);
 struct client_obd *client_conn2cli(struct lustre_handle *conn);
 struct obd_device *client_tgtuuid2obd(struct obd_uuid *tgtuuid);
 
@@ -89,13 +89,16 @@ struct obd_device *client_tgtuuid2obd(struct obd_uuid *tgtuuid);
  * the server, we can just send the whole struct unaltered. */
 struct obd_client_handle {
         struct lustre_handle och_fh;
+        struct llog_cookie och_cookie;
         struct ptlrpc_request *och_req;
         __u32 och_magic;
 };
 #define OBD_CLIENT_HANDLE_MAGIC 0xd15ea5ed
 
 /* statfs_pack.c */
-int obd_self_statfs(struct obd_device *dev, struct statfs *sfs);
+struct statfs;
+void statfs_pack(struct obd_statfs *osfs, struct kstatfs *sfs);
+void statfs_unpack(struct kstatfs *sfs, struct obd_statfs *osfs);
 
 /* l_lock.c */
 struct lustre_lock {
index 81184e7..fa83fb2 100644 (file)
 
 #ifdef __KERNEL__
 
+#include <linux/version.h>
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#include <asm/statfs.h>
+#endif
+
 #include <linux/fs.h>
+#include <linux/dcache.h>
 #include <linux/ext2_fs.h>
 #include <linux/proc_fs.h>
 
@@ -46,20 +53,62 @@ struct lustre_intent_data {
         __u32 it_lock_mode;
 };
 
+#define LL_IT2STR(it) ((it) ? ldlm_it2str((it)->it_op) : "0") /* NB: evaluates 'it' twice; yields "0" for a NULL intent */
+
+static inline struct lookup_intent *ll_nd2it(struct nameidata *nd) /* returns the lookup intent held in nd */
+{
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+        return &nd->it; /* this branch: nd->it is an embedded object, so its address is taken */
+#else
+        return nd->it;  /* this branch: nd->it is already a pointer */
+#endif
+}
+
 struct ll_dentry_data {
-        struct semaphore      lld_it_sem;
+        int                      lld_cwd_count;
+        int                      lld_mnt_count;
+        struct obd_client_handle lld_cwd_och;
+        struct obd_client_handle lld_mnt_och;
 };
 
-#define ll_d2d(dentry) ((struct ll_dentry_data*) dentry->d_fsdata)
+/* Fetch the Lustre per-dentry data stored in d_fsdata.  The argument is
+ * parenthesized so that any pointer expression may be passed safely. */
+#define ll_d2d(de) ((struct ll_dentry_data*) (de)->d_fsdata)
 
 extern struct file_operations ll_pgcache_seq_fops;
 
+/*
+ * XXX used in obdecho/echo_client.c; must move (pjb)
+ * 'p' list, as it's a list of pages linked together
+ * by ->private.
+ */
+struct plist {
+        struct page *pl_head;
+        struct page *pl_tail;
+        int pl_num;
+};
+
+struct ll_dirty_offsets {
+        rb_root_t       do_root;
+        spinlock_t      do_lock;
+        unsigned long   do_num_dirty;
+};
+
+struct ll_writeback_pages {
+        obd_count npgs, max;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+        int rw;
+        struct inode *inode;
+        struct brw_page pga[0]; /* zero-length trailing array (GNU C); presumably sized at allocation - TODO confirm */
+#else
+        struct brw_page *pga;   /* this branch: the page array is reached through a pointer instead */
+#endif
+};
+
 struct ll_inode_info {
         struct lov_stripe_md   *lli_smd;
         char                   *lli_symlink_name;
         struct semaphore        lli_open_sem;
         struct list_head        lli_read_extents;
-        loff_t                  lli_maxbytes;
+        __u64                   lli_maxbytes;
         spinlock_t              lli_read_extent_lock;
         unsigned long           lli_flags;
 #define LLI_F_HAVE_SIZE_LOCK    0
@@ -81,13 +130,6 @@ struct ll_read_extent {
         struct ldlm_extent re_extent;
 };
 
-int ll_check_dirty( struct super_block *sb );
-int ll_batch_writepage( struct inode *inode, struct page *page );
-
-/* interpet return codes from intent lookup */
-#define LL_LOOKUP_POSITIVE 1
-#define LL_LOOKUP_NEGATIVE 2
-
 #define LL_SUPER_MAGIC 0x0BD00BD0
 
 #define LL_COMMITCBD_STOPPING  0x1
@@ -118,14 +160,22 @@ struct ll_sb_info {
         struct lprocfs_stats     *ll_stats; /* lprocfs stats counter */
 };
 
-static inline struct ll_sb_info *ll_s2sbi(struct super_block *sb)
-{
+
 #if  (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-        return (struct ll_sb_info *)(sb->s_fs_info);
-#else
-        return (struct ll_sb_info *)(sb->u.generic_sbp);
-#endif
+#define    ll_s2sbi(sb)     ((struct ll_sb_info *)((sb)->s_fs_info))
+void __d_rehash(struct dentry * entry, int lock);
+/* Return the tv_sec member of *time converted to __u64; tv_nsec is not
+ * used. */
+static inline __u64 ll_ts2u64(struct timespec *time)
+{
+        return (__u64)time->tv_sec;
+}
+#else  /* 2.4 here */
+#define    ll_s2sbi(sb)     ((struct ll_sb_info *)((sb)->u.generic_sbp))
+static inline __u64 ll_ts2u64(time_t *time)
+{
+        return *time;
 }
+#endif 
 
 static inline struct lustre_handle *ll_s2obdconn(struct super_block *sb)
 {
@@ -146,29 +196,6 @@ static inline struct ll_sb_info *ll_i2sbi(struct inode *inode)
         return ll_s2sbi(inode->i_sb);
 }
 
-static inline void d_unhash_aliases(struct inode *inode)
-{
-        struct dentry *dentry = NULL;
-        struct list_head *tmp;
-        struct ll_sb_info *sbi = ll_i2sbi(inode);
-        ENTRY;
-
-        CDEBUG(D_INODE, "marking dentries for ino %lx/%x invalid\n",
-               inode->i_ino, inode->i_generation);
-
-        spin_lock(&dcache_lock);
-        list_for_each(tmp, &inode->i_dentry) {
-                dentry = list_entry(tmp, struct dentry, d_alias);
-
-                list_del_init(&dentry->d_hash);
-                dentry->d_flags |= DCACHE_LUSTRE_INVALID;
-                list_add(&dentry->d_hash, &sbi->ll_orphan_dentry_list);
-        }
-
-        spin_unlock(&dcache_lock);
-        EXIT;
-}
-
 // FIXME: replace the name of this with LL_I to conform to kernel stuff
 // static inline struct ll_inode_info *LL_I(struct inode *inode)
 static inline struct ll_inode_info *ll_i2info(struct inode *inode)
@@ -199,21 +226,17 @@ static inline int ll_mds_max_easize(struct super_block *sb)
         return sbi2mdc(ll_s2sbi(sb))->cl_max_mds_easize;
 }
 
-static inline loff_t ll_file_maxbytes(struct inode *inode)
+static inline __u64 ll_file_maxbytes(struct inode *inode)
 {
         return ll_i2info(inode)->lli_maxbytes;
 }
 
 /* namei.c */
-int ll_lock(struct inode *dir, struct dentry *dentry,
-            struct lookup_intent *it, struct lustre_handle *lockh);
-int ll_unlock(__u32 mode, struct lustre_handle *lockh);
-
-typedef int (*intent_finish_cb)(int flag, struct ptlrpc_request *,
+typedef int (*intent_finish_cb)(struct ptlrpc_request *,
                                 struct inode *parent, struct dentry **, 
                                 struct lookup_intent *, int offset, obd_id ino);
 int ll_intent_lock(struct inode *parent, struct dentry **,
-                   struct lookup_intent *, intent_finish_cb);
+                   struct lookup_intent *, int, intent_finish_cb);
 int ll_mdc_blocking_ast(struct ldlm_lock *lock,
                         struct ldlm_lock_desc *desc,
                         void *data, int flag);
@@ -222,51 +245,7 @@ void ll_prepare_mdc_op_data(struct mdc_op_data *data,
                             struct inode *i1, struct inode *i2,
                             const char *name, int namelen, int mode);
 
-/* dcache.c */
-void ll_intent_release(struct dentry *, struct lookup_intent *);
-
-/****
-
-I originally implmented these as functions, then realized a macro
-would be more helpful for debugging, so the CDEBUG messages show
-the current calling function.  The orignal functions are in llite/dcache.c
-
-int ll_save_intent(struct dentry * de, struct lookup_intent * it);
-struct lookup_intent * ll_get_intent(struct dentry * de);
-****/
-
-#define IT_RELEASED_MAGIC 0xDEADCAFE
-
-#define LL_SAVE_INTENT(de, it)                                                 \
-do {                                                                           \
-        LASSERT(ll_d2d(de) != NULL);                                           \
-                                                                               \
-        down(&ll_d2d(de)->lld_it_sem);                                         \
-        LASSERT(de->d_it == NULL);                                             \
-        de->d_it = it;                                                         \
-        CDEBUG(D_DENTRY,                                                       \
-               "D_IT DOWN dentry %p fsdata %p intent: %p %s sem %d\n",         \
-               de, ll_d2d(de), de->d_it, ldlm_it2str(de->d_it->it_op),         \
-               atomic_read(&(ll_d2d(de)->lld_it_sem.count)));                  \
-} while(0)
-
-#define LL_GET_INTENT(de, it)                                                  \
-do {                                                                           \
-        it = de->d_it;                                                         \
-                                                                               \
-        LASSERT(ll_d2d(de) != NULL);                                           \
-        LASSERT(it);                                                           \
-        LASSERT(it->it_op != IT_RELEASED_MAGIC);                               \
-                                                                               \
-        CDEBUG(D_DENTRY, "D_IT UP dentry %p fsdata %p intent: %p %s\n",        \
-               de, ll_d2d(de), de->d_it, ldlm_it2str(de->d_it->it_op));        \
-        de->d_it = NULL;                                                       \
-        it->it_op = IT_RELEASED_MAGIC;                                         \
-        up(&ll_d2d(de)->lld_it_sem);                                           \
-} while(0)
-
-#define LL_IT2STR(it) ((it) ? ldlm_it2str((it)->it_op) : "0")
-
+/* lprocfs.c */
 enum {
          LPROC_LL_DIRTY_HITS = 0,
          LPROC_LL_DIRTY_MISSES,
@@ -312,8 +291,6 @@ extern struct file_operations ll_file_operations;
 extern struct inode_operations ll_file_inode_operations;
 extern struct inode_operations ll_special_inode_operations;
 struct ldlm_lock;
-int ll_extent_lock_callback(struct ldlm_lock *, struct ldlm_lock_desc *,
-                            void *data, int flag);
 int ll_extent_lock_no_validate(struct ll_file_data *fd, struct inode *inode,
                    struct lov_stripe_md *lsm, int mode,
                    struct ldlm_extent *extent, struct lustre_handle *lockh);
@@ -329,30 +306,22 @@ int ll_file_open(struct inode *inode, struct file *file);
 int ll_file_release(struct inode *inode, struct file *file);
 
 
-/* rw.c */
-struct page *ll_getpage(struct inode *inode, unsigned long offset,
-                        int create, int locked);
-void ll_truncate(struct inode *inode);
 
 /* super.c */
 void ll_update_inode(struct inode *, struct mds_body *, struct lov_stripe_md *);
 int ll_setattr_raw(struct inode *inode, struct iattr *attr);
+int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs,
+                       unsigned long maxage);
 
 /* symlink.c */
 extern struct inode_operations ll_fast_symlink_inode_operations;
 extern struct inode_operations ll_symlink_inode_operations;
 
-/* sysctl.c */
-void ll_sysctl_init(void);
-void ll_sysctl_clean(void);
-
 #else
 #include <linux/lustre_idl.h>
 #endif /* __KERNEL__ */
 
-static inline void ll_ino2fid(struct ll_fid *fid,
-                              obd_id ino,
-                              __u32 generation,
+static inline void ll_ino2fid(struct ll_fid *fid, obd_id ino, __u32 generation,
                               int type)
 {
         fid->id = ino;
@@ -360,11 +329,6 @@ static inline void ll_ino2fid(struct ll_fid *fid,
         fid->f_type = type;
 }
 
-struct ll_read_inode2_cookie {
-        struct mds_body      *lic_body;
-        struct lov_stripe_md *lic_lsm;
-};
-
 #include <asm/types.h>
 
 #define LL_IOC_GETFLAGS                 _IOR ('f', 151, long)
index 683d78d..e7ee6f0 100644 (file)
@@ -35,6 +35,8 @@
 #include <linux/lustre_idl.h>
 #include <linux/lustre_lib.h>
 #include <linux/lustre_dlm.h>
+#include <linux/lustre_log.h>
+#include <linux/lustre_export.h>
 
 struct ldlm_lock_desc;
 struct mds_obd;
@@ -49,6 +51,11 @@ struct ll_file_data;
 #define LUSTRE_MDT_NAME "mdt"
 #define LUSTRE_MDC_NAME "mdc"
 
+struct lustre_md {
+        struct mds_body *body;
+        struct lov_stripe_md *lsm;
+};
+
 struct mdc_rpc_lock {
         struct semaphore rpcl_sem;
         struct lookup_intent *rpcl_it;
@@ -144,6 +151,8 @@ struct mds_update_record {
         char *ur_tgt;
         int ur_eadatalen;
         void *ur_eadata;
+        int ur_cookielen;
+        struct llog_cookie *ur_logcookies;
         struct iattr ur_iattr;
         struct obd_ucred ur_uc;
         __u64 ur_rdev;
@@ -160,8 +169,31 @@ struct mds_update_record {
 #define ur_suppgid1 ur_uc.ouc_suppgid1
 #define ur_suppgid2 ur_uc.ouc_suppgid2
 
-#define MDS_LR_CLIENT  8192
-#define MDS_LR_SIZE     128
+/* Open-count tracking for orphan inodes on the MDS.
+ *
+ * On 2.4 kernels the open count is kept in inode->i_attr_flags, which is
+ * accessed as an atomic_t.  On 2.5+ kernels this is not implemented yet: the
+ * count always reads as 0, the increment is a no-op and the dec-and-test
+ * always evaluates to 0.
+ * TODO (Alex): find a solution for 2.5.
+ *
+ * mds_open_orphan_count(inode)    - current open count
+ * mds_open_orphan_inc(inode)      - increment the open count
+ * mds_open_orphan_dec_test(inode) - decrement; non-zero when it reaches zero
+ */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+# define mds_open_orphan_count(inode)   (0)
+/* No trailing ';' here: the caller supplies it, which keeps the macro usable
+ * as the body of an un-braced if/else. */
+# define mds_open_orphan_inc(inode)  do { } while (0)
+# define mds_open_orphan_dec_test(inode)  (0)
+#else
+# define mds_inode_oatomic(inode)    ((atomic_t *)&(inode)->i_attr_flags)
+# define mds_open_orphan_count(inode)                          \
+  atomic_read(mds_inode_oatomic(inode))
+# define mds_open_orphan_inc(inode)                            \
+  atomic_inc(mds_inode_oatomic(inode))
+# define mds_open_orphan_dec_test(inode)                       \
+  atomic_dec_and_test(mds_inode_oatomic(inode))
+#endif
+/* The orphan state is the 0x4000000 bit of inode->i_flags.  Both expansions
+ * are fully parenthesized so they compose safely inside larger expressions. */
+#define mds_inode_is_orphan(inode)  ((inode)->i_flags & 0x4000000)
+#define mds_inode_set_orphan(inode) ((inode)->i_flags |= 0x4000000)
+
+#define MDS_LR_SERVER_SIZE    512
+
+#define MDS_LR_CLIENT_START  8192
+#define MDS_LR_CLIENT_SIZE    128
+#if MDS_LR_CLIENT_START < MDS_LR_SERVER_SIZE
+#error "Can't have MDS_LR_CLIENT_START < MDS_LR_SERVER_SIZE"
+#endif
 
 #define MDS_CLIENT_SLOTS 17
 
@@ -169,11 +201,24 @@ struct mds_update_record {
 
 /* Data stored per server at the head of the last_rcvd file.  In le32 order. */
 struct mds_server_data {
-        __u8 msd_uuid[37];      /* server UUID */
-        __u8 uuid_padding[3];   /* unused */
-        __u64 msd_last_transno; /* last completed transaction ID */
-        __u64 msd_mount_count;  /* MDS incarnation number */
-        __u8 padding[512 - 56];
+        __u8  msd_uuid[37];        /* server UUID */
+        __u8  uuid_padding[3];     /* unused */
+//      __u64 msd_last_objid;      /* last created object ID */
+        __u64 msd_last_transno;    /* last completed transaction ID */
+        __u64 msd_mount_count;     /* MDS incarnation number */
+        __u64 msd_padding_until_last_objid_is_enabled;
+        __u32 msd_feature_compat;  /* compatible feature flags */
+        __u32 msd_feature_rocompat;/* read-only compatible feature flags */
+        __u32 msd_feature_incompat;/* incompatible feature flags */
+        __u32 msd_server_size;     /* size of server data area */
+        __u32 msd_client_start;    /* start of per-client data area */
+        __u16 msd_client_size;     /* size of per-client data area */
+        __u16 msd_subdir_count;    /* number of subdirectories for objects */
+        __u64 msd_catalog_oid;     /* recovery catalog object id */
+        __u32 msd_catalog_ogen;    /* recovery catalog inode generation */
+        __u8  msd_peeruuid[37];    /* UUID of LOV/OSC associated with MDS */
+        __u8  peer_padding[3];     /* unused */
+        __u8  msd_padding[MDS_LR_SERVER_SIZE - 140]; /* 140 = 40 + 3*8 + 5*4 + 2*2 + 8 + 4 + 40, the bytes used by the fields above; every field sits on its natural alignment, so the struct is MDS_LR_SERVER_SIZE bytes in total */
 };
 
 /* Data stored per client in the last_rcvd file.  In le32 order. */
@@ -185,7 +230,7 @@ struct mds_client_data {
         __u64 mcd_last_xid;     /* xid for the last transaction */
         __u32 mcd_last_result;  /* result from last RPC */
         __u32 mcd_last_data;    /* per-op data (disposition for open &c.) */
-        __u8 padding[MDS_LR_SIZE - 74];
+        __u8 mcd_padding[MDS_LR_CLIENT_SIZE - 72];
 };
 
 /* file data for open files on MDS */
@@ -202,10 +247,6 @@ struct mds_file_data {
 int mds_reint_rec(struct mds_update_record *r, int offset,
                   struct ptlrpc_request *req, struct lustre_handle *);
 
-/* mds/mds_open.c */
-int mds_open(struct mds_update_record *rec, int offset,
-             struct ptlrpc_request *req, struct lustre_handle *);
-
 /* mds/handler.c */
 #ifdef __KERNEL__
 struct dentry *mds_name2locked_dentry(struct obd_device *, struct dentry *dir,
@@ -223,13 +264,22 @@ int mds_pack_md(struct obd_device *mds, struct lustre_msg *msg,
                 int offset, struct mds_body *body, struct inode *inode);
 void mds_steal_ack_locks(struct obd_export *exp,
                          struct ptlrpc_request *req);
+int mds_update_server_data(struct obd_device *);
 
 /* mds/mds_fs.c */
 int mds_fs_setup(struct obd_device *obddev, struct vfsmount *mnt);
 int mds_fs_cleanup(struct obd_device *obddev, int failover);
 #endif
 
+/* mds/mds_lov.c */
+extern int mds_get_lovtgts(struct mds_obd *obd, int tgt_count,
+                           struct obd_uuid *uuidarray);
+extern int mds_get_lovdesc(struct mds_obd  *obd, struct lov_desc *desc);
+
 /* mdc/mdc_request.c */
+int mdc_req2lustre_md(struct ptlrpc_request *req, int offset,
+                      struct lustre_handle *obd_import,
+                      struct lustre_md *md);
 int mdc_enqueue(struct lustre_handle *conn, int lock_type,
                 struct lookup_intent *it, int lock_mode,
                 struct mdc_op_data *enq_data,
@@ -248,7 +298,7 @@ int mdc_getattr_name(struct lustre_handle *conn, struct ll_fid *fid,
                      unsigned int ea_size, struct ptlrpc_request **request);
 int mdc_setattr(struct lustre_handle *conn,
                 struct mdc_op_data *data,
-                struct iattr *iattr, void *ea, int ealen,
+                struct iattr *iattr, void *ea, int ealen, void *ea2, int ea2len,
                 struct ptlrpc_request **request);
 int mdc_open(struct lustre_handle *conn, obd_id ino, int type, int flags,
              struct lov_mds_md *lmm, int lmm_size, struct lustre_handle *fh,
index ac87d7f..bc70b9a 100644 (file)
 /* OST_MAXREQSIZE ~= 1640 bytes =
  * lustre_msg + obdo + 16 * obd_ioobj + 64 * niobuf_remote
  *
- * single object with 16 pages is 512 bytes
+ * - single object with 16 pages is 512 bytes
+ * - OST_MAXREQSIZE must be at least 1 page of cookies plus some spillover
  */
-#define OST_MAXREQSIZE  (2 * 1024)
+#define OST_MAXREQSIZE  (5 * 1024)
 
 #define PTLBD_NUM_THREADS        4
 #define PTLBD_NEVENTS    1024
@@ -188,15 +189,19 @@ union ptlrpc_async_args {
          * big enough.  For _tons_ of context, OBD_ALLOC a struct and store
          * a pointer to it here.  The pointer_arg ensures this struct is at
          * least big enough for that. */
-        void      *pointer_arg[4];
+        void      *pointer_arg[5];
         __u64      space[4];
 };
 
+struct ptlrpc_request_set;
+typedef int (*set_interpreter_func)(struct ptlrpc_request_set *, void *, int);
+
 struct ptlrpc_request_set {
         int               set_remaining; /* # uncompleted requests */
         wait_queue_head_t set_waitq;
+        wait_queue_head_t *set_wakeup_ptr;
         struct list_head  set_requests;
-        void             *set_interpret; /* completion callback */
+        set_interpreter_func    set_interpret; /* completion callback */
         union ptlrpc_async_args set_args; /* completion context */
 };
 
index ba848a9..f30cbb2 100644 (file)
 #ifndef __OBD_H
 #define __OBD_H
 
+#define IOC_OSC_TYPE         'h'
+#define IOC_OSC_MIN_NR       20
+#define IOC_OSC_REGISTER_LOV _IOWR(IOC_OSC_TYPE, 20, struct obd_device *)
+#define IOC_OSC_SET_ACTIVE   _IOWR(IOC_OSC_TYPE, 21, struct obd_device *)
+#define IOC_OSC_MAX_NR       50
+
+#define IOC_MDC_TYPE         'i'
+#define IOC_MDC_MIN_NR       20
+#define IOC_MDC_LOOKUP       _IOWR(IOC_MDC_TYPE, 20, struct obd_device *)
+#define IOC_MDC_GETSTRIPE    _IOWR(IOC_MDC_TYPE, 21, struct lov_mds_md *)
+#define IOC_MDC_MAX_NR       50
+
+#ifdef __KERNEL__
+# include <linux/fs.h>
+# include <linux/list.h>
+# include <linux/sched.h> /* for struct task_struct, for current.h */
+# include <asm/current.h> /* for smp_lock.h */
+# include <linux/smp_lock.h>
+# include <linux/proc_fs.h>
+# include <linux/mount.h>
+#endif
+
+#include <linux/lustre_lib.h>
+#include <linux/lustre_idl.h>
+#include <linux/lustre_export.h>
 #include <linux/lustre_otree.h>
 
 struct lov_oinfo { /* per-child structure */
@@ -34,31 +59,6 @@ struct lov_stripe_md {
         struct lov_oinfo lsm_oinfo[0];
 };
 
-#define IOC_OSC_TYPE         'h'
-#define IOC_OSC_MIN_NR       20
-#define IOC_OSC_REGISTER_LOV _IOWR(IOC_OSC_TYPE, 20, struct obd_device *)
-#define IOC_OSC_SET_ACTIVE   _IOWR(IOC_OSC_TYPE, 21, struct obd_device *)
-#define IOC_OSC_MAX_NR       50
-
-#define IOC_MDC_TYPE         'i'
-#define IOC_MDC_MIN_NR       20
-#define IOC_MDC_LOOKUP       _IOWR(IOC_MDC_TYPE, 20, struct obd_device *)
-#define IOC_MDC_MAX_NR       50
-
-#ifdef __KERNEL__
-# include <linux/fs.h>
-# include <linux/list.h>
-# include <linux/sched.h> /* for struct task_struct, for current.h */
-# include <asm/current.h> /* for smp_lock.h */
-# include <linux/smp_lock.h>
-# include <linux/proc_fs.h>
-
-# include <linux/lustre_lib.h>
-# include <linux/lustre_idl.h>
-# include <linux/lustre_mds.h>
-# include <linux/lustre_export.h>
-#endif
-
 struct obd_type {
         struct list_head typ_chain;
         struct obd_ops *typ_ops;
@@ -80,7 +80,7 @@ struct ost_server_data;
 
 struct filter_obd {
         const char          *fo_fstype;
-        char *fo_nspath;
+        char                *fo_nspath;
         struct super_block  *fo_sb;
         struct vfsmount     *fo_vfsmnt;
         struct obd_run_ctxt  fo_ctxt;
@@ -103,28 +103,57 @@ struct filter_obd {
         spinlock_t           fo_grant_lock;       /* protects tot_granted */
         obd_size             fo_tot_granted;
         obd_size             fo_tot_cached;
+
+        struct llog_handle  *fo_catalog;
+        struct obd_import   *fo_mdc_imp;
+        struct obd_uuid      fo_mdc_uuid;
+        struct lustre_handle fo_mdc_conn;
+        struct ptlrpc_client fo_mdc_client;
+        struct llog_commit_data *fo_llcd;
+        struct semaphore     fo_sem; /* protects fo_llcd */
 };
 
 struct mds_server_data;
 
 struct client_obd {
-        struct obd_import   *cl_import;
-        struct semaphore     cl_sem;
-        int                  cl_conn_count;
+        struct obd_import       *cl_import;
+        struct semaphore         cl_sem;
+        int                      cl_conn_count;
         /* max_mds_easize is purely a performance thing so we don't have to
          * call obd_size_wiremd() all the time. */
-        int                  cl_max_mds_easize;
-        struct obd_device   *cl_containing_lov;
-        kdev_t               cl_sandev;
-        struct semaphore     cl_dirty_sem;
-        obd_size             cl_dirty;  /* both in bytes */
-        obd_size             cl_dirty_granted;
-        /* this is just to keep existing infinitely caching behaviour between 
-         * clients and OSTs that don't have the grant code in yet.. it can 
+        int                      cl_max_mds_easize;
+        int                      cl_max_mds_cookiesize;
+        /* XXX can we replace cl_containing_lov with mgmt-events? */
+        struct obd_device       *cl_containing_lov;
+        kdev_t                   cl_sandev;
+
+        struct llog_commit_data *cl_llcd;
+        void                    *cl_llcd_offset;
+
+        struct semaphore         cl_dirty_sem;
+        obd_size                 cl_dirty;  /* both in bytes */
+        obd_size                 cl_dirty_granted;
+
+        struct obd_device       *cl_mgmtcli_obd;
+
+        /* this is just to keep existing infinitely caching behaviour between
+         * clients and OSTs that don't have the grant code in yet.. it can
          * be yanked once everything speaks grants */
-        char                 cl_ost_can_grant;
+        char                     cl_ost_can_grant;
 };
 
+/* Like a client, with some hangers-on.  Keep mc_client_obd first so that we
+ * can reuse the various client setup/connect functions. */
+struct mgmtcli_obd {
+        struct client_obd        mc_client_obd; /* nested */
+        struct ptlrpc_thread    *mc_ping_thread;
+        struct lustre_handle     mc_ping_handle; /* XXX single-target */
+        struct list_head         mc_registered;
+        void                    *mc_hammer;
+};
+
+#define mc_import mc_client_obd.cl_import
+
 struct mds_obd {
         struct ptlrpc_service           *mds_service;
         struct ptlrpc_service           *mds_setattr_service;
@@ -139,12 +168,20 @@ struct mds_obd {
         struct address_space_operations *mds_aops;
 
         int                              mds_max_mdsize;
+        int                              mds_max_cookiesize;
         struct file                     *mds_rcvd_filp;
         spinlock_t                       mds_transno_lock;
         __u64                            mds_last_transno;
         __u64                            mds_mount_count;
         struct ll_fid                    mds_rootfid;
         struct mds_server_data          *mds_server_data;
+        struct dentry                   *mds_pending_dir;
+        struct dentry                   *mds_logs_dir;
+
+        struct llog_handle              *mds_catalog;
+        struct obd_device               *mds_osc_obd;
+        struct obd_uuid                  mds_osc_uuid;
+        struct lustre_handle             mds_osc_conn;
 
         int                              mds_has_lov_desc;
         struct lov_desc                  mds_lov_desc;
@@ -159,7 +196,6 @@ struct ldlm_obd {
 };
 
 struct echo_obd {
-        char *eo_fstype;
         struct obdo oa;
         spinlock_t eo_lock;
         __u64 eo_lastino;
@@ -221,6 +257,7 @@ struct cache_obd {
 struct lov_tgt_desc {
         struct obd_uuid uuid;
         struct lustre_handle conn;
+        struct llog_handle *ltd_cathandle;
         int active; /* is this target available for requests, etc */
 };
 
@@ -230,6 +267,7 @@ struct lov_obd {
         struct lov_desc desc;
         int bufsize;
         int refcount;
+        int lo_catalog_loaded:1;
         struct lov_tgt_desc *tgts;
 };
 
@@ -247,14 +285,46 @@ struct niobuf_local {
 #define N_LOCAL_TEMP_PAGE 0x10000000
 
 struct obd_trans_info {
-        __u64     oti_transno;
+        __u64                   oti_transno;
         /* Only used on the server side for tracking acks. */
         struct oti_req_ack_lock {
                 struct lustre_handle lock;
                 __u32                mode;
         } oti_ack_locks[4];
+        void                    *oti_handle;
+        struct llog_cookie       oti_onecookie;  /* backs the 1-cookie case */
+        struct llog_cookie      *oti_logcookies; /* &oti_onecookie or array */
+        int                      oti_numcookies; /* set by oti_alloc_cookies */
 };
 
+static inline void oti_alloc_cookies(struct obd_trans_info *oti,int num_cookies)
+{
+        if (oti == NULL)
+                return;
+        /* one cookie uses the embedded slot; any other count uses OBD_ALLOC */
+        if (num_cookies == 1) {
+                oti->oti_logcookies = &oti->oti_onecookie;
+        } else {
+                OBD_ALLOC(oti->oti_logcookies,
+                          num_cookies * sizeof(oti->oti_onecookie));
+        }
+        oti->oti_numcookies = num_cookies; /* recorded for oti_free_cookies */
+}
+
+static inline void oti_free_cookies(struct obd_trans_info *oti)
+{
+        if (oti == NULL || oti->oti_logcookies == NULL)
+                return; /* nothing was attached */
+        if (oti->oti_logcookies == &oti->oti_onecookie) {
+                LASSERT(oti->oti_numcookies == 1); /* embedded slot: no free */
+        } else {
+                OBD_FREE(oti->oti_logcookies,
+                         oti->oti_numcookies * sizeof(oti->oti_onecookie));
+        }
+        oti->oti_logcookies = NULL;
+        oti->oti_numcookies = 0;
+}
+
 /* corresponds to one of the obd's */
 struct obd_device {
         struct obd_type *obd_type;
@@ -276,9 +346,11 @@ struct obd_device {
         struct ldlm_namespace *obd_namespace;
         struct ptlrpc_client   obd_ldlm_client; /* XXX OST/MDS only */
         /* a spinlock is OK for what we do now, may need a semaphore later */
-        spinlock_t obd_dev_lock;
+        spinlock_t             obd_dev_lock;
         __u64                  obd_last_committed;
         struct fsfilt_operations *obd_fsops;
+        struct obd_statfs      obd_osfs;
+        unsigned long          obd_osfs_age;
 
         /* XXX encapsulate all this recovery data into one struct */
         svc_handler_t                    obd_recovery_handler;
@@ -297,19 +369,25 @@ struct obd_device {
                 struct mds_obd mds;
                 struct client_obd cli;
                 struct ost_obd ost;
-                struct echo_client_obd echo_client;;
+                struct echo_client_obd echo_client;
                 struct ldlm_obd ldlm;
                 struct echo_obd echo;
                 struct recovd_obd recovd;
                 struct lov_obd lov;
                 struct cache_obd cobd;
                 struct ptlbd_obd ptlbd;
+                struct mgmtcli_obd mgmtcli;
         } u;
        /* Fields used by LProcFS */
         unsigned int           obd_cntr_base;
         struct lprocfs_stats  *obd_stats;
 };
 
+#define OBD_OPT_FORCE           0x0001
+#define OBD_OPT_FAILOVER        0x0002
+
+#define OBD_LLOG_FL_SENDNOW     0x0001
+
 struct obd_ops {
         struct module *o_owner;
         int (*o_iocontrol)(unsigned int cmd, struct lustre_handle *, int len,
@@ -321,16 +399,17 @@ struct obd_ops {
         int (*o_attach)(struct obd_device *dev, obd_count len, void *data);
         int (*o_detach)(struct obd_device *dev);
         int (*o_setup) (struct obd_device *dev, obd_count len, void *data);
-        int (*o_cleanup)(struct obd_device *dev, int force, int failover);
+        int (*o_cleanup)(struct obd_device *dev, int flags);
         int (*o_connect)(struct lustre_handle *conn, struct obd_device *src,
                          struct obd_uuid *cluuid);
-        int (*o_disconnect)(struct lustre_handle *conn, int failover);
+        int (*o_disconnect)(struct lustre_handle *conn, int flags);
 
-        int (*o_statfs)(struct obd_export *exp, struct obd_statfs *osfs);
-        int (*o_syncfs)(struct obd_export *);
+        int (*o_statfs)(struct obd_device *obd, struct obd_statfs *osfs,
+                        unsigned long max_age);
+        int (*o_syncfs)(struct obd_export *exp);
         int (*o_packmd)(struct lustre_handle *, struct lov_mds_md **disk_tgt,
                         struct lov_stripe_md *mem_src);
-        int (*o_unpackmd)(struct lustre_handle *,
+        int (*o_unpackmd)(struct lustre_handle *conn,
                           struct lov_stripe_md **mem_tgt,
                           struct lov_mds_md *disk_src, int disk_len);
         int (*o_preallocate)(struct lustre_handle *, obd_count *req,
@@ -344,42 +423,42 @@ struct obd_ops {
         int (*o_getattr)(struct lustre_handle *conn, struct obdo *oa,
                          struct lov_stripe_md *ea);
         int (*o_getattr_async)(struct lustre_handle *conn, struct obdo *oa,
-                               struct lov_stripe_md *ea, 
+                               struct lov_stripe_md *ea,
                                struct ptlrpc_request_set *set);
         int (*o_open)(struct lustre_handle *conn, struct obdo *oa,
                       struct lov_stripe_md *ea, struct obd_trans_info *oti,
                       struct obd_client_handle *och);
         int (*o_close)(struct lustre_handle *conn, struct obdo *oa,
                        struct lov_stripe_md *ea, struct obd_trans_info *oti);
-        int (*o_brw)(int rw, struct lustre_handle *conn,
+        int (*o_brw)(int rw, struct lustre_handle *conn, struct obdo *oa,
                      struct lov_stripe_md *ea, obd_count oa_bufs,
                      struct brw_page *pgarr, struct obd_trans_info *oti);
-        int (*o_brw_async)(int rw, struct lustre_handle *conn,
+        int (*o_brw_async)(int rw, struct lustre_handle *conn, struct obdo *oa,
                            struct lov_stripe_md *ea, obd_count oa_bufs,
                            struct brw_page *pgarr, struct ptlrpc_request_set *,
                            struct obd_trans_info *oti);
-        int (*o_punch)(struct lustre_handle *conn, struct obdo *tgt,
+        int (*o_punch)(struct lustre_handle *conn, struct obdo *oa,
                        struct lov_stripe_md *ea, obd_size count,
                        obd_off offset, struct obd_trans_info *oti);
-        int (*o_sync)(struct lustre_handle *conn, struct obdo *tgt,
+        int (*o_sync)(struct lustre_handle *conn, struct obdo *oa,
                       obd_size count, obd_off offset);
-        int (*o_migrate)(struct lustre_handle *conn, struct obdo *dst,
-                         struct obdo *src, obd_size count, obd_off offset);
-        int (*o_copy)(struct lustre_handle *dstconn, struct obdo *dst,
-                      struct lustre_handle *srconn, struct obdo *src,
+        int (*o_migrate)(struct lustre_handle *conn, struct lov_stripe_md *dst,
+                         struct lov_stripe_md *src, obd_size count,
+                         obd_off offset);
+        int (*o_copy)(struct lustre_handle *dstconn, struct lov_stripe_md *dst,
+                      struct lustre_handle *srconn, struct lov_stripe_md *src,
                       obd_size count, obd_off offset, struct obd_trans_info *);
         int (*o_iterate)(struct lustre_handle *conn,
                          int (*)(obd_id, obd_gr, void *),
                          obd_id *startid, obd_gr group, void *data);
-        int (*o_preprw)(int cmd, struct obd_export *, struct obdo *obdo,
+        int (*o_preprw)(int cmd, struct obd_export *exp, struct obdo *oa,
                         int objcount, struct obd_ioobj *obj,
                         int niocount, struct niobuf_remote *remote,
-                        struct niobuf_local *local, void **desc_private, 
-                        struct obd_trans_info *oti);
-        int (*o_commitrw)(int cmd, struct obd_export *,
+                        struct niobuf_local *local, struct obd_trans_info *oti);
+        int (*o_commitrw)(int cmd, struct obd_export *exp, struct obdo *oa,
                           int objcount, struct obd_ioobj *obj,
                           int niocount, struct niobuf_local *local,
-                          void *desc_private, struct obd_trans_info *oti);
+                          struct obd_trans_info *oti);
         int (*o_enqueue)(struct lustre_handle *conn, struct lov_stripe_md *md,
                          struct lustre_handle *parent_lock,
                          __u32 type, void *cookie, int cookielen, __u32 mode,
@@ -391,10 +470,17 @@ struct obd_ops {
         int (*o_cancel)(struct lustre_handle *, struct lov_stripe_md *md,
                         __u32 mode, struct lustre_handle *);
         int (*o_cancel_unused)(struct lustre_handle *, struct lov_stripe_md *,
-                               int local_only, void *opaque);
-        int (*o_san_preprw)(int cmd, struct lustre_handle *conn,
-                            int objcount, struct obd_ioobj *obj,
-                            int niocount, struct niobuf_remote *remote);
+                               int flags, void *opaque);
+        int (*o_log_add)(struct lustre_handle *conn,
+                         struct llog_handle *cathandle,
+                         struct llog_trans_hdr *rec, struct lov_stripe_md *lsm,
+                         struct llog_cookie *logcookies, int numcookies);
+        int (*o_log_cancel)(struct lustre_handle *, struct lov_stripe_md *,
+                            int count, struct llog_cookie *, int flags);
+        int (*o_san_preprw)(int cmd, struct obd_export *exp,
+                            struct obdo *oa, int objcount,
+                            struct obd_ioobj *obj, int niocount,
+                            struct niobuf_remote *remote);
         int (*o_mark_page_dirty)(struct lustre_handle *conn,
                                  struct lov_stripe_md *ea,
                                  unsigned long offset);
@@ -406,14 +492,22 @@ struct obd_ops {
         int (*o_last_dirty_offset)(struct lustre_handle *conn,
                                    struct lov_stripe_md *ea,
                                    unsigned long *offset);
-        void (*o_destroy_export)(struct obd_export *export);
+        void (*o_destroy_export)(struct obd_export *exp);
+
+        /* metadata-only methods */
+        int (*o_pin)(struct lustre_handle *, obd_id ino, __u32 gen, int type,
+                     struct obd_client_handle *, int flag);
+        int (*o_unpin)(struct lustre_handle *, struct obd_client_handle *, int);
+
+        /* If adding ops, also update obdclass/lprocfs_status.c,
+         * and include/linux/obd_class.h */
 };
 
 static inline void obd_transno_commit_cb(struct obd_device *obd, __u64 transno,
                                          int error)
 {
         if (error) {
-                CDEBUG(D_ERROR, "%s: transno "LPD64" commit error: %d\n",
+                CERROR("%s: transno "LPD64" commit error: %d\n",
                        obd->obd_name, transno, error);
                 return;
         }
@@ -425,8 +519,4 @@ static inline void obd_transno_commit_cb(struct obd_device *obd, __u64 transno,
         }
 }
 
-/* When adding a function pointer to struct obd_ops, please update 
- * function lprocfs_alloc_obd_counters() in obdclass/lprocfs_status.c
- * accordingly. */
-
 #endif /* __OBD_H */
index 0c33ceb..2e57d2f 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/types.h>
 #include <linux/fs.h>
 #include <linux/time.h>
+#include <linux/timer.h>
 #endif
 
 #include <linux/obd_support.h>
@@ -81,6 +82,17 @@ void class_disconnect_exports(struct obd_device *obddev, int failover);
 int class_multi_setup(struct obd_device *obddev, uint32_t len, void *data);
 int class_multi_cleanup(struct obd_device *obddev);
 
+/* obdo.c */
+#ifdef __KERNEL__
+void obdo_from_iattr(struct obdo *oa, struct iattr *attr, unsigned ia_valid);
+void iattr_from_obdo(struct iattr *attr, struct obdo *oa, obd_flag valid);
+void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid);
+void obdo_refresh_inode(struct inode *dst, struct obdo *src, obd_flag valid);
+void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid);
+#endif
+void obdo_cpy_md(struct obdo *dst, struct obdo *src, obd_flag valid);
+int obdo_cmp_md(struct obdo *dst, struct obdo *src, obd_flag compare);
+
 static inline int obd_check_conn(struct lustre_handle *conn)
 {
         struct obd_device *obd;
@@ -277,7 +289,7 @@ static inline int obd_setup(struct obd_device *obd, int datalen, void *data)
         RETURN(rc);
 }
 
-static inline int obd_cleanup(struct obd_device *obd, int force, int failover)
+static inline int obd_cleanup(struct obd_device *obd, int flags)
 {
         int rc;
         ENTRY;
@@ -286,7 +298,7 @@ static inline int obd_cleanup(struct obd_device *obd, int force, int failover)
         OBD_CHECK_OP(obd, cleanup);
         OBD_COUNTER_INCREMENT(obd, cleanup);
 
-        rc = OBP(obd, cleanup)(obd, force, failover);
+        rc = OBP(obd, cleanup)(obd, flags);
         RETURN(rc);
 }
 
@@ -518,7 +530,7 @@ static inline int obd_connect(struct lustre_handle *conn,
         RETURN(rc);
 }
 
-static inline int obd_disconnect(struct lustre_handle *conn, int failover)
+static inline int obd_disconnect(struct lustre_handle *conn, int flags)
 {
         struct obd_export *exp;
         int rc;
@@ -528,7 +540,7 @@ static inline int obd_disconnect(struct lustre_handle *conn, int failover)
         OBD_CHECK_OP(exp->exp_obd, disconnect);
         OBD_COUNTER_INCREMENT(exp->exp_obd, disconnect);
 
-        rc = OBP(exp->exp_obd, disconnect)(conn, failover);
+        rc = OBP(exp->exp_obd, disconnect)(conn, flags);
         class_export_put(exp);
         RETURN(rc);
 }
@@ -541,15 +553,35 @@ static inline void obd_destroy_export(struct obd_export *exp)
         EXIT;
 }
 
-static inline int obd_statfs(struct obd_export *exp, struct obd_statfs *osfs)
+#ifndef time_before /* fallback for builds whose headers do not define it */
+#define time_before(t1, t2) ((long)(t2) - (long)(t1) > 0) /* t1 precedes t2 */
+#endif
+
+static inline int obd_statfs(struct obd_device *obd, struct obd_statfs *osfs,
+                             unsigned long max_age) /* oldest usable stamp */
 {
-        int rc;
+        int rc = 0; /* stays 0 when the cached copy is served */
         ENTRY;
 
-        OBD_CHECK_OP(exp->exp_obd, statfs);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, statfs);
-
-        rc = OBP(exp->exp_obd, statfs)(exp, osfs);
+        if (obd == NULL)
+                RETURN(-EINVAL);
+        OBD_CHECK_OP(obd, statfs);
+        OBD_COUNTER_INCREMENT(obd, statfs);
+        CDEBUG(D_SUPER, "osfs %lu, max_age %lu\n", obd->obd_osfs_age, max_age);
+        if (obd->obd_osfs_age == 0 || time_before(obd->obd_osfs_age, max_age)) {
+                rc = OBP(obd, statfs)(obd, osfs, max_age);
+                if (rc == 0) { /* never cache the output of a failed call */
+                        spin_lock(&obd->obd_dev_lock);
+                        memcpy(&obd->obd_osfs, osfs, sizeof(obd->obd_osfs));
+                        obd->obd_osfs_age = jiffies;
+                        spin_unlock(&obd->obd_dev_lock);
+                }
+        } else {
+                CDEBUG(D_SUPER, "using cached obd_statfs data\n");
+                spin_lock(&obd->obd_dev_lock);
+                memcpy(osfs, &obd->obd_osfs, sizeof(*osfs));
+                spin_unlock(&obd->obd_dev_lock);
+        }
         RETURN(rc);
 }
 
@@ -582,7 +614,7 @@ static inline int obd_punch(struct lustre_handle *conn, struct obdo *oa,
         RETURN(rc);
 }
 
-static inline int obd_brw(int cmd, struct lustre_handle *conn,
+static inline int obd_brw(int cmd, struct lustre_handle *conn, struct obdo *oa,
                           struct lov_stripe_md *ea, obd_count oa_bufs,
                           struct brw_page *pg, struct obd_trans_info *oti)
 {
@@ -600,14 +632,14 @@ static inline int obd_brw(int cmd, struct lustre_handle *conn,
                 LBUG();
         }
 
-        rc = OBP(exp->exp_obd, brw)(cmd, conn, ea, oa_bufs, pg, oti);
+        rc = OBP(exp->exp_obd, brw)(cmd, conn, oa, ea, oa_bufs, pg, oti);
         class_export_put(exp);
         RETURN(rc);
 }
 
 static inline int obd_brw_async(int cmd, struct lustre_handle *conn,
-                                struct lov_stripe_md *ea, obd_count oa_bufs,
-                                struct brw_page *pg,
+                                struct obdo *oa, struct lov_stripe_md *ea,
+                                obd_count oa_bufs, struct brw_page *pg,
                                 struct ptlrpc_request_set *set,
                                 struct obd_trans_info *oti)
 {
@@ -624,15 +656,16 @@ static inline int obd_brw_async(int cmd, struct lustre_handle *conn,
                 LBUG();
         }
 
-        rc = OBP(exp->exp_obd, brw_async)(cmd, conn, ea, oa_bufs, pg, set, oti);
+        rc = OBP(exp->exp_obd, brw_async)(cmd, conn, oa, ea, oa_bufs, pg, set,
+                                          oti);
         class_export_put(exp);
         RETURN(rc);
 }
 
-static inline int obd_preprw(int cmd, struct obd_export *exp, struct obdo *obdo,
+static inline int obd_preprw(int cmd, struct obd_export *exp, struct obdo *oa,
                              int objcount, struct obd_ioobj *obj,
                              int niocount, struct niobuf_remote *remote,
-                             struct niobuf_local *local, void **desc_private,
+                             struct niobuf_local *local,
                              struct obd_trans_info *oti)
 {
         int rc;
@@ -641,15 +674,15 @@ static inline int obd_preprw(int cmd, struct obd_export *exp, struct obdo *obdo,
         OBD_CHECK_OP(exp->exp_obd, preprw);
         OBD_COUNTER_INCREMENT(exp->exp_obd, preprw);
 
-        rc = OBP(exp->exp_obd, preprw)(cmd, exp, obdo, objcount, obj, niocount,
-                                       remote, local, desc_private, oti);
+        rc = OBP(exp->exp_obd, preprw)(cmd, exp, oa, objcount, obj, niocount,
+                                       remote, local, oti);
         RETURN(rc);
 }
 
-static inline int obd_commitrw(int cmd, struct obd_export *exp,
+static inline int obd_commitrw(int cmd, struct obd_export *exp, struct obdo *oa,
                                int objcount, struct obd_ioobj *obj,
                                int niocount, struct niobuf_local *local,
-                               void *desc_private, struct obd_trans_info *oti)
+                               struct obd_trans_info *oti)
 {
         int rc;
         ENTRY;
@@ -657,8 +690,8 @@ static inline int obd_commitrw(int cmd, struct obd_export *exp,
         OBD_CHECK_OP(exp->exp_obd, commitrw);
         OBD_COUNTER_INCREMENT(exp->exp_obd, commitrw);
 
-        rc = OBP(exp->exp_obd, commitrw)(cmd, exp, objcount, obj, niocount,
-                                         local, desc_private, oti);
+        rc = OBP(exp->exp_obd, commitrw)(cmd, exp, oa, objcount, obj, niocount,
+                                         local, oti);
         RETURN(rc);
 }
 
@@ -754,25 +787,92 @@ static inline int obd_cancel_unused(struct lustre_handle *conn,
         RETURN(rc);
 }
 
-static inline int obd_san_preprw(int cmd, struct lustre_handle *conn,
+static inline int obd_log_add(struct lustre_handle *conn,
+                              struct llog_handle *cathandle,
+                              struct llog_trans_hdr *rec,
+                              struct lov_stripe_md *lsm,
+                              struct llog_cookie *logcookies,
+                              int numcookies)
+{
+        struct obd_export *exp;
+        int rc;
+        ENTRY;
+        /* Dispatch wrapper for the log_add method of the device behind conn. */
+        OBD_CHECK_SETUP(conn, exp); /* NOTE(review): presumably conn -> exp */
+        OBD_CHECK_OP(exp->exp_obd, log_add);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, log_add);
+        /* every argument is forwarded unchanged; rc is the method's result */
+        rc = OBP(exp->exp_obd, log_add)(conn, cathandle, rec, lsm, logcookies,
+                                        numcookies);
+        class_export_put(exp); /* done with exp once the method returns */
+        RETURN(rc);
+}
+
+static inline int obd_log_cancel(struct lustre_handle *conn,
+                                 struct lov_stripe_md *lsm, int count,
+                                 struct llog_cookie *cookies, int flags)
+{
+        struct obd_export *exp;
+        int rc;
+        ENTRY;
+        /* Dispatch wrapper for the log_cancel method of the device on conn. */
+        OBD_CHECK_SETUP(conn, exp); /* NOTE(review): presumably conn -> exp */
+        OBD_CHECK_OP(exp->exp_obd, log_cancel);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, log_cancel);
+        /* every argument is forwarded unchanged; rc is the method's result */
+        rc = OBP(exp->exp_obd, log_cancel)(conn, lsm, count, cookies, flags);
+        class_export_put(exp); /* done with exp once the method returns */
+        RETURN(rc);
+}
+
+static inline int obd_san_preprw(int cmd, struct obd_export *exp,
+                                 struct obdo *oa,
                                  int objcount, struct obd_ioobj *obj,
                                  int niocount, struct niobuf_remote *remote)
 {
-        struct obd_export *exp;
         int rc;
 
-        OBD_CHECK_ACTIVE(conn, exp);
-        OBD_CHECK_OP(exp->exp_obd, preprw);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, preprw);
+        OBD_CHECK_OP(exp->exp_obd, san_preprw); /* check the op called below */
+        OBD_COUNTER_INCREMENT(exp->exp_obd, san_preprw);
 
-        rc = OBP(exp->exp_obd, san_preprw)(cmd, conn, objcount, obj,
+        rc = OBP(exp->exp_obd, san_preprw)(cmd, exp, oa, objcount, obj,
                                            niocount, remote);
-        class_export_put(exp);
-        RETURN(rc);
+        /* exp belongs to the caller: no reference was taken, so none is put */
+        return(rc);
+}
+
+static inline int obd_pin(struct lustre_handle *conn, obd_id ino, __u32 gen,
+                          int type, struct obd_client_handle *handle, int flag)
+{
+        struct obd_export *exp;
+        int rc;
+        /* Dispatch wrapper for the metadata-only pin method behind conn. */
+        OBD_CHECK_ACTIVE(conn, exp); /* NOTE(review): presumably conn -> exp */
+        OBD_CHECK_OP(exp->exp_obd, pin);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, pin);
+        /* every argument is forwarded unchanged; rc is the method's result */
+        rc = OBP(exp->exp_obd, pin)(conn, ino, gen, type, handle, flag);
+        class_export_put(exp); /* done with exp once the method returns */
+        return(rc);
+}
+
+static inline int obd_unpin(struct lustre_handle *conn,
+                            struct obd_client_handle *handle, int flag)
+{
+        struct obd_export *exp;
+        int rc;
+        /* Dispatch wrapper for the metadata-only unpin method behind conn. */
+        OBD_CHECK_ACTIVE(conn, exp); /* NOTE(review): presumably conn -> exp */
+        OBD_CHECK_OP(exp->exp_obd, unpin);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, unpin);
+        /* every argument is forwarded unchanged; rc is the method's result */
+        rc = OBP(exp->exp_obd, unpin)(conn, handle, flag);
+        class_export_put(exp); /* done with exp once the method returns */
+        return(rc);
 }
 
 static inline int obd_mark_page_dirty(struct lustre_handle *conn,
-                                      struct lov_stripe_md *lsm,  
+                                      struct lov_stripe_md *lsm,
                                       unsigned long offset)
 {
         struct obd_export *exp;
@@ -780,14 +880,15 @@ static inline int obd_mark_page_dirty(struct lustre_handle *conn,
 
         OBD_CHECK_SETUP(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, mark_page_dirty);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, mark_page_dirty);
 
         rc = OBP(exp->exp_obd, mark_page_dirty)(conn, lsm, offset);
         class_export_put(exp);
-        RETURN(rc);
+        return(rc);
 }
 
 static inline int obd_clear_dirty_pages(struct lustre_handle *conn,
-                                        struct lov_stripe_md *lsm,  
+                                        struct lov_stripe_md *lsm,
                                         unsigned long start,
                                         unsigned long end,
                                         unsigned long *cleared)
@@ -797,11 +898,12 @@ static inline int obd_clear_dirty_pages(struct lustre_handle *conn,
 
         OBD_CHECK_SETUP(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, clear_dirty_pages);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, clear_dirty_pages);
 
         rc = OBP(exp->exp_obd, clear_dirty_pages)(conn, lsm, start, end,
                                                   cleared);
         class_export_put(exp);
-        RETURN(rc);
+        return(rc);
 }
 
 static inline int obd_last_dirty_offset(struct lustre_handle *conn,
@@ -813,10 +915,11 @@ static inline int obd_last_dirty_offset(struct lustre_handle *conn,
 
         OBD_CHECK_SETUP(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, last_dirty_offset);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, last_dirty_offset);
 
         rc = OBP(exp->exp_obd, last_dirty_offset)(conn, lsm, offset);
         class_export_put(exp);
-        RETURN(rc);
+        return(rc);
 }
 
 /* OBD Metadata Support */
@@ -824,11 +927,6 @@ static inline int obd_last_dirty_offset(struct lustre_handle *conn,
 extern int obd_init_caches(void);
 extern void obd_cleanup_caches(void);
 
-static inline struct lustre_handle *obdo_handle(struct obdo *oa)
-{
-        return (struct lustre_handle *)&oa->o_inline;
-}
-
 /* support routines */
 extern kmem_cache_t *obdo_cachep;
 static inline struct obdo *obdo_alloc(void)
@@ -838,6 +936,7 @@ static inline struct obdo *obdo_alloc(void)
         oa = kmem_cache_alloc(obdo_cachep, SLAB_KERNEL);
         if (oa == NULL)
                 LBUG();
+        CDEBUG(D_MALLOC, "kmem_cache_alloced oa at %p\n", oa);
         memset(oa, 0, sizeof (*oa));
 
         return oa;
@@ -847,6 +946,7 @@ static inline void obdo_free(struct obdo *oa)
 {
         if (!oa)
                 return;
+        CDEBUG(D_MALLOC, "kmem_cache_freed oa at %p\n", oa);
         kmem_cache_free(obdo_cachep, oa);
 }
 
@@ -855,268 +955,6 @@ static inline void obdo_free(struct obdo *oa)
 #define kdev_t_to_nr(dev) dev
 #endif
 
-#ifdef __KERNEL__
-static inline void obdo_from_iattr(struct obdo *oa, struct iattr *attr)
-{
-        unsigned int ia_valid = attr->ia_valid;
-
-        if (ia_valid & ATTR_ATIME) {
-                oa->o_atime = LTIME_S(attr->ia_atime);
-                oa->o_valid |= OBD_MD_FLATIME;
-        }
-        if (ia_valid & ATTR_MTIME) {
-                oa->o_mtime = LTIME_S(attr->ia_mtime);
-                oa->o_valid |= OBD_MD_FLMTIME;
-        }
-        if (ia_valid & ATTR_CTIME) {
-                oa->o_ctime = LTIME_S(attr->ia_ctime);
-                oa->o_valid |= OBD_MD_FLCTIME;
-        }
-        if (ia_valid & ATTR_SIZE) {
-                oa->o_size = attr->ia_size;
-                oa->o_valid |= OBD_MD_FLSIZE;
-        }
-        if (ia_valid & ATTR_MODE) {
-                oa->o_mode = attr->ia_mode;
-                oa->o_valid |= OBD_MD_FLTYPE | OBD_MD_FLMODE;
-                if (!in_group_p(oa->o_gid) && !capable(CAP_FSETID))
-                        oa->o_mode &= ~S_ISGID;
-        }
-        if (ia_valid & ATTR_UID) {
-                oa->o_uid = attr->ia_uid;
-                oa->o_valid |= OBD_MD_FLUID;
-        }
-        if (ia_valid & ATTR_GID) {
-                oa->o_gid = attr->ia_gid;
-                oa->o_valid |= OBD_MD_FLGID;
-        }
-}
-
-
-static inline void iattr_from_obdo(struct iattr *attr, struct obdo *oa,
-                                   obd_flag valid)
-{
-        memset(attr, 0, sizeof(*attr));
-        if (valid & OBD_MD_FLATIME) {
-                LTIME_S(attr->ia_atime) = oa->o_atime;
-                attr->ia_valid |= ATTR_ATIME;
-        }
-        if (valid & OBD_MD_FLMTIME) {
-                LTIME_S(attr->ia_mtime) = oa->o_mtime;
-                attr->ia_valid |= ATTR_MTIME;
-        }
-        if (valid & OBD_MD_FLCTIME) {
-                LTIME_S(attr->ia_ctime) = oa->o_ctime;
-                attr->ia_valid |= ATTR_CTIME;
-        }
-        if (valid & OBD_MD_FLSIZE) {
-                attr->ia_size = oa->o_size;
-                attr->ia_valid |= ATTR_SIZE;
-        }
-        if (valid & OBD_MD_FLTYPE) {
-                attr->ia_mode = (attr->ia_mode & ~S_IFMT)|(oa->o_mode & S_IFMT);
-                attr->ia_valid |= ATTR_MODE;
-        }
-        if (valid & OBD_MD_FLMODE) {
-                attr->ia_mode = (attr->ia_mode & S_IFMT)|(oa->o_mode & ~S_IFMT);
-                attr->ia_valid |= ATTR_MODE;
-                if (!in_group_p(oa->o_gid) && !capable(CAP_FSETID))
-                        attr->ia_mode &= ~S_ISGID;
-        }
-        if (valid & OBD_MD_FLUID)
-        {
-                attr->ia_uid = oa->o_uid;
-                attr->ia_valid |= ATTR_UID;
-        }
-        if (valid & OBD_MD_FLGID) {
-                attr->ia_gid = oa->o_gid;
-                attr->ia_valid |= ATTR_GID;
-        }
-}
-
-
-/* WARNING: the file systems must take care not to tinker with
-   attributes they don't manage (such as blocks). */
-
-
-static inline void obdo_from_inode(struct obdo *dst, struct inode *src,
-                                   obd_flag valid)
-{
-        if (valid & OBD_MD_FLATIME)
-                dst->o_atime = LTIME_S(src->i_atime);
-        if (valid & OBD_MD_FLMTIME)
-                dst->o_mtime = LTIME_S(src->i_mtime);
-        if (valid & OBD_MD_FLCTIME)
-                dst->o_ctime = LTIME_S(src->i_ctime);
-        if (valid & OBD_MD_FLSIZE)
-                dst->o_size = src->i_size;
-        if (valid & OBD_MD_FLBLOCKS)   /* allocation of space */
-                dst->o_blocks = src->i_blocks;
-        if (valid & OBD_MD_FLBLKSZ)
-                dst->o_blksize = src->i_blksize;
-        if (valid & OBD_MD_FLTYPE)
-                dst->o_mode = (dst->o_mode & ~S_IFMT) | (src->i_mode & S_IFMT);
-        if (valid & OBD_MD_FLMODE)
-                dst->o_mode = (dst->o_mode & S_IFMT) | (src->i_mode & ~S_IFMT);
-        if (valid & OBD_MD_FLUID)
-                dst->o_uid = src->i_uid;
-        if (valid & OBD_MD_FLGID)
-                dst->o_gid = src->i_gid;
-        if (valid & OBD_MD_FLFLAGS)
-                dst->o_flags = src->i_flags;
-        if (valid & OBD_MD_FLNLINK)
-                dst->o_nlink = src->i_nlink;
-        if (valid & OBD_MD_FLGENER)
-                dst->o_generation = src->i_generation;
-        if (valid & OBD_MD_FLRDEV)
-                dst->o_rdev = (__u32)kdev_t_to_nr(src->i_rdev);
-
-        dst->o_valid |= (valid & ~OBD_MD_FLID);
-}
-
-static inline void obdo_refresh_inode(struct inode *dst, struct obdo *src,
-                                      obd_flag valid)
-{
-        valid &= src->o_valid;
-
-        if (valid & OBD_MD_FLATIME && src->o_atime > LTIME_S(dst->i_atime))
-                LTIME_S(dst->i_atime) = src->o_atime;
-        if (valid & OBD_MD_FLMTIME && src->o_mtime > LTIME_S(dst->i_mtime))
-                LTIME_S(dst->i_mtime) = src->o_mtime;
-        if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(dst->i_ctime))
-                LTIME_S(dst->i_ctime) = src->o_ctime;
-        if (valid & OBD_MD_FLSIZE && src->o_size > dst->i_size)
-                dst->i_size = src->o_size;
-        /* allocation of space */
-        if (valid & OBD_MD_FLBLOCKS && src->o_blocks > dst->i_blocks)
-                dst->i_blocks = src->o_blocks;
-}
-
-static inline void obdo_to_inode(struct inode *dst, struct obdo *src,
-                                 obd_flag valid)
-{
-        valid &= src->o_valid;
-
-        if (valid & OBD_MD_FLATIME)
-                LTIME_S(dst->i_atime) = src->o_atime;
-        if (valid & OBD_MD_FLMTIME)
-                LTIME_S(dst->i_mtime) = src->o_mtime;
-        if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(dst->i_ctime))
-                LTIME_S(dst->i_ctime) = src->o_ctime;
-        if (valid & OBD_MD_FLSIZE)
-                dst->i_size = src->o_size;
-        if (valid & OBD_MD_FLBLOCKS) /* allocation of space */
-                dst->i_blocks = src->o_blocks;
-        if (valid & OBD_MD_FLBLKSZ)
-                dst->i_blksize = src->o_blksize;
-        if (valid & OBD_MD_FLTYPE)
-                dst->i_mode = (dst->i_mode & ~S_IFMT) | (src->o_mode & S_IFMT);
-        if (valid & OBD_MD_FLMODE)
-                dst->i_mode = (dst->i_mode & S_IFMT) | (src->o_mode & ~S_IFMT);
-        if (valid & OBD_MD_FLUID)
-                dst->i_uid = src->o_uid;
-        if (valid & OBD_MD_FLGID)
-                dst->i_gid = src->o_gid;
-        if (valid & OBD_MD_FLFLAGS)
-                dst->i_flags = src->o_flags;
-        if (valid & OBD_MD_FLNLINK)
-                dst->i_nlink = src->o_nlink;
-        if (valid & OBD_MD_FLGENER)
-                dst->i_generation = src->o_generation;
-        if (valid & OBD_MD_FLRDEV)
-                dst->i_rdev = to_kdev_t(src->o_rdev);
-}
-#endif
-
-static inline void obdo_cpy_md(struct obdo *dst, struct obdo *src,
-                               obd_flag valid)
-{
-#ifdef __KERNEL__
-        CDEBUG(D_INODE, "src obdo %Ld valid 0x%x, dst obdo %Ld\n",
-               (unsigned long long)src->o_id, src->o_valid,
-               (unsigned long long)dst->o_id);
-#endif
-        if (valid & OBD_MD_FLATIME)
-                dst->o_atime = src->o_atime;
-        if (valid & OBD_MD_FLMTIME)
-                dst->o_mtime = src->o_mtime;
-        if (valid & OBD_MD_FLCTIME)
-                dst->o_ctime = src->o_ctime;
-        if (valid & OBD_MD_FLSIZE)
-                dst->o_size = src->o_size;
-        if (valid & OBD_MD_FLBLOCKS) /* allocation of space */
-                dst->o_blocks = src->o_blocks;
-        if (valid & OBD_MD_FLBLKSZ)
-                dst->o_blksize = src->o_blksize;
-        if (valid & OBD_MD_FLTYPE)
-                dst->o_mode = (dst->o_mode & ~S_IFMT) | (src->o_mode & S_IFMT);
-        if (valid & OBD_MD_FLMODE)
-                dst->o_mode = (dst->o_mode & S_IFMT) | (src->o_mode & ~S_IFMT);
-        if (valid & OBD_MD_FLUID)
-                dst->o_uid = src->o_uid;
-        if (valid & OBD_MD_FLGID)
-                dst->o_gid = src->o_gid;
-        if (valid & OBD_MD_FLFLAGS)
-                dst->o_flags = src->o_flags;
-        /*
-        if (valid & OBD_MD_FLOBDFLG)
-                dst->o_obdflags = src->o_obdflags;
-        */
-        if (valid & OBD_MD_FLNLINK)
-                dst->o_nlink = src->o_nlink;
-        if (valid & OBD_MD_FLGENER)
-                dst->o_generation = src->o_generation;
-        if (valid & OBD_MD_FLRDEV)
-                dst->o_rdev = src->o_rdev;
-        if (valid & OBD_MD_FLINLINE &&
-             src->o_obdflags & OBD_FL_INLINEDATA) {
-                memcpy(dst->o_inline, src->o_inline, sizeof(src->o_inline));
-                dst->o_obdflags |= OBD_FL_INLINEDATA;
-        }
-
-        dst->o_valid |= valid;
-}
-
-
-/* returns FALSE if comparison (by flags) is same, TRUE if changed */
-static inline int obdo_cmp_md(struct obdo *dst, struct obdo *src,
-                              obd_flag compare)
-{
-        int res = 0;
-
-        if ( compare & OBD_MD_FLATIME )
-                res = (res || (dst->o_atime != src->o_atime));
-        if ( compare & OBD_MD_FLMTIME )
-                res = (res || (dst->o_mtime != src->o_mtime));
-        if ( compare & OBD_MD_FLCTIME )
-                res = (res || (dst->o_ctime != src->o_ctime));
-        if ( compare & OBD_MD_FLSIZE )
-                res = (res || (dst->o_size != src->o_size));
-        if ( compare & OBD_MD_FLBLOCKS ) /* allocation of space */
-                res = (res || (dst->o_blocks != src->o_blocks));
-        if ( compare & OBD_MD_FLBLKSZ )
-                res = (res || (dst->o_blksize != src->o_blksize));
-        if ( compare & OBD_MD_FLTYPE )
-                res = (res || (((dst->o_mode ^ src->o_mode) & S_IFMT) != 0));
-        if ( compare & OBD_MD_FLMODE )
-                res = (res || (((dst->o_mode ^ src->o_mode) & ~S_IFMT) != 0));
-        if ( compare & OBD_MD_FLUID )
-                res = (res || (dst->o_uid != src->o_uid));
-        if ( compare & OBD_MD_FLGID )
-                res = (res || (dst->o_gid != src->o_gid));
-        if ( compare & OBD_MD_FLFLAGS )
-                res = (res || (dst->o_flags != src->o_flags));
-        if ( compare & OBD_MD_FLNLINK )
-                res = (res || (dst->o_nlink != src->o_nlink));
-        if ( compare & OBD_MD_FLGENER )
-                res = (res || (dst->o_generation != src->o_generation));
-        /* XXX Don't know if thses should be included here - wasn't previously
-        if ( compare & OBD_MD_FLINLINE )
-                res = (res || memcmp(dst->o_inline, src->o_inline));
-        */
-        return res;
-}
-
 /* I'm as embarrassed about this as you are.
  *
  * <shaver> // XXX do not look into _superhack with remaining eye
@@ -1124,11 +962,6 @@ static inline int obdo_cmp_md(struct obdo *dst, struct obdo *src,
 extern int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c);
 extern void (*ptlrpc_abort_inflight_superhack)(struct obd_import *imp);
 
-struct obd_statfs;
-struct statfs;
-void statfs_pack(struct obd_statfs *osfs, struct statfs *sfs);
-void statfs_unpack(struct statfs *sfs, struct obd_statfs *osfs);
-
 struct obd_class_user_state {
         struct obd_device     *ocus_current_obd;
         struct list_head       ocus_conns;
index b12a062..6d68ae9 100644 (file)
@@ -8,14 +8,17 @@
 #define OBD_LOV_DEVICENAME "lov"
 
 struct lov_brw_async_args {
-        obd_count        aa_oa_bufs;
-        struct brw_page *aa_ioarr;
+        struct lov_stripe_md  *aa_lsm;
+        struct obdo           *aa_obdos;
+        struct obdo           *aa_oa;
+        struct brw_page       *aa_ioarr;
+        obd_count              aa_oa_bufs;
 };
 
 struct lov_getattr_async_args {
         struct lov_stripe_md  *aa_lsm;
         struct obdo           *aa_oa;
-        struct obdo           *aa_stripe_oas;
+        struct obdo           *aa_obdos;
 };
 
 static inline int lov_stripe_md_size(int stripes)
@@ -28,15 +31,6 @@ static inline int lov_mds_md_size(int stripes)
         return sizeof(struct lov_mds_md) + stripes*sizeof(struct lov_object_id);
 }
 
-extern int lov_packmd(struct lustre_handle *conn, struct lov_mds_md **lmm,
-                       struct lov_stripe_md *lsm);
-extern int lov_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsm,
-                         struct lov_mds_md *lmm, int lmmsize);
-extern int lov_setstripe(struct lustre_handle *conn,
-                         struct lov_stripe_md **lsmp, struct lov_mds_md *lmmu);
-extern int lov_getstripe(struct lustre_handle *conn, 
-                         struct lov_stripe_md *lsm, struct lov_mds_md *lmmu);
-
 #define IOC_LOV_TYPE                   'g'
 #define IOC_LOV_MIN_NR                 50
 #define IOC_LOV_SET_OSC_ACTIVE         _IOWR('g', 50, long)
index 22fe694..ac2e24b 100644 (file)
@@ -35,6 +35,7 @@
 #define LUSTRE_SANOST_NAME "sanost"
 
 struct osc_brw_async_args {
+        struct obdo     *aa_oa;
         int              aa_requested_nob;
         int              aa_nio_count;
         obd_count        aa_page_count;
index 2a76905..28a9a3d 100644 (file)
@@ -74,8 +74,10 @@ extern unsigned long obd_sync_filter;
 #define OBD_FAIL_MDS_STATFS_PACK         0x11d
 #define OBD_FAIL_MDS_STATFS_NET          0x11e
 #define OBD_FAIL_MDS_GETATTR_NAME_NET    0x11f
-#define OBD_FAIL_MDS_ALL_REPLY_NET       0x120
-#define OBD_FAIL_MDS_ALL_REQUEST_NET     0x121
+#define OBD_FAIL_MDS_PIN_NET             0x120
+#define OBD_FAIL_MDS_UNPIN_NET           0x121
+#define OBD_FAIL_MDS_ALL_REPLY_NET       0x122
+#define OBD_FAIL_MDS_ALL_REQUEST_NET     0x123
 
 #define OBD_FAIL_OST                     0x200
 #define OBD_FAIL_OST_CONNECT_NET         0x201
@@ -116,6 +118,9 @@ extern unsigned long obd_sync_filter;
 #define OBD_FAIL_PTLRPC                  0x500
 #define OBD_FAIL_PTLRPC_ACK              0x501
 
+#define OBD_FAIL_OBD_PING_NET            0x600
+#define OBD_FAIL_OBD_LOG_CANCEL_NET      0x601
+
 /* preparation for a more advanced failure testbed (not functional yet) */
 #define OBD_FAIL_MASK_SYS    0x0000FF00
 #define OBD_FAIL_MASK_LOC    (0x000000FF | OBD_FAIL_MASK_SYS)
@@ -169,37 +174,27 @@ do {                                                                         \
 
 
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#define ll_bdevname(a) __bdevname((a))
+#define BDEVNAME_DECLARE_STORAGE(foo) char foo[BDEVNAME_SIZE]
+#define ll_bdevname(DEV, STORAGE) __bdevname(DEV, STORAGE)
 #define ll_lock_kernel lock_kernel()
-#define LTIME_S(time) (time.tv_sec)
 #else
+#define BDEVNAME_DECLARE_STORAGE(foo) char __unused_##foo
+#define ll_bdevname(DEV, STORAGE) ((void)__unused_##STORAGE, bdevname((DEV)))
 #define ll_lock_kernel
-#define ll_bdevname(a) bdevname((a))
-#define LTIME_S(time) (time)
 #endif
 
 
 static inline void OBD_FAIL_WRITE(int id, kdev_t dev)
 {
         if (OBD_FAIL_CHECK(id)) {
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+                BDEVNAME_DECLARE_STORAGE(tmp);
 #ifdef CONFIG_DEV_RDONLY
                 CERROR("obd_fail_loc=%x, fail write operation on %s\n",
-                       id, ll_bdevname(dev));
+                       id, ll_bdevname(kdev_t_to_nr(dev), tmp));
                 dev_set_rdonly(dev, 2);
 #else
                 CERROR("obd_fail_loc=%x, can't fail write operation on %s\n",
-                       id, ll_bdevname(dev));
-#endif
-#else
-#ifdef CONFIG_DEV_RDONLY
-                CERROR("obd_fail_loc=%x, fail write operation on %s\n",
-                       id, ll_bdevname(dev.value));
-                dev_set_rdonly(dev, 2);
-#else
-                CERROR("obd_fail_loc=%x, can't fail write operation on %s\n",
-                       id, ll_bdevname(dev.value));
-#endif
+                       id, ll_bdevname(kdev_t_to_nr(dev), tmp));
 #endif
                 /* We set FAIL_ONCE because we never "un-fail" a device */
                 obd_fail_loc |= OBD_FAILED | OBD_FAIL_ONCE;
@@ -209,9 +204,9 @@ static inline void OBD_FAIL_WRITE(int id, kdev_t dev)
 #define LTIME_S(time) (time)
 #endif  /* __KERNEL__ */
 
-#define OBD_ALLOC(ptr, size)                                                  \
+#define OBD_ALLOC_GFP(ptr, size, gfp_mask)                                    \
 do {                                                                          \
-        (ptr) = kmalloc(size, GFP_KERNEL);                                    \
+        (ptr) = kmalloc(size, gfp_mask);                                      \
         if ((ptr) == NULL) {                                                  \
                 CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n",  \
                        (int)(size), __FILE__, __LINE__);                      \
@@ -225,6 +220,12 @@ do {                                                                          \
         }                                                                     \
 } while (0)
 
+#ifndef OBD_GFP_MASK
+# define OBD_GFP_MASK GFP_KERNEL
+#endif
+
+#define OBD_ALLOC(ptr, size) OBD_ALLOC_GFP(ptr, size, OBD_GFP_MASK)
+
 #ifdef __arch_um__
 # define OBD_VMALLOC(ptr, size) OBD_ALLOC(ptr, size)
 #else
@@ -246,9 +247,9 @@ do {                                                                          \
 #endif
 
 #ifdef CONFIG_DEBUG_SLAB
-#define POISON(lptr, c, s) do {} while (0)
+#define POISON(ptr, c, s) do {} while (0)
 #else
-#define POISON(lptr, c, s) memset(lptr, c, s)
+#define POISON(ptr, c, s) memset(ptr, c, s)
 #endif
 
 #define OBD_FREE(ptr, size)                                                   \
@@ -277,9 +278,12 @@ do {                                                                          \
 } while (0)
 #endif
 
+/* we memset() the slab object to 0 when allocation succeeds, so DO NOT
+ * HAVE A CTOR THAT DOES ANYTHING.  its work will be cleared here.  we'd
+ * love to assert on that, but slab.c keeps kmem_cache_s all to itself. */
 #define OBD_SLAB_ALLOC(ptr, slab, type, size)                                 \
 do {                                                                          \
-        LASSERT (!in_interrupt());                                            \
+        LASSERT(!in_interrupt());                                             \
         (ptr) = kmem_cache_alloc(slab, type);                                 \
         if ((ptr) == NULL) {                                                  \
                 CERROR("slab-alloc of '"#ptr"' (%d bytes) failed at %s:%d\n", \
index 55057d9..1b589b9 100644 (file)
@@ -1,13 +1,10 @@
-
-
-
  drivers/block/blkpg.c  |   35 +++++++++++++++++++++++++++++++++++
  drivers/block/loop.c   |    3 +++
- drivers/ide/ide-disk.c |    5 ++++-
- 3 files changed, 42 insertions(+), 1 deletion(-)
+ drivers/ide/ide-disk.c |    5 +++++
+ 3 files changed, 43 insertions(+)
 
---- rh-2.4.20/drivers/block/blkpg.c~dev_read_only_2.4.20       2003-04-11 14:05:03.000000000 +0800
-+++ rh-2.4.20-root/drivers/block/blkpg.c       2003-04-12 13:11:31.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/drivers/block/blkpg.c~dev_read_only_2.4.20-rh    2003-05-15 21:12:48.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/drivers/block/blkpg.c      2003-07-12 15:10:31.000000000 -0600
 @@ -297,3 +297,38 @@ int blk_ioctl(kdev_t dev, unsigned int c
  }
  
@@ -47,8 +44,8 @@
 +EXPORT_SYMBOL(dev_set_rdonly);
 +EXPORT_SYMBOL(dev_check_rdonly);
 +EXPORT_SYMBOL(dev_clear_rdonly);
---- rh-2.4.20/drivers/block/loop.c~dev_read_only_2.4.20        2003-04-11 14:05:08.000000000 +0800
-+++ rh-2.4.20-root/drivers/block/loop.c        2003-04-12 13:11:31.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/drivers/block/loop.c~dev_read_only_2.4.20-rh     2003-05-15 21:12:50.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/drivers/block/loop.c       2003-07-12 15:10:31.000000000 -0600
 @@ -491,6 +491,9 @@ static int loop_make_request(request_que
        spin_unlock_irq(&lo->lo_lock);
  
                if (lo->lo_flags & LO_FLAGS_READ_ONLY)
                        goto err;
        } else if (rw == READA) {
---- rh-2.4.20/drivers/ide/ide-disk.c~dev_read_only_2.4.20      2003-04-11 14:04:53.000000000 +0800
-+++ rh-2.4.20-root/drivers/ide/ide-disk.c      2003-04-12 13:14:48.000000000 +0800
-@@ -381,7 +381,10 @@ static ide_startstop_t do_rw_disk (ide_d
-       if (IS_PDC4030_DRIVE)
-               return promise_rw_disk(drive, rq, block);
- #endif /* CONFIG_BLK_DEV_PDC4030 */
--
-+      if (rq->cmd == WRITE && dev_check_rdonly(rq->rq_dev)) {
-+              ide_end_request(1, HWGROUP(drive));
-+              return ide_stopped;
-+      }
+--- kernel-2.4.20-6chaos_18_7/drivers/ide/ide-disk.c~dev_read_only_2.4.20-rh   2003-05-15 21:13:09.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/drivers/ide/ide-disk.c     2003-07-12 15:12:03.000000000 -0600
+@@ -371,6 +371,11 @@ ide_startstop_t __ide_do_rw_disk (ide_dr
+       if (driver_blocked)
+               panic("Request while ide driver is blocked?");
++      if (rq->cmd == WRITE && dev_check_rdonly(rq->rq_dev)) {
++              ide_end_request(1, HWGROUP(drive));
++              return ide_stopped;
++      }
++
        if (IDE_CONTROL_REG)
                hwif->OUTB(drive->ctl, IDE_CONTROL_REG);
  
index 3d82572..3063be4 100644 (file)
@@ -9,7 +9,7 @@
                return 0;
  }
 +/* truncate.c */
-+extern void truncate_complete_page(struct page *);
++extern void truncate_complete_page(struct address_space *mapping,struct page *);
  
  /* filemap.c */
  extern unsigned long page_unuse(struct page *);
index e01feca..a173981 100644 (file)
@@ -1,11 +1,17 @@
- fs/ext3/super.c            |  229 +++++++++++++++++++++++++++++++++++++++++++++
- include/linux/ext3_fs.h    |    2 
+
+Create a service thread to handle delete and truncate of inodes, to avoid
+long latency while truncating very large files.
+
+
+ fs/ext3/inode.c            |  116 ++++++++++++++++++++++
+ fs/ext3/super.c            |  231 +++++++++++++++++++++++++++++++++++++++++++++
+ include/linux/ext3_fs.h    |    5 
  include/linux/ext3_fs_sb.h |   10 +
- 3 files changed, 241 insertions(+)
+ 4 files changed, 362 insertions(+)
 
 --- linux-2.4.18-18.8.0-l15/fs/ext3/super.c~ext3-delete_thread-2.4.18  Tue Jun  3 17:26:21 2003
-+++ linux-2.4.18-18.8.0-l15-adilger/fs/ext3/super.c    Wed Jun 18 11:59:14 2003
-@@ -396,6 +396,219 @@ static void dump_orphan_list(struct supe
++++ linux-2.4.18-18.8.0-l15-adilger/fs/ext3/super.c    Wed Jul  2 23:49:40 2003
+@@ -396,6 +396,220 @@ static void dump_orphan_list(struct supe
        }
  }
  
 + * If we have any problem deferring the delete, just delete it right away.
 + * If we defer it, we also mark how many blocks it would free, so that we
 + * can keep the statfs data correct, and we know if we should sleep on the
-+ * truncate thread when we run out of space.
-+ *
-+ * In 2.5 this can be done much more cleanly by just registering a "drop"
-+ * method in the super_operations struct.
++ * delete thread when we run out of space.
 + */
 +static void ext3_delete_inode_thread(struct inode *old_inode)
 +{
 +      struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
++      struct ext3_inode_info *nei, *oei = EXT3_I(old_inode);
 +      struct inode *new_inode;
 +      unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
 +
 +              return;
 +      }
 +
-+      if (!test_opt(old_inode->i_sb, ASYNCDEL)) {
-+              ext3_delete_inode(old_inode);
-+              return;
-+      }
++      if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next)
++              goto out_delete;
 +
 +      /* We may want to delete the inode immediately and not defer it */
-+      if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS ||
-+          !sbi->s_delete_list.next) {
-+              ext3_delete_inode(old_inode);
-+              return;
-+      }
++      if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS)
++              goto out_delete;
 +
-+      if ((EXT3_I(old_inode)->i_state & EXT3_STATE_DELETE) ||
-+          (EXT3_SB(old_inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) {
++      /* We can't use the delete thread as-is during real orphan recovery,
++       * as we add to the orphan list here, causing ext3_orphan_cleanup()
++       * to loop endlessly.  It would be nice to do so, but needs work.
++       */
++      if (oei->i_state & EXT3_STATE_DELETE ||
++          sbi->s_mount_state & EXT3_ORPHAN_FS) {
 +              ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
 +                         old_inode->i_ino, blocks);
-+              ext3_delete_inode(old_inode);
-+              return;
++              goto out_delete;
 +      }
 +
 +      /* We can iget this inode again here, because our caller has unhashed
 +       */
 +      down(&sbi->s_orphan_lock);
 +
-+      EXT3_SB(old_inode->i_sb)->s_mount_state |= EXT3_ORPHAN_FS;
++      sbi->s_mount_state |= EXT3_ORPHAN_FS;
 +      new_inode = iget(old_inode->i_sb, old_inode->i_ino);
-+      EXT3_SB(old_inode->i_sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
++      sbi->s_mount_state &= ~EXT3_ORPHAN_FS;
 +      if (is_bad_inode(new_inode)) {
 +              printk(KERN_WARNING "read bad inode %lu\n", old_inode->i_ino);
 +              iput(new_inode);
 +              up(&sbi->s_orphan_lock);
 +              ext3_debug("delete inode %lu directly (bad read)\n",
 +                         old_inode->i_ino);
-+              ext3_delete_inode(old_inode);
-+              return;
++              goto out_delete;
 +      }
 +      J_ASSERT(new_inode != old_inode);
 +
-+      J_ASSERT(!list_empty(&EXT3_I(old_inode)->i_orphan));
++      J_ASSERT(!list_empty(&oei->i_orphan));
++
++      nei = EXT3_I(new_inode);
 +      /* Ugh.  We need to insert new_inode into the same spot on the list
 +       * as old_inode was, to ensure the in-memory orphan list is still
 +       * in the same order as the on-disk orphan list (badness otherwise).
 +       */
-+      EXT3_I(new_inode)->i_orphan = EXT3_I(old_inode)->i_orphan;
-+      EXT3_I(new_inode)->i_orphan.next->prev = &EXT3_I(new_inode)->i_orphan;
-+      EXT3_I(new_inode)->i_orphan.prev->next = &EXT3_I(new_inode)->i_orphan;
-+      EXT3_I(new_inode)->i_state |= EXT3_STATE_DELETE;
++      nei->i_orphan = oei->i_orphan;
++      nei->i_orphan.next->prev = &nei->i_orphan;
++      nei->i_orphan.prev->next = &nei->i_orphan;
++      nei->i_state |= EXT3_STATE_DELETE;
 +      up(&sbi->s_orphan_lock);
 +
 +      clear_inode(old_inode);
 +                 new_inode->i_ino, blocks);
 +
 +      wake_up(&sbi->s_delete_thread_queue);
++      return;
++
++out_delete:
++      ext3_delete_inode(old_inode);
 +}
 +#else
 +#define ext3_start_delete_thread(sbi) do {} while(0)
  void ext3_put_super (struct super_block * sb)
  {
        struct ext3_sb_info *sbi = EXT3_SB(sb);
-@@ -403,6 +615,7 @@ void ext3_put_super (struct super_block 
+@@ -403,6 +617,7 @@ void ext3_put_super (struct super_block 
        kdev_t j_dev = sbi->s_journal->j_dev;
        int i;
  
        ext3_xattr_put_super(sb);
        journal_destroy(sbi->s_journal);
        if (!(sb->s_flags & MS_RDONLY)) {
-@@ -451,7 +664,11 @@ static struct super_operations ext3_sops
+@@ -451,7 +666,11 @@ static struct super_operations ext3_sops
        write_inode:    ext3_write_inode,       /* BKL not held.  Don't need */
        dirty_inode:    ext3_dirty_inode,       /* BKL not held.  We take it */
        put_inode:      ext3_put_inode,         /* BKL not held.  Don't need */
        put_super:      ext3_put_super,         /* BKL held */
        write_super:    ext3_write_super,       /* BKL held */
        write_super_lockfs: ext3_write_super_lockfs, /* BKL not held. Take it */
-@@ -511,6 +728,14 @@ static int parse_options (char * options
+@@ -511,6 +730,14 @@ static int parse_options (char * options
             this_char = strtok (NULL, ",")) {
                if ((value = strchr (this_char, '=')) != NULL)
                        *value++ = 0;
                if (!strcmp (this_char, "bsddf"))
                        clear_opt (*mount_options, MINIX_DF);
                else if (!strcmp (this_char, "nouid32")) {
-@@ -1206,6 +1431,7 @@ struct super_block * ext3_read_super (st
+@@ -1206,6 +1433,7 @@ struct super_block * ext3_read_super (st
        }
  
        ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
        /*
         * akpm: core read_super() calls in here with the superblock locked.
         * That deadlocks, because orphan cleanup needs to lock the superblock
-@@ -1648,6 +1874,9 @@ int ext3_remount (struct super_block * s
+@@ -1648,6 +1876,9 @@ int ext3_remount (struct super_block * s
        if (!parse_options(data, &tmp, sbi, &tmp, 1))
                return -EINVAL;
  
        if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
                ext3_abort(sb, __FUNCTION__, "Abort forced by user");
  
+--- linux/fs/ext3/file.c.orig  Fri Jan 17 10:57:31 2003
++++ linux/fs/ext3/file.c       Mon Jun 30 13:28:52 2003
+@@ -121,7 +121,11 @@ struct file_operations ext3_file_operati
+ };
+ struct inode_operations ext3_file_inode_operations = {
++#ifdef EXT3_DELETE_THREAD
++      truncate:       ext3_truncate_thread,   /* BKL held */
++#else
+       truncate:       ext3_truncate,          /* BKL held */
++#endif
+       setattr:        ext3_setattr,           /* BKL held */
+ };
+--- linux-2.4.18-18.8.0-l15/fs/ext3/inode.c~ext3-delete_thread-2.4.18  Wed Jul  2 23:13:58 2003
++++ linux-2.4.18-18.8.0-l15-adilger/fs/ext3/inode.c    Wed Jul  2 23:50:29 2003
+@@ -2004,6 +2004,118 @@ out_stop:
+       ext3_journal_stop(handle, inode);
+ }
++#ifdef EXT3_DELETE_THREAD
++/* Move blocks from to-be-truncated inode over to a new inode, and delete
++ * that one from the delete thread instead.  This avoids a lot of latency
++ * when truncating large files.
++ *
++ * If we have any problem deferring the truncate, just truncate it right away.
++ * If we defer it, we also mark how many blocks it would free, so that we
++ * can keep the statfs data correct, and we know if we should sleep on the
++ * delete thread when we run out of space.
++ */
++void ext3_truncate_thread(struct inode *old_inode)
++{
++      struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
++      struct ext3_inode_info *nei, *oei = EXT3_I(old_inode);
++      struct inode *new_inode;
++      handle_t *handle;
++      unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
++
++      if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next)
++              goto out_truncate;
++
++      /* XXX This is a temporary limitation for code simplicity.
++       *     We could truncate to arbitrary sizes at some later time.
++       */
++      if (old_inode->i_size != 0)
++              goto out_truncate;
++
++      /* We may want to truncate the inode immediately and not defer it */
++      if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS ||
++          old_inode->i_size > oei->i_disksize)
++              goto out_truncate;
++
++      /* We can't use the delete thread as-is during real orphan recovery,
++       * as we add to the orphan list here, causing ext3_orphan_cleanup()
++       * to loop endlessly.  It would be nice to do so, but needs work.
++       */
++      if (oei->i_state & EXT3_STATE_DELETE ||
++          sbi->s_mount_state & EXT3_ORPHAN_FS) {
++              ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
++                         old_inode->i_ino, blocks);
++              goto out_truncate;
++      }
++
++      ext3_discard_prealloc(old_inode);
++
++      /* old_inode   = 1
++       * new_inode   = sb + GDT + ibitmap
++       * orphan list = 1 inode/superblock for add, 2 inodes for del
++       * quota files = 2 * EXT3_SINGLEDATA_TRANS_BLOCKS
++       */
++      handle = ext3_journal_start(old_inode, 7);
++      if (IS_ERR(handle))
++              goto out_truncate;
++
++      new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode);
++      if (IS_ERR(new_inode)) {
++              ext3_debug("truncate inode %lu directly (no new inodes)\n",
++                         old_inode->i_ino);
++              goto out_journal;
++      }
++
++      nei = EXT3_I(new_inode);
++
++      down_write(&oei->truncate_sem);
++      new_inode->i_size = old_inode->i_size;
++      new_inode->i_blocks = old_inode->i_blocks;
++      new_inode->i_uid = old_inode->i_uid;
++      new_inode->i_gid = old_inode->i_gid;
++      new_inode->i_nlink = 0;
++
++      /* FIXME when we do arbitrary truncates */
++      old_inode->i_blocks = oei->i_file_acl ? old_inode->i_blksize / 512 : 0;
++      old_inode->i_mtime = old_inode->i_ctime = CURRENT_TIME;
++
++      memcpy(nei->i_data, oei->i_data, sizeof(nei->i_data));
++      memset(oei->i_data, 0, sizeof(oei->i_data));
++
++      nei->i_disksize = oei->i_disksize;
++      nei->i_state |= EXT3_STATE_DELETE;
++      up_write(&oei->truncate_sem);
++
++      if (ext3_orphan_add(handle, new_inode) < 0)
++              goto out_journal;
++
++      if (ext3_orphan_del(handle, old_inode) < 0) {
++              ext3_orphan_del(handle, new_inode);
++              iput(new_inode);
++              goto out_journal;
++      }
++
++      ext3_journal_stop(handle, old_inode);
++
++      spin_lock(&sbi->s_delete_lock);
++      J_ASSERT(list_empty(&new_inode->i_dentry));
++      list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list);
++      sbi->s_delete_blocks += blocks;
++      sbi->s_delete_inodes++;
++      spin_unlock(&sbi->s_delete_lock);
++
++      ext3_debug("delete inode %lu (%lu blocks) by thread\n",
++                 new_inode->i_ino, blocks);
++
++      wake_up(&sbi->s_delete_thread_queue);
++      return;
++
++out_journal:
++      ext3_journal_stop(handle, old_inode);
++out_truncate:
++      ext3_truncate(old_inode);
++}
++#endif /* EXT3_DELETE_THREAD */
++
+ /* 
+  * ext3_get_inode_loc returns with an extra refcount against the
+  * inode's underlying buffer_head on success. 
 --- linux-2.4.18-18.8.0-l15/include/linux/ext3_fs.h~ext3-delete_thread-2.4.18  Tue Jun  3 17:26:20 2003
-+++ linux-2.4.18-18.8.0-l15-adilger/include/linux/ext3_fs.h    Tue Jun 17 12:36:56 2003
++++ linux-2.4.18-18.8.0-l15-adilger/include/linux/ext3_fs.h    Wed Jul  2 23:19:09 2003
 @@ -190,6 +190,7 @@ struct ext3_group_desc
   */
  #define EXT3_STATE_JDATA              0x00000001 /* journaled data exists */
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
  #ifndef _LINUX_EXT2_FS_H
+@@ -651,6 +653,9 @@ extern void ext3_discard_prealloc (struc
+ extern void ext3_dirty_inode(struct inode *);
+ extern int ext3_change_inode_journal_flag(struct inode *, int);
+ extern void ext3_truncate (struct inode *);
++#ifdef EXT3_DELETE_THREAD
++extern void ext3_truncate_thread(struct inode *inode);
++#endif
+ /* ioctl.c */
+ extern int ext3_ioctl (struct inode *, struct file *, unsigned int,
 --- linux-2.4.18-18.8.0-l15/include/linux/ext3_fs_sb.h~ext3-delete_thread-2.4.18       Tue Jun  3 17:26:21 2003
-+++ linux-2.4.18-18.8.0-l15-adilger/include/linux/ext3_fs_sb.h Tue Jun 17 12:36:56 2003
++++ linux-2.4.18-18.8.0-l15-adilger/include/linux/ext3_fs_sb.h Wed Jul  2 23:19:09 2003
 @@ -29,6 +29,8 @@
  
  #define EXT3_MAX_GROUP_LOADED 32
index 34c5158..a8816ec 100644 (file)
@@ -1,7 +1,13 @@
-diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
---- origin/fs/ext3/super.c     2003-05-04 17:23:52.000000000 +0400
-+++ linux/fs/ext3/super.c      2003-05-04 17:09:20.000000000 +0400
-@@ -398,6 +398,219 @@ static void dump_orphan_list(struct supe
+ fs/ext3/file.c             |    4 
+ fs/ext3/inode.c            |  116 ++++++++++++++++++++++
+ fs/ext3/super.c            |  230 +++++++++++++++++++++++++++++++++++++++++++++
+ include/linux/ext3_fs.h    |    5 
+ include/linux/ext3_fs_sb.h |   10 +
+ 5 files changed, 365 insertions(+)
+
+--- linux/fs/ext3/super.c~ext3-delete_thread-2.4.20    Thu Jul 10 14:11:32 2003
++++ linux-mmonroe/fs/ext3/super.c      Thu Jul 10 14:11:33 2003
+@@ -400,6 +400,220 @@ static void dump_orphan_list(struct supe
        }
  }
  
@@ -126,14 +132,12 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
 + * If we have any problem deferring the delete, just delete it right away.
 + * If we defer it, we also mark how many blocks it would free, so that we
 + * can keep the statfs data correct, and we know if we should sleep on the
-+ * truncate thread when we run out of space.
-+ *
-+ * In 2.5 this can be done much more cleanly by just registering a "drop"
-+ * method in the super_operations struct.
++ * delete thread when we run out of space.
 + */
 +static void ext3_delete_inode_thread(struct inode *old_inode)
 +{
 +      struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
++      struct ext3_inode_info *nei, *oei = EXT3_I(old_inode);
 +      struct inode *new_inode;
 +      unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
 +
@@ -142,24 +146,22 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
 +              return;
 +      }
 +
-+      if (!test_opt(old_inode->i_sb, ASYNCDEL)) {
-+              ext3_delete_inode(old_inode);
-+              return;
-+      }
++      if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next)
++              goto out_delete;
 +
 +      /* We may want to delete the inode immediately and not defer it */
-+      if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS ||
-+          !sbi->s_delete_list.next) {
-+              ext3_delete_inode(old_inode);
-+              return;
-+      }
++      if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS)
++              goto out_delete;
 +
-+      if ((EXT3_I(old_inode)->i_state & EXT3_STATE_DELETE) ||
-+          (EXT3_SB(old_inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) {
++      /* We can't use the delete thread as-is during real orphan recovery,
++       * as we add to the orphan list here, causing ext3_orphan_cleanup()
++       * to loop endlessly.  It would be nice to do so, but needs work.
++       */
++      if (oei->i_state & EXT3_STATE_DELETE ||
++          sbi->s_mount_state & EXT3_ORPHAN_FS) {
 +              ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
 +                         old_inode->i_ino, blocks);
-+              ext3_delete_inode(old_inode);
-+              return;
++              goto out_delete;
 +      }
 +
 +      /* We can iget this inode again here, because our caller has unhashed
@@ -171,9 +173,9 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
 +       */
 +      down(&sbi->s_orphan_lock);
 +
-+      EXT3_SB(old_inode->i_sb)->s_mount_state |= EXT3_ORPHAN_FS;
++      sbi->s_mount_state |= EXT3_ORPHAN_FS;
 +      new_inode = iget(old_inode->i_sb, old_inode->i_ino);
-+      EXT3_SB(old_inode->i_sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
++      sbi->s_mount_state &= ~EXT3_ORPHAN_FS;
 +      if (is_bad_inode(new_inode)) {
 +              printk(KERN_WARNING "read bad inode %lu\n", old_inode->i_ino);
 +              iput(new_inode);
@@ -183,20 +185,21 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
 +              up(&sbi->s_orphan_lock);
 +              ext3_debug("delete inode %lu directly (bad read)\n",
 +                         old_inode->i_ino);
-+              ext3_delete_inode(old_inode);
-+              return;
++              goto out_delete;
 +      }
 +      J_ASSERT(new_inode != old_inode);
 +
-+      J_ASSERT(!list_empty(&EXT3_I(old_inode)->i_orphan));
++      J_ASSERT(!list_empty(&oei->i_orphan));
++
++      nei = EXT3_I(new_inode);
 +      /* Ugh.  We need to insert new_inode into the same spot on the list
 +       * as old_inode was, to ensure the in-memory orphan list is still
 +       * in the same order as the on-disk orphan list (badness otherwise).
 +       */
-+      EXT3_I(new_inode)->i_orphan = EXT3_I(old_inode)->i_orphan;
-+      EXT3_I(new_inode)->i_orphan.next->prev = &EXT3_I(new_inode)->i_orphan;
-+      EXT3_I(new_inode)->i_orphan.prev->next = &EXT3_I(new_inode)->i_orphan;
-+      EXT3_I(new_inode)->i_state |= EXT3_STATE_DELETE;
++      nei->i_orphan = oei->i_orphan;
++      nei->i_orphan.next->prev = &nei->i_orphan;
++      nei->i_orphan.prev->next = &nei->i_orphan;
++      nei->i_state |= EXT3_STATE_DELETE;
 +      up(&sbi->s_orphan_lock);
 +
 +      clear_inode(old_inode);
@@ -212,6 +215,10 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
 +                 new_inode->i_ino, blocks);
 +
 +      wake_up(&sbi->s_delete_thread_queue);
++      return;
++
++out_delete:
++      ext3_delete_inode(old_inode);
 +}
 +#else
 +#define ext3_start_delete_thread(sbi) do {} while(0)
@@ -221,7 +228,7 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
  void ext3_put_super (struct super_block * sb)
  {
        struct ext3_sb_info *sbi = EXT3_SB(sb);
-@@ -405,6 +611,7 @@ void ext3_put_super (struct super_block 
+@@ -407,6 +621,7 @@ void ext3_put_super (struct super_block 
        kdev_t j_dev = sbi->s_journal->j_dev;
        int i;
  
@@ -229,7 +236,7 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
        ext3_xattr_put_super(sb);
        journal_destroy(sbi->s_journal);
        if (!(sb->s_flags & MS_RDONLY)) {
-@@ -453,7 +660,11 @@ static struct super_operations ext3_sops
+@@ -455,7 +670,11 @@ static struct super_operations ext3_sops
        write_inode:    ext3_write_inode,       /* BKL not held.  Don't need */
        dirty_inode:    ext3_dirty_inode,       /* BKL not held.  We take it */
        put_inode:      ext3_put_inode,         /* BKL not held.  Don't need */
@@ -240,11 +247,11 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
 +#endif
        put_super:      ext3_put_super,         /* BKL held */
        write_super:    ext3_write_super,       /* BKL held */
-       write_super_lockfs: ext3_write_super_lockfs, /* BKL not held. Take it */
-@@ -514,6 +725,13 @@ static int parse_options (char * options
-            this_char = strtok (NULL, ",")) {
-               if ((value = strchr (this_char, '=')) != NULL)
-                       *value++ = 0;
+       sync_fs:        ext3_sync_fs,
+@@ -524,6 +743,13 @@ static int parse_options (char * options
+                       clear_opt (*mount_options, XATTR_USER);
+               else
+ #endif
 +#ifdef EXT3_DELETE_THREAD
 +              if (!strcmp(this_char, "asyncdel"))
 +                      set_opt(*mount_options, ASYNCDEL);
@@ -252,10 +259,10 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
 +                      clear_opt(*mount_options, ASYNCDEL);
 +              else
 +#endif
- #ifdef CONFIG_EXT3_FS_XATTR_USER
-               if (!strcmp (this_char, "user_xattr"))
-                       set_opt (*mount_options, XATTR_USER);
-@@ -1220,6 +1436,7 @@ struct super_block * ext3_read_super (st
+               if (!strcmp (this_char, "bsddf"))
+                       clear_opt (*mount_options, MINIX_DF);
+               else if (!strcmp (this_char, "nouid32")) {
+@@ -1223,6 +1449,7 @@ struct super_block * ext3_read_super (st
        }
  
        ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
@@ -263,7 +270,7 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
        /*
         * akpm: core read_super() calls in here with the superblock locked.
         * That deadlocks, because orphan cleanup needs to lock the superblock
-@@ -1648,6 +1874,9 @@ int ext3_remount (struct super_block * s
+@@ -1678,6 +1905,9 @@ int ext3_remount (struct super_block * s
        if (!parse_options(data, &tmp, sbi, &tmp, 1))
                return -EINVAL;
  
@@ -273,9 +280,143 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
        if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
                ext3_abort(sb, __FUNCTION__, "Abort forced by user");
  
-diff -puNr origin/include/linux/ext3_fs.h linux/include/linux/ext3_fs.h
---- origin/include/linux/ext3_fs.h     2003-05-04 17:22:49.000000000 +0400
-+++ linux/include/linux/ext3_fs.h      2003-05-04 15:06:10.000000000 +0400
+--- linux/fs/ext3/inode.c~ext3-delete_thread-2.4.20    Thu Jul 10 14:11:29 2003
++++ linux-mmonroe/fs/ext3/inode.c      Thu Jul 10 14:11:33 2003
+@@ -2013,6 +2013,118 @@ out_stop:
+       ext3_journal_stop(handle, inode);
+ }
++#ifdef EXT3_DELETE_THREAD
++/* Move blocks from to-be-truncated inode over to a new inode, and delete
++ * that one from the delete thread instead.  This avoids a lot of latency
++ * when truncating large files.
++ *
++ * If we have any problem deferring the truncate, just truncate it right away.
++ * If we defer it, we also mark how many blocks it would free, so that we
++ * can keep the statfs data correct, and we know if we should sleep on the
++ * delete thread when we run out of space.
++ */
++void ext3_truncate_thread(struct inode *old_inode)
++{
++      struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
++      struct ext3_inode_info *nei, *oei = EXT3_I(old_inode);
++      struct inode *new_inode;
++      handle_t *handle;
++      unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
++
++      if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next)
++              goto out_truncate;
++
++      /* XXX This is a temporary limitation for code simplicity.
++       *     We could truncate to arbitrary sizes at some later time.
++       */
++      if (old_inode->i_size != 0)
++              goto out_truncate;
++
++      /* We may want to truncate the inode immediately and not defer it */
++      if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS ||
++          old_inode->i_size > oei->i_disksize)
++              goto out_truncate;
++
++      /* We can't use the delete thread as-is during real orphan recovery,
++       * as we add to the orphan list here, causing ext3_orphan_cleanup()
++       * to loop endlessly.  It would be nice to do so, but needs work.
++       */
++      if (oei->i_state & EXT3_STATE_DELETE ||
++          sbi->s_mount_state & EXT3_ORPHAN_FS) {
++              ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
++                         old_inode->i_ino, blocks);
++              goto out_truncate;
++      }
++
++      ext3_discard_prealloc(old_inode);
++
++      /* old_inode   = 1
++       * new_inode   = sb + GDT + ibitmap
++       * orphan list = 1 inode/superblock for add, 2 inodes for del
++       * quota files = 2 * EXT3_SINGLEDATA_TRANS_BLOCKS
++       */
++      handle = ext3_journal_start(old_inode, 7);
++      if (IS_ERR(handle))
++              goto out_truncate;
++
++      new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode);
++      if (IS_ERR(new_inode)) {
++              ext3_debug("truncate inode %lu directly (no new inodes)\n",
++                         old_inode->i_ino);
++              goto out_journal;
++      }
++
++      nei = EXT3_I(new_inode);
++
++      down_write(&oei->truncate_sem);
++      new_inode->i_size = old_inode->i_size;
++      new_inode->i_blocks = old_inode->i_blocks;
++      new_inode->i_uid = old_inode->i_uid;
++      new_inode->i_gid = old_inode->i_gid;
++      new_inode->i_nlink = 0;
++
++      /* FIXME when we do arbitrary truncates */
++      old_inode->i_blocks = oei->i_file_acl ? old_inode->i_blksize / 512 : 0;
++      old_inode->i_mtime = old_inode->i_ctime = CURRENT_TIME;
++
++      memcpy(nei->i_data, oei->i_data, sizeof(nei->i_data));
++      memset(oei->i_data, 0, sizeof(oei->i_data));
++
++      nei->i_disksize = oei->i_disksize;
++      nei->i_state |= EXT3_STATE_DELETE;
++      up_write(&oei->truncate_sem);
++
++      if (ext3_orphan_add(handle, new_inode) < 0)
++              goto out_journal;
++
++      if (ext3_orphan_del(handle, old_inode) < 0) {
++              ext3_orphan_del(handle, new_inode);
++              iput(new_inode);
++              goto out_journal;
++      }
++
++      ext3_journal_stop(handle, old_inode);
++
++      spin_lock(&sbi->s_delete_lock);
++      J_ASSERT(list_empty(&new_inode->i_dentry));
++      list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list);
++      sbi->s_delete_blocks += blocks;
++      sbi->s_delete_inodes++;
++      spin_unlock(&sbi->s_delete_lock);
++
++      ext3_debug("delete inode %lu (%lu blocks) by thread\n",
++                 new_inode->i_ino, blocks);
++
++      wake_up(&sbi->s_delete_thread_queue);
++      return;
++
++out_journal:
++      ext3_journal_stop(handle, old_inode);
++out_truncate:
++      ext3_truncate(old_inode);
++}
++#endif /* EXT3_DELETE_THREAD */
++
+ /* 
+  * ext3_get_inode_loc returns with an extra refcount against the
+  * inode's underlying buffer_head on success. 
+--- linux/fs/ext3/file.c~ext3-delete_thread-2.4.20     Thu Jul 10 14:11:21 2003
++++ linux-mmonroe/fs/ext3/file.c       Thu Jul 10 14:12:17 2003
+@@ -125,7 +125,11 @@ struct file_operations ext3_file_operati
+ };
+ struct inode_operations ext3_file_inode_operations = {
++#ifdef EXT3_DELETE_THREAD
++      truncate:       ext3_truncate_thread,   /* BKL held */
++#else
+       truncate:       ext3_truncate,          /* BKL held */
++#endif
+       setattr:        ext3_setattr,           /* BKL held */
+       setxattr:       ext3_setxattr,          /* BKL held */
+       getxattr:       ext3_getxattr,          /* BKL held */
+--- linux/include/linux/ext3_fs.h~ext3-delete_thread-2.4.20    Thu Jul 10 14:11:26 2003
++++ linux-mmonroe/include/linux/ext3_fs.h      Thu Jul 10 14:11:33 2003
 @@ -193,6 +193,7 @@ struct ext3_group_desc
   */
  #define EXT3_STATE_JDATA              0x00000001 /* journaled data exists */
@@ -284,17 +425,26 @@ diff -puNr origin/include/linux/ext3_fs.h linux/include/linux/ext3_fs.h
  
  /*
   * ioctl commands
-@@ -321,6 +322,7 @@ struct ext3_inode {
+@@ -320,6 +321,7 @@ struct ext3_inode {
  #define EXT3_MOUNT_UPDATE_JOURNAL     0x1000  /* Update the journal format */
  #define EXT3_MOUNT_NO_UID32           0x2000  /* Disable 32-bit UIDs */
  #define EXT3_MOUNT_XATTR_USER         0x4000  /* Extended user attributes */
-+#define EXT3_MOUNT_ASYNCDEL           0x20000 /* Delayed deletion */
++#define EXT3_MOUNT_ASYNCDEL           0x20000 /* Delayed deletion */
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
  #ifndef _LINUX_EXT2_FS_H
-diff -puNr origin/include/linux/ext3_fs_sb.h linux/include/linux/ext3_fs_sb.h
---- origin/include/linux/ext3_fs_sb.h  2003-05-04 17:23:52.000000000 +0400
-+++ linux/include/linux/ext3_fs_sb.h   2003-05-04 11:37:04.000000000 +0400
+@@ -694,6 +696,9 @@ extern void ext3_discard_prealloc (struc
+ extern void ext3_dirty_inode(struct inode *);
+ extern int ext3_change_inode_journal_flag(struct inode *, int);
+ extern void ext3_truncate (struct inode *);
++#ifdef EXT3_DELETE_THREAD
++extern void ext3_truncate_thread(struct inode *inode);
++#endif
+ /* ioctl.c */
+ extern int ext3_ioctl (struct inode *, struct file *, unsigned int,
+--- linux/include/linux/ext3_fs_sb.h~ext3-delete_thread-2.4.20 Thu Jul 10 14:11:32 2003
++++ linux-mmonroe/include/linux/ext3_fs_sb.h   Thu Jul 10 14:11:33 2003
 @@ -29,6 +29,8 @@
  
  #define EXT3_MAX_GROUP_LOADED 8
@@ -319,3 +469,5 @@ diff -puNr origin/include/linux/ext3_fs_sb.h linux/include/linux/ext3_fs_sb.h
  };
  
  #endif        /* _LINUX_EXT3_FS_SB */
+
+_
index 06ea72a..65d9347 100644 (file)
@@ -1,6 +1,9 @@
---- linux-2.4.17/fs/ext3/super.c.orig  Fri Dec 21 10:41:55 2001
-+++ linux-2.4.17/fs/ext3/super.c       Fri Mar 22 11:00:41 2002
-@@ -1344,10 +1342,10 @@
+ fs/ext3/super.c |    4 ++--
+ 1 files changed, 2 insertions(+), 2 deletions(-)
+
+--- linux-2.4.18-p4smp/fs/ext3/super.c~extN-misc-fixup 2003-07-21 23:07:50.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/super.c   2003-07-21 23:08:06.000000000 -0600
+@@ -1578,10 +1578,10 @@ static journal_t *ext3_get_dev_journal(s
                printk(KERN_ERR "EXT3-fs: I/O error on journal device\n");
                goto out_journal;
        }
                goto out_journal;
        }
        EXT3_SB(sb)->journal_bdev = bdev;
-@@ -1560,6 +1560,7 @@
-       unlock_kernel();
-       return ret;
- }
-+EXPORT_SYMBOL(ext3_force_commit); /* here to avoid potential patch collisions */
- /*
-  * Ext3 always journals updates to the superblock itself, so we don't
+
+_
index 63f4463..305f6fd 100644 (file)
@@ -83,9 +83,7 @@
                DQUOT_DROP(inode);
 --- linux-2.4.18-chaos52/fs/ext3/inode.c~extN-noread   2003-05-16 12:26:29.000000000 +0800
 +++ linux-2.4.18-chaos52-root/fs/ext3/inode.c  2003-05-16 12:27:06.000000000 +0800
-@@ -2011,23 +2011,28 @@ out_stop:
-       ext3_journal_stop(handle, inode);
- }
+@@ -2013,21 +2013,26 @@ out_stop:
  
 -/* 
 - * ext3_get_inode_loc returns with an extra refcount against the
index fc74c6b..d40d678 100644 (file)
        j += i * EXT3_INODES_PER_GROUP(sb) + 1;
        if (j < EXT3_FIRST_INO(sb) || j > le32_to_cpu(es->s_inodes_count)) {
                ext3_error (sb, "ext3_new_inode",
+--- linux-2.4.18-18.8.0-l15/fs/ext3/inode.c~extN-wantedi       Thu Jul  3 00:15:41 2003
++++ linux-2.4.18-18.8.0-l15-adilger/fs/ext3/inode.c    Thu Jul  3 00:17:28 2003
+@@ -2070,7 +2070,7 @@ void ext3_truncate_thread(struct inode *
+       if (IS_ERR(handle))
+               goto out_truncate;
+-      new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode);
++      new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode, 0);
+       if (IS_ERR(new_inode)) {
+               ext3_debug("truncate inode %lu directly (no new inodes)\n",
+                          old_inode->i_ino);
 --- linux-2.4.20/fs/ext3/ioctl.c~extN-wantedi  2003-04-08 23:35:55.000000000 -0600
 +++ linux-2.4.20-braam/fs/ext3/ioctl.c 2003-04-08 23:35:55.000000000 -0600
 @@ -23,6 +23,31 @@ int ext3_ioctl (struct inode * inode, st
index 6eabe85..b983b33 100644 (file)
@@ -1,7 +1,15 @@
- 0 files changed
+ Documentation/filesystems/ext2.txt |   16 ++
+ fs/ext3/Makefile                   |    2 
+ fs/ext3/inode.c                    |    4 
+ fs/ext3/iopen.c                    |  259 +++++++++++++++++++++++++++++++++++++
+ fs/ext3/iopen.h                    |   13 +
+ fs/ext3/namei.c                    |   12 +
+ fs/ext3/super.c                    |   11 +
+ include/linux/ext3_fs.h            |    2 
+ 8 files changed, 318 insertions(+), 1 deletion(-)
 
---- linux-2.4.18-chaos52/Documentation/filesystems/ext2.txt~iopen-2.4.18       2003-04-13 15:21:33.000000000 +0800
-+++ linux-2.4.18-chaos52-root/Documentation/filesystems/ext2.txt       2003-06-03 17:10:55.000000000 +0800
+--- linux-2.4.18-p4smp/Documentation/filesystems/ext2.txt~iopen-2.4.18 2003-07-09 12:17:30.000000000 -0600
++++ linux-2.4.18-p4smp-braam/Documentation/filesystems/ext2.txt        2003-07-09 17:13:02.000000000 -0600
 @@ -35,6 +35,22 @@ resgid=n                    The group ID which may use th
  
  sb=n                          Use alternate superblock at this location.
  grpquota,noquota,quota,usrquota       Quota options are silently ignored by ext2.
  
  
---- linux-2.4.18-chaos52/fs/ext3/Makefile~iopen-2.4.18 2003-06-01 03:24:07.000000000 +0800
-+++ linux-2.4.18-chaos52-root/fs/ext3/Makefile 2003-06-03 17:10:55.000000000 +0800
+--- linux-2.4.18-p4smp/fs/ext3/Makefile~iopen-2.4.18   2003-07-09 17:12:12.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/Makefile  2003-07-09 17:13:15.000000000 -0600
 @@ -11,7 +11,7 @@ O_TARGET := ext3.o
  
- export-objs :=        super.o inode.o xattr.o
+ export-objs :=        super.o inode.o xattr.o ext3-exports.o
  
 -obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
-+obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
-               ioctl.o namei.o super.o symlink.o xattr.o
++obj-y    := balloc.o iopen.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
+               ioctl.o namei.o super.o symlink.o xattr.o ext3-exports.o
  obj-m    := $(O_TARGET)
  
---- linux-2.4.18-chaos52/fs/ext3/inode.c~iopen-2.4.18  2003-06-03 17:10:21.000000000 +0800
-+++ linux-2.4.18-chaos52-root/fs/ext3/inode.c  2003-06-03 17:10:55.000000000 +0800
+--- linux-2.4.18-p4smp/fs/ext3/inode.c~iopen-2.4.18    2003-07-09 17:11:19.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/inode.c   2003-07-09 17:13:02.000000000 -0600
 @@ -31,6 +31,7 @@
  #include <linux/highuid.h>
  #include <linux/quotaops.h>
@@ -46,7 +54,7 @@
  
  /*
   * SEARCH_FROM_ZERO forces each block allocation to search from the start
-@@ -2135,6 +2136,9 @@ void ext3_read_inode(struct inode * inod
+@@ -2165,6 +2166,9 @@ void ext3_read_inode(struct inode * inod
        struct buffer_head *bh;
        int block;
        
@@ -56,8 +64,8 @@
        if(ext3_get_inode_loc(inode, &iloc))
                goto bad_inode;
        bh = iloc.bh;
---- /dev/null  2002-08-31 07:31:37.000000000 +0800
-+++ linux-2.4.18-chaos52-root/fs/ext3/iopen.c  2003-06-03 17:10:55.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ linux-2.4.18-p4smp-braam/fs/ext3/iopen.c   2003-07-09 17:13:02.000000000 -0600
 @@ -0,0 +1,259 @@
 +/*
 + * linux/fs/ext3/iopen.c
 +
 +      return 1;
 +}
---- /dev/null  2002-08-31 07:31:37.000000000 +0800
-+++ linux-2.4.18-chaos52-root/fs/ext3/iopen.h  2003-06-03 17:10:55.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ linux-2.4.18-p4smp-braam/fs/ext3/iopen.h   2003-07-09 17:13:02.000000000 -0600
 @@ -0,0 +1,13 @@
 +/*
 + * iopen.h
 +
 +extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry);
 +extern int ext3_iopen_get_inode(struct inode *inode);
---- linux-2.4.18-chaos52/fs/ext3/namei.c~iopen-2.4.18  2003-06-03 17:10:20.000000000 +0800
-+++ linux-2.4.18-chaos52-root/fs/ext3/namei.c  2003-06-03 17:10:55.000000000 +0800
+--- linux-2.4.18-p4smp/fs/ext3/namei.c~iopen-2.4.18    2003-07-09 13:32:38.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/namei.c   2003-07-09 17:13:02.000000000 -0600
 @@ -34,6 +34,7 @@
  #include <linux/locks.h>
  #include <linux/quotaops.h>
        d_add(dentry, inode);
        return NULL;
  }
---- linux-2.4.18-chaos52/fs/ext3/super.c~iopen-2.4.18  2003-06-03 17:10:21.000000000 +0800
-+++ linux-2.4.18-chaos52-root/fs/ext3/super.c  2003-06-03 17:10:55.000000000 +0800
-@@ -820,6 +820,17 @@ static int parse_options (char * options
+--- linux-2.4.18-p4smp/fs/ext3/super.c~iopen-2.4.18    2003-07-09 13:32:38.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/super.c   2003-07-09 17:13:02.000000000 -0600
+@@ -831,6 +831,17 @@ static int parse_options (char * options
                         || !strcmp (this_char, "quota")
                         || !strcmp (this_char, "usrquota"))
                        /* Don't do anything ;-) */ ;
                else if (!strcmp (this_char, "journal")) {
                        /* @@@ FIXME */
                        /* Eventually we will want to be able to create
---- linux-2.4.18-chaos52/include/linux/ext3_fs.h~iopen-2.4.18  2003-06-03 17:10:22.000000000 +0800
-+++ linux-2.4.18-chaos52-root/include/linux/ext3_fs.h  2003-06-03 17:12:08.000000000 +0800
+--- linux-2.4.18-p4smp/include/linux/ext3_fs.h~iopen-2.4.18    2003-07-09 13:32:38.000000000 -0600
++++ linux-2.4.18-p4smp-braam/include/linux/ext3_fs.h   2003-07-09 17:13:02.000000000 -0600
 @@ -321,6 +321,8 @@ struct ext3_inode {
  #define EXT3_MOUNT_UPDATE_JOURNAL     0x1000  /* Update the journal format */
  #define EXT3_MOUNT_NO_UID32           0x2000  /* Disable 32-bit UIDs */
index 3038cc8..ec48814 100644 (file)
@@ -1,15 +1,15 @@
  Documentation/filesystems/ext2.txt |   16 ++
  fs/ext3/Makefile                   |    2 
  fs/ext3/inode.c                    |    4 
- fs/ext3/iopen.c                    |  240 +++++++++++++++++++++++++++++++++++++
- fs/ext3/iopen.h                    |   15 ++
- fs/ext3/namei.c                    |   13 +-
+ fs/ext3/iopen.c                    |  259 +++++++++++++++++++++++++++++++++++++
+ fs/ext3/iopen.h                    |   1+
+ fs/ext3/namei.c                    |   13 +
  fs/ext3/super.c                    |   11 +
  include/linux/ext3_fs.h            |    2 
- 8 files changed, 301 insertions(+), 2 deletions(-)
+ 8 files changed, 318 insertions(+), 2 deletions(-)
 
---- linux-2.4.20/Documentation/filesystems/ext2.txt~iopen      2001-07-11 16:44:45.000000000 -0600
-+++ linux-2.4.20-braam/Documentation/filesystems/ext2.txt      2003-05-17 14:06:00.000000000 -0600
+--- linux/Documentation/filesystems/ext2.txt~iopen-2.4.20      Wed Jul 11 15:44:45 2001
++++ linux-mmonroe/Documentation/filesystems/ext2.txt   Thu Jul 10 12:28:54 2003
 @@ -35,6 +35,22 @@ resgid=n                    The group ID which may use th
  
  sb=n                          Use alternate superblock at this location.
@@ -33,8 +33,8 @@
  grpquota,noquota,quota,usrquota       Quota options are silently ignored by ext2.
  
  
---- linux-2.4.20/fs/ext3/Makefile~iopen        2003-05-17 14:05:57.000000000 -0600
-+++ linux-2.4.20-braam/fs/ext3/Makefile        2003-05-17 14:06:00.000000000 -0600
+--- linux/fs/ext3/Makefile~iopen-2.4.20        Thu Jul 10 12:28:44 2003
++++ linux-mmonroe/fs/ext3/Makefile     Thu Jul 10 12:28:54 2003
 @@ -11,7 +11,7 @@ O_TARGET := ext3.o
  
  export-objs := ext3-exports.o
@@ -44,8 +44,8 @@
                ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o
  obj-m    := $(O_TARGET)
  
---- linux-2.4.20/fs/ext3/inode.c~iopen 2003-05-17 14:06:00.000000000 -0600
-+++ linux-2.4.20-braam/fs/ext3/inode.c 2003-05-17 14:06:00.000000000 -0600
+--- linux/fs/ext3/inode.c~iopen-2.4.20 Thu Jul 10 12:28:46 2003
++++ linux-mmonroe/fs/ext3/inode.c      Thu Jul 10 12:28:54 2003
 @@ -31,6 +31,7 @@
  #include <linux/highuid.h>
  #include <linux/quotaops.h>
@@ -54,7 +54,7 @@
  
  /*
   * SEARCH_FROM_ZERO forces each block allocation to search from the start
-@@ -2137,6 +2138,9 @@ void ext3_read_inode(struct inode * inod
+@@ -2253,6 +2254,9 @@ void ext3_read_inode(struct inode * inod
        struct buffer_head *bh;
        int block;
        
@@ -64,8 +64,8 @@
        if(ext3_get_inode_loc(inode, &iloc))
                goto bad_inode;
        bh = iloc.bh;
---- /dev/null  2003-01-30 03:24:37.000000000 -0700
-+++ linux-2.4.20-braam/fs/ext3/iopen.c 2003-05-17 22:18:55.000000000 -0600
+--- /dev/null  Tue Jan 28 04:00:01 2003
++++ linux-mmonroe/fs/ext3/iopen.c      Thu Jul 10 12:28:54 2003
 @@ -0,0 +1,259 @@
 +/*
 + * linux/fs/ext3/iopen.c
 +
 +      return 1;
 +}
---- /dev/null  2003-01-30 03:24:37.000000000 -0700
-+++ linux-2.4.20-braam/fs/ext3/iopen.h 2003-05-17 14:06:00.000000000 -0600
+--- /dev/null  Tue Jan 28 04:00:01 2003
++++ linux-mmonroe/fs/ext3/iopen.h      Thu Jul 10 12:28:54 2003
 @@ -0,0 +1,13 @@
 +/*
 + * iopen.h
 +
 +extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry);
 +extern int ext3_iopen_get_inode(struct inode *inode);
---- linux-2.4.20/fs/ext3/namei.c~iopen 2003-05-17 14:05:59.000000000 -0600
-+++ linux-2.4.20-braam/fs/ext3/namei.c 2003-05-17 22:23:08.000000000 -0600
+--- linux/fs/ext3/namei.c~iopen-2.4.20 Thu Jul 10 12:28:46 2003
++++ linux-mmonroe/fs/ext3/namei.c      Thu Jul 10 12:28:54 2003
 @@ -35,7 +35,7 @@
  #include <linux/string.h>
  #include <linux/locks.h>
        d_add(dentry, inode);
        return NULL;
  }
---- linux-2.4.20/fs/ext3/super.c~iopen 2003-05-17 14:05:59.000000000 -0600
-+++ linux-2.4.20-braam/fs/ext3/super.c 2003-05-17 14:06:00.000000000 -0600
-@@ -820,6 +820,17 @@ static int parse_options (char * options
+--- linux/fs/ext3/super.c~iopen-2.4.20 Thu Jul 10 12:28:45 2003
++++ linux-mmonroe/fs/ext3/super.c      Thu Jul 10 12:28:54 2003
+@@ -835,6 +835,17 @@ static int parse_options (char * options
                         || !strcmp (this_char, "quota")
                         || !strcmp (this_char, "usrquota"))
                        /* Don't do anything ;-) */ ;
                else if (!strcmp (this_char, "journal")) {
                        /* @@@ FIXME */
                        /* Eventually we will want to be able to create
---- linux-2.4.20/include/linux/ext3_fs.h~iopen 2003-05-17 14:05:59.000000000 -0600
-+++ linux-2.4.20-braam/include/linux/ext3_fs.h 2003-05-17 14:06:29.000000000 -0600
+--- linux/include/linux/ext3_fs.h~iopen-2.4.20 Thu Jul 10 12:28:46 2003
++++ linux-mmonroe/include/linux/ext3_fs.h      Thu Jul 10 12:30:12 2003
 @@ -322,6 +322,8 @@ struct ext3_inode {
  #define EXT3_MOUNT_UPDATE_JOURNAL     0x1000  /* Update the journal format */
  #define EXT3_MOUNT_NO_UID32           0x2000  /* Disable 32-bit UIDs */
  #define EXT3_MOUNT_XATTR_USER         0x4000  /* Extended user attributes */
 +#define EXT3_MOUNT_IOPEN              0x8000  /* Allow access via iopen */
 +#define EXT3_MOUNT_IOPEN_NOPRIV               0x10000 /* Make iopen world-readable */
- #define EXT3_MOUNT_ASYNCDEL           0x20000 /* Delayed deletion */
+ #define EXT3_MOUNT_ASYNCDEL           0x20000 /* Delayed deletion */
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
 
index 75ebcd0..15f1b2a 100644 (file)
@@ -1,7 +1,18 @@
- 0 files changed
+ fs/ext3/Makefile           |    4 
+ fs/ext3/ext3-exports.c     |   13 
+ fs/ext3/ialloc.c           |    2 
+ fs/ext3/inode.c            |   29 -
+ fs/ext3/namei.c            |   12 
+ fs/ext3/super.c            |   22 
+ fs/ext3/xattr.c            | 1242 +++++++++++++++++++++++++++++++++++++++++++++
+ include/linux/ext3_fs.h    |   46 -
+ include/linux/ext3_jbd.h   |    8 
+ include/linux/ext3_xattr.h |  155 +++++
+ include/linux/xattr.h      |   15 
+ 11 files changed, 1496 insertions(+), 52 deletions(-)
 
---- linux-2.4.18-18/fs/ext3/ialloc.c~linux-2.4.18ea-0.8.26     2003-04-20 16:14:31.000000000 +0800
-+++ linux-2.4.18-18-root/fs/ext3/ialloc.c      2003-04-20 16:14:31.000000000 +0800
+--- linux-2.4.18-p4smp/fs/ext3/ialloc.c~linux-2.4.18ea-0.8.26  2003-07-20 17:12:43.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/ialloc.c  2003-07-21 22:49:05.000000000 -0600
 @@ -17,6 +17,7 @@
  #include <linux/jbd.h>
  #include <linux/ext3_fs.h>
@@ -18,8 +29,8 @@
        DQUOT_FREE_INODE(inode);
        DQUOT_DROP(inode);
  
---- linux-2.4.18-18/fs/ext3/inode.c~linux-2.4.18ea-0.8.26      2003-04-20 16:14:31.000000000 +0800
-+++ linux-2.4.18-18-root/fs/ext3/inode.c       2003-04-20 16:14:31.000000000 +0800
+--- linux-2.4.18-p4smp/fs/ext3/inode.c~linux-2.4.18ea-0.8.26   2003-07-20 17:12:43.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/inode.c   2003-07-21 22:49:05.000000000 -0600
 @@ -39,6 +39,18 @@
   */
  #undef SEARCH_FROM_ZERO
@@ -59,7 +70,7 @@
                goto no_delete;
  
        lock_kernel();
-@@ -1861,6 +1871,8 @@ void ext3_truncate(struct inode * inode)
+@@ -1877,6 +1887,8 @@ void ext3_truncate(struct inode * inode)
        if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
            S_ISLNK(inode->i_mode)))
                return;
@@ -68,7 +79,7 @@
        if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
                return;
  
-@@ -2008,8 +2020,6 @@ int ext3_get_inode_loc (struct inode *in
+@@ -2038,8 +2050,6 @@ int ext3_get_inode_loc (struct inode *in
        struct ext3_group_desc * gdp;
                
        if ((inode->i_ino != EXT3_ROOT_INO &&
@@ -77,7 +88,7 @@
                inode->i_ino != EXT3_JOURNAL_INO &&
                inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) ||
                inode->i_ino > le32_to_cpu(
-@@ -2136,10 +2146,7 @@ void ext3_read_inode(struct inode * inod
+@@ -2166,10 +2176,7 @@ void ext3_read_inode(struct inode * inod
  
        brelse (iloc.bh);
  
                inode->i_op = &ext3_file_inode_operations;
                inode->i_fop = &ext3_file_operations;
                inode->i_mapping->a_ops = &ext3_aops;
-@@ -2147,7 +2154,7 @@ void ext3_read_inode(struct inode * inod
+@@ -2177,7 +2184,7 @@ void ext3_read_inode(struct inode * inod
                inode->i_op = &ext3_dir_inode_operations;
                inode->i_fop = &ext3_dir_operations;
        } else if (S_ISLNK(inode->i_mode)) {
                        inode->i_op = &ext3_fast_symlink_inode_operations;
                else {
                        inode->i_op = &page_symlink_inode_operations;
---- linux-2.4.18-18/fs/ext3/namei.c~linux-2.4.18ea-0.8.26      2003-04-20 16:14:31.000000000 +0800
-+++ linux-2.4.18-18-root/fs/ext3/namei.c       2003-04-20 16:14:31.000000000 +0800
+--- linux-2.4.18-p4smp/fs/ext3/namei.c~linux-2.4.18ea-0.8.26   2003-07-21 22:29:27.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/namei.c   2003-07-21 22:49:05.000000000 -0600
 @@ -27,6 +27,7 @@
  #include <linux/sched.h>
  #include <linux/ext3_fs.h>
                inode->i_op = &page_symlink_inode_operations;
                inode->i_mapping->a_ops = &ext3_aops;
                /*
---- linux-2.4.18-18/fs/ext3/super.c~linux-2.4.18ea-0.8.26      2003-04-20 16:14:31.000000000 +0800
-+++ linux-2.4.18-18-root/fs/ext3/super.c       2003-04-20 16:14:31.000000000 +0800
+--- linux-2.4.18-p4smp/fs/ext3/super.c~linux-2.4.18ea-0.8.26   2003-07-21 22:29:27.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/super.c   2003-07-21 22:50:28.000000000 -0600
 @@ -24,6 +24,7 @@
  #include <linux/jbd.h>
  #include <linux/ext3_fs.h>
  #include <linux/slab.h>
  #include <linux/init.h>
  #include <linux/locks.h>
-@@ -404,6 +405,7 @@ void ext3_put_super (struct super_block 
+@@ -406,6 +407,7 @@ void ext3_put_super (struct super_block 
        kdev_t j_dev = sbi->s_journal->j_dev;
        int i;
  
        journal_destroy(sbi->s_journal);
        if (!(sb->s_flags & MS_RDONLY)) {
                EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
-@@ -1748,14 +1750,25 @@ int ext3_statfs (struct super_block * sb
+@@ -1749,17 +1751,27 @@ int ext3_statfs (struct super_block * sb
  
  static DECLARE_FSTYPE_DEV(ext3_fs_type, "ext3", ext3_read_super);
  
 +      return error;
  }
  
- EXPORT_SYMBOL(ext3_bread);
---- /dev/null  2002-08-31 07:31:37.000000000 +0800
-+++ linux-2.4.18-18-root/fs/ext3/xattr.c       2003-04-20 16:14:31.000000000 +0800
-@@ -0,0 +1,1247 @@
+-EXPORT_SYMBOL(ext3_bread);
+ MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
+ MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ linux-2.4.18-p4smp-braam/fs/ext3/ext3-exports.c    2003-07-21 22:49:05.000000000 -0600
+@@ -0,0 +1,13 @@
++#include <linux/config.h>
++#include <linux/module.h>
++#include <linux/ext3_fs.h>
++#include <linux/ext3_jbd.h>
++#include <linux/ext3_xattr.h>
++
++EXPORT_SYMBOL(ext3_force_commit);
++EXPORT_SYMBOL(ext3_bread);
++EXPORT_SYMBOL(ext3_xattr_register);
++EXPORT_SYMBOL(ext3_xattr_unregister);
++EXPORT_SYMBOL(ext3_xattr_get);
++EXPORT_SYMBOL(ext3_xattr_list);
++EXPORT_SYMBOL(ext3_xattr_set);
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ linux-2.4.18-p4smp-braam/fs/ext3/xattr.c   2003-07-21 22:50:40.000000000 -0600
+@@ -0,0 +1,1242 @@
 +/*
 + * linux/fs/ext3/xattr.c
 + *
 +#include <linux/module.h>
 +
 +/* These symbols may be needed by a module. */
-+EXPORT_SYMBOL(ext3_xattr_register);
-+EXPORT_SYMBOL(ext3_xattr_unregister);
-+EXPORT_SYMBOL(ext3_xattr_get);
-+EXPORT_SYMBOL(ext3_xattr_list);
-+EXPORT_SYMBOL(ext3_xattr_set);
 +
 +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0)
 +# define mark_buffer_dirty(bh) mark_buffer_dirty(bh, 1)
 +}
 +
 +#endif  /* CONFIG_EXT3_FS_XATTR_SHARING */
---- linux-2.4.18-18/include/linux/ext3_fs.h~linux-2.4.18ea-0.8.26      2003-04-20 16:14:31.000000000 +0800
-+++ linux-2.4.18-18-root/include/linux/ext3_fs.h       2003-04-20 16:14:31.000000000 +0800
+--- linux-2.4.18-p4smp/include/linux/ext3_fs.h~linux-2.4.18ea-0.8.26   2003-07-21 22:29:27.000000000 -0600
++++ linux-2.4.18-p4smp-braam/include/linux/ext3_fs.h   2003-07-21 22:49:05.000000000 -0600
 @@ -58,8 +58,6 @@
   */
  #define       EXT3_BAD_INO             1      /* Bad blocks inode */
  extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
  extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
  
---- linux-2.4.18-18/include/linux/ext3_jbd.h~linux-2.4.18ea-0.8.26     2003-04-20 16:14:31.000000000 +0800
-+++ linux-2.4.18-18-root/include/linux/ext3_jbd.h      2003-04-20 16:14:31.000000000 +0800
+--- linux-2.4.18-p4smp/include/linux/ext3_jbd.h~linux-2.4.18ea-0.8.26  2003-07-21 22:29:27.000000000 -0600
++++ linux-2.4.18-p4smp-braam/include/linux/ext3_jbd.h  2003-07-21 22:49:05.000000000 -0600
 @@ -30,13 +30,19 @@
  
  #define EXT3_SINGLEDATA_TRANS_BLOCKS  8
  
  extern int ext3_writepage_trans_blocks(struct inode *inode);
  
---- /dev/null  2002-08-31 07:31:37.000000000 +0800
-+++ linux-2.4.18-18-root/include/linux/ext3_xattr.h    2003-04-20 16:14:31.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ linux-2.4.18-p4smp-braam/include/linux/ext3_xattr.h        2003-07-21 22:49:05.000000000 -0600
 @@ -0,0 +1,155 @@
 +/*
 +  File: linux/ext3_xattr.h
 +
 +#endif  /* __KERNEL__ */
 +
---- /dev/null  2002-08-31 07:31:37.000000000 +0800
-+++ linux-2.4.18-18-root/include/linux/xattr.h 2003-04-20 16:14:31.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ linux-2.4.18-p4smp-braam/include/linux/xattr.h     2003-07-21 22:49:05.000000000 -0600
 @@ -0,0 +1,15 @@
 +/*
 +  File: linux/xattr.h
 +#define XATTR_REPLACE 2       /* set value, fail if attr does not exist */
 +
 +#endif        /* _LINUX_XATTR_H */
---- linux-2.4.18-18/fs/ext3/Makefile~linux-2.4.18ea-0.8.26     2003-04-20 16:14:54.000000000 +0800
-+++ linux-2.4.18-18-root/fs/ext3/Makefile      2003-04-20 16:15:15.000000000 +0800
+--- linux-2.4.18-p4smp/fs/ext3/Makefile~linux-2.4.18ea-0.8.26  2003-07-21 22:27:37.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/Makefile  2003-07-21 22:51:23.000000000 -0600
 @@ -9,10 +9,10 @@
  
  O_TARGET := ext3.o
  
 -export-objs :=        super.o inode.o
-+export-objs :=        super.o inode.o xattr.o
++export-objs :=        ext3-exports.o
  
  obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
 -              ioctl.o namei.o super.o symlink.o
-+              ioctl.o namei.o super.o symlink.o xattr.o
++              ioctl.o namei.o super.o symlink.o xattr.o ext3-exports.o
  obj-m    := $(O_TARGET)
  
  include $(TOPDIR)/Rules.make
index 5c6c6a9..6d8eac6 100644 (file)
@@ -31,6 +31,7 @@
  fs/ext2/xattr.c               | 1212 +++++++++++++++++++++++++++++++++++++++++
  fs/ext2/xattr_user.c          |  103 +++
  fs/ext3/Makefile              |   10 
+ fs/ext3/ext3-exports.c        |   13 
  fs/ext3/file.c                |    5 
  fs/ext3/ialloc.c              |    2 
  fs/ext3/inode.c               |   35 -
  include/linux/mbcache.h       |   69 ++
  kernel/ksyms.c                |    4 
  mm/vmscan.c                   |   36 +
- fs/ext3/ext3-exports.c        |   14 +  
- 62 files changed, 4331 insertions(+), 197 deletions(-)
+ 62 files changed, 4344 insertions(+), 183 deletions(-)
 
---- linux-rh-2.4.20-8/Documentation/Configure.help~linux-2.4.20-xattr-0.8.54-chaos     2003-05-07 17:33:50.000000000 +0800
-+++ linux-rh-2.4.20-8-root/Documentation/Configure.help        2003-05-07 17:34:25.000000000 +0800
-@@ -15226,6 +15226,39 @@ CONFIG_EXT2_FS
+--- kernel-2.4.20-6chaos_18_7/Documentation/Configure.help~linux-2.4.20-xattr-0.8.54-chaos     2003-06-23 10:39:21.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/Documentation/Configure.help       2003-07-12 15:34:44.000000000 -0600
+@@ -15253,6 +15253,39 @@ CONFIG_EXT2_FS
    be compiled as a module, and so this could be dangerous.  Most
    everyone wants to say Y here.
  
  Ext3 journalling file system support (EXPERIMENTAL)
  CONFIG_EXT3_FS
    This is the journalling version of the Second extended file system
-@@ -15258,6 +15291,39 @@ CONFIG_EXT3_FS
+@@ -15285,6 +15318,39 @@ CONFIG_EXT3_FS
    of your root partition (the one containing the directory /) cannot
    be compiled as a module, and so this may be dangerous.
  
  Journal Block Device support (JBD for ext3) (EXPERIMENTAL)
  CONFIG_JBD
    This is a generic journalling layer for block devices.  It is
---- linux-rh-2.4.20-8/arch/alpha/defconfig~linux-2.4.20-xattr-0.8.54-chaos     2001-11-20 07:19:42.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/alpha/defconfig        2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/alpha/defconfig~linux-2.4.20-xattr-0.8.54-chaos     2002-05-07 15:53:54.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/alpha/defconfig       2003-07-12 15:34:44.000000000 -0600
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
  CONFIG_ALPHA=y
  # CONFIG_UID16 is not set
  # CONFIG_RWSEM_GENERIC_SPINLOCK is not set
---- linux-rh-2.4.20-8/arch/alpha/kernel/entry.S~linux-2.4.20-xattr-0.8.54-chaos        2003-04-11 14:04:53.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/alpha/kernel/entry.S   2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/alpha/kernel/entry.S~linux-2.4.20-xattr-0.8.54-chaos        2003-05-15 21:11:53.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/alpha/kernel/entry.S  2003-07-12 15:34:44.000000000 -0600
 @@ -1162,6 +1162,18 @@ sys_call_table:
        .quad sys_readahead
        .quad sys_ni_syscall                    /* 380, sys_security */
  
  /* Remember to update everything, kids.  */
  .ifne (. - sys_call_table) - (NR_SYSCALLS * 8)
---- linux-rh-2.4.20-8/arch/arm/defconfig~linux-2.4.20-xattr-0.8.54-chaos       2001-05-20 08:43:05.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/arm/defconfig  2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/arm/defconfig~linux-2.4.20-xattr-0.8.54-chaos       2002-05-07 15:53:56.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/arm/defconfig 2003-07-12 15:34:44.000000000 -0600
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
  CONFIG_ARM=y
  # CONFIG_EISA is not set
  # CONFIG_SBUS is not set
---- linux-rh-2.4.20-8/arch/arm/kernel/calls.S~linux-2.4.20-xattr-0.8.54-chaos  2002-08-03 08:39:42.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/arm/kernel/calls.S     2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/arm/kernel/calls.S~linux-2.4.20-xattr-0.8.54-chaos  2002-09-25 11:09:16.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/arm/kernel/calls.S    2003-07-12 15:34:44.000000000 -0600
 @@ -240,18 +240,18 @@ __syscall_start:
                .long   SYMBOL_NAME(sys_ni_syscall) /* Security */
                .long   SYMBOL_NAME(sys_gettid)
                .long   SYMBOL_NAME(sys_tkill)
                /*
                 * Please check 2.5 _before_ adding calls here,
---- linux-rh-2.4.20-8/arch/i386/defconfig~linux-2.4.20-xattr-0.8.54-chaos      2003-04-11 14:04:53.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/i386/defconfig 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/i386/defconfig~linux-2.4.20-xattr-0.8.54-chaos      2003-05-15 21:12:00.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/i386/defconfig        2003-07-12 15:34:44.000000000 -0600
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
  CONFIG_X86=y
  CONFIG_ISA=y
  # CONFIG_SBUS is not set
---- linux-rh-2.4.20-8/arch/ia64/defconfig~linux-2.4.20-xattr-0.8.54-chaos      2003-04-11 14:04:43.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/ia64/defconfig 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/ia64/defconfig~linux-2.4.20-xattr-0.8.54-chaos      2003-05-15 21:12:04.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/ia64/defconfig        2003-07-12 15:34:44.000000000 -0600
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
  
  #
  # Code maturity level options
---- linux-rh-2.4.20-8/arch/m68k/defconfig~linux-2.4.20-xattr-0.8.54-chaos      2000-06-20 03:56:08.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/m68k/defconfig 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/m68k/defconfig~linux-2.4.20-xattr-0.8.54-chaos      2002-05-07 15:53:55.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/m68k/defconfig        2003-07-12 15:34:44.000000000 -0600
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
  CONFIG_UID16=y
  
  #
---- linux-rh-2.4.20-8/arch/mips/defconfig~linux-2.4.20-xattr-0.8.54-chaos      2002-11-29 07:53:10.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/mips/defconfig 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/mips/defconfig~linux-2.4.20-xattr-0.8.54-chaos      2003-02-14 15:58:06.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/arch/mips/defconfig        2003-07-12 15:34:44.000000000 -0600
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
  CONFIG_MIPS=y
  CONFIG_MIPS32=y
  # CONFIG_MIPS64 is not set
---- linux-rh-2.4.20-8/arch/mips64/defconfig~linux-2.4.20-xattr-0.8.54-chaos    2002-11-29 07:53:10.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/mips64/defconfig       2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/mips64/defconfig~linux-2.4.20-xattr-0.8.54-chaos    2003-02-14 15:58:11.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/arch/mips64/defconfig      2003-07-12 15:34:44.000000000 -0600
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
  CONFIG_MIPS=y
  # CONFIG_MIPS32 is not set
  CONFIG_MIPS64=y
---- linux-rh-2.4.20-8/arch/ppc/defconfig~linux-2.4.20-xattr-0.8.54-chaos       2003-04-11 14:04:43.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/ppc/defconfig  2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/ppc/defconfig~linux-2.4.20-xattr-0.8.54-chaos       2003-05-15 21:12:20.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/ppc/defconfig 2003-07-12 15:34:44.000000000 -0600
 @@ -1,6 +1,20 @@
  #
  # Automatically generated make config: don't edit
  # CONFIG_UID16 is not set
  # CONFIG_RWSEM_GENERIC_SPINLOCK is not set
  CONFIG_RWSEM_XCHGADD_ALGORITHM=y
---- linux-rh-2.4.20-8/arch/ppc64/kernel/misc.S~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:11.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/ppc64/kernel/misc.S    2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/ppc64/kernel/misc.S~linux-2.4.20-xattr-0.8.54-chaos 2003-02-14 15:58:20.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/arch/ppc64/kernel/misc.S   2003-07-12 15:34:44.000000000 -0600
 @@ -731,6 +731,7 @@ _GLOBAL(sys_call_table32)
        .llong .sys_gettid              /* 207 */
  #if 0 /* Reserved syscalls */
        .llong .sys_futex
  #endif
        .llong .sys_perfmonctl   /* Put this here for now ... */
---- linux-rh-2.4.20-8/arch/s390/defconfig~linux-2.4.20-xattr-0.8.54-chaos      2002-11-29 07:53:11.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/s390/defconfig 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/s390/defconfig~linux-2.4.20-xattr-0.8.54-chaos      2003-02-14 15:58:20.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/arch/s390/defconfig        2003-07-12 15:34:44.000000000 -0600
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
  # CONFIG_ISA is not set
  # CONFIG_EISA is not set
  # CONFIG_MCA is not set
---- linux-rh-2.4.20-8/arch/s390/kernel/entry.S~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:11.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/s390/kernel/entry.S    2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/s390/kernel/entry.S~linux-2.4.20-xattr-0.8.54-chaos 2003-02-14 15:58:20.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/arch/s390/kernel/entry.S   2003-07-12 15:34:44.000000000 -0600
 @@ -558,18 +558,18 @@ sys_call_table:
          .long  sys_fcntl64 
        .long  sys_ni_syscall
        .long  sys_gettid
        .long  sys_tkill
        .rept  255-237
---- linux-rh-2.4.20-8/arch/s390x/defconfig~linux-2.4.20-xattr-0.8.54-chaos     2002-11-29 07:53:11.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/s390x/defconfig        2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/s390x/defconfig~linux-2.4.20-xattr-0.8.54-chaos     2003-02-14 15:58:21.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/arch/s390x/defconfig       2003-07-12 15:34:44.000000000 -0600
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
  # CONFIG_ISA is not set
  # CONFIG_EISA is not set
  # CONFIG_MCA is not set
---- linux-rh-2.4.20-8/arch/s390x/kernel/entry.S~linux-2.4.20-xattr-0.8.54-chaos        2002-11-29 07:53:11.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/s390x/kernel/entry.S   2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/s390x/kernel/entry.S~linux-2.4.20-xattr-0.8.54-chaos        2003-02-14 15:58:21.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/arch/s390x/kernel/entry.S  2003-07-12 15:34:44.000000000 -0600
 @@ -591,18 +591,18 @@ sys_call_table:
        .long  SYSCALL(sys_ni_syscall,sys32_fcntl64_wrapper)
        .long  SYSCALL(sys_ni_syscall,sys_ni_syscall)
        .long  SYSCALL(sys_gettid,sys_gettid)
        .long  SYSCALL(sys_tkill,sys_tkill)
        .rept  255-237
---- linux-rh-2.4.20-8/arch/s390x/kernel/wrapper32.S~linux-2.4.20-xattr-0.8.54-chaos    2002-02-26 03:37:56.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/s390x/kernel/wrapper32.S       2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/s390x/kernel/wrapper32.S~linux-2.4.20-xattr-0.8.54-chaos    2002-05-07 15:53:59.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/s390x/kernel/wrapper32.S      2003-07-12 15:34:44.000000000 -0600
 @@ -1091,3 +1091,95 @@ sys32_fstat64_wrapper:
        llgtr   %r3,%r3                 # struct stat64 *
        llgfr   %r4,%r4                 # long
 +      jg      sys_fremovexattr
 +
 +
---- linux-rh-2.4.20-8/arch/sparc/defconfig~linux-2.4.20-xattr-0.8.54-chaos     2002-08-03 08:39:43.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/sparc/defconfig        2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/sparc/defconfig~linux-2.4.20-xattr-0.8.54-chaos     2002-09-25 11:10:50.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/sparc/defconfig       2003-07-12 15:34:44.000000000 -0600
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
  CONFIG_UID16=y
  CONFIG_HIGHMEM=y
  
---- linux-rh-2.4.20-8/arch/sparc/kernel/systbls.S~linux-2.4.20-xattr-0.8.54-chaos      2002-08-03 08:39:43.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/sparc/kernel/systbls.S 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/sparc/kernel/systbls.S~linux-2.4.20-xattr-0.8.54-chaos      2002-09-25 11:10:52.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/sparc/kernel/systbls.S        2003-07-12 15:34:44.000000000 -0600
 @@ -51,11 +51,11 @@ sys_call_table:
  /*150*/       .long sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_poll, sys_getdents64
  /*155*/       .long sys_fcntl64, sys_nis_syscall, sys_statfs, sys_fstatfs, sys_oldumount
  /*190*/       .long sys_init_module, sys_personality, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall
  /*195*/       .long sys_nis_syscall, sys_nis_syscall, sys_getppid, sparc_sigaction, sys_sgetmask
  /*200*/       .long sys_ssetmask, sys_sigsuspend, sys_newlstat, sys_uselib, old_readdir
---- linux-rh-2.4.20-8/arch/sparc64/defconfig~linux-2.4.20-xattr-0.8.54-chaos   2003-04-11 14:04:43.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/sparc64/defconfig      2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/sparc64/defconfig~linux-2.4.20-xattr-0.8.54-chaos   2003-05-15 21:12:29.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/sparc64/defconfig     2003-07-12 15:34:44.000000000 -0600
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
  
  #
  # Code maturity level options
---- linux-rh-2.4.20-8/arch/sparc64/kernel/systbls.S~linux-2.4.20-xattr-0.8.54-chaos    2002-08-03 08:39:43.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/sparc64/kernel/systbls.S       2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/sparc64/kernel/systbls.S~linux-2.4.20-xattr-0.8.54-chaos    2002-09-25 11:10:55.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/sparc64/kernel/systbls.S      2003-07-12 15:34:44.000000000 -0600
 @@ -52,11 +52,11 @@ sys_call_table32:
  /*150*/       .word sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_poll, sys_getdents64
        .word sys32_fcntl64, sys_nis_syscall, sys32_statfs, sys32_fstatfs, sys_oldumount
  /*190*/       .word sys_init_module, sparc64_personality, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall
        .word sys_nis_syscall, sys_nis_syscall, sys_getppid, sys_nis_syscall, sys_sgetmask
  /*200*/       .word sys_ssetmask, sys_nis_syscall, sys_newlstat, sys_uselib, sys_nis_syscall
---- linux-rh-2.4.20-8/fs/Config.in~linux-2.4.20-xattr-0.8.54-chaos     2003-04-11 14:05:03.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/Config.in        2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/Config.in~linux-2.4.20-xattr-0.8.54-chaos     2003-05-15 21:14:24.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/Config.in       2003-07-12 15:34:44.000000000 -0600
 @@ -34,6 +34,11 @@ dep_mbool '  Debug Befs' CONFIG_BEFS_DEB
  dep_tristate 'BFS file system support (EXPERIMENTAL)' CONFIG_BFS_FS $CONFIG_EXPERIMENTAL
  
  mainmenu_option next_comment
  comment 'Partition Types'
  source fs/partitions/Config.in
---- linux-rh-2.4.20-8/fs/Makefile~linux-2.4.20-xattr-0.8.54-chaos      2003-05-07 17:33:58.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/Makefile 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/Makefile~linux-2.4.20-xattr-0.8.54-chaos      2003-07-12 15:33:34.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/Makefile        2003-07-12 15:34:44.000000000 -0600
 @@ -84,6 +84,9 @@ obj-y                                += binfmt_script.o
  
  obj-$(CONFIG_BINFMT_ELF)      += binfmt_elf.o
  # persistent filesystems
  obj-y += $(join $(subdir-y),$(subdir-y:%=/%.o))
  
---- linux-rh-2.4.20-8/fs/ext2/Makefile~linux-2.4.20-xattr-0.8.54-chaos 2001-10-11 23:05:18.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext2/Makefile    2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext2/Makefile~linux-2.4.20-xattr-0.8.54-chaos 2002-05-07 15:53:46.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/Makefile   2003-07-12 15:34:44.000000000 -0600
 @@ -13,4 +13,8 @@ obj-y    := balloc.o bitmap.o dir.o file
                ioctl.o namei.o super.o symlink.o
  obj-m    := $(O_TARGET)
 +obj-$(CONFIG_EXT2_FS_XATTR_USER) += xattr_user.o
 +
  include $(TOPDIR)/Rules.make
---- linux-rh-2.4.20-8/fs/ext2/file.c~linux-2.4.20-xattr-0.8.54-chaos   2001-10-11 23:05:18.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext2/file.c      2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext2/file.c~linux-2.4.20-xattr-0.8.54-chaos   2002-05-07 15:53:46.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/file.c     2003-07-12 15:34:44.000000000 -0600
 @@ -20,6 +20,7 @@
  
  #include <linux/fs.h>
 +      listxattr:      ext2_listxattr,
 +      removexattr:    ext2_removexattr,
  };
---- linux-rh-2.4.20-8/fs/ext2/ialloc.c~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:15.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext2/ialloc.c    2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext2/ialloc.c~linux-2.4.20-xattr-0.8.54-chaos 2003-02-14 15:59:09.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/ialloc.c   2003-07-12 15:34:44.000000000 -0600
 @@ -15,6 +15,7 @@
  #include <linux/config.h>
  #include <linux/fs.h>
                DQUOT_FREE_INODE(inode);
                DQUOT_DROP(inode);
        }
---- linux-rh-2.4.20-8/fs/ext2/inode.c~linux-2.4.20-xattr-0.8.54-chaos  2002-11-29 07:53:15.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext2/inode.c     2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext2/inode.c~linux-2.4.20-xattr-0.8.54-chaos  2003-02-14 15:59:09.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/inode.c    2003-07-12 15:34:44.000000000 -0600
 @@ -39,6 +39,18 @@ MODULE_LICENSE("GPL");
  static int ext2_update_inode(struct inode * inode, int do_sync);
  
        brelse (bh);
        inode->i_attr_flags = 0;
        if (inode->u.ext2_i.i_flags & EXT2_SYNC_FL) {
---- linux-rh-2.4.20-8/fs/ext2/namei.c~linux-2.4.20-xattr-0.8.54-chaos  2001-10-04 13:57:36.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext2/namei.c     2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext2/namei.c~linux-2.4.20-xattr-0.8.54-chaos  2002-05-07 15:53:46.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/namei.c    2003-07-12 15:34:44.000000000 -0600
 @@ -31,6 +31,7 @@
  
  #include <linux/fs.h>
 +      listxattr:      ext2_listxattr,
 +      removexattr:    ext2_removexattr,
  };
---- linux-rh-2.4.20-8/fs/ext2/super.c~linux-2.4.20-xattr-0.8.54-chaos  2002-11-29 07:53:15.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext2/super.c     2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext2/super.c~linux-2.4.20-xattr-0.8.54-chaos  2003-02-14 15:59:09.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/super.c    2003-07-12 15:34:44.000000000 -0600
 @@ -21,6 +21,7 @@
  #include <linux/string.h>
  #include <linux/fs.h>
  }
  
  EXPORT_NO_SYMBOLS;
---- linux-rh-2.4.20-8/fs/ext2/symlink.c~linux-2.4.20-xattr-0.8.54-chaos        2000-09-28 04:41:33.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext2/symlink.c   2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext2/symlink.c~linux-2.4.20-xattr-0.8.54-chaos        2002-05-07 15:53:46.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/symlink.c  2003-07-12 15:34:44.000000000 -0600
 @@ -19,6 +19,7 @@
  
  #include <linux/fs.h>
 +      listxattr:      ext2_listxattr,
 +      removexattr:    ext2_removexattr,
  };
---- /dev/null  2003-01-30 18:24:37.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext2/xattr.c     2003-05-07 17:34:25.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/xattr.c    2003-07-12 15:34:44.000000000 -0600
 @@ -0,0 +1,1212 @@
 +/*
 + * linux/fs/ext2/xattr.c
 +}
 +
 +#endif  /* CONFIG_EXT2_FS_XATTR_SHARING */
---- /dev/null  2003-01-30 18:24:37.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext2/xattr_user.c        2003-05-07 17:34:25.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/xattr_user.c       2003-07-12 15:34:44.000000000 -0600
 @@ -0,0 +1,103 @@
 +/*
 + * linux/fs/ext2/xattr_user.c
 +      ext2_xattr_unregister(EXT2_XATTR_INDEX_USER,
 +                            &ext2_xattr_user_handler);
 +}
---- linux-rh-2.4.20-8/fs/ext3/Makefile~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:59.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext3/Makefile    2003-05-07 17:45:13.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext3/Makefile~linux-2.4.20-xattr-0.8.54-chaos 2003-07-12 15:33:38.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/Makefile   2003-07-12 15:34:44.000000000 -0600
 @@ -1,5 +1,5 @@
  #
 -# Makefile for the linux ext2-filesystem routines.
 +obj-$(CONFIG_EXT3_FS_XATTR_USER) += xattr_user.o
 +
  include $(TOPDIR)/Rules.make
---- linux-rh-2.4.20-8/fs/ext3/file.c~linux-2.4.20-xattr-0.8.54-chaos   2003-05-07 17:33:59.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext3/file.c      2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext3/file.c~linux-2.4.20-xattr-0.8.54-chaos   2003-07-12 15:33:38.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/file.c     2003-07-12 15:34:44.000000000 -0600
 @@ -23,6 +23,7 @@
  #include <linux/locks.h>
  #include <linux/jbd.h>
 +      removexattr:    ext3_removexattr,       /* BKL held */
  };
  
---- linux-rh-2.4.20-8/fs/ext3/ialloc.c~linux-2.4.20-xattr-0.8.54-chaos 2003-04-11 14:04:48.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext3/ialloc.c    2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext3/ialloc.c~linux-2.4.20-xattr-0.8.54-chaos 2003-05-15 21:14:30.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/ialloc.c   2003-07-12 15:34:44.000000000 -0600
 @@ -17,6 +17,7 @@
  #include <linux/jbd.h>
  #include <linux/ext3_fs.h>
        DQUOT_FREE_INODE(inode);
        DQUOT_DROP(inode);
  
---- linux-rh-2.4.20-8/fs/ext3/inode.c~linux-2.4.20-xattr-0.8.54-chaos  2003-04-11 14:04:58.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext3/inode.c     2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext3/inode.c~linux-2.4.20-xattr-0.8.54-chaos  2003-05-15 21:14:30.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/inode.c    2003-07-12 15:34:44.000000000 -0600
 @@ -39,6 +39,18 @@
   */
  #undef SEARCH_FROM_ZERO
        /* inode->i_attr_flags = 0;                             unused */
        if (inode->u.ext3_i.i_flags & EXT3_SYNC_FL) {
                /* inode->i_attr_flags |= ATTR_FLAG_SYNCRONOUS; unused */
---- linux-rh-2.4.20-8/fs/ext3/namei.c~linux-2.4.20-xattr-0.8.54-chaos  2003-05-07 17:33:59.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext3/namei.c     2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext3/namei.c~linux-2.4.20-xattr-0.8.54-chaos  2003-07-12 15:33:43.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/namei.c    2003-07-12 15:34:44.000000000 -0600
 @@ -29,6 +29,7 @@
  #include <linux/sched.h>
  #include <linux/ext3_fs.h>
 +      removexattr:    ext3_removexattr,       /* BKL held */
 +};
 +
---- linux-rh-2.4.20-8/fs/ext3/super.c~linux-2.4.20-xattr-0.8.54-chaos  2003-05-07 17:33:59.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext3/super.c     2003-05-07 17:40:45.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext3/super.c~linux-2.4.20-xattr-0.8.54-chaos  2003-07-12 15:33:38.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/super.c    2003-07-12 15:34:44.000000000 -0600
 @@ -24,6 +24,7 @@
  #include <linux/jbd.h>
  #include <linux/ext3_fs.h>
  MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
  MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
  MODULE_LICENSE("GPL");
---- linux-rh-2.4.20-8/fs/ext3/symlink.c~linux-2.4.20-xattr-0.8.54-chaos        2001-11-10 06:25:04.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext3/symlink.c   2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext3/symlink.c~linux-2.4.20-xattr-0.8.54-chaos        2002-05-07 15:53:46.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/symlink.c  2003-07-12 15:34:44.000000000 -0600
 @@ -20,6 +20,7 @@
  #include <linux/fs.h>
  #include <linux/jbd.h>
 +      listxattr:      ext3_listxattr,         /* BKL held */
 +      removexattr:    ext3_removexattr,       /* BKL held */
  };
---- /dev/null  2003-01-30 18:24:37.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext3/xattr.c     2003-05-07 17:42:06.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/xattr.c    2003-07-12 15:34:44.000000000 -0600
 @@ -0,0 +1,1225 @@
 +/*
 + * linux/fs/ext3/xattr.c
 +}
 +
 +#endif  /* CONFIG_EXT3_FS_XATTR_SHARING */
---- /dev/null  2003-01-30 18:24:37.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext3/xattr_user.c        2003-05-07 17:34:25.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/xattr_user.c       2003-07-12 15:34:44.000000000 -0600
 @@ -0,0 +1,111 @@
 +/*
 + * linux/fs/ext3/xattr_user.c
 +      ext3_xattr_unregister(EXT3_XATTR_INDEX_USER,
 +                            &ext3_xattr_user_handler);
 +}
---- linux-rh-2.4.20-8/fs/jfs/jfs_xattr.h~linux-2.4.20-xattr-0.8.54-chaos       2002-11-29 07:53:15.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/jfs/jfs_xattr.h  2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/jfs/jfs_xattr.h~linux-2.4.20-xattr-0.8.54-chaos       2003-02-14 15:59:11.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/jfs/jfs_xattr.h 2003-07-12 15:34:44.000000000 -0600
 @@ -52,8 +52,10 @@ struct jfs_ea_list {
  #define       END_EALIST(ealist) \
        ((struct jfs_ea *) (((char *) (ealist)) + EALIST_SIZE(ealist)))
  extern ssize_t __jfs_getxattr(struct inode *, const char *, void *, size_t);
  extern ssize_t jfs_getxattr(struct dentry *, const char *, void *, size_t);
  extern ssize_t jfs_listxattr(struct dentry *, char *, size_t);
---- linux-rh-2.4.20-8/fs/jfs/xattr.c~linux-2.4.20-xattr-0.8.54-chaos   2002-11-29 07:53:15.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/jfs/xattr.c      2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/jfs/xattr.c~linux-2.4.20-xattr-0.8.54-chaos   2003-02-14 15:59:11.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/jfs/xattr.c     2003-07-12 15:34:44.000000000 -0600
 @@ -641,7 +641,7 @@ static int ea_put(struct inode *inode, s
  }
  
                 size_t value_len, int flags)
  {
        if (value == NULL) {    /* empty EA, do not remove */
---- /dev/null  2003-01-30 18:24:37.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/mbcache.c        2003-05-07 17:34:25.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/mbcache.c       2003-07-12 15:34:44.000000000 -0600
 @@ -0,0 +1,648 @@
 +/*
 + * linux/fs/mbcache.c
 +module_init(init_mbcache)
 +module_exit(exit_mbcache)
 +
---- linux-rh-2.4.20-8/include/asm-arm/unistd.h~linux-2.4.20-xattr-0.8.54-chaos 2003-04-11 14:04:53.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/asm-arm/unistd.h    2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/asm-arm/unistd.h~linux-2.4.20-xattr-0.8.54-chaos 2003-05-15 21:14:42.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/asm-arm/unistd.h   2003-07-12 15:34:44.000000000 -0600
 @@ -244,7 +244,6 @@
  #define __NR_security                 (__NR_SYSCALL_BASE+223)
  #define __NR_gettid                   (__NR_SYSCALL_BASE+224)
  #define __NR_tkill                    (__NR_SYSCALL_BASE+238)
  /*
   * Please check 2.5 _before_ adding calls here,
---- linux-rh-2.4.20-8/include/asm-ppc64/unistd.h~linux-2.4.20-xattr-0.8.54-chaos       2002-08-03 08:39:45.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/asm-ppc64/unistd.h  2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/asm-ppc64/unistd.h~linux-2.4.20-xattr-0.8.54-chaos       2002-09-25 11:13:42.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/asm-ppc64/unistd.h 2003-07-12 15:34:44.000000000 -0600
 @@ -218,6 +218,7 @@
  #define __NR_gettid           207
  #if 0 /* Reserved syscalls */
  #define __NR_futex            221
  #endif
  
---- linux-rh-2.4.20-8/include/asm-s390/unistd.h~linux-2.4.20-xattr-0.8.54-chaos        2002-08-03 08:39:45.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/asm-s390/unistd.h   2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/asm-s390/unistd.h~linux-2.4.20-xattr-0.8.54-chaos        2002-09-25 11:13:44.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/asm-s390/unistd.h  2003-07-12 15:34:44.000000000 -0600
 @@ -212,9 +212,18 @@
  #define __NR_madvise            219
  #define __NR_getdents64               220
  #define __NR_gettid           236
  #define __NR_tkill            237
  
---- linux-rh-2.4.20-8/include/asm-s390x/unistd.h~linux-2.4.20-xattr-0.8.54-chaos       2002-08-03 08:39:45.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/asm-s390x/unistd.h  2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/asm-s390x/unistd.h~linux-2.4.20-xattr-0.8.54-chaos       2002-09-25 11:13:45.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/asm-s390x/unistd.h 2003-07-12 15:34:44.000000000 -0600
 @@ -180,9 +180,18 @@
  #define __NR_pivot_root         217
  #define __NR_mincore            218
  #define __NR_gettid           236
  #define __NR_tkill            237
  
---- linux-rh-2.4.20-8/include/asm-sparc/unistd.h~linux-2.4.20-xattr-0.8.54-chaos       2002-08-03 08:39:45.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/asm-sparc/unistd.h  2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/asm-sparc/unistd.h~linux-2.4.20-xattr-0.8.54-chaos       2002-09-25 11:13:46.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/asm-sparc/unistd.h 2003-07-12 15:34:44.000000000 -0600
 @@ -184,24 +184,24 @@
  /* #define __NR_exportfs        166    SunOS Specific                              */
  #define __NR_mount              167 /* Common                                      */
  #define __NR_tkill              187 /* SunOS: fpathconf                            */
  /* #define __NR_sysconf         188    SunOS Specific                              */
  #define __NR_uname              189 /* Linux Specific                              */
---- linux-rh-2.4.20-8/include/asm-sparc64/unistd.h~linux-2.4.20-xattr-0.8.54-chaos     2002-08-03 08:39:45.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/asm-sparc64/unistd.h        2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/asm-sparc64/unistd.h~linux-2.4.20-xattr-0.8.54-chaos     2002-09-25 11:13:48.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/asm-sparc64/unistd.h       2003-07-12 15:34:44.000000000 -0600
 @@ -184,24 +184,24 @@
  /* #define __NR_exportfs        166    SunOS Specific                              */
  #define __NR_mount              167 /* Common                                      */
  #define __NR_tkill              187 /* SunOS: fpathconf                            */
  /* #define __NR_sysconf         188    SunOS Specific                              */
  #define __NR_uname              189 /* Linux Specific                              */
---- /dev/null  2003-01-30 18:24:37.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/linux/cache_def.h   2003-05-07 17:34:25.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/cache_def.h  2003-07-12 15:34:44.000000000 -0600
 @@ -0,0 +1,15 @@
 +/*
 + * linux/cache_def.h
 +
 +extern void register_cache(struct cache_definition *);
 +extern void unregister_cache(struct cache_definition *);
---- linux-rh-2.4.20-8/include/linux/errno.h~linux-2.4.20-xattr-0.8.54-chaos    2003-04-11 14:04:53.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/linux/errno.h       2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/linux/errno.h~linux-2.4.20-xattr-0.8.54-chaos    2003-05-15 21:15:06.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/errno.h      2003-07-12 15:34:44.000000000 -0600
 @@ -26,4 +26,8 @@
  
  #endif
 +#define ENOTSUP EOPNOTSUPP    /* Operation not supported */
 +
  #endif
---- linux-rh-2.4.20-8/include/linux/ext2_fs.h~linux-2.4.20-xattr-0.8.54-chaos  2003-04-12 15:46:42.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/linux/ext2_fs.h     2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/linux/ext2_fs.h~linux-2.4.20-xattr-0.8.54-chaos  2003-06-24 11:31:16.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/ext2_fs.h    2003-07-12 15:34:44.000000000 -0600
 @@ -57,8 +57,6 @@
   */
  #define       EXT2_BAD_INO             1      /* Bad blocks inode */
  #define EXT2_FEATURE_INCOMPAT_SUPP    EXT2_FEATURE_INCOMPAT_FILETYPE
  #define EXT2_FEATURE_RO_COMPAT_SUPP   (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \
                                         EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \
-@@ -623,8 +600,10 @@ extern struct address_space_operations e
+@@ -624,8 +601,10 @@ extern struct address_space_operations e
  
  /* namei.c */
  extern struct inode_operations ext2_dir_inode_operations;
  extern struct inode_operations ext2_fast_symlink_inode_operations;
  
  #endif        /* __KERNEL__ */
---- /dev/null  2003-01-30 18:24:37.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/linux/ext2_xattr.h  2003-05-07 17:34:25.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/ext2_xattr.h 2003-07-12 15:34:44.000000000 -0600
 @@ -0,0 +1,157 @@
 +/*
 +  File: linux/ext2_xattr.h
 +
 +#endif  /* __KERNEL__ */
 +
---- linux-rh-2.4.20-8/include/linux/ext3_fs.h~linux-2.4.20-xattr-0.8.54-chaos  2003-05-07 17:33:59.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/linux/ext3_fs.h     2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/linux/ext3_fs.h~linux-2.4.20-xattr-0.8.54-chaos  2003-07-12 15:33:41.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/ext3_fs.h    2003-07-12 15:34:44.000000000 -0600
 @@ -63,8 +63,6 @@
   */
  #define       EXT3_BAD_INO             1      /* Bad blocks inode */
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
  #ifndef _LINUX_EXT2_FS_H
-@@ -520,7 +496,7 @@ struct ext3_super_block {
+@@ -521,7 +497,7 @@ struct ext3_super_block {
  #define EXT3_FEATURE_INCOMPAT_RECOVER         0x0004 /* Needs recovery */
  #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV     0x0008 /* Journal device */
  
  #define EXT3_FEATURE_INCOMPAT_SUPP    (EXT3_FEATURE_INCOMPAT_FILETYPE| \
                                         EXT3_FEATURE_INCOMPAT_RECOVER)
  #define EXT3_FEATURE_RO_COMPAT_SUPP   (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \
-@@ -703,6 +679,7 @@ extern void ext3_check_inodes_bitmap (st
+@@ -704,6 +680,7 @@ extern void ext3_check_inodes_bitmap (st
  extern unsigned long ext3_count_free (struct buffer_head *, unsigned);
  
  /* inode.c */
  extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
  extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
  
-@@ -771,8 +748,10 @@ extern struct address_space_operations e
+@@ -773,8 +750,10 @@ extern struct address_space_operations e
  
  /* namei.c */
  extern struct inode_operations ext3_dir_inode_operations;
  extern struct inode_operations ext3_fast_symlink_inode_operations;
  
  
---- linux-rh-2.4.20-8/include/linux/ext3_jbd.h~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:59.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/linux/ext3_jbd.h    2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/linux/ext3_jbd.h~linux-2.4.20-xattr-0.8.54-chaos 2003-07-12 15:33:38.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/ext3_jbd.h   2003-07-12 15:34:44.000000000 -0600
 @@ -30,13 +30,19 @@
  
  #define EXT3_SINGLEDATA_TRANS_BLOCKS  8U
  
  extern int ext3_writepage_trans_blocks(struct inode *inode);
  
---- /dev/null  2003-01-30 18:24:37.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/linux/ext3_xattr.h  2003-05-07 17:34:25.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/ext3_xattr.h 2003-07-12 15:34:44.000000000 -0600
 @@ -0,0 +1,157 @@
 +/*
 +  File: linux/ext3_xattr.h
 +
 +#endif  /* __KERNEL__ */
 +
---- linux-rh-2.4.20-8/include/linux/fs.h~linux-2.4.20-xattr-0.8.54-chaos       2003-05-07 17:33:58.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/linux/fs.h  2003-05-07 17:34:25.000000000 +0800
-@@ -915,7 +915,7 @@ struct inode_operations {
+--- kernel-2.4.20-6chaos_18_7/include/linux/fs.h~linux-2.4.20-xattr-0.8.54-chaos       2003-07-12 15:31:35.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/fs.h 2003-07-12 15:34:44.000000000 -0600
+@@ -914,7 +914,7 @@ struct inode_operations {
        int (*setattr) (struct dentry *, struct iattr *);
-       int (*setattr_raw) (struct inode *, struct iattr *);
+       int (*setattr_raw) (struct inode *, struct iattr *);
        int (*getattr) (struct dentry *, struct iattr *);
 -      int (*setxattr) (struct dentry *, const char *, void *, size_t, int);
 +      int (*setxattr) (struct dentry *, const char *, const void *, size_t, int);
        ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
        ssize_t (*listxattr) (struct dentry *, char *, size_t);
        int (*removexattr) (struct dentry *, const char *);
---- /dev/null  2003-01-30 18:24:37.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/linux/mbcache.h     2003-05-07 17:34:25.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/mbcache.h    2003-07-12 15:34:44.000000000 -0600
 @@ -0,0 +1,69 @@
 +/*
 +  File: linux/mbcache.h
 +struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache_entry *, int,
 +                                              kdev_t, unsigned int);
 +#endif
---- linux-rh-2.4.20-8/kernel/ksyms.c~linux-2.4.20-xattr-0.8.54-chaos   2003-05-07 17:33:58.000000000 +0800
-+++ linux-rh-2.4.20-8-root/kernel/ksyms.c      2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/kernel/ksyms.c~linux-2.4.20-xattr-0.8.54-chaos   2003-07-12 15:14:02.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/kernel/ksyms.c     2003-07-12 15:35:19.000000000 -0600
 @@ -12,6 +12,7 @@
  #define __KERNEL_SYSCALLS__
  #include <linux/config.h>
  #include <linux/smp.h>
  #include <linux/module.h>
  #include <linux/blkdev.h>
-@@ -107,6 +108,7 @@ EXPORT_SYMBOL(exit_mm);
+@@ -106,6 +107,7 @@ EXPORT_SYMBOL(do_brk);
+ EXPORT_SYMBOL(exit_mm);
  EXPORT_SYMBOL(exit_files);
  EXPORT_SYMBOL(exit_fs);
- EXPORT_SYMBOL(exit_sighand);
 +EXPORT_SYMBOL(copy_fs_struct);
+ EXPORT_SYMBOL(exit_sighand);
+ EXPORT_SYMBOL_GPL(make_pages_present);
  
- /* internal kernel memory management */
- EXPORT_SYMBOL(_alloc_pages);
-@@ -125,6 +127,8 @@ EXPORT_SYMBOL(kmem_cache_alloc);
+@@ -126,6 +128,8 @@ EXPORT_SYMBOL(kmem_cache_alloc);
  EXPORT_SYMBOL(kmem_cache_free);
  EXPORT_SYMBOL(kmem_cache_validate);
  EXPORT_SYMBOL(kmem_cache_size);
  EXPORT_SYMBOL(kmalloc);
  EXPORT_SYMBOL(kfree);
  EXPORT_SYMBOL(vfree);
---- linux-rh-2.4.20-8/mm/vmscan.c~linux-2.4.20-xattr-0.8.54-chaos      2003-05-07 17:33:58.000000000 +0800
-+++ linux-rh-2.4.20-8-root/mm/vmscan.c 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/mm/vmscan.c~linux-2.4.20-xattr-0.8.54-chaos      2003-07-12 15:33:34.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/mm/vmscan.c        2003-07-12 15:34:44.000000000 -0600
 @@ -21,6 +21,7 @@
  #include <linux/kernel_stat.h>
  #include <linux/swap.h>
  #ifdef CONFIG_QUOTA
        ret += shrink_dqcache_memory(DEF_PRIORITY, gfp_mask);
  #endif
---- /dev/null  2003-01-30 18:24:37.000000000 +0800
-+++ linux-root/fs/ext3/ext3-exports.c  2003-05-05 18:19:11.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/ext3-exports.c     2003-07-12 15:34:44.000000000 -0600
 @@ -0,0 +1,13 @@
 +#include <linux/config.h>
 +#include <linux/module.h>
index 78855ac..c987485 100644 (file)
@@ -7,6 +7,6 @@
 --- /dev/null  Fri Aug 30 17:31:37 2002
 +++ linux-2.4.18-18.8.0-l12-braam/include/linux/lustre_version.h       Thu Feb 13 07:58:33 2003
 @@ -0,0 +1 @@
-+#define LUSTRE_KERNEL_VERSION 19
++#define LUSTRE_KERNEL_VERSION 21
 
 _
index 710cdc9..7aa5941 100644 (file)
@@ -1,7 +1,7 @@
  0 files changed
 
---- linux-2.4.20-rh/fs/dcache.c~vfs_intent-2.4.20-rh   2003-04-11 14:04:58.000000000 +0800
-+++ linux-2.4.20-rh-root/fs/dcache.c   2003-06-09 23:18:07.000000000 +0800
+--- linux-2.4.20/fs/dcache.c~vfs_intent-2.4.20-rh      2003-07-17 08:32:59.000000000 -0700
++++ linux-2.4.20-mmonroe/fs/dcache.c   2003-07-17 08:35:22.000000000 -0700
 @@ -186,6 +186,13 @@ int d_invalidate(struct dentry * dentry)
                spin_unlock(&dcache_lock);
                return 0;
        /*
         * Check whether to do a partial shrink_dcache
         * to get rid of unused child entries.
-@@ -624,6 +631,7 @@ struct dentry * d_alloc(struct dentry * 
-       dentry->d_fsdata = NULL;
-       dentry->d_extra_attributes = NULL;
-       dentry->d_mounted = 0;
-+      dentry->d_it = NULL;
-       dentry->d_cookie = NULL;
-       INIT_LIST_HEAD(&dentry->d_hash);
-       INIT_LIST_HEAD(&dentry->d_lru);
-@@ -839,13 +847,19 @@ void d_delete(struct dentry * dentry)
+@@ -839,13 +846,19 @@ void d_delete(struct dentry * dentry)
   * Adds a dentry to the hash according to its name.
   */
   
  }
  
  #define do_switch(x,y) do { \
---- linux-2.4.20-rh/fs/namei.c~vfs_intent-2.4.20-rh    2003-04-11 14:04:57.000000000 +0800
-+++ linux-2.4.20-rh-root/fs/namei.c    2003-06-09 23:18:07.000000000 +0800
+--- linux-2.4.20/fs/namei.c~vfs_intent-2.4.20-rh       2003-07-17 08:32:47.000000000 -0700
++++ linux-2.4.20-mmonroe/fs/namei.c    2003-07-17 08:35:22.000000000 -0700
 @@ -94,6 +94,13 @@
   * XEmacs seems to be relying on it...
   */
  
-+void intent_release(struct dentry *de, struct lookup_intent *it)
++void intent_release(struct lookup_intent *it)
 +{
-+      if (it && de->d_op && de->d_op->d_intent_release)
-+              de->d_op->d_intent_release(de, it);
++      if (it && it->it_op_release)
++              it->it_op_release(it);
 +
 +}
 +
@@ -73,8 +65,8 @@
  {
        struct dentry * dentry = d_lookup(parent, name);
  
-+      if (dentry && dentry->d_op && dentry->d_op->d_revalidate2) {
-+              if (!dentry->d_op->d_revalidate2(dentry, flags, it) &&
++      if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) {
++              if (!dentry->d_op->d_revalidate_it(dentry, flags, it) &&
 +                  !d_invalidate(dentry)) {
 +                      dput(dentry);
 +                      dentry = NULL;
                result = ERR_PTR(-ENOMEM);
                if (dentry) {
                        lock_kernel();
-+                      if (dir->i_op->lookup2)
-+                              result = dir->i_op->lookup2(dir, dentry, it);
++                      if (dir->i_op->lookup_it)
++                              result = dir->i_op->lookup_it(dir, dentry, it, flags);
 +                      else
                        result = dir->i_op->lookup(dir, dentry);
                        unlock_kernel();
                        dput(result);
                        result = ERR_PTR(-ENOENT);
                }
-+      } else if (result->d_op && result->d_op->d_revalidate2) {
-+              if (!result->d_op->d_revalidate2(result, flags, it) &&
++      } else if (result->d_op && result->d_op->d_revalidate_it) {
++              if (!result->d_op->d_revalidate_it(result, flags, it) &&
 +                  !d_invalidate(result)) {
 +                      dput(result);
 +                      goto again;
  {
        int err;
        if (current->link_count >= max_recursive_link)
-@@ -348,10 +377,21 @@ static inline int do_follow_link(struct 
+@@ -348,10 +377,18 @@ static inline int do_follow_link(struct 
        current->link_count++;
        current->total_link_count++;
        UPDATE_ATIME(dentry->d_inode);
--      err = dentry->d_inode->i_op->follow_link(dentry, nd);
 +      nd->it = it;
-+      if (dentry->d_inode->i_op->follow_link2)
-+              err = dentry->d_inode->i_op->follow_link2(dentry, nd, it);
-+      else
-+              err = dentry->d_inode->i_op->follow_link(dentry, nd);
+       err = dentry->d_inode->i_op->follow_link(dentry, nd);
 +      if (!err && it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) {
 +              /* vfs_follow_link was never called */
-+              intent_release(dentry, it);
++              intent_release(it);
 +              path_release(nd);
 +              err = -ENOLINK;
 +      }
        current->link_count--;
        return err;
  loop:
-+      intent_release(dentry, it);
++      intent_release(it);
        path_release(nd);
        return -ELOOP;
  }
-@@ -381,15 +421,26 @@ int follow_up(struct vfsmount **mnt, str
+@@ -381,15 +418,26 @@ int follow_up(struct vfsmount **mnt, str
        return __follow_up(mnt, dentry);
  }
  
 +                      opc = it->it_op;
 +                      mode = it->it_mode;
 +              }
-+              intent_release(*dentry, it);
++              intent_release(it);
 +              if (it) {
 +                      it->it_op = opc;
 +                      it->it_mode = mode;
                dput(*dentry);
                mntput(mounted->mnt_parent);
                *dentry = dget(mounted->mnt_root);
-@@ -401,7 +452,7 @@ static inline int __follow_down(struct v
+@@ -401,7 +449,7 @@ static inline int __follow_down(struct v
  
  int follow_down(struct vfsmount **mnt, struct dentry **dentry)
  {
  }
   
  static inline void follow_dotdot(struct nameidata *nd)
-@@ -437,7 +488,7 @@ static inline void follow_dotdot(struct 
+@@ -437,7 +485,7 @@ static inline void follow_dotdot(struct 
                mntput(nd->mnt);
                nd->mnt = parent;
        }
                ;
  }
  
-@@ -449,7 +500,8 @@ static inline void follow_dotdot(struct 
+@@ -449,7 +497,8 @@ static inline void follow_dotdot(struct 
   *
   * We expect 'base' to be positive and a directory.
   */
  {
        struct dentry *dentry;
        struct inode *inode;
-@@ -526,18 +578,18 @@ int link_path_walk(const char * name, st
+@@ -526,19 +575,18 @@ int link_path_walk(const char * name, st
                                break;
                }
                /* This does the actual lookups.. */
 -              dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
-+              dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL);
++              dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL);
                if (!dentry) {
                        err = -EWOULDBLOCKIO;
                        if (atomic)
                                break;
 -                      dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
-+                      dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL);
++                      dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL);
                        err = PTR_ERR(dentry);
                        if (IS_ERR(dentry))
                                break;
                }
                /* Check mountpoints.. */
 -              while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
-+              while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, NULL))
-                       ;
+-                      ;
++              while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, NULL));
  
                err = -ENOENT;
-@@ -548,8 +600,8 @@ int link_path_walk(const char * name, st
-               if (!inode->i_op)
+               inode = dentry->d_inode;
+@@ -549,7 +597,7 @@ int link_path_walk(const char * name, st
                        goto out_dput;
  
--              if (inode->i_op->follow_link) {
+               if (inode->i_op->follow_link) {
 -                      err = do_follow_link(dentry, nd);
-+              if (inode->i_op->follow_link || inode->i_op->follow_link2) {
 +                      err = do_follow_link(dentry, nd, NULL);
                        dput(dentry);
                        if (err)
                                goto return_err;
-@@ -565,7 +617,7 @@ int link_path_walk(const char * name, st
+@@ -565,7 +613,7 @@ int link_path_walk(const char * name, st
                        nd->dentry = dentry;
                }
                err = -ENOTDIR; 
 -              if (!inode->i_op->lookup)
-+              if (!inode->i_op->lookup && !inode->i_op->lookup2)
++              if (!inode->i_op->lookup && !inode->i_op->lookup_it)
                        break;
                continue;
                /* here ends the main loop */
-@@ -592,22 +644,23 @@ last_component:
+@@ -592,22 +640,22 @@ last_component:
                        if (err < 0)
                                break;
                }
 -              dentry = cached_lookup(nd->dentry, &this, 0);
-+              dentry = cached_lookup(nd->dentry, &this, 0, it);
++              dentry = cached_lookup(nd->dentry, &this, 0, it);
                if (!dentry) {
                        err = -EWOULDBLOCKIO;
                        if (atomic)
                                break;
 -                      dentry = real_lookup(nd->dentry, &this, 0);
-+                      dentry = real_lookup(nd->dentry, &this, 0, it);
++                      dentry = real_lookup(nd->dentry, &this, 0, it);
                        err = PTR_ERR(dentry);
                        if (IS_ERR(dentry))
                                break;
                }
 -              while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
-+              while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, it))
++              while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, it))
                        ;
                inode = dentry->d_inode;
                if ((lookup_flags & LOOKUP_FOLLOW)
--                  && inode && inode->i_op && inode->i_op->follow_link) {
+                   && inode && inode->i_op && inode->i_op->follow_link) {
 -                      err = do_follow_link(dentry, nd);
-+                  && inode && inode->i_op &&
-+                  (inode->i_op->follow_link || inode->i_op->follow_link2)) {
-+                      err = do_follow_link(dentry, nd, it);
++                      err = do_follow_link(dentry, nd, it);
                        dput(dentry);
                        if (err)
                                goto return_err;
-@@ -621,7 +674,8 @@ last_component:
+@@ -621,7 +669,8 @@ last_component:
                        goto no_inode;
                if (lookup_flags & LOOKUP_DIRECTORY) {
                        err = -ENOTDIR; 
 -                      if (!inode->i_op || !inode->i_op->lookup)
 +                      if (!inode->i_op ||
-+                          (!inode->i_op->lookup && !inode->i_op->lookup2))
++                          (!inode->i_op->lookup && !inode->i_op->lookup_it))
                                break;
                }
                goto return_base;
-@@ -645,6 +699,23 @@ return_reval:
+@@ -645,6 +694,23 @@ return_reval:
                 * Check the cached dentry for staleness.
                 */
                dentry = nd->dentry;
-+        revalidate_again:
-+              if (dentry && dentry->d_op && dentry->d_op->d_revalidate2) {
++      revalidate_again:
++              if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) {
 +                      err = -ESTALE;
-+                      if (!dentry->d_op->d_revalidate2(dentry, 0, it)) {
-+                                struct dentry *new;
-+                                err = permission(dentry->d_parent->d_inode, 
-+                                                 MAY_EXEC);
-+                                if (err)
-+                                        break;
-+                                new = real_lookup(dentry->d_parent,
-+                                                  &dentry->d_name, 0, NULL);
++                      if (!dentry->d_op->d_revalidate_it(dentry, 0, it)) {
++                              struct dentry *new;
++                              err = permission(dentry->d_parent->d_inode,
++                                               MAY_EXEC);
++                              if (err)
++                                      break;
++                              new = real_lookup(dentry->d_parent,
++                                                &dentry->d_name, 0, NULL);
 +                              d_invalidate(dentry);
-+                                dput(dentry);
-+                                dentry = new;
-+                                goto revalidate_again;
-+                        }
++                              dput(dentry);
++                              dentry = new;
++                              goto revalidate_again;
++                      }
 +              } else
                if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
                        err = -ESTALE;
                        if (!dentry->d_op->d_revalidate(dentry, 0)) {
-@@ -658,15 +729,28 @@ out_dput:
+@@ -658,15 +724,28 @@ out_dput:
                dput(dentry);
                break;
        }
 +      if (err)
-+              intent_release(nd->dentry, it);
++              intent_release(it);
        path_release(nd);
  return_err:
        return err;
  }
  
  /* SMP-safe */
-@@ -751,6 +835,17 @@ walk_init_root(const char *name, struct 
+@@ -751,6 +830,17 @@ walk_init_root(const char *name, struct 
  }
  
  /* SMP-safe */
  int path_lookup(const char *path, unsigned flags, struct nameidata *nd)
  {
        int error = 0;
-@@ -765,6 +860,7 @@ int path_init(const char *name, unsigned
+@@ -765,6 +855,7 @@ int path_init(const char *name, unsigned
  {
        nd->last_type = LAST_ROOT; /* if there are only slashes... */
        nd->flags = flags;
        if (*name=='/')
                return walk_init_root(name,nd);
        read_lock(&current->fs->lock);
-@@ -779,7 +875,8 @@ int path_init(const char *name, unsigned
+@@ -779,7 +870,8 @@ int path_init(const char *name, unsigned
   * needs parent already locked. Doesn't follow mounts.
   * SMP-safe.
   */
  {
        struct dentry * dentry;
        struct inode *inode;
-@@ -802,13 +899,16 @@ struct dentry * lookup_hash(struct qstr 
+@@ -802,13 +894,16 @@ struct dentry * lookup_hash(struct qstr 
                        goto out;
        }
  
                if (!new)
                        goto out;
                lock_kernel();
-+              if (inode->i_op->lookup2)
-+                      dentry = inode->i_op->lookup2(inode, new, it);
++              if (inode->i_op->lookup_it)
++                      dentry = inode->i_op->lookup_it(inode, new, it, 0);
 +              else
                dentry = inode->i_op->lookup(inode, new);
                unlock_kernel();
                if (!dentry)
-@@ -820,6 +920,12 @@ out:
+@@ -820,6 +915,12 @@ out:
        return dentry;
  }
  
  /* SMP-safe */
  struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
  {
-@@ -841,7 +947,7 @@ struct dentry * lookup_one_len(const cha
+@@ -841,7 +942,7 @@ struct dentry * lookup_one_len(const cha
        }
        this.hash = end_name_hash(hash);
  
  access:
        return ERR_PTR(-EACCES);
  }
-@@ -872,6 +978,23 @@ int __user_walk(const char *name, unsign
+@@ -872,6 +973,23 @@ int __user_walk(const char *name, unsign
        return err;
  }
  
  /*
   * It's inline, so penalty for filesystems that don't use sticky bit is
   * minimal.
-@@ -1010,7 +1133,8 @@ exit_lock:
+@@ -969,7 +1087,8 @@ static inline int lookup_flags(unsigned 
+       return retval;
+ }
+-int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
++static int vfs_create_it(struct inode *dir, struct dentry *dentry, int mode,
++                       struct lookup_intent *it)
+ {
+       int error;
+@@ -982,12 +1101,15 @@ int vfs_create(struct inode *dir, struct
+               goto exit_lock;
+       error = -EACCES;        /* shouldn't it be ENOSYS? */
+-      if (!dir->i_op || !dir->i_op->create)
++      if (!dir->i_op || (!dir->i_op->create && !dir->i_op->create_it))
+               goto exit_lock;
+       DQUOT_INIT(dir);
+       lock_kernel();
+-      error = dir->i_op->create(dir, dentry, mode);
++      if (dir->i_op->create_it)
++              error = dir->i_op->create_it(dir, dentry, mode, it);
++      else
++              error = dir->i_op->create(dir, dentry, mode);
+       unlock_kernel();
+ exit_lock:
+       up(&dir->i_zombie);
+@@ -996,6 +1118,11 @@ exit_lock:
+       return error;
+ }
++int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
++{
++      return vfs_create_it(dir, dentry, mode, NULL);
++}
++
+ /*
+  *    open_namei()
+  *
+@@ -1010,7 +1137,8 @@ exit_lock:
   * for symlinks (where the permissions are checked later).
   * SMP-safe
   */
  {
        int acc_mode, error = 0;
        struct inode *inode;
-@@ -1024,7 +1148,7 @@ int open_namei(const char * pathname, in
+@@ -1024,7 +1152,7 @@ int open_namei(const char * pathname, in
         * The simplest case - just a plain lookup.
         */
        if (!(flag & O_CREAT)) {
                if (error)
                        return error;
                dentry = nd->dentry;
-@@ -1034,6 +1158,10 @@ int open_namei(const char * pathname, in
+@@ -1034,6 +1162,10 @@ int open_namei(const char * pathname, in
        /*
         * Create - we need to know the parent.
         */
        error = path_lookup(pathname, LOOKUP_PARENT, nd);
        if (error)
                return error;
-@@ -1049,7 +1177,7 @@ int open_namei(const char * pathname, in
+@@ -1049,7 +1181,7 @@ int open_namei(const char * pathname, in
  
        dir = nd->dentry;
        down(&dir->d_inode->i_sem);
  
  do_last:
        error = PTR_ERR(dentry);
-@@ -1058,6 +1186,7 @@ do_last:
+@@ -1058,10 +1190,11 @@ do_last:
                goto exit;
        }
  
 +      it->it_mode = mode;
        /* Negative dentry, just create the file */
        if (!dentry->d_inode) {
-               error = vfs_create(dir->d_inode, dentry,
-@@ -1086,12 +1215,13 @@ do_last:
+-              error = vfs_create(dir->d_inode, dentry,
+-                                 mode & ~current->fs->umask);
++              error = vfs_create_it(dir->d_inode, dentry,
++                                 mode & ~current->fs->umask, it);
+               up(&dir->d_inode->i_sem);
+               dput(nd->dentry);
+               nd->dentry = dentry;
+@@ -1086,7 +1219,7 @@ do_last:
                error = -ELOOP;
                if (flag & O_NOFOLLOW)
                        goto exit_dput;
        }
        error = -ENOENT;
        if (!dentry->d_inode)
-               goto exit_dput;
--      if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)
-+      if (dentry->d_inode->i_op && (dentry->d_inode->i_op->follow_link ||
-+                                    dentry->d_inode->i_op->follow_link2))
-               goto do_link;
-       dput(nd->dentry);
-@@ -1165,7 +1295,7 @@ ok:
+@@ -1165,7 +1298,7 @@ ok:
                if (!error) {
                        DQUOT_INIT(inode);
                        
                }
                put_write_access(inode);
                if (error)
-@@ -1177,8 +1307,10 @@ ok:
+@@ -1177,8 +1310,10 @@ ok:
        return 0;
  
  exit_dput:
-+      intent_release(dentry, it);
++      intent_release(it);
        dput(dentry);
  exit:
-+      intent_release(nd->dentry, it);
++      intent_release(it);
        path_release(nd);
        return error;
  
-@@ -1197,7 +1329,19 @@ do_link:
+@@ -1197,7 +1332,16 @@ do_link:
         * are done. Procfs-like symlinks just set LAST_BIND.
         */
        UPDATE_ATIME(dentry->d_inode);
--      error = dentry->d_inode->i_op->follow_link(dentry, nd);
 +      nd->it = it;
-+      if (dentry->d_inode->i_op->follow_link2)
-+              error = dentry->d_inode->i_op->follow_link2(dentry, nd, it);
-+      else
-+              error = dentry->d_inode->i_op->follow_link(dentry, nd);
+       error = dentry->d_inode->i_op->follow_link(dentry, nd);
 +      if (error) {
-+              intent_release(dentry, it);
++              intent_release(it);
 +      } else if (it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) {
 +              /* vfs_follow_link was never called */
-+              intent_release(dentry, it);
++              intent_release(it);
 +              path_release(nd);
 +              error = -ENOLINK;
 +      }
        if (IS_ERR(dentry))
                goto fail;
        if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
-@@ -1289,7 +1440,19 @@ asmlinkage long sys_mknod(const char * f
+@@ -1289,7 +1440,16 @@ asmlinkage long sys_mknod(const char * f
        error = path_lookup(tmp, LOOKUP_PARENT, &nd);
        if (error)
                goto out;
 -      dentry = lookup_create(&nd, 0);
 +
-+      if (nd.dentry->d_inode->i_op->mknod2) {
++      if (nd.dentry->d_inode->i_op->mknod_raw) {
 +              struct inode_operations *op = nd.dentry->d_inode->i_op;
-+              error = op->mknod2(nd.dentry->d_inode,
-+                                 nd.last.name,
-+                                 nd.last.len,
-+                                 mode, dev);
++              error = op->mknod_raw(&nd, mode, dev);
 +              /* the file system wants to use normal vfs path now */
 +              if (error != -EOPNOTSUPP)
 +                      goto out2;
        error = PTR_ERR(dentry);
  
        mode &= ~current->fs->umask;
-@@ -1310,6 +1473,7 @@ asmlinkage long sys_mknod(const char * f
+@@ -1310,6 +1470,7 @@ asmlinkage long sys_mknod(const char * f
                dput(dentry);
        }
        up(&nd.dentry->d_inode->i_sem);
        path_release(&nd);
  out:
        putname(tmp);
-@@ -1357,7 +1521,17 @@ asmlinkage long sys_mkdir(const char * p
+@@ -1357,7 +1518,14 @@ asmlinkage long sys_mkdir(const char * p
                error = path_lookup(tmp, LOOKUP_PARENT, &nd);
                if (error)
                        goto out;
 -              dentry = lookup_create(&nd, 1);
-+              if (nd.dentry->d_inode->i_op->mkdir2) {
++              if (nd.dentry->d_inode->i_op->mkdir_raw) {
 +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
-+                      error = op->mkdir2(nd.dentry->d_inode,
-+                                         nd.last.name,
-+                                         nd.last.len,
-+                                         mode);
++                      error = op->mkdir_raw(&nd, mode);
 +                      /* the file system wants to use normal vfs path now */
 +                      if (error != -EOPNOTSUPP)
 +                              goto out2;
                error = PTR_ERR(dentry);
                if (!IS_ERR(dentry)) {
                        error = vfs_mkdir(nd.dentry->d_inode, dentry,
-@@ -1365,6 +1539,7 @@ asmlinkage long sys_mkdir(const char * p
+@@ -1365,6 +1533,7 @@ asmlinkage long sys_mkdir(const char * p
                        dput(dentry);
                }
                up(&nd.dentry->d_inode->i_sem);
                path_release(&nd);
  out:
                putname(tmp);
-@@ -1465,8 +1640,33 @@ asmlinkage long sys_rmdir(const char * p
+@@ -1465,8 +1634,16 @@ asmlinkage long sys_rmdir(const char * p
                        error = -EBUSY;
                        goto exit1;
        }
-+      if (nd.dentry->d_inode->i_op->rmdir2) {
-+              struct inode_operations *op = nd.dentry->d_inode->i_op;
-+              struct dentry *last;
-+
-+              down(&nd.dentry->d_inode->i_sem);
-+              last = lookup_hash_it(&nd.last, nd.dentry, NULL);
-+              up(&nd.dentry->d_inode->i_sem);
-+              if (IS_ERR(last)) {
-+                      error = PTR_ERR(last);
-+                      goto exit1;
-+              }
-+              if (d_mountpoint(last)) {
-+                      dput(last);
-+                      error = -EBUSY;
-+                      goto exit1;
-+              }
-+              dput(last);
++      if (nd.dentry->d_inode->i_op->rmdir_raw) {
++              struct inode_operations *op = nd.dentry->d_inode->i_op;
 +
-+              error = op->rmdir2(nd.dentry->d_inode,
-+                                 nd.last.name,
-+                                 nd.last.len);
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto exit1;
-+      }
++              error = op->rmdir_raw(&nd);
++              /* the file system wants to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto exit1;
++      }
        down(&nd.dentry->d_inode->i_sem);
 -      dentry = lookup_hash(&nd.last, nd.dentry);
 +      dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
        error = PTR_ERR(dentry);
        if (!IS_ERR(dentry)) {
                error = vfs_rmdir(nd.dentry->d_inode, dentry);
-@@ -1524,8 +1724,17 @@ asmlinkage long sys_unlink(const char * 
+@@ -1524,8 +1701,15 @@ asmlinkage long sys_unlink(const char * 
        error = -EISDIR;
        if (nd.last_type != LAST_NORM)
                goto exit1;
-+      if (nd.dentry->d_inode->i_op->unlink2) {
-+              struct inode_operations *op = nd.dentry->d_inode->i_op;
-+              error = op->unlink2(nd.dentry->d_inode,
-+                                  nd.last.name,
-+                                  nd.last.len);
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto exit1;
-+      }
++      if (nd.dentry->d_inode->i_op->unlink_raw) {
++              struct inode_operations *op = nd.dentry->d_inode->i_op;
++              error = op->unlink_raw(&nd);
++              /* the file system wants to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto exit1;
++      }
        down(&nd.dentry->d_inode->i_sem);
 -      dentry = lookup_hash(&nd.last, nd.dentry);
 +      dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
        error = PTR_ERR(dentry);
        if (!IS_ERR(dentry)) {
                /* Why not before? Because we want correct error value */
-@@ -1592,15 +1801,26 @@ asmlinkage long sys_symlink(const char *
+@@ -1592,15 +1776,23 @@ asmlinkage long sys_symlink(const char *
                error = path_lookup(to, LOOKUP_PARENT, &nd);
                if (error)
                        goto out;
 -              dentry = lookup_create(&nd, 0);
-+              if (nd.dentry->d_inode->i_op->symlink2) {
++              if (nd.dentry->d_inode->i_op->symlink_raw) {
 +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
-+                      error = op->symlink2(nd.dentry->d_inode,
-+                                           nd.last.name,
-+                                           nd.last.len,
-+                                           from);
++                      error = op->symlink_raw(&nd, from);
 +                      /* the file system wants to use normal vfs path now */
 +                      if (error != -EOPNOTSUPP)
 +                              goto out2;
                putname(to);
        }
        putname(from);
-@@ -1676,7 +1896,17 @@ asmlinkage long sys_link(const char * ol
+@@ -1676,7 +1868,14 @@ asmlinkage long sys_link(const char * ol
                error = -EXDEV;
                if (old_nd.mnt != nd.mnt)
                        goto out_release;
 -              new_dentry = lookup_create(&nd, 0);
-+              if (nd.dentry->d_inode->i_op->link2) {
++              if (nd.dentry->d_inode->i_op->link_raw) {
 +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
-+                      error = op->link2(old_nd.dentry->d_inode,
-+                                        nd.dentry->d_inode,
-+                                        nd.last.name,
-+                                        nd.last.len);
++                      error = op->link_raw(&old_nd, &nd);
 +                      /* the file system wants to use normal vfs path now */
 +                      if (error != -EOPNOTSUPP)
 +                              goto out_release;
                error = PTR_ERR(new_dentry);
                if (!IS_ERR(new_dentry)) {
                        error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
-@@ -1720,7 +1950,8 @@ exit:
+@@ -1720,7 +1919,7 @@ exit:
   *       locking].
   */
  int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
 -             struct inode *new_dir, struct dentry *new_dentry)
-+                 struct inode *new_dir, struct dentry *new_dentry,
-+                 struct lookup_intent *it)
++                 struct inode *new_dir, struct dentry *new_dentry)
  {
        int error;
        struct inode *target;
-@@ -1778,6 +2009,7 @@ int vfs_rename_dir(struct inode *old_dir
-               error = -EBUSY;
-       else 
-               error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
-+      intent_release(new_dentry, it);
-       if (target) {
-               if (!error)
-                       target->i_flags |= S_DEAD;
-@@ -1799,7 +2031,8 @@ out_unlock:
+@@ -1799,7 +1998,7 @@ out_unlock:
  }
  
  int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
 -             struct inode *new_dir, struct dentry *new_dentry)
-+                   struct inode *new_dir, struct dentry *new_dentry,
-+                   struct lookup_intent *it)
++                   struct inode *new_dir, struct dentry *new_dentry)
  {
        int error;
  
-@@ -1830,6 +2063,7 @@ int vfs_rename_other(struct inode *old_d
-               error = -EBUSY;
-       else
-               error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
-+      intent_release(new_dentry, it);
-       double_up(&old_dir->i_zombie, &new_dir->i_zombie);
-       if (error)
-               return error;
-@@ -1841,13 +2075,14 @@ int vfs_rename_other(struct inode *old_d
- }
- int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
--             struct inode *new_dir, struct dentry *new_dentry)
-+             struct inode *new_dir, struct dentry *new_dentry,
-+             struct lookup_intent *it)
- {
-       int error;
-       if (S_ISDIR(old_dentry->d_inode->i_mode))
--              error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
-+              error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry,it);
-       else
--              error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
-+              error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,it);
-       if (!error) {
-               if (old_dir == new_dir)
-                       inode_dir_notify(old_dir, DN_RENAME);
-@@ -1889,7 +2124,7 @@ static inline int do_rename(const char *
+@@ -1887,9 +2086,18 @@ static inline int do_rename(const char *
+       if (newnd.last_type != LAST_NORM)
+               goto exit2;
  
++      if (old_dir->d_inode->i_op->rename_raw) {
++              lock_kernel();
++              error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd);
++              unlock_kernel();
++              /* the file system wants to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto exit2;
++      }
++
        double_lock(new_dir, old_dir);
  
 -      old_dentry = lookup_hash(&oldnd.last, old_dir);
        error = PTR_ERR(old_dentry);
        if (IS_ERR(old_dentry))
                goto exit3;
-@@ -1905,16 +2140,37 @@ static inline int do_rename(const char *
+@@ -1905,16 +2113,16 @@ static inline int do_rename(const char *
                if (newnd.last.name[newnd.last.len])
                        goto exit4;
        }
        if (IS_ERR(new_dentry))
                goto exit4;
  
-+      if (old_dir->d_inode->i_op->rename2) {
-+              lock_kernel();
-+              /* don't rename mount point. mds will take care of
-+               * the rest sanity checking */
-+              if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) {
-+                      error = -EBUSY;
-+                      goto exit5;
-+              }
-+
-+              error = old_dir->d_inode->i_op->rename2(old_dir->d_inode,
-+                                                      new_dir->d_inode,
-+                                                      oldnd.last.name,
-+                                                      oldnd.last.len,
-+                                                      newnd.last.name,
-+                                                      newnd.last.len);
-+              unlock_kernel();
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto exit5;
-+      }
 +
        lock_kernel();
        error = vfs_rename(old_dir->d_inode, old_dentry,
--                                 new_dir->d_inode, new_dentry);
-+                                 new_dir->d_inode, new_dentry, NULL);
+                                  new_dir->d_inode, new_dentry);
        unlock_kernel();
 -
-+exit5:
        dput(new_dentry);
  exit4:
        dput(old_dentry);
-@@ -1965,20 +2221,28 @@ out:
+@@ -1965,20 +2173,28 @@ out:
  }
  
  static inline int
  out:
        if (current->link_count || res || nd->last_type!=LAST_NORM)
                return res;
-@@ -2002,7 +2266,13 @@ fail:
+@@ -2002,7 +2218,13 @@ fail:
  
  int vfs_follow_link(struct nameidata *nd, const char *link)
  {
  }
  
  /* get the link contents into pagecache */
-@@ -2044,7 +2314,7 @@ int page_follow_link(struct dentry *dent
+@@ -2044,7 +2266,7 @@ int page_follow_link(struct dentry *dent
  {
        struct page *page = NULL;
        char *s = page_getlink(dentry, &page);
        if (page) {
                kunmap(page);
                page_cache_release(page);
---- linux-2.4.20-rh/fs/nfsd/vfs.c~vfs_intent-2.4.20-rh 2003-04-11 14:04:48.000000000 +0800
-+++ linux-2.4.20-rh-root/fs/nfsd/vfs.c 2003-06-09 23:18:07.000000000 +0800
-@@ -1293,7 +1293,7 @@ nfsd_rename(struct svc_rqst *rqstp, stru
-                       err = nfserr_perm;
-       } else
- #endif
--      err = vfs_rename(fdir, odentry, tdir, ndentry);
-+      err = vfs_rename(fdir, odentry, tdir, ndentry, NULL);
-       if (!err && EX_ISSYNC(tfhp->fh_export)) {
-               nfsd_sync_dir(tdentry);
-               nfsd_sync_dir(fdentry);
---- linux-2.4.20-rh/fs/open.c~vfs_intent-2.4.20-rh     2003-04-11 14:04:57.000000000 +0800
-+++ linux-2.4.20-rh-root/fs/open.c     2003-06-09 23:18:07.000000000 +0800
+--- linux-2.4.20/fs/open.c~vfs_intent-2.4.20-rh        2003-07-17 08:32:45.000000000 -0700
++++ linux-2.4.20-mmonroe/fs/open.c     2003-07-17 08:35:22.000000000 -0700
 @@ -19,6 +19,8 @@
  #include <asm/uaccess.h>
  
        int error;
        struct iattr newattrs;
  
-@@ -108,7 +111,14 @@ int do_truncate(struct dentry *dentry, l
+@@ -108,7 +111,13 @@ int do_truncate(struct dentry *dentry, l
        down(&inode->i_sem);
        newattrs.ia_size = length;
        newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
 +              newattrs.ia_valid |= ATTR_FROM_OPEN;
 +      if (op->setattr_raw) {
 +              newattrs.ia_valid |= ATTR_RAW;
-+              newattrs.ia_ctime = CURRENT_TIME;
 +              error = op->setattr_raw(inode, &newattrs);
-+      } else 
++      } else
 +              error = notify_change(dentry, &newattrs);
        up(&inode->i_sem);
        return error;
  }
-@@ -118,12 +128,13 @@ static inline long do_sys_truncate(const
+@@ -118,12 +127,13 @@ static inline long do_sys_truncate(const
        struct nameidata nd;
        struct inode * inode;
        int error;
        if (error)
                goto out;
        inode = nd.dentry->d_inode;
-@@ -163,11 +174,13 @@ static inline long do_sys_truncate(const
+@@ -163,11 +173,13 @@ static inline long do_sys_truncate(const
        error = locks_verify_truncate(inode, NULL, length);
        if (!error) {
                DQUOT_INIT(inode);
 -              error = do_truncate(nd.dentry, length);
-+              intent_release(nd.dentry, &it);
++              intent_release(&it);
 +              error = do_truncate(nd.dentry, length, 0);
        }
        put_write_access(inode);
  
  dput_and_out:
-+      intent_release(nd.dentry, &it);
++      intent_release(&it);
        path_release(&nd);
  out:
        return error;
-@@ -215,7 +228,7 @@ static inline long do_sys_ftruncate(unsi
+@@ -215,7 +227,7 @@ static inline long do_sys_ftruncate(unsi
  
        error = locks_verify_truncate(inode, file, length);
        if (!error)
  out_putf:
        fput(file);
  out:
-@@ -260,11 +273,13 @@ asmlinkage long sys_utime(char * filenam
+@@ -260,11 +272,13 @@ asmlinkage long sys_utime(char * filenam
        struct inode * inode;
        struct iattr newattrs;
  
        error = -EROFS;
        if (IS_RDONLY(inode))
                goto dput_and_out;
-@@ -279,11 +294,29 @@ asmlinkage long sys_utime(char * filenam
+@@ -279,11 +293,25 @@ asmlinkage long sys_utime(char * filenam
                        goto dput_and_out;
  
                newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
 +                      goto dput_and_out;
 +      }
 +
-+      error = -EROFS;
-+      if (IS_RDONLY(inode))
-+              goto dput_and_out;
-+
 +      error = -EPERM;
 +      if (!times) {
                if (current->fsuid != inode->i_uid &&
        error = notify_change(nd.dentry, &newattrs);
  dput_and_out:
        path_release(&nd);
-@@ -304,12 +337,14 @@ asmlinkage long sys_utimes(char * filena
+@@ -304,12 +332,14 @@ asmlinkage long sys_utimes(char * filena
        struct inode * inode;
        struct iattr newattrs;
  
        error = -EROFS;
        if (IS_RDONLY(inode))
                goto dput_and_out;
-@@ -324,7 +359,20 @@ asmlinkage long sys_utimes(char * filena
+@@ -324,7 +354,20 @@ asmlinkage long sys_utimes(char * filena
                newattrs.ia_atime = times[0].tv_sec;
                newattrs.ia_mtime = times[1].tv_sec;
                newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
                if (current->fsuid != inode->i_uid &&
                    (error = permission(inode,MAY_WRITE)) != 0)
                        goto dput_and_out;
-@@ -347,6 +395,7 @@ asmlinkage long sys_access(const char * 
+@@ -347,6 +390,7 @@ asmlinkage long sys_access(const char * 
        int old_fsuid, old_fsgid;
        kernel_cap_t old_cap;
        int res;
  
        if (mode & ~S_IRWXO)    /* where's F_OK, X_OK, W_OK, R_OK? */
                return -EINVAL;
-@@ -364,13 +413,14 @@ asmlinkage long sys_access(const char * 
+@@ -364,13 +408,14 @@ asmlinkage long sys_access(const char * 
        else
                current->cap_effective = current->cap_permitted;
  
                if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
                   && !special_file(nd.dentry->d_inode->i_mode))
                        res = -EROFS;
-+              intent_release(nd.dentry, &it);
++              intent_release(&it);
                path_release(&nd);
        }
  
-@@ -385,8 +435,9 @@ asmlinkage long sys_chdir(const char * f
+@@ -385,8 +430,9 @@ asmlinkage long sys_chdir(const char * f
  {
        int error;
        struct nameidata nd;
        if (error)
                goto out;
  
-@@ -397,6 +448,7 @@ asmlinkage long sys_chdir(const char * f
+@@ -397,6 +443,7 @@ asmlinkage long sys_chdir(const char * f
        set_fs_pwd(current->fs, nd.mnt, nd.dentry);
  
  dput_and_out:
-+      intent_release(nd.dentry, &it);
++      intent_release(&it);
        path_release(&nd);
  out:
        return error;
-@@ -436,9 +488,10 @@ asmlinkage long sys_chroot(const char * 
+@@ -436,9 +483,10 @@ asmlinkage long sys_chroot(const char * 
  {
        int error;
        struct nameidata nd;
        if (error)
                goto out;
  
-@@ -454,6 +507,7 @@ asmlinkage long sys_chroot(const char * 
+@@ -454,6 +502,7 @@ asmlinkage long sys_chroot(const char * 
        set_fs_altroot();
        error = 0;
  dput_and_out:
-+      intent_release(nd.dentry, &it);
++      intent_release(&it);
        path_release(&nd);
  out:
        return error;
-@@ -508,6 +562,18 @@ asmlinkage long sys_chmod(const char * f
+@@ -508,6 +557,18 @@ asmlinkage long sys_chmod(const char * f
        if (IS_RDONLY(inode))
                goto dput_and_out;
  
        error = -EPERM;
        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
                goto dput_and_out;
-@@ -538,6 +604,20 @@ static int chown_common(struct dentry * 
+@@ -538,6 +599,20 @@ static int chown_common(struct dentry * 
        error = -EROFS;
        if (IS_RDONLY(inode))
                goto out;
 +
 +              newattrs.ia_uid = user;
 +              newattrs.ia_gid = group;
-+              newattrs.ia_valid = ATTR_UID | ATTR_GID;
++              newattrs.ia_valid = ATTR_UID | ATTR_GID | ATTR_CTIME;
 +              newattrs.ia_valid |= ATTR_RAW;
 +              error = op->setattr_raw(inode, &newattrs);
 +              /* the file system wants to use normal vfs path now */
        error = -EPERM;
        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
                goto out;
-@@ -642,6 +722,7 @@ struct file *filp_open(const char * file
+@@ -642,8 +717,9 @@ struct file *filp_open(const char * file
  {
        int namei_flags, error;
        struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = flags };
-       
-       flags &= ~O_DIRECT;
+-      
+-      flags &= ~O_DIRECT;
++      struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = flags };
++
++      //flags &= ~O_DIRECT;
  
-@@ -651,14 +732,15 @@ struct file *filp_open(const char * file
+       namei_flags = flags;
+       if ((namei_flags+1) & O_ACCMODE)
+@@ -651,14 +727,15 @@ struct file *filp_open(const char * file
        if (namei_flags & O_TRUNC)
                namei_flags |= 2;
  
  {
        struct file * f;
        struct inode *inode;
-@@ -701,6 +783,7 @@ struct file *dentry_open(struct dentry *
+@@ -695,12 +772,15 @@ struct file *dentry_open(struct dentry *
+       }
+       if (f->f_op && f->f_op->open) {
++              f->f_it = it;
+               error = f->f_op->open(inode,f);
++              f->f_it = NULL;
+               if (error)
+                       goto cleanup_all;
        }
        f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
  
-+      intent_release(dentry, it);
++      intent_release(it);
        return f;
  
  cleanup_all:
-@@ -715,11 +798,17 @@ cleanup_all:
+@@ -715,11 +795,17 @@ cleanup_all:
  cleanup_file:
        put_filp(f);
  cleanup_dentry:
-+      intent_release(dentry, it);
++      intent_release(it);
        dput(dentry);
        mntput(mnt);
        return ERR_PTR(error);
  /*
   * Find an empty file descriptor entry, and mark it busy.
   */
---- linux-2.4.20-rh/fs/stat.c~vfs_intent-2.4.20-rh     2003-04-11 14:05:08.000000000 +0800
-+++ linux-2.4.20-rh-root/fs/stat.c     2003-06-09 23:18:07.000000000 +0800
-@@ -110,11 +110,13 @@ static int do_getattr(struct vfsmount *m
- int vfs_stat(char *name, struct kstat *stat)
+--- linux-2.4.20/fs/stat.c~vfs_intent-2.4.20-rh        2003-07-17 08:33:05.000000000 -0700
++++ linux-2.4.20-mmonroe/fs/stat.c     2003-07-17 08:51:33.000000000 -0700
+@@ -17,10 +17,12 @@
+  * Revalidate the inode. This is required for proper NFS attribute caching.
+  */
+ static __inline__ int
+-do_revalidate(struct dentry *dentry)
++do_revalidate(struct dentry *dentry, struct lookup_intent *it)
+ {
+       struct inode * inode = dentry->d_inode;
+-      if (inode->i_op && inode->i_op->revalidate)
++      if (inode->i_op && inode->i_op->revalidate_it)
++              return inode->i_op->revalidate_it(dentry, it);
++      else if (inode->i_op && inode->i_op->revalidate)
+               return inode->i_op->revalidate(dentry);
+       return 0;
+ }
+@@ -32,13 +34,13 @@ static inline nlink_t user_nlink(struct 
+       return inode->i_nlink;
+ }
+-static int do_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
++static int do_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat, struct lookup_intent *it)
+ {
+       int res = 0;
+       unsigned int blocks, indirect;
+       struct inode *inode = dentry->d_inode;
+-      res = do_revalidate(dentry);
++      res = do_revalidate(dentry, it);
+       if (res)
+               return res;
+@@ -111,10 +113,12 @@ int vfs_stat(char *name, struct kstat *s
  {
        struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
        int error;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
  
 -      error = user_path_walk(name, &nd);
-+      error = user_path_walk_it(name, &nd, &it);
++      error = user_path_walk_it(name, &nd, &it);
        if (!error) {
-               error = do_getattr(nd.mnt, nd.dentry, stat);
-+              intent_release(nd.dentry, &it);
+-              error = do_getattr(nd.mnt, nd.dentry, stat);
++              error = do_getattr(nd.mnt, nd.dentry, stat, &it);
++              intent_release(&it);
                path_release(&nd);
        }
        return error;
-@@ -123,11 +125,13 @@ int vfs_stat(char *name, struct kstat *s
- int vfs_lstat(char *name, struct kstat *stat)
+@@ -124,10 +128,12 @@ int vfs_lstat(char *name, struct kstat *
  {
        struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
        int error;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
  
 -      error = user_path_walk_link(name, &nd);
-+      error = user_path_walk_link_it(name, &nd, &it);
++      error = user_path_walk_link_it(name, &nd, &it);
        if (!error) {
-               error = do_getattr(nd.mnt, nd.dentry, stat);
-+              intent_release(nd.dentry, &it);
+-              error = do_getattr(nd.mnt, nd.dentry, stat);
++              error = do_getattr(nd.mnt, nd.dentry, stat, &it);
++              intent_release(&it);
                path_release(&nd);
        }
        return error;
---- linux-2.4.20-rh/include/linux/dcache.h~vfs_intent-2.4.20-rh        2003-04-12 15:46:39.000000000 +0800
-+++ linux-2.4.20-rh-root/include/linux/dcache.h        2003-06-09 23:18:07.000000000 +0800
-@@ -7,6 +7,28 @@
+@@ -139,7 +145,7 @@ int vfs_fstat(unsigned int fd, struct ks
+       int error = -EBADF;
+       if (f) {
+-              error = do_getattr(f->f_vfsmnt, f->f_dentry, stat);
++              error = do_getattr(f->f_vfsmnt, f->f_dentry, stat, NULL);
+               fput(f);
+       }
+       return error;
+@@ -286,7 +292,7 @@ asmlinkage long sys_readlink(const char 
+               error = -EINVAL;
+               if (inode->i_op && inode->i_op->readlink &&
+-                  !(error = do_revalidate(nd.dentry))) {
++                  !(error = do_revalidate(nd.dentry, NULL))) {
+                       UPDATE_ATIME(inode);
+                       error = inode->i_op->readlink(nd.dentry, buf, bufsiz);
+               }
+--- linux-2.4.20/include/linux/dcache.h~vfs_intent-2.4.20-rh   2003-07-17 08:32:48.000000000 -0700
++++ linux-2.4.20-mmonroe/include/linux/dcache.h        2003-07-17 08:35:22.000000000 -0700
+@@ -6,6 +6,45 @@
+ #include <asm/atomic.h>
  #include <linux/mount.h>
  #include <linux/kernel.h>
-+#define IT_OPEN     (1)
-+#define IT_CREAT    (1<<1)
-+#define IT_READDIR  (1<<2)
-+#define IT_GETATTR  (1<<3)
-+#define IT_LOOKUP   (1<<4)
-+#define IT_UNLINK   (1<<5)
++#include <linux/string.h>
++
++#define IT_OPEN     0x0001
++#define IT_CREAT    0x0002
++#define IT_READDIR  0x0004
++#define IT_GETATTR  0x0008
++#define IT_LOOKUP   0x0010
++#define IT_UNLINK   0x0020
++#define IT_GETXATTR 0x0040
++#define IT_EXEC     0x0080
++#define IT_PIN      0x0100
 +
-+#define IT_FL_LOCKED   (1)
-+#define IT_FL_FOLLOWED (1<<1) /* set by vfs_follow_link */
++#define IT_FL_LOCKED   0x0001
++#define IT_FL_FOLLOWED 0x0002 /* set by vfs_follow_link */
++
++#define INTENT_MAGIC 0x19620323
 +
 +struct lookup_intent {
 +      int it_op;
++      void (*it_op_release)(struct lookup_intent *);
++      int it_magic;
 +      int it_mode;
 +      int it_flags;
 +      int it_disposition;
 +      void *it_data;
 +};
 +
++static inline void intent_init(struct lookup_intent *it, int op, int flags)
++{
++      memset(it, 0, sizeof(*it));
++      it->it_magic = INTENT_MAGIC;
++      it->it_op = op;
++      it->it_flags = flags;
++}
++
  /*
   * linux/include/linux/dcache.h
-  *
-@@ -82,6 +104,7 @@ struct dentry {
-       unsigned long d_time;           /* used by d_revalidate */
-       struct dentry_operations  *d_op;
-       struct super_block * d_sb;      /* The root of the dentry tree */
-+      struct lookup_intent *d_it;
-       unsigned long d_vfs_flags;
-       void * d_fsdata;                /* fs-specific data */
-       void * d_extra_attributes;      /* TUX-specific data */
-@@ -96,8 +119,15 @@ struct dentry_operations {
+@@ -96,8 +135,22 @@ struct dentry_operations {
        int (*d_delete)(struct dentry *);
        void (*d_release)(struct dentry *);
        void (*d_iput)(struct dentry *, struct inode *);
-+      int (*d_revalidate2)(struct dentry *, int, struct lookup_intent *);
-+      void (*d_intent_release)(struct dentry *, struct lookup_intent *);
++      int (*d_revalidate_it)(struct dentry *, int, struct lookup_intent *);
++      void (*d_pin)(struct dentry *, struct vfsmount * , int);
++      void (*d_unpin)(struct dentry *, struct vfsmount *, int);
  };
  
++#define PIN(de,mnt,flag)  if (de->d_op && de->d_op->d_pin) \
++                              de->d_op->d_pin(de, mnt, flag);
++#define UNPIN(de,mnt,flag)  if (de->d_op && de->d_op->d_unpin) \
++                              de->d_op->d_unpin(de, mnt, flag);
++
++
 +/* defined in fs/namei.c */
-+extern void intent_release(struct dentry *de, struct lookup_intent *it);
++extern void intent_release(struct lookup_intent *it);
 +/* defined in fs/dcache.c */
 +extern void __d_rehash(struct dentry * entry, int lock);
 +
  /* the dentry parameter passed to d_hash and d_compare is the parent
   * directory of the entries to be compared. It is used in case these
   * functions need any directory specific information for determining
-@@ -129,6 +159,7 @@ d_iput:            no              no              yes
+@@ -129,6 +182,7 @@ d_iput:            no              no              yes
                                         * s_nfsd_free_path semaphore will be down
                                         */
  #define DCACHE_REFERENCED     0x0008  /* Recently used, don't discard. */
  
  extern spinlock_t dcache_lock;
  
---- linux-2.4.20-rh/include/linux/fs.h~vfs_intent-2.4.20-rh    2003-05-30 02:07:39.000000000 +0800
-+++ linux-2.4.20-rh-root/include/linux/fs.h    2003-06-09 23:18:07.000000000 +0800
-@@ -337,6 +337,8 @@ extern void set_bh_page(struct buffer_he
+--- linux-2.4.20/include/linux/fs.h~vfs_intent-2.4.20-rh       2003-07-17 08:34:44.000000000 -0700
++++ linux-2.4.20-mmonroe/include/linux/fs.h    2003-07-17 08:35:22.000000000 -0700
+@@ -337,6 +337,9 @@ extern void set_bh_page(struct buffer_he
  #define ATTR_MTIME_SET        256
  #define ATTR_FORCE    512     /* Not a change, but a change it */
  #define ATTR_ATTR_FLAG        1024
-+#define ATTR_RAW      2048    /* file system, not vfs will massage attrs */
-+#define ATTR_FROM_OPEN        4096    /* called from open path, ie O_TRUNC */
++#define ATTR_RAW      0x0800  /* file system, not vfs will massage attrs */
++#define ATTR_FROM_OPEN        0x1000  /* called from open path, ie O_TRUNC */
++#define ATTR_CTIME_SET 0x2000
  
  /*
   * This is the Inode Attributes structure, used for notify_change().  It
-@@ -574,6 +576,7 @@ struct file {
+@@ -574,6 +577,7 @@ struct file {
  
        /* needed for tty driver, and maybe others */
        void                    *private_data;
-+      struct lookup_intent    *f_intent;
++      struct lookup_intent    *f_it;
  
        /* preallocated helper kiobuf to speedup O_DIRECT */
        struct kiobuf           *f_iobuf;
-@@ -701,6 +704,7 @@ struct nameidata {
+@@ -701,6 +705,7 @@ struct nameidata {
        struct qstr last;
        unsigned int flags;
        int last_type;
  };
  
  /*
-@@ -821,7 +825,9 @@ extern int vfs_symlink(struct inode *, s
+@@ -821,7 +826,8 @@ extern int vfs_symlink(struct inode *, s
  extern int vfs_link(struct dentry *, struct inode *, struct dentry *);
  extern int vfs_rmdir(struct inode *, struct dentry *);
  extern int vfs_unlink(struct inode *, struct dentry *);
 -extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
 +int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
-+              struct inode *new_dir, struct dentry *new_dentry,
-+              struct lookup_intent *it);
++             struct inode *new_dir, struct dentry *new_dentry);
  
  /*
   * File types
-@@ -882,20 +888,33 @@ struct file_operations {
+@@ -881,21 +887,32 @@ struct file_operations {
  struct inode_operations {
        int (*create) (struct inode *,struct dentry *,int);
++      int (*create_it) (struct inode *,struct dentry *,int, struct lookup_intent *);
        struct dentry * (*lookup) (struct inode *,struct dentry *);
-+      struct dentry * (*lookup2) (struct inode *,struct dentry *, struct lookup_intent *);
++      struct dentry * (*lookup_it) (struct inode *,struct dentry *, struct lookup_intent *, int flags);
        int (*link) (struct dentry *,struct inode *,struct dentry *);
-+      int (*link2) (struct inode *,struct inode *, const char *, int);
++      int (*link_raw) (struct nameidata *,struct nameidata *);
        int (*unlink) (struct inode *,struct dentry *);
-+      int (*unlink2) (struct inode *, const char *, int);
++      int (*unlink_raw) (struct nameidata *);
        int (*symlink) (struct inode *,struct dentry *,const char *);
-+      int (*symlink2) (struct inode *, const char *, int, const char *);
++      int (*symlink_raw) (struct nameidata *,const char *);
        int (*mkdir) (struct inode *,struct dentry *,int);
-+      int (*mkdir2) (struct inode *, const char *, int,int);
++      int (*mkdir_raw) (struct nameidata *,int);
        int (*rmdir) (struct inode *,struct dentry *);
-+      int (*rmdir2) (struct inode *, const char *, int);
++      int (*rmdir_raw) (struct nameidata *);
        int (*mknod) (struct inode *,struct dentry *,int,int);
-+      int (*mknod2) (struct inode *, const char *, int,int,int);
++      int (*mknod_raw) (struct nameidata *,int,dev_t);
        int (*rename) (struct inode *, struct dentry *,
                        struct inode *, struct dentry *);
-+      int (*rename2) (struct inode *, struct inode *,
-+                      const char *oldname, int oldlen,
-+                      const char *newname, int newlen);
++      int (*rename_raw) (struct nameidata *, struct nameidata *);
        int (*readlink) (struct dentry *, char *,int);
        int (*follow_link) (struct dentry *, struct nameidata *);
-+      int (*follow_link2) (struct dentry *, struct nameidata *,
-+                           struct lookup_intent *it);
        void (*truncate) (struct inode *);
        int (*permission) (struct inode *, int);
        int (*revalidate) (struct dentry *);
++      int (*revalidate_it) (struct dentry *, struct lookup_intent *);
        int (*setattr) (struct dentry *, struct iattr *);
-+      int (*setattr_raw) (struct inode *, struct iattr *);
++      int (*setattr_raw) (struct inode *, struct iattr *);
        int (*getattr) (struct dentry *, struct iattr *);
        int (*setxattr) (struct dentry *, const char *, void *, size_t, int);
        ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
-@@ -1091,10 +1110,14 @@ static inline int get_lease(struct inode
+@@ -1091,10 +1108,14 @@ static inline int get_lease(struct inode
  
  asmlinkage long sys_open(const char *, int, int);
  asmlinkage long sys_close(unsigned int);      /* yes, it's really unsigned */
  extern int filp_close(struct file *, fl_owner_t id);
  extern char * getname(const char *);
  
-@@ -1385,6 +1408,7 @@ typedef int (*read_actor_t)(read_descrip
+@@ -1385,6 +1406,7 @@ typedef int (*read_actor_t)(read_descrip
  extern loff_t default_llseek(struct file *file, loff_t offset, int origin);
  
  extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *));
  extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *));
  extern int FASTCALL(path_walk(const char *, struct nameidata *));
  extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *));
-@@ -1396,6 +1420,8 @@ extern struct dentry * lookup_one_len(co
+@@ -1396,6 +1418,8 @@ extern struct dentry * lookup_one_len(co
  extern struct dentry * lookup_hash(struct qstr *, struct dentry *);
  #define user_path_walk(name,nd)        __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd)
  #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd)
  
  extern void inode_init_once(struct inode *);
  extern void iput(struct inode *);
-@@ -1495,6 +1521,8 @@ extern struct file_operations generic_ro
+@@ -1497,6 +1521,8 @@ extern struct file_operations generic_ro
  
  extern int vfs_readlink(struct dentry *, char *, int, const char *);
  extern int vfs_follow_link(struct nameidata *, const char *);
  extern int page_readlink(struct dentry *, char *, int);
  extern int page_follow_link(struct dentry *, struct nameidata *);
  extern struct inode_operations page_symlink_inode_operations;
---- linux-2.4.20-rh/kernel/ksyms.c~vfs_intent-2.4.20-rh        2003-05-30 02:07:42.000000000 +0800
-+++ linux-2.4.20-rh-root/kernel/ksyms.c        2003-06-09 23:18:07.000000000 +0800
+--- linux-2.4.20/kernel/ksyms.c~vfs_intent-2.4.20-rh   2003-07-17 08:34:45.000000000 -0700
++++ linux-2.4.20-mmonroe/kernel/ksyms.c        2003-07-17 08:35:22.000000000 -0700
 @@ -298,6 +298,7 @@ EXPORT_SYMBOL(read_cache_page);
  EXPORT_SYMBOL(set_page_dirty);
  EXPORT_SYMBOL(vfs_readlink);
  EXPORT_SYMBOL(page_readlink);
  EXPORT_SYMBOL(page_follow_link);
  EXPORT_SYMBOL(page_symlink_inode_operations);
---- linux-2.4.20-rh/fs/exec.c~vfs_intent-2.4.20-rh     2003-04-13 10:07:02.000000000 +0800
-+++ linux-2.4.20-rh-root/fs/exec.c     2003-06-09 23:18:07.000000000 +0800
+--- linux-2.4.20/fs/exec.c~vfs_intent-2.4.20-rh        2003-07-17 08:33:09.000000000 -0700
++++ linux-2.4.20-mmonroe/fs/exec.c     2003-07-17 08:35:22.000000000 -0700
 @@ -114,8 +114,9 @@ asmlinkage long sys_uselib(const char * 
        struct file * file;
        struct nameidata nd;
        int error;
--
++      struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = O_RDONLY };
 -      error = user_path_walk(library, &nd);
-+              struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = O_RDONLY };
-+                                                                                                                                             
-+        error = user_path_walk_it(library, &nd, &it);
++      error = user_path_walk_it(library, &nd, &it);
        if (error)
                goto out;
  
                goto exit;
  
 -      file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
-+      file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);        
-+      intent_release(nd.dentry, &it);
++      file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
++      intent_release(&it);
        error = PTR_ERR(file);
        if (IS_ERR(file))
                goto out;
        struct inode *inode;
        struct file *file;
        int err = 0;
--
--      err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
 +      struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = O_RDONLY };
-+                                                                                                                                             
+-      err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
 +      err = path_lookup_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it);
        file = ERR_PTR(err);
        if (!err) {
                inode = nd.dentry->d_inode;
-@@ -395,7 +398,7 @@ struct file *open_exec(const char *name)
+@@ -395,7 +398,8 @@ struct file *open_exec(const char *name)
                                err = -EACCES;
                        file = ERR_PTR(err);
                        if (!err) {
 -                              file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
-+                                file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
++                              file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
++                              intent_release(&it);
                                if (!IS_ERR(file)) {
                                        err = deny_write_access(file);
                                        if (err) {
-@@ -404,6 +407,7 @@ struct file *open_exec(const char *name)
-                                       }
-                               }
- out:
-+                              intent_release(nd.dentry, &it);
+@@ -407,6 +411,7 @@ out:
                                return file;
                        }
                }
-@@ -1283,7 +1287,7 @@ int do_coredump(long signr, int exit_cod
++              intent_release(&it);
+               path_release(&nd);
+       }
+       goto out;
+@@ -1283,7 +1288,7 @@ int do_coredump(long signr, int exit_cod
                goto close_fail;
        if (!file->f_op->write)
                goto close_fail;
                goto close_fail;
  
        retval = binfmt->core_dump(signr, regs, file);
---- linux-2.4.20-rh/fs/proc/base.c~vfs_intent-2.4.20-rh        2003-06-09 23:16:51.000000000 +0800
-+++ linux-2.4.20-rh-root/fs/proc/base.c        2003-06-09 23:18:52.000000000 +0800
+--- linux-2.4.20/fs/proc/base.c~vfs_intent-2.4.20-rh   2003-07-17 08:33:05.000000000 -0700
++++ linux-2.4.20-mmonroe/fs/proc/base.c        2003-07-17 08:35:22.000000000 -0700
 @@ -464,6 +464,9 @@ static int proc_pid_follow_link(struct d
  
        error = inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt);
        nd->last_type = LAST_BIND;
 +
-+        if (nd->it != NULL)
-+                nd->it->it_int_flags |= IT_FL_FOLLOWED;
++      if (nd->it != NULL)
++              nd->it->it_int_flags |= IT_FL_FOLLOWED;
  out:
        return error;
  }
index 09bcb22..e522896 100644 (file)
@@ -1,17 +1,20 @@
- fs/dcache.c            |   20 ++
- fs/exec.c              |   15 +
- fs/namei.c             |  378 ++++++++++++++++++++++++++++++++++++++++++-------
- fs/nfsd/vfs.c          |    2 
- fs/open.c              |  126 ++++++++++++++--
- fs/proc/base.c         |    3 
- fs/stat.c              |   24 ++-
- include/linux/dcache.h |   31 ++++
- include/linux/fs.h     |   32 +++-
- kernel/ksyms.c         |    1 
- 10 files changed, 543 insertions(+), 89 deletions(-)
+ fs/dcache.c               |   19 ++
+ fs/exec.c                 |   15 +-
+ fs/namei.c                |  329 ++++++++++++++++++++++++++++++++++++++--------
+ fs/namespace.c            |   30 +++-
+ fs/open.c                 |  128 +++++++++++++++--
+ fs/proc/base.c            |    3 
+ fs/stat.c                 |   50 ++++--
+ include/linux/dcache.h    |   53 +++++++
+ include/linux/fs.h        |   29 +++-
+ include/linux/fs_struct.h |    4 
+ kernel/exit.c             |    3 
+ kernel/fork.c             |    3 
+ kernel/ksyms.c            |    1 
+ 13 files changed, 560 insertions(+), 107 deletions(-)
 
---- linux-2.4.20-l18/fs/exec.c~vfs_intent-2.4.20-vanilla       Thu Nov 28 18:53:15 2002
-+++ linux-2.4.20-l18-phil/fs/exec.c    Wed May 28 01:39:18 2003
+--- linux-2.4.20-ad/fs/exec.c~vfs_intent-2.4.20-vanilla        2002-11-28 16:53:15.000000000 -0700
++++ linux-2.4.20-ad-braam/fs/exec.c    2003-07-07 15:13:53.000000000 -0600
 @@ -107,8 +107,9 @@ asmlinkage long sys_uselib(const char * 
        struct file * file;
        struct nameidata nd;
@@ -29,7 +32,7 @@
  
 -      file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
 +      file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
-+      intent_release(nd.dentry, &it);
++      intent_release(&it);
        error = PTR_ERR(file);
        if (IS_ERR(file))
                goto out;
@@ -50,7 +53,7 @@
                        if (!err) {
 -                              file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
 +                              file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
-+                                intent_release(nd.dentry, &it);
++                              intent_release(&it);
                                if (!IS_ERR(file)) {
                                        err = deny_write_access(file);
                                        if (err) {
@@ -58,7 +61,7 @@
                                return file;
                        }
                }
-+                intent_release(nd.dentry, &it);
++              intent_release(&it);
                path_release(&nd);
        }
        goto out;
@@ -71,8 +74,8 @@
                goto close_fail;
  
        retval = binfmt->core_dump(signr, regs, file);
---- linux-2.4.20-l18/fs/dcache.c~vfs_intent-2.4.20-vanilla     Thu Nov 28 18:53:15 2002
-+++ linux-2.4.20-l18-phil/fs/dcache.c  Wed May 28 01:39:18 2003
+--- linux-2.4.20-ad/fs/dcache.c~vfs_intent-2.4.20-vanilla      2002-11-28 16:53:15.000000000 -0700
++++ linux-2.4.20-ad-braam/fs/dcache.c  2003-07-09 01:46:27.000000000 -0600
 @@ -181,6 +181,13 @@ int d_invalidate(struct dentry * dentry)
                spin_unlock(&dcache_lock);
                return 0;
        /*
         * Check whether to do a partial shrink_dcache
         * to get rid of unused child entries.
-@@ -616,6 +623,7 @@ struct dentry * d_alloc(struct dentry * 
-       dentry->d_op = NULL;
-       dentry->d_fsdata = NULL;
-       dentry->d_mounted = 0;
-+      dentry->d_it = NULL;
-       INIT_LIST_HEAD(&dentry->d_hash);
-       INIT_LIST_HEAD(&dentry->d_lru);
-       INIT_LIST_HEAD(&dentry->d_subdirs);
-@@ -830,13 +838,19 @@ void d_delete(struct dentry * dentry)
+@@ -830,13 +837,19 @@ void d_delete(struct dentry * dentry)
   * Adds a dentry to the hash according to its name.
   */
   
  }
  
  #define do_switch(x,y) do { \
---- linux-2.4.20-l18/fs/namei.c~vfs_intent-2.4.20-vanilla      Thu Nov 28 18:53:15 2002
-+++ linux-2.4.20-l18-phil/fs/namei.c   Sun Jun  1 23:41:35 2003
+--- linux-2.4.20-ad/fs/namespace.c~vfs_intent-2.4.20-vanilla   2002-11-28 16:53:15.000000000 -0700
++++ linux-2.4.20-ad-braam/fs/namespace.c       2003-07-07 15:13:53.000000000 -0600
+@@ -99,6 +99,7 @@ static void detach_mnt(struct vfsmount *
+ {
+       old_nd->dentry = mnt->mnt_mountpoint;
+       old_nd->mnt = mnt->mnt_parent;
++      UNPIN(old_nd->dentry, old_nd->mnt, 1);
+       mnt->mnt_parent = mnt;
+       mnt->mnt_mountpoint = mnt->mnt_root;
+       list_del_init(&mnt->mnt_child);
+@@ -110,6 +111,7 @@ static void attach_mnt(struct vfsmount *
+ {
+       mnt->mnt_parent = mntget(nd->mnt);
+       mnt->mnt_mountpoint = dget(nd->dentry);
++      PIN(nd->dentry, nd->mnt, 1);
+       list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry));
+       list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts);
+       nd->dentry->d_mounted++;
+@@ -485,14 +487,17 @@ static int do_loopback(struct nameidata 
+ {
+       struct nameidata old_nd;
+       struct vfsmount *mnt = NULL;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+       int err = mount_is_safe(nd);
+       if (err)
+               return err;
+       if (!old_name || !*old_name)
+               return -EINVAL;
+-      err = path_lookup(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd);
+-      if (err)
++      err = path_lookup_it(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd, &it);
++      if (err) {
++              intent_release(&it);
+               return err;
++      }
+       down_write(&current->namespace->sem);
+       err = -EINVAL;
+@@ -515,6 +520,7 @@ static int do_loopback(struct nameidata 
+       }
+       up_write(&current->namespace->sem);
++      intent_release(&it);
+       path_release(&old_nd);
+       return err;
+ }
+@@ -698,7 +704,8 @@ long do_mount(char * dev_name, char * di
+                 unsigned long flags, void *data_page)
+ {
+       struct nameidata nd;
+-      int retval = 0;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
++      int retval = 0;
+       int mnt_flags = 0;
+       /* Discard magic */
+@@ -722,10 +729,11 @@ long do_mount(char * dev_name, char * di
+       flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV);
+       /* ... and get the mountpoint */
+-      retval = path_lookup(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
+-      if (retval)
++      retval = path_lookup_it(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it);
++      if (retval) {
++              intent_release(&it);
+               return retval;
+-
++      }
+       if (flags & MS_REMOUNT)
+               retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
+                                   data_page);
+@@ -736,6 +744,8 @@ long do_mount(char * dev_name, char * di
+       else
+               retval = do_add_mount(&nd, type_page, flags, mnt_flags,
+                                     dev_name, data_page);
++
++      intent_release(&it);
+       path_release(&nd);
+       return retval;
+ }
+@@ -901,6 +911,8 @@ asmlinkage long sys_pivot_root(const cha
+ {
+       struct vfsmount *tmp;
+       struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd;
++      struct lookup_intent new_it = { .it_op = IT_GETATTR };
++      struct lookup_intent old_it = { .it_op = IT_GETATTR };
+       int error;
+       if (!capable(CAP_SYS_ADMIN))
+@@ -908,14 +920,14 @@ asmlinkage long sys_pivot_root(const cha
+       lock_kernel();
+-      error = __user_walk(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd);
++      error = __user_walk_it(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd, &new_it);
+       if (error)
+               goto out0;
+       error = -EINVAL;
+       if (!check_mnt(new_nd.mnt))
+               goto out1;
+-      error = __user_walk(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd);
++      error = __user_walk_it(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd, &old_it);
+       if (error)
+               goto out1;
+@@ -970,8 +982,10 @@ out2:
+       up(&old_nd.dentry->d_inode->i_zombie);
+       up_write(&current->namespace->sem);
+       path_release(&user_nd);
++      intent_release(&old_it);
+       path_release(&old_nd);
+ out1:
++      intent_release(&new_it);
+       path_release(&new_nd);
+ out0:
+       unlock_kernel();
+--- linux-2.4.20-ad/fs/namei.c~vfs_intent-2.4.20-vanilla       2002-11-28 16:53:15.000000000 -0700
++++ linux-2.4.20-ad-braam/fs/namei.c   2003-07-08 13:53:48.000000000 -0600
 @@ -94,6 +94,13 @@
   * XEmacs seems to be relying on it...
   */
  
-+void intent_release(struct dentry *de, struct lookup_intent *it)
++void intent_release(struct lookup_intent *it)
 +{
-+      if (it && de->d_op && de->d_op->d_intent_release)
-+              de->d_op->d_intent_release(de, it);
++      if (it && it->it_op_release)
++              it->it_op_release(it);
 +
 +}
 +
  {
        struct dentry * dentry = d_lookup(parent, name);
  
-+      if (dentry && dentry->d_op && dentry->d_op->d_revalidate2) {
-+              if (!dentry->d_op->d_revalidate2(dentry, flags, it) &&
++      if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) {
++              if (!dentry->d_op->d_revalidate_it(dentry, flags, it) &&
 +                  !d_invalidate(dentry)) {
 +                      dput(dentry);
 +                      dentry = NULL;
                result = ERR_PTR(-ENOMEM);
                if (dentry) {
                        lock_kernel();
-+                      if (dir->i_op->lookup2)
-+                              result = dir->i_op->lookup2(dir, dentry, it);
++                      if (dir->i_op->lookup_it)
++                              result = dir->i_op->lookup_it(dir, dentry, it, flags);
 +                      else
                        result = dir->i_op->lookup(dir, dentry);
                        unlock_kernel();
                        dput(result);
                        result = ERR_PTR(-ENOENT);
                }
-+      } else if (result->d_op && result->d_op->d_revalidate2) {
-+              if (!result->d_op->d_revalidate2(result, flags, it) &&
++      } else if (result->d_op && result->d_op->d_revalidate_it) {
++              if (!result->d_op->d_revalidate_it(result, flags, it) &&
 +                  !d_invalidate(result)) {
 +                      dput(result);
 +                      goto again;
  {
        int err;
        if (current->link_count >= 5)
-@@ -346,10 +375,21 @@ static inline int do_follow_link(struct 
+@@ -346,10 +375,18 @@ static inline int do_follow_link(struct 
        current->link_count++;
        current->total_link_count++;
        UPDATE_ATIME(dentry->d_inode);
 -      err = dentry->d_inode->i_op->follow_link(dentry, nd);
-+        nd->it = it;
-+      if (dentry->d_inode->i_op->follow_link2)
-+              err = dentry->d_inode->i_op->follow_link2(dentry, nd, it);
-+        else
-+              err = dentry->d_inode->i_op->follow_link(dentry, nd);
-+        if (!err && it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) {
-+                /* vfs_follow_link was never called */
-+              intent_release(dentry, it);
-+                path_release(nd);
-+                err = -ENOLINK;
-+        }
++      nd->it = it;
++      err = dentry->d_inode->i_op->follow_link(dentry, nd);
++      if (!err && it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) {
++              /* vfs_follow_link was never called */
++              intent_release(it);
++              path_release(nd);
++              err = -ENOLINK;
++      }
        current->link_count--;
        return err;
  loop:
-+      intent_release(dentry, it);
++      intent_release(it);
        path_release(nd);
        return -ELOOP;
  }
-@@ -379,15 +419,26 @@ int follow_up(struct vfsmount **mnt, str
+@@ -379,15 +416,26 @@ int follow_up(struct vfsmount **mnt, str
        return __follow_up(mnt, dentry);
  }
  
 +                      opc = it->it_op;
 +                      mode = it->it_mode;
 +              }
-+              intent_release(*dentry, it);
++              intent_release(it);
 +              if (it) {
 +                      it->it_op = opc;
 +                      it->it_mode = mode;
                dput(*dentry);
                mntput(mounted->mnt_parent);
                *dentry = dget(mounted->mnt_root);
-@@ -399,7 +450,7 @@ static inline int __follow_down(struct v
+@@ -399,7 +447,7 @@ static inline int __follow_down(struct v
  
  int follow_down(struct vfsmount **mnt, struct dentry **dentry)
  {
  }
   
  static inline void follow_dotdot(struct nameidata *nd)
-@@ -435,7 +486,7 @@ static inline void follow_dotdot(struct 
+@@ -435,7 +483,7 @@ static inline void follow_dotdot(struct 
                mntput(nd->mnt);
                nd->mnt = parent;
        }
                ;
  }
  
-@@ -447,7 +498,8 @@ static inline void follow_dotdot(struct 
+@@ -447,7 +495,8 @@ static inline void follow_dotdot(struct 
   *
   * We expect 'base' to be positive and a directory.
   */
  {
        struct dentry *dentry;
        struct inode *inode;
-@@ -520,15 +572,15 @@ int link_path_walk(const char * name, st
+@@ -520,15 +569,15 @@ int link_path_walk(const char * name, st
                                break;
                }
                /* This does the actual lookups.. */
                        ;
  
                err = -ENOENT;
-@@ -539,8 +591,8 @@ int link_path_walk(const char * name, st
+@@ -539,8 +588,8 @@ int link_path_walk(const char * name, st
                if (!inode->i_op)
                        goto out_dput;
  
 -              if (inode->i_op->follow_link) {
 -                      err = do_follow_link(dentry, nd);
-+              if (inode->i_op->follow_link || inode->i_op->follow_link2) {
++              if (inode->i_op->follow_link) {
 +                      err = do_follow_link(dentry, nd, NULL);
                        dput(dentry);
                        if (err)
                                goto return_err;
-@@ -556,7 +608,7 @@ int link_path_walk(const char * name, st
+@@ -556,7 +605,7 @@ int link_path_walk(const char * name, st
                        nd->dentry = dentry;
                }
                err = -ENOTDIR; 
 -              if (!inode->i_op->lookup)
-+              if (!inode->i_op->lookup && !inode->i_op->lookup2)
++              if (!inode->i_op->lookup && !inode->i_op->lookup_it)
                        break;
                continue;
                /* here ends the main loop */
-@@ -583,19 +635,20 @@ last_component:
+@@ -583,19 +632,19 @@ last_component:
                        if (err < 0)
                                break;
                }
                        ;
                inode = dentry->d_inode;
                if ((lookup_flags & LOOKUP_FOLLOW)
--                  && inode && inode->i_op && inode->i_op->follow_link) {
+                   && inode && inode->i_op && inode->i_op->follow_link) {
 -                      err = do_follow_link(dentry, nd);
-+                  && inode && inode->i_op &&
-+                  (inode->i_op->follow_link || inode->i_op->follow_link2)) {
 +                      err = do_follow_link(dentry, nd, it);
                        dput(dentry);
                        if (err)
                                goto return_err;
-@@ -609,7 +662,8 @@ last_component:
+@@ -609,7 +658,8 @@ last_component:
                        goto no_inode;
                if (lookup_flags & LOOKUP_DIRECTORY) {
                        err = -ENOTDIR; 
 -                      if (!inode->i_op || !inode->i_op->lookup)
 +                      if (!inode->i_op ||
-+                          (!inode->i_op->lookup && !inode->i_op->lookup2))
++                          (!inode->i_op->lookup && !inode->i_op->lookup_it))
                                break;
                }
                goto return_base;
-@@ -633,6 +687,23 @@ return_reval:
+@@ -633,6 +683,23 @@ return_reval:
                 * Check the cached dentry for staleness.
                 */
                dentry = nd->dentry;
-+        revalidate_again:
-+              if (dentry && dentry->d_op && dentry->d_op->d_revalidate2) {
++      revalidate_again:
++              if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) {
 +                      err = -ESTALE;
-+                      if (!dentry->d_op->d_revalidate2(dentry, 0, it)) {
-+                                struct dentry *new;
-+                                err = permission(dentry->d_parent->d_inode, 
-+                                                 MAY_EXEC);
-+                                if (err)
-+                                        break;
-+                                new = real_lookup(dentry->d_parent,
-+                                                  &dentry->d_name, 0, NULL);
++                      if (!dentry->d_op->d_revalidate_it(dentry, 0, it)) {
++                              struct dentry *new;
++                              err = permission(dentry->d_parent->d_inode,
++                                               MAY_EXEC);
++                              if (err)
++                                      break;
++                              new = real_lookup(dentry->d_parent,
++                                                &dentry->d_name, 0, NULL);
 +                              d_invalidate(dentry);
-+                                dput(dentry);
-+                                dentry = new;
-+                                goto revalidate_again;
-+                        }
++                              dput(dentry);
++                              dentry = new;
++                              goto revalidate_again;
++                      }
 +              } else
                if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
                        err = -ESTALE;
                        if (!dentry->d_op->d_revalidate(dentry, 0)) {
-@@ -646,15 +717,28 @@ out_dput:
+@@ -646,15 +713,28 @@ out_dput:
                dput(dentry);
                break;
        }
 +      if (err)
-+              intent_release(nd->dentry, it);
++              intent_release(it);
        path_release(nd);
  return_err:
        return err;
  }
  
  /* SMP-safe */
-@@ -739,6 +823,17 @@ walk_init_root(const char *name, struct 
+@@ -739,6 +819,17 @@ walk_init_root(const char *name, struct 
  }
  
  /* SMP-safe */
  int path_lookup(const char *path, unsigned flags, struct nameidata *nd)
  {
        int error = 0;
-@@ -753,6 +848,7 @@ int path_init(const char *name, unsigned
+@@ -753,6 +844,7 @@ int path_init(const char *name, unsigned
  {
        nd->last_type = LAST_ROOT; /* if there are only slashes... */
        nd->flags = flags;
-+        nd->it = NULL;
++      nd->it = NULL;
        if (*name=='/')
                return walk_init_root(name,nd);
        read_lock(&current->fs->lock);
-@@ -767,7 +863,8 @@ int path_init(const char *name, unsigned
+@@ -767,7 +859,8 @@ int path_init(const char *name, unsigned
   * needs parent already locked. Doesn't follow mounts.
   * SMP-safe.
   */
  {
        struct dentry * dentry;
        struct inode *inode;
-@@ -790,13 +887,16 @@ struct dentry * lookup_hash(struct qstr 
+@@ -790,13 +883,16 @@ struct dentry * lookup_hash(struct qstr 
                        goto out;
        }
  
                if (!new)
                        goto out;
                lock_kernel();
-+              if (inode->i_op->lookup2)
-+                      dentry = inode->i_op->lookup2(inode, new, it);
++              if (inode->i_op->lookup_it)
++                      dentry = inode->i_op->lookup_it(inode, new, it, 0);
 +              else
                dentry = inode->i_op->lookup(inode, new);
                unlock_kernel();
                if (!dentry)
-@@ -808,6 +908,12 @@ out:
+@@ -808,6 +904,12 @@ out:
        return dentry;
  }
  
  /* SMP-safe */
  struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
  {
-@@ -829,7 +935,7 @@ struct dentry * lookup_one_len(const cha
+@@ -829,7 +931,7 @@ struct dentry * lookup_one_len(const cha
        }
        this.hash = end_name_hash(hash);
  
  access:
        return ERR_PTR(-EACCES);
  }
-@@ -860,6 +966,23 @@ int __user_walk(const char *name, unsign
+@@ -860,6 +962,23 @@ int __user_walk(const char *name, unsign
        return err;
  }
  
  /*
   * It's inline, so penalty for filesystems that don't use sticky bit is
   * minimal.
-@@ -996,7 +1119,8 @@ exit_lock:
+@@ -955,7 +1074,8 @@ static inline int lookup_flags(unsigned 
+       return retval;
+ }
+-int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
++static int vfs_create_it(struct inode *dir, struct dentry *dentry, int mode,
++                       struct lookup_intent *it)
+ {
+       int error;
+@@ -968,12 +1088,15 @@ int vfs_create(struct inode *dir, struct
+               goto exit_lock;
+       error = -EACCES;        /* shouldn't it be ENOSYS? */
+-      if (!dir->i_op || !dir->i_op->create)
++      if (!dir->i_op || (!dir->i_op->create && !dir->i_op->create_it))
+               goto exit_lock;
+       DQUOT_INIT(dir);
+       lock_kernel();
+-      error = dir->i_op->create(dir, dentry, mode);
++      if (dir->i_op->create_it)
++              error = dir->i_op->create_it(dir, dentry, mode, it);
++      else
++              error = dir->i_op->create(dir, dentry, mode);
+       unlock_kernel();
+ exit_lock:
+       up(&dir->i_zombie);
+@@ -982,6 +1105,11 @@ exit_lock:
+       return error;
+ }
++int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
++{
++      return vfs_create_it(dir, dentry, mode, NULL);
++}
++
+ /*
+  *    open_namei()
+  *
+@@ -996,7 +1124,8 @@ exit_lock:
   * for symlinks (where the permissions are checked later).
   * SMP-safe
   */
  {
        int acc_mode, error = 0;
        struct inode *inode;
-@@ -1010,7 +1134,7 @@ int open_namei(const char * pathname, in
+@@ -1010,7 +1139,7 @@ int open_namei(const char * pathname, in
         * The simplest case - just a plain lookup.
         */
        if (!(flag & O_CREAT)) {
                if (error)
                        return error;
                dentry = nd->dentry;
-@@ -1020,6 +1144,10 @@ int open_namei(const char * pathname, in
+@@ -1020,6 +1149,10 @@ int open_namei(const char * pathname, in
        /*
         * Create - we need to know the parent.
         */
        error = path_lookup(pathname, LOOKUP_PARENT, nd);
        if (error)
                return error;
-@@ -1035,7 +1163,7 @@ int open_namei(const char * pathname, in
+@@ -1035,7 +1168,7 @@ int open_namei(const char * pathname, in
  
        dir = nd->dentry;
        down(&dir->d_inode->i_sem);
  
  do_last:
        error = PTR_ERR(dentry);
-@@ -1044,6 +1172,7 @@ do_last:
+@@ -1044,10 +1177,11 @@ do_last:
                goto exit;
        }
  
 +      it->it_mode = mode;
        /* Negative dentry, just create the file */
        if (!dentry->d_inode) {
-               error = vfs_create(dir->d_inode, dentry,
-@@ -1072,12 +1201,13 @@ do_last:
+-              error = vfs_create(dir->d_inode, dentry,
+-                                 mode & ~current->fs->umask);
++              error = vfs_create_it(dir->d_inode, dentry,
++                                 mode & ~current->fs->umask, it);
+               up(&dir->d_inode->i_sem);
+               dput(nd->dentry);
+               nd->dentry = dentry;
+@@ -1072,7 +1206,7 @@ do_last:
                error = -ELOOP;
                if (flag & O_NOFOLLOW)
                        goto exit_dput;
        }
        error = -ENOENT;
        if (!dentry->d_inode)
-               goto exit_dput;
--      if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)
-+      if (dentry->d_inode->i_op && (dentry->d_inode->i_op->follow_link ||
-+                                    dentry->d_inode->i_op->follow_link2))
-               goto do_link;
-       dput(nd->dentry);
-@@ -1151,7 +1281,7 @@ ok:
+@@ -1151,7 +1285,7 @@ ok:
                if (!error) {
                        DQUOT_INIT(inode);
                        
                }
                put_write_access(inode);
                if (error)
-@@ -1163,8 +1293,10 @@ ok:
+@@ -1163,8 +1297,10 @@ ok:
        return 0;
  
  exit_dput:
-+      intent_release(dentry, it);
++      intent_release(it);
        dput(dentry);
  exit:
-+      intent_release(nd->dentry, it);
++      intent_release(it);
        path_release(nd);
        return error;
  
-@@ -1183,7 +1315,19 @@ do_link:
+@@ -1183,7 +1319,16 @@ do_link:
         * are done. Procfs-like symlinks just set LAST_BIND.
         */
        UPDATE_ATIME(dentry->d_inode);
 -      error = dentry->d_inode->i_op->follow_link(dentry, nd);
-+        nd->it = it;
-+      if (dentry->d_inode->i_op->follow_link2)
-+              error = dentry->d_inode->i_op->follow_link2(dentry, nd, it);
-+      else
-+              error = dentry->d_inode->i_op->follow_link(dentry, nd);
++      nd->it = it;
++      error = dentry->d_inode->i_op->follow_link(dentry, nd);
 +      if (error) {
-+              intent_release(dentry, it);
-+        } else if (it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) {
-+                /* vfs_follow_link was never called */
-+              intent_release(dentry, it);
-+                path_release(nd);
-+                error = -ENOLINK;
-+        }
++              intent_release(it);
++      } else if (it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) {
++              /* vfs_follow_link was never called */
++              intent_release(it);
++              path_release(nd);
++              error = -ENOLINK;
++      }
        dput(dentry);
        if (error)
                return error;
-@@ -1205,13 +1349,20 @@ do_link:
+@@ -1205,13 +1350,20 @@ do_link:
        }
        dir = nd->dentry;
        down(&dir->d_inode->i_sem);
  {
        struct dentry *dentry;
  
-@@ -1219,7 +1370,7 @@ static struct dentry *lookup_create(stru
+@@ -1219,7 +1371,7 @@ static struct dentry *lookup_create(stru
        dentry = ERR_PTR(-EEXIST);
        if (nd->last_type != LAST_NORM)
                goto fail;
        if (IS_ERR(dentry))
                goto fail;
        if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
-@@ -1275,7 +1426,19 @@ asmlinkage long sys_mknod(const char * f
+@@ -1275,7 +1427,16 @@ asmlinkage long sys_mknod(const char * f
        error = path_lookup(tmp, LOOKUP_PARENT, &nd);
        if (error)
                goto out;
 -      dentry = lookup_create(&nd, 0);
 +
-+      if (nd.dentry->d_inode->i_op->mknod2) {
++      if (nd.dentry->d_inode->i_op->mknod_raw) {
 +              struct inode_operations *op = nd.dentry->d_inode->i_op;
-+              error = op->mknod2(nd.dentry->d_inode,
-+                                 nd.last.name,
-+                                 nd.last.len,
-+                                 mode, dev);
++              error = op->mknod_raw(&nd, mode, dev);
 +              /* the file system wants to use normal vfs path now */
 +              if (error != -EOPNOTSUPP)
 +                      goto out2;
        error = PTR_ERR(dentry);
  
        mode &= ~current->fs->umask;
-@@ -1296,6 +1459,7 @@ asmlinkage long sys_mknod(const char * f
+@@ -1296,6 +1457,7 @@ asmlinkage long sys_mknod(const char * f
                dput(dentry);
        }
        up(&nd.dentry->d_inode->i_sem);
        path_release(&nd);
  out:
        putname(tmp);
-@@ -1343,7 +1507,17 @@ asmlinkage long sys_mkdir(const char * p
+@@ -1343,7 +1505,14 @@ asmlinkage long sys_mkdir(const char * p
                error = path_lookup(tmp, LOOKUP_PARENT, &nd);
                if (error)
                        goto out;
 -              dentry = lookup_create(&nd, 1);
-+              if (nd.dentry->d_inode->i_op->mkdir2) {
++              if (nd.dentry->d_inode->i_op->mkdir_raw) {
 +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
-+                      error = op->mkdir2(nd.dentry->d_inode,
-+                                         nd.last.name,
-+                                         nd.last.len,
-+                                         mode);
++                      error = op->mkdir_raw(&nd, mode);
 +                      /* the file system wants to use normal vfs path now */
 +                      if (error != -EOPNOTSUPP)
 +                              goto out2;
                error = PTR_ERR(dentry);
                if (!IS_ERR(dentry)) {
                        error = vfs_mkdir(nd.dentry->d_inode, dentry,
-@@ -1351,6 +1525,7 @@ asmlinkage long sys_mkdir(const char * p
+@@ -1351,6 +1520,7 @@ asmlinkage long sys_mkdir(const char * p
                        dput(dentry);
                }
                up(&nd.dentry->d_inode->i_sem);
                path_release(&nd);
  out:
                putname(tmp);
-@@ -1451,8 +1626,33 @@ asmlinkage long sys_rmdir(const char * p
+@@ -1451,8 +1621,16 @@ asmlinkage long sys_rmdir(const char * p
                        error = -EBUSY;
                        goto exit1;
        }
-+      if (nd.dentry->d_inode->i_op->rmdir2) {
-+              struct inode_operations *op = nd.dentry->d_inode->i_op;
-+              struct dentry *last;
-+
-+              down(&nd.dentry->d_inode->i_sem);
-+              last = lookup_hash_it(&nd.last, nd.dentry, NULL);
-+              up(&nd.dentry->d_inode->i_sem);
-+              if (IS_ERR(last)) {
-+                      error = PTR_ERR(last);
-+                      goto exit1;
-+              }
-+              if (d_mountpoint(last)) {
-+                      dput(last);
-+                      error = -EBUSY;
-+                      goto exit1;
-+              }
-+              dput(last);
++      if (nd.dentry->d_inode->i_op->rmdir_raw) {
++              struct inode_operations *op = nd.dentry->d_inode->i_op;
 +
-+              error = op->rmdir2(nd.dentry->d_inode,
-+                                 nd.last.name,
-+                                 nd.last.len);
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto exit1;
-+      }
++              error = op->rmdir_raw(&nd);
++              /* the file system wants to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto exit1;
++      }
        down(&nd.dentry->d_inode->i_sem);
 -      dentry = lookup_hash(&nd.last, nd.dentry);
 +      dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
        error = PTR_ERR(dentry);
        if (!IS_ERR(dentry)) {
                error = vfs_rmdir(nd.dentry->d_inode, dentry);
-@@ -1510,8 +1710,17 @@ asmlinkage long sys_unlink(const char * 
+@@ -1510,8 +1688,15 @@ asmlinkage long sys_unlink(const char * 
        error = -EISDIR;
        if (nd.last_type != LAST_NORM)
                goto exit1;
-+      if (nd.dentry->d_inode->i_op->unlink2) {
-+              struct inode_operations *op = nd.dentry->d_inode->i_op;
-+              error = op->unlink2(nd.dentry->d_inode,
-+                                  nd.last.name,
-+                                  nd.last.len);
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto exit1;
-+      }
++      if (nd.dentry->d_inode->i_op->unlink_raw) {
++              struct inode_operations *op = nd.dentry->d_inode->i_op;
++              error = op->unlink_raw(&nd);
++              /* the file system wants to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto exit1;
++      }
        down(&nd.dentry->d_inode->i_sem);
 -      dentry = lookup_hash(&nd.last, nd.dentry);
 +      dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
        error = PTR_ERR(dentry);
        if (!IS_ERR(dentry)) {
                /* Why not before? Because we want correct error value */
-@@ -1578,15 +1787,26 @@ asmlinkage long sys_symlink(const char *
+@@ -1578,15 +1763,23 @@ asmlinkage long sys_symlink(const char *
                error = path_lookup(to, LOOKUP_PARENT, &nd);
                if (error)
                        goto out;
 -              dentry = lookup_create(&nd, 0);
-+              if (nd.dentry->d_inode->i_op->symlink2) {
++              if (nd.dentry->d_inode->i_op->symlink_raw) {
 +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
-+                      error = op->symlink2(nd.dentry->d_inode,
-+                                           nd.last.name,
-+                                           nd.last.len,
-+                                           from);
++                      error = op->symlink_raw(&nd, from);
 +                      /* the file system wants to use normal vfs path now */
 +                      if (error != -EOPNOTSUPP)
 +                              goto out2;
                putname(to);
        }
        putname(from);
-@@ -1662,7 +1882,17 @@ asmlinkage long sys_link(const char * ol
+@@ -1662,7 +1855,14 @@ asmlinkage long sys_link(const char * ol
                error = -EXDEV;
                if (old_nd.mnt != nd.mnt)
                        goto out_release;
 -              new_dentry = lookup_create(&nd, 0);
-+              if (nd.dentry->d_inode->i_op->link2) {
++              if (nd.dentry->d_inode->i_op->link_raw) {
 +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
-+                      error = op->link2(old_nd.dentry->d_inode,
-+                                        nd.dentry->d_inode,
-+                                        nd.last.name,
-+                                        nd.last.len);
++                      error = op->link_raw(&old_nd, &nd);
 +                      /* the file system wants to use normal vfs path now */
 +                      if (error != -EOPNOTSUPP)
 +                              goto out_release;
                error = PTR_ERR(new_dentry);
                if (!IS_ERR(new_dentry)) {
                        error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
-@@ -1706,7 +1936,8 @@ exit:
+@@ -1706,7 +1906,7 @@ exit:
   *       locking].
   */
  int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
 -             struct inode *new_dir, struct dentry *new_dentry)
-+                 struct inode *new_dir, struct dentry *new_dentry,
-+                 struct lookup_intent *it)
++                 struct inode *new_dir, struct dentry *new_dentry)
  {
        int error;
        struct inode *target;
-@@ -1764,6 +1995,7 @@ int vfs_rename_dir(struct inode *old_dir
-               error = -EBUSY;
-       else 
-               error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
-+      intent_release(new_dentry, it);
-       if (target) {
-               if (!error)
-                       target->i_flags |= S_DEAD;
-@@ -1785,7 +2017,8 @@ out_unlock:
+@@ -1785,7 +1985,7 @@ out_unlock:
  }
  
  int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
 -             struct inode *new_dir, struct dentry *new_dentry)
-+                   struct inode *new_dir, struct dentry *new_dentry,
-+                   struct lookup_intent *it)
++                   struct inode *new_dir, struct dentry *new_dentry)
  {
        int error;
  
-@@ -1816,6 +2049,7 @@ int vfs_rename_other(struct inode *old_d
-               error = -EBUSY;
-       else
-               error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
-+      intent_release(new_dentry, it);
-       double_up(&old_dir->i_zombie, &new_dir->i_zombie);
-       if (error)
-               return error;
-@@ -1827,13 +2061,14 @@ int vfs_rename_other(struct inode *old_d
- }
- int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
--             struct inode *new_dir, struct dentry *new_dentry)
-+             struct inode *new_dir, struct dentry *new_dentry,
-+             struct lookup_intent *it)
- {
-       int error;
-       if (S_ISDIR(old_dentry->d_inode->i_mode))
--              error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
-+              error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry,it);
-       else
--              error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
-+              error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,it);
-       if (!error) {
-               if (old_dir == new_dir)
-                       inode_dir_notify(old_dir, DN_RENAME);
-@@ -1875,7 +2110,7 @@ static inline int do_rename(const char *
+@@ -1873,9 +2073,18 @@ static inline int do_rename(const char *
+       if (newnd.last_type != LAST_NORM)
+               goto exit2;
  
++      if (old_dir->d_inode->i_op->rename_raw) {
++              lock_kernel();
++              error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd);
++              unlock_kernel();
++              /* the file system wants to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto exit2;
++      }
++
        double_lock(new_dir, old_dir);
  
 -      old_dentry = lookup_hash(&oldnd.last, old_dir);
        error = PTR_ERR(old_dentry);
        if (IS_ERR(old_dentry))
                goto exit3;
-@@ -1891,16 +2126,37 @@ static inline int do_rename(const char *
+@@ -1891,16 +2100,16 @@ static inline int do_rename(const char *
                if (newnd.last.name[newnd.last.len])
                        goto exit4;
        }
        if (IS_ERR(new_dentry))
                goto exit4;
  
-+      if (old_dir->d_inode->i_op->rename2) {
-+              lock_kernel();
-+              /* don't rename mount point. mds will take care of
-+               * the rest sanity checking */
-+              if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) {
-+                      error = -EBUSY;
-+                      goto exit5;
-+              }
-+
-+              error = old_dir->d_inode->i_op->rename2(old_dir->d_inode,
-+                                                      new_dir->d_inode,
-+                                                      oldnd.last.name,
-+                                                      oldnd.last.len,
-+                                                      newnd.last.name,
-+                                                      newnd.last.len);
-+              unlock_kernel();
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto exit5;
-+      }
 +
        lock_kernel();
        error = vfs_rename(old_dir->d_inode, old_dentry,
--                                 new_dir->d_inode, new_dentry);
-+                                 new_dir->d_inode, new_dentry, NULL);
+                                  new_dir->d_inode, new_dentry);
        unlock_kernel();
 -
-+exit5:
        dput(new_dentry);
  exit4:
        dput(old_dentry);
-@@ -1951,20 +2207,28 @@ out:
+@@ -1951,20 +2160,28 @@ out:
  }
  
  static inline int
        if (IS_ERR(link))
                goto fail;
  
-+        if (it == NULL)
-+                it = nd->it;
-+        else if (it != nd->it)
-+                printk("it != nd->it: tell phil@clusterfs.com\n");
-+        if (it != NULL)
-+                it->it_int_flags |= IT_FL_FOLLOWED;
++      if (it == NULL)
++              it = nd->it;
++      else if (it != nd->it)
++              printk("it != nd->it: tell phil@clusterfs.com\n");
++      if (it != NULL)
++              it->it_int_flags |= IT_FL_FOLLOWED;
 +
        if (*link == '/') {
                path_release(nd);
  out:
        if (current->link_count || res || nd->last_type!=LAST_NORM)
                return res;
-@@ -1986,7 +2250,13 @@ fail:
+@@ -1986,7 +2203,13 @@ fail:
  
  int vfs_follow_link(struct nameidata *nd, const char *link)
  {
  }
  
  /* get the link contents into pagecache */
-@@ -2028,7 +2298,7 @@ int page_follow_link(struct dentry *dent
+@@ -2028,7 +2251,7 @@ int page_follow_link(struct dentry *dent
  {
        struct page *page = NULL;
        char *s = page_getlink(dentry, &page);
        if (page) {
                kunmap(page);
                page_cache_release(page);
---- linux-2.4.20-l18/fs/nfsd/vfs.c~vfs_intent-2.4.20-vanilla   Thu Nov 28 18:53:15 2002
-+++ linux-2.4.20-l18-phil/fs/nfsd/vfs.c        Wed May 28 01:39:18 2003
-@@ -1291,7 +1291,7 @@ nfsd_rename(struct svc_rqst *rqstp, stru
-                       err = nfserr_perm;
-       } else
- #endif
--      err = vfs_rename(fdir, odentry, tdir, ndentry);
-+      err = vfs_rename(fdir, odentry, tdir, ndentry, NULL);
-       if (!err && EX_ISSYNC(tfhp->fh_export)) {
-               nfsd_sync_dir(tdentry);
-               nfsd_sync_dir(fdentry);
---- linux-2.4.20-l18/fs/open.c~vfs_intent-2.4.20-vanilla       Thu Nov 28 18:53:15 2002
-+++ linux-2.4.20-l18-phil/fs/open.c    Wed May 28 01:39:18 2003
+--- linux-2.4.20-ad/fs/open.c~vfs_intent-2.4.20-vanilla        2002-11-28 16:53:15.000000000 -0700
++++ linux-2.4.20-ad-braam/fs/open.c    2003-07-08 13:51:14.000000000 -0600
 @@ -19,6 +19,8 @@
  #include <asm/uaccess.h>
  
        int error;
        struct iattr newattrs;
  
-@@ -108,7 +111,14 @@ int do_truncate(struct dentry *dentry, l
+@@ -108,7 +111,13 @@ int do_truncate(struct dentry *dentry, l
        down(&inode->i_sem);
        newattrs.ia_size = length;
        newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
 +              newattrs.ia_valid |= ATTR_FROM_OPEN;
 +      if (op->setattr_raw) {
 +              newattrs.ia_valid |= ATTR_RAW;
-+              newattrs.ia_ctime = CURRENT_TIME;
 +              error = op->setattr_raw(inode, &newattrs);
-+      } else 
++      } else
 +              error = notify_change(dentry, &newattrs);
        up(&inode->i_sem);
        return error;
        if (!error) {
                DQUOT_INIT(inode);
 -              error = do_truncate(nd.dentry, length);
-+              intent_release(nd.dentry, &it);
++              intent_release(&it);
 +              error = do_truncate(nd.dentry, length, 0);
        }
        put_write_access(inode);
  
  dput_and_out:
-+      intent_release(nd.dentry, &it);
++      intent_release(&it);
        path_release(&nd);
  out:
        return error;
        error = -EROFS;
        if (IS_RDONLY(inode))
                goto dput_and_out;
-@@ -279,11 +294,29 @@ asmlinkage long sys_utime(char * filenam
+@@ -279,11 +294,25 @@ asmlinkage long sys_utime(char * filenam
                        goto dput_and_out;
  
                newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
 +                      goto dput_and_out;
 +      }
 +
-+      error = -EROFS;
-+      if (IS_RDONLY(inode))
-+              goto dput_and_out;
-+
 +      error = -EPERM;
 +      if (!times) {
                if (current->fsuid != inode->i_uid &&
                if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
                   && !special_file(nd.dentry->d_inode->i_mode))
                        res = -EROFS;
-+              intent_release(nd.dentry, &it);
++              intent_release(&it);
                path_release(&nd);
        }
  
        set_fs_pwd(current->fs, nd.mnt, nd.dentry);
  
  dput_and_out:
-+      intent_release(nd.dentry, &it);
++      intent_release(&it);
        path_release(&nd);
  out:
        return error;
        set_fs_altroot();
        error = 0;
  dput_and_out:
-+      intent_release(nd.dentry, &it);
++      intent_release(&it);
        path_release(&nd);
  out:
        return error;
 +
 +              newattrs.ia_uid = user;
 +              newattrs.ia_gid = group;
-+              newattrs.ia_valid = ATTR_UID | ATTR_GID;
++              newattrs.ia_valid = ATTR_UID | ATTR_GID | ATTR_CTIME;
 +              newattrs.ia_valid |= ATTR_RAW;
 +              error = op->setattr_raw(inode, &newattrs);
 +              /* the file system wants to use normal vfs path now */
  {
        struct file * f;
        struct inode *inode;
-@@ -699,6 +782,7 @@ struct file *dentry_open(struct dentry *
+@@ -693,12 +776,15 @@ struct file *dentry_open(struct dentry *
+       }
+       if (f->f_op && f->f_op->open) {
++              f->f_it = it;
+               error = f->f_op->open(inode,f);
++              f->f_it = NULL;
+               if (error)
+                       goto cleanup_all;
        }
        f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
  
-+      intent_release(dentry, it);
++      intent_release(it);
        return f;
  
  cleanup_all:
-@@ -713,11 +797,17 @@ cleanup_all:
+@@ -713,11 +799,17 @@ cleanup_all:
  cleanup_file:
        put_filp(f);
  cleanup_dentry:
-+      intent_release(dentry, it);
++      intent_release(it);
        dput(dentry);
        mntput(mnt);
        return ERR_PTR(error);
  /*
   * Find an empty file descriptor entry, and mark it busy.
   */
---- linux-2.4.20-l18/fs/stat.c~vfs_intent-2.4.20-vanilla       Thu Sep 13 19:04:43 2001
-+++ linux-2.4.20-l18-phil/fs/stat.c    Wed May 28 01:39:18 2003
-@@ -135,13 +135,15 @@ static int cp_new_stat(struct inode * in
+--- linux-2.4.20-ad/fs/stat.c~vfs_intent-2.4.20-vanilla        2001-09-13 17:04:43.000000000 -0600
++++ linux-2.4.20-ad-braam/fs/stat.c    2003-07-07 15:13:53.000000000 -0600
+@@ -17,10 +17,12 @@
+  * Revalidate the inode. This is required for proper NFS attribute caching.
+  */
+ static __inline__ int
+-do_revalidate(struct dentry *dentry)
++do_revalidate(struct dentry *dentry, struct lookup_intent *it)
+ {
+       struct inode * inode = dentry->d_inode;
+-      if (inode->i_op && inode->i_op->revalidate)
++      if (inode->i_op && inode->i_op->revalidate_it)
++              return inode->i_op->revalidate_it(dentry, it);
++      else if (inode->i_op && inode->i_op->revalidate)
+               return inode->i_op->revalidate(dentry);
+       return 0;
+ }
+@@ -135,13 +137,15 @@ static int cp_new_stat(struct inode * in
  asmlinkage long sys_stat(char * filename, struct __old_kernel_stat * statbuf)
  {
        struct nameidata nd;
 -      error = user_path_walk(filename, &nd);
 +      error = user_path_walk_it(filename, &nd, &it);
        if (!error) {
-               error = do_revalidate(nd.dentry);
+-              error = do_revalidate(nd.dentry);
++              error = do_revalidate(nd.dentry, &it);
                if (!error)
                        error = cp_old_stat(nd.dentry->d_inode, statbuf);
-+              intent_release(nd.dentry, &it);
++              intent_release(&it);
                path_release(&nd);
        }
        return error;
-@@ -151,13 +153,15 @@ asmlinkage long sys_stat(char * filename
+@@ -151,13 +155,15 @@ asmlinkage long sys_stat(char * filename
  asmlinkage long sys_newstat(char * filename, struct stat * statbuf)
  {
        struct nameidata nd;
 -      error = user_path_walk(filename, &nd);
 +      error = user_path_walk_it(filename, &nd, &it);
        if (!error) {
-               error = do_revalidate(nd.dentry);
+-              error = do_revalidate(nd.dentry);
++              error = do_revalidate(nd.dentry, &it);
                if (!error)
                        error = cp_new_stat(nd.dentry->d_inode, statbuf);
-+              intent_release(nd.dentry, &it);
++              intent_release(&it);
                path_release(&nd);
        }
        return error;
-@@ -172,13 +176,15 @@ asmlinkage long sys_newstat(char * filen
+@@ -172,13 +178,15 @@ asmlinkage long sys_newstat(char * filen
  asmlinkage long sys_lstat(char * filename, struct __old_kernel_stat * statbuf)
  {
        struct nameidata nd;
 -      error = user_path_walk_link(filename, &nd);
 +      error = user_path_walk_link_it(filename, &nd, &it);
        if (!error) {
-               error = do_revalidate(nd.dentry);
+-              error = do_revalidate(nd.dentry);
++              error = do_revalidate(nd.dentry, &it);
                if (!error)
                        error = cp_old_stat(nd.dentry->d_inode, statbuf);
-+              intent_release(nd.dentry, &it);
++              intent_release(&it);
                path_release(&nd);
        }
        return error;
-@@ -189,13 +195,15 @@ asmlinkage long sys_lstat(char * filenam
+@@ -189,13 +197,15 @@ asmlinkage long sys_lstat(char * filenam
  asmlinkage long sys_newlstat(char * filename, struct stat * statbuf)
  {
        struct nameidata nd;
 -      error = user_path_walk_link(filename, &nd);
 +      error = user_path_walk_link_it(filename, &nd, &it);
        if (!error) {
-               error = do_revalidate(nd.dentry);
+-              error = do_revalidate(nd.dentry);
++              error = do_revalidate(nd.dentry, &it);
                if (!error)
                        error = cp_new_stat(nd.dentry->d_inode, statbuf);
-+              intent_release(nd.dentry, &it);
++              intent_release(&it);
                path_release(&nd);
        }
        return error;
-@@ -333,12 +341,14 @@ asmlinkage long sys_stat64(char * filena
+@@ -216,7 +226,7 @@ asmlinkage long sys_fstat(unsigned int f
+       if (f) {
+               struct dentry * dentry = f->f_dentry;
+-              err = do_revalidate(dentry);
++              err = do_revalidate(dentry, NULL);
+               if (!err)
+                       err = cp_old_stat(dentry->d_inode, statbuf);
+               fput(f);
+@@ -235,7 +245,7 @@ asmlinkage long sys_newfstat(unsigned in
+       if (f) {
+               struct dentry * dentry = f->f_dentry;
+-              err = do_revalidate(dentry);
++              err = do_revalidate(dentry, NULL);
+               if (!err)
+                       err = cp_new_stat(dentry->d_inode, statbuf);
+               fput(f);
+@@ -257,7 +267,7 @@ asmlinkage long sys_readlink(const char 
+               error = -EINVAL;
+               if (inode->i_op && inode->i_op->readlink &&
+-                  !(error = do_revalidate(nd.dentry))) {
++                  !(error = do_revalidate(nd.dentry, NULL))) {
+                       UPDATE_ATIME(inode);
+                       error = inode->i_op->readlink(nd.dentry, buf, bufsiz);
+               }
+@@ -333,12 +343,14 @@ asmlinkage long sys_stat64(char * filena
  {
        struct nameidata nd;
        int error;
 -      error = user_path_walk(filename, &nd);
 +      error = user_path_walk_it(filename, &nd, &it);
        if (!error) {
-               error = do_revalidate(nd.dentry);
+-              error = do_revalidate(nd.dentry);
++              error = do_revalidate(nd.dentry, &it);
                if (!error)
                        error = cp_new_stat64(nd.dentry->d_inode, statbuf);
-+              intent_release(nd.dentry, &it);
++              intent_release(&it);
                path_release(&nd);
        }
        return error;
-@@ -348,12 +358,14 @@ asmlinkage long sys_lstat64(char * filen
+@@ -348,12 +360,14 @@ asmlinkage long sys_lstat64(char * filen
  {
        struct nameidata nd;
        int error;
 -      error = user_path_walk_link(filename, &nd);
 +      error = user_path_walk_link_it(filename, &nd, &it);
        if (!error) {
-               error = do_revalidate(nd.dentry);
+-              error = do_revalidate(nd.dentry);
++              error = do_revalidate(nd.dentry, &it);
                if (!error)
                        error = cp_new_stat64(nd.dentry->d_inode, statbuf);
-+              intent_release(nd.dentry, &it);
++              intent_release(&it);
                path_release(&nd);
        }
        return error;
---- linux-2.4.20-l18/fs/proc/base.c~vfs_intent-2.4.20-vanilla  Wed Jun  4 22:53:14 2003
-+++ linux-2.4.20-l18-phil/fs/proc/base.c       Wed Jun  4 22:50:35 2003
+@@ -368,7 +382,7 @@ asmlinkage long sys_fstat64(unsigned lon
+       if (f) {
+               struct dentry * dentry = f->f_dentry;
+-              err = do_revalidate(dentry);
++              err = do_revalidate(dentry, NULL);
+               if (!err)
+                       err = cp_new_stat64(dentry->d_inode, statbuf);
+               fput(f);
+--- linux-2.4.20-ad/fs/proc/base.c~vfs_intent-2.4.20-vanilla   2002-08-02 18:39:45.000000000 -0600
++++ linux-2.4.20-ad-braam/fs/proc/base.c       2003-07-07 15:13:53.000000000 -0600
 @@ -464,6 +464,9 @@ static int proc_pid_follow_link(struct d
  
        error = inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt);
        nd->last_type = LAST_BIND;
 +
-+        if (nd->it != NULL)
-+                nd->it->it_int_flags |= IT_FL_FOLLOWED;
++      if (nd->it != NULL)
++              nd->it->it_int_flags |= IT_FL_FOLLOWED;
  out:
        return error;
  }
---- linux-2.4.20-l18/include/linux/dcache.h~vfs_intent-2.4.20-vanilla  Thu Nov 28 18:53:15 2002
-+++ linux-2.4.20-l18-phil/include/linux/dcache.h       Sun Jun  1 22:35:10 2003
-@@ -7,6 +7,28 @@
+--- linux-2.4.20-ad/include/linux/dcache.h~vfs_intent-2.4.20-vanilla   2002-11-28 16:53:15.000000000 -0700
++++ linux-2.4.20-ad-braam/include/linux/dcache.h       2003-07-09 01:40:11.000000000 -0600
+@@ -7,6 +7,44 @@
  #include <linux/mount.h>
  #include <linux/kernel.h>
  
-+#define IT_OPEN     (1)
-+#define IT_CREAT    (1<<1)
-+#define IT_READDIR  (1<<2)
-+#define IT_GETATTR  (1<<3)
-+#define IT_LOOKUP   (1<<4)
-+#define IT_UNLINK   (1<<5)
++#define IT_OPEN     0x0001
++#define IT_CREAT    0x0002
++#define IT_READDIR  0x0004
++#define IT_GETATTR  0x0008
++#define IT_LOOKUP   0x0010
++#define IT_UNLINK   0x0020
++#define IT_GETXATTR 0x0040
++#define IT_EXEC     0x0080
++#define IT_PIN      0x0100
++
++#define IT_FL_LOCKED   0x0001
++#define IT_FL_FOLLOWED 0x0002 /* set by vfs_follow_link */
 +
-+#define IT_FL_LOCKED   (1)
-+#define IT_FL_FOLLOWED (1<<1) /* set by vfs_follow_link */
++#define INTENT_MAGIC 0x19620323
 +
 +struct lookup_intent {
 +      int it_op;
++      void (*it_op_release)(struct lookup_intent *);
++      int it_magic;
 +      int it_mode;
 +      int it_flags;
 +      int it_disposition;
 +      void *it_data;
 +};
 +
++static inline void intent_init(struct lookup_intent *it, int op, int flags)
++{
++      memset(it, 0, sizeof(*it));
++      it->it_magic = INTENT_MAGIC;
++      it->it_op = op;
++      it->it_flags = flags;
++}
++
++
  /*
   * linux/include/linux/dcache.h
   *
-@@ -79,6 +101,7 @@ struct dentry {
-       unsigned long d_time;           /* used by d_revalidate */
-       struct dentry_operations  *d_op;
-       struct super_block * d_sb;      /* The root of the dentry tree */
-+      struct lookup_intent *d_it;
-       unsigned long d_vfs_flags;
-       void * d_fsdata;                /* fs-specific data */
-       unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */
-@@ -91,8 +114,15 @@ struct dentry_operations {
+@@ -91,8 +129,22 @@ struct dentry_operations {
        int (*d_delete)(struct dentry *);
        void (*d_release)(struct dentry *);
        void (*d_iput)(struct dentry *, struct inode *);
-+      int (*d_revalidate2)(struct dentry *, int, struct lookup_intent *);
-+      void (*d_intent_release)(struct dentry *, struct lookup_intent *);
++      int (*d_revalidate_it)(struct dentry *, int, struct lookup_intent *);
++      void (*d_pin)(struct dentry *, struct vfsmount * , int);
++      void (*d_unpin)(struct dentry *, struct vfsmount *, int);
  };
  
++#define PIN(de,mnt,flag)  if (de->d_op && de->d_op->d_pin) \
++                              de->d_op->d_pin(de, mnt, flag);
++#define UNPIN(de,mnt,flag)  if (de->d_op && de->d_op->d_unpin) \
++                              de->d_op->d_unpin(de, mnt, flag);
++
++
 +/* defined in fs/namei.c */
-+extern void intent_release(struct dentry *de, struct lookup_intent *it);
++extern void intent_release(struct lookup_intent *it);
 +/* defined in fs/dcache.c */
 +extern void __d_rehash(struct dentry * entry, int lock);
 +
  /* the dentry parameter passed to d_hash and d_compare is the parent
   * directory of the entries to be compared. It is used in case these
   * functions need any directory specific information for determining
-@@ -124,6 +154,7 @@ d_iput:            no              no              yes
+@@ -124,6 +176,7 @@ d_iput:            no              no              yes
                                         * s_nfsd_free_path semaphore will be down
                                         */
  #define DCACHE_REFERENCED     0x0008  /* Recently used, don't discard. */
  
  extern spinlock_t dcache_lock;
  
---- linux-2.4.20-l18/include/linux/fs.h~vfs_intent-2.4.20-vanilla      Wed May 28 01:39:17 2003
-+++ linux-2.4.20-l18-phil/include/linux/fs.h   Sun Jun  1 22:07:11 2003
-@@ -338,6 +338,8 @@ extern void set_bh_page(struct buffer_he
+--- linux-2.4.20/include/linux/fs.h~vfs_intent-2.4.20-vanilla  2003-06-12 03:24:59.000000000 -0600
++++ linux-2.4.20-braam/include/linux/fs.h      2003-06-12 03:25:00.000000000 -0600
+@@ -338,6 +338,9 @@ extern void set_bh_page(struct buffer_he
  #define ATTR_MTIME_SET        256
  #define ATTR_FORCE    512     /* Not a change, but a change it */
  #define ATTR_ATTR_FLAG        1024
-+#define ATTR_RAW      2048    /* file system, not vfs will massage attrs */
-+#define ATTR_FROM_OPEN        4096    /* called from open path, ie O_TRUNC */
++#define ATTR_RAW      0x0800  /* file system, not vfs will massage attrs */
++#define ATTR_FROM_OPEN        0x1000  /* called from open path, ie O_TRUNC */
++#define ATTR_CTIME_SET 0x2000
  
  /*
   * This is the Inode Attributes structure, used for notify_change().  It
  
        /* needed for tty driver, and maybe others */
        void                    *private_data;
-+      struct lookup_intent    *f_intent;
++      struct lookup_intent    *f_it;
  
        /* preallocated helper kiobuf to speedup O_DIRECT */
        struct kiobuf           *f_iobuf;
        struct qstr last;
        unsigned int flags;
        int last_type;
-+        struct lookup_intent *it;
++      struct lookup_intent *it;
  };
  
  #define DQUOT_USR_ENABLED     0x01            /* User diskquotas enabled */
-@@ -794,7 +798,9 @@ extern int vfs_symlink(struct inode *, s
+@@ -794,7 +798,8 @@ extern int vfs_symlink(struct inode *, s
  extern int vfs_link(struct dentry *, struct inode *, struct dentry *);
  extern int vfs_rmdir(struct inode *, struct dentry *);
  extern int vfs_unlink(struct inode *, struct dentry *);
 -extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
 +int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
-+              struct inode *new_dir, struct dentry *new_dentry,
-+              struct lookup_intent *it);
++             struct inode *new_dir, struct dentry *new_dentry);
  
  /*
   * File types
-@@ -855,20 +861,33 @@ struct file_operations {
+@@ -854,21 +859,32 @@ struct file_operations {
  struct inode_operations {
        int (*create) (struct inode *,struct dentry *,int);
++      int (*create_it) (struct inode *,struct dentry *,int, struct lookup_intent *);
        struct dentry * (*lookup) (struct inode *,struct dentry *);
-+      struct dentry * (*lookup2) (struct inode *,struct dentry *, struct lookup_intent *);
++      struct dentry * (*lookup_it) (struct inode *,struct dentry *, struct lookup_intent *, int flags);
        int (*link) (struct dentry *,struct inode *,struct dentry *);
-+      int (*link2) (struct inode *,struct inode *, const char *, int);
++      int (*link_raw) (struct nameidata *,struct nameidata *);
        int (*unlink) (struct inode *,struct dentry *);
-+      int (*unlink2) (struct inode *, const char *, int);
++      int (*unlink_raw) (struct nameidata *);
        int (*symlink) (struct inode *,struct dentry *,const char *);
-+      int (*symlink2) (struct inode *, const char *, int, const char *);
++      int (*symlink_raw) (struct nameidata *,const char *);
        int (*mkdir) (struct inode *,struct dentry *,int);
-+      int (*mkdir2) (struct inode *, const char *, int,int);
++      int (*mkdir_raw) (struct nameidata *,int);
        int (*rmdir) (struct inode *,struct dentry *);
-+      int (*rmdir2) (struct inode *, const char *, int);
++      int (*rmdir_raw) (struct nameidata *);
        int (*mknod) (struct inode *,struct dentry *,int,int);
-+      int (*mknod2) (struct inode *, const char *, int,int,int);
++      int (*mknod_raw) (struct nameidata *,int,dev_t);
        int (*rename) (struct inode *, struct dentry *,
                        struct inode *, struct dentry *);
-+      int (*rename2) (struct inode *, struct inode *,
-+                      const char *oldname, int oldlen,
-+                      const char *newname, int newlen);
++      int (*rename_raw) (struct nameidata *, struct nameidata *);
        int (*readlink) (struct dentry *, char *,int);
        int (*follow_link) (struct dentry *, struct nameidata *);
-+      int (*follow_link2) (struct dentry *, struct nameidata *,
-+                           struct lookup_intent *it);
        void (*truncate) (struct inode *);
        int (*permission) (struct inode *, int);
        int (*revalidate) (struct dentry *);
++      int (*revalidate_it) (struct dentry *, struct lookup_intent *);
        int (*setattr) (struct dentry *, struct iattr *);
-+      int (*setattr_raw) (struct inode *, struct iattr *);
++      int (*setattr_raw) (struct inode *, struct iattr *);
        int (*getattr) (struct dentry *, struct iattr *);
        int (*setxattr) (struct dentry *, const char *, void *, size_t, int);
        ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
-@@ -1070,10 +1089,14 @@ static inline int get_lease(struct inode
+@@ -1070,10 +1086,14 @@ static inline int get_lease(struct inode
  
  asmlinkage long sys_open(const char *, int, int);
  asmlinkage long sys_close(unsigned int);      /* yes, it's really unsigned */
  extern int filp_close(struct file *, fl_owner_t id);
  extern char * getname(const char *);
  
-@@ -1335,6 +1358,7 @@ typedef int (*read_actor_t)(read_descrip
+@@ -1335,6 +1355,7 @@ typedef int (*read_actor_t)(read_descrip
  extern loff_t default_llseek(struct file *file, loff_t offset, int origin);
  
  extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *));
  extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *));
  extern int FASTCALL(path_walk(const char *, struct nameidata *));
  extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *));
-@@ -1346,6 +1370,8 @@ extern struct dentry * lookup_one_len(co
+@@ -1346,6 +1367,8 @@ extern struct dentry * lookup_one_len(co
  extern struct dentry * lookup_hash(struct qstr *, struct dentry *);
  #define user_path_walk(name,nd)        __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd)
  #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd)
  
  extern void iput(struct inode *);
  extern void force_delete(struct inode *);
-@@ -1455,6 +1481,8 @@ extern struct file_operations generic_ro
+@@ -1455,6 +1478,8 @@ extern struct file_operations generic_ro
  
  extern int vfs_readlink(struct dentry *, char *, int, const char *);
  extern int vfs_follow_link(struct nameidata *, const char *);
  extern int page_readlink(struct dentry *, char *, int);
  extern int page_follow_link(struct dentry *, struct nameidata *);
  extern struct inode_operations page_symlink_inode_operations;
---- linux-2.4.20-l18/kernel/ksyms.c~vfs_intent-2.4.20-vanilla  Wed May 28 01:39:18 2003
-+++ linux-2.4.20-l18-phil/kernel/ksyms.c       Wed May 28 01:39:18 2003
+--- linux-2.4.20-ad/include/linux/fs_struct.h~vfs_intent-2.4.20-vanilla        2001-07-13 16:10:44.000000000 -0600
++++ linux-2.4.20-ad-braam/include/linux/fs_struct.h    2003-07-07 15:13:53.000000000 -0600
+@@ -34,10 +34,12 @@ static inline void set_fs_root(struct fs
+       write_lock(&fs->lock);
+       old_root = fs->root;
+       old_rootmnt = fs->rootmnt;
++      PIN(dentry, mnt, 1);
+       fs->rootmnt = mntget(mnt);
+       fs->root = dget(dentry);
+       write_unlock(&fs->lock);
+       if (old_root) {
++              UNPIN(old_root, old_rootmnt, 1);
+               dput(old_root);
+               mntput(old_rootmnt);
+       }
+@@ -57,10 +59,12 @@ static inline void set_fs_pwd(struct fs_
+       write_lock(&fs->lock);
+       old_pwd = fs->pwd;
+       old_pwdmnt = fs->pwdmnt;
++      PIN(dentry, mnt, 0);
+       fs->pwdmnt = mntget(mnt);
+       fs->pwd = dget(dentry);
+       write_unlock(&fs->lock);
+       if (old_pwd) {
++              UNPIN(old_pwd, old_pwdmnt, 0);
+               dput(old_pwd);
+               mntput(old_pwdmnt);
+       }
+--- linux-2.4.20-ad/kernel/ksyms.c~vfs_intent-2.4.20-vanilla   2003-07-07 15:13:52.000000000 -0600
++++ linux-2.4.20-ad-braam/kernel/ksyms.c       2003-07-07 15:13:53.000000000 -0600
 @@ -269,6 +269,7 @@ EXPORT_SYMBOL(read_cache_page);
  EXPORT_SYMBOL(set_page_dirty);
  EXPORT_SYMBOL(vfs_readlink);
  EXPORT_SYMBOL(page_readlink);
  EXPORT_SYMBOL(page_follow_link);
  EXPORT_SYMBOL(page_symlink_inode_operations);
+--- linux-2.4.20-ad/kernel/fork.c~vfs_intent-2.4.20-vanilla    2002-11-28 16:53:15.000000000 -0700
++++ linux-2.4.20-ad-braam/kernel/fork.c        2003-07-07 15:13:53.000000000 -0600
+@@ -384,10 +384,13 @@ static inline struct fs_struct *__copy_f
+               fs->umask = old->umask;
+               read_lock(&old->lock);
+               fs->rootmnt = mntget(old->rootmnt);
++              PIN(old->pwd, old->pwdmnt, 0);
++              PIN(old->root, old->rootmnt, 1);
+               fs->root = dget(old->root);
+               fs->pwdmnt = mntget(old->pwdmnt);
+               fs->pwd = dget(old->pwd);
+               if (old->altroot) {
++                      PIN(old->altroot, old->altrootmnt, 1);
+                       fs->altrootmnt = mntget(old->altrootmnt);
+                       fs->altroot = dget(old->altroot);
+               } else {
+--- linux-2.4.20-ad/kernel/exit.c~vfs_intent-2.4.20-vanilla    2002-11-28 16:53:15.000000000 -0700
++++ linux-2.4.20-ad-braam/kernel/exit.c        2003-07-07 15:13:53.000000000 -0600
+@@ -238,11 +238,14 @@ static inline void __put_fs_struct(struc
+ {
+       /* No need to hold fs->lock if we are killing it */
+       if (atomic_dec_and_test(&fs->count)) {
++              UNPIN(fs->pwd, fs->pwdmnt, 0);
++              UNPIN(fs->root, fs->rootmnt, 1);
+               dput(fs->root);
+               mntput(fs->rootmnt);
+               dput(fs->pwd);
+               mntput(fs->pwdmnt);
+               if (fs->altroot) {
++                      UNPIN(fs->altroot, fs->altrootmnt, 1);
+                       dput(fs->altroot);
+                       mntput(fs->altrootmnt);
+               }
 
 _
index 5770132..1afa4d4 100644 (file)
@@ -1,3 +1,5 @@
 fs/ext3/super.c
+fs/ext3/file.c
+fs/ext3/inode.c
 include/linux/ext3_fs.h
 include/linux/ext3_fs_sb.h
index 5770132..a2c3109 100644 (file)
@@ -1,3 +1,5 @@
 fs/ext3/super.c
+fs/ext3/inode.c
+fs/ext3/file.c
 include/linux/ext3_fs.h
 include/linux/ext3_fs_sb.h
index 31901ee..6ad2589 100644 (file)
@@ -1,4 +1,5 @@
 fs/ext3/namei.c
 fs/ext3/ialloc.c
+fs/ext3/inode.c
 fs/ext3/ioctl.c
 include/linux/ext3_fs.h
index b647d5a..6c80106 100644 (file)
@@ -2,6 +2,7 @@ fs/ext3/ialloc.c
 fs/ext3/inode.c
 fs/ext3/namei.c
 fs/ext3/super.c
+fs/ext3/ext3-exports.c
 fs/ext3/xattr.c
 include/linux/ext3_fs.h
 include/linux/ext3_jbd.h
index f8a99ea..f3375a3 100644 (file)
@@ -1,5 +1,6 @@
 fs/exec.c
 fs/dcache.c
+fs/namespace.c
 fs/namei.c
 fs/nfsd/vfs.c
 fs/open.c
@@ -7,4 +8,7 @@ fs/stat.c
 fs/proc/base.c
 include/linux/dcache.h
 include/linux/fs.h
+include/linux/fs_struct.h
 kernel/ksyms.c
+kernel/fork.c
+kernel/exit.c
index 78e494b..8d3d4f0 100644 (file)
@@ -78,7 +78,7 @@ check_pc_match()
                if [ $? != 0 ]; then
                        echo " $1 do not match with $2 "
                        echo " $2 will be changed to match $2"
-                       cat $tmpfile > $P/pc/$PATCH_NAME.pc
+                       cat $tmpfile > $P/pc/$PATCH_NAME.pc
                fi
                rm -rf $tmpfile
        fi
index b951209..c2cc2fa 100644 (file)
@@ -2,7 +2,7 @@ dev_read_only_hp_2.4.20.patch
 exports_2.4.20-rh-hp.patch
 kmem_cache_validate_hp.patch
 lustre_version.patch
-vfs_intent-2.4.20-vanilla.patch
+vfs_intent-2.4.20-hp.patch
 invalidate_show.patch
 export-truncate.patch
 iod-stock-24-exports_hp.patch
@@ -21,5 +21,7 @@ ext3-delete_thread-2.4.20.patch
 ext3-noread-2.4.20.patch
 extN-wantedi.patch
 ext3-san-2.4.20.patch
+ext3-map_inode_page.patch
+ext3-error-export.patch
 iopen-2.4.20.patch
 tcp-zero-copy.patch
index a97c37c..970061d 100644 (file)
@@ -15,9 +15,12 @@ ext-2.4-patch-4.patch
 linux-2.4.20-xattr-0.8.54-chaos.patch
 ext3-2.4.20-fixes.patch
 ext3_orphan_lock-2.4.20-rh.patch
-ext3-delete_thread-2.4.20.patch
+ext3_delete_thread_2.4.20_chaos.patch
 ext3-noread-2.4.20.patch
 extN-wantedi.patch
 ext3-san-2.4.20.patch
+ext3-map_inode_page.patch
+ext3-error-export.patch
 iopen-2.4.20.patch
-tcp-zero-copy.patch
+tcp_zero_copy_2.4.20_chaos.patch
+gpl_header-chaos-2.4.20.patch
index e56cac6..726a028 100644 (file)
@@ -1,4 +1,4 @@
-uml-patch-2.4.20-4.patch
+uml-patch-2.4.20-6.patch
 dev_read_only_2.4.20.patch
 exports_2.4.20.patch
 kmem_cache_validate_2.4.20.patch
@@ -25,5 +25,7 @@ ext3-noread-2.4.20.patch
 ext3-delete_thread-2.4.20.patch
 extN-wantedi.patch
 ext3-san-2.4.20.patch
+ext3-map_inode_page.patch
+ext3-error-export.patch
 iopen-2.4.20.patch
 tcp-zero-copy.patch
index 2ef001d..28e8648 100644 (file)
@@ -1,13 +1,8 @@
-SERIES               MEMNONIC                  COMMENT
+SERIES            MNEMONIC                 COMMENT                     ARCH
 
-hp-pnnl-2.4.20       linux-2.4.20-hp4_pnnl1    same as vanilla but no uml
-vanilla-2.4.20       linux-2.4.20              patch includes uml
-chaos-2.4.20         linux-chaos-2.4.20        same as rh-2.4.20-8
-rh-2.4.20            linux-rh-2.4.20-8         same as chaos-2.4.20
-rh-2.4.18-18         linux-rh-2.4.18-18        same as chaos but includes uml
-chaos                linux-chaos-2.4.18        same as rh-2.4.18-18 but no uml
-
-REVIEW:
-
-vanilla-2.5          linux-2.5.63
-hp-pnnl              linux-2.4.19-hp2_pnnl6
+chaos-2.4.18      linux-chaos-2.4.18       LLNL 2.4.18 chaos ~65       i386
+hp-pnnl-2.4.20    linux-2.4.20-hp4_pnnl1   same as vanilla but no uml  ia64
+vanilla-2.4.20    linux-2.4.20             patch with uml-2.4.20-6     um
+chaos-2.4.20      linux-chaos-2.4.20       same as rh-2.4.20-8         i386
+rh-2.4.20         linux-rh-2.4.20-8        same as chaos-2.4.20        i386
+kgdb-2.5.73       linux-2.5.73             vanilla 2.5.73 with kgdb    i386
index e995588..e69dc6d 100644 (file)
@@ -1,3 +1,4 @@
 .deps
 Makefile
 Makefile.in
+.*.cmd
index 9b53b54..e3f8673 100644 (file)
@@ -32,7 +32,7 @@
 #include <linux/lustre_mds.h>
 #include <linux/lustre_net.h>
 
-int client_import_connect(struct lustre_handle *dlm_handle, 
+int client_import_connect(struct lustre_handle *dlm_handle,
                           struct obd_device *obd,
                           struct obd_uuid *cluuid)
 {
@@ -47,7 +47,6 @@ int client_import_connect(struct lustre_handle *dlm_handle,
         char *tmp[] = {imp->imp_target_uuid.uuid,
                        obd->obd_uuid.uuid,
                        (char *)dlm_handle};
-        int rq_opc = (obd->obd_type->typ_ops->o_brw) ? OST_CONNECT :MDS_CONNECT;
         int msg_flags;
 
         ENTRY;
@@ -67,13 +66,15 @@ int client_import_connect(struct lustre_handle *dlm_handle,
         if (obd->obd_namespace == NULL)
                 GOTO(out_disco, rc = -ENOMEM);
 
-        request = ptlrpc_prep_req(imp, rq_opc, 3, size, tmp);
+        request = ptlrpc_prep_req(imp, imp->imp_connect_op, 3, size, tmp);
         if (!request)
                 GOTO(out_ldlm, rc = -ENOMEM);
 
         request->rq_level = LUSTRE_CONN_NEW;
         request->rq_replen = lustre_msg_size(0, NULL);
 
+        lustre_msg_add_op_flags(request->rq_reqmsg, MSG_CONNECT_PEER);
+
         imp->imp_dlm_handle = *dlm_handle;
 
         imp->imp_level = LUSTRE_CONN_CON;
@@ -88,7 +89,7 @@ int client_import_connect(struct lustre_handle *dlm_handle,
         class_export_put(exp);
 
         msg_flags = lustre_msg_get_op_flags(request->rq_repmsg);
-        if (rq_opc == MDS_CONNECT || msg_flags & MSG_CONNECT_REPLAYABLE) {
+        if (msg_flags & MSG_CONNECT_REPLAYABLE) {
                 imp->imp_replayable = 1;
                 CDEBUG(D_HA, "connected to replayable target: %s\n",
                        imp->imp_target_uuid.uuid);
@@ -130,7 +131,16 @@ int client_import_disconnect(struct lustre_handle *dlm_handle, int failover)
                 RETURN(-EINVAL);
         }
 
-        rq_opc = obd->obd_type->typ_ops->o_brw ? OST_DISCONNECT:MDS_DISCONNECT;
+        switch (imp->imp_connect_op) {
+        case OST_CONNECT: rq_opc = OST_DISCONNECT; break;
+        case MDS_CONNECT: rq_opc = MDS_DISCONNECT; break;
+        case MGMT_CONNECT:rq_opc = MGMT_DISCONNECT;break;
+        default:
+                CERROR("don't know how to disconnect from %s (connect_op %d)\n",
+                       imp->imp_target_uuid.uuid, imp->imp_connect_op);
+                RETURN(-EINVAL);
+        }
+
         down(&cli->cl_sem);
         if (!cli->cl_conn_count) {
                 CERROR("disconnecting disconnected device (%s)\n",
@@ -229,36 +239,31 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler)
         struct obd_uuid remote_uuid;
         struct list_head *p;
         char *str, *tmp;
-        int rc, i, abort_recovery;
+        int rc = 0, abort_recovery;
         ENTRY;
 
         LASSERT_REQSWAB (req, 0);
-        str = lustre_msg_string (req->rq_reqmsg, 0, sizeof (tgtuuid.uuid) - 1);
+        str = lustre_msg_string(req->rq_reqmsg, 0, sizeof(tgtuuid) - 1);
         if (str == NULL) {
                 CERROR("bad target UUID for connect\n");
                 GOTO(out, rc = -EINVAL);
         }
+
         obd_str2uuid (&tgtuuid, str);
+        target = class_uuid2obd(&tgtuuid);
+        if (!target || target->obd_stopping || !target->obd_set_up) {
+                CERROR("UUID '%s' is not available for connect\n", str);
+                GOTO(out, rc = -ENODEV);
+        }
 
         LASSERT_REQSWAB (req, 1);
-        str = lustre_msg_string (req->rq_reqmsg, 1, sizeof (cluuid.uuid) - 1);
+        str = lustre_msg_string(req->rq_reqmsg, 1, sizeof(cluuid) - 1);
         if (str == NULL) {
                 CERROR("bad client UUID for connect\n");
                 GOTO(out, rc = -EINVAL);
         }
-        obd_str2uuid (&cluuid, str);
 
-        i = class_uuid2dev(&tgtuuid);
-        if (i == -1) {
-                CERROR("UUID '%s' not found for connect\n", tgtuuid.uuid);
-                GOTO(out, rc = -ENODEV);
-        }
-
-        target = &obd_dev[i];
-        if (!target || target->obd_stopping || !target->obd_set_up) {
-                CERROR("UUID '%s' is not available for connect\n", str);
-                GOTO(out, rc = -ENODEV);
-        }
+        obd_str2uuid (&cluuid, str);
 
         /* XXX extract a nettype and format accordingly */
         snprintf(remote_uuid.uuid, sizeof remote_uuid,
@@ -491,8 +496,7 @@ static void reset_recovery_timer(struct obd_device *obd)
 
         if (!recovering)
                 return;
-        CDEBUG(D_ERROR, "timer will expire in %ld seconds\n",
-               OBD_RECOVERY_TIMEOUT / HZ);
+        CERROR("timer will expire in %ld seconds\n", OBD_RECOVERY_TIMEOUT / HZ);
         mod_timer(&obd->obd_recovery_timer, jiffies + OBD_RECOVERY_TIMEOUT);
 }
 
@@ -568,7 +572,8 @@ static void process_recovery_queue(struct obd_device *obd)
                 DEBUG_REQ(D_ERROR, req, "processing: ");
                 (void)obd->obd_recovery_handler(req);
                 reset_recovery_timer(obd);
-#warning FIXME: mds_fsync_super(mds->mds_sb);
+                /* bug 1580: decide how to properly sync() in recovery */
+                //mds_fsync_super(mds->mds_sb);
                 class_export_put(req->rq_export);
                 OBD_FREE(req->rq_reqmsg, req->rq_reqlen);
                 OBD_FREE(req, sizeof *req);
@@ -715,8 +720,7 @@ int target_queue_final_reply(struct ptlrpc_request *req, int rc)
         if (recovery_done) {
                 struct list_head *tmp, *n;
                 ldlm_reprocess_all_ns(req->rq_export->exp_obd->obd_namespace);
-                CDEBUG(D_ERROR,
-                       "%s: all clients recovered, sending delayed replies\n",
+                CERROR("%s: all clients recovered, sending delayed replies\n",
                        obd->obd_name);
                 obd->obd_recovering = 0;
                 list_for_each_safe(tmp, n, &obd->obd_delayed_reply_queue) {
index 2dc60cf..3995e95 100644 (file)
@@ -71,6 +71,8 @@ char *ldlm_it2str(int it)
                 return "lookup";
         case IT_UNLINK:
                 return "unlink";
+        case IT_GETXATTR:
+                return "getxattr";
         default:
                 CERROR("Unknown intent %d\n", it);
                 return "UNKNOWN";
@@ -954,8 +956,8 @@ int ldlm_run_ast_work(struct list_head *rpc_list)
                 if (rc == -ERESTART)
                         retval = rc;
                 else if (rc)
-                        CERROR("Failed AST - should clean & disconnect "
-                               "client\n");
+                        CDEBUG(D_DLMTRACE, "Failed AST - should clean & "
+                               "disconnect client\n");
                 LDLM_LOCK_PUT(w->w_lock);
                 list_del(&w->w_list);
                 OBD_FREE(w, sizeof(*w));
index de304d4..50bc96a 100644 (file)
@@ -243,8 +243,7 @@ int ldlm_del_waiting_lock(struct ldlm_lock *lock)
 
 #endif /* __KERNEL__ */
 
-static inline void ldlm_failed_ast(struct ldlm_lock *lock, int rc,
-                                   char *ast_type)
+static void ldlm_failed_ast(struct ldlm_lock *lock, int rc, char *ast_type)
 {
         CERROR("%s AST failed (%d) for res "LPU64"/"LPU64
                ", mode %s: evicting client %s@%s NID "LPU64"\n",
@@ -347,10 +346,19 @@ int ldlm_server_blocking_ast(struct ldlm_lock *lock,
         RETURN(rc);
 }
 
+/* XXX copied from ptlrpc/service.c */
+static long timeval_sub(struct timeval *large, struct timeval *small)
+{
+        return (large->tv_sec - small->tv_sec) * 1000000 +
+                (large->tv_usec - small->tv_usec);
+}
+
 int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data)
 {
         struct ldlm_request *body;
         struct ptlrpc_request *req;
+        struct timeval granted_time;
+        long total_enqueue_wait;
         int rc = 0, size = sizeof(*body);
         ENTRY;
 
@@ -359,6 +367,12 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data)
                 RETURN(-EINVAL);
         }
 
+        do_gettimeofday(&granted_time);
+        total_enqueue_wait = timeval_sub(&granted_time, &lock->l_enqueued_time);
+
+        if (total_enqueue_wait / 1000000 > obd_timeout)
+                LDLM_ERROR(lock, "enqueue wait took %ldus", total_enqueue_wait);
+
         req = ptlrpc_prep_req(lock->l_export->exp_ldlm_data.led_import,
                               LDLM_CP_CALLBACK, 1, &size, NULL);
         if (!req)
@@ -370,7 +384,8 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data)
         body->lock_flags = flags;
         ldlm_lock2desc(lock, &body->lock_desc);
 
-        LDLM_DEBUG(lock, "server preparing completion AST");
+        LDLM_DEBUG(lock, "server preparing completion AST (after %ldus wait)",
+                   total_enqueue_wait);
         req->rq_replen = lustre_msg_size(0, NULL);
 
         req->rq_level = LUSTRE_CONN_RECOVER;
@@ -447,6 +462,7 @@ int ldlm_handle_enqueue(struct ptlrpc_request *req,
         if (!lock)
                 GOTO(out, err = -ENOMEM);
 
+        do_gettimeofday(&lock->l_enqueued_time);
         memcpy(&lock->l_remote_handle, &dlm_req->lock_handle1,
                sizeof(lock->l_remote_handle));
         LDLM_DEBUG(lock, "server-side enqueue handler, new lock created");
@@ -640,22 +656,10 @@ static void ldlm_handle_cp_callback(struct ptlrpc_request *req,
                 lock->l_req_mode = dlm_req->lock_desc.l_granted_mode;
                 LDLM_DEBUG(lock, "completion AST, new lock mode");
         }
-        if (lock->l_resource->lr_type == LDLM_EXTENT) {
+        if (lock->l_resource->lr_type == LDLM_EXTENT)
                 memcpy(&lock->l_extent, &dlm_req->lock_desc.l_extent,
                        sizeof(lock->l_extent));
 
-                if ((lock->l_extent.end & ~PAGE_MASK) != ~PAGE_MASK) {
-                        /* XXX Old versions of BA OST code have a fencepost bug
-                         * which will cause them to grant a lock that's one
-                         * byte too large.  This can be safely removed after BA
-                         * ships their next release -phik (02 Apr 2003) */
-                        lock->l_extent.end--;
-                } else if ((lock->l_extent.start & ~PAGE_MASK) ==
-                           ~PAGE_MASK) {
-                        lock->l_extent.start++;
-                }
-        }
-
         ldlm_resource_unlink_lock(lock);
         if (memcmp(&dlm_req->lock_desc.l_resource.lr_name,
                    &lock->l_resource->lr_name,
@@ -961,7 +965,7 @@ static int ldlm_setup(struct obd_device *obddev, obd_count len, void *buf)
         return rc;
 }
 
-static int ldlm_cleanup(struct obd_device *obddev, int force, int failover)
+static int ldlm_cleanup(struct obd_device *obddev, int flags)
 {
         struct ldlm_obd *ldlm = &obddev->u.ldlm;
         ENTRY;
@@ -973,7 +977,7 @@ static int ldlm_cleanup(struct obd_device *obddev, int force, int failover)
         }
 
 #ifdef __KERNEL__
-        if (force) {
+        if (flags & OBD_OPT_FORCE) {
                 ptlrpc_put_ldlm_hooks();
         } else if (ptlrpc_ldlm_hooks_referenced()) {
                 CERROR("Some connections weren't cleaned up; run lconf with "
@@ -1084,6 +1088,7 @@ EXPORT_SYMBOL(ldlm_replay_locks);
 EXPORT_SYMBOL(ldlm_resource_foreach);
 EXPORT_SYMBOL(ldlm_namespace_foreach);
 EXPORT_SYMBOL(ldlm_namespace_foreach_res);
+EXPORT_SYMBOL(ldlm_change_cbdata);
 
 /* ldlm_lockd.c */
 EXPORT_SYMBOL(ldlm_server_blocking_ast);
index e6a8229..75e6dbd 100644 (file)
@@ -273,6 +273,7 @@ int ldlm_cli_enqueue(struct lustre_handle *connh,
                 /* Set a flag to prevent us from sending a CANCEL (bug 407) */
                 l_lock(&ns->ns_lock);
                 lock->l_flags |= LDLM_FL_LOCAL_ONLY;
+                LDLM_DEBUG(lock, "setting FL_LOCAL_ONLY");
                 l_unlock(&ns->ns_lock);
 
                 ldlm_lock_decref_and_cancel(lockh, mode);
@@ -295,7 +296,7 @@ int ldlm_cli_enqueue(struct lustre_handle *connh,
                 CERROR ("Can't unpack ldlm_reply\n");
                 GOTO (out_req, rc = -EPROTO);
         }
-        
+
         memcpy(&lock->l_remote_handle, &reply->lock_handle,
                sizeof(lock->l_remote_handle));
         *flags = reply->lock_flags;
@@ -309,17 +310,6 @@ int ldlm_cli_enqueue(struct lustre_handle *connh,
                        body->lock_desc.l_extent.end,
                        reply->lock_extent.start, reply->lock_extent.end);
 
-                if ((reply->lock_extent.end & ~PAGE_MASK) != ~PAGE_MASK) {
-                        /* XXX Old versions of BA OST code have a fencepost bug
-                         * which will cause them to grant a lock that's one
-                         * byte too large.  This can be safely removed after BA
-                         * ships their next release -phik (02 Apr 2003) */
-                        reply->lock_extent.end--;
-                } else if ((reply->lock_extent.start & ~PAGE_MASK) ==
-                           ~PAGE_MASK) {
-                        reply->lock_extent.start++;
-                }
-
                 cookie = &reply->lock_extent; /* FIXME bug 267 */
                 cookielen = sizeof(reply->lock_extent);
         }
@@ -454,7 +444,7 @@ int ldlm_cli_convert(struct lustre_handle *lockh, int new_mode, int *flags)
                 CERROR ("Can't unpack ldlm_reply\n");
                 GOTO (out, rc = -EPROTO);
         }
-        
+
         res = ldlm_lock_convert(lock, new_mode, &reply->lock_flags);
         if (res != NULL)
                 ldlm_reprocess_all(res);
@@ -535,11 +525,11 @@ int ldlm_cli_cancel(struct lustre_handle *lockh)
         local_cancel:
                 ldlm_lock_cancel(lock);
         } else {
-                LDLM_DEBUG(lock, "client-side local cancel");
                 if (lock->l_resource->lr_namespace->ns_client) {
-                        CERROR("Trying to cancel local lock\n");
+                        LDLM_ERROR(lock, "Trying to cancel local lock\n");
                         LBUG();
                 }
+                LDLM_DEBUG(lock, "client-side local cancel");
                 ldlm_lock_cancel(lock);
                 ldlm_reprocess_all(lock->l_resource);
                 LDLM_DEBUG(lock, "client-side local cancel handler END");
@@ -631,9 +621,8 @@ static int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns,
                 lock = list_entry(tmp, struct ldlm_lock, l_res_link);
 
                 if (opaque != NULL && lock->l_data != opaque) {
-                        LDLM_ERROR(lock, "data %p doesn't match opaque %p res"
-                                  LPU64":"LPU64, lock->l_data, opaque,
-                                  res_id.name[0], res_id.name[1]);
+                        LDLM_ERROR(lock, "data %p doesn't match opaque %p",
+                                  lock->l_data, opaque);
                         //LBUG();
                         continue;
                 }
@@ -797,12 +786,12 @@ int ldlm_namespace_foreach_res(struct ldlm_namespace *ns,
                                ldlm_res_iterator_t iter, void *closure)
 {
         int i, rc = LDLM_ITER_CONTINUE;
-        
+
         l_lock(&ns->ns_lock);
         for (i = 0; i < RES_HASH_SIZE; i++) {
                 struct list_head *tmp, *next;
                 list_for_each_safe(tmp, next, &(ns->ns_hash[i])) {
-                        struct ldlm_resource *res = 
+                        struct ldlm_resource *res =
                                 list_entry(tmp, struct ldlm_resource, lr_hash);
 
                         ldlm_resource_getref(res);
@@ -817,6 +806,34 @@ int ldlm_namespace_foreach_res(struct ldlm_namespace *ns,
         RETURN(rc);
 }
 
+/* non-blocking function to manipulate a lock whose cb_data is being put away.*/
+void ldlm_change_cbdata(struct ldlm_namespace *ns, 
+                       struct ldlm_res_id *res_id, 
+                       ldlm_iterator_t iter,
+                       void *data)
+{
+        struct ldlm_resource *res;
+        int rc = 0;
+        ENTRY;
+
+        if (ns == NULL) {
+                CERROR("must pass in namespace");
+                LBUG();
+        }
+
+        res = ldlm_resource_get(ns, NULL, *res_id, 0, 0);
+        if (res == NULL) {
+                EXIT;
+                return;
+        }
+
+        l_lock(&ns->ns_lock);
+        rc = ldlm_resource_foreach(res, iter, data);
+        l_unlock(&ns->ns_lock);
+        ldlm_resource_putref(res);
+        EXIT;
+}
+
 /* Lock replay */
 
 static int ldlm_chain_lock_for_replay(struct ldlm_lock *lock, void *closure)
@@ -858,7 +875,7 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock)
                 flags = LDLM_FL_REPLAY | LDLM_FL_BLOCK_WAIT;
         else
                 flags = LDLM_FL_REPLAY;
-                
+
         size = sizeof(*body);
         req = ptlrpc_prep_req(imp, LDLM_ENQUEUE, 1, &size, NULL);
         if (!req)
@@ -866,7 +883,7 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock)
 
         /* We're part of recovery, so don't wait for it. */
         req->rq_level = LUSTRE_CONN_RECOVER;
-        
+
         body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body));
         ldlm_lock2desc(lock, &body->lock_desc);
         body->lock_flags = flags;
@@ -879,14 +896,14 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock)
         rc = ptlrpc_queue_wait(req);
         if (rc != ELDLM_OK)
                 GOTO(out, rc);
-        
+
         reply = lustre_swab_repbuf(req, 0, sizeof (*reply),
                                    lustre_swab_ldlm_reply);
         if (reply == NULL) {
                 CERROR("Can't unpack ldlm_reply\n");
                 GOTO (out, rc = -EPROTO);
         }
-        
+
         memcpy(&lock->l_remote_handle, &reply->lock_handle,
                sizeof(lock->l_remote_handle));
         LDLM_DEBUG(lock, "replayed lock:");
@@ -901,7 +918,7 @@ int ldlm_replay_locks(struct obd_import *imp)
         struct list_head list, *pos, *next;
         struct ldlm_lock *lock;
         int rc = 0;
-        
+
         ENTRY;
         INIT_LIST_HEAD(&list);
 
index 84fdecc..4449c79 100644 (file)
@@ -114,12 +114,10 @@ struct ldlm_namespace *ldlm_namespace_new(char *name, __u32 client)
         if (!ns)
                 RETURN(NULL);
 
-        ns->ns_hash = vmalloc(sizeof(*ns->ns_hash) * RES_HASH_SIZE);
+        OBD_VMALLOC(ns->ns_hash, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
         if (!ns->ns_hash)
                 GOTO(out_ns, NULL);
 
-        atomic_add(sizeof(*ns->ns_hash) * RES_HASH_SIZE, &obd_memory);
-
         OBD_ALLOC(ns->ns_name, strlen(name) + 1);
         if (!ns->ns_name)
                 GOTO(out_hash, NULL);
@@ -152,8 +150,7 @@ struct ldlm_namespace *ldlm_namespace_new(char *name, __u32 client)
 
 out_hash:
         POISON(ns->ns_hash, 0x5a, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
-        vfree(ns->ns_hash);
-        atomic_sub(sizeof(*ns->ns_hash) * RES_HASH_SIZE, &obd_memory);
+        OBD_VFREE(ns->ns_hash, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
 out_ns:
         OBD_FREE(ns, sizeof(*ns));
         return NULL;
@@ -186,6 +183,7 @@ static void cleanup_resource(struct ldlm_resource *res, struct list_head *q,
                         lock->l_flags |= LDLM_FL_CBPENDING;
                         /* ... without sending a CANCEL message. */
                         lock->l_flags |= LDLM_FL_LOCAL_ONLY;
+                        LDLM_DEBUG(lock, "setting FL_LOCAL_ONLY");
                         /* ... and without calling the cancellation callback */
                         lock->l_flags |= LDLM_FL_CANCEL;
                         LDLM_LOCK_PUT(lock);
@@ -272,8 +270,7 @@ int ldlm_namespace_free(struct ldlm_namespace *ns)
         ldlm_namespace_cleanup(ns, 0);
 
         POISON(ns->ns_hash, 0x5a, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
-        vfree(ns->ns_hash /* , sizeof(*ns->ns_hash) * RES_HASH_SIZE */);
-        atomic_sub(sizeof(*ns->ns_hash) * RES_HASH_SIZE, &obd_memory);
+        OBD_VFREE(ns->ns_hash, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
         OBD_FREE(ns->ns_name, strlen(ns->ns_name) + 1);
         OBD_FREE(ns, sizeof(*ns));
 
index 8344af5..88af047 100644 (file)
@@ -145,7 +145,7 @@ int llu_create(struct inode *dir, struct pnode_base *pnode, int mode)
 
         it = dentry->d_it;
 
-        rc = ll_it_open_error(IT_OPEN_CREATE, it);
+        rc = ll_it_open_error(DISP_OPEN_CREATE, it);
         if (rc) {
                 LL_GET_INTENT(dentry, it);
                 ptlrpc_req_finished(it->it_data);
@@ -317,7 +317,7 @@ static int llu_file_open(struct inode *inode)
 #if 0
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino);
         LL_GET_INTENT(file->f_dentry, it);
-        rc = ll_it_open_error(IT_OPEN_OPEN, it);
+        rc = ll_it_open_error(DISP_OPEN_OPEN, it);
         if (rc)
                 RETURN(rc);
 #endif
@@ -477,7 +477,7 @@ static int llu_file_release(struct inode *inode)
                 oa.o_id = lsm->lsm_object_id;
                 oa.o_mode = S_IFREG;
                 oa.o_valid = OBD_MD_FLTYPE | OBD_MD_FLID;
-                
+
                 memcpy(&oa.o_inline, &fd->fd_ost_och, FD_OSTDATA_SIZE);
                 oa.o_valid |= OBD_MD_FLHANDLE;
 
index 0e88933..0939352 100644 (file)
@@ -715,7 +715,7 @@ llu_fsswop_mount(const char *source,
 
 /* XXX do we need this??
         memset(&osfs, 0, sizeof(osfs));
-        rc = obd_statfs(&sbi->ll_mdc_conn, &osfs);
+        rc = obd_statfs(class_conn2obd(&sbi->ll_mdc_conn),&osfs,jiffies-100*HZ);
 */
         /* fetch attr of root inode */
         err = mdc_getattr(&sbi->ll_mdc_conn, &rootfid,
@@ -765,9 +765,9 @@ out_inode:
 out_request:
         ptlrpc_req_finished(request);
 out_osc:
-        obd_disconnect(&sbi->ll_osc_conn);
+        obd_disconnect(&sbi->ll_osc_conn, 0);
 out_mdc:
-        obd_disconnect(&sbi->ll_mdc_conn);
+        obd_disconnect(&sbi->ll_mdc_conn, 0);
 out_free:
         OBD_FREE(sbi, sizeof(*sbi));
         return err;
index e530020..49c6100 100644 (file)
@@ -6,3 +6,4 @@ Makefile
 Makefile.in
 .deps
 TAGS
+.*.cmd
index b6fc501..9ef9b7f 100644 (file)
@@ -9,8 +9,8 @@ MODULE = llite
 modulefs_DATA = llite.o
 EXTRA_PROGRAMS = llite
 
-llite_SOURCES = dcache.c commit_callback.c super.c rw.c iod.c super25.c
-llite_SOURCES += file.c dir.c sysctl.c symlink.c
-llite_SOURCES += namei.c lproc_llite.c llite_internal.h
+llite_SOURCES = dcache.c commit_callback.c  rw.c  super25.c
+llite_SOURCES += file.c dir.c sysctl.c symlink.c llite_lib.c
+llite_SOURCES += namei.c lproc_llite.c super.c iod.c llite_internal.h
 
 include $(top_srcdir)/Rules
index 0684968..8c55b3d 100644 (file)
 #include <linux/lustre_idl.h>
 #include <linux/lustre_dlm.h>
 
+#include "llite_internal.h"
+
 /* should NOT be called with the dcache lock, see fs/dcache.c */
-void ll_release(struct dentry *de)
+static void ll_release(struct dentry *de)
 {
+        struct ll_dentry_data *lld = ll_d2d(de);
         ENTRY;
+
+        LASSERT(lld->lld_cwd_count == 0);
+        LASSERT(lld->lld_mnt_count == 0);
         OBD_FREE(de->d_fsdata, sizeof(struct ll_dentry_data));
-        EXIT;
-}
 
-int ll_delete(struct dentry *de)
-{
-        if (de->d_it != 0) {
-                CERROR("%s put dentry %p+%p with d_it %p\n", current->comm,
-                       de, de->d_fsdata, de->d_it);
-                LBUG();
-        }
-        return 0;
+        EXIT;
 }
 
 void ll_set_dd(struct dentry *de)
@@ -55,23 +52,20 @@ void ll_set_dd(struct dentry *de)
         LASSERT(de != NULL);
 
         lock_kernel();
-
         if (de->d_fsdata == NULL) {
                 OBD_ALLOC(de->d_fsdata, sizeof(struct ll_dentry_data));
-                sema_init(&ll_d2d(de)->lld_it_sem, 1);
         }
-
         unlock_kernel();
 
         EXIT;
 }
 
-void ll_intent_release(struct dentry *de, struct lookup_intent *it)
+void ll_intent_release(struct lookup_intent *it)
 {
         struct lustre_handle *handle;
         ENTRY;
 
-        if (it->it_lock_mode) {
+        if (it->it_op && it->it_lock_mode) {
                 handle = (struct lustre_handle *)it->it_lock_handle;
                 CDEBUG(D_DLMTRACE, "releasing lock with cookie "LPX64
                        " from it %p\n",
@@ -83,84 +77,73 @@ void ll_intent_release(struct dentry *de, struct lookup_intent *it)
                    lock (see bug 494) */
                 it->it_lock_mode = 0;
         }
+        it->it_magic = 0;
+        it->it_op_release = 0;
+        EXIT;
+}
 
-        if (!de->d_it || it->it_op == IT_RELEASED_MAGIC) {
-                EXIT;
+void ll_unhash_aliases(struct inode *inode)
+{
+        struct dentry *dentry = NULL;
+        struct list_head *tmp;
+        struct ll_sb_info *sbi;
+        ENTRY;
+
+        if (inode == NULL) {
+                CERROR("unexpected NULL inode, tell phil\n");
                 return;
         }
 
-        if (de->d_it == it)
-                LL_GET_INTENT(de, it);
-        else
-                CDEBUG(D_INODE, "STRANGE intent release: %p %p\n",
-                       de->d_it, it);
+        sbi = ll_i2sbi(inode);
+
+        CDEBUG(D_INODE, "marking dentries for ino %lx/%x invalid\n",
+               inode->i_ino, inode->i_generation);
 
+        spin_lock(&dcache_lock);
+        list_for_each(tmp, &inode->i_dentry) {
+                dentry = list_entry(tmp, struct dentry, d_alias);
+
+                list_del_init(&dentry->d_hash);
+                dentry->d_flags |= DCACHE_LUSTRE_INVALID;
+                list_add(&dentry->d_hash, &sbi->ll_orphan_dentry_list);
+        }
+
+        spin_unlock(&dcache_lock);
         EXIT;
 }
 
 extern struct dentry *ll_find_alias(struct inode *, struct dentry *);
 
-static int revalidate2_finish(int flag, struct ptlrpc_request *request,
+static int revalidate_it_finish(struct ptlrpc_request *request,
                               struct inode *parent, struct dentry **de,
                               struct lookup_intent *it, int offset, obd_id ino)
 {
         struct ll_sb_info     *sbi = ll_i2sbi(parent);
-        struct mds_body       *body;
-        struct lov_stripe_md  *lsm = NULL;
-        struct lov_mds_md     *lmm;
-        int                    lmmsize;
+        struct lustre_md      md;
         int                    rc = 0;
         ENTRY;
 
         /* NB 1 request reference will be taken away by ll_intent_lock()
          * when I return */
 
-        if ((flag & LL_LOOKUP_NEGATIVE) != 0)
-                GOTO (out, rc = -ENOENT);
+        if (it_disposition(it, DISP_LOOKUP_NEG))
+                RETURN(-ENOENT);
 
-        /* We only get called if the mdc_enqueue() called from
-         * ll_intent_lock() was successful.  Therefore the mds_body is
-         * present and correct, and the eadata is present (but still
-         * opaque, so only obd_unpackmd() can check the size) */
-        body = lustre_msg_buf(request->rq_repmsg, offset, sizeof (*body));
-        LASSERT (body != NULL);
-        LASSERT_REPSWABBED (request, offset);
+        /* ll_intent_lock was successful, now prepare the lustre_md) */
+        rc = mdc_req2lustre_md(request, offset, &sbi->ll_osc_conn, &md);
+        if (rc)
+                RETURN(rc);
 
-        if (body->valid & OBD_MD_FLEASIZE) {
-                /* Only bother with this if inodes's LSM not set? */
-
-                if (body->eadatasize == 0) {
-                        CERROR ("OBD_MD_FLEASIZE set, but eadatasize 0\n");
-                        GOTO (out, rc = -EPROTO);
-                }
-                lmmsize = body->eadatasize;
-                lmm = lustre_msg_buf (request->rq_repmsg, offset + 1, lmmsize);
-                LASSERT (lmm != NULL);
-                LASSERT_REPSWABBED (request, offset + 1);
-
-                rc = obd_unpackmd (&sbi->ll_osc_conn,
-                                   &lsm, lmm, lmmsize);
-                if (rc < 0) {
-                        CERROR ("Error %d unpacking eadata\n", rc);
-                        LBUG();
-                        /* XXX don't know if I should do this... */
-                        GOTO (out, rc);
-                        /* or skip the ll_update_inode but still do
-                         * mdc_lock_set_inode() */
-                }
-                LASSERT (rc >= sizeof (*lsm));
-                rc = 0;
-        }
+        ll_update_inode((*de)->d_inode, md.body, md.lsm);
 
-        ll_update_inode((*de)->d_inode, body, lsm);
+        if (md.lsm != NULL && ll_i2info((*de)->d_inode)->lli_smd != md.lsm)
+                obd_free_memmd (&sbi->ll_osc_conn, &md.lsm);
 
-        if (lsm != NULL &&
-            ll_i2info((*de)->d_inode)->lli_smd != lsm)
-                obd_free_memmd (&sbi->ll_osc_conn, &lsm);
-
-        ll_mdc_lock_set_inode((struct lustre_handle *)it->it_lock_handle,
-                              (*de)->d_inode);
- out:
+        CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
+               (*de)->d_inode, (*de)->d_inode->i_ino,
+               (*de)->d_inode->i_generation);
+        ldlm_lock_set_data((struct lustre_handle *)it->it_lock_handle,
+                           (*de)->d_inode);
         RETURN(rc);
 }
 
@@ -197,20 +180,26 @@ int ll_have_md_lock(struct dentry *de)
         RETURN(0);
 }
 
-int ll_revalidate2(struct dentry *de, int flags, struct lookup_intent *it)
+int ll_revalidate_it(struct dentry *de, int flags, struct lookup_intent *it)
 {
         int rc;
         ENTRY;
         CDEBUG(D_VFSTRACE, "VFS Op:name=%s,intent=%s\n", de->d_name.name,
                LL_IT2STR(it));
 
-        /* We don't want to cache negative dentries, so return 0 immediately.
-         * We believe that this is safe, that negative dentries cannot be
-         * pinned by someone else */
-        if (de->d_inode == NULL) {
-                CDEBUG(D_INODE, "negative dentry: ret 0 to force lookup2\n");
+        /* Cached negative dentries are unsafe for now - look them up again */
+        if (de->d_inode == NULL)
                 RETURN(0);
-        }
+
+        /* 
+         * never execute intents for mount points
+         * - attrs will be fixed up in ll_revalidate_inode
+         */
+        if (d_mountpoint(de))
+                RETURN(1);
+
+        if (it)
+                it->it_op_release = ll_intent_release;
 
         if (it == NULL || it->it_op == IT_GETATTR) {
                 /* We could just return 1 immediately, but since we should only
@@ -233,7 +222,6 @@ int ll_revalidate2(struct dentry *de, int flags, struct lookup_intent *it)
                                 memcpy(it->it_lock_handle, &lockh,
                                        sizeof(lockh));
                                 it->it_lock_mode = LCK_PR;
-                                LL_SAVE_INTENT(de, it);
                         } else {
                                 ldlm_lock_decref(&lockh, LCK_PR);
                         }
@@ -248,7 +236,6 @@ int ll_revalidate2(struct dentry *de, int flags, struct lookup_intent *it)
                                 memcpy(it->it_lock_handle, &lockh,
                                        sizeof(lockh));
                                 it->it_lock_mode = LCK_PW;
-                                LL_SAVE_INTENT(de, it);
                         } else {
                                 ldlm_lock_decref(&lockh, LCK_PW);
                         }
@@ -256,31 +243,123 @@ int ll_revalidate2(struct dentry *de, int flags, struct lookup_intent *it)
                 }
                 if (S_ISDIR(de->d_inode->i_mode))
                         ll_invalidate_inode_pages(de->d_inode);
-                d_unhash_aliases(de->d_inode);
+                ll_unhash_aliases(de->d_inode);
                 RETURN(0);
         }
 
-        rc = ll_intent_lock(de->d_parent->d_inode, &de, it, revalidate2_finish);
+        rc = ll_intent_lock(de->d_parent->d_inode, &de, it, flags,
+                            revalidate_it_finish);
         if (rc < 0) {
                 if (rc != -ESTALE) {
                         CERROR("ll_intent_lock: rc %d : it->it_status %d\n", rc,
                                it->it_status);
                 }
+                ll_unhash_aliases(de->d_inode);
                 RETURN(0);
         }
         /* unfortunately ll_intent_lock may cause a callback and revoke our
            dentry */
         spin_lock(&dcache_lock);
-        list_del_init(&de->d_hash);
+        hlist_del_init(&de->d_hash);
         __d_rehash(de, 0);
         spin_unlock(&dcache_lock);
 
         RETURN(1);
 }
 
+static void ll_pin(struct dentry *de, struct vfsmount *mnt, int flag)
+{
+        struct inode *inode= de->d_inode;
+        struct ll_sb_info *sbi = ll_i2sbi(inode);
+        struct ll_dentry_data *ldd = ll_d2d(de);
+        struct obd_client_handle *handle;
+        int rc = 0;
+        ENTRY;
+        LASSERT(ldd);
+
+        lock_kernel();
+        /* Strictly speaking this introduces an additional race: the
+         * increments should wait until the rpc has returned.
+         * However, given that at present the function is void, this
+         * issue is moot. */
+        if (flag == 1 && (++ldd->lld_mnt_count) > 1) {
+                unlock_kernel();
+                EXIT;
+                return;
+        }
+
+        if (flag == 0 && (++ldd->lld_cwd_count) > 1) {
+                unlock_kernel();
+                EXIT;
+                return;
+        }
+        unlock_kernel();
+
+        handle = (flag) ? &ldd->lld_mnt_och : &ldd->lld_cwd_och;
+        rc = obd_pin(&sbi->ll_mdc_conn, inode->i_ino, inode->i_generation,
+                     inode->i_mode & S_IFMT, handle, flag);
+
+        if (rc) {
+                lock_kernel();
+                memset(handle, 0, sizeof(*handle));
+                if (flag == 0)
+                        ldd->lld_cwd_count--;
+                else
+                        ldd->lld_mnt_count--;
+                unlock_kernel();
+        }
+
+        EXIT;
+        return;
+}
+
+static void ll_unpin(struct dentry *de, struct vfsmount *mnt, int flag)
+{
+        struct ll_sb_info *sbi = ll_i2sbi(de->d_inode);
+        struct ll_dentry_data *ldd = ll_d2d(de);
+        struct obd_client_handle handle;
+        int count, rc = 0;
+        ENTRY;
+        LASSERT(ldd);
+
+        lock_kernel();
+        /* Strictly speaking this introduces an additional race: the
+         * increments should wait until the rpc has returned.
+         * However, given that at present the function is void, this
+         * issue is moot. */
+        handle = (flag) ? ldd->lld_mnt_och : ldd->lld_cwd_och;
+        if (handle.och_magic != OBD_CLIENT_HANDLE_MAGIC) {
+                /* the "pin" failed */
+                unlock_kernel();
+                EXIT;
+                return;
+        }
+
+        if (flag)
+                count = --ldd->lld_mnt_count;
+        else
+                count = --ldd->lld_cwd_count;
+        unlock_kernel();
+
+        if (count != 0) {
+                EXIT;
+                return;
+        }
+
+        rc = obd_unpin(&sbi->ll_mdc_conn, &handle, flag);
+        EXIT;
+        return;
+}
+
 struct dentry_operations ll_d_ops = {
-        .d_revalidate2 = ll_revalidate2,
-        .d_intent_release = ll_intent_release,
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+        .d_revalidate_nd = ll_revalidate_nd,
+#else
+        .d_revalidate_it = ll_revalidate_it,
+#endif
         .d_release = ll_release,
-        .d_delete = ll_delete,
+#if 0
+        .d_pin = ll_pin,
+        .d_unpin = ll_unpin,
+#endif
 };
index 115ed4e..a81a7d4 100644 (file)
@@ -54,14 +54,6 @@ typedef struct ext2_dir_entry_2 ext2_dirent;
 #define PageChecked(page)        test_bit(PG_checked, &(page)->flags)
 #define SetPageChecked(page)     set_bit(PG_checked, &(page)->flags)
 
-
-static int ll_dir_prepare_write(struct file *file, struct page *page,
-                                unsigned from, unsigned to)
-{
-        CDEBUG(D_VFSTRACE, "VFS Op:\n");
-        return 0;
-}
-
 /* returns the page unlocked, but with a reference */
 static int ll_dir_readpage(struct file *file, struct page *page)
 {
@@ -98,7 +90,7 @@ static int ll_dir_readpage(struct file *file, struct page *page)
                              &lockh);
         if (!rc) {
                 ll_prepare_mdc_op_data(&data, inode, NULL, NULL, 0, 0);
-                
+
                 rc = mdc_enqueue(&sbi->ll_mdc_conn, LDLM_PLAIN, &it, LCK_PR,
                                  &data, &lockh, NULL, 0,
                                  ldlm_completion_ast, ll_mdc_blocking_ast,
@@ -137,39 +129,14 @@ static int ll_dir_readpage(struct file *file, struct page *page)
                 SetPageUptodate(page);
 
         unlock_page(page);
-        ll_unlock(LCK_PR, &lockh);
-        if (rc != ELDLM_OK)
-                CERROR("ll_unlock: err: %d\n", rc);
+        ldlm_lock_decref(&lockh, LCK_PR);
         return rc;
 }
 
 struct address_space_operations ll_dir_aops = {
         readpage: ll_dir_readpage,
-        prepare_write: ll_dir_prepare_write
 };
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3))
-int waitfor_one_page(struct page *page)
-{
-        int error = 0;
-        struct buffer_head *bh, *head = page->buffers;
-
-        bh = head;
-        do {
-                wait_on_buffer(bh);
-                if (buffer_req(bh) && !buffer_uptodate(bh))
-                        error = -EIO;
-        } while ((bh = bh->b_this_page) != head);
-        return error;
-}
-#elif (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-int waitfor_one_page(struct page *page)
-{
-        wait_on_page_locked(page);
-        return 0;
-}
-#endif
-
 /*
  * ext2 uses block-sized chunks. Arguably, sector-sized ones would be
  * more robust, but we have what we have
@@ -190,27 +157,6 @@ static inline unsigned long dir_pages(struct inode *inode)
         return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT;
 }
 
-extern void set_page_clean(struct page *page);
-
-static int ext2_commit_chunk(struct page *page, unsigned from, unsigned to)
-{
-        struct inode *dir = page->mapping->host;
-        loff_t new_size = (page->index << PAGE_CACHE_SHIFT) + to;
-        int err = 0;
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        dir->i_version = ++event;
-#endif
-        if (new_size > dir->i_size)
-                dir->i_size = new_size;
-        SetPageUptodate(page);
-        set_page_clean(page);
-
-        //page->mapping->a_ops->commit_write(NULL, page, from, to);
-        //if (IS_SYNC(dir))
-        //      err = waitfor_one_page(page);
-        return err;
-}
 
 static void ext2_check_page(struct page *page)
 {
@@ -324,20 +270,6 @@ fail:
         return ERR_PTR(-EIO);
 }
 
-/*
- * NOTE! unlike strncmp, ext2_match returns 1 for success, 0 for failure.
- *
- * len <= EXT2_NAME_LEN and de != NULL are guaranteed by caller.
- */
-static inline int ext2_match (int len, const char * const name,
-                                        struct ext2_dir_entry_2 * de)
-{
-        if (len != de->name_len)
-                return 0;
-        if (!de->inode)
-                return 0;
-        return !memcmp(name, de->name, len);
-}
 
 /*
  * p is at least 6 bytes before the end of page
@@ -368,33 +300,6 @@ static unsigned char ext2_filetype_table[EXT2_FT_MAX] = {
         [EXT2_FT_SYMLINK]       DT_LNK,
 };
 
-static unsigned int ll_dt2fmt[DT_WHT + 1] = {
-        [EXT2_FT_UNKNOWN]       0,
-        [EXT2_FT_REG_FILE]      S_IFREG,
-        [EXT2_FT_DIR]           S_IFDIR,
-        [EXT2_FT_CHRDEV]        S_IFCHR,
-        [EXT2_FT_BLKDEV]        S_IFBLK,
-        [EXT2_FT_FIFO]          S_IFIFO,
-        [EXT2_FT_SOCK]          S_IFSOCK,
-        [EXT2_FT_SYMLINK]       S_IFLNK
-};
-
-#define S_SHIFT 12
-static unsigned char ext2_type_by_mode[S_IFMT >> S_SHIFT] = {
-        [S_IFREG >> S_SHIFT]    EXT2_FT_REG_FILE,
-        [S_IFDIR >> S_SHIFT]    EXT2_FT_DIR,
-        [S_IFCHR >> S_SHIFT]    EXT2_FT_CHRDEV,
-        [S_IFBLK >> S_SHIFT]    EXT2_FT_BLKDEV,
-        [S_IFIFO >> S_SHIFT]    EXT2_FT_FIFO,
-        [S_IFSOCK >> S_SHIFT]   EXT2_FT_SOCK,
-        [S_IFLNK >> S_SHIFT]    EXT2_FT_SYMLINK,
-};
-
-static inline void ext2_set_de_type(ext2_dirent *de, struct inode *inode)
-{
-        mode_t mode = inode->i_mode;
-        de->file_type = ext2_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
-}
 
 int ll_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
@@ -437,7 +342,7 @@ int ll_readdir(struct file * filp, void * dirent, filldir_t filldir)
                 }
                 de = (ext2_dirent *)(kaddr+offset);
                 limit = kaddr + PAGE_CACHE_SIZE - EXT2_DIR_REC_LEN(1);
-                for ( ;(char*)de <= limit; de = ext2_next_entry(de))
+                for ( ;(char*)de <= limit; de = ext2_next_entry(de)) {
                         if (de->inode) {
                                 int over;
                                 unsigned char d_type = DT_UNKNOWN;
@@ -454,334 +359,31 @@ int ll_readdir(struct file * filp, void * dirent, filldir_t filldir)
                                         GOTO(done,0);
                                 }
                         }
+                }
                 ext2_put_page(page);
         }
 
 done:
         filp->f_pos = (n << PAGE_CACHE_SHIFT) | offset;
         filp->f_version = inode->i_version;
-        UPDATE_ATIME(inode);
+        update_atime(inode);
         RETURN(0);
 }
 
-/*
- *      ext2_find_entry()
- *
- * finds an entry in the specified directory with the wanted name. It
- * returns the page in which the entry was found, and the entry itself
- * (as a parameter - res_dir). Page is returned mapped and unlocked.
- * Entry is guaranteed to be valid.
- */
-struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir,
-                        struct dentry *dentry, struct page ** res_page)
-{
-        const char *name = dentry->d_name.name;
-        int namelen = dentry->d_name.len;
-        unsigned reclen = EXT2_DIR_REC_LEN(namelen);
-        unsigned long start, n;
-        unsigned long npages = dir_pages(dir);
-        struct page *page = NULL;
-        ext2_dirent * de;
-
-        /* OFFSET_CACHE */
-        *res_page = NULL;
-
-        //      start = dir->u.ext2_i.i_dir_start_lookup;
-        start = 0;
-        if (start >= npages)
-                start = 0;
-        n = start;
-        do {
-                char *kaddr;
-                page = ll_get_dir_page(dir, n);
-                if (!IS_ERR(page)) {
-                        kaddr = page_address(page);
-                        de = (ext2_dirent *) kaddr;
-                        kaddr += PAGE_CACHE_SIZE - reclen;
-                        while ((char *) de <= kaddr) {
-                                if (ext2_match (namelen, name, de))
-                                        goto found;
-                                de = ext2_next_entry(de);
-                        }
-                        ext2_put_page(page);
-                }
-                if (++n >= npages)
-                        n = 0;
-        } while (n != start);
-        return NULL;
-
-found:
-        *res_page = page;
-        //      dir->u.ext2_i.i_dir_start_lookup = n;
-        return de;
-}
-
-struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p)
-{
-        struct page *page = ll_get_dir_page(dir, 0);
-        ext2_dirent *de = NULL;
-
-        if (!IS_ERR(page)) {
-                de = ext2_next_entry((ext2_dirent *) page_address(page));
-                *p = page;
-        }
-        return de;
-}
-
-obd_id ll_inode_by_name(struct inode * dir, struct dentry *dentry, int *type)
-{
-        obd_id res = 0;
-        struct ext2_dir_entry_2 * de;
-        struct page *page;
-
-        de = ext2_find_entry (dir, dentry, &page);
-        if (de) {
-                res = le32_to_cpu(de->inode);
-                *type = ll_dt2fmt[de->file_type];
-                kunmap(page);
-                page_cache_release(page);
-        }
-        return res;
-}
-
-/* Releases the page */
-void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
-                        struct page *page, struct inode *inode)
-{
-        unsigned from = (char *) de - (char *) page_address(page);
-        unsigned to = from + le16_to_cpu(de->rec_len);
-        int err;
-
-        lock_page(page);
-        err = page->mapping->a_ops->prepare_write(NULL, page, from, to);
-        if (err)
-                LBUG();
-        de->inode = cpu_to_le32(inode->i_ino);
-        ext2_set_de_type (de, inode);
-        dir->i_mtime = dir->i_ctime = CURRENT_TIME;
-        err = ext2_commit_chunk(page, from, to);
-        unlock_page(page);
-        ext2_put_page(page);
-}
-
-/*
- *      Parent is locked.
- */
-int ll_add_link (struct dentry *dentry, struct inode *inode)
-{
-        struct inode *dir = dentry->d_parent->d_inode;
-        const char *name = dentry->d_name.name;
-        int namelen = dentry->d_name.len;
-        unsigned reclen = EXT2_DIR_REC_LEN(namelen);
-        unsigned short rec_len, name_len;
-        struct page *page = NULL;
-        ext2_dirent * de;
-        unsigned long npages = dir_pages(dir);
-        unsigned long n;
-        char *kaddr;
-        unsigned from, to;
-        int err;
-
-        /* We take care of directory expansion in the same loop */
-        for (n = 0; n <= npages; n++) {
-                page = ll_get_dir_page(dir, n);
-                err = PTR_ERR(page);
-                if (IS_ERR(page))
-                        goto out;
-                kaddr = page_address(page);
-                de = (ext2_dirent *)kaddr;
-                kaddr += PAGE_CACHE_SIZE - reclen;
-                while ((char *)de <= kaddr) {
-                        err = -EEXIST;
-                        if (ext2_match (namelen, name, de))
-                                goto out_page;
-                        name_len = EXT2_DIR_REC_LEN(de->name_len);
-                        rec_len = le16_to_cpu(de->rec_len);
-                        if ( n==npages && rec_len == 0) {
-                                CERROR("Fatal dir behaviour\n");
-                                goto out_page;
-                        }
-                        if (!de->inode && rec_len >= reclen)
-                                goto got_it;
-                        if (rec_len >= name_len + reclen)
-                                goto got_it;
-                        de = (ext2_dirent *) ((char *) de + rec_len);
-                }
-                ext2_put_page(page);
-        }
-        LBUG();
-        return -EINVAL;
-
-got_it:
-        from = (char*)de - (char*)page_address(page);
-        to = from + rec_len;
-        lock_page(page);
-        err = page->mapping->a_ops->prepare_write(NULL, page, from, to);
-        if (err)
-                goto out_unlock;
-        if (de->inode) {
-                ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
-                de1->rec_len = cpu_to_le16(rec_len - name_len);
-                de->rec_len = cpu_to_le16(name_len);
-                de = de1;
-        }
-        de->name_len = namelen;
-        memcpy (de->name, name, namelen);
-        de->inode = cpu_to_le32(inode->i_ino);
-        ext2_set_de_type (de, inode);
-        CDEBUG(D_INODE, "type set to %o\n", de->file_type);
-        dir->i_mtime = dir->i_ctime = CURRENT_TIME;
-        err = ext2_commit_chunk(page, from, to);
-
-        // change_inode happens with the commit_chunk
-        /* XXX OFFSET_CACHE */
-
-out_unlock:
-        unlock_page(page);
-out_page:
-        ext2_put_page(page);
-out:
-        return err;
-}
-
-/*
- * ext2_delete_entry deletes a directory entry by merging it with the
- * previous entry. Page is up-to-date. Releases the page.
- */
-int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page )
-{
-        struct address_space *mapping = page->mapping;
-        struct inode *inode = mapping->host;
-        char *kaddr = page_address(page);
-        unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1);
-        unsigned to = ((char*)dir - kaddr) + le16_to_cpu(dir->rec_len);
-        ext2_dirent * pde = NULL;
-        ext2_dirent * de = (ext2_dirent *) (kaddr + from);
-        int err;
-
-        while ((char*)de < (char*)dir) {
-                pde = de;
-                de = ext2_next_entry(de);
-        }
-        if (pde)
-                from = (char*)pde - (char*)page_address(page);
-        lock_page(page);
-        err = mapping->a_ops->prepare_write(NULL, page, from, to);
-        if (err)
-                LBUG();
-        if (pde)
-                pde->rec_len = cpu_to_le16(to-from);
-        dir->inode = 0;
-        inode->i_ctime = inode->i_mtime = CURRENT_TIME;
-        err = ext2_commit_chunk(page, from, to);
-        unlock_page(page);
-        ext2_put_page(page);
-        return err;
-}
-
-/*
- * Set the first fragment of directory.
- */
-int ext2_make_empty(struct inode *inode, struct inode *parent)
-{
-        struct address_space *mapping = inode->i_mapping;
-        struct page *page = grab_cache_page(mapping, 0);
-        unsigned chunk_size = ext2_chunk_size(inode);
-        struct ext2_dir_entry_2 * de;
-        char *base;
-        int err;
-        ENTRY;
-
-        if (!page)
-                return -ENOMEM;
-        base = kmap(page);
-        if (!base)
-                return -ENOMEM;
-
-        err = mapping->a_ops->prepare_write(NULL, page, 0, chunk_size);
-        if (err)
-                goto fail;
-
-        de = (struct ext2_dir_entry_2 *) base;
-        de->name_len = 1;
-        de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1));
-        memcpy (de->name, ".\0\0", 4);
-        de->inode = cpu_to_le32(inode->i_ino);
-        ext2_set_de_type (de, inode);
-
-        de = (struct ext2_dir_entry_2 *) (base + EXT2_DIR_REC_LEN(1));
-        de->name_len = 2;
-        de->rec_len = cpu_to_le16(chunk_size - EXT2_DIR_REC_LEN(1));
-        de->inode = cpu_to_le32(parent->i_ino);
-        memcpy (de->name, "..\0", 4);
-        ext2_set_de_type (de, inode);
-
-        err = ext2_commit_chunk(page, 0, chunk_size);
-fail:
-        kunmap(page);
-        unlock_page(page);
-        page_cache_release(page);
-        ENTRY;
-        return err;
-}
-
-/*
- * routine to check that the specified directory is empty (for rmdir)
- */
-int ext2_empty_dir (struct inode * inode)
-{
-        struct page *page = NULL;
-        unsigned long i, npages = dir_pages(inode);
-
-        for (i = 0; i < npages; i++) {
-                char *kaddr;
-                ext2_dirent * de;
-                page = ll_get_dir_page(inode, i);
-
-                if (IS_ERR(page))
-                        continue;
-
-                kaddr = page_address(page);
-                de = (ext2_dirent *)kaddr;
-                kaddr += PAGE_CACHE_SIZE-EXT2_DIR_REC_LEN(1);
-
-                while ((char *)de <= kaddr) {
-                        if (de->inode != 0) {
-                                /* check for . and .. */
-                                if (de->name[0] != '.')
-                                        goto not_empty;
-                                if (de->name_len > 2)
-                                        goto not_empty;
-                                if (de->name_len < 2) {
-                                        if (de->inode !=
-                                            cpu_to_le32(inode->i_ino))
-                                                goto not_empty;
-                                } else if (de->name[1] != '.')
-                                        goto not_empty;
-                        }
-                        de = ext2_next_entry(de);
-                }
-                ext2_put_page(page);
-        }
-        return 1;
-
-not_empty:
-        ext2_put_page(page);
-        return 0;
-}
-
 static int ll_dir_ioctl(struct inode *inode, struct file *file,
                         unsigned int cmd, unsigned long arg)
 {
         struct ll_sb_info *sbi = ll_i2sbi(inode);
         struct obd_ioctl_data *data;
         ENTRY;
+
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%u\n", inode->i_ino,
                inode->i_generation, inode, cmd);
 
         if (_IOC_TYPE(cmd) == 'T') /* tty ioctls */
                 return -ENOTTY;
 
+        lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_IOCTL);
         switch(cmd) {
         case IOC_MDC_LOOKUP: {
                 struct ptlrpc_request *request = NULL;
@@ -834,9 +436,61 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file,
                 obd_ioctl_freedata(buf, len);
                 return rc;
         }
-        default:
-                CERROR("unrecognized ioctl %#x\n", cmd);
+        case LL_IOC_LOV_SETSTRIPE:
+        case LL_IOC_LOV_GETSTRIPE:
                 RETURN(-ENOTTY);
+        case IOC_MDC_GETSTRIPE: {
+                struct ptlrpc_request *request = NULL;
+                struct ll_fid fid;
+                struct mds_body *body;
+                struct lov_mds_md *lmm;
+                char *filename;
+                int rc, lmmsize;
+
+                filename = getname((const char *)arg);
+                if (IS_ERR(filename))
+                        RETURN(PTR_ERR(filename));
+
+                ll_inode2fid(&fid, inode);
+                rc = mdc_getattr_name(&sbi->ll_mdc_conn, &fid, filename,
+                                      strlen(filename)+1, OBD_MD_FLEASIZE,
+                                      obd_size_diskmd(&sbi->ll_osc_conn, NULL),
+                                      &request);
+                if (rc < 0) {
+                        CERROR("mdc_getattr_name: failed on %s: rc %d\n",
+                               filename, rc);
+                        GOTO(out_name, rc);
+                }
+
+                body = lustre_msg_buf(request->rq_repmsg, 0, sizeof (*body));
+                LASSERT(body != NULL);         /* checked by mdc_getattr_name */
+                LASSERT_REPSWABBED(request, 0);/* swabbed by mdc_getattr_name */
+
+                lmmsize = body->eadatasize;
+
+                if (!(body->valid & OBD_MD_FLEASIZE) || lmmsize == 0)
+                        GOTO(out_req, rc = -ENODATA);
+
+                if (lmmsize > 4096)
+                        GOTO(out_req, rc = -EFBIG);
+
+                lmm = lustre_msg_buf(request->rq_repmsg, 1, lmmsize);
+                LASSERT(lmm != NULL);
+                LASSERT_REPSWABBED(request, 1);
+
+                rc = copy_to_user((struct lov_mds_md *)arg, lmm, lmmsize);
+                if (rc)
+                        GOTO(out_req, rc = -EFAULT);
+
+                EXIT;
+        out_req:
+                ptlrpc_req_finished(request);
+        out_name:
+                putname(filename);
+                return rc;
+        }
+        default:
+                return obd_iocontrol(cmd,&sbi->ll_osc_conn,0,NULL,(void *)arg);
         }
 }
 
index 943ba1b..67d18fd 100644 (file)
@@ -32,8 +32,7 @@
 #include <linux/lustre_compat25.h>
 #endif
 
-int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc);
-extern int ll_setattr(struct dentry *de, struct iattr *attr);
+#include "llite_internal.h"
 
 static int ll_mdc_close(struct lustre_handle *mdc_conn, struct inode *inode,
                         struct file *file)
@@ -135,28 +134,21 @@ int ll_file_release(struct inode *inode, struct file *file)
         lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_RELEASE);
         fd = (struct ll_file_data *)file->private_data;
         if (!fd) /* no process opened the file after an mcreate */
-                RETURN(rc = 0);
+                RETURN(0);
 
         /* we might not be able to get a valid handle on this file
          * again so we really want to flush our write cache.. */
-        if (S_ISREG(inode->i_mode)) {
-                filemap_fdatasync(inode->i_mapping);
-                filemap_fdatawait(inode->i_mapping);
-
-                if (lsm != NULL) {
-                        memset(&oa, 0, sizeof(oa));
-                        oa.o_id = lsm->lsm_object_id;
-                        oa.o_mode = S_IFREG;
-                        oa.o_valid = OBD_MD_FLTYPE | OBD_MD_FLID;
-
-                        memcpy(&oa.o_inline, &fd->fd_ost_och, FD_OSTDATA_SIZE);
-                        oa.o_valid |= OBD_MD_FLHANDLE;
+        if (S_ISREG(inode->i_mode) && lsm) {
+                write_inode_now(inode, 0);
+                obdo_from_inode(&oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
+                                            OBD_MD_FLMTIME | OBD_MD_FLCTIME);
+                memcpy(obdo_handle(&oa), &fd->fd_ost_och, FD_OSTDATA_SIZE);
+                oa.o_valid |= OBD_MD_FLHANDLE;
 
-                        rc = obd_close(&sbi->ll_osc_conn, &oa, lsm, NULL);
-                        if (rc)
-                                CERROR("inode %lu object close failed: rc = "
-                                       "%d\n", inode->i_ino, rc);
-                }
+                rc = obd_close(&sbi->ll_osc_conn, &oa, lsm, NULL);
+                if (rc)
+                        CERROR("inode %lu object close failed: rc %d\n",
+                               inode->i_ino, rc);
         }
 
         rc2 = ll_mdc_close(&sbi->ll_mdc_conn, inode, file);
@@ -206,16 +198,16 @@ static int ll_osc_open(struct lustre_handle *conn, struct inode *inode,
                 RETURN(-ENOMEM);
         oa->o_id = lsm->lsm_object_id;
         oa->o_mode = S_IFREG;
-        oa->o_valid = (OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLBLOCKS |
-                       OBD_MD_FLMTIME | OBD_MD_FLCTIME);
+        oa->o_valid = OBD_MD_FLID;
+        obdo_from_inode(oa, inode, OBD_MD_FLTYPE);
         rc = obd_open(conn, oa, lsm, NULL, &fd->fd_ost_och);
         if (rc)
                 GOTO(out, rc);
 
         file->f_flags &= ~O_LOV_DELAY_CREATE;
-        obdo_to_inode(inode, oa, OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
-                                 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
-
+        obdo_refresh_inode(inode, oa, OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
+                                      OBD_MD_FLATIME | OBD_MD_FLMTIME |
+                                      OBD_MD_FLCTIME);
         EXIT;
 out:
         obdo_free(oa);
@@ -236,24 +228,33 @@ static int ll_create_obj(struct lustre_handle *conn, struct inode *inode,
         struct obdo *oa;
         struct iattr iattr;
         struct mdc_op_data op_data;
-        int rc, err, lmm_size = 0;;
+        struct obd_trans_info oti = { 0 };
+        int rc, err, lmm_size = 0;
         ENTRY;
 
         oa = obdo_alloc();
         if (!oa)
                 RETURN(-ENOMEM);
 
+        LASSERT(S_ISREG(inode->i_mode));
         oa->o_mode = S_IFREG | 0600;
         oa->o_id = inode->i_ino;
+        oa->o_generation = inode->i_generation;
         /* Keep these 0 for now, because chown/chgrp does not change the
          * ownership on the OST, and we don't want to allow BA OST NFS
          * users to access these objects by mistake. */
         oa->o_uid = 0;
         oa->o_gid = 0;
-        oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE |
-                OBD_MD_FLUID | OBD_MD_FLGID;
+        oa->o_valid = OBD_MD_FLID | OBD_MD_FLGENER | OBD_MD_FLTYPE |
+                OBD_MD_FLMODE | OBD_MD_FLUID | OBD_MD_FLGID;
+#ifdef ENABLE_ORPHANS
+        oa->o_valid |= OBD_MD_FLCOOKIE;
+#endif
 
-        rc = obd_create(conn, oa, &lsm, NULL);
+        obdo_from_inode(oa, inode, OBD_MD_FLTYPE|OBD_MD_FLATIME|OBD_MD_FLMTIME|
+                        OBD_MD_FLCTIME | (inode->i_size ? OBD_MD_FLSIZE : 0));
+
+        rc = obd_create(conn, oa, &lsm, &oti);
         if (rc) {
                 CERROR("error creating objects for inode %lu: rc = %d\n",
                        inode->i_ino, rc);
@@ -263,7 +264,7 @@ static int ll_create_obj(struct lustre_handle *conn, struct inode *inode,
                 }
                 GOTO(out_oa, rc);
         }
-        obdo_to_inode(inode, oa, OBD_MD_FLBLKSZ);
+        obdo_refresh_inode(inode, oa, OBD_MD_FLBLKSZ);
 
         LASSERT(lsm && lsm->lsm_object_id);
         rc = obd_packmd(conn, &lmm, lsm);
@@ -278,11 +279,18 @@ static int ll_create_obj(struct lustre_handle *conn, struct inode *inode,
 
         ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0);
 
-        rc = mdc_setattr(&ll_i2sbi(inode)->ll_mdc_conn, &op_data,
-                         &iattr, lmm, lmm_size, &req);
+#if 0
+#warning FIXME: next line is for debugging purposes only
+        obd_log_cancel(&ll_i2sbi(inode)->ll_osc_conn, lsm, oti.oti_numcookies,
+                       oti.oti_logcookies, OBD_LLOG_FL_SENDNOW);
+#endif
+
+        rc = mdc_setattr(&ll_i2sbi(inode)->ll_mdc_conn, &op_data, &iattr,
+                         lmm, lmm_size, oti.oti_logcookies,
+                         oti.oti_numcookies * sizeof(oti.oti_onecookie), &req);
         ptlrpc_req_finished(req);
 
-        obd_free_diskmd (conn, &lmm);
+        obd_free_diskmd(conn, &lmm);
 
         /* If we couldn't complete mdc_open() and store the stripe MD on the
          * MDS, we need to destroy the objects now or they will be leaked.
@@ -297,13 +305,21 @@ static int ll_create_obj(struct lustre_handle *conn, struct inode *inode,
 
         EXIT;
 out_oa:
+        oti_free_cookies(&oti);
         obdo_free(oa);
         return rc;
 
 out_destroy:
-        obdo_from_inode(oa, inode, OBD_MD_FLTYPE);
         oa->o_id = lsm->lsm_object_id;
-        oa->o_valid |= OBD_MD_FLID;
+        oa->o_valid = OBD_MD_FLID;
+        obdo_from_inode(oa, inode, OBD_MD_FLTYPE);
+#if 0
+        err = obd_log_cancel(conn, lsm, oti.oti_numcookies, oti.oti_logcookies,
+                             OBD_LLOG_FL_SENDNOW);
+        if (err)
+                CERROR("error cancelling inode %lu log cookies: rc %d\n",
+                       inode->i_ino, err);
+#endif
         err = obd_destroy(conn, oa, lsm, NULL);
         obd_free_memmd(conn, &lsm);
         if (err)
@@ -327,8 +343,6 @@ out_destroy:
  * before returning in the O_LOV_DELAY_CREATE case and dropping it here
  * or in ll_file_release(), but I'm not sure that is desirable/necessary.
  */
-extern int ll_it_open_error(int phase, struct lookup_intent *it);
-
 int ll_file_open(struct inode *inode, struct file *file)
 {
         struct ll_sb_info *sbi = ll_i2sbi(inode);
@@ -346,9 +360,10 @@ int ll_file_open(struct inode *inode, struct file *file)
         if (inode->i_sb->s_root == file->f_dentry)
                 RETURN(0);
 
+        it = file->f_it;
         lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_OPEN);
-        LL_GET_INTENT(file->f_dentry, it);
-        rc = ll_it_open_error(IT_OPEN_OPEN, it);
+
+        rc = ll_it_open_error(DISP_OPEN_OPEN, it);
         if (rc)
                 RETURN(rc);
 
@@ -363,7 +378,8 @@ int ll_file_open(struct inode *inode, struct file *file)
 
         lsm = lli->lli_smd;
         if (lsm == NULL) {
-                if (file->f_flags & O_LOV_DELAY_CREATE) {
+                if (file->f_flags & O_LOV_DELAY_CREATE ||
+                    !(file->f_mode & FMODE_WRITE)) {
                         CDEBUG(D_INODE, "delaying object creation\n");
                         RETURN(0);
                 }
@@ -418,7 +434,7 @@ int ll_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm,
                 OBD_MD_FLCTIME;
 
         if (ostdata != NULL) {
-                memcpy(&oa.o_inline, ostdata, FD_OSTDATA_SIZE);
+                memcpy(obdo_handle(&oa), ostdata, FD_OSTDATA_SIZE);
                 oa.o_valid |= OBD_MD_FLHANDLE;
         }
 
@@ -455,8 +471,8 @@ int ll_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm,
                  (aft != 0 || after < before) &&
                  oa.o_size < ((u64)before + 1) << PAGE_CACHE_SHIFT);
 
-        obdo_to_inode(inode, &oa, (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
-                                   OBD_MD_FLMTIME | OBD_MD_FLCTIME));
+        obdo_refresh_inode(inode, &oa, OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
+                                       OBD_MD_FLMTIME | OBD_MD_FLCTIME);
         if (inode->i_blksize < PAGE_CACHE_SIZE)
                 inode->i_blksize = PAGE_CACHE_SIZE;
 
@@ -477,102 +493,6 @@ int ll_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm,
         RETURN(0);
 }
 
-/*
- * some callers, notably truncate, really don't want i_size set based
- * on the the size returned by the getattr, or lock acquisition in
- * the future.
- */
-int ll_extent_lock_no_validate(struct ll_file_data *fd, struct inode *inode,
-                   struct lov_stripe_md *lsm,
-                   int mode, struct ldlm_extent *extent,
-                   struct lustre_handle *lockh)
-{
-        struct ll_sb_info *sbi = ll_i2sbi(inode);
-        int rc, flags = 0;
-        ENTRY;
-
-        LASSERT(lockh->cookie == 0);
-
-        /* XXX phil: can we do this?  won't it screw the file size up? */
-        if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) ||
-            (sbi->ll_flags & LL_SBI_NOLCK))
-                RETURN(0);
-
-        CDEBUG(D_DLMTRACE, "Locking inode %lu, start "LPU64" end "LPU64"\n",
-               inode->i_ino, extent->start, extent->end);
-
-        rc = obd_enqueue(&sbi->ll_osc_conn, lsm, NULL, LDLM_EXTENT, extent,
-                         sizeof(extent), mode, &flags, ll_extent_lock_callback,
-                         inode, lockh);
-
-        RETURN(rc);
-}
-
-/*
- * this grabs a lock and manually implements behaviour that makes it look like
- * the OST is returning the file size with each lock acquisition.
- */
-int ll_extent_lock(struct ll_file_data *fd, struct inode *inode,
-                   struct lov_stripe_md *lsm, int mode,
-                   struct ldlm_extent *extent, struct lustre_handle *lockh)
-{
-        struct ll_inode_info *lli = ll_i2info(inode);
-        struct ldlm_extent size_lock;
-        struct lustre_handle match_lockh = {0};
-        int flags, rc, matched;
-        ENTRY;
-
-        rc = ll_extent_lock_no_validate(fd, inode, lsm, mode, extent, lockh);
-        if (rc != ELDLM_OK)
-                RETURN(rc);
-
-        if (test_bit(LLI_F_HAVE_SIZE_LOCK, &lli->lli_flags))
-                RETURN(0);
-
-        rc = ll_inode_getattr(inode, lsm, fd ? &fd->fd_ost_och : NULL);
-        if (rc) {
-                ll_extent_unlock(fd, inode, lsm, mode, lockh);
-                RETURN(rc);
-        }
-
-        size_lock.start = inode->i_size;
-        size_lock.end = OBD_OBJECT_EOF;
-
-        /* XXX I bet we should be checking the lock ignore flags.. */
-        flags = LDLM_FL_CBPENDING | LDLM_FL_BLOCK_GRANTED | LDLM_FL_MATCH_DATA;
-        matched = obd_match(&ll_i2sbi(inode)->ll_osc_conn, lsm, LDLM_EXTENT,
-                            &size_lock, sizeof(size_lock), LCK_PR, &flags,
-                            inode, &match_lockh);
-
-        /* hey, alright, we hold a size lock that covers the size we
-         * just found, its not going to change for a while.. */
-        if (matched == 1) {
-                set_bit(LLI_F_HAVE_SIZE_LOCK, &lli->lli_flags);
-                obd_cancel(&ll_i2sbi(inode)->ll_osc_conn, lsm, LCK_PR,
-                           &match_lockh);
-        }
-
-        RETURN(0);
-}
-
-int ll_extent_unlock(struct ll_file_data *fd, struct inode *inode,
-                struct lov_stripe_md *lsm, int mode,
-                struct lustre_handle *lockh)
-{
-        struct ll_sb_info *sbi = ll_i2sbi(inode);
-        int rc;
-        ENTRY;
-
-        /* XXX phil: can we do this?  won't it screw the file size up? */
-        if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) ||
-            (sbi->ll_flags & LL_SBI_NOLCK))
-                RETURN(0);
-
-        rc = obd_cancel(&sbi->ll_osc_conn, lsm, mode, lockh);
-
-        RETURN(rc);
-}
-
 static inline void ll_remove_suid(struct inode *inode)
 {
         unsigned int mode;
@@ -591,22 +511,10 @@ static inline void ll_remove_suid(struct inode *inode)
 #if 0
 static void ll_update_atime(struct inode *inode)
 {
-#ifdef USE_ATIME
-        struct iattr attr;
-
-        attr.ia_atime = LTIME_S(CURRENT_TIME);
-        attr.ia_valid = ATTR_ATIME;
-
-        if (inode->i_atime == attr.ia_atime) return;
         if (IS_RDONLY(inode)) return;
-        if (IS_NOATIME(inode)) return;
 
-        /* ll_inode_setattr() sets inode->i_atime from attr.ia_atime */
-        ll_inode_setattr(inode, &attr, 0);
-#else
         /* update atime, but don't explicitly write it out just this change */
         inode->i_atime = CURRENT_TIME;
-#endif
 }
 #endif
 
@@ -676,19 +584,19 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm,
 
         /* start writeback on dirty pages in the extent when its PW */
         for (i = start, j = start % count;
-                        lock->l_granted_mode == LCK_PW && i < end; j++, i++) {
+             lock->l_granted_mode == LCK_PW && i < end; j++, i++) {
                 if (j == count) {
                         i += skip;
                         j = 0;
                 }
                 /* its unlikely, but give us a chance to bail when we're out */
-                PGCACHE_WRLOCK(inode->i_mapping);
+                ll_pgcache_lock(inode->i_mapping);
                 if (list_empty(&inode->i_mapping->dirty_pages)) {
                         CDEBUG(D_INODE, "dirty list empty\n");
-                        PGCACHE_WRUNLOCK(inode->i_mapping);
+                        ll_pgcache_unlock(inode->i_mapping);
                         break;
                 }
-                PGCACHE_WRUNLOCK(inode->i_mapping);
+                ll_pgcache_unlock(inode->i_mapping);
 
                 if (need_resched())
                         schedule();
@@ -702,10 +610,10 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm,
                 }
                 if (PageDirty(page)) {
                         CDEBUG(D_INODE, "writing page %p\n", page);
-                        PGCACHE_WRLOCK(inode->i_mapping);
+                        ll_pgcache_lock(inode->i_mapping);
                         list_del(&page->list);
                         list_add(&page->list, &inode->i_mapping->locked_pages);
-                        PGCACHE_WRUNLOCK(inode->i_mapping);
+                        ll_pgcache_unlock(inode->i_mapping);
 
                         /* this writepage might write out pages outside
                          * this extent, but that's ok, the pages are only
@@ -730,19 +638,19 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm,
         LASSERT((extent->start & ~PAGE_CACHE_MASK) == 0);
         LASSERT(((extent->end+1) & ~PAGE_CACHE_MASK) == 0);
         for (i = start, j = start % count ; i < end ; j++, i++) {
-                if ( j == count ) {
+                if (j == count) {
                         i += skip;
                         j = 0;
                 }
-                PGCACHE_WRLOCK(inode->i_mapping);
+                ll_pgcache_lock(inode->i_mapping);
                 if (list_empty(&inode->i_mapping->dirty_pages) &&
                      list_empty(&inode->i_mapping->clean_pages) &&
                      list_empty(&inode->i_mapping->locked_pages)) {
                         CDEBUG(D_INODE, "nothing left\n");
-                        PGCACHE_WRUNLOCK(inode->i_mapping);
+                        ll_pgcache_unlock(inode->i_mapping);
                         break;
                 }
-                PGCACHE_WRUNLOCK(inode->i_mapping);
+                ll_pgcache_unlock(inode->i_mapping);
                 if (need_resched())
                         schedule();
                 page = find_get_page(inode->i_mapping, i);
@@ -755,15 +663,16 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm,
                         truncate_complete_page(page);
 #else
                         truncate_complete_page(page->mapping, page);
-#endif                
+#endif
                 unlock_page(page);
                 page_cache_release(page);
         }
         EXIT;
 }
 
-int ll_extent_lock_callback(struct ldlm_lock *lock, struct ldlm_lock_desc *new,
-                            void *data, int flag)
+static int ll_extent_lock_callback(struct ldlm_lock *lock,
+                                   struct ldlm_lock_desc *new, void *data,
+                                   int flag)
 {
         struct inode *inode = data;
         struct ll_inode_info *lli = ll_i2info(inode);
@@ -771,7 +680,10 @@ int ll_extent_lock_callback(struct ldlm_lock *lock, struct ldlm_lock_desc *new,
         int rc;
         ENTRY;
 
-        LASSERT(inode != NULL);
+        if ((unsigned long)inode < 0x1000) {
+                LDLM_ERROR(lock, "cancelling lock with bad data %p", data);
+                LBUG();
+        }
 
         switch (flag) {
         case LDLM_CB_BLOCKING:
@@ -785,9 +697,15 @@ int ll_extent_lock_callback(struct ldlm_lock *lock, struct ldlm_lock_desc *new,
                  * could know to write-back or simply throw away the pages
                  * based on if the cancel comes from a desire to, say,
                  * read or truncate.. */
-                LASSERT((unsigned long)inode > 0x1000);
-                LASSERT((unsigned long)lli > 0x1000);
-                LASSERT((unsigned long)lli->lli_smd > 0x1000);
+                if ((unsigned long)lli->lli_smd < 0x1000) {
+                        /* note that lli is part of the inode itself, so it
+                         * is valid, as we checked the inode pointer above. */
+                        CERROR("inode %lu, sb %p, lli %p, lli_smd %p\n",
+                               inode->i_ino, inode->i_sb, lli, lli->lli_smd);
+                        LDLM_ERROR(lock, "cancel lock on bad inode %p", inode);
+                        LBUG();
+                }
+
                 ll_pgcache_remove_extent(inode, lli->lli_smd, lock);
                 break;
         default:
@@ -797,6 +715,102 @@ int ll_extent_lock_callback(struct ldlm_lock *lock, struct ldlm_lock_desc *new,
         RETURN(0);
 }
 
+/*
+ * Enqueue a @mode extent lock on @extent with no size-validating getattr;
+ * some callers, notably truncate, really don't want i_size set from that.
+ * Returns 0 if locking is bypassed, otherwise the obd_enqueue() result.
+ */
+int ll_extent_lock_no_validate(struct ll_file_data *fd, struct inode *inode,
+                   struct lov_stripe_md *lsm,
+                   int mode, struct ldlm_extent *extent,
+                   struct lustre_handle *lockh)
+{
+        struct ll_sb_info *sbi = ll_i2sbi(inode);
+        int rc, flags = 0;
+        ENTRY;
+
+        LASSERT(lockh->cookie == 0);
+
+        /* XXX phil: can we do this?  won't it screw the file size up? */
+        if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) ||
+            (sbi->ll_flags & LL_SBI_NOLCK))
+                RETURN(0);
+
+        CDEBUG(D_DLMTRACE, "Locking inode %lu, start "LPU64" end "LPU64"\n",
+               inode->i_ino, extent->start, extent->end);
+        /* pass the size of the extent itself, not of the pointer to it */
+        rc = obd_enqueue(&sbi->ll_osc_conn, lsm, NULL, LDLM_EXTENT, extent,
+                         sizeof(*extent), mode, &flags, ll_extent_lock_callback,
+                         inode, lockh);
+
+        RETURN(rc);
+}
+
+/*
+ * Grab an extent lock, then getattr so that it looks like the OST returned
+ * the file size with the lock.  A failed getattr drops the lock again.
+ */
+int ll_extent_lock(struct ll_file_data *fd, struct inode *inode,
+                   struct lov_stripe_md *lsm, int mode,
+                   struct ldlm_extent *extent, struct lustre_handle *lockh)
+{
+        struct ll_inode_info *lli = ll_i2info(inode);
+        struct ldlm_extent size_lock;
+        struct lustre_handle match_lockh = {0};
+        int flags, rc, matched;
+        ENTRY;
+
+        rc = ll_extent_lock_no_validate(fd, inode, lsm, mode, extent, lockh);
+        if (rc != ELDLM_OK)
+                RETURN(rc);
+
+        if (test_bit(LLI_F_HAVE_SIZE_LOCK, &lli->lli_flags))
+                RETURN(0);
+
+        rc = ll_inode_getattr(inode, lsm, fd ? &fd->fd_ost_och : NULL);
+        if (rc) {
+                ll_extent_unlock(fd, inode, lsm, mode, lockh);
+                RETURN(rc);
+        }
+        /* try to match a PR lock covering everything from i_size to EOF */
+        size_lock.start = inode->i_size;
+        size_lock.end = OBD_OBJECT_EOF;
+
+        /* XXX I bet we should be checking the lock ignore flags.. */
+        flags = LDLM_FL_CBPENDING | LDLM_FL_BLOCK_GRANTED | LDLM_FL_MATCH_DATA;
+        matched = obd_match(&ll_i2sbi(inode)->ll_osc_conn, lsm, LDLM_EXTENT,
+                            &size_lock, sizeof(size_lock), LCK_PR, &flags,
+                            inode, &match_lockh);
+
+        /* hey, alright, we hold a size lock that covers the size we
+         * just found, it's not going to change for a while.. */
+        if (matched == 1) {
+                set_bit(LLI_F_HAVE_SIZE_LOCK, &lli->lli_flags);
+                obd_cancel(&ll_i2sbi(inode)->ll_osc_conn, lsm, LCK_PR,
+                           &match_lockh);
+        }
+
+        RETURN(0);
+}
+
+int ll_extent_unlock(struct ll_file_data *fd, struct inode *inode,
+                struct lov_stripe_md *lsm, int mode,
+                struct lustre_handle *lockh)
+{
+        struct ll_sb_info *sbi = ll_i2sbi(inode);
+        int rc;
+        ENTRY;
+        /* cancel @lockh; skipped under the same flags that skip the enqueue */
+        /* XXX phil: can we do this?  won't it screw the file size up? */
+        if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) ||
+            (sbi->ll_flags & LL_SBI_NOLCK))
+                RETURN(0);
+
+        rc = obd_cancel(&sbi->ll_osc_conn, lsm, mode, lockh);
+
+        RETURN(rc);
+}
+
 static ssize_t ll_file_read(struct file *filp, char *buf, size_t count,
                             loff_t *ppos)
 {
@@ -819,6 +833,10 @@ static ssize_t ll_file_read(struct file *filp, char *buf, size_t count,
 
         lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, LPROC_LL_READ_BYTES,
                             count);
+
+        if (!lsm)
+                RETURN(0);
+
         /* grab a -> eof extent to push extending writes out of node's caches
          * so we can see them at the getattr after lock acquisition.  this will
          * turn into a seperate [*ppos + count, EOF] 'size intent' lock attempt
@@ -852,8 +870,8 @@ static ssize_t ll_file_read(struct file *filp, char *buf, size_t count,
 /*
  * Write to a file (through the page cache).
  */
-static ssize_t
-ll_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
+static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
+                             loff_t *ppos)
 {
         struct ll_file_data *fd = file->private_data;
         struct inode *inode = file->f_dentry->d_inode;
@@ -868,6 +886,7 @@ ll_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n",
                inode->i_ino, inode->i_generation, inode, count, *ppos);
 
+        SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */
         /*
          * sleep doing some writeback work of this mount's dirty data
          * if the VM thinks we're low on memory.. other dirtying code
@@ -875,12 +894,14 @@ ll_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
          * careful not to hold locked pages while they do so.  like
          * ll_prepare_write.  *cough*
          */
-        LL_CHECK_DIRTY(inode->i_sb);
+        ll_check_dirty(inode->i_sb);
 
         /* POSIX, but surprised the VFS doesn't check this already */
         if (count == 0)
                 RETURN(0);
 
+        LASSERT(lsm);
+
         if (file->f_flags & O_APPEND) {
                 extent.start = 0;
                 extent.end = OBD_OBJECT_EOF;
@@ -943,7 +964,8 @@ static int ll_lov_setstripe(struct inode *inode, struct file *file,
         lsm = lli->lli_smd;
         if (lsm) {
                 up(&lli->lli_open_sem);
-                CERROR("stripe already exists for ino %lu\n", inode->i_ino);
+                CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
+                       inode->i_ino);
                 /* If we haven't already done the open, do so now */
                 if (file->f_flags & O_LOV_DELAY_CREATE) {
                         int rc2 = ll_osc_open(conn, inode, file, lsm);
@@ -987,6 +1009,7 @@ int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
         struct ll_file_data *fd = file->private_data;
         struct lustre_handle *conn;
         int flags;
+
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%u\n", inode->i_ino,
                inode->i_generation, inode, cmd);
 
@@ -1077,8 +1100,8 @@ loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
 
 int ll_fsync(struct file *file, struct dentry *dentry, int data)
 {
-        int ret;
         struct inode *inode = dentry->d_inode;
+        int rc;
         ENTRY;
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
                inode->i_generation, inode);
@@ -1090,17 +1113,17 @@ int ll_fsync(struct file *file, struct dentry *dentry, int data)
          * still holding the PW lock that covered the dirty pages.  XXX we
          * should probably get a reference on it, though, just to be clear.
          */
-        ret = filemap_fdatasync(dentry->d_inode->i_mapping);
-        if ( ret == 0 )
-                ret = filemap_fdatawait(dentry->d_inode->i_mapping);
+        rc = filemap_fdatasync(inode->i_mapping);
+        if (rc == 0)
+                rc = filemap_fdatawait(inode->i_mapping);
 
-        RETURN(ret);
+        RETURN(rc);
 }
 
-int ll_inode_revalidate(struct dentry *dentry)
+int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
 {
         struct inode *inode = dentry->d_inode;
-        struct lov_stripe_md *lsm = NULL;
+        struct lov_stripe_md *lsm;
         ENTRY;
 
         if (!inode) {
@@ -1118,70 +1141,41 @@ int ll_inode_revalidate(struct dentry *dentry)
            below when the lock is marked CB_PENDING.  That RPC may not
            go out because someone else may be in another RPC waiting for
            that lock*/
-        if (!(dentry->d_it && dentry->d_it->it_lock_mode) &&
-            !ll_have_md_lock(dentry)) {
+        if (!(it && it->it_lock_mode) && !ll_have_md_lock(dentry)) {
+                struct lustre_md md;
                 struct ptlrpc_request *req = NULL;
                 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
                 struct ll_fid fid;
-                struct mds_body *body;
-                struct lov_mds_md *lmm;
                 unsigned long valid = 0;
-                int eadatalen = 0, rc;
+                int rc;
+                int ealen = 0;
 
-                /* Why don't we update all valid MDS fields here, if we're
-                 * doing an RPC anyways?  -phil */
                 if (S_ISREG(inode->i_mode)) {
-                        eadatalen = obd_size_diskmd(&sbi->ll_osc_conn, NULL);
+                        ealen = obd_size_diskmd(&sbi->ll_osc_conn, NULL);
                         valid |= OBD_MD_FLEASIZE;
                 }
                 ll_inode2fid(&fid, inode);
-                rc = mdc_getattr(&sbi->ll_mdc_conn, &fid,
-                                 valid, eadatalen, &req);
+                rc = mdc_getattr(&sbi->ll_mdc_conn, &fid, valid, ealen, &req);
                 if (rc) {
                         CERROR("failure %d inode %lu\n", rc, inode->i_ino);
                         RETURN(-abs(rc));
                 }
-
-                body = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*body));
-                LASSERT (body != NULL);         /* checked by mdc_getattr() */
-                LASSERT_REPSWABBED (req, 0);    /* swabbed by mdc_getattr() */
-
-                if (S_ISREG(inode->i_mode) &&
-                    (body->valid & (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS))) {
-                        CERROR("MDS sent back size for regular file\n");
-                        body->valid &= ~(OBD_MD_FLSIZE | OBD_MD_FLBLOCKS);
-                }
+                rc = mdc_req2lustre_md(req, 0, &sbi->ll_osc_conn, &md);
 
                 /* XXX Too paranoid? */
-                if ((body->valid ^ valid) & OBD_MD_FLEASIZE)
+                if ((md.body->valid ^ valid) & OBD_MD_FLEASIZE)
                         CERROR("Asked for %s eadata but got %s\n",
                                (valid & OBD_MD_FLEASIZE) ? "some" : "no",
-                               (body->valid & OBD_MD_FLEASIZE) ? "some":"none");
-
-                if (S_ISREG(inode->i_mode) &&
-                    (body->valid & OBD_MD_FLEASIZE)) {
-                        if (body->eadatasize == 0) { /* no EA data */
-                                CERROR("OBD_MD_FLEASIZE set but no data\n");
-                                RETURN(-EPROTO);
-                        }
-                        /* Only bother with this if inode's lsm not set? */
-                        lmm = lustre_msg_buf(req->rq_repmsg,1,body->eadatasize);
-                        LASSERT(lmm != NULL);       /* mdc_getattr() checked */
-                        LASSERT_REPSWABBED(req, 1); /* mdc_getattr() swabbed */
-
-                        rc = obd_unpackmd (&sbi->ll_osc_conn,
-                                           &lsm, lmm, body->eadatasize);
-                        if (rc < 0) {
-                                CERROR("Error %d unpacking eadata\n", rc);
-                                ptlrpc_req_finished(req);
-                                RETURN(rc);
-                        }
-                        LASSERT(rc >= sizeof(*lsm));
+                               (md.body->valid & OBD_MD_FLEASIZE) ? "some":
+                               "none");
+                if (rc) {
+                        ptlrpc_req_finished(req);
+                        RETURN(rc);
                 }
 
-                ll_update_inode(inode, body, lsm);
-                if (lsm != NULL && ll_i2info(inode)->lli_smd != lsm)
-                        obd_free_memmd(&sbi->ll_osc_conn, &lsm);
+                ll_update_inode(inode, md.body, md.lsm);
+                if (md.lsm != NULL && ll_i2info(inode)->lli_smd != md.lsm)
+                        obd_free_memmd(&sbi->ll_osc_conn, &md.lsm);
 
                 ptlrpc_req_finished(req);
         }
@@ -1211,19 +1205,20 @@ int ll_inode_revalidate(struct dentry *dentry)
 }
 
 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-static int ll_getattr(struct vfsmount *mnt, struct dentry *de,
+int ll_getattr(struct vfsmount *mnt, struct dentry *de,
+                      struct lookup_intent *it, 
                       struct kstat *stat)
 {
         int res = 0;
         struct inode *inode = de->d_inode;
 
+        res = ll_inode_revalidate_it(de, it);
         lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_GETATTR);
-        res = ll_inode_revalidate(de);
+
         if (res)
                 return res;
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        stat->dev = inode->i_dev;
-#endif
+
+        stat->dev = inode->i_sb->s_dev;
         stat->ino = inode->i_ino;
         stat->mode = inode->i_mode;
         stat->nlink = inode->i_nlink;
@@ -1234,6 +1229,8 @@ static int ll_getattr(struct vfsmount *mnt, struct dentry *de,
         stat->mtime = inode->i_mtime;
         stat->ctime = inode->i_ctime;
         stat->size = inode->i_size;
+        stat->blksize = inode->i_blksize;
+        stat->blocks = inode->i_blocks;
         return 0;
 }
 #endif
@@ -1254,9 +1251,9 @@ struct inode_operations ll_file_inode_operations = {
         setattr:    ll_setattr,
         truncate:   ll_truncate,
 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-        getattr: ll_getattr,
+        getattr_it: ll_getattr,
 #else
-        revalidate: ll_inode_revalidate,
+        revalidate_it: ll_inode_revalidate_it,
 #endif
 };
 
@@ -1264,8 +1261,8 @@ struct inode_operations ll_special_inode_operations = {
         setattr_raw:    ll_setattr_raw,
         setattr:    ll_setattr,
 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-        getattr:    ll_getattr,
+        getattr_it:    ll_getattr,
 #else
-        revalidate: ll_inode_revalidate,
+        revalidate_it: ll_inode_revalidate_it,
 #endif
 };
index e3fabe6..c30ef8a 100644 (file)
@@ -38,7 +38,6 @@
 #include <linux/rbtree.h>
 #include <linux/seq_file.h>
 #include <linux/time.h>
-#include "llite_internal.h"
 
 /* PG_inactive_clean is shorthand for rmap, we want free_high/low here.. */
 #ifdef PG_inactive_clean
@@ -47,6 +46,7 @@
 
 #define DEBUG_SUBSYSTEM S_LLITE
 #include <linux/lustre_lite.h>
+#include "llite_internal.h"
 
 #ifndef list_for_each_prev_safe
 #define list_for_each_prev_safe(pos, n, head) \
 
 extern spinlock_t inode_lock;
 
-struct ll_writeback_pages {
-        obd_count npgs, max;
-        struct brw_page *pga;
-};
-
 /*
  * check to see if we're racing with truncate and put the page in
  * the brw_page array.  returns 0 if there is more room and 1
@@ -139,13 +134,13 @@ static void ll_get_dirty_pages(struct inode *inode,
                 list_del(&page->list);
                 list_add(&page->list, &mapping->locked_pages);
 
-                if ( ! PageDirty(page) ) {
+                if (!PageDirty(page)) {
                         unlock_page(page);
                         continue;
                 }
                 ClearPageDirty(page);
 
-                if ( llwp_consume_page(llwp, inode, page) != 0)
+                if (llwp_consume_page(llwp, inode, page) != 0)
                         break;
         }
 
@@ -153,26 +148,31 @@ static void ll_get_dirty_pages(struct inode *inode,
         EXIT;
 }
 
-static void ll_writeback(struct inode *inode, struct ll_writeback_pages *llwp)
+static void ll_writeback(struct inode *inode, struct obdo *oa,
+                         struct ll_writeback_pages *llwp)
 {
-        int rc, i;
         struct ptlrpc_request_set *set;
+        int rc, i;
         ENTRY;
 
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),bytes=%u\n",
                inode->i_ino, inode->i_generation, inode,
                ((llwp->npgs-1) << PAGE_SHIFT) + llwp->pga[llwp->npgs-1].count);
 
+        SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */
         set = ptlrpc_prep_set();
         if (set == NULL) {
                 CERROR ("Can't create request set\n");
                 rc = -ENOMEM;
         } else {
-                rc = obd_brw_async(OBD_BRW_WRITE, ll_i2obdconn(inode),
+                rc = obd_brw_async(OBD_BRW_WRITE, ll_i2obdconn(inode), oa,
                                    ll_i2info(inode)->lli_smd, llwp->npgs,
                                    llwp->pga, set, NULL);
                 if (rc == 0)
-                        rc = ptlrpc_set_wait (set);
+                        rc = ptlrpc_set_wait(set);
+                if (rc == 0)
+                        obdo_refresh_inode(inode, oa,
+                                           oa->o_valid & ~OBD_MD_FLSIZE);
                 ptlrpc_set_destroy (set);
         }
         /*
@@ -278,6 +278,7 @@ int ll_check_dirty(struct super_block *sb)
         unsigned long old_flags; /* hack? */
         int making_progress;
         struct inode *inode;
+        struct obdo oa;
         int rc = 0;
         ENTRY;
 
@@ -328,12 +329,18 @@ int ll_check_dirty(struct super_block *sb)
                         llwp.npgs = 0;
                         ll_get_dirty_pages(inode, &llwp);
                         if (llwp.npgs) {
-                               lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
-                                                   LPROC_LL_WB_PRESSURE,
-                                                   llwp.npgs);
-                               ll_writeback(inode, &llwp);
-                               rc += llwp.npgs;
-                               making_progress = 1;
+                                oa.o_id =
+                                      ll_i2info(inode)->lli_smd->lsm_object_id;
+                                oa.o_valid = OBD_MD_FLID;
+                                obdo_from_inode(&oa, inode,
+                                                OBD_MD_FLTYPE | OBD_MD_FLATIME|
+                                                OBD_MD_FLMTIME| OBD_MD_FLCTIME);
+                                lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
+                                                    LPROC_LL_WB_PRESSURE,
+                                                    llwp.npgs);
+                                ll_writeback(inode, &oa, &llwp);
+                                rc += llwp.npgs;
+                                making_progress = 1;
                         }
                 } while (llwp.npgs && should_writeback());
 
@@ -382,13 +389,14 @@ cleanup:
 }
 #endif /* linux 2.5 */
 
-int ll_batch_writepage(struct inode *inode, struct page *page)
+int ll_batch_writepage(struct inode *inode, struct obdo *oa, struct page *page)
 {
         unsigned long old_flags; /* hack? */
         struct ll_writeback_pages llwp;
         int rc = 0;
         ENTRY;
 
+        SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */
         old_flags = current->flags;
         current->flags |= PF_MEMALLOC;
         rc = ll_alloc_brw(inode, &llwp);
@@ -401,7 +409,7 @@ int ll_batch_writepage(struct inode *inode, struct page *page)
         if (llwp.npgs) {
                 lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
                                     LPROC_LL_WB_WRITEPAGE, llwp.npgs);
-                ll_writeback(inode, &llwp);
+                ll_writeback(inode, oa, &llwp);
         }
         kfree(llwp.pga);
 
index 4684383..fd37709 100644 (file)
 #ifndef LLITE_INTERNAL_H
 #define LLITE_INTERNAL_H
 
+
+struct ll_sb_info;
 struct lustre_handle;
 struct lov_stripe_md;
 
+extern void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi);
+extern struct proc_dir_entry *proc_lustre_fs_root;
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+# define hlist_del_init list_del_init
+#endif 
+
+static inline struct inode *ll_info2i(struct ll_inode_info *lli)
+{
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+        return &lli->lli_vfs_inode;
+#else
+        return list_entry(lli, struct inode, u.generic_ip);
+#endif
+}
+
+/* llite/commit_callback.c */
+int ll_commitcbd_setup(struct ll_sb_info *);
+int ll_commitcbd_cleanup(struct ll_sb_info *);
+
+/* lproc_llite.c */
+int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
+                                struct super_block *sb, char *osc, char *mdc);
+void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi);
+
+/* llite/namei.c */
+struct inode *ll_iget(struct super_block *sb, ino_t hash,
+                      struct lustre_md *lic);
+struct dentry *ll_find_alias(struct inode *, struct dentry *);
+int ll_it_open_error(int phase, struct lookup_intent *it);
 int ll_mdc_cancel_unused(struct lustre_handle *conn, struct inode *inode,
                          int flags, void *opaque);
+
+/* llite/rw.c */
+void ll_end_writeback(struct inode *, struct page *);
+
+void ll_remove_dirty(struct inode *inode, unsigned long start,
+                     unsigned long end);
 int ll_rd_dirty_pages(char *page, char **start, off_t off, int count,
                       int *eof, void *data);
 int ll_rd_max_dirty_pages(char *page, char **start, off_t off, int count,
@@ -26,4 +64,96 @@ int ll_clear_dirty_pages(struct lustre_handle *conn, struct lov_stripe_md *lsm,
 int ll_mark_dirty_page(struct lustre_handle *conn, struct lov_stripe_md *lsm,
                        unsigned long index);
 
+/* llite/file.c */
+extern int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *);
+
+/* llite/super.c */
+int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc);
+int ll_setattr(struct dentry *de, struct iattr *attr);
+
+/* iod.c: add VAL to the fis_<STAT> counter of FIS while holding its fis_lock */
+#define IO_STAT_ADD(FIS, STAT, VAL) do {        \
+        struct file_io_stats *_fis_ = (FIS);    \
+        spin_lock(&_fis_->fis_lock);            \
+        _fis_->fis_##STAT += (VAL);             \
+        spin_unlock(&_fis_->fis_lock);          \
+} while (0)
+
+#define INODE_IO_STAT_ADD(INODE, STAT, VAL)        \
+        IO_STAT_ADD(&ll_i2sbi(INODE)->ll_iostats, STAT, VAL)
+/* a page's mapping is an address_space; its owning inode is mapping->host */
+#define PAGE_IO_STAT_ADD(PAGE, STAT, VAL)               \
+        INODE_IO_STAT_ADD((PAGE)->mapping->host, STAT, VAL)
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+/* XXX lliod needs more work in 2.5 before being proven and brought back
+ * to 2.4, it'll at least require a patch to introduce page->private */
+int lliod_start(struct ll_sb_info *sbi, struct inode *inode);
+void lliod_stop(struct ll_sb_info *sbi);
+#else
+#define lliod_start(sbi, inode) ({int _ret = 0; (void)sbi, (void)inode; _ret;})
+#define lliod_stop(sbi) do { (void)sbi; } while (0)
+#endif
+void lliod_wakeup(struct inode *inode);
+void lliod_give_plist(struct inode *inode, struct plist *plist, int rw);
+void lliod_give_page(struct inode *inode, struct page *page, int rw);
+void plist_init(struct plist *plist); /* for lli initialization.. */
+
+void ll_lldo_init(struct ll_dirty_offsets *lldo);
+void ll_record_dirty(struct inode *inode, unsigned long offset);
+void ll_remove_dirty(struct inode *inode, unsigned long start,
+                     unsigned long end);
+int ll_find_dirty(struct ll_dirty_offsets *lldo, unsigned long *start,
+                  unsigned long *end);
+int ll_farthest_dirty(struct ll_dirty_offsets *lldo, unsigned long *farthest);
+
+
+/* llite/super25.c */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+int ll_getattr(struct vfsmount *mnt, struct dentry *de,
+               struct lookup_intent *it, 
+               struct kstat *stat);
+#endif
+
+
+/* llite/dcache.c */
+void ll_intent_release(struct lookup_intent *);
+extern void ll_set_dd(struct dentry *de);
+void ll_unhash_aliases(struct inode *);
+
+/* llite/rw.c */
+void ll_truncate(struct inode *inode);
+void ll_end_writeback(struct inode *inode, struct page *page);
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+int ll_check_dirty(struct super_block *sb);
+int ll_batch_writepage(struct inode *inode, struct obdo *oa, struct page *page);
+#else
+#define ll_check_dirty(SB) do { (void)SB; } while (0)
+#endif
+
+/* llite/llite_lib.c */
+
+extern struct super_operations ll_super_operations;
+
+char *ll_read_opt(const char *opt, char *data);
+int ll_set_opt(const char *opt, char *data, int fl);
+void ll_options(char *options, char **ost, char **mds, int *flags);
+void ll_lli_init(struct ll_inode_info *lli);
+int ll_fill_super(struct super_block *sb, void *data, int silent);
+void ll_put_super(struct super_block *sb);
+void ll_clear_inode(struct inode *inode);
+int ll_attr2inode(struct inode *inode, struct iattr *attr, int trunc);
+int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc);
+int ll_setattr_raw(struct inode *inode, struct iattr *attr);
+int ll_setattr(struct dentry *de, struct iattr *attr);
+int ll_statfs(struct super_block *sb, struct kstatfs *sfs);
+void ll_update_inode(struct inode *inode, struct mds_body *body,
+                     struct lov_stripe_md *lsm);
+int it_disposition(struct lookup_intent *it, int flag);
+void it_set_disposition(struct lookup_intent *it, int flag);
+void ll_read_inode2(struct inode *inode, void *opaque);
+void ll_umount_begin(struct super_block *sb);
+
+
+
 #endif /* LLITE_INTERNAL_H */
index 42fea4b..8908d44 100644 (file)
 #define DEBUG_SUBSYSTEM S_LLITE
 
 #include <linux/version.h>
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#include <asm/statfs.h>
-#endif
 #include <linux/lustre_lite.h>
 #include <linux/lprocfs_status.h>
 
 #include "llite_internal.h"
 
 /* /proc/lustre/llite mount point registration */
+struct proc_dir_entry *proc_lustre_fs_root;
 
 #ifndef LPROCFS
 int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
@@ -41,36 +39,113 @@ int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
 void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi){}
 #else
 
-#define LPROC_LLITE_STAT_FCT(fct_name, get_statfs_fct)                    \
-int fct_name(char *page, char **start, off_t off,                         \
-             int count, int *eof, void *data)                             \
-{                                                                         \
-        struct statfs sfs;                                                \
-        int rc;                                                           \
-        LASSERT(data != NULL);                                            \
-        rc = get_statfs_fct((struct super_block*)data, &sfs);             \
-        return (rc==0                                                     \
-                ? lprocfs_##fct_name (page, start, off, count, eof, &sfs) \
-                : rc);                                                    \
+long long mnt_instance;
+
+static int ll_rd_blksize(char *page, char **start, off_t off, int count,
+                         int *eof, void *data)
+{
+        struct super_block *sb = (struct super_block *)data;
+        struct obd_statfs osfs;
+        int rc;
+        /* print os_bsize into page; a statfs failure is returned unchanged */
+        LASSERT(sb != NULL);
+        rc = ll_statfs_internal(sb, &osfs, jiffies - HZ);
+        if (!rc) {
+              *eof = 1;
+              rc = snprintf(page, count, "%u\n", osfs.os_bsize);
+        }
+
+        return rc;
+}
 
-long long mnt_instance;
+static int ll_rd_kbytestotal(char *page, char **start, off_t off, int count,
+                             int *eof, void *data)
+{
+        struct super_block *sb = (struct super_block *)data;
+        struct obd_statfs osfs;
+        int rc;
+        /* print os_blocks, scaled from os_bsize units to kbytes, into page */
+        LASSERT(sb != NULL);
+        rc = ll_statfs_internal(sb, &osfs, jiffies - HZ);
+        if (!rc) {
+                __u32 blk_size = osfs.os_bsize >> 10;
+                __u64 result = osfs.os_blocks;
+                /* double once per power of two in bsize/1K; exact for 2^n >= 1K */
+                while (blk_size >>= 1)
+                        result <<= 1;
+
+                *eof = 1;
+                rc = snprintf(page, count, LPU64"\n", result);
+        }
+        return rc;
+
+}
+
+static int ll_rd_kbytesfree(char *page, char **start, off_t off, int count,
+                            int *eof, void *data)
+{
+        struct super_block *sb = (struct super_block *)data;
+        struct obd_statfs osfs;
+        int rc;
 
-LPROC_LLITE_STAT_FCT(rd_blksize,     vfs_statfs);
-LPROC_LLITE_STAT_FCT(rd_kbytestotal, vfs_statfs);
-LPROC_LLITE_STAT_FCT(rd_kbytesfree,  vfs_statfs);
-LPROC_LLITE_STAT_FCT(rd_filestotal,  vfs_statfs);
-LPROC_LLITE_STAT_FCT(rd_filesfree,   vfs_statfs);
-LPROC_LLITE_STAT_FCT(rd_filegroups,  vfs_statfs);
+        LASSERT(sb != NULL);
+        rc = ll_statfs_internal(sb, &osfs, jiffies - HZ);
+        if (!rc) {
+                __u32 blk_size = osfs.os_bsize >> 10;
+                __u64 result = osfs.os_bfree;
+
+                while (blk_size >>= 1)
+                        result <<= 1;
+
+                *eof = 1;
+                rc = snprintf(page, count, LPU64"\n", result);
+        }
+        return rc;
+}
+
+static int ll_rd_filestotal(char *page, char **start, off_t off, int count,
+                            int *eof, void *data)
+{
+        struct super_block *sb = (struct super_block *)data;
+        struct obd_statfs osfs;
+        int rc;
+        /* print os_files into page; a statfs failure is returned unchanged */
+        LASSERT(sb != NULL);
+        rc = ll_statfs_internal(sb, &osfs, jiffies - HZ);
+        if (!rc) {
+                 *eof = 1;
+                 rc = snprintf(page, count, LPU64"\n", osfs.os_files);
+        }
+        return rc;
+}
 
-int rd_path(char *page, char **start, off_t off, int count, int *eof,
-            void *data)
+static int ll_rd_filesfree(char *page, char **start, off_t off, int count,
+                           int *eof, void *data)
+{
+        struct super_block *sb = (struct super_block *)data;
+        struct obd_statfs osfs;
+        int rc;
+        /* print os_ffree into page; a statfs failure is returned unchanged */
+        LASSERT(sb != NULL);
+        rc = ll_statfs_internal(sb, &osfs, jiffies - HZ);
+        if (!rc) {
+                 *eof = 1;
+                 rc = snprintf(page, count, LPU64"\n", osfs.os_ffree);
+        }
+        return rc;
+
+}
+
+#if 0
+static int ll_rd_path(char *page, char **start, off_t off, int count, int *eof,
+                      void *data)
 {
         return 0;
 }
+#endif
 
-int rd_fstype(char *page, char **start, off_t off, int count, int *eof,
-              void *data)
+static int ll_rd_fstype(char *page, char **start, off_t off, int count,
+                        int *eof, void *data)
 {
         struct super_block *sb = (struct super_block*)data;
 
@@ -79,8 +154,8 @@ int rd_fstype(char *page, char **start, off_t off, int count, int *eof,
         return snprintf(page, count, "%s\n", sb->s_type->name);
 }
 
-int rd_sb_uuid(char *page, char **start, off_t off, int count, int *eof,
-               void *data)
+static int ll_rd_sb_uuid(char *page, char **start, off_t off, int count,
+                         int *eof, void *data)
 {
         struct super_block *sb = (struct super_block *)data;
 
@@ -89,18 +164,20 @@ int rd_sb_uuid(char *page, char **start, off_t off, int count, int *eof,
         return snprintf(page, count, "%s\n", ll_s2sbi(sb)->ll_sb_uuid.uuid);
 }
 
-struct lprocfs_vars lprocfs_obd_vars[] = {
-        { "uuid",        rd_sb_uuid,     0, 0 },
-        { "mntpt_path",  rd_path,        0, 0 },
-        { "fstype",      rd_fstype,      0, 0 },
-        { "blocksize",   rd_blksize,     0, 0 },
-        { "kbytestotal", rd_kbytestotal, 0, 0 },
-        { "kbytesfree",  rd_kbytesfree,  0, 0 },
-        { "filestotal",  rd_filestotal,  0, 0 },
-        { "filesfree",   rd_filesfree,   0, 0 },
-        { "filegroups",  rd_filegroups,  0, 0 },
-        { "dirty_pages", ll_rd_dirty_pages, 0, 0},
+static struct lprocfs_vars lprocfs_obd_vars[] = {
+        { "uuid",         ll_rd_sb_uuid,          0, 0 },
+        //{ "mntpt_path",   ll_rd_path,             0, 0 },
+        { "fstype",       ll_rd_fstype,           0, 0 },
+        { "blocksize",    ll_rd_blksize,          0, 0 },
+        { "kbytestotal",  ll_rd_kbytestotal,      0, 0 },
+        { "kbytesfree",   ll_rd_kbytesfree,       0, 0 },
+        { "filestotal",   ll_rd_filestotal,       0, 0 },
+        { "filesfree",    ll_rd_filesfree,        0, 0 },
+        //{ "filegroups",   lprocfs_rd_filegroups,  0, 0 },
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+        { "dirty_pages",  ll_rd_dirty_pages,      0, 0},
         { "max_dirty_pages", ll_rd_max_dirty_pages, ll_wr_max_dirty_pages, 0},
+#endif
         { 0 }
 };
 
index da6e670..b9223e8 100644 (file)
 #include <linux/obd_support.h>
 #include <linux/lustre_lite.h>
 #include <linux/lustre_dlm.h>
-
-/* from dcache.c */
-extern void ll_set_dd(struct dentry *de);
-
-/* from super.c */
-extern void ll_change_inode(struct inode *inode);
-extern int ll_setattr(struct dentry *de, struct iattr *attr);
-
-/* from dir.c */
-extern int ll_add_link (struct dentry *dentry, struct inode *inode);
-obd_id ll_inode_by_name(struct inode * dir, struct dentry *dentry, int *typ);
-int ext2_make_empty(struct inode *inode, struct inode *parent);
-struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir,
-                   struct dentry *dentry, struct page ** res_page);
-int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page );
-int ext2_empty_dir (struct inode * inode);
-struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p);
-void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
-                   struct page *page, struct inode *inode);
-
-/*
- * Couple of helper functions - make the code slightly cleaner.
- */
-static inline void ext2_inc_count(struct inode *inode)
-{
-        inode->i_nlink++;
-}
-
-/* postpone the disk update until the inode really goes away */
-static inline void ext2_dec_count(struct inode *inode)
-{
-        inode->i_nlink--;
-}
-static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode)
-{
-        int err;
-        err = ll_add_link(dentry, inode);
-        if (!err) {
-                d_instantiate(dentry, inode);
-                return 0;
-        }
-        ext2_dec_count(inode);
-        iput(inode);
-        return err;
-}
+#include "llite_internal.h"
 
 /* methods */
 
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-static int ll_find_inode(struct inode *inode, unsigned long ino, void *opaque)
+static int ll_test_inode(struct inode *inode, unsigned long ino, void *opaque)
 #else
 static int ll_test_inode(struct inode *inode, void *opaque)
 #endif
 {
-        struct ll_read_inode2_cookie *lic = opaque;
-        struct mds_body *body = lic->lic_body;
+        struct lustre_md *md = opaque;
 
-        if (!(lic->lic_body->valid & (OBD_MD_FLGENER | OBD_MD_FLID)))
+        if (!(md->body->valid & (OBD_MD_FLGENER | OBD_MD_FLID)))
                 CERROR("invalid generation\n");
-        CDEBUG(D_VFSTRACE, "comparing inode %p ino %lu/%u to body %lu/%u\n",
-               inode, inode->i_ino, inode->i_generation, ino,
-               lic->lic_body->generation);
+        CDEBUG(D_VFSTRACE, "comparing inode %p ino %lu/%u to body %u/%u\n",
+               inode, inode->i_ino, inode->i_generation, 
+               md->body->ino, md->body->generation);
 
-        if (inode->i_generation != lic->lic_body->generation)
+        if (inode->i_generation != md->body->generation)
                 return 0;
 
         /* Apply the attributes in 'opaque' to this inode */
-        ll_update_inode(inode, body, lic->lic_lsm);
+        ll_update_inode(inode, md->body, md->lsm);
         return 1;
 }
 
@@ -127,16 +82,21 @@ int ll_unlock(__u32 mode, struct lustre_handle *lockh)
  * Returns inode or NULL
  */
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-extern int ll_read_inode2(struct inode *inode, void *opaque);
+int ll_set_inode(struct inode *inode, void *opaque)
+{
+        ll_read_inode2(inode, opaque);
+        return 0;
+}
 struct inode *ll_iget(struct super_block *sb, ino_t hash,
-                      struct ll_read_inode2_cookie *lic)
+                      struct lustre_md *md)
 {
         struct inode *inode;
 
         LASSERT(hash != 0);
-        inode = iget5_locked(sb, hash, ll_test_inode, ll_read_inode2, lic);
-        if (inode == NULL)
-                return NULL;              /* removed ERR_PTR(-ENOMEM) -eeb */
+        inode = iget5_locked(sb, hash, ll_test_inode, ll_set_inode, md);
+
+        if (!inode)
+                return (NULL);              /* removed ERR_PTR(-ENOMEM) -eeb */
 
         if (inode->i_state & I_NEW)
                 unlock_new_inode(inode);
@@ -146,11 +106,11 @@ struct inode *ll_iget(struct super_block *sb, ino_t hash,
 }
 #else
 struct inode *ll_iget(struct super_block *sb, ino_t hash,
-                      struct ll_read_inode2_cookie *lic)
+                      struct lustre_md *md)
 {
         struct inode *inode;
         LASSERT(hash != 0);
-        inode = iget4(sb, hash, ll_find_inode, lic);
+        inode = iget4(sb, hash, ll_test_inode, md);
         CDEBUG(D_VFSTRACE, "inode: %lu/%u(%p)\n", inode->i_ino,
                inode->i_generation, inode);
         return inode;
@@ -171,36 +131,37 @@ static int ll_intent_to_lock_mode(struct lookup_intent *it)
 
 int ll_it_open_error(int phase, struct lookup_intent *it)
 {
-        if (it->it_disposition & IT_OPEN_OPEN) {
-                if (phase == IT_OPEN_OPEN)
+        if (it_disposition(it, DISP_OPEN_OPEN)) {
+                if (phase == DISP_OPEN_OPEN)
                         return it->it_status;
                 else
                         return 0;
         }
 
-        if (it->it_disposition & IT_OPEN_CREATE) {
-                if (phase == IT_OPEN_CREATE)
+        if (it_disposition(it, DISP_OPEN_CREATE)) {
+                if (phase == DISP_OPEN_CREATE)
                         return it->it_status;
                 else
                         return 0;
         }
 
-        if (it->it_disposition & IT_OPEN_LOOKUP) {
-                if (phase == IT_OPEN_LOOKUP)
+        if (it_disposition(it, DISP_LOOKUP_EXECD)) {
+                if (phase == DISP_LOOKUP_EXECD)
                         return it->it_status;
                 else
                         return 0;
         }
+        CERROR("it disp: %X, status: %d\n", it->it_disposition, it->it_status);
         LBUG();
         return 0;
 }
 
-int ll_mdc_blocking_ast(struct ldlm_lock *lock,
-                        struct ldlm_lock_desc *desc,
+int ll_mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
                         void *data, int flag)
 {
         int rc;
         struct lustre_handle lockh;
+        struct inode *inode = lock->l_data;
         ENTRY;
 
         switch (flag) {
@@ -214,9 +175,13 @@ int ll_mdc_blocking_ast(struct ldlm_lock *lock,
                 break;
         case LDLM_CB_CANCELING: {
                 /* Invalidate all dentries associated with this inode */
-                struct inode *inode = lock->l_data;
-                LASSERT(inode != NULL);
-
+                if (inode == NULL)
+                        break;
+                if (lock->l_resource->lr_name.name[0] != inode->i_ino ||
+                    lock->l_resource->lr_name.name[1] != inode->i_generation) {
+                        LDLM_ERROR(lock, "data mismatch with ino %lu/%u",
+                                   inode->i_ino, inode->i_generation);
+                }
                 if (S_ISDIR(inode->i_mode)) {
                         CDEBUG(D_INODE, "invalidating inode %lu\n",
                                inode->i_ino);
@@ -227,7 +192,7 @@ int ll_mdc_blocking_ast(struct ldlm_lock *lock,
 #warning FIXME: we should probably free this inode if there are no aliases
                 if (inode->i_sb->s_root &&
                     inode != inode->i_sb->s_root->d_inode)
-                        d_unhash_aliases(inode);
+                        ll_unhash_aliases(inode);
                 break;
         }
         default:
@@ -237,17 +202,6 @@ int ll_mdc_blocking_ast(struct ldlm_lock *lock,
         RETURN(0);
 }
 
-void ll_mdc_lock_set_inode(struct lustre_handle *lockh, struct inode *inode)
-{
-        struct ldlm_lock *lock = ldlm_handle2lock(lockh);
-        ENTRY;
-
-        LASSERT(lock != NULL);
-        lock->l_data = inode;
-        LDLM_LOCK_PUT(lock);
-        EXIT;
-}
-
 int ll_mdc_cancel_unused(struct lustre_handle *conn, struct inode *inode,
                          int flags, void *opaque)
 {
@@ -287,35 +241,74 @@ void ll_prepare_mdc_op_data(struct mdc_op_data *data,
         data->mode = mode;
 }
 
-#define IT_ENQ_COMPLETE (1<<16)
-
+/*
+ * This long block is all about fixing up the local state so that it is
+ * correct as of the moment _before_ the operation was applied; that
+ * way, the VFS will think that everything is normal and call Lustre's
+ * regular VFS methods.
+ *
+ * If we're performing a creation, that means that unless the creation
+ * failed with EEXIST, we should fake up a negative dentry.
+ *
+ * For everything else, we want the lookup to succeed.
+ *
+ * One additional note: if CREATE or OPEN succeeded, we add an extra
+ * reference to the request because we need to keep it around until
+ * ll_create/ll_open gets called.
+ *
+ * The server will return to us, in it_disposition, an indication of
+ * exactly what it_status refers to.
+ *
+ * If DISP_OPEN_OPEN is set, then it_status refers to the open() call,
+ * otherwise if DISP_OPEN_CREATE is set, then it_status is the
+ * creation failure mode.  In either case, one of DISP_LOOKUP_NEG or
+ * DISP_LOOKUP_POS will be set, indicating whether the child lookup
+ * was successful.
+ *
+ * Else, if DISP_LOOKUP_EXECD then it_status is the rc of the child
+ * lookup.
+ */
 int ll_intent_lock(struct inode *parent, struct dentry **de,
-                   struct lookup_intent *it, intent_finish_cb intent_finish)
+                   struct lookup_intent *it, int flags, intent_finish_cb intent_finish)
 {
         struct dentry *dentry = *de;
         struct inode *inode = dentry->d_inode;
         struct ll_sb_info *sbi = ll_i2sbi(parent);
         struct lustre_handle lockh;
         struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
-        struct ptlrpc_request *request = NULL;
-        int rc = 0, offset, flag = 0;
+        struct ptlrpc_request *request;
+        int rc = 0;
+        struct mds_body *mds_body;
+        int mode;
         obd_id ino = 0;
         ENTRY;
 
 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-        if (it && it->it_op == 0)
-                *it = lookup_it;
+        if (it && it->it_magic != INTENT_MAGIC) { 
+                CERROR("WARNING: uninitialized intent\n");
+                LBUG();
+                intent_init(it, IT_LOOKUP, 0);
+        }
+        if (it->it_op == IT_GETATTR || 
+            it->it_op == 0)
+                it->it_op = IT_LOOKUP;
+        
 #endif
-        if (it == NULL)
+        if (!it ||it->it_op == IT_GETXATTR)
                 it = &lookup_it;
 
+        it->it_op_release = ll_intent_release;
+
         CDEBUG(D_DLMTRACE, "name: %*s, intent: %s\n", dentry->d_name.len,
                dentry->d_name.name, ldlm_it2str(it->it_op));
-
+        
         if (dentry->d_name.len > EXT2_NAME_LEN)
                 RETURN(-ENAMETOOLONG);
 
-        if (!(it->it_disposition & IT_ENQ_COMPLETE)) {
+        /* This function may be called twice; we want to execute the
+           request associated with the intent only once. If that was
+           already done, we skip past this and use the results. */
+        if (!it_disposition(it, DISP_ENQ_COMPLETE)) {
                 struct mdc_op_data op_data;
 
                 ll_prepare_mdc_op_data(&op_data, parent, dentry->d_inode,
@@ -325,174 +318,73 @@ int ll_intent_lock(struct inode *parent, struct dentry **de,
                 rc = mdc_enqueue(&sbi->ll_mdc_conn, LDLM_PLAIN, it,
                                  ll_intent_to_lock_mode(it), &op_data,
                                  &lockh, NULL, 0, ldlm_completion_ast,
-                                 ll_mdc_blocking_ast, parent);
+                                 ll_mdc_blocking_ast, NULL);
                 if (rc < 0)
                         RETURN(rc);
                 memcpy(it->it_lock_handle, &lockh, sizeof(lockh));
         }
-
-        request = (struct ptlrpc_request *)it->it_data;
+        request = it->it_data;
+        LASSERT(request != NULL);
 
         /* non-zero it_disposition indicates that the server performed the
          * intent on our behalf. */
-        if (it->it_disposition) {
-                struct mds_body *mds_body;
-                int mode;
-
-                /* This long block is all about fixing up the local
-                 * state so that it is correct as of the moment
-                 * _before_ the operation was applied; that way, the
-                 * VFS will think that everything is normal and call
-                 * Lustre's regular FS function.
-                 *
-                 * If we're performing a creation, that means that unless the
-                 * creation failed with EEXIST, we should fake up a negative
-                 * dentry.  Likewise for the target of a hard link.
-                 *
-                 * For everything else, we want to lookup to succeed. */
-
-                /* One additional note: if CREATE/MKDIR/etc succeeded,
-                 * we add an extra reference to the request because we
-                 * need to keep it around until ll_create gets called.
-                 * For anything else which results in
-                 * LL_LOOKUP_POSITIVE, we can do the iget()
-                 * immediately with the contents of the reply (in the
-                 * intent_finish callback).  In the create case,
-                 * however, we need to wait until ll_create_node to do
-                 * the iget() or the VFS will abort with -EEXISTS.
-                 */
-
-                offset = 1;
-                mds_body = lustre_msg_buf(request->rq_repmsg, offset,
-                                          sizeof(*mds_body));
-                LASSERT (mds_body != NULL);           /* mdc_enqueue checked */
-                LASSERT_REPSWABBED (request, offset); /* mdc_enqueue swabbed */
-
-                ino = mds_body->fid1.id;
-                mode = mds_body->mode;
-
-                /*We were called from revalidate2: did we find the same inode?*/
-                if (inode && (ino != inode->i_ino ||
-                    mds_body->fid1.generation != inode->i_generation)) {
-                        it->it_disposition |= IT_ENQ_COMPLETE;
-                        RETURN(-ESTALE);
-                }
+        LASSERT(it_disposition(it, DISP_IT_EXECD));
+
+                
+        mds_body = lustre_msg_buf(request->rq_repmsg, 1, sizeof(*mds_body));
+        LASSERT(mds_body != NULL);           /* mdc_enqueue checked */
+        LASSERT_REPSWABBED(request, 1); /* mdc_enqueue swabbed */
+
+        /* XXX everything with fids please, no ino's inode's etc */
+        ino = mds_body->fid1.id;
+        mode = mds_body->mode;
+
+        /*We were called from revalidate2: did we find the same inode?*/
+        if (inode && 
+            (ino != inode->i_ino ||
+             mds_body->fid1.generation != inode->i_generation)) {
+                it_set_disposition(it, DISP_ENQ_COMPLETE);
+                RETURN(-ESTALE);
+        }
 
-                /* If we're doing an IT_OPEN which did not result in an actual
-                 * successful open, then we need to remove the bit which saves
-                 * this request for unconditional replay. */
-                if (it->it_op & IT_OPEN &&
-                    (!(it->it_disposition & IT_OPEN_OPEN) ||
-                     it->it_status != 0)) {
+        /* If we're doing an IT_OPEN which did not result in an actual
+         * successful open, then we need to remove the bit which saves
+         * this request for unconditional replay. */
+        if (it->it_op & IT_OPEN) {
+                if (!it_disposition(it, DISP_OPEN_OPEN) ||
+                    it->it_status != 0) {
                         unsigned long flags;
-
+                
                         spin_lock_irqsave (&request->rq_lock, flags);
                         request->rq_replay = 0;
                         spin_unlock_irqrestore (&request->rq_lock, flags);
                 }
-
-                if (it->it_op & IT_CREAT) {
-                        mdc_store_inode_generation(request, 2, 1);
-                        /* The server will return to us, in it_disposition, an
-                         * indication of exactly what it_status refers to.
-                         *
-                         * If IT_OPEN_OPEN is set, then it_status refers to the
-                         * open() call, otherwise if IT_OPEN_CREATE is set, then
-                         * it status is the creation failure mode.  In either
-                         * case, one of IT_OPEN_NEG or IT_OPEN_POS will be set,
-                         * indicating whether the child lookup was successful.
-                         *
-                         * Else, if IT_OPEN_LOOKUP then it_status is the rc
-                         * of the child lookup.
-                         *
-                         * Finally, if none of the bits are set, then the
-                         * failure occurred while looking up the parent. */
-                        rc = ll_it_open_error(IT_OPEN_LOOKUP, it);
-                        if (rc)
-                                GOTO(drop_req, rc);
-
-                        if (it->it_disposition & IT_OPEN_CREATE)
-                                ptlrpc_request_addref(request);
-                        if (it->it_disposition & IT_OPEN_OPEN)
-                                ptlrpc_request_addref(request);
-
-                        if (it->it_disposition & IT_OPEN_NEG)
-                                flag = LL_LOOKUP_NEGATIVE;
-                        else
-                                flag = LL_LOOKUP_POSITIVE;
-                } else if (it->it_op == IT_OPEN) {
-                        LASSERT(!(it->it_disposition & IT_OPEN_CREATE));
-
-                        rc = ll_it_open_error(IT_OPEN_LOOKUP, it);
-                        if (rc)
-                                GOTO(drop_req, rc);
-
-                        if (it->it_disposition & IT_OPEN_OPEN)
-                                ptlrpc_request_addref(request);
-
-                        if (it->it_disposition & IT_OPEN_NEG)
-                                flag = LL_LOOKUP_NEGATIVE;
-                        else
-                                flag = LL_LOOKUP_POSITIVE;
-                } else if (it->it_op & (IT_GETATTR | IT_LOOKUP)) {
-                        /* For check ops, we want the lookup to succeed */
-                        it->it_data = NULL;
-                        if (it->it_status)
-                                flag = LL_LOOKUP_NEGATIVE;
-                        else
-                                flag = LL_LOOKUP_POSITIVE;
-                } else
-                        LBUG();
-        } else {
-                struct ll_fid fid;
-                obd_flag valid;
-                int eadatalen;
-                int mode;
-
-                LBUG(); /* For the moment, no non-intent locks */
-
-                /* it_disposition == 0 indicates that it just did a simple lock
-                 * request, for which we are very thankful.  move along with
-                 * the local lookup then. */
-
-                //memcpy(&lli->lli_intent_lock_handle, &lockh, sizeof(lockh));
-                offset = 0;
-
-                ino = ll_inode_by_name(parent, dentry, &mode);
-                if (!ino) {
-                        CERROR("inode %*s not found by name\n",
-                               dentry->d_name.len, dentry->d_name.name);
-                        GOTO(drop_lock, rc = -ENOENT);
-                }
-
-                valid = OBD_MD_FLNOTOBD;
-
-                if (S_ISREG(mode)) {
-                        eadatalen = obd_size_diskmd(&sbi->ll_osc_conn, NULL),
-                        valid |= OBD_MD_FLEASIZE;
-                } else {
-                        eadatalen = 0;
-                        valid |= OBD_MD_FLBLOCKS;
-                }
-
-                fid.id = ino;
-                fid.generation = 0;
-                fid.f_type = mode;
-                rc = mdc_getattr(&sbi->ll_mdc_conn, &fid, valid,
-                                 eadatalen, &request);
-                if (rc) {
-                        CERROR("failure %d inode "LPX64"\n", rc, ino);
-                        GOTO(drop_lock, rc = -abs(rc));
-                }
         }
 
-        LASSERT (request != NULL);
+        rc = ll_it_open_error(DISP_LOOKUP_EXECD, it);
+        if (rc)
+                GOTO(drop_req, rc);
+        
+        /* Keep the request around for the multiple phases of the call;
+         * this shows the DISP_XX bits must guarantee we make it into the call.
+         */
+        if (it_disposition(it, DISP_OPEN_CREATE))
+                ptlrpc_request_addref(request);
+        if (it_disposition(it, DISP_OPEN_OPEN))
+                ptlrpc_request_addref(request);
+        
+        if (it->it_op & IT_CREAT) {
+                /* XXX this belongs in ll_create_iit */
+        } else if (it->it_op == IT_OPEN) {
+                LASSERT(!it_disposition(it, DISP_OPEN_CREATE));
+        } else 
+                LASSERT(it->it_op & (IT_GETATTR | IT_LOOKUP));
 
         if (intent_finish != NULL) {
                 struct lustre_handle old_lock;
                 struct ldlm_lock *lock;
 
-                rc = intent_finish(flag, request, parent, de, it, offset, ino);
+                rc = intent_finish(request, parent, de, it, 1, ino);
                 dentry = *de; /* intent_finish may change *de */
                 inode = dentry->d_inode;
                 if (rc != 0)
@@ -525,29 +417,16 @@ int ll_intent_lock(struct inode *parent, struct dentry **de,
         }
         ptlrpc_req_finished(request);
 
-        /* This places the intent in the dentry so that the vfs_xxx
-         * operation can lay its hands on it; but that is not always
-         * needed...  (we need to save it in the GETATTR case for the
-         * benefit of ll_inode_revalidate -phil) */
-        /* Ignore trying to save the intent for "special" inodes as
-         * they have special semantics that can cause deadlocks on
-         * the intent semaphore. -mmex */
-        if ((!inode || S_ISDIR(inode->i_mode) || S_ISREG(inode->i_mode) ||
-             S_ISLNK(inode->i_mode)) && (it->it_op & (IT_OPEN | IT_GETATTR)))
-                LL_SAVE_INTENT(dentry, it);
-        else
-                CDEBUG(D_DENTRY,
-                       "D_IT dentry %p fsdata %p intent: %s status %d\n",
-                       dentry, ll_d2d(dentry), ldlm_it2str(it->it_op),
-                       it->it_status);
-
+        CDEBUG(D_DENTRY, "D_IT dentry %p intent: %s status %d disp %x\n",
+               dentry, ldlm_it2str(it->it_op), it->it_status, it->it_disposition);
+        
+        /* drop IT_LOOKUP locks */
         if (it->it_op == IT_LOOKUP)
-                ll_intent_release(dentry, it);
-
+                ll_intent_release(it);
         RETURN(rc);
 
  drop_lock:
-        ll_intent_release(dentry, it);
+        ll_intent_release(it);
  drop_req:
         ptlrpc_req_finished(request);
         RETURN(rc);
@@ -582,7 +461,7 @@ struct dentry *ll_find_alias(struct inode *inode, struct dentry *de)
                 if (!list_empty(&dentry->d_lru))
                         list_del_init(&dentry->d_lru);
 
-                list_del_init(&dentry->d_hash);
+                hlist_del_init(&dentry->d_hash);
                 __d_rehash(dentry, 0); /* avoid taking dcache_lock inside */
                 spin_unlock(&dcache_lock);
                 atomic_inc(&dentry->d_count);
@@ -597,68 +476,34 @@ struct dentry *ll_find_alias(struct inode *inode, struct dentry *de)
 }
 
 static int
-lookup2_finish(int flag, struct ptlrpc_request *request,
+lookup2_finish(struct ptlrpc_request *request,
                struct inode *parent, struct dentry **de,
                struct lookup_intent *it, int offset, obd_id ino)
 {
         struct ll_sb_info *sbi = ll_i2sbi(parent);
         struct dentry *dentry = *de, *saved = *de;
         struct inode *inode = NULL;
-        struct ll_read_inode2_cookie lic = {.lic_body = NULL, .lic_lsm = NULL};
+        int rc;
 
         /* NB 1 request reference will be taken away by ll_intent_lock()
          * when I return */
-
-        if (!(flag & LL_LOOKUP_NEGATIVE)) {
+        if (!it_disposition(it, DISP_LOOKUP_NEG)) {
+                struct lustre_md md;
                 ENTRY;
 
-                /* We only get called if the mdc_enqueue() called from
-                 * ll_intent_lock() was successful.  Therefore the mds_body
-                 * is present and correct, and the eadata is present if
-                 * body->eadatasize != 0 (but still opaque, so only
-                 * obd_unpackmd() can check the size) */
-                lic.lic_body = lustre_msg_buf(request->rq_repmsg, offset,
-                                              sizeof (*lic.lic_body));
-                LASSERT(lic.lic_body != NULL);
-                LASSERT_REPSWABBED(request, offset);
-
-                if (S_ISREG(lic.lic_body->mode) &&
-                    (lic.lic_body->valid & OBD_MD_FLEASIZE)) {
-                        struct lov_mds_md    *lmm;
-                        int                   lmm_size;
-                        int                   rc;
-
-                        lmm_size = lic.lic_body->eadatasize;
-                        if (lmm_size == 0) {
-                                CERROR("OBD_MD_FLEASIZE set but "
-                                       "eadatasize 0\n");
-                                RETURN(-EPROTO);
-                        }
-                        lmm = lustre_msg_buf(request->rq_repmsg, offset + 1,
-                                             lmm_size);
-                        LASSERT(lmm != NULL);
-                        LASSERT_REPSWABBED(request, offset + 1);
-
-                        rc = obd_unpackmd(&sbi->ll_osc_conn,
-                                          &lic.lic_lsm, lmm, lmm_size);
-                        if (rc < 0) {
-                                CERROR("Error %d unpacking eadata\n", rc);
-                                RETURN(rc);
-                        }
-                        LASSERT(rc >= sizeof(*lic.lic_lsm));
-                }
+                rc =mdc_req2lustre_md(request, offset, &sbi->ll_osc_conn, &md);
+                if (rc) 
+                        RETURN(rc);
 
-                /* Both ENOMEM and an RPC timeout are possible in ll_iget; which
-                 * to pick?  A more generic EIO?  -phik */
-                inode = ll_iget(dentry->d_sb, ino, &lic);
+                inode = ll_iget(dentry->d_sb, ino, &md);
                 if (!inode) {
                         /* free the lsm if we allocated one above */
-                        if (lic.lic_lsm != NULL)
-                                obd_free_memmd(&sbi->ll_osc_conn, &lic.lic_lsm);
+                        if (md.lsm != NULL)
+                                obd_free_memmd(&sbi->ll_osc_conn, &md.lsm);
                         RETURN(-ENOMEM);
-                } else if (lic.lic_lsm != NULL &&
-                           ll_i2info(inode)->lli_smd != lic.lic_lsm) {
-                        obd_free_memmd(&sbi->ll_osc_conn, &lic.lic_lsm);
+                } else if (md.lsm != NULL &&
+                           ll_i2info(inode)->lli_smd != md.lsm) {
+                        obd_free_memmd(&sbi->ll_osc_conn, &md.lsm);
                 }
 
                 /* If this is a stat, get the authoritative file size */
@@ -685,8 +530,10 @@ lookup2_finish(int flag, struct ptlrpc_request *request,
                 /* We asked for a lock on the directory, and may have been
                  * granted a lock on the inode.  Just in case, fixup the data
                  * pointer. */
-                ll_mdc_lock_set_inode((struct lustre_handle*)it->it_lock_handle,
-                                      inode);
+                CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
+                       inode, inode->i_ino, inode->i_generation);
+                ldlm_lock_set_data((struct lustre_handle*)it->it_lock_handle,
+                                   inode);
         } else {
                 ENTRY;
         }
@@ -700,8 +547,8 @@ lookup2_finish(int flag, struct ptlrpc_request *request,
         RETURN(0);
 }
 
-static struct dentry *ll_lookup2(struct inode *parent, struct dentry *dentry,
-                                 struct lookup_intent *it)
+static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
+                                   struct lookup_intent *it, int flags)
 {
         struct dentry *save = dentry, *retval;
         int rc;
@@ -711,7 +558,11 @@ static struct dentry *ll_lookup2(struct inode *parent, struct dentry *dentry,
                dentry->d_name.name, parent->i_ino, parent->i_generation,
                parent, LL_IT2STR(it));
 
-        rc = ll_intent_lock(parent, &dentry, it, lookup2_finish);
+        if (d_mountpoint(dentry)) { 
+                CERROR("Tell Peter, lookup on mtpt, it %s\n", LL_IT2STR(it));
+        }
+
+        rc = ll_intent_lock(parent, &dentry, it, flags, lookup2_finish);
         if (rc < 0) {
                 CDEBUG(D_INFO, "ll_intent_lock: %d\n", rc);
                 GOTO(out, retval = ERR_PTR(rc));
@@ -725,167 +576,136 @@ static struct dentry *ll_lookup2(struct inode *parent, struct dentry *dentry,
         return retval;
 }
 
-/* We depend on "mode" being set with the proper file type/umask by now */
-static struct inode *ll_create_node(struct inode *dir, const char *name,
-                                    int namelen, const void *data, int datalen,
-                                    int mode, __u64 extra,
-                                    struct lookup_intent *it)
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) /* nameidata lookup entry point */
+static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry, 
+                                   struct nameidata *nd)
 {
-        struct inode *inode;
-        struct ptlrpc_request *request = NULL;
-        struct mds_body *body;
-        time_t time = LTIME_S(CURRENT_TIME);
-        struct ll_sb_info *sbi = ll_i2sbi(dir);
-        struct ll_read_inode2_cookie lic;
+        struct dentry *de; /* result of ll_lookup_it() */
         ENTRY;
 
-        if (it && it->it_disposition) {
-                ll_invalidate_inode_pages(dir);
-                request = it->it_data;
-                body = lustre_msg_buf(request->rq_repmsg, 1, sizeof (*body));
-                LASSERT (body != NULL);         /* checked already */
-                LASSERT_REPSWABBED (request, 1); /* swabbed already */
-        } else {
-                struct mdc_op_data op_data;
-                int gid = current->fsgid;
-                int rc;
-
-                if (dir->i_mode & S_ISGID) {
-                        gid = dir->i_gid;
-                        if (S_ISDIR(mode))
-                                mode |= S_ISGID;
-                }
-
-                ll_prepare_mdc_op_data(&op_data, dir, NULL, name, namelen, 0);
-                rc = mdc_create(&sbi->ll_mdc_conn, &op_data,
-                                data, datalen, mode, current->fsuid, gid,
-                                time, extra, &request);
-                if (rc) {
-                        inode = ERR_PTR(rc);
-                        GOTO(out, rc);
-                }
-                body = lustre_swab_repbuf(request, 0, sizeof (*body),
-                                          lustre_swab_mds_body);
-                if (body == NULL) {
-                        CERROR ("Can't unpack mds_body\n");
-                        GOTO (out, inode = ERR_PTR(-EPROTO));
-                }
-        }
-
-        lic.lic_body = body;
-        lic.lic_lsm = NULL;
-
-        inode = ll_iget(dir->i_sb, body->ino, &lic);
-        if (!inode || is_bad_inode(inode)) {
-                /* XXX might need iput() for bad inode */
-                int rc = -EIO;
-                CERROR("new_inode -fatal: rc %d\n", rc);
-                LBUG();
-                GOTO(out, rc);
-        }
-
-        if (!list_empty(&inode->i_dentry)) {
-                CERROR("new_inode -fatal: inode %d, ct %d lnk %d\n",
-                       body->ino, atomic_read(&inode->i_count),
-                       inode->i_nlink);
-                iput(inode);
-                LBUG();
-                inode = ERR_PTR(-EIO);
-                GOTO(out, -EIO);
-        }
-
-        if (it && it->it_disposition) {
-                /* We asked for a lock on the directory, but were
-                 * granted a lock on the inode.  Since we finally have
-                 * an inode pointer, stuff it in the lock. */
-                ll_mdc_lock_set_inode((struct lustre_handle*)it->it_lock_handle,
-                                      inode);
-        }
+        if (nd->flags & LOOKUP_LAST && !(nd->flags & LOOKUP_LINK_NOTLAST))
+                de = ll_lookup_it(parent, dentry, &nd->it, nd->flags);
+        else /* otherwise look up with no intent and no flags */
+                de = ll_lookup_it(parent, dentry, NULL, 0);
 
-        EXIT;
- out:
-        ptlrpc_req_finished(request);
-        return inode;
+        RETURN(de);
 }
+#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0) */
 
 static int ll_mdc_unlink(struct inode *dir, struct inode *child, __u32 mode,
                          const char *name, int len)
 {
         struct ptlrpc_request *request = NULL;
-        struct ll_sb_info *sbi = ll_i2sbi(dir);
         struct mds_body *body;
         struct lov_mds_md *eadata;
         struct lov_stripe_md *lsm = NULL;
-        struct lustre_handle lockh;
-        struct lookup_intent it = { .it_op = IT_UNLINK };
-        struct obdo *oa;
-        int err;
+        struct obd_trans_info oti = { 0 };
         struct mdc_op_data op_data;
+        struct obdo *oa;
+        int rc;
         ENTRY;
 
         ll_prepare_mdc_op_data(&op_data, dir, child, name, len, mode);
-
-        err = mdc_enqueue(&sbi->ll_mdc_conn, LDLM_PLAIN, &it, LCK_EX,
-                         &op_data, &lockh, NULL, 0,
-                         ldlm_completion_ast, ll_mdc_blocking_ast,
-                         dir);
-        request = (struct ptlrpc_request *)it.it_data;
-        if (err < 0)
-                GOTO(out, err);
-        if (it.it_status)
-                GOTO(out, err = it.it_status);
-        err = 0;
-
-        body = lustre_msg_buf (request->rq_repmsg, 1, sizeof (*body));
-        LASSERT (body != NULL);                 /* checked by mdc_enqueue() */
-        LASSERT_REPSWABBED (request, 1);        /* swabbed by mdc_enqueue() */
+        rc = mdc_unlink(&ll_i2sbi(dir)->ll_mdc_conn, &op_data, &request);
+        if (rc)
+                GOTO(out, rc);
+        /* req is swabbed so this is safe */
+        body = lustre_msg_buf(request->rq_repmsg, 0, sizeof(*body));
 
         if (!(body->valid & OBD_MD_FLEASIZE))
-                GOTO(out, 0);
+                GOTO(out, rc = 0);
 
         if (body->eadatasize == 0) {
-                CERROR ("OBD_MD_FLEASIZE set but eadatasize zero\n");
-                GOTO (out, err = -EPROTO);
+                CERROR("OBD_MD_FLEASIZE set but eadatasize zero\n");
+                GOTO(out, rc = -EPROTO);
         }
 
         /* The MDS sent back the EA because we unlinked the last reference
          * to this file. Use this EA to unlink the objects on the OST.
-         * Note that mdc_enqueue() has already checked there _is_ some EA
-         * data, but this data is opaque to both mdc_enqueue() and the MDS.
-         * We have to leave it to obd_unpackmd() to check it is complete
-         * and sensible. */
-        eadata = lustre_msg_buf (request->rq_repmsg, 2, body->eadatasize);
-        LASSERT (eadata != NULL);
-        LASSERT_REPSWABBED (request, 2);
-
-        err = obd_unpackmd(ll_i2obdconn(dir), &lsm, eadata,
-                           body->eadatasize);
-        if (err < 0) {
-                CERROR("obd_unpackmd: %d\n", err);
-                GOTO (out_unlock, err);
+         * It's opaque so we don't swab here; we leave it to obd_unpackmd() to
+         * check it is complete and sensible. */
+        eadata = lustre_swab_repbuf(request, 1, body->eadatasize, NULL);
+        LASSERT(eadata != NULL);
+        if (eadata == NULL) {
+                CERROR("Can't unpack MDS EA data\n");
+                GOTO(out, rc = -EPROTO);
         }
-        LASSERT (err >= sizeof (*lsm));
+
+        rc = obd_unpackmd(ll_i2obdconn(dir), &lsm, eadata, body->eadatasize);
+        if (rc < 0) {
+                CERROR("obd_unpackmd: %d\n", rc);
+                GOTO(out, rc);
+        }
+        LASSERT(rc >= sizeof(*lsm));
 
         oa = obdo_alloc();
         if (oa == NULL)
-                GOTO(out_free_memmd, err = -ENOMEM);
+                GOTO(out_free_memmd, rc = -ENOMEM);
 
         oa->o_id = lsm->lsm_object_id;
         oa->o_mode = body->mode & S_IFMT;
         oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE;
 
-        err = obd_destroy(ll_i2obdconn(dir), oa, lsm, NULL);
+        if (body->valid & OBD_MD_FLCOOKIE) {
+                oa->o_valid |= OBD_MD_FLCOOKIE;
+                oti.oti_logcookies = lustre_msg_buf(request->rq_repmsg, 3,
+                                                    body->eadatasize);
+        }
+
+        rc = obd_destroy(ll_i2obdconn(dir), oa, lsm, &oti);
         obdo_free(oa);
-        if (err)
+        if (rc)
                 CERROR("obd destroy objid 0x"LPX64" error %d\n",
-                       lsm->lsm_object_id, err);
+                       lsm->lsm_object_id, rc);
  out_free_memmd:
         obd_free_memmd(ll_i2obdconn(dir), &lsm);
- out_unlock:
-        ldlm_lock_decref_and_cancel(&lockh, LCK_EX);
  out:
         ptlrpc_req_finished(request);
-        return err;
+        return rc;
+}
+
+/* Build the inode described by the MDS reply carried in intent "it".
+ * Callers set the file type/umask in "mode" beforehand, though it and the
+ * name/data/extra arguments are not consulted here.  Finishes it->it_data.
+ * Returns the inode or an ERR_PTR(); a NULL/bad inode is never returned. */
+static struct inode *ll_create_node(struct inode *dir, const char *name,
+                                    int namelen, const void *data, int datalen,
+                                    int mode, __u64 extra,
+                                    struct lookup_intent *it)
+{
+        struct inode *inode;
+        struct ptlrpc_request *request = NULL;
+        struct ll_sb_info *sbi = ll_i2sbi(dir);
+        struct lustre_md md;
+        int rc;
+        ENTRY;
+
+        LASSERT(it && it->it_disposition);
+
+        ll_invalidate_inode_pages(dir);
+
+        request = it->it_data;
+        rc = mdc_req2lustre_md(request, 1, &sbi->ll_osc_conn, &md);
+        if (rc)
+                GOTO(out, inode = ERR_PTR(rc));
+
+        inode = ll_iget(dir->i_sb, md.body->ino, &md);
+        if (!inode || is_bad_inode(inode)) {
+                /* XXX might need iput() for bad inode */
+                CERROR("new_inode -fatal: rc %d\n", -EIO);
+                LBUG();
+                GOTO(out, inode = ERR_PTR(-EIO));
+        }
+        LASSERT(list_empty(&inode->i_dentry));
+
+        CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
+               inode, inode->i_ino, inode->i_generation);
+        ldlm_lock_set_data((struct lustre_handle *)it->it_lock_handle, inode);
+
+        EXIT;
+ out:
+        ptlrpc_req_finished(request);
+        return inode;
 }
 
 /*
@@ -902,54 +722,46 @@ static int ll_mdc_unlink(struct inode *dir, struct inode *child, __u32 mode,
  * If the create succeeds, we fill in the inode information
  * with d_instantiate().
  */
-static int ll_create(struct inode *dir, struct dentry *dentry, int mode)
+static int ll_create_it(struct inode *dir, struct dentry *dentry, int mode, struct lookup_intent *it)
 {
-        struct lookup_intent *it;
         struct inode *inode;
+        struct ptlrpc_request *request = it->it_data; /* RPC held by the intent */
         int rc = 0;
         ENTRY;
 
         CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),intent=%s\n",
                dentry->d_name.name, dir->i_ino, dir->i_generation, dir,
-               LL_IT2STR(dentry->d_it));
-
-        it = dentry->d_it;
+               LL_IT2STR(it));
 
-        rc = ll_it_open_error(IT_OPEN_CREATE, it);
+        rc = ll_it_open_error(DISP_OPEN_CREATE, it);
         if (rc) {
-                LL_GET_INTENT(dentry, it);
-                ptlrpc_req_finished(it->it_data);
+                ptlrpc_req_finished(request); /* intent failed: drop the RPC */
                 RETURN(rc);
         }
 
+        mdc_store_inode_generation(request, 2, 1); /* NOTE(review): meaning of 2, 1 not visible here */
         inode = ll_create_node(dir, dentry->d_name.name, dentry->d_name.len,
                                NULL, 0, mode, 0, it);
-
         if (IS_ERR(inode)) {
-                LL_GET_INTENT(dentry, it);
                 RETURN(PTR_ERR(inode));
         }
 
-        /* no directory data updates when intents rule */
-        if (it && it->it_disposition) {
-                d_instantiate(dentry, inode);
-                RETURN(0);
-        }
-
-        rc = ext2_add_nondir(dentry, inode);
-        RETURN(rc);
+        d_instantiate(dentry, inode); /* attach the new inode to the dentry */
+        RETURN(0);
 }
 
-static int ll_mknod(struct inode *dir, struct dentry *dentry, int mode,
-                    int rdev)
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) /* nameidata create entry point */
+static int ll_create_nd(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd)
 {
-        LBUG();
-        return -ENOSYS;
+        return ll_create_it(dir, dentry, mode, &nd->it); /* intent lives in the nameidata */
 }
+#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0) */
 
-static int ll_mknod2(struct inode *dir, const char *name, int len, int mode,
-                     int rdev)
+static int ll_mknod_raw(struct nameidata *nd, int mode, dev_t rdev)
 {
+        struct inode *dir = nd->dentry->d_inode;
+        const char *name = nd->last.name;
+        int len = nd->last.len;
         struct ptlrpc_request *request = NULL;
         time_t time = LTIME_S(CURRENT_TIME);
         struct ll_sb_info *sbi = ll_i2sbi(dir);
@@ -966,10 +778,13 @@ static int ll_mknod2(struct inode *dir, const char *name, int len, int mode,
         mode &= ~current->fs->umask;
 
         switch (mode & S_IFMT) {
-        case 0: case S_IFREG:
+        case 0: 
+        case S_IFREG:
                 mode |= S_IFREG; /* for mode = 0 case, fallthrough */
-        case S_IFCHR: case S_IFBLK:
-        case S_IFIFO: case S_IFSOCK:
+        case S_IFCHR: 
+        case S_IFBLK:
+        case S_IFIFO: 
+        case S_IFSOCK:
                 ll_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
                 err = mdc_create(&sbi->ll_mdc_conn, &op_data, NULL, 0, mode,
                                  current->fsuid, current->fsgid, time,
@@ -985,16 +800,11 @@ static int ll_mknod2(struct inode *dir, const char *name, int len, int mode,
         RETURN(err);
 }
 
-static int ll_symlink(struct inode *dir, struct dentry *dentry,
-                      const char *symname)
-{
-        LBUG();
-        return -ENOSYS;
-}
-
-static int ll_symlink2(struct inode *dir, const char *name, int len,
-                       const char *tgt)
+static int ll_symlink_raw(struct nameidata *nd, const char *tgt)
 {
+        struct inode *dir = nd->dentry->d_inode;
+        const char *name = nd->last.name;
+        int len = nd->last.len;
         struct ptlrpc_request *request = NULL;
         time_t time = LTIME_S(CURRENT_TIME);
         struct ll_sb_info *sbi = ll_i2sbi(dir);
@@ -1016,16 +826,12 @@ static int ll_symlink2(struct inode *dir, const char *name, int len,
         RETURN(err);
 }
 
-static int ll_link(struct dentry *old_dentry, struct inode * dir,
-                   struct dentry *dentry)
-{
-        LBUG();
-        return -ENOSYS;
-}
-
-static int ll_link2(struct inode *src, struct inode *dir,
-                    const char *name, int len)
+static int ll_link_raw(struct nameidata *srcnd, struct nameidata *tgtnd)
 {
+        struct inode *src = srcnd->dentry->d_inode;
+        struct inode *dir = tgtnd->dentry->d_inode;
+        const char *name = tgtnd->last.name;
+        int len = tgtnd->last.len;
         struct ptlrpc_request *request = NULL;
         struct mdc_op_data op_data;
         int err;
@@ -1043,14 +849,12 @@ static int ll_link2(struct inode *src, struct inode *dir,
         RETURN(err);
 }
 
-static int ll_mkdir(struct inode *dir, struct dentry *dentry, int mode)
-{
-        LBUG();
-        return -ENOSYS;
-}
 
-static int ll_mkdir2(struct inode *dir, const char *name, int len, int mode)
+static int ll_mkdir_raw(struct nameidata *nd, int mode)
 {
+        struct inode *dir = nd->dentry->d_inode;
+        const char *name = nd->last.name;
+        int len = nd->last.len;
         struct ptlrpc_request *request = NULL;
         time_t time = LTIME_S(CURRENT_TIME);
         struct ll_sb_info *sbi = ll_i2sbi(dir);
@@ -1066,14 +870,16 @@ static int ll_mkdir2(struct inode *dir, const char *name, int len, int mode)
         mode = (mode & (S_IRWXUGO|S_ISVTX) & ~current->fs->umask) | S_IFDIR;
         ll_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
         err = mdc_create(&sbi->ll_mdc_conn, &op_data, NULL, 0, mode,
-                         current->fsuid, current->fsgid,
-                         time, 0, &request);
+                         current->fsuid, current->fsgid, time, 0, &request);
         ptlrpc_req_finished(request);
         RETURN(err);
 }
 
-static int ll_rmdir2(struct inode *dir, const char *name, int len)
+static int ll_rmdir_raw(struct nameidata *nd)
 {
+        struct inode *dir = nd->dentry->d_inode;
+        const char *name = nd->last.name;
+        int len = nd->last.len;
         int rc;
         ENTRY;
         CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n",
@@ -1083,8 +889,11 @@ static int ll_rmdir2(struct inode *dir, const char *name, int len)
         RETURN(rc);
 }
 
-static int ll_unlink2(struct inode *dir, const char *name, int len)
+static int ll_unlink_raw(struct nameidata *nd)
 {
+        struct inode *dir = nd->dentry->d_inode;
+        const char *name = nd->last.name;
+        int len = nd->last.len;
         int rc;
         ENTRY;
         CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n",
@@ -1094,29 +903,14 @@ static int ll_unlink2(struct inode *dir, const char *name, int len)
         RETURN(rc);
 }
 
-static int ll_unlink(struct inode *dir, struct dentry *dentry)
-{
-        LBUG();
-        return -ENOSYS;
-}
-
-static int ll_rmdir(struct inode *dir, struct dentry *dentry)
-{
-        LBUG();
-        return -ENOSYS;
-}
-
-static int ll_rename(struct inode * old_dir, struct dentry * old_dentry,
-                     struct inode * new_dir, struct dentry * new_dentry)
-{
-        LBUG();
-        return -ENOSYS;
-}
-
-static int ll_rename2(struct inode *src, struct inode *tgt,
-                      const char *oldname, int oldlen,
-                      const char *newname, int newlen)
+static int ll_rename_raw(struct nameidata *oldnd, struct nameidata *newnd)
 {
+        struct inode *src = oldnd->dentry->d_inode;
+        struct inode *tgt = newnd->dentry->d_inode;
+        const char *oldname = oldnd->last.name;
+        int oldlen  = oldnd->last.len;
+        const char *newname = newnd->last.name;
+        int newlen  = newnd->last.len;
         struct ptlrpc_request *request = NULL;
         struct ll_sb_info *sbi = ll_i2sbi(src);
         struct mdc_op_data op_data;
@@ -1134,27 +928,23 @@ static int ll_rename2(struct inode *src, struct inode *tgt,
         RETURN(err);
 }
 
-extern int ll_inode_revalidate(struct dentry *dentry);
 struct inode_operations ll_dir_inode_operations = {
-        create:          ll_create,
-        lookup2:         ll_lookup2,
-        link:            ll_link,          /* LBUG() */
-        link2:           ll_link2,
-        unlink:          ll_unlink,        /* LBUG() */
-        unlink2:         ll_unlink2,
-        symlink:         ll_symlink,       /* LBUG() */
-        symlink2:        ll_symlink2,
-        mkdir:           ll_mkdir,         /* LBUG() */
-        mkdir2:          ll_mkdir2,
-        rmdir:           ll_rmdir,         /* LBUG() */
-        rmdir2:          ll_rmdir2,
-        mknod:           ll_mknod,         /* LBUG() */
-        mknod2:          ll_mknod2,
-        rename:          ll_rename,        /* LBUG() */
-        rename2:         ll_rename2,
+        link_raw:           ll_link_raw, /* *_raw handlers take nameidata arguments */
+        unlink_raw:         ll_unlink_raw,
+        symlink_raw:        ll_symlink_raw,
+        mkdir_raw:          ll_mkdir_raw,
+        rmdir_raw:          ll_rmdir_raw,
+        mknod_raw:          ll_mknod_raw,
+        rename_raw:         ll_rename_raw,
         setattr:         ll_setattr,
         setattr_raw:     ll_setattr_raw,
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        revalidate:      ll_inode_revalidate,
+        create_it:          ll_create_it, /* intent is passed as an explicit argument */
+        lookup_it:            ll_lookup_it,
+        revalidate_it:      ll_inode_revalidate_it,
+#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) */
+        lookup_it:          ll_lookup_nd, /* NOTE(review): nameidata variant in lookup_it slot -- confirm */
+        create_nd:          ll_create_nd,
+        getattr_it:         ll_getattr,
 #endif
 };
index 98f6086..b4004b5 100644 (file)
 #include <linux/version.h>
 #include <asm/system.h>
 #include <asm/uaccess.h>
-#include "llite_internal.h"
 
 #include <linux/fs.h>
 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
 #include <linux/buffer_head.h>
+#include <linux/mpage.h>
+#include <linux/writeback.h>
 #else
 #include <linux/iobuf.h>
 #endif
@@ -51,7 +52,7 @@
 
 #include <linux/lustre_mds.h>
 #include <linux/lustre_lite.h>
-#include <linux/lustre_lib.h>
+#include "llite_internal.h"
 #include <linux/lustre_compat25.h>
 
 /*
@@ -90,7 +91,8 @@ void set_page_clean(struct page *page)
 }
 
 /* SYNCHRONOUS I/O to object storage for an inode */
-static int ll_brw(int cmd, struct inode *inode, struct page *page, int flags)
+static int ll_brw(int cmd, struct inode *inode, struct obdo *oa,
+                  struct page *page, int flags)
 {
         struct ll_inode_info *lli = ll_i2info(inode);
         struct lov_stripe_md *lsm = lli->lli_smd;
@@ -124,8 +126,8 @@ static int ll_brw(int cmd, struct inode *inode, struct page *page, int flags)
         else
                 lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
                                     LPROC_LL_BRW_READ, pg.count);
-        rc = obd_brw(cmd, ll_i2obdconn(inode), lsm, 1, &pg, NULL);
-        if (rc)
+        rc = obd_brw(cmd, ll_i2obdconn(inode), oa, lsm, 1, &pg, NULL);
+        if (rc != 0 && rc != -EIO)
                 CERROR("error from obd_brw: rc = %d\n", rc);
 
         RETURN(rc);
@@ -142,6 +144,7 @@ static int ll_readpage(struct file *file, struct page *first_page)
         struct page *page = first_page;
         struct list_head *pos;
         struct brw_page *pgs;
+        struct obdo *oa;
         unsigned long end_index, extent_end = 0;
         struct ptlrpc_request_set *set;
         int npgs = 0, rc = 0, max_pages;
@@ -276,19 +279,33 @@ static int ll_readpage(struct file *file, struct page *first_page)
 
         } while (page);
 
-        set = ptlrpc_prep_set();
-        if (set == NULL) {
+        if ((oa = obdo_alloc()) == NULL) {
+                CERROR("ENOMEM allocing obdo\n");
+                rc = -ENOMEM;
+        } else if ((set = ptlrpc_prep_set()) == NULL) {
                 CERROR("ENOMEM allocing request set\n");
+                obdo_free(oa);
                 rc = -ENOMEM;
         } else {
-                rc = obd_brw_async(OBD_BRW_READ, ll_i2obdconn(inode),
+                struct ll_file_data *fd = file->private_data;
+
+                oa->o_id = lli->lli_smd->lsm_object_id;
+                memcpy(obdo_handle(oa), &fd->fd_ost_och.och_fh,
+                       sizeof(fd->fd_ost_och.och_fh));
+                oa->o_valid = OBD_MD_FLID | OBD_MD_FLHANDLE;
+                obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME);
+
+                rc = obd_brw_async(OBD_BRW_READ, ll_i2obdconn(inode), oa,
                                    ll_i2info(inode)->lli_smd, npgs, pgs,
                                    set, NULL);
                 if (rc == 0)
                         rc = ptlrpc_set_wait(set);
                 ptlrpc_set_destroy(set);
+                if (rc == 0)
+                        obdo_refresh_inode(inode, oa, oa->o_valid);
                 if (rc && rc != -EIO)
                         CERROR("error from obd_brw_async: rc = %d\n", rc);
+                obdo_free(oa);
         }
 
         while (npgs-- > 0) {
@@ -310,15 +327,15 @@ static int ll_readpage(struct file *file, struct page *first_page)
 void ll_truncate(struct inode *inode)
 {
         struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-        struct obdo oa = {0};
+        struct obdo oa;
         int err;
         ENTRY;
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
                inode->i_generation, inode);
 
+        /* object not yet allocated */
         if (!lsm) {
-                /* object not yet allocated */
-                inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+                CERROR("truncate on inode %lu with no objects\n", inode->i_ino);
                 EXIT;
                 return;
         }
@@ -331,8 +348,9 @@ void ll_truncate(struct inode *inode)
                         ~0);
 
         oa.o_id = lsm->lsm_object_id;
-        oa.o_mode = inode->i_mode;
-        oa.o_valid = OBD_MD_FLID | OBD_MD_FLMODE | OBD_MD_FLTYPE;
+        oa.o_valid = OBD_MD_FLID;
+        obdo_from_inode(&oa, inode, OBD_MD_FLTYPE|OBD_MD_FLMODE|OBD_MD_FLATIME|
+                                    OBD_MD_FLMTIME | OBD_MD_FLCTIME);
 
         CDEBUG(D_INFO, "calling punch for "LPX64" (all bytes after %Lu)\n",
                oa.o_id, inode->i_size);
@@ -343,7 +361,9 @@ void ll_truncate(struct inode *inode)
         if (err)
                 CERROR("obd_truncate fails (%d) ino %lu\n", err, inode->i_ino);
         else
-                obdo_to_inode(inode, &oa, oa.o_valid);
+                obdo_to_inode(inode, &oa, OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
+                                          OBD_MD_FLATIME | OBD_MD_FLMTIME |
+                                          OBD_MD_FLCTIME);
 
         EXIT;
         return;
@@ -356,9 +376,11 @@ static int ll_prepare_write(struct file *file, struct page *page, unsigned from,
 {
         struct inode *inode = page->mapping->host;
         struct ll_inode_info *lli = ll_i2info(inode);
+        struct ll_file_data *fd = file->private_data;
         struct lov_stripe_md *lsm = lli->lli_smd;
         obd_off offset = ((obd_off)page->index) << PAGE_SHIFT;
         struct brw_page pg;
+        struct obdo oa;
         int rc = 0;
         ENTRY;
 
@@ -375,7 +397,7 @@ static int ll_prepare_write(struct file *file, struct page *page, unsigned from,
         pg.off = offset;
         pg.count = PAGE_SIZE;
         pg.flag = 0;
-        rc = obd_brw(OBD_BRW_CHECK, ll_i2obdconn(inode), lsm, 1, &pg, NULL);
+        rc = obd_brw(OBD_BRW_CHECK, ll_i2obdconn(inode), NULL, lsm, 1,&pg,NULL);
         if (rc)
                 RETURN(rc);
 
@@ -393,7 +415,15 @@ static int ll_prepare_write(struct file *file, struct page *page, unsigned from,
                 GOTO(prepare_done, rc = 0);
         }
 
-        rc = ll_brw(OBD_BRW_READ, inode, page, 0);
+        oa.o_id = lsm->lsm_object_id;
+        oa.o_mode = inode->i_mode;
+        memcpy(obdo_handle(&oa), &fd->fd_ost_och.och_fh,
+               sizeof(fd->fd_ost_och.och_fh));
+        oa.o_valid = OBD_MD_FLID |OBD_MD_FLMODE |OBD_MD_FLTYPE |OBD_MD_FLHANDLE;
+
+        rc = ll_brw(OBD_BRW_READ, inode, &oa, page, 0);
+        if (rc == 0)
+                obdo_refresh_inode(inode, &oa, oa.o_valid);
 
         EXIT;
  prepare_done:
@@ -544,15 +574,19 @@ int ll_mark_dirty_page(struct lustre_handle *conn, struct lov_stripe_md *lsm,
 static int ll_writepage(struct page *page)
 {
         struct inode *inode = page->mapping->host;
+        struct obdo oa; /* not zeroed: fields are set below */
         ENTRY;
 
         CDEBUG(D_CACHE, "page %p [lau %d] inode %p\n", page,
-                        PageLaunder(page), inode);
+               PageLaunder(page), inode);
         LASSERT(PageLocked(page));
 
-        /* XXX should obd_brw errors trickle up? */
-        ll_batch_writepage(inode, page);
-        RETURN(0);
+        oa.o_id = ll_i2info(inode)->lli_smd->lsm_object_id;
+        oa.o_valid = OBD_MD_FLID; /* NOTE(review): assumes obdo_from_inode() ORs into o_valid */
+        obdo_from_inode(&oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
+                                    OBD_MD_FLMTIME | OBD_MD_FLCTIME);
+
+        RETURN(ll_batch_writepage(inode, &oa, page)); /* result is returned to the caller */
 }
 
 /*
@@ -567,6 +601,7 @@ static int ll_commit_write(struct file *file, struct page *page,
         int rc = 0;
         ENTRY;
 
+        SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */
         LASSERT(inode == file->f_dentry->d_inode);
         LASSERT(PageLocked(page));
 
@@ -595,7 +630,18 @@ static int ll_commit_write(struct file *file, struct page *page,
         /* This means that we've hit either the local cache limit or the limit
          * of the OST's grant. */
         if (rc == -EDQUOT) {
-                int rc = ll_batch_writepage(inode, page);
+                struct ll_file_data *fd = file->private_data;
+                struct obdo oa;
+                int rc;
+
+                oa.o_id = ll_i2info(inode)->lli_smd->lsm_object_id;
+                memcpy(obdo_handle(&oa), &fd->fd_ost_och.och_fh,
+                       sizeof(fd->fd_ost_och.och_fh));
+                oa.o_valid = OBD_MD_FLID | OBD_MD_FLHANDLE;
+                obdo_from_inode(&oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
+                                            OBD_MD_FLMTIME | OBD_MD_FLCTIME);
+
+                rc = ll_batch_writepage(inode, &oa, page);
                 lock_page(page); /* caller expects to unlock */
                 RETURN(rc);
         }
@@ -624,12 +670,13 @@ static int ll_direct_IO(int rw, struct inode *inode, struct kiobuf *iobuf,
         struct lov_stripe_md *lsm = lli->lli_smd;
         struct brw_page *pga;
         struct ptlrpc_request_set *set;
+        struct obdo oa;
         int length, i, flags, rc = 0;
         loff_t offset;
         ENTRY;
 
         if (!lsm || !lsm->lsm_object_id)
-                RETURN(-ENOMEM);
+                RETURN(-EBADF);
 
         if ((iobuf->offset & (blocksize - 1)) ||
             (iobuf->length & (blocksize - 1)))
@@ -663,6 +710,11 @@ static int ll_direct_IO(int rw, struct inode *inode, struct kiobuf *iobuf,
                 }
         }
 
+        oa.o_id = lsm->lsm_object_id;
+        oa.o_valid = OBD_MD_FLID;
+        obdo_from_inode(&oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
+                                    OBD_MD_FLMTIME | OBD_MD_FLCTIME);
+
         if (rw == WRITE)
                 lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
                                     LPROC_LL_DIRECT_WRITE, iobuf->length);
@@ -670,8 +722,8 @@ static int ll_direct_IO(int rw, struct inode *inode, struct kiobuf *iobuf,
                 lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
                                     LPROC_LL_DIRECT_READ, iobuf->length);
         rc = obd_brw_async(rw == WRITE ? OBD_BRW_WRITE : OBD_BRW_READ,
-                           ll_i2obdconn(inode), lsm, iobuf->nr_pages, pga, set,
-                           NULL);
+                           ll_i2obdconn(inode), &oa, lsm, iobuf->nr_pages, pga,
+                           set, NULL);
         if (rc) {
                 CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR,
                        "error from obd_brw_async: rc = %d\n", rc);
index 85532f0..9a3ffa1 100644 (file)
 #include "llite_internal.h"
 
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-kmem_cache_t *ll_file_data_slab;
 extern struct address_space_operations ll_aops;
 extern struct address_space_operations ll_dir_aops;
-struct super_operations ll_super_operations;
-
-/* /proc/lustre/llite root that tracks llite mount points */
-struct proc_dir_entry *proc_lustre_fs_root = NULL;
-/* lproc_llite.c */
-extern void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi);
-extern int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
-                                       struct super_block *sb,
-                                       char *osc, char *mdc);
-
-extern int ll_recover(struct recovd_data *, int);
-extern int ll_commitcbd_setup(struct ll_sb_info *);
-extern int ll_commitcbd_cleanup(struct ll_sb_info *);
-
-static char *ll_read_opt(const char *opt, char *data)
-{
-        char *value;
-        char *retval;
-        ENTRY;
-
-        CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data);
-        if (strncmp(opt, data, strlen(opt)))
-                RETURN(NULL);
-        if ((value = strchr(data, '=')) == NULL)
-                RETURN(NULL);
-
-        value++;
-        OBD_ALLOC(retval, strlen(value) + 1);
-        if (!retval) {
-                CERROR("out of memory!\n");
-                RETURN(NULL);
-        }
-
-        memcpy(retval, value, strlen(value)+1);
-        CDEBUG(D_SUPER, "Assigned option: %s, value %s\n", opt, retval);
-        RETURN(retval);
-}
-
-static int ll_set_opt(const char *opt, char *data, int fl)
-{
-        ENTRY;
-
-        CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data);
-        if (strncmp(opt, data, strlen(opt)))
-                RETURN(0);
-        else
-                RETURN(fl);
-}
-
-static void ll_options(char *options, char **ost, char **mds, int *flags)
-{
-        char *this_char;
-        ENTRY;
-
-        if (!options) {
-                EXIT;
-                return;
-        }
-
-        for (this_char = strtok (options, ",");
-             this_char != NULL;
-             this_char = strtok (NULL, ",")) {
-                CDEBUG(D_SUPER, "this_char %s\n", this_char);
-                if ((!*ost && (*ost = ll_read_opt("osc", this_char)))||
-                    (!*mds && (*mds = ll_read_opt("mdc", this_char)))||
-                    (!(*flags & LL_SBI_NOLCK) &&
-                     ((*flags) = (*flags) |
-                      ll_set_opt("nolock", this_char, LL_SBI_NOLCK))))
-                        continue;
-        }
-        EXIT;
-}
-
-#ifndef log2
-#define log2(n) ffz(~(n))
-#endif
 
 static struct super_block *ll_read_super(struct super_block *sb,
                                          void *data, int silent)
 {
-        struct inode *root = 0;
-        struct obd_device *obd;
-        struct ll_sb_info *sbi;
-        struct obd_export *mdc_export;
-        char *osc = NULL;
-        char *mdc = NULL;
         int err;
-        struct ll_fid rootfid;
-        struct obd_statfs osfs;
-        struct ptlrpc_request *request = NULL;
-        struct ptlrpc_connection *mdc_conn;
-        struct ll_read_inode2_cookie lic;
-        class_uuid_t uuid;
-
         ENTRY;
-
-        CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb);
-        OBD_ALLOC(sbi, sizeof(*sbi));
-        if (!sbi)
+        err = ll_fill_super(sb, data, silent);
+        if (err)
                 RETURN(NULL);
-
-        INIT_LIST_HEAD(&sbi->ll_conn_chain);
-        INIT_LIST_HEAD(&sbi->ll_orphan_dentry_list);
-        generate_random_uuid(uuid);
-        class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
-
-        sb->u.generic_sbp = sbi;
-
-        ll_options(data, &osc, &mdc, &sbi->ll_flags);
-
-        if (!osc) {
-                CERROR("no osc\n");
-                GOTO(out_free, sb = NULL);
-        }
-
-        if (!mdc) {
-                CERROR("no mdc\n");
-                GOTO(out_free, sb = NULL);
-        }
-
-        obd = class_name2obd(mdc);
-        if (!obd) {
-                CERROR("MDC %s: not setup or attached\n", mdc);
-                GOTO(out_free, sb = NULL);
-        }
-
-        err = obd_connect(&sbi->ll_mdc_conn, obd, &sbi->ll_sb_uuid);
-        if (err) {
-                CERROR("cannot connect to %s: rc = %d\n", mdc, err);
-                GOTO(out_free, sb = NULL);
-        }
-
-        mdc_conn = sbi2mdc(sbi)->cl_import->imp_connection;
-
-        obd = class_name2obd(osc);
-        if (!obd) {
-                CERROR("OSC %s: not setup or attached\n", osc);
-                GOTO(out_mdc, sb = NULL);
-        }
-
-        err = obd_connect(&sbi->ll_osc_conn, obd, &sbi->ll_sb_uuid);
-        if (err) {
-                CERROR("cannot connect to %s: rc = %d\n", osc, err);
-                GOTO(out_mdc, sb = NULL);
-        }
-
-        err = mdc_getstatus(&sbi->ll_mdc_conn, &rootfid);
-        if (err) {
-                CERROR("cannot mds_connect: rc = %d\n", err);
-                GOTO(out_osc, sb = NULL);
-        }
-        CDEBUG(D_SUPER, "rootfid "LPU64"\n", rootfid.id);
-        sbi->ll_rootino = rootfid.id;
-
-        memset(&osfs, 0, sizeof(osfs));
-        mdc_export = class_conn2export(&sbi->ll_mdc_conn);
-        if (mdc_export == NULL) {
-                CERROR("null mdc_export\n");
-                GOTO(out_osc, sb = NULL);
-        }
-        err = obd_statfs(mdc_export, &osfs);
-        class_export_put(mdc_export);
-        sb->s_blocksize = osfs.os_bsize;
-        sb->s_blocksize_bits = log2(osfs.os_bsize);
-        sb->s_magic = LL_SUPER_MAGIC;
-        sb->s_maxbytes = PAGE_CACHE_MAXBYTES;
-
-        sb->s_op = &ll_super_operations;
-
-        /* make root inode 
-         * XXX: move this to after cbd setup? */
-        err = mdc_getattr(&sbi->ll_mdc_conn, &rootfid,
-                          OBD_MD_FLNOTOBD|OBD_MD_FLBLOCKS, 0, &request);
-        if (err) {
-                CERROR("mdc_getattr failed for root: rc = %d\n", err);
-                GOTO(out_osc, sb = NULL);
-        }
-
-        /* initialize committed transaction callback daemon */
-        spin_lock_init(&sbi->ll_commitcbd_lock);
-        init_waitqueue_head(&sbi->ll_commitcbd_waitq);
-        init_waitqueue_head(&sbi->ll_commitcbd_ctl_waitq);
-        sbi->ll_commitcbd_flags = 0;
-        err = ll_commitcbd_setup(sbi);
-        if (err) {
-                CERROR("failed to start commit callback daemon: rc = %d\n",err);
-                ptlrpc_req_finished (request);
-                GOTO(out_osc, sb = NULL);
-        }
-
-        lic.lic_body = lustre_msg_buf(request->rq_repmsg, 0,
-                                      sizeof(*lic.lic_body));
-        LASSERT (lic.lic_body != NULL);         /* checked by mdc_getattr() */
-        LASSERT_REPSWABBED (request, 0);        /* swabbed by mdc_getattr() */
-
-        lic.lic_lsm = NULL;
-
-        LASSERT(sbi->ll_rootino != 0);
-        root = iget4(sb, sbi->ll_rootino, NULL, &lic);
-
-        ptlrpc_req_finished(request);
-
-        if (root == NULL || is_bad_inode(root)) {
-                /* XXX might need iput() for bad inode */
-                CERROR("lustre_lite: bad iget4 for root\n");
-                GOTO(out_cbd, sb = NULL);
-        }
-
-        sb->s_root = d_alloc_root(root);
-
-        if (proc_lustre_fs_root) {
-                err = lprocfs_register_mountpoint(proc_lustre_fs_root, sb,
-                                                  osc, mdc);
-                if (err < 0)
-                        CERROR("could not register mount in /proc/lustre");
-        }
-
-out_dev:
-        if (mdc)
-                OBD_FREE(mdc, strlen(mdc) + 1);
-        if (osc)
-                OBD_FREE(osc, strlen(osc) + 1);
-
         RETURN(sb);
-
-out_cbd:
-        ll_commitcbd_cleanup(sbi);
-out_osc:
-        obd_disconnect(&sbi->ll_osc_conn, 0);
-out_mdc:
-        obd_disconnect(&sbi->ll_mdc_conn, 0);
-out_free:
-        lprocfs_unregister_mountpoint(sbi);
-        OBD_FREE(sbi, sizeof(*sbi));
-
-        goto out_dev;
-} /* ll_read_super */
-
-static void ll_put_super(struct super_block *sb)
-{
-        struct ll_sb_info *sbi = ll_s2sbi(sb);
-        struct list_head *tmp, *next;
-        struct ll_fid rootfid;
-        struct obd_device *obd = class_conn2obd(&sbi->ll_mdc_conn);
-        ENTRY;
-
-        CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb);
-        list_del(&sbi->ll_conn_chain);
-        ll_commitcbd_cleanup(sbi);
-        obd_disconnect(&sbi->ll_osc_conn, 0);
-
-        /* NULL request to force sync on the MDS, and get the last_committed
-         * value to flush remaining RPCs from the sending queue on client.
-         *
-         * XXX This should be an mdc_sync() call to sync the whole MDS fs,
-         *     which we can call for other reasons as well.
-         */
-        if (!obd->obd_no_recov)
-                mdc_getstatus(&sbi->ll_mdc_conn, &rootfid);
-
-        lprocfs_unregister_mountpoint(sbi);
-        if (sbi->ll_proc_root) {
-                lprocfs_remove(sbi->ll_proc_root);
-                sbi->ll_proc_root = NULL;
-        }
-
-        obd_disconnect(&sbi->ll_mdc_conn, 0);
-
-        spin_lock(&dcache_lock);
-        list_for_each_safe(tmp, next, &sbi->ll_orphan_dentry_list) {
-                struct dentry *dentry = list_entry(tmp, struct dentry, d_hash);
-                shrink_dcache_parent(dentry);
-        }
-        spin_unlock(&dcache_lock);
-
-        OBD_FREE(sbi, sizeof(*sbi));
-
-        EXIT;
-} /* ll_put_super */
-
-static void ll_clear_inode(struct inode *inode)
-{
-        struct ll_sb_info *sbi = ll_i2sbi(inode);
-        struct ll_inode_info *lli = ll_i2info(inode);
-        int rc;
-        ENTRY;
-
-        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
-               inode->i_generation, inode);
-        rc = ll_mdc_cancel_unused(&sbi->ll_mdc_conn, inode,
-                                  LDLM_FL_NO_CALLBACK, inode);
-        if (rc < 0) {
-                CERROR("ll_mdc_cancel_unused: %d\n", rc);
-                /* XXX FIXME do something dramatic */
-        }
-
-        if (atomic_read(&inode->i_count) != 0)
-                CERROR("clearing in-use inode %lu: count = %d\n",
-                       inode->i_ino, atomic_read(&inode->i_count));
-
-        if (lli->lli_smd) {
-                rc = obd_cancel_unused(&sbi->ll_osc_conn, lli->lli_smd,
-                                       LDLM_FL_WARN, inode);
-                if (rc < 0) {
-                        CERROR("obd_cancel_unused: %d\n", rc);
-                        /* XXX FIXME do something dramatic */
-                }
-                obd_free_memmd(&sbi->ll_osc_conn, &lli->lli_smd);
-                lli->lli_smd = NULL;
-        }
-
-        if (lli->lli_symlink_name) {
-                OBD_FREE(lli->lli_symlink_name,
-                         strlen(lli->lli_symlink_name) + 1);
-                lli->lli_symlink_name = NULL;
-        }
-
-        EXIT;
-}
-
-#if 0
-static void ll_delete_inode(struct inode *inode)
-{
-        ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
-               inode->i_generation, inode);
-        if (S_ISREG(inode->i_mode)) {
-                int err;
-                struct obdo *oa;
-                struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-
-                /* mcreate with no open */
-                if (!lsm)
-                        GOTO(out, 0);
-
-                if (lsm->lsm_object_id == 0) {
-                        CERROR("This really happens\n");
-                        /* No obdo was ever created */
-                        GOTO(out, 0);
-                }
-
-                oa = obdo_alloc();
-                if (oa == NULL)
-                        GOTO(out, -ENOMEM);
-
-                oa->o_id = lsm->lsm_object_id;
-                obdo_from_inode(oa, inode, OBD_MD_FLID | OBD_MD_FLTYPE);
-
-                err = obd_destroy(ll_i2obdconn(inode), oa, lsm, NULL);
-                obdo_free(oa);
-                if (err)
-                        CDEBUG(D_INODE,
-                               "inode %lu obd_destroy objid "LPX64" error %d\n",
-                               inode->i_ino, lsm->lsm_object_id, err);
-        }
-out:
-        clear_inode(inode);
-        EXIT;
-}
-#endif
-
-/* like inode_setattr, but doesn't mark the inode dirty */
-static int ll_attr2inode(struct inode *inode, struct iattr *attr, int trunc)
-{
-        unsigned int ia_valid = attr->ia_valid;
-        int error = 0;
-
-        if ((ia_valid & ATTR_SIZE) && trunc) {
-                if (attr->ia_size > ll_file_maxbytes(inode)) {
-                        error = -EFBIG;
-                        goto out;
-                }
-                error = vmtruncate(inode, attr->ia_size);
-                if (error)
-                        goto out;
-        } else if (ia_valid & ATTR_SIZE)
-                inode->i_size = attr->ia_size;
-
-        if (ia_valid & ATTR_UID)
-                inode->i_uid = attr->ia_uid;
-        if (ia_valid & ATTR_GID)
-                inode->i_gid = attr->ia_gid;
-        if (ia_valid & ATTR_ATIME)
-                inode->i_atime = attr->ia_atime;
-        if (ia_valid & ATTR_MTIME)
-                inode->i_mtime = attr->ia_mtime;
-        if (ia_valid & ATTR_CTIME)
-                inode->i_ctime = attr->ia_ctime;
-        if (ia_valid & ATTR_MODE) {
-                inode->i_mode = attr->ia_mode;
-                if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
-                        inode->i_mode &= ~S_ISGID;
-        }
-out:
-        return error;
-}
-
-int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc)
-{
-        struct ptlrpc_request *request = NULL;
-        struct ll_sb_info *sbi = ll_i2sbi(inode);
-        int err = 0;
-        ENTRY;
-
-        /* change incore inode */
-        err = ll_attr2inode(inode, attr, do_trunc);
-        if (err)
-                RETURN(err);
-
-        /* Don't send size changes to MDS to avoid "fast EA" problems, and
-         * also avoid a pointless RPC (we get file size from OST anyways).
-         */
-        attr->ia_valid &= ~ATTR_SIZE;
-        if (attr->ia_valid) {
-                struct mdc_op_data op_data;
-
-                ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0);
-                err = mdc_setattr(&sbi->ll_mdc_conn, &op_data,
-                                  attr, NULL, 0, &request);
-                if (err)
-                        CERROR("mdc_setattr fails: err = %d\n", err);
-
-                ptlrpc_req_finished(request);
-                if (S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_MTIME_SET) {
-                        struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-                        struct obdo oa;
-                        int err2;
-
-                        CDEBUG(D_INODE, "set mtime on OST inode %lu to %lu\n",
-                               inode->i_ino, attr->ia_mtime);
-                        oa.o_id = lsm->lsm_object_id;
-                        oa.o_mode = S_IFREG;
-                        oa.o_valid = OBD_MD_FLID |OBD_MD_FLTYPE |OBD_MD_FLMTIME;
-                        oa.o_mtime = attr->ia_mtime;
-                        err2 = obd_setattr(&sbi->ll_osc_conn, &oa, lsm, NULL);
-                        if (err2) {
-                                CERROR("obd_setattr fails: rc=%d\n", err);
-                                if (!err)
-                                        err = err2;
-                        }
-                }
-        }
-
-        RETURN(err);
-}
-
-int ll_setattr_raw(struct inode *inode, struct iattr *attr)
-{
-        struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-        struct ll_sb_info *sbi = ll_i2sbi(inode);
-        struct ptlrpc_request *request = NULL;
-        struct mdc_op_data op_data;
-        int rc = 0, err;
-        ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
-               inode->i_generation, inode);
-
-        if ((attr->ia_valid & ATTR_SIZE)) {
-                struct ldlm_extent extent = {attr->ia_size, OBD_OBJECT_EOF};
-                struct lustre_handle lockh = { 0 };
-
-                if (attr->ia_size > ll_file_maxbytes(inode))
-                        RETURN(-EFBIG);
-
-                /* writeback uses inode->i_size to determine how far out
-                 * its cached pages go.  ll_truncate gets a PW lock, canceling
-                 * our lock, _after_ it has updated i_size.  this can confuse
-                 *
-                 * If this file doesn't have stripes yet, it is already,
-                 * by definition, truncated. */
-                if ((attr->ia_valid & ATTR_FROM_OPEN) && lsm == NULL) {
-                        LASSERT(attr->ia_size == 0);
-                        GOTO(skip_extent_lock, rc = 0);
-                }
-
-                /* we really need to get our PW lock before we change
-                 * inode->i_size.  if we don't we can race with other
-                 * i_size updaters on our node, like ll_file_read.  we
-                 * can also race with i_size propogation to other
-                 * nodes through dirtying and writeback of final cached
-                 * pages.  this last one is especially bad for racing
-                 * o_append users on other nodes. */
-                rc = ll_extent_lock_no_validate(NULL, inode, lsm, LCK_PW,
-                                                &extent, &lockh);
-                if (rc != ELDLM_OK) {
-                        if (rc > 0)
-                                RETURN(-ENOLCK);
-                        RETURN(rc);
-                }
-
-                rc = vmtruncate(inode, attr->ia_size);
-                if (rc == 0)
-                        set_bit(LLI_F_HAVE_SIZE_LOCK,
-                                &ll_i2info(inode)->lli_flags);
-
-                /* unlock now as we don't mind others file lockers racing with
-                 * the mds updates below? */
-                err = ll_extent_unlock(NULL, inode, lsm, LCK_PW, &lockh);
-                if (err)
-                        CERROR("ll_extent_unlock failed: %d\n", err);
-                if (rc)
-                        RETURN(rc);
-        }
-
-skip_extent_lock:
-        /* Don't send size changes to MDS to avoid "fast EA" problems, and
-         * also avoid a pointless RPC (we get file size from OST anyways).
-         */
-        attr->ia_valid &= ~ATTR_SIZE;
-        if (!attr->ia_valid)
-                RETURN(0);
-
-        ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0);
-
-        err = mdc_setattr(&sbi->ll_mdc_conn, &op_data,
-                          attr, NULL, 0, &request);
-        if (err)
-                CERROR("mdc_setattr fails: err = %d\n", err);
-
-        ptlrpc_req_finished(request);
-
-        if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_MTIME_SET)) {
-                struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-                struct obdo oa;
-                int err2;
-
-                if (lsm == NULL) {
-                        CDEBUG(D_INODE, "no lsm: not setting mtime on OSTs\n");
-                        RETURN(err);
-                }
-
-                CDEBUG(D_INODE, "set mtime on OST inode %lu to %lu\n",
-                       inode->i_ino, attr->ia_mtime);
-                oa.o_id = lsm->lsm_object_id;
-                oa.o_mode = S_IFREG;
-                oa.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMTIME;
-                oa.o_mtime = attr->ia_mtime;
-                err2 = obd_setattr(&sbi->ll_osc_conn, &oa, lsm, NULL);
-                if (err2) {
-                        CERROR("obd_setattr fails: rc=%d\n", err);
-                        if (!err)
-                                err = err2;
-                }
-        }
-        RETURN(err);
-}
-
-int ll_setattr(struct dentry *de, struct iattr *attr)
-{
-        int rc = inode_change_ok(de->d_inode, attr);
-        CDEBUG(D_VFSTRACE, "VFS Op:name=%s\n", de->d_name.name);
-        if (rc)
-                return rc;
-        lprocfs_counter_incr(ll_i2sbi(de->d_inode)->ll_stats, LPROC_LL_SETATTR);
-
-        return ll_inode_setattr(de->d_inode, attr, 1);
-}
-
-static int ll_statfs(struct super_block *sb, struct statfs *sfs)
-{
-        struct ll_sb_info *sbi = ll_s2sbi(sb);
-        struct obd_export *mdc_exp = class_conn2export(&sbi->ll_mdc_conn);
-        struct obd_export *osc_exp;
-        struct obd_statfs osfs;
-        int rc;
-        ENTRY;
-
-        if (mdc_exp == NULL)
-                RETURN(-EINVAL);
-
-        CDEBUG(D_VFSTRACE, "VFS Op:\n");
-        lprocfs_counter_incr(sbi->ll_stats, LPROC_LL_STAFS);
-        memset(sfs, 0, sizeof(*sfs));
-        rc = obd_statfs(mdc_exp, &osfs);
-        statfs_unpack(sfs, &osfs);
-        if (rc)
-                CERROR("mdc_statfs fails: rc = %d\n", rc);
-        else
-                CDEBUG(D_SUPER, "mdc_statfs shows blocks "LPU64"/"LPU64
-                       " objects "LPU64"/"LPU64"\n",
-                       osfs.os_bavail, osfs.os_blocks,
-                       osfs.os_ffree, osfs.os_files);
-
-        /* temporary until mds_statfs returns statfs info for all OSTs */
-        if (!rc) {
-                osc_exp = class_conn2export(&sbi->ll_osc_conn);
-                if (osc_exp == NULL)
-                        GOTO(out, rc = -EINVAL);
-                rc = obd_statfs(osc_exp, &osfs);
-                class_export_put(osc_exp);
-                if (rc) {
-                        CERROR("obd_statfs fails: rc = %d\n", rc);
-                        GOTO(out, rc);
-                }
-                CDEBUG(D_SUPER, "obd_statfs shows blocks "LPU64"/"LPU64
-                       " objects "LPU64"/"LPU64"\n",
-                       osfs.os_bavail, osfs.os_blocks,
-                       osfs.os_ffree, osfs.os_files);
-
-                while (osfs.os_blocks > ~0UL) {
-                        sfs->f_bsize <<= 1;
-
-                        osfs.os_blocks >>= 1;
-                        osfs.os_bfree >>= 1;
-                        osfs.os_bavail >>= 1;
-                }
-
-                sfs->f_blocks = osfs.os_blocks;
-                sfs->f_bfree = osfs.os_bfree;
-                sfs->f_bavail = osfs.os_bavail;
-
-                /* If we don't have as many objects free on the OST as inodes
-                 * on the MDS, we reduce the total number of inodes to
-                 * compensate, so that the "inodes in use" number is correct.
-                 */
-                if (osfs.os_ffree < (__u64)sfs->f_ffree) {
-                        sfs->f_files = (sfs->f_files - sfs->f_ffree) +
-                                       osfs.os_ffree;
-                        sfs->f_ffree = osfs.os_ffree;
-                }
-        }
-
-out:
-        class_export_put(mdc_exp);
-        RETURN(rc);
-}
-
-void dump_lsm(int level, struct lov_stripe_md *lsm)
-{
-        CDEBUG(level, "objid "LPX64", maxbytes "LPX64", magic %#08x, "
-               "stripe_size %#08x, offset %u, stripe_count %u\n",
-               lsm->lsm_object_id, lsm->lsm_maxbytes, lsm->lsm_magic,
-               lsm->lsm_stripe_size, lsm->lsm_stripe_offset,
-               lsm->lsm_stripe_count);
-}
-
-void ll_update_inode(struct inode *inode, struct mds_body *body,
-                     struct lov_stripe_md *lsm)
-{
-        struct ll_inode_info *lli = ll_i2info(inode);
-
-        LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
-        if (lsm != NULL) {
-                if (lli->lli_smd == NULL) {
-                        lli->lli_maxbytes = lsm->lsm_maxbytes;
-                        if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES)
-                                lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
-                        lli->lli_smd = lsm;
-                } else {
-                        if (memcmp(lli->lli_smd, lsm, sizeof(*lsm))) {
-                                CERROR("lsm mismatch for inode %ld\n",
-                                       inode->i_ino);
-                                CERROR("lli_smd:\n");
-                                dump_lsm(D_ERROR, lli->lli_smd);
-                                CERROR("lsm:\n");
-                                dump_lsm(D_ERROR, lsm);
-                                LBUG();
-                        }
-                }
-        }
-
-        if (body->valid & OBD_MD_FLID)
-                inode->i_ino = body->ino;
-        if (body->valid & OBD_MD_FLATIME)
-                LTIME_S(inode->i_atime) = body->atime;
-        if (body->valid & OBD_MD_FLMTIME)
-                LTIME_S(inode->i_mtime) = body->mtime;
-        if (body->valid & OBD_MD_FLCTIME)
-                LTIME_S(inode->i_ctime) = body->ctime;
-        if (body->valid & OBD_MD_FLMODE)
-                inode->i_mode = (inode->i_mode & S_IFMT)|(body->mode & ~S_IFMT);
-        if (body->valid & OBD_MD_FLTYPE)
-                inode->i_mode = (inode->i_mode & ~S_IFMT)|(body->mode & S_IFMT);
-        if (body->valid & OBD_MD_FLUID)
-                inode->i_uid = body->uid;
-        if (body->valid & OBD_MD_FLGID)
-                inode->i_gid = body->gid;
-        if (body->valid & OBD_MD_FLFLAGS)
-                inode->i_flags = body->flags;
-        if (body->valid & OBD_MD_FLNLINK)
-                inode->i_nlink = body->nlink;
-        if (body->valid & OBD_MD_FLGENER)
-                inode->i_generation = body->generation;
-        if (body->valid & OBD_MD_FLRDEV)
-                inode->i_rdev = body->rdev;
-        if (body->valid & OBD_MD_FLSIZE)
-                inode->i_size = body->size;
-        if (body->valid & OBD_MD_FLBLOCKS)
-                inode->i_blocks = body->blocks;
-}
-
-static void ll_read_inode2(struct inode *inode, void *opaque)
-{
-        struct ll_read_inode2_cookie *lic = opaque;
-        struct mds_body *body = lic->lic_body;
-        struct ll_inode_info *lli = ll_i2info(inode);
-        ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
-               inode->i_generation, inode);
-
-        sema_init(&lli->lli_open_sem, 1);
-        spin_lock_init(&lli->lli_read_extent_lock);
-        INIT_LIST_HEAD(&lli->lli_read_extents);
-        lli->lli_flags = 0;
-        /* We default to 2T-4k until the LSM is created/read, at which point
-         * it'll be updated. */
-        lli->lli_maxbytes = LUSTRE_STRIPE_MAXBYTES;
-
-        LASSERT(!lli->lli_smd);
-
-        /* core attributes from the MDS first */
-        ll_update_inode(inode, body, lic->lic_lsm);
-
-        /* OIDEBUG(inode); */
-
-        if (S_ISREG(inode->i_mode)) {
-                inode->i_op = &ll_file_inode_operations;
-                inode->i_fop = &ll_file_operations;
-                inode->i_mapping->a_ops = &ll_aops;
-                EXIT;
-        } else if (S_ISDIR(inode->i_mode)) {
-                inode->i_op = &ll_dir_inode_operations;
-                inode->i_fop = &ll_dir_operations;
-                inode->i_mapping->a_ops = &ll_dir_aops;
-                EXIT;
-        } else if (S_ISLNK(inode->i_mode)) {
-                inode->i_op = &ll_fast_symlink_inode_operations;
-                EXIT;
-        } else {
-                inode->i_op = &ll_special_inode_operations;
-                init_special_inode(inode, inode->i_mode, inode->i_rdev);
-                EXIT;
-        }
-}
-
-void ll_umount_begin(struct super_block *sb)
-{
-        struct ll_sb_info *sbi = ll_s2sbi(sb);
-        struct obd_device *obd;
-        struct obd_ioctl_data ioc_data = { 0 };
-
-        ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:\n");
-
-        obd = class_conn2obd(&sbi->ll_mdc_conn);
-        obd->obd_no_recov = 1;
-        obd_iocontrol(IOC_OSC_SET_ACTIVE, &sbi->ll_mdc_conn, sizeof ioc_data,
-                      &ioc_data, NULL);
-
-        obd = class_conn2obd(&sbi->ll_osc_conn);
-        obd->obd_no_recov = 1;
-        obd_iocontrol(IOC_OSC_SET_ACTIVE, &sbi->ll_osc_conn, sizeof ioc_data,
-                      &ioc_data, NULL);
-
-        /* Really, we'd like to wait until there are no requests outstanding,
-         * and then continue.  For now, we just invalidate the requests,
-         * schedule, and hope.
-         */
-        schedule();
-
-        EXIT;
 }
 
 /* exported operations */
index 980bfcd..5ab03ff 100644 (file)
 #include <linux/lprocfs_status.h>
 #include "llite_internal.h"
 
-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-#include <asm/statfs.h>
-kmem_cache_t *ll_file_data_slab;
-extern struct address_space_operations ll_aops;
-extern struct address_space_operations ll_dir_aops;
-struct super_operations ll_super_operations;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
 
-/* /proc/lustre/llite root that tracks llite mount points */
-struct proc_dir_entry *proc_lustre_fs_root = NULL;
-/* lproc_llite.c */
-extern int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
-                                       struct super_block *sb,
-                                       char *osc, char *mdc);
-
-extern int ll_init_inodecache(void);
-extern void ll_destroy_inodecache(void);
-extern int ll_recover(struct recovd_data *, int);
-extern int ll_commitcbd_setup(struct ll_sb_info *);
-extern int ll_commitcbd_cleanup(struct ll_sb_info *);
-int ll_read_inode2(struct inode *inode, void *opaque);
-
-extern int ll_proc_namespace(struct super_block* sb, char* osc, char* mdc);
-
-static char *ll_read_opt(const char *opt, char *data)
-{
-        char *value;
-        char *retval;
-        ENTRY;
-
-        CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data);
-        if (strncmp(opt, data, strlen(opt)))
-                RETURN(NULL);
-        if ((value = strchr(data, '=')) == NULL)
-                RETURN(NULL);
-
-        value++;
-        OBD_ALLOC(retval, strlen(value) + 1);
-        if (!retval) {
-                CERROR("out of memory!\n");
-                RETURN(NULL);
-        }
-
-        memcpy(retval, value, strlen(value)+1);
-        CDEBUG(D_SUPER, "Assigned option: %s, value %s\n", opt, retval);
-        RETURN(retval);
-}
-
-static int ll_set_opt(const char *opt, char *data, int fl)
-{
-        ENTRY;
-
-        CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data);
-        if (strncmp(opt, data, strlen(opt)))
-                RETURN(0);
-        else
-                RETURN(fl);
-}
-
-static void ll_options(char *options, char **ost, char **mds, int *flags)
-{
-        char *opt_ptr = options;
-        char *this_char;
-        ENTRY;
-
-        if (!options) {
-                EXIT;
-                return;
-        }
-
-        while ((this_char = strsep (&opt_ptr, ",")) != NULL) {
-                CDEBUG(D_SUPER, "this_char %s\n", this_char);
-                if ((!*ost && (*ost = ll_read_opt("osc", this_char)))||
-                    (!*mds && (*mds = ll_read_opt("mdc", this_char)))||
-                    (!(*flags & LL_SBI_NOLCK) &&
-                     ((*flags) = (*flags) |
-                      ll_set_opt("nolock", this_char, LL_SBI_NOLCK))))
-                        continue;
-        }
-        EXIT;
-}
-
-#ifndef log2
-#define log2(n) ffz(~(n))
-#endif
-
-
-static int ll_fill_super(struct super_block *sb, void *data, int silent)
-{
-        struct inode *root = 0;
-        struct obd_device *obd;
-        struct ll_sb_info *sbi;
-        char *osc = NULL;
-        char *mdc = NULL;
-        int err;
-        struct ll_fid rootfid;
-        struct obd_statfs osfs;
-        struct ptlrpc_request *request = NULL;
-        struct ptlrpc_connection *mdc_conn;
-        struct ll_read_inode2_cookie lic;
-        class_uuid_t uuid;
-
-        ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:\n");
-
-        OBD_ALLOC(sbi, sizeof(*sbi));
-        if (!sbi)
-                RETURN(-ENOMEM);
-
-        INIT_LIST_HEAD(&sbi->ll_conn_chain);
-        INIT_LIST_HEAD(&sbi->ll_orphan_dentry_list);
-        generate_random_uuid(uuid);
-        class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
-
-        sb->s_fs_info = sbi;
-
-        ll_options(data, &osc, &mdc, &sbi->ll_flags);
-
-        if (!osc) {
-                CERROR("no osc\n");
-                GOTO(out_free, sb = NULL);
-        }
-
-        if (!mdc) {
-                CERROR("no mdc\n");
-                GOTO(out_free, sb = NULL);
-        }
-
-        obd = class_name2obd(mdc);
-        if (!obd) {
-                CERROR("MDC %s: not setup or attached\n", mdc);
-                GOTO(out_free, sb = NULL);
-        }
-
-        err = obd_connect(&sbi->ll_mdc_conn, obd, &sbi->ll_sb_uuid);
-        if (err) {
-                CERROR("cannot connect to %s: rc = %d\n", mdc, err);
-                GOTO(out_free, sb = NULL);
-        }
-
-        mdc_conn = sbi2mdc(sbi)->cl_import->imp_connection;
-
-        obd = class_name2obd(osc);
-        if (!obd) {
-                CERROR("OSC %s: not setup or attached\n", osc);
-                GOTO(out_mdc, sb = NULL);
-        }
-
-        err = obd_connect(&sbi->ll_osc_conn, obd, &sbi->ll_sb_uuid);
-        if (err) {
-                CERROR("cannot connect to %s: rc = %d\n", osc, err);
-                GOTO(out_mdc, sb = NULL);
-        }
-
-        err = mdc_getstatus(&sbi->ll_mdc_conn, &rootfid);
-        if (err) {
-                CERROR("cannot mds_connect: rc = %d\n", err);
-                GOTO(out_osc, sb = NULL);
-        }
-        CDEBUG(D_SUPER, "rootfid "LPU64"\n", rootfid.id);
-        sbi->ll_rootino = rootfid.id;
-
-        memset(&osfs, 0, sizeof(osfs));
-        err = obd_statfs(&sbi->ll_mdc_conn, &osfs);
-        sb->s_blocksize = osfs.os_bsize;
-        sb->s_blocksize_bits = log2(osfs.os_bsize);
-        sb->s_magic = LL_SUPER_MAGIC;
-        sb->s_maxbytes = PAGE_CACHE_MAXBYTES;
-
-        sb->s_op = &ll_super_operations;
-
-        /* make root inode 
-         * XXX: move this to after cbd setup? */
-        err = mdc_getattr(&sbi->ll_mdc_conn, &rootfid,
-                          OBD_MD_FLNOTOBD|OBD_MD_FLBLOCKS, 0, &request);
-        if (err) {
-                CERROR("mdc_getattr failed for root: rc = %d\n", err);
-                GOTO(out_osc, sb = NULL);
-        }
-
-        /* initialize committed transaction callback daemon */
-        spin_lock_init(&sbi->ll_commitcbd_lock);
-        init_waitqueue_head(&sbi->ll_commitcbd_waitq);
-        init_waitqueue_head(&sbi->ll_commitcbd_ctl_waitq);
-        sbi->ll_commitcbd_flags = 0;
-        err = ll_commitcbd_setup(sbi);
-        if (err) {
-                CERROR("failed to start commit callback daemon: rc = %d\n",err);
-                ptlrpc_req_finished (request);
-                GOTO(out_osc, sb = NULL);
-        }
-
-        lic.lic_body = lustre_msg_buf(request->rq_repmsg, 0, sizeof (*lic.lic_body));
-        LASSERT (lic.lic_body != NULL);         /* checked by mdc_getattr() */
-        LASSERT_REPSWABBED (request, 0);        /* swabbed by mdc_getattr() */
-
-        lic.lic_lsm = NULL;
-
-        root = iget5_locked(sb, sbi->ll_rootino, NULL,
-                            ll_read_inode2, &lic);
-
-        ptlrpc_req_finished(request);
-
-        if (root == NULL || is_bad_inode(root)) {
-                /* XXX might need iput() for bad inode */
-                CERROR("lustre_lite: bad iget5 for root\n");
-                GOTO(out_cbd, sb = NULL);
-        }
-
-        sb->s_root = d_alloc_root(root);
-        root->i_state &= ~(I_LOCK | I_NEW);
-        printk("AMRUT 1\n");
-        if (proc_lustre_fs_root) {
-                err = lprocfs_register_mountpoint(proc_lustre_fs_root, sb,
-                                                  osc, mdc);
-                if (err < 0)
-                        CERROR("could not register mount in /proc/lustre");
-        }
-
-out_dev:
-        if (mdc)
-                OBD_FREE(mdc, strlen(mdc) + 1);
-        if (osc)
-                OBD_FREE(osc, strlen(osc) + 1);
-        printk("AMRUT 2\n");
-
-        RETURN(0);
-
-out_cbd:
-        ll_commitcbd_cleanup(sbi);
-out_osc:
-        obd_disconnect(&sbi->ll_osc_conn, 0);
-out_mdc:
-        obd_disconnect(&sbi->ll_mdc_conn, 0);
-out_free:
-        lprocfs_unregister_mountpoint(sbi);
-        OBD_FREE(sbi, sizeof(*sbi));
-
-        goto out_dev;
-} /* ll_fill_super */
-
-
-int ll_setattr_raw(struct inode *inode, struct iattr *attr)
-{
-        struct ptlrpc_request *request = NULL;
-        struct ll_sb_info *sbi = ll_i2sbi(inode);
-        struct mdc_op_data op_data;
-        int err = 0;
-        ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino);
-
-        LPROC_COUNTER_INODE_INCBY1(inode, LPROC_LL_SETATTR);
-        if ((attr->ia_valid & ATTR_SIZE)) {
-                /* writeback uses inode->i_size to determine how far out
-                 * its cached pages go.  ll_truncate gets a PW lock, canceling
-                 * our lock, _after_ it has updated i_size.  this can confuse
-                 * us into zero extending the file to the newly truncated
-                 * size, and this has bad implications for a racing o_append.
-                 * if we're extending our size we need to flush the pages
-                 * with the correct i_size before vmtruncate stomps on
-                 * the new i_size.  again, this can only find pages to
-                 * purge if the PW lock that generated them is still held.
-                 */
-                if ( attr->ia_size > inode->i_size ) {
-                        filemap_fdatasync(inode->i_mapping);
-                        filemap_fdatawait(inode->i_mapping);
-                }
-                err = vmtruncate(inode, attr->ia_size);
-                if (err)
-                        RETURN(err);
-        }
-
-        /* Don't send size changes to MDS to avoid "fast EA" problems, and
-         * also avoid a pointless RPC (we get file size from OST anyways).
-         */
-        attr->ia_valid &= ~ATTR_SIZE;
-        if (!attr->ia_valid)
-                RETURN(0);
-
-        ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0);
-
-        err = mdc_setattr(&sbi->ll_mdc_conn, &op_data,
-                          attr, NULL, 0, &request);
-        if (err)
-                CERROR("mdc_setattr fails: err = %d\n", err);
-
-        ptlrpc_req_finished(request);
-
-        if (S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_MTIME_SET) {
-                struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-                struct obdo oa;
-                int err2;
-
-                CDEBUG(D_INODE, "set mtime on OST inode %lu to %lu\n",
-                       inode->i_ino, attr->ia_mtime);
-                oa.o_id = lsm->lsm_object_id;
-                oa.o_mode = S_IFREG;
-                oa.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMTIME;
-                oa.o_mtime = LTIME_S(attr->ia_mtime);
-                err2 = obd_setattr(&sbi->ll_osc_conn, &oa, lsm, NULL);
-                if (err2) {
-                        CERROR("obd_setattr fails: rc=%d\n", err);
-                        if (!err)
-                                err = err2;
-                }
-        }
-        RETURN(err);
-}
 struct super_block * ll_get_sb(struct file_system_type *fs_type,
-                               int flags, char *devname, void * data)
+                               int flags, const char *devname, void * data)
 {
+        /* devname unused; get_sb_nodev() calls back into ll_fill_super() */
         return get_sb_nodev(fs_type, flags, data, ll_fill_super);
 }
 
-static void ll_put_super(struct super_block *sb)
-{
-        struct ll_sb_info *sbi = ll_s2sbi(sb);
-        struct list_head *tmp, *next;
-        struct ll_fid rootfid;
-        ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:\n");
-
-        list_del(&sbi->ll_conn_chain);
-        ll_commitcbd_cleanup(sbi);
-        obd_disconnect(&sbi->ll_osc_conn, 0);
-
-        /* NULL request to force sync on the MDS, and get the last_committed
-         * value to flush remaining RPCs from the pending queue on client.
-         *
-         * XXX This should be an mdc_sync() call to sync the whole MDS fs,
-         *     which we can call for other reasons as well.
-         */
-        mdc_getstatus(&sbi->ll_mdc_conn, &rootfid);
-
-        lprocfs_unregister_mountpoint(sbi);
-        if (sbi->ll_proc_root) {
-                lprocfs_remove(sbi->ll_proc_root);
-        sbi->ll_proc_root = NULL;
-        }
-
-        obd_disconnect(&sbi->ll_mdc_conn, 0);
-
-        spin_lock(&dcache_lock);
-        list_for_each_safe(tmp, next, &sbi->ll_orphan_dentry_list){
-                struct dentry *dentry = list_entry(tmp, struct dentry, d_hash);
-                shrink_dcache_parent(dentry);
-        }
-        spin_unlock(&dcache_lock);
-
-        OBD_FREE(sbi, sizeof(*sbi));
-
-        EXIT;
-} /* ll_put_super */
-
-static void ll_clear_inode(struct inode *inode)
-{
-        struct ll_sb_info *sbi = ll_i2sbi(inode);
-        struct ll_inode_info *lli = ll_i2info(inode);
-        int rc;
-        ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino);
-
-#warning "Is there a reason we don't do this in 2.5, but we do in 2.4?"
-#if 0
-        rc = ll_mdc_cancel_unused(&sbi->ll_mdc_conn, inode, LDLM_FL_NO_CALLBACK);
-        if (rc < 0) {
-                CERROR("ll_mdc_cancel_unused: %d\n", rc);
-                /* XXX FIXME do something dramatic */
-        }
-
-        if (lli->lli_smd) {
-                rc = obd_cancel_unused(&sbi->ll_osc_conn, lli->lli_smd, 0);
-                if (rc < 0) {
-                        CERROR("obd_cancel_unused: %d\n", rc);
-                        /* XXX FIXME do something dramatic */
-                }
-        }
-#endif
-
-        if (atomic_read(&inode->i_count) != 0)
-                CERROR("clearing in-use inode %lu: count = %d\n",
-                       inode->i_ino, atomic_read(&inode->i_count));
-
-        if (lli->lli_smd) {
-                obd_free_memmd(&sbi->ll_osc_conn, &lli->lli_smd);
-                lli->lli_smd = NULL;
-        }
-
-        if (lli->lli_symlink_name) {
-                OBD_FREE(lli->lli_symlink_name,strlen(lli->lli_symlink_name)+1);
-                lli->lli_symlink_name = NULL;
-        }
-
-        EXIT;
-}
-
-#if 0
-static void ll_delete_inode(struct inode *inode)
-{
-        ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino);
-        if (S_ISREG(inode->i_mode)) {
-                int err;
-                struct obdo *oa;
-                struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-
-                /* mcreate with no open */
-                if (!lsm)
-                        GOTO(out, 0);
-
-                if (lsm->lsm_object_id == 0) {
-                        CERROR("This really happens\n");
-                        /* No obdo was ever created */
-                        GOTO(out, 0);
-                }
-
-                oa = obdo_alloc();
-                if (oa == NULL)
-                        GOTO(out, -ENOMEM);
-
-                oa->o_id = lsm->lsm_object_id;
-                oa->o_mode = inode->i_mode;
-                oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE;
-
-                err = obd_destroy(ll_i2obdconn(inode), oa, lsm);
-                obdo_free(oa);
-                if (err)
-                        CDEBUG(D_SUPER, "obd destroy objid "LPX64" error %d\n",
-                               lsm->lsm_object_id, err);
-        }
-out:
-        clear_inode(inode);
-        EXIT;
-}
-#endif
-
-/* like inode_setattr, but doesn't mark the inode dirty */
-static int ll_attr2inode(struct inode * inode, struct iattr * attr, int trunc)
-{
-        unsigned int ia_valid = attr->ia_valid;
-        int error = 0;
-
-        if ((ia_valid & ATTR_SIZE) && trunc) {
-                if (attr->ia_size > ll_file_maxbytes(inode)) {
-                        error = -EFBIG;
-                        goto out;
-                }
-                error = vmtruncate(inode, attr->ia_size);
-                if (error)
-                        goto out;
-        } else if (ia_valid & ATTR_SIZE)
-                inode->i_size = attr->ia_size;
-
-        if (ia_valid & ATTR_UID)
-                inode->i_uid = attr->ia_uid;
-        if (ia_valid & ATTR_GID)
-                inode->i_gid = attr->ia_gid;
-        if (ia_valid & ATTR_ATIME)
-                inode->i_atime = attr->ia_atime;
-        if (ia_valid & ATTR_MTIME)
-                inode->i_mtime = attr->ia_mtime;
-        if (ia_valid & ATTR_CTIME)
-                inode->i_ctime = attr->ia_ctime;
-        if (ia_valid & ATTR_MODE) {
-                inode->i_mode = attr->ia_mode;
-                if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
-                        inode->i_mode &= ~S_ISGID;
-        }
-out:
-        return error;
-}
-
-int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc)
-{
-        struct ptlrpc_request *request = NULL;
-        struct ll_sb_info *sbi = ll_i2sbi(inode);
-        int err = 0;
-
-        ENTRY;
-
-        /* change incore inode */
-        err = ll_attr2inode(inode, attr, do_trunc);
-        if (err)
-                RETURN(err);
-
-        /* Don't send size changes to MDS to avoid "fast EA" problems, and
-         * also avoid a pointless RPC (we get file size from OST anyways).
-         */
-        attr->ia_valid &= ~ATTR_SIZE;
-        if (attr->ia_valid) {
-                struct mdc_op_data op_data;
-
-                ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0);
-
-                err = mdc_setattr(&sbi->ll_mdc_conn, &op_data,
-                                  attr, NULL, 0, &request);
-                if (err)
-                        CERROR("mdc_setattr fails: err = %d\n", err);
-
-                ptlrpc_req_finished(request);
-                if (S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_MTIME_SET) {
-                        struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-                        struct obdo oa;
-                        int err2;
-
-                        CDEBUG(D_ERROR, "setting mtime on OST\n");
-                        oa.o_id = lsm->lsm_object_id;
-                        oa.o_mode = S_IFREG;
-                        oa.o_valid = OBD_MD_FLID |OBD_MD_FLTYPE |OBD_MD_FLMTIME;
-                        oa.o_mtime = LTIME_S(attr->ia_mtime);
-                        err2 = obd_setattr(&sbi->ll_osc_conn, &oa, lsm, NULL);
-                        if (err2) {
-                                CERROR("obd_setattr fails: rc=%d\n", err);
-                                if (!err)
-                                        err = err2;
-                        }
-                }
-        }
-
-        RETURN(err);
-}
-
-int ll_setattr(struct dentry *de, struct iattr *attr)
-{
-        int rc = inode_change_ok(de->d_inode, attr);
-        CDEBUG(D_VFSTRACE, "VFS Op:name=%s\n", de->d_name.name);
-        if (rc)
-                return rc;
-
-        LPROC_COUNTER_INODE_INCBY1((de->d_inode), LPROC_LL_SETATTR);
-        return ll_inode_setattr(de->d_inode, attr, 1);
-}
-
-static int ll_statfs(struct super_block *sb, struct statfs *sfs)
-{
-        struct ll_sb_info *sbi = ll_s2sbi(sb);
-        struct obd_statfs osfs;
-        int rc;
-        ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:\n");
-
-        LPROC_COUNTER_SBI_INCBY1(sbi, LPROC_LL_STAFS);
-        memset(sfs, 0, sizeof(*sfs));
-        rc = obd_statfs(&sbi->ll_mdc_conn, &osfs);
-        statfs_unpack(sfs, &osfs);
-        if (rc)
-                CERROR("mdc_statfs fails: rc = %d\n", rc);
-        else
-                CDEBUG(D_SUPER, "mdc_statfs shows blocks "LPU64"/"LPU64
-                       " objects "LPU64"/"LPU64"\n",
-                       osfs.os_bavail, osfs.os_blocks,
-                       osfs.os_ffree, osfs.os_files);
-
-        /* temporary until mds_statfs returns statfs info for all OSTs */
-        if (!rc) {
-                rc = obd_statfs(&sbi->ll_osc_conn, &osfs);
-                if (rc) {
-                        CERROR("obd_statfs fails: rc = %d\n", rc);
-                        GOTO(out, rc);
-                }
-                CDEBUG(D_SUPER, "obd_statfs shows blocks "LPU64"/"LPU64
-                       " objects "LPU64"/"LPU64"\n",
-                       osfs.os_bavail, osfs.os_blocks,
-                       osfs.os_ffree, osfs.os_files);
-
-                while (osfs.os_blocks > ~0UL) {
-                        sfs->f_bsize <<= 1;
-
-                        osfs.os_blocks >>= 1;
-                        osfs.os_bfree >>= 1;
-                        osfs.os_bavail >>= 1;
-                }
-                sfs->f_blocks = osfs.os_blocks;
-                sfs->f_bfree = osfs.os_bfree;
-                sfs->f_bavail = osfs.os_bavail;
-                if (osfs.os_ffree < (__u64)sfs->f_ffree) {
-                        sfs->f_files = (sfs->f_files - sfs->f_ffree) +
-                                       osfs.os_ffree;
-                        sfs->f_ffree = osfs.os_ffree;
-                }
-        }
-
-out:
-        RETURN(rc);
-}
-
-void ll_update_inode(struct inode *inode, struct mds_body *body,
-                     struct lov_stripe_md *lsm)
-{
-        struct ll_inode_info *lli = ll_i2info(inode);
-
-        LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
-        if (lsm != NULL) {
-                if (lli->lli_smd == NULL) {
-                        lli->lli_smd = lsm;
-                        lli->lli_maxbytes = lsm->lsm_maxbytes;
-                        if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES)
-                                lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
-                } else {
-                        LASSERT (!memcmp (lli->lli_smd, lsm, sizeof (*lsm)));
-                }
-        }
-
-        if (body->valid & OBD_MD_FLID)
-                inode->i_ino = body->ino;
-        if (body->valid & OBD_MD_FLATIME)
-                LTIME_S(inode->i_atime) = body->atime;
-        if (body->valid & OBD_MD_FLMTIME)
-                LTIME_S(inode->i_mtime) = body->mtime;
-        if (body->valid & OBD_MD_FLCTIME)
-                LTIME_S(inode->i_ctime) = body->ctime;
-        if (body->valid & OBD_MD_FLMODE)
-                inode->i_mode = (inode->i_mode & S_IFMT)|(body->mode & ~S_IFMT);
-        if (body->valid & OBD_MD_FLTYPE)
-                inode->i_mode = (inode->i_mode & ~S_IFMT)|(body->mode & S_IFMT);
-        if (body->valid & OBD_MD_FLUID)
-                inode->i_uid = body->uid;
-        if (body->valid & OBD_MD_FLGID)
-                inode->i_gid = body->gid;
-        if (body->valid & OBD_MD_FLFLAGS)
-                inode->i_flags = body->flags;
-        if (body->valid & OBD_MD_FLNLINK)
-                inode->i_nlink = body->nlink;
-        if (body->valid & OBD_MD_FLGENER)
-                inode->i_generation = body->generation;
-        if (body->valid & OBD_MD_FLRDEV)
-                inode->i_rdev = to_kdev_t(body->rdev);
-        if (body->valid & OBD_MD_FLSIZE)
-                inode->i_size = body->size;
-        if (body->valid & OBD_MD_FLBLOCKS)
-                inode->i_blocks = body->blocks;
-}
-
-int ll_read_inode2(struct inode *inode, void *opaque)
-{
-        struct ll_read_inode2_cookie *lic = opaque;
-        struct mds_body *body = lic->lic_body;
-        struct ll_inode_info *lli = ll_i2info(inode);
-        int rc = 0;
-        ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino);
-
-        sema_init(&lli->lli_open_sem, 1);
-        /* these are 2.4 only, but putting them here for consistency.. */
-        spin_lock_init(&lli->lli_read_extent_lock);
-        INIT_LIST_HEAD(&lli->lli_read_extents);
-        ll_lldo_init(&lli->lli_dirty);
-        lli->lli_flags = 0;
-        lli->lli_maxbytes = LUSTRE_STRIPE_MAXBYTES;
-
-        LASSERT(!lli->lli_smd);
-
-        /* core attributes first */
-        ll_update_inode(inode, body, lic ? lic->lic_lsm : NULL);
-
-        /* OIDEBUG(inode); */
-
-        if (S_ISREG(inode->i_mode)) {
-                inode->i_op = &ll_file_inode_operations;
-                inode->i_fop = &ll_file_operations;
-                inode->i_mapping->a_ops = &ll_aops;
-                EXIT;
-        } else if (S_ISDIR(inode->i_mode)) {
-                inode->i_op = &ll_dir_inode_operations;
-                inode->i_fop = &ll_dir_operations;
-                inode->i_mapping->a_ops = &ll_dir_aops;
-                EXIT;
-        } else if (S_ISLNK(inode->i_mode)) {
-                inode->i_op = &ll_fast_symlink_inode_operations;
-                EXIT;
-        } else {
-                inode->i_op = &ll_special_inode_operations;
-                init_special_inode(inode, inode->i_mode,
-                                   kdev_t_to_nr(inode->i_rdev));
-                EXIT;
-        }
-
-        return rc;
-}
-
-
-void ll_umount_begin(struct super_block *sb)
-{
-        struct ll_sb_info *sbi = ll_s2sbi(sb);
-        struct obd_device *obd;
-        struct obd_ioctl_data ioc_data = { 0 };
-
-        ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:\n");
-
-        obd = class_conn2obd(&sbi->ll_mdc_conn);
-        obd->obd_no_recov = 1;
-        obd_iocontrol(IOC_OSC_SET_ACTIVE, &sbi->ll_mdc_conn, sizeof ioc_data,
-                      &ioc_data, NULL);
-
-        obd = class_conn2obd(&sbi->ll_osc_conn);
-        obd->obd_no_recov = 1;
-        obd_iocontrol(IOC_OSC_SET_ACTIVE, &sbi->ll_osc_conn, sizeof ioc_data,
-                      &ioc_data, NULL);
-        
-        /* Really, we'd like to wait until there are no requests outstanding,
-         * and then continue.  For now, we just invalidate the requests,
-         * schedule, and hope.
-         */
-        schedule();
-
-        EXIT;
-}
-
 static kmem_cache_t *ll_inode_cachep;
 
 static struct inode *ll_alloc_inode(struct super_block *sb)
 {
         struct ll_inode_info *lli;
-        LPROC_COUNTER_SBI_INCBY1((ll_s2sbi(sb)), LL_ALLOC_INODE);
+        lprocfs_counter_incr((ll_s2sbi(sb))->ll_stats, LPROC_LL_ALLOC_INODE); /* bumped before the alloc, so failed allocations are counted too */
         OBD_SLAB_ALLOC(lli, ll_inode_cachep, SLAB_KERNEL, sizeof *lli);
         if (lli == NULL)
                 return NULL;
 
-        memset(lli, 0, (char *)&lli->lli_vfs_inode - (char *)lli);
-        sema_init(&lli->lli_open_sem, 1);
-        init_MUTEX(&lli->lli_size_valid_sem);
-        lli->lli_maxbytes = LUSTRE_STRIPE_MAXBYTES;
+        inode_init_once(&lli->lli_vfs_inode); /* the VFS inode embedded in lli, which is what we return */
+        ll_lli_init(lli); /* NOTE(review): presumably sets up the Lustre-private fields -- confirm in its definition */
 
         return &lli->lli_vfs_inode;
 }
 
 static void ll_destroy_inode(struct inode *inode)
 {
-        OBD_SLAB_FREE(ll_inode_cachep, ll_i2info(inode),
-                      sizeof(struct ll_inode_info));
+        struct ll_inode_info *ptr = ll_i2info(inode);
+        OBD_SLAB_FREE(ptr, ll_inode_cachep, sizeof(*ptr)); /* frees the ll_inode_info from ll_i2info(), sized as the full struct */
 }
 
 static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
@@ -792,15 +91,12 @@ void ll_destroy_inodecache(void)
                 CERROR("ll_inode_cache: not all structures were freed\n");
 }
 
-
-
 /* exported operations */
 struct super_operations ll_super_operations =
 {
         alloc_inode: ll_alloc_inode,
         destroy_inode: ll_destroy_inode,
         clear_inode: ll_clear_inode,
-//        delete_inode: ll_delete_inode,
         put_super: ll_put_super,
         statfs: ll_statfs,
         umount_begin: ll_umount_begin
index 19d234e..427f7f0 100644 (file)
 #include <linux/stat.h>
 #include <linux/smp_lock.h>
 #include <linux/version.h>
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#include <asm/statfs.h>
-#endif
 #define DEBUG_SUBSYSTEM S_LLITE
 
 #include <linux/lustre_lite.h>
+#include "llite_internal.h"
 
 static int ll_readlink_internal(struct inode *inode,
                                 struct ptlrpc_request **request, char **symname)
@@ -117,82 +115,46 @@ static int ll_readlink(struct dentry *dentry, char *buffer, int buflen)
         RETURN(rc);
 }
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-static int ll_follow_link(struct dentry *dentry, struct nameidata *nd,
-                          struct lookup_intent *it)
+static int ll_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
         struct inode *inode = dentry->d_inode;
         struct ll_inode_info *lli = ll_i2info(inode);
+        struct lookup_intent *it = ll_nd2it(nd); /* may be NULL, see the check below */
         struct ptlrpc_request *request;
-        int op = 0, mode = 0, rc;
+        int rc;
         char *symname;
         ENTRY;
 
-        CDEBUG(D_VFSTRACE, "VFS Op\n");
         if (it != NULL) {
-                op = it->it_op;
-                mode = it->it_mode;
-
-                ll_intent_release(dentry, it);
-        }
-
-        down(&lli->lli_open_sem);
-        rc = ll_readlink_internal(inode, &request, &symname);
-        up(&lli->lli_open_sem);
-        if (rc)
-                GOTO(out, rc);
+                int op = it->it_op;
+                int mode = it->it_mode;
 
-        if (it != NULL) {
+                ll_intent_release(it); /* op/mode saved above are put back right after the release */
                 it->it_op = op;
                 it->it_mode = mode;
         }
 
-        rc = vfs_follow_link_it(nd, symname, it);
-        ptlrpc_req_finished(request);
- out:
-        RETURN(rc);
-}
-#else
-static int ll_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
-        struct inode *inode = dentry->d_inode;
-        struct ll_inode_info *lli = ll_i2info(inode);
-        struct ptlrpc_request *request;
-        int op = 0, mode = 0, rc;
-        char *symname;
-        ENTRY;
-
-        op = nd->it.it_op;
-        mode = nd->it.it_mode;
-
-        ll_intent_release(dentry, &nd->it);
-
+        CDEBUG(D_VFSTRACE, "VFS Op\n");
         down(&lli->lli_open_sem);
-
         rc = ll_readlink_internal(inode, &request, &symname);
+        up(&lli->lli_open_sem); /* lli_open_sem is held only around the readlink call */
         if (rc)
                 GOTO(out, rc);
 
-        nd->it.it_op = op;
-        nd->it.it_mode = mode;
-
         rc = vfs_follow_link(nd, symname);
         ptlrpc_req_finished(request);
  out:
-        up(&lli->lli_open_sem);
-
         RETURN(rc);
 }
-#endif
 
-extern int ll_inode_revalidate(struct dentry *dentry);
-extern int ll_setattr(struct dentry *de, struct iattr *attr);
 struct inode_operations ll_fast_symlink_inode_operations = {
         readlink:       ll_readlink,
         setattr:        ll_setattr,
         setattr_raw:    ll_setattr_raw,
-        follow_link2:   ll_follow_link,
+        follow_link   ll_follow_link,
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        revalidate:     ll_inode_revalidate
+        revalidate_it:  ll_inode_revalidate_it
+#else 
+        getattr_it:     ll_getattr
 #endif
 };
index e995588..e69dc6d 100644 (file)
@@ -1,3 +1,4 @@
 .deps
 Makefile
 Makefile.in
+.*.cmd
index 879e44d..83dba1a 100644 (file)
@@ -7,12 +7,12 @@ DEFS=
 
 if LIBLUSTRE
 lib_LIBRARIES = liblov.a
-liblov_a_SOURCES = lov_obd.c lov_pack.c
+liblov_a_SOURCES = lov_obd.c lov_pack.c lov_internal.h
 else
 MODULE = lov
 modulefs_DATA = lov.o
 EXTRA_PROGRAMS = lov
-lov_SOURCES = lov_obd.c lov_pack.c lproc_lov.c
+lov_SOURCES = lov_obd.c lov_pack.c lproc_lov.c lov_internal.h
 endif
 
 include $(top_srcdir)/Rules
index 2974b2a..9562a4f 100644 (file)
 #include <linux/seq_file.h>
 #include <linux/lprocfs_status.h>
 
+#include "lov_internal.h"
+
+static int lov_stripe_offset(struct lov_stripe_md *lsm, obd_off lov_off,
+                             int stripeno, obd_off *obd_off);
+
 struct lov_file_handles {
         struct portals_handle lfh_handle;
         atomic_t lfh_refcount;
@@ -68,7 +73,7 @@ static void lov_lfh_addref(void *lfhp)
         struct lov_file_handles *lfh = lfhp;
 
         atomic_inc(&lfh->lfh_refcount);
-        CDEBUG(D_INFO, "GETting lfh %p : new refcount %d\n", lfh,
+        CDEBUG(D_MALLOC, "GETting lfh %p : new refcount %d\n", lfh,
                atomic_read(&lfh->lfh_refcount));
 }
 
@@ -99,7 +104,7 @@ static struct lov_file_handles *lov_handle2lfh(struct lustre_handle *handle)
 
 static void lov_lfh_put(struct lov_file_handles *lfh)
 {
-        CDEBUG(D_INFO, "PUTting lfh %p : new refcount %d\n", lfh,
+        CDEBUG(D_MALLOC, "PUTting lfh %p : new refcount %d\n", lfh,
                atomic_read(&lfh->lfh_refcount) - 1);
         LASSERT(atomic_read(&lfh->lfh_refcount) > 0 &&
                 atomic_read(&lfh->lfh_refcount) < 0x5a5a);
@@ -174,19 +179,18 @@ int lov_attach(struct obd_device *dev, obd_count len, void *data)
         struct proc_dir_entry *entry;
         int rc;
 
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(lov, &lvars);
         rc = lprocfs_obd_attach(dev, lvars.obd_vars);
-        if (rc) 
+        if (rc)
                 return rc;
 
         entry = create_proc_entry("target_obd", 0444, dev->obd_proc_entry);
-        if (entry == NULL) 
+        if (entry == NULL)
                 RETURN(-ENOMEM);
-        entry->proc_fops = &ll_proc_target_fops;
+        entry->proc_fops = &lov_proc_target_fops;
         entry->data = dev;
-        
+
         return rc;
-        
 }
 
 int lov_detach(struct obd_device *dev)
@@ -214,15 +218,17 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd,
         if (rc)
                 RETURN(rc);
 
+        exp = class_conn2export(conn);
+        spin_lock_init(&exp->exp_lov_data.led_lock);
+        INIT_LIST_HEAD(&exp->exp_lov_data.led_open_head);
+
         /* We don't want to actually do the underlying connections more than
          * once, so keep track. */
         lov->refcount++;
-        if (lov->refcount > 1)
+        if (lov->refcount > 1) {
+                class_export_put(exp);
                 RETURN(0);
-
-        exp = class_conn2export(conn);
-        spin_lock_init(&exp->exp_lov_data.led_lock);
-        INIT_LIST_HEAD(&exp->exp_lov_data.led_open_head);
+        }
 
         /* retrieve LOV metadata from MDS */
         rc = obd_connect(&mdc_conn, lov->mdcobd, &lov_mds_uuid);
@@ -248,9 +254,9 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd,
          * array fits in LOV_MAX_UUID_BUFFER_SIZE and all uuids are
          * terminated), but I still need to verify it makes overall
          * sense */
-        mdesc = lustre_msg_buf (req->rq_repmsg, 0, sizeof (*mdesc));
-        LASSERT (mdesc != NULL);
-        LASSERT_REPSWABBED (req, 0);
+        mdesc = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*mdesc));
+        LASSERT(mdesc != NULL);
+        LASSERT_REPSWABBED(req, 0);
 
         *desc = *mdesc;
 
@@ -279,15 +285,15 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd,
          * demands on memory here. */
         lov->bufsize = sizeof(struct lov_tgt_desc) * desc->ld_tgt_count;
         OBD_ALLOC(lov->tgts, lov->bufsize);
-        if (!lov->tgts) {
+        if (lov->tgts == NULL) {
                 CERROR("Out of memory\n");
                 GOTO(out_req, rc = -ENOMEM);
         }
 
         uuids = lustre_msg_buf(req->rq_repmsg, 1,
                                sizeof(*uuids) * desc->ld_tgt_count);
-        LASSERT (uuids != NULL);
-        LASSERT_REPSWABBED (req, 1);
+        LASSERT(uuids != NULL);
+        LASSERT_REPSWABBED(req, 1);
 
         for (i = 0, tgts = lov->tgts; i < desc->ld_tgt_count; i++, tgts++) {
                 struct obd_uuid *uuid = &tgts->uuid;
@@ -330,7 +336,9 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd,
         }
 
         mdc->cl_max_mds_easize = obd_size_diskmd(conn, NULL);
-        ptlrpc_req_finished (req);
+        mdc->cl_max_mds_cookiesize = desc->ld_tgt_count *
+                sizeof(struct llog_cookie);
+        ptlrpc_req_finished(req);
         class_export_put(exp);
         RETURN (0);
 
@@ -356,7 +364,7 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd,
         RETURN (rc);
 }
 
-static int lov_disconnect(struct lustre_handle *conn, int failover)
+static int lov_disconnect(struct lustre_handle *conn, int flags)
 {
         struct obd_device *obd = class_conn2obd(conn);
         struct lov_obd *lov = &obd->u.lov;
@@ -383,7 +391,7 @@ static int lov_disconnect(struct lustre_handle *conn, int failover)
                                 class_conn2obd(&lov->tgts[i].conn);
                         osc_obd->obd_no_recov = 1;
                 }
-                rc = obd_disconnect(&lov->tgts[i].conn, failover);
+                rc = obd_disconnect(&lov->tgts[i].conn, flags);
                 if (rc) {
                         if (lov->tgts[i].active) {
                                 CERROR("Target %s disconnect error %d\n",
@@ -400,6 +408,7 @@ static int lov_disconnect(struct lustre_handle *conn, int failover)
         lov->bufsize = 0;
         lov->tgts = NULL;
 
+ out_local:
         exp = class_conn2export(conn);
         if (exp == NULL) {
                 CERROR("export handle "LPU64" invalid!  If you can reproduce, "
@@ -421,7 +430,6 @@ static int lov_disconnect(struct lustre_handle *conn, int failover)
         spin_unlock(&exp->exp_lov_data.led_lock);
         class_export_put(exp);
 
- out_local:
         rc = class_disconnect(conn, 0);
         RETURN(rc);
 }
@@ -548,6 +556,8 @@ static obd_size lov_stripe_size(struct lov_stripe_md *lsm, obd_size ost_size,
 static void lov_merge_attrs(struct obdo *tgt, struct obdo *src, obd_flag valid,
                             struct lov_stripe_md *lsm, int stripeno, int *set)
 {
+        valid &= src->o_valid;
+
         if (*set) {
                 if (valid & OBD_MD_FLSIZE) {
                         /* this handles sparse files properly */
@@ -566,68 +576,102 @@ static void lov_merge_attrs(struct obdo *tgt, struct obdo *src, obd_flag valid,
                 if (valid & OBD_MD_FLMTIME && tgt->o_mtime < src->o_mtime)
                         tgt->o_mtime = src->o_mtime;
         } else {
-                obdo_cpy_md(tgt, src, valid);
+                memcpy(tgt, src, sizeof(*tgt));
+                tgt->o_id = lsm->lsm_object_id;
                 if (valid & OBD_MD_FLSIZE)
                         tgt->o_size = lov_stripe_size(lsm,src->o_size,stripeno);
                 *set = 1;
         }
 }
 
+#ifndef log2
+#define log2(n) ffz(~(n))
+#endif
+
 /* the LOV expects oa->o_id to be set to the LOV object id */
-static int lov_create(struct lustre_handle *conn, struct obdo *oa,
+static int lov_create(struct lustre_handle *conn, struct obdo *src_oa,
                       struct lov_stripe_md **ea, struct obd_trans_info *oti)
 {
         struct obd_export *export = class_conn2export(conn);
         struct lov_obd *lov;
         struct lov_stripe_md *lsm;
-        struct lov_oinfo *loi;
-        struct obdo *tmp;
+        struct lov_oinfo *loi = NULL;
+        struct obdo *tmp_oa, *ret_oa;
+        struct llog_cookie *cookies = NULL;
         unsigned ost_count, ost_idx;
-        int set = 0, obj_alloc = 0;
-        int rc = 0, i;
+        int set = 0, obj_alloc = 0, cookie_sent = 0, rc = 0, i;
         ENTRY;
 
         LASSERT(ea);
 
         if (!export)
-                GOTO(out_exp, rc = -EINVAL);
+                RETURN(-EINVAL);
 
         lov = &export->exp_obd->u.lov;
 
         if (!lov->desc.ld_active_tgt_count)
                 GOTO(out_exp, rc = -EIO);
 
-        tmp = obdo_alloc();
-        if (!tmp)
+        ret_oa = obdo_alloc();
+        if (!ret_oa)
                 GOTO(out_exp, rc = -ENOMEM);
 
+        tmp_oa = obdo_alloc();
+        if (!tmp_oa)
+                GOTO(out_oa, rc = -ENOMEM);
+
         lsm = *ea;
 
         if (!lsm) {
-                rc = obd_alloc_memmd(conn, &lsm);
+                int stripes;
+                ost_count = lov_get_stripecnt(lov, 0);
+
+                /* If the MDS file was truncated up to some size, stripe over
+                 * enough OSTs to allow the file to be created at that size.
+                 */
+                if (src_oa->o_valid & OBD_MD_FLSIZE) {
+                        stripes=((src_oa->o_size+LUSTRE_STRIPE_MAXBYTES)>>12)-1;
+                        do_div(stripes, (__u32)(LUSTRE_STRIPE_MAXBYTES >> 12));
+
+                        if (stripes > lov->desc.ld_active_tgt_count)
+                                GOTO(out_exp, rc = -EFBIG);
+                        if (stripes < ost_count)
+                                stripes = ost_count;
+                } else
+                        stripes = ost_count;
+
+                rc = lov_alloc_memmd(&lsm, stripes);
                 if (rc < 0)
                         GOTO(out_tmp, rc);
 
                 rc = 0;
-                lsm->lsm_magic = LOV_MAGIC;
         }
 
         ost_count = lov->desc.ld_tgt_count;
 
-        LASSERT(oa->o_valid & OBD_MD_FLID);
-        lsm->lsm_object_id = oa->o_id;
+        LASSERT(src_oa->o_valid & OBD_MD_FLID);
+        lsm->lsm_object_id = src_oa->o_id;
         if (!lsm->lsm_stripe_size)
                 lsm->lsm_stripe_size = lov->desc.ld_default_stripe_size;
 
         if (!*ea || lsm->lsm_stripe_offset >= ost_count) {
                 get_random_bytes(&ost_idx, 2);
                 ost_idx %= ost_count;
-        } else
+        } else {
                 ost_idx = lsm->lsm_stripe_offset;
+        }
 
         CDEBUG(D_INODE, "allocating %d subobjs for objid "LPX64" at idx %d\n",
                lsm->lsm_stripe_count, lsm->lsm_object_id, ost_idx);
 
+        /* XXX LOV STACKING: need to figure out how many real OSCs */
+        if (oti && (src_oa->o_valid & OBD_MD_FLCOOKIE)) {
+                oti_alloc_cookies(oti, lsm->lsm_stripe_count);
+                if (!oti->oti_logcookies)
+                        GOTO(out_cleanup, rc = -ENOMEM);
+                cookies = oti->oti_logcookies;
+        }
+
         loi = lsm->lsm_oinfo;
         for (i = 0; i < ost_count; i++, ost_idx = (ost_idx + 1) % ost_count) {
                 struct lov_stripe_md obj_md;
@@ -640,14 +684,30 @@ static int lov_create(struct lustre_handle *conn, struct obdo *oa,
                 }
 
                 /* create data objects with "parent" OA */
-                memcpy(tmp, oa, sizeof(*tmp));
+                memcpy(tmp_oa, src_oa, sizeof(*tmp_oa));
+
+                /* XXX When we start creating objects on demand, we need to
+                 *     make sure that we always create the object on the
+                 *     stripe which holds the existing file size.
+                 */
+                if (src_oa->o_valid & OBD_MD_FLSIZE) {
+                        if (lov_stripe_offset(lsm, src_oa->o_size, i,
+                                              &tmp_oa->o_size) < 0 &&
+                            tmp_oa->o_size)
+                                tmp_oa->o_size--;
+
+                        CDEBUG(D_INODE, "stripe %d has size "LPU64"/"LPU64"\n",
+                               i, tmp_oa->o_size, src_oa->o_size);
+                }
+
                 /* XXX: LOV STACKING: use real "obj_mdp" sub-data */
-                err = obd_create(&lov->tgts[ost_idx].conn, tmp, &obj_mdp, oti);
+                err = obd_create(&lov->tgts[ost_idx].conn, tmp_oa,&obj_mdp,oti);
                 if (err) {
                         if (lov->tgts[ost_idx].active) {
                                 CERROR("error creating objid "LPX64" sub-object"
-                                       " on OST idx %d/%d: rc = %d\n", oa->o_id,
-                                       ost_idx, lsm->lsm_stripe_count, err);
+                                       " on OST idx %d/%d: rc = %d\n",
+                                       src_oa->o_id, ost_idx,
+                                       lsm->lsm_stripe_count, err);
                                 if (err > 0) {
                                         CERROR("obd_create returned invalid "
                                                "err %d\n", err);
@@ -658,17 +718,22 @@ static int lov_create(struct lustre_handle *conn, struct obdo *oa,
                                 rc = err;
                         continue;
                 }
-                loi->loi_id = tmp->o_id;
+                loi->loi_id = tmp_oa->o_id;
                 loi->loi_ost_idx = ost_idx;
                 CDEBUG(D_INODE, "objid "LPX64" has subobj "LPX64" at idx %d\n",
                        lsm->lsm_object_id, loi->loi_id, ost_idx);
 
                 if (set == 0)
                         lsm->lsm_stripe_offset = ost_idx;
-                lov_merge_attrs(oa, tmp, OBD_MD_FLBLKSZ, lsm, obj_alloc, &set);
-                ot_init(&loi->loi_dirty_ot_inline);
+                lov_merge_attrs(ret_oa, tmp_oa, tmp_oa->o_valid, lsm,
+                                obj_alloc, &set);
                 loi->loi_dirty_ot = &loi->loi_dirty_ot_inline;
+                ot_init(loi->loi_dirty_ot);
 
+                if (cookies)
+                        ++oti->oti_logcookies;
+                if (tmp_oa->o_valid & OBD_MD_FLCOOKIE)
+                        ++cookie_sent;
                 ++obj_alloc;
                 ++loi;
 
@@ -677,6 +742,12 @@ static int lov_create(struct lustre_handle *conn, struct obdo *oa,
                         GOTO(out_done, rc = 0);
         }
 
+        /* If we were passed specific striping params, then a failure to
+         * meet those requirements is an error, since we can't reallocate
+         * that memory (it might be part of a larger array or something).
+         *
+         * We can only get here if lsm_stripe_count was originally > 1.
+         */
         if (*ea != NULL) {
                 CERROR("can't lstripe objid "LPX64": have %u want %u, rc %d\n",
                        lsm->lsm_object_id, obj_alloc, lsm->lsm_stripe_count,rc);
@@ -686,27 +757,61 @@ static int lov_create(struct lustre_handle *conn, struct obdo *oa,
         } else {
                 struct lov_stripe_md *lsm_new;
                 /* XXX LOV STACKING call into osc for sizes */
-                unsigned size = lov_stripe_md_size(obj_alloc);
+                unsigned oldsize, newsize;
+
+                if (oti && cookies && cookie_sent) {
+                        oldsize = lsm->lsm_stripe_count * sizeof(*cookies);
+                        newsize = obj_alloc * sizeof(*cookies);
+
+                        oti_alloc_cookies(oti, obj_alloc);
+                        if (oti->oti_logcookies) {
+                                memcpy(oti->oti_logcookies, cookies, newsize);
+                                OBD_FREE(cookies, oldsize);
+                                cookies = oti->oti_logcookies;
+                        } else {
+                                CWARN("'leaking' %d bytes\n", oldsize-newsize);
+                        }
+                }
 
                 CERROR("reallocating LSM for objid "LPX64": old %u new %u\n",
                        lsm->lsm_object_id, obj_alloc, lsm->lsm_stripe_count);
-                OBD_ALLOC(lsm_new, size);
-                if (!lsm_new)
-                        GOTO(out_cleanup, rc = -ENOMEM);
-                memcpy(lsm_new, lsm, size);
-                lsm_new->lsm_stripe_count = obj_alloc;
-
-                /* XXX LOV STACKING call into osc for sizes */
-                OBD_FREE(lsm, lov_stripe_md_size(lsm->lsm_stripe_count));
-                lsm = lsm_new;
-
+                oldsize = lov_stripe_md_size(lsm->lsm_stripe_count);
+                newsize = lov_stripe_md_size(obj_alloc);
+                OBD_ALLOC(lsm_new, newsize);
+                if (lsm_new != NULL) {
+                        memcpy(lsm_new, lsm, newsize);
+                        lsm_new->lsm_stripe_count = obj_alloc;
+                        OBD_FREE(lsm, newsize);
+                        lsm = lsm_new;
+                } else {
+                        CWARN("'leaking' %d bytes\n", oldsize - newsize);
+                }
                 rc = 0;
         }
  out_done:
         *ea = lsm;
+        if (src_oa->o_valid & OBD_MD_FLSIZE &&
+            ret_oa->o_size != src_oa->o_size) {
+                CERROR("original size "LPU64" isn't new object size "LPU64"\n",
+                       src_oa->o_size, ret_oa->o_size);
+                LBUG();
+        }
+        ret_oa->o_id = src_oa->o_id;
+        memcpy(src_oa, ret_oa, sizeof(*src_oa));
 
  out_tmp:
-        obdo_free(tmp);
+        obdo_free(tmp_oa);
+ out_oa:
+        obdo_free(ret_oa);
+        if (oti && cookies) {
+                oti->oti_logcookies = cookies;
+                if (!cookie_sent) {
+                        oti_free_cookies(oti);
+                        src_oa->o_valid &= ~OBD_MD_FLCOOKIE;
+                } else {
+                        src_oa->o_valid |= OBD_MD_FLCOOKIE;
+                }
+        }
  out_exp:
         class_export_put(export);
         return rc;
@@ -717,15 +822,26 @@ static int lov_create(struct lustre_handle *conn, struct obdo *oa,
 
                 --loi;
                 /* destroy already created objects here */
-                memcpy(tmp, oa, sizeof(*tmp));
-                tmp->o_id = loi->loi_id;
-                err = obd_destroy(&lov->tgts[loi->loi_ost_idx].conn, tmp, NULL,
-                                  NULL);
+                memcpy(tmp_oa, src_oa, sizeof(*tmp_oa));
+                tmp_oa->o_id = loi->loi_id;
+
+                if (oti && cookie_sent) {
+                        err = obd_log_cancel(&lov->tgts[loi->loi_ost_idx].conn,
+                                             NULL, 1, --oti->oti_logcookies,
+                                             OBD_LLOG_FL_SENDNOW);
+                        if (err)
+                                CERROR("Failed to cancel objid "LPX64" subobj "
+                                       LPX64" cookie on OST idx %d: rc = %d\n",
+                                       src_oa->o_id, loi->loi_id,
+                                       loi->loi_ost_idx, err);
+                }
+
+                err = obd_destroy(&lov->tgts[loi->loi_ost_idx].conn, tmp_oa,
+                                  NULL, oti);
                 if (err)
-                        CERROR("Failed to uncreate objid "LPX64" subobj "
-                               LPX64" on OST idx %d: rc = %d\n",
-                               oa->o_id, loi->loi_id, loi->loi_ost_idx,
-                               err);
+                        CERROR("Failed to uncreate objid "LPX64" subobj "LPX64
+                               " on OST idx %d: rc = %d\n", src_oa->o_id,
+                               loi->loi_id, loi->loi_ost_idx, err);
         }
         if (*ea == NULL)
                 obd_free_memmd(conn, &lsm);
@@ -779,12 +895,12 @@ static int lov_destroy(struct lustre_handle *conn, struct obdo *oa,
                 memcpy(&tmp, oa, sizeof(tmp));
                 tmp.o_id = loi->loi_id;
                 if (lfh)
-                        memcpy(obdo_handle(&tmp), lfh->lfh_och + i,
-                               FD_OSTDATA_SIZE);
+                        memcpy(obdo_handle(&tmp), &lfh->lfh_och[i].och_fh,
+                               sizeof(lfh->lfh_och[i].och_fh));
                 else
                         tmp.o_valid &= ~OBD_MD_FLHANDLE;
                 err = obd_destroy(&lov->tgts[loi->loi_ost_idx].conn, &tmp,
-                                  NULL, NULL);
+                                  NULL, oti);
                 if (err && lov->tgts[loi->loi_ost_idx].active) {
                         CERROR("error: destroying objid "LPX64" subobj "
                                LPX64" on OST idx %d: rc = %d\n",
@@ -839,8 +955,8 @@ static int lov_getattr(struct lustre_handle *conn, struct obdo *oa,
                 memcpy(&tmp, oa, sizeof(tmp));
                 tmp.o_id = loi->loi_id;
                 if (lfh)
-                        memcpy(obdo_handle(&tmp), lfh->lfh_och + i,
-                               FD_OSTDATA_SIZE);
+                        memcpy(obdo_handle(&tmp), &lfh->lfh_och[i].och_fh,
+                               sizeof(lfh->lfh_och[i].och_fh));
                 else
                         tmp.o_valid &= ~OBD_MD_FLHANDLE;
 
@@ -867,12 +983,13 @@ static int lov_getattr(struct lustre_handle *conn, struct obdo *oa,
         return rc;
 }
 
-static int lov_getattr_interpret(struct ptlrpc_request_set *rqset,
-                                 struct lov_getattr_async_args *aa, int rc)
+static int lov_getattr_interpret(struct ptlrpc_request_set *rqset, void *data, 
+                                 int rc)
 {
+        struct lov_getattr_async_args *aa = data;
         struct lov_stripe_md *lsm = aa->aa_lsm;
         struct obdo          *oa = aa->aa_oa;
-        struct obdo          *obdos = aa->aa_stripe_oas;
+        struct obdo          *obdos = aa->aa_obdos;
         struct lov_oinfo     *loi;
         int                   i;
         int                   set = 0;
@@ -881,8 +998,8 @@ static int lov_getattr_interpret(struct ptlrpc_request_set *rqset,
         if (rc == 0) {
                 /* NB all stripe requests succeeded to get here */
 
-                for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count;
-                     i++,loi++) {
+                for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count;
+                     i++, loi++) {
                         if (obdos[i].o_valid == 0)      /* inactive stripe */
                                 continue;
 
@@ -955,8 +1072,8 @@ static int lov_getattr_async (struct lustre_handle *conn, struct obdo *oa,
                 memcpy(&obdos[i], oa, sizeof(obdos[i]));
                 obdos[i].o_id = loi->loi_id;
                 if (lfh)
-                        memcpy(obdo_handle(&obdos[i]), lfh->lfh_och + i,
-                               FD_OSTDATA_SIZE);
+                        memcpy(obdo_handle(&obdos[i]), &lfh->lfh_och[i].och_fh,
+                               sizeof(lfh->lfh_och[i].och_fh));
                 else
                         obdos[i].o_valid &= ~OBD_MD_FLHANDLE;
 
@@ -980,7 +1097,7 @@ static int lov_getattr_async (struct lustre_handle *conn, struct obdo *oa,
         aa = (struct lov_getattr_async_args *)&rqset->set_args;
         aa->aa_lsm = lsm;
         aa->aa_oa = oa;
-        aa->aa_stripe_oas = obdos;
+        aa->aa_obdos = obdos;
         GOTO (out, rc = 0);
 
  out_obdos:
@@ -992,10 +1109,10 @@ static int lov_getattr_async (struct lustre_handle *conn, struct obdo *oa,
         RETURN (rc);
 }
 
-static int lov_setattr(struct lustre_handle *conn, struct obdo *oa,
+static int lov_setattr(struct lustre_handle *conn, struct obdo *src_oa,
                        struct lov_stripe_md *lsm, struct obd_trans_info *oti)
 {
-        struct obdo *tmp;
+        struct obdo *tmp_oa, *ret_oa;
         struct obd_export *export = class_conn2export(conn);
         struct lov_obd *lov;
         struct lov_oinfo *loi;
@@ -1009,18 +1126,17 @@ static int lov_setattr(struct lustre_handle *conn, struct obdo *oa,
         if (!export || !export->exp_obd)
                 GOTO(out, rc = -ENODEV);
 
-        /* size changes should go through punch and not setattr */
-        LASSERT(!(oa->o_valid & OBD_MD_FLSIZE));
-
-        /* for now, we only expect mtime updates here */
-        LASSERT(!(oa->o_valid & ~(OBD_MD_FLID |OBD_MD_FLTYPE |OBD_MD_FLMTIME)));
-
-        tmp = obdo_alloc();
-        if (!tmp)
+        /* for now, we only expect mode and a/m/ctime updates here */
+        LASSERT(!(src_oa->o_valid & ~(OBD_MD_FLID|OBD_MD_FLTYPE|OBD_MD_FLMODE|
+                                      OBD_MD_FLATIME | OBD_MD_FLMTIME |
+                                      OBD_MD_FLCTIME)));
+        ret_oa = obdo_alloc();
+        if (!ret_oa)
                 GOTO(out, rc = -ENOMEM);
 
-        if (oa->o_valid & OBD_MD_FLHANDLE)
-                lfh = lov_handle2lfh(obdo_handle(oa));
+        tmp_oa = obdo_alloc();
+        if (!tmp_oa)
+                GOTO(out_oa, rc = -ENOMEM);
 
         lov = &export->exp_obd->u.lov;
         for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
@@ -1031,46 +1147,54 @@ static int lov_setattr(struct lustre_handle *conn, struct obdo *oa,
                         continue;
                 }
 
-                obdo_cpy_md(tmp, oa, oa->o_valid);
+                memcpy(tmp_oa, src_oa, sizeof(*tmp_oa));
 
                 if (lfh)
-                        memcpy(obdo_handle(tmp), lfh->lfh_och + i,
-                               FD_OSTDATA_SIZE);
+                        memcpy(obdo_handle(tmp_oa), &lfh->lfh_och[i].och_fh,
+                               sizeof(lfh->lfh_och[i].och_fh));
                 else
-                        tmp->o_valid &= ~OBD_MD_FLHANDLE;
+                        tmp_oa->o_valid &= ~OBD_MD_FLHANDLE;
 
-                tmp->o_id = loi->loi_id;
+                tmp_oa->o_id = loi->loi_id;
 
-                err = obd_setattr(&lov->tgts[loi->loi_ost_idx].conn, tmp,
+                err = obd_setattr(&lov->tgts[loi->loi_ost_idx].conn, tmp_oa,
                                   NULL, NULL);
                 if (err) {
                         if (lov->tgts[loi->loi_ost_idx].active) {
                                 CERROR("error: setattr objid "LPX64" subobj "
                                        LPX64" on OST idx %d: rc = %d\n",
-                                       oa->o_id, loi->loi_id, loi->loi_ost_idx,
-                                       err);
+                                       src_oa->o_id, loi->loi_id,
+                                       loi->loi_ost_idx, err);
                                 if (!rc)
                                         rc = err;
                         }
-                } else
-                        set = 1;
+                        continue;
+                }
+
+                lov_merge_attrs(ret_oa, tmp_oa, tmp_oa->o_valid, lsm, i, &set);
         }
-        obdo_free(tmp);
         if (!set && !rc)
                 rc = -EIO;
         if (lfh != NULL)
                 lov_lfh_put(lfh);
-        GOTO(out, rc);
- out:
+
+        ret_oa->o_id = src_oa->o_id;
+        memcpy(src_oa, ret_oa, sizeof(*src_oa));
+        GOTO(out_tmp, rc);
+out_tmp:
+        obdo_free(tmp_oa);
+out_oa:
+        obdo_free(ret_oa);
+out:
         class_export_put(export);
         return rc;
 }
 
-static int lov_open(struct lustre_handle *conn, struct obdo *oa,
+static int lov_open(struct lustre_handle *conn, struct obdo *src_oa,
                     struct lov_stripe_md *lsm, struct obd_trans_info *oti,
                     struct obd_client_handle *och)
 {
-        struct obdo *tmp; /* on the heap here, on the stack in lov_close? */
+        struct obdo *tmp_oa, *ret_oa;
         struct obd_export *export = class_conn2export(conn);
         struct lov_obd *lov;
         struct lov_oinfo *loi;
@@ -1085,20 +1209,24 @@ static int lov_open(struct lustre_handle *conn, struct obdo *oa,
         if (!export || !export->exp_obd)
                 GOTO(out_exp, rc = -ENODEV);
 
-        tmp = obdo_alloc();
-        if (!tmp)
+        ret_oa = obdo_alloc();
+        if (!ret_oa)
                 GOTO(out_exp, rc = -ENOMEM);
 
+        tmp_oa = obdo_alloc();
+        if (!tmp_oa)
+                GOTO(out_oa, rc = -ENOMEM);
+
         lfh = lov_lfh_new();
         if (lfh == NULL)
                 GOTO(out_tmp, rc = -ENOMEM);
-        OBD_ALLOC(lfh->lfh_och, lsm->lsm_stripe_count * sizeof *och);
+        OBD_ALLOC(lfh->lfh_och, lsm->lsm_stripe_count * sizeof(*och));
         if (!lfh->lfh_och)
                 GOTO(out_lfh, rc = -ENOMEM);
 
         lov = &export->exp_obd->u.lov;
-        oa->o_size = 0;
-        oa->o_blocks = 0;
+        src_oa->o_size = 0;
+        src_oa->o_blocks = 0;
         for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
                 if (lov->tgts[loi->loi_ost_idx].active == 0) {
                         CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
@@ -1106,11 +1234,11 @@ static int lov_open(struct lustre_handle *conn, struct obdo *oa,
                 }
 
                 /* create data objects with "parent" OA */
-                memcpy(tmp, oa, sizeof(*tmp));
-                tmp->o_id = loi->loi_id;
+                memcpy(tmp_oa, src_oa, sizeof(*tmp_oa));
+                tmp_oa->o_id = loi->loi_id;
 
-                rc = obd_open(&lov->tgts[loi->loi_ost_idx].conn, tmp,
-                              NULL, NULL, lfh->lfh_och + i);
+                rc = obd_open(&lov->tgts[loi->loi_ost_idx].conn, tmp_oa,
+                              NULL, NULL, &lfh->lfh_och[i]);
                 if (rc) {
                         if (!lov->tgts[loi->loi_ost_idx].active) {
                                 rc = 0;
@@ -1118,27 +1246,31 @@ static int lov_open(struct lustre_handle *conn, struct obdo *oa,
                         }
                         CERROR("error: open objid "LPX64" subobj "LPX64
                                " on OST idx %d: rc = %d\n",
-                               oa->o_id, lsm->lsm_oinfo[i].loi_id,
+                               src_oa->o_id, lsm->lsm_oinfo[i].loi_id,
                                loi->loi_ost_idx, rc);
                         goto out_handles;
                 }
 
-                lov_merge_attrs(oa, tmp, tmp->o_valid, lsm, i, &set);
+                lov_merge_attrs(ret_oa, tmp_oa, tmp_oa->o_valid, lsm, i, &set);
         }
 
         lfh->lfh_count = lsm->lsm_stripe_count;
         och->och_fh.cookie = lfh->lfh_handle.h_cookie;
-        obdo_handle(oa)->cookie = lfh->lfh_handle.h_cookie;
-        oa->o_valid |= OBD_MD_FLHANDLE;
+        obdo_handle(ret_oa)->cookie = lfh->lfh_handle.h_cookie;
+        ret_oa->o_valid |= OBD_MD_FLHANDLE;
+        ret_oa->o_id = src_oa->o_id;
+        memcpy(src_oa, ret_oa, sizeof(*src_oa));
 
-        /* llfh refcount transfers to list */
+        /* lfh refcount transfers to list */
         spin_lock(&export->exp_lov_data.led_lock);
         list_add(&lfh->lfh_list, &export->exp_lov_data.led_open_head);
         spin_unlock(&export->exp_lov_data.led_lock);
 
         GOTO(out_tmp, rc);
  out_tmp:
-        obdo_free(tmp);
+        obdo_free(tmp_oa);
+ out_oa:
+        obdo_free(ret_oa);
  out_exp:
         class_export_put(export);
         return rc;
@@ -1150,16 +1282,16 @@ static int lov_open(struct lustre_handle *conn, struct obdo *oa,
                 if (lov->tgts[loi->loi_ost_idx].active == 0)
                         continue;
 
-                memcpy(tmp, oa, sizeof(*tmp));
-                tmp->o_id = loi->loi_id;
-                memcpy(obdo_handle(tmp), lfh->lfh_och + i, FD_OSTDATA_SIZE);
+                memcpy(tmp_oa, src_oa, sizeof(*tmp_oa));
+                tmp_oa->o_id = loi->loi_id;
+                memcpy(obdo_handle(tmp_oa), &lfh->lfh_och[i], FD_OSTDATA_SIZE);
 
-                err = obd_close(&lov->tgts[loi->loi_ost_idx].conn, tmp,
+                err = obd_close(&lov->tgts[loi->loi_ost_idx].conn, tmp_oa,
                                 NULL, NULL);
                 if (err && lov->tgts[loi->loi_ost_idx].active) {
                         CERROR("error: closing objid "LPX64" subobj "LPX64
                                " on OST idx %d after open error: rc=%d\n",
-                               oa->o_id, loi->loi_id, loi->loi_ost_idx, err);
+                               src_oa->o_id, loi->loi_id, loi->loi_ost_idx,err);
                 }
         }
 
@@ -1189,6 +1321,8 @@ static int lov_close(struct lustre_handle *conn, struct obdo *oa,
 
         if (oa->o_valid & OBD_MD_FLHANDLE)
                 lfh = lov_handle2lfh(obdo_handle(oa));
+        if (!lfh)
+                LBUG();
 
         lov = &export->exp_obd->u.lov;
         for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
@@ -1198,7 +1332,7 @@ static int lov_close(struct lustre_handle *conn, struct obdo *oa,
                 memcpy(&tmp, oa, sizeof(tmp));
                 tmp.o_id = loi->loi_id;
                 if (lfh)
-                        memcpy(obdo_handle(&tmp), lfh->lfh_och + i,
+                        memcpy(obdo_handle(&tmp), &lfh->lfh_och[i],
                                FD_OSTDATA_SIZE);
                 else
                         tmp.o_valid &= ~OBD_MD_FLHANDLE;
@@ -1223,18 +1357,16 @@ static int lov_close(struct lustre_handle *conn, struct obdo *oa,
 
                 OBD_FREE(lfh->lfh_och, lsm->lsm_stripe_count * FD_OSTDATA_SIZE);
                 lov_lfh_destroy(lfh);
+                LASSERT(atomic_read(&lfh->lfh_refcount) == 1);
                 lov_lfh_put(lfh); /* balance handle2lfh above */
-        }
+        } else
+                LBUG();
         GOTO(out, rc);
  out:
         class_export_put(export);
         return rc;
 }
 
-#ifndef log2
-#define log2(n) ffz(~(n))
-#endif
-
 /* we have an offset in file backed by an lov and want to find out where
  * that offset lands in our given stripe of the file.  for the easy
  * case where the offset is within the stripe, we just have to scale the
@@ -1404,8 +1536,8 @@ static int lov_punch(struct lustre_handle *conn, struct obdo *oa,
                 memcpy(&tmp, oa, sizeof(tmp));
                 tmp.o_id = loi->loi_id;
                 if (lfh)
-                        memcpy(obdo_handle(&tmp), lfh->lfh_och + i,
-                               FD_OSTDATA_SIZE);
+                        memcpy(obdo_handle(&tmp), &lfh->lfh_och[i].och_fh,
+                               sizeof(lfh->lfh_och[i].och_fh));
                 else
                         tmp.o_valid &= ~OBD_MD_FLHANDLE;
 
@@ -1455,7 +1587,7 @@ static int lov_brw_check(struct lov_obd *lov, struct lov_stripe_md *lsm,
         return 0;
 }
 
-static int lov_brw(int cmd, struct lustre_handle *conn,
+static int lov_brw(int cmd, struct lustre_handle *conn, struct obdo *src_oa,
                    struct lov_stripe_md *lsm, obd_count oa_bufs,
                    struct brw_page *pga, struct obd_trans_info *oti)
 {
@@ -1467,10 +1599,12 @@ static int lov_brw(int cmd, struct lustre_handle *conn,
                 int ost_idx;
         } *stripeinfo, *si, *si_last;
         struct obd_export *export = class_conn2export(conn);
+        struct obdo *ret_oa = NULL, *tmp_oa = NULL;
+        struct lov_file_handles *lfh = NULL;
         struct lov_obd *lov;
         struct brw_page *ioarr;
         struct lov_oinfo *loi;
-        int rc = 0, i, *where, stripe_count = lsm->lsm_stripe_count;
+        int rc = 0, i, *where, stripe_count = lsm->lsm_stripe_count, set = 0;
         ENTRY;
 
         if (lsm_bad_magic(lsm))
@@ -1495,6 +1629,21 @@ static int lov_brw(int cmd, struct lustre_handle *conn,
         if (!ioarr)
                 GOTO(out_where, rc = -ENOMEM);
 
+        if (src_oa) {
+                ret_oa = obdo_alloc();
+                if (!ret_oa)
+                        GOTO(out_ioarr, rc = -ENOMEM);
+
+                tmp_oa = obdo_alloc();
+                if (!tmp_oa)
+                        GOTO(out_oa, rc = -ENOMEM);
+
+                if (src_oa->o_valid & OBD_MD_FLHANDLE)
+                        lfh = lov_handle2lfh(obdo_handle(src_oa));
+                else
+                        src_oa->o_valid &= ~OBD_MD_FLHANDLE;
+        }
+
         for (i = 0; i < oa_bufs; i++) {
                 where[i] = lov_stripe_number(lsm, pga[i].off);
                 stripeinfo[where[i]].bufct++;
@@ -1524,23 +1673,46 @@ static int lov_brw(int cmd, struct lustre_handle *conn,
 
                 if (lov->tgts[si->ost_idx].active == 0) {
                         CDEBUG(D_HA, "lov idx %d inactive\n", si->ost_idx);
-                        GOTO(out_ioarr, rc = -EIO);
+                        GOTO(out_oa, rc = -EIO);
                 }
 
                 if (si->bufct) {
                         LASSERT(shift < oa_bufs);
-                        rc = obd_brw(cmd, &lov->tgts[si->ost_idx].conn,
+                        if (src_oa) {
+                                memcpy(tmp_oa, src_oa, sizeof(*tmp_oa));
+                                if (lfh)
+                                        memcpy(obdo_handle(tmp_oa),
+                                               &lfh->lfh_och[i].och_fh,
+                                               sizeof(lfh->lfh_och[i].och_fh));
+                        }
+
+                        tmp_oa->o_id = si->lsm.lsm_object_id;
+                        rc = obd_brw(cmd, &lov->tgts[si->ost_idx].conn, tmp_oa,
                                      &si->lsm, si->bufct, &ioarr[shift],
                                      oti);
                         if (rc)
                                 GOTO(out_ioarr, rc);
+
+                        lov_merge_attrs(ret_oa, tmp_oa, tmp_oa->o_valid, lsm,
+                                        i, &set);
                 }
         }
-        GOTO(out_ioarr, rc);
+
+        ret_oa->o_id = src_oa->o_id;
+        memcpy(src_oa, ret_oa, sizeof(*src_oa));
+
+        GOTO(out_oa, rc);
+ out_oa:
+        if (tmp_oa)
+                obdo_free(tmp_oa);
+        if (ret_oa)
+                obdo_free(ret_oa);
  out_ioarr:
         OBD_FREE(ioarr, sizeof(*ioarr) * oa_bufs);
  out_where:
         OBD_FREE(where, sizeof(*where) * oa_bufs);
+        if (lfh)
+                lov_lfh_put(lfh);
  out_sinfo:
         OBD_FREE(stripeinfo, stripe_count * sizeof(*stripeinfo));
  out_exp:
@@ -1548,18 +1720,43 @@ static int lov_brw(int cmd, struct lustre_handle *conn,
         return rc;
 }
 
-static int lov_brw_interpret (struct ptlrpc_request_set *set,
-                              struct lov_brw_async_args *aa, int rc)
+/* Completion handler that lov_brw_async() installs as set->set_interpret.
+ *
+ * aa carries the state saved by lov_brw_async(): the striping descriptor,
+ * the caller's obdo (aa_oa), the per-stripe obdo array (aa_obdos) and the
+ * brw_page array (aa_ioarr, aa_oa_bufs entries).  rqset is not used here.
+ *
+ * When rc is 0, every per-stripe obdo with a non-zero o_valid is merged into
+ * the caller's obdo with lov_merge_attrs(); if 'set' is still 0 afterwards
+ * the result becomes -EIO.  In all cases the caller's o_id is reset to
+ * lsm_object_id and both arrays are freed.  Returns rc.
+ *
+ * NOTE(review): oa and obdos are dereferenced unconditionally, whereas
+ * lov_brw_async() only allocates obdos when its oa argument is non-NULL --
+ * confirm that no caller passes a NULL oa. */
+static int lov_brw_interpret(struct ptlrpc_request_set *rqset,
+                             struct lov_brw_async_args *aa, int rc)
 {
-        obd_count        oa_bufs = aa->aa_oa_bufs;
-        struct brw_page *ioarr = aa->aa_ioarr;
+        struct lov_stripe_md *lsm = aa->aa_lsm;
+        obd_count             oa_bufs = aa->aa_oa_bufs;
+        struct obdo          *oa = aa->aa_oa;
+        struct obdo          *obdos = aa->aa_obdos;
+        struct brw_page      *ioarr = aa->aa_ioarr;
+        struct lov_oinfo     *loi;
+        int i, set = 0;
         ENTRY;
 
-        OBD_FREE (ioarr, sizeof (*ioarr) * oa_bufs);
-        RETURN (rc);
+        if (rc == 0) {
+                /* NB all stripe requests succeeded to get here */
+
+                /* loi is advanced with i but is not read inside the loop */
+                for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count;
+                     i++, loi++) {
+                        if (obdos[i].o_valid == 0)      /* inactive stripe */
+                                continue;
+
+                        lov_merge_attrs(oa, &obdos[i], obdos[i].o_valid, lsm,
+                                        i, &set);
+                }
+
+                /* presumably lov_merge_attrs() sets 'set' once it has merged
+                 * something; it is the only writer visible here */
+                if (!set) {
+                        CERROR("No stripes had valid attrs\n");
+                        rc = -EIO;
+                }
+        }
+        oa->o_id = lsm->lsm_object_id;
+
+        /* Sizes mirror the allocations made in lov_brw_async() */
+        OBD_FREE(obdos, lsm->lsm_stripe_count * sizeof(*obdos));
+        OBD_FREE(ioarr, sizeof(*ioarr) * oa_bufs);
+        RETURN(rc);
 }
 
-static int lov_brw_async(int cmd, struct lustre_handle *conn,
+static int lov_brw_async(int cmd, struct lustre_handle *conn, struct obdo *oa,
                          struct lov_stripe_md *lsm, obd_count oa_bufs,
                          struct brw_page *pga, struct ptlrpc_request_set *set,
                          struct obd_trans_info *oti)
@@ -1573,7 +1770,9 @@ static int lov_brw_async(int cmd, struct lustre_handle *conn,
         } *stripeinfo, *si, *si_last;
         struct obd_export *export = class_conn2export(conn);
         struct lov_obd *lov;
+        struct lov_file_handles *lfh = NULL;
         struct brw_page *ioarr;
+        struct obdo *obdos = NULL;
         struct lov_oinfo *loi;
         struct lov_brw_async_args *aa;
         int rc = 0, i, *where, stripe_count = lsm->lsm_stripe_count;
@@ -1597,9 +1796,20 @@ static int lov_brw_async(int cmd, struct lustre_handle *conn,
         if (!where)
                 GOTO(out_sinfo, rc = -ENOMEM);
 
+        if (oa) {
+                OBD_ALLOC(obdos, sizeof(*obdos) * stripe_count);
+                if (!obdos)
+                        GOTO(out_where, rc = -ENOMEM);
+
+                if (oa->o_valid & OBD_MD_FLHANDLE)
+                        lfh = lov_handle2lfh(obdo_handle(oa));
+                else
+                        oa->o_valid &= ~OBD_MD_FLHANDLE;
+        }
+
         OBD_ALLOC(ioarr, sizeof(*ioarr) * oa_bufs);
         if (!ioarr)
-                GOTO(out_where, rc = -ENOMEM);
+                GOTO(out_obdos, rc = -ENOMEM);
 
         for (i = 0; i < oa_bufs; i++) {
                 where[i] = lov_stripe_number(lsm, pga[i].off);
@@ -1612,6 +1822,15 @@ static int lov_brw_async(int cmd, struct lustre_handle *conn,
                         si->index = si_last->index + si_last->bufct;
                 si->lsm.lsm_object_id = loi->loi_id;
                 si->ost_idx = loi->loi_ost_idx;
+
+                if (oa) {
+                        memcpy(&obdos[i], oa, sizeof(*obdos));
+                        obdos[i].o_id = si->lsm.lsm_object_id;
+                        if (lfh)
+                                memcpy(obdo_handle(&obdos[i]),
+                                       &lfh->lfh_och[i].och_fh,
+                                       sizeof(lfh->lfh_och[i].och_fh));
+                }
         }
 
         for (i = 0; i < oa_bufs; i++) {
@@ -1637,24 +1856,35 @@ static int lov_brw_async(int cmd, struct lustre_handle *conn,
                 }
 
                 LASSERT(shift < oa_bufs);
+
                 rc = obd_brw_async(cmd, &lov->tgts[si->ost_idx].conn,
-                                   &si->lsm, si->bufct, &ioarr[shift],
-                                   set, oti);
+                                   &obdos[i], &si->lsm, si->bufct,
+                                   &ioarr[shift], set, oti);
                 if (rc)
                         GOTO(out_ioarr, rc);
         }
-        LASSERT (rc == 0);
-        LASSERT (set->set_interpret == NULL);
-        set->set_interpret = lov_brw_interpret;
-        LASSERT (sizeof (set->set_args) >= sizeof (struct lov_brw_async_args));
+        LASSERT(rc == 0);
+        LASSERT(set->set_interpret == NULL);
+        set->set_interpret = (set_interpreter_func)lov_brw_interpret;
+        LASSERT(sizeof(set->set_args) >= sizeof(struct lov_brw_async_args));
         aa = (struct lov_brw_async_args *)&set->set_args;
-        aa->aa_oa_bufs = oa_bufs;
+        aa->aa_lsm = lsm;
+        aa->aa_obdos = obdos;
+        aa->aa_oa = oa;
         aa->aa_ioarr = ioarr;
+        aa->aa_oa_bufs = oa_bufs;
+
+        /* Don't free ioarr or obdos - that's done in lov_brw_interpret */
         GOTO(out_where, rc);
+
  out_ioarr:
         OBD_FREE(ioarr, sizeof(*ioarr) * oa_bufs);
+ out_obdos:
+        OBD_FREE(obdos, stripe_count * sizeof(*obdos));
  out_where:
         OBD_FREE(where, sizeof(*where) * oa_bufs);
+        if (lfh)
+                lov_lfh_put(lfh);
  out_sinfo:
         OBD_FREE(stripeinfo, stripe_count * sizeof(*stripeinfo));
  out_exp:
@@ -1980,20 +2210,16 @@ static int lov_cancel_unused(struct lustre_handle *conn,
                         (tot) += (add);                                 \
         } while(0)
 
-static int lov_statfs(struct obd_export *export, struct obd_statfs *osfs)
+static int lov_statfs(struct obd_device *obd, struct obd_statfs *osfs,
+                      unsigned long max_age)
 {
-        struct obd_export *tgt_export;
-        struct lov_obd *lov;
+        struct lov_obd *lov = &obd->u.lov;
         struct obd_statfs lov_sfs;
         int set = 0;
         int rc = 0;
         int i;
         ENTRY;
 
-        if (!export || !export->exp_obd)
-                RETURN(-ENODEV);
-
-        lov = &export->exp_obd->u.lov;
 
         /* We only get block data from the OBD */
         for (i = 0; i < lov->desc.ld_tgt_count; i++) {
@@ -2004,14 +2230,8 @@ static int lov_statfs(struct obd_export *export, struct obd_statfs *osfs)
                         continue;
                 }
 
-                tgt_export = class_conn2export(&lov->tgts[i].conn);
-                if (!tgt_export) {
-                        CDEBUG(D_HA, "lov idx %d NULL export\n", i);
-                        continue;
-                }
-
-                err = obd_statfs(tgt_export, &lov_sfs);
-                class_export_put(tgt_export);
+                err = obd_statfs(class_conn2obd(&lov->tgts[i].conn), &lov_sfs,
+                                 max_age);
                 if (err) {
                         if (lov->tgts[i].active) {
                                 CERROR("error: statfs OSC %s on OST idx %d: "
@@ -2022,6 +2242,7 @@ static int lov_statfs(struct obd_export *export, struct obd_statfs *osfs)
                         }
                         continue;
                 }
+
                 if (!set) {
                         memcpy(osfs, &lov_sfs, sizeof(lov_sfs));
                         set = 1;
@@ -2044,6 +2265,7 @@ static int lov_statfs(struct obd_export *export, struct obd_statfs *osfs)
                         LOV_SUM_MAX(osfs->os_ffree, lov_sfs.os_ffree);
                 }
         }
+
         if (set) {
                 __u32 expected_stripes = lov->desc.ld_default_stripe_count ?
                                          lov->desc.ld_default_stripe_count :
@@ -2055,6 +2277,7 @@ static int lov_statfs(struct obd_export *export, struct obd_statfs *osfs)
                         do_div(osfs->os_ffree, expected_stripes);
         } else if (!rc)
                 rc = -EIO;
+
         RETURN(rc);
 }
 
@@ -2191,7 +2414,28 @@ static int lov_get_info(struct lustre_handle *conn, __u32 keylen,
         RETURN(-EINVAL);
 }
 
-static int lov_mark_page_dirty(struct lustre_handle *conn, 
+/* Forward a set_info request to every target of this LOV.
+ *
+ * Only keys that start with "mds_conn" are accepted; anything else yields
+ * -EINVAL.  obd_set_info() is called on all lov->desc.ld_tgt_count targets
+ * regardless of earlier failures, and the first non-zero result (or 0) is
+ * returned. */
+static int lov_set_info(struct lustre_handle *conn, obd_count keylen,
+                        void *key, obd_count vallen, void *val)
+{
+        struct obd_device *obd = class_conn2obd(conn);
+        struct lov_obd *lov = &obd->u.lov;
+        int first_err = 0;
+        int idx;
+        ENTRY;
+
+        /* The key must be at least as long as the prefix ... */
+        if (keylen < strlen("mds_conn"))
+                RETURN(-EINVAL);
+        /* ... and must begin with it. */
+        if (memcmp(key, "mds_conn", strlen("mds_conn")) != 0)
+                RETURN(-EINVAL);
+
+        for (idx = 0; idx < lov->desc.ld_tgt_count; idx++) {
+                int err = obd_set_info(&lov->tgts[idx].conn, keylen, key,
+                                       vallen, val);
+
+                /* Keep going on failure; remember only the first error */
+                if (first_err == 0)
+                        first_err = err;
+        }
+
+        RETURN(first_err);
+}
+
+static int lov_mark_page_dirty(struct lustre_handle *conn,
                                struct lov_stripe_md *lsm, unsigned long offset)
 {
         struct lov_obd *lov = &class_conn2obd(conn)->u.lov;
@@ -2209,12 +2453,12 @@ static int lov_mark_page_dirty(struct lustre_handle *conn,
                 RETURN(-ENOMEM);
 
         stripe = lov_stripe_number(lsm, (obd_off)offset << PAGE_CACHE_SHIFT);
-        lov_stripe_offset(lsm, (obd_off)offset << PAGE_CACHE_SHIFT, stripe, 
+        lov_stripe_offset(lsm, (obd_off)offset << PAGE_CACHE_SHIFT, stripe,
                           &off);
         off >>= PAGE_CACHE_SHIFT;
 
         loi = &lsm->lsm_oinfo[stripe];
-        CDEBUG(D_INODE, "off %lu => off %lu on stripe %d\n", offset, 
+        CDEBUG(D_INODE, "off %lu => off %lu on stripe %d\n", offset,
                (unsigned long)off, stripe);
         submd->lsm_oinfo[0].loi_dirty_ot = &loi->loi_dirty_ot_inline;
 
@@ -2223,7 +2467,7 @@ static int lov_mark_page_dirty(struct lustre_handle *conn,
         RETURN(rc);
 }
 
-static int lov_clear_dirty_pages(struct lustre_handle *conn, 
+static int lov_clear_dirty_pages(struct lustre_handle *conn,
                                  struct lov_stripe_md *lsm, unsigned long start,
                                  unsigned long end, unsigned long *cleared)
 
@@ -2267,11 +2511,11 @@ static int lov_clear_dirty_pages(struct lustre_handle *conn,
                 obd_start >>= PAGE_CACHE_SHIFT;
                 obd_end >>= PAGE_CACHE_SHIFT;
 
-                CDEBUG(D_INODE, "offs [%lu,%lu] => offs [%lu,%lu] stripe %d\n", 
-                       start, end, (unsigned long)obd_start, 
+                CDEBUG(D_INODE, "offs [%lu,%lu] => offs [%lu,%lu] stripe %d\n",
+                       start, end, (unsigned long)obd_start,
                        (unsigned long)obd_end, loi->loi_ost_idx);
                 submd->lsm_oinfo[0].loi_dirty_ot = &loi->loi_dirty_ot_inline;
-                rc = obd_clear_dirty_pages(&lov->tgts[loi->loi_ost_idx].conn, 
+                rc = obd_clear_dirty_pages(&lov->tgts[loi->loi_ost_idx].conn,
                                            submd, obd_start, obd_end,
                                            &osc_cleared);
                 if (rc)
@@ -2310,15 +2554,14 @@ static int lov_last_dirty_offset(struct lustre_handle *conn,
         *offset = 0;
         lov = &export->exp_obd->u.lov;
         rc = -ENOENT;
-        for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; 
-                                          i++, loi++) {
 
+        for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++){
                 count = lsm->lsm_stripe_size >> PAGE_CACHE_SHIFT;
                 skip = (lsm->lsm_stripe_count - 1) * count;
 
                 submd->lsm_oinfo[0].loi_dirty_ot = &loi->loi_dirty_ot_inline;
 
-                err = obd_last_dirty_offset(&lov->tgts[loi->loi_ost_idx].conn, 
+                err = obd_last_dirty_offset(&lov->tgts[loi->loi_ost_idx].conn,
                                             submd, &tmp);
                 if (err == -ENOENT)
                         continue;
@@ -2326,7 +2569,7 @@ static int lov_last_dirty_offset(struct lustre_handle *conn,
                         GOTO(out_exp, rc = err);
 
                 rc = 0;
-                if (tmp != ~0) 
+                if (tmp != ~0)
                         tmp += (tmp/count * skip) + (i * count);
                 if (tmp > *offset)
                         *offset = tmp;
@@ -2338,6 +2581,100 @@ out_exp:
         RETURN(rc);
 }
 
+/* For LOV catalogs, we "nest" catalogs from the parent catalog.  What this
+ * means is that the parent catalog has a bunch of log cookies that are
+ * pointing at one catalog for each OSC.  The OSC catalogs in turn hold
+ * cookies for actual log files.
+ *
+ * One log per target is created under cathandle with llog_new_log() and
+ * stored in lov->tgts[i].ltd_cathandle.  A target whose log cannot be
+ * created is skipped and keeps the IS_ERR() value in ltd_cathandle.
+ *
+ * Returns 0 and sets lov->lo_catalog_loaded when the loop completes.  If
+ * llog_init_catalog() fails, every log created so far -- including the one
+ * belonging to the failing target -- is deleted and closed, and that error
+ * is returned with lo_catalog_loaded left unset. */
+static int lov_get_catalogs(struct lov_obd *lov, struct llog_handle *cathandle)
+{
+        int i, rc;
+
+        ENTRY;
+        for (i = 0; i < lov->desc.ld_tgt_count; i++) {
+                lov->tgts[i].ltd_cathandle = llog_new_log(cathandle,
+                                                          &lov->tgts[i].uuid);
+                if (IS_ERR(lov->tgts[i].ltd_cathandle))
+                        continue;
+                rc = llog_init_catalog(cathandle, &lov->tgts[i].uuid);
+                if (rc)
+                        GOTO(err_logs, rc);
+        }
+        lov->lo_catalog_loaded = 1;
+        RETURN(0);
+err_logs:
+        /* Unwind from the failing target (its log was created just before
+         * the failure) down to target 0.  Slots skipped above hold an
+         * IS_ERR() value rather than a handle, so they must not be passed
+         * to llog_delete_log()/llog_close_log(). */
+        for (; i >= 0; i--) {
+                if (IS_ERR(lov->tgts[i].ltd_cathandle))
+                        continue;
+                llog_delete_log(cathandle, lov->tgts[i].ltd_cathandle);
+                llog_close_log(cathandle, lov->tgts[i].ltd_cathandle);
+        }
+        return rc;
+}
+
+/* Add log records for each OSC that this object is striped over, and return
+ * cookies for each one.  We _would_ have nice abstraction here, except that
+ * we need to keep cookies in stripe order, even if some are NULL, so that
+ * the right cookies are passed back to the right OSTs at the client side.
+ * Unset cookies should be all-zero (which will never occur naturally).
+ *
+ * The per-target catalogs are loaded on first use.  logcookies must have
+ * room for at least lsm_stripe_count entries (asserted).  Each non-negative
+ * obd_log_add() result advances the position in logcookies and is added to
+ * the return value (presumably it is the number of cookies written --
+ * confirm against obd_log_add()).
+ *
+ * Returns the accumulated total, or the first negative error from
+ * lov_get_catalogs() or obd_log_add().  Records already added to earlier
+ * stripes are not undone when a later stripe fails. */
+static int lov_log_add(struct lustre_handle *conn,
+                       struct llog_handle *cathandle,
+                       struct llog_trans_hdr *rec, struct lov_stripe_md *lsm,
+                       struct llog_cookie *logcookies, int numcookies)
+{
+        struct obd_device *obd = class_conn2obd(conn);
+        struct lov_obd *lov = &obd->u.lov;
+        struct lov_oinfo *loi;
+        int i, rc = 0;
+        ENTRY;
+
+        LASSERT(logcookies && numcookies >= lsm->lsm_stripe_count);
+
+        if (unlikely(!lov->lo_catalog_loaded)) {
+                /* On failure lov_get_catalogs() has already deleted and
+                 * closed the per-target logs, so do not go on to use them */
+                rc = lov_get_catalogs(lov, cathandle);
+                if (rc)
+                        RETURN(rc);
+        }
+
+        for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
+                int used;
+
+                used = obd_log_add(&lov->tgts[loi->loi_ost_idx].conn,
+                                   lov->tgts[loi->loi_ost_idx].ltd_cathandle,
+                                   rec, NULL, logcookies + rc, numcookies - rc);
+                /* A negative errno must never be folded into the running
+                 * total: it would move the next cookie pointer backwards,
+                 * possibly before the start of logcookies */
+                if (used < 0)
+                        RETURN(used);
+                rc += used;
+        }
+
+        RETURN(rc);
+}
+
+/* Cancel one log record per stripe of lsm on the target backing that stripe.
+ *
+ * cookies must hold exactly one cookie per stripe (count == lsm_stripe_count
+ * is asserted); cookie i is handed to the target of stripe i together with
+ * flags.  Every stripe is attempted.  A failure on a target that is not
+ * marked active is ignored; failures on active targets are logged and the
+ * first of them is returned.  Returns -ENODEV when conn does not resolve to
+ * an export with an obd device, otherwise 0 or the first recorded error. */
+static int lov_log_cancel(struct lustre_handle *conn, struct lov_stripe_md *lsm,
+                          int count, struct llog_cookie *cookies, int flags)
+{
+        struct obd_export *export = class_conn2export(conn);
+        struct lov_obd *lov;
+        struct lov_oinfo *loi;
+        int rc = 0, i;
+        ENTRY;
+
+        LASSERT(lsm != NULL);
+        if (export == NULL || export->exp_obd == NULL)
+                GOTO(out, rc = -ENODEV);
+
+        LASSERT(count == lsm->lsm_stripe_count);
+
+        loi = lsm->lsm_oinfo;
+        lov = &export->exp_obd->u.lov;
+        for (i = 0; i < count; i++, cookies++, loi++) {
+                int err;
+
+                err = obd_log_cancel(&lov->tgts[loi->loi_ost_idx].conn,
+                                     NULL, 1, cookies, flags);
+                if (err && lov->tgts[loi->loi_ost_idx].active) {
+                        CERROR("error: objid "LPX64" subobj "LPX64
+                               " on OST idx %d: rc = %d\n", lsm->lsm_object_id,
+                               loi->loi_id, loi->loi_ost_idx, err);
+                        if (!rc)
+                                rc = err;
+                }
+        }
+        GOTO(out, rc);
+ out:
+        /* The -ENODEV path above also lands here when class_conn2export()
+         * returned NULL; only put an export we actually got */
+        if (export != NULL)
+                class_export_put(export);
+        return rc;
+}
+
 struct obd_ops lov_obd_ops = {
         o_owner:       THIS_MODULE,
         o_attach:      lov_attach,
@@ -2364,9 +2701,12 @@ struct obd_ops lov_obd_ops = {
         o_cancel_unused: lov_cancel_unused,
         o_iocontrol:   lov_iocontrol,
         o_get_info:    lov_get_info,
-        .o_mark_page_dirty =    lov_mark_page_dirty,
-        .o_clear_dirty_pages =    lov_clear_dirty_pages,
-        .o_last_dirty_offset =    lov_last_dirty_offset,
+        o_set_info:    lov_set_info,
+        o_log_add:     lov_log_add,
+        o_log_cancel:  lov_log_cancel,
+        o_mark_page_dirty:   lov_mark_page_dirty,
+        o_clear_dirty_pages: lov_clear_dirty_pages,
+        o_last_dirty_offset: lov_last_dirty_offset,
 };
 
 int __init lov_init(void)
@@ -2374,15 +2714,13 @@ int __init lov_init(void)
         struct lprocfs_static_vars lvars;
         int rc;
 
-        printk(KERN_INFO "Lustre Logical Object Volume driver; "
-               "info@clusterfs.com\n");
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(lov, &lvars);
         rc = class_register_type(&lov_obd_ops, lvars.module_vars,
                                  OBD_LOV_DEVICENAME);
         RETURN(rc);
 }
 
-static void __exit lov_exit(void)
+/* Unregisters the OBD_LOV_DEVICENAME type that lov_init() registered with
+ * class_register_type().
+ * NOTE(review): this change comments out the __exit annotation; the reason
+ * is not visible in this hunk -- confirm before restoring it. */
+static void /*__exit*/ lov_exit(void)
 {
         class_unregister_type(OBD_LOV_DEVICENAME);
 }
index bbb40de..a719aac 100644 (file)
@@ -34,6 +34,8 @@
 #include <linux/obd_class.h>
 #include <linux/obd_support.h>
 
+#include "lov_internal.h"
+
 void lov_dump_lmm(int level, struct lov_mds_md *lmm)
 {
         struct lov_object_id *loi;
@@ -129,14 +131,14 @@ int lov_packmd(struct lustre_handle *conn, struct lov_mds_md **lmmp,
         for (i = 0, loi = lsm->lsm_oinfo; i < stripe_count; i++, loi++) {
                 /* XXX call down to osc_packmd() to do the packing */
                 LASSERT (loi->loi_id);
-                lmm->lmm_objects[loi->loi_ost_idx].l_object_id = 
+                lmm->lmm_objects[loi->loi_ost_idx].l_object_id =
                         cpu_to_le64 (loi->loi_id);
         }
 
         RETURN(lmm_size);
 }
 
-static int lov_get_stripecnt(struct lov_obd *lov, int stripe_count)
+int lov_get_stripecnt(struct lov_obd *lov, int stripe_count)
 {
         if (!stripe_count)
                 stripe_count = lov->desc.ld_default_stripe_count;
@@ -146,6 +148,90 @@ static int lov_get_stripecnt(struct lov_obd *lov, int stripe_count)
         return stripe_count;
 }
 
+/* Sanity-check a packed (little-endian) lov_mds_md before it is unpacked.
+ *
+ * lmm_bytes is the size of the buffer holding lmm.  On success returns 0
+ * with the CPU-endian OST count, stripe count and starting OST offset stored
+ * in *ost_count, *stripe_count and *ost_offset.  Returns -EINVAL after
+ * logging the reason when the buffer is too small, the magic is wrong, a
+ * count is zero, the offset or stripe count is out of range, or the object
+ * id is zero.  The three outputs may already have been written when a later
+ * check fails. */
+static int lov_verify_lmm(struct lov_mds_md *lmm, int lmm_bytes,
+                          int *ost_count, int *stripe_count, int *ost_offset)
+{
+        /* Compare as signed ints: against a bare sizeof a negative lmm_bytes
+         * would be converted to a huge unsigned value and slip through,
+         * letting the header be read below */
+        if (lmm_bytes < (int)sizeof(*lmm)) {
+                CERROR("lov_mds_md too small: %d, need at least %d\n",
+                       lmm_bytes, (int)sizeof(*lmm));
+                return -EINVAL;
+        }
+
+        if (le32_to_cpu(lmm->lmm_magic) != LOV_MAGIC) {
+                CERROR("bad disk LOV MAGIC: %#08x != %#08x\n",
+                       le32_to_cpu(lmm->lmm_magic), LOV_MAGIC);
+                lov_dump_lmm(D_WARNING, lmm);
+                return -EINVAL;
+        }
+
+        *ost_count = le16_to_cpu(lmm->lmm_ost_count);
+        *stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
+        *ost_offset = le32_to_cpu(lmm->lmm_stripe_offset);
+
+        if (*ost_count == 0 || *stripe_count == 0) {
+                CERROR("zero OST count %d or stripe count %d\n",
+                       *ost_count, *stripe_count);
+                lov_dump_lmm(D_WARNING, lmm);
+                return -EINVAL;
+        }
+
+        /* The buffer must cover the per-OST object array as well */
+        if (lmm_bytes < lov_mds_md_size(*ost_count)) {
+                CERROR("lov_mds_md too small: %d, need %d\n",
+                       lmm_bytes, lov_mds_md_size(*ost_count));
+                lov_dump_lmm(D_WARNING, lmm);
+                return -EINVAL;
+        }
+
+        /* The 32-bit on-disk offset is stored in an int, so a large value
+         * turns negative; it would stay negative after the "% ost_count"
+         * that lov_unpackmd() applies before indexing lmm_objects[] */
+        if (*ost_offset < 0) {
+                CERROR("negative starting OST offset %d\n", *ost_offset);
+                lov_dump_lmm(D_WARNING, lmm);
+                return -EINVAL;
+        }
+
+        if (*ost_offset > *ost_count) {
+                CERROR("starting OST offset %d > number of OSTs %d\n",
+                       *ost_offset, *ost_count);
+                lov_dump_lmm(D_WARNING, lmm);
+                return -EINVAL;
+        }
+
+        if (*stripe_count > *ost_count) {
+                CERROR("stripe count %d > number of OSTs %d\n",
+                       *stripe_count, *ost_count);
+                lov_dump_lmm(D_WARNING, lmm);
+                return -EINVAL;
+        }
+
+        if (lmm->lmm_object_id == 0) {
+                CERROR("zero object id\n");
+                lov_dump_lmm(D_WARNING, lmm);
+                return -EINVAL;
+        }
+
+        return 0;
+}
+
+/* Allocate an in-memory stripe descriptor sized for stripe_count stripes and
+ * store it in *lsmp.
+ *
+ * The magic, stripe count and maximum byte count are filled in, and every
+ * stripe's loi_dirty_ot is pointed at its own inline table and passed to
+ * ot_init().  Returns the allocation size in bytes, or -ENOMEM when *lsmp is
+ * NULL after the allocation attempt. */
+int lov_alloc_memmd(struct lov_stripe_md **lsmp, int stripe_count)
+{
+        struct lov_stripe_md *md;
+        int nbytes = lov_stripe_md_size(stripe_count);
+        int idx;
+
+        OBD_ALLOC(*lsmp, nbytes);
+        md = *lsmp;
+        if (md == NULL)
+                return -ENOMEM;
+
+        md->lsm_magic = LOV_MAGIC;
+        md->lsm_stripe_count = stripe_count;
+        md->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES * stripe_count;
+
+        for (idx = 0; idx < stripe_count; idx++) {
+                struct lov_oinfo *loi = &md->lsm_oinfo[idx];
+
+                loi->loi_dirty_ot = &loi->loi_dirty_ot_inline;
+                ot_init(loi->loi_dirty_ot);
+        }
+
+        return nbytes;
+}
+
+/* Free the stripe descriptor referenced by *lsmp and reset *lsmp to NULL.
+ * The size released is derived from the descriptor's own lsm_stripe_count,
+ * so *lsmp is dereferenced unconditionally. */
+void lov_free_memmd(struct lov_stripe_md **lsmp)
+{
+        int nbytes = lov_stripe_md_size((*lsmp)->lsm_stripe_count);
+
+        OBD_FREE(*lsmp, nbytes);
+        *lsmp = NULL;
+}
+
 /* Unpack LOV object metadata from disk storage.  It is packed in LE byte
  * order and is opaque to the networking layer.
  */
@@ -156,75 +242,48 @@ int lov_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
         struct lov_obd *lov = &obd->u.lov;
         struct lov_stripe_md *lsm;
         struct lov_oinfo *loi;
-        int ost_count = 0;
-        int ost_offset = 0;
+        int ost_count;
+        int ost_offset;
         int stripe_count;
         int lsm_size;
         int i;
         ENTRY;
 
+        /* If passed an MDS struct use values from there, otherwise defaults */
         if (lmm) {
-                if (lmm_bytes < sizeof (*lmm)) {
-                        CERROR("lov_mds_md too small: %d, need %d\n",
-                                lmm_bytes, (int)sizeof(*lmm));
-                        RETURN(-EINVAL);
-                }
-                if (le32_to_cpu (lmm->lmm_magic) != LOV_MAGIC) {
-                        CERROR("bad disk LOV MAGIC: %#08x != %#08x\n",
-                               le32_to_cpu (lmm->lmm_magic), LOV_MAGIC);
-                        RETURN(-EINVAL);
-                }
-
-                ost_count = le16_to_cpu (lmm->lmm_ost_count);
-                stripe_count = le16_to_cpu (lmm->lmm_stripe_count);
-
-                if (ost_count == 0 || stripe_count == 0) {
-                        CERROR ("zero ost %d or stripe %d count\n",
-                                ost_count, stripe_count);
-                        RETURN (-EINVAL);
-                }
-
-                if (lmm_bytes < lov_mds_md_size (ost_count)) {
-                        CERROR ("lov_mds_md too small: %d, need %d\n",
-                                lmm_bytes, lov_mds_md_size (ost_count));
-                        RETURN (-EINVAL);
-                }
-        } else
+                i = lov_verify_lmm(lmm, lmm_bytes, &ost_count, &stripe_count,
+                                   &ost_offset);
+                if (i)
+                        RETURN(i);
+        } else {
+                ost_count = 0;
                 stripe_count = lov_get_stripecnt(lov, 0);
+                ost_offset = 0;
+        }
 
-        /* XXX LOV STACKING call into osc for sizes */
-        lsm_size = lov_stripe_md_size(stripe_count);
-
+        /* If we aren't passed an lsmp struct, we just want the size */
         if (!lsmp)
-                RETURN(lsm_size);
+                /* XXX LOV STACKING call into osc for sizes */
+                RETURN(lov_stripe_md_size(stripe_count));
 
+        /* If we are passed an allocated struct but nothing to unpack, free */
         if (*lsmp && !lmm) {
-                stripe_count = (*lsmp)->lsm_stripe_count;
-                OBD_FREE(*lsmp, lov_stripe_md_size(stripe_count));
-                *lsmp = NULL;
+                lov_free_memmd(lsmp);
                 RETURN(0);
         }
 
-        if (!*lsmp) {
-                OBD_ALLOC(*lsmp, lsm_size);
-                if (!*lsmp)
-                        RETURN(-ENOMEM);
-        }
-
-        lsm = *lsmp;
-        lsm->lsm_magic = LOV_MAGIC;
-        lsm->lsm_stripe_count = stripe_count;
-        lsm->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES * stripe_count;
+        lsm_size = lov_alloc_memmd(lsmp, stripe_count);
+        if (lsm_size < 0)
+                RETURN(lsm_size);
 
+        /* If we are passed a pointer but nothing to unpack, we only alloc */
         if (!lmm)
                 RETURN(lsm_size);
 
-        lsm->lsm_object_id = le64_to_cpu (lmm->lmm_object_id);
-        lsm->lsm_stripe_size = le32_to_cpu (lmm->lmm_stripe_size);
-        ost_offset = lsm->lsm_stripe_offset = le32_to_cpu (lmm->lmm_stripe_offset);
-
-        LMM_ASSERT(lsm->lsm_object_id);
-        LMM_ASSERT(ost_count);
+        lsm = *lsmp;
+        lsm->lsm_object_id = le64_to_cpu(lmm->lmm_object_id);
+        lsm->lsm_stripe_size = le32_to_cpu(lmm->lmm_stripe_size);
+        lsm->lsm_stripe_offset = ost_offset;
 
         for (i = 0, loi = lsm->lsm_oinfo; i < ost_count; i++, ost_offset++) {
                 ost_offset %= ost_count;
@@ -232,17 +291,20 @@ int lov_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
                 if (!lmm->lmm_objects[ost_offset].l_object_id)
                         continue;
 
-                LMM_ASSERT(loi - lsm->lsm_oinfo < stripe_count);
                 /* XXX LOV STACKING call down to osc_unpackmd() */
                 loi->loi_id =
-                        le64_to_cpu (lmm->lmm_objects[ost_offset].l_object_id);
+                        le64_to_cpu(lmm->lmm_objects[ost_offset].l_object_id);
                 loi->loi_ost_idx = ost_offset;
-                loi->loi_dirty_ot = &loi->loi_dirty_ot_inline;
-                ot_init(loi->loi_dirty_ot);
                 loi++;
         }
-        LMM_ASSERT(loi - lsm->lsm_oinfo > 0);
-        LMM_ASSERT(loi - lsm->lsm_oinfo == stripe_count);
+
+        if (loi - lsm->lsm_oinfo != stripe_count) {
+                CERROR("missing objects in lmm struct\n");
+                lov_dump_lmm(D_WARNING, lmm);
+                lov_free_memmd(lsmp);
+                RETURN(-EINVAL);
+        }
+
 
         RETURN(lsm_size);
 }
@@ -260,7 +322,6 @@ int lov_setstripe(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
         struct obd_device *obd = class_conn2obd(conn);
         struct lov_obd *lov = &obd->u.lov;
         struct lov_mds_md lmm;
-        struct lov_stripe_md *lsm;
         int stripe_count;
         int rc;
         ENTRY;
@@ -272,7 +333,7 @@ int lov_setstripe(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
         /* Bug 1185 FIXME: struct lov_mds_md is little-endian everywhere else */
 
         if (lmm.lmm_magic != LOV_MAGIC) {
-                CERROR("bad userland LOV MAGIC: %#08x != %#08x\n",
+                CDEBUG(D_IOCTL, "bad userland LOV MAGIC: %#08x != %#08x\n",
                        lmm.lmm_magic, LOV_MAGIC);
                 RETURN(-EINVAL);
         }
@@ -291,32 +352,27 @@ int lov_setstripe(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
         }
 #endif
         if (lmm.lmm_stripe_size & (PAGE_SIZE - 1)) {
-                CERROR("stripe size %u not multiple of %lu\n",
+                CDEBUG(D_IOCTL, "stripe size %u not multiple of %lu\n",
                        lmm.lmm_stripe_size, PAGE_SIZE);
                 RETURN(-EINVAL);
         }
         stripe_count = lov_get_stripecnt(lov, lmm.lmm_stripe_count);
 
         if ((__u64)lmm.lmm_stripe_size * stripe_count > ~0UL) {
-                CERROR("stripe width %ux%u > %lu on 32-bit system\n",
+                CDEBUG(D_IOCTL, "stripe width %ux%u > %lu on 32-bit system\n",
                        lmm.lmm_stripe_size, (int)lmm.lmm_stripe_count, ~0UL);
                 RETURN(-EINVAL);
         }
 
-        /* XXX LOV STACKING call into osc for sizes */
-        OBD_ALLOC(lsm, lov_stripe_md_size(stripe_count));
-        if (!lsm)
-                RETURN(-ENOMEM);
+        rc = lov_alloc_memmd(lsmp, stripe_count);
 
-        lsm->lsm_magic = LOV_MAGIC;
-        lsm->lsm_stripe_count = stripe_count;
-        lsm->lsm_stripe_offset = lmm.lmm_stripe_offset;
-        lsm->lsm_stripe_size = lmm.lmm_stripe_size;
-        lsm->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES * stripe_count;
+        if (rc < 0)
+                RETURN(rc);
 
-        *lsmp = lsm;
+        (*lsmp)->lsm_stripe_offset = lmm.lmm_stripe_offset;
+        (*lsmp)->lsm_stripe_size = lmm.lmm_stripe_size;
 
-        RETURN(rc);
+        RETURN(0);
 }
 
 /* Retrieve object striping information.
index e0b3adb..7b7a00c 100644 (file)
 #include <linux/seq_file.h>
 
 #ifndef LPROCFS
-struct lprocfs_vars lprocfs_module_vars[] = { {0} };
-struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
 #else
 
-DEFINE_LPROCFS_STATFS_FCT(rd_blksize,     obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree,  obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filestotal,  obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filesfree,   obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filegroups,  obd_self_statfs);
-
-int rd_stripesize(char *page, char **start, off_t off, int count, int *eof,
-                  void *data)
+static int lov_rd_stripesize(char *page, char **start, off_t off, int count,
+                             int *eof, void *data)
 {
         struct obd_device *dev = (struct obd_device *)data;
         struct lov_desc *desc;
@@ -53,8 +46,8 @@ int rd_stripesize(char *page, char **start, off_t off, int count, int *eof,
         return snprintf(page, count, LPU64"\n", desc->ld_default_stripe_size);
 }
 
-int rd_stripeoffset(char *page, char **start, off_t off, int count, int *eof,
-                    void *data)
+static int lov_rd_stripeoffset(char *page, char **start, off_t off, int count,
+                               int *eof, void *data)
 {
         struct obd_device *dev = (struct obd_device *)data;
         struct lov_desc *desc;
@@ -65,8 +58,8 @@ int rd_stripeoffset(char *page, char **start, off_t off, int count, int *eof,
         return snprintf(page, count, LPU64"\n", desc->ld_default_stripe_offset);
 }
 
-int rd_stripetype(char *page, char **start, off_t off, int count, int *eof,
-                  void *data)
+static int lov_rd_stripetype(char *page, char **start, off_t off, int count,
+                             int *eof, void *data)
 {
         struct obd_device* dev = (struct obd_device*)data;
         struct lov_desc *desc;
@@ -77,8 +70,8 @@ int rd_stripetype(char *page, char **start, off_t off, int count, int *eof,
         return snprintf(page, count, "%u\n", desc->ld_pattern);
 }
 
-int rd_stripecount(char *page, char **start, off_t off, int count, int *eof,
-                   void *data)
+static int lov_rd_stripecount(char *page, char **start, off_t off, int count,
+                              int *eof, void *data)
 {
         struct obd_device *dev = (struct obd_device *)data;
         struct lov_desc *desc;
@@ -89,8 +82,8 @@ int rd_stripecount(char *page, char **start, off_t off, int count, int *eof,
         return snprintf(page, count, "%u\n", desc->ld_default_stripe_count);
 }
 
-int rd_numobd(char *page, char **start, off_t off, int count, int *eof,
-              void *data)
+static int lov_rd_numobd(char *page, char **start, off_t off, int count,
+                         int *eof, void *data)
 {
         struct obd_device *dev = (struct obd_device*)data;
         struct lov_desc *desc;
@@ -102,8 +95,8 @@ int rd_numobd(char *page, char **start, off_t off, int count, int *eof,
 
 }
 
-int rd_activeobd(char *page, char **start, off_t off, int count, int *eof,
-                 void *data)
+static int lov_rd_activeobd(char *page, char **start, off_t off, int count,
+                            int *eof, void *data)
 {
         struct obd_device* dev = (struct obd_device*)data;
         struct lov_desc *desc;
@@ -114,7 +107,8 @@ int rd_activeobd(char *page, char **start, off_t off, int count, int *eof,
         return snprintf(page, count, "%u\n", desc->ld_active_tgt_count);
 }
 
-int rd_mdc(char *page, char **start, off_t off, int count, int *eof, void *data)
+static int lov_rd_mdc(char *page, char **start, off_t off, int count, int *eof,
+                      void *data)
 {
         struct obd_device *dev = (struct obd_device*) data;
         struct lov_obd *lov;
@@ -125,7 +119,7 @@ int rd_mdc(char *page, char **start, off_t off, int count, int *eof, void *data)
         return snprintf(page, count, "%s\n", lov->mdcobd->obd_uuid.uuid);
 }
 
-static void *ll_tgt_seq_start(struct seq_file *p, loff_t *pos)
+static void *lov_tgt_seq_start(struct seq_file *p, loff_t *pos)
 {
         struct obd_device *dev = p->private;
         struct lov_obd *lov = &dev->u.lov;
@@ -133,12 +127,12 @@ static void *ll_tgt_seq_start(struct seq_file *p, loff_t *pos)
         return (*pos >= lov->desc.ld_tgt_count) ? NULL : &(lov->tgts[*pos]);
 
 }
-static void ll_tgt_seq_stop(struct seq_file *p, void *v)
-{
 
+static void lov_tgt_seq_stop(struct seq_file *p, void *v)
+{
 }
 
-static void *ll_tgt_seq_next(struct seq_file *p, void *v, loff_t *pos)
+static void *lov_tgt_seq_next(struct seq_file *p, void *v, loff_t *pos)
 {
         struct obd_device *dev = p->private;
         struct lov_obd *lov = &dev->u.lov;
@@ -147,7 +141,7 @@ static void *ll_tgt_seq_next(struct seq_file *p, void *v, loff_t *pos)
         return (*pos >=lov->desc.ld_tgt_count) ? NULL : &(lov->tgts[*pos]);
 }
 
-static int ll_tgt_seq_show(struct seq_file *p, void *v)
+static int lov_tgt_seq_show(struct seq_file *p, void *v)
 {
         struct lov_tgt_desc *tgt = v;
         struct obd_device *dev = p->private;
@@ -157,18 +151,18 @@ static int ll_tgt_seq_show(struct seq_file *p, void *v)
                           tgt->active ? "" : "IN");
 }
 
-struct seq_operations ll_tgt_sops = {
-        .start = ll_tgt_seq_start,
-        .stop = ll_tgt_seq_stop,
-        .next = ll_tgt_seq_next,
-        .show = ll_tgt_seq_show,
+struct seq_operations lov_tgt_sops = {
+        .start = lov_tgt_seq_start,
+        .stop = lov_tgt_seq_stop,
+        .next = lov_tgt_seq_next,
+        .show = lov_tgt_seq_show,
 };
 
-static int ll_target_seq_open(struct inode *inode, struct file *file)
+static int lov_target_seq_open(struct inode *inode, struct file *file)
 {
         struct proc_dir_entry *dp = inode->u.generic_ip;
         struct seq_file *seq;
-        int rc = seq_open(file, &ll_tgt_sops);
+        int rc = seq_open(file, &lov_tgt_sops);
 
         if (rc)
                 return rc;
@@ -178,35 +172,36 @@ static int ll_target_seq_open(struct inode *inode, struct file *file)
 
         return 0;
 }
+
 struct lprocfs_vars lprocfs_obd_vars[] = {
-        { "uuid",         lprocfs_rd_uuid, 0, 0 },
-        { "stripesize",   rd_stripesize,   0, 0 },
-        { "stripeoffset", rd_stripeoffset, 0, 0 },
-        { "stripecount",  rd_stripecount,  0, 0 },
-        { "stripetype",   rd_stripetype,   0, 0 },
-        { "numobd",       rd_numobd,       0, 0 },
-        { "activeobd",    rd_activeobd,    0, 0 },
-        { "filestotal",   rd_filestotal,   0, 0 },
-        { "filesfree",    rd_filesfree,    0, 0 },
-        { "filegroups",   rd_filegroups,   0, 0 },
-        { "blocksize",    rd_blksize,      0, 0 },
-        { "kbytestotal",  rd_kbytestotal,  0, 0 },
-        { "kbytesfree",   rd_kbytesfree,   0, 0 },
-        { "target_mdc",   rd_mdc,          0, 0 },
+        { "uuid",         lprocfs_rd_uuid,        0, 0 },
+        { "stripesize",   lov_rd_stripesize,      0, 0 },
+        { "stripeoffset", lov_rd_stripeoffset,    0, 0 },
+        { "stripecount",  lov_rd_stripecount,     0, 0 },
+        { "stripetype",   lov_rd_stripetype,      0, 0 },
+        { "numobd",       lov_rd_numobd,          0, 0 },
+        { "activeobd",    lov_rd_activeobd,       0, 0 },
+        { "filestotal",   lprocfs_rd_filestotal,  0, 0 },
+        { "filesfree",    lprocfs_rd_filesfree,   0, 0 },
+        //{ "filegroups",   lprocfs_rd_filegroups,  0, 0 },
+        { "blocksize",    lprocfs_rd_blksize,     0, 0 },
+        { "kbytestotal",  lprocfs_rd_kbytestotal, 0, 0 },
+        { "kbytesfree",   lprocfs_rd_kbytesfree,  0, 0 },
+        { "target_mdc",   lov_rd_mdc,             0, 0 },
         { 0 }
 };
 
-struct lprocfs_vars lprocfs_module_vars[] = {
-        { "num_refs",     lprocfs_rd_numrefs, 0, 0 },
+static struct lprocfs_vars lprocfs_module_vars[] = {
+        { "num_refs",     lprocfs_rd_numrefs,     0, 0 },
         { 0 }
 };
 
-struct file_operations ll_proc_target_fops = {
-        .open = ll_target_seq_open,
+struct file_operations lov_proc_target_fops = {
+        .open = lov_target_seq_open,
         .read = seq_read,
         .llseek = seq_lseek,
         .release = seq_release,
 };
 
 #endif /* LPROCFS */
-LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
+LPROCFS_INIT_VARS(lov, lprocfs_module_vars, lprocfs_obd_vars)
index e530020..49c6100 100644 (file)
@@ -6,3 +6,4 @@ Makefile
 Makefile.in
 .deps
 TAGS
+.*.cmd
index 3f81507..6dca228 100644 (file)
 #define DEBUG_SUBSYSTEM S_CLASS
 
 #include <linux/version.h>
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#include <asm/statfs.h>
-#endif
+#include <linux/vfs.h>
 #include <linux/obd_class.h>
 #include <linux/lprocfs_status.h>
 
 #ifndef LPROCFS
-struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
-struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_module_vars[] = { {0} };
 #else
-
-DEFINE_LPROCFS_STATFS_FCT(rd_blksize,     obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree,  obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filestotal,  obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filesfree,   obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filegroups,  obd_self_statfs);
-
-struct lprocfs_vars lprocfs_obd_vars[] = {
+static struct lprocfs_vars lprocfs_obd_vars[] = {
         { "uuid",            lprocfs_rd_uuid,        0, 0 },
-        { "blocksize",       rd_blksize,             0, 0 },
-        { "kbytestotal",     rd_kbytestotal,         0, 0 },
-        { "kbytesfree",      rd_kbytesfree,          0, 0 },
-        { "filestotal",      rd_filestotal,          0, 0 },
-        { "filesfree",       rd_filesfree,           0, 0 },
-        { "filegroups",      rd_filegroups,          0, 0 },
+        { "blocksize",       lprocfs_rd_blksize,     0, 0 },
+        { "kbytestotal",     lprocfs_rd_kbytestotal, 0, 0 },
+        { "kbytesfree",      lprocfs_rd_kbytesfree,  0, 0 },
+        { "filestotal",      lprocfs_rd_filestotal,  0, 0 },
+        { "filesfree",       lprocfs_rd_filesfree,   0, 0 },
+        //{ "filegroups",      lprocfs_rd_filegroups,  0, 0 },
         { "mds_server_uuid", lprocfs_rd_server_uuid, 0, 0 },
         { "mds_conn_uuid",   lprocfs_rd_conn_uuid,   0, 0 },
         { 0 }
 };
 
-struct lprocfs_vars lprocfs_module_vars[] = {
+static struct lprocfs_vars lprocfs_module_vars[] = {
         { "num_refs",        lprocfs_rd_numrefs,     0, 0 },
         { 0 }
 };
 
 #endif /* LPROCFS */
 
-LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
+LPROCFS_INIT_VARS(mdc, lprocfs_module_vars, lprocfs_obd_vars)
index e39a0aa..49d85ab 100644 (file)
@@ -1,24 +1,25 @@
-void mds_pack_req_body(struct ptlrpc_request *);
-void mds_pack_rep_body(struct ptlrpc_request *);
-void mds_readdir_pack(struct ptlrpc_request *req, __u64 offset, __u32 size,
+void mdc_pack_req_body(struct ptlrpc_request *);
+void mdc_pack_rep_body(struct ptlrpc_request *);
+void mdc_readdir_pack(struct ptlrpc_request *req, __u64 offset, __u32 size,
                       obd_id ino, int type);
-void mds_getattr_pack(struct ptlrpc_request *req, int valid, int offset,
+void mdc_getattr_pack(struct ptlrpc_request *req, int valid, int offset,
                       int flags, struct mdc_op_data *data);
-void mds_setattr_pack(struct ptlrpc_request *req,
+void mdc_setattr_pack(struct ptlrpc_request *req,
                       struct mdc_op_data *data,
-                      struct iattr *iattr, void *ea, int ealen);
-void mds_create_pack(struct ptlrpc_request *req, int offset,
+                      struct iattr *iattr, void *ea, int ealen,
+                     void *ea2, int ea2len);
+void mdc_create_pack(struct ptlrpc_request *req, int offset,
                      struct mdc_op_data *op_data,
                      __u32 mode, __u64 rdev, __u32 uid, __u32 gid, __u64 time,
                      const void *data, int datalen);
-void mds_open_pack(struct ptlrpc_request *req, int offset,
+void mdc_open_pack(struct ptlrpc_request *req, int offset,
                    struct mdc_op_data *op_data,
                    __u32 mode, __u64 rdev, __u32 uid, __u32 gid, __u64 time,
                    __u32 flags, const void *data, int datalen);
-void mds_unlink_pack(struct ptlrpc_request *req, int offset,
+void mdc_unlink_pack(struct ptlrpc_request *req, int offset,
                      struct mdc_op_data *data);
-void mds_link_pack(struct ptlrpc_request *req, int offset,
+void mdc_link_pack(struct ptlrpc_request *req, int offset,
                    struct mdc_op_data *data);
-void mds_rename_pack(struct ptlrpc_request *req, int offset,
+void mdc_rename_pack(struct ptlrpc_request *req, int offset,
                      struct mdc_op_data *data,
                      const char *old, int oldlen, const char *new, int newlen);
index 806a830..a17f7a1 100644 (file)
@@ -28,7 +28,7 @@
 #include <linux/lustre_mds.h>
 #include <linux/lustre_lite.h>
 
-void mds_readdir_pack(struct ptlrpc_request *req, __u64 offset, __u32 size,
+void mdc_readdir_pack(struct ptlrpc_request *req, __u64 offset, __u32 size,
                       obd_id ino, int type, __u64 xid)
 {
         struct mds_body *b;
@@ -45,7 +45,7 @@ void mds_readdir_pack(struct ptlrpc_request *req, __u64 offset, __u32 size,
         b->nlink = size;                        /* !! */
 }
 
-static void mds_pack_body(struct mds_body *b)
+static void mdc_pack_body(struct mds_body *b)
 {
         LASSERT (b != NULL);
 
@@ -54,14 +54,14 @@ static void mds_pack_body(struct mds_body *b)
         b->capability = current->cap_effective;
 }
 
-void mds_pack_req_body(struct ptlrpc_request *req)
+void mdc_pack_req_body(struct ptlrpc_request *req)
 {
         struct mds_body *b = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*b));
-        mds_pack_body(b);
+        mdc_pack_body(b);
 }
 
 /* packing of MDS records */
-void mds_create_pack(struct ptlrpc_request *req, int offset,
+void mdc_create_pack(struct ptlrpc_request *req, int offset,
                      struct mdc_op_data *op_data,
                      __u32 mode, __u64 rdev, __u32 uid, __u32 gid, __u64 time,
                      const void *data, int datalen)
@@ -94,8 +94,9 @@ void mds_create_pack(struct ptlrpc_request *req, int offset,
                 memcpy (tmp, data, datalen);
         }
 }
+
 /* packing of MDS records */
-void mds_open_pack(struct ptlrpc_request *req, int offset,
+void mdc_open_pack(struct ptlrpc_request *req, int offset,
                    struct mdc_op_data *op_data,
                    __u32 mode, __u64 rdev, __u32 uid, __u32 gid, __u64 time,
                    __u32 flags, const void *data, int datalen)
@@ -109,8 +110,9 @@ void mds_open_pack(struct ptlrpc_request *req, int offset,
         rec->cr_fsuid = current->fsuid;
         rec->cr_fsgid = current->fsgid;
         rec->cr_cap = current->cap_effective;
-        ll_ino2fid(&rec->cr_fid, op_data->ino1,
-                   op_data->gen1, op_data->typ1);
+        if (op_data != NULL)
+                ll_ino2fid(&rec->cr_fid, op_data->ino1,
+                           op_data->gen1, op_data->typ1);
         memset(&rec->cr_replayfid, 0, sizeof(rec->cr_replayfid));
         rec->cr_mode = mode;
         rec->cr_flags = flags;
@@ -123,17 +125,22 @@ void mds_open_pack(struct ptlrpc_request *req, int offset,
         else
                 rec->cr_suppgid = -1;
 
-        tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, op_data->namelen + 1);
-        LOGL0(op_data->name, op_data->namelen, tmp);
+        if (op_data->name) {
+                tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1,
+                                     op_data->namelen + 1);
+                LOGL0(op_data->name, op_data->namelen, tmp);
+        }
 
         if (data) {
                 tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, datalen);
                 memcpy (tmp, data, datalen);
         }
 }
-void mds_setattr_pack(struct ptlrpc_request *req,
+
+void mdc_setattr_pack(struct ptlrpc_request *req,
                       struct mdc_op_data *data,
-                      struct iattr *iattr, void *ea, int ealen)
+                      struct iattr *iattr, void *ea, int ealen,
+                      void *ea2, int ea2len)
 {
         struct mds_rec_setattr *rec = lustre_msg_buf(req->rq_reqmsg, 0,
                                                      sizeof (*rec));
@@ -163,11 +170,18 @@ void mds_setattr_pack(struct ptlrpc_request *req,
                         rec->sa_suppgid = -1;
         }
 
-        if (ealen != 0)
-                memcpy(lustre_msg_buf(req->rq_reqmsg, 1, ealen), ea, ealen);
+        if (ealen == 0)
+                return;
+
+        memcpy(lustre_msg_buf(req->rq_reqmsg, 1, ealen), ea, ealen);
+
+        if (ea2len == 0)
+                return;
+
+        memcpy(lustre_msg_buf(req->rq_reqmsg, 2, ea2len), ea2, ea2len);
 }
 
-void mds_unlink_pack(struct ptlrpc_request *req, int offset,
+void mdc_unlink_pack(struct ptlrpc_request *req, int offset,
                      struct mdc_op_data *data)
 {
         struct mds_rec_unlink *rec;
@@ -194,7 +208,7 @@ void mds_unlink_pack(struct ptlrpc_request *req, int offset,
         LOGL0(data->name, data->namelen, tmp);
 }
 
-void mds_link_pack(struct ptlrpc_request *req, int offset,
+void mdc_link_pack(struct ptlrpc_request *req, int offset,
                    struct mdc_op_data *data)
 {
         struct mds_rec_link *rec;
@@ -221,7 +235,7 @@ void mds_link_pack(struct ptlrpc_request *req, int offset,
         LOGL0(data->name, data->namelen, tmp);
 }
 
-void mds_rename_pack(struct ptlrpc_request *req, int offset,
+void mdc_rename_pack(struct ptlrpc_request *req, int offset,
                      struct mdc_op_data *data,
                      const char *old, int oldlen, const char *new, int newlen)
 {
@@ -255,7 +269,7 @@ void mds_rename_pack(struct ptlrpc_request *req, int offset,
         }
 }
 
-void mds_getattr_pack(struct ptlrpc_request *req, int valid, int offset,
+void mdc_getattr_pack(struct ptlrpc_request *req, int valid, int offset,
                       int flags, struct mdc_op_data *data)
 {
         struct mds_body *b;
index 2da2fdb..4f7443e 100644 (file)
 #include "mdc_internal.h"
 
 /* mdc_setattr does its own semaphore handling */
-static int mdc_reint(struct ptlrpc_request *request, int level)
+static int mdc_reint(struct ptlrpc_request *request,
+                     struct mdc_rpc_lock *rpc_lock, int level)
 {
         int rc;
-        __u32 *opcodeptr;
+        
 
-        opcodeptr = lustre_msg_buf(request->rq_reqmsg, 0, sizeof (*opcodeptr));
         request->rq_level = level;
 
-        if (!(*opcodeptr == REINT_SETATTR))
-                mdc_get_rpc_lock(&mdc_rpc_lock, NULL);
+        mdc_get_rpc_lock(rpc_lock, NULL);
         rc = ptlrpc_queue_wait(request);
-        if (!(*opcodeptr == REINT_SETATTR))
-                mdc_put_rpc_lock(&mdc_rpc_lock, NULL);
-
+        mdc_put_rpc_lock(rpc_lock, NULL);
         if (rc)
                 CDEBUG(D_INFO, "error in handling %d\n", rc);
+        else if (!lustre_swab_repbuf(request, 0, sizeof(struct mds_body),
+                                     lustre_swab_mds_body)) {
+                CERROR ("Can't unpack mds_body\n");
+                rc = -EPROTO;
+        }
         return rc;
 }
 
@@ -60,42 +62,45 @@ static int mdc_reint(struct ptlrpc_request *request, int level)
  * If it is called with iattr->ia_valid & ATTR_FROM_OPEN, then it is a
  * magic open-path setattr that should take the setattr semaphore and
  * go to the setattr portal. */
-int mdc_setattr(struct lustre_handle *conn,
-                struct mdc_op_data *data,
-                struct iattr *iattr, void *ea, int ealen,
+int mdc_setattr(struct lustre_handle *conn, struct mdc_op_data *data,
+                struct iattr *iattr, void *ea, int ealen, void *ea2, int ea2len,
                 struct ptlrpc_request **request)
 {
         struct ptlrpc_request *req;
         struct mds_rec_setattr *rec;
         struct mdc_rpc_lock *rpc_lock;
-        int rc, bufcount = 1, size[2] = {sizeof(*rec), ealen};
+        int rc, bufcount = 1, size[3] = {sizeof(*rec), ealen, ea2len};
         ENTRY;
 
         LASSERT(iattr != NULL);
 
-        if (ealen > 0)
+        if (ealen > 0) {
                 bufcount = 2;
+                if (ea2len > 0)
+                        bufcount = 3;
+        }
 
         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, bufcount,
                               size, NULL);
-        if (!req)
+        if (req == NULL)
                 RETURN(-ENOMEM);
 
         if (iattr->ia_valid & ATTR_FROM_OPEN) {
                 req->rq_request_portal = MDS_SETATTR_PORTAL; //XXX FIXME bug 249
                 rpc_lock = &mdc_setattr_lock;
-        } else
+        } else {
                 rpc_lock = &mdc_rpc_lock;
+        }
 
-        mds_setattr_pack(req, data, iattr, ea, ealen);
+        if (iattr->ia_valid & (ATTR_MTIME | ATTR_CTIME))
+                CDEBUG(D_INODE, "setting mtime %lu, ctime %lu\n",
+                       iattr->ia_mtime, iattr->ia_ctime);
+        mdc_setattr_pack(req, data, iattr, ea, ealen, ea2, ea2len);
 
         size[0] = sizeof(struct mds_body);
         req->rq_replen = lustre_msg_size(1, size);
 
-        mdc_get_rpc_lock(rpc_lock, NULL);
-        rc = mdc_reint(req, LUSTRE_CONN_FULL);
-        mdc_put_rpc_lock(rpc_lock, NULL);
-
+        rc = mdc_reint(req, rpc_lock, LUSTRE_CONN_FULL);
         *request = req;
         if (rc == -ERESTARTSYS)
                 rc = 0;
@@ -103,17 +108,14 @@ int mdc_setattr(struct lustre_handle *conn,
         RETURN(rc);
 }
 
-int mdc_create(struct lustre_handle *conn,
-               struct mdc_op_data *op_data,
-               const void *data, int datalen,
-               int mode, __u32 uid, __u32 gid, __u64 time, __u64 rdev,
-               struct ptlrpc_request **request)
+int mdc_create(struct lustre_handle *conn, struct mdc_op_data *op_data,
+               const void *data, int datalen, int mode, __u32 uid, __u32 gid,
+               __u64 time, __u64 rdev, struct ptlrpc_request **request)
 {
         struct ptlrpc_request *req;
-        int rc, size[3] = {sizeof(struct mds_rec_create),
-                           op_data->namelen + 1, 0};
+        int rc, size[3] = {sizeof(struct mds_rec_create), op_data->namelen + 1};
         int level, bufcount = 2;
-//        ENTRY;
+        ENTRY;
 
         if (data && datalen) {
                 size[bufcount] = datalen;
@@ -122,14 +124,12 @@ int mdc_create(struct lustre_handle *conn,
 
         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, bufcount,
                               size, NULL);
-        if (!req)
-                return -ENOMEM;
-//                RETURN(-ENOMEM);
+        if (req == NULL)
+                RETURN(-ENOMEM);
 
-        /* mds_create_pack fills msg->bufs[1] with name
+        /* mdc_create_pack fills msg->bufs[1] with name
          * and msg->bufs[2] with tgt, for symlinks or lov MD data */
-        mds_create_pack(req, 0, op_data,
-                        mode, rdev, uid, gid, time,
+        mdc_create_pack(req, 0, op_data, mode, rdev, uid, gid, time,
                         data, datalen);
 
         size[0] = sizeof(struct mds_body);
@@ -137,7 +137,7 @@ int mdc_create(struct lustre_handle *conn,
 
         level = LUSTRE_CONN_FULL;
  resend:
-        rc = mdc_reint(req, level);
+        rc = mdc_reint(req, &mdc_rpc_lock, level);
         /* Resend if we were told to. */
         if (rc == -ERESTARTSYS) {
                 level = LUSTRE_CONN_RECOVER;
@@ -148,12 +148,10 @@ int mdc_create(struct lustre_handle *conn,
                 mdc_store_inode_generation(req, 0, 0);
 
         *request = req;
-        return rc;
-//        RETURN(rc);
+        RETURN(rc);
 }
 
-int mdc_unlink(struct lustre_handle *conn,
-               struct mdc_op_data *data,
+int mdc_unlink(struct lustre_handle *conn, struct mdc_op_data *data,
                struct ptlrpc_request **request)
 {
         struct obd_device *obddev = class_conn2obd(conn);
@@ -162,27 +160,26 @@ int mdc_unlink(struct lustre_handle *conn,
         ENTRY;
 
         LASSERT(req == NULL);
-
         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, 2, size,
                               NULL);
-        if (!req)
+        if (req == NULL)
                 RETURN(-ENOMEM);
         *request = req;
 
         size[0] = sizeof(struct mds_body);
         size[1] = obddev->u.cli.cl_max_mds_easize;
-        req->rq_replen = lustre_msg_size(2, size);
+        size[2] = obddev->u.cli.cl_max_mds_cookiesize;
+        req->rq_replen = lustre_msg_size(3, size);
 
-        mds_unlink_pack(req, 0, data);
+        mdc_unlink_pack(req, 0, data);
 
-        rc = mdc_reint(req, LUSTRE_CONN_FULL);
+        rc = mdc_reint(req, &mdc_rpc_lock, LUSTRE_CONN_FULL);
         if (rc == -ERESTARTSYS)
                 rc = 0;
         RETURN(rc);
 }
 
-int mdc_link(struct lustre_handle *conn,
-             struct mdc_op_data *data,
+int mdc_link(struct lustre_handle *conn, struct mdc_op_data *data,
              struct ptlrpc_request **request)
 {
         struct ptlrpc_request *req;
@@ -191,15 +188,15 @@ int mdc_link(struct lustre_handle *conn,
 
         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, 2, size,
                               NULL);
-        if (!req)
+        if (req == NULL)
                 RETURN(-ENOMEM);
 
-        mds_link_pack(req, 0, data);
+        mdc_link_pack(req, 0, data);
 
         size[0] = sizeof(struct mds_body);
         req->rq_replen = lustre_msg_size(1, size);
 
-        rc = mdc_reint(req, LUSTRE_CONN_FULL);
+        rc = mdc_reint(req, &mdc_rpc_lock, LUSTRE_CONN_FULL);
         *request = req;
         if (rc == -ERESTARTSYS)
                 rc = 0;
@@ -207,10 +204,8 @@ int mdc_link(struct lustre_handle *conn,
         RETURN(rc);
 }
 
-int mdc_rename(struct lustre_handle *conn,
-               struct mdc_op_data *data,
-               const char *old, int oldlen,
-               const char *new, int newlen,
+int mdc_rename(struct lustre_handle *conn, struct mdc_op_data *data,
+               const char *old, int oldlen, const char *new, int newlen,
                struct ptlrpc_request **request)
 {
         struct ptlrpc_request *req;
@@ -220,15 +215,15 @@ int mdc_rename(struct lustre_handle *conn,
 
         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, 3, size,
                               NULL);
-        if (!req)
+        if (req == NULL)
                 RETURN(-ENOMEM);
 
-        mds_rename_pack(req, 0, data, old, oldlen, new, newlen);
+        mdc_rename_pack(req, 0, data, old, oldlen, new, newlen);
 
         size[0] = sizeof(struct mds_body);
         req->rq_replen = lustre_msg_size(1, size);
 
-        rc = mdc_reint(req, LUSTRE_CONN_FULL);
+        rc = mdc_reint(req, &mdc_rpc_lock, LUSTRE_CONN_FULL);
         *request = req;
         if (rc == -ERESTARTSYS)
                 rc = 0;
index 204a836..b205d21 100644 (file)
@@ -46,6 +46,7 @@ struct mdc_rpc_lock mdc_setattr_lock;
 EXPORT_SYMBOL(mdc_rpc_lock);
 
 /* Helper that implements most of mdc_getstatus and signal_completed_replay. */
+/* XXX this should become mdc_get_info("key"), sending MDS_GET_INFO RPC */
 static int send_getstatus(struct obd_import *imp, struct ll_fid *rootfid,
                           int level, int msg_flags)
 {
@@ -62,7 +63,7 @@ static int send_getstatus(struct obd_import *imp, struct ll_fid *rootfid,
         req->rq_level = level;
         req->rq_replen = lustre_msg_size(1, &size);
 
-        mds_pack_req_body(req);
+        mdc_pack_req_body(req);
         req->rq_reqmsg->flags |= msg_flags;
         rc = ptlrpc_queue_wait(req);
 
@@ -88,13 +89,14 @@ static int send_getstatus(struct obd_import *imp, struct ll_fid *rootfid,
         return rc;
 }
 
-/* should become mdc_getinfo() */
+/* This should be mdc_get_info("rootfid") */
 int mdc_getstatus(struct lustre_handle *conn, struct ll_fid *rootfid)
 {
         return send_getstatus(class_conn2cliimp(conn), rootfid, LUSTRE_CONN_CON,
                               0);
 }
 
+/* should call mdc_get_info("lovdesc") and mdc_get_info("lovtgts") */
 int mdc_getlovinfo(struct obd_device *obd, struct lustre_handle *mdc_connh,
                    struct ptlrpc_request **request)
 {
@@ -233,7 +235,7 @@ int mdc_getattr(struct lustre_handle *conn, struct ll_fid *fid,
         memcpy(&body->fid1, fid, sizeof(*fid));
         body->valid = valid;
         body->eadatasize = ea_size;
-        mds_pack_req_body(req);
+        mdc_pack_req_body(req);
 
         rc = mdc_getattr_common (conn, ea_size, req);
         if (rc != 0) {
@@ -263,7 +265,7 @@ int mdc_getattr_name(struct lustre_handle *conn, struct ll_fid *fid,
         memcpy(&body->fid1, fid, sizeof(*fid));
         body->valid = valid;
         body->eadatasize = ea_size;
-        mds_pack_req_body(req);
+        mdc_pack_req_body(req);
 
         LASSERT (strnlen (filename, namelen) == namelen - 1);
         memcpy(lustre_msg_buf(req->rq_reqmsg, 1, namelen), filename, namelen);
@@ -283,9 +285,9 @@ void mdc_store_inode_generation(struct ptlrpc_request *req, int reqoff,
                                 int repoff)
 {
         struct mds_rec_create *rec =
-                lustre_msg_buf(req->rq_reqmsg, reqoff, sizeof (*rec));
+                lustre_msg_buf(req->rq_reqmsg, reqoff, sizeof(*rec));
         struct mds_body *body =
-                lustre_msg_buf(req->rq_repmsg, repoff, sizeof (*body));
+                lustre_msg_buf(req->rq_repmsg, repoff, sizeof(*body));
 
         LASSERT (rec != NULL);
         LASSERT (body != NULL);
@@ -295,11 +297,49 @@ void mdc_store_inode_generation(struct ptlrpc_request *req, int reqoff,
                   rec->cr_replayfid.generation, rec->cr_replayfid.id);
 }
 
+int mdc_req2lustre_md(struct ptlrpc_request *req, int offset,
+                      struct lustre_handle *obd_import,
+                      struct lustre_md *md)
+{
+        int rc;
+        ENTRY;
+
+        LASSERT(md);
+        memset(md, 0, sizeof(*md));
+
+        md->body = lustre_msg_buf(req->rq_repmsg, offset, sizeof (*md->body));
+        LASSERT (md->body != NULL);
+        LASSERT_REPSWABBED (req, offset);
+
+        if (md->body->valid & OBD_MD_FLEASIZE) {
+                int lmmsize;
+                struct lov_mds_md *lmm;
+
+                LASSERT(S_ISREG(md->body->mode));
+
+                if (md->body->eadatasize == 0) {
+                        CERROR ("OBD_MD_FLEASIZE set, but eadatasize 0\n");
+                        RETURN(-EPROTO);
+                }
+                lmmsize = md->body->eadatasize;
+                lmm = lustre_msg_buf(req->rq_repmsg, offset + 1, lmmsize);
+                LASSERT (lmm != NULL);
+                LASSERT_REPSWABBED (req, offset + 1);
+
+                rc = obd_unpackmd(obd_import, &md->lsm, lmm, lmmsize);
+                if (rc < 0) {
+                        /* XXX don't know if I should do this... */
+                        CERROR ("Error %d unpacking eadata\n", rc);
+                        LBUG();
+                }
+                LASSERT (rc >= sizeof (*md->lsm));
+        }
+        RETURN(0);
+}
+
+
 /* We always reserve enough space in the reply packet for a stripe MD, because
- * we don't know in advance the file type.
- *
- * XXX we could get that from ext2_dir_entry_2 file_type
- */
+ * we don't know in advance the file type. */
 int mdc_enqueue(struct lustre_handle *conn,
                 int lock_type,
                 struct lookup_intent *it,
@@ -318,9 +358,10 @@ int mdc_enqueue(struct lustre_handle *conn,
                 { .name = {data->ino1, data->gen1} };
         int size[6] = {sizeof(struct ldlm_request), sizeof(struct ldlm_intent)};
         int rc, flags = LDLM_FL_HAS_INTENT;
-        int repsize[3] = {sizeof(struct ldlm_reply),
+        int repsize[4] = {sizeof(struct ldlm_reply),
                           sizeof(struct mds_body),
-                          obddev->u.cli.cl_max_mds_easize};
+                          obddev->u.cli.cl_max_mds_easize,
+                          obddev->u.cli.cl_max_mds_cookiesize};
         struct ldlm_reply *dlm_rep;
         struct ldlm_intent *lit;
         struct ldlm_request *lockreq;
@@ -352,7 +393,7 @@ int mdc_enqueue(struct lustre_handle *conn,
                 lit->opc = (__u64)it->it_op;
 
                 /* pack the intended request */
-                mds_open_pack(req, 2, data, it->it_mode, 0, current->fsuid,
+                mdc_open_pack(req, 2, data, it->it_mode, 0, current->fsuid,
                               current->fsgid, LTIME_S(CURRENT_TIME),
                               it->it_flags, tgt, tgtlen);
                 /* get ready for the reply */
@@ -371,10 +412,10 @@ int mdc_enqueue(struct lustre_handle *conn,
                 lit->opc = (__u64)it->it_op;
 
                 /* pack the intended request */
-                mds_unlink_pack(req, 2, data);
+                mdc_unlink_pack(req, 2, data);
                 /* get ready for the reply */
-                reply_buffers = 3;
-                req->rq_replen = lustre_msg_size(3, repsize);
+                reply_buffers = 4;
+                req->rq_replen = lustre_msg_size(4, repsize);
         } else if (it->it_op & (IT_GETATTR | IT_LOOKUP)) {
                 int valid = OBD_MD_FLNOTOBD | OBD_MD_FLEASIZE;
                 size[2] = sizeof(struct mds_body);
@@ -390,7 +431,7 @@ int mdc_enqueue(struct lustre_handle *conn,
                 lit->opc = (__u64)it->it_op;
 
                 /* pack the intended request */
-                mds_getattr_pack(req, valid, 2, it->it_flags, data);
+                mdc_getattr_pack(req, valid, 2, it->it_flags, data);
                 /* get ready for the reply */
                 reply_buffers = 3;
                 req->rq_replen = lustre_msg_size(3, repsize);
@@ -447,8 +488,8 @@ int mdc_enqueue(struct lustre_handle *conn,
         }
 
         dlm_rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*dlm_rep));
-        LASSERT (dlm_rep != NULL);           /* checked by ldlm_cli_enqueue() */
-        LASSERT_REPSWABBED (req, 0);         /* swabbed by ldlm_cli_enqueue() */
+        LASSERT(dlm_rep != NULL);           /* checked by ldlm_cli_enqueue() */
+        LASSERT_REPSWABBED(req, 0);         /* swabbed by ldlm_cli_enqueue() */
 
         it->it_disposition = (int) dlm_rep->lock_policy_res1;
         it->it_status = (int) dlm_rep->lock_policy_res2;
@@ -456,8 +497,8 @@ int mdc_enqueue(struct lustre_handle *conn,
         it->it_data = req;
 
         /* We know what to expect, so we do any byte flipping required here */
-        LASSERT (reply_buffers == 3 || reply_buffers == 1);
-        if (reply_buffers == 3) {
+        LASSERT(reply_buffers == 4 || reply_buffers == 3 || reply_buffers == 1);
+        if (reply_buffers >= 3) {
                 struct mds_body *body;
 
                 body = lustre_swab_repbuf (req, 1, sizeof (*body),
@@ -471,8 +512,8 @@ int mdc_enqueue(struct lustre_handle *conn,
                         /* The eadata is opaque; just check that it is
                          * there.  Eventually, obd_unpackmd() will check
                          * the contents */
-                        eadata = lustre_swab_repbuf (req, 2, body->eadatasize,
-                                                     NULL);
+                        eadata = lustre_swab_repbuf(req, 2, body->eadatasize,
+                                                    NULL);
                         if (eadata == NULL) {
                                 CERROR ("Missing/short eadata\n");
                                 RETURN (-EPROTO);
@@ -490,8 +531,7 @@ static void mdc_replay_open(struct ptlrpc_request *req)
         struct list_head *tmp;
         struct mds_body *body;
 
-        body = lustre_swab_repbuf (req, 1, sizeof (*body),
-                                   lustre_swab_mds_body);
+        body = lustre_swab_repbuf(req, 1, sizeof(*body), lustre_swab_mds_body);
         LASSERT (body != NULL);
 
         memcpy(&old, file_fh, sizeof(old));
@@ -517,15 +557,15 @@ void mdc_set_open_replay_data(struct obd_client_handle *och)
 {
         struct ptlrpc_request *req = och->och_req;
         struct mds_rec_create *rec =
-                lustre_msg_buf(req->rq_reqmsg, 2, sizeof (*rec));
+                lustre_msg_buf(req->rq_reqmsg, 2, sizeof(*rec));
         struct mds_body *body =
-                lustre_msg_buf(req->rq_repmsg, 1, sizeof (*body));
+                lustre_msg_buf(req->rq_repmsg, 1, sizeof(*body));
 
-        LASSERT (rec != NULL);
+        LASSERT(rec != NULL);
         /* outgoing messages always in my byte order */
-        LASSERT (body != NULL);
+        LASSERT(body != NULL);
         /* incoming message in my byte order (it's been swabbed) */
-        LASSERT_REPSWABBED (req, 1);
+        LASSERT_REPSWABBED(req, 1);
 
         memcpy(&rec->cr_replayfid, &body->fid1, sizeof rec->cr_replayfid);
         req->rq_replay_cb = mdc_replay_open;
@@ -589,7 +629,7 @@ int mdc_readpage(struct lustre_handle *conn, obd_id ino, int type, __u64 offset,
         if (rc != 0)
                 GOTO(out, rc);
 
-        mds_readdir_pack(req, offset, PAGE_CACHE_SIZE, ino, type);
+        mdc_readdir_pack(req, offset, PAGE_CACHE_SIZE, ino, type);
 
         req->rq_replen = lustre_msg_size(1, &size);
         rc = ptlrpc_queue_wait(req);
@@ -622,27 +662,28 @@ static int mdc_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
         case OBD_IOC_CLIENT_RECOVER:
                 RETURN(ptlrpc_recover_import(imp, data->ioc_inlbuf1));
         case IOC_OSC_SET_ACTIVE:
-                if (data->ioc_offset) {
-                        CERROR("%s: can't reactivate MDC\n",
-                               obddev->obd_uuid.uuid);
-                        RETURN(-ENOTTY);
-                }
-                RETURN(ptlrpc_set_import_active(imp, 0));
+                RETURN(ptlrpc_set_import_active(imp, data->ioc_offset));
         default:
                 CERROR("osc_ioctl(): unrecognised ioctl %#x\n", cmd);
                 RETURN(-ENOTTY);
         }
 }
 
-static int mdc_statfs(struct obd_export *exp, struct obd_statfs *osfs)
+static int mdc_statfs(struct obd_device *obd, struct obd_statfs *osfs,
+                      unsigned long max_age)
 {
         struct ptlrpc_request *req;
         struct obd_statfs *msfs;
         int rc, size = sizeof(*msfs);
         ENTRY;
 
-        req = ptlrpc_prep_req(exp->exp_obd->u.cli.cl_import, MDS_STATFS, 0, 
-                              NULL, NULL);
+        /* We could possibly pass max_age in the request (as an absolute
+         * timestamp or a "seconds.usec ago") so the target can avoid doing
+         * extra calls into the filesystem if that isn't necessary (e.g.
+         * during mount that would help a bit).  Having relative timestamps
+         * is not so great if request processing is slow, while absolute
+         * timestamps are not ideal because they need time synchronization. */
+        req = ptlrpc_prep_req(obd->u.cli.cl_import, MDS_STATFS, 0, NULL, NULL);
         if (!req)
                 RETURN(-ENOMEM);
 
@@ -655,14 +696,13 @@ static int mdc_statfs(struct obd_export *exp, struct obd_statfs *osfs)
         if (rc)
                 GOTO(out, rc);
 
-        msfs = lustre_swab_repbuf (req, 0, sizeof (*msfs),
-                                   lustre_swab_obd_statfs);
+        msfs = lustre_swab_repbuf(req, 0, sizeof(*msfs),lustre_swab_obd_statfs);
         if (msfs == NULL) {
-                CERROR ("Can't unpack obd_statfs\n");
-                GOTO (out, rc = -EPROTO);
+                CERROR("Can't unpack obd_statfs\n");
+                GOTO(out, rc = -EPROTO);
         }
 
-        memcpy (osfs, msfs, sizeof (*msfs));
+        memcpy(osfs, msfs, sizeof (*msfs));
         EXIT;
 out:
         ptlrpc_req_finished(req);
@@ -670,11 +710,83 @@ out:
         return rc;
 }
 
+/* Send an MDS_PIN request for the inode identified by (ino, gen, type) and
+ * record the file handle from the reply in @handle.
+ *
+ * On success the request is kept in handle->och_req (it is dropped by
+ * mdc_unpin()) and handle->och_magic is set to OBD_CLIENT_HANDLE_MAGIC.
+ *
+ * Returns 0 on success, -ENOMEM if the request cannot be allocated, the
+ * ptlrpc_queue_wait() error if the RPC fails, or -EPROTO if the reply body
+ * cannot be unpacked.  On any failure the request is finished here and
+ * @handle is left untouched. */
+static int mdc_pin(struct lustre_handle *conn, obd_id ino, __u32 gen, int type,
+                   struct obd_client_handle *handle, int flag)
+{
+        struct ptlrpc_request *req;
+        struct mds_body *body;
+        int rc, size = sizeof(*body);
+        ENTRY;
+
+        req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_PIN, 1, &size, NULL);
+        if (req == NULL)
+                RETURN(-ENOMEM);
+
+        body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body));
+        ll_ino2fid(&body->fid1, ino, gen, type);
+        body->flags = flag;
+
+        req->rq_replen = lustre_msg_size(1, &size);
+
+        mdc_get_rpc_lock(&mdc_rpc_lock, NULL);
+        rc = ptlrpc_queue_wait(req);
+        mdc_put_rpc_lock(&mdc_rpc_lock, NULL);
+        if (rc) {
+                CERROR("pin failed: %d\n", rc);
+                ptlrpc_req_finished(req);
+                RETURN(rc);
+        }
+
+        body = lustre_swab_repbuf(req, 0, sizeof(*body), lustre_swab_mds_body);
+        if (body == NULL) {
+                /* rc is 0 at this point; returning it would report success
+                 * to the caller with an unfilled handle. */
+                CERROR("Can't unpack mds_body\n");
+                ptlrpc_req_finished(req);
+                RETURN(-EPROTO);
+        }
+
+        memcpy(&handle->och_fh, &body->handle, sizeof(body->handle));
+        handle->och_req = req; /* will be dropped by unpin */
+        handle->och_magic = OBD_CLIENT_HANDLE_MAGIC;
+        RETURN(rc);
+}
+
+/* Release a handle that was filled in by mdc_pin().
+ *
+ * A handle whose och_magic is not OBD_CLIENT_HANDLE_MAGIC is ignored and 0
+ * is returned.  Otherwise an MDS_CLOSE request carrying the saved file
+ * handle and @flag is sent; afterwards both that request and the one saved
+ * in handle->och_req are finished, whatever the RPC result was.
+ *
+ * Returns -ENOMEM if the request cannot be allocated (handle->och_req is
+ * not touched in that case), otherwise the result of ptlrpc_queue_wait().
+ *
+ * NOTE(review): och_magic is not cleared once och_req has been dropped, so
+ * a second call on the same handle would finish och_req again -- confirm
+ * that callers never unpin twice. */
+static int mdc_unpin(struct lustre_handle *conn,
+                     struct obd_client_handle *handle, int flag)
+{
+        struct ptlrpc_request *req;
+        struct mds_body *body;
+        int rc, size = sizeof(*body);
+        ENTRY;
+
+        if (handle->och_magic != OBD_CLIENT_HANDLE_MAGIC)
+                RETURN(0);
+
+        req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_CLOSE, 1, &size,
+                              NULL);
+        if (req == NULL)
+                RETURN(-ENOMEM);
+
+        body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*body));
+        memcpy(&body->handle, &handle->och_fh, sizeof(body->handle));
+        body->flags = flag;
+
+        /* reply length is computed for zero buffers */
+        req->rq_replen = lustre_msg_size(0, NULL);
+        mdc_get_rpc_lock(&mdc_rpc_lock, NULL);
+        rc = ptlrpc_queue_wait(req);
+        mdc_put_rpc_lock(&mdc_rpc_lock, NULL);
+
+        if (rc != 0)
+                CERROR("unpin failed: %d\n", rc);
+
+        /* drop the close request, then the request saved by mdc_pin() */
+        ptlrpc_req_finished(req);
+        ptlrpc_req_finished(handle->och_req);
+        RETURN(rc);
+}
+
 static int mdc_attach(struct obd_device *dev, obd_count len, void *data)
 {
         struct lprocfs_static_vars lvars;
 
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(mdc, &lvars);
         return lprocfs_obd_attach(dev, lvars.obd_vars);
 }
 
@@ -692,7 +804,9 @@ struct obd_ops mdc_obd_ops = {
         o_connect:     client_import_connect,
         o_disconnect:  client_import_disconnect,
         o_iocontrol:   mdc_iocontrol,
-        o_statfs:      mdc_statfs
+        o_statfs:      mdc_statfs,
+        o_pin:         mdc_pin,
+        o_unpin:       mdc_unpin,
 };
 
 int __init mdc_init(void)
@@ -700,12 +814,12 @@ int __init mdc_init(void)
         struct lprocfs_static_vars lvars;
         mdc_init_rpc_lock(&mdc_rpc_lock);
         mdc_init_rpc_lock(&mdc_setattr_lock);
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(mdc, &lvars);
         return class_register_type(&mdc_obd_ops, lvars.module_vars,
                                    LUSTRE_MDC_NAME);
 }
 
-static void __exit mdc_exit(void)
+static void /*__exit*/ mdc_exit(void)
 {
         class_unregister_type(LUSTRE_MDC_NAME);
 }
@@ -715,6 +829,7 @@ MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
 MODULE_DESCRIPTION("Lustre Metadata Client");
 MODULE_LICENSE("GPL");
 
+EXPORT_SYMBOL(mdc_req2lustre_md);
 EXPORT_SYMBOL(mdc_getstatus);
 EXPORT_SYMBOL(mdc_getlovinfo);
 EXPORT_SYMBOL(mdc_enqueue);
index e530020..49c6100 100644 (file)
@@ -6,3 +6,4 @@ Makefile
 Makefile.in
 .deps
 TAGS
+.*.cmd
index 6b712fb..0696bd7 100644 (file)
@@ -3,8 +3,9 @@
 # This code is issued under the GNU General Public License.
 # See the file COPYING in this distribution
 
-include fs/lustre/portals/Kernelenv
+include $(src)/../portals/Kernelenv
 
 obj-y += mds.o
-
-mds-objs    := mds_lov.o handler.o mds_reint.o mds_fs.o lproc_mds.o mds_internal.h mds_updates.o mds_open.o simple.o target.o
+mds-objs := mds_lov.o handler.o mds_reint.o mds_fs.o lproc_mds.o mds_open.o \
+               mds_lib.o
+       
index de3f2ed..756e290 100644 (file)
 #include <linux/lustre_mds.h>
 #include <linux/lustre_fsfilt.h>
 #include <linux/lprocfs_status.h>
+#include <linux/lustre_commit_confd.h>
+
 #include "mds_internal.h"
 
-extern int mds_get_lovtgts(struct mds_obd *obd, int tgt_count,
-                           struct obd_uuid *uuidarray);
-extern int mds_get_lovdesc(struct mds_obd  *obd, struct lov_desc *desc);
-int mds_finish_transno(struct mds_obd *mds, struct inode *i, void *handle,
-                       struct ptlrpc_request *req, int rc, int disp);
-static int mds_cleanup(struct obd_device * obddev, int force, int failover);
-
-inline struct mds_obd *mds_req2mds(struct ptlrpc_request *req)
-{
-        return &req->rq_export->exp_obd->u.mds;
-}
+static int mds_cleanup(struct obd_device *obd, int flags);
 
 static int mds_bulk_timeout(void *data)
 {
@@ -188,6 +180,9 @@ struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid,
 
         snprintf(fid_name, sizeof(fid_name), "0x%lx", ino);
 
+        CDEBUG(D_DENTRY, "--> mds_fid2dentry: ino %lu, gen %u, sb %p\n",
+               ino, generation, mds->mds_sb);
+
         /* under ext3 this is neither supposed to return bad inodes
            nor NULL inodes. */
         result = ll_lookup_one_len(fid_name, mds->mds_fid_de, strlen(fid_name));
@@ -198,9 +193,6 @@ struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid,
         if (!inode)
                 RETURN(ERR_PTR(-ENOENT));
 
-        CDEBUG(D_DENTRY, "--> mds_fid2dentry: ino %lu, gen %u, sb %p\n",
-               inode->i_ino, inode->i_generation, inode->i_sb);
-
         if (generation && inode->i_generation != generation) {
                 /* we didn't find the right inode.. */
                 CERROR("bad inode %lu, link: %d ct: %d or generation %u/%u\n",
@@ -341,80 +333,125 @@ void mds_mfd_destroy(struct mds_file_data *mfd)
         mds_mfd_put(mfd);
 }
 
-/* Call with med->med_open_lock held, please. */
-static int mds_close_mfd(struct mds_file_data *mfd, struct mds_export_data *med)
+/* Close a "file descriptor" and possibly unlink an orphan from the
+ * PENDING directory.
+ *
+ * If we are being called from mds_disconnect() because the client has
+ * disappeared, then req == NULL and we do not update last_rcvd because
+ * there is nothing that could be recovered by the client at this stage
+ * (it will not even _have_ an entry in last_rcvd anymore).
+ */
+static int mds_mfd_close(struct ptlrpc_request *req, struct obd_device *obd,
+                         struct mds_file_data *mfd)
 {
-        struct dentry *de = NULL;
-
-#ifdef CONFIG_SMP
-        LASSERT(spin_is_locked(&med->med_open_lock));
-#endif
-        list_del(&mfd->mfd_list);
+        struct dentry *dparent = mfd->mfd_dentry->d_parent;
+        struct inode *child_inode = mfd->mfd_dentry->d_inode;
+        char fidname[LL_FID_NAMELEN];
+        int last_orphan, fidlen, rc = 0;
+        ENTRY;
 
-        if (mfd->mfd_dentry->d_parent) {
-                LASSERT(atomic_read(&mfd->mfd_dentry->d_parent->d_count));
-                de = dget(mfd->mfd_dentry->d_parent);
+        if (dparent) {
+                LASSERT(atomic_read(&dparent->d_count) > 0);
+                dparent = dget(dparent);
         }
 
-        /* this is the actual "close" */
-        l_dput(mfd->mfd_dentry);
+        fidlen = ll_fid2str(fidname, child_inode->i_ino,
+                            child_inode->i_generation);
 
-        if (de)
-                l_dput(de);
+        last_orphan = mds_open_orphan_dec_test(child_inode) &&
+                mds_inode_is_orphan(child_inode);
 
+        /* this is the actual "close" */
+        l_dput(mfd->mfd_dentry);
         mds_mfd_destroy(mfd);
-        RETURN(0);
-}
 
-static int mds_disconnect(struct lustre_handle *conn, int failover)
-{
-        struct obd_export *export = class_conn2export(conn);
-        int rc;
-        unsigned long flags;
-        ENTRY;
+        if (dparent)
+                l_dput(dparent);
 
-        ldlm_cancel_locks_for_export(export);
+        if (last_orphan) {
+                struct mds_obd *mds = &obd->u.mds;
+                struct inode *pending_dir = mds->mds_pending_dir->d_inode;
+                struct dentry *pending_child = NULL;
+                void *handle;
 
-        spin_lock_irqsave(&export->exp_lock, flags);
-        export->exp_failover = failover;
-        spin_unlock_irqrestore(&export->exp_lock, flags);
+                CDEBUG(D_ERROR, "destroying orphan object %s\n", fidname);
 
-        rc = class_disconnect(conn, failover);
-        class_export_put(export);
+                /* Sadly, there is no easy way to save pending_child from
+                 * mds_reint_unlink() into mfd, so we need to re-lookup,
+                 * but normally it will still be in the dcache.
+                 */
+                down(&pending_dir->i_sem);
+                pending_child = lookup_one_len(fidname, mds->mds_pending_dir,
+                                               fidlen);
+                if (IS_ERR(pending_child))
+                        GOTO(out_lock, rc = PTR_ERR(pending_child));
+                LASSERT(pending_child->d_inode != NULL);
+
+                handle = fsfilt_start(obd, pending_dir, FSFILT_OP_UNLINK, NULL);
+                if (IS_ERR(handle))
+                        GOTO(out_dput, rc = PTR_ERR(handle));
+                rc = vfs_unlink(pending_dir, pending_child);
+                if (rc)
+                        CERROR("error unlinking orphan %s: rc %d\n",fidname,rc);
+
+                if (req) {
+                        rc = mds_finish_transno(mds, pending_dir, handle, req,
+                                                rc, 0);
+                } else {
+                        int err = fsfilt_commit(obd, pending_dir, handle, 0);
+                        if (err) {
+                                CERROR("error committing orphan unlink: %d\n",
+                                       err);
+                                if (!rc)
+                                        rc = err;
+                        }
+                }
+        out_dput:
+                dput(pending_child);
+        out_lock:
+                up(&pending_dir->i_sem);
+        }
 
         RETURN(rc);
 }
 
-static void mds_destroy_export(struct obd_export *export)
+static int mds_disconnect(struct lustre_handle *conn, int flags)
 {
+        struct obd_export *export = class_conn2export(conn);
         struct mds_export_data *med = &export->exp_mds_data;
-        struct list_head *tmp, *n;
+        struct obd_device *obd = export->exp_obd;
+        struct obd_run_ctxt saved;
         int rc;
-
         ENTRY;
-        LASSERT(!strcmp(export->exp_obd->obd_type->typ_name,
-                        LUSTRE_MDS_NAME));
 
-        /*
-         * Close any open files.
-         */
+        push_ctxt(&saved, &obd->u.mds.mds_ctxt, NULL);
+        /* Close any open files (which may also cause orphan unlinking). */
         spin_lock(&med->med_open_lock);
-        list_for_each_safe(tmp, n, &med->med_open_head) {
+        while (!list_empty(&med->med_open_head)) {
+                struct list_head *tmp = med->med_open_head.next;
                 struct mds_file_data *mfd =
                         list_entry(tmp, struct mds_file_data, mfd_list);
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+                /* bug 1579: fix force-closing for 2.5 */
                 struct dentry *dentry = mfd->mfd_dentry;
+
+                list_del(&mfd->mfd_list);
+                spin_unlock(&med->med_open_lock);
+
                 CERROR("force closing client file handle for %*s (%s:%lu)\n",
                        dentry->d_name.len, dentry->d_name.name,
                        kdevname(dentry->d_inode->i_sb->s_dev),
                        dentry->d_inode->i_ino);
+                rc = mds_mfd_close(NULL, obd, mfd);
 #endif
-                rc = mds_close_mfd(mfd, med);
                 if (rc)
                         CDEBUG(D_INODE, "Error closing file: %d\n", rc);
+                spin_lock(&med->med_open_lock);
         }
         spin_unlock(&med->med_open_lock);
+        pop_ctxt(&saved, &obd->u.mds.mds_ctxt, NULL);
 
+        ldlm_cancel_locks_for_export(export);
         if (export->exp_outstanding_reply) {
                 struct ptlrpc_request *req = export->exp_outstanding_reply;
                 unsigned long          flags;
@@ -432,9 +469,13 @@ static void mds_destroy_export(struct obd_export *export)
                 export->exp_outstanding_reply = NULL;
         }
 
-        if (!export->exp_failover)
+        if (!(flags & OBD_OPT_FAILOVER))
                 mds_client_free(export);
-        EXIT;
+
+        rc = class_disconnect(conn, flags);
+        class_export_put(export);
+
+        RETURN(rc);
 }
 
 /*
@@ -448,14 +489,24 @@ static void mds_fsync_super(struct super_block *sb)
 {
         lock_kernel();
         lock_super(sb);
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
         if (sb->s_dirt && sb->s_op && sb->s_op->write_super)
                 sb->s_op->write_super(sb);
+#else
+        if (sb->s_dirt && sb->s_op) {
+                if (sb->s_op->sync_fs)
+                        sb->s_op->sync_fs(sb, 1);
+                else if (sb->s_op->write_super)
+                        sb->s_op->write_super(sb);
+        }
+#endif
         unlock_super(sb);
         unlock_kernel();
 }
 
 static int mds_getstatus(struct ptlrpc_request *req)
 {
+        struct obd_device *obd = req->rq_export->exp_obd;
         struct mds_obd *mds = mds_req2mds(req);
         struct mds_body *body;
         int rc, size = sizeof(*body);
@@ -473,7 +524,7 @@ static int mds_getstatus(struct ptlrpc_request *req)
          * requests if they have any.  This would be fsync_super() if it
          * was exported.
          */
-        mds_fsync_super(mds->mds_sb);
+        fsfilt_sync(obd, mds->mds_sb);
 
         body = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*body));
         memcpy(&body->fid1, &mds->mds_rootfid, sizeof(body->fid1));
@@ -525,8 +576,7 @@ static int mds_getlovinfo(struct ptlrpc_request *req)
         memcpy(desc, &mds->mds_lov_desc, sizeof (*desc));
 
         tgt_count = mds->mds_lov_desc.ld_tgt_count;
-        uuid0 = lustre_msg_buf (req->rq_repmsg, 1,
-                                tgt_count * sizeof (*uuid0));
+        uuid0 = lustre_msg_buf(req->rq_repmsg, 1, tgt_count * sizeof (*uuid0));
         if (uuid0 == NULL) {
                 CERROR("too many targets, enlarge client buffers\n");
                 req->rq_status = -ENOSPC;
@@ -539,6 +589,8 @@ static int mds_getlovinfo(struct ptlrpc_request *req)
                 req->rq_status = rc;
                 RETURN(0);
         }
+        memcpy(&mds->mds_osc_uuid, &mds->mds_lov_desc.ld_uuid,
+               sizeof(mds->mds_osc_uuid));
         RETURN(0);
 }
 
@@ -616,8 +668,8 @@ int mds_pack_md(struct obd_device *obd, struct lustre_msg *msg,
 
         rc = fsfilt_get_md(obd, inode, lmm, lmm_size);
         if (rc < 0) {
-                CERROR ("Error %d reading eadata for ino %lu\n",
-                        rc, inode->i_ino);
+                CERROR("Error %d reading eadata for ino %lu\n",
+                       rc, inode->i_ino);
         } else if (rc > 0) {
                 body->valid |= OBD_MD_FLEASIZE;
                 body->eadatasize = rc;
@@ -639,19 +691,22 @@ static int mds_getattr_internal(struct obd_device *obd, struct dentry *dentry,
         if (inode == NULL)
                 RETURN(-ENOENT);
 
-        body = lustre_msg_buf(req->rq_repmsg, reply_off, sizeof (*body));
-        LASSERT (body != NULL);                 /* caller prepped reply */
+        body = lustre_msg_buf(req->rq_repmsg, reply_off, sizeof(*body));
+        LASSERT(body != NULL);                 /* caller prepped reply */
 
         mds_pack_inode2fid(&body->fid1, inode);
         mds_pack_inode2body(body, inode);
 
-        if (S_ISREG(inode->i_mode) &&
-            (reqbody->valid & OBD_MD_FLEASIZE) != 0) {
-                rc = mds_pack_md(obd, req->rq_repmsg, reply_off + 1,
-                                 body, inode);
+        if (S_ISREG(inode->i_mode) && (reqbody->valid & OBD_MD_FLEASIZE) != 0) {
+                rc = mds_pack_md(obd, req->rq_repmsg, reply_off+1, body, inode);
+
+                /* If we have LOV EA data, the OST holds size, atime, mtime */
+                if (!(body->valid & OBD_MD_FLEASIZE))
+                        body->valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
+                                        OBD_MD_FLATIME | OBD_MD_FLMTIME);
         } else if (S_ISLNK(inode->i_mode) &&
                    (reqbody->valid & OBD_MD_LINKNAME) != 0) {
-                char *symname = lustre_msg_buf(req->rq_repmsg, reply_off + 1, 0);
+                char *symname = lustre_msg_buf(req->rq_repmsg, reply_off + 1,0);
                 int len;
 
                 LASSERT (symname != NULL);       /* caller prepped reply */
@@ -672,6 +727,7 @@ static int mds_getattr_internal(struct obd_device *obd, struct dentry *dentry,
                         rc = 0;
                 }
         }
+
         RETURN(rc);
 }
 
@@ -684,11 +740,10 @@ static int mds_getattr_pack_msg(struct ptlrpc_request *req, struct inode *inode,
         ENTRY;
 
         body = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*body));
-        LASSERT (body != NULL);                 /* checked by caller */
-        LASSERT_REQSWABBED (req, offset);       /* swabbed by caller */
+        LASSERT(body != NULL);                 /* checked by caller */
+        LASSERT_REQSWABBED(req, offset);       /* swabbed by caller */
 
-        if (S_ISREG(inode->i_mode) &&
-            (body->valid & OBD_MD_FLEASIZE) != 0) {
+        if (S_ISREG(inode->i_mode) && (body->valid & OBD_MD_FLEASIZE)) {
                 int rc = fsfilt_get_md(req->rq_export->exp_obd, inode, NULL, 0);
                 CDEBUG(D_INODE, "got %d bytes MD data for inode %lu\n",
                        rc, inode->i_ino);
@@ -701,14 +756,14 @@ static int mds_getattr_pack_msg(struct ptlrpc_request *req, struct inode *inode,
                         size[bufcount] = 0;
                         CERROR("MD size %d larger than maximum possible %u\n",
                                rc, mds->mds_max_mdsize);
-                } else
+                } else {
                         size[bufcount] = rc;
+                }
                 bufcount++;
-        } else if (S_ISLNK (inode->i_mode) &&
-                   (body->valid & OBD_MD_LINKNAME) != 0) {
+        } else if (S_ISLNK(inode->i_mode) && (body->valid & OBD_MD_LINKNAME)) {
                 if (inode->i_size + 1 != body->eadatasize)
-                        CERROR ("symlink size: %Lu, reply space: %d\n",
-                                inode->i_size + 1, body->eadatasize);
+                        CERROR("symlink size: %Lu, reply space: %d\n",
+                               inode->i_size + 1, body->eadatasize);
                 size[bufcount] = MIN(inode->i_size + 1, body->eadatasize);
                 bufcount++;
                 CDEBUG(D_INODE, "symlink size: %Lu, reply space: %d\n",
@@ -724,9 +779,8 @@ static int mds_getattr_pack_msg(struct ptlrpc_request *req, struct inode *inode,
         rc = lustre_pack_msg(bufcount, size, NULL, &req->rq_replen,
                              &req->rq_repmsg);
         if (rc) {
-                CERROR("out of memoryK\n");
-                req->rq_status = rc;
-                GOTO(out, rc);
+                CERROR("out of memory\n");
+                GOTO(out, req->rq_status = rc);
         }
 
         EXIT;
@@ -738,6 +792,8 @@ static int mds_getattr_pack_msg(struct ptlrpc_request *req, struct inode *inode,
 static void reconstruct_getattr_name(int offset, struct ptlrpc_request *req,
                                      struct lustre_handle *client_lockh)
 {
+        struct mds_export_data *med = &req->rq_export->exp_mds_data;
+        struct mds_client_data *mcd = med->med_mcd;
         struct obd_device *obd = req->rq_export->exp_obd;
         struct mds_obd *mds = mds_req2mds(req);
         struct dentry *parent, *child;
@@ -748,8 +804,15 @@ static void reconstruct_getattr_name(int offset, struct ptlrpc_request *req,
         int namelen, rc = 0;
         char *name;
 
-        if (req->rq_export->exp_outstanding_reply)
-                mds_steal_ack_locks(req->rq_export, req);
+        req->rq_transno = mcd->mcd_last_transno;
+        req->rq_status = mcd->mcd_last_result;
+
+        LASSERT (req->rq_export->exp_outstanding_reply);
+
+        mds_steal_ack_locks(req->rq_export, req);
+
+        if (req->rq_status)
+                return;
 
         body = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*body));
         LASSERT (body != NULL);                 /* checked by caller */
@@ -770,6 +833,7 @@ static void reconstruct_getattr_name(int offset, struct ptlrpc_request *req,
         uc.ouc_cap = body->capability;
         uc.ouc_suppgid1 = body->suppgid;
         uc.ouc_suppgid2 = -1;
+
         push_ctxt(&saved, &mds->mds_ctxt, &uc);
         parent = mds_fid2dentry(mds, &body->fid1, NULL);
         LASSERT(!IS_ERR(parent));
@@ -785,7 +849,8 @@ static void reconstruct_getattr_name(int offset, struct ptlrpc_request *req,
         }
 
         rc = mds_getattr_internal(obd, child, req, body, offset);
-        req->rq_status = rc;
+        /* XXX need to handle error here */
+        LASSERT(!rc);
         l_dput(child);
         l_dput(parent);
 }
@@ -795,6 +860,7 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req,
 {
         struct mds_obd *mds = mds_req2mds(req);
         struct obd_device *obd = req->rq_export->exp_obd;
+        struct ldlm_reply *rep = NULL;
         struct obd_run_ctxt saved;
         struct mds_body *body;
         struct dentry *de = NULL, *dchild = NULL;
@@ -803,7 +869,7 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req,
         struct ldlm_res_id child_res_id = { .name = {0} };
         struct lustre_handle parent_lockh;
         int namesize;
-        int flags = 0, rc = 0, cleanup_phase = 0, req_was_resent;
+        int flags = 0, rc = 0, cleanup_phase = 0;
         char *name;
         ENTRY;
 
@@ -811,34 +877,39 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req,
 
         /* Swab now, before anyone looks inside the request */
 
-        body = lustre_swab_reqbuf (req, offset, sizeof (*body),
-                                   lustre_swab_mds_body);
+        body = lustre_swab_reqbuf(req, offset, sizeof(*body),
+                                  lustre_swab_mds_body);
         if (body == NULL) {
-                CERROR ("Can't swab mds_body\n");
-                GOTO (cleanup, rc = -EFAULT);
+                CERROR("Can't swab mds_body\n");
+                GOTO(cleanup, rc = -EFAULT);
         }
 
-        LASSERT_REQSWAB (req, offset + 1);
-        name = lustre_msg_string (req->rq_reqmsg, offset + 1, 0);
+        LASSERT_REQSWAB(req, offset + 1);
+        name = lustre_msg_string(req->rq_reqmsg, offset + 1, 0);
         if (name == NULL) {
-                CERROR ("Can't unpack name\n");
-                GOTO (cleanup, rc = -EFAULT);
+                CERROR("Can't unpack name\n");
+                GOTO(cleanup, rc = -EFAULT);
         }
         namesize = req->rq_reqmsg->buflens[offset + 1];
 
-        req_was_resent = lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT;
-        if (child_lockh->cookie) {
-                LASSERT(req_was_resent);
-                reconstruct_getattr_name(offset, req, child_lockh);
-                RETURN(0);
-        } else if (req_was_resent) {
-                DEBUG_REQ(D_HA, req, "no reply for RESENT req");
+        if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) {
+                struct obd_export *exp = req->rq_export;
+                if (exp->exp_outstanding_reply &&
+                    exp->exp_outstanding_reply->rq_xid == req->rq_xid) {
+                        reconstruct_getattr_name(offset, req, child_lockh);
+                        RETURN(0);
+                }
+                DEBUG_REQ(D_HA, req, "no reply for RESENT req (have "LPD64")",
+                          exp->exp_outstanding_reply ?
+                          exp->exp_outstanding_reply->rq_xid : (u64)0);
         }
 
         LASSERT (offset == 0 || offset == 2);
-        /* if requests were at offset 2, replies go back at 1 */
-        if (offset)
+        /* if requests were at offset 2, the getattr reply goes back at 1 */
+        if (offset) { 
+                rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*rep));
                 offset = 1;
+        }
 
         uc.ouc_fsuid = body->fsuid;
         uc.ouc_fsgid = body->fsgid;
@@ -847,6 +918,7 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req,
         uc.ouc_suppgid2 = -1;
         push_ctxt(&saved, &mds->mds_ctxt, &uc);
         /* Step 1: Lookup/lock parent */
+        intent_set_disposition(rep, DISP_LOOKUP_EXECD);
         de = mds_fid2locked_dentry(obd, &body->fid1, NULL, LCK_PR,
                                    &parent_lockh);
         if (IS_ERR(de))
@@ -868,7 +940,10 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req,
         cleanup_phase = 2; /* child dentry */
 
         if (dchild->d_inode == NULL) {
+                intent_set_disposition(rep, DISP_LOOKUP_NEG);
                 GOTO(cleanup, rc = -ENOENT);
+        } else {
+                intent_set_disposition(rep, DISP_LOOKUP_POS);
         }
 
         /* Step 3: Lock child */
@@ -963,11 +1038,17 @@ out_pop:
         return rc;
 }
 
+
+/* Fill @osfs with statistics for the filesystem backing this MDS by passing
+ * the MDS superblock to fsfilt_statfs() and returning its result.
+ * @max_age is part of the signature but is not consulted here. */
+static int mds_obd_statfs(struct obd_device *obd, struct obd_statfs *osfs,
+                          unsigned long max_age)
+{
+        struct super_block *sb = obd->u.mds.mds_sb;
+
+        return fsfilt_statfs(obd, sb, osfs);
+}
+
 static int mds_statfs(struct ptlrpc_request *req)
 {
         struct obd_device *obd = req->rq_export->exp_obd;
-        struct obd_statfs *osfs;
-        int rc, size = sizeof(*osfs);
+        int rc, size = sizeof(struct obd_statfs);
         ENTRY;
 
         rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
@@ -976,10 +1057,10 @@ static int mds_statfs(struct ptlrpc_request *req)
                 GOTO(out, rc);
         }
 
-        osfs = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*osfs));
-        rc = fsfilt_statfs(obd, obd->u.mds.mds_sb, osfs);
+        /* We call this so that we can cache a bit - HZ jiffies (1s) worth */
+        rc = obd_statfs(obd, lustre_msg_buf(req->rq_repmsg,0,size),jiffies-HZ);
         if (rc) {
-                CERROR("mdsstatfs failed: rc %d\n", rc);
+                CERROR("mds_obd_statfs failed: rc %d\n", rc);
                 GOTO(out, rc);
         }
 
@@ -1006,8 +1087,10 @@ static void reconstruct_close(struct ptlrpc_request *req)
 static int mds_close(struct ptlrpc_request *req)
 {
         struct mds_export_data *med = &req->rq_export->exp_mds_data;
+        struct obd_device *obd = req->rq_export->exp_obd;
         struct mds_body *body;
         struct mds_file_data *mfd;
+        struct obd_run_ctxt saved;
         int rc;
         ENTRY;
 
@@ -1028,10 +1111,20 @@ static int mds_close(struct ptlrpc_request *req)
                 RETURN(-ESTALE);
         }
 
+        rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg);
+        if (rc) {
+                CERROR("lustre_pack_msg: rc = %d\n", rc);
+                req->rq_status = rc;
+        }
+
         spin_lock(&med->med_open_lock);
-        req->rq_status = mds_close_mfd(mfd, med);
+        list_del(&mfd->mfd_list);
         spin_unlock(&med->med_open_lock);
 
+        push_ctxt(&saved, &obd->u.mds.mds_ctxt, NULL);
+        req->rq_status = mds_mfd_close(rc ? NULL : req, obd, mfd);
+        pop_ctxt(&saved, &obd->u.mds.mds_ctxt, NULL);
+
         if (OBD_FAIL_CHECK(OBD_FAIL_MDS_CLOSE_PACK)) {
                 CERROR("test case OBD_FAIL_MDS_CLOSE_PACK\n");
                 req->rq_status = -ENOMEM;
@@ -1039,12 +1132,6 @@ static int mds_close(struct ptlrpc_request *req)
                 RETURN(-ENOMEM);
         }
 
-        rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg);
-        if (rc) {
-                CERROR("mds: lustre_pack_msg: rc = %d\n", rc);
-                req->rq_status = rc;
-        }
-
         mds_mfd_put(mfd);
         RETURN(0);
 }
@@ -1073,7 +1160,7 @@ static int mds_readpage(struct ptlrpc_request *req)
                 GOTO (out, rc = -EFAULT);
 
         /* body->size is actually the offset -eeb */
-        if ((body->size & (PAGE_SIZE - 1)) != 0) {
+        if ((body->size & ~PAGE_MASK) != 0) {
                 CERROR ("offset "LPU64"not on a page boundary\n", body->size);
                 GOTO (out, rc = -EFAULT);
         }
@@ -1306,9 +1393,10 @@ int mds_handle(struct ptlrpc_request *req)
                 break;
 
         case MDS_REINT: {
-                __u32 *opcp = lustre_msg_buf (req->rq_reqmsg, 0, sizeof (*opcp));
+                __u32 *opcp = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*opcp));
                 __u32  opc;
-                int size[2] = {sizeof(struct mds_body), mds->mds_max_mdsize};
+                int size[3] = {sizeof(struct mds_body), mds->mds_max_mdsize,
+                               mds->mds_max_cookiesize};
                 int bufcount;
 
                 /* NB only peek inside req now; mds_reint() will swab it */
@@ -1319,15 +1407,18 @@ int mds_handle(struct ptlrpc_request *req)
                 }
                 opc = *opcp;
                 if (lustre_msg_swabbed (req->rq_reqmsg))
-                        __swab32s (&opc);
+                        __swab32s(&opc);
 
                 DEBUG_REQ(D_INODE, req, "reint %d (%s)", opc,
-                          (opc < sizeof (reint_names) / sizeof (reint_names[0]) ||
-                           reint_names[opc] == NULL) ? reint_names[opc] : "unknown opcode");
+                          (opc < sizeof(reint_names) / sizeof(reint_names[0]) ||
+                           reint_names[opc] == NULL) ? reint_names[opc] :
+                                                       "unknown opcode");
 
                 OBD_FAIL_RETURN(OBD_FAIL_MDS_REINT_NET, 0);
 
                 if (opc == REINT_UNLINK)
+                        bufcount = 3;
+                else if (opc == REINT_OPEN)
                         bufcount = 2;
                 else
                         bufcount = 1;
@@ -1348,11 +1439,23 @@ int mds_handle(struct ptlrpc_request *req)
                 rc = mds_close(req);
                 break;
 
+        case MDS_PIN:
+                DEBUG_REQ(D_INODE, req, "pin");
+                OBD_FAIL_RETURN(OBD_FAIL_MDS_PIN_NET, 0);
+                rc = mds_pin(req);
+                break;
+
         case OBD_PING:
                 DEBUG_REQ(D_INODE, req, "ping");
                 rc = target_handle_ping(req);
                 break;
 
+        case OBD_LOG_CANCEL:
+                CDEBUG(D_INODE, "log cancel\n");
+                OBD_FAIL_RETURN(OBD_FAIL_OBD_LOG_CANCEL_NET, 0);
+                rc = -ENOTSUPP; /* la la la */
+                break;
+
         case LDLM_ENQUEUE:
                 DEBUG_REQ(D_INODE, req, "enqueue");
                 OBD_FAIL_RETURN(OBD_FAIL_LDLM_ENQUEUE, 0);
@@ -1385,7 +1488,7 @@ int mds_handle(struct ptlrpc_request *req)
                 struct obd_device *obd = list_entry(mds, struct obd_device,
                                                     u.mds);
                 req->rq_repmsg->last_xid =
-                        le64_to_cpu (med->med_mcd->mcd_last_xid);
+                        le64_to_cpu(med->med_mcd->mcd_last_xid);
 
                 if (!obd->obd_no_transno) {
                         req->rq_repmsg->last_committed =
@@ -1421,8 +1524,9 @@ int mds_handle(struct ptlrpc_request *req)
  *
  * Also assumes for mds_last_transno that we are not modifying it (no locking).
  */
-int mds_update_server_data(struct mds_obd *mds)
+int mds_update_server_data(struct obd_device *obd)
 {
+        struct mds_obd *mds = &obd->u.mds;
         struct mds_server_data *msd = mds->mds_server_data;
         struct file *filp = mds->mds_rcvd_filp;
         struct obd_run_ctxt saved;
@@ -1433,21 +1537,16 @@ int mds_update_server_data(struct mds_obd *mds)
         msd->msd_last_transno = cpu_to_le64(mds->mds_last_transno);
         msd->msd_mount_count = cpu_to_le64(mds->mds_mount_count);
 
-        CDEBUG(D_SUPER, "MDS mount_count is %Lu, last_transno is %Lu\n",
-               (unsigned long long)mds->mds_mount_count,
-               (unsigned long long)mds->mds_last_transno);
-        rc = lustre_fwrite(filp, (char *)msd, sizeof(*msd), &off);
+        CDEBUG(D_SUPER, "MDS mount_count is "LPU64", last_transno is "LPU64"\n",
+               mds->mds_mount_count, mds->mds_last_transno);
+        rc = fsfilt_write_record(obd, filp, (char *)msd, sizeof(*msd), &off);
         if (rc != sizeof(*msd)) {
                 CERROR("error writing MDS server data: rc = %d\n", rc);
                 if (rc > 0)
                         rc = -EIO;
                 GOTO(out, rc);
         }
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        rc = fsync_dev(filp->f_dentry->d_inode->i_rdev);
-#else
         rc = file_fsync(filp, filp->f_dentry, 1);
-#endif
         if (rc)
                 CERROR("error flushing MDS server data: rc = %d\n", rc);
 
@@ -1457,10 +1556,10 @@ out:
 }
 
 /* mount the file system (secretly) */
-static int mds_setup(struct obd_device *obddev, obd_count len, void *buf)
+static int mds_setup(struct obd_device *obd, obd_count len, void *buf)
 {
         struct obd_ioctl_data* data = buf;
-        struct mds_obd *mds = &obddev->u.mds;
+        struct mds_obd *mds = &obd->u.mds;
         struct vfsmount *mnt;
         int rc = 0;
         unsigned long page;
@@ -1473,9 +1572,12 @@ static int mds_setup(struct obd_device *obddev, obd_count len, void *buf)
         if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2)
                 RETURN(rc = -EINVAL);
 
-        obddev->obd_fsops = fsfilt_get_ops(data->ioc_inlbuf2);
-        if (IS_ERR(obddev->obd_fsops))
-                RETURN(rc = PTR_ERR(obddev->obd_fsops));
+        if (data->ioc_inlbuf4)
+                obd_str2uuid(&mds->mds_osc_uuid, data->ioc_inlbuf4);
+
+        obd->obd_fsops = fsfilt_get_ops(data->ioc_inlbuf2);
+        if (IS_ERR(obd->obd_fsops))
+                RETURN(rc = PTR_ERR(obd->obd_fsops));
 
 
         if (data->ioc_inllen3 > 0 && data->ioc_inlbuf3) {
@@ -1511,73 +1613,93 @@ static int mds_setup(struct obd_device *obddev, obd_count len, void *buf)
 
         spin_lock_init(&mds->mds_transno_lock);
         mds->mds_max_mdsize = sizeof(struct lov_mds_md);
-        rc = mds_fs_setup(obddev, mnt);
+        mds->mds_max_cookiesize = sizeof(struct llog_cookie);
+        rc = mds_fs_setup(obd, mnt);
         if (rc) {
                 CERROR("MDS filesystem method init failed: rc = %d\n", rc);
                 GOTO(err_put, rc);
         }
 
-        obddev->obd_namespace =
-                ldlm_namespace_new("mds_server", LDLM_NAMESPACE_SERVER);
-        if (obddev->obd_namespace == NULL) {
-                mds_cleanup(obddev, 0, 0);
-                GOTO(err_fs, rc = -ENOMEM);
+#ifdef ENABLE_ORPHANS
+        rc = llog_start_commit_thread();
+        if (rc < 0)
+                GOTO(err_fs, rc);
+#endif
+
+#ifdef ENABLE_ORPHANS
+        mds->mds_catalog = mds_get_catalog(obd);
+        if (IS_ERR(mds->mds_catalog))
+                GOTO(err_fs, rc = PTR_ERR(mds->mds_catalog));
+#endif
+
+        obd->obd_namespace = ldlm_namespace_new("mds_server",
+                                                LDLM_NAMESPACE_SERVER);
+        if (obd->obd_namespace == NULL) {
+                mds_cleanup(obd, 0);
+                GOTO(err_log, rc = -ENOMEM);
         }
 
         ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL,
-                           "mds_ldlm_client", &obddev->obd_ldlm_client);
+                           "mds_ldlm_client", &obd->obd_ldlm_client);
 
         mds->mds_has_lov_desc = 0;
+        obd->obd_replayable = 1;
 
         RETURN(0);
 
+err_log:
+#ifdef ENABLE_ORPHANS
+        mds_put_catalog(mds->mds_catalog);
+        /* No extra cleanup needed for llog_start_commit_thread() */
 err_fs:
-        mds_fs_cleanup(obddev, 0);
+#endif
+        mds_fs_cleanup(obd, 0);
 err_put:
         unlock_kernel();
         mntput(mds->mds_vfsmnt);
         mds->mds_sb = 0;
         lock_kernel();
 err_ops:
-        fsfilt_put_ops(obddev->obd_fsops);
+        fsfilt_put_ops(obd->obd_fsops);
         return rc;
 }
 
-static int mds_cleanup(struct obd_device *obddev, int force, int failover)
+static int mds_cleanup(struct obd_device *obd, int flags)
 {
-        struct super_block *sb;
-        struct mds_obd *mds = &obddev->u.mds;
+        struct mds_obd *mds = &obd->u.mds;
         ENTRY;
 
-        sb = mds->mds_sb;
-        if (!mds->mds_sb)
+        if (mds->mds_sb == NULL)
                 RETURN(0);
 
-        mds_update_server_data(mds);
-        mds_fs_cleanup(obddev, failover);
+#ifdef ENABLE_ORPHANS
+        mds_put_catalog(mds->mds_catalog);
+#endif
+        if (mds->mds_osc_obd)
+                obd_disconnect(&mds->mds_osc_conn, flags);
+        mds_update_server_data(obd);
+        mds_fs_cleanup(obd, flags);
 
         unlock_kernel();
 
         /* 2 seems normal on mds, (may_umount() also expects 2
           fwiw), but we only see 1 at this point in obdfilter. */
-        if (atomic_read(&obddev->u.mds.mds_vfsmnt->mnt_count) > 2){
-                CERROR("%s: mount point busy, mnt_count: %d\n",
-                       obddev->obd_name,
-                       atomic_read(&obddev->u.mds.mds_vfsmnt->mnt_count));
-        }
+        if (atomic_read(&obd->u.mds.mds_vfsmnt->mnt_count) > 2)
+                CERROR("%s: mount point busy, mnt_count: %d\n", obd->obd_name,
+                       atomic_read(&obd->u.mds.mds_vfsmnt->mnt_count));
 
         mntput(mds->mds_vfsmnt);
         mds->mds_sb = 0;
 
-        ldlm_namespace_free(obddev->obd_namespace);
+        ldlm_namespace_free(obd->obd_namespace);
 
-        if (obddev->obd_recovering)
-                target_cancel_recovery_timer(obddev);
+        if (obd->obd_recovering)
+                target_cancel_recovery_timer(obd);
         lock_kernel();
 #ifdef CONFIG_DEV_RDONLY
         dev_clear_rdonly(2);
 #endif
-        fsfilt_put_ops(obddev->obd_fsops);
+        fsfilt_put_ops(obd->obd_fsops);
 
         RETURN(0);
 }
@@ -1616,13 +1738,26 @@ static void fixup_handle_for_resent_req(struct ptlrpc_request *req,
                   remote_hdl.cookie);
 }
 
+/* Return the bits of @flag that are set in rep->lock_policy_res1, or 0 when
+ * no reply is supplied (@rep == NULL). */
+int intent_disposition(struct ldlm_reply *rep, int flag)
+{
+        return rep != NULL ? (rep->lock_policy_res1 & flag) : 0;
+}
+
+/* OR @flag into rep->lock_policy_res1; a NULL @rep is silently ignored. */
+void intent_set_disposition(struct ldlm_reply *rep, int flag)
+{
+        if (rep != NULL)
+                rep->lock_policy_res1 |= flag;
+}
+
 static int ldlm_intent_policy(struct ldlm_namespace *ns,
                               struct ldlm_lock **lockp, void *req_cookie,
                               ldlm_mode_t mode, int flags, void *data)
 {
         struct ptlrpc_request *req = req_cookie;
         struct ldlm_lock *lock = *lockp;
-        int rc = 0;
         ENTRY;
 
         if (!req_cookie)
@@ -1632,34 +1767,33 @@ static int ldlm_intent_policy(struct ldlm_namespace *ns,
                 /* an intent needs to be considered */
                 struct ldlm_intent *it;
                 struct mds_obd *mds = &req->rq_export->exp_obd->u.mds;
-                struct mds_body *mds_body;
                 struct ldlm_reply *rep;
-                struct lustre_handle lockh = { 0 };
+                struct lustre_handle lockh;
                 struct ldlm_lock *new_lock;
-                int rc, offset = 2, repsize[3] = {sizeof(struct ldlm_reply),
-                                                  sizeof(struct mds_body),
-                                                  mds->mds_max_mdsize};
+                int offset = 2, repsize[4] = {sizeof(struct ldlm_reply),
+                                              sizeof(struct mds_body),
+                                              mds->mds_max_mdsize,
+                                              mds->mds_max_cookiesize};
 
-                it = lustre_swab_reqbuf (req, 1, sizeof (*it),
-                                         lustre_swab_ldlm_intent);
+                it = lustre_swab_reqbuf(req, 1, sizeof (*it),
+                                        lustre_swab_ldlm_intent);
                 if (it == NULL) {
                         CERROR ("Intent missing\n");
-                        rc = req->rq_status = -EFAULT;
-                        RETURN (rc);
+                        req->rq_status = -EFAULT;
+                        RETURN(req->rq_status);
                 }
 
                 LDLM_DEBUG(lock, "intent policy, opc: %s",
                            ldlm_it2str(it->opc));
 
-                rc = lustre_pack_msg(3, repsize, NULL, &req->rq_replen,
-                                     &req->rq_repmsg);
-                if (rc) {
-                        rc = req->rq_status = -ENOMEM;
-                        RETURN(rc);
-                }
+                req->rq_status = lustre_pack_msg(it->opc == IT_UNLINK ? 4 : 3,
+                                                 repsize, NULL, &req->rq_replen,
+                                                 &req->rq_repmsg);
+                if (req->rq_status)
+                        RETURN(req->rq_status);
 
                 rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*rep));
-                rep->lock_policy_res1 = IT_INTENT_EXEC;
+                intent_set_disposition(rep, DISP_IT_EXECD);
 
                 fixup_handle_for_resent_req(req, lock, &lockh);
 
@@ -1667,45 +1801,28 @@ static int ldlm_intent_policy(struct ldlm_namespace *ns,
                 switch ((long)it->opc) {
                 case IT_OPEN:
                 case IT_CREAT|IT_OPEN:
-                        rc = mds_reint(req, offset, &lockh);
-                        /* We return a dentry to the client if IT_OPEN_POS is
-                         * set, or if we make it to the OPEN portion of the
-                         * programme (which implies that we created) */
-                        if (!(rep->lock_policy_res1 & IT_OPEN_POS ||
-                              rep->lock_policy_res1 & IT_OPEN_OPEN)) {
-                                rep->lock_policy_res2 = rc;
+                        /* XXX swab here to assert that an mds_open reint
+                         * packet is following */
+                        rep->lock_policy_res2 = mds_reint(req, offset, &lockh);
+                        /* We abort the lock if the lookup was negative and
+                         * we did not make it to the OPEN portion */
+                        if (intent_disposition(rep, DISP_LOOKUP_NEG) &&
+                            !intent_disposition(rep, DISP_OPEN_OPEN))
                                 RETURN(ELDLM_LOCK_ABORTED);
-                        }
-                        break;
-                case IT_UNLINK:
-                        rc = mds_reint(req, offset, &lockh);
-                        /* Don't return a lock if the unlink failed, or if we're
-                         * not sending back an EA */
-                        if (rc) {
-                                rep->lock_policy_res2 = rc;
-                                RETURN(ELDLM_LOCK_ABORTED);
-                        }
-                        if (req->rq_status != 0) {
-                                rep->lock_policy_res2 = req->rq_status;
-                                RETURN(ELDLM_LOCK_ABORTED);
-                        }
-                        mds_body = lustre_msg_buf(req->rq_repmsg, 1, sizeof (*mds_body));
-                        if (!(mds_body->valid & OBD_MD_FLEASIZE)) {
-                                rep->lock_policy_res2 = rc;
-                                RETURN(ELDLM_LOCK_ABORTED);
-                        }
                         break;
                 case IT_GETATTR:
                 case IT_LOOKUP:
                 case IT_READDIR:
-                        rc = mds_getattr_name(offset, req, &lockh);
+                        rep->lock_policy_res2 = mds_getattr_name(offset, req,
+                                                                 &lockh);
                         /* FIXME: we need to sit down and decide on who should
                          * set req->rq_status, who should return negative and
-                         * positive return values, and what they all mean. */
-                        if (rc) {
-                                rep->lock_policy_res2 = rc;
+                         * positive return values, and what they all mean. 
+                         * - replay: returns 0 & req->status is old status
+                         * - otherwise: returns req->status */
+                        if (!intent_disposition(rep, DISP_LOOKUP_POS) || 
+                            rep->lock_policy_res2)
                                 RETURN(ELDLM_LOCK_ABORTED);
-                        }
                         if (req->rq_status != 0) {
                                 rep->lock_policy_res2 = req->rq_status;
                                 RETURN(ELDLM_LOCK_ABORTED);
@@ -1717,10 +1834,17 @@ static int ldlm_intent_policy(struct ldlm_namespace *ns,
                 }
 
                 /* By this point, whatever function we called above must have
-                 * filled in 'lockh' or returned an error.  We want to give the
-                 * new lock to the client instead of whatever lock it was about
-                 * to get. */
+                 * either filled in 'lockh', been an intent replay, or returned
+                 * an error.  We want to allow replayed RPCs to not get a lock,
+                 * since we would just drop it below anyways because lock replay
+                 * is done separately by the client afterwards.  For regular
+                 * RPCs we want to give the new lock to the client instead of
+                 * whatever lock it was about to get.
+                 */
                 new_lock = ldlm_handle2lock(&lockh);
+                if (flags & LDLM_FL_INTENT_ONLY && !new_lock)
+                        RETURN(ELDLM_LOCK_ABORTED);
+
                 LASSERT(new_lock != NULL);
 
                 /* If we've already given this lock to a client once, then we
@@ -1785,14 +1909,13 @@ static int ldlm_intent_policy(struct ldlm_namespace *ns,
                 RETURN(ELDLM_LOCK_REPLACED);
         } else {
                 int size = sizeof(struct ldlm_reply);
-                rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen,
-                                     &req->rq_repmsg);
-                if (rc) {
+                if (lustre_pack_msg(1, &size, NULL, &req->rq_replen,
+                                    &req->rq_repmsg)) {
                         LBUG();
                         RETURN(-ENOMEM);
                 }
         }
-        RETURN(rc);
+        RETURN(0);
 }
 
 int mds_attach(struct obd_device *dev, obd_count len, void *data)
@@ -1906,7 +2029,7 @@ err_thread:
 }
 
 
-static int mdt_cleanup(struct obd_device *obddev, int force, int failover)
+static int mdt_cleanup(struct obd_device *obddev, int flags)
 {
         struct mds_obd *mds = &obddev->u.mds;
         ENTRY;
@@ -1928,15 +2051,15 @@ extern int mds_iocontrol(unsigned int cmd, struct lustre_handle *conn,
 
 /* use obd ops to offer management infrastructure */
 static struct obd_ops mds_obd_ops = {
-        o_owner:          THIS_MODULE,
-        o_attach:         mds_attach,
-        o_detach:         mds_detach,
-        o_connect:        mds_connect,
-        o_disconnect:     mds_disconnect,
-        o_setup:          mds_setup,
-        o_cleanup:        mds_cleanup,
-        o_iocontrol:      mds_iocontrol,
-        o_destroy_export: mds_destroy_export
+        o_owner:       THIS_MODULE,
+        o_attach:      mds_attach,
+        o_detach:      mds_detach,
+        o_connect:     mds_connect,
+        o_disconnect:  mds_disconnect,
+        o_setup:       mds_setup,
+        o_cleanup:     mds_cleanup,
+        o_statfs:      mds_obd_statfs,
+        o_iocontrol:   mds_iocontrol
 };
 
 static struct obd_ops mdt_obd_ops = {
@@ -1961,7 +2084,7 @@ static int __init mds_init(void)
         return 0;
 }
 
-static void __exit mds_exit(void)
+static void /*__exit*/ mds_exit(void)
 {
         ldlm_unregister_intent();
         class_unregister_type(LUSTRE_MDS_NAME);
index 5d6fa57..e355415 100644 (file)
@@ -37,71 +37,43 @@ struct lprocfs_vars lprocfs_mdt_module_vars[] = { {0} };
 
 #else
 
-static inline int lprocfs_mds_statfs(void *data, struct statfs *sfs)
-{
-        struct obd_device* dev = (struct obd_device*) data;
-        struct mds_obd *mds;
-
-        LASSERT(dev != NULL);
-        mds = &dev->u.mds;
-        return vfs_statfs(mds->mds_sb, sfs);
-}
-
-DEFINE_LPROCFS_STATFS_FCT(rd_blksize,     lprocfs_mds_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, lprocfs_mds_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree,  lprocfs_mds_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filestotal,  lprocfs_mds_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filesfree,   lprocfs_mds_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filegroups,  lprocfs_mds_statfs);
-
-int rd_fstype(char *page, char **start, off_t off, int count, int *eof,
-              void *data)
-{
-        struct obd_device *obd = (struct obd_device *)data;
-
-        LASSERT(obd != NULL);
-        LASSERT(obd->obd_fsops != NULL);
-        LASSERT(obd->obd_fsops->fs_type != NULL);
-        return snprintf(page, count, "%s\n", obd->obd_fsops->fs_type);
-}
-
-int lprocfs_mds_rd_mntdev(char *page, char **start, off_t off, int count,
-                          int *eof, void *data)
+static int lprocfs_mds_rd_mntdev(char *page, char **start, off_t off, int count,
+                                 int *eof, void *data)
 {
         struct obd_device* obd = (struct obd_device *)data;
 
         LASSERT(obd != NULL);
         LASSERT(obd->u.mds.mds_vfsmnt->mnt_devname);
         *eof = 1;
-        return snprintf(page, count, "%s\n",
-                        obd->u.mds.mds_vfsmnt->mnt_devname);
+
+        return snprintf(page, count, "%s\n",obd->u.mds.mds_vfsmnt->mnt_devname);
 }
 
 struct lprocfs_vars lprocfs_mds_obd_vars[] = {
-        { "uuid",       lprocfs_rd_uuid, 0, 0 },
-        { "blocksize",  rd_blksize,      0, 0 },
-        { "kbytestotal",rd_kbytestotal,  0, 0 },
-        { "kbytesfree", rd_kbytesfree,   0, 0 },
-        { "fstype",     rd_fstype,       0, 0 },
-        { "filestotal", rd_filestotal,   0, 0 },
-        { "filesfree",  rd_filesfree,    0, 0 },
-        { "filegroups", rd_filegroups,   0, 0 },
-        { "mntdev",     lprocfs_mds_rd_mntdev,    0, 0 },
+        { "uuid",         lprocfs_rd_uuid,        0, 0 },
+        { "blocksize",    lprocfs_rd_blksize,     0, 0 },
+        { "kbytestotal",  lprocfs_rd_kbytestotal, 0, 0 },
+        { "kbytesfree",   lprocfs_rd_kbytesfree,  0, 0 },
+        { "fstype",       lprocfs_rd_fstype,      0, 0 },
+        { "filestotal",   lprocfs_rd_filestotal,  0, 0 },
+        { "filesfree",    lprocfs_rd_filesfree,   0, 0 },
+        //{ "filegroups",   lprocfs_rd_filegroups,  0, 0 },
+        { "mntdev",       lprocfs_mds_rd_mntdev,  0, 0 },
         { 0 }
 };
 
 struct lprocfs_vars lprocfs_mds_module_vars[] = {
-        { "num_refs",   lprocfs_rd_numrefs, 0, 0 },
+        { "num_refs",     lprocfs_rd_numrefs,     0, 0 },
         { 0 }
 };
 
 struct lprocfs_vars lprocfs_mdt_obd_vars[] = {
-        { "uuid",       lprocfs_rd_uuid, 0, 0 },
+        { "uuid",         lprocfs_rd_uuid,        0, 0 },
         { 0 }
 };
 
 struct lprocfs_vars lprocfs_mdt_module_vars[] = {
-        { "num_refs",   lprocfs_rd_numrefs, 0, 0 },
+        { "num_refs",     lprocfs_rd_numrefs,     0, 0 },
         { 0 }
 };
 
index cefc680..56346ca 100644 (file)
@@ -37,6 +37,9 @@
 #include <linux/obd_support.h>
 #include <linux/lustre_lib.h>
 #include <linux/lustre_fsfilt.h>
+#include <portals/list.h>
+
+#include "mds_internal.h"
 
 /* This limit is arbitrary, but for now we fit it in 1 page (32k clients) */
 #define MDS_MAX_CLIENTS (PAGE_SIZE * 8)
  * we know its offset.
  */
 int mds_client_add(struct obd_device *obd, struct mds_obd *mds,
-                   struct mds_export_data *med, int cl_off)
+                   struct mds_export_data *med, int cl_idx)
 {
         unsigned long *bitmap = mds->mds_client_bitmap;
-        int new_client = (cl_off == -1);
+        int new_client = (cl_idx == -1);
 
         LASSERT(bitmap != NULL);
 
@@ -61,39 +64,40 @@ int mds_client_add(struct obd_device *obd, struct mds_obd *mds,
         if (!strcmp(med->med_mcd->mcd_uuid, "OBD_CLASS_UUID"))
                 RETURN(0);
 
-        /* the bitmap operations can handle cl_off > sizeof(long) * 8, so
+        /* the bitmap operations can handle cl_idx > sizeof(long) * 8, so
          * there's no need for extra complication here
          */
         if (new_client) {
-                cl_off = find_first_zero_bit(bitmap, MDS_MAX_CLIENTS);
+                cl_idx = find_first_zero_bit(bitmap, MDS_MAX_CLIENTS);
         repeat:
-                if (cl_off >= MDS_MAX_CLIENTS) {
+                if (cl_idx >= MDS_MAX_CLIENTS) {
                         CERROR("no room for clients - fix MDS_MAX_CLIENTS\n");
                         return -ENOMEM;
                 }
-                if (test_and_set_bit(cl_off, bitmap)) {
+                if (test_and_set_bit(cl_idx, bitmap)) {
                         CERROR("MDS client %d: found bit is set in bitmap\n",
-                               cl_off);
-                        cl_off = find_next_zero_bit(bitmap, MDS_MAX_CLIENTS,
-                                                    cl_off);
+                               cl_idx);
+                        cl_idx = find_next_zero_bit(bitmap, MDS_MAX_CLIENTS,
+                                                    cl_idx);
                         goto repeat;
                 }
         } else {
-                if (test_and_set_bit(cl_off, bitmap)) {
+                if (test_and_set_bit(cl_idx, bitmap)) {
                         CERROR("MDS client %d: bit already set in bitmap!!\n",
-                               cl_off);
+                               cl_idx);
                         LBUG();
                 }
         }
 
-        CDEBUG(D_INFO, "client at offset %d with UUID '%s' added\n",
-               cl_off, med->med_mcd->mcd_uuid);
+        CDEBUG(D_INFO, "client at index %d with UUID '%s' added\n",
+               cl_idx, med->med_mcd->mcd_uuid);
 
-        med->med_off = cl_off;
+        med->med_idx = cl_idx;
+        med->med_off = MDS_LR_CLIENT_START + (cl_idx * MDS_LR_CLIENT_SIZE);
 
         if (new_client) {
                 struct obd_run_ctxt saved;
-                loff_t off = MDS_LR_CLIENT + (cl_off * MDS_LR_SIZE);
+                loff_t off = med->med_off;
                 ssize_t written;
                 void *handle;
 
@@ -114,14 +118,16 @@ int mds_client_add(struct obd_device *obd, struct mds_obd *mds,
                  * could use any of them, or maybe an FSFILT_OP_NONE is best?
                  */
                 handle = fsfilt_start(obd,mds->mds_rcvd_filp->f_dentry->d_inode,
-                                      FSFILT_OP_SETATTR);
+                                      FSFILT_OP_SETATTR, NULL);
                 if (IS_ERR(handle)) {
                         written = PTR_ERR(handle);
                         CERROR("unable to start transaction: rc %d\n",
                                (int)written);
                 } else {
-                        written = lustre_fwrite(mds->mds_rcvd_filp,med->med_mcd,
-                                                sizeof(*med->med_mcd), &off);
+                        written = fsfilt_write_record(obd, mds->mds_rcvd_filp,
+                                                      (char *)med->med_mcd,
+                                                      sizeof(*med->med_mcd),
+                                                      &off);
                         fsfilt_commit(obd,mds->mds_rcvd_filp->f_dentry->d_inode,
                                       handle, 0);
                 }
@@ -132,8 +138,8 @@ int mds_client_add(struct obd_device *obd, struct mds_obd *mds,
                                 RETURN(written);
                         RETURN(-EIO);
                 }
-                CDEBUG(D_INFO, "wrote client mcd at off %u (len %u)\n",
-                       MDS_LR_CLIENT + (cl_off * MDS_LR_SIZE),
+                CDEBUG(D_INFO, "wrote client mcd at idx %u off %llu (len %u)\n",
+                       med->med_idx, med->med_off,
                        (unsigned int)sizeof(*med->med_mcd));
         }
         return 0;
@@ -143,11 +149,11 @@ int mds_client_free(struct obd_export *exp)
 {
         struct mds_export_data *med = &exp->exp_mds_data;
         struct mds_obd *mds = &exp->exp_obd->u.mds;
+        struct obd_device *obd = exp->exp_obd;
         struct mds_client_data zero_mcd;
         struct obd_run_ctxt saved;
         int written;
         unsigned long *bitmap = mds->mds_client_bitmap;
-        loff_t off;
 
         LASSERT(bitmap);
         if (!med->med_mcd)
@@ -157,30 +163,29 @@ int mds_client_free(struct obd_export *exp)
         if (!strcmp(med->med_mcd->mcd_uuid, "OBD_CLASS_UUID"))
                 GOTO(free_and_out, 0);
 
-        off = MDS_LR_CLIENT + (med->med_off * MDS_LR_SIZE);
-
-        CDEBUG(D_INFO, "freeing client at offset %u (%lld)with UUID '%s'\n",
-               med->med_off, off, med->med_mcd->mcd_uuid);
+        CDEBUG(D_INFO, "freeing client at index %u (%lld)with UUID '%s'\n",
+               med->med_idx, med->med_off, med->med_mcd->mcd_uuid);
 
-        if (!test_and_clear_bit(med->med_off, bitmap)) {
+        if (!test_and_clear_bit(med->med_idx, bitmap)) {
                 CERROR("MDS client %u: bit already clear in bitmap!!\n",
-                       med->med_off);
+                       med->med_idx);
                 LBUG();
         }
 
         memset(&zero_mcd, 0, sizeof zero_mcd);
         push_ctxt(&saved, &mds->mds_ctxt, NULL);
-        written = lustre_fwrite(mds->mds_rcvd_filp, (const char *)&zero_mcd,
-                                sizeof(zero_mcd), &off);
+        written = fsfilt_write_record(obd, mds->mds_rcvd_filp,
+                                      (char *)&zero_mcd, sizeof(zero_mcd),
+                                      &med->med_off);
         pop_ctxt(&saved, &mds->mds_ctxt, NULL);
 
         if (written != sizeof(zero_mcd)) {
-                CERROR("error zeroing out client %s off %d in %s: %d\n",
-                       med->med_mcd->mcd_uuid, med->med_off, LAST_RCVD,
+                CERROR("error zeroing out client %s index %d in %s: %d\n",
+                       med->med_mcd->mcd_uuid, med->med_idx, LAST_RCVD,
                        written);
         } else {
                 CDEBUG(D_INFO, "zeroed out disconnecting client %s at off %d\n",
-                       med->med_mcd->mcd_uuid, med->med_off);
+                       med->med_mcd->mcd_uuid, med->med_idx);
         }
 
  free_and_out:
@@ -199,20 +204,20 @@ static int mds_server_free_data(struct mds_obd *mds)
         return 0;
 }
 
-static int mds_read_last_rcvd(struct obd_device *obddev, struct file *f)
+static int mds_read_last_rcvd(struct obd_device *obd, struct file *file)
 {
-        struct mds_obd *mds = &obddev->u.mds;
+        struct mds_obd *mds = &obd->u.mds;
         struct mds_server_data *msd;
         struct mds_client_data *mcd = NULL;
         loff_t off = 0;
-        int cl_off;
-        unsigned long last_rcvd_size = f->f_dentry->d_inode->i_size;
+        int cl_idx;
+        unsigned long last_rcvd_size = file->f_dentry->d_inode->i_size;
         __u64 last_transno = 0;
-        __u64 last_mount;
+        __u64 mount_count;
         int rc = 0;
 
-        LASSERT(sizeof(struct mds_client_data) == MDS_LR_SIZE);
-        LASSERT(sizeof(struct mds_server_data) <= MDS_LR_CLIENT);
+        LASSERT(sizeof(struct mds_client_data) == MDS_LR_CLIENT_SIZE);
+        LASSERT(sizeof(struct mds_server_data) <= MDS_LR_SERVER_SIZE);
 
         OBD_ALLOC(msd, sizeof(*msd));
         if (!msd)
@@ -225,40 +230,71 @@ static int mds_read_last_rcvd(struct obd_device *obddev, struct file *f)
                 RETURN(-ENOMEM);
         }
 
-        rc = lustre_fread(f, (char *)msd, sizeof(*msd), &off);
-
         mds->mds_server_data = msd;
-        if (rc == 0) {
-                CERROR("%s: empty MDS %s, new MDS?\n", obddev->obd_name,
-                       LAST_RCVD);
+
+        if (last_rcvd_size == 0) {
+                CWARN("%s: initializing new %s\n", obd->obd_name, LAST_RCVD);
+                memcpy(msd->msd_uuid, obd->obd_uuid.uuid,sizeof(msd->msd_uuid));
+                msd->msd_server_size = cpu_to_le32(MDS_LR_SERVER_SIZE);
+                msd->msd_client_start = cpu_to_le32(MDS_LR_CLIENT_START);
+                msd->msd_client_size = cpu_to_le16(MDS_LR_CLIENT_SIZE);
+
                 RETURN(0);
         }
 
+        rc = fsfilt_read_record(obd, file, (char *)msd, sizeof(*msd), &off);
+
         if (rc != sizeof(*msd)) {
-                CERROR("error reading MDS %s: rc = %d\n", LAST_RCVD, rc);
+                CERROR("error reading MDS %s: rc = %d\n", LAST_RCVD,rc);
                 if (rc > 0)
                         rc = -EIO;
                 GOTO(err_msd, rc);
         }
+        if (!msd->msd_server_size)
+                msd->msd_server_size = cpu_to_le32(MDS_LR_SERVER_SIZE);
+        if (!msd->msd_client_start)
+                msd->msd_client_start = cpu_to_le32(MDS_LR_CLIENT_START);
+        if (!msd->msd_client_size)
+                msd->msd_client_size = cpu_to_le16(MDS_LR_CLIENT_SIZE);
+
+        if (msd->msd_feature_incompat) {
+                CERROR("unsupported incompat feature %x\n",
+                       le32_to_cpu(msd->msd_feature_incompat));
+                GOTO(err_msd, rc = -EINVAL);
+        }
+        if (msd->msd_feature_rocompat) {
+                CERROR("unsupported read-only feature %x\n",
+                       le32_to_cpu(msd->msd_feature_rocompat));
+                /* Do something like remount filesystem read-only */
+                GOTO(err_msd, rc = -EINVAL);
+        }
 
-        CDEBUG(D_INODE, "last_rcvd has size %lu (msd + %lu clients)\n",
-               last_rcvd_size, (last_rcvd_size - MDS_LR_CLIENT)/MDS_LR_SIZE);
-
-        /*
-         * When we do a clean MDS shutdown, we save the last_transno into
-         * the header.
-         */
         last_transno = le64_to_cpu(msd->msd_last_transno);
         mds->mds_last_transno = last_transno;
-        CDEBUG(D_INODE, "got "LPU64" for server last_rcvd value\n",
-               last_transno);
-
-        last_mount = le64_to_cpu(msd->msd_mount_count);
-        mds->mds_mount_count = last_mount;
-        CDEBUG(D_INODE, "got "LPU64" for server last_mount value\n",last_mount);
 
-        /* off is adjusted by lustre_fread, so we don't adjust it in the loop */
-        for (off = MDS_LR_CLIENT, cl_off = 0; off < last_rcvd_size; cl_off++) {
+        mount_count = le64_to_cpu(msd->msd_mount_count);
+        mds->mds_mount_count = mount_count;
+
+        CDEBUG(D_INODE, "%s: server last_transno: "LPU64"\n",
+               obd->obd_name, last_transno);
+        CDEBUG(D_INODE, "%s: server mount_count: "LPU64"\n",
+               obd->obd_name, mount_count);
+        CDEBUG(D_INODE, "%s: server data size: %u\n",
+               obd->obd_name, le32_to_cpu(msd->msd_server_size));
+        CDEBUG(D_INODE, "%s: per-client data start: %u\n",
+               obd->obd_name, le32_to_cpu(msd->msd_client_start));
+        CDEBUG(D_INODE, "%s: per-client data size: %u\n",
+               obd->obd_name, le32_to_cpu(msd->msd_client_size));
+        CDEBUG(D_INODE, "%s: last_rcvd size: %lu\n",
+               obd->obd_name, last_rcvd_size);
+        CDEBUG(D_INODE, "%s: last_rcvd clients: %lu\n", obd->obd_name,
+               (last_rcvd_size - MDS_LR_CLIENT_START) / MDS_LR_CLIENT_SIZE);
+
+        /* When we do a clean MDS shutdown, we save the last_transno into
+         * the header.  If we find clients with higher last_transno values
+         * then those clients may need recovery done. */
+        for (cl_idx = 0; off < last_rcvd_size; cl_idx++) {
+                __u64 last_transno;
                 int mount_age;
 
                 if (!mcd) {
@@ -267,10 +303,16 @@ static int mds_read_last_rcvd(struct obd_device *obddev, struct file *f)
                                 GOTO(err_msd, rc = -ENOMEM);
                 }
 
-                rc = lustre_fread(f, (char *)mcd, sizeof(*mcd), &off);
+                /* Don't assume off is incremented properly, in case
+                 * sizeof(*mcd) isn't the same as msd->msd_client_size.
+                 */
+                off = le32_to_cpu(msd->msd_client_start) +
+                        cl_idx * le16_to_cpu(msd->msd_client_size);
+                rc = fsfilt_read_record(obd, file, (char *)mcd,
+                                        sizeof(*mcd), &off);
                 if (rc != sizeof(*mcd)) {
                         CERROR("error reading MDS %s offset %d: rc = %d\n",
-                               LAST_RCVD, cl_off, rc);
+                               LAST_RCVD, cl_idx, rc);
                         if (rc > 0) /* XXX fatal error or just abort reading? */
                                 rc = -EIO;
                         break;
@@ -278,7 +320,7 @@ static int mds_read_last_rcvd(struct obd_device *obddev, struct file *f)
 
                 if (mcd->mcd_uuid[0] == '\0') {
                         CDEBUG(D_INFO, "skipping zeroed client at offset %d\n",
-                               cl_off);
+                               cl_idx);
                         continue;
                 }
 
@@ -287,10 +329,15 @@ static int mds_read_last_rcvd(struct obd_device *obddev, struct file *f)
                 /* These exports are cleaned up by mds_disconnect(), so they
                  * need to be set up like real exports as mds_connect() does.
                  */
-                mount_age = last_mount - le64_to_cpu(mcd->mcd_mount_count);
+                mount_age = mount_count - le64_to_cpu(mcd->mcd_mount_count);
                 if (mount_age < MDS_MOUNT_RECOV) {
-                        struct obd_export *exp = class_new_export(obddev);
+                        struct obd_export *exp = class_new_export(obd);
                         struct mds_export_data *med;
+                        CERROR("RCVRNG CLIENT uuid: %s off: %d lr: "LPU64
+                               "srv lr: "LPU64" mnt: "LPU64" last mount: "LPU64
+                               "\n", mcd->mcd_uuid, cl_idx,
+                               last_transno, le64_to_cpu(msd->msd_last_transno),
+                               le64_to_cpu(mcd->mcd_mount_count), mount_count);
 
                         if (!exp) {
                                 rc = -ENOMEM;
@@ -301,35 +348,35 @@ static int mds_read_last_rcvd(struct obd_device *obddev, struct file *f)
                                sizeof exp->exp_client_uuid.uuid);
                         med = &exp->exp_mds_data;
                         med->med_mcd = mcd;
-                        mds_client_add(obddev, mds, med, cl_off);
+                        mds_client_add(obd, mds, med, cl_idx);
                         /* create helper if export init gets more complex */
                         INIT_LIST_HEAD(&med->med_open_head);
                         spin_lock_init(&med->med_open_lock);
 
                         mcd = NULL;
-                        obddev->obd_recoverable_clients++;
+                        obd->obd_recoverable_clients++;
                         class_export_put(exp);
                 } else {
                         CDEBUG(D_INFO, "discarded client %d, UUID '%s', count "
-                               LPU64"\n", cl_off, mcd->mcd_uuid,
+                               LPU64"\n", cl_idx, mcd->mcd_uuid,
                                le64_to_cpu(mcd->mcd_mount_count));
                 }
 
-                CDEBUG(D_OTHER, "client at offset %d has last_transno = %Lu\n",
-                       cl_off, (unsigned long long)last_transno);
+                CDEBUG(D_OTHER, "client at offset %d has last_transno = "
+                       LPU64"\n", cl_idx, last_transno);
 
                 if (last_transno > mds->mds_last_transno)
                         mds->mds_last_transno = last_transno;
         }
 
-        obddev->obd_last_committed = mds->mds_last_transno;
-        if (obddev->obd_recoverable_clients) {
+        obd->obd_last_committed = mds->mds_last_transno;
+        if (obd->obd_recoverable_clients) {
                 CERROR("RECOVERY: %d recoverable clients, last_transno "
                        LPU64"\n",
-                       obddev->obd_recoverable_clients, mds->mds_last_transno);
-                obddev->obd_next_recovery_transno = obddev->obd_last_committed
+                       obd->obd_recoverable_clients, mds->mds_last_transno);
+                obd->obd_next_recovery_transno = obd->obd_last_committed
                         + 1;
-                obddev->obd_recovering = 1;
+                obd->obd_recovering = 1;
         }
 
         if (mcd)
@@ -342,12 +389,12 @@ err_msd:
         return rc;
 }
 
-static int mds_fs_prep(struct obd_device *obddev)
+static int mds_fs_prep(struct obd_device *obd)
 {
-        struct mds_obd *mds = &obddev->u.mds;
+        struct mds_obd *mds = &obd->u.mds;
         struct obd_run_ctxt saved;
         struct dentry *dentry;
-        struct file *f;
+        struct file *file;
         int rc;
 
         push_ctxt(&saved, &mds->mds_ctxt, NULL);
@@ -373,46 +420,76 @@ static int mds_fs_prep(struct obd_device *obddev)
         }
         mds->mds_fid_de = dentry;
 
-        f = filp_open(LAST_RCVD, O_RDWR | O_CREAT, 0644);
-        if (IS_ERR(f)) {
-                rc = PTR_ERR(f);
+        dentry = simple_mkdir(current->fs->pwd, "PENDING", 0777);
+        if (IS_ERR(dentry)) {
+                rc = PTR_ERR(dentry);
+                CERROR("cannot create PENDING directory: rc = %d\n", rc);
+                GOTO(err_fid, rc);
+        }
+        mds->mds_pending_dir = dentry;
+
+        dentry = simple_mkdir(current->fs->pwd, "LOGS", 0700);
+        if (IS_ERR(dentry)) {
+                rc = PTR_ERR(dentry);
+                CERROR("cannot create LOGS directory: rc = %d\n", rc);
+                GOTO(err_pending, rc);
+        }
+        mds->mds_logs_dir = dentry;
+
+        file = filp_open(LAST_RCVD, O_RDWR | O_CREAT, 0644);
+        if (IS_ERR(file)) {
+                rc = PTR_ERR(file);
                 CERROR("cannot open/create %s file: rc = %d\n", LAST_RCVD, rc);
-                GOTO(err_pop, rc = PTR_ERR(f));
+
+                GOTO(err_logs, rc = PTR_ERR(file));
         }
-        if (!S_ISREG(f->f_dentry->d_inode->i_mode)) {
+        if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
                 CERROR("%s is not a regular file!: mode = %o\n", LAST_RCVD,
-                       f->f_dentry->d_inode->i_mode);
+                       file->f_dentry->d_inode->i_mode);
                 GOTO(err_filp, rc = -ENOENT);
         }
 
-        rc = fsfilt_journal_data(obddev, f);
+        rc = fsfilt_journal_data(obd, file);
         if (rc) {
                 CERROR("cannot journal data on %s: rc = %d\n", LAST_RCVD, rc);
                 GOTO(err_filp, rc);
         }
 
-        rc = mds_read_last_rcvd(obddev, f);
+        rc = mds_read_last_rcvd(obd, file);
         if (rc) {
                 CERROR("cannot read %s: rc = %d\n", LAST_RCVD, rc);
                 GOTO(err_client, rc);
         }
-        mds->mds_rcvd_filp = f;
+        mds->mds_rcvd_filp = file;
+#ifdef I_SKIP_PDFLUSH
+        /*
+         * we need this to protect from deadlock
+         * pdflush vs. lustre_fwrite()
+         */
+        file->f_dentry->d_inode->i_flags |= I_SKIP_PDFLUSH;
+#endif
 err_pop:
         pop_ctxt(&saved, &mds->mds_ctxt, NULL);
 
         return rc;
 
 err_client:
-        class_disconnect_exports(obddev, 0);
+        class_disconnect_exports(obd, 0);
 err_filp:
-        if (filp_close(f, 0))
+        if (filp_close(file, 0))
                 CERROR("can't close %s after error\n", LAST_RCVD);
+err_logs:
+        dput(mds->mds_logs_dir);
+err_pending:
+        dput(mds->mds_pending_dir);
+err_fid:
+        dput(mds->mds_fid_de);
         goto err_pop;
 }
 
-int mds_fs_setup(struct obd_device *obddev, struct vfsmount *mnt)
+int mds_fs_setup(struct obd_device *obd, struct vfsmount *mnt)
 {
-        struct mds_obd *mds = &obddev->u.mds;
+        struct mds_obd *mds = &obd->u.mds;
         ENTRY;
 
         mds->mds_vfsmnt = mnt;
@@ -421,21 +498,20 @@ int mds_fs_setup(struct obd_device *obddev, struct vfsmount *mnt)
         mds->mds_ctxt.pwdmnt = mnt;
         mds->mds_ctxt.pwd = mnt->mnt_root;
         mds->mds_ctxt.fs = get_ds();
-        RETURN(mds_fs_prep(obddev));
+        RETURN(mds_fs_prep(obd));
 }
 
-int mds_fs_cleanup(struct obd_device *obddev, int failover)
+int mds_fs_cleanup(struct obd_device *obd, int flags)
 {
-        struct mds_obd *mds = &obddev->u.mds;
+        struct mds_obd *mds = &obd->u.mds;
         struct obd_run_ctxt saved;
         int rc = 0;
 
-        if (failover)
+        if (flags & OBD_OPT_FAILOVER)
                 CERROR("%s: shutting down for failover; client state will"
-                       " be preserved.\n", obddev->obd_name);
+                       " be preserved.\n", obd->obd_name);
 
-        class_disconnect_exports(obddev, failover); /* this cleans up client
-                                                   info too */
+        class_disconnect_exports(obd, flags); /* cleans up client info too */
         mds_server_free_data(mds);
 
         push_ctxt(&saved, &mds->mds_ctxt, NULL);
@@ -443,7 +519,15 @@ int mds_fs_cleanup(struct obd_device *obddev, int failover)
                 rc = filp_close(mds->mds_rcvd_filp, 0);
                 mds->mds_rcvd_filp = NULL;
                 if (rc)
-                        CERROR("last_rcvd file won't close, rc=%d\n", rc);
+                        CERROR("%s file won't close, rc=%d\n", LAST_RCVD, rc);
+        }
+        if (mds->mds_logs_dir) {
+                l_dput(mds->mds_logs_dir);
+                mds->mds_logs_dir = NULL;
+        }
+        if (mds->mds_pending_dir) {
+                l_dput(mds->mds_pending_dir);
+                mds->mds_pending_dir = NULL;
         }
         pop_ctxt(&saved, &mds->mds_ctxt, NULL);
         shrink_dcache_parent(mds->mds_fid_de);
@@ -451,3 +535,233 @@ int mds_fs_cleanup(struct obd_device *obddev, int failover)
 
         return rc;
 }
+
+/* This is a callback from the llog_* functions.
+ * Assumes caller has already pushed us into the kernel context.
+ *
+ * Close the file behind @loghandle and free the handle.  If the log is not
+ * a catalog and its header count is zero, it is also removed from
+ * @cathandle via llog_delete_log() and unlinked from the MDS LOGS directory.
+ *
+ * Returns the filp_close() result, or the vfs_unlink() error when the close
+ * itself succeeded but the unlink failed. */
+int mds_log_close(struct llog_handle *cathandle, struct llog_handle *loghandle)
+{
+        struct llog_object_hdr *llh = loghandle->lgh_hdr;
+        struct mds_obd *mds = &cathandle->lgh_obd->u.mds;
+        struct dentry *dchild = NULL;
+        int rc;
+        ENTRY;
+
+        /* If we are going to delete this log, grab a ref before we close
+         * it so we don't have to immediately do another lookup.
+         * The LOGS directory i_sem taken here stays held until after the
+         * vfs_unlink() at the bottom of this function.
+         */
+        if (llh->llh_hdr.lth_type != LLOG_CATALOG_MAGIC && llh->llh_count == 0){
+                CDEBUG(D_INODE, "deleting log file "LPX64":%x\n",
+                       loghandle->lgh_cookie.lgc_lgl.lgl_oid,
+                       loghandle->lgh_cookie.lgc_lgl.lgl_ogen);
+                down(&mds->mds_logs_dir->d_inode->i_sem);
+                dchild = dget(loghandle->lgh_file->f_dentry);
+                llog_delete_log(cathandle, loghandle);
+        } else {
+                CDEBUG(D_INODE, "closing log file "LPX64":%x\n",
+                       loghandle->lgh_cookie.lgc_lgl.lgl_oid,
+                       loghandle->lgh_cookie.lgc_lgl.lgl_ogen);
+        }
+
+        rc = filp_close(loghandle->lgh_file, 0);
+
+        llog_free_handle(loghandle); /* also removes loghandle from list */
+
+        /* dchild is only set on the delete path above, so this also tells
+         * us whether the directory i_sem is held and must be released. */
+        if (dchild) {
+                int err = vfs_unlink(mds->mds_logs_dir->d_inode, dchild);
+                if (err) {
+                        /* NOTE(review): "%*s" sets a minimum field width;
+                         * "%.*s" (precision) was presumably intended for a
+                         * length-bounded name -- confirm. */
+                        CERROR("error unlinking empty log %*s: rc %d\n",
+                               dchild->d_name.len, dchild->d_name.name, err);
+                        /* Keep an earlier close error in preference. */
+                        if (!rc)
+                                rc = err;
+                }
+                l_dput(dchild);
+                up(&mds->mds_logs_dir->d_inode->i_sem);
+        }
+        RETURN(rc);
+}
+
+/* This is a callback from the llog_* functions.
+ * Assumes caller has already pushed us into the kernel context.
+ *
+ * Open an existing log file identified by @logcookie (object id and
+ * generation are used to build the fid for mds_fid2dentry()) and return a
+ * newly allocated handle whose file, cookie, callbacks and obd are set.
+ *
+ * Returns the handle, or ERR_PTR(-errno): -ENOMEM when no handle can be
+ * allocated, -ENOENT when the dentry has no inode, otherwise the error from
+ * the lookup or from dentry_open(). */
+struct llog_handle *mds_log_open(struct obd_device *obd,
+                                 struct llog_cookie *logcookie)
+{
+        struct ll_fid fid = { .id = logcookie->lgc_lgl.lgl_oid,
+                              .generation = logcookie->lgc_lgl.lgl_ogen,
+                              .f_type = S_IFREG };
+        struct llog_handle *loghandle;
+        struct dentry *dchild;
+        int rc;
+        ENTRY;
+
+        loghandle = llog_alloc_handle();
+        if (loghandle == NULL)
+                RETURN(ERR_PTR(-ENOMEM));
+
+        /* The lookup is done with the LOGS directory i_sem held. */
+        down(&obd->u.mds.mds_logs_dir->d_inode->i_sem);
+        dchild = mds_fid2dentry(&obd->u.mds, &fid, NULL);
+        up(&obd->u.mds.mds_logs_dir->d_inode->i_sem);
+        if (IS_ERR(dchild)) {
+                rc = PTR_ERR(dchild);
+                CERROR("error looking up log file "LPX64":%x: rc %d\n",
+                       fid.id, fid.generation, rc);
+                GOTO(out, rc);
+        }
+
+        /* A negative dentry still holds a reference: drop it via out_put. */
+        if (dchild->d_inode == NULL) {
+                rc = -ENOENT;
+                CERROR("nonexistent log file "LPX64":%x: rc %d\n",
+                       fid.id, fid.generation, rc);
+                GOTO(out_put, rc);
+        }
+
+        /* dentry_open does a dput(de) and mntput(mds->mds_vfsmnt) on error,
+         * which is why the failure path below jumps to "out" rather than
+         * "out_put". */
+        mntget(obd->u.mds.mds_vfsmnt);
+        loghandle->lgh_file = dentry_open(dchild, obd->u.mds.mds_vfsmnt,
+                                          O_RDWR | O_LARGEFILE);
+        if (IS_ERR(loghandle->lgh_file)) {
+                rc = PTR_ERR(loghandle->lgh_file);
+                CERROR("error opening logfile "LPX64":%x: rc %d\n",
+                       fid.id, fid.generation, rc);
+                GOTO(out, rc);
+        }
+        memcpy(&loghandle->lgh_cookie, logcookie, sizeof(*logcookie));
+        loghandle->lgh_log_create = mds_log_create;
+        loghandle->lgh_log_open = mds_log_open;
+        loghandle->lgh_log_close = mds_log_close;
+        loghandle->lgh_obd = obd;
+
+        RETURN(loghandle);
+
+out_put:
+        l_dput(dchild);
+out:
+        llog_free_handle(loghandle);
+        return ERR_PTR(rc);
+}
+
+/* This is a callback from the llog_* functions.
+ * Assumes caller has already pushed us into the kernel context.
+ *
+ * Open or create a log file under LOGS/ and return a handle for it.  While
+ * the MDS has no catalog attached the file is "LOGS/catalog"; otherwise a
+ * new "LOGS/log<seconds>.<index>" file is created with O_EXCL and the name
+ * is regenerated with a higher index on -EEXIST.
+ *
+ * The handle's cookie is filled from the inode number and generation of the
+ * opened file.  Returns the handle, or ERR_PTR(-errno) on failure. */
+struct llog_handle *mds_log_create(struct obd_device *obd)
+{
+        /* "LOGS/log" (8) + %lu (up to 20 digits) + '.' + %u (up to 10
+         * digits) + NUL = 40 bytes; the old 24-byte buffer could overflow. */
+        char logbuf[40], *logname;
+        struct llog_handle *loghandle;
+        int rc, open_flags = O_RDWR | O_CREAT | O_LARGEFILE;
+        ENTRY;
+
+        loghandle = llog_alloc_handle();
+        if (!loghandle)
+                RETURN(ERR_PTR(-ENOMEM));
+
+retry:
+        if (!obd->u.mds.mds_catalog) {
+                logname = "LOGS/catalog";
+        } else {
+                /* Bounded write, and no trailing "\n": the previous format
+                 * string put a newline character into the file name. */
+                snprintf(logbuf, sizeof(logbuf), "LOGS/log%lu.%u",
+                         CURRENT_SECONDS, obd->u.mds.mds_catalog->lgh_index++);
+                open_flags |= O_EXCL;
+                logname = logbuf;
+        }
+        loghandle->lgh_file = filp_open(logname, open_flags, 0644);
+        if (IS_ERR(loghandle->lgh_file)) {
+                rc = PTR_ERR(loghandle->lgh_file);
+                if (rc == -EEXIST) {
+                        /* Name already taken: skip ahead and try again. */
+                        CDEBUG(D_HA, "collision in logfile %s creation\n",
+                               logname);
+                        obd->u.mds.mds_catalog->lgh_index++;
+                        goto retry;
+                }
+                CERROR("error opening/creating %s: rc %d\n", logname, rc);
+                GOTO(out_handle, rc);
+        }
+
+        loghandle->lgh_cookie.lgc_lgl.lgl_oid =
+                loghandle->lgh_file->f_dentry->d_inode->i_ino;
+        loghandle->lgh_cookie.lgc_lgl.lgl_ogen =
+                loghandle->lgh_file->f_dentry->d_inode->i_generation;
+        loghandle->lgh_log_create = mds_log_create;
+        loghandle->lgh_log_open = mds_log_open;
+        loghandle->lgh_log_close = mds_log_close;
+        loghandle->lgh_obd = obd;
+
+        RETURN(loghandle);
+
+out_handle:
+        llog_free_handle(loghandle);
+        return ERR_PTR(rc);
+}
+
+/* Return a handle for the MDS llog catalog, creating the catalog if needed.
+ *
+ * Runs inside the MDS context (push_ctxt/pop_ctxt).  If the server data
+ * records a catalog object id, that catalog is opened with mds_log_open();
+ * if the open fails the recorded id/generation are cleared so that a new
+ * catalog is created instead.  A newly created catalog's id/generation are
+ * stored in the server data and written out with mds_update_server_data().
+ *
+ * Returns the catalog handle, or an ERR_PTR() value when creation or the
+ * server data update fails. */
+struct llog_handle *mds_get_catalog(struct obd_device *obd)
+{
+        struct mds_server_data *msd = obd->u.mds.mds_server_data;
+        struct obd_run_ctxt saved;
+        struct llog_handle *cathandle = NULL;
+        int rc = 0;
+        ENTRY;
+
+        push_ctxt(&saved, &obd->u.mds.mds_ctxt, NULL);
+
+        if (msd->msd_catalog_oid) {
+                struct llog_cookie catcookie;
+
+                catcookie.lgc_lgl.lgl_oid = le64_to_cpu(msd->msd_catalog_oid);
+                catcookie.lgc_lgl.lgl_ogen = le32_to_cpu(msd->msd_catalog_ogen);
+                cathandle = mds_log_open(obd, &catcookie);
+                if (IS_ERR(cathandle)) {
+                        CERROR("error opening catalog "LPX64":%x: rc %d\n",
+                               catcookie.lgc_lgl.lgl_oid,
+                               catcookie.lgc_lgl.lgl_ogen,
+                               (int)PTR_ERR(cathandle));
+                        /* Forget the unusable catalog; the block below
+                         * will then create a fresh one. */
+                        msd->msd_catalog_oid = 0;
+                        msd->msd_catalog_ogen = 0;
+                }
+                /* ORPHANS FIXME: compare catalog UUID to msd_peeruuid */
+        }
+
+        if (!msd->msd_catalog_oid) {
+                struct llog_logid *lgl;
+
+                cathandle = mds_log_create(obd);
+                if (IS_ERR(cathandle)) {
+                        CERROR("error creating new catalog: rc %d\n",
+                               (int)PTR_ERR(cathandle));
+                        /* cathandle already carries the error to return. */
+                        GOTO(out, cathandle);
+                }
+                lgl = &cathandle->lgh_cookie.lgc_lgl;
+                msd->msd_catalog_oid = cpu_to_le64(lgl->lgl_oid);
+                msd->msd_catalog_ogen = cpu_to_le32(lgl->lgl_ogen);
+                rc = mds_update_server_data(obd);
+                if (rc) {
+                        CERROR("error writing new catalog to disk: rc %d\n",rc);
+                        GOTO(out_handle, rc);
+                }
+        }
+
+        /* NOTE(review): rc is assigned here but never examined; cathandle
+         * is returned even if llog_init_catalog() reports an error --
+         * confirm whether that is intended. */
+        rc = llog_init_catalog(cathandle, &obd->u.mds.mds_osc_uuid);
+
+out:
+        pop_ctxt(&saved, &obd->u.mds.mds_ctxt, NULL);
+        RETURN(cathandle);
+
+out_handle:
+        /* The catalog is passed as both arguments to close itself. */
+        mds_log_close(cathandle, cathandle);
+        cathandle = ERR_PTR(rc);
+        goto out;
+
+}
+
+/* Release a catalog handle: close every log handle linked on
+ * cathandle->lgh_list with mds_log_close(), close the catalog's own file,
+ * then free the catalog handle.  A failure to close the catalog file is
+ * only logged.
+ *
+ * NOTE(review): no context is pushed here, while mds_log_close() states
+ * that its caller has already entered the kernel context -- presumably the
+ * caller of this function must do so; confirm. */
+void mds_put_catalog(struct llog_handle *cathandle)
+{
+        struct llog_handle *loghandle, *n;
+        int rc;
+        ENTRY;
+
+        /* The _safe iterator is needed because each handle is closed (and
+         * handed to mds_log_close()) while the list is being walked. */
+        list_for_each_entry_safe(loghandle, n, &cathandle->lgh_list, lgh_list)
+                mds_log_close(cathandle, loghandle);
+
+        rc = filp_close(cathandle->lgh_file, 0);
+        if (rc)
+                CERROR("error closing catalog: rc %d\n", rc);
+
+        llog_free_handle(cathandle);
+        EXIT;
+}
index 0b62a92..c2d3d77 100644 (file)
@@ -1,9 +1,41 @@
+#ifndef _MDS_INTERNAL_H
+#define _MDS_INTERNAL_H
+/* Return the MDS device state reached from @req through
+ * req->rq_export->exp_obd->u.mds.  None of the pointers in that chain is
+ * checked for NULL here. */
+static inline struct mds_obd *mds_req2mds(struct ptlrpc_request *req)
+{
+        return &req->rq_export->exp_obd->u.mds;
+}
+
+/* mds/mds_fs.c */
+struct llog_handle *mds_log_create(struct obd_device *obd);
+int mds_log_close(struct llog_handle *cathandle, struct llog_handle *loghandle);
+struct llog_handle *mds_log_open(struct obd_device *obd,
+                                 struct llog_cookie *logcookie);
+struct llog_handle *mds_get_catalog(struct obd_device *obd);
+void mds_put_catalog(struct llog_handle *cathandle);
+
+/* mds/handler.c */
 struct mds_file_data *mds_mfd_new(void);
 void mds_mfd_put(struct mds_file_data *mfd);
 void mds_mfd_destroy(struct mds_file_data *mfd);
+
+/* mds/mds_reint.c */
+void mds_commit_cb(struct obd_device *, __u64 last_rcvd, void *data, int error);
+int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle,
+                       struct ptlrpc_request *req, int rc, __u32 op_data);
+
+/* mds/mds_lib.c */
 int mds_update_unpack(struct ptlrpc_request *, int offset,
                       struct mds_update_record *);
 
+/* mds/mds_lov.c */
+int mds_get_lovtgts(struct mds_obd *mds, int tgt_count,
+                    struct obd_uuid *uuidarray);
+
+/* mds/mds_open.c */
+int mds_open(struct mds_update_record *rec, int offset,
+             struct ptlrpc_request *req, struct lustre_handle *);
+int mds_pin(struct ptlrpc_request *req);
+
 /* mds/mds_fs.c */
 int mds_client_add(struct obd_device *obd, struct mds_obd *mds,
                   struct mds_export_data *med, int cl_off);
@@ -13,3 +45,5 @@ int mds_client_free(struct obd_export *exp);
 void mds_pack_inode2fid(struct ll_fid *fid, struct inode *inode);
 void mds_pack_inode2body(struct mds_body *body, struct inode *inode);
 #endif
+
+#endif /* _MDS_INTERNAL_H */
index 8f16795..93ac300 100644 (file)
@@ -57,17 +57,15 @@ void mds_pack_inode2fid(struct ll_fid *fid, struct inode *inode)
         fid->f_type = (S_IFMT & inode->i_mode);
 }
 
+/* Note that we can copy all of the fields, just some will not be "valid" */
 void mds_pack_inode2body(struct mds_body *b, struct inode *inode)
 {
-        b->valid = OBD_MD_FLID | OBD_MD_FLATIME | OBD_MD_FLMTIME |
-                OBD_MD_FLCTIME | OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
-                OBD_MD_FLUID | OBD_MD_FLGID | OBD_MD_FLTYPE | OBD_MD_FLMODE |
-                OBD_MD_FLNLINK | OBD_MD_FLGENER;
+        b->valid = OBD_MD_FLID | OBD_MD_FLCTIME | OBD_MD_FLUID | OBD_MD_FLGID |
+                OBD_MD_FLTYPE | OBD_MD_FLMODE | OBD_MD_FLNLINK | OBD_MD_FLGENER;
 
-        /* The MDS file size isn't authoritative for regular files, so don't
-         * even pretend. */
-        if (S_ISREG(inode->i_mode))
-                b->valid &= ~(OBD_MD_FLSIZE | OBD_MD_FLBLOCKS);
+        if (!S_ISREG(inode->i_mode))
+                b->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | OBD_MD_FLATIME |
+                            OBD_MD_FLMTIME;
 
         b->ino = inode->i_ino;
         b->atime = LTIME_S(inode->i_atime);
@@ -80,10 +78,12 @@ void mds_pack_inode2body(struct mds_body *b, struct inode *inode)
         b->gid = inode->i_gid;
         b->flags = inode->i_flags;
         b->rdev = b->rdev;
-        b->nlink = inode->i_nlink;
+        /* Return the correct link count for orphan inodes */
+        b->nlink = mds_inode_is_orphan(inode) ? 0 : inode->i_nlink;
         b->generation = inode->i_generation;
         b->suppgid = -1;
 }
+
 /* unpacking */
 static int mds_setattr_unpack(struct ptlrpc_request *req, int offset,
                               struct mds_update_record *r)
@@ -92,8 +92,8 @@ static int mds_setattr_unpack(struct ptlrpc_request *req, int offset,
         struct mds_rec_setattr *rec;
         ENTRY;
 
-        rec = lustre_swab_reqbuf (req, offset, sizeof (*rec),
-                                  lustre_swab_mds_rec_setattr);
+        rec = lustre_swab_reqbuf(req, offset, sizeof(*rec),
+                                 lustre_swab_mds_rec_setattr);
         if (rec == NULL)
                 RETURN (-EFAULT);
 
@@ -120,9 +120,14 @@ static int mds_setattr_unpack(struct ptlrpc_request *req, int offset,
                 if (r->ur_eadata == NULL)
                         RETURN (-EFAULT);
                 r->ur_eadatalen = req->rq_reqmsg->buflens[offset + 1];
-        } else {
-                r->ur_eadata = NULL;
-                r->ur_eadatalen = 0;
+        }
+
+        if (req->rq_reqmsg->bufcount > offset + 2) {
+                r->ur_logcookies = lustre_msg_buf(req->rq_reqmsg, offset + 2,0);
+                if (r->ur_eadata == NULL)
+                        RETURN (-EFAULT);
+
+                r->ur_cookielen = req->rq_reqmsg->buflens[offset + 2];
         }
 
         RETURN(0);
@@ -172,9 +177,6 @@ static int mds_create_unpack(struct ptlrpc_request *req, int offset,
                 if (r->ur_tgt == NULL)
                         RETURN (-EFAULT);
                 r->ur_tgtlen = req->rq_reqmsg->buflens[offset + 2];
-        } else {
-                r->ur_tgt = NULL;
-                r->ur_tgtlen = 0;
         }
         RETURN(0);
 }
index 02c53cc..ecca88c 100644 (file)
@@ -32,6 +32,9 @@
 #include <linux/obd_class.h>
 #include <linux/obd_lov.h>
 #include <linux/lustre_lib.h>
+#include <linux/lustre_fsfilt.h>
+
+#include "mds_internal.h"
 
 void le_lov_desc_to_cpu (struct lov_desc *ld)
 {
@@ -141,6 +144,7 @@ int mds_set_lovdesc(struct obd_device *obd, struct lov_desc *desc,
         mds->mds_has_lov_desc = 1;
         /* XXX the MDS should not really know about this */
         mds->mds_max_mdsize = lov_mds_md_size(desc->ld_tgt_count);
+        mds->mds_max_cookiesize = desc->ld_tgt_count*sizeof(struct llog_cookie);
 
 out:
         pop_ctxt(&saved, &mds->mds_ctxt, NULL);
@@ -182,7 +186,8 @@ out:
         return rc;
 }
 
-int mds_get_lovtgts(struct mds_obd *mds, int tgt_count,struct obd_uuid *uuidarray)
+int mds_get_lovtgts(struct mds_obd *mds, int tgt_count,
+                    struct obd_uuid *uuidarray)
 {
         struct obd_run_ctxt saved;
         struct file *f;
@@ -266,13 +271,13 @@ int mds_iocontrol(unsigned int cmd, struct lustre_handle *conn,
 
                 RETURN(rc);
 
-        case OBD_IOC_SET_READONLY:
+        case OBD_IOC_SET_READONLY: {
+                BDEVNAME_DECLARE_STORAGE(tmp);
                 CERROR("setting device %s read-only\n",
-                       ll_bdevname(obd->u.mds.mds_sb->s_dev));
-#ifdef CONFIG_DEV_RDONLY
+                       ll_bdevname(obd->u.mds.mds_sb->s_dev, tmp));
                 dev_set_rdonly(obd->u.mds.mds_sb->s_dev, 2);
-#endif
                 RETURN(0);
+        }
 
         case OBD_IOC_ABORT_RECOVERY:
                 CERROR("aborting recovery for device %s\n", obd->obd_name);
index 04d6ee9..2bd2f8c 100644 (file)
 
 #include "mds_internal.h"
 
-extern inline struct mds_obd *mds_req2mds(struct ptlrpc_request *req);
-int mds_finish_transno(struct mds_obd *mds, struct inode *i, void *handle,
-                       struct ptlrpc_request *req, int rc, __u32 op_data);
-extern int enqueue_ordered_locks(int lock_mode, struct obd_device *obd,
-                                 struct ldlm_res_id *p1_res_id,
-                                 struct ldlm_res_id *p2_res_id,
-                                 struct ldlm_res_id *c1_res_id,
-                                 struct ldlm_res_id *c2_res_id,
-                                 struct lustre_handle *p1_lockh,
-                                 struct lustre_handle *p2_lockh,
-                                 struct lustre_handle *c1_lockh,
-                                 struct lustre_handle *c2_lockh);
-
 struct mds_file_data *mds_dentry_open(struct dentry *dentry,
                                       struct vfsmount *mnt,
                                       int flags,
@@ -65,17 +52,16 @@ struct mds_file_data *mds_dentry_open(struct dentry *dentry,
 {
         struct mds_export_data *med = &req->rq_export->exp_mds_data;
         struct inode *inode;
-        int mode;
         struct mds_file_data *mfd;
-        int error;
+        int mode, error;
 
         mfd = mds_mfd_new();
-        if (!mfd) {
+        if (mfd == NULL) {
                 CERROR("mds: out of memory\n");
                 GOTO(cleanup_dentry, error = -ENOMEM);
         }
 
-        mode = (flags+1) & O_ACCMODE;
+        mode = (flags + 1) & O_ACCMODE;
         inode = dentry->d_inode;
 
         if (mode & FMODE_WRITE) {
@@ -107,6 +93,7 @@ void reconstruct_open(struct mds_update_record *rec, int offset,
                       struct ptlrpc_request *req,
                       struct lustre_handle *child_lockh)
 {
+        struct ptlrpc_request *oldreq = req->rq_export->exp_outstanding_reply;
         struct mds_export_data *med = &req->rq_export->exp_mds_data;
         struct mds_client_data *mcd = med->med_mcd;
         struct mds_obd *mds = mds_req2mds(req);
@@ -115,7 +102,7 @@ void reconstruct_open(struct mds_update_record *rec, int offset,
         struct dentry *parent, *child;
         struct ldlm_reply *rep;
         struct mds_body *body;
-        int disp, rc;
+        int rc;
         struct list_head *t;
         int put_child = 1;
         ENTRY;
@@ -127,14 +114,13 @@ void reconstruct_open(struct mds_update_record *rec, int offset,
         /* copy rc, transno and disp; steal locks */
         req->rq_transno = mcd->mcd_last_transno;
         req->rq_status = mcd->mcd_last_result;
-        disp = rep->lock_policy_res1 = mcd->mcd_last_data;
+        intent_set_disposition(rep, mcd->mcd_last_data);
 
-        if (req->rq_export->exp_outstanding_reply)
+        if (oldreq)
                 mds_steal_ack_locks(req->rq_export, req);
 
-        /* We never care about these. */
-        disp &= ~(IT_OPEN_LOOKUP | IT_OPEN_POS | IT_OPEN_NEG);
-        if (!disp) {
+        /* Only replay if create or open actually happened. */
+        if (!intent_disposition(rep, DISP_OPEN_CREATE | DISP_OPEN_OPEN) ) {
                 EXIT;
                 return; /* error looking up parent or child */
         }
@@ -149,11 +135,11 @@ void reconstruct_open(struct mds_update_record *rec, int offset,
                 GOTO(out_dput, 0); /* child not present to open */
         }
 
-        /* At this point, we know we have a child, which means that we'll send
-         * it back _unless_ it was open failed, _and_ we didn't create the file.
-         * I love you guys.  No, really.
+        /* At this point, we know we have a child. We'll send
+         * it back _unless_ it was not created and the open failed.
          */
-        if (((disp & (IT_OPEN_OPEN | IT_OPEN_CREATE)) == IT_OPEN_OPEN) &&
+        if (intent_disposition(rep, DISP_OPEN_OPEN) &&
+            !intent_disposition(rep, DISP_OPEN_CREATE) &&
             req->rq_status) {
                 GOTO(out_dput, 0);
         }
@@ -165,8 +151,14 @@ void reconstruct_open(struct mds_update_record *rec, int offset,
         if (S_ISREG(child->d_inode->i_mode)) {
                 rc = mds_pack_md(obd, req->rq_repmsg, 2, body,
                                  child->d_inode);
+
                 if (rc)
                         LASSERT(rc == req->rq_status);
+
+                /* If we have LOV EA data, the OST holds size, mtime */
+                if (!(body->valid & OBD_MD_FLEASIZE))
+                        body->valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
+                                        OBD_MD_FLATIME | OBD_MD_FLMTIME);
         } else {
                 /* XXX need to check this case */
         }
@@ -185,7 +177,7 @@ void reconstruct_open(struct mds_update_record *rec, int offset,
         /* If we didn't get as far as trying to open, then some locking thing
          * probably went wrong, and we'll just bail here.
          */
-        if ((disp & IT_OPEN_OPEN) == 0)
+        if (!intent_disposition(rep, DISP_OPEN_OPEN))
                 GOTO(out_dput, 0);
 
         /* If we failed, then we must have failed opening, so don't look for
@@ -197,12 +189,12 @@ void reconstruct_open(struct mds_update_record *rec, int offset,
         mfd = NULL;
         list_for_each(t, &med->med_open_head) {
                 mfd = list_entry(t, struct mds_file_data, mfd_list);
-                if (mfd->mfd_xid == req->rq_xid) 
+                if (mfd->mfd_xid == req->rq_xid)
                         break;
                 mfd = NULL;
         }
 
-        if (req->rq_export->exp_outstanding_reply) {
+        if (oldreq) {
                 /* if we're not recovering, it had better be found */
                 LASSERT(mfd);
         } else if (mfd == NULL) {
@@ -226,35 +218,180 @@ void reconstruct_open(struct mds_update_record *rec, int offset,
         EXIT;
 }
 
+/* Open ("pin") the inode named by the fid in the request body.
+ *
+ * The fid is first looked up by name in the MDS PENDING directory; an inode
+ * found there is marked as an orphan.  Otherwise the fid is resolved with
+ * mds_fid2dentry().  The dentry is then opened with mds_dentry_open() and
+ * the cookie of the resulting mfd handle is stored in the reply body.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int mds_pin(struct ptlrpc_request *req)
+{
+        struct mds_obd *mds = mds_req2mds(req);
+        struct inode *pending_dir = mds->mds_pending_dir->d_inode;
+        struct mds_file_data *mfd = NULL;
+        struct mds_body *body;
+        struct dentry *dchild;
+        struct obd_run_ctxt saved;
+        char fidname[LL_FID_NAMELEN];
+        int fidlen = 0, rc, cleanup_phase = 0, size = sizeof(*body);
+        ENTRY;
+
+        body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body));
+        /* Don't dereference a missing or short request buffer. */
+        if (body == NULL)
+                RETURN(-EFAULT);
+
+        down(&pending_dir->i_sem);
+        fidlen = ll_fid2str(fidname, body->fid1.id, body->fid1.generation);
+        dchild = lookup_one_len(fidname, mds->mds_pending_dir, fidlen);
+        if (IS_ERR(dchild)) {
+                up(&pending_dir->i_sem);
+                rc = PTR_ERR(dchild);
+                CERROR("error looking up %s in PENDING: rc = %d\n",
+                       fidname, rc);
+                RETURN(rc);
+        }
+
+        cleanup_phase = 2; /* child dentry */
+
+        if (dchild->d_inode) {
+                up(&pending_dir->i_sem);
+                mds_inode_set_orphan(dchild->d_inode);
+                mds_pack_inode2fid(&body->fid1, dchild->d_inode);
+                mds_pack_inode2body(body, dchild->d_inode);
+                GOTO(openit, rc = 0);
+        }
+        dput(dchild);
+        up(&pending_dir->i_sem);
+
+        /* We didn't find it in PENDING so it isn't an orphan.  See
+         * if it's a regular inode. */
+        dchild = mds_fid2dentry(mds, &body->fid1, NULL);
+        if (!IS_ERR(dchild)) {
+                mds_pack_inode2fid(&body->fid1, dchild->d_inode);
+                mds_pack_inode2body(body, dchild->d_inode);
+                GOTO(openit, rc = 0);
+        }
+
+        /* We didn't find this inode on disk, but we're trying to pin it.
+         * This should never happen. */
+        CERROR("ENOENT during mds_pin for fid "LPU64"/%u\n", body->fid1.id,
+               body->fid1.generation);
+        RETURN(-ENOENT);
+
+ openit:
+        /* dentry_open does a dput(de) and mntput(mds->mds_vfsmnt) on error */
+        mfd = mds_dentry_open(dchild, mds->mds_vfsmnt, body->flags, req);
+        if (IS_ERR(mfd)) {
+                dchild = NULL; /* prevent a double dput in cleanup phase 2 */
+                GOTO(cleanup, rc = PTR_ERR(mfd));
+        }
+
+        /* Raise the phase as soon as the mfd exists, so that a failure to
+         * pack the reply below destroys it instead of leaking it. */
+        cleanup_phase = 4; /* mfd allocated */
+
+        rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
+        if (rc) {
+                CERROR("out of memory\n");
+                GOTO(cleanup, rc);
+        }
+        body = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*body));
+
+        body->handle.cookie = mfd->mfd_handle.h_cookie;
+        CDEBUG(D_INODE, "mfd %p, cookie "LPX64"\n", mfd,
+               mfd->mfd_handle.h_cookie);
+        GOTO(cleanup, rc = 0);
+
+ cleanup:
+        push_ctxt(&saved, &mds->mds_ctxt, NULL);
+        rc = mds_finish_transno(mds, dchild ? dchild->d_inode : NULL, NULL,
+                                req, rc, 0);
+        pop_ctxt(&saved, &mds->mds_ctxt, NULL);
+        /* XXX what do we do here if mds_finish_transno itself failed? */
+        switch (cleanup_phase) {
+        case 4:
+                if (rc)
+                        mds_mfd_destroy(mfd);
+                /* fallthrough */
+        case 2:
+                /* rc is tested first, so dchild is only dereferenced on
+                 * the success path, where it is non-NULL. */
+                if (rc || S_ISLNK(dchild->d_inode->i_mode))
+                        l_dput(dchild);
+        }
+        RETURN(rc);
+}
+
 int mds_open(struct mds_update_record *rec, int offset,
              struct ptlrpc_request *req, struct lustre_handle *child_lockh)
 {
+        /* XXX ALLOCATE _something_ - 464 bytes on stack here */
         static const char acc_table [] = {[O_RDONLY] MAY_READ,
                                           [O_WRONLY] MAY_WRITE,
                                           [O_RDWR]   MAY_READ | MAY_WRITE};
         struct mds_obd *mds = mds_req2mds(req);
         struct obd_device *obd = req->rq_export->exp_obd;
-        struct ldlm_reply *rep;
-        struct mds_body *body;
-        struct dentry *dchild = NULL, *parent;
+        struct ldlm_reply *rep = NULL;
+        struct mds_body *body = NULL;
+        struct dentry *dchild = NULL, *parent = NULL;
         struct mds_export_data *med;
         struct mds_file_data *mfd = NULL;
         struct ldlm_res_id child_res_id = { .name = {0} };
         struct lustre_handle parent_lockh;
         int rc = 0, parent_mode, child_mode = LCK_PR, lock_flags, created = 0;
-        int cleanup_phase = 0;
+        int cleanup_phase = 0, acc_mode;
         void *handle = NULL;
-        int acc_mode;
         ENTRY;
 
-        LASSERT(offset == 2);                  /* only called via intent */
-        rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*rep));
-        body = lustre_msg_buf(req->rq_repmsg, 1, sizeof (*body));
+        if (offset == 2) { /* intent */
+                rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*rep));
+                body = lustre_msg_buf(req->rq_repmsg, 1, sizeof (*body));
+        } else if (offset == 0) { /* non-intent reint */
+                body = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*body));
+        } else {
+                body = NULL;
+                LBUG();
+        }
 
         MDS_CHECK_RESENT(req, reconstruct_open(rec, offset, req, child_lockh));
 
+        /* Step 0: If we are passed a fid, then we assume the client already
+         * opened this file and is only replaying the RPC, so we open the
+         * inode by fid (at some large expense in security).
+         */
+        if (rec->ur_fid2->id) {
+                struct inode *pending_dir = mds->mds_pending_dir->d_inode;
+                char fidname[LL_FID_NAMELEN];
+                int fidlen = 0;
+
+                down(&pending_dir->i_sem);
+                fidlen = ll_fid2str(fidname, rec->ur_fid2->id,
+                                    rec->ur_fid2->generation);
+                dchild = lookup_one_len(fidname, mds->mds_pending_dir, fidlen);
+                if (IS_ERR(dchild)) {
+                        up(&pending_dir->i_sem);
+                        rc = PTR_ERR(dchild);
+                        CERROR("error looking up %s in PENDING: rc = %d\n",
+                               fidname, rc);
+                        RETURN(rc);
+                }
+
+                if (dchild->d_inode) {
+                        up(&pending_dir->i_sem);
+                        mds_inode_set_orphan(dchild->d_inode);
+                        mds_pack_inode2fid(&body->fid1, dchild->d_inode);
+                        mds_pack_inode2body(body, dchild->d_inode);
+                        cleanup_phase = 2;
+                        GOTO(openit, rc = 0);
+                }
+                dput(dchild);
+                up(&pending_dir->i_sem);
+
+                /* We didn't find it in PENDING so it isn't an orphan.  See
+                 * if it was a regular inode that was previously created.
+                 */
+                dchild = mds_fid2dentry(mds, rec->ur_fid2, NULL);
+                if (!IS_ERR(dchild)) {
+                        mds_pack_inode2fid(&body->fid1, dchild->d_inode);
+                        mds_pack_inode2body(body, dchild->d_inode);
+                        cleanup_phase = 2;
+                        GOTO(openit, rc = 0);
+                }
+
+                /* We didn't find the correct inode on disk either, so we
+                 * need to re-create it via a regular replay.  Do that below.
+                 */
+                LASSERT(rec->ur_flags & O_CREAT);
+        }
+        LASSERT(offset == 2); /* If we got here, we must be called via intent */
+
         med = &req->rq_export->exp_mds_data;
-        rep->lock_policy_res1 |= IT_OPEN_LOOKUP;
         if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OPEN_PACK)) {
                 CERROR("test case OBD_FAIL_MDS_OPEN_PACK\n");
                 req->rq_status = -ENOMEM;
@@ -263,11 +400,12 @@ int mds_open(struct mds_update_record *rec, int offset,
 
         if ((rec->ur_flags & O_ACCMODE) >= sizeof (acc_table))
                 RETURN(-EINVAL);
-        acc_mode = acc_table [rec->ur_flags & O_ACCMODE];
+        acc_mode = acc_table[rec->ur_flags & O_ACCMODE];
         if ((rec->ur_flags & O_TRUNC) != 0)
                 acc_mode |= MAY_WRITE;
 
         /* Step 1: Find and lock the parent */
+        intent_set_disposition(rep, DISP_LOOKUP_EXECD);
         parent_mode = (rec->ur_flags & O_CREAT) ? LCK_PW : LCK_PR;
         parent = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, parent_mode,
                                        &parent_lockh);
@@ -288,38 +426,88 @@ int mds_open(struct mds_update_record *rec, int offset,
         cleanup_phase = 2; /* child dentry */
 
         if (dchild->d_inode)
-                rep->lock_policy_res1 |= IT_OPEN_POS;
+                intent_set_disposition(rep, DISP_LOOKUP_POS);
         else
-                rep->lock_policy_res1 |= IT_OPEN_NEG;
+                intent_set_disposition(rep, DISP_LOOKUP_NEG);
 
         /* Step 3: If the child was negative, and we're supposed to,
          * create it. */
         if (!dchild->d_inode) {
+                unsigned long ino = rec->ur_fid2->id;
+
                 if (!(rec->ur_flags & O_CREAT)) {
                         /* It's negative and we weren't supposed to create it */
                         GOTO(cleanup, rc = -ENOENT);
                 }
 
-                rep->lock_policy_res1 |= IT_OPEN_CREATE;
-                handle = fsfilt_start(obd, parent->d_inode, FSFILT_OP_CREATE);
+                intent_set_disposition(rep, DISP_OPEN_CREATE);
+                handle = fsfilt_start(obd, parent->d_inode, FSFILT_OP_CREATE,
+                                      NULL);
                 if (IS_ERR(handle)) {
                         rc = PTR_ERR(handle);
                         handle = NULL;
                         GOTO(cleanup, rc);
                 }
+                if (ino)
+                        dchild->d_fsdata = (void *)(unsigned long)ino;
+
                 rc = vfs_create(parent->d_inode, dchild, rec->ur_mode);
-                if (rc)
+                if (dchild->d_fsdata == (void *)(unsigned long)ino)
+                        dchild->d_fsdata = NULL;
+
+                if (rc) {
+                        CDEBUG(D_INODE, "error during create: %d\n", rc);
                         GOTO(cleanup, rc);
-                created = 1;
+                } else {
+                        struct iattr iattr;
+                        struct inode *inode = dchild->d_inode;
+
+                        if (ino) {
+                                LASSERT(ino == inode->i_ino);
+                                /* Written as part of setattr */
+                                inode->i_generation = rec->ur_fid2->generation;
+                                CDEBUG(D_HA, "recreated ino %lu with gen %x\n",
+                                       inode->i_ino, inode->i_generation);
+                        }
+
+                        created = 1;
+                        LTIME_S(iattr.ia_atime) = rec->ur_time;
+                        LTIME_S(iattr.ia_ctime) = rec->ur_time;
+                        LTIME_S(iattr.ia_mtime) = rec->ur_time;
+
+                        iattr.ia_uid = rec->ur_uid;
+                        if (parent->d_inode->i_mode & S_ISGID) {
+                                iattr.ia_gid = parent->d_inode->i_gid;
+                        } else
+                                iattr.ia_gid = rec->ur_gid;
+
+                        iattr.ia_valid = ATTR_UID | ATTR_GID | ATTR_ATIME |
+                                ATTR_MTIME | ATTR_CTIME;
+
+                        rc = fsfilt_setattr(obd, dchild, handle, &iattr, 0);
+                        if (rc) {
+                                CERROR("error on setattr: rc = %d\n", rc);
+                                /* XXX should we abort here in case of error? */
+                        }
+                }
+
                 child_mode = LCK_PW;
                 acc_mode = 0;                  /* Don't check for permissions */
         }
 
+        LASSERT(!mds_inode_is_orphan(dchild->d_inode));
+
         /* Step 4: It's positive, so lock the child */
         child_res_id.name[0] = dchild->d_inode->i_ino;
         child_res_id.name[1] = dchild->d_inode->i_generation;
  reacquire:
         lock_flags = 0;
+        /* For the open(O_CREAT) case, this would technically be a lock
+         * inversion (getting a VFS lock after starting a transaction),
+         * but in that case we cannot possibly block on this lock because
+         * we just created the child and also hold a write lock on the
+         * parent, so nobody could be holding the lock yet.
+         */
         rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
                               child_res_id, LDLM_PLAIN, NULL, 0, child_mode,
                               &lock_flags, ldlm_completion_ast,
@@ -346,15 +534,19 @@ int mds_open(struct mds_update_record *rec, int offset,
 
                 /* An append-only file must be opened in append mode for
                  * writing */
-                if (IS_APPEND(dchild->d_inode) &&
-                    (acc_mode & MAY_WRITE) != 0 &&
+                if (IS_APPEND(dchild->d_inode) && (acc_mode & MAY_WRITE) != 0 &&
                     ((rec->ur_flags & O_APPEND) == 0 ||
                      (rec->ur_flags & O_TRUNC) != 0))
-                        GOTO (cleanup, rc = -EPERM);
+                        GOTO(cleanup, rc = -EPERM);
 
                 rc = mds_pack_md(obd, req->rq_repmsg, 2, body, dchild->d_inode);
                 if (rc)
                         GOTO(cleanup, rc);
+
+                /* If we have LOV EA data, the OST holds size, mtime */
+                if (!(body->valid & OBD_MD_FLEASIZE))
+                        body->valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
+                                        OBD_MD_FLATIME | OBD_MD_FLMTIME);
         }
 
         if (!created && (rec->ur_flags & O_CREAT) &&
@@ -364,9 +556,9 @@ int mds_open(struct mds_update_record *rec, int offset,
                 GOTO(cleanup, rc = -EEXIST); // returns a lock to the client
         }
 
-        /* If we're opening a file without an EA, the client needs a write
-         * lock. */
-        if (S_ISREG(dchild->d_inode->i_mode) &&
+        /* If we're opening a file without an EA for write, the client needs
+         * a write lock. */
+        if (S_ISREG(dchild->d_inode->i_mode) && (rec->ur_flags & O_ACCMODE) &&
             child_mode != LCK_PW && !(body->valid & OBD_MD_FLEASIZE)) {
                 ldlm_lock_decref(child_lockh, child_mode);
                 child_mode = LCK_PW;
@@ -381,15 +573,14 @@ int mds_open(struct mds_update_record *rec, int offset,
                 GOTO(cleanup, rc = -ENOTDIR);
 
         /* Step 5: mds_open it */
-        rep->lock_policy_res1 |= IT_OPEN_OPEN;
-
+        intent_set_disposition(rep, DISP_OPEN_OPEN);
+ openit:
         /* dentry_open does a dput(de) and mntput(mds->mds_vfsmnt) on error */
         mfd = mds_dentry_open(dchild, mds->mds_vfsmnt,
                               rec->ur_flags & ~(O_DIRECT | O_TRUNC), req);
-        if (!mfd) {
-                CERROR("mds: out of memory\n");
-                dchild = NULL; /* prevent a double dput in step 2 */
-                GOTO(cleanup, rc = -ENOMEM);
+        if (IS_ERR(mfd)) {
+                dchild = NULL; /* prevent a double dput in cleanup phase 2 */
+                GOTO(cleanup, rc = PTR_ERR(mfd));
         }
 
         cleanup_phase = 4; /* mfd allocated */
@@ -401,6 +592,7 @@ int mds_open(struct mds_update_record *rec, int offset,
  cleanup:
         rc = mds_finish_transno(mds, dchild ? dchild->d_inode : NULL, handle,
                                 req, rc, rep->lock_policy_res1);
+        /* XXX what do we do here if mds_finish_transno itself failed? */
         switch (cleanup_phase) {
         case 4:
                 if (rc && !S_ISLNK(dchild->d_inode->i_mode))
@@ -410,19 +602,22 @@ int mds_open(struct mds_update_record *rec, int offset,
                  * ldlm_intent_policy: if we found the dentry, or we tried to
                  * open it (meaning that we created, if it wasn't found), then
                  * we return the lock to the caller and client. */
-                if (!(rep->lock_policy_res1 & (IT_OPEN_OPEN | IT_OPEN_POS)))
+                if (intent_disposition(rep, DISP_LOOKUP_NEG) &&
+                    !intent_disposition(rep, DISP_OPEN_OPEN))
                         ldlm_lock_decref(child_lockh, child_mode);
         case 2:
                 if (rc || S_ISLNK(dchild->d_inode->i_mode))
                         l_dput(dchild);
         case 1:
-                l_dput(parent);
-                if (rc) {
-                        ldlm_lock_decref(&parent_lockh, parent_mode);
-                } else {
-                        memcpy(&req->rq_ack_locks[0].lock, &parent_lockh,
-                               sizeof(parent_lockh));
-                        req->rq_ack_locks[0].mode = parent_mode;
+                if (parent) {
+                        l_dput(parent);
+                        if (rc) {
+                                ldlm_lock_decref(&parent_lockh, parent_mode);
+                        } else {
+                                memcpy(&req->rq_ack_locks[0].lock,&parent_lockh,
+                                       sizeof(parent_lockh));
+                                req->rq_ack_locks[0].mode = parent_mode;
+                        }
                 }
         }
         RETURN(rc);
index 50949dd..61871d7 100644 (file)
 #include <linux/lustre_mds.h>
 #include <linux/lustre_dlm.h>
 #include <linux/lustre_fsfilt.h>
+
 #include "mds_internal.h"
 
-extern inline struct mds_obd *mds_req2mds(struct ptlrpc_request *req);
+/* Callback handed to fsfilt_set_last_rcvd() by mds_finish_transno().
+ * Forwards transno and error to obd_transno_commit_cb(); the data argument
+ * is not used. */
+void mds_commit_cb(struct obd_device *obd, __u64 transno, void *data,
+                   int error)
+{
+        obd_transno_commit_cb(obd, transno, error);
+}
+
+/* Callback data consumed (and freed) by mds_cancel_cookies_cb(). */
+struct mds_logcancel_data {
+        /* LOV metadata passed to obd_unpackmd() */
+        struct lov_mds_md      *mlcd_lmm;
+        /* size handed to OBD_FREE() when this structure is released */
+        int                     mlcd_size;
+        /* size of mlcd_cookies in bytes; the cookie count is derived as
+         * mlcd_cookielen / sizeof(struct llog_cookie) */
+        int                     mlcd_cookielen;
+        /* length passed to obd_unpackmd() together with mlcd_lmm */
+        int                     mlcd_eadatalen;
+        /* trailing array of cookies passed to obd_log_cancel() */
+        struct llog_cookie      mlcd_cookies[0];
+};
+
+/* Establish a connection to the OSC when we first need it.  We don't do
+ * this during MDS setup because that would introduce setup ordering issues.
+ *
+ * The outcome is cached in mds->mds_osc_obd: NULL means no attempt has been
+ * made yet, an ERR_PTR() value records an earlier failure (which is returned
+ * again without retrying), and any other value is treated as "already
+ * connected".
+ *
+ * Returns 0 on success or a negative errno. */
+static int mds_osc_connect(struct obd_device *obd, struct mds_obd *mds)
+{
+        int rc;
+        ENTRY;
+
+        if (IS_ERR(mds->mds_osc_obd))
+                RETURN(PTR_ERR(mds->mds_osc_obd));
+
+        if (mds->mds_osc_obd)
+                RETURN(0);
+
+        mds->mds_osc_obd = class_uuid2obd(&mds->mds_osc_uuid);
+        if (!mds->mds_osc_obd) {
+                CERROR("MDS cannot locate OSC/LOV %s - no logging!\n",
+                       mds->mds_osc_uuid.uuid);
+                mds->mds_osc_obd = ERR_PTR(-ENOTCONN);
+                RETURN(-ENOTCONN);
+        }
+
+        rc = obd_connect(&mds->mds_osc_conn, mds->mds_osc_obd, &obd->obd_uuid);
+        if (rc) {
+                /* The device was found; it is the connect that failed. */
+                CERROR("MDS cannot connect to OSC/LOV %s: rc = %d - "
+                       "no logging!\n", mds->mds_osc_uuid.uuid, rc);
+                mds->mds_osc_obd = ERR_PTR(rc);
+                RETURN(rc);
+        }
+
+        /* NOTE(review): a failure here is returned to the caller but is not
+         * cached in mds_osc_obd, so later calls return 0 - confirm intended. */
+        rc = obd_set_info(&mds->mds_osc_conn, strlen("mds_conn"), "mds_conn",
+                          0, NULL);
+        RETURN(rc);
+}
 
-static void mds_commit_cb(struct obd_device *obd, __u64 transno, int error)
+/* Callback whose cb_data is a struct mds_logcancel_data.
+ *
+ * Reports the transno through obd_transno_commit_cb(), unpacks the stored
+ * lov_mds_md into a stripe md, asks the OSC connection to cancel the stored
+ * log cookies, and finally frees the mlcd.  Failures are only logged. */
+static void mds_cancel_cookies_cb(struct obd_device *obd, __u64 transno,
+                                  void *cb_data, int error)
 {
+        struct mds_logcancel_data *mlcd = cb_data;
+        struct lov_stripe_md *lsm = NULL;
+        int rc;
+
         obd_transno_commit_cb(obd, transno, error);
+
+        CDEBUG(D_HA, "cancelling %d cookies\n",
+               (int)(mlcd->mlcd_cookielen / sizeof(*mlcd->mlcd_cookies)));
+
+        /* NOTE(review): lsm is filled in by obd_unpackmd() and is not
+         * released in this function - confirm whether it must be freed. */
+        rc = obd_unpackmd(&obd->u.mds.mds_osc_conn, &lsm, mlcd->mlcd_lmm,
+                          mlcd->mlcd_eadatalen);
+        if (rc < 0) {
+                CERROR("bad LSM cancelling %d log cookies: rc %d\n",
+                       (int)(mlcd->mlcd_cookielen/sizeof(*mlcd->mlcd_cookies)),
+                       rc);
+        } else {
+                rc = obd_log_cancel(&obd->u.mds.mds_osc_conn, lsm,
+                                    mlcd->mlcd_cookielen /
+                                    sizeof(*mlcd->mlcd_cookies),
+                                    mlcd->mlcd_cookies, OBD_LLOG_FL_SENDNOW);
+                /* XXX flags would normally be 0; SENDNOW is used for debug */
+                if (rc)
+                        CERROR("error cancelling %d log cookies: rc %d\n",
+                               (int)(mlcd->mlcd_cookielen /
+                                     sizeof(*mlcd->mlcd_cookies)), rc);
+        }
+
+        /* The callback owns the mlcd and releases it on every path. */
+        OBD_FREE(mlcd, mlcd->mlcd_size);
 }
 
 /* Assumes caller has already pushed us into the kernel context. */
-int mds_finish_transno(struct mds_obd *mds, struct inode *i, void *handle,
-                       struct ptlrpc_request *req, int rc,
-                       __u32 op_data)
+int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle,
+                       struct ptlrpc_request *req, int rc, __u32 op_data)
 {
         struct mds_export_data *med = &req->rq_export->exp_mds_data;
         struct mds_client_data *mcd = med->med_mcd;
@@ -70,15 +144,15 @@ int mds_finish_transno(struct mds_obd *mds, struct inode *i, void *handle,
 
         if (!handle) {
                 /* if we're starting our own xaction, use our own inode */
-                i = mds->mds_rcvd_filp->f_dentry->d_inode;
-                handle = fsfilt_start(obd, i, FSFILT_OP_SETATTR);
+                inode = mds->mds_rcvd_filp->f_dentry->d_inode;
+                handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR, NULL);
                 if (IS_ERR(handle)) {
                         CERROR("fsfilt_start: %ld\n", PTR_ERR(handle));
                         GOTO(out, rc = PTR_ERR(handle));
                 }
         }
 
-        off = MDS_LR_CLIENT + med->med_off * MDS_LR_SIZE;
+        off = med->med_off;
 
         transno = req->rq_reqmsg->transno;
         if (transno == 0) {
@@ -94,10 +168,11 @@ int mds_finish_transno(struct mds_obd *mds, struct inode *i, void *handle,
         mcd->mcd_last_data = cpu_to_le32(op_data);
 
         fsfilt_set_last_rcvd(req->rq_export->exp_obd, transno, handle,
-                             mds_commit_cb);
-        written = lustre_fwrite(mds->mds_rcvd_filp, mcd, sizeof(*mcd), &off);
-        CDEBUG(D_INODE, "wrote trans "LPU64" client %s at #%u: written = "
-               LPSZ"\n", transno, mcd->mcd_uuid, med->med_off, written);
+                             mds_commit_cb, NULL);
+        written = fsfilt_write_record(obd, mds->mds_rcvd_filp,
+                                      (char *)mcd, sizeof(*mcd), &off);
+        CDEBUG(D_INODE, "wrote trans "LPU64" client %s at idx %u: written = "
+               LPSZ"\n", transno, mcd->mcd_uuid, med->med_idx, written);
 
         if (written != sizeof(*mcd)) {
                 CERROR("error writing to last_rcvd: rc = "LPSZ"\n", written);
@@ -110,7 +185,7 @@ int mds_finish_transno(struct mds_obd *mds, struct inode *i, void *handle,
         }
 
 commit:
-        err = fsfilt_commit(obd, i, handle, 0);
+        err = fsfilt_commit(obd, inode, handle, 0);
         if (err) {
                 CERROR("error committing transaction: %d\n", err);
                 if (!rc)
@@ -139,22 +214,29 @@ int mds_fix_attr(struct inode *inode, struct mds_update_record *rec)
         if (!(ia_valid & ATTR_RAW))
                 RETURN(0);
 
-        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-                RETURN(-EPERM);
-
-        LTIME_S(attr->ia_ctime) = now;
+        if (!(ia_valid & ATTR_CTIME_SET))
+                LTIME_S(attr->ia_ctime) = now;
         if (!(ia_valid & ATTR_ATIME_SET))
                 LTIME_S(attr->ia_atime) = now;
         if (!(ia_valid & ATTR_MTIME_SET))
                 LTIME_S(attr->ia_mtime) = now;
 
+        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+                RETURN(-EPERM);
+
         /* times */
-        if ((ia_valid & (ATTR_MTIME|ATTR_ATIME))==(ATTR_MTIME|ATTR_ATIME) &&
-             !(ia_valid & ATTR_ATIME_SET)) {
+        if ((ia_valid & (ATTR_MTIME|ATTR_ATIME))==(ATTR_MTIME|ATTR_ATIME)) {
                 if (rec->ur_fsuid != inode->i_uid &&
                     (error = permission(inode,MAY_WRITE)) != 0)
                         RETURN(error);
-        } else if (ia_valid & ATTR_UID) {
+        }
+
+        if (ia_valid & ATTR_SIZE) {
+                if ((error = permission(inode,MAY_WRITE)) != 0)
+                        RETURN(error);
+        }
+
+        if (ia_valid & ATTR_UID) {
                 /* chown */
                 error = -EPERM;
                 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
@@ -164,7 +246,6 @@ int mds_fix_attr(struct inode *inode, struct mds_update_record *rec)
                 if (attr->ia_gid == (gid_t) -1)
                         attr->ia_gid = inode->i_gid;
                 attr->ia_mode = inode->i_mode;
-                attr->ia_valid =  ATTR_UID | ATTR_GID | ATTR_CTIME;
                 /*
                  * If the user or group of a non-directory has been
                  * changed by a non-root user, remove the setuid bit.
@@ -232,6 +313,14 @@ static void reconstruct_reint_setattr(struct mds_update_record *rec,
         mds_pack_inode2fid(&body->fid1, de->d_inode);
         mds_pack_inode2body(body, de->d_inode);
 
+        /* Don't return OST-specific attributes if we didn't just set them */
+        if (rec->ur_iattr.ia_valid & ATTR_SIZE)
+                body->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
+        if (rec->ur_iattr.ia_valid & (ATTR_MTIME | ATTR_MTIME_SET))
+                body->valid |= OBD_MD_FLMTIME;
+        if (rec->ur_iattr.ia_valid & (ATTR_ATIME | ATTR_ATIME_SET))
+                body->valid |= OBD_MD_FLATIME;
+
         l_dput(de);
 }
 
@@ -251,6 +340,7 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset,
         struct inode *inode = NULL;
         struct lustre_handle lockh;
         void *handle = NULL;
+        struct mds_logcancel_data *mlcd = NULL;
         int rc = 0, cleanup_phase = 0, err, locked = 0;
         ENTRY;
 
@@ -279,21 +369,28 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset,
         OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_SETATTR_WRITE,
                        to_kdev_t(inode->i_sb->s_dev));
 
-        handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR);
+#ifdef ENABLE_ORPHANS
+        if (unlikely(mds->mds_osc_obd == NULL))
+                mds_osc_connect(obd, mds);
+#endif
+
+        handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR, NULL);
         if (IS_ERR(handle)) {
                 rc = PTR_ERR(handle);
                 handle = NULL;
                 GOTO(cleanup, rc);
         }
 
+        if (rec->ur_iattr.ia_valid & (ATTR_MTIME | ATTR_CTIME))
+                CDEBUG(D_INODE, "setting mtime %lu, ctime %lu\n",
+                       LTIME_S(rec->ur_iattr.ia_mtime),
+                       LTIME_S(rec->ur_iattr.ia_ctime));
         rc = mds_fix_attr(inode, rec);
         if (rc)
                 GOTO(cleanup, rc);
 
         rc = fsfilt_setattr(obd, de, handle, &rec->ur_iattr, 0);
-        if (rc == 0 &&
-            S_ISREG(inode->i_mode) &&
-            rec->ur_eadata != NULL) {
+        if (rc == 0 && S_ISREG(inode->i_mode) && rec->ur_eadata != NULL) {
                 rc = fsfilt_set_md(obd, inode, handle,
                                    rec->ur_eadata, rec->ur_eadatalen);
         }
@@ -302,10 +399,39 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset,
         mds_pack_inode2fid(&body->fid1, inode);
         mds_pack_inode2body(body, inode);
 
+        /* Don't return OST-specific attributes if we didn't just set them */
+        if (rec->ur_iattr.ia_valid & ATTR_SIZE)
+                body->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
+        if (rec->ur_iattr.ia_valid & (ATTR_MTIME | ATTR_MTIME_SET))
+                body->valid |= OBD_MD_FLMTIME;
+        if (rec->ur_iattr.ia_valid & (ATTR_ATIME | ATTR_ATIME_SET))
+                body->valid |= OBD_MD_FLATIME;
+
+        if (rc == 0 && rec->ur_cookielen && !IS_ERR(mds->mds_osc_obd)) {
+                OBD_ALLOC(mlcd, sizeof(*mlcd) + rec->ur_cookielen +
+                          rec->ur_eadatalen);
+                if (mlcd) {
+                        mlcd->mlcd_size = sizeof(*mlcd) + rec->ur_cookielen +
+                                rec->ur_eadatalen;
+                        mlcd->mlcd_eadatalen = rec->ur_eadatalen;
+                        mlcd->mlcd_cookielen = rec->ur_cookielen;
+                        mlcd->mlcd_lmm = (void *)&mlcd->mlcd_cookies +
+                                mlcd->mlcd_cookielen;
+                        memcpy(&mlcd->mlcd_cookies, rec->ur_logcookies,
+                               mlcd->mlcd_cookielen);
+                        memcpy(mlcd->mlcd_lmm, rec->ur_eadata,
+                               mlcd->mlcd_eadatalen);
+                } else {
+                        CERROR("unable to allocate log cancel data\n");
+                }
+        }
         EXIT;
  cleanup:
+        if (mlcd != NULL)
+                fsfilt_set_last_rcvd(req->rq_export->exp_obd, 0, handle,
+                                     mds_cancel_cookies_cb, mlcd);
         err = mds_finish_transno(mds, inode, handle, req, rc, 0);
-        switch(cleanup_phase) {
+        switch (cleanup_phase) {
         case 1:
                 l_dput(de);
                 if (locked) {
@@ -418,7 +544,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset,
 
         switch (type) {
         case S_IFREG:{
-                handle = fsfilt_start(obd, dir, FSFILT_OP_CREATE);
+                handle = fsfilt_start(obd, dir, FSFILT_OP_CREATE, NULL);
                 if (IS_ERR(handle))
                         GOTO(cleanup, rc = PTR_ERR(handle));
                 rc = vfs_create(dir, dchild, rec->ur_mode);
@@ -426,7 +552,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset,
                 break;
         }
         case S_IFDIR:{
-                handle = fsfilt_start(obd, dir, FSFILT_OP_MKDIR);
+                handle = fsfilt_start(obd, dir, FSFILT_OP_MKDIR, NULL);
                 if (IS_ERR(handle))
                         GOTO(cleanup, rc = PTR_ERR(handle));
                 rc = vfs_mkdir(dir, dchild, rec->ur_mode);
@@ -434,7 +560,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset,
                 break;
         }
         case S_IFLNK:{
-                handle = fsfilt_start(obd, dir, FSFILT_OP_SYMLINK);
+                handle = fsfilt_start(obd, dir, FSFILT_OP_SYMLINK, NULL);
                 if (IS_ERR(handle))
                         GOTO(cleanup, rc = PTR_ERR(handle));
                 if (rec->ur_tgt == NULL)        /* no target supplied */
@@ -449,7 +575,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset,
         case S_IFIFO:
         case S_IFSOCK:{
                 int rdev = rec->ur_rdev;
-                handle = fsfilt_start(obd, dir, FSFILT_OP_MKNOD);
+                handle = fsfilt_start(obd, dir, FSFILT_OP_MKNOD, NULL);
                 if (IS_ERR(handle))
                         GOTO(cleanup, (handle = NULL, rc = PTR_ERR(handle)));
                 rc = vfs_mknod(dir, dchild, rec->ur_mode, rdev);
@@ -458,13 +584,13 @@ static int mds_reint_create(struct mds_update_record *rec, int offset,
         }
         default:
                 CERROR("bad file type %o creating %s\n", type, rec->ur_name);
+                dchild->d_fsdata = NULL;
                 GOTO(cleanup, rc = -EINVAL);
         }
 
-        /* In case we stored the desired inum in here, we want to clean up.
-         * We also do this in the cleanup block, for the error cases.
-         */
-        dchild->d_fsdata = NULL;
+        /* In case we stored the desired inum in here, we want to clean up. */
+        if (dchild->d_fsdata == (void *)(unsigned long)rec->ur_fid2->id)
+                dchild->d_fsdata = NULL;
 
         if (rc) {
                 CDEBUG(D_INODE, "error during create: %d\n", rc);
@@ -532,7 +658,6 @@ cleanup:
         }
         switch (cleanup_phase) {
         case 2: /* child dentry */
-                dchild->d_fsdata = NULL;
                 l_dput(dchild);
         case 1: /* locked parent dentry */
                 if (rc) {
@@ -634,43 +759,134 @@ static void reconstruct_reint_unlink(struct mds_update_record *rec, int offset,
                   "can't get EA for reconstructed unlink, leaking OST inodes");
 }
 
+/* If we are unlinking an open file/dir (i.e. creating an orphan) then
+ * we instead link the inode into the PENDING directory until it is
+ * finally released.  We can't simply call mds_reint_rename() or some
+ * part thereof, because we don't have the inode to check for link
+ * count/open status until after it is locked.
+ *
+ * For lock ordering, we always get the PENDING, then pending_child lock
+ * last to avoid deadlocks.
+ *
+ * The entry in PENDING is named by ll_fid2str() from the child's inode
+ * number and generation, and the move itself is a vfs_rename() done while
+ * holding pending_dir->i_sem.  If an entry of that name already exists in
+ * PENDING (and refers to the same inode, which is asserted), nothing is
+ * renamed and 0 is returned.
+ *
+ * *handle is only written once we get as far as fsfilt_start(); it then
+ * holds whatever fsfilt_start() returned, which is an ERR_PTR value when
+ * starting the transaction failed.  NOTE(review): the handle is never
+ * committed in this function -- presumably the caller is expected to
+ * finish the transaction; confirm against mds_reint_unlink().
+ *
+ * Returns 0 on success, in which case the inode has been flagged with
+ * mds_inode_set_orphan(); otherwise the error from lookup_one_len(),
+ * fsfilt_start() or vfs_rename().
+ */
+static int mds_unlink_orphan(struct mds_update_record *rec,
+                             struct obd_device *obd, struct dentry *dparent,
+                             struct dentry *dchild, void **handle)
+{
+        struct mds_obd *mds = &obd->u.mds;
+        struct inode *pending_dir = mds->mds_pending_dir->d_inode;
+        struct dentry *pending_child;
+        char fidname[LL_FID_NAMELEN];
+        int fidlen = 0, rc;
+        ENTRY;
+
+        LASSERT(!mds_inode_is_orphan(dchild->d_inode));
+
+        down(&pending_dir->i_sem);
+        fidlen = ll_fid2str(fidname, dchild->d_inode->i_ino,
+                            dchild->d_inode->i_generation);
+
+        CDEBUG(D_ERROR, "pending destroy of %dx open file %s = %s\n",
+               mds_open_orphan_count(dchild->d_inode),
+               rec->ur_name, fidname);
+
+        pending_child = lookup_one_len(fidname, mds->mds_pending_dir, fidlen);
+        if (IS_ERR(pending_child))
+                GOTO(out_lock, rc = PTR_ERR(pending_child));
+
+        if (pending_child->d_inode != NULL) {
+                CERROR("re-destroying orphan file %s?\n", rec->ur_name);
+                LASSERT(pending_child->d_inode == dchild->d_inode);
+                GOTO(out_dput, rc = 0);
+        }
+
+        *handle = fsfilt_start(obd, pending_dir, FSFILT_OP_RENAME, NULL);
+        if (IS_ERR(*handle))
+                GOTO(out_dput, rc = PTR_ERR(*handle));
+
+        rc = vfs_rename(dparent->d_inode, dchild, pending_dir, pending_child);
+        if (rc)
+                CERROR("error renaming orphan %lu/%s to PENDING: rc = %d\n",
+                       dparent->d_inode->i_ino, rec->ur_name, rc);
+        else
+                mds_inode_set_orphan(dchild->d_inode);
+out_dput:
+        dput(pending_child);
+out_lock:
+        up(&pending_dir->i_sem);
+        RETURN(rc);
+}
+
+/* Add an MDS_UNLINK_REC llog record for @inode through obd_log_add().
+ *
+ * The striping EA is unpacked from reply buffer @offset of @repmsg, and
+ * reply buffer @offset + 1 is handed to obd_log_add() as the llog cookie
+ * array, with its length expressed in units of struct llog_cookie.
+ *
+ * Returns the obd_log_add() result (the callers in this file treat a
+ * value > 0 as "cookies were returned"), or an error if mds_osc_obd is an
+ * error pointer, the EA cannot be unpacked, or the record cannot be
+ * allocated.  The unpacked lsm is released on every path after
+ * obd_unpackmd() has succeeded.
+ */
+static int mds_log_op_unlink(struct obd_device *obd, struct mds_obd *mds,
+                             struct inode *inode, struct lustre_msg *repmsg,
+                             int offset)
+{
+        struct lov_stripe_md *lsm = NULL;
+        struct llog_unlink_rec *lur;
+        int rc;
+        ENTRY;
+
+        if (IS_ERR(mds->mds_osc_obd))
+                RETURN(PTR_ERR(mds->mds_osc_obd));
+
+        rc = obd_unpackmd(&mds->mds_osc_conn, &lsm,
+                          lustre_msg_buf(repmsg, offset, 0),
+                          repmsg->buflens[offset]);
+        if (rc < 0)
+                RETURN(rc);
+
+        OBD_ALLOC(lur, sizeof(*lur));
+        if (!lur) {
+                /* Don't leak the lsm that obd_unpackmd() just set up. */
+                GOTO(out_free_lsm, rc = -ENOMEM);
+        }
+        lur->lur_hdr.lth_len = lur->lur_end_len = sizeof(*lur);
+        lur->lur_hdr.lth_type = MDS_UNLINK_REC;
+        lur->lur_oid = inode->i_ino;
+        lur->lur_ogen = inode->i_generation;
+
+        rc = obd_log_add(&mds->mds_osc_conn, mds->mds_catalog, &lur->lur_hdr,
+                         lsm, lustre_msg_buf(repmsg, offset + 1, 0),
+                         repmsg->buflens[offset+1]/sizeof(struct llog_cookie));
+
+        OBD_FREE(lur, sizeof(*lur));
+out_free_lsm:
+        obd_free_memmd(&mds->mds_osc_conn, &lsm);
+
+        RETURN(rc);
+}
+
 static int mds_reint_unlink(struct mds_update_record *rec, int offset,
                             struct ptlrpc_request *req,
-                            struct lustre_handle *child_lockh)
+                            struct lustre_handle *lh)
 {
-        struct dentry *dir_de = NULL;
+        struct dentry *dparent = NULL;
         struct dentry *dchild = NULL;
         struct mds_obd *mds = mds_req2mds(req);
         struct obd_device *obd = req->rq_export->exp_obd;
         struct mds_body *body = NULL;
-        struct inode *dir_inode = NULL, *child_inode;
-        struct lustre_handle parent_lockh;
+        struct inode *child_inode;
+        struct lustre_handle parent_lockh, child_lockh;
         void *handle = NULL;
         struct ldlm_res_id child_res_id = { .name = {0} };
-        int rc = 0, flags = 0, return_lock = 0;
-        int cleanup_phase = 0;
+        int rc = 0, flags = 0, log_unlink = 0, cleanup_phase = 0;
         ENTRY;
 
         LASSERT(offset == 0 || offset == 2);
 
         MDS_CHECK_RESENT(req, reconstruct_reint_unlink(rec, offset, req,
-                                                       child_lockh));
+                                                       &child_lockh));
 
         if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_UNLINK))
                 GOTO(cleanup, rc = -ENOENT);
 
         /* Step 1: Lookup the parent by FID */
-        dir_de = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, LCK_PW,
-                                       &parent_lockh);
-        if (IS_ERR(dir_de))
-                GOTO(cleanup, rc = PTR_ERR(dir_de));
-        dir_inode = dir_de->d_inode;
-        LASSERT(dir_inode);
+        dparent = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, LCK_PW,
+                                        &parent_lockh);
+        if (IS_ERR(dparent))
+                GOTO(cleanup, rc = PTR_ERR(dparent));
+        LASSERT(dparent->d_inode);
 
         cleanup_phase = 1; /* Have parent dentry lock */
 
         /* Step 2: Lookup the child */
-        dchild = ll_lookup_one_len(rec->ur_name, dir_de, rec->ur_namelen - 1);
+        dchild = ll_lookup_one_len(rec->ur_name, dparent, rec->ur_namelen - 1);
         if (IS_ERR(dchild))
                 GOTO(cleanup, rc = PTR_ERR(dchild));
 
@@ -678,15 +894,13 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset,
 
         child_inode = dchild->d_inode;
         if (child_inode == NULL) {
-                CDEBUG(D_INODE,
-                       "child doesn't exist (dir %lu, name %s)\n",
-                       dir_inode->i_ino, rec->ur_name);
-                rc = -ENOENT;
-                GOTO(cleanup, rc);
+                CDEBUG(D_INODE, "child doesn't exist (dir %lu, name %s)\n",
+                       dparent->d_inode->i_ino, rec->ur_name);
+                GOTO(cleanup, rc = -ENOENT);
         }
 
         DEBUG_REQ(D_INODE, req, "parent ino %lu, child ino %lu",
-                  dir_inode->i_ino, child_inode->i_ino);
+                  dparent->d_inode->i_ino, child_inode->i_ino);
 
         /* Step 3: Get a lock on the child */
         child_res_id.name[0] = child_inode->i_ino;
@@ -695,14 +909,14 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset,
         rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
                               child_res_id, LDLM_PLAIN, NULL, 0, LCK_EX,
                               &flags, ldlm_completion_ast, mds_blocking_ast,
-                              NULL, child_lockh);
+                              NULL, &child_lockh);
         if (rc != ELDLM_OK)
                 GOTO(cleanup, rc);
 
         cleanup_phase = 3; /* child lock */
 
         OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_UNLINK_WRITE,
-                       to_kdev_t(dir_inode->i_sb->s_dev));
+                       to_kdev_t(dparent->d_inode->i_sb->s_dev));
 
         /* ldlm_reply in buf[0] if called via intent */
         if (offset)
@@ -711,43 +925,89 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset,
         body = lustre_msg_buf(req->rq_repmsg, offset, sizeof (*body));
         LASSERT(body != NULL);
 
-        /* Step 4: Do the unlink: client decides between rmdir/unlink!
-         * (bug 72) */
+#ifdef ENABLE_ORPHANS
+        if (unlikely(mds->mds_osc_obd == NULL))
+                mds_osc_connect(obd, mds);
+#endif
+
+        /* If this is the last reference to this inode, get the OBD EA
+         * data first so the client can destroy OST objects */
+        if (S_ISREG(child_inode->i_mode) && child_inode->i_nlink == 1) {
+                mds_pack_inode2fid(&body->fid1, child_inode);
+                mds_pack_inode2body(body, child_inode);
+                mds_pack_md(obd, req->rq_repmsg, offset + 1, body, child_inode);
+                if (!(body->valid & OBD_MD_FLEASIZE)) {
+                        body->valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
+                                        OBD_MD_FLATIME | OBD_MD_FLMTIME);
+                        log_unlink = 1;
+                }
+        }
+
+        /* We have to do these checks ourselves, in case we are making an
+         * orphan.  The client tells us whether rmdir() or unlink() was called,
+         * so we need to return appropriate errors (bug 72).
+         *
+         * We don't have to check permissions, because vfs_rename (called from
+         * mds_unlink_orphan) also calls may_delete. */
+        if ((rec->ur_mode & S_IFMT) == S_IFDIR) {
+                if (!S_ISDIR(child_inode->i_mode))
+                        GOTO(cleanup, rc = -ENOTDIR);
+        } else {
+                if (S_ISDIR(child_inode->i_mode))
+                        GOTO(cleanup, rc = -EISDIR);
+        }
+
+        if (mds_open_orphan_count(child_inode) > 0) {
+                rc = mds_unlink_orphan(rec, obd, dparent, dchild, &handle);
+#ifdef ENABLE_ORPHANS
+                if (!rc && mds_log_op_unlink(obd, mds, child_inode,
+                                             req->rq_repmsg, offset + 1) > 0)
+                        body->valid |= OBD_MD_FLCOOKIE;
+#endif
+                GOTO(cleanup, rc);
+        }
+
+        // Step 4: Do the unlink: client decides between rmdir/unlink! (bug 72)
         switch (rec->ur_mode & S_IFMT) {
         case S_IFDIR:
                 /* Drop any lingering child directories before we start our
                  * transaction, to avoid doing multiple inode dirty/delete
-                 * in our compound transaction (bug 1321).
-                 */
+                 * in our compound transaction (bug 1321). */
                 shrink_dcache_parent(dchild);
-                handle = fsfilt_start(obd, dir_inode, FSFILT_OP_RMDIR);
+                handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_RMDIR,
+                                      NULL);
                 if (IS_ERR(handle))
                         GOTO(cleanup, rc = PTR_ERR(handle));
                 cleanup_phase = 4;
-                rc = vfs_rmdir(dir_inode, dchild);
+                rc = vfs_rmdir(dparent->d_inode, dchild);
                 break;
-        case S_IFREG:
-                /* If this is the last reference to this inode, get the OBD EA
-                 * data first so the client can destroy OST objects */
-                if (S_ISREG(child_inode->i_mode) && child_inode->i_nlink == 1) {
-                        mds_pack_inode2fid(&body->fid1, child_inode);
-                        mds_pack_inode2body(body, child_inode);
-                        mds_pack_md(obd, req->rq_repmsg, offset + 1,
-                                    body, child_inode);
-                        if (body->valid & OBD_MD_FLEASIZE)
-                                return_lock = 1;
-                }
-                /* no break */
+        case S_IFREG: {
+                handle = fsfilt_start(obd, dparent->d_inode,
+                                      FSFILT_OP_UNLINK_LOG, NULL);
+                if (IS_ERR(handle))
+                        GOTO(cleanup, rc = PTR_ERR(handle));
+
+                cleanup_phase = 4;
+                rc = vfs_unlink(dparent->d_inode, dchild);
+#ifdef ENABLE_ORPHANS
+                if (!rc && log_unlink)
+                        if (mds_log_op_unlink(obd, mds, child_inode,
+                                              req->rq_repmsg, offset + 1) > 0)
+                                body->valid |= OBD_MD_FLCOOKIE;
+#endif
+                break;
+        }
         case S_IFLNK:
         case S_IFCHR:
         case S_IFBLK:
         case S_IFIFO:
         case S_IFSOCK:
-                handle = fsfilt_start(obd, dir_inode, FSFILT_OP_UNLINK);
+                handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_UNLINK,
+                                      NULL);
                 if (IS_ERR(handle))
                         GOTO(cleanup, rc = PTR_ERR(handle));
                 cleanup_phase = 4;
-                rc = vfs_unlink(dir_inode, dchild);
+                rc = vfs_unlink(dparent->d_inode, dchild);
                 break;
         default:
                 CERROR("bad file type %o unlinking %s\n", rec->ur_mode,
@@ -758,29 +1018,29 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset,
 
  cleanup:
         switch(cleanup_phase) {
-            case 4:
-                rc = mds_finish_transno(mds, dir_inode, handle, req, rc, 0);
-                if (rc && body) {
-                        /* Don't unlink the OST objects if the MDS unlink failed */
+        case 4:
+                rc = mds_finish_transno(mds, dparent->d_inode, handle, req,
+                                        rc, 0);
+                if (rc && body != NULL) {
+                        // Don't unlink the OST objects if the MDS unlink failed
                         body->valid = 0;
                 }
-            case 3: /* child lock */
-                if (rc != 0 || return_lock == 0)
-                        ldlm_lock_decref(child_lockh, LCK_EX);
-            case 2: /* child dentry */
+        case 3: /* child lock */
+                ldlm_lock_decref(&child_lockh, LCK_EX);
+        case 2: /* child dentry */
                 l_dput(dchild);
-            case 1: /* parent dentry and lock */
+        case 1: /* parent dentry and lock */
                 if (rc) {
-                        ldlm_lock_decref(&parent_lockh, LCK_EX);
+                        ldlm_lock_decref(&parent_lockh, LCK_PW);
                 } else {
                         memcpy(&req->rq_ack_locks[0].lock, &parent_lockh,
                                sizeof(parent_lockh));
-                        req->rq_ack_locks[0].mode = LCK_EX;
+                        req->rq_ack_locks[0].mode = LCK_PW;
                 }
-                l_dput(dir_de);
-            case 0:
+                l_dput(dparent);
+        case 0:
                 break;
-            default:
+        default:
                 CERROR("invalid cleanup_phase %d\n", cleanup_phase);
                 LBUG();
         }
@@ -857,8 +1117,10 @@ static int mds_reint_link(struct mds_update_record *rec, int offset,
         /* Step 3: Lookup the child */
         dchild = ll_lookup_one_len(rec->ur_name, de_tgt_dir, rec->ur_namelen-1);
         if (IS_ERR(dchild)) {
-                CERROR("child lookup error %ld\n", PTR_ERR(dchild));
-                GOTO(cleanup, rc = PTR_ERR(dchild));
+                rc = PTR_ERR(dchild);
+                if (rc != -EPERM && rc != -EACCES)
+                        CERROR("child lookup error %d\n", rc);
+                GOTO(cleanup, rc);
         }
 
         cleanup_phase = 4; /* child dentry */
@@ -874,15 +1136,15 @@ static int mds_reint_link(struct mds_update_record *rec, int offset,
         OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_LINK_WRITE,
                        to_kdev_t(de_src->d_inode->i_sb->s_dev));
 
-        handle = fsfilt_start(obd, de_tgt_dir->d_inode, FSFILT_OP_LINK);
+        handle = fsfilt_start(obd, de_tgt_dir->d_inode, FSFILT_OP_LINK, NULL);
         if (IS_ERR(handle)) {
                 rc = PTR_ERR(handle);
                 GOTO(cleanup, rc);
         }
 
         rc = vfs_link(de_src, de_tgt_dir->d_inode, dchild);
-        if (rc)
-                CERROR("link error %d\n", rc);
+        if (rc && rc != -EPERM && rc != -EACCES)
+                CERROR("vfs_link error %d\n", rc);
 cleanup:
         rc = mds_finish_transno(mds, de_tgt_dir ? de_tgt_dir->d_inode : NULL,
                                 handle, req, rc, 0);
@@ -1057,13 +1319,12 @@ static int mds_reint_rename(struct mds_update_record *rec, int offset,
         OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_RENAME_WRITE,
                        to_kdev_t(de_srcdir->d_inode->i_sb->s_dev));
 
-        handle = fsfilt_start(obd, de_tgtdir->d_inode, FSFILT_OP_RENAME);
+        handle = fsfilt_start(obd, de_tgtdir->d_inode, FSFILT_OP_RENAME, NULL);
         if (IS_ERR(handle))
                 GOTO(cleanup, rc = PTR_ERR(handle));
 
         lock_kernel();
-        rc = vfs_rename(de_srcdir->d_inode, de_old, de_tgtdir->d_inode, de_new,
-                        NULL);
+        rc = vfs_rename(de_srcdir->d_inode, de_old, de_tgtdir->d_inode, de_new);
         unlock_kernel();
 
         EXIT;
index e530020..49c6100 100644 (file)
@@ -6,3 +6,4 @@ Makefile
 Makefile.in
 .deps
 TAGS
+.*.cmd
index 61f4bc2..06d60d4 100644 (file)
@@ -1,5 +1,3 @@
-# FIXME: we need to make it clear that obdclass.o depends on
-# lustre_build_version, or 'make -j2' breaks!
 DEFS=
 MODULE = obdclass
 
@@ -9,15 +7,13 @@ else
 FSMOD = fsfilt_ext3
 endif
 
+class_obd.o: lustre_build_version
+
 if LIBLUSTRE
 lib_LIBRARIES = liblustreclass.a
-liblustreclass_a_SOURCES = uuid.c statfs_pack.c genops.c debug.c class_obd.c lustre_handles.c lustre_peer.c lprocfs_status.c simple.c
-
-class_obd.o: lustre_version
-
-lustre_version:
-       echo '#define LUSTRE_VERSION 12' > $(top_builddir)/include/linux/lustre_build_version.h
-       echo '#define BUILD_VERSION "1"' >> $(top_builddir)/include/linux/lustre_build_version.h
+liblustreclass_a_SOURCES = uuid.c statfs_pack.c genops.c debug.c class_obd.c
+liblustreclass_a_SOURCES += lustre_handles.c lustre_peer.c lprocfs_status.c
+liblustreclass_a_SOURCES += simple.c recov_log.c obdo.c
 
 else
 modulefs_DATA = lustre_build_version obdclass.o $(FSMOD).o fsfilt_reiserfs.o
@@ -25,15 +21,16 @@ EXTRA_PROGRAMS = obdclass $(FSMOD) fsfilt_reiserfs
 
 obdclass_SOURCES = class_obd.c debug.c genops.c sysctl.c uuid.c simple.c
 obdclass_SOURCES += lprocfs_status.c lustre_handles.c lustre_peer.c
-obdclass_SOURCES += fsfilt.c statfs_pack.c otree.c
+obdclass_SOURCES += fsfilt.c statfs_pack.c otree.c recov_log.c obdo.c
 endif
 
 include $(top_srcdir)/Rules
 
-# XXX I'm sure there's some automake mv-if-different helper for this.
+# Generate the version header into tmpver and only replace
+# lustre_build_version.h when the contents differ (cmp -s), so an unchanged
+# header keeps its timestamp.  The #define passed to echo must be quoted:
+# unquoted, the shell treats everything from '#' on (including the
+# redirection) as a comment and nothing is appended to tmpver.
 lustre_build_version:
        perl $(top_srcdir)/scripts/version_tag.pl $(top_srcdir) $(top_builddir) > tmpver
+       echo '#define LUSTRE_RELEASE @RELEASE@' >> tmpver
        cmp -s $(top_builddir)/include/linux/lustre_build_version.h tmpver \
-               2> /dev/null &&                                            \
-               $(RM) tmpver ||                                            \
-               mv tmpver $(top_builddir)/include/linux/lustre_build_version.h
+                2> /dev/null &&                                            \
+                $(RM) tmpver ||                                            \
+                mv tmpver $(top_builddir)/include/linux/lustre_build_version.h
+
index b497aa3..8275ed8 100644 (file)
@@ -53,9 +53,7 @@
 #include <linux/miscdevice.h>
 #include <linux/smp_lock.h>
 #else
-
 # include <liblustre.h>
-
 #endif
 
 #include <linux/obd_support.h>
@@ -64,6 +62,7 @@
 #include <linux/lprocfs_status.h>
 #include <portals/lib-types.h> /* for PTL_MD_MAX_IOV */
 #include <linux/lustre_build_version.h>
+#include <portals/list.h>
 
 struct semaphore obd_conf_sem;   /* serialize configuration commands */
 struct obd_device obd_dev[MAX_OBD_DEVICES];
@@ -181,12 +180,10 @@ static inline void obd_conn2data(struct obd_ioctl_data *data,
 
 static void dump_exports(struct obd_device *obd)
 {
-        struct list_head *tmp, *n;
+        struct obd_export *exp, *n;
 
-        list_for_each_safe(tmp, n, &obd->obd_exports) {
-                struct obd_export *exp = list_entry(tmp, struct obd_export,
-                                                    exp_obd_chain);
-                CDEBUG(D_ERROR, "%s: %p %s %d %d %p\n",
+        list_for_each_entry_safe(exp, n, &obd->obd_exports, exp_obd_chain) {
+                CERROR("%s: %p %s %d %d %p\n",
                        obd->obd_name, exp, exp->exp_client_uuid.uuid,
                        atomic_read(&exp->exp_refcount),
                        exp->exp_failed, exp->exp_outstanding_reply );
@@ -543,6 +540,7 @@ int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd,
                 obd->obd_type->typ_refcnt--;
                 class_put_type(obd->obd_type);
                 obd->obd_type = NULL;
+                memset(obd, 0, sizeof(*obd));
                 GOTO(out, err = 0);
         }
 
@@ -562,7 +560,7 @@ int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd,
 
                 atomic_set(&obd->obd_refcount, 0);
 
-                if ( OBT(obd) && OBP(obd, setup) )
+                if (OBT(obd) && OBP(obd, setup))
                         err = obd_setup(obd, sizeof(*data), data);
 
                 if (!err) {
@@ -574,8 +572,8 @@ int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd,
                 GOTO(out, err);
         }
         case OBD_IOC_CLEANUP: {
-                int force = 0, failover = 0;
-                char * flag;
+                int flags = 0;
+                char *flag;
 
                 if (!obd->obd_set_up) {
                         CERROR("Device %d not setup\n", obd->obd_minor);
@@ -586,18 +584,19 @@ int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd,
                         for (flag = data->ioc_inlbuf1; *flag != 0; flag++)
                                 switch (*flag) {
                                 case 'F':
-                                        force = 1;
+                                        flags |= OBD_OPT_FORCE;
                                         break;
                                 case 'A':
-                                        failover = 1;
+                                        flags |= OBD_OPT_FAILOVER;
                                         break;
                                 default:
-                                        CERROR("unrecognised flag '%c'\n", 
+                                        CERROR("unrecognised flag '%c'\n",
                                                *flag);
                                 }
                 }
-                
-                if (atomic_read(&obd->obd_refcount) == 1 || force) {
+
+                if (atomic_read(&obd->obd_refcount) == 1 ||
+                    flags & OBD_OPT_FORCE) {
                         /* this will stop new connections, and need to
                            do it before class_disconnect_exports() */
                         obd->obd_stopping = 1;
@@ -607,19 +606,19 @@ int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd,
                         struct l_wait_info lwi = LWI_TIMEOUT_INTR(60 * HZ, NULL,
                                                                   NULL, NULL);
                         int rc;
-                        
-                        if (!force) {
+
+                        if (!(flags & OBD_OPT_FORCE)) {
                                 CERROR("OBD device %d (%p) has refcount %d\n",
-                                       obd->obd_minor, obd, 
+                                       obd->obd_minor, obd,
                                        atomic_read(&obd->obd_refcount));
                                 dump_exports(obd);
                                 GOTO(out, err = -EBUSY);
                         }
-                        class_disconnect_exports(obd, failover);
-                        CDEBUG(D_IOCTL, 
-                               "%s: waiting for obd refs to go away: %d\n", 
+                        class_disconnect_exports(obd, flags);
+                        CDEBUG(D_IOCTL,
+                               "%s: waiting for obd refs to go away: %d\n",
                                obd->obd_name, atomic_read(&obd->obd_refcount));
-                
+
                         rc = l_wait_event(obd->obd_refcount_waitq,
                                      atomic_read(&obd->obd_refcount) < 2, &lwi);
                         if (rc == 0) {
@@ -630,12 +629,12 @@ int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd,
                                        atomic_read(&obd->obd_refcount));
                                 dump_exports(obd);
                         }
-                        CDEBUG(D_IOCTL, "%s: awake, now finishing cleanup\n", 
+                        CDEBUG(D_IOCTL, "%s: awake, now finishing cleanup\n",
                                obd->obd_name);
                 }
 
                 if (OBT(obd) && OBP(obd, cleanup))
-                        err = obd_cleanup(obd, force, failover);
+                        err = obd_cleanup(obd, flags);
 
                 if (!err) {
                         obd->obd_set_up = obd->obd_stopping = 0;
@@ -807,10 +806,10 @@ EXPORT_SYMBOL(class_conn2cliimp);
 EXPORT_SYMBOL(class_conn2ldlmimp);
 EXPORT_SYMBOL(class_disconnect);
 EXPORT_SYMBOL(class_disconnect_exports);
-EXPORT_SYMBOL(lustre_uuid_to_peer);
 
 /* uuid.c */
 EXPORT_SYMBOL(class_uuid_unparse);
+EXPORT_SYMBOL(lustre_uuid_to_peer);
 EXPORT_SYMBOL(client_tgtuuid2obd);
 
 EXPORT_SYMBOL(class_handle_hash);
@@ -831,12 +830,15 @@ int init_obdclass(void)
                       ", info@clusterfs.com\n");
 
         class_init_uuidlist();
-        class_handle_init();
+        err = class_handle_init();
+        if (err)
+                return err;
 
         sema_init(&obd_conf_sem, 1);
         INIT_LIST_HEAD(&obd_types);
 
-        if ((err = misc_register(&obd_psdev))) {
+        err = misc_register(&obd_psdev);
+        if (err) {
                 CERROR("cannot register %d err %d\n", OBD_MINOR, err);
                 return err;
         }
@@ -875,7 +877,7 @@ int obd_proc_read_version(char *page, char **start, off_t off, int count, int *e
 #endif
 
 #ifdef __KERNEL__
-static void __exit cleanup_obdclass(void)
+static void /*__exit*/ cleanup_obdclass(void)
 #else
 static void cleanup_obdclass(void)
 #endif
@@ -914,8 +916,8 @@ static void cleanup_obdclass(void)
  * kernel patch */
 #ifdef __KERNEL__
 #include <linux/lustre_version.h>
-#define LUSTRE_MIN_VERSION 18
-#define LUSTRE_MAX_VERSION 19
+#define LUSTRE_MIN_VERSION 21
+#define LUSTRE_MAX_VERSION 21
 #if (LUSTRE_KERNEL_VERSION < LUSTRE_MIN_VERSION)
 # error Cannot continue: Your Lustre kernel patch is older than the sources
 #elif (LUSTRE_KERNEL_VERSION > LUSTRE_MAX_VERSION)
index 4357b79..d0abdfe 100644 (file)
@@ -64,7 +64,7 @@ void fsfilt_unregister_ops(struct fsfilt_operations *fs_ops)
         /* unlock fsfilt_types list */
 }
 
-struct fsfilt_operations *fsfilt_get_ops(char *type)
+struct fsfilt_operations *fsfilt_get_ops(const char *type)
 {
         struct fsfilt_operations *fs_ops;
 
@@ -89,7 +89,7 @@ struct fsfilt_operations *fsfilt_get_ops(char *type)
                        /* unlock fsfilt_types list */
                 }
         }
-        __MOD_INC_USE_COUNT(fs_ops->fs_owner);
+        try_module_get(fs_ops->fs_owner);
         /* unlock fsfilt_types list */
 
         return fs_ops;
@@ -97,7 +97,7 @@ struct fsfilt_operations *fsfilt_get_ops(char *type)
 
 void fsfilt_put_ops(struct fsfilt_operations *fs_ops)
 {
-        __MOD_DEC_USE_COUNT(fs_ops->fs_owner);
+        module_put(fs_ops->fs_owner);
 }
 
 
index 5f6322f..5dd196d 100644 (file)
 #include <linux/quotaops.h>
 #include <linux/ext3_fs.h>
 #include <linux/ext3_jbd.h>
-#include <linux/ext3_xattr.h>
+#include <linux/version.h>
+/* XXX ugh */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+ #include <linux/ext3_xattr.h>
+#else 
+ #include <linux/../../fs/ext3/xattr.h>
+#endif
 #include <linux/kp30.h>
 #include <linux/lustre_fsfilt.h>
 #include <linux/obd.h>
@@ -43,10 +49,11 @@ static kmem_cache_t *fcb_cache;
 static atomic_t fcb_cache_count = ATOMIC_INIT(0);
 
 struct fsfilt_cb_data {
-        struct journal_callback cb_jcb; /* data private to jbd */
+        struct journal_callback cb_jcb; /* jbd private data - MUST BE FIRST */
         fsfilt_cb_t cb_func;            /* MDS/OBD completion function */
         struct obd_device *cb_obd;      /* MDS/OBD completion device */
         __u64 cb_last_rcvd;             /* MDS/OST last committed operation */
+        void *cb_data;                  /* MDS/OST completion function data */
 };
 
 #define EXT3_XATTR_INDEX_LUSTRE         5
@@ -58,13 +65,24 @@ struct fsfilt_cb_data {
  * the inode (which we will be changing anyways as part of this
  * transaction).
  */
-static void *fsfilt_ext3_start(struct inode *inode, int op)
+static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private)
 {
         /* For updates to the last recieved file */
         int nblocks = EXT3_DATA_TRANS_BLOCKS;
         void *handle;
 
         switch(op) {
+        case FSFILT_OP_CREATE_LOG:
+                nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS+EXT3_DATA_TRANS_BLOCKS;
+                op = FSFILT_OP_CREATE;
+                break;
+        case FSFILT_OP_UNLINK_LOG:
+                nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS+EXT3_DATA_TRANS_BLOCKS;
+                op = FSFILT_OP_UNLINK;
+                break;
+        }
+
+        switch(op) {
         case FSFILT_OP_RMDIR:
         case FSFILT_OP_UNLINK:
                 nblocks += EXT3_DELETE_TRANS_BLOCKS;
@@ -95,7 +113,7 @@ static void *fsfilt_ext3_start(struct inode *inode, int op)
                  LBUG();
         }
 
-        LASSERT(!current->journal_info);
+        LASSERT(current->journal_info == desc_private);
         lock_kernel();
         handle = journal_start(EXT3_JOURNAL(inode), nblocks);
         unlock_kernel();
@@ -185,14 +203,14 @@ static int fsfilt_ext3_credits_needed(int objcount, struct fsfilt_objinfo *fso)
  * the pages have been written.
  */
 static void *fsfilt_ext3_brw_start(int objcount, struct fsfilt_objinfo *fso,
-                                   int niocount, struct niobuf_remote *nb)
+                                   int niocount, void *desc_private)
 {
         journal_t *journal;
         handle_t *handle;
         int needed;
         ENTRY;
 
-        LASSERT(!current->journal_info);
+        LASSERT(current->journal_info == desc_private);
         journal = EXT3_SB(fso->fso_dentry->d_inode->i_sb)->s_journal;
         needed = fsfilt_ext3_credits_needed(objcount, fso);
 
@@ -218,6 +236,8 @@ static void *fsfilt_ext3_brw_start(int objcount, struct fsfilt_objinfo *fso,
         if (IS_ERR(handle))
                 CERROR("can't get handle for %d credits: rc = %ld\n", needed,
                        PTR_ERR(handle));
+        else
+                LASSERT(handle->h_buffer_credits >= needed);
 
         RETURN(handle);
 }
@@ -249,24 +269,26 @@ static int fsfilt_ext3_setattr(struct dentry *dentry, void *handle,
          * in the block pointers; this is really the "small" stripe MD data.
          * We can avoid further hackery by virtue of the MDS file size being
          * zero all the time (which doesn't invoke block truncate at unlink
-         * time), so we assert we never change the MDS file size from zero.
-         */
+         * time), so we assert we never change the MDS file size from zero. */
         if (iattr->ia_valid & ATTR_SIZE && !do_trunc) {
                 /* ATTR_SIZE would invoke truncate: clear it */
                 iattr->ia_valid &= ~ATTR_SIZE;
-                inode->i_size = iattr->ia_size;
+                EXT3_I(inode)->i_disksize = inode->i_size = iattr->ia_size;
 
                 /* make sure _something_ gets set - so new inode
-                 * goes to disk (probably won't work over XFS
-                 */
-                if (!iattr->ia_valid & ATTR_MODE) {
+                 * goes to disk (probably won't work over XFS) */
+                if (!(iattr->ia_valid & (ATTR_MODE | ATTR_MTIME | ATTR_CTIME))){
                         iattr->ia_valid |= ATTR_MODE;
                         iattr->ia_mode = inode->i_mode;
                 }
         }
-        if (inode->i_op->setattr)
+
+        /* Don't allow setattr to change file type */
+        iattr->ia_mode = (inode->i_mode & S_IFMT)|(iattr->ia_mode & ~S_IFMT);
+
+        if (inode->i_op->setattr) {
                 rc = inode->i_op->setattr(dentry, iattr);
-        else{
+        } else {
                 rc = inode_change_ok(inode, iattr);
                 if (!rc)
                         rc = inode_setattr(inode, iattr);
@@ -286,8 +308,8 @@ static int fsfilt_ext3_set_md(struct inode *inode, void *handle,
          * it will fit, because putting it in an EA currently kills the MDS
          * performance.  We'll fix this with "fast EAs" in the future.
          */
-        if (lmm_size <= sizeof(EXT3_I(inode)->i_data) -
-                        sizeof(EXT3_I(inode)->i_data[0])) {
+        if (inode->i_blocks == 0 && lmm_size <= sizeof(EXT3_I(inode)->i_data) -
+                                            sizeof(EXT3_I(inode)->i_data[0])) {
                 /* XXX old_size is debugging only */
                 int old_size = EXT3_I(inode)->i_data[0];
                 if (old_size != 0) {
@@ -303,8 +325,15 @@ static int fsfilt_ext3_set_md(struct inode *inode, void *handle,
         } else {
                 down(&inode->i_sem);
                 lock_kernel();
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
                 rc = ext3_xattr_set(handle, inode, EXT3_XATTR_INDEX_LUSTRE,
                                     XATTR_LUSTRE_MDS_OBJID, lmm, lmm_size, 0);
+#else
+                rc = ext3_xattr_set_handle(handle, inode, 
+                                           EXT3_XATTR_INDEX_LUSTRE,
+                                           XATTR_LUSTRE_MDS_OBJID, lmm, 
+                                           lmm_size, 0);
+#endif
                 unlock_kernel();
                 up(&inode->i_sem);
         }
@@ -319,7 +348,7 @@ static int fsfilt_ext3_get_md(struct inode *inode, void *lmm, int lmm_size)
 {
         int rc;
 
-        if (EXT3_I(inode)->i_data[0]) {
+        if (inode->i_blocks == 0 && EXT3_I(inode)->i_data[0]) {
                 int size = le32_to_cpu(EXT3_I(inode)->i_data[0]);
                 LASSERT(size < sizeof(EXT3_I(inode)->i_data));
                 if (lmm) {
@@ -411,14 +440,15 @@ static void fsfilt_ext3_cb_func(struct journal_callback *jcb, int error)
 {
         struct fsfilt_cb_data *fcb = (struct fsfilt_cb_data *)jcb;
 
-        fcb->cb_func(fcb->cb_obd, fcb->cb_last_rcvd, error);
+        fcb->cb_func(fcb->cb_obd, fcb->cb_last_rcvd, fcb->cb_data, error);
 
         OBD_SLAB_FREE(fcb, fcb_cache, sizeof *fcb);
         atomic_dec(&fcb_cache_count);
 }
 
 static int fsfilt_ext3_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
-                                     void *handle, fsfilt_cb_t cb_func)
+                                     void *handle, fsfilt_cb_t cb_func,
+                                     void *cb_data)
 {
         struct fsfilt_cb_data *fcb;
 
@@ -430,10 +460,10 @@ static int fsfilt_ext3_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
         fcb->cb_func = cb_func;
         fcb->cb_obd = obd;
         fcb->cb_last_rcvd = last_rcvd;
+        fcb->cb_data = cb_data;
 
         CDEBUG(D_EXT2, "set callback for last_rcvd: "LPD64"\n", last_rcvd);
         lock_kernel();
-        /* Note that an "incompatible pointer" warning here is OK for now */
         journal_callback_set(handle, fsfilt_ext3_cb_func,
                              (struct journal_callback *)fcb);
         unlock_kernel();
@@ -443,10 +473,11 @@ static int fsfilt_ext3_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
 
 static int fsfilt_ext3_journal_data(struct file *filp)
 {
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+        /* bug 1576: enable data journaling on 2.5 when appropriate */
         struct inode *inode = filp->f_dentry->d_inode;
-
         EXT3_I(inode)->i_flags |= EXT3_JOURNAL_DATA_FL;
-
+#endif
         return 0;
 }
 
@@ -459,7 +490,7 @@ static int fsfilt_ext3_journal_data(struct file *filp)
  */
 static int fsfilt_ext3_statfs(struct super_block *sb, struct obd_statfs *osfs)
 {
-        struct statfs sfs;
+        struct kstatfs sfs;
         int rc = vfs_statfs(sb, &sfs);
 
         if (!rc && sfs.f_bfree < sfs.f_ffree) {
@@ -484,6 +515,110 @@ static int fsfilt_ext3_prep_san_write(struct inode *inode, long *blocks,
         return ext3_prep_san_write(inode, blocks, nblocks, newsize);
 }
 
+/*
+ * Read one record of @size bytes from @file at *@offs into @buf.
+ *
+ * The record must lie entirely below i_size and inside a single block;
+ * otherwise -EIO is returned.  On success *@offs is advanced by @size and
+ * @size is returned; on failure a negative errno is returned and *@offs
+ * is left untouched.
+ */
+static int fsfilt_ext3_read_record(struct file * file, char *buf,
+                                   int size, loff_t *offs)
+{
+        struct buffer_head *bh;
+        unsigned long block, boffs;
+        struct inode *inode = file->f_dentry->d_inode;
+        int err;
+
+        if (inode->i_size < *offs + size) {
+                CERROR("file size %llu is too short for read %u@%llu\n",
+                       inode->i_size, size, *offs);
+                return -EIO;
+        }
+
+        block = *offs >> inode->i_blkbits;
+        bh = ext3_bread(NULL, inode, block, 0, &err);
+        if (!bh) {
+                CERROR("can't read block: %d\n", err);
+                /* NOTE(review): ext3_bread() looks able to return NULL with
+                 * err still 0 (e.g. an unmapped block) - confirm.  Never let
+                 * that escape as a 0 "success" return. */
+                return err ? err : -EIO;
+        }
+
+        /* the record has to fit in the one block we just read */
+        boffs = (unsigned)*offs % bh->b_size;
+        if (boffs + size > bh->b_size) {
+                CERROR("request crosses block's border. offset %llu, size %u\n",
+                       *offs, size);
+                brelse(bh);
+                return -EIO;
+        }
+
+        memcpy(buf, bh->b_data + boffs, size);
+        brelse(bh);
+        *offs += size;
+        return size;
+}
+
+/*
+ * Write one record of @size bytes from @buf into @file at *@offs, in its
+ * own journal transaction.
+ *
+ * The record must fit inside a single block (-EIO otherwise).  If the write
+ * ends beyond i_size, i_size is first moved up to the end of the target
+ * block.  Returns @size and advances *@offs on success; otherwise returns
+ * the error from the step that failed and leaves *@offs untouched.
+ */
+static int fsfilt_ext3_write_record(struct file * file, char *buf,
+                                    int size, loff_t *offs)
+{
+        struct buffer_head *bh;
+        unsigned long block, boffs;
+        struct inode *inode = file->f_dentry->d_inode;
+        loff_t old_size = inode->i_size;
+        journal_t *journal;
+        handle_t *handle;
+        int err;
+
+        journal = EXT3_SB(inode->i_sb)->s_journal;
+        /* NOTE(review): the other journal_start() callers in this file hold
+         * the BKL around the call; confirm whether that is needed here. */
+        handle = journal_start(journal, EXT3_DATA_TRANS_BLOCKS + 2);
+        /* journal_start() reports failure with ERR_PTR(), as the IS_ERR()
+         * check in fsfilt_ext3_brw_start() shows, so test for that rather
+         * than for NULL. */
+        if (IS_ERR(handle)) {
+                CERROR("can't start transaction\n");
+                return PTR_ERR(handle);
+        }
+
+        block = *offs >> inode->i_blkbits;
+        if (*offs + size > inode->i_size) {
+                down(&inode->i_sem);
+                /* re-check now that i_sem is held */
+                if (*offs + size > inode->i_size)
+                        inode->i_size = ((loff_t)block + 1) << inode->i_blkbits;
+                up(&inode->i_sem);
+        }
+
+        bh = ext3_bread(handle, inode, block, 1, &err);
+        if (!bh) {
+                CERROR("can't read/create block: %d\n", err);
+                goto out;
+        }
+
+        /* This is a hack only needed because ext3_get_block_handle() updates
+         * i_disksize after marking the inode dirty in ext3_splice_branch().
+         * We will fix that when we get a chance, as ext3_mark_inode_dirty()
+         * is not without cost, nor is it even exported.
+         */
+        if (inode->i_size > old_size)
+                mark_inode_dirty(inode);
+
+        /* the record has to fit in the one block we just got */
+        boffs = (unsigned)*offs % bh->b_size;
+        if (boffs + size > bh->b_size) {
+                CERROR("request crosses block's border. offset %llu, size %u\n",
+                       *offs, size);
+                err = -EIO;
+                goto out;
+        }
+
+        err = ext3_journal_get_write_access(handle, bh);
+        if (err) {
+                CERROR("journal_get_write_access() returned error %d\n", err);
+                goto out;
+        }
+        memcpy(bh->b_data + boffs, buf, size);
+        err = ext3_journal_dirty_metadata(handle, bh);
+        if (err) {
+                CERROR("journal_dirty_metadata() returned error %d\n", err);
+                goto out;
+        }
+        err = size;
+out:
+        /* common exit: drop the buffer (if any) and close the transaction */
+        if (bh)
+                brelse(bh);
+        journal_stop(handle);
+        if (err > 0)
+                *offs += size;
+        return err;
+}
+
 static struct fsfilt_operations fsfilt_ext3_ops = {
         fs_type:                "ext3",
         fs_owner:               THIS_MODULE,
@@ -499,6 +634,8 @@ static struct fsfilt_operations fsfilt_ext3_ops = {
         fs_statfs:              fsfilt_ext3_statfs,
         fs_sync:                fsfilt_ext3_sync,
         fs_prep_san_write:      fsfilt_ext3_prep_san_write,
+        fs_write_record:        fsfilt_ext3_write_record,
+        fs_read_record:         fsfilt_ext3_read_record,
 };
 
 static int __init fsfilt_ext3_init(void)
index 1fba0f4..80f7e50 100644 (file)
@@ -43,10 +43,11 @@ static kmem_cache_t *fcb_cache;
 static atomic_t fcb_cache_count = ATOMIC_INIT(0);
 
 struct fsfilt_cb_data {
-        struct journal_callback cb_jcb; /* data private to jbd */
+        struct journal_callback cb_jcb; /* jbd private data - MUST BE FIRST */
         fsfilt_cb_t cb_func;            /* MDS/OBD completion function */
         struct obd_device *cb_obd;      /* MDS/OBD completion device */
         __u64 cb_last_rcvd;             /* MDS/OST last committed operation */
+        void *cb_data;                  /* MDS/OST completion function data */
 };
 
 #define EXTN_XATTR_INDEX_LUSTRE         5
@@ -58,13 +59,24 @@ struct fsfilt_cb_data {
  * the inode (which we will be changing anyways as part of this
  * transaction).
  */
-static void *fsfilt_extN_start(struct inode *inode, int op)
+static void *fsfilt_extN_start(struct inode *inode, int op, void *desc_private)
 {
         /* For updates to the last recieved file */
         int nblocks = EXTN_DATA_TRANS_BLOCKS;
         void *handle;
 
         switch(op) {
+        case FSFILT_OP_CREATE_LOG:
+                nblocks += EXTN_INDEX_EXTRA_TRANS_BLOCKS+EXTN_DATA_TRANS_BLOCKS;
+                op = FSFILT_OP_CREATE;
+                break;
+        case FSFILT_OP_UNLINK_LOG:
+                nblocks += EXTN_INDEX_EXTRA_TRANS_BLOCKS+EXTN_DATA_TRANS_BLOCKS;
+                op = FSFILT_OP_UNLINK;
+                break;
+        }
+
+        switch(op) {
         case FSFILT_OP_RMDIR:
         case FSFILT_OP_UNLINK:
                 nblocks += EXTN_DELETE_TRANS_BLOCKS;
@@ -95,7 +107,7 @@ static void *fsfilt_extN_start(struct inode *inode, int op)
                  LBUG();
         }
 
-        LASSERT(!current->journal_info);
+        LASSERT(current->journal_info == desc_private);
         lock_kernel();
         handle = journal_start(EXTN_JOURNAL(inode), nblocks);
         unlock_kernel();
@@ -124,7 +136,7 @@ static void *fsfilt_extN_start(struct inode *inode, int op)
  * objcount inode blocks
  * 1 superblock
  * 2 * EXTN_SINGLEDATA_TRANS_BLOCKS for the quota files
- * 
+ *
  * 1 EXTN_DATA_TRANS_BLOCKS for the last_rcvd update.
  */
 static int fsfilt_extN_credits_needed(int objcount, struct fsfilt_objinfo *fso)
@@ -155,7 +167,7 @@ static int fsfilt_extN_credits_needed(int objcount, struct fsfilt_objinfo *fso)
                 ngdblocks = EXTN_SB(sb)->s_gdb_count;
 
         needed += nbitmaps + ngdblocks;
-        
+
         /* last_rcvd update */
         needed += EXTN_DATA_TRANS_BLOCKS;
 
@@ -185,14 +197,14 @@ static int fsfilt_extN_credits_needed(int objcount, struct fsfilt_objinfo *fso)
  * the pages have been written.
  */
 static void *fsfilt_extN_brw_start(int objcount, struct fsfilt_objinfo *fso,
-                                   int niocount, struct niobuf_remote *nb)
+                                   int niocount, void *desc_private)
 {
         journal_t *journal;
         handle_t *handle;
         int needed;
         ENTRY;
 
-        LASSERT(!current->journal_info);
+        LASSERT(current->journal_info == desc_private);
         journal = EXTN_SB(fso->fso_dentry->d_inode->i_sb)->s_journal;
         needed = fsfilt_extN_credits_needed(objcount, fso);
 
@@ -218,6 +230,8 @@ static void *fsfilt_extN_brw_start(int objcount, struct fsfilt_objinfo *fso,
         if (IS_ERR(handle))
                 CERROR("can't get handle for %d credits: rc = %ld\n", needed,
                        PTR_ERR(handle));
+        else
+                LASSERT(handle->h_buffer_credits >= needed);
 
         RETURN(handle);
 }
@@ -249,24 +263,26 @@ static int fsfilt_extN_setattr(struct dentry *dentry, void *handle,
          * in the block pointers; this is really the "small" stripe MD data.
          * We can avoid further hackery by virtue of the MDS file size being
          * zero all the time (which doesn't invoke block truncate at unlink
-         * time), so we assert we never change the MDS file size from zero.
-         */
+         * time), so we assert we never change the MDS file size from zero. */
         if (iattr->ia_valid & ATTR_SIZE && !do_trunc) {
                 /* ATTR_SIZE would invoke truncate: clear it */
                 iattr->ia_valid &= ~ATTR_SIZE;
-                inode->i_size = iattr->ia_size;
+                EXTN_I(inode)->i_disksize = inode->i_size = iattr->ia_size;
 
                 /* make sure _something_ gets set - so new inode
-                 * goes to disk (probably won't work over XFS
-                 */
-                if (!iattr->ia_valid & ATTR_MODE) {
+                 * goes to disk (probably won't work over XFS) */
+                if (!(iattr->ia_valid & (ATTR_MODE | ATTR_MTIME | ATTR_CTIME))){
                         iattr->ia_valid |= ATTR_MODE;
                         iattr->ia_mode = inode->i_mode;
                 }
         }
-        if (inode->i_op->setattr)
+
+        /* Don't allow setattr to change file type */
+        iattr->ia_mode = (inode->i_mode & S_IFMT)|(iattr->ia_mode & ~S_IFMT);
+
+        if (inode->i_op->setattr) {
                 rc = inode->i_op->setattr(dentry, iattr);
-        else{
+        } else {
                 rc = inode_change_ok(inode, iattr);
                 if (!rc)
                         rc = inode_setattr(inode, iattr);
@@ -286,8 +302,8 @@ static int fsfilt_extN_set_md(struct inode *inode, void *handle,
          * it will fit, because putting it in an EA currently kills the MDS
          * performance.  We'll fix this with "fast EAs" in the future.
          */
-        if (lmm_size <= sizeof(EXTN_I(inode)->i_data) -
-                        sizeof(EXTN_I(inode)->i_data[0])) {
+        if (inode->i_blocks == 0 && lmm_size <= sizeof(EXTN_I(inode)->i_data) -
+                                            sizeof(EXTN_I(inode)->i_data[0])) {
                 /* XXX old_size is debugging only */
                 int old_size = EXTN_I(inode)->i_data[0];
                 if (old_size != 0) {
@@ -319,7 +335,7 @@ static int fsfilt_extN_get_md(struct inode *inode, void *lmm, int lmm_size)
 {
         int rc;
 
-        if (EXTN_I(inode)->i_data[0]) {
+        if (inode->i_blocks == 0 && EXTN_I(inode)->i_data[0]) {
                 int size = le32_to_cpu(EXTN_I(inode)->i_data[0]);
                 LASSERT(size < sizeof(EXTN_I(inode)->i_data));
                 if (lmm) {
@@ -411,14 +427,15 @@ static void fsfilt_extN_cb_func(struct journal_callback *jcb, int error)
 {
         struct fsfilt_cb_data *fcb = (struct fsfilt_cb_data *)jcb;
 
-        fcb->cb_func(fcb->cb_obd, fcb->cb_last_rcvd, error);
+        fcb->cb_func(fcb->cb_obd, fcb->cb_last_rcvd, fcb->cb_data, error);
 
         OBD_SLAB_FREE(fcb, fcb_cache, sizeof *fcb);
         atomic_dec(&fcb_cache_count);
 }
 
 static int fsfilt_extN_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
-                                     void *handle, fsfilt_cb_t cb_func)
+                                     void *handle, fsfilt_cb_t cb_func,
+                                     void *cb_data)
 {
         struct fsfilt_cb_data *fcb;
 
@@ -430,10 +447,10 @@ static int fsfilt_extN_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
         fcb->cb_func = cb_func;
         fcb->cb_obd = obd;
         fcb->cb_last_rcvd = last_rcvd;
+        fcb->cb_data = cb_data;
 
         CDEBUG(D_EXT2, "set callback for last_rcvd: "LPD64"\n", last_rcvd);
         lock_kernel();
-        /* Note that an "incompatible pointer" warning here is OK for now */
         journal_callback_set(handle, fsfilt_extN_cb_func,
                              (struct journal_callback *)fcb);
         unlock_kernel();
@@ -459,7 +476,7 @@ static int fsfilt_extN_journal_data(struct file *filp)
  */
 static int fsfilt_extN_statfs(struct super_block *sb, struct obd_statfs *osfs)
 {
-        struct statfs sfs;
+        struct kstatfs sfs;
         int rc = vfs_statfs(sb, &sfs);
 
         if (!rc && sfs.f_bfree < sfs.f_ffree) {
@@ -484,6 +501,110 @@ static int fsfilt_extN_prep_san_write(struct inode *inode, long *blocks,
         return extN_prep_san_write(inode, blocks, nblocks, newsize);
 }
 
+/*
+ * Read one record of @size bytes from @file at *@offs into @buf.
+ *
+ * The record must lie entirely below i_size and inside a single block;
+ * otherwise -EIO is returned.  On success *@offs is advanced by @size and
+ * @size is returned; on failure a negative errno is returned and *@offs
+ * is left untouched.
+ */
+static int fsfilt_extN_read_record(struct file * file, char *buf,
+                                   int size, loff_t *offs)
+{
+        struct buffer_head *bh;
+        unsigned long block, boffs;
+        struct inode *inode = file->f_dentry->d_inode;
+        int err;
+
+        if (inode->i_size < *offs + size) {
+                CERROR("file size %llu is too short for read %u@%llu\n",
+                       inode->i_size, size, *offs);
+                return -EIO;
+        }
+
+        block = *offs >> inode->i_blkbits;
+        bh = extN_bread(NULL, inode, block, 0, &err);
+        if (!bh) {
+                CERROR("can't read block: %d\n", err);
+                /* NOTE(review): extN_bread() looks able to return NULL with
+                 * err still 0 (e.g. an unmapped block) - confirm.  Never let
+                 * that escape as a 0 "success" return. */
+                return err ? err : -EIO;
+        }
+
+        /* the record has to fit in the one block we just read */
+        boffs = (unsigned)*offs % bh->b_size;
+        if (boffs + size > bh->b_size) {
+                CERROR("request crosses block's border. offset %llu, size %u\n",
+                       *offs, size);
+                brelse(bh);
+                return -EIO;
+        }
+
+        memcpy(buf, bh->b_data + boffs, size);
+        brelse(bh);
+        *offs += size;
+        return size;
+}
+
+/*
+ * Write one record of @size bytes from @buf into @file at *@offs, in its
+ * own journal transaction.
+ *
+ * The record must fit inside a single block (-EIO otherwise).  If the write
+ * ends beyond i_size, i_size is first moved up to the end of the target
+ * block.  Returns @size and advances *@offs on success; otherwise returns
+ * the error from the step that failed and leaves *@offs untouched.
+ */
+static int fsfilt_extN_write_record(struct file * file, char *buf,
+                                    int size, loff_t *offs)
+{
+        struct buffer_head *bh;
+        unsigned long block, boffs;
+        struct inode *inode = file->f_dentry->d_inode;
+        loff_t old_size = inode->i_size;
+        journal_t *journal;
+        handle_t *handle;
+        int err;
+
+        journal = EXTN_SB(inode->i_sb)->s_journal;
+        /* NOTE(review): the other journal_start() callers in this file hold
+         * the BKL around the call; confirm whether that is needed here. */
+        handle = journal_start(journal, EXTN_DATA_TRANS_BLOCKS + 2);
+        /* journal_start() reports failure with ERR_PTR(), as the IS_ERR()
+         * check in fsfilt_extN_brw_start() shows, so test for that rather
+         * than for NULL. */
+        if (IS_ERR(handle)) {
+                CERROR("can't start transaction\n");
+                return PTR_ERR(handle);
+        }
+
+        block = *offs >> inode->i_blkbits;
+        if (*offs + size > inode->i_size) {
+                down(&inode->i_sem);
+                /* re-check now that i_sem is held */
+                if (*offs + size > inode->i_size)
+                        inode->i_size = ((loff_t)block + 1) << inode->i_blkbits;
+                up(&inode->i_sem);
+        }
+
+        bh = extN_bread(handle, inode, block, 1, &err);
+        if (!bh) {
+                CERROR("can't read/create block: %d\n", err);
+                goto out;
+        }
+
+        /* This is a hack only needed because extN_get_block_handle() updates
+         * i_disksize after marking the inode dirty in extN_splice_branch().
+         * We will fix that when we get a chance, as extN_mark_inode_dirty()
+         * is not without cost, nor is it even exported.
+         */
+        if (inode->i_size > old_size)
+                mark_inode_dirty(inode);
+
+        /* the record has to fit in the one block we just got */
+        boffs = (unsigned)*offs % bh->b_size;
+        if (boffs + size > bh->b_size) {
+                CERROR("request crosses block's border. offset %llu, size %u\n",
+                       *offs, size);
+                err = -EIO;
+                goto out;
+        }
+
+        err = extN_journal_get_write_access(handle, bh);
+        if (err) {
+                CERROR("journal_get_write_access() returned error %d\n", err);
+                goto out;
+        }
+        memcpy(bh->b_data + boffs, buf, size);
+        err = extN_journal_dirty_metadata(handle, bh);
+        if (err) {
+                CERROR("journal_dirty_metadata() returned error %d\n", err);
+                goto out;
+        }
+        err = size;
+out:
+        /* common exit: drop the buffer (if any) and close the transaction */
+        if (bh)
+                brelse(bh);
+        journal_stop(handle);
+        if (err > 0)
+                *offs += size;
+        return err;
+}
+
 static struct fsfilt_operations fsfilt_extN_ops = {
         fs_type:                "extN",
         fs_owner:               THIS_MODULE,
@@ -499,6 +620,8 @@ static struct fsfilt_operations fsfilt_extN_ops = {
         fs_statfs:              fsfilt_extN_statfs,
         fs_sync:                fsfilt_extN_sync,
         fs_prep_san_write:      fsfilt_extN_prep_san_write,
+        fs_write_record:        fsfilt_extN_write_record,
+        fs_read_record:         fsfilt_extN_read_record,
 };
 
 static int __init fsfilt_extN_init(void)
index ccefb92..3d118fc 100644 (file)
 #include <linux/obd_class.h>
 #include <linux/module.h>
 
-static void *fsfilt_reiserfs_start(struct inode *inode, int op)
+static void *fsfilt_reiserfs_start(struct inode *inode, int op,
+                                   void *desc_private)
 {
         return (void *)0xf00f00be;
 }
 
 static void *fsfilt_reiserfs_brw_start(int objcount, struct fsfilt_objinfo *fso,
-                                   int niocount, struct niobuf_remote *nb)
+                                       int niocount, void *desc_private)
 {
         return (void *)0xf00f00be;
 }
 
-static int fsfilt_reiserfs_commit(struct inode *inode, void *handle, 
+static int fsfilt_reiserfs_commit(struct inode *inode, void *handle,
                                   int force_sync)
 {
         if (handle != (void *)0xf00f00be) {
@@ -131,8 +132,9 @@ static ssize_t fsfilt_reiserfs_readpage(struct file *file, char *buf, size_t cou
         return file->f_op->read(file, buf, count, offset);
 }
 
-static int fsfilt_reiserfs_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
-                                         void *handle, fsfilt_cb_t cb_func)
+static int fsfilt_reiserfs_set_last_rcvd(struct obd_device *obd,
+                                         __u64 last_rcvd, void *handle,
+                                         fsfilt_cb_t cb_func, void *cb_data)
 {
         static long next = 0;
 
@@ -141,7 +143,7 @@ static int fsfilt_reiserfs_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd
                 next = jiffies + 300 * HZ;
         }
 
-        cb_func(obd, last_rcvd, 0);
+        cb_func(obd, last_rcvd, cb_data, 0);
 
         return 0;
 }
index 4862cf3..bb48e5d 100644 (file)
 
 #define EXPORT_SYMTAB
 #define DEBUG_SUBSYSTEM S_CLASS
-#ifdef __KERNEL__
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/version.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#include <asm/statfs.h>
-#endif
-#include <linux/seq_file.h>
 
-#else
-#include <liblustre.h>
+#ifdef __KERNEL__
+# include <linux/config.h>
+# include <linux/module.h>
+# include <linux/version.h>
+# include <linux/slab.h>
+# include <linux/types.h>
+# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#  include <asm/statfs.h>
+# endif
+# include <linux/seq_file.h>
+#else /* __KERNEL__ */
+# include <liblustre.h>
 #endif
 
 #include <linux/obd_class.h>
 #include <linux/lprocfs_status.h>
+#include <linux/lustre_fsfilt.h>
 
 #ifdef LPROCFS
 
 struct proc_dir_entry *lprocfs_srch(struct proc_dir_entry *head,
                                     const char *name)
 {
-        struct proc_dir_entrytemp;
+        struct proc_dir_entry *temp;
 
-        if (!head)
+        if (head == NULL)
                 return NULL;
 
         temp = head->subdir;
         while (temp != NULL) {
-                if (!strcmp(temp->name, name))
+                if (strcmp(temp->name, name) == 0)
                         return temp;
 
                 temp = temp->next;
@@ -65,26 +66,30 @@ struct proc_dir_entry *lprocfs_srch(struct proc_dir_entry *head,
 int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *list,
                      void *data)
 {
-        if ((root == NULL) || (list == NULL))
+        if (root == NULL || list == NULL)
                 return -EINVAL;
 
-        while (list->name) {
+        while (list->name != NULL) {
                 struct proc_dir_entry *cur_root, *proc;
-                char *pathcopy, *cur, *next;
-                int pathsize = strlen(list->name)+1;
+                char *pathcopy, *cur, *next, pathbuf[64];
+                int pathsize = strlen(list->name) + 1;
 
                 proc = NULL;
                 cur_root = root;
 
                 /* need copy of path for strsep */
-                OBD_ALLOC(pathcopy, pathsize);
-                if (!pathcopy)
-                        return -ENOMEM;
+                if (strlen(list->name) > sizeof(pathbuf) - 1) {
+                        OBD_ALLOC(pathcopy, pathsize);
+                        if (pathcopy == NULL)
+                                return -ENOMEM;
+                } else {
+                        pathcopy = pathbuf;
+                }
 
                 next = pathcopy;
                 strcpy(pathcopy, list->name);
 
-                while (cur_root && (cur = strsep(&next, "/"))) {
+                while (cur_root != NULL && (cur = strsep(&next, "/"))) {
                         if (*cur =='\0') /* skip double/trailing "/" */
                                 continue;
 
@@ -92,10 +97,10 @@ int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *list,
                         CDEBUG(D_OTHER, "cur_root=%s, cur=%s, next=%s, (%s)\n",
                                cur_root->name, cur, next,
                                (proc ? "exists" : "new"));
-                        if (next)
+                        if (next != NULL) {
                                 cur_root = (proc ? proc :
-                                                   proc_mkdir(cur, cur_root));
-                        else if (!proc) {
+                                            proc_mkdir(cur, cur_root));
+                        } else if (proc == NULL) {
                                 mode_t mode = 0444;
                                 if (list->write_fptr)
                                         mode = 0644;
@@ -103,9 +108,10 @@ int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *list,
                         }
                 }
 
+                if (pathcopy != pathbuf)
                 OBD_FREE(pathcopy, pathsize);
 
-                if ((cur_root == NULL) || (proc == NULL)) {
+                if (cur_root == NULL || proc == NULL) {
                         CERROR("LprocFS: No memory to create /proc entry %s",
                                list->name);
                         return -ENOMEM;
@@ -119,7 +125,7 @@ int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *list,
         return 0;
 }
 
-void lprocfs_remove(struct proc_dir_entry* root)
+void lprocfs_remove(struct proc_dir_entry *root)
 {
         struct proc_dir_entry *temp = root;
         struct proc_dir_entry *rm_entry;
@@ -130,7 +136,7 @@ void lprocfs_remove(struct proc_dir_entry* root)
         LASSERT(parent != NULL);
 
         while (1) {
-                while (temp->subdir)
+                while (temp->subdir != NULL)
                         temp = temp->subdir;
 
                 rm_entry = temp;
@@ -148,14 +154,14 @@ struct proc_dir_entry *lprocfs_register(const char *name,
         struct proc_dir_entry *newchild;
 
         newchild = lprocfs_srch(parent, name);
-        if (newchild) {
+        if (newchild != NULL) {
                 CERROR(" Lproc: Attempting to register %s more than once \n",
                        name);
                 return ERR_PTR(-EALREADY);
         }
 
         newchild = proc_mkdir(name, parent);
-        if (newchild && list) {
+        if (newchild != NULL && list != NULL) {
                 int rc = lprocfs_add_vars(newchild, list, data);
                 if (rc) {
                         lprocfs_remove(newchild);
@@ -175,10 +181,10 @@ int lprocfs_rd_u64(char *page, char **start, off_t off,
         return snprintf(page, count, LPU64"\n", *(__u64 *)data);
 }
 
-int lprocfs_rd_uuid(char* page, char **start, off_t off, int count,
+int lprocfs_rd_uuid(char *page, char **start, off_t off, int count,
                     int *eof, void *data)
 {
-        struct obd_device* dev = (struct obd_device*)data;
+        struct obd_device *dev = (struct obd_device*)data;
 
         LASSERT(dev != NULL);
         *eof = 1;
@@ -186,9 +192,9 @@ int lprocfs_rd_uuid(char* page, char **start, off_t off, int count,
 }
 
 int lprocfs_rd_name(char *page, char **start, off_t off, int count,
-                    int *eof, void *data)
+                    int *eof, void *data)
 {
-        struct obd_device* dev = (struct obd_device *)data;
+        struct obd_device *dev = (struct obd_device *)data;
 
         LASSERT(dev != NULL);
         LASSERT(dev->obd_name != NULL);
@@ -196,72 +202,98 @@ int lprocfs_rd_name(char *page, char **start, off_t off, int count,
         return snprintf(page, count, "%s\n", dev->obd_name);
 }
 
-int lprocfs_rd_blksize(char* page, char **start, off_t off, int count,
-                       int *eof, struct statfs *sfs)
+int lprocfs_rd_fstype(char *page, char **start, off_t off, int count, int *eof,
+                      void *data)
 {
-        LASSERT(sfs != NULL);
-        *eof = 1;
-        return snprintf(page, count, "%lu\n", sfs->f_bsize);
+        struct obd_device *obd = (struct obd_device *)data;
+
+        LASSERT(obd != NULL);
+        LASSERT(obd->obd_fsops != NULL);
+        LASSERT(obd->obd_fsops->fs_type != NULL);
+        return snprintf(page, count, "%s\n", obd->obd_fsops->fs_type);
 }
 
-int lprocfs_rd_kbytestotal(char* page, char **start, off_t off, int count,
-                           int *eof, struct statfs *sfs)
+int lprocfs_rd_blksize(char *page, char **start, off_t off, int count,
+                       int *eof, void *data)
 {
-        __u32 blk_size;
-        __u64 result;
+        struct obd_statfs osfs;
+        int rc = obd_statfs(data, &osfs, jiffies - HZ);
+        if (!rc) {
+                *eof = 1;
+                rc = snprintf(page, count, "%u\n", osfs.os_bsize);
+        }
+        return rc;
+}
 
-        LASSERT(sfs != NULL);
-        blk_size = sfs->f_bsize >> 10;
-        result = sfs->f_blocks;
+int lprocfs_rd_kbytestotal(char *page, char **start, off_t off, int count,
+                           int *eof, void *data)
+{
+        struct obd_statfs osfs;
+        int rc = obd_statfs(data, &osfs, jiffies - HZ);
+        if (!rc) {
+                __u32 blk_size = osfs.os_bsize >> 10;
+                __u64 result = osfs.os_blocks;
 
-        while (blk_size >>= 1)
-                result <<= 1;
+                while (blk_size >>= 1)
+                        result <<= 1;
 
-        *eof = 1;
-        return snprintf(page, count, LPU64"\n", result);
+                *eof = 1;
+                rc = snprintf(page, count, LPU64"\n", result);
+        }
+        return rc;
 }
 
-int lprocfs_rd_kbytesfree(char* page, char **start, off_t off, int count,
-                          int *eof, struct statfs *sfs)
+int lprocfs_rd_kbytesfree(char *page, char **start, off_t off, int count,
+                          int *eof, void *data)
 {
-        __u32 blk_size;
-        __u64 result;
+        struct obd_statfs osfs;
+        int rc = obd_statfs(data, &osfs, jiffies - HZ);
+        if (!rc) {
+                __u32 blk_size = osfs.os_bsize >> 10;
+                __u64 result = osfs.os_bfree;
 
-        LASSERT(sfs != NULL);
-        blk_size = sfs->f_bsize >> 10;
-        result = sfs->f_bfree;
+                while (blk_size >>= 1)
+                        result <<= 1;
 
-        while (blk_size >>= 1)
-                result <<= 1;
-
-        *eof = 1;
-        return snprintf(page, count, LPU64"\n", result);
+                *eof = 1;
+                rc = snprintf(page, count, LPU64"\n", result);
+        }
+        return rc;
 }
 
-int lprocfs_rd_filestotal(char* page, char **start, off_t off, int count,
-                          int *eof, struct statfs *sfs)
+int lprocfs_rd_filestotal(char *page, char **start, off_t off, int count,
+                          int *eof, void *data)
 {
-        LASSERT(sfs != NULL);
-        *eof = 1;
-        return snprintf(page, count, "%ld\n", sfs->f_files);
+        struct obd_statfs osfs;
+        int rc = obd_statfs(data, &osfs, jiffies - HZ);
+        if (!rc) {
+                *eof = 1;
+                rc = snprintf(page, count, LPU64"\n", osfs.os_files);
+        }
+
+        return rc;
 }
 
-int lprocfs_rd_filesfree(char* page, char **start, off_t off, int count,
-                         int *eof, struct statfs *sfs)
+int lprocfs_rd_filesfree(char *page, char **start, off_t off, int count,
+                         int *eof, void *data)
 {
-        LASSERT(sfs != NULL);
-        *eof = 1;
-        return snprintf(page, count, "%ld\n", sfs->f_ffree);
+        struct obd_statfs osfs;
+        int rc = obd_statfs(data, &osfs, jiffies - HZ);
+        if (!rc) {
+                *eof = 1;
+                rc = snprintf(page, count, LPU64"\n", osfs.os_ffree);
+        }
+        return rc;
 }
 
-int lprocfs_rd_filegroups(char* page, char **start, off_t off, int count,
-                          int *eof, struct statfs *sfs)
+int lprocfs_rd_filegroups(char *page, char **start, off_t off, int count,
+                          int *eof, void *data)
 {
         *eof = 1;
         return snprintf(page, count, "unimplemented\n");
 }
 
-int lprocfs_rd_server_uuid(char* page, char **start, off_t off, int count,
+int lprocfs_rd_server_uuid(char *page, char **start, off_t off, int count,
                            int *eof, void *data)
 {
         struct obd_device *obd = (struct obd_device *)data;
@@ -290,7 +322,7 @@ int lprocfs_rd_conn_uuid(char *page, char **start, off_t off, int count,
 int lprocfs_rd_numrefs(char *page, char **start, off_t off, int count,
                        int *eof, void *data)
 {
-        struct obd_type* class = (struct obd_type*) data;
+        struct obd_type *class = (struct obd_type*) data;
 
         LASSERT(class != NULL);
         *eof = 1;
@@ -334,21 +366,21 @@ struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num)
         if (num == 0)
                 return NULL;
 
-        OBD_ALLOC(stats, offsetof(typeof(*stats), ls_percpu[smp_num_cpus]));
+        OBD_ALLOC(stats, offsetof(typeof(*stats), ls_percpu[num_online_cpus()]));
         if (stats == NULL)
                 return NULL;
 
         percpusize = L1_CACHE_ALIGN(offsetof(typeof(*percpu), lp_cntr[num]));
-        stats->ls_percpu_size = smp_num_cpus * percpusize;
+        stats->ls_percpu_size = num_online_cpus() * percpusize;
         OBD_ALLOC(stats->ls_percpu[0], stats->ls_percpu_size);
         if (stats->ls_percpu[0] == NULL) {
                 OBD_FREE(stats, offsetof(typeof(*stats),
-                                         ls_percpu[smp_num_cpus]));
+                                         ls_percpu[num_online_cpus()]));
                 return NULL;
         }
 
         stats->ls_num = num;
-        for (i = 1; i < smp_num_cpus; i++)
+        for (i = 1; i < num_online_cpus(); i++)
                 stats->ls_percpu[i] = (void *)(stats->ls_percpu[i - 1]) +
                         percpusize;
 
@@ -361,7 +393,7 @@ void lprocfs_free_stats(struct lprocfs_stats *stats)
                 return;
 
         OBD_FREE(stats->ls_percpu[0], stats->ls_percpu_size);
-        OBD_FREE(stats, offsetof(typeof(*stats), ls_percpu[smp_num_cpus]));
+        OBD_FREE(stats, offsetof(typeof(*stats), ls_percpu[num_online_cpus()]));
 }
 
 /* Reset counter under lock */
@@ -410,17 +442,18 @@ static int lprocfs_stats_seq_show(struct seq_file *p, void *v)
        }
        idx = cntr - &(stats->ls_percpu[0])->lp_cntr[0];
 
-       for (i = 0; i < smp_num_cpus; i++) {
+       for (i = 0; i < num_online_cpus(); i++) {
                struct lprocfs_counter *percpu_cntr =
                        &(stats->ls_percpu[i])->lp_cntr[idx];
                int centry;
+
                do {
-                        centry = atomic_read(&percpu_cntr->lc_cntl.la_entry);
-                        t.lc_count = percpu_cntr->lc_count;
-                        t.lc_sum = percpu_cntr->lc_sum;
-                        t.lc_min = percpu_cntr->lc_min;
-                        t.lc_max = percpu_cntr->lc_max;
-                        t.lc_sumsquare = percpu_cntr->lc_sumsquare;
+                       centry = atomic_read(&percpu_cntr->lc_cntl.la_entry);
+                       t.lc_count = percpu_cntr->lc_count;
+                       t.lc_sum = percpu_cntr->lc_sum;
+                       t.lc_min = percpu_cntr->lc_min;
+                       t.lc_max = percpu_cntr->lc_max;
+                       t.lc_sumsquare = percpu_cntr->lc_sumsquare;
                } while (centry != atomic_read(&percpu_cntr->lc_cntl.la_entry) &&
                         centry != atomic_read(&percpu_cntr->lc_cntl.la_exit));
                ret.lc_count += t.lc_count;
@@ -453,10 +486,10 @@ static int lprocfs_stats_seq_show(struct seq_file *p, void *v)
 }
 
 struct seq_operations lprocfs_stats_seq_sops = {
-        .start = lprocfs_stats_seq_start,
-        .stop = lprocfs_stats_seq_stop,
-        .next = lprocfs_stats_seq_next,
-        .show = lprocfs_stats_seq_show,
+        start: lprocfs_stats_seq_start,
+        stop:  lprocfs_stats_seq_stop,
+        next:  lprocfs_stats_seq_next,
+        show:  lprocfs_stats_seq_show,
 };
 
 static int lprocfs_stats_seq_open(struct inode *inode, struct file *file)
@@ -474,13 +507,13 @@ static int lprocfs_stats_seq_open(struct inode *inode, struct file *file)
 }
 
 struct file_operations lprocfs_stats_seq_fops = {
-        .open    = lprocfs_stats_seq_open,
-        .read    = seq_read,
-        .llseek  = seq_lseek,
-        .release = seq_release,
+        open:    lprocfs_stats_seq_open,
+        read:    seq_read,
+        llseek:  seq_lseek,
+        release: seq_release,
 };
 
-int lprocfs_register_stats(struct proc_dir_entry *root, const char* name,
+int lprocfs_register_stats(struct proc_dir_entry *root, const char *name,
                            struct lprocfs_stats *stats)
 {
         struct proc_dir_entry *entry;
@@ -502,7 +535,7 @@ void lprocfs_counter_init(struct lprocfs_stats *stats, int index,
         int i;
 
         LASSERT(stats != NULL);
-        for (i = 0; i < smp_num_cpus; i++) {
+        for (i = 0; i < num_online_cpus(); i++) {
                 c = &(stats->ls_percpu[i]->lp_cntr[index]);
                 c->lc_config = conf;
                 c->lc_min = ~(__u64)0;
@@ -515,7 +548,7 @@ EXPORT_SYMBOL(lprocfs_counter_init);
 #define LPROCFS_OBD_OP_INIT(base, stats, op)                               \
 do {                                                                       \
         unsigned int coffset = base + OBD_COUNTER_OFFSET(op);              \
-        LASSERT(coffset < stats->ls_num);                                     \
+        LASSERT(coffset < stats->ls_num);                                  \
         lprocfs_counter_init(stats, coffset, 0, #op, "reqs");              \
 } while (0)
 
@@ -529,10 +562,10 @@ int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats)
         LASSERT(obd->obd_proc_entry != NULL);
         LASSERT(obd->obd_cntr_base == 0);
 
-        num_stats = 1 + OBD_COUNTER_OFFSET(destroy_export) +
+        num_stats = 1 + OBD_COUNTER_OFFSET(unpin) +
                 num_private_stats;
         stats = lprocfs_alloc_stats(num_stats);
-        if (!stats)
+        if (stats == NULL)
                 return -ENOMEM;
 
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, iocontrol);
@@ -569,16 +602,28 @@ int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats)
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, match);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel_unused);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, log_add);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, log_cancel);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, san_preprw);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, mark_page_dirty);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, clear_dirty_pages);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, last_dirty_offset);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, destroy_export);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, pin); 
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, unpin);
 
         for (i = num_private_stats; i < num_stats; i++) {
-                /* If this assertion failed, it is likely that an obd
+                /* If this LBUGs, it is likely that an obd
                  * operation was added to struct obd_ops in
                  * <linux/obd.h>, and that the corresponding line item
                  * LPROCFS_OBD_OP_INIT(.., .., opname)
                  * is missing from the list above. */
-                LASSERT(&(stats->ls_percpu[0])->lp_cntr[i].lc_name != NULL);
+                if (stats->ls_percpu[0]->lp_cntr[i].lc_name == NULL) {
+                        CERROR("Missing obd_stat initializer obd_op "
+                               "operation at offset %d. Aborting.\n",
+                               i - num_private_stats);
+                        LBUG();
+                }
         }
         rc = lprocfs_register_stats(obd->obd_proc_entry, "stats", stats);
         if (rc < 0) {
@@ -617,6 +662,7 @@ EXPORT_SYMBOL(lprocfs_free_obd_stats);
 EXPORT_SYMBOL(lprocfs_rd_u64);
 EXPORT_SYMBOL(lprocfs_rd_uuid);
 EXPORT_SYMBOL(lprocfs_rd_name);
+EXPORT_SYMBOL(lprocfs_rd_fstype);
 EXPORT_SYMBOL(lprocfs_rd_server_uuid);
 EXPORT_SYMBOL(lprocfs_rd_conn_uuid);
 EXPORT_SYMBOL(lprocfs_rd_numrefs);
index 06f86ad..bc07df9 100644 (file)
@@ -4,32 +4,31 @@
  * Copyright (C) 2002 Cluster File Systems, Inc.
  *   Author: Phil Schwan <phil@clusterfs.com>
  *
- *   This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *   This file is part of Lustre, http://www.lustre.org/
  *
- *   Portals is free software; you can redistribute it and/or
- *   modify it under the terms of version 2.1 of the GNU Lesser General
- *   Public License as published by the Free Software Foundation.
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
  *
- *   Portals is distributed in the hope that it will be useful,
+ *   Lustre is distributed in the hope that it will be useful,
  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU Lesser General Public License for more details.
+ *   GNU General Public License for more details.
  *
- *   You should have received a copy of the GNU Lesser General Public
- *   License along with Portals; if not, write to the Free Software
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #define DEBUG_SUBSYSTEM S_CLASS
 #ifdef __KERNEL__
-#include <linux/types.h>
-#include <linux/random.h>
+# include <linux/types.h>
+# include <linux/random.h>
 #else 
-#include <liblustre.h>
+# include <liblustre.h>
 #endif 
 
-
-#include <linux/kp30.h>
+#include <linux/obd_support.h>
 #include <linux/lustre_handles.h>
 
 static spinlock_t handle_lock = SPIN_LOCK_UNLOCKED;
@@ -118,7 +117,7 @@ int class_handle_init(void)
 
         LASSERT(handle_hash == NULL);
 
-        PORTAL_ALLOC(handle_hash, sizeof(*handle_hash) * HANDLE_HASH_SIZE);
+        OBD_VMALLOC(handle_hash, sizeof(*handle_hash) * HANDLE_HASH_SIZE);
         if (handle_hash == NULL)
                 return -ENOMEM;
 
@@ -158,7 +157,7 @@ void class_handle_cleanup(void)
                 cleanup_all_handles();
         }
 
-        PORTAL_FREE(handle_hash, sizeof(*handle_hash) * HANDLE_HASH_SIZE);
+        OBD_VFREE(handle_hash, sizeof(*handle_hash) * HANDLE_HASH_SIZE);
         handle_hash = NULL;
 
         if (handle_count)
index 016354c..5987d2e 100644 (file)
@@ -64,8 +64,8 @@ void class_exit_uuidlist(void)
                 struct uuid_nid_data *data =
                         list_entry(tmp, struct uuid_nid_data, head);
 
-                PORTAL_FREE(data->uuid, strlen(data->uuid) + 1);
-                PORTAL_FREE(data, sizeof(*data));
+                OBD_FREE(data->uuid, strlen(data->uuid) + 1);
+                OBD_FREE(data, sizeof(*data));
         }
 }
 
@@ -109,11 +109,11 @@ int class_add_uuid(char *uuid, __u64 nid, __u32 nal)
         }
 
         rc = -ENOMEM;
-        PORTAL_ALLOC(data, sizeof(*data));
+        OBD_ALLOC(data, sizeof(*data));
         if (data == NULL)
                 goto fail_0;
 
-        PORTAL_ALLOC(data->uuid, nob);
+        OBD_ALLOC(data->uuid, nob);
         if (data == NULL)
                 goto fail_1;
 
@@ -131,7 +131,7 @@ int class_add_uuid(char *uuid, __u64 nid, __u32 nal)
         return 0;
 
  fail_1:
-        PORTAL_FREE (data, sizeof (*data));
+        OBD_FREE (data, sizeof (*data));
  fail_0:
         kportal_put_ni (nal);
         return (rc);
@@ -171,8 +171,8 @@ int class_del_uuid (char *uuid)
                 list_del (&data->head);
 
                 kportal_put_ni (data->nal);
-                PORTAL_FREE(data->uuid, strlen(data->uuid) + 1);
-                PORTAL_FREE(data, sizeof(*data));
+                OBD_FREE(data->uuid, strlen(data->uuid) + 1);
+                OBD_FREE(data, sizeof(*data));
         } while (!list_empty (&deathrow));
 
         return 0;
index 0ce54a3..bd1363a 100644 (file)
@@ -139,7 +139,6 @@ void pop_ctxt(struct obd_run_ctxt *saved, struct obd_run_ctxt *new_ctx,
                 current->fsgid = saved->ouc.ouc_fsgid;
                 current->cap_effective = saved->ouc.ouc_cap;
                 current->ngroups = saved->ngroups;
-
                 current->groups[0] = saved->ouc.ouc_suppgid1;
                 current->groups[1] = saved->ouc.ouc_suppgid2;
         }
@@ -167,7 +166,7 @@ struct dentry *simple_mknod(struct dentry *dir, char *name, int mode)
         ASSERT_KERNEL_CTXT("kernel doing mknod outside kernel context\n");
         CDEBUG(D_INODE, "creating file %*s\n", (int)strlen(name), name);
 
-        dchild = lookup_one_len(name, dir, strlen(name));
+        dchild = ll_lookup_one_len(name, dir, strlen(name));
         if (IS_ERR(dchild))
                 GOTO(out_up, dchild);
 
@@ -201,7 +200,7 @@ struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode)
 
         ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n");
         CDEBUG(D_INODE, "creating directory %*s\n", (int)strlen(name), name);
-        dchild = lookup_one_len(name, dir, strlen(name));
+        dchild = ll_lookup_one_len(name, dir, strlen(name));
         if (IS_ERR(dchild))
                 GOTO(out_up, dchild);
 
index 786a768..8bb78cc 100644 (file)
@@ -31,7 +31,7 @@
 #else
 #include <linux/version.h>
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#include <asm/statfs.h>
+#include <linux/statfs.h>
 #endif
 #endif
 
@@ -40,8 +40,9 @@
 #include <linux/obd_support.h>
 #include <linux/obd_class.h>
 
-void statfs_pack(struct obd_statfs *osfs, struct statfs *sfs)
+void statfs_pack(struct obd_statfs *osfs, struct kstatfs *sfs)
 {
+        memset(osfs, 0, sizeof(*osfs));
         osfs->os_type = sfs->f_type;
         osfs->os_blocks = sfs->f_blocks;
         osfs->os_bfree = sfs->f_bfree;
@@ -52,8 +53,9 @@ void statfs_pack(struct obd_statfs *osfs, struct statfs *sfs)
         osfs->os_namelen = sfs->f_namelen;
 }
 
-void statfs_unpack(struct statfs *sfs, struct obd_statfs *osfs)
+void statfs_unpack(struct kstatfs *sfs, struct obd_statfs *osfs)
 {
+        memset(sfs, 0, sizeof(*sfs));
         sfs->f_type = osfs->os_type;
         sfs->f_blocks = osfs->os_blocks;
         sfs->f_bfree = osfs->os_bfree;
@@ -64,39 +66,5 @@ void statfs_unpack(struct statfs *sfs, struct obd_statfs *osfs)
         sfs->f_namelen = osfs->os_namelen;
 }
 
-int obd_self_statfs(struct obd_device *obd, struct statfs *sfs)
-{
-        struct obd_export *export, *my_export = NULL;
-        struct obd_statfs osfs = { 0 };
-        int rc;
-        ENTRY;
-
-        LASSERT( obd != NULL );
-
-        spin_lock(&obd->obd_dev_lock);
-        if (list_empty(&obd->obd_exports)) {
-                spin_unlock(&obd->obd_dev_lock);
-                export = my_export = class_new_export(obd);
-                if (export == NULL)
-                        RETURN(-ENOMEM);
-        } else {
-                export = list_entry(obd->obd_exports.next, typeof(*export),
-                                    exp_obd_chain);
-                export = class_export_get(export);
-                spin_unlock(&obd->obd_dev_lock);
-        }
-
-        rc = obd_statfs(export, &osfs);
-        if (!rc)
-                statfs_unpack(sfs, &osfs);
-
-        if (my_export)
-                class_unlink_export(my_export);
-
-        class_export_put(export);
-        RETURN(rc);
-}
-
 EXPORT_SYMBOL(statfs_pack);
 EXPORT_SYMBOL(statfs_unpack);
-EXPORT_SYMBOL(obd_self_statfs);
index e530020..49c6100 100644 (file)
@@ -6,3 +6,4 @@ Makefile
 Makefile.in
 .deps
 TAGS
+.*.cmd
index f89df07..887889a 100644 (file)
@@ -64,7 +64,7 @@ static int echo_connect(struct lustre_handle *conn, struct obd_device *obd,
         return class_connect(conn, obd, cluuid);
 }
 
-static int echo_disconnect(struct lustre_handle *conn, int failover)
+static int echo_disconnect(struct lustre_handle *conn, int flags)
 {
         struct obd_export *exp = class_conn2export(conn);
 
@@ -72,7 +72,7 @@ static int echo_disconnect(struct lustre_handle *conn, int failover)
 
         ldlm_cancel_locks_for_export(exp);
         class_export_put(exp);
-        return (class_disconnect(conn, failover));
+        return class_disconnect(conn, flags);
 }
 
 static __u64 echo_next_id(struct obd_device *obddev)
@@ -235,7 +235,7 @@ static int echo_setattr(struct lustre_handle *conn, struct obdo *oa,
 int echo_preprw(int cmd, struct obd_export *export, struct obdo *oa,
                 int objcount, struct obd_ioobj *obj, int niocount,
                 struct niobuf_remote *nb, struct niobuf_local *res,
-                void **desc_private, struct obd_trans_info *oti)
+                struct obd_trans_info *oti)
 {
         struct obd_device *obd;
         struct niobuf_local *r = res;
@@ -253,7 +253,8 @@ int echo_preprw(int cmd, struct obd_export *export, struct obdo *oa,
         CDEBUG(D_PAGE, "%s %d obdos with %d IOs\n",
                cmd == OBD_BRW_READ ? "reading" : "writing", objcount, niocount);
 
-        *desc_private = (void *)DESC_PRIV;
+        if (oti)
+                oti->oti_handle = (void *)DESC_PRIV;
 
         for (i = 0; i < objcount; i++, obj++) {
                 int gfp_mask = (obj->ioo_id & 1) ? GFP_HIGHUSER : GFP_KERNEL;
@@ -285,7 +286,7 @@ int echo_preprw(int cmd, struct obd_export *export, struct obdo *oa,
 
                         r->offset = nb->offset;
                         r->len = nb->len;
-                        LASSERT ((r->offset & (PAGE_SIZE - 1)) + r->len <= PAGE_SIZE);
+                        LASSERT((r->offset & ~PAGE_MASK) + r->len <= PAGE_SIZE);
 
                         CDEBUG(D_PAGE, "$$$$ get page %p @ "LPU64" for %d\n",
                                r->page, r->offset, r->len);
@@ -339,9 +340,9 @@ preprw_cleanup:
         return rc;
 }
 
-int echo_commitrw(int cmd, struct obd_export *export, int objcount,
-                  struct obd_ioobj *obj, int niocount, struct niobuf_local *res,
-                  void *desc_private, struct obd_trans_info *oti)
+int echo_commitrw(int cmd, struct obd_export *export, struct obdo *oa,
+                  int objcount, struct obd_ioobj *obj, int niocount,
+                  struct niobuf_local *res, struct obd_trans_info *oti)
 {
         struct obd_device *obd;
         struct niobuf_local *r = res;
@@ -365,7 +366,7 @@ int echo_commitrw(int cmd, struct obd_export *export, int objcount,
                 RETURN(-EINVAL);
         }
 
-        LASSERT(desc_private == (void *)DESC_PRIV);
+        LASSERT(oti == NULL || oti->oti_handle == (void *)DESC_PRIV);
 
         for (i = 0; i < objcount; i++, obj++) {
                 int verify = obj->ioo_id != 0;
@@ -437,7 +438,7 @@ static int echo_setup(struct obd_device *obddev, obd_count len, void *buf)
         RETURN(0);
 }
 
-static int echo_cleanup(struct obd_device *obddev, int force, int failover)
+static int echo_cleanup(struct obd_device *obddev, int flags)
 {
         ENTRY;
 
@@ -453,7 +454,7 @@ int echo_attach(struct obd_device *obd, obd_count len, void *data)
         struct lprocfs_static_vars lvars;
         int rc;
 
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(echo, &lvars);
         rc = lprocfs_obd_attach(obd, lvars.obd_vars);
         if (rc != 0)
                 return rc;
@@ -539,7 +540,7 @@ static int __init obdecho_init(void)
 
         printk(KERN_INFO "Lustre Echo OBD driver; info@clusterfs.com\n");
 
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(echo, &lvars);
 
         rc = echo_object0_pages_init ();
         if (rc != 0)
@@ -561,7 +562,7 @@ static int __init obdecho_init(void)
         RETURN(rc);
 }
 
-static void __exit obdecho_exit(void)
+static void /*__exit*/ obdecho_exit(void)
 {
         echo_client_cleanup();
         class_unregister_type(OBD_ECHO_DEVICENAME);
index 79da7ea..c010798 100644 (file)
@@ -484,7 +484,7 @@ echo_client_kbrw (struct obd_device *obd, int rw,
                 }
         }
 
-        rc = obd_brw(rw, &ec->ec_conn, lsm, npages, pga, NULL);
+        rc = obd_brw(rw, &ec->ec_conn, oa, lsm, npages, pga, NULL);
 
  out:
         if (rc != 0)
@@ -568,7 +568,7 @@ static int echo_client_ubrw(struct obd_device *obd, int rw,
                 pgp->flag = 0;
         }
 
-        rc = obd_brw(rw, &ec->ec_conn, lsm, npages, pga, NULL);
+        rc = obd_brw(rw, &ec->ec_conn, oa, lsm, npages, pga, NULL);
 
         //        if (rw == OBD_BRW_READ)
         //                mark_dirty_kiobuf (kiobuf, count);
@@ -1009,7 +1009,7 @@ static int echo_setup(struct obd_device *obddev, obd_count len, void *buf)
         RETURN(rc);
 }
 
-static int echo_cleanup(struct obd_device * obddev, int force, int failover)
+static int echo_cleanup(struct obd_device *obddev, int flags)
 {
         struct list_head       *el;
         struct ec_object       *eco;
@@ -1023,21 +1023,21 @@ static int echo_cleanup(struct obd_device * obddev, int force, int failover)
         }
 
         /* XXX assuming sole access */
-        while (!list_empty (&ec->ec_objects)) {
+        while (!list_empty(&ec->ec_objects)) {
                 el = ec->ec_objects.next;
-                eco = list_entry (el, struct ec_object, eco_obj_chain);
+                eco = list_entry(el, struct ec_object, eco_obj_chain);
 
-                LASSERT (eco->eco_refcount == 0);
+                LASSERT(eco->eco_refcount == 0);
                 eco->eco_refcount = 1;
                 eco->eco_deleted = 1;
-                echo_put_object (eco);
+                echo_put_object(eco);
         }
 
-        rc = obd_disconnect (&ec->ec_conn, 0);
+        rc = obd_disconnect(&ec->ec_conn, 0);
         if (rc != 0)
                 CERROR("fail to disconnect device: %d\n", rc);
 
-        RETURN (rc);
+        RETURN(rc);
 }
 
 static int echo_connect(struct lustre_handle *conn, struct obd_device *src,
@@ -1057,7 +1057,7 @@ static int echo_connect(struct lustre_handle *conn, struct obd_device *src,
         RETURN (rc);
 }
 
-static int echo_disconnect(struct lustre_handle *conn, int failover)
+static int echo_disconnect(struct lustre_handle *conn, int flags)
 {
         struct obd_export      *exp = class_conn2export (conn);
         struct obd_device      *obd;
@@ -1128,7 +1128,7 @@ int echo_client_init(void)
 {
         struct lprocfs_static_vars lvars;
 
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(echo, &lvars);
         return class_register_type(&echo_obd_ops, lvars.module_vars,
                                    OBD_ECHO_CLIENT_DEVICENAME);
 }
index 6a16001..c25d156 100644 (file)
 #include <linux/obd_class.h>
 
 #ifndef LPROCFS
-struct lprocfs_vars lprocfs_obd_vars[]  = { {0} };
-struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_obd_vars[]  = { {0} };
+static struct lprocfs_vars lprocfs_module_vars[] = { {0} };
 #else
 
-int rd_fstype(char* page, char **start, off_t off, int count, int *eof,
-              void *data)
-{
-        struct obd_device* dev = (struct obd_device*)data;
-        
-        LASSERT(dev != NULL);
-        *eof = 1;
-        return snprintf(page, count, "%s\n", dev->u.echo.eo_fstype);
-}
-
-struct lprocfs_vars lprocfs_obd_vars[] = {
-        { "uuid",     lprocfs_rd_uuid,    0, 0 },
-        { "fstype",   rd_fstype,          0, 0 },
+static struct lprocfs_vars lprocfs_obd_vars[] = {
+        { "uuid",         lprocfs_rd_uuid,        0, 0 },
         { 0 }
 };
 
-struct lprocfs_vars lprocfs_module_vars[] = {
-        { "num_refs", lprocfs_rd_numrefs, 0, 0 },
+static struct lprocfs_vars lprocfs_module_vars[] = {
+        { "num_refs",     lprocfs_rd_numrefs,     0, 0 },
         { 0 }
 };
 
 #endif /* LPROCFS */
-LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
+LPROCFS_INIT_VARS(echo, lprocfs_module_vars, lprocfs_obd_vars)
index e530020..49c6100 100644 (file)
@@ -6,3 +6,4 @@ Makefile
 Makefile.in
 .deps
 TAGS
+.*.cmd
index b9addf1..ed4ca1e 100644 (file)
@@ -6,6 +6,7 @@
 MODULE = obdfilter
 modulefs_DATA = obdfilter.o
 EXTRA_PROGRAMS = obdfilter
-obdfilter_SOURCES = filter.c lproc_obdfilter.c
+obdfilter_SOURCES = filter.c filter_io.c filter_log.c filter_san.c \
+lproc_obdfilter.c filter_internal.h
 
 include $(top_srcdir)/Rules
index 6f2d96c..b6c1bd9 100644 (file)
  *            threaded operation on the OST.
  */
 
-#define EXPORT_SYMTAB
 #define DEBUG_SUBSYSTEM S_FILTER
 
 #include <linux/config.h>
 #include <linux/module.h>
-#include <linux/pagemap.h> // XXX kill me soon
 #include <linux/fs.h>
 #include <linux/dcache.h>
-#include <linux/obd_class.h>
-#include <linux/lustre_dlm.h>
-#include <linux/obd_filter.h>
 #include <linux/init.h>
-#include <linux/random.h>
-#include <linux/lustre_fsfilt.h>
-#include <linux/lprocfs_status.h>
 #include <linux/version.h>
 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-#include <linux/mount.h>
+# include <linux/mount.h>
+# include <linux/buffer_head.h>
 #endif
 
-enum {
-        LPROC_FILTER_READ_BYTES = 0,
-        LPROC_FILTER_WRITE_BYTES = 1,
-        LPROC_FILTER_LAST,
-};
+#include <linux/obd_class.h>
+#include <linux/lustre_dlm.h>
+#include <linux/lustre_fsfilt.h>
+#include <linux/lprocfs_status.h>
+#include <linux/lustre_log.h>
+#include <linux/lustre_commit_confd.h>
+
+#include "filter_internal.h"
 
 #define S_SHIFT 12
 static char *obd_type_by_mode[S_IFMT >> S_SHIFT] = {
@@ -132,19 +128,79 @@ static void filter_ffd_destroy(struct filter_file_data *ffd)
         filter_ffd_put(ffd);
 }
 
-static void filter_commit_cb(struct obd_device *obd, __u64 transno, int error)
+/* Callback handed to fsfilt_set_last_rcvd(): forwards obd, transno and error
+ * to obd_transno_commit_cb().  cb_data is accepted but not used here. */
+static void filter_commit_cb(struct obd_device *obd, __u64 transno,
+                             void *cb_data, int error)
 {
         obd_transno_commit_cb(obd, transno, error);
 }
-/* Assumes caller has already pushed us into the kernel context. */
-int filter_finish_transno(struct obd_export *export, void *handle,
-                          struct obd_trans_info *oti, int rc)
+
+/* Queue one llog cancel cookie on the filter's pending llog_commit_data
+ * (filter->fo_llcd) and hand that llcd to llcd_send() once it is nearly
+ * full or the caller passes OBD_LLOG_FL_SENDNOW.
+ *
+ * conn    - handle resolved to the obd_device via class_conn2obd()
+ * lsm     - not referenced in this function
+ * count   - only tested against 0; exactly one cookie is copied per call
+ * cookies - cookie to queue; NULL (or count == 0) means "queue nothing",
+ *           in which case a pending llcd is sent only if SENDNOW is set
+ * flags   - OBD_LLOG_FL_SENDNOW forces the send
+ *
+ * Returns 0, or -ENOMEM when llcd_grab() yields no llcd (the cookie is then
+ * dropped).  filter->fo_sem is held around every access to fo_llcd.
+ * NOTE(review): ENTRY is used but the exit is a plain return, not RETURN(). */
+static int filter_client_log_cancel(struct lustre_handle *conn,
+                                    struct lov_stripe_md *lsm, int count,
+                                    struct llog_cookie *cookies, int flags)
 {
-        __u64 last_rcvd;
-        struct obd_device *obd = export->exp_obd;
+        struct obd_device *obd = class_conn2obd(conn);
+        struct llog_commit_data *llcd;
         struct filter_obd *filter = &obd->u.filter;
-        struct filter_export_data *fed = &export->exp_filter_data;
+        int rc = 0;
+        ENTRY;
+
+        if (count == 0 || cookies == NULL) {
+                /* nothing to queue: at most flush an llcd that is pending */
+                down(&filter->fo_sem);
+                if (filter->fo_llcd == NULL || !(flags & OBD_LLOG_FL_SENDNOW))
+                        GOTO(out, rc);
+
+                llcd = filter->fo_llcd;
+                GOTO(send_now, rc);
+        }
+
+        down(&filter->fo_sem);
+        llcd = filter->fo_llcd;
+        if (llcd == NULL) {
+                /* no llcd pending: grab one and point it at fo_mdc_imp */
+                llcd = llcd_grab();
+                if (llcd == NULL) {
+                        CERROR("couldn't get an llcd - dropped "LPX64":%x+%u\n",
+                               cookies->lgc_lgl.lgl_oid,
+                               cookies->lgc_lgl.lgl_ogen, cookies->lgc_index);
+                        GOTO(out, rc = -ENOMEM);
+                }
+                llcd->llcd_import = filter->fo_mdc_imp;
+                filter->fo_llcd = llcd;
+        }
+
+        /* append the cookie; presumably llcd_cookies is a byte buffer so that
+         * llcd_cookiebytes is a byte offset - TODO confirm */
+        memcpy(llcd->llcd_cookies + llcd->llcd_cookiebytes, cookies,
+               sizeof(*cookies));
+        llcd->llcd_cookiebytes += sizeof(*cookies);
+
+        /* jumps to the label immediately below */
+        GOTO(send_now, rc);
+send_now:
+        /* send when fewer than sizeof(*cookies) bytes remain below PAGE_SIZE,
+         * or when the caller asked for an immediate send */
+        if ((PAGE_SIZE - llcd->llcd_cookiebytes < sizeof(*cookies) ||
+             flags & OBD_LLOG_FL_SENDNOW)) {
+                /* detach first so the next call grabs a fresh llcd */
+                filter->fo_llcd = NULL;
+                llcd_send(llcd);
+        }
+out:
+        up(&filter->fo_sem);
+
+        return rc;
+}
+
+/* When this (destroy) operation is committed, return the cancel cookie.
+ *
+ * Commit callback: cb_data is passed as the single llog cookie (count 1,
+ * lsm NULL) to filter_client_log_cancel() on obd->u.filter.fo_mdc_conn with
+ * OBD_LLOG_FL_SENDNOW, then freed as sizeof(struct llog_cookie) bytes.
+ * transno and error are not examined, and the cancel's return code is
+ * discarded.  NOTE(review): presumably cb_data was allocated by whoever
+ * registered this callback - confirm against the caller. */
+static void filter_cancel_cookies_cb(struct obd_device *obd, __u64 transno,
+                                     void *cb_data, int error)
+{
+        filter_client_log_cancel(&obd->u.filter.fo_mdc_conn, NULL, 1,
+                                 cb_data, OBD_LLOG_FL_SENDNOW);
+        OBD_FREE(cb_data, sizeof(struct llog_cookie));
+}
+
+/* Assumes caller has already pushed us into the kernel context. */
+int filter_finish_transno(struct obd_export *exp, struct obd_trans_info *oti,
+                          int rc)
+{
+        struct filter_obd *filter = &exp->exp_obd->u.filter;
+        struct filter_export_data *fed = &exp->exp_filter_data;
         struct filter_client_data *fcd = fed->fed_fcd;
+        __u64 last_rcvd;
         loff_t off;
         ssize_t written;
 
@@ -152,14 +208,14 @@ int filter_finish_transno(struct obd_export *export, void *handle,
         if (rc)
                 RETURN(rc);
 
-        if (!obd->obd_replayable)
+        if (!exp->exp_obd->obd_replayable)
                 RETURN(rc);
 
         /* we don't allocate new transnos for replayed requests */
-        if (oti && oti->oti_transno == 0) {
+        if (oti != NULL && oti->oti_transno == 0) {
                 spin_lock(&filter->fo_translock);
-                last_rcvd = le64_to_cpu(filter->fo_fsd->fsd_last_rcvd) + 1;
-                filter->fo_fsd->fsd_last_rcvd = cpu_to_le64(last_rcvd);
+                last_rcvd = le64_to_cpu(filter->fo_fsd->fsd_last_transno) + 1;
+                filter->fo_fsd->fsd_last_transno = cpu_to_le64(last_rcvd);
                 spin_unlock(&filter->fo_translock);
                 oti->oti_transno = last_rcvd;
                 fcd->fcd_last_rcvd = cpu_to_le64(last_rcvd);
@@ -169,27 +225,28 @@ int filter_finish_transno(struct obd_export *export, void *handle,
                 fcd->fcd_last_xid = 0;
 
                 off = fed->fed_lr_off;
-                fsfilt_set_last_rcvd(obd, last_rcvd, handle, filter_commit_cb);
-                written = lustre_fwrite(filter->fo_rcvd_filp, (char *)fcd, 
-                                        sizeof(*fcd), &off);
+                fsfilt_set_last_rcvd(exp->exp_obd, last_rcvd, oti->oti_handle,
+                                     filter_commit_cb, NULL);
+                written = fsfilt_write_record(exp->exp_obd,
+                                              filter->fo_rcvd_filp, (char *)fcd,
+                                              sizeof(*fcd), &off);
                 CDEBUG(D_HA, "wrote trans #"LPD64" for client %s at #%d: "
-                       "written = "LPSZ"\n", last_rcvd, fcd->fcd_uuid, 
+                       "written = "LPSZ"\n", last_rcvd, fcd->fcd_uuid,
                        fed->fed_lr_idx, written);
 
                 if (written == sizeof(*fcd))
                         RETURN(0);
-                CERROR("error writing to last_rcvd file: rc = %d\n", 
+                CERROR("error writing to %s: rc = %d\n", LAST_RCVD,
                        (int)written);
                 if (written >= 0)
-                        RETURN(-EIO);
-
+                        RETURN(-ENOSPC);
                 RETURN(written);
-        }                 
+        }
 
         RETURN(0);
 }
 
-static inline void f_dput(struct dentry *dentry)
+void f_dput(struct dentry *dentry)
 {
         /* Can't go inside filter_ddelete because it can block */
         CDEBUG(D_INODE, "putting %s: %p, count = %d\n",
@@ -207,26 +264,19 @@ static void filter_drelease(struct dentry *dentry)
 }
 
 struct dentry_operations filter_dops = {
-        .d_release = filter_drelease,
+        d_release: filter_drelease,
 };
 
-#define LAST_RCVD "last_rcvd"
-#define INIT_OBJID 2
-
-/* This limit is arbitrary, but for now we fit it in 1 page (32k clients) */
-#define FILTER_LR_MAX_CLIENTS (PAGE_SIZE * 8)
-#define FILTER_LR_MAX_CLIENT_WORDS (FILTER_LR_MAX_CLIENTS/sizeof(unsigned long))
-
 /* Add client data to the FILTER.  We use a bitmap to locate a free space
  * in the last_rcvd file if cl_idx is -1 (i.e. a new client).
  * Otherwise, we have just read the data from the last_rcvd file and
- * we know its offset.
- */
-int filter_client_add(struct obd_device *obd, struct filter_obd *filter,
-                      struct filter_export_data *fed, int cl_idx)
+ * we know its offset. */
+static int filter_client_add(struct obd_device *obd, struct filter_obd *filter,
+                             struct filter_export_data *fed, int cl_idx)
 {
         unsigned long *bitmap = filter->fo_last_rcvd_slots;
         int new_client = (cl_idx == -1);
+        ENTRY;
 
         LASSERT(bitmap != NULL);
 
@@ -242,7 +292,7 @@ int filter_client_add(struct obd_device *obd, struct filter_obd *filter,
         repeat:
                 if (cl_idx >= FILTER_LR_MAX_CLIENTS) {
                         CERROR("no client slots - fix FILTER_LR_MAX_CLIENTS\n");
-                        return -ENOMEM;
+                        RETURN(-ENOMEM);
                 }
                 if (test_and_set_bit(cl_idx, bitmap)) {
                         CERROR("FILTER client %d: found bit is set in bitmap\n",
@@ -270,23 +320,23 @@ int filter_client_add(struct obd_device *obd, struct filter_obd *filter,
         if (new_client) {
                 struct obd_run_ctxt saved;
                 loff_t off = fed->fed_lr_off;
-                ssize_t written;
+                int written;
                 void *handle;
 
                 CDEBUG(D_INFO, "writing client fcd at idx %u (%llu) (len %u)\n",
                        fed->fed_lr_idx,off,(unsigned int)sizeof(*fed->fed_fcd));
 
                 push_ctxt(&saved, &filter->fo_ctxt, NULL);
-                /* Transaction eeded to fix for bug 1403 */
+                /* Transaction needed to fix bug 1403 */
                 handle = fsfilt_start(obd,
                                       filter->fo_rcvd_filp->f_dentry->d_inode,
-                                      FSFILT_OP_SETATTR);
+                                      FSFILT_OP_SETATTR, NULL);
                 if (IS_ERR(handle)) {
                         written = PTR_ERR(handle);
                         CERROR("unable to start transaction: rc %d\n",
                                (int)written);
                 } else {
-                        written = lustre_fwrite(filter->fo_rcvd_filp,
+                        written = fsfilt_write_record(obd, filter->fo_rcvd_filp,
                                                 (char *)fed->fed_fcd,
                                                 sizeof(*fed->fed_fcd), &off);
                         fsfilt_commit(obd,
@@ -296,32 +346,35 @@ int filter_client_add(struct obd_device *obd, struct filter_obd *filter,
                 pop_ctxt(&saved, &filter->fo_ctxt, NULL);
 
                 if (written != sizeof(*fed->fed_fcd)) {
+                        CERROR("error writing %s client idx %u: rc %d\n",
+                               LAST_RCVD, fed->fed_lr_idx, written);
                         if (written < 0)
                                 RETURN(written);
-                        RETURN(-EIO);
+                        RETURN(-ENOSPC);
                 }
         }
-        return 0;
+        RETURN(0);
 }
 
-int filter_client_free(struct obd_export *exp, int failover)
+static int filter_client_free(struct obd_export *exp, int flags)
 {
         struct filter_export_data *fed = &exp->exp_filter_data;
         struct filter_obd *filter = &exp->exp_obd->u.filter;
+        struct obd_device *obd = exp->exp_obd;
         struct filter_client_data zero_fcd;
         struct obd_run_ctxt saved;
         int written;
         loff_t off;
         ENTRY;
 
-        if (!fed->fed_fcd)
+        if (fed->fed_fcd == NULL)
                 RETURN(0);
 
-        if (failover != 0)
+        if (flags & OBD_OPT_FAILOVER)
                 GOTO(free, 0);
 
         /* XXX if fcd_uuid were a real obd_uuid, I could use obd_uuid_equals */
-        if (!strcmp(fed->fed_fcd->fcd_uuid, "OBD_CLASS_UUID"))
+        if (strcmp(fed->fed_fcd->fcd_uuid, "OBD_CLASS_UUID") == 0)
                 GOTO(free, 0);
 
         LASSERT(filter->fo_last_rcvd_slots != NULL);
@@ -339,8 +392,9 @@ int filter_client_free(struct obd_export *exp, int failover)
 
         memset(&zero_fcd, 0, sizeof zero_fcd);
         push_ctxt(&saved, &filter->fo_ctxt, NULL);
-        written = lustre_fwrite(filter->fo_rcvd_filp, (const char *)&zero_fcd,
-                                sizeof(zero_fcd), &off);
+        written = fsfilt_write_record(obd, filter->fo_rcvd_filp,
+                                      (char *)&zero_fcd, sizeof(zero_fcd),
+                                      &off);
 
         /* XXX: this write gets lost sometimes, unless this sync is here. */
         if (written > 0)
@@ -374,29 +428,30 @@ static int filter_free_server_data(struct filter_obd *filter)
         return 0;
 }
 
-
 /* assumes caller is already in kernel ctxt */
-static int filter_update_server_data(struct file *filp,
-                                     struct filter_server_data *fsd)
+/* Write *fsd at offset 0 of filp through fsfilt_write_record(), after
+ * logging its fields at D_INODE (the "last_rcvd" label prints
+ * fsd_last_transno).  Returns 0 when all sizeof(*fsd) bytes were written,
+ * -ENOSPC on a short non-negative write, otherwise the negative rc from
+ * fsfilt_write_record(). */
+int filter_update_server_data(struct obd_device *obd,
+                              struct file *filp, struct filter_server_data *fsd)
 {
         loff_t off = 0;
         int rc;
+        ENTRY;
 
         CDEBUG(D_INODE, "server uuid      : %s\n", fsd->fsd_uuid);
         CDEBUG(D_INODE, "server last_objid: "LPU64"\n",
                le64_to_cpu(fsd->fsd_last_objid));
         CDEBUG(D_INODE, "server last_rcvd : "LPU64"\n",
-               le64_to_cpu(fsd->fsd_last_rcvd));
+               le64_to_cpu(fsd->fsd_last_transno));
         CDEBUG(D_INODE, "server last_mount: "LPU64"\n",
                le64_to_cpu(fsd->fsd_mount_count));
 
-        rc = lustre_fwrite(filp, (char *)fsd, sizeof(*fsd), &off);
-        if (rc != sizeof(*fsd)) {
-                CDEBUG(D_INODE, "error writing filter_server_data: rc = %d\n",
-                       rc);
-                RETURN(-EIO);
-        }
-        RETURN(0);
+        rc = fsfilt_write_record(obd, filp, (char *)fsd, sizeof(*fsd), &off);
+        if (rc == sizeof(*fsd))
+                RETURN(0);
+
+        /* short write is reported as -ENOSPC; a negative rc is returned as is */
+        CDEBUG(D_INODE, "error writing filter_server_data: rc = %d\n", rc);
+        if (rc >= 0)
+                RETURN(-ENOSPC);
+        RETURN(rc);
 }
 
 /* assumes caller has already in kernel ctxt */
@@ -432,11 +487,11 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp,
         }
 
         if (last_rcvd_size == 0) {
-                CERROR("%s: initializing new last_rcvd\n", obd->obd_name);
+                CWARN("%s: initializing new %s\n", obd->obd_name, LAST_RCVD);
 
                 memcpy(fsd->fsd_uuid, obd->obd_uuid.uuid,sizeof(fsd->fsd_uuid));
                 fsd->fsd_last_objid = cpu_to_le64(init_lastobjid);
-                fsd->fsd_last_rcvd = 0;
+                fsd->fsd_last_transno = 0;
                 mount_count = fsd->fsd_mount_count = 0;
                 fsd->fsd_server_size = cpu_to_le32(FILTER_LR_SERVER_SIZE);
                 fsd->fsd_client_start = cpu_to_le32(FILTER_LR_CLIENT_START);
@@ -444,15 +499,18 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp,
                 fsd->fsd_subdir_count = cpu_to_le16(FILTER_SUBDIR_COUNT);
                 filter->fo_subdir_count = FILTER_SUBDIR_COUNT;
         } else {
-                ssize_t retval = lustre_fread(filp, (char *)fsd, sizeof(*fsd),
-                                              &off);
+                int retval = fsfilt_read_record(obd, filp, (char *)fsd,
+                                                sizeof(*fsd), &off);
                 if (retval != sizeof(*fsd)) {
-                        CDEBUG(D_INODE,"OBD filter: error reading %s\n",
-                               LAST_RCVD);
+                        CDEBUG(D_INODE,"OBD filter: error reading %s: rc %d\n",
+                               LAST_RCVD, retval);
                         GOTO(err_fsd, rc = -EIO);
                 }
                 mount_count = le64_to_cpu(fsd->fsd_mount_count);
                 filter->fo_subdir_count = le16_to_cpu(fsd->fsd_subdir_count);
+                fsd->fsd_last_objid =
+                        cpu_to_le64(le64_to_cpu(fsd->fsd_last_objid) +
+                                    FILTER_SKIP_OBJID);
         }
 
         if (fsd->fsd_feature_incompat) {
@@ -470,7 +528,7 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp,
         CDEBUG(D_INODE, "%s: server last_objid: "LPU64"\n",
                obd->obd_name, le64_to_cpu(fsd->fsd_last_objid));
         CDEBUG(D_INODE, "%s: server last_rcvd : "LPU64"\n",
-               obd->obd_name, le64_to_cpu(fsd->fsd_last_rcvd));
+               obd->obd_name, le64_to_cpu(fsd->fsd_last_transno));
         CDEBUG(D_INODE, "%s: server last_mount: "LPU64"\n",
                obd->obd_name, mount_count);
         CDEBUG(D_INODE, "%s: server data size: %u\n",
@@ -482,13 +540,8 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp,
         CDEBUG(D_INODE, "%s: server subdir_count: %u\n",
                obd->obd_name, le16_to_cpu(fsd->fsd_subdir_count));
 
-        /*
-         * When we do a clean FILTER shutdown, we save the last_rcvd into
-         * the header.  If we find clients with higher last_rcvd values
-         * then those clients may need recovery done.
-         */
         if (!obd->obd_replayable) {
-                CERROR("%s: recovery support OFF\n", obd->obd_name);
+                CWARN("%s: recovery support OFF\n", obd->obd_name);
                 GOTO(out, rc = 0);
         }
 
@@ -507,7 +560,8 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp,
                  */
                 off = le32_to_cpu(fsd->fsd_client_start) +
                         cl_idx * le16_to_cpu(fsd->fsd_client_size);
-                rc = lustre_fread(filp, (char *)fcd, sizeof(*fcd), &off);
+                rc = fsfilt_read_record(obd, filp, (char *)fcd, sizeof(*fcd),
+                                        &off);
                 if (rc != sizeof(*fcd)) {
                         CERROR("error reading FILTER %s offset %d: rc = %d\n",
                                LAST_RCVD, cl_idx, rc);
@@ -534,7 +588,7 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp,
                         CERROR("RCVRNG CLIENT uuid: %s idx: %d lr: "LPU64
                                " srv lr: "LPU64" mnt: "LPU64" last mount: "
                                LPU64"\n", fcd->fcd_uuid, cl_idx,
-                               last_rcvd, le64_to_cpu(fsd->fsd_last_rcvd),
+                               last_rcvd, le64_to_cpu(fsd->fsd_last_transno),
                                le64_to_cpu(fcd->fcd_mount_count), mount_count);
                         if (exp == NULL) {
                                 /* XXX this rc is ignored  */
@@ -563,15 +617,16 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp,
                 CDEBUG(D_OTHER, "client at idx %d has last_rcvd = "LPU64"\n",
                        cl_idx, last_rcvd);
 
-                if (last_rcvd > le64_to_cpu(filter->fo_fsd->fsd_last_rcvd))
-                        filter->fo_fsd->fsd_last_rcvd = cpu_to_le64(last_rcvd);
+                if (last_rcvd > le64_to_cpu(filter->fo_fsd->fsd_last_transno))
+                        filter->fo_fsd->fsd_last_transno=cpu_to_le64(last_rcvd);
 
                 obd->obd_last_committed =
-                        le64_to_cpu(filter->fo_fsd->fsd_last_rcvd);
+                        le64_to_cpu(filter->fo_fsd->fsd_last_transno);
+
                 if (obd->obd_recoverable_clients) {
                         CERROR("RECOVERY: %d recoverable clients, last_rcvd "
                                LPU64"\n", obd->obd_recoverable_clients,
-                               le64_to_cpu(filter->fo_fsd->fsd_last_rcvd));
+                               le64_to_cpu(filter->fo_fsd->fsd_last_transno));
                         obd->obd_next_recovery_transno =
                                 obd->obd_last_committed + 1;
                         obd->obd_recovering = 1;
@@ -585,8 +640,8 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp,
 out:
         fsd->fsd_mount_count = cpu_to_le64(mount_count + 1);
 
-        /* save it,so mount count and last_recvd is current */
-        rc = filter_update_server_data(filp, filter->fo_fsd);
+        /* save it, so mount count and last_transno are current */
+        rc = filter_update_server_data(obd, filp, filter->fo_fsd);
 
         RETURN(rc);
 
@@ -639,7 +694,7 @@ static int filter_prep(struct obd_device *obd)
                 filter->fo_dentry_O_mode[mode] = dentry;
         }
 
-        file = filp_open(LAST_RCVD, O_RDWR | O_CREAT, 0700);
+        file = filp_open(LAST_RCVD, O_RDWR | O_CREAT | O_LARGEFILE, 0700);
         if (!file || IS_ERR(file)) {
                 rc = PTR_ERR(file);
                 CERROR("OBD filter: cannot open/create %s: rc = %d\n",
@@ -663,8 +718,15 @@ static int filter_prep(struct obd_device *obd)
         filter->fo_fop = file->f_op;
         filter->fo_iop = inode->i_op;
         filter->fo_aops = inode->i_mapping->a_ops;
+#ifdef I_SKIP_PDFLUSH
+        /*
+         * we need this to protect from deadlock
+         * pdflush vs. lustre_fwrite()
+         */
+        inode->i_flags |= I_SKIP_PDFLUSH;
+#endif
 
-        rc = filter_init_server_data(obd, file, INIT_OBJID);
+        rc = filter_init_server_data(obd, file, FILTER_INIT_OBJID);
         if (rc) {
                 CERROR("cannot read %s: rc = %d\n", LAST_RCVD, rc);
                 GOTO(err_client, rc);
@@ -740,9 +802,10 @@ static void filter_post(struct obd_device *obd)
          * from lastobjid */
 
         push_ctxt(&saved, &filter->fo_ctxt, NULL);
-        rc = filter_update_server_data(filter->fo_rcvd_filp, filter->fo_fsd);
+        rc = filter_update_server_data(obd, filter->fo_rcvd_filp,
+                                       filter->fo_fsd);
         if (rc)
-                CERROR("OBD filter: error writing lastobjid: rc = %ld\n", rc);
+                CERROR("error writing lastobjid: rc = %ld\n", rc);
 
 
         if (filter->fo_rcvd_filp) {
@@ -751,7 +814,7 @@ static void filter_post(struct obd_device *obd)
                 filp_close(filter->fo_rcvd_filp, 0);
                 filter->fo_rcvd_filp = NULL;
                 if (rc)
-                        CERROR("last_rcvd file won't closed rc = %ld\n", rc);
+                        CERROR("error closing %s: rc = %ld\n", LAST_RCVD, rc);
         }
 
         if (filter->fo_subdir_count) {
@@ -777,8 +840,7 @@ static void filter_post(struct obd_device *obd)
         pop_ctxt(&saved, &filter->fo_ctxt, NULL);
 }
 
-
-static __u64 filter_next_id(struct filter_obd *filter)
+__u64 filter_next_id(struct filter_obd *filter)
 {
         obd_id id;
         LASSERT(filter->fo_fsd != NULL);
@@ -792,8 +854,9 @@ static __u64 filter_next_id(struct filter_obd *filter)
 }
 
 /* direct cut-n-paste of mds_blocking_ast() */
-int filter_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
-                     void *data, int flag)
+static int filter_blocking_ast(struct ldlm_lock *lock,
+                               struct ldlm_lock_desc *desc,
+                               void *data, int flag)
 {
         int do_ast;
         ENTRY;
@@ -852,6 +915,7 @@ static int filter_lock_dentry(struct obd_device *obd, struct dentry *de,
         RETURN(rc == ELDLM_OK ? 0 : -ENOLCK);  /* XXX translate ldlm code */
 }
 
+/* We never dget the object parent, so DON'T dput it either */
 static void filter_parent_unlock(struct dentry *dparent,
                                  struct lustre_handle *lockh,
                                  ldlm_mode_t lock_mode)
@@ -860,8 +924,8 @@ static void filter_parent_unlock(struct dentry *dparent,
 }
 
 /* We never dget the object parent, so DON'T dput it either */
-static inline struct dentry *filter_parent(struct obd_device *obd,
-                                           obd_mode mode, obd_id objid)
+struct dentry *filter_parent(struct obd_device *obd, obd_mode mode,
+                             obd_id objid)
 {
         struct filter_obd *filter = &obd->u.filter;
 
@@ -873,10 +937,9 @@ static inline struct dentry *filter_parent(struct obd_device *obd,
 }
 
 /* We never dget the object parent, so DON'T dput it either */
-static inline struct dentry *filter_parent_lock(struct obd_device *obd,
-                                                obd_mode mode, obd_id objid,
-                                                ldlm_mode_t lock_mode,
-                                                struct lustre_handle *lockh)
+struct dentry *filter_parent_lock(struct obd_device *obd, obd_mode mode,
+                                  obd_id objid, ldlm_mode_t lock_mode,
+                                  struct lustre_handle *lockh)
 {
         unsigned long now = jiffies;
         struct dentry *de = filter_parent(obd, mode, objid);
@@ -886,7 +949,7 @@ static inline struct dentry *filter_parent_lock(struct obd_device *obd,
                 return de;
 
         rc = filter_lock_dentry(obd, de, lock_mode, lockh);
-        if (time_after(jiffies, now + 15*HZ))
+        if (time_after(jiffies, now + 15 * HZ))
                 CERROR("slow parent lock %lus\n", (jiffies - now) / HZ);
         return rc ? ERR_PTR(rc) : de;
 }
@@ -897,13 +960,11 @@ static inline struct dentry *filter_parent_lock(struct obd_device *obd,
  * appropriately for this operation (normally a write lock).  If
  * dir_dentry is NULL, we do a read lock while we do the lookup to
  * avoid races with create/destroy and such changing the directory
- * internal to the filesystem code.
- */
-static struct dentry *filter_fid2dentry(struct obd_device *obd,
-                                        struct dentry *dir_dentry,
-                                        obd_mode mode, obd_id id)
+ * internal to the filesystem code. */
+struct dentry *filter_fid2dentry(struct obd_device *obd,
+                                 struct dentry *dir_dentry,
+                                 obd_mode mode, obd_id id)
 {
-        struct super_block *sb = obd->u.filter.fo_sb;
         struct lustre_handle lockh;
         struct dentry *dparent = dir_dentry;
         struct dentry *dchild;
@@ -911,11 +972,6 @@ static struct dentry *filter_fid2dentry(struct obd_device *obd,
         int len;
         ENTRY;
 
-        if (!sb || !sb->s_dev) {
-                CERROR("device not initialized.\n");
-                RETURN(ERR_PTR(-ENXIO));
-        }
-
         if (id == 0) {
                 CERROR("fatal: invalid object id 0\n");
                 LBUG();
@@ -923,7 +979,7 @@ static struct dentry *filter_fid2dentry(struct obd_device *obd,
         }
 
         len = sprintf(name, LPU64, id);
-        if (!dir_dentry) {
+        if (dir_dentry == NULL) {
                 dparent = filter_parent_lock(obd, mode, id, LCK_PR, &lockh);
                 if (IS_ERR(dparent))
                         RETURN(dparent);
@@ -931,7 +987,7 @@ static struct dentry *filter_fid2dentry(struct obd_device *obd,
         CDEBUG(D_INODE, "looking up object O/%*s/%s\n",
                dparent->d_name.len, dparent->d_name.name, name);
         dchild = ll_lookup_one_len(name, dparent, len);
-        if (!dir_dentry)
+        if (dir_dentry == NULL)
                 filter_parent_unlock(dparent, &lockh, LCK_PR);
         if (IS_ERR(dchild)) {
                 CERROR("child lookup error %ld\n", PTR_ERR(dchild));
@@ -947,13 +1003,12 @@ static struct dentry *filter_fid2dentry(struct obd_device *obd,
 }
 
 static struct file *filter_obj_open(struct obd_export *export,
-                                    __u64 id, __u32 type,
-                                    ldlm_mode_t parent_mode,
+                                    struct obd_trans_info *oti,
+                                    __u64 id, __u32 type, int parent_mode,
                                     struct lustre_handle *parent_lockh)
 {
         struct obd_device *obd = export->exp_obd;
         struct filter_obd *filter = &obd->u.filter;
-        struct super_block *sb = filter->fo_sb;
         struct dentry *dchild = NULL, *dparent = NULL;
         struct filter_export_data *fed = &export->exp_filter_data;
         struct filter_dentry_data *fdd = NULL;
@@ -966,11 +1021,6 @@ static struct file *filter_obj_open(struct obd_export *export,
 
         push_ctxt(&saved, &filter->fo_ctxt, NULL);
 
-        if (!sb || !sb->s_dev) {
-                CERROR("fatal: device not initialized.\n");
-                GOTO(cleanup, file = ERR_PTR(-ENXIO));
-        }
-
         if (!id) {
                 CERROR("fatal: invalid obdo "LPU64"\n", id);
                 GOTO(cleanup, file = ERR_PTR(-ESTALE));
@@ -1014,6 +1064,7 @@ static struct file *filter_obj_open(struct obd_export *export,
 
         if (dchild->d_inode == NULL) {
                 CERROR("opening non-existent object %s - O_CREAT?\n", name);
+                /* dput(dchild); call filter_create_internal here */
                 file = ERR_PTR(-ENOENT);
                 GOTO(cleanup, file);
         }
@@ -1083,9 +1134,8 @@ cleanup:
 }
 
 /* Caller must hold LCK_PW on parent and push us into kernel context.
- * Caller is also required to ensure that dchild->d_inode exists.
- */
-static int filter_destroy_internal(struct obd_device *obd,
+ * Caller is also required to ensure that dchild->d_inode exists. */
+static int filter_destroy_internal(struct obd_device *obd, obd_id objid,
                                    struct dentry *dparent,
                                    struct dentry *dchild)
 {
@@ -1099,6 +1149,39 @@ static int filter_destroy_internal(struct obd_device *obd,
                        inode->i_nlink, atomic_read(&inode->i_count));
         }
 
+        
+#if 0
+        /* Tell the clients that the object is gone now and that they should
+         * throw away any cached pages.  We don't need to wait until they're
+         * done, so just decref the lock right away and let ldlm_completion_ast
+         * clean up when it's all over. */
+        ldlm_cli_enqueue(..., LCK_PW, AST_INTENT_DESTROY, &lockh);
+        ldlm_lock_decref(&lockh, LCK_PW);
+#endif
+
+        if (0) {
+                struct lustre_handle lockh;
+                int flags = 0, rc;
+                struct ldlm_res_id res_id = { .name = { objid } };
+
+                /* This part is a wee bit iffy: we really only want to bust the
+                 * locks on our stripe, so that we don't end up bouncing
+                 * [0->EOF] locks around on each of the OSTs as the rest of the
+                 * destroys get processed.  Because we're only talking to
+                 * the local LDLM, though, we should only end up locking the 
+                 * whole of our stripe.  When bug 1425 (take all locks on OST
+                 * for stripe 0) is fixed, this code should be revisited. */
+                struct ldlm_extent extent = { 0, OBD_OBJECT_EOF };
+
+                rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
+                                      res_id, LDLM_EXTENT, &extent,
+                                      sizeof(extent), LCK_PW, &flags,
+                                      ldlm_completion_ast, filter_blocking_ast,
+                                      NULL, &lockh);
+                /* We only care about the side-effects, just drop the lock. */
+                ldlm_lock_decref(&lockh, LCK_PW);
+        }
+
         rc = vfs_unlink(dparent->d_inode, dchild);
 
         if (rc)
@@ -1113,8 +1196,7 @@ static int filter_destroy_internal(struct obd_device *obd,
 */
 static int filter_close_internal(struct obd_export *exp,
                                  struct filter_file_data *ffd,
-                                 struct obd_trans_info *oti,
-                                 int failover)
+                                 struct obd_trans_info *oti, int flags)
 {
         struct obd_device *obd = exp->exp_obd;
         struct filter_obd *filter = &obd->u.filter;
@@ -1128,13 +1210,14 @@ static int filter_close_internal(struct obd_export *exp,
         ENTRY;
 
         LASSERT(filp->private_data == ffd);
-        LASSERT(fdd);
+        LASSERT(fdd != NULL);
         LASSERT(fdd->fdd_magic == FILTER_DENTRY_MAGIC);
 
         rc = filp_close(filp, 0);
 
         if (atomic_dec_and_test(&fdd->fdd_open_count) &&
-            fdd->fdd_flags & FILTER_FLAG_DESTROY && !failover) {
+            (fdd->fdd_flags & FILTER_FLAG_DESTROY) &&
+            !(flags & OBD_OPT_FAILOVER)) {
                 void *handle;
 
                 push_ctxt(&saved, &filter->fo_ctxt, NULL);
@@ -1148,15 +1231,27 @@ static int filter_close_internal(struct obd_export *exp,
                 cleanup_phase = 2;
 
                 handle = fsfilt_start(obd, dparent->d_inode,
-                                      FSFILT_OP_UNLINK);
+                                      FSFILT_OP_UNLINK_LOG, oti);
                 if (IS_ERR(handle))
                         GOTO(cleanup, rc = PTR_ERR(handle));
 
+                if (oti != NULL) {
+                        if (oti->oti_handle == NULL)
+                                oti->oti_handle = handle;
+                        else
+                                LASSERT(oti->oti_handle == handle);
+                }
+
+#ifdef ENABLE_ORPHANS
+                /* Remove orphan unlink record from log */
+                llog_cancel_records(filter->fo_catalog, 1, &fdd->fdd_cookie);
+#endif
                 /* XXX unlink from PENDING directory now too */
-                rc2 = filter_destroy_internal(obd, dparent, dchild);
+                rc2 = filter_destroy_internal(obd, fdd->fdd_objid, dparent,
+                                              dchild);
                 if (rc2 && !rc)
                         rc = rc2;
-                rc = filter_finish_transno(exp, handle, oti, rc);
+                rc = filter_finish_transno(exp, oti, rc);
                 rc2 = fsfilt_commit(obd, dparent->d_inode, handle, 0);
                 if (rc2) {
                         CERROR("error on commit, err = %d\n", rc2);
@@ -1189,14 +1284,12 @@ cleanup:
         RETURN(rc);
 }
 
-/* obd methods */
 /* mount the file system (secretly) */
-static int filter_common_setup(struct obd_device *obd, obd_count len, void *buf,
-                               char *option)
+int filter_common_setup(struct obd_device *obd, obd_count len, void *buf,
+                        char *option)
 {
         struct obd_ioctl_data* data = buf;
         struct filter_obd *filter = &obd->u.filter;
-
         struct vfsmount *mnt;
         int rc = 0;
         ENTRY;
@@ -1208,7 +1301,8 @@ static int filter_common_setup(struct obd_device *obd, obd_count len, void *buf,
         if (IS_ERR(obd->obd_fsops))
                 RETURN(PTR_ERR(obd->obd_fsops));
 
-        mnt = do_kern_mount(data->ioc_inlbuf2, 0, data->ioc_inlbuf1, option);
+        mnt = do_kern_mount(data->ioc_inlbuf2, MS_NOATIME | MS_NODIRATIME,
+                            data->ioc_inlbuf1, option);
         rc = PTR_ERR(mnt);
         if (IS_ERR(mnt))
                 GOTO(err_ops, rc);
@@ -1257,14 +1351,27 @@ static int filter_common_setup(struct obd_device *obd, obd_count len, void *buf,
         spin_lock_init(&filter->fo_objidlock);
         INIT_LIST_HEAD(&filter->fo_export_list);
 
+        ptlrpc_init_client(MDS_REQUEST_PORTAL, MDC_REPLY_PORTAL,
+                           "filter_mdc", &filter->fo_mdc_client);
+        sema_init(&filter->fo_sem, 1);
+
         obd->obd_namespace = ldlm_namespace_new("filter-tgt",
                                                 LDLM_NAMESPACE_SERVER);
-        if (!obd->obd_namespace)
+        if (obd->obd_namespace == NULL)
                 GOTO(err_post, rc = -ENOMEM);
 
         ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL,
                            "filter_ldlm_cb_client", &obd->obd_ldlm_client);
 
+        /* Create a non-replaying connection for recovery logging, so that
+         * we don't create a client entry for this local connection, and do
+         * not log or assign transaction numbers for logging operations. */
+#ifdef ENABLE_ORPHANS
+        filter->fo_catalog = filter_get_catalog(obd);
+        if (IS_ERR(filter->fo_catalog))
+                GOTO(err_post, rc = PTR_ERR(filter->fo_catalog));
+#endif
+
         RETURN(0);
 
 err_post:
@@ -1284,82 +1391,67 @@ static int filter_setup(struct obd_device *obd, obd_count len, void *buf)
         struct obd_ioctl_data* data = buf;
         char *option = NULL;
 
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+        /* bug 1577: implement async-delete for 2.5 */
         if (!strcmp(data->ioc_inlbuf2, "ext3"))
                 option = "asyncdel";
+#endif
 
         return filter_common_setup(obd, len, buf, option);
 }
 
-/* sanobd setup methods - use a specific mount option */
-static int filter_san_setup(struct obd_device *obd, obd_count len, void *buf)
-{
-        struct obd_ioctl_data* data = buf;
-        char *option = NULL;
-
-        if (!data->ioc_inlbuf2)
-                RETURN(-EINVAL);
-
-        /* for extN/ext3 filesystem, we must mount it with 'writeback' mode */
-        if (!strcmp(data->ioc_inlbuf2, "extN"))
-                option = "data=writeback";
-        else if (!strcmp(data->ioc_inlbuf2, "ext3"))
-                option = "data=writeback,asyncdel";
-        else
-                LBUG(); /* just a reminder */
-
-        return filter_common_setup(obd, len, buf, option);
-}
-
-static int filter_cleanup(struct obd_device *obd, int force, int failover)
+static int filter_cleanup(struct obd_device *obd, int flags)
 {
-        struct super_block *sb;
+        struct filter_obd *filter = &obd->u.filter;
         ENTRY;
 
-        if (failover)
+        if (flags & OBD_OPT_FAILOVER)
                 CERROR("%s: shutting down for failover; client state will"
                        " be preserved.\n", obd->obd_name);
 
         if (!list_empty(&obd->obd_exports)) {
                 CERROR("%s: still has clients!\n", obd->obd_name);
-                class_disconnect_exports(obd, failover);
+                class_disconnect_exports(obd, flags);
                 if (!list_empty(&obd->obd_exports)) {
                         CERROR("still has exports after forced cleanup?\n");
                         RETURN(-EBUSY);
                 }
         }
 
+#ifdef ENABLE_ORPHANS
+        filter_put_catalog(filter->fo_catalog);
+#endif
+
         ldlm_namespace_free(obd->obd_namespace);
 
-        sb = obd->u.filter.fo_sb;
-        if (!sb)
+        if (filter->fo_sb == NULL)
                 RETURN(0);
 
         filter_post(obd);
 
-        shrink_dcache_parent(sb->s_root);
-        unlock_kernel();
+        shrink_dcache_parent(filter->fo_sb->s_root);
+        filter->fo_sb = 0;
 
-        if (atomic_read(&obd->u.filter.fo_vfsmnt->mnt_count) > 1){
+        if (atomic_read(&filter->fo_vfsmnt->mnt_count) > 1)
                 CERROR("%s: mount point busy, mnt_count: %d\n", obd->obd_name,
-                       atomic_read(&obd->u.filter.fo_vfsmnt->mnt_count));
-        }
-
-        mntput(obd->u.filter.fo_vfsmnt);
-        obd->u.filter.fo_sb = 0;
-/*        destroy_buffers(obd->u.filter.fo_sb->s_dev);*/
+                       atomic_read(&filter->fo_vfsmnt->mnt_count));
 
+        unlock_kernel();
+        mntput(filter->fo_vfsmnt);
+        //destroy_buffers(filter->fo_sb->s_dev);
+        filter->fo_sb = NULL;
         fsfilt_put_ops(obd->obd_fsops);
         lock_kernel();
 
         RETURN(0);
 }
 
-int filter_attach(struct obd_device *obd, obd_count len, void *data)
+static int filter_attach(struct obd_device *obd, obd_count len, void *data)
 {
         struct lprocfs_static_vars lvars;
         int rc;
 
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(filter, &lvars);
         rc = lprocfs_obd_attach(obd, lvars.obd_vars);
         if (rc != 0)
                 return rc;
@@ -1376,7 +1468,7 @@ int filter_attach(struct obd_device *obd, obd_count len, void *data)
         return rc;
 }
 
-int filter_detach(struct obd_device *dev)
+static int filter_detach(struct obd_device *dev)
 {
         lprocfs_free_obd_stats(dev);
         return lprocfs_obd_detach(dev);
@@ -1391,17 +1483,16 @@ static int filter_connect(struct lustre_handle *conn, struct obd_device *obd,
         struct filter_client_data *fcd;
         struct filter_obd *filter = &obd->u.filter;
         int rc;
-
         ENTRY;
 
-        if (!conn || !obd || !cluuid)
+        if (conn == NULL || obd == NULL || cluuid == NULL)
                 RETURN(-EINVAL);
 
         rc = class_connect(conn, obd, cluuid);
         if (rc)
                 RETURN(rc);
         exp = class_conn2export(conn);
-        LASSERT(exp);
+        LASSERT(exp != NULL);
 
         fed = &exp->exp_filter_data;
         class_export_put(exp);
@@ -1450,37 +1541,37 @@ static void filter_destroy_export(struct obd_export *exp)
                 list_del(&ffd->ffd_export_list);
                 spin_unlock(&fed->fed_lock);
 
-                CERROR("force close file %*s (hdl %p:"LPX64") on disconnect\n",
-                       ffd->ffd_file->f_dentry->d_name.len,
+                CDEBUG(D_INFO, "force close file %*s (hdl %p:"LPX64") on "
+                       "disconnect\n", ffd->ffd_file->f_dentry->d_name.len,
                        ffd->ffd_file->f_dentry->d_name.name,
                        ffd, ffd->ffd_handle.h_cookie);
 
-                filter_close_internal(exp, ffd, NULL, exp->exp_failover);
+                filter_close_internal(exp, ffd, NULL, exp->exp_flags);
                 spin_lock(&fed->fed_lock);
         }
         spin_unlock(&fed->fed_lock);
 
         if (exp->exp_obd->obd_replayable)
-                filter_client_free(exp, exp->exp_failover);
+                filter_client_free(exp, exp->exp_flags);
         EXIT;
 }
 
 /* also incredibly similar to mds_disconnect */
-static int filter_disconnect(struct lustre_handle *conn, int failover)
+static int filter_disconnect(struct lustre_handle *conn, int flags)
 {
         struct obd_export *exp = class_conn2export(conn);
+        unsigned long irqflags;
         int rc;
-        unsigned long flags;
         ENTRY;
 
         LASSERT(exp);
         ldlm_cancel_locks_for_export(exp);
 
-        spin_lock_irqsave(&exp->exp_lock, flags);
-        exp->exp_failover = failover;
-        spin_unlock_irqrestore(&exp->exp_lock, flags);
+        spin_lock_irqsave(&exp->exp_lock, irqflags);
+        exp->exp_flags = flags;
+        spin_unlock_irqrestore(&exp->exp_lock, irqflags);
 
-        rc = class_disconnect(conn, failover);
+        rc = class_disconnect(conn, flags);
 
         fsfilt_sync(exp->exp_obd, exp->exp_obd->u.filter.fo_sb);
         class_export_put(exp);
@@ -1488,29 +1579,8 @@ static int filter_disconnect(struct lustre_handle *conn, int failover)
         RETURN(rc);
 }
 
-static void filter_from_inode(struct obdo *oa, struct inode *inode, int valid)
-{
-        int type = oa->o_mode & S_IFMT;
-        ENTRY;
-
-        CDEBUG(D_INFO, "src inode %lu (%p), dst obdo "LPU64" valid 0x%08x\n",
-               inode->i_ino, inode, oa->o_id, valid);
-        /* Don't copy the inode number in place of the object ID */
-        obdo_from_inode(oa, inode, valid);
-        oa->o_mode &= ~S_IFMT;
-        oa->o_mode |= type;
-
-        if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
-                obd_rdev rdev = kdev_t_to_nr(inode->i_rdev);
-                oa->o_rdev = rdev;
-                oa->o_valid |= OBD_MD_FLRDEV;
-        }
-
-        EXIT;
-}
-
-static struct dentry *__filter_oa2dentry(struct lustre_handle *conn,
-                                         struct obdo *oa, char *what)
+struct dentry *__filter_oa2dentry(struct obd_device *obd,
+                                  struct obdo *oa, const char *what)
 {
         struct dentry *dchild = NULL;
 
@@ -1525,22 +1595,14 @@ static struct dentry *__filter_oa2dentry(struct lustre_handle *conn,
                         LASSERT(fdd->fdd_magic == FILTER_DENTRY_MAGIC);
                         filter_ffd_put(ffd);
 
-                        CDEBUG(D_INODE,
-                               "got child objid %*s: %p, count = %d\n",
-                               dchild->d_name.len, dchild->d_name.name,
+                        CDEBUG(D_INODE,"%s got child objid %*s: %p, count %d\n",
+                               what, dchild->d_name.len, dchild->d_name.name,
                                dchild, atomic_read(&dchild->d_count));
                 }
         }
 
-        if (!dchild) {
-                struct obd_device *obd = class_conn2obd(conn);
-
-                if (!obd) {
-                        CERROR("invalid client cookie "LPX64"\n", conn->cookie);
-                        RETURN(ERR_PTR(-EINVAL));
-                }
+        if (!dchild)
                 dchild = filter_fid2dentry(obd, NULL, oa->o_mode, oa->o_id);
-        }
 
         if (IS_ERR(dchild)) {
                 CERROR("%s error looking up object: "LPU64"\n", what, oa->o_id);
@@ -1556,20 +1618,27 @@ static struct dentry *__filter_oa2dentry(struct lustre_handle *conn,
         return dchild;
 }
 
-#define filter_oa2dentry(conn, oa) __filter_oa2dentry(conn, oa, __FUNCTION__)
-
 static int filter_getattr(struct lustre_handle *conn, struct obdo *oa,
                           struct lov_stripe_md *md)
 {
         struct dentry *dentry = NULL;
+        struct obd_device *obd;
         int rc = 0;
         ENTRY;
 
-        dentry = filter_oa2dentry(conn, oa);
+        obd = class_conn2obd(conn);
+        if (obd == NULL) {
+                CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",conn->cookie);
+                RETURN(-EINVAL);
+        }
+
+        dentry = filter_oa2dentry(obd, oa);
         if (IS_ERR(dentry))
                 RETURN(PTR_ERR(dentry));
 
-        filter_from_inode(oa, dentry->d_inode, oa->o_valid);
+        /* Limit the valid bits in the return data to what we actually use */
+        oa->o_valid = OBD_MD_FLID;
+        obdo_from_inode(oa, dentry->d_inode, FILTER_VALID_FLAGS);
 
         f_dput(dentry);
         RETURN(rc);
@@ -1580,48 +1649,55 @@ static int filter_setattr(struct lustre_handle *conn, struct obdo *oa,
                           struct lov_stripe_md *md, struct obd_trans_info *oti)
 {
         struct obd_run_ctxt saved;
-        struct obd_export *export = class_conn2export(conn);
-        struct obd_device *obd = class_conn2obd(conn);
-        struct filter_obd *filter = &obd->u.filter;
+        struct obd_export *exp;
+        struct filter_obd *filter;
         struct dentry *dentry;
         struct iattr iattr;
-        struct inode *inode;
-        void * handle;
+        void *handle;
         int rc, rc2;
         ENTRY;
 
-        dentry = filter_oa2dentry(conn, oa);
+        LASSERT(oti != NULL);
+        exp = class_conn2export(conn);
+        if (!exp) {
+                CERROR("invalid client cookie "LPX64"\n", conn->cookie);
+                RETURN(-EINVAL);
+        }
 
+        dentry = filter_oa2dentry(exp->exp_obd, oa);
         if (IS_ERR(dentry))
                 GOTO(out_exp, rc = PTR_ERR(dentry));
 
+        filter = &exp->exp_obd->u.filter;
+
         iattr_from_obdo(&iattr, oa, oa->o_valid);
-        iattr.ia_mode = (iattr.ia_mode & ~S_IFMT) | S_IFREG;
-        inode = dentry->d_inode;
 
         push_ctxt(&saved, &filter->fo_ctxt, NULL);
         lock_kernel();
+
+        /* XXX this could be a rwsem instead, if filter_preprw played along */
         if (iattr.ia_valid & ATTR_SIZE)
-                down(&inode->i_sem);
+                down(&dentry->d_inode->i_sem);
 
-        handle = fsfilt_start(obd, dentry->d_inode, FSFILT_OP_SETATTR);
+        handle = fsfilt_start(exp->exp_obd, dentry->d_inode, FSFILT_OP_SETATTR,
+                              oti);
         if (IS_ERR(handle))
                 GOTO(out_unlock, rc = PTR_ERR(handle));
 
-        rc = fsfilt_setattr(obd, dentry, handle, &iattr, 1);
-        rc = filter_finish_transno(export, handle, oti, rc);
-        rc2 = fsfilt_commit(obd, dentry->d_inode, handle, 0);
+        rc = fsfilt_setattr(exp->exp_obd, dentry, handle, &iattr, 1);
+        rc = filter_finish_transno(exp, oti, rc);
+        rc2 = fsfilt_commit(exp->exp_obd, dentry->d_inode, handle, 0);
         if (rc2) {
                 CERROR("error on commit, err = %d\n", rc2);
                 if (!rc)
                         rc = rc2;
         }
 
-        if (iattr.ia_valid & ATTR_SIZE) {
-                up(&inode->i_sem);
-                oa->o_valid = OBD_MD_FLBLOCKS | OBD_MD_FLCTIME | OBD_MD_FLMTIME;
-                obdo_from_inode(oa, inode, oa->o_valid);
-        }
+        if (iattr.ia_valid & ATTR_SIZE)
+                up(&dentry->d_inode->i_sem);
+
+        oa->o_valid = OBD_MD_FLID;
+        obdo_from_inode(oa, dentry->d_inode, FILTER_VALID_FLAGS);
 
 out_unlock:
         unlock_kernel();
@@ -1629,7 +1705,7 @@ out_unlock:
 
         f_dput(dentry);
  out_exp:
-        class_export_put(export);
+        class_export_put(exp);
         RETURN(rc);
 }
 
@@ -1637,7 +1713,7 @@ static int filter_open(struct lustre_handle *conn, struct obdo *oa,
                        struct lov_stripe_md *ea, struct obd_trans_info *oti,
                        struct obd_client_handle *och)
 {
-        struct obd_export *export = NULL;
+        struct obd_export *exp;
         struct lustre_handle *handle;
         struct filter_file_data *ffd;
         struct file *filp;
@@ -1645,19 +1721,19 @@ static int filter_open(struct lustre_handle *conn, struct obdo *oa,
         int rc = 0;
         ENTRY;
 
-        export = class_conn2export(conn);
-        if (!export) {
-                CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
-                       conn->cookie);
-                GOTO(out, rc = -EINVAL);
+        exp = class_conn2export(conn);
+        if (exp == NULL) {
+                CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",conn->cookie);
+                RETURN(-EINVAL);
         }
 
-        filp = filter_obj_open(export, oa->o_id, oa->o_mode,
+        filp = filter_obj_open(exp, oti, oa->o_id, oa->o_mode,
                                LCK_PR, &parent_lockh);
         if (IS_ERR(filp))
                 GOTO(out, rc = PTR_ERR(filp));
 
-        filter_from_inode(oa, filp->f_dentry->d_inode, oa->o_valid);
+        oa->o_valid = OBD_MD_FLID;
+        obdo_from_inode(oa, filp->f_dentry->d_inode, FILTER_VALID_FLAGS);
 
         ffd = filp->private_data;
         handle = obdo_handle(oa);
@@ -1665,7 +1741,7 @@ static int filter_open(struct lustre_handle *conn, struct obdo *oa,
         oa->o_valid |= OBD_MD_FLHANDLE;
 
 out:
-        class_export_put(export);
+        class_export_put(exp);
         if (!rc) {
                 memcpy(&oti->oti_ack_locks[0].lock, &parent_lockh,
                        sizeof(parent_lockh));
@@ -1677,15 +1753,16 @@ out:
 static int filter_close(struct lustre_handle *conn, struct obdo *oa,
                         struct lov_stripe_md *ea, struct obd_trans_info *oti)
 {
-        struct obd_export *exp = class_conn2export(conn);
+        struct obd_export *exp;
         struct filter_file_data *ffd;
         struct filter_export_data *fed;
         int rc;
         ENTRY;
 
-        if (!exp) {
-                CDEBUG(D_IOCTL, "invalid client cookie"LPX64"\n", conn->cookie);
-                GOTO(out, rc = -EINVAL);
+        exp = class_conn2export(conn);
+        if (exp == NULL) {
+                CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",conn->cookie);
+                RETURN(-EINVAL);
         }
 
         if (!(oa->o_valid & OBD_MD_FLHANDLE)) {
@@ -1705,6 +1782,9 @@ static int filter_close(struct lustre_handle *conn, struct obdo *oa,
         list_del(&ffd->ffd_export_list);
         spin_unlock(&fed->fed_lock);
 
+        oa->o_valid = OBD_MD_FLID;
+        obdo_from_inode(oa,ffd->ffd_file->f_dentry->d_inode,FILTER_VALID_FLAGS);
+
         rc = filter_close_internal(exp, ffd, oti, 0);
         filter_ffd_put(ffd);
         GOTO(out, rc);
@@ -1717,24 +1797,25 @@ static int filter_create(struct lustre_handle *conn, struct obdo *oa,
                          struct lov_stripe_md **ea, struct obd_trans_info *oti)
 {
         struct obd_export *exp;
-        struct obd_device *obd = class_conn2obd(conn);
-        struct filter_obd *filter = &obd->u.filter;
+        struct obd_device *obd;
+        struct filter_obd *filter;
         struct obd_run_ctxt saved;
         struct lustre_handle parent_lockh;
         struct dentry *dparent;
+        struct ll_fid mds_fid = { .id = 0 };
         struct dentry *dchild = NULL;
-        struct iattr;
         void *handle;
         int err, rc, cleanup_phase;
         ENTRY;
 
-        if (!obd) {
-                CERROR("invalid client cookie "LPX64"\n", conn->cookie);
+        exp = class_conn2export(conn);
+        if (exp == NULL) {
+                CDEBUG(D_IOCTL,"invalid client cookie "LPX64"\n", conn->cookie);
                 RETURN(-EINVAL);
         }
 
-        exp = class_conn2export(conn);
-
+        obd = exp->exp_obd;
+        filter = &obd->u.filter;
         push_ctxt(&saved, &filter->fo_ctxt, NULL);
  retry:
         oa->o_id = filter_next_id(filter);
@@ -1760,21 +1841,42 @@ static int filter_create(struct lustre_handle *conn, struct obdo *oa,
         }
 
         cleanup_phase = 2;
-        handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_CREATE);
+        handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_CREATE_LOG, oti);
         if (IS_ERR(handle))
                 GOTO(cleanup, rc = PTR_ERR(handle));
 
         rc = vfs_create(dparent->d_inode, dchild, oa->o_mode);
-        if (rc)
+        if (rc) {
                 CERROR("create failed rc = %d\n", rc);
+        } else if (oa->o_valid & (OBD_MD_FLCTIME|OBD_MD_FLMTIME|OBD_MD_FLSIZE)){
+                struct iattr attr;
 
-        rc = filter_finish_transno(exp, handle, oti, rc);
-        err = filter_update_server_data(filter->fo_rcvd_filp, filter->fo_fsd);
-        if (err) {
-                CERROR("unable to write lastobjid but file created\n");
-                if (!rc)
-                        rc = err;
+                iattr_from_obdo(&attr, oa, oa->o_valid);
+                rc = fsfilt_setattr(obd, dchild, handle, &attr, 1);
+                if (rc)
+                        CERROR("create setattr failed rc = %d\n", rc);
         }
+        rc = filter_finish_transno(exp, oti, rc);
+        err = filter_update_server_data(obd, filter->fo_rcvd_filp,
+                                        filter->fo_fsd);
+        if (err)
+                CERROR("unable to write lastobjid but file created\n");
+
+        /* On success, log a create record if a log cookie was requested */
+        if (!rc && mds_fid.id && (oa->o_valid & OBD_MD_FLCOOKIE)) {
+                err = filter_log_op_create(obd->u.filter.fo_catalog, &mds_fid,
+                                           dchild->d_inode->i_ino,
+                                           dchild->d_inode->i_generation,
+                                           oti->oti_logcookies);
+                if (err) {
+                        CERROR("error logging create record: rc %d\n", err);
+                        oa->o_valid = OBD_MD_FLID;
+                } else {
+                        oa->o_valid = OBD_MD_FLID | OBD_MD_FLCOOKIE;
+                }
+        } else
+                oa->o_valid = OBD_MD_FLID;
+
         err = fsfilt_commit(obd, dparent->d_inode, handle, 0);
         if (err) {
                 CERROR("error on commit, err = %d\n", err);
@@ -1786,9 +1888,7 @@ static int filter_create(struct lustre_handle *conn, struct obdo *oa,
                 GOTO(cleanup, rc);
 
         /* Set flags for fields we have set in the inode struct */
-        oa->o_valid = OBD_MD_FLID | OBD_MD_FLBLKSZ | OBD_MD_FLBLOCKS |
-                 OBD_MD_FLMTIME | OBD_MD_FLATIME | OBD_MD_FLCTIME;
-        filter_from_inode(oa, dchild->d_inode, oa->o_valid);
+        obdo_from_inode(oa, dchild->d_inode, FILTER_VALID_FLAGS);
 
         EXIT;
 cleanup:
@@ -1819,24 +1919,25 @@ static int filter_destroy(struct lustre_handle *conn, struct obdo *oa,
                           struct lov_stripe_md *ea, struct obd_trans_info *oti)
 {
         struct obd_export *exp;
-        struct obd_device *obd = class_conn2obd(conn);
-        struct filter_obd *filter = &obd->u.filter;
-        struct dentry *dparent, *dchild = NULL;
+        struct obd_device *obd;
+        struct filter_obd *filter;
+        struct dentry *dchild = NULL, *dparent = NULL;
         struct filter_dentry_data *fdd;
         struct obd_run_ctxt saved;
         void *handle = NULL;
         struct lustre_handle parent_lockh;
+        struct llog_cookie *fcc = NULL;
         int rc, rc2, cleanup_phase = 0;
         ENTRY;
 
-        if (!obd) {
-                CERROR("invalid client cookie "LPX64"\n", conn->cookie);
+        exp = class_conn2export(conn);
+        if (exp == NULL) {
+                CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",conn->cookie);
                 RETURN(-EINVAL);
         }
 
-        exp = class_conn2export(conn);
-
-        CDEBUG(D_INODE, "destroying objid "LPU64"\n", oa->o_id);
+        obd = exp->exp_obd;
+        filter = &obd->u.filter;
 
         push_ctxt(&saved, &filter->fo_ctxt, NULL);
         dparent = filter_parent_lock(obd, oa->o_mode, oa->o_id,
@@ -1850,38 +1951,53 @@ static int filter_destroy(struct lustre_handle *conn, struct obdo *oa,
                 GOTO(cleanup, rc = -ENOENT);
         cleanup_phase = 2;
 
-        if (!dchild->d_inode) {
+        if (dchild->d_inode == NULL) {
                 CERROR("destroying non-existent object "LPU64"\n", oa->o_id);
                 GOTO(cleanup, rc = -ENOENT);
         }
-
-        handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_UNLINK);
+        handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_UNLINK_LOG, oti);
         if (IS_ERR(handle))
                 GOTO(cleanup, rc = PTR_ERR(handle));
         cleanup_phase = 3;
 
         fdd = dchild->d_fsdata;
-        if (fdd && atomic_read(&fdd->fdd_open_count)) {
-                LASSERT(fdd->fdd_magic = FILTER_DENTRY_MAGIC);
+
+        /* Our MDC connection is established by the MDS to us */
+        if ((oa->o_valid & OBD_MD_FLCOOKIE) && filter->fo_mdc_imp != NULL) {
+                OBD_ALLOC(fcc, sizeof(*fcc));
+                if (fcc != NULL)
+                        memcpy(fcc, obdo_logcookie(oa), sizeof(*fcc));
+        }
+
+        if (fdd != NULL && atomic_read(&fdd->fdd_open_count)) {
+                LASSERT(fdd->fdd_magic == FILTER_DENTRY_MAGIC);
                 if (!(fdd->fdd_flags & FILTER_FLAG_DESTROY)) {
                         fdd->fdd_flags |= FILTER_FLAG_DESTROY;
-                        /* XXX put into PENDING directory in case of crash */
+
+#ifdef ENABLE_ORPHANS
+                        filter_log_op_orphan(filter->fo_catalog, oa->o_id,
+                                             oa->o_generation,&fdd->fdd_cookie);
+#endif
                         CDEBUG(D_INODE,
                                "defer destroy of %dx open objid "LPU64"\n",
                                atomic_read(&fdd->fdd_open_count), oa->o_id);
-                } else
+                } else {
                         CDEBUG(D_INODE,
                                "repeat destroy of %dx open objid "LPU64"\n",
                                atomic_read(&fdd->fdd_open_count), oa->o_id);
+                }
                 GOTO(cleanup, rc = 0);
         }
 
-        rc = filter_destroy_internal(obd, dparent, dchild);
+        rc = filter_destroy_internal(obd, oa->o_id, dparent, dchild);
 
 cleanup:
         switch(cleanup_phase) {
         case 3:
-                rc = filter_finish_transno(exp, handle, oti, rc);
+                if (fcc != NULL)
+                        fsfilt_set_last_rcvd(obd, 0, oti->oti_handle,
+                                             filter_cancel_cookies_cb, fcc);
+                rc = filter_finish_transno(exp, oti, rc);
                 rc2 = fsfilt_commit(obd, dparent->d_inode, handle, 0);
                 if (rc2) {
                         CERROR("error on commit, err = %d\n", rc2);
@@ -1930,742 +2046,17 @@ static int filter_truncate(struct lustre_handle *conn, struct obdo *oa,
         RETURN(error);
 }
 
-static inline void lustre_put_page(struct page *page)
-{
-        page_cache_release(page);
-}
-
-static int filter_start_page_read(struct inode *inode, struct niobuf_local *lnb)
-{
-        struct address_space *mapping = inode->i_mapping;
-        struct page *page;
-        unsigned long index = lnb->offset >> PAGE_SHIFT;
-        int rc;
-
-        page = grab_cache_page(mapping, index); /* locked page */
-        if (IS_ERR(page))
-                return lnb->rc = PTR_ERR(page);
-
-        lnb->page = page;
-
-        if (inode->i_size < lnb->offset + lnb->len - 1)
-                lnb->rc = inode->i_size - lnb->offset;
-        else
-                lnb->rc = lnb->len;
-
-        if (PageUptodate(page)) {
-                unlock_page(page);
-                return 0;
-        }
-
-        rc = mapping->a_ops->readpage(NULL, page);
-        if (rc < 0) {
-                CERROR("page index %lu, rc = %d\n", index, rc);
-                lnb->page = NULL;
-                lustre_put_page(page);
-                return lnb->rc = rc;
-        }
-
-        return 0;
-}
-
-static int filter_finish_page_read(struct niobuf_local *lnb)
-{
-        if (lnb->page == NULL)
-                return 0;
-
-        if (PageUptodate(lnb->page))
-                return 0;
-
-        wait_on_page(lnb->page);
-        if (!PageUptodate(lnb->page)) {
-                CERROR("page index %lu/offset "LPX64" not uptodate\n",
-                       lnb->page->index, lnb->offset);
-                GOTO(err_page, lnb->rc = -EIO);
-        }
-        if (PageError(lnb->page)) {
-                CERROR("page index %lu/offset "LPX64" has error\n",
-                       lnb->page->index, lnb->offset);
-                GOTO(err_page, lnb->rc = -EIO);
-        }
-
-        return 0;
-
-err_page:
-        lustre_put_page(lnb->page);
-        lnb->page = NULL;
-        return lnb->rc;
-}
-
-static struct page *lustre_get_page_write(struct inode *inode,
-                                          unsigned long index)
-{
-        struct address_space *mapping = inode->i_mapping;
-        struct page *page;
-        int rc;
-
-        page = grab_cache_page(mapping, index); /* locked page */
-
-        if (!IS_ERR(page)) {
-                /* Note: Called with "O" and "PAGE_SIZE" this is essentially
-                 * a no-op for most filesystems, because we write the whole
-                 * page.  For partial-page I/O this will read in the page.
-                 */
-                rc = mapping->a_ops->prepare_write(NULL, page, 0, PAGE_SIZE);
-                if (rc) {
-                        CERROR("page index %lu, rc = %d\n", index, rc);
-                        if (rc != -ENOSPC)
-                                LBUG();
-                        GOTO(err_unlock, rc);
-                }
-                /* XXX not sure if we need this if we are overwriting page */
-                if (PageError(page)) {
-                        CERROR("error on page index %lu, rc = %d\n", index, rc);
-                        LBUG();
-                        GOTO(err_unlock, rc = -EIO);
-                }
-        }
-        return page;
-
-err_unlock:
-        unlock_page(page);
-        lustre_put_page(page);
-        return ERR_PTR(rc);
-}
-
-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-int waitfor_one_page(struct page *page)
-{
-        wait_on_page_locked(page);
-        return 0;
-}
-#endif
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-/* We should only change the file mtime (and not the ctime, like
- * update_inode_times() in generic_file_write()) when we only change data.
- */
-static inline void inode_update_time(struct inode *inode, int ctime_too)
-{
-        time_t now = CURRENT_TIME;
-        if (inode->i_mtime == now && (!ctime_too || inode->i_ctime == now))
-                return;
-        inode->i_mtime = now;
-        if (ctime_too)
-                inode->i_ctime = now;
-        mark_inode_dirty_sync(inode);
-}
-#endif
-
-static int lustre_commit_write(struct niobuf_local *lnb)
-{
-        struct page *page = lnb->page;
-        unsigned from = lnb->offset & ~PAGE_MASK;
-        unsigned to = from + lnb->len;
-        struct inode *inode = page->mapping->host;
-        int err;
-
-        LASSERT(to <= PAGE_SIZE);
-        err = page->mapping->a_ops->commit_write(NULL, page, from, to);
-        if (!err && IS_SYNC(inode))
-                err = waitfor_one_page(page);
-        //SetPageUptodate(page); // the client commit_write will do this
-
-        SetPageReferenced(page);
-        unlock_page(page);
-        lustre_put_page(page);
-        return err;
-}
-
-int filter_get_page_write(struct inode *inode, struct niobuf_local *lnb,
-                          int *pglocked)
-{
-        unsigned long index = lnb->offset >> PAGE_SHIFT;
-        struct address_space *mapping = inode->i_mapping;
-        struct page *page;
-        int rc;
-
-        //ASSERT_PAGE_INDEX(index, GOTO(err, rc = -EINVAL));
-        if (*pglocked)
-                page = grab_cache_page_nowait(mapping, index); /* locked page */
-        else
-                page = grab_cache_page(mapping, index); /* locked page */
-
-
-        /* This page is currently locked, so get a temporary page instead. */
-        if (!page) {
-                CDEBUG(D_ERROR,"ino %lu page %ld locked\n", inode->i_ino,index);
-                page = alloc_pages(GFP_KERNEL, 0); /* locked page */
-                if (!page) {
-                        CERROR("no memory for a temp page\n");
-                        GOTO(err, rc = -ENOMEM);
-                }
-                page->index = index;
-                lnb->page = page;
-                lnb->flags |= N_LOCAL_TEMP_PAGE;
-        } else if (!IS_ERR(page)) {
-                (*pglocked)++;
-
-                rc = mapping->a_ops->prepare_write(NULL, page,
-                                                   lnb->offset & ~PAGE_MASK,
-                                                   lnb->len);
-                if (rc) {
-                        if (rc != -ENOSPC)
-                                CERROR("page index %lu, rc = %d\n", index, rc);
-                        GOTO(err_unlock, rc);
-                }
-                /* XXX not sure if we need this if we are overwriting page */
-                if (PageError(page)) {
-                        CERROR("error on page index %lu, rc = %d\n", index, rc);
-                        LBUG();
-                        GOTO(err_unlock, rc = -EIO);
-                }
-                lnb->page = page;
-        }
-
-        return 0;
-
-err_unlock:
-        unlock_page(page);
-        lustre_put_page(page);
-err:
-        return lnb->rc = rc;
-}
-
-/*
- * We need to balance prepare_write() calls with commit_write() calls.
- * If the page has been prepared, but we have no data for it, we don't
- * want to overwrite valid data on disk, but we still need to zero out
- * data for space which was newly allocated.  Like part of what happens
- * in __block_prepare_write() for newly allocated blocks.
- *
- * XXX currently __block_prepare_write() creates buffers for all the
- *     pages, and the filesystems mark these buffers as BH_New if they
- *     were newly allocated from disk. We use the BH_New flag similarly.
- */
-static int filter_commit_write(struct niobuf_local *lnb, int err)
-{
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        if (err) {
-                unsigned block_start, block_end;
-                struct buffer_head *bh, *head = lnb->page->buffers;
-                unsigned blocksize = head->b_size;
-
-                /* debugging: just seeing if this ever happens */
-                CDEBUG(err == -ENOSPC ? D_INODE : D_ERROR,
-                       "called for ino %lu:%lu on err %d\n",
-                       lnb->page->mapping->host->i_ino, lnb->page->index, err);
-
-                /* Currently one buffer per page, but in the future... */
-                for (bh = head, block_start = 0; bh != head || !block_start;
-                     block_start = block_end, bh = bh->b_this_page) {
-                        block_end = block_start + blocksize;
-                        if (buffer_new(bh)) {
-                                memset(kmap(lnb->page) + block_start, 0,
-                                       blocksize);
-                                kunmap(lnb->page);
-                        }
-                }
-        }
-#endif
-        return lustre_commit_write(lnb);
-}
-
-static int filter_preprw(int cmd, struct obd_export *exp, struct obdo *obdo,
-                         int objcount, struct obd_ioobj *obj,
-                         int niocount, struct niobuf_remote *nb,
-                         struct niobuf_local *res, void **desc_private,
-                         struct obd_trans_info *oti)
-{
-        struct obd_run_ctxt saved;
-        struct obd_device *obd;
-        struct obd_ioobj *o;
-        struct niobuf_remote *rnb;
-        struct niobuf_local *lnb;
-        struct fsfilt_objinfo *fso;
-        struct dentry *dentry;
-        struct inode *inode;
-        int pglocked = 0, rc = 0, i, j, tot_bytes = 0;
-        unsigned long now = jiffies;
-        ENTRY;
-
-        memset(res, 0, niocount * sizeof(*res));
-
-        obd = exp->exp_obd;
-        if (obd == NULL)
-                RETURN(-EINVAL);
-
-        // theoretically we support multi-obj BRW RPCs, but until then...
-        LASSERT(objcount == 1);
-
-        OBD_ALLOC(fso, objcount * sizeof(*fso));
-        if (!fso)
-                RETURN(-ENOMEM);
-
-        push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
-
-        for (i = 0, o = obj; i < objcount; i++, o++) {
-                struct filter_dentry_data *fdd;
-
-                LASSERT(o->ioo_bufcnt);
-
-                dentry = filter_fid2dentry(obd, NULL, o->ioo_type, o->ioo_id);
-
-                if (IS_ERR(dentry))
-                        GOTO(out_objinfo, rc = PTR_ERR(dentry));
-
-                fso[i].fso_dentry = dentry;
-                fso[i].fso_bufcnt = o->ioo_bufcnt;
-
-                if (!dentry->d_inode) {
-                        CERROR("trying to BRW to non-existent file "LPU64"\n",
-                               o->ioo_id);
-                        f_dput(dentry);
-                        GOTO(out_objinfo, rc = -ENOENT);
-                }
-
-                /* If we ever start to support mutli-object BRW RPCs, we will
-                 * need to get locks on mulitple inodes (in order) or use the
-                 * DLM to do the locking for us (and use the same locking in
-                 * filter_setattr() for truncate).  That isn't all, because
-                 * there still exists the possibility of a truncate starting
-                 * a new transaction while holding the ext3 rwsem = write
-                 * while some writes (which have started their transactions
-                 * here) blocking on the ext3 rwsem = read => lock inversion.
-                 *
-                 * The handling gets very ugly when dealing with locked pages.
-                 * It may be easier to just get rid of the locked page code
-                 * (which has problems of its own) and either discover we do
-                 * not need it anymore (i.e. it was a symptom of another bug)
-                 * or ensure we get the page locks in an appropriate order.
-                 */
-                if (cmd & OBD_BRW_WRITE)
-                        down(&dentry->d_inode->i_sem);
-                fdd = dentry->d_fsdata;
-                if (!fdd || !atomic_read(&fdd->fdd_open_count))
-                        CDEBUG(D_PAGE, "I/O to unopened object "LPU64"\n",
-                               o->ioo_id);
-        }
-
-        if (time_after(jiffies, now + 15*HZ))
-                CERROR("slow prep setup %lus\n", (jiffies - now) / HZ);
-
-        if (cmd & OBD_BRW_WRITE) {
-                *desc_private = fsfilt_brw_start(obd, objcount, fso,
-                                                 niocount, nb);
-                if (IS_ERR(*desc_private)) {
-                        rc = PTR_ERR(*desc_private);
-                        CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR,
-                               "error starting transaction: rc = %d\n", rc);
-                        *desc_private = NULL;
-                        GOTO(out_objinfo, rc);
-                }
-        }
-
-        for (i = 0, o = obj, rnb = nb, lnb = res; i < objcount; i++, o++) {
-                dentry = fso[i].fso_dentry;
-                inode = dentry->d_inode;
-
-                for (j = 0; j < o->ioo_bufcnt; j++, rnb++, lnb++) {
-                        if (j == 0)
-                                lnb->dentry = dentry;
-                        else
-                                lnb->dentry = dget(dentry);
-
-                        lnb->offset = rnb->offset;
-                        lnb->len    = rnb->len;
-                        lnb->flags  = rnb->flags;
-                        lnb->start  = jiffies;
-
-                        if (cmd & OBD_BRW_WRITE) {
-                                rc = filter_get_page_write(inode,lnb,&pglocked);
-                                if (rc)
-                                        up(&dentry->d_inode->i_sem);
-                        } else if (inode->i_size <= rnb->offset) {
-                                /* If there's no more data, abort early.
-                                 * lnb->page == NULL and lnb->rc == 0, so it's
-                                 * easy to detect later. */
-                                f_dput(dentry);
-                                lnb->dentry = NULL;
-                                break;
-                        } else {
-                                rc = filter_start_page_read(inode, lnb);
-                        }
-
-                        if (rc) {
-                                CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR,
-                                       "page err %u@"LPU64" %u/%u %p: rc %d\n",
-                                       lnb->len, lnb->offset, j, o->ioo_bufcnt,
-                                       dentry, rc);
-                                f_dput(dentry);
-                                GOTO(out_pages, rc);
-                        }
-
-                        tot_bytes += lnb->len;
-
-                        if ((cmd & OBD_BRW_READ) && lnb->rc < lnb->len) {
-                                /* Likewise with a partial read */
-                                break;
-                        }
-                }
-        }
-
-        if (time_after(jiffies, now + 15*HZ))
-                CERROR("slow prep get page %lus\n", (jiffies - now) / HZ);
-
-        if (cmd & OBD_BRW_READ) {
-                lprocfs_counter_add(obd->obd_stats, LPROC_FILTER_READ_BYTES,
-                                    tot_bytes);
-                while (lnb-- > res) {
-                        rc = filter_finish_page_read(lnb);
-                        if (rc) {
-                                CERROR("error page %u@"LPU64" %u %p: rc %d\n",
-                                       lnb->len, lnb->offset, lnb - res,
-                                       lnb->dentry, rc);
-                                f_dput(lnb->dentry);
-                                GOTO(out_pages, rc);
-                        }
-                }
-        } else
-                lprocfs_counter_add(obd->obd_stats, LPROC_FILTER_WRITE_BYTES,
-                                    tot_bytes);
-
-        if (time_after(jiffies, now + 15*HZ))
-                CERROR("slow prep finish page %lus\n", (jiffies - now) / HZ);
-
-        EXIT;
-out:
-        OBD_FREE(fso, objcount * sizeof(*fso));
-        current->journal_info = NULL;
-        pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
-        return rc;
-
-out_pages:
-        while (lnb-- > res) {
-                if (cmd & OBD_BRW_WRITE) {
-                        filter_commit_write(lnb, rc);
-                        up(&lnb->dentry->d_inode->i_sem);
-                } else {
-                        lustre_put_page(lnb->page);
-                }
-                f_dput(lnb->dentry);
-        }
-        if (cmd & OBD_BRW_WRITE) {
-                filter_finish_transno(exp, *desc_private, oti, rc);
-                fsfilt_commit(obd,
-                              filter_parent(obd,S_IFREG,obj->ioo_id)->d_inode,
-                              *desc_private, 0);
-        }
-        goto out; /* dropped the dentry refs already (one per page) */
-
-out_objinfo:
-        for (i = 0; i < objcount && fso[i].fso_dentry; i++) {
-                if (cmd & OBD_BRW_WRITE)
-                        up(&fso[i].fso_dentry->d_inode->i_sem);
-                f_dput(fso[i].fso_dentry);
-        }
-        goto out;
-}
-
-static int filter_write_locked_page(struct niobuf_local *lnb)
-{
-        struct page *lpage;
-        void        *lpage_addr;
-        void        *lnb_addr;
-        int rc;
-        ENTRY;
-
-        lpage = lustre_get_page_write(lnb->dentry->d_inode, lnb->page->index);
-        if (IS_ERR(lpage)) {
-                /* It is highly unlikely that we would ever get an error here.
-                 * The page we want to get was previously locked, so it had to
-                 * have already allocated the space, and we were just writing
-                 * over the same data, so there would be no hole in the file.
-                 *
-                 * XXX: possibility of a race with truncate could exist, need
-                 *      to check that.  There are no guarantees w.r.t.
-                 *      write order even on a local filesystem, although the
-                 *      normal response would be to return the number of bytes
-                 *      successfully written and leave the rest to the app.
-                 */
-                rc = PTR_ERR(lpage);
-                CERROR("error getting locked page index %ld: rc = %d\n",
-                       lnb->page->index, rc);
-                LBUG();
-                lustre_commit_write(lnb);
-                RETURN(rc);
-        }
-
-        /* 2 kmaps == vanishingly small deadlock opportunity */
-        lpage_addr = kmap(lpage);
-        lnb_addr = kmap(lnb->page);
-
-        memcpy(lpage_addr, lnb_addr, PAGE_SIZE);
-
-        kunmap(lnb->page);
-        kunmap(lpage);
-
-        lustre_put_page(lnb->page);
-
-        lnb->page = lpage;
-        rc = lustre_commit_write(lnb);
-        if (rc)
-                CERROR("error committing locked page %ld: rc = %d\n",
-                       lnb->page->index, rc);
-
-        RETURN(rc);
-}
-
+/* Sync entry point for the filter: passes the export's obd device
+ * (exp->exp_obd) and that device's u.filter.fo_sb to fsfilt_sync() and
+ * returns whatever fsfilt_sync() returns. */
 static int filter_syncfs(struct obd_export *exp)
 {
-        struct obd_device *obd = exp->exp_obd;
         ENTRY;
 
-        RETURN(fsfilt_sync(obd, obd->u.filter.fo_sb));
-}
-
-static int filter_commitrw(int cmd, struct obd_export *exp,
-                           int objcount, struct obd_ioobj *obj,
-                           int niocount, struct niobuf_local *res,
-                           void *desc_private, struct obd_trans_info *oti)
-{
-        struct obd_run_ctxt saved;
-        struct obd_ioobj *o;
-        struct niobuf_local *lnb;
-        struct obd_device *obd = exp->exp_obd;
-        int found_locked = 0, rc = 0, i;
-        unsigned long now = jiffies;  /* DEBUGGING OST TIMEOUTS */
-        ENTRY;
-
-        push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
-
-        LASSERT(!current->journal_info);
-        current->journal_info = desc_private;
-
-        for (i = 0, o = obj, lnb = res; i < objcount; i++, o++) {
-                int j;
-
-                if (cmd & OBD_BRW_WRITE) {
-                        inode_update_time(lnb->dentry->d_inode, 1);
-                        up(&lnb->dentry->d_inode->i_sem);
-                }
-                for (j = 0 ; j < o->ioo_bufcnt ; j++, lnb++) {
-                        if (lnb->page == NULL) {
-                                continue;
-                        }
-
-                        if (lnb->flags & N_LOCAL_TEMP_PAGE) {
-                                found_locked++;
-                                continue;
-                        }
-
-                        if (time_after(jiffies, lnb->start + 15*HZ))
-                                CERROR("slow commitrw %lus\n",
-                                       (jiffies - lnb->start) / HZ);
-
-                        if (cmd & OBD_BRW_WRITE) {
-                                int err = filter_commit_write(lnb, 0);
-
-                                if (!rc)
-                                        rc = err;
-                        } else {
-                                lustre_put_page(lnb->page);
-                        }
-
-                        f_dput(lnb->dentry);
-                        if (time_after(jiffies, lnb->start + 15*HZ))
-                                CERROR("slow commit_write %lus\n",
-                                       (jiffies - lnb->start) / HZ);
-                }
-        }
-
-        for (i = 0, o = obj, lnb = res; found_locked > 0 && i < objcount;
-             i++, o++) {
-                int j;
-                for (j = 0 ; j < o->ioo_bufcnt ; j++, lnb++) {
-                        int err;
-                        if (!(lnb->flags & N_LOCAL_TEMP_PAGE))
-                                continue;
-
-                        if (time_after(jiffies, lnb->start + 15*HZ))
-                                CERROR("slow commitrw locked %lus\n",
-                                       (jiffies - lnb->start) / HZ);
-
-                        err = filter_write_locked_page(lnb);
-                        if (!rc)
-                                rc = err;
-                        f_dput(lnb->dentry);
-                        found_locked--;
-
-                        if (time_after(jiffies, lnb->start + 15*HZ))
-                                CERROR("slow commit_write locked %lus\n",
-                                       (jiffies - lnb->start) / HZ);
-                }
-        }
-
-        if (cmd & OBD_BRW_WRITE) {
-                /* We just want any dentry for the commit, for now */
-                struct dentry *dparent = filter_parent(obd, S_IFREG, 0);
-                int err;
-
-                rc = filter_finish_transno(exp, desc_private, oti, rc);
-                err = fsfilt_commit(obd, dparent->d_inode, desc_private,
-                                    obd_sync_filter);
-                if (err)
-                        rc = err;
-                if (obd_sync_filter)
-                        LASSERT(oti->oti_transno <= obd->obd_last_committed);
-
-                if (time_after(jiffies, now + 15*HZ))
-                        CERROR("slow commitrw commit %lus\n", (jiffies-now)/HZ);
-        }
-
-        LASSERT(!current->journal_info);
-
-        pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
-        RETURN(rc);
+        RETURN(fsfilt_sync(exp->exp_obd, exp->exp_obd->u.filter.fo_sb));
 }
 
-static int filter_brw(int cmd, struct lustre_handle *conn,
-                      struct lov_stripe_md *lsm, obd_count oa_bufs,
-                      struct brw_page *pga, struct obd_trans_info *oti)
+/* Fill *osfs by calling fsfilt_statfs() on the device's u.filter.fo_sb and
+ * return its result.  max_age is accepted for the new statfs signature but
+ * is not referenced anywhere in this body. */
+static int filter_statfs(struct obd_device *obd, struct obd_statfs *osfs,
+                         unsigned long max_age)
 {
-        struct obd_export *export = class_conn2export(conn);
-        struct obd_ioobj        ioo;
-        struct niobuf_local     *lnb;
-        struct niobuf_remote    *rnb;
-        obd_count               i;
-        void                    *desc_private;
-        int                     ret = 0;
         ENTRY;
-
-        if (export == NULL)
-                RETURN(-EINVAL);
-
-        OBD_ALLOC(lnb, oa_bufs * sizeof(struct niobuf_local));
-        OBD_ALLOC(rnb, oa_bufs * sizeof(struct niobuf_remote));
-
-        if (lnb == NULL || rnb == NULL)
-                GOTO(out, ret = -ENOMEM);
-
-        for (i = 0; i < oa_bufs; i++) {
-                rnb[i].offset = pga[i].off;
-                rnb[i].len = pga[i].count;
-        }
-
-        ioo.ioo_id = lsm->lsm_object_id;
-        ioo.ioo_gr = 0;
-        ioo.ioo_type = S_IFREG;
-        ioo.ioo_bufcnt = oa_bufs;
-
-        ret = filter_preprw(cmd, export, NULL, 1, &ioo, oa_bufs, rnb, lnb,
-                            &desc_private, oti);
-        if (ret != 0)
-                GOTO(out, ret);
-
-        for (i = 0; i < oa_bufs; i++) {
-                void *virt = kmap(pga[i].pg);
-                obd_off off = pga[i].off & ~PAGE_MASK;
-                void *addr = kmap(lnb[i].page);
-
-                /* 2 kmaps == vanishingly small deadlock opportunity */
-
-                if (cmd & OBD_BRW_WRITE)
-                        memcpy(addr + off, virt + off, pga[i].count);
-                else
-                        memcpy(virt + off, addr + off, pga[i].count);
-
-                kunmap(addr);
-                kunmap(virt);
-        }
-
-        ret = filter_commitrw(cmd, export, 1, &ioo, oa_bufs, lnb, desc_private,
-                              oti);
-
-out:
-        if (lnb)
-                OBD_FREE(lnb, oa_bufs * sizeof(struct niobuf_local));
-        if (rnb)
-                OBD_FREE(rnb, oa_bufs * sizeof(struct niobuf_remote));
-        class_export_put(export);
-        RETURN(ret);
-}
-
-static int filter_san_preprw(int cmd, struct lustre_handle *conn,
-                             int objcount, struct obd_ioobj *obj,
-                             int niocount, struct niobuf_remote *nb)
-{
-        struct obd_device *obd;
-        struct obd_ioobj *o = obj;
-        struct niobuf_remote *rnb = nb;
-        int rc = 0;
-        int i;
-        ENTRY;
-
-        obd = class_conn2obd(conn);
-        if (!obd) {
-                CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
-                       conn->cookie);
-                RETURN(-EINVAL);
-        }
-
-        for (i = 0; i < objcount; i++, o++) {
-                struct dentry *dentry;
-                struct inode *inode;
-                int (*fs_bmap)(struct address_space *, long);
-                int j;
-
-                dentry = filter_fid2dentry(obd, NULL, o->ioo_type, o->ioo_id);
-                if (IS_ERR(dentry))
-                        GOTO(out, rc = PTR_ERR(dentry));
-                inode = dentry->d_inode;
-                if (!inode) {
-                        CERROR("trying to BRW to non-existent file "LPU64"\n",
-                               o->ioo_id);
-                        f_dput(dentry);
-                        GOTO(out, rc = -ENOENT);
-                }
-                fs_bmap = inode->i_mapping->a_ops->bmap;
-
-                for (j = 0; j < o->ioo_bufcnt; j++, rnb++) {
-                        long block;
-
-                        block = rnb->offset >> inode->i_blkbits;
-
-                        if (cmd == OBD_BRW_READ) {
-                                block = fs_bmap(inode->i_mapping, block);
-                        } else {
-                                loff_t newsize = rnb->offset + rnb->len;
-                                /* fs_prep_san_write will also update inode
-                                 * size for us:
-                                 * (1) new alloced block
-                                 * (2) existed block but size extented
-                                 */
-                                /* FIXME We could call fs_prep_san_write()
-                                 * only once for all the blocks allocation.
-                                 * Now call it once for each block, for
-                                 * simplicity. And if error happens, we
-                                 * probably need to release previous alloced
-                                 * block */
-                                rc = fs_prep_san_write(obd, inode, &block,
-                                                       1, newsize);
-                                if (rc)
-                                        break;
-                        }
-
-                        rnb->offset = block;
-                }
-                f_dput(dentry);
-        }
-out:
-        RETURN(rc);
-}
-
-static int filter_statfs(struct obd_export *exp, struct obd_statfs *osfs)
-{
-        struct obd_device *obd = exp->exp_obd;
-        ENTRY;
-
         RETURN(fsfilt_statfs(obd, obd->u.filter.fo_sb, osfs));
 }
 
@@ -2676,7 +2067,7 @@ static int filter_get_info(struct lustre_handle *conn, __u32 keylen,
         ENTRY;
 
         obd = class_conn2obd(conn);
-        if (!obd) {
+        if (obd == NULL) {
                 CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
                        conn->cookie);
                 RETURN(-EINVAL);
@@ -2702,77 +2093,46 @@ static int filter_get_info(struct lustre_handle *conn, __u32 keylen,
         RETURN(-EINVAL);
 }
 
-int filter_copy_data(struct lustre_handle *dst_conn, struct obdo *dst,
-                  struct lustre_handle *src_conn, struct obdo *src,
-                  obd_size count, obd_off offset, struct obd_trans_info *oti)
+/* Handle an obd set_info request on the filter.  The only key accepted is
+ * "mds_conn" (with or without its trailing NUL counted in keylen); any other
+ * key, or an unknown connection handle, yields -EINVAL.
+ *
+ * On success the connection handle is copied into fo_mdc_conn and a new
+ * import, pointing back over the export's ptlrpc connection, is stored in
+ * fo_mdc_imp.  Returns 0 on success and -ENOMEM if no import could be
+ * allocated.  vallen and val are not used.
+ *
+ * NOTE(review): an import already stored in fo_mdc_imp is overwritten
+ * without being released here -- confirm whether a repeated "mds_conn"
+ * can occur. */
+static int filter_set_info(struct lustre_handle *conn, __u32 keylen,
+                           void *key, __u32 vallen, void *val)
 {
-        struct page *page;
-        struct lov_stripe_md srcmd, dstmd;
-        unsigned long index = 0;
-        int err = 0;
-
-        LBUG(); /* THIS CODE IS NOT CORRECT -phil */
-
-        memset(&srcmd, 0, sizeof(srcmd));
-        memset(&dstmd, 0, sizeof(dstmd));
-        srcmd.lsm_object_id = src->o_id;
-        dstmd.lsm_object_id = dst->o_id;
-
+        struct obd_device *obd;
+        struct obd_export *exp;
+        struct obd_import *imp;
         ENTRY;
-        CDEBUG(D_INFO, "src: ino "LPU64" blocks "LPU64", size "LPU64
-               ", dst: ino "LPU64"\n",
-               src->o_id, src->o_blocks, src->o_size, dst->o_id);
-        page = alloc_page(GFP_USER);
-        if (page == NULL)
-                RETURN(-ENOMEM);
-
-        wait_on_page(page);
 
-        /* XXX with brw vector I/O, we could batch up reads and writes here,
-         *     all we need to do is allocate multiple pages to handle the I/Os
-         *     and arrays to handle the request parameters.
-         */
-        while (index < ((src->o_size + PAGE_SIZE - 1) >> PAGE_SHIFT)) {
-                struct brw_page pg;
-
-                pg.pg = page;
-                pg.count = PAGE_SIZE;
-                pg.off = (page->index) << PAGE_SHIFT;
-                pg.flag = 0;
-
-                page->index = index;
-                err = obd_brw(OBD_BRW_READ, src_conn, &srcmd, 1, &pg, NULL);
-                if (err) {
-                        EXIT;
-                        break;
-                }
+        obd = class_conn2obd(conn);
+        if (obd == NULL) {
+                CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
+                       conn->cookie);
+                RETURN(-EINVAL);
+        }
 
-                pg.flag = OBD_BRW_CREATE;
-                CDEBUG(D_INFO, "Read page %ld ...\n", page->index);
+        /* Bound keylen from above as well: memcmp() must never be asked to
+         * read more bytes than the "mds_conn" literal (NUL included) holds. */
+        if (key == NULL || keylen < strlen("mds_conn") ||
+            keylen > sizeof("mds_conn") ||
+            memcmp(key, "mds_conn", keylen) != 0)
+                RETURN(-EINVAL);
 
-                err = obd_brw(OBD_BRW_WRITE, dst_conn, &dstmd, 1, &pg, oti);
+        /* Look the export up before touching any filter state, so every
+         * failure below leaves the device exactly as it was. */
+        exp = class_conn2export(conn);
+        if (exp == NULL)
+                RETURN(-EINVAL);
 
-                /* XXX should handle dst->o_size, dst->o_blocks here */
-                if (err) {
-                        EXIT;
-                        break;
-                }
+        /* NOTE(review): presumably class_new_import() returns NULL when the
+         * allocation fails -- confirm against obdclass. */
+        imp = class_new_import();
+        if (imp == NULL) {
+                class_export_put(exp);
+                RETURN(-ENOMEM);
+        }
 
-                CDEBUG(D_INFO, "Wrote page %ld ...\n", page->index);
+        CERROR("Received MDS connection ("LPX64")\n", conn->cookie);
+        memcpy(&obd->u.filter.fo_mdc_conn, conn, sizeof(*conn));
+
+        imp->imp_connection = ptlrpc_connection_addref(exp->exp_connection);
+        class_export_put(exp);
 
-                index++;
-        }
-        dst->o_size = src->o_size;
-        dst->o_blocks = src->o_blocks;
-        dst->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
-        unlock_page(page);
-        __free_page(page);
+        imp->imp_client = &obd->u.filter.fo_mdc_client;
+        imp->imp_remote_handle = *conn;
+        imp->imp_obd = obd;
+        imp->imp_dlm_fake = 1; /* XXX rename imp_dlm_fake to something else */
+        imp->imp_level = LUSTRE_CONN_FULL;
+
+        /* Publish the import only once every field above is filled in, so
+         * readers testing fo_mdc_imp != NULL never see a half-built one. */
+        obd->u.filter.fo_mdc_imp = imp;
+        class_import_put(imp);
 
-        RETURN(err);
+        RETURN(0);
 }
 
 int filter_iocontrol(unsigned int cmd, struct lustre_handle *conn,
-                  int len, void *karg, void *uarg)
+                     int len, void *karg, void *uarg)
 {
         struct obd_device *obd = class_conn2obd(conn);
 
@@ -2788,12 +2148,12 @@ int filter_iocontrol(unsigned int cmd, struct lustre_handle *conn,
         RETURN(0);
 }
 
-
 static struct obd_ops filter_obd_ops = {
         o_owner:          THIS_MODULE,
         o_attach:         filter_attach,
         o_detach:         filter_detach,
         o_get_info:       filter_get_info,
+        o_set_info:       filter_set_info,
         o_setup:          filter_setup,
         o_cleanup:        filter_cleanup,
         o_connect:        filter_connect,
@@ -2810,15 +2170,9 @@ static struct obd_ops filter_obd_ops = {
         o_punch:          filter_truncate,
         o_preprw:         filter_preprw,
         o_commitrw:       filter_commitrw,
+        o_log_cancel:     filter_log_cancel,
         o_destroy_export: filter_destroy_export,
         o_iocontrol:      filter_iocontrol,
-#if 0
-        o_san_preprw:  filter_san_preprw,
-        o_preallocate: filter_preallocate_inodes,
-        o_migrate:     filter_migrate,
-        o_copy:        filter_copy_data,
-        o_iterate:     filter_iterate
-#endif
 };
 
 static struct obd_ops filter_sanobd_ops = {
@@ -2826,6 +2180,7 @@ static struct obd_ops filter_sanobd_ops = {
         o_attach:         filter_attach,
         o_detach:         filter_detach,
         o_get_info:       filter_get_info,
+        o_set_info:       filter_set_info,
         o_setup:          filter_san_setup,
         o_cleanup:        filter_cleanup,
         o_connect:        filter_connect,
@@ -2841,18 +2196,12 @@ static struct obd_ops filter_sanobd_ops = {
         o_punch:          filter_truncate,
         o_preprw:         filter_preprw,
         o_commitrw:       filter_commitrw,
+        o_log_cancel:     filter_log_cancel,
         o_san_preprw:     filter_san_preprw,
         o_destroy_export: filter_destroy_export,
         o_iocontrol:      filter_iocontrol,
-#if 0
-        o_preallocate:  filter_preallocate_inodes,
-        o_migrate:      filter_migrate,
-        o_copy:         filter_copy_data,
-        o_iterate:      filter_iterate
-#endif
 };
 
-
 static int __init obdfilter_init(void)
 {
         struct lprocfs_static_vars lvars;
@@ -2860,7 +2209,7 @@ static int __init obdfilter_init(void)
 
         printk(KERN_INFO "Lustre Filtering OBD driver; info@clusterfs.com\n");
 
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(filter, &lvars);
 
         rc = class_register_type(&filter_obd_ops, lvars.module_vars,
                                  OBD_FILTER_DEVICENAME);
index 1319dbd..411a9fb 100644 (file)
 #define DEBUG_SUBSYSTEM S_CLASS
 
 #include <linux/version.h>
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#include <asm/statfs.h>
-#endif
 #include <linux/lprocfs_status.h>
 #include <linux/obd.h>
 
 #ifndef LPROCFS
-struct lprocfs_vars lprocfs_obd_vars[]  = { {0} };
-struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_obd_vars[]  = { {0} };
+static struct lprocfs_vars lprocfs_module_vars[] = { {0} };
 #else
 
-static inline int lprocfs_filter_statfs(void *data, struct statfs *sfs)
-{
-        struct obd_device *dev = (struct obd_device *) data;
-        LASSERT(dev != NULL);
-        return vfs_statfs(dev->u.filter.fo_sb, sfs);
-}
-
-DEFINE_LPROCFS_STATFS_FCT(rd_blksize,     lprocfs_filter_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, lprocfs_filter_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree,  lprocfs_filter_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filestotal,  lprocfs_filter_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filesfree,   lprocfs_filter_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filegroups,  lprocfs_filter_statfs);
-
-int rd_fstype(char *page, char **start, off_t off, int count, int *eof,
-              void *data)
-{
-        struct obd_device *dev = (struct obd_device *)data;
-        LASSERT(dev != NULL);
-        return snprintf(page, count, "%s\n", dev->u.filter.fo_fstype);
-}
-
-int lprocfs_filter_rd_mntdev(char *page, char **start, off_t off, int count,
-                             int *eof, void *data)
+static int lprocfs_filter_rd_mntdev(char *page, char **start, off_t off,
+                                    int count, int *eof, void *data)
 {
         struct obd_device* obd = (struct obd_device *)data;
 
@@ -67,23 +42,23 @@ int lprocfs_filter_rd_mntdev(char *page, char **start, off_t off, int count,
                         obd->u.filter.fo_vfsmnt->mnt_devname);
 }
 
-struct lprocfs_vars lprocfs_obd_vars[] = {
-        { "uuid",        lprocfs_rd_uuid,    0, 0 },
-        { "blocksize",   rd_blksize,         0, 0 },
-        { "kbytestotal", rd_kbytestotal,     0, 0 },
-        { "kbytesfree",  rd_kbytesfree,      0, 0 },
-        { "filestotal",  rd_filestotal,      0, 0 },
-        { "filesfree",   rd_filesfree,       0, 0 },
-        { "filegroups",  rd_filegroups,      0, 0 },
-        { "fstype",      rd_fstype,          0, 0 },
-        { "mntdev",      lprocfs_filter_rd_mntdev,    0, 0 },
+static struct lprocfs_vars lprocfs_obd_vars[] = {
 /* Each row pairs an entry name with a read handler; the list ends with
  * a { 0 } row.  The meaning of the two trailing zeros in each row is not
  * visible in this diff.  The new side uses the shared lprocfs_rd_* handlers
  * in place of the file-local rd_* ones removed above, and leaves the
  * "filegroups" row commented out. */
+        { "uuid",         lprocfs_rd_uuid,          0, 0 },
+        { "blocksize",    lprocfs_rd_blksize,       0, 0 },
+        { "kbytestotal",  lprocfs_rd_kbytestotal,   0, 0 },
+        { "kbytesfree",   lprocfs_rd_kbytesfree,    0, 0 },
+        { "filestotal",   lprocfs_rd_filestotal,    0, 0 },
+        { "filesfree",    lprocfs_rd_filesfree,     0, 0 },
+        //{ "filegroups",   lprocfs_rd_filegroups,    0, 0 },
+        { "fstype",       lprocfs_rd_fstype,        0, 0 },
+        { "mntdev",       lprocfs_filter_rd_mntdev, 0, 0 },
         { 0 }
 };
 
-struct lprocfs_vars lprocfs_module_vars[] = {
-        { "num_refs",    lprocfs_rd_numrefs, 0, 0 },
+static struct lprocfs_vars lprocfs_module_vars[] = {
 /* A single "num_refs" row read through lprocfs_rd_numrefs, followed by a
  * { 0 } row.  The array becomes file-local (static) in this change. */
+        { "num_refs",     lprocfs_rd_numrefs,       0, 0 },
         { 0 }
 };
 
 #endif /* LPROCFS */
-LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
+LPROCFS_INIT_VARS(filter,lprocfs_module_vars, lprocfs_obd_vars)
index e530020..49c6100 100644 (file)
@@ -6,3 +6,4 @@ Makefile
 Makefile.in
 .deps
 TAGS
+.*.cmd
index d5e4ec1..e9affd0 100644 (file)
 #include <linux/lprocfs_status.h>
 
 #ifndef LPROCFS
-struct lprocfs_vars lprocfs_obd_vars[]  = { {0} };
-struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_obd_vars[]  = { {0} };
+static struct lprocfs_vars lprocfs_module_vars[] = { {0} };
 #else
-
-DEFINE_LPROCFS_STATFS_FCT(rd_blksize,     obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree,  obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filestotal,  obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filesfree,   obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filegroups,  obd_self_statfs);
-
-struct lprocfs_vars lprocfs_obd_vars[] = {
-        { "uuid",            lprocfs_rd_uuid, 0, 0 },
-        { "blocksize",       rd_blksize, 0, 0 },
-        { "kbytestotal",     rd_kbytestotal, 0, 0 },
-        { "kbytesfree",      rd_kbytesfree, 0, 0 },
-        { "filestotal",      rd_filestotal, 0, 0 },
-        { "filesfree",       rd_filesfree, 0, 0   },
-        { "filegroups",      rd_filegroups, 0, 0 },
+static struct lprocfs_vars lprocfs_obd_vars[] = {
 /* Each row pairs an entry name with a read handler; the list ends with
  * a { 0 } row.  The meaning of the two trailing zeros in each row is not
  * visible in this diff.  The new side switches to the shared lprocfs_rd_*
  * handlers, leaves the "filegroups" row commented out, and only realigns
  * the "ost_conn_uuid" row. */
+        { "uuid",            lprocfs_rd_uuid,        0, 0 },
+        { "blocksize",       lprocfs_rd_blksize,     0, 0 },
+        { "kbytestotal",     lprocfs_rd_kbytestotal, 0, 0 },
+        { "kbytesfree",      lprocfs_rd_kbytesfree,  0, 0 },
+        { "filestotal",      lprocfs_rd_filestotal,  0, 0 },
+        { "filesfree",       lprocfs_rd_filesfree,   0, 0 },
+        //{ "filegroups",      lprocfs_rd_filegroups,  0, 0 },
         { "ost_server_uuid", lprocfs_rd_server_uuid, 0, 0 },
-        { "ost_conn_uuid",   lprocfs_rd_conn_uuid, 0, 0 },
+        { "ost_conn_uuid",   lprocfs_rd_conn_uuid,   0, 0 },
         { 0 }
 };
 
-struct lprocfs_vars lprocfs_module_vars[] = {
-        { "num_refs",        lprocfs_rd_numrefs, 0, 0 },
+static struct lprocfs_vars lprocfs_module_vars[] = {
 /* A single "num_refs" row read through lprocfs_rd_numrefs, followed by a
  * { 0 } row.  The array becomes file-local (static) in this change. */
+        { "num_refs",        lprocfs_rd_numrefs,     0, 0 },
         { 0 }
 };
 
 #endif /* LPROCFS */
-LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
+LPROCFS_INIT_VARS(osc,lprocfs_module_vars, lprocfs_obd_vars)
index aa04a1a..c8cd6ad 100644 (file)
@@ -34,8 +34,7 @@ static kdev_t path2dev(char *path)
 {
         struct dentry *dentry;
         struct nameidata nd;
-        kdev_t dev;
-        KDEVT_VAL(dev, 0);
+        kdev_t dev = KDEVT_INIT(0);
 
         if (!path_init(path, LOOKUP_FOLLOW, &nd))
                 return 0;
index 4bda8de..89061fd 100644 (file)
 #define DEBUG_SUBSYSTEM S_OSC
 
 #ifdef __KERNEL__
-#include <linux/version.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/highmem.h>
-#include <linux/lustre_dlm.h>
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#include <linux/workqueue.h>
-#include <linux/smp_lock.h>
-#else
-#include <linux/locks.h>
-#endif
-#else
-#include <liblustre.h>
+# include <linux/version.h>
+# include <linux/module.h>
+# include <linux/mm.h>
+# include <linux/highmem.h>
+# include <linux/lustre_dlm.h>
+# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#  include <linux/workqueue.h>
+#  include <linux/smp_lock.h>
+# else
+#  include <linux/locks.h>
+# endif
+#else /* __KERNEL__ */
+# include <liblustre.h>
 #endif
 
 #include <linux/kp30.h>
 #include <linux/lustre_mds.h> /* for mds_objid */
 #include <linux/lustre_otree.h>
 #include <linux/obd_ost.h>
+#include <linux/lustre_commit_confd.h>
 #include <linux/obd_lov.h>
 
 #ifndef  __CYGWIN__
-#include <linux/ctype.h>
-#include <linux/init.h>
+# include <linux/ctype.h>
+# include <linux/init.h>
 #else
-#include <ctype.h>
+# include <ctype.h>
 #endif
 
 #include <linux/lustre_ha.h>
 #include <portals/lib-types.h> /* for PTL_MD_MAX_IOV */
 #include <linux/lprocfs_status.h>
 
+static struct llog_cookie zero_cookie = { { 0 } };
+
 /*
  * osc_attach: fills a local lprocfs_static_vars through lprocfs_init_vars()
  * and hands its obd_vars member, together with 'dev', to
  * lprocfs_obd_attach(); that call's return value is returned as-is.
  * 'len' and 'data' are not used in this function.
  *
  * In this change the lprocfs_init_vars() call gains 'osc' as its first
  * argument.
  */
 static int osc_attach(struct obd_device *dev, obd_count len, void *data)
 {
         struct lprocfs_static_vars lvars;
 
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(osc,&lvars);
         return lprocfs_obd_attach(dev, lvars.obd_vars);
 }
 
@@ -119,29 +122,29 @@ static int osc_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
                 if (lmm_bytes < sizeof (*lmm)) {
                         CERROR("lov_mds_md too small: %d, need %d\n",
                                lmm_bytes, (int)sizeof(*lmm));
-                        RETURN (-EINVAL);
+                        RETURN(-EINVAL);
                 }
                 /* XXX LOV_MAGIC etc check? */
 
-                if (lmm->lmm_object_id == cpu_to_le64 (0)) {
-                        CERROR ("lov_mds_md: zero lmm_object_id\n");
-                        RETURN (-EINVAL);
+                if (lmm->lmm_object_id == cpu_to_le64(0)) {
+                        CERROR("lov_mds_md: zero lmm_object_id\n");
+                        RETURN(-EINVAL);
                 }
         }
 
         lsm_size = lov_stripe_md_size(1);
-        if (!lsmp)
+        if (lsmp == NULL)
                 RETURN(lsm_size);
 
-        if (*lsmp && !lmm) {
+        if (*lsmp != NULL && lmm == NULL) {
                 OBD_FREE(*lsmp, lsm_size);
                 *lsmp = NULL;
                 RETURN(0);
         }
 
-        if (!*lsmp) {
+        if (*lsmp == NULL) {
                 OBD_ALLOC(*lsmp, lsm_size);
-                if (!*lsmp)
+                if (*lsmp == NULL)
                         RETURN(-ENOMEM);
 
                 (*lsmp)->lsm_oinfo[0].loi_dirty_ot =
@@ -149,7 +152,7 @@ static int osc_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
                 ot_init((*lsmp)->lsm_oinfo[0].loi_dirty_ot);
         }
 
-        if (lmm) {
+        if (lmm != NULL) {
                 /* XXX zero *lsmp? */
                 (*lsmp)->lsm_object_id = le64_to_cpu (lmm->lmm_object_id);
                 LASSERT((*lsmp)->lsm_object_id);
@@ -167,29 +170,27 @@ static int osc_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
 /*
  * osc_getattr_interpret: processes the reply of a getattr request.
  *
  *   req - request whose reply buffer 0 is unpacked as an ost_body via
  *         lustre_swab_repbuf(..., lustre_swab_ost_body)
  *   aa  - async args; aa->aa_oa is the obdo that receives the reply
  *   rc  - incoming status; a non-zero value is returned unchanged before
  *         the reply is looked at
  *
  * New side of this diff: when the body unpacks, it is copied into
  * aa->aa_oa, o_blksize is set to OSC_BRW_MAX_SIZE and OBD_MD_FLBLKSZ is
  * or-ed into o_valid, and the incoming rc (0 at that point) is returned.
  * When it does not unpack, an error is logged, aa->aa_oa->o_valid is set
  * to 0 and -EPROTO is returned.
  *
  * Old side: also logged a CERROR for a non-zero rc, returned -EPROTO on an
  * unpack failure without touching the obdo, and returned a literal 0 on
  * success.
  */
 static int osc_getattr_interpret(struct ptlrpc_request *req,
                                  struct osc_getattr_async_args *aa, int rc)
 {
-        struct obdo     *oa = aa->aa_oa;
         struct ost_body *body;
         ENTRY;
 
-        if (rc != 0) {
-                CERROR("failed: rc = %d\n", rc);
-                RETURN (rc);
-        }
-
-        body = lustre_swab_repbuf(req, 0, sizeof (*body), lustre_swab_ost_body);
-        if (body == NULL) {
-                CERROR ("can't unpack ost_body\n");
-                RETURN (-EPROTO);
-        }
+        if (rc != 0)
+                RETURN(rc);
 
-        CDEBUG(D_INODE, "mode: %o\n", body->oa.o_mode);
-        memcpy(oa, &body->oa, sizeof(*oa));
+        body = lustre_swab_repbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
+        if (body) {
+                CDEBUG(D_INODE, "mode: %o\n", body->oa.o_mode);
+                memcpy(aa->aa_oa, &body->oa, sizeof(*aa->aa_oa));
 
-        /* This should really be sent by the OST */
-        oa->o_blksize = OSC_BRW_MAX_SIZE;
-        oa->o_valid |= OBD_MD_FLBLKSZ;
+                /* This should really be sent by the OST */
+                aa->aa_oa->o_blksize = OSC_BRW_MAX_SIZE;
+                aa->aa_oa->o_valid |= OBD_MD_FLBLKSZ;
+        } else {
 /* Unpack failure: report it and invalidate every field of the obdo. */
+                CERROR("can't unpack ost_body\n");
+                rc = -EPROTO;
+                aa->aa_oa->o_valid = 0;
+        }
 
-        RETURN (0);
+        RETURN(rc);
 }
 
 static int osc_getattr_async(struct lustre_handle *conn, struct obdo *oa,
@@ -505,7 +506,7 @@ static int osc_create(struct lustre_handle *conn, struct obdo *oa,
                 GOTO(out, rc = -ENOMEM);
 
         body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof (*body));
-        memcpy(&body->oa, oa, sizeof(*oa));
+        memcpy(&body->oa, oa, sizeof(body->oa));
 
         request->rq_replen = lustre_msg_size(1, &size);
 
@@ -513,8 +514,8 @@ static int osc_create(struct lustre_handle *conn, struct obdo *oa,
         if (rc)
                 GOTO(out_req, rc);
 
-        body = lustre_swab_repbuf (request, 0, sizeof (*body),
-                                   lustre_swab_ost_body);
+        body = lustre_swab_repbuf(request, 0, sizeof(*body),
+                                  lustre_swab_ost_body);
         if (body == NULL) {
                 CERROR ("can't unpack ost_body\n");
                 GOTO (out_req, rc = -EPROTO);
@@ -531,13 +532,19 @@ static int osc_create(struct lustre_handle *conn, struct obdo *oa,
          * This needs to be fixed in a big way.
          */
         lsm->lsm_object_id = oa->o_id;
-        lsm->lsm_stripe_count = 0;
-        lsm->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES;
         *ea = lsm;
 
-        if (oti != NULL)
+        if (oti != NULL) {
                 oti->oti_transno = request->rq_repmsg->transno;
 
+                if (oa->o_valid & OBD_MD_FLCOOKIE) {
+                        if (!oti->oti_logcookies)
+                                oti_alloc_cookies(oti, 1);
+                        memcpy(oti->oti_logcookies, obdo_logcookie(oa),
+                               sizeof(oti->oti_onecookie));
+                }
+        }
+
         CDEBUG(D_HA, "transno: "LPD64"\n", request->rq_repmsg->transno);
         EXIT;
 out_req:
@@ -616,14 +623,20 @@ static int osc_destroy(struct lustre_handle *conn, struct obdo *oa,
         body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof (*body));
         memcpy(&body->oa, oa, sizeof(*oa));
 
+        if (oti && oa->o_valid & OBD_MD_FLCOOKIE) {
+                memcpy(obdo_logcookie(oa), oti->oti_logcookies,
+                       sizeof(*oti->oti_logcookies));
+                oti->oti_logcookies++;
+        }
+
         request->rq_replen = lustre_msg_size(1, &size);
 
         rc = ptlrpc_queue_wait(request);
         if (rc)
                 GOTO(out, rc);
 
-        body = lustre_swab_repbuf (request, 0, sizeof (*body),
-                                   lustre_swab_ost_body);
+        body = lustre_swab_repbuf(request, 0, sizeof(*body),
+                                  lustre_swab_ost_body);
         if (body == NULL) {
                 CERROR ("Can't unpack body\n");
                 GOTO (out, rc = -EPROTO);
@@ -663,7 +676,7 @@ static void osc_update_grant(struct client_obd *cli, struct ost_body *body)
                 return;
         }
 
-        CDEBUG(D_INODE, "got "LPU64" grant\n", body->oa.o_rdev);
+        CDEBUG(D_ERROR, "got "LPU64" grant\n", body->oa.o_rdev);
         down(&cli->cl_dirty_sem);
         cli->cl_dirty_granted = body->oa.o_rdev;
         /* XXX check for over-run and wake up the io thread that
@@ -708,9 +721,8 @@ static void handle_short_read(int nob_read, obd_count page_count,
         }
 }
 
-static int check_write_rcs (struct ptlrpc_request *request,
-                            int niocount, obd_count page_count,
-                            struct brw_page *pga)
+static int check_write_rcs(struct ptlrpc_request *request, int niocount,
+                           obd_count page_count, struct brw_page *pga)
 {
         int    i;
         __u32 *remote_rcs;
@@ -778,11 +790,10 @@ static obd_count cksum_pages(int nob, obd_count page_count,
 }
 #endif
 
-static int osc_brw_prep_request(struct obd_import *imp,
+static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa,
                                 struct lov_stripe_md *lsm, obd_count page_count,
-                                struct brw_page *pga, int cmd,
-                                int *requested_nobp, int *niocountp,
-                                struct ptlrpc_request **reqp)
+                                struct brw_page *pga, int *requested_nobp,
+                                int *niocountp, struct ptlrpc_request **reqp)
 {
         struct ptlrpc_request   *req;
         struct ptlrpc_bulk_desc *desc;
@@ -804,11 +815,11 @@ static int osc_brw_prep_request(struct obd_import *imp,
                 if (!can_merge_pages (&pga[i - 1], &pga[i]))
                         niocount++;
 
-        size[0] = sizeof (*body);
-        size[1] = sizeof (*ioobj);
-        size[2] = niocount * sizeof (*niobuf);
+        size[0] = sizeof(*body);
+        size[1] = sizeof(*ioobj);
+        size[2] = niocount * sizeof(*niobuf);
 
-        req = ptlrpc_prep_req (imp, opc, 3, size, NULL);
+        req = ptlrpc_prep_req(imp, opc, 3, size, NULL);
         if (req == NULL)
                 return (-ENOMEM);
 
@@ -819,16 +830,18 @@ static int osc_brw_prep_request(struct obd_import *imp,
                 desc = ptlrpc_prep_bulk_imp(req, BULK_PUT_SINK,
                                             OST_BULK_PORTAL);
         if (desc == NULL)
-                GOTO (out, rc = -ENOMEM);
+                GOTO(out, rc = -ENOMEM);
         /* NB request now owns desc and will free it when it gets freed */
 
         body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*body));
         ioobj = lustre_msg_buf(req->rq_reqmsg, 1, sizeof(*ioobj));
         niobuf = lustre_msg_buf(req->rq_reqmsg, 2, niocount * sizeof(*niobuf));
 
-        ioobj->ioo_id = lsm->lsm_object_id;
-        ioobj->ioo_gr = 0;
-        ioobj->ioo_type = S_IFREG;
+        memcpy(&body->oa, oa, sizeof(*oa));
+
+        ioobj->ioo_id = oa->o_id;
+        ioobj->ioo_gr = oa->o_valid & 0 ? oa->o_gr : 0;
+        ioobj->ioo_type = oa->o_mode;
         ioobj->ioo_bufcnt = niocount;
 
         LASSERT (page_count > 0);
@@ -836,19 +849,18 @@ static int osc_brw_prep_request(struct obd_import *imp,
                 struct brw_page *pg = &pga[i];
                 struct brw_page *pg_prev = pg - 1;
 
-                LASSERT (pg->count > 0);
-                LASSERT ((pg->off & (PAGE_SIZE - 1)) + pg->count <= PAGE_SIZE);
-                LASSERT (i == 0 || pg->off > pg_prev->off);
+                LASSERT(pg->count > 0);
+                LASSERT((pg->off & ~PAGE_MASK) + pg->count <= PAGE_SIZE);
+                LASSERT(i == 0 || pg->off > pg_prev->off);
 
-                rc = ptlrpc_prep_bulk_page (desc, pg->pg,
-                                            pg->off & (PAGE_SIZE - 1),
-                                            pg->count);
+                rc = ptlrpc_prep_bulk_page(desc, pg->pg, pg->off & ~PAGE_MASK,
+                                           pg->count);
                 if (rc != 0)
-                        GOTO (out, rc);
+                        GOTO(out, rc);
 
                 requested_nob += pg->count;
 
-                if (i > 0 && can_merge_pages (pg_prev, pg)) {
+                if (i > 0 && can_merge_pages(pg_prev, pg)) {
                         niobuf--;
                         niobuf->len += pg->count;
                 } else {
@@ -858,17 +870,17 @@ static int osc_brw_prep_request(struct obd_import *imp,
                 }
         }
 
-        LASSERT ((void *)(niobuf - niocount) ==
-                 lustre_msg_buf(req->rq_reqmsg, 2, niocount * sizeof(*niobuf)));
+        LASSERT((void *)(niobuf - niocount) ==
+                lustre_msg_buf(req->rq_reqmsg, 2, niocount * sizeof(*niobuf)));
 #if CHECKSUM_BULK
         body->oa.o_valid |= OBD_MD_FLCKSUM;
         if (opc == OST_BRW_WRITE)
-                body->oa.o_nlink = cksum_pages (requested_nob, page_count, pga);
+                body->oa.o_nlink = cksum_pages(requested_nob, page_count, pga);
 #endif
         osc_announce_cached(cli, body);
-        spin_lock_irqsave (&req->rq_lock, flags);
+        spin_lock_irqsave(&req->rq_lock, flags);
         req->rq_no_resend = 1;
-        spin_unlock_irqrestore (&req->rq_lock, flags);
+        spin_unlock_irqrestore(&req->rq_lock, flags);
 
         /* size[0] still sizeof (*body) */
         if (opc == OST_WRITE) {
@@ -890,21 +902,23 @@ static int osc_brw_prep_request(struct obd_import *imp,
         return (rc);
 }
 
-static int osc_brw_fini_request (struct ptlrpc_request *req,
-                                 int requested_nob, int niocount,
-                                 obd_count page_count, struct brw_page *pga,
-                                 int rc)
+static int osc_brw_fini_request(struct ptlrpc_request *req, struct obdo *oa,
+                                int requested_nob, int niocount,
+                                obd_count page_count, struct brw_page *pga,
+                                int rc)
 {
         struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
         struct ost_body *body;
+
         if (rc < 0)
                 return (rc);
 
-        body = lustre_swab_repbuf(req, 0, sizeof (*body), lustre_swab_ost_body);
+        body = lustre_swab_repbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
         if (body == NULL) {
                 CERROR ("Can't unpack body\n");
-                RETURN(-EPROTO);
+                return (-EPROTO);
         }
+
         osc_update_grant(cli, body);
 
         if (req->rq_reqmsg->opc == OST_WRITE) {
@@ -913,22 +927,23 @@ static int osc_brw_fini_request (struct ptlrpc_request *req,
                         return (-EPROTO);
                 }
 
-                return (check_write_rcs(req, niocount, page_count, pga));
+                return(check_write_rcs(req, niocount, page_count, pga));
         }
 
         if (rc > requested_nob) {
-                CERROR ("Unexpected rc %d (%d requested)\n",
-                        rc, requested_nob);
+                CERROR("Unexpected rc %d (%d requested)\n", rc, requested_nob);
                 return (-EPROTO);
         }
 
         if (rc < requested_nob)
                 handle_short_read(rc, page_count, pga);
 
+        memcpy(oa, &body->oa, sizeof(*oa));
+
 #if CHECKSUM_BULK
-        if (body->oa.o_valid & OBD_MD_FLCKSUM) {
+        if (oa->o_valid & OBD_MD_FLCKSUM) {
                 static int cksum_counter;
-                obd_count server_cksum = body->oa.o_nlink;
+                obd_count server_cksum = oa->o_nlink;
                 obd_count cksum = cksum_pages(rc, page_count, pga);
 
                 cksum_counter++;
@@ -937,6 +952,7 @@ static int osc_brw_fini_request (struct ptlrpc_request *req,
                                ", server NID "LPX64"\n", server_cksum, cksum,
                                imp->imp_connection->c_peer.peer_nid);
                         cksum_counter = 0;
+                        oa->o_rdev = cksum;
                 } else if ((cksum_counter & (-cksum_counter)) == cksum_counter)
                         CERROR("Checksum %u from "LPX64" OK: %x\n",
                                cksum_counter,
@@ -953,9 +969,9 @@ static int osc_brw_fini_request (struct ptlrpc_request *req,
         return (0);
 }
 
-static int osc_brw_internal(struct lustre_handle *conn,
+static int osc_brw_internal(int cmd, struct lustre_handle *conn,struct obdo *oa,
                             struct lov_stripe_md *lsm,
-                            obd_count page_count, struct brw_page *pga, int cmd)
+                            obd_count page_count, struct brw_page *pga)
 {
         int                    requested_nob;
         int                    niocount;
@@ -964,8 +980,9 @@ static int osc_brw_internal(struct lustre_handle *conn,
         ENTRY;
 
 restart_bulk:
-        rc = osc_brw_prep_request(class_conn2cliimp(conn), lsm, page_count, pga,
-                                  cmd, &requested_nob, &niocount, &request);
+        rc = osc_brw_prep_request(cmd, class_conn2cliimp(conn), oa, lsm,
+                                  page_count, pga, &requested_nob, &niocount,
+                                  &request);
         /* NB ^ sets rq_no_resend */
 
         if (rc != 0)
@@ -979,8 +996,8 @@ restart_bulk:
                 goto restart_bulk;
         }
 
-        rc = osc_brw_fini_request (request, requested_nob, niocount,
-                                   page_count, pga, rc);
+        rc = osc_brw_fini_request(request, oa, requested_nob, niocount,
+                                  page_count, pga, rc);
 
         ptlrpc_req_finished(request);
         RETURN (rc);
@@ -989,6 +1006,7 @@ restart_bulk:
 static int brw_interpret(struct ptlrpc_request *request,
                          struct osc_brw_async_args *aa, int rc)
 {
+        struct obdo *oa      = aa->aa_oa;
         int requested_nob    = aa->aa_requested_nob;
         int niocount         = aa->aa_nio_count;
         obd_count page_count = aa->aa_page_count;
@@ -1002,14 +1020,14 @@ static int brw_interpret(struct ptlrpc_request *request,
                 //goto restart_bulk;
         }
 
-        rc = osc_brw_fini_request (request, requested_nob, niocount,
-                                   page_count, pga, rc);
+        rc = osc_brw_fini_request(request, oa, requested_nob, niocount,
+                                  page_count, pga, rc);
         RETURN (rc);
 }
 
-static int async_internal(struct lustre_handle *conn, struct lov_stripe_md *lsm,
-                          obd_count page_count, struct brw_page *pga,
-                          struct ptlrpc_request_set *set, int cmd)
+static int async_internal(int cmd, struct lustre_handle *conn, struct obdo *oa,
+                          struct lov_stripe_md *lsm, obd_count page_count,
+                          struct brw_page *pga, struct ptlrpc_request_set *set)
 {
         struct ptlrpc_request     *request;
         int                        requested_nob;
@@ -1018,14 +1036,15 @@ static int async_internal(struct lustre_handle *conn, struct lov_stripe_md *lsm,
         int                        rc;
         ENTRY;
 
-        rc = osc_brw_prep_request (class_conn2cliimp(conn),
-                                   lsm, page_count, pga, cmd,
-                                   &requested_nob, &nio_count, &request);
+        rc = osc_brw_prep_request(cmd, class_conn2cliimp(conn), oa, lsm,
+                                  page_count, pga, &requested_nob, &nio_count,
+                                  &request);
         /* NB ^ sets rq_no_resend */
 
         if (rc == 0) {
-                LASSERT (sizeof (*aa) <= sizeof (request->rq_async_args));
+                LASSERT(sizeof(*aa) <= sizeof(request->rq_async_args));
                 aa = (struct osc_brw_async_args *)&request->rq_async_args;
+                aa->aa_oa = oa;
                 aa->aa_requested_nob = requested_nob;
                 aa->aa_nio_count = nio_count;
                 aa->aa_page_count = page_count;
@@ -1096,7 +1115,7 @@ static obd_count check_elan_limit(struct brw_page *pg, obd_count pages)
         return i;
 }
 
-static int osc_brw(int cmd, struct lustre_handle *conn,
+static int osc_brw(int cmd, struct lustre_handle *conn, struct obdo *oa,
                    struct lov_stripe_md *md, obd_count page_count,
                    struct brw_page *pga, struct obd_trans_info *oti)
 {
@@ -1124,7 +1143,7 @@ static int osc_brw(int cmd, struct lustre_handle *conn,
                 sort_brw_pages(pga, pages_per_brw);
                 pages_per_brw = check_elan_limit(pga, pages_per_brw);
 
-                rc = osc_brw_internal(conn, md, pages_per_brw, pga, cmd);
+                rc = osc_brw_internal(cmd, conn, oa, md, pages_per_brw, pga);
 
                 if (rc != 0)
                         RETURN(rc);
@@ -1135,7 +1154,7 @@ static int osc_brw(int cmd, struct lustre_handle *conn,
         RETURN(0);
 }
 
-static int osc_brw_async(int cmd, struct lustre_handle *conn,
+static int osc_brw_async(int cmd, struct lustre_handle *conn, struct obdo *oa,
                          struct lov_stripe_md *md, obd_count page_count,
                          struct brw_page *pga, struct ptlrpc_request_set *set,
                          struct obd_trans_info *oti)
@@ -1164,7 +1183,7 @@ static int osc_brw_async(int cmd, struct lustre_handle *conn,
                 sort_brw_pages(pga, pages_per_brw);
                 pages_per_brw = check_elan_limit(pga, pages_per_brw);
 
-                rc = async_internal(conn, md, pages_per_brw, pga, set, cmd);
+                rc = async_internal(cmd, conn, oa, md, pages_per_brw, pga, set);
 
                 if (rc != 0)
                         RETURN(rc);
@@ -1178,9 +1197,8 @@ static int osc_brw_async(int cmd, struct lustre_handle *conn,
 #ifdef __KERNEL__
 /* Note: caller will lock/unlock, and set uptodate on the pages */
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-static int sanosc_brw_read(struct lustre_handle *conn,
-                           struct lov_stripe_md *lsm,
-                           obd_count page_count,
+static int sanosc_brw_read(struct lustre_handle *conn, struct obdo *oa,
+                           struct lov_stripe_md *lsm, obd_count page_count,
                            struct brw_page *pga)
 {
         struct ptlrpc_request *request = NULL;
@@ -1201,14 +1219,16 @@ static int sanosc_brw_read(struct lustre_handle *conn,
         if (!request)
                 RETURN(-ENOMEM);
 
-        body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof (*body));
-        iooptr = lustre_msg_buf(request->rq_reqmsg, 1, sizeof (*iooptr));
+        body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof(*body));
+        iooptr = lustre_msg_buf(request->rq_reqmsg, 1, sizeof(*iooptr));
         nioptr = lustre_msg_buf(request->rq_reqmsg, 2,
-                                sizeof (*nioptr) * page_count);
+                                sizeof(*nioptr) * page_count);
+
+        memcpy(&body->oa, oa, sizeof(body->oa));
 
-        iooptr->ioo_id = lsm->lsm_object_id;
-        iooptr->ioo_gr = 0;
-        iooptr->ioo_type = S_IFREG;
+        iooptr->ioo_id = oa->o_id;
+        iooptr->ioo_gr = oa->o_valid & 0 ? oa->o_gr : 0;
+        iooptr->ioo_type = oa->o_mode;
         iooptr->ioo_bufcnt = page_count;
 
         for (mapped = 0; mapped < page_count; mapped++, nioptr++) {
@@ -1227,8 +1247,17 @@ static int sanosc_brw_read(struct lustre_handle *conn,
         if (rc)
                 GOTO(out_req, rc);
 
-        swab = lustre_msg_swabbed (request->rq_repmsg);
-        LASSERT_REPSWAB (request, 1);
+        body = lustre_swab_repbuf(request, 0, sizeof(*body),
+                                  lustre_swab_ost_body);
+        if (body == NULL) {
+                CERROR("Can't unpack body\n");
+                GOTO(out_req, rc = -EPROTO);
+        }
+
+        memcpy(oa, &body->oa, sizeof(*oa));
+
+        swab = lustre_msg_swabbed(request->rq_repmsg);
+        LASSERT_REPSWAB(request, 1);
         nioptr = lustre_msg_buf(request->rq_repmsg, 1, size[1]);
         if (!nioptr) {
                 /* nioptr missing or short */
@@ -1300,9 +1329,8 @@ out_req:
         RETURN(rc);
 }
 
-static int sanosc_brw_write(struct lustre_handle *conn,
-                            struct lov_stripe_md *lsm,
-                            obd_count page_count,
+static int sanosc_brw_write(struct lustre_handle *conn, struct obdo *oa,
+                            struct lov_stripe_md *lsm, obd_count page_count,
                             struct brw_page *pga)
 {
         struct ptlrpc_request *request = NULL;
@@ -1326,9 +1354,11 @@ static int sanosc_brw_write(struct lustre_handle *conn,
         nioptr = lustre_msg_buf(request->rq_reqmsg, 2,
                                 sizeof (*nioptr) * page_count);
 
-        iooptr->ioo_id = lsm->lsm_object_id;
-        iooptr->ioo_gr = 0;
-        iooptr->ioo_type = S_IFREG;
+        memcpy(&body->oa, oa, sizeof(body->oa));
+
+        iooptr->ioo_id = oa->o_id;
+        iooptr->ioo_gr = oa->o_valid & 0 ? oa->o_gr : 0;
+        iooptr->ioo_type = oa->o_mode;
         iooptr->ioo_bufcnt = page_count;
 
         /* pack request */
@@ -1414,7 +1444,7 @@ out_req:
         RETURN(rc);
 }
 
-static int sanosc_brw(int cmd, struct lustre_handle *conn,
+static int sanosc_brw(int cmd, struct lustre_handle *conn, struct obdo *oa,
                       struct lov_stripe_md *lsm, obd_count page_count,
                       struct brw_page *pga, struct obd_trans_info *oti)
 {
@@ -1430,9 +1460,9 @@ static int sanosc_brw(int cmd, struct lustre_handle *conn,
                         pages_per_brw = page_count;
 
                 if (cmd & OBD_BRW_WRITE)
-                        rc = sanosc_brw_write(conn, lsm, pages_per_brw, pga);
+                        rc = sanosc_brw_write(conn, oa, lsm, pages_per_brw,pga);
                 else
-                        rc = sanosc_brw_read(conn, lsm, pages_per_brw, pga);
+                        rc = sanosc_brw_read(conn, oa, lsm, pages_per_brw, pga);
 
                 if (rc != 0)
                         RETURN(rc);
@@ -1445,7 +1475,7 @@ static int sanosc_brw(int cmd, struct lustre_handle *conn,
 #endif
 #endif
 
-static int osc_mark_page_dirty(struct lustre_handle *conn, 
+static int osc_mark_page_dirty(struct lustre_handle *conn,
                                struct lov_stripe_md *lsm, unsigned long offset)
 {
         struct client_obd *cli = &class_conn2obd(conn)->u.cli;
@@ -1455,12 +1485,14 @@ static int osc_mark_page_dirty(struct lustre_handle *conn,
 
         down(&cli->cl_dirty_sem);
 
-        if (cli->cl_ost_can_grant && 
+#if 0
+        if (cli->cl_ost_can_grant &&
             (cli->cl_dirty + PAGE_CACHE_SIZE >= cli->cl_dirty_granted)) {
                 CDEBUG(D_INODE, "granted "LPU64" < "LPU64"\n",
                        cli->cl_dirty_granted, cli->cl_dirty + PAGE_CACHE_SIZE);
                 GOTO(out, rc = -EDQUOT);
         }
+#endif
 
         rc = ot_mark_offset(dirty_ot, offset);
         if (rc)
@@ -1474,7 +1506,7 @@ out:
         RETURN(rc);
 }
 
-static int osc_clear_dirty_pages(struct lustre_handle *conn, 
+static int osc_clear_dirty_pages(struct lustre_handle *conn,
                                  struct lov_stripe_md *lsm,
                                  unsigned long start, unsigned long end,
                                  unsigned long *cleared)
@@ -1526,7 +1558,7 @@ static int osc_enqueue(struct lustre_handle *connh, struct lov_stripe_md *lsm,
                        struct lustre_handle *lockh)
 {
         struct ldlm_res_id res_id = { .name = {lsm->lsm_object_id} };
-        struct obd_device *obddev = class_conn2obd(connh);
+        struct obd_device *obd = class_conn2obd(connh);
         struct ldlm_extent *extent = extentp;
         int rc;
         ENTRY;
@@ -1537,7 +1569,7 @@ static int osc_enqueue(struct lustre_handle *connh, struct lov_stripe_md *lsm,
         extent->end |= ~PAGE_MASK;
 
         /* Next, search for already existing extent locks that will cover us */
-        rc = ldlm_lock_match(obddev->obd_namespace, LDLM_FL_MATCH_DATA, &res_id,
+        rc = ldlm_lock_match(obd->obd_namespace, LDLM_FL_MATCH_DATA, &res_id,
                              type, extent, sizeof(extent), mode, data, lockh);
         if (rc == 1)
                 /* We already have a lock, and it's referenced */
@@ -1556,7 +1588,7 @@ static int osc_enqueue(struct lustre_handle *connh, struct lov_stripe_md *lsm,
          * locks out from other users right now, too. */
 
         if (mode == LCK_PR) {
-                rc = ldlm_lock_match(obddev->obd_namespace, LDLM_FL_MATCH_DATA,
+                rc = ldlm_lock_match(obd->obd_namespace, LDLM_FL_MATCH_DATA,
                                      &res_id, type, extent, sizeof(extent),
                                      LCK_PW, data, lockh);
                 if (rc == 1) {
@@ -1570,7 +1602,7 @@ static int osc_enqueue(struct lustre_handle *connh, struct lov_stripe_md *lsm,
                 }
         }
 
-        rc = ldlm_cli_enqueue(connh, NULL, obddev->obd_namespace, parent_lock,
+        rc = ldlm_cli_enqueue(connh, NULL, obd->obd_namespace, parent_lock,
                               res_id, type, extent, sizeof(extent), mode, flags,
                               ldlm_completion_ast, callback, data, lockh);
         RETURN(rc);
@@ -1581,7 +1613,7 @@ static int osc_match(struct lustre_handle *connh, struct lov_stripe_md *lsm,
                        int *flags, void *data, struct lustre_handle *lockh)
 {
         struct ldlm_res_id res_id = { .name = {lsm->lsm_object_id} };
-        struct obd_device *obddev = class_conn2obd(connh);
+        struct obd_device *obd = class_conn2obd(connh);
         struct ldlm_extent *extent = extentp;
         int rc;
         ENTRY;
@@ -1592,7 +1624,7 @@ static int osc_match(struct lustre_handle *connh, struct lov_stripe_md *lsm,
         extent->end |= ~PAGE_MASK;
 
         /* Next, search for already existing extent locks that will cover us */
-        rc = ldlm_lock_match(obddev->obd_namespace, *flags, &res_id, type,
+        rc = ldlm_lock_match(obd->obd_namespace, *flags, &res_id, type,
                              extent, sizeof(extent), mode, data, lockh);
         if (rc)
                 RETURN(rc);
@@ -1601,7 +1633,7 @@ static int osc_match(struct lustre_handle *connh, struct lov_stripe_md *lsm,
          * VFS and page cache already protect us locally, so lots of readers/
          * writers can share a single PW lock. */
         if (mode == LCK_PR) {
-                rc = ldlm_lock_match(obddev->obd_namespace, *flags, &res_id,
+                rc = ldlm_lock_match(obd->obd_namespace, *flags, &res_id,
                                      type, extent, sizeof(extent), LCK_PW,
                                      data, lockh);
                 if (rc == 1) {
@@ -1628,22 +1660,28 @@ static int osc_cancel(struct lustre_handle *oconn, struct lov_stripe_md *md,
+/* Cancel unused locks on the resource named by lsm->lsm_object_id, in the
+ * namespace of the obd that connh resolves to.  flags and opaque are handed
+ * to ldlm_cli_cancel_unused() untouched and its result is returned as is. */
 static int osc_cancel_unused(struct lustre_handle *connh,
                              struct lov_stripe_md *lsm, int flags, void *opaque)
 {
-        struct obd_device *obddev = class_conn2obd(connh);
+        struct obd_device *obd = class_conn2obd(connh);
         struct ldlm_res_id res_id = { .name = {lsm->lsm_object_id} };
 
-        return ldlm_cli_cancel_unused(obddev->obd_namespace, &res_id, flags,
+        return ldlm_cli_cancel_unused(obd->obd_namespace, &res_id, flags,
                                       opaque);
 }
 
-static int osc_statfs(struct obd_export *exp, struct obd_statfs *osfs)
+static int osc_statfs(struct obd_device *obd, struct obd_statfs *osfs,
+                      unsigned long max_age)
 {
         struct obd_statfs *msfs;
         struct ptlrpc_request *request;
         int rc, size = sizeof(*osfs);
         ENTRY;
 
-        request = ptlrpc_prep_req(exp->exp_obd->u.cli.cl_import, OST_STATFS, 0, 
-                                  NULL, NULL);
+        /* We could possibly pass max_age in the request (as an absolute
+         * timestamp or a "seconds.usec ago") so the target can avoid doing
+         * extra calls into the filesystem if that isn't necessary (e.g.
+         * during mount that would help a bit).  Having relative timestamps
+         * is not so great if request processing is slow, while absolute
+         * timestamps are not ideal because they need time synchronization. */
+        request = ptlrpc_prep_req(obd->u.cli.cl_import, OST_STATFS,0,NULL,NULL);
         if (!request)
                 RETURN(-ENOMEM);
 
@@ -1655,14 +1693,14 @@ static int osc_statfs(struct obd_export *exp, struct obd_statfs *osfs)
                 GOTO(out, rc);
         }
 
-        msfs = lustre_swab_repbuf (request, 0, sizeof (*msfs),
-                                   lustre_swab_obd_statfs);
+        msfs = lustre_swab_repbuf(request, 0, sizeof(*msfs),
+                                  lustre_swab_obd_statfs);
         if (msfs == NULL) {
-                CERROR ("Can't unpack obd_statfs\n");
-                GOTO (out, rc = -EPROTO);
+                CERROR("Can't unpack obd_statfs\n");
+                GOTO(out, rc = -EPROTO);
         }
 
-        memcpy (osfs, msfs, sizeof (*msfs));
+        memcpy(osfs, msfs, sizeof(*osfs));
 
         EXIT;
  out:
@@ -1717,16 +1755,16 @@ static int osc_getstripe(struct lustre_handle *conn, struct lov_stripe_md *lsm,
 static int osc_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
                          void *karg, void *uarg)
 {
-        struct obd_device *obddev = class_conn2obd(conn);
+        struct obd_device *obd = class_conn2obd(conn);
         struct obd_ioctl_data *data = karg;
         int err = 0;
         ENTRY;
 
         switch (cmd) {
         case IOC_OSC_REGISTER_LOV: {
-                if (obddev->u.cli.cl_containing_lov)
+                if (obd->u.cli.cl_containing_lov)
                         GOTO(out, err = -EALREADY);
-                obddev->u.cli.cl_containing_lov = (struct obd_device *)karg;
+                obd->u.cli.cl_containing_lov = (struct obd_device *)karg;
                 GOTO(out, err);
         }
         case OBD_IOC_LOV_GET_CONFIG: {
@@ -1758,9 +1796,9 @@ static int osc_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
                 desc->ld_default_stripe_size = 0;
                 desc->ld_default_stripe_offset = 0;
                 desc->ld_pattern = 0;
-                memcpy(&desc->ld_uuid, &obddev->obd_uuid, sizeof(uuid));
+                memcpy(&desc->ld_uuid, &obd->obd_uuid, sizeof(uuid));
 
-                memcpy(data->ioc_inlbuf2, &obddev->obd_uuid, sizeof(uuid));
+                memcpy(data->ioc_inlbuf2, &obd->obd_uuid, sizeof(uuid));
 
                 err = copy_to_user((void *)uarg, buf, len);
                 if (err)
@@ -1777,15 +1815,15 @@ static int osc_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
                 err = osc_getstripe(conn, karg, uarg);
                 GOTO(out, err);
         case OBD_IOC_CLIENT_RECOVER:
-                err = ptlrpc_recover_import(obddev->u.cli.cl_import,
+                err = ptlrpc_recover_import(obd->u.cli.cl_import,
                                             data->ioc_inlbuf1);
                 GOTO(out, err);
         case IOC_OSC_SET_ACTIVE:
-                err = ptlrpc_set_import_active(obddev->u.cli.cl_import,
+                err = ptlrpc_set_import_active(obd->u.cli.cl_import,
                                                data->ioc_offset);
                 GOTO(out, err);
         default:
-                CERROR ("osc_ioctl(): unrecognised ioctl %#x\n", cmd);
+                CERROR("unrecognised ioctl %#x by %s\n", cmd, current->comm);
                 GOTO(out, err = -ENOTTY);
         }
 out:
@@ -1809,6 +1847,104 @@ static int osc_get_info(struct lustre_handle *conn, obd_count keylen,
         RETURN(-EINVAL);
 }
 
+/* Forward a set_info key to the target in an OST_SET_INFO request.
+ *
+ * Only keys that begin with "mds_conn" are accepted; anything else yields
+ * -EINVAL.  The request carries the key bytes alone -- vallen and val are
+ * not looked at here.  Returns -ENOMEM if no request could be prepared,
+ * otherwise whatever ptlrpc_queue_wait() reports. */
+static int osc_set_info(struct lustre_handle *conn, obd_count keylen,
+                        void *key, obd_count vallen, void *val)
+{
+        static const char accepted[] = "mds_conn";
+        char *reqbufs[1] = { key };
+        int reqsize = keylen;
+        struct ptlrpc_request *request;
+        int rc;
+        ENTRY;
+
+        /* sizeof - 1 is the prefix length without its terminating NUL */
+        if (keylen < sizeof(accepted) - 1)
+                RETURN(-EINVAL);
+        if (memcmp(key, accepted, sizeof(accepted) - 1) != 0)
+                RETURN(-EINVAL);
+
+        request = ptlrpc_prep_req(class_conn2cliimp(conn), OST_SET_INFO, 1,
+                                  &reqsize, reqbufs);
+        if (!request)
+                RETURN(-ENOMEM);
+
+        /* reply length is sized for a message with no buffers */
+        request->rq_replen = lustre_msg_size(0, NULL);
+        rc = ptlrpc_queue_wait(request);
+        ptlrpc_req_finished(request);
+
+        RETURN(rc);
+}
+
+/* Queue a llog cancel cookie for the target, sending the batch when needed.
+ *
+ * With no cookie to add (count == 0, cookies == NULL, or the first cookie
+ * equal to zero_cookie) nothing is queued; an already pending llcd is sent
+ * only if OBD_LLOG_FL_SENDNOW is set in flags.  Otherwise the first cookie
+ * is appended to the client's pending llcd (one is obtained from
+ * llcd_grab() if none is pending), and the llcd is handed to llcd_send()
+ * once less than one cookie's worth of a page remains or
+ * OBD_LLOG_FL_SENDNOW is set.
+ *
+ * lsm is unused, and only cookies[0] is queued regardless of count.
+ * Returns 0, or -ENOMEM when no llcd could be obtained.  cl_sem is held
+ * around every access to cl_llcd made here. */
+static int osc_log_cancel(struct lustre_handle *conn, struct lov_stripe_md *lsm,
+                          int count, struct llog_cookie *cookies, int flags)
+{
+        struct obd_device *obd = class_conn2obd(conn);
+        struct llog_commit_data *llcd;
+        struct client_obd *cli;
+        int rc = 0;
+        ENTRY;
+
+        cli = &obd->u.cli;
+        if ((count == 0 || cookies == NULL ||
+             memcmp(cookies, &zero_cookie, sizeof(*cookies)) == 0)) {
+                /* nothing to queue: at most flush what is already pending */
+                down(&cli->cl_sem);
+                if (cli->cl_llcd == NULL || !(flags & OBD_LLOG_FL_SENDNOW))
+                        GOTO(out, rc);
+
+                llcd = cli->cl_llcd;
+                GOTO(send_now, rc);
+        }
+
+        down(&cli->cl_sem);
+        llcd = cli->cl_llcd;
+        if (llcd == NULL) {
+                llcd = llcd_grab();
+                if (llcd == NULL) {
+                        CERROR("couldn't get an llcd - dropped "LPX64":%x+%u\n",
+                               cookies->lgc_lgl.lgl_oid,
+                               cookies->lgc_lgl.lgl_ogen, cookies->lgc_index);
+                        GOTO(out, rc = -ENOMEM);
+                }
+                llcd->llcd_import = cli->cl_import;
+                cli->cl_llcd = llcd;
+        }
+
+        /* llcd_cookiebytes is a byte count, so do the offset arithmetic on
+         * a byte pointer; that stays correct whatever element type
+         * llcd_cookies is declared with */
+        memcpy((char *)llcd->llcd_cookies + llcd->llcd_cookiebytes, cookies,
+               sizeof(*cookies));
+        llcd->llcd_cookiebytes += sizeof(*cookies);
+
+        /* If we can't fit any more cookies into the page, we need to send it */
+send_now:
+        if ((PAGE_SIZE - llcd->llcd_cookiebytes < sizeof(*cookies) ||
+             flags & OBD_LLOG_FL_SENDNOW)) {
+                /* detach first so the next cancel starts a fresh llcd */
+                cli->cl_llcd = NULL;
+                llcd_send(llcd);
+        }
+out:
+        up(&cli->cl_sem);
+
+        /* pair the ENTRY above, as the other functions in this file do */
+        RETURN(rc);
+}
+
+/* Disconnect from the target, first pushing out any cancel cookies that are
+ * still batched in the client's llcd.  Returns the result of
+ * client_import_disconnect(). */
+static int osc_disconnect(struct lustre_handle *conn, int flags)
+{
+        struct client_obd *cli = &class_conn2obd(conn)->u.cli;
+
+        /* a pending llcd means cancels are queued but not yet sent */
+        if (cli->cl_llcd != NULL)
+                osc_log_cancel(conn, NULL, 0, NULL, OBD_LLOG_FL_SENDNOW);
+
+        return client_import_disconnect(conn, flags);
+}
+
+/* Add the llog record rec to cathandle through llog_add_record(), which is
+ * also given logcookies.  conn and lsm are unused; logcookies must be
+ * non-NULL and numcookies positive (asserted).  Always returns 1.
+ * NOTE(review): any result of llog_add_record() is not checked here --
+ * confirm that it cannot fail. */
+static int osc_log_add(struct lustre_handle *conn,
+                       struct llog_handle *cathandle,
+                       struct llog_trans_hdr *rec, struct lov_stripe_md *lsm,
+                       struct llog_cookie *logcookies, int numcookies)
+{
+        ENTRY;
+        LASSERT(logcookies && numcookies > 0);
+
+        llog_add_record(cathandle, rec, logcookies);
+
+        RETURN(1);
+}
+
 struct obd_ops osc_obd_ops = {
         o_owner:        THIS_MODULE,
         o_attach:       osc_attach,
@@ -1816,14 +1952,14 @@ struct obd_ops osc_obd_ops = {
         o_setup:        client_obd_setup,
         o_cleanup:      client_obd_cleanup,
         o_connect:      client_import_connect,
-        o_disconnect:   client_import_disconnect,
+        o_disconnect:   osc_disconnect,
         o_statfs:       osc_statfs,
         o_packmd:       osc_packmd,
         o_unpackmd:     osc_unpackmd,
         o_create:       osc_create,
         o_destroy:      osc_destroy,
         o_getattr:      osc_getattr,
-        o_getattr_async: osc_getattr_async,
+        o_getattr_async:osc_getattr_async,
         o_setattr:      osc_setattr,
         o_open:         osc_open,
         o_close:        osc_close,
@@ -1833,14 +1969,18 @@ struct obd_ops osc_obd_ops = {
         o_enqueue:      osc_enqueue,
         o_match:        osc_match,
         o_cancel:       osc_cancel,
-        o_cancel_unused: osc_cancel_unused,
+        o_cancel_unused:osc_cancel_unused,
         o_iocontrol:    osc_iocontrol,
         o_get_info:     osc_get_info,
-        .o_mark_page_dirty =    osc_mark_page_dirty,
-        .o_clear_dirty_pages =  osc_clear_dirty_pages,
-        .o_last_dirty_offset =  osc_last_dirty_offset,
+        o_set_info:     osc_set_info,
+        o_log_cancel:   osc_log_cancel,
+        o_log_add:      osc_log_add,
+        o_mark_page_dirty:    osc_mark_page_dirty,
+        o_clear_dirty_pages:  osc_clear_dirty_pages,
+        o_last_dirty_offset:  osc_last_dirty_offset,
 };
 
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
 struct obd_ops sanosc_obd_ops = {
         o_owner:        THIS_MODULE,
         o_attach:       osc_attach,
@@ -1858,48 +1998,54 @@ struct obd_ops sanosc_obd_ops = {
         o_setattr:      osc_setattr,
         o_open:         osc_open,
         o_close:        osc_close,
-#ifdef __KERNEL__
         o_setup:        client_sanobd_setup,
         o_brw:          sanosc_brw,
-#endif
         o_punch:        osc_punch,
         o_enqueue:      osc_enqueue,
         o_match:        osc_match,
         o_cancel:       osc_cancel,
         o_cancel_unused: osc_cancel_unused,
         o_iocontrol:    osc_iocontrol,
-        .o_mark_page_dirty =    osc_mark_page_dirty,
-        .o_clear_dirty_pages =  osc_clear_dirty_pages,
-        .o_last_dirty_offset =  osc_last_dirty_offset,
+        o_log_cancel:   osc_log_cancel,
+        o_log_add:      osc_log_add,
+        o_mark_page_dirty:   osc_mark_page_dirty,
+        o_clear_dirty_pages: osc_clear_dirty_pages,
+        o_last_dirty_offset: osc_last_dirty_offset,
 };
+#endif
 
+/* Module init: register the osc obd type and, on kernels older than 2.5.0,
+ * the sanosc type as well, each with its own lprocfs_static_vars copy.
+ * If the sanosc registration fails the osc type is unregistered again.
+ * Returns the class_register_type() result of the last registration tried. */
 int __init osc_init(void)
 {
-        struct lprocfs_static_vars lvars;
+        struct lprocfs_static_vars lvars, sanlvars;
         int rc;
         ENTRY;
 
         LASSERT(sizeof(struct obd_client_handle) <= FD_OSTDATA_SIZE);
         LASSERT(sizeof(struct obd_client_handle) <= OBD_INLINESZ);
 
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(osc,&lvars);
+        lprocfs_init_vars(osc,&sanlvars);
 
         rc = class_register_type(&osc_obd_ops, lvars.module_vars,
                                  LUSTRE_OSC_NAME);
         if (rc)
                 RETURN(rc);
 
-        rc = class_register_type(&sanosc_obd_ops, lvars.module_vars,
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+        rc = class_register_type(&sanosc_obd_ops, sanlvars.module_vars,
                                  LUSTRE_SANOSC_NAME);
+        /* on failure, undo the osc registration made above */
         if (rc)
                 class_unregister_type(LUSTRE_OSC_NAME);
+#endif
 
         RETURN(rc);
 }
 
-static void __exit osc_exit(void)
+/* Module exit: unregister the sanosc type (only on kernels older than
+ * 2.5.0) and then the osc type. */
+static void /*__exit*/ osc_exit(void)
 {
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
         class_unregister_type(LUSTRE_SANOSC_NAME);
+#endif
         class_unregister_type(LUSTRE_OSC_NAME);
 }
 
index e530020..49c6100 100644 (file)
@@ -6,3 +6,4 @@ Makefile
 Makefile.in
 .deps
 TAGS
+.*.cmd
index c44093c..936706d 100644 (file)
 #include <linux/lprocfs_status.h>
 
 #ifndef LPROCFS
-struct lprocfs_vars lprocfs_obd_vars[]  = { {0} };
-struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+/* Without LPROCFS both tables hold a single zeroed entry. */
+static struct lprocfs_vars lprocfs_obd_vars[]  = { {0} };
+static struct lprocfs_vars lprocfs_module_vars[] = { {0} };
 #else
-struct lprocfs_vars lprocfs_obd_vars[] = {
-        { "uuid",        lprocfs_rd_uuid,   0, 0 },
+/* "uuid" entry, read through lprocfs_rd_uuid; the trailing { 0 } is
+ * presumably the end-of-table marker -- confirm against lprocfs_status.h. */
+static struct lprocfs_vars lprocfs_obd_vars[] = {
+        { "uuid",            lprocfs_rd_uuid,   0, 0 },
         { 0 }
 };
 
-struct lprocfs_vars lprocfs_module_vars[] = {
-        { "num_refs",   lprocfs_rd_numrefs, 0, 0 },
+/* "num_refs" entry, read through lprocfs_rd_numrefs. */
+static struct lprocfs_vars lprocfs_module_vars[] = {
+        { "num_refs",       lprocfs_rd_numrefs, 0, 0 },
         { 0 }
 };
 
 #endif /* LPROCFS */
-LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
+/* The macro now takes a name ("ost") ahead of the two tables. */
+LPROCFS_INIT_VARS(ost, lprocfs_module_vars, lprocfs_obd_vars)
index 023deb2..6801e92 100644 (file)
 #include <linux/lustre_export.h>
 #include <linux/init.h>
 #include <linux/lprocfs_status.h>
+#include <linux/lustre_commit_confd.h>
+#include <portals/list.h>
 
-inline void oti_init(struct obd_trans_info *oti,
-                           struct ptlrpc_request *req)
+/* Zero *oti and, when req has both a reply message and a request message,
+ * seed oti_transno from the reply's transno.  A NULL oti is accepted and
+ * nothing is done for it. */
+void oti_init(struct obd_trans_info *oti, struct ptlrpc_request *req)
 {
-        if(oti == NULL)
+        if (oti == NULL)
                 return;
         memset(oti, 0, sizeof *oti);
 
-        
         if (req->rq_repmsg && req->rq_reqmsg != 0)
                 oti->oti_transno = req->rq_repmsg->transno;
-
-        EXIT;
 }
 
-inline void oti_to_request(struct obd_trans_info *oti,
-                           struct ptlrpc_request *req)
+void oti_to_request(struct obd_trans_info *oti, struct ptlrpc_request *req)
 {
-        int i;
         struct oti_req_ack_lock *ack_lock;
+        int i;
 
-        if(oti == NULL)
+        if (oti == NULL)
                 return;
 
         if (req->rq_repmsg)
@@ -75,7 +72,6 @@ inline void oti_to_request(struct obd_trans_info *oti,
                        sizeof(req->rq_ack_locks[i].lock));
                 req->rq_ack_locks[i].mode = ack_lock->mode;
         }
-        EXIT;
 }
 
 static int ost_destroy(struct ptlrpc_request *req, struct obd_trans_info *oti)
@@ -85,15 +81,16 @@ static int ost_destroy(struct ptlrpc_request *req, struct obd_trans_info *oti)
         int rc, size = sizeof(*body);
         ENTRY;
 
-        body = lustre_swab_reqbuf (req, 0, sizeof (*body),
-                                   lustre_swab_ost_body);
+        body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
         if (body == NULL)
-                RETURN (-EFAULT);
+                RETURN(-EFAULT);
 
         rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
         if (rc)
                 RETURN(rc);
 
+        if (body->oa.o_valid & OBD_MD_FLCOOKIE)
+                oti->oti_logcookies = obdo_logcookie(&body->oa);
         req->rq_status = obd_destroy(conn, &body->oa, NULL, oti);
         RETURN(0);
 }
@@ -105,16 +102,15 @@ static int ost_getattr(struct ptlrpc_request *req)
         int rc, size = sizeof(*body);
         ENTRY;
 
-        body = lustre_swab_reqbuf (req, 0, sizeof (*body),
-                                   lustre_swab_ost_body);
+        body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
         if (body == NULL)
-                RETURN (-EFAULT);
+                RETURN(-EFAULT);
 
         rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
         if (rc)
                 RETURN(rc);
 
-        repbody = lustre_msg_buf (req->rq_repmsg, 0, sizeof (*repbody));
+        repbody = lustre_msg_buf (req->rq_repmsg, 0, sizeof(*repbody));
         memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
         req->rq_status = obd_getattr(conn, &repbody->oa, NULL);
         RETURN(0);
@@ -130,10 +126,9 @@ static int ost_statfs(struct ptlrpc_request *req)
         if (rc)
                 RETURN(rc);
 
-        osfs = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*osfs));
-        memset(osfs, 0, size);
+        osfs = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*osfs));
 
-        req->rq_status = obd_statfs(req->rq_export, osfs);
+        req->rq_status = obd_statfs(req->rq_export->exp_obd, osfs, jiffies-HZ);
         if (req->rq_status != 0)
                 CERROR("ost: statfs failed: rc %d\n", req->rq_status);
 
@@ -167,16 +162,15 @@ static int ost_open(struct ptlrpc_request *req, struct obd_trans_info *oti)
         int rc, size = sizeof(*repbody);
         ENTRY;
 
-        body = lustre_swab_reqbuf (req, 0, sizeof (*body),
-                                   lustre_swab_ost_body);
+        body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
         if (body == NULL)
-                return (-EFAULT);
+                RETURN(-EFAULT);
 
         rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
         if (rc)
                 RETURN(rc);
 
-        repbody = lustre_msg_buf (req->rq_repmsg, 0, sizeof (*repbody));
+        repbody = lustre_msg_buf (req->rq_repmsg, 0, sizeof(*repbody));
         memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
         req->rq_status = obd_open(conn, &repbody->oa, NULL, oti, NULL);
         RETURN(0);
@@ -189,16 +183,15 @@ static int ost_close(struct ptlrpc_request *req, struct obd_trans_info *oti)
         int rc, size = sizeof(*repbody);
         ENTRY;
 
-        body = lustre_swab_reqbuf (req, 0, sizeof (*body),
-                                   lustre_swab_ost_body);
+        body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
         if (body == NULL)
-                RETURN (-EFAULT);
+                RETURN(-EFAULT);
 
         rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
         if (rc)
                 RETURN(rc);
 
-        repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*repbody));
+        repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*repbody));
         memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
         req->rq_status = obd_close(conn, &repbody->oa, NULL, oti);
         RETURN(0);
@@ -211,18 +204,19 @@ static int ost_create(struct ptlrpc_request *req, struct obd_trans_info *oti)
         int rc, size = sizeof(*repbody);
         ENTRY;
 
-        body = lustre_swab_reqbuf (req, 0, sizeof (*body),
-                                   lustre_swab_ost_body);
+        body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
         if (body == NULL)
-                RETURN (-EFAULT);
+                RETURN(-EFAULT);
 
         rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
         if (rc)
                 RETURN(rc);
 
-        repbody = lustre_msg_buf (req->rq_repmsg, 0, sizeof (*repbody));
+        repbody = lustre_msg_buf (req->rq_repmsg, 0, sizeof(*repbody));
         memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
+        oti->oti_logcookies = obdo_logcookie(&repbody->oa);
         req->rq_status = obd_create(conn, &repbody->oa, NULL, oti);
+        //obd_log_cancel(conn, NULL, 1, oti->oti_logcookies, 0);
         RETURN(0);
 }
 
@@ -233,10 +227,9 @@ static int ost_punch(struct ptlrpc_request *req, struct obd_trans_info *oti)
         int rc, size = sizeof(*repbody);
         ENTRY;
 
-        body = lustre_swab_reqbuf (req, 0, sizeof (*body),
-                                   lustre_swab_ost_body);
+        body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
         if (body == NULL)
-                RETURN (-EFAULT);
+                RETURN(-EFAULT);
 
         if ((body->oa.o_valid & (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS)) !=
             (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS))
@@ -246,7 +239,7 @@ static int ost_punch(struct ptlrpc_request *req, struct obd_trans_info *oti)
         if (rc)
                 RETURN(rc);
 
-        repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*repbody));
+        repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*repbody));
         memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
         req->rq_status = obd_punch(conn, &repbody->oa, NULL, repbody->oa.o_size,
                                    repbody->oa.o_blocks, oti);
@@ -260,16 +253,15 @@ static int ost_setattr(struct ptlrpc_request *req, struct obd_trans_info *oti)
         int rc, size = sizeof(*repbody);
         ENTRY;
 
-        body = lustre_swab_reqbuf (req, 0, sizeof (*body),
-                                   lustre_swab_ost_body);
+        body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
         if (body == NULL)
-                RETURN (-EFAULT);
+                RETURN(-EFAULT);
 
         rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
         if (rc)
                 RETURN(rc);
 
-        repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*repbody));
+        repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*repbody));
         memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
 
         req->rq_status = obd_setattr(conn, &repbody->oa, NULL, oti);
@@ -285,9 +277,9 @@ static int ost_bulk_timeout(void *data)
         RETURN(1);
 }
 
-static int get_per_page_niobufs (struct obd_ioobj *ioo, int nioo,
-                                 struct niobuf_remote *rnb, int nrnb,
-                                 struct niobuf_remote **pp_rnbp)
+static int get_per_page_niobufs(struct obd_ioobj *ioo, int nioo,
+                                struct niobuf_remote *rnb, int nrnb,
+                                struct niobuf_remote **pp_rnbp)
 {
         /* Copy a remote niobuf, splitting it into page-sized chunks
          * and setting ioo[i].ioo_bufcnt accordingly */
@@ -305,14 +297,14 @@ static int get_per_page_niobufs (struct obd_ioobj *ioo, int nioo,
                         obd_off p0 = offset >> PAGE_SHIFT;
                         obd_off pn = (offset + rnb[rnbidx].len - 1)>>PAGE_SHIFT;
 
-                        LASSERT (rnbidx < nrnb);
+                        LASSERT(rnbidx < nrnb);
 
                         npages += (pn + 1 - p0);
 
                         if (rnb[rnbidx].len == 0) {
                                 CERROR("zero len BRW: obj %d objid "LPX64
                                        " buf %u\n", i, ioo[i].ioo_id, j);
-                                return (-EINVAL);
+                                return -EINVAL;
                         }
                         if (j > 0 &&
                             rnb[rnbidx].offset <= rnb[rnbidx-1].offset) {
@@ -320,20 +312,20 @@ static int get_per_page_niobufs (struct obd_ioobj *ioo, int nioo,
                                        " buf %u offset "LPX64" <= "LPX64"\n",
                                        i, ioo[i].ioo_id, j, rnb[rnbidx].offset,
                                        rnb[rnbidx].offset);
-                                return (-EINVAL);
+                                return -EINVAL;
                         }
                 }
 
-        LASSERT (rnbidx == nrnb);
+        LASSERT(rnbidx == nrnb);
 
         if (npages == nrnb) {       /* all niobufs are for single pages */
                 *pp_rnbp = rnb;
-                return (npages);
+                return npages;
         }
 
-        OBD_ALLOC (pp_rnb, sizeof (*pp_rnb) * npages);
+        OBD_ALLOC(pp_rnb, sizeof(*pp_rnb) * npages);
         if (pp_rnb == NULL)
-                return (-ENOMEM);
+                return -ENOMEM;
 
         /* now do the actual split */
         page = rnbidx = 0;
@@ -344,35 +336,35 @@ static int get_per_page_niobufs (struct obd_ioobj *ioo, int nioo,
                         obd_off off = rnb[rnbidx].offset;
                         int     nob = rnb[rnbidx].len;
 
-                        LASSERT (rnbidx < nrnb);
+                        LASSERT(rnbidx < nrnb);
                         do {
                                 obd_off  poff = off & (PAGE_SIZE - 1);
                                 int      pnob = (poff + nob > PAGE_SIZE) ?
                                                 PAGE_SIZE - poff : nob;
 
-                                LASSERT (page < npages);
+                                LASSERT(page < npages);
                                 pp_rnb[page].len = pnob;
                                 pp_rnb[page].offset = off;
-                                pp_rnb[page].flags = rnb->flags;
+                                pp_rnb[page].flags = rnb[rnbidx].flags;
 
-                                CDEBUG (D_PAGE, "   obj %d id "LPX64
-                                        "page %d(%d) "LPX64" for %d\n",
-                                        i, ioo[i].ioo_id, obj_pages, page,
-                                        pp_rnb[page].offset, pp_rnb[page].len);
+                                CDEBUG(D_PAGE, "   obj %d id "LPX64
+                                       "page %d(%d) "LPX64" for %d\n",
+                                       i, ioo[i].ioo_id, obj_pages, page,
+                                       pp_rnb[page].offset, pp_rnb[page].len);
                                 page++;
                                 obj_pages++;
 
                                 off += pnob;
                                 nob -= pnob;
                         } while (nob > 0);
-                        LASSERT (nob == 0);
+                        LASSERT(nob == 0);
                 }
                 ioo[i].ioo_bufcnt = obj_pages;
         }
-        LASSERT (page == npages);
+        LASSERT(page == npages);
 
         *pp_rnbp = pp_rnb;
-        return (npages);
+        return npages;
 }
 
 static void free_per_page_niobufs (int npages, struct niobuf_remote *pp_rnb,
@@ -381,23 +373,21 @@ static void free_per_page_niobufs (int npages, struct niobuf_remote *pp_rnb,
         if (pp_rnb == rnb)                      /* didn't allocate above */
                 return;
 
-        OBD_FREE (pp_rnb, sizeof (*pp_rnb) * npages);
+        OBD_FREE(pp_rnb, sizeof(*pp_rnb) * npages);
 }
 
 #if CHECKSUM_BULK
 __u64 ost_checksum_bulk (struct ptlrpc_bulk_desc *desc)
 {
         __u64             cksum = 0;
-        struct list_head *tmp;
-        char             *ptr;
+        struct ptlrpc_bulk_page *bp;
 
-        list_for_each (tmp, &desc->bd_page_list) {
-                struct ptlrpc_bulk_page *bp;
-
-                bp = list_entry (tmp, struct ptlrpc_bulk_page, bp_link);
-                ptr = kmap (bp->bp_page);
-                ost_checksum (&cksum, ptr + bp->bp_pageoffset, bp->bp_buflen);
-                kunmap (bp->bp_page);
+        list_for_each_entry(bp, &desc->bd_page_list, bp_link) {
+                ost_checksum(&cksum, kmap(bp->bp_page) + bp->bp_pageoffset,
+                             bp->bp_buflen);
+                kunmap(bp->bp_page);
         }
+
+        return cksum;
 }
 #endif
@@ -409,9 +397,9 @@ static int ost_brw_read(struct ptlrpc_request *req)
         struct niobuf_remote    *pp_rnb;
         struct niobuf_local     *local_nb;
         struct obd_ioobj        *ioo;
-        struct ost_body         *body;
+        struct ost_body         *body, *repbody;
         struct l_wait_info       lwi;
-        void                    *desc_priv = NULL;
+        struct obd_trans_info    oti = { 0 };
         int                      size[1] = { sizeof(*body) };
         int                      comms_error = 0;
         int                      niocount;
@@ -426,35 +414,36 @@ static int ost_brw_read(struct ptlrpc_request *req)
 
         body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
         if (body == NULL) {
-                CERROR ("Missing/short ost_body\n");
-                GOTO (out, rc = -EFAULT);
+                CERROR("Missing/short ost_body\n");
+                GOTO(out, rc = -EFAULT);
         }
 
-        ioo = lustre_swab_reqbuf (req, 1, sizeof (*ioo),
-                                  lustre_swab_obd_ioobj);
+        ioo = lustre_swab_reqbuf(req, 1, sizeof(*ioo), lustre_swab_obd_ioobj);
         if (ioo == NULL) {
-                CERROR ("Missing/short ioobj\n");
-                GOTO (out, rc = -EFAULT);
+                CERROR("Missing/short ioobj\n");
+                GOTO(out, rc = -EFAULT);
         }
 
         niocount = ioo->ioo_bufcnt;
-        remote_nb = lustre_swab_reqbuf(req, 2, niocount * sizeof (*remote_nb),
+        remote_nb = lustre_swab_reqbuf(req, 2, niocount * sizeof(*remote_nb),
                                        lustre_swab_niobuf_remote);
         if (remote_nb == NULL) {
-                CERROR ("Missing/short niobuf\n");
-                GOTO (out, rc = -EFAULT);
+                CERROR("Missing/short niobuf\n");
+                GOTO(out, rc = -EFAULT);
         }
-        if (lustre_msg_swabbed (req->rq_reqmsg)) { /* swab remaining niobufs */
+        if (lustre_msg_swabbed(req->rq_reqmsg)) { /* swab remaining niobufs */
                 for (i = 1; i < niocount; i++)
                         lustre_swab_niobuf_remote (&remote_nb[i]);
         }
 
+        size[0] = sizeof(*body);
         rc = lustre_pack_msg(1, size, NULL, &req->rq_replen, &req->rq_repmsg);
         if (rc)
                 GOTO(out, rc);
 
+        /* FIXME all niobuf splitting should be done in obdfilter if needed */
         /* CAVEAT EMPTOR this sets ioo->ioo_bufcnt to # pages */
-        npages = get_per_page_niobufs (ioo, 1, remote_nb, niocount, &pp_rnb);
+        npages = get_per_page_niobufs(ioo, 1, remote_nb, niocount, &pp_rnb);
         if (npages < 0)
                 GOTO(out, rc = npages);
 
@@ -462,12 +451,12 @@ static int ost_brw_read(struct ptlrpc_request *req)
         if (local_nb == NULL)
                 GOTO(out_pp_rnb, rc = -ENOMEM);
 
-        desc = ptlrpc_prep_bulk_exp (req, BULK_PUT_SOURCE, OST_BULK_PORTAL);
+        desc = ptlrpc_prep_bulk_exp(req, BULK_PUT_SOURCE, OST_BULK_PORTAL);
         if (desc == NULL)
                 GOTO(out_local, rc = -ENOMEM);
 
-        rc = obd_preprw(OBD_BRW_READ, req->rq_export, NULL, 1, ioo, npages,
-                        pp_rnb, local_nb, &desc_priv, NULL);
+        rc = obd_preprw(OBD_BRW_READ, req->rq_export, &body->oa, 1,
+                        ioo, npages, pp_rnb, local_nb, &oti);
         if (rc != 0)
                 GOTO(out_bulk, rc);
 
@@ -480,7 +469,7 @@ static int ost_brw_read(struct ptlrpc_request *req)
                         break;
                 }
 
-                LASSERT (page_rc <= pp_rnb[i].len);
+                LASSERT(page_rc <= pp_rnb[i].len);
                 nob += page_rc;
                 if (page_rc != 0) {             /* some data! */
                         LASSERT (local_nb[i].page != NULL);
@@ -493,8 +482,8 @@ static int ost_brw_read(struct ptlrpc_request *req)
 
                 if (page_rc != pp_rnb[i].len) { /* short read */
                         /* All subsequent pages should be 0 */
-                        while (++i < npages)
-                                LASSERT (local_nb[i].rc == 0);
+                        while(++i < npages)
+                                LASSERT(local_nb[i].rc == 0);
                         break;
                 }
         }
@@ -509,7 +498,7 @@ static int ost_brw_read(struct ptlrpc_request *req)
                         if (rc) {
                                 LASSERT(rc == -ETIMEDOUT);
                                 CERROR ("timeout waiting for bulk PUT\n");
-                                ptlrpc_abort_bulk (desc);
+                                ptlrpc_abort_bulk(desc);
                         }
                 } else {
                         CERROR("ptlrpc_bulk_put failed RC: %d\n", rc);
@@ -518,25 +507,27 @@ static int ost_brw_read(struct ptlrpc_request *req)
         }
 
         /* Must commit after prep above in all cases */
-        rc = obd_commitrw(OBD_BRW_READ, req->rq_export, 1, ioo, npages,
-                          local_nb, desc_priv, NULL);
+        rc = obd_commitrw(OBD_BRW_READ, req->rq_export, &body->oa, 1,
+                          ioo, npages, local_nb, &oti);
+
+        repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*repbody));
+        memcpy(&repbody->oa, &body->oa, sizeof(repbody->oa));
 
 #if CHECKSUM_BULK
         if (rc == 0) {
-                body = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*body));
-                body->oa.o_rdev = ost_checksum_bulk (desc);
-                body->oa.o_valid |= OBD_MD_FLCKSUM;
+                repbody->oa.o_rdev = ost_checksum_bulk(desc);
+                repbody->oa.o_valid |= OBD_MD_FLCKSUM;
         }
 #endif
 
  out_bulk:
-        ptlrpc_free_bulk (desc);
+        ptlrpc_free_bulk(desc);
  out_local:
         OBD_FREE(local_nb, sizeof(*local_nb) * npages);
  out_pp_rnb:
-        free_per_page_niobufs (npages, pp_rnb, remote_nb);
+        free_per_page_niobufs(npages, pp_rnb, remote_nb);
  out:
-        LASSERT (rc <= 0);
+        LASSERT(rc <= 0);
         if (rc == 0) {
                 req->rq_status = nob;
                 ptlrpc_reply(req);
@@ -547,7 +538,7 @@ static int ost_brw_read(struct ptlrpc_request *req)
         } else {
                 if (req->rq_repmsg != NULL) {
                         /* reply out callback would free */
-                        OBD_FREE (req->rq_repmsg, req->rq_replen);
+                        OBD_FREE(req->rq_repmsg, req->rq_replen);
                 }
                 CERROR("bulk IO comms error: evicting %s@%s nid "LPU64"\n",
                        req->rq_export->exp_client_uuid.uuid,
@@ -566,11 +557,10 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
         struct niobuf_remote    *pp_rnb;
         struct niobuf_local     *local_nb;
         struct obd_ioobj        *ioo;
-        struct ost_body         *body;
+        struct ost_body         *body, *repbody;
         struct l_wait_info       lwi;
-        void                    *desc_priv = NULL;
         __u32                   *rcs;
-        int                      size[2] = { sizeof (*body) };
+        int                      size[2] = { sizeof(*body) };
         int                      objcount, niocount, npages;
         int                      comms_error = 0;
         int                      rc, rc2, swab, i, j;
@@ -580,39 +570,38 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
                 GOTO(out, rc = -EIO);
 
         /* pause before transaction has been started */
-        OBD_FAIL_TIMEOUT(OBD_FAIL_OST_BRW_PAUSE_BULK | OBD_FAIL_ONCE, 
+        OBD_FAIL_TIMEOUT(OBD_FAIL_OST_BRW_PAUSE_BULK | OBD_FAIL_ONCE,
                          obd_timeout +1);
 
-        swab = lustre_msg_swabbed (req->rq_reqmsg);
-        body = lustre_swab_reqbuf (req, 0, sizeof (*body),
-                                   lustre_swab_ost_body);
+        swab = lustre_msg_swabbed(req->rq_reqmsg);
+        body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
         if (body == NULL) {
-                CERROR ("Missing/short ost_body\n");
+                CERROR("Missing/short ost_body\n");
                 GOTO(out, rc = -EFAULT);
         }
 
-        LASSERT_REQSWAB (req, 1);
+        LASSERT_REQSWAB(req, 1);
         objcount = req->rq_reqmsg->buflens[1] / sizeof(*ioo);
         if (objcount == 0) {
-                CERROR ("Missing/short ioobj\n");
-                GOTO (out, rc = -EFAULT);
+                CERROR("Missing/short ioobj\n");
+                GOTO(out, rc = -EFAULT);
         }
-        ioo = lustre_msg_buf (req->rq_reqmsg, 1, objcount * sizeof (*ioo));
+        ioo = lustre_msg_buf (req->rq_reqmsg, 1, objcount * sizeof(*ioo));
         LASSERT (ioo != NULL);
         for (niocount = i = 0; i < objcount; i++) {
                 if (swab)
                         lustre_swab_obd_ioobj (&ioo[i]);
                 if (ioo[i].ioo_bufcnt == 0) {
-                        CERROR ("ioo[%d] has zero bufcnt\n", i);
-                        GOTO (out, rc = -EFAULT);
+                        CERROR("ioo[%d] has zero bufcnt\n", i);
+                        GOTO(out, rc = -EFAULT);
                 }
                 niocount += ioo[i].ioo_bufcnt;
         }
 
-        remote_nb = lustre_swab_reqbuf(req, 2, niocount * sizeof (*remote_nb),
+        remote_nb = lustre_swab_reqbuf(req, 2, niocount * sizeof(*remote_nb),
                                        lustre_swab_niobuf_remote);
         if (remote_nb == NULL) {
-                CERROR ("Missing/short niobuf\n");
+                CERROR("Missing/short niobuf\n");
                 GOTO(out, rc = -EFAULT);
         }
         if (swab) {                             /* swab the remaining niobufs */
@@ -620,30 +609,31 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
                         lustre_swab_niobuf_remote (&remote_nb[i]);
         }
 
-        size[1] = niocount * sizeof (*rcs);
+        size[1] = niocount * sizeof(*rcs);
         rc = lustre_pack_msg(2, size, NULL, &req->rq_replen,
                              &req->rq_repmsg);
         if (rc != 0)
-                GOTO (out, rc);
-        rcs = lustre_msg_buf (req->rq_repmsg, 1, niocount * sizeof (*rcs));
+                GOTO(out, rc);
+        rcs = lustre_msg_buf(req->rq_repmsg, 1, niocount * sizeof(*rcs));
 
+        /* FIXME all niobuf splitting should be done in obdfilter if needed */
         /* CAVEAT EMPTOR this sets ioo->ioo_bufcnt to # pages */
         npages = get_per_page_niobufs(ioo, objcount,remote_nb,niocount,&pp_rnb);
         if (npages < 0)
-                GOTO (out, rc = npages);
+                GOTO(out, rc = npages);
 
         OBD_ALLOC(local_nb, sizeof(*local_nb) * npages);
         if (local_nb == NULL)
                 GOTO(out_pp_rnb, rc = -ENOMEM);
 
-        desc = ptlrpc_prep_bulk_exp (req, BULK_GET_SINK, OST_BULK_PORTAL);
+        desc = ptlrpc_prep_bulk_exp(req, BULK_GET_SINK, OST_BULK_PORTAL);
         if (desc == NULL)
                 GOTO(out_local, rc = -ENOMEM);
 
-        rc = obd_preprw(OBD_BRW_WRITE, req->rq_export, NULL, objcount, ioo,
-                        npages, pp_rnb, local_nb, &desc_priv, oti);
+        rc = obd_preprw(OBD_BRW_WRITE, req->rq_export, &body->oa, objcount,
+                        ioo, npages, pp_rnb, local_nb, oti);
         if (rc != 0)
-                GOTO (out_bulk, rc);
+                GOTO(out_bulk, rc);
 
         /* NB Having prepped, we must commit... */
 
@@ -664,8 +654,8 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
                                           ptlrpc_bulk_complete(desc), &lwi);
                         if (rc) {
                                 LASSERT(rc == -ETIMEDOUT);
-                                CERROR ("timeout waiting for bulk GET\n");
-                                ptlrpc_abort_bulk (desc);
+                                CERROR("timeout waiting for bulk GET\n");
+                                ptlrpc_abort_bulk(desc);
                         }
                 } else {
                        CERROR("ptlrpc_bulk_get failed RC: %d\n", rc);
@@ -673,17 +663,21 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
                comms_error = rc != 0;
         }
 
+        repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*repbody));
+        memcpy(&repbody->oa, &body->oa, sizeof(repbody->oa));
+
 #if CHECKSUM_BULK
         if (rc == 0 && (body->oa.o_valid & OBD_MD_FLCKSUM) != 0) {
                 static int cksum_counter;
                 __u64 client_cksum = body->oa.o_rdev;
-                __u64 cksum = ost_checksum_bulk (desc);
+                __u64 cksum = ost_checksum_bulk(desc);
 
                 if (client_cksum != cksum) {
                         CERROR("Bad checksum: client "LPX64", server "LPX64
                                ", client NID "LPX64"\n", client_cksum, cksum,
                                req->rq_connection->c_peer.peer_nid);
                         cksum_counter = 1;
+                        repbody->oa.o_rdev = cksum;
                 } else {
                         cksum_counter++;
                         if ((cksum_counter & (-cksum_counter)) == cksum_counter)
@@ -695,8 +689,8 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
         }
 #endif
         /* Must commit after prep above in all cases */
-        rc2 = obd_commitrw(OBD_BRW_WRITE, req->rq_export, objcount, ioo,
-                           npages, local_nb, desc_priv, oti);
+        rc2 = obd_commitrw(OBD_BRW_WRITE, req->rq_export, &repbody->oa,
+                           objcount, ioo, npages, local_nb, oti);
 
         if (rc == 0) {
                 /* set per-requested niobuf return codes */
@@ -705,25 +699,25 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
 
                         rcs[i] = 0;
                         do {
-                                LASSERT (j < npages);
+                                LASSERT(j < npages);
                                 if (local_nb[j].rc < 0)
                                         rcs[i] = local_nb[j].rc;
                                 nob -= pp_rnb[j].len;
                                 j++;
                         } while (nob > 0);
-                        LASSERT (nob == 0);
+                        LASSERT(nob == 0);
                 }
-                LASSERT (j == npages);
+                LASSERT(j == npages);
         }
         if (rc == 0)
                 rc = rc2;
 
  out_bulk:
-        ptlrpc_free_bulk (desc);
+        ptlrpc_free_bulk(desc);
  out_local:
         OBD_FREE(local_nb, sizeof(*local_nb) * npages);
  out_pp_rnb:
-        free_per_page_niobufs (npages, pp_rnb, remote_nb);
+        free_per_page_niobufs(npages, pp_rnb, remote_nb);
  out:
         if (rc == 0) {
                 oti_to_request(oti, req);
@@ -748,10 +742,9 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
 
 static int ost_san_brw(struct ptlrpc_request *req, int cmd)
 {
-        struct lustre_handle *conn = &req->rq_reqmsg->handle;
         struct niobuf_remote *remote_nb, *res_nb;
         struct obd_ioobj *ioo;
-        struct ost_body *body;
+        struct ost_body *body, *repbody;
         int rc, i, j, objcount, niocount, size[2] = {sizeof(*body)};
         int n;
         int swab;
@@ -759,19 +752,17 @@ static int ost_san_brw(struct ptlrpc_request *req, int cmd)
 
         /* XXX not set to use latest protocol */
 
-        swab = lustre_msg_swabbed (req->rq_reqmsg);
-        body = lustre_swab_reqbuf (req, 0, sizeof (*body),
-                                   lustre_swab_ost_body);
+        swab = lustre_msg_swabbed(req->rq_reqmsg);
+        body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
         if (body == NULL) {
-                CERROR ("Missing/short ost_body\n");
-                GOTO (out, rc = -EFAULT);
+                CERROR("Missing/short ost_body\n");
+                GOTO(out, rc = -EFAULT);
         }
 
-        ioo = lustre_swab_reqbuf(req, 1, sizeof (*ioo),
-                                 lustre_swab_obd_ioobj);
+        ioo = lustre_swab_reqbuf(req, 1, sizeof(*ioo), lustre_swab_obd_ioobj);
         if (ioo == NULL) {
-                CERROR ("Missing/short ioobj\n");
-                GOTO (out, rc = -EFAULT);
+                CERROR("Missing/short ioobj\n");
+                GOTO(out, rc = -EFAULT);
         }
         objcount = req->rq_reqmsg->buflens[1] / sizeof(*ioo);
         niocount = ioo[0].ioo_bufcnt;
@@ -781,11 +772,11 @@ static int ost_san_brw(struct ptlrpc_request *req, int cmd)
                 niocount += ioo[i].ioo_bufcnt;
         }
 
-        remote_nb = lustre_swab_reqbuf(req, 2, niocount * sizeof (*remote_nb),
+        remote_nb = lustre_swab_reqbuf(req, 2, niocount * sizeof(*remote_nb),
                                        lustre_swab_niobuf_remote);
         if (remote_nb == NULL) {
-                CERROR ("Missing/short niobuf\n");
-                GOTO (out, rc = -EFAULT);
+                CERROR("Missing/short niobuf\n");
+                GOTO(out, rc = -EFAULT);
         }
         if (swab) {                             /* swab the remaining niobufs */
                 for (i = 1; i < niocount; i++)
@@ -814,14 +805,17 @@ static int ost_san_brw(struct ptlrpc_request *req, int cmd)
         if (rc)
                 GOTO(out, rc);
 
-        req->rq_status = obd_san_preprw(cmd, conn, objcount, ioo,
-                                        niocount, remote_nb);
+        req->rq_status = obd_san_preprw(cmd, req->rq_export, &body->oa,
+                                        objcount, ioo, niocount, remote_nb);
 
         if (req->rq_status)
-                GOTO (out, rc = 0);
+                GOTO(out, rc = 0);
+
+        repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*repbody));
+        memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
 
         res_nb = lustre_msg_buf(req->rq_repmsg, 1, size[1]);
-        memcpy (res_nb, remote_nb, size[1]);
+        memcpy(res_nb, remote_nb, size[1]);
         rc = 0;
 out:
         if (rc) {
@@ -835,6 +829,57 @@ out:
         return rc;
 }
 
+static int ost_log_cancel(struct ptlrpc_request *req)
+{
+        struct lustre_handle *conn;
+        struct llog_cookie *logcookies;
+        int num_cookies, rc = 0;
+        ENTRY;
+
+        logcookies = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*logcookies));
+        if (logcookies == NULL) {
+                DEBUG_REQ(D_HA, req, "no cookies sent");
+                RETURN(-EFAULT);
+        }
+        num_cookies = req->rq_reqmsg->buflens[0] / sizeof(*logcookies);
+
+        /* workaround until we don't need to send replies */
+        rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg);
+        if (rc)
+                RETURN(rc);
+        req->rq_repmsg->status = 0;
+        /* end workaround */
+
+        conn = (struct lustre_handle *)&req->rq_reqmsg->handle;
+        rc = obd_log_cancel(conn, NULL, num_cookies, logcookies, 0);
+
+        RETURN(rc);
+}
+
+static int ost_set_info(struct ptlrpc_request *req)
+{
+        struct lustre_handle *conn;
+        char *key;
+        int keylen, rc = 0;
+        ENTRY;
+
+        key = lustre_msg_buf(req->rq_reqmsg, 0, 1);
+        if (key == NULL) {
+                DEBUG_REQ(D_HA, req, "no set_info key");
+                RETURN(-EFAULT);
+        }
+        keylen = req->rq_reqmsg->buflens[0];
+
+        rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg);
+        if (rc)
+                RETURN(rc);
+
+        conn = (struct lustre_handle *)&req->rq_reqmsg->handle;
+        rc = obd_set_info(conn, keylen, key, 0, NULL);
+        req->rq_repmsg->status = 0;
+        RETURN(rc);
+}
+
 static int filter_recovery_request(struct ptlrpc_request *req,
                                    struct obd_device *obd, int *process)
 {
@@ -850,9 +895,10 @@ static int filter_recovery_request(struct ptlrpc_request *req,
         case OST_DESTROY:
         case OST_OPEN:
         case OST_PUNCH:
-        case OST_SETATTR: 
+        case OST_SETATTR:
         case OST_SYNCFS:
         case OST_WRITE:
+        case OBD_LOG_CANCEL:
         case LDLM_ENQUEUE:
                 *process = target_queue_recovery_request(req, obd);
                 RETURN(0);
@@ -881,7 +927,7 @@ static int ost_handle(struct ptlrpc_request *req)
                 int abort_recovery, recovering;
 
                 if (req->rq_export == NULL) {
-                        CERROR("lustre_ost: operation %d on unconnected OST\n",
+                        CDEBUG(D_HA, "operation %d on unconnected OST\n",
                                req->rq_reqmsg->opc);
                         req->rq_status = -ENOTCONN;
                         GOTO(out, rc = -ENOTCONN);
@@ -901,7 +947,7 @@ static int ost_handle(struct ptlrpc_request *req)
                         if (rc || !should_process)
                                 RETURN(rc);
                 }
-        } 
+        }
 
         if (strcmp(req->rq_obd->obd_type->typ_name, "ost") != 0)
                 GOTO(out, rc = -EINVAL);
@@ -988,10 +1034,19 @@ static int ost_handle(struct ptlrpc_request *req)
                 OBD_FAIL_RETURN(OBD_FAIL_OST_SYNCFS_NET, 0);
                 rc = ost_syncfs(req);
                 break;
+        case OST_SET_INFO:
+                DEBUG_REQ(D_INODE, req, "set_info");
+                rc = ost_set_info(req);
+                break;
         case OBD_PING:
                 DEBUG_REQ(D_INODE, req, "ping");
                 rc = target_handle_ping(req);
                 break;
+        case OBD_LOG_CANCEL:
+                CDEBUG(D_INODE, "log cancel\n");
+                OBD_FAIL_RETURN(OBD_FAIL_OBD_LOG_CANCEL_NET, 0);
+                rc = ost_log_cancel(req);
+                break;
         case LDLM_ENQUEUE:
                 CDEBUG(D_INODE, "enqueue\n");
                 OBD_FAIL_RETURN(OBD_FAIL_LDLM_ENQUEUE, 0);
@@ -1058,17 +1112,22 @@ out:
 static int ost_setup(struct obd_device *obddev, obd_count len, void *buf)
 {
         struct ost_obd *ost = &obddev->u.ost;
-        int err;
-        int i;
+        int err, i;
         ENTRY;
 
+#ifdef ENABLE_ORPHANS
+        err = llog_start_commit_thread();
+        if (err < 0)
+                RETURN(err);
+#endif
+
         ost->ost_service = ptlrpc_init_svc(OST_NEVENTS, OST_NBUFS,
                                            OST_BUFSIZE, OST_MAXREQSIZE,
                                            OST_REQUEST_PORTAL, OSC_REPLY_PORTAL,
                                            ost_handle, "ost", obddev);
         if (!ost->ost_service) {
                 CERROR("failed to start service\n");
-                GOTO(error_disc, err = -ENOMEM);
+                RETURN(-ENOMEM);
         }
 
         for (i = 0; i < OST_NUM_THREADS; i++) {
@@ -1077,17 +1136,14 @@ static int ost_setup(struct obd_device *obddev, obd_count len, void *buf)
                 err = ptlrpc_start_thread(obddev, ost->ost_service, name);
                 if (err) {
                         CERROR("error starting thread #%d: rc %d\n", i, err);
-                        GOTO(error_disc, err = -EINVAL);
+                        RETURN(-EINVAL);
                 }
         }
 
         RETURN(0);
-
-error_disc:
-        RETURN(err);
 }
 
-static int ost_cleanup(struct obd_device *obddev, int force, int failover)
+static int ost_cleanup(struct obd_device *obddev, int flags)
 {
         struct ost_obd *ost = &obddev->u.ost;
         int err = 0;
@@ -1106,7 +1162,7 @@ int ost_attach(struct obd_device *dev, obd_count len, void *data)
 {
         struct lprocfs_static_vars lvars;
 
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(ost,&lvars);
         return lprocfs_obd_attach(dev, lvars.obd_vars);
 }
 
@@ -1115,7 +1171,7 @@ int ost_detach(struct obd_device *dev)
         return lprocfs_obd_detach(dev);
 }
 
-/* I don't think this function is ever used, since nothing 
+/* I don't think this function is ever used, since nothing
  * connects directly to this module.
  */
 static int ost_connect(struct lustre_handle *conn,
@@ -1153,12 +1209,12 @@ static int __init ost_init(void)
         struct lprocfs_static_vars lvars;
         ENTRY;
 
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(ost,&lvars);
         RETURN(class_register_type(&ost_obd_ops, lvars.module_vars,
                                    LUSTRE_OST_NAME));
 }
 
-static void __exit ost_exit(void)
+static void /*__exit*/ ost_exit(void)
 {
         class_unregister_type(LUSTRE_OST_NAME);
 }
index 99ac885..c1a9bdf 100644 (file)
@@ -6,3 +6,4 @@ autom4te.cache
 config.log
 config.status
 configure
+.*.o.cmd
index 29a713f..7a48c58 100644 (file)
@@ -1 +1,6 @@
-EXTRA_CFLAGS= -Ifs/lustre/include -Ifs/lustre/portals/include
+EXTRA_CFLAGS := -Ifs/lustre/include -Ifs/lustre/portals/include
+# portals/utils/debug.c wants <linux/version.h> from userspace.  sigh.
+HOSTCFLAGS := -I@LINUX@/include $(EXTRA_CFLAGS)
+LIBREADLINE := @LIBREADLINE@
+# 2.5's makefiles aren't nice to cross dir libraries in host programs
+PTLCTLOBJS := debug.o l_ioctl.o parser.o portals.o
index 29a713f..7c66dfa 100644 (file)
@@ -1 +1,4 @@
-EXTRA_CFLAGS= -Ifs/lustre/include -Ifs/lustre/portals/include
+EXTRA_CFLAGS := -Ifs/lustre/include -Ifs/lustre/portals/include
+HOSTCFLAGS := $(EXTRA_CFLAGS)
+# the kernel doesn't want us to build archives for host binaries :/
+PTLCTLOBJS := debug.o l_ioctl.o parser.o portals.o
index be0e51a..73a19df 100644 (file)
@@ -1,6 +1,12 @@
-include fs/lustre/portals/Kernelenv
+include $(src)/Kernelenv
 
-obj-y += portals/
+# The ordering of these determines the order that each subsystem's 
+# module_init() functions are called in.  if these are changed make sure
+# they reflect the dependencies between each subsystem's _init functions.
 obj-y += libcfs/
-obj-y += knals/
+obj-y += portals/
 obj-y += router/
+obj-y += knals/
+obj-y += tests/
+
+obj-m += utils/
index 7a4e05c..1a7741b 100644 (file)
@@ -11,8 +11,13 @@ AC_ARG_WITH(lib, [  --with-lib compile lustre library], host_cpu="lib")
 
 AC_ARG_WITH(linux, [  --with-linux=[path] set path to Linux source (default=/usr/src/linux)],LINUX=$with_linux,LINUX=/usr/src/linux)
 AC_SUBST(LINUX)
+if test x$enable_inkernel = xyes ; then
+        echo ln -s `pwd` $LINUX/fs/lustre
+        rm -f $LINUX/fs/lustre
+        ln -s `pwd` $LINUX/fs/lustre
+fi
 
-# --------- UML?  --------------------
+#  --------------------
 AC_MSG_CHECKING(if you are running user mode linux for $host_cpu ...)
 if test $host_cpu = "lib" ; then 
         host_cpu="lib"
@@ -111,6 +116,13 @@ case ${host_cpu} in
         MOD_LINK=elf64_ia64
 ;;
 
+       x86_64 )
+       AC_MSG_RESULT($host_cpu)
+        KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -fomit-frame-pointer -mno-red-zone -mcmodel=kernel -pipe -fno-reorder-blocks -finline-limit=2000 -fno-strength-reduce -fno-asynchronous-unwind-tables'
+       KCPPFLAGS='-D__KERNEL__ -DMODULE'
+        MOD_LINK=elf_x86_64
+;;
+
        sparc64 )
        AC_MSG_RESULT($host_cpu)
         KCFLAGS='-O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -Wno-unused -m64 -pipe -mno-fpu -mcpu=ultrasparc -mcmodel=medlow -ffixed-g4 -fcall-used-g5 -fcall-used-g7 -Wno-sign-compare -Wa,--undeclared-regs'
@@ -160,21 +172,33 @@ if test $host_cpu != "lib" ; then
       AC_MSG_ERROR(** cannot find $LINUX/include/linux/autoconf.h. Run make config in $LINUX.)
   fi
 
-# ------------ RELEASE and moduledir ------------------
+# ------------ LINUXRELEASE and moduledir ------------------
   AC_MSG_CHECKING(for Linux release)
   
   dnl We need to rid ourselves of the nasty [ ] quotes.
   changequote(, )
   dnl Get release from version.h
-  RELEASE="`sed -ne 's/.*UTS_RELEASE[ \"]*\([0-9.a-zA-Z_-]*\).*/\1/p' $LINUX/include/linux/version.h`"
+  LINUXRELEASE="`sed -ne 's/.*UTS_RELEASE[ \"]*\([0-9.a-zA-Z_-]*\).*/\1/p' $LINUX/include/linux/version.h`"
   changequote([, ])
   
-  moduledir='$(libdir)/modules/'$RELEASE/kernel
+  moduledir='$(libdir)/modules/'$LINUXRELEASE/kernel
   AC_SUBST(moduledir)
   
   modulefsdir='$(moduledir)/fs/$(PACKAGE)'
   AC_SUBST(modulefsdir)
   
+  AC_MSG_RESULT($LINUXRELEASE)
+  AC_SUBST(LINUXRELEASE)
+
+# ------------ RELEASE --------------------------------
+  AC_MSG_CHECKING(lustre release)
+  
+  dnl We need to rid ourselves of the nasty [ ] quotes.
+  changequote(, )
+  dnl Get release from version.h
+  RELEASE="`sed -ne 's/-/_/g' -e 's/.*UTS_RELEASE[ \"]*\([0-9.a-zA-Z_]*\).*/\1/p' $LINUX/include/linux/version.h`_`date +%Y%m%d%H%M`"
+  changequote([, ])
+
   AC_MSG_RESULT($RELEASE)
   AC_SUBST(RELEASE)
 
@@ -302,7 +326,7 @@ AM_CONDITIONAL(LIBLUSTRE, test x$host_cpu = xlib)
 # This needs to run after we've defined the KCPPFLAGS
 
 AC_MSG_CHECKING(for kernel version)
-AC_TRY_LINK([#define __KERNEL__
+AC_TRY_COMPILE([#define __KERNEL__
              #include <linux/sched.h>],
             [struct task_struct p;
              p.sighand = NULL;],
@@ -313,5 +337,5 @@ if test $RH_2_4_20 = 1; then
        AC_MSG_RESULT(redhat-2.4.20)
        CPPFLAGS="$CPPFLAGS -DCONFIG_RH_2_4_20"
 else
-       AC_MSG_RESULT($RELEASE)
+       AC_MSG_RESULT($LINUXRELEASE)
 fi 
index 3aa6909..f9605ab 100644 (file)
@@ -1,5 +1,11 @@
 /* portals/include/config.h.in.  Generated from configure.in by autoheader.  */
 
+/* Compile with orphan support */
+#undef ENABLE_ORPHANS
+
+/* Use the Pinger */
+#undef ENABLE_PINGER
+
 /* Define to 1 if you have the <inttypes.h> header file. */
 #undef HAVE_INTTYPES_H
 
index ee3b9fc..2133391 100644 (file)
@@ -4,7 +4,6 @@
 #ifndef _KP30_INCLUDED
 #define _KP30_INCLUDED
 
-
 #define PORTAL_DEBUG
 
 #ifndef offsetof
 
 #define LOWEST_BIT_SET(x)      ((x) & ~((x) - 1))
 
-#ifndef CONFIG_SMP
-# define smp_processor_id() 0
-#endif
-
 /*
  *  Debugging
  */
@@ -24,39 +19,34 @@ extern unsigned int portal_subsystem_debug;
 extern unsigned int portal_stack;
 extern unsigned int portal_debug;
 extern unsigned int portal_printk;
-/* Debugging subsystems  (8 bit ID)
- *
- * If you add debug subsystem #32, you need to send email to phil, because
- * you're going to break kernel subsystem debug filtering. */
-#define S_UNDEFINED    (0 << 24)
-#define S_MDC          (1 << 24)
-#define S_MDS          (2 << 24)
-#define S_OSC          (3 << 24)
-#define S_OST          (4 << 24)
-#define S_CLASS        (5 << 24)
-#define S_OBDFS        (6 << 24) /* obsolete */
-#define S_LLITE        (7 << 24)
-#define S_RPC          (8 << 24)
-#define S_EXT2OBD      (9 << 24) /* obsolete */
-#define S_PORTALS     (10 << 24)
-#define S_SOCKNAL     (11 << 24)
-#define S_QSWNAL      (12 << 24)
-#define S_PINGER      (13 << 24)
-#define S_FILTER      (14 << 24)
-#define S_TRACE       (15 << 24) /* obsolete */
-#define S_ECHO        (16 << 24)
-#define S_LDLM        (17 << 24)
-#define S_LOV         (18 << 24)
-#define S_GMNAL       (19 << 24)
-#define S_PTLROUTER   (20 << 24)
-#define S_COBD        (21 << 24)
-#define S_PTLBD       (22 << 24)
-#define S_LOG         (23 << 24)
-
-/* If you change these values, please keep portals/linux/utils/debug.c
+/* Debugging subsystems: one bit each, tested against portal_subsystem_debug */
+#define S_UNDEFINED    (1 << 0)
+#define S_MDC          (1 << 1)
+#define S_MDS          (1 << 2)
+#define S_OSC          (1 << 3)
+#define S_OST          (1 << 4)
+#define S_CLASS        (1 << 5)
+#define S_LOG          (1 << 6)
+#define S_LLITE        (1 << 7)
+#define S_RPC          (1 << 8)
+#define S_MGMT         (1 << 9)
+#define S_PORTALS     (1 << 10)
+#define S_SOCKNAL     (1 << 11)
+#define S_QSWNAL      (1 << 12)
+#define S_PINGER      (1 << 13)
+#define S_FILTER      (1 << 14)
+#define S_PTLBD       (1 << 15)
+#define S_ECHO        (1 << 16)
+#define S_LDLM        (1 << 17)
+#define S_LOV         (1 << 18)
+#define S_GMNAL       (1 << 19)
+#define S_PTLROUTER   (1 << 20)
+#define S_COBD        (1 << 21)
+
+/* If you change these values, please keep portals/utils/debug.c
  * up to date! */
 
-/* Debugging masks (24 bits, non-overlapping) */
+/* Debugging masks (32 bits, non-overlapping) */
 #define D_TRACE     (1 << 0) /* ENTRY/EXIT markers */
 #define D_INODE     (1 << 1)
 #define D_SUPER     (1 << 2)
@@ -80,20 +70,23 @@ extern unsigned int portal_printk;
 #define D_RPCTRACE  (1 << 20) /* for distributed debugging */
 #define D_VFSTRACE  (1 << 21)
 
-#ifndef __KERNEL__
-#define THREAD_SIZE 8192
+#ifdef __KERNEL__
+# include <linux/sched.h> /* THREAD_SIZE */
+#else
+# define THREAD_SIZE 8192
 #endif
-#ifdef  __ia64__
-#define CDEBUG_STACK() (THREAD_SIZE -                                      \
+
+#ifdef __KERNEL__
+# ifdef  __ia64__
+#  define CDEBUG_STACK (THREAD_SIZE -                                      \
                         ((unsigned long)__builtin_dwarf_cfa() &            \
                          (THREAD_SIZE - 1)))
-#else
-#define CDEBUG_STACK() (THREAD_SIZE -                                      \
+# else
+#  define CDEBUG_STACK (THREAD_SIZE -                                      \
                         ((unsigned long)__builtin_frame_address(0) &       \
                          (THREAD_SIZE - 1)))
-#endif
+# endif
 
-#ifdef __KERNEL__
 #define CHECK_STACK(stack)                                                    \
         do {                                                                  \
                 if ((stack) > 3*THREAD_SIZE/4 && (stack) > portal_stack) {    \
@@ -105,20 +98,21 @@ extern unsigned int portal_printk;
                       /*panic("LBUG");*/                                      \
                 }                                                             \
         } while (0)
-#else
+#else /* __KERNEL__ */
 #define CHECK_STACK(stack) do { } while(0)
-#endif
+#define CDEBUG_STACK (0L)
+#endif /* __KERNEL__ */
 
 #if 1
 #define CDEBUG(mask, format, a...)                                            \
 do {                                                                          \
-        CHECK_STACK(CDEBUG_STACK());                                          \
+        CHECK_STACK(CDEBUG_STACK);                                            \
         if (!(mask) || ((mask) & (D_ERROR | D_EMERG)) ||                      \
             (portal_debug & (mask) &&                                         \
-             portal_subsystem_debug & (1 << (DEBUG_SUBSYSTEM >> 24))))        \
+             portal_subsystem_debug & DEBUG_SUBSYSTEM))                       \
                 portals_debug_msg(DEBUG_SUBSYSTEM, mask,                      \
                                   __FILE__, __FUNCTION__, __LINE__,           \
-                                  CDEBUG_STACK(), format , ## a);             \
+                                  CDEBUG_STACK, format, ## a);                \
 } while (0)
 
 #define CWARN(format, a...) CDEBUG(D_WARNING, format, ## a)
@@ -162,7 +156,6 @@ do {                                                                    \
 #define EXIT                            do { } while (0)
 #endif
 
-
 #ifdef __KERNEL__
 # include <linux/vmalloc.h>
 # include <linux/time.h>
@@ -210,7 +203,8 @@ static inline void our_cond_resched(void)
 #endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) */
 
 #ifdef PORTAL_DEBUG
-extern void kportal_assertion_failed(char *expr,char *file,char *func,int line);
+extern void kportal_assertion_failed(char *expr, char *file, const char *func,
+                                     const int line);
 #define LASSERT(e) ((e) ? 0 : kportal_assertion_failed( #e , __FILE__,  \
                                                         __FUNCTION__, __LINE__))
 #else
@@ -560,14 +554,14 @@ extern struct prof_ent prof_ents[MAX_PROFS];
 #endif /* PORTALS_PROFILING */
 
 /* debug.c */
-void portals_run_lbug_upcall(char * file, char *fn, int line);
+void portals_run_lbug_upcall(char * file, const char *fn, const int line);
 void portals_debug_dumplog(void);
 int portals_debug_init(unsigned long bufsize);
 int portals_debug_cleanup(void);
 int portals_debug_clear_buffer(void);
 int portals_debug_mark_buffer(char *text);
 int portals_debug_set_daemon(unsigned int cmd, unsigned int length,
-                char *file, unsigned int size);
+                             char *file, unsigned int size);
 __s32 portals_debug_copy_to_user(char *buf, unsigned long len);
 #if (__GNUC__)
 /* Use the special GNU C __attribute__ hack to have the compiler check the
@@ -578,13 +572,14 @@ __s32 portals_debug_copy_to_user(char *buf, unsigned long len);
 # warning printf has been defined as a macro...
 # undef printf
 #endif
-void portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
-                        unsigned long stack, const char *format, ...)
+void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
+                       const int line, unsigned long stack,
+                       const char *format, ...)
         __attribute__ ((format (printf, 7, 8)));
 #else
-void portals_debug_msg (int subsys, int mask, char *file, char *fn,
-                        int line, unsigned long stack,
-                        const char *format, ...);
+void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
+                       const int line, unsigned long stack,
+                       const char *format, ...);
 #endif /* __GNUC__ */
 void portals_debug_set_level(unsigned int debug_level);
 
@@ -618,9 +613,9 @@ extern void kportal_blockallsigs (void);
 # define PORTAL_ALLOC(ptr, size) do { (ptr) = malloc(size); } while (0);
 # define PORTAL_FREE(a, b) do { free(a); } while (0);
 # define portals_debug_msg(subsys, mask, file, fn, line, stack, format, a...) \
-    printf ("%02x:%06x (@%lu %s:%s,l. %d %d %lu): " format,                    \
-            (subsys) >> 24, (mask), (long)time(0), file, fn, line,            \
-            getpid() , stack, ## a);
+    printf("%06x:%06x (@%lu %s:%s,l. %d %d %lu): " format,                    \
+           (subsys), (mask), (unsigned long)time(0), file, fn, line,          \
+           getpid() , stack, ## a); /* subsys is a bitmask: 6 hex digits, as in the kernel log */
 #endif
 
 #ifndef CURRENT_TIME
@@ -911,13 +906,13 @@ ptl_handle_ni_t *kportal_get_ni (int nal);
 void kportal_put_ni (int nal);
 
 #ifdef __CYGWIN__
-#ifndef BITS_PER_LONG
-#if (~0UL) == 0xffffffffUL
-#define BITS_PER_LONG 32
-#else
-#define BITS_PER_LONG 64
-#endif
-#endif
+# ifndef BITS_PER_LONG
+#  if (~0UL) == 0xffffffffUL
+#   define BITS_PER_LONG 32
+#  else
+#   define BITS_PER_LONG 64
+#  endif
+# endif
 #endif
 
 #if (BITS_PER_LONG == 32 || __WORDSIZE == 32)
index e28fbac..a7cb4d1 100644 (file)
@@ -1,13 +1,56 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef _PORTALS_COMPAT_H
+#define _PORTALS_COMPAT_H
+
+// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved
+#if SPINLOCK_DEBUG
+# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) || defined(CONFIG_RH_2_4_20)
+#  define SIGNAL_MASK_ASSERT() \
+   LASSERT(current->sighand->siglock.magic == SPINLOCK_MAGIC)
+# else
+#  define SIGNAL_MASK_ASSERT() \
+   LASSERT(current->sigmask_lock.magic == SPINLOCK_MAGIC)
+# endif
+#else
+# define SIGNAL_MASK_ASSERT()
+#endif
+// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved
+
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) || defined(CONFIG_RH_2_4_20)
-# define SIGNAL_MASK_LOCK(task, flags)                              \
+
+# define SIGNAL_MASK_LOCK(task, flags)                                  \
   spin_lock_irqsave(&task->sighand->siglock, flags)
-# define SIGNAL_MASK_UNLOCK(task, flags)                            \
+# define SIGNAL_MASK_UNLOCK(task, flags)                                \
   spin_unlock_irqrestore(&task->sighand->siglock, flags)
+# define USERMODEHELPER(path, argv, envp)                               \
+  call_usermodehelper(path, argv, envp, 1)
 # define RECALC_SIGPENDING         recalc_sigpending()
-#else
-# define SIGNAL_MASK_LOCK(task, flags)                              \
+# define CURRENT_SECONDS           get_seconds()
+
+#else /* 2.4.x */
+
+# define SIGNAL_MASK_LOCK(task, flags)                                  \
   spin_lock_irqsave(&task->sigmask_lock, flags)
-# define SIGNAL_MASK_UNLOCK(task, flags)                            \
+# define SIGNAL_MASK_UNLOCK(task, flags)                                \
   spin_unlock_irqrestore(&task->sigmask_lock, flags)
+# define USERMODEHELPER(path, argv, envp)                               \
+  call_usermodehelper(path, argv, envp)
 # define RECALC_SIGPENDING         recalc_sigpending(current)
+# define CURRENT_SECONDS           CURRENT_TIME
+
+#endif
+
+#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
+# define THREAD_NAME(comm, fmt, a...)                                   \
+        sprintf(comm, fmt "|%d", ## a, current->thread.extern_pid)
+#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+# define THREAD_NAME(comm, fmt, a...)                                   \
+        sprintf(comm, fmt "|%d", ## a, current->thread.mode.tt.extern_pid)
+#else
+# define THREAD_NAME(comm, fmt, a...)                                   \
+        sprintf(comm, fmt, ## a)
 #endif
+
+#endif /* _PORTALS_COMPAT_H */
index 2b63312..78a1e2d 100644 (file)
@@ -1,6 +1,4 @@
 #ifndef _LINUX_LIST_H
-#define _LINUX_LIST_H
-
 
 /*
  * Simple doubly linked list implementation.
@@ -101,7 +99,9 @@ static inline void list_del_init(struct list_head *entry)
        __list_del(entry->prev, entry->next);
        INIT_LIST_HEAD(entry);
 }
+#endif
 
+#ifndef list_for_each_entry
 /**
  * list_move - delete from one list and add as another's head
  * @list: the entry to move
@@ -124,7 +124,10 @@ static inline void list_move_tail(struct list_head *list,
        __list_del(list->prev, list->next);
        list_add_tail(list, head);
 }
+#endif
 
+#ifndef _LINUX_LIST_H
+#define _LINUX_LIST_H
 /**
  * list_empty - tests whether a list is empty
  * @head: the list to test.
index 7d1b304..d389aab 100644 (file)
@@ -2,7 +2,7 @@
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
  * Compile with:
- * cc -I../../portals/include -o fio fio.c -L../../portals/linux/utils -lptlctl 
+ * cc -I../../portals/include -o fio fio.c -L../../portals/utils -lptlctl
  */
 #ifndef __LTRACE_H_
 #define __LTRACE_H_
@@ -31,20 +31,20 @@ static inline int ltrace_write_file(char* fname)
         argv[0] = "debug_kernel";
         argv[1] = fname;
         argv[2] = "1";
-        
+
         fprintf(stderr, "[ptlctl] %s %s %s\n", argv[0], argv[1], argv[2]);
-        
+
         return jt_dbg_debug_kernel(3, argv);
 }
 
 static inline int ltrace_clear()
 {
         char* argv[1];
-        
+
         argv[0] = "clear";
-        
+
         fprintf(stderr, "[ptlctl] %s\n", argv[0]);
-        
+
         return jt_dbg_clear_debug_buf(1, argv);
 }
 
@@ -52,9 +52,9 @@ static inline int ltrace_mark(int indent_level, char* text)
 {
         char* argv[2];
         char mark_buf[PATH_MAX];
-        
+
         snprintf(mark_buf, PATH_MAX, "====%d=%s", indent_level, text);
-        
+
         argv[0] = "mark";
         argv[1] = mark_buf;
         return jt_dbg_mark_debug_buf(2, argv);
@@ -65,9 +65,9 @@ static inline int ltrace_applymasks()
         char* argv[2];
         argv[0] = "list";
         argv[1] = "applymasks";
-        
+
         fprintf(stderr, "[ptlctl] %s %s\n", argv[0], argv[1]);
-        
+
         return jt_dbg_list(2, argv);
 }
 
@@ -95,19 +95,19 @@ static inline int ltrace_start()
 #ifdef PORTALS_DEV_ID
         rc = register_ioc_dev(PORTALS_DEV_ID, PORTALS_DEV_PATH);
 #endif
-        ltrace_filter("class"); 
+        ltrace_filter("class");
         ltrace_filter("socknal");
-        ltrace_filter("qswnal"); 
-        ltrace_filter("gmnal");  
-        ltrace_filter("portals");  
-        
-        ltrace_show("all_types");  
-        ltrace_filter("trace");  
-        ltrace_filter("malloc"); 
-        ltrace_filter("net"); 
-        ltrace_filter("page"); 
-        ltrace_filter("other"); 
-        ltrace_filter("info"); 
+        ltrace_filter("qswnal");
+        ltrace_filter("gmnal");
+        ltrace_filter("portals");
+
+        ltrace_show("all_types");
+        ltrace_filter("trace");
+        ltrace_filter("malloc");
+        ltrace_filter("net");
+        ltrace_filter("page");
+        ltrace_filter("other");
+        ltrace_filter("info");
         ltrace_applymasks();
 
         return rc;
@@ -146,7 +146,7 @@ static inline void ltrace_add_processnames(char* fname)
         struct timezone tz;
         int nob;
         int underuml = !not_uml();
-        
+
         gettimeofday(&tv, &tz);
 
         nob = snprintf(cmdbuf, LTRACE_MAX_NOB, "ps --no-headers -eo \"");
@@ -167,7 +167,7 @@ static inline void ltrace_add_processnames(char* fname)
                                  "(%s:%d:%s() %d+%lu): ",
                                  "lltrace.h", __LINE__, __FUNCTION__, 0, 0L);
         }
-         
+
         nob += snprintf(cmdbuf+nob, LTRACE_MAX_NOB, " %%p %%c\" >> %s", fname);
         system(cmdbuf);
 }
index 4727599..760f465 100644 (file)
@@ -1,6 +1,3 @@
-/*
- */
-
 #ifndef _INCppidh_
 #define _INCppidh_
 
index d4038b6..0269290 100644 (file)
@@ -2,14 +2,19 @@
 #define _P30_TYPES_H_
 
 #ifdef __linux__
-#include <asm/types.h>
-#include <asm/timex.h>
+# include <asm/types.h>
+# include <asm/timex.h>
 #else
-#include <sys/types.h>
+# include <sys/types.h>
 typedef u_int32_t __u32;
 typedef u_int64_t __u64;
-typedef unsigned long long cycles_t;
-static inline cycles_t get_cycles(void) { return 0; }
+#endif
+
+#ifdef __KERNEL__
+# include <linux/time.h>
+#else
+# include <sys/time.h>
+# define do_gettimeofday(tv) gettimeofday(tv, NULL)
 #endif
 
 typedef __u64 ptl_nid_t;
@@ -31,7 +36,7 @@ typedef ptl_handle_any_t ptl_handle_md_t;
 typedef ptl_handle_any_t ptl_handle_me_t;
 
 #define PTL_HANDLE_NONE \
-((const ptl_handle_any_t){.nal_idx = -1, .cookie = -1})
+    ((const ptl_handle_any_t){.nal_idx = -1, .cookie = -1})
 #define PTL_EQ_NONE PTL_HANDLE_NONE
 
 static inline int PtlHandleEqual (ptl_handle_any_t h1, ptl_handle_any_t h2)
@@ -108,17 +113,15 @@ typedef struct {
         ptl_handle_me_t unlinked_me;
         ptl_md_t mem_desc;
         ptl_hdr_data_t hdr_data;
-        cycles_t  arrival_time;
+        struct timeval arrival_time;
         volatile ptl_seq_t sequence;
 } ptl_event_t;
 
-
 typedef enum {
         PTL_ACK_REQ,
         PTL_NOACK_REQ
 } ptl_ack_req_t;
 
-
 typedef struct {
         volatile ptl_seq_t sequence;
         ptl_size_t size;
@@ -130,7 +133,6 @@ typedef struct {
         ptl_eq_t *eq;
 } ptl_ni_t;
 
-
 typedef struct {
         int max_match_entries;    /* max number of match entries */
         int max_mem_descriptors;  /* max number of memory descriptors */
index ce40a60..cd5d9d6 100644 (file)
@@ -1,4 +1,4 @@
-include ../Kernelenv
+include $(obj)/../Kernelenv
 
 obj-y = socknal/
-# more coming...
\ No newline at end of file
+# more coming...
index ceeea2a..0cffc15 100644 (file)
@@ -124,7 +124,7 @@ static nal_t *kgmnal_init(int interface, ptl_pt_index_t ptl_size,
         return &kgmnal_api;
 }
 
-static void __exit
+static void /*__exit*/
 kgmnal_finalize(void)
 {
         struct list_head *tmp;
index 1066d69..479cc2c 100644 (file)
@@ -112,7 +112,7 @@ static nal_t *kscimacnal_init(int interface, ptl_pt_index_t  ptl_size,
 
 
 /* Called by kernel at module unload time */
-static void __exit 
+static void /*__exit*/ 
 kscimacnal_finalize(void)
 {
         /* FIXME: How should the shutdown procedure really look? */
index 46edf01..5c1b366 100644 (file)
@@ -3,7 +3,7 @@
 # This code is issued under the GNU General Public License.
 # See the file COPYING in this distribution
 
-include ../../Kernelenv
+include $(src)/../../Kernelenv
 
 obj-y += ksocknal.o
 ksocknal-objs    := socknal.o socknal_cb.o
index 1f5dc38..77ee473 100644 (file)
@@ -379,7 +379,7 @@ ktoenal_cmd(struct portal_ioctl_data * data, void * private)
 }
 
 
-void __exit
+void /*__exit*/
 ktoenal_module_fini (void)
 {
         CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
index ec37f6f..abd0731 100644 (file)
@@ -893,6 +893,7 @@ ktoenal_process_receive (ksock_conn_t *conn, long *irq_flags)
                                 spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags);
                                 goto get_fmb;   /* => go get a fwd msg buffer */
                         default:
+                                break;
                         }
                         /* Not Reached */
                         LBUG ();
@@ -934,6 +935,7 @@ ktoenal_process_receive (ksock_conn_t *conn, long *irq_flags)
                 goto out;                       /* (later) */
 
         default:
+                break;
         }
 
         /* Not Reached */
index 67d1a3d..7fa686f 100644 (file)
@@ -2,3 +2,4 @@
 Makefile
 Makefile.in
 link-stamp
+.*.o.cmd
index 3196ea2..9aa838f 100644 (file)
@@ -6,4 +6,4 @@
 include fs/lustre/portals/Kernelenv
 
 obj-y += libcfs.o
-licfs-objs    := module.o proc.o debug.o
\ No newline at end of file
+libcfs-objs    := module.o proc.o debug.o
index 8d26dbb..f37cd96 100644 (file)
@@ -571,8 +571,8 @@ int portals_debug_init(unsigned long bufsize)
         memset(debug_buf, 0, debug_size);
         debug_wrapped = 0;
 
-        printk(KERN_INFO "Portals: allocated %lu byte debug buffer at %p.\n",
-               bufsize, debug_buf);
+        //printk(KERN_INFO "Portals: allocated %lu byte debug buffer at %p.\n",
+               //bufsize, debug_buf);
         atomic_set(&debug_off_a, debug_off);
         notifier_chain_register(&panic_notifier_list, &lustre_panic_notifier);
         debug_size = bufsize;
@@ -632,9 +632,9 @@ int portals_debug_mark_buffer(char *text)
         if (debug_buf == NULL)
                 return -EINVAL;
 
-        CDEBUG(0, "*******************************************************************************\n");
+        CDEBUG(0, "********************************************************\n");
         CDEBUG(0, "DEBUG MARKER: %s\n", text);
-        CDEBUG(0, "*******************************************************************************\n");
+        CDEBUG(0, "********************************************************\n");
 
         return 0;
 }
@@ -672,8 +672,8 @@ __s32 portals_debug_copy_to_user(char *buf, unsigned long len)
 
 /* FIXME: I'm not very smart; someone smarter should make this better. */
 void
-portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
-                   unsigned long stack, const char *format, ...)
+portals_debug_msg(int subsys, int mask, char *file, const char *fn,
+                  const int line, unsigned long stack, const char *format, ...)
 {
         va_list       ap;
         unsigned long flags;
@@ -728,8 +728,8 @@ portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
         do_gettimeofday(&tv);
 
         prefix_nob = snprintf(debug_buf + debug_off, max_nob,
-                              "%02x:%06x:%d:%lu.%06lu ",
-                              subsys >> 24, mask, smp_processor_id(),
+                              "%06x:%06x:%d:%lu.%06lu ",
+                              subsys, mask, smp_processor_id(),
                               tv.tv_sec, tv.tv_usec);
         max_nob -= prefix_nob;
 
@@ -752,7 +752,7 @@ portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
 
         va_start(ap, format);
         msg_nob += vsnprintf(debug_buf + debug_off + prefix_nob + msg_nob,
-                            max_nob, format, ap);
+                             max_nob, format, ap);
         max_nob -= msg_nob;
         va_end(ap);
 
@@ -790,7 +790,7 @@ void portals_debug_set_level(unsigned int debug_level)
         portal_debug = debug_level;
 }
 
-void portals_run_lbug_upcall(char * file, char *fn, int line)
+void portals_run_lbug_upcall(char *file, const char *fn, const int line)
 {
         char *argv[6];
         char *envp[3];
@@ -803,7 +803,7 @@ void portals_run_lbug_upcall(char * file, char *fn, int line)
         argv[0] = portals_upcall;
         argv[1] = "LBUG";
         argv[2] = file;
-        argv[3] = fn;
+        argv[3] = (char *)fn;
         argv[4] = buf;
         argv[5] = NULL;
 
index 14cc325..e8eb290 100644 (file)
@@ -62,10 +62,10 @@ static struct nal_cmd_handler nal_cmd[NAL_MAX_NR + 1];
 struct semaphore nal_cmd_sem;
 
 #ifdef PORTAL_DEBUG
-void
-kportal_assertion_failed (char *expr, char *file, char *func, int line)
+void kportal_assertion_failed(char *expr, char *file, const char *func,
+                              const int line)
 {
-        portals_debug_msg(0, D_EMERG, file, func, line, CDEBUG_STACK(),
+        portals_debug_msg(0, D_EMERG, file, func, line, CDEBUG_STACK,
                           "ASSERTION(%s) failed\n", expr);
         LBUG_WITH_LOC(file, func, line);
 }
index 5627ef7..7822846 100644 (file)
@@ -3,7 +3,10 @@
 # This code is issued under the GNU General Public License.
 # See the file COPYING in this distribution
 
-include ../Kernelenv
+include $(src)/../Kernelenv
 
 obj-y += portals.o
-portals-objs    := lib-dispatch.o lib-eq.o lib-init.o lib-md.o lib-me.o lib-move.o lib-msg.o lib-ni.o lib-not-impl.o lib-pid.o api-eq.o api-errno.o api-init.o api-md.o api-me.o api-ni.o api-wrap.o
+portals-objs    :=     lib-dispatch.o lib-eq.o lib-init.o lib-md.o lib-me.o \
+                       lib-move.o lib-msg.o lib-ni.o lib-pid.o \
+                       api-eq.o api-errno.o api-init.o api-me.o api-ni.o \
+                       api-wrap.o
index e59c922..dc1fead 100644 (file)
@@ -26,7 +26,7 @@
 #include <portals/api-support.h>
 
 int ptl_init;
-unsigned int portal_subsystem_debug = 0xfff7e3ff;
+unsigned int portal_subsystem_debug = ~(S_PORTALS | S_QSWNAL | S_SOCKNAL | S_GMNAL); /* all but portals + NALs */
 unsigned int portal_debug = ~0;
 unsigned int portal_printk;
 unsigned int portal_stack;
index fde4f16..02f8b60 100644 (file)
@@ -544,7 +544,7 @@ get_new_msg (nal_cb_t *nal, lib_md_t *md)
         msg->send_ack = 0;
 
         msg->md = md;
-        msg->ev.arrival_time = get_cycles();
+        do_gettimeofday(&msg->ev.arrival_time);
         md->pending++;
         if (md->threshold != PTL_MD_THRESH_INF) {
                 LASSERT (md->threshold > 0);
index 64bd09b..9b02c03 100644 (file)
@@ -3,7 +3,7 @@
 # This code is issued under the GNU General Public License.
 # See the file COPYING in this distribution
 
-include ../Kernelenv
+include $(src)/../Kernelenv
 
 obj-y += kptlrouter.o
 kptlrouter-objs    := router.o proc.o
index 6074c3c..27a7fba 100644 (file)
@@ -23,8 +23,8 @@
 
 #include "router.h"
 
-struct list_head kpr_routes;
-struct list_head kpr_nals;
+LIST_HEAD(kpr_routes);
+LIST_HEAD(kpr_nals);
 
 unsigned long long kpr_fwd_bytes;
 unsigned long      kpr_fwd_packets;
@@ -35,7 +35,7 @@ atomic_t           kpr_queue_depth;
  *
  * Once in a blue moon we register/deregister NALs and add/remove routing
  * entries (thread context only)... */
-rwlock_t         kpr_rwlock;
+rwlock_t         kpr_rwlock = RW_LOCK_UNLOCKED;
 
 kpr_router_interface_t kpr_router_interface = {
        kprri_register:         kpr_register_nal,
@@ -55,7 +55,7 @@ kpr_control_interface_t kpr_control_interface = {
 int
 kpr_register_nal (kpr_nal_interface_t *nalif, void **argp)
 {
-       long               flags;
+       unsigned long      flags;
        struct list_head  *e;
        kpr_nal_entry_t   *ne;
 
@@ -98,7 +98,7 @@ kpr_register_nal (kpr_nal_interface_t *nalif, void **argp)
 void
 kpr_shutdown_nal (void *arg)
 {
-       long             flags;
+       unsigned long    flags;
        kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg;
 
         CDEBUG (D_OTHER, "Shutting down NAL %d\n", ne->kpne_interface.kprni_nalid);
@@ -123,7 +123,7 @@ kpr_shutdown_nal (void *arg)
 void
 kpr_deregister_nal (void *arg)
 {
-       long              flags;
+       unsigned long     flags;
        kpr_nal_entry_t  *ne = (kpr_nal_entry_t *)arg;
 
         CDEBUG (D_OTHER, "Deregister NAL %d\n", ne->kpne_interface.kprni_nalid);
@@ -296,7 +296,7 @@ int
 kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, ptl_nid_t lo_nid,
                ptl_nid_t hi_nid)
 {
-       long               flags;
+       unsigned long      flags;
        struct list_head  *e;
        kpr_route_entry_t *re;
 
@@ -345,7 +345,7 @@ kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, ptl_nid_t lo_nid,
 int
 kpr_del_route (ptl_nid_t nid)
 {
-       long               flags;
+       unsigned long      flags;
        struct list_head  *e;
 
         CDEBUG(D_OTHER, "Del route "LPX64"\n", nid);
@@ -398,7 +398,7 @@ kpr_get_route(int idx, int *gateway_nalid, ptl_nid_t *gateway_nid,
         return (-ENOENT);
 }
 
-static void __exit
+static void /*__exit*/
 kpr_finalise (void)
 {
         LASSERT (list_empty (&kpr_nals));
@@ -427,10 +427,6 @@ kpr_initialise (void)
         CDEBUG(D_MALLOC, "kpr_initialise: kmem %d\n",
                atomic_read(&portal_kmemory));
 
-       rwlock_init(&kpr_rwlock);
-       INIT_LIST_HEAD(&kpr_routes);
-       INIT_LIST_HEAD(&kpr_nals);
-
         kpr_proc_init();
 
         PORTAL_SYMBOL_REGISTER(kpr_router_interface);
index 051d1bd..d0c4c88 100644 (file)
@@ -1,3 +1,4 @@
 Makefile
 Makefile.in
 .deps
+.*.o.cmd
index 389ffbb..4d04ffb 100644 (file)
@@ -260,7 +260,7 @@ pingcli_start(struct portal_ioctl_data *args)
 
 
 /* called by the portals_ioctl for ping requests */
-static int kping_client(struct portal_ioctl_data *args)
+int kping_client(struct portal_ioctl_data *args)
 {
         PORTAL_ALLOC (client, sizeof(struct pingcli_data));
         if (client == NULL)
@@ -282,7 +282,7 @@ static int __init pingcli_init(void)
 } /* pingcli_init() */
 
 
-static void __exit pingcli_cleanup(void)
+static void /*__exit*/ pingcli_cleanup(void)
 {
         PORTAL_SYMBOL_UNREGISTER (kping_client);
 } /* pingcli_cleanup() */
index 1037d09..873e11c 100644 (file)
 #include <asm/semaphore.h>
 
 #define STDSIZE (sizeof(int) + sizeof(int) + sizeof(struct timeval))
-#define MAXSIZE (16*1024*1024)
+#define MAXSIZE (16*1024)
 
 static unsigned ping_head_magic;
 static unsigned ping_bulk_magic;
-static int nal  = 0;                            // Your NAL,
+static int nal  = SOCKNAL;                            // Your NAL,
 static unsigned long packets_valid = 0;         // Valid packets 
 static int running = 1;
 atomic_t pkt;
@@ -282,7 +282,7 @@ static int __init pingsrv_init(void)
 } /* pingsrv_init() */
 
 
-static void __exit pingsrv_cleanup(void)
+static void /*__exit*/ pingsrv_cleanup(void)
 {
         remove_proc_entry ("net/pingsrv", NULL);
         
index 4cef08b..35e114b 100644 (file)
@@ -235,7 +235,7 @@ pingcli_start(struct portal_ioctl_data *args)
 
 
 /* called by the portals_ioctl for ping requests */
-static int kping_client(struct portal_ioctl_data *args)
+int kping_client(struct portal_ioctl_data *args)
 {
 
         PORTAL_ALLOC (client, sizeof(struct pingcli_data));
@@ -258,7 +258,7 @@ static int __init pingcli_init(void)
 } /* pingcli_init() */
 
 
-static void __exit pingcli_cleanup(void)
+static void /*__exit*/ pingcli_cleanup(void)
 {
         PORTAL_SYMBOL_UNREGISTER (kping_client);
 } /* pingcli_cleanup() */
index a18ea35..2b45a46 100644 (file)
@@ -269,7 +269,7 @@ static int __init pingsrv_init(void)
 } /* pingsrv_init() */
 
 
-static void __exit pingsrv_cleanup(void)
+static void /*__exit*/ pingsrv_cleanup(void)
 {
         remove_proc_entry ("net/pingsrv", NULL);
         
index 529bb2d..b73f042 100644 (file)
@@ -84,8 +84,8 @@ int portals_debug_copy_to_user(char *buf, unsigned long len)
 
 /* FIXME: I'm not very smart; someone smarter should make this better. */
 void
-portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
-                   const char *format, ...)
+portals_debug_msg (int subsys, int mask, char *file, const char *fn, 
+                   const int line, const char *format, ...)
 {
         va_list       ap;
         unsigned long flags;
index 148310a..8e474ad 100644 (file)
@@ -5,4 +5,5 @@ debugctl
 ptlctl
 .deps
 routerstat
-wirecheck
\ No newline at end of file
+wirecheck
+.*.cmd
index 9ab1c73..0a009d2 100644 (file)
@@ -53,17 +53,18 @@ static char rawbuf[8192];
 static char *buf = rawbuf;
 static int max = 8192;
 //static int g_pfd = -1;
-static int subsystem_array[1 << 8];
+static int subsystem_mask = ~0;
 static int debug_mask = ~0;
 
 static const char *portal_debug_subsystems[] =
-        {"undefined", "mdc", "mds", "osc", "ost", "class", "obdfs", "llite",
-         "rpc", "ext2obd", "portals", "socknal", "qswnal", "pinger", "filter",
-         "obdtrace", "echo", "ldlm", "lov", "gmnal", "router", "ptldb", NULL};
+        {"undefined", "mdc", "mds", "osc", "ost", "class", "log", "llite",
+         "rpc", "mgmt", "portals", "socknal", "qswnal", "pinger", "filter",
+         "ptlbd", "echo", "ldlm", "lov", "gmnal", "router", "cobd", NULL};
 static const char *portal_debug_masks[] =
         {"trace", "inode", "super", "ext2", "malloc", "cache", "info", "ioctl",
          "blocks", "net", "warning", "buffs", "other", "dentry", "portals",
-         "page", "dlmtrace", "error", "emerg", "ha", "rpctrace", "vfstrace", NULL};
+         "page", "dlmtrace", "error", "emerg", "ha", "rpctrace", "vfstrace",
+         NULL};
 
 struct debug_daemon_cmd {
         char *cmd;
@@ -88,7 +89,10 @@ static int do_debug_mask(char *name, int enable)
                         printf("%s output from subsystem \"%s\"\n",
                                 enable ? "Enabling" : "Disabling",
                                 portal_debug_subsystems[i]);
-                        subsystem_array[i] = enable;
+                        if (enable)
+                                subsystem_mask |= (1 << i);
+                        else
+                                subsystem_mask &= ~(1 << i);
                         found = 1;
                 }
         }
@@ -111,7 +115,6 @@ static int do_debug_mask(char *name, int enable)
 
 int dbg_initialize(int argc, char **argv)
 {
-        memset(subsystem_array, 1, sizeof(subsystem_array));
         return 0;
 }
 
@@ -213,12 +216,7 @@ int jt_dbg_list(int argc, char **argv)
                 for (i = 0; portal_debug_masks[i] != NULL; i++)
                         printf(", %s", portal_debug_masks[i]);
                 printf("\n");
-        }
-        else if (strcasecmp(argv[1], "applymasks") == 0) {
-                unsigned int subsystem_mask = 0;
-                for (i = 0; portal_debug_subsystems[i] != NULL; i++) {
-                        if (subsystem_array[i]) subsystem_mask |= (1 << i);
-                }
+        } else if (strcasecmp(argv[1], "applymasks") == 0) {
                 applymask_all(subsystem_mask, debug_mask);
         }
         return 0;
@@ -230,12 +228,6 @@ static void dump_buffer(FILE *fd, char *buf, int size, int raw)
 {
         char *p, *z;
         unsigned long subsystem, debug, dropped = 0, kept = 0;
-        int max_sub, max_type;
-
-        for (max_sub = 0; portal_debug_subsystems[max_sub] != NULL; max_sub++)
-                ;
-        for (max_type = 0; portal_debug_masks[max_type] != NULL; max_type++)
-                ;
 
         while (size) {
                 p = memchr(buf, '\n', size);
@@ -247,8 +239,7 @@ static void dump_buffer(FILE *fd, char *buf, int size, int raw)
                 z++;
                 /* for some reason %*s isn't working. */
                 *p = '\0';
-                if (subsystem < max_sub &&
-                    subsystem_array[subsystem] &&
+                if ((subsystem_mask & subsystem) &&
                     (!debug || (debug_mask & debug))) {
                         if (raw)
                                 fprintf(fd, "%s\n", buf);
@@ -551,6 +542,8 @@ int jt_dbg_modules(int argc, char **argv)
                 {"mds_ext3", "lustre/mds"},
                 {"mds_extN", "lustre/mds"},
                 {"ptlbd", "lustre/ptlbd"},
+                {"mgmt_svc", "lustre/mgmt"},
+                {"mgmt_cli", "lustre/mgmt"},
                 {NULL, NULL}
         };
         char *path = "..";
index 90d66f5..a89f4f7 100644 (file)
@@ -22,6 +22,7 @@
 
 #include <stdio.h>
 #include <sys/types.h>
+#include <netdb.h>
 #include <sys/socket.h>
 #include <netinet/tcp.h>
 #include <netdb.h>
@@ -106,6 +107,27 @@ nal2name (int nal)
         return ((e == NULL) ? "???" : e->name);
 }
 
+/* Resolve hname with gethostbyname(); on failure report the reason on stderr
+ * and return NULL.  Resolver failures are reported through h_errno, not
+ * errno, so the message text comes from hstrerror(). */
+static struct hostent *
+ptl_gethostbyname(char * hname) {
+        struct hostent *he = gethostbyname(hname);
+        if (he != NULL)
+                return he;
+        switch(h_errno) {
+        case HOST_NOT_FOUND:
+        case NO_ADDRESS:
+                fprintf(stderr, "Unable to resolve hostname: %s\n", hname);
+                break;
+        default:
+                fprintf(stderr, "gethostbyname error: %s\n",
+                        hstrerror(h_errno));
+                break;
+        }
+        return NULL;
+}
+
 int
 ptl_parse_nid (ptl_nid_t *nidp, char *str)
 {
@@ -127,7 +149,7 @@ ptl_parse_nid (ptl_nid_t *nidp, char *str)
         
         if ((('a' <= str[0] && str[0] <= 'z') ||
              ('A' <= str[0] && str[0] <= 'Z')) &&
-             (he = gethostbyname (str)) != NULL)
+             (he = ptl_gethostbyname (str)) != NULL)
         {
                 __u32 addr = *(__u32 *)he->h_addr;
 
@@ -351,12 +373,9 @@ int jt_ptl_connect(int argc, char **argv)
                         goto usage;
                 }
 
-                he = gethostbyname(argv[1]);
-                if (!he) {
-                        fprintf(stderr, "gethostbyname error: %s\n",
-                                strerror(errno));
+                he = ptl_gethostbyname(argv[1]);
+                if (!he)
                         return -1;
-                }
 
                 g_port = atol(argv[2]);
 
@@ -525,12 +544,9 @@ int jt_ptl_disconnect(int argc, char **argv)
 
                 PORTAL_IOC_INIT(data);
                 if (argc == 2) {
-                        he = gethostbyname(argv[1]);
-                        if (!he) {
-                                fprintf(stderr, "gethostbyname error: %s\n",
-                                        strerror(errno));
+                        he = ptl_gethostbyname(argv[1]);
+                        if (!he) 
                                 return -1;
-                        }
                         
                         data.ioc_nid = ntohl (*(__u32 *)he->h_addr); /* HOST byte order */
 
@@ -582,12 +598,9 @@ int jt_ptl_push_connection (int argc, char **argv)
 
                 PORTAL_IOC_INIT(data);
                 if (argc == 2) {
-                        he = gethostbyname(argv[1]);
-                        if (!he) {
-                                fprintf(stderr, "gethostbyname error: %s\n",
-                                        strerror(errno));
+                        he = ptl_gethostbyname(argv[1]);
+                        if (!he)
                                 return -1;
-                        }
                         
                         data.ioc_nid = ntohl (*(__u32 *)he->h_addr); /* HOST byte order */
 
index af76523..0a6ad8f 100644 (file)
@@ -76,7 +76,7 @@ static int ptlbd_cl_setup(struct obd_device *obd, obd_count len, void *buf)
         RETURN(0);
 }
 
-static int ptlbd_cl_cleanup(struct obd_device *obd, int force, int failover)
+static int ptlbd_cl_cleanup(struct obd_device *obd, int flags)
 {
         struct ptlbd_obd *ptlbd = &obd->u.ptlbd;
         struct obd_import *imp;
@@ -99,9 +99,8 @@ static int ptlbd_cl_cleanup(struct obd_device *obd, int force, int failover)
 
 
 /* modelled after ptlrpc_import_connect() */
-int ptlbd_cl_connect(struct lustre_handle *conn,
-                      struct obd_device *obd, 
-                      struct obd_uuid *target_uuid)
+int ptlbd_cl_connect(struct lustre_handle *conn, struct obd_device *obd,
+                     struct obd_uuid *target_uuid)
 {
         struct ptlbd_obd *ptlbd = &obd->u.ptlbd;
         struct obd_import *imp = ptlbd->bd_import;
@@ -196,7 +195,7 @@ int ptlbd_cl_init(void)
 {
         struct lprocfs_static_vars lvars;
 
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(ptlbd,&lvars);
         return class_register_type(&ptlbd_cl_obd_ops, lvars.module_vars,
                                    OBD_PTLBD_CL_DEVICENAME);
 }
index e3fde99..dc591f4 100644 (file)
@@ -57,7 +57,7 @@ out_cl:
         RETURN(ret);
 }
 
-static void __exit ptlbd_exit(void)
+static void /*__exit*/ ptlbd_exit(void)
 {
         ENTRY;
         ptlbd_cl_exit();
index 34ec737..d293a86 100644 (file)
@@ -74,7 +74,7 @@ out_filp:
         RETURN(rc);
 }
 
-static int ptlbd_sv_cleanup(struct obd_device *obddev, int force, int failover)
+static int ptlbd_sv_cleanup(struct obd_device *obddev, int flags)
 {
         struct ptlbd_obd *ptlbd = &obddev->u.ptlbd;
         ENTRY;
@@ -102,7 +102,7 @@ int ptlbd_sv_init(void)
 {
         struct lprocfs_static_vars lvars;
 
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(ptlbd,&lvars);
         return class_register_type(&ptlbd_sv_obd_ops, lvars.module_vars,
                                    OBD_PTLBD_SV_DEVICENAME);
 }
index 067f05c..cf51f30 100644 (file)
@@ -7,3 +7,4 @@ Makefile.in
 .deps
 tags
 TAGS
+.*.cmd
index eb44329..355d48c 100644 (file)
@@ -16,7 +16,7 @@ EXTRA_PROGRAMS = ptlrpc
 
 ptlrpc_SOURCES = recover.c connection.c ptlrpc_module.c events.c service.c \
 client.c niobuf.c pack_generic.c lproc_ptlrpc.c pinger.c ptlrpc_lib.c \
-ptlrpc_internal.h
+ptlrpc_internal.h recov_thread.c
 endif
 
 include $(top_srcdir)/Rules
index a98af3e..50ea587 100644 (file)
@@ -78,13 +78,13 @@ void ptlrpc_readdress_connection(struct ptlrpc_connection *conn,
         struct ptlrpc_peer peer;
         int err;
 
-        err = ptlrpc_uuid_to_peer (uuid, &peer);
+        err = ptlrpc_uuid_to_peer(uuid, &peer);
         if (err != 0) {
                 CERROR("cannot find peer %s!\n", uuid->uuid);
                 return;
         }
 
-        memcpy (&conn->c_peer, &peer, sizeof (peer));
+        memcpy(&conn->c_peer, &peer, sizeof (peer));
         return;
 }
 
@@ -96,7 +96,7 @@ static inline struct ptlrpc_bulk_desc *new_bulk(void)
         if (!desc)
                 return NULL;
 
-        spin_lock_init (&desc->bd_lock);
+        spin_lock_init(&desc->bd_lock);
         init_waitqueue_head(&desc->bd_waitq);
         INIT_LIST_HEAD(&desc->bd_page_list);
         desc->bd_md_h = PTL_HANDLE_NONE;
@@ -108,10 +108,10 @@ static inline struct ptlrpc_bulk_desc *new_bulk(void)
 struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp (struct ptlrpc_request *req,
                                                int type, int portal)
 {
-        struct obd_import       *imp = req->rq_import;
+        struct obd_import *imp = req->rq_import;
         struct ptlrpc_bulk_desc *desc;
 
-        LASSERT (type == BULK_PUT_SINK || type == BULK_GET_SOURCE);
+        LASSERT(type == BULK_PUT_SINK || type == BULK_GET_SOURCE);
 
         desc = new_bulk();
         if (desc == NULL)
@@ -132,10 +132,10 @@ struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp (struct ptlrpc_request *req,
 struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_exp (struct ptlrpc_request *req,
                                                int type, int portal)
 {
-        struct obd_export       *exp = req->rq_export;
+        struct obd_export *exp = req->rq_export;
         struct ptlrpc_bulk_desc *desc;
 
-        LASSERT (type == BULK_PUT_SOURCE || type == BULK_GET_SINK);
+        LASSERT(type == BULK_PUT_SOURCE || type == BULK_GET_SINK);
 
         desc = new_bulk();
         if (desc == NULL)
@@ -159,12 +159,12 @@ int ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc,
 
         OBD_ALLOC(bulk, sizeof(*bulk));
         if (bulk == NULL)
-                return (-ENOMEM);
+                return -ENOMEM;
 
-        LASSERT (page != NULL);
-        LASSERT (pageoffset >= 0);
-        LASSERT (len > 0);
-        LASSERT (pageoffset + len <= PAGE_SIZE);
+        LASSERT(page != NULL);
+        LASSERT(pageoffset >= 0);
+        LASSERT(len > 0);
+        LASSERT(pageoffset + len <= PAGE_SIZE);
 
         bulk->bp_page = page;
         bulk->bp_pageoffset = pageoffset;
@@ -181,9 +181,9 @@ void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc)
         struct list_head *tmp, *next;
         ENTRY;
 
-        LASSERT (desc != NULL);
-        LASSERT (desc->bd_page_count != 0x5a5a5a5a); /* not freed already */
-        LASSERT (!desc->bd_network_rw);         /* network hands off or */
+        LASSERT(desc != NULL);
+        LASSERT(desc->bd_page_count != 0x5a5a5a5a); /* not freed already */
+        LASSERT(!desc->bd_network_rw);         /* network hands off or */
 
         list_for_each_safe(tmp, next, &desc->bd_page_list) {
                 struct ptlrpc_bulk_page *bulk;
@@ -191,7 +191,7 @@ void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc)
                 ptlrpc_free_bulk_page(bulk);
         }
 
-        LASSERT (desc->bd_page_count == 0);
+        LASSERT(desc->bd_page_count == 0);
         LASSERT((desc->bd_export != NULL) ^ (desc->bd_import != NULL));
 
         if (desc->bd_export)
@@ -205,7 +205,7 @@ void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc)
 
 void ptlrpc_free_bulk_page(struct ptlrpc_bulk_page *bulk)
 {
-        LASSERT (bulk != NULL);
+        LASSERT(bulk != NULL);
 
         list_del(&bulk->bp_link);
         bulk->bp_desc->bd_page_count--;
@@ -247,7 +247,7 @@ struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, int opcode,
 
         request->rq_connection = ptlrpc_connection_addref(imp->imp_connection);
 
-        spin_lock_init (&request->rq_lock);
+        spin_lock_init(&request->rq_lock);
         INIT_LIST_HEAD(&request->rq_list);
         init_waitqueue_head(&request->rq_wait_for_rep);
         request->rq_xid = ptlrpc_next_xid();
@@ -289,18 +289,18 @@ void ptlrpc_set_destroy(struct ptlrpc_request_set *set)
                 struct ptlrpc_request *req =
                         list_entry(tmp, struct ptlrpc_request, rq_set_chain);
 
-                LASSERT (req->rq_phase == expected_phase);
+                LASSERT(req->rq_phase == expected_phase);
                 n++;
         }
 
-        LASSERT (set->set_remaining == 0 || set->set_remaining == n);
+        LASSERT(set->set_remaining == 0 || set->set_remaining == n);
 
         list_for_each_safe(tmp, next, &set->set_requests) {
                 struct ptlrpc_request *req =
                         list_entry(tmp, struct ptlrpc_request, rq_set_chain);
                 list_del_init(&req->rq_set_chain);
 
-                LASSERT (req->rq_phase == expected_phase);
+                LASSERT(req->rq_phase == expected_phase);
 
                 if (req->rq_phase == RQ_PHASE_NEW) {
 
@@ -312,7 +312,8 @@ void ptlrpc_set_destroy(struct ptlrpc_request_set *set)
                                 /* higher level (i.e. LOV) failed;
                                  * let the sub reqs clean up */
                                 req->rq_status = -EBADR;
-                                interpreter(req, &req->rq_async_args, req->rq_status);
+                                interpreter(req, &req->rq_async_args,
+                                            req->rq_status);
                         }
                         set->set_remaining--;
                 }
@@ -402,8 +403,8 @@ static int after_reply(struct ptlrpc_request *req, int *restartp)
         int rc;
         ENTRY;
 
-        LASSERT (!req->rq_receiving_reply);
-        LASSERT (req->rq_replied);
+        LASSERT(!req->rq_receiving_reply);
+        LASSERT(req->rq_replied);
 
         if (restartp != NULL)
                 *restartp = 0;
@@ -418,14 +419,14 @@ static int after_reply(struct ptlrpc_request *req, int *restartp)
         rc = lustre_unpack_msg(req->rq_repmsg, req->rq_replen);
         if (rc) {
                 CERROR("unpack_rep failed: %d\n", rc);
-                RETURN (-EPROTO);
+                RETURN(-EPROTO);
         }
 
         if (req->rq_repmsg->type != PTL_RPC_MSG_REPLY &&
             req->rq_repmsg->type != PTL_RPC_MSG_ERR) {
                 CERROR("invalid packet type received (type=%u)\n",
                        req->rq_repmsg->type);
-                RETURN (-EPROTO);
+                RETURN(-EPROTO);
         }
 
         /* Store transno in reqmsg for replay. */
@@ -447,6 +448,9 @@ static int after_reply(struct ptlrpc_request *req, int *restartp)
                 if (req->rq_err)
                         RETURN(-EIO);
 
+                if (req->rq_no_resend)
+                        RETURN(rc); /* -ENOTCONN */
+
                 if (req->rq_resend) {
                         if (restartp == NULL)
                                 LBUG(); /* async resend not supported yet */
@@ -456,7 +460,7 @@ static int after_reply(struct ptlrpc_request *req, int *restartp)
                         *restartp = 1;
                         lustre_msg_add_flags(req->rq_reqmsg, MSG_RESENT);
                         DEBUG_REQ(D_HA, req, "resending: ");
-                        RETURN (0);
+                        RETURN(0);
                 }
 
                 CERROR("request should be err or resend: %p\n", req);
@@ -472,10 +476,9 @@ static int after_reply(struct ptlrpc_request *req, int *restartp)
                         imp->imp_max_transno = req->rq_transno;
 
                 /* Replay-enabled imports return commit-status information. */
-                if (req->rq_repmsg->last_committed) {
+                if (req->rq_repmsg->last_committed)
                         imp->imp_peer_committed_transno =
                                 req->rq_repmsg->last_committed;
-                }
                 ptlrpc_free_committed(imp);
                 spin_unlock_irqrestore(&imp->imp_lock, flags);
         }
@@ -510,8 +513,8 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set)
                         continue;
 
                 if (req->rq_phase == RQ_PHASE_INTERPRET)
-                        GOTO (interpret, req->rq_status);
-                
+                        GOTO(interpret, req->rq_status);
+
                 if (req->rq_err) {
                         ptlrpc_unregister_reply(req);
                         if (req->rq_status == 0)
@@ -522,7 +525,7 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set)
                         list_del_init(&req->rq_list);
                         spin_unlock_irqrestore(&imp->imp_lock, flags);
 
-                        GOTO (interpret, req->rq_status);
+                        GOTO(interpret, req->rq_status);
                 }
 
                 if (req->rq_intr) {
@@ -535,7 +538,7 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set)
                         list_del_init(&req->rq_list);
                         spin_unlock_irqrestore(&imp->imp_lock, flags);
 
-                        GOTO (interpret, req->rq_status);
+                        GOTO(interpret, req->rq_status);
                 }
 
                 if (req->rq_phase == RQ_PHASE_RPC) {
@@ -553,13 +556,13 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set)
                                 list_add_tail(&req->rq_list,
                                               &imp->imp_sending_list);
 
-                                if (req->rq_import_generation < 
+                                if (req->rq_import_generation <
                                     imp->imp_generation) {
                                         req->rq_status = -EIO;
                                         req->rq_phase = RQ_PHASE_INTERPRET;
-                                        spin_unlock_irqrestore(&imp->imp_lock, 
+                                        spin_unlock_irqrestore(&imp->imp_lock,
                                                                flags);
-                                        GOTO (interpret, req->rq_status);
+                                        GOTO(interpret, req->rq_status);
                                 }
                                 spin_unlock_irqrestore(&imp->imp_lock, flags);
 
@@ -571,16 +574,17 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set)
                                         req->rq_resend = 0;
                                         spin_unlock_irqrestore(&req->rq_lock,
                                                                flags);
+
                                         ptlrpc_unregister_reply(req);
                                         if (req->rq_bulk)
                                                 ptlrpc_unregister_bulk(req);
-                               }
+                                }
 
                                 rc = ptl_send_rpc(req);
                                 if (rc) {
                                         req->rq_status = rc;
                                         req->rq_phase = RQ_PHASE_INTERPRET;
-                                        GOTO (interpret, req->rq_status);
+                                        GOTO(interpret, req->rq_status);
                                 }
 
                         }
@@ -612,21 +616,21 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set)
                          */
                         if (req->rq_bulk == NULL || req->rq_status != 0) {
                                 req->rq_phase = RQ_PHASE_INTERPRET;
-                                GOTO (interpret, req->rq_status);
+                                GOTO(interpret, req->rq_status);
                         }
 
                         req->rq_phase = RQ_PHASE_BULK;
                 }
 
-                LASSERT (req->rq_phase == RQ_PHASE_BULK);
+                LASSERT(req->rq_phase == RQ_PHASE_BULK);
                 if (!ptlrpc_bulk_complete (req->rq_bulk))
                         continue;
 
                 req->rq_phase = RQ_PHASE_INTERPRET;
 
         interpret:
-                LASSERT (req->rq_phase == RQ_PHASE_INTERPRET);
-                LASSERT (!req->rq_receiving_reply);
+                LASSERT(req->rq_phase == RQ_PHASE_INTERPRET);
+                LASSERT(!req->rq_receiving_reply);
 
                 ptlrpc_unregister_reply(req);
                 if (req->rq_bulk != NULL)
@@ -651,7 +655,7 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set)
                 set->set_remaining--;
         }
 
-        RETURN (set->set_remaining == 0);
+        RETURN(set->set_remaining == 0);
 }
 
 int ptlrpc_expire_one_request(struct ptlrpc_request *req)
@@ -695,7 +699,7 @@ static int expired_set(void *data)
         time_t                     now = LTIME_S (CURRENT_TIME);
         ENTRY;
 
-        LASSERT (set != NULL);
+        LASSERT(set != NULL);
 
         /* A timeout expired; see which reqs it applies to... */
         list_for_each (tmp, &set->set_requests) {
@@ -728,7 +732,7 @@ static void interrupted_set(void *data)
         struct list_head *tmp;
         unsigned long flags;
 
-        LASSERT (set != NULL);
+        LASSERT(set != NULL);
         CERROR("INTERRUPTED SET %p\n", set);
 
         list_for_each(tmp, &set->set_requests) {
@@ -757,12 +761,13 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set)
         int                    timeout;
         ENTRY;
 
+        SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */
         LASSERT(!list_empty(&set->set_requests));
         list_for_each(tmp, &set->set_requests) {
                 req = list_entry(tmp, struct ptlrpc_request, rq_set_chain);
 
-                LASSERT (req->rq_level == LUSTRE_CONN_FULL);
-                LASSERT (req->rq_phase == RQ_PHASE_NEW);
+                LASSERT(req->rq_level == LUSTRE_CONN_FULL);
+                LASSERT(req->rq_phase == RQ_PHASE_NEW);
                 req->rq_phase = RQ_PHASE_RPC;
 
                 imp = req->rq_import;
@@ -789,7 +794,7 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set)
                         spin_lock (&req->rq_lock);
                         req->rq_waiting = 1;
                         spin_unlock (&req->rq_lock);
-                        LASSERT (list_empty (&req->rq_list));
+                        LASSERT(list_empty (&req->rq_list));
                         // list_del(&req->rq_list);
                         list_add_tail(&req->rq_list, &imp->imp_delayed_list);
                         spin_unlock_irqrestore(&imp->imp_lock, flags);
@@ -801,6 +806,7 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set)
                 list_add_tail(&req->rq_list, &imp->imp_sending_list);
                 spin_unlock_irqrestore(&imp->imp_lock, flags);
 
+                req->rq_reqmsg->status = current->pid;
                 CDEBUG(D_RPCTRACE, "Sending RPC pname:cluuid:pid:xid:ni:nid:opc"
                        " %s:%s:%d:"LPU64":%s:"LPX64":%d\n", current->comm,
                        imp->imp_obd->obd_uuid.uuid, req->rq_reqmsg->status,
@@ -820,7 +826,8 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set)
                 now = LTIME_S (CURRENT_TIME);
                 timeout = 0;
                 list_for_each (tmp, &set->set_requests) {
-                        req = list_entry(tmp, struct ptlrpc_request, rq_set_chain);
+                        req = list_entry(tmp, struct ptlrpc_request,
+                                         rq_set_chain);
 
                         /* request in-flight? */
                         if (!((req->rq_phase == RQ_PHASE_RPC &&
@@ -846,7 +853,7 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set)
                                        expired_set, interrupted_set, set);
                 rc = l_wait_event(set->set_waitq, ptlrpc_check_set(set), &lwi);
 
-                LASSERT (rc == 0 || rc == -EINTR || rc == -ETIMEDOUT);
+                LASSERT(rc == 0 || rc == -EINTR || rc == -ETIMEDOUT);
 
                 /* -EINTR => all requests have been flagged rq_intr so next
                  * check completes.
@@ -857,13 +864,13 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set)
                  * the error cases -eeb. */
         } while (rc != 0);
 
-        LASSERT (set->set_remaining == 0);
+        LASSERT(set->set_remaining == 0);
 
         rc = 0;
         list_for_each(tmp, &set->set_requests) {
                 req = list_entry(tmp, struct ptlrpc_request, rq_set_chain);
 
-                LASSERT (req->rq_phase == RQ_PHASE_COMPLETE);
+                LASSERT(req->rq_phase == RQ_PHASE_COMPLETE);
                 if (req->rq_status != 0)
                         rc = req->rq_status;
         }
@@ -885,7 +892,7 @@ static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked)
                 return;
         }
 
-        LASSERT (!request->rq_receiving_reply);
+        LASSERT(!request->rq_receiving_reply);
 
         /* We must take it off the imp_replay_list first.  Otherwise, we'll set
          * request->rq_reqmsg to NULL while osc_close is dereferencing it. */
@@ -940,7 +947,7 @@ static int __ptlrpc_req_finished(struct ptlrpc_request *request, int locked)
         if (request == NULL)
                 RETURN(1);
 
-        if (request == (void *)(long)(0x5a5a5a5a5a5a5a5a) || 
+        if (request == (void *)(long)(0x5a5a5a5a5a5a5a5a) ||
             request->rq_obd == (void *)(long)(0x5a5a5a5a5a5a5a5a)) {
                 CERROR("dereferencing freed request (bug 575)\n");
                 LBUG();
@@ -981,7 +988,7 @@ void ptlrpc_unregister_reply (struct ptlrpc_request *request)
         int           rc;
         ENTRY;
 
-        LASSERT (!in_interrupt ());             /* might sleep */
+        LASSERT(!in_interrupt ());             /* might sleep */
 
         spin_lock_irqsave (&request->rq_lock, flags);
         if (!request->rq_receiving_reply) {     /* not waiting for a reply */
@@ -991,7 +998,7 @@ void ptlrpc_unregister_reply (struct ptlrpc_request *request)
                 return;
         }
 
-        LASSERT (!request->rq_replied);         /* callback hasn't completed */
+        LASSERT(!request->rq_replied);         /* callback hasn't completed */
         spin_unlock_irqrestore (&request->rq_lock, flags);
 
         rc = PtlMDUnlink (request->rq_reply_md_h);
@@ -1000,8 +1007,8 @@ void ptlrpc_unregister_reply (struct ptlrpc_request *request)
                 LBUG ();
 
         case PTL_OK:                            /* unlinked before completion */
-                LASSERT (request->rq_receiving_reply);
-                LASSERT (!request->rq_replied);
+                LASSERT(request->rq_receiving_reply);
+                LASSERT(!request->rq_replied);
                 spin_lock_irqsave (&request->rq_lock, flags);
                 request->rq_receiving_reply = 0;
                 spin_unlock_irqrestore (&request->rq_lock, flags);
@@ -1018,7 +1025,7 @@ void ptlrpc_unregister_reply (struct ptlrpc_request *request)
 
                         rc = l_wait_event (request->rq_wait_for_rep,
                                            request->rq_replied, &lwi);
-                        LASSERT (rc == 0 || rc == -ETIMEDOUT);
+                        LASSERT(rc == 0 || rc == -ETIMEDOUT);
                         if (rc == 0) {
                                 spin_lock_irqsave (&request->rq_lock, flags);
                                 /* Ensure the callback has completed scheduling
@@ -1032,8 +1039,8 @@ void ptlrpc_unregister_reply (struct ptlrpc_request *request)
                 /* fall through */
 
         case PTL_INV_MD:                        /* callback completed */
-                LASSERT (!request->rq_receiving_reply);
-                LASSERT (request->rq_replied);
+                LASSERT(!request->rq_receiving_reply);
+                LASSERT(request->rq_replied);
                 EXIT;
                 return;
         }
@@ -1061,7 +1068,7 @@ void ptlrpc_free_committed(struct obd_import *imp)
                 req = list_entry(tmp, struct ptlrpc_request, rq_list);
 
                 /* XXX ok to remove when 1357 resolved - rread 05/29/03  */
-                LASSERT (req != last_req);
+                LASSERT(req != last_req);
                 last_req = req;
 
                 if (req->rq_import_generation < imp->imp_generation) {
@@ -1208,14 +1215,13 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req)
         struct l_wait_info lwi;
         struct obd_import *imp = req->rq_import;
         struct obd_device *obd = imp->imp_obd;
-        struct ptlrpc_connection *conn = imp->imp_connection;
-        unsigned int flags;
+        unsigned long flags;
         int do_restart = 0;
         int timeout = 0;
         ENTRY;
 
-        LASSERT (req->rq_set == NULL);
-        LASSERT (!req->rq_receiving_reply);
+        LASSERT(req->rq_set == NULL);
+        LASSERT(!req->rq_receiving_reply);
 
         /* for distributed debugging */
         req->rq_reqmsg->status = current->pid;
@@ -1224,7 +1230,8 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req)
                "%s:%s:%d:"LPU64":%s:"LPX64":%d\n", current->comm,
                imp->imp_obd->obd_uuid.uuid,
                req->rq_reqmsg->status, req->rq_xid,
-               conn->c_peer.peer_ni->pni_name, conn->c_peer.peer_nid,
+               imp->imp_connection->c_peer.peer_ni->pni_name,
+               imp->imp_connection->c_peer.peer_nid,
                req->rq_reqmsg->opc);
 
         /* Mark phase here for a little debug help */
@@ -1242,13 +1249,13 @@ restart:
         if (req->rq_import->imp_invalid && req->rq_level == LUSTRE_CONN_FULL) {
                 DEBUG_REQ(D_ERROR, req, "IMP_INVALID:");
                 spin_unlock_irqrestore(&imp->imp_lock, flags);
-                GOTO (out, rc = -EIO);
+                GOTO(out, rc = -EIO);
         }
 
         if (req->rq_import_generation < imp->imp_generation) {
                 DEBUG_REQ(D_ERROR, req, "req old gen:");
                 spin_unlock_irqrestore(&imp->imp_lock, flags);
-                GOTO (out, rc = -EIO);
+                GOTO(out, rc = -EIO);
         }
 
         if (req->rq_level > imp->imp_level) {
@@ -1256,7 +1263,7 @@ restart:
                 if (req->rq_no_recov || obd->obd_no_recov ||
                     imp->imp_dlm_fake) {
                         spin_unlock_irqrestore(&imp->imp_lock, flags);
-                        GOTO (out, rc = -EWOULDBLOCK);
+                        GOTO(out, rc = -EWOULDBLOCK);
                 }
 
                 list_add_tail(&req->rq_list, &imp->imp_delayed_list);
@@ -1269,23 +1276,24 @@ restart:
                                   (req->rq_level <= imp->imp_level ||
                                    req->rq_err),
                                   &lwi);
-                DEBUG_REQ(D_HA, req, "\"%s\" awake: (%d > %d)",
-                          current->comm, req->rq_level, imp->imp_level);
+                DEBUG_REQ(D_HA, req, "\"%s\" awake: (%d > %d or %d == 1)",
+                          current->comm, imp->imp_level, req->rq_level,
+                          req->rq_err);
 
                 spin_lock_irqsave(&imp->imp_lock, flags);
                 list_del_init(&req->rq_list);
 
-                if (req->rq_err || 
+                if (req->rq_err ||
                     req->rq_import_generation < imp->imp_generation)
                         rc = -EIO;
 
 
                 if (rc) {
                         spin_unlock_irqrestore(&imp->imp_lock, flags);
-                        GOTO (out, rc);
+                        GOTO(out, rc);
                 }
 
-                CERROR("process %d resumed\n", current->pid);
+                DEBUG_REQ(D_HA, req, "resumed");
         }
 
         /* XXX this is the same as ptlrpc_set_wait */
@@ -1335,7 +1343,7 @@ restart:
                           &reply_ev);
                 reply_in_callback(&reply_ev);
 
-                LASSERT (reply_ev.mem_desc.user_ptr == (void *)req);
+                LASSERT(reply_ev.mem_desc.user_ptr == (void *)req);
                 // ptlrpc_check_reply(req);
                 // not required now it only tests
         }
@@ -1347,7 +1355,8 @@ restart:
                "%s:%s:%d:"LPU64":%s:"LPX64":%d\n", current->comm,
                imp->imp_obd->obd_uuid.uuid,
                req->rq_reqmsg->status, req->rq_xid,
-               conn->c_peer.peer_ni->pni_name, conn->c_peer.peer_nid,
+               imp->imp_connection->c_peer.peer_ni->pni_name,
+               imp->imp_connection->c_peer.peer_nid,
                req->rq_reqmsg->opc);
 
         spin_lock_irqsave(&imp->imp_lock, flags);
@@ -1421,7 +1430,7 @@ restart:
                                            ptlrpc_bulk_complete(req->rq_bulk),
                                            &lwi);
                         if (brc != 0) {
-                                LASSERT (brc == -ETIMEDOUT);
+                                LASSERT(brc == -ETIMEDOUT);
                                 CERROR ("Timed out waiting for bulk\n");
                                 rc = brc;
                         }
@@ -1429,14 +1438,14 @@ restart:
                 if (rc < 0) {
                         /* MDS blocks for put ACKs before replying */
                         /* OSC sets rq_no_resend for the time being */
-                        LASSERT (req->rq_no_resend);
+                        LASSERT(req->rq_no_resend);
                         ptlrpc_unregister_bulk (req);
                 }
         }
 
-        LASSERT (!req->rq_receiving_reply);
+        LASSERT(!req->rq_receiving_reply);
         req->rq_phase = RQ_PHASE_INTERPRET;
-        RETURN (rc);
+        RETURN(rc);
 }
 
 int ptlrpc_replay_req(struct ptlrpc_request *req)
@@ -1450,7 +1459,7 @@ int ptlrpc_replay_req(struct ptlrpc_request *req)
          * state it was left in */
 
         /* Not handling automatic bulk replay yet (or ever?) */
-        LASSERT (req->rq_bulk == NULL);
+        LASSERT(req->rq_bulk == NULL);
 
         DEBUG_REQ(D_NET, req, "about to replay");
 
index 07be1af..c4c47d3 100644 (file)
@@ -50,6 +50,7 @@ struct ll_rpc_opcode {
         { OST_SAN_READ,     "ost_san_read" },
         { OST_SAN_WRITE,    "ost_san_write" },
         { OST_SYNCFS,       "ost_syncfs" },
+        { OST_SET_INFO,     "ost_set_info" },
         { MDS_GETATTR,      "mds_getattr" },
         { MDS_GETATTR_NAME, "mds_getattr_name" },
         { MDS_CLOSE,        "mds_close" },
@@ -60,6 +61,8 @@ struct ll_rpc_opcode {
         { MDS_GETSTATUS,    "mds_getstatus" },
         { MDS_STATFS,       "mds_statfs" },
         { MDS_GETLOVINFO,   "mds_getlovinfo" },
+        { MDS_PIN,          "mds_pin" },
+        { MDS_UNPIN,        "mds_unpin" },
         { LDLM_ENQUEUE,     "ldlm_enqueue" },
         { LDLM_CONVERT,     "ldlm_convert" },
         { LDLM_CANCEL,      "ldlm_cancel" },
@@ -71,7 +74,8 @@ struct ll_rpc_opcode {
         { PTLBD_FLUSH,      "ptlbd_flush" },
         { PTLBD_CONNECT,    "ptlbd_connect" },
         { PTLBD_DISCONNECT, "ptlbd_disconnect" },
-        { OBD_PING,         "obd_ping" }
+        { OBD_PING,         "obd_ping" },
+        { OBD_LOG_CANCEL,   "obd_log_cancel" },
 };
 
 const char* ll_opcode2str(__u32 opcode)
@@ -119,7 +123,7 @@ void ptlrpc_lprocfs_register_service(struct obd_device *obddev,
         }
 
         lprocfs_counter_init(svc_stats, PTLRPC_REQWAIT_CNTR,
-                             svc_counter_config, "req_waittime", "cycles");
+                             svc_counter_config, "req_waittime", "usec");
         /* Wait for b_eq branch
         lprocfs_counter_init(svc_stats, PTLRPC_SVCEQDEPTH_CNTR,
                              svc_counter_config, "svc_eqdepth", "reqs");
@@ -127,12 +131,12 @@ void ptlrpc_lprocfs_register_service(struct obd_device *obddev,
         /* no stddev on idletime */
         lprocfs_counter_init(svc_stats, PTLRPC_SVCIDLETIME_CNTR,
                              (LPROCFS_CNTR_EXTERNALLOCK|LPROCFS_CNTR_AVGMINMAX),
-                             "svc_idletime", "cycles");
+                             "svc_idletime", "usec");
         for (i = 0; i < LUSTRE_MAX_OPCODES; i++) {
                 __u32 opcode = ll_rpc_opcode_table[i].opcode;
                 lprocfs_counter_init(svc_stats, PTLRPC_LAST_CNTR + i,
                                      svc_counter_config, ll_opcode2str(opcode),
-                                     "cycles");
+                                     "usec");
         }
 
         rc = lprocfs_register_stats(svc_procroot, "stats", svc_stats);
index 3811d2a..0e2d651 100644 (file)
@@ -187,14 +187,9 @@ void *lustre_msg_buf(struct lustre_msg *m, int n, int min_size)
         }
 
         buflen = m->buflens[n];
-        if (buflen == 0) {
-                CERROR("msg %p buffer[%d] is zero length\n", m, n);
-                return NULL;
-        }
-
         if (buflen < min_size) {
                 CERROR("msg %p buffer[%d] size %d too small (required %d)\n",
-                        m, n, buflen, min_size);
+                       m, n, buflen, min_size);
                 return NULL;
         }
 
@@ -249,17 +244,16 @@ void *lustre_swab_reqbuf (struct ptlrpc_request *req, int index, int min_size,
 {
         void *ptr;
 
-        LASSERT_REQSWAB (req, index);
+        LASSERT_REQSWAB(req, index);
 
         ptr = lustre_msg_buf(req->rq_reqmsg, index, min_size);
         if (ptr == NULL)
-                return (NULL);
+                return NULL;
 
-        if (swabber != NULL &&
-            lustre_msg_swabbed (req->rq_reqmsg))
+        if (swabber != NULL && lustre_msg_swabbed(req->rq_reqmsg))
                 ((void (*)(void *))swabber)(ptr);
 
-        return (ptr);
+        return ptr;
 }
 
 /* Wrap up the normal fixed length case */
@@ -268,17 +262,16 @@ void *lustre_swab_repbuf (struct ptlrpc_request *req, int index, int min_size,
 {
         void *ptr;
 
-        LASSERT_REPSWAB (req, index);
+        LASSERT_REPSWAB(req, index);
 
-        ptr = lustre_msg_buf (req->rq_repmsg, index, min_size);
+        ptr = lustre_msg_buf(req->rq_repmsg, index, min_size);
         if (ptr == NULL)
-                return (NULL);
+                return NULL;
 
-        if (swabber != NULL &&
-            lustre_msg_swabbed (req->rq_repmsg))
+        if (swabber != NULL && lustre_msg_swabbed(req->rq_repmsg))
                 ((void (*)(void *))swabber)(ptr);
 
-        return (ptr);
+        return ptr;
 }
 
 /* byte flipping routines for all wire types declared in
@@ -638,12 +631,12 @@ void lustre_assert_wire_constants (void)
         LASSERT (REINT_RENAME == 5);
         LASSERT (REINT_OPEN == 6);
         LASSERT (REINT_MAX == 6);
-        LASSERT (IT_INTENT_EXEC == 1);
-        LASSERT (IT_OPEN_LOOKUP == 2);
-        LASSERT (IT_OPEN_NEG == 4);
-        LASSERT (IT_OPEN_POS == 8);
-        LASSERT (IT_OPEN_CREATE == 16);
-        LASSERT (IT_OPEN_OPEN == 32);
+        LASSERT (DISP_IT_EXECD == 1);
+        LASSERT (DISP_LOOKUP_EXECD == 2);
+        LASSERT (DISP_LOOKUP_NEG == 4);
+        LASSERT (DISP_LOOKUP_POS == 8);
+        LASSERT (DISP_OPEN_CREATE == 16);
+        LASSERT (DISP_OPEN_OPEN == 32);
         LASSERT (MDS_STATUS_CONN == 1);
         LASSERT (MDS_STATUS_LOV == 2);
         LASSERT (MDS_OPEN_HAS_EA == 1);
index ebc69e1..c81fb51 100644 (file)
@@ -47,12 +47,12 @@ void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
 
 int ptlrpc_pinger_add_import(struct obd_import *imp)
 {
+#ifndef ENABLE_PINGER
+        return 0;
+#else
         int rc;
         ENTRY;
 
-#ifndef ENABLE_PINGER
-        RETURN(0);
-#else
         if (!list_empty(&imp->imp_pinger_chain))
                 RETURN(-EALREADY);
 
@@ -77,12 +77,12 @@ int ptlrpc_pinger_add_import(struct obd_import *imp)
 
 int ptlrpc_pinger_del_import(struct obd_import *imp)
 {
+#ifndef ENABLE_PINGER
+        return 0;
+#else
         int rc;
         ENTRY;
 
-#ifndef ENABLE_PINGER
-        RETURN(0);
-#else
         if (list_empty(&imp->imp_pinger_chain))
                 RETURN(-ENOENT);
 
@@ -118,14 +118,7 @@ static int ptlrpc_pinger_main(void *arg)
         RECALC_SIGPENDING;
         SIGNAL_MASK_UNLOCK(current, flags);
 
-#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
-        sprintf(current->comm, "%s|%d", data->name,current->thread.extern_pid);
-#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        sprintf(current->comm, "%s|%d", data->name,
-                current->thread.mode.tt.extern_pid);
-#else
-        strcpy(current->comm, data->name);
-#endif
+        THREAD_NAME(current->comm, "%s", data->name);
         unlock_kernel();
 
         /* Record that the thread is running */
@@ -147,7 +140,8 @@ static int ptlrpc_pinger_main(void *arg)
                 down(&pinger_sem);
                 list_for_each(iter, &pinger_imports) {
                         struct obd_import *imp =
-                                list_entry(iter, struct obd_import, imp_pinger_chain);
+                                list_entry(iter, struct obd_import,
+                                           imp_pinger_chain);
                         int generation, level;
                         unsigned long flags;
 
@@ -159,16 +153,19 @@ static int ptlrpc_pinger_main(void *arg)
                                 spin_unlock_irqrestore(&imp->imp_lock, flags);
 
                                 if (level != LUSTRE_CONN_FULL) {
-                                        CDEBUG(D_HA, "not pinging %s (in recovery)\n",
+                                        CDEBUG(D_HA,
+                                               "not pinging %s (in recovery)\n",
                                                imp->imp_target_uuid.uuid);
                                         continue;
                                 }
 
-                                req = ptlrpc_prep_req(imp, OBD_PING, 0, NULL, NULL);
+                                req = ptlrpc_prep_req(imp, OBD_PING, 0, NULL,
+                                                      NULL);
                                 if (!req) {
                                         CERROR("OOM trying to ping\n");
                                         break;
                                 }
+                                req->rq_no_resend = 1;
                                 req->rq_replen = lustre_msg_size(0, NULL);
                                 req->rq_level = LUSTRE_CONN_FULL;
                                 req->rq_phase = RQ_PHASE_RPC;
index cb96c3c..8d66c88 100644 (file)
@@ -33,19 +33,22 @@ struct ptlrpc_request_set;
 /* ldlm hooks that we need, managed via inter_module_{get,put} */
 extern int (*ptlrpc_ldlm_namespace_cleanup)(struct ldlm_namespace *, int);
 extern int (*ptlrpc_ldlm_cli_cancel_unused)(struct ldlm_namespace *,
-                                     struct ldlm_res_id *, int);
+                                            struct ldlm_res_id *, int);
 extern int (*ptlrpc_ldlm_replay_locks)(struct obd_import *);
 
 int ptlrpc_get_ldlm_hooks(void);
 void ptlrpc_daemonize(void);
 
 void ptlrpc_request_handle_eviction(struct ptlrpc_request *);
-void lustre_assert_wire_constants (void);
+void lustre_assert_wire_constants(void);
 
 void ptlrpc_lprocfs_register_service(struct obd_device *obddev,
                                      struct ptlrpc_service *svc);
 void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service *svc);
 
+/* recovd_thread.c */
+int llog_init_commit_master(void);
+int llog_cleanup_commit_master(int force);
 
 static inline int opcode_offset(__u32 opc) {
         if (opc < OST_LAST_OPC) {
@@ -66,9 +69,9 @@ static inline int opcode_offset(__u32 opc) {
                         (LDLM_LAST_OPC - LDLM_FIRST_OPC) +
                         (MDS_LAST_OPC - MDS_FIRST_OPC) +
                         (OST_LAST_OPC - OST_FIRST_OPC));
-        } else if (opc == OBD_PING) {
+        } else if (opc < OBD_LAST_OPC) {
                 /* OBD Ping */
-                return (opc - OBD_PING +
+                return (opc - OBD_FIRST_OPC +
                         (PTLBD_LAST_OPC - PTLBD_FIRST_OPC) +
                         (LDLM_LAST_OPC - LDLM_FIRST_OPC) +
                         (MDS_LAST_OPC - MDS_FIRST_OPC) +
@@ -79,10 +82,11 @@ static inline int opcode_offset(__u32 opc) {
         }
 }
 
-#define LUSTRE_MAX_OPCODES (1 + (PTLBD_LAST_OPC - PTLBD_FIRST_OPC) \
-                              + (LDLM_LAST_OPC - LDLM_FIRST_OPC)   \
-                              + (MDS_LAST_OPC - MDS_FIRST_OPC)     \
-                              + (OST_LAST_OPC - OST_FIRST_OPC))
+/* Total number of opcode slots: the sum of the sizes of the PTLBD, LDLM,
+ * MDS, OST and OBD opcode ranges.  The OBD range is now counted from
+ * OBD_FIRST_OPC to OBD_LAST_OPC instead of the former hard-coded "1 +" for
+ * OBD_PING alone.
+ * NOTE(review): each *_LAST_OPC looks like an exclusive upper bound (compare
+ * the "opc < OBD_LAST_OPC" test in opcode_offset()) -- confirm against the
+ * opcode enums. */
+#define LUSTRE_MAX_OPCODES ((PTLBD_LAST_OPC - PTLBD_FIRST_OPC) + \
+                            (LDLM_LAST_OPC - LDLM_FIRST_OPC)   + \
+                            (MDS_LAST_OPC - MDS_FIRST_OPC)     + \
+                            (OST_LAST_OPC - OST_FIRST_OPC)     + \
+                            (OBD_LAST_OPC - OBD_FIRST_OPC))
 
 enum {
         PTLRPC_REQWAIT_CNTR     = 0,
index ccc05dc..3dfec9a 100644 (file)
 
 #ifdef __KERNEL__
 # include <linux/module.h>
-#else 
+#else
 # include <liblustre.h>
 #endif
 #include <linux/obd.h>
 #include <linux/obd_ost.h>
+#include <linux/lustre_mgmt.h>
 #include <linux/lustre_net.h>
 #include <linux/lustre_dlm.h>
 
@@ -40,19 +41,27 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf)
         struct obd_import *imp;
         struct obd_uuid server_uuid;
         int rq_portal, rp_portal, connect_op;
-        char *name;
+        char *name = obddev->obd_type->typ_name;
         ENTRY;
 
-        if (obddev->obd_type->typ_ops->o_brw) {
+        /* In a more perfect world, we would hang a ptlrpc_client off of
+         * obd_type and just use the values from there. */
+        if (!strcmp(name, LUSTRE_OSC_NAME)) {
                 rq_portal = OST_REQUEST_PORTAL;
                 rp_portal = OSC_REPLY_PORTAL;
-                name = "osc";
                 connect_op = OST_CONNECT;
-        } else {
+        } else if (!strcmp(name, LUSTRE_MDC_NAME)) {
                 rq_portal = MDS_REQUEST_PORTAL;
                 rp_portal = MDC_REPLY_PORTAL;
-                name = "mdc";
                 connect_op = MDS_CONNECT;
+        } else if (!strcmp(name, LUSTRE_MGMTCLI_NAME)) {
+                rq_portal = MGMT_REQUEST_PORTAL;
+                rp_portal = MGMT_REPLY_PORTAL;
+                connect_op = MGMT_CONNECT;
+        } else {
+                CERROR("unknown client OBD type \"%s\", can't setup\n",
+                       name);
+                RETURN(-EINVAL);
         }
 
         if (data->ioc_inllen1 < 1) {
@@ -108,18 +117,60 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf)
 
         cli->cl_import = imp;
         cli->cl_max_mds_easize = sizeof(struct lov_mds_md);
+        cli->cl_max_mds_cookiesize = sizeof(struct llog_cookie);
         cli->cl_sandev = to_kdev_t(0);
 
+        /* Register with management client if we need to. */
+        if (data->ioc_inllen3 > 0) {
+                char *mgmt_name = data->ioc_inlbuf3;
+                int rc;
+                struct obd_device *mgmt_obd;
+                mgmtcli_register_for_events_t register_f;
+
+                CDEBUG(D_HA, "%s registering with %s for events about %s\n",
+                       obddev->obd_name, mgmt_name, server_uuid.uuid);
+
+                mgmt_obd = class_name2obd(mgmt_name);
+                if (!mgmt_obd) {
+                        CERROR("can't find mgmtcli %s to register\n",
+                               mgmt_name);
+                        class_destroy_import(imp);
+                        RETURN(-ENOENT);
+                }
+                
+                register_f = inter_module_get("mgmtcli_register_for_events");
+                if (!register_f) {
+                        CERROR("can't i_m_g mgmtcli_register_for_events\n");
+                        class_destroy_import(imp);
+                        RETURN(-ENOSYS);
+                }
+                
+                rc = register_f(mgmt_obd, obddev, &imp->imp_target_uuid);
+                inter_module_put("mgmtcli_register_for_events");
+
+                if (!rc)
+                        cli->cl_mgmtcli_obd = mgmt_obd;
+
+                RETURN(rc);
+        }
+
         RETURN(0);
 }
 
-int client_obd_cleanup(struct obd_device *obddev, int force, int failover)
+/* Tear down the client state of @obddev.
+ *
+ * If the device registered with a management client, deregister it first,
+ * then destroy the import and clear cl_import.
+ *
+ * Returns 0 on success, or -EINVAL if the device has no import. */
+int client_obd_cleanup(struct obd_device *obddev, int flags)
 {
-        struct client_obd *client = &obddev->u.cli;
+        struct client_obd *cli = &obddev->u.cli;
 
-        if (!client->cl_import)
+        if (!cli->cl_import)
                 RETURN(-EINVAL);
-        class_destroy_import(client->cl_import);
-        client->cl_import = NULL;
+        if (cli->cl_mgmtcli_obd) {
+                mgmtcli_deregister_for_events_t dereg_f;
+
+                dereg_f = inter_module_get("mgmtcli_deregister_for_events");
+                /* The lookup can fail (e.g. the providing module is gone);
+                 * never call through a NULL pointer, and only drop the
+                 * reference we actually obtained.  Cleanup still proceeds. */
+                if (dereg_f) {
+                        dereg_f(cli->cl_mgmtcli_obd, obddev);
+                        inter_module_put("mgmtcli_deregister_for_events");
+                } else {
+                        CERROR("can't i_m_g mgmtcli_deregister_for_events\n");
+                }
+        }
+        class_destroy_import(cli->cl_import);
+        cli->cl_import = NULL;
         RETURN(0);
 }
index 57f3653..4b75026 100644 (file)
@@ -100,13 +100,14 @@ __init int ptlrpc_init(void)
         int rc;
         ENTRY;
 
-        lustre_assert_wire_constants ();
-        
+        lustre_assert_wire_constants();
+
         rc = ptlrpc_init_portals();
         if (rc)
                 RETURN(rc);
 
         ptlrpc_init_connection();
+        llog_init_commit_master();
 
         ptlrpc_put_connection_superhack = ptlrpc_put_connection;
         ptlrpc_abort_inflight_superhack = ptlrpc_abort_inflight;
@@ -117,6 +118,9 @@ static void __exit ptlrpc_exit(void)
 {
         ptlrpc_exit_portals();
         ptlrpc_cleanup_connection();
+#ifdef ENABLE_ORPHANS
+        llog_cleanup_commit_master(0);
+#endif
 }
 
 /* connection.c */
index ca2afad..70e9b5c 100644 (file)
 
 #define DEBUG_SUBSYSTEM S_RPC
 #ifdef __KERNEL__
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/kmod.h>
+# include <linux/config.h>
+# include <linux/module.h>
+# include <linux/kmod.h>
 #else
-#include <liblustre.h>
+# include <liblustre.h>
 #endif
 
 #include <linux/obd_support.h>
@@ -62,7 +62,7 @@ int ptlrpc_reconnect_import(struct obd_import *imp)
         struct lustre_handle old_hdl;
         __u64 committed_before_reconnect = imp->imp_peer_committed_transno;
 
-        CERROR("reconnect handle "LPX64"\n", 
+        CERROR("reconnect handle "LPX64"\n",
                imp->imp_dlm_handle.cookie);
 
         req = ptlrpc_prep_req(imp, imp->imp_connect_op, 3, size, tmp);
@@ -89,7 +89,7 @@ int ptlrpc_reconnect_import(struct obd_import *imp)
                         GOTO(out_disc, rc = -ENOTCONN);
                 }
 
-                if (memcmp(&imp->imp_remote_handle, &req->rq_repmsg->handle, 
+                if (memcmp(&imp->imp_remote_handle, &req->rq_repmsg->handle,
                            sizeof(imp->imp_remote_handle))) {
                         CERROR("%s@%s changed handle from "LPX64" to "LPX64
                                "; copying, but this may foreshadow disaster\n",
@@ -104,12 +104,13 @@ int ptlrpc_reconnect_import(struct obd_import *imp)
                 CERROR("reconnected to %s@%s after partition\n",
                        imp->imp_target_uuid.uuid, conn->c_remote_uuid.uuid);
                 GOTO(out_disc, rc = RECON_RESULT_RECONNECTED);
-        } else if (lustre_msg_get_op_flags(req->rq_repmsg) & MSG_CONNECT_RECOVERING) {
+        } else if (lustre_msg_get_op_flags(req->rq_repmsg) &
+                   MSG_CONNECT_RECOVERING) {
                 rc = RECON_RESULT_RECOVERING;
         } else {
                 rc = RECON_RESULT_EVICTED;
         }
-        
+
         old_hdl = imp->imp_remote_handle;
         imp->imp_remote_handle = req->rq_repmsg->handle;
         CERROR("reconnected to %s@%s ("LPX64", was "LPX64")!\n",
@@ -150,9 +151,9 @@ void ptlrpc_run_recovery_over_upcall(struct obd_device *obd)
         rc = USERMODEHELPER(argv[0], argv, envp);
         if (rc < 0) {
                 CERROR("Error invoking recovery upcall %s %s %s: %d; check "
-                       "/proc/sys/lustre/upcall\n",                
+                       "/proc/sys/lustre/upcall\n",
                        argv[0], argv[1], argv[2], rc);
-                
+
         } else {
                 CERROR("Invoked upcall %s %s %s",
                        argv[0], argv[1], argv[2]);
@@ -180,10 +181,10 @@ void ptlrpc_run_failed_import_upcall(struct obd_import* imp)
 
         rc = USERMODEHELPER(argv[0], argv, envp);
         if (rc < 0) {
-                CERROR("Error invoking recovery upcall %s %s %s %s %s: %d; check "
-                       "/proc/sys/lustre/lustre_upcall\n",                
+                CERROR("Error invoking recovery upcall %s %s %s %s %s: %d; "
+                       "check /proc/sys/lustre/lustre_upcall\n",
                        argv[0], argv[1], argv[2], argv[3], argv[4],rc);
-                
+
         } else {
                 CERROR("Invoked upcall %s %s %s %s %s\n",
                        argv[0], argv[1], argv[2], argv[3], argv[4]);
@@ -196,7 +197,6 @@ int ptlrpc_replay(struct obd_import *imp)
         struct list_head *tmp, *pos;
         struct ptlrpc_request *req;
         unsigned long flags;
-        __u64 committed = imp->imp_peer_committed_transno;
         ENTRY;
 
         /* It might have committed some after we last spoke, so make sure we
@@ -207,7 +207,7 @@ int ptlrpc_replay(struct obd_import *imp)
         spin_unlock_irqrestore(&imp->imp_lock, flags);
 
         CDEBUG(D_HA, "import %p from %s has committed "LPD64"\n",
-               imp, imp->imp_target_uuid.uuid, committed);
+               imp, imp->imp_target_uuid.uuid, imp->imp_peer_committed_transno);
 
         list_for_each(tmp, &imp->imp_replay_list) {
                 req = list_entry(tmp, struct ptlrpc_request, rq_list);
@@ -221,7 +221,7 @@ int ptlrpc_replay(struct obd_import *imp)
          * than the one we're replaying (it can't be committed until it's
          * replayed, and we're doing that here).  l_f_e_safe protects against
          * problems with the current request being committed, in the unlikely
-         * event of that race.  So, in conclusion, I think that it's safe to 
+         * event of that race.  So, in conclusion, I think that it's safe to
          * perform this list-walk without the imp_lock held.
          *
          * But, the {mdc,osc}_replay_open callbacks both iterate
@@ -235,7 +235,7 @@ int ptlrpc_replay(struct obd_import *imp)
                 DEBUG_REQ(D_HA, req, "REPLAY:");
 
                 rc = ptlrpc_replay_req(req);
-        
+
                 if (rc) {
                         CERROR("recovery replay error %d for req "LPD64"\n",
                                rc, req->rq_xid);
@@ -307,7 +307,6 @@ inline void ptlrpc_invalidate_import_state(struct obd_import *imp)
         ptlrpc_abort_inflight(imp);
 }
 
-
 void ptlrpc_handle_failed_import(struct obd_import *imp)
 {
         ENTRY;
@@ -329,7 +328,6 @@ void ptlrpc_request_handle_eviction(struct ptlrpc_request *failed_req)
         int rc;
         struct obd_import *imp= failed_req->rq_import;
         unsigned long flags;
-        struct ptlrpc_request *req;
         ENTRY;
 
         CDEBUG(D_HA, "import %s of %s@%s evicted: reconnecting\n",
@@ -347,7 +345,6 @@ void ptlrpc_request_handle_eviction(struct ptlrpc_request *failed_req)
                 failed_req->rq_err = 1;
                 spin_unlock_irqrestore (&failed_req->rq_lock, flags);
         }
-        ptlrpc_req_finished(req);
         EXIT;
 }
 
@@ -361,17 +358,23 @@ int ptlrpc_set_import_active(struct obd_import *imp, int active)
 
         notify_obd = imp->imp_obd->u.cli.cl_containing_lov;
 
-        /* When deactivating, mark import invalid, and 
-           abort in-flight requests. */
+        /* When deactivating, mark import invalid, and abort in-flight
+         * requests. */
         if (!active) {
-                CDEBUG(D_ERROR, "setting import %s INVALID\n", imp->imp_target_uuid.uuid);
                 spin_lock_irqsave(&imp->imp_lock, flags);
-                imp->imp_invalid = 1;
+                /* This is a bit of a hack, but invalidating replayable
+                 * imports makes a temporary reconnect failure into a much more
+                 * ugly -- and hard to remedy -- situation. */
+                if (!imp->imp_replayable) {
+                        CDEBUG(D_HA, "setting import %s INVALID\n",
+                               imp->imp_target_uuid.uuid);
+                        imp->imp_invalid = 1;
+                }
                 imp->imp_generation++;
                 spin_unlock_irqrestore(&imp->imp_lock, flags);
                 ptlrpc_invalidate_import_state(imp);
-//                ptlrpc_abort_inflight(imp);
-        } 
+                //ptlrpc_abort_inflight(imp);
+        }
 
         if (notify_obd == NULL)
                 GOTO(out, rc = 0);
@@ -403,8 +406,9 @@ int ptlrpc_set_import_active(struct obd_import *imp, int active)
 
 out:
         /* When activating, mark import valid */
-        if (active) {
-                CDEBUG(D_ERROR, "setting import %s VALID\n", imp->imp_target_uuid.uuid);
+        if (active && !rc) {
+                CDEBUG(D_HA, "setting import %s VALID\n",
+                       imp->imp_target_uuid.uuid);
                 spin_lock_irqsave(&imp->imp_lock, flags);
                 imp->imp_invalid = 0;
                 spin_unlock_irqrestore(&imp->imp_lock, flags);
@@ -420,7 +424,7 @@ void ptlrpc_fail_import(struct obd_import *imp, int generation)
         ENTRY;
 
         LASSERT (!imp->imp_dlm_fake);
-        
+
         spin_lock_irqsave(&imp->imp_lock, flags);
         if (imp->imp_level != LUSTRE_CONN_FULL)
                 in_recovery = 1;
@@ -466,14 +470,14 @@ int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid)
         ENTRY;
 
         spin_lock_irqsave(&imp->imp_lock, flags);
-        if (imp->imp_level == LUSTRE_CONN_FULL || 
+        if (imp->imp_level == LUSTRE_CONN_FULL ||
             imp->imp_level == LUSTRE_CONN_NOTCONN)
                     imp->imp_level = LUSTRE_CONN_RECOVER;
         else
                 in_recover = 1;
         spin_unlock_irqrestore(&imp->imp_lock, flags);
 
-        if (in_recover == 1) 
+        if (in_recover == 1)
                 RETURN(-EALREADY);
 
         if (new_uuid) {
index f2a1089..22ccb09 100644 (file)
@@ -289,18 +289,24 @@ void ptlrpc_daemonize(void)
         reparent_to_init();
 }
 
+/* Return the time elapsed from *small to *large, in microseconds
+ * (seconds difference scaled by 1000000 plus the tv_usec difference).
+ * NOTE(review): the result is a long; where long is 32 bits this presumably
+ * overflows for intervals longer than about 35 minutes -- confirm callers
+ * only measure short intervals. */
+static long timeval_sub(struct timeval *large, struct timeval *small)
+{
+        return (large->tv_sec - small->tv_sec) * 1000000 +
+                (large->tv_usec - small->tv_usec);
+}
+
 static int ptlrpc_main(void *arg)
 {
-        struct ptlrpc_svc_data *data = (struct ptlrpc_svc_data *)arg;
+        struct ptlrpc_svc_data *data = arg;
         struct obd_device *obddev = data->dev;
         struct ptlrpc_service *svc = data->svc;
         struct ptlrpc_thread *thread = data->thread;
         struct ptlrpc_request *request;
         ptl_event_t *event;
-        int rc = 0;
         unsigned long flags;
-        cycles_t workdone_time = -1;
-        cycles_t svc_workcycles = -1;
+        struct timeval start_time, finish_time;
+        long total;
+        int rc = 0;
         ENTRY;
 
         lock_kernel();
@@ -311,21 +317,14 @@ static int ptlrpc_main(void *arg)
         RECALC_SIGPENDING;
         SIGNAL_MASK_UNLOCK(current, flags);
 
-#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
-        sprintf(current->comm, "%s|%d", data->name,current->thread.extern_pid);
-#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        sprintf(current->comm, "%s|%d", data->name,
-                current->thread.mode.tt.extern_pid);
-#else
-        strcpy(current->comm, data->name);
-#endif
+        THREAD_NAME(current->comm, "%s", data->name);
         unlock_kernel();
 
         OBD_ALLOC(event, sizeof(*event));
-        if (!event)
+        if (event == NULL)
                 GOTO(out, rc = -ENOMEM);
         OBD_ALLOC(request, sizeof(*request));
-        if (!request)
+        if (request == NULL)
                 GOTO(out_event, rc = -ENOMEM);
 
         /* Record that the thread is running */
@@ -334,14 +333,15 @@ static int ptlrpc_main(void *arg)
 
         /* XXX maintain a list of all managed devices: insert here */
 
+        do_gettimeofday(&finish_time);
         /* And now, loop forever on requests */
         while (1) {
                 struct l_wait_info lwi = { 0 };
                 l_wait_event(svc->srv_waitq,
                              ptlrpc_check_event(svc, thread, event), &lwi);
 
+                spin_lock(&svc->srv_lock);
                 if (thread->t_flags & SVC_STOPPING) {
-                        spin_lock(&svc->srv_lock);
                         thread->t_flags &= ~SVC_STOPPING;
                         spin_unlock(&svc->srv_lock);
 
@@ -349,65 +349,64 @@ static int ptlrpc_main(void *arg)
                         break;
                 }
 
-                if (thread->t_flags & SVC_EVENT) {
-                        cycles_t  workstart_time;
-
-                        spin_lock(&svc->srv_lock);
-                        thread->t_flags &= ~SVC_EVENT;
-                        /* Update Service Statistics */
-                        workstart_time = get_cycles();
-                        if (workdone_time != -1 && svc->svc_stats != NULL) {
-                                /* Stats for req(n) are updated just before
-                                 * req(n+1) is executed. This avoids need to
-                                 * reacquire svc->srv_lock after
-                                 * call to handling_request().
-                                 */
-                                int opc;
-
-                                /* req_waittime */
-                                lprocfs_counter_add(svc->svc_stats,
-                                                    PTLRPC_REQWAIT_CNTR,
-                                                    (workstart_time -
-                                                     event->arrival_time));
-                                /* svc_eqdepth */
-                                /* Wait for b_eq branch
-                                lprocfs_counter_add(svc->svc_stats,
-                                                    PTLRPC_SVCEQDEPTH_CNTR,
-                                                    0);
-                                */
-                                /* svc_idletime */
-                                lprocfs_counter_add(svc->svc_stats,
-                                                    PTLRPC_SVCIDLETIME_CNTR,
-                                                    (workstart_time -
-                                                     workdone_time));
-                                /* previous request */
-                                opc = opcode_offset(request->rq_reqmsg->opc);
-                                if (opc > 0) {
-                                        LASSERT(opc < LUSTRE_MAX_OPCODES);
-                                        lprocfs_counter_add(svc->svc_stats, opc,
-                                                            PTLRPC_LAST_CNTR +
-                                                            svc_workcycles);
-                                }
-                        }
+                if (!(thread->t_flags & SVC_EVENT)) {
+                        CERROR("unknown flag in service");
                         spin_unlock(&svc->srv_lock);
+                        LBUG();
+                        EXIT;
+                        break;
+                }
+
+                thread->t_flags &= ~SVC_EVENT;
+                spin_unlock(&svc->srv_lock);
+
+                do_gettimeofday(&start_time);
+                total = timeval_sub(&start_time, &event->arrival_time);
+                if (svc->svc_stats != NULL) {
+                        lprocfs_counter_add(svc->svc_stats, PTLRPC_REQWAIT_CNTR,
+                                            total);
+                        lprocfs_counter_add(svc->svc_stats,
+                                            PTLRPC_SVCIDLETIME_CNTR,
+                                            timeval_sub(&start_time,
+                                                        &finish_time));
+#if 0 /* Wait for b_eq branch */
+                        lprocfs_counter_add(svc->svc_stats,
+                                            PTLRPC_SVCEQDEPTH_CNTR, 0);
+#endif
+                }
 
+                if (total / 1000000 > (long)obd_timeout) {
+                        CERROR("Dropping request from NID "LPX64" because it's "
+                               "%ld seconds old.\n", event->initiator.nid,
+                               total / 1000000); /* bug 1502 */
+                } else {
+                        CDEBUG(D_HA, "request from NID "LPX64" noticed after "
+                               "%ldus\n", event->initiator.nid, total);
                         rc = handle_incoming_request(obddev, svc, event,
                                                      request);
-                        workdone_time = get_cycles();
-                        svc_workcycles = workdone_time - workstart_time;
-                        continue;
                 }
-
-                CERROR("unknown break in service");
-                LBUG();
-                EXIT;
-                break;
+                do_gettimeofday(&finish_time);
+                total = timeval_sub(&finish_time, &start_time);
+
+                CDEBUG((total / 1000000 > (long)obd_timeout) ? D_ERROR : D_HA,
+                       "request "LPU64" from NID "LPX64" processed in %ldus "
+                       "(%ldus total)\n", request->rq_xid, event->initiator.nid,
+                       total, timeval_sub(&finish_time, &event->arrival_time));
+
+                if (svc->svc_stats != NULL) {
+                        int opc = opcode_offset(request->rq_reqmsg->opc);
+                        if (opc > 0) {
+                                LASSERT(opc < LUSTRE_MAX_OPCODES);
+                                lprocfs_counter_add(svc->svc_stats,
+                                                    opc + PTLRPC_LAST_CNTR,
+                                                    total);
+                        }
+                }
         }
 
         /* NB should wait for all SENT callbacks to complete before exiting
          * here.  Unfortunately at this time there is no way to track this
-         * state.
-         */
+         * state. */
         OBD_FREE(request, sizeof(*request));
 out_event:
         OBD_FREE(event, sizeof(*event));
index a24a26a..1b2ba01 100644 (file)
@@ -1,17 +1,17 @@
 # lustre.spec
 %define version b_devel
-%define kversion @RELEASE@
+%define kversion @LINUXRELEASE@
 %define linuxdir @LINUX@
-Release: 0306170928kernel
 
 Summary: Lustre Lite File System
 Name: lustre-lite
 Version: %{version}
+Release: @RELEASE@
 Copyright: GPL
 Group: Utilities/System
 Requires: lustre-modules, PyXML
-BuildRoot: /var/tmp/lustre-%{version}-root
 Source: ftp://ftp.lustre.com/pub/lustre/lustre-%{version}.tar.gz
+BuildRoot: /var/tmp/lustre-%{version}-root
 
 %description
 The Lustre Lite Cluster File System: kernel drivers for file system,
@@ -69,21 +69,10 @@ cd $RPM_BUILD_DIR/lustre-%{version}
 ./configure --with-linux='%{linuxdir}' 
 make
 
-#%ifarch i386
-#cd $RPM_BUILD_DIR/lustre-%{version}-lib/lustre-%{version}
-#./configure --with-lib 
-#make
-#%endif
-
 %install
 cd $RPM_BUILD_DIR/lustre-%{version}
 make install prefix=$RPM_BUILD_ROOT
 
-#%ifarch i386
-#cd $RPM_BUILD_DIR/lustre-%{version}-lib/lustre-%{version}
-#make install prefix=$RPM_BUILD_ROOT
-#%endif
-
 %ifarch alpha
 # this hurts me
   conf_flag=
@@ -226,20 +215,20 @@ if [ ! -e /dev/portals ]; then
 fi
 depmod -ae || exit 0
 
-grep -q obdclass /etc/modules.conf || \
-       echo 'alias char-major-10-241 obdclass' >> /etc/modules.conf
+#grep -q obdclass /etc/modules.conf || \
+#      echo 'alias char-major-10-241 obdclass' >> /etc/modules.conf
 
-grep -q '/dev/obd' /etc/modules.conf || \
-       echo 'alias /dev/obd obdclass' >> /etc/modules.conf
+#grep -q '/dev/obd' /etc/modules.conf || \
+#      echo 'alias /dev/obd obdclass' >> /etc/modules.conf
 
-grep -q '/dev/lustre' /etc/modules.conf || \
-       echo 'alias /dev/lustre obdclass' >> /etc/modules.conf
+#grep -q '/dev/lustre' /etc/modules.conf || \
+#      echo 'alias /dev/lustre obdclass' >> /etc/modules.conf
 
-grep -q portals /etc/modules.conf || \
-        echo 'alias char-major-10-240 portals' >> /etc/modules.conf
+#grep -q portals /etc/modules.conf || \
+#        echo 'alias char-major-10-240 portals' >> /etc/modules.conf
 
-grep -q '/dev/portals' /etc/modules.conf || \
-        echo 'alias /dev/portals portals' >> /etc/modules.conf
+#grep -q '/dev/portals' /etc/modules.conf || \
+#        echo 'alias /dev/portals portals' >> /etc/modules.conf
 
 %postun
 depmod -ae || exit 0
@@ -257,6 +246,7 @@ if grep -q slapd-lustre $slapd; then
    cp $tmp $slapd
    rm $tmp
 fi
+
 %clean
 #rm -rf $RPM_BUILD_ROOT
 
index 2e5c1fe..21575d0 100644 (file)
@@ -41,5 +41,9 @@ runas
 openfile
 unlinkmany
 fchdir_test
+*.cmd
 getdents
 o_directory
+mkdirdeep
+utime
+small_write
index 064de98..6600962 100644 (file)
@@ -6,18 +6,21 @@ CFLAGS := -g -Wall
 # LDADD := -lreadline -ltermcap # -lefence
 EXTRA_DIST = $(pkgexample_SCRIPTS) $(noinst_SCRIPTS) $(noinst_DATA) \
        sanity.sh          rundbench    mcreate
-pkgexample_SCRIPTS = llmount.sh llmountcleanup.sh llecho.sh llechocleanup.sh local.sh echo.sh uml.sh lov.sh
+pkgexample_SCRIPTS = llmount.sh llmountcleanup.sh llecho.sh llechocleanup.sh
+pkgexample_SCRIPTS += local.sh echo.sh uml.sh lov.sh
 noinst_DATA =
-noinst_SCRIPTS = leak_finder.pl llecho.sh llmount.sh llmountcleanup.sh tbox.sh \
-       llrmount.sh runfailure-mds runvmstat runfailure-net runfailure-ost \
-       runiozone runregression-net.sh runtests sanity.sh rundbench
+noinst_SCRIPTS = leak_finder.pl llecho.sh llmount.sh llmountcleanup.sh tbox.sh
+noinst_SCRIPTS += llrmount.sh runfailure-mds runvmstat runfailure-net
+noinst_SCRIPTS += runfailure-ost runiozone runregression-net.sh runtests
+noinst_SCRIPTS += sanity.sh rundbench
 noinst_PROGRAMS = openunlink testreq truncate directio openme writeme open_delay
-noinst_PROGRAMS += munlink tchmod toexcl fsx test_brw openclose createdestroy
-noinst_PROGRAMS += stat createmany statmany multifstat createtest mlink
+noinst_PROGRAMS += tchmod toexcl fsx test_brw openclose createdestroy
+noinst_PROGRAMS += stat createmany statmany multifstat createtest mlink utime
 noinst_PROGRAMS += opendirunlink opendevunlink unlinkmany fchdir_test checkstat
-noinst_PROGRAMS += wantedi statone runas openfile getdents o_directory
+noinst_PROGRAMS += wantedi statone runas openfile getdents mkdirdeep o_directory
+noinst_PROGRAMS += small_write
 # noinst_PROGRAMS += ldaptest
-sbin_PROGRAMS = mcreate mkdirmany
+sbin_PROGRAMS = mcreate munlink mkdirmany
 
 # ldaptest_SOURCES = ldaptest.c
 tchmod_SOURCES = tchmod.c
@@ -48,13 +51,15 @@ openfile_SOURCES = openfile.c
 wantedi_SOURCES = wantedi.c
 createtest_SOURCES = createtest.c
 open_delay_SOURCES = open_delay.c
-opendirunlink_SOURCES=opendirunlink.c
-opendevunlink_SOURCES=opendirunlink.c
-fchdir_test_SOURCES=fchdir_test.c
+opendirunlink_SOURCES = opendirunlink.c
+opendevunlink_SOURCES = opendevunlink.c
+fchdir_test_SOURCES = fchdir_test.c
 getdents_SOURCES=getdents.c
 o_directory_SOURCES = o_directory.c
-#mkdirdeep_SOURCES= mkdirdeep.c
-#mkdirdeep_LDADD=-L../portals/util -lptlctl
-#mkdirdeep_CPPFLAGS=-I$(top_srcdir)/portals/include
+utime_SOURCES = utime.c
+mkdirdeep_SOURCES = mkdirdeep.c
+mkdirdeep_LDADD=-L$(top_builddir)/portals/utils -lptlctl
+mkdirdeep_CPPFLAGS=-I$(top_srcdir)/portals/include
+small_write_SOURCES = small_write.c
 
 include $(top_srcdir)/Rules
index f647a55..496f3b4 100644 (file)
@@ -8,6 +8,7 @@ set -e
 
 SRCDIR="`dirname $0`"
 CREATE=$SRCDIR/create.pl
+RENAME=$SRCDIR/rename.pl
 
 debug_client_on()
 {
@@ -23,118 +24,71 @@ MNT=${MNT:-/mnt/lustre}
 
 debug_client_on
 echo "create.pl, 2 mounts, 1 thread, 10 ops, debug on"
-perl $CREATE -- $MNT 2 10
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=10
 echo "create.pl, 2 mounts, 1 thread, 100 ops, debug on"
-perl $CREATE --silent -- $MNT 2 100
-echo "create.pl --mcreate=0, 2 mounts, 1 thread, 10 ops, debug on"
-perl $CREATE --mcreate=0 -- $MNT 2 10
-echo "create.pl --mcreate=0, 2 mounts, 1 thread, 100 ops, debug on"
-perl $CREATE --mcreate=0 --silent -- $MNT 2 100
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=100 --silent
+echo "create.pl --use_mcreate=0, 2 mounts, 1 thread, 10 ops, debug on"
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=10 --use_mcreate=0
+echo "create.pl --use_mcreate=0, 2 mounts, 1 thread, 100 ops, debug on"
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=100 --use_mcreate=0 --silent
 echo "rename.pl, 2 mounts, 1 thread, 10 ops, debug on"
-perl rename.pl --count=2 $MNT 10
+perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=10
 echo "rename.pl, 2 mounts, 1 thread, 100 ops, debug on"
-perl rename.pl --count=2 --silent $MNT 100
+perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=100 --silent
 
 debug_client_off
 echo "create.pl, 2 mounts, 1 thread, 1000 ops, debug off"
-perl $CREATE --silent -- $MNT 2 1000
-echo "create.pl --mcreate=0, 2 mounts, 1 thread, 1000 ops, debug off"
-perl $CREATE --silent --mcreate=0 -- $MNT 2 1000
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=1000 --silent
+echo "create.pl --use_mcreate=0, 2 mounts, 1 thread, 1000 ops, debug off"
+perl $CREATE --silent --use_mcreate=0 -- $MNT 2 1000
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=1000 --use_mcreate=0 --silent
 echo "rename.pl, 2 mounts, 1 thread, 1000 ops, debug off"
-perl rename.pl --count=2 --silent $MNT 1000
+perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=1000 --silent
 
 debug_client_on
 echo "create.pl, 2 mounts, 2 threads, 100 ops, debug on"
-perl $CREATE --silent -- $MNT 2 100 &
-perl $CREATE --silent -- $MNT 2 100 &
-wait
-echo "create.pl --mcreate=0, 2 mounts, 2 threads, 100 ops, debug on"
-perl $CREATE --silent --mcreate=0 -- $MNT 2 100 &
-perl $CREATE --silent --mcreate=0 -- $MNT 2 100 &
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=100 --num_threads=2 --silent
+echo "create.pl --use_mcreate=0, 2 mounts, 2 threads, 100 ops, debug on"
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=100 --num_threads=2 --use_mcreate=0 --silent
 echo "rename.pl, 2 mounts, 2 thread, 1000 ops, debug on"
-perl rename.pl --count=2 --silent $MNT 1000 &
-perl rename.pl --count=2 --silent $MNT 1000 &
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=1000 --num_threads=2 --silent
 
 debug_client_off
 echo "create.pl, 2 mounts, 2 threads, 2000 ops, debug off"
-perl $CREATE --silent -- $MNT 2 2000 &
-perl $CREATE --silent -- $MNT 2 2000 &
-wait
-echo "create.pl --mcreate=0, 2 mounts, 2 threads, 2000 ops, debug off"
-perl $CREATE --silent --mcreate=0 -- $MNT 2 2000 &
-perl $CREATE --silent --mcreate=0 -- $MNT 2 2000 &
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=2 --silent
+echo "create.pl --use_mcreate=0, 2 mounts, 2 threads, 2000 ops, debug off"
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=2 --use_mcreate=0 --silent
 echo "rename.pl, 2 mounts, 2 threads, 2000 ops, debug off"
-perl rename.pl --count=2 --silent $MNT 2000 &
-perl rename.pl --count=2 --silent $MNT 2000 &
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=2 --silent
 
 debug_client_on
 echo "create.pl, 2 mounts, 4 threads, 100 ops, debug on"
-for i in `seq 1 4`; do
-  perl $CREATE --silent -- $MNT 2 100 &
-done
-wait
-echo "create.pl --mcreate=0, 2 mounts, 4 threads, 100 ops, debug on"
-for i in `seq 1 4`; do
-  perl $CREATE --silent --mcreate=0 -- $MNT 2 100 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=100 --num_threads=4 --silent
+echo "create.pl --use_mcreate=0, 2 mounts, 4 threads, 100 ops, debug on"
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=100 --num_threads=4 --use_mcreate=0 --silent
 echo "rename.pl, 2 mounts, 4 threads, 2000 ops, debug on"
-for i in `seq 1 4`; do
-  perl rename.pl --count=2 --silent $MNT 2000 &
-done
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=4 --silent
 
 debug_client_off
 echo "create.pl, 2 mounts, 4 threads, 2000 ops, debug off"
-for i in `seq 1 4`; do
-  perl $CREATE --silent -- $MNT 2 2000 &
-done
-wait
-echo "create.pl --mcreate=0, 2 mounts, 4 threads, 2000 ops, debug off"
-for i in `seq 1 4`; do
-  perl $CREATE --silent --mcreate=0 -- $MNT 2 2000 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=4 --silent
+echo "create.pl --use_mcreate=0, 2 mounts, 4 threads, 2000 ops, debug off"
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=4 --use_mcreate=0 --silent
 echo "rename.pl, 2 mounts, 4 threads, 2000 ops, debug off"
-for i in `seq 1 4`; do
-  perl rename.pl --count=2 --silent $MNT 2000 &
-done
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=4 --silent
 
 debug_client_on
 echo "create.pl, 2 mounts, 8 threads, 500 ops, debug on"
-for i in `seq 1 8`; do
-  perl $CREATE --silent -- $MNT 2 500 &
-done
-wait
-echo "create.pl --mcreate=0, 2 mounts, 8 threads, 500 ops, debug on"
-for i in `seq 1 8`; do
-  perl $CREATE --silent --mcreate=0 -- $MNT 2 500 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=500 --num_threads=8 --silent
+echo "create.pl --use_mcreate=0, 2 mounts, 8 threads, 500 ops, debug on"
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=500 --num_threads=8 --use_mcreate=0 --silent
 echo "rename.pl, 2 mounts, 8 threads, 2000 ops, debug on"
-for i in `seq 1 8`; do
-  perl rename.pl --count=2 --silent $MNT 2000 &
-done
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=8 --silent
 
 debug_client_off
 echo "create.pl, 2 mounts, 8 threads, 2000 ops, debug off"
-for i in `seq 1 8`; do
-  perl $CREATE --silent -- $MNT 2 2000 &
-done
-wait
-echo "create.pl --mcreate=0, 2 mounts, 8 threads, 2000 ops, debug off"
-for i in `seq 1 8`; do
-  perl $CREATE --silent --mcreate=0 -- $MNT 2 2000 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=8 --silent
+echo "create.pl --use_mcreate=0, 2 mounts, 8 threads, 2000 ops, debug off"
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=8 --use_mcreate=0 --silent
 echo "rename.pl, 2 mounts, 8 threads, 2000 ops, debug off"
-for i in `seq 1 8`; do
-  perl rename.pl --count=2 --silent $MNT 2000 &
-done
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=8 --silent
index 53774e5..2bf0a53 100644 (file)
@@ -8,6 +8,7 @@ set -e
 
 SRCDIR="`dirname $0`"
 CREATE=$SRCDIR/create.pl
+RENAME=$SRCDIR/rename.pl
 
 debug_client_on()
 {
@@ -23,121 +24,75 @@ MNT=${MNT:-/mnt/lustre}
 
 debug_client_on
 echo "create.pl, 1 mount, 1 thread, 10 ops, debug on"
-perl $CREATE -- $MNT -1 10
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=10
 echo "create.pl, 1 mount, 1 thread, 100 ops, debug on"
-perl $CREATE --silent -- $MNT -1 100
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=100 --silent
 echo "create.pl --mcreate=0, 1 mount, 1 thread, 10 ops, debug on"
-perl $CREATE --mcreate=0 -- $MNT -1 10
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=10 --use_mcreate=0
 echo "create.pl --mcreate=0, 1 mount, 1 thread, 100 ops, debug on"
-perl $CREATE --mcreate=0 --silent -- $MNT -1 100
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=100 --use_mcreate=0 --silent
 echo "rename.pl, 1 mount, 1 thread, 10 ops, debug on"
-perl rename.pl $MNT 10
+perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=10
 echo "rename.pl, 1 mount, 1 thread, 100 ops, debug on"
-perl rename.pl --silent $MNT 100
+perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=100 --silent
 
 debug_client_off
 echo "create.pl, 1 mount, 1 thread, 1000 ops, debug off"
-perl $CREATE --silent -- $MNT -1 1000
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=1000 --silent
 echo "create.pl --mcreate=0, 1 mount, 1 thread, 1000 ops, debug off"
-perl $CREATE --silent --mcreate=0 -- $MNT -1 1000
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=1000 --use_mcreate=0 --silent
 echo "rename.pl, 1 mount, 1 thread, 1000 ops, debug off"
-perl rename.pl --silent $MNT 1000
+perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=1000 --silent
 
 debug_client_on
 echo "create.pl, 1 mount, 2 threads, 100 ops, debug on"
-perl $CREATE --silent -- $MNT -1 100 &
-perl $CREATE --silent -- $MNT -1 100 &
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=100 --num_threads=2 --silent
 echo "create.pl --mcreate=0, 1 mount, 2 threads, 100 ops, debug on"
-perl $CREATE --silent --mcreate=0 -- $MNT -1 100 &
-perl $CREATE --silent --mcreate=0 -- $MNT -1 100 &
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=100 --num_threads=2 --use_mcreate=0 --silent
 echo "rename.pl, 1 mount, 2 thread, 1000 ops, debug on"
-perl rename.pl --silent $MNT 1000 &
-perl rename.pl --silent $MNT 1000 &
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=1000 --num_threads=2 --silent
 
 debug_client_off
 echo "create.pl, 1 mount, 2 threads, 2000 ops, debug off"
-perl $CREATE --silent -- $MNT -1 2000 &
-perl $CREATE --silent -- $MNT -1 2000 &
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=2 --silent
 echo "create.pl --mcreate=0, 1 mount, 2 threads, 2000 ops, debug off"
-perl $CREATE --silent --mcreate=0 -- $MNT -1 2000 &
-perl $CREATE --silent --mcreate=0 -- $MNT -1 2000 &
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=2 --use_mcreate=0  --silent
 wait
 echo "rename.pl, 1 mount, 2 threads, 2000 ops, debug off"
-perl rename.pl --silent $MNT 2000 &
-perl rename.pl --silent $MNT 2000 &
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=2 --silent
 
 debug_client_on
 echo "create.pl, 1 mount, 4 threads, 100 ops, debug on"
-for i in `seq 1 4`; do
-  perl $CREATE --silent -- $MNT -1 100 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=100 --num_threads=4 --silent
 echo "create.pl --mcreate=0, 1 mount, 4 threads, 100 ops, debug on"
-for i in `seq 1 4`; do
-  perl $CREATE --silent --mcreate=0 -- $MNT -1 100 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=100 --num_threads=4 --use_mcreate=0 --silent
 echo "rename.pl, 1 mount, 4 threads, 2000 ops, debug on"
-for i in `seq 1 4`; do
-  perl rename.pl --silent $MNT 2000 &
-done
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=4 --silent
 
 debug_client_off
 echo "create.pl, 1 mount, 4 threads, 2000 ops, debug off"
-for i in `seq 1 4`; do
-  perl $CREATE --silent -- $MNT -1 2000 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=4  --silent
 echo "create.pl --mcreate=0, 1 mount, 4 threads, 2000 ops, debug off"
-for i in `seq 1 4`; do
-  perl $CREATE --silent --mcreate=0 -- $MNT -1 2000 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=4  --use_mcreate=0 --silent
 echo "rename.pl, 1 mount, 4 threads, 2000 ops, debug off"
-for i in `seq 1 4`; do
-  perl rename.pl --silent $MNT 2000 &
-done
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=4 --silent
 
 debug_client_on
 echo "create.pl, 1 mount, 8 threads, 500 ops, debug on"
-for i in `seq 1 8`; do
-  perl $CREATE --silent -- $MNT -1 500 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=500 --num_threads=8  --silent
 echo "create.pl --mcreate=0, 1 mount, 8 threads, 500 ops, debug on"
-for i in `seq 1 8`; do
-  perl $CREATE --silent --mcreate=0 -- $MNT -1 500 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=500 --num_threads=8  --use_mcreate=0 --silent
 echo "rename.pl, 1 mount, 8 threads, 2000 ops, debug on"
-for i in `seq 1 8`; do
-  perl rename.pl --silent $MNT 2000 &
-done
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=8 --silent
 
 debug_client_off
 echo "create.pl, 1 mount, 8 threads, 2000 ops, debug off"
-for i in `seq 1 8`; do
-  perl $CREATE --silent -- $MNT -1 2000 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=8  --silent
 echo "create.pl --mcreate=0, 1 mount, 8 threads, 2000 ops, debug off"
-for i in `seq 1 8`; do
-  perl $CREATE --silent --mcreate=0 -- $MNT -1 2000 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=8  --use_mcreate=0 --silent
 echo "rename.pl, 1 mount, 8 threads, 2000 ops, debug off"
-for i in `seq 1 8`; do
-  perl rename.pl --silent $MNT 2000 &
-done
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=8 --silent
+
 sh rundbench 1
 sh rundbench 2
 sh rundbench 4
index 0d2d836..919ea1f 100755 (executable)
@@ -5,7 +5,7 @@ set -vxe
 
 [ "$CONFIGS" -a -z "$SANITYN" ] && SANITYN=no
 [ "$CONFIGS" ] || CONFIGS="local lov"
-[ "$MAX_THREADS" ] || MAX_THREADS=50
+[ "$MAX_THREADS" ] || MAX_THREADS=10
 if [ -z "$THREADS" ]; then
        KB=`awk '/MemTotal:/ { print $2 }' /proc/meminfo`
        THREADS=`expr $KB / 16384`
@@ -76,7 +76,7 @@ for NAME in $CONFIGS; do
        if [ "$IOZONE_DIR" != "no" ]; then
                mount | grep $MNT || sh llmount.sh
                SPACE=`df $MNT | tail -1 | awk '{ print $4 }'`
-               IOZ_THREADS=`expr $SPACE / $SIZE`
+               IOZ_THREADS=`expr $SPACE / \( $SIZE + $SIZE / 1000 \)`
                [ $THREADS -lt $IOZ_THREADS ] && IOZ_THREADS=$THREADS
 
                $DEBUG_OFF
index cb4f94d..983df93 100755 (executable)
@@ -6,10 +6,11 @@ config=${1:-$(basename $0 .sh)}.xml
 LMC=${LMC:-../utils/lmc -m $config}
 TMP=${TMP:-/tmp}
 
-MDSDEV=$TMP/mds1
+MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
 MDSSIZE=50000
+FSTYPE=${FSTYPE:-ext3}
 
-OSTDEV=$TMP/ost1
+OSTDEV=${OSTDEV:-$TMP/ost1-`hostname`}
 OSTSIZE=200000
 
 rm -f $config
@@ -18,12 +19,12 @@ ${LMC} --add node --node localhost || exit 10
 ${LMC} --add net --node  localhost --nid localhost --nettype tcp || exit 11
 
 # configure mds server
-${LMC}  --add mds  --node localhost --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 20
+${LMC}  --add mds  --node localhost --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE || exit 20
 
 # configure ost
-${LMC} --add ost --node localhost --obd obd1 --obdtype obdecho || exit 30
+${LMC} --add ost --node localhost --obd obd1 --fstype $FSTYPE --obdtype obdecho || exit 30
 # configure ost
-${LMC} --add ost --node localhost --obd obd2 --obdtype obdecho || exit 30
+${LMC} --add ost --node localhost --obd obd2 --fstype $FSTYPE --obdtype obdecho || exit 30
 
 ${LMC} --add cobd --node localhost --real_obd obd1 --cache_obd obd2
 
index 6156869..c5f3f12 100644 (file)
-#!/usr/bin/perl
+#!/usr/bin/perl -w
+use strict;
+$|++;
+
+$ENV{PATH}="/bin:/usr/bin";
+$ENV{ENV}="";
+$ENV{BASH_ENV}="";
+use POSIX ":sys_wait_h";
+
+use diagnostics;
 use Getopt::Long;
 
+use vars qw(
+           $MAX_THREADS
+           );
+
+# Don't try to run more than this many threads concurrently.
+$MAX_THREADS = 16;
+
+# Initialize variables
 my $silent = 0;
-my $mcreate = 1; # should we use mcreate or open?
-my $files = 5;
+my $use_mcreate = 1; # should we use mcreate or open?
+my $num_files = 5;   # number of files to create
+my $iterations = 1;
+my $num_threads = 1;
+my $mountpt;
+my $num_mounts = -1;
 
+# Get options from the command line.
 GetOptions("silent!" => \$silent,
-           "mcreate=i" => \$mcreate,
-           "files=i" => \$files);
+           "use_mcreate=i" => \$use_mcreate,
+           "num_files=i" => \$num_files,
+          "mountpt=s" => \$mountpt,
+          "num_mounts=i" => \$num_mounts,
+          "iterations=i" => \$iterations,
+          "num_threads=i" => \$num_threads,
+          ) || die &usage;
+
+# Check for mandatory args.
+if (!$mountpt || 
+    !$num_mounts) {
+    die &usage;
+}
+
+if ($num_threads > $MAX_THREADS) {
+    print "\nMAX_THREADS is currently set to $MAX_THREADS.\n\n";
+    print "You will have to change this in the source\n";
+    print "if you really want to run with $num_threads threads.\n\n";
+    exit 1;
+}
 
-my $mtpt = shift || usage();
-my $mount_count = shift || usage();
-my $i = shift || usage();
-my $count = $i;
+# Initialize rand() function.
+srand (time ^ $$ ^ unpack "%L*", `ps axww | gzip`);
+
+#########################################################################
+### MAIN
+
+for (my $i=1; $i<=$num_threads; $i++) {
+    my $status = &fork_and_create($i);
+    last if ($status != 0);
+}
+
+# Wait for all our threads to finish.
+my $child = 0;
+do {
+    $child = waitpid(-1, WNOHANG);
+} until $child > 0;
+sleep 1;
+
+exit 0;
+
+#########################################################################
+### SUBROUTINES
 
 sub usage () {
-    print "Usage: $0 [--silent] [--mcreate=n] [--files=n] <mnt prefix> <mnt count> <iterations>\n";
-    print "example: $0 /mnt/lustre 2 50\n";
-    print "         will test in /mnt/lustre1 and /mnt/lustre2\n";
-    print "         $0 /mnt/lustre -1 50\n";
-    print "         will test in /mnt/lustre only\n";
+    print "\nUsage: $0 [--silent] [--use_mcreate=n] [--num_files=n] [--iterations=n] [--num_threads=n] --mountpt=/path/to/lustre/mount --num_mounts=n\n\n";
+    print "\t--silent\tminimal output\n";
+    print "\t--use_mcreate=n\tuse mcreate to create files, default=1 (yes)\n";
+    print "\t--num_files=n\tnumber of files to create per iteration, default=5\n";
+    print "\t--iterations=n\tnumber of iterations to perform, default=1\n";
+    print "\t--num_threads=n\tnumber of threads to run, default=1\n";
+    print "\t--mountpt\tlocation of lustre mount\n";
+    print "\t--num_mounts=n\tnumber of lustre mounts to test across, default=-1 (single mount point without numeric suffix)\n\n";
+    print "example: $0 --mountpt=/mnt/lustre --num_mounts=2 --iterations=50\n";
+    print "         will perform 50 interations in /mnt/lustre1 and /mnt/lustre2\n";
+    print "         will perform 50 iterations in /mnt/lustre1 and /mnt/lustre2\n";
+    print "         will perform 50 iterations in /mnt/lustre only\n\n";
     exit;
 }
 
-sub do_open($) {
-    my $path = shift;
+#########################################################################
+sub fork_and_create ($) {
+    my ($thread_num) = @_;
+    
+  FORK: {
+      if (my $pid = fork) {
+         # parent here
+         # child process pid is available in $pid
+         return 0;
+      } elsif (defined $pid) { # $pid is zero here if defined
+         my $current_iteration=1;
+         while ($current_iteration <= $iterations) {
+             for (my $i=1; $i<=$num_files; $i++) {
+                 my $which = "";
+                 if ($num_mounts > 0) {
+                     $which = int(rand() * $num_mounts) + 1;
+                 }
+                 my $d = int(rand() * $num_files);
+                 do_open("${mountpt}${which}/thread${thread_num}.${d}");
+                 
+                 if ($num_mounts > 0) {
+                     $which = int(rand() * $num_mounts) + 1;
+                 }
+                 $d = int(rand() * $num_files);
+                 my $path = "${mountpt}${which}/thread${thread_num}.${d}";
+                 print  "Thread $thread_num: Unlink $path start [" . $$."]...\n" if !$silent;
+                 if (unlink($path)) {
+                     print "Thread $thread_num: Unlink done [$$] $path: Success\n" if !$silent;
+                 } else {
+                     print "Thread $thread_num: Unlink done [$$] $path: $!\n"if !$silent;
+                 }
+             }
+             if (($current_iteration) % 100 == 0) {
+                 print STDERR "Thread $thread_num: " . $current_iteration . " operations [" . $$ . "]\n";
+             }
+             $current_iteration++;
+         }
+         
+         my $which = "";
+         if ($num_mounts > 0) {
+             $which = int(rand() * $num_mounts) + 1;
+         }
+         for (my $d = 0; $d < $num_files; $d++) {
+             my $path = "${mountpt}${which}/thread${thread_num}.${d}";
+             unlink("$path") if (-e $path);
+         }
+         
+         print "Thread $thread_num: Done.\n";
+         
+         exit 0;
+
+      } elsif ($! =~ /No more process/) {
+          # EAGAIN, supposedly recoverable fork error
+         sleep 5;
+         redo FORK;
+      } else {
+          # weird fork error
+         die "Can't fork: $!\n";
+      }
+  }
+
+}
+
+#########################################################################
+
+sub do_open ($) {
+    my ($path) = @_;;
 
-    if ($mcreate) {
+    if ($use_mcreate) {
         my $tmp = `./mcreate $path`;
         if ($tmp) {
             print  "Creating $path [" . $$."]...\n" if !$silent;
@@ -37,42 +167,9 @@ sub do_open($) {
         }
     } else {
         print  "Opening $path [" . $$."]...\n"if !$silent;
-        open(FH, ">$path") || die "open($PATH): $!";
+        open(FH, ">$path") || die "open($path): $!";
         print  "Open done [$$] $path: Success\n"if !$silent;
         close(FH) || die;
     }
 }
 
-while ($i--) {
-    my $which = "";
-    if ($mount_count > 0) {
-        $which = int(rand() * $mount_count) + 1;
-    }
-    $d = int(rand() * $files);
-    do_open("$mtpt$which/$d");
-
-    if ($mount_count > 0) {
-        $which = int(rand() * $mount_count) + 1;
-    }
-    $d = int(rand() * $files);
-    $path = "$mtpt$which/$d";
-    print  "Unlink $path start [" . $$."]...\n"if !$silent;
-    if (unlink($path)) {
-        print  "Unlink done [$$] $path: Success\n"if !$silent;
-    } else {
-        print  "Unlink done [$$] $path: $!\n"if !$silent;
-    }
-    if (($count - $i) % 100 == 0) {
-        print STDERR ($count - $i) . " operations [" . $$ . "]\n";
-    }
-}
-
-my $which = "";
-if ($mount_count > 0) {
-    $which = int(rand() * $mount_count) + 1;
-}
-for ($d = 0; $d < $files; $d++) {
-    unlink("$mtpt$which/$d");
-}
-
-print "Done.\n";
index e660ea4..cc92c80 100644 (file)
@@ -41,7 +41,7 @@ int main(int argc, char **argv)
                 return 1;
         }
 
-        printf("directio on %s for %dx%lu blocks \n", argv[1], blocks,
+        printf("directio on %s for %dx%lu bytes \n", argv[1], blocks,
                st.st_blksize);
 
         seek = (off64_t)seek_blocks * (off64_t)st.st_blksize;
@@ -75,5 +75,6 @@ int main(int argc, char **argv)
                 return 1;
         }
 
+       printf("PASS\n");
         return 0;
 }
index 335db41..b4fe5a4 100755 (executable)
@@ -21,8 +21,9 @@ CLIENTNID=${CLIENTNID:-$CLIENT}
 
 
 # FIXME: make LMC not require MDS for obdecho LOV
-MDSDEV=${MDSDEV:-$TMP/mds1}
+MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
 MDSSIZE=10000
+FSTYPE=${FSTYPE:-ext3}
 
 STRIPE_BYTES=65536
 STRIPES_PER_OBJ=2      # 0 means stripe over all OSTs
@@ -33,7 +34,7 @@ $LMC --add node --node $SERVER  || exit 1
 $LMC --add net --node $SERVER --nid $SERVERNID --nettype $NET || exit 2
 
 if (($LOV)); then
-    $LMC --add mds --node $SERVER --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 10
+    $LMC --add mds --node $SERVER --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE || exit 10
     $LMC --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPE_BYTES --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 || exit 11
     $LMC --add ost --node $SERVER --lov lov1 --osdtype=obdecho || exit 12
     $LMC --add ost --node $SERVER --lov lov1 --osdtype=obdecho || exit 13
index a2b1d5e..92a2342 100644 (file)
@@ -294,9 +294,10 @@ save_buffer(char *buffer, off_t bufferlength, int fd)
                if (size_by_seek == (off_t)-1)
                        prterr("save_buffer: lseek eof");
                else if (bufferlength > size_by_seek) {
-                       warn("save_buffer: .fsxgood file too short... will
-save 0x%llx bytes instead of 0x%llx\n", (unsigned long long)size_by_seek,
-                            (unsigned long long)bufferlength);
+                       warn("save_buffer: .fsxgood file too short... will "
+                               "save 0x%llx bytes instead of 0x%llx\n", 
+                               (unsigned long long)size_by_seek,
+                               (unsigned long long)bufferlength);
                        bufferlength = size_by_seek;
                }
        }
@@ -310,8 +311,8 @@ save 0x%llx bytes instead of 0x%llx\n", (unsigned long long)size_by_seek,
                if (byteswritten == -1)
                        prterr("save_buffer write");
                else
-                       warn("save_buffer: short write, 0x%x bytes instead
-of 0x%llx\n",
+                       warn("save_buffer: short write, 0x%x bytes instead "
+                               "of 0x%llx\n",
                             (unsigned)byteswritten,
                             (unsigned long long)bufferlength);
        }
@@ -372,11 +373,11 @@ check_buffers(unsigned offset, unsigned size)
                if (n) {
                        prt("\t0x%5x\n", n);
                        if (bad)
-                               prt("operation# (mod 256) for the bad data
-may be %u\n", ((unsigned)op & 0xff));
+                               prt("operation# (mod 256) for the bad data "
+                                       "may be %u\n", ((unsigned)op & 0xff));
                        else
-                               prt("operation# (mod 256) for the bad data
-unknown, check HOLE and EXTEND ops\n");
+                               prt("operation# (mod 256) for the bad data "
+                                       "unknown, check HOLE and EXTEND ops\n");
                } else
                        prt("????????????????\n");
                report_failure(110);
@@ -927,33 +928,33 @@ void
 usage(void)
 {
        fprintf(stdout, "usage: %s",
-               "fsx [-dnqLOW] [-b opnum] [-c Prob] [-l flen] [-m
-start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t
-truncbdy] [-w writebdy] [-D startingop] [-N numops] [-P dirpath] [-S seed]
-fname\n\
-       -b opnum: beginning operation number (default 1)\n\
-       -c P: 1 in P chance of file close+open at each op (default infinity)\n\
-       -d: debug output for all operations [-d -d = more debugging]\n\
-       -l flen: the upper bound on file size (default 262144)\n\
-       -m startop:endop: monitor (print debug output) specified byte range
-(default 0:infinity)\n\
-       -n: no verifications of file size\n\
-       -o oplen: the upper bound on operation size (default 65536)\n\
-       -p progressinterval: debug output at specified operation interval\n\
-       -q: quieter operation\n\
-       -r readbdy: 4096 would make reads page aligned (default 1)\n\
-       -s style: 1 gives smaller truncates (default 0)\n\
-       -t truncbdy: 4096 would make truncates page aligned (default 1)\n\
-       -w writebdy: 4096 would make writes page aligned (default 1)\n\
-       -D startingop: debug output starting at specified operation\n\
-       -L: fsxLite - no file creations & no file size changes\n\
-       -N numops: total # operations to do (default infinity)\n\
-       -O: use oplen (see -o flag) for every op (default random)\n\
-       -P: save .fsxlog and .fsxgood files in dirpath (default ./)\n\
-       -S seed: for random # generator (default 1) 0 gets timestamp\n\
-       -W: mapped write operations DISabled\n\
-        -R: read() system calls only (mapped reads disabled)\n\
-       fname: this filename is REQUIRED (no default)\n");
+               "fsx [-dnqLOW] [-b opnum] [-c Prob] [-l flen] [-m "
+"start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t "
+"truncbdy] [-w writebdy] [-D startingop] [-N numops] [-P dirpath] [-S seed] "
+"fname\n"
+"      -b opnum: beginning operation number (default 1)\n"
+"      -c P: 1 in P chance of file close+open at each op (default infinity)\n"
+"      -d: debug output for all operations [-d -d = more debugging]\n"
+"      -l flen: the upper bound on file size (default 262144)\n"
+"      -m startop:endop: monitor (print debug output) specified byte range "
+"(default 0:infinity)\n"
+"      -n: no verifications of file size\n"
+"      -o oplen: the upper bound on operation size (default 65536)\n"
+"      -p progressinterval: debug output at specified operation interval\n"
+"      -q: quieter operation\n"
+"      -r readbdy: 4096 would make reads page aligned (default 1)\n"
+"      -s style: 1 gives smaller truncates (default 0)\n"
+"      -t truncbdy: 4096 would make truncates page aligned (default 1)\n"
+"      -w writebdy: 4096 would make writes page aligned (default 1)\n"
+"      -D startingop: debug output starting at specified operation\n"
+"      -L: fsxLite - no file creations & no file size changes\n"
+"      -N numops: total # operations to do (default infinity)\n"
+"      -O: use oplen (see -o flag) for every op (default random)\n"
+"      -P: save .fsxlog and .fsxgood files in dirpath (default ./)\n"
+"      -S seed: for random # generator (default 1) 0 gets timestamp\n"
+"      -W: mapped write operations DISabled\n"
+"        -R: read() system calls only (mapped reads disabled)\n"
+"      fname: this filename is REQUIRED (no default)\n");
        exit(90);
 }
 
@@ -1020,8 +1021,8 @@ main(int argc, char **argv)
                case 'b':
                        simulatedopcount = getnum(optarg, &endp);
                        if (!quiet)
-                               fprintf(stdout, "Will begin at operation
-%ld\n",
+                               fprintf(stdout, "Will begin at operation "
+                                       "%ld\n",
                                        simulatedopcount);
                        if (simulatedopcount == 0)
                                usage();
@@ -1206,8 +1207,8 @@ main(int argc, char **argv)
                                prterr(fname);
                                warn("main: error on write");
                        } else
-                               warn("main: short write, 0x%x bytes instead
-of 0x%x\n",
+                               warn("main: short write, 0x%x bytes instead "
+                                       "of 0x%x\n",
                                     (unsigned)written, maxfilelen);
                        exit(98);
                }
index b8d234b..745f113 100644 (file)
@@ -8,17 +8,21 @@ STDERR->autoflush(1);
 my ($line, $memory);
 my $debug_line = 0;
 
+my $total = 0;
+my $max = 0;
+
 while ($line = <>) {
     $debug_line++;
     my ($file, $func, $lno, $name, $size, $addr, $type);
-    if ($line =~ m/^.*\((.*):(\d+):(.*)\(\) (\d+ \| )?\d+\+\d+\): [vk](.*) '(.*)': (\d+) at (.*) \(tot .*$/) {
+    if ($line =~ m/^.*\((.*):(\d+):(.*)\(\) (\d+ \| )?\d+\+\d+\): (k|v|slab-)(.*) '(.*)': (\d+) at (.*) \(tot (.*)\).*$/) {
         $file = $1;
         $lno = $2;
         $func = $3;
-        $type = $5;
-        $name = $6;
-        $size = $7;
-        $addr = $8;
+        $type = $6;
+        $name = $7;
+        $size = $8;
+        $addr = $9;
+        $tot = $10;
 
        # we can't dump the log after portals has exited, so skip "leaks"
        # from memory freed in the portals module unloading.
@@ -31,13 +35,24 @@ while ($line = <>) {
         next;
     }
 
-    if ($type eq 'malloced') {
+    if (index($type, 'alloced') >= 0) {
+        if (defined($memory->{$addr})) {
+            print STDERR "*** Two allocs with the same address ($size bytes at $addr, $file:$func:$lno)\n";
+            print STDERR "    first malloc at $memory->{$addr}->{file}:$memory->{$addr}->{func}:$memory->{$addr}->{lno}, second at $file:$func:$lno\n";
+            next;
+        }
+
         $memory->{$addr}->{name} = $name;
         $memory->{$addr}->{size} = $size;
         $memory->{$addr}->{file} = $file;
         $memory->{$addr}->{func} = $func;
         $memory->{$addr}->{lno} = $lno;
         $memory->{$addr}->{debug_line} = $debug_line;
+
+        $total += $size;
+        if ($total > $max) {
+            $max = $total;
+        }
     } else {
         if (!defined($memory->{$addr})) {
             print STDERR "*** Free without malloc ($size bytes at $addr, $file:$func:$lno)\n";
@@ -52,6 +67,11 @@ while ($line = <>) {
         }
 
         delete $memory->{$addr};
+        $total -= $size;
+    }
+    if ($total != int($tot)) {
+        print "kernel total $tot != my total $total\n";
+        $total = $tot;
     }
 }
 
@@ -66,4 +86,4 @@ foreach $key (@sorted) {
     print STDERR "*** Leak: $memory->{$key}->{size} bytes allocated at $key ($memory->{$key}->{file}:$memory->{$key}->{func}:$memory->{$key}->{lno}, debug file line $memory->{$key}->{debug_line})\n";
 }
 
-print "Done.\n";
+print "maximum used: $max, amount leaked: $total\n";
index 20c8c20..dbfd7f0 100755 (executable)
@@ -4,10 +4,10 @@ LCMD=$TMP/lkcd-cmds-`hostname`
 echo "Storing LKCD module info in $LCMD"
 cat /tmp/ogdb-`hostname` | while read JUNK M JUNK; do
        MOD="../$M"
-       MAP=`echo $MOD | sed -e 's/\.o$/.map/'`
-       MODNAME=`basename $MOD | sed -e 's/\.o$//'`
+       MODNAME="`basename $MOD .o`"
+       MAP="$TMP/$MODNAME.map"
 
        nm $MOD > $MAP
        echo namelist -a $PWD/$MOD  | tee -a $LCMD
-       echo symtab -a $PWD/$MAP $MODNAME | tee -a $LCMD
+       echo symtab -a $MAP $MODNAME | tee -a $LCMD
 done
index 5afade1..3e3e03b 100644 (file)
@@ -1,6 +1,8 @@
 #!/bin/sh
 
-LCONF=${LCONF:-../utils/lconf}
+PATH=`dirname $0`/../utils:$PATH
+
+LCONF=${LCONF:-lconf}
 NAME=${NAME:-echo}
 
 config=$NAME.xml
@@ -17,5 +19,5 @@ $LCONF $lustre_opt --reformat --gdb $OPTS $config || exit 4
 cat <<EOF
 
 run getattr tests as:
-../utils/lctl --device '\$ECHO_$SERVER' test_getattr 1000000
+`dirname $0`/../utils/lctl --device '\$ECHO_$SERVER' test_getattr 1000000
 EOF
index 8e3b37b..d8f37c5 100755 (executable)
@@ -30,5 +30,5 @@ if [ "$1" = "-v" ]; then
   verbose="-v"
 fi
 
-${LCONF} $portals_opt $lustre_opt $node_opt ${REFORMAT:---reformat} --gdb \
-    $verbose $conf_opt  || exit 2
+${LCONF} $portals_opt $lustre_opt $node_opt ${REFORMAT:---reformat} \
+    ${GDB:---gdb} $verbose $conf_opt  || exit 2
index 25d05d2..00f2391 100755 (executable)
@@ -7,12 +7,12 @@ config=${1:-local.xml}
 LMC="${LMC:-lmc} -m $config"
 TMP=${TMP:-/tmp}
 
-MDSDEV=${MDSDEV:-$TMP/mds1}
+MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
 MDSSIZE=${MDSSIZE:-50000}
+FSTYPE=${FSTYPE:-ext3}
 
-OSTDEV=${OSTDEV:-$TMP/ost1}
+OSTDEV=${OSTDEV:-$TMP/ost1-`hostname`}
 OSTSIZE=${OSTSIZE:-200000}
-FSTYPE=${FSTYPE:-ext3}
 
 rm -f $config
 
@@ -21,7 +21,7 @@ ${LMC} --add node --node localhost || exit 10
 ${LMC} --add net --node  localhost --nid localhost --nettype tcp || exit 11
 
 # configure mds server
-${LMC} --add mds --nspath /mnt/mds_ns  --node localhost --mds mds1  --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE || exit 20
+${LMC} --add mds --nspath /mnt/mds_ns  --node localhost --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE || exit 20
 
 # configure ost
 ${LMC} --add ost --nspath /mnt/ost_ns --node localhost --ost ost1  --fstype $FSTYPE --dev $OSTDEV --size  $OSTSIZE || exit 30
index 3956f9e..79e9590 100755 (executable)
@@ -7,13 +7,16 @@ config=${1:-lov.xml}
 LMC=${LMC:-lmc}
 TMP=${TMP:-/tmp}
 
-MDSDEV=${MDSDEV:-$TMP/mds1}
+MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
 MDSSIZE=${MDSSIZE:-50000}
+FSTYPE=${FSTYPE:-ext3}
 
-OSTDEV1=${OSTDEV1:-$TMP/ost1}
-OSTDEV2=${OSTDEV2:-$TMP/ost2}
-OSTDEV3=${OSTDEV3:-$TMP/ost3}
+OSTDEV1=${OSTDEV1:-$TMP/ost1-`hostname`}
+OSTDEV2=${OSTDEV2:-$TMP/ost2-`hostname`}
+OSTDEV3=${OSTDEV3:-$TMP/ost3-`hostname`}
 OSTSIZE=${OSTSIZE:-100000}
+# 1 to config an echo client instead of llite
+ECHO_CLIENT=${ECHO_CLIENT:-}
 
 STRIPE_BYTES=65536
 STRIPES_PER_OBJ=2      # 0 means stripe over all OSTs
@@ -22,13 +25,17 @@ STRIPES_PER_OBJ=2   # 0 means stripe over all OSTs
 ${LMC} -o $config --add net --node localhost --nid localhost --nettype tcp || exit 1
 
 # configure mds server
-${LMC} -m $config --format --add mds --node localhost --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 10
+${LMC} -m $config --format --add mds --node localhost --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE || exit 10
 
 # configure ost
 ${LMC} -m $config --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPE_BYTES --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 || exit 20
-${LMC} -m $config --add ost --node localhost --lov lov1 --dev $OSTDEV1 --size $OSTSIZE || exit 21
-${LMC} -m $config --add ost --node localhost --lov lov1 --dev $OSTDEV2 --size $OSTSIZE || exit 22
-${LMC} -m $config --add ost --node localhost --lov lov1 --dev $OSTDEV3 --size $OSTSIZE || exit 23
-
-# create client config
-${LMC} -m $config  --add mtpt --node localhost --path /mnt/lustre --mds mds1 --lov lov1 || exit 30
+${LMC} -m $config --add ost --node localhost --lov lov1 --fstype $FSTYPE --dev $OSTDEV1 --size $OSTSIZE || exit 21
+${LMC} -m $config --add ost --node localhost --lov lov1 --fstype $FSTYPE --dev $OSTDEV2 --size $OSTSIZE || exit 22
+${LMC} -m $config --add ost --node localhost --lov lov1 --fstype $FSTYPE --dev $OSTDEV3 --size $OSTSIZE || exit 23
+
+if [ -z "$ECHO_CLIENT" ]; then
+       # create client config
+       ${LMC} -m $config  --add mtpt --node localhost --path /mnt/lustre --mds mds1 --lov lov1 || exit 30
+else
+       ${LMC} -m $config  --add echo_client --node localhost --ost lov1 || exit 31
+fi
index 07de3ed..40ef46a 100644 (file)
@@ -7,10 +7,11 @@ PATH=$SRCDIR:$SRCDIR/../utils:$PATH
 LMC="${LMC:-lmc} -m $config"
 TMP=${TMP:-/tmp}
 
-MDSDEV=${MDSDEV:-$TMP/mds1}
+MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
 MDSSIZE=${MDSSIZE:-50000}
+FSTYPE=${FSTYPE:-ext3}
 
-OSTDEV=${OSTDEV:-$TMP/ost1}
+OSTDEV=${OSTDEV:-$TMP/ost1-`hostname`}
 OSTSIZE=${OSTSIZE:-200000}
 
 rm -f $config
@@ -20,10 +21,10 @@ ${LMC} --add node --node localhost || exit 10
 ${LMC} --add net --node  localhost --nid localhost --nettype tcp || exit 11
 
 # configure mds server
-${LMC} --add mds  --node localhost --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 20
+${LMC} --add mds  --node localhost --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE || exit 20
 
 # configure ost
-${LMC} --add ost --node localhost --ost ost1 --dev $OSTDEV --size  $OSTSIZE || exit 30
+${LMC} --add ost --node localhost --ost ost1 --fstype $FSTYPE --dev $OSTDEV --size  $OSTSIZE || exit 30
 
 # create client config
 ${LMC} --add mtpt --node localhost --path /mnt/lustre1 --mds mds1 --ost ost1 || exit 40
index 27b570d..6d94362 100644 (file)
@@ -7,12 +7,13 @@ config=${1-mds-bug.xml}
 LMC=${LMC-../utils/lmc}
 TMP=${TMP:-/tmp}
 
-MDSDEV=$TMP/mds1
-MDSDEV2=$TMP/mds2
+MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
+MDSDEV2=${MDSDEV2:-$TMP/mds2-`hostname`}
 MDSSIZE=50000
+FSTYPE=${FSTYPE:-ext3}
 
-OSTDEV1=$TMP/ost1
-OSTDEV2=$TMP/ost2
+OSTDEV1=${OSTDEV1:-$TMP/ost1-`hostname`}
+OSTDEV2=${OSTDEV2:-$TMP/ost2-`hostname`}
 OSTSIZE=100000
 
 MDSNODE=uml1
@@ -25,19 +26,15 @@ ${LMC} -m $config --add net --node $OSTNODE --nid $OSTNODE --nettype tcp || exit
 ${LMC} -m $config --add net --node $CLIENT --nid $CLIENT --nettype tcp || exit 3
 
 # configure mds server
-${LMC} -m $config --format --add mds --node $MDSNODE --mds mds1 --dev $MDSDEV --size $MDSSIZE ||exit 10
-${LMC} -m $config --format --add mds --node $MDSNODE --mds mds2 --dev $MDSDEV2 --size $MDSSIZE ||exit 10
+${LMC} -m $config --format --add mds --node $MDSNODE --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE ||exit 10
+${LMC} -m $config --format --add mds --node $MDSNODE --mds mds2 --fstype $FSTYPE --dev $MDSDEV2 --size $MDSSIZE ||exit 10
 
 # configure ost
 ${LMC} -m $config --add lov --lov lov1 --mds mds1 --stripe_sz 65536 --stripe_cnt 0 --stripe_pattern 0 || exit 20
 ${LMC} -m $config --add lov --lov lov2 --mds mds2 --stripe_sz 65536 --stripe_cnt 0 --stripe_pattern 0 || exit 20
-${LMC} -m $config --add ost --node $OSTNODE --lov lov1 --dev $OSTDEV1 --size $OSTSIZE || exit 21
-${LMC} -m $config --add ost --node $OSTNODE --lov lov2 --dev $OSTDEV2 --size $OSTSIZE || exit 22
+${LMC} -m $config --add ost --node $OSTNODE --lov lov1 --fstype $FSTYPE --dev $OSTDEV1 --size $OSTSIZE || exit 21
+${LMC} -m $config --add ost --node $OSTNODE --lov lov2 --fstype $FSTYPE --dev $OSTDEV2 --size $OSTSIZE || exit 22
 
 # create client config
 ${LMC} -m $config --add mtpt --node $CLIENT --path /mnt/lustre --mds mds1 --lov lov1 || exit 30
 ${LMC} -m $config --add mtpt --node $CLIENT --path /mnt/lustre2 --mds mds2 --lov lov2 || exit 30
-
-
-
-
index fde7d36..8250f96 100644 (file)
@@ -11,6 +11,8 @@
 #include <sys/stat.h>
 #include <dirent.h>
 #include <string.h>
+#include <unistd.h>
+#include <stdlib.h>
 
 int main(int argc, char **argv)
 {
@@ -34,7 +36,7 @@ int main(int argc, char **argv)
         fprintf(stderr, "creating special file %s\n", dname1);
         rc = mknod(dname1, 0777|S_IFIFO, 0);
         if (rc == -1) {
-                fprintf(stderr, "creating %s fails: %s\n", 
+                fprintf(stderr, "creating %s fails: %s\n",
                         dname1, strerror(errno));
                 exit(1);
         }
@@ -47,7 +49,7 @@ int main(int argc, char **argv)
                         dname1, strerror(errno));
                 exit(1);
         }
-        
+
         // doesn't matter if the two dirs are the same??
         fddev2 = open(dname2, O_RDONLY | O_NONBLOCK);
         if (fddev2 == -1) {
@@ -55,40 +57,38 @@ int main(int argc, char **argv)
                         dname2, strerror(errno));
                 exit(1);
         }
-        
+
         // delete the special file
         fprintf (stderr, "unlinking %s\n", dname1);
         rc = unlink(dname1);
         if (rc) {
-                fprintf(stderr, "unlink %s error: %s\n", 
+                fprintf(stderr, "unlink %s error: %s\n",
                         dname1, strerror(errno));
                 exit(1);
         }
 
-        if (access(dname2, F_OK) == 0){
+        if (access(dname2, F_OK) == 0) {
                 fprintf(stderr, "%s still exists\n", dname2);
                 exit(1);
         }
 
-        if (access(dname1, F_OK) == 0){
+        if (access(dname1, F_OK) == 0) {
                 fprintf(stderr, "%s still exists\n", dname1);
                 exit(1);
         }
 
         // fchmod one special file
         rc = fchmod (fddev1, 0777);
-        if(rc == -1)
-        {
-                fprintf(stderr, "fchmod unlinked special file %s fails: %s\n", 
+        if (rc == -1) {
+                fprintf(stderr, "fchmod unlinked special file %s fails: %s\n",
                         dname1, strerror(errno));
                 exit(1);
         }
-                
+
         // fstat two files to check if they are the same
         rc = fstat(fddev1, &st1);
-        if(rc == -1)
-        {
-                fprintf(stderr, "fstat unlinked special file %s fails: %s\n", 
+        if (rc == -1) {
+                fprintf(stderr, "fstat unlinked special file %s fails: %s\n",
                         dname1, strerror(errno));
                 exit(1);
         }
@@ -103,7 +103,7 @@ int main(int argc, char **argv)
         if (st1.st_mode != st2.st_mode) {  // can we do this?
                 fprintf(stderr, "fstat different value on %s and %s\n",                                 dname1, dname2);
                 exit(1);
-        }        
+        }
 
         fprintf(stderr, "Ok, everything goes well.\n");
         return 0;
index 7d8cc6b..7b97309 100644 (file)
@@ -18,8 +18,8 @@
 #include <unistd.h>
 
 typedef struct flag_mapping {
-       char string[20];
-       int  flag;
+       const char *string;
+       const int  flag;
 } FLAG_MAPPING;
 
 FLAG_MAPPING flag_table[] = {
@@ -67,13 +67,13 @@ int main(int argc, char** argv)
                 case 'f': {
                         char *tmp;
 
-                        cloned_flags = (char *)malloc(strlen(optarg));
+                        cloned_flags = (char *)malloc(strlen(optarg)+1);
                         if (cloned_flags == NULL) {
                                 fprintf(stderr, "Insufficient memory.\n");
                                 exit(-1);
                         }
 
-                        strncpy(cloned_flags, optarg, strlen(optarg));
+                        strncpy(cloned_flags, optarg, strlen(optarg)+1);
                         for (tmp = strtok(optarg, ":|"); tmp;
                              tmp = strtok(NULL, ":|")) {
                                 int i = 0;
index e7671c8..96632a9 100644 (file)
@@ -3,16 +3,18 @@
 #include <string.h>
 #include <errno.h>
 #include <sys/types.h>
+#include <sys/stat.h>
 #include <stdlib.h>
 #include <unistd.h>
 
-#define T1 "write before unlink\n"
-#define T2 "write after unlink\n"
+#define T1 "write data before unlink\n"
+#define T2 "write data after unlink\n"
 char buf[128];
 
 int main(int argc, char **argv)
 {
-       char *fname, *fname2;
+        char *fname, *fname2;
+        struct stat st;
         int fd, rc;
 
         if (argc < 2 || argc > 3) {
@@ -20,11 +22,11 @@ int main(int argc, char **argv)
                 exit(1);
         }
 
-       fname = argv[1];
-       if (argc == 3)
-               fname2 = argv[2];
-       else
-               fname2 = argv[1];
+        fname = argv[1];
+        if (argc == 3)
+                fname2 = argv[2];
+        else
+                fname2 = argv[1];
 
         fprintf(stderr, "opening\n");
         fd = open(fname, O_RDWR | O_TRUNC | O_CREAT, 0644);
@@ -36,50 +38,67 @@ int main(int argc, char **argv)
         fprintf(stderr, "writing\n");
         rc = write(fd, T1, strlen(T1) + 1);
         if (rc != strlen(T1) + 1) {
-                fprintf(stderr, "write (normal) %s\n", strerror(errno));
+                fprintf(stderr, "write (normal) %s (rc %d)\n",
+                        strerror(errno), rc);
+                exit(1);
+        }
+
+        if (argc == 3) {
+                fprintf(stderr, "closing %s\n", fname);
+                rc = close(fd);
+                if (rc) {
+                        fprintf(stderr, "close (normal) %s\n", strerror(errno));
+                        exit(1);
+                }
+
+                fprintf(stderr, "opening %s\n", fname2);
+                fd = open(fname2, O_RDWR);
+                if (fd == -1) {
+                        fprintf(stderr, "open (unlink) %s\n", strerror(errno));
+                        exit(1);
+                }
+
+                fprintf (stderr, "unlinking %s\n", fname2);
+                rc = unlink(fname2);
+                if (rc) {
+                        fprintf(stderr, "unlink %s\n", strerror(errno));
+                        exit(1);
+                }
+
+                if (access(fname2, F_OK) == 0) {
+                        fprintf(stderr, "%s still exists\n", fname2);
+                        exit(1);
+                }
+        } else {
+                fprintf(stderr, "resetting fd offset\n");
+                rc = lseek(fd, 0, SEEK_SET);
+                if (rc) {
+                        fprintf(stderr, "seek %s\n", strerror(errno));
+                        exit(1);
+                }
+
+                printf("unlink %s and press enter\n", fname);
+                getc(stdin);
+        }
+
+        if (access(fname, F_OK) == 0) {
+                fprintf(stderr, "%s still exists\n", fname);
                 exit(1);
         }
 
-       if (argc == 3) {
-               fprintf(stderr, "closing %s\n", fname);
-               rc = close(fd);
-               if (rc) {
-                       fprintf(stderr, "close (normal) %s\n", strerror(errno));
-                       exit(1);
-               }
-
-               fprintf(stderr, "opening %s\n", fname2);
-               fd = open(fname2, O_RDWR);
-               if (fd == -1) {
-                       fprintf(stderr, "open (unlink) %s\n", strerror(errno));
-                       exit(1);
-               }
-
-               fprintf (stderr, "unlinking %s\n", fname2);
-               rc = unlink(fname2);
-               if (rc) {
-                       fprintf(stderr, "unlink %s\n", strerror(errno));
-                       exit(1);
-               }
-
-               if (access(fname2, F_OK) == 0) {
-                       fprintf(stderr, "%s still exists\n", fname2);
-                       exit(1);
-               }
-       } else {
-               printf("unlink %s and press enter\n", fname);
-               getc(stdin);
-       }
-
-       if (access(fname, F_OK) == 0) {
-               fprintf(stderr, "%s still exists\n", fname);
-               exit(1);
-       }
+        fprintf(stderr, "fstating\n");
+        rc = fstat(fd, &st);
+        if (rc) {
+                fprintf(stderr, "fstat (unlink) %s\n", strerror(errno));
+                exit(1);
+        }
+        if (st.st_nlink != 0)
+                fprintf(stderr, "st_nlink = %d\n", (int)st.st_nlink);
 
         fprintf(stderr, "reading\n");
         rc = read(fd, buf, strlen(T1) + 1);
         if (rc != strlen(T1) + 1) {
-                fprintf(stderr, "read (unlink) %s rc %d\n",
+                fprintf(stderr, "read (unlink) %s (rc %d)\n",
                         strerror(errno), rc);
                 exit(1);
         }
@@ -92,7 +111,7 @@ int main(int argc, char **argv)
 
         fprintf(stderr, "truncating\n");
         rc = ftruncate(fd, 0);
-        if (rc ) {
+        if (rc) {
                 fprintf(stderr, "truncate (unlink) %s\n", strerror(errno));
                 exit(1);
         }
@@ -124,8 +143,8 @@ int main(int argc, char **argv)
         fprintf(stderr, "reading again\n");
         rc = read(fd, buf, strlen(T2) + 1);
         if (rc != strlen(T2) + 1) {
-                fprintf(stderr, "read (after unlink rewrite) %s\n",
-                        strerror(errno));
+                fprintf(stderr, "read (after unlink rewrite) %s (rc %d)\n",
+                        strerror(errno), rc);
                 exit(1);
         }
 
@@ -135,7 +154,7 @@ int main(int argc, char **argv)
                 exit(1);
         }
 
-        fprintf(stderr, "closing again\n");
+        fprintf(stderr, "closing\n");
         rc = close(fd);
         if (rc) {
                 fprintf(stderr, "close (unlink) %s\n", strerror(errno));
index c8f85ee..fefd2d6 100755 (executable)
@@ -22,9 +22,10 @@ CLIENT=${CLIENT:-mdev8}
 NETWORKTYPE=${NETWORKTYPE:-tcp}
 MOUNTPT=${MOUNTPT:-/mnt/lustre}
 CONFIG=${CONFIG:-recovery-cleanup.xml}
-MDSDEV=${MDSDEV:-/tmp/mds}
-OSTDEV=${OSTDEV:-/tmp/ost}
+MDSDEV=${MDSDEV:-/tmp/mds-`hostname`}
 MDSSIZE=${MDSSIZE:-100000}
+FSTYPE=${FSTYPE:-ext3}
+OSTDEV=${OSTDEV:-/tmp/ost-`hostname`}
 OSTSIZE=${OSTSIZE:-100000}
 
 do_mds() {
@@ -51,10 +52,10 @@ make_config() {
        lmc -m $CONFIG --add net --node $NODE --nid `h2$NETWORKTYPE $NODE` \
            --nettype $NETWORKTYPE || exit 4
     done
-    lmc -m $CONFIG --add mds --node $MDSNODE --mds mds1 --dev $MDSDEV \
-        --size $MDSSIZE || exit 5
-    lmc -m $CONFIG --add ost --node $OSTNODE --ost ost1 --dev $OSTDEV \
-        --size $OSTSIZE || exit 6
+    lmc -m $CONFIG --add mds --node $MDSNODE --mds mds1 --fstype $FSTYPE \
+       --dev $MDSDEV --size $MDSSIZE || exit 5
+    lmc -m $CONFIG --add ost --node $OSTNODE --ost ost1 --fstype $FSTYPE \
+       --dev $OSTDEV --size $OSTSIZE || exit 6
     lmc -m $CONFIG --add mtpt --node $CLIENT --path $MOUNTPT --mds mds1 \
         --ost ost1 || exit 7
 }
index ebf0a0c..bc6a9c1 100755 (executable)
@@ -25,9 +25,9 @@ CLIENT=${CLIENT:-mdev8}
 NETWORKTYPE=${NETWORKTYPE:-tcp}
 MOUNTPT=${MOUNTPT:-/mnt/lustre}
 CONFIG=${CONFIG:-recovery-small.xml}
-MDSDEV=${MDSDEV:-/tmp/mds}
-OSTDEV=${OSTDEV:-/tmp/ost}
+MDSDEV=${MDSDEV:-/tmp/mds-`hostname`}
 MDSSIZE=${MDSSIZE:-100000}
+OSTDEV=${OSTDEV:-/tmp/ost-`hostname`}
 OSTSIZE=${OSTSIZE:-100000}
 UPCALL=${UPCALL:-$RPWD/recovery-small-upcall.sh}
 FSTYPE=${FSTYPE:-ext3}
index 3ba9368..4ea020f 100644 (file)
-#!/usr/bin/perl
+#!/usr/bin/perl -w
 use strict;
+$|++;
+
+$ENV{PATH}="/bin:/usr/bin";
+$ENV{ENV}="";
+$ENV{BASH_ENV}="";
+
 use diagnostics;
 use Getopt::Long;
+use POSIX ":sys_wait_h";
 
-sub usage () {
-    print "Usage: $0 <mount point prefix> <iterations>\n";
-    print "example: $0 --count=2 /mnt/lustre 50\n";
-    print "         will test in /mnt/lustre1 and /mnt/lustre2\n";
-    print "         $0 --count=0 /mnt/lustre 50\n";
-    print "         will test in /mnt/lustre only\n";
-    exit;
-}
-my ($j, $k, $d, $f1, $f2, $path, $silent);
-my $count = 0;
-my $create = 10;
+use vars qw(
+            $MAX_THREADS
+            );
+# Don't try to run more than this many threads concurrently.
+$MAX_THREADS = 16;
+
+# Initialize variables
+my $silent = 0;
+my $create_files = 1; # should we create files or not?
+my $use_mcreate = 1;  # should we use mcreate or open?
+my $num_dirs = 3;     # number of directories to create
+my $num_files = 6;    # number of files to create
+my $iterations = 1;
+my $num_threads = 1;
+my $mountpt;
+my $num_mounts = -1;
 
 GetOptions("silent!"=> \$silent,
-           "count=i" => \$count,
-           "create=i" => \$create);
+          "use_mcreate=i" => \$use_mcreate,
+           "create_files=i" => \$create_files,
+          "use_mcreate=i" => \$use_mcreate,
+          "num_files=i" => \$num_files,
+          "num_dirs=i" => \$num_dirs,
+          "mountpt=s" => \$mountpt,
+           "num_mounts=i" => \$num_mounts,
+          "iterations=i" => \$iterations,
+           "num_threads=i" => \$num_threads,
+           ) || die &usage;
 
-my $mtpt = shift || usage();
-my $i = shift || usage();
-my $total = $i;
-my $files = 6;
-my $dirs = 3;
-my $mcreate = 0; # should we use mcreate or open?
+# Check for mandatory args.
+if (!$mountpt ||
+    !$num_mounts) {
+    die &usage;
+}
 
-my $which = "";
-if ($count > 0) {
-    $which = int(rand() * $count) + 1;
+if ($num_threads > $MAX_THREADS) {
+    print "\nMAX_THREADS is currently set to $MAX_THREADS.\n\n";
+    print "You will have to change this in the source\n";
+    print "if you really want to run with $num_threads threads.\n\n";
+    exit 1;
 }
 
-$k = $dirs;
-if ($create == 0) {
-    $k = 0;
+# Initialize rand() function.
+srand (time ^ $$ ^ unpack "%L*", `ps axww | gzip`);
+
+#########################################################################
+### MAIN
+
+my $which = "";
+if ($num_mounts > 0) {
+    $which = int(rand() * $num_mounts) + 1;
 }
-while ($k--) {
-    $path = "$mtpt$which/$k";
-    my $rc = mkdir $path, 0755;
-    print "mkdir $path failed: $!\n" if !$rc;
-    $j = $files;
-    while ($j--) {
-        `./mcreate $path/$j`;
+
+# Create files and directories (if necessary).
+# Each thread $i gets its own set of directories "<i>.<j>" under the
+# chosen mount, each populated with files named 0 .. $num_files-1.
+if ($create_files) {
+    for (my $i=1; $i<=$num_threads;$i++) {
+       for (my $j=0; $j<$num_dirs;$j++) {
+           my $path = "${mountpt}${which}/${i}.${j}";
+           # Parenthesize the call: "mkdir $path, 0755 || die" parses as
+           # mkdir($path, (0755 || die ...)), so the die could never fire.
+           mkdir($path, 0755) || die "Can't mkdir $path: $!\n";
+           for (my $k=0; $k<$num_files; $k++) {
+               my $filepath = "${path}/${k}";
+               &create_file($filepath);
+               # Double-check that the file really exists afterwards.
+               if (! -e $filepath) {
+                   die "Error creating $filepath\n";
+               }
+           }
+       }
     }
 }
 
-while ($i--) {
-    my $which = "";
-    if ($count > 0) {
-        $which = int(rand() * $count) + 1;
-    }
-    $d = int(rand() * $dirs);
-    $f1 = int(rand() * $files);
-    $f2 = int(rand() * $files);
-    print "[$$] $mtpt$which/$d/$f1 $mtpt$which/$d/$f2 ...\n" if !$silent;
-    my $rc = rename "$mtpt$which/$d/$f1", "$mtpt$which/$d/$f2";
-    print "[$$] done: $rc\n" if !$silent;
-    if (($total - $i) % 100 == 0) {
-        print STDERR "[" . $$ . "]" . ($total - $i) . " operations\n";
+# Start one child process ("thread") per requested thread.  Stop
+# spawning early if fork_and_rename() reports a non-zero status.
+for (my $i=1; $i<=$num_threads; $i++) {
+    my $status = &fork_and_rename($i);
+    last if ($status != 0);
+}
+
+# Wait for all our threads to finish.  waitpid() returns -1 once there
+# is no child left to reap, so keep collecting until then.  A blocking
+# wait is used so we neither spin on WNOHANG while the children run nor
+# stop after only the first child has exited.
+my $child = 0;
+do {
+    $child = waitpid(-1, 0);
+} until $child == -1;
+sleep 1;
+
+# Unlink files and directories (if necessary).  Only the names
+# 0 .. $num_files-1 are ever created or used as rename targets, so that
+# is the range swept here.  A name may be missing because a rename moved
+# that file onto another name, hence the -e check before unlink.
+if ($create_files) {
+    for (my $i=1; $i<=$num_threads;$i++) {
+       for (my $j=0; $j<$num_dirs;$j++) {
+           my $path = "${mountpt}${which}/${i}.${j}";
+           for (my $k=0; $k<$num_files; $k++) {
+               my $filepath = "${path}/${k}";
+               unlink("$filepath") if (-e $filepath);
+           }
+           # Report, but do not abort on, a directory that cannot be removed.
+           my $rc = rmdir $path;
+           print "rmdir $path failed: $!\n" if !$rc;
+       }
     }
 }
 
-$k = $dirs;
-if ($create == 0) {
-    $k = 0;
+exit 0;
+
+#########################################################################
+### SUBROUTINES
+
+# Print the command-line help text and exit.  This never returns, so
+# callers written as "die &usage" terminate here, before die runs.
+sub usage () {
+    print "\nUsage: $0 [--silent] [--create_files=n] [--use_mcreate=n] [--num_dirs=n] [--num_files=n] [--iterations=n] [--num_threads=n] --num_mounts=n --mountpt=/path/to/lustre/mount\n\n";
+    print "\t--silent\tminimal output\n";
+    # Note the "\t" before "create": a bare "\c" followed by a letter is
+    # Perl's control-character escape and would garble this line.
+    print "\t--create_files=n\tcreate files at start, default=1 (yes)\n";
+    print "\t--use_mcreate=n\tuse mcreate to create files, default=1 (yes)\n";
+    print "\t--num_dirs=n\tnumber of directories to create per iteration, default=3\n";
+    print "\t--num_files=n\tnumber of files to create per directory, default=6\n";
+    print "\t--iterations=n\tnumber of iterations to perform, default=1\n";
+    print "\t--num_threads=n\tnumber of threads to run, default=1\n";
+    print "\t--mountpt\tlocation of lustre mount\n";
+    print "\t--num_mounts=n\tnumber of lustre mounts to test across, default=-1 (single mount point without numeric suffix)\n\n";
+    print "example: $0 --mountpt=/mnt/lustre --num_mounts=2 --iterations=50\n";
+    print "         will perform 50 iterations in /mnt/lustre1 and /mnt/lustre2\n";
+    print "         $0 --mountpt=/mnt/lustre --num_mounts=-1 --iterations=50\n";
+    print "         will perform 50 iterations in /mnt/lustre only\n\n";
+    exit;
-while ($k--) {
-    $path = "$mtpt$which/$k";
-    $j = $files;
-    while ($j--) {
-        unlink "$path/$j";
+
+
+#########################################################################
+# Create an empty file at $path, either by running ./mcreate or by
+# opening it for writing, depending on $use_mcreate.  Dies on failure,
+# returns 0 on success.
+sub create_file ($) {
+    my ($path) = @_;
+    
+    if ($use_mcreate) {
+        my $tmp = `./mcreate $path`;
+       # Any output containing "error: <reason>" is treated as a failure.
+       # Report the captured reason; $! is not set by the child's own
+       # error and would only show a stale, unrelated message.
+       if ($tmp =~ /.*error: (.*)\n/) {
+           die "Error mcreating $path: $1\n";
+       }
+    } else {
+        open(FH, ">$path") || die "Error opening $path: $!\n";
+        close(FH) || die "Error closing $path: $!\n";
     }
-    my $rc = rmdir $path;
-    print "rmdir $path failed: $!\n" if !$rc;
+    return 0;
 }
 
-print "Done.\n";
+#########################################################################
+# Fork one child ("thread") that performs the random renames.
+#
+# In the parent: returns 0 as soon as the child has been forked.
+# In the child: runs $iterations passes of $num_files random renames
+# inside this thread's own directories ("<thread_num>.<d>"), then exits
+# with status 0; it never returns to the caller.
+# A fork failure whose message matches "No more process" is retried
+# after 5 seconds; any other fork failure is fatal.
+sub fork_and_rename ($) {
+    my ($thread_num) = @_;
+    
+  FORK: {
+      if (my $pid = fork) {
+          # parent here
+          # child process pid is available in $pid
+         return 0;
+      } elsif (defined $pid) { # $pid is zero here if defined
+         
+         # The random state was seeded once in the parent and is copied
+         # by fork, so without reseeding every child would draw exactly
+         # the same sequence of mounts, directories and files.
+         srand(time ^ ($$ + ($$ << 15)));
+
+         my $current_iteration=1;
+          while ($current_iteration <= $iterations) {
+             for (my $i=0; $i<$num_files; $i++) {
+                 # Pick a mount suffix only when testing across several
+                 # numbered mounts; otherwise use the bare mount point.
+                 my $which = "";
+                 if ($num_mounts > 0) {
+                     $which = int(rand() * $num_mounts) + 1;
+                 }
+                 
+                 # Source and target are two random names in the same
+                 # random directory (they may be the same name).
+                 my $d = int(rand() * $num_dirs);
+                 my $f1 = int(rand() * $num_files);
+                 my $f2 = int(rand() * $num_files);
+                 my $path_f1 = "${mountpt}${which}/${thread_num}.${d}/${f1}";
+                 my $path_f2 = "${mountpt}${which}/${thread_num}.${d}/${f2}";
+                 
+                 print "Thread $thread_num: [$$] $path_f1 $path_f2 ...\n" if !$silent;
+                 # A failed rename is only reported via $rc, never fatal.
+                 my $rc = rename $path_f1, $path_f2;
+                 print "Thread $thread_num: [$$] done: $rc\n" if !$silent;
+             }
+             # Progress report on STDERR every 100 iterations.
+             if (($current_iteration) % 100 == 0) {
+                 print STDERR "Thread $thread_num: " . $current_iteration . " operations [" . $$ . "]\n";
+                 
+             }
+             $current_iteration++;
+         }
+
+         print "Thread $thread_num: Done.\n";
+
+         exit 0;
+
+      } elsif ($! =~ /No more process/) {
+          # EAGAIN, supposedly recoverable fork error
+          # NOTE(review): this matches the error text, which presumably
+          # varies with platform and locale -- confirm on target systems.
+          sleep 5;
+          redo FORK;
+      } else {
+          # weird fork error
+          die "Can't fork: $!\n";
+      }
+  }
+    
+}
index 20981e8..1e859aa 100644 (file)
@@ -7,40 +7,39 @@
 #include <string.h>
 #include <errno.h>
 #include <sys/types.h>
+#include <grp.h>
 #include <sys/wait.h>
 
 #define DEBUG 0
 
-void Usage_and_abort(void)
+static const char usage[] =
+"Usage: %s -u user_id [-g grp_id ] [ -G ] command\n"
+"  -u user_id      switch to UID user_id\n"
+"  -g grp_id       switch to GID grp_id\n"
+"  -G              clear supplementary groups\n";
+
+void Usage_and_abort(const char *name)
 {
-       fprintf(stderr, "Usage: runas -u user_id [ -g grp_id ]"
-               " command_to_be_run \n");
-       exit(-1);
+        fprintf(stderr, usage, name);
+        exit(-1);
 }
 
-// Usage: runas -u user_id [ -g grp_id ] [--] command_to_be_run
-// return: the return value of "command_to_be_run"
-// NOTE: returning -1 might be the return code of this program itself or
-// the "command_to_be_run"
-
-// ROOT runs "runas" for free
-// Other users run "runas" requires  chmod 6755 "command_to_be_run"
-
 int main(int argc, char **argv)
 {
-        char **my_argv;
+        char **my_argv, *name = argv[0];
         int status;
         int c,i;
         int gid_is_set = 0;
         int uid_is_set = 0;
+        int clear_supp_groups = 0;
         uid_t user_id;
         gid_t grp_id;
 
         if (argc == 1)
-                Usage_and_abort();
+                Usage_and_abort(name);
 
         // get UID and GID
-        while ((c = getopt (argc, argv, "+u:g:h")) != -1) {
+        while ((c = getopt (argc, argv, "+u:g:hG")) != -1) {
                 switch (c) {
                 case 'u':
                         user_id = (uid_t)atoi(optarg);
@@ -54,23 +53,23 @@ int main(int argc, char **argv)
                         gid_is_set = 1;
                         break;
 
-                case 'h':
-                        Usage_and_abort();
+                case 'G':
+                        clear_supp_groups = 1;
                         break;
 
                 default:
-                        //fprintf(stderr, "Bad parameters.\n");
-                        //Usage_and_abort ();
+                case 'h':
+                        Usage_and_abort(name);
                         break;
                 }
         }
 
         if (!uid_is_set)
-                Usage_and_abort();
+                Usage_and_abort(name);
 
         if (optind == argc) {
-                fprintf(stderr, "Bad parameters.\n");
-                Usage_and_abort();
+                fputs("Must specify command to run.\n", stderr);
+                Usage_and_abort(name);
         }
 
         // assemble the command
@@ -99,6 +98,14 @@ int main(int argc, char **argv)
                  exit(-1);
         }
 
+        if (clear_supp_groups) {
+                status = setgroups(0, NULL);
+                if (status == -1) {
+                        perror("clearing supplementary groups");
+                        exit(-1);
+                }
+        }
+        
         // set UID
         status = setreuid(user_id, user_id );
         if(status == -1) {
@@ -107,8 +114,8 @@ int main(int argc, char **argv)
                   exit(-1);
         }
 
-
-        fprintf(stderr, "running as USER(%d), Grp (%d):  ", user_id, grp_id );
+        fprintf(stderr, "running as UID %d, GID %d%s:", user_id, grp_id,
+                clear_supp_groups ? ", cleared groups" : "");
 
         for (i = 0; i < argc - optind; i++)
                  fprintf(stderr, " [%s]", my_argv[i]);
index cb417d2..821ac46 100755 (executable)
@@ -1,6 +1,6 @@
 #!/bin/sh
-
-DIR=${DIR:-/mnt/lustre/`hostname`}
+MNT=${MNT:-/mnt/lustre}
+DIR=${DIR:-$MNT/`hostname`}
 #[ -e /proc/sys/portals/debug ] && echo 0 > /proc/sys/portals/debug 
 mkdir -p $DIR
 TGT=$DIR/client.txt
index 886ce8f..ad60d6d 100644 (file)
@@ -1,7 +1,7 @@
 #!/bin/sh
 PATH=`dirname $0`/../utils:$PATH
 
-obdstat filter 1 | while read LINE; do
+llobdstat.pl $1 1 | while read LINE; do
        echo "`date +s`: $LINE"
-       [ "$1" ] && echo "`date +s`: $LINE" >> $1
+       [ "$2" ] && echo "`date +s`: $LINE" >> $2
 done
index 4d86248..395ceb5 100644 (file)
@@ -1,6 +1,6 @@
 #!/bin/sh
 SRCDIR="`dirname $0`/"
-export PATH=/sbin:/usr/sbin:$SRCDIR:$PATH
+export PATH=/sbin:/usr/sbin:$SRCDIR/../utils:$PATH
 
 LOOPS=${LOOPS:-1}
 COUNT=${COUNT:-1000000}
index e59f5f4..6a8aac8 100755 (executable)
@@ -35,41 +35,42 @@ while [ "$1" ]; do
        shift
 done
 
-OSCMT="`mount | awk '/ lustre_lite / { print $3 }' | tail -1`"
-if [ -z "$OSCMT" ]; then
+MOUNT="`mount | awk '/ lustre_lite / { print $3 }' | tail -1`"
+if [ -z "$MOUNT" ]; then
        sh llmount.sh
-       OSCMT="`mount | awk '/ lustre_lite / { print $3 }' | tail -1`"
-       [ -z "$OSCMT" ] && fail "no lustre filesystem mounted" 1
+       MOUNT="`mount | awk '/ lustre_lite / { print $3 }' | tail -1`"
+       [ -z "$MOUNT" ] && fail "no lustre filesystem mounted" 1
        I_MOUNTED="yes"
 fi
 
-OSCTMP=`echo $OSCMT | tr "/" "."`
+OSCTMP=`echo $MOUNT | tr "/" "."`
 USED=`df | awk "/$OSCTMP/ { print \\$3 }" | tail -1`
 USED=`expr $USED + 16` # Some space for the status file
 
 # let's start slowly here...
-log "touching $OSCMT"
-touch $OSCMT || fail "can't touch $OSCMT" 2
-HOSTS=$OSCMT/hosts.$$
-
-# this will cause the following cp to trigger bug #620096
-log "create an empty file $HOSTS"
-mcreate $HOSTS
-
-log "copying /etc/hosts to $HOSTS"
-cp /etc/hosts $HOSTS || fail "can't cp /etc/hosts to $HOSTS" 3
-log "comparing /etc/hosts and $HOSTS"
-diff -u /etc/hosts $HOSTS || fail "$HOSTS different" 4
-log "renaming $HOSTS to $HOSTS.ren"
-mv $HOSTS $HOSTS.ren || fail "can't rename $HOSTS to $HOSTS.ren" 5
-log "copying /etc/hosts to $HOSTS again"
-cp /etc/hosts $HOSTS || fail "can't cp /etc/hosts to $HOSTS again" 6
-log "truncating $HOSTS"
-> $HOSTS || fail "can't truncate $HOSTS" 8
-log "removing $HOSTS"
-rm $HOSTS || fail "can't remove $HOSTS" 9
-
-DST=$OSCMT/runtest.$$
+log "touching $MOUNT"
+touch $MOUNT || fail "can't touch $MOUNT" 2
+HOSTS=$MOUNT/hosts.$$
+
+if [ $COUNT -gt 10 -o $COUNT -eq 0 ]; then
+       # this will cause the following cp to trigger bug #620096
+       log "create an empty file $HOSTS"
+       mcreate $HOSTS
+       log "copying /etc/hosts to $HOSTS"
+       cp /etc/hosts $HOSTS || fail "can't cp /etc/hosts to $HOSTS" 3
+       log "comparing /etc/hosts and $HOSTS"
+       diff -u /etc/hosts $HOSTS || fail "$HOSTS different" 4
+       log "renaming $HOSTS to $HOSTS.ren"
+       mv $HOSTS $HOSTS.ren || fail "can't rename $HOSTS to $HOSTS.ren" 5
+       log "copying /etc/hosts to $HOSTS again"
+       cp /etc/hosts $HOSTS || fail "can't cp /etc/hosts to $HOSTS again" 6
+       log "truncating $HOSTS"
+       > $HOSTS || fail "can't truncate $HOSTS" 8
+       log "removing $HOSTS"
+       rm $HOSTS || fail "can't remove $HOSTS" 9
+fi
+
+DST=$MOUNT/runtest.$$
 # let's start slowly here...
 log "creating $DST"
 mkdir $DST || fail "can't mkdir $DST" 10
@@ -102,27 +103,29 @@ done
 sh llmountcleanup.sh || exit 19
 sh llrmount.sh || exit 20
 
-log "renaming $HOSTS.ren to $HOSTS"
-mv $HOSTS.ren $HOSTS || fail "can't rename $HOSTS.ren to $HOSTS" 32
-log "truncating $HOSTS"
-> $HOSTS || fail "can't truncate $HOSTS" 34
-log "removing $HOSTS"
-rm $HOSTS || fail "can't remove $HOSTS again" 36
 log "removing $DST"
 rm -r $V $DST || fail "can't remove $DST" 37
 
+if [ $COUNT -gt 10 -o $COUNT -eq 0 ]; then
+       log "renaming $HOSTS.ren to $HOSTS"
+       mv $HOSTS.ren $HOSTS || fail "can't rename $HOSTS.ren to $HOSTS" 32
+       log "truncating $HOSTS"
+       > $HOSTS || fail "can't truncate $HOSTS" 34
+       log "removing $HOSTS"
+       rm $HOSTS || fail "can't remove $HOSTS again" 36
+fi
+
 # mkdirmany test (bug 589)
-log "running mkdirmany $OSCMT/base$$ 100"
-$MKDIRMANY $OSCMT/base$$ 100 || fail "mkdirmany failed"
+log "running mkdirmany $MOUNT/base$$ 100"
+$MKDIRMANY $MOUNT/base$$ 100 || fail "mkdirmany failed"
 log "removing mkdirmany directories"
-rmdir $OSCMT/base$$* || fail "mkdirmany cleanup failed"
+rmdir $MOUNT/base$$* || fail "mkdirmany cleanup failed"
 
 log "done"
 
 NOWUSED=`df | awk "/$OSCTMP/ { print \\$3 }" | tail -1`
-if [ $NOWUSED -gt $USED ]; then
+if [ `expr $NOWUSED - $USED` -gt 1024 ]; then
        echo "Space not all freed: now ${NOWUSED}kB, was ${USED}kB." 1>&2
-       echo "This is normal on BA OSTs, because of subdirectories." 1>&2
 fi
 
 if [ "$I_MOUNTED" = "yes" ]; then
index b04d84c..f414ccc 100755 (executable)
@@ -1,6 +1,6 @@
 #!/bin/sh
 vmstat 1 | while read LINE ; do
        LINE="`date +%s`: $LINE"
-       echo $LINE
-       [ "$1" ] && echo $LINE >> $1
+       echo "$LINE"
+       [ "$1" ] && echo "$LINE" >> $1
 done
index 46d0072..09eb8e9 100644 (file)
@@ -7,17 +7,19 @@
 set -e
 
 ONLY=${ONLY:-"$*"}
-ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"34 35"}        # bugs 1365 and 1360 respectively
+ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"35 32q 37 39"} # bugs 1360, 1504
 
 SRCDIR=`dirname $0`
 PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH
 
-CHECKSTAT=${CHECKSTAT:-"./checkstat -v"}
+CHECKSTAT=${CHECKSTAT:-"checkstat -v"}
 CREATETEST=${CREATETEST:-createtest}
 LFIND=${LFIND:-lfind}
 LSTRIPE=${LSTRIPE:-lstripe}
 LCTL=${LCTL:-lctl}
 MCREATE=${MCREATE:-mcreate}
+OPENFILE=${OPENFILE:-openfile}
+OPENUNLINK=${OPENUNLINK:-openunlink}
 TOEXCL=${TOEXCL:-toexcl}
 TRUNCATE=${TRUNCATE:-truncate}
 
@@ -29,22 +31,20 @@ else
        RUNAS=${RUNAS:-"runas -u $RUNAS_ID"}
 fi
 
-MOUNT=${MOUNT:-/mnt/lustre}
-DIR=${DIR:-$MOUNT}
-export NAME=$NAME
+export NAME=${NAME:-local}
 
 SAVE_PWD=$PWD
 
 clean() {
-        echo -n "cln.."
-        sh llmountcleanup.sh > /dev/null || exit 20
+       echo -n "cln.."
+       sh llmountcleanup.sh > /dev/null || exit 20
 }
-
 CLEAN=${CLEAN:-clean}
+
 start() {
-        echo -n "mnt.."
-        sh llrmount.sh > /dev/null || exit 10
-        echo "done"
+       echo -n "mnt.."
+       sh llrmount.sh > /dev/null || exit 10
+       echo "done"
 }
 START=${START:-start}
 
@@ -54,7 +54,7 @@ log() {
 }
 
 run_one() {
-       if ! mount | grep -q $MOUNT; then
+       if ! mount | grep -q $DIR; then
                $START
        fi
        log "== test $1: $2"
@@ -87,23 +87,33 @@ run_test() {
 }
 
 error() { 
-    echo FAIL
-    exit 1
+       echo "FAIL: $@"
+       exit 1
 }
 
 pass() { 
-    echo PASS
+       echo PASS
 }
 
-if ! mount | grep $MOUNT; then
+MOUNT="`mount | awk '/^'$NAME' .* lustre_lite / { print $3 }'`"
+if [ -z "$MOUNT" ]; then
        sh llmount.sh
+       MOUNT="`mount | awk '/^'$NAME' .* lustre_lite / { print $3 }'`"
+       [ -z "$MOUNT" ] && error "NAME=$NAME not mounted"
        I_MOUNTED=yes
 fi
 
+[ `echo $MOUNT | wc -w` -gt 1 ] && error "NAME=$NAME mounted more than once"
+
+DIR=${DIR:-$MOUNT}
+[ -z "`echo $DIR | grep $MOUNT`" ] && echo "$DIR not in $MOUNT" && exit 99
+
+rm -rf $DIR/[Rdfs][1-9]*
+
 echo preparing for tests involving mounts
-EXT2_DEV=/tmp/SANITY.LOOP
-dd if=/dev/zero of=$EXT2_DEV bs=1k seek=1000 count=1 > /dev/null
-mke2fs -F $EXT2_DEV > /dev/null
+EXT2_DEV=${EXT2_DEV:-/tmp/SANITY.LOOP}
+touch $EXT2_DEV
+mke2fs -F $EXT2_DEV 1000 > /dev/null
 
 test_0() {
        touch $DIR/f
@@ -178,12 +188,49 @@ test_5() {
 }
 run_test 5 "mkdir .../d5 .../d5/d2; chmod .../d5/d2 ============"
 
-test_6() {
-       touch $DIR/f6
-       chmod 0666 $DIR/f6
-       $CHECKSTAT -t file -p 0666 $DIR/f6 || error
+test_6a() {
+       touch $DIR/f6a
+       chmod 0666 $DIR/f6a || error
+       $CHECKSTAT -t file -p 0666 -u \#$UID $DIR/f6a || error
+}
+run_test 6a "touch .../f6a; chmod .../f6a ======================"
+
+test_6b() {
+       [ $RUNAS_ID -eq $UID ] && echo "skipping test 6b" && return
+       $RUNAS chmod 0444 $DIR/f6a && error
+       $CHECKSTAT -t file -p 0666 -u \#$UID $DIR/f6a || error
+}
+run_test 6b "$RUNAS chmod .../f6a (should return error) =="
+
+test_6c() {
+       [ $RUNAS_ID -eq $UID ] && echo "skipping test 6c" && return
+       touch $DIR/f6c
+       chown $RUNAS_ID $DIR/f6c || error
+       $CHECKSTAT -t file -u \#$RUNAS_ID $DIR/f6c || error
+}
+run_test 6c "touch .../f6c; chown .../f6c ======================"
+
+test_6d() {
+       [ $RUNAS_ID -eq $UID ] && echo "skipping test 6d" && return
+       $RUNAS chown $UID $DIR/f6c && error
+       $CHECKSTAT -t file -u \#$RUNAS_ID $DIR/f6c || error
 }
-run_test 6 "touch .../f6; chmod .../f6 ========================="
+run_test 6d "$RUNAS chown .../f6c (should return error) =="
+
+test_6e() {
+       [ $RUNAS_ID -eq $UID ] && echo "skipping test 6e" && return
+       touch $DIR/f6e
+       chgrp $RUNAS_ID $DIR/f6e || error
+       $CHECKSTAT -t file -u \#$UID -g \#$RUNAS_ID $DIR/f6e || error
+}
+run_test 6e "touch .../f6e; chgrp .../f6e ======================"
+
+test_6f() {
+       [ $RUNAS_ID -eq $UID ] && echo "skipping test 6f" && return
+       $RUNAS chgrp $UID $DIR/f6e && error
+       $CHECKSTAT -t file -u \#$UID -g \#$RUNAS_ID $DIR/f6e || error
+}
+run_test 6f "$RUNAS chgrp .../f6e (should return error) =="
 
 test_7a() {
        mkdir $DIR/d7
@@ -357,7 +404,7 @@ test_23() {
 run_test 23 "O_CREAT|O_EXCL in subdir =========================="
 
 test_24a() {
-       echo '============ rename sanity ================================='
+       echo '== rename sanity =============================================='
        echo '-- same directory rename'
        mkdir $DIR/R1
        touch $DIR/R1/f
@@ -440,7 +487,7 @@ test_24i() {
        $CHECKSTAT -t dir  $DIR/R9/a || error
        $CHECKSTAT -a file $DIR/R9/a/f || error
 }
-run_test 24i "rename file to dir error: touch f ; mkdir a ; rename f a ====="
+run_test 24i "rename file to dir error: touch f ; mkdir a ; rename f a"
 
 test_24j() {
        mkdir $DIR/R10
@@ -452,7 +499,7 @@ test_24j() {
 run_test 24j "source does not exist ============================" 
 
 test_25a() {
-       echo '== symlink sanity ======================================='
+       echo '== symlink sanity ============================================='
        mkdir $DIR/d25
        ln -s d25 $DIR/s25
        touch $DIR/s25/foo || error
@@ -473,7 +520,8 @@ test_26a() {
 run_test 26a "multiple component symlink ======================="
 
 test_26b() {
-       ln -s d26/d26-2/foo $DIR/s26-2
+       mkdir -p $DIR/d26b/d26-2
+       ln -s d26b/d26-2/foo $DIR/s26-2
        touch $DIR/s26-2 || error
 }
 run_test 26b "multiple component symlink at end of lookup ======"
@@ -500,12 +548,12 @@ test_26e() {
 run_test 26e "unlink multiple component recursive symlink ======"
 
 test_27a() {
-       echo '== stripe sanity ========================================'
+       echo '== stripe sanity =============================================='
        mkdir $DIR/d27
        $LSTRIPE $DIR/d27/f0 8192 0 1
        $CHECKSTAT -t file $DIR/d27/f0
        pass
-       log "test_27b: write to one stripe file ========================="
+       log "== test_27b: write to one stripe file ========================="
        cp /etc/hosts $DIR/d27/f0
 }
 run_test 27a "one stripe file =================================="
@@ -513,7 +561,7 @@ run_test 27a "one stripe file =================================="
 test_27c() {
        $LSTRIPE $DIR/d27/f01 8192 0 2
        pass
-       log "test_27d: write to two stripe file file f01 ================"
+       log "== test_27d: write to two stripe file file f01 ================"
        dd if=/dev/zero of=$DIR/d27/f01 bs=4k count=4
 }
 run_test 27c "create two stripe file f01 ======================="
@@ -537,14 +585,15 @@ run_test 27e "lstripe existing file (should return error) ======"
 test_27f() {
        $LSTRIPE $DIR/d27/fbad 100 1 2 || true
        dd if=/dev/zero of=$DIR/d27/f12 bs=4k count=4
+       $LFIND $DIR/d27/fbad
 }
 run_test 27f "lstripe with bad stripe size (should return error on LOV)"
 
 test_27g() {
        $MCREATE $DIR/d27/fnone || error
        pass
-       log "test 27.9: lfind ============================================"
-       $LFIND $DIR/d27
+       log "== test 27h: lfind ============================================"
+       $LFIND $DIR/d27/fnone | grep -q "Has no stripe info" || error
 }
 run_test 27g "mcreate file without objects to test lfind ======="
 
@@ -586,7 +635,7 @@ test_30() {
 run_test 30 "run binary from Lustre (execve) ==================="
 
 test_31() {
-       ./openunlink $DIR/f31 $DIR/f31 || error
+       $OPENUNLINK $DIR/f31 $DIR/f31 || error
 }
 run_test 31 "open-unlink file =================================="
 
@@ -627,7 +676,7 @@ test_32d() {
        ls -al $DIR/d32d/ext2-mountpoint/../d2/test_dir || error
        umount $DIR/d32d/ext2-mountpoint || error
 }
-run_test 32d "open d32d/ext2-mountpoint/../d2/test_dir =========="
+run_test 32d "open d32d/ext2-mountpoint/../d2/test_dir ========="
 
 test_32e() {
        [ -e $DIR/d32e ] && rm -fr $DIR/d32e
@@ -638,7 +687,7 @@ test_32e() {
        $CHECKSTAT -t link $DIR/d32e/tmp/symlink11 || error
        $CHECKSTAT -t link $DIR/d32e/symlink01 || error
 }
-run_test 32e "stat d32e/symlink->tmp/symlink->lustre-subdir ====="
+run_test 32e "stat d32e/symlink->tmp/symlink->lustre-subdir ===="
 
 test_32f() {
        [ -e $DIR/d32f ] && rm -fr $DIR/d32f
@@ -649,7 +698,7 @@ test_32f() {
        ls $DIR/d32f/tmp/symlink11  || error
        ls $DIR/d32f/symlink01 || error
 }
-run_test 32f "open d32f/symlink->tmp/symlink->lustre-subdir ====="
+run_test 32f "open d32f/symlink->tmp/symlink->lustre-subdir ===="
 
 test_32g() {
        [ -e $DIR/d32g ] && rm -fr $DIR/d32g
@@ -687,7 +736,7 @@ test_32i() {
        $CHECKSTAT -t file $DIR/d32i/ext2-mountpoint/../test_file || error  
        umount $DIR/d32i/ext2-mountpoint || error
 }
-run_test 32i "stat d32i/ext2-mountpoint/../test_file ============"
+run_test 32i "stat d32i/ext2-mountpoint/../test_file ==========="
 
 test_32j() {
        [ -e $DIR/d32j ] && rm -fr $DIR/d32j
@@ -697,10 +746,10 @@ test_32j() {
        cat $DIR/d32j/ext2-mountpoint/../test_file || error
        umount $DIR/d32j/ext2-mountpoint || error
 }
-run_test 32j "open d32j/ext2-mountpoint/../test_file ============"
+run_test 32j "open d32j/ext2-mountpoint/../test_file ==========="
 
 test_32k() {
-       [ -e $DIR/d32k ] && rm -fr $DIR/d32k
+       rm -fr $DIR/d32k
        mkdir -p $DIR/d32k/ext2-mountpoint 
        mount -t ext2 -o loop $EXT2_DEV $DIR/d32k/ext2-mountpoint  
        mkdir -p $DIR/d32k/d2
@@ -708,10 +757,10 @@ test_32k() {
        $CHECKSTAT -t file $DIR/d32k/ext2-mountpoint/../d2/test_file || error
        umount $DIR/d32k/ext2-mountpoint || error
 }
-run_test 32k "stat d32k/ext2-mountpoint/../d2/test_file ========="
+run_test 32k "stat d32k/ext2-mountpoint/../d2/test_file ========"
 
 test_32l() {
-       [ -e $DIR/d32l ] && rm -fr $DIR/d32l
+       rm -fr $DIR/d32l
        mkdir -p $DIR/d32l/ext2-mountpoint 
        mount -t ext2 -o loop $EXT2_DEV $DIR/d32l/ext2-mountpoint || error
        mkdir -p $DIR/d32l/d2
@@ -719,10 +768,10 @@ test_32l() {
        cat  $DIR/d32l/ext2-mountpoint/../d2/test_file || error
        umount $DIR/d32l/ext2-mountpoint || error
 }
-run_test 32l "open d32l/ext2-mountpoint/../d2/test_file ========="
+run_test 32l "open d32l/ext2-mountpoint/../d2/test_file ========"
 
 test_32m() {
-       [ -e $DIR/d32m ] && rm -fr $DIR/d32m
+       rm -fr $DIR/d32m
        mkdir -p $DIR/d32m/tmp    
        TMP_DIR=$DIR/d32m/tmp       
        ln -s $DIR $TMP_DIR/symlink11 
@@ -730,10 +779,10 @@ test_32m() {
        $CHECKSTAT -t link $DIR/d32m/tmp/symlink11 || error
        $CHECKSTAT -t link $DIR/d32m/symlink01 || error
 }
-run_test 32m "stat d32m/symlink->tmp/symlink->lustre-root ======="
+run_test 32m "stat d32m/symlink->tmp/symlink->lustre-root ======"
 
 test_32n() {
-       [ -e $DIR/d32n ] && rm -fr $DIR/d32n
+       rm -fr $DIR/d32n
        mkdir -p $DIR/d32n/tmp    
        TMP_DIR=$DIR/d32n/tmp       
        ln -s $DIR $TMP_DIR/symlink11 
@@ -741,11 +790,11 @@ test_32n() {
        ls -l $DIR/d32n/tmp/symlink11  || error
        ls -l $DIR/d32n/symlink01 || error
 }
-run_test 32n "open d32n/symlink->tmp/symlink->lustre-root ======="
+run_test 32n "open d32n/symlink->tmp/symlink->lustre-root ======"
 
 test_32o() {
-       [ -e $DIR/d32o ] && rm -fr $DIR/d32o
-       [ -e $DIR/test_file ] && rm -fr $DIR/test_file
+       rm -fr $DIR/d32o
+       rm -f $DIR/test_file
        touch $DIR/test_file 
        mkdir -p $DIR/d32o/tmp    
        TMP_DIR=$DIR/d32o/tmp       
@@ -759,8 +808,8 @@ test_32o() {
 run_test 32o "stat d32o/symlink->tmp/symlink->lustre-root/test_file"
 
 test_32p() {
-       [ -e $DIR/d32p ] && rm -fr $DIR/d32p
-       [ -e $DIR/test_file ] && rm -fr $DIR/test_file
+       rm -fr $DIR/d32p
+       rm -f $DIR/test_file
        touch $DIR/test_file 
        mkdir -p $DIR/d32p/tmp    
        TMP_DIR=$DIR/d32p/tmp       
@@ -771,109 +820,220 @@ test_32p() {
 }
 run_test 32p "open d32p/symlink->tmp/symlink->lustre-root/test_file"
 
+test_32q() {   # ls the root of an ext2 image loop-mounted on $DIR/d32q
+       rm -fr $DIR/d32q        # -f already tolerates a missing directory
+       mkdir -p $DIR/d32q
+       mount -t ext2 -o loop $EXT2_DEV $DIR/d32q || error
+       ls $DIR/d32q || error
+       umount $DIR/d32q || error
+}
+run_test 32q "ls a mounted file system ========================="
+
 #   chmod 444 /mnt/lustre/somefile
 #   open(/mnt/lustre/somefile, O_RDWR)
 #   Should return -1
 test_33() {
-       [ -e $DIR/test_33_file ] && rm -fr $DIR/test_33_file
+       rm -f $DIR/test_33_file
        touch $DIR/test_33_file
        chmod 444 $DIR/test_33_file
        chown $RUNAS_ID $DIR/test_33_file
-       $RUNAS openfile -f O_RDWR $DIR/test_33_file && error || true
+       $RUNAS $OPENFILE -f O_RDWR $DIR/test_33_file && error || true
 }
 run_test 33 "write file with mode 444 (should return error) ===="
 
-test_34() {
-       $MCREATE $DIR/f
-       $TRUNCATE $DIR/f 100
-       rm $DIR/f
+TEST_34_SIZE=${TEST_34_SIZE:-2000000000000}
+test_34a() {
+       rm -f $DIR/test_34_file
+       $MCREATE $DIR/test_34_file || error
+       $LFIND $DIR/test_34_file | grep -q "Has no stripe information" || error
+       $TRUNCATE $DIR/test_34_file $TEST_34_SIZE || error
+       $LFIND $DIR/test_34_file | grep -q "Has no stripe information" || error
+       $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error
+}
+run_test 34a "truncate file that has not been opened ==========="
+
+test_34b() {   # relies on the test_34_file left behind by test_34a
+       $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error
+       $OPENFILE -f O_RDONLY $DIR/test_34_file || error
+       $LFIND $DIR/test_34_file | grep -q "Has no stripe information" || error
+       $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error
 }
-run_test 34 "truncate file that has not been opened ============"
+run_test 34b "O_RDONLY opening file doesn't create objects ====="
+
+test_34c() {
+       $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error
+       $OPENFILE -f O_RDWR $DIR/test_34_file
+       $LFIND $DIR/test_34_file | grep -q "Has no stripe information" && error
+       $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error
+}
+run_test 34c "O_RDWR opening file-with-size works =============="
+
+test_34d() {
+       dd if=/dev/zero of=$DIR/test_34_file conv=notrunc bs=4k count=1 || error
+       $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error
+       rm $DIR/test_34_file
+}
+run_test 34d "write to sparse file ============================="
+
+test_34e() {   # fresh file: size set by truncate must survive an O_RDWR open
+       rm -f $DIR/test_34_file
+       $MCREATE $DIR/test_34_file || error
+       $TRUNCATE $DIR/test_34_file 1000 || error
+       $CHECKSTAT -s 1000 $DIR/test_34_file || error
+       $OPENFILE -f O_RDWR $DIR/test_34_file || error
+       $CHECKSTAT -s 1000 $DIR/test_34_file || error
+}
+run_test 34e "create objects, some with size and some without =="
 
 test_35() {
-       [ -e $DIR/test_35_file ] && rm -fr $DIR/test_35_file
        cp /bin/sh $DIR/test_35_file
        chmod 444 $DIR/test_35_file
        chown $RUNAS_ID $DIR/test_35_file
-       $DIR/test_35_file && error
-       return 0
+       $DIR/test_35_file && error || true
+       rm $DIR/test_35_file
 }
 run_test 35 "exec file with mode 444 (should return error) ====="
 
 test_36a() {
-       log 36  "cvs operations ===================================="
-       mkdir -p $DIR/cvsroot
-       chown $RUNAS_ID $DIR/cvsroot
-       $RUNAS cvs -d $DIR/cvsroot init 
+       sleep 1         # we need a rest, or UMLs clock becomes skewed
+       rm -f $DIR/test_36_file
+       utime $DIR/test_36_file || error
 }
-run_test 36a "cvs init ========================================="
+run_test 36a "MDS utime check (mknod, utime) ==================="
 
 test_36b() {
-       # on the LLNL clusters, runas will still pick up root's $TMP settings,
-        # which will not be writable for the runas user, and then you get a CVS
-       # error message with a corrupt path string (CVS bug) and panic.
-       # We're not using much space, so just stick it in /tmp, which is
-       # safe.
-       OLDTMPDIR=$TMPDIR
-       OLDTMP=$TMP
-       TMPDIR=/tmp
-       TMP=/tmp
-
-       cd /etc/init.d
-       $RUNAS cvs -d $DIR/cvsroot import -m "nomesg"  reposname vtag rtag
-
-       TMPDIR=$OLDTMPDIR
-       TMP=$OLDTMP
+       sleep 1
+       echo "" > $DIR/test_36_file
+       utime $DIR/test_36_file || error
 }
-run_test 36b "cvs import ======================================="
+run_test 36b "OST utime check (open, utime) ===================="
 
 test_36c() {
-       cd $DIR
-       mkdir -p $DIR/reposname
-       chown $RUNAS_ID $DIR/reposname
-       $RUNAS cvs -d $DIR/cvsroot co reposname
+       sleep 1
+       rm -f $DIR/d36/test_36_file
+       mkdir $DIR/d36
+       chown $RUNAS_ID $DIR/d36
+       $RUNAS utime $DIR/d36/test_36_file || error
 }
-run_test 36c "cvs checkout ====================================="
+run_test 36c "non-root MDS utime check (mknod, utime) =========="
 
 test_36d() {
-       cd $DIR/reposname
-       $RUNAS touch foo36
-       $RUNAS cvs add -m 'addmsg' foo36
+       sleep 1
+       echo "" > $DIR/d36/test_36_file
+       $RUNAS utime $DIR/d36/test_36_file || error
 }
-run_test 36d "cvs add =========================================="
+run_test 36d "non-root OST utime check (open, utime) ==========="
 
 test_36e() {
-       cd $DIR/reposname
-       $RUNAS cvs update
-}
-run_test 36e "cvs update ======================================="
-
-# XXX change this: use a non root user
-test_36f() {
-       cd $DIR/reposname
-       $RUNAS cvs commit -m 'nomsg' foo36
+       sleep 1
+       [ $RUNAS_ID -eq $UID ] && return
+       touch $DIR/d36/test_36_file2
+       $RUNAS utime $DIR/d36/test_36_file2 && error || true
 }
-run_test 36f "cvs commit ======================================="
+run_test 36e "utime on non-owned file (should return error) ===="
 
 test_37() {
        mkdir -p $DIR/dextra
        echo f > $DIR/dextra/fbugfile
-       mount -t ext2 -o loop /$EXT2_DEV $DIR/dextra
-       ls $DIR/dextra |grep "\<fbugfile\>" && error
-       umount /$EXT2_DEV
-       rm -f DIR/dextra/fbugfile
+       mount -t ext2 -o loop $EXT2_DEV $DIR/dextra
+       ls $DIR/dextra | grep "\<fbugfile\>" && error
+       umount $DIR/dextra || error
+       rm -f $DIR/dextra/fbugfile || error
 }
-run_test 37 "ls a mounted file system to check the old contents ====="
+run_test 37 "ls a mounted file system to check old content ====="
 
 # open(file, O_DIRECTORY) will leak a request and not cleanup (bug 1501)
 test_38() {
-        o_directory $DIR/test38
+       o_directory $DIR/test38
 }
 run_test 38 "open a regular file with O_DIRECTORY =============="
-        
+
+test_39() {
+       touch $DIR/test_39_file
+       touch $DIR/test_39_file2
+#      ls -l  $DIR/test_39_file $DIR/test_39_file2
+#      ls -lu  $DIR/test_39_file $DIR/test_39_file2
+#      ls -lc  $DIR/test_39_file $DIR/test_39_file2
+       sleep 2
+       $OPENFILE -f O_CREAT:O_TRUNC:O_WRONLY $DIR/test_39_file2
+#      ls -l  $DIR/test_39_file $DIR/test_39_file2
+#      ls -lu  $DIR/test_39_file $DIR/test_39_file2
+#      ls -lc  $DIR/test_39_file $DIR/test_39_file2
+       [ $DIR/test_39_file2 -nt $DIR/test_39_file ] || error
+}
+run_test 39 "mtime changed on create ==========================="
+
+test_40() {
+       dd if=/dev/zero of=$DIR/f40 bs=4096 count=1
+       $RUNAS $OPENFILE -f O_WRONLY:O_TRUNC $DIR/f40 && error
+       $CHECKSTAT -t file -s 4096 $DIR/f40 || error
+}
+run_test 40 "failed open(O_TRUNC) doesn't truncate ============="
+
+test_41() {
+       # bug 1553
+       small_write $DIR/f41 18
+}
+run_test 41 "test small file write + fstat ====================="
+
+# on the LLNL clusters, runas will still pick up root's $TMP settings,
+# which will not be writable for the runas user, and then you get a CVS
+# error message with a corrupt path string (CVS bug) and panic.
+# We're not using much space, so just stick it in /tmp, which is safe.
+OLDTMPDIR=$TMPDIR
+OLDTMP=$TMP
+TMPDIR=/tmp
+TMP=/tmp
+OLDHOME=$HOME
+[ $RUNAS_ID -ne $UID ] && HOME=/tmp
+
+test_99a() {
+       echo 99 "cvs operations ===================================="
+       mkdir -p $DIR/d99cvsroot
+       chown $RUNAS_ID $DIR/d99cvsroot
+       $RUNAS cvs -d $DIR/d99cvsroot init || error
+}
+run_test 99a "cvs init ========================================="
+
+test_99b() {
+       cd /etc/init.d
+       $RUNAS cvs -d $DIR/d99cvsroot import -m "nomesg" d99reposname vtag rtag
+}
+run_test 99b "cvs import ======================================="
+
+test_99c() {
+       cd $DIR
+       mkdir -p $DIR/d99reposname
+       chown $RUNAS_ID $DIR/d99reposname
+       $RUNAS cvs -d $DIR/d99cvsroot co d99reposname
+}
+run_test 99c "cvs checkout ====================================="
+
+test_99d() {
+       cd $DIR/d99reposname
+       $RUNAS touch foo99
+       $RUNAS cvs add -m 'addmsg' foo99
+}
+run_test 99d "cvs add =========================================="
+
+test_99e() {
+       cd $DIR/d99reposname
+       $RUNAS cvs update
+}
+run_test 99e "cvs update ======================================="
+
+test_99f() {
+       cd $DIR/d99reposname
+       $RUNAS cvs commit -m 'nomsg' foo99
+}
+run_test 99f "cvs commit ======================================="
+
+TMPDIR=$OLDTMPDIR
+TMP=$OLDTMP
+HOME=$OLDHOME
 
 log "cleanup: ======================================================"
-rm -r $DIR/[Rdfs][1-9]*
+rm -rf $DIR/[Rdfs][1-9]*
 if [ "$I_MOUNTED" = "yes" ]; then
        sh llmountcleanup.sh || error
 fi
index 8145e63..1895c8a 100644 (file)
 
 set -e
 
-PATH=$PATH:.
+ONLY=${ONLY:-"$*"}
+ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"8"} # bug 1557
+
+SRCDIR=`dirname $0`
+PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH
 
 CHECKSTAT=${CHECKSTAT:-"checkstat -v"}
-MOUNT1=${MOUNT1:-/mnt/lustre1}
-MOUNT2=${MOUNT2:-/mnt/lustre2}
+CREATETEST=${CREATETEST:-createtest}
+LFIND=${LFIND:-lfind}
+LSTRIPE=${LSTRIPE:-lstripe}
+LCTL=${LCTL:-lctl}
+MCREATE=${MCREATE:-mcreate}
+OPENFILE=${OPENFILE:-openfile}
+OPENUNLINK=${OPENUNLINK:-openunlink}
+TOEXCL=${TOEXCL:-toexcl}
+TRUNCATE=${TRUNCATE:-truncate}
+
+if [ $UID -ne 0 ]; then
+       RUNAS_ID="$UID"
+       RUNAS=""
+else
+       RUNAS_ID=${RUNAS_ID:-500}
+       RUNAS=${RUNAS:-"runas -u $RUNAS_ID"}
+fi
+
 export NAME=${NAME:-mount2}
 
+SAVE_PWD=$PWD
+
 clean() {
-        echo -n "cln.."
-        sh llmountcleanup.sh > /dev/null
+       echo -n "cln.."
+       sh llmountcleanup.sh > /dev/null || exit 20
 }
-
 CLEAN=${CLEAN:-clean}
+
 start() {
-        echo -n "mnt.."
-        sh llrmount.sh > /dev/null
-        echo -n "done"
+       echo -n "mnt.."
+       sh llrmount.sh > /dev/null || exit 10
+       echo "done"
 }
 START=${START:-start}
 
-error () { 
-    echo FAIL
-    exit 1
-}
-
-pass() { 
-    echo PASS
-}
-
-mkdir -p $MOUNT2
-mount | grep $MOUNT1 || sh llmount.sh
-
-echo -n "test 1: check create on 2 mtpt's..."
-touch $MOUNT1/f1
-[ -f $MOUNT2/f1 ] || error
-pass
-
-echo "test 2: check attribute updates on 2 mtpt's..."
-chmod 777 $MOUNT2/f1
-$CHECKSTAT -t file -p 0777 $MOUNT1/f1 || error
-pass
-
-echo "test 2b: check cached attribute updates on 2 mtpt's..."
-touch $MOUNT1/f2b
-ls -l $MOUNT2/f2b
-chmod 777 $MOUNT2/f2b
-$CHECKSTAT -t file -p 0777 $MOUNT1/f2b || error
-pass
-
-echo "test 2c: check cached attribute updates on 2 mtpt's..."
-touch $MOUNT1/f2c
-ls -l $MOUNT2/f2c
-chmod 777 $MOUNT1/f2c
-$CHECKSTAT -t file -p 0777 $MOUNT2/f2c || error
-pass
-
-echo "test 3: check after remount attribute updates on 2 mtpt's..."
-chmod a-x $MOUNT2/f1
-$CLEAN
-$START
-$CHECKSTAT -t file -p 0666 $MOUNT1/f1 || error
-pass
-
-echo "test 4: unlink on one mountpoint removes file on other..."
-rm $MOUNT2/f1
-$CHECKSTAT -a $MOUNT1/f1 || error
-pass
-
-echo -n "test 5: symlink on one mtpt, readlink on another..."
-( cd $MOUNT1 ; ln -s this/is/good lnk )
-
-[ "this/is/good" = "`perl -e 'print readlink("/mnt/lustre2/lnk");'`" ] || error
-pass
-
-echo -n "test 6: fstat validation on multiple mount points..."
-./multifstat $MOUNT1/f6 $MOUNT2/f6
-pass
-
-if [ -n "$BUG_1365" ]; then
-echo -n "test 7: create a file on one mount, truncate it on the other..."
-mcreate $MOUNT1/f1
-truncate $MOUNT2/f1 100
-rm $MOUNT1/f1
-pass
-else
-echo "Skipping test for 1365: set \$BUG_1365 to run it (and crash, likely)."
-fi
+log() {        # print the message, then hand the same text to "lctl mark"
+       echo "$*"
+       $LCTL mark "$*" || true # honour the LCTL override; marking is best-effort
+}
+
+run_one() {
+       if ! mount | grep -q $DIR1; then
+               $START
+       fi
+       log "== test $1: $2"
+       test_$1 || error
+       pass
+       cd $SAVE_PWD
+       $CLEAN
+}
+
+run_test() {
+       for O in $ONLY; do
+               if [ "`echo $1 | grep '\<'$O'[a-z]*\>'`" ]; then
+                       echo ""
+                       run_one $1 "$2"
+                       return $?
+               else
+                       echo -n "."
+               fi
+       done
+       for X in $EXCEPT $ALWAYS_EXCEPT; do
+               if [ "`echo $1 | grep '\<'$X'[a-z]*\>'`" ]; then
+                       echo "skipping excluded test $1"
+                       return 0
+               fi
+       done
+       if [ -z "$ONLY" ]; then
+               run_one $1 "$2"
+               return $?
+       fi
+}
+
+error () {
+       echo "FAIL: $@"
+       exit 1
+}
+
+pass() {
+       echo PASS
+}
+
+MOUNT1=`mount| awk '/^'$NAME' .* lustre_lite / { print $3 }'| head -1`
+MOUNT2=`mount| awk '/^'$NAME' .* lustre_lite / { print $3 }'| tail -1`
+[ -z "$MOUNT1" ] && error "NAME=$NAME not mounted once"
+[ "$MOUNT1" = "$MOUNT2" ] && error "NAME=$NAME not mounted twice"
+[ `mount| awk '/^'$NAME' .* lustre_lite / { print $3 }'| wc -l` -ne 2 ] && \
+       error "NAME=$NAME mounted more than twice"
+
+DIR1=${DIR1:-$MOUNT1}
+DIR2=${DIR2:-$MOUNT2}
+[ -z "`echo $DIR1 | grep $MOUNT1`" ] && echo "$DIR1 not in $MOUNT1" && exit 96
+[ -z "`echo $DIR2 | grep $MOUNT2`" ] && echo "$DIR2 not in $MOUNT2" && exit 95
+
+rm -f $DIR1/[df][0-9]* $DIR1/lnk
+
+test_1a() {
+       touch $DIR1/f1
+       [ -f $DIR2/f1 ] || error
+}
+run_test 1a "check create on 2 mtpt's =========================="
+
+test_1b() {
+       chmod 777 $DIR2/f1
+       $CHECKSTAT -t file -p 0777 $DIR1/f1 || error
+       chmod a-x $DIR2/f1
+}
+run_test 1b "check attribute updates on 2 mtpt's ==============="
+
+test_1c() {
+       $CHECKSTAT -t file -p 0666 $DIR1/f1 || error
+}
+run_test 1c "check after remount attribute updates on 2 mtpt's ="
+
+test_1d() {
+       rm $DIR2/f1
+       $CHECKSTAT -a $DIR1/f1 || error
+}
+run_test 1d "unlink on one mountpoint removes file on other ===="
+
+test_2a() {
+       touch $DIR1/f2a
+       ls -l $DIR2/f2a
+       chmod 777 $DIR2/f2a
+       $CHECKSTAT -t file -p 0777 $DIR1/f2a || error
+}
+run_test 2a "check cached attribute updates on 2 mtpt's ========"
+
+test_2b() {
+       touch $DIR1/f2b
+       ls -l $DIR2/f2b
+       chmod 777 $DIR1/f2b
+       $CHECKSTAT -t file -p 0777 $DIR2/f2b || error
+}
+run_test 2b "check cached attribute updates on 2 mtpt's ========"
+
+test_3() {
+       ( cd $DIR1 ; ln -s this/is/good lnk )
+       [ "this/is/good" = "`perl -e 'print readlink("'$DIR2/lnk'");'`" ] || \
+               error
+}
+run_test 3 "symlink on one mtpt, readlink on another ==========="
+
+test_4() {
+       ./multifstat $DIR1/f6 $DIR2/f6
+}
+run_test 4 "fstat validation on multiple mount points =========="
+
+test_5() {     # check every step: set -e is inert inside "test_5 || error"
+       $MCREATE $DIR1/f5 || error
+       $TRUNCATE $DIR2/f5 100 || error
+       rm $DIR1/f5 || error
+}
+run_test 5 "create a file on one mount, truncate it on the other"
+
+test_6() {     # find the helper via $OPENUNLINK/PATH, not the current directory
+       $OPENUNLINK $DIR1/f6 $DIR2/f6 || error
+}
+run_test 6 "remove of open file on other node =================="
+
+test_7() {
+       ./opendirunlink $DIR1/d7 $DIR2/d7 || error
+}
+run_test 7 "remove of open directory on other node ============="
+
+test_8() {
+       ./opendevunlink $DIR1/dev8 $DIR2/dev8 || error
+}
+run_test 8 "remove of open special file on other node =========="
+
+test_9() {
+       MTPT=1
+       > $DIR2/f9
+       for C in a b c d e f g h i j k l; do
+               DIR=`eval echo \\$DIR$MTPT`
+               echo -n $C >> $DIR/f9
+               [ "$MTPT" -eq 1 ] && MTPT=2 || MTPT=1
+       done
+       [ "`cat $DIR1/f9`" = "abcdefghijkl" ] || error
+}
+run_test 9 "append of file with sub-page size on multiple mounts"
+
+test_10() {
+       MTPT=1
+       OFFSET=0
+       > $DIR2/f10
+       for C in a b c d e f g h i j k l; do
+               DIR=`eval echo \\$DIR$MTPT`
+               echo -n $C | dd of=$DIR/f10 bs=1 seek=$OFFSET count=1
+               [ "$MTPT" -eq 1 ] && MTPT=2 || MTPT=1
+               OFFSET=`expr $OFFSET + 1`
+       done
+       [ "`cat $DIR1/f10`" = "abcdefghijkl" ] || error
+}
+run_test 10 "write of file with sub-page size on multiple mounts "
 
-echo "test 9: remove of open file on other node..."
-./openunlink $MOUNT1/f9 $MOUNT2/f9 || error
-pass
-
-echo "test 9b: remove of open directory on other node..."
-./opendirunlink $MOUNT1/dir1 $MOUNT2/dir1 || error
-pass
-
-#echo "test 9c: remove of open special file on other node..."
-#./opendevunlink $MOUNT1/dev1 $MOUNT2/dev1 || error
-#pass
-
-echo -n "test 10: append of file with sub-page size on multiple mounts..."
-MTPT=1
-> $MOUNT2/f10
-for C in a b c d e f g h i j k l; do
-       MOUNT=`eval echo \\$MOUNT$MTPT`
-       echo -n $C >> $MOUNT/f10
-       [ "$MTPT" -eq 1 ] && MTPT=2 || MTPT=1
-done
-[ "`cat $MOUNT1/f10`" = "abcdefghijkl" ] && pass || error
-       
-echo -n "test 11: write of file with sub-page size on multiple mounts..."
-MTPT=1
-OFFSET=0
-> $MOUNT2/f11
-for C in a b c d e f g h i j k l; do
-       MOUNT=`eval echo \\$MOUNT$MTPT`
-       echo -n $C | dd of=$MOUNT/f11 bs=1 seek=$OFFSET count=1
-       [ "$MTPT" -eq 1 ] && MTPT=2 || MTPT=1
-       OFFSET=`expr $OFFSET + 1`
-done
-[ "`cat $MOUNT1/f11`" = "abcdefghijkl" ] && pass || error
-       
-rm -f $MOUNT1/f[0-9]* $MOUNT1/lnk
-
-$CLEAN
-
-exit
+rm -f $DIR1/f[0-9]* $DIR1/lnk
index 2b3adc3..f7a9241 100644 (file)
@@ -6,7 +6,7 @@ config=${1:-uml.xml}
 LMC=${LMC:-lmc}
 TMP=${TMP:-/tmp}
 
-MDSDEV=${MDSDEV:-$TMP/mds1}
+MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
 MDSSIZE=${MDSSIZE:-50000}
 
 OSTDEVBASE=$TMP/ost
@@ -19,6 +19,7 @@ STRIPECNT=${STRIPECNT:-1}
 FSTYPE=${FSTYPE:-ext3}
 
 NETTYPE=${NETTYPE:-tcp}
+NIDTYPE=${NIDTYPE:-$NODETYPE}
 
 # NOTE - You can't have different MDS/OST nodes and also have clients on the
 #        MDS/OST nodes without using --endlevel and --startlevel during lconf.
@@ -50,6 +51,10 @@ CLIENTS=${CLIENTS:-"uml3"}
 
 rm -f $config
 
+h2localhost () {
+       echo localhost
+}
+       
 h2tcp () {
        case $1 in
        client) echo '\*' ;;
@@ -68,7 +73,7 @@ h2elan () {
 echo -n "adding NET for:"
 for NODE in `echo $MDSNODE $OSTNODES $CLIENTS | tr -s " " "\n" | sort -u`; do
        echo -n " $NODE"
-       ${LMC} -m $config --add net --node $NODE --nid `h2$NETTYPE $NODE` --nettype $NETTYPE || exit 1
+       ${LMC} -m $config --add net --node $NODE --nid `h2$NIDTYPE $NODE` --nettype $NETTYPE || exit 1
 done
 
 # configure mds server
@@ -82,7 +87,7 @@ echo -n "adding OST on:"
 for NODE in $OSTNODES; do
        eval OSTDEV=\$OSTDEV$COUNT
        echo -n " $NODE"
-       OSTDEV=${OSTDEV:-$OSTDEVBASE$COUNT}
+       OSTDEV=${OSTDEV:-$OSTDEVBASE$COUNT-`hostname`}
         ${LMC} -m $config --add ost --node $NODE --lov lov1 --fstype $FSTYPE --dev $OSTDEV --size $OSTSIZE || exit 21
        COUNT=`expr $COUNT + 1`
 done
index c6a5d7d..9fe9f26 100644 (file)
@@ -30,7 +30,14 @@ int main(int argc, char *argv[])
        if (argc != 2)
                usage(argv[0]);
 
-       before_mknod = time(0);
+       /* Adjust the before time back one second, because the kernel's
+        * CURRENT_TIME (lockless clock reading, used to set inode times)
+        * may drift against the do_gettimeofday() time (TSC-corrected and
+        * locked clock reading, used to return timestamps to user space).
+        * This means that the mknod time could be a second older than the
+        * before time, even for a local filesystem such as ext3.
+        */
+       before_mknod = time(0) - 1;
        rc = mknod(filename, 0700, S_IFREG);
        after_mknod = time(0);
        if (rc && errno != EEXIST) {
@@ -52,13 +59,15 @@ int main(int argc, char *argv[])
                        return 4;
                }
 
-               printf("%s: good mknod times %lu <= %lu <= %lu\n",
-                      prog, before_mknod, st.st_mtime, after_mknod);
+               printf("%s: good mknod times %lu%s <= %lu <= %lu\n",
+                      prog, before_mknod, before_mknod == st.st_mtime ? "*":"",
+                      st.st_mtime, after_mknod);
 
                sleep(5);
        }
 
-       before_utime = time(0);
+       /* See above */
+       before_utime = time(0) - 1;
        rc = utime(filename, NULL);
        after_utime = time(0);
        if (rc) {
@@ -80,8 +89,9 @@ int main(int argc, char *argv[])
                return 7;
        }
 
-       printf("%s: good utime times %lu <= %lu <= %lu\n",
-              prog, before_utime, st.st_mtime, after_utime);
+       printf("%s: good utime times %lu%s <= %lu <= %lu\n",
+              prog, before_utime, before_utime == st.st_mtime ? "*" : "",
+              st.st_mtime, after_utime);
 
        return 0;
 }
index 06a1588..20f4185 100644 (file)
@@ -15,4 +15,6 @@ obdstat
 obdio
 obdbarrier
 lload
-wirecheck
\ No newline at end of file
+wirecheck
+.*.cmd
+.*.d
index c1b93e6..7a21df3 100644 (file)
@@ -4,4 +4,4 @@ from lustredb import LustreDB, LustreDB_XML, LustreDB_LDAP
 from error import LconfError, OptionError
 from cmdline import Options
 
-CONFIG_VERSION="2003060501"
+CONFIG_VERSION="2003070801"
index a5e8580..04841eb 100644 (file)
@@ -31,6 +31,7 @@
 import sys, getopt, types
 import string, os
 import ldap
+from stat import S_IROTH, S_IRGRP
 PYMOD_DIR = "/usr/lib/lustre/python"
 
 def development_mode():
@@ -43,13 +44,14 @@ if not development_mode():
     sys.path.append(PYMOD_DIR)
 
 import Lustre
+PARAM = Lustre.Options.PARAM
 
 lactive_options = [
-    ('ldapurl',"LDAP server URL", Lustre.Options.PARAM,
-     "ldap://localhost"),
-    ('config', "Cluster config name used for LDAP query", Lustre.Options.PARAM),
-    ('group', "The group of devices to update", Lustre.Options.PARAM),
-    ('active', "The active node name", Lustre.Options.PARAM),
+    ('ldapurl',"LDAP server URL", PARAM, "ldap://localhost"),
+    ('config', "Cluster config name used for LDAP query", PARAM),
+    ('group', "The group of devices to update", PARAM),
+    ('active', "The active node name", PARAM),
+    ('pwfile', "File containing password", PARAM),
     ]
 
 def fatal(*args):
@@ -57,7 +59,6 @@ def fatal(*args):
     print "! " + msg
     sys.exit(1)
 
-
 cl = Lustre.Options("lactive","", lactive_options)
 config, args = cl.parse(sys.argv[1:])
 
@@ -66,10 +67,32 @@ if not (config.group or config.active):
 
 if not config.config:
     fatal("Missing config")
-    
+
+if config.pwfile:
+    try:
+        pwperm = os.stat(config.pwfile)[0]
+        pwreadable = pwperm & (S_IRGRP | S_IROTH)
+        if pwreadable:
+            if pwreadable == (S_IRGRP | S_IROTH):
+                readable_by = "group and others"
+            elif pwreadable == S_IRGRP:
+                readable_by = "group"
+            else:
+                readable_by = "others"
+            print "WARNING: Password file %s is readable by %s" % (
+                config.pwfile, readable_by)
+                 
+        pwfile = open(config.pwfile, "r")
+        pw = string.strip(pwfile.readline())
+        pwfile.close()
+    except Exception, e:
+        fatal("Can't read secret from pwfile %s: %s" % (config.pwfile, e))
+else:
+    print "no pwfile specified, binding anonymously"
+    pw = ""
+
 base = "config=%s,fs=lustre" % (config.config,)
-db = Lustre.LustreDB_LDAP('', {}, base=base, pw = "secret",
-                          url = config.ldapurl)
+db = Lustre.LustreDB_LDAP('', {}, base=base, pw = pw, url = config.ldapurl)
 
 active_node = db.lookup_name(config.active)
 if not active_node:
index 15e5a2c..92ec8e2 100755 (executable)
@@ -1,7 +1,8 @@
 #!/usr/bin/env python
 #
-#  Copyright (C) 2002 Cluster File Systems, Inc.
-#   Author: Robert Read <rread@clusterfs.com>
+#  Copyright (C) 2002-2003 Cluster File Systems, Inc.
+#   Authors: Robert Read <rread@clusterfs.com>
+#            Mike Shaver <shaver@clusterfs.com>
 #   This file is part of Lustre, http://www.lustre.org.
 #
 #   Lustre is free software; you can redistribute it and/or
@@ -26,7 +27,7 @@
 
 import sys, getopt, types
 import string, os, stat, popen2, socket, time, random, fcntl, select
-import re, exceptions, signal
+import re, exceptions, signal, traceback
 import xml.dom.minidom
 
 if sys.version[0] == '1':
@@ -57,7 +58,7 @@ MAX_LOOP_DEVICES = 256
 PORTALS_DIR = 'portals'
 
 
-# Please keep these uptodate with the values in portals/kp30.h
+# Please keep these in sync with the values in portals/kp30.h
 ptldebug_names = { 
     "trace" :     (1 << 0),
     "inode" :     (1 << 1),
@@ -107,6 +108,8 @@ subsystem_names = {
     "ptlrouter" :   (20 << 24),
     "cobd" :        (21 << 24),
     "ptlbd" :       (22 << 24),
+    "log" :         (23 << 24),
+    "mgmt" :        (24 << 24),
     }
 
 
@@ -423,8 +426,11 @@ class LCTLInterface:
   add_route %s %s %s
   quit  """ % (net,
                gw, lo, hi)
-        self.run(cmds)
-
+        try:
+            self.run(cmds)
+        except CommandError, e:
+            log ("ignore: ")
+            e.dump()
                 
     def del_route(self, net, gw, lo, hi):
         cmds =  """
@@ -443,7 +449,11 @@ class LCTLInterface:
   quit """ % (net,
               uuid, tgt, net,
               gw, tgt)
-        self.run(cmds)
+        try:
+            self.run(cmds)
+        except CommandError, e:
+            log ("ignore: ")
+            e.dump()
 
     # add a route to a range
     def del_route_host(self, net, uuid, gw, tgt):
@@ -795,7 +805,6 @@ def get_local_address(net_type, wildcard):
         local=string.rstrip(local[0])
 
     return local
-        
 
 # XXX: instead of device_list, ask for $name and see what we get
 def is_prepared(name):
@@ -1020,8 +1029,8 @@ class Network(Module):
                             self_nid = self.nid
                         if gw_nid < self_nid:
                             try:
-                                lctl.disconnect(router.net_type, router.nid, router.port,
-                                                router.uuid)
+                                lctl.disconnect(gw.net_type, gw.nid, gw.port,
+                                                gw.uuid)
                             except CommandError, e:
                                 print "disconnectAll failed: ", self.name
                                 e.dump()
@@ -1087,6 +1096,27 @@ class RouteTable(Module):
                 e.dump()
                 cleanup_error(e.rc)
 
+class Management(Module):
+    def __init__(self, db):
+        Module.__init__(self, 'MGMT', db)
+        self.add_lustre_module('obdclass', 'obdclass')
+        self.add_lustre_module('ptlrpc', 'ptlrpc')
+        self.add_lustre_module('ldlm', 'ldlm')
+        self.add_lustre_module('mgmt', 'mgmt_svc')
+
+    def prepare(self):
+        if is_prepared(self.name):
+            return
+        self.info()
+        lctl.newdev(attach="mgmt %s %s" % (self.name, self.uuid))
+
+    def safe_to_clean(self):
+        return 1
+
+    def cleanup(self):
+        if is_prepared(self.name):
+            Module.cleanup(self)
+
 class LDLM(Module):
     def __init__(self,db):
         Module.__init__(self, 'LDLM', db)
@@ -1109,7 +1139,7 @@ class LDLM(Module):
             Module.cleanup(self)
 
 class LOV(Module):
-    def __init__(self, db, uuid):
+    def __init__(self, db, uuid, fs_name):
         Module.__init__(self, 'LOV', db)
         self.add_lustre_module('mdc', 'mdc')
         self.add_lustre_module('lov', 'lov')
@@ -1123,11 +1153,12 @@ class LOV(Module):
         self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist))
         self.osclist = []
         self.client_uuid = generate_client_uuid(self.name)
+        self.fs_name = fs_name
         self.mdc_name = ''
-        self.mdc = get_mdc(db, self.client_uuid, self.name, self.mds_uuid)
+        self.mdc = get_mdc(db, self.client_uuid, fs_name, self.mds_uuid)
         for obd_uuid in self.devlist:
             obd = self.db.lookup(obd_uuid)
-            osc = get_osc(obd, self.client_uuid, self.name)
+            osc = get_osc(obd, self.client_uuid, fs_name)
             if osc:
                 self.osclist.append(osc)
             else:
@@ -1142,7 +1173,7 @@ class LOV(Module):
                 # isn't implemented here yet.
                 osc.prepare(ignore_connect_failure=0)
             except CommandError, e:
-                print "Error preparing OSC %s (inactive)\n" % osc.uuid
+                print "Error preparing OSC %s\n" % osc.uuid
                 raise e
         self.mdc.prepare()
         self.mdc_name = self.mdc.name
@@ -1156,7 +1187,7 @@ class LOV(Module):
             Module.cleanup(self)
         for osc in self.osclist:
             osc.cleanup()
-        mdc = get_mdc(self.db, self.client_uuid, self.name, self.mds_uuid)
+        mdc = get_mdc(self.db, self.client_uuid, self.fs_name, self.mds_uuid)
         mdc.cleanup()
 
     def load_module(self):
@@ -1172,12 +1203,12 @@ class LOV(Module):
             break
 
 class LOVConfig(Module):
-    def __init__(self,db):
+    def __init__(self, db):
         Module.__init__(self, 'LOVConfig', db)
 
         self.lov_uuid = self.db.get_first_ref('lov')
         l = self.db.lookup(self.lov_uuid)
-        self.lov = LOV(l, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
+        self.lov = LOV(l, "YOU_SHOULD_NEVER_SEE_THIS_UUID", '')
         
     def prepare(self):
         lov = self.lov
@@ -1410,9 +1441,20 @@ class OSD(Module):
         if not self.osdtype == 'obdecho':
             clean_loop(self.devpath)
 
+def mgmt_uuid_for_fs(mtpt_name):
+    # Map a mountpoint name to its filesystem's 'mgmt' ref; '' if unresolvable.
+    mtpt_db = mtpt_name and toplevel.lookup_name(mtpt_name)
+    if not mtpt_db:
+        return ''
+    fs = toplevel.lookup(mtpt_db.get_first_ref('filesystem'))
+    if not fs:
+        return ''
+    return fs.get_first_ref('mgmt')
+
 # Generic client module, used by OSC and MDC
 class Client(Module):
-    def __init__(self, tgtdb, uuid, module, owner):
+    def __init__(self, tgtdb, uuid, module, fs_name, self_name=None,
+                 module_dir=None):
         self.target_name = tgtdb.getName()
         self.target_uuid = tgtdb.getUUID()
         self.db = tgtdb
@@ -1427,11 +1469,22 @@ class Client(Module):
 
         self.module = module
         self.module_name = string.upper(module)
-        self.name = '%s_%s_%s_%s' % (self.module_name, socket.gethostname(),
-                                     self.target_name, owner)
+        if not self_name:
+            self.name = '%s_%s_%s_%s' % (self.module_name, socket.gethostname(),
+                                         self.target_name, fs_name)
+        else:
+            self.name = self_name
         self.uuid = uuid
         self.lookup_server(self.tgt_dev_uuid)
-        self.add_lustre_module(module, module)
+        mgmt_uuid = mgmt_uuid_for_fs(fs_name)
+        if mgmt_uuid:
+            self.mgmt_name = mgmtcli_name_for_uuid(mgmt_uuid)
+        else:
+            self.mgmt_name = ''
+        self.fs_name = fs_name
+        if not module_dir:
+            module_dir = module
+        self.add_lustre_module(module_dir, module)
 
     def lookup_server(self, srv_uuid):
         """ Lookup a server's network information """
@@ -1461,7 +1514,8 @@ class Client(Module):
                 raise e
         if srv:
             lctl.newdev(attach="%s %s %s" % (self.module, self.name, self.uuid),
-                        setup ="%s %s" %(self.target_uuid, srv.uuid))
+                        setup ="%s %s %s" % (self.target_uuid, srv.uuid,
+                                             self.mgmt_name))
 
     def cleanup(self):
         if is_prepared(self.name):
@@ -1473,7 +1527,7 @@ class Client(Module):
                 else:
                     srv, r =  find_route(self.get_servers())
                     if srv:
-                        lctl.del_route_host(r[0], srv.uuid, r[1], r[2])
+                        lctl.del_route_host(r[0], srv.uuid, r[1], r[3])
             except CommandError, e:
                 log(self.module_name, "cleanup failed: ", self.name)
                 e.dump()
@@ -1481,13 +1535,22 @@ class Client(Module):
 
 
 class MDC(Client):
-    def __init__(self, db, uuid, owner):
-         Client.__init__(self, db, uuid, 'mdc', owner)
+    def __init__(self, db, uuid, fs_name):
+         Client.__init__(self, db, uuid, 'mdc', fs_name)
+
 
 class OSC(Client):
-    def __init__(self, db, uuid, owner):
-         Client.__init__(self, db, uuid, 'osc', owner)
+    def __init__(self, db, uuid, fs_name):
+         Client.__init__(self, db, uuid, 'osc', fs_name)
 
+def mgmtcli_name_for_uuid(uuid):
+    return 'MGMTCLI_%s' % uuid
+
+class ManagementClient(Client):
+    def __init__(self, db, uuid):
+        Client.__init__(self, db, uuid, 'mgmt_cli', '',
+                        self_name = mgmtcli_name_for_uuid(db.getUUID()),
+                        module_dir = 'mgmt')
             
 class COBD(Module):
     def __init__(self, db):
@@ -1509,12 +1572,12 @@ class COBD(Module):
 
 # virtual interface for  OSC and LOV
 class VOSC(Module):
-    def __init__(self, db, uuid, owner):
+    def __init__(self, db, uuid, fs_name):
         Module.__init__(self, 'VOSC', db)
         if db.get_class() == 'lov':
-            self.osc = LOV(db, uuid)
+            self.osc = LOV(db, uuid, fs_name)
         else:
-            self.osc = get_osc(db, uuid, owner)
+            self.osc = get_osc(db, uuid, fs_name)
     def get_uuid(self):
         return self.osc.uuid
     def get_name(self):
@@ -1560,10 +1623,12 @@ class ECHO_CLIENT(Module):
     def load_module(self):
         self.osc.load_module()
         Module.load_module(self)
+
     def cleanup_module(self):
         Module.cleanup_module(self)
         self.osc.cleanup_module()
 
+
 def generate_client_uuid(name):
         client_uuid = '%05x_%.19s_%05x%05x' % (int(random.random() * 1048576),
                                                name,
@@ -1571,6 +1636,7 @@ def generate_client_uuid(name):
                                                int(random.random() * 1048576))
         return client_uuid[:36]
 
+
 class Mountpoint(Module):
     def __init__(self,db):
         Module.__init__(self, 'MTPT', db)
@@ -1579,6 +1645,7 @@ class Mountpoint(Module):
         fs = self.db.lookup(self.fs_uuid)
         self.mds_uuid = fs.get_first_ref('mds')
         self.obd_uuid = fs.get_first_ref('obd')
+        self.mgmt_uuid = fs.get_first_ref('mgmt')
         obd = self.db.lookup(self.obd_uuid)
         client_uuid = generate_client_uuid(self.name)
         self.vosc = VOSC(obd, client_uuid, self.name)
@@ -1586,12 +1653,18 @@ class Mountpoint(Module):
             self.add_lustre_module('mdc', 'mdc')
             self.mdc = get_mdc(db, client_uuid, self.name, self.mds_uuid)
         self.add_lustre_module('llite', 'llite')
-
+        if self.mgmt_uuid:
+            self.mgmtcli = ManagementClient(db.lookup(self.mgmt_uuid),
+                                            client_uuid)
+        else:
+            self.mgmtcli = None
 
     def prepare(self):
         if fs_is_mounted(self.path):
             log(self.path, "already mounted.")
             return
+        if self.mgmtcli:
+            self.mgmtcli.prepare()
         self.vosc.prepare()
         if self.vosc.need_mdc():
             self.mdc.prepare()
@@ -1632,13 +1705,20 @@ class Mountpoint(Module):
         self.vosc.cleanup()
         if self.vosc.need_mdc():
             self.mdc.cleanup()
+        if self.mgmtcli:
+            self.mgmtcli.cleanup()
 
     def load_module(self):
+        if self.mgmtcli:
+            self.mgmtcli.load_module()
         self.vosc.load_module()
         Module.load_module(self)
+
     def cleanup_module(self):
         Module.cleanup_module(self)
         self.vosc.cleanup_module()
+        if self.mgmtcli:
+            self.mgmtcli.cleanup_module()
 
 
 # ============================================================
@@ -1670,6 +1750,8 @@ def getServiceLevel(self):
         ret = 6
     elif type in ('ldlm',):
         ret = 20
+    elif type in ('mgmt',):
+        ret = 25
     elif type in ('osd', 'cobd'):
         ret = 30
     elif type in ('mdsdev',):
@@ -1707,15 +1789,15 @@ def getServices(self):
 #
 # OSC is no longer in the xml, so we have to fake it.
 # this is getting ugly and begging for another refactoring
-def get_osc(ost_db, uuid, owner):
-    osc = OSC(ost_db, uuid, owner)
+def get_osc(ost_db, uuid, fs_name):
+    osc = OSC(ost_db, uuid, fs_name)
     return osc
 
-def get_mdc(db, uuid, owner, mds_uuid):
+def get_mdc(db, uuid, fs_name, mds_uuid):
     mds_db = db.lookup(mds_uuid);
     if not mds_db:
         panic("no mds:", mds_uuid)
-    mdc = MDC(mds_db, uuid, owner)
+    mdc = MDC(mds_db, uuid, fs_name)
     return mdc
 
 ############################################################
@@ -1842,6 +1924,8 @@ def newService(db):
         n = Mountpoint(db)
     elif type == 'echoclient':
         n = ECHO_CLIENT(db)
+    elif type == 'mgmt':
+        n = Management(db)
     else:
         panic ("unknown service type:", type)
     return n
@@ -2060,7 +2144,7 @@ def sys_set_ptldebug():
 def sys_set_subsystem():
     if config.subsystem != None:
         try:
-            val = eval(config.ptldebug, ptldebug_names)
+            val = eval(config.subsystem, subsystem_names)
             val = "0x%x" % (val,)
             sysctl('portals/subsystem_debug', val)
         except NameError, e:
@@ -2191,7 +2275,7 @@ lconf_options = [
     ]      
 
 def main():
-    global lctl, config
+    global lctl, config, toplevel
 
     # in the upcall this is set to SIG_IGN
     signal.signal(signal.SIGCHLD, signal.SIG_DFL)
@@ -2241,9 +2325,12 @@ def main():
         dn = "config=%s,fs=lustre" % (config.config)
         db = Lustre.LustreDB_LDAP('', {}, base=dn, url = config.ldapurl)
     else:
-        cl.usage()
+        print 'Missing config file or ldap URL.'
+        print 'see lconf --help for command summary'
         sys.exit(1)
 
+    toplevel = db
+
     ver = db.get_version()
     if not ver:
         panic("No version found in config data, please recreate.")
@@ -2277,6 +2364,8 @@ if __name__ == "__main__":
         main()
     except Lustre.LconfError, e:
         print e
+#        traceback.print_exc(file=sys.stdout)
+        sys.exit(1)
     except CommandError, e:
         e.dump()
         sys.exit(e.rc)
index 80cdcf2..a4681ec 100644 (file)
@@ -205,6 +205,9 @@ command_t cmdlist[] = {
         {"debug_kernel", jt_dbg_debug_kernel, 0,
          "get debug buffer and dump to a file"
          "usage: debug_kernel [file] [raw]"},
+        {"dk", jt_dbg_debug_kernel, 0,
+         "get debug buffer and dump to a file"
+         "usage: dk [file] [raw]"},
         {"debug_file", jt_dbg_debug_file, 0,
          "read debug buffer from input and dump to output"
          "usage: debug_file <input> [output] [raw]"},
index 847dd4f..45f837c 100644 (file)
 #define MAX_LOV_UUID_COUNT     1000
 #define OBD_NOT_FOUND          (-1)
 
-char *         cmd;
-struct option  longOpts[] = {
+char           *cmd;
+struct option   longOpts[] = {
                        {"help", 0, 0, 'h'},
                        {"obd", 1, 0, 'o'},
                        {"query", 0, 0, 'q'},
                        {"verbose", 0, 0, 'v'},
                        {0, 0, 0, 0}
-               };
-int            query;
-int            verbose;
-char *         shortOpts = "ho:qv";
-char *         usageMsg = "[ --obd <obd uuid> | --query ] <dir|file> ...";
-
-int            max_ost_count = MAX_LOV_UUID_COUNT;
-struct obd_uuid *      obduuid;
-char *         buf;
-int            buflen;
-struct obd_uuid *      uuids;
+                };
+int             query;
+int             verbose;
+char            shortOpts[] = "ho:qv";
+char            usageMsg[] = "[ --obd <obd uuid> | --query ] <dir|file> ...";
+
+int             max_ost_count = MAX_LOV_UUID_COUNT;
+struct obd_uuid *obduuid;
+char           *buf;
+int             buflen;
+struct obd_uuid *uuids;
 struct obd_ioctl_data data;
-struct lov_desc desc;
-int            uuidslen;
-int            cfglen;
+struct lov_desc  desc;
+int             uuidslen;
+int             cfglen;
 struct lov_mds_md *lmm;
-int            lmmlen;
+int             lmmlen;
+int             printed_UUIDs;
 
 void   init();
 void   usage(FILE *stream);
 void   errMsg(char *fmt, ...);
-void   processPath(const char *path);
+void   processPath(char *path);
 
-int
-main (int argc, char **argv) {
+int main (int argc, char **argv) {
        int c;
 
        cmd = basename(argv[0]);
@@ -61,8 +61,8 @@ main (int argc, char **argv) {
                switch (c) {
                case 'o':
                        if (obduuid) {
-                               errMsg("obd '%s' already specified: '%s'.",
-                                       obduuid, optarg);
+                               fprintf(stderr, "obd '%s' already specified: "
+                                       "'%s'\n", obduuid->uuid, optarg);
                                exit(1);
                        }
 
@@ -81,7 +81,7 @@ main (int argc, char **argv) {
                        usage(stderr);
                        exit(1);
                default:
-                       errMsg("Internal error. Valid '%s' unrecognized.",
+                       printf("Internal error. Valid '%s' unrecognized\n",
                                argv[optind - 1]);
                        usage(stderr);
                        exit(1);
@@ -105,8 +105,7 @@ main (int argc, char **argv) {
        exit (0);
 }
 
-void
-init()
+void init()
 {
        int datalen, desclen;
 
@@ -141,8 +140,7 @@ init()
        }
 
        if ((buf = malloc(buflen)) == NULL) {
-               errMsg("Unable to allocate %d bytes of memory for ioctl's.",
-                       buflen);
+               errMsg("Unable to allocate %d bytes of memory for ioctl's",
+                       buflen);
                exit(1);
        }
 
@@ -150,112 +148,120 @@ init()
        uuids = (struct obd_uuid *)buf;
 }
 
-void
-usage(FILE *stream)
+void usage(FILE *stream)
 {
        fprintf(stream, "usage: %s %s\n", cmd, usageMsg);
 }
 
-void
-errMsg(char *fmt, ...)
+void errMsg(char *fmt, ...)
 {
        va_list args;
+       int tmp_errno = errno;
 
        fprintf(stderr, "%s: ", cmd);
        va_start(args, fmt);
        vfprintf(stderr, fmt, args);
        va_end(args);
-       fprintf(stderr, "\n");
+       fprintf(stderr, ": %s (%d)\n", strerror(tmp_errno), tmp_errno);
 }
 
-void
-processPath(const char *path)
+void processPath(char *path)
 {
        int fd;
        int rc;
        int i;
-       int obdindex;
+       int obdindex = OBD_NOT_FOUND;
        int obdcount;
        struct obd_uuid *uuidp;
+       char *fname, *dirname;
 
-       if (query || verbose && !obduuid) {
+       if ((query || verbose) && !obduuid) {
                printf("%s\n", path);
        }
 
-       if ((fd = open(path, O_RDONLY | O_LOV_DELAY_CREATE)) < 0) {
-               errMsg("open \"%.20s\" failed.", path);
-               perror("open");
+       fname = strrchr(path, '/');
+       if (fname != NULL && fname[1] != '\0') {
+               *fname = '\0';
+               fname++;
+               dirname = (*path == '\0') ? "/" : path;  /* "/file" case */
+       } else if (fname != NULL && fname[1] == '\0') {
+               printf("need getdents support\n");
                return;
+       } else {
+               dirname = ".";
+               fname = path;
        }
 
-       memset(&data, 0, sizeof(data));
-        data.ioc_inllen1 = sizeof(desc);
-        data.ioc_inlbuf1 = (char *)&desc;
-        data.ioc_inllen2 = uuidslen;
-        data.ioc_inlbuf2 = (char *)uuids;
+       if ((fd = open(dirname, O_RDONLY)) < 0) {
+               errMsg("open \"%.20s\" failed", dirname);
+               return;
+       }
 
-        memset(&desc, 0, sizeof(desc));
-        desc.ld_tgt_count = max_ost_count;
+       if (!printed_UUIDs) {
+               memset(&data, 0, sizeof(data));
+               data.ioc_inllen1 = sizeof(desc);
+               data.ioc_inlbuf1 = (char *)&desc;
+               data.ioc_inllen2 = uuidslen;
+               data.ioc_inlbuf2 = (char *)uuids;
 
-        if (obd_ioctl_pack(&data, &buf, buflen)) {
-                errMsg("internal buffering error.");
-               exit(1);
-        }
+               memset(&desc, 0, sizeof(desc));
+               desc.ld_tgt_count = max_ost_count;
+
+               if (obd_ioctl_pack(&data, &buf, buflen)) {
+                       errMsg("internal buffering error");
+                       exit(1);
+               }
 
-        rc = ioctl(fd, OBD_IOC_LOV_GET_CONFIG, buf);
-        if (rc) {
-               if (errno == ENOTTY) {
-                       if (!obduuid) {
-                               printf("Not a regular file or not Lustre file.\n\n");
+               rc = ioctl(fd, OBD_IOC_LOV_GET_CONFIG, buf);
+               if (rc) {
+                       if (errno == ENOTTY) {
+                               if (!obduuid) {
+                                       errMsg("error getting LOV config");
+                               }
+                               return;
                        }
-                       return;
+                       errMsg("OBD_IOC_LOV_GET_CONFIG ioctl failed");
+                       exit(1);
                }
-               errMsg("OBD_IOC_LOV_GET_CONFIG ioctl failed: %d.", errno);
-               perror("ioctl");
-               exit(1);
-        }
 
-       if (obd_ioctl_unpack(&data, buf, buflen)) {
-               errMsg("Invalid reply from ioctl.");
-                exit(1);
-       }
+               if (obd_ioctl_unpack(&data, buf, buflen)) {
+                       errMsg("Invalid reply from ioctl");
+                       exit(1);
+               }
 
-        obdcount = desc.ld_tgt_count;
-       if (obdcount == 0)
-               return;
+               obdcount = desc.ld_tgt_count;
+               if (obdcount == 0)
+                       return;
 
-       obdindex = OBD_NOT_FOUND;
+               obdindex = OBD_NOT_FOUND;
 
-       if (obduuid) {
-               for (i = 0, uuidp = uuids; i < obdcount; i++, uuidp++) {
-                       if (strncmp((const char *)obduuid, (const char *)uuidp,
-                                   sizeof(*uuidp)) == 0) {
-                               obdindex = i;
+               if (obduuid) {
+                       for (i = 0, uuidp = uuids; i < obdcount; i++, uuidp++) {
+                               if (strncmp((char *)obduuid, (char *)uuidp,
+                                       sizeof(*uuidp)) == 0) {
+                                       obdindex = i;
+                               }
                        }
-               }
 
-               if (obdindex == OBD_NOT_FOUND)
-                       return;
-       } else  if (query || verbose) {
-               printf("OBDS:\n");
-               for (i = 0, uuidp = uuids; i < obdcount; i++, uuidp++)
-                       printf("%4d: %s\n", i, (char *)uuidp);
+                       if (obdindex == OBD_NOT_FOUND)
+                               return;
+               } else if (query || verbose) {
+                       printf("OBDS:\n");
+                       for (i = 0, uuidp = uuids; i < obdcount; i++, uuidp++)
+                               printf("%4d: %s\n", i, (char *)uuidp);
+               }
+               printed_UUIDs = 1;
        }
 
-       memset((void *)buf, 0, buflen);
-       lmm->lmm_magic = LOV_MAGIC;
-        lmm->lmm_ost_count = max_ost_count;
-
-       rc = ioctl(fd, LL_IOC_LOV_GETSTRIPE, (void *)lmm);
+       strcpy((char *)lmm, fname);
+       rc = ioctl(fd, IOC_MDC_GETSTRIPE, (void *)lmm);
        if (rc) {
                if (errno == ENODATA) {
-                       if(!obduuid) {
-                               printf("Has no stripe information.\n\n");
-                       }
+                       if (!obduuid)
+                               printf("Has no stripe information.\n");
                }
                else {
-                       errMsg("LL_IOC_LOV_GETSTRIPE ioctl failed. %d", errno);
-                       perror("ioctl");
+                       errMsg("IOC_MDC_GETSTRIPE ioctl failed");
                }
                return;
        }
index eaaed71..1a1bbc9 100755 (executable)
@@ -19,9 +19,9 @@
 #
 
 """
-lmc - lustre configurtion data  manager
+lmc - lustre configuration data manager
 
-  See lustre book for documentation for lmc.
+  See Lustre book (http://www.lustre.org/docs/lustre.pdf) for documentation on lmc.
 
 """
 
@@ -98,6 +98,10 @@ Object creation command summary:
   --path /mnt/point
   --mds mds_name
   --ost ost_name OR --lov lov_name
+
+--add mgmt  - Management/monitoring service
+  --node node_name
+  --mgmt mgmt_service_name
 """
 
 PARAM = Lustre.Options.PARAM
@@ -123,10 +127,10 @@ lmc_options = [
     # network 
     ('nettype', "Specify the network type. This can be tcp/elan/gm/scimac.", PARAM),
     ('nid', "Give the network ID, e.g ElanID/IP Address as used by portals.", PARAM),
-    ('tcpbuf', "Optional arguement to specify the TCP buffer size.", PARAM, "0"),
-    ('port', "Optional arguement to specify the TCP port number.", PARAM, DEFAULT_PORT),
-    ('nid_exchange', "Optional arguement to indicate if nid exchange should be done.", PARAM, 0),
-    ('irq_affinity', "Optional arguement.", PARAM, 0),
+    ('tcpbuf', "Optional argument to specify the TCP buffer size.", PARAM, "0"),
+    ('port', "Optional argument to specify the TCP port number.", PARAM, DEFAULT_PORT),
+    ('nid_exchange', "Optional argument to indicate if nid exchange should be done.", PARAM, 0),
+    ('irq_affinity', "Optional argument.", PARAM, 0),
     ('hostaddr', "", PARAM,""),
     ('cluster_id', "Specify the cluster ID", PARAM, "0"),
 
@@ -143,12 +147,12 @@ lmc_options = [
     ('mds', "Specify MDS name.", PARAM),
     ('ost', "Specify the OST name.", PARAM,""),
     ('osdtype', "This could obdfilter or obdecho.", PARAM, "obdfilter"),
-    ('failover', ""),
+    ('failover', "Enable failover support on OSTs or MDS?"),
     ('group', "", PARAM),
     ('dev', "Path of the device on local system.", PARAM,""),
     ('size', "Specify the size of the device if needed.", PARAM,"0"),
     ('journal_size', "Specify new journal size for underlying ext3 file system.", PARAM,"0"),
-    ('fstype', "Optional arguement to specify the filesystem type.", PARAM, "ext3"),
+    ('fstype', "Optional argument to specify the filesystem type.", PARAM, "ext3"),
     ('ostuuid', "", PARAM,""),
     ('nspath', "Local mount point of server namespace.", PARAM,""),
     ('format', ""),
@@ -167,6 +171,8 @@ lmc_options = [
     # cobd
     ('real_obd', "", PARAM),
     ('cache_obd', "", PARAM),
+
+    ('mgmt', "Specify management/monitoring service name.", PARAM, ""),
     ]
 
 def error(*args):
@@ -393,16 +399,25 @@ class GenConfig:
         mdd.appendChild(self.ref("target", mds_uuid))
         return mdd
 
+    def mgmt(self, mgmt_name, mgmt_uuid, node_uuid):
+        mgmt = self.newService("mgmt", mgmt_name, mgmt_uuid)
+        mgmt.appendChild(self.ref("node", node_uuid))
+        # Placeholder until mgmt-service failover.
+        mgmt.appendChild(self.ref("active", mgmt_uuid))
+        return mgmt
+
     def mountpoint(self, name, uuid, fs_uuid, path):
         mtpt = self.newService("mountpoint", name, uuid)
         mtpt.appendChild(self.ref("filesystem", fs_uuid))
         self.addElement(mtpt, "path", path)
         return mtpt
 
-    def filesystem(self, name, uuid, mds_uuid, obd_uuid):
+    def filesystem(self, name, uuid, mds_uuid, obd_uuid, mgmt_uuid):
         fs = self.newService("filesystem", name, uuid)
         fs.appendChild(self.ref("mds", mds_uuid))
         fs.appendChild(self.ref("obd", obd_uuid))
+        if mgmt_uuid:
+            fs.appendChild(self.ref("mgmt", mgmt_uuid))
         return fs
         
     def echo_client(self, name, uuid, osc_uuid):
@@ -660,6 +675,23 @@ def add_mds(gen, lustre, options):
     lustre.appendChild(mdd)
                    
 
+def add_mgmt(gen, lustre, options):
+    node_name = get_option(options, 'node')
+    node_uuid = name2uuid(lustre, node_name)
+    mgmt_name = get_option(options, 'mgmt')
+    if not mgmt_name:
+        mgmt_name = new_name('MGMT_' + node_name)
+    mgmt_uuid = name2uuid(lustre, mgmt_name, fatal=0)
+    if not mgmt_uuid:
+        mgmt_uuid = new_uuid(mgmt_name)
+        mgmt = gen.mgmt(mgmt_name, mgmt_uuid, node_uuid)
+        lustre.appendChild(mgmt)
+    else:
+        mgmt = lookup(lustre, mgmt_uuid)
+
+    node = findByName(lustre, node_name, "node")
+    node_add_profile(gen, node, 'mgmt', mgmt_uuid)
+
 def add_ost(gen, lustre, options):
     node_name = get_option(options, 'node')
     lovname = get_option(options, 'lov')
@@ -793,23 +825,27 @@ def add_lov(gen, lustre, options):
     lovconfig = gen.lovconfig(lovconfig_name, lovconfig_uuid, uuid)
     lustre.appendChild(lovconfig)
 
-def new_filesystem(gen, lustre, mds_uuid, obd_uuid):
+def new_filesystem(gen, lustre, mds_uuid, obd_uuid, mgmt_uuid):
     fs_name = new_name("FS_fsname")
     fs_uuid = new_uuid(fs_name)
     mds = lookup(lustre, mds_uuid)
     mds.appendChild(gen.ref("filesystem", fs_uuid))
-    fs = gen.filesystem(fs_name, fs_uuid, mds_uuid, obd_uuid)
+    fs = gen.filesystem(fs_name, fs_uuid, mds_uuid, obd_uuid, mgmt_uuid)
     lustre.appendChild(fs)
     return fs_uuid
 
-def get_fs_uuid(gen, lustre, mds_name, obd_name):
+def get_fs_uuid(gen, lustre, mds_name, obd_name, mgmt_name):
     mds_uuid = name2uuid(lustre, mds_name, tag='mds')
     obd_uuid = name2uuid(lustre, obd_name, tag='lov', fatal=0)
     if not obd_uuid:
         obd_uuid = name2uuid(lustre, obd_name, tag='ost', fatal=1)
+    if mgmt_name:
+        mgmt_uuid = name2uuid(lustre, mgmt_name, tag='mgmt', fatal=1)
+    else:
+        mgmt_uuid = ''
     fs_uuid = lookup_filesystem(lustre, mds_uuid, obd_uuid)
     if not fs_uuid:
-        fs_uuid = new_filesystem(gen, lustre, mds_uuid, obd_uuid)
+        fs_uuid = new_filesystem(gen, lustre, mds_uuid, obd_uuid, mgmt_uuid)
     return fs_uuid
     
 def add_mtpt(gen, lustre, options):
@@ -825,7 +861,8 @@ def add_mtpt(gen, lustre, options):
             lov_name = get_option(options, 'ost')
             if lov_name == '':
                 error("--add mtpt requires either --filesystem or --mds with an  --lov lov_name or --ost ost_name")
-        fs_uuid = get_fs_uuid(gen, lustre, mds_name, lov_name)
+        mgmt_name = get_option(options, 'mgmt')
+        fs_uuid = get_fs_uuid(gen, lustre, mds_name, lov_name, mgmt_name)
     else:
         fs_uuid = name2uuid(lustre, fs_name, tag='filesystem')
 
@@ -910,6 +947,8 @@ def add(devtype, gen, lustre, options):
         add_echo_client(gen, lustre, options)
     elif devtype == 'cobd':
         add_cobd(gen, lustre, options)
+    elif devtype == 'mgmt':
+        add_mgmt(gen, lustre, options)
     else:
         error("unknown device type:", devtype)
     
index 4373071..8774cef 100644 (file)
@@ -214,9 +214,9 @@ main (int argc, char **argv)
                 }
         }
 
-        free (b);
+        free(b);
 
-        obdio_disconnect (conn);
+        obdio_disconnect(conn, 0);
 
         return (rc == 0 ? 0 : 1);
 }
index 8264761..24b9e2d 100644 (file)
@@ -294,10 +294,10 @@ main (int argc, char **argv)
         if (conn == NULL)
                 return (1);
 
-        rc = obdio_test_fixed_extent (conn, myhid, mypid, reps, locked,
-                                      oid, base_offset, size);
+        rc = obdio_test_fixed_extent(conn, myhid, mypid, reps, locked,
+                                     oid, base_offset, size);
 
-        obdio_disconnect (conn);
+        obdio_disconnect(conn, 0);
 
         return (rc == 0 ? 0 : 1);
 }
index c871818..04dae88 100644 (file)
@@ -116,7 +116,7 @@ obdio_connect (int device)
 }
 
 void
-obdio_disconnect (struct obdio_conn *conn)
+obdio_disconnect (struct obdio_conn *conn, int flags)
 {
         close (conn->oc_fd);
         /* obdclass will automatically close on last ref */
index 3811b41..b2ec6b6 100644 (file)
@@ -48,22 +48,24 @@ struct obdio_barrier {
 };
 
 extern struct obdio_conn * obdio_connect (int device);
-extern void obdio_disconnect (struct obdio_conn *conn);
-extern int obdio_open (struct obdio_conn *conn, uint64_t oid,
+extern void obdio_disconnect(struct obdio_conn *conn, int flags);
+extern int obdio_open(struct obdio_conn *conn, uint64_t oid,
+                      struct lustre_handle *fh);
+extern int obdio_close(struct obdio_conn *conn, uint64_t oid,
                        struct lustre_handle *fh);
-extern int obdio_close (struct obdio_conn *conn, uint64_t oid,
-                        struct lustre_handle *fh);
-extern int obdio_pread (struct obdio_conn *conn, uint64_t oid,
+extern int obdio_pread(struct obdio_conn *conn, uint64_t oid,
+                       char *buffer, uint32_t count, uint64_t offset);
+extern int obdio_pwrite(struct obdio_conn *conn, uint64_t oid,
                         char *buffer, uint32_t count, uint64_t offset);
-extern int obdio_pwrite (struct obdio_conn *conn, uint64_t oid,
-                         char *buffer, uint32_t count, uint64_t offset);
-extern int obdio_enqueue (struct obdio_conn *conn, uint64_t oid,
-                          int mode, uint64_t offset, uint32_t count,
-                          struct lustre_handle *lh);
-extern int obdio_cancel (struct obdio_conn *conn, struct lustre_handle *lh);
-extern void *obdio_alloc_aligned_buffer (void **spacep, int size);
-extern struct obdio_barrier *obdio_new_barrier (uint64_t oid, uint64_t id, int npeers) ;
-extern int obdio_setup_barrier (struct obdio_conn *conn, struct obdio_barrier *b);
-extern int obdio_barrier (struct obdio_conn *conn, struct obdio_barrier *b);
+extern int obdio_enqueue(struct obdio_conn *conn, uint64_t oid,
+                         int mode, uint64_t offset, uint32_t count,
+                         struct lustre_handle *lh);
+extern int obdio_cancel(struct obdio_conn *conn, struct lustre_handle *lh);
+extern void *obdio_alloc_aligned_buffer(void **spacep, int size);
+extern struct obdio_barrier *obdio_new_barrier(uint64_t oid, uint64_t id,
+                                               int npeers);
+extern int obdio_setup_barrier(struct obdio_conn *conn,
+                               struct obdio_barrier *b);
+extern int obdio_barrier(struct obdio_conn *conn, struct obdio_barrier *b);
 
 #endif
index 5b6a589..86ae507 100644 (file)
@@ -518,12 +518,12 @@ main (int argc, char **argv)
        CHECK_VALUE (REINT_OPEN);
        CHECK_VALUE (REINT_MAX);
 
-       CHECK_VALUE (IT_INTENT_EXEC);
-       CHECK_VALUE (IT_OPEN_LOOKUP);
-       CHECK_VALUE (IT_OPEN_NEG);
-       CHECK_VALUE (IT_OPEN_POS);
-       CHECK_VALUE (IT_OPEN_CREATE);
-       CHECK_VALUE (IT_OPEN_OPEN);
+       CHECK_VALUE (DISP_IT_EXECD);
+       CHECK_VALUE (DISP_LOOKUP_EXECD);
+       CHECK_VALUE (DISP_LOOKUP_NEG);
+       CHECK_VALUE (DISP_LOOKUP_POS);
+       CHECK_VALUE (DISP_OPEN_CREATE);
+       CHECK_VALUE (DISP_OPEN_OPEN);
 
        CHECK_VALUE (MDS_STATUS_CONN);
        CHECK_VALUE (MDS_STATUS_LOV);