Whamcloud - gitweb
merge b_devel into HEAD, which will become 0.7.3
author phil <phil>
Fri, 25 Jul 2003 17:58:07 +0000 (17:58 +0000)
committer phil <phil>
Fri, 25 Jul 2003 17:58:07 +0000 (17:58 +0000)
 - dozens and dozens of fixes for working with 2.6 kernels
 - new 2.4 kernel APIs
 - uncountable bug fixes

265 files changed:
lnet/.cvsignore
lnet/Kernelenv.in
lnet/Kernelenv.mk
lnet/Makefile.mk
lnet/archdep.m4
lnet/include/config.h.in
lnet/include/linux/kp30.h
lnet/include/linux/portals_compat25.h
lnet/include/lnet/internal.h
lnet/include/lnet/list.h
lnet/include/lnet/lltrace.h
lnet/include/lnet/myrnal.h
lnet/include/lnet/nal.h
lnet/include/lnet/ppid.h
lnet/include/lnet/stringtab.h
lnet/include/lnet/types.h
lnet/klnds/.cvsignore
lnet/klnds/Makefile.mk
lnet/klnds/gmlnd/gmnal.c
lnet/klnds/scimaclnd/scimacnal.c
lnet/klnds/socklnd/.cvsignore
lnet/klnds/socklnd/Makefile.mk
lnet/klnds/toelnd/toenal.c
lnet/klnds/toelnd/toenal_cb.c
lnet/libcfs/.cvsignore
lnet/libcfs/Makefile.mk
lnet/libcfs/debug.c
lnet/libcfs/module.c
lnet/lnet/.cvsignore
lnet/lnet/Makefile.mk
lnet/lnet/api-init.c
lnet/lnet/lib-move.c
lnet/router/.cvsignore
lnet/router/Makefile.mk
lnet/router/router.c
lnet/tests/.cvsignore
lnet/tests/ping_cli.c
lnet/tests/ping_srv.c
lnet/tests/sping_cli.c
lnet/tests/sping_srv.c
lnet/ulnds/debug.c
lnet/ulnds/socklnd/debug.c
lnet/utils/.cvsignore
lnet/utils/debug.c
lnet/utils/portals.c
lustre/.cvsignore
lustre/ChangeLog
lustre/Makefile.mk
lustre/cobd/cache_obd.c
lustre/cobd/lproc_cache.c
lustre/conf/lustre.dtd
lustre/configure.in
lustre/include/liblustre.h
lustre/include/linux/lprocfs_status.h
lustre/include/linux/lustre_compat25.h
lustre/include/linux/lustre_dlm.h
lustre/include/linux/lustre_export.h
lustre/include/linux/lustre_fsfilt.h
lustre/include/linux/lustre_idl.h
lustre/include/linux/lustre_lib.h
lustre/include/linux/lustre_lite.h
lustre/include/linux/lustre_mds.h
lustre/include/linux/lustre_net.h
lustre/include/linux/obd.h
lustre/include/linux/obd_class.h
lustre/include/linux/obd_lov.h
lustre/include/linux/obd_ost.h
lustre/include/linux/obd_support.h
lustre/kernel_patches/patches/dev_read_only_2.4.20-rh.patch
lustre/kernel_patches/patches/export-truncate-2.5.63.patch
lustre/kernel_patches/patches/ext3-delete_thread-2.4.18.patch
lustre/kernel_patches/patches/ext3-delete_thread-2.4.20.patch
lustre/kernel_patches/patches/extN-misc-fixup.patch
lustre/kernel_patches/patches/extN-noread.patch
lustre/kernel_patches/patches/extN-wantedi.patch
lustre/kernel_patches/patches/iopen-2.4.18.patch
lustre/kernel_patches/patches/iopen-2.4.20.patch
lustre/kernel_patches/patches/linux-2.4.18ea-0.8.26.patch
lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54-chaos.patch
lustre/kernel_patches/patches/lustre_version.patch
lustre/kernel_patches/patches/vfs_intent-2.4.20-rh.patch
lustre/kernel_patches/patches/vfs_intent-2.4.20-vanilla.patch
lustre/kernel_patches/pc/ext3-delete_thread-2.4.18.pc
lustre/kernel_patches/pc/ext3-delete_thread-2.4.20.pc
lustre/kernel_patches/pc/extN-wantedi.pc
lustre/kernel_patches/pc/linux-2.4.18ea-0.8.26.pc
lustre/kernel_patches/pc/vfs_intent-2.4.20-vanilla.pc
lustre/kernel_patches/scripts/patchfns
lustre/kernel_patches/series/hp-pnnl-2.4.20
lustre/kernel_patches/series/rh-2.4.20
lustre/kernel_patches/series/vanilla-2.4.20
lustre/kernel_patches/which_patch
lustre/ldlm/.cvsignore
lustre/ldlm/ldlm_lib.c
lustre/ldlm/ldlm_lock.c
lustre/ldlm/ldlm_lockd.c
lustre/ldlm/ldlm_request.c
lustre/ldlm/ldlm_resource.c
lustre/liblustre/file.c
lustre/liblustre/super.c
lustre/llite/.cvsignore
lustre/llite/Makefile.am
lustre/llite/dcache.c
lustre/llite/dir.c
lustre/llite/file.c
lustre/llite/iod.c
lustre/llite/llite_internal.h
lustre/llite/lproc_llite.c
lustre/llite/namei.c
lustre/llite/rw.c
lustre/llite/super.c
lustre/llite/super25.c
lustre/llite/symlink.c
lustre/lov/.cvsignore
lustre/lov/Makefile.am
lustre/lov/lov_obd.c
lustre/lov/lov_pack.c
lustre/lov/lproc_lov.c
lustre/mdc/.cvsignore
lustre/mdc/lproc_mdc.c
lustre/mdc/mdc_internal.h
lustre/mdc/mdc_lib.c
lustre/mdc/mdc_reint.c
lustre/mdc/mdc_request.c
lustre/mds/.cvsignore
lustre/mds/Makefile.mk
lustre/mds/handler.c
lustre/mds/lproc_mds.c
lustre/mds/mds_fs.c
lustre/mds/mds_internal.h
lustre/mds/mds_lib.c
lustre/mds/mds_lov.c
lustre/mds/mds_open.c
lustre/mds/mds_reint.c
lustre/obdclass/.cvsignore
lustre/obdclass/Makefile.am
lustre/obdclass/class_obd.c
lustre/obdclass/fsfilt.c
lustre/obdclass/fsfilt_ext3.c
lustre/obdclass/fsfilt_extN.c
lustre/obdclass/fsfilt_reiserfs.c
lustre/obdclass/lprocfs_status.c
lustre/obdclass/lustre_handles.c
lustre/obdclass/lustre_peer.c
lustre/obdclass/simple.c
lustre/obdclass/statfs_pack.c
lustre/obdecho/.cvsignore
lustre/obdecho/echo.c
lustre/obdecho/echo_client.c
lustre/obdecho/lproc_echo.c
lustre/obdfilter/.cvsignore
lustre/obdfilter/Makefile.am
lustre/obdfilter/filter.c
lustre/obdfilter/lproc_obdfilter.c
lustre/osc/.cvsignore
lustre/osc/lproc_osc.c
lustre/osc/osc_lib.c
lustre/osc/osc_request.c
lustre/ost/.cvsignore
lustre/ost/lproc_ost.c
lustre/ost/ost_handler.c
lustre/portals/.cvsignore
lustre/portals/Kernelenv.in
lustre/portals/Kernelenv.mk
lustre/portals/Makefile.mk
lustre/portals/archdep.m4
lustre/portals/include/config.h.in
lustre/portals/include/linux/kp30.h
lustre/portals/include/linux/portals_compat25.h
lustre/portals/include/portals/list.h
lustre/portals/include/portals/lltrace.h
lustre/portals/include/portals/myrnal.h
lustre/portals/include/portals/nal.h
lustre/portals/include/portals/ppid.h
lustre/portals/include/portals/stringtab.h
lustre/portals/include/portals/types.h
lustre/portals/knals/.cvsignore
lustre/portals/knals/Makefile.mk
lustre/portals/knals/gmnal/gmnal.c
lustre/portals/knals/scimacnal/scimacnal.c
lustre/portals/knals/socknal/.cvsignore
lustre/portals/knals/socknal/Makefile.mk
lustre/portals/knals/toenal/toenal.c
lustre/portals/knals/toenal/toenal_cb.c
lustre/portals/libcfs/.cvsignore
lustre/portals/libcfs/Makefile.mk
lustre/portals/libcfs/debug.c
lustre/portals/libcfs/module.c
lustre/portals/portals/.cvsignore
lustre/portals/portals/Makefile.mk
lustre/portals/portals/api-init.c
lustre/portals/portals/lib-move.c
lustre/portals/router/.cvsignore
lustre/portals/router/Makefile.mk
lustre/portals/router/router.c
lustre/portals/tests/.cvsignore
lustre/portals/tests/ping_cli.c
lustre/portals/tests/ping_srv.c
lustre/portals/tests/sping_cli.c
lustre/portals/tests/sping_srv.c
lustre/portals/unals/debug.c
lustre/portals/utils/.cvsignore
lustre/portals/utils/debug.c
lustre/portals/utils/portals.c
lustre/ptlbd/client.c
lustre/ptlbd/main.c
lustre/ptlbd/server.c
lustre/ptlrpc/.cvsignore
lustre/ptlrpc/Makefile.am
lustre/ptlrpc/client.c
lustre/ptlrpc/lproc_ptlrpc.c
lustre/ptlrpc/pack_generic.c
lustre/ptlrpc/pinger.c
lustre/ptlrpc/ptlrpc_internal.h
lustre/ptlrpc/ptlrpc_lib.c
lustre/ptlrpc/ptlrpc_module.c
lustre/ptlrpc/recover.c
lustre/ptlrpc/service.c
lustre/scripts/lustre.spec.in
lustre/tests/.cvsignore
lustre/tests/Makefile.am
lustre/tests/acceptance-metadata-double.sh
lustre/tests/acceptance-metadata-single.sh
lustre/tests/acceptance-small.sh
lustre/tests/cobd.sh
lustre/tests/create.pl
lustre/tests/directio.c
lustre/tests/echo.sh
lustre/tests/fsx.c
lustre/tests/leak_finder.pl
lustre/tests/lkcdmap
lustre/tests/llecho.sh
lustre/tests/llmount.sh
lustre/tests/local.sh
lustre/tests/lov.sh
lustre/tests/mount2.sh
lustre/tests/mount2fs.sh
lustre/tests/opendevunlink.c
lustre/tests/openfile.c
lustre/tests/openunlink.c
lustre/tests/recovery-cleanup.sh
lustre/tests/recovery-small.sh
lustre/tests/rename.pl
lustre/tests/runas.c
lustre/tests/rundbench
lustre/tests/runobdstat
lustre/tests/runregression-brw.sh
lustre/tests/runtests
lustre/tests/runvmstat
lustre/tests/sanity.sh
lustre/tests/sanityN.sh
lustre/tests/uml.sh
lustre/tests/utime.c
lustre/utils/.cvsignore
lustre/utils/Lustre/__init__.py
lustre/utils/lactive
lustre/utils/lconf
lustre/utils/lctl.c
lustre/utils/lfind.c
lustre/utils/lmc
lustre/utils/obdbarrier.c
lustre/utils/obdio.c
lustre/utils/obdiolib.c
lustre/utils/obdiolib.h
lustre/utils/wirecheck.c

index 99ac885..c1a9bdf 100644 (file)
@@ -6,3 +6,4 @@ autom4te.cache
 config.log
 config.status
 configure
 config.log
 config.status
 configure
+.*.o.cmd
index 29a713f..7a48c58 100644 (file)
@@ -1 +1,6 @@
-EXTRA_CFLAGS= -Ifs/lustre/include -Ifs/lustre/portals/include
+EXTRA_CFLAGS := -Ifs/lustre/include -Ifs/lustre/portals/include
+# portals/utils/debug.c wants <linux/version.h> from userspace.  sigh.
+HOSTCFLAGS := -I@LINUX@/include $(EXTRA_CFLAGS)
+LIBREADLINE := @LIBREADLINE@
+# 2.5's makefiles aren't nice to cross dir libraries in host programs
+PTLCTLOBJS := debug.o l_ioctl.o parser.o portals.o
index 29a713f..7c66dfa 100644 (file)
@@ -1 +1,4 @@
-EXTRA_CFLAGS= -Ifs/lustre/include -Ifs/lustre/portals/include
+EXTRA_CFLAGS := -Ifs/lustre/include -Ifs/lustre/portals/include
+HOSTCFLAGS := $(EXTRA_CFLAGS)
+# the kernel doesn't want us to build archives for host binaries :/
+PTLCTLOBJS := debug.o l_ioctl.o parser.o portals.o
index be0e51a..73a19df 100644 (file)
@@ -1,6 +1,12 @@
-include fs/lustre/portals/Kernelenv
+include $(src)/Kernelenv
 
 
-obj-y += portals/
+# The ordering of these determines the order that each subsystem's 
+# module_init() functions are called in.  if these are changed make sure
+# they reflect the dependencies between each subsystem's _init functions.
 obj-y += libcfs/
 obj-y += libcfs/
-obj-y += knals/
+obj-y += portals/
 obj-y += router/
 obj-y += router/
+obj-y += knals/
+obj-y += tests/
+
+obj-m += utils/
index 7a4e05c..1a7741b 100644 (file)
@@ -11,8 +11,13 @@ AC_ARG_WITH(lib, [  --with-lib compile lustre library], host_cpu="lib")
 
 AC_ARG_WITH(linux, [  --with-linux=[path] set path to Linux source (default=/usr/src/linux)],LINUX=$with_linux,LINUX=/usr/src/linux)
 AC_SUBST(LINUX)
 
 AC_ARG_WITH(linux, [  --with-linux=[path] set path to Linux source (default=/usr/src/linux)],LINUX=$with_linux,LINUX=/usr/src/linux)
 AC_SUBST(LINUX)
+if test x$enable_inkernel = xyes ; then
+        echo ln -s `pwd` $LINUX/fs/lustre
+        rm $LINUX/fs/lustre
+        ln -s `pwd` $LINUX/fs/lustre
+fi
 
 
-# --------- UML?  --------------------
+#  --------------------
 AC_MSG_CHECKING(if you are running user mode linux for $host_cpu ...)
 if test $host_cpu = "lib" ; then 
         host_cpu="lib"
 AC_MSG_CHECKING(if you are running user mode linux for $host_cpu ...)
 if test $host_cpu = "lib" ; then 
         host_cpu="lib"
@@ -111,6 +116,13 @@ case ${host_cpu} in
         MOD_LINK=elf64_ia64
 ;;
 
         MOD_LINK=elf64_ia64
 ;;
 
+       x86_64 )
+       AC_MSG_RESULT($host_cpu)
+        KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -fomit-frame-pointer -mno-red-zone -mcmodel=kernel -pipe -fno-reorder-blocks -finline-limit=2000 -fno-strength-reduce -fno-asynchronous-unwind-tables'
+       KCPPFLAGS='-D__KERNEL__ -DMODULE'
+        MOD_LINK=elf_x86_64
+;;
+
        sparc64 )
        AC_MSG_RESULT($host_cpu)
         KCFLAGS='-O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -Wno-unused -m64 -pipe -mno-fpu -mcpu=ultrasparc -mcmodel=medlow -ffixed-g4 -fcall-used-g5 -fcall-used-g7 -Wno-sign-compare -Wa,--undeclared-regs'
        sparc64 )
        AC_MSG_RESULT($host_cpu)
         KCFLAGS='-O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -Wno-unused -m64 -pipe -mno-fpu -mcpu=ultrasparc -mcmodel=medlow -ffixed-g4 -fcall-used-g5 -fcall-used-g7 -Wno-sign-compare -Wa,--undeclared-regs'
@@ -160,21 +172,33 @@ if test $host_cpu != "lib" ; then
       AC_MSG_ERROR(** cannot find $LINUX/include/linux/autoconf.h. Run make config in $LINUX.)
   fi
 
       AC_MSG_ERROR(** cannot find $LINUX/include/linux/autoconf.h. Run make config in $LINUX.)
   fi
 
-# ------------ RELEASE and moduledir ------------------
+# ------------ LINUXRELEASE and moduledir ------------------
   AC_MSG_CHECKING(for Linux release)
   
   dnl We need to rid ourselves of the nasty [ ] quotes.
   changequote(, )
   dnl Get release from version.h
   AC_MSG_CHECKING(for Linux release)
   
   dnl We need to rid ourselves of the nasty [ ] quotes.
   changequote(, )
   dnl Get release from version.h
-  RELEASE="`sed -ne 's/.*UTS_RELEASE[ \"]*\([0-9.a-zA-Z_-]*\).*/\1/p' $LINUX/include/linux/version.h`"
+  LINUXRELEASE="`sed -ne 's/.*UTS_RELEASE[ \"]*\([0-9.a-zA-Z_-]*\).*/\1/p' $LINUX/include/linux/version.h`"
   changequote([, ])
   
   changequote([, ])
   
-  moduledir='$(libdir)/modules/'$RELEASE/kernel
+  moduledir='$(libdir)/modules/'$LINUXRELEASE/kernel
   AC_SUBST(moduledir)
   
   modulefsdir='$(moduledir)/fs/$(PACKAGE)'
   AC_SUBST(modulefsdir)
   
   AC_SUBST(moduledir)
   
   modulefsdir='$(moduledir)/fs/$(PACKAGE)'
   AC_SUBST(modulefsdir)
   
+  AC_MSG_RESULT($LINUXRELEASE)
+  AC_SUBST(LINUXRELEASE)
+
+# ------------ RELEASE --------------------------------
+  AC_MSG_CHECKING(lustre release)
+  
+  dnl We need to rid ourselves of the nasty [ ] quotes.
+  changequote(, )
+  dnl Get release from version.h
+  RELEASE="`sed -ne 's/-/_/g' -e 's/.*UTS_RELEASE[ \"]*\([0-9.a-zA-Z_]*\).*/\1/p' $LINUX/include/linux/version.h`_`date +%Y%m%d%H%M`"
+  changequote([, ])
+
   AC_MSG_RESULT($RELEASE)
   AC_SUBST(RELEASE)
 
   AC_MSG_RESULT($RELEASE)
   AC_SUBST(RELEASE)
 
@@ -302,7 +326,7 @@ AM_CONDITIONAL(LIBLUSTRE, test x$host_cpu = xlib)
 # This needs to run after we've defined the KCPPFLAGS
 
 AC_MSG_CHECKING(for kernel version)
 # This needs to run after we've defined the KCPPFLAGS
 
 AC_MSG_CHECKING(for kernel version)
-AC_TRY_LINK([#define __KERNEL__
+AC_TRY_COMPILE([#define __KERNEL__
              #include <linux/sched.h>],
             [struct task_struct p;
              p.sighand = NULL;],
              #include <linux/sched.h>],
             [struct task_struct p;
              p.sighand = NULL;],
@@ -313,5 +337,5 @@ if test $RH_2_4_20 = 1; then
        AC_MSG_RESULT(redhat-2.4.20)
        CPPFLAGS="$CPPFLAGS -DCONFIG_RH_2_4_20"
 else
        AC_MSG_RESULT(redhat-2.4.20)
        CPPFLAGS="$CPPFLAGS -DCONFIG_RH_2_4_20"
 else
-       AC_MSG_RESULT($RELEASE)
+       AC_MSG_RESULT($LINUXRELEASE)
 fi 
 fi 
index 3aa6909..f9605ab 100644 (file)
@@ -1,5 +1,11 @@
 /* portals/include/config.h.in.  Generated from configure.in by autoheader.  */
 
 /* portals/include/config.h.in.  Generated from configure.in by autoheader.  */
 
+/* Compile with orphan support */
+#undef ENABLE_ORPHANS
+
+/* Use the Pinger */
+#undef ENABLE_PINGER
+
 /* Define to 1 if you have the <inttypes.h> header file. */
 #undef HAVE_INTTYPES_H
 
 /* Define to 1 if you have the <inttypes.h> header file. */
 #undef HAVE_INTTYPES_H
 
index ee3b9fc..2133391 100644 (file)
@@ -4,7 +4,6 @@
 #ifndef _KP30_INCLUDED
 #define _KP30_INCLUDED
 
 #ifndef _KP30_INCLUDED
 #define _KP30_INCLUDED
 
-
 #define PORTAL_DEBUG
 
 #ifndef offsetof
 #define PORTAL_DEBUG
 
 #ifndef offsetof
 
 #define LOWEST_BIT_SET(x)      ((x) & ~((x) - 1))
 
 
 #define LOWEST_BIT_SET(x)      ((x) & ~((x) - 1))
 
-#ifndef CONFIG_SMP
-# define smp_processor_id() 0
-#endif
-
 /*
  *  Debugging
  */
 /*
  *  Debugging
  */
@@ -24,39 +19,34 @@ extern unsigned int portal_subsystem_debug;
 extern unsigned int portal_stack;
 extern unsigned int portal_debug;
 extern unsigned int portal_printk;
 extern unsigned int portal_stack;
 extern unsigned int portal_debug;
 extern unsigned int portal_printk;
-/* Debugging subsystems  (8 bit ID)
- *
- * If you add debug subsystem #32, you need to send email to phil, because
- * you're going to break kernel subsystem debug filtering. */
-#define S_UNDEFINED    (0 << 24)
-#define S_MDC          (1 << 24)
-#define S_MDS          (2 << 24)
-#define S_OSC          (3 << 24)
-#define S_OST          (4 << 24)
-#define S_CLASS        (5 << 24)
-#define S_OBDFS        (6 << 24) /* obsolete */
-#define S_LLITE        (7 << 24)
-#define S_RPC          (8 << 24)
-#define S_EXT2OBD      (9 << 24) /* obsolete */
-#define S_PORTALS     (10 << 24)
-#define S_SOCKNAL     (11 << 24)
-#define S_QSWNAL      (12 << 24)
-#define S_PINGER      (13 << 24)
-#define S_FILTER      (14 << 24)
-#define S_TRACE       (15 << 24) /* obsolete */
-#define S_ECHO        (16 << 24)
-#define S_LDLM        (17 << 24)
-#define S_LOV         (18 << 24)
-#define S_GMNAL       (19 << 24)
-#define S_PTLROUTER   (20 << 24)
-#define S_COBD        (21 << 24)
-#define S_PTLBD       (22 << 24)
-#define S_LOG         (23 << 24)
-
-/* If you change these values, please keep portals/linux/utils/debug.c
+/* Debugging subsystems (32 bits, non-overlapping) */
+#define S_UNDEFINED    (1 << 0)
+#define S_MDC          (1 << 1)
+#define S_MDS          (1 << 2)
+#define S_OSC          (1 << 3)
+#define S_OST          (1 << 4)
+#define S_CLASS        (1 << 5)
+#define S_LOG          (1 << 6)
+#define S_LLITE        (1 << 7)
+#define S_RPC          (1 << 8)
+#define S_MGMT         (1 << 9)
+#define S_PORTALS     (1 << 10)
+#define S_SOCKNAL     (1 << 11)
+#define S_QSWNAL      (1 << 12)
+#define S_PINGER      (1 << 13)
+#define S_FILTER      (1 << 14)
+#define S_PTLBD       (1 << 15)
+#define S_ECHO        (1 << 16)
+#define S_LDLM        (1 << 17)
+#define S_LOV         (1 << 18)
+#define S_GMNAL       (1 << 19)
+#define S_PTLROUTER   (1 << 20)
+#define S_COBD        (1 << 21)
+
+/* If you change these values, please keep portals/utils/debug.c
  * up to date! */
 
  * up to date! */
 
-/* Debugging masks (24 bits, non-overlapping) */
+/* Debugging masks (32 bits, non-overlapping) */
 #define D_TRACE     (1 << 0) /* ENTRY/EXIT markers */
 #define D_INODE     (1 << 1)
 #define D_SUPER     (1 << 2)
 #define D_TRACE     (1 << 0) /* ENTRY/EXIT markers */
 #define D_INODE     (1 << 1)
 #define D_SUPER     (1 << 2)
@@ -80,20 +70,23 @@ extern unsigned int portal_printk;
 #define D_RPCTRACE  (1 << 20) /* for distributed debugging */
 #define D_VFSTRACE  (1 << 21)
 
 #define D_RPCTRACE  (1 << 20) /* for distributed debugging */
 #define D_VFSTRACE  (1 << 21)
 
-#ifndef __KERNEL__
-#define THREAD_SIZE 8192
+#ifdef __KERNEL__
+# include <linux/sched.h> /* THREAD_SIZE */
+#else
+# define THREAD_SIZE 8192
 #endif
 #endif
-#ifdef  __ia64__
-#define CDEBUG_STACK() (THREAD_SIZE -                                      \
+
+#ifdef __KERNEL__
+# ifdef  __ia64__
+#  define CDEBUG_STACK (THREAD_SIZE -                                      \
                         ((unsigned long)__builtin_dwarf_cfa() &            \
                          (THREAD_SIZE - 1)))
                         ((unsigned long)__builtin_dwarf_cfa() &            \
                          (THREAD_SIZE - 1)))
-#else
-#define CDEBUG_STACK() (THREAD_SIZE -                                      \
+# else
+#  define CDEBUG_STACK (THREAD_SIZE -                                      \
                         ((unsigned long)__builtin_frame_address(0) &       \
                          (THREAD_SIZE - 1)))
                         ((unsigned long)__builtin_frame_address(0) &       \
                          (THREAD_SIZE - 1)))
-#endif
+# endif
 
 
-#ifdef __KERNEL__
 #define CHECK_STACK(stack)                                                    \
         do {                                                                  \
                 if ((stack) > 3*THREAD_SIZE/4 && (stack) > portal_stack) {    \
 #define CHECK_STACK(stack)                                                    \
         do {                                                                  \
                 if ((stack) > 3*THREAD_SIZE/4 && (stack) > portal_stack) {    \
@@ -105,20 +98,21 @@ extern unsigned int portal_printk;
                       /*panic("LBUG");*/                                      \
                 }                                                             \
         } while (0)
                       /*panic("LBUG");*/                                      \
                 }                                                             \
         } while (0)
-#else
+#else /* __KERNEL__ */
 #define CHECK_STACK(stack) do { } while(0)
 #define CHECK_STACK(stack) do { } while(0)
-#endif
+#define CDEBUG_STACK (0L)
+#endif /* __KERNEL__ */
 
 #if 1
 #define CDEBUG(mask, format, a...)                                            \
 do {                                                                          \
 
 #if 1
 #define CDEBUG(mask, format, a...)                                            \
 do {                                                                          \
-        CHECK_STACK(CDEBUG_STACK());                                          \
+        CHECK_STACK(CDEBUG_STACK);                                            \
         if (!(mask) || ((mask) & (D_ERROR | D_EMERG)) ||                      \
             (portal_debug & (mask) &&                                         \
         if (!(mask) || ((mask) & (D_ERROR | D_EMERG)) ||                      \
             (portal_debug & (mask) &&                                         \
-             portal_subsystem_debug & (1 << (DEBUG_SUBSYSTEM >> 24))))        \
+             portal_subsystem_debug & DEBUG_SUBSYSTEM))                       \
                 portals_debug_msg(DEBUG_SUBSYSTEM, mask,                      \
                                   __FILE__, __FUNCTION__, __LINE__,           \
                 portals_debug_msg(DEBUG_SUBSYSTEM, mask,                      \
                                   __FILE__, __FUNCTION__, __LINE__,           \
-                                  CDEBUG_STACK(), format , ## a);             \
+                                  CDEBUG_STACK, format, ## a);                \
 } while (0)
 
 #define CWARN(format, a...) CDEBUG(D_WARNING, format, ## a)
 } while (0)
 
 #define CWARN(format, a...) CDEBUG(D_WARNING, format, ## a)
@@ -162,7 +156,6 @@ do {                                                                    \
 #define EXIT                            do { } while (0)
 #endif
 
 #define EXIT                            do { } while (0)
 #endif
 
-
 #ifdef __KERNEL__
 # include <linux/vmalloc.h>
 # include <linux/time.h>
 #ifdef __KERNEL__
 # include <linux/vmalloc.h>
 # include <linux/time.h>
@@ -210,7 +203,8 @@ static inline void our_cond_resched(void)
 #endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) */
 
 #ifdef PORTAL_DEBUG
 #endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) */
 
 #ifdef PORTAL_DEBUG
-extern void kportal_assertion_failed(char *expr,char *file,char *func,int line);
+extern void kportal_assertion_failed(char *expr, char *file, const char *func,
+                                     const int line);
 #define LASSERT(e) ((e) ? 0 : kportal_assertion_failed( #e , __FILE__,  \
                                                         __FUNCTION__, __LINE__))
 #else
 #define LASSERT(e) ((e) ? 0 : kportal_assertion_failed( #e , __FILE__,  \
                                                         __FUNCTION__, __LINE__))
 #else
@@ -560,14 +554,14 @@ extern struct prof_ent prof_ents[MAX_PROFS];
 #endif /* PORTALS_PROFILING */
 
 /* debug.c */
 #endif /* PORTALS_PROFILING */
 
 /* debug.c */
-void portals_run_lbug_upcall(char * file, char *fn, int line);
+void portals_run_lbug_upcall(char * file, const char *fn, const int line);
 void portals_debug_dumplog(void);
 int portals_debug_init(unsigned long bufsize);
 int portals_debug_cleanup(void);
 int portals_debug_clear_buffer(void);
 int portals_debug_mark_buffer(char *text);
 int portals_debug_set_daemon(unsigned int cmd, unsigned int length,
 void portals_debug_dumplog(void);
 int portals_debug_init(unsigned long bufsize);
 int portals_debug_cleanup(void);
 int portals_debug_clear_buffer(void);
 int portals_debug_mark_buffer(char *text);
 int portals_debug_set_daemon(unsigned int cmd, unsigned int length,
-                char *file, unsigned int size);
+                             char *file, unsigned int size);
 __s32 portals_debug_copy_to_user(char *buf, unsigned long len);
 #if (__GNUC__)
 /* Use the special GNU C __attribute__ hack to have the compiler check the
 __s32 portals_debug_copy_to_user(char *buf, unsigned long len);
 #if (__GNUC__)
 /* Use the special GNU C __attribute__ hack to have the compiler check the
@@ -578,13 +572,14 @@ __s32 portals_debug_copy_to_user(char *buf, unsigned long len);
 # warning printf has been defined as a macro...
 # undef printf
 #endif
 # warning printf has been defined as a macro...
 # undef printf
 #endif
-void portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
-                        unsigned long stack, const char *format, ...)
+void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
+                       const int line, unsigned long stack,
+                       const char *format, ...)
         __attribute__ ((format (printf, 7, 8)));
 #else
         __attribute__ ((format (printf, 7, 8)));
 #else
-void portals_debug_msg (int subsys, int mask, char *file, char *fn,
-                        int line, unsigned long stack,
-                        const char *format, ...);
+void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
+                       const int line, unsigned long stack,
+                       const char *format, ...);
 #endif /* __GNUC__ */
 void portals_debug_set_level(unsigned int debug_level);
 
 #endif /* __GNUC__ */
 void portals_debug_set_level(unsigned int debug_level);
 
@@ -618,9 +613,9 @@ extern void kportal_blockallsigs (void);
 # define PORTAL_ALLOC(ptr, size) do { (ptr) = malloc(size); } while (0);
 # define PORTAL_FREE(a, b) do { free(a); } while (0);
 # define portals_debug_msg(subsys, mask, file, fn, line, stack, format, a...) \
 # define PORTAL_ALLOC(ptr, size) do { (ptr) = malloc(size); } while (0);
 # define PORTAL_FREE(a, b) do { free(a); } while (0);
 # define portals_debug_msg(subsys, mask, file, fn, line, stack, format, a...) \
-    printf ("%02x:%06x (@%lu %s:%s,l. %d %d %lu): " format,                    \
-            (subsys) >> 24, (mask), (long)time(0), file, fn, line,            \
-            getpid() , stack, ## a);
+    printf("%02x:%06x (@%lu %s:%s,l. %d %d %lu): " format,                    \
+           (subsys), (mask), (long)time(0), file, fn, line,                   \
+           getpid() , stack, ## a);
 #endif
 
 #ifndef CURRENT_TIME
 #endif
 
 #ifndef CURRENT_TIME
@@ -911,13 +906,13 @@ ptl_handle_ni_t *kportal_get_ni (int nal);
 void kportal_put_ni (int nal);
 
 #ifdef __CYGWIN__
 void kportal_put_ni (int nal);
 
 #ifdef __CYGWIN__
-#ifndef BITS_PER_LONG
-#if (~0UL) == 0xffffffffUL
-#define BITS_PER_LONG 32
-#else
-#define BITS_PER_LONG 64
-#endif
-#endif
+# ifndef BITS_PER_LONG
+#  if (~0UL) == 0xffffffffUL
+#   define BITS_PER_LONG 32
+#  else
+#   define BITS_PER_LONG 64
+#  endif
+# endif
 #endif
 
 #if (BITS_PER_LONG == 32 || __WORDSIZE == 32)
 #endif
 
 #if (BITS_PER_LONG == 32 || __WORDSIZE == 32)
index e28fbac..a7cb4d1 100644 (file)
@@ -1,13 +1,56 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef _PORTALS_COMPAT_H
+#define _PORTALS_COMPAT_H
+
+// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved
+#if SPINLOCK_DEBUG
+# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) || defined(CONFIG_RH_2_4_20)
+#  define SIGNAL_MASK_ASSERT() \
+   LASSERT(current->sighand->siglock.magic == SPINLOCK_MAGIC)
+# else
+#  define SIGNAL_MASK_ASSERT() \
+   LASSERT(current->sigmask_lock.magic == SPINLOCK_MAGIC)
+# endif
+#else
+# define SIGNAL_MASK_ASSERT()
+#endif
+// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved
+
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) || defined(CONFIG_RH_2_4_20)
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) || defined(CONFIG_RH_2_4_20)
-# define SIGNAL_MASK_LOCK(task, flags)                              \
+
+# define SIGNAL_MASK_LOCK(task, flags)                                  \
   spin_lock_irqsave(&task->sighand->siglock, flags)
   spin_lock_irqsave(&task->sighand->siglock, flags)
-# define SIGNAL_MASK_UNLOCK(task, flags)                            \
+# define SIGNAL_MASK_UNLOCK(task, flags)                                \
   spin_unlock_irqrestore(&task->sighand->siglock, flags)
   spin_unlock_irqrestore(&task->sighand->siglock, flags)
+# define USERMODEHELPER(path, argv, envp)                               \
+  call_usermodehelper(path, argv, envp, 1)
 # define RECALC_SIGPENDING         recalc_sigpending()
 # define RECALC_SIGPENDING         recalc_sigpending()
-#else
-# define SIGNAL_MASK_LOCK(task, flags)                              \
+# define CURRENT_SECONDS           get_seconds()
+
+#else /* 2.4.x */
+
+# define SIGNAL_MASK_LOCK(task, flags)                                  \
   spin_lock_irqsave(&task->sigmask_lock, flags)
   spin_lock_irqsave(&task->sigmask_lock, flags)
-# define SIGNAL_MASK_UNLOCK(task, flags)                            \
+# define SIGNAL_MASK_UNLOCK(task, flags)                                \
   spin_unlock_irqrestore(&task->sigmask_lock, flags)
   spin_unlock_irqrestore(&task->sigmask_lock, flags)
+# define USERMODEHELPER(path, argv, envp)                               \
+  call_usermodehelper(path, argv, envp)
 # define RECALC_SIGPENDING         recalc_sigpending(current)
 # define RECALC_SIGPENDING         recalc_sigpending(current)
+# define CURRENT_SECONDS           CURRENT_TIME
+
+#endif
+
+#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
+# define THREAD_NAME(comm, fmt, a...)                                   \
+        sprintf(comm, fmt "|%d", ## a, current->thread.extern_pid)
+#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+# define THREAD_NAME(comm, fmt, a...)                                   \
+        sprintf(comm, fmt "|%d", ## a, current->thread.mode.tt.extern_pid)
+#else
+# define THREAD_NAME(comm, fmt, a...)                                   \
+        sprintf(comm, fmt, ## a)
 #endif
 #endif
+
+#endif /* _PORTALS_COMPAT_H */
index d78cad4..a70b465 100644 (file)
@@ -1,5 +1,3 @@
-/*
-*/
 #ifndef _P30_INTERNAL_H_
 #define _P30_INTERNAL_H_
 
 #ifndef _P30_INTERNAL_H_
 #define _P30_INTERNAL_H_
 
index 2b63312..78a1e2d 100644 (file)
@@ -1,6 +1,4 @@
 #ifndef _LINUX_LIST_H
 #ifndef _LINUX_LIST_H
-#define _LINUX_LIST_H
-
 
 /*
  * Simple doubly linked list implementation.
 
 /*
  * Simple doubly linked list implementation.
@@ -101,7 +99,9 @@ static inline void list_del_init(struct list_head *entry)
        __list_del(entry->prev, entry->next);
        INIT_LIST_HEAD(entry);
 }
        __list_del(entry->prev, entry->next);
        INIT_LIST_HEAD(entry);
 }
+#endif
 
 
+#ifndef list_for_each_entry
 /**
  * list_move - delete from one list and add as another's head
  * @list: the entry to move
 /**
  * list_move - delete from one list and add as another's head
  * @list: the entry to move
@@ -124,7 +124,10 @@ static inline void list_move_tail(struct list_head *list,
        __list_del(list->prev, list->next);
        list_add_tail(list, head);
 }
        __list_del(list->prev, list->next);
        list_add_tail(list, head);
 }
+#endif
 
 
+#ifndef _LINUX_LIST_H
+#define _LINUX_LIST_H
 /**
  * list_empty - tests whether a list is empty
  * @head: the list to test.
 /**
  * list_empty - tests whether a list is empty
  * @head: the list to test.
index 7d1b304..d389aab 100644 (file)
@@ -2,7 +2,7 @@
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
  * Compile with:
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
  * Compile with:
- * cc -I../../portals/include -o fio fio.c -L../../portals/linux/utils -lptlctl 
+ * cc -I../../portals/include -o fio fio.c -L../../portals/linux/utils -lptlctl
  */
 #ifndef __LTRACE_H_
 #define __LTRACE_H_
  */
 #ifndef __LTRACE_H_
 #define __LTRACE_H_
@@ -31,20 +31,20 @@ static inline int ltrace_write_file(char* fname)
         argv[0] = "debug_kernel";
         argv[1] = fname;
         argv[2] = "1";
         argv[0] = "debug_kernel";
         argv[1] = fname;
         argv[2] = "1";
-        
+
         fprintf(stderr, "[ptlctl] %s %s %s\n", argv[0], argv[1], argv[2]);
         fprintf(stderr, "[ptlctl] %s %s %s\n", argv[0], argv[1], argv[2]);
-        
+
         return jt_dbg_debug_kernel(3, argv);
 }
 
 static inline int ltrace_clear()
 {
         char* argv[1];
         return jt_dbg_debug_kernel(3, argv);
 }
 
 static inline int ltrace_clear()
 {
         char* argv[1];
-        
+
         argv[0] = "clear";
         argv[0] = "clear";
-        
+
         fprintf(stderr, "[ptlctl] %s\n", argv[0]);
         fprintf(stderr, "[ptlctl] %s\n", argv[0]);
-        
+
         return jt_dbg_clear_debug_buf(1, argv);
 }
 
         return jt_dbg_clear_debug_buf(1, argv);
 }
 
@@ -52,9 +52,9 @@ static inline int ltrace_mark(int indent_level, char* text)
 {
         char* argv[2];
         char mark_buf[PATH_MAX];
 {
         char* argv[2];
         char mark_buf[PATH_MAX];
-        
+
         snprintf(mark_buf, PATH_MAX, "====%d=%s", indent_level, text);
         snprintf(mark_buf, PATH_MAX, "====%d=%s", indent_level, text);
-        
+
         argv[0] = "mark";
         argv[1] = mark_buf;
         return jt_dbg_mark_debug_buf(2, argv);
         argv[0] = "mark";
         argv[1] = mark_buf;
         return jt_dbg_mark_debug_buf(2, argv);
@@ -65,9 +65,9 @@ static inline int ltrace_applymasks()
         char* argv[2];
         argv[0] = "list";
         argv[1] = "applymasks";
         char* argv[2];
         argv[0] = "list";
         argv[1] = "applymasks";
-        
+
         fprintf(stderr, "[ptlctl] %s %s\n", argv[0], argv[1]);
         fprintf(stderr, "[ptlctl] %s %s\n", argv[0], argv[1]);
-        
+
         return jt_dbg_list(2, argv);
 }
 
         return jt_dbg_list(2, argv);
 }
 
@@ -95,19 +95,19 @@ static inline int ltrace_start()
 #ifdef PORTALS_DEV_ID
         rc = register_ioc_dev(PORTALS_DEV_ID, PORTALS_DEV_PATH);
 #endif
 #ifdef PORTALS_DEV_ID
         rc = register_ioc_dev(PORTALS_DEV_ID, PORTALS_DEV_PATH);
 #endif
-        ltrace_filter("class"); 
+        ltrace_filter("class");
         ltrace_filter("socknal");
         ltrace_filter("socknal");
-        ltrace_filter("qswnal"); 
-        ltrace_filter("gmnal");  
-        ltrace_filter("portals");  
-        
-        ltrace_show("all_types");  
-        ltrace_filter("trace");  
-        ltrace_filter("malloc"); 
-        ltrace_filter("net"); 
-        ltrace_filter("page"); 
-        ltrace_filter("other"); 
-        ltrace_filter("info"); 
+        ltrace_filter("qswnal");
+        ltrace_filter("gmnal");
+        ltrace_filter("portals");
+
+        ltrace_show("all_types");
+        ltrace_filter("trace");
+        ltrace_filter("malloc");
+        ltrace_filter("net");
+        ltrace_filter("page");
+        ltrace_filter("other");
+        ltrace_filter("info");
         ltrace_applymasks();
 
         return rc;
         ltrace_applymasks();
 
         return rc;
@@ -146,7 +146,7 @@ static inline void ltrace_add_processnames(char* fname)
         struct timezone tz;
         int nob;
         int underuml = !not_uml();
         struct timezone tz;
         int nob;
         int underuml = !not_uml();
-        
+
         gettimeofday(&tv, &tz);
 
         nob = snprintf(cmdbuf, LTRACE_MAX_NOB, "ps --no-headers -eo \"");
         gettimeofday(&tv, &tz);
 
         nob = snprintf(cmdbuf, LTRACE_MAX_NOB, "ps --no-headers -eo \"");
@@ -167,7 +167,7 @@ static inline void ltrace_add_processnames(char* fname)
                                  "(%s:%d:%s() %d+%lu): ",
                                  "lltrace.h", __LINE__, __FUNCTION__, 0, 0L);
         }
                                  "(%s:%d:%s() %d+%lu): ",
                                  "lltrace.h", __LINE__, __FUNCTION__, 0, 0L);
         }
-         
+
         nob += snprintf(cmdbuf+nob, LTRACE_MAX_NOB, " %%p %%c\" >> %s", fname);
         system(cmdbuf);
 }
         nob += snprintf(cmdbuf+nob, LTRACE_MAX_NOB, " %%p %%c\" >> %s", fname);
         system(cmdbuf);
 }
index 12b1925..13790f7 100644 (file)
@@ -1,6 +1,3 @@
-/*
-*/
-
 #ifndef MYRNAL_H
 #define MYRNAL_H
 
 #ifndef MYRNAL_H
 #define MYRNAL_H
 
index 88be63c..7cb3ab7 100644 (file)
@@ -1,5 +1,3 @@
-/*
-*/
 #ifndef _NAL_H_
 #define _NAL_H_
 
 #ifndef _NAL_H_
 #define _NAL_H_
 
index 4727599..760f465 100644 (file)
@@ -1,6 +1,3 @@
-/*
- */
-
 #ifndef _INCppidh_
 #define _INCppidh_
 
 #ifndef _INCppidh_
 #define _INCppidh_
 
index c9683f7..33e4375 100644 (file)
@@ -1,5 +1,3 @@
 /*
 /*
-*/
-/*
  * stringtab.h
  */
  * stringtab.h
  */
index d4038b6..0269290 100644 (file)
@@ -2,14 +2,19 @@
 #define _P30_TYPES_H_
 
 #ifdef __linux__
 #define _P30_TYPES_H_
 
 #ifdef __linux__
-#include <asm/types.h>
-#include <asm/timex.h>
+# include <asm/types.h>
+# include <asm/timex.h>
 #else
 #else
-#include <sys/types.h>
+# include <sys/types.h>
 typedef u_int32_t __u32;
 typedef u_int64_t __u64;
 typedef u_int32_t __u32;
 typedef u_int64_t __u64;
-typedef unsigned long long cycles_t;
-static inline cycles_t get_cycles(void) { return 0; }
+#endif
+
+#ifdef __KERNEL__
+# include <linux/time.h>
+#else
+# include <sys/time.h>
+# define do_gettimeofday(tv) gettimeofday(tv, NULL)
 #endif
 
 typedef __u64 ptl_nid_t;
 #endif
 
 typedef __u64 ptl_nid_t;
@@ -31,7 +36,7 @@ typedef ptl_handle_any_t ptl_handle_md_t;
 typedef ptl_handle_any_t ptl_handle_me_t;
 
 #define PTL_HANDLE_NONE \
 typedef ptl_handle_any_t ptl_handle_me_t;
 
 #define PTL_HANDLE_NONE \
-((const ptl_handle_any_t){.nal_idx = -1, .cookie = -1})
+    ((const ptl_handle_any_t){.nal_idx = -1, .cookie = -1})
 #define PTL_EQ_NONE PTL_HANDLE_NONE
 
 static inline int PtlHandleEqual (ptl_handle_any_t h1, ptl_handle_any_t h2)
 #define PTL_EQ_NONE PTL_HANDLE_NONE
 
 static inline int PtlHandleEqual (ptl_handle_any_t h1, ptl_handle_any_t h2)
@@ -108,17 +113,15 @@ typedef struct {
         ptl_handle_me_t unlinked_me;
         ptl_md_t mem_desc;
         ptl_hdr_data_t hdr_data;
         ptl_handle_me_t unlinked_me;
         ptl_md_t mem_desc;
         ptl_hdr_data_t hdr_data;
-        cycles_t  arrival_time;
+        struct timeval arrival_time;
         volatile ptl_seq_t sequence;
 } ptl_event_t;
 
         volatile ptl_seq_t sequence;
 } ptl_event_t;
 
-
 typedef enum {
         PTL_ACK_REQ,
         PTL_NOACK_REQ
 } ptl_ack_req_t;
 
 typedef enum {
         PTL_ACK_REQ,
         PTL_NOACK_REQ
 } ptl_ack_req_t;
 
-
 typedef struct {
         volatile ptl_seq_t sequence;
         ptl_size_t size;
 typedef struct {
         volatile ptl_seq_t sequence;
         ptl_size_t size;
@@ -130,7 +133,6 @@ typedef struct {
         ptl_eq_t *eq;
 } ptl_ni_t;
 
         ptl_eq_t *eq;
 } ptl_ni_t;
 
-
 typedef struct {
         int max_match_entries;    /* max number of match entries */
         int max_mem_descriptors;  /* max number of memory descriptors */
 typedef struct {
         int max_match_entries;    /* max number of match entries */
         int max_mem_descriptors;  /* max number of memory descriptors */
index 282522d..89a4aa6 100644 (file)
@@ -1,2 +1,3 @@
 Makefile
 Makefile.in
 Makefile
 Makefile.in
+.*.o.cmd
index ce40a60..cd5d9d6 100644 (file)
@@ -1,4 +1,4 @@
-include ../Kernelenv
+include $(obj)/../Kernelenv
 
 obj-y = socknal/
 
 obj-y = socknal/
-# more coming...
\ No newline at end of file
+# more coming...
index ceeea2a..0cffc15 100644 (file)
@@ -124,7 +124,7 @@ static nal_t *kgmnal_init(int interface, ptl_pt_index_t ptl_size,
         return &kgmnal_api;
 }
 
         return &kgmnal_api;
 }
 
-static void __exit
+static void /*__exit*/
 kgmnal_finalize(void)
 {
         struct list_head *tmp;
 kgmnal_finalize(void)
 {
         struct list_head *tmp;
index 1066d69..479cc2c 100644 (file)
@@ -112,7 +112,7 @@ static nal_t *kscimacnal_init(int interface, ptl_pt_index_t  ptl_size,
 
 
 /* Called by kernel at module unload time */
 
 
 /* Called by kernel at module unload time */
-static void __exit 
+static void /*__exit*/ 
 kscimacnal_finalize(void)
 {
         /* FIXME: How should the shutdown procedure really look? */
 kscimacnal_finalize(void)
 {
         /* FIXME: How should the shutdown procedure really look? */
index e995588..95973d6 100644 (file)
@@ -1,3 +1,4 @@
 .deps
 Makefile
 Makefile.in
 .deps
 Makefile
 Makefile.in
+.*.o.cmd
index 46edf01..5c1b366 100644 (file)
@@ -3,7 +3,7 @@
 # This code is issued under the GNU General Public License.
 # See the file COPYING in this distribution
 
 # This code is issued under the GNU General Public License.
 # See the file COPYING in this distribution
 
-include ../../Kernelenv
+include $(src)/../../Kernelenv
 
 obj-y += ksocknal.o
 ksocknal-objs    := socknal.o socknal_cb.o
 
 obj-y += ksocknal.o
 ksocknal-objs    := socknal.o socknal_cb.o
index 1f5dc38..77ee473 100644 (file)
@@ -379,7 +379,7 @@ ktoenal_cmd(struct portal_ioctl_data * data, void * private)
 }
 
 
 }
 
 
-void __exit
+void /*__exit*/
 ktoenal_module_fini (void)
 {
         CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
 ktoenal_module_fini (void)
 {
         CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
index ec37f6f..abd0731 100644 (file)
@@ -893,6 +893,7 @@ ktoenal_process_receive (ksock_conn_t *conn, long *irq_flags)
                                 spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags);
                                 goto get_fmb;   /* => go get a fwd msg buffer */
                         default:
                                 spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags);
                                 goto get_fmb;   /* => go get a fwd msg buffer */
                         default:
+                                break;
                         }
                         /* Not Reached */
                         LBUG ();
                         }
                         /* Not Reached */
                         LBUG ();
@@ -934,6 +935,7 @@ ktoenal_process_receive (ksock_conn_t *conn, long *irq_flags)
                 goto out;                       /* (later) */
 
         default:
                 goto out;                       /* (later) */
 
         default:
+                break;
         }
 
         /* Not Reached */
         }
 
         /* Not Reached */
index 67d1a3d..7fa686f 100644 (file)
@@ -2,3 +2,4 @@
 Makefile
 Makefile.in
 link-stamp
 Makefile
 Makefile.in
 link-stamp
+.*.o.cmd
index 3196ea2..9aa838f 100644 (file)
@@ -6,4 +6,4 @@
 include fs/lustre/portals/Kernelenv
 
 obj-y += libcfs.o
 include fs/lustre/portals/Kernelenv
 
 obj-y += libcfs.o
-licfs-objs    := module.o proc.o debug.o
\ No newline at end of file
+libcfs-objs    := module.o proc.o debug.o
index 8d26dbb..f37cd96 100644 (file)
@@ -571,8 +571,8 @@ int portals_debug_init(unsigned long bufsize)
         memset(debug_buf, 0, debug_size);
         debug_wrapped = 0;
 
         memset(debug_buf, 0, debug_size);
         debug_wrapped = 0;
 
-        printk(KERN_INFO "Portals: allocated %lu byte debug buffer at %p.\n",
-               bufsize, debug_buf);
+        //printk(KERN_INFO "Portals: allocated %lu byte debug buffer at %p.\n",
+               //bufsize, debug_buf);
         atomic_set(&debug_off_a, debug_off);
         notifier_chain_register(&panic_notifier_list, &lustre_panic_notifier);
         debug_size = bufsize;
         atomic_set(&debug_off_a, debug_off);
         notifier_chain_register(&panic_notifier_list, &lustre_panic_notifier);
         debug_size = bufsize;
@@ -632,9 +632,9 @@ int portals_debug_mark_buffer(char *text)
         if (debug_buf == NULL)
                 return -EINVAL;
 
         if (debug_buf == NULL)
                 return -EINVAL;
 
-        CDEBUG(0, "*******************************************************************************\n");
+        CDEBUG(0, "********************************************************\n");
         CDEBUG(0, "DEBUG MARKER: %s\n", text);
         CDEBUG(0, "DEBUG MARKER: %s\n", text);
-        CDEBUG(0, "*******************************************************************************\n");
+        CDEBUG(0, "********************************************************\n");
 
         return 0;
 }
 
         return 0;
 }
@@ -672,8 +672,8 @@ __s32 portals_debug_copy_to_user(char *buf, unsigned long len)
 
 /* FIXME: I'm not very smart; someone smarter should make this better. */
 void
 
 /* FIXME: I'm not very smart; someone smarter should make this better. */
 void
-portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
-                   unsigned long stack, const char *format, ...)
+portals_debug_msg(int subsys, int mask, char *file, const char *fn,
+                  const int line, unsigned long stack, const char *format, ...)
 {
         va_list       ap;
         unsigned long flags;
 {
         va_list       ap;
         unsigned long flags;
@@ -728,8 +728,8 @@ portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
         do_gettimeofday(&tv);
 
         prefix_nob = snprintf(debug_buf + debug_off, max_nob,
         do_gettimeofday(&tv);
 
         prefix_nob = snprintf(debug_buf + debug_off, max_nob,
-                              "%02x:%06x:%d:%lu.%06lu ",
-                              subsys >> 24, mask, smp_processor_id(),
+                              "%06x:%06x:%d:%lu.%06lu ",
+                              subsys, mask, smp_processor_id(),
                               tv.tv_sec, tv.tv_usec);
         max_nob -= prefix_nob;
 
                               tv.tv_sec, tv.tv_usec);
         max_nob -= prefix_nob;
 
@@ -752,7 +752,7 @@ portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
 
         va_start(ap, format);
         msg_nob += vsnprintf(debug_buf + debug_off + prefix_nob + msg_nob,
 
         va_start(ap, format);
         msg_nob += vsnprintf(debug_buf + debug_off + prefix_nob + msg_nob,
-                            max_nob, format, ap);
+                             max_nob, format, ap);
         max_nob -= msg_nob;
         va_end(ap);
 
         max_nob -= msg_nob;
         va_end(ap);
 
@@ -790,7 +790,7 @@ void portals_debug_set_level(unsigned int debug_level)
         portal_debug = debug_level;
 }
 
         portal_debug = debug_level;
 }
 
-void portals_run_lbug_upcall(char * file, char *fn, int line)
+void portals_run_lbug_upcall(char *file, const char *fn, const int line)
 {
         char *argv[6];
         char *envp[3];
 {
         char *argv[6];
         char *envp[3];
@@ -803,7 +803,7 @@ void portals_run_lbug_upcall(char * file, char *fn, int line)
         argv[0] = portals_upcall;
         argv[1] = "LBUG";
         argv[2] = file;
         argv[0] = portals_upcall;
         argv[1] = "LBUG";
         argv[2] = file;
-        argv[3] = fn;
+        argv[3] = (char *)fn;
         argv[4] = buf;
         argv[5] = NULL;
 
         argv[4] = buf;
         argv[5] = NULL;
 
index 14cc325..e8eb290 100644 (file)
@@ -62,10 +62,10 @@ static struct nal_cmd_handler nal_cmd[NAL_MAX_NR + 1];
 struct semaphore nal_cmd_sem;
 
 #ifdef PORTAL_DEBUG
 struct semaphore nal_cmd_sem;
 
 #ifdef PORTAL_DEBUG
-void
-kportal_assertion_failed (char *expr, char *file, char *func, int line)
+void kportal_assertion_failed(char *expr, char *file, const char *func,
+                              const int line)
 {
 {
-        portals_debug_msg(0, D_EMERG, file, func, line, CDEBUG_STACK(),
+        portals_debug_msg(0, D_EMERG, file, func, line, CDEBUG_STACK,
                           "ASSERTION(%s) failed\n", expr);
         LBUG_WITH_LOC(file, func, line);
 }
                           "ASSERTION(%s) failed\n", expr);
         LBUG_WITH_LOC(file, func, line);
 }
index e995588..95973d6 100644 (file)
@@ -1,3 +1,4 @@
 .deps
 Makefile
 Makefile.in
 .deps
 Makefile
 Makefile.in
+.*.o.cmd
index 5627ef7..7822846 100644 (file)
@@ -3,7 +3,10 @@
 # This code is issued under the GNU General Public License.
 # See the file COPYING in this distribution
 
 # This code is issued under the GNU General Public License.
 # See the file COPYING in this distribution
 
-include ../Kernelenv
+include $(src)/../Kernelenv
 
 obj-y += portals.o
 
 obj-y += portals.o
-portals-objs    := lib-dispatch.o lib-eq.o lib-init.o lib-md.o lib-me.o lib-move.o lib-msg.o lib-ni.o lib-not-impl.o lib-pid.o api-eq.o api-errno.o api-init.o api-md.o api-me.o api-ni.o api-wrap.o
+portals-objs    :=     lib-dispatch.o lib-eq.o lib-init.o lib-md.o lib-me.o \
+                       lib-move.o lib-msg.o lib-ni.o lib-pid.o \
+                       api-eq.o api-errno.o api-init.o api-me.o api-ni.o \
+                       api-wrap.o
index e59c922..dc1fead 100644 (file)
@@ -26,7 +26,7 @@
 #include <portals/api-support.h>
 
 int ptl_init;
 #include <portals/api-support.h>
 
 int ptl_init;
-unsigned int portal_subsystem_debug = 0xfff7e3ff;
+unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_QSWNAL | S_SOCKNAL | S_GMNAL);
 unsigned int portal_debug = ~0;
 unsigned int portal_printk;
 unsigned int portal_stack;
 unsigned int portal_debug = ~0;
 unsigned int portal_printk;
 unsigned int portal_stack;
index fde4f16..02f8b60 100644 (file)
@@ -544,7 +544,7 @@ get_new_msg (nal_cb_t *nal, lib_md_t *md)
         msg->send_ack = 0;
 
         msg->md = md;
         msg->send_ack = 0;
 
         msg->md = md;
-        msg->ev.arrival_time = get_cycles();
+        do_gettimeofday(&msg->ev.arrival_time);
         md->pending++;
         if (md->threshold != PTL_MD_THRESH_INF) {
                 LASSERT (md->threshold > 0);
         md->pending++;
         if (md->threshold != PTL_MD_THRESH_INF) {
                 LASSERT (md->threshold > 0);
index e995588..95973d6 100644 (file)
@@ -1,3 +1,4 @@
 .deps
 Makefile
 Makefile.in
 .deps
 Makefile
 Makefile.in
+.*.o.cmd
index 64bd09b..9b02c03 100644 (file)
@@ -3,7 +3,7 @@
 # This code is issued under the GNU General Public License.
 # See the file COPYING in this distribution
 
 # This code is issued under the GNU General Public License.
 # See the file COPYING in this distribution
 
-include ../Kernelenv
+include $(src)/../Kernelenv
 
 obj-y += kptlrouter.o
 kptlrouter-objs    := router.o proc.o
 
 obj-y += kptlrouter.o
 kptlrouter-objs    := router.o proc.o
index 6074c3c..27a7fba 100644 (file)
@@ -23,8 +23,8 @@
 
 #include "router.h"
 
 
 #include "router.h"
 
-struct list_head kpr_routes;
-struct list_head kpr_nals;
+LIST_HEAD(kpr_routes);
+LIST_HEAD(kpr_nals);
 
 unsigned long long kpr_fwd_bytes;
 unsigned long      kpr_fwd_packets;
 
 unsigned long long kpr_fwd_bytes;
 unsigned long      kpr_fwd_packets;
@@ -35,7 +35,7 @@ atomic_t           kpr_queue_depth;
  *
  * Once in a blue moon we register/deregister NALs and add/remove routing
  * entries (thread context only)... */
  *
  * Once in a blue moon we register/deregister NALs and add/remove routing
  * entries (thread context only)... */
-rwlock_t         kpr_rwlock;
+rwlock_t         kpr_rwlock = RW_LOCK_UNLOCKED;
 
 kpr_router_interface_t kpr_router_interface = {
        kprri_register:         kpr_register_nal,
 
 kpr_router_interface_t kpr_router_interface = {
        kprri_register:         kpr_register_nal,
@@ -55,7 +55,7 @@ kpr_control_interface_t kpr_control_interface = {
 int
 kpr_register_nal (kpr_nal_interface_t *nalif, void **argp)
 {
 int
 kpr_register_nal (kpr_nal_interface_t *nalif, void **argp)
 {
-       long               flags;
+       unsigned long      flags;
        struct list_head  *e;
        kpr_nal_entry_t   *ne;
 
        struct list_head  *e;
        kpr_nal_entry_t   *ne;
 
@@ -98,7 +98,7 @@ kpr_register_nal (kpr_nal_interface_t *nalif, void **argp)
 void
 kpr_shutdown_nal (void *arg)
 {
 void
 kpr_shutdown_nal (void *arg)
 {
-       long             flags;
+       unsigned long    flags;
        kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg;
 
         CDEBUG (D_OTHER, "Shutting down NAL %d\n", ne->kpne_interface.kprni_nalid);
        kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg;
 
         CDEBUG (D_OTHER, "Shutting down NAL %d\n", ne->kpne_interface.kprni_nalid);
@@ -123,7 +123,7 @@ kpr_shutdown_nal (void *arg)
 void
 kpr_deregister_nal (void *arg)
 {
 void
 kpr_deregister_nal (void *arg)
 {
-       long              flags;
+       unsigned long     flags;
        kpr_nal_entry_t  *ne = (kpr_nal_entry_t *)arg;
 
         CDEBUG (D_OTHER, "Deregister NAL %d\n", ne->kpne_interface.kprni_nalid);
        kpr_nal_entry_t  *ne = (kpr_nal_entry_t *)arg;
 
         CDEBUG (D_OTHER, "Deregister NAL %d\n", ne->kpne_interface.kprni_nalid);
@@ -296,7 +296,7 @@ int
 kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, ptl_nid_t lo_nid,
                ptl_nid_t hi_nid)
 {
 kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, ptl_nid_t lo_nid,
                ptl_nid_t hi_nid)
 {
-       long               flags;
+       unsigned long      flags;
        struct list_head  *e;
        kpr_route_entry_t *re;
 
        struct list_head  *e;
        kpr_route_entry_t *re;
 
@@ -345,7 +345,7 @@ kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, ptl_nid_t lo_nid,
 int
 kpr_del_route (ptl_nid_t nid)
 {
 int
 kpr_del_route (ptl_nid_t nid)
 {
-       long               flags;
+       unsigned long      flags;
        struct list_head  *e;
 
         CDEBUG(D_OTHER, "Del route "LPX64"\n", nid);
        struct list_head  *e;
 
         CDEBUG(D_OTHER, "Del route "LPX64"\n", nid);
@@ -398,7 +398,7 @@ kpr_get_route(int idx, int *gateway_nalid, ptl_nid_t *gateway_nid,
         return (-ENOENT);
 }
 
         return (-ENOENT);
 }
 
-static void __exit
+static void /*__exit*/
 kpr_finalise (void)
 {
         LASSERT (list_empty (&kpr_nals));
 kpr_finalise (void)
 {
         LASSERT (list_empty (&kpr_nals));
@@ -427,10 +427,6 @@ kpr_initialise (void)
         CDEBUG(D_MALLOC, "kpr_initialise: kmem %d\n",
                atomic_read(&portal_kmemory));
 
         CDEBUG(D_MALLOC, "kpr_initialise: kmem %d\n",
                atomic_read(&portal_kmemory));
 
-       rwlock_init(&kpr_rwlock);
-       INIT_LIST_HEAD(&kpr_routes);
-       INIT_LIST_HEAD(&kpr_nals);
-
         kpr_proc_init();
 
         PORTAL_SYMBOL_REGISTER(kpr_router_interface);
         kpr_proc_init();
 
         PORTAL_SYMBOL_REGISTER(kpr_router_interface);
index 051d1bd..d0c4c88 100644 (file)
@@ -1,3 +1,4 @@
 Makefile
 Makefile.in
 .deps
 Makefile
 Makefile.in
 .deps
+.*.o.cmd
index 389ffbb..4d04ffb 100644 (file)
@@ -260,7 +260,7 @@ pingcli_start(struct portal_ioctl_data *args)
 
 
 /* called by the portals_ioctl for ping requests */
 
 
 /* called by the portals_ioctl for ping requests */
-static int kping_client(struct portal_ioctl_data *args)
+int kping_client(struct portal_ioctl_data *args)
 {
         PORTAL_ALLOC (client, sizeof(struct pingcli_data));
         if (client == NULL)
 {
         PORTAL_ALLOC (client, sizeof(struct pingcli_data));
         if (client == NULL)
@@ -282,7 +282,7 @@ static int __init pingcli_init(void)
 } /* pingcli_init() */
 
 
 } /* pingcli_init() */
 
 
-static void __exit pingcli_cleanup(void)
+static void /*__exit*/ pingcli_cleanup(void)
 {
         PORTAL_SYMBOL_UNREGISTER (kping_client);
 } /* pingcli_cleanup() */
 {
         PORTAL_SYMBOL_UNREGISTER (kping_client);
 } /* pingcli_cleanup() */
index 1037d09..873e11c 100644 (file)
 #include <asm/semaphore.h>
 
 #define STDSIZE (sizeof(int) + sizeof(int) + sizeof(struct timeval))
 #include <asm/semaphore.h>
 
 #define STDSIZE (sizeof(int) + sizeof(int) + sizeof(struct timeval))
-#define MAXSIZE (16*1024*1024)
+#define MAXSIZE (16*1024)
 
 static unsigned ping_head_magic;
 static unsigned ping_bulk_magic;
 
 static unsigned ping_head_magic;
 static unsigned ping_bulk_magic;
-static int nal  = 0;                            // Your NAL,
+static int nal  = SOCKNAL;                            // Your NAL,
 static unsigned long packets_valid = 0;         // Valid packets 
 static int running = 1;
 atomic_t pkt;
 static unsigned long packets_valid = 0;         // Valid packets 
 static int running = 1;
 atomic_t pkt;
@@ -282,7 +282,7 @@ static int __init pingsrv_init(void)
 } /* pingsrv_init() */
 
 
 } /* pingsrv_init() */
 
 
-static void __exit pingsrv_cleanup(void)
+static void /*__exit*/ pingsrv_cleanup(void)
 {
         remove_proc_entry ("net/pingsrv", NULL);
         
 {
         remove_proc_entry ("net/pingsrv", NULL);
         
index 4cef08b..35e114b 100644 (file)
@@ -235,7 +235,7 @@ pingcli_start(struct portal_ioctl_data *args)
 
 
 /* called by the portals_ioctl for ping requests */
 
 
 /* called by the portals_ioctl for ping requests */
-static int kping_client(struct portal_ioctl_data *args)
+int kping_client(struct portal_ioctl_data *args)
 {
 
         PORTAL_ALLOC (client, sizeof(struct pingcli_data));
 {
 
         PORTAL_ALLOC (client, sizeof(struct pingcli_data));
@@ -258,7 +258,7 @@ static int __init pingcli_init(void)
 } /* pingcli_init() */
 
 
 } /* pingcli_init() */
 
 
-static void __exit pingcli_cleanup(void)
+static void /*__exit*/ pingcli_cleanup(void)
 {
         PORTAL_SYMBOL_UNREGISTER (kping_client);
 } /* pingcli_cleanup() */
 {
         PORTAL_SYMBOL_UNREGISTER (kping_client);
 } /* pingcli_cleanup() */
index a18ea35..2b45a46 100644 (file)
@@ -269,7 +269,7 @@ static int __init pingsrv_init(void)
 } /* pingsrv_init() */
 
 
 } /* pingsrv_init() */
 
 
-static void __exit pingsrv_cleanup(void)
+static void /*__exit*/ pingsrv_cleanup(void)
 {
         remove_proc_entry ("net/pingsrv", NULL);
         
 {
         remove_proc_entry ("net/pingsrv", NULL);
         
index 529bb2d..b73f042 100644 (file)
@@ -84,8 +84,8 @@ int portals_debug_copy_to_user(char *buf, unsigned long len)
 
 /* FIXME: I'm not very smart; someone smarter should make this better. */
 void
 
 /* FIXME: I'm not very smart; someone smarter should make this better. */
 void
-portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
-                   const char *format, ...)
+portals_debug_msg (int subsys, int mask, char *file, const char *fn, 
+                   const int line, const char *format, ...)
 {
         va_list       ap;
         unsigned long flags;
 {
         va_list       ap;
         unsigned long flags;
index 529bb2d..b73f042 100644 (file)
@@ -84,8 +84,8 @@ int portals_debug_copy_to_user(char *buf, unsigned long len)
 
 /* FIXME: I'm not very smart; someone smarter should make this better. */
 void
 
 /* FIXME: I'm not very smart; someone smarter should make this better. */
 void
-portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
-                   const char *format, ...)
+portals_debug_msg (int subsys, int mask, char *file, const char *fn, 
+                   const int line, const char *format, ...)
 {
         va_list       ap;
         unsigned long flags;
 {
         va_list       ap;
         unsigned long flags;
index 148310a..8e474ad 100644 (file)
@@ -5,4 +5,5 @@ debugctl
 ptlctl
 .deps
 routerstat
 ptlctl
 .deps
 routerstat
-wirecheck
\ No newline at end of file
+wirecheck
+.*.cmd
index 9ab1c73..0a009d2 100644 (file)
@@ -53,17 +53,18 @@ static char rawbuf[8192];
 static char *buf = rawbuf;
 static int max = 8192;
 //static int g_pfd = -1;
 static char *buf = rawbuf;
 static int max = 8192;
 //static int g_pfd = -1;
-static int subsystem_array[1 << 8];
+static int subsystem_mask = ~0;
 static int debug_mask = ~0;
 
 static const char *portal_debug_subsystems[] =
 static int debug_mask = ~0;
 
 static const char *portal_debug_subsystems[] =
-        {"undefined", "mdc", "mds", "osc", "ost", "class", "obdfs", "llite",
-         "rpc", "ext2obd", "portals", "socknal", "qswnal", "pinger", "filter",
-         "obdtrace", "echo", "ldlm", "lov", "gmnal", "router", "ptldb", NULL};
+        {"undefined", "mdc", "mds", "osc", "ost", "class", "log", "llite",
+         "rpc", "mgmt", "portals", "socknal", "qswnal", "pinger", "filter",
+         "ptlbd", "echo", "ldlm", "lov", "gmnal", "router", "cobd", NULL};
 static const char *portal_debug_masks[] =
         {"trace", "inode", "super", "ext2", "malloc", "cache", "info", "ioctl",
          "blocks", "net", "warning", "buffs", "other", "dentry", "portals",
 static const char *portal_debug_masks[] =
         {"trace", "inode", "super", "ext2", "malloc", "cache", "info", "ioctl",
          "blocks", "net", "warning", "buffs", "other", "dentry", "portals",
-         "page", "dlmtrace", "error", "emerg", "ha", "rpctrace", "vfstrace", NULL};
+         "page", "dlmtrace", "error", "emerg", "ha", "rpctrace", "vfstrace",
+         NULL};
 
 struct debug_daemon_cmd {
         char *cmd;
 
 struct debug_daemon_cmd {
         char *cmd;
@@ -88,7 +89,10 @@ static int do_debug_mask(char *name, int enable)
                         printf("%s output from subsystem \"%s\"\n",
                                 enable ? "Enabling" : "Disabling",
                                 portal_debug_subsystems[i]);
                         printf("%s output from subsystem \"%s\"\n",
                                 enable ? "Enabling" : "Disabling",
                                 portal_debug_subsystems[i]);
-                        subsystem_array[i] = enable;
+                        if (enable)
+                                subsystem_mask |= (1 << i);
+                        else
+                                subsystem_mask &= ~(1 << i);
                         found = 1;
                 }
         }
                         found = 1;
                 }
         }
@@ -111,7 +115,6 @@ static int do_debug_mask(char *name, int enable)
 
 int dbg_initialize(int argc, char **argv)
 {
 
 int dbg_initialize(int argc, char **argv)
 {
-        memset(subsystem_array, 1, sizeof(subsystem_array));
         return 0;
 }
 
         return 0;
 }
 
@@ -213,12 +216,7 @@ int jt_dbg_list(int argc, char **argv)
                 for (i = 0; portal_debug_masks[i] != NULL; i++)
                         printf(", %s", portal_debug_masks[i]);
                 printf("\n");
                 for (i = 0; portal_debug_masks[i] != NULL; i++)
                         printf(", %s", portal_debug_masks[i]);
                 printf("\n");
-        }
-        else if (strcasecmp(argv[1], "applymasks") == 0) {
-                unsigned int subsystem_mask = 0;
-                for (i = 0; portal_debug_subsystems[i] != NULL; i++) {
-                        if (subsystem_array[i]) subsystem_mask |= (1 << i);
-                }
+        } else if (strcasecmp(argv[1], "applymasks") == 0) {
                 applymask_all(subsystem_mask, debug_mask);
         }
         return 0;
                 applymask_all(subsystem_mask, debug_mask);
         }
         return 0;
@@ -230,12 +228,6 @@ static void dump_buffer(FILE *fd, char *buf, int size, int raw)
 {
         char *p, *z;
         unsigned long subsystem, debug, dropped = 0, kept = 0;
 {
         char *p, *z;
         unsigned long subsystem, debug, dropped = 0, kept = 0;
-        int max_sub, max_type;
-
-        for (max_sub = 0; portal_debug_subsystems[max_sub] != NULL; max_sub++)
-                ;
-        for (max_type = 0; portal_debug_masks[max_type] != NULL; max_type++)
-                ;
 
         while (size) {
                 p = memchr(buf, '\n', size);
 
         while (size) {
                 p = memchr(buf, '\n', size);
@@ -247,8 +239,7 @@ static void dump_buffer(FILE *fd, char *buf, int size, int raw)
                 z++;
                 /* for some reason %*s isn't working. */
                 *p = '\0';
                 z++;
                 /* for some reason %*s isn't working. */
                 *p = '\0';
-                if (subsystem < max_sub &&
-                    subsystem_array[subsystem] &&
+                if ((subsystem_mask & subsystem) &&
                     (!debug || (debug_mask & debug))) {
                         if (raw)
                                 fprintf(fd, "%s\n", buf);
                     (!debug || (debug_mask & debug))) {
                         if (raw)
                                 fprintf(fd, "%s\n", buf);
@@ -551,6 +542,8 @@ int jt_dbg_modules(int argc, char **argv)
                 {"mds_ext3", "lustre/mds"},
                 {"mds_extN", "lustre/mds"},
                 {"ptlbd", "lustre/ptlbd"},
                 {"mds_ext3", "lustre/mds"},
                 {"mds_extN", "lustre/mds"},
                 {"ptlbd", "lustre/ptlbd"},
+                {"mgmt_svc", "lustre/mgmt"},
+                {"mgmt_cli", "lustre/mgmt"},
                 {NULL, NULL}
         };
         char *path = "..";
                 {NULL, NULL}
         };
         char *path = "..";
index 90d66f5..a89f4f7 100644 (file)
@@ -22,6 +22,7 @@
 
 #include <stdio.h>
 #include <sys/types.h>
 
 #include <stdio.h>
 #include <sys/types.h>
+#include <netdb.h>
 #include <sys/socket.h>
 #include <netinet/tcp.h>
 #include <netdb.h>
 #include <sys/socket.h>
 #include <netinet/tcp.h>
 #include <netdb.h>
@@ -106,6 +107,27 @@ nal2name (int nal)
         return ((e == NULL) ? "???" : e->name);
 }
 
         return ((e == NULL) ? "???" : e->name);
 }
 
+/*
+ * Resolve 'hname' with gethostbyname(), reporting any failure on stderr.
+ *
+ * Returns the resolver's hostent on success, or NULL after printing a
+ * diagnostic.  gethostbyname() reports failures through h_errno rather
+ * than errno, so the generic case is decoded with hstrerror().
+ */
+static struct hostent *
+ptl_gethostbyname(char * hname) {
+        struct hostent *he;
+        he = gethostbyname(hname);
+        if (!he) {
+                switch(h_errno) {
+                case HOST_NOT_FOUND:
+                case NO_ADDRESS:
+                        fprintf(stderr, "Unable to resolve hostname: %s\n",
+                                hname);
+                        break;
+                default:
+                        fprintf(stderr, "gethostbyname error: %s\n",
+                                hstrerror(h_errno));
+                        break;
+                }
+                return NULL;
+        }
+        return he;
+}
+
 int
 ptl_parse_nid (ptl_nid_t *nidp, char *str)
 {
 int
 ptl_parse_nid (ptl_nid_t *nidp, char *str)
 {
@@ -127,7 +149,7 @@ ptl_parse_nid (ptl_nid_t *nidp, char *str)
         
         if ((('a' <= str[0] && str[0] <= 'z') ||
              ('A' <= str[0] && str[0] <= 'Z')) &&
         
         if ((('a' <= str[0] && str[0] <= 'z') ||
              ('A' <= str[0] && str[0] <= 'Z')) &&
-             (he = gethostbyname (str)) != NULL)
+             (he = ptl_gethostbyname (str)) != NULL)
         {
                 __u32 addr = *(__u32 *)he->h_addr;
 
         {
                 __u32 addr = *(__u32 *)he->h_addr;
 
@@ -351,12 +373,9 @@ int jt_ptl_connect(int argc, char **argv)
                         goto usage;
                 }
 
                         goto usage;
                 }
 
-                he = gethostbyname(argv[1]);
-                if (!he) {
-                        fprintf(stderr, "gethostbyname error: %s\n",
-                                strerror(errno));
+                he = ptl_gethostbyname(argv[1]);
+                if (!he)
                         return -1;
                         return -1;
-                }
 
                 g_port = atol(argv[2]);
 
 
                 g_port = atol(argv[2]);
 
@@ -525,12 +544,9 @@ int jt_ptl_disconnect(int argc, char **argv)
 
                 PORTAL_IOC_INIT(data);
                 if (argc == 2) {
 
                 PORTAL_IOC_INIT(data);
                 if (argc == 2) {
-                        he = gethostbyname(argv[1]);
-                        if (!he) {
-                                fprintf(stderr, "gethostbyname error: %s\n",
-                                        strerror(errno));
+                        he = ptl_gethostbyname(argv[1]);
+                        if (!he) 
                                 return -1;
                                 return -1;
-                        }
                         
                         data.ioc_nid = ntohl (*(__u32 *)he->h_addr); /* HOST byte order */
 
                         
                         data.ioc_nid = ntohl (*(__u32 *)he->h_addr); /* HOST byte order */
 
@@ -582,12 +598,9 @@ int jt_ptl_push_connection (int argc, char **argv)
 
                 PORTAL_IOC_INIT(data);
                 if (argc == 2) {
 
                 PORTAL_IOC_INIT(data);
                 if (argc == 2) {
-                        he = gethostbyname(argv[1]);
-                        if (!he) {
-                                fprintf(stderr, "gethostbyname error: %s\n",
-                                        strerror(errno));
+                        he = ptl_gethostbyname(argv[1]);
+                        if (!he)
                                 return -1;
                                 return -1;
-                        }
                         
                         data.ioc_nid = ntohl (*(__u32 *)he->h_addr); /* HOST byte order */
 
                         
                         data.ioc_nid = ntohl (*(__u32 *)he->h_addr); /* HOST byte order */
 
index 776ef36..a8a5356 100644 (file)
@@ -15,4 +15,4 @@ cscope.files
 cscope.out
 autom4te-2.53.cache
 autom4te.cache
 cscope.out
 autom4te-2.53.cache
 autom4te.cache
-
+.*.o.cmd
index 89eaef7..17c08c6 100644 (file)
@@ -1,4 +1,14 @@
 tbd
 tbd
+       * version v0_8
+       * bug fixes
+        - orphans are moved into the PENDING directory for possible recovery
+        - replayed opens now open by fid for orphan/rename safety (1042)
+        - last close of an orphan inode generates a transno (683)
+       - chdir() and mount() now pin the directory entry (1020)
+       - avoid CERROR in normal ll_setattr_raw() error case (1500)
+       - discard very old requests without processing them (1502)
+
+2003-06-15  Phil Schwan  <phil@clusterfs.com>
        * version v0_7
        * bug fixes
        - imports and exports cleanup too early, need refcounts (349, 879, 1045)
        * version v0_7
        * bug fixes
        - imports and exports cleanup too early, need refcounts (349, 879, 1045)
index e540148..59178a4 100644 (file)
@@ -1,4 +1,22 @@
-include fs/lustre/portals/Kernelenv
+include $(src)/portals/Kernelenv
+
+# for scripts/version_tag.pl
+LINUX = @LINUX@
 
 obj-y += portals/
 
 obj-y += portals/
+# obdclass has to come before anything that does class_register..
+obj-y += obdclass/
+obj-y += ptlrpc/
+obj-y += ldlm/
+obj-y += obdfilter/
+obj-y += mdc/
 obj-y += mds/
 obj-y += mds/
+obj-y += obdecho/
+obj-y += osc/
+obj-y += ost/
+obj-y += lov/
+obj-y += llite/
+
+# portals needs to be before utils/, which pulls in ptlctl objects
+obj-m += utils/
+obj-m += tests/ 
index 5efb545..2d3549b 100644 (file)
@@ -36,13 +36,13 @@ static int cobd_attach(struct obd_device *dev, obd_count len, void *data)
 {
         struct lprocfs_static_vars lvars;
 
 {
         struct lprocfs_static_vars lvars;
 
-        lprocfs_init_vars(&lvars);
-       return lprocfs_obd_attach(dev, lvars.obd_vars);
+        lprocfs_init_vars(cobd, &lvars);
+        return lprocfs_obd_attach(dev, lvars.obd_vars);
 }
 
 static int cobd_detach(struct obd_device *dev)
 {
 }
 
 static int cobd_detach(struct obd_device *dev)
 {
-       return lprocfs_obd_detach(dev);
+        return lprocfs_obd_detach(dev);
 }
 
 static int
 }
 
 static int
@@ -82,24 +82,23 @@ cobd_setup (struct obd_device *dev, obd_count len, void *buf)
         return (0);
 
  fail_0:
         return (0);
 
  fail_0:
-        obd_disconnect (&cobd->cobd_target, 0 );
+        obd_disconnect(&cobd->cobd_target, 0);
         return (rc);
 }
 
         return (rc);
 }
 
-static int
-cobd_cleanup (struct obd_device *dev, int force, int failover)
+static int cobd_cleanup(struct obd_device *dev, int flags)
 {
         struct cache_obd  *cobd = &dev->u.cobd;
         int                rc;
 
 {
         struct cache_obd  *cobd = &dev->u.cobd;
         int                rc;
 
-        if (!list_empty (&dev->obd_exports))
+        if (!list_empty(&dev->obd_exports))
                 return (-EBUSY);
 
                 return (-EBUSY);
 
-        rc = obd_disconnect (&cobd->cobd_cache, failover);
+        rc = obd_disconnect(&cobd->cobd_cache, flags);
         if (rc != 0)
                 CERROR ("error %d disconnecting cache\n", rc);
 
         if (rc != 0)
                 CERROR ("error %d disconnecting cache\n", rc);
 
-        rc = obd_disconnect (&cobd->cobd_target, failover);
+        rc = obd_disconnect(&cobd->cobd_target, flags);
         if (rc != 0)
                 CERROR ("error %d disconnecting target\n", rc);
 
         if (rc != 0)
                 CERROR ("error %d disconnecting target\n", rc);
 
@@ -116,13 +115,12 @@ cobd_connect (struct lustre_handle *conn, struct obd_device *obd,
         return (rc);
 }
 
         return (rc);
 }
 
-static int
-cobd_disconnect (struct lustre_handle *conn, int failover)
+static int cobd_disconnect(struct lustre_handle *conn, int flags)
 {
 {
-       int rc = class_disconnect (conn, failover);
+        int rc = class_disconnect(conn, flags);
 
         CERROR ("rc %d\n", rc);
 
         CERROR ("rc %d\n", rc);
-       return (rc);
+        return (rc);
 }
 
 static int
 }
 
 static int
@@ -144,23 +142,15 @@ cobd_get_info(struct lustre_handle *conn, obd_count keylen,
         return obd_get_info(&cobd->cobd_target, keylen, key, vallen, val);
 }
 
         return obd_get_info(&cobd->cobd_target, keylen, key, vallen, val);
 }
 
-static int cobd_statfs(struct obd_export *exp, struct obd_statfs *osfs)
+static int cobd_statfs(struct obd_device *obd, struct obd_statfs *osfs,
+                       unsigned long max_age)
 {
 {
-        struct obd_export *cobd_exp;
-        int rc;
-
-        if (exp->exp_obd == NULL)
-                return -EINVAL;
-
-        cobd_exp = class_conn2export(&exp->exp_obd->u.cobd.cobd_target);
-        rc = obd_statfs(cobd_exp, osfs);
-        class_export_put(cobd_exp);
-        return rc;
+        return obd_statfs(class_conn2obd(&obd->u.cobd.cobd_target), osfs,
+                          max_age);
 }
 
 }
 
-static int
-cobd_getattr(struct lustre_handle *conn, struct obdo *oa,
-             struct lov_stripe_md *lsm)
+static int cobd_getattr(struct lustre_handle *conn, struct obdo *oa,
+                        struct lov_stripe_md *lsm)
 {
         struct obd_device *obd = class_conn2obd(conn);
         struct cache_obd  *cobd;
 {
         struct obd_device *obd = class_conn2obd(conn);
         struct cache_obd  *cobd;
@@ -207,11 +197,10 @@ cobd_close(struct lustre_handle *conn, struct obdo *oa,
         return (obd_close (&cobd->cobd_target, oa, lsm, oti));
 }
 
         return (obd_close (&cobd->cobd_target, oa, lsm, oti));
 }
 
-static int cobd_preprw(int cmd, struct obd_export *exp, struct obdo *obdo,
+static int cobd_preprw(int cmd, struct obd_export *exp, struct obdo *oa,
                        int objcount, struct obd_ioobj *obj,
                        int niocount, struct niobuf_remote *nb,
                        int objcount, struct obd_ioobj *obj,
                        int niocount, struct niobuf_remote *nb,
-                       struct niobuf_local *res, void **desc_private,
-                       struct obd_trans_info *oti)
+                       struct niobuf_local *res, struct obd_trans_info *oti)
 {
         struct obd_export *cobd_exp;
         int rc;
 {
         struct obd_export *cobd_exp;
         int rc;
@@ -223,16 +212,17 @@ static int cobd_preprw(int cmd, struct obd_export *exp, struct obdo *obdo,
                 return -EOPNOTSUPP;
 
         cobd_exp = class_conn2export(&exp->exp_obd->u.cobd.cobd_target);
                 return -EOPNOTSUPP;
 
         cobd_exp = class_conn2export(&exp->exp_obd->u.cobd.cobd_target);
-        rc = obd_preprw(cmd, cobd_exp, obdo, objcount, obj, niocount, nb, res,
-                        desc_private, oti);
+        rc = obd_preprw(cmd, cobd_exp, oa, objcount, obj, niocount, nb, res,
+                        oti);
         class_export_put(cobd_exp);
         class_export_put(cobd_exp);
+
         return rc;
 }
 
         return rc;
 }
 
-static int cobd_commitrw(int cmd, struct obd_export *exp,
+static int cobd_commitrw(int cmd, struct obd_export *exp, struct obdo *oa,
                          int objcount, struct obd_ioobj *obj,
                          int niocount, struct niobuf_local *local,
                          int objcount, struct obd_ioobj *obj,
                          int niocount, struct niobuf_local *local,
-                         void *desc_private, struct obd_trans_info *oti)
+                         struct obd_trans_info *oti)
 {
         struct obd_export *cobd_exp;
         int rc;
 {
         struct obd_export *cobd_exp;
         int rc;
@@ -244,16 +234,14 @@ static int cobd_commitrw(int cmd, struct obd_export *exp,
                 return -EOPNOTSUPP;
 
         cobd_exp = class_conn2export(&exp->exp_obd->u.cobd.cobd_target);
                 return -EOPNOTSUPP;
 
         cobd_exp = class_conn2export(&exp->exp_obd->u.cobd.cobd_target);
-        rc = obd_commitrw(cmd, cobd_exp, objcount, obj, niocount, local,
-                          desc_private, oti);
+        rc = obd_commitrw(cmd, cobd_exp, oa, objcount, obj,niocount,local,oti);
         class_export_put(cobd_exp);
         return rc;
 }
 
         class_export_put(cobd_exp);
         return rc;
 }
 
-static inline int
-cobd_brw(int cmd, struct lustre_handle *conn,
-         struct lov_stripe_md *lsm, obd_count oa_bufs,
-         struct brw_page *pga, struct obd_trans_info *oti)
+static int cobd_brw(int cmd, struct lustre_handle *conn, struct obdo *oa,
+                    struct lov_stripe_md *lsm, obd_count oa_bufs,
+                    struct brw_page *pga, struct obd_trans_info *oti)
 {
         struct obd_device *obd = class_conn2obd(conn);
         struct cache_obd  *cobd;
 {
         struct obd_device *obd = class_conn2obd(conn);
         struct cache_obd  *cobd;
@@ -267,13 +255,11 @@ cobd_brw(int cmd, struct lustre_handle *conn,
                 return -EOPNOTSUPP;
 
         cobd = &obd->u.cobd;
                 return -EOPNOTSUPP;
 
         cobd = &obd->u.cobd;
-        return (obd_brw (cmd, &cobd->cobd_target,
-                         lsm, oa_bufs, pga, oti));
+        return (obd_brw(cmd, &cobd->cobd_target, oa, lsm, oa_bufs, pga, oti));
 }
 
 }
 
-static int
-cobd_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
-               void *karg, void *uarg)
+static int cobd_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
+                          void *karg, void *uarg)
 {
         struct obd_device *obd = class_conn2obd(conn);
         struct cache_obd  *cobd;
 {
         struct obd_device *obd = class_conn2obd(conn);
         struct cache_obd  *cobd;
@@ -286,7 +272,7 @@ cobd_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
         /* intercept? */
 
         cobd = &obd->u.cobd;
         /* intercept? */
 
         cobd = &obd->u.cobd;
-        return (obd_iocontrol (cmd, &cobd->cobd_target, len, karg, uarg));
+        return (obd_iocontrol(cmd, &cobd->cobd_target, len, karg, uarg));
 }
 
 static struct obd_ops cobd_ops = {
 }
 
 static struct obd_ops cobd_ops = {
@@ -317,16 +303,16 @@ static int __init cobd_init(void)
         struct lprocfs_static_vars lvars;
         ENTRY;
 
         struct lprocfs_static_vars lvars;
         ENTRY;
 
-       printk(KERN_INFO "Lustre Caching OBD driver; info@clusterfs.com\n");
+        printk(KERN_INFO "Lustre Caching OBD driver; info@clusterfs.com\n");
 
 
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(cobd, &lvars);
         RETURN(class_register_type(&cobd_ops, lvars.module_vars,
                                    OBD_CACHE_DEVICENAME));
 }
 
         RETURN(class_register_type(&cobd_ops, lvars.module_vars,
                                    OBD_CACHE_DEVICENAME));
 }
 
-static void __exit cobd_exit(void)
+static void /*__exit*/ cobd_exit(void)
 {
 {
-       class_unregister_type(OBD_CACHE_DEVICENAME);
+        class_unregister_type(OBD_CACHE_DEVICENAME);
 }
 
 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
 }
 
 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
index fd7474b..ba9b9cf 100644 (file)
 #include <linux/lprocfs_status.h>
 
 #ifndef LPROCFS
 #include <linux/lprocfs_status.h>
 
 #ifndef LPROCFS
-struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
-struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_module_vars[] = { {0} };
 #else
 /* Common STATUS namespace */
 #else
 /* Common STATUS namespace */
-static int rd_target(char *page, char **start, off_t off, int count,
-                     int *eof, void *data)
+static int cobd_rd_target(char *page, char **start, off_t off, int count,
+                          int *eof, void *data)
 {
 {
-        struct obd_device    *dev = (struct obd_device*)data;
-       struct lustre_handle *conn;
-       struct obd_export    *exp;
-       int    rc;
+        struct obd_device *cobd = (struct obd_device *)data;
+        int    rc;
 
 
-        LASSERT(dev != NULL);
-        conn = &dev->u.cobd.cobd_target;
+        LASSERT(cobd != NULL);
 
 
-       if (!dev->obd_set_up) {
-               rc = snprintf (page, count, "not set up\n");
-       } else {
-               exp = class_conn2export(conn);
-               LASSERT(exp != NULL);
-               rc = snprintf(page, count, "%s\n", 
-                              exp->exp_obd->obd_uuid.uuid);
-                class_export_put(exp);
-       }
-       return (rc);
+        if (!cobd->obd_set_up) {
+                rc = snprintf(page, count, "not set up\n");
+        } else {
+                struct obd_device *tgt =
+                        class_conn2obd(&cobd->u.cobd.cobd_target);
+                LASSERT(tgt != NULL);
+                rc = snprintf(page, count, "%s\n", tgt->obd_uuid.uuid);
+        }
+        return rc;
 }
 
 }
 
-static int rd_cache(char *page, char **start, off_t off, int count,
-                    int *eof, void *data)
+static int cobd_rd_cache(char *page, char **start, off_t off, int count,
+                         int *eof, void *data)
 {
 {
-        struct obd_device    *dev = (struct obd_device*)data;
-       struct lustre_handle *conn;
-       struct obd_export    *exp;
-       int    rc;
+        struct obd_device *cobd = (struct obd_device*)data;
+        int    rc;
 
 
-        LASSERT(dev != NULL);
-        conn = &dev->u.cobd.cobd_cache;
+        LASSERT(cobd != NULL);
 
 
-       if (!dev->obd_set_up) {
-               rc = snprintf (page, count, "not set up\n");
+        if (!cobd->obd_set_up) {
+                rc = snprintf(page, count, "not set up\n");
         } else {
         } else {
-               exp = class_conn2export(conn);
-               LASSERT (exp != NULL);
-               rc = snprintf(page, count, "%s\n", 
-                              exp->exp_obd->obd_uuid.uuid);
-                class_export_put(exp);
-       }
-       return (rc);
+                struct obd_device *cache =
+                        class_conn2obd(&cobd->u.cobd.cobd_cache);
+                LASSERT(cache != NULL);
+                rc = snprintf(page, count, "%s\n", cache->obd_uuid.uuid);
+        }
+        return rc;
 }
 
 }
 
-struct lprocfs_vars lprocfs_obd_vars[] = {
-        { "uuid",        lprocfs_rd_uuid,    0, 0 },
-        { "target_uuid", rd_target,          0, 0 },
-        { "cache_uuid",  rd_cache,           0, 0 },
+static struct lprocfs_vars lprocfs_obd_vars[] = {
+        { "uuid",         lprocfs_rd_uuid,        0, 0 },
+        { "target_uuid",  cobd_rd_target,         0, 0 },
+        { "cache_uuid",   cobd_rd_cache,          0, 0 },
         { 0 }
 };
 
 struct lprocfs_vars lprocfs_module_vars[] = {
         { 0 }
 };
 
 struct lprocfs_vars lprocfs_module_vars[] = {
-        { "num_refs",    lprocfs_rd_numrefs, 0, 0 },
+        { "num_refs",     lprocfs_rd_numrefs,     0, 0 },
         { 0 }
 };
 #endif /* LPROCFS */
 
         { 0 }
 };
 #endif /* LPROCFS */
 
-LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
+LPROCFS_INIT_VARS(cobd, lprocfs_module_vars, lprocfs_obd_vars)
index 51d1d1a..de4d653 100644 (file)
 
 <!ELEMENT profile (ldlm_ref | ptlrpc_ref | network_ref | routetbl_ref |
                    osd_ref | mdsdev_ref | lovconfig_ref|
 
 <!ELEMENT profile (ldlm_ref | ptlrpc_ref | network_ref | routetbl_ref |
                    osd_ref | mdsdev_ref | lovconfig_ref|
-                   echoclient_ref | mountpoint_ref)*>
+                   echoclient_ref | mountpoint_ref | mgmt_ref)*>
 <!ATTLIST profile %object.attr;>
 
 <!ATTLIST profile %object.attr;>
 
-<!ELEMENT mountpoint (path | fileset | mds_ref | obd_ref)*>
+<!-- a mountpoint is expected to carry one path and one filesystem_ref -->
+<!ELEMENT mountpoint (path | filesystem_ref)*>
 <!ATTLIST mountpoint %object.attr;>
 
 <!ELEMENT echoclient (obd_ref)>
 <!ATTLIST mountpoint %object.attr;>
 
 <!ELEMENT echoclient (obd_ref)>
@@ -45,6 +46,9 @@
 <!ELEMENT ldlm EMPTY>
 <!ATTLIST ldlm %object.attr;>
 
 <!ELEMENT ldlm EMPTY>
 <!ATTLIST ldlm %object.attr;>
 
+<!ELEMENT mgmt (active_ref)*>
+<!ATTLIST mgmt %object.attr;>
+
 <!ELEMENT ptlrpc EMPTY>
 <!ATTLIST ptlrpc %object.attr;>
 
 <!ELEMENT ptlrpc EMPTY>
 <!ATTLIST ptlrpc %object.attr;>
 
 <!ATTLIST ost %object.attr;
               failover ( 1 | 0 ) #IMPLIED>
 
 <!ATTLIST ost %object.attr;
               failover ( 1 | 0 ) #IMPLIED>
 
+<!-- a filesystem is expected to carry one mds_ref and one obd_ref;
+     mgmt_ref is optional -->
+<!ELEMENT filesystem (mds_ref | obd_ref | mgmt_ref)*>
+<!ATTLIST filesystem %object.attr;>
+
 <!ELEMENT mds (active_ref | lovconfig_ref | group)*>
 <!ATTLIST mds %object.attr;
               failover ( 1 | 0 ) #IMPLIED>
 <!ELEMENT mds (active_ref | lovconfig_ref | group)*>
 <!ATTLIST mds %object.attr;
               failover ( 1 | 0 ) #IMPLIED>
 <!ATTLIST obd_ref         %objref.attr;>
 <!ELEMENT ost_ref         %objref.content;>
 <!ATTLIST ost_ref         %objref.attr;>
 <!ATTLIST obd_ref         %objref.attr;>
 <!ELEMENT ost_ref         %objref.content;>
 <!ATTLIST ost_ref         %objref.attr;>
-<!ELEMENT active_ref         %objref.content;>
-<!ATTLIST active_ref         %objref.attr;>
-<!ELEMENT target_ref         %objref.content;>
-<!ATTLIST target_ref         %objref.attr;>
+<!ELEMENT active_ref      %objref.content;>
+<!ATTLIST active_ref      %objref.attr;>
+<!ELEMENT target_ref      %objref.content;>
+<!ATTLIST target_ref      %objref.attr;>
 <!ELEMENT lov_ref         %objref.content;>
 <!ATTLIST lov_ref         %objref.attr;>
 <!ELEMENT lovconfig_ref   %objref.content;>
 <!ATTLIST lovconfig_ref   %objref.attr;>
 <!ELEMENT lov_ref         %objref.content;>
 <!ATTLIST lov_ref         %objref.attr;>
 <!ELEMENT lovconfig_ref   %objref.content;>
 <!ATTLIST lovconfig_ref   %objref.attr;>
+<!ELEMENT mgmt_ref        %objref.content;>
+<!ATTLIST mgmt_ref        %objref.attr;>
 <!ELEMENT mountpoint_ref  %objref.content;>
 <!ATTLIST mountpoint_ref  %objref.attr;>
 <!ELEMENT mountpoint_ref  %objref.content;>
 <!ATTLIST mountpoint_ref  %objref.attr;>
+<!ELEMENT filesystem_ref  %objref.content;>
+<!ATTLIST filesystem_ref  %objref.attr;>
 <!ELEMENT echoclient_ref  %objref.content;>
 <!ATTLIST echoclient_ref  %objref.attr;>
 <!ELEMENT failover_ref    %objref.content;>
 <!ELEMENT echoclient_ref  %objref.content;>
 <!ATTLIST echoclient_ref  %objref.attr;>
 <!ELEMENT failover_ref    %objref.content;>
index 8e12135..50f82c8 100644 (file)
@@ -14,6 +14,18 @@ AM_INIT_AUTOMAKE(lustre, builtin([esyscmd], [sed -ne '/^%define version /{ s/.*v
 AC_ARG_ENABLE(extN, [  --enable-extN use extN instead of ext3 for lustre backend])
 AM_CONDITIONAL(EXTN, test x$enable_extN = xyes)
 
 AC_ARG_ENABLE(extN, [  --enable-extN use extN instead of ext3 for lustre backend])
 AM_CONDITIONAL(EXTN, test x$enable_extN = xyes)
 
+# the pinger is temporary, until we have the recovery node in place
+AC_ARG_ENABLE(pinger, [  --enable-pinger recovery pinger support])
+if test x$enable_pinger = xyes ; then
+  AC_DEFINE(ENABLE_PINGER, 1, Use the Pinger)
+fi
+
+# very experimental orphan support
+AC_ARG_ENABLE(orphans, [  --enable-orphans very experimental orphan recovery support])
+if test x$enable_orphans = xyes ; then
+  AC_DEFINE(ENABLE_ORPHANS, 1, Compile with orphan support)
+fi
+
 AC_ARG_WITH(obd-buffer-size, [  --with-obd-buffer-size=[size] set lctl ioctl maximum (default=8K)],OBD_BUFFER_SIZE=$with_obd_buffer_size,OBD_BUFFER_SIZE=8192)
 AC_DEFINE_UNQUOTED(OBD_MAX_IOCTL_BUFFER, $OBD_BUFFER_SIZE, [IOCTL Buffer Size])
 
 AC_ARG_WITH(obd-buffer-size, [  --with-obd-buffer-size=[size] set lctl ioctl maximum (default=8K)],OBD_BUFFER_SIZE=$with_obd_buffer_size,OBD_BUFFER_SIZE=8192)
 AC_DEFINE_UNQUOTED(OBD_MAX_IOCTL_BUFFER, $OBD_BUFFER_SIZE, [IOCTL Buffer Size])
 
@@ -21,15 +33,8 @@ sinclude(portals/build.m4)
 sinclude(portals/archdep.m4)
 
 if test x$enable_inkernel = xyes ; then
 sinclude(portals/archdep.m4)
 
 if test x$enable_inkernel = xyes ; then
-cp Makefile.mk Makefile.in
-cp mds/Makefile.mk mds/Makefile.in
-cp portals/Kernelenv.mk portals/Kernelenv.in
-cp portals/Makefile.mk portals/Makefile.in
-cp portals/libcfs/Makefile.mk portals/libcfs/Makefile.in
-cp portals/portals/Makefile.mk portals/portals/Makefile.in
-cp portals/knals/Makefile.mk portals/knals/Makefile.in
-cp portals/knals/socknal/Makefile.mk portals/knals/socknal/Makefile.in
-cp portals/router/Makefile.mk portals/router/Makefile.in
+       find . -name Makefile.mk | sed 's/.mk$//' | xargs -n 1 \
+               sh -e -x -c '(cp -f $0.mk $0.in)'
 fi
 
 AM_CONFIG_HEADER(portals/include/config.h)
 fi
 
 AM_CONFIG_HEADER(portals/include/config.h)
index 202a761..6b94901 100644 (file)
@@ -363,16 +363,16 @@ struct page {
 #define kmap(page) (page)->addr
 #define kunmap(a) do { int foo = 1; foo++; } while (0)
 
 #define kmap(page) (page)->addr
 #define kunmap(a) do { int foo = 1; foo++; } while (0)
 
-static inline struct page *alloc_pages(int mask, unsigned long foo)
+static inline struct page *alloc_pages(int mask, unsigned long order)
 {
         struct page *pg = malloc(sizeof(*pg));
 
         if (!pg)
                 return NULL;
 #ifdef MAP_ANONYMOUS
 {
         struct page *pg = malloc(sizeof(*pg));
 
         if (!pg)
                 return NULL;
 #ifdef MAP_ANONYMOUS
-        pg->addr = mmap(0, PAGE_SIZE, PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
+        pg->addr = mmap(0, PAGE_SIZE << order, PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
 #else
 #else
-        pg->addr = malloc(PAGE_SIZE);
+        pg->addr = malloc(PAGE_SIZE << order);
 #endif
 
         if (!pg->addr) {
 #endif
 
         if (!pg->addr) {
@@ -407,26 +407,27 @@ static inline struct page* __grab_cache_page(int index)
 /* arithmetic */
 #define do_div(a,b)                     \
         ({                              \
 /* arithmetic */
 #define do_div(a,b)                     \
         ({                              \
-                unsigned long ret;      \
-                ret = (a)%(b);          \
-                (a) = (a)/(b);          \
-                (ret);                  \
+                unsigned long remainder;\
+                remainder = (a) % (b);  \
+                (a) = (a) / (b);        \
+                (remainder);            \
         })
 
 /* VFS stuff */
         })
 
 /* VFS stuff */
-#define ATTR_MODE       1
-#define ATTR_UID        2
-#define ATTR_GID        4
-#define ATTR_SIZE       8
-#define ATTR_ATIME      16
-#define ATTR_MTIME      32
-#define ATTR_CTIME      64
-#define ATTR_ATIME_SET  128
-#define ATTR_MTIME_SET  256
-#define ATTR_FORCE      512     /* Not a change, but a change it */
-#define ATTR_ATTR_FLAG  1024
-#define ATTR_RAW        2048    /* file system, not vfs will massage attrs */
-#define ATTR_FROM_OPEN  4096    /* called from open path, ie O_TRUNC */
+#define ATTR_MODE       0x0001
+#define ATTR_UID        0x0002
+#define ATTR_GID        0x0004
+#define ATTR_SIZE       0x0008
+#define ATTR_ATIME      0x0010
+#define ATTR_MTIME      0x0020
+#define ATTR_CTIME      0x0040
+#define ATTR_ATIME_SET  0x0080
+#define ATTR_MTIME_SET  0x0100
+#define ATTR_FORCE      0x0200  /* Not a change, but a change it */
+#define ATTR_ATTR_FLAG  0x0400
+#define ATTR_RAW        0x0800  /* file system, not vfs will massage attrs */
+#define ATTR_FROM_OPEN  0x1000  /* called from open path, ie O_TRUNC */
+#define ATTR_CTIME_SET  0x2000
 
 struct iattr {
         unsigned int    ia_valid;
 
 struct iattr {
         unsigned int    ia_valid;
index fb96bde..e6678f8 100644 (file)
 #ifndef _LPROCFS_SNMP_H
 #define _LPROCFS_SNMP_H
 
 #ifndef _LPROCFS_SNMP_H
 #define _LPROCFS_SNMP_H
 
+
 #ifdef __KERNEL__
 #include <linux/config.h>
 #include <linux/autoconf.h>
 #include <linux/proc_fs.h>
 #ifdef __KERNEL__
 #include <linux/config.h>
 #include <linux/autoconf.h>
 #include <linux/proc_fs.h>
+#include <linux/version.h>
 #include <linux/smp.h>
 #include <linux/smp.h>
+#include <linux/kp30.h>
+
+# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#  include <linux/statfs.h>
+# else 
+#  define kstatfs statfs
+# endif
+
+#else 
+#  define kstatfs statfs
 #endif
 
 #endif
 
-#include <linux/kp30.h>
 
 #ifndef LPROCFS
 #ifdef  CONFIG_PROC_FS  /* Ensure that /proc is configured */
 
 #ifndef LPROCFS
 #ifdef  CONFIG_PROC_FS  /* Ensure that /proc is configured */
@@ -116,9 +127,8 @@ struct lprocfs_stats {
 /* class_obd.c */
 extern struct proc_dir_entry *proc_lustre_root;
 
 /* class_obd.c */
 extern struct proc_dir_entry *proc_lustre_root;
 
-/* lproc_lov.c */
-extern struct file_operations ll_proc_target_fops;
 struct obd_device;
 struct obd_device;
+struct file;
 
 #ifdef LPROCFS
 
 
 #ifdef LPROCFS
 
@@ -184,14 +194,18 @@ void lprocfs_init_multi_vars(unsigned int idx,                            \
    x->obd_vars = glob[idx].obd_vars;                                      \
 }                                                                         \
 
    x->obd_vars = glob[idx].obd_vars;                                      \
 }                                                                         \
 
-#define LPROCFS_INIT_VARS(vclass, vinstance)           \
-void lprocfs_init_vars(struct lprocfs_static_vars *x)  \
+#define LPROCFS_INIT_VARS(name, vclass, vinstance)           \
+void lprocfs_##name##_init_vars(struct lprocfs_static_vars *x)  \
 {                                                      \
         x->module_vars = vclass;                       \
         x->obd_vars = vinstance;                       \
 }                                                      \
 
 {                                                      \
         x->module_vars = vclass;                       \
         x->obd_vars = vinstance;                       \
 }                                                      \
 
-extern void lprocfs_init_vars(struct lprocfs_static_vars *var);
+/* Invoke the lprocfs_<NAME>_init_vars() function that LPROCFS_INIT_VARS(NAME,
+ * ...) defines.  The prototype is declared inside the do/while block, so
+ * callers need no separate extern declaration for it. */
+#define lprocfs_init_vars(NAME, VAR)     \
+do {      \
+        extern void lprocfs_##NAME##_init_vars(struct lprocfs_static_vars *);  \
+        lprocfs_##NAME##_init_vars(VAR);                                       \
+} while (0)
 extern void lprocfs_init_multi_vars(unsigned int idx,
                                     struct lprocfs_static_vars *var);
 /* lprocfs_status.c */
 extern void lprocfs_init_multi_vars(unsigned int idx,
                                     struct lprocfs_static_vars *var);
 /* lprocfs_status.c */
@@ -220,6 +234,8 @@ extern int lprocfs_rd_uuid(char *page, char **start, off_t off,
                            int count, int *eof, void *data);
 extern int lprocfs_rd_name(char *page, char **start, off_t off,
                            int count, int *eof, void *data);
                            int count, int *eof, void *data);
 extern int lprocfs_rd_name(char *page, char **start, off_t off,
                            int count, int *eof, void *data);
+extern int lprocfs_rd_fstype(char *page, char **start, off_t off,
+                             int count, int *eof, void *data);
 extern int lprocfs_rd_server_uuid(char *page, char **start, off_t off,
                                   int count, int *eof, void *data);
 extern int lprocfs_rd_conn_uuid(char *page, char **start, off_t off,
 extern int lprocfs_rd_server_uuid(char *page, char **start, off_t off,
                                   int count, int *eof, void *data);
 extern int lprocfs_rd_conn_uuid(char *page, char **start, off_t off,
@@ -228,38 +244,24 @@ extern int lprocfs_rd_numrefs(char *page, char **start, off_t off,
                               int count, int *eof, void *data);
 
 /* Statfs helpers */
                               int count, int *eof, void *data);
 
 /* Statfs helpers */
-struct statfs;
 extern int lprocfs_rd_blksize(char *page, char **start, off_t off,
 extern int lprocfs_rd_blksize(char *page, char **start, off_t off,
-                              int count, int *eof, struct statfs *sfs);
+                              int count, int *eof, void *data);
 extern int lprocfs_rd_kbytestotal(char *page, char **start, off_t off,
 extern int lprocfs_rd_kbytestotal(char *page, char **start, off_t off,
-                                  int count, int *eof, struct statfs *sfs);
+                                  int count, int *eof, void *data);
 extern int lprocfs_rd_kbytesfree(char *page, char **start, off_t off,
 extern int lprocfs_rd_kbytesfree(char *page, char **start, off_t off,
-                                 int count, int *eof, struct statfs *sfs);
+                                 int count, int *eof, void *data);
 extern int lprocfs_rd_filestotal(char *page, char **start, off_t off,
 extern int lprocfs_rd_filestotal(char *page, char **start, off_t off,
-                                 int count, int *eof, struct statfs *sfs);
+                                 int count, int *eof, void *data);
 extern int lprocfs_rd_filesfree(char *page, char **start, off_t off,
 extern int lprocfs_rd_filesfree(char *page, char **start, off_t off,
-                                int count, int *eof, struct statfs *sfs);
+                                int count, int *eof, void *data);
 extern int lprocfs_rd_filegroups(char *page, char **start, off_t off,
 extern int lprocfs_rd_filegroups(char *page, char **start, off_t off,
-                                 int count, int *eof, struct statfs *sfs);
+                                 int count, int *eof, void *data);
 
 /* lprocfs_status.c: counter read/write functions */
 
 /* lprocfs_status.c: counter read/write functions */
-struct file;
 extern int lprocfs_counter_read(char *page, char **start, off_t off,
                                 int count, int *eof, void *data);
 extern int lprocfs_counter_write(struct file *file, const char *buffer,
                                  unsigned long count, void *data);
 extern int lprocfs_counter_read(char *page, char **start, off_t off,
                                 int count, int *eof, void *data);
 extern int lprocfs_counter_write(struct file *file, const char *buffer,
                                  unsigned long count, void *data);
-
-#define DEFINE_LPROCFS_STATFS_FCT(fct_name, get_statfs_fct)               \
-int fct_name(char *page, char **start, off_t off,                         \
-             int count, int *eof, void *data)                             \
-{                                                                         \
-        struct statfs sfs;                                                \
-        int rc = get_statfs_fct((struct obd_device*)data, &sfs);          \
-        return (rc == 0 ?                                                 \
-                lprocfs_##fct_name (page, start, off, count, eof, &sfs) : \
-                rc);                                                      \
-}
-
 #else
 /* LPROCFS is not defined */
 static inline void lprocfs_counter_add(struct lprocfs_stats *stats,
 #else
 /* LPROCFS is not defined */
 static inline void lprocfs_counter_add(struct lprocfs_stats *stats,
@@ -289,18 +291,17 @@ static inline void lprocfs_free_obd_stats(struct obd_device *obddev)
 static inline struct proc_dir_entry *
 lprocfs_register(const char *name, struct proc_dir_entry *parent,
                  struct lprocfs_vars *list, void *data) { return NULL; }
 static inline struct proc_dir_entry *
 lprocfs_register(const char *name, struct proc_dir_entry *parent,
                  struct lprocfs_vars *list, void *data) { return NULL; }
-#define LPROCFS_INIT_MULTI_VARS(array, size)
+#define LPROCFS_INIT_MULTI_VARS(array, size) do {} while (0)
 static inline void lprocfs_init_multi_vars(unsigned int idx,
                                            struct lprocfs_static_vars *x) { return; }
 static inline void lprocfs_init_multi_vars(unsigned int idx,
                                            struct lprocfs_static_vars *x) { return; }
-#define LPROCFS_INIT_VARS(vclass, vinstance)
-static inline void lprocfs_init_vars(struct lprocfs_static_vars *x) { return; }
+#define LPROCFS_INIT_VARS(name, vclass, vinstance) do {} while (0)
+#define lprocfs_init_vars(...) do {} while (0)
 static inline int lprocfs_add_vars(struct proc_dir_entry *root,
                                    struct lprocfs_vars *var,
                                    void *data) { return 0; }
 static inline void lprocfs_remove(struct proc_dir_entry *root) {};
 static inline struct proc_dir_entry *lprocfs_srch(struct proc_dir_entry *head,
                                     const char *name) {return 0;}
 static inline int lprocfs_add_vars(struct proc_dir_entry *root,
                                    struct lprocfs_vars *var,
                                    void *data) { return 0; }
 static inline void lprocfs_remove(struct proc_dir_entry *root) {};
 static inline struct proc_dir_entry *lprocfs_srch(struct proc_dir_entry *head,
                                     const char *name) {return 0;}
-struct obd_device;
 static inline int lprocfs_obd_attach(struct obd_device *dev,
                                      struct lprocfs_vars *list) { return 0; }
 static inline int lprocfs_obd_detach(struct obd_device *dev)  { return 0; }
 static inline int lprocfs_obd_attach(struct obd_device *dev,
                                      struct lprocfs_vars *list) { return 0; }
 static inline int lprocfs_obd_detach(struct obd_device *dev)  { return 0; }
@@ -318,37 +319,30 @@ static inline int lprocfs_rd_numrefs(char *page, char **start, off_t off,
                                      int count, int *eof, void *data) { return 0; }
 
 /* Statfs helpers */
                                      int count, int *eof, void *data) { return 0; }
 
 /* Statfs helpers */
-struct statfs;
 static inline
 int lprocfs_rd_blksize(char *page, char **start, off_t off,
 static inline
 int lprocfs_rd_blksize(char *page, char **start, off_t off,
-                       int count, int *eof, struct statfs *sfs) { return 0; }
+                       int count, int *eof, void *data) { return 0; }
 static inline
 int lprocfs_rd_kbytestotal(char *page, char **start, off_t off,
 static inline
 int lprocfs_rd_kbytestotal(char *page, char **start, off_t off,
-                           int count, int *eof, struct statfs *sfs) { return 0; }
+                           int count, int *eof, void *data) { return 0; }
 static inline
 int lprocfs_rd_kbytesfree(char *page, char **start, off_t off,
 static inline
 int lprocfs_rd_kbytesfree(char *page, char **start, off_t off,
-                          int count, int *eof, struct statfs *sfs) { return 0; }
+                          int count, int *eof, void *data) { return 0; }
 static inline
 int lprocfs_rd_filestotal(char *page, char **start, off_t off,
 static inline
 int lprocfs_rd_filestotal(char *page, char **start, off_t off,
-                          int count, int *eof, struct statfs *sfs) { return 0; }
+                          int count, int *eof, void *data) { return 0; }
 static inline
 int lprocfs_rd_filesfree(char *page, char **start, off_t off,
 static inline
 int lprocfs_rd_filesfree(char *page, char **start, off_t off,
-                         int count, int *eof, struct statfs *sfs)  { return 0; }
+                         int count, int *eof, void *data)  { return 0; }
 static inline
 int lprocfs_rd_filegroups(char *page, char **start, off_t off,
 static inline
 int lprocfs_rd_filegroups(char *page, char **start, off_t off,
-                          int count, int *eof, struct statfs *sfs) { return 0; }
+                          int count, int *eof, void *data) { return 0; }
 static inline
 int lprocfs_counter_read(char *page, char **start, off_t off,
                          int count, int *eof, void *data) { return 0; }
 static inline
 int lprocfs_counter_read(char *page, char **start, off_t off,
                          int count, int *eof, void *data) { return 0; }
-struct file;
 static inline
 int lprocfs_counter_write(struct file *file, const char *buffer,
                           unsigned long count, void *data) { return 0; }
 static inline
 int lprocfs_counter_write(struct file *file, const char *buffer,
                           unsigned long count, void *data) { return 0; }
-
-#define DEFINE_LPROCFS_STATFS_FCT(fct_name, get_statfs_fct)  \
-int fct_name(char *page, char **start, off_t off,            \
-             int count, int *eof, void *data) { *eof = 1; return 0; }
-
 #endif /* LPROCFS */
 
 #endif /* LPROCFS_SNMP_H */
 #endif /* LPROCFS */
 
 #endif /* LPROCFS_SNMP_H */
index 4275a10..3609d52 100644 (file)
 #ifndef _COMPAT25_H
 #define _COMPAT25_H
 
 #ifndef _COMPAT25_H
 #define _COMPAT25_H
 
-#include <linux/portals_compat25.h>
+#ifdef __KERNEL__
 
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
-#define KDEVT_VAL(dev, val)         dev.value = 0               
-#else
-#define KDEVT_VAL(dev, val)         dev = 0               
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) && LINUX_VERSION_CODE < KERNEL_VERSION(2,5,69)
+#error sorry, lustre requires at least 2.5.69
 #endif
 
 #endif
 
+#include <linux/portals_compat25.h>
+
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
 # define PGCACHE_WRLOCK(mapping)          write_lock(&mapping->page_lock)
 # define PGCACHE_WRUNLOCK(mapping)        write_unlock(&mapping->page_lock)
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
 # define PGCACHE_WRLOCK(mapping)          write_lock(&mapping->page_lock)
 # define PGCACHE_WRUNLOCK(mapping)        write_unlock(&mapping->page_lock)
-#else
+
+#define KDEVT_INIT(val)                 { .value = val }
+#define LTIME_S(time)                   (time.tv_sec)
+#define USERMODEHELPER(path, argv, envp) \
+                                        call_usermodehelper(path, argv, envp, 1)
+#define ll_path_lookup                  path_lookup
+
+
+#define ll_pgcache_lock(mapping)          spin_lock(&mapping->page_lock)
+#define ll_pgcache_unlock(mapping)        spin_unlock(&mapping->page_lock)
+
+#else /* 2.4.. */
+
 # define PGCACHE_WRLOCK(mapping)          spin_lock(&pagecache_lock)
 # define PGCACHE_WRUNLOCK(mapping)        spin_unlock(&pagecache_lock)
 # define PGCACHE_WRLOCK(mapping)          spin_lock(&pagecache_lock)
 # define PGCACHE_WRUNLOCK(mapping)        spin_unlock(&pagecache_lock)
+
+/* 2.5 uses hlists for some things, like the d_hash.  we'll treat them
+ * as 2.5 and let macros drop back.. */
+#define hlist_entry                     list_entry
+#define hlist_head                      list_head
+#define hlist_node                      list_head
+#define HLIST_HEAD                      LIST_HEAD
+#define INIT_HLIST_HEAD                 INIT_LIST_HEAD
+#define hlist_del_init                  list_del_init
+#define hlist_add_head                  list_add
+#define hlist_for_each_safe             list_for_each_safe
+#define KDEVT_INIT(val)                 (val)
+#define ext3_xattr_set_handle           ext3_xattr_set
+#define try_module_get                  __MOD_INC_USE_COUNT
+#define module_put                      __MOD_DEC_USE_COUNT
+#define LTIME_S(time)                   (time)
+#ifndef CONFIG_RH_2_4_20
+#define cpu_online(cpu)                 (cpu_online_map & (1<<cpu))
 #endif
 #endif
+#define USERMODEHELPER(path, argv, envp) \
+                                        call_usermodehelper(path, argv, envp)
+static inline int ll_path_lookup(const char *path, unsigned flags, 
+                              struct nameidata *nd)
+{
+        int error = 0;
+        if (path_init(path, flags, nd))
+                error = path_walk(path, nd);
+        return error;
+}
+typedef long sector_t;
+
+#define ll_pgcache_lock(mapping)        spin_lock(&pagecache_lock)
+#define ll_pgcache_unlock(mapping)      spin_unlock(&pagecache_lock)
 
 
+#endif /* end of 2.4 compat macros */
 
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
 # define filemap_fdatasync(mapping)       filemap_fdatawrite(mapping)
 
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
 # define filemap_fdatasync(mapping)       filemap_fdatawrite(mapping)
 # define Page_Uptodate(page)              PageUptodate(page)
 #endif
 
 # define Page_Uptodate(page)              PageUptodate(page)
 #endif
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
-# define USERMODEHELPER(path, argv, envp) call_usermodehelper(path, argv, envp, 0)
-#else
-# define USERMODEHELPER(path, argv, envp) call_usermodehelper(path, argv, envp)
-#endif
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
-# define LL_CHECK_DIRTY(sb)              do { }while(0)
-#else
-# define LL_CHECK_DIRTY(sb)              ll_check_dirty(sb)
-#endif
-
 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
 #define  rb_node_s rb_node
 #define  rb_root_s rb_root
 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
 #define  rb_node_s rb_node
 #define  rb_root_s rb_root
@@ -73,4 +106,5 @@ typedef struct rb_root_s rb_root_t;
 typedef struct rb_node_s rb_node_t;
 #endif
 
 typedef struct rb_node_s rb_node_t;
 #endif
 
+#endif /* __KERNEL__ */
 #endif /* _COMPAT25_H */
 #endif /* _COMPAT25_H */
index 2db4196..8fc90ae 100644 (file)
@@ -188,6 +188,7 @@ struct ldlm_lock {
          * it's no longer in use.  If the lock is not granted, a process sleeps
          * on this waitq to learn when it becomes granted. */
         wait_queue_head_t     l_waitq;
          * it's no longer in use.  If the lock is not granted, a process sleeps
          * on this waitq to learn when it becomes granted. */
         wait_queue_head_t     l_waitq;
+        struct timeval        l_enqueued_time;
 };
 
 typedef int (*ldlm_res_compat)(struct ldlm_lock *child, struct ldlm_lock *new);
 };
 
 typedef int (*ldlm_res_compat)(struct ldlm_lock *child, struct ldlm_lock *new);
@@ -316,6 +317,8 @@ int ldlm_namespace_foreach_res(struct ldlm_namespace *ns,
                                ldlm_res_iterator_t iter, void *closure);
 
 int ldlm_replay_locks(struct obd_import *imp);
                                ldlm_res_iterator_t iter, void *closure);
 
 int ldlm_replay_locks(struct obd_import *imp);
+void ldlm_change_cbdata(struct ldlm_namespace *, struct ldlm_res_id *,
+                        ldlm_iterator_t iter, void *data);
 
 /* ldlm_extent.c */
 int ldlm_extent_compat(struct ldlm_lock *, struct ldlm_lock *);
 
 /* ldlm_extent.c */
 int ldlm_extent_compat(struct ldlm_lock *, struct ldlm_lock *);
@@ -450,6 +453,8 @@ int ldlm_cli_cancel_unused(struct ldlm_namespace *, struct ldlm_res_id *,
 
 /* mds/handler.c */
 /* This has to be here because recursive inclusion sucks. */
 
 /* mds/handler.c */
 /* This has to be here because recursive inclusion sucks. */
+int intent_disposition(struct ldlm_reply *rep, int flag);
+void intent_set_disposition(struct ldlm_reply *rep, int flag);
 int mds_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
                      void *data, int flag);
 
 int mds_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
                      void *data, int flag);
 
index 6939a95..677ddc6 100644 (file)
@@ -11,7 +11,7 @@
 #define __EXPORT_H
 
 #include <linux/lustre_idl.h>
 #define __EXPORT_H
 
 #include <linux/lustre_idl.h>
-#include <linux/obd_filter.h>
+#include <linux/lustre_dlm.h>
 
 struct mds_client_data;
 
 
 struct mds_client_data;
 
@@ -19,7 +19,8 @@ struct mds_export_data {
         struct list_head        med_open_head;
         spinlock_t              med_open_lock;
         struct mds_client_data *med_mcd;
         struct list_head        med_open_head;
         spinlock_t              med_open_lock;
         struct mds_client_data *med_mcd;
-        int                     med_off;
+        loff_t                  med_off;
+        int                     med_idx;
 };
 
 struct ldlm_export_data {
 };
 
 struct ldlm_export_data {
@@ -37,6 +38,16 @@ struct ec_export_data { /* echo client */
         struct list_head eced_locks;
 };
 
         struct list_head eced_locks;
 };
 
+/* In-memory access to client data from OST struct */
+struct filter_client_data;
+struct filter_export_data {
+        struct list_head           fed_open_head; //files to close on disconnect
+        spinlock_t                 fed_lock;      /* protects fed_open_head */
+        struct filter_client_data *fed_fcd;
+        loff_t                     fed_lr_off;
+        int                        fed_lr_idx;
+};
+
 struct obd_export {
         struct portals_handle     exp_handle;
         atomic_t                  exp_refcount;
 struct obd_export {
         struct portals_handle     exp_handle;
         atomic_t                  exp_refcount;
@@ -48,7 +59,8 @@ struct obd_export {
         struct ptlrpc_request    *exp_outstanding_reply;
         time_t                    exp_last_request_time;
         spinlock_t                exp_lock; /* protects flags int below */
         struct ptlrpc_request    *exp_outstanding_reply;
         time_t                    exp_last_request_time;
         spinlock_t                exp_lock; /* protects flags int below */
-        int                       exp_failed:1, exp_failover:1;
+        int                       exp_failed:1;
+        int                       exp_flags;
         union {
                 struct mds_export_data    eu_mds_data;
                 struct filter_export_data eu_filter_data;
         union {
                 struct mds_export_data    eu_mds_data;
                 struct filter_export_data eu_filter_data;
index fc00fe1..37ffc4f 100644 (file)
@@ -30,7 +30,8 @@
 #include <linux/obd.h>
 #include <linux/fs.h>
 
 #include <linux/obd.h>
 #include <linux/fs.h>
 
-typedef void (*fsfilt_cb_t)(struct obd_device *obd, __u64 last_rcvd, int error);
+typedef void (*fsfilt_cb_t)(struct obd_device *obd, __u64 last_rcvd,
+                            void *data, int error);
 
 struct fsfilt_objinfo {
         struct dentry *fso_dentry;
 
 struct fsfilt_objinfo {
         struct dentry *fso_dentry;
@@ -41,9 +42,9 @@ struct fsfilt_operations {
         struct list_head fs_list;
         struct module *fs_owner;
         char   *fs_type;
         struct list_head fs_list;
         struct module *fs_owner;
         char   *fs_type;
-        void   *(* fs_start)(struct inode *inode, int op);
+        void   *(* fs_start)(struct inode *inode, int op, void *desc_private);
         void   *(* fs_brw_start)(int objcount, struct fsfilt_objinfo *fso,
         void   *(* fs_brw_start)(int objcount, struct fsfilt_objinfo *fso,
-                                 int niocount, struct niobuf_remote *nb);
+                                 int niocount, void *desc_private);
         int     (* fs_commit)(struct inode *inode, void *handle,int force_sync);
         int     (* fs_setattr)(struct dentry *dentry, void *handle,
                                struct iattr *iattr, int do_trunc);
         int     (* fs_commit)(struct inode *inode, void *handle,int force_sync);
         int     (* fs_setattr)(struct dentry *dentry, void *handle,
                                struct iattr *iattr, int do_trunc);
@@ -54,16 +55,19 @@ struct fsfilt_operations {
                                 loff_t *offset);
         int     (* fs_journal_data)(struct file *file);
         int     (* fs_set_last_rcvd)(struct obd_device *obd, __u64 last_rcvd,
                                 loff_t *offset);
         int     (* fs_journal_data)(struct file *file);
         int     (* fs_set_last_rcvd)(struct obd_device *obd, __u64 last_rcvd,
-                                     void *handle, fsfilt_cb_t cb_func);
+                                     void *handle, fsfilt_cb_t cb_func,
+                                     void *cb_data);
         int     (* fs_statfs)(struct super_block *sb, struct obd_statfs *osfs);
         int     (* fs_sync)(struct super_block *sb);
         int     (* fs_prep_san_write)(struct inode *inode, long *blocks,
                                       int nblocks, loff_t newsize);
         int     (* fs_statfs)(struct super_block *sb, struct obd_statfs *osfs);
         int     (* fs_sync)(struct super_block *sb);
         int     (* fs_prep_san_write)(struct inode *inode, long *blocks,
                                       int nblocks, loff_t newsize);
+        int     (* fs_write_record)(struct file *, char *, int size, loff_t *);
+        int     (* fs_read_record)(struct file *, char *, int size, loff_t *);
 };
 
 extern int fsfilt_register_ops(struct fsfilt_operations *fs_ops);
 extern void fsfilt_unregister_ops(struct fsfilt_operations *fs_ops);
 };
 
 extern int fsfilt_register_ops(struct fsfilt_operations *fs_ops);
 extern void fsfilt_unregister_ops(struct fsfilt_operations *fs_ops);
-extern struct fsfilt_operations *fsfilt_get_ops(char *type);
+extern struct fsfilt_operations *fsfilt_get_ops(const char *type);
 extern void fsfilt_put_ops(struct fsfilt_operations *fs_ops);
 
 #define FSFILT_OP_UNLINK         1
 extern void fsfilt_put_ops(struct fsfilt_operations *fs_ops);
 
 #define FSFILT_OP_UNLINK         1
@@ -75,26 +79,53 @@ extern void fsfilt_put_ops(struct fsfilt_operations *fs_ops);
 #define FSFILT_OP_MKNOD          7
 #define FSFILT_OP_SETATTR        8
 #define FSFILT_OP_LINK           9
 #define FSFILT_OP_MKNOD          7
 #define FSFILT_OP_SETATTR        8
 #define FSFILT_OP_LINK           9
+#define FSFILT_OP_CREATE_LOG    10
+#define FSFILT_OP_UNLINK_LOG    11
 
 
-static inline void *fsfilt_start(struct obd_device *obd,
-                                 struct inode *inode, int op)
+static inline void *fsfilt_start(struct obd_device *obd, struct inode *inode,
+                                 int op, struct obd_trans_info *oti)
 {
         unsigned long now = jiffies;
 {
         unsigned long now = jiffies;
-        void *handle = obd->obd_fsops->fs_start(inode, op);
-        CDEBUG(D_HA, "started handle %p\n", handle);
-        if (time_after(jiffies, now + 15*HZ))
+        void *parent_handle = oti ? oti->oti_handle : NULL;
+        void *handle = obd->obd_fsops->fs_start(inode, op, parent_handle);
+        CDEBUG(D_HA, "started handle %p (%p)\n", handle, parent_handle);
+
+        if (oti != NULL) {
+                if (parent_handle == NULL) {
+                        oti->oti_handle = handle;
+                } else if (handle != parent_handle) {
+                        CERROR("mismatch: parent %p, handle %p, oti %p\n",
+                               parent_handle, handle, oti->oti_handle);
+                        LBUG();
+                }
+        }
+        if (time_after(jiffies, now + 15 * HZ))
                 CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
         return handle;
 }
 
 static inline void *fsfilt_brw_start(struct obd_device *obd, int objcount,
                                      struct fsfilt_objinfo *fso, int niocount,
                 CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
         return handle;
 }
 
 static inline void *fsfilt_brw_start(struct obd_device *obd, int objcount,
                                      struct fsfilt_objinfo *fso, int niocount,
-                                     struct niobuf_remote *nb)
+                                     struct obd_trans_info *oti)
 {
         unsigned long now = jiffies;
 {
         unsigned long now = jiffies;
-        void *handle = obd->obd_fsops->fs_brw_start(objcount, fso, niocount,nb);
-        CDEBUG(D_HA, "started handle %p\n", handle);
-        if (time_after(jiffies, now + 15*HZ))
+        void *parent_handle = oti ? oti->oti_handle : NULL;
+        void *handle;
+
+        handle = obd->obd_fsops->fs_brw_start(objcount, fso, niocount,
+                                              parent_handle);
+        CDEBUG(D_HA, "started handle %p (%p)\n", handle, parent_handle);
+
+        if (oti != NULL) {
+                if (parent_handle == NULL) {
+                        oti->oti_handle = handle;
+                } else if (handle != parent_handle) {
+                        CERROR("mismatch: parent %p, handle %p, oti %p\n",
+                               parent_handle, handle, oti->oti_handle);
+                        LBUG();
+                }
+        }
+        if (time_after(jiffies, now + 15 * HZ))
                 CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
         return handle;
 }
                 CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
         return handle;
 }
@@ -105,7 +136,7 @@ static inline int fsfilt_commit(struct obd_device *obd, struct inode *inode,
         unsigned long now = jiffies;
         int rc = obd->obd_fsops->fs_commit(inode, handle, force_sync);
         CDEBUG(D_HA, "committing handle %p\n", handle);
         unsigned long now = jiffies;
         int rc = obd->obd_fsops->fs_commit(inode, handle, force_sync);
         CDEBUG(D_HA, "committing handle %p\n", handle);
-        if (time_after(jiffies, now + 15*HZ))
+        if (time_after(jiffies, now + 15 * HZ))
                 CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
         return rc;
 }
                 CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
         return rc;
 }
@@ -116,9 +147,8 @@ static inline int fsfilt_setattr(struct obd_device *obd, struct dentry *dentry,
         unsigned long now = jiffies;
         int rc;
         rc = obd->obd_fsops->fs_setattr(dentry, handle, iattr, do_trunc);
         unsigned long now = jiffies;
         int rc;
         rc = obd->obd_fsops->fs_setattr(dentry, handle, iattr, do_trunc);
-        if (time_after(jiffies, now + 15*HZ))
+        if (time_after(jiffies, now + 15 * HZ))
                 CERROR("long setattr time %lus\n", (jiffies - now) / HZ);
                 CERROR("long setattr time %lus\n", (jiffies - now) / HZ);
-
         return rc;
 }
 
         return rc;
 }
 
@@ -147,9 +177,11 @@ static inline int fsfilt_journal_data(struct obd_device *obd, struct file *file)
 }
 
 static inline int fsfilt_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
 }
 
 static inline int fsfilt_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
-                                       void *handle, fsfilt_cb_t cb_func)
+                                       void *handle, fsfilt_cb_t cb_func,
+                                       void *cb_data)
 {
 {
-        return obd->obd_fsops->fs_set_last_rcvd(obd, last_rcvd,handle,cb_func);
+        return obd->obd_fsops->fs_set_last_rcvd(obd, last_rcvd, handle,
+                                                cb_func, cb_data);
 }
 
 static inline int fsfilt_statfs(struct obd_device *obd, struct super_block *fs,
 }
 
 static inline int fsfilt_statfs(struct obd_device *obd, struct super_block *fs,
@@ -172,6 +204,19 @@ static inline int fs_prep_san_write(struct obd_device *obd,
         return obd->obd_fsops->fs_prep_san_write(inode, blocks,
                                                  nblocks, newsize);
 }
         return obd->obd_fsops->fs_prep_san_write(inode, blocks,
                                                  nblocks, newsize);
 }
+
+static inline int fsfilt_read_record(struct obd_device *obd, struct file *file,
+                                     char *buf, loff_t size, loff_t *offs)
+{
+        return obd->obd_fsops->fs_read_record(file, buf, size, offs);
+}
+
+static inline int fsfilt_write_record(struct obd_device *obd, struct file *file,
+                                      char *buf, loff_t size, loff_t *offs)
+{
+        return obd->obd_fsops->fs_write_record(file, buf, size, offs);
+}
+
 #endif /* __KERNEL__ */
 
 #endif
 #endif /* __KERNEL__ */
 
 #endif
index f4a5f2d..055b7a4 100644 (file)
@@ -18,7 +18,7 @@
  *   along with Lustre; if not, write to the Free Software
  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  *
  *   along with Lustre; if not, write to the Free Software
  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  *
- * (Un)packing of OST requests
+ * Lustre wire protocol definitions.
  *
  * We assume all nodes are either little-endian or big-endian, and we
  * always send messages in the sender's native format.  The receiver
  *
  * We assume all nodes are either little-endian or big-endian, and we
  * always send messages in the sender's native format.  The receiver
@@ -29,9 +29,9 @@
  * implemented either here, inline (trivial implementations) or in
  * ptlrpc/pack_generic.c.  These 'swabbers' convert the type from "other"
  * endian, in-place in the message buffer.
  * implemented either here, inline (trivial implementations) or in
  * ptlrpc/pack_generic.c.  These 'swabbers' convert the type from "other"
  * endian, in-place in the message buffer.
- * 
+ *
  * A swabber takes a single pointer argument.  The caller must already have
  * A swabber takes a single pointer argument.  The caller must already have
- * verified that the length of the message buffer >= sizeof (type).  
+ * verified that the length of the message buffer >= sizeof (type).
  *
  * For variable length types, a second 'lustre_swab_v_xxxtypexxx()' routine
  * may be defined that swabs just the variable part, after the caller has
  *
  * For variable length types, a second 'lustre_swab_v_xxxtypexxx()' routine
  * may be defined that swabs just the variable part, after the caller has
@@ -90,29 +90,33 @@ extern struct obd_uuid lctl_fake_uuid;
  * FOO_BULK_PORTAL    is for incoming bulk on the FOO
  */
 
  * FOO_BULK_PORTAL    is for incoming bulk on the FOO
  */
 
-#define CONNMGR_REQUEST_PORTAL  1
-#define CONNMGR_REPLY_PORTAL    2
-//#define OSC_REQUEST_PORTAL      3
-#define OSC_REPLY_PORTAL        4
-//#define OSC_BULK_PORTAL         5
-#define OST_REQUEST_PORTAL      6
-//#define OST_REPLY_PORTAL        7
-#define OST_BULK_PORTAL         8
-//#define MDC_REQUEST_PORTAL      9
-#define MDC_REPLY_PORTAL        10
-//#define MDC_BULK_PORTAL         11
-#define MDS_REQUEST_PORTAL      12
-//#define MDS_REPLY_PORTAL        13
-#define MDS_BULK_PORTAL         14
-#define LDLM_CB_REQUEST_PORTAL     15
-#define LDLM_CB_REPLY_PORTAL       16
+#define CONNMGR_REQUEST_PORTAL          1
+#define CONNMGR_REPLY_PORTAL            2
+//#define OSC_REQUEST_PORTAL            3
+#define OSC_REPLY_PORTAL                4
+//#define OSC_BULK_PORTAL               5
+#define OST_REQUEST_PORTAL              6
+//#define OST_REPLY_PORTAL              7
+#define OST_BULK_PORTAL                 8
+//#define MDC_REQUEST_PORTAL            9
+#define MDC_REPLY_PORTAL               10
+//#define MDC_BULK_PORTAL              11
+#define MDS_REQUEST_PORTAL             12
+//#define MDS_REPLY_PORTAL             13
+#define MDS_BULK_PORTAL                14
+#define LDLM_CB_REQUEST_PORTAL         15
+#define LDLM_CB_REPLY_PORTAL           16
 #define LDLM_CANCEL_REQUEST_PORTAL     17
 #define LDLM_CANCEL_REPLY_PORTAL       18
 #define PTLBD_REQUEST_PORTAL           19
 #define PTLBD_REPLY_PORTAL             20
 #define PTLBD_BULK_PORTAL              21
 #define LDLM_CANCEL_REQUEST_PORTAL     17
 #define LDLM_CANCEL_REPLY_PORTAL       18
 #define PTLBD_REQUEST_PORTAL           19
 #define PTLBD_REPLY_PORTAL             20
 #define PTLBD_BULK_PORTAL              21
-#define MDS_SETATTR_PORTAL      22
-#define MDS_READPAGE_PORTAL     23
+#define MDS_SETATTR_PORTAL             22
+#define MDS_READPAGE_PORTAL            23
+#define MGMT_REQUEST_PORTAL            24
+#define MGMT_REPLY_PORTAL              25
+#define MGMT_CLI_REQUEST_PORTAL        26
+#define MGMT_CLI_REPLY_PORTAL          27
 
 #define SVC_KILLED               1
 #define SVC_EVENT                2
 
 #define SVC_KILLED               1
 #define SVC_EVENT                2
@@ -159,7 +163,7 @@ struct lustre_msg {
 
 static inline int lustre_msg_swabbed (struct lustre_msg *msg)
 {
 
 static inline int lustre_msg_swabbed (struct lustre_msg *msg)
 {
-        return (msg->magic == __swab32 (PTLRPC_MSG_MAGIC));
+        return (msg->magic == __swab32(PTLRPC_MSG_MAGIC));
 }
 
 /* Flags that are operation-specific go in the top 16 bits. */
 }
 
 /* Flags that are operation-specific go in the top 16 bits. */
@@ -207,9 +211,10 @@ static inline void lustre_msg_set_op_flags(struct lustre_msg *msg, int flags)
  * Flags for all connect opcodes (MDS_CONNECT, OST_CONNECT)
  */
 
  * Flags for all connect opcodes (MDS_CONNECT, OST_CONNECT)
  */
 
-#define MSG_CONNECT_RECOVERING 0x1
-#define MSG_CONNECT_RECONNECT  0x2
+#define MSG_CONNECT_RECOVERING  0x1
+#define MSG_CONNECT_RECONNECT   0x2
 #define MSG_CONNECT_REPLAYABLE  0x4
 #define MSG_CONNECT_REPLAYABLE  0x4
+#define MSG_CONNECT_PEER        0x8
 
 /*
  *   OST requests: OBDO & OBD request records
 
 /*
  *   OST requests: OBDO & OBD request records
@@ -234,13 +239,13 @@ typedef enum {
         OST_SAN_READ   = 14,
         OST_SAN_WRITE  = 15,
         OST_SYNCFS     = 16,
         OST_SAN_READ   = 14,
         OST_SAN_WRITE  = 15,
         OST_SYNCFS     = 16,
+        OST_SET_INFO   = 17,
         OST_LAST_OPC
 } ost_cmd_t;
 #define OST_FIRST_OPC  OST_REPLY
 /* When adding OST RPC opcodes, please update 
  * LAST/FIRST macros used in ptlrpc/ptlrpc_internals.h */
 
         OST_LAST_OPC
 } ost_cmd_t;
 #define OST_FIRST_OPC  OST_REPLY
 /* When adding OST RPC opcodes, please update 
  * LAST/FIRST macros used in ptlrpc/ptlrpc_internals.h */
 
-
 typedef uint64_t        obd_id;
 typedef uint64_t        obd_gr;
 typedef uint64_t        obd_time;
 typedef uint64_t        obd_id;
 typedef uint64_t        obd_gr;
 typedef uint64_t        obd_time;
@@ -324,8 +329,23 @@ struct lov_mds_md {
 #define OBD_MD_LINKNAME (0x00040000)    /* symbolic link target */
 #define OBD_MD_FLHANDLE (0x00080000)    /* file handle */
 #define OBD_MD_FLCKSUM  (0x00100000)    /* bulk data checksum */
 #define OBD_MD_LINKNAME (0x00040000)    /* symbolic link target */
 #define OBD_MD_FLHANDLE (0x00080000)    /* file handle */
 #define OBD_MD_FLCKSUM  (0x00100000)    /* bulk data checksum */
+#define OBD_MD_FLQOS    (0x00200000)    /* quality of service stats */
+#define OBD_MD_FLOSCOPQ (0x00400000)    /* osc opaque data */
+#define OBD_MD_FLCOOKIE (0x00800000)    /* log cancellation cookie */
 #define OBD_MD_FLNOTOBD (~(OBD_MD_FLOBDFLG | OBD_MD_FLBLOCKS | OBD_MD_LINKNAME|\
 #define OBD_MD_FLNOTOBD (~(OBD_MD_FLOBDFLG | OBD_MD_FLBLOCKS | OBD_MD_LINKNAME|\
-                           OBD_MD_FLEASIZE | OBD_MD_FLHANDLE | OBD_MD_FLCKSUM))
+                           OBD_MD_FLEASIZE | OBD_MD_FLHANDLE | OBD_MD_FLCKSUM|\
+                           OBD_MD_FLQOS | OBD_MD_FLOSCOPQ | OBD_MD_FLCOOKIE))
+
+/* View the start of the obdo's o_inline area as a lustre_handle. */
+static inline struct lustre_handle *obdo_handle(struct obdo *oa)
+{
+        struct lustre_handle *handle = (struct lustre_handle *)oa->o_inline;
+
+        return handle;
+}
+
+/* View the part of the obdo's o_inline area that starts
+ * sizeof(struct lustre_handle) elements in as a llog_cookie, i.e. the
+ * slot following the one returned by obdo_handle(). */
+static inline struct llog_cookie *obdo_logcookie(struct obdo *oa)
+{
+        struct llog_cookie *cookie;
+
+        cookie = (struct llog_cookie *)
+                 &oa->o_inline[sizeof(struct lustre_handle)];
+        return cookie;
+}
 
 struct obd_statfs {
         __u64           os_type;
 
 struct obd_statfs {
         __u64           os_type;
@@ -399,6 +419,8 @@ typedef enum {
         MDS_GETSTATUS    = 40,
         MDS_STATFS       = 41,
         MDS_GETLOVINFO   = 42,
         MDS_GETSTATUS    = 40,
         MDS_STATFS       = 41,
         MDS_GETLOVINFO   = 42,
+        MDS_PIN          = 43,
+        MDS_UNPIN        = 44,
         MDS_LAST_OPC
 } mds_cmd_t;
 #define MDS_FIRST_OPC    MDS_GETATTR
         MDS_LAST_OPC
 } mds_cmd_t;
 #define MDS_FIRST_OPC    MDS_GETATTR
@@ -417,12 +439,20 @@ typedef enum {
 #define REINT_OPEN     6
 #define REINT_MAX      6
 
 #define REINT_OPEN     6
 #define REINT_MAX      6
 
-#define IT_INTENT_EXEC   1
-#define IT_OPEN_LOOKUP  (1 << 1)
-#define IT_OPEN_NEG     (1 << 2)
-#define IT_OPEN_POS     (1 << 3)
-#define IT_OPEN_CREATE  (1 << 4)
-#define IT_OPEN_OPEN    (1 << 5)
+/* Intent disposition bits: together they outline what was executed. */
+#define DISP_IT_EXECD      (1 << 0)
+#define DISP_LOOKUP_EXECD  (1 << 1)
+#define DISP_LOOKUP_NEG    (1 << 2)
+#define DISP_LOOKUP_POS    (1 << 3)
+#define DISP_OPEN_CREATE   (1 << 4)
+#define DISP_OPEN_OPEN     (1 << 5)
+#define DISP_ENQ_COMPLETE  (1 << 6)
+
+
+struct ll_uctxt {
+        __u32 gid1;
+        __u32 gid2;
+};
 
 struct ll_fid {
         __u64 id;
 
 struct ll_fid {
         __u64 id;
@@ -504,6 +534,11 @@ struct mds_rec_setattr {
         __u32           sa_suppgid;
 };
 
         __u32           sa_suppgid;
 };
 
+/* Remove this once we declare it in include/linux/fs.h (v21 kernel patch?) */
+#ifndef ATTR_CTIME_SET
+#define ATTR_CTIME_SET 0x2000
+#endif
+
 extern void lustre_swab_mds_rec_setattr (struct mds_rec_setattr *sa);
 
 struct mds_rec_create {
 extern void lustre_swab_mds_rec_setattr (struct mds_rec_setattr *sa);
 
 struct mds_rec_create {
@@ -720,9 +755,109 @@ struct ptlbd_rsp {
 extern void lustre_swab_ptlbd_rsp (struct ptlbd_rsp *r);
 
 /*
 extern void lustre_swab_ptlbd_rsp (struct ptlbd_rsp *r);
 
 /*
+ * Opcodes for management/monitoring node.
+ */
+#define MGMT_CONNECT    250
+#define MGMT_DISCONNECT 251
+#define MGMT_EXCEPTION  252 /* node died, etc. */
+
+/*
  * Opcodes for multiple servers.
  */
 
  * Opcodes for multiple servers.
  */
 
-#define OBD_PING 400
+#define OBD_PING       400
+#define OBD_LOG_CANCEL 401
+#define OBD_LAST_OPC  (OBD_LOG_CANCEL + 1)
+#define OBD_FIRST_OPC OBD_PING
+
+/* catalog of log objects */
+
+/* Identifier for a single log object */
+struct llog_logid {
+        __u64                   lgl_oid;
+        __u32                   lgl_ogen;
+};
+
+/* Log data record types - there is no specific reason that these need to
+ * be related to the RPC opcodes, but no reason not to (may be handy later?)
+ */
+typedef enum {
+        OST_CREATE_REC = 0x10600000 | (OST_CREATE << 8),
+        OST_ORPHAN_REC = 0x10600000 | (OST_DESTROY << 8),
+        MDS_UNLINK_REC = 0x10610000 | (MDS_REINT << 8) | REINT_UNLINK,
+        LLOG_CATALOG_MAGIC = 0x1062e67d,
+        LLOG_OBJECT_MAGIC = 0x10645539,
+} llog_op_type;
+
+/* Log record header - stored in originating host endian order (use magic to
+ * check order).
+ * Each record must start with this struct, end with a __u32 for the struct
+ * length, and be a multiple of 64 bits in size.
+ */
+struct llog_trans_hdr {
+        __u32                   lth_len;
+        __u32                   lth_type;
+};
+
+struct llog_create_rec {
+        struct llog_trans_hdr   lcr_hdr;
+        struct ll_fid           lcr_fid;
+        obd_id                  lcr_oid;
+        obd_count               lcr_ogen;
+        __u32                   lcr_end_len;
+} __attribute__((packed));
+
+struct llog_orphan_rec {
+        struct llog_trans_hdr   lor_hdr;
+        obd_id                  lor_oid;
+        obd_count               lor_ogen;
+        __u32                   lor_end_len;
+} __attribute__((packed));
+
+struct llog_unlink_rec {
+        struct llog_trans_hdr   lur_hdr;
+        obd_id                  lur_oid;
+        obd_count               lur_ogen;
+        __u32                   lur_end_len;
+} __attribute__((packed));
+
+/* On-disk header structure of each log object - stored in creating host
+ * endian order, with the exception of the bitmap - stored in little endian
+ * order so that we can use ext2_{clear,set,test}_bit() for proper/optimized
+ * little-endian handling of bitmaps (which are otherwise a pain to handle).
+ */
+#define LLOG_CHUNK_SIZE         4096
+#define LLOG_HEADER_SIZE        (96)
+#define LLOG_BITMAP_BYTES       (LLOG_CHUNK_SIZE - LLOG_HEADER_SIZE)
+
+#define LLOG_MIN_REC_SIZE       (16) /* round(struct llog_trans_hdr+end_len) */
+
+struct llog_object_hdr {
+        struct llog_trans_hdr   llh_hdr;
+        __u64                   llh_timestamp;
+        __u32                   llh_count;
+        __u16                   llh_bitmap_offset;
+        __u16                   llh_unused;
+        struct obd_uuid         llh_tgtuuid;
+        __u8                    llh_padding[3];
+        __u32                   llh_reserved[LLOG_HEADER_SIZE/sizeof(__u32)-17];
+        __u32                   llh_bitmap[LLOG_BITMAP_BYTES/sizeof(__u32)];
+        __u32                   llh_hdr_end_len;
+};
+
+/* Classify the byte order of a log object header by its type/magic field:
+ *   1  - the field holds LLOG_OBJECT_MAGIC byte-swapped,
+ *   0  - the field holds LLOG_OBJECT_MAGIC as-is,
+ *  -1  - the field matches the magic in neither order.
+ */
+static inline int llog_log_swabbed(struct llog_object_hdr *hdr)
+{
+        __u32 magic = hdr->llh_hdr.lth_type;
+
+        if (magic == __swab32(LLOG_OBJECT_MAGIC))
+                return 1;
+
+        return (magic == LLOG_OBJECT_MAGIC) ? 0 : -1;
+}
+
+/* log cookies are used to reference a specific log file and a record therein */
+struct llog_cookie {
+        struct llog_logid       lgc_lgl;
+        __u32                   lgc_index;
+};
 
 #endif
 
 #endif
index b18e2d2..467132b 100644 (file)
@@ -80,7 +80,7 @@ void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id);
 
 int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf);
 int client_sanobd_setup(struct obd_device *obddev, obd_count len, void *buf);
 
 int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf);
 int client_sanobd_setup(struct obd_device *obddev, obd_count len, void *buf);
-int client_obd_cleanup(struct obd_device * obddev, int force, int failover);
+int client_obd_cleanup(struct obd_device * obddev, int flags);
 struct client_obd *client_conn2cli(struct lustre_handle *conn);
 struct obd_device *client_tgtuuid2obd(struct obd_uuid *tgtuuid);
 
 struct client_obd *client_conn2cli(struct lustre_handle *conn);
 struct obd_device *client_tgtuuid2obd(struct obd_uuid *tgtuuid);
 
@@ -89,13 +89,16 @@ struct obd_device *client_tgtuuid2obd(struct obd_uuid *tgtuuid);
  * the server, we can just send the whole struct unaltered. */
 struct obd_client_handle {
         struct lustre_handle och_fh;
  * the server, we can just send the whole struct unaltered. */
 struct obd_client_handle {
         struct lustre_handle och_fh;
+        struct llog_cookie och_cookie;
         struct ptlrpc_request *och_req;
         __u32 och_magic;
 };
 #define OBD_CLIENT_HANDLE_MAGIC 0xd15ea5ed
 
 /* statfs_pack.c */
         struct ptlrpc_request *och_req;
         __u32 och_magic;
 };
 #define OBD_CLIENT_HANDLE_MAGIC 0xd15ea5ed
 
 /* statfs_pack.c */
-int obd_self_statfs(struct obd_device *dev, struct statfs *sfs);
+struct statfs;
+void statfs_pack(struct obd_statfs *osfs, struct kstatfs *sfs);
+void statfs_unpack(struct kstatfs *sfs, struct obd_statfs *osfs);
 
 /* l_lock.c */
 struct lustre_lock {
 
 /* l_lock.c */
 struct lustre_lock {
index 81184e7..fa83fb2 100644 (file)
 
 #ifdef __KERNEL__
 
 
 #ifdef __KERNEL__
 
+#include <linux/version.h>
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#include <asm/statfs.h>
+#endif
+
 #include <linux/fs.h>
 #include <linux/fs.h>
+#include <linux/dcache.h>
 #include <linux/ext2_fs.h>
 #include <linux/proc_fs.h>
 
 #include <linux/ext2_fs.h>
 #include <linux/proc_fs.h>
 
@@ -46,20 +53,62 @@ struct lustre_intent_data {
         __u32 it_lock_mode;
 };
 
         __u32 it_lock_mode;
 };
 
+#define LL_IT2STR(it) ((it) ? ldlm_it2str((it)->it_op) : "0")
+
+/* Return the lookup intent associated with @nd.  Before 2.5 the nameidata
+ * field 'it' is itself returned; from 2.5 on its address is returned. */
+static inline struct lookup_intent *ll_nd2it(struct nameidata *nd)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+        return nd->it;
+#else
+        return &nd->it;
+#endif
+}
+
 struct ll_dentry_data {
 struct ll_dentry_data {
-        struct semaphore      lld_it_sem;
+        int                      lld_cwd_count;
+        int                      lld_mnt_count;
+        struct obd_client_handle lld_cwd_och;
+        struct obd_client_handle lld_mnt_och;
 };
 
 };
 
-#define ll_d2d(dentry) ((struct ll_dentry_data*) dentry->d_fsdata)
+/* Fetch the ll_dentry_data hung off a dentry's d_fsdata.  The argument is
+ * parenthesized so that any pointer expression may be passed safely. */
+#define ll_d2d(de) ((struct ll_dentry_data *)(de)->d_fsdata)
 
 extern struct file_operations ll_pgcache_seq_fops;
 
 
 extern struct file_operations ll_pgcache_seq_fops;
 
+/*
+ * XXX used in obdecho/echo_client.c; must move (pjb)
+ * 'p' list, as it's a list of pages linked together
+ * by ->private.
+ */
+struct plist {
+        struct page *pl_head;
+        struct page *pl_tail;
+        int pl_num;
+};
+
+struct ll_dirty_offsets {
+        rb_root_t       do_root;
+        spinlock_t      do_lock;
+        unsigned long   do_num_dirty;
+};
+
+struct ll_writeback_pages {
+        obd_count npgs, max;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+        int rw;
+        struct inode *inode;
+        struct brw_page pga[0];
+#else
+        struct brw_page *pga;
+#endif
+};
+
 struct ll_inode_info {
         struct lov_stripe_md   *lli_smd;
         char                   *lli_symlink_name;
         struct semaphore        lli_open_sem;
         struct list_head        lli_read_extents;
 struct ll_inode_info {
         struct lov_stripe_md   *lli_smd;
         char                   *lli_symlink_name;
         struct semaphore        lli_open_sem;
         struct list_head        lli_read_extents;
-        loff_t                  lli_maxbytes;
+        __u64                   lli_maxbytes;
         spinlock_t              lli_read_extent_lock;
         unsigned long           lli_flags;
 #define LLI_F_HAVE_SIZE_LOCK    0
         spinlock_t              lli_read_extent_lock;
         unsigned long           lli_flags;
 #define LLI_F_HAVE_SIZE_LOCK    0
@@ -81,13 +130,6 @@ struct ll_read_extent {
         struct ldlm_extent re_extent;
 };
 
         struct ldlm_extent re_extent;
 };
 
-int ll_check_dirty( struct super_block *sb );
-int ll_batch_writepage( struct inode *inode, struct page *page );
-
-/* interpet return codes from intent lookup */
-#define LL_LOOKUP_POSITIVE 1
-#define LL_LOOKUP_NEGATIVE 2
-
 #define LL_SUPER_MAGIC 0x0BD00BD0
 
 #define LL_COMMITCBD_STOPPING  0x1
 #define LL_SUPER_MAGIC 0x0BD00BD0
 
 #define LL_COMMITCBD_STOPPING  0x1
@@ -118,14 +160,22 @@ struct ll_sb_info {
         struct lprocfs_stats     *ll_stats; /* lprocfs stats counter */
 };
 
         struct lprocfs_stats     *ll_stats; /* lprocfs stats counter */
 };
 
-static inline struct ll_sb_info *ll_s2sbi(struct super_block *sb)
-{
+
 #if  (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
 #if  (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-        return (struct ll_sb_info *)(sb->s_fs_info);
-#else
-        return (struct ll_sb_info *)(sb->u.generic_sbp);
-#endif
+#define    ll_s2sbi(sb)     ((struct ll_sb_info *)((sb)->s_fs_info))
+void __d_rehash(struct dentry * entry, int lock);
+static inline __u64 ll_ts2u64(struct timespec *time)
+{
+        __u64 t = time->tv_sec;
+        return t;
+}
+#else  /* 2.4 here */
+#define    ll_s2sbi(sb)     ((struct ll_sb_info *)((sb)->u.generic_sbp))
+static inline __u64 ll_ts2u64(time_t *time)
+{
+        return *time;
 }
 }
+#endif 
 
 static inline struct lustre_handle *ll_s2obdconn(struct super_block *sb)
 {
 
 static inline struct lustre_handle *ll_s2obdconn(struct super_block *sb)
 {
@@ -146,29 +196,6 @@ static inline struct ll_sb_info *ll_i2sbi(struct inode *inode)
         return ll_s2sbi(inode->i_sb);
 }
 
         return ll_s2sbi(inode->i_sb);
 }
 
-static inline void d_unhash_aliases(struct inode *inode)
-{
-        struct dentry *dentry = NULL;
-        struct list_head *tmp;
-        struct ll_sb_info *sbi = ll_i2sbi(inode);
-        ENTRY;
-
-        CDEBUG(D_INODE, "marking dentries for ino %lx/%x invalid\n",
-               inode->i_ino, inode->i_generation);
-
-        spin_lock(&dcache_lock);
-        list_for_each(tmp, &inode->i_dentry) {
-                dentry = list_entry(tmp, struct dentry, d_alias);
-
-                list_del_init(&dentry->d_hash);
-                dentry->d_flags |= DCACHE_LUSTRE_INVALID;
-                list_add(&dentry->d_hash, &sbi->ll_orphan_dentry_list);
-        }
-
-        spin_unlock(&dcache_lock);
-        EXIT;
-}
-
 // FIXME: replace the name of this with LL_I to conform to kernel stuff
 // static inline struct ll_inode_info *LL_I(struct inode *inode)
 static inline struct ll_inode_info *ll_i2info(struct inode *inode)
 // FIXME: replace the name of this with LL_I to conform to kernel stuff
 // static inline struct ll_inode_info *LL_I(struct inode *inode)
 static inline struct ll_inode_info *ll_i2info(struct inode *inode)
@@ -199,21 +226,17 @@ static inline int ll_mds_max_easize(struct super_block *sb)
         return sbi2mdc(ll_s2sbi(sb))->cl_max_mds_easize;
 }
 
         return sbi2mdc(ll_s2sbi(sb))->cl_max_mds_easize;
 }
 
-static inline loff_t ll_file_maxbytes(struct inode *inode)
+static inline __u64 ll_file_maxbytes(struct inode *inode)
 {
         return ll_i2info(inode)->lli_maxbytes;
 }
 
 /* namei.c */
 {
         return ll_i2info(inode)->lli_maxbytes;
 }
 
 /* namei.c */
-int ll_lock(struct inode *dir, struct dentry *dentry,
-            struct lookup_intent *it, struct lustre_handle *lockh);
-int ll_unlock(__u32 mode, struct lustre_handle *lockh);
-
-typedef int (*intent_finish_cb)(int flag, struct ptlrpc_request *,
+typedef int (*intent_finish_cb)(struct ptlrpc_request *,
                                 struct inode *parent, struct dentry **, 
                                 struct lookup_intent *, int offset, obd_id ino);
 int ll_intent_lock(struct inode *parent, struct dentry **,
                                 struct inode *parent, struct dentry **, 
                                 struct lookup_intent *, int offset, obd_id ino);
 int ll_intent_lock(struct inode *parent, struct dentry **,
-                   struct lookup_intent *, intent_finish_cb);
+                   struct lookup_intent *, int, intent_finish_cb);
 int ll_mdc_blocking_ast(struct ldlm_lock *lock,
                         struct ldlm_lock_desc *desc,
                         void *data, int flag);
 int ll_mdc_blocking_ast(struct ldlm_lock *lock,
                         struct ldlm_lock_desc *desc,
                         void *data, int flag);
@@ -222,51 +245,7 @@ void ll_prepare_mdc_op_data(struct mdc_op_data *data,
                             struct inode *i1, struct inode *i2,
                             const char *name, int namelen, int mode);
 
                             struct inode *i1, struct inode *i2,
                             const char *name, int namelen, int mode);
 
-/* dcache.c */
-void ll_intent_release(struct dentry *, struct lookup_intent *);
-
-/****
-
-I originally implmented these as functions, then realized a macro
-would be more helpful for debugging, so the CDEBUG messages show
-the current calling function.  The orignal functions are in llite/dcache.c
-
-int ll_save_intent(struct dentry * de, struct lookup_intent * it);
-struct lookup_intent * ll_get_intent(struct dentry * de);
-****/
-
-#define IT_RELEASED_MAGIC 0xDEADCAFE
-
-#define LL_SAVE_INTENT(de, it)                                                 \
-do {                                                                           \
-        LASSERT(ll_d2d(de) != NULL);                                           \
-                                                                               \
-        down(&ll_d2d(de)->lld_it_sem);                                         \
-        LASSERT(de->d_it == NULL);                                             \
-        de->d_it = it;                                                         \
-        CDEBUG(D_DENTRY,                                                       \
-               "D_IT DOWN dentry %p fsdata %p intent: %p %s sem %d\n",         \
-               de, ll_d2d(de), de->d_it, ldlm_it2str(de->d_it->it_op),         \
-               atomic_read(&(ll_d2d(de)->lld_it_sem.count)));                  \
-} while(0)
-
-#define LL_GET_INTENT(de, it)                                                  \
-do {                                                                           \
-        it = de->d_it;                                                         \
-                                                                               \
-        LASSERT(ll_d2d(de) != NULL);                                           \
-        LASSERT(it);                                                           \
-        LASSERT(it->it_op != IT_RELEASED_MAGIC);                               \
-                                                                               \
-        CDEBUG(D_DENTRY, "D_IT UP dentry %p fsdata %p intent: %p %s\n",        \
-               de, ll_d2d(de), de->d_it, ldlm_it2str(de->d_it->it_op));        \
-        de->d_it = NULL;                                                       \
-        it->it_op = IT_RELEASED_MAGIC;                                         \
-        up(&ll_d2d(de)->lld_it_sem);                                           \
-} while(0)
-
-#define LL_IT2STR(it) ((it) ? ldlm_it2str((it)->it_op) : "0")
-
+/* lprocfs.c */
 enum {
          LPROC_LL_DIRTY_HITS = 0,
          LPROC_LL_DIRTY_MISSES,
 enum {
          LPROC_LL_DIRTY_HITS = 0,
          LPROC_LL_DIRTY_MISSES,
@@ -312,8 +291,6 @@ extern struct file_operations ll_file_operations;
 extern struct inode_operations ll_file_inode_operations;
 extern struct inode_operations ll_special_inode_operations;
 struct ldlm_lock;
 extern struct inode_operations ll_file_inode_operations;
 extern struct inode_operations ll_special_inode_operations;
 struct ldlm_lock;
-int ll_extent_lock_callback(struct ldlm_lock *, struct ldlm_lock_desc *,
-                            void *data, int flag);
 int ll_extent_lock_no_validate(struct ll_file_data *fd, struct inode *inode,
                    struct lov_stripe_md *lsm, int mode,
                    struct ldlm_extent *extent, struct lustre_handle *lockh);
 int ll_extent_lock_no_validate(struct ll_file_data *fd, struct inode *inode,
                    struct lov_stripe_md *lsm, int mode,
                    struct ldlm_extent *extent, struct lustre_handle *lockh);
@@ -329,30 +306,22 @@ int ll_file_open(struct inode *inode, struct file *file);
 int ll_file_release(struct inode *inode, struct file *file);
 
 
 int ll_file_release(struct inode *inode, struct file *file);
 
 
-/* rw.c */
-struct page *ll_getpage(struct inode *inode, unsigned long offset,
-                        int create, int locked);
-void ll_truncate(struct inode *inode);
 
 /* super.c */
 void ll_update_inode(struct inode *, struct mds_body *, struct lov_stripe_md *);
 int ll_setattr_raw(struct inode *inode, struct iattr *attr);
 
 /* super.c */
 void ll_update_inode(struct inode *, struct mds_body *, struct lov_stripe_md *);
 int ll_setattr_raw(struct inode *inode, struct iattr *attr);
+int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs,
+                       unsigned long maxage);
 
 /* symlink.c */
 extern struct inode_operations ll_fast_symlink_inode_operations;
 extern struct inode_operations ll_symlink_inode_operations;
 
 
 /* symlink.c */
 extern struct inode_operations ll_fast_symlink_inode_operations;
 extern struct inode_operations ll_symlink_inode_operations;
 
-/* sysctl.c */
-void ll_sysctl_init(void);
-void ll_sysctl_clean(void);
-
 #else
 #include <linux/lustre_idl.h>
 #endif /* __KERNEL__ */
 
 #else
 #include <linux/lustre_idl.h>
 #endif /* __KERNEL__ */
 
-static inline void ll_ino2fid(struct ll_fid *fid,
-                              obd_id ino,
-                              __u32 generation,
+static inline void ll_ino2fid(struct ll_fid *fid, obd_id ino, __u32 generation,
                               int type)
 {
         fid->id = ino;
                               int type)
 {
         fid->id = ino;
@@ -360,11 +329,6 @@ static inline void ll_ino2fid(struct ll_fid *fid,
         fid->f_type = type;
 }
 
         fid->f_type = type;
 }
 
-struct ll_read_inode2_cookie {
-        struct mds_body      *lic_body;
-        struct lov_stripe_md *lic_lsm;
-};
-
 #include <asm/types.h>
 
 #define LL_IOC_GETFLAGS                 _IOR ('f', 151, long)
 #include <asm/types.h>
 
 #define LL_IOC_GETFLAGS                 _IOR ('f', 151, long)
index 683d78d..e7ee6f0 100644 (file)
@@ -35,6 +35,8 @@
 #include <linux/lustre_idl.h>
 #include <linux/lustre_lib.h>
 #include <linux/lustre_dlm.h>
 #include <linux/lustre_idl.h>
 #include <linux/lustre_lib.h>
 #include <linux/lustre_dlm.h>
+#include <linux/lustre_log.h>
+#include <linux/lustre_export.h>
 
 struct ldlm_lock_desc;
 struct mds_obd;
 
 struct ldlm_lock_desc;
 struct mds_obd;
@@ -49,6 +51,11 @@ struct ll_file_data;
 #define LUSTRE_MDT_NAME "mdt"
 #define LUSTRE_MDC_NAME "mdc"
 
 #define LUSTRE_MDT_NAME "mdt"
 #define LUSTRE_MDC_NAME "mdc"
 
+struct lustre_md {
+        struct mds_body *body;
+        struct lov_stripe_md *lsm;
+};
+
 struct mdc_rpc_lock {
         struct semaphore rpcl_sem;
         struct lookup_intent *rpcl_it;
 struct mdc_rpc_lock {
         struct semaphore rpcl_sem;
         struct lookup_intent *rpcl_it;
@@ -144,6 +151,8 @@ struct mds_update_record {
         char *ur_tgt;
         int ur_eadatalen;
         void *ur_eadata;
         char *ur_tgt;
         int ur_eadatalen;
         void *ur_eadata;
+        int ur_cookielen;
+        struct llog_cookie *ur_logcookies;
         struct iattr ur_iattr;
         struct obd_ucred ur_uc;
         __u64 ur_rdev;
         struct iattr ur_iattr;
         struct obd_ucred ur_uc;
         __u64 ur_rdev;
@@ -160,8 +169,31 @@ struct mds_update_record {
 #define ur_suppgid1 ur_uc.ouc_suppgid1
 #define ur_suppgid2 ur_uc.ouc_suppgid2
 
 #define ur_suppgid1 ur_uc.ouc_suppgid1
 #define ur_suppgid2 ur_uc.ouc_suppgid2
 
-#define MDS_LR_CLIENT  8192
-#define MDS_LR_SIZE     128
+/* i_attr_flags holds the open count in the inode in 2.4 */
+/* TODO (Alex): implement on 2.4 with i_attr_flags and find a solution for
+ * 2.5.  Until then the 2.5+ variants are stubs: the count always reads as 0
+ * and incrementing it does nothing. */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+# define mds_open_orphan_count(inode)   (0)
+/* No trailing semicolon here: the caller supplies it, exactly as for the
+ * 2.4 variant, so the macro is safe in an unbraced if/else. */
+# define mds_open_orphan_inc(inode)  do { } while (0)
+# define mds_open_orphan_dec_test(inode)  (0)
+#else
+# define mds_inode_oatomic(inode)    ((atomic_t *)&(inode)->i_attr_flags)
+# define mds_open_orphan_count(inode)                          \
+  atomic_read(mds_inode_oatomic(inode))
+# define mds_open_orphan_inc(inode)                            \
+  atomic_inc(mds_inode_oatomic(inode))
+# define mds_open_orphan_dec_test(inode)                       \
+  atomic_dec_and_test(mds_inode_oatomic(inode))
+#endif
+/* Both macros below use the same i_flags bit (0x4000000) as the orphan
+ * marker.  The setter's expansion is fully parenthesized so it behaves as a
+ * single expression wherever it is used. */
+#define mds_inode_is_orphan(inode)  ((inode)->i_flags & 0x4000000)
+#define mds_inode_set_orphan(inode) ((inode)->i_flags |= 0x4000000)
+
+#define MDS_LR_SERVER_SIZE    512
+
+#define MDS_LR_CLIENT_START  8192
+#define MDS_LR_CLIENT_SIZE    128
+#if MDS_LR_CLIENT_START < MDS_LR_SERVER_SIZE
+#error "Can't have MDS_LR_CLIENT_START < MDS_LR_SERVER_SIZE"
+#endif
 
 #define MDS_CLIENT_SLOTS 17
 
 
 #define MDS_CLIENT_SLOTS 17
 
@@ -169,11 +201,24 @@ struct mds_update_record {
 
 /* Data stored per server at the head of the last_rcvd file.  In le32 order. */
 struct mds_server_data {
 
 /* Data stored per server at the head of the last_rcvd file.  In le32 order. */
 struct mds_server_data {
-        __u8 msd_uuid[37];      /* server UUID */
-        __u8 uuid_padding[3];   /* unused */
-        __u64 msd_last_transno; /* last completed transaction ID */
-        __u64 msd_mount_count;  /* MDS incarnation number */
-        __u8 padding[512 - 56];
+        __u8  msd_uuid[37];        /* server UUID */
+        __u8  uuid_padding[3];     /* unused */
+//      __u64 msd_last_objid;      /* last created object ID */
+        __u64 msd_last_transno;    /* last completed transaction ID */
+        __u64 msd_mount_count;     /* MDS incarnation number */
+        __u64 msd_padding_until_last_objid_is_enabled;
+        __u32 msd_feature_compat;  /* compatible feature flags */
+        __u32 msd_feature_rocompat;/* read-only compatible feature flags */
+        __u32 msd_feature_incompat;/* incompatible feature flags */
+        __u32 msd_server_size;     /* size of server data area */
+        __u32 msd_client_start;    /* start of per-client data area */
+        __u16 msd_client_size;     /* size of per-client data area */
+        __u16 msd_subdir_count;    /* number of subdirectories for objects */
+        __u64 msd_catalog_oid;     /* recovery catalog object id */
+        __u32 msd_catalog_ogen;    /* recovery catalog inode generation */
+        __u8  msd_peeruuid[37];    /* UUID of LOV/OSC associated with MDS */
+        __u8  peer_padding[3];     /* unused */
+        __u8  msd_padding[MDS_LR_SERVER_SIZE - 140];
 };
 
 /* Data stored per client in the last_rcvd file.  In le32 order. */
 };
 
 /* Data stored per client in the last_rcvd file.  In le32 order. */
@@ -185,7 +230,7 @@ struct mds_client_data {
         __u64 mcd_last_xid;     /* xid for the last transaction */
         __u32 mcd_last_result;  /* result from last RPC */
         __u32 mcd_last_data;    /* per-op data (disposition for open &c.) */
         __u64 mcd_last_xid;     /* xid for the last transaction */
         __u32 mcd_last_result;  /* result from last RPC */
         __u32 mcd_last_data;    /* per-op data (disposition for open &c.) */
-        __u8 padding[MDS_LR_SIZE - 74];
+        __u8 mcd_padding[MDS_LR_CLIENT_SIZE - 72];
 };
 
 /* file data for open files on MDS */
 };
 
 /* file data for open files on MDS */
@@ -202,10 +247,6 @@ struct mds_file_data {
 int mds_reint_rec(struct mds_update_record *r, int offset,
                   struct ptlrpc_request *req, struct lustre_handle *);
 
 int mds_reint_rec(struct mds_update_record *r, int offset,
                   struct ptlrpc_request *req, struct lustre_handle *);
 
-/* mds/mds_open.c */
-int mds_open(struct mds_update_record *rec, int offset,
-             struct ptlrpc_request *req, struct lustre_handle *);
-
 /* mds/handler.c */
 #ifdef __KERNEL__
 struct dentry *mds_name2locked_dentry(struct obd_device *, struct dentry *dir,
 /* mds/handler.c */
 #ifdef __KERNEL__
 struct dentry *mds_name2locked_dentry(struct obd_device *, struct dentry *dir,
@@ -223,13 +264,22 @@ int mds_pack_md(struct obd_device *mds, struct lustre_msg *msg,
                 int offset, struct mds_body *body, struct inode *inode);
 void mds_steal_ack_locks(struct obd_export *exp,
                          struct ptlrpc_request *req);
                 int offset, struct mds_body *body, struct inode *inode);
 void mds_steal_ack_locks(struct obd_export *exp,
                          struct ptlrpc_request *req);
+int mds_update_server_data(struct obd_device *);
 
 /* mds/mds_fs.c */
 int mds_fs_setup(struct obd_device *obddev, struct vfsmount *mnt);
 int mds_fs_cleanup(struct obd_device *obddev, int failover);
 #endif
 
 
 /* mds/mds_fs.c */
 int mds_fs_setup(struct obd_device *obddev, struct vfsmount *mnt);
 int mds_fs_cleanup(struct obd_device *obddev, int failover);
 #endif
 
+/* mds/mds_lov.c */
+extern int mds_get_lovtgts(struct mds_obd *obd, int tgt_count,
+                           struct obd_uuid *uuidarray);
+extern int mds_get_lovdesc(struct mds_obd  *obd, struct lov_desc *desc);
+
 /* mdc/mdc_request.c */
 /* mdc/mdc_request.c */
+int mdc_req2lustre_md(struct ptlrpc_request *req, int offset,
+                      struct lustre_handle *obd_import,
+                      struct lustre_md *md);
 int mdc_enqueue(struct lustre_handle *conn, int lock_type,
                 struct lookup_intent *it, int lock_mode,
                 struct mdc_op_data *enq_data,
 int mdc_enqueue(struct lustre_handle *conn, int lock_type,
                 struct lookup_intent *it, int lock_mode,
                 struct mdc_op_data *enq_data,
@@ -248,7 +298,7 @@ int mdc_getattr_name(struct lustre_handle *conn, struct ll_fid *fid,
                      unsigned int ea_size, struct ptlrpc_request **request);
 int mdc_setattr(struct lustre_handle *conn,
                 struct mdc_op_data *data,
                      unsigned int ea_size, struct ptlrpc_request **request);
 int mdc_setattr(struct lustre_handle *conn,
                 struct mdc_op_data *data,
-                struct iattr *iattr, void *ea, int ealen,
+                struct iattr *iattr, void *ea, int ealen, void *ea2, int ea2len,
                 struct ptlrpc_request **request);
 int mdc_open(struct lustre_handle *conn, obd_id ino, int type, int flags,
              struct lov_mds_md *lmm, int lmm_size, struct lustre_handle *fh,
                 struct ptlrpc_request **request);
 int mdc_open(struct lustre_handle *conn, obd_id ino, int type, int flags,
              struct lov_mds_md *lmm, int lmm_size, struct lustre_handle *fh,
index ac87d7f..bc70b9a 100644 (file)
 /* OST_MAXREQSIZE ~= 1640 bytes =
  * lustre_msg + obdo + 16 * obd_ioobj + 64 * niobuf_remote
  *
 /* OST_MAXREQSIZE ~= 1640 bytes =
  * lustre_msg + obdo + 16 * obd_ioobj + 64 * niobuf_remote
  *
- * single object with 16 pages is 512 bytes
+ * - single object with 16 pages is 512 bytes
+ * - OST_MAXREQSIZE must be at least 1 page of cookies plus some spillover
  */
  */
-#define OST_MAXREQSIZE  (2 * 1024)
+#define OST_MAXREQSIZE  (5 * 1024)
 
 #define PTLBD_NUM_THREADS        4
 #define PTLBD_NEVENTS    1024
 
 #define PTLBD_NUM_THREADS        4
 #define PTLBD_NEVENTS    1024
@@ -188,15 +189,19 @@ union ptlrpc_async_args {
          * big enough.  For _tons_ of context, OBD_ALLOC a struct and store
          * a pointer to it here.  The pointer_arg ensures this struct is at
          * least big enough for that. */
          * big enough.  For _tons_ of context, OBD_ALLOC a struct and store
          * a pointer to it here.  The pointer_arg ensures this struct is at
          * least big enough for that. */
-        void      *pointer_arg[4];
+        void      *pointer_arg[5];
         __u64      space[4];
 };
 
         __u64      space[4];
 };
 
+struct ptlrpc_request_set;
+typedef int (*set_interpreter_func)(struct ptlrpc_request_set *, void *, int);
+
 struct ptlrpc_request_set {
         int               set_remaining; /* # uncompleted requests */
         wait_queue_head_t set_waitq;
 struct ptlrpc_request_set {
         int               set_remaining; /* # uncompleted requests */
         wait_queue_head_t set_waitq;
+        wait_queue_head_t *set_wakeup_ptr;
         struct list_head  set_requests;
         struct list_head  set_requests;
-        void             *set_interpret; /* completion callback */
+        set_interpreter_func    set_interpret; /* completion callback */
         union ptlrpc_async_args set_args; /* completion context */
 };
 
         union ptlrpc_async_args set_args; /* completion context */
 };
 
index ba848a9..f30cbb2 100644 (file)
 #ifndef __OBD_H
 #define __OBD_H
 
 #ifndef __OBD_H
 #define __OBD_H
 
+#define IOC_OSC_TYPE         'h'
+#define IOC_OSC_MIN_NR       20
+#define IOC_OSC_REGISTER_LOV _IOWR(IOC_OSC_TYPE, 20, struct obd_device *)
+#define IOC_OSC_SET_ACTIVE   _IOWR(IOC_OSC_TYPE, 21, struct obd_device *)
+#define IOC_OSC_MAX_NR       50
+
+#define IOC_MDC_TYPE         'i'
+#define IOC_MDC_MIN_NR       20
+#define IOC_MDC_LOOKUP       _IOWR(IOC_MDC_TYPE, 20, struct obd_device *)
+#define IOC_MDC_GETSTRIPE    _IOWR(IOC_MDC_TYPE, 21, struct lov_mds_md *)
+#define IOC_MDC_MAX_NR       50
+
+#ifdef __KERNEL__
+# include <linux/fs.h>
+# include <linux/list.h>
+# include <linux/sched.h> /* for struct task_struct, for current.h */
+# include <asm/current.h> /* for smp_lock.h */
+# include <linux/smp_lock.h>
+# include <linux/proc_fs.h>
+# include <linux/mount.h>
+#endif
+
+#include <linux/lustre_lib.h>
+#include <linux/lustre_idl.h>
+#include <linux/lustre_export.h>
 #include <linux/lustre_otree.h>
 
 struct lov_oinfo { /* per-child structure */
 #include <linux/lustre_otree.h>
 
 struct lov_oinfo { /* per-child structure */
@@ -34,31 +59,6 @@ struct lov_stripe_md {
         struct lov_oinfo lsm_oinfo[0];
 };
 
         struct lov_oinfo lsm_oinfo[0];
 };
 
-#define IOC_OSC_TYPE         'h'
-#define IOC_OSC_MIN_NR       20
-#define IOC_OSC_REGISTER_LOV _IOWR(IOC_OSC_TYPE, 20, struct obd_device *)
-#define IOC_OSC_SET_ACTIVE   _IOWR(IOC_OSC_TYPE, 21, struct obd_device *)
-#define IOC_OSC_MAX_NR       50
-
-#define IOC_MDC_TYPE         'i'
-#define IOC_MDC_MIN_NR       20
-#define IOC_MDC_LOOKUP       _IOWR(IOC_MDC_TYPE, 20, struct obd_device *)
-#define IOC_MDC_MAX_NR       50
-
-#ifdef __KERNEL__
-# include <linux/fs.h>
-# include <linux/list.h>
-# include <linux/sched.h> /* for struct task_struct, for current.h */
-# include <asm/current.h> /* for smp_lock.h */
-# include <linux/smp_lock.h>
-# include <linux/proc_fs.h>
-
-# include <linux/lustre_lib.h>
-# include <linux/lustre_idl.h>
-# include <linux/lustre_mds.h>
-# include <linux/lustre_export.h>
-#endif
-
 struct obd_type {
         struct list_head typ_chain;
         struct obd_ops *typ_ops;
 struct obd_type {
         struct list_head typ_chain;
         struct obd_ops *typ_ops;
@@ -80,7 +80,7 @@ struct ost_server_data;
 
 struct filter_obd {
         const char          *fo_fstype;
 
 struct filter_obd {
         const char          *fo_fstype;
-        char *fo_nspath;
+        char                *fo_nspath;
         struct super_block  *fo_sb;
         struct vfsmount     *fo_vfsmnt;
         struct obd_run_ctxt  fo_ctxt;
         struct super_block  *fo_sb;
         struct vfsmount     *fo_vfsmnt;
         struct obd_run_ctxt  fo_ctxt;
@@ -103,28 +103,57 @@ struct filter_obd {
         spinlock_t           fo_grant_lock;       /* protects tot_granted */
         obd_size             fo_tot_granted;
         obd_size             fo_tot_cached;
         spinlock_t           fo_grant_lock;       /* protects tot_granted */
         obd_size             fo_tot_granted;
         obd_size             fo_tot_cached;
+
+        struct llog_handle  *fo_catalog;
+        struct obd_import   *fo_mdc_imp;
+        struct obd_uuid      fo_mdc_uuid;
+        struct lustre_handle fo_mdc_conn;
+        struct ptlrpc_client fo_mdc_client;
+        struct llog_commit_data *fo_llcd;
+        struct semaphore     fo_sem; /* protects fo_llcd */
 };
 
 struct mds_server_data;
 
 struct client_obd {
 };
 
 struct mds_server_data;
 
 struct client_obd {
-        struct obd_import   *cl_import;
-        struct semaphore     cl_sem;
-        int                  cl_conn_count;
+        struct obd_import       *cl_import;
+        struct semaphore         cl_sem;
+        int                      cl_conn_count;
         /* max_mds_easize is purely a performance thing so we don't have to
          * call obd_size_wiremd() all the time. */
         /* max_mds_easize is purely a performance thing so we don't have to
          * call obd_size_wiremd() all the time. */
-        int                  cl_max_mds_easize;
-        struct obd_device   *cl_containing_lov;
-        kdev_t               cl_sandev;
-        struct semaphore     cl_dirty_sem;
-        obd_size             cl_dirty;  /* both in bytes */
-        obd_size             cl_dirty_granted;
-        /* this is just to keep existing infinitely caching behaviour between 
-         * clients and OSTs that don't have the grant code in yet.. it can 
+        int                      cl_max_mds_easize;
+        int                      cl_max_mds_cookiesize;
+        /* XXX can we replace cl_containing_lov with mgmt-events? */
+        struct obd_device       *cl_containing_lov;
+        kdev_t                   cl_sandev;
+
+        struct llog_commit_data *cl_llcd;
+        void                    *cl_llcd_offset;
+
+        struct semaphore         cl_dirty_sem;
+        obd_size                 cl_dirty;  /* both in bytes */
+        obd_size                 cl_dirty_granted;
+
+        struct obd_device       *cl_mgmtcli_obd;
+
+        /* this is just to keep existing infinitely caching behaviour between
+         * clients and OSTs that don't have the grant code in yet.. it can
          * be yanked once everything speaks grants */
          * be yanked once everything speaks grants */
-        char                 cl_ost_can_grant;
+        char                     cl_ost_can_grant;
 };
 
 };
 
+/* Like a client, with some hangers-on.  Keep mc_client_obd first so that we
+ * can reuse the various client setup/connect functions.
+ *
+ * The remaining members carry the state this device adds on top of the
+ * embedded client: a ping thread pointer, one lustre_handle used for pinging
+ * (flagged XXX below as handling only a single target), a list head and an
+ * opaque pointer.
+ * NOTE(review): what is linked on mc_registered and what mc_hammer points to
+ * is not visible in this header -- confirm against the implementation. */
+struct mgmtcli_obd {
+        struct client_obd        mc_client_obd; /* nested */
+        struct ptlrpc_thread    *mc_ping_thread;
+        struct lustre_handle     mc_ping_handle; /* XXX single-target */
+        struct list_head         mc_registered;
+        void                    *mc_hammer;
+};
+
+#define mc_import mc_client_obd.cl_import
+
 struct mds_obd {
         struct ptlrpc_service           *mds_service;
         struct ptlrpc_service           *mds_setattr_service;
 struct mds_obd {
         struct ptlrpc_service           *mds_service;
         struct ptlrpc_service           *mds_setattr_service;
@@ -139,12 +168,20 @@ struct mds_obd {
         struct address_space_operations *mds_aops;
 
         int                              mds_max_mdsize;
         struct address_space_operations *mds_aops;
 
         int                              mds_max_mdsize;
+        int                              mds_max_cookiesize;
         struct file                     *mds_rcvd_filp;
         spinlock_t                       mds_transno_lock;
         __u64                            mds_last_transno;
         __u64                            mds_mount_count;
         struct ll_fid                    mds_rootfid;
         struct mds_server_data          *mds_server_data;
         struct file                     *mds_rcvd_filp;
         spinlock_t                       mds_transno_lock;
         __u64                            mds_last_transno;
         __u64                            mds_mount_count;
         struct ll_fid                    mds_rootfid;
         struct mds_server_data          *mds_server_data;
+        struct dentry                   *mds_pending_dir;
+        struct dentry                   *mds_logs_dir;
+
+        struct llog_handle              *mds_catalog;
+        struct obd_device               *mds_osc_obd;
+        struct obd_uuid                  mds_osc_uuid;
+        struct lustre_handle             mds_osc_conn;
 
         int                              mds_has_lov_desc;
         struct lov_desc                  mds_lov_desc;
 
         int                              mds_has_lov_desc;
         struct lov_desc                  mds_lov_desc;
@@ -159,7 +196,6 @@ struct ldlm_obd {
 };
 
 struct echo_obd {
 };
 
 struct echo_obd {
-        char *eo_fstype;
         struct obdo oa;
         spinlock_t eo_lock;
         __u64 eo_lastino;
         struct obdo oa;
         spinlock_t eo_lock;
         __u64 eo_lastino;
@@ -221,6 +257,7 @@ struct cache_obd {
 struct lov_tgt_desc {
         struct obd_uuid uuid;
         struct lustre_handle conn;
 struct lov_tgt_desc {
         struct obd_uuid uuid;
         struct lustre_handle conn;
+        struct llog_handle *ltd_cathandle;
         int active; /* is this target available for requests, etc */
 };
 
         int active; /* is this target available for requests, etc */
 };
 
@@ -230,6 +267,7 @@ struct lov_obd {
         struct lov_desc desc;
         int bufsize;
         int refcount;
         struct lov_desc desc;
         int bufsize;
         int refcount;
+        int lo_catalog_loaded:1;
         struct lov_tgt_desc *tgts;
 };
 
         struct lov_tgt_desc *tgts;
 };
 
@@ -247,14 +285,46 @@ struct niobuf_local {
 #define N_LOCAL_TEMP_PAGE 0x10000000
 
 struct obd_trans_info {
 #define N_LOCAL_TEMP_PAGE 0x10000000
 
 struct obd_trans_info {
-        __u64     oti_transno;
+        __u64                   oti_transno;
         /* Only used on the server side for tracking acks. */
         struct oti_req_ack_lock {
                 struct lustre_handle lock;
                 __u32                mode;
         } oti_ack_locks[4];
         /* Only used on the server side for tracking acks. */
         struct oti_req_ack_lock {
                 struct lustre_handle lock;
                 __u32                mode;
         } oti_ack_locks[4];
+        void                    *oti_handle;
+        struct llog_cookie       oti_onecookie;
+        struct llog_cookie      *oti_logcookies;
+        int                      oti_numcookies;
 };
 
 };
 
+/* Attach storage for num_cookies llog cookies to a transaction info.
+ *
+ * A request for exactly one cookie is served from the embedded
+ * oti_onecookie; any other count is obtained with OBD_ALLOC.  A NULL oti is
+ * silently ignored.  Nothing is returned: callers must inspect
+ * oti->oti_logcookies to find out whether storage is available, and release
+ * it with oti_free_cookies(). */
+static inline void oti_alloc_cookies(struct obd_trans_info *oti,
+                                     int num_cookies)
+{
+        if (!oti)
+                return;
+
+        if (num_cookies == 1)
+                oti->oti_logcookies = &oti->oti_onecookie;
+        else
+                OBD_ALLOC(oti->oti_logcookies,
+                          num_cookies * sizeof(oti->oti_onecookie));
+
+        /* NOTE(review): assumes OBD_ALLOC leaves the pointer NULL when the
+         * allocation fails -- confirm.  In that case record a zero count so
+         * oti_numcookies never advertises cookies without backing storage
+         * (oti_free_cookies() returns early on a NULL pointer and would
+         * otherwise leave the stale count behind). */
+        oti->oti_numcookies = oti->oti_logcookies != NULL ? num_cookies : 0;
+}
+
+/* Drop the cookie storage referenced by oti->oti_logcookies, if any.
+ *
+ * Storage that is the embedded oti_onecookie is only sanity-checked (the
+ * count must be 1); anything else is handed to OBD_FREE using the recorded
+ * cookie count.  Afterwards the pointer and count are cleared.  A NULL oti,
+ * or an oti with no cookie storage, is left untouched. */
+static inline void oti_free_cookies(struct obd_trans_info *oti)
+{
+        if (oti == NULL || oti->oti_logcookies == NULL)
+                return;
+
+        if (oti->oti_logcookies != &oti->oti_onecookie)
+                OBD_FREE(oti->oti_logcookies,
+                         oti->oti_numcookies * sizeof(oti->oti_onecookie));
+        else
+                LASSERT(oti->oti_numcookies == 1);
+
+        oti->oti_numcookies = 0;
+        oti->oti_logcookies = NULL;
+}
+
 /* corresponds to one of the obd's */
 struct obd_device {
         struct obd_type *obd_type;
 /* corresponds to one of the obd's */
 struct obd_device {
         struct obd_type *obd_type;
@@ -276,9 +346,11 @@ struct obd_device {
         struct ldlm_namespace *obd_namespace;
         struct ptlrpc_client   obd_ldlm_client; /* XXX OST/MDS only */
         /* a spinlock is OK for what we do now, may need a semaphore later */
         struct ldlm_namespace *obd_namespace;
         struct ptlrpc_client   obd_ldlm_client; /* XXX OST/MDS only */
         /* a spinlock is OK for what we do now, may need a semaphore later */
-        spinlock_t obd_dev_lock;
+        spinlock_t             obd_dev_lock;
         __u64                  obd_last_committed;
         struct fsfilt_operations *obd_fsops;
         __u64                  obd_last_committed;
         struct fsfilt_operations *obd_fsops;
+        struct obd_statfs      obd_osfs;
+        unsigned long          obd_osfs_age;
 
         /* XXX encapsulate all this recovery data into one struct */
         svc_handler_t                    obd_recovery_handler;
 
         /* XXX encapsulate all this recovery data into one struct */
         svc_handler_t                    obd_recovery_handler;
@@ -297,19 +369,25 @@ struct obd_device {
                 struct mds_obd mds;
                 struct client_obd cli;
                 struct ost_obd ost;
                 struct mds_obd mds;
                 struct client_obd cli;
                 struct ost_obd ost;
-                struct echo_client_obd echo_client;;
+                struct echo_client_obd echo_client;
                 struct ldlm_obd ldlm;
                 struct echo_obd echo;
                 struct recovd_obd recovd;
                 struct lov_obd lov;
                 struct cache_obd cobd;
                 struct ptlbd_obd ptlbd;
                 struct ldlm_obd ldlm;
                 struct echo_obd echo;
                 struct recovd_obd recovd;
                 struct lov_obd lov;
                 struct cache_obd cobd;
                 struct ptlbd_obd ptlbd;
+                struct mgmtcli_obd mgmtcli;
         } u;
        /* Fields used by LProcFS */
         unsigned int           obd_cntr_base;
         struct lprocfs_stats  *obd_stats;
 };
 
         } u;
        /* Fields used by LProcFS */
         unsigned int           obd_cntr_base;
         struct lprocfs_stats  *obd_stats;
 };
 
+#define OBD_OPT_FORCE           0x0001
+#define OBD_OPT_FAILOVER        0x0002
+
+#define OBD_LLOG_FL_SENDNOW     0x0001
+
 struct obd_ops {
         struct module *o_owner;
         int (*o_iocontrol)(unsigned int cmd, struct lustre_handle *, int len,
 struct obd_ops {
         struct module *o_owner;
         int (*o_iocontrol)(unsigned int cmd, struct lustre_handle *, int len,
@@ -321,16 +399,17 @@ struct obd_ops {
         int (*o_attach)(struct obd_device *dev, obd_count len, void *data);
         int (*o_detach)(struct obd_device *dev);
         int (*o_setup) (struct obd_device *dev, obd_count len, void *data);
         int (*o_attach)(struct obd_device *dev, obd_count len, void *data);
         int (*o_detach)(struct obd_device *dev);
         int (*o_setup) (struct obd_device *dev, obd_count len, void *data);
-        int (*o_cleanup)(struct obd_device *dev, int force, int failover);
+        int (*o_cleanup)(struct obd_device *dev, int flags);
         int (*o_connect)(struct lustre_handle *conn, struct obd_device *src,
                          struct obd_uuid *cluuid);
         int (*o_connect)(struct lustre_handle *conn, struct obd_device *src,
                          struct obd_uuid *cluuid);
-        int (*o_disconnect)(struct lustre_handle *conn, int failover);
+        int (*o_disconnect)(struct lustre_handle *conn, int flags);
 
 
-        int (*o_statfs)(struct obd_export *exp, struct obd_statfs *osfs);
-        int (*o_syncfs)(struct obd_export *);
+        int (*o_statfs)(struct obd_device *obd, struct obd_statfs *osfs,
+                        unsigned long max_age);
+        int (*o_syncfs)(struct obd_export *exp);
         int (*o_packmd)(struct lustre_handle *, struct lov_mds_md **disk_tgt,
                         struct lov_stripe_md *mem_src);
         int (*o_packmd)(struct lustre_handle *, struct lov_mds_md **disk_tgt,
                         struct lov_stripe_md *mem_src);
-        int (*o_unpackmd)(struct lustre_handle *,
+        int (*o_unpackmd)(struct lustre_handle *conn,
                           struct lov_stripe_md **mem_tgt,
                           struct lov_mds_md *disk_src, int disk_len);
         int (*o_preallocate)(struct lustre_handle *, obd_count *req,
                           struct lov_stripe_md **mem_tgt,
                           struct lov_mds_md *disk_src, int disk_len);
         int (*o_preallocate)(struct lustre_handle *, obd_count *req,
@@ -344,42 +423,42 @@ struct obd_ops {
         int (*o_getattr)(struct lustre_handle *conn, struct obdo *oa,
                          struct lov_stripe_md *ea);
         int (*o_getattr_async)(struct lustre_handle *conn, struct obdo *oa,
         int (*o_getattr)(struct lustre_handle *conn, struct obdo *oa,
                          struct lov_stripe_md *ea);
         int (*o_getattr_async)(struct lustre_handle *conn, struct obdo *oa,
-                               struct lov_stripe_md *ea, 
+                               struct lov_stripe_md *ea,
                                struct ptlrpc_request_set *set);
         int (*o_open)(struct lustre_handle *conn, struct obdo *oa,
                       struct lov_stripe_md *ea, struct obd_trans_info *oti,
                       struct obd_client_handle *och);
         int (*o_close)(struct lustre_handle *conn, struct obdo *oa,
                        struct lov_stripe_md *ea, struct obd_trans_info *oti);
                                struct ptlrpc_request_set *set);
         int (*o_open)(struct lustre_handle *conn, struct obdo *oa,
                       struct lov_stripe_md *ea, struct obd_trans_info *oti,
                       struct obd_client_handle *och);
         int (*o_close)(struct lustre_handle *conn, struct obdo *oa,
                        struct lov_stripe_md *ea, struct obd_trans_info *oti);
-        int (*o_brw)(int rw, struct lustre_handle *conn,
+        int (*o_brw)(int rw, struct lustre_handle *conn, struct obdo *oa,
                      struct lov_stripe_md *ea, obd_count oa_bufs,
                      struct brw_page *pgarr, struct obd_trans_info *oti);
                      struct lov_stripe_md *ea, obd_count oa_bufs,
                      struct brw_page *pgarr, struct obd_trans_info *oti);
-        int (*o_brw_async)(int rw, struct lustre_handle *conn,
+        int (*o_brw_async)(int rw, struct lustre_handle *conn, struct obdo *oa,
                            struct lov_stripe_md *ea, obd_count oa_bufs,
                            struct brw_page *pgarr, struct ptlrpc_request_set *,
                            struct obd_trans_info *oti);
                            struct lov_stripe_md *ea, obd_count oa_bufs,
                            struct brw_page *pgarr, struct ptlrpc_request_set *,
                            struct obd_trans_info *oti);
-        int (*o_punch)(struct lustre_handle *conn, struct obdo *tgt,
+        int (*o_punch)(struct lustre_handle *conn, struct obdo *oa,
                        struct lov_stripe_md *ea, obd_size count,
                        obd_off offset, struct obd_trans_info *oti);
                        struct lov_stripe_md *ea, obd_size count,
                        obd_off offset, struct obd_trans_info *oti);
-        int (*o_sync)(struct lustre_handle *conn, struct obdo *tgt,
+        int (*o_sync)(struct lustre_handle *conn, struct obdo *oa,
                       obd_size count, obd_off offset);
                       obd_size count, obd_off offset);
-        int (*o_migrate)(struct lustre_handle *conn, struct obdo *dst,
-                         struct obdo *src, obd_size count, obd_off offset);
-        int (*o_copy)(struct lustre_handle *dstconn, struct obdo *dst,
-                      struct lustre_handle *srconn, struct obdo *src,
+        int (*o_migrate)(struct lustre_handle *conn, struct lov_stripe_md *dst,
+                         struct lov_stripe_md *src, obd_size count,
+                         obd_off offset);
+        int (*o_copy)(struct lustre_handle *dstconn, struct lov_stripe_md *dst,
+                      struct lustre_handle *srconn, struct lov_stripe_md *src,
                       obd_size count, obd_off offset, struct obd_trans_info *);
         int (*o_iterate)(struct lustre_handle *conn,
                          int (*)(obd_id, obd_gr, void *),
                          obd_id *startid, obd_gr group, void *data);
                       obd_size count, obd_off offset, struct obd_trans_info *);
         int (*o_iterate)(struct lustre_handle *conn,
                          int (*)(obd_id, obd_gr, void *),
                          obd_id *startid, obd_gr group, void *data);
-        int (*o_preprw)(int cmd, struct obd_export *, struct obdo *obdo,
+        int (*o_preprw)(int cmd, struct obd_export *exp, struct obdo *oa,
                         int objcount, struct obd_ioobj *obj,
                         int niocount, struct niobuf_remote *remote,
                         int objcount, struct obd_ioobj *obj,
                         int niocount, struct niobuf_remote *remote,
-                        struct niobuf_local *local, void **desc_private, 
-                        struct obd_trans_info *oti);
-        int (*o_commitrw)(int cmd, struct obd_export *,
+                        struct niobuf_local *local, struct obd_trans_info *oti);
+        int (*o_commitrw)(int cmd, struct obd_export *exp, struct obdo *oa,
                           int objcount, struct obd_ioobj *obj,
                           int niocount, struct niobuf_local *local,
                           int objcount, struct obd_ioobj *obj,
                           int niocount, struct niobuf_local *local,
-                          void *desc_private, struct obd_trans_info *oti);
+                          struct obd_trans_info *oti);
         int (*o_enqueue)(struct lustre_handle *conn, struct lov_stripe_md *md,
                          struct lustre_handle *parent_lock,
                          __u32 type, void *cookie, int cookielen, __u32 mode,
         int (*o_enqueue)(struct lustre_handle *conn, struct lov_stripe_md *md,
                          struct lustre_handle *parent_lock,
                          __u32 type, void *cookie, int cookielen, __u32 mode,
@@ -391,10 +470,17 @@ struct obd_ops {
         int (*o_cancel)(struct lustre_handle *, struct lov_stripe_md *md,
                         __u32 mode, struct lustre_handle *);
         int (*o_cancel_unused)(struct lustre_handle *, struct lov_stripe_md *,
         int (*o_cancel)(struct lustre_handle *, struct lov_stripe_md *md,
                         __u32 mode, struct lustre_handle *);
         int (*o_cancel_unused)(struct lustre_handle *, struct lov_stripe_md *,
-                               int local_only, void *opaque);
-        int (*o_san_preprw)(int cmd, struct lustre_handle *conn,
-                            int objcount, struct obd_ioobj *obj,
-                            int niocount, struct niobuf_remote *remote);
+                               int flags, void *opaque);
+        int (*o_log_add)(struct lustre_handle *conn,
+                         struct llog_handle *cathandle,
+                         struct llog_trans_hdr *rec, struct lov_stripe_md *lsm,
+                         struct llog_cookie *logcookies, int numcookies);
+        int (*o_log_cancel)(struct lustre_handle *, struct lov_stripe_md *,
+                            int count, struct llog_cookie *, int flags);
+        int (*o_san_preprw)(int cmd, struct obd_export *exp,
+                            struct obdo *oa, int objcount,
+                            struct obd_ioobj *obj, int niocount,
+                            struct niobuf_remote *remote);
         int (*o_mark_page_dirty)(struct lustre_handle *conn,
                                  struct lov_stripe_md *ea,
                                  unsigned long offset);
         int (*o_mark_page_dirty)(struct lustre_handle *conn,
                                  struct lov_stripe_md *ea,
                                  unsigned long offset);
@@ -406,14 +492,22 @@ struct obd_ops {
         int (*o_last_dirty_offset)(struct lustre_handle *conn,
                                    struct lov_stripe_md *ea,
                                    unsigned long *offset);
         int (*o_last_dirty_offset)(struct lustre_handle *conn,
                                    struct lov_stripe_md *ea,
                                    unsigned long *offset);
-        void (*o_destroy_export)(struct obd_export *export);
+        void (*o_destroy_export)(struct obd_export *exp);
+
+        /* metadata-only methods */
+        int (*o_pin)(struct lustre_handle *, obd_id ino, __u32 gen, int type,
+                     struct obd_client_handle *, int flag);
+        int (*o_unpin)(struct lustre_handle *, struct obd_client_handle *, int);
+
+        /* If adding ops, also update obdclass/lprocfs_status.c,
+         * and include/linux/obd_class.h */
 };
 
 static inline void obd_transno_commit_cb(struct obd_device *obd, __u64 transno,
                                          int error)
 {
         if (error) {
 };
 
 static inline void obd_transno_commit_cb(struct obd_device *obd, __u64 transno,
                                          int error)
 {
         if (error) {
-                CDEBUG(D_ERROR, "%s: transno "LPD64" commit error: %d\n",
+                CERROR("%s: transno "LPD64" commit error: %d\n",
                        obd->obd_name, transno, error);
                 return;
         }
                        obd->obd_name, transno, error);
                 return;
         }
@@ -425,8 +519,4 @@ static inline void obd_transno_commit_cb(struct obd_device *obd, __u64 transno,
         }
 }
 
         }
 }
 
-/* When adding a function pointer to struct obd_ops, please update 
- * function lprocfs_alloc_obd_counters() in obdclass/lprocfs_status.c
- * accordingly. */
-
 #endif /* __OBD_H */
 #endif /* __OBD_H */
index 0c33ceb..2e57d2f 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/types.h>
 #include <linux/fs.h>
 #include <linux/time.h>
 #include <linux/types.h>
 #include <linux/fs.h>
 #include <linux/time.h>
+#include <linux/timer.h>
 #endif
 
 #include <linux/obd_support.h>
 #endif
 
 #include <linux/obd_support.h>
@@ -81,6 +82,17 @@ void class_disconnect_exports(struct obd_device *obddev, int failover);
 int class_multi_setup(struct obd_device *obddev, uint32_t len, void *data);
 int class_multi_cleanup(struct obd_device *obddev);
 
 int class_multi_setup(struct obd_device *obddev, uint32_t len, void *data);
 int class_multi_cleanup(struct obd_device *obddev);
 
+/* obdo.c */
+#ifdef __KERNEL__
+void obdo_from_iattr(struct obdo *oa, struct iattr *attr, unsigned ia_valid);
+void iattr_from_obdo(struct iattr *attr, struct obdo *oa, obd_flag valid);
+void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid);
+void obdo_refresh_inode(struct inode *dst, struct obdo *src, obd_flag valid);
+void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid);
+#endif
+void obdo_cpy_md(struct obdo *dst, struct obdo *src, obd_flag valid);
+int obdo_cmp_md(struct obdo *dst, struct obdo *src, obd_flag compare);
+
 static inline int obd_check_conn(struct lustre_handle *conn)
 {
         struct obd_device *obd;
 static inline int obd_check_conn(struct lustre_handle *conn)
 {
         struct obd_device *obd;
@@ -277,7 +289,7 @@ static inline int obd_setup(struct obd_device *obd, int datalen, void *data)
         RETURN(rc);
 }
 
         RETURN(rc);
 }
 
-static inline int obd_cleanup(struct obd_device *obd, int force, int failover)
+static inline int obd_cleanup(struct obd_device *obd, int flags)
 {
         int rc;
         ENTRY;
 {
         int rc;
         ENTRY;
@@ -286,7 +298,7 @@ static inline int obd_cleanup(struct obd_device *obd, int force, int failover)
         OBD_CHECK_OP(obd, cleanup);
         OBD_COUNTER_INCREMENT(obd, cleanup);
 
         OBD_CHECK_OP(obd, cleanup);
         OBD_COUNTER_INCREMENT(obd, cleanup);
 
-        rc = OBP(obd, cleanup)(obd, force, failover);
+        rc = OBP(obd, cleanup)(obd, flags);
         RETURN(rc);
 }
 
         RETURN(rc);
 }
 
@@ -518,7 +530,7 @@ static inline int obd_connect(struct lustre_handle *conn,
         RETURN(rc);
 }
 
         RETURN(rc);
 }
 
-static inline int obd_disconnect(struct lustre_handle *conn, int failover)
+static inline int obd_disconnect(struct lustre_handle *conn, int flags)
 {
         struct obd_export *exp;
         int rc;
 {
         struct obd_export *exp;
         int rc;
@@ -528,7 +540,7 @@ static inline int obd_disconnect(struct lustre_handle *conn, int failover)
         OBD_CHECK_OP(exp->exp_obd, disconnect);
         OBD_COUNTER_INCREMENT(exp->exp_obd, disconnect);
 
         OBD_CHECK_OP(exp->exp_obd, disconnect);
         OBD_COUNTER_INCREMENT(exp->exp_obd, disconnect);
 
-        rc = OBP(exp->exp_obd, disconnect)(conn, failover);
+        rc = OBP(exp->exp_obd, disconnect)(conn, flags);
         class_export_put(exp);
         RETURN(rc);
 }
         class_export_put(exp);
         RETURN(rc);
 }
@@ -541,15 +553,35 @@ static inline void obd_destroy_export(struct obd_export *exp)
         EXIT;
 }
 
         EXIT;
 }
 
-static inline int obd_statfs(struct obd_export *exp, struct obd_statfs *osfs)
+/* Fallback for builds whose headers do not already supply time_before():
+ * evaluates to true when timestamp t1 is earlier than t2, comparing through
+ * signed long.  Each argument is parenthesized so that an expression
+ * argument (e.g. "now - delta") is cast as a whole instead of only its
+ * first operand. */
+#ifndef time_before
+#define time_before(t1, t2) ((long)(t2) - (long)(t1) > 0)
+#endif
+
+/* Fetch filesystem statistics for an obd device, with a per-device cache.
+ *
+ * obd:     device to query; NULL yields -EINVAL.
+ * osfs:    caller buffer that receives the statistics.
+ * max_age: if the cached copy was recorded before this time (or nothing has
+ *          been cached yet, obd_osfs_age == 0) the device's statfs method
+ *          is called; otherwise the cached obd_osfs is copied out.
+ *
+ * Returns the statfs method's result when it is called, 0 when the cache is
+ * used.  The cache is refreshed only after a successful call, so a failed
+ * query can never be replayed to later callers as valid data with rc 0.
+ * Cache accesses are made under obd_dev_lock. */
+static inline int obd_statfs(struct obd_device *obd, struct obd_statfs *osfs,
+                             unsigned long max_age)
 {
 {
-        int rc;
+        int rc = 0;
         ENTRY;
 
         ENTRY;
 
-        OBD_CHECK_OP(exp->exp_obd, statfs);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, statfs);
-
-        rc = OBP(exp->exp_obd, statfs)(exp, osfs);
+        if (obd == NULL)
+                RETURN(-EINVAL);
+
+        OBD_CHECK_OP(obd, statfs);
+        OBD_COUNTER_INCREMENT(obd, statfs);
+
+        CDEBUG(D_SUPER, "osfs %lu, max_age %lu\n", obd->obd_osfs_age, max_age);
+        if (obd->obd_osfs_age == 0 || time_before(obd->obd_osfs_age, max_age)) {
+                rc = OBP(obd, statfs)(obd, osfs, max_age);
+                /* Only a successful call may refresh the cache: on error
+                 * *osfs is not known to hold valid statistics. */
+                if (rc == 0) {
+                        spin_lock(&obd->obd_dev_lock);
+                        memcpy(&obd->obd_osfs, osfs, sizeof(obd->obd_osfs));
+                        obd->obd_osfs_age = jiffies;
+                        spin_unlock(&obd->obd_dev_lock);
+                }
+        } else {
+                CDEBUG(D_SUPER, "using cached obd_statfs data\n");
+                spin_lock(&obd->obd_dev_lock);
+                memcpy(osfs, &obd->obd_osfs, sizeof(*osfs));
+                spin_unlock(&obd->obd_dev_lock);
+        }
         RETURN(rc);
 }
 
         RETURN(rc);
 }
 
@@ -582,7 +614,7 @@ static inline int obd_punch(struct lustre_handle *conn, struct obdo *oa,
         RETURN(rc);
 }
 
         RETURN(rc);
 }
 
-static inline int obd_brw(int cmd, struct lustre_handle *conn,
+static inline int obd_brw(int cmd, struct lustre_handle *conn, struct obdo *oa,
                           struct lov_stripe_md *ea, obd_count oa_bufs,
                           struct brw_page *pg, struct obd_trans_info *oti)
 {
                           struct lov_stripe_md *ea, obd_count oa_bufs,
                           struct brw_page *pg, struct obd_trans_info *oti)
 {
@@ -600,14 +632,14 @@ static inline int obd_brw(int cmd, struct lustre_handle *conn,
                 LBUG();
         }
 
                 LBUG();
         }
 
-        rc = OBP(exp->exp_obd, brw)(cmd, conn, ea, oa_bufs, pg, oti);
+        rc = OBP(exp->exp_obd, brw)(cmd, conn, oa, ea, oa_bufs, pg, oti);
         class_export_put(exp);
         RETURN(rc);
 }
 
 static inline int obd_brw_async(int cmd, struct lustre_handle *conn,
         class_export_put(exp);
         RETURN(rc);
 }
 
 static inline int obd_brw_async(int cmd, struct lustre_handle *conn,
-                                struct lov_stripe_md *ea, obd_count oa_bufs,
-                                struct brw_page *pg,
+                                struct obdo *oa, struct lov_stripe_md *ea,
+                                obd_count oa_bufs, struct brw_page *pg,
                                 struct ptlrpc_request_set *set,
                                 struct obd_trans_info *oti)
 {
                                 struct ptlrpc_request_set *set,
                                 struct obd_trans_info *oti)
 {
@@ -624,15 +656,16 @@ static inline int obd_brw_async(int cmd, struct lustre_handle *conn,
                 LBUG();
         }
 
                 LBUG();
         }
 
-        rc = OBP(exp->exp_obd, brw_async)(cmd, conn, ea, oa_bufs, pg, set, oti);
+        rc = OBP(exp->exp_obd, brw_async)(cmd, conn, oa, ea, oa_bufs, pg, set,
+                                          oti);
         class_export_put(exp);
         RETURN(rc);
 }
 
         class_export_put(exp);
         RETURN(rc);
 }
 
-static inline int obd_preprw(int cmd, struct obd_export *exp, struct obdo *obdo,
+static inline int obd_preprw(int cmd, struct obd_export *exp, struct obdo *oa,
                              int objcount, struct obd_ioobj *obj,
                              int niocount, struct niobuf_remote *remote,
                              int objcount, struct obd_ioobj *obj,
                              int niocount, struct niobuf_remote *remote,
-                             struct niobuf_local *local, void **desc_private,
+                             struct niobuf_local *local,
                              struct obd_trans_info *oti)
 {
         int rc;
                              struct obd_trans_info *oti)
 {
         int rc;
@@ -641,15 +674,15 @@ static inline int obd_preprw(int cmd, struct obd_export *exp, struct obdo *obdo,
         OBD_CHECK_OP(exp->exp_obd, preprw);
         OBD_COUNTER_INCREMENT(exp->exp_obd, preprw);
 
         OBD_CHECK_OP(exp->exp_obd, preprw);
         OBD_COUNTER_INCREMENT(exp->exp_obd, preprw);
 
-        rc = OBP(exp->exp_obd, preprw)(cmd, exp, obdo, objcount, obj, niocount,
-                                       remote, local, desc_private, oti);
+        rc = OBP(exp->exp_obd, preprw)(cmd, exp, oa, objcount, obj, niocount,
+                                       remote, local, oti);
         RETURN(rc);
 }
 
         RETURN(rc);
 }
 
-static inline int obd_commitrw(int cmd, struct obd_export *exp,
+static inline int obd_commitrw(int cmd, struct obd_export *exp, struct obdo *oa,
                                int objcount, struct obd_ioobj *obj,
                                int niocount, struct niobuf_local *local,
                                int objcount, struct obd_ioobj *obj,
                                int niocount, struct niobuf_local *local,
-                               void *desc_private, struct obd_trans_info *oti)
+                               struct obd_trans_info *oti)
 {
         int rc;
         ENTRY;
 {
         int rc;
         ENTRY;
@@ -657,8 +690,8 @@ static inline int obd_commitrw(int cmd, struct obd_export *exp,
         OBD_CHECK_OP(exp->exp_obd, commitrw);
         OBD_COUNTER_INCREMENT(exp->exp_obd, commitrw);
 
         OBD_CHECK_OP(exp->exp_obd, commitrw);
         OBD_COUNTER_INCREMENT(exp->exp_obd, commitrw);
 
-        rc = OBP(exp->exp_obd, commitrw)(cmd, exp, objcount, obj, niocount,
-                                         local, desc_private, oti);
+        rc = OBP(exp->exp_obd, commitrw)(cmd, exp, oa, objcount, obj, niocount,
+                                         local, oti);
         RETURN(rc);
 }
 
         RETURN(rc);
 }
 
@@ -754,25 +787,92 @@ static inline int obd_cancel_unused(struct lustre_handle *conn,
         RETURN(rc);
 }
 
         RETURN(rc);
 }
 
-static inline int obd_san_preprw(int cmd, struct lustre_handle *conn,
+static inline int obd_log_add(struct lustre_handle *conn,
+                              struct llog_handle *cathandle,
+                              struct llog_trans_hdr *rec,
+                              struct lov_stripe_md *lsm,
+                              struct llog_cookie *logcookies,
+                              int numcookies)
+{       /* Call the log_add method of the OBD device reached through @conn,
+         * passing every argument through unchanged, and return that
+         * method's result.
+         *
+         * NOTE(review): OBD_CHECK_SETUP and OBD_CHECK_OP are defined outside
+         * this hunk.  Presumably OBD_CHECK_SETUP resolves @conn into exp
+         * (taking the reference that class_export_put() drops below) and
+         * both macros return early on failure - confirm against their
+         * definitions. */
+        struct obd_export *exp;
+        int rc;
+        ENTRY;
+
+        OBD_CHECK_SETUP(conn, exp);
+        OBD_CHECK_OP(exp->exp_obd, log_add);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, log_add);
+
+        rc = OBP(exp->exp_obd, log_add)(conn, cathandle, rec, lsm, logcookies,
+                                        numcookies);
+        class_export_put(exp);
+        RETURN(rc);
+}
+
+static inline int obd_log_cancel(struct lustre_handle *conn,
+                                 struct lov_stripe_md *lsm, int count,
+                                 struct llog_cookie *cookies, int flags)
+{       /* Call the log_cancel method of the OBD device reached through
+         * @conn with (conn, lsm, count, cookies, flags) and return that
+         * method's result.
+         *
+         * NOTE(review): OBD_CHECK_SETUP and OBD_CHECK_OP are defined outside
+         * this hunk.  Presumably OBD_CHECK_SETUP resolves @conn into exp
+         * (taking the reference that class_export_put() drops below) and
+         * both macros return early on failure - confirm against their
+         * definitions. */
+        struct obd_export *exp;
+        int rc;
+        ENTRY;
+
+        OBD_CHECK_SETUP(conn, exp);
+        OBD_CHECK_OP(exp->exp_obd, log_cancel);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, log_cancel);
+
+        rc = OBP(exp->exp_obd, log_cancel)(conn, lsm, count, cookies, flags);
+        class_export_put(exp);
+        RETURN(rc);
+}
+
+static inline int obd_san_preprw(int cmd, struct obd_export *exp,
+                                 struct obdo *oa,
                                  int objcount, struct obd_ioobj *obj,
                                  int niocount, struct niobuf_remote *remote)
 {
                                  int objcount, struct obd_ioobj *obj,
                                  int niocount, struct niobuf_remote *remote)
 {
-        struct obd_export *exp;
         int rc;
 
         int rc;
 
-        OBD_CHECK_ACTIVE(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, preprw);
         OBD_COUNTER_INCREMENT(exp->exp_obd, preprw);
 
         OBD_CHECK_OP(exp->exp_obd, preprw);
         OBD_COUNTER_INCREMENT(exp->exp_obd, preprw);
 
-        rc = OBP(exp->exp_obd, san_preprw)(cmd, conn, objcount, obj,
+        rc = OBP(exp->exp_obd, san_preprw)(cmd, exp, oa, objcount, obj,
                                            niocount, remote);
         class_export_put(exp);
                                            niocount, remote);
         class_export_put(exp);
-        RETURN(rc);
+        return(rc);
+}
+
+static inline int obd_pin(struct lustre_handle *conn, obd_id ino, __u32 gen,
+                          int type, struct obd_client_handle *handle, int flag)
+{       /* Call the pin method of the OBD device reached through @conn with
+         * (conn, ino, gen, type, handle, flag) and return that method's
+         * result.  This function has no ENTRY and uses a plain return()
+         * instead of RETURN().
+         *
+         * NOTE(review): OBD_CHECK_ACTIVE and OBD_CHECK_OP are defined outside
+         * this hunk.  Presumably OBD_CHECK_ACTIVE resolves @conn into exp
+         * (taking the reference that class_export_put() drops below) and
+         * both macros return early on failure - confirm against their
+         * definitions. */
+        struct obd_export *exp;
+        int rc;
+
+        OBD_CHECK_ACTIVE(conn, exp);
+        OBD_CHECK_OP(exp->exp_obd, pin);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, pin);
+
+        rc = OBP(exp->exp_obd, pin)(conn, ino, gen, type, handle, flag);
+        class_export_put(exp);
+        return(rc);
+}
+
+static inline int obd_unpin(struct lustre_handle *conn,
+                            struct obd_client_handle *handle, int flag)
+{       /* Call the unpin method of the OBD device reached through @conn
+         * with (conn, handle, flag) and return that method's result.  This
+         * function has no ENTRY and uses a plain return() instead of
+         * RETURN().
+         *
+         * NOTE(review): OBD_CHECK_ACTIVE and OBD_CHECK_OP are defined outside
+         * this hunk.  Presumably OBD_CHECK_ACTIVE resolves @conn into exp
+         * (taking the reference that class_export_put() drops below) and
+         * both macros return early on failure - confirm against their
+         * definitions. */
+        struct obd_export *exp;
+        int rc;
+
+        OBD_CHECK_ACTIVE(conn, exp);
+        OBD_CHECK_OP(exp->exp_obd, unpin);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, unpin);
+
+        rc = OBP(exp->exp_obd, unpin)(conn, handle, flag);
+        class_export_put(exp);
+        return(rc);
 }
 
 static inline int obd_mark_page_dirty(struct lustre_handle *conn,
 }
 
 static inline int obd_mark_page_dirty(struct lustre_handle *conn,
-                                      struct lov_stripe_md *lsm,  
+                                      struct lov_stripe_md *lsm,
                                       unsigned long offset)
 {
         struct obd_export *exp;
                                       unsigned long offset)
 {
         struct obd_export *exp;
@@ -780,14 +880,15 @@ static inline int obd_mark_page_dirty(struct lustre_handle *conn,
 
         OBD_CHECK_SETUP(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, mark_page_dirty);
 
         OBD_CHECK_SETUP(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, mark_page_dirty);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, mark_page_dirty);
 
         rc = OBP(exp->exp_obd, mark_page_dirty)(conn, lsm, offset);
         class_export_put(exp);
 
         rc = OBP(exp->exp_obd, mark_page_dirty)(conn, lsm, offset);
         class_export_put(exp);
-        RETURN(rc);
+        return(rc);
 }
 
 static inline int obd_clear_dirty_pages(struct lustre_handle *conn,
 }
 
 static inline int obd_clear_dirty_pages(struct lustre_handle *conn,
-                                        struct lov_stripe_md *lsm,  
+                                        struct lov_stripe_md *lsm,
                                         unsigned long start,
                                         unsigned long end,
                                         unsigned long *cleared)
                                         unsigned long start,
                                         unsigned long end,
                                         unsigned long *cleared)
@@ -797,11 +898,12 @@ static inline int obd_clear_dirty_pages(struct lustre_handle *conn,
 
         OBD_CHECK_SETUP(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, clear_dirty_pages);
 
         OBD_CHECK_SETUP(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, clear_dirty_pages);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, clear_dirty_pages);
 
         rc = OBP(exp->exp_obd, clear_dirty_pages)(conn, lsm, start, end,
                                                   cleared);
         class_export_put(exp);
 
         rc = OBP(exp->exp_obd, clear_dirty_pages)(conn, lsm, start, end,
                                                   cleared);
         class_export_put(exp);
-        RETURN(rc);
+        return(rc);
 }
 
 static inline int obd_last_dirty_offset(struct lustre_handle *conn,
 }
 
 static inline int obd_last_dirty_offset(struct lustre_handle *conn,
@@ -813,10 +915,11 @@ static inline int obd_last_dirty_offset(struct lustre_handle *conn,
 
         OBD_CHECK_SETUP(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, last_dirty_offset);
 
         OBD_CHECK_SETUP(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, last_dirty_offset);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, last_dirty_offset);
 
         rc = OBP(exp->exp_obd, last_dirty_offset)(conn, lsm, offset);
         class_export_put(exp);
 
         rc = OBP(exp->exp_obd, last_dirty_offset)(conn, lsm, offset);
         class_export_put(exp);
-        RETURN(rc);
+        return(rc);
 }
 
 /* OBD Metadata Support */
 }
 
 /* OBD Metadata Support */
@@ -824,11 +927,6 @@ static inline int obd_last_dirty_offset(struct lustre_handle *conn,
 extern int obd_init_caches(void);
 extern void obd_cleanup_caches(void);
 
 extern int obd_init_caches(void);
 extern void obd_cleanup_caches(void);
 
-static inline struct lustre_handle *obdo_handle(struct obdo *oa)
-{
-        return (struct lustre_handle *)&oa->o_inline;
-}
-
 /* support routines */
 extern kmem_cache_t *obdo_cachep;
 static inline struct obdo *obdo_alloc(void)
 /* support routines */
 extern kmem_cache_t *obdo_cachep;
 static inline struct obdo *obdo_alloc(void)
@@ -838,6 +936,7 @@ static inline struct obdo *obdo_alloc(void)
         oa = kmem_cache_alloc(obdo_cachep, SLAB_KERNEL);
         if (oa == NULL)
                 LBUG();
         oa = kmem_cache_alloc(obdo_cachep, SLAB_KERNEL);
         if (oa == NULL)
                 LBUG();
+        CDEBUG(D_MALLOC, "kmem_cache_alloced oa at %p\n", oa);
         memset(oa, 0, sizeof (*oa));
 
         return oa;
         memset(oa, 0, sizeof (*oa));
 
         return oa;
@@ -847,6 +946,7 @@ static inline void obdo_free(struct obdo *oa)
 {
         if (!oa)
                 return;
 {
         if (!oa)
                 return;
+        CDEBUG(D_MALLOC, "kmem_cache_freed oa at %p\n", oa);
         kmem_cache_free(obdo_cachep, oa);
 }
 
         kmem_cache_free(obdo_cachep, oa);
 }
 
@@ -855,268 +955,6 @@ static inline void obdo_free(struct obdo *oa)
 #define kdev_t_to_nr(dev) dev
 #endif
 
 #define kdev_t_to_nr(dev) dev
 #endif
 
-#ifdef __KERNEL__
-static inline void obdo_from_iattr(struct obdo *oa, struct iattr *attr)
-{
-        unsigned int ia_valid = attr->ia_valid;
-
-        if (ia_valid & ATTR_ATIME) {
-                oa->o_atime = LTIME_S(attr->ia_atime);
-                oa->o_valid |= OBD_MD_FLATIME;
-        }
-        if (ia_valid & ATTR_MTIME) {
-                oa->o_mtime = LTIME_S(attr->ia_mtime);
-                oa->o_valid |= OBD_MD_FLMTIME;
-        }
-        if (ia_valid & ATTR_CTIME) {
-                oa->o_ctime = LTIME_S(attr->ia_ctime);
-                oa->o_valid |= OBD_MD_FLCTIME;
-        }
-        if (ia_valid & ATTR_SIZE) {
-                oa->o_size = attr->ia_size;
-                oa->o_valid |= OBD_MD_FLSIZE;
-        }
-        if (ia_valid & ATTR_MODE) {
-                oa->o_mode = attr->ia_mode;
-                oa->o_valid |= OBD_MD_FLTYPE | OBD_MD_FLMODE;
-                if (!in_group_p(oa->o_gid) && !capable(CAP_FSETID))
-                        oa->o_mode &= ~S_ISGID;
-        }
-        if (ia_valid & ATTR_UID) {
-                oa->o_uid = attr->ia_uid;
-                oa->o_valid |= OBD_MD_FLUID;
-        }
-        if (ia_valid & ATTR_GID) {
-                oa->o_gid = attr->ia_gid;
-                oa->o_valid |= OBD_MD_FLGID;
-        }
-}
-
-
-static inline void iattr_from_obdo(struct iattr *attr, struct obdo *oa,
-                                   obd_flag valid)
-{
-        memset(attr, 0, sizeof(*attr));
-        if (valid & OBD_MD_FLATIME) {
-                LTIME_S(attr->ia_atime) = oa->o_atime;
-                attr->ia_valid |= ATTR_ATIME;
-        }
-        if (valid & OBD_MD_FLMTIME) {
-                LTIME_S(attr->ia_mtime) = oa->o_mtime;
-                attr->ia_valid |= ATTR_MTIME;
-        }
-        if (valid & OBD_MD_FLCTIME) {
-                LTIME_S(attr->ia_ctime) = oa->o_ctime;
-                attr->ia_valid |= ATTR_CTIME;
-        }
-        if (valid & OBD_MD_FLSIZE) {
-                attr->ia_size = oa->o_size;
-                attr->ia_valid |= ATTR_SIZE;
-        }
-        if (valid & OBD_MD_FLTYPE) {
-                attr->ia_mode = (attr->ia_mode & ~S_IFMT)|(oa->o_mode & S_IFMT);
-                attr->ia_valid |= ATTR_MODE;
-        }
-        if (valid & OBD_MD_FLMODE) {
-                attr->ia_mode = (attr->ia_mode & S_IFMT)|(oa->o_mode & ~S_IFMT);
-                attr->ia_valid |= ATTR_MODE;
-                if (!in_group_p(oa->o_gid) && !capable(CAP_FSETID))
-                        attr->ia_mode &= ~S_ISGID;
-        }
-        if (valid & OBD_MD_FLUID)
-        {
-                attr->ia_uid = oa->o_uid;
-                attr->ia_valid |= ATTR_UID;
-        }
-        if (valid & OBD_MD_FLGID) {
-                attr->ia_gid = oa->o_gid;
-                attr->ia_valid |= ATTR_GID;
-        }
-}
-
-
-/* WARNING: the file systems must take care not to tinker with
-   attributes they don't manage (such as blocks). */
-
-
-static inline void obdo_from_inode(struct obdo *dst, struct inode *src,
-                                   obd_flag valid)
-{
-        if (valid & OBD_MD_FLATIME)
-                dst->o_atime = LTIME_S(src->i_atime);
-        if (valid & OBD_MD_FLMTIME)
-                dst->o_mtime = LTIME_S(src->i_mtime);
-        if (valid & OBD_MD_FLCTIME)
-                dst->o_ctime = LTIME_S(src->i_ctime);
-        if (valid & OBD_MD_FLSIZE)
-                dst->o_size = src->i_size;
-        if (valid & OBD_MD_FLBLOCKS)   /* allocation of space */
-                dst->o_blocks = src->i_blocks;
-        if (valid & OBD_MD_FLBLKSZ)
-                dst->o_blksize = src->i_blksize;
-        if (valid & OBD_MD_FLTYPE)
-                dst->o_mode = (dst->o_mode & ~S_IFMT) | (src->i_mode & S_IFMT);
-        if (valid & OBD_MD_FLMODE)
-                dst->o_mode = (dst->o_mode & S_IFMT) | (src->i_mode & ~S_IFMT);
-        if (valid & OBD_MD_FLUID)
-                dst->o_uid = src->i_uid;
-        if (valid & OBD_MD_FLGID)
-                dst->o_gid = src->i_gid;
-        if (valid & OBD_MD_FLFLAGS)
-                dst->o_flags = src->i_flags;
-        if (valid & OBD_MD_FLNLINK)
-                dst->o_nlink = src->i_nlink;
-        if (valid & OBD_MD_FLGENER)
-                dst->o_generation = src->i_generation;
-        if (valid & OBD_MD_FLRDEV)
-                dst->o_rdev = (__u32)kdev_t_to_nr(src->i_rdev);
-
-        dst->o_valid |= (valid & ~OBD_MD_FLID);
-}
-
-static inline void obdo_refresh_inode(struct inode *dst, struct obdo *src,
-                                      obd_flag valid)
-{
-        valid &= src->o_valid;
-
-        if (valid & OBD_MD_FLATIME && src->o_atime > LTIME_S(dst->i_atime))
-                LTIME_S(dst->i_atime) = src->o_atime;
-        if (valid & OBD_MD_FLMTIME && src->o_mtime > LTIME_S(dst->i_mtime))
-                LTIME_S(dst->i_mtime) = src->o_mtime;
-        if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(dst->i_ctime))
-                LTIME_S(dst->i_ctime) = src->o_ctime;
-        if (valid & OBD_MD_FLSIZE && src->o_size > dst->i_size)
-                dst->i_size = src->o_size;
-        /* allocation of space */
-        if (valid & OBD_MD_FLBLOCKS && src->o_blocks > dst->i_blocks)
-                dst->i_blocks = src->o_blocks;
-}
-
-static inline void obdo_to_inode(struct inode *dst, struct obdo *src,
-                                 obd_flag valid)
-{
-        valid &= src->o_valid;
-
-        if (valid & OBD_MD_FLATIME)
-                LTIME_S(dst->i_atime) = src->o_atime;
-        if (valid & OBD_MD_FLMTIME)
-                LTIME_S(dst->i_mtime) = src->o_mtime;
-        if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(dst->i_ctime))
-                LTIME_S(dst->i_ctime) = src->o_ctime;
-        if (valid & OBD_MD_FLSIZE)
-                dst->i_size = src->o_size;
-        if (valid & OBD_MD_FLBLOCKS) /* allocation of space */
-                dst->i_blocks = src->o_blocks;
-        if (valid & OBD_MD_FLBLKSZ)
-                dst->i_blksize = src->o_blksize;
-        if (valid & OBD_MD_FLTYPE)
-                dst->i_mode = (dst->i_mode & ~S_IFMT) | (src->o_mode & S_IFMT);
-        if (valid & OBD_MD_FLMODE)
-                dst->i_mode = (dst->i_mode & S_IFMT) | (src->o_mode & ~S_IFMT);
-        if (valid & OBD_MD_FLUID)
-                dst->i_uid = src->o_uid;
-        if (valid & OBD_MD_FLGID)
-                dst->i_gid = src->o_gid;
-        if (valid & OBD_MD_FLFLAGS)
-                dst->i_flags = src->o_flags;
-        if (valid & OBD_MD_FLNLINK)
-                dst->i_nlink = src->o_nlink;
-        if (valid & OBD_MD_FLGENER)
-                dst->i_generation = src->o_generation;
-        if (valid & OBD_MD_FLRDEV)
-                dst->i_rdev = to_kdev_t(src->o_rdev);
-}
-#endif
-
-static inline void obdo_cpy_md(struct obdo *dst, struct obdo *src,
-                               obd_flag valid)
-{
-#ifdef __KERNEL__
-        CDEBUG(D_INODE, "src obdo %Ld valid 0x%x, dst obdo %Ld\n",
-               (unsigned long long)src->o_id, src->o_valid,
-               (unsigned long long)dst->o_id);
-#endif
-        if (valid & OBD_MD_FLATIME)
-                dst->o_atime = src->o_atime;
-        if (valid & OBD_MD_FLMTIME)
-                dst->o_mtime = src->o_mtime;
-        if (valid & OBD_MD_FLCTIME)
-                dst->o_ctime = src->o_ctime;
-        if (valid & OBD_MD_FLSIZE)
-                dst->o_size = src->o_size;
-        if (valid & OBD_MD_FLBLOCKS) /* allocation of space */
-                dst->o_blocks = src->o_blocks;
-        if (valid & OBD_MD_FLBLKSZ)
-                dst->o_blksize = src->o_blksize;
-        if (valid & OBD_MD_FLTYPE)
-                dst->o_mode = (dst->o_mode & ~S_IFMT) | (src->o_mode & S_IFMT);
-        if (valid & OBD_MD_FLMODE)
-                dst->o_mode = (dst->o_mode & S_IFMT) | (src->o_mode & ~S_IFMT);
-        if (valid & OBD_MD_FLUID)
-                dst->o_uid = src->o_uid;
-        if (valid & OBD_MD_FLGID)
-                dst->o_gid = src->o_gid;
-        if (valid & OBD_MD_FLFLAGS)
-                dst->o_flags = src->o_flags;
-        /*
-        if (valid & OBD_MD_FLOBDFLG)
-                dst->o_obdflags = src->o_obdflags;
-        */
-        if (valid & OBD_MD_FLNLINK)
-                dst->o_nlink = src->o_nlink;
-        if (valid & OBD_MD_FLGENER)
-                dst->o_generation = src->o_generation;
-        if (valid & OBD_MD_FLRDEV)
-                dst->o_rdev = src->o_rdev;
-        if (valid & OBD_MD_FLINLINE &&
-             src->o_obdflags & OBD_FL_INLINEDATA) {
-                memcpy(dst->o_inline, src->o_inline, sizeof(src->o_inline));
-                dst->o_obdflags |= OBD_FL_INLINEDATA;
-        }
-
-        dst->o_valid |= valid;
-}
-
-
-/* returns FALSE if comparison (by flags) is same, TRUE if changed */
-static inline int obdo_cmp_md(struct obdo *dst, struct obdo *src,
-                              obd_flag compare)
-{
-        int res = 0;
-
-        if ( compare & OBD_MD_FLATIME )
-                res = (res || (dst->o_atime != src->o_atime));
-        if ( compare & OBD_MD_FLMTIME )
-                res = (res || (dst->o_mtime != src->o_mtime));
-        if ( compare & OBD_MD_FLCTIME )
-                res = (res || (dst->o_ctime != src->o_ctime));
-        if ( compare & OBD_MD_FLSIZE )
-                res = (res || (dst->o_size != src->o_size));
-        if ( compare & OBD_MD_FLBLOCKS ) /* allocation of space */
-                res = (res || (dst->o_blocks != src->o_blocks));
-        if ( compare & OBD_MD_FLBLKSZ )
-                res = (res || (dst->o_blksize != src->o_blksize));
-        if ( compare & OBD_MD_FLTYPE )
-                res = (res || (((dst->o_mode ^ src->o_mode) & S_IFMT) != 0));
-        if ( compare & OBD_MD_FLMODE )
-                res = (res || (((dst->o_mode ^ src->o_mode) & ~S_IFMT) != 0));
-        if ( compare & OBD_MD_FLUID )
-                res = (res || (dst->o_uid != src->o_uid));
-        if ( compare & OBD_MD_FLGID )
-                res = (res || (dst->o_gid != src->o_gid));
-        if ( compare & OBD_MD_FLFLAGS )
-                res = (res || (dst->o_flags != src->o_flags));
-        if ( compare & OBD_MD_FLNLINK )
-                res = (res || (dst->o_nlink != src->o_nlink));
-        if ( compare & OBD_MD_FLGENER )
-                res = (res || (dst->o_generation != src->o_generation));
-        /* XXX Don't know if thses should be included here - wasn't previously
-        if ( compare & OBD_MD_FLINLINE )
-                res = (res || memcmp(dst->o_inline, src->o_inline));
-        */
-        return res;
-}
-
 /* I'm as embarrassed about this as you are.
  *
  * <shaver> // XXX do not look into _superhack with remaining eye
 /* I'm as embarrassed about this as you are.
  *
  * <shaver> // XXX do not look into _superhack with remaining eye
@@ -1124,11 +962,6 @@ static inline int obdo_cmp_md(struct obdo *dst, struct obdo *src,
 extern int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c);
 extern void (*ptlrpc_abort_inflight_superhack)(struct obd_import *imp);
 
 extern int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c);
 extern void (*ptlrpc_abort_inflight_superhack)(struct obd_import *imp);
 
-struct obd_statfs;
-struct statfs;
-void statfs_pack(struct obd_statfs *osfs, struct statfs *sfs);
-void statfs_unpack(struct statfs *sfs, struct obd_statfs *osfs);
-
 struct obd_class_user_state {
         struct obd_device     *ocus_current_obd;
         struct list_head       ocus_conns;
 struct obd_class_user_state {
         struct obd_device     *ocus_current_obd;
         struct list_head       ocus_conns;
index b12a062..6d68ae9 100644 (file)
@@ -8,14 +8,17 @@
 #define OBD_LOV_DEVICENAME "lov"
 
 struct lov_brw_async_args {
 #define OBD_LOV_DEVICENAME "lov"
 
 struct lov_brw_async_args {
-        obd_count        aa_oa_bufs;
-        struct brw_page *aa_ioarr;
+        struct lov_stripe_md  *aa_lsm;
+        struct obdo           *aa_obdos;
+        struct obdo           *aa_oa;
+        struct brw_page       *aa_ioarr;
+        obd_count              aa_oa_bufs;
 };
 
 struct lov_getattr_async_args {
         struct lov_stripe_md  *aa_lsm;
         struct obdo           *aa_oa;
 };
 
 struct lov_getattr_async_args {
         struct lov_stripe_md  *aa_lsm;
         struct obdo           *aa_oa;
-        struct obdo           *aa_stripe_oas;
+        struct obdo           *aa_obdos;
 };
 
 static inline int lov_stripe_md_size(int stripes)
 };
 
 static inline int lov_stripe_md_size(int stripes)
@@ -28,15 +31,6 @@ static inline int lov_mds_md_size(int stripes)
         return sizeof(struct lov_mds_md) + stripes*sizeof(struct lov_object_id);
 }
 
         return sizeof(struct lov_mds_md) + stripes*sizeof(struct lov_object_id);
 }
 
-extern int lov_packmd(struct lustre_handle *conn, struct lov_mds_md **lmm,
-                       struct lov_stripe_md *lsm);
-extern int lov_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsm,
-                         struct lov_mds_md *lmm, int lmmsize);
-extern int lov_setstripe(struct lustre_handle *conn,
-                         struct lov_stripe_md **lsmp, struct lov_mds_md *lmmu);
-extern int lov_getstripe(struct lustre_handle *conn, 
-                         struct lov_stripe_md *lsm, struct lov_mds_md *lmmu);
-
 #define IOC_LOV_TYPE                   'g'
 #define IOC_LOV_MIN_NR                 50
 #define IOC_LOV_SET_OSC_ACTIVE         _IOWR('g', 50, long)
 #define IOC_LOV_TYPE                   'g'
 #define IOC_LOV_MIN_NR                 50
 #define IOC_LOV_SET_OSC_ACTIVE         _IOWR('g', 50, long)
index 22fe694..ac2e24b 100644 (file)
@@ -35,6 +35,7 @@
 #define LUSTRE_SANOST_NAME "sanost"
 
 struct osc_brw_async_args {
 #define LUSTRE_SANOST_NAME "sanost"
 
 struct osc_brw_async_args {
+        struct obdo     *aa_oa;
         int              aa_requested_nob;
         int              aa_nio_count;
         obd_count        aa_page_count;
         int              aa_requested_nob;
         int              aa_nio_count;
         obd_count        aa_page_count;
index 2a76905..28a9a3d 100644 (file)
@@ -74,8 +74,10 @@ extern unsigned long obd_sync_filter;
 #define OBD_FAIL_MDS_STATFS_PACK         0x11d
 #define OBD_FAIL_MDS_STATFS_NET          0x11e
 #define OBD_FAIL_MDS_GETATTR_NAME_NET    0x11f
 #define OBD_FAIL_MDS_STATFS_PACK         0x11d
 #define OBD_FAIL_MDS_STATFS_NET          0x11e
 #define OBD_FAIL_MDS_GETATTR_NAME_NET    0x11f
-#define OBD_FAIL_MDS_ALL_REPLY_NET       0x120
-#define OBD_FAIL_MDS_ALL_REQUEST_NET     0x121
+#define OBD_FAIL_MDS_PIN_NET             0x120
+#define OBD_FAIL_MDS_UNPIN_NET           0x121
+#define OBD_FAIL_MDS_ALL_REPLY_NET       0x122
+#define OBD_FAIL_MDS_ALL_REQUEST_NET     0x123
 
 #define OBD_FAIL_OST                     0x200
 #define OBD_FAIL_OST_CONNECT_NET         0x201
 
 #define OBD_FAIL_OST                     0x200
 #define OBD_FAIL_OST_CONNECT_NET         0x201
@@ -116,6 +118,9 @@ extern unsigned long obd_sync_filter;
 #define OBD_FAIL_PTLRPC                  0x500
 #define OBD_FAIL_PTLRPC_ACK              0x501
 
 #define OBD_FAIL_PTLRPC                  0x500
 #define OBD_FAIL_PTLRPC_ACK              0x501
 
+#define OBD_FAIL_OBD_PING_NET            0x600
+#define OBD_FAIL_OBD_LOG_CANCEL_NET      0x601
+
 /* preparation for a more advanced failure testbed (not functional yet) */
 #define OBD_FAIL_MASK_SYS    0x0000FF00
 #define OBD_FAIL_MASK_LOC    (0x000000FF | OBD_FAIL_MASK_SYS)
 /* preparation for a more advanced failure testbed (not functional yet) */
 #define OBD_FAIL_MASK_SYS    0x0000FF00
 #define OBD_FAIL_MASK_LOC    (0x000000FF | OBD_FAIL_MASK_SYS)
@@ -169,37 +174,27 @@ do {                                                                         \
 
 
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
 
 
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#define ll_bdevname(a) __bdevname((a))
+#define BDEVNAME_DECLARE_STORAGE(foo) char foo[BDEVNAME_SIZE]
+#define ll_bdevname(DEV, STORAGE) __bdevname(DEV, STORAGE)
 #define ll_lock_kernel lock_kernel()
 #define ll_lock_kernel lock_kernel()
-#define LTIME_S(time) (time.tv_sec)
 #else
 #else
+#define BDEVNAME_DECLARE_STORAGE(foo) char __unused_##foo
+#define ll_bdevname(DEV, STORAGE) ((void)__unused_##STORAGE, bdevname((DEV)))
 #define ll_lock_kernel
 #define ll_lock_kernel
-#define ll_bdevname(a) bdevname((a))
-#define LTIME_S(time) (time)
 #endif
 
 
 static inline void OBD_FAIL_WRITE(int id, kdev_t dev)
 {
         if (OBD_FAIL_CHECK(id)) {
 #endif
 
 
 static inline void OBD_FAIL_WRITE(int id, kdev_t dev)
 {
         if (OBD_FAIL_CHECK(id)) {
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+                BDEVNAME_DECLARE_STORAGE(tmp);
 #ifdef CONFIG_DEV_RDONLY
                 CERROR("obd_fail_loc=%x, fail write operation on %s\n",
 #ifdef CONFIG_DEV_RDONLY
                 CERROR("obd_fail_loc=%x, fail write operation on %s\n",
-                       id, ll_bdevname(dev));
+                       id, ll_bdevname(kdev_t_to_nr(dev), tmp));
                 dev_set_rdonly(dev, 2);
 #else
                 CERROR("obd_fail_loc=%x, can't fail write operation on %s\n",
                 dev_set_rdonly(dev, 2);
 #else
                 CERROR("obd_fail_loc=%x, can't fail write operation on %s\n",
-                       id, ll_bdevname(dev));
-#endif
-#else
-#ifdef CONFIG_DEV_RDONLY
-                CERROR("obd_fail_loc=%x, fail write operation on %s\n",
-                       id, ll_bdevname(dev.value));
-                dev_set_rdonly(dev, 2);
-#else
-                CERROR("obd_fail_loc=%x, can't fail write operation on %s\n",
-                       id, ll_bdevname(dev.value));
-#endif
+                       id, ll_bdevname(kdev_t_to_nr(dev), tmp));
 #endif
                 /* We set FAIL_ONCE because we never "un-fail" a device */
                 obd_fail_loc |= OBD_FAILED | OBD_FAIL_ONCE;
 #endif
                 /* We set FAIL_ONCE because we never "un-fail" a device */
                 obd_fail_loc |= OBD_FAILED | OBD_FAIL_ONCE;
@@ -209,9 +204,9 @@ static inline void OBD_FAIL_WRITE(int id, kdev_t dev)
 #define LTIME_S(time) (time)
 #endif  /* __KERNEL__ */
 
 #define LTIME_S(time) (time)
 #endif  /* __KERNEL__ */
 
-#define OBD_ALLOC(ptr, size)                                                  \
+#define OBD_ALLOC_GFP(ptr, size, gfp_mask)                                    \
 do {                                                                          \
 do {                                                                          \
-        (ptr) = kmalloc(size, GFP_KERNEL);                                    \
+        (ptr) = kmalloc(size, gfp_mask);                                      \
         if ((ptr) == NULL) {                                                  \
                 CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n",  \
                        (int)(size), __FILE__, __LINE__);                      \
         if ((ptr) == NULL) {                                                  \
                 CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n",  \
                        (int)(size), __FILE__, __LINE__);                      \
@@ -225,6 +220,12 @@ do {                                                                          \
         }                                                                     \
 } while (0)
 
         }                                                                     \
 } while (0)
 
+#ifndef OBD_GFP_MASK
+# define OBD_GFP_MASK GFP_KERNEL
+#endif
+
+#define OBD_ALLOC(ptr, size) OBD_ALLOC_GFP(ptr, size, OBD_GFP_MASK)
+
 #ifdef __arch_um__
 # define OBD_VMALLOC(ptr, size) OBD_ALLOC(ptr, size)
 #else
 #ifdef __arch_um__
 # define OBD_VMALLOC(ptr, size) OBD_ALLOC(ptr, size)
 #else
@@ -246,9 +247,9 @@ do {                                                                          \
 #endif
 
 #ifdef CONFIG_DEBUG_SLAB
 #endif
 
 #ifdef CONFIG_DEBUG_SLAB
-#define POISON(lptr, c, s) do {} while (0)
+#define POISON(ptr, c, s) do {} while (0)
 #else
 #else
-#define POISON(lptr, c, s) memset(lptr, c, s)
+#define POISON(ptr, c, s) memset(ptr, c, s)
 #endif
 
 #define OBD_FREE(ptr, size)                                                   \
 #endif
 
 #define OBD_FREE(ptr, size)                                                   \
@@ -277,9 +278,12 @@ do {                                                                          \
 } while (0)
 #endif
 
 } while (0)
 #endif
 
+/* we memset() the slab object to 0 when allocation succeeds, so DO NOT
+ * HAVE A CTOR THAT DOES ANYTHING.  its work will be cleared here.  we'd
+ * love to assert on that, but slab.c keeps kmem_cache_s all to itself. */
 #define OBD_SLAB_ALLOC(ptr, slab, type, size)                                 \
 do {                                                                          \
 #define OBD_SLAB_ALLOC(ptr, slab, type, size)                                 \
 do {                                                                          \
-        LASSERT (!in_interrupt());                                            \
+        LASSERT(!in_interrupt());                                             \
         (ptr) = kmem_cache_alloc(slab, type);                                 \
         if ((ptr) == NULL) {                                                  \
                 CERROR("slab-alloc of '"#ptr"' (%d bytes) failed at %s:%d\n", \
         (ptr) = kmem_cache_alloc(slab, type);                                 \
         if ((ptr) == NULL) {                                                  \
                 CERROR("slab-alloc of '"#ptr"' (%d bytes) failed at %s:%d\n", \
index 55057d9..1b589b9 100644 (file)
@@ -1,13 +1,10 @@
-
-
-
  drivers/block/blkpg.c  |   35 +++++++++++++++++++++++++++++++++++
  drivers/block/loop.c   |    3 +++
  drivers/block/blkpg.c  |   35 +++++++++++++++++++++++++++++++++++
  drivers/block/loop.c   |    3 +++
- drivers/ide/ide-disk.c |    5 ++++-
- 3 files changed, 42 insertions(+), 1 deletion(-)
+ drivers/ide/ide-disk.c |    5 +++++
+ 3 files changed, 43 insertions(+)
 
 
---- rh-2.4.20/drivers/block/blkpg.c~dev_read_only_2.4.20       2003-04-11 14:05:03.000000000 +0800
-+++ rh-2.4.20-root/drivers/block/blkpg.c       2003-04-12 13:11:31.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/drivers/block/blkpg.c~dev_read_only_2.4.20-rh    2003-05-15 21:12:48.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/drivers/block/blkpg.c      2003-07-12 15:10:31.000000000 -0600
 @@ -297,3 +297,38 @@ int blk_ioctl(kdev_t dev, unsigned int c
  }
  
 @@ -297,3 +297,38 @@ int blk_ioctl(kdev_t dev, unsigned int c
  }
  
@@ -47,8 +44,8 @@
 +EXPORT_SYMBOL(dev_set_rdonly);
 +EXPORT_SYMBOL(dev_check_rdonly);
 +EXPORT_SYMBOL(dev_clear_rdonly);
 +EXPORT_SYMBOL(dev_set_rdonly);
 +EXPORT_SYMBOL(dev_check_rdonly);
 +EXPORT_SYMBOL(dev_clear_rdonly);
---- rh-2.4.20/drivers/block/loop.c~dev_read_only_2.4.20        2003-04-11 14:05:08.000000000 +0800
-+++ rh-2.4.20-root/drivers/block/loop.c        2003-04-12 13:11:31.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/drivers/block/loop.c~dev_read_only_2.4.20-rh     2003-05-15 21:12:50.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/drivers/block/loop.c       2003-07-12 15:10:31.000000000 -0600
 @@ -491,6 +491,9 @@ static int loop_make_request(request_que
        spin_unlock_irq(&lo->lo_lock);
  
 @@ -491,6 +491,9 @@ static int loop_make_request(request_que
        spin_unlock_irq(&lo->lo_lock);
  
                if (lo->lo_flags & LO_FLAGS_READ_ONLY)
                        goto err;
        } else if (rw == READA) {
                if (lo->lo_flags & LO_FLAGS_READ_ONLY)
                        goto err;
        } else if (rw == READA) {
---- rh-2.4.20/drivers/ide/ide-disk.c~dev_read_only_2.4.20      2003-04-11 14:04:53.000000000 +0800
-+++ rh-2.4.20-root/drivers/ide/ide-disk.c      2003-04-12 13:14:48.000000000 +0800
-@@ -381,7 +381,10 @@ static ide_startstop_t do_rw_disk (ide_d
-       if (IS_PDC4030_DRIVE)
-               return promise_rw_disk(drive, rq, block);
- #endif /* CONFIG_BLK_DEV_PDC4030 */
--
-+      if (rq->cmd == WRITE && dev_check_rdonly(rq->rq_dev)) {
-+              ide_end_request(1, HWGROUP(drive));
-+              return ide_stopped;
-+      }
+--- kernel-2.4.20-6chaos_18_7/drivers/ide/ide-disk.c~dev_read_only_2.4.20-rh   2003-05-15 21:13:09.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/drivers/ide/ide-disk.c     2003-07-12 15:12:03.000000000 -0600
+@@ -371,6 +371,11 @@ ide_startstop_t __ide_do_rw_disk (ide_dr
+       if (driver_blocked)
+               panic("Request while ide driver is blocked?");
++      if (rq->cmd == WRITE && dev_check_rdonly(rq->rq_dev)) {
++              ide_end_request(1, HWGROUP(drive));
++              return ide_stopped;
++      }
++
        if (IDE_CONTROL_REG)
                hwif->OUTB(drive->ctl, IDE_CONTROL_REG);
  
        if (IDE_CONTROL_REG)
                hwif->OUTB(drive->ctl, IDE_CONTROL_REG);
  
index 3d82572..3063be4 100644 (file)
@@ -9,7 +9,7 @@
                return 0;
  }
 +/* truncate.c */
                return 0;
  }
 +/* truncate.c */
-+extern void truncate_complete_page(struct page *);
++extern void truncate_complete_page(struct address_space *mapping,struct page *);
  
  /* filemap.c */
  extern unsigned long page_unuse(struct page *);
  
  /* filemap.c */
  extern unsigned long page_unuse(struct page *);
index e01feca..a173981 100644 (file)
@@ -1,11 +1,17 @@
- fs/ext3/super.c            |  229 +++++++++++++++++++++++++++++++++++++++++++++
- include/linux/ext3_fs.h    |    2 
+
+Create a service thread to handle delete and truncate of inodes, to avoid
+long latency while truncating very large files.
+
+
+ fs/ext3/inode.c            |  116 ++++++++++++++++++++++
+ fs/ext3/super.c            |  231 +++++++++++++++++++++++++++++++++++++++++++++
+ include/linux/ext3_fs.h    |    5 
  include/linux/ext3_fs_sb.h |   10 +
  include/linux/ext3_fs_sb.h |   10 +
3 files changed, 241 insertions(+)
4 files changed, 362 insertions(+)
 
 --- linux-2.4.18-18.8.0-l15/fs/ext3/super.c~ext3-delete_thread-2.4.18  Tue Jun  3 17:26:21 2003
 
 --- linux-2.4.18-18.8.0-l15/fs/ext3/super.c~ext3-delete_thread-2.4.18  Tue Jun  3 17:26:21 2003
-+++ linux-2.4.18-18.8.0-l15-adilger/fs/ext3/super.c    Wed Jun 18 11:59:14 2003
-@@ -396,6 +396,219 @@ static void dump_orphan_list(struct supe
++++ linux-2.4.18-18.8.0-l15-adilger/fs/ext3/super.c    Wed Jul  2 23:49:40 2003
+@@ -396,6 +396,220 @@ static void dump_orphan_list(struct supe
        }
  }
  
        }
  }
  
 + * If we have any problem deferring the delete, just delete it right away.
 + * If we defer it, we also mark how many blocks it would free, so that we
 + * can keep the statfs data correct, and we know if we should sleep on the
 + * If we have any problem deferring the delete, just delete it right away.
 + * If we defer it, we also mark how many blocks it would free, so that we
 + * can keep the statfs data correct, and we know if we should sleep on the
-+ * truncate thread when we run out of space.
-+ *
-+ * In 2.5 this can be done much more cleanly by just registering a "drop"
-+ * method in the super_operations struct.
++ * delete thread when we run out of space.
 + */
 +static void ext3_delete_inode_thread(struct inode *old_inode)
 +{
 +      struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
 + */
 +static void ext3_delete_inode_thread(struct inode *old_inode)
 +{
 +      struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
++      struct ext3_inode_info *nei, *oei = EXT3_I(old_inode);
 +      struct inode *new_inode;
 +      unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
 +
 +      struct inode *new_inode;
 +      unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
 +
 +              return;
 +      }
 +
 +              return;
 +      }
 +
-+      if (!test_opt(old_inode->i_sb, ASYNCDEL)) {
-+              ext3_delete_inode(old_inode);
-+              return;
-+      }
++      if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next)
++              goto out_delete;
 +
 +      /* We may want to delete the inode immediately and not defer it */
 +
 +      /* We may want to delete the inode immediately and not defer it */
-+      if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS ||
-+          !sbi->s_delete_list.next) {
-+              ext3_delete_inode(old_inode);
-+              return;
-+      }
++      if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS)
++              goto out_delete;
 +
 +
-+      if ((EXT3_I(old_inode)->i_state & EXT3_STATE_DELETE) ||
-+          (EXT3_SB(old_inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) {
++      /* We can't use the delete thread as-is during real orphan recovery,
++       * as we add to the orphan list here, causing ext3_orphan_cleanup()
++       * to loop endlessly.  It would be nice to do so, but needs work.
++       */
++      if (oei->i_state & EXT3_STATE_DELETE ||
++          sbi->s_mount_state & EXT3_ORPHAN_FS) {
 +              ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
 +                         old_inode->i_ino, blocks);
 +              ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
 +                         old_inode->i_ino, blocks);
-+              ext3_delete_inode(old_inode);
-+              return;
++              goto out_delete;
 +      }
 +
 +      /* We can iget this inode again here, because our caller has unhashed
 +      }
 +
 +      /* We can iget this inode again here, because our caller has unhashed
 +       */
 +      down(&sbi->s_orphan_lock);
 +
 +       */
 +      down(&sbi->s_orphan_lock);
 +
-+      EXT3_SB(old_inode->i_sb)->s_mount_state |= EXT3_ORPHAN_FS;
++      sbi->s_mount_state |= EXT3_ORPHAN_FS;
 +      new_inode = iget(old_inode->i_sb, old_inode->i_ino);
 +      new_inode = iget(old_inode->i_sb, old_inode->i_ino);
-+      EXT3_SB(old_inode->i_sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
++      sbi->s_mount_state &= ~EXT3_ORPHAN_FS;
 +      if (is_bad_inode(new_inode)) {
 +              printk(KERN_WARNING "read bad inode %lu\n", old_inode->i_ino);
 +              iput(new_inode);
 +      if (is_bad_inode(new_inode)) {
 +              printk(KERN_WARNING "read bad inode %lu\n", old_inode->i_ino);
 +              iput(new_inode);
 +              up(&sbi->s_orphan_lock);
 +              ext3_debug("delete inode %lu directly (bad read)\n",
 +                         old_inode->i_ino);
 +              up(&sbi->s_orphan_lock);
 +              ext3_debug("delete inode %lu directly (bad read)\n",
 +                         old_inode->i_ino);
-+              ext3_delete_inode(old_inode);
-+              return;
++              goto out_delete;
 +      }
 +      J_ASSERT(new_inode != old_inode);
 +
 +      }
 +      J_ASSERT(new_inode != old_inode);
 +
-+      J_ASSERT(!list_empty(&EXT3_I(old_inode)->i_orphan));
++      J_ASSERT(!list_empty(&oei->i_orphan));
++
++      nei = EXT3_I(new_inode);
 +      /* Ugh.  We need to insert new_inode into the same spot on the list
 +       * as old_inode was, to ensure the in-memory orphan list is still
 +       * in the same order as the on-disk orphan list (badness otherwise).
 +       */
 +      /* Ugh.  We need to insert new_inode into the same spot on the list
 +       * as old_inode was, to ensure the in-memory orphan list is still
 +       * in the same order as the on-disk orphan list (badness otherwise).
 +       */
-+      EXT3_I(new_inode)->i_orphan = EXT3_I(old_inode)->i_orphan;
-+      EXT3_I(new_inode)->i_orphan.next->prev = &EXT3_I(new_inode)->i_orphan;
-+      EXT3_I(new_inode)->i_orphan.prev->next = &EXT3_I(new_inode)->i_orphan;
-+      EXT3_I(new_inode)->i_state |= EXT3_STATE_DELETE;
++      nei->i_orphan = oei->i_orphan;
++      nei->i_orphan.next->prev = &nei->i_orphan;
++      nei->i_orphan.prev->next = &nei->i_orphan;
++      nei->i_state |= EXT3_STATE_DELETE;
 +      up(&sbi->s_orphan_lock);
 +
 +      clear_inode(old_inode);
 +      up(&sbi->s_orphan_lock);
 +
 +      clear_inode(old_inode);
 +                 new_inode->i_ino, blocks);
 +
 +      wake_up(&sbi->s_delete_thread_queue);
 +                 new_inode->i_ino, blocks);
 +
 +      wake_up(&sbi->s_delete_thread_queue);
++      return;
++
++out_delete:
++      ext3_delete_inode(old_inode);
 +}
 +#else
 +#define ext3_start_delete_thread(sbi) do {} while(0)
 +}
 +#else
 +#define ext3_start_delete_thread(sbi) do {} while(0)
  void ext3_put_super (struct super_block * sb)
  {
        struct ext3_sb_info *sbi = EXT3_SB(sb);
  void ext3_put_super (struct super_block * sb)
  {
        struct ext3_sb_info *sbi = EXT3_SB(sb);
-@@ -403,6 +615,7 @@ void ext3_put_super (struct super_block 
+@@ -403,6 +617,7 @@ void ext3_put_super (struct super_block 
        kdev_t j_dev = sbi->s_journal->j_dev;
        int i;
  
        kdev_t j_dev = sbi->s_journal->j_dev;
        int i;
  
        ext3_xattr_put_super(sb);
        journal_destroy(sbi->s_journal);
        if (!(sb->s_flags & MS_RDONLY)) {
        ext3_xattr_put_super(sb);
        journal_destroy(sbi->s_journal);
        if (!(sb->s_flags & MS_RDONLY)) {
-@@ -451,7 +664,11 @@ static struct super_operations ext3_sops
+@@ -451,7 +666,11 @@ static struct super_operations ext3_sops
        write_inode:    ext3_write_inode,       /* BKL not held.  Don't need */
        dirty_inode:    ext3_dirty_inode,       /* BKL not held.  We take it */
        put_inode:      ext3_put_inode,         /* BKL not held.  Don't need */
        write_inode:    ext3_write_inode,       /* BKL not held.  Don't need */
        dirty_inode:    ext3_dirty_inode,       /* BKL not held.  We take it */
        put_inode:      ext3_put_inode,         /* BKL not held.  Don't need */
        put_super:      ext3_put_super,         /* BKL held */
        write_super:    ext3_write_super,       /* BKL held */
        write_super_lockfs: ext3_write_super_lockfs, /* BKL not held. Take it */
        put_super:      ext3_put_super,         /* BKL held */
        write_super:    ext3_write_super,       /* BKL held */
        write_super_lockfs: ext3_write_super_lockfs, /* BKL not held. Take it */
-@@ -511,6 +728,14 @@ static int parse_options (char * options
+@@ -511,6 +730,14 @@ static int parse_options (char * options
             this_char = strtok (NULL, ",")) {
                if ((value = strchr (this_char, '=')) != NULL)
                        *value++ = 0;
             this_char = strtok (NULL, ",")) {
                if ((value = strchr (this_char, '=')) != NULL)
                        *value++ = 0;
                if (!strcmp (this_char, "bsddf"))
                        clear_opt (*mount_options, MINIX_DF);
                else if (!strcmp (this_char, "nouid32")) {
                if (!strcmp (this_char, "bsddf"))
                        clear_opt (*mount_options, MINIX_DF);
                else if (!strcmp (this_char, "nouid32")) {
-@@ -1206,6 +1431,7 @@ struct super_block * ext3_read_super (st
+@@ -1206,6 +1433,7 @@ struct super_block * ext3_read_super (st
        }
  
        ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
        }
  
        ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
        /*
         * akpm: core read_super() calls in here with the superblock locked.
         * That deadlocks, because orphan cleanup needs to lock the superblock
        /*
         * akpm: core read_super() calls in here with the superblock locked.
         * That deadlocks, because orphan cleanup needs to lock the superblock
-@@ -1648,6 +1874,9 @@ int ext3_remount (struct super_block * s
+@@ -1648,6 +1876,9 @@ int ext3_remount (struct super_block * s
        if (!parse_options(data, &tmp, sbi, &tmp, 1))
                return -EINVAL;
  
        if (!parse_options(data, &tmp, sbi, &tmp, 1))
                return -EINVAL;
  
        if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
                ext3_abort(sb, __FUNCTION__, "Abort forced by user");
  
        if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
                ext3_abort(sb, __FUNCTION__, "Abort forced by user");
  
+--- linux/fs/ext3/file.c.orig  Fri Jan 17 10:57:31 2003
++++ linux/fs/ext3/file.c       Mon Jun 30 13:28:52 2003
+@@ -121,7 +121,11 @@ struct file_operations ext3_file_operati
+ };
+ struct inode_operations ext3_file_inode_operations = {
++#ifdef EXT3_DELETE_THREAD
++      truncate:       ext3_truncate_thread,   /* BKL held */
++#else
+       truncate:       ext3_truncate,          /* BKL held */
++#endif
+       setattr:        ext3_setattr,           /* BKL held */
+ };
+--- linux-2.4.18-18.8.0-l15/fs/ext3/inode.c~ext3-delete_thread-2.4.18  Wed Jul  2 23:13:58 2003
++++ linux-2.4.18-18.8.0-l15-adilger/fs/ext3/inode.c    Wed Jul  2 23:50:29 2003
+@@ -2004,6 +2004,118 @@ out_stop:
+       ext3_journal_stop(handle, inode);
+ }
++#ifdef EXT3_DELETE_THREAD
++/* Move blocks from to-be-truncated inode over to a new inode, and delete
++ * that one from the delete thread instead.  This avoids a lot of latency
++ * when truncating large files.
++ *
++ * If we have any problem deferring the truncate, just truncate it right away.
++ * If we defer it, we also mark how many blocks it would free, so that we
++ * can keep the statfs data correct, and we know if we should sleep on the
++ * delete thread when we run out of space.
++ */
++void ext3_truncate_thread(struct inode *old_inode)
++{
++      struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
++      struct ext3_inode_info *nei, *oei = EXT3_I(old_inode);
++      struct inode *new_inode;
++      handle_t *handle;
++      unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
++
++      if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next)
++              goto out_truncate;
++
++      /* XXX This is a temporary limitation for code simplicity.
++       *     We could truncate to arbitrary sizes at some later time.
++       */
++      if (old_inode->i_size != 0)
++              goto out_truncate;
++
++      /* We may want to truncate the inode immediately and not defer it */
++      if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS ||
++          old_inode->i_size > oei->i_disksize)
++              goto out_truncate;
++
++      /* We can't use the delete thread as-is during real orphan recovery,
++       * as we add to the orphan list here, causing ext3_orphan_cleanup()
++       * to loop endlessly.  It would be nice to do so, but needs work.
++       */
++      if (oei->i_state & EXT3_STATE_DELETE ||
++          sbi->s_mount_state & EXT3_ORPHAN_FS) {
++              ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
++                         old_inode->i_ino, blocks);
++              goto out_truncate;
++      }
++
++      ext3_discard_prealloc(old_inode);
++
++      /* old_inode   = 1
++       * new_inode   = sb + GDT + ibitmap
++       * orphan list = 1 inode/superblock for add, 2 inodes for del
++       * quota files = 2 * EXT3_SINGLEDATA_TRANS_BLOCKS
++       */
++      handle = ext3_journal_start(old_inode, 7);
++      if (IS_ERR(handle))
++              goto out_truncate;
++
++      new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode);
++      if (IS_ERR(new_inode)) {
++              ext3_debug("truncate inode %lu directly (no new inodes)\n",
++                         old_inode->i_ino);
++              goto out_journal;
++      }
++
++      nei = EXT3_I(new_inode);
++
++      down_write(&oei->truncate_sem);
++      new_inode->i_size = old_inode->i_size;
++      new_inode->i_blocks = old_inode->i_blocks;
++      new_inode->i_uid = old_inode->i_uid;
++      new_inode->i_gid = old_inode->i_gid;
++      new_inode->i_nlink = 0;
++
++      /* FIXME when we do arbitrary truncates */
++      old_inode->i_blocks = oei->i_file_acl ? old_inode->i_blksize / 512 : 0;
++      old_inode->i_mtime = old_inode->i_ctime = CURRENT_TIME;
++
++      memcpy(nei->i_data, oei->i_data, sizeof(nei->i_data));
++      memset(oei->i_data, 0, sizeof(oei->i_data));
++
++      nei->i_disksize = oei->i_disksize;
++      nei->i_state |= EXT3_STATE_DELETE;
++      up_write(&oei->truncate_sem);
++
++      if (ext3_orphan_add(handle, new_inode) < 0)
++              goto out_journal;
++
++      if (ext3_orphan_del(handle, old_inode) < 0) {
++              ext3_orphan_del(handle, new_inode);
++              iput(new_inode);
++              goto out_journal;
++      }
++
++      ext3_journal_stop(handle, old_inode);
++
++      spin_lock(&sbi->s_delete_lock);
++      J_ASSERT(list_empty(&new_inode->i_dentry));
++      list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list);
++      sbi->s_delete_blocks += blocks;
++      sbi->s_delete_inodes++;
++      spin_unlock(&sbi->s_delete_lock);
++
++      ext3_debug("delete inode %lu (%lu blocks) by thread\n",
++                 new_inode->i_ino, blocks);
++
++      wake_up(&sbi->s_delete_thread_queue);
++      return;
++
++out_journal:
++      ext3_journal_stop(handle, old_inode);
++out_truncate:
++      ext3_truncate(old_inode);
++}
++#endif /* EXT3_DELETE_THREAD */
++
+ /* 
+  * ext3_get_inode_loc returns with an extra refcount against the
+  * inode's underlying buffer_head on success. 
 --- linux-2.4.18-18.8.0-l15/include/linux/ext3_fs.h~ext3-delete_thread-2.4.18  Tue Jun  3 17:26:20 2003
 --- linux-2.4.18-18.8.0-l15/include/linux/ext3_fs.h~ext3-delete_thread-2.4.18  Tue Jun  3 17:26:20 2003
-+++ linux-2.4.18-18.8.0-l15-adilger/include/linux/ext3_fs.h    Tue Jun 17 12:36:56 2003
++++ linux-2.4.18-18.8.0-l15-adilger/include/linux/ext3_fs.h    Wed Jul  2 23:19:09 2003
 @@ -190,6 +190,7 @@ struct ext3_group_desc
   */
  #define EXT3_STATE_JDATA              0x00000001 /* journaled data exists */
 @@ -190,6 +190,7 @@ struct ext3_group_desc
   */
  #define EXT3_STATE_JDATA              0x00000001 /* journaled data exists */
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
  #ifndef _LINUX_EXT2_FS_H
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
  #ifndef _LINUX_EXT2_FS_H
+@@ -651,6 +653,9 @@ extern void ext3_discard_prealloc (struc
+ extern void ext3_dirty_inode(struct inode *);
+ extern int ext3_change_inode_journal_flag(struct inode *, int);
+ extern void ext3_truncate (struct inode *);
++#ifdef EXT3_DELETE_THREAD
++extern void ext3_truncate_thread(struct inode *inode);
++#endif
+ /* ioctl.c */
+ extern int ext3_ioctl (struct inode *, struct file *, unsigned int,
 --- linux-2.4.18-18.8.0-l15/include/linux/ext3_fs_sb.h~ext3-delete_thread-2.4.18       Tue Jun  3 17:26:21 2003
 --- linux-2.4.18-18.8.0-l15/include/linux/ext3_fs_sb.h~ext3-delete_thread-2.4.18       Tue Jun  3 17:26:21 2003
-+++ linux-2.4.18-18.8.0-l15-adilger/include/linux/ext3_fs_sb.h Tue Jun 17 12:36:56 2003
++++ linux-2.4.18-18.8.0-l15-adilger/include/linux/ext3_fs_sb.h Wed Jul  2 23:19:09 2003
 @@ -29,6 +29,8 @@
  
  #define EXT3_MAX_GROUP_LOADED 32
 @@ -29,6 +29,8 @@
  
  #define EXT3_MAX_GROUP_LOADED 32
index 34c5158..a8816ec 100644 (file)
@@ -1,7 +1,13 @@
-diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
---- origin/fs/ext3/super.c     2003-05-04 17:23:52.000000000 +0400
-+++ linux/fs/ext3/super.c      2003-05-04 17:09:20.000000000 +0400
-@@ -398,6 +398,219 @@ static void dump_orphan_list(struct supe
+ fs/ext3/file.c             |    4 
+ fs/ext3/inode.c            |  116 ++++++++++++++++++++++
+ fs/ext3/super.c            |  230 +++++++++++++++++++++++++++++++++++++++++++++
+ include/linux/ext3_fs.h    |    5 
+ include/linux/ext3_fs_sb.h |   10 +
+ 5 files changed, 365 insertions(+)
+
+--- linux/fs/ext3/super.c~ext3-delete_thread-2.4.20    Thu Jul 10 14:11:32 2003
++++ linux-mmonroe/fs/ext3/super.c      Thu Jul 10 14:11:33 2003
+@@ -400,6 +400,220 @@ static void dump_orphan_list(struct supe
        }
  }
  
        }
  }
  
@@ -126,14 +132,12 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
 + * If we have any problem deferring the delete, just delete it right away.
 + * If we defer it, we also mark how many blocks it would free, so that we
 + * can keep the statfs data correct, and we know if we should sleep on the
 + * If we have any problem deferring the delete, just delete it right away.
 + * If we defer it, we also mark how many blocks it would free, so that we
 + * can keep the statfs data correct, and we know if we should sleep on the
-+ * truncate thread when we run out of space.
-+ *
-+ * In 2.5 this can be done much more cleanly by just registering a "drop"
-+ * method in the super_operations struct.
++ * delete thread when we run out of space.
 + */
 +static void ext3_delete_inode_thread(struct inode *old_inode)
 +{
 +      struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
 + */
 +static void ext3_delete_inode_thread(struct inode *old_inode)
 +{
 +      struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
++      struct ext3_inode_info *nei, *oei = EXT3_I(old_inode);
 +      struct inode *new_inode;
 +      unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
 +
 +      struct inode *new_inode;
 +      unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
 +
@@ -142,24 +146,22 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
 +              return;
 +      }
 +
 +              return;
 +      }
 +
-+      if (!test_opt(old_inode->i_sb, ASYNCDEL)) {
-+              ext3_delete_inode(old_inode);
-+              return;
-+      }
++      if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next)
++              goto out_delete;
 +
 +      /* We may want to delete the inode immediately and not defer it */
 +
 +      /* We may want to delete the inode immediately and not defer it */
-+      if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS ||
-+          !sbi->s_delete_list.next) {
-+              ext3_delete_inode(old_inode);
-+              return;
-+      }
++      if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS)
++              goto out_delete;
 +
 +
-+      if ((EXT3_I(old_inode)->i_state & EXT3_STATE_DELETE) ||
-+          (EXT3_SB(old_inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) {
++      /* We can't use the delete thread as-is during real orphan recovery,
++       * as we add to the orphan list here, causing ext3_orphan_cleanup()
++       * to loop endlessly.  It would be nice to do so, but needs work.
++       */
++      if (oei->i_state & EXT3_STATE_DELETE ||
++          sbi->s_mount_state & EXT3_ORPHAN_FS) {
 +              ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
 +                         old_inode->i_ino, blocks);
 +              ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
 +                         old_inode->i_ino, blocks);
-+              ext3_delete_inode(old_inode);
-+              return;
++              goto out_delete;
 +      }
 +
 +      /* We can iget this inode again here, because our caller has unhashed
 +      }
 +
 +      /* We can iget this inode again here, because our caller has unhashed
@@ -171,9 +173,9 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
 +       */
 +      down(&sbi->s_orphan_lock);
 +
 +       */
 +      down(&sbi->s_orphan_lock);
 +
-+      EXT3_SB(old_inode->i_sb)->s_mount_state |= EXT3_ORPHAN_FS;
++      sbi->s_mount_state |= EXT3_ORPHAN_FS;
 +      new_inode = iget(old_inode->i_sb, old_inode->i_ino);
 +      new_inode = iget(old_inode->i_sb, old_inode->i_ino);
-+      EXT3_SB(old_inode->i_sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
++      sbi->s_mount_state &= ~EXT3_ORPHAN_FS;
 +      if (is_bad_inode(new_inode)) {
 +              printk(KERN_WARNING "read bad inode %lu\n", old_inode->i_ino);
 +              iput(new_inode);
 +      if (is_bad_inode(new_inode)) {
 +              printk(KERN_WARNING "read bad inode %lu\n", old_inode->i_ino);
 +              iput(new_inode);
@@ -183,20 +185,21 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
 +              up(&sbi->s_orphan_lock);
 +              ext3_debug("delete inode %lu directly (bad read)\n",
 +                         old_inode->i_ino);
 +              up(&sbi->s_orphan_lock);
 +              ext3_debug("delete inode %lu directly (bad read)\n",
 +                         old_inode->i_ino);
-+              ext3_delete_inode(old_inode);
-+              return;
++              goto out_delete;
 +      }
 +      J_ASSERT(new_inode != old_inode);
 +
 +      }
 +      J_ASSERT(new_inode != old_inode);
 +
-+      J_ASSERT(!list_empty(&EXT3_I(old_inode)->i_orphan));
++      J_ASSERT(!list_empty(&oei->i_orphan));
++
++      nei = EXT3_I(new_inode);
 +      /* Ugh.  We need to insert new_inode into the same spot on the list
 +       * as old_inode was, to ensure the in-memory orphan list is still
 +       * in the same order as the on-disk orphan list (badness otherwise).
 +       */
 +      /* Ugh.  We need to insert new_inode into the same spot on the list
 +       * as old_inode was, to ensure the in-memory orphan list is still
 +       * in the same order as the on-disk orphan list (badness otherwise).
 +       */
-+      EXT3_I(new_inode)->i_orphan = EXT3_I(old_inode)->i_orphan;
-+      EXT3_I(new_inode)->i_orphan.next->prev = &EXT3_I(new_inode)->i_orphan;
-+      EXT3_I(new_inode)->i_orphan.prev->next = &EXT3_I(new_inode)->i_orphan;
-+      EXT3_I(new_inode)->i_state |= EXT3_STATE_DELETE;
++      nei->i_orphan = oei->i_orphan;
++      nei->i_orphan.next->prev = &nei->i_orphan;
++      nei->i_orphan.prev->next = &nei->i_orphan;
++      nei->i_state |= EXT3_STATE_DELETE;
 +      up(&sbi->s_orphan_lock);
 +
 +      clear_inode(old_inode);
 +      up(&sbi->s_orphan_lock);
 +
 +      clear_inode(old_inode);
@@ -212,6 +215,10 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
 +                 new_inode->i_ino, blocks);
 +
 +      wake_up(&sbi->s_delete_thread_queue);
 +                 new_inode->i_ino, blocks);
 +
 +      wake_up(&sbi->s_delete_thread_queue);
++      return;
++
++out_delete:
++      ext3_delete_inode(old_inode);
 +}
 +#else
 +#define ext3_start_delete_thread(sbi) do {} while(0)
 +}
 +#else
 +#define ext3_start_delete_thread(sbi) do {} while(0)
@@ -221,7 +228,7 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
  void ext3_put_super (struct super_block * sb)
  {
        struct ext3_sb_info *sbi = EXT3_SB(sb);
  void ext3_put_super (struct super_block * sb)
  {
        struct ext3_sb_info *sbi = EXT3_SB(sb);
-@@ -405,6 +611,7 @@ void ext3_put_super (struct super_block 
+@@ -407,6 +621,7 @@ void ext3_put_super (struct super_block 
        kdev_t j_dev = sbi->s_journal->j_dev;
        int i;
  
        kdev_t j_dev = sbi->s_journal->j_dev;
        int i;
  
@@ -229,7 +236,7 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
        ext3_xattr_put_super(sb);
        journal_destroy(sbi->s_journal);
        if (!(sb->s_flags & MS_RDONLY)) {
        ext3_xattr_put_super(sb);
        journal_destroy(sbi->s_journal);
        if (!(sb->s_flags & MS_RDONLY)) {
-@@ -453,7 +660,11 @@ static struct super_operations ext3_sops
+@@ -455,7 +670,11 @@ static struct super_operations ext3_sops
        write_inode:    ext3_write_inode,       /* BKL not held.  Don't need */
        dirty_inode:    ext3_dirty_inode,       /* BKL not held.  We take it */
        put_inode:      ext3_put_inode,         /* BKL not held.  Don't need */
        write_inode:    ext3_write_inode,       /* BKL not held.  Don't need */
        dirty_inode:    ext3_dirty_inode,       /* BKL not held.  We take it */
        put_inode:      ext3_put_inode,         /* BKL not held.  Don't need */
@@ -240,11 +247,11 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
 +#endif
        put_super:      ext3_put_super,         /* BKL held */
        write_super:    ext3_write_super,       /* BKL held */
 +#endif
        put_super:      ext3_put_super,         /* BKL held */
        write_super:    ext3_write_super,       /* BKL held */
-       write_super_lockfs: ext3_write_super_lockfs, /* BKL not held. Take it */
-@@ -514,6 +725,13 @@ static int parse_options (char * options
-            this_char = strtok (NULL, ",")) {
-               if ((value = strchr (this_char, '=')) != NULL)
-                       *value++ = 0;
+       sync_fs:        ext3_sync_fs,
+@@ -524,6 +743,13 @@ static int parse_options (char * options
+                       clear_opt (*mount_options, XATTR_USER);
+               else
+ #endif
 +#ifdef EXT3_DELETE_THREAD
 +              if (!strcmp(this_char, "asyncdel"))
 +                      set_opt(*mount_options, ASYNCDEL);
 +#ifdef EXT3_DELETE_THREAD
 +              if (!strcmp(this_char, "asyncdel"))
 +                      set_opt(*mount_options, ASYNCDEL);
@@ -252,10 +259,10 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
 +                      clear_opt(*mount_options, ASYNCDEL);
 +              else
 +#endif
 +                      clear_opt(*mount_options, ASYNCDEL);
 +              else
 +#endif
- #ifdef CONFIG_EXT3_FS_XATTR_USER
-               if (!strcmp (this_char, "user_xattr"))
-                       set_opt (*mount_options, XATTR_USER);
-@@ -1220,6 +1436,7 @@ struct super_block * ext3_read_super (st
+               if (!strcmp (this_char, "bsddf"))
+                       clear_opt (*mount_options, MINIX_DF);
+               else if (!strcmp (this_char, "nouid32")) {
+@@ -1223,6 +1449,7 @@ struct super_block * ext3_read_super (st
        }
  
        ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
        }
  
        ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
@@ -263,7 +270,7 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
        /*
         * akpm: core read_super() calls in here with the superblock locked.
         * That deadlocks, because orphan cleanup needs to lock the superblock
        /*
         * akpm: core read_super() calls in here with the superblock locked.
         * That deadlocks, because orphan cleanup needs to lock the superblock
-@@ -1648,6 +1874,9 @@ int ext3_remount (struct super_block * s
+@@ -1678,6 +1905,9 @@ int ext3_remount (struct super_block * s
        if (!parse_options(data, &tmp, sbi, &tmp, 1))
                return -EINVAL;
  
        if (!parse_options(data, &tmp, sbi, &tmp, 1))
                return -EINVAL;
  
@@ -273,9 +280,143 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
        if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
                ext3_abort(sb, __FUNCTION__, "Abort forced by user");
  
        if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
                ext3_abort(sb, __FUNCTION__, "Abort forced by user");
  
-diff -puNr origin/include/linux/ext3_fs.h linux/include/linux/ext3_fs.h
---- origin/include/linux/ext3_fs.h     2003-05-04 17:22:49.000000000 +0400
-+++ linux/include/linux/ext3_fs.h      2003-05-04 15:06:10.000000000 +0400
+--- linux/fs/ext3/inode.c~ext3-delete_thread-2.4.20    Thu Jul 10 14:11:29 2003
++++ linux-mmonroe/fs/ext3/inode.c      Thu Jul 10 14:11:33 2003
+@@ -2013,6 +2013,118 @@ out_stop:
+       ext3_journal_stop(handle, inode);
+ }
++#ifdef EXT3_DELETE_THREAD
++/* Move blocks from to-be-truncated inode over to a new inode, and delete
++ * that one from the delete thread instead.  This avoids a lot of latency
++ * when truncating large files.
++ *
++ * If we have any problem deferring the truncate, just truncate it right away.
++ * If we defer it, we also mark how many blocks it would free, so that we
++ * can keep the statfs data correct, and we know if we should sleep on the
++ * delete thread when we run out of space.
++ */
++void ext3_truncate_thread(struct inode *old_inode)
++{
++      struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
++      struct ext3_inode_info *nei, *oei = EXT3_I(old_inode);
++      struct inode *new_inode;
++      handle_t *handle;
++      unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
++
++      if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next)
++              goto out_truncate;
++
++      /* XXX This is a temporary limitation for code simplicity.
++       *     We could truncate to arbitrary sizes at some later time.
++       */
++      if (old_inode->i_size != 0)
++              goto out_truncate;
++
++      /* We may want to truncate the inode immediately and not defer it */
++      if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS ||
++          old_inode->i_size > oei->i_disksize)
++              goto out_truncate;
++
++      /* We can't use the delete thread as-is during real orphan recovery,
++       * as we add to the orphan list here, causing ext3_orphan_cleanup()
++       * to loop endlessly.  It would be nice to do so, but needs work.
++       */
++      if (oei->i_state & EXT3_STATE_DELETE ||
++          sbi->s_mount_state & EXT3_ORPHAN_FS) {
++              ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
++                         old_inode->i_ino, blocks);
++              goto out_truncate;
++      }
++
++      ext3_discard_prealloc(old_inode);
++
++      /* old_inode   = 1
++       * new_inode   = sb + GDT + ibitmap
++       * orphan list = 1 inode/superblock for add, 2 inodes for del
++       * quota files = 2 * EXT3_SINGLEDATA_TRANS_BLOCKS
++       */
++      handle = ext3_journal_start(old_inode, 7);
++      if (IS_ERR(handle))
++              goto out_truncate;
++
++      new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode);
++      if (IS_ERR(new_inode)) {
++              ext3_debug("truncate inode %lu directly (no new inodes)\n",
++                         old_inode->i_ino);
++              goto out_journal;
++      }
++
++      nei = EXT3_I(new_inode);
++
++      down_write(&oei->truncate_sem);
++      new_inode->i_size = old_inode->i_size;
++      new_inode->i_blocks = old_inode->i_blocks;
++      new_inode->i_uid = old_inode->i_uid;
++      new_inode->i_gid = old_inode->i_gid;
++      new_inode->i_nlink = 0;
++
++      /* FIXME when we do arbitrary truncates */
++      old_inode->i_blocks = oei->i_file_acl ? old_inode->i_blksize / 512 : 0;
++      old_inode->i_mtime = old_inode->i_ctime = CURRENT_TIME;
++
++      memcpy(nei->i_data, oei->i_data, sizeof(nei->i_data));
++      memset(oei->i_data, 0, sizeof(oei->i_data));
++
++      nei->i_disksize = oei->i_disksize;
++      nei->i_state |= EXT3_STATE_DELETE;
++      up_write(&oei->truncate_sem);
++
++      if (ext3_orphan_add(handle, new_inode) < 0)
++              goto out_journal;
++
++      if (ext3_orphan_del(handle, old_inode) < 0) {
++              ext3_orphan_del(handle, new_inode);
++              iput(new_inode);
++              goto out_journal;
++      }
++
++      ext3_journal_stop(handle, old_inode);
++
++      spin_lock(&sbi->s_delete_lock);
++      J_ASSERT(list_empty(&new_inode->i_dentry));
++      list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list);
++      sbi->s_delete_blocks += blocks;
++      sbi->s_delete_inodes++;
++      spin_unlock(&sbi->s_delete_lock);
++
++      ext3_debug("delete inode %lu (%lu blocks) by thread\n",
++                 new_inode->i_ino, blocks);
++
++      wake_up(&sbi->s_delete_thread_queue);
++      return;
++
++out_journal:
++      ext3_journal_stop(handle, old_inode);
++out_truncate:
++      ext3_truncate(old_inode);
++}
++#endif /* EXT3_DELETE_THREAD */
++
+ /* 
+  * ext3_get_inode_loc returns with an extra refcount against the
+  * inode's underlying buffer_head on success. 
+--- linux/fs/ext3/file.c~ext3-delete_thread-2.4.20     Thu Jul 10 14:11:21 2003
++++ linux-mmonroe/fs/ext3/file.c       Thu Jul 10 14:12:17 2003
+@@ -125,7 +125,11 @@ struct file_operations ext3_file_operati
+ };
+ struct inode_operations ext3_file_inode_operations = {
++#ifdef EXT3_DELETE_THREAD
++      truncate:       ext3_truncate_thread,   /* BKL held */
++#else
+       truncate:       ext3_truncate,          /* BKL held */
++#endif
+       setattr:        ext3_setattr,           /* BKL held */
+       setxattr:       ext3_setxattr,          /* BKL held */
+       getxattr:       ext3_getxattr,          /* BKL held */
+--- linux/include/linux/ext3_fs.h~ext3-delete_thread-2.4.20    Thu Jul 10 14:11:26 2003
++++ linux-mmonroe/include/linux/ext3_fs.h      Thu Jul 10 14:11:33 2003
 @@ -193,6 +193,7 @@ struct ext3_group_desc
   */
  #define EXT3_STATE_JDATA              0x00000001 /* journaled data exists */
 @@ -193,6 +193,7 @@ struct ext3_group_desc
   */
  #define EXT3_STATE_JDATA              0x00000001 /* journaled data exists */
@@ -284,17 +425,26 @@ diff -puNr origin/include/linux/ext3_fs.h linux/include/linux/ext3_fs.h
  
  /*
   * ioctl commands
  
  /*
   * ioctl commands
-@@ -321,6 +322,7 @@ struct ext3_inode {
+@@ -320,6 +321,7 @@ struct ext3_inode {
  #define EXT3_MOUNT_UPDATE_JOURNAL     0x1000  /* Update the journal format */
  #define EXT3_MOUNT_NO_UID32           0x2000  /* Disable 32-bit UIDs */
  #define EXT3_MOUNT_XATTR_USER         0x4000  /* Extended user attributes */
  #define EXT3_MOUNT_UPDATE_JOURNAL     0x1000  /* Update the journal format */
  #define EXT3_MOUNT_NO_UID32           0x2000  /* Disable 32-bit UIDs */
  #define EXT3_MOUNT_XATTR_USER         0x4000  /* Extended user attributes */
-+#define EXT3_MOUNT_ASYNCDEL           0x20000 /* Delayed deletion */
++#define EXT3_MOUNT_ASYNCDEL           0x20000 /* Delayed deletion */
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
  #ifndef _LINUX_EXT2_FS_H
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
  #ifndef _LINUX_EXT2_FS_H
-diff -puNr origin/include/linux/ext3_fs_sb.h linux/include/linux/ext3_fs_sb.h
---- origin/include/linux/ext3_fs_sb.h  2003-05-04 17:23:52.000000000 +0400
-+++ linux/include/linux/ext3_fs_sb.h   2003-05-04 11:37:04.000000000 +0400
+@@ -694,6 +696,9 @@ extern void ext3_discard_prealloc (struc
+ extern void ext3_dirty_inode(struct inode *);
+ extern int ext3_change_inode_journal_flag(struct inode *, int);
+ extern void ext3_truncate (struct inode *);
++#ifdef EXT3_DELETE_THREAD
++extern void ext3_truncate_thread(struct inode *inode);
++#endif
+ /* ioctl.c */
+ extern int ext3_ioctl (struct inode *, struct file *, unsigned int,
+--- linux/include/linux/ext3_fs_sb.h~ext3-delete_thread-2.4.20 Thu Jul 10 14:11:32 2003
++++ linux-mmonroe/include/linux/ext3_fs_sb.h   Thu Jul 10 14:11:33 2003
 @@ -29,6 +29,8 @@
  
  #define EXT3_MAX_GROUP_LOADED 8
 @@ -29,6 +29,8 @@
  
  #define EXT3_MAX_GROUP_LOADED 8
@@ -319,3 +469,5 @@ diff -puNr origin/include/linux/ext3_fs_sb.h linux/include/linux/ext3_fs_sb.h
  };
  
  #endif        /* _LINUX_EXT3_FS_SB */
  };
  
  #endif        /* _LINUX_EXT3_FS_SB */
+
+_
index 06ea72a..65d9347 100644 (file)
@@ -1,6 +1,9 @@
---- linux-2.4.17/fs/ext3/super.c.orig  Fri Dec 21 10:41:55 2001
-+++ linux-2.4.17/fs/ext3/super.c       Fri Mar 22 11:00:41 2002
-@@ -1344,10 +1342,10 @@
+ fs/ext3/super.c |    4 ++--
+ 1 files changed, 2 insertions(+), 2 deletions(-)
+
+--- linux-2.4.18-p4smp/fs/ext3/super.c~extN-misc-fixup 2003-07-21 23:07:50.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/super.c   2003-07-21 23:08:06.000000000 -0600
+@@ -1578,10 +1578,10 @@ static journal_t *ext3_get_dev_journal(s
                printk(KERN_ERR "EXT3-fs: I/O error on journal device\n");
                goto out_journal;
        }
                printk(KERN_ERR "EXT3-fs: I/O error on journal device\n");
                goto out_journal;
        }
                goto out_journal;
        }
        EXT3_SB(sb)->journal_bdev = bdev;
                goto out_journal;
        }
        EXT3_SB(sb)->journal_bdev = bdev;
-@@ -1560,6 +1560,7 @@
-       unlock_kernel();
-       return ret;
- }
-+EXPORT_SYMBOL(ext3_force_commit); /* here to avoid potential patch collisions */
- /*
-  * Ext3 always journals updates to the superblock itself, so we don't
+
+_
index 63f4463..305f6fd 100644 (file)
@@ -83,9 +83,7 @@
                DQUOT_DROP(inode);
 --- linux-2.4.18-chaos52/fs/ext3/inode.c~extN-noread   2003-05-16 12:26:29.000000000 +0800
 +++ linux-2.4.18-chaos52-root/fs/ext3/inode.c  2003-05-16 12:27:06.000000000 +0800
                DQUOT_DROP(inode);
 --- linux-2.4.18-chaos52/fs/ext3/inode.c~extN-noread   2003-05-16 12:26:29.000000000 +0800
 +++ linux-2.4.18-chaos52-root/fs/ext3/inode.c  2003-05-16 12:27:06.000000000 +0800
-@@ -2011,23 +2011,28 @@ out_stop:
-       ext3_journal_stop(handle, inode);
- }
+@@ -2013,21 +2013,26 @@ out_stop:
  
 -/* 
 - * ext3_get_inode_loc returns with an extra refcount against the
  
 -/* 
 - * ext3_get_inode_loc returns with an extra refcount against the
index fc74c6b..d40d678 100644 (file)
        j += i * EXT3_INODES_PER_GROUP(sb) + 1;
        if (j < EXT3_FIRST_INO(sb) || j > le32_to_cpu(es->s_inodes_count)) {
                ext3_error (sb, "ext3_new_inode",
        j += i * EXT3_INODES_PER_GROUP(sb) + 1;
        if (j < EXT3_FIRST_INO(sb) || j > le32_to_cpu(es->s_inodes_count)) {
                ext3_error (sb, "ext3_new_inode",
+--- linux-2.4.18-18.8.0-l15/fs/ext3/inode.c~extN-wantedi       Thu Jul  3 00:15:41 2003
++++ linux-2.4.18-18.8.0-l15-adilger/fs/ext3/inode.c    Thu Jul  3 00:17:28 2003
+@@ -2070,7 +2070,7 @@ void ext3_truncate_thread(struct inode *
+       if (IS_ERR(handle))
+               goto out_truncate;
+-      new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode);
++      new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode, 0);
+       if (IS_ERR(new_inode)) {
+               ext3_debug("truncate inode %lu directly (no new inodes)\n",
+                          old_inode->i_ino);
 --- linux-2.4.20/fs/ext3/ioctl.c~extN-wantedi  2003-04-08 23:35:55.000000000 -0600
 +++ linux-2.4.20-braam/fs/ext3/ioctl.c 2003-04-08 23:35:55.000000000 -0600
 @@ -23,6 +23,31 @@ int ext3_ioctl (struct inode * inode, st
 --- linux-2.4.20/fs/ext3/ioctl.c~extN-wantedi  2003-04-08 23:35:55.000000000 -0600
 +++ linux-2.4.20-braam/fs/ext3/ioctl.c 2003-04-08 23:35:55.000000000 -0600
 @@ -23,6 +23,31 @@ int ext3_ioctl (struct inode * inode, st
index 6eabe85..b983b33 100644 (file)
@@ -1,7 +1,15 @@
- 0 files changed
+ Documentation/filesystems/ext2.txt |   16 ++
+ fs/ext3/Makefile                   |    2 
+ fs/ext3/inode.c                    |    4 
+ fs/ext3/iopen.c                    |  259 +++++++++++++++++++++++++++++++++++++
+ fs/ext3/iopen.h                    |   13 +
+ fs/ext3/namei.c                    |   12 +
+ fs/ext3/super.c                    |   11 +
+ include/linux/ext3_fs.h            |    2 
+ 8 files changed, 318 insertions(+), 1 deletion(-)
 
 
---- linux-2.4.18-chaos52/Documentation/filesystems/ext2.txt~iopen-2.4.18       2003-04-13 15:21:33.000000000 +0800
-+++ linux-2.4.18-chaos52-root/Documentation/filesystems/ext2.txt       2003-06-03 17:10:55.000000000 +0800
+--- linux-2.4.18-p4smp/Documentation/filesystems/ext2.txt~iopen-2.4.18 2003-07-09 12:17:30.000000000 -0600
++++ linux-2.4.18-p4smp-braam/Documentation/filesystems/ext2.txt        2003-07-09 17:13:02.000000000 -0600
 @@ -35,6 +35,22 @@ resgid=n                    The group ID which may use th
  
  sb=n                          Use alternate superblock at this location.
 @@ -35,6 +35,22 @@ resgid=n                    The group ID which may use th
  
  sb=n                          Use alternate superblock at this location.
  grpquota,noquota,quota,usrquota       Quota options are silently ignored by ext2.
  
  
  grpquota,noquota,quota,usrquota       Quota options are silently ignored by ext2.
  
  
---- linux-2.4.18-chaos52/fs/ext3/Makefile~iopen-2.4.18 2003-06-01 03:24:07.000000000 +0800
-+++ linux-2.4.18-chaos52-root/fs/ext3/Makefile 2003-06-03 17:10:55.000000000 +0800
+--- linux-2.4.18-p4smp/fs/ext3/Makefile~iopen-2.4.18   2003-07-09 17:12:12.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/Makefile  2003-07-09 17:13:15.000000000 -0600
 @@ -11,7 +11,7 @@ O_TARGET := ext3.o
  
 @@ -11,7 +11,7 @@ O_TARGET := ext3.o
  
- export-objs :=        super.o inode.o xattr.o
+ export-objs :=        super.o inode.o xattr.o ext3-exports.o
  
 -obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
  
 -obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
-+obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
-               ioctl.o namei.o super.o symlink.o xattr.o
++obj-y    := balloc.o iopen.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
+               ioctl.o namei.o super.o symlink.o xattr.o ext3-exports.o
  obj-m    := $(O_TARGET)
  
  obj-m    := $(O_TARGET)
  
---- linux-2.4.18-chaos52/fs/ext3/inode.c~iopen-2.4.18  2003-06-03 17:10:21.000000000 +0800
-+++ linux-2.4.18-chaos52-root/fs/ext3/inode.c  2003-06-03 17:10:55.000000000 +0800
+--- linux-2.4.18-p4smp/fs/ext3/inode.c~iopen-2.4.18    2003-07-09 17:11:19.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/inode.c   2003-07-09 17:13:02.000000000 -0600
 @@ -31,6 +31,7 @@
  #include <linux/highuid.h>
  #include <linux/quotaops.h>
 @@ -31,6 +31,7 @@
  #include <linux/highuid.h>
  #include <linux/quotaops.h>
@@ -46,7 +54,7 @@
  
  /*
   * SEARCH_FROM_ZERO forces each block allocation to search from the start
  
  /*
   * SEARCH_FROM_ZERO forces each block allocation to search from the start
-@@ -2135,6 +2136,9 @@ void ext3_read_inode(struct inode * inod
+@@ -2165,6 +2166,9 @@ void ext3_read_inode(struct inode * inod
        struct buffer_head *bh;
        int block;
        
        struct buffer_head *bh;
        int block;
        
@@ -56,8 +64,8 @@
        if(ext3_get_inode_loc(inode, &iloc))
                goto bad_inode;
        bh = iloc.bh;
        if(ext3_get_inode_loc(inode, &iloc))
                goto bad_inode;
        bh = iloc.bh;
---- /dev/null  2002-08-31 07:31:37.000000000 +0800
-+++ linux-2.4.18-chaos52-root/fs/ext3/iopen.c  2003-06-03 17:10:55.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ linux-2.4.18-p4smp-braam/fs/ext3/iopen.c   2003-07-09 17:13:02.000000000 -0600
 @@ -0,0 +1,259 @@
 +/*
 + * linux/fs/ext3/iopen.c
 @@ -0,0 +1,259 @@
 +/*
 + * linux/fs/ext3/iopen.c
 +
 +      return 1;
 +}
 +
 +      return 1;
 +}
---- /dev/null  2002-08-31 07:31:37.000000000 +0800
-+++ linux-2.4.18-chaos52-root/fs/ext3/iopen.h  2003-06-03 17:10:55.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ linux-2.4.18-p4smp-braam/fs/ext3/iopen.h   2003-07-09 17:13:02.000000000 -0600
 @@ -0,0 +1,13 @@
 +/*
 + * iopen.h
 @@ -0,0 +1,13 @@
 +/*
 + * iopen.h
 +
 +extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry);
 +extern int ext3_iopen_get_inode(struct inode *inode);
 +
 +extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry);
 +extern int ext3_iopen_get_inode(struct inode *inode);
---- linux-2.4.18-chaos52/fs/ext3/namei.c~iopen-2.4.18  2003-06-03 17:10:20.000000000 +0800
-+++ linux-2.4.18-chaos52-root/fs/ext3/namei.c  2003-06-03 17:10:55.000000000 +0800
+--- linux-2.4.18-p4smp/fs/ext3/namei.c~iopen-2.4.18    2003-07-09 13:32:38.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/namei.c   2003-07-09 17:13:02.000000000 -0600
 @@ -34,6 +34,7 @@
  #include <linux/locks.h>
  #include <linux/quotaops.h>
 @@ -34,6 +34,7 @@
  #include <linux/locks.h>
  #include <linux/quotaops.h>
        d_add(dentry, inode);
        return NULL;
  }
        d_add(dentry, inode);
        return NULL;
  }
---- linux-2.4.18-chaos52/fs/ext3/super.c~iopen-2.4.18  2003-06-03 17:10:21.000000000 +0800
-+++ linux-2.4.18-chaos52-root/fs/ext3/super.c  2003-06-03 17:10:55.000000000 +0800
-@@ -820,6 +820,17 @@ static int parse_options (char * options
+--- linux-2.4.18-p4smp/fs/ext3/super.c~iopen-2.4.18    2003-07-09 13:32:38.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/super.c   2003-07-09 17:13:02.000000000 -0600
+@@ -831,6 +831,17 @@ static int parse_options (char * options
                         || !strcmp (this_char, "quota")
                         || !strcmp (this_char, "usrquota"))
                        /* Don't do anything ;-) */ ;
                         || !strcmp (this_char, "quota")
                         || !strcmp (this_char, "usrquota"))
                        /* Don't do anything ;-) */ ;
                else if (!strcmp (this_char, "journal")) {
                        /* @@@ FIXME */
                        /* Eventually we will want to be able to create
                else if (!strcmp (this_char, "journal")) {
                        /* @@@ FIXME */
                        /* Eventually we will want to be able to create
---- linux-2.4.18-chaos52/include/linux/ext3_fs.h~iopen-2.4.18  2003-06-03 17:10:22.000000000 +0800
-+++ linux-2.4.18-chaos52-root/include/linux/ext3_fs.h  2003-06-03 17:12:08.000000000 +0800
+--- linux-2.4.18-p4smp/include/linux/ext3_fs.h~iopen-2.4.18    2003-07-09 13:32:38.000000000 -0600
++++ linux-2.4.18-p4smp-braam/include/linux/ext3_fs.h   2003-07-09 17:13:02.000000000 -0600
 @@ -321,6 +321,8 @@ struct ext3_inode {
  #define EXT3_MOUNT_UPDATE_JOURNAL     0x1000  /* Update the journal format */
  #define EXT3_MOUNT_NO_UID32           0x2000  /* Disable 32-bit UIDs */
 @@ -321,6 +321,8 @@ struct ext3_inode {
  #define EXT3_MOUNT_UPDATE_JOURNAL     0x1000  /* Update the journal format */
  #define EXT3_MOUNT_NO_UID32           0x2000  /* Disable 32-bit UIDs */
index 3038cc8..ec48814 100644 (file)
@@ -1,15 +1,15 @@
  Documentation/filesystems/ext2.txt |   16 ++
  fs/ext3/Makefile                   |    2 
  fs/ext3/inode.c                    |    4 
  Documentation/filesystems/ext2.txt |   16 ++
  fs/ext3/Makefile                   |    2 
  fs/ext3/inode.c                    |    4 
- fs/ext3/iopen.c                    |  240 +++++++++++++++++++++++++++++++++++++
- fs/ext3/iopen.h                    |   15 ++
- fs/ext3/namei.c                    |   13 +-
+ fs/ext3/iopen.c                    |  259 +++++++++++++++++++++++++++++++++++++
+ fs/ext3/iopen.h                    |   13 +
+ fs/ext3/namei.c                    |   13 +
  fs/ext3/super.c                    |   11 +
  include/linux/ext3_fs.h            |    2 
  fs/ext3/super.c                    |   11 +
  include/linux/ext3_fs.h            |    2 
- 8 files changed, 301 insertions(+), 2 deletions(-)
+ 8 files changed, 318 insertions(+), 2 deletions(-)
 
 
---- linux-2.4.20/Documentation/filesystems/ext2.txt~iopen      2001-07-11 16:44:45.000000000 -0600
-+++ linux-2.4.20-braam/Documentation/filesystems/ext2.txt      2003-05-17 14:06:00.000000000 -0600
+--- linux/Documentation/filesystems/ext2.txt~iopen-2.4.20      Wed Jul 11 15:44:45 2001
++++ linux-mmonroe/Documentation/filesystems/ext2.txt   Thu Jul 10 12:28:54 2003
 @@ -35,6 +35,22 @@ resgid=n                    The group ID which may use th
  
  sb=n                          Use alternate superblock at this location.
 @@ -35,6 +35,22 @@ resgid=n                    The group ID which may use th
  
  sb=n                          Use alternate superblock at this location.
@@ -33,8 +33,8 @@
  grpquota,noquota,quota,usrquota       Quota options are silently ignored by ext2.
  
  
  grpquota,noquota,quota,usrquota       Quota options are silently ignored by ext2.
  
  
---- linux-2.4.20/fs/ext3/Makefile~iopen        2003-05-17 14:05:57.000000000 -0600
-+++ linux-2.4.20-braam/fs/ext3/Makefile        2003-05-17 14:06:00.000000000 -0600
+--- linux/fs/ext3/Makefile~iopen-2.4.20        Thu Jul 10 12:28:44 2003
++++ linux-mmonroe/fs/ext3/Makefile     Thu Jul 10 12:28:54 2003
 @@ -11,7 +11,7 @@ O_TARGET := ext3.o
  
  export-objs := ext3-exports.o
 @@ -11,7 +11,7 @@ O_TARGET := ext3.o
  
  export-objs := ext3-exports.o
@@ -44,8 +44,8 @@
                ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o
  obj-m    := $(O_TARGET)
  
                ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o
  obj-m    := $(O_TARGET)
  
---- linux-2.4.20/fs/ext3/inode.c~iopen 2003-05-17 14:06:00.000000000 -0600
-+++ linux-2.4.20-braam/fs/ext3/inode.c 2003-05-17 14:06:00.000000000 -0600
+--- linux/fs/ext3/inode.c~iopen-2.4.20 Thu Jul 10 12:28:46 2003
++++ linux-mmonroe/fs/ext3/inode.c      Thu Jul 10 12:28:54 2003
 @@ -31,6 +31,7 @@
  #include <linux/highuid.h>
  #include <linux/quotaops.h>
 @@ -31,6 +31,7 @@
  #include <linux/highuid.h>
  #include <linux/quotaops.h>
@@ -54,7 +54,7 @@
  
  /*
   * SEARCH_FROM_ZERO forces each block allocation to search from the start
  
  /*
   * SEARCH_FROM_ZERO forces each block allocation to search from the start
-@@ -2137,6 +2138,9 @@ void ext3_read_inode(struct inode * inod
+@@ -2253,6 +2254,9 @@ void ext3_read_inode(struct inode * inod
        struct buffer_head *bh;
        int block;
        
        struct buffer_head *bh;
        int block;
        
@@ -64,8 +64,8 @@
        if(ext3_get_inode_loc(inode, &iloc))
                goto bad_inode;
        bh = iloc.bh;
        if(ext3_get_inode_loc(inode, &iloc))
                goto bad_inode;
        bh = iloc.bh;
---- /dev/null  2003-01-30 03:24:37.000000000 -0700
-+++ linux-2.4.20-braam/fs/ext3/iopen.c 2003-05-17 22:18:55.000000000 -0600
+--- /dev/null  Tue Jan 28 04:00:01 2003
++++ linux-mmonroe/fs/ext3/iopen.c      Thu Jul 10 12:28:54 2003
 @@ -0,0 +1,259 @@
 +/*
 + * linux/fs/ext3/iopen.c
 @@ -0,0 +1,259 @@
 +/*
 + * linux/fs/ext3/iopen.c
 +
 +      return 1;
 +}
 +
 +      return 1;
 +}
---- /dev/null  2003-01-30 03:24:37.000000000 -0700
-+++ linux-2.4.20-braam/fs/ext3/iopen.h 2003-05-17 14:06:00.000000000 -0600
+--- /dev/null  Tue Jan 28 04:00:01 2003
++++ linux-mmonroe/fs/ext3/iopen.h      Thu Jul 10 12:28:54 2003
 @@ -0,0 +1,13 @@
 +/*
 + * iopen.h
 @@ -0,0 +1,13 @@
 +/*
 + * iopen.h
 +
 +extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry);
 +extern int ext3_iopen_get_inode(struct inode *inode);
 +
 +extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry);
 +extern int ext3_iopen_get_inode(struct inode *inode);
---- linux-2.4.20/fs/ext3/namei.c~iopen 2003-05-17 14:05:59.000000000 -0600
-+++ linux-2.4.20-braam/fs/ext3/namei.c 2003-05-17 22:23:08.000000000 -0600
+--- linux/fs/ext3/namei.c~iopen-2.4.20 Thu Jul 10 12:28:46 2003
++++ linux-mmonroe/fs/ext3/namei.c      Thu Jul 10 12:28:54 2003
 @@ -35,7 +35,7 @@
  #include <linux/string.h>
  #include <linux/locks.h>
 @@ -35,7 +35,7 @@
  #include <linux/string.h>
  #include <linux/locks.h>
        d_add(dentry, inode);
        return NULL;
  }
        d_add(dentry, inode);
        return NULL;
  }
---- linux-2.4.20/fs/ext3/super.c~iopen 2003-05-17 14:05:59.000000000 -0600
-+++ linux-2.4.20-braam/fs/ext3/super.c 2003-05-17 14:06:00.000000000 -0600
-@@ -820,6 +820,17 @@ static int parse_options (char * options
+--- linux/fs/ext3/super.c~iopen-2.4.20 Thu Jul 10 12:28:45 2003
++++ linux-mmonroe/fs/ext3/super.c      Thu Jul 10 12:28:54 2003
+@@ -835,6 +835,17 @@ static int parse_options (char * options
                         || !strcmp (this_char, "quota")
                         || !strcmp (this_char, "usrquota"))
                        /* Don't do anything ;-) */ ;
                         || !strcmp (this_char, "quota")
                         || !strcmp (this_char, "usrquota"))
                        /* Don't do anything ;-) */ ;
                else if (!strcmp (this_char, "journal")) {
                        /* @@@ FIXME */
                        /* Eventually we will want to be able to create
                else if (!strcmp (this_char, "journal")) {
                        /* @@@ FIXME */
                        /* Eventually we will want to be able to create
---- linux-2.4.20/include/linux/ext3_fs.h~iopen 2003-05-17 14:05:59.000000000 -0600
-+++ linux-2.4.20-braam/include/linux/ext3_fs.h 2003-05-17 14:06:29.000000000 -0600
+--- linux/include/linux/ext3_fs.h~iopen-2.4.20 Thu Jul 10 12:28:46 2003
++++ linux-mmonroe/include/linux/ext3_fs.h      Thu Jul 10 12:30:12 2003
 @@ -322,6 +322,8 @@ struct ext3_inode {
  #define EXT3_MOUNT_UPDATE_JOURNAL     0x1000  /* Update the journal format */
  #define EXT3_MOUNT_NO_UID32           0x2000  /* Disable 32-bit UIDs */
  #define EXT3_MOUNT_XATTR_USER         0x4000  /* Extended user attributes */
 +#define EXT3_MOUNT_IOPEN              0x8000  /* Allow access via iopen */
 +#define EXT3_MOUNT_IOPEN_NOPRIV               0x10000 /* Make iopen world-readable */
 @@ -322,6 +322,8 @@ struct ext3_inode {
  #define EXT3_MOUNT_UPDATE_JOURNAL     0x1000  /* Update the journal format */
  #define EXT3_MOUNT_NO_UID32           0x2000  /* Disable 32-bit UIDs */
  #define EXT3_MOUNT_XATTR_USER         0x4000  /* Extended user attributes */
 +#define EXT3_MOUNT_IOPEN              0x8000  /* Allow access via iopen */
 +#define EXT3_MOUNT_IOPEN_NOPRIV               0x10000 /* Make iopen world-readable */
- #define EXT3_MOUNT_ASYNCDEL           0x20000 /* Delayed deletion */
+ #define EXT3_MOUNT_ASYNCDEL           0x20000 /* Delayed deletion */
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
 
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
 
index 75ebcd0..15f1b2a 100644 (file)
@@ -1,7 +1,18 @@
- 0 files changed
+ fs/ext3/Makefile           |    4 
+ fs/ext3/ext3-exports.c     |   13 
+ fs/ext3/ialloc.c           |    2 
+ fs/ext3/inode.c            |   29 -
+ fs/ext3/namei.c            |   12 
+ fs/ext3/super.c            |   22 
+ fs/ext3/xattr.c            | 1242 +++++++++++++++++++++++++++++++++++++++++++++
+ include/linux/ext3_fs.h    |   46 -
+ include/linux/ext3_jbd.h   |    8 
+ include/linux/ext3_xattr.h |  155 +++++
+ include/linux/xattr.h      |   15 
+ 11 files changed, 1496 insertions(+), 52 deletions(-)
 
 
---- linux-2.4.18-18/fs/ext3/ialloc.c~linux-2.4.18ea-0.8.26     2003-04-20 16:14:31.000000000 +0800
-+++ linux-2.4.18-18-root/fs/ext3/ialloc.c      2003-04-20 16:14:31.000000000 +0800
+--- linux-2.4.18-p4smp/fs/ext3/ialloc.c~linux-2.4.18ea-0.8.26  2003-07-20 17:12:43.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/ialloc.c  2003-07-21 22:49:05.000000000 -0600
 @@ -17,6 +17,7 @@
  #include <linux/jbd.h>
  #include <linux/ext3_fs.h>
 @@ -17,6 +17,7 @@
  #include <linux/jbd.h>
  #include <linux/ext3_fs.h>
@@ -18,8 +29,8 @@
        DQUOT_FREE_INODE(inode);
        DQUOT_DROP(inode);
  
        DQUOT_FREE_INODE(inode);
        DQUOT_DROP(inode);
  
---- linux-2.4.18-18/fs/ext3/inode.c~linux-2.4.18ea-0.8.26      2003-04-20 16:14:31.000000000 +0800
-+++ linux-2.4.18-18-root/fs/ext3/inode.c       2003-04-20 16:14:31.000000000 +0800
+--- linux-2.4.18-p4smp/fs/ext3/inode.c~linux-2.4.18ea-0.8.26   2003-07-20 17:12:43.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/inode.c   2003-07-21 22:49:05.000000000 -0600
 @@ -39,6 +39,18 @@
   */
  #undef SEARCH_FROM_ZERO
 @@ -39,6 +39,18 @@
   */
  #undef SEARCH_FROM_ZERO
@@ -59,7 +70,7 @@
                goto no_delete;
  
        lock_kernel();
                goto no_delete;
  
        lock_kernel();
-@@ -1861,6 +1871,8 @@ void ext3_truncate(struct inode * inode)
+@@ -1877,6 +1887,8 @@ void ext3_truncate(struct inode * inode)
        if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
            S_ISLNK(inode->i_mode)))
                return;
        if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
            S_ISLNK(inode->i_mode)))
                return;
@@ -68,7 +79,7 @@
        if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
                return;
  
        if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
                return;
  
-@@ -2008,8 +2020,6 @@ int ext3_get_inode_loc (struct inode *in
+@@ -2038,8 +2050,6 @@ int ext3_get_inode_loc (struct inode *in
        struct ext3_group_desc * gdp;
                
        if ((inode->i_ino != EXT3_ROOT_INO &&
        struct ext3_group_desc * gdp;
                
        if ((inode->i_ino != EXT3_ROOT_INO &&
@@ -77,7 +88,7 @@
                inode->i_ino != EXT3_JOURNAL_INO &&
                inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) ||
                inode->i_ino > le32_to_cpu(
                inode->i_ino != EXT3_JOURNAL_INO &&
                inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) ||
                inode->i_ino > le32_to_cpu(
-@@ -2136,10 +2146,7 @@ void ext3_read_inode(struct inode * inod
+@@ -2166,10 +2176,7 @@ void ext3_read_inode(struct inode * inod
  
        brelse (iloc.bh);
  
  
        brelse (iloc.bh);
  
                inode->i_op = &ext3_file_inode_operations;
                inode->i_fop = &ext3_file_operations;
                inode->i_mapping->a_ops = &ext3_aops;
                inode->i_op = &ext3_file_inode_operations;
                inode->i_fop = &ext3_file_operations;
                inode->i_mapping->a_ops = &ext3_aops;
-@@ -2147,7 +2154,7 @@ void ext3_read_inode(struct inode * inod
+@@ -2177,7 +2184,7 @@ void ext3_read_inode(struct inode * inod
                inode->i_op = &ext3_dir_inode_operations;
                inode->i_fop = &ext3_dir_operations;
        } else if (S_ISLNK(inode->i_mode)) {
                inode->i_op = &ext3_dir_inode_operations;
                inode->i_fop = &ext3_dir_operations;
        } else if (S_ISLNK(inode->i_mode)) {
                        inode->i_op = &ext3_fast_symlink_inode_operations;
                else {
                        inode->i_op = &page_symlink_inode_operations;
                        inode->i_op = &ext3_fast_symlink_inode_operations;
                else {
                        inode->i_op = &page_symlink_inode_operations;
---- linux-2.4.18-18/fs/ext3/namei.c~linux-2.4.18ea-0.8.26      2003-04-20 16:14:31.000000000 +0800
-+++ linux-2.4.18-18-root/fs/ext3/namei.c       2003-04-20 16:14:31.000000000 +0800
+--- linux-2.4.18-p4smp/fs/ext3/namei.c~linux-2.4.18ea-0.8.26   2003-07-21 22:29:27.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/namei.c   2003-07-21 22:49:05.000000000 -0600
 @@ -27,6 +27,7 @@
  #include <linux/sched.h>
  #include <linux/ext3_fs.h>
 @@ -27,6 +27,7 @@
  #include <linux/sched.h>
  #include <linux/ext3_fs.h>
                inode->i_op = &page_symlink_inode_operations;
                inode->i_mapping->a_ops = &ext3_aops;
                /*
                inode->i_op = &page_symlink_inode_operations;
                inode->i_mapping->a_ops = &ext3_aops;
                /*
---- linux-2.4.18-18/fs/ext3/super.c~linux-2.4.18ea-0.8.26      2003-04-20 16:14:31.000000000 +0800
-+++ linux-2.4.18-18-root/fs/ext3/super.c       2003-04-20 16:14:31.000000000 +0800
+--- linux-2.4.18-p4smp/fs/ext3/super.c~linux-2.4.18ea-0.8.26   2003-07-21 22:29:27.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/super.c   2003-07-21 22:50:28.000000000 -0600
 @@ -24,6 +24,7 @@
  #include <linux/jbd.h>
  #include <linux/ext3_fs.h>
 @@ -24,6 +24,7 @@
  #include <linux/jbd.h>
  #include <linux/ext3_fs.h>
  #include <linux/slab.h>
  #include <linux/init.h>
  #include <linux/locks.h>
  #include <linux/slab.h>
  #include <linux/init.h>
  #include <linux/locks.h>
-@@ -404,6 +405,7 @@ void ext3_put_super (struct super_block 
+@@ -406,6 +407,7 @@ void ext3_put_super (struct super_block 
        kdev_t j_dev = sbi->s_journal->j_dev;
        int i;
  
        kdev_t j_dev = sbi->s_journal->j_dev;
        int i;
  
        journal_destroy(sbi->s_journal);
        if (!(sb->s_flags & MS_RDONLY)) {
                EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
        journal_destroy(sbi->s_journal);
        if (!(sb->s_flags & MS_RDONLY)) {
                EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
-@@ -1748,14 +1750,25 @@ int ext3_statfs (struct super_block * sb
+@@ -1749,17 +1751,27 @@ int ext3_statfs (struct super_block * sb
  
  static DECLARE_FSTYPE_DEV(ext3_fs_type, "ext3", ext3_read_super);
  
  
  static DECLARE_FSTYPE_DEV(ext3_fs_type, "ext3", ext3_read_super);
  
 +      return error;
  }
  
 +      return error;
  }
  
- EXPORT_SYMBOL(ext3_bread);
---- /dev/null  2002-08-31 07:31:37.000000000 +0800
-+++ linux-2.4.18-18-root/fs/ext3/xattr.c       2003-04-20 16:14:31.000000000 +0800
-@@ -0,0 +1,1247 @@
+-EXPORT_SYMBOL(ext3_bread);
+ MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
+ MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ linux-2.4.18-p4smp-braam/fs/ext3/ext3-exports.c    2003-07-21 22:49:05.000000000 -0600
+@@ -0,0 +1,13 @@
++#include <linux/config.h>
++#include <linux/module.h>
++#include <linux/ext3_fs.h>
++#include <linux/ext3_jbd.h>
++#include <linux/ext3_xattr.h>
++
++EXPORT_SYMBOL(ext3_force_commit);
++EXPORT_SYMBOL(ext3_bread);
++EXPORT_SYMBOL(ext3_xattr_register);
++EXPORT_SYMBOL(ext3_xattr_unregister);
++EXPORT_SYMBOL(ext3_xattr_get);
++EXPORT_SYMBOL(ext3_xattr_list);
++EXPORT_SYMBOL(ext3_xattr_set);
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ linux-2.4.18-p4smp-braam/fs/ext3/xattr.c   2003-07-21 22:50:40.000000000 -0600
+@@ -0,0 +1,1242 @@
 +/*
 + * linux/fs/ext3/xattr.c
 + *
 +/*
 + * linux/fs/ext3/xattr.c
 + *
 +#include <linux/module.h>
 +
 +/* These symbols may be needed by a module. */
 +#include <linux/module.h>
 +
 +/* These symbols may be needed by a module. */
-+EXPORT_SYMBOL(ext3_xattr_register);
-+EXPORT_SYMBOL(ext3_xattr_unregister);
-+EXPORT_SYMBOL(ext3_xattr_get);
-+EXPORT_SYMBOL(ext3_xattr_list);
-+EXPORT_SYMBOL(ext3_xattr_set);
 +
 +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0)
 +# define mark_buffer_dirty(bh) mark_buffer_dirty(bh, 1)
 +
 +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0)
 +# define mark_buffer_dirty(bh) mark_buffer_dirty(bh, 1)
 +}
 +
 +#endif  /* CONFIG_EXT3_FS_XATTR_SHARING */
 +}
 +
 +#endif  /* CONFIG_EXT3_FS_XATTR_SHARING */
---- linux-2.4.18-18/include/linux/ext3_fs.h~linux-2.4.18ea-0.8.26      2003-04-20 16:14:31.000000000 +0800
-+++ linux-2.4.18-18-root/include/linux/ext3_fs.h       2003-04-20 16:14:31.000000000 +0800
+--- linux-2.4.18-p4smp/include/linux/ext3_fs.h~linux-2.4.18ea-0.8.26   2003-07-21 22:29:27.000000000 -0600
++++ linux-2.4.18-p4smp-braam/include/linux/ext3_fs.h   2003-07-21 22:49:05.000000000 -0600
 @@ -58,8 +58,6 @@
   */
  #define       EXT3_BAD_INO             1      /* Bad blocks inode */
 @@ -58,8 +58,6 @@
   */
  #define       EXT3_BAD_INO             1      /* Bad blocks inode */
  extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
  extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
  
  extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
  extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
  
---- linux-2.4.18-18/include/linux/ext3_jbd.h~linux-2.4.18ea-0.8.26     2003-04-20 16:14:31.000000000 +0800
-+++ linux-2.4.18-18-root/include/linux/ext3_jbd.h      2003-04-20 16:14:31.000000000 +0800
+--- linux-2.4.18-p4smp/include/linux/ext3_jbd.h~linux-2.4.18ea-0.8.26  2003-07-21 22:29:27.000000000 -0600
++++ linux-2.4.18-p4smp-braam/include/linux/ext3_jbd.h  2003-07-21 22:49:05.000000000 -0600
 @@ -30,13 +30,19 @@
  
  #define EXT3_SINGLEDATA_TRANS_BLOCKS  8
 @@ -30,13 +30,19 @@
  
  #define EXT3_SINGLEDATA_TRANS_BLOCKS  8
  
  extern int ext3_writepage_trans_blocks(struct inode *inode);
  
  
  extern int ext3_writepage_trans_blocks(struct inode *inode);
  
---- /dev/null  2002-08-31 07:31:37.000000000 +0800
-+++ linux-2.4.18-18-root/include/linux/ext3_xattr.h    2003-04-20 16:14:31.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ linux-2.4.18-p4smp-braam/include/linux/ext3_xattr.h        2003-07-21 22:49:05.000000000 -0600
 @@ -0,0 +1,155 @@
 +/*
 +  File: linux/ext3_xattr.h
 @@ -0,0 +1,155 @@
 +/*
 +  File: linux/ext3_xattr.h
 +
 +#endif  /* __KERNEL__ */
 +
 +
 +#endif  /* __KERNEL__ */
 +
---- /dev/null  2002-08-31 07:31:37.000000000 +0800
-+++ linux-2.4.18-18-root/include/linux/xattr.h 2003-04-20 16:14:31.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ linux-2.4.18-p4smp-braam/include/linux/xattr.h     2003-07-21 22:49:05.000000000 -0600
 @@ -0,0 +1,15 @@
 +/*
 +  File: linux/xattr.h
 @@ -0,0 +1,15 @@
 +/*
 +  File: linux/xattr.h
 +#define XATTR_REPLACE 2       /* set value, fail if attr does not exist */
 +
 +#endif        /* _LINUX_XATTR_H */
 +#define XATTR_REPLACE 2       /* set value, fail if attr does not exist */
 +
 +#endif        /* _LINUX_XATTR_H */
---- linux-2.4.18-18/fs/ext3/Makefile~linux-2.4.18ea-0.8.26     2003-04-20 16:14:54.000000000 +0800
-+++ linux-2.4.18-18-root/fs/ext3/Makefile      2003-04-20 16:15:15.000000000 +0800
+--- linux-2.4.18-p4smp/fs/ext3/Makefile~linux-2.4.18ea-0.8.26  2003-07-21 22:27:37.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/Makefile  2003-07-21 22:51:23.000000000 -0600
 @@ -9,10 +9,10 @@
  
  O_TARGET := ext3.o
  
 -export-objs :=        super.o inode.o
 @@ -9,10 +9,10 @@
  
  O_TARGET := ext3.o
  
 -export-objs :=        super.o inode.o
-+export-objs :=        super.o inode.o xattr.o
++export-objs :=        ext3-exports.o
  
  obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
 -              ioctl.o namei.o super.o symlink.o
  
  obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
 -              ioctl.o namei.o super.o symlink.o
-+              ioctl.o namei.o super.o symlink.o xattr.o
++              ioctl.o namei.o super.o symlink.o xattr.o ext3-exports.o
  obj-m    := $(O_TARGET)
  
  include $(TOPDIR)/Rules.make
  obj-m    := $(O_TARGET)
  
  include $(TOPDIR)/Rules.make
index 5c6c6a9..6d8eac6 100644 (file)
@@ -31,6 +31,7 @@
  fs/ext2/xattr.c               | 1212 +++++++++++++++++++++++++++++++++++++++++
  fs/ext2/xattr_user.c          |  103 +++
  fs/ext3/Makefile              |   10 
  fs/ext2/xattr.c               | 1212 +++++++++++++++++++++++++++++++++++++++++
  fs/ext2/xattr_user.c          |  103 +++
  fs/ext3/Makefile              |   10 
+ fs/ext3/ext3-exports.c        |   13 
  fs/ext3/file.c                |    5 
  fs/ext3/ialloc.c              |    2 
  fs/ext3/inode.c               |   35 -
  fs/ext3/file.c                |    5 
  fs/ext3/ialloc.c              |    2 
  fs/ext3/inode.c               |   35 -
  include/linux/mbcache.h       |   69 ++
  kernel/ksyms.c                |    4 
  mm/vmscan.c                   |   36 +
  include/linux/mbcache.h       |   69 ++
  kernel/ksyms.c                |    4 
  mm/vmscan.c                   |   36 +
- fs/ext3/ext3-exports.c        |   14 +  
- 62 files changed, 4331 insertions(+), 197 deletions(-)
+ 62 files changed, 4344 insertions(+), 183 deletions(-)
 
 
---- linux-rh-2.4.20-8/Documentation/Configure.help~linux-2.4.20-xattr-0.8.54-chaos     2003-05-07 17:33:50.000000000 +0800
-+++ linux-rh-2.4.20-8-root/Documentation/Configure.help        2003-05-07 17:34:25.000000000 +0800
-@@ -15226,6 +15226,39 @@ CONFIG_EXT2_FS
+--- kernel-2.4.20-6chaos_18_7/Documentation/Configure.help~linux-2.4.20-xattr-0.8.54-chaos     2003-06-23 10:39:21.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/Documentation/Configure.help       2003-07-12 15:34:44.000000000 -0600
+@@ -15253,6 +15253,39 @@ CONFIG_EXT2_FS
    be compiled as a module, and so this could be dangerous.  Most
    everyone wants to say Y here.
  
    be compiled as a module, and so this could be dangerous.  Most
    everyone wants to say Y here.
  
  Ext3 journalling file system support (EXPERIMENTAL)
  CONFIG_EXT3_FS
    This is the journalling version of the Second extended file system
  Ext3 journalling file system support (EXPERIMENTAL)
  CONFIG_EXT3_FS
    This is the journalling version of the Second extended file system
-@@ -15258,6 +15291,39 @@ CONFIG_EXT3_FS
+@@ -15285,6 +15318,39 @@ CONFIG_EXT3_FS
    of your root partition (the one containing the directory /) cannot
    be compiled as a module, and so this may be dangerous.
  
    of your root partition (the one containing the directory /) cannot
    be compiled as a module, and so this may be dangerous.
  
  Journal Block Device support (JBD for ext3) (EXPERIMENTAL)
  CONFIG_JBD
    This is a generic journalling layer for block devices.  It is
  Journal Block Device support (JBD for ext3) (EXPERIMENTAL)
  CONFIG_JBD
    This is a generic journalling layer for block devices.  It is
---- linux-rh-2.4.20-8/arch/alpha/defconfig~linux-2.4.20-xattr-0.8.54-chaos     2001-11-20 07:19:42.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/alpha/defconfig        2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/alpha/defconfig~linux-2.4.20-xattr-0.8.54-chaos     2002-05-07 15:53:54.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/alpha/defconfig       2003-07-12 15:34:44.000000000 -0600
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
  CONFIG_ALPHA=y
  # CONFIG_UID16 is not set
  # CONFIG_RWSEM_GENERIC_SPINLOCK is not set
  CONFIG_ALPHA=y
  # CONFIG_UID16 is not set
  # CONFIG_RWSEM_GENERIC_SPINLOCK is not set
---- linux-rh-2.4.20-8/arch/alpha/kernel/entry.S~linux-2.4.20-xattr-0.8.54-chaos        2003-04-11 14:04:53.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/alpha/kernel/entry.S   2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/alpha/kernel/entry.S~linux-2.4.20-xattr-0.8.54-chaos        2003-05-15 21:11:53.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/alpha/kernel/entry.S  2003-07-12 15:34:44.000000000 -0600
 @@ -1162,6 +1162,18 @@ sys_call_table:
        .quad sys_readahead
        .quad sys_ni_syscall                    /* 380, sys_security */
 @@ -1162,6 +1162,18 @@ sys_call_table:
        .quad sys_readahead
        .quad sys_ni_syscall                    /* 380, sys_security */
  
  /* Remember to update everything, kids.  */
  .ifne (. - sys_call_table) - (NR_SYSCALLS * 8)
  
  /* Remember to update everything, kids.  */
  .ifne (. - sys_call_table) - (NR_SYSCALLS * 8)
---- linux-rh-2.4.20-8/arch/arm/defconfig~linux-2.4.20-xattr-0.8.54-chaos       2001-05-20 08:43:05.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/arm/defconfig  2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/arm/defconfig~linux-2.4.20-xattr-0.8.54-chaos       2002-05-07 15:53:56.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/arm/defconfig 2003-07-12 15:34:44.000000000 -0600
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
  CONFIG_ARM=y
  # CONFIG_EISA is not set
  # CONFIG_SBUS is not set
  CONFIG_ARM=y
  # CONFIG_EISA is not set
  # CONFIG_SBUS is not set
---- linux-rh-2.4.20-8/arch/arm/kernel/calls.S~linux-2.4.20-xattr-0.8.54-chaos  2002-08-03 08:39:42.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/arm/kernel/calls.S     2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/arm/kernel/calls.S~linux-2.4.20-xattr-0.8.54-chaos  2002-09-25 11:09:16.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/arm/kernel/calls.S    2003-07-12 15:34:44.000000000 -0600
 @@ -240,18 +240,18 @@ __syscall_start:
                .long   SYMBOL_NAME(sys_ni_syscall) /* Security */
                .long   SYMBOL_NAME(sys_gettid)
 @@ -240,18 +240,18 @@ __syscall_start:
                .long   SYMBOL_NAME(sys_ni_syscall) /* Security */
                .long   SYMBOL_NAME(sys_gettid)
                .long   SYMBOL_NAME(sys_tkill)
                /*
                 * Please check 2.5 _before_ adding calls here,
                .long   SYMBOL_NAME(sys_tkill)
                /*
                 * Please check 2.5 _before_ adding calls here,
---- linux-rh-2.4.20-8/arch/i386/defconfig~linux-2.4.20-xattr-0.8.54-chaos      2003-04-11 14:04:53.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/i386/defconfig 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/i386/defconfig~linux-2.4.20-xattr-0.8.54-chaos      2003-05-15 21:12:00.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/i386/defconfig        2003-07-12 15:34:44.000000000 -0600
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
  CONFIG_X86=y
  CONFIG_ISA=y
  # CONFIG_SBUS is not set
  CONFIG_X86=y
  CONFIG_ISA=y
  # CONFIG_SBUS is not set
---- linux-rh-2.4.20-8/arch/ia64/defconfig~linux-2.4.20-xattr-0.8.54-chaos      2003-04-11 14:04:43.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/ia64/defconfig 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/ia64/defconfig~linux-2.4.20-xattr-0.8.54-chaos      2003-05-15 21:12:04.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/ia64/defconfig        2003-07-12 15:34:44.000000000 -0600
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
  
  #
  # Code maturity level options
  
  #
  # Code maturity level options
---- linux-rh-2.4.20-8/arch/m68k/defconfig~linux-2.4.20-xattr-0.8.54-chaos      2000-06-20 03:56:08.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/m68k/defconfig 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/m68k/defconfig~linux-2.4.20-xattr-0.8.54-chaos      2002-05-07 15:53:55.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/m68k/defconfig        2003-07-12 15:34:44.000000000 -0600
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
  CONFIG_UID16=y
  
  #
  CONFIG_UID16=y
  
  #
---- linux-rh-2.4.20-8/arch/mips/defconfig~linux-2.4.20-xattr-0.8.54-chaos      2002-11-29 07:53:10.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/mips/defconfig 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/mips/defconfig~linux-2.4.20-xattr-0.8.54-chaos      2003-02-14 15:58:06.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/arch/mips/defconfig        2003-07-12 15:34:44.000000000 -0600
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
  CONFIG_MIPS=y
  CONFIG_MIPS32=y
  # CONFIG_MIPS64 is not set
  CONFIG_MIPS=y
  CONFIG_MIPS32=y
  # CONFIG_MIPS64 is not set
---- linux-rh-2.4.20-8/arch/mips64/defconfig~linux-2.4.20-xattr-0.8.54-chaos    2002-11-29 07:53:10.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/mips64/defconfig       2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/mips64/defconfig~linux-2.4.20-xattr-0.8.54-chaos    2003-02-14 15:58:11.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/arch/mips64/defconfig      2003-07-12 15:34:44.000000000 -0600
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
  CONFIG_MIPS=y
  # CONFIG_MIPS32 is not set
  CONFIG_MIPS64=y
  CONFIG_MIPS=y
  # CONFIG_MIPS32 is not set
  CONFIG_MIPS64=y
---- linux-rh-2.4.20-8/arch/ppc/defconfig~linux-2.4.20-xattr-0.8.54-chaos       2003-04-11 14:04:43.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/ppc/defconfig  2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/ppc/defconfig~linux-2.4.20-xattr-0.8.54-chaos       2003-05-15 21:12:20.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/ppc/defconfig 2003-07-12 15:34:44.000000000 -0600
 @@ -1,6 +1,20 @@
  #
  # Automatically generated make config: don't edit
 @@ -1,6 +1,20 @@
  #
  # Automatically generated make config: don't edit
  # CONFIG_UID16 is not set
  # CONFIG_RWSEM_GENERIC_SPINLOCK is not set
  CONFIG_RWSEM_XCHGADD_ALGORITHM=y
  # CONFIG_UID16 is not set
  # CONFIG_RWSEM_GENERIC_SPINLOCK is not set
  CONFIG_RWSEM_XCHGADD_ALGORITHM=y
---- linux-rh-2.4.20-8/arch/ppc64/kernel/misc.S~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:11.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/ppc64/kernel/misc.S    2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/ppc64/kernel/misc.S~linux-2.4.20-xattr-0.8.54-chaos 2003-02-14 15:58:20.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/arch/ppc64/kernel/misc.S   2003-07-12 15:34:44.000000000 -0600
 @@ -731,6 +731,7 @@ _GLOBAL(sys_call_table32)
        .llong .sys_gettid              /* 207 */
  #if 0 /* Reserved syscalls */
 @@ -731,6 +731,7 @@ _GLOBAL(sys_call_table32)
        .llong .sys_gettid              /* 207 */
  #if 0 /* Reserved syscalls */
        .llong .sys_futex
  #endif
        .llong .sys_perfmonctl   /* Put this here for now ... */
        .llong .sys_futex
  #endif
        .llong .sys_perfmonctl   /* Put this here for now ... */
---- linux-rh-2.4.20-8/arch/s390/defconfig~linux-2.4.20-xattr-0.8.54-chaos      2002-11-29 07:53:11.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/s390/defconfig 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/s390/defconfig~linux-2.4.20-xattr-0.8.54-chaos      2003-02-14 15:58:20.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/arch/s390/defconfig        2003-07-12 15:34:44.000000000 -0600
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
  # CONFIG_ISA is not set
  # CONFIG_EISA is not set
  # CONFIG_MCA is not set
  # CONFIG_ISA is not set
  # CONFIG_EISA is not set
  # CONFIG_MCA is not set
---- linux-rh-2.4.20-8/arch/s390/kernel/entry.S~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:11.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/s390/kernel/entry.S    2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/s390/kernel/entry.S~linux-2.4.20-xattr-0.8.54-chaos 2003-02-14 15:58:20.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/arch/s390/kernel/entry.S   2003-07-12 15:34:44.000000000 -0600
 @@ -558,18 +558,18 @@ sys_call_table:
          .long  sys_fcntl64 
        .long  sys_ni_syscall
 @@ -558,18 +558,18 @@ sys_call_table:
          .long  sys_fcntl64 
        .long  sys_ni_syscall
        .long  sys_gettid
        .long  sys_tkill
        .rept  255-237
        .long  sys_gettid
        .long  sys_tkill
        .rept  255-237
---- linux-rh-2.4.20-8/arch/s390x/defconfig~linux-2.4.20-xattr-0.8.54-chaos     2002-11-29 07:53:11.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/s390x/defconfig        2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/s390x/defconfig~linux-2.4.20-xattr-0.8.54-chaos     2003-02-14 15:58:21.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/arch/s390x/defconfig       2003-07-12 15:34:44.000000000 -0600
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
  # CONFIG_ISA is not set
  # CONFIG_EISA is not set
  # CONFIG_MCA is not set
  # CONFIG_ISA is not set
  # CONFIG_EISA is not set
  # CONFIG_MCA is not set
---- linux-rh-2.4.20-8/arch/s390x/kernel/entry.S~linux-2.4.20-xattr-0.8.54-chaos        2002-11-29 07:53:11.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/s390x/kernel/entry.S   2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/s390x/kernel/entry.S~linux-2.4.20-xattr-0.8.54-chaos        2003-02-14 15:58:21.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/arch/s390x/kernel/entry.S  2003-07-12 15:34:44.000000000 -0600
 @@ -591,18 +591,18 @@ sys_call_table:
        .long  SYSCALL(sys_ni_syscall,sys32_fcntl64_wrapper)
        .long  SYSCALL(sys_ni_syscall,sys_ni_syscall)
 @@ -591,18 +591,18 @@ sys_call_table:
        .long  SYSCALL(sys_ni_syscall,sys32_fcntl64_wrapper)
        .long  SYSCALL(sys_ni_syscall,sys_ni_syscall)
        .long  SYSCALL(sys_gettid,sys_gettid)
        .long  SYSCALL(sys_tkill,sys_tkill)
        .rept  255-237
        .long  SYSCALL(sys_gettid,sys_gettid)
        .long  SYSCALL(sys_tkill,sys_tkill)
        .rept  255-237
---- linux-rh-2.4.20-8/arch/s390x/kernel/wrapper32.S~linux-2.4.20-xattr-0.8.54-chaos    2002-02-26 03:37:56.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/s390x/kernel/wrapper32.S       2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/s390x/kernel/wrapper32.S~linux-2.4.20-xattr-0.8.54-chaos    2002-05-07 15:53:59.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/s390x/kernel/wrapper32.S      2003-07-12 15:34:44.000000000 -0600
 @@ -1091,3 +1091,95 @@ sys32_fstat64_wrapper:
        llgtr   %r3,%r3                 # struct stat64 *
        llgfr   %r4,%r4                 # long
 @@ -1091,3 +1091,95 @@ sys32_fstat64_wrapper:
        llgtr   %r3,%r3                 # struct stat64 *
        llgfr   %r4,%r4                 # long
 +      jg      sys_fremovexattr
 +
 +
 +      jg      sys_fremovexattr
 +
 +
---- linux-rh-2.4.20-8/arch/sparc/defconfig~linux-2.4.20-xattr-0.8.54-chaos     2002-08-03 08:39:43.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/sparc/defconfig        2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/sparc/defconfig~linux-2.4.20-xattr-0.8.54-chaos     2002-09-25 11:10:50.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/sparc/defconfig       2003-07-12 15:34:44.000000000 -0600
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
  CONFIG_UID16=y
  CONFIG_HIGHMEM=y
  
  CONFIG_UID16=y
  CONFIG_HIGHMEM=y
  
---- linux-rh-2.4.20-8/arch/sparc/kernel/systbls.S~linux-2.4.20-xattr-0.8.54-chaos      2002-08-03 08:39:43.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/sparc/kernel/systbls.S 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/sparc/kernel/systbls.S~linux-2.4.20-xattr-0.8.54-chaos      2002-09-25 11:10:52.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/sparc/kernel/systbls.S        2003-07-12 15:34:44.000000000 -0600
 @@ -51,11 +51,11 @@ sys_call_table:
  /*150*/       .long sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_poll, sys_getdents64
  /*155*/       .long sys_fcntl64, sys_nis_syscall, sys_statfs, sys_fstatfs, sys_oldumount
 @@ -51,11 +51,11 @@ sys_call_table:
  /*150*/       .long sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_poll, sys_getdents64
  /*155*/       .long sys_fcntl64, sys_nis_syscall, sys_statfs, sys_fstatfs, sys_oldumount
  /*190*/       .long sys_init_module, sys_personality, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall
  /*195*/       .long sys_nis_syscall, sys_nis_syscall, sys_getppid, sparc_sigaction, sys_sgetmask
  /*200*/       .long sys_ssetmask, sys_sigsuspend, sys_newlstat, sys_uselib, old_readdir
  /*190*/       .long sys_init_module, sys_personality, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall
  /*195*/       .long sys_nis_syscall, sys_nis_syscall, sys_getppid, sparc_sigaction, sys_sgetmask
  /*200*/       .long sys_ssetmask, sys_sigsuspend, sys_newlstat, sys_uselib, old_readdir
---- linux-rh-2.4.20-8/arch/sparc64/defconfig~linux-2.4.20-xattr-0.8.54-chaos   2003-04-11 14:04:43.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/sparc64/defconfig      2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/sparc64/defconfig~linux-2.4.20-xattr-0.8.54-chaos   2003-05-15 21:12:29.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/sparc64/defconfig     2003-07-12 15:34:44.000000000 -0600
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
  
  #
  # Code maturity level options
  
  #
  # Code maturity level options
---- linux-rh-2.4.20-8/arch/sparc64/kernel/systbls.S~linux-2.4.20-xattr-0.8.54-chaos    2002-08-03 08:39:43.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/sparc64/kernel/systbls.S       2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/sparc64/kernel/systbls.S~linux-2.4.20-xattr-0.8.54-chaos    2002-09-25 11:10:55.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/sparc64/kernel/systbls.S      2003-07-12 15:34:44.000000000 -0600
 @@ -52,11 +52,11 @@ sys_call_table32:
  /*150*/       .word sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_poll, sys_getdents64
        .word sys32_fcntl64, sys_nis_syscall, sys32_statfs, sys32_fstatfs, sys_oldumount
 @@ -52,11 +52,11 @@ sys_call_table32:
  /*150*/       .word sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_poll, sys_getdents64
        .word sys32_fcntl64, sys_nis_syscall, sys32_statfs, sys32_fstatfs, sys_oldumount
  /*190*/       .word sys_init_module, sparc64_personality, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall
        .word sys_nis_syscall, sys_nis_syscall, sys_getppid, sys_nis_syscall, sys_sgetmask
  /*200*/       .word sys_ssetmask, sys_nis_syscall, sys_newlstat, sys_uselib, sys_nis_syscall
  /*190*/       .word sys_init_module, sparc64_personality, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall
        .word sys_nis_syscall, sys_nis_syscall, sys_getppid, sys_nis_syscall, sys_sgetmask
  /*200*/       .word sys_ssetmask, sys_nis_syscall, sys_newlstat, sys_uselib, sys_nis_syscall
---- linux-rh-2.4.20-8/fs/Config.in~linux-2.4.20-xattr-0.8.54-chaos     2003-04-11 14:05:03.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/Config.in        2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/Config.in~linux-2.4.20-xattr-0.8.54-chaos     2003-05-15 21:14:24.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/Config.in       2003-07-12 15:34:44.000000000 -0600
 @@ -34,6 +34,11 @@ dep_mbool '  Debug Befs' CONFIG_BEFS_DEB
  dep_tristate 'BFS file system support (EXPERIMENTAL)' CONFIG_BFS_FS $CONFIG_EXPERIMENTAL
  
 @@ -34,6 +34,11 @@ dep_mbool '  Debug Befs' CONFIG_BEFS_DEB
  dep_tristate 'BFS file system support (EXPERIMENTAL)' CONFIG_BFS_FS $CONFIG_EXPERIMENTAL
  
  mainmenu_option next_comment
  comment 'Partition Types'
  source fs/partitions/Config.in
  mainmenu_option next_comment
  comment 'Partition Types'
  source fs/partitions/Config.in
---- linux-rh-2.4.20-8/fs/Makefile~linux-2.4.20-xattr-0.8.54-chaos      2003-05-07 17:33:58.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/Makefile 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/Makefile~linux-2.4.20-xattr-0.8.54-chaos      2003-07-12 15:33:34.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/Makefile        2003-07-12 15:34:44.000000000 -0600
 @@ -84,6 +84,9 @@ obj-y                                += binfmt_script.o
  
  obj-$(CONFIG_BINFMT_ELF)      += binfmt_elf.o
 @@ -84,6 +84,9 @@ obj-y                                += binfmt_script.o
  
  obj-$(CONFIG_BINFMT_ELF)      += binfmt_elf.o
  # persistent filesystems
  obj-y += $(join $(subdir-y),$(subdir-y:%=/%.o))
  
  # persistent filesystems
  obj-y += $(join $(subdir-y),$(subdir-y:%=/%.o))
  
---- linux-rh-2.4.20-8/fs/ext2/Makefile~linux-2.4.20-xattr-0.8.54-chaos 2001-10-11 23:05:18.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext2/Makefile    2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext2/Makefile~linux-2.4.20-xattr-0.8.54-chaos 2002-05-07 15:53:46.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/Makefile   2003-07-12 15:34:44.000000000 -0600
 @@ -13,4 +13,8 @@ obj-y    := balloc.o bitmap.o dir.o file
                ioctl.o namei.o super.o symlink.o
  obj-m    := $(O_TARGET)
 @@ -13,4 +13,8 @@ obj-y    := balloc.o bitmap.o dir.o file
                ioctl.o namei.o super.o symlink.o
  obj-m    := $(O_TARGET)
 +obj-$(CONFIG_EXT2_FS_XATTR_USER) += xattr_user.o
 +
  include $(TOPDIR)/Rules.make
 +obj-$(CONFIG_EXT2_FS_XATTR_USER) += xattr_user.o
 +
  include $(TOPDIR)/Rules.make
---- linux-rh-2.4.20-8/fs/ext2/file.c~linux-2.4.20-xattr-0.8.54-chaos   2001-10-11 23:05:18.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext2/file.c      2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext2/file.c~linux-2.4.20-xattr-0.8.54-chaos   2002-05-07 15:53:46.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/file.c     2003-07-12 15:34:44.000000000 -0600
 @@ -20,6 +20,7 @@
  
  #include <linux/fs.h>
 @@ -20,6 +20,7 @@
  
  #include <linux/fs.h>
 +      listxattr:      ext2_listxattr,
 +      removexattr:    ext2_removexattr,
  };
 +      listxattr:      ext2_listxattr,
 +      removexattr:    ext2_removexattr,
  };
---- linux-rh-2.4.20-8/fs/ext2/ialloc.c~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:15.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext2/ialloc.c    2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext2/ialloc.c~linux-2.4.20-xattr-0.8.54-chaos 2003-02-14 15:59:09.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/ialloc.c   2003-07-12 15:34:44.000000000 -0600
 @@ -15,6 +15,7 @@
  #include <linux/config.h>
  #include <linux/fs.h>
 @@ -15,6 +15,7 @@
  #include <linux/config.h>
  #include <linux/fs.h>
                DQUOT_FREE_INODE(inode);
                DQUOT_DROP(inode);
        }
                DQUOT_FREE_INODE(inode);
                DQUOT_DROP(inode);
        }
---- linux-rh-2.4.20-8/fs/ext2/inode.c~linux-2.4.20-xattr-0.8.54-chaos  2002-11-29 07:53:15.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext2/inode.c     2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext2/inode.c~linux-2.4.20-xattr-0.8.54-chaos  2003-02-14 15:59:09.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/inode.c    2003-07-12 15:34:44.000000000 -0600
 @@ -39,6 +39,18 @@ MODULE_LICENSE("GPL");
  static int ext2_update_inode(struct inode * inode, int do_sync);
  
 @@ -39,6 +39,18 @@ MODULE_LICENSE("GPL");
  static int ext2_update_inode(struct inode * inode, int do_sync);
  
        brelse (bh);
        inode->i_attr_flags = 0;
        if (inode->u.ext2_i.i_flags & EXT2_SYNC_FL) {
        brelse (bh);
        inode->i_attr_flags = 0;
        if (inode->u.ext2_i.i_flags & EXT2_SYNC_FL) {
---- linux-rh-2.4.20-8/fs/ext2/namei.c~linux-2.4.20-xattr-0.8.54-chaos  2001-10-04 13:57:36.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext2/namei.c     2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext2/namei.c~linux-2.4.20-xattr-0.8.54-chaos  2002-05-07 15:53:46.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/namei.c    2003-07-12 15:34:44.000000000 -0600
 @@ -31,6 +31,7 @@
  
  #include <linux/fs.h>
 @@ -31,6 +31,7 @@
  
  #include <linux/fs.h>
 +      listxattr:      ext2_listxattr,
 +      removexattr:    ext2_removexattr,
  };
 +      listxattr:      ext2_listxattr,
 +      removexattr:    ext2_removexattr,
  };
---- linux-rh-2.4.20-8/fs/ext2/super.c~linux-2.4.20-xattr-0.8.54-chaos  2002-11-29 07:53:15.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext2/super.c     2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext2/super.c~linux-2.4.20-xattr-0.8.54-chaos  2003-02-14 15:59:09.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/super.c    2003-07-12 15:34:44.000000000 -0600
 @@ -21,6 +21,7 @@
  #include <linux/string.h>
  #include <linux/fs.h>
 @@ -21,6 +21,7 @@
  #include <linux/string.h>
  #include <linux/fs.h>
  }
  
  EXPORT_NO_SYMBOLS;
  }
  
  EXPORT_NO_SYMBOLS;
---- linux-rh-2.4.20-8/fs/ext2/symlink.c~linux-2.4.20-xattr-0.8.54-chaos        2000-09-28 04:41:33.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext2/symlink.c   2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext2/symlink.c~linux-2.4.20-xattr-0.8.54-chaos        2002-05-07 15:53:46.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/symlink.c  2003-07-12 15:34:44.000000000 -0600
 @@ -19,6 +19,7 @@
  
  #include <linux/fs.h>
 @@ -19,6 +19,7 @@
  
  #include <linux/fs.h>
 +      listxattr:      ext2_listxattr,
 +      removexattr:    ext2_removexattr,
  };
 +      listxattr:      ext2_listxattr,
 +      removexattr:    ext2_removexattr,
  };
---- /dev/null  2003-01-30 18:24:37.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext2/xattr.c     2003-05-07 17:34:25.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/xattr.c    2003-07-12 15:34:44.000000000 -0600
 @@ -0,0 +1,1212 @@
 +/*
 + * linux/fs/ext2/xattr.c
 @@ -0,0 +1,1212 @@
 +/*
 + * linux/fs/ext2/xattr.c
 +}
 +
 +#endif  /* CONFIG_EXT2_FS_XATTR_SHARING */
 +}
 +
 +#endif  /* CONFIG_EXT2_FS_XATTR_SHARING */
---- /dev/null  2003-01-30 18:24:37.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext2/xattr_user.c        2003-05-07 17:34:25.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/xattr_user.c       2003-07-12 15:34:44.000000000 -0600
 @@ -0,0 +1,103 @@
 +/*
 + * linux/fs/ext2/xattr_user.c
 @@ -0,0 +1,103 @@
 +/*
 + * linux/fs/ext2/xattr_user.c
 +      ext2_xattr_unregister(EXT2_XATTR_INDEX_USER,
 +                            &ext2_xattr_user_handler);
 +}
 +      ext2_xattr_unregister(EXT2_XATTR_INDEX_USER,
 +                            &ext2_xattr_user_handler);
 +}
---- linux-rh-2.4.20-8/fs/ext3/Makefile~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:59.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext3/Makefile    2003-05-07 17:45:13.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext3/Makefile~linux-2.4.20-xattr-0.8.54-chaos 2003-07-12 15:33:38.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/Makefile   2003-07-12 15:34:44.000000000 -0600
 @@ -1,5 +1,5 @@
  #
 -# Makefile for the linux ext2-filesystem routines.
 @@ -1,5 +1,5 @@
  #
 -# Makefile for the linux ext2-filesystem routines.
 +obj-$(CONFIG_EXT3_FS_XATTR_USER) += xattr_user.o
 +
  include $(TOPDIR)/Rules.make
 +obj-$(CONFIG_EXT3_FS_XATTR_USER) += xattr_user.o
 +
  include $(TOPDIR)/Rules.make
---- linux-rh-2.4.20-8/fs/ext3/file.c~linux-2.4.20-xattr-0.8.54-chaos   2003-05-07 17:33:59.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext3/file.c      2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext3/file.c~linux-2.4.20-xattr-0.8.54-chaos   2003-07-12 15:33:38.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/file.c     2003-07-12 15:34:44.000000000 -0600
 @@ -23,6 +23,7 @@
  #include <linux/locks.h>
  #include <linux/jbd.h>
 @@ -23,6 +23,7 @@
  #include <linux/locks.h>
  #include <linux/jbd.h>
 +      removexattr:    ext3_removexattr,       /* BKL held */
  };
  
 +      removexattr:    ext3_removexattr,       /* BKL held */
  };
  
---- linux-rh-2.4.20-8/fs/ext3/ialloc.c~linux-2.4.20-xattr-0.8.54-chaos 2003-04-11 14:04:48.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext3/ialloc.c    2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext3/ialloc.c~linux-2.4.20-xattr-0.8.54-chaos 2003-05-15 21:14:30.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/ialloc.c   2003-07-12 15:34:44.000000000 -0600
 @@ -17,6 +17,7 @@
  #include <linux/jbd.h>
  #include <linux/ext3_fs.h>
 @@ -17,6 +17,7 @@
  #include <linux/jbd.h>
  #include <linux/ext3_fs.h>
        DQUOT_FREE_INODE(inode);
        DQUOT_DROP(inode);
  
        DQUOT_FREE_INODE(inode);
        DQUOT_DROP(inode);
  
---- linux-rh-2.4.20-8/fs/ext3/inode.c~linux-2.4.20-xattr-0.8.54-chaos  2003-04-11 14:04:58.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext3/inode.c     2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext3/inode.c~linux-2.4.20-xattr-0.8.54-chaos  2003-05-15 21:14:30.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/inode.c    2003-07-12 15:34:44.000000000 -0600
 @@ -39,6 +39,18 @@
   */
  #undef SEARCH_FROM_ZERO
 @@ -39,6 +39,18 @@
   */
  #undef SEARCH_FROM_ZERO
        /* inode->i_attr_flags = 0;                             unused */
        if (inode->u.ext3_i.i_flags & EXT3_SYNC_FL) {
                /* inode->i_attr_flags |= ATTR_FLAG_SYNCRONOUS; unused */
        /* inode->i_attr_flags = 0;                             unused */
        if (inode->u.ext3_i.i_flags & EXT3_SYNC_FL) {
                /* inode->i_attr_flags |= ATTR_FLAG_SYNCRONOUS; unused */
---- linux-rh-2.4.20-8/fs/ext3/namei.c~linux-2.4.20-xattr-0.8.54-chaos  2003-05-07 17:33:59.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext3/namei.c     2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext3/namei.c~linux-2.4.20-xattr-0.8.54-chaos  2003-07-12 15:33:43.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/namei.c    2003-07-12 15:34:44.000000000 -0600
 @@ -29,6 +29,7 @@
  #include <linux/sched.h>
  #include <linux/ext3_fs.h>
 @@ -29,6 +29,7 @@
  #include <linux/sched.h>
  #include <linux/ext3_fs.h>
 +      removexattr:    ext3_removexattr,       /* BKL held */
 +};
 +
 +      removexattr:    ext3_removexattr,       /* BKL held */
 +};
 +
---- linux-rh-2.4.20-8/fs/ext3/super.c~linux-2.4.20-xattr-0.8.54-chaos  2003-05-07 17:33:59.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext3/super.c     2003-05-07 17:40:45.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext3/super.c~linux-2.4.20-xattr-0.8.54-chaos  2003-07-12 15:33:38.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/super.c    2003-07-12 15:34:44.000000000 -0600
 @@ -24,6 +24,7 @@
  #include <linux/jbd.h>
  #include <linux/ext3_fs.h>
 @@ -24,6 +24,7 @@
  #include <linux/jbd.h>
  #include <linux/ext3_fs.h>
  MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
  MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
  MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
  MODULE_LICENSE("GPL");
---- linux-rh-2.4.20-8/fs/ext3/symlink.c~linux-2.4.20-xattr-0.8.54-chaos        2001-11-10 06:25:04.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext3/symlink.c   2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext3/symlink.c~linux-2.4.20-xattr-0.8.54-chaos        2002-05-07 15:53:46.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/symlink.c  2003-07-12 15:34:44.000000000 -0600
 @@ -20,6 +20,7 @@
  #include <linux/fs.h>
  #include <linux/jbd.h>
 @@ -20,6 +20,7 @@
  #include <linux/fs.h>
  #include <linux/jbd.h>
 +      listxattr:      ext3_listxattr,         /* BKL held */
 +      removexattr:    ext3_removexattr,       /* BKL held */
  };
 +      listxattr:      ext3_listxattr,         /* BKL held */
 +      removexattr:    ext3_removexattr,       /* BKL held */
  };
---- /dev/null  2003-01-30 18:24:37.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext3/xattr.c     2003-05-07 17:42:06.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/xattr.c    2003-07-12 15:34:44.000000000 -0600
 @@ -0,0 +1,1225 @@
 +/*
 + * linux/fs/ext3/xattr.c
 @@ -0,0 +1,1225 @@
 +/*
 + * linux/fs/ext3/xattr.c
 +}
 +
 +#endif  /* CONFIG_EXT3_FS_XATTR_SHARING */
 +}
 +
 +#endif  /* CONFIG_EXT3_FS_XATTR_SHARING */
---- /dev/null  2003-01-30 18:24:37.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext3/xattr_user.c        2003-05-07 17:34:25.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/xattr_user.c       2003-07-12 15:34:44.000000000 -0600
 @@ -0,0 +1,111 @@
 +/*
 + * linux/fs/ext3/xattr_user.c
 @@ -0,0 +1,111 @@
 +/*
 + * linux/fs/ext3/xattr_user.c
 +      ext3_xattr_unregister(EXT3_XATTR_INDEX_USER,
 +                            &ext3_xattr_user_handler);
 +}
 +      ext3_xattr_unregister(EXT3_XATTR_INDEX_USER,
 +                            &ext3_xattr_user_handler);
 +}
---- linux-rh-2.4.20-8/fs/jfs/jfs_xattr.h~linux-2.4.20-xattr-0.8.54-chaos       2002-11-29 07:53:15.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/jfs/jfs_xattr.h  2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/jfs/jfs_xattr.h~linux-2.4.20-xattr-0.8.54-chaos       2003-02-14 15:59:11.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/jfs/jfs_xattr.h 2003-07-12 15:34:44.000000000 -0600
 @@ -52,8 +52,10 @@ struct jfs_ea_list {
  #define       END_EALIST(ealist) \
        ((struct jfs_ea *) (((char *) (ealist)) + EALIST_SIZE(ealist)))
 @@ -52,8 +52,10 @@ struct jfs_ea_list {
  #define       END_EALIST(ealist) \
        ((struct jfs_ea *) (((char *) (ealist)) + EALIST_SIZE(ealist)))
  extern ssize_t __jfs_getxattr(struct inode *, const char *, void *, size_t);
  extern ssize_t jfs_getxattr(struct dentry *, const char *, void *, size_t);
  extern ssize_t jfs_listxattr(struct dentry *, char *, size_t);
  extern ssize_t __jfs_getxattr(struct inode *, const char *, void *, size_t);
  extern ssize_t jfs_getxattr(struct dentry *, const char *, void *, size_t);
  extern ssize_t jfs_listxattr(struct dentry *, char *, size_t);
---- linux-rh-2.4.20-8/fs/jfs/xattr.c~linux-2.4.20-xattr-0.8.54-chaos   2002-11-29 07:53:15.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/jfs/xattr.c      2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/jfs/xattr.c~linux-2.4.20-xattr-0.8.54-chaos   2003-02-14 15:59:11.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/jfs/xattr.c     2003-07-12 15:34:44.000000000 -0600
 @@ -641,7 +641,7 @@ static int ea_put(struct inode *inode, s
  }
  
 @@ -641,7 +641,7 @@ static int ea_put(struct inode *inode, s
  }
  
                 size_t value_len, int flags)
  {
        if (value == NULL) {    /* empty EA, do not remove */
                 size_t value_len, int flags)
  {
        if (value == NULL) {    /* empty EA, do not remove */
---- /dev/null  2003-01-30 18:24:37.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/mbcache.c        2003-05-07 17:34:25.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/mbcache.c       2003-07-12 15:34:44.000000000 -0600
 @@ -0,0 +1,648 @@
 +/*
 + * linux/fs/mbcache.c
 @@ -0,0 +1,648 @@
 +/*
 + * linux/fs/mbcache.c
 +module_init(init_mbcache)
 +module_exit(exit_mbcache)
 +
 +module_init(init_mbcache)
 +module_exit(exit_mbcache)
 +
---- linux-rh-2.4.20-8/include/asm-arm/unistd.h~linux-2.4.20-xattr-0.8.54-chaos 2003-04-11 14:04:53.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/asm-arm/unistd.h    2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/asm-arm/unistd.h~linux-2.4.20-xattr-0.8.54-chaos 2003-05-15 21:14:42.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/asm-arm/unistd.h   2003-07-12 15:34:44.000000000 -0600
 @@ -244,7 +244,6 @@
  #define __NR_security                 (__NR_SYSCALL_BASE+223)
  #define __NR_gettid                   (__NR_SYSCALL_BASE+224)
 @@ -244,7 +244,6 @@
  #define __NR_security                 (__NR_SYSCALL_BASE+223)
  #define __NR_gettid                   (__NR_SYSCALL_BASE+224)
  #define __NR_tkill                    (__NR_SYSCALL_BASE+238)
  /*
   * Please check 2.5 _before_ adding calls here,
  #define __NR_tkill                    (__NR_SYSCALL_BASE+238)
  /*
   * Please check 2.5 _before_ adding calls here,
---- linux-rh-2.4.20-8/include/asm-ppc64/unistd.h~linux-2.4.20-xattr-0.8.54-chaos       2002-08-03 08:39:45.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/asm-ppc64/unistd.h  2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/asm-ppc64/unistd.h~linux-2.4.20-xattr-0.8.54-chaos       2002-09-25 11:13:42.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/asm-ppc64/unistd.h 2003-07-12 15:34:44.000000000 -0600
 @@ -218,6 +218,7 @@
  #define __NR_gettid           207
  #if 0 /* Reserved syscalls */
 @@ -218,6 +218,7 @@
  #define __NR_gettid           207
  #if 0 /* Reserved syscalls */
  #define __NR_futex            221
  #endif
  
  #define __NR_futex            221
  #endif
  
---- linux-rh-2.4.20-8/include/asm-s390/unistd.h~linux-2.4.20-xattr-0.8.54-chaos        2002-08-03 08:39:45.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/asm-s390/unistd.h   2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/asm-s390/unistd.h~linux-2.4.20-xattr-0.8.54-chaos        2002-09-25 11:13:44.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/asm-s390/unistd.h  2003-07-12 15:34:44.000000000 -0600
 @@ -212,9 +212,18 @@
  #define __NR_madvise            219
  #define __NR_getdents64               220
 @@ -212,9 +212,18 @@
  #define __NR_madvise            219
  #define __NR_getdents64               220
  #define __NR_gettid           236
  #define __NR_tkill            237
  
  #define __NR_gettid           236
  #define __NR_tkill            237
  
---- linux-rh-2.4.20-8/include/asm-s390x/unistd.h~linux-2.4.20-xattr-0.8.54-chaos       2002-08-03 08:39:45.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/asm-s390x/unistd.h  2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/asm-s390x/unistd.h~linux-2.4.20-xattr-0.8.54-chaos       2002-09-25 11:13:45.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/asm-s390x/unistd.h 2003-07-12 15:34:44.000000000 -0600
 @@ -180,9 +180,18 @@
  #define __NR_pivot_root         217
  #define __NR_mincore            218
 @@ -180,9 +180,18 @@
  #define __NR_pivot_root         217
  #define __NR_mincore            218
  #define __NR_gettid           236
  #define __NR_tkill            237
  
  #define __NR_gettid           236
  #define __NR_tkill            237
  
---- linux-rh-2.4.20-8/include/asm-sparc/unistd.h~linux-2.4.20-xattr-0.8.54-chaos       2002-08-03 08:39:45.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/asm-sparc/unistd.h  2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/asm-sparc/unistd.h~linux-2.4.20-xattr-0.8.54-chaos       2002-09-25 11:13:46.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/asm-sparc/unistd.h 2003-07-12 15:34:44.000000000 -0600
 @@ -184,24 +184,24 @@
  /* #define __NR_exportfs        166    SunOS Specific                              */
  #define __NR_mount              167 /* Common                                      */
 @@ -184,24 +184,24 @@
  /* #define __NR_exportfs        166    SunOS Specific                              */
  #define __NR_mount              167 /* Common                                      */
  #define __NR_tkill              187 /* SunOS: fpathconf                            */
  /* #define __NR_sysconf         188    SunOS Specific                              */
  #define __NR_uname              189 /* Linux Specific                              */
  #define __NR_tkill              187 /* SunOS: fpathconf                            */
  /* #define __NR_sysconf         188    SunOS Specific                              */
  #define __NR_uname              189 /* Linux Specific                              */
---- linux-rh-2.4.20-8/include/asm-sparc64/unistd.h~linux-2.4.20-xattr-0.8.54-chaos     2002-08-03 08:39:45.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/asm-sparc64/unistd.h        2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/asm-sparc64/unistd.h~linux-2.4.20-xattr-0.8.54-chaos     2002-09-25 11:13:48.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/asm-sparc64/unistd.h       2003-07-12 15:34:44.000000000 -0600
 @@ -184,24 +184,24 @@
  /* #define __NR_exportfs        166    SunOS Specific                              */
  #define __NR_mount              167 /* Common                                      */
 @@ -184,24 +184,24 @@
  /* #define __NR_exportfs        166    SunOS Specific                              */
  #define __NR_mount              167 /* Common                                      */
  #define __NR_tkill              187 /* SunOS: fpathconf                            */
  /* #define __NR_sysconf         188    SunOS Specific                              */
  #define __NR_uname              189 /* Linux Specific                              */
  #define __NR_tkill              187 /* SunOS: fpathconf                            */
  /* #define __NR_sysconf         188    SunOS Specific                              */
  #define __NR_uname              189 /* Linux Specific                              */
---- /dev/null  2003-01-30 18:24:37.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/linux/cache_def.h   2003-05-07 17:34:25.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/cache_def.h  2003-07-12 15:34:44.000000000 -0600
 @@ -0,0 +1,15 @@
 +/*
 + * linux/cache_def.h
 @@ -0,0 +1,15 @@
 +/*
 + * linux/cache_def.h
 +
 +extern void register_cache(struct cache_definition *);
 +extern void unregister_cache(struct cache_definition *);
 +
 +extern void register_cache(struct cache_definition *);
 +extern void unregister_cache(struct cache_definition *);
---- linux-rh-2.4.20-8/include/linux/errno.h~linux-2.4.20-xattr-0.8.54-chaos    2003-04-11 14:04:53.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/linux/errno.h       2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/linux/errno.h~linux-2.4.20-xattr-0.8.54-chaos    2003-05-15 21:15:06.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/errno.h      2003-07-12 15:34:44.000000000 -0600
 @@ -26,4 +26,8 @@
  
  #endif
 @@ -26,4 +26,8 @@
  
  #endif
 +#define ENOTSUP EOPNOTSUPP    /* Operation not supported */
 +
  #endif
 +#define ENOTSUP EOPNOTSUPP    /* Operation not supported */
 +
  #endif
---- linux-rh-2.4.20-8/include/linux/ext2_fs.h~linux-2.4.20-xattr-0.8.54-chaos  2003-04-12 15:46:42.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/linux/ext2_fs.h     2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/linux/ext2_fs.h~linux-2.4.20-xattr-0.8.54-chaos  2003-06-24 11:31:16.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/ext2_fs.h    2003-07-12 15:34:44.000000000 -0600
 @@ -57,8 +57,6 @@
   */
  #define       EXT2_BAD_INO             1      /* Bad blocks inode */
 @@ -57,8 +57,6 @@
   */
  #define       EXT2_BAD_INO             1      /* Bad blocks inode */
  #define EXT2_FEATURE_INCOMPAT_SUPP    EXT2_FEATURE_INCOMPAT_FILETYPE
  #define EXT2_FEATURE_RO_COMPAT_SUPP   (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \
                                         EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \
  #define EXT2_FEATURE_INCOMPAT_SUPP    EXT2_FEATURE_INCOMPAT_FILETYPE
  #define EXT2_FEATURE_RO_COMPAT_SUPP   (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \
                                         EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \
-@@ -623,8 +600,10 @@ extern struct address_space_operations e
+@@ -624,8 +601,10 @@ extern struct address_space_operations e
  
  /* namei.c */
  extern struct inode_operations ext2_dir_inode_operations;
  
  /* namei.c */
  extern struct inode_operations ext2_dir_inode_operations;
  extern struct inode_operations ext2_fast_symlink_inode_operations;
  
  #endif        /* __KERNEL__ */
  extern struct inode_operations ext2_fast_symlink_inode_operations;
  
  #endif        /* __KERNEL__ */
---- /dev/null  2003-01-30 18:24:37.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/linux/ext2_xattr.h  2003-05-07 17:34:25.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/ext2_xattr.h 2003-07-12 15:34:44.000000000 -0600
 @@ -0,0 +1,157 @@
 +/*
 +  File: linux/ext2_xattr.h
 @@ -0,0 +1,157 @@
 +/*
 +  File: linux/ext2_xattr.h
 +
 +#endif  /* __KERNEL__ */
 +
 +
 +#endif  /* __KERNEL__ */
 +
---- linux-rh-2.4.20-8/include/linux/ext3_fs.h~linux-2.4.20-xattr-0.8.54-chaos  2003-05-07 17:33:59.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/linux/ext3_fs.h     2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/linux/ext3_fs.h~linux-2.4.20-xattr-0.8.54-chaos  2003-07-12 15:33:41.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/ext3_fs.h    2003-07-12 15:34:44.000000000 -0600
 @@ -63,8 +63,6 @@
   */
  #define       EXT3_BAD_INO             1      /* Bad blocks inode */
 @@ -63,8 +63,6 @@
   */
  #define       EXT3_BAD_INO             1      /* Bad blocks inode */
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
  #ifndef _LINUX_EXT2_FS_H
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
  #ifndef _LINUX_EXT2_FS_H
-@@ -520,7 +496,7 @@ struct ext3_super_block {
+@@ -521,7 +497,7 @@ struct ext3_super_block {
  #define EXT3_FEATURE_INCOMPAT_RECOVER         0x0004 /* Needs recovery */
  #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV     0x0008 /* Journal device */
  
  #define EXT3_FEATURE_INCOMPAT_RECOVER         0x0004 /* Needs recovery */
  #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV     0x0008 /* Journal device */
  
  #define EXT3_FEATURE_INCOMPAT_SUPP    (EXT3_FEATURE_INCOMPAT_FILETYPE| \
                                         EXT3_FEATURE_INCOMPAT_RECOVER)
  #define EXT3_FEATURE_RO_COMPAT_SUPP   (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \
  #define EXT3_FEATURE_INCOMPAT_SUPP    (EXT3_FEATURE_INCOMPAT_FILETYPE| \
                                         EXT3_FEATURE_INCOMPAT_RECOVER)
  #define EXT3_FEATURE_RO_COMPAT_SUPP   (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \
-@@ -703,6 +679,7 @@ extern void ext3_check_inodes_bitmap (st
+@@ -704,6 +680,7 @@ extern void ext3_check_inodes_bitmap (st
  extern unsigned long ext3_count_free (struct buffer_head *, unsigned);
  
  /* inode.c */
  extern unsigned long ext3_count_free (struct buffer_head *, unsigned);
  
  /* inode.c */
  extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
  extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
  
  extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
  extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
  
-@@ -771,8 +748,10 @@ extern struct address_space_operations e
+@@ -773,8 +750,10 @@ extern struct address_space_operations e
  
  /* namei.c */
  extern struct inode_operations ext3_dir_inode_operations;
  
  /* namei.c */
  extern struct inode_operations ext3_dir_inode_operations;
  extern struct inode_operations ext3_fast_symlink_inode_operations;
  
  
  extern struct inode_operations ext3_fast_symlink_inode_operations;
  
  
---- linux-rh-2.4.20-8/include/linux/ext3_jbd.h~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:59.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/linux/ext3_jbd.h    2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/linux/ext3_jbd.h~linux-2.4.20-xattr-0.8.54-chaos 2003-07-12 15:33:38.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/ext3_jbd.h   2003-07-12 15:34:44.000000000 -0600
 @@ -30,13 +30,19 @@
  
  #define EXT3_SINGLEDATA_TRANS_BLOCKS  8U
 @@ -30,13 +30,19 @@
  
  #define EXT3_SINGLEDATA_TRANS_BLOCKS  8U
  
  extern int ext3_writepage_trans_blocks(struct inode *inode);
  
  
  extern int ext3_writepage_trans_blocks(struct inode *inode);
  
---- /dev/null  2003-01-30 18:24:37.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/linux/ext3_xattr.h  2003-05-07 17:34:25.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/ext3_xattr.h 2003-07-12 15:34:44.000000000 -0600
 @@ -0,0 +1,157 @@
 +/*
 +  File: linux/ext3_xattr.h
 @@ -0,0 +1,157 @@
 +/*
 +  File: linux/ext3_xattr.h
 +
 +#endif  /* __KERNEL__ */
 +
 +
 +#endif  /* __KERNEL__ */
 +
---- linux-rh-2.4.20-8/include/linux/fs.h~linux-2.4.20-xattr-0.8.54-chaos       2003-05-07 17:33:58.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/linux/fs.h  2003-05-07 17:34:25.000000000 +0800
-@@ -915,7 +915,7 @@ struct inode_operations {
+--- kernel-2.4.20-6chaos_18_7/include/linux/fs.h~linux-2.4.20-xattr-0.8.54-chaos       2003-07-12 15:31:35.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/fs.h 2003-07-12 15:34:44.000000000 -0600
+@@ -914,7 +914,7 @@ struct inode_operations {
        int (*setattr) (struct dentry *, struct iattr *);
        int (*setattr) (struct dentry *, struct iattr *);
-       int (*setattr_raw) (struct inode *, struct iattr *);
+       int (*setattr_raw) (struct inode *, struct iattr *);
        int (*getattr) (struct dentry *, struct iattr *);
 -      int (*setxattr) (struct dentry *, const char *, void *, size_t, int);
 +      int (*setxattr) (struct dentry *, const char *, const void *, size_t, int);
        ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
        ssize_t (*listxattr) (struct dentry *, char *, size_t);
        int (*removexattr) (struct dentry *, const char *);
        int (*getattr) (struct dentry *, struct iattr *);
 -      int (*setxattr) (struct dentry *, const char *, void *, size_t, int);
 +      int (*setxattr) (struct dentry *, const char *, const void *, size_t, int);
        ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
        ssize_t (*listxattr) (struct dentry *, char *, size_t);
        int (*removexattr) (struct dentry *, const char *);
---- /dev/null  2003-01-30 18:24:37.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/linux/mbcache.h     2003-05-07 17:34:25.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/mbcache.h    2003-07-12 15:34:44.000000000 -0600
 @@ -0,0 +1,69 @@
 +/*
 +  File: linux/mbcache.h
 @@ -0,0 +1,69 @@
 +/*
 +  File: linux/mbcache.h
 +struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache_entry *, int,
 +                                              kdev_t, unsigned int);
 +#endif
 +struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache_entry *, int,
 +                                              kdev_t, unsigned int);
 +#endif
---- linux-rh-2.4.20-8/kernel/ksyms.c~linux-2.4.20-xattr-0.8.54-chaos   2003-05-07 17:33:58.000000000 +0800
-+++ linux-rh-2.4.20-8-root/kernel/ksyms.c      2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/kernel/ksyms.c~linux-2.4.20-xattr-0.8.54-chaos   2003-07-12 15:14:02.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/kernel/ksyms.c     2003-07-12 15:35:19.000000000 -0600
 @@ -12,6 +12,7 @@
  #define __KERNEL_SYSCALLS__
  #include <linux/config.h>
 @@ -12,6 +12,7 @@
  #define __KERNEL_SYSCALLS__
  #include <linux/config.h>
  #include <linux/smp.h>
  #include <linux/module.h>
  #include <linux/blkdev.h>
  #include <linux/smp.h>
  #include <linux/module.h>
  #include <linux/blkdev.h>
-@@ -107,6 +108,7 @@ EXPORT_SYMBOL(exit_mm);
+@@ -106,6 +107,7 @@ EXPORT_SYMBOL(do_brk);
+ EXPORT_SYMBOL(exit_mm);
  EXPORT_SYMBOL(exit_files);
  EXPORT_SYMBOL(exit_fs);
  EXPORT_SYMBOL(exit_files);
  EXPORT_SYMBOL(exit_fs);
- EXPORT_SYMBOL(exit_sighand);
 +EXPORT_SYMBOL(copy_fs_struct);
 +EXPORT_SYMBOL(copy_fs_struct);
+ EXPORT_SYMBOL(exit_sighand);
+ EXPORT_SYMBOL_GPL(make_pages_present);
  
  
- /* internal kernel memory management */
- EXPORT_SYMBOL(_alloc_pages);
-@@ -125,6 +127,8 @@ EXPORT_SYMBOL(kmem_cache_alloc);
+@@ -126,6 +128,8 @@ EXPORT_SYMBOL(kmem_cache_alloc);
  EXPORT_SYMBOL(kmem_cache_free);
  EXPORT_SYMBOL(kmem_cache_validate);
  EXPORT_SYMBOL(kmem_cache_size);
  EXPORT_SYMBOL(kmem_cache_free);
  EXPORT_SYMBOL(kmem_cache_validate);
  EXPORT_SYMBOL(kmem_cache_size);
  EXPORT_SYMBOL(kmalloc);
  EXPORT_SYMBOL(kfree);
  EXPORT_SYMBOL(vfree);
  EXPORT_SYMBOL(kmalloc);
  EXPORT_SYMBOL(kfree);
  EXPORT_SYMBOL(vfree);
---- linux-rh-2.4.20-8/mm/vmscan.c~linux-2.4.20-xattr-0.8.54-chaos      2003-05-07 17:33:58.000000000 +0800
-+++ linux-rh-2.4.20-8-root/mm/vmscan.c 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/mm/vmscan.c~linux-2.4.20-xattr-0.8.54-chaos      2003-07-12 15:33:34.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/mm/vmscan.c        2003-07-12 15:34:44.000000000 -0600
 @@ -21,6 +21,7 @@
  #include <linux/kernel_stat.h>
  #include <linux/swap.h>
 @@ -21,6 +21,7 @@
  #include <linux/kernel_stat.h>
  #include <linux/swap.h>
  #ifdef CONFIG_QUOTA
        ret += shrink_dqcache_memory(DEF_PRIORITY, gfp_mask);
  #endif
  #ifdef CONFIG_QUOTA
        ret += shrink_dqcache_memory(DEF_PRIORITY, gfp_mask);
  #endif
---- /dev/null  2003-01-30 18:24:37.000000000 +0800
-+++ linux-root/fs/ext3/ext3-exports.c  2003-05-05 18:19:11.000000000 +0800
+--- /dev/null  2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/ext3-exports.c     2003-07-12 15:34:44.000000000 -0600
 @@ -0,0 +1,13 @@
 +#include <linux/config.h>
 +#include <linux/module.h>
 @@ -0,0 +1,13 @@
 +#include <linux/config.h>
 +#include <linux/module.h>
index 78855ac..c987485 100644 (file)
@@ -7,6 +7,6 @@
 --- /dev/null  Fri Aug 30 17:31:37 2002
 +++ linux-2.4.18-18.8.0-l12-braam/include/linux/lustre_version.h       Thu Feb 13 07:58:33 2003
 @@ -0,0 +1 @@
 --- /dev/null  Fri Aug 30 17:31:37 2002
 +++ linux-2.4.18-18.8.0-l12-braam/include/linux/lustre_version.h       Thu Feb 13 07:58:33 2003
 @@ -0,0 +1 @@
-+#define LUSTRE_KERNEL_VERSION 19
++#define LUSTRE_KERNEL_VERSION 21
 
 _
 
 _
index 710cdc9..7aa5941 100644 (file)
@@ -1,7 +1,7 @@
  0 files changed
 
  0 files changed
 
---- linux-2.4.20-rh/fs/dcache.c~vfs_intent-2.4.20-rh   2003-04-11 14:04:58.000000000 +0800
-+++ linux-2.4.20-rh-root/fs/dcache.c   2003-06-09 23:18:07.000000000 +0800
+--- linux-2.4.20/fs/dcache.c~vfs_intent-2.4.20-rh      2003-07-17 08:32:59.000000000 -0700
++++ linux-2.4.20-mmonroe/fs/dcache.c   2003-07-17 08:35:22.000000000 -0700
 @@ -186,6 +186,13 @@ int d_invalidate(struct dentry * dentry)
                spin_unlock(&dcache_lock);
                return 0;
 @@ -186,6 +186,13 @@ int d_invalidate(struct dentry * dentry)
                spin_unlock(&dcache_lock);
                return 0;
        /*
         * Check whether to do a partial shrink_dcache
         * to get rid of unused child entries.
        /*
         * Check whether to do a partial shrink_dcache
         * to get rid of unused child entries.
-@@ -624,6 +631,7 @@ struct dentry * d_alloc(struct dentry * 
-       dentry->d_fsdata = NULL;
-       dentry->d_extra_attributes = NULL;
-       dentry->d_mounted = 0;
-+      dentry->d_it = NULL;
-       dentry->d_cookie = NULL;
-       INIT_LIST_HEAD(&dentry->d_hash);
-       INIT_LIST_HEAD(&dentry->d_lru);
-@@ -839,13 +847,19 @@ void d_delete(struct dentry * dentry)
+@@ -839,13 +846,19 @@ void d_delete(struct dentry * dentry)
   * Adds a dentry to the hash according to its name.
   */
   
   * Adds a dentry to the hash according to its name.
   */
   
  }
  
  #define do_switch(x,y) do { \
  }
  
  #define do_switch(x,y) do { \
---- linux-2.4.20-rh/fs/namei.c~vfs_intent-2.4.20-rh    2003-04-11 14:04:57.000000000 +0800
-+++ linux-2.4.20-rh-root/fs/namei.c    2003-06-09 23:18:07.000000000 +0800
+--- linux-2.4.20/fs/namei.c~vfs_intent-2.4.20-rh       2003-07-17 08:32:47.000000000 -0700
++++ linux-2.4.20-mmonroe/fs/namei.c    2003-07-17 08:35:22.000000000 -0700
 @@ -94,6 +94,13 @@
   * XEmacs seems to be relying on it...
   */
  
 @@ -94,6 +94,13 @@
   * XEmacs seems to be relying on it...
   */
  
-+void intent_release(struct dentry *de, struct lookup_intent *it)
++void intent_release(struct lookup_intent *it)
 +{
 +{
-+      if (it && de->d_op && de->d_op->d_intent_release)
-+              de->d_op->d_intent_release(de, it);
++      if (it && it->it_op_release)
++              it->it_op_release(it);
 +
 +}
 +
 +
 +}
 +
@@ -73,8 +65,8 @@
  {
        struct dentry * dentry = d_lookup(parent, name);
  
  {
        struct dentry * dentry = d_lookup(parent, name);
  
-+      if (dentry && dentry->d_op && dentry->d_op->d_revalidate2) {
-+              if (!dentry->d_op->d_revalidate2(dentry, flags, it) &&
++      if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) {
++              if (!dentry->d_op->d_revalidate_it(dentry, flags, it) &&
 +                  !d_invalidate(dentry)) {
 +                      dput(dentry);
 +                      dentry = NULL;
 +                  !d_invalidate(dentry)) {
 +                      dput(dentry);
 +                      dentry = NULL;
                result = ERR_PTR(-ENOMEM);
                if (dentry) {
                        lock_kernel();
                result = ERR_PTR(-ENOMEM);
                if (dentry) {
                        lock_kernel();
-+                      if (dir->i_op->lookup2)
-+                              result = dir->i_op->lookup2(dir, dentry, it);
++                      if (dir->i_op->lookup_it)
++                              result = dir->i_op->lookup_it(dir, dentry, it, flags);
 +                      else
                        result = dir->i_op->lookup(dir, dentry);
                        unlock_kernel();
 +                      else
                        result = dir->i_op->lookup(dir, dentry);
                        unlock_kernel();
                        dput(result);
                        result = ERR_PTR(-ENOENT);
                }
                        dput(result);
                        result = ERR_PTR(-ENOENT);
                }
-+      } else if (result->d_op && result->d_op->d_revalidate2) {
-+              if (!result->d_op->d_revalidate2(result, flags, it) &&
++      } else if (result->d_op && result->d_op->d_revalidate_it) {
++              if (!result->d_op->d_revalidate_it(result, flags, it) &&
 +                  !d_invalidate(result)) {
 +                      dput(result);
 +                      goto again;
 +                  !d_invalidate(result)) {
 +                      dput(result);
 +                      goto again;
  {
        int err;
        if (current->link_count >= max_recursive_link)
  {
        int err;
        if (current->link_count >= max_recursive_link)
-@@ -348,10 +377,21 @@ static inline int do_follow_link(struct 
+@@ -348,10 +377,18 @@ static inline int do_follow_link(struct 
        current->link_count++;
        current->total_link_count++;
        UPDATE_ATIME(dentry->d_inode);
        current->link_count++;
        current->total_link_count++;
        UPDATE_ATIME(dentry->d_inode);
--      err = dentry->d_inode->i_op->follow_link(dentry, nd);
 +      nd->it = it;
 +      nd->it = it;
-+      if (dentry->d_inode->i_op->follow_link2)
-+              err = dentry->d_inode->i_op->follow_link2(dentry, nd, it);
-+      else
-+              err = dentry->d_inode->i_op->follow_link(dentry, nd);
+       err = dentry->d_inode->i_op->follow_link(dentry, nd);
 +      if (!err && it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) {
 +              /* vfs_follow_link was never called */
 +      if (!err && it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) {
 +              /* vfs_follow_link was never called */
-+              intent_release(dentry, it);
++              intent_release(it);
 +              path_release(nd);
 +              err = -ENOLINK;
 +      }
        current->link_count--;
        return err;
  loop:
 +              path_release(nd);
 +              err = -ENOLINK;
 +      }
        current->link_count--;
        return err;
  loop:
-+      intent_release(dentry, it);
++      intent_release(it);
        path_release(nd);
        return -ELOOP;
  }
        path_release(nd);
        return -ELOOP;
  }
-@@ -381,15 +421,26 @@ int follow_up(struct vfsmount **mnt, str
+@@ -381,15 +418,26 @@ int follow_up(struct vfsmount **mnt, str
        return __follow_up(mnt, dentry);
  }
  
        return __follow_up(mnt, dentry);
  }
  
 +                      opc = it->it_op;
 +                      mode = it->it_mode;
 +              }
 +                      opc = it->it_op;
 +                      mode = it->it_mode;
 +              }
-+              intent_release(*dentry, it);
++              intent_release(it);
 +              if (it) {
 +                      it->it_op = opc;
 +                      it->it_mode = mode;
 +              if (it) {
 +                      it->it_op = opc;
 +                      it->it_mode = mode;
                dput(*dentry);
                mntput(mounted->mnt_parent);
                *dentry = dget(mounted->mnt_root);
                dput(*dentry);
                mntput(mounted->mnt_parent);
                *dentry = dget(mounted->mnt_root);
-@@ -401,7 +452,7 @@ static inline int __follow_down(struct v
+@@ -401,7 +449,7 @@ static inline int __follow_down(struct v
  
  int follow_down(struct vfsmount **mnt, struct dentry **dentry)
  {
  
  int follow_down(struct vfsmount **mnt, struct dentry **dentry)
  {
  }
   
  static inline void follow_dotdot(struct nameidata *nd)
  }
   
  static inline void follow_dotdot(struct nameidata *nd)
-@@ -437,7 +488,7 @@ static inline void follow_dotdot(struct 
+@@ -437,7 +485,7 @@ static inline void follow_dotdot(struct 
                mntput(nd->mnt);
                nd->mnt = parent;
        }
                mntput(nd->mnt);
                nd->mnt = parent;
        }
                ;
  }
  
                ;
  }
  
-@@ -449,7 +500,8 @@ static inline void follow_dotdot(struct 
+@@ -449,7 +497,8 @@ static inline void follow_dotdot(struct 
   *
   * We expect 'base' to be positive and a directory.
   */
   *
   * We expect 'base' to be positive and a directory.
   */
  {
        struct dentry *dentry;
        struct inode *inode;
  {
        struct dentry *dentry;
        struct inode *inode;
-@@ -526,18 +578,18 @@ int link_path_walk(const char * name, st
+@@ -526,19 +575,18 @@ int link_path_walk(const char * name, st
                                break;
                }
                /* This does the actual lookups.. */
 -              dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
                                break;
                }
                /* This does the actual lookups.. */
 -              dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
-+              dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL);
++              dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL);
                if (!dentry) {
                        err = -EWOULDBLOCKIO;
                        if (atomic)
                                break;
 -                      dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
                if (!dentry) {
                        err = -EWOULDBLOCKIO;
                        if (atomic)
                                break;
 -                      dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
-+                      dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL);
++                      dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL);
                        err = PTR_ERR(dentry);
                        if (IS_ERR(dentry))
                                break;
                }
                /* Check mountpoints.. */
 -              while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
                        err = PTR_ERR(dentry);
                        if (IS_ERR(dentry))
                                break;
                }
                /* Check mountpoints.. */
 -              while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
-+              while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, NULL))
-                       ;
+-                      ;
++              while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, NULL));
  
                err = -ENOENT;
  
                err = -ENOENT;
-@@ -548,8 +600,8 @@ int link_path_walk(const char * name, st
-               if (!inode->i_op)
+               inode = dentry->d_inode;
+@@ -549,7 +597,7 @@ int link_path_walk(const char * name, st
                        goto out_dput;
  
                        goto out_dput;
  
--              if (inode->i_op->follow_link) {
+               if (inode->i_op->follow_link) {
 -                      err = do_follow_link(dentry, nd);
 -                      err = do_follow_link(dentry, nd);
-+              if (inode->i_op->follow_link || inode->i_op->follow_link2) {
 +                      err = do_follow_link(dentry, nd, NULL);
                        dput(dentry);
                        if (err)
                                goto return_err;
 +                      err = do_follow_link(dentry, nd, NULL);
                        dput(dentry);
                        if (err)
                                goto return_err;
-@@ -565,7 +617,7 @@ int link_path_walk(const char * name, st
+@@ -565,7 +613,7 @@ int link_path_walk(const char * name, st
                        nd->dentry = dentry;
                }
                err = -ENOTDIR; 
 -              if (!inode->i_op->lookup)
                        nd->dentry = dentry;
                }
                err = -ENOTDIR; 
 -              if (!inode->i_op->lookup)
-+              if (!inode->i_op->lookup && !inode->i_op->lookup2)
++              if (!inode->i_op->lookup && !inode->i_op->lookup_it)
                        break;
                continue;
                /* here ends the main loop */
                        break;
                continue;
                /* here ends the main loop */
-@@ -592,22 +644,23 @@ last_component:
+@@ -592,22 +640,22 @@ last_component:
                        if (err < 0)
                                break;
                }
 -              dentry = cached_lookup(nd->dentry, &this, 0);
                        if (err < 0)
                                break;
                }
 -              dentry = cached_lookup(nd->dentry, &this, 0);
-+              dentry = cached_lookup(nd->dentry, &this, 0, it);
++              dentry = cached_lookup(nd->dentry, &this, 0, it);
                if (!dentry) {
                        err = -EWOULDBLOCKIO;
                        if (atomic)
                                break;
 -                      dentry = real_lookup(nd->dentry, &this, 0);
                if (!dentry) {
                        err = -EWOULDBLOCKIO;
                        if (atomic)
                                break;
 -                      dentry = real_lookup(nd->dentry, &this, 0);
-+                      dentry = real_lookup(nd->dentry, &this, 0, it);
++                      dentry = real_lookup(nd->dentry, &this, 0, it);
                        err = PTR_ERR(dentry);
                        if (IS_ERR(dentry))
                                break;
                }
 -              while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
                        err = PTR_ERR(dentry);
                        if (IS_ERR(dentry))
                                break;
                }
 -              while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
-+              while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, it))
++              while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, it))
                        ;
                inode = dentry->d_inode;
                if ((lookup_flags & LOOKUP_FOLLOW)
                        ;
                inode = dentry->d_inode;
                if ((lookup_flags & LOOKUP_FOLLOW)
--                  && inode && inode->i_op && inode->i_op->follow_link) {
+                   && inode && inode->i_op && inode->i_op->follow_link) {
 -                      err = do_follow_link(dentry, nd);
 -                      err = do_follow_link(dentry, nd);
-+                  && inode && inode->i_op &&
-+                  (inode->i_op->follow_link || inode->i_op->follow_link2)) {
-+                      err = do_follow_link(dentry, nd, it);
++                      err = do_follow_link(dentry, nd, it);
                        dput(dentry);
                        if (err)
                                goto return_err;
                        dput(dentry);
                        if (err)
                                goto return_err;
-@@ -621,7 +674,8 @@ last_component:
+@@ -621,7 +669,8 @@ last_component:
                        goto no_inode;
                if (lookup_flags & LOOKUP_DIRECTORY) {
                        err = -ENOTDIR; 
 -                      if (!inode->i_op || !inode->i_op->lookup)
 +                      if (!inode->i_op ||
                        goto no_inode;
                if (lookup_flags & LOOKUP_DIRECTORY) {
                        err = -ENOTDIR; 
 -                      if (!inode->i_op || !inode->i_op->lookup)
 +                      if (!inode->i_op ||
-+                          (!inode->i_op->lookup && !inode->i_op->lookup2))
++                          (!inode->i_op->lookup && !inode->i_op->lookup_it))
                                break;
                }
                goto return_base;
                                break;
                }
                goto return_base;
-@@ -645,6 +699,23 @@ return_reval:
+@@ -645,6 +694,23 @@ return_reval:
                 * Check the cached dentry for staleness.
                 */
                dentry = nd->dentry;
                 * Check the cached dentry for staleness.
                 */
                dentry = nd->dentry;
-+        revalidate_again:
-+              if (dentry && dentry->d_op && dentry->d_op->d_revalidate2) {
++      revalidate_again:
++              if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) {
 +                      err = -ESTALE;
 +                      err = -ESTALE;
-+                      if (!dentry->d_op->d_revalidate2(dentry, 0, it)) {
-+                                struct dentry *new;
-+                                err = permission(dentry->d_parent->d_inode, 
-+                                                 MAY_EXEC);
-+                                if (err)
-+                                        break;
-+                                new = real_lookup(dentry->d_parent,
-+                                                  &dentry->d_name, 0, NULL);
++                      if (!dentry->d_op->d_revalidate_it(dentry, 0, it)) {
++                              struct dentry *new;
++                              err = permission(dentry->d_parent->d_inode,
++                                               MAY_EXEC);
++                              if (err)
++                                      break;
++                              new = real_lookup(dentry->d_parent,
++                                                &dentry->d_name, 0, NULL);
 +                              d_invalidate(dentry);
 +                              d_invalidate(dentry);
-+                                dput(dentry);
-+                                dentry = new;
-+                                goto revalidate_again;
-+                        }
++                              dput(dentry);
++                              dentry = new;
++                              goto revalidate_again;
++                      }
 +              } else
                if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
                        err = -ESTALE;
                        if (!dentry->d_op->d_revalidate(dentry, 0)) {
 +              } else
                if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
                        err = -ESTALE;
                        if (!dentry->d_op->d_revalidate(dentry, 0)) {
-@@ -658,15 +729,28 @@ out_dput:
+@@ -658,15 +724,28 @@ out_dput:
                dput(dentry);
                break;
        }
 +      if (err)
                dput(dentry);
                break;
        }
 +      if (err)
-+              intent_release(nd->dentry, it);
++              intent_release(it);
        path_release(nd);
  return_err:
        return err;
        path_release(nd);
  return_err:
        return err;
  }
  
  /* SMP-safe */
  }
  
  /* SMP-safe */
-@@ -751,6 +835,17 @@ walk_init_root(const char *name, struct 
+@@ -751,6 +830,17 @@ walk_init_root(const char *name, struct 
  }
  
  /* SMP-safe */
  }
  
  /* SMP-safe */
  int path_lookup(const char *path, unsigned flags, struct nameidata *nd)
  {
        int error = 0;
  int path_lookup(const char *path, unsigned flags, struct nameidata *nd)
  {
        int error = 0;
-@@ -765,6 +860,7 @@ int path_init(const char *name, unsigned
+@@ -765,6 +855,7 @@ int path_init(const char *name, unsigned
  {
        nd->last_type = LAST_ROOT; /* if there are only slashes... */
        nd->flags = flags;
  {
        nd->last_type = LAST_ROOT; /* if there are only slashes... */
        nd->flags = flags;
        if (*name=='/')
                return walk_init_root(name,nd);
        read_lock(&current->fs->lock);
        if (*name=='/')
                return walk_init_root(name,nd);
        read_lock(&current->fs->lock);
-@@ -779,7 +875,8 @@ int path_init(const char *name, unsigned
+@@ -779,7 +870,8 @@ int path_init(const char *name, unsigned
   * needs parent already locked. Doesn't follow mounts.
   * SMP-safe.
   */
   * needs parent already locked. Doesn't follow mounts.
   * SMP-safe.
   */
  {
        struct dentry * dentry;
        struct inode *inode;
  {
        struct dentry * dentry;
        struct inode *inode;
-@@ -802,13 +899,16 @@ struct dentry * lookup_hash(struct qstr 
+@@ -802,13 +894,16 @@ struct dentry * lookup_hash(struct qstr 
                        goto out;
        }
  
                        goto out;
        }
  
                if (!new)
                        goto out;
                lock_kernel();
                if (!new)
                        goto out;
                lock_kernel();
-+              if (inode->i_op->lookup2)
-+                      dentry = inode->i_op->lookup2(inode, new, it);
++              if (inode->i_op->lookup_it)
++                      dentry = inode->i_op->lookup_it(inode, new, it, 0);
 +              else
                dentry = inode->i_op->lookup(inode, new);
                unlock_kernel();
                if (!dentry)
 +              else
                dentry = inode->i_op->lookup(inode, new);
                unlock_kernel();
                if (!dentry)
-@@ -820,6 +920,12 @@ out:
+@@ -820,6 +915,12 @@ out:
        return dentry;
  }
  
        return dentry;
  }
  
  /* SMP-safe */
  struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
  {
  /* SMP-safe */
  struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
  {
-@@ -841,7 +947,7 @@ struct dentry * lookup_one_len(const cha
+@@ -841,7 +942,7 @@ struct dentry * lookup_one_len(const cha
        }
        this.hash = end_name_hash(hash);
  
        }
        this.hash = end_name_hash(hash);
  
  access:
        return ERR_PTR(-EACCES);
  }
  access:
        return ERR_PTR(-EACCES);
  }
-@@ -872,6 +978,23 @@ int __user_walk(const char *name, unsign
+@@ -872,6 +973,23 @@ int __user_walk(const char *name, unsign
        return err;
  }
  
        return err;
  }
  
  /*
   * It's inline, so penalty for filesystems that don't use sticky bit is
   * minimal.
  /*
   * It's inline, so penalty for filesystems that don't use sticky bit is
   * minimal.
-@@ -1010,7 +1133,8 @@ exit_lock:
+@@ -969,7 +1087,8 @@ static inline int lookup_flags(unsigned 
+       return retval;
+ }
+-int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
++static int vfs_create_it(struct inode *dir, struct dentry *dentry, int mode,
++                       struct lookup_intent *it)
+ {
+       int error;
+@@ -982,12 +1101,15 @@ int vfs_create(struct inode *dir, struct
+               goto exit_lock;
+       error = -EACCES;        /* shouldn't it be ENOSYS? */
+-      if (!dir->i_op || !dir->i_op->create)
++      if (!dir->i_op || (!dir->i_op->create && !dir->i_op->create_it))
+               goto exit_lock;
+       DQUOT_INIT(dir);
+       lock_kernel();
+-      error = dir->i_op->create(dir, dentry, mode);
++      if (dir->i_op->create_it)
++              error = dir->i_op->create_it(dir, dentry, mode, it);
++      else
++              error = dir->i_op->create(dir, dentry, mode);
+       unlock_kernel();
+ exit_lock:
+       up(&dir->i_zombie);
+@@ -996,6 +1118,11 @@ exit_lock:
+       return error;
+ }
++int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
++{
++      return vfs_create_it(dir, dentry, mode, NULL);
++}
++
+ /*
+  *    open_namei()
+  *
+@@ -1010,7 +1137,8 @@ exit_lock:
   * for symlinks (where the permissions are checked later).
   * SMP-safe
   */
   * for symlinks (where the permissions are checked later).
   * SMP-safe
   */
  {
        int acc_mode, error = 0;
        struct inode *inode;
  {
        int acc_mode, error = 0;
        struct inode *inode;
-@@ -1024,7 +1148,7 @@ int open_namei(const char * pathname, in
+@@ -1024,7 +1152,7 @@ int open_namei(const char * pathname, in
         * The simplest case - just a plain lookup.
         */
        if (!(flag & O_CREAT)) {
         * The simplest case - just a plain lookup.
         */
        if (!(flag & O_CREAT)) {
                if (error)
                        return error;
                dentry = nd->dentry;
                if (error)
                        return error;
                dentry = nd->dentry;
-@@ -1034,6 +1158,10 @@ int open_namei(const char * pathname, in
+@@ -1034,6 +1162,10 @@ int open_namei(const char * pathname, in
        /*
         * Create - we need to know the parent.
         */
        /*
         * Create - we need to know the parent.
         */
        error = path_lookup(pathname, LOOKUP_PARENT, nd);
        if (error)
                return error;
        error = path_lookup(pathname, LOOKUP_PARENT, nd);
        if (error)
                return error;
-@@ -1049,7 +1177,7 @@ int open_namei(const char * pathname, in
+@@ -1049,7 +1181,7 @@ int open_namei(const char * pathname, in
  
        dir = nd->dentry;
        down(&dir->d_inode->i_sem);
  
        dir = nd->dentry;
        down(&dir->d_inode->i_sem);
  
  do_last:
        error = PTR_ERR(dentry);
  
  do_last:
        error = PTR_ERR(dentry);
-@@ -1058,6 +1186,7 @@ do_last:
+@@ -1058,10 +1190,11 @@ do_last:
                goto exit;
        }
  
 +      it->it_mode = mode;
        /* Negative dentry, just create the file */
        if (!dentry->d_inode) {
                goto exit;
        }
  
 +      it->it_mode = mode;
        /* Negative dentry, just create the file */
        if (!dentry->d_inode) {
-               error = vfs_create(dir->d_inode, dentry,
-@@ -1086,12 +1215,13 @@ do_last:
+-              error = vfs_create(dir->d_inode, dentry,
+-                                 mode & ~current->fs->umask);
++              error = vfs_create_it(dir->d_inode, dentry,
++                                 mode & ~current->fs->umask, it);
+               up(&dir->d_inode->i_sem);
+               dput(nd->dentry);
+               nd->dentry = dentry;
+@@ -1086,7 +1219,7 @@ do_last:
                error = -ELOOP;
                if (flag & O_NOFOLLOW)
                        goto exit_dput;
                error = -ELOOP;
                if (flag & O_NOFOLLOW)
                        goto exit_dput;
        }
        error = -ENOENT;
        if (!dentry->d_inode)
        }
        error = -ENOENT;
        if (!dentry->d_inode)
-               goto exit_dput;
--      if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)
-+      if (dentry->d_inode->i_op && (dentry->d_inode->i_op->follow_link ||
-+                                    dentry->d_inode->i_op->follow_link2))
-               goto do_link;
-       dput(nd->dentry);
-@@ -1165,7 +1295,7 @@ ok:
+@@ -1165,7 +1298,7 @@ ok:
                if (!error) {
                        DQUOT_INIT(inode);
                        
                if (!error) {
                        DQUOT_INIT(inode);
                        
                }
                put_write_access(inode);
                if (error)
                }
                put_write_access(inode);
                if (error)
-@@ -1177,8 +1307,10 @@ ok:
+@@ -1177,8 +1310,10 @@ ok:
        return 0;
  
  exit_dput:
        return 0;
  
  exit_dput:
-+      intent_release(dentry, it);
++      intent_release(it);
        dput(dentry);
  exit:
        dput(dentry);
  exit:
-+      intent_release(nd->dentry, it);
++      intent_release(it);
        path_release(nd);
        return error;
  
        path_release(nd);
        return error;
  
-@@ -1197,7 +1329,19 @@ do_link:
+@@ -1197,7 +1332,16 @@ do_link:
         * are done. Procfs-like symlinks just set LAST_BIND.
         */
        UPDATE_ATIME(dentry->d_inode);
         * are done. Procfs-like symlinks just set LAST_BIND.
         */
        UPDATE_ATIME(dentry->d_inode);
--      error = dentry->d_inode->i_op->follow_link(dentry, nd);
 +      nd->it = it;
 +      nd->it = it;
-+      if (dentry->d_inode->i_op->follow_link2)
-+              error = dentry->d_inode->i_op->follow_link2(dentry, nd, it);
-+      else
-+              error = dentry->d_inode->i_op->follow_link(dentry, nd);
+       error = dentry->d_inode->i_op->follow_link(dentry, nd);
 +      if (error) {
 +      if (error) {
-+              intent_release(dentry, it);
++              intent_release(it);
 +      } else if (it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) {
 +              /* vfs_follow_link was never called */
 +      } else if (it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) {
 +              /* vfs_follow_link was never called */
-+              intent_release(dentry, it);
++              intent_release(it);
 +              path_release(nd);
 +              error = -ENOLINK;
 +      }
 +              path_release(nd);
 +              error = -ENOLINK;
 +      }
        if (IS_ERR(dentry))
                goto fail;
        if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
        if (IS_ERR(dentry))
                goto fail;
        if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
-@@ -1289,7 +1440,19 @@ asmlinkage long sys_mknod(const char * f
+@@ -1289,7 +1440,16 @@ asmlinkage long sys_mknod(const char * f
        error = path_lookup(tmp, LOOKUP_PARENT, &nd);
        if (error)
                goto out;
 -      dentry = lookup_create(&nd, 0);
 +
        error = path_lookup(tmp, LOOKUP_PARENT, &nd);
        if (error)
                goto out;
 -      dentry = lookup_create(&nd, 0);
 +
-+      if (nd.dentry->d_inode->i_op->mknod2) {
++      if (nd.dentry->d_inode->i_op->mknod_raw) {
 +              struct inode_operations *op = nd.dentry->d_inode->i_op;
 +              struct inode_operations *op = nd.dentry->d_inode->i_op;
-+              error = op->mknod2(nd.dentry->d_inode,
-+                                 nd.last.name,
-+                                 nd.last.len,
-+                                 mode, dev);
++              error = op->mknod_raw(&nd, mode, dev);
 +              /* the file system wants to use normal vfs path now */
 +              if (error != -EOPNOTSUPP)
 +                      goto out2;
 +              /* the file system wants to use normal vfs path now */
 +              if (error != -EOPNOTSUPP)
 +                      goto out2;
        error = PTR_ERR(dentry);
  
        mode &= ~current->fs->umask;
        error = PTR_ERR(dentry);
  
        mode &= ~current->fs->umask;
-@@ -1310,6 +1473,7 @@ asmlinkage long sys_mknod(const char * f
+@@ -1310,6 +1470,7 @@ asmlinkage long sys_mknod(const char * f
                dput(dentry);
        }
        up(&nd.dentry->d_inode->i_sem);
                dput(dentry);
        }
        up(&nd.dentry->d_inode->i_sem);
        path_release(&nd);
  out:
        putname(tmp);
        path_release(&nd);
  out:
        putname(tmp);
-@@ -1357,7 +1521,17 @@ asmlinkage long sys_mkdir(const char * p
+@@ -1357,7 +1518,14 @@ asmlinkage long sys_mkdir(const char * p
                error = path_lookup(tmp, LOOKUP_PARENT, &nd);
                if (error)
                        goto out;
 -              dentry = lookup_create(&nd, 1);
                error = path_lookup(tmp, LOOKUP_PARENT, &nd);
                if (error)
                        goto out;
 -              dentry = lookup_create(&nd, 1);
-+              if (nd.dentry->d_inode->i_op->mkdir2) {
++              if (nd.dentry->d_inode->i_op->mkdir_raw) {
 +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
 +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
-+                      error = op->mkdir2(nd.dentry->d_inode,
-+                                         nd.last.name,
-+                                         nd.last.len,
-+                                         mode);
++                      error = op->mkdir_raw(&nd, mode);
 +                      /* the file system wants to use normal vfs path now */
 +                      if (error != -EOPNOTSUPP)
 +                              goto out2;
 +                      /* the file system wants to use normal vfs path now */
 +                      if (error != -EOPNOTSUPP)
 +                              goto out2;
                error = PTR_ERR(dentry);
                if (!IS_ERR(dentry)) {
                        error = vfs_mkdir(nd.dentry->d_inode, dentry,
                error = PTR_ERR(dentry);
                if (!IS_ERR(dentry)) {
                        error = vfs_mkdir(nd.dentry->d_inode, dentry,
-@@ -1365,6 +1539,7 @@ asmlinkage long sys_mkdir(const char * p
+@@ -1365,6 +1533,7 @@ asmlinkage long sys_mkdir(const char * p
                        dput(dentry);
                }
                up(&nd.dentry->d_inode->i_sem);
                        dput(dentry);
                }
                up(&nd.dentry->d_inode->i_sem);
                path_release(&nd);
  out:
                putname(tmp);
                path_release(&nd);
  out:
                putname(tmp);
-@@ -1465,8 +1640,33 @@ asmlinkage long sys_rmdir(const char * p
+@@ -1465,8 +1634,16 @@ asmlinkage long sys_rmdir(const char * p
                        error = -EBUSY;
                        goto exit1;
        }
                        error = -EBUSY;
                        goto exit1;
        }
-+      if (nd.dentry->d_inode->i_op->rmdir2) {
-+              struct inode_operations *op = nd.dentry->d_inode->i_op;
-+              struct dentry *last;
-+
-+              down(&nd.dentry->d_inode->i_sem);
-+              last = lookup_hash_it(&nd.last, nd.dentry, NULL);
-+              up(&nd.dentry->d_inode->i_sem);
-+              if (IS_ERR(last)) {
-+                      error = PTR_ERR(last);
-+                      goto exit1;
-+              }
-+              if (d_mountpoint(last)) {
-+                      dput(last);
-+                      error = -EBUSY;
-+                      goto exit1;
-+              }
-+              dput(last);
++      if (nd.dentry->d_inode->i_op->rmdir_raw) {
++              struct inode_operations *op = nd.dentry->d_inode->i_op;
 +
 +
-+              error = op->rmdir2(nd.dentry->d_inode,
-+                                 nd.last.name,
-+                                 nd.last.len);
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto exit1;
-+      }
++              error = op->rmdir_raw(&nd);
++              /* the file system wants to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto exit1;
++      }
        down(&nd.dentry->d_inode->i_sem);
 -      dentry = lookup_hash(&nd.last, nd.dentry);
 +      dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
        error = PTR_ERR(dentry);
        if (!IS_ERR(dentry)) {
                error = vfs_rmdir(nd.dentry->d_inode, dentry);
        down(&nd.dentry->d_inode->i_sem);
 -      dentry = lookup_hash(&nd.last, nd.dentry);
 +      dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
        error = PTR_ERR(dentry);
        if (!IS_ERR(dentry)) {
                error = vfs_rmdir(nd.dentry->d_inode, dentry);
-@@ -1524,8 +1724,17 @@ asmlinkage long sys_unlink(const char * 
+@@ -1524,8 +1701,15 @@ asmlinkage long sys_unlink(const char * 
        error = -EISDIR;
        if (nd.last_type != LAST_NORM)
                goto exit1;
        error = -EISDIR;
        if (nd.last_type != LAST_NORM)
                goto exit1;
-+      if (nd.dentry->d_inode->i_op->unlink2) {
-+              struct inode_operations *op = nd.dentry->d_inode->i_op;
-+              error = op->unlink2(nd.dentry->d_inode,
-+                                  nd.last.name,
-+                                  nd.last.len);
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto exit1;
-+      }
++      if (nd.dentry->d_inode->i_op->unlink_raw) {
++              struct inode_operations *op = nd.dentry->d_inode->i_op;
++              error = op->unlink_raw(&nd);
++              /* the file system wants to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto exit1;
++      }
        down(&nd.dentry->d_inode->i_sem);
 -      dentry = lookup_hash(&nd.last, nd.dentry);
 +      dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
        error = PTR_ERR(dentry);
        if (!IS_ERR(dentry)) {
                /* Why not before? Because we want correct error value */
        down(&nd.dentry->d_inode->i_sem);
 -      dentry = lookup_hash(&nd.last, nd.dentry);
 +      dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
        error = PTR_ERR(dentry);
        if (!IS_ERR(dentry)) {
                /* Why not before? Because we want correct error value */
-@@ -1592,15 +1801,26 @@ asmlinkage long sys_symlink(const char *
+@@ -1592,15 +1776,23 @@ asmlinkage long sys_symlink(const char *
                error = path_lookup(to, LOOKUP_PARENT, &nd);
                if (error)
                        goto out;
 -              dentry = lookup_create(&nd, 0);
                error = path_lookup(to, LOOKUP_PARENT, &nd);
                if (error)
                        goto out;
 -              dentry = lookup_create(&nd, 0);
-+              if (nd.dentry->d_inode->i_op->symlink2) {
++              if (nd.dentry->d_inode->i_op->symlink_raw) {
 +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
 +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
-+                      error = op->symlink2(nd.dentry->d_inode,
-+                                           nd.last.name,
-+                                           nd.last.len,
-+                                           from);
++                      error = op->symlink_raw(&nd, from);
 +                      /* the file system wants to use normal vfs path now */
 +                      if (error != -EOPNOTSUPP)
 +                              goto out2;
 +                      /* the file system wants to use normal vfs path now */
 +                      if (error != -EOPNOTSUPP)
 +                              goto out2;
                putname(to);
        }
        putname(from);
                putname(to);
        }
        putname(from);
-@@ -1676,7 +1896,17 @@ asmlinkage long sys_link(const char * ol
+@@ -1676,7 +1868,14 @@ asmlinkage long sys_link(const char * ol
                error = -EXDEV;
                if (old_nd.mnt != nd.mnt)
                        goto out_release;
 -              new_dentry = lookup_create(&nd, 0);
                error = -EXDEV;
                if (old_nd.mnt != nd.mnt)
                        goto out_release;
 -              new_dentry = lookup_create(&nd, 0);
-+              if (nd.dentry->d_inode->i_op->link2) {
++              if (nd.dentry->d_inode->i_op->link_raw) {
 +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
 +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
-+                      error = op->link2(old_nd.dentry->d_inode,
-+                                        nd.dentry->d_inode,
-+                                        nd.last.name,
-+                                        nd.last.len);
++                      error = op->link_raw(&old_nd, &nd);
 +                      /* the file system wants to use normal vfs path now */
 +                      if (error != -EOPNOTSUPP)
 +                              goto out_release;
 +                      /* the file system wants to use normal vfs path now */
 +                      if (error != -EOPNOTSUPP)
 +                              goto out_release;
                error = PTR_ERR(new_dentry);
                if (!IS_ERR(new_dentry)) {
                        error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
                error = PTR_ERR(new_dentry);
                if (!IS_ERR(new_dentry)) {
                        error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
-@@ -1720,7 +1950,8 @@ exit:
+@@ -1720,7 +1919,7 @@ exit:
   *       locking].
   */
  int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
 -             struct inode *new_dir, struct dentry *new_dentry)
   *       locking].
   */
  int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
 -             struct inode *new_dir, struct dentry *new_dentry)
-+                 struct inode *new_dir, struct dentry *new_dentry,
-+                 struct lookup_intent *it)
++                 struct inode *new_dir, struct dentry *new_dentry)
  {
        int error;
        struct inode *target;
  {
        int error;
        struct inode *target;
-@@ -1778,6 +2009,7 @@ int vfs_rename_dir(struct inode *old_dir
-               error = -EBUSY;
-       else 
-               error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
-+      intent_release(new_dentry, it);
-       if (target) {
-               if (!error)
-                       target->i_flags |= S_DEAD;
-@@ -1799,7 +2031,8 @@ out_unlock:
+@@ -1799,7 +1998,7 @@ out_unlock:
  }
  
  int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
 -             struct inode *new_dir, struct dentry *new_dentry)
  }
  
  int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
 -             struct inode *new_dir, struct dentry *new_dentry)
-+                   struct inode *new_dir, struct dentry *new_dentry,
-+                   struct lookup_intent *it)
++                   struct inode *new_dir, struct dentry *new_dentry)
  {
        int error;
  
  {
        int error;
  
-@@ -1830,6 +2063,7 @@ int vfs_rename_other(struct inode *old_d
-               error = -EBUSY;
-       else
-               error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
-+      intent_release(new_dentry, it);
-       double_up(&old_dir->i_zombie, &new_dir->i_zombie);
-       if (error)
-               return error;
-@@ -1841,13 +2075,14 @@ int vfs_rename_other(struct inode *old_d
- }
- int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
--             struct inode *new_dir, struct dentry *new_dentry)
-+             struct inode *new_dir, struct dentry *new_dentry,
-+             struct lookup_intent *it)
- {
-       int error;
-       if (S_ISDIR(old_dentry->d_inode->i_mode))
--              error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
-+              error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry,it);
-       else
--              error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
-+              error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,it);
-       if (!error) {
-               if (old_dir == new_dir)
-                       inode_dir_notify(old_dir, DN_RENAME);
-@@ -1889,7 +2124,7 @@ static inline int do_rename(const char *
+@@ -1887,9 +2086,18 @@ static inline int do_rename(const char *
+       if (newnd.last_type != LAST_NORM)
+               goto exit2;
  
  
++      if (old_dir->d_inode->i_op->rename_raw) {
++              lock_kernel();
++              error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd);
++              unlock_kernel();
++              /* the file system wants to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto exit2;
++      }
++
        double_lock(new_dir, old_dir);
  
 -      old_dentry = lookup_hash(&oldnd.last, old_dir);
        double_lock(new_dir, old_dir);
  
 -      old_dentry = lookup_hash(&oldnd.last, old_dir);
        error = PTR_ERR(old_dentry);
        if (IS_ERR(old_dentry))
                goto exit3;
        error = PTR_ERR(old_dentry);
        if (IS_ERR(old_dentry))
                goto exit3;
-@@ -1905,16 +2140,37 @@ static inline int do_rename(const char *
+@@ -1905,16 +2113,16 @@ static inline int do_rename(const char *
                if (newnd.last.name[newnd.last.len])
                        goto exit4;
        }
                if (newnd.last.name[newnd.last.len])
                        goto exit4;
        }
        if (IS_ERR(new_dentry))
                goto exit4;
  
        if (IS_ERR(new_dentry))
                goto exit4;
  
-+      if (old_dir->d_inode->i_op->rename2) {
-+              lock_kernel();
-+              /* don't rename mount point. mds will take care of
-+               * the rest sanity checking */
-+              if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) {
-+                      error = -EBUSY;
-+                      goto exit5;
-+              }
-+
-+              error = old_dir->d_inode->i_op->rename2(old_dir->d_inode,
-+                                                      new_dir->d_inode,
-+                                                      oldnd.last.name,
-+                                                      oldnd.last.len,
-+                                                      newnd.last.name,
-+                                                      newnd.last.len);
-+              unlock_kernel();
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto exit5;
-+      }
 +
        lock_kernel();
        error = vfs_rename(old_dir->d_inode, old_dentry,
 +
        lock_kernel();
        error = vfs_rename(old_dir->d_inode, old_dentry,
--                                 new_dir->d_inode, new_dentry);
-+                                 new_dir->d_inode, new_dentry, NULL);
+                                  new_dir->d_inode, new_dentry);
        unlock_kernel();
 -
        unlock_kernel();
 -
-+exit5:
        dput(new_dentry);
  exit4:
        dput(old_dentry);
        dput(new_dentry);
  exit4:
        dput(old_dentry);
-@@ -1965,20 +2221,28 @@ out:
+@@ -1965,20 +2173,28 @@ out:
  }
  
  static inline int
  }
  
  static inline int
  out:
        if (current->link_count || res || nd->last_type!=LAST_NORM)
                return res;
  out:
        if (current->link_count || res || nd->last_type!=LAST_NORM)
                return res;
-@@ -2002,7 +2266,13 @@ fail:
+@@ -2002,7 +2218,13 @@ fail:
  
  int vfs_follow_link(struct nameidata *nd, const char *link)
  {
  
  int vfs_follow_link(struct nameidata *nd, const char *link)
  {
  }
  
  /* get the link contents into pagecache */
  }
  
  /* get the link contents into pagecache */
-@@ -2044,7 +2314,7 @@ int page_follow_link(struct dentry *dent
+@@ -2044,7 +2266,7 @@ int page_follow_link(struct dentry *dent
  {
        struct page *page = NULL;
        char *s = page_getlink(dentry, &page);
  {
        struct page *page = NULL;
        char *s = page_getlink(dentry, &page);
        if (page) {
                kunmap(page);
                page_cache_release(page);
        if (page) {
                kunmap(page);
                page_cache_release(page);
---- linux-2.4.20-rh/fs/nfsd/vfs.c~vfs_intent-2.4.20-rh 2003-04-11 14:04:48.000000000 +0800
-+++ linux-2.4.20-rh-root/fs/nfsd/vfs.c 2003-06-09 23:18:07.000000000 +0800
-@@ -1293,7 +1293,7 @@ nfsd_rename(struct svc_rqst *rqstp, stru
-                       err = nfserr_perm;
-       } else
- #endif
--      err = vfs_rename(fdir, odentry, tdir, ndentry);
-+      err = vfs_rename(fdir, odentry, tdir, ndentry, NULL);
-       if (!err && EX_ISSYNC(tfhp->fh_export)) {
-               nfsd_sync_dir(tdentry);
-               nfsd_sync_dir(fdentry);
---- linux-2.4.20-rh/fs/open.c~vfs_intent-2.4.20-rh     2003-04-11 14:04:57.000000000 +0800
-+++ linux-2.4.20-rh-root/fs/open.c     2003-06-09 23:18:07.000000000 +0800
+--- linux-2.4.20/fs/open.c~vfs_intent-2.4.20-rh        2003-07-17 08:32:45.000000000 -0700
++++ linux-2.4.20-mmonroe/fs/open.c     2003-07-17 08:35:22.000000000 -0700
 @@ -19,6 +19,8 @@
  #include <asm/uaccess.h>
  
 @@ -19,6 +19,8 @@
  #include <asm/uaccess.h>
  
        int error;
        struct iattr newattrs;
  
        int error;
        struct iattr newattrs;
  
-@@ -108,7 +111,14 @@ int do_truncate(struct dentry *dentry, l
+@@ -108,7 +111,13 @@ int do_truncate(struct dentry *dentry, l
        down(&inode->i_sem);
        newattrs.ia_size = length;
        newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
        down(&inode->i_sem);
        newattrs.ia_size = length;
        newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
 +              newattrs.ia_valid |= ATTR_FROM_OPEN;
 +      if (op->setattr_raw) {
 +              newattrs.ia_valid |= ATTR_RAW;
 +              newattrs.ia_valid |= ATTR_FROM_OPEN;
 +      if (op->setattr_raw) {
 +              newattrs.ia_valid |= ATTR_RAW;
-+              newattrs.ia_ctime = CURRENT_TIME;
 +              error = op->setattr_raw(inode, &newattrs);
 +              error = op->setattr_raw(inode, &newattrs);
-+      } else 
++      } else
 +              error = notify_change(dentry, &newattrs);
        up(&inode->i_sem);
        return error;
  }
 +              error = notify_change(dentry, &newattrs);
        up(&inode->i_sem);
        return error;
  }
-@@ -118,12 +128,13 @@ static inline long do_sys_truncate(const
+@@ -118,12 +127,13 @@ static inline long do_sys_truncate(const
        struct nameidata nd;
        struct inode * inode;
        int error;
        struct nameidata nd;
        struct inode * inode;
        int error;
        if (error)
                goto out;
        inode = nd.dentry->d_inode;
        if (error)
                goto out;
        inode = nd.dentry->d_inode;
-@@ -163,11 +174,13 @@ static inline long do_sys_truncate(const
+@@ -163,11 +173,13 @@ static inline long do_sys_truncate(const
        error = locks_verify_truncate(inode, NULL, length);
        if (!error) {
                DQUOT_INIT(inode);
 -              error = do_truncate(nd.dentry, length);
        error = locks_verify_truncate(inode, NULL, length);
        if (!error) {
                DQUOT_INIT(inode);
 -              error = do_truncate(nd.dentry, length);
-+              intent_release(nd.dentry, &it);
++              intent_release(&it);
 +              error = do_truncate(nd.dentry, length, 0);
        }
        put_write_access(inode);
  
  dput_and_out:
 +              error = do_truncate(nd.dentry, length, 0);
        }
        put_write_access(inode);
  
  dput_and_out:
-+      intent_release(nd.dentry, &it);
++      intent_release(&it);
        path_release(&nd);
  out:
        return error;
        path_release(&nd);
  out:
        return error;
-@@ -215,7 +228,7 @@ static inline long do_sys_ftruncate(unsi
+@@ -215,7 +227,7 @@ static inline long do_sys_ftruncate(unsi
  
        error = locks_verify_truncate(inode, file, length);
        if (!error)
  
        error = locks_verify_truncate(inode, file, length);
        if (!error)
  out_putf:
        fput(file);
  out:
  out_putf:
        fput(file);
  out:
-@@ -260,11 +273,13 @@ asmlinkage long sys_utime(char * filenam
+@@ -260,11 +272,13 @@ asmlinkage long sys_utime(char * filenam
        struct inode * inode;
        struct iattr newattrs;
  
        struct inode * inode;
        struct iattr newattrs;
  
        error = -EROFS;
        if (IS_RDONLY(inode))
                goto dput_and_out;
        error = -EROFS;
        if (IS_RDONLY(inode))
                goto dput_and_out;
-@@ -279,11 +294,29 @@ asmlinkage long sys_utime(char * filenam
+@@ -279,11 +293,25 @@ asmlinkage long sys_utime(char * filenam
                        goto dput_and_out;
  
                newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
                        goto dput_and_out;
  
                newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
 +                      goto dput_and_out;
 +      }
 +
 +                      goto dput_and_out;
 +      }
 +
-+      error = -EROFS;
-+      if (IS_RDONLY(inode))
-+              goto dput_and_out;
-+
 +      error = -EPERM;
 +      if (!times) {
                if (current->fsuid != inode->i_uid &&
 +      error = -EPERM;
 +      if (!times) {
                if (current->fsuid != inode->i_uid &&
        error = notify_change(nd.dentry, &newattrs);
  dput_and_out:
        path_release(&nd);
        error = notify_change(nd.dentry, &newattrs);
  dput_and_out:
        path_release(&nd);
-@@ -304,12 +337,14 @@ asmlinkage long sys_utimes(char * filena
+@@ -304,12 +332,14 @@ asmlinkage long sys_utimes(char * filena
        struct inode * inode;
        struct iattr newattrs;
  
        struct inode * inode;
        struct iattr newattrs;
  
        error = -EROFS;
        if (IS_RDONLY(inode))
                goto dput_and_out;
        error = -EROFS;
        if (IS_RDONLY(inode))
                goto dput_and_out;
-@@ -324,7 +359,20 @@ asmlinkage long sys_utimes(char * filena
+@@ -324,7 +354,20 @@ asmlinkage long sys_utimes(char * filena
                newattrs.ia_atime = times[0].tv_sec;
                newattrs.ia_mtime = times[1].tv_sec;
                newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
                newattrs.ia_atime = times[0].tv_sec;
                newattrs.ia_mtime = times[1].tv_sec;
                newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
                if (current->fsuid != inode->i_uid &&
                    (error = permission(inode,MAY_WRITE)) != 0)
                        goto dput_and_out;
                if (current->fsuid != inode->i_uid &&
                    (error = permission(inode,MAY_WRITE)) != 0)
                        goto dput_and_out;
-@@ -347,6 +395,7 @@ asmlinkage long sys_access(const char * 
+@@ -347,6 +390,7 @@ asmlinkage long sys_access(const char * 
        int old_fsuid, old_fsgid;
        kernel_cap_t old_cap;
        int res;
        int old_fsuid, old_fsgid;
        kernel_cap_t old_cap;
        int res;
  
        if (mode & ~S_IRWXO)    /* where's F_OK, X_OK, W_OK, R_OK? */
                return -EINVAL;
  
        if (mode & ~S_IRWXO)    /* where's F_OK, X_OK, W_OK, R_OK? */
                return -EINVAL;
-@@ -364,13 +413,14 @@ asmlinkage long sys_access(const char * 
+@@ -364,13 +408,14 @@ asmlinkage long sys_access(const char * 
        else
                current->cap_effective = current->cap_permitted;
  
        else
                current->cap_effective = current->cap_permitted;
  
                if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
                   && !special_file(nd.dentry->d_inode->i_mode))
                        res = -EROFS;
                if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
                   && !special_file(nd.dentry->d_inode->i_mode))
                        res = -EROFS;
-+              intent_release(nd.dentry, &it);
++              intent_release(&it);
                path_release(&nd);
        }
  
                path_release(&nd);
        }
  
-@@ -385,8 +435,9 @@ asmlinkage long sys_chdir(const char * f
+@@ -385,8 +430,9 @@ asmlinkage long sys_chdir(const char * f
  {
        int error;
        struct nameidata nd;
  {
        int error;
        struct nameidata nd;
        if (error)
                goto out;
  
        if (error)
                goto out;
  
-@@ -397,6 +448,7 @@ asmlinkage long sys_chdir(const char * f
+@@ -397,6 +443,7 @@ asmlinkage long sys_chdir(const char * f
        set_fs_pwd(current->fs, nd.mnt, nd.dentry);
  
  dput_and_out:
        set_fs_pwd(current->fs, nd.mnt, nd.dentry);
  
  dput_and_out:
-+      intent_release(nd.dentry, &it);
++      intent_release(&it);
        path_release(&nd);
  out:
        return error;
        path_release(&nd);
  out:
        return error;
-@@ -436,9 +488,10 @@ asmlinkage long sys_chroot(const char * 
+@@ -436,9 +483,10 @@ asmlinkage long sys_chroot(const char * 
  {
        int error;
        struct nameidata nd;
  {
        int error;
        struct nameidata nd;
        if (error)
                goto out;
  
        if (error)
                goto out;
  
-@@ -454,6 +507,7 @@ asmlinkage long sys_chroot(const char * 
+@@ -454,6 +502,7 @@ asmlinkage long sys_chroot(const char * 
        set_fs_altroot();
        error = 0;
  dput_and_out:
        set_fs_altroot();
        error = 0;
  dput_and_out:
-+      intent_release(nd.dentry, &it);
++      intent_release(&it);
        path_release(&nd);
  out:
        return error;
        path_release(&nd);
  out:
        return error;
-@@ -508,6 +562,18 @@ asmlinkage long sys_chmod(const char * f
+@@ -508,6 +557,18 @@ asmlinkage long sys_chmod(const char * f
        if (IS_RDONLY(inode))
                goto dput_and_out;
  
        if (IS_RDONLY(inode))
                goto dput_and_out;
  
        error = -EPERM;
        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
                goto dput_and_out;
        error = -EPERM;
        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
                goto dput_and_out;
-@@ -538,6 +604,20 @@ static int chown_common(struct dentry * 
+@@ -538,6 +599,20 @@ static int chown_common(struct dentry * 
        error = -EROFS;
        if (IS_RDONLY(inode))
                goto out;
        error = -EROFS;
        if (IS_RDONLY(inode))
                goto out;
 +
 +              newattrs.ia_uid = user;
 +              newattrs.ia_gid = group;
 +
 +              newattrs.ia_uid = user;
 +              newattrs.ia_gid = group;
-+              newattrs.ia_valid = ATTR_UID | ATTR_GID;
++              newattrs.ia_valid = ATTR_UID | ATTR_GID | ATTR_CTIME;
 +              newattrs.ia_valid |= ATTR_RAW;
 +              error = op->setattr_raw(inode, &newattrs);
 +              /* the file system wants to use normal vfs path now */
 +              newattrs.ia_valid |= ATTR_RAW;
 +              error = op->setattr_raw(inode, &newattrs);
 +              /* the file system wants to use normal vfs path now */
        error = -EPERM;
        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
                goto out;
        error = -EPERM;
        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
                goto out;
-@@ -642,6 +722,7 @@ struct file *filp_open(const char * file
+@@ -642,8 +717,9 @@ struct file *filp_open(const char * file
  {
        int namei_flags, error;
        struct nameidata nd;
  {
        int namei_flags, error;
        struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = flags };
-       
-       flags &= ~O_DIRECT;
+-      
+-      flags &= ~O_DIRECT;
++      struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = flags };
++
++      //flags &= ~O_DIRECT;
  
  
-@@ -651,14 +732,15 @@ struct file *filp_open(const char * file
+       namei_flags = flags;
+       if ((namei_flags+1) & O_ACCMODE)
+@@ -651,14 +727,15 @@ struct file *filp_open(const char * file
        if (namei_flags & O_TRUNC)
                namei_flags |= 2;
  
        if (namei_flags & O_TRUNC)
                namei_flags |= 2;
  
  {
        struct file * f;
        struct inode *inode;
  {
        struct file * f;
        struct inode *inode;
-@@ -701,6 +783,7 @@ struct file *dentry_open(struct dentry *
+@@ -695,12 +772,15 @@ struct file *dentry_open(struct dentry *
+       }
+       if (f->f_op && f->f_op->open) {
++              f->f_it = it;
+               error = f->f_op->open(inode,f);
++              f->f_it = NULL;
+               if (error)
+                       goto cleanup_all;
        }
        f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
  
        }
        f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
  
-+      intent_release(dentry, it);
++      intent_release(it);
        return f;
  
  cleanup_all:
        return f;
  
  cleanup_all:
-@@ -715,11 +798,17 @@ cleanup_all:
+@@ -715,11 +795,17 @@ cleanup_all:
  cleanup_file:
        put_filp(f);
  cleanup_dentry:
  cleanup_file:
        put_filp(f);
  cleanup_dentry:
-+      intent_release(dentry, it);
++      intent_release(it);
        dput(dentry);
        mntput(mnt);
        return ERR_PTR(error);
        dput(dentry);
        mntput(mnt);
        return ERR_PTR(error);
  /*
   * Find an empty file descriptor entry, and mark it busy.
   */
  /*
   * Find an empty file descriptor entry, and mark it busy.
   */
---- linux-2.4.20-rh/fs/stat.c~vfs_intent-2.4.20-rh     2003-04-11 14:05:08.000000000 +0800
-+++ linux-2.4.20-rh-root/fs/stat.c     2003-06-09 23:18:07.000000000 +0800
-@@ -110,11 +110,13 @@ static int do_getattr(struct vfsmount *m
- int vfs_stat(char *name, struct kstat *stat)
+--- linux-2.4.20/fs/stat.c~vfs_intent-2.4.20-rh        2003-07-17 08:33:05.000000000 -0700
++++ linux-2.4.20-mmonroe/fs/stat.c     2003-07-17 08:51:33.000000000 -0700
+@@ -17,10 +17,12 @@
+  * Revalidate the inode. This is required for proper NFS attribute caching.
+  */
+ static __inline__ int
+-do_revalidate(struct dentry *dentry)
++do_revalidate(struct dentry *dentry, struct lookup_intent *it)
+ {
+       struct inode * inode = dentry->d_inode;
+-      if (inode->i_op && inode->i_op->revalidate)
++      if (inode->i_op && inode->i_op->revalidate_it)
++              return inode->i_op->revalidate_it(dentry, it);
++      else if (inode->i_op && inode->i_op->revalidate)
+               return inode->i_op->revalidate(dentry);
+       return 0;
+ }
+@@ -32,13 +34,13 @@ static inline nlink_t user_nlink(struct 
+       return inode->i_nlink;
+ }
+-static int do_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
++static int do_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat, struct lookup_intent *it)
+ {
+       int res = 0;
+       unsigned int blocks, indirect;
+       struct inode *inode = dentry->d_inode;
+-      res = do_revalidate(dentry);
++      res = do_revalidate(dentry, it);
+       if (res)
+               return res;
+@@ -111,10 +113,12 @@ int vfs_stat(char *name, struct kstat *s
  {
        struct nameidata nd;
  {
        struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
        int error;
        int error;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
  
 -      error = user_path_walk(name, &nd);
  
 -      error = user_path_walk(name, &nd);
-+      error = user_path_walk_it(name, &nd, &it);
++      error = user_path_walk_it(name, &nd, &it);
        if (!error) {
        if (!error) {
-               error = do_getattr(nd.mnt, nd.dentry, stat);
-+              intent_release(nd.dentry, &it);
+-              error = do_getattr(nd.mnt, nd.dentry, stat);
++              error = do_getattr(nd.mnt, nd.dentry, stat, &it);
++              intent_release(&it);
                path_release(&nd);
        }
        return error;
                path_release(&nd);
        }
        return error;
-@@ -123,11 +125,13 @@ int vfs_stat(char *name, struct kstat *s
- int vfs_lstat(char *name, struct kstat *stat)
+@@ -124,10 +128,12 @@ int vfs_lstat(char *name, struct kstat *
  {
        struct nameidata nd;
  {
        struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
        int error;
        int error;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
  
 -      error = user_path_walk_link(name, &nd);
  
 -      error = user_path_walk_link(name, &nd);
-+      error = user_path_walk_link_it(name, &nd, &it);
++      error = user_path_walk_link_it(name, &nd, &it);
        if (!error) {
        if (!error) {
-               error = do_getattr(nd.mnt, nd.dentry, stat);
-+              intent_release(nd.dentry, &it);
+-              error = do_getattr(nd.mnt, nd.dentry, stat);
++              error = do_getattr(nd.mnt, nd.dentry, stat, &it);
++              intent_release(&it);
                path_release(&nd);
        }
        return error;
                path_release(&nd);
        }
        return error;
---- linux-2.4.20-rh/include/linux/dcache.h~vfs_intent-2.4.20-rh        2003-04-12 15:46:39.000000000 +0800
-+++ linux-2.4.20-rh-root/include/linux/dcache.h        2003-06-09 23:18:07.000000000 +0800
-@@ -7,6 +7,28 @@
+@@ -139,7 +145,7 @@ int vfs_fstat(unsigned int fd, struct ks
+       int error = -EBADF;
+       if (f) {
+-              error = do_getattr(f->f_vfsmnt, f->f_dentry, stat);
++              error = do_getattr(f->f_vfsmnt, f->f_dentry, stat, NULL);
+               fput(f);
+       }
+       return error;
+@@ -286,7 +292,7 @@ asmlinkage long sys_readlink(const char 
+               error = -EINVAL;
+               if (inode->i_op && inode->i_op->readlink &&
+-                  !(error = do_revalidate(nd.dentry))) {
++                  !(error = do_revalidate(nd.dentry, NULL))) {
+                       UPDATE_ATIME(inode);
+                       error = inode->i_op->readlink(nd.dentry, buf, bufsiz);
+               }
+--- linux-2.4.20/include/linux/dcache.h~vfs_intent-2.4.20-rh   2003-07-17 08:32:48.000000000 -0700
++++ linux-2.4.20-mmonroe/include/linux/dcache.h        2003-07-17 08:35:22.000000000 -0700
+@@ -6,6 +6,45 @@
+ #include <asm/atomic.h>
  #include <linux/mount.h>
  #include <linux/kernel.h>
  #include <linux/mount.h>
  #include <linux/kernel.h>
-+#define IT_OPEN     (1)
-+#define IT_CREAT    (1<<1)
-+#define IT_READDIR  (1<<2)
-+#define IT_GETATTR  (1<<3)
-+#define IT_LOOKUP   (1<<4)
-+#define IT_UNLINK   (1<<5)
++#include <linux/string.h>
++
++#define IT_OPEN     0x0001
++#define IT_CREAT    0x0002
++#define IT_READDIR  0x0004
++#define IT_GETATTR  0x0008
++#define IT_LOOKUP   0x0010
++#define IT_UNLINK   0x0020
++#define IT_GETXATTR 0x0040
++#define IT_EXEC     0x0080
++#define IT_PIN      0x0100
 +
 +
-+#define IT_FL_LOCKED   (1)
-+#define IT_FL_FOLLOWED (1<<1) /* set by vfs_follow_link */
++#define IT_FL_LOCKED   0x0001
++#define IT_FL_FOLLOWED 0x0002 /* set by vfs_follow_link */
++
++#define INTENT_MAGIC 0x19620323
 +
 +struct lookup_intent {
 +      int it_op;
 +
 +struct lookup_intent {
 +      int it_op;
++      void (*it_op_release)(struct lookup_intent *);
++      int it_magic;
 +      int it_mode;
 +      int it_flags;
 +      int it_disposition;
 +      int it_mode;
 +      int it_flags;
 +      int it_disposition;
 +      void *it_data;
 +};
 +
 +      void *it_data;
 +};
 +
++static inline void intent_init(struct lookup_intent *it, int op, int flags)
++{
++      memset(it, 0, sizeof(*it));
++      it->it_magic = INTENT_MAGIC;
++      it->it_op = op;
++      it->it_flags = flags;
++}
++
  /*
   * linux/include/linux/dcache.h
  /*
   * linux/include/linux/dcache.h
-  *
-@@ -82,6 +104,7 @@ struct dentry {
-       unsigned long d_time;           /* used by d_revalidate */
-       struct dentry_operations  *d_op;
-       struct super_block * d_sb;      /* The root of the dentry tree */
-+      struct lookup_intent *d_it;
-       unsigned long d_vfs_flags;
-       void * d_fsdata;                /* fs-specific data */
-       void * d_extra_attributes;      /* TUX-specific data */
-@@ -96,8 +119,15 @@ struct dentry_operations {
+@@ -96,8 +135,22 @@ struct dentry_operations {
        int (*d_delete)(struct dentry *);
        void (*d_release)(struct dentry *);
        void (*d_iput)(struct dentry *, struct inode *);
        int (*d_delete)(struct dentry *);
        void (*d_release)(struct dentry *);
        void (*d_iput)(struct dentry *, struct inode *);
-+      int (*d_revalidate2)(struct dentry *, int, struct lookup_intent *);
-+      void (*d_intent_release)(struct dentry *, struct lookup_intent *);
++      int (*d_revalidate_it)(struct dentry *, int, struct lookup_intent *);
++      void (*d_pin)(struct dentry *, struct vfsmount * , int);
++      void (*d_unpin)(struct dentry *, struct vfsmount *, int);
  };
  
  };
  
++#define PIN(de,mnt,flag)  if (de->d_op && de->d_op->d_pin) \
++                              de->d_op->d_pin(de, mnt, flag);
++#define UNPIN(de,mnt,flag)  if (de->d_op && de->d_op->d_unpin) \
++                              de->d_op->d_unpin(de, mnt, flag);
++
++
 +/* defined in fs/namei.c */
 +/* defined in fs/namei.c */
-+extern void intent_release(struct dentry *de, struct lookup_intent *it);
++extern void intent_release(struct lookup_intent *it);
 +/* defined in fs/dcache.c */
 +extern void __d_rehash(struct dentry * entry, int lock);
 +
  /* the dentry parameter passed to d_hash and d_compare is the parent
   * directory of the entries to be compared. It is used in case these
   * functions need any directory specific information for determining
 +/* defined in fs/dcache.c */
 +extern void __d_rehash(struct dentry * entry, int lock);
 +
  /* the dentry parameter passed to d_hash and d_compare is the parent
   * directory of the entries to be compared. It is used in case these
   * functions need any directory specific information for determining
-@@ -129,6 +159,7 @@ d_iput:            no              no              yes
+@@ -129,6 +182,7 @@ d_iput:            no              no              yes
                                         * s_nfsd_free_path semaphore will be down
                                         */
  #define DCACHE_REFERENCED     0x0008  /* Recently used, don't discard. */
                                         * s_nfsd_free_path semaphore will be down
                                         */
  #define DCACHE_REFERENCED     0x0008  /* Recently used, don't discard. */
  
  extern spinlock_t dcache_lock;
  
  
  extern spinlock_t dcache_lock;
  
---- linux-2.4.20-rh/include/linux/fs.h~vfs_intent-2.4.20-rh    2003-05-30 02:07:39.000000000 +0800
-+++ linux-2.4.20-rh-root/include/linux/fs.h    2003-06-09 23:18:07.000000000 +0800
-@@ -337,6 +337,8 @@ extern void set_bh_page(struct buffer_he
+--- linux-2.4.20/include/linux/fs.h~vfs_intent-2.4.20-rh       2003-07-17 08:34:44.000000000 -0700
++++ linux-2.4.20-mmonroe/include/linux/fs.h    2003-07-17 08:35:22.000000000 -0700
+@@ -337,6 +337,9 @@ extern void set_bh_page(struct buffer_he
  #define ATTR_MTIME_SET        256
  #define ATTR_FORCE    512     /* Not a change, but a change it */
  #define ATTR_ATTR_FLAG        1024
  #define ATTR_MTIME_SET        256
  #define ATTR_FORCE    512     /* Not a change, but a change it */
  #define ATTR_ATTR_FLAG        1024
-+#define ATTR_RAW      2048    /* file system, not vfs will massage attrs */
-+#define ATTR_FROM_OPEN        4096    /* called from open path, ie O_TRUNC */
++#define ATTR_RAW      0x0800  /* file system, not vfs will massage attrs */
++#define ATTR_FROM_OPEN        0x1000  /* called from open path, ie O_TRUNC */
++#define ATTR_CTIME_SET 0x2000
  
  /*
   * This is the Inode Attributes structure, used for notify_change().  It
  
  /*
   * This is the Inode Attributes structure, used for notify_change().  It
-@@ -574,6 +576,7 @@ struct file {
+@@ -574,6 +577,7 @@ struct file {
  
        /* needed for tty driver, and maybe others */
        void                    *private_data;
  
        /* needed for tty driver, and maybe others */
        void                    *private_data;
-+      struct lookup_intent    *f_intent;
++      struct lookup_intent    *f_it;
  
        /* preallocated helper kiobuf to speedup O_DIRECT */
        struct kiobuf           *f_iobuf;
  
        /* preallocated helper kiobuf to speedup O_DIRECT */
        struct kiobuf           *f_iobuf;
-@@ -701,6 +704,7 @@ struct nameidata {
+@@ -701,6 +705,7 @@ struct nameidata {
        struct qstr last;
        unsigned int flags;
        int last_type;
        struct qstr last;
        unsigned int flags;
        int last_type;
  };
  
  /*
  };
  
  /*
-@@ -821,7 +825,9 @@ extern int vfs_symlink(struct inode *, s
+@@ -821,7 +826,8 @@ extern int vfs_symlink(struct inode *, s
  extern int vfs_link(struct dentry *, struct inode *, struct dentry *);
  extern int vfs_rmdir(struct inode *, struct dentry *);
  extern int vfs_unlink(struct inode *, struct dentry *);
 -extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
 +int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
  extern int vfs_link(struct dentry *, struct inode *, struct dentry *);
  extern int vfs_rmdir(struct inode *, struct dentry *);
  extern int vfs_unlink(struct inode *, struct dentry *);
 -extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
 +int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
-+              struct inode *new_dir, struct dentry *new_dentry,
-+              struct lookup_intent *it);
++             struct inode *new_dir, struct dentry *new_dentry);
  
  /*
   * File types
  
  /*
   * File types
-@@ -882,20 +888,33 @@ struct file_operations {
+@@ -881,21 +887,32 @@ struct file_operations {
  struct inode_operations {
        int (*create) (struct inode *,struct dentry *,int);
  struct inode_operations {
        int (*create) (struct inode *,struct dentry *,int);
++      int (*create_it) (struct inode *,struct dentry *,int, struct lookup_intent *);
        struct dentry * (*lookup) (struct inode *,struct dentry *);
        struct dentry * (*lookup) (struct inode *,struct dentry *);
-+      struct dentry * (*lookup2) (struct inode *,struct dentry *, struct lookup_intent *);
++      struct dentry * (*lookup_it) (struct inode *,struct dentry *, struct lookup_intent *, int flags);
        int (*link) (struct dentry *,struct inode *,struct dentry *);
        int (*link) (struct dentry *,struct inode *,struct dentry *);
-+      int (*link2) (struct inode *,struct inode *, const char *, int);
++      int (*link_raw) (struct nameidata *,struct nameidata *);
        int (*unlink) (struct inode *,struct dentry *);
        int (*unlink) (struct inode *,struct dentry *);
-+      int (*unlink2) (struct inode *, const char *, int);
++      int (*unlink_raw) (struct nameidata *);
        int (*symlink) (struct inode *,struct dentry *,const char *);
        int (*symlink) (struct inode *,struct dentry *,const char *);
-+      int (*symlink2) (struct inode *, const char *, int, const char *);
++      int (*symlink_raw) (struct nameidata *,const char *);
        int (*mkdir) (struct inode *,struct dentry *,int);
        int (*mkdir) (struct inode *,struct dentry *,int);
-+      int (*mkdir2) (struct inode *, const char *, int,int);
++      int (*mkdir_raw) (struct nameidata *,int);
        int (*rmdir) (struct inode *,struct dentry *);
        int (*rmdir) (struct inode *,struct dentry *);
-+      int (*rmdir2) (struct inode *, const char *, int);
++      int (*rmdir_raw) (struct nameidata *);
        int (*mknod) (struct inode *,struct dentry *,int,int);
        int (*mknod) (struct inode *,struct dentry *,int,int);
-+      int (*mknod2) (struct inode *, const char *, int,int,int);
++      int (*mknod_raw) (struct nameidata *,int,dev_t);
        int (*rename) (struct inode *, struct dentry *,
                        struct inode *, struct dentry *);
        int (*rename) (struct inode *, struct dentry *,
                        struct inode *, struct dentry *);
-+      int (*rename2) (struct inode *, struct inode *,
-+                      const char *oldname, int oldlen,
-+                      const char *newname, int newlen);
++      int (*rename_raw) (struct nameidata *, struct nameidata *);
        int (*readlink) (struct dentry *, char *,int);
        int (*follow_link) (struct dentry *, struct nameidata *);
        int (*readlink) (struct dentry *, char *,int);
        int (*follow_link) (struct dentry *, struct nameidata *);
-+      int (*follow_link2) (struct dentry *, struct nameidata *,
-+                           struct lookup_intent *it);
        void (*truncate) (struct inode *);
        int (*permission) (struct inode *, int);
        int (*revalidate) (struct dentry *);
        void (*truncate) (struct inode *);
        int (*permission) (struct inode *, int);
        int (*revalidate) (struct dentry *);
++      int (*revalidate_it) (struct dentry *, struct lookup_intent *);
        int (*setattr) (struct dentry *, struct iattr *);
        int (*setattr) (struct dentry *, struct iattr *);
-+      int (*setattr_raw) (struct inode *, struct iattr *);
++      int (*setattr_raw) (struct inode *, struct iattr *);
        int (*getattr) (struct dentry *, struct iattr *);
        int (*setxattr) (struct dentry *, const char *, void *, size_t, int);
        ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
        int (*getattr) (struct dentry *, struct iattr *);
        int (*setxattr) (struct dentry *, const char *, void *, size_t, int);
        ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
-@@ -1091,10 +1110,14 @@ static inline int get_lease(struct inode
+@@ -1091,10 +1108,14 @@ static inline int get_lease(struct inode
  
  asmlinkage long sys_open(const char *, int, int);
  asmlinkage long sys_close(unsigned int);      /* yes, it's really unsigned */
  
  asmlinkage long sys_open(const char *, int, int);
  asmlinkage long sys_close(unsigned int);      /* yes, it's really unsigned */
  extern int filp_close(struct file *, fl_owner_t id);
  extern char * getname(const char *);
  
  extern int filp_close(struct file *, fl_owner_t id);
  extern char * getname(const char *);
  
-@@ -1385,6 +1408,7 @@ typedef int (*read_actor_t)(read_descrip
+@@ -1385,6 +1406,7 @@ typedef int (*read_actor_t)(read_descrip
  extern loff_t default_llseek(struct file *file, loff_t offset, int origin);
  
  extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *));
  extern loff_t default_llseek(struct file *file, loff_t offset, int origin);
  
  extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *));
  extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *));
  extern int FASTCALL(path_walk(const char *, struct nameidata *));
  extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *));
  extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *));
  extern int FASTCALL(path_walk(const char *, struct nameidata *));
  extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *));
-@@ -1396,6 +1420,8 @@ extern struct dentry * lookup_one_len(co
+@@ -1396,6 +1418,8 @@ extern struct dentry * lookup_one_len(co
  extern struct dentry * lookup_hash(struct qstr *, struct dentry *);
  #define user_path_walk(name,nd)        __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd)
  #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd)
  extern struct dentry * lookup_hash(struct qstr *, struct dentry *);
  #define user_path_walk(name,nd)        __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd)
  #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd)
  
  extern void inode_init_once(struct inode *);
  extern void iput(struct inode *);
  
  extern void inode_init_once(struct inode *);
  extern void iput(struct inode *);
-@@ -1495,6 +1521,8 @@ extern struct file_operations generic_ro
+@@ -1497,6 +1521,8 @@ extern struct file_operations generic_ro
  
  extern int vfs_readlink(struct dentry *, char *, int, const char *);
  extern int vfs_follow_link(struct nameidata *, const char *);
  
  extern int vfs_readlink(struct dentry *, char *, int, const char *);
  extern int vfs_follow_link(struct nameidata *, const char *);
  extern int page_readlink(struct dentry *, char *, int);
  extern int page_follow_link(struct dentry *, struct nameidata *);
  extern struct inode_operations page_symlink_inode_operations;
  extern int page_readlink(struct dentry *, char *, int);
  extern int page_follow_link(struct dentry *, struct nameidata *);
  extern struct inode_operations page_symlink_inode_operations;
---- linux-2.4.20-rh/kernel/ksyms.c~vfs_intent-2.4.20-rh        2003-05-30 02:07:42.000000000 +0800
-+++ linux-2.4.20-rh-root/kernel/ksyms.c        2003-06-09 23:18:07.000000000 +0800
+--- linux-2.4.20/kernel/ksyms.c~vfs_intent-2.4.20-rh   2003-07-17 08:34:45.000000000 -0700
++++ linux-2.4.20-mmonroe/kernel/ksyms.c        2003-07-17 08:35:22.000000000 -0700
 @@ -298,6 +298,7 @@ EXPORT_SYMBOL(read_cache_page);
  EXPORT_SYMBOL(set_page_dirty);
  EXPORT_SYMBOL(vfs_readlink);
 @@ -298,6 +298,7 @@ EXPORT_SYMBOL(read_cache_page);
  EXPORT_SYMBOL(set_page_dirty);
  EXPORT_SYMBOL(vfs_readlink);
  EXPORT_SYMBOL(page_readlink);
  EXPORT_SYMBOL(page_follow_link);
  EXPORT_SYMBOL(page_symlink_inode_operations);
  EXPORT_SYMBOL(page_readlink);
  EXPORT_SYMBOL(page_follow_link);
  EXPORT_SYMBOL(page_symlink_inode_operations);
---- linux-2.4.20-rh/fs/exec.c~vfs_intent-2.4.20-rh     2003-04-13 10:07:02.000000000 +0800
-+++ linux-2.4.20-rh-root/fs/exec.c     2003-06-09 23:18:07.000000000 +0800
+--- linux-2.4.20/fs/exec.c~vfs_intent-2.4.20-rh        2003-07-17 08:33:09.000000000 -0700
++++ linux-2.4.20-mmonroe/fs/exec.c     2003-07-17 08:35:22.000000000 -0700
 @@ -114,8 +114,9 @@ asmlinkage long sys_uselib(const char * 
        struct file * file;
        struct nameidata nd;
        int error;
 @@ -114,8 +114,9 @@ asmlinkage long sys_uselib(const char * 
        struct file * file;
        struct nameidata nd;
        int error;
--
++      struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = O_RDONLY };
 -      error = user_path_walk(library, &nd);
 -      error = user_path_walk(library, &nd);
-+              struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = O_RDONLY };
-+                                                                                                                                             
-+        error = user_path_walk_it(library, &nd, &it);
++      error = user_path_walk_it(library, &nd, &it);
        if (error)
                goto out;
  
        if (error)
                goto out;
  
                goto exit;
  
 -      file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
                goto exit;
  
 -      file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
-+      file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);        
-+      intent_release(nd.dentry, &it);
++      file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
++      intent_release(&it);
        error = PTR_ERR(file);
        if (IS_ERR(file))
                goto out;
        error = PTR_ERR(file);
        if (IS_ERR(file))
                goto out;
        struct inode *inode;
        struct file *file;
        int err = 0;
        struct inode *inode;
        struct file *file;
        int err = 0;
--
--      err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
 +      struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = O_RDONLY };
 +      struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = O_RDONLY };
-+                                                                                                                                             
+-      err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
 +      err = path_lookup_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it);
        file = ERR_PTR(err);
        if (!err) {
                inode = nd.dentry->d_inode;
 +      err = path_lookup_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it);
        file = ERR_PTR(err);
        if (!err) {
                inode = nd.dentry->d_inode;
-@@ -395,7 +398,7 @@ struct file *open_exec(const char *name)
+@@ -395,7 +398,8 @@ struct file *open_exec(const char *name)
                                err = -EACCES;
                        file = ERR_PTR(err);
                        if (!err) {
 -                              file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
                                err = -EACCES;
                        file = ERR_PTR(err);
                        if (!err) {
 -                              file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
-+                                file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
++                              file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
++                              intent_release(&it);
                                if (!IS_ERR(file)) {
                                        err = deny_write_access(file);
                                        if (err) {
                                if (!IS_ERR(file)) {
                                        err = deny_write_access(file);
                                        if (err) {
-@@ -404,6 +407,7 @@ struct file *open_exec(const char *name)
-                                       }
-                               }
- out:
-+                              intent_release(nd.dentry, &it);
+@@ -407,6 +411,7 @@ out:
                                return file;
                        }
                }
                                return file;
                        }
                }
-@@ -1283,7 +1287,7 @@ int do_coredump(long signr, int exit_cod
++              intent_release(&it);
+               path_release(&nd);
+       }
+       goto out;
+@@ -1283,7 +1288,7 @@ int do_coredump(long signr, int exit_cod
                goto close_fail;
        if (!file->f_op->write)
                goto close_fail;
                goto close_fail;
        if (!file->f_op->write)
                goto close_fail;
                goto close_fail;
  
        retval = binfmt->core_dump(signr, regs, file);
                goto close_fail;
  
        retval = binfmt->core_dump(signr, regs, file);
---- linux-2.4.20-rh/fs/proc/base.c~vfs_intent-2.4.20-rh        2003-06-09 23:16:51.000000000 +0800
-+++ linux-2.4.20-rh-root/fs/proc/base.c        2003-06-09 23:18:52.000000000 +0800
+--- linux-2.4.20/fs/proc/base.c~vfs_intent-2.4.20-rh   2003-07-17 08:33:05.000000000 -0700
++++ linux-2.4.20-mmonroe/fs/proc/base.c        2003-07-17 08:35:22.000000000 -0700
 @@ -464,6 +464,9 @@ static int proc_pid_follow_link(struct d
  
        error = inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt);
        nd->last_type = LAST_BIND;
 +
 @@ -464,6 +464,9 @@ static int proc_pid_follow_link(struct d
  
        error = inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt);
        nd->last_type = LAST_BIND;
 +
-+        if (nd->it != NULL)
-+                nd->it->it_int_flags |= IT_FL_FOLLOWED;
++      if (nd->it != NULL)
++              nd->it->it_int_flags |= IT_FL_FOLLOWED;
  out:
        return error;
  }
  out:
        return error;
  }
index 09bcb22..e522896 100644 (file)
@@ -1,17 +1,20 @@
- fs/dcache.c            |   20 ++
- fs/exec.c              |   15 +
- fs/namei.c             |  378 ++++++++++++++++++++++++++++++++++++++++++-------
- fs/nfsd/vfs.c          |    2 
- fs/open.c              |  126 ++++++++++++++--
- fs/proc/base.c         |    3 
- fs/stat.c              |   24 ++-
- include/linux/dcache.h |   31 ++++
- include/linux/fs.h     |   32 +++-
- kernel/ksyms.c         |    1 
- 10 files changed, 543 insertions(+), 89 deletions(-)
+ fs/dcache.c               |   19 ++
+ fs/exec.c                 |   15 +-
+ fs/namei.c                |  329 ++++++++++++++++++++++++++++++++++++++--------
+ fs/namespace.c            |   30 +++-
+ fs/open.c                 |  128 +++++++++++++++--
+ fs/proc/base.c            |    3 
+ fs/stat.c                 |   50 ++++--
+ include/linux/dcache.h    |   53 +++++++
+ include/linux/fs.h        |   29 +++-
+ include/linux/fs_struct.h |    4 
+ kernel/exit.c             |    3 
+ kernel/fork.c             |    3 
+ kernel/ksyms.c            |    1 
+ 13 files changed, 560 insertions(+), 107 deletions(-)
 
 
---- linux-2.4.20-l18/fs/exec.c~vfs_intent-2.4.20-vanilla       Thu Nov 28 18:53:15 2002
-+++ linux-2.4.20-l18-phil/fs/exec.c    Wed May 28 01:39:18 2003
+--- linux-2.4.20-ad/fs/exec.c~vfs_intent-2.4.20-vanilla        2002-11-28 16:53:15.000000000 -0700
++++ linux-2.4.20-ad-braam/fs/exec.c    2003-07-07 15:13:53.000000000 -0600
 @@ -107,8 +107,9 @@ asmlinkage long sys_uselib(const char * 
        struct file * file;
        struct nameidata nd;
 @@ -107,8 +107,9 @@ asmlinkage long sys_uselib(const char * 
        struct file * file;
        struct nameidata nd;
@@ -29,7 +32,7 @@
  
 -      file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
 +      file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
  
 -      file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
 +      file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
-+      intent_release(nd.dentry, &it);
++      intent_release(&it);
        error = PTR_ERR(file);
        if (IS_ERR(file))
                goto out;
        error = PTR_ERR(file);
        if (IS_ERR(file))
                goto out;
@@ -50,7 +53,7 @@
                        if (!err) {
 -                              file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
 +                              file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
                        if (!err) {
 -                              file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
 +                              file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
-+                                intent_release(nd.dentry, &it);
++                              intent_release(&it);
                                if (!IS_ERR(file)) {
                                        err = deny_write_access(file);
                                        if (err) {
                                if (!IS_ERR(file)) {
                                        err = deny_write_access(file);
                                        if (err) {
@@ -58,7 +61,7 @@
                                return file;
                        }
                }
                                return file;
                        }
                }
-+                intent_release(nd.dentry, &it);
++              intent_release(&it);
                path_release(&nd);
        }
        goto out;
                path_release(&nd);
        }
        goto out;
@@ -71,8 +74,8 @@
                goto close_fail;
  
        retval = binfmt->core_dump(signr, regs, file);
                goto close_fail;
  
        retval = binfmt->core_dump(signr, regs, file);
---- linux-2.4.20-l18/fs/dcache.c~vfs_intent-2.4.20-vanilla     Thu Nov 28 18:53:15 2002
-+++ linux-2.4.20-l18-phil/fs/dcache.c  Wed May 28 01:39:18 2003
+--- linux-2.4.20-ad/fs/dcache.c~vfs_intent-2.4.20-vanilla      2002-11-28 16:53:15.000000000 -0700
++++ linux-2.4.20-ad-braam/fs/dcache.c  2003-07-09 01:46:27.000000000 -0600
 @@ -181,6 +181,13 @@ int d_invalidate(struct dentry * dentry)
                spin_unlock(&dcache_lock);
                return 0;
 @@ -181,6 +181,13 @@ int d_invalidate(struct dentry * dentry)
                spin_unlock(&dcache_lock);
                return 0;
        /*
         * Check whether to do a partial shrink_dcache
         * to get rid of unused child entries.
        /*
         * Check whether to do a partial shrink_dcache
         * to get rid of unused child entries.
-@@ -616,6 +623,7 @@ struct dentry * d_alloc(struct dentry * 
-       dentry->d_op = NULL;
-       dentry->d_fsdata = NULL;
-       dentry->d_mounted = 0;
-+      dentry->d_it = NULL;
-       INIT_LIST_HEAD(&dentry->d_hash);
-       INIT_LIST_HEAD(&dentry->d_lru);
-       INIT_LIST_HEAD(&dentry->d_subdirs);
-@@ -830,13 +838,19 @@ void d_delete(struct dentry * dentry)
+@@ -830,13 +837,19 @@ void d_delete(struct dentry * dentry)
   * Adds a dentry to the hash according to its name.
   */
   
   * Adds a dentry to the hash according to its name.
   */
   
  }
  
  #define do_switch(x,y) do { \
  }
  
  #define do_switch(x,y) do { \
---- linux-2.4.20-l18/fs/namei.c~vfs_intent-2.4.20-vanilla      Thu Nov 28 18:53:15 2002
-+++ linux-2.4.20-l18-phil/fs/namei.c   Sun Jun  1 23:41:35 2003
+--- linux-2.4.20-ad/fs/namespace.c~vfs_intent-2.4.20-vanilla   2002-11-28 16:53:15.000000000 -0700
++++ linux-2.4.20-ad-braam/fs/namespace.c       2003-07-07 15:13:53.000000000 -0600
+@@ -99,6 +99,7 @@ static void detach_mnt(struct vfsmount *
+ {
+       old_nd->dentry = mnt->mnt_mountpoint;
+       old_nd->mnt = mnt->mnt_parent;
++      UNPIN(old_nd->dentry, old_nd->mnt, 1);
+       mnt->mnt_parent = mnt;
+       mnt->mnt_mountpoint = mnt->mnt_root;
+       list_del_init(&mnt->mnt_child);
+@@ -110,6 +111,7 @@ static void attach_mnt(struct vfsmount *
+ {
+       mnt->mnt_parent = mntget(nd->mnt);
+       mnt->mnt_mountpoint = dget(nd->dentry);
++      PIN(nd->dentry, nd->mnt, 1);
+       list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry));
+       list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts);
+       nd->dentry->d_mounted++;
+@@ -485,14 +487,17 @@ static int do_loopback(struct nameidata 
+ {
+       struct nameidata old_nd;
+       struct vfsmount *mnt = NULL;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+       int err = mount_is_safe(nd);
+       if (err)
+               return err;
+       if (!old_name || !*old_name)
+               return -EINVAL;
+-      err = path_lookup(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd);
+-      if (err)
++      err = path_lookup_it(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd, &it);
++      if (err) {
++              intent_release(&it);
+               return err;
++      }
+       down_write(&current->namespace->sem);
+       err = -EINVAL;
+@@ -515,6 +520,7 @@ static int do_loopback(struct nameidata 
+       }
+       up_write(&current->namespace->sem);
++      intent_release(&it);
+       path_release(&old_nd);
+       return err;
+ }
+@@ -698,7 +704,8 @@ long do_mount(char * dev_name, char * di
+                 unsigned long flags, void *data_page)
+ {
+       struct nameidata nd;
+-      int retval = 0;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
++      int retval = 0;
+       int mnt_flags = 0;
+       /* Discard magic */
+@@ -722,10 +729,11 @@ long do_mount(char * dev_name, char * di
+       flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV);
+       /* ... and get the mountpoint */
+-      retval = path_lookup(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
+-      if (retval)
++      retval = path_lookup_it(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it);
++      if (retval) {
++              intent_release(&it);
+               return retval;
+-
++      }
+       if (flags & MS_REMOUNT)
+               retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
+                                   data_page);
+@@ -736,6 +744,8 @@ long do_mount(char * dev_name, char * di
+       else
+               retval = do_add_mount(&nd, type_page, flags, mnt_flags,
+                                     dev_name, data_page);
++
++      intent_release(&it);
+       path_release(&nd);
+       return retval;
+ }
+@@ -901,6 +911,8 @@ asmlinkage long sys_pivot_root(const cha
+ {
+       struct vfsmount *tmp;
+       struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd;
++      struct lookup_intent new_it = { .it_op = IT_GETATTR };
++      struct lookup_intent old_it = { .it_op = IT_GETATTR };
+       int error;
+       if (!capable(CAP_SYS_ADMIN))
+@@ -908,14 +920,14 @@ asmlinkage long sys_pivot_root(const cha
+       lock_kernel();
+-      error = __user_walk(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd);
++      error = __user_walk_it(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd, &new_it);
+       if (error)
+               goto out0;
+       error = -EINVAL;
+       if (!check_mnt(new_nd.mnt))
+               goto out1;
+-      error = __user_walk(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd);
++      error = __user_walk_it(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd, &old_it);
+       if (error)
+               goto out1;
+@@ -970,8 +982,10 @@ out2:
+       up(&old_nd.dentry->d_inode->i_zombie);
+       up_write(&current->namespace->sem);
+       path_release(&user_nd);
++      intent_release(&old_it);
+       path_release(&old_nd);
+ out1:
++      intent_release(&new_it);
+       path_release(&new_nd);
+ out0:
+       unlock_kernel();
+--- linux-2.4.20-ad/fs/namei.c~vfs_intent-2.4.20-vanilla       2002-11-28 16:53:15.000000000 -0700
++++ linux-2.4.20-ad-braam/fs/namei.c   2003-07-08 13:53:48.000000000 -0600
 @@ -94,6 +94,13 @@
   * XEmacs seems to be relying on it...
   */
  
 @@ -94,6 +94,13 @@
   * XEmacs seems to be relying on it...
   */
  
-+void intent_release(struct dentry *de, struct lookup_intent *it)
++void intent_release(struct lookup_intent *it)
 +{
 +{
-+      if (it && de->d_op && de->d_op->d_intent_release)
-+              de->d_op->d_intent_release(de, it);
++      if (it && it->it_op_release)
++              it->it_op_release(it);
 +
 +}
 +
 +
 +}
 +
  {
        struct dentry * dentry = d_lookup(parent, name);
  
  {
        struct dentry * dentry = d_lookup(parent, name);
  
-+      if (dentry && dentry->d_op && dentry->d_op->d_revalidate2) {
-+              if (!dentry->d_op->d_revalidate2(dentry, flags, it) &&
++      if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) {
++              if (!dentry->d_op->d_revalidate_it(dentry, flags, it) &&
 +                  !d_invalidate(dentry)) {
 +                      dput(dentry);
 +                      dentry = NULL;
 +                  !d_invalidate(dentry)) {
 +                      dput(dentry);
 +                      dentry = NULL;
                result = ERR_PTR(-ENOMEM);
                if (dentry) {
                        lock_kernel();
                result = ERR_PTR(-ENOMEM);
                if (dentry) {
                        lock_kernel();
-+                      if (dir->i_op->lookup2)
-+                              result = dir->i_op->lookup2(dir, dentry, it);
++                      if (dir->i_op->lookup_it)
++                              result = dir->i_op->lookup_it(dir, dentry, it, flags);
 +                      else
                        result = dir->i_op->lookup(dir, dentry);
                        unlock_kernel();
 +                      else
                        result = dir->i_op->lookup(dir, dentry);
                        unlock_kernel();
                        dput(result);
                        result = ERR_PTR(-ENOENT);
                }
                        dput(result);
                        result = ERR_PTR(-ENOENT);
                }
-+      } else if (result->d_op && result->d_op->d_revalidate2) {
-+              if (!result->d_op->d_revalidate2(result, flags, it) &&
++      } else if (result->d_op && result->d_op->d_revalidate_it) {
++              if (!result->d_op->d_revalidate_it(result, flags, it) &&
 +                  !d_invalidate(result)) {
 +                      dput(result);
 +                      goto again;
 +                  !d_invalidate(result)) {
 +                      dput(result);
 +                      goto again;
  {
        int err;
        if (current->link_count >= 5)
  {
        int err;
        if (current->link_count >= 5)
-@@ -346,10 +375,21 @@ static inline int do_follow_link(struct 
+@@ -346,10 +375,18 @@ static inline int do_follow_link(struct 
        current->link_count++;
        current->total_link_count++;
        UPDATE_ATIME(dentry->d_inode);
 -      err = dentry->d_inode->i_op->follow_link(dentry, nd);
        current->link_count++;
        current->total_link_count++;
        UPDATE_ATIME(dentry->d_inode);
 -      err = dentry->d_inode->i_op->follow_link(dentry, nd);
-+        nd->it = it;
-+      if (dentry->d_inode->i_op->follow_link2)
-+              err = dentry->d_inode->i_op->follow_link2(dentry, nd, it);
-+        else
-+              err = dentry->d_inode->i_op->follow_link(dentry, nd);
-+        if (!err && it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) {
-+                /* vfs_follow_link was never called */
-+              intent_release(dentry, it);
-+                path_release(nd);
-+                err = -ENOLINK;
-+        }
++      nd->it = it;
++      err = dentry->d_inode->i_op->follow_link(dentry, nd);
++      if (!err && it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) {
++              /* vfs_follow_link was never called */
++              intent_release(it);
++              path_release(nd);
++              err = -ENOLINK;
++      }
        current->link_count--;
        return err;
  loop:
        current->link_count--;
        return err;
  loop:
-+      intent_release(dentry, it);
++      intent_release(it);
        path_release(nd);
        return -ELOOP;
  }
        path_release(nd);
        return -ELOOP;
  }
-@@ -379,15 +419,26 @@ int follow_up(struct vfsmount **mnt, str
+@@ -379,15 +416,26 @@ int follow_up(struct vfsmount **mnt, str
        return __follow_up(mnt, dentry);
  }
  
        return __follow_up(mnt, dentry);
  }
  
 +                      opc = it->it_op;
 +                      mode = it->it_mode;
 +              }
 +                      opc = it->it_op;
 +                      mode = it->it_mode;
 +              }
-+              intent_release(*dentry, it);
++              intent_release(it);
 +              if (it) {
 +                      it->it_op = opc;
 +                      it->it_mode = mode;
 +              if (it) {
 +                      it->it_op = opc;
 +                      it->it_mode = mode;
                dput(*dentry);
                mntput(mounted->mnt_parent);
                *dentry = dget(mounted->mnt_root);
                dput(*dentry);
                mntput(mounted->mnt_parent);
                *dentry = dget(mounted->mnt_root);
-@@ -399,7 +450,7 @@ static inline int __follow_down(struct v
+@@ -399,7 +447,7 @@ static inline int __follow_down(struct v
  
  int follow_down(struct vfsmount **mnt, struct dentry **dentry)
  {
  
  int follow_down(struct vfsmount **mnt, struct dentry **dentry)
  {
  }
   
  static inline void follow_dotdot(struct nameidata *nd)
  }
   
  static inline void follow_dotdot(struct nameidata *nd)
-@@ -435,7 +486,7 @@ static inline void follow_dotdot(struct 
+@@ -435,7 +483,7 @@ static inline void follow_dotdot(struct 
                mntput(nd->mnt);
                nd->mnt = parent;
        }
                mntput(nd->mnt);
                nd->mnt = parent;
        }
                ;
  }
  
                ;
  }
  
-@@ -447,7 +498,8 @@ static inline void follow_dotdot(struct 
+@@ -447,7 +495,8 @@ static inline void follow_dotdot(struct 
   *
   * We expect 'base' to be positive and a directory.
   */
   *
   * We expect 'base' to be positive and a directory.
   */
  {
        struct dentry *dentry;
        struct inode *inode;
  {
        struct dentry *dentry;
        struct inode *inode;
-@@ -520,15 +572,15 @@ int link_path_walk(const char * name, st
+@@ -520,15 +569,15 @@ int link_path_walk(const char * name, st
                                break;
                }
                /* This does the actual lookups.. */
                                break;
                }
                /* This does the actual lookups.. */
                        ;
  
                err = -ENOENT;
                        ;
  
                err = -ENOENT;
-@@ -539,8 +591,8 @@ int link_path_walk(const char * name, st
+@@ -539,8 +588,8 @@ int link_path_walk(const char * name, st
                if (!inode->i_op)
                        goto out_dput;
  
 -              if (inode->i_op->follow_link) {
 -                      err = do_follow_link(dentry, nd);
                if (!inode->i_op)
                        goto out_dput;
  
 -              if (inode->i_op->follow_link) {
 -                      err = do_follow_link(dentry, nd);
-+              if (inode->i_op->follow_link || inode->i_op->follow_link2) {
++              if (inode->i_op->follow_link) {
 +                      err = do_follow_link(dentry, nd, NULL);
                        dput(dentry);
                        if (err)
                                goto return_err;
 +                      err = do_follow_link(dentry, nd, NULL);
                        dput(dentry);
                        if (err)
                                goto return_err;
-@@ -556,7 +608,7 @@ int link_path_walk(const char * name, st
+@@ -556,7 +605,7 @@ int link_path_walk(const char * name, st
                        nd->dentry = dentry;
                }
                err = -ENOTDIR; 
 -              if (!inode->i_op->lookup)
                        nd->dentry = dentry;
                }
                err = -ENOTDIR; 
 -              if (!inode->i_op->lookup)
-+              if (!inode->i_op->lookup && !inode->i_op->lookup2)
++              if (!inode->i_op->lookup && !inode->i_op->lookup_it)
                        break;
                continue;
                /* here ends the main loop */
                        break;
                continue;
                /* here ends the main loop */
-@@ -583,19 +635,20 @@ last_component:
+@@ -583,19 +632,19 @@ last_component:
                        if (err < 0)
                                break;
                }
                        if (err < 0)
                                break;
                }
                        ;
                inode = dentry->d_inode;
                if ((lookup_flags & LOOKUP_FOLLOW)
                        ;
                inode = dentry->d_inode;
                if ((lookup_flags & LOOKUP_FOLLOW)
--                  && inode && inode->i_op && inode->i_op->follow_link) {
+                   && inode && inode->i_op && inode->i_op->follow_link) {
 -                      err = do_follow_link(dentry, nd);
 -                      err = do_follow_link(dentry, nd);
-+                  && inode && inode->i_op &&
-+                  (inode->i_op->follow_link || inode->i_op->follow_link2)) {
 +                      err = do_follow_link(dentry, nd, it);
                        dput(dentry);
                        if (err)
                                goto return_err;
 +                      err = do_follow_link(dentry, nd, it);
                        dput(dentry);
                        if (err)
                                goto return_err;
-@@ -609,7 +662,8 @@ last_component:
+@@ -609,7 +658,8 @@ last_component:
                        goto no_inode;
                if (lookup_flags & LOOKUP_DIRECTORY) {
                        err = -ENOTDIR; 
 -                      if (!inode->i_op || !inode->i_op->lookup)
 +                      if (!inode->i_op ||
                        goto no_inode;
                if (lookup_flags & LOOKUP_DIRECTORY) {
                        err = -ENOTDIR; 
 -                      if (!inode->i_op || !inode->i_op->lookup)
 +                      if (!inode->i_op ||
-+                          (!inode->i_op->lookup && !inode->i_op->lookup2))
++                          (!inode->i_op->lookup && !inode->i_op->lookup_it))
                                break;
                }
                goto return_base;
                                break;
                }
                goto return_base;
-@@ -633,6 +687,23 @@ return_reval:
+@@ -633,6 +683,23 @@ return_reval:
                 * Check the cached dentry for staleness.
                 */
                dentry = nd->dentry;
                 * Check the cached dentry for staleness.
                 */
                dentry = nd->dentry;
-+        revalidate_again:
-+              if (dentry && dentry->d_op && dentry->d_op->d_revalidate2) {
++      revalidate_again:
++              if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) {
 +                      err = -ESTALE;
 +                      err = -ESTALE;
-+                      if (!dentry->d_op->d_revalidate2(dentry, 0, it)) {
-+                                struct dentry *new;
-+                                err = permission(dentry->d_parent->d_inode, 
-+                                                 MAY_EXEC);
-+                                if (err)
-+                                        break;
-+                                new = real_lookup(dentry->d_parent,
-+                                                  &dentry->d_name, 0, NULL);
++                      if (!dentry->d_op->d_revalidate_it(dentry, 0, it)) {
++                              struct dentry *new;
++                              err = permission(dentry->d_parent->d_inode,
++                                               MAY_EXEC);
++                              if (err)
++                                      break;
++                              new = real_lookup(dentry->d_parent,
++                                                &dentry->d_name, 0, NULL);
 +                              d_invalidate(dentry);
 +                              d_invalidate(dentry);
-+                                dput(dentry);
-+                                dentry = new;
-+                                goto revalidate_again;
-+                        }
++                              dput(dentry);
++                              dentry = new;
++                              goto revalidate_again;
++                      }
 +              } else
                if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
                        err = -ESTALE;
                        if (!dentry->d_op->d_revalidate(dentry, 0)) {
 +              } else
                if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
                        err = -ESTALE;
                        if (!dentry->d_op->d_revalidate(dentry, 0)) {
-@@ -646,15 +717,28 @@ out_dput:
+@@ -646,15 +713,28 @@ out_dput:
                dput(dentry);
                break;
        }
 +      if (err)
                dput(dentry);
                break;
        }
 +      if (err)
-+              intent_release(nd->dentry, it);
++              intent_release(it);
        path_release(nd);
  return_err:
        return err;
        path_release(nd);
  return_err:
        return err;
  }
  
  /* SMP-safe */
  }
  
  /* SMP-safe */
-@@ -739,6 +823,17 @@ walk_init_root(const char *name, struct 
+@@ -739,6 +819,17 @@ walk_init_root(const char *name, struct 
  }
  
  /* SMP-safe */
  }
  
  /* SMP-safe */
  int path_lookup(const char *path, unsigned flags, struct nameidata *nd)
  {
        int error = 0;
  int path_lookup(const char *path, unsigned flags, struct nameidata *nd)
  {
        int error = 0;
-@@ -753,6 +848,7 @@ int path_init(const char *name, unsigned
+@@ -753,6 +844,7 @@ int path_init(const char *name, unsigned
  {
        nd->last_type = LAST_ROOT; /* if there are only slashes... */
        nd->flags = flags;
  {
        nd->last_type = LAST_ROOT; /* if there are only slashes... */
        nd->flags = flags;
-+        nd->it = NULL;
++      nd->it = NULL;
        if (*name=='/')
                return walk_init_root(name,nd);
        read_lock(&current->fs->lock);
        if (*name=='/')
                return walk_init_root(name,nd);
        read_lock(&current->fs->lock);
-@@ -767,7 +863,8 @@ int path_init(const char *name, unsigned
+@@ -767,7 +859,8 @@ int path_init(const char *name, unsigned
   * needs parent already locked. Doesn't follow mounts.
   * SMP-safe.
   */
   * needs parent already locked. Doesn't follow mounts.
   * SMP-safe.
   */
  {
        struct dentry * dentry;
        struct inode *inode;
  {
        struct dentry * dentry;
        struct inode *inode;
-@@ -790,13 +887,16 @@ struct dentry * lookup_hash(struct qstr 
+@@ -790,13 +883,16 @@ struct dentry * lookup_hash(struct qstr 
                        goto out;
        }
  
                        goto out;
        }
  
                if (!new)
                        goto out;
                lock_kernel();
                if (!new)
                        goto out;
                lock_kernel();
-+              if (inode->i_op->lookup2)
-+                      dentry = inode->i_op->lookup2(inode, new, it);
++              if (inode->i_op->lookup_it)
++                      dentry = inode->i_op->lookup_it(inode, new, it, 0);
 +              else
                dentry = inode->i_op->lookup(inode, new);
                unlock_kernel();
                if (!dentry)
 +              else
                dentry = inode->i_op->lookup(inode, new);
                unlock_kernel();
                if (!dentry)
-@@ -808,6 +908,12 @@ out:
+@@ -808,6 +904,12 @@ out:
        return dentry;
  }
  
        return dentry;
  }
  
  /* SMP-safe */
  struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
  {
  /* SMP-safe */
  struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
  {
-@@ -829,7 +935,7 @@ struct dentry * lookup_one_len(const cha
+@@ -829,7 +931,7 @@ struct dentry * lookup_one_len(const cha
        }
        this.hash = end_name_hash(hash);
  
        }
        this.hash = end_name_hash(hash);
  
  access:
        return ERR_PTR(-EACCES);
  }
  access:
        return ERR_PTR(-EACCES);
  }
-@@ -860,6 +966,23 @@ int __user_walk(const char *name, unsign
+@@ -860,6 +962,23 @@ int __user_walk(const char *name, unsign
        return err;
  }
  
        return err;
  }
  
  /*
   * It's inline, so penalty for filesystems that don't use sticky bit is
   * minimal.
  /*
   * It's inline, so penalty for filesystems that don't use sticky bit is
   * minimal.
-@@ -996,7 +1119,8 @@ exit_lock:
+@@ -955,7 +1074,8 @@ static inline int lookup_flags(unsigned 
+       return retval;
+ }
+-int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
++static int vfs_create_it(struct inode *dir, struct dentry *dentry, int mode,
++                       struct lookup_intent *it)
+ {
+       int error;
+@@ -968,12 +1088,15 @@ int vfs_create(struct inode *dir, struct
+               goto exit_lock;
+       error = -EACCES;        /* shouldn't it be ENOSYS? */
+-      if (!dir->i_op || !dir->i_op->create)
++      if (!dir->i_op || (!dir->i_op->create && !dir->i_op->create_it))
+               goto exit_lock;
+       DQUOT_INIT(dir);
+       lock_kernel();
+-      error = dir->i_op->create(dir, dentry, mode);
++      if (dir->i_op->create_it)
++              error = dir->i_op->create_it(dir, dentry, mode, it);
++      else
++              error = dir->i_op->create(dir, dentry, mode);
+       unlock_kernel();
+ exit_lock:
+       up(&dir->i_zombie);
+@@ -982,6 +1105,11 @@ exit_lock:
+       return error;
+ }
++int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
++{
++      return vfs_create_it(dir, dentry, mode, NULL);
++}
++
+ /*
+  *    open_namei()
+  *
+@@ -996,7 +1124,8 @@ exit_lock:
   * for symlinks (where the permissions are checked later).
   * SMP-safe
   */
   * for symlinks (where the permissions are checked later).
   * SMP-safe
   */
  {
        int acc_mode, error = 0;
        struct inode *inode;
  {
        int acc_mode, error = 0;
        struct inode *inode;
-@@ -1010,7 +1134,7 @@ int open_namei(const char * pathname, in
+@@ -1010,7 +1139,7 @@ int open_namei(const char * pathname, in
         * The simplest case - just a plain lookup.
         */
        if (!(flag & O_CREAT)) {
         * The simplest case - just a plain lookup.
         */
        if (!(flag & O_CREAT)) {
                if (error)
                        return error;
                dentry = nd->dentry;
                if (error)
                        return error;
                dentry = nd->dentry;
-@@ -1020,6 +1144,10 @@ int open_namei(const char * pathname, in
+@@ -1020,6 +1149,10 @@ int open_namei(const char * pathname, in
        /*
         * Create - we need to know the parent.
         */
        /*
         * Create - we need to know the parent.
         */
        error = path_lookup(pathname, LOOKUP_PARENT, nd);
        if (error)
                return error;
        error = path_lookup(pathname, LOOKUP_PARENT, nd);
        if (error)
                return error;
-@@ -1035,7 +1163,7 @@ int open_namei(const char * pathname, in
+@@ -1035,7 +1168,7 @@ int open_namei(const char * pathname, in
  
        dir = nd->dentry;
        down(&dir->d_inode->i_sem);
  
        dir = nd->dentry;
        down(&dir->d_inode->i_sem);
  
  do_last:
        error = PTR_ERR(dentry);
  
  do_last:
        error = PTR_ERR(dentry);
-@@ -1044,6 +1172,7 @@ do_last:
+@@ -1044,10 +1177,11 @@ do_last:
                goto exit;
        }
  
 +      it->it_mode = mode;
        /* Negative dentry, just create the file */
        if (!dentry->d_inode) {
                goto exit;
        }
  
 +      it->it_mode = mode;
        /* Negative dentry, just create the file */
        if (!dentry->d_inode) {
-               error = vfs_create(dir->d_inode, dentry,
-@@ -1072,12 +1201,13 @@ do_last:
+-              error = vfs_create(dir->d_inode, dentry,
+-                                 mode & ~current->fs->umask);
++              error = vfs_create_it(dir->d_inode, dentry,
++                                 mode & ~current->fs->umask, it);
+               up(&dir->d_inode->i_sem);
+               dput(nd->dentry);
+               nd->dentry = dentry;
+@@ -1072,7 +1206,7 @@ do_last:
                error = -ELOOP;
                if (flag & O_NOFOLLOW)
                        goto exit_dput;
                error = -ELOOP;
                if (flag & O_NOFOLLOW)
                        goto exit_dput;
        }
        error = -ENOENT;
        if (!dentry->d_inode)
        }
        error = -ENOENT;
        if (!dentry->d_inode)
-               goto exit_dput;
--      if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)
-+      if (dentry->d_inode->i_op && (dentry->d_inode->i_op->follow_link ||
-+                                    dentry->d_inode->i_op->follow_link2))
-               goto do_link;
-       dput(nd->dentry);
-@@ -1151,7 +1281,7 @@ ok:
+@@ -1151,7 +1285,7 @@ ok:
                if (!error) {
                        DQUOT_INIT(inode);
                        
                if (!error) {
                        DQUOT_INIT(inode);
                        
                }
                put_write_access(inode);
                if (error)
                }
                put_write_access(inode);
                if (error)
-@@ -1163,8 +1293,10 @@ ok:
+@@ -1163,8 +1297,10 @@ ok:
        return 0;
  
  exit_dput:
        return 0;
  
  exit_dput:
-+      intent_release(dentry, it);
++      intent_release(it);
        dput(dentry);
  exit:
        dput(dentry);
  exit:
-+      intent_release(nd->dentry, it);
++      intent_release(it);
        path_release(nd);
        return error;
  
        path_release(nd);
        return error;
  
-@@ -1183,7 +1315,19 @@ do_link:
+@@ -1183,7 +1319,16 @@ do_link:
         * are done. Procfs-like symlinks just set LAST_BIND.
         */
        UPDATE_ATIME(dentry->d_inode);
 -      error = dentry->d_inode->i_op->follow_link(dentry, nd);
         * are done. Procfs-like symlinks just set LAST_BIND.
         */
        UPDATE_ATIME(dentry->d_inode);
 -      error = dentry->d_inode->i_op->follow_link(dentry, nd);
-+        nd->it = it;
-+      if (dentry->d_inode->i_op->follow_link2)
-+              error = dentry->d_inode->i_op->follow_link2(dentry, nd, it);
-+      else
-+              error = dentry->d_inode->i_op->follow_link(dentry, nd);
++      nd->it = it;
++      error = dentry->d_inode->i_op->follow_link(dentry, nd);
 +      if (error) {
 +      if (error) {
-+              intent_release(dentry, it);
-+        } else if (it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) {
-+                /* vfs_follow_link was never called */
-+              intent_release(dentry, it);
-+                path_release(nd);
-+                error = -ENOLINK;
-+        }
++              intent_release(it);
++      } else if (it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) {
++              /* vfs_follow_link was never called */
++              intent_release(it);
++              path_release(nd);
++              error = -ENOLINK;
++      }
        dput(dentry);
        if (error)
                return error;
        dput(dentry);
        if (error)
                return error;
-@@ -1205,13 +1349,20 @@ do_link:
+@@ -1205,13 +1350,20 @@ do_link:
        }
        dir = nd->dentry;
        down(&dir->d_inode->i_sem);
        }
        dir = nd->dentry;
        down(&dir->d_inode->i_sem);
  {
        struct dentry *dentry;
  
  {
        struct dentry *dentry;
  
-@@ -1219,7 +1370,7 @@ static struct dentry *lookup_create(stru
+@@ -1219,7 +1371,7 @@ static struct dentry *lookup_create(stru
        dentry = ERR_PTR(-EEXIST);
        if (nd->last_type != LAST_NORM)
                goto fail;
        dentry = ERR_PTR(-EEXIST);
        if (nd->last_type != LAST_NORM)
                goto fail;
        if (IS_ERR(dentry))
                goto fail;
        if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
        if (IS_ERR(dentry))
                goto fail;
        if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
-@@ -1275,7 +1426,19 @@ asmlinkage long sys_mknod(const char * f
+@@ -1275,7 +1427,16 @@ asmlinkage long sys_mknod(const char * f
        error = path_lookup(tmp, LOOKUP_PARENT, &nd);
        if (error)
                goto out;
 -      dentry = lookup_create(&nd, 0);
 +
        error = path_lookup(tmp, LOOKUP_PARENT, &nd);
        if (error)
                goto out;
 -      dentry = lookup_create(&nd, 0);
 +
-+      if (nd.dentry->d_inode->i_op->mknod2) {
++      if (nd.dentry->d_inode->i_op->mknod_raw) {
 +              struct inode_operations *op = nd.dentry->d_inode->i_op;
 +              struct inode_operations *op = nd.dentry->d_inode->i_op;
-+              error = op->mknod2(nd.dentry->d_inode,
-+                                 nd.last.name,
-+                                 nd.last.len,
-+                                 mode, dev);
++              error = op->mknod_raw(&nd, mode, dev);
 +              /* the file system wants to use normal vfs path now */
 +              if (error != -EOPNOTSUPP)
 +                      goto out2;
 +              /* the file system wants to use normal vfs path now */
 +              if (error != -EOPNOTSUPP)
 +                      goto out2;
        error = PTR_ERR(dentry);
  
        mode &= ~current->fs->umask;
        error = PTR_ERR(dentry);
  
        mode &= ~current->fs->umask;
-@@ -1296,6 +1459,7 @@ asmlinkage long sys_mknod(const char * f
+@@ -1296,6 +1457,7 @@ asmlinkage long sys_mknod(const char * f
                dput(dentry);
        }
        up(&nd.dentry->d_inode->i_sem);
                dput(dentry);
        }
        up(&nd.dentry->d_inode->i_sem);
        path_release(&nd);
  out:
        putname(tmp);
        path_release(&nd);
  out:
        putname(tmp);
-@@ -1343,7 +1507,17 @@ asmlinkage long sys_mkdir(const char * p
+@@ -1343,7 +1505,14 @@ asmlinkage long sys_mkdir(const char * p
                error = path_lookup(tmp, LOOKUP_PARENT, &nd);
                if (error)
                        goto out;
 -              dentry = lookup_create(&nd, 1);
                error = path_lookup(tmp, LOOKUP_PARENT, &nd);
                if (error)
                        goto out;
 -              dentry = lookup_create(&nd, 1);
-+              if (nd.dentry->d_inode->i_op->mkdir2) {
++              if (nd.dentry->d_inode->i_op->mkdir_raw) {
 +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
 +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
-+                      error = op->mkdir2(nd.dentry->d_inode,
-+                                         nd.last.name,
-+                                         nd.last.len,
-+                                         mode);
++                      error = op->mkdir_raw(&nd, mode);
 +                      /* the file system wants to use normal vfs path now */
 +                      if (error != -EOPNOTSUPP)
 +                              goto out2;
 +                      /* the file system wants to use normal vfs path now */
 +                      if (error != -EOPNOTSUPP)
 +                              goto out2;
                error = PTR_ERR(dentry);
                if (!IS_ERR(dentry)) {
                        error = vfs_mkdir(nd.dentry->d_inode, dentry,
                error = PTR_ERR(dentry);
                if (!IS_ERR(dentry)) {
                        error = vfs_mkdir(nd.dentry->d_inode, dentry,
-@@ -1351,6 +1525,7 @@ asmlinkage long sys_mkdir(const char * p
+@@ -1351,6 +1520,7 @@ asmlinkage long sys_mkdir(const char * p
                        dput(dentry);
                }
                up(&nd.dentry->d_inode->i_sem);
                        dput(dentry);
                }
                up(&nd.dentry->d_inode->i_sem);
                path_release(&nd);
  out:
                putname(tmp);
                path_release(&nd);
  out:
                putname(tmp);
-@@ -1451,8 +1626,33 @@ asmlinkage long sys_rmdir(const char * p
+@@ -1451,8 +1621,16 @@ asmlinkage long sys_rmdir(const char * p
                        error = -EBUSY;
                        goto exit1;
        }
                        error = -EBUSY;
                        goto exit1;
        }
-+      if (nd.dentry->d_inode->i_op->rmdir2) {
-+              struct inode_operations *op = nd.dentry->d_inode->i_op;
-+              struct dentry *last;
-+
-+              down(&nd.dentry->d_inode->i_sem);
-+              last = lookup_hash_it(&nd.last, nd.dentry, NULL);
-+              up(&nd.dentry->d_inode->i_sem);
-+              if (IS_ERR(last)) {
-+                      error = PTR_ERR(last);
-+                      goto exit1;
-+              }
-+              if (d_mountpoint(last)) {
-+                      dput(last);
-+                      error = -EBUSY;
-+                      goto exit1;
-+              }
-+              dput(last);
++      if (nd.dentry->d_inode->i_op->rmdir_raw) {
++              struct inode_operations *op = nd.dentry->d_inode->i_op;
 +
 +
-+              error = op->rmdir2(nd.dentry->d_inode,
-+                                 nd.last.name,
-+                                 nd.last.len);
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto exit1;
-+      }
++              error = op->rmdir_raw(&nd);
++              /* the file system wants to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto exit1;
++      }
        down(&nd.dentry->d_inode->i_sem);
 -      dentry = lookup_hash(&nd.last, nd.dentry);
 +      dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
        error = PTR_ERR(dentry);
        if (!IS_ERR(dentry)) {
                error = vfs_rmdir(nd.dentry->d_inode, dentry);
        down(&nd.dentry->d_inode->i_sem);
 -      dentry = lookup_hash(&nd.last, nd.dentry);
 +      dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
        error = PTR_ERR(dentry);
        if (!IS_ERR(dentry)) {
                error = vfs_rmdir(nd.dentry->d_inode, dentry);
-@@ -1510,8 +1710,17 @@ asmlinkage long sys_unlink(const char * 
+@@ -1510,8 +1688,15 @@ asmlinkage long sys_unlink(const char * 
        error = -EISDIR;
        if (nd.last_type != LAST_NORM)
                goto exit1;
        error = -EISDIR;
        if (nd.last_type != LAST_NORM)
                goto exit1;
-+      if (nd.dentry->d_inode->i_op->unlink2) {
-+              struct inode_operations *op = nd.dentry->d_inode->i_op;
-+              error = op->unlink2(nd.dentry->d_inode,
-+                                  nd.last.name,
-+                                  nd.last.len);
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto exit1;
-+      }
++      if (nd.dentry->d_inode->i_op->unlink_raw) {
++              struct inode_operations *op = nd.dentry->d_inode->i_op;
++              error = op->unlink_raw(&nd);
++              /* the file system wants to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto exit1;
++      }
        down(&nd.dentry->d_inode->i_sem);
 -      dentry = lookup_hash(&nd.last, nd.dentry);
 +      dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
        error = PTR_ERR(dentry);
        if (!IS_ERR(dentry)) {
                /* Why not before? Because we want correct error value */
        down(&nd.dentry->d_inode->i_sem);
 -      dentry = lookup_hash(&nd.last, nd.dentry);
 +      dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
        error = PTR_ERR(dentry);
        if (!IS_ERR(dentry)) {
                /* Why not before? Because we want correct error value */
-@@ -1578,15 +1787,26 @@ asmlinkage long sys_symlink(const char *
+@@ -1578,15 +1763,23 @@ asmlinkage long sys_symlink(const char *
                error = path_lookup(to, LOOKUP_PARENT, &nd);
                if (error)
                        goto out;
 -              dentry = lookup_create(&nd, 0);
                error = path_lookup(to, LOOKUP_PARENT, &nd);
                if (error)
                        goto out;
 -              dentry = lookup_create(&nd, 0);
-+              if (nd.dentry->d_inode->i_op->symlink2) {
++              if (nd.dentry->d_inode->i_op->symlink_raw) {
 +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
 +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
-+                      error = op->symlink2(nd.dentry->d_inode,
-+                                           nd.last.name,
-+                                           nd.last.len,
-+                                           from);
++                      error = op->symlink_raw(&nd, from);
 +                      /* the file system wants to use normal vfs path now */
 +                      if (error != -EOPNOTSUPP)
 +                              goto out2;
 +                      /* the file system wants to use normal vfs path now */
 +                      if (error != -EOPNOTSUPP)
 +                              goto out2;
                putname(to);
        }
        putname(from);
                putname(to);
        }
        putname(from);
-@@ -1662,7 +1882,17 @@ asmlinkage long sys_link(const char * ol
+@@ -1662,7 +1855,14 @@ asmlinkage long sys_link(const char * ol
                error = -EXDEV;
                if (old_nd.mnt != nd.mnt)
                        goto out_release;
 -              new_dentry = lookup_create(&nd, 0);
                error = -EXDEV;
                if (old_nd.mnt != nd.mnt)
                        goto out_release;
 -              new_dentry = lookup_create(&nd, 0);
-+              if (nd.dentry->d_inode->i_op->link2) {
++              if (nd.dentry->d_inode->i_op->link_raw) {
 +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
 +                      struct inode_operations *op = nd.dentry->d_inode->i_op;
-+                      error = op->link2(old_nd.dentry->d_inode,
-+                                        nd.dentry->d_inode,
-+                                        nd.last.name,
-+                                        nd.last.len);
++                      error = op->link_raw(&old_nd, &nd);
 +                      /* the file system wants to use normal vfs path now */
 +                      if (error != -EOPNOTSUPP)
 +                              goto out_release;
 +                      /* the file system wants to use normal vfs path now */
 +                      if (error != -EOPNOTSUPP)
 +                              goto out_release;
                error = PTR_ERR(new_dentry);
                if (!IS_ERR(new_dentry)) {
                        error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
                error = PTR_ERR(new_dentry);
                if (!IS_ERR(new_dentry)) {
                        error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
-@@ -1706,7 +1936,8 @@ exit:
+@@ -1706,7 +1906,7 @@ exit:
   *       locking].
   */
  int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
 -             struct inode *new_dir, struct dentry *new_dentry)
   *       locking].
   */
  int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
 -             struct inode *new_dir, struct dentry *new_dentry)
-+                 struct inode *new_dir, struct dentry *new_dentry,
-+                 struct lookup_intent *it)
++                 struct inode *new_dir, struct dentry *new_dentry)
  {
        int error;
        struct inode *target;
  {
        int error;
        struct inode *target;
-@@ -1764,6 +1995,7 @@ int vfs_rename_dir(struct inode *old_dir
-               error = -EBUSY;
-       else 
-               error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
-+      intent_release(new_dentry, it);
-       if (target) {
-               if (!error)
-                       target->i_flags |= S_DEAD;
-@@ -1785,7 +2017,8 @@ out_unlock:
+@@ -1785,7 +1985,7 @@ out_unlock:
  }
  
  int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
 -             struct inode *new_dir, struct dentry *new_dentry)
  }
  
  int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
 -             struct inode *new_dir, struct dentry *new_dentry)
-+                   struct inode *new_dir, struct dentry *new_dentry,
-+                   struct lookup_intent *it)
++                   struct inode *new_dir, struct dentry *new_dentry)
  {
        int error;
  
  {
        int error;
  
-@@ -1816,6 +2049,7 @@ int vfs_rename_other(struct inode *old_d
-               error = -EBUSY;
-       else
-               error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
-+      intent_release(new_dentry, it);
-       double_up(&old_dir->i_zombie, &new_dir->i_zombie);
-       if (error)
-               return error;
-@@ -1827,13 +2061,14 @@ int vfs_rename_other(struct inode *old_d
- }
- int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
--             struct inode *new_dir, struct dentry *new_dentry)
-+             struct inode *new_dir, struct dentry *new_dentry,
-+             struct lookup_intent *it)
- {
-       int error;
-       if (S_ISDIR(old_dentry->d_inode->i_mode))
--              error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
-+              error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry,it);
-       else
--              error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
-+              error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,it);
-       if (!error) {
-               if (old_dir == new_dir)
-                       inode_dir_notify(old_dir, DN_RENAME);
-@@ -1875,7 +2110,7 @@ static inline int do_rename(const char *
+@@ -1873,9 +2073,18 @@ static inline int do_rename(const char *
+       if (newnd.last_type != LAST_NORM)
+               goto exit2;
  
  
++      if (old_dir->d_inode->i_op->rename_raw) {
++              lock_kernel();
++              error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd);
++              unlock_kernel();
++              /* the file system wants to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto exit2;
++      }
++
        double_lock(new_dir, old_dir);
  
 -      old_dentry = lookup_hash(&oldnd.last, old_dir);
        double_lock(new_dir, old_dir);
  
 -      old_dentry = lookup_hash(&oldnd.last, old_dir);
        error = PTR_ERR(old_dentry);
        if (IS_ERR(old_dentry))
                goto exit3;
        error = PTR_ERR(old_dentry);
        if (IS_ERR(old_dentry))
                goto exit3;
-@@ -1891,16 +2126,37 @@ static inline int do_rename(const char *
+@@ -1891,16 +2100,16 @@ static inline int do_rename(const char *
                if (newnd.last.name[newnd.last.len])
                        goto exit4;
        }
                if (newnd.last.name[newnd.last.len])
                        goto exit4;
        }
        if (IS_ERR(new_dentry))
                goto exit4;
  
        if (IS_ERR(new_dentry))
                goto exit4;
  
-+      if (old_dir->d_inode->i_op->rename2) {
-+              lock_kernel();
-+              /* don't rename mount point. mds will take care of
-+               * the rest sanity checking */
-+              if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) {
-+                      error = -EBUSY;
-+                      goto exit5;
-+              }
-+
-+              error = old_dir->d_inode->i_op->rename2(old_dir->d_inode,
-+                                                      new_dir->d_inode,
-+                                                      oldnd.last.name,
-+                                                      oldnd.last.len,
-+                                                      newnd.last.name,
-+                                                      newnd.last.len);
-+              unlock_kernel();
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto exit5;
-+      }
 +
        lock_kernel();
        error = vfs_rename(old_dir->d_inode, old_dentry,
 +
        lock_kernel();
        error = vfs_rename(old_dir->d_inode, old_dentry,
--                                 new_dir->d_inode, new_dentry);
-+                                 new_dir->d_inode, new_dentry, NULL);
+                                  new_dir->d_inode, new_dentry);
        unlock_kernel();
 -
        unlock_kernel();
 -
-+exit5:
        dput(new_dentry);
  exit4:
        dput(old_dentry);
        dput(new_dentry);
  exit4:
        dput(old_dentry);
-@@ -1951,20 +2207,28 @@ out:
+@@ -1951,20 +2160,28 @@ out:
  }
  
  static inline int
  }
  
  static inline int
        if (IS_ERR(link))
                goto fail;
  
        if (IS_ERR(link))
                goto fail;
  
-+        if (it == NULL)
-+                it = nd->it;
-+        else if (it != nd->it)
-+                printk("it != nd->it: tell phil@clusterfs.com\n");
-+        if (it != NULL)
-+                it->it_int_flags |= IT_FL_FOLLOWED;
++      if (it == NULL)
++              it = nd->it;
++      else if (it != nd->it)
++              printk("it != nd->it: tell phil@clusterfs.com\n");
++      if (it != NULL)
++              it->it_int_flags |= IT_FL_FOLLOWED;
 +
        if (*link == '/') {
                path_release(nd);
 +
        if (*link == '/') {
                path_release(nd);
  out:
        if (current->link_count || res || nd->last_type!=LAST_NORM)
                return res;
  out:
        if (current->link_count || res || nd->last_type!=LAST_NORM)
                return res;
-@@ -1986,7 +2250,13 @@ fail:
+@@ -1986,7 +2203,13 @@ fail:
  
  int vfs_follow_link(struct nameidata *nd, const char *link)
  {
  
  int vfs_follow_link(struct nameidata *nd, const char *link)
  {
  }
  
  /* get the link contents into pagecache */
  }
  
  /* get the link contents into pagecache */
-@@ -2028,7 +2298,7 @@ int page_follow_link(struct dentry *dent
+@@ -2028,7 +2251,7 @@ int page_follow_link(struct dentry *dent
  {
        struct page *page = NULL;
        char *s = page_getlink(dentry, &page);
  {
        struct page *page = NULL;
        char *s = page_getlink(dentry, &page);
        if (page) {
                kunmap(page);
                page_cache_release(page);
        if (page) {
                kunmap(page);
                page_cache_release(page);
---- linux-2.4.20-l18/fs/nfsd/vfs.c~vfs_intent-2.4.20-vanilla   Thu Nov 28 18:53:15 2002
-+++ linux-2.4.20-l18-phil/fs/nfsd/vfs.c        Wed May 28 01:39:18 2003
-@@ -1291,7 +1291,7 @@ nfsd_rename(struct svc_rqst *rqstp, stru
-                       err = nfserr_perm;
-       } else
- #endif
--      err = vfs_rename(fdir, odentry, tdir, ndentry);
-+      err = vfs_rename(fdir, odentry, tdir, ndentry, NULL);
-       if (!err && EX_ISSYNC(tfhp->fh_export)) {
-               nfsd_sync_dir(tdentry);
-               nfsd_sync_dir(fdentry);
---- linux-2.4.20-l18/fs/open.c~vfs_intent-2.4.20-vanilla       Thu Nov 28 18:53:15 2002
-+++ linux-2.4.20-l18-phil/fs/open.c    Wed May 28 01:39:18 2003
+--- linux-2.4.20-ad/fs/open.c~vfs_intent-2.4.20-vanilla        2002-11-28 16:53:15.000000000 -0700
++++ linux-2.4.20-ad-braam/fs/open.c    2003-07-08 13:51:14.000000000 -0600
 @@ -19,6 +19,8 @@
  #include <asm/uaccess.h>
  
 @@ -19,6 +19,8 @@
  #include <asm/uaccess.h>
  
        int error;
        struct iattr newattrs;
  
        int error;
        struct iattr newattrs;
  
-@@ -108,7 +111,14 @@ int do_truncate(struct dentry *dentry, l
+@@ -108,7 +111,13 @@ int do_truncate(struct dentry *dentry, l
        down(&inode->i_sem);
        newattrs.ia_size = length;
        newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
        down(&inode->i_sem);
        newattrs.ia_size = length;
        newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
 +              newattrs.ia_valid |= ATTR_FROM_OPEN;
 +      if (op->setattr_raw) {
 +              newattrs.ia_valid |= ATTR_RAW;
 +              newattrs.ia_valid |= ATTR_FROM_OPEN;
 +      if (op->setattr_raw) {
 +              newattrs.ia_valid |= ATTR_RAW;
-+              newattrs.ia_ctime = CURRENT_TIME;
 +              error = op->setattr_raw(inode, &newattrs);
 +              error = op->setattr_raw(inode, &newattrs);
-+      } else 
++      } else
 +              error = notify_change(dentry, &newattrs);
        up(&inode->i_sem);
        return error;
 +              error = notify_change(dentry, &newattrs);
        up(&inode->i_sem);
        return error;
        if (!error) {
                DQUOT_INIT(inode);
 -              error = do_truncate(nd.dentry, length);
        if (!error) {
                DQUOT_INIT(inode);
 -              error = do_truncate(nd.dentry, length);
-+              intent_release(nd.dentry, &it);
++              intent_release(&it);
 +              error = do_truncate(nd.dentry, length, 0);
        }
        put_write_access(inode);
  
  dput_and_out:
 +              error = do_truncate(nd.dentry, length, 0);
        }
        put_write_access(inode);
  
  dput_and_out:
-+      intent_release(nd.dentry, &it);
++      intent_release(&it);
        path_release(&nd);
  out:
        return error;
        path_release(&nd);
  out:
        return error;
        error = -EROFS;
        if (IS_RDONLY(inode))
                goto dput_and_out;
        error = -EROFS;
        if (IS_RDONLY(inode))
                goto dput_and_out;
-@@ -279,11 +294,29 @@ asmlinkage long sys_utime(char * filenam
+@@ -279,11 +294,25 @@ asmlinkage long sys_utime(char * filenam
                        goto dput_and_out;
  
                newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
                        goto dput_and_out;
  
                newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
 +                      goto dput_and_out;
 +      }
 +
 +                      goto dput_and_out;
 +      }
 +
-+      error = -EROFS;
-+      if (IS_RDONLY(inode))
-+              goto dput_and_out;
-+
 +      error = -EPERM;
 +      if (!times) {
                if (current->fsuid != inode->i_uid &&
 +      error = -EPERM;
 +      if (!times) {
                if (current->fsuid != inode->i_uid &&
                if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
                   && !special_file(nd.dentry->d_inode->i_mode))
                        res = -EROFS;
                if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
                   && !special_file(nd.dentry->d_inode->i_mode))
                        res = -EROFS;
-+              intent_release(nd.dentry, &it);
++              intent_release(&it);
                path_release(&nd);
        }
  
                path_release(&nd);
        }
  
        set_fs_pwd(current->fs, nd.mnt, nd.dentry);
  
  dput_and_out:
        set_fs_pwd(current->fs, nd.mnt, nd.dentry);
  
  dput_and_out:
-+      intent_release(nd.dentry, &it);
++      intent_release(&it);
        path_release(&nd);
  out:
        return error;
        path_release(&nd);
  out:
        return error;
        set_fs_altroot();
        error = 0;
  dput_and_out:
        set_fs_altroot();
        error = 0;
  dput_and_out:
-+      intent_release(nd.dentry, &it);
++      intent_release(&it);
        path_release(&nd);
  out:
        return error;
        path_release(&nd);
  out:
        return error;
 +
 +              newattrs.ia_uid = user;
 +              newattrs.ia_gid = group;
 +
 +              newattrs.ia_uid = user;
 +              newattrs.ia_gid = group;
-+              newattrs.ia_valid = ATTR_UID | ATTR_GID;
++              newattrs.ia_valid = ATTR_UID | ATTR_GID | ATTR_CTIME;
 +              newattrs.ia_valid |= ATTR_RAW;
 +              error = op->setattr_raw(inode, &newattrs);
 +              /* the file system wants to use normal vfs path now */
 +              newattrs.ia_valid |= ATTR_RAW;
 +              error = op->setattr_raw(inode, &newattrs);
 +              /* the file system wants to use normal vfs path now */
  {
        struct file * f;
        struct inode *inode;
  {
        struct file * f;
        struct inode *inode;
-@@ -699,6 +782,7 @@ struct file *dentry_open(struct dentry *
+@@ -693,12 +776,15 @@ struct file *dentry_open(struct dentry *
+       }
+       if (f->f_op && f->f_op->open) {
++              f->f_it = it;
+               error = f->f_op->open(inode,f);
++              f->f_it = NULL;
+               if (error)
+                       goto cleanup_all;
        }
        f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
  
        }
        f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
  
-+      intent_release(dentry, it);
++      intent_release(it);
        return f;
  
  cleanup_all:
        return f;
  
  cleanup_all:
-@@ -713,11 +797,17 @@ cleanup_all:
+@@ -713,11 +799,17 @@ cleanup_all:
  cleanup_file:
        put_filp(f);
  cleanup_dentry:
  cleanup_file:
        put_filp(f);
  cleanup_dentry:
-+      intent_release(dentry, it);
++      intent_release(it);
        dput(dentry);
        mntput(mnt);
        return ERR_PTR(error);
        dput(dentry);
        mntput(mnt);
        return ERR_PTR(error);
  /*
   * Find an empty file descriptor entry, and mark it busy.
   */
  /*
   * Find an empty file descriptor entry, and mark it busy.
   */
---- linux-2.4.20-l18/fs/stat.c~vfs_intent-2.4.20-vanilla       Thu Sep 13 19:04:43 2001
-+++ linux-2.4.20-l18-phil/fs/stat.c    Wed May 28 01:39:18 2003
-@@ -135,13 +135,15 @@ static int cp_new_stat(struct inode * in
+--- linux-2.4.20-ad/fs/stat.c~vfs_intent-2.4.20-vanilla        2001-09-13 17:04:43.000000000 -0600
++++ linux-2.4.20-ad-braam/fs/stat.c    2003-07-07 15:13:53.000000000 -0600
+@@ -17,10 +17,12 @@
+  * Revalidate the inode. This is required for proper NFS attribute caching.
+  */
+ static __inline__ int
+-do_revalidate(struct dentry *dentry)
++do_revalidate(struct dentry *dentry, struct lookup_intent *it)
+ {
+       struct inode * inode = dentry->d_inode;
+-      if (inode->i_op && inode->i_op->revalidate)
++      if (inode->i_op && inode->i_op->revalidate_it)
++              return inode->i_op->revalidate_it(dentry, it);
++      else if (inode->i_op && inode->i_op->revalidate)
+               return inode->i_op->revalidate(dentry);
+       return 0;
+ }
+@@ -135,13 +137,15 @@ static int cp_new_stat(struct inode * in
  asmlinkage long sys_stat(char * filename, struct __old_kernel_stat * statbuf)
  {
        struct nameidata nd;
  asmlinkage long sys_stat(char * filename, struct __old_kernel_stat * statbuf)
  {
        struct nameidata nd;
 -      error = user_path_walk(filename, &nd);
 +      error = user_path_walk_it(filename, &nd, &it);
        if (!error) {
 -      error = user_path_walk(filename, &nd);
 +      error = user_path_walk_it(filename, &nd, &it);
        if (!error) {
-               error = do_revalidate(nd.dentry);
+-              error = do_revalidate(nd.dentry);
++              error = do_revalidate(nd.dentry, &it);
                if (!error)
                        error = cp_old_stat(nd.dentry->d_inode, statbuf);
                if (!error)
                        error = cp_old_stat(nd.dentry->d_inode, statbuf);
-+              intent_release(nd.dentry, &it);
++              intent_release(&it);
                path_release(&nd);
        }
        return error;
                path_release(&nd);
        }
        return error;
-@@ -151,13 +153,15 @@ asmlinkage long sys_stat(char * filename
+@@ -151,13 +155,15 @@ asmlinkage long sys_stat(char * filename
  asmlinkage long sys_newstat(char * filename, struct stat * statbuf)
  {
        struct nameidata nd;
  asmlinkage long sys_newstat(char * filename, struct stat * statbuf)
  {
        struct nameidata nd;
 -      error = user_path_walk(filename, &nd);
 +      error = user_path_walk_it(filename, &nd, &it);
        if (!error) {
 -      error = user_path_walk(filename, &nd);
 +      error = user_path_walk_it(filename, &nd, &it);
        if (!error) {
-               error = do_revalidate(nd.dentry);
+-              error = do_revalidate(nd.dentry);
++              error = do_revalidate(nd.dentry, &it);
                if (!error)
                        error = cp_new_stat(nd.dentry->d_inode, statbuf);
                if (!error)
                        error = cp_new_stat(nd.dentry->d_inode, statbuf);
-+              intent_release(nd.dentry, &it);
++              intent_release(&it);
                path_release(&nd);
        }
        return error;
                path_release(&nd);
        }
        return error;
-@@ -172,13 +176,15 @@ asmlinkage long sys_newstat(char * filen
+@@ -172,13 +178,15 @@ asmlinkage long sys_newstat(char * filen
  asmlinkage long sys_lstat(char * filename, struct __old_kernel_stat * statbuf)
  {
        struct nameidata nd;
  asmlinkage long sys_lstat(char * filename, struct __old_kernel_stat * statbuf)
  {
        struct nameidata nd;
 -      error = user_path_walk_link(filename, &nd);
 +      error = user_path_walk_link_it(filename, &nd, &it);
        if (!error) {
 -      error = user_path_walk_link(filename, &nd);
 +      error = user_path_walk_link_it(filename, &nd, &it);
        if (!error) {
-               error = do_revalidate(nd.dentry);
+-              error = do_revalidate(nd.dentry);
++              error = do_revalidate(nd.dentry, &it);
                if (!error)
                        error = cp_old_stat(nd.dentry->d_inode, statbuf);
                if (!error)
                        error = cp_old_stat(nd.dentry->d_inode, statbuf);
-+              intent_release(nd.dentry, &it);
++              intent_release(&it);
                path_release(&nd);
        }
        return error;
                path_release(&nd);
        }
        return error;
-@@ -189,13 +195,15 @@ asmlinkage long sys_lstat(char * filenam
+@@ -189,13 +197,15 @@ asmlinkage long sys_lstat(char * filenam
  asmlinkage long sys_newlstat(char * filename, struct stat * statbuf)
  {
        struct nameidata nd;
  asmlinkage long sys_newlstat(char * filename, struct stat * statbuf)
  {
        struct nameidata nd;
 -      error = user_path_walk_link(filename, &nd);
 +      error = user_path_walk_link_it(filename, &nd, &it);
        if (!error) {
 -      error = user_path_walk_link(filename, &nd);
 +      error = user_path_walk_link_it(filename, &nd, &it);
        if (!error) {
-               error = do_revalidate(nd.dentry);
+-              error = do_revalidate(nd.dentry);
++              error = do_revalidate(nd.dentry, &it);
                if (!error)
                        error = cp_new_stat(nd.dentry->d_inode, statbuf);
                if (!error)
                        error = cp_new_stat(nd.dentry->d_inode, statbuf);
-+              intent_release(nd.dentry, &it);
++              intent_release(&it);
                path_release(&nd);
        }
        return error;
                path_release(&nd);
        }
        return error;
-@@ -333,12 +341,14 @@ asmlinkage long sys_stat64(char * filena
+@@ -216,7 +226,7 @@ asmlinkage long sys_fstat(unsigned int f
+       if (f) {
+               struct dentry * dentry = f->f_dentry;
+-              err = do_revalidate(dentry);
++              err = do_revalidate(dentry, NULL);
+               if (!err)
+                       err = cp_old_stat(dentry->d_inode, statbuf);
+               fput(f);
+@@ -235,7 +245,7 @@ asmlinkage long sys_newfstat(unsigned in
+       if (f) {
+               struct dentry * dentry = f->f_dentry;
+-              err = do_revalidate(dentry);
++              err = do_revalidate(dentry, NULL);
+               if (!err)
+                       err = cp_new_stat(dentry->d_inode, statbuf);
+               fput(f);
+@@ -257,7 +267,7 @@ asmlinkage long sys_readlink(const char 
+               error = -EINVAL;
+               if (inode->i_op && inode->i_op->readlink &&
+-                  !(error = do_revalidate(nd.dentry))) {
++                  !(error = do_revalidate(nd.dentry, NULL))) {
+                       UPDATE_ATIME(inode);
+                       error = inode->i_op->readlink(nd.dentry, buf, bufsiz);
+               }
+@@ -333,12 +343,14 @@ asmlinkage long sys_stat64(char * filena
  {
        struct nameidata nd;
        int error;
  {
        struct nameidata nd;
        int error;
 -      error = user_path_walk(filename, &nd);
 +      error = user_path_walk_it(filename, &nd, &it);
        if (!error) {
 -      error = user_path_walk(filename, &nd);
 +      error = user_path_walk_it(filename, &nd, &it);
        if (!error) {
-               error = do_revalidate(nd.dentry);
+-              error = do_revalidate(nd.dentry);
++              error = do_revalidate(nd.dentry, &it);
                if (!error)
                        error = cp_new_stat64(nd.dentry->d_inode, statbuf);
                if (!error)
                        error = cp_new_stat64(nd.dentry->d_inode, statbuf);
-+              intent_release(nd.dentry, &it);
++              intent_release(&it);
                path_release(&nd);
        }
        return error;
                path_release(&nd);
        }
        return error;
-@@ -348,12 +358,14 @@ asmlinkage long sys_lstat64(char * filen
+@@ -348,12 +360,14 @@ asmlinkage long sys_lstat64(char * filen
  {
        struct nameidata nd;
        int error;
  {
        struct nameidata nd;
        int error;
 -      error = user_path_walk_link(filename, &nd);
 +      error = user_path_walk_link_it(filename, &nd, &it);
        if (!error) {
 -      error = user_path_walk_link(filename, &nd);
 +      error = user_path_walk_link_it(filename, &nd, &it);
        if (!error) {
-               error = do_revalidate(nd.dentry);
+-              error = do_revalidate(nd.dentry);
++              error = do_revalidate(nd.dentry, &it);
                if (!error)
                        error = cp_new_stat64(nd.dentry->d_inode, statbuf);
                if (!error)
                        error = cp_new_stat64(nd.dentry->d_inode, statbuf);
-+              intent_release(nd.dentry, &it);
++              intent_release(&it);
                path_release(&nd);
        }
        return error;
                path_release(&nd);
        }
        return error;
---- linux-2.4.20-l18/fs/proc/base.c~vfs_intent-2.4.20-vanilla  Wed Jun  4 22:53:14 2003
-+++ linux-2.4.20-l18-phil/fs/proc/base.c       Wed Jun  4 22:50:35 2003
+@@ -368,7 +382,7 @@ asmlinkage long sys_fstat64(unsigned lon
+       if (f) {
+               struct dentry * dentry = f->f_dentry;
+-              err = do_revalidate(dentry);
++              err = do_revalidate(dentry, NULL);
+               if (!err)
+                       err = cp_new_stat64(dentry->d_inode, statbuf);
+               fput(f);
+--- linux-2.4.20-ad/fs/proc/base.c~vfs_intent-2.4.20-vanilla   2002-08-02 18:39:45.000000000 -0600
++++ linux-2.4.20-ad-braam/fs/proc/base.c       2003-07-07 15:13:53.000000000 -0600
 @@ -464,6 +464,9 @@ static int proc_pid_follow_link(struct d
  
        error = inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt);
        nd->last_type = LAST_BIND;
 +
 @@ -464,6 +464,9 @@ static int proc_pid_follow_link(struct d
  
        error = inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt);
        nd->last_type = LAST_BIND;
 +
-+        if (nd->it != NULL)
-+                nd->it->it_int_flags |= IT_FL_FOLLOWED;
++      if (nd->it != NULL)
++              nd->it->it_int_flags |= IT_FL_FOLLOWED;
  out:
        return error;
  }
  out:
        return error;
  }
---- linux-2.4.20-l18/include/linux/dcache.h~vfs_intent-2.4.20-vanilla  Thu Nov 28 18:53:15 2002
-+++ linux-2.4.20-l18-phil/include/linux/dcache.h       Sun Jun  1 22:35:10 2003
-@@ -7,6 +7,28 @@
+--- linux-2.4.20-ad/include/linux/dcache.h~vfs_intent-2.4.20-vanilla   2002-11-28 16:53:15.000000000 -0700
++++ linux-2.4.20-ad-braam/include/linux/dcache.h       2003-07-09 01:40:11.000000000 -0600
+@@ -7,6 +7,44 @@
  #include <linux/mount.h>
  #include <linux/kernel.h>
  
  #include <linux/mount.h>
  #include <linux/kernel.h>
  
-+#define IT_OPEN     (1)
-+#define IT_CREAT    (1<<1)
-+#define IT_READDIR  (1<<2)
-+#define IT_GETATTR  (1<<3)
-+#define IT_LOOKUP   (1<<4)
-+#define IT_UNLINK   (1<<5)
++#define IT_OPEN     0x0001
++#define IT_CREAT    0x0002
++#define IT_READDIR  0x0004
++#define IT_GETATTR  0x0008
++#define IT_LOOKUP   0x0010
++#define IT_UNLINK   0x0020
++#define IT_GETXATTR 0x0040
++#define IT_EXEC     0x0080
++#define IT_PIN      0x0100
++
++#define IT_FL_LOCKED   0x0001
++#define IT_FL_FOLLOWED 0x0002 /* set by vfs_follow_link */
 +
 +
-+#define IT_FL_LOCKED   (1)
-+#define IT_FL_FOLLOWED (1<<1) /* set by vfs_follow_link */
++#define INTENT_MAGIC 0x19620323
 +
 +struct lookup_intent {
 +      int it_op;
 +
 +struct lookup_intent {
 +      int it_op;
++      void (*it_op_release)(struct lookup_intent *);
++      int it_magic;
 +      int it_mode;
 +      int it_flags;
 +      int it_disposition;
 +      int it_mode;
 +      int it_flags;
 +      int it_disposition;
 +      void *it_data;
 +};
 +
 +      void *it_data;
 +};
 +
++static inline void intent_init(struct lookup_intent *it, int op, int flags)
++{
++      memset(it, 0, sizeof(*it));
++      it->it_magic = INTENT_MAGIC;
++      it->it_op = op;
++      it->it_flags = flags;
++}
++
++
  /*
   * linux/include/linux/dcache.h
   *
  /*
   * linux/include/linux/dcache.h
   *
-@@ -79,6 +101,7 @@ struct dentry {
-       unsigned long d_time;           /* used by d_revalidate */
-       struct dentry_operations  *d_op;
-       struct super_block * d_sb;      /* The root of the dentry tree */
-+      struct lookup_intent *d_it;
-       unsigned long d_vfs_flags;
-       void * d_fsdata;                /* fs-specific data */
-       unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */
-@@ -91,8 +114,15 @@ struct dentry_operations {
+@@ -91,8 +129,22 @@ struct dentry_operations {
        int (*d_delete)(struct dentry *);
        void (*d_release)(struct dentry *);
        void (*d_iput)(struct dentry *, struct inode *);
        int (*d_delete)(struct dentry *);
        void (*d_release)(struct dentry *);
        void (*d_iput)(struct dentry *, struct inode *);
-+      int (*d_revalidate2)(struct dentry *, int, struct lookup_intent *);
-+      void (*d_intent_release)(struct dentry *, struct lookup_intent *);
++      int (*d_revalidate_it)(struct dentry *, int, struct lookup_intent *);
++      void (*d_pin)(struct dentry *, struct vfsmount * , int);
++      void (*d_unpin)(struct dentry *, struct vfsmount *, int);
  };
  
  };
  
++#define PIN(de,mnt,flag)  if (de->d_op && de->d_op->d_pin) \
++                              de->d_op->d_pin(de, mnt, flag);
++#define UNPIN(de,mnt,flag)  if (de->d_op && de->d_op->d_unpin) \
++                              de->d_op->d_unpin(de, mnt, flag);
++
++
 +/* defined in fs/namei.c */
 +/* defined in fs/namei.c */
-+extern void intent_release(struct dentry *de, struct lookup_intent *it);
++extern void intent_release(struct lookup_intent *it);
 +/* defined in fs/dcache.c */
 +extern void __d_rehash(struct dentry * entry, int lock);
 +
  /* the dentry parameter passed to d_hash and d_compare is the parent
   * directory of the entries to be compared. It is used in case these
   * functions need any directory specific information for determining
 +/* defined in fs/dcache.c */
 +extern void __d_rehash(struct dentry * entry, int lock);
 +
  /* the dentry parameter passed to d_hash and d_compare is the parent
   * directory of the entries to be compared. It is used in case these
   * functions need any directory specific information for determining
-@@ -124,6 +154,7 @@ d_iput:            no              no              yes
+@@ -124,6 +176,7 @@ d_iput:            no              no              yes
                                         * s_nfsd_free_path semaphore will be down
                                         */
  #define DCACHE_REFERENCED     0x0008  /* Recently used, don't discard. */
                                         * s_nfsd_free_path semaphore will be down
                                         */
  #define DCACHE_REFERENCED     0x0008  /* Recently used, don't discard. */
  
  extern spinlock_t dcache_lock;
  
  
  extern spinlock_t dcache_lock;
  
---- linux-2.4.20-l18/include/linux/fs.h~vfs_intent-2.4.20-vanilla      Wed May 28 01:39:17 2003
-+++ linux-2.4.20-l18-phil/include/linux/fs.h   Sun Jun  1 22:07:11 2003
-@@ -338,6 +338,8 @@ extern void set_bh_page(struct buffer_he
+--- linux-2.4.20/include/linux/fs.h~vfs_intent-2.4.20-vanilla  2003-06-12 03:24:59.000000000 -0600
++++ linux-2.4.20-braam/include/linux/fs.h      2003-06-12 03:25:00.000000000 -0600
+@@ -338,6 +338,9 @@ extern void set_bh_page(struct buffer_he
  #define ATTR_MTIME_SET        256
  #define ATTR_FORCE    512     /* Not a change, but a change it */
  #define ATTR_ATTR_FLAG        1024
  #define ATTR_MTIME_SET        256
  #define ATTR_FORCE    512     /* Not a change, but a change it */
  #define ATTR_ATTR_FLAG        1024
-+#define ATTR_RAW      2048    /* file system, not vfs will massage attrs */
-+#define ATTR_FROM_OPEN        4096    /* called from open path, ie O_TRUNC */
++#define ATTR_RAW      0x0800  /* file system, not vfs will massage attrs */
++#define ATTR_FROM_OPEN        0x1000  /* called from open path, ie O_TRUNC */
++#define ATTR_CTIME_SET 0x2000
  
  /*
   * This is the Inode Attributes structure, used for notify_change().  It
  
  /*
   * This is the Inode Attributes structure, used for notify_change().  It
  
        /* needed for tty driver, and maybe others */
        void                    *private_data;
  
        /* needed for tty driver, and maybe others */
        void                    *private_data;
-+      struct lookup_intent    *f_intent;
++      struct lookup_intent    *f_it;
  
        /* preallocated helper kiobuf to speedup O_DIRECT */
        struct kiobuf           *f_iobuf;
  
        /* preallocated helper kiobuf to speedup O_DIRECT */
        struct kiobuf           *f_iobuf;
        struct qstr last;
        unsigned int flags;
        int last_type;
        struct qstr last;
        unsigned int flags;
        int last_type;
-+        struct lookup_intent *it;
++      struct lookup_intent *it;
  };
  
  #define DQUOT_USR_ENABLED     0x01            /* User diskquotas enabled */
  };
  
  #define DQUOT_USR_ENABLED     0x01            /* User diskquotas enabled */
-@@ -794,7 +798,9 @@ extern int vfs_symlink(struct inode *, s
+@@ -794,7 +798,8 @@ extern int vfs_symlink(struct inode *, s
  extern int vfs_link(struct dentry *, struct inode *, struct dentry *);
  extern int vfs_rmdir(struct inode *, struct dentry *);
  extern int vfs_unlink(struct inode *, struct dentry *);
 -extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
 +int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
  extern int vfs_link(struct dentry *, struct inode *, struct dentry *);
  extern int vfs_rmdir(struct inode *, struct dentry *);
  extern int vfs_unlink(struct inode *, struct dentry *);
 -extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
 +int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
-+              struct inode *new_dir, struct dentry *new_dentry,
-+              struct lookup_intent *it);
++             struct inode *new_dir, struct dentry *new_dentry);
  
  /*
   * File types
  
  /*
   * File types
-@@ -855,20 +861,33 @@ struct file_operations {
+@@ -854,21 +859,32 @@ struct file_operations {
  struct inode_operations {
        int (*create) (struct inode *,struct dentry *,int);
  struct inode_operations {
        int (*create) (struct inode *,struct dentry *,int);
++      int (*create_it) (struct inode *,struct dentry *,int, struct lookup_intent *);
        struct dentry * (*lookup) (struct inode *,struct dentry *);
        struct dentry * (*lookup) (struct inode *,struct dentry *);
-+      struct dentry * (*lookup2) (struct inode *,struct dentry *, struct lookup_intent *);
++      struct dentry * (*lookup_it) (struct inode *,struct dentry *, struct lookup_intent *, int flags);
        int (*link) (struct dentry *,struct inode *,struct dentry *);
        int (*link) (struct dentry *,struct inode *,struct dentry *);
-+      int (*link2) (struct inode *,struct inode *, const char *, int);
++      int (*link_raw) (struct nameidata *,struct nameidata *);
        int (*unlink) (struct inode *,struct dentry *);
        int (*unlink) (struct inode *,struct dentry *);
-+      int (*unlink2) (struct inode *, const char *, int);
++      int (*unlink_raw) (struct nameidata *);
        int (*symlink) (struct inode *,struct dentry *,const char *);
        int (*symlink) (struct inode *,struct dentry *,const char *);
-+      int (*symlink2) (struct inode *, const char *, int, const char *);
++      int (*symlink_raw) (struct nameidata *,const char *);
        int (*mkdir) (struct inode *,struct dentry *,int);
        int (*mkdir) (struct inode *,struct dentry *,int);
-+      int (*mkdir2) (struct inode *, const char *, int,int);
++      int (*mkdir_raw) (struct nameidata *,int);
        int (*rmdir) (struct inode *,struct dentry *);
        int (*rmdir) (struct inode *,struct dentry *);
-+      int (*rmdir2) (struct inode *, const char *, int);
++      int (*rmdir_raw) (struct nameidata *);
        int (*mknod) (struct inode *,struct dentry *,int,int);
        int (*mknod) (struct inode *,struct dentry *,int,int);
-+      int (*mknod2) (struct inode *, const char *, int,int,int);
++      int (*mknod_raw) (struct nameidata *,int,dev_t);
        int (*rename) (struct inode *, struct dentry *,
                        struct inode *, struct dentry *);
        int (*rename) (struct inode *, struct dentry *,
                        struct inode *, struct dentry *);
-+      int (*rename2) (struct inode *, struct inode *,
-+                      const char *oldname, int oldlen,
-+                      const char *newname, int newlen);
++      int (*rename_raw) (struct nameidata *, struct nameidata *);
        int (*readlink) (struct dentry *, char *,int);
        int (*follow_link) (struct dentry *, struct nameidata *);
        int (*readlink) (struct dentry *, char *,int);
        int (*follow_link) (struct dentry *, struct nameidata *);
-+      int (*follow_link2) (struct dentry *, struct nameidata *,
-+                           struct lookup_intent *it);
        void (*truncate) (struct inode *);
        int (*permission) (struct inode *, int);
        int (*revalidate) (struct dentry *);
        void (*truncate) (struct inode *);
        int (*permission) (struct inode *, int);
        int (*revalidate) (struct dentry *);
++      int (*revalidate_it) (struct dentry *, struct lookup_intent *);
        int (*setattr) (struct dentry *, struct iattr *);
        int (*setattr) (struct dentry *, struct iattr *);
-+      int (*setattr_raw) (struct inode *, struct iattr *);
++      int (*setattr_raw) (struct inode *, struct iattr *);
        int (*getattr) (struct dentry *, struct iattr *);
        int (*setxattr) (struct dentry *, const char *, void *, size_t, int);
        ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
        int (*getattr) (struct dentry *, struct iattr *);
        int (*setxattr) (struct dentry *, const char *, void *, size_t, int);
        ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
-@@ -1070,10 +1089,14 @@ static inline int get_lease(struct inode
+@@ -1070,10 +1086,14 @@ static inline int get_lease(struct inode
  
  asmlinkage long sys_open(const char *, int, int);
  asmlinkage long sys_close(unsigned int);      /* yes, it's really unsigned */
  
  asmlinkage long sys_open(const char *, int, int);
  asmlinkage long sys_close(unsigned int);      /* yes, it's really unsigned */
  extern int filp_close(struct file *, fl_owner_t id);
  extern char * getname(const char *);
  
  extern int filp_close(struct file *, fl_owner_t id);
  extern char * getname(const char *);
  
-@@ -1335,6 +1358,7 @@ typedef int (*read_actor_t)(read_descrip
+@@ -1335,6 +1355,7 @@ typedef int (*read_actor_t)(read_descrip
  extern loff_t default_llseek(struct file *file, loff_t offset, int origin);
  
  extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *));
  extern loff_t default_llseek(struct file *file, loff_t offset, int origin);
  
  extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *));
  extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *));
  extern int FASTCALL(path_walk(const char *, struct nameidata *));
  extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *));
  extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *));
  extern int FASTCALL(path_walk(const char *, struct nameidata *));
  extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *));
-@@ -1346,6 +1370,8 @@ extern struct dentry * lookup_one_len(co
+@@ -1346,6 +1367,8 @@ extern struct dentry * lookup_one_len(co
  extern struct dentry * lookup_hash(struct qstr *, struct dentry *);
  #define user_path_walk(name,nd)        __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd)
  #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd)
  extern struct dentry * lookup_hash(struct qstr *, struct dentry *);
  #define user_path_walk(name,nd)        __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd)
  #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd)
  
  extern void iput(struct inode *);
  extern void force_delete(struct inode *);
  
  extern void iput(struct inode *);
  extern void force_delete(struct inode *);
-@@ -1455,6 +1481,8 @@ extern struct file_operations generic_ro
+@@ -1455,6 +1478,8 @@ extern struct file_operations generic_ro
  
  extern int vfs_readlink(struct dentry *, char *, int, const char *);
  extern int vfs_follow_link(struct nameidata *, const char *);
  
  extern int vfs_readlink(struct dentry *, char *, int, const char *);
  extern int vfs_follow_link(struct nameidata *, const char *);
  extern int page_readlink(struct dentry *, char *, int);
  extern int page_follow_link(struct dentry *, struct nameidata *);
  extern struct inode_operations page_symlink_inode_operations;
  extern int page_readlink(struct dentry *, char *, int);
  extern int page_follow_link(struct dentry *, struct nameidata *);
  extern struct inode_operations page_symlink_inode_operations;
---- linux-2.4.20-l18/kernel/ksyms.c~vfs_intent-2.4.20-vanilla  Wed May 28 01:39:18 2003
-+++ linux-2.4.20-l18-phil/kernel/ksyms.c       Wed May 28 01:39:18 2003
+--- linux-2.4.20-ad/include/linux/fs_struct.h~vfs_intent-2.4.20-vanilla        2001-07-13 16:10:44.000000000 -0600
++++ linux-2.4.20-ad-braam/include/linux/fs_struct.h    2003-07-07 15:13:53.000000000 -0600
+@@ -34,10 +34,12 @@ static inline void set_fs_root(struct fs
+       write_lock(&fs->lock);
+       old_root = fs->root;
+       old_rootmnt = fs->rootmnt;
++      PIN(dentry, mnt, 1);
+       fs->rootmnt = mntget(mnt);
+       fs->root = dget(dentry);
+       write_unlock(&fs->lock);
+       if (old_root) {
++              UNPIN(old_root, old_rootmnt, 1);
+               dput(old_root);
+               mntput(old_rootmnt);
+       }
+@@ -57,10 +59,12 @@ static inline void set_fs_pwd(struct fs_
+       write_lock(&fs->lock);
+       old_pwd = fs->pwd;
+       old_pwdmnt = fs->pwdmnt;
++      PIN(dentry, mnt, 0);
+       fs->pwdmnt = mntget(mnt);
+       fs->pwd = dget(dentry);
+       write_unlock(&fs->lock);
+       if (old_pwd) {
++              UNPIN(old_pwd, old_pwdmnt, 0);
+               dput(old_pwd);
+               mntput(old_pwdmnt);
+       }
+--- linux-2.4.20-ad/kernel/ksyms.c~vfs_intent-2.4.20-vanilla   2003-07-07 15:13:52.000000000 -0600
++++ linux-2.4.20-ad-braam/kernel/ksyms.c       2003-07-07 15:13:53.000000000 -0600
 @@ -269,6 +269,7 @@ EXPORT_SYMBOL(read_cache_page);
  EXPORT_SYMBOL(set_page_dirty);
  EXPORT_SYMBOL(vfs_readlink);
 @@ -269,6 +269,7 @@ EXPORT_SYMBOL(read_cache_page);
  EXPORT_SYMBOL(set_page_dirty);
  EXPORT_SYMBOL(vfs_readlink);
  EXPORT_SYMBOL(page_readlink);
  EXPORT_SYMBOL(page_follow_link);
  EXPORT_SYMBOL(page_symlink_inode_operations);
  EXPORT_SYMBOL(page_readlink);
  EXPORT_SYMBOL(page_follow_link);
  EXPORT_SYMBOL(page_symlink_inode_operations);
+--- linux-2.4.20-ad/kernel/fork.c~vfs_intent-2.4.20-vanilla    2002-11-28 16:53:15.000000000 -0700
++++ linux-2.4.20-ad-braam/kernel/fork.c        2003-07-07 15:13:53.000000000 -0600
+@@ -384,10 +384,13 @@ static inline struct fs_struct *__copy_f
+               fs->umask = old->umask;
+               read_lock(&old->lock);
+               fs->rootmnt = mntget(old->rootmnt);
++              PIN(old->pwd, old->pwdmnt, 0);
++              PIN(old->root, old->rootmnt, 1);
+               fs->root = dget(old->root);
+               fs->pwdmnt = mntget(old->pwdmnt);
+               fs->pwd = dget(old->pwd);
+               if (old->altroot) {
++                      PIN(old->altroot, old->altrootmnt, 1);
+                       fs->altrootmnt = mntget(old->altrootmnt);
+                       fs->altroot = dget(old->altroot);
+               } else {
+--- linux-2.4.20-ad/kernel/exit.c~vfs_intent-2.4.20-vanilla    2002-11-28 16:53:15.000000000 -0700
++++ linux-2.4.20-ad-braam/kernel/exit.c        2003-07-07 15:13:53.000000000 -0600
+@@ -238,11 +238,14 @@ static inline void __put_fs_struct(struc
+ {
+       /* No need to hold fs->lock if we are killing it */
+       if (atomic_dec_and_test(&fs->count)) {
++              UNPIN(fs->pwd, fs->pwdmnt, 0);
++              UNPIN(fs->root, fs->rootmnt, 1);
+               dput(fs->root);
+               mntput(fs->rootmnt);
+               dput(fs->pwd);
+               mntput(fs->pwdmnt);
+               if (fs->altroot) {
++                      UNPIN(fs->altroot, fs->altrootmnt, 1);
+                       dput(fs->altroot);
+                       mntput(fs->altrootmnt);
+               }
 
 _
 
 _
index 5770132..1afa4d4 100644 (file)
@@ -1,3 +1,5 @@
 fs/ext3/super.c
 fs/ext3/super.c
+fs/ext3/file.c
+fs/ext3/inode.c
 include/linux/ext3_fs.h
 include/linux/ext3_fs_sb.h
 include/linux/ext3_fs.h
 include/linux/ext3_fs_sb.h
index 5770132..a2c3109 100644 (file)
@@ -1,3 +1,5 @@
 fs/ext3/super.c
 fs/ext3/super.c
+fs/ext3/inode.c
+fs/ext3/file.c
 include/linux/ext3_fs.h
 include/linux/ext3_fs_sb.h
 include/linux/ext3_fs.h
 include/linux/ext3_fs_sb.h
index 31901ee..6ad2589 100644 (file)
@@ -1,4 +1,5 @@
 fs/ext3/namei.c
 fs/ext3/ialloc.c
 fs/ext3/namei.c
 fs/ext3/ialloc.c
+fs/ext3/inode.c
 fs/ext3/ioctl.c
 include/linux/ext3_fs.h
 fs/ext3/ioctl.c
 include/linux/ext3_fs.h
index b647d5a..6c80106 100644 (file)
@@ -2,6 +2,7 @@ fs/ext3/ialloc.c
 fs/ext3/inode.c
 fs/ext3/namei.c
 fs/ext3/super.c
 fs/ext3/inode.c
 fs/ext3/namei.c
 fs/ext3/super.c
+fs/ext3/ext3-exports.c
 fs/ext3/xattr.c
 include/linux/ext3_fs.h
 include/linux/ext3_jbd.h
 fs/ext3/xattr.c
 include/linux/ext3_fs.h
 include/linux/ext3_jbd.h
index f8a99ea..f3375a3 100644 (file)
@@ -1,5 +1,6 @@
 fs/exec.c
 fs/dcache.c
 fs/exec.c
 fs/dcache.c
+fs/namespace.c
 fs/namei.c
 fs/nfsd/vfs.c
 fs/open.c
 fs/namei.c
 fs/nfsd/vfs.c
 fs/open.c
@@ -7,4 +8,7 @@ fs/stat.c
 fs/proc/base.c
 include/linux/dcache.h
 include/linux/fs.h
 fs/proc/base.c
 include/linux/dcache.h
 include/linux/fs.h
+include/linux/fs_struct.h
 kernel/ksyms.c
 kernel/ksyms.c
+kernel/fork.c
+kernel/exit.c
index 78e494b..8d3d4f0 100644 (file)
@@ -78,7 +78,7 @@ check_pc_match()
                if [ $? != 0 ]; then
                        echo " $1 do not match with $2 "
                        echo " $2 will be changed to match $2"
                if [ $? != 0 ]; then
                        echo " $1 do not match with $2 "
                        echo " $2 will be changed to match $2"
-                       cat $tmpfile > $P/pc/$PATCH_NAME.pc
+                       cat $tmpfile > $P/pc/$PATCH_NAME.pc
                fi
                rm -rf $tmpfile
        fi
                fi
                rm -rf $tmpfile
        fi
index b951209..c2cc2fa 100644 (file)
@@ -2,7 +2,7 @@ dev_read_only_hp_2.4.20.patch
 exports_2.4.20-rh-hp.patch
 kmem_cache_validate_hp.patch
 lustre_version.patch
 exports_2.4.20-rh-hp.patch
 kmem_cache_validate_hp.patch
 lustre_version.patch
-vfs_intent-2.4.20-vanilla.patch
+vfs_intent-2.4.20-hp.patch
 invalidate_show.patch
 export-truncate.patch
 iod-stock-24-exports_hp.patch
 invalidate_show.patch
 export-truncate.patch
 iod-stock-24-exports_hp.patch
@@ -21,5 +21,7 @@ ext3-delete_thread-2.4.20.patch
 ext3-noread-2.4.20.patch
 extN-wantedi.patch
 ext3-san-2.4.20.patch
 ext3-noread-2.4.20.patch
 extN-wantedi.patch
 ext3-san-2.4.20.patch
+ext3-map_inode_page.patch
+ext3-error-export.patch
 iopen-2.4.20.patch
 tcp-zero-copy.patch
 iopen-2.4.20.patch
 tcp-zero-copy.patch
index a97c37c..970061d 100644 (file)
@@ -15,9 +15,12 @@ ext-2.4-patch-4.patch
 linux-2.4.20-xattr-0.8.54-chaos.patch
 ext3-2.4.20-fixes.patch
 ext3_orphan_lock-2.4.20-rh.patch
 linux-2.4.20-xattr-0.8.54-chaos.patch
 ext3-2.4.20-fixes.patch
 ext3_orphan_lock-2.4.20-rh.patch
-ext3-delete_thread-2.4.20.patch
+ext3_delete_thread_2.4.20_chaos.patch
 ext3-noread-2.4.20.patch
 extN-wantedi.patch
 ext3-san-2.4.20.patch
 ext3-noread-2.4.20.patch
 extN-wantedi.patch
 ext3-san-2.4.20.patch
+ext3-map_inode_page.patch
+ext3-error-export.patch
 iopen-2.4.20.patch
 iopen-2.4.20.patch
-tcp-zero-copy.patch
+tcp_zero_copy_2.4.20_chaos.patch
+gpl_header-chaos-2.4.20.patch
index e56cac6..726a028 100644 (file)
@@ -1,4 +1,4 @@
-uml-patch-2.4.20-4.patch
+uml-patch-2.4.20-6.patch
 dev_read_only_2.4.20.patch
 exports_2.4.20.patch
 kmem_cache_validate_2.4.20.patch
 dev_read_only_2.4.20.patch
 exports_2.4.20.patch
 kmem_cache_validate_2.4.20.patch
@@ -25,5 +25,7 @@ ext3-noread-2.4.20.patch
 ext3-delete_thread-2.4.20.patch
 extN-wantedi.patch
 ext3-san-2.4.20.patch
 ext3-delete_thread-2.4.20.patch
 extN-wantedi.patch
 ext3-san-2.4.20.patch
+ext3-map_inode_page.patch
+ext3-error-export.patch
 iopen-2.4.20.patch
 tcp-zero-copy.patch
 iopen-2.4.20.patch
 tcp-zero-copy.patch
index 2ef001d..28e8648 100644 (file)
@@ -1,13 +1,8 @@
-SERIES               MEMNONIC                  COMMENT
+SERIES            MEMNONIC                 COMMENT                     ARCH
 
 
-hp-pnnl-2.4.20       linux-2.4.20-hp4_pnnl1    same as vanilla but no uml
-vanilla-2.4.20       linux-2.4.20              patch includes uml
-chaos-2.4.20         linux-chaos-2.4.20        same as rh-2.4.20-8
-rh-2.4.20            linux-rh-2.4.20-8         same as chaos-2.4.20
-rh-2.4.18-18         linux-rh-2.4.18-18        same as chaos but includes uml
-chaos                linux-chaos-2.4.18        same as rh-2.4.18-18 but no uml
-
-REVIEW:
-
-vanilla-2.5          linux-2.5.63
-hp-pnnl              linux-2.4.19-hp2_pnnl6
+chaos-2.4.18      linux-chaos-2.4.18       LLNL 2.4.18 chaos ~65       i386
+hp-pnnl-2.4.20    linux-2.4.20-hp4_pnnl1   same as vanilla but no uml  ia64
+vanilla-2.4.20    linux-2.4.20             patch with uml-2.4.20-6     um
+chaos-2.4.20      linux-chaos-2.4.20       same as rh-2.4.20-8         i386
+rh-2.4.20         linux-rh-2.4.20-8        same as chaos-2.4.20        i386
+kgdb-2.5.73       linux-2.5.73             vanilla 2.5.73 with kgdb    i386
index e995588..e69dc6d 100644 (file)
@@ -1,3 +1,4 @@
 .deps
 Makefile
 Makefile.in
 .deps
 Makefile
 Makefile.in
+.*.cmd
index 9b53b54..e3f8673 100644 (file)
@@ -32,7 +32,7 @@
 #include <linux/lustre_mds.h>
 #include <linux/lustre_net.h>
 
 #include <linux/lustre_mds.h>
 #include <linux/lustre_net.h>
 
-int client_import_connect(struct lustre_handle *dlm_handle, 
+int client_import_connect(struct lustre_handle *dlm_handle,
                           struct obd_device *obd,
                           struct obd_uuid *cluuid)
 {
                           struct obd_device *obd,
                           struct obd_uuid *cluuid)
 {
@@ -47,7 +47,6 @@ int client_import_connect(struct lustre_handle *dlm_handle,
         char *tmp[] = {imp->imp_target_uuid.uuid,
                        obd->obd_uuid.uuid,
                        (char *)dlm_handle};
         char *tmp[] = {imp->imp_target_uuid.uuid,
                        obd->obd_uuid.uuid,
                        (char *)dlm_handle};
-        int rq_opc = (obd->obd_type->typ_ops->o_brw) ? OST_CONNECT :MDS_CONNECT;
         int msg_flags;
 
         ENTRY;
         int msg_flags;
 
         ENTRY;
@@ -67,13 +66,15 @@ int client_import_connect(struct lustre_handle *dlm_handle,
         if (obd->obd_namespace == NULL)
                 GOTO(out_disco, rc = -ENOMEM);
 
         if (obd->obd_namespace == NULL)
                 GOTO(out_disco, rc = -ENOMEM);
 
-        request = ptlrpc_prep_req(imp, rq_opc, 3, size, tmp);
+        request = ptlrpc_prep_req(imp, imp->imp_connect_op, 3, size, tmp);
         if (!request)
                 GOTO(out_ldlm, rc = -ENOMEM);
 
         request->rq_level = LUSTRE_CONN_NEW;
         request->rq_replen = lustre_msg_size(0, NULL);
 
         if (!request)
                 GOTO(out_ldlm, rc = -ENOMEM);
 
         request->rq_level = LUSTRE_CONN_NEW;
         request->rq_replen = lustre_msg_size(0, NULL);
 
+        lustre_msg_add_op_flags(request->rq_reqmsg, MSG_CONNECT_PEER);
+
         imp->imp_dlm_handle = *dlm_handle;
 
         imp->imp_level = LUSTRE_CONN_CON;
         imp->imp_dlm_handle = *dlm_handle;
 
         imp->imp_level = LUSTRE_CONN_CON;
@@ -88,7 +89,7 @@ int client_import_connect(struct lustre_handle *dlm_handle,
         class_export_put(exp);
 
         msg_flags = lustre_msg_get_op_flags(request->rq_repmsg);
         class_export_put(exp);
 
         msg_flags = lustre_msg_get_op_flags(request->rq_repmsg);
-        if (rq_opc == MDS_CONNECT || msg_flags & MSG_CONNECT_REPLAYABLE) {
+        if (msg_flags & MSG_CONNECT_REPLAYABLE) {
                 imp->imp_replayable = 1;
                 CDEBUG(D_HA, "connected to replayable target: %s\n",
                        imp->imp_target_uuid.uuid);
                 imp->imp_replayable = 1;
                 CDEBUG(D_HA, "connected to replayable target: %s\n",
                        imp->imp_target_uuid.uuid);
@@ -130,7 +131,16 @@ int client_import_disconnect(struct lustre_handle *dlm_handle, int failover)
                 RETURN(-EINVAL);
         }
 
                 RETURN(-EINVAL);
         }
 
-        rq_opc = obd->obd_type->typ_ops->o_brw ? OST_DISCONNECT:MDS_DISCONNECT;
+        switch (imp->imp_connect_op) {
+        case OST_CONNECT: rq_opc = OST_DISCONNECT; break;
+        case MDS_CONNECT: rq_opc = MDS_DISCONNECT; break;
+        case MGMT_CONNECT:rq_opc = MGMT_DISCONNECT;break;
+        default:
+                CERROR("don't know how to disconnect from %s (connect_op %d)\n",
+                       imp->imp_target_uuid.uuid, imp->imp_connect_op);
+                RETURN(-EINVAL);
+        }
+
         down(&cli->cl_sem);
         if (!cli->cl_conn_count) {
                 CERROR("disconnecting disconnected device (%s)\n",
         down(&cli->cl_sem);
         if (!cli->cl_conn_count) {
                 CERROR("disconnecting disconnected device (%s)\n",
@@ -229,36 +239,31 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler)
         struct obd_uuid remote_uuid;
         struct list_head *p;
         char *str, *tmp;
         struct obd_uuid remote_uuid;
         struct list_head *p;
         char *str, *tmp;
-        int rc, i, abort_recovery;
+        int rc = 0, abort_recovery;
         ENTRY;
 
         LASSERT_REQSWAB (req, 0);
         ENTRY;
 
         LASSERT_REQSWAB (req, 0);
-        str = lustre_msg_string (req->rq_reqmsg, 0, sizeof (tgtuuid.uuid) - 1);
+        str = lustre_msg_string(req->rq_reqmsg, 0, sizeof(tgtuuid) - 1);
         if (str == NULL) {
                 CERROR("bad target UUID for connect\n");
                 GOTO(out, rc = -EINVAL);
         }
         if (str == NULL) {
                 CERROR("bad target UUID for connect\n");
                 GOTO(out, rc = -EINVAL);
         }
+
         obd_str2uuid (&tgtuuid, str);
         obd_str2uuid (&tgtuuid, str);
+        target = class_uuid2obd(&tgtuuid);
+        if (!target || target->obd_stopping || !target->obd_set_up) {
+                CERROR("UUID '%s' is not available for connect\n", str);
+                GOTO(out, rc = -ENODEV);
+        }
 
         LASSERT_REQSWAB (req, 1);
 
         LASSERT_REQSWAB (req, 1);
-        str = lustre_msg_string (req->rq_reqmsg, 1, sizeof (cluuid.uuid) - 1);
+        str = lustre_msg_string(req->rq_reqmsg, 1, sizeof(cluuid) - 1);
         if (str == NULL) {
                 CERROR("bad client UUID for connect\n");
                 GOTO(out, rc = -EINVAL);
         }
         if (str == NULL) {
                 CERROR("bad client UUID for connect\n");
                 GOTO(out, rc = -EINVAL);
         }
-        obd_str2uuid (&cluuid, str);
 
 
-        i = class_uuid2dev(&tgtuuid);
-        if (i == -1) {
-                CERROR("UUID '%s' not found for connect\n", tgtuuid.uuid);
-                GOTO(out, rc = -ENODEV);
-        }
-
-        target = &obd_dev[i];
-        if (!target || target->obd_stopping || !target->obd_set_up) {
-                CERROR("UUID '%s' is not available for connect\n", str);
-                GOTO(out, rc = -ENODEV);
-        }
+        obd_str2uuid (&cluuid, str);
 
         /* XXX extract a nettype and format accordingly */
         snprintf(remote_uuid.uuid, sizeof remote_uuid,
 
         /* XXX extract a nettype and format accordingly */
         snprintf(remote_uuid.uuid, sizeof remote_uuid,
@@ -491,8 +496,7 @@ static void reset_recovery_timer(struct obd_device *obd)
 
         if (!recovering)
                 return;
 
         if (!recovering)
                 return;
-        CDEBUG(D_ERROR, "timer will expire in %ld seconds\n",
-               OBD_RECOVERY_TIMEOUT / HZ);
+        CERROR("timer will expire in %ld seconds\n", OBD_RECOVERY_TIMEOUT / HZ);
         mod_timer(&obd->obd_recovery_timer, jiffies + OBD_RECOVERY_TIMEOUT);
 }
 
         mod_timer(&obd->obd_recovery_timer, jiffies + OBD_RECOVERY_TIMEOUT);
 }
 
@@ -568,7 +572,8 @@ static void process_recovery_queue(struct obd_device *obd)
                 DEBUG_REQ(D_ERROR, req, "processing: ");
                 (void)obd->obd_recovery_handler(req);
                 reset_recovery_timer(obd);
                 DEBUG_REQ(D_ERROR, req, "processing: ");
                 (void)obd->obd_recovery_handler(req);
                 reset_recovery_timer(obd);
-#warning FIXME: mds_fsync_super(mds->mds_sb);
+                /* bug 1580: decide how to properly sync() in recovery */
+                //mds_fsync_super(mds->mds_sb);
                 class_export_put(req->rq_export);
                 OBD_FREE(req->rq_reqmsg, req->rq_reqlen);
                 OBD_FREE(req, sizeof *req);
                 class_export_put(req->rq_export);
                 OBD_FREE(req->rq_reqmsg, req->rq_reqlen);
                 OBD_FREE(req, sizeof *req);
@@ -715,8 +720,7 @@ int target_queue_final_reply(struct ptlrpc_request *req, int rc)
         if (recovery_done) {
                 struct list_head *tmp, *n;
                 ldlm_reprocess_all_ns(req->rq_export->exp_obd->obd_namespace);
         if (recovery_done) {
                 struct list_head *tmp, *n;
                 ldlm_reprocess_all_ns(req->rq_export->exp_obd->obd_namespace);
-                CDEBUG(D_ERROR,
-                       "%s: all clients recovered, sending delayed replies\n",
+                CERROR("%s: all clients recovered, sending delayed replies\n",
                        obd->obd_name);
                 obd->obd_recovering = 0;
                 list_for_each_safe(tmp, n, &obd->obd_delayed_reply_queue) {
                        obd->obd_name);
                 obd->obd_recovering = 0;
                 list_for_each_safe(tmp, n, &obd->obd_delayed_reply_queue) {
index 2dc60cf..3995e95 100644 (file)
@@ -71,6 +71,8 @@ char *ldlm_it2str(int it)
                 return "lookup";
         case IT_UNLINK:
                 return "unlink";
                 return "lookup";
         case IT_UNLINK:
                 return "unlink";
+        case IT_GETXATTR:
+                return "getxattr";
         default:
                 CERROR("Unknown intent %d\n", it);
                 return "UNKNOWN";
         default:
                 CERROR("Unknown intent %d\n", it);
                 return "UNKNOWN";
@@ -954,8 +956,8 @@ int ldlm_run_ast_work(struct list_head *rpc_list)
                 if (rc == -ERESTART)
                         retval = rc;
                 else if (rc)
                 if (rc == -ERESTART)
                         retval = rc;
                 else if (rc)
-                        CERROR("Failed AST - should clean & disconnect "
-                               "client\n");
+                        CDEBUG(D_DLMTRACE, "Failed AST - should clean & "
+                               "disconnect client\n");
                 LDLM_LOCK_PUT(w->w_lock);
                 list_del(&w->w_list);
                 OBD_FREE(w, sizeof(*w));
                 LDLM_LOCK_PUT(w->w_lock);
                 list_del(&w->w_list);
                 OBD_FREE(w, sizeof(*w));
index de304d4..50bc96a 100644 (file)
@@ -243,8 +243,7 @@ int ldlm_del_waiting_lock(struct ldlm_lock *lock)
 
 #endif /* __KERNEL__ */
 
 
 #endif /* __KERNEL__ */
 
-static inline void ldlm_failed_ast(struct ldlm_lock *lock, int rc,
-                                   char *ast_type)
+static void ldlm_failed_ast(struct ldlm_lock *lock, int rc, char *ast_type)
 {
         CERROR("%s AST failed (%d) for res "LPU64"/"LPU64
                ", mode %s: evicting client %s@%s NID "LPU64"\n",
 {
         CERROR("%s AST failed (%d) for res "LPU64"/"LPU64
                ", mode %s: evicting client %s@%s NID "LPU64"\n",
@@ -347,10 +346,19 @@ int ldlm_server_blocking_ast(struct ldlm_lock *lock,
         RETURN(rc);
 }
 
         RETURN(rc);
 }
 
+/* XXX copied from ptlrpc/service.c */
+/* Return the signed difference (large - small) in microseconds: the
+ * whole-second gap is scaled by 1000000 and the tv_usec gap is added.
+ * NOTE(review): the result is a long, so where long is 32 bits this
+ * presumably overflows for gaps beyond roughly 2147 seconds - confirm. */
+static long timeval_sub(struct timeval *large, struct timeval *small)
+{
+        return (large->tv_sec - small->tv_sec) * 1000000 +
+                (large->tv_usec - small->tv_usec);
+}
+
 int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data)
 {
         struct ldlm_request *body;
         struct ptlrpc_request *req;
 int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data)
 {
         struct ldlm_request *body;
         struct ptlrpc_request *req;
+        struct timeval granted_time;
+        long total_enqueue_wait;
         int rc = 0, size = sizeof(*body);
         ENTRY;
 
         int rc = 0, size = sizeof(*body);
         ENTRY;
 
@@ -359,6 +367,12 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data)
                 RETURN(-EINVAL);
         }
 
                 RETURN(-EINVAL);
         }
 
+        do_gettimeofday(&granted_time);
+        total_enqueue_wait = timeval_sub(&granted_time, &lock->l_enqueued_time);
+
+        if (total_enqueue_wait / 1000000 > obd_timeout)
+                LDLM_ERROR(lock, "enqueue wait took %ldus", total_enqueue_wait);
+
         req = ptlrpc_prep_req(lock->l_export->exp_ldlm_data.led_import,
                               LDLM_CP_CALLBACK, 1, &size, NULL);
         if (!req)
         req = ptlrpc_prep_req(lock->l_export->exp_ldlm_data.led_import,
                               LDLM_CP_CALLBACK, 1, &size, NULL);
         if (!req)
@@ -370,7 +384,8 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data)
         body->lock_flags = flags;
         ldlm_lock2desc(lock, &body->lock_desc);
 
         body->lock_flags = flags;
         ldlm_lock2desc(lock, &body->lock_desc);
 
-        LDLM_DEBUG(lock, "server preparing completion AST");
+        LDLM_DEBUG(lock, "server preparing completion AST (after %ldus wait)",
+                   total_enqueue_wait);
         req->rq_replen = lustre_msg_size(0, NULL);
 
         req->rq_level = LUSTRE_CONN_RECOVER;
         req->rq_replen = lustre_msg_size(0, NULL);
 
         req->rq_level = LUSTRE_CONN_RECOVER;
@@ -447,6 +462,7 @@ int ldlm_handle_enqueue(struct ptlrpc_request *req,
         if (!lock)
                 GOTO(out, err = -ENOMEM);
 
         if (!lock)
                 GOTO(out, err = -ENOMEM);
 
+        do_gettimeofday(&lock->l_enqueued_time);
         memcpy(&lock->l_remote_handle, &dlm_req->lock_handle1,
                sizeof(lock->l_remote_handle));
         LDLM_DEBUG(lock, "server-side enqueue handler, new lock created");
         memcpy(&lock->l_remote_handle, &dlm_req->lock_handle1,
                sizeof(lock->l_remote_handle));
         LDLM_DEBUG(lock, "server-side enqueue handler, new lock created");
@@ -640,22 +656,10 @@ static void ldlm_handle_cp_callback(struct ptlrpc_request *req,
                 lock->l_req_mode = dlm_req->lock_desc.l_granted_mode;
                 LDLM_DEBUG(lock, "completion AST, new lock mode");
         }
                 lock->l_req_mode = dlm_req->lock_desc.l_granted_mode;
                 LDLM_DEBUG(lock, "completion AST, new lock mode");
         }
-        if (lock->l_resource->lr_type == LDLM_EXTENT) {
+        if (lock->l_resource->lr_type == LDLM_EXTENT)
                 memcpy(&lock->l_extent, &dlm_req->lock_desc.l_extent,
                        sizeof(lock->l_extent));
 
                 memcpy(&lock->l_extent, &dlm_req->lock_desc.l_extent,
                        sizeof(lock->l_extent));
 
-                if ((lock->l_extent.end & ~PAGE_MASK) != ~PAGE_MASK) {
-                        /* XXX Old versions of BA OST code have a fencepost bug
-                         * which will cause them to grant a lock that's one
-                         * byte too large.  This can be safely removed after BA
-                         * ships their next release -phik (02 Apr 2003) */
-                        lock->l_extent.end--;
-                } else if ((lock->l_extent.start & ~PAGE_MASK) ==
-                           ~PAGE_MASK) {
-                        lock->l_extent.start++;
-                }
-        }
-
         ldlm_resource_unlink_lock(lock);
         if (memcmp(&dlm_req->lock_desc.l_resource.lr_name,
                    &lock->l_resource->lr_name,
         ldlm_resource_unlink_lock(lock);
         if (memcmp(&dlm_req->lock_desc.l_resource.lr_name,
                    &lock->l_resource->lr_name,
@@ -961,7 +965,7 @@ static int ldlm_setup(struct obd_device *obddev, obd_count len, void *buf)
         return rc;
 }
 
         return rc;
 }
 
-static int ldlm_cleanup(struct obd_device *obddev, int force, int failover)
+static int ldlm_cleanup(struct obd_device *obddev, int flags)
 {
         struct ldlm_obd *ldlm = &obddev->u.ldlm;
         ENTRY;
 {
         struct ldlm_obd *ldlm = &obddev->u.ldlm;
         ENTRY;
@@ -973,7 +977,7 @@ static int ldlm_cleanup(struct obd_device *obddev, int force, int failover)
         }
 
 #ifdef __KERNEL__
         }
 
 #ifdef __KERNEL__
-        if (force) {
+        if (flags & OBD_OPT_FORCE) {
                 ptlrpc_put_ldlm_hooks();
         } else if (ptlrpc_ldlm_hooks_referenced()) {
                 CERROR("Some connections weren't cleaned up; run lconf with "
                 ptlrpc_put_ldlm_hooks();
         } else if (ptlrpc_ldlm_hooks_referenced()) {
                 CERROR("Some connections weren't cleaned up; run lconf with "
@@ -1084,6 +1088,7 @@ EXPORT_SYMBOL(ldlm_replay_locks);
 EXPORT_SYMBOL(ldlm_resource_foreach);
 EXPORT_SYMBOL(ldlm_namespace_foreach);
 EXPORT_SYMBOL(ldlm_namespace_foreach_res);
 EXPORT_SYMBOL(ldlm_resource_foreach);
 EXPORT_SYMBOL(ldlm_namespace_foreach);
 EXPORT_SYMBOL(ldlm_namespace_foreach_res);
+EXPORT_SYMBOL(ldlm_change_cbdata);
 
 /* ldlm_lockd.c */
 EXPORT_SYMBOL(ldlm_server_blocking_ast);
 
 /* ldlm_lockd.c */
 EXPORT_SYMBOL(ldlm_server_blocking_ast);
index e6a8229..75e6dbd 100644 (file)
@@ -273,6 +273,7 @@ int ldlm_cli_enqueue(struct lustre_handle *connh,
                 /* Set a flag to prevent us from sending a CANCEL (bug 407) */
                 l_lock(&ns->ns_lock);
                 lock->l_flags |= LDLM_FL_LOCAL_ONLY;
                 /* Set a flag to prevent us from sending a CANCEL (bug 407) */
                 l_lock(&ns->ns_lock);
                 lock->l_flags |= LDLM_FL_LOCAL_ONLY;
+                LDLM_DEBUG(lock, "setting FL_LOCAL_ONLY");
                 l_unlock(&ns->ns_lock);
 
                 ldlm_lock_decref_and_cancel(lockh, mode);
                 l_unlock(&ns->ns_lock);
 
                 ldlm_lock_decref_and_cancel(lockh, mode);
@@ -295,7 +296,7 @@ int ldlm_cli_enqueue(struct lustre_handle *connh,
                 CERROR ("Can't unpack ldlm_reply\n");
                 GOTO (out_req, rc = -EPROTO);
         }
                 CERROR ("Can't unpack ldlm_reply\n");
                 GOTO (out_req, rc = -EPROTO);
         }
-        
+
         memcpy(&lock->l_remote_handle, &reply->lock_handle,
                sizeof(lock->l_remote_handle));
         *flags = reply->lock_flags;
         memcpy(&lock->l_remote_handle, &reply->lock_handle,
                sizeof(lock->l_remote_handle));
         *flags = reply->lock_flags;
@@ -309,17 +310,6 @@ int ldlm_cli_enqueue(struct lustre_handle *connh,
                        body->lock_desc.l_extent.end,
                        reply->lock_extent.start, reply->lock_extent.end);
 
                        body->lock_desc.l_extent.end,
                        reply->lock_extent.start, reply->lock_extent.end);
 
-                if ((reply->lock_extent.end & ~PAGE_MASK) != ~PAGE_MASK) {
-                        /* XXX Old versions of BA OST code have a fencepost bug
-                         * which will cause them to grant a lock that's one
-                         * byte too large.  This can be safely removed after BA
-                         * ships their next release -phik (02 Apr 2003) */
-                        reply->lock_extent.end--;
-                } else if ((reply->lock_extent.start & ~PAGE_MASK) ==
-                           ~PAGE_MASK) {
-                        reply->lock_extent.start++;
-                }
-
                 cookie = &reply->lock_extent; /* FIXME bug 267 */
                 cookielen = sizeof(reply->lock_extent);
         }
                 cookie = &reply->lock_extent; /* FIXME bug 267 */
                 cookielen = sizeof(reply->lock_extent);
         }
@@ -454,7 +444,7 @@ int ldlm_cli_convert(struct lustre_handle *lockh, int new_mode, int *flags)
                 CERROR ("Can't unpack ldlm_reply\n");
                 GOTO (out, rc = -EPROTO);
         }
                 CERROR ("Can't unpack ldlm_reply\n");
                 GOTO (out, rc = -EPROTO);
         }
-        
+
         res = ldlm_lock_convert(lock, new_mode, &reply->lock_flags);
         if (res != NULL)
                 ldlm_reprocess_all(res);
         res = ldlm_lock_convert(lock, new_mode, &reply->lock_flags);
         if (res != NULL)
                 ldlm_reprocess_all(res);
@@ -535,11 +525,11 @@ int ldlm_cli_cancel(struct lustre_handle *lockh)
         local_cancel:
                 ldlm_lock_cancel(lock);
         } else {
         local_cancel:
                 ldlm_lock_cancel(lock);
         } else {
-                LDLM_DEBUG(lock, "client-side local cancel");
                 if (lock->l_resource->lr_namespace->ns_client) {
                 if (lock->l_resource->lr_namespace->ns_client) {
-                        CERROR("Trying to cancel local lock\n");
+                        LDLM_ERROR(lock, "Trying to cancel local lock\n");
                         LBUG();
                 }
                         LBUG();
                 }
+                LDLM_DEBUG(lock, "client-side local cancel");
                 ldlm_lock_cancel(lock);
                 ldlm_reprocess_all(lock->l_resource);
                 LDLM_DEBUG(lock, "client-side local cancel handler END");
                 ldlm_lock_cancel(lock);
                 ldlm_reprocess_all(lock->l_resource);
                 LDLM_DEBUG(lock, "client-side local cancel handler END");
@@ -631,9 +621,8 @@ static int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns,
                 lock = list_entry(tmp, struct ldlm_lock, l_res_link);
 
                 if (opaque != NULL && lock->l_data != opaque) {
                 lock = list_entry(tmp, struct ldlm_lock, l_res_link);
 
                 if (opaque != NULL && lock->l_data != opaque) {
-                        LDLM_ERROR(lock, "data %p doesn't match opaque %p res"
-                                  LPU64":"LPU64, lock->l_data, opaque,
-                                  res_id.name[0], res_id.name[1]);
+                        LDLM_ERROR(lock, "data %p doesn't match opaque %p",
+                                  lock->l_data, opaque);
                         //LBUG();
                         continue;
                 }
                         //LBUG();
                         continue;
                 }
@@ -797,12 +786,12 @@ int ldlm_namespace_foreach_res(struct ldlm_namespace *ns,
                                ldlm_res_iterator_t iter, void *closure)
 {
         int i, rc = LDLM_ITER_CONTINUE;
                                ldlm_res_iterator_t iter, void *closure)
 {
         int i, rc = LDLM_ITER_CONTINUE;
-        
+
         l_lock(&ns->ns_lock);
         for (i = 0; i < RES_HASH_SIZE; i++) {
                 struct list_head *tmp, *next;
                 list_for_each_safe(tmp, next, &(ns->ns_hash[i])) {
         l_lock(&ns->ns_lock);
         for (i = 0; i < RES_HASH_SIZE; i++) {
                 struct list_head *tmp, *next;
                 list_for_each_safe(tmp, next, &(ns->ns_hash[i])) {
-                        struct ldlm_resource *res = 
+                        struct ldlm_resource *res =
                                 list_entry(tmp, struct ldlm_resource, lr_hash);
 
                         ldlm_resource_getref(res);
                                 list_entry(tmp, struct ldlm_resource, lr_hash);
 
                         ldlm_resource_getref(res);
@@ -817,6 +806,34 @@ int ldlm_namespace_foreach_res(struct ldlm_namespace *ns,
         RETURN(rc);
 }
 
         RETURN(rc);
 }
 
+/* Run an iterator over the locks of a single resource, for callers whose
+ * callback data is being put away (described by its author as
+ * non-blocking).
+ *
+ * ns:     namespace to search; passing NULL is a caller bug and hits LBUG()
+ * res_id: name of the resource to look up; dereferenced unconditionally
+ * iter:   callback handed to ldlm_resource_foreach() for the resource
+ * data:   opaque closure passed through to iter
+ *
+ * Nothing is returned: a resource that cannot be found is silently ignored
+ * and the result of the iteration is discarded. */
+void ldlm_change_cbdata(struct ldlm_namespace *ns,
+                       struct ldlm_res_id *res_id,
+                       ldlm_iterator_t iter,
+                       void *data)
+{
+        struct ldlm_resource *res;
+        ENTRY;
+
+        if (ns == NULL) {
+                CERROR("must pass in namespace\n");
+                LBUG();
+        }
+
+        /* NOTE(review): the trailing 0, 0 presumably mean "no type, do not
+         * create" - confirm against ldlm_resource_get(). */
+        res = ldlm_resource_get(ns, NULL, *res_id, 0, 0);
+        if (res == NULL) {
+                EXIT;
+                return;
+        }
+
+        /* Walk the resource with the namespace lock held, then release the
+         * resource with ldlm_resource_putref() once the walk is finished. */
+        l_lock(&ns->ns_lock);
+        ldlm_resource_foreach(res, iter, data);
+        l_unlock(&ns->ns_lock);
+        ldlm_resource_putref(res);
+        EXIT;
+}
+
 /* Lock replay */
 
 static int ldlm_chain_lock_for_replay(struct ldlm_lock *lock, void *closure)
 /* Lock replay */
 
 static int ldlm_chain_lock_for_replay(struct ldlm_lock *lock, void *closure)
@@ -858,7 +875,7 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock)
                 flags = LDLM_FL_REPLAY | LDLM_FL_BLOCK_WAIT;
         else
                 flags = LDLM_FL_REPLAY;
                 flags = LDLM_FL_REPLAY | LDLM_FL_BLOCK_WAIT;
         else
                 flags = LDLM_FL_REPLAY;
-                
+
         size = sizeof(*body);
         req = ptlrpc_prep_req(imp, LDLM_ENQUEUE, 1, &size, NULL);
         if (!req)
         size = sizeof(*body);
         req = ptlrpc_prep_req(imp, LDLM_ENQUEUE, 1, &size, NULL);
         if (!req)
@@ -866,7 +883,7 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock)
 
         /* We're part of recovery, so don't wait for it. */
         req->rq_level = LUSTRE_CONN_RECOVER;
 
         /* We're part of recovery, so don't wait for it. */
         req->rq_level = LUSTRE_CONN_RECOVER;
-        
+
         body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body));
         ldlm_lock2desc(lock, &body->lock_desc);
         body->lock_flags = flags;
         body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body));
         ldlm_lock2desc(lock, &body->lock_desc);
         body->lock_flags = flags;
@@ -879,14 +896,14 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock)
         rc = ptlrpc_queue_wait(req);
         if (rc != ELDLM_OK)
                 GOTO(out, rc);
         rc = ptlrpc_queue_wait(req);
         if (rc != ELDLM_OK)
                 GOTO(out, rc);
-        
+
         reply = lustre_swab_repbuf(req, 0, sizeof (*reply),
                                    lustre_swab_ldlm_reply);
         if (reply == NULL) {
                 CERROR("Can't unpack ldlm_reply\n");
                 GOTO (out, rc = -EPROTO);
         }
         reply = lustre_swab_repbuf(req, 0, sizeof (*reply),
                                    lustre_swab_ldlm_reply);
         if (reply == NULL) {
                 CERROR("Can't unpack ldlm_reply\n");
                 GOTO (out, rc = -EPROTO);
         }
-        
+
         memcpy(&lock->l_remote_handle, &reply->lock_handle,
                sizeof(lock->l_remote_handle));
         LDLM_DEBUG(lock, "replayed lock:");
         memcpy(&lock->l_remote_handle, &reply->lock_handle,
                sizeof(lock->l_remote_handle));
         LDLM_DEBUG(lock, "replayed lock:");
@@ -901,7 +918,7 @@ int ldlm_replay_locks(struct obd_import *imp)
         struct list_head list, *pos, *next;
         struct ldlm_lock *lock;
         int rc = 0;
         struct list_head list, *pos, *next;
         struct ldlm_lock *lock;
         int rc = 0;
-        
+
         ENTRY;
         INIT_LIST_HEAD(&list);
 
         ENTRY;
         INIT_LIST_HEAD(&list);
 
index 84fdecc..4449c79 100644 (file)
@@ -114,12 +114,10 @@ struct ldlm_namespace *ldlm_namespace_new(char *name, __u32 client)
         if (!ns)
                 RETURN(NULL);
 
         if (!ns)
                 RETURN(NULL);
 
-        ns->ns_hash = vmalloc(sizeof(*ns->ns_hash) * RES_HASH_SIZE);
+        OBD_VMALLOC(ns->ns_hash, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
         if (!ns->ns_hash)
                 GOTO(out_ns, NULL);
 
         if (!ns->ns_hash)
                 GOTO(out_ns, NULL);
 
-        atomic_add(sizeof(*ns->ns_hash) * RES_HASH_SIZE, &obd_memory);
-
         OBD_ALLOC(ns->ns_name, strlen(name) + 1);
         if (!ns->ns_name)
                 GOTO(out_hash, NULL);
         OBD_ALLOC(ns->ns_name, strlen(name) + 1);
         if (!ns->ns_name)
                 GOTO(out_hash, NULL);
@@ -152,8 +150,7 @@ struct ldlm_namespace *ldlm_namespace_new(char *name, __u32 client)
 
 out_hash:
         POISON(ns->ns_hash, 0x5a, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
 
 out_hash:
         POISON(ns->ns_hash, 0x5a, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
-        vfree(ns->ns_hash);
-        atomic_sub(sizeof(*ns->ns_hash) * RES_HASH_SIZE, &obd_memory);
+        OBD_VFREE(ns->ns_hash, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
 out_ns:
         OBD_FREE(ns, sizeof(*ns));
         return NULL;
 out_ns:
         OBD_FREE(ns, sizeof(*ns));
         return NULL;
@@ -186,6 +183,7 @@ static void cleanup_resource(struct ldlm_resource *res, struct list_head *q,
                         lock->l_flags |= LDLM_FL_CBPENDING;
                         /* ... without sending a CANCEL message. */
                         lock->l_flags |= LDLM_FL_LOCAL_ONLY;
                         lock->l_flags |= LDLM_FL_CBPENDING;
                         /* ... without sending a CANCEL message. */
                         lock->l_flags |= LDLM_FL_LOCAL_ONLY;
+                        LDLM_DEBUG(lock, "setting FL_LOCAL_ONLY");
                         /* ... and without calling the cancellation callback */
                         lock->l_flags |= LDLM_FL_CANCEL;
                         LDLM_LOCK_PUT(lock);
                         /* ... and without calling the cancellation callback */
                         lock->l_flags |= LDLM_FL_CANCEL;
                         LDLM_LOCK_PUT(lock);
@@ -272,8 +270,7 @@ int ldlm_namespace_free(struct ldlm_namespace *ns)
         ldlm_namespace_cleanup(ns, 0);
 
         POISON(ns->ns_hash, 0x5a, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
         ldlm_namespace_cleanup(ns, 0);
 
         POISON(ns->ns_hash, 0x5a, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
-        vfree(ns->ns_hash /* , sizeof(*ns->ns_hash) * RES_HASH_SIZE */);
-        atomic_sub(sizeof(*ns->ns_hash) * RES_HASH_SIZE, &obd_memory);
+        OBD_VFREE(ns->ns_hash, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
         OBD_FREE(ns->ns_name, strlen(ns->ns_name) + 1);
         OBD_FREE(ns, sizeof(*ns));
 
         OBD_FREE(ns->ns_name, strlen(ns->ns_name) + 1);
         OBD_FREE(ns, sizeof(*ns));
 
index 8344af5..88af047 100644 (file)
@@ -145,7 +145,7 @@ int llu_create(struct inode *dir, struct pnode_base *pnode, int mode)
 
         it = dentry->d_it;
 
 
         it = dentry->d_it;
 
-        rc = ll_it_open_error(IT_OPEN_CREATE, it);
+        rc = ll_it_open_error(DISP_OPEN_CREATE, it);
         if (rc) {
                 LL_GET_INTENT(dentry, it);
                 ptlrpc_req_finished(it->it_data);
         if (rc) {
                 LL_GET_INTENT(dentry, it);
                 ptlrpc_req_finished(it->it_data);
@@ -317,7 +317,7 @@ static int llu_file_open(struct inode *inode)
 #if 0
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino);
         LL_GET_INTENT(file->f_dentry, it);
 #if 0
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino);
         LL_GET_INTENT(file->f_dentry, it);
-        rc = ll_it_open_error(IT_OPEN_OPEN, it);
+        rc = ll_it_open_error(DISP_OPEN_OPEN, it);
         if (rc)
                 RETURN(rc);
 #endif
         if (rc)
                 RETURN(rc);
 #endif
@@ -477,7 +477,7 @@ static int llu_file_release(struct inode *inode)
                 oa.o_id = lsm->lsm_object_id;
                 oa.o_mode = S_IFREG;
                 oa.o_valid = OBD_MD_FLTYPE | OBD_MD_FLID;
                 oa.o_id = lsm->lsm_object_id;
                 oa.o_mode = S_IFREG;
                 oa.o_valid = OBD_MD_FLTYPE | OBD_MD_FLID;
-                
+
                 memcpy(&oa.o_inline, &fd->fd_ost_och, FD_OSTDATA_SIZE);
                 oa.o_valid |= OBD_MD_FLHANDLE;
 
                 memcpy(&oa.o_inline, &fd->fd_ost_och, FD_OSTDATA_SIZE);
                 oa.o_valid |= OBD_MD_FLHANDLE;
 
index 0e88933..0939352 100644 (file)
@@ -715,7 +715,7 @@ llu_fsswop_mount(const char *source,
 
 /* XXX do we need this??
         memset(&osfs, 0, sizeof(osfs));
 
 /* XXX do we need this??
         memset(&osfs, 0, sizeof(osfs));
-        rc = obd_statfs(&sbi->ll_mdc_conn, &osfs);
+        rc = obd_statfs(class_conn2obd(&sbi->ll_mdc_conn),&osfs,jiffies-100*HZ);
 */
         /* fetch attr of root inode */
         err = mdc_getattr(&sbi->ll_mdc_conn, &rootfid,
 */
         /* fetch attr of root inode */
         err = mdc_getattr(&sbi->ll_mdc_conn, &rootfid,
@@ -765,9 +765,9 @@ out_inode:
 out_request:
         ptlrpc_req_finished(request);
 out_osc:
 out_request:
         ptlrpc_req_finished(request);
 out_osc:
-        obd_disconnect(&sbi->ll_osc_conn);
+        obd_disconnect(&sbi->ll_osc_conn, 0);
 out_mdc:
 out_mdc:
-        obd_disconnect(&sbi->ll_mdc_conn);
+        obd_disconnect(&sbi->ll_mdc_conn, 0);
 out_free:
         OBD_FREE(sbi, sizeof(*sbi));
         return err;
 out_free:
         OBD_FREE(sbi, sizeof(*sbi));
         return err;
index e530020..49c6100 100644 (file)
@@ -6,3 +6,4 @@ Makefile
 Makefile.in
 .deps
 TAGS
 Makefile.in
 .deps
 TAGS
+.*.cmd
index b6fc501..9ef9b7f 100644 (file)
@@ -9,8 +9,8 @@ MODULE = llite
 modulefs_DATA = llite.o
 EXTRA_PROGRAMS = llite
 
 modulefs_DATA = llite.o
 EXTRA_PROGRAMS = llite
 
-llite_SOURCES = dcache.c commit_callback.c super.c rw.c iod.c super25.c
-llite_SOURCES += file.c dir.c sysctl.c symlink.c
-llite_SOURCES += namei.c lproc_llite.c llite_internal.h
+llite_SOURCES = dcache.c commit_callback.c  rw.c  super25.c
+llite_SOURCES += file.c dir.c sysctl.c symlink.c llite_lib.c
+llite_SOURCES += namei.c lproc_llite.c super.c iod.c llite_internal.h
 
 include $(top_srcdir)/Rules
 
 include $(top_srcdir)/Rules
index 0684968..8c55b3d 100644 (file)
 #include <linux/lustre_idl.h>
 #include <linux/lustre_dlm.h>
 
 #include <linux/lustre_idl.h>
 #include <linux/lustre_dlm.h>
 
+#include "llite_internal.h"
+
 /* should NOT be called with the dcache lock, see fs/dcache.c */
 /* should NOT be called with the dcache lock, see fs/dcache.c */
-void ll_release(struct dentry *de)
+static void ll_release(struct dentry *de)
 {
 {
+        struct ll_dentry_data *lld = ll_d2d(de);
         ENTRY;
         ENTRY;
+
+        LASSERT(lld->lld_cwd_count == 0);
+        LASSERT(lld->lld_mnt_count == 0);
         OBD_FREE(de->d_fsdata, sizeof(struct ll_dentry_data));
         OBD_FREE(de->d_fsdata, sizeof(struct ll_dentry_data));
-        EXIT;
-}
 
 
-int ll_delete(struct dentry *de)
-{
-        if (de->d_it != 0) {
-                CERROR("%s put dentry %p+%p with d_it %p\n", current->comm,
-                       de, de->d_fsdata, de->d_it);
-                LBUG();
-        }
-        return 0;
+        EXIT;
 }
 
 void ll_set_dd(struct dentry *de)
 }
 
 void ll_set_dd(struct dentry *de)
@@ -55,23 +52,20 @@ void ll_set_dd(struct dentry *de)
         LASSERT(de != NULL);
 
         lock_kernel();
         LASSERT(de != NULL);
 
         lock_kernel();
-
         if (de->d_fsdata == NULL) {
                 OBD_ALLOC(de->d_fsdata, sizeof(struct ll_dentry_data));
         if (de->d_fsdata == NULL) {
                 OBD_ALLOC(de->d_fsdata, sizeof(struct ll_dentry_data));
-                sema_init(&ll_d2d(de)->lld_it_sem, 1);
         }
         }
-
         unlock_kernel();
 
         EXIT;
 }
 
         unlock_kernel();
 
         EXIT;
 }
 
-void ll_intent_release(struct dentry *de, struct lookup_intent *it)
+void ll_intent_release(struct lookup_intent *it)
 {
         struct lustre_handle *handle;
         ENTRY;
 
 {
         struct lustre_handle *handle;
         ENTRY;
 
-        if (it->it_lock_mode) {
+        if (it->it_op && it->it_lock_mode) {
                 handle = (struct lustre_handle *)it->it_lock_handle;
                 CDEBUG(D_DLMTRACE, "releasing lock with cookie "LPX64
                        " from it %p\n",
                 handle = (struct lustre_handle *)it->it_lock_handle;
                 CDEBUG(D_DLMTRACE, "releasing lock with cookie "LPX64
                        " from it %p\n",
@@ -83,84 +77,73 @@ void ll_intent_release(struct dentry *de, struct lookup_intent *it)
                    lock (see bug 494) */
                 it->it_lock_mode = 0;
         }
                    lock (see bug 494) */
                 it->it_lock_mode = 0;
         }
+        it->it_magic = 0;
+        it->it_op_release = 0;
+        EXIT;
+}
 
 
-        if (!de->d_it || it->it_op == IT_RELEASED_MAGIC) {
-                EXIT;
+void ll_unhash_aliases(struct inode *inode)
+{
+        struct dentry *dentry = NULL;
+        struct list_head *tmp;
+        struct ll_sb_info *sbi;
+        ENTRY;
+
+        if (inode == NULL) {
+                CERROR("unexpected NULL inode, tell phil\n");
                 return;
         }
 
                 return;
         }
 
-        if (de->d_it == it)
-                LL_GET_INTENT(de, it);
-        else
-                CDEBUG(D_INODE, "STRANGE intent release: %p %p\n",
-                       de->d_it, it);
+        sbi = ll_i2sbi(inode);
+
+        CDEBUG(D_INODE, "marking dentries for ino %lx/%x invalid\n",
+               inode->i_ino, inode->i_generation);
 
 
+        spin_lock(&dcache_lock);
+        list_for_each(tmp, &inode->i_dentry) {
+                dentry = list_entry(tmp, struct dentry, d_alias);
+
+                list_del_init(&dentry->d_hash);
+                dentry->d_flags |= DCACHE_LUSTRE_INVALID;
+                list_add(&dentry->d_hash, &sbi->ll_orphan_dentry_list);
+        }
+
+        spin_unlock(&dcache_lock);
         EXIT;
 }
 
 extern struct dentry *ll_find_alias(struct inode *, struct dentry *);
 
         EXIT;
 }
 
 extern struct dentry *ll_find_alias(struct inode *, struct dentry *);
 
-static int revalidate2_finish(int flag, struct ptlrpc_request *request,
+static int revalidate_it_finish(struct ptlrpc_request *request,
                               struct inode *parent, struct dentry **de,
                               struct lookup_intent *it, int offset, obd_id ino)
 {
         struct ll_sb_info     *sbi = ll_i2sbi(parent);
                               struct inode *parent, struct dentry **de,
                               struct lookup_intent *it, int offset, obd_id ino)
 {
         struct ll_sb_info     *sbi = ll_i2sbi(parent);
-        struct mds_body       *body;
-        struct lov_stripe_md  *lsm = NULL;
-        struct lov_mds_md     *lmm;
-        int                    lmmsize;
+        struct lustre_md      md;
         int                    rc = 0;
         ENTRY;
 
         /* NB 1 request reference will be taken away by ll_intent_lock()
          * when I return */
 
         int                    rc = 0;
         ENTRY;
 
         /* NB 1 request reference will be taken away by ll_intent_lock()
          * when I return */
 
-        if ((flag & LL_LOOKUP_NEGATIVE) != 0)
-                GOTO (out, rc = -ENOENT);
+        if (it_disposition(it, DISP_LOOKUP_NEG))
+                RETURN(-ENOENT);
 
 
-        /* We only get called if the mdc_enqueue() called from
-         * ll_intent_lock() was successful.  Therefore the mds_body is
-         * present and correct, and the eadata is present (but still
-         * opaque, so only obd_unpackmd() can check the size) */
-        body = lustre_msg_buf(request->rq_repmsg, offset, sizeof (*body));
-        LASSERT (body != NULL);
-        LASSERT_REPSWABBED (request, offset);
+        /* ll_intent_lock was successful, now prepare the lustre_md */
+        rc = mdc_req2lustre_md(request, offset, &sbi->ll_osc_conn, &md);
+        if (rc)
+                RETURN(rc);
 
 
-        if (body->valid & OBD_MD_FLEASIZE) {
-                /* Only bother with this if inodes's LSM not set? */
-
-                if (body->eadatasize == 0) {
-                        CERROR ("OBD_MD_FLEASIZE set, but eadatasize 0\n");
-                        GOTO (out, rc = -EPROTO);
-                }
-                lmmsize = body->eadatasize;
-                lmm = lustre_msg_buf (request->rq_repmsg, offset + 1, lmmsize);
-                LASSERT (lmm != NULL);
-                LASSERT_REPSWABBED (request, offset + 1);
-
-                rc = obd_unpackmd (&sbi->ll_osc_conn,
-                                   &lsm, lmm, lmmsize);
-                if (rc < 0) {
-                        CERROR ("Error %d unpacking eadata\n", rc);
-                        LBUG();
-                        /* XXX don't know if I should do this... */
-                        GOTO (out, rc);
-                        /* or skip the ll_update_inode but still do
-                         * mdc_lock_set_inode() */
-                }
-                LASSERT (rc >= sizeof (*lsm));
-                rc = 0;
-        }
+        ll_update_inode((*de)->d_inode, md.body, md.lsm);
 
 
-        ll_update_inode((*de)->d_inode, body, lsm);
+        if (md.lsm != NULL && ll_i2info((*de)->d_inode)->lli_smd != md.lsm)
+                obd_free_memmd (&sbi->ll_osc_conn, &md.lsm);
 
 
-        if (lsm != NULL &&
-            ll_i2info((*de)->d_inode)->lli_smd != lsm)
-                obd_free_memmd (&sbi->ll_osc_conn, &lsm);
-
-        ll_mdc_lock_set_inode((struct lustre_handle *)it->it_lock_handle,
-                              (*de)->d_inode);
- out:
+        CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
+               (*de)->d_inode, (*de)->d_inode->i_ino,
+               (*de)->d_inode->i_generation);
+        ldlm_lock_set_data((struct lustre_handle *)it->it_lock_handle,
+                           (*de)->d_inode);
         RETURN(rc);
 }
 
         RETURN(rc);
 }
 
@@ -197,20 +180,26 @@ int ll_have_md_lock(struct dentry *de)
         RETURN(0);
 }
 
         RETURN(0);
 }
 
-int ll_revalidate2(struct dentry *de, int flags, struct lookup_intent *it)
+int ll_revalidate_it(struct dentry *de, int flags, struct lookup_intent *it)
 {
         int rc;
         ENTRY;
         CDEBUG(D_VFSTRACE, "VFS Op:name=%s,intent=%s\n", de->d_name.name,
                LL_IT2STR(it));
 
 {
         int rc;
         ENTRY;
         CDEBUG(D_VFSTRACE, "VFS Op:name=%s,intent=%s\n", de->d_name.name,
                LL_IT2STR(it));
 
-        /* We don't want to cache negative dentries, so return 0 immediately.
-         * We believe that this is safe, that negative dentries cannot be
-         * pinned by someone else */
-        if (de->d_inode == NULL) {
-                CDEBUG(D_INODE, "negative dentry: ret 0 to force lookup2\n");
+        /* Cached negative dentries are unsafe for now - look them up again */
+        if (de->d_inode == NULL)
                 RETURN(0);
                 RETURN(0);
-        }
+
+        /* 
+         * never execute intents for mount points
+         * - attrs will be fixed up in ll_revalidate_inode
+         */
+        if (d_mountpoint(de))
+                RETURN(1);
+
+        if (it)
+                it->it_op_release = ll_intent_release;
 
         if (it == NULL || it->it_op == IT_GETATTR) {
                 /* We could just return 1 immediately, but since we should only
 
         if (it == NULL || it->it_op == IT_GETATTR) {
                 /* We could just return 1 immediately, but since we should only
@@ -233,7 +222,6 @@ int ll_revalidate2(struct dentry *de, int flags, struct lookup_intent *it)
                                 memcpy(it->it_lock_handle, &lockh,
                                        sizeof(lockh));
                                 it->it_lock_mode = LCK_PR;
                                 memcpy(it->it_lock_handle, &lockh,
                                        sizeof(lockh));
                                 it->it_lock_mode = LCK_PR;
-                                LL_SAVE_INTENT(de, it);
                         } else {
                                 ldlm_lock_decref(&lockh, LCK_PR);
                         }
                         } else {
                                 ldlm_lock_decref(&lockh, LCK_PR);
                         }
@@ -248,7 +236,6 @@ int ll_revalidate2(struct dentry *de, int flags, struct lookup_intent *it)
                                 memcpy(it->it_lock_handle, &lockh,
                                        sizeof(lockh));
                                 it->it_lock_mode = LCK_PW;
                                 memcpy(it->it_lock_handle, &lockh,
                                        sizeof(lockh));
                                 it->it_lock_mode = LCK_PW;
-                                LL_SAVE_INTENT(de, it);
                         } else {
                                 ldlm_lock_decref(&lockh, LCK_PW);
                         }
                         } else {
                                 ldlm_lock_decref(&lockh, LCK_PW);
                         }
@@ -256,31 +243,123 @@ int ll_revalidate2(struct dentry *de, int flags, struct lookup_intent *it)
                 }
                 if (S_ISDIR(de->d_inode->i_mode))
                         ll_invalidate_inode_pages(de->d_inode);
                 }
                 if (S_ISDIR(de->d_inode->i_mode))
                         ll_invalidate_inode_pages(de->d_inode);
-                d_unhash_aliases(de->d_inode);
+                ll_unhash_aliases(de->d_inode);
                 RETURN(0);
         }
 
                 RETURN(0);
         }
 
-        rc = ll_intent_lock(de->d_parent->d_inode, &de, it, revalidate2_finish);
+        rc = ll_intent_lock(de->d_parent->d_inode, &de, it, flags,
+                            revalidate_it_finish);
         if (rc < 0) {
                 if (rc != -ESTALE) {
                         CERROR("ll_intent_lock: rc %d : it->it_status %d\n", rc,
                                it->it_status);
                 }
         if (rc < 0) {
                 if (rc != -ESTALE) {
                         CERROR("ll_intent_lock: rc %d : it->it_status %d\n", rc,
                                it->it_status);
                 }
+                ll_unhash_aliases(de->d_inode);
                 RETURN(0);
         }
         /* unfortunately ll_intent_lock may cause a callback and revoke our
            dentry */
         spin_lock(&dcache_lock);
                 RETURN(0);
         }
         /* unfortunately ll_intent_lock may cause a callback and revoke our
            dentry */
         spin_lock(&dcache_lock);
-        list_del_init(&de->d_hash);
+        hlist_del_init(&de->d_hash);
         __d_rehash(de, 0);
         spin_unlock(&dcache_lock);
 
         RETURN(1);
 }
 
         __d_rehash(de, 0);
         spin_unlock(&dcache_lock);
 
         RETURN(1);
 }
 
+static void ll_pin(struct dentry *de, struct vfsmount *mnt, int flag)
+{
+        struct inode *inode= de->d_inode;
+        struct ll_sb_info *sbi = ll_i2sbi(inode);
+        struct ll_dentry_data *ldd = ll_d2d(de);
+        struct obd_client_handle *handle;
+        int rc = 0;
+        ENTRY;
+        LASSERT(ldd);
+
+        lock_kernel();
+        /* Strictly speaking this introduces an additional race: the
+         * increments should wait until the rpc has returned.
+         * However, given that at present the function is void, this
+         * issue is moot. */
+        if (flag == 1 && (++ldd->lld_mnt_count) > 1) {
+                unlock_kernel();
+                EXIT;
+                return;
+        }
+
+        if (flag == 0 && (++ldd->lld_cwd_count) > 1) {
+                unlock_kernel();
+                EXIT;
+                return;
+        }
+        unlock_kernel();
+
+        handle = (flag) ? &ldd->lld_mnt_och : &ldd->lld_cwd_och;
+        rc = obd_pin(&sbi->ll_mdc_conn, inode->i_ino, inode->i_generation,
+                     inode->i_mode & S_IFMT, handle, flag);
+
+        if (rc) {
+                lock_kernel();
+                memset(handle, 0, sizeof(*handle));
+                if (flag == 0)
+                        ldd->lld_cwd_count--;
+                else
+                        ldd->lld_mnt_count--;
+                unlock_kernel();
+        }
+
+        EXIT;
+        return;
+}
+
+static void ll_unpin(struct dentry *de, struct vfsmount *mnt, int flag)
+{
+        struct ll_sb_info *sbi = ll_i2sbi(de->d_inode);
+        struct ll_dentry_data *ldd = ll_d2d(de);
+        struct obd_client_handle handle;
+        int count, rc = 0;
+        ENTRY;
+        LASSERT(ldd);
+
+        lock_kernel();
+        /* Strictly speaking this introduces an additional race: the
+         * increments should wait until the rpc has returned.
+         * However, given that at present the function is void, this
+         * issue is moot. */
+        handle = (flag) ? ldd->lld_mnt_och : ldd->lld_cwd_och;
+        if (handle.och_magic != OBD_CLIENT_HANDLE_MAGIC) {
+                /* the "pin" failed */
+                unlock_kernel();
+                EXIT;
+                return;
+        }
+
+        if (flag)
+                count = --ldd->lld_mnt_count;
+        else
+                count = --ldd->lld_cwd_count;
+        unlock_kernel();
+
+        if (count != 0) {
+                EXIT;
+                return;
+        }
+
+        rc = obd_unpin(&sbi->ll_mdc_conn, &handle, flag);
+        EXIT;
+        return;
+}
+
 struct dentry_operations ll_d_ops = {
 struct dentry_operations ll_d_ops = {
-        .d_revalidate2 = ll_revalidate2,
-        .d_intent_release = ll_intent_release,
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+        .d_revalidate_nd = ll_revalidate_nd,
+#else
+        .d_revalidate_it = ll_revalidate_it,
+#endif
         .d_release = ll_release,
         .d_release = ll_release,
-        .d_delete = ll_delete,
+#if 0
+        .d_pin = ll_pin,
+        .d_unpin = ll_unpin,
+#endif
 };
 };
index 115ed4e..a81a7d4 100644 (file)
@@ -54,14 +54,6 @@ typedef struct ext2_dir_entry_2 ext2_dirent;
 #define PageChecked(page)        test_bit(PG_checked, &(page)->flags)
 #define SetPageChecked(page)     set_bit(PG_checked, &(page)->flags)
 
 #define PageChecked(page)        test_bit(PG_checked, &(page)->flags)
 #define SetPageChecked(page)     set_bit(PG_checked, &(page)->flags)
 
-
-static int ll_dir_prepare_write(struct file *file, struct page *page,
-                                unsigned from, unsigned to)
-{
-        CDEBUG(D_VFSTRACE, "VFS Op:\n");
-        return 0;
-}
-
 /* returns the page unlocked, but with a reference */
 static int ll_dir_readpage(struct file *file, struct page *page)
 {
 /* returns the page unlocked, but with a reference */
 static int ll_dir_readpage(struct file *file, struct page *page)
 {
@@ -98,7 +90,7 @@ static int ll_dir_readpage(struct file *file, struct page *page)
                              &lockh);
         if (!rc) {
                 ll_prepare_mdc_op_data(&data, inode, NULL, NULL, 0, 0);
                              &lockh);
         if (!rc) {
                 ll_prepare_mdc_op_data(&data, inode, NULL, NULL, 0, 0);
-                
+
                 rc = mdc_enqueue(&sbi->ll_mdc_conn, LDLM_PLAIN, &it, LCK_PR,
                                  &data, &lockh, NULL, 0,
                                  ldlm_completion_ast, ll_mdc_blocking_ast,
                 rc = mdc_enqueue(&sbi->ll_mdc_conn, LDLM_PLAIN, &it, LCK_PR,
                                  &data, &lockh, NULL, 0,
                                  ldlm_completion_ast, ll_mdc_blocking_ast,
@@ -137,39 +129,14 @@ static int ll_dir_readpage(struct file *file, struct page *page)
                 SetPageUptodate(page);
 
         unlock_page(page);
                 SetPageUptodate(page);
 
         unlock_page(page);
-        ll_unlock(LCK_PR, &lockh);
-        if (rc != ELDLM_OK)
-                CERROR("ll_unlock: err: %d\n", rc);
+        ldlm_lock_decref(&lockh, LCK_PR);
         return rc;
 }
 
 struct address_space_operations ll_dir_aops = {
         readpage: ll_dir_readpage,
         return rc;
 }
 
 struct address_space_operations ll_dir_aops = {
         readpage: ll_dir_readpage,
-        prepare_write: ll_dir_prepare_write
 };
 
 };
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3))
-int waitfor_one_page(struct page *page)
-{
-        int error = 0;
-        struct buffer_head *bh, *head = page->buffers;
-
-        bh = head;
-        do {
-                wait_on_buffer(bh);
-                if (buffer_req(bh) && !buffer_uptodate(bh))
-                        error = -EIO;
-        } while ((bh = bh->b_this_page) != head);
-        return error;
-}
-#elif (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-int waitfor_one_page(struct page *page)
-{
-        wait_on_page_locked(page);
-        return 0;
-}
-#endif
-
 /*
  * ext2 uses block-sized chunks. Arguably, sector-sized ones would be
  * more robust, but we have what we have
 /*
  * ext2 uses block-sized chunks. Arguably, sector-sized ones would be
  * more robust, but we have what we have
@@ -190,27 +157,6 @@ static inline unsigned long dir_pages(struct inode *inode)
         return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT;
 }
 
         return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT;
 }
 
-extern void set_page_clean(struct page *page);
-
-static int ext2_commit_chunk(struct page *page, unsigned from, unsigned to)
-{
-        struct inode *dir = page->mapping->host;
-        loff_t new_size = (page->index << PAGE_CACHE_SHIFT) + to;
-        int err = 0;
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        dir->i_version = ++event;
-#endif
-        if (new_size > dir->i_size)
-                dir->i_size = new_size;
-        SetPageUptodate(page);
-        set_page_clean(page);
-
-        //page->mapping->a_ops->commit_write(NULL, page, from, to);
-        //if (IS_SYNC(dir))
-        //      err = waitfor_one_page(page);
-        return err;
-}
 
 static void ext2_check_page(struct page *page)
 {
 
 static void ext2_check_page(struct page *page)
 {
@@ -324,20 +270,6 @@ fail:
         return ERR_PTR(-EIO);
 }
 
         return ERR_PTR(-EIO);
 }
 
-/*
- * NOTE! unlike strncmp, ext2_match returns 1 for success, 0 for failure.
- *
- * len <= EXT2_NAME_LEN and de != NULL are guaranteed by caller.
- */
-static inline int ext2_match (int len, const char * const name,
-                                        struct ext2_dir_entry_2 * de)
-{
-        if (len != de->name_len)
-                return 0;
-        if (!de->inode)
-                return 0;
-        return !memcmp(name, de->name, len);
-}
 
 /*
  * p is at least 6 bytes before the end of page
 
 /*
  * p is at least 6 bytes before the end of page
@@ -368,33 +300,6 @@ static unsigned char ext2_filetype_table[EXT2_FT_MAX] = {
         [EXT2_FT_SYMLINK]       DT_LNK,
 };
 
         [EXT2_FT_SYMLINK]       DT_LNK,
 };
 
-static unsigned int ll_dt2fmt[DT_WHT + 1] = {
-        [EXT2_FT_UNKNOWN]       0,
-        [EXT2_FT_REG_FILE]      S_IFREG,
-        [EXT2_FT_DIR]           S_IFDIR,
-        [EXT2_FT_CHRDEV]        S_IFCHR,
-        [EXT2_FT_BLKDEV]        S_IFBLK,
-        [EXT2_FT_FIFO]          S_IFIFO,
-        [EXT2_FT_SOCK]          S_IFSOCK,
-        [EXT2_FT_SYMLINK]       S_IFLNK
-};
-
-#define S_SHIFT 12
-static unsigned char ext2_type_by_mode[S_IFMT >> S_SHIFT] = {
-        [S_IFREG >> S_SHIFT]    EXT2_FT_REG_FILE,
-        [S_IFDIR >> S_SHIFT]    EXT2_FT_DIR,
-        [S_IFCHR >> S_SHIFT]    EXT2_FT_CHRDEV,
-        [S_IFBLK >> S_SHIFT]    EXT2_FT_BLKDEV,
-        [S_IFIFO >> S_SHIFT]    EXT2_FT_FIFO,
-        [S_IFSOCK >> S_SHIFT]   EXT2_FT_SOCK,
-        [S_IFLNK >> S_SHIFT]    EXT2_FT_SYMLINK,
-};
-
-static inline void ext2_set_de_type(ext2_dirent *de, struct inode *inode)
-{
-        mode_t mode = inode->i_mode;
-        de->file_type = ext2_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
-}
 
 int ll_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
 
 int ll_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
@@ -437,7 +342,7 @@ int ll_readdir(struct file * filp, void * dirent, filldir_t filldir)
                 }
                 de = (ext2_dirent *)(kaddr+offset);
                 limit = kaddr + PAGE_CACHE_SIZE - EXT2_DIR_REC_LEN(1);
                 }
                 de = (ext2_dirent *)(kaddr+offset);
                 limit = kaddr + PAGE_CACHE_SIZE - EXT2_DIR_REC_LEN(1);
-                for ( ;(char*)de <= limit; de = ext2_next_entry(de))
+                for ( ;(char*)de <= limit; de = ext2_next_entry(de)) {
                         if (de->inode) {
                                 int over;
                                 unsigned char d_type = DT_UNKNOWN;
                         if (de->inode) {
                                 int over;
                                 unsigned char d_type = DT_UNKNOWN;
@@ -454,334 +359,31 @@ int ll_readdir(struct file * filp, void * dirent, filldir_t filldir)
                                         GOTO(done,0);
                                 }
                         }
                                         GOTO(done,0);
                                 }
                         }
+                }
                 ext2_put_page(page);
         }
 
 done:
         filp->f_pos = (n << PAGE_CACHE_SHIFT) | offset;
         filp->f_version = inode->i_version;
                 ext2_put_page(page);
         }
 
 done:
         filp->f_pos = (n << PAGE_CACHE_SHIFT) | offset;
         filp->f_version = inode->i_version;
-        UPDATE_ATIME(inode);
+        update_atime(inode);
         RETURN(0);
 }
 
         RETURN(0);
 }
 
-/*
- *      ext2_find_entry()
- *
- * finds an entry in the specified directory with the wanted name. It
- * returns the page in which the entry was found, and the entry itself
- * (as a parameter - res_dir). Page is returned mapped and unlocked.
- * Entry is guaranteed to be valid.
- */
-struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir,
-                        struct dentry *dentry, struct page ** res_page)
-{
-        const char *name = dentry->d_name.name;
-        int namelen = dentry->d_name.len;
-        unsigned reclen = EXT2_DIR_REC_LEN(namelen);
-        unsigned long start, n;
-        unsigned long npages = dir_pages(dir);
-        struct page *page = NULL;
-        ext2_dirent * de;
-
-        /* OFFSET_CACHE */
-        *res_page = NULL;
-
-        //      start = dir->u.ext2_i.i_dir_start_lookup;
-        start = 0;
-        if (start >= npages)
-                start = 0;
-        n = start;
-        do {
-                char *kaddr;
-                page = ll_get_dir_page(dir, n);
-                if (!IS_ERR(page)) {
-                        kaddr = page_address(page);
-                        de = (ext2_dirent *) kaddr;
-                        kaddr += PAGE_CACHE_SIZE - reclen;
-                        while ((char *) de <= kaddr) {
-                                if (ext2_match (namelen, name, de))
-                                        goto found;
-                                de = ext2_next_entry(de);
-                        }
-                        ext2_put_page(page);
-                }
-                if (++n >= npages)
-                        n = 0;
-        } while (n != start);
-        return NULL;
-
-found:
-        *res_page = page;
-        //      dir->u.ext2_i.i_dir_start_lookup = n;
-        return de;
-}
-
-struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p)
-{
-        struct page *page = ll_get_dir_page(dir, 0);
-        ext2_dirent *de = NULL;
-
-        if (!IS_ERR(page)) {
-                de = ext2_next_entry((ext2_dirent *) page_address(page));
-                *p = page;
-        }
-        return de;
-}
-
-obd_id ll_inode_by_name(struct inode * dir, struct dentry *dentry, int *type)
-{
-        obd_id res = 0;
-        struct ext2_dir_entry_2 * de;
-        struct page *page;
-
-        de = ext2_find_entry (dir, dentry, &page);
-        if (de) {
-                res = le32_to_cpu(de->inode);
-                *type = ll_dt2fmt[de->file_type];
-                kunmap(page);
-                page_cache_release(page);
-        }
-        return res;
-}
-
-/* Releases the page */
-void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
-                        struct page *page, struct inode *inode)
-{
-        unsigned from = (char *) de - (char *) page_address(page);
-        unsigned to = from + le16_to_cpu(de->rec_len);
-        int err;
-
-        lock_page(page);
-        err = page->mapping->a_ops->prepare_write(NULL, page, from, to);
-        if (err)
-                LBUG();
-        de->inode = cpu_to_le32(inode->i_ino);
-        ext2_set_de_type (de, inode);
-        dir->i_mtime = dir->i_ctime = CURRENT_TIME;
-        err = ext2_commit_chunk(page, from, to);
-        unlock_page(page);
-        ext2_put_page(page);
-}
-
-/*
- *      Parent is locked.
- */
-int ll_add_link (struct dentry *dentry, struct inode *inode)
-{
-        struct inode *dir = dentry->d_parent->d_inode;
-        const char *name = dentry->d_name.name;
-        int namelen = dentry->d_name.len;
-        unsigned reclen = EXT2_DIR_REC_LEN(namelen);
-        unsigned short rec_len, name_len;
-        struct page *page = NULL;
-        ext2_dirent * de;
-        unsigned long npages = dir_pages(dir);
-        unsigned long n;
-        char *kaddr;
-        unsigned from, to;
-        int err;
-
-        /* We take care of directory expansion in the same loop */
-        for (n = 0; n <= npages; n++) {
-                page = ll_get_dir_page(dir, n);
-                err = PTR_ERR(page);
-                if (IS_ERR(page))
-                        goto out;
-                kaddr = page_address(page);
-                de = (ext2_dirent *)kaddr;
-                kaddr += PAGE_CACHE_SIZE - reclen;
-                while ((char *)de <= kaddr) {
-                        err = -EEXIST;
-                        if (ext2_match (namelen, name, de))
-                                goto out_page;
-                        name_len = EXT2_DIR_REC_LEN(de->name_len);
-                        rec_len = le16_to_cpu(de->rec_len);
-                        if ( n==npages && rec_len == 0) {
-                                CERROR("Fatal dir behaviour\n");
-                                goto out_page;
-                        }
-                        if (!de->inode && rec_len >= reclen)
-                                goto got_it;
-                        if (rec_len >= name_len + reclen)
-                                goto got_it;
-                        de = (ext2_dirent *) ((char *) de + rec_len);
-                }
-                ext2_put_page(page);
-        }
-        LBUG();
-        return -EINVAL;
-
-got_it:
-        from = (char*)de - (char*)page_address(page);
-        to = from + rec_len;
-        lock_page(page);
-        err = page->mapping->a_ops->prepare_write(NULL, page, from, to);
-        if (err)
-                goto out_unlock;
-        if (de->inode) {
-                ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
-                de1->rec_len = cpu_to_le16(rec_len - name_len);
-                de->rec_len = cpu_to_le16(name_len);
-                de = de1;
-        }
-        de->name_len = namelen;
-        memcpy (de->name, name, namelen);
-        de->inode = cpu_to_le32(inode->i_ino);
-        ext2_set_de_type (de, inode);
-        CDEBUG(D_INODE, "type set to %o\n", de->file_type);
-        dir->i_mtime = dir->i_ctime = CURRENT_TIME;
-        err = ext2_commit_chunk(page, from, to);
-
-        // change_inode happens with the commit_chunk
-        /* XXX OFFSET_CACHE */
-
-out_unlock:
-        unlock_page(page);
-out_page:
-        ext2_put_page(page);
-out:
-        return err;
-}
-
-/*
- * ext2_delete_entry deletes a directory entry by merging it with the
- * previous entry. Page is up-to-date. Releases the page.
- */
-int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page )
-{
-        struct address_space *mapping = page->mapping;
-        struct inode *inode = mapping->host;
-        char *kaddr = page_address(page);
-        unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1);
-        unsigned to = ((char*)dir - kaddr) + le16_to_cpu(dir->rec_len);
-        ext2_dirent * pde = NULL;
-        ext2_dirent * de = (ext2_dirent *) (kaddr + from);
-        int err;
-
-        while ((char*)de < (char*)dir) {
-                pde = de;
-                de = ext2_next_entry(de);
-        }
-        if (pde)
-                from = (char*)pde - (char*)page_address(page);
-        lock_page(page);
-        err = mapping->a_ops->prepare_write(NULL, page, from, to);
-        if (err)
-                LBUG();
-        if (pde)
-                pde->rec_len = cpu_to_le16(to-from);
-        dir->inode = 0;
-        inode->i_ctime = inode->i_mtime = CURRENT_TIME;
-        err = ext2_commit_chunk(page, from, to);
-        unlock_page(page);
-        ext2_put_page(page);
-        return err;
-}
-
-/*
- * Set the first fragment of directory.
- */
-int ext2_make_empty(struct inode *inode, struct inode *parent)
-{
-        struct address_space *mapping = inode->i_mapping;
-        struct page *page = grab_cache_page(mapping, 0);
-        unsigned chunk_size = ext2_chunk_size(inode);
-        struct ext2_dir_entry_2 * de;
-        char *base;
-        int err;
-        ENTRY;
-
-        if (!page)
-                return -ENOMEM;
-        base = kmap(page);
-        if (!base)
-                return -ENOMEM;
-
-        err = mapping->a_ops->prepare_write(NULL, page, 0, chunk_size);
-        if (err)
-                goto fail;
-
-        de = (struct ext2_dir_entry_2 *) base;
-        de->name_len = 1;
-        de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1));
-        memcpy (de->name, ".\0\0", 4);
-        de->inode = cpu_to_le32(inode->i_ino);
-        ext2_set_de_type (de, inode);
-
-        de = (struct ext2_dir_entry_2 *) (base + EXT2_DIR_REC_LEN(1));
-        de->name_len = 2;
-        de->rec_len = cpu_to_le16(chunk_size - EXT2_DIR_REC_LEN(1));
-        de->inode = cpu_to_le32(parent->i_ino);
-        memcpy (de->name, "..\0", 4);
-        ext2_set_de_type (de, inode);
-
-        err = ext2_commit_chunk(page, 0, chunk_size);
-fail:
-        kunmap(page);
-        unlock_page(page);
-        page_cache_release(page);
-        ENTRY;
-        return err;
-}
-
-/*
- * routine to check that the specified directory is empty (for rmdir)
- */
-int ext2_empty_dir (struct inode * inode)
-{
-        struct page *page = NULL;
-        unsigned long i, npages = dir_pages(inode);
-
-        for (i = 0; i < npages; i++) {
-                char *kaddr;
-                ext2_dirent * de;
-                page = ll_get_dir_page(inode, i);
-
-                if (IS_ERR(page))
-                        continue;
-
-                kaddr = page_address(page);
-                de = (ext2_dirent *)kaddr;
-                kaddr += PAGE_CACHE_SIZE-EXT2_DIR_REC_LEN(1);
-
-                while ((char *)de <= kaddr) {
-                        if (de->inode != 0) {
-                                /* check for . and .. */
-                                if (de->name[0] != '.')
-                                        goto not_empty;
-                                if (de->name_len > 2)
-                                        goto not_empty;
-                                if (de->name_len < 2) {
-                                        if (de->inode !=
-                                            cpu_to_le32(inode->i_ino))
-                                                goto not_empty;
-                                } else if (de->name[1] != '.')
-                                        goto not_empty;
-                        }
-                        de = ext2_next_entry(de);
-                }
-                ext2_put_page(page);
-        }
-        return 1;
-
-not_empty:
-        ext2_put_page(page);
-        return 0;
-}
-
 static int ll_dir_ioctl(struct inode *inode, struct file *file,
                         unsigned int cmd, unsigned long arg)
 {
         struct ll_sb_info *sbi = ll_i2sbi(inode);
         struct obd_ioctl_data *data;
         ENTRY;
 static int ll_dir_ioctl(struct inode *inode, struct file *file,
                         unsigned int cmd, unsigned long arg)
 {
         struct ll_sb_info *sbi = ll_i2sbi(inode);
         struct obd_ioctl_data *data;
         ENTRY;
+
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%u\n", inode->i_ino,
                inode->i_generation, inode, cmd);
 
         if (_IOC_TYPE(cmd) == 'T') /* tty ioctls */
                 return -ENOTTY;
 
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%u\n", inode->i_ino,
                inode->i_generation, inode, cmd);
 
         if (_IOC_TYPE(cmd) == 'T') /* tty ioctls */
                 return -ENOTTY;
 
+        lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_IOCTL);
         switch(cmd) {
         case IOC_MDC_LOOKUP: {
                 struct ptlrpc_request *request = NULL;
         switch(cmd) {
         case IOC_MDC_LOOKUP: {
                 struct ptlrpc_request *request = NULL;
@@ -834,9 +436,61 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file,
                 obd_ioctl_freedata(buf, len);
                 return rc;
         }
                 obd_ioctl_freedata(buf, len);
                 return rc;
         }
-        default:
-                CERROR("unrecognized ioctl %#x\n", cmd);
+        case LL_IOC_LOV_SETSTRIPE:
+        case LL_IOC_LOV_GETSTRIPE:
                 RETURN(-ENOTTY);
                 RETURN(-ENOTTY);
+        case IOC_MDC_GETSTRIPE: {
+                struct ptlrpc_request *request = NULL;
+                struct ll_fid fid;
+                struct mds_body *body;
+                struct lov_mds_md *lmm;
+                char *filename;
+                int rc, lmmsize;
+
+                filename = getname((const char *)arg);
+                if (IS_ERR(filename))
+                        RETURN(PTR_ERR(filename));
+
+                ll_inode2fid(&fid, inode);
+                rc = mdc_getattr_name(&sbi->ll_mdc_conn, &fid, filename,
+                                      strlen(filename)+1, OBD_MD_FLEASIZE,
+                                      obd_size_diskmd(&sbi->ll_osc_conn, NULL),
+                                      &request);
+                if (rc < 0) {
+                        CERROR("mdc_getattr_name: failed on %s: rc %d\n",
+                               filename, rc);
+                        GOTO(out_name, rc);
+                }
+
+                body = lustre_msg_buf(request->rq_repmsg, 0, sizeof (*body));
+                LASSERT(body != NULL);         /* checked by mdc_getattr_name */
+                LASSERT_REPSWABBED(request, 0);/* swabbed by mdc_getattr_name */
+
+                lmmsize = body->eadatasize;
+
+                if (!(body->valid & OBD_MD_FLEASIZE) || lmmsize == 0)
+                        GOTO(out_req, rc = -ENODATA);
+
+                if (lmmsize > 4096)
+                        GOTO(out_req, rc = -EFBIG);
+
+                lmm = lustre_msg_buf(request->rq_repmsg, 1, lmmsize);
+                LASSERT(lmm != NULL);
+                LASSERT_REPSWABBED(request, 1);
+
+                rc = copy_to_user((struct lov_mds_md *)arg, lmm, lmmsize);
+                if (rc)
+                        GOTO(out_req, rc = -EFAULT);
+
+                EXIT;
+        out_req:
+                ptlrpc_req_finished(request);
+        out_name:
+                putname(filename);
+                return rc;
+        }
+        default:
+                return obd_iocontrol(cmd,&sbi->ll_osc_conn,0,NULL,(void *)arg);
         }
 }
 
         }
 }
 
index 943ba1b..67d18fd 100644 (file)
@@ -32,8 +32,7 @@
 #include <linux/lustre_compat25.h>
 #endif
 
 #include <linux/lustre_compat25.h>
 #endif
 
-int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc);
-extern int ll_setattr(struct dentry *de, struct iattr *attr);
+#include "llite_internal.h"
 
 static int ll_mdc_close(struct lustre_handle *mdc_conn, struct inode *inode,
                         struct file *file)
 
 static int ll_mdc_close(struct lustre_handle *mdc_conn, struct inode *inode,
                         struct file *file)
@@ -135,28 +134,21 @@ int ll_file_release(struct inode *inode, struct file *file)
         lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_RELEASE);
         fd = (struct ll_file_data *)file->private_data;
         if (!fd) /* no process opened the file after an mcreate */
         lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_RELEASE);
         fd = (struct ll_file_data *)file->private_data;
         if (!fd) /* no process opened the file after an mcreate */
-                RETURN(rc = 0);
+                RETURN(0);
 
         /* we might not be able to get a valid handle on this file
          * again so we really want to flush our write cache.. */
 
         /* we might not be able to get a valid handle on this file
          * again so we really want to flush our write cache.. */
-        if (S_ISREG(inode->i_mode)) {
-                filemap_fdatasync(inode->i_mapping);
-                filemap_fdatawait(inode->i_mapping);
-
-                if (lsm != NULL) {
-                        memset(&oa, 0, sizeof(oa));
-                        oa.o_id = lsm->lsm_object_id;
-                        oa.o_mode = S_IFREG;
-                        oa.o_valid = OBD_MD_FLTYPE | OBD_MD_FLID;
-
-                        memcpy(&oa.o_inline, &fd->fd_ost_och, FD_OSTDATA_SIZE);
-                        oa.o_valid |= OBD_MD_FLHANDLE;
+        if (S_ISREG(inode->i_mode) && lsm) {
+                write_inode_now(inode, 0);
+                obdo_from_inode(&oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
+                                            OBD_MD_FLMTIME | OBD_MD_FLCTIME);
+                memcpy(obdo_handle(&oa), &fd->fd_ost_och, FD_OSTDATA_SIZE);
+                oa.o_valid |= OBD_MD_FLHANDLE;
 
 
-                        rc = obd_close(&sbi->ll_osc_conn, &oa, lsm, NULL);
-                        if (rc)
-                                CERROR("inode %lu object close failed: rc = "
-                                       "%d\n", inode->i_ino, rc);
-                }
+                rc = obd_close(&sbi->ll_osc_conn, &oa, lsm, NULL);
+                if (rc)
+                        CERROR("inode %lu object close failed: rc %d\n",
+                               inode->i_ino, rc);
         }
 
         rc2 = ll_mdc_close(&sbi->ll_mdc_conn, inode, file);
         }
 
         rc2 = ll_mdc_close(&sbi->ll_mdc_conn, inode, file);
@@ -206,16 +198,16 @@ static int ll_osc_open(struct lustre_handle *conn, struct inode *inode,
                 RETURN(-ENOMEM);
         oa->o_id = lsm->lsm_object_id;
         oa->o_mode = S_IFREG;
                 RETURN(-ENOMEM);
         oa->o_id = lsm->lsm_object_id;
         oa->o_mode = S_IFREG;
-        oa->o_valid = (OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLBLOCKS |
-                       OBD_MD_FLMTIME | OBD_MD_FLCTIME);
+        oa->o_valid = OBD_MD_FLID;
+        obdo_from_inode(oa, inode, OBD_MD_FLTYPE);
         rc = obd_open(conn, oa, lsm, NULL, &fd->fd_ost_och);
         if (rc)
                 GOTO(out, rc);
 
         file->f_flags &= ~O_LOV_DELAY_CREATE;
         rc = obd_open(conn, oa, lsm, NULL, &fd->fd_ost_och);
         if (rc)
                 GOTO(out, rc);
 
         file->f_flags &= ~O_LOV_DELAY_CREATE;
-        obdo_to_inode(inode, oa, OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
-                                 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
-
+        obdo_refresh_inode(inode, oa, OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
+                                      OBD_MD_FLATIME | OBD_MD_FLMTIME |
+                                      OBD_MD_FLCTIME);
         EXIT;
 out:
         obdo_free(oa);
         EXIT;
 out:
         obdo_free(oa);
@@ -236,24 +228,33 @@ static int ll_create_obj(struct lustre_handle *conn, struct inode *inode,
         struct obdo *oa;
         struct iattr iattr;
         struct mdc_op_data op_data;
         struct obdo *oa;
         struct iattr iattr;
         struct mdc_op_data op_data;
-        int rc, err, lmm_size = 0;;
+        struct obd_trans_info oti = { 0 };
+        int rc, err, lmm_size = 0;
         ENTRY;
 
         oa = obdo_alloc();
         if (!oa)
                 RETURN(-ENOMEM);
 
         ENTRY;
 
         oa = obdo_alloc();
         if (!oa)
                 RETURN(-ENOMEM);
 
+        LASSERT(S_ISREG(inode->i_mode));
         oa->o_mode = S_IFREG | 0600;
         oa->o_id = inode->i_ino;
         oa->o_mode = S_IFREG | 0600;
         oa->o_id = inode->i_ino;
+        oa->o_generation = inode->i_generation;
         /* Keep these 0 for now, because chown/chgrp does not change the
          * ownership on the OST, and we don't want to allow BA OST NFS
          * users to access these objects by mistake. */
         oa->o_uid = 0;
         oa->o_gid = 0;
         /* Keep these 0 for now, because chown/chgrp does not change the
          * ownership on the OST, and we don't want to allow BA OST NFS
          * users to access these objects by mistake. */
         oa->o_uid = 0;
         oa->o_gid = 0;
-        oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE |
-                OBD_MD_FLUID | OBD_MD_FLGID;
+        oa->o_valid = OBD_MD_FLID | OBD_MD_FLGENER | OBD_MD_FLTYPE |
+                OBD_MD_FLMODE | OBD_MD_FLUID | OBD_MD_FLGID;
+#ifdef ENABLE_ORPHANS
+        oa->o_valid |= OBD_MD_FLCOOKIE;
+#endif
 
 
-        rc = obd_create(conn, oa, &lsm, NULL);
+        obdo_from_inode(oa, inode, OBD_MD_FLTYPE|OBD_MD_FLATIME|OBD_MD_FLMTIME|
+                        OBD_MD_FLCTIME | (inode->i_size ? OBD_MD_FLSIZE : 0));
+
+        rc = obd_create(conn, oa, &lsm, &oti);
         if (rc) {
                 CERROR("error creating objects for inode %lu: rc = %d\n",
                        inode->i_ino, rc);
         if (rc) {
                 CERROR("error creating objects for inode %lu: rc = %d\n",
                        inode->i_ino, rc);
@@ -263,7 +264,7 @@ static int ll_create_obj(struct lustre_handle *conn, struct inode *inode,
                 }
                 GOTO(out_oa, rc);
         }
                 }
                 GOTO(out_oa, rc);
         }
-        obdo_to_inode(inode, oa, OBD_MD_FLBLKSZ);
+        obdo_refresh_inode(inode, oa, OBD_MD_FLBLKSZ);
 
         LASSERT(lsm && lsm->lsm_object_id);
         rc = obd_packmd(conn, &lmm, lsm);
 
         LASSERT(lsm && lsm->lsm_object_id);
         rc = obd_packmd(conn, &lmm, lsm);
@@ -278,11 +279,18 @@ static int ll_create_obj(struct lustre_handle *conn, struct inode *inode,
 
         ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0);
 
 
         ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0);
 
-        rc = mdc_setattr(&ll_i2sbi(inode)->ll_mdc_conn, &op_data,
-                         &iattr, lmm, lmm_size, &req);
+#if 0
+#warning FIXME: next line is for debugging purposes only
+        obd_log_cancel(&ll_i2sbi(inode)->ll_osc_conn, lsm, oti.oti_numcookies,
+                       oti.oti_logcookies, OBD_LLOG_FL_SENDNOW);
+#endif
+
+        rc = mdc_setattr(&ll_i2sbi(inode)->ll_mdc_conn, &op_data, &iattr,
+                         lmm, lmm_size, oti.oti_logcookies,
+                         oti.oti_numcookies * sizeof(oti.oti_onecookie), &req);
         ptlrpc_req_finished(req);
 
         ptlrpc_req_finished(req);
 
-        obd_free_diskmd (conn, &lmm);
+        obd_free_diskmd(conn, &lmm);
 
         /* If we couldn't complete mdc_open() and store the stripe MD on the
          * MDS, we need to destroy the objects now or they will be leaked.
 
         /* If we couldn't complete mdc_open() and store the stripe MD on the
          * MDS, we need to destroy the objects now or they will be leaked.
@@ -297,13 +305,21 @@ static int ll_create_obj(struct lustre_handle *conn, struct inode *inode,
 
         EXIT;
 out_oa:
 
         EXIT;
 out_oa:
+        oti_free_cookies(&oti);
         obdo_free(oa);
         return rc;
 
 out_destroy:
         obdo_free(oa);
         return rc;
 
 out_destroy:
-        obdo_from_inode(oa, inode, OBD_MD_FLTYPE);
         oa->o_id = lsm->lsm_object_id;
         oa->o_id = lsm->lsm_object_id;
-        oa->o_valid |= OBD_MD_FLID;
+        oa->o_valid = OBD_MD_FLID;
+        obdo_from_inode(oa, inode, OBD_MD_FLTYPE);
+#if 0
+        err = obd_log_cancel(conn, lsm, oti.oti_numcookies, oti.oti_logcookies,
+                             OBD_LLOG_FL_SENDNOW);
+        if (err)
+                CERROR("error cancelling inode %lu log cookies: rc %d\n",
+                       inode->i_ino, err);
+#endif
         err = obd_destroy(conn, oa, lsm, NULL);
         obd_free_memmd(conn, &lsm);
         if (err)
         err = obd_destroy(conn, oa, lsm, NULL);
         obd_free_memmd(conn, &lsm);
         if (err)
@@ -327,8 +343,6 @@ out_destroy:
  * before returning in the O_LOV_DELAY_CREATE case and dropping it here
  * or in ll_file_release(), but I'm not sure that is desirable/necessary.
  */
  * before returning in the O_LOV_DELAY_CREATE case and dropping it here
  * or in ll_file_release(), but I'm not sure that is desirable/necessary.
  */
-extern int ll_it_open_error(int phase, struct lookup_intent *it);
-
 int ll_file_open(struct inode *inode, struct file *file)
 {
         struct ll_sb_info *sbi = ll_i2sbi(inode);
 int ll_file_open(struct inode *inode, struct file *file)
 {
         struct ll_sb_info *sbi = ll_i2sbi(inode);
@@ -346,9 +360,10 @@ int ll_file_open(struct inode *inode, struct file *file)
         if (inode->i_sb->s_root == file->f_dentry)
                 RETURN(0);
 
         if (inode->i_sb->s_root == file->f_dentry)
                 RETURN(0);
 
+        it = file->f_it;
         lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_OPEN);
         lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_OPEN);
-        LL_GET_INTENT(file->f_dentry, it);
-        rc = ll_it_open_error(IT_OPEN_OPEN, it);
+
+        rc = ll_it_open_error(DISP_OPEN_OPEN, it);
         if (rc)
                 RETURN(rc);
 
         if (rc)
                 RETURN(rc);
 
@@ -363,7 +378,8 @@ int ll_file_open(struct inode *inode, struct file *file)
 
         lsm = lli->lli_smd;
         if (lsm == NULL) {
 
         lsm = lli->lli_smd;
         if (lsm == NULL) {
-                if (file->f_flags & O_LOV_DELAY_CREATE) {
+                if (file->f_flags & O_LOV_DELAY_CREATE ||
+                    !(file->f_mode & FMODE_WRITE)) {
                         CDEBUG(D_INODE, "delaying object creation\n");
                         RETURN(0);
                 }
                         CDEBUG(D_INODE, "delaying object creation\n");
                         RETURN(0);
                 }
@@ -418,7 +434,7 @@ int ll_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm,
                 OBD_MD_FLCTIME;
 
         if (ostdata != NULL) {
                 OBD_MD_FLCTIME;
 
         if (ostdata != NULL) {
-                memcpy(&oa.o_inline, ostdata, FD_OSTDATA_SIZE);
+                memcpy(obdo_handle(&oa), ostdata, FD_OSTDATA_SIZE);
                 oa.o_valid |= OBD_MD_FLHANDLE;
         }
 
                 oa.o_valid |= OBD_MD_FLHANDLE;
         }
 
@@ -455,8 +471,8 @@ int ll_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm,
                  (aft != 0 || after < before) &&
                  oa.o_size < ((u64)before + 1) << PAGE_CACHE_SHIFT);
 
                  (aft != 0 || after < before) &&
                  oa.o_size < ((u64)before + 1) << PAGE_CACHE_SHIFT);
 
-        obdo_to_inode(inode, &oa, (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
-                                   OBD_MD_FLMTIME | OBD_MD_FLCTIME));
+        obdo_refresh_inode(inode, &oa, OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
+                                       OBD_MD_FLMTIME | OBD_MD_FLCTIME);
         if (inode->i_blksize < PAGE_CACHE_SIZE)
                 inode->i_blksize = PAGE_CACHE_SIZE;
 
         if (inode->i_blksize < PAGE_CACHE_SIZE)
                 inode->i_blksize = PAGE_CACHE_SIZE;
 
@@ -477,102 +493,6 @@ int ll_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm,
         RETURN(0);
 }
 
         RETURN(0);
 }
 
-/*
- * some callers, notably truncate, really don't want i_size set based
- * on the the size returned by the getattr, or lock acquisition in
- * the future.
- */
-int ll_extent_lock_no_validate(struct ll_file_data *fd, struct inode *inode,
-                   struct lov_stripe_md *lsm,
-                   int mode, struct ldlm_extent *extent,
-                   struct lustre_handle *lockh)
-{
-        struct ll_sb_info *sbi = ll_i2sbi(inode);
-        int rc, flags = 0;
-        ENTRY;
-
-        LASSERT(lockh->cookie == 0);
-
-        /* XXX phil: can we do this?  won't it screw the file size up? */
-        if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) ||
-            (sbi->ll_flags & LL_SBI_NOLCK))
-                RETURN(0);
-
-        CDEBUG(D_DLMTRACE, "Locking inode %lu, start "LPU64" end "LPU64"\n",
-               inode->i_ino, extent->start, extent->end);
-
-        rc = obd_enqueue(&sbi->ll_osc_conn, lsm, NULL, LDLM_EXTENT, extent,
-                         sizeof(extent), mode, &flags, ll_extent_lock_callback,
-                         inode, lockh);
-
-        RETURN(rc);
-}
-
-/*
- * this grabs a lock and manually implements behaviour that makes it look like
- * the OST is returning the file size with each lock acquisition.
- */
-int ll_extent_lock(struct ll_file_data *fd, struct inode *inode,
-                   struct lov_stripe_md *lsm, int mode,
-                   struct ldlm_extent *extent, struct lustre_handle *lockh)
-{
-        struct ll_inode_info *lli = ll_i2info(inode);
-        struct ldlm_extent size_lock;
-        struct lustre_handle match_lockh = {0};
-        int flags, rc, matched;
-        ENTRY;
-
-        rc = ll_extent_lock_no_validate(fd, inode, lsm, mode, extent, lockh);
-        if (rc != ELDLM_OK)
-                RETURN(rc);
-
-        if (test_bit(LLI_F_HAVE_SIZE_LOCK, &lli->lli_flags))
-                RETURN(0);
-
-        rc = ll_inode_getattr(inode, lsm, fd ? &fd->fd_ost_och : NULL);
-        if (rc) {
-                ll_extent_unlock(fd, inode, lsm, mode, lockh);
-                RETURN(rc);
-        }
-
-        size_lock.start = inode->i_size;
-        size_lock.end = OBD_OBJECT_EOF;
-
-        /* XXX I bet we should be checking the lock ignore flags.. */
-        flags = LDLM_FL_CBPENDING | LDLM_FL_BLOCK_GRANTED | LDLM_FL_MATCH_DATA;
-        matched = obd_match(&ll_i2sbi(inode)->ll_osc_conn, lsm, LDLM_EXTENT,
-                            &size_lock, sizeof(size_lock), LCK_PR, &flags,
-                            inode, &match_lockh);
-
-        /* hey, alright, we hold a size lock that covers the size we
-         * just found, its not going to change for a while.. */
-        if (matched == 1) {
-                set_bit(LLI_F_HAVE_SIZE_LOCK, &lli->lli_flags);
-                obd_cancel(&ll_i2sbi(inode)->ll_osc_conn, lsm, LCK_PR,
-                           &match_lockh);
-        }
-
-        RETURN(0);
-}
-
-int ll_extent_unlock(struct ll_file_data *fd, struct inode *inode,
-                struct lov_stripe_md *lsm, int mode,
-                struct lustre_handle *lockh)
-{
-        struct ll_sb_info *sbi = ll_i2sbi(inode);
-        int rc;
-        ENTRY;
-
-        /* XXX phil: can we do this?  won't it screw the file size up? */
-        if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) ||
-            (sbi->ll_flags & LL_SBI_NOLCK))
-                RETURN(0);
-
-        rc = obd_cancel(&sbi->ll_osc_conn, lsm, mode, lockh);
-
-        RETURN(rc);
-}
-
 static inline void ll_remove_suid(struct inode *inode)
 {
         unsigned int mode;
 static inline void ll_remove_suid(struct inode *inode)
 {
         unsigned int mode;
@@ -591,22 +511,10 @@ static inline void ll_remove_suid(struct inode *inode)
 #if 0
 static void ll_update_atime(struct inode *inode)
 {
 #if 0
 static void ll_update_atime(struct inode *inode)
 {
-#ifdef USE_ATIME
-        struct iattr attr;
-
-        attr.ia_atime = LTIME_S(CURRENT_TIME);
-        attr.ia_valid = ATTR_ATIME;
-
-        if (inode->i_atime == attr.ia_atime) return;
         if (IS_RDONLY(inode)) return;
         if (IS_RDONLY(inode)) return;
-        if (IS_NOATIME(inode)) return;
 
 
-        /* ll_inode_setattr() sets inode->i_atime from attr.ia_atime */
-        ll_inode_setattr(inode, &attr, 0);
-#else
         /* update atime, but don't explicitly write it out just this change */
         inode->i_atime = CURRENT_TIME;
         /* update atime, but don't explicitly write it out just this change */
         inode->i_atime = CURRENT_TIME;
-#endif
 }
 #endif
 
 }
 #endif
 
@@ -676,19 +584,19 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm,
 
         /* start writeback on dirty pages in the extent when its PW */
         for (i = start, j = start % count;
 
         /* start writeback on dirty pages in the extent when its PW */
         for (i = start, j = start % count;
-                        lock->l_granted_mode == LCK_PW && i < end; j++, i++) {
+             lock->l_granted_mode == LCK_PW && i < end; j++, i++) {
                 if (j == count) {
                         i += skip;
                         j = 0;
                 }
                 /* its unlikely, but give us a chance to bail when we're out */
                 if (j == count) {
                         i += skip;
                         j = 0;
                 }
                 /* its unlikely, but give us a chance to bail when we're out */
-                PGCACHE_WRLOCK(inode->i_mapping);
+                ll_pgcache_lock(inode->i_mapping);
                 if (list_empty(&inode->i_mapping->dirty_pages)) {
                         CDEBUG(D_INODE, "dirty list empty\n");
                 if (list_empty(&inode->i_mapping->dirty_pages)) {
                         CDEBUG(D_INODE, "dirty list empty\n");
-                        PGCACHE_WRUNLOCK(inode->i_mapping);
+                        ll_pgcache_unlock(inode->i_mapping);
                         break;
                 }
                         break;
                 }
-                PGCACHE_WRUNLOCK(inode->i_mapping);
+                ll_pgcache_unlock(inode->i_mapping);
 
                 if (need_resched())
                         schedule();
 
                 if (need_resched())
                         schedule();
@@ -702,10 +610,10 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm,
                 }
                 if (PageDirty(page)) {
                         CDEBUG(D_INODE, "writing page %p\n", page);
                 }
                 if (PageDirty(page)) {
                         CDEBUG(D_INODE, "writing page %p\n", page);
-                        PGCACHE_WRLOCK(inode->i_mapping);
+                        ll_pgcache_lock(inode->i_mapping);
                         list_del(&page->list);
                         list_add(&page->list, &inode->i_mapping->locked_pages);
                         list_del(&page->list);
                         list_add(&page->list, &inode->i_mapping->locked_pages);
-                        PGCACHE_WRUNLOCK(inode->i_mapping);
+                        ll_pgcache_unlock(inode->i_mapping);
 
                         /* this writepage might write out pages outside
                          * this extent, but that's ok, the pages are only
 
                         /* this writepage might write out pages outside
                          * this extent, but that's ok, the pages are only
@@ -730,19 +638,19 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm,
         LASSERT((extent->start & ~PAGE_CACHE_MASK) == 0);
         LASSERT(((extent->end+1) & ~PAGE_CACHE_MASK) == 0);
         for (i = start, j = start % count ; i < end ; j++, i++) {
         LASSERT((extent->start & ~PAGE_CACHE_MASK) == 0);
         LASSERT(((extent->end+1) & ~PAGE_CACHE_MASK) == 0);
         for (i = start, j = start % count ; i < end ; j++, i++) {
-                if ( j == count ) {
+                if (j == count) {
                         i += skip;
                         j = 0;
                 }
                         i += skip;
                         j = 0;
                 }
-                PGCACHE_WRLOCK(inode->i_mapping);
+                ll_pgcache_lock(inode->i_mapping);
                 if (list_empty(&inode->i_mapping->dirty_pages) &&
                      list_empty(&inode->i_mapping->clean_pages) &&
                      list_empty(&inode->i_mapping->locked_pages)) {
                         CDEBUG(D_INODE, "nothing left\n");
                 if (list_empty(&inode->i_mapping->dirty_pages) &&
                      list_empty(&inode->i_mapping->clean_pages) &&
                      list_empty(&inode->i_mapping->locked_pages)) {
                         CDEBUG(D_INODE, "nothing left\n");
-                        PGCACHE_WRUNLOCK(inode->i_mapping);
+                        ll_pgcache_unlock(inode->i_mapping);
                         break;
                 }
                         break;
                 }
-                PGCACHE_WRUNLOCK(inode->i_mapping);
+                ll_pgcache_unlock(inode->i_mapping);
                 if (need_resched())
                         schedule();
                 page = find_get_page(inode->i_mapping, i);
                 if (need_resched())
                         schedule();
                 page = find_get_page(inode->i_mapping, i);
@@ -755,15 +663,16 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm,
                         truncate_complete_page(page);
 #else
                         truncate_complete_page(page->mapping, page);
                         truncate_complete_page(page);
 #else
                         truncate_complete_page(page->mapping, page);
-#endif                
+#endif
                 unlock_page(page);
                 page_cache_release(page);
         }
         EXIT;
 }
 
                 unlock_page(page);
                 page_cache_release(page);
         }
         EXIT;
 }
 
-int ll_extent_lock_callback(struct ldlm_lock *lock, struct ldlm_lock_desc *new,
-                            void *data, int flag)
+static int ll_extent_lock_callback(struct ldlm_lock *lock,
+                                   struct ldlm_lock_desc *new, void *data,
+                                   int flag)
 {
         struct inode *inode = data;
         struct ll_inode_info *lli = ll_i2info(inode);
 {
         struct inode *inode = data;
         struct ll_inode_info *lli = ll_i2info(inode);
@@ -771,7 +680,10 @@ int ll_extent_lock_callback(struct ldlm_lock *lock, struct ldlm_lock_desc *new,
         int rc;
         ENTRY;
 
         int rc;
         ENTRY;
 
-        LASSERT(inode != NULL);
+        if ((unsigned long)inode < 0x1000) {
+                LDLM_ERROR(lock, "cancelling lock with bad data %p", data);
+                LBUG();
+        }
 
         switch (flag) {
         case LDLM_CB_BLOCKING:
 
         switch (flag) {
         case LDLM_CB_BLOCKING:
@@ -785,9 +697,15 @@ int ll_extent_lock_callback(struct ldlm_lock *lock, struct ldlm_lock_desc *new,
                  * could know to write-back or simply throw away the pages
                  * based on if the cancel comes from a desire to, say,
                  * read or truncate.. */
                  * could know to write-back or simply throw away the pages
                  * based on if the cancel comes from a desire to, say,
                  * read or truncate.. */
-                LASSERT((unsigned long)inode > 0x1000);
-                LASSERT((unsigned long)lli > 0x1000);
-                LASSERT((unsigned long)lli->lli_smd > 0x1000);
+                if ((unsigned long)lli->lli_smd < 0x1000) {
+                        /* note that lli is part of the inode itself, so it
+                         * is valid if as checked the inode pointer above. */
+                        CERROR("inode %lu, sb %p, lli %p, lli_smd %p\n",
+                               inode->i_ino, inode->i_sb, lli, lli->lli_smd);
+                        LDLM_ERROR(lock, "cancel lock on bad inode %p", inode);
+                        LBUG();
+                }
+
                 ll_pgcache_remove_extent(inode, lli->lli_smd, lock);
                 break;
         default:
                 ll_pgcache_remove_extent(inode, lli->lli_smd, lock);
                 break;
         default:
@@ -797,6 +715,102 @@ int ll_extent_lock_callback(struct ldlm_lock *lock, struct ldlm_lock_desc *new,
         RETURN(0);
 }
 
         RETURN(0);
 }
 
+/*
+ * Enqueue an LDLM_EXTENT lock of @mode over @extent for @inode, storing the
+ * resulting handle in @lockh, without touching i_size.
+ *
+ * some callers, notably truncate, really don't want i_size set based
+ * on the size returned by the getattr, or lock acquisition in
+ * the future.
+ *
+ * @lockh must arrive with a zero cookie (asserted).  @fd may be NULL.
+ * Returns 0 without enqueueing anything when @fd has LL_FILE_IGNORE_LOCK set
+ * or the superblock has LL_SBI_NOLCK set; otherwise returns whatever
+ * obd_enqueue() returned.
+ */
+int ll_extent_lock_no_validate(struct ll_file_data *fd, struct inode *inode,
+                   struct lov_stripe_md *lsm,
+                   int mode, struct ldlm_extent *extent,
+                   struct lustre_handle *lockh)
+{
+        struct ll_sb_info *sbi = ll_i2sbi(inode);
+        int rc, flags = 0;
+        ENTRY;
+
+        LASSERT(lockh->cookie == 0);
+
+        /* XXX phil: can we do this?  won't it screw the file size up? */
+        if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) ||
+            (sbi->ll_flags & LL_SBI_NOLCK))
+                RETURN(0);
+
+        CDEBUG(D_DLMTRACE, "Locking inode %lu, start "LPU64" end "LPU64"\n",
+               inode->i_ino, extent->start, extent->end);
+
+        /* the cookie length is the size of the extent itself, not of the
+         * pointer to it; ll_extent_lock() passes sizeof(size_lock) to
+         * obd_match() for the same kind of argument */
+        rc = obd_enqueue(&sbi->ll_osc_conn, lsm, NULL, LDLM_EXTENT, extent,
+                         sizeof(*extent), mode, &flags,
+                         ll_extent_lock_callback, inode, lockh);
+
+        RETURN(rc);
+}
+
+/*
+ * this grabs a lock and manually implements behaviour that makes it look like
+ * the OST is returning the file size with each lock acquisition.
+ *
+ * The extent lock itself is taken with ll_extent_lock_no_validate(); any
+ * result other than ELDLM_OK is returned unchanged.  If LLI_F_HAVE_SIZE_LOCK
+ * is already set in lli_flags nothing more is done.  Otherwise the inode is
+ * refreshed through ll_inode_getattr() (passing fd's fd_ost_och, or NULL when
+ * fd is NULL); if that fails, ll_extent_unlock() is called on the lock just
+ * taken and the getattr error is returned.  Returns 0 in every other case,
+ * whether or not a covering size lock was found.
+ */
+int ll_extent_lock(struct ll_file_data *fd, struct inode *inode,
+                   struct lov_stripe_md *lsm, int mode,
+                   struct ldlm_extent *extent, struct lustre_handle *lockh)
+{
+        struct ll_inode_info *lli = ll_i2info(inode);
+        struct ldlm_extent size_lock;
+        struct lustre_handle match_lockh = {0};
+        int flags, rc, matched;
+        ENTRY;
+
+        rc = ll_extent_lock_no_validate(fd, inode, lsm, mode, extent, lockh);
+        if (rc != ELDLM_OK)
+                RETURN(rc);
+
+        if (test_bit(LLI_F_HAVE_SIZE_LOCK, &lli->lli_flags))
+                RETURN(0);
+
+        rc = ll_inode_getattr(inode, lsm, fd ? &fd->fd_ost_och : NULL);
+        if (rc) {
+                ll_extent_unlock(fd, inode, lsm, mode, lockh);
+                RETURN(rc);
+        }
+
+        size_lock.start = inode->i_size;
+        size_lock.end = OBD_OBJECT_EOF;
+
+        /* XXX I bet we should be checking the lock ignore flags..
+         *
+         * ask obd_match() for an LCK_PR extent lock spanning
+         * [i_size, OBD_OBJECT_EOF], using the i_size the getattr above
+         * left in the inode */
+        flags = LDLM_FL_CBPENDING | LDLM_FL_BLOCK_GRANTED | LDLM_FL_MATCH_DATA;
+        matched = obd_match(&ll_i2sbi(inode)->ll_osc_conn, lsm, LDLM_EXTENT,
+                            &size_lock, sizeof(size_lock), LCK_PR, &flags,
+                            inode, &match_lockh);
+
+        /* hey, alright, we hold a size lock that covers the size we
+         * just found, it's not going to change for a while..
+         * remember that in lli_flags so later calls return before the
+         * getattr, then call obd_cancel() on the handle obd_match() filled
+         * in (presumably dropping the reference the match took - confirm
+         * against obd_match()) */
+        if (matched == 1) {
+                set_bit(LLI_F_HAVE_SIZE_LOCK, &lli->lli_flags);
+                obd_cancel(&ll_i2sbi(inode)->ll_osc_conn, lsm, LCK_PR,
+                           &match_lockh);
+        }
+
+        RETURN(0);
+}
+
+int ll_extent_unlock(struct ll_file_data *fd, struct inode *inode,
+                struct lov_stripe_md *lsm, int mode,
+                struct lustre_handle *lockh)
+{
+        struct ll_sb_info *sbi = ll_i2sbi(inode);
+        int rc;
+        ENTRY;
+
+        /* Counterpart of ll_extent_lock_no_validate(): cancels the lock
+         * behind lockh via obd_cancel() and returns its result.  When fd
+         * (which may be NULL) has LL_FILE_IGNORE_LOCK set or the superblock
+         * has LL_SBI_NOLCK set, nothing is cancelled and 0 is returned;
+         * ll_extent_lock_no_validate() skips the enqueue under the same test.
+         * XXX phil: can we do this?  won't it screw the file size up? */
+        if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) ||
+            (sbi->ll_flags & LL_SBI_NOLCK))
+                RETURN(0);
+
+        rc = obd_cancel(&sbi->ll_osc_conn, lsm, mode, lockh);
+
+        RETURN(rc);
+}
+
 static ssize_t ll_file_read(struct file *filp, char *buf, size_t count,
                             loff_t *ppos)
 {
 static ssize_t ll_file_read(struct file *filp, char *buf, size_t count,
                             loff_t *ppos)
 {
@@ -819,6 +833,10 @@ static ssize_t ll_file_read(struct file *filp, char *buf, size_t count,
 
         lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, LPROC_LL_READ_BYTES,
                             count);
 
         lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, LPROC_LL_READ_BYTES,
                             count);
+
+        if (!lsm)
+                RETURN(0);
+
         /* grab a -> eof extent to push extending writes out of node's caches
          * so we can see them at the getattr after lock acquisition.  this will
          * turn into a seperate [*ppos + count, EOF] 'size intent' lock attempt
         /* grab a -> eof extent to push extending writes out of node's caches
          * so we can see them at the getattr after lock acquisition.  this will
          * turn into a seperate [*ppos + count, EOF] 'size intent' lock attempt
@@ -852,8 +870,8 @@ static ssize_t ll_file_read(struct file *filp, char *buf, size_t count,
 /*
  * Write to a file (through the page cache).
  */
 /*
  * Write to a file (through the page cache).
  */
-static ssize_t
-ll_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
+static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
+                             loff_t *ppos)
 {
         struct ll_file_data *fd = file->private_data;
         struct inode *inode = file->f_dentry->d_inode;
 {
         struct ll_file_data *fd = file->private_data;
         struct inode *inode = file->f_dentry->d_inode;
@@ -868,6 +886,7 @@ ll_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n",
                inode->i_ino, inode->i_generation, inode, count, *ppos);
 
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n",
                inode->i_ino, inode->i_generation, inode, count, *ppos);
 
+        SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */
         /*
          * sleep doing some writeback work of this mount's dirty data
          * if the VM thinks we're low on memory.. other dirtying code
         /*
          * sleep doing some writeback work of this mount's dirty data
          * if the VM thinks we're low on memory.. other dirtying code
@@ -875,12 +894,14 @@ ll_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
          * careful not to hold locked pages while they do so.  like
          * ll_prepare_write.  *cough*
          */
          * careful not to hold locked pages while they do so.  like
          * ll_prepare_write.  *cough*
          */
-        LL_CHECK_DIRTY(inode->i_sb);
+        ll_check_dirty(inode->i_sb);
 
         /* POSIX, but surprised the VFS doesn't check this already */
         if (count == 0)
                 RETURN(0);
 
 
         /* POSIX, but surprised the VFS doesn't check this already */
         if (count == 0)
                 RETURN(0);
 
+        LASSERT(lsm);
+
         if (file->f_flags & O_APPEND) {
                 extent.start = 0;
                 extent.end = OBD_OBJECT_EOF;
         if (file->f_flags & O_APPEND) {
                 extent.start = 0;
                 extent.end = OBD_OBJECT_EOF;
@@ -943,7 +964,8 @@ static int ll_lov_setstripe(struct inode *inode, struct file *file,
         lsm = lli->lli_smd;
         if (lsm) {
                 up(&lli->lli_open_sem);
         lsm = lli->lli_smd;
         if (lsm) {
                 up(&lli->lli_open_sem);
-                CERROR("stripe already exists for ino %lu\n", inode->i_ino);
+                CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
+                       inode->i_ino);
                 /* If we haven't already done the open, do so now */
                 if (file->f_flags & O_LOV_DELAY_CREATE) {
                         int rc2 = ll_osc_open(conn, inode, file, lsm);
                 /* If we haven't already done the open, do so now */
                 if (file->f_flags & O_LOV_DELAY_CREATE) {
                         int rc2 = ll_osc_open(conn, inode, file, lsm);
@@ -987,6 +1009,7 @@ int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
         struct ll_file_data *fd = file->private_data;
         struct lustre_handle *conn;
         int flags;
         struct ll_file_data *fd = file->private_data;
         struct lustre_handle *conn;
         int flags;
+
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%u\n", inode->i_ino,
                inode->i_generation, inode, cmd);
 
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%u\n", inode->i_ino,
                inode->i_generation, inode, cmd);
 
@@ -1077,8 +1100,8 @@ loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
 
 int ll_fsync(struct file *file, struct dentry *dentry, int data)
 {
 
 int ll_fsync(struct file *file, struct dentry *dentry, int data)
 {
-        int ret;
         struct inode *inode = dentry->d_inode;
         struct inode *inode = dentry->d_inode;
+        int rc;
         ENTRY;
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
                inode->i_generation, inode);
         ENTRY;
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
                inode->i_generation, inode);
@@ -1090,17 +1113,17 @@ int ll_fsync(struct file *file, struct dentry *dentry, int data)
          * still holding the PW lock that covered the dirty pages.  XXX we
          * should probably get a reference on it, though, just to be clear.
          */
          * still holding the PW lock that covered the dirty pages.  XXX we
          * should probably get a reference on it, though, just to be clear.
          */
-        ret = filemap_fdatasync(dentry->d_inode->i_mapping);
-        if ( ret == 0 )
-                ret = filemap_fdatawait(dentry->d_inode->i_mapping);
+        rc = filemap_fdatasync(inode->i_mapping);
+        if (rc == 0)
+                rc = filemap_fdatawait(inode->i_mapping);
 
 
-        RETURN(ret);
+        RETURN(rc);
 }
 
 }
 
-int ll_inode_revalidate(struct dentry *dentry)
+int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
 {
         struct inode *inode = dentry->d_inode;
 {
         struct inode *inode = dentry->d_inode;
-        struct lov_stripe_md *lsm = NULL;
+        struct lov_stripe_md *lsm;
         ENTRY;
 
         if (!inode) {
         ENTRY;
 
         if (!inode) {
@@ -1118,70 +1141,41 @@ int ll_inode_revalidate(struct dentry *dentry)
            below when the lock is marked CB_PENDING.  That RPC may not
            go out because someone else may be in another RPC waiting for
            that lock*/
            below when the lock is marked CB_PENDING.  That RPC may not
            go out because someone else may be in another RPC waiting for
            that lock*/
-        if (!(dentry->d_it && dentry->d_it->it_lock_mode) &&
-            !ll_have_md_lock(dentry)) {
+        if (!(it && it->it_lock_mode) && !ll_have_md_lock(dentry)) {
+                struct lustre_md md;
                 struct ptlrpc_request *req = NULL;
                 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
                 struct ll_fid fid;
                 struct ptlrpc_request *req = NULL;
                 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
                 struct ll_fid fid;
-                struct mds_body *body;
-                struct lov_mds_md *lmm;
                 unsigned long valid = 0;
                 unsigned long valid = 0;
-                int eadatalen = 0, rc;
+                int rc;
+                int ealen = 0;
 
 
-                /* Why don't we update all valid MDS fields here, if we're
-                 * doing an RPC anyways?  -phil */
                 if (S_ISREG(inode->i_mode)) {
                 if (S_ISREG(inode->i_mode)) {
-                        eadatalen = obd_size_diskmd(&sbi->ll_osc_conn, NULL);
+                        ealen = obd_size_diskmd(&sbi->ll_osc_conn, NULL);
                         valid |= OBD_MD_FLEASIZE;
                 }
                 ll_inode2fid(&fid, inode);
                         valid |= OBD_MD_FLEASIZE;
                 }
                 ll_inode2fid(&fid, inode);
-                rc = mdc_getattr(&sbi->ll_mdc_conn, &fid,
-                                 valid, eadatalen, &req);
+                rc = mdc_getattr(&sbi->ll_mdc_conn, &fid, valid, ealen, &req);
                 if (rc) {
                         CERROR("failure %d inode %lu\n", rc, inode->i_ino);
                         RETURN(-abs(rc));
                 }
                 if (rc) {
                         CERROR("failure %d inode %lu\n", rc, inode->i_ino);
                         RETURN(-abs(rc));
                 }
-
-                body = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*body));
-                LASSERT (body != NULL);         /* checked by mdc_getattr() */
-                LASSERT_REPSWABBED (req, 0);    /* swabbed by mdc_getattr() */
-
-                if (S_ISREG(inode->i_mode) &&
-                    (body->valid & (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS))) {
-                        CERROR("MDS sent back size for regular file\n");
-                        body->valid &= ~(OBD_MD_FLSIZE | OBD_MD_FLBLOCKS);
-                }
+                rc = mdc_req2lustre_md(req, 0, &sbi->ll_osc_conn, &md);
 
                 /* XXX Too paranoid? */
 
                 /* XXX Too paranoid? */
-                if ((body->valid ^ valid) & OBD_MD_FLEASIZE)
+                if ((md.body->valid ^ valid) & OBD_MD_FLEASIZE)
                         CERROR("Asked for %s eadata but got %s\n",
                                (valid & OBD_MD_FLEASIZE) ? "some" : "no",
                         CERROR("Asked for %s eadata but got %s\n",
                                (valid & OBD_MD_FLEASIZE) ? "some" : "no",
-                               (body->valid & OBD_MD_FLEASIZE) ? "some":"none");
-
-                if (S_ISREG(inode->i_mode) &&
-                    (body->valid & OBD_MD_FLEASIZE)) {
-                        if (body->eadatasize == 0) { /* no EA data */
-                                CERROR("OBD_MD_FLEASIZE set but no data\n");
-                                RETURN(-EPROTO);
-                        }
-                        /* Only bother with this if inode's lsm not set? */
-                        lmm = lustre_msg_buf(req->rq_repmsg,1,body->eadatasize);
-                        LASSERT(lmm != NULL);       /* mdc_getattr() checked */
-                        LASSERT_REPSWABBED(req, 1); /* mdc_getattr() swabbed */
-
-                        rc = obd_unpackmd (&sbi->ll_osc_conn,
-                                           &lsm, lmm, body->eadatasize);
-                        if (rc < 0) {
-                                CERROR("Error %d unpacking eadata\n", rc);
-                                ptlrpc_req_finished(req);
-                                RETURN(rc);
-                        }
-                        LASSERT(rc >= sizeof(*lsm));
+                               (md.body->valid & OBD_MD_FLEASIZE) ? "some":
+                               "none");
+                if (rc) {
+                        ptlrpc_req_finished(req);
+                        RETURN(rc);
                 }
 
                 }
 
-                ll_update_inode(inode, body, lsm);
-                if (lsm != NULL && ll_i2info(inode)->lli_smd != lsm)
-                        obd_free_memmd(&sbi->ll_osc_conn, &lsm);
+                ll_update_inode(inode, md.body, md.lsm);
+                if (md.lsm != NULL && ll_i2info(inode)->lli_smd != md.lsm)
+                        obd_free_memmd(&sbi->ll_osc_conn, &md.lsm);
 
                 ptlrpc_req_finished(req);
         }
 
                 ptlrpc_req_finished(req);
         }
@@ -1211,19 +1205,20 @@ int ll_inode_revalidate(struct dentry *dentry)
 }
 
 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
 }
 
 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-static int ll_getattr(struct vfsmount *mnt, struct dentry *de,
+int ll_getattr(struct vfsmount *mnt, struct dentry *de,
+                      struct lookup_intent *it, 
                       struct kstat *stat)
 {
         int res = 0;
         struct inode *inode = de->d_inode;
 
                       struct kstat *stat)
 {
         int res = 0;
         struct inode *inode = de->d_inode;
 
+        res = ll_inode_revalidate_it(de, it);
         lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_GETATTR);
         lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_GETATTR);
-        res = ll_inode_revalidate(de);
+
         if (res)
                 return res;
         if (res)
                 return res;
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        stat->dev = inode->i_dev;
-#endif
+
+        stat->dev = inode->i_sb->s_dev;
         stat->ino = inode->i_ino;
         stat->mode = inode->i_mode;
         stat->nlink = inode->i_nlink;
         stat->ino = inode->i_ino;
         stat->mode = inode->i_mode;
         stat->nlink = inode->i_nlink;
@@ -1234,6 +1229,8 @@ static int ll_getattr(struct vfsmount *mnt, struct dentry *de,
         stat->mtime = inode->i_mtime;
         stat->ctime = inode->i_ctime;
         stat->size = inode->i_size;
         stat->mtime = inode->i_mtime;
         stat->ctime = inode->i_ctime;
         stat->size = inode->i_size;
+        stat->blksize = inode->i_blksize;
+        stat->blocks = inode->i_blocks;
         return 0;
 }
 #endif
         return 0;
 }
 #endif
@@ -1254,9 +1251,9 @@ struct inode_operations ll_file_inode_operations = {
         setattr:    ll_setattr,
         truncate:   ll_truncate,
 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
         setattr:    ll_setattr,
         truncate:   ll_truncate,
 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-        getattr: ll_getattr,
+        getattr_it: ll_getattr,
 #else
 #else
-        revalidate: ll_inode_revalidate,
+        revalidate_it: ll_inode_revalidate_it,
 #endif
 };
 
 #endif
 };
 
@@ -1264,8 +1261,8 @@ struct inode_operations ll_special_inode_operations = {
         setattr_raw:    ll_setattr_raw,
         setattr:    ll_setattr,
 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
         setattr_raw:    ll_setattr_raw,
         setattr:    ll_setattr,
 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-        getattr:    ll_getattr,
+        getattr_it:    ll_getattr,
 #else
 #else
-        revalidate: ll_inode_revalidate,
+        revalidate_it: ll_inode_revalidate_it,
 #endif
 };
 #endif
 };
index e3fabe6..c30ef8a 100644 (file)
@@ -38,7 +38,6 @@
 #include <linux/rbtree.h>
 #include <linux/seq_file.h>
 #include <linux/time.h>
 #include <linux/rbtree.h>
 #include <linux/seq_file.h>
 #include <linux/time.h>
-#include "llite_internal.h"
 
 /* PG_inactive_clean is shorthand for rmap, we want free_high/low here.. */
 #ifdef PG_inactive_clean
 
 /* PG_inactive_clean is shorthand for rmap, we want free_high/low here.. */
 #ifdef PG_inactive_clean
@@ -47,6 +46,7 @@
 
 #define DEBUG_SUBSYSTEM S_LLITE
 #include <linux/lustre_lite.h>
 
 #define DEBUG_SUBSYSTEM S_LLITE
 #include <linux/lustre_lite.h>
+#include "llite_internal.h"
 
 #ifndef list_for_each_prev_safe
 #define list_for_each_prev_safe(pos, n, head) \
 
 #ifndef list_for_each_prev_safe
 #define list_for_each_prev_safe(pos, n, head) \
 
 extern spinlock_t inode_lock;
 
 
 extern spinlock_t inode_lock;
 
-struct ll_writeback_pages {
-        obd_count npgs, max;
-        struct brw_page *pga;
-};
-
 /*
  * check to see if we're racing with truncate and put the page in
  * the brw_page array.  returns 0 if there is more room and 1
 /*
  * check to see if we're racing with truncate and put the page in
  * the brw_page array.  returns 0 if there is more room and 1
@@ -139,13 +134,13 @@ static void ll_get_dirty_pages(struct inode *inode,
                 list_del(&page->list);
                 list_add(&page->list, &mapping->locked_pages);
 
                 list_del(&page->list);
                 list_add(&page->list, &mapping->locked_pages);
 
-                if ( ! PageDirty(page) ) {
+                if (!PageDirty(page)) {
                         unlock_page(page);
                         continue;
                 }
                 ClearPageDirty(page);
 
                         unlock_page(page);
                         continue;
                 }
                 ClearPageDirty(page);
 
-                if ( llwp_consume_page(llwp, inode, page) != 0)
+                if (llwp_consume_page(llwp, inode, page) != 0)
                         break;
         }
 
                         break;
         }
 
@@ -153,26 +148,31 @@ static void ll_get_dirty_pages(struct inode *inode,
         EXIT;
 }
 
         EXIT;
 }
 
-static void ll_writeback(struct inode *inode, struct ll_writeback_pages *llwp)
+static void ll_writeback(struct inode *inode, struct obdo *oa,
+                         struct ll_writeback_pages *llwp)
 {
 {
-        int rc, i;
         struct ptlrpc_request_set *set;
         struct ptlrpc_request_set *set;
+        int rc, i;
         ENTRY;
 
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),bytes=%u\n",
                inode->i_ino, inode->i_generation, inode,
                ((llwp->npgs-1) << PAGE_SHIFT) + llwp->pga[llwp->npgs-1].count);
 
         ENTRY;
 
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),bytes=%u\n",
                inode->i_ino, inode->i_generation, inode,
                ((llwp->npgs-1) << PAGE_SHIFT) + llwp->pga[llwp->npgs-1].count);
 
+        SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */
         set = ptlrpc_prep_set();
         if (set == NULL) {
                 CERROR ("Can't create request set\n");
                 rc = -ENOMEM;
         } else {
         set = ptlrpc_prep_set();
         if (set == NULL) {
                 CERROR ("Can't create request set\n");
                 rc = -ENOMEM;
         } else {
-                rc = obd_brw_async(OBD_BRW_WRITE, ll_i2obdconn(inode),
+                rc = obd_brw_async(OBD_BRW_WRITE, ll_i2obdconn(inode), oa,
                                    ll_i2info(inode)->lli_smd, llwp->npgs,
                                    llwp->pga, set, NULL);
                 if (rc == 0)
                                    ll_i2info(inode)->lli_smd, llwp->npgs,
                                    llwp->pga, set, NULL);
                 if (rc == 0)
-                        rc = ptlrpc_set_wait (set);
+                        rc = ptlrpc_set_wait(set);
+                if (rc == 0)
+                        obdo_refresh_inode(inode, oa,
+                                           oa->o_valid & ~OBD_MD_FLSIZE);
                 ptlrpc_set_destroy (set);
         }
         /*
                 ptlrpc_set_destroy (set);
         }
         /*
@@ -278,6 +278,7 @@ int ll_check_dirty(struct super_block *sb)
         unsigned long old_flags; /* hack? */
         int making_progress;
         struct inode *inode;
         unsigned long old_flags; /* hack? */
         int making_progress;
         struct inode *inode;
+        struct obdo oa;
         int rc = 0;
         ENTRY;
 
         int rc = 0;
         ENTRY;
 
@@ -328,12 +329,18 @@ int ll_check_dirty(struct super_block *sb)
                         llwp.npgs = 0;
                         ll_get_dirty_pages(inode, &llwp);
                         if (llwp.npgs) {
                         llwp.npgs = 0;
                         ll_get_dirty_pages(inode, &llwp);
                         if (llwp.npgs) {
-                               lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
-                                                   LPROC_LL_WB_PRESSURE,
-                                                   llwp.npgs);
-                               ll_writeback(inode, &llwp);
-                               rc += llwp.npgs;
-                               making_progress = 1;
+                                oa.o_id =
+                                      ll_i2info(inode)->lli_smd->lsm_object_id;
+                                oa.o_valid = OBD_MD_FLID;
+                                obdo_from_inode(&oa, inode,
+                                                OBD_MD_FLTYPE | OBD_MD_FLATIME|
+                                                OBD_MD_FLMTIME| OBD_MD_FLCTIME);
+                                lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
+                                                    LPROC_LL_WB_PRESSURE,
+                                                    llwp.npgs);
+                                ll_writeback(inode, &oa, &llwp);
+                                rc += llwp.npgs;
+                                making_progress = 1;
                         }
                 } while (llwp.npgs && should_writeback());
 
                         }
                 } while (llwp.npgs && should_writeback());
 
@@ -382,13 +389,14 @@ cleanup:
 }
 #endif /* linux 2.5 */
 
 }
 #endif /* linux 2.5 */
 
-int ll_batch_writepage(struct inode *inode, struct page *page)
+int ll_batch_writepage(struct inode *inode, struct obdo *oa, struct page *page)
 {
         unsigned long old_flags; /* hack? */
         struct ll_writeback_pages llwp;
         int rc = 0;
         ENTRY;
 
 {
         unsigned long old_flags; /* hack? */
         struct ll_writeback_pages llwp;
         int rc = 0;
         ENTRY;
 
+        SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */
         old_flags = current->flags;
         current->flags |= PF_MEMALLOC;
         rc = ll_alloc_brw(inode, &llwp);
         old_flags = current->flags;
         current->flags |= PF_MEMALLOC;
         rc = ll_alloc_brw(inode, &llwp);
@@ -401,7 +409,7 @@ int ll_batch_writepage(struct inode *inode, struct page *page)
         if (llwp.npgs) {
                 lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
                                     LPROC_LL_WB_WRITEPAGE, llwp.npgs);
         if (llwp.npgs) {
                 lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
                                     LPROC_LL_WB_WRITEPAGE, llwp.npgs);
-                ll_writeback(inode, &llwp);
+                ll_writeback(inode, oa, &llwp);
         }
         kfree(llwp.pga);
 
         }
         kfree(llwp.pga);
 
index 4684383..fd37709 100644 (file)
 #ifndef LLITE_INTERNAL_H
 #define LLITE_INTERNAL_H
 
 #ifndef LLITE_INTERNAL_H
 #define LLITE_INTERNAL_H
 
+
+struct ll_sb_info;
 struct lustre_handle;
 struct lov_stripe_md;
 
 struct lustre_handle;
 struct lov_stripe_md;
 
+extern void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi);
+extern struct proc_dir_entry *proc_lustre_fs_root;
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+# define hlist_del_init list_del_init
+#endif 
+
+static inline struct inode *ll_info2i(struct ll_inode_info *lli)
+{
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+        return &lli->lli_vfs_inode;
+#else
+        return list_entry(lli, struct inode, u.generic_ip);
+#endif
+}
+
+/* llite/commit_callback.c */
+int ll_commitcbd_setup(struct ll_sb_info *);
+int ll_commitcbd_cleanup(struct ll_sb_info *);
+
+/* lproc_llite.c */
+int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
+                                struct super_block *sb, char *osc, char *mdc);
+void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi);
+
+/* llite/namei.c */
+struct inode *ll_iget(struct super_block *sb, ino_t hash,
+                      struct lustre_md *lic);
+struct dentry *ll_find_alias(struct inode *, struct dentry *);
+int ll_it_open_error(int phase, struct lookup_intent *it);
 int ll_mdc_cancel_unused(struct lustre_handle *conn, struct inode *inode,
                          int flags, void *opaque);
 int ll_mdc_cancel_unused(struct lustre_handle *conn, struct inode *inode,
                          int flags, void *opaque);
+
+/* llite/rw.c */
+void ll_end_writeback(struct inode *, struct page *);
+
+void ll_remove_dirty(struct inode *inode, unsigned long start,
+                     unsigned long end);
 int ll_rd_dirty_pages(char *page, char **start, off_t off, int count,
                       int *eof, void *data);
 int ll_rd_max_dirty_pages(char *page, char **start, off_t off, int count,
 int ll_rd_dirty_pages(char *page, char **start, off_t off, int count,
                       int *eof, void *data);
 int ll_rd_max_dirty_pages(char *page, char **start, off_t off, int count,
@@ -26,4 +64,96 @@ int ll_clear_dirty_pages(struct lustre_handle *conn, struct lov_stripe_md *lsm,
 int ll_mark_dirty_page(struct lustre_handle *conn, struct lov_stripe_md *lsm,
                        unsigned long index);
 
 int ll_mark_dirty_page(struct lustre_handle *conn, struct lov_stripe_md *lsm,
                        unsigned long index);
 
+/* llite/file.c */
+extern int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *);
+
+/* llite/super.c */
+int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc);
+int ll_setattr(struct dentry *de, struct iattr *attr);
+
+/* iod.c */
+#define IO_STAT_ADD(FIS, STAT, VAL) do {        \
+        struct file_io_stats *_fis_ = (FIS);    \
+        spin_lock(&_fis_->fis_lock);            \
+        _fis_->fis_##STAT += VAL;               \
+        spin_unlock(&_fis_->fis_lock);          \
+} while (0)
+
+#define INODE_IO_STAT_ADD(INODE, STAT, VAL)        \
+        IO_STAT_ADD(&ll_i2sbi(INODE)->ll_iostats, STAT, VAL)
+
+#define PAGE_IO_STAT_ADD(PAGE, STAT, VAL)               \
+        INODE_IO_STAT_ADD((PAGE)->mapping, STAT, VAL)
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+/* XXX lliod needs more work in 2.5 before being proven and brought back
+ * to 2.4, it'll at least require a patch to introduce page->private */
+int lliod_start(struct ll_sb_info *sbi, struct inode *inode);
+void lliod_stop(struct ll_sb_info *sbi);
+#else
+#define lliod_start(sbi, inode) ({int _ret = 0; (void)sbi, (void)inode; _ret;})
+#define lliod_stop(sbi) do { (void)sbi; } while (0)
+#endif
+void lliod_wakeup(struct inode *inode);
+void lliod_give_plist(struct inode *inode, struct plist *plist, int rw);
+void lliod_give_page(struct inode *inode, struct page *page, int rw);
+void plist_init(struct plist *plist); /* for lli initialization.. */
+
+void ll_lldo_init(struct ll_dirty_offsets *lldo);
+void ll_record_dirty(struct inode *inode, unsigned long offset);
+void ll_remove_dirty(struct inode *inode, unsigned long start,
+                     unsigned long end);
+int ll_find_dirty(struct ll_dirty_offsets *lldo, unsigned long *start,
+                  unsigned long *end);
+int ll_farthest_dirty(struct ll_dirty_offsets *lldo, unsigned long *farthest);
+
+
+/* llite/super25.c */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+int ll_getattr(struct vfsmount *mnt, struct dentry *de,
+               struct lookup_intent *it, 
+               struct kstat *stat);
+#endif
+
+
+/* llite/dcache.c */
+void ll_intent_release(struct lookup_intent *);
+extern void ll_set_dd(struct dentry *de);
+void ll_unhash_aliases(struct inode *);
+
+/* llite/rw.c */
+void ll_truncate(struct inode *inode);
+void ll_end_writeback(struct inode *inode, struct page *page);
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+int ll_check_dirty(struct super_block *sb);
+int ll_batch_writepage(struct inode *inode, struct obdo *oa, struct page *page);
+#else
+#define ll_check_dirty(SB) do { (void)SB; } while (0)
+#endif
+
+/* llite/llite_lib.c */
+
+extern struct super_operations ll_super_operations;
+
+char *ll_read_opt(const char *opt, char *data);
+int ll_set_opt(const char *opt, char *data, int fl);
+void ll_options(char *options, char **ost, char **mds, int *flags);
+void ll_lli_init(struct ll_inode_info *lli);
+int ll_fill_super(struct super_block *sb, void *data, int silent);
+void ll_put_super(struct super_block *sb);
+void ll_clear_inode(struct inode *inode);
+int ll_attr2inode(struct inode *inode, struct iattr *attr, int trunc);
+int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc);
+int ll_setattr_raw(struct inode *inode, struct iattr *attr);
+int ll_setattr(struct dentry *de, struct iattr *attr);
+int ll_statfs(struct super_block *sb, struct kstatfs *sfs);
+void ll_update_inode(struct inode *inode, struct mds_body *body,
+                     struct lov_stripe_md *lsm);
+int it_disposition(struct lookup_intent *it, int flag);
+void it_set_disposition(struct lookup_intent *it, int flag);
+void ll_read_inode2(struct inode *inode, void *opaque);
+void ll_umount_begin(struct super_block *sb);
+
+
+
 #endif /* LLITE_INTERNAL_H */
 #endif /* LLITE_INTERNAL_H */
index 42fea4b..8908d44 100644 (file)
 #define DEBUG_SUBSYSTEM S_LLITE
 
 #include <linux/version.h>
 #define DEBUG_SUBSYSTEM S_LLITE
 
 #include <linux/version.h>
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#include <asm/statfs.h>
-#endif
 #include <linux/lustre_lite.h>
 #include <linux/lprocfs_status.h>
 
 #include "llite_internal.h"
 
 /* /proc/lustre/llite mount point registration */
 #include <linux/lustre_lite.h>
 #include <linux/lprocfs_status.h>
 
 #include "llite_internal.h"
 
 /* /proc/lustre/llite mount point registration */
+struct proc_dir_entry *proc_lustre_fs_root;
 
 #ifndef LPROCFS
 int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
 
 #ifndef LPROCFS
 int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
@@ -41,36 +39,113 @@ int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
 void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi){}
 #else
 
 void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi){}
 #else
 
-#define LPROC_LLITE_STAT_FCT(fct_name, get_statfs_fct)                    \
-int fct_name(char *page, char **start, off_t off,                         \
-             int count, int *eof, void *data)                             \
-{                                                                         \
-        struct statfs sfs;                                                \
-        int rc;                                                           \
-        LASSERT(data != NULL);                                            \
-        rc = get_statfs_fct((struct super_block*)data, &sfs);             \
-        return (rc==0                                                     \
-                ? lprocfs_##fct_name (page, start, off, count, eof, &sfs) \
-                : rc);                                                    \
+long long mnt_instance;
+
+static int ll_rd_blksize(char *page, char **start, off_t off, int count,
+                         int *eof, void *data)
+{
+        struct super_block *sb = (struct super_block *)data;
+        struct obd_statfs osfs;
+        int rc;
+
+        LASSERT(sb != NULL);
+        rc = ll_statfs_internal(sb, &osfs, jiffies - HZ);
+        if (!rc) {
+              *eof = 1;
+              rc = snprintf(page, count, "%u\n", osfs.os_bsize);
+        }
+
+        return rc;
 }
 
 }
 
-long long mnt_instance;
+static int ll_rd_kbytestotal(char *page, char **start, off_t off, int count,
+                             int *eof, void *data)
+{
+        struct super_block *sb = (struct super_block *)data;
+        struct obd_statfs osfs;
+        int rc;
+
+        LASSERT(sb != NULL);
+        rc = ll_statfs_internal(sb, &osfs, jiffies - HZ);
+        if (!rc) {
+                __u32 blk_size = osfs.os_bsize >> 10;
+                __u64 result = osfs.os_blocks;
+
+                while (blk_size >>= 1)
+                        result <<= 1;
+
+                *eof = 1;
+                rc = snprintf(page, count, LPU64"\n", result);
+        }
+        return rc;
+
+}
+
+static int ll_rd_kbytesfree(char *page, char **start, off_t off, int count,
+                            int *eof, void *data)
+{
+        struct super_block *sb = (struct super_block *)data;
+        struct obd_statfs osfs;
+        int rc;
 
 
-LPROC_LLITE_STAT_FCT(rd_blksize,     vfs_statfs);
-LPROC_LLITE_STAT_FCT(rd_kbytestotal, vfs_statfs);
-LPROC_LLITE_STAT_FCT(rd_kbytesfree,  vfs_statfs);
-LPROC_LLITE_STAT_FCT(rd_filestotal,  vfs_statfs);
-LPROC_LLITE_STAT_FCT(rd_filesfree,   vfs_statfs);
-LPROC_LLITE_STAT_FCT(rd_filegroups,  vfs_statfs);
+        LASSERT(sb != NULL);
+        rc = ll_statfs_internal(sb, &osfs, jiffies - HZ);
+        if (!rc) {
+                __u32 blk_size = osfs.os_bsize >> 10;
+                __u64 result = osfs.os_bfree;
+
+                while (blk_size >>= 1)
+                        result <<= 1;
+
+                *eof = 1;
+                rc = snprintf(page, count, LPU64"\n", result);
+        }
+        return rc;
+}
+
+static int ll_rd_filestotal(char *page, char **start, off_t off, int count,
+                            int *eof, void *data)
+{
+        struct super_block *sb = (struct super_block *)data;
+        struct obd_statfs osfs;
+        int rc;
+
+        LASSERT(sb != NULL);
+        rc = ll_statfs_internal(sb, &osfs, jiffies - HZ);
+        if (!rc) {
+                 *eof = 1;
+                 rc = snprintf(page, count, LPU64"\n", osfs.os_files);
+        }
+        return rc;
+}
 
 
-int rd_path(char *page, char **start, off_t off, int count, int *eof,
-            void *data)
+static int ll_rd_filesfree(char *page, char **start, off_t off, int count,
+                           int *eof, void *data)
+{
+        struct super_block *sb = (struct super_block *)data;
+        struct obd_statfs osfs;
+        int rc;
+
+        LASSERT(sb != NULL);
+        rc = ll_statfs_internal(sb, &osfs, jiffies - HZ);
+        if (!rc) {
+                 *eof = 1;
+                 rc = snprintf(page, count, LPU64"\n", osfs.os_ffree);
+        }
+        return rc;
+
+}
+
+#if 0
+static int ll_rd_path(char *page, char **start, off_t off, int count, int *eof,
+                      void *data)
 {
         return 0;
 }
 {
         return 0;
 }
+#endif
 
 
-int rd_fstype(char *page, char **start, off_t off, int count, int *eof,
-              void *data)
+static int ll_rd_fstype(char *page, char **start, off_t off, int count,
+                        int *eof, void *data)
 {
         struct super_block *sb = (struct super_block*)data;
 
 {
         struct super_block *sb = (struct super_block*)data;
 
@@ -79,8 +154,8 @@ int rd_fstype(char *page, char **start, off_t off, int count, int *eof,
         return snprintf(page, count, "%s\n", sb->s_type->name);
 }
 
         return snprintf(page, count, "%s\n", sb->s_type->name);
 }
 
-int rd_sb_uuid(char *page, char **start, off_t off, int count, int *eof,
-               void *data)
+static int ll_rd_sb_uuid(char *page, char **start, off_t off, int count,
+                         int *eof, void *data)
 {
         struct super_block *sb = (struct super_block *)data;
 
 {
         struct super_block *sb = (struct super_block *)data;
 
@@ -89,18 +164,20 @@ int rd_sb_uuid(char *page, char **start, off_t off, int count, int *eof,
         return snprintf(page, count, "%s\n", ll_s2sbi(sb)->ll_sb_uuid.uuid);
 }
 
         return snprintf(page, count, "%s\n", ll_s2sbi(sb)->ll_sb_uuid.uuid);
 }
 
-struct lprocfs_vars lprocfs_obd_vars[] = {
-        { "uuid",        rd_sb_uuid,     0, 0 },
-        { "mntpt_path",  rd_path,        0, 0 },
-        { "fstype",      rd_fstype,      0, 0 },
-        { "blocksize",   rd_blksize,     0, 0 },
-        { "kbytestotal", rd_kbytestotal, 0, 0 },
-        { "kbytesfree",  rd_kbytesfree,  0, 0 },
-        { "filestotal",  rd_filestotal,  0, 0 },
-        { "filesfree",   rd_filesfree,   0, 0 },
-        { "filegroups",  rd_filegroups,  0, 0 },
-        { "dirty_pages", ll_rd_dirty_pages, 0, 0},
+static struct lprocfs_vars lprocfs_obd_vars[] = {
+        { "uuid",         ll_rd_sb_uuid,          0, 0 },
+        //{ "mntpt_path",   ll_rd_path,             0, 0 },
+        { "fstype",       ll_rd_fstype,           0, 0 },
+        { "blocksize",    ll_rd_blksize,          0, 0 },
+        { "kbytestotal",  ll_rd_kbytestotal,      0, 0 },
+        { "kbytesfree",   ll_rd_kbytesfree,       0, 0 },
+        { "filestotal",   ll_rd_filestotal,       0, 0 },
+        { "filesfree",    ll_rd_filesfree,        0, 0 },
+        //{ "filegroups",   lprocfs_rd_filegroups,  0, 0 },
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+        { "dirty_pages",  ll_rd_dirty_pages,      0, 0},
         { "max_dirty_pages", ll_rd_max_dirty_pages, ll_wr_max_dirty_pages, 0},
         { "max_dirty_pages", ll_rd_max_dirty_pages, ll_wr_max_dirty_pages, 0},
+#endif
         { 0 }
 };
 
         { 0 }
 };
 
index da6e670..b9223e8 100644 (file)
 #include <linux/obd_support.h>
 #include <linux/lustre_lite.h>
 #include <linux/lustre_dlm.h>
 #include <linux/obd_support.h>
 #include <linux/lustre_lite.h>
 #include <linux/lustre_dlm.h>
-
-/* from dcache.c */
-extern void ll_set_dd(struct dentry *de);
-
-/* from super.c */
-extern void ll_change_inode(struct inode *inode);
-extern int ll_setattr(struct dentry *de, struct iattr *attr);
-
-/* from dir.c */
-extern int ll_add_link (struct dentry *dentry, struct inode *inode);
-obd_id ll_inode_by_name(struct inode * dir, struct dentry *dentry, int *typ);
-int ext2_make_empty(struct inode *inode, struct inode *parent);
-struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir,
-                   struct dentry *dentry, struct page ** res_page);
-int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page );
-int ext2_empty_dir (struct inode * inode);
-struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p);
-void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
-                   struct page *page, struct inode *inode);
-
-/*
- * Couple of helper functions - make the code slightly cleaner.
- */
-static inline void ext2_inc_count(struct inode *inode)
-{
-        inode->i_nlink++;
-}
-
-/* postpone the disk update until the inode really goes away */
-static inline void ext2_dec_count(struct inode *inode)
-{
-        inode->i_nlink--;
-}
-static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode)
-{
-        int err;
-        err = ll_add_link(dentry, inode);
-        if (!err) {
-                d_instantiate(dentry, inode);
-                return 0;
-        }
-        ext2_dec_count(inode);
-        iput(inode);
-        return err;
-}
+#include "llite_internal.h"
 
 /* methods */
 
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
 
 /* methods */
 
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-static int ll_find_inode(struct inode *inode, unsigned long ino, void *opaque)
+static int ll_test_inode(struct inode *inode, unsigned long ino, void *opaque)
 #else
 static int ll_test_inode(struct inode *inode, void *opaque)
 #endif
 {
 #else
 static int ll_test_inode(struct inode *inode, void *opaque)
 #endif
 {
-        struct ll_read_inode2_cookie *lic = opaque;
-        struct mds_body *body = lic->lic_body;
+        struct lustre_md *md = opaque;
 
 
-        if (!(lic->lic_body->valid & (OBD_MD_FLGENER | OBD_MD_FLID)))
+        if (!(md->body->valid & (OBD_MD_FLGENER | OBD_MD_FLID)))
                 CERROR("invalid generation\n");
                 CERROR("invalid generation\n");
-        CDEBUG(D_VFSTRACE, "comparing inode %p ino %lu/%u to body %lu/%u\n",
-               inode, inode->i_ino, inode->i_generation, ino,
-               lic->lic_body->generation);
+        CDEBUG(D_VFSTRACE, "comparing inode %p ino %lu/%u to body %u/%u\n",
+               inode, inode->i_ino, inode->i_generation, 
+               md->body->ino, md->body->generation);
 
 
-        if (inode->i_generation != lic->lic_body->generation)
+        if (inode->i_generation != md->body->generation)
                 return 0;
 
         /* Apply the attributes in 'opaque' to this inode */
                 return 0;
 
         /* Apply the attributes in 'opaque' to this inode */
-        ll_update_inode(inode, body, lic->lic_lsm);
+        ll_update_inode(inode, md->body, md->lsm);
         return 1;
 }
 
         return 1;
 }
 
@@ -127,16 +82,21 @@ int ll_unlock(__u32 mode, struct lustre_handle *lockh)
  * Returns inode or NULL
  */
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
  * Returns inode or NULL
  */
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-extern int ll_read_inode2(struct inode *inode, void *opaque);
+int ll_set_inode(struct inode *inode, void *opaque)
+{
+        ll_read_inode2(inode, opaque);
+        return 0;
+}
 struct inode *ll_iget(struct super_block *sb, ino_t hash,
 struct inode *ll_iget(struct super_block *sb, ino_t hash,
-                      struct ll_read_inode2_cookie *lic)
+                      struct lustre_md *md)
 {
         struct inode *inode;
 
         LASSERT(hash != 0);
 {
         struct inode *inode;
 
         LASSERT(hash != 0);
-        inode = iget5_locked(sb, hash, ll_test_inode, ll_read_inode2, lic);
-        if (inode == NULL)
-                return NULL;              /* removed ERR_PTR(-ENOMEM) -eeb */
+        inode = iget5_locked(sb, hash, ll_test_inode, ll_set_inode, md);
+
+        if (!inode)
+                return (NULL);              /* removed ERR_PTR(-ENOMEM) -eeb */
 
         if (inode->i_state & I_NEW)
                 unlock_new_inode(inode);
 
         if (inode->i_state & I_NEW)
                 unlock_new_inode(inode);
@@ -146,11 +106,11 @@ struct inode *ll_iget(struct super_block *sb, ino_t hash,
 }
 #else
 struct inode *ll_iget(struct super_block *sb, ino_t hash,
 }
 #else
 struct inode *ll_iget(struct super_block *sb, ino_t hash,
-                      struct ll_read_inode2_cookie *lic)
+                      struct lustre_md *md)
 {
         struct inode *inode;
         LASSERT(hash != 0);
 {
         struct inode *inode;
         LASSERT(hash != 0);
-        inode = iget4(sb, hash, ll_find_inode, lic);
+        inode = iget4(sb, hash, ll_test_inode, md);
         CDEBUG(D_VFSTRACE, "inode: %lu/%u(%p)\n", inode->i_ino,
                inode->i_generation, inode);
         return inode;
         CDEBUG(D_VFSTRACE, "inode: %lu/%u(%p)\n", inode->i_ino,
                inode->i_generation, inode);
         return inode;
@@ -171,36 +131,37 @@ static int ll_intent_to_lock_mode(struct lookup_intent *it)
 
 int ll_it_open_error(int phase, struct lookup_intent *it)
 {
 
 int ll_it_open_error(int phase, struct lookup_intent *it)
 {
-        if (it->it_disposition & IT_OPEN_OPEN) {
-                if (phase == IT_OPEN_OPEN)
+        if (it_disposition(it, DISP_OPEN_OPEN)) {
+                if (phase == DISP_OPEN_OPEN)
                         return it->it_status;
                 else
                         return 0;
         }
 
                         return it->it_status;
                 else
                         return 0;
         }
 
-        if (it->it_disposition & IT_OPEN_CREATE) {
-                if (phase == IT_OPEN_CREATE)
+        if (it_disposition(it, DISP_OPEN_CREATE)) {
+                if (phase == DISP_OPEN_CREATE)
                         return it->it_status;
                 else
                         return 0;
         }
 
                         return it->it_status;
                 else
                         return 0;
         }
 
-        if (it->it_disposition & IT_OPEN_LOOKUP) {
-                if (phase == IT_OPEN_LOOKUP)
+        if (it_disposition(it, DISP_LOOKUP_EXECD)) {
+                if (phase == DISP_LOOKUP_EXECD)
                         return it->it_status;
                 else
                         return 0;
         }
                         return it->it_status;
                 else
                         return 0;
         }
+        CERROR("it disp: %X, status: %d\n", it->it_disposition, it->it_status);
         LBUG();
         return 0;
 }
 
         LBUG();
         return 0;
 }
 
-int ll_mdc_blocking_ast(struct ldlm_lock *lock,
-                        struct ldlm_lock_desc *desc,
+int ll_mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
                         void *data, int flag)
 {
         int rc;
         struct lustre_handle lockh;
                         void *data, int flag)
 {
         int rc;
         struct lustre_handle lockh;
+        struct inode *inode = lock->l_data;
         ENTRY;
 
         switch (flag) {
         ENTRY;
 
         switch (flag) {
@@ -214,9 +175,13 @@ int ll_mdc_blocking_ast(struct ldlm_lock *lock,
                 break;
         case LDLM_CB_CANCELING: {
                 /* Invalidate all dentries associated with this inode */
                 break;
         case LDLM_CB_CANCELING: {
                 /* Invalidate all dentries associated with this inode */
-                struct inode *inode = lock->l_data;
-                LASSERT(inode != NULL);
-
+                if (inode == NULL)
+                        break;
+                if (lock->l_resource->lr_name.name[0] != inode->i_ino ||
+                    lock->l_resource->lr_name.name[1] != inode->i_generation) {
+                        LDLM_ERROR(lock, "data mismatch with ino %lu/%u",
+                                   inode->i_ino, inode->i_generation);
+                }
                 if (S_ISDIR(inode->i_mode)) {
                         CDEBUG(D_INODE, "invalidating inode %lu\n",
                                inode->i_ino);
                 if (S_ISDIR(inode->i_mode)) {
                         CDEBUG(D_INODE, "invalidating inode %lu\n",
                                inode->i_ino);
@@ -227,7 +192,7 @@ int ll_mdc_blocking_ast(struct ldlm_lock *lock,
 #warning FIXME: we should probably free this inode if there are no aliases
                 if (inode->i_sb->s_root &&
                     inode != inode->i_sb->s_root->d_inode)
 #warning FIXME: we should probably free this inode if there are no aliases
                 if (inode->i_sb->s_root &&
                     inode != inode->i_sb->s_root->d_inode)
-                        d_unhash_aliases(inode);
+                        ll_unhash_aliases(inode);
                 break;
         }
         default:
                 break;
         }
         default:
@@ -237,17 +202,6 @@ int ll_mdc_blocking_ast(struct ldlm_lock *lock,
         RETURN(0);
 }
 
         RETURN(0);
 }
 
-void ll_mdc_lock_set_inode(struct lustre_handle *lockh, struct inode *inode)
-{
-        struct ldlm_lock *lock = ldlm_handle2lock(lockh);
-        ENTRY;
-
-        LASSERT(lock != NULL);
-        lock->l_data = inode;
-        LDLM_LOCK_PUT(lock);
-        EXIT;
-}
-
 int ll_mdc_cancel_unused(struct lustre_handle *conn, struct inode *inode,
                          int flags, void *opaque)
 {
 int ll_mdc_cancel_unused(struct lustre_handle *conn, struct inode *inode,
                          int flags, void *opaque)
 {
@@ -287,35 +241,74 @@ void ll_prepare_mdc_op_data(struct mdc_op_data *data,
         data->mode = mode;
 }
 
         data->mode = mode;
 }
 
-#define IT_ENQ_COMPLETE (1<<16)
-
+/* 
+ *This long block is all about fixing up the local state so that it is
+ *correct as of the moment _before_ the operation was applied; that
+ *way, the VFS will think that everything is normal and call Lustre's
+ *regular VFS methods.
+ *
+ * If we're performing a creation, that means that unless the creation
+ * failed with EEXIST, we should fake up a negative dentry.
+ *
+ * For everything else, we want to lookup to succeed.
+ *
+ * One additional note: if CREATE or OPEN succeeded, we add an extra
+ * reference to the request because we need to keep it around until
+ * ll_create/ll_open gets called.
+ *
+ * The server will return to us, in it_disposition, an indication of
+ * exactly what it_status refers to.
+ *
+ * If DISP_OPEN_OPEN is set, then it_status refers to the open() call,
+ * otherwise if DISP_OPEN_CREATE is set, then it status is the
+ * creation failure mode.  In either case, one of DISP_LOOKUP_NEG or
+ * DISP_LOOKUP_POS will be set, indicating whether the child lookup
+ * was successful.
+ *
+ * Else, if DISP_LOOKUP_EXECD then it_status is the rc of the child
+ * lookup.
+ */
 int ll_intent_lock(struct inode *parent, struct dentry **de,
 int ll_intent_lock(struct inode *parent, struct dentry **de,
-                   struct lookup_intent *it, intent_finish_cb intent_finish)
+                   struct lookup_intent *it, int flags, intent_finish_cb intent_finish)
 {
         struct dentry *dentry = *de;
         struct inode *inode = dentry->d_inode;
         struct ll_sb_info *sbi = ll_i2sbi(parent);
         struct lustre_handle lockh;
         struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
 {
         struct dentry *dentry = *de;
         struct inode *inode = dentry->d_inode;
         struct ll_sb_info *sbi = ll_i2sbi(parent);
         struct lustre_handle lockh;
         struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
-        struct ptlrpc_request *request = NULL;
-        int rc = 0, offset, flag = 0;
+        struct ptlrpc_request *request;
+        int rc = 0;
+        struct mds_body *mds_body;
+        int mode;
         obd_id ino = 0;
         ENTRY;
 
 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
         obd_id ino = 0;
         ENTRY;
 
 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-        if (it && it->it_op == 0)
-                *it = lookup_it;
+        if (it && it->it_magic != INTENT_MAGIC) { 
+                CERROR("WARNING: uninitialized intent\n");
+                LBUG();
+                intent_init(it, IT_LOOKUP, 0);
+        }
+        if (it->it_op == IT_GETATTR || 
+            it->it_op == 0)
+                it->it_op = IT_LOOKUP;
+        
 #endif
 #endif
-        if (it == NULL)
+        if (!it ||it->it_op == IT_GETXATTR)
                 it = &lookup_it;
 
                 it = &lookup_it;
 
+        it->it_op_release = ll_intent_release;
+
         CDEBUG(D_DLMTRACE, "name: %*s, intent: %s\n", dentry->d_name.len,
                dentry->d_name.name, ldlm_it2str(it->it_op));
         CDEBUG(D_DLMTRACE, "name: %*s, intent: %s\n", dentry->d_name.len,
                dentry->d_name.name, ldlm_it2str(it->it_op));
-
+        
         if (dentry->d_name.len > EXT2_NAME_LEN)
                 RETURN(-ENAMETOOLONG);
 
         if (dentry->d_name.len > EXT2_NAME_LEN)
                 RETURN(-ENAMETOOLONG);
 
-        if (!(it->it_disposition & IT_ENQ_COMPLETE)) {
+        /* This function may be called twice, we only once want to
+           execute the request associated with the intent. If it was
+           done already, we skip past this and use the results. */ 
+        if (!it_disposition(it, DISP_ENQ_COMPLETE)) {
                 struct mdc_op_data op_data;
 
                 ll_prepare_mdc_op_data(&op_data, parent, dentry->d_inode,
                 struct mdc_op_data op_data;
 
                 ll_prepare_mdc_op_data(&op_data, parent, dentry->d_inode,
@@ -325,174 +318,73 @@ int ll_intent_lock(struct inode *parent, struct dentry **de,
                 rc = mdc_enqueue(&sbi->ll_mdc_conn, LDLM_PLAIN, it,
                                  ll_intent_to_lock_mode(it), &op_data,
                                  &lockh, NULL, 0, ldlm_completion_ast,
                 rc = mdc_enqueue(&sbi->ll_mdc_conn, LDLM_PLAIN, it,
                                  ll_intent_to_lock_mode(it), &op_data,
                                  &lockh, NULL, 0, ldlm_completion_ast,
-                                 ll_mdc_blocking_ast, parent);
+                                 ll_mdc_blocking_ast, NULL);
                 if (rc < 0)
                         RETURN(rc);
                 memcpy(it->it_lock_handle, &lockh, sizeof(lockh));
         }
                 if (rc < 0)
                         RETURN(rc);
                 memcpy(it->it_lock_handle, &lockh, sizeof(lockh));
         }
-
-        request = (struct ptlrpc_request *)it->it_data;
+        request = it->it_data;
+        LASSERT(request != NULL);
 
         /* non-zero it_disposition indicates that the server performed the
          * intent on our behalf. */
 
         /* non-zero it_disposition indicates that the server performed the
          * intent on our behalf. */
-        if (it->it_disposition) {
-                struct mds_body *mds_body;
-                int mode;
-
-                /* This long block is all about fixing up the local
-                 * state so that it is correct as of the moment
-                 * _before_ the operation was applied; that way, the
-                 * VFS will think that everything is normal and call
-                 * Lustre's regular FS function.
-                 *
-                 * If we're performing a creation, that means that unless the
-                 * creation failed with EEXIST, we should fake up a negative
-                 * dentry.  Likewise for the target of a hard link.
-                 *
-                 * For everything else, we want to lookup to succeed. */
-
-                /* One additional note: if CREATE/MKDIR/etc succeeded,
-                 * we add an extra reference to the request because we
-                 * need to keep it around until ll_create gets called.
-                 * For anything else which results in
-                 * LL_LOOKUP_POSITIVE, we can do the iget()
-                 * immediately with the contents of the reply (in the
-                 * intent_finish callback).  In the create case,
-                 * however, we need to wait until ll_create_node to do
-                 * the iget() or the VFS will abort with -EEXISTS.
-                 */
-
-                offset = 1;
-                mds_body = lustre_msg_buf(request->rq_repmsg, offset,
-                                          sizeof(*mds_body));
-                LASSERT (mds_body != NULL);           /* mdc_enqueue checked */
-                LASSERT_REPSWABBED (request, offset); /* mdc_enqueue swabbed */
-
-                ino = mds_body->fid1.id;
-                mode = mds_body->mode;
-
-                /*We were called from revalidate2: did we find the same inode?*/
-                if (inode && (ino != inode->i_ino ||
-                    mds_body->fid1.generation != inode->i_generation)) {
-                        it->it_disposition |= IT_ENQ_COMPLETE;
-                        RETURN(-ESTALE);
-                }
+        LASSERT(it_disposition(it, DISP_IT_EXECD));
+
+                
+        mds_body = lustre_msg_buf(request->rq_repmsg, 1, sizeof(*mds_body));
+        LASSERT(mds_body != NULL);           /* mdc_enqueue checked */
+        LASSERT_REPSWABBED(request, 1); /* mdc_enqueue swabbed */
+
+        /* XXX everything with fids please, no ino's inode's etc */
+        ino = mds_body->fid1.id;
+        mode = mds_body->mode;
+
+        /*We were called from revalidate2: did we find the same inode?*/
+        if (inode && 
+            (ino != inode->i_ino ||
+             mds_body->fid1.generation != inode->i_generation)) {
+                it_set_disposition(it, DISP_ENQ_COMPLETE);
+                RETURN(-ESTALE);
+        }
 
 
-                /* If we're doing an IT_OPEN which did not result in an actual
-                 * successful open, then we need to remove the bit which saves
-                 * this request for unconditional replay. */
-                if (it->it_op & IT_OPEN &&
-                    (!(it->it_disposition & IT_OPEN_OPEN) ||
-                     it->it_status != 0)) {
+        /* If we're doing an IT_OPEN which did not result in an actual
+         * successful open, then we need to remove the bit which saves
+         * this request for unconditional replay. */
+        if (it->it_op & IT_OPEN) {
+                if (!it_disposition(it, DISP_OPEN_OPEN) ||
+                    it->it_status != 0) {
                         unsigned long flags;
                         unsigned long flags;
-
+                
                         spin_lock_irqsave (&request->rq_lock, flags);
                         request->rq_replay = 0;
                         spin_unlock_irqrestore (&request->rq_lock, flags);
                 }
                         spin_lock_irqsave (&request->rq_lock, flags);
                         request->rq_replay = 0;
                         spin_unlock_irqrestore (&request->rq_lock, flags);
                 }
-
-                if (it->it_op & IT_CREAT) {
-                        mdc_store_inode_generation(request, 2, 1);
-                        /* The server will return to us, in it_disposition, an
-                         * indication of exactly what it_status refers to.
-                         *
-                         * If IT_OPEN_OPEN is set, then it_status refers to the
-                         * open() call, otherwise if IT_OPEN_CREATE is set, then
-                         * it status is the creation failure mode.  In either
-                         * case, one of IT_OPEN_NEG or IT_OPEN_POS will be set,
-                         * indicating whether the child lookup was successful.
-                         *
-                         * Else, if IT_OPEN_LOOKUP then it_status is the rc
-                         * of the child lookup.
-                         *
-                         * Finally, if none of the bits are set, then the
-                         * failure occurred while looking up the parent. */
-                        rc = ll_it_open_error(IT_OPEN_LOOKUP, it);
-                        if (rc)
-                                GOTO(drop_req, rc);
-
-                        if (it->it_disposition & IT_OPEN_CREATE)
-                                ptlrpc_request_addref(request);
-                        if (it->it_disposition & IT_OPEN_OPEN)
-                                ptlrpc_request_addref(request);
-
-                        if (it->it_disposition & IT_OPEN_NEG)
-                                flag = LL_LOOKUP_NEGATIVE;
-                        else
-                                flag = LL_LOOKUP_POSITIVE;
-                } else if (it->it_op == IT_OPEN) {
-                        LASSERT(!(it->it_disposition & IT_OPEN_CREATE));
-
-                        rc = ll_it_open_error(IT_OPEN_LOOKUP, it);
-                        if (rc)
-                                GOTO(drop_req, rc);
-
-                        if (it->it_disposition & IT_OPEN_OPEN)
-                                ptlrpc_request_addref(request);
-
-                        if (it->it_disposition & IT_OPEN_NEG)
-                                flag = LL_LOOKUP_NEGATIVE;
-                        else
-                                flag = LL_LOOKUP_POSITIVE;
-                } else if (it->it_op & (IT_GETATTR | IT_LOOKUP)) {
-                        /* For check ops, we want the lookup to succeed */
-                        it->it_data = NULL;
-                        if (it->it_status)
-                                flag = LL_LOOKUP_NEGATIVE;
-                        else
-                                flag = LL_LOOKUP_POSITIVE;
-                } else
-                        LBUG();
-        } else {
-                struct ll_fid fid;
-                obd_flag valid;
-                int eadatalen;
-                int mode;
-
-                LBUG(); /* For the moment, no non-intent locks */
-
-                /* it_disposition == 0 indicates that it just did a simple lock
-                 * request, for which we are very thankful.  move along with
-                 * the local lookup then. */
-
-                //memcpy(&lli->lli_intent_lock_handle, &lockh, sizeof(lockh));
-                offset = 0;
-
-                ino = ll_inode_by_name(parent, dentry, &mode);
-                if (!ino) {
-                        CERROR("inode %*s not found by name\n",
-                               dentry->d_name.len, dentry->d_name.name);
-                        GOTO(drop_lock, rc = -ENOENT);
-                }
-
-                valid = OBD_MD_FLNOTOBD;
-
-                if (S_ISREG(mode)) {
-                        eadatalen = obd_size_diskmd(&sbi->ll_osc_conn, NULL),
-                        valid |= OBD_MD_FLEASIZE;
-                } else {
-                        eadatalen = 0;
-                        valid |= OBD_MD_FLBLOCKS;
-                }
-
-                fid.id = ino;
-                fid.generation = 0;
-                fid.f_type = mode;
-                rc = mdc_getattr(&sbi->ll_mdc_conn, &fid, valid,
-                                 eadatalen, &request);
-                if (rc) {
-                        CERROR("failure %d inode "LPX64"\n", rc, ino);
-                        GOTO(drop_lock, rc = -abs(rc));
-                }
         }
 
         }
 
-        LASSERT (request != NULL);
+        rc = ll_it_open_error(DISP_LOOKUP_EXECD, it);
+        if (rc)
+                GOTO(drop_req, rc);
+        
+        /* keep requests around for the multiple phases of the call
+         * this shows the DISP_XX must guarantee we make it into the call 
+         */ 
+        if (it_disposition(it, DISP_OPEN_CREATE))
+                ptlrpc_request_addref(request);
+        if (it_disposition(it, DISP_OPEN_OPEN))
+                ptlrpc_request_addref(request);
+        
+        if (it->it_op & IT_CREAT) {
+                /* XXX this belongs in ll_create_iit */
+        } else if (it->it_op == IT_OPEN) {
+                LASSERT(!it_disposition(it, DISP_OPEN_CREATE));
+        } else 
+                LASSERT(it->it_op & (IT_GETATTR | IT_LOOKUP));
 
         if (intent_finish != NULL) {
                 struct lustre_handle old_lock;
                 struct ldlm_lock *lock;
 
 
         if (intent_finish != NULL) {
                 struct lustre_handle old_lock;
                 struct ldlm_lock *lock;
 
-                rc = intent_finish(flag, request, parent, de, it, offset, ino);
+                rc = intent_finish(request, parent, de, it, 1, ino);
                 dentry = *de; /* intent_finish may change *de */
                 inode = dentry->d_inode;
                 if (rc != 0)
                 dentry = *de; /* intent_finish may change *de */
                 inode = dentry->d_inode;
                 if (rc != 0)
@@ -525,29 +417,16 @@ int ll_intent_lock(struct inode *parent, struct dentry **de,
         }
         ptlrpc_req_finished(request);
 
         }
         ptlrpc_req_finished(request);
 
-        /* This places the intent in the dentry so that the vfs_xxx
-         * operation can lay its hands on it; but that is not always
-         * needed...  (we need to save it in the GETATTR case for the
-         * benefit of ll_inode_revalidate -phil) */
-        /* Ignore trying to save the intent for "special" inodes as
-         * they have special semantics that can cause deadlocks on
-         * the intent semaphore. -mmex */
-        if ((!inode || S_ISDIR(inode->i_mode) || S_ISREG(inode->i_mode) ||
-             S_ISLNK(inode->i_mode)) && (it->it_op & (IT_OPEN | IT_GETATTR)))
-                LL_SAVE_INTENT(dentry, it);
-        else
-                CDEBUG(D_DENTRY,
-                       "D_IT dentry %p fsdata %p intent: %s status %d\n",
-                       dentry, ll_d2d(dentry), ldlm_it2str(it->it_op),
-                       it->it_status);
-
+        CDEBUG(D_DENTRY, "D_IT dentry %p intent: %s status %d disp %x\n",
+               dentry, ldlm_it2str(it->it_op), it->it_status, it->it_disposition);
+        
+        /* drop IT_LOOKUP locks */
         if (it->it_op == IT_LOOKUP)
         if (it->it_op == IT_LOOKUP)
-                ll_intent_release(dentry, it);
-
+                ll_intent_release(it);
         RETURN(rc);
 
  drop_lock:
         RETURN(rc);
 
  drop_lock:
-        ll_intent_release(dentry, it);
+        ll_intent_release(it);
  drop_req:
         ptlrpc_req_finished(request);
         RETURN(rc);
  drop_req:
         ptlrpc_req_finished(request);
         RETURN(rc);
@@ -582,7 +461,7 @@ struct dentry *ll_find_alias(struct inode *inode, struct dentry *de)
                 if (!list_empty(&dentry->d_lru))
                         list_del_init(&dentry->d_lru);
 
                 if (!list_empty(&dentry->d_lru))
                         list_del_init(&dentry->d_lru);
 
-                list_del_init(&dentry->d_hash);
+                hlist_del_init(&dentry->d_hash);
                 __d_rehash(dentry, 0); /* avoid taking dcache_lock inside */
                 spin_unlock(&dcache_lock);
                 atomic_inc(&dentry->d_count);
                 __d_rehash(dentry, 0); /* avoid taking dcache_lock inside */
                 spin_unlock(&dcache_lock);
                 atomic_inc(&dentry->d_count);
@@ -597,68 +476,34 @@ struct dentry *ll_find_alias(struct inode *inode, struct dentry *de)
 }
 
 static int
 }
 
 static int
-lookup2_finish(int flag, struct ptlrpc_request *request,
+lookup2_finish(struct ptlrpc_request *request,
                struct inode *parent, struct dentry **de,
                struct lookup_intent *it, int offset, obd_id ino)
 {
         struct ll_sb_info *sbi = ll_i2sbi(parent);
         struct dentry *dentry = *de, *saved = *de;
         struct inode *inode = NULL;
                struct inode *parent, struct dentry **de,
                struct lookup_intent *it, int offset, obd_id ino)
 {
         struct ll_sb_info *sbi = ll_i2sbi(parent);
         struct dentry *dentry = *de, *saved = *de;
         struct inode *inode = NULL;
-        struct ll_read_inode2_cookie lic = {.lic_body = NULL, .lic_lsm = NULL};
+        int rc;
 
         /* NB 1 request reference will be taken away by ll_intent_lock()
          * when I return */
 
         /* NB 1 request reference will be taken away by ll_intent_lock()
          * when I return */
-
-        if (!(flag & LL_LOOKUP_NEGATIVE)) {
+        if (!it_disposition(it, DISP_LOOKUP_NEG)) {
+                struct lustre_md md;
                 ENTRY;
 
                 ENTRY;
 
-                /* We only get called if the mdc_enqueue() called from
-                 * ll_intent_lock() was successful.  Therefore the mds_body
-                 * is present and correct, and the eadata is present if
-                 * body->eadatasize != 0 (but still opaque, so only
-                 * obd_unpackmd() can check the size) */
-                lic.lic_body = lustre_msg_buf(request->rq_repmsg, offset,
-                                              sizeof (*lic.lic_body));
-                LASSERT(lic.lic_body != NULL);
-                LASSERT_REPSWABBED(request, offset);
-
-                if (S_ISREG(lic.lic_body->mode) &&
-                    (lic.lic_body->valid & OBD_MD_FLEASIZE)) {
-                        struct lov_mds_md    *lmm;
-                        int                   lmm_size;
-                        int                   rc;
-
-                        lmm_size = lic.lic_body->eadatasize;
-                        if (lmm_size == 0) {
-                                CERROR("OBD_MD_FLEASIZE set but "
-                                       "eadatasize 0\n");
-                                RETURN(-EPROTO);
-                        }
-                        lmm = lustre_msg_buf(request->rq_repmsg, offset + 1,
-                                             lmm_size);
-                        LASSERT(lmm != NULL);
-                        LASSERT_REPSWABBED(request, offset + 1);
-
-                        rc = obd_unpackmd(&sbi->ll_osc_conn,
-                                          &lic.lic_lsm, lmm, lmm_size);
-                        if (rc < 0) {
-                                CERROR("Error %d unpacking eadata\n", rc);
-                                RETURN(rc);
-                        }
-                        LASSERT(rc >= sizeof(*lic.lic_lsm));
-                }
+                rc =mdc_req2lustre_md(request, offset, &sbi->ll_osc_conn, &md);
+                if (rc) 
+                        RETURN(rc);
 
 
-                /* Both ENOMEM and an RPC timeout are possible in ll_iget; which
-                 * to pick?  A more generic EIO?  -phik */
-                inode = ll_iget(dentry->d_sb, ino, &lic);
+                inode = ll_iget(dentry->d_sb, ino, &md);
                 if (!inode) {
                         /* free the lsm if we allocated one above */
                 if (!inode) {
                         /* free the lsm if we allocated one above */
-                        if (lic.lic_lsm != NULL)
-                                obd_free_memmd(&sbi->ll_osc_conn, &lic.lic_lsm);
+                        if (md.lsm != NULL)
+                                obd_free_memmd(&sbi->ll_osc_conn, &md.lsm);
                         RETURN(-ENOMEM);
                         RETURN(-ENOMEM);
-                } else if (lic.lic_lsm != NULL &&
-                           ll_i2info(inode)->lli_smd != lic.lic_lsm) {
-                        obd_free_memmd(&sbi->ll_osc_conn, &lic.lic_lsm);
+                } else if (md.lsm != NULL &&
+                           ll_i2info(inode)->lli_smd != md.lsm) {
+                        obd_free_memmd(&sbi->ll_osc_conn, &md.lsm);
                 }
 
                 /* If this is a stat, get the authoritative file size */
                 }
 
                 /* If this is a stat, get the authoritative file size */
@@ -685,8 +530,10 @@ lookup2_finish(int flag, struct ptlrpc_request *request,
                 /* We asked for a lock on the directory, and may have been
                  * granted a lock on the inode.  Just in case, fixup the data
                  * pointer. */
                 /* We asked for a lock on the directory, and may have been
                  * granted a lock on the inode.  Just in case, fixup the data
                  * pointer. */
-                ll_mdc_lock_set_inode((struct lustre_handle*)it->it_lock_handle,
-                                      inode);
+                CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
+                       inode, inode->i_ino, inode->i_generation);
+                ldlm_lock_set_data((struct lustre_handle*)it->it_lock_handle,
+                                   inode);
         } else {
                 ENTRY;
         }
         } else {
                 ENTRY;
         }
@@ -700,8 +547,8 @@ lookup2_finish(int flag, struct ptlrpc_request *request,
         RETURN(0);
 }
 
         RETURN(0);
 }
 
-static struct dentry *ll_lookup2(struct inode *parent, struct dentry *dentry,
-                                 struct lookup_intent *it)
+static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
+                                   struct lookup_intent *it, int flags)
 {
         struct dentry *save = dentry, *retval;
         int rc;
 {
         struct dentry *save = dentry, *retval;
         int rc;
@@ -711,7 +558,11 @@ static struct dentry *ll_lookup2(struct inode *parent, struct dentry *dentry,
                dentry->d_name.name, parent->i_ino, parent->i_generation,
                parent, LL_IT2STR(it));
 
                dentry->d_name.name, parent->i_ino, parent->i_generation,
                parent, LL_IT2STR(it));
 
-        rc = ll_intent_lock(parent, &dentry, it, lookup2_finish);
+        if (d_mountpoint(dentry)) { 
+                CERROR("Tell Peter, lookup on mtpt, it %s\n", LL_IT2STR(it));
+        }
+
+        rc = ll_intent_lock(parent, &dentry, it, flags, lookup2_finish);
         if (rc < 0) {
                 CDEBUG(D_INFO, "ll_intent_lock: %d\n", rc);
                 GOTO(out, retval = ERR_PTR(rc));
         if (rc < 0) {
                 CDEBUG(D_INFO, "ll_intent_lock: %d\n", rc);
                 GOTO(out, retval = ERR_PTR(rc));
@@ -725,167 +576,136 @@ static struct dentry *ll_lookup2(struct inode *parent, struct dentry *dentry,
         return retval;
 }
 
         return retval;
 }
 
-/* We depend on "mode" being set with the proper file type/umask by now */
-static struct inode *ll_create_node(struct inode *dir, const char *name,
-                                    int namelen, const void *data, int datalen,
-                                    int mode, __u64 extra,
-                                    struct lookup_intent *it)
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry, 
+                                   struct nameidata *nd)
 {
 {
-        struct inode *inode;
-        struct ptlrpc_request *request = NULL;
-        struct mds_body *body;
-        time_t time = LTIME_S(CURRENT_TIME);
-        struct ll_sb_info *sbi = ll_i2sbi(dir);
-        struct ll_read_inode2_cookie lic;
+        struct dentry *de;
         ENTRY;
 
         ENTRY;
 
-        if (it && it->it_disposition) {
-                ll_invalidate_inode_pages(dir);
-                request = it->it_data;
-                body = lustre_msg_buf(request->rq_repmsg, 1, sizeof (*body));
-                LASSERT (body != NULL);         /* checked already */
-                LASSERT_REPSWABBED (request, 1); /* swabbed already */
-        } else {
-                struct mdc_op_data op_data;
-                int gid = current->fsgid;
-                int rc;
-
-                if (dir->i_mode & S_ISGID) {
-                        gid = dir->i_gid;
-                        if (S_ISDIR(mode))
-                                mode |= S_ISGID;
-                }
-
-                ll_prepare_mdc_op_data(&op_data, dir, NULL, name, namelen, 0);
-                rc = mdc_create(&sbi->ll_mdc_conn, &op_data,
-                                data, datalen, mode, current->fsuid, gid,
-                                time, extra, &request);
-                if (rc) {
-                        inode = ERR_PTR(rc);
-                        GOTO(out, rc);
-                }
-                body = lustre_swab_repbuf(request, 0, sizeof (*body),
-                                          lustre_swab_mds_body);
-                if (body == NULL) {
-                        CERROR ("Can't unpack mds_body\n");
-                        GOTO (out, inode = ERR_PTR(-EPROTO));
-                }
-        }
-
-        lic.lic_body = body;
-        lic.lic_lsm = NULL;
-
-        inode = ll_iget(dir->i_sb, body->ino, &lic);
-        if (!inode || is_bad_inode(inode)) {
-                /* XXX might need iput() for bad inode */
-                int rc = -EIO;
-                CERROR("new_inode -fatal: rc %d\n", rc);
-                LBUG();
-                GOTO(out, rc);
-        }
-
-        if (!list_empty(&inode->i_dentry)) {
-                CERROR("new_inode -fatal: inode %d, ct %d lnk %d\n",
-                       body->ino, atomic_read(&inode->i_count),
-                       inode->i_nlink);
-                iput(inode);
-                LBUG();
-                inode = ERR_PTR(-EIO);
-                GOTO(out, -EIO);
-        }
-
-        if (it && it->it_disposition) {
-                /* We asked for a lock on the directory, but were
-                 * granted a lock on the inode.  Since we finally have
-                 * an inode pointer, stuff it in the lock. */
-                ll_mdc_lock_set_inode((struct lustre_handle*)it->it_lock_handle,
-                                      inode);
-        }
+        if (nd->flags & LOOKUP_LAST && !(nd->flags & LOOKUP_LINK_NOTLAST))
+                de = ll_lookup_it(parent, dentry, &nd->it, nd->flags);
+        else 
+                de = ll_lookup_it(parent, dentry, NULL, 0);
 
 
-        EXIT;
- out:
-        ptlrpc_req_finished(request);
-        return inode;
+        RETURN(de);
 }
 }
+#endif
 
 static int ll_mdc_unlink(struct inode *dir, struct inode *child, __u32 mode,
                          const char *name, int len)
 {
         struct ptlrpc_request *request = NULL;
 
 static int ll_mdc_unlink(struct inode *dir, struct inode *child, __u32 mode,
                          const char *name, int len)
 {
         struct ptlrpc_request *request = NULL;
-        struct ll_sb_info *sbi = ll_i2sbi(dir);
         struct mds_body *body;
         struct lov_mds_md *eadata;
         struct lov_stripe_md *lsm = NULL;
         struct mds_body *body;
         struct lov_mds_md *eadata;
         struct lov_stripe_md *lsm = NULL;
-        struct lustre_handle lockh;
-        struct lookup_intent it = { .it_op = IT_UNLINK };
-        struct obdo *oa;
-        int err;
+        struct obd_trans_info oti = { 0 };
         struct mdc_op_data op_data;
         struct mdc_op_data op_data;
+        struct obdo *oa;
+        int rc;
         ENTRY;
 
         ll_prepare_mdc_op_data(&op_data, dir, child, name, len, mode);
         ENTRY;
 
         ll_prepare_mdc_op_data(&op_data, dir, child, name, len, mode);
-
-        err = mdc_enqueue(&sbi->ll_mdc_conn, LDLM_PLAIN, &it, LCK_EX,
-                         &op_data, &lockh, NULL, 0,
-                         ldlm_completion_ast, ll_mdc_blocking_ast,
-                         dir);
-        request = (struct ptlrpc_request *)it.it_data;
-        if (err < 0)
-                GOTO(out, err);
-        if (it.it_status)
-                GOTO(out, err = it.it_status);
-        err = 0;
-
-        body = lustre_msg_buf (request->rq_repmsg, 1, sizeof (*body));
-        LASSERT (body != NULL);                 /* checked by mdc_enqueue() */
-        LASSERT_REPSWABBED (request, 1);        /* swabbed by mdc_enqueue() */
+        rc = mdc_unlink(&ll_i2sbi(dir)->ll_mdc_conn, &op_data, &request);
+        if (rc)
+                GOTO(out, rc);
+        /* req is swabbed so this is safe */
+        body = lustre_msg_buf(request->rq_repmsg, 0, sizeof(*body));
 
         if (!(body->valid & OBD_MD_FLEASIZE))
 
         if (!(body->valid & OBD_MD_FLEASIZE))
-                GOTO(out, 0);
+                GOTO(out, rc = 0);
 
         if (body->eadatasize == 0) {
 
         if (body->eadatasize == 0) {
-                CERROR ("OBD_MD_FLEASIZE set but eadatasize zero\n");
-                GOTO (out, err = -EPROTO);
+                CERROR("OBD_MD_FLEASIZE set but eadatasize zero\n");
+                GOTO(out, rc = -EPROTO);
         }
 
         /* The MDS sent back the EA because we unlinked the last reference
          * to this file. Use this EA to unlink the objects on the OST.
         }
 
         /* The MDS sent back the EA because we unlinked the last reference
          * to this file. Use this EA to unlink the objects on the OST.
-         * Note that mdc_enqueue() has already checked there _is_ some EA
-         * data, but this data is opaque to both mdc_enqueue() and the MDS.
-         * We have to leave it to obd_unpackmd() to check it is complete
-         * and sensible. */
-        eadata = lustre_msg_buf (request->rq_repmsg, 2, body->eadatasize);
-        LASSERT (eadata != NULL);
-        LASSERT_REPSWABBED (request, 2);
-
-        err = obd_unpackmd(ll_i2obdconn(dir), &lsm, eadata,
-                           body->eadatasize);
-        if (err < 0) {
-                CERROR("obd_unpackmd: %d\n", err);
-                GOTO (out_unlock, err);
+         * It's opaque so we don't swab here; we leave it to obd_unpackmd() to
+         * check it is complete and sensible. */
+        eadata = lustre_swab_repbuf(request, 1, body->eadatasize, NULL);
+        LASSERT(eadata != NULL);
+        if (eadata == NULL) {
+                CERROR("Can't unpack MDS EA data\n");
+                GOTO(out, rc = -EPROTO);
         }
         }
-        LASSERT (err >= sizeof (*lsm));
+
+        rc = obd_unpackmd(ll_i2obdconn(dir), &lsm, eadata, body->eadatasize);
+        if (rc < 0) {
+                CERROR("obd_unpackmd: %d\n", rc);
+                GOTO(out, rc);
+        }
+        LASSERT(rc >= sizeof(*lsm));
 
         oa = obdo_alloc();
         if (oa == NULL)
 
         oa = obdo_alloc();
         if (oa == NULL)
-                GOTO(out_free_memmd, err = -ENOMEM);
+                GOTO(out_free_memmd, rc = -ENOMEM);
 
         oa->o_id = lsm->lsm_object_id;
         oa->o_mode = body->mode & S_IFMT;
         oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE;
 
 
         oa->o_id = lsm->lsm_object_id;
         oa->o_mode = body->mode & S_IFMT;
         oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE;
 
-        err = obd_destroy(ll_i2obdconn(dir), oa, lsm, NULL);
+        if (body->valid & OBD_MD_FLCOOKIE) {
+                oa->o_valid |= OBD_MD_FLCOOKIE;
+                oti.oti_logcookies = lustre_msg_buf(request->rq_repmsg, 3,
+                                                    body->eadatasize);
+        }
+
+        rc = obd_destroy(ll_i2obdconn(dir), oa, lsm, &oti);
         obdo_free(oa);
         obdo_free(oa);
-        if (err)
+        if (rc)
                 CERROR("obd destroy objid 0x"LPX64" error %d\n",
                 CERROR("obd destroy objid 0x"LPX64" error %d\n",
-                       lsm->lsm_object_id, err);
+                       lsm->lsm_object_id, rc);
  out_free_memmd:
         obd_free_memmd(ll_i2obdconn(dir), &lsm);
  out_free_memmd:
         obd_free_memmd(ll_i2obdconn(dir), &lsm);
- out_unlock:
-        ldlm_lock_decref_and_cancel(&lockh, LCK_EX);
  out:
         ptlrpc_req_finished(request);
  out:
         ptlrpc_req_finished(request);
-        return err;
+        return rc;
+}
+
+/* We depend on "mode" being set with the proper file type/umask by now */
+static struct inode *ll_create_node(struct inode *dir, const char *name,
+                                    int namelen, const void *data, int datalen,
+                                    int mode, __u64 extra,
+                                    struct lookup_intent *it)
+{
+        struct inode *inode;
+        struct ptlrpc_request *request = NULL;
+        struct ll_sb_info *sbi = ll_i2sbi(dir);
+        struct lustre_md md;
+        int rc;
+        ENTRY;
+
+        LASSERT(it && it->it_disposition);
+
+        ll_invalidate_inode_pages(dir);
+
+        request = it->it_data;
+        rc = mdc_req2lustre_md(request, 1, &sbi->ll_osc_conn, &md);
+        if (rc) { 
+                GOTO(out, inode = ERR_PTR(rc));
+        }
+
+        inode = ll_iget(dir->i_sb, md.body->ino, &md);
+        if (!inode || is_bad_inode(inode)) {
+                /* XXX might need iput() for bad inode */
+                int rc = -EIO;
+                CERROR("new_inode -fatal: rc %d\n", rc);
+                LBUG();
+                GOTO(out, rc);
+        }
+        LASSERT(list_empty(&inode->i_dentry));
+
+        CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
+               inode, inode->i_ino, inode->i_generation);
+        ldlm_lock_set_data((struct lustre_handle*)it->it_lock_handle,
+                           inode);
+
+        EXIT;
+ out:
+        ptlrpc_req_finished(request);
+        return inode;
 }
 
 /*
 }
 
 /*
@@ -902,54 +722,46 @@ static int ll_mdc_unlink(struct inode *dir, struct inode *child, __u32 mode,
  * If the create succeeds, we fill in the inode information
  * with d_instantiate().
  */
  * If the create succeeds, we fill in the inode information
  * with d_instantiate().
  */
-static int ll_create(struct inode *dir, struct dentry *dentry, int mode)
+static int ll_create_it(struct inode *dir, struct dentry *dentry, int mode, struct lookup_intent *it)
 {
 {
-        struct lookup_intent *it;
         struct inode *inode;
         struct inode *inode;
+        struct ptlrpc_request *request = it->it_data;
         int rc = 0;
         ENTRY;
 
         CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),intent=%s\n",
                dentry->d_name.name, dir->i_ino, dir->i_generation, dir,
         int rc = 0;
         ENTRY;
 
         CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),intent=%s\n",
                dentry->d_name.name, dir->i_ino, dir->i_generation, dir,
-               LL_IT2STR(dentry->d_it));
-
-        it = dentry->d_it;
+               LL_IT2STR(it));
 
 
-        rc = ll_it_open_error(IT_OPEN_CREATE, it);
+        rc = ll_it_open_error(DISP_OPEN_CREATE, it);
         if (rc) {
         if (rc) {
-                LL_GET_INTENT(dentry, it);
-                ptlrpc_req_finished(it->it_data);
+                ptlrpc_req_finished(request);
                 RETURN(rc);
         }
 
                 RETURN(rc);
         }
 
+        mdc_store_inode_generation(request, 2, 1);
         inode = ll_create_node(dir, dentry->d_name.name, dentry->d_name.len,
                                NULL, 0, mode, 0, it);
         inode = ll_create_node(dir, dentry->d_name.name, dentry->d_name.len,
                                NULL, 0, mode, 0, it);
-
         if (IS_ERR(inode)) {
         if (IS_ERR(inode)) {
-                LL_GET_INTENT(dentry, it);
                 RETURN(PTR_ERR(inode));
         }
 
                 RETURN(PTR_ERR(inode));
         }
 
-        /* no directory data updates when intents rule */
-        if (it && it->it_disposition) {
-                d_instantiate(dentry, inode);
-                RETURN(0);
-        }
-
-        rc = ext2_add_nondir(dentry, inode);
-        RETURN(rc);
+        d_instantiate(dentry, inode);
+        RETURN(0);
 }
 
 }
 
-static int ll_mknod(struct inode *dir, struct dentry *dentry, int mode,
-                    int rdev)
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+static int ll_create_nd(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd)
 {
 {
-        LBUG();
-        return -ENOSYS;
+        return ll_create_it(dir, dentry, mode, &nd->it);
 }
 }
+#endif
 
 
-static int ll_mknod2(struct inode *dir, const char *name, int len, int mode,
-                     int rdev)
+static int ll_mknod_raw(struct nameidata *nd, int mode, dev_t rdev)
 {
 {
+        struct inode *dir = nd->dentry->d_inode;
+        const char *name = nd->last.name;
+        int len = nd->last.len;
         struct ptlrpc_request *request = NULL;
         time_t time = LTIME_S(CURRENT_TIME);
         struct ll_sb_info *sbi = ll_i2sbi(dir);
         struct ptlrpc_request *request = NULL;
         time_t time = LTIME_S(CURRENT_TIME);
         struct ll_sb_info *sbi = ll_i2sbi(dir);
@@ -966,10 +778,13 @@ static int ll_mknod2(struct inode *dir, const char *name, int len, int mode,
         mode &= ~current->fs->umask;
 
         switch (mode & S_IFMT) {
         mode &= ~current->fs->umask;
 
         switch (mode & S_IFMT) {
-        case 0: case S_IFREG:
+        case 0: 
+        case S_IFREG:
                 mode |= S_IFREG; /* for mode = 0 case, fallthrough */
                 mode |= S_IFREG; /* for mode = 0 case, fallthrough */
-        case S_IFCHR: case S_IFBLK:
-        case S_IFIFO: case S_IFSOCK:
+        case S_IFCHR: 
+        case S_IFBLK:
+        case S_IFIFO: 
+        case S_IFSOCK:
                 ll_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
                 err = mdc_create(&sbi->ll_mdc_conn, &op_data, NULL, 0, mode,
                                  current->fsuid, current->fsgid, time,
                 ll_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
                 err = mdc_create(&sbi->ll_mdc_conn, &op_data, NULL, 0, mode,
                                  current->fsuid, current->fsgid, time,
@@ -985,16 +800,11 @@ static int ll_mknod2(struct inode *dir, const char *name, int len, int mode,
         RETURN(err);
 }
 
         RETURN(err);
 }
 
-static int ll_symlink(struct inode *dir, struct dentry *dentry,
-                      const char *symname)
-{
-        LBUG();
-        return -ENOSYS;
-}
-
-static int ll_symlink2(struct inode *dir, const char *name, int len,
-                       const char *tgt)
+static int ll_symlink_raw(struct nameidata *nd, const char *tgt)
 {
 {
+        struct inode *dir = nd->dentry->d_inode;
+        const char *name = nd->last.name;
+        int len = nd->last.len;
         struct ptlrpc_request *request = NULL;
         time_t time = LTIME_S(CURRENT_TIME);
         struct ll_sb_info *sbi = ll_i2sbi(dir);
         struct ptlrpc_request *request = NULL;
         time_t time = LTIME_S(CURRENT_TIME);
         struct ll_sb_info *sbi = ll_i2sbi(dir);
@@ -1016,16 +826,12 @@ static int ll_symlink2(struct inode *dir, const char *name, int len,
         RETURN(err);
 }
 
         RETURN(err);
 }
 
-static int ll_link(struct dentry *old_dentry, struct inode * dir,
-                   struct dentry *dentry)
-{
-        LBUG();
-        return -ENOSYS;
-}
-
-static int ll_link2(struct inode *src, struct inode *dir,
-                    const char *name, int len)
+static int ll_link_raw(struct nameidata *srcnd, struct nameidata *tgtnd)
 {
 {
+        struct inode *src = srcnd->dentry->d_inode;
+        struct inode *dir = tgtnd->dentry->d_inode;
+        const char *name = tgtnd->last.name;
+        int len = tgtnd->last.len;
         struct ptlrpc_request *request = NULL;
         struct mdc_op_data op_data;
         int err;
         struct ptlrpc_request *request = NULL;
         struct mdc_op_data op_data;
         int err;
@@ -1043,14 +849,12 @@ static int ll_link2(struct inode *src, struct inode *dir,
         RETURN(err);
 }
 
         RETURN(err);
 }
 
-static int ll_mkdir(struct inode *dir, struct dentry *dentry, int mode)
-{
-        LBUG();
-        return -ENOSYS;
-}
 
 
-static int ll_mkdir2(struct inode *dir, const char *name, int len, int mode)
+static int ll_mkdir_raw(struct nameidata *nd, int mode)
 {
 {
+        struct inode *dir = nd->dentry->d_inode;
+        const char *name = nd->last.name;
+        int len = nd->last.len;
         struct ptlrpc_request *request = NULL;
         time_t time = LTIME_S(CURRENT_TIME);
         struct ll_sb_info *sbi = ll_i2sbi(dir);
         struct ptlrpc_request *request = NULL;
         time_t time = LTIME_S(CURRENT_TIME);
         struct ll_sb_info *sbi = ll_i2sbi(dir);
@@ -1066,14 +870,16 @@ static int ll_mkdir2(struct inode *dir, const char *name, int len, int mode)
         mode = (mode & (S_IRWXUGO|S_ISVTX) & ~current->fs->umask) | S_IFDIR;
         ll_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
         err = mdc_create(&sbi->ll_mdc_conn, &op_data, NULL, 0, mode,
         mode = (mode & (S_IRWXUGO|S_ISVTX) & ~current->fs->umask) | S_IFDIR;
         ll_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
         err = mdc_create(&sbi->ll_mdc_conn, &op_data, NULL, 0, mode,
-                         current->fsuid, current->fsgid,
-                         time, 0, &request);
+                         current->fsuid, current->fsgid, time, 0, &request);
         ptlrpc_req_finished(request);
         RETURN(err);
 }
 
         ptlrpc_req_finished(request);
         RETURN(err);
 }
 
-static int ll_rmdir2(struct inode *dir, const char *name, int len)
+static int ll_rmdir_raw(struct nameidata *nd)
 {
 {
+        struct inode *dir = nd->dentry->d_inode;
+        const char *name = nd->last.name;
+        int len = nd->last.len;
         int rc;
         ENTRY;
         CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n",
         int rc;
         ENTRY;
         CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n",
@@ -1083,8 +889,11 @@ static int ll_rmdir2(struct inode *dir, const char *name, int len)
         RETURN(rc);
 }
 
         RETURN(rc);
 }
 
-static int ll_unlink2(struct inode *dir, const char *name, int len)
+static int ll_unlink_raw(struct nameidata *nd)
 {
 {
+        struct inode *dir = nd->dentry->d_inode;
+        const char *name = nd->last.name;
+        int len = nd->last.len;
         int rc;
         ENTRY;
         CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n",
         int rc;
         ENTRY;
         CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n",
@@ -1094,29 +903,14 @@ static int ll_unlink2(struct inode *dir, const char *name, int len)
         RETURN(rc);
 }
 
         RETURN(rc);
 }
 
-static int ll_unlink(struct inode *dir, struct dentry *dentry)
-{
-        LBUG();
-        return -ENOSYS;
-}
-
-static int ll_rmdir(struct inode *dir, struct dentry *dentry)
-{
-        LBUG();
-        return -ENOSYS;
-}
-
-static int ll_rename(struct inode * old_dir, struct dentry * old_dentry,
-                     struct inode * new_dir, struct dentry * new_dentry)
-{
-        LBUG();
-        return -ENOSYS;
-}
-
-static int ll_rename2(struct inode *src, struct inode *tgt,
-                      const char *oldname, int oldlen,
-                      const char *newname, int newlen)
+static int ll_rename_raw(struct nameidata *oldnd, struct nameidata *newnd)
 {
 {
+        struct inode *src = oldnd->dentry->d_inode;
+        struct inode *tgt = newnd->dentry->d_inode;
+        const char *oldname = oldnd->last.name;
+        int oldlen  = oldnd->last.len;
+        const char *newname = newnd->last.name;
+        int newlen  = newnd->last.len;
         struct ptlrpc_request *request = NULL;
         struct ll_sb_info *sbi = ll_i2sbi(src);
         struct mdc_op_data op_data;
         struct ptlrpc_request *request = NULL;
         struct ll_sb_info *sbi = ll_i2sbi(src);
         struct mdc_op_data op_data;
@@ -1134,27 +928,23 @@ static int ll_rename2(struct inode *src, struct inode *tgt,
         RETURN(err);
 }
 
         RETURN(err);
 }
 
-extern int ll_inode_revalidate(struct dentry *dentry);
 struct inode_operations ll_dir_inode_operations = {
 struct inode_operations ll_dir_inode_operations = {
-        create:          ll_create,
-        lookup2:         ll_lookup2,
-        link:            ll_link,          /* LBUG() */
-        link2:           ll_link2,
-        unlink:          ll_unlink,        /* LBUG() */
-        unlink2:         ll_unlink2,
-        symlink:         ll_symlink,       /* LBUG() */
-        symlink2:        ll_symlink2,
-        mkdir:           ll_mkdir,         /* LBUG() */
-        mkdir2:          ll_mkdir2,
-        rmdir:           ll_rmdir,         /* LBUG() */
-        rmdir2:          ll_rmdir2,
-        mknod:           ll_mknod,         /* LBUG() */
-        mknod2:          ll_mknod2,
-        rename:          ll_rename,        /* LBUG() */
-        rename2:         ll_rename2,
+        link_raw:           ll_link_raw,
+        unlink_raw:         ll_unlink_raw,
+        symlink_raw:        ll_symlink_raw,
+        mkdir_raw:          ll_mkdir_raw,
+        rmdir_raw:          ll_rmdir_raw,
+        mknod_raw:          ll_mknod_raw,
+        rename_raw:         ll_rename_raw,
         setattr:         ll_setattr,
         setattr_raw:     ll_setattr_raw,
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
         setattr:         ll_setattr,
         setattr_raw:     ll_setattr_raw,
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        revalidate:      ll_inode_revalidate,
+        create_it:          ll_create_it,
+        lookup_it:            ll_lookup_it,
+        revalidate_it:      ll_inode_revalidate_it,
+#else
+        lookup_it:          ll_lookup_nd,
+        create_nd:          ll_create_nd,
+        getattr_it:         ll_getattr,
 #endif
 };
 #endif
 };
index 98f6086..b4004b5 100644 (file)
 #include <linux/version.h>
 #include <asm/system.h>
 #include <asm/uaccess.h>
 #include <linux/version.h>
 #include <asm/system.h>
 #include <asm/uaccess.h>
-#include "llite_internal.h"
 
 #include <linux/fs.h>
 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
 #include <linux/buffer_head.h>
 
 #include <linux/fs.h>
 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
 #include <linux/buffer_head.h>
+#include <linux/mpage.h>
+#include <linux/writeback.h>
 #else
 #include <linux/iobuf.h>
 #endif
 #else
 #include <linux/iobuf.h>
 #endif
@@ -51,7 +52,7 @@
 
 #include <linux/lustre_mds.h>
 #include <linux/lustre_lite.h>
 
 #include <linux/lustre_mds.h>
 #include <linux/lustre_lite.h>
-#include <linux/lustre_lib.h>
+#include "llite_internal.h"
 #include <linux/lustre_compat25.h>
 
 /*
 #include <linux/lustre_compat25.h>
 
 /*
@@ -90,7 +91,8 @@ void set_page_clean(struct page *page)
 }
 
 /* SYNCHRONOUS I/O to object storage for an inode */
 }
 
 /* SYNCHRONOUS I/O to object storage for an inode */
-static int ll_brw(int cmd, struct inode *inode, struct page *page, int flags)
+static int ll_brw(int cmd, struct inode *inode, struct obdo *oa,
+                  struct page *page, int flags)
 {
         struct ll_inode_info *lli = ll_i2info(inode);
         struct lov_stripe_md *lsm = lli->lli_smd;
 {
         struct ll_inode_info *lli = ll_i2info(inode);
         struct lov_stripe_md *lsm = lli->lli_smd;
@@ -124,8 +126,8 @@ static int ll_brw(int cmd, struct inode *inode, struct page *page, int flags)
         else
                 lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
                                     LPROC_LL_BRW_READ, pg.count);
         else
                 lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
                                     LPROC_LL_BRW_READ, pg.count);
-        rc = obd_brw(cmd, ll_i2obdconn(inode), lsm, 1, &pg, NULL);
-        if (rc)
+        rc = obd_brw(cmd, ll_i2obdconn(inode), oa, lsm, 1, &pg, NULL);
+        if (rc != 0 && rc != -EIO)
                 CERROR("error from obd_brw: rc = %d\n", rc);
 
         RETURN(rc);
                 CERROR("error from obd_brw: rc = %d\n", rc);
 
         RETURN(rc);
@@ -142,6 +144,7 @@ static int ll_readpage(struct file *file, struct page *first_page)
         struct page *page = first_page;
         struct list_head *pos;
         struct brw_page *pgs;
         struct page *page = first_page;
         struct list_head *pos;
         struct brw_page *pgs;
+        struct obdo *oa;
         unsigned long end_index, extent_end = 0;
         struct ptlrpc_request_set *set;
         int npgs = 0, rc = 0, max_pages;
         unsigned long end_index, extent_end = 0;
         struct ptlrpc_request_set *set;
         int npgs = 0, rc = 0, max_pages;
@@ -276,19 +279,33 @@ static int ll_readpage(struct file *file, struct page *first_page)
 
         } while (page);
 
 
         } while (page);
 
-        set = ptlrpc_prep_set();
-        if (set == NULL) {
+        if ((oa = obdo_alloc()) == NULL) {
+                CERROR("ENOMEM allocing obdo\n");
+                rc = -ENOMEM;
+        } else if ((set = ptlrpc_prep_set()) == NULL) {
                 CERROR("ENOMEM allocing request set\n");
                 CERROR("ENOMEM allocing request set\n");
+                obdo_free(oa);
                 rc = -ENOMEM;
         } else {
                 rc = -ENOMEM;
         } else {
-                rc = obd_brw_async(OBD_BRW_READ, ll_i2obdconn(inode),
+                struct ll_file_data *fd = file->private_data;
+
+                oa->o_id = lli->lli_smd->lsm_object_id;
+                memcpy(obdo_handle(oa), &fd->fd_ost_och.och_fh,
+                       sizeof(fd->fd_ost_och.och_fh));
+                oa->o_valid = OBD_MD_FLID | OBD_MD_FLHANDLE;
+                obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME);
+
+                rc = obd_brw_async(OBD_BRW_READ, ll_i2obdconn(inode), oa,
                                    ll_i2info(inode)->lli_smd, npgs, pgs,
                                    set, NULL);
                 if (rc == 0)
                         rc = ptlrpc_set_wait(set);
                 ptlrpc_set_destroy(set);
                                    ll_i2info(inode)->lli_smd, npgs, pgs,
                                    set, NULL);
                 if (rc == 0)
                         rc = ptlrpc_set_wait(set);
                 ptlrpc_set_destroy(set);
+                if (rc == 0)
+                        obdo_refresh_inode(inode, oa, oa->o_valid);
                 if (rc && rc != -EIO)
                         CERROR("error from obd_brw_async: rc = %d\n", rc);
                 if (rc && rc != -EIO)
                         CERROR("error from obd_brw_async: rc = %d\n", rc);
+                obdo_free(oa);
         }
 
         while (npgs-- > 0) {
         }
 
         while (npgs-- > 0) {
@@ -310,15 +327,15 @@ static int ll_readpage(struct file *file, struct page *first_page)
 void ll_truncate(struct inode *inode)
 {
         struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
 void ll_truncate(struct inode *inode)
 {
         struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-        struct obdo oa = {0};
+        struct obdo oa;
         int err;
         ENTRY;
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
                inode->i_generation, inode);
 
         int err;
         ENTRY;
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
                inode->i_generation, inode);
 
+        /* object not yet allocated */
         if (!lsm) {
         if (!lsm) {
-                /* object not yet allocated */
-                inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+                CERROR("truncate on inode %lu with no objects\n", inode->i_ino);
                 EXIT;
                 return;
         }
                 EXIT;
                 return;
         }
@@ -331,8 +348,9 @@ void ll_truncate(struct inode *inode)
                         ~0);
 
         oa.o_id = lsm->lsm_object_id;
                         ~0);
 
         oa.o_id = lsm->lsm_object_id;
-        oa.o_mode = inode->i_mode;
-        oa.o_valid = OBD_MD_FLID | OBD_MD_FLMODE | OBD_MD_FLTYPE;
+        oa.o_valid = OBD_MD_FLID;
+        obdo_from_inode(&oa, inode, OBD_MD_FLTYPE|OBD_MD_FLMODE|OBD_MD_FLATIME|
+                                    OBD_MD_FLMTIME | OBD_MD_FLCTIME);
 
         CDEBUG(D_INFO, "calling punch for "LPX64" (all bytes after %Lu)\n",
                oa.o_id, inode->i_size);
 
         CDEBUG(D_INFO, "calling punch for "LPX64" (all bytes after %Lu)\n",
                oa.o_id, inode->i_size);
@@ -343,7 +361,9 @@ void ll_truncate(struct inode *inode)
         if (err)
                 CERROR("obd_truncate fails (%d) ino %lu\n", err, inode->i_ino);
         else
         if (err)
                 CERROR("obd_truncate fails (%d) ino %lu\n", err, inode->i_ino);
         else
-                obdo_to_inode(inode, &oa, oa.o_valid);
+                obdo_to_inode(inode, &oa, OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
+                                          OBD_MD_FLATIME | OBD_MD_FLMTIME |
+                                          OBD_MD_FLCTIME);
 
         EXIT;
         return;
 
         EXIT;
         return;
@@ -356,9 +376,11 @@ static int ll_prepare_write(struct file *file, struct page *page, unsigned from,
 {
         struct inode *inode = page->mapping->host;
         struct ll_inode_info *lli = ll_i2info(inode);
 {
         struct inode *inode = page->mapping->host;
         struct ll_inode_info *lli = ll_i2info(inode);
+        struct ll_file_data *fd = file->private_data;
         struct lov_stripe_md *lsm = lli->lli_smd;
         obd_off offset = ((obd_off)page->index) << PAGE_SHIFT;
         struct brw_page pg;
         struct lov_stripe_md *lsm = lli->lli_smd;
         obd_off offset = ((obd_off)page->index) << PAGE_SHIFT;
         struct brw_page pg;
+        struct obdo oa;
         int rc = 0;
         ENTRY;
 
         int rc = 0;
         ENTRY;
 
@@ -375,7 +397,7 @@ static int ll_prepare_write(struct file *file, struct page *page, unsigned from,
         pg.off = offset;
         pg.count = PAGE_SIZE;
         pg.flag = 0;
         pg.off = offset;
         pg.count = PAGE_SIZE;
         pg.flag = 0;
-        rc = obd_brw(OBD_BRW_CHECK, ll_i2obdconn(inode), lsm, 1, &pg, NULL);
+        rc = obd_brw(OBD_BRW_CHECK, ll_i2obdconn(inode), NULL, lsm, 1,&pg,NULL);
         if (rc)
                 RETURN(rc);
 
         if (rc)
                 RETURN(rc);
 
@@ -393,7 +415,15 @@ static int ll_prepare_write(struct file *file, struct page *page, unsigned from,
                 GOTO(prepare_done, rc = 0);
         }
 
                 GOTO(prepare_done, rc = 0);
         }
 
-        rc = ll_brw(OBD_BRW_READ, inode, page, 0);
+        oa.o_id = lsm->lsm_object_id;
+        oa.o_mode = inode->i_mode;
+        memcpy(obdo_handle(&oa), &fd->fd_ost_och.och_fh,
+               sizeof(fd->fd_ost_och.och_fh));
+        oa.o_valid = OBD_MD_FLID |OBD_MD_FLMODE |OBD_MD_FLTYPE |OBD_MD_FLHANDLE;
+
+        rc = ll_brw(OBD_BRW_READ, inode, &oa, page, 0);
+        if (rc == 0)
+                obdo_refresh_inode(inode, &oa, oa.o_valid);
 
         EXIT;
  prepare_done:
 
         EXIT;
  prepare_done:
@@ -544,15 +574,19 @@ int ll_mark_dirty_page(struct lustre_handle *conn, struct lov_stripe_md *lsm,
 static int ll_writepage(struct page *page)
 {
         struct inode *inode = page->mapping->host;
 static int ll_writepage(struct page *page)
 {
         struct inode *inode = page->mapping->host;
+        struct obdo oa;
         ENTRY;
 
         CDEBUG(D_CACHE, "page %p [lau %d] inode %p\n", page,
         ENTRY;
 
         CDEBUG(D_CACHE, "page %p [lau %d] inode %p\n", page,
-                        PageLaunder(page), inode);
+               PageLaunder(page), inode);
         LASSERT(PageLocked(page));
 
         LASSERT(PageLocked(page));
 
-        /* XXX should obd_brw errors trickle up? */
-        ll_batch_writepage(inode, page);
-        RETURN(0);
+        oa.o_id = ll_i2info(inode)->lli_smd->lsm_object_id;
+        oa.o_valid = OBD_MD_FLID;
+        obdo_from_inode(&oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
+                                    OBD_MD_FLMTIME | OBD_MD_FLCTIME);
+
+        RETURN(ll_batch_writepage(inode, &oa, page));
 }
 
 /*
 }
 
 /*
@@ -567,6 +601,7 @@ static int ll_commit_write(struct file *file, struct page *page,
         int rc = 0;
         ENTRY;
 
         int rc = 0;
         ENTRY;
 
+        SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */
         LASSERT(inode == file->f_dentry->d_inode);
         LASSERT(PageLocked(page));
 
         LASSERT(inode == file->f_dentry->d_inode);
         LASSERT(PageLocked(page));
 
@@ -595,7 +630,18 @@ static int ll_commit_write(struct file *file, struct page *page,
         /* This means that we've hit either the local cache limit or the limit
          * of the OST's grant. */
         if (rc == -EDQUOT) {
         /* This means that we've hit either the local cache limit or the limit
          * of the OST's grant. */
         if (rc == -EDQUOT) {
-                int rc = ll_batch_writepage(inode, page);
+                struct ll_file_data *fd = file->private_data;
+                struct obdo oa;
+                int rc;
+
+                oa.o_id = ll_i2info(inode)->lli_smd->lsm_object_id;
+                memcpy(obdo_handle(&oa), &fd->fd_ost_och.och_fh,
+                       sizeof(fd->fd_ost_och.och_fh));
+                oa.o_valid = OBD_MD_FLID | OBD_MD_FLHANDLE;
+                obdo_from_inode(&oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
+                                            OBD_MD_FLMTIME | OBD_MD_FLCTIME);
+
+                rc = ll_batch_writepage(inode, &oa, page);
                 lock_page(page); /* caller expects to unlock */
                 RETURN(rc);
         }
                 lock_page(page); /* caller expects to unlock */
                 RETURN(rc);
         }
@@ -624,12 +670,13 @@ static int ll_direct_IO(int rw, struct inode *inode, struct kiobuf *iobuf,
         struct lov_stripe_md *lsm = lli->lli_smd;
         struct brw_page *pga;
         struct ptlrpc_request_set *set;
         struct lov_stripe_md *lsm = lli->lli_smd;
         struct brw_page *pga;
         struct ptlrpc_request_set *set;
+        struct obdo oa;
         int length, i, flags, rc = 0;
         loff_t offset;
         ENTRY;
 
         if (!lsm || !lsm->lsm_object_id)
         int length, i, flags, rc = 0;
         loff_t offset;
         ENTRY;
 
         if (!lsm || !lsm->lsm_object_id)
-                RETURN(-ENOMEM);
+                RETURN(-EBADF);
 
         if ((iobuf->offset & (blocksize - 1)) ||
             (iobuf->length & (blocksize - 1)))
 
         if ((iobuf->offset & (blocksize - 1)) ||
             (iobuf->length & (blocksize - 1)))
@@ -663,6 +710,11 @@ static int ll_direct_IO(int rw, struct inode *inode, struct kiobuf *iobuf,
                 }
         }
 
                 }
         }
 
+        oa.o_id = lsm->lsm_object_id;
+        oa.o_valid = OBD_MD_FLID;
+        obdo_from_inode(&oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
+                                    OBD_MD_FLMTIME | OBD_MD_FLCTIME);
+
         if (rw == WRITE)
                 lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
                                     LPROC_LL_DIRECT_WRITE, iobuf->length);
         if (rw == WRITE)
                 lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
                                     LPROC_LL_DIRECT_WRITE, iobuf->length);
@@ -670,8 +722,8 @@ static int ll_direct_IO(int rw, struct inode *inode, struct kiobuf *iobuf,
                 lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
                                     LPROC_LL_DIRECT_READ, iobuf->length);
         rc = obd_brw_async(rw == WRITE ? OBD_BRW_WRITE : OBD_BRW_READ,
                 lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
                                     LPROC_LL_DIRECT_READ, iobuf->length);
         rc = obd_brw_async(rw == WRITE ? OBD_BRW_WRITE : OBD_BRW_READ,
-                           ll_i2obdconn(inode), lsm, iobuf->nr_pages, pga, set,
-                           NULL);
+                           ll_i2obdconn(inode), &oa, lsm, iobuf->nr_pages, pga,
+                           set, NULL);
         if (rc) {
                 CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR,
                        "error from obd_brw_async: rc = %d\n", rc);
         if (rc) {
                 CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR,
                        "error from obd_brw_async: rc = %d\n", rc);
index 85532f0..9a3ffa1 100644 (file)
 #include "llite_internal.h"
 
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
 #include "llite_internal.h"
 
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-kmem_cache_t *ll_file_data_slab;
 extern struct address_space_operations ll_aops;
 extern struct address_space_operations ll_dir_aops;
 extern struct address_space_operations ll_aops;
 extern struct address_space_operations ll_dir_aops;
-struct super_operations ll_super_operations;
-
-/* /proc/lustre/llite root that tracks llite mount points */
-struct proc_dir_entry *proc_lustre_fs_root = NULL;
-/* lproc_llite.c */
-extern void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi);
-extern int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
-                                       struct super_block *sb,
-                                       char *osc, char *mdc);
-
-extern int ll_recover(struct recovd_data *, int);
-extern int ll_commitcbd_setup(struct ll_sb_info *);
-extern int ll_commitcbd_cleanup(struct ll_sb_info *);
-
-static char *ll_read_opt(const char *opt, char *data)
-{
-        char *value;
-        char *retval;
-        ENTRY;
-
-        CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data);
-        if (strncmp(opt, data, strlen(opt)))
-                RETURN(NULL);
-        if ((value = strchr(data, '=')) == NULL)
-                RETURN(NULL);
-
-        value++;
-        OBD_ALLOC(retval, strlen(value) + 1);
-        if (!retval) {
-                CERROR("out of memory!\n");
-                RETURN(NULL);
-        }
-
-        memcpy(retval, value, strlen(value)+1);
-        CDEBUG(D_SUPER, "Assigned option: %s, value %s\n", opt, retval);
-        RETURN(retval);
-}
-
-static int ll_set_opt(const char *opt, char *data, int fl)
-{
-        ENTRY;
-
-        CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data);
-        if (strncmp(opt, data, strlen(opt)))
-                RETURN(0);
-        else
-                RETURN(fl);
-}
-
-static void ll_options(char *options, char **ost, char **mds, int *flags)
-{
-        char *this_char;
-        ENTRY;
-
-        if (!options) {
-                EXIT;
-                return;
-        }
-
-        for (this_char = strtok (options, ",");
-             this_char != NULL;
-             this_char = strtok (NULL, ",")) {
-                CDEBUG(D_SUPER, "this_char %s\n", this_char);
-                if ((!*ost && (*ost = ll_read_opt("osc", this_char)))||
-                    (!*mds && (*mds = ll_read_opt("mdc", this_char)))||
-                    (!(*flags & LL_SBI_NOLCK) &&
-                     ((*flags) = (*flags) |
-                      ll_set_opt("nolock", this_char, LL_SBI_NOLCK))))
-                        continue;
-        }
-        EXIT;
-}
-
-#ifndef log2
-#define log2(n) ffz(~(n))
-#endif
 
 static struct super_block *ll_read_super(struct super_block *sb,
                                          void *data, int silent)
 {
 
 static struct super_block *ll_read_super(struct super_block *sb,
                                          void *data, int silent)
 {
-        struct inode *root = 0;
-        struct obd_device *obd;
-        struct ll_sb_info *sbi;
-        struct obd_export *mdc_export;
-        char *osc = NULL;
-        char *mdc = NULL;
         int err;
         int err;
-        struct ll_fid rootfid;
-        struct obd_statfs osfs;
-        struct ptlrpc_request *request = NULL;
-        struct ptlrpc_connection *mdc_conn;
-        struct ll_read_inode2_cookie lic;
-        class_uuid_t uuid;
-
         ENTRY;
         ENTRY;
-
-        CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb);
-        OBD_ALLOC(sbi, sizeof(*sbi));
-        if (!sbi)
+        err = ll_fill_super(sb, data, silent);
+        if (err)
                 RETURN(NULL);
                 RETURN(NULL);
-
-        INIT_LIST_HEAD(&sbi->ll_conn_chain);
-        INIT_LIST_HEAD(&sbi->ll_orphan_dentry_list);
-        generate_random_uuid(uuid);
-        class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
-
-        sb->u.generic_sbp = sbi;
-
-        ll_options(data, &osc, &mdc, &sbi->ll_flags);
-
-        if (!osc) {
-                CERROR("no osc\n");
-                GOTO(out_free, sb = NULL);
-        }
-
-        if (!mdc) {
-                CERROR("no mdc\n");
-                GOTO(out_free, sb = NULL);
-        }
-
-        obd = class_name2obd(mdc);
-        if (!obd) {
-                CERROR("MDC %s: not setup or attached\n", mdc);
-                GOTO(out_free, sb = NULL);
-        }
-
-        err = obd_connect(&sbi->ll_mdc_conn, obd, &sbi->ll_sb_uuid);
-        if (err) {
-                CERROR("cannot connect to %s: rc = %d\n", mdc, err);
-                GOTO(out_free, sb = NULL);
-        }
-
-        mdc_conn = sbi2mdc(sbi)->cl_import->imp_connection;
-
-        obd = class_name2obd(osc);
-        if (!obd) {
-                CERROR("OSC %s: not setup or attached\n", osc);
-                GOTO(out_mdc, sb = NULL);
-        }
-
-        err = obd_connect(&sbi->ll_osc_conn, obd, &sbi->ll_sb_uuid);
-        if (err) {
-                CERROR("cannot connect to %s: rc = %d\n", osc, err);
-                GOTO(out_mdc, sb = NULL);
-        }
-
-        err = mdc_getstatus(&sbi->ll_mdc_conn, &rootfid);
-        if (err) {
-                CERROR("cannot mds_connect: rc = %d\n", err);
-                GOTO(out_osc, sb = NULL);
-        }
-        CDEBUG(D_SUPER, "rootfid "LPU64"\n", rootfid.id);
-        sbi->ll_rootino = rootfid.id;
-
-        memset(&osfs, 0, sizeof(osfs));
-        mdc_export = class_conn2export(&sbi->ll_mdc_conn);
-        if (mdc_export == NULL) {
-                CERROR("null mdc_export\n");
-                GOTO(out_osc, sb = NULL);
-        }
-        err = obd_statfs(mdc_export, &osfs);
-        class_export_put(mdc_export);
-        sb->s_blocksize = osfs.os_bsize;
-        sb->s_blocksize_bits = log2(osfs.os_bsize);
-        sb->s_magic = LL_SUPER_MAGIC;
-        sb->s_maxbytes = PAGE_CACHE_MAXBYTES;
-
-        sb->s_op = &ll_super_operations;
-
-        /* make root inode 
-         * XXX: move this to after cbd setup? */
-        err = mdc_getattr(&sbi->ll_mdc_conn, &rootfid,
-                          OBD_MD_FLNOTOBD|OBD_MD_FLBLOCKS, 0, &request);
-        if (err) {
-                CERROR("mdc_getattr failed for root: rc = %d\n", err);
-                GOTO(out_osc, sb = NULL);
-        }
-
-        /* initialize committed transaction callback daemon */
-        spin_lock_init(&sbi->ll_commitcbd_lock);
-        init_waitqueue_head(&sbi->ll_commitcbd_waitq);
-        init_waitqueue_head(&sbi->ll_commitcbd_ctl_waitq);
-        sbi->ll_commitcbd_flags = 0;
-        err = ll_commitcbd_setup(sbi);
-        if (err) {
-                CERROR("failed to start commit callback daemon: rc = %d\n",err);
-                ptlrpc_req_finished (request);
-                GOTO(out_osc, sb = NULL);
-        }
-
-        lic.lic_body = lustre_msg_buf(request->rq_repmsg, 0,
-                                      sizeof(*lic.lic_body));
-        LASSERT (lic.lic_body != NULL);         /* checked by mdc_getattr() */
-        LASSERT_REPSWABBED (request, 0);        /* swabbed by mdc_getattr() */
-
-        lic.lic_lsm = NULL;
-
-        LASSERT(sbi->ll_rootino != 0);
-        root = iget4(sb, sbi->ll_rootino, NULL, &lic);
-
-        ptlrpc_req_finished(request);
-
-        if (root == NULL || is_bad_inode(root)) {
-                /* XXX might need iput() for bad inode */
-                CERROR("lustre_lite: bad iget4 for root\n");
-                GOTO(out_cbd, sb = NULL);
-        }
-
-        sb->s_root = d_alloc_root(root);
-
-        if (proc_lustre_fs_root) {
-                err = lprocfs_register_mountpoint(proc_lustre_fs_root, sb,
-                                                  osc, mdc);
-                if (err < 0)
-                        CERROR("could not register mount in /proc/lustre");
-        }
-
-out_dev:
-        if (mdc)
-                OBD_FREE(mdc, strlen(mdc) + 1);
-        if (osc)
-                OBD_FREE(osc, strlen(osc) + 1);
-
         RETURN(sb);
         RETURN(sb);
-
-out_cbd:
-        ll_commitcbd_cleanup(sbi);
-out_osc:
-        obd_disconnect(&sbi->ll_osc_conn, 0);
-out_mdc:
-        obd_disconnect(&sbi->ll_mdc_conn, 0);
-out_free:
-        lprocfs_unregister_mountpoint(sbi);
-        OBD_FREE(sbi, sizeof(*sbi));
-
-        goto out_dev;
-} /* ll_read_super */
-
-static void ll_put_super(struct super_block *sb)
-{
-        struct ll_sb_info *sbi = ll_s2sbi(sb);
-        struct list_head *tmp, *next;
-        struct ll_fid rootfid;
-        struct obd_device *obd = class_conn2obd(&sbi->ll_mdc_conn);
-        ENTRY;
-
-        CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb);
-        list_del(&sbi->ll_conn_chain);
-        ll_commitcbd_cleanup(sbi);
-        obd_disconnect(&sbi->ll_osc_conn, 0);
-
-        /* NULL request to force sync on the MDS, and get the last_committed
-         * value to flush remaining RPCs from the sending queue on client.
-         *
-         * XXX This should be an mdc_sync() call to sync the whole MDS fs,
-         *     which we can call for other reasons as well.
-         */
-        if (!obd->obd_no_recov)
-                mdc_getstatus(&sbi->ll_mdc_conn, &rootfid);
-
-        lprocfs_unregister_mountpoint(sbi);
-        if (sbi->ll_proc_root) {
-                lprocfs_remove(sbi->ll_proc_root);
-                sbi->ll_proc_root = NULL;
-        }
-
-        obd_disconnect(&sbi->ll_mdc_conn, 0);
-
-        spin_lock(&dcache_lock);
-        list_for_each_safe(tmp, next, &sbi->ll_orphan_dentry_list) {
-                struct dentry *dentry = list_entry(tmp, struct dentry, d_hash);
-                shrink_dcache_parent(dentry);
-        }
-        spin_unlock(&dcache_lock);
-
-        OBD_FREE(sbi, sizeof(*sbi));
-
-        EXIT;
-} /* ll_put_super */
-
-static void ll_clear_inode(struct inode *inode)
-{
-        struct ll_sb_info *sbi = ll_i2sbi(inode);
-        struct ll_inode_info *lli = ll_i2info(inode);
-        int rc;
-        ENTRY;
-
-        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
-               inode->i_generation, inode);
-        rc = ll_mdc_cancel_unused(&sbi->ll_mdc_conn, inode,
-                                  LDLM_FL_NO_CALLBACK, inode);
-        if (rc < 0) {
-                CERROR("ll_mdc_cancel_unused: %d\n", rc);
-                /* XXX FIXME do something dramatic */
-        }
-
-        if (atomic_read(&inode->i_count) != 0)
-                CERROR("clearing in-use inode %lu: count = %d\n",
-                       inode->i_ino, atomic_read(&inode->i_count));
-
-        if (lli->lli_smd) {
-                rc = obd_cancel_unused(&sbi->ll_osc_conn, lli->lli_smd,
-                                       LDLM_FL_WARN, inode);
-                if (rc < 0) {
-                        CERROR("obd_cancel_unused: %d\n", rc);
-                        /* XXX FIXME do something dramatic */
-                }
-                obd_free_memmd(&sbi->ll_osc_conn, &lli->lli_smd);
-                lli->lli_smd = NULL;
-        }
-
-        if (lli->lli_symlink_name) {
-                OBD_FREE(lli->lli_symlink_name,
-                         strlen(lli->lli_symlink_name) + 1);
-                lli->lli_symlink_name = NULL;
-        }
-
-        EXIT;
-}
-
-#if 0
-static void ll_delete_inode(struct inode *inode)
-{
-        ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
-               inode->i_generation, inode);
-        if (S_ISREG(inode->i_mode)) {
-                int err;
-                struct obdo *oa;
-                struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-
-                /* mcreate with no open */
-                if (!lsm)
-                        GOTO(out, 0);
-
-                if (lsm->lsm_object_id == 0) {
-                        CERROR("This really happens\n");
-                        /* No obdo was ever created */
-                        GOTO(out, 0);
-                }
-
-                oa = obdo_alloc();
-                if (oa == NULL)
-                        GOTO(out, -ENOMEM);
-
-                oa->o_id = lsm->lsm_object_id;
-                obdo_from_inode(oa, inode, OBD_MD_FLID | OBD_MD_FLTYPE);
-
-                err = obd_destroy(ll_i2obdconn(inode), oa, lsm, NULL);
-                obdo_free(oa);
-                if (err)
-                        CDEBUG(D_INODE,
-                               "inode %lu obd_destroy objid "LPX64" error %d\n",
-                               inode->i_ino, lsm->lsm_object_id, err);
-        }
-out:
-        clear_inode(inode);
-        EXIT;
-}
-#endif
-
-/* like inode_setattr, but doesn't mark the inode dirty */
-static int ll_attr2inode(struct inode *inode, struct iattr *attr, int trunc)
-{
-        unsigned int ia_valid = attr->ia_valid;
-        int error = 0;
-
-        if ((ia_valid & ATTR_SIZE) && trunc) {
-                if (attr->ia_size > ll_file_maxbytes(inode)) {
-                        error = -EFBIG;
-                        goto out;
-                }
-                error = vmtruncate(inode, attr->ia_size);
-                if (error)
-                        goto out;
-        } else if (ia_valid & ATTR_SIZE)
-                inode->i_size = attr->ia_size;
-
-        if (ia_valid & ATTR_UID)
-                inode->i_uid = attr->ia_uid;
-        if (ia_valid & ATTR_GID)
-                inode->i_gid = attr->ia_gid;
-        if (ia_valid & ATTR_ATIME)
-                inode->i_atime = attr->ia_atime;
-        if (ia_valid & ATTR_MTIME)
-                inode->i_mtime = attr->ia_mtime;
-        if (ia_valid & ATTR_CTIME)
-                inode->i_ctime = attr->ia_ctime;
-        if (ia_valid & ATTR_MODE) {
-                inode->i_mode = attr->ia_mode;
-                if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
-                        inode->i_mode &= ~S_ISGID;
-        }
-out:
-        return error;
-}
-
-int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc)
-{
-        struct ptlrpc_request *request = NULL;
-        struct ll_sb_info *sbi = ll_i2sbi(inode);
-        int err = 0;
-        ENTRY;
-
-        /* change incore inode */
-        err = ll_attr2inode(inode, attr, do_trunc);
-        if (err)
-                RETURN(err);
-
-        /* Don't send size changes to MDS to avoid "fast EA" problems, and
-         * also avoid a pointless RPC (we get file size from OST anyways).
-         */
-        attr->ia_valid &= ~ATTR_SIZE;
-        if (attr->ia_valid) {
-                struct mdc_op_data op_data;
-
-                ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0);
-                err = mdc_setattr(&sbi->ll_mdc_conn, &op_data,
-                                  attr, NULL, 0, &request);
-                if (err)
-                        CERROR("mdc_setattr fails: err = %d\n", err);
-
-                ptlrpc_req_finished(request);
-                if (S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_MTIME_SET) {
-                        struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-                        struct obdo oa;
-                        int err2;
-
-                        CDEBUG(D_INODE, "set mtime on OST inode %lu to %lu\n",
-                               inode->i_ino, attr->ia_mtime);
-                        oa.o_id = lsm->lsm_object_id;
-                        oa.o_mode = S_IFREG;
-                        oa.o_valid = OBD_MD_FLID |OBD_MD_FLTYPE |OBD_MD_FLMTIME;
-                        oa.o_mtime = attr->ia_mtime;
-                        err2 = obd_setattr(&sbi->ll_osc_conn, &oa, lsm, NULL);
-                        if (err2) {
-                                CERROR("obd_setattr fails: rc=%d\n", err);
-                                if (!err)
-                                        err = err2;
-                        }
-                }
-        }
-
-        RETURN(err);
-}
-
-int ll_setattr_raw(struct inode *inode, struct iattr *attr)
-{
-        struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-        struct ll_sb_info *sbi = ll_i2sbi(inode);
-        struct ptlrpc_request *request = NULL;
-        struct mdc_op_data op_data;
-        int rc = 0, err;
-        ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
-               inode->i_generation, inode);
-
-        if ((attr->ia_valid & ATTR_SIZE)) {
-                struct ldlm_extent extent = {attr->ia_size, OBD_OBJECT_EOF};
-                struct lustre_handle lockh = { 0 };
-
-                if (attr->ia_size > ll_file_maxbytes(inode))
-                        RETURN(-EFBIG);
-
-                /* writeback uses inode->i_size to determine how far out
-                 * its cached pages go.  ll_truncate gets a PW lock, canceling
-                 * our lock, _after_ it has updated i_size.  this can confuse
-                 *
-                 * If this file doesn't have stripes yet, it is already,
-                 * by definition, truncated. */
-                if ((attr->ia_valid & ATTR_FROM_OPEN) && lsm == NULL) {
-                        LASSERT(attr->ia_size == 0);
-                        GOTO(skip_extent_lock, rc = 0);
-                }
-
-                /* we really need to get our PW lock before we change
-                 * inode->i_size.  if we don't we can race with other
-                 * i_size updaters on our node, like ll_file_read.  we
-                 * can also race with i_size propogation to other
-                 * nodes through dirtying and writeback of final cached
-                 * pages.  this last one is especially bad for racing
-                 * o_append users on other nodes. */
-                rc = ll_extent_lock_no_validate(NULL, inode, lsm, LCK_PW,
-                                                &extent, &lockh);
-                if (rc != ELDLM_OK) {
-                        if (rc > 0)
-                                RETURN(-ENOLCK);
-                        RETURN(rc);
-                }
-
-                rc = vmtruncate(inode, attr->ia_size);
-                if (rc == 0)
-                        set_bit(LLI_F_HAVE_SIZE_LOCK,
-                                &ll_i2info(inode)->lli_flags);
-
-                /* unlock now as we don't mind others file lockers racing with
-                 * the mds updates below? */
-                err = ll_extent_unlock(NULL, inode, lsm, LCK_PW, &lockh);
-                if (err)
-                        CERROR("ll_extent_unlock failed: %d\n", err);
-                if (rc)
-                        RETURN(rc);
-        }
-
-skip_extent_lock:
-        /* Don't send size changes to MDS to avoid "fast EA" problems, and
-         * also avoid a pointless RPC (we get file size from OST anyways).
-         */
-        attr->ia_valid &= ~ATTR_SIZE;
-        if (!attr->ia_valid)
-                RETURN(0);
-
-        ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0);
-
-        err = mdc_setattr(&sbi->ll_mdc_conn, &op_data,
-                          attr, NULL, 0, &request);
-        if (err)
-                CERROR("mdc_setattr fails: err = %d\n", err);
-
-        ptlrpc_req_finished(request);
-
-        if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_MTIME_SET)) {
-                struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-                struct obdo oa;
-                int err2;
-
-                if (lsm == NULL) {
-                        CDEBUG(D_INODE, "no lsm: not setting mtime on OSTs\n");
-                        RETURN(err);
-                }
-
-                CDEBUG(D_INODE, "set mtime on OST inode %lu to %lu\n",
-                       inode->i_ino, attr->ia_mtime);
-                oa.o_id = lsm->lsm_object_id;
-                oa.o_mode = S_IFREG;
-                oa.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMTIME;
-                oa.o_mtime = attr->ia_mtime;
-                err2 = obd_setattr(&sbi->ll_osc_conn, &oa, lsm, NULL);
-                if (err2) {
-                        CERROR("obd_setattr fails: rc=%d\n", err);
-                        if (!err)
-                                err = err2;
-                }
-        }
-        RETURN(err);
-}
-
-int ll_setattr(struct dentry *de, struct iattr *attr)
-{
-        int rc = inode_change_ok(de->d_inode, attr);
-        CDEBUG(D_VFSTRACE, "VFS Op:name=%s\n", de->d_name.name);
-        if (rc)
-                return rc;
-        lprocfs_counter_incr(ll_i2sbi(de->d_inode)->ll_stats, LPROC_LL_SETATTR);
-
-        return ll_inode_setattr(de->d_inode, attr, 1);
-}
-
-static int ll_statfs(struct super_block *sb, struct statfs *sfs)
-{
-        struct ll_sb_info *sbi = ll_s2sbi(sb);
-        struct obd_export *mdc_exp = class_conn2export(&sbi->ll_mdc_conn);
-        struct obd_export *osc_exp;
-        struct obd_statfs osfs;
-        int rc;
-        ENTRY;
-
-        if (mdc_exp == NULL)
-                RETURN(-EINVAL);
-
-        CDEBUG(D_VFSTRACE, "VFS Op:\n");
-        lprocfs_counter_incr(sbi->ll_stats, LPROC_LL_STAFS);
-        memset(sfs, 0, sizeof(*sfs));
-        rc = obd_statfs(mdc_exp, &osfs);
-        statfs_unpack(sfs, &osfs);
-        if (rc)
-                CERROR("mdc_statfs fails: rc = %d\n", rc);
-        else
-                CDEBUG(D_SUPER, "mdc_statfs shows blocks "LPU64"/"LPU64
-                       " objects "LPU64"/"LPU64"\n",
-                       osfs.os_bavail, osfs.os_blocks,
-                       osfs.os_ffree, osfs.os_files);
-
-        /* temporary until mds_statfs returns statfs info for all OSTs */
-        if (!rc) {
-                osc_exp = class_conn2export(&sbi->ll_osc_conn);
-                if (osc_exp == NULL)
-                        GOTO(out, rc = -EINVAL);
-                rc = obd_statfs(osc_exp, &osfs);
-                class_export_put(osc_exp);
-                if (rc) {
-                        CERROR("obd_statfs fails: rc = %d\n", rc);
-                        GOTO(out, rc);
-                }
-                CDEBUG(D_SUPER, "obd_statfs shows blocks "LPU64"/"LPU64
-                       " objects "LPU64"/"LPU64"\n",
-                       osfs.os_bavail, osfs.os_blocks,
-                       osfs.os_ffree, osfs.os_files);
-
-                while (osfs.os_blocks > ~0UL) {
-                        sfs->f_bsize <<= 1;
-
-                        osfs.os_blocks >>= 1;
-                        osfs.os_bfree >>= 1;
-                        osfs.os_bavail >>= 1;
-                }
-
-                sfs->f_blocks = osfs.os_blocks;
-                sfs->f_bfree = osfs.os_bfree;
-                sfs->f_bavail = osfs.os_bavail;
-
-                /* If we don't have as many objects free on the OST as inodes
-                 * on the MDS, we reduce the total number of inodes to
-                 * compensate, so that the "inodes in use" number is correct.
-                 */
-                if (osfs.os_ffree < (__u64)sfs->f_ffree) {
-                        sfs->f_files = (sfs->f_files - sfs->f_ffree) +
-                                       osfs.os_ffree;
-                        sfs->f_ffree = osfs.os_ffree;
-                }
-        }
-
-out:
-        class_export_put(mdc_exp);
-        RETURN(rc);
-}
-
-void dump_lsm(int level, struct lov_stripe_md *lsm)
-{
-        CDEBUG(level, "objid "LPX64", maxbytes "LPX64", magic %#08x, "
-               "stripe_size %#08x, offset %u, stripe_count %u\n",
-               lsm->lsm_object_id, lsm->lsm_maxbytes, lsm->lsm_magic,
-               lsm->lsm_stripe_size, lsm->lsm_stripe_offset,
-               lsm->lsm_stripe_count);
-}
-
-void ll_update_inode(struct inode *inode, struct mds_body *body,
-                     struct lov_stripe_md *lsm)
-{
-        struct ll_inode_info *lli = ll_i2info(inode);
-
-        LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
-        if (lsm != NULL) {
-                if (lli->lli_smd == NULL) {
-                        lli->lli_maxbytes = lsm->lsm_maxbytes;
-                        if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES)
-                                lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
-                        lli->lli_smd = lsm;
-                } else {
-                        if (memcmp(lli->lli_smd, lsm, sizeof(*lsm))) {
-                                CERROR("lsm mismatch for inode %ld\n",
-                                       inode->i_ino);
-                                CERROR("lli_smd:\n");
-                                dump_lsm(D_ERROR, lli->lli_smd);
-                                CERROR("lsm:\n");
-                                dump_lsm(D_ERROR, lsm);
-                                LBUG();
-                        }
-                }
-        }
-
-        if (body->valid & OBD_MD_FLID)
-                inode->i_ino = body->ino;
-        if (body->valid & OBD_MD_FLATIME)
-                LTIME_S(inode->i_atime) = body->atime;
-        if (body->valid & OBD_MD_FLMTIME)
-                LTIME_S(inode->i_mtime) = body->mtime;
-        if (body->valid & OBD_MD_FLCTIME)
-                LTIME_S(inode->i_ctime) = body->ctime;
-        if (body->valid & OBD_MD_FLMODE)
-                inode->i_mode = (inode->i_mode & S_IFMT)|(body->mode & ~S_IFMT);
-        if (body->valid & OBD_MD_FLTYPE)
-                inode->i_mode = (inode->i_mode & ~S_IFMT)|(body->mode & S_IFMT);
-        if (body->valid & OBD_MD_FLUID)
-                inode->i_uid = body->uid;
-        if (body->valid & OBD_MD_FLGID)
-                inode->i_gid = body->gid;
-        if (body->valid & OBD_MD_FLFLAGS)
-                inode->i_flags = body->flags;
-        if (body->valid & OBD_MD_FLNLINK)
-                inode->i_nlink = body->nlink;
-        if (body->valid & OBD_MD_FLGENER)
-                inode->i_generation = body->generation;
-        if (body->valid & OBD_MD_FLRDEV)
-                inode->i_rdev = body->rdev;
-        if (body->valid & OBD_MD_FLSIZE)
-                inode->i_size = body->size;
-        if (body->valid & OBD_MD_FLBLOCKS)
-                inode->i_blocks = body->blocks;
-}
-
-static void ll_read_inode2(struct inode *inode, void *opaque)
-{
-        struct ll_read_inode2_cookie *lic = opaque;
-        struct mds_body *body = lic->lic_body;
-        struct ll_inode_info *lli = ll_i2info(inode);
-        ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
-               inode->i_generation, inode);
-
-        sema_init(&lli->lli_open_sem, 1);
-        spin_lock_init(&lli->lli_read_extent_lock);
-        INIT_LIST_HEAD(&lli->lli_read_extents);
-        lli->lli_flags = 0;
-        /* We default to 2T-4k until the LSM is created/read, at which point
-         * it'll be updated. */
-        lli->lli_maxbytes = LUSTRE_STRIPE_MAXBYTES;
-
-        LASSERT(!lli->lli_smd);
-
-        /* core attributes from the MDS first */
-        ll_update_inode(inode, body, lic->lic_lsm);
-
-        /* OIDEBUG(inode); */
-
-        if (S_ISREG(inode->i_mode)) {
-                inode->i_op = &ll_file_inode_operations;
-                inode->i_fop = &ll_file_operations;
-                inode->i_mapping->a_ops = &ll_aops;
-                EXIT;
-        } else if (S_ISDIR(inode->i_mode)) {
-                inode->i_op = &ll_dir_inode_operations;
-                inode->i_fop = &ll_dir_operations;
-                inode->i_mapping->a_ops = &ll_dir_aops;
-                EXIT;
-        } else if (S_ISLNK(inode->i_mode)) {
-                inode->i_op = &ll_fast_symlink_inode_operations;
-                EXIT;
-        } else {
-                inode->i_op = &ll_special_inode_operations;
-                init_special_inode(inode, inode->i_mode, inode->i_rdev);
-                EXIT;
-        }
-}
-
-void ll_umount_begin(struct super_block *sb)
-{
-        struct ll_sb_info *sbi = ll_s2sbi(sb);
-        struct obd_device *obd;
-        struct obd_ioctl_data ioc_data = { 0 };
-
-        ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:\n");
-
-        obd = class_conn2obd(&sbi->ll_mdc_conn);
-        obd->obd_no_recov = 1;
-        obd_iocontrol(IOC_OSC_SET_ACTIVE, &sbi->ll_mdc_conn, sizeof ioc_data,
-                      &ioc_data, NULL);
-
-        obd = class_conn2obd(&sbi->ll_osc_conn);
-        obd->obd_no_recov = 1;
-        obd_iocontrol(IOC_OSC_SET_ACTIVE, &sbi->ll_osc_conn, sizeof ioc_data,
-                      &ioc_data, NULL);
-
-        /* Really, we'd like to wait until there are no requests outstanding,
-         * and then continue.  For now, we just invalidate the requests,
-         * schedule, and hope.
-         */
-        schedule();
-
-        EXIT;
 }
 
 /* exported operations */
 }
 
 /* exported operations */
index 980bfcd..5ab03ff 100644 (file)
 #include <linux/lprocfs_status.h>
 #include "llite_internal.h"
 
 #include <linux/lprocfs_status.h>
 #include "llite_internal.h"
 
-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-#include <asm/statfs.h>
-kmem_cache_t *ll_file_data_slab;
-extern struct address_space_operations ll_aops;
-extern struct address_space_operations ll_dir_aops;
-struct super_operations ll_super_operations;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
 
 
-/* /proc/lustre/llite root that tracks llite mount points */
-struct proc_dir_entry *proc_lustre_fs_root = NULL;
-/* lproc_llite.c */
-extern int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
-                                       struct super_block *sb,
-                                       char *osc, char *mdc);
-
-extern int ll_init_inodecache(void);
-extern void ll_destroy_inodecache(void);
-extern int ll_recover(struct recovd_data *, int);
-extern int ll_commitcbd_setup(struct ll_sb_info *);
-extern int ll_commitcbd_cleanup(struct ll_sb_info *);
-int ll_read_inode2(struct inode *inode, void *opaque);
-
-extern int ll_proc_namespace(struct super_block* sb, char* osc, char* mdc);
-
-static char *ll_read_opt(const char *opt, char *data)
-{
-        char *value;
-        char *retval;
-        ENTRY;
-
-        CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data);
-        if (strncmp(opt, data, strlen(opt)))
-                RETURN(NULL);
-        if ((value = strchr(data, '=')) == NULL)
-                RETURN(NULL);
-
-        value++;
-        OBD_ALLOC(retval, strlen(value) + 1);
-        if (!retval) {
-                CERROR("out of memory!\n");
-                RETURN(NULL);
-        }
-
-        memcpy(retval, value, strlen(value)+1);
-        CDEBUG(D_SUPER, "Assigned option: %s, value %s\n", opt, retval);
-        RETURN(retval);
-}
-
-static int ll_set_opt(const char *opt, char *data, int fl)
-{
-        ENTRY;
-
-        CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data);
-        if (strncmp(opt, data, strlen(opt)))
-                RETURN(0);
-        else
-                RETURN(fl);
-}
-
-static void ll_options(char *options, char **ost, char **mds, int *flags)
-{
-        char *opt_ptr = options;
-        char *this_char;
-        ENTRY;
-
-        if (!options) {
-                EXIT;
-                return;
-        }
-
-        while ((this_char = strsep (&opt_ptr, ",")) != NULL) {
-                CDEBUG(D_SUPER, "this_char %s\n", this_char);
-                if ((!*ost && (*ost = ll_read_opt("osc", this_char)))||
-                    (!*mds && (*mds = ll_read_opt("mdc", this_char)))||
-                    (!(*flags & LL_SBI_NOLCK) &&
-                     ((*flags) = (*flags) |
-                      ll_set_opt("nolock", this_char, LL_SBI_NOLCK))))
-                        continue;
-        }
-        EXIT;
-}
-
-#ifndef log2
-#define log2(n) ffz(~(n))
-#endif
-
-
-static int ll_fill_super(struct super_block *sb, void *data, int silent)
-{
-        struct inode *root = 0;
-        struct obd_device *obd;
-        struct ll_sb_info *sbi;
-        char *osc = NULL;
-        char *mdc = NULL;
-        int err;
-        struct ll_fid rootfid;
-        struct obd_statfs osfs;
-        struct ptlrpc_request *request = NULL;
-        struct ptlrpc_connection *mdc_conn;
-        struct ll_read_inode2_cookie lic;
-        class_uuid_t uuid;
-
-        ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:\n");
-
-        OBD_ALLOC(sbi, sizeof(*sbi));
-        if (!sbi)
-                RETURN(-ENOMEM);
-
-        INIT_LIST_HEAD(&sbi->ll_conn_chain);
-        INIT_LIST_HEAD(&sbi->ll_orphan_dentry_list);
-        generate_random_uuid(uuid);
-        class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
-
-        sb->s_fs_info = sbi;
-
-        ll_options(data, &osc, &mdc, &sbi->ll_flags);
-
-        if (!osc) {
-                CERROR("no osc\n");
-                GOTO(out_free, sb = NULL);
-        }
-
-        if (!mdc) {
-                CERROR("no mdc\n");
-                GOTO(out_free, sb = NULL);
-        }
-
-        obd = class_name2obd(mdc);
-        if (!obd) {
-                CERROR("MDC %s: not setup or attached\n", mdc);
-                GOTO(out_free, sb = NULL);
-        }
-
-        err = obd_connect(&sbi->ll_mdc_conn, obd, &sbi->ll_sb_uuid);
-        if (err) {
-                CERROR("cannot connect to %s: rc = %d\n", mdc, err);
-                GOTO(out_free, sb = NULL);
-        }
-
-        mdc_conn = sbi2mdc(sbi)->cl_import->imp_connection;
-
-        obd = class_name2obd(osc);
-        if (!obd) {
-                CERROR("OSC %s: not setup or attached\n", osc);
-                GOTO(out_mdc, sb = NULL);
-        }
-
-        err = obd_connect(&sbi->ll_osc_conn, obd, &sbi->ll_sb_uuid);
-        if (err) {
-                CERROR("cannot connect to %s: rc = %d\n", osc, err);
-                GOTO(out_mdc, sb = NULL);
-        }
-
-        err = mdc_getstatus(&sbi->ll_mdc_conn, &rootfid);
-        if (err) {
-                CERROR("cannot mds_connect: rc = %d\n", err);
-                GOTO(out_osc, sb = NULL);
-        }
-        CDEBUG(D_SUPER, "rootfid "LPU64"\n", rootfid.id);
-        sbi->ll_rootino = rootfid.id;
-
-        memset(&osfs, 0, sizeof(osfs));
-        err = obd_statfs(&sbi->ll_mdc_conn, &osfs);
-        sb->s_blocksize = osfs.os_bsize;
-        sb->s_blocksize_bits = log2(osfs.os_bsize);
-        sb->s_magic = LL_SUPER_MAGIC;
-        sb->s_maxbytes = PAGE_CACHE_MAXBYTES;
-
-        sb->s_op = &ll_super_operations;
-
-        /* make root inode 
-         * XXX: move this to after cbd setup? */
-        err = mdc_getattr(&sbi->ll_mdc_conn, &rootfid,
-                          OBD_MD_FLNOTOBD|OBD_MD_FLBLOCKS, 0, &request);
-        if (err) {
-                CERROR("mdc_getattr failed for root: rc = %d\n", err);
-                GOTO(out_osc, sb = NULL);
-        }
-
-        /* initialize committed transaction callback daemon */
-        spin_lock_init(&sbi->ll_commitcbd_lock);
-        init_waitqueue_head(&sbi->ll_commitcbd_waitq);
-        init_waitqueue_head(&sbi->ll_commitcbd_ctl_waitq);
-        sbi->ll_commitcbd_flags = 0;
-        err = ll_commitcbd_setup(sbi);
-        if (err) {
-                CERROR("failed to start commit callback daemon: rc = %d\n",err);
-                ptlrpc_req_finished (request);
-                GOTO(out_osc, sb = NULL);
-        }
-
-        lic.lic_body = lustre_msg_buf(request->rq_repmsg, 0, sizeof (*lic.lic_body));
-        LASSERT (lic.lic_body != NULL);         /* checked by mdc_getattr() */
-        LASSERT_REPSWABBED (request, 0);        /* swabbed by mdc_getattr() */
-
-        lic.lic_lsm = NULL;
-
-        root = iget5_locked(sb, sbi->ll_rootino, NULL,
-                            ll_read_inode2, &lic);
-
-        ptlrpc_req_finished(request);
-
-        if (root == NULL || is_bad_inode(root)) {
-                /* XXX might need iput() for bad inode */
-                CERROR("lustre_lite: bad iget5 for root\n");
-                GOTO(out_cbd, sb = NULL);
-        }
-
-        sb->s_root = d_alloc_root(root);
-        root->i_state &= ~(I_LOCK | I_NEW);
-        printk("AMRUT 1\n");
-        if (proc_lustre_fs_root) {
-                err = lprocfs_register_mountpoint(proc_lustre_fs_root, sb,
-                                                  osc, mdc);
-                if (err < 0)
-                        CERROR("could not register mount in /proc/lustre");
-        }
-
-out_dev:
-        if (mdc)
-                OBD_FREE(mdc, strlen(mdc) + 1);
-        if (osc)
-                OBD_FREE(osc, strlen(osc) + 1);
-        printk("AMRUT 2\n");
-
-        RETURN(0);
-
-out_cbd:
-        ll_commitcbd_cleanup(sbi);
-out_osc:
-        obd_disconnect(&sbi->ll_osc_conn, 0);
-out_mdc:
-        obd_disconnect(&sbi->ll_mdc_conn, 0);
-out_free:
-        lprocfs_unregister_mountpoint(sbi);
-        OBD_FREE(sbi, sizeof(*sbi));
-
-        goto out_dev;
-} /* ll_fill_super */
-
-
-int ll_setattr_raw(struct inode *inode, struct iattr *attr)
-{
-        struct ptlrpc_request *request = NULL;
-        struct ll_sb_info *sbi = ll_i2sbi(inode);
-        struct mdc_op_data op_data;
-        int err = 0;
-        ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino);
-
-        LPROC_COUNTER_INODE_INCBY1(inode, LPROC_LL_SETATTR);
-        if ((attr->ia_valid & ATTR_SIZE)) {
-                /* writeback uses inode->i_size to determine how far out
-                 * its cached pages go.  ll_truncate gets a PW lock, canceling
-                 * our lock, _after_ it has updated i_size.  this can confuse
-                 * us into zero extending the file to the newly truncated
-                 * size, and this has bad implications for a racing o_append.
-                 * if we're extending our size we need to flush the pages
-                 * with the correct i_size before vmtruncate stomps on
-                 * the new i_size.  again, this can only find pages to
-                 * purge if the PW lock that generated them is still held.
-                 */
-                if ( attr->ia_size > inode->i_size ) {
-                        filemap_fdatasync(inode->i_mapping);
-                        filemap_fdatawait(inode->i_mapping);
-                }
-                err = vmtruncate(inode, attr->ia_size);
-                if (err)
-                        RETURN(err);
-        }
-
-        /* Don't send size changes to MDS to avoid "fast EA" problems, and
-         * also avoid a pointless RPC (we get file size from OST anyways).
-         */
-        attr->ia_valid &= ~ATTR_SIZE;
-        if (!attr->ia_valid)
-                RETURN(0);
-
-        ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0);
-
-        err = mdc_setattr(&sbi->ll_mdc_conn, &op_data,
-                          attr, NULL, 0, &request);
-        if (err)
-                CERROR("mdc_setattr fails: err = %d\n", err);
-
-        ptlrpc_req_finished(request);
-
-        if (S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_MTIME_SET) {
-                struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-                struct obdo oa;
-                int err2;
-
-                CDEBUG(D_INODE, "set mtime on OST inode %lu to %lu\n",
-                       inode->i_ino, attr->ia_mtime);
-                oa.o_id = lsm->lsm_object_id;
-                oa.o_mode = S_IFREG;
-                oa.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMTIME;
-                oa.o_mtime = LTIME_S(attr->ia_mtime);
-                err2 = obd_setattr(&sbi->ll_osc_conn, &oa, lsm, NULL);
-                if (err2) {
-                        CERROR("obd_setattr fails: rc=%d\n", err);
-                        if (!err)
-                                err = err2;
-                }
-        }
-        RETURN(err);
-}
 struct super_block * ll_get_sb(struct file_system_type *fs_type,
 struct super_block * ll_get_sb(struct file_system_type *fs_type,
-                               int flags, char *devname, void * data)
+                               int flags, const char *devname, void * data)
 {
 {
+        /* calls back in fill super */
         return get_sb_nodev(fs_type, flags, data, ll_fill_super);
 }
 
         return get_sb_nodev(fs_type, flags, data, ll_fill_super);
 }
 
-static void ll_put_super(struct super_block *sb)
-{
-        struct ll_sb_info *sbi = ll_s2sbi(sb);
-        struct list_head *tmp, *next;
-        struct ll_fid rootfid;
-        ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:\n");
-
-        list_del(&sbi->ll_conn_chain);
-        ll_commitcbd_cleanup(sbi);
-        obd_disconnect(&sbi->ll_osc_conn, 0);
-
-        /* NULL request to force sync on the MDS, and get the last_committed
-         * value to flush remaining RPCs from the pending queue on client.
-         *
-         * XXX This should be an mdc_sync() call to sync the whole MDS fs,
-         *     which we can call for other reasons as well.
-         */
-        mdc_getstatus(&sbi->ll_mdc_conn, &rootfid);
-
-        lprocfs_unregister_mountpoint(sbi);
-        if (sbi->ll_proc_root) {
-                lprocfs_remove(sbi->ll_proc_root);
-        sbi->ll_proc_root = NULL;
-        }
-
-        obd_disconnect(&sbi->ll_mdc_conn, 0);
-
-        spin_lock(&dcache_lock);
-        list_for_each_safe(tmp, next, &sbi->ll_orphan_dentry_list){
-                struct dentry *dentry = list_entry(tmp, struct dentry, d_hash);
-                shrink_dcache_parent(dentry);
-        }
-        spin_unlock(&dcache_lock);
-
-        OBD_FREE(sbi, sizeof(*sbi));
-
-        EXIT;
-} /* ll_put_super */
-
-static void ll_clear_inode(struct inode *inode)
-{
-        struct ll_sb_info *sbi = ll_i2sbi(inode);
-        struct ll_inode_info *lli = ll_i2info(inode);
-        int rc;
-        ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino);
-
-#warning "Is there a reason we don't do this in 2.5, but we do in 2.4?"
-#if 0
-        rc = ll_mdc_cancel_unused(&sbi->ll_mdc_conn, inode, LDLM_FL_NO_CALLBACK);
-        if (rc < 0) {
-                CERROR("ll_mdc_cancel_unused: %d\n", rc);
-                /* XXX FIXME do something dramatic */
-        }
-
-        if (lli->lli_smd) {
-                rc = obd_cancel_unused(&sbi->ll_osc_conn, lli->lli_smd, 0);
-                if (rc < 0) {
-                        CERROR("obd_cancel_unused: %d\n", rc);
-                        /* XXX FIXME do something dramatic */
-                }
-        }
-#endif
-
-        if (atomic_read(&inode->i_count) != 0)
-                CERROR("clearing in-use inode %lu: count = %d\n",
-                       inode->i_ino, atomic_read(&inode->i_count));
-
-        if (lli->lli_smd) {
-                obd_free_memmd(&sbi->ll_osc_conn, &lli->lli_smd);
-                lli->lli_smd = NULL;
-        }
-
-        if (lli->lli_symlink_name) {
-                OBD_FREE(lli->lli_symlink_name,strlen(lli->lli_symlink_name)+1);
-                lli->lli_symlink_name = NULL;
-        }
-
-        EXIT;
-}
-
-#if 0
-static void ll_delete_inode(struct inode *inode)
-{
-        ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino);
-        if (S_ISREG(inode->i_mode)) {
-                int err;
-                struct obdo *oa;
-                struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-
-                /* mcreate with no open */
-                if (!lsm)
-                        GOTO(out, 0);
-
-                if (lsm->lsm_object_id == 0) {
-                        CERROR("This really happens\n");
-                        /* No obdo was ever created */
-                        GOTO(out, 0);
-                }
-
-                oa = obdo_alloc();
-                if (oa == NULL)
-                        GOTO(out, -ENOMEM);
-
-                oa->o_id = lsm->lsm_object_id;
-                oa->o_mode = inode->i_mode;
-                oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE;
-
-                err = obd_destroy(ll_i2obdconn(inode), oa, lsm);
-                obdo_free(oa);
-                if (err)
-                        CDEBUG(D_SUPER, "obd destroy objid "LPX64" error %d\n",
-                               lsm->lsm_object_id, err);
-        }
-out:
-        clear_inode(inode);
-        EXIT;
-}
-#endif
-
-/* like inode_setattr, but doesn't mark the inode dirty */
-static int ll_attr2inode(struct inode * inode, struct iattr * attr, int trunc)
-{
-        unsigned int ia_valid = attr->ia_valid;
-        int error = 0;
-
-        if ((ia_valid & ATTR_SIZE) && trunc) {
-                if (attr->ia_size > ll_file_maxbytes(inode)) {
-                        error = -EFBIG;
-                        goto out;
-                }
-                error = vmtruncate(inode, attr->ia_size);
-                if (error)
-                        goto out;
-        } else if (ia_valid & ATTR_SIZE)
-                inode->i_size = attr->ia_size;
-
-        if (ia_valid & ATTR_UID)
-                inode->i_uid = attr->ia_uid;
-        if (ia_valid & ATTR_GID)
-                inode->i_gid = attr->ia_gid;
-        if (ia_valid & ATTR_ATIME)
-                inode->i_atime = attr->ia_atime;
-        if (ia_valid & ATTR_MTIME)
-                inode->i_mtime = attr->ia_mtime;
-        if (ia_valid & ATTR_CTIME)
-                inode->i_ctime = attr->ia_ctime;
-        if (ia_valid & ATTR_MODE) {
-                inode->i_mode = attr->ia_mode;
-                if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
-                        inode->i_mode &= ~S_ISGID;
-        }
-out:
-        return error;
-}
-
-int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc)
-{
-        struct ptlrpc_request *request = NULL;
-        struct ll_sb_info *sbi = ll_i2sbi(inode);
-        int err = 0;
-
-        ENTRY;
-
-        /* change incore inode */
-        err = ll_attr2inode(inode, attr, do_trunc);
-        if (err)
-                RETURN(err);
-
-        /* Don't send size changes to MDS to avoid "fast EA" problems, and
-         * also avoid a pointless RPC (we get file size from OST anyways).
-         */
-        attr->ia_valid &= ~ATTR_SIZE;
-        if (attr->ia_valid) {
-                struct mdc_op_data op_data;
-
-                ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0);
-
-                err = mdc_setattr(&sbi->ll_mdc_conn, &op_data,
-                                  attr, NULL, 0, &request);
-                if (err)
-                        CERROR("mdc_setattr fails: err = %d\n", err);
-
-                ptlrpc_req_finished(request);
-                if (S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_MTIME_SET) {
-                        struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-                        struct obdo oa;
-                        int err2;
-
-                        CDEBUG(D_ERROR, "setting mtime on OST\n");
-                        oa.o_id = lsm->lsm_object_id;
-                        oa.o_mode = S_IFREG;
-                        oa.o_valid = OBD_MD_FLID |OBD_MD_FLTYPE |OBD_MD_FLMTIME;
-                        oa.o_mtime = LTIME_S(attr->ia_mtime);
-                        err2 = obd_setattr(&sbi->ll_osc_conn, &oa, lsm, NULL);
-                        if (err2) {
-                                CERROR("obd_setattr fails: rc=%d\n", err);
-                                if (!err)
-                                        err = err2;
-                        }
-                }
-        }
-
-        RETURN(err);
-}
-
-int ll_setattr(struct dentry *de, struct iattr *attr)
-{
-        int rc = inode_change_ok(de->d_inode, attr);
-        CDEBUG(D_VFSTRACE, "VFS Op:name=%s\n", de->d_name.name);
-        if (rc)
-                return rc;
-
-        LPROC_COUNTER_INODE_INCBY1((de->d_inode), LPROC_LL_SETATTR);
-        return ll_inode_setattr(de->d_inode, attr, 1);
-}
-
-static int ll_statfs(struct super_block *sb, struct statfs *sfs)
-{
-        struct ll_sb_info *sbi = ll_s2sbi(sb);
-        struct obd_statfs osfs;
-        int rc;
-        ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:\n");
-
-        LPROC_COUNTER_SBI_INCBY1(sbi, LPROC_LL_STAFS);
-        memset(sfs, 0, sizeof(*sfs));
-        rc = obd_statfs(&sbi->ll_mdc_conn, &osfs);
-        statfs_unpack(sfs, &osfs);
-        if (rc)
-                CERROR("mdc_statfs fails: rc = %d\n", rc);
-        else
-                CDEBUG(D_SUPER, "mdc_statfs shows blocks "LPU64"/"LPU64
-                       " objects "LPU64"/"LPU64"\n",
-                       osfs.os_bavail, osfs.os_blocks,
-                       osfs.os_ffree, osfs.os_files);
-
-        /* temporary until mds_statfs returns statfs info for all OSTs */
-        if (!rc) {
-                rc = obd_statfs(&sbi->ll_osc_conn, &osfs);
-                if (rc) {
-                        CERROR("obd_statfs fails: rc = %d\n", rc);
-                        GOTO(out, rc);
-                }
-                CDEBUG(D_SUPER, "obd_statfs shows blocks "LPU64"/"LPU64
-                       " objects "LPU64"/"LPU64"\n",
-                       osfs.os_bavail, osfs.os_blocks,
-                       osfs.os_ffree, osfs.os_files);
-
-                while (osfs.os_blocks > ~0UL) {
-                        sfs->f_bsize <<= 1;
-
-                        osfs.os_blocks >>= 1;
-                        osfs.os_bfree >>= 1;
-                        osfs.os_bavail >>= 1;
-                }
-                sfs->f_blocks = osfs.os_blocks;
-                sfs->f_bfree = osfs.os_bfree;
-                sfs->f_bavail = osfs.os_bavail;
-                if (osfs.os_ffree < (__u64)sfs->f_ffree) {
-                        sfs->f_files = (sfs->f_files - sfs->f_ffree) +
-                                       osfs.os_ffree;
-                        sfs->f_ffree = osfs.os_ffree;
-                }
-        }
-
-out:
-        RETURN(rc);
-}
-
-void ll_update_inode(struct inode *inode, struct mds_body *body,
-                     struct lov_stripe_md *lsm)
-{
-        struct ll_inode_info *lli = ll_i2info(inode);
-
-        LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
-        if (lsm != NULL) {
-                if (lli->lli_smd == NULL) {
-                        lli->lli_smd = lsm;
-                        lli->lli_maxbytes = lsm->lsm_maxbytes;
-                        if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES)
-                                lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
-                } else {
-                        LASSERT (!memcmp (lli->lli_smd, lsm, sizeof (*lsm)));
-                }
-        }
-
-        if (body->valid & OBD_MD_FLID)
-                inode->i_ino = body->ino;
-        if (body->valid & OBD_MD_FLATIME)
-                LTIME_S(inode->i_atime) = body->atime;
-        if (body->valid & OBD_MD_FLMTIME)
-                LTIME_S(inode->i_mtime) = body->mtime;
-        if (body->valid & OBD_MD_FLCTIME)
-                LTIME_S(inode->i_ctime) = body->ctime;
-        if (body->valid & OBD_MD_FLMODE)
-                inode->i_mode = (inode->i_mode & S_IFMT)|(body->mode & ~S_IFMT);
-        if (body->valid & OBD_MD_FLTYPE)
-                inode->i_mode = (inode->i_mode & ~S_IFMT)|(body->mode & S_IFMT);
-        if (body->valid & OBD_MD_FLUID)
-                inode->i_uid = body->uid;
-        if (body->valid & OBD_MD_FLGID)
-                inode->i_gid = body->gid;
-        if (body->valid & OBD_MD_FLFLAGS)
-                inode->i_flags = body->flags;
-        if (body->valid & OBD_MD_FLNLINK)
-                inode->i_nlink = body->nlink;
-        if (body->valid & OBD_MD_FLGENER)
-                inode->i_generation = body->generation;
-        if (body->valid & OBD_MD_FLRDEV)
-                inode->i_rdev = to_kdev_t(body->rdev);
-        if (body->valid & OBD_MD_FLSIZE)
-                inode->i_size = body->size;
-        if (body->valid & OBD_MD_FLBLOCKS)
-                inode->i_blocks = body->blocks;
-}
-
-int ll_read_inode2(struct inode *inode, void *opaque)
-{
-        struct ll_read_inode2_cookie *lic = opaque;
-        struct mds_body *body = lic->lic_body;
-        struct ll_inode_info *lli = ll_i2info(inode);
-        int rc = 0;
-        ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino);
-
-        sema_init(&lli->lli_open_sem, 1);
-        /* these are 2.4 only, but putting them here for consistency.. */
-        spin_lock_init(&lli->lli_read_extent_lock);
-        INIT_LIST_HEAD(&lli->lli_read_extents);
-        ll_lldo_init(&lli->lli_dirty);
-        lli->lli_flags = 0;
-        lli->lli_maxbytes = LUSTRE_STRIPE_MAXBYTES;
-
-        LASSERT(!lli->lli_smd);
-
-        /* core attributes first */
-        ll_update_inode(inode, body, lic ? lic->lic_lsm : NULL);
-
-        /* OIDEBUG(inode); */
-
-        if (S_ISREG(inode->i_mode)) {
-                inode->i_op = &ll_file_inode_operations;
-                inode->i_fop = &ll_file_operations;
-                inode->i_mapping->a_ops = &ll_aops;
-                EXIT;
-        } else if (S_ISDIR(inode->i_mode)) {
-                inode->i_op = &ll_dir_inode_operations;
-                inode->i_fop = &ll_dir_operations;
-                inode->i_mapping->a_ops = &ll_dir_aops;
-                EXIT;
-        } else if (S_ISLNK(inode->i_mode)) {
-                inode->i_op = &ll_fast_symlink_inode_operations;
-                EXIT;
-        } else {
-                inode->i_op = &ll_special_inode_operations;
-                init_special_inode(inode, inode->i_mode,
-                                   kdev_t_to_nr(inode->i_rdev));
-                EXIT;
-        }
-
-        return rc;
-}
-
-
-void ll_umount_begin(struct super_block *sb)
-{
-        struct ll_sb_info *sbi = ll_s2sbi(sb);
-        struct obd_device *obd;
-        struct obd_ioctl_data ioc_data = { 0 };
-
-        ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:\n");
-
-        obd = class_conn2obd(&sbi->ll_mdc_conn);
-        obd->obd_no_recov = 1;
-        obd_iocontrol(IOC_OSC_SET_ACTIVE, &sbi->ll_mdc_conn, sizeof ioc_data,
-                      &ioc_data, NULL);
-
-        obd = class_conn2obd(&sbi->ll_osc_conn);
-        obd->obd_no_recov = 1;
-        obd_iocontrol(IOC_OSC_SET_ACTIVE, &sbi->ll_osc_conn, sizeof ioc_data,
-                      &ioc_data, NULL);
-        
-        /* Really, we'd like to wait until there are no requests outstanding,
-         * and then continue.  For now, we just invalidate the requests,
-         * schedule, and hope.
-         */
-        schedule();
-
-        EXIT;
-}
-
 static kmem_cache_t *ll_inode_cachep;
 
 static struct inode *ll_alloc_inode(struct super_block *sb)
 {
         struct ll_inode_info *lli;
 static kmem_cache_t *ll_inode_cachep;
 
 static struct inode *ll_alloc_inode(struct super_block *sb)
 {
         struct ll_inode_info *lli;
-        LPROC_COUNTER_SBI_INCBY1((ll_s2sbi(sb)), LL_ALLOC_INODE);
+        lprocfs_counter_incr((ll_s2sbi(sb))->ll_stats, LPROC_LL_ALLOC_INODE);
         OBD_SLAB_ALLOC(lli, ll_inode_cachep, SLAB_KERNEL, sizeof *lli);
         if (lli == NULL)
                 return NULL;
 
         OBD_SLAB_ALLOC(lli, ll_inode_cachep, SLAB_KERNEL, sizeof *lli);
         if (lli == NULL)
                 return NULL;
 
-        memset(lli, 0, (char *)&lli->lli_vfs_inode - (char *)lli);
-        sema_init(&lli->lli_open_sem, 1);
-        init_MUTEX(&lli->lli_size_valid_sem);
-        lli->lli_maxbytes = LUSTRE_STRIPE_MAXBYTES;
+        inode_init_once(&lli->lli_vfs_inode);
+        ll_lli_init(lli);
 
         return &lli->lli_vfs_inode;
 }
 
 static void ll_destroy_inode(struct inode *inode)
 {
 
         return &lli->lli_vfs_inode;
 }
 
 static void ll_destroy_inode(struct inode *inode)
 {
-        OBD_SLAB_FREE(ll_inode_cachep, ll_i2info(inode),
-                      sizeof(struct ll_inode_info));
+        struct ll_inode_info *ptr = ll_i2info(inode);
+        OBD_SLAB_FREE(ptr, ll_inode_cachep, sizeof(*ptr));
 }
 
 static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
 }
 
 static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
@@ -792,15 +91,12 @@ void ll_destroy_inodecache(void)
                 CERROR("ll_inode_cache: not all structures were freed\n");
 }
 
                 CERROR("ll_inode_cache: not all structures were freed\n");
 }
 
-
-
 /* exported operations */
 struct super_operations ll_super_operations =
 {
         alloc_inode: ll_alloc_inode,
         destroy_inode: ll_destroy_inode,
         clear_inode: ll_clear_inode,
 /* exported operations */
 struct super_operations ll_super_operations =
 {
         alloc_inode: ll_alloc_inode,
         destroy_inode: ll_destroy_inode,
         clear_inode: ll_clear_inode,
-//        delete_inode: ll_delete_inode,
         put_super: ll_put_super,
         statfs: ll_statfs,
         umount_begin: ll_umount_begin
         put_super: ll_put_super,
         statfs: ll_statfs,
         umount_begin: ll_umount_begin
index 19d234e..427f7f0 100644 (file)
 #include <linux/stat.h>
 #include <linux/smp_lock.h>
 #include <linux/version.h>
 #include <linux/stat.h>
 #include <linux/smp_lock.h>
 #include <linux/version.h>
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#include <asm/statfs.h>
-#endif
 #define DEBUG_SUBSYSTEM S_LLITE
 
 #include <linux/lustre_lite.h>
 #define DEBUG_SUBSYSTEM S_LLITE
 
 #include <linux/lustre_lite.h>
+#include "llite_internal.h"
 
 static int ll_readlink_internal(struct inode *inode,
                                 struct ptlrpc_request **request, char **symname)
 
 static int ll_readlink_internal(struct inode *inode,
                                 struct ptlrpc_request **request, char **symname)
@@ -117,82 +115,46 @@ static int ll_readlink(struct dentry *dentry, char *buffer, int buflen)
         RETURN(rc);
 }
 
         RETURN(rc);
 }
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-static int ll_follow_link(struct dentry *dentry, struct nameidata *nd,
-                          struct lookup_intent *it)
+static int ll_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
         struct inode *inode = dentry->d_inode;
         struct ll_inode_info *lli = ll_i2info(inode);
 {
         struct inode *inode = dentry->d_inode;
         struct ll_inode_info *lli = ll_i2info(inode);
+        struct lookup_intent *it = ll_nd2it(nd);
         struct ptlrpc_request *request;
         struct ptlrpc_request *request;
-        int op = 0, mode = 0, rc;
+        int rc;
         char *symname;
         ENTRY;
 
         char *symname;
         ENTRY;
 
-        CDEBUG(D_VFSTRACE, "VFS Op\n");
         if (it != NULL) {
         if (it != NULL) {
-                op = it->it_op;
-                mode = it->it_mode;
-
-                ll_intent_release(dentry, it);
-        }
-
-        down(&lli->lli_open_sem);
-        rc = ll_readlink_internal(inode, &request, &symname);
-        up(&lli->lli_open_sem);
-        if (rc)
-                GOTO(out, rc);
+                int op = it->it_op;
+                int mode = it->it_mode;
 
 
-        if (it != NULL) {
+                ll_intent_release(it);
                 it->it_op = op;
                 it->it_mode = mode;
         }
 
                 it->it_op = op;
                 it->it_mode = mode;
         }
 
-        rc = vfs_follow_link_it(nd, symname, it);
-        ptlrpc_req_finished(request);
- out:
-        RETURN(rc);
-}
-#else
-static int ll_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
-        struct inode *inode = dentry->d_inode;
-        struct ll_inode_info *lli = ll_i2info(inode);
-        struct ptlrpc_request *request;
-        int op = 0, mode = 0, rc;
-        char *symname;
-        ENTRY;
-
-        op = nd->it.it_op;
-        mode = nd->it.it_mode;
-
-        ll_intent_release(dentry, &nd->it);
-
+        CDEBUG(D_VFSTRACE, "VFS Op\n");
         down(&lli->lli_open_sem);
         down(&lli->lli_open_sem);
-
         rc = ll_readlink_internal(inode, &request, &symname);
         rc = ll_readlink_internal(inode, &request, &symname);
+        up(&lli->lli_open_sem);
         if (rc)
                 GOTO(out, rc);
 
         if (rc)
                 GOTO(out, rc);
 
-        nd->it.it_op = op;
-        nd->it.it_mode = mode;
-
         rc = vfs_follow_link(nd, symname);
         ptlrpc_req_finished(request);
  out:
         rc = vfs_follow_link(nd, symname);
         ptlrpc_req_finished(request);
  out:
-        up(&lli->lli_open_sem);
-
         RETURN(rc);
 }
         RETURN(rc);
 }
-#endif
 
 
-extern int ll_inode_revalidate(struct dentry *dentry);
-extern int ll_setattr(struct dentry *de, struct iattr *attr);
 struct inode_operations ll_fast_symlink_inode_operations = {
         readlink:       ll_readlink,
         setattr:        ll_setattr,
         setattr_raw:    ll_setattr_raw,
 struct inode_operations ll_fast_symlink_inode_operations = {
         readlink:       ll_readlink,
         setattr:        ll_setattr,
         setattr_raw:    ll_setattr_raw,
-        follow_link2:   ll_follow_link,
+        follow_link:    ll_follow_link,
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        revalidate:     ll_inode_revalidate
+        revalidate_it:  ll_inode_revalidate_it
+#else 
+        getattr_it:     ll_getattr
 #endif
 };
 #endif
 };
index e995588..e69dc6d 100644 (file)
@@ -1,3 +1,4 @@
 .deps
 Makefile
 Makefile.in
 .deps
 Makefile
 Makefile.in
+.*.cmd
index 879e44d..83dba1a 100644 (file)
@@ -7,12 +7,12 @@ DEFS=
 
 if LIBLUSTRE
 lib_LIBRARIES = liblov.a
 
 if LIBLUSTRE
 lib_LIBRARIES = liblov.a
-liblov_a_SOURCES = lov_obd.c lov_pack.c
+liblov_a_SOURCES = lov_obd.c lov_pack.c lov_internal.h
 else
 MODULE = lov
 modulefs_DATA = lov.o
 EXTRA_PROGRAMS = lov
 else
 MODULE = lov
 modulefs_DATA = lov.o
 EXTRA_PROGRAMS = lov
-lov_SOURCES = lov_obd.c lov_pack.c lproc_lov.c
+lov_SOURCES = lov_obd.c lov_pack.c lproc_lov.c lov_internal.h
 endif
 
 include $(top_srcdir)/Rules
 endif
 
 include $(top_srcdir)/Rules
index 2974b2a..9562a4f 100644 (file)
 #include <linux/seq_file.h>
 #include <linux/lprocfs_status.h>
 
 #include <linux/seq_file.h>
 #include <linux/lprocfs_status.h>
 
+#include "lov_internal.h"
+
+static int lov_stripe_offset(struct lov_stripe_md *lsm, obd_off lov_off,
+                             int stripeno, obd_off *obd_off);
+
 struct lov_file_handles {
         struct portals_handle lfh_handle;
         atomic_t lfh_refcount;
 struct lov_file_handles {
         struct portals_handle lfh_handle;
         atomic_t lfh_refcount;
@@ -68,7 +73,7 @@ static void lov_lfh_addref(void *lfhp)
         struct lov_file_handles *lfh = lfhp;
 
         atomic_inc(&lfh->lfh_refcount);
         struct lov_file_handles *lfh = lfhp;
 
         atomic_inc(&lfh->lfh_refcount);
-        CDEBUG(D_INFO, "GETting lfh %p : new refcount %d\n", lfh,
+        CDEBUG(D_MALLOC, "GETting lfh %p : new refcount %d\n", lfh,
                atomic_read(&lfh->lfh_refcount));
 }
 
                atomic_read(&lfh->lfh_refcount));
 }
 
@@ -99,7 +104,7 @@ static struct lov_file_handles *lov_handle2lfh(struct lustre_handle *handle)
 
 static void lov_lfh_put(struct lov_file_handles *lfh)
 {
 
 static void lov_lfh_put(struct lov_file_handles *lfh)
 {
-        CDEBUG(D_INFO, "PUTting lfh %p : new refcount %d\n", lfh,
+        CDEBUG(D_MALLOC, "PUTting lfh %p : new refcount %d\n", lfh,
                atomic_read(&lfh->lfh_refcount) - 1);
         LASSERT(atomic_read(&lfh->lfh_refcount) > 0 &&
                 atomic_read(&lfh->lfh_refcount) < 0x5a5a);
                atomic_read(&lfh->lfh_refcount) - 1);
         LASSERT(atomic_read(&lfh->lfh_refcount) > 0 &&
                 atomic_read(&lfh->lfh_refcount) < 0x5a5a);
@@ -174,19 +179,18 @@ int lov_attach(struct obd_device *dev, obd_count len, void *data)
         struct proc_dir_entry *entry;
         int rc;
 
         struct proc_dir_entry *entry;
         int rc;
 
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(lov, &lvars);
         rc = lprocfs_obd_attach(dev, lvars.obd_vars);
         rc = lprocfs_obd_attach(dev, lvars.obd_vars);
-        if (rc) 
+        if (rc)
                 return rc;
 
         entry = create_proc_entry("target_obd", 0444, dev->obd_proc_entry);
                 return rc;
 
         entry = create_proc_entry("target_obd", 0444, dev->obd_proc_entry);
-        if (entry == NULL) 
+        if (entry == NULL)
                 RETURN(-ENOMEM);
                 RETURN(-ENOMEM);
-        entry->proc_fops = &ll_proc_target_fops;
+        entry->proc_fops = &lov_proc_target_fops;
         entry->data = dev;
         entry->data = dev;
-        
+
         return rc;
         return rc;
-        
 }
 
 int lov_detach(struct obd_device *dev)
 }
 
 int lov_detach(struct obd_device *dev)
@@ -214,15 +218,17 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd,
         if (rc)
                 RETURN(rc);
 
         if (rc)
                 RETURN(rc);
 
+        exp = class_conn2export(conn);
+        spin_lock_init(&exp->exp_lov_data.led_lock);
+        INIT_LIST_HEAD(&exp->exp_lov_data.led_open_head);
+
         /* We don't want to actually do the underlying connections more than
          * once, so keep track. */
         lov->refcount++;
         /* We don't want to actually do the underlying connections more than
          * once, so keep track. */
         lov->refcount++;
-        if (lov->refcount > 1)
+        if (lov->refcount > 1) {
+                class_export_put(exp);
                 RETURN(0);
                 RETURN(0);
-
-        exp = class_conn2export(conn);
-        spin_lock_init(&exp->exp_lov_data.led_lock);
-        INIT_LIST_HEAD(&exp->exp_lov_data.led_open_head);
+        }
 
         /* retrieve LOV metadata from MDS */
         rc = obd_connect(&mdc_conn, lov->mdcobd, &lov_mds_uuid);
 
         /* retrieve LOV metadata from MDS */
         rc = obd_connect(&mdc_conn, lov->mdcobd, &lov_mds_uuid);
@@ -248,9 +254,9 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd,
          * array fits in LOV_MAX_UUID_BUFFER_SIZE and all uuids are
          * terminated), but I still need to verify it makes overall
          * sense */
          * array fits in LOV_MAX_UUID_BUFFER_SIZE and all uuids are
          * terminated), but I still need to verify it makes overall
          * sense */
-        mdesc = lustre_msg_buf (req->rq_repmsg, 0, sizeof (*mdesc));
-        LASSERT (mdesc != NULL);
-        LASSERT_REPSWABBED (req, 0);
+        mdesc = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*mdesc));
+        LASSERT(mdesc != NULL);
+        LASSERT_REPSWABBED(req, 0);
 
         *desc = *mdesc;
 
 
         *desc = *mdesc;
 
@@ -279,15 +285,15 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd,
          * demands on memory here. */
         lov->bufsize = sizeof(struct lov_tgt_desc) * desc->ld_tgt_count;
         OBD_ALLOC(lov->tgts, lov->bufsize);
          * demands on memory here. */
         lov->bufsize = sizeof(struct lov_tgt_desc) * desc->ld_tgt_count;
         OBD_ALLOC(lov->tgts, lov->bufsize);
-        if (!lov->tgts) {
+        if (lov->tgts == NULL) {
                 CERROR("Out of memory\n");
                 GOTO(out_req, rc = -ENOMEM);
         }
 
         uuids = lustre_msg_buf(req->rq_repmsg, 1,
                                sizeof(*uuids) * desc->ld_tgt_count);
                 CERROR("Out of memory\n");
                 GOTO(out_req, rc = -ENOMEM);
         }
 
         uuids = lustre_msg_buf(req->rq_repmsg, 1,
                                sizeof(*uuids) * desc->ld_tgt_count);
-        LASSERT (uuids != NULL);
-        LASSERT_REPSWABBED (req, 1);
+        LASSERT(uuids != NULL);
+        LASSERT_REPSWABBED(req, 1);
 
         for (i = 0, tgts = lov->tgts; i < desc->ld_tgt_count; i++, tgts++) {
                 struct obd_uuid *uuid = &tgts->uuid;
 
         for (i = 0, tgts = lov->tgts; i < desc->ld_tgt_count; i++, tgts++) {
                 struct obd_uuid *uuid = &tgts->uuid;
@@ -330,7 +336,9 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd,
         }
 
         mdc->cl_max_mds_easize = obd_size_diskmd(conn, NULL);
         }
 
         mdc->cl_max_mds_easize = obd_size_diskmd(conn, NULL);
-        ptlrpc_req_finished (req);
+        mdc->cl_max_mds_cookiesize = desc->ld_tgt_count *
+                sizeof(struct llog_cookie);
+        ptlrpc_req_finished(req);
         class_export_put(exp);
         RETURN (0);
 
         class_export_put(exp);
         RETURN (0);
 
@@ -356,7 +364,7 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd,
         RETURN (rc);
 }
 
         RETURN (rc);
 }
 
-static int lov_disconnect(struct lustre_handle *conn, int failover)
+static int lov_disconnect(struct lustre_handle *conn, int flags)
 {
         struct obd_device *obd = class_conn2obd(conn);
         struct lov_obd *lov = &obd->u.lov;
 {
         struct obd_device *obd = class_conn2obd(conn);
         struct lov_obd *lov = &obd->u.lov;
@@ -383,7 +391,7 @@ static int lov_disconnect(struct lustre_handle *conn, int failover)
                                 class_conn2obd(&lov->tgts[i].conn);
                         osc_obd->obd_no_recov = 1;
                 }
                                 class_conn2obd(&lov->tgts[i].conn);
                         osc_obd->obd_no_recov = 1;
                 }
-                rc = obd_disconnect(&lov->tgts[i].conn, failover);
+                rc = obd_disconnect(&lov->tgts[i].conn, flags);
                 if (rc) {
                         if (lov->tgts[i].active) {
                                 CERROR("Target %s disconnect error %d\n",
                 if (rc) {
                         if (lov->tgts[i].active) {
                                 CERROR("Target %s disconnect error %d\n",
@@ -400,6 +408,7 @@ static int lov_disconnect(struct lustre_handle *conn, int failover)
         lov->bufsize = 0;
         lov->tgts = NULL;
 
         lov->bufsize = 0;
         lov->tgts = NULL;
 
+ out_local:
         exp = class_conn2export(conn);
         if (exp == NULL) {
                 CERROR("export handle "LPU64" invalid!  If you can reproduce, "
         exp = class_conn2export(conn);
         if (exp == NULL) {
                 CERROR("export handle "LPU64" invalid!  If you can reproduce, "
@@ -421,7 +430,6 @@ static int lov_disconnect(struct lustre_handle *conn, int failover)
         spin_unlock(&exp->exp_lov_data.led_lock);
         class_export_put(exp);
 
         spin_unlock(&exp->exp_lov_data.led_lock);
         class_export_put(exp);
 
- out_local:
         rc = class_disconnect(conn, 0);
         RETURN(rc);
 }
         rc = class_disconnect(conn, 0);
         RETURN(rc);
 }
@@ -548,6 +556,8 @@ static obd_size lov_stripe_size(struct lov_stripe_md *lsm, obd_size ost_size,
 static void lov_merge_attrs(struct obdo *tgt, struct obdo *src, obd_flag valid,
                             struct lov_stripe_md *lsm, int stripeno, int *set)
 {
 static void lov_merge_attrs(struct obdo *tgt, struct obdo *src, obd_flag valid,
                             struct lov_stripe_md *lsm, int stripeno, int *set)
 {
+        valid &= src->o_valid;
+
         if (*set) {
                 if (valid & OBD_MD_FLSIZE) {
                         /* this handles sparse files properly */
         if (*set) {
                 if (valid & OBD_MD_FLSIZE) {
                         /* this handles sparse files properly */
@@ -566,68 +576,102 @@ static void lov_merge_attrs(struct obdo *tgt, struct obdo *src, obd_flag valid,
                 if (valid & OBD_MD_FLMTIME && tgt->o_mtime < src->o_mtime)
                         tgt->o_mtime = src->o_mtime;
         } else {
                 if (valid & OBD_MD_FLMTIME && tgt->o_mtime < src->o_mtime)
                         tgt->o_mtime = src->o_mtime;
         } else {
-                obdo_cpy_md(tgt, src, valid);
+                memcpy(tgt, src, sizeof(*tgt));
+                tgt->o_id = lsm->lsm_object_id;
                 if (valid & OBD_MD_FLSIZE)
                         tgt->o_size = lov_stripe_size(lsm,src->o_size,stripeno);
                 *set = 1;
         }
 }
 
                 if (valid & OBD_MD_FLSIZE)
                         tgt->o_size = lov_stripe_size(lsm,src->o_size,stripeno);
                 *set = 1;
         }
 }
 
+#ifndef log2
+#define log2(n) ffz(~(n))
+#endif
+
 /* the LOV expects oa->o_id to be set to the LOV object id */
 /* the LOV expects oa->o_id to be set to the LOV object id */
-static int lov_create(struct lustre_handle *conn, struct obdo *oa,
+static int lov_create(struct lustre_handle *conn, struct obdo *src_oa,
                       struct lov_stripe_md **ea, struct obd_trans_info *oti)
 {
         struct obd_export *export = class_conn2export(conn);
         struct lov_obd *lov;
         struct lov_stripe_md *lsm;
                       struct lov_stripe_md **ea, struct obd_trans_info *oti)
 {
         struct obd_export *export = class_conn2export(conn);
         struct lov_obd *lov;
         struct lov_stripe_md *lsm;
-        struct lov_oinfo *loi;
-        struct obdo *tmp;
+        struct lov_oinfo *loi = NULL;
+        struct obdo *tmp_oa, *ret_oa;
+        struct llog_cookie *cookies = NULL;
         unsigned ost_count, ost_idx;
         unsigned ost_count, ost_idx;
-        int set = 0, obj_alloc = 0;
-        int rc = 0, i;
+        int set = 0, obj_alloc = 0, cookie_sent = 0, rc = 0, i;
         ENTRY;
 
         LASSERT(ea);
 
         if (!export)
         ENTRY;
 
         LASSERT(ea);
 
         if (!export)
-                GOTO(out_exp, rc = -EINVAL);
+                RETURN(-EINVAL);
 
         lov = &export->exp_obd->u.lov;
 
         if (!lov->desc.ld_active_tgt_count)
                 GOTO(out_exp, rc = -EIO);
 
 
         lov = &export->exp_obd->u.lov;
 
         if (!lov->desc.ld_active_tgt_count)
                 GOTO(out_exp, rc = -EIO);
 
-        tmp = obdo_alloc();
-        if (!tmp)
+        ret_oa = obdo_alloc();
+        if (!ret_oa)
                 GOTO(out_exp, rc = -ENOMEM);
 
                 GOTO(out_exp, rc = -ENOMEM);
 
+        tmp_oa = obdo_alloc();
+        if (!tmp_oa)
+                GOTO(out_oa, rc = -ENOMEM);
+
         lsm = *ea;
 
         if (!lsm) {
         lsm = *ea;
 
         if (!lsm) {
-                rc = obd_alloc_memmd(conn, &lsm);
+                int stripes;
+                ost_count = lov_get_stripecnt(lov, 0);
+
+                /* If the MDS file was truncated up to some size, stripe over
+                 * enough OSTs to allow the file to be created at that size.
+                 */
+                if (src_oa->o_valid & OBD_MD_FLSIZE) {
+                        stripes=((src_oa->o_size+LUSTRE_STRIPE_MAXBYTES)>>12)-1;
+                        do_div(stripes, (__u32)(LUSTRE_STRIPE_MAXBYTES >> 12));
+
+                        if (stripes > lov->desc.ld_active_tgt_count)
+                                GOTO(out_exp, rc = -EFBIG);
+                        if (stripes < ost_count)
+                                stripes = ost_count;
+                } else
+                        stripes = ost_count;
+
+                rc = lov_alloc_memmd(&lsm, stripes);
                 if (rc < 0)
                         GOTO(out_tmp, rc);
 
                 rc = 0;
                 if (rc < 0)
                         GOTO(out_tmp, rc);
 
                 rc = 0;
-                lsm->lsm_magic = LOV_MAGIC;
         }
 
         ost_count = lov->desc.ld_tgt_count;
 
         }
 
         ost_count = lov->desc.ld_tgt_count;
 
-        LASSERT(oa->o_valid & OBD_MD_FLID);
-        lsm->lsm_object_id = oa->o_id;
+        LASSERT(src_oa->o_valid & OBD_MD_FLID);
+        lsm->lsm_object_id = src_oa->o_id;
         if (!lsm->lsm_stripe_size)
                 lsm->lsm_stripe_size = lov->desc.ld_default_stripe_size;
 
         if (!*ea || lsm->lsm_stripe_offset >= ost_count) {
                 get_random_bytes(&ost_idx, 2);
                 ost_idx %= ost_count;
         if (!lsm->lsm_stripe_size)
                 lsm->lsm_stripe_size = lov->desc.ld_default_stripe_size;
 
         if (!*ea || lsm->lsm_stripe_offset >= ost_count) {
                 get_random_bytes(&ost_idx, 2);
                 ost_idx %= ost_count;
-        } else
+        } else {
                 ost_idx = lsm->lsm_stripe_offset;
                 ost_idx = lsm->lsm_stripe_offset;
+        }
 
         CDEBUG(D_INODE, "allocating %d subobjs for objid "LPX64" at idx %d\n",
                lsm->lsm_stripe_count, lsm->lsm_object_id, ost_idx);
 
 
         CDEBUG(D_INODE, "allocating %d subobjs for objid "LPX64" at idx %d\n",
                lsm->lsm_stripe_count, lsm->lsm_object_id, ost_idx);
 
+        /* XXX LOV STACKING: need to figure out how many real OSCs */
+        if (oti && (src_oa->o_valid & OBD_MD_FLCOOKIE)) {
+                oti_alloc_cookies(oti, lsm->lsm_stripe_count);
+                if (!oti->oti_logcookies)
+                        GOTO(out_cleanup, rc = -ENOMEM);
+                cookies = oti->oti_logcookies;
+        }
+
         loi = lsm->lsm_oinfo;
         for (i = 0; i < ost_count; i++, ost_idx = (ost_idx + 1) % ost_count) {
                 struct lov_stripe_md obj_md;
         loi = lsm->lsm_oinfo;
         for (i = 0; i < ost_count; i++, ost_idx = (ost_idx + 1) % ost_count) {
                 struct lov_stripe_md obj_md;
@@ -640,14 +684,30 @@ static int lov_create(struct lustre_handle *conn, struct obdo *oa,
                 }
 
                 /* create data objects with "parent" OA */
                 }
 
                 /* create data objects with "parent" OA */
-                memcpy(tmp, oa, sizeof(*tmp));
+                memcpy(tmp_oa, src_oa, sizeof(*tmp_oa));
+
+                /* XXX When we start creating objects on demand, we need to
+                 *     make sure that we always create the object on the
+                 *     stripe which holds the existing file size.
+                 */
+                if (src_oa->o_valid & OBD_MD_FLSIZE) {
+                        if (lov_stripe_offset(lsm, src_oa->o_size, i,
+                                              &tmp_oa->o_size) < 0 &&
+                            tmp_oa->o_size)
+                                tmp_oa->o_size--;
+
+                        CDEBUG(D_INODE, "stripe %d has size "LPU64"/"LPU64"\n",
+                               i, tmp_oa->o_size, src_oa->o_size);
+                }
+
                 /* XXX: LOV STACKING: use real "obj_mdp" sub-data */
                 /* XXX: LOV STACKING: use real "obj_mdp" sub-data */
-                err = obd_create(&lov->tgts[ost_idx].conn, tmp, &obj_mdp, oti);
+                err = obd_create(&lov->tgts[ost_idx].conn, tmp_oa,&obj_mdp,oti);
                 if (err) {
                         if (lov->tgts[ost_idx].active) {
                                 CERROR("error creating objid "LPX64" sub-object"
                 if (err) {
                         if (lov->tgts[ost_idx].active) {
                                 CERROR("error creating objid "LPX64" sub-object"
-                                       " on OST idx %d/%d: rc = %d\n", oa->o_id,
-                                       ost_idx, lsm->lsm_stripe_count, err);
+                                       " on OST idx %d/%d: rc = %d\n",
+                                       src_oa->o_id, ost_idx,
+                                       lsm->lsm_stripe_count, err);
                                 if (err > 0) {
                                         CERROR("obd_create returned invalid "
                                                "err %d\n", err);
                                 if (err > 0) {
                                         CERROR("obd_create returned invalid "
                                                "err %d\n", err);
@@ -658,17 +718,22 @@ static int lov_create(struct lustre_handle *conn, struct obdo *oa,
                                 rc = err;
                         continue;
                 }
                                 rc = err;
                         continue;
                 }
-                loi->loi_id = tmp->o_id;
+                loi->loi_id = tmp_oa->o_id;
                 loi->loi_ost_idx = ost_idx;
                 CDEBUG(D_INODE, "objid "LPX64" has subobj "LPX64" at idx %d\n",
                        lsm->lsm_object_id, loi->loi_id, ost_idx);
 
                 if (set == 0)
                         lsm->lsm_stripe_offset = ost_idx;
                 loi->loi_ost_idx = ost_idx;
                 CDEBUG(D_INODE, "objid "LPX64" has subobj "LPX64" at idx %d\n",
                        lsm->lsm_object_id, loi->loi_id, ost_idx);
 
                 if (set == 0)
                         lsm->lsm_stripe_offset = ost_idx;
-                lov_merge_attrs(oa, tmp, OBD_MD_FLBLKSZ, lsm, obj_alloc, &set);
-                ot_init(&loi->loi_dirty_ot_inline);
+                lov_merge_attrs(ret_oa, tmp_oa, tmp_oa->o_valid, lsm,
+                                obj_alloc, &set);
                 loi->loi_dirty_ot = &loi->loi_dirty_ot_inline;
                 loi->loi_dirty_ot = &loi->loi_dirty_ot_inline;
+                ot_init(loi->loi_dirty_ot);
 
 
+                if (cookies)
+                        ++oti->oti_logcookies;
+                if (tmp_oa->o_valid & OBD_MD_FLCOOKIE)
+                        ++cookie_sent;
                 ++obj_alloc;
                 ++loi;
 
                 ++obj_alloc;
                 ++loi;
 
@@ -677,6 +742,12 @@ static int lov_create(struct lustre_handle *conn, struct obdo *oa,
                         GOTO(out_done, rc = 0);
         }
 
                         GOTO(out_done, rc = 0);
         }
 
+        /* If we were passed specific striping params, then a failure to
+         * meet those requirements is an error, since we can't reallocate
+         * that memory (it might be part of a larger array or something).
+         *
+         * We can only get here if lsm_stripe_count was originally > 1.
+         */
         if (*ea != NULL) {
                 CERROR("can't lstripe objid "LPX64": have %u want %u, rc %d\n",
                        lsm->lsm_object_id, obj_alloc, lsm->lsm_stripe_count,rc);
         if (*ea != NULL) {
                 CERROR("can't lstripe objid "LPX64": have %u want %u, rc %d\n",
                        lsm->lsm_object_id, obj_alloc, lsm->lsm_stripe_count,rc);
@@ -686,27 +757,61 @@ static int lov_create(struct lustre_handle *conn, struct obdo *oa,
         } else {
                 struct lov_stripe_md *lsm_new;
                 /* XXX LOV STACKING call into osc for sizes */
         } else {
                 struct lov_stripe_md *lsm_new;
                 /* XXX LOV STACKING call into osc for sizes */
-                unsigned size = lov_stripe_md_size(obj_alloc);
+                unsigned oldsize, newsize;
+
+                if (oti && cookies && cookie_sent) {
+                        oldsize = lsm->lsm_stripe_count * sizeof(*cookies);
+                        newsize = obj_alloc * sizeof(*cookies);
+
+                        oti_alloc_cookies(oti, obj_alloc);
+                        if (oti->oti_logcookies) {
+                                memcpy(oti->oti_logcookies, cookies, newsize);
+                                OBD_FREE(cookies, oldsize);
+                                cookies = oti->oti_logcookies;
+                        } else {
+                                CWARN("'leaking' %d bytes\n", oldsize-newsize);
+                        }
+                }
 
                 CERROR("reallocating LSM for objid "LPX64": old %u new %u\n",
                        lsm->lsm_object_id, obj_alloc, lsm->lsm_stripe_count);
 
                 CERROR("reallocating LSM for objid "LPX64": old %u new %u\n",
                        lsm->lsm_object_id, obj_alloc, lsm->lsm_stripe_count);
-                OBD_ALLOC(lsm_new, size);
-                if (!lsm_new)
-                        GOTO(out_cleanup, rc = -ENOMEM);
-                memcpy(lsm_new, lsm, size);
-                lsm_new->lsm_stripe_count = obj_alloc;
-
-                /* XXX LOV STACKING call into osc for sizes */
-                OBD_FREE(lsm, lov_stripe_md_size(lsm->lsm_stripe_count));
-                lsm = lsm_new;
-
+                oldsize = lov_stripe_md_size(lsm->lsm_stripe_count);
+                newsize = lov_stripe_md_size(obj_alloc);
+                OBD_ALLOC(lsm_new, newsize);
+                if (lsm_new != NULL) {
+                        memcpy(lsm_new, lsm, newsize);
+                        lsm_new->lsm_stripe_count = obj_alloc;
+                        OBD_FREE(lsm, newsize);
+                        lsm = lsm_new;
+                } else {
+                        CWARN("'leaking' %d bytes\n", oldsize - newsize);
+                }
                 rc = 0;
         }
  out_done:
         *ea = lsm;
                 rc = 0;
         }
  out_done:
         *ea = lsm;
+        if (src_oa->o_valid & OBD_MD_FLSIZE &&
+            ret_oa->o_size != src_oa->o_size) {
+                CERROR("original size "LPU64" isn't new object size "LPU64"\n",
+                       src_oa->o_size, ret_oa->o_size);
+                LBUG();
+        }
+        ret_oa->o_id = src_oa->o_id;
+        memcpy(src_oa, ret_oa, sizeof(*src_oa));
 
  out_tmp:
 
  out_tmp:
-        obdo_free(tmp);
+        obdo_free(tmp_oa);
+ out_oa:
+        obdo_free(ret_oa);
+        if (oti && cookies) {
+                oti->oti_logcookies = cookies;
+                if (!cookie_sent) {
+                        oti_free_cookies(oti);
+                        src_oa->o_valid &= ~OBD_MD_FLCOOKIE;
+                } else {
+                        src_oa->o_valid |= OBD_MD_FLCOOKIE;
+                }
+        }
  out_exp:
         class_export_put(export);
         return rc;
  out_exp:
         class_export_put(export);
         return rc;
@@ -717,15 +822,26 @@ static int lov_create(struct lustre_handle *conn, struct obdo *oa,
 
                 --loi;
                 /* destroy already created objects here */
 
                 --loi;
                 /* destroy already created objects here */
-                memcpy(tmp, oa, sizeof(*tmp));
-                tmp->o_id = loi->loi_id;
-                err = obd_destroy(&lov->tgts[loi->loi_ost_idx].conn, tmp, NULL,
-                                  NULL);
+                memcpy(tmp_oa, src_oa, sizeof(*tmp_oa));
+                tmp_oa->o_id = loi->loi_id;
+
+                if (oti && cookie_sent) {
+                        err = obd_log_cancel(&lov->tgts[loi->loi_ost_idx].conn,
+                                             NULL, 1, --oti->oti_logcookies,
+                                             OBD_LLOG_FL_SENDNOW);
+                        if (err)
+                                CERROR("Failed to cancel objid "LPX64" subobj "
+                                       LPX64" cookie on OST idx %d: rc = %d\n",
+                                       src_oa->o_id, loi->loi_id,
+                                       loi->loi_ost_idx, err);
+                }
+
+                err = obd_destroy(&lov->tgts[loi->loi_ost_idx].conn, tmp_oa,
+                                  NULL, oti);
                 if (err)
                 if (err)
-                        CERROR("Failed to uncreate objid "LPX64" subobj "
-                               LPX64" on OST idx %d: rc = %d\n",
-                               oa->o_id, loi->loi_id, loi->loi_ost_idx,
-                               err);
+                        CERROR("Failed to uncreate objid "LPX64" subobj "LPX64
+                               " on OST idx %d: rc = %d\n", src_oa->o_id,
+                               loi->loi_id, loi->loi_ost_idx, err);
         }
         if (*ea == NULL)
                 obd_free_memmd(conn, &lsm);
         }
         if (*ea == NULL)
                 obd_free_memmd(conn, &lsm);
@@ -779,12 +895,12 @@ static int lov_destroy(struct lustre_handle *conn, struct obdo *oa,
                 memcpy(&tmp, oa, sizeof(tmp));
                 tmp.o_id = loi->loi_id;
                 if (lfh)
                 memcpy(&tmp, oa, sizeof(tmp));
                 tmp.o_id = loi->loi_id;
                 if (lfh)
-                        memcpy(obdo_handle(&tmp), lfh->lfh_och + i,
-                               FD_OSTDATA_SIZE);
+                        memcpy(obdo_handle(&tmp), &lfh->lfh_och[i].och_fh,
+                               sizeof(lfh->lfh_och[i].och_fh));
                 else
                         tmp.o_valid &= ~OBD_MD_FLHANDLE;
                 err = obd_destroy(&lov->tgts[loi->loi_ost_idx].conn, &tmp,
                 else
                         tmp.o_valid &= ~OBD_MD_FLHANDLE;
                 err = obd_destroy(&lov->tgts[loi->loi_ost_idx].conn, &tmp,
-                                  NULL, NULL);
+                                  NULL, oti);
                 if (err && lov->tgts[loi->loi_ost_idx].active) {
                         CERROR("error: destroying objid "LPX64" subobj "
                                LPX64" on OST idx %d: rc = %d\n",
                 if (err && lov->tgts[loi->loi_ost_idx].active) {
                         CERROR("error: destroying objid "LPX64" subobj "
                                LPX64" on OST idx %d: rc = %d\n",
@@ -839,8 +955,8 @@ static int lov_getattr(struct lustre_handle *conn, struct obdo *oa,
                 memcpy(&tmp, oa, sizeof(tmp));
                 tmp.o_id = loi->loi_id;
                 if (lfh)
                 memcpy(&tmp, oa, sizeof(tmp));
                 tmp.o_id = loi->loi_id;
                 if (lfh)
-                        memcpy(obdo_handle(&tmp), lfh->lfh_och + i,
-                               FD_OSTDATA_SIZE);
+                        memcpy(obdo_handle(&tmp), &lfh->lfh_och[i].och_fh,
+                               sizeof(lfh->lfh_och[i].och_fh));
                 else
                         tmp.o_valid &= ~OBD_MD_FLHANDLE;
 
                 else
                         tmp.o_valid &= ~OBD_MD_FLHANDLE;
 
@@ -867,12 +983,13 @@ static int lov_getattr(struct lustre_handle *conn, struct obdo *oa,
         return rc;
 }
 
         return rc;
 }
 
-static int lov_getattr_interpret(struct ptlrpc_request_set *rqset,
-                                 struct lov_getattr_async_args *aa, int rc)
+static int lov_getattr_interpret(struct ptlrpc_request_set *rqset, void *data, 
+                                 int rc)
 {
 {
+        struct lov_getattr_async_args *aa = data;
         struct lov_stripe_md *lsm = aa->aa_lsm;
         struct obdo          *oa = aa->aa_oa;
         struct lov_stripe_md *lsm = aa->aa_lsm;
         struct obdo          *oa = aa->aa_oa;
-        struct obdo          *obdos = aa->aa_stripe_oas;
+        struct obdo          *obdos = aa->aa_obdos;
         struct lov_oinfo     *loi;
         int                   i;
         int                   set = 0;
         struct lov_oinfo     *loi;
         int                   i;
         int                   set = 0;
@@ -881,8 +998,8 @@ static int lov_getattr_interpret(struct ptlrpc_request_set *rqset,
         if (rc == 0) {
                 /* NB all stripe requests succeeded to get here */
 
         if (rc == 0) {
                 /* NB all stripe requests succeeded to get here */
 
-                for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count;
-                     i++,loi++) {
+                for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count;
+                     i++, loi++) {
                         if (obdos[i].o_valid == 0)      /* inactive stripe */
                                 continue;
 
                         if (obdos[i].o_valid == 0)      /* inactive stripe */
                                 continue;
 
@@ -955,8 +1072,8 @@ static int lov_getattr_async (struct lustre_handle *conn, struct obdo *oa,
                 memcpy(&obdos[i], oa, sizeof(obdos[i]));
                 obdos[i].o_id = loi->loi_id;
                 if (lfh)
                 memcpy(&obdos[i], oa, sizeof(obdos[i]));
                 obdos[i].o_id = loi->loi_id;
                 if (lfh)
-                        memcpy(obdo_handle(&obdos[i]), lfh->lfh_och + i,
-                               FD_OSTDATA_SIZE);
+                        memcpy(obdo_handle(&obdos[i]), &lfh->lfh_och[i].och_fh,
+                               sizeof(lfh->lfh_och[i].och_fh));
                 else
                         obdos[i].o_valid &= ~OBD_MD_FLHANDLE;
 
                 else
                         obdos[i].o_valid &= ~OBD_MD_FLHANDLE;
 
@@ -980,7 +1097,7 @@ static int lov_getattr_async (struct lustre_handle *conn, struct obdo *oa,
         aa = (struct lov_getattr_async_args *)&rqset->set_args;
         aa->aa_lsm = lsm;
         aa->aa_oa = oa;
         aa = (struct lov_getattr_async_args *)&rqset->set_args;
         aa->aa_lsm = lsm;
         aa->aa_oa = oa;
-        aa->aa_stripe_oas = obdos;
+        aa->aa_obdos = obdos;
         GOTO (out, rc = 0);
 
  out_obdos:
         GOTO (out, rc = 0);
 
  out_obdos:
@@ -992,10 +1109,10 @@ static int lov_getattr_async (struct lustre_handle *conn, struct obdo *oa,
         RETURN (rc);
 }
 
         RETURN (rc);
 }
 
-static int lov_setattr(struct lustre_handle *conn, struct obdo *oa,
+static int lov_setattr(struct lustre_handle *conn, struct obdo *src_oa,
                        struct lov_stripe_md *lsm, struct obd_trans_info *oti)
 {
                        struct lov_stripe_md *lsm, struct obd_trans_info *oti)
 {
-        struct obdo *tmp;
+        struct obdo *tmp_oa, *ret_oa;
         struct obd_export *export = class_conn2export(conn);
         struct lov_obd *lov;
         struct lov_oinfo *loi;
         struct obd_export *export = class_conn2export(conn);
         struct lov_obd *lov;
         struct lov_oinfo *loi;
@@ -1009,18 +1126,17 @@ static int lov_setattr(struct lustre_handle *conn, struct obdo *oa,
         if (!export || !export->exp_obd)
                 GOTO(out, rc = -ENODEV);
 
         if (!export || !export->exp_obd)
                 GOTO(out, rc = -ENODEV);
 
-        /* size changes should go through punch and not setattr */
-        LASSERT(!(oa->o_valid & OBD_MD_FLSIZE));
-
-        /* for now, we only expect mtime updates here */
-        LASSERT(!(oa->o_valid & ~(OBD_MD_FLID |OBD_MD_FLTYPE |OBD_MD_FLMTIME)));
-
-        tmp = obdo_alloc();
-        if (!tmp)
+        /* for now, we only expect time updates here */
+        LASSERT(!(src_oa->o_valid & ~(OBD_MD_FLID|OBD_MD_FLTYPE|OBD_MD_FLMODE|
+                                      OBD_MD_FLATIME | OBD_MD_FLMTIME |
+                                      OBD_MD_FLCTIME)));
+        ret_oa = obdo_alloc();
+        if (!ret_oa)
                 GOTO(out, rc = -ENOMEM);
 
                 GOTO(out, rc = -ENOMEM);
 
-        if (oa->o_valid & OBD_MD_FLHANDLE)
-                lfh = lov_handle2lfh(obdo_handle(oa));
+        tmp_oa = obdo_alloc();
+        if (!tmp_oa)
+                GOTO(out_oa, rc = -ENOMEM);
 
         lov = &export->exp_obd->u.lov;
         for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
 
         lov = &export->exp_obd->u.lov;
         for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
@@ -1031,46 +1147,54 @@ static int lov_setattr(struct lustre_handle *conn, struct obdo *oa,
                         continue;
                 }
 
                         continue;
                 }
 
-                obdo_cpy_md(tmp, oa, oa->o_valid);
+                memcpy(tmp_oa, src_oa, sizeof(*tmp_oa));
 
                 if (lfh)
 
                 if (lfh)
-                        memcpy(obdo_handle(tmp), lfh->lfh_och + i,
-                               FD_OSTDATA_SIZE);
+                        memcpy(obdo_handle(tmp_oa), &lfh->lfh_och[i].och_fh,
+                               sizeof(lfh->lfh_och[i].och_fh));
                 else
                 else
-                        tmp->o_valid &= ~OBD_MD_FLHANDLE;
+                        tmp_oa->o_valid &= ~OBD_MD_FLHANDLE;
 
 
-                tmp->o_id = loi->loi_id;
+                tmp_oa->o_id = loi->loi_id;
 
 
-                err = obd_setattr(&lov->tgts[loi->loi_ost_idx].conn, tmp,
+                err = obd_setattr(&lov->tgts[loi->loi_ost_idx].conn, tmp_oa,
                                   NULL, NULL);
                 if (err) {
                         if (lov->tgts[loi->loi_ost_idx].active) {
                                 CERROR("error: setattr objid "LPX64" subobj "
                                        LPX64" on OST idx %d: rc = %d\n",
                                   NULL, NULL);
                 if (err) {
                         if (lov->tgts[loi->loi_ost_idx].active) {
                                 CERROR("error: setattr objid "LPX64" subobj "
                                        LPX64" on OST idx %d: rc = %d\n",
-                                       oa->o_id, loi->loi_id, loi->loi_ost_idx,
-                                       err);
+                                       src_oa->o_id, loi->loi_id,
+                                       loi->loi_ost_idx, err);
                                 if (!rc)
                                         rc = err;
                         }
                                 if (!rc)
                                         rc = err;
                         }
-                } else
-                        set = 1;
+                        continue;
+                }
+
+                lov_merge_attrs(ret_oa, tmp_oa, tmp_oa->o_valid, lsm, i, &set);
         }
         }
-        obdo_free(tmp);
         if (!set && !rc)
                 rc = -EIO;
         if (lfh != NULL)
                 lov_lfh_put(lfh);
         if (!set && !rc)
                 rc = -EIO;
         if (lfh != NULL)
                 lov_lfh_put(lfh);
-        GOTO(out, rc);
- out:
+
+        ret_oa->o_id = src_oa->o_id;
+        memcpy(src_oa, ret_oa, sizeof(*src_oa));
+        GOTO(out_tmp, rc);
+out_tmp:
+        obdo_free(tmp_oa);
+out_oa:
+        obdo_free(ret_oa);
+out:
         class_export_put(export);
         return rc;
 }
 
         class_export_put(export);
         return rc;
 }
 
-static int lov_open(struct lustre_handle *conn, struct obdo *oa,
+static int lov_open(struct lustre_handle *conn, struct obdo *src_oa,
                     struct lov_stripe_md *lsm, struct obd_trans_info *oti,
                     struct obd_client_handle *och)
 {
                     struct lov_stripe_md *lsm, struct obd_trans_info *oti,
                     struct obd_client_handle *och)
 {
-        struct obdo *tmp; /* on the heap here, on the stack in lov_close? */
+        struct obdo *tmp_oa, *ret_oa;
         struct obd_export *export = class_conn2export(conn);
         struct lov_obd *lov;
         struct lov_oinfo *loi;
         struct obd_export *export = class_conn2export(conn);
         struct lov_obd *lov;
         struct lov_oinfo *loi;
@@ -1085,20 +1209,24 @@ static int lov_open(struct lustre_handle *conn, struct obdo *oa,
         if (!export || !export->exp_obd)
                 GOTO(out_exp, rc = -ENODEV);
 
         if (!export || !export->exp_obd)
                 GOTO(out_exp, rc = -ENODEV);
 
-        tmp = obdo_alloc();
-        if (!tmp)
+        ret_oa = obdo_alloc();
+        if (!ret_oa)
                 GOTO(out_exp, rc = -ENOMEM);
 
                 GOTO(out_exp, rc = -ENOMEM);
 
+        tmp_oa = obdo_alloc();
+        if (!tmp_oa)
+                GOTO(out_oa, rc = -ENOMEM);
+
         lfh = lov_lfh_new();
         if (lfh == NULL)
                 GOTO(out_tmp, rc = -ENOMEM);
         lfh = lov_lfh_new();
         if (lfh == NULL)
                 GOTO(out_tmp, rc = -ENOMEM);
-        OBD_ALLOC(lfh->lfh_och, lsm->lsm_stripe_count * sizeof *och);
+        OBD_ALLOC(lfh->lfh_och, lsm->lsm_stripe_count * sizeof(*och));
         if (!lfh->lfh_och)
                 GOTO(out_lfh, rc = -ENOMEM);
 
         lov = &export->exp_obd->u.lov;
         if (!lfh->lfh_och)
                 GOTO(out_lfh, rc = -ENOMEM);
 
         lov = &export->exp_obd->u.lov;
-        oa->o_size = 0;
-        oa->o_blocks = 0;
+        src_oa->o_size = 0;
+        src_oa->o_blocks = 0;
         for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
                 if (lov->tgts[loi->loi_ost_idx].active == 0) {
                         CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
         for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
                 if (lov->tgts[loi->loi_ost_idx].active == 0) {
                         CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
@@ -1106,11 +1234,11 @@ static int lov_open(struct lustre_handle *conn, struct obdo *oa,
                 }
 
                 /* create data objects with "parent" OA */
                 }
 
                 /* create data objects with "parent" OA */
-                memcpy(tmp, oa, sizeof(*tmp));
-                tmp->o_id = loi->loi_id;
+                memcpy(tmp_oa, src_oa, sizeof(*tmp_oa));
+                tmp_oa->o_id = loi->loi_id;
 
 
-                rc = obd_open(&lov->tgts[loi->loi_ost_idx].conn, tmp,
-                              NULL, NULL, lfh->lfh_och + i);
+                rc = obd_open(&lov->tgts[loi->loi_ost_idx].conn, tmp_oa,
+                              NULL, NULL, &lfh->lfh_och[i]);
                 if (rc) {
                         if (!lov->tgts[loi->loi_ost_idx].active) {
                                 rc = 0;
                 if (rc) {
                         if (!lov->tgts[loi->loi_ost_idx].active) {
                                 rc = 0;
@@ -1118,27 +1246,31 @@ static int lov_open(struct lustre_handle *conn, struct obdo *oa,
                         }
                         CERROR("error: open objid "LPX64" subobj "LPX64
                                " on OST idx %d: rc = %d\n",
                         }
                         CERROR("error: open objid "LPX64" subobj "LPX64
                                " on OST idx %d: rc = %d\n",
-                               oa->o_id, lsm->lsm_oinfo[i].loi_id,
+                               src_oa->o_id, lsm->lsm_oinfo[i].loi_id,
                                loi->loi_ost_idx, rc);
                         goto out_handles;
                 }
 
                                loi->loi_ost_idx, rc);
                         goto out_handles;
                 }
 
-                lov_merge_attrs(oa, tmp, tmp->o_valid, lsm, i, &set);
+                lov_merge_attrs(ret_oa, tmp_oa, tmp_oa->o_valid, lsm, i, &set);
         }
 
         lfh->lfh_count = lsm->lsm_stripe_count;
         och->och_fh.cookie = lfh->lfh_handle.h_cookie;
         }
 
         lfh->lfh_count = lsm->lsm_stripe_count;
         och->och_fh.cookie = lfh->lfh_handle.h_cookie;
-        obdo_handle(oa)->cookie = lfh->lfh_handle.h_cookie;
-        oa->o_valid |= OBD_MD_FLHANDLE;
+        obdo_handle(ret_oa)->cookie = lfh->lfh_handle.h_cookie;
+        ret_oa->o_valid |= OBD_MD_FLHANDLE;
+        ret_oa->o_id = src_oa->o_id;
+        memcpy(src_oa, ret_oa, sizeof(*src_oa));
 
 
-        /* llfh refcount transfers to list */
+        /* lfh refcount transfers to list */
         spin_lock(&export->exp_lov_data.led_lock);
         list_add(&lfh->lfh_list, &export->exp_lov_data.led_open_head);
         spin_unlock(&export->exp_lov_data.led_lock);
 
         GOTO(out_tmp, rc);
  out_tmp:
         spin_lock(&export->exp_lov_data.led_lock);
         list_add(&lfh->lfh_list, &export->exp_lov_data.led_open_head);
         spin_unlock(&export->exp_lov_data.led_lock);
 
         GOTO(out_tmp, rc);
  out_tmp:
-        obdo_free(tmp);
+        obdo_free(tmp_oa);
+ out_oa:
+        obdo_free(ret_oa);
  out_exp:
         class_export_put(export);
         return rc;
  out_exp:
         class_export_put(export);
         return rc;
@@ -1150,16 +1282,16 @@ static int lov_open(struct lustre_handle *conn, struct obdo *oa,
                 if (lov->tgts[loi->loi_ost_idx].active == 0)
                         continue;
 
                 if (lov->tgts[loi->loi_ost_idx].active == 0)
                         continue;
 
-                memcpy(tmp, oa, sizeof(*tmp));
-                tmp->o_id = loi->loi_id;
-                memcpy(obdo_handle(tmp), lfh->lfh_och + i, FD_OSTDATA_SIZE);
+                memcpy(tmp_oa, src_oa, sizeof(*tmp_oa));
+                tmp_oa->o_id = loi->loi_id;
+                memcpy(obdo_handle(tmp_oa), &lfh->lfh_och[i], FD_OSTDATA_SIZE);
 
 
-                err = obd_close(&lov->tgts[loi->loi_ost_idx].conn, tmp,
+                err = obd_close(&lov->tgts[loi->loi_ost_idx].conn, tmp_oa,
                                 NULL, NULL);
                 if (err && lov->tgts[loi->loi_ost_idx].active) {
                         CERROR("error: closing objid "LPX64" subobj "LPX64
                                " on OST idx %d after open error: rc=%d\n",
                                 NULL, NULL);
                 if (err && lov->tgts[loi->loi_ost_idx].active) {
                         CERROR("error: closing objid "LPX64" subobj "LPX64
                                " on OST idx %d after open error: rc=%d\n",
-                               oa->o_id, loi->loi_id, loi->loi_ost_idx, err);
+                               src_oa->o_id, loi->loi_id, loi->loi_ost_idx,err);
                 }
         }
 
                 }
         }
 
@@ -1189,6 +1321,8 @@ static int lov_close(struct lustre_handle *conn, struct obdo *oa,
 
         if (oa->o_valid & OBD_MD_FLHANDLE)
                 lfh = lov_handle2lfh(obdo_handle(oa));
 
         if (oa->o_valid & OBD_MD_FLHANDLE)
                 lfh = lov_handle2lfh(obdo_handle(oa));
+        if (!lfh)
+                LBUG();
 
         lov = &export->exp_obd->u.lov;
         for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
 
         lov = &export->exp_obd->u.lov;
         for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
@@ -1198,7 +1332,7 @@ static int lov_close(struct lustre_handle *conn, struct obdo *oa,
                 memcpy(&tmp, oa, sizeof(tmp));
                 tmp.o_id = loi->loi_id;
                 if (lfh)
                 memcpy(&tmp, oa, sizeof(tmp));
                 tmp.o_id = loi->loi_id;
                 if (lfh)
-                        memcpy(obdo_handle(&tmp), lfh->lfh_och + i,
+                        memcpy(obdo_handle(&tmp), &lfh->lfh_och[i],
                                FD_OSTDATA_SIZE);
                 else
                         tmp.o_valid &= ~OBD_MD_FLHANDLE;
                                FD_OSTDATA_SIZE);
                 else
                         tmp.o_valid &= ~OBD_MD_FLHANDLE;
@@ -1223,18 +1357,16 @@ static int lov_close(struct lustre_handle *conn, struct obdo *oa,
 
                 OBD_FREE(lfh->lfh_och, lsm->lsm_stripe_count * FD_OSTDATA_SIZE);
                 lov_lfh_destroy(lfh);
 
                 OBD_FREE(lfh->lfh_och, lsm->lsm_stripe_count * FD_OSTDATA_SIZE);
                 lov_lfh_destroy(lfh);
+                LASSERT(atomic_read(&lfh->lfh_refcount) == 1);
                 lov_lfh_put(lfh); /* balance handle2lfh above */
                 lov_lfh_put(lfh); /* balance handle2lfh above */
-        }
+        } else
+                LBUG();
         GOTO(out, rc);
  out:
         class_export_put(export);
         return rc;
 }
 
         GOTO(out, rc);
  out:
         class_export_put(export);
         return rc;
 }
 
-#ifndef log2
-#define log2(n) ffz(~(n))
-#endif
-
 /* we have an offset in file backed by an lov and want to find out where
  * that offset lands in our given stripe of the file.  for the easy
  * case where the offset is within the stripe, we just have to scale the
 /* we have an offset in file backed by an lov and want to find out where
  * that offset lands in our given stripe of the file.  for the easy
  * case where the offset is within the stripe, we just have to scale the
@@ -1404,8 +1536,8 @@ static int lov_punch(struct lustre_handle *conn, struct obdo *oa,
                 memcpy(&tmp, oa, sizeof(tmp));
                 tmp.o_id = loi->loi_id;
                 if (lfh)
                 memcpy(&tmp, oa, sizeof(tmp));
                 tmp.o_id = loi->loi_id;
                 if (lfh)
-                        memcpy(obdo_handle(&tmp), lfh->lfh_och + i,
-                               FD_OSTDATA_SIZE);
+                        memcpy(obdo_handle(&tmp), &lfh->lfh_och[i].och_fh,
+                               sizeof(lfh->lfh_och[i].och_fh));
                 else
                         tmp.o_valid &= ~OBD_MD_FLHANDLE;
 
                 else
                         tmp.o_valid &= ~OBD_MD_FLHANDLE;
 
@@ -1455,7 +1587,7 @@ static int lov_brw_check(struct lov_obd *lov, struct lov_stripe_md *lsm,
         return 0;
 }
 
         return 0;
 }
 
-static int lov_brw(int cmd, struct lustre_handle *conn,
+static int lov_brw(int cmd, struct lustre_handle *conn, struct obdo *src_oa,
                    struct lov_stripe_md *lsm, obd_count oa_bufs,
                    struct brw_page *pga, struct obd_trans_info *oti)
 {
                    struct lov_stripe_md *lsm, obd_count oa_bufs,
                    struct brw_page *pga, struct obd_trans_info *oti)
 {
@@ -1467,10 +1599,12 @@ static int lov_brw(int cmd, struct lustre_handle *conn,
                 int ost_idx;
         } *stripeinfo, *si, *si_last;
         struct obd_export *export = class_conn2export(conn);
                 int ost_idx;
         } *stripeinfo, *si, *si_last;
         struct obd_export *export = class_conn2export(conn);
+        struct obdo *ret_oa = NULL, *tmp_oa = NULL;
+        struct lov_file_handles *lfh = NULL;
         struct lov_obd *lov;
         struct brw_page *ioarr;
         struct lov_oinfo *loi;
         struct lov_obd *lov;
         struct brw_page *ioarr;
         struct lov_oinfo *loi;
-        int rc = 0, i, *where, stripe_count = lsm->lsm_stripe_count;
+        int rc = 0, i, *where, stripe_count = lsm->lsm_stripe_count, set = 0;
         ENTRY;
 
         if (lsm_bad_magic(lsm))
         ENTRY;
 
         if (lsm_bad_magic(lsm))
@@ -1495,6 +1629,21 @@ static int lov_brw(int cmd, struct lustre_handle *conn,
         if (!ioarr)
                 GOTO(out_where, rc = -ENOMEM);
 
         if (!ioarr)
                 GOTO(out_where, rc = -ENOMEM);
 
+        if (src_oa) {
+                ret_oa = obdo_alloc();
+                if (!ret_oa)
+                        GOTO(out_ioarr, rc = -ENOMEM);
+
+                tmp_oa = obdo_alloc();
+                if (!tmp_oa)
+                        GOTO(out_oa, rc = -ENOMEM);
+
+                if (src_oa->o_valid & OBD_MD_FLHANDLE)
+                        lfh = lov_handle2lfh(obdo_handle(src_oa));
+                else
+                        src_oa->o_valid &= ~OBD_MD_FLHANDLE;
+        }
+
         for (i = 0; i < oa_bufs; i++) {
                 where[i] = lov_stripe_number(lsm, pga[i].off);
                 stripeinfo[where[i]].bufct++;
         for (i = 0; i < oa_bufs; i++) {
                 where[i] = lov_stripe_number(lsm, pga[i].off);
                 stripeinfo[where[i]].bufct++;
@@ -1524,23 +1673,46 @@ static int lov_brw(int cmd, struct lustre_handle *conn,
 
                 if (lov->tgts[si->ost_idx].active == 0) {
                         CDEBUG(D_HA, "lov idx %d inactive\n", si->ost_idx);
 
                 if (lov->tgts[si->ost_idx].active == 0) {
                         CDEBUG(D_HA, "lov idx %d inactive\n", si->ost_idx);
-                        GOTO(out_ioarr, rc = -EIO);
+                        GOTO(out_oa, rc = -EIO);
                 }
 
                 if (si->bufct) {
                         LASSERT(shift < oa_bufs);
                 }
 
                 if (si->bufct) {
                         LASSERT(shift < oa_bufs);
-                        rc = obd_brw(cmd, &lov->tgts[si->ost_idx].conn,
+                        if (src_oa) {
+                                memcpy(tmp_oa, src_oa, sizeof(*tmp_oa));
+                                if (lfh)
+                                        memcpy(obdo_handle(tmp_oa),
+                                               &lfh->lfh_och[i].och_fh,
+                                               sizeof(lfh->lfh_och[i].och_fh));
+                        }
+
+                        tmp_oa->o_id = si->lsm.lsm_object_id;
+                        rc = obd_brw(cmd, &lov->tgts[si->ost_idx].conn, tmp_oa,
                                      &si->lsm, si->bufct, &ioarr[shift],
                                      oti);
                         if (rc)
                                 GOTO(out_ioarr, rc);
                                      &si->lsm, si->bufct, &ioarr[shift],
                                      oti);
                         if (rc)
                                 GOTO(out_ioarr, rc);
+
+                        lov_merge_attrs(ret_oa, tmp_oa, tmp_oa->o_valid, lsm,
+                                        i, &set);
                 }
         }
                 }
         }
-        GOTO(out_ioarr, rc);
+
+        ret_oa->o_id = src_oa->o_id;
+        memcpy(src_oa, ret_oa, sizeof(*src_oa));
+
+        GOTO(out_oa, rc);
+ out_oa:
+        if (tmp_oa)
+                obdo_free(tmp_oa);
+        if (ret_oa)
+                obdo_free(ret_oa);
  out_ioarr:
         OBD_FREE(ioarr, sizeof(*ioarr) * oa_bufs);
  out_where:
         OBD_FREE(where, sizeof(*where) * oa_bufs);
  out_ioarr:
         OBD_FREE(ioarr, sizeof(*ioarr) * oa_bufs);
  out_where:
         OBD_FREE(where, sizeof(*where) * oa_bufs);
+        if (lfh)
+                lov_lfh_put(lfh);
  out_sinfo:
         OBD_FREE(stripeinfo, stripe_count * sizeof(*stripeinfo));
  out_exp:
  out_sinfo:
         OBD_FREE(stripeinfo, stripe_count * sizeof(*stripeinfo));
  out_exp:
@@ -1548,18 +1720,43 @@ static int lov_brw(int cmd, struct lustre_handle *conn,
         return rc;
 }
 
         return rc;
 }
 
-static int lov_brw_interpret (struct ptlrpc_request_set *set,
-                              struct lov_brw_async_args *aa, int rc)
+static int lov_brw_interpret(struct ptlrpc_request_set *rqset,
+                             struct lov_brw_async_args *aa, int rc)
 {
 {
-        obd_count        oa_bufs = aa->aa_oa_bufs;
-        struct brw_page *ioarr = aa->aa_ioarr;
+        struct lov_stripe_md *lsm = aa->aa_lsm;
+        obd_count             oa_bufs = aa->aa_oa_bufs;
+        struct obdo          *oa = aa->aa_oa;
+        struct obdo          *obdos = aa->aa_obdos;
+        struct brw_page      *ioarr = aa->aa_ioarr;
+        struct lov_oinfo     *loi;
+        int i, set = 0;
         ENTRY;
 
         ENTRY;
 
-        OBD_FREE (ioarr, sizeof (*ioarr) * oa_bufs);
-        RETURN (rc);
+        if (rc == 0) {
+                /* NB all stripe requests succeeded to get here */
+
+                for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count;
+                     i++, loi++) {
+                        if (obdos[i].o_valid == 0)      /* inactive stripe */
+                                continue;
+
+                        lov_merge_attrs(oa, &obdos[i], obdos[i].o_valid, lsm,
+                                        i, &set);
+                }
+
+                if (!set) {
+                        CERROR("No stripes had valid attrs\n");
+                        rc = -EIO;
+                }
+        }
+        oa->o_id = lsm->lsm_object_id;
+
+        OBD_FREE(obdos, lsm->lsm_stripe_count * sizeof(*obdos));
+        OBD_FREE(ioarr, sizeof(*ioarr) * oa_bufs);
+        RETURN(rc);
 }
 
 }
 
-static int lov_brw_async(int cmd, struct lustre_handle *conn,
+static int lov_brw_async(int cmd, struct lustre_handle *conn, struct obdo *oa,
                          struct lov_stripe_md *lsm, obd_count oa_bufs,
                          struct brw_page *pga, struct ptlrpc_request_set *set,
                          struct obd_trans_info *oti)
                          struct lov_stripe_md *lsm, obd_count oa_bufs,
                          struct brw_page *pga, struct ptlrpc_request_set *set,
                          struct obd_trans_info *oti)
@@ -1573,7 +1770,9 @@ static int lov_brw_async(int cmd, struct lustre_handle *conn,
         } *stripeinfo, *si, *si_last;
         struct obd_export *export = class_conn2export(conn);
         struct lov_obd *lov;
         } *stripeinfo, *si, *si_last;
         struct obd_export *export = class_conn2export(conn);
         struct lov_obd *lov;
+        struct lov_file_handles *lfh = NULL;
         struct brw_page *ioarr;
         struct brw_page *ioarr;
+        struct obdo *obdos = NULL;
         struct lov_oinfo *loi;
         struct lov_brw_async_args *aa;
         int rc = 0, i, *where, stripe_count = lsm->lsm_stripe_count;
         struct lov_oinfo *loi;
         struct lov_brw_async_args *aa;
         int rc = 0, i, *where, stripe_count = lsm->lsm_stripe_count;
@@ -1597,9 +1796,20 @@ static int lov_brw_async(int cmd, struct lustre_handle *conn,
         if (!where)
                 GOTO(out_sinfo, rc = -ENOMEM);
 
         if (!where)
                 GOTO(out_sinfo, rc = -ENOMEM);
 
+        if (oa) {
+                OBD_ALLOC(obdos, sizeof(*obdos) * stripe_count);
+                if (!obdos)
+                        GOTO(out_where, rc = -ENOMEM);
+
+                if (oa->o_valid & OBD_MD_FLHANDLE)
+                        lfh = lov_handle2lfh(obdo_handle(oa));
+                else
+                        oa->o_valid &= ~OBD_MD_FLHANDLE;
+        }
+
         OBD_ALLOC(ioarr, sizeof(*ioarr) * oa_bufs);
         if (!ioarr)
         OBD_ALLOC(ioarr, sizeof(*ioarr) * oa_bufs);
         if (!ioarr)
-                GOTO(out_where, rc = -ENOMEM);
+                GOTO(out_obdos, rc = -ENOMEM);
 
         for (i = 0; i < oa_bufs; i++) {
                 where[i] = lov_stripe_number(lsm, pga[i].off);
 
         for (i = 0; i < oa_bufs; i++) {
                 where[i] = lov_stripe_number(lsm, pga[i].off);
@@ -1612,6 +1822,15 @@ static int lov_brw_async(int cmd, struct lustre_handle *conn,
                         si->index = si_last->index + si_last->bufct;
                 si->lsm.lsm_object_id = loi->loi_id;
                 si->ost_idx = loi->loi_ost_idx;
                         si->index = si_last->index + si_last->bufct;
                 si->lsm.lsm_object_id = loi->loi_id;
                 si->ost_idx = loi->loi_ost_idx;
+
+                if (oa) {
+                        memcpy(&obdos[i], oa, sizeof(*obdos));
+                        obdos[i].o_id = si->lsm.lsm_object_id;
+                        if (lfh)
+                                memcpy(obdo_handle(&obdos[i]),
+                                       &lfh->lfh_och[i].och_fh,
+                                       sizeof(lfh->lfh_och[i].och_fh));
+                }
         }
 
         for (i = 0; i < oa_bufs; i++) {
         }
 
         for (i = 0; i < oa_bufs; i++) {
@@ -1637,24 +1856,35 @@ static int lov_brw_async(int cmd, struct lustre_handle *conn,
                 }
 
                 LASSERT(shift < oa_bufs);
                 }
 
                 LASSERT(shift < oa_bufs);
+
                 rc = obd_brw_async(cmd, &lov->tgts[si->ost_idx].conn,
                 rc = obd_brw_async(cmd, &lov->tgts[si->ost_idx].conn,
-                                   &si->lsm, si->bufct, &ioarr[shift],
-                                   set, oti);
+                                   &obdos[i], &si->lsm, si->bufct,
+                                   &ioarr[shift], set, oti);
                 if (rc)
                         GOTO(out_ioarr, rc);
         }
                 if (rc)
                         GOTO(out_ioarr, rc);
         }
-        LASSERT (rc == 0);
-        LASSERT (set->set_interpret == NULL);
-        set->set_interpret = lov_brw_interpret;
-        LASSERT (sizeof (set->set_args) >= sizeof (struct lov_brw_async_args));
+        LASSERT(rc == 0);
+        LASSERT(set->set_interpret == NULL);
+        set->set_interpret = (set_interpreter_func)lov_brw_interpret;
+        LASSERT(sizeof(set->set_args) >= sizeof(struct lov_brw_async_args));
         aa = (struct lov_brw_async_args *)&set->set_args;
         aa = (struct lov_brw_async_args *)&set->set_args;
-        aa->aa_oa_bufs = oa_bufs;
+        aa->aa_lsm = lsm;
+        aa->aa_obdos = obdos;
+        aa->aa_oa = oa;
         aa->aa_ioarr = ioarr;
         aa->aa_ioarr = ioarr;
+        aa->aa_oa_bufs = oa_bufs;
+
+        /* Don't free ioarr or obdos - that's done in lov_brw_interpret */
         GOTO(out_where, rc);
         GOTO(out_where, rc);
+
  out_ioarr:
         OBD_FREE(ioarr, sizeof(*ioarr) * oa_bufs);
  out_ioarr:
         OBD_FREE(ioarr, sizeof(*ioarr) * oa_bufs);
+ out_obdos:
+        OBD_FREE(obdos, stripe_count * sizeof(*obdos));
  out_where:
         OBD_FREE(where, sizeof(*where) * oa_bufs);
  out_where:
         OBD_FREE(where, sizeof(*where) * oa_bufs);
+        if (lfh)
+                lov_lfh_put(lfh);
  out_sinfo:
         OBD_FREE(stripeinfo, stripe_count * sizeof(*stripeinfo));
  out_exp:
  out_sinfo:
         OBD_FREE(stripeinfo, stripe_count * sizeof(*stripeinfo));
  out_exp:
@@ -1980,20 +2210,16 @@ static int lov_cancel_unused(struct lustre_handle *conn,
                         (tot) += (add);                                 \
         } while(0)
 
                         (tot) += (add);                                 \
         } while(0)
 
-static int lov_statfs(struct obd_export *export, struct obd_statfs *osfs)
+static int lov_statfs(struct obd_device *obd, struct obd_statfs *osfs,
+                      unsigned long max_age)
 {
 {
-        struct obd_export *tgt_export;
-        struct lov_obd *lov;
+        struct lov_obd *lov = &obd->u.lov;
         struct obd_statfs lov_sfs;
         int set = 0;
         int rc = 0;
         int i;
         ENTRY;
 
         struct obd_statfs lov_sfs;
         int set = 0;
         int rc = 0;
         int i;
         ENTRY;
 
-        if (!export || !export->exp_obd)
-                RETURN(-ENODEV);
-
-        lov = &export->exp_obd->u.lov;
 
         /* We only get block data from the OBD */
         for (i = 0; i < lov->desc.ld_tgt_count; i++) {
 
         /* We only get block data from the OBD */
         for (i = 0; i < lov->desc.ld_tgt_count; i++) {
@@ -2004,14 +2230,8 @@ static int lov_statfs(struct obd_export *export, struct obd_statfs *osfs)
                         continue;
                 }
 
                         continue;
                 }
 
-                tgt_export = class_conn2export(&lov->tgts[i].conn);
-                if (!tgt_export) {
-                        CDEBUG(D_HA, "lov idx %d NULL export\n", i);
-                        continue;
-                }
-
-                err = obd_statfs(tgt_export, &lov_sfs);
-                class_export_put(tgt_export);
+                err = obd_statfs(class_conn2obd(&lov->tgts[i].conn), &lov_sfs,
+                                 max_age);
                 if (err) {
                         if (lov->tgts[i].active) {
                                 CERROR("error: statfs OSC %s on OST idx %d: "
                 if (err) {
                         if (lov->tgts[i].active) {
                                 CERROR("error: statfs OSC %s on OST idx %d: "
@@ -2022,6 +2242,7 @@ static int lov_statfs(struct obd_export *export, struct obd_statfs *osfs)
                         }
                         continue;
                 }
                         }
                         continue;
                 }
+
                 if (!set) {
                         memcpy(osfs, &lov_sfs, sizeof(lov_sfs));
                         set = 1;
                 if (!set) {
                         memcpy(osfs, &lov_sfs, sizeof(lov_sfs));
                         set = 1;
@@ -2044,6 +2265,7 @@ static int lov_statfs(struct obd_export *export, struct obd_statfs *osfs)
                         LOV_SUM_MAX(osfs->os_ffree, lov_sfs.os_ffree);
                 }
         }
                         LOV_SUM_MAX(osfs->os_ffree, lov_sfs.os_ffree);
                 }
         }
+
         if (set) {
                 __u32 expected_stripes = lov->desc.ld_default_stripe_count ?
                                          lov->desc.ld_default_stripe_count :
         if (set) {
                 __u32 expected_stripes = lov->desc.ld_default_stripe_count ?
                                          lov->desc.ld_default_stripe_count :
@@ -2055,6 +2277,7 @@ static int lov_statfs(struct obd_export *export, struct obd_statfs *osfs)
                         do_div(osfs->os_ffree, expected_stripes);
         } else if (!rc)
                 rc = -EIO;
                         do_div(osfs->os_ffree, expected_stripes);
         } else if (!rc)
                 rc = -EIO;
+
         RETURN(rc);
 }
 
         RETURN(rc);
 }
 
@@ -2191,7 +2414,28 @@ static int lov_get_info(struct lustre_handle *conn, __u32 keylen,
         RETURN(-EINVAL);
 }
 
         RETURN(-EINVAL);
 }
 
-static int lov_mark_page_dirty(struct lustre_handle *conn, 
+static int lov_set_info(struct lustre_handle *conn, obd_count keylen,
+                        void *key, obd_count vallen, void *val)
+{
+        struct obd_device *obddev = class_conn2obd(conn);
+        struct lov_obd *lov = &obddev->u.lov;
+        int i, rc = 0;
+        ENTRY;
+
+        if (keylen < strlen("mds_conn") ||
+            memcmp(key, "mds_conn", strlen("mds_conn")) != 0)
+                RETURN(-EINVAL);
+
+        for (i = 0; i < lov->desc.ld_tgt_count; i++) {
+                int er;
+                er = obd_set_info(&lov->tgts[i].conn, keylen, key, vallen, val);
+                if (!rc)
+                        rc = er;
+        }
+        RETURN(rc);
+}
+
+static int lov_mark_page_dirty(struct lustre_handle *conn,
                                struct lov_stripe_md *lsm, unsigned long offset)
 {
         struct lov_obd *lov = &class_conn2obd(conn)->u.lov;
                                struct lov_stripe_md *lsm, unsigned long offset)
 {
         struct lov_obd *lov = &class_conn2obd(conn)->u.lov;
@@ -2209,12 +2453,12 @@ static int lov_mark_page_dirty(struct lustre_handle *conn,
                 RETURN(-ENOMEM);
 
         stripe = lov_stripe_number(lsm, (obd_off)offset << PAGE_CACHE_SHIFT);
                 RETURN(-ENOMEM);
 
         stripe = lov_stripe_number(lsm, (obd_off)offset << PAGE_CACHE_SHIFT);
-        lov_stripe_offset(lsm, (obd_off)offset << PAGE_CACHE_SHIFT, stripe, 
+        lov_stripe_offset(lsm, (obd_off)offset << PAGE_CACHE_SHIFT, stripe,
                           &off);
         off >>= PAGE_CACHE_SHIFT;
 
         loi = &lsm->lsm_oinfo[stripe];
                           &off);
         off >>= PAGE_CACHE_SHIFT;
 
         loi = &lsm->lsm_oinfo[stripe];
-        CDEBUG(D_INODE, "off %lu => off %lu on stripe %d\n", offset, 
+        CDEBUG(D_INODE, "off %lu => off %lu on stripe %d\n", offset,
                (unsigned long)off, stripe);
         submd->lsm_oinfo[0].loi_dirty_ot = &loi->loi_dirty_ot_inline;
 
                (unsigned long)off, stripe);
         submd->lsm_oinfo[0].loi_dirty_ot = &loi->loi_dirty_ot_inline;
 
@@ -2223,7 +2467,7 @@ static int lov_mark_page_dirty(struct lustre_handle *conn,
         RETURN(rc);
 }
 
         RETURN(rc);
 }
 
-static int lov_clear_dirty_pages(struct lustre_handle *conn, 
+static int lov_clear_dirty_pages(struct lustre_handle *conn,
                                  struct lov_stripe_md *lsm, unsigned long start,
                                  unsigned long end, unsigned long *cleared)
 
                                  struct lov_stripe_md *lsm, unsigned long start,
                                  unsigned long end, unsigned long *cleared)
 
@@ -2267,11 +2511,11 @@ static int lov_clear_dirty_pages(struct lustre_handle *conn,
                 obd_start >>= PAGE_CACHE_SHIFT;
                 obd_end >>= PAGE_CACHE_SHIFT;
 
                 obd_start >>= PAGE_CACHE_SHIFT;
                 obd_end >>= PAGE_CACHE_SHIFT;
 
-                CDEBUG(D_INODE, "offs [%lu,%lu] => offs [%lu,%lu] stripe %d\n", 
-                       start, end, (unsigned long)obd_start, 
+                CDEBUG(D_INODE, "offs [%lu,%lu] => offs [%lu,%lu] stripe %d\n",
+                       start, end, (unsigned long)obd_start,
                        (unsigned long)obd_end, loi->loi_ost_idx);
                 submd->lsm_oinfo[0].loi_dirty_ot = &loi->loi_dirty_ot_inline;
                        (unsigned long)obd_end, loi->loi_ost_idx);
                 submd->lsm_oinfo[0].loi_dirty_ot = &loi->loi_dirty_ot_inline;
-                rc = obd_clear_dirty_pages(&lov->tgts[loi->loi_ost_idx].conn, 
+                rc = obd_clear_dirty_pages(&lov->tgts[loi->loi_ost_idx].conn,
                                            submd, obd_start, obd_end,
                                            &osc_cleared);
                 if (rc)
                                            submd, obd_start, obd_end,
                                            &osc_cleared);
                 if (rc)
@@ -2310,15 +2554,14 @@ static int lov_last_dirty_offset(struct lustre_handle *conn,
         *offset = 0;
         lov = &export->exp_obd->u.lov;
         rc = -ENOENT;
         *offset = 0;
         lov = &export->exp_obd->u.lov;
         rc = -ENOENT;
-        for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; 
-                                          i++, loi++) {
 
 
+        for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++){
                 count = lsm->lsm_stripe_size >> PAGE_CACHE_SHIFT;
                 skip = (lsm->lsm_stripe_count - 1) * count;
 
                 submd->lsm_oinfo[0].loi_dirty_ot = &loi->loi_dirty_ot_inline;
 
                 count = lsm->lsm_stripe_size >> PAGE_CACHE_SHIFT;
                 skip = (lsm->lsm_stripe_count - 1) * count;
 
                 submd->lsm_oinfo[0].loi_dirty_ot = &loi->loi_dirty_ot_inline;
 
-                err = obd_last_dirty_offset(&lov->tgts[loi->loi_ost_idx].conn, 
+                err = obd_last_dirty_offset(&lov->tgts[loi->loi_ost_idx].conn,
                                             submd, &tmp);
                 if (err == -ENOENT)
                         continue;
                                             submd, &tmp);
                 if (err == -ENOENT)
                         continue;
@@ -2326,7 +2569,7 @@ static int lov_last_dirty_offset(struct lustre_handle *conn,
                         GOTO(out_exp, rc = err);
 
                 rc = 0;
                         GOTO(out_exp, rc = err);
 
                 rc = 0;
-                if (tmp != ~0) 
+                if (tmp != ~0)
                         tmp += (tmp/count * skip) + (i * count);
                 if (tmp > *offset)
                         *offset = tmp;
                         tmp += (tmp/count * skip) + (i * count);
                 if (tmp > *offset)
                         *offset = tmp;
@@ -2338,6 +2581,100 @@ out_exp:
         RETURN(rc);
 }
 
         RETURN(rc);
 }
 
+/* For LOV catalogs, we "nest" catalogs from the parent catalog.  What this
+ * means is that the parent catalog has a bunch of log cookies that are
+ * pointing at one catalog for each OSC.  The OSC catalogs in turn hold
+ * cookies for actual log files. */
+static int lov_get_catalogs(struct lov_obd *lov, struct llog_handle *cathandle)
+{
+        int i, rc;
+
+        ENTRY;
+        for (i = 0; i < lov->desc.ld_tgt_count; i++) {
+                lov->tgts[i].ltd_cathandle = llog_new_log(cathandle,
+                                                          &lov->tgts[i].uuid);
+                if (IS_ERR(lov->tgts[i].ltd_cathandle))
+                        continue;
+                rc = llog_init_catalog(cathandle, &lov->tgts[i].uuid);
+                if (rc)
+                        GOTO(err_logs, rc);
+        }
+        lov->lo_catalog_loaded = 1;
+        RETURN(0);
+err_logs:
+        while (i-- > 0) {
+                llog_delete_log(cathandle, lov->tgts[i].ltd_cathandle);
+                llog_close_log(cathandle, lov->tgts[i].ltd_cathandle);
+        }
+        return rc;
+}
+
+/* Add log records for each OSC that this object is striped over, and return
+ * cookies for each one.  We _would_ have nice abstraction here, except that
+ * we need to keep cookies in stripe order, even if some are NULL, so that
+ * the right cookies are passed back to the right OSTs at the client side.
+ * Unset cookies should be all-zero (which will never occur naturally). */
+static int lov_log_add(struct lustre_handle *conn,
+                       struct llog_handle *cathandle,
+                       struct llog_trans_hdr *rec, struct lov_stripe_md *lsm,
+                       struct llog_cookie *logcookies, int numcookies)
+{
+        struct obd_device *obd = class_conn2obd(conn);
+        struct lov_obd *lov = &obd->u.lov;
+        struct lov_oinfo *loi;
+        int i, rc = 0;
+        ENTRY;
+
+        LASSERT(logcookies && numcookies >= lsm->lsm_stripe_count);
+
+        if (unlikely(!lov->lo_catalog_loaded))
+                lov_get_catalogs(lov, cathandle);
+
+        for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
+                rc += obd_log_add(&lov->tgts[loi->loi_ost_idx].conn,
+                                  lov->tgts[loi->loi_ost_idx].ltd_cathandle,
+                                  rec, NULL, logcookies + rc, numcookies - rc);
+        }
+
+        RETURN(rc);
+}
+
+static int lov_log_cancel(struct lustre_handle *conn, struct lov_stripe_md *lsm,
+                          int count, struct llog_cookie *cookies, int flags)
+{
+        struct obd_export *export = class_conn2export(conn);
+        struct lov_obd *lov;
+        struct lov_oinfo *loi;
+        int rc = 0, i;
+        ENTRY;
+
+        LASSERT(lsm != NULL);
+        if (export == NULL || export->exp_obd == NULL)
+                GOTO(out, rc = -ENODEV);
+
+        LASSERT(count == lsm->lsm_stripe_count);
+
+        loi = lsm->lsm_oinfo;
+        lov = &export->exp_obd->u.lov;
+        for (i = 0; i < count; i++, cookies++, loi++) {
+                int err;
+
+                err = obd_log_cancel(&lov->tgts[loi->loi_ost_idx].conn,
+                                     NULL, 1, cookies, flags);
+                if (err && lov->tgts[loi->loi_ost_idx].active) {
+                        CERROR("error: objid "LPX64" subobj "LPX64
+                               " on OST idx %d: rc = %d\n", lsm->lsm_object_id,
+                               loi->loi_id, loi->loi_ost_idx, err);
+                        if (!rc)
+                                rc = err;
+                }
+        }
+        GOTO(out, rc);
+ out:
+        class_export_put(export);
+        return rc;
+}
+
 struct obd_ops lov_obd_ops = {
         o_owner:       THIS_MODULE,
         o_attach:      lov_attach,
 struct obd_ops lov_obd_ops = {
         o_owner:       THIS_MODULE,
         o_attach:      lov_attach,
@@ -2364,9 +2701,12 @@ struct obd_ops lov_obd_ops = {
         o_cancel_unused: lov_cancel_unused,
         o_iocontrol:   lov_iocontrol,
         o_get_info:    lov_get_info,
         o_cancel_unused: lov_cancel_unused,
         o_iocontrol:   lov_iocontrol,
         o_get_info:    lov_get_info,
-        .o_mark_page_dirty =    lov_mark_page_dirty,
-        .o_clear_dirty_pages =    lov_clear_dirty_pages,
-        .o_last_dirty_offset =    lov_last_dirty_offset,
+        o_set_info:    lov_set_info,
+        o_log_add:     lov_log_add,
+        o_log_cancel:  lov_log_cancel,
+        o_mark_page_dirty:   lov_mark_page_dirty,
+        o_clear_dirty_pages: lov_clear_dirty_pages,
+        o_last_dirty_offset: lov_last_dirty_offset,
 };
 
 int __init lov_init(void)
 };
 
 int __init lov_init(void)
@@ -2374,15 +2714,13 @@ int __init lov_init(void)
         struct lprocfs_static_vars lvars;
         int rc;
 
         struct lprocfs_static_vars lvars;
         int rc;
 
-        printk(KERN_INFO "Lustre Logical Object Volume driver; "
-               "info@clusterfs.com\n");
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(lov, &lvars);
         rc = class_register_type(&lov_obd_ops, lvars.module_vars,
                                  OBD_LOV_DEVICENAME);
         RETURN(rc);
 }
 
         rc = class_register_type(&lov_obd_ops, lvars.module_vars,
                                  OBD_LOV_DEVICENAME);
         RETURN(rc);
 }
 
-static void __exit lov_exit(void)
+static void /*__exit*/ lov_exit(void)
 {
         class_unregister_type(OBD_LOV_DEVICENAME);
 }
 {
         class_unregister_type(OBD_LOV_DEVICENAME);
 }
index bbb40de..a719aac 100644 (file)
@@ -34,6 +34,8 @@
 #include <linux/obd_class.h>
 #include <linux/obd_support.h>
 
 #include <linux/obd_class.h>
 #include <linux/obd_support.h>
 
+#include "lov_internal.h"
+
 void lov_dump_lmm(int level, struct lov_mds_md *lmm)
 {
         struct lov_object_id *loi;
 void lov_dump_lmm(int level, struct lov_mds_md *lmm)
 {
         struct lov_object_id *loi;
@@ -129,14 +131,14 @@ int lov_packmd(struct lustre_handle *conn, struct lov_mds_md **lmmp,
         for (i = 0, loi = lsm->lsm_oinfo; i < stripe_count; i++, loi++) {
                 /* XXX call down to osc_packmd() to do the packing */
                 LASSERT (loi->loi_id);
         for (i = 0, loi = lsm->lsm_oinfo; i < stripe_count; i++, loi++) {
                 /* XXX call down to osc_packmd() to do the packing */
                 LASSERT (loi->loi_id);
-                lmm->lmm_objects[loi->loi_ost_idx].l_object_id = 
+                lmm->lmm_objects[loi->loi_ost_idx].l_object_id =
                         cpu_to_le64 (loi->loi_id);
         }
 
         RETURN(lmm_size);
 }
 
                         cpu_to_le64 (loi->loi_id);
         }
 
         RETURN(lmm_size);
 }
 
-static int lov_get_stripecnt(struct lov_obd *lov, int stripe_count)
+int lov_get_stripecnt(struct lov_obd *lov, int stripe_count)
 {
         if (!stripe_count)
                 stripe_count = lov->desc.ld_default_stripe_count;
 {
         if (!stripe_count)
                 stripe_count = lov->desc.ld_default_stripe_count;
@@ -146,6 +148,90 @@ static int lov_get_stripecnt(struct lov_obd *lov, int stripe_count)
         return stripe_count;
 }
 
         return stripe_count;
 }
 
+static int lov_verify_lmm(struct lov_mds_md *lmm, int lmm_bytes,
+                          int *ost_count, int *stripe_count, int *ost_offset)
+{
+        if (lmm_bytes < sizeof(*lmm)) {
+                CERROR("lov_mds_md too small: %d, need at least %d\n",
+                       lmm_bytes, (int)sizeof(*lmm));
+                return -EINVAL;
+        }
+
+        if (le32_to_cpu(lmm->lmm_magic) != LOV_MAGIC) {
+                CERROR("bad disk LOV MAGIC: %#08x != %#08x\n",
+                       le32_to_cpu(lmm->lmm_magic), LOV_MAGIC);
+                lov_dump_lmm(D_WARNING, lmm);
+                return -EINVAL;
+        }
+
+        *ost_count = le16_to_cpu(lmm->lmm_ost_count);
+        *stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
+        *ost_offset = le32_to_cpu(lmm->lmm_stripe_offset);
+
+        if (*ost_count == 0 || *stripe_count == 0) {
+                CERROR("zero OST count %d or stripe count %d\n",
+                       *ost_count, *stripe_count);
+                lov_dump_lmm(D_WARNING, lmm);
+                return -EINVAL;
+        }
+
+        if (lmm_bytes < lov_mds_md_size(*ost_count)) {
+                CERROR("lov_mds_md too small: %d, need %d\n",
+                       lmm_bytes, lov_mds_md_size(*ost_count));
+                lov_dump_lmm(D_WARNING, lmm);
+                return -EINVAL;
+        }
+
+        if (*ost_offset > *ost_count) {
+                CERROR("starting OST offset %d > number of OSTs %d\n",
+                       *ost_offset, *ost_count);
+                lov_dump_lmm(D_WARNING, lmm);
+                return -EINVAL;
+        }
+
+        if (*stripe_count > *ost_count) {
+                CERROR("stripe count %d > number of OSTs %d\n",
+                       *stripe_count, *ost_count);
+                lov_dump_lmm(D_WARNING, lmm);
+                return -EINVAL;
+        }
+
+        if (lmm->lmm_object_id == 0) {
+                CERROR("zero object id\n");
+                lov_dump_lmm(D_WARNING, lmm);
+                return -EINVAL;
+        }
+
+        return 0;
+}
+
+int lov_alloc_memmd(struct lov_stripe_md **lsmp, int stripe_count)
+{
+        int lsm_size = lov_stripe_md_size(stripe_count);
+        struct lov_oinfo *loi;
+        int i;
+
+        OBD_ALLOC(*lsmp, lsm_size);
+        if (!*lsmp)
+                return -ENOMEM;
+
+        (*lsmp)->lsm_magic = LOV_MAGIC;
+        (*lsmp)->lsm_stripe_count = stripe_count;
+        (*lsmp)->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES * stripe_count;
+
+        for (i = 0, loi = (*lsmp)->lsm_oinfo; i < stripe_count; i++, loi++){
+                loi->loi_dirty_ot = &loi->loi_dirty_ot_inline;
+                ot_init(loi->loi_dirty_ot);
+        }
+        return lsm_size;
+}
+
+void lov_free_memmd(struct lov_stripe_md **lsmp)
+{
+        OBD_FREE(*lsmp, lov_stripe_md_size((*lsmp)->lsm_stripe_count));
+        *lsmp = NULL;
+}
+
 /* Unpack LOV object metadata from disk storage.  It is packed in LE byte
  * order and is opaque to the networking layer.
  */
 /* Unpack LOV object metadata from disk storage.  It is packed in LE byte
  * order and is opaque to the networking layer.
  */
@@ -156,75 +242,48 @@ int lov_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
         struct lov_obd *lov = &obd->u.lov;
         struct lov_stripe_md *lsm;
         struct lov_oinfo *loi;
         struct lov_obd *lov = &obd->u.lov;
         struct lov_stripe_md *lsm;
         struct lov_oinfo *loi;
-        int ost_count = 0;
-        int ost_offset = 0;
+        int ost_count;
+        int ost_offset;
         int stripe_count;
         int lsm_size;
         int i;
         ENTRY;
 
         int stripe_count;
         int lsm_size;
         int i;
         ENTRY;
 
+        /* If passed an MDS struct use values from there, otherwise defaults */
         if (lmm) {
         if (lmm) {
-                if (lmm_bytes < sizeof (*lmm)) {
-                        CERROR("lov_mds_md too small: %d, need %d\n",
-                                lmm_bytes, (int)sizeof(*lmm));
-                        RETURN(-EINVAL);
-                }
-                if (le32_to_cpu (lmm->lmm_magic) != LOV_MAGIC) {
-                        CERROR("bad disk LOV MAGIC: %#08x != %#08x\n",
-                               le32_to_cpu (lmm->lmm_magic), LOV_MAGIC);
-                        RETURN(-EINVAL);
-                }
-
-                ost_count = le16_to_cpu (lmm->lmm_ost_count);
-                stripe_count = le16_to_cpu (lmm->lmm_stripe_count);
-
-                if (ost_count == 0 || stripe_count == 0) {
-                        CERROR ("zero ost %d or stripe %d count\n",
-                                ost_count, stripe_count);
-                        RETURN (-EINVAL);
-                }
-
-                if (lmm_bytes < lov_mds_md_size (ost_count)) {
-                        CERROR ("lov_mds_md too small: %d, need %d\n",
-                                lmm_bytes, lov_mds_md_size (ost_count));
-                        RETURN (-EINVAL);
-                }
-        } else
+                i = lov_verify_lmm(lmm, lmm_bytes, &ost_count, &stripe_count,
+                                   &ost_offset);
+                if (i)
+                        RETURN(i);
+        } else {
+                ost_count = 0;
                 stripe_count = lov_get_stripecnt(lov, 0);
                 stripe_count = lov_get_stripecnt(lov, 0);
+                ost_offset = 0;
+        }
 
 
-        /* XXX LOV STACKING call into osc for sizes */
-        lsm_size = lov_stripe_md_size(stripe_count);
-
+        /* If we aren't passed an lsmp struct, we just want the size */
         if (!lsmp)
         if (!lsmp)
-                RETURN(lsm_size);
+                /* XXX LOV STACKING call into osc for sizes */
+                RETURN(lov_stripe_md_size(stripe_count));
 
 
+        /* If we are passed an allocated struct but nothing to unpack, free */
         if (*lsmp && !lmm) {
         if (*lsmp && !lmm) {
-                stripe_count = (*lsmp)->lsm_stripe_count;
-                OBD_FREE(*lsmp, lov_stripe_md_size(stripe_count));
-                *lsmp = NULL;
+                lov_free_memmd(lsmp);
                 RETURN(0);
         }
 
                 RETURN(0);
         }
 
-        if (!*lsmp) {
-                OBD_ALLOC(*lsmp, lsm_size);
-                if (!*lsmp)
-                        RETURN(-ENOMEM);
-        }
-
-        lsm = *lsmp;
-        lsm->lsm_magic = LOV_MAGIC;
-        lsm->lsm_stripe_count = stripe_count;
-        lsm->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES * stripe_count;
+        lsm_size = lov_alloc_memmd(lsmp, stripe_count);
+        if (lsm_size < 0)
+                RETURN(lsm_size);
 
 
+        /* If we are passed a pointer but nothing to unpack, we only alloc */
         if (!lmm)
                 RETURN(lsm_size);
 
         if (!lmm)
                 RETURN(lsm_size);
 
-        lsm->lsm_object_id = le64_to_cpu (lmm->lmm_object_id);
-        lsm->lsm_stripe_size = le32_to_cpu (lmm->lmm_stripe_size);
-        ost_offset = lsm->lsm_stripe_offset = le32_to_cpu (lmm->lmm_stripe_offset);
-
-        LMM_ASSERT(lsm->lsm_object_id);
-        LMM_ASSERT(ost_count);
+        lsm = *lsmp;
+        lsm->lsm_object_id = le64_to_cpu(lmm->lmm_object_id);
+        lsm->lsm_stripe_size = le32_to_cpu(lmm->lmm_stripe_size);
+        lsm->lsm_stripe_offset = ost_offset;
 
         for (i = 0, loi = lsm->lsm_oinfo; i < ost_count; i++, ost_offset++) {
                 ost_offset %= ost_count;
 
         for (i = 0, loi = lsm->lsm_oinfo; i < ost_count; i++, ost_offset++) {
                 ost_offset %= ost_count;
@@ -232,17 +291,20 @@ int lov_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
                 if (!lmm->lmm_objects[ost_offset].l_object_id)
                         continue;
 
                 if (!lmm->lmm_objects[ost_offset].l_object_id)
                         continue;
 
-                LMM_ASSERT(loi - lsm->lsm_oinfo < stripe_count);
                 /* XXX LOV STACKING call down to osc_unpackmd() */
                 loi->loi_id =
                 /* XXX LOV STACKING call down to osc_unpackmd() */
                 loi->loi_id =
-                        le64_to_cpu (lmm->lmm_objects[ost_offset].l_object_id);
+                        le64_to_cpu(lmm->lmm_objects[ost_offset].l_object_id);
                 loi->loi_ost_idx = ost_offset;
                 loi->loi_ost_idx = ost_offset;
-                loi->loi_dirty_ot = &loi->loi_dirty_ot_inline;
-                ot_init(loi->loi_dirty_ot);
                 loi++;
         }
                 loi++;
         }
-        LMM_ASSERT(loi - lsm->lsm_oinfo > 0);
-        LMM_ASSERT(loi - lsm->lsm_oinfo == stripe_count);
+
+        if (loi - lsm->lsm_oinfo != stripe_count) {
+                CERROR("missing objects in lmm struct\n");
+                lov_dump_lmm(D_WARNING, lmm);
+                lov_free_memmd(lsmp);
+                RETURN(-EINVAL);
+        }
+
 
         RETURN(lsm_size);
 }
 
         RETURN(lsm_size);
 }
@@ -260,7 +322,6 @@ int lov_setstripe(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
         struct obd_device *obd = class_conn2obd(conn);
         struct lov_obd *lov = &obd->u.lov;
         struct lov_mds_md lmm;
         struct obd_device *obd = class_conn2obd(conn);
         struct lov_obd *lov = &obd->u.lov;
         struct lov_mds_md lmm;
-        struct lov_stripe_md *lsm;
         int stripe_count;
         int rc;
         ENTRY;
         int stripe_count;
         int rc;
         ENTRY;
@@ -272,7 +333,7 @@ int lov_setstripe(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
         /* Bug 1185 FIXME: struct lov_mds_md is little-endian everywhere else */
 
         if (lmm.lmm_magic != LOV_MAGIC) {
         /* Bug 1185 FIXME: struct lov_mds_md is little-endian everywhere else */
 
         if (lmm.lmm_magic != LOV_MAGIC) {
-                CERROR("bad userland LOV MAGIC: %#08x != %#08x\n",
+                CDEBUG(D_IOCTL, "bad userland LOV MAGIC: %#08x != %#08x\n",
                        lmm.lmm_magic, LOV_MAGIC);
                 RETURN(-EINVAL);
         }
                        lmm.lmm_magic, LOV_MAGIC);
                 RETURN(-EINVAL);
         }
@@ -291,32 +352,27 @@ int lov_setstripe(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
         }
 #endif
         if (lmm.lmm_stripe_size & (PAGE_SIZE - 1)) {
         }
 #endif
         if (lmm.lmm_stripe_size & (PAGE_SIZE - 1)) {
-                CERROR("stripe size %u not multiple of %lu\n",
+                CDEBUG(D_IOCTL, "stripe size %u not multiple of %lu\n",
                        lmm.lmm_stripe_size, PAGE_SIZE);
                 RETURN(-EINVAL);
         }
         stripe_count = lov_get_stripecnt(lov, lmm.lmm_stripe_count);
 
         if ((__u64)lmm.lmm_stripe_size * stripe_count > ~0UL) {
                        lmm.lmm_stripe_size, PAGE_SIZE);
                 RETURN(-EINVAL);
         }
         stripe_count = lov_get_stripecnt(lov, lmm.lmm_stripe_count);
 
         if ((__u64)lmm.lmm_stripe_size * stripe_count > ~0UL) {
-                CERROR("stripe width %ux%u > %lu on 32-bit system\n",
+                CDEBUG(D_IOCTL, "stripe width %ux%u > %lu on 32-bit system\n",
                        lmm.lmm_stripe_size, (int)lmm.lmm_stripe_count, ~0UL);
                 RETURN(-EINVAL);
         }
 
                        lmm.lmm_stripe_size, (int)lmm.lmm_stripe_count, ~0UL);
                 RETURN(-EINVAL);
         }
 
-        /* XXX LOV STACKING call into osc for sizes */
-        OBD_ALLOC(lsm, lov_stripe_md_size(stripe_count));
-        if (!lsm)
-                RETURN(-ENOMEM);
+        rc = lov_alloc_memmd(lsmp, stripe_count);
 
 
-        lsm->lsm_magic = LOV_MAGIC;
-        lsm->lsm_stripe_count = stripe_count;
-        lsm->lsm_stripe_offset = lmm.lmm_stripe_offset;
-        lsm->lsm_stripe_size = lmm.lmm_stripe_size;
-        lsm->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES * stripe_count;
+        if (rc < 0)
+                RETURN(rc);
 
 
-        *lsmp = lsm;
+        (*lsmp)->lsm_stripe_offset = lmm.lmm_stripe_offset;
+        (*lsmp)->lsm_stripe_size = lmm.lmm_stripe_size;
 
 
-        RETURN(rc);
+        RETURN(0);
 }
 
 /* Retrieve object striping information.
 }
 
 /* Retrieve object striping information.
index e0b3adb..7b7a00c 100644 (file)
 #include <linux/seq_file.h>
 
 #ifndef LPROCFS
 #include <linux/seq_file.h>
 
 #ifndef LPROCFS
-struct lprocfs_vars lprocfs_module_vars[] = { {0} };
-struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
 #else
 
 #else
 
-DEFINE_LPROCFS_STATFS_FCT(rd_blksize,     obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree,  obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filestotal,  obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filesfree,   obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filegroups,  obd_self_statfs);
-
-int rd_stripesize(char *page, char **start, off_t off, int count, int *eof,
-                  void *data)
+static int lov_rd_stripesize(char *page, char **start, off_t off, int count,
+                             int *eof, void *data)
 {
         struct obd_device *dev = (struct obd_device *)data;
         struct lov_desc *desc;
 {
         struct obd_device *dev = (struct obd_device *)data;
         struct lov_desc *desc;
@@ -53,8 +46,8 @@ int rd_stripesize(char *page, char **start, off_t off, int count, int *eof,
         return snprintf(page, count, LPU64"\n", desc->ld_default_stripe_size);
 }
 
         return snprintf(page, count, LPU64"\n", desc->ld_default_stripe_size);
 }
 
-int rd_stripeoffset(char *page, char **start, off_t off, int count, int *eof,
-                    void *data)
+static int lov_rd_stripeoffset(char *page, char **start, off_t off, int count,
+                               int *eof, void *data)
 {
         struct obd_device *dev = (struct obd_device *)data;
         struct lov_desc *desc;
 {
         struct obd_device *dev = (struct obd_device *)data;
         struct lov_desc *desc;
@@ -65,8 +58,8 @@ int rd_stripeoffset(char *page, char **start, off_t off, int count, int *eof,
         return snprintf(page, count, LPU64"\n", desc->ld_default_stripe_offset);
 }
 
         return snprintf(page, count, LPU64"\n", desc->ld_default_stripe_offset);
 }
 
-int rd_stripetype(char *page, char **start, off_t off, int count, int *eof,
-                  void *data)
+static int lov_rd_stripetype(char *page, char **start, off_t off, int count,
+                             int *eof, void *data)
 {
         struct obd_device* dev = (struct obd_device*)data;
         struct lov_desc *desc;
 {
         struct obd_device* dev = (struct obd_device*)data;
         struct lov_desc *desc;
@@ -77,8 +70,8 @@ int rd_stripetype(char *page, char **start, off_t off, int count, int *eof,
         return snprintf(page, count, "%u\n", desc->ld_pattern);
 }
 
         return snprintf(page, count, "%u\n", desc->ld_pattern);
 }
 
-int rd_stripecount(char *page, char **start, off_t off, int count, int *eof,
-                   void *data)
+static int lov_rd_stripecount(char *page, char **start, off_t off, int count,
+                              int *eof, void *data)
 {
         struct obd_device *dev = (struct obd_device *)data;
         struct lov_desc *desc;
 {
         struct obd_device *dev = (struct obd_device *)data;
         struct lov_desc *desc;
@@ -89,8 +82,8 @@ int rd_stripecount(char *page, char **start, off_t off, int count, int *eof,
         return snprintf(page, count, "%u\n", desc->ld_default_stripe_count);
 }
 
         return snprintf(page, count, "%u\n", desc->ld_default_stripe_count);
 }
 
-int rd_numobd(char *page, char **start, off_t off, int count, int *eof,
-              void *data)
+static int lov_rd_numobd(char *page, char **start, off_t off, int count,
+                         int *eof, void *data)
 {
         struct obd_device *dev = (struct obd_device*)data;
         struct lov_desc *desc;
 {
         struct obd_device *dev = (struct obd_device*)data;
         struct lov_desc *desc;
@@ -102,8 +95,8 @@ int rd_numobd(char *page, char **start, off_t off, int count, int *eof,
 
 }
 
 
 }
 
-int rd_activeobd(char *page, char **start, off_t off, int count, int *eof,
-                 void *data)
+static int lov_rd_activeobd(char *page, char **start, off_t off, int count,
+                            int *eof, void *data)
 {
         struct obd_device* dev = (struct obd_device*)data;
         struct lov_desc *desc;
 {
         struct obd_device* dev = (struct obd_device*)data;
         struct lov_desc *desc;
@@ -114,7 +107,8 @@ int rd_activeobd(char *page, char **start, off_t off, int count, int *eof,
         return snprintf(page, count, "%u\n", desc->ld_active_tgt_count);
 }
 
         return snprintf(page, count, "%u\n", desc->ld_active_tgt_count);
 }
 
-int rd_mdc(char *page, char **start, off_t off, int count, int *eof, void *data)
+static int lov_rd_mdc(char *page, char **start, off_t off, int count, int *eof,
+                      void *data)
 {
         struct obd_device *dev = (struct obd_device*) data;
         struct lov_obd *lov;
 {
         struct obd_device *dev = (struct obd_device*) data;
         struct lov_obd *lov;
@@ -125,7 +119,7 @@ int rd_mdc(char *page, char **start, off_t off, int count, int *eof, void *data)
         return snprintf(page, count, "%s\n", lov->mdcobd->obd_uuid.uuid);
 }
 
         return snprintf(page, count, "%s\n", lov->mdcobd->obd_uuid.uuid);
 }
 
-static void *ll_tgt_seq_start(struct seq_file *p, loff_t *pos)
+static void *lov_tgt_seq_start(struct seq_file *p, loff_t *pos)
 {
         struct obd_device *dev = p->private;
         struct lov_obd *lov = &dev->u.lov;
 {
         struct obd_device *dev = p->private;
         struct lov_obd *lov = &dev->u.lov;
@@ -133,12 +127,12 @@ static void *ll_tgt_seq_start(struct seq_file *p, loff_t *pos)
         return (*pos >= lov->desc.ld_tgt_count) ? NULL : &(lov->tgts[*pos]);
 
 }
         return (*pos >= lov->desc.ld_tgt_count) ? NULL : &(lov->tgts[*pos]);
 
 }
-static void ll_tgt_seq_stop(struct seq_file *p, void *v)
-{
 
 
+static void lov_tgt_seq_stop(struct seq_file *p, void *v)
+{
 }
 
 }
 
-static void *ll_tgt_seq_next(struct seq_file *p, void *v, loff_t *pos)
+static void *lov_tgt_seq_next(struct seq_file *p, void *v, loff_t *pos)
 {
         struct obd_device *dev = p->private;
         struct lov_obd *lov = &dev->u.lov;
 {
         struct obd_device *dev = p->private;
         struct lov_obd *lov = &dev->u.lov;
@@ -147,7 +141,7 @@ static void *ll_tgt_seq_next(struct seq_file *p, void *v, loff_t *pos)
         return (*pos >=lov->desc.ld_tgt_count) ? NULL : &(lov->tgts[*pos]);
 }
 
         return (*pos >=lov->desc.ld_tgt_count) ? NULL : &(lov->tgts[*pos]);
 }
 
-static int ll_tgt_seq_show(struct seq_file *p, void *v)
+static int lov_tgt_seq_show(struct seq_file *p, void *v)
 {
         struct lov_tgt_desc *tgt = v;
         struct obd_device *dev = p->private;
 {
         struct lov_tgt_desc *tgt = v;
         struct obd_device *dev = p->private;
@@ -157,18 +151,18 @@ static int ll_tgt_seq_show(struct seq_file *p, void *v)
                           tgt->active ? "" : "IN");
 }
 
                           tgt->active ? "" : "IN");
 }
 
-struct seq_operations ll_tgt_sops = {
-        .start = ll_tgt_seq_start,
-        .stop = ll_tgt_seq_stop,
-        .next = ll_tgt_seq_next,
-        .show = ll_tgt_seq_show,
+struct seq_operations lov_tgt_sops = {
+        .start = lov_tgt_seq_start,
+        .stop = lov_tgt_seq_stop,
+        .next = lov_tgt_seq_next,
+        .show = lov_tgt_seq_show,
 };
 
 };
 
-static int ll_target_seq_open(struct inode *inode, struct file *file)
+static int lov_target_seq_open(struct inode *inode, struct file *file)
 {
         struct proc_dir_entry *dp = inode->u.generic_ip;
         struct seq_file *seq;
 {
         struct proc_dir_entry *dp = inode->u.generic_ip;
         struct seq_file *seq;
-        int rc = seq_open(file, &ll_tgt_sops);
+        int rc = seq_open(file, &lov_tgt_sops);
 
         if (rc)
                 return rc;
 
         if (rc)
                 return rc;
@@ -178,35 +172,36 @@ static int ll_target_seq_open(struct inode *inode, struct file *file)
 
         return 0;
 }
 
         return 0;
 }
+
 struct lprocfs_vars lprocfs_obd_vars[] = {
 struct lprocfs_vars lprocfs_obd_vars[] = {
-        { "uuid",         lprocfs_rd_uuid, 0, 0 },
-        { "stripesize",   rd_stripesize,   0, 0 },
-        { "stripeoffset", rd_stripeoffset, 0, 0 },
-        { "stripecount",  rd_stripecount,  0, 0 },
-        { "stripetype",   rd_stripetype,   0, 0 },
-        { "numobd",       rd_numobd,       0, 0 },
-        { "activeobd",    rd_activeobd,    0, 0 },
-        { "filestotal",   rd_filestotal,   0, 0 },
-        { "filesfree",    rd_filesfree,    0, 0 },
-        { "filegroups",   rd_filegroups,   0, 0 },
-        { "blocksize",    rd_blksize,      0, 0 },
-        { "kbytestotal",  rd_kbytestotal,  0, 0 },
-        { "kbytesfree",   rd_kbytesfree,   0, 0 },
-        { "target_mdc",   rd_mdc,          0, 0 },
+        { "uuid",         lprocfs_rd_uuid,        0, 0 },
+        { "stripesize",   lov_rd_stripesize,      0, 0 },
+        { "stripeoffset", lov_rd_stripeoffset,    0, 0 },
+        { "stripecount",  lov_rd_stripecount,     0, 0 },
+        { "stripetype",   lov_rd_stripetype,      0, 0 },
+        { "numobd",       lov_rd_numobd,          0, 0 },
+        { "activeobd",    lov_rd_activeobd,       0, 0 },
+        { "filestotal",   lprocfs_rd_filestotal,  0, 0 },
+        { "filesfree",    lprocfs_rd_filesfree,   0, 0 },
+        //{ "filegroups",   lprocfs_rd_filegroups,  0, 0 },
+        { "blocksize",    lprocfs_rd_blksize,     0, 0 },
+        { "kbytestotal",  lprocfs_rd_kbytestotal, 0, 0 },
+        { "kbytesfree",   lprocfs_rd_kbytesfree,  0, 0 },
+        { "target_mdc",   lov_rd_mdc,             0, 0 },
         { 0 }
 };
 
         { 0 }
 };
 
-struct lprocfs_vars lprocfs_module_vars[] = {
-        { "num_refs",     lprocfs_rd_numrefs, 0, 0 },
+static struct lprocfs_vars lprocfs_module_vars[] = {
+        { "num_refs",     lprocfs_rd_numrefs,     0, 0 },
         { 0 }
 };
 
         { 0 }
 };
 
-struct file_operations ll_proc_target_fops = {
-        .open = ll_target_seq_open,
+struct file_operations lov_proc_target_fops = {
+        .open = lov_target_seq_open,
         .read = seq_read,
         .llseek = seq_lseek,
         .release = seq_release,
 };
 
 #endif /* LPROCFS */
         .read = seq_read,
         .llseek = seq_lseek,
         .release = seq_release,
 };
 
 #endif /* LPROCFS */
-LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
+LPROCFS_INIT_VARS(lov, lprocfs_module_vars, lprocfs_obd_vars)
index e530020..49c6100 100644 (file)
@@ -6,3 +6,4 @@ Makefile
 Makefile.in
 .deps
 TAGS
 Makefile.in
 .deps
 TAGS
+.*.cmd
index 3f81507..6dca228 100644 (file)
 #define DEBUG_SUBSYSTEM S_CLASS
 
 #include <linux/version.h>
 #define DEBUG_SUBSYSTEM S_CLASS
 
 #include <linux/version.h>
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#include <asm/statfs.h>
-#endif
+#include <linux/vfs.h>
 #include <linux/obd_class.h>
 #include <linux/lprocfs_status.h>
 
 #ifndef LPROCFS
 #include <linux/obd_class.h>
 #include <linux/lprocfs_status.h>
 
 #ifndef LPROCFS
-struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
-struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_module_vars[] = { {0} };
 #else
 #else
-
-DEFINE_LPROCFS_STATFS_FCT(rd_blksize,     obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree,  obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filestotal,  obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filesfree,   obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filegroups,  obd_self_statfs);
-
-struct lprocfs_vars lprocfs_obd_vars[] = {
+static struct lprocfs_vars lprocfs_obd_vars[] = {
         { "uuid",            lprocfs_rd_uuid,        0, 0 },
         { "uuid",            lprocfs_rd_uuid,        0, 0 },
-        { "blocksize",       rd_blksize,             0, 0 },
-        { "kbytestotal",     rd_kbytestotal,         0, 0 },
-        { "kbytesfree",      rd_kbytesfree,          0, 0 },
-        { "filestotal",      rd_filestotal,          0, 0 },
-        { "filesfree",       rd_filesfree,           0, 0 },
-        { "filegroups",      rd_filegroups,          0, 0 },
+        { "blocksize",       lprocfs_rd_blksize,     0, 0 },
+        { "kbytestotal",     lprocfs_rd_kbytestotal, 0, 0 },
+        { "kbytesfree",      lprocfs_rd_kbytesfree,  0, 0 },
+        { "filestotal",      lprocfs_rd_filestotal,  0, 0 },
+        { "filesfree",       lprocfs_rd_filesfree,   0, 0 },
+        //{ "filegroups",      lprocfs_rd_filegroups,  0, 0 },
         { "mds_server_uuid", lprocfs_rd_server_uuid, 0, 0 },
         { "mds_conn_uuid",   lprocfs_rd_conn_uuid,   0, 0 },
         { 0 }
 };
 
         { "mds_server_uuid", lprocfs_rd_server_uuid, 0, 0 },
         { "mds_conn_uuid",   lprocfs_rd_conn_uuid,   0, 0 },
         { 0 }
 };
 
-struct lprocfs_vars lprocfs_module_vars[] = {
+static struct lprocfs_vars lprocfs_module_vars[] = {
         { "num_refs",        lprocfs_rd_numrefs,     0, 0 },
         { 0 }
 };
 
 #endif /* LPROCFS */
 
         { "num_refs",        lprocfs_rd_numrefs,     0, 0 },
         { 0 }
 };
 
 #endif /* LPROCFS */
 
-LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
+LPROCFS_INIT_VARS(mdc, lprocfs_module_vars, lprocfs_obd_vars)
index e39a0aa..49d85ab 100644 (file)
@@ -1,24 +1,25 @@
-void mds_pack_req_body(struct ptlrpc_request *);
-void mds_pack_rep_body(struct ptlrpc_request *);
-void mds_readdir_pack(struct ptlrpc_request *req, __u64 offset, __u32 size,
+void mdc_pack_req_body(struct ptlrpc_request *);
+void mdc_pack_rep_body(struct ptlrpc_request *);
+void mdc_readdir_pack(struct ptlrpc_request *req, __u64 offset, __u32 size,
                       obd_id ino, int type);
                       obd_id ino, int type);
-void mds_getattr_pack(struct ptlrpc_request *req, int valid, int offset,
+void mdc_getattr_pack(struct ptlrpc_request *req, int valid, int offset,
                       int flags, struct mdc_op_data *data);
                       int flags, struct mdc_op_data *data);
-void mds_setattr_pack(struct ptlrpc_request *req,
+void mdc_setattr_pack(struct ptlrpc_request *req,
                       struct mdc_op_data *data,
                       struct mdc_op_data *data,
-                      struct iattr *iattr, void *ea, int ealen);
-void mds_create_pack(struct ptlrpc_request *req, int offset,
+                      struct iattr *iattr, void *ea, int ealen,
+                     void *ea2, int ea2len);
+void mdc_create_pack(struct ptlrpc_request *req, int offset,
                      struct mdc_op_data *op_data,
                      __u32 mode, __u64 rdev, __u32 uid, __u32 gid, __u64 time,
                      const void *data, int datalen);
                      struct mdc_op_data *op_data,
                      __u32 mode, __u64 rdev, __u32 uid, __u32 gid, __u64 time,
                      const void *data, int datalen);
-void mds_open_pack(struct ptlrpc_request *req, int offset,
+void mdc_open_pack(struct ptlrpc_request *req, int offset,
                    struct mdc_op_data *op_data,
                    __u32 mode, __u64 rdev, __u32 uid, __u32 gid, __u64 time,
                    __u32 flags, const void *data, int datalen);
                    struct mdc_op_data *op_data,
                    __u32 mode, __u64 rdev, __u32 uid, __u32 gid, __u64 time,
                    __u32 flags, const void *data, int datalen);
-void mds_unlink_pack(struct ptlrpc_request *req, int offset,
+void mdc_unlink_pack(struct ptlrpc_request *req, int offset,
                      struct mdc_op_data *data);
                      struct mdc_op_data *data);
-void mds_link_pack(struct ptlrpc_request *req, int offset,
+void mdc_link_pack(struct ptlrpc_request *req, int offset,
                    struct mdc_op_data *data);
                    struct mdc_op_data *data);
-void mds_rename_pack(struct ptlrpc_request *req, int offset,
+void mdc_rename_pack(struct ptlrpc_request *req, int offset,
                      struct mdc_op_data *data,
                      const char *old, int oldlen, const char *new, int newlen);
                      struct mdc_op_data *data,
                      const char *old, int oldlen, const char *new, int newlen);
index 806a830..a17f7a1 100644 (file)
@@ -28,7 +28,7 @@
 #include <linux/lustre_mds.h>
 #include <linux/lustre_lite.h>
 
 #include <linux/lustre_mds.h>
 #include <linux/lustre_lite.h>
 
-void mds_readdir_pack(struct ptlrpc_request *req, __u64 offset, __u32 size,
+void mdc_readdir_pack(struct ptlrpc_request *req, __u64 offset, __u32 size,
                       obd_id ino, int type, __u64 xid)
 {
         struct mds_body *b;
                       obd_id ino, int type, __u64 xid)
 {
         struct mds_body *b;
@@ -45,7 +45,7 @@ void mds_readdir_pack(struct ptlrpc_request *req, __u64 offset, __u32 size,
         b->nlink = size;                        /* !! */
 }
 
         b->nlink = size;                        /* !! */
 }
 
-static void mds_pack_body(struct mds_body *b)
+static void mdc_pack_body(struct mds_body *b)
 {
         LASSERT (b != NULL);
 
 {
         LASSERT (b != NULL);
 
@@ -54,14 +54,14 @@ static void mds_pack_body(struct mds_body *b)
         b->capability = current->cap_effective;
 }
 
         b->capability = current->cap_effective;
 }
 
-void mds_pack_req_body(struct ptlrpc_request *req)
+void mdc_pack_req_body(struct ptlrpc_request *req)
 {
         struct mds_body *b = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*b));
 {
         struct mds_body *b = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*b));
-        mds_pack_body(b);
+        mdc_pack_body(b);
 }
 
 /* packing of MDS records */
 }
 
 /* packing of MDS records */
-void mds_create_pack(struct ptlrpc_request *req, int offset,
+void mdc_create_pack(struct ptlrpc_request *req, int offset,
                      struct mdc_op_data *op_data,
                      __u32 mode, __u64 rdev, __u32 uid, __u32 gid, __u64 time,
                      const void *data, int datalen)
                      struct mdc_op_data *op_data,
                      __u32 mode, __u64 rdev, __u32 uid, __u32 gid, __u64 time,
                      const void *data, int datalen)
@@ -94,8 +94,9 @@ void mds_create_pack(struct ptlrpc_request *req, int offset,
                 memcpy (tmp, data, datalen);
         }
 }
                 memcpy (tmp, data, datalen);
         }
 }
+
 /* packing of MDS records */
 /* packing of MDS records */
-void mds_open_pack(struct ptlrpc_request *req, int offset,
+void mdc_open_pack(struct ptlrpc_request *req, int offset,
                    struct mdc_op_data *op_data,
                    __u32 mode, __u64 rdev, __u32 uid, __u32 gid, __u64 time,
                    __u32 flags, const void *data, int datalen)
                    struct mdc_op_data *op_data,
                    __u32 mode, __u64 rdev, __u32 uid, __u32 gid, __u64 time,
                    __u32 flags, const void *data, int datalen)
@@ -109,8 +110,9 @@ void mds_open_pack(struct ptlrpc_request *req, int offset,
         rec->cr_fsuid = current->fsuid;
         rec->cr_fsgid = current->fsgid;
         rec->cr_cap = current->cap_effective;
         rec->cr_fsuid = current->fsuid;
         rec->cr_fsgid = current->fsgid;
         rec->cr_cap = current->cap_effective;
-        ll_ino2fid(&rec->cr_fid, op_data->ino1,
-                   op_data->gen1, op_data->typ1);
+        if (op_data != NULL)
+                ll_ino2fid(&rec->cr_fid, op_data->ino1,
+                           op_data->gen1, op_data->typ1);
         memset(&rec->cr_replayfid, 0, sizeof(rec->cr_replayfid));
         rec->cr_mode = mode;
         rec->cr_flags = flags;
         memset(&rec->cr_replayfid, 0, sizeof(rec->cr_replayfid));
         rec->cr_mode = mode;
         rec->cr_flags = flags;
@@ -123,17 +125,22 @@ void mds_open_pack(struct ptlrpc_request *req, int offset,
         else
                 rec->cr_suppgid = -1;
 
         else
                 rec->cr_suppgid = -1;
 
-        tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, op_data->namelen + 1);
-        LOGL0(op_data->name, op_data->namelen, tmp);
+        if (op_data->name) {
+                tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1,
+                                     op_data->namelen + 1);
+                LOGL0(op_data->name, op_data->namelen, tmp);
+        }
 
         if (data) {
                 tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, datalen);
                 memcpy (tmp, data, datalen);
         }
 }
 
         if (data) {
                 tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, datalen);
                 memcpy (tmp, data, datalen);
         }
 }
-void mds_setattr_pack(struct ptlrpc_request *req,
+
+void mdc_setattr_pack(struct ptlrpc_request *req,
                       struct mdc_op_data *data,
                       struct mdc_op_data *data,
-                      struct iattr *iattr, void *ea, int ealen)
+                      struct iattr *iattr, void *ea, int ealen,
+                      void *ea2, int ea2len)
 {
         struct mds_rec_setattr *rec = lustre_msg_buf(req->rq_reqmsg, 0,
                                                      sizeof (*rec));
 {
         struct mds_rec_setattr *rec = lustre_msg_buf(req->rq_reqmsg, 0,
                                                      sizeof (*rec));
@@ -163,11 +170,18 @@ void mds_setattr_pack(struct ptlrpc_request *req,
                         rec->sa_suppgid = -1;
         }
 
                         rec->sa_suppgid = -1;
         }
 
-        if (ealen != 0)
-                memcpy(lustre_msg_buf(req->rq_reqmsg, 1, ealen), ea, ealen);
+        if (ealen == 0)
+                return;
+
+        memcpy(lustre_msg_buf(req->rq_reqmsg, 1, ealen), ea, ealen);
+
+        if (ea2len == 0)
+                return;
+
+        memcpy(lustre_msg_buf(req->rq_reqmsg, 2, ea2len), ea2, ea2len);
 }
 
 }
 
-void mds_unlink_pack(struct ptlrpc_request *req, int offset,
+void mdc_unlink_pack(struct ptlrpc_request *req, int offset,
                      struct mdc_op_data *data)
 {
         struct mds_rec_unlink *rec;
                      struct mdc_op_data *data)
 {
         struct mds_rec_unlink *rec;
@@ -194,7 +208,7 @@ void mds_unlink_pack(struct ptlrpc_request *req, int offset,
         LOGL0(data->name, data->namelen, tmp);
 }
 
         LOGL0(data->name, data->namelen, tmp);
 }
 
-void mds_link_pack(struct ptlrpc_request *req, int offset,
+void mdc_link_pack(struct ptlrpc_request *req, int offset,
                    struct mdc_op_data *data)
 {
         struct mds_rec_link *rec;
                    struct mdc_op_data *data)
 {
         struct mds_rec_link *rec;
@@ -221,7 +235,7 @@ void mds_link_pack(struct ptlrpc_request *req, int offset,
         LOGL0(data->name, data->namelen, tmp);
 }
 
         LOGL0(data->name, data->namelen, tmp);
 }
 
-void mds_rename_pack(struct ptlrpc_request *req, int offset,
+void mdc_rename_pack(struct ptlrpc_request *req, int offset,
                      struct mdc_op_data *data,
                      const char *old, int oldlen, const char *new, int newlen)
 {
                      struct mdc_op_data *data,
                      const char *old, int oldlen, const char *new, int newlen)
 {
@@ -255,7 +269,7 @@ void mds_rename_pack(struct ptlrpc_request *req, int offset,
         }
 }
 
         }
 }
 
-void mds_getattr_pack(struct ptlrpc_request *req, int valid, int offset,
+void mdc_getattr_pack(struct ptlrpc_request *req, int valid, int offset,
                       int flags, struct mdc_op_data *data)
 {
         struct mds_body *b;
                       int flags, struct mdc_op_data *data)
 {
         struct mds_body *b;
index 2da2fdb..4f7443e 100644 (file)
 #include "mdc_internal.h"
 
 /* mdc_setattr does its own semaphore handling */
 #include "mdc_internal.h"
 
 /* mdc_setattr does its own semaphore handling */
-static int mdc_reint(struct ptlrpc_request *request, int level)
+static int mdc_reint(struct ptlrpc_request *request,
+                     struct mdc_rpc_lock *rpc_lock, int level)
 {
         int rc;
 {
         int rc;
-        __u32 *opcodeptr;
+        
 
 
-        opcodeptr = lustre_msg_buf(request->rq_reqmsg, 0, sizeof (*opcodeptr));
         request->rq_level = level;
 
         request->rq_level = level;
 
-        if (!(*opcodeptr == REINT_SETATTR))
-                mdc_get_rpc_lock(&mdc_rpc_lock, NULL);
+        mdc_get_rpc_lock(rpc_lock, NULL);
         rc = ptlrpc_queue_wait(request);
         rc = ptlrpc_queue_wait(request);
-        if (!(*opcodeptr == REINT_SETATTR))
-                mdc_put_rpc_lock(&mdc_rpc_lock, NULL);
-
+        mdc_put_rpc_lock(rpc_lock, NULL);
         if (rc)
                 CDEBUG(D_INFO, "error in handling %d\n", rc);
         if (rc)
                 CDEBUG(D_INFO, "error in handling %d\n", rc);
+        else if (!lustre_swab_repbuf(request, 0, sizeof(struct mds_body),
+                                     lustre_swab_mds_body)) {
+                CERROR ("Can't unpack mds_body\n");
+                rc = -EPROTO;
+        }
         return rc;
 }
 
         return rc;
 }
 
@@ -60,42 +62,45 @@ static int mdc_reint(struct ptlrpc_request *request, int level)
  * If it is called with iattr->ia_valid & ATTR_FROM_OPEN, then it is a
  * magic open-path setattr that should take the setattr semaphore and
  * go to the setattr portal. */
  * If it is called with iattr->ia_valid & ATTR_FROM_OPEN, then it is a
  * magic open-path setattr that should take the setattr semaphore and
  * go to the setattr portal. */
-int mdc_setattr(struct lustre_handle *conn,
-                struct mdc_op_data *data,
-                struct iattr *iattr, void *ea, int ealen,
+int mdc_setattr(struct lustre_handle *conn, struct mdc_op_data *data,
+                struct iattr *iattr, void *ea, int ealen, void *ea2, int ea2len,
                 struct ptlrpc_request **request)
 {
         struct ptlrpc_request *req;
         struct mds_rec_setattr *rec;
         struct mdc_rpc_lock *rpc_lock;
                 struct ptlrpc_request **request)
 {
         struct ptlrpc_request *req;
         struct mds_rec_setattr *rec;
         struct mdc_rpc_lock *rpc_lock;
-        int rc, bufcount = 1, size[2] = {sizeof(*rec), ealen};
+        int rc, bufcount = 1, size[3] = {sizeof(*rec), ealen, ea2len};
         ENTRY;
 
         LASSERT(iattr != NULL);
 
         ENTRY;
 
         LASSERT(iattr != NULL);
 
-        if (ealen > 0)
+        if (ealen > 0) {
                 bufcount = 2;
                 bufcount = 2;
+                if (ea2len > 0)
+                        bufcount = 3;
+        }
 
         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, bufcount,
                               size, NULL);
 
         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, bufcount,
                               size, NULL);
-        if (!req)
+        if (req == NULL)
                 RETURN(-ENOMEM);
 
         if (iattr->ia_valid & ATTR_FROM_OPEN) {
                 req->rq_request_portal = MDS_SETATTR_PORTAL; //XXX FIXME bug 249
                 rpc_lock = &mdc_setattr_lock;
                 RETURN(-ENOMEM);
 
         if (iattr->ia_valid & ATTR_FROM_OPEN) {
                 req->rq_request_portal = MDS_SETATTR_PORTAL; //XXX FIXME bug 249
                 rpc_lock = &mdc_setattr_lock;
-        } else
+        } else {
                 rpc_lock = &mdc_rpc_lock;
                 rpc_lock = &mdc_rpc_lock;
+        }
 
 
-        mds_setattr_pack(req, data, iattr, ea, ealen);
+        if (iattr->ia_valid & (ATTR_MTIME | ATTR_CTIME))
+                CDEBUG(D_INODE, "setting mtime %lu, ctime %lu\n",
+                       iattr->ia_mtime, iattr->ia_ctime);
+        mdc_setattr_pack(req, data, iattr, ea, ealen, ea2, ea2len);
 
         size[0] = sizeof(struct mds_body);
         req->rq_replen = lustre_msg_size(1, size);
 
 
         size[0] = sizeof(struct mds_body);
         req->rq_replen = lustre_msg_size(1, size);
 
-        mdc_get_rpc_lock(rpc_lock, NULL);
-        rc = mdc_reint(req, LUSTRE_CONN_FULL);
-        mdc_put_rpc_lock(rpc_lock, NULL);
-
+        rc = mdc_reint(req, rpc_lock, LUSTRE_CONN_FULL);
         *request = req;
         if (rc == -ERESTARTSYS)
                 rc = 0;
         *request = req;
         if (rc == -ERESTARTSYS)
                 rc = 0;
@@ -103,17 +108,14 @@ int mdc_setattr(struct lustre_handle *conn,
         RETURN(rc);
 }
 
         RETURN(rc);
 }
 
-int mdc_create(struct lustre_handle *conn,
-               struct mdc_op_data *op_data,
-               const void *data, int datalen,
-               int mode, __u32 uid, __u32 gid, __u64 time, __u64 rdev,
-               struct ptlrpc_request **request)
+int mdc_create(struct lustre_handle *conn, struct mdc_op_data *op_data,
+               const void *data, int datalen, int mode, __u32 uid, __u32 gid,
+               __u64 time, __u64 rdev, struct ptlrpc_request **request)
 {
         struct ptlrpc_request *req;
 {
         struct ptlrpc_request *req;
-        int rc, size[3] = {sizeof(struct mds_rec_create),
-                           op_data->namelen + 1, 0};
+        int rc, size[3] = {sizeof(struct mds_rec_create), op_data->namelen + 1};
         int level, bufcount = 2;
         int level, bufcount = 2;
-//        ENTRY;
+        ENTRY;
 
         if (data && datalen) {
                 size[bufcount] = datalen;
 
         if (data && datalen) {
                 size[bufcount] = datalen;
@@ -122,14 +124,12 @@ int mdc_create(struct lustre_handle *conn,
 
         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, bufcount,
                               size, NULL);
 
         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, bufcount,
                               size, NULL);
-        if (!req)
-                return -ENOMEM;
-//                RETURN(-ENOMEM);
+        if (req == NULL)
+                RETURN(-ENOMEM);
 
 
-        /* mds_create_pack fills msg->bufs[1] with name
+        /* mdc_create_pack fills msg->bufs[1] with name
          * and msg->bufs[2] with tgt, for symlinks or lov MD data */
          * and msg->bufs[2] with tgt, for symlinks or lov MD data */
-        mds_create_pack(req, 0, op_data,
-                        mode, rdev, uid, gid, time,
+        mdc_create_pack(req, 0, op_data, mode, rdev, uid, gid, time,
                         data, datalen);
 
         size[0] = sizeof(struct mds_body);
                         data, datalen);
 
         size[0] = sizeof(struct mds_body);
@@ -137,7 +137,7 @@ int mdc_create(struct lustre_handle *conn,
 
         level = LUSTRE_CONN_FULL;
  resend:
 
         level = LUSTRE_CONN_FULL;
  resend:
-        rc = mdc_reint(req, level);
+        rc = mdc_reint(req, &mdc_rpc_lock, level);
         /* Resend if we were told to. */
         if (rc == -ERESTARTSYS) {
                 level = LUSTRE_CONN_RECOVER;
         /* Resend if we were told to. */
         if (rc == -ERESTARTSYS) {
                 level = LUSTRE_CONN_RECOVER;
@@ -148,12 +148,10 @@ int mdc_create(struct lustre_handle *conn,
                 mdc_store_inode_generation(req, 0, 0);
 
         *request = req;
                 mdc_store_inode_generation(req, 0, 0);
 
         *request = req;
-        return rc;
-//        RETURN(rc);
+        RETURN(rc);
 }
 
 }
 
-int mdc_unlink(struct lustre_handle *conn,
-               struct mdc_op_data *data,
+int mdc_unlink(struct lustre_handle *conn, struct mdc_op_data *data,
                struct ptlrpc_request **request)
 {
         struct obd_device *obddev = class_conn2obd(conn);
                struct ptlrpc_request **request)
 {
         struct obd_device *obddev = class_conn2obd(conn);
@@ -162,27 +160,26 @@ int mdc_unlink(struct lustre_handle *conn,
         ENTRY;
 
         LASSERT(req == NULL);
         ENTRY;
 
         LASSERT(req == NULL);
-
         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, 2, size,
                               NULL);
         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, 2, size,
                               NULL);
-        if (!req)
+        if (req == NULL)
                 RETURN(-ENOMEM);
         *request = req;
 
         size[0] = sizeof(struct mds_body);
         size[1] = obddev->u.cli.cl_max_mds_easize;
                 RETURN(-ENOMEM);
         *request = req;
 
         size[0] = sizeof(struct mds_body);
         size[1] = obddev->u.cli.cl_max_mds_easize;
-        req->rq_replen = lustre_msg_size(2, size);
+        size[2] = obddev->u.cli.cl_max_mds_cookiesize;
+        req->rq_replen = lustre_msg_size(3, size);
 
 
-        mds_unlink_pack(req, 0, data);
+        mdc_unlink_pack(req, 0, data);
 
 
-        rc = mdc_reint(req, LUSTRE_CONN_FULL);
+        rc = mdc_reint(req, &mdc_rpc_lock, LUSTRE_CONN_FULL);
         if (rc == -ERESTARTSYS)
                 rc = 0;
         RETURN(rc);
 }
 
         if (rc == -ERESTARTSYS)
                 rc = 0;
         RETURN(rc);
 }
 
-int mdc_link(struct lustre_handle *conn,
-             struct mdc_op_data *data,
+int mdc_link(struct lustre_handle *conn, struct mdc_op_data *data,
              struct ptlrpc_request **request)
 {
         struct ptlrpc_request *req;
              struct ptlrpc_request **request)
 {
         struct ptlrpc_request *req;
@@ -191,15 +188,15 @@ int mdc_link(struct lustre_handle *conn,
 
         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, 2, size,
                               NULL);
 
         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, 2, size,
                               NULL);
-        if (!req)
+        if (req == NULL)
                 RETURN(-ENOMEM);
 
                 RETURN(-ENOMEM);
 
-        mds_link_pack(req, 0, data);
+        mdc_link_pack(req, 0, data);
 
         size[0] = sizeof(struct mds_body);
         req->rq_replen = lustre_msg_size(1, size);
 
 
         size[0] = sizeof(struct mds_body);
         req->rq_replen = lustre_msg_size(1, size);
 
-        rc = mdc_reint(req, LUSTRE_CONN_FULL);
+        rc = mdc_reint(req, &mdc_rpc_lock, LUSTRE_CONN_FULL);
         *request = req;
         if (rc == -ERESTARTSYS)
                 rc = 0;
         *request = req;
         if (rc == -ERESTARTSYS)
                 rc = 0;
@@ -207,10 +204,8 @@ int mdc_link(struct lustre_handle *conn,
         RETURN(rc);
 }
 
         RETURN(rc);
 }
 
-int mdc_rename(struct lustre_handle *conn,
-               struct mdc_op_data *data,
-               const char *old, int oldlen,
-               const char *new, int newlen,
+int mdc_rename(struct lustre_handle *conn, struct mdc_op_data *data,
+               const char *old, int oldlen, const char *new, int newlen,
                struct ptlrpc_request **request)
 {
         struct ptlrpc_request *req;
                struct ptlrpc_request **request)
 {
         struct ptlrpc_request *req;
@@ -220,15 +215,15 @@ int mdc_rename(struct lustre_handle *conn,
 
         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, 3, size,
                               NULL);
 
         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, 3, size,
                               NULL);
-        if (!req)
+        if (req == NULL)
                 RETURN(-ENOMEM);
 
                 RETURN(-ENOMEM);
 
-        mds_rename_pack(req, 0, data, old, oldlen, new, newlen);
+        mdc_rename_pack(req, 0, data, old, oldlen, new, newlen);
 
         size[0] = sizeof(struct mds_body);
         req->rq_replen = lustre_msg_size(1, size);
 
 
         size[0] = sizeof(struct mds_body);
         req->rq_replen = lustre_msg_size(1, size);
 
-        rc = mdc_reint(req, LUSTRE_CONN_FULL);
+        rc = mdc_reint(req, &mdc_rpc_lock, LUSTRE_CONN_FULL);
         *request = req;
         if (rc == -ERESTARTSYS)
                 rc = 0;
         *request = req;
         if (rc == -ERESTARTSYS)
                 rc = 0;
index 204a836..b205d21 100644 (file)
@@ -46,6 +46,7 @@ struct mdc_rpc_lock mdc_setattr_lock;
 EXPORT_SYMBOL(mdc_rpc_lock);
 
 /* Helper that implements most of mdc_getstatus and signal_completed_replay. */
 EXPORT_SYMBOL(mdc_rpc_lock);
 
 /* Helper that implements most of mdc_getstatus and signal_completed_replay. */
+/* XXX this should become mdc_get_info("key"), sending MDS_GET_INFO RPC */
 static int send_getstatus(struct obd_import *imp, struct ll_fid *rootfid,
                           int level, int msg_flags)
 {
 static int send_getstatus(struct obd_import *imp, struct ll_fid *rootfid,
                           int level, int msg_flags)
 {
@@ -62,7 +63,7 @@ static int send_getstatus(struct obd_import *imp, struct ll_fid *rootfid,
         req->rq_level = level;
         req->rq_replen = lustre_msg_size(1, &size);
 
         req->rq_level = level;
         req->rq_replen = lustre_msg_size(1, &size);
 
-        mds_pack_req_body(req);
+        mdc_pack_req_body(req);
         req->rq_reqmsg->flags |= msg_flags;
         rc = ptlrpc_queue_wait(req);
 
         req->rq_reqmsg->flags |= msg_flags;
         rc = ptlrpc_queue_wait(req);
 
@@ -88,13 +89,14 @@ static int send_getstatus(struct obd_import *imp, struct ll_fid *rootfid,
         return rc;
 }
 
         return rc;
 }
 
-/* should become mdc_getinfo() */
+/* This should be mdc_get_info("rootfid") */
 int mdc_getstatus(struct lustre_handle *conn, struct ll_fid *rootfid)
 {
         return send_getstatus(class_conn2cliimp(conn), rootfid, LUSTRE_CONN_CON,
                               0);
 }
 
 int mdc_getstatus(struct lustre_handle *conn, struct ll_fid *rootfid)
 {
         return send_getstatus(class_conn2cliimp(conn), rootfid, LUSTRE_CONN_CON,
                               0);
 }
 
+/* should call mdc_get_info("lovdesc") and mdc_get_info("lovtgts") */
 int mdc_getlovinfo(struct obd_device *obd, struct lustre_handle *mdc_connh,
                    struct ptlrpc_request **request)
 {
 int mdc_getlovinfo(struct obd_device *obd, struct lustre_handle *mdc_connh,
                    struct ptlrpc_request **request)
 {
@@ -233,7 +235,7 @@ int mdc_getattr(struct lustre_handle *conn, struct ll_fid *fid,
         memcpy(&body->fid1, fid, sizeof(*fid));
         body->valid = valid;
         body->eadatasize = ea_size;
         memcpy(&body->fid1, fid, sizeof(*fid));
         body->valid = valid;
         body->eadatasize = ea_size;
-        mds_pack_req_body(req);
+        mdc_pack_req_body(req);
 
         rc = mdc_getattr_common (conn, ea_size, req);
         if (rc != 0) {
 
         rc = mdc_getattr_common (conn, ea_size, req);
         if (rc != 0) {
@@ -263,7 +265,7 @@ int mdc_getattr_name(struct lustre_handle *conn, struct ll_fid *fid,
         memcpy(&body->fid1, fid, sizeof(*fid));
         body->valid = valid;
         body->eadatasize = ea_size;
         memcpy(&body->fid1, fid, sizeof(*fid));
         body->valid = valid;
         body->eadatasize = ea_size;
-        mds_pack_req_body(req);
+        mdc_pack_req_body(req);
 
         LASSERT (strnlen (filename, namelen) == namelen - 1);
         memcpy(lustre_msg_buf(req->rq_reqmsg, 1, namelen), filename, namelen);
 
         LASSERT (strnlen (filename, namelen) == namelen - 1);
         memcpy(lustre_msg_buf(req->rq_reqmsg, 1, namelen), filename, namelen);
@@ -283,9 +285,9 @@ void mdc_store_inode_generation(struct ptlrpc_request *req, int reqoff,
                                 int repoff)
 {
         struct mds_rec_create *rec =
                                 int repoff)
 {
         struct mds_rec_create *rec =
-                lustre_msg_buf(req->rq_reqmsg, reqoff, sizeof (*rec));
+                lustre_msg_buf(req->rq_reqmsg, reqoff, sizeof(*rec));
         struct mds_body *body =
         struct mds_body *body =
-                lustre_msg_buf(req->rq_repmsg, repoff, sizeof (*body));
+                lustre_msg_buf(req->rq_repmsg, repoff, sizeof(*body));
 
         LASSERT (rec != NULL);
         LASSERT (body != NULL);
 
         LASSERT (rec != NULL);
         LASSERT (body != NULL);
@@ -295,11 +297,49 @@ void mdc_store_inode_generation(struct ptlrpc_request *req, int reqoff,
                   rec->cr_replayfid.generation, rec->cr_replayfid.id);
 }
 
                   rec->cr_replayfid.generation, rec->cr_replayfid.id);
 }
 
+/* Fill in *md from the reply message of req.
+ *
+ * md is zeroed first, then md->body is pointed at reply buffer 'offset'
+ * (asserted to be present and already swabbed).  If the body has
+ * OBD_MD_FLEASIZE set, the EA data in reply buffer 'offset + 1' is handed to
+ * obd_unpackmd() to build md->lsm.
+ *
+ * Returns 0 on success, -EPROTO if OBD_MD_FLEASIZE is set but eadatasize is
+ * 0, or the negative error returned by obd_unpackmd(). */
+int mdc_req2lustre_md(struct ptlrpc_request *req, int offset,
+                      struct lustre_handle *obd_import,
+                      struct lustre_md *md)
+{
+        int rc;
+        ENTRY;
+
+        LASSERT(md);
+        memset(md, 0, sizeof(*md));
+
+        md->body = lustre_msg_buf(req->rq_repmsg, offset, sizeof (*md->body));
+        LASSERT (md->body != NULL);
+        LASSERT_REPSWABBED (req, offset);
+
+        if (md->body->valid & OBD_MD_FLEASIZE) {
+                int lmmsize;
+                struct lov_mds_md *lmm;
+
+                LASSERT(S_ISREG(md->body->mode));
+
+                if (md->body->eadatasize == 0) {
+                        CERROR ("OBD_MD_FLEASIZE set, but eadatasize 0\n");
+                        RETURN(-EPROTO);
+                }
+                lmmsize = md->body->eadatasize;
+                lmm = lustre_msg_buf(req->rq_repmsg, offset + 1, lmmsize);
+                LASSERT (lmm != NULL);
+                LASSERT_REPSWABBED (req, offset + 1);
+
+                rc = obd_unpackmd(obd_import, &md->lsm, lmm, lmmsize);
+                if (rc < 0) {
+                        /* An EA that cannot be unpacked is reported to the
+                         * caller like the other failure in this function,
+                         * rather than taking the whole node down. */
+                        CERROR ("Error %d unpacking eadata\n", rc);
+                        RETURN(rc);
+                }
+                /* rc is non-negative here: the size of the unpacked lsm */
+                LASSERT (rc >= sizeof (*md->lsm));
+        }
+        RETURN(0);
+}
+
+
 /* We always reserve enough space in the reply packet for a stripe MD, because
 /* We always reserve enough space in the reply packet for a stripe MD, because
- * we don't know in advance the file type.
- *
- * XXX we could get that from ext2_dir_entry_2 file_type
- */
+ * we don't know in advance the file type. */
 int mdc_enqueue(struct lustre_handle *conn,
                 int lock_type,
                 struct lookup_intent *it,
 int mdc_enqueue(struct lustre_handle *conn,
                 int lock_type,
                 struct lookup_intent *it,
@@ -318,9 +358,10 @@ int mdc_enqueue(struct lustre_handle *conn,
                 { .name = {data->ino1, data->gen1} };
         int size[6] = {sizeof(struct ldlm_request), sizeof(struct ldlm_intent)};
         int rc, flags = LDLM_FL_HAS_INTENT;
                 { .name = {data->ino1, data->gen1} };
         int size[6] = {sizeof(struct ldlm_request), sizeof(struct ldlm_intent)};
         int rc, flags = LDLM_FL_HAS_INTENT;
-        int repsize[3] = {sizeof(struct ldlm_reply),
+        int repsize[4] = {sizeof(struct ldlm_reply),
                           sizeof(struct mds_body),
                           sizeof(struct mds_body),
-                          obddev->u.cli.cl_max_mds_easize};
+                          obddev->u.cli.cl_max_mds_easize,
+                          obddev->u.cli.cl_max_mds_cookiesize};
         struct ldlm_reply *dlm_rep;
         struct ldlm_intent *lit;
         struct ldlm_request *lockreq;
         struct ldlm_reply *dlm_rep;
         struct ldlm_intent *lit;
         struct ldlm_request *lockreq;
@@ -352,7 +393,7 @@ int mdc_enqueue(struct lustre_handle *conn,
                 lit->opc = (__u64)it->it_op;
 
                 /* pack the intended request */
                 lit->opc = (__u64)it->it_op;
 
                 /* pack the intended request */
-                mds_open_pack(req, 2, data, it->it_mode, 0, current->fsuid,
+                mdc_open_pack(req, 2, data, it->it_mode, 0, current->fsuid,
                               current->fsgid, LTIME_S(CURRENT_TIME),
                               it->it_flags, tgt, tgtlen);
                 /* get ready for the reply */
                               current->fsgid, LTIME_S(CURRENT_TIME),
                               it->it_flags, tgt, tgtlen);
                 /* get ready for the reply */
@@ -371,10 +412,10 @@ int mdc_enqueue(struct lustre_handle *conn,
                 lit->opc = (__u64)it->it_op;
 
                 /* pack the intended request */
                 lit->opc = (__u64)it->it_op;
 
                 /* pack the intended request */
-                mds_unlink_pack(req, 2, data);
+                mdc_unlink_pack(req, 2, data);
                 /* get ready for the reply */
                 /* get ready for the reply */
-                reply_buffers = 3;
-                req->rq_replen = lustre_msg_size(3, repsize);
+                reply_buffers = 4;
+                req->rq_replen = lustre_msg_size(4, repsize);
         } else if (it->it_op & (IT_GETATTR | IT_LOOKUP)) {
                 int valid = OBD_MD_FLNOTOBD | OBD_MD_FLEASIZE;
                 size[2] = sizeof(struct mds_body);
         } else if (it->it_op & (IT_GETATTR | IT_LOOKUP)) {
                 int valid = OBD_MD_FLNOTOBD | OBD_MD_FLEASIZE;
                 size[2] = sizeof(struct mds_body);
@@ -390,7 +431,7 @@ int mdc_enqueue(struct lustre_handle *conn,
                 lit->opc = (__u64)it->it_op;
 
                 /* pack the intended request */
                 lit->opc = (__u64)it->it_op;
 
                 /* pack the intended request */
-                mds_getattr_pack(req, valid, 2, it->it_flags, data);
+                mdc_getattr_pack(req, valid, 2, it->it_flags, data);
                 /* get ready for the reply */
                 reply_buffers = 3;
                 req->rq_replen = lustre_msg_size(3, repsize);
                 /* get ready for the reply */
                 reply_buffers = 3;
                 req->rq_replen = lustre_msg_size(3, repsize);
@@ -447,8 +488,8 @@ int mdc_enqueue(struct lustre_handle *conn,
         }
 
         dlm_rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*dlm_rep));
         }
 
         dlm_rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*dlm_rep));
-        LASSERT (dlm_rep != NULL);           /* checked by ldlm_cli_enqueue() */
-        LASSERT_REPSWABBED (req, 0);         /* swabbed by ldlm_cli_enqueue() */
+        LASSERT(dlm_rep != NULL);           /* checked by ldlm_cli_enqueue() */
+        LASSERT_REPSWABBED(req, 0);         /* swabbed by ldlm_cli_enqueue() */
 
         it->it_disposition = (int) dlm_rep->lock_policy_res1;
         it->it_status = (int) dlm_rep->lock_policy_res2;
 
         it->it_disposition = (int) dlm_rep->lock_policy_res1;
         it->it_status = (int) dlm_rep->lock_policy_res2;
@@ -456,8 +497,8 @@ int mdc_enqueue(struct lustre_handle *conn,
         it->it_data = req;
 
         /* We know what to expect, so we do any byte flipping required here */
         it->it_data = req;
 
         /* We know what to expect, so we do any byte flipping required here */
-        LASSERT (reply_buffers == 3 || reply_buffers == 1);
-        if (reply_buffers == 3) {
+        LASSERT(reply_buffers == 4 || reply_buffers == 3 || reply_buffers == 1);
+        if (reply_buffers >= 3) {
                 struct mds_body *body;
 
                 body = lustre_swab_repbuf (req, 1, sizeof (*body),
                 struct mds_body *body;
 
                 body = lustre_swab_repbuf (req, 1, sizeof (*body),
@@ -471,8 +512,8 @@ int mdc_enqueue(struct lustre_handle *conn,
                         /* The eadata is opaque; just check that it is
                          * there.  Eventually, obd_unpackmd() will check
                          * the contents */
                         /* The eadata is opaque; just check that it is
                          * there.  Eventually, obd_unpackmd() will check
                          * the contents */
-                        eadata = lustre_swab_repbuf (req, 2, body->eadatasize,
-                                                     NULL);
+                        eadata = lustre_swab_repbuf(req, 2, body->eadatasize,
+                                                    NULL);
                         if (eadata == NULL) {
                                 CERROR ("Missing/short eadata\n");
                                 RETURN (-EPROTO);
                         if (eadata == NULL) {
                                 CERROR ("Missing/short eadata\n");
                                 RETURN (-EPROTO);
@@ -490,8 +531,7 @@ static void mdc_replay_open(struct ptlrpc_request *req)
         struct list_head *tmp;
         struct mds_body *body;
 
         struct list_head *tmp;
         struct mds_body *body;
 
-        body = lustre_swab_repbuf (req, 1, sizeof (*body),
-                                   lustre_swab_mds_body);
+        body = lustre_swab_repbuf(req, 1, sizeof(*body), lustre_swab_mds_body);
         LASSERT (body != NULL);
 
         memcpy(&old, file_fh, sizeof(old));
         LASSERT (body != NULL);
 
         memcpy(&old, file_fh, sizeof(old));
@@ -517,15 +557,15 @@ void mdc_set_open_replay_data(struct obd_client_handle *och)
 {
         struct ptlrpc_request *req = och->och_req;
         struct mds_rec_create *rec =
 {
         struct ptlrpc_request *req = och->och_req;
         struct mds_rec_create *rec =
-                lustre_msg_buf(req->rq_reqmsg, 2, sizeof (*rec));
+                lustre_msg_buf(req->rq_reqmsg, 2, sizeof(*rec));
         struct mds_body *body =
         struct mds_body *body =
-                lustre_msg_buf(req->rq_repmsg, 1, sizeof (*body));
+                lustre_msg_buf(req->rq_repmsg, 1, sizeof(*body));
 
 
-        LASSERT (rec != NULL);
+        LASSERT(rec != NULL);
         /* outgoing messages always in my byte order */
         /* outgoing messages always in my byte order */
-        LASSERT (body != NULL);
+        LASSERT(body != NULL);
         /* incoming message in my byte order (it's been swabbed) */
         /* incoming message in my byte order (it's been swabbed) */
-        LASSERT_REPSWABBED (req, 1);
+        LASSERT_REPSWABBED(req, 1);
 
         memcpy(&rec->cr_replayfid, &body->fid1, sizeof rec->cr_replayfid);
         req->rq_replay_cb = mdc_replay_open;
 
         memcpy(&rec->cr_replayfid, &body->fid1, sizeof rec->cr_replayfid);
         req->rq_replay_cb = mdc_replay_open;
@@ -589,7 +629,7 @@ int mdc_readpage(struct lustre_handle *conn, obd_id ino, int type, __u64 offset,
         if (rc != 0)
                 GOTO(out, rc);
 
         if (rc != 0)
                 GOTO(out, rc);
 
-        mds_readdir_pack(req, offset, PAGE_CACHE_SIZE, ino, type);
+        mdc_readdir_pack(req, offset, PAGE_CACHE_SIZE, ino, type);
 
         req->rq_replen = lustre_msg_size(1, &size);
         rc = ptlrpc_queue_wait(req);
 
         req->rq_replen = lustre_msg_size(1, &size);
         rc = ptlrpc_queue_wait(req);
@@ -622,27 +662,28 @@ static int mdc_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
         case OBD_IOC_CLIENT_RECOVER:
                 RETURN(ptlrpc_recover_import(imp, data->ioc_inlbuf1));
         case IOC_OSC_SET_ACTIVE:
         case OBD_IOC_CLIENT_RECOVER:
                 RETURN(ptlrpc_recover_import(imp, data->ioc_inlbuf1));
         case IOC_OSC_SET_ACTIVE:
-                if (data->ioc_offset) {
-                        CERROR("%s: can't reactivate MDC\n",
-                               obddev->obd_uuid.uuid);
-                        RETURN(-ENOTTY);
-                }
-                RETURN(ptlrpc_set_import_active(imp, 0));
+                RETURN(ptlrpc_set_import_active(imp, data->ioc_offset));
         default:
                 CERROR("osc_ioctl(): unrecognised ioctl %#x\n", cmd);
                 RETURN(-ENOTTY);
         }
 }
 
         default:
                 CERROR("osc_ioctl(): unrecognised ioctl %#x\n", cmd);
                 RETURN(-ENOTTY);
         }
 }
 
-static int mdc_statfs(struct obd_export *exp, struct obd_statfs *osfs)
+static int mdc_statfs(struct obd_device *obd, struct obd_statfs *osfs,
+                      unsigned long max_age)
 {
         struct ptlrpc_request *req;
         struct obd_statfs *msfs;
         int rc, size = sizeof(*msfs);
         ENTRY;
 
 {
         struct ptlrpc_request *req;
         struct obd_statfs *msfs;
         int rc, size = sizeof(*msfs);
         ENTRY;
 
-        req = ptlrpc_prep_req(exp->exp_obd->u.cli.cl_import, MDS_STATFS, 0, 
-                              NULL, NULL);
+        /* We could possibly pass max_age in the request (as an absolute
+         * timestamp or a "seconds.usec ago") so the target can avoid doing
+         * extra calls into the filesystem if that isn't necessary (e.g.
+         * during mount that would help a bit).  Having relative timestamps
+         * is not so great if request processing is slow, while absolute
+         * timestamps are not ideal because they need time synchronization. */
+        req = ptlrpc_prep_req(obd->u.cli.cl_import, MDS_STATFS, 0, NULL, NULL);
         if (!req)
                 RETURN(-ENOMEM);
 
         if (!req)
                 RETURN(-ENOMEM);
 
@@ -655,14 +696,13 @@ static int mdc_statfs(struct obd_export *exp, struct obd_statfs *osfs)
         if (rc)
                 GOTO(out, rc);
 
         if (rc)
                 GOTO(out, rc);
 
-        msfs = lustre_swab_repbuf (req, 0, sizeof (*msfs),
-                                   lustre_swab_obd_statfs);
+        msfs = lustre_swab_repbuf(req, 0, sizeof(*msfs),lustre_swab_obd_statfs);
         if (msfs == NULL) {
         if (msfs == NULL) {
-                CERROR ("Can't unpack obd_statfs\n");
-                GOTO (out, rc = -EPROTO);
+                CERROR("Can't unpack obd_statfs\n");
+                GOTO(out, rc = -EPROTO);
         }
 
         }
 
-        memcpy (osfs, msfs, sizeof (*msfs));
+        memcpy(osfs, msfs, sizeof (*msfs));
         EXIT;
 out:
         ptlrpc_req_finished(req);
         EXIT;
 out:
         ptlrpc_req_finished(req);
@@ -670,11 +710,83 @@ out:
         return rc;
 }
 
         return rc;
 }
 
+/* Send an MDS_PIN request for the inode identified by (ino, gen, type).
+ *
+ * On success the handle from the reply body is copied into handle->och_fh,
+ * the request is kept in handle->och_req (dropped later by unpin) and
+ * handle->och_magic is set to OBD_CLIENT_HANDLE_MAGIC.
+ *
+ * Returns 0 on success, -ENOMEM if the request cannot be prepared, the
+ * ptlrpc_queue_wait() error if the RPC fails, or -EPROTO if the reply body
+ * cannot be unpacked.  On every failure path the request is released and
+ * *handle is not written. */
+static int mdc_pin(struct lustre_handle *conn, obd_id ino, __u32 gen, int type,
+                   struct obd_client_handle *handle, int flag)
+{
+        struct ptlrpc_request *req;
+        struct mds_body *body;
+        int rc, size = sizeof(*body);
+        ENTRY;
+
+        req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_PIN, 1, &size, NULL);
+        if (req == NULL)
+                RETURN(-ENOMEM);
+
+        body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body));
+        ll_ino2fid(&body->fid1, ino, gen, type);
+        body->flags = flag;
+
+        req->rq_replen = lustre_msg_size(1, &size);
+
+        mdc_get_rpc_lock(&mdc_rpc_lock, NULL);
+        rc = ptlrpc_queue_wait(req);
+        mdc_put_rpc_lock(&mdc_rpc_lock, NULL);
+        if (rc) {
+                CERROR("pin failed: %d\n", rc);
+                ptlrpc_req_finished(req);
+                RETURN(rc);
+        }
+
+        body = lustre_swab_repbuf(req, 0, sizeof(*body), lustre_swab_mds_body);
+        if (body == NULL) {
+                /* rc is 0 here; returning it would report success with
+                 * *handle never filled in, so fail explicitly instead. */
+                CERROR("Can't unpack mds_body\n");
+                ptlrpc_req_finished(req);
+                RETURN(-EPROTO);
+        }
+
+        memcpy(&handle->och_fh, &body->handle, sizeof(body->handle));
+        handle->och_req = req; /* will be dropped by unpin */
+        handle->och_magic = OBD_CLIENT_HANDLE_MAGIC;
+        RETURN(rc);
+}
+
+/* Undo a pin: send the handle stored in *handle back to the MDS and drop the
+ * request that was saved in handle->och_req.
+ *
+ * Returns 0 without sending anything if handle->och_magic is not
+ * OBD_CLIENT_HANDLE_MAGIC, -ENOMEM if the request cannot be prepared,
+ * otherwise the result of ptlrpc_queue_wait(). */
+static int mdc_unpin(struct lustre_handle *conn,
+                     struct obd_client_handle *handle, int flag)
+{
+        struct ptlrpc_request *req;
+        struct mds_body *body;
+        int rc, size = sizeof(*body);
+        ENTRY;
+
+        /* Handle was never filled in with the magic: nothing to undo. */
+        if (handle->och_magic != OBD_CLIENT_HANDLE_MAGIC)
+                RETURN(0);
+
+        /* The unpin request goes out with the MDS_CLOSE opcode and a single
+         * mds_body request buffer. */
+        req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_CLOSE, 1, &size,
+                              NULL);
+        /* NOTE(review): handle->och_req is not released on this path --
+         * confirm whether callers retry or the reference is leaked. */
+        if (req == NULL)
+                RETURN(-ENOMEM);
+
+        body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*body));
+        memcpy(&body->handle, &handle->och_fh, sizeof(body->handle));
+        body->flags = flag;
+
+        /* Reply size is computed for zero buffers. */
+        req->rq_replen = lustre_msg_size(0, NULL);
+        mdc_get_rpc_lock(&mdc_rpc_lock, NULL);
+        rc = ptlrpc_queue_wait(req);
+        mdc_put_rpc_lock(&mdc_rpc_lock, NULL);
+
+        if (rc != 0)
+                CERROR("unpin failed: %d\n", rc);
+
+        /* Both requests are released whether or not the RPC succeeded.
+         * NOTE(review): och_magic is left set, so a second call with the same
+         * handle would release och_req again -- verify against callers. */
+        ptlrpc_req_finished(req);
+        ptlrpc_req_finished(handle->och_req);
+        RETURN(rc);
+}
+
 static int mdc_attach(struct obd_device *dev, obd_count len, void *data)
 {
         struct lprocfs_static_vars lvars;
 
 static int mdc_attach(struct obd_device *dev, obd_count len, void *data)
 {
         struct lprocfs_static_vars lvars;
 
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(mdc, &lvars);
         return lprocfs_obd_attach(dev, lvars.obd_vars);
 }
 
         return lprocfs_obd_attach(dev, lvars.obd_vars);
 }
 
@@ -692,7 +804,9 @@ struct obd_ops mdc_obd_ops = {
         o_connect:     client_import_connect,
         o_disconnect:  client_import_disconnect,
         o_iocontrol:   mdc_iocontrol,
         o_connect:     client_import_connect,
         o_disconnect:  client_import_disconnect,
         o_iocontrol:   mdc_iocontrol,
-        o_statfs:      mdc_statfs
+        o_statfs:      mdc_statfs,
+        o_pin:         mdc_pin,
+        o_unpin:       mdc_unpin,
 };
 
 int __init mdc_init(void)
 };
 
 int __init mdc_init(void)
@@ -700,12 +814,12 @@ int __init mdc_init(void)
         struct lprocfs_static_vars lvars;
         mdc_init_rpc_lock(&mdc_rpc_lock);
         mdc_init_rpc_lock(&mdc_setattr_lock);
         struct lprocfs_static_vars lvars;
         mdc_init_rpc_lock(&mdc_rpc_lock);
         mdc_init_rpc_lock(&mdc_setattr_lock);
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(mdc, &lvars);
         return class_register_type(&mdc_obd_ops, lvars.module_vars,
                                    LUSTRE_MDC_NAME);
 }
 
         return class_register_type(&mdc_obd_ops, lvars.module_vars,
                                    LUSTRE_MDC_NAME);
 }
 
-static void __exit mdc_exit(void)
+static void /*__exit*/ mdc_exit(void)
 {
         class_unregister_type(LUSTRE_MDC_NAME);
 }
 {
         class_unregister_type(LUSTRE_MDC_NAME);
 }
@@ -715,6 +829,7 @@ MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
 MODULE_DESCRIPTION("Lustre Metadata Client");
 MODULE_LICENSE("GPL");
 
 MODULE_DESCRIPTION("Lustre Metadata Client");
 MODULE_LICENSE("GPL");
 
+EXPORT_SYMBOL(mdc_req2lustre_md);
 EXPORT_SYMBOL(mdc_getstatus);
 EXPORT_SYMBOL(mdc_getlovinfo);
 EXPORT_SYMBOL(mdc_enqueue);
 EXPORT_SYMBOL(mdc_getstatus);
 EXPORT_SYMBOL(mdc_getlovinfo);
 EXPORT_SYMBOL(mdc_enqueue);
index e530020..49c6100 100644 (file)
@@ -6,3 +6,4 @@ Makefile
 Makefile.in
 .deps
 TAGS
 Makefile.in
 .deps
 TAGS
+.*.cmd
index 6b712fb..0696bd7 100644 (file)
@@ -3,8 +3,9 @@
 # This code is issued under the GNU General Public License.
 # See the file COPYING in this distribution
 
 # This code is issued under the GNU General Public License.
 # See the file COPYING in this distribution
 
-include fs/lustre/portals/Kernelenv
+include $(src)/../portals/Kernelenv
 
 obj-y += mds.o
 
 obj-y += mds.o
-
-mds-objs    := mds_lov.o handler.o mds_reint.o mds_fs.o lproc_mds.o mds_internal.h mds_updates.o mds_open.o simple.o target.o
+mds-objs := mds_lov.o handler.o mds_reint.o mds_fs.o lproc_mds.o mds_open.o \
+               mds_lib.o
+       
index de3f2ed..756e290 100644 (file)
 #include <linux/lustre_mds.h>
 #include <linux/lustre_fsfilt.h>
 #include <linux/lprocfs_status.h>
 #include <linux/lustre_mds.h>
 #include <linux/lustre_fsfilt.h>
 #include <linux/lprocfs_status.h>
+#include <linux/lustre_commit_confd.h>
+
 #include "mds_internal.h"
 
 #include "mds_internal.h"
 
-extern int mds_get_lovtgts(struct mds_obd *obd, int tgt_count,
-                           struct obd_uuid *uuidarray);
-extern int mds_get_lovdesc(struct mds_obd  *obd, struct lov_desc *desc);
-int mds_finish_transno(struct mds_obd *mds, struct inode *i, void *handle,
-                       struct ptlrpc_request *req, int rc, int disp);
-static int mds_cleanup(struct obd_device * obddev, int force, int failover);
-
-inline struct mds_obd *mds_req2mds(struct ptlrpc_request *req)
-{
-        return &req->rq_export->exp_obd->u.mds;
-}
+static int mds_cleanup(struct obd_device *obd, int flags);
 
 static int mds_bulk_timeout(void *data)
 {
 
 static int mds_bulk_timeout(void *data)
 {
@@ -188,6 +180,9 @@ struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid,
 
         snprintf(fid_name, sizeof(fid_name), "0x%lx", ino);
 
 
         snprintf(fid_name, sizeof(fid_name), "0x%lx", ino);
 
+        CDEBUG(D_DENTRY, "--> mds_fid2dentry: ino %lu, gen %u, sb %p\n",
+               ino, generation, mds->mds_sb);
+
         /* under ext3 this is neither supposed to return bad inodes
            nor NULL inodes. */
         result = ll_lookup_one_len(fid_name, mds->mds_fid_de, strlen(fid_name));
         /* under ext3 this is neither supposed to return bad inodes
            nor NULL inodes. */
         result = ll_lookup_one_len(fid_name, mds->mds_fid_de, strlen(fid_name));
@@ -198,9 +193,6 @@ struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid,
         if (!inode)
                 RETURN(ERR_PTR(-ENOENT));
 
         if (!inode)
                 RETURN(ERR_PTR(-ENOENT));
 
-        CDEBUG(D_DENTRY, "--> mds_fid2dentry: ino %lu, gen %u, sb %p\n",
-               inode->i_ino, inode->i_generation, inode->i_sb);
-
         if (generation && inode->i_generation != generation) {
                 /* we didn't find the right inode.. */
                 CERROR("bad inode %lu, link: %d ct: %d or generation %u/%u\n",
         if (generation && inode->i_generation != generation) {
                 /* we didn't find the right inode.. */
                 CERROR("bad inode %lu, link: %d ct: %d or generation %u/%u\n",
@@ -341,80 +333,125 @@ void mds_mfd_destroy(struct mds_file_data *mfd)
         mds_mfd_put(mfd);
 }
 
         mds_mfd_put(mfd);
 }
 
-/* Call with med->med_open_lock held, please. */
-static int mds_close_mfd(struct mds_file_data *mfd, struct mds_export_data *med)
+/* Close a "file descriptor" and possibly unlink an orphan from the
+ * PENDING directory.
+ *
+ * If we are being called from mds_disconnect() because the client has
+ * disappeared, then req == NULL and we do not update last_rcvd because
+ * there is nothing that could be recovered by the client at this stage
+ * (it will not even _have_ an entry in last_rcvd anymore).
+ */
+static int mds_mfd_close(struct ptlrpc_request *req, struct obd_device *obd,
+                         struct mds_file_data *mfd)
 {
 {
-        struct dentry *de = NULL;
-
-#ifdef CONFIG_SMP
-        LASSERT(spin_is_locked(&med->med_open_lock));
-#endif
-        list_del(&mfd->mfd_list);
+        struct dentry *dparent = mfd->mfd_dentry->d_parent;
+        struct inode *child_inode = mfd->mfd_dentry->d_inode;
+        char fidname[LL_FID_NAMELEN];
+        int last_orphan, fidlen, rc = 0;
+        ENTRY;
 
 
-        if (mfd->mfd_dentry->d_parent) {
-                LASSERT(atomic_read(&mfd->mfd_dentry->d_parent->d_count));
-                de = dget(mfd->mfd_dentry->d_parent);
+        if (dparent) {
+                LASSERT(atomic_read(&dparent->d_count) > 0);
+                dparent = dget(dparent);
         }
 
         }
 
-        /* this is the actual "close" */
-        l_dput(mfd->mfd_dentry);
+        fidlen = ll_fid2str(fidname, child_inode->i_ino,
+                            child_inode->i_generation);
 
 
-        if (de)
-                l_dput(de);
+        last_orphan = mds_open_orphan_dec_test(child_inode) &&
+                mds_inode_is_orphan(child_inode);
 
 
+        /* this is the actual "close" */
+        l_dput(mfd->mfd_dentry);
         mds_mfd_destroy(mfd);
         mds_mfd_destroy(mfd);
-        RETURN(0);
-}
 
 
-static int mds_disconnect(struct lustre_handle *conn, int failover)
-{
-        struct obd_export *export = class_conn2export(conn);
-        int rc;
-        unsigned long flags;
-        ENTRY;
+        if (dparent)
+                l_dput(dparent);
 
 
-        ldlm_cancel_locks_for_export(export);
+        if (last_orphan) {
+                struct mds_obd *mds = &obd->u.mds;
+                struct inode *pending_dir = mds->mds_pending_dir->d_inode;
+                struct dentry *pending_child = NULL;
+                void *handle;
 
 
-        spin_lock_irqsave(&export->exp_lock, flags);
-        export->exp_failover = failover;
-        spin_unlock_irqrestore(&export->exp_lock, flags);
+                CDEBUG(D_ERROR, "destroying orphan object %s\n", fidname);
 
 
-        rc = class_disconnect(conn, failover);
-        class_export_put(export);
+                /* Sadly, there is no easy way to save pending_child from
+                 * mds_reint_unlink() into mfd, so we need to re-lookup,
+                 * but normally it will still be in the dcache.
+                 */
+                down(&pending_dir->i_sem);
+                pending_child = lookup_one_len(fidname, mds->mds_pending_dir,
+                                               fidlen);
+                if (IS_ERR(pending_child))
+                        GOTO(out_lock, rc = PTR_ERR(pending_child));
+                LASSERT(pending_child->d_inode != NULL);
+
+                handle = fsfilt_start(obd, pending_dir, FSFILT_OP_UNLINK, NULL);
+                if (IS_ERR(handle))
+                        GOTO(out_dput, rc = PTR_ERR(handle));
+                rc = vfs_unlink(pending_dir, pending_child);
+                if (rc)
+                        CERROR("error unlinking orphan %s: rc %d\n",fidname,rc);
+
+                if (req) {
+                        rc = mds_finish_transno(mds, pending_dir, handle, req,
+                                                rc, 0);
+                } else {
+                        int err = fsfilt_commit(obd, pending_dir, handle, 0);
+                        if (err) {
+                                CERROR("error committing orphan unlink: %d\n",
+                                       err);
+                                if (!rc)
+                                        rc = err;
+                        }
+                }
+        out_dput:
+                dput(pending_child);
+        out_lock:
+                up(&pending_dir->i_sem);
+        }
 
         RETURN(rc);
 }
 
 
         RETURN(rc);
 }
 
-static void mds_destroy_export(struct obd_export *export)
+static int mds_disconnect(struct lustre_handle *conn, int flags)
 {
 {
+        struct obd_export *export = class_conn2export(conn);
         struct mds_export_data *med = &export->exp_mds_data;
         struct mds_export_data *med = &export->exp_mds_data;
-        struct list_head *tmp, *n;
+        struct obd_device *obd = export->exp_obd;
+        struct obd_run_ctxt saved;
         int rc;
         int rc;
-
         ENTRY;
         ENTRY;
-        LASSERT(!strcmp(export->exp_obd->obd_type->typ_name,
-                        LUSTRE_MDS_NAME));
 
 
-        /*
-         * Close any open files.
-         */
+        push_ctxt(&saved, &obd->u.mds.mds_ctxt, NULL);
+        /* Close any open files (which may also cause orphan unlinking). */
         spin_lock(&med->med_open_lock);
         spin_lock(&med->med_open_lock);
-        list_for_each_safe(tmp, n, &med->med_open_head) {
+        while (!list_empty(&med->med_open_head)) {
+                struct list_head *tmp = med->med_open_head.next;
                 struct mds_file_data *mfd =
                         list_entry(tmp, struct mds_file_data, mfd_list);
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
                 struct mds_file_data *mfd =
                         list_entry(tmp, struct mds_file_data, mfd_list);
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+                /* bug 1579: fix force-closing for 2.5 */
                 struct dentry *dentry = mfd->mfd_dentry;
                 struct dentry *dentry = mfd->mfd_dentry;
+
+                list_del(&mfd->mfd_list);
+                spin_unlock(&med->med_open_lock);
+
                 CERROR("force closing client file handle for %*s (%s:%lu)\n",
                        dentry->d_name.len, dentry->d_name.name,
                        kdevname(dentry->d_inode->i_sb->s_dev),
                        dentry->d_inode->i_ino);
                 CERROR("force closing client file handle for %*s (%s:%lu)\n",
                        dentry->d_name.len, dentry->d_name.name,
                        kdevname(dentry->d_inode->i_sb->s_dev),
                        dentry->d_inode->i_ino);
+                rc = mds_mfd_close(NULL, obd, mfd);
 #endif
 #endif
-                rc = mds_close_mfd(mfd, med);
                 if (rc)
                         CDEBUG(D_INODE, "Error closing file: %d\n", rc);
                 if (rc)
                         CDEBUG(D_INODE, "Error closing file: %d\n", rc);
+                spin_lock(&med->med_open_lock);
         }
         spin_unlock(&med->med_open_lock);
         }
         spin_unlock(&med->med_open_lock);
+        pop_ctxt(&saved, &obd->u.mds.mds_ctxt, NULL);
 
 
+        ldlm_cancel_locks_for_export(export);
         if (export->exp_outstanding_reply) {
                 struct ptlrpc_request *req = export->exp_outstanding_reply;
                 unsigned long          flags;
         if (export->exp_outstanding_reply) {
                 struct ptlrpc_request *req = export->exp_outstanding_reply;
                 unsigned long          flags;
@@ -432,9 +469,13 @@ static void mds_destroy_export(struct obd_export *export)
                 export->exp_outstanding_reply = NULL;
         }
 
                 export->exp_outstanding_reply = NULL;
         }
 
-        if (!export->exp_failover)
+        if (!(flags & OBD_OPT_FAILOVER))
                 mds_client_free(export);
                 mds_client_free(export);
-        EXIT;
+
+        rc = class_disconnect(conn, flags);
+        class_export_put(export);
+
+        RETURN(rc);
 }
 
 /*
 }
 
 /*
@@ -448,14 +489,24 @@ static void mds_fsync_super(struct super_block *sb)
 {
         lock_kernel();
         lock_super(sb);
 {
         lock_kernel();
         lock_super(sb);
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
         if (sb->s_dirt && sb->s_op && sb->s_op->write_super)
                 sb->s_op->write_super(sb);
         if (sb->s_dirt && sb->s_op && sb->s_op->write_super)
                 sb->s_op->write_super(sb);
+#else
+        if (sb->s_dirt && sb->s_op) {
+                if (sb->s_op->sync_fs)
+                        sb->s_op->sync_fs(sb, 1);
+                else if (sb->s_op->write_super)
+                        sb->s_op->write_super(sb);
+        }
+#endif
         unlock_super(sb);
         unlock_kernel();
 }
 
 static int mds_getstatus(struct ptlrpc_request *req)
 {
         unlock_super(sb);
         unlock_kernel();
 }
 
 static int mds_getstatus(struct ptlrpc_request *req)
 {
+        struct obd_device *obd = req->rq_export->exp_obd;
         struct mds_obd *mds = mds_req2mds(req);
         struct mds_body *body;
         int rc, size = sizeof(*body);
         struct mds_obd *mds = mds_req2mds(req);
         struct mds_body *body;
         int rc, size = sizeof(*body);
@@ -473,7 +524,7 @@ static int mds_getstatus(struct ptlrpc_request *req)
          * requests if they have any.  This would be fsync_super() if it
          * was exported.
          */
          * requests if they have any.  This would be fsync_super() if it
          * was exported.
          */
-        mds_fsync_super(mds->mds_sb);
+        fsfilt_sync(obd, mds->mds_sb);
 
         body = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*body));
         memcpy(&body->fid1, &mds->mds_rootfid, sizeof(body->fid1));
 
         body = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*body));
         memcpy(&body->fid1, &mds->mds_rootfid, sizeof(body->fid1));
@@ -525,8 +576,7 @@ static int mds_getlovinfo(struct ptlrpc_request *req)
         memcpy(desc, &mds->mds_lov_desc, sizeof (*desc));
 
         tgt_count = mds->mds_lov_desc.ld_tgt_count;
         memcpy(desc, &mds->mds_lov_desc, sizeof (*desc));
 
         tgt_count = mds->mds_lov_desc.ld_tgt_count;
-        uuid0 = lustre_msg_buf (req->rq_repmsg, 1,
-                                tgt_count * sizeof (*uuid0));
+        uuid0 = lustre_msg_buf(req->rq_repmsg, 1, tgt_count * sizeof (*uuid0));
         if (uuid0 == NULL) {
                 CERROR("too many targets, enlarge client buffers\n");
                 req->rq_status = -ENOSPC;
         if (uuid0 == NULL) {
                 CERROR("too many targets, enlarge client buffers\n");
                 req->rq_status = -ENOSPC;
@@ -539,6 +589,8 @@ static int mds_getlovinfo(struct ptlrpc_request *req)
                 req->rq_status = rc;
                 RETURN(0);
         }
                 req->rq_status = rc;
                 RETURN(0);
         }
+        memcpy(&mds->mds_osc_uuid, &mds->mds_lov_desc.ld_uuid,
+               sizeof(mds->mds_osc_uuid));
         RETURN(0);
 }
 
         RETURN(0);
 }
 
@@ -616,8 +668,8 @@ int mds_pack_md(struct obd_device *obd, struct lustre_msg *msg,
 
         rc = fsfilt_get_md(obd, inode, lmm, lmm_size);
         if (rc < 0) {
 
         rc = fsfilt_get_md(obd, inode, lmm, lmm_size);
         if (rc < 0) {
-                CERROR ("Error %d reading eadata for ino %lu\n",
-                        rc, inode->i_ino);
+                CERROR("Error %d reading eadata for ino %lu\n",
+                       rc, inode->i_ino);
         } else if (rc > 0) {
                 body->valid |= OBD_MD_FLEASIZE;
                 body->eadatasize = rc;
         } else if (rc > 0) {
                 body->valid |= OBD_MD_FLEASIZE;
                 body->eadatasize = rc;
@@ -639,19 +691,22 @@ static int mds_getattr_internal(struct obd_device *obd, struct dentry *dentry,
         if (inode == NULL)
                 RETURN(-ENOENT);
 
         if (inode == NULL)
                 RETURN(-ENOENT);
 
-        body = lustre_msg_buf(req->rq_repmsg, reply_off, sizeof (*body));
-        LASSERT (body != NULL);                 /* caller prepped reply */
+        body = lustre_msg_buf(req->rq_repmsg, reply_off, sizeof(*body));
+        LASSERT(body != NULL);                 /* caller prepped reply */
 
         mds_pack_inode2fid(&body->fid1, inode);
         mds_pack_inode2body(body, inode);
 
 
         mds_pack_inode2fid(&body->fid1, inode);
         mds_pack_inode2body(body, inode);
 
-        if (S_ISREG(inode->i_mode) &&
-            (reqbody->valid & OBD_MD_FLEASIZE) != 0) {
-                rc = mds_pack_md(obd, req->rq_repmsg, reply_off + 1,
-                                 body, inode);
+        if (S_ISREG(inode->i_mode) && (reqbody->valid & OBD_MD_FLEASIZE) != 0) {
+                rc = mds_pack_md(obd, req->rq_repmsg, reply_off+1, body, inode);
+
+                /* If we have LOV EA data, the OST holds size, atime, mtime */
+                if (!(body->valid & OBD_MD_FLEASIZE))
+                        body->valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
+                                        OBD_MD_FLATIME | OBD_MD_FLMTIME);
         } else if (S_ISLNK(inode->i_mode) &&
                    (reqbody->valid & OBD_MD_LINKNAME) != 0) {
         } else if (S_ISLNK(inode->i_mode) &&
                    (reqbody->valid & OBD_MD_LINKNAME) != 0) {
-                char *symname = lustre_msg_buf(req->rq_repmsg, reply_off + 1, 0);
+                char *symname = lustre_msg_buf(req->rq_repmsg, reply_off + 1,0);
                 int len;
 
                 LASSERT (symname != NULL);       /* caller prepped reply */
                 int len;
 
                 LASSERT (symname != NULL);       /* caller prepped reply */
@@ -672,6 +727,7 @@ static int mds_getattr_internal(struct obd_device *obd, struct dentry *dentry,
                         rc = 0;
                 }
         }
                         rc = 0;
                 }
         }
+
         RETURN(rc);
 }
 
         RETURN(rc);
 }
 
@@ -684,11 +740,10 @@ static int mds_getattr_pack_msg(struct ptlrpc_request *req, struct inode *inode,
         ENTRY;
 
         body = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*body));
         ENTRY;
 
         body = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*body));
-        LASSERT (body != NULL);                 /* checked by caller */
-        LASSERT_REQSWABBED (req, offset);       /* swabbed by caller */
+        LASSERT(body != NULL);                 /* checked by caller */
+        LASSERT_REQSWABBED(req, offset);       /* swabbed by caller */
 
 
-        if (S_ISREG(inode->i_mode) &&
-            (body->valid & OBD_MD_FLEASIZE) != 0) {
+        if (S_ISREG(inode->i_mode) && (body->valid & OBD_MD_FLEASIZE)) {
                 int rc = fsfilt_get_md(req->rq_export->exp_obd, inode, NULL, 0);
                 CDEBUG(D_INODE, "got %d bytes MD data for inode %lu\n",
                        rc, inode->i_ino);
                 int rc = fsfilt_get_md(req->rq_export->exp_obd, inode, NULL, 0);
                 CDEBUG(D_INODE, "got %d bytes MD data for inode %lu\n",
                        rc, inode->i_ino);
@@ -701,14 +756,14 @@ static int mds_getattr_pack_msg(struct ptlrpc_request *req, struct inode *inode,
                         size[bufcount] = 0;
                         CERROR("MD size %d larger than maximum possible %u\n",
                                rc, mds->mds_max_mdsize);
                         size[bufcount] = 0;
                         CERROR("MD size %d larger than maximum possible %u\n",
                                rc, mds->mds_max_mdsize);
-                } else
+                } else {
                         size[bufcount] = rc;
                         size[bufcount] = rc;
+                }
                 bufcount++;
                 bufcount++;
-        } else if (S_ISLNK (inode->i_mode) &&
-                   (body->valid & OBD_MD_LINKNAME) != 0) {
+        } else if (S_ISLNK(inode->i_mode) && (body->valid & OBD_MD_LINKNAME)) {
                 if (inode->i_size + 1 != body->eadatasize)
                 if (inode->i_size + 1 != body->eadatasize)
-                        CERROR ("symlink size: %Lu, reply space: %d\n",
-                                inode->i_size + 1, body->eadatasize);
+                        CERROR("symlink size: %Lu, reply space: %d\n",
+                               inode->i_size + 1, body->eadatasize);
                 size[bufcount] = MIN(inode->i_size + 1, body->eadatasize);
                 bufcount++;
                 CDEBUG(D_INODE, "symlink size: %Lu, reply space: %d\n",
                 size[bufcount] = MIN(inode->i_size + 1, body->eadatasize);
                 bufcount++;
                 CDEBUG(D_INODE, "symlink size: %Lu, reply space: %d\n",
@@ -724,9 +779,8 @@ static int mds_getattr_pack_msg(struct ptlrpc_request *req, struct inode *inode,
         rc = lustre_pack_msg(bufcount, size, NULL, &req->rq_replen,
                              &req->rq_repmsg);
         if (rc) {
         rc = lustre_pack_msg(bufcount, size, NULL, &req->rq_replen,
                              &req->rq_repmsg);
         if (rc) {
-                CERROR("out of memoryK\n");
-                req->rq_status = rc;
-                GOTO(out, rc);
+                CERROR("out of memory\n");
+                GOTO(out, req->rq_status = rc);
         }
 
         EXIT;
         }
 
         EXIT;
@@ -738,6 +792,8 @@ static int mds_getattr_pack_msg(struct ptlrpc_request *req, struct inode *inode,
 static void reconstruct_getattr_name(int offset, struct ptlrpc_request *req,
                                      struct lustre_handle *client_lockh)
 {
 static void reconstruct_getattr_name(int offset, struct ptlrpc_request *req,
                                      struct lustre_handle *client_lockh)
 {
+        struct mds_export_data *med = &req->rq_export->exp_mds_data;
+        struct mds_client_data *mcd = med->med_mcd;
         struct obd_device *obd = req->rq_export->exp_obd;
         struct mds_obd *mds = mds_req2mds(req);
         struct dentry *parent, *child;
         struct obd_device *obd = req->rq_export->exp_obd;
         struct mds_obd *mds = mds_req2mds(req);
         struct dentry *parent, *child;
@@ -748,8 +804,15 @@ static void reconstruct_getattr_name(int offset, struct ptlrpc_request *req,
         int namelen, rc = 0;
         char *name;
 
         int namelen, rc = 0;
         char *name;
 
-        if (req->rq_export->exp_outstanding_reply)
-                mds_steal_ack_locks(req->rq_export, req);
+        req->rq_transno = mcd->mcd_last_transno;
+        req->rq_status = mcd->mcd_last_result;
+
+        LASSERT (req->rq_export->exp_outstanding_reply);
+
+        mds_steal_ack_locks(req->rq_export, req);
+
+        if (req->rq_status)
+                return;
 
         body = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*body));
         LASSERT (body != NULL);                 /* checked by caller */
 
         body = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*body));
         LASSERT (body != NULL);                 /* checked by caller */
@@ -770,6 +833,7 @@ static void reconstruct_getattr_name(int offset, struct ptlrpc_request *req,
         uc.ouc_cap = body->capability;
         uc.ouc_suppgid1 = body->suppgid;
         uc.ouc_suppgid2 = -1;
         uc.ouc_cap = body->capability;
         uc.ouc_suppgid1 = body->suppgid;
         uc.ouc_suppgid2 = -1;
+
         push_ctxt(&saved, &mds->mds_ctxt, &uc);
         parent = mds_fid2dentry(mds, &body->fid1, NULL);
         LASSERT(!IS_ERR(parent));
         push_ctxt(&saved, &mds->mds_ctxt, &uc);
         parent = mds_fid2dentry(mds, &body->fid1, NULL);
         LASSERT(!IS_ERR(parent));
@@ -785,7 +849,8 @@ static void reconstruct_getattr_name(int offset, struct ptlrpc_request *req,
         }
 
         rc = mds_getattr_internal(obd, child, req, body, offset);
         }
 
         rc = mds_getattr_internal(obd, child, req, body, offset);
-        req->rq_status = rc;
+        /* XXX need to handle error here */
+        LASSERT(!rc);
         l_dput(child);
         l_dput(parent);
 }
         l_dput(child);
         l_dput(parent);
 }
@@ -795,6 +860,7 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req,
 {
         struct mds_obd *mds = mds_req2mds(req);
         struct obd_device *obd = req->rq_export->exp_obd;
 {
         struct mds_obd *mds = mds_req2mds(req);
         struct obd_device *obd = req->rq_export->exp_obd;
+        struct ldlm_reply *rep = NULL;
         struct obd_run_ctxt saved;
         struct mds_body *body;
         struct dentry *de = NULL, *dchild = NULL;
         struct obd_run_ctxt saved;
         struct mds_body *body;
         struct dentry *de = NULL, *dchild = NULL;
@@ -803,7 +869,7 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req,
         struct ldlm_res_id child_res_id = { .name = {0} };
         struct lustre_handle parent_lockh;
         int namesize;
         struct ldlm_res_id child_res_id = { .name = {0} };
         struct lustre_handle parent_lockh;
         int namesize;
-        int flags = 0, rc = 0, cleanup_phase = 0, req_was_resent;
+        int flags = 0, rc = 0, cleanup_phase = 0;
         char *name;
         ENTRY;
 
         char *name;
         ENTRY;
 
@@ -811,34 +877,39 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req,
 
         /* Swab now, before anyone looks inside the request */
 
 
         /* Swab now, before anyone looks inside the request */
 
-        body = lustre_swab_reqbuf (req, offset, sizeof (*body),
-                                   lustre_swab_mds_body);
+        body = lustre_swab_reqbuf(req, offset, sizeof(*body),
+                                  lustre_swab_mds_body);
         if (body == NULL) {
         if (body == NULL) {
-                CERROR ("Can't swab mds_body\n");
-                GOTO (cleanup, rc = -EFAULT);
+                CERROR("Can't swab mds_body\n");
+                GOTO(cleanup, rc = -EFAULT);
         }
 
         }
 
-        LASSERT_REQSWAB (req, offset + 1);
-        name = lustre_msg_string (req->rq_reqmsg, offset + 1, 0);
+        LASSERT_REQSWAB(req, offset + 1);
+        name = lustre_msg_string(req->rq_reqmsg, offset + 1, 0);
         if (name == NULL) {
         if (name == NULL) {
-                CERROR ("Can't unpack name\n");
-                GOTO (cleanup, rc = -EFAULT);
+                CERROR("Can't unpack name\n");
+                GOTO(cleanup, rc = -EFAULT);
         }
         namesize = req->rq_reqmsg->buflens[offset + 1];
 
         }
         namesize = req->rq_reqmsg->buflens[offset + 1];
 
-        req_was_resent = lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT;
-        if (child_lockh->cookie) {
-                LASSERT(req_was_resent);
-                reconstruct_getattr_name(offset, req, child_lockh);
-                RETURN(0);
-        } else if (req_was_resent) {
-                DEBUG_REQ(D_HA, req, "no reply for RESENT req");
+        if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) {
+                struct obd_export *exp = req->rq_export;
+                if (exp->exp_outstanding_reply &&
+                    exp->exp_outstanding_reply->rq_xid == req->rq_xid) {
+                        reconstruct_getattr_name(offset, req, child_lockh);
+                        RETURN(0);
+                }
+                DEBUG_REQ(D_HA, req, "no reply for RESENT req (have "LPD64")",
+                          exp->exp_outstanding_reply ?
+                          exp->exp_outstanding_reply->rq_xid : (u64)0);
         }
 
         LASSERT (offset == 0 || offset == 2);
         }
 
         LASSERT (offset == 0 || offset == 2);
-        /* if requests were at offset 2, replies go back at 1 */
-        if (offset)
+        /* if requests were at offset 2, the getattr reply goes back at 1 */
+        if (offset) { 
+                rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*rep));
                 offset = 1;
                 offset = 1;
+        }
 
         uc.ouc_fsuid = body->fsuid;
         uc.ouc_fsgid = body->fsgid;
 
         uc.ouc_fsuid = body->fsuid;
         uc.ouc_fsgid = body->fsgid;
@@ -847,6 +918,7 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req,
         uc.ouc_suppgid2 = -1;
         push_ctxt(&saved, &mds->mds_ctxt, &uc);
         /* Step 1: Lookup/lock parent */
         uc.ouc_suppgid2 = -1;
         push_ctxt(&saved, &mds->mds_ctxt, &uc);
         /* Step 1: Lookup/lock parent */
+        intent_set_disposition(rep, DISP_LOOKUP_EXECD);
         de = mds_fid2locked_dentry(obd, &body->fid1, NULL, LCK_PR,
                                    &parent_lockh);
         if (IS_ERR(de))
         de = mds_fid2locked_dentry(obd, &body->fid1, NULL, LCK_PR,
                                    &parent_lockh);
         if (IS_ERR(de))
@@ -868,7 +940,10 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req,
         cleanup_phase = 2; /* child dentry */
 
         if (dchild->d_inode == NULL) {
         cleanup_phase = 2; /* child dentry */
 
         if (dchild->d_inode == NULL) {
+                intent_set_disposition(rep, DISP_LOOKUP_NEG);
                 GOTO(cleanup, rc = -ENOENT);
                 GOTO(cleanup, rc = -ENOENT);
+        } else {
+                intent_set_disposition(rep, DISP_LOOKUP_POS);
         }
 
         /* Step 3: Lock child */
         }
 
         /* Step 3: Lock child */
@@ -963,11 +1038,17 @@ out_pop:
         return rc;
 }
 
         return rc;
 }
 
+
+static int mds_obd_statfs(struct obd_device *obd, struct obd_statfs *osfs,
+                          unsigned long max_age)
+{
+        return fsfilt_statfs(obd, obd->u.mds.mds_sb, osfs);
+}
+
 static int mds_statfs(struct ptlrpc_request *req)
 {
         struct obd_device *obd = req->rq_export->exp_obd;
 static int mds_statfs(struct ptlrpc_request *req)
 {
         struct obd_device *obd = req->rq_export->exp_obd;
-        struct obd_statfs *osfs;
-        int rc, size = sizeof(*osfs);
+        int rc, size = sizeof(struct obd_statfs);
         ENTRY;
 
         rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
         ENTRY;
 
         rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
@@ -976,10 +1057,10 @@ static int mds_statfs(struct ptlrpc_request *req)
                 GOTO(out, rc);
         }
 
                 GOTO(out, rc);
         }
 
-        osfs = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*osfs));
-        rc = fsfilt_statfs(obd, obd->u.mds.mds_sb, osfs);
+        /* We call this so that we can cache a bit - 1 second (HZ jiffies) */
+        rc = obd_statfs(obd, lustre_msg_buf(req->rq_repmsg,0,size),jiffies-HZ);
         if (rc) {
         if (rc) {
-                CERROR("mdsstatfs failed: rc %d\n", rc);
+                CERROR("mds_obd_statfs failed: rc %d\n", rc);
                 GOTO(out, rc);
         }
 
                 GOTO(out, rc);
         }
 
@@ -1006,8 +1087,10 @@ static void reconstruct_close(struct ptlrpc_request *req)
 static int mds_close(struct ptlrpc_request *req)
 {
         struct mds_export_data *med = &req->rq_export->exp_mds_data;
 static int mds_close(struct ptlrpc_request *req)
 {
         struct mds_export_data *med = &req->rq_export->exp_mds_data;
+        struct obd_device *obd = req->rq_export->exp_obd;
         struct mds_body *body;
         struct mds_file_data *mfd;
         struct mds_body *body;
         struct mds_file_data *mfd;
+        struct obd_run_ctxt saved;
         int rc;
         ENTRY;
 
         int rc;
         ENTRY;
 
@@ -1028,10 +1111,20 @@ static int mds_close(struct ptlrpc_request *req)
                 RETURN(-ESTALE);
         }
 
                 RETURN(-ESTALE);
         }
 
+        rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg);
+        if (rc) {
+                CERROR("lustre_pack_msg: rc = %d\n", rc);
+                req->rq_status = rc;
+        }
+
         spin_lock(&med->med_open_lock);
         spin_lock(&med->med_open_lock);
-        req->rq_status = mds_close_mfd(mfd, med);
+        list_del(&mfd->mfd_list);
         spin_unlock(&med->med_open_lock);
 
         spin_unlock(&med->med_open_lock);
 
+        push_ctxt(&saved, &obd->u.mds.mds_ctxt, NULL);
+        req->rq_status = mds_mfd_close(rc ? NULL : req, obd, mfd);
+        pop_ctxt(&saved, &obd->u.mds.mds_ctxt, NULL);
+
         if (OBD_FAIL_CHECK(OBD_FAIL_MDS_CLOSE_PACK)) {
                 CERROR("test case OBD_FAIL_MDS_CLOSE_PACK\n");
                 req->rq_status = -ENOMEM;
         if (OBD_FAIL_CHECK(OBD_FAIL_MDS_CLOSE_PACK)) {
                 CERROR("test case OBD_FAIL_MDS_CLOSE_PACK\n");
                 req->rq_status = -ENOMEM;
@@ -1039,12 +1132,6 @@ static int mds_close(struct ptlrpc_request *req)
                 RETURN(-ENOMEM);
         }
 
                 RETURN(-ENOMEM);
         }
 
-        rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg);
-        if (rc) {
-                CERROR("mds: lustre_pack_msg: rc = %d\n", rc);
-                req->rq_status = rc;
-        }
-
         mds_mfd_put(mfd);
         RETURN(0);
 }
         mds_mfd_put(mfd);
         RETURN(0);
 }
@@ -1073,7 +1160,7 @@ static int mds_readpage(struct ptlrpc_request *req)
                 GOTO (out, rc = -EFAULT);
 
         /* body->size is actually the offset -eeb */
                 GOTO (out, rc = -EFAULT);
 
         /* body->size is actually the offset -eeb */
-        if ((body->size & (PAGE_SIZE - 1)) != 0) {
+        if ((body->size & ~PAGE_MASK) != 0) {
                 CERROR ("offset "LPU64"not on a page boundary\n", body->size);
                 GOTO (out, rc = -EFAULT);
         }
                 CERROR ("offset "LPU64"not on a page boundary\n", body->size);
                 GOTO (out, rc = -EFAULT);
         }
@@ -1306,9 +1393,10 @@ int mds_handle(struct ptlrpc_request *req)
                 break;
 
         case MDS_REINT: {
                 break;
 
         case MDS_REINT: {
-                __u32 *opcp = lustre_msg_buf (req->rq_reqmsg, 0, sizeof (*opcp));
+                __u32 *opcp = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*opcp));
                 __u32  opc;
                 __u32  opc;
-                int size[2] = {sizeof(struct mds_body), mds->mds_max_mdsize};
+                int size[3] = {sizeof(struct mds_body), mds->mds_max_mdsize,
+                               mds->mds_max_cookiesize};
                 int bufcount;
 
                 /* NB only peek inside req now; mds_reint() will swab it */
                 int bufcount;
 
                 /* NB only peek inside req now; mds_reint() will swab it */
@@ -1319,15 +1407,18 @@ int mds_handle(struct ptlrpc_request *req)
                 }
                 opc = *opcp;
                 if (lustre_msg_swabbed (req->rq_reqmsg))
                 }
                 opc = *opcp;
                 if (lustre_msg_swabbed (req->rq_reqmsg))
-                        __swab32s (&opc);
+                        __swab32s(&opc);
 
                 DEBUG_REQ(D_INODE, req, "reint %d (%s)", opc,
 
                 DEBUG_REQ(D_INODE, req, "reint %d (%s)", opc,
-                          (opc < sizeof (reint_names) / sizeof (reint_names[0]) ||
-                           reint_names[opc] == NULL) ? reint_names[opc] : "unknown opcode");
+                          (opc < sizeof(reint_names) / sizeof(reint_names[0]) ||
+                           reint_names[opc] == NULL) ? reint_names[opc] :
+                                                       "unknown opcode");
 
                 OBD_FAIL_RETURN(OBD_FAIL_MDS_REINT_NET, 0);
 
                 if (opc == REINT_UNLINK)
 
                 OBD_FAIL_RETURN(OBD_FAIL_MDS_REINT_NET, 0);
 
                 if (opc == REINT_UNLINK)
+                        bufcount = 3;
+                else if (opc == REINT_OPEN)
                         bufcount = 2;
                 else
                         bufcount = 1;
                         bufcount = 2;
                 else
                         bufcount = 1;
@@ -1348,11 +1439,23 @@ int mds_handle(struct ptlrpc_request *req)
                 rc = mds_close(req);
                 break;
 
                 rc = mds_close(req);
                 break;
 
+        case MDS_PIN:
+                DEBUG_REQ(D_INODE, req, "pin");
+                OBD_FAIL_RETURN(OBD_FAIL_MDS_PIN_NET, 0);
+                rc = mds_pin(req);
+                break;
+
         case OBD_PING:
                 DEBUG_REQ(D_INODE, req, "ping");
                 rc = target_handle_ping(req);
                 break;
 
         case OBD_PING:
                 DEBUG_REQ(D_INODE, req, "ping");
                 rc = target_handle_ping(req);
                 break;
 
+        case OBD_LOG_CANCEL:
+                CDEBUG(D_INODE, "log cancel\n");
+                OBD_FAIL_RETURN(OBD_FAIL_OBD_LOG_CANCEL_NET, 0);
+                rc = -ENOTSUPP; /* la la la */
+                break;
+
         case LDLM_ENQUEUE:
                 DEBUG_REQ(D_INODE, req, "enqueue");
                 OBD_FAIL_RETURN(OBD_FAIL_LDLM_ENQUEUE, 0);
         case LDLM_ENQUEUE:
                 DEBUG_REQ(D_INODE, req, "enqueue");
                 OBD_FAIL_RETURN(OBD_FAIL_LDLM_ENQUEUE, 0);
@@ -1385,7 +1488,7 @@ int mds_handle(struct ptlrpc_request *req)
                 struct obd_device *obd = list_entry(mds, struct obd_device,
                                                     u.mds);
                 req->rq_repmsg->last_xid =
                 struct obd_device *obd = list_entry(mds, struct obd_device,
                                                     u.mds);
                 req->rq_repmsg->last_xid =
-                        le64_to_cpu (med->med_mcd->mcd_last_xid);
+                        le64_to_cpu(med->med_mcd->mcd_last_xid);
 
                 if (!obd->obd_no_transno) {
                         req->rq_repmsg->last_committed =
 
                 if (!obd->obd_no_transno) {
                         req->rq_repmsg->last_committed =
@@ -1421,8 +1524,9 @@ int mds_handle(struct ptlrpc_request *req)
  *
  * Also assumes for mds_last_transno that we are not modifying it (no locking).
  */
  *
  * Also assumes for mds_last_transno that we are not modifying it (no locking).
  */
-int mds_update_server_data(struct mds_obd *mds)
+int mds_update_server_data(struct obd_device *obd)
 {
 {
+        struct mds_obd *mds = &obd->u.mds;
         struct mds_server_data *msd = mds->mds_server_data;
         struct file *filp = mds->mds_rcvd_filp;
         struct obd_run_ctxt saved;
         struct mds_server_data *msd = mds->mds_server_data;
         struct file *filp = mds->mds_rcvd_filp;
         struct obd_run_ctxt saved;
@@ -1433,21 +1537,16 @@ int mds_update_server_data(struct mds_obd *mds)
         msd->msd_last_transno = cpu_to_le64(mds->mds_last_transno);
         msd->msd_mount_count = cpu_to_le64(mds->mds_mount_count);
 
         msd->msd_last_transno = cpu_to_le64(mds->mds_last_transno);
         msd->msd_mount_count = cpu_to_le64(mds->mds_mount_count);
 
-        CDEBUG(D_SUPER, "MDS mount_count is %Lu, last_transno is %Lu\n",
-               (unsigned long long)mds->mds_mount_count,
-               (unsigned long long)mds->mds_last_transno);
-        rc = lustre_fwrite(filp, (char *)msd, sizeof(*msd), &off);
+        CDEBUG(D_SUPER, "MDS mount_count is "LPU64", last_transno is "LPU64"\n",
+               mds->mds_mount_count, mds->mds_last_transno);
+        rc = fsfilt_write_record(obd, filp, (char *)msd, sizeof(*msd), &off);
         if (rc != sizeof(*msd)) {
                 CERROR("error writing MDS server data: rc = %d\n", rc);
                 if (rc > 0)
                         rc = -EIO;
                 GOTO(out, rc);
         }
         if (rc != sizeof(*msd)) {
                 CERROR("error writing MDS server data: rc = %d\n", rc);
                 if (rc > 0)
                         rc = -EIO;
                 GOTO(out, rc);
         }
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        rc = fsync_dev(filp->f_dentry->d_inode->i_rdev);
-#else
         rc = file_fsync(filp, filp->f_dentry, 1);
         rc = file_fsync(filp, filp->f_dentry, 1);
-#endif
         if (rc)
                 CERROR("error flushing MDS server data: rc = %d\n", rc);
 
         if (rc)
                 CERROR("error flushing MDS server data: rc = %d\n", rc);
 
@@ -1457,10 +1556,10 @@ out:
 }
 
 /* mount the file system (secretly) */
 }
 
 /* mount the file system (secretly) */
-static int mds_setup(struct obd_device *obddev, obd_count len, void *buf)
+static int mds_setup(struct obd_device *obd, obd_count len, void *buf)
 {
         struct obd_ioctl_data* data = buf;
 {
         struct obd_ioctl_data* data = buf;
-        struct mds_obd *mds = &obddev->u.mds;
+        struct mds_obd *mds = &obd->u.mds;
         struct vfsmount *mnt;
         int rc = 0;
         unsigned long page;
         struct vfsmount *mnt;
         int rc = 0;
         unsigned long page;
@@ -1473,9 +1572,12 @@ static int mds_setup(struct obd_device *obddev, obd_count len, void *buf)
         if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2)
                 RETURN(rc = -EINVAL);
 
         if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2)
                 RETURN(rc = -EINVAL);
 
-        obddev->obd_fsops = fsfilt_get_ops(data->ioc_inlbuf2);
-        if (IS_ERR(obddev->obd_fsops))
-                RETURN(rc = PTR_ERR(obddev->obd_fsops));
+        if (data->ioc_inlbuf4)
+                obd_str2uuid(&mds->mds_osc_uuid, data->ioc_inlbuf4);
+
+        obd->obd_fsops = fsfilt_get_ops(data->ioc_inlbuf2);
+        if (IS_ERR(obd->obd_fsops))
+                RETURN(rc = PTR_ERR(obd->obd_fsops));
 
 
         if (data->ioc_inllen3 > 0 && data->ioc_inlbuf3) {
 
 
         if (data->ioc_inllen3 > 0 && data->ioc_inlbuf3) {
@@ -1511,73 +1613,93 @@ static int mds_setup(struct obd_device *obddev, obd_count len, void *buf)
 
         spin_lock_init(&mds->mds_transno_lock);
         mds->mds_max_mdsize = sizeof(struct lov_mds_md);
 
         spin_lock_init(&mds->mds_transno_lock);
         mds->mds_max_mdsize = sizeof(struct lov_mds_md);
-        rc = mds_fs_setup(obddev, mnt);
+        mds->mds_max_cookiesize = sizeof(struct llog_cookie);
+        rc = mds_fs_setup(obd, mnt);
         if (rc) {
                 CERROR("MDS filesystem method init failed: rc = %d\n", rc);
                 GOTO(err_put, rc);
         }
 
         if (rc) {
                 CERROR("MDS filesystem method init failed: rc = %d\n", rc);
                 GOTO(err_put, rc);
         }
 
-        obddev->obd_namespace =
-                ldlm_namespace_new("mds_server", LDLM_NAMESPACE_SERVER);
-        if (obddev->obd_namespace == NULL) {
-                mds_cleanup(obddev, 0, 0);
-                GOTO(err_fs, rc = -ENOMEM);
+#ifdef ENABLE_ORPHANS
+        rc = llog_start_commit_thread();
+        if (rc < 0)
+                GOTO(err_fs, rc);
+#endif
+
+#ifdef ENABLE_ORPHANS
+        mds->mds_catalog = mds_get_catalog(obd);
+        if (IS_ERR(mds->mds_catalog))
+                GOTO(err_fs, rc = PTR_ERR(mds->mds_catalog));
+#endif
+
+        obd->obd_namespace = ldlm_namespace_new("mds_server",
+                                                LDLM_NAMESPACE_SERVER);
+        if (obd->obd_namespace == NULL) {
+                mds_cleanup(obd, 0);
+                GOTO(err_log, rc = -ENOMEM);
         }
 
         ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL,
         }
 
         ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL,
-                           "mds_ldlm_client", &obddev->obd_ldlm_client);
+                           "mds_ldlm_client", &obd->obd_ldlm_client);
 
         mds->mds_has_lov_desc = 0;
 
         mds->mds_has_lov_desc = 0;
+        obd->obd_replayable = 1;
 
         RETURN(0);
 
 
         RETURN(0);
 
+err_log:
+#ifdef ENABLE_ORPHANS
+        mds_put_catalog(mds->mds_catalog);
+        /* No extra cleanup needed for llog_init_commit_thread() */
 err_fs:
 err_fs:
-        mds_fs_cleanup(obddev, 0);
+#endif
+        mds_fs_cleanup(obd, 0);
 err_put:
         unlock_kernel();
         mntput(mds->mds_vfsmnt);
         mds->mds_sb = 0;
         lock_kernel();
 err_ops:
 err_put:
         unlock_kernel();
         mntput(mds->mds_vfsmnt);
         mds->mds_sb = 0;
         lock_kernel();
 err_ops:
-        fsfilt_put_ops(obddev->obd_fsops);
+        fsfilt_put_ops(obd->obd_fsops);
         return rc;
 }
 
         return rc;
 }
 
-static int mds_cleanup(struct obd_device *obddev, int force, int failover)
+static int mds_cleanup(struct obd_device *obd, int flags)
 {
 {
-        struct super_block *sb;
-        struct mds_obd *mds = &obddev->u.mds;
+        struct mds_obd *mds = &obd->u.mds;
         ENTRY;
 
         ENTRY;
 
-        sb = mds->mds_sb;
-        if (!mds->mds_sb)
+        if (mds->mds_sb == NULL)
                 RETURN(0);
 
                 RETURN(0);
 
-        mds_update_server_data(mds);
-        mds_fs_cleanup(obddev, failover);
+#ifdef ENABLE_ORPHANS
+        mds_put_catalog(mds->mds_catalog);
+#endif
+        if (mds->mds_osc_obd)
+                obd_disconnect(&mds->mds_osc_conn, flags);
+        mds_update_server_data(obd);
+        mds_fs_cleanup(obd, flags);
 
         unlock_kernel();
 
         /* 2 seems normal on mds, (may_umount() also expects 2
           fwiw), but we only see 1 at this point in obdfilter. */
 
         unlock_kernel();
 
         /* 2 seems normal on mds, (may_umount() also expects 2
           fwiw), but we only see 1 at this point in obdfilter. */
-        if (atomic_read(&obddev->u.mds.mds_vfsmnt->mnt_count) > 2){
-                CERROR("%s: mount point busy, mnt_count: %d\n",
-                       obddev->obd_name,
-                       atomic_read(&obddev->u.mds.mds_vfsmnt->mnt_count));
-        }
+        if (atomic_read(&obd->u.mds.mds_vfsmnt->mnt_count) > 2)
+                CERROR("%s: mount point busy, mnt_count: %d\n", obd->obd_name,
+                       atomic_read(&obd->u.mds.mds_vfsmnt->mnt_count));
 
         mntput(mds->mds_vfsmnt);
         mds->mds_sb = 0;
 
 
         mntput(mds->mds_vfsmnt);
         mds->mds_sb = 0;
 
-        ldlm_namespace_free(obddev->obd_namespace);
+        ldlm_namespace_free(obd->obd_namespace);
 
 
-        if (obddev->obd_recovering)
-                target_cancel_recovery_timer(obddev);
+        if (obd->obd_recovering)
+                target_cancel_recovery_timer(obd);
         lock_kernel();
 #ifdef CONFIG_DEV_RDONLY
         dev_clear_rdonly(2);
 #endif
         lock_kernel();
 #ifdef CONFIG_DEV_RDONLY
         dev_clear_rdonly(2);
 #endif
-        fsfilt_put_ops(obddev->obd_fsops);
+        fsfilt_put_ops(obd->obd_fsops);
 
         RETURN(0);
 }
 
         RETURN(0);
 }
@@ -1616,13 +1738,26 @@ static void fixup_handle_for_resent_req(struct ptlrpc_request *req,
                   remote_hdl.cookie);
 }
 
                   remote_hdl.cookie);
 }
 
+int intent_disposition(struct ldlm_reply *rep, int flag)
+{
+        if (!rep)
+                return 0;
+        return (rep->lock_policy_res1 & flag);
+}
+
+void intent_set_disposition(struct ldlm_reply *rep, int flag)
+{
+        if (!rep)
+                return;
+        rep->lock_policy_res1 |= flag;
+}
+
 static int ldlm_intent_policy(struct ldlm_namespace *ns,
                               struct ldlm_lock **lockp, void *req_cookie,
                               ldlm_mode_t mode, int flags, void *data)
 {
         struct ptlrpc_request *req = req_cookie;
         struct ldlm_lock *lock = *lockp;
 static int ldlm_intent_policy(struct ldlm_namespace *ns,
                               struct ldlm_lock **lockp, void *req_cookie,
                               ldlm_mode_t mode, int flags, void *data)
 {
         struct ptlrpc_request *req = req_cookie;
         struct ldlm_lock *lock = *lockp;
-        int rc = 0;
         ENTRY;
 
         if (!req_cookie)
         ENTRY;
 
         if (!req_cookie)
@@ -1632,34 +1767,33 @@ static int ldlm_intent_policy(struct ldlm_namespace *ns,
                 /* an intent needs to be considered */
                 struct ldlm_intent *it;
                 struct mds_obd *mds = &req->rq_export->exp_obd->u.mds;
                 /* an intent needs to be considered */
                 struct ldlm_intent *it;
                 struct mds_obd *mds = &req->rq_export->exp_obd->u.mds;
-                struct mds_body *mds_body;
                 struct ldlm_reply *rep;
                 struct ldlm_reply *rep;
-                struct lustre_handle lockh = { 0 };
+                struct lustre_handle lockh;
                 struct ldlm_lock *new_lock;
                 struct ldlm_lock *new_lock;
-                int rc, offset = 2, repsize[3] = {sizeof(struct ldlm_reply),
-                                                  sizeof(struct mds_body),
-                                                  mds->mds_max_mdsize};
+                int offset = 2, repsize[4] = {sizeof(struct ldlm_reply),
+                                              sizeof(struct mds_body),
+                                              mds->mds_max_mdsize,
+                                              mds->mds_max_cookiesize};
 
 
-                it = lustre_swab_reqbuf (req, 1, sizeof (*it),
-                                         lustre_swab_ldlm_intent);
+                it = lustre_swab_reqbuf(req, 1, sizeof (*it),
+                                        lustre_swab_ldlm_intent);
                 if (it == NULL) {
                         CERROR ("Intent missing\n");
                 if (it == NULL) {
                         CERROR ("Intent missing\n");
-                        rc = req->rq_status = -EFAULT;
-                        RETURN (rc);
+                        req->rq_status = -EFAULT;
+                        RETURN(req->rq_status);
                 }
 
                 LDLM_DEBUG(lock, "intent policy, opc: %s",
                            ldlm_it2str(it->opc));
 
                 }
 
                 LDLM_DEBUG(lock, "intent policy, opc: %s",
                            ldlm_it2str(it->opc));
 
-                rc = lustre_pack_msg(3, repsize, NULL, &req->rq_replen,
-                                     &req->rq_repmsg);
-                if (rc) {
-                        rc = req->rq_status = -ENOMEM;
-                        RETURN(rc);
-                }
+                req->rq_status = lustre_pack_msg(it->opc == IT_UNLINK ? 4 : 3,
+                                                 repsize, NULL, &req->rq_replen,
+                                                 &req->rq_repmsg);
+                if (req->rq_status)
+                        RETURN(req->rq_status);
 
                 rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*rep));
 
                 rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*rep));
-                rep->lock_policy_res1 = IT_INTENT_EXEC;
+                intent_set_disposition(rep, DISP_IT_EXECD);
 
                 fixup_handle_for_resent_req(req, lock, &lockh);
 
 
                 fixup_handle_for_resent_req(req, lock, &lockh);
 
@@ -1667,45 +1801,28 @@ static int ldlm_intent_policy(struct ldlm_namespace *ns,
                 switch ((long)it->opc) {
                 case IT_OPEN:
                 case IT_CREAT|IT_OPEN:
                 switch ((long)it->opc) {
                 case IT_OPEN:
                 case IT_CREAT|IT_OPEN:
-                        rc = mds_reint(req, offset, &lockh);
-                        /* We return a dentry to the client if IT_OPEN_POS is
-                         * set, or if we make it to the OPEN portion of the
-                         * programme (which implies that we created) */
-                        if (!(rep->lock_policy_res1 & IT_OPEN_POS ||
-                              rep->lock_policy_res1 & IT_OPEN_OPEN)) {
-                                rep->lock_policy_res2 = rc;
+                        /* XXX swab here to assert that an mds_open reint
+                         * packet is following */
+                        rep->lock_policy_res2 = mds_reint(req, offset, &lockh);
+                        /* We abort the lock if the lookup was negative and
+                         * we did not make it to the OPEN portion */
+                        if (intent_disposition(rep, DISP_LOOKUP_NEG) &&
+                            !intent_disposition(rep, DISP_OPEN_OPEN))
                                 RETURN(ELDLM_LOCK_ABORTED);
                                 RETURN(ELDLM_LOCK_ABORTED);
-                        }
-                        break;
-                case IT_UNLINK:
-                        rc = mds_reint(req, offset, &lockh);
-                        /* Don't return a lock if the unlink failed, or if we're
-                         * not sending back an EA */
-                        if (rc) {
-                                rep->lock_policy_res2 = rc;
-                                RETURN(ELDLM_LOCK_ABORTED);
-                        }
-                        if (req->rq_status != 0) {
-                                rep->lock_policy_res2 = req->rq_status;
-                                RETURN(ELDLM_LOCK_ABORTED);
-                        }
-                        mds_body = lustre_msg_buf(req->rq_repmsg, 1, sizeof (*mds_body));
-                        if (!(mds_body->valid & OBD_MD_FLEASIZE)) {
-                                rep->lock_policy_res2 = rc;
-                                RETURN(ELDLM_LOCK_ABORTED);
-                        }
                         break;
                 case IT_GETATTR:
                 case IT_LOOKUP:
                 case IT_READDIR:
                         break;
                 case IT_GETATTR:
                 case IT_LOOKUP:
                 case IT_READDIR:
-                        rc = mds_getattr_name(offset, req, &lockh);
+                        rep->lock_policy_res2 = mds_getattr_name(offset, req,
+                                                                 &lockh);
                         /* FIXME: we need to sit down and decide on who should
                          * set req->rq_status, who should return negative and
                         /* FIXME: we need to sit down and decide on who should
                          * set req->rq_status, who should return negative and
-                         * positive return values, and what they all mean. */
-                        if (rc) {
-                                rep->lock_policy_res2 = rc;
+                         * positive return values, and what they all mean. 
+                         * - replay: returns 0 & req->status is old status
+                         * - otherwise: returns req->status */
+                        if (!intent_disposition(rep, DISP_LOOKUP_POS) || 
+                            rep->lock_policy_res2)
                                 RETURN(ELDLM_LOCK_ABORTED);
                                 RETURN(ELDLM_LOCK_ABORTED);
-                        }
                         if (req->rq_status != 0) {
                                 rep->lock_policy_res2 = req->rq_status;
                                 RETURN(ELDLM_LOCK_ABORTED);
                         if (req->rq_status != 0) {
                                 rep->lock_policy_res2 = req->rq_status;
                                 RETURN(ELDLM_LOCK_ABORTED);
@@ -1717,10 +1834,17 @@ static int ldlm_intent_policy(struct ldlm_namespace *ns,
                 }
 
                 /* By this point, whatever function we called above must have
                 }
 
                 /* By this point, whatever function we called above must have
-                 * filled in 'lockh' or returned an error.  We want to give the
-                 * new lock to the client instead of whatever lock it was about
-                 * to get. */
+                 * either filled in 'lockh', been an intent replay, or returned
+                 * an error.  We want to allow replayed RPCs to not get a lock,
+                 * since we would just drop it below anyways because lock replay
+                 * is done separately by the client afterwards.  For regular
+                 * RPCs we want to give the new lock to the client instead of
+                 * whatever lock it was about to get.
+                 */
                 new_lock = ldlm_handle2lock(&lockh);
                 new_lock = ldlm_handle2lock(&lockh);
+                if (flags & LDLM_FL_INTENT_ONLY && !new_lock)
+                        RETURN(ELDLM_LOCK_ABORTED);
+
                 LASSERT(new_lock != NULL);
 
                 /* If we've already given this lock to a client once, then we
                 LASSERT(new_lock != NULL);
 
                 /* If we've already given this lock to a client once, then we
@@ -1785,14 +1909,13 @@ static int ldlm_intent_policy(struct ldlm_namespace *ns,
                 RETURN(ELDLM_LOCK_REPLACED);
         } else {
                 int size = sizeof(struct ldlm_reply);
                 RETURN(ELDLM_LOCK_REPLACED);
         } else {
                 int size = sizeof(struct ldlm_reply);
-                rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen,
-                                     &req->rq_repmsg);
-                if (rc) {
+                if (lustre_pack_msg(1, &size, NULL, &req->rq_replen,
+                                    &req->rq_repmsg)) {
                         LBUG();
                         RETURN(-ENOMEM);
                 }
         }
                         LBUG();
                         RETURN(-ENOMEM);
                 }
         }
-        RETURN(rc);
+        RETURN(0);
 }
 
 int mds_attach(struct obd_device *dev, obd_count len, void *data)
 }
 
 int mds_attach(struct obd_device *dev, obd_count len, void *data)
@@ -1906,7 +2029,7 @@ err_thread:
 }
 
 
 }
 
 
-static int mdt_cleanup(struct obd_device *obddev, int force, int failover)
+static int mdt_cleanup(struct obd_device *obddev, int flags)
 {
         struct mds_obd *mds = &obddev->u.mds;
         ENTRY;
 {
         struct mds_obd *mds = &obddev->u.mds;
         ENTRY;
@@ -1928,15 +2051,15 @@ extern int mds_iocontrol(unsigned int cmd, struct lustre_handle *conn,
 
 /* use obd ops to offer management infrastructure */
 static struct obd_ops mds_obd_ops = {
 
 /* use obd ops to offer management infrastructure */
 static struct obd_ops mds_obd_ops = {
-        o_owner:          THIS_MODULE,
-        o_attach:         mds_attach,
-        o_detach:         mds_detach,
-        o_connect:        mds_connect,
-        o_disconnect:     mds_disconnect,
-        o_setup:          mds_setup,
-        o_cleanup:        mds_cleanup,
-        o_iocontrol:      mds_iocontrol,
-        o_destroy_export: mds_destroy_export
+        o_owner:       THIS_MODULE,
+        o_attach:      mds_attach,
+        o_detach:      mds_detach,
+        o_connect:     mds_connect,
+        o_disconnect:  mds_disconnect,
+        o_setup:       mds_setup,
+        o_cleanup:     mds_cleanup,
+        o_statfs:      mds_obd_statfs,
+        o_iocontrol:   mds_iocontrol
 };
 
 static struct obd_ops mdt_obd_ops = {
 };
 
 static struct obd_ops mdt_obd_ops = {
@@ -1961,7 +2084,7 @@ static int __init mds_init(void)
         return 0;
 }
 
         return 0;
 }
 
-static void __exit mds_exit(void)
+static void /*__exit*/ mds_exit(void)
 {
         ldlm_unregister_intent();
         class_unregister_type(LUSTRE_MDS_NAME);
 {
         ldlm_unregister_intent();
         class_unregister_type(LUSTRE_MDS_NAME);
index 5d6fa57..e355415 100644 (file)
@@ -37,71 +37,43 @@ struct lprocfs_vars lprocfs_mdt_module_vars[] = { {0} };
 
 #else
 
 
 #else
 
-static inline int lprocfs_mds_statfs(void *data, struct statfs *sfs)
-{
-        struct obd_device* dev = (struct obd_device*) data;
-        struct mds_obd *mds;
-
-        LASSERT(dev != NULL);
-        mds = &dev->u.mds;
-        return vfs_statfs(mds->mds_sb, sfs);
-}
-
-DEFINE_LPROCFS_STATFS_FCT(rd_blksize,     lprocfs_mds_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, lprocfs_mds_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree,  lprocfs_mds_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filestotal,  lprocfs_mds_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filesfree,   lprocfs_mds_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filegroups,  lprocfs_mds_statfs);
-
-int rd_fstype(char *page, char **start, off_t off, int count, int *eof,
-              void *data)
-{
-        struct obd_device *obd = (struct obd_device *)data;
-
-        LASSERT(obd != NULL);
-        LASSERT(obd->obd_fsops != NULL);
-        LASSERT(obd->obd_fsops->fs_type != NULL);
-        return snprintf(page, count, "%s\n", obd->obd_fsops->fs_type);
-}
-
-int lprocfs_mds_rd_mntdev(char *page, char **start, off_t off, int count,
-                          int *eof, void *data)
+static int lprocfs_mds_rd_mntdev(char *page, char **start, off_t off, int count,
+                                 int *eof, void *data)
 {
         struct obd_device* obd = (struct obd_device *)data;
 
         LASSERT(obd != NULL);
         LASSERT(obd->u.mds.mds_vfsmnt->mnt_devname);
         *eof = 1;
 {
         struct obd_device* obd = (struct obd_device *)data;
 
         LASSERT(obd != NULL);
         LASSERT(obd->u.mds.mds_vfsmnt->mnt_devname);
         *eof = 1;
-        return snprintf(page, count, "%s\n",
-                        obd->u.mds.mds_vfsmnt->mnt_devname);
+
+        return snprintf(page, count, "%s\n",obd->u.mds.mds_vfsmnt->mnt_devname);
 }
 
 struct lprocfs_vars lprocfs_mds_obd_vars[] = {
 }
 
 struct lprocfs_vars lprocfs_mds_obd_vars[] = {
-        { "uuid",       lprocfs_rd_uuid, 0, 0 },
-        { "blocksize",  rd_blksize,      0, 0 },
-        { "kbytestotal",rd_kbytestotal,  0, 0 },
-        { "kbytesfree", rd_kbytesfree,   0, 0 },
-        { "fstype",     rd_fstype,       0, 0 },
-        { "filestotal", rd_filestotal,   0, 0 },
-        { "filesfree",  rd_filesfree,    0, 0 },
-        { "filegroups", rd_filegroups,   0, 0 },
-        { "mntdev",     lprocfs_mds_rd_mntdev,    0, 0 },
+        { "uuid",         lprocfs_rd_uuid,        0, 0 },
+        { "blocksize",    lprocfs_rd_blksize,     0, 0 },
+        { "kbytestotal",  lprocfs_rd_kbytestotal, 0, 0 },
+        { "kbytesfree",   lprocfs_rd_kbytesfree,  0, 0 },
+        { "fstype",       lprocfs_rd_fstype,      0, 0 },
+        { "filestotal",   lprocfs_rd_filestotal,  0, 0 },
+        { "filesfree",    lprocfs_rd_filesfree,   0, 0 },
+        //{ "filegroups",   lprocfs_rd_filegroups,  0, 0 },
+        { "mntdev",       lprocfs_mds_rd_mntdev,  0, 0 },
         { 0 }
 };
 
 struct lprocfs_vars lprocfs_mds_module_vars[] = {
         { 0 }
 };
 
 struct lprocfs_vars lprocfs_mds_module_vars[] = {
-        { "num_refs",   lprocfs_rd_numrefs, 0, 0 },
+        { "num_refs",     lprocfs_rd_numrefs,     0, 0 },
         { 0 }
 };
 
 struct lprocfs_vars lprocfs_mdt_obd_vars[] = {
         { 0 }
 };
 
 struct lprocfs_vars lprocfs_mdt_obd_vars[] = {
-        { "uuid",       lprocfs_rd_uuid, 0, 0 },
+        { "uuid",         lprocfs_rd_uuid,        0, 0 },
         { 0 }
 };
 
 struct lprocfs_vars lprocfs_mdt_module_vars[] = {
         { 0 }
 };
 
 struct lprocfs_vars lprocfs_mdt_module_vars[] = {
-        { "num_refs",   lprocfs_rd_numrefs, 0, 0 },
+        { "num_refs",     lprocfs_rd_numrefs,     0, 0 },
         { 0 }
 };
 
         { 0 }
 };
 
index cefc680..56346ca 100644 (file)
@@ -37,6 +37,9 @@
 #include <linux/obd_support.h>
 #include <linux/lustre_lib.h>
 #include <linux/lustre_fsfilt.h>
 #include <linux/obd_support.h>
 #include <linux/lustre_lib.h>
 #include <linux/lustre_fsfilt.h>
+#include <portals/list.h>
+
+#include "mds_internal.h"
 
 /* This limit is arbitrary, but for now we fit it in 1 page (32k clients) */
 #define MDS_MAX_CLIENTS (PAGE_SIZE * 8)
 
 /* This limit is arbitrary, but for now we fit it in 1 page (32k clients) */
 #define MDS_MAX_CLIENTS (PAGE_SIZE * 8)
  * we know its offset.
  */
 int mds_client_add(struct obd_device *obd, struct mds_obd *mds,
  * we know its offset.
  */
 int mds_client_add(struct obd_device *obd, struct mds_obd *mds,
-                   struct mds_export_data *med, int cl_off)
+                   struct mds_export_data *med, int cl_idx)
 {
         unsigned long *bitmap = mds->mds_client_bitmap;
 {
         unsigned long *bitmap = mds->mds_client_bitmap;
-        int new_client = (cl_off == -1);
+        int new_client = (cl_idx == -1);
 
         LASSERT(bitmap != NULL);
 
 
         LASSERT(bitmap != NULL);
 
@@ -61,39 +64,40 @@ int mds_client_add(struct obd_device *obd, struct mds_obd *mds,
         if (!strcmp(med->med_mcd->mcd_uuid, "OBD_CLASS_UUID"))
                 RETURN(0);
 
         if (!strcmp(med->med_mcd->mcd_uuid, "OBD_CLASS_UUID"))
                 RETURN(0);
 
-        /* the bitmap operations can handle cl_off > sizeof(long) * 8, so
+        /* the bitmap operations can handle cl_idx > sizeof(long) * 8, so
          * there's no need for extra complication here
          */
         if (new_client) {
          * there's no need for extra complication here
          */
         if (new_client) {
-                cl_off = find_first_zero_bit(bitmap, MDS_MAX_CLIENTS);
+                cl_idx = find_first_zero_bit(bitmap, MDS_MAX_CLIENTS);
         repeat:
         repeat:
-                if (cl_off >= MDS_MAX_CLIENTS) {
+                if (cl_idx >= MDS_MAX_CLIENTS) {
                         CERROR("no room for clients - fix MDS_MAX_CLIENTS\n");
                         return -ENOMEM;
                 }
                         CERROR("no room for clients - fix MDS_MAX_CLIENTS\n");
                         return -ENOMEM;
                 }
-                if (test_and_set_bit(cl_off, bitmap)) {
+                if (test_and_set_bit(cl_idx, bitmap)) {
                         CERROR("MDS client %d: found bit is set in bitmap\n",
                         CERROR("MDS client %d: found bit is set in bitmap\n",
-                               cl_off);
-                        cl_off = find_next_zero_bit(bitmap, MDS_MAX_CLIENTS,
-                                                    cl_off);
+                               cl_idx);
+                        cl_idx = find_next_zero_bit(bitmap, MDS_MAX_CLIENTS,
+                                                    cl_idx);
                         goto repeat;
                 }
         } else {
                         goto repeat;
                 }
         } else {
-                if (test_and_set_bit(cl_off, bitmap)) {
+                if (test_and_set_bit(cl_idx, bitmap)) {
                         CERROR("MDS client %d: bit already set in bitmap!!\n",
                         CERROR("MDS client %d: bit already set in bitmap!!\n",
-                               cl_off);
+                               cl_idx);
                         LBUG();
                 }
         }
 
                         LBUG();
                 }
         }
 
-        CDEBUG(D_INFO, "client at offset %d with UUID '%s' added\n",
-               cl_off, med->med_mcd->mcd_uuid);
+        CDEBUG(D_INFO, "client at index %d with UUID '%s' added\n",
+               cl_idx, med->med_mcd->mcd_uuid);
 
 
-        med->med_off = cl_off;
+        med->med_idx = cl_idx;
+        med->med_off = MDS_LR_CLIENT_START + (cl_idx * MDS_LR_CLIENT_SIZE);
 
         if (new_client) {
                 struct obd_run_ctxt saved;
 
         if (new_client) {
                 struct obd_run_ctxt saved;
-                loff_t off = MDS_LR_CLIENT + (cl_off * MDS_LR_SIZE);
+                loff_t off = med->med_off;
                 ssize_t written;
                 void *handle;
 
                 ssize_t written;
                 void *handle;
 
@@ -114,14 +118,16 @@ int mds_client_add(struct obd_device *obd, struct mds_obd *mds,
                  * could use any of them, or maybe an FSFILT_OP_NONE is best?
                  */
                 handle = fsfilt_start(obd,mds->mds_rcvd_filp->f_dentry->d_inode,
                  * could use any of them, or maybe an FSFILT_OP_NONE is best?
                  */
                 handle = fsfilt_start(obd,mds->mds_rcvd_filp->f_dentry->d_inode,
-                                      FSFILT_OP_SETATTR);
+                                      FSFILT_OP_SETATTR, NULL);
                 if (IS_ERR(handle)) {
                         written = PTR_ERR(handle);
                         CERROR("unable to start transaction: rc %d\n",
                                (int)written);
                 } else {
                 if (IS_ERR(handle)) {
                         written = PTR_ERR(handle);
                         CERROR("unable to start transaction: rc %d\n",
                                (int)written);
                 } else {
-                        written = lustre_fwrite(mds->mds_rcvd_filp,med->med_mcd,
-                                                sizeof(*med->med_mcd), &off);
+                        written = fsfilt_write_record(obd, mds->mds_rcvd_filp,
+                                                      (char *)med->med_mcd,
+                                                      sizeof(*med->med_mcd),
+                                                      &off);
                         fsfilt_commit(obd,mds->mds_rcvd_filp->f_dentry->d_inode,
                                       handle, 0);
                 }
                         fsfilt_commit(obd,mds->mds_rcvd_filp->f_dentry->d_inode,
                                       handle, 0);
                 }
@@ -132,8 +138,8 @@ int mds_client_add(struct obd_device *obd, struct mds_obd *mds,
                                 RETURN(written);
                         RETURN(-EIO);
                 }
                                 RETURN(written);
                         RETURN(-EIO);
                 }
-                CDEBUG(D_INFO, "wrote client mcd at off %u (len %u)\n",
-                       MDS_LR_CLIENT + (cl_off * MDS_LR_SIZE),
+                CDEBUG(D_INFO, "wrote client mcd at idx %u off %llu (len %u)\n",
+                       med->med_idx, med->med_off,
                        (unsigned int)sizeof(*med->med_mcd));
         }
         return 0;
                        (unsigned int)sizeof(*med->med_mcd));
         }
         return 0;
@@ -143,11 +149,11 @@ int mds_client_free(struct obd_export *exp)
 {
         struct mds_export_data *med = &exp->exp_mds_data;
         struct mds_obd *mds = &exp->exp_obd->u.mds;
 {
         struct mds_export_data *med = &exp->exp_mds_data;
         struct mds_obd *mds = &exp->exp_obd->u.mds;
+        struct obd_device *obd = exp->exp_obd;
         struct mds_client_data zero_mcd;
         struct obd_run_ctxt saved;
         int written;
         unsigned long *bitmap = mds->mds_client_bitmap;
         struct mds_client_data zero_mcd;
         struct obd_run_ctxt saved;
         int written;
         unsigned long *bitmap = mds->mds_client_bitmap;
-        loff_t off;
 
         LASSERT(bitmap);
         if (!med->med_mcd)
 
         LASSERT(bitmap);
         if (!med->med_mcd)
@@ -157,30 +163,29 @@ int mds_client_free(struct obd_export *exp)
         if (!strcmp(med->med_mcd->mcd_uuid, "OBD_CLASS_UUID"))
                 GOTO(free_and_out, 0);
 
         if (!strcmp(med->med_mcd->mcd_uuid, "OBD_CLASS_UUID"))
                 GOTO(free_and_out, 0);
 
-        off = MDS_LR_CLIENT + (med->med_off * MDS_LR_SIZE);
-
-        CDEBUG(D_INFO, "freeing client at offset %u (%lld)with UUID '%s'\n",
-               med->med_off, off, med->med_mcd->mcd_uuid);
+        CDEBUG(D_INFO, "freeing client at index %u (%lld)with UUID '%s'\n",
+               med->med_idx, med->med_off, med->med_mcd->mcd_uuid);
 
 
-        if (!test_and_clear_bit(med->med_off, bitmap)) {
+        if (!test_and_clear_bit(med->med_idx, bitmap)) {
                 CERROR("MDS client %u: bit already clear in bitmap!!\n",
                 CERROR("MDS client %u: bit already clear in bitmap!!\n",
-                       med->med_off);
+                       med->med_idx);
                 LBUG();
         }
 
         memset(&zero_mcd, 0, sizeof zero_mcd);
         push_ctxt(&saved, &mds->mds_ctxt, NULL);
                 LBUG();
         }
 
         memset(&zero_mcd, 0, sizeof zero_mcd);
         push_ctxt(&saved, &mds->mds_ctxt, NULL);
-        written = lustre_fwrite(mds->mds_rcvd_filp, (const char *)&zero_mcd,
-                                sizeof(zero_mcd), &off);
+        written = fsfilt_write_record(obd, mds->mds_rcvd_filp,
+                                      (char *)&zero_mcd, sizeof(zero_mcd),
+                                      &med->med_off);
         pop_ctxt(&saved, &mds->mds_ctxt, NULL);
 
         if (written != sizeof(zero_mcd)) {
         pop_ctxt(&saved, &mds->mds_ctxt, NULL);
 
         if (written != sizeof(zero_mcd)) {
-                CERROR("error zeroing out client %s off %d in %s: %d\n",
-                       med->med_mcd->mcd_uuid, med->med_off, LAST_RCVD,
+                CERROR("error zeroing out client %s index %d in %s: %d\n",
+                       med->med_mcd->mcd_uuid, med->med_idx, LAST_RCVD,
                        written);
         } else {
                 CDEBUG(D_INFO, "zeroed out disconnecting client %s at off %d\n",
                        written);
         } else {
                 CDEBUG(D_INFO, "zeroed out disconnecting client %s at off %d\n",
-                       med->med_mcd->mcd_uuid, med->med_off);
+                       med->med_mcd->mcd_uuid, med->med_idx);
         }
 
  free_and_out:
         }
 
  free_and_out:
@@ -199,20 +204,20 @@ static int mds_server_free_data(struct mds_obd *mds)
         return 0;
 }
 
         return 0;
 }
 
-static int mds_read_last_rcvd(struct obd_device *obddev, struct file *f)
+static int mds_read_last_rcvd(struct obd_device *obd, struct file *file)
 {
 {
-        struct mds_obd *mds = &obddev->u.mds;
+        struct mds_obd *mds = &obd->u.mds;
         struct mds_server_data *msd;
         struct mds_client_data *mcd = NULL;
         loff_t off = 0;
         struct mds_server_data *msd;
         struct mds_client_data *mcd = NULL;
         loff_t off = 0;
-        int cl_off;
-        unsigned long last_rcvd_size = f->f_dentry->d_inode->i_size;
+        int cl_idx;
+        unsigned long last_rcvd_size = file->f_dentry->d_inode->i_size;
         __u64 last_transno = 0;
         __u64 last_transno = 0;
-        __u64 last_mount;
+        __u64 mount_count;
         int rc = 0;
 
         int rc = 0;
 
-        LASSERT(sizeof(struct mds_client_data) == MDS_LR_SIZE);
-        LASSERT(sizeof(struct mds_server_data) <= MDS_LR_CLIENT);
+        LASSERT(sizeof(struct mds_client_data) == MDS_LR_CLIENT_SIZE);
+        LASSERT(sizeof(struct mds_server_data) <= MDS_LR_SERVER_SIZE);
 
         OBD_ALLOC(msd, sizeof(*msd));
         if (!msd)
 
         OBD_ALLOC(msd, sizeof(*msd));
         if (!msd)
@@ -225,40 +230,71 @@ static int mds_read_last_rcvd(struct obd_device *obddev, struct file *f)
                 RETURN(-ENOMEM);
         }
 
                 RETURN(-ENOMEM);
         }
 
-        rc = lustre_fread(f, (char *)msd, sizeof(*msd), &off);
-
         mds->mds_server_data = msd;
         mds->mds_server_data = msd;
-        if (rc == 0) {
-                CERROR("%s: empty MDS %s, new MDS?\n", obddev->obd_name,
-                       LAST_RCVD);
+
+        if (last_rcvd_size == 0) {
+                CWARN("%s: initializing new %s\n", obd->obd_name, LAST_RCVD);
+                memcpy(msd->msd_uuid, obd->obd_uuid.uuid,sizeof(msd->msd_uuid));
+                msd->msd_server_size = cpu_to_le32(MDS_LR_SERVER_SIZE);
+                msd->msd_client_start = cpu_to_le32(MDS_LR_CLIENT_START);
+                msd->msd_client_size = cpu_to_le16(MDS_LR_CLIENT_SIZE);
+
                 RETURN(0);
         }
 
                 RETURN(0);
         }
 
+        rc = fsfilt_read_record(obd, file, (char *)msd, sizeof(*msd), &off);
+
         if (rc != sizeof(*msd)) {
         if (rc != sizeof(*msd)) {
-                CERROR("error reading MDS %s: rc = %d\n", LAST_RCVD, rc);
+                CERROR("error reading MDS %s: rc = %d\n", LAST_RCVD,rc);
                 if (rc > 0)
                         rc = -EIO;
                 GOTO(err_msd, rc);
         }
                 if (rc > 0)
                         rc = -EIO;
                 GOTO(err_msd, rc);
         }
+        if (!msd->msd_server_size)
+                msd->msd_server_size = cpu_to_le32(MDS_LR_SERVER_SIZE);
+        if (!msd->msd_client_start)
+                msd->msd_client_start = cpu_to_le32(MDS_LR_CLIENT_START);
+        if (!msd->msd_client_size)
+                msd->msd_client_size = cpu_to_le16(MDS_LR_CLIENT_SIZE);
+
+        if (msd->msd_feature_incompat) {
+                CERROR("unsupported incompat feature %x\n",
+                       le32_to_cpu(msd->msd_feature_incompat));
+                GOTO(err_msd, rc = -EINVAL);
+        }
+        if (msd->msd_feature_rocompat) {
+                CERROR("unsupported read-only feature %x\n",
+                       le32_to_cpu(msd->msd_feature_rocompat));
+                /* Do something like remount filesystem read-only */
+                GOTO(err_msd, rc = -EINVAL);
+        }
 
 
-        CDEBUG(D_INODE, "last_rcvd has size %lu (msd + %lu clients)\n",
-               last_rcvd_size, (last_rcvd_size - MDS_LR_CLIENT)/MDS_LR_SIZE);
-
-        /*
-         * When we do a clean MDS shutdown, we save the last_transno into
-         * the header.
-         */
         last_transno = le64_to_cpu(msd->msd_last_transno);
         mds->mds_last_transno = last_transno;
         last_transno = le64_to_cpu(msd->msd_last_transno);
         mds->mds_last_transno = last_transno;
-        CDEBUG(D_INODE, "got "LPU64" for server last_rcvd value\n",
-               last_transno);
-
-        last_mount = le64_to_cpu(msd->msd_mount_count);
-        mds->mds_mount_count = last_mount;
-        CDEBUG(D_INODE, "got "LPU64" for server last_mount value\n",last_mount);
 
 
-        /* off is adjusted by lustre_fread, so we don't adjust it in the loop */
-        for (off = MDS_LR_CLIENT, cl_off = 0; off < last_rcvd_size; cl_off++) {
+        mount_count = le64_to_cpu(msd->msd_mount_count);
+        mds->mds_mount_count = mount_count;
+
+        CDEBUG(D_INODE, "%s: server last_transno: "LPU64"\n",
+               obd->obd_name, last_transno);
+        CDEBUG(D_INODE, "%s: server mount_count: "LPU64"\n",
+               obd->obd_name, mount_count);
+        CDEBUG(D_INODE, "%s: server data size: %u\n",
+               obd->obd_name, le32_to_cpu(msd->msd_server_size));
+        CDEBUG(D_INODE, "%s: per-client data start: %u\n",
+               obd->obd_name, le32_to_cpu(msd->msd_client_start));
+        CDEBUG(D_INODE, "%s: per-client data size: %u\n",
+               obd->obd_name, le32_to_cpu(msd->msd_client_size));
+        CDEBUG(D_INODE, "%s: last_rcvd size: %lu\n",
+               obd->obd_name, last_rcvd_size);
+        CDEBUG(D_INODE, "%s: last_rcvd clients: %lu\n", obd->obd_name,
+               (last_rcvd_size - MDS_LR_CLIENT_START) / MDS_LR_CLIENT_SIZE);
+
+        /* When we do a clean MDS shutdown, we save the last_transno into
+         * the header.  If we find clients with higher last_transno values
+         * then those clients may need recovery done. */
+        for (cl_idx = 0; off < last_rcvd_size; cl_idx++) {
+                __u64 last_transno;
                 int mount_age;
 
                 if (!mcd) {
                 int mount_age;
 
                 if (!mcd) {
@@ -267,10 +303,16 @@ static int mds_read_last_rcvd(struct obd_device *obddev, struct file *f)
                                 GOTO(err_msd, rc = -ENOMEM);
                 }
 
                                 GOTO(err_msd, rc = -ENOMEM);
                 }
 
-                rc = lustre_fread(f, (char *)mcd, sizeof(*mcd), &off);
+                /* Don't assume off is incremented properly, in case
+                 * sizeof(*mcd) isn't the same as msd->msd_client_size.
+                 */
+                off = le32_to_cpu(msd->msd_client_start) +
+                        cl_idx * le16_to_cpu(msd->msd_client_size);
+                rc = fsfilt_read_record(obd, file, (char *)mcd,
+                                        sizeof(*mcd), &off);
                 if (rc != sizeof(*mcd)) {
                         CERROR("error reading MDS %s offset %d: rc = %d\n",
                 if (rc != sizeof(*mcd)) {
                         CERROR("error reading MDS %s offset %d: rc = %d\n",
-                               LAST_RCVD, cl_off, rc);
+                               LAST_RCVD, cl_idx, rc);
                         if (rc > 0) /* XXX fatal error or just abort reading? */
                                 rc = -EIO;
                         break;
                         if (rc > 0) /* XXX fatal error or just abort reading? */
                                 rc = -EIO;
                         break;
@@ -278,7 +320,7 @@ static int mds_read_last_rcvd(struct obd_device *obddev, struct file *f)
 
                 if (mcd->mcd_uuid[0] == '\0') {
                         CDEBUG(D_INFO, "skipping zeroed client at offset %d\n",
 
                 if (mcd->mcd_uuid[0] == '\0') {
                         CDEBUG(D_INFO, "skipping zeroed client at offset %d\n",
-                               cl_off);
+                               cl_idx);
                         continue;
                 }
 
                         continue;
                 }
 
@@ -287,10 +329,15 @@ static int mds_read_last_rcvd(struct obd_device *obddev, struct file *f)
                 /* These exports are cleaned up by mds_disconnect(), so they
                  * need to be set up like real exports as mds_connect() does.
                  */
                 /* These exports are cleaned up by mds_disconnect(), so they
                  * need to be set up like real exports as mds_connect() does.
                  */
-                mount_age = last_mount - le64_to_cpu(mcd->mcd_mount_count);
+                mount_age = mount_count - le64_to_cpu(mcd->mcd_mount_count);
                 if (mount_age < MDS_MOUNT_RECOV) {
                 if (mount_age < MDS_MOUNT_RECOV) {
-                        struct obd_export *exp = class_new_export(obddev);
+                        struct obd_export *exp = class_new_export(obd);
                         struct mds_export_data *med;
                         struct mds_export_data *med;
+                        CERROR("RCVRNG CLIENT uuid: %s off: %d lr: "LPU64
+                               "srv lr: "LPU64" mnt: "LPU64" last mount: "LPU64
+                               "\n", mcd->mcd_uuid, cl_idx,
+                               last_transno, le64_to_cpu(msd->msd_last_transno),
+                               le64_to_cpu(mcd->mcd_mount_count), mount_count);
 
                         if (!exp) {
                                 rc = -ENOMEM;
 
                         if (!exp) {
                                 rc = -ENOMEM;
@@ -301,35 +348,35 @@ static int mds_read_last_rcvd(struct obd_device *obddev, struct file *f)
                                sizeof exp->exp_client_uuid.uuid);
                         med = &exp->exp_mds_data;
                         med->med_mcd = mcd;
                                sizeof exp->exp_client_uuid.uuid);
                         med = &exp->exp_mds_data;
                         med->med_mcd = mcd;
-                        mds_client_add(obddev, mds, med, cl_off);
+                        mds_client_add(obd, mds, med, cl_idx);
                         /* create helper if export init gets more complex */
                         INIT_LIST_HEAD(&med->med_open_head);
                         spin_lock_init(&med->med_open_lock);
 
                         mcd = NULL;
                         /* create helper if export init gets more complex */
                         INIT_LIST_HEAD(&med->med_open_head);
                         spin_lock_init(&med->med_open_lock);
 
                         mcd = NULL;
-                        obddev->obd_recoverable_clients++;
+                        obd->obd_recoverable_clients++;
                         class_export_put(exp);
                 } else {
                         CDEBUG(D_INFO, "discarded client %d, UUID '%s', count "
                         class_export_put(exp);
                 } else {
                         CDEBUG(D_INFO, "discarded client %d, UUID '%s', count "
-                               LPU64"\n", cl_off, mcd->mcd_uuid,
+                               LPU64"\n", cl_idx, mcd->mcd_uuid,
                                le64_to_cpu(mcd->mcd_mount_count));
                 }
 
                                le64_to_cpu(mcd->mcd_mount_count));
                 }
 
-                CDEBUG(D_OTHER, "client at offset %d has last_transno = %Lu\n",
-                       cl_off, (unsigned long long)last_transno);
+                CDEBUG(D_OTHER, "client at offset %d has last_transno = "
+                       LPU64"\n", cl_idx, last_transno);
 
                 if (last_transno > mds->mds_last_transno)
                         mds->mds_last_transno = last_transno;
         }
 
 
                 if (last_transno > mds->mds_last_transno)
                         mds->mds_last_transno = last_transno;
         }
 
-        obddev->obd_last_committed = mds->mds_last_transno;
-        if (obddev->obd_recoverable_clients) {
+        obd->obd_last_committed = mds->mds_last_transno;
+        if (obd->obd_recoverable_clients) {
                 CERROR("RECOVERY: %d recoverable clients, last_transno "
                        LPU64"\n",
                 CERROR("RECOVERY: %d recoverable clients, last_transno "
                        LPU64"\n",
-                       obddev->obd_recoverable_clients, mds->mds_last_transno);
-                obddev->obd_next_recovery_transno = obddev->obd_last_committed
+                       obd->obd_recoverable_clients, mds->mds_last_transno);
+                obd->obd_next_recovery_transno = obd->obd_last_committed
                         + 1;
                         + 1;
-                obddev->obd_recovering = 1;
+                obd->obd_recovering = 1;
         }
 
         if (mcd)
         }
 
         if (mcd)
@@ -342,12 +389,12 @@ err_msd:
         return rc;
 }
 
         return rc;
 }
 
-static int mds_fs_prep(struct obd_device *obddev)
+static int mds_fs_prep(struct obd_device *obd)
 {
 {
-        struct mds_obd *mds = &obddev->u.mds;
+        struct mds_obd *mds = &obd->u.mds;
         struct obd_run_ctxt saved;
         struct dentry *dentry;
         struct obd_run_ctxt saved;
         struct dentry *dentry;
-        struct file *f;
+        struct file *file;
         int rc;
 
         push_ctxt(&saved, &mds->mds_ctxt, NULL);
         int rc;
 
         push_ctxt(&saved, &mds->mds_ctxt, NULL);
@@ -373,46 +420,76 @@ static int mds_fs_prep(struct obd_device *obddev)
         }
         mds->mds_fid_de = dentry;
 
         }
         mds->mds_fid_de = dentry;
 
-        f = filp_open(LAST_RCVD, O_RDWR | O_CREAT, 0644);
-        if (IS_ERR(f)) {
-                rc = PTR_ERR(f);
+        dentry = simple_mkdir(current->fs->pwd, "PENDING", 0777);
+        if (IS_ERR(dentry)) {
+                rc = PTR_ERR(dentry);
+                CERROR("cannot create PENDING directory: rc = %d\n", rc);
+                GOTO(err_fid, rc);
+        }
+        mds->mds_pending_dir = dentry;
+
+        dentry = simple_mkdir(current->fs->pwd, "LOGS", 0700);
+        if (IS_ERR(dentry)) {
+                rc = PTR_ERR(dentry);
+                CERROR("cannot create LOGS directory: rc = %d\n", rc);
+                GOTO(err_pending, rc);
+        }
+        mds->mds_logs_dir = dentry;
+
+        file = filp_open(LAST_RCVD, O_RDWR | O_CREAT, 0644);
+        if (IS_ERR(file)) {
+                rc = PTR_ERR(file);
                 CERROR("cannot open/create %s file: rc = %d\n", LAST_RCVD, rc);
                 CERROR("cannot open/create %s file: rc = %d\n", LAST_RCVD, rc);
-                GOTO(err_pop, rc = PTR_ERR(f));
+
+                GOTO(err_logs, rc = PTR_ERR(file));
         }
         }
-        if (!S_ISREG(f->f_dentry->d_inode->i_mode)) {
+        if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
                 CERROR("%s is not a regular file!: mode = %o\n", LAST_RCVD,
                 CERROR("%s is not a regular file!: mode = %o\n", LAST_RCVD,
-                       f->f_dentry->d_inode->i_mode);
+                       file->f_dentry->d_inode->i_mode);
                 GOTO(err_filp, rc = -ENOENT);
         }
 
                 GOTO(err_filp, rc = -ENOENT);
         }
 
-        rc = fsfilt_journal_data(obddev, f);
+        rc = fsfilt_journal_data(obd, file);
         if (rc) {
                 CERROR("cannot journal data on %s: rc = %d\n", LAST_RCVD, rc);
                 GOTO(err_filp, rc);
         }
 
         if (rc) {
                 CERROR("cannot journal data on %s: rc = %d\n", LAST_RCVD, rc);
                 GOTO(err_filp, rc);
         }
 
-        rc = mds_read_last_rcvd(obddev, f);
+        rc = mds_read_last_rcvd(obd, file);
         if (rc) {
                 CERROR("cannot read %s: rc = %d\n", LAST_RCVD, rc);
                 GOTO(err_client, rc);
         }
         if (rc) {
                 CERROR("cannot read %s: rc = %d\n", LAST_RCVD, rc);
                 GOTO(err_client, rc);
         }
-        mds->mds_rcvd_filp = f;
+        mds->mds_rcvd_filp = file;
+#ifdef I_SKIP_PDFLUSH
+        /*
+         * we need this to protect from deadlock
+         * pdflush vs. lustre_fwrite()
+         */
+        file->f_dentry->d_inode->i_flags |= I_SKIP_PDFLUSH;
+#endif
 err_pop:
         pop_ctxt(&saved, &mds->mds_ctxt, NULL);
 
         return rc;
 
 err_client:
 err_pop:
         pop_ctxt(&saved, &mds->mds_ctxt, NULL);
 
         return rc;
 
 err_client:
-        class_disconnect_exports(obddev, 0);
+        class_disconnect_exports(obd, 0);
 err_filp:
 err_filp:
-        if (filp_close(f, 0))
+        if (filp_close(file, 0))
                 CERROR("can't close %s after error\n", LAST_RCVD);
                 CERROR("can't close %s after error\n", LAST_RCVD);
+err_logs:
+        dput(mds->mds_logs_dir);
+err_pending:
+        dput(mds->mds_pending_dir);
+err_fid:
+        dput(mds->mds_fid_de);
         goto err_pop;
 }
 
         goto err_pop;
 }
 
-int mds_fs_setup(struct obd_device *obddev, struct vfsmount *mnt)
+int mds_fs_setup(struct obd_device *obd, struct vfsmount *mnt)
 {
 {
-        struct mds_obd *mds = &obddev->u.mds;
+        struct mds_obd *mds = &obd->u.mds;
         ENTRY;
 
         mds->mds_vfsmnt = mnt;
         ENTRY;
 
         mds->mds_vfsmnt = mnt;
@@ -421,21 +498,20 @@ int mds_fs_setup(struct obd_device *obddev, struct vfsmount *mnt)
         mds->mds_ctxt.pwdmnt = mnt;
         mds->mds_ctxt.pwd = mnt->mnt_root;
         mds->mds_ctxt.fs = get_ds();
         mds->mds_ctxt.pwdmnt = mnt;
         mds->mds_ctxt.pwd = mnt->mnt_root;
         mds->mds_ctxt.fs = get_ds();
-        RETURN(mds_fs_prep(obddev));
+        RETURN(mds_fs_prep(obd));
 }
 
 }
 
-int mds_fs_cleanup(struct obd_device *obddev, int failover)
+int mds_fs_cleanup(struct obd_device *obd, int flags)
 {
 {
-        struct mds_obd *mds = &obddev->u.mds;
+        struct mds_obd *mds = &obd->u.mds;
         struct obd_run_ctxt saved;
         int rc = 0;
 
         struct obd_run_ctxt saved;
         int rc = 0;
 
-        if (failover)
+        if (flags & OBD_OPT_FAILOVER)
                 CERROR("%s: shutting down for failover; client state will"
                 CERROR("%s: shutting down for failover; client state will"
-                       " be preserved.\n", obddev->obd_name);
+                       " be preserved.\n", obd->obd_name);
 
 
-        class_disconnect_exports(obddev, failover); /* this cleans up client
-                                                   info too */
+        class_disconnect_exports(obd, flags); /* cleans up client info too */
         mds_server_free_data(mds);
 
         push_ctxt(&saved, &mds->mds_ctxt, NULL);
         mds_server_free_data(mds);
 
         push_ctxt(&saved, &mds->mds_ctxt, NULL);
@@ -443,7 +519,15 @@ int mds_fs_cleanup(struct obd_device *obddev, int failover)
                 rc = filp_close(mds->mds_rcvd_filp, 0);
                 mds->mds_rcvd_filp = NULL;
                 if (rc)
                 rc = filp_close(mds->mds_rcvd_filp, 0);
                 mds->mds_rcvd_filp = NULL;
                 if (rc)
-                        CERROR("last_rcvd file won't close, rc=%d\n", rc);
+                        CERROR("%s file won't close, rc=%d\n", LAST_RCVD, rc);
+        }
+        if (mds->mds_logs_dir) {
+                l_dput(mds->mds_logs_dir);
+                mds->mds_logs_dir = NULL;
+        }
+        if (mds->mds_pending_dir) {
+                l_dput(mds->mds_pending_dir);
+                mds->mds_pending_dir = NULL;
         }
         pop_ctxt(&saved, &mds->mds_ctxt, NULL);
         shrink_dcache_parent(mds->mds_fid_de);
         }
         pop_ctxt(&saved, &mds->mds_ctxt, NULL);
         shrink_dcache_parent(mds->mds_fid_de);
@@ -451,3 +535,233 @@ int mds_fs_cleanup(struct obd_device *obddev, int failover)
 
         return rc;
 }
 
         return rc;
 }
+
+/* This is a callback from the llog_* functions.
+ * Assumes caller has already pushed us into the kernel context.
+ *
+ * Closes the file backing 'loghandle' and frees the handle.  When the log is
+ * not a catalog and its header count is zero, llog_delete_log() is also
+ * called on it and its file is unlinked from the LOGS directory; the LOGS
+ * directory inode's i_sem is held from before the delete until after the
+ * unlink.
+ *
+ * Returns the filp_close() result, or the vfs_unlink() error if the close
+ * itself succeeded but the unlink failed. */
+int mds_log_close(struct llog_handle *cathandle, struct llog_handle *loghandle)
+{
+        struct llog_object_hdr *llh = loghandle->lgh_hdr;
+        struct mds_obd *mds = &cathandle->lgh_obd->u.mds;
+        struct dentry *dchild = NULL; /* non-NULL only when deleting the log */
+        int rc;
+        ENTRY;
+
+        /* If we are going to delete this log, grab a ref before we close
+         * it so we don't have to immediately do another lookup.
+         */
+        if (llh->llh_hdr.lth_type != LLOG_CATALOG_MAGIC && llh->llh_count == 0){
+                CDEBUG(D_INODE, "deleting log file "LPX64":%x\n",
+                       loghandle->lgh_cookie.lgc_lgl.lgl_oid,
+                       loghandle->lgh_cookie.lgc_lgl.lgl_ogen);
+                /* i_sem is released at the bottom, after the unlink */
+                down(&mds->mds_logs_dir->d_inode->i_sem);
+                dchild = dget(loghandle->lgh_file->f_dentry);
+                llog_delete_log(cathandle, loghandle);
+        } else {
+                CDEBUG(D_INODE, "closing log file "LPX64":%x\n",
+                       loghandle->lgh_cookie.lgc_lgl.lgl_oid,
+                       loghandle->lgh_cookie.lgc_lgl.lgl_ogen);
+        }
+
+        rc = filp_close(loghandle->lgh_file, 0);
+
+        /* loghandle (and llh, which was read from it) must not be used
+         * past this point */
+        llog_free_handle(loghandle); /* also removes loghandle from list */
+
+        if (dchild) {
+                int err = vfs_unlink(mds->mds_logs_dir->d_inode, dchild);
+                if (err) {
+                        /* NOTE(review): "%*s" sets a minimum field width,
+                         * not a maximum; "%.*s" may have been intended --
+                         * confirm */
+                        CERROR("error unlinking empty log %*s: rc %d\n",
+                               dchild->d_name.len, dchild->d_name.name, err);
+                        /* a close error takes precedence over unlink error */
+                        if (!rc)
+                                rc = err;
+                }
+                /* drop the reference taken by dget() above */
+                l_dput(dchild);
+                up(&mds->mds_logs_dir->d_inode->i_sem);
+        }
+        RETURN(rc);
+}
+
+/* This is a callback from the llog_* functions.
+ * Assumes caller has already pushed us into the kernel context.
+ *
+ * Opens an existing log file identified by the object id and generation
+ * stored in 'logcookie' and returns a newly allocated handle for it.  The
+ * dentry is looked up with mds_fid2dentry() while holding the LOGS directory
+ * inode's i_sem, then opened read-write with dentry_open().
+ *
+ * Returns the handle on success, or ERR_PTR(-ENOMEM) if the handle cannot be
+ * allocated, ERR_PTR(-ENOENT) if the dentry has no inode, or the ERR_PTR()
+ * form of the lookup/open error otherwise. */
+struct llog_handle *mds_log_open(struct obd_device *obd,
+                                 struct llog_cookie *logcookie)
+{
+        /* build a fid for a regular file from the cookie's id/generation */
+        struct ll_fid fid = { .id = logcookie->lgc_lgl.lgl_oid,
+                              .generation = logcookie->lgc_lgl.lgl_ogen,
+                              .f_type = S_IFREG };
+        struct llog_handle *loghandle;
+        struct dentry *dchild;
+        int rc;
+        ENTRY;
+
+        loghandle = llog_alloc_handle();
+        if (loghandle == NULL)
+                RETURN(ERR_PTR(-ENOMEM));
+
+        down(&obd->u.mds.mds_logs_dir->d_inode->i_sem);
+        dchild = mds_fid2dentry(&obd->u.mds, &fid, NULL);
+        up(&obd->u.mds.mds_logs_dir->d_inode->i_sem);
+        if (IS_ERR(dchild)) {
+                rc = PTR_ERR(dchild);
+                CERROR("error looking up log file "LPX64":%x: rc %d\n",
+                       fid.id, fid.generation, rc);
+                GOTO(out, rc);
+        }
+
+        /* a dentry without an inode means the log file does not exist */
+        if (dchild->d_inode == NULL) {
+                rc = -ENOENT;
+                CERROR("nonexistent log file "LPX64":%x: rc %d\n",
+                       fid.id, fid.generation, rc);
+                GOTO(out_put, rc);
+        }
+
+        /* dentry_open does a dput(de) and mntput(mds->mds_vfsmnt) on error */
+        mntget(obd->u.mds.mds_vfsmnt);
+        loghandle->lgh_file = dentry_open(dchild, obd->u.mds.mds_vfsmnt,
+                                          O_RDWR | O_LARGEFILE);
+        if (IS_ERR(loghandle->lgh_file)) {
+                rc = PTR_ERR(loghandle->lgh_file);
+                CERROR("error opening logfile "LPX64":%x: rc %d\n",
+                       fid.id, fid.generation, rc);
+                /* skip out_put: per the comment above, dentry_open has
+                 * already dropped the dentry reference on failure */
+                GOTO(out, rc);
+        }
+        /* remember the cookie and install the MDS llog callbacks */
+        memcpy(&loghandle->lgh_cookie, logcookie, sizeof(*logcookie));
+        loghandle->lgh_log_create = mds_log_create;
+        loghandle->lgh_log_open = mds_log_open;
+        loghandle->lgh_log_close = mds_log_close;
+        loghandle->lgh_obd = obd;
+
+        RETURN(loghandle);
+
+out_put:
+        l_dput(dchild);
+out:
+        llog_free_handle(loghandle);
+        return ERR_PTR(rc);
+}
+
+/* This is a callback from the llog_* functions.
+ * Assumes caller has already pushed us into the kernel context.
+ *
+ * Allocates a log handle and opens a backing file for it under LOGS/.  While
+ * the MDS has no catalog handle yet (mds_catalog is NULL) the fixed name
+ * "LOGS/catalog" is opened, creating it if needed.  Otherwise a new plain
+ * log named from the current time and the catalog's lgh_index is created
+ * with O_EXCL, retrying with a later index on a name collision.
+ *
+ * On success the handle's cookie is filled in from the file's inode number
+ * and generation and the MDS llog callbacks are installed.
+ *
+ * Returns the new handle, or ERR_PTR(-ENOMEM) / the ERR_PTR() form of the
+ * filp_open() error on failure. */
+struct llog_handle *mds_log_create(struct obd_device *obd)
+{
+        /* Sized for the worst case: "LOGS/log" (8) + a 64-bit seconds value
+         * (20 digits) + '.' + a 32-bit index (10 digits) + NUL = 40 bytes. */
+        char logbuf[48], *logname; /* LOGS/logSSSSSSSSSS.count */
+        struct llog_handle *loghandle;
+        int rc, open_flags = O_RDWR | O_CREAT | O_LARGEFILE;
+        ENTRY;
+
+        loghandle = llog_alloc_handle();
+        if (!loghandle)
+                RETURN(ERR_PTR(-ENOMEM));
+
+retry:
+        if (!obd->u.mds.mds_catalog) {
+                logname = "LOGS/catalog";
+        } else {
+                /* Bounded formatting so a large index can never run past
+                 * the end of logbuf.  No trailing newline: the name is a
+                 * path component, not a log message. */
+                snprintf(logbuf, sizeof(logbuf), "LOGS/log%lu.%u",
+                         CURRENT_SECONDS, obd->u.mds.mds_catalog->lgh_index++);
+                /* plain logs must be freshly created, never reopened */
+                open_flags |= O_EXCL;
+                logname = logbuf;
+        }
+        loghandle->lgh_file = filp_open(logname, open_flags, 0644);
+        if (IS_ERR(loghandle->lgh_file)) {
+                rc = PTR_ERR(loghandle->lgh_file);
+                if (rc == -EEXIST) {
+                        CDEBUG(D_HA, "collision in logfile %s creation\n",
+                               logname);
+                        obd->u.mds.mds_catalog->lgh_index++;
+                        goto retry;
+                }
+                CERROR("error opening/creating %s: rc %d\n", logname, rc);
+                GOTO(out_handle, rc);
+        }
+
+        /* the log is later found again by inode number and generation */
+        loghandle->lgh_cookie.lgc_lgl.lgl_oid =
+                loghandle->lgh_file->f_dentry->d_inode->i_ino;
+        loghandle->lgh_cookie.lgc_lgl.lgl_ogen =
+                loghandle->lgh_file->f_dentry->d_inode->i_generation;
+        loghandle->lgh_log_create = mds_log_create;
+        loghandle->lgh_log_open = mds_log_open;
+        loghandle->lgh_log_close = mds_log_close;
+        loghandle->lgh_obd = obd;
+
+        RETURN(loghandle);
+
+out_handle:
+        llog_free_handle(loghandle);
+        return ERR_PTR(rc);
+}
+
+/* Open the MDS llog catalog, creating one if none is recorded (or the
+ * recorded one cannot be opened), and initialise it with llog_init_catalog().
+ *
+ * The catalog's object id/generation are kept little-endian in the server
+ * data (msd_catalog_oid/msd_catalog_ogen); a newly created catalog is
+ * recorded there and written out with mds_update_server_data().
+ *
+ * Runs inside the MDS run context (pushed and popped here).  Returns the
+ * catalog handle, or an ERR_PTR() if creation or the server-data update
+ * fails. */
+struct llog_handle *mds_get_catalog(struct obd_device *obd)
+{
+        struct mds_obd *mds = &obd->u.mds;
+        struct mds_server_data *msd = mds->mds_server_data;
+        struct llog_handle *cathandle = NULL;
+        struct obd_run_ctxt saved;
+        int rc = 0;
+        ENTRY;
+
+        push_ctxt(&saved, &mds->mds_ctxt, NULL);
+
+        /* First choice: reopen the catalog recorded in the server data. */
+        if (msd->msd_catalog_oid) {
+                struct llog_cookie cookie;
+
+                cookie.lgc_lgl.lgl_oid = le64_to_cpu(msd->msd_catalog_oid);
+                cookie.lgc_lgl.lgl_ogen = le32_to_cpu(msd->msd_catalog_ogen);
+
+                cathandle = mds_log_open(obd, &cookie);
+                if (IS_ERR(cathandle)) {
+                        CERROR("error opening catalog "LPX64":%x: rc %d\n",
+                               cookie.lgc_lgl.lgl_oid,
+                               cookie.lgc_lgl.lgl_ogen,
+                               (int)PTR_ERR(cathandle));
+                        /* forget the stale record so a new catalog is
+                         * created just below */
+                        msd->msd_catalog_oid = 0;
+                        msd->msd_catalog_ogen = 0;
+                }
+                /* ORPHANS FIXME: compare catalog UUID to msd_peeruuid */
+        }
+
+        /* Fallback: nothing recorded (or open failed), so create one. */
+        if (!msd->msd_catalog_oid) {
+                cathandle = mds_log_create(obd);
+                if (IS_ERR(cathandle)) {
+                        CERROR("error creating new catalog: rc %d\n",
+                               (int)PTR_ERR(cathandle));
+                        GOTO(out, cathandle);
+                }
+
+                msd->msd_catalog_oid =
+                        cpu_to_le64(cathandle->lgh_cookie.lgc_lgl.lgl_oid);
+                msd->msd_catalog_ogen =
+                        cpu_to_le32(cathandle->lgh_cookie.lgc_lgl.lgl_ogen);
+
+                rc = mds_update_server_data(obd);
+                if (rc) {
+                        CERROR("error writing new catalog to disk: rc %d\n",rc);
+                        GOTO(out_handle, rc);
+                }
+        }
+
+        rc = llog_init_catalog(cathandle, &mds->mds_osc_uuid);
+
+out:
+        pop_ctxt(&saved, &mds->mds_ctxt, NULL);
+        RETURN(cathandle);
+
+out_handle:
+        /* the catalog is passed as both catalog and log being closed */
+        mds_log_close(cathandle, cathandle);
+        cathandle = ERR_PTR(rc);
+        goto out;
+}
+
+/* Release a catalog handle: close every log handle still linked on the
+ * catalog's lgh_list via mds_log_close(), then close the catalog's own file
+ * and free the catalog handle.  A failure to close the catalog file is only
+ * logged. */
+void mds_put_catalog(struct llog_handle *cathandle)
+{
+        struct llog_handle *log, *next;
+        int err;
+        ENTRY;
+
+        /* _safe iteration: mds_log_close() frees the entry being visited */
+        list_for_each_entry_safe(log, next, &cathandle->lgh_list, lgh_list) {
+                mds_log_close(cathandle, log);
+        }
+
+        err = filp_close(cathandle->lgh_file, 0);
+        if (err != 0) {
+                CERROR("error closing catalog: rc %d\n", err);
+        }
+
+        llog_free_handle(cathandle);
+        EXIT;
+}
index 0b62a92..c2d3d77 100644 (file)
@@ -1,9 +1,41 @@
+#ifndef _MDS_INTERNAL_H
+#define _MDS_INTERNAL_H
+static inline struct mds_obd *mds_req2mds(struct ptlrpc_request *req)
+{
+        return &req->rq_export->exp_obd->u.mds;
+}
+
+/* mds/mds_fs.c */
+struct llog_handle *mds_log_create(struct obd_device *obd);
+int mds_log_close(struct llog_handle *cathandle, struct llog_handle *loghandle);
+struct llog_handle *mds_log_open(struct obd_device *obd,
+                                 struct llog_cookie *logcookie);
+struct llog_handle *mds_get_catalog(struct obd_device *obd);
+void mds_put_catalog(struct llog_handle *cathandle);
+
+/* mds/handler.c */
 struct mds_file_data *mds_mfd_new(void);
 void mds_mfd_put(struct mds_file_data *mfd);
 void mds_mfd_destroy(struct mds_file_data *mfd);
 struct mds_file_data *mds_mfd_new(void);
 void mds_mfd_put(struct mds_file_data *mfd);
 void mds_mfd_destroy(struct mds_file_data *mfd);
+
+/* mds/mds_reint.c */
+void mds_commit_cb(struct obd_device *, __u64 last_rcvd, void *data, int error);
+int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle,
+                       struct ptlrpc_request *req, int rc, __u32 op_data);
+
+/* mds/mds_lib.c */
 int mds_update_unpack(struct ptlrpc_request *, int offset,
                       struct mds_update_record *);
 
 int mds_update_unpack(struct ptlrpc_request *, int offset,
                       struct mds_update_record *);
 
+/* mds/mds_lov.c */
+int mds_get_lovtgts(struct mds_obd *mds, int tgt_count,
+                    struct obd_uuid *uuidarray);
+
+/* mds/mds_open.c */
+int mds_open(struct mds_update_record *rec, int offset,
+             struct ptlrpc_request *req, struct lustre_handle *);
+int mds_pin(struct ptlrpc_request *req);
+
 /* mds/mds_fs.c */
 int mds_client_add(struct obd_device *obd, struct mds_obd *mds,
                   struct mds_export_data *med, int cl_off);
 /* mds/mds_fs.c */
 int mds_client_add(struct obd_device *obd, struct mds_obd *mds,
                   struct mds_export_data *med, int cl_off);
@@ -13,3 +45,5 @@ int mds_client_free(struct obd_export *exp);
 void mds_pack_inode2fid(struct ll_fid *fid, struct inode *inode);
 void mds_pack_inode2body(struct mds_body *body, struct inode *inode);
 #endif
 void mds_pack_inode2fid(struct ll_fid *fid, struct inode *inode);
 void mds_pack_inode2body(struct mds_body *body, struct inode *inode);
 #endif
+
+#endif /* _MDS_INTERNAL_H */
index 8f16795..93ac300 100644 (file)
@@ -57,17 +57,15 @@ void mds_pack_inode2fid(struct ll_fid *fid, struct inode *inode)
         fid->f_type = (S_IFMT & inode->i_mode);
 }
 
         fid->f_type = (S_IFMT & inode->i_mode);
 }
 
+/* Note that we can copy all of the fields, just some will not be "valid" */
 void mds_pack_inode2body(struct mds_body *b, struct inode *inode)
 {
 void mds_pack_inode2body(struct mds_body *b, struct inode *inode)
 {
-        b->valid = OBD_MD_FLID | OBD_MD_FLATIME | OBD_MD_FLMTIME |
-                OBD_MD_FLCTIME | OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
-                OBD_MD_FLUID | OBD_MD_FLGID | OBD_MD_FLTYPE | OBD_MD_FLMODE |
-                OBD_MD_FLNLINK | OBD_MD_FLGENER;
+        b->valid = OBD_MD_FLID | OBD_MD_FLCTIME | OBD_MD_FLUID | OBD_MD_FLGID |
+                OBD_MD_FLTYPE | OBD_MD_FLMODE | OBD_MD_FLNLINK | OBD_MD_FLGENER;
 
 
-        /* The MDS file size isn't authoritative for regular files, so don't
-         * even pretend. */
-        if (S_ISREG(inode->i_mode))
-                b->valid &= ~(OBD_MD_FLSIZE | OBD_MD_FLBLOCKS);
+        if (!S_ISREG(inode->i_mode))
+                b->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | OBD_MD_FLATIME |
+                            OBD_MD_FLMTIME;
 
         b->ino = inode->i_ino;
         b->atime = LTIME_S(inode->i_atime);
 
         b->ino = inode->i_ino;
         b->atime = LTIME_S(inode->i_atime);
@@ -80,10 +78,12 @@ void mds_pack_inode2body(struct mds_body *b, struct inode *inode)
         b->gid = inode->i_gid;
         b->flags = inode->i_flags;
         b->rdev = b->rdev;
         b->gid = inode->i_gid;
         b->flags = inode->i_flags;
         b->rdev = b->rdev;
-        b->nlink = inode->i_nlink;
+        /* Return the correct link count for orphan inodes */
+        b->nlink = mds_inode_is_orphan(inode) ? 0 : inode->i_nlink;
         b->generation = inode->i_generation;
         b->suppgid = -1;
 }
         b->generation = inode->i_generation;
         b->suppgid = -1;
 }
+
 /* unpacking */
 static int mds_setattr_unpack(struct ptlrpc_request *req, int offset,
                               struct mds_update_record *r)
 /* unpacking */
 static int mds_setattr_unpack(struct ptlrpc_request *req, int offset,
                               struct mds_update_record *r)
@@ -92,8 +92,8 @@ static int mds_setattr_unpack(struct ptlrpc_request *req, int offset,
         struct mds_rec_setattr *rec;
         ENTRY;
 
         struct mds_rec_setattr *rec;
         ENTRY;
 
-        rec = lustre_swab_reqbuf (req, offset, sizeof (*rec),
-                                  lustre_swab_mds_rec_setattr);
+        rec = lustre_swab_reqbuf(req, offset, sizeof(*rec),
+                                 lustre_swab_mds_rec_setattr);
         if (rec == NULL)
                 RETURN (-EFAULT);
 
         if (rec == NULL)
                 RETURN (-EFAULT);
 
@@ -120,9 +120,14 @@ static int mds_setattr_unpack(struct ptlrpc_request *req, int offset,
                 if (r->ur_eadata == NULL)
                         RETURN (-EFAULT);
                 r->ur_eadatalen = req->rq_reqmsg->buflens[offset + 1];
                 if (r->ur_eadata == NULL)
                         RETURN (-EFAULT);
                 r->ur_eadatalen = req->rq_reqmsg->buflens[offset + 1];
-        } else {
-                r->ur_eadata = NULL;
-                r->ur_eadatalen = 0;
+        }
+
+        if (req->rq_reqmsg->bufcount > offset + 2) {
+                r->ur_logcookies = lustre_msg_buf(req->rq_reqmsg, offset + 2,0);
+                if (r->ur_eadata == NULL)
+                        RETURN (-EFAULT);
+
+                r->ur_cookielen = req->rq_reqmsg->buflens[offset + 2];
         }
 
         RETURN(0);
         }
 
         RETURN(0);
@@ -172,9 +177,6 @@ static int mds_create_unpack(struct ptlrpc_request *req, int offset,
                 if (r->ur_tgt == NULL)
                         RETURN (-EFAULT);
                 r->ur_tgtlen = req->rq_reqmsg->buflens[offset + 2];
                 if (r->ur_tgt == NULL)
                         RETURN (-EFAULT);
                 r->ur_tgtlen = req->rq_reqmsg->buflens[offset + 2];
-        } else {
-                r->ur_tgt = NULL;
-                r->ur_tgtlen = 0;
         }
         RETURN(0);
 }
         }
         RETURN(0);
 }
index 02c53cc..ecca88c 100644 (file)
@@ -32,6 +32,9 @@
 #include <linux/obd_class.h>
 #include <linux/obd_lov.h>
 #include <linux/lustre_lib.h>
 #include <linux/obd_class.h>
 #include <linux/obd_lov.h>
 #include <linux/lustre_lib.h>
+#include <linux/lustre_fsfilt.h>
+
+#include "mds_internal.h"
 
 void le_lov_desc_to_cpu (struct lov_desc *ld)
 {
 
 void le_lov_desc_to_cpu (struct lov_desc *ld)
 {
@@ -141,6 +144,7 @@ int mds_set_lovdesc(struct obd_device *obd, struct lov_desc *desc,
         mds->mds_has_lov_desc = 1;
         /* XXX the MDS should not really know about this */
         mds->mds_max_mdsize = lov_mds_md_size(desc->ld_tgt_count);
         mds->mds_has_lov_desc = 1;
         /* XXX the MDS should not really know about this */
         mds->mds_max_mdsize = lov_mds_md_size(desc->ld_tgt_count);
+        mds->mds_max_cookiesize = desc->ld_tgt_count*sizeof(struct llog_cookie);
 
 out:
         pop_ctxt(&saved, &mds->mds_ctxt, NULL);
 
 out:
         pop_ctxt(&saved, &mds->mds_ctxt, NULL);
@@ -182,7 +186,8 @@ out:
         return rc;
 }
 
         return rc;
 }
 
-int mds_get_lovtgts(struct mds_obd *mds, int tgt_count,struct obd_uuid *uuidarray)
+int mds_get_lovtgts(struct mds_obd *mds, int tgt_count,
+                    struct obd_uuid *uuidarray)
 {
         struct obd_run_ctxt saved;
         struct file *f;
 {
         struct obd_run_ctxt saved;
         struct file *f;
@@ -266,13 +271,13 @@ int mds_iocontrol(unsigned int cmd, struct lustre_handle *conn,
 
                 RETURN(rc);
 
 
                 RETURN(rc);
 
-        case OBD_IOC_SET_READONLY:
+        case OBD_IOC_SET_READONLY: {
+                BDEVNAME_DECLARE_STORAGE(tmp);
                 CERROR("setting device %s read-only\n",
                 CERROR("setting device %s read-only\n",
-                       ll_bdevname(obd->u.mds.mds_sb->s_dev));
-#ifdef CONFIG_DEV_RDONLY
+                       ll_bdevname(obd->u.mds.mds_sb->s_dev, tmp));
                 dev_set_rdonly(obd->u.mds.mds_sb->s_dev, 2);
                 dev_set_rdonly(obd->u.mds.mds_sb->s_dev, 2);
-#endif
                 RETURN(0);
                 RETURN(0);
+        }
 
         case OBD_IOC_ABORT_RECOVERY:
                 CERROR("aborting recovery for device %s\n", obd->obd_name);
 
         case OBD_IOC_ABORT_RECOVERY:
                 CERROR("aborting recovery for device %s\n", obd->obd_name);
index 04d6ee9..2bd2f8c 100644 (file)
 
 #include "mds_internal.h"
 
 
 #include "mds_internal.h"
 
-extern inline struct mds_obd *mds_req2mds(struct ptlrpc_request *req);
-int mds_finish_transno(struct mds_obd *mds, struct inode *i, void *handle,
-                       struct ptlrpc_request *req, int rc, __u32 op_data);
-extern int enqueue_ordered_locks(int lock_mode, struct obd_device *obd,
-                                 struct ldlm_res_id *p1_res_id,
-                                 struct ldlm_res_id *p2_res_id,
-                                 struct ldlm_res_id *c1_res_id,
-                                 struct ldlm_res_id *c2_res_id,
-                                 struct lustre_handle *p1_lockh,
-                                 struct lustre_handle *p2_lockh,
-                                 struct lustre_handle *c1_lockh,
-                                 struct lustre_handle *c2_lockh);
-
 struct mds_file_data *mds_dentry_open(struct dentry *dentry,
                                       struct vfsmount *mnt,
                                       int flags,
 struct mds_file_data *mds_dentry_open(struct dentry *dentry,
                                       struct vfsmount *mnt,
                                       int flags,
@@ -65,17 +52,16 @@ struct mds_file_data *mds_dentry_open(struct dentry *dentry,
 {
         struct mds_export_data *med = &req->rq_export->exp_mds_data;
         struct inode *inode;
 {
         struct mds_export_data *med = &req->rq_export->exp_mds_data;
         struct inode *inode;
-        int mode;
         struct mds_file_data *mfd;
         struct mds_file_data *mfd;
-        int error;
+        int mode, error;
 
         mfd = mds_mfd_new();
 
         mfd = mds_mfd_new();
-        if (!mfd) {
+        if (mfd == NULL) {
                 CERROR("mds: out of memory\n");
                 GOTO(cleanup_dentry, error = -ENOMEM);
         }
 
                 CERROR("mds: out of memory\n");
                 GOTO(cleanup_dentry, error = -ENOMEM);
         }
 
-        mode = (flags+1) & O_ACCMODE;
+        mode = (flags + 1) & O_ACCMODE;
         inode = dentry->d_inode;
 
         if (mode & FMODE_WRITE) {
         inode = dentry->d_inode;
 
         if (mode & FMODE_WRITE) {
@@ -107,6 +93,7 @@ void reconstruct_open(struct mds_update_record *rec, int offset,
                       struct ptlrpc_request *req,
                       struct lustre_handle *child_lockh)
 {
                       struct ptlrpc_request *req,
                       struct lustre_handle *child_lockh)
 {
+        struct ptlrpc_request *oldreq = req->rq_export->exp_outstanding_reply;
         struct mds_export_data *med = &req->rq_export->exp_mds_data;
         struct mds_client_data *mcd = med->med_mcd;
         struct mds_obd *mds = mds_req2mds(req);
         struct mds_export_data *med = &req->rq_export->exp_mds_data;
         struct mds_client_data *mcd = med->med_mcd;
         struct mds_obd *mds = mds_req2mds(req);
@@ -115,7 +102,7 @@ void reconstruct_open(struct mds_update_record *rec, int offset,
         struct dentry *parent, *child;
         struct ldlm_reply *rep;
         struct mds_body *body;
         struct dentry *parent, *child;
         struct ldlm_reply *rep;
         struct mds_body *body;
-        int disp, rc;
+        int rc;
         struct list_head *t;
         int put_child = 1;
         ENTRY;
         struct list_head *t;
         int put_child = 1;
         ENTRY;
@@ -127,14 +114,13 @@ void reconstruct_open(struct mds_update_record *rec, int offset,
         /* copy rc, transno and disp; steal locks */
         req->rq_transno = mcd->mcd_last_transno;
         req->rq_status = mcd->mcd_last_result;
         /* copy rc, transno and disp; steal locks */
         req->rq_transno = mcd->mcd_last_transno;
         req->rq_status = mcd->mcd_last_result;
-        disp = rep->lock_policy_res1 = mcd->mcd_last_data;
+        intent_set_disposition(rep, mcd->mcd_last_data);
 
 
-        if (req->rq_export->exp_outstanding_reply)
+        if (oldreq)
                 mds_steal_ack_locks(req->rq_export, req);
 
                 mds_steal_ack_locks(req->rq_export, req);
 
-        /* We never care about these. */
-        disp &= ~(IT_OPEN_LOOKUP | IT_OPEN_POS | IT_OPEN_NEG);
-        if (!disp) {
+        /* Only replay if create or open actually happened. */
+        if (!intent_disposition(rep, DISP_OPEN_CREATE | DISP_OPEN_OPEN) ) {
                 EXIT;
                 return; /* error looking up parent or child */
         }
                 EXIT;
                 return; /* error looking up parent or child */
         }
@@ -149,11 +135,11 @@ void reconstruct_open(struct mds_update_record *rec, int offset,
                 GOTO(out_dput, 0); /* child not present to open */
         }
 
                 GOTO(out_dput, 0); /* child not present to open */
         }
 
-        /* At this point, we know we have a child, which means that we'll send
-         * it back _unless_ it was open failed, _and_ we didn't create the file.
-         * I love you guys.  No, really.
+        /* At this point, we know we have a child. We'll send
+         * it back _unless_ it was not created and the open failed.
          */
          */
-        if (((disp & (IT_OPEN_OPEN | IT_OPEN_CREATE)) == IT_OPEN_OPEN) &&
+        if (intent_disposition(rep, DISP_OPEN_OPEN) &&
+            !intent_disposition(rep, DISP_OPEN_CREATE) &&
             req->rq_status) {
                 GOTO(out_dput, 0);
         }
             req->rq_status) {
                 GOTO(out_dput, 0);
         }
@@ -165,8 +151,14 @@ void reconstruct_open(struct mds_update_record *rec, int offset,
         if (S_ISREG(child->d_inode->i_mode)) {
                 rc = mds_pack_md(obd, req->rq_repmsg, 2, body,
                                  child->d_inode);
         if (S_ISREG(child->d_inode->i_mode)) {
                 rc = mds_pack_md(obd, req->rq_repmsg, 2, body,
                                  child->d_inode);
+
                 if (rc)
                         LASSERT(rc == req->rq_status);
                 if (rc)
                         LASSERT(rc == req->rq_status);
+
+                /* If we have LOV EA data, the OST holds size, mtime */
+                if (!(body->valid & OBD_MD_FLEASIZE))
+                        body->valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
+                                        OBD_MD_FLATIME | OBD_MD_FLMTIME);
         } else {
                 /* XXX need to check this case */
         }
         } else {
                 /* XXX need to check this case */
         }
@@ -185,7 +177,7 @@ void reconstruct_open(struct mds_update_record *rec, int offset,
         /* If we didn't get as far as trying to open, then some locking thing
          * probably went wrong, and we'll just bail here.
          */
         /* If we didn't get as far as trying to open, then some locking thing
          * probably went wrong, and we'll just bail here.
          */
-        if ((disp & IT_OPEN_OPEN) == 0)
+        if (!intent_disposition(rep, DISP_OPEN_OPEN))
                 GOTO(out_dput, 0);
 
         /* If we failed, then we must have failed opening, so don't look for
                 GOTO(out_dput, 0);
 
         /* If we failed, then we must have failed opening, so don't look for
@@ -197,12 +189,12 @@ void reconstruct_open(struct mds_update_record *rec, int offset,
         mfd = NULL;
         list_for_each(t, &med->med_open_head) {
                 mfd = list_entry(t, struct mds_file_data, mfd_list);
         mfd = NULL;
         list_for_each(t, &med->med_open_head) {
                 mfd = list_entry(t, struct mds_file_data, mfd_list);
-                if (mfd->mfd_xid == req->rq_xid) 
+                if (mfd->mfd_xid == req->rq_xid)
                         break;
                 mfd = NULL;
         }
 
                         break;
                 mfd = NULL;
         }
 
-        if (req->rq_export->exp_outstanding_reply) {
+        if (oldreq) {
                 /* if we're not recovering, it had better be found */
                 LASSERT(mfd);
         } else if (mfd == NULL) {
                 /* if we're not recovering, it had better be found */
                 LASSERT(mfd);
         } else if (mfd == NULL) {
@@ -226,35 +218,180 @@ void reconstruct_open(struct mds_update_record *rec, int offset,
         EXIT;
 }
 
         EXIT;
 }
 
+int mds_pin(struct ptlrpc_request *req)
+{
+        struct mds_obd *mds = mds_req2mds(req);
+        struct inode *pending_dir = mds->mds_pending_dir->d_inode;
+        struct mds_file_data *mfd = NULL;
+        struct mds_body *body;
+        struct dentry *dchild;
+        struct obd_run_ctxt saved;
+        char fidname[LL_FID_NAMELEN];
+        int fidlen = 0, rc, cleanup_phase = 0, size = sizeof(*body);
+        ENTRY;
+
+        body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body));
+
+        down(&pending_dir->i_sem);
+        fidlen = ll_fid2str(fidname, body->fid1.id, body->fid1.generation);
+        dchild = lookup_one_len(fidname, mds->mds_pending_dir, fidlen);
+        if (IS_ERR(dchild)) {
+                up(&pending_dir->i_sem);
+                rc = PTR_ERR(dchild);
+                CERROR("error looking up %s in PENDING: rc = %d\n",
+                       fidname, rc);
+                RETURN(rc);
+        }
+
+        cleanup_phase = 2;
+
+        if (dchild->d_inode) {
+                up(&pending_dir->i_sem);
+                mds_inode_set_orphan(dchild->d_inode);
+                mds_pack_inode2fid(&body->fid1, dchild->d_inode);
+                mds_pack_inode2body(body, dchild->d_inode);
+                GOTO(openit, rc = 0);
+        }
+        dput(dchild);
+        up(&pending_dir->i_sem);
+
+        /* We didn't find it in PENDING so it isn't an orphan.  See
+         * if it's a regular inode. */
+        dchild = mds_fid2dentry(mds, &body->fid1, NULL);
+        if (!IS_ERR(dchild)) {
+                mds_pack_inode2fid(&body->fid1, dchild->d_inode);
+                mds_pack_inode2body(body, dchild->d_inode);
+                GOTO(openit, rc = 0);
+        }
+
+        /* We didn't find this inode on disk, but we're trying to pin it.
+         * This should never happen. */
+        CERROR("ENOENT during mds_pin for fid "LPU64"/%u\n", body->fid1.id,
+               body->fid1.generation);
+        RETURN(-ENOENT);
+
+ openit:
+        /* dentry_open does a dput(de) and mntput(mds->mds_vfsmnt) on error */
+        mfd = mds_dentry_open(dchild, mds->mds_vfsmnt, body->flags, req);
+        if (IS_ERR(mfd)) {
+                dchild = NULL; /* prevent a double dput in cleanup phase 2 */
+                GOTO(cleanup, rc = PTR_ERR(mfd));
+        }
+
+        rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
+        if (rc) {
+                CERROR("out of memoryK\n");
+                GOTO(cleanup, rc);
+        }
+        body = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*body));
+
+        cleanup_phase = 4; /* mfd allocated */
+        body->handle.cookie = mfd->mfd_handle.h_cookie;
+        CDEBUG(D_INODE, "mfd %p, cookie "LPX64"\n", mfd,
+               mfd->mfd_handle.h_cookie);
+        GOTO(cleanup, rc = 0);
+
+ cleanup:
+        push_ctxt(&saved, &mds->mds_ctxt, NULL);
+        rc = mds_finish_transno(mds, dchild ? dchild->d_inode : NULL, NULL,
+                                req, rc, 0);
+        pop_ctxt(&saved, &mds->mds_ctxt, NULL);
+        /* XXX what do we do here if mds_finish_transno itself failed? */
+        switch (cleanup_phase) {
+        case 4:
+                if (rc)
+                        mds_mfd_destroy(mfd);
+        case 2:
+                if (rc || S_ISLNK(dchild->d_inode->i_mode))
+                        l_dput(dchild);
+        }
+        return rc;
+}
+
 int mds_open(struct mds_update_record *rec, int offset,
              struct ptlrpc_request *req, struct lustre_handle *child_lockh)
 {
 int mds_open(struct mds_update_record *rec, int offset,
              struct ptlrpc_request *req, struct lustre_handle *child_lockh)
 {
+        /* XXX ALLOCATE _something_ - 464 bytes on stack here */
         static const char acc_table [] = {[O_RDONLY] MAY_READ,
                                           [O_WRONLY] MAY_WRITE,
                                           [O_RDWR]   MAY_READ | MAY_WRITE};
         struct mds_obd *mds = mds_req2mds(req);
         struct obd_device *obd = req->rq_export->exp_obd;
         static const char acc_table [] = {[O_RDONLY] MAY_READ,
                                           [O_WRONLY] MAY_WRITE,
                                           [O_RDWR]   MAY_READ | MAY_WRITE};
         struct mds_obd *mds = mds_req2mds(req);
         struct obd_device *obd = req->rq_export->exp_obd;
-        struct ldlm_reply *rep;
-        struct mds_body *body;
-        struct dentry *dchild = NULL, *parent;
+        struct ldlm_reply *rep = NULL;
+        struct mds_body *body = NULL;
+        struct dentry *dchild = NULL, *parent = NULL;
         struct mds_export_data *med;
         struct mds_file_data *mfd = NULL;
         struct ldlm_res_id child_res_id = { .name = {0} };
         struct lustre_handle parent_lockh;
         int rc = 0, parent_mode, child_mode = LCK_PR, lock_flags, created = 0;
         struct mds_export_data *med;
         struct mds_file_data *mfd = NULL;
         struct ldlm_res_id child_res_id = { .name = {0} };
         struct lustre_handle parent_lockh;
         int rc = 0, parent_mode, child_mode = LCK_PR, lock_flags, created = 0;
-        int cleanup_phase = 0;
+        int cleanup_phase = 0, acc_mode;
         void *handle = NULL;
         void *handle = NULL;
-        int acc_mode;
         ENTRY;
 
         ENTRY;
 
-        LASSERT(offset == 2);                  /* only called via intent */
-        rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*rep));
-        body = lustre_msg_buf(req->rq_repmsg, 1, sizeof (*body));
+        if (offset == 2) { /* intent */
+                rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*rep));
+                body = lustre_msg_buf(req->rq_repmsg, 1, sizeof (*body));
+        } else if (offset == 0) { /* non-intent reint */
+                body = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*body));
+        } else {
+                body = NULL;
+                LBUG();
+        }
 
         MDS_CHECK_RESENT(req, reconstruct_open(rec, offset, req, child_lockh));
 
 
         MDS_CHECK_RESENT(req, reconstruct_open(rec, offset, req, child_lockh));
 
+        /* Step 0: If we are passed a fid, then we assume the client already
+         * opened this file and is only replaying the RPC, so we open the
+         * inode by fid (at some large expense in security).
+         */
+        if (rec->ur_fid2->id) {
+                struct inode *pending_dir = mds->mds_pending_dir->d_inode;
+                char fidname[LL_FID_NAMELEN];
+                int fidlen = 0;
+
+                down(&pending_dir->i_sem);
+                fidlen = ll_fid2str(fidname, rec->ur_fid2->id,
+                                    rec->ur_fid2->generation);
+                dchild = lookup_one_len(fidname, mds->mds_pending_dir, fidlen);
+                if (IS_ERR(dchild)) {
+                        up(&pending_dir->i_sem);
+                        rc = PTR_ERR(dchild);
+                        CERROR("error looking up %s in PENDING: rc = %d\n",
+                               fidname, rc);
+                        RETURN(rc);
+                }
+
+                if (dchild->d_inode) {
+                        up(&pending_dir->i_sem);
+                        mds_inode_set_orphan(dchild->d_inode);
+                        mds_pack_inode2fid(&body->fid1, dchild->d_inode);
+                        mds_pack_inode2body(body, dchild->d_inode);
+                        cleanup_phase = 2;
+                        GOTO(openit, rc = 0);
+                }
+                dput(dchild);
+                up(&pending_dir->i_sem);
+
+                /* We didn't find it in PENDING so it isn't an orphan.  See
+                 * if it was a regular inode that was previously created.
+                 */
+                dchild = mds_fid2dentry(mds, rec->ur_fid2, NULL);
+                if (!IS_ERR(dchild)) {
+                        mds_pack_inode2fid(&body->fid1, dchild->d_inode);
+                        mds_pack_inode2body(body, dchild->d_inode);
+                        cleanup_phase = 2;
+                        GOTO(openit, rc = 0);
+                }
+
+                /* We didn't find the correct inode on disk either, so we
+                 * need to re-create it via a regular replay.  Do that below.
+                 */
+                LASSERT(rec->ur_flags & O_CREAT);
+        }
+        LASSERT(offset == 2); /* If we got here, we must be called via intent */
+
         med = &req->rq_export->exp_mds_data;
         med = &req->rq_export->exp_mds_data;
-        rep->lock_policy_res1 |= IT_OPEN_LOOKUP;
         if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OPEN_PACK)) {
                 CERROR("test case OBD_FAIL_MDS_OPEN_PACK\n");
                 req->rq_status = -ENOMEM;
         if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OPEN_PACK)) {
                 CERROR("test case OBD_FAIL_MDS_OPEN_PACK\n");
                 req->rq_status = -ENOMEM;
@@ -263,11 +400,12 @@ int mds_open(struct mds_update_record *rec, int offset,
 
         if ((rec->ur_flags & O_ACCMODE) >= sizeof (acc_table))
                 RETURN(-EINVAL);
 
         if ((rec->ur_flags & O_ACCMODE) >= sizeof (acc_table))
                 RETURN(-EINVAL);
-        acc_mode = acc_table [rec->ur_flags & O_ACCMODE];
+        acc_mode = acc_table[rec->ur_flags & O_ACCMODE];
         if ((rec->ur_flags & O_TRUNC) != 0)
                 acc_mode |= MAY_WRITE;
 
         /* Step 1: Find and lock the parent */
         if ((rec->ur_flags & O_TRUNC) != 0)
                 acc_mode |= MAY_WRITE;
 
         /* Step 1: Find and lock the parent */
+        intent_set_disposition(rep, DISP_LOOKUP_EXECD);
         parent_mode = (rec->ur_flags & O_CREAT) ? LCK_PW : LCK_PR;
         parent = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, parent_mode,
                                        &parent_lockh);
         parent_mode = (rec->ur_flags & O_CREAT) ? LCK_PW : LCK_PR;
         parent = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, parent_mode,
                                        &parent_lockh);
@@ -288,38 +426,88 @@ int mds_open(struct mds_update_record *rec, int offset,
         cleanup_phase = 2; /* child dentry */
 
         if (dchild->d_inode)
         cleanup_phase = 2; /* child dentry */
 
         if (dchild->d_inode)
-                rep->lock_policy_res1 |= IT_OPEN_POS;
+                intent_set_disposition(rep, DISP_LOOKUP_POS);
         else
         else
-                rep->lock_policy_res1 |= IT_OPEN_NEG;
+                intent_set_disposition(rep, DISP_LOOKUP_NEG);
 
         /* Step 3: If the child was negative, and we're supposed to,
          * create it. */
         if (!dchild->d_inode) {
 
         /* Step 3: If the child was negative, and we're supposed to,
          * create it. */
         if (!dchild->d_inode) {
+                unsigned long ino = rec->ur_fid2->id;
+
                 if (!(rec->ur_flags & O_CREAT)) {
                         /* It's negative and we weren't supposed to create it */
                         GOTO(cleanup, rc = -ENOENT);
                 }
 
                 if (!(rec->ur_flags & O_CREAT)) {
                         /* It's negative and we weren't supposed to create it */
                         GOTO(cleanup, rc = -ENOENT);
                 }
 
-                rep->lock_policy_res1 |= IT_OPEN_CREATE;
-                handle = fsfilt_start(obd, parent->d_inode, FSFILT_OP_CREATE);
+                intent_set_disposition(rep, DISP_OPEN_CREATE);
+                handle = fsfilt_start(obd, parent->d_inode, FSFILT_OP_CREATE,
+                                      NULL);
                 if (IS_ERR(handle)) {
                         rc = PTR_ERR(handle);
                         handle = NULL;
                         GOTO(cleanup, rc);
                 }
                 if (IS_ERR(handle)) {
                         rc = PTR_ERR(handle);
                         handle = NULL;
                         GOTO(cleanup, rc);
                 }
+                if (ino)
+                        dchild->d_fsdata = (void *)(unsigned long)ino;
+
                 rc = vfs_create(parent->d_inode, dchild, rec->ur_mode);
                 rc = vfs_create(parent->d_inode, dchild, rec->ur_mode);
-                if (rc)
+                if (dchild->d_fsdata == (void *)(unsigned long)ino)
+                        dchild->d_fsdata = NULL;
+
+                if (rc) {
+                        CDEBUG(D_INODE, "error during create: %d\n", rc);
                         GOTO(cleanup, rc);
                         GOTO(cleanup, rc);
-                created = 1;
+                } else {
+                        struct iattr iattr;
+                        struct inode *inode = dchild->d_inode;
+
+                        if (ino) {
+                                LASSERT(ino == inode->i_ino);
+                                /* Written as part of setattr */
+                                inode->i_generation = rec->ur_fid2->generation;
+                                CDEBUG(D_HA, "recreated ino %lu with gen %x\n",
+                                       inode->i_ino, inode->i_generation);
+                        }
+
+                        created = 1;
+                        LTIME_S(iattr.ia_atime) = rec->ur_time;
+                        LTIME_S(iattr.ia_ctime) = rec->ur_time;
+                        LTIME_S(iattr.ia_mtime) = rec->ur_time;
+
+                        iattr.ia_uid = rec->ur_uid;
+                        if (parent->d_inode->i_mode & S_ISGID) {
+                                iattr.ia_gid = parent->d_inode->i_gid;
+                        } else
+                                iattr.ia_gid = rec->ur_gid;
+
+                        iattr.ia_valid = ATTR_UID | ATTR_GID | ATTR_ATIME |
+                                ATTR_MTIME | ATTR_CTIME;
+
+                        rc = fsfilt_setattr(obd, dchild, handle, &iattr, 0);
+                        if (rc) {
+                                CERROR("error on setattr: rc = %d\n", rc);
+                                /* XXX should we abort here in case of error? */
+                        }
+                }
+
                 child_mode = LCK_PW;
                 acc_mode = 0;                  /* Don't check for permissions */
         }
 
                 child_mode = LCK_PW;
                 acc_mode = 0;                  /* Don't check for permissions */
         }
 
+        LASSERT(!mds_inode_is_orphan(dchild->d_inode));
+
         /* Step 4: It's positive, so lock the child */
         child_res_id.name[0] = dchild->d_inode->i_ino;
         child_res_id.name[1] = dchild->d_inode->i_generation;
  reacquire:
         lock_flags = 0;
         /* Step 4: It's positive, so lock the child */
         child_res_id.name[0] = dchild->d_inode->i_ino;
         child_res_id.name[1] = dchild->d_inode->i_generation;
  reacquire:
         lock_flags = 0;
+        /* For the open(O_CREAT) case, this would technically be a lock
+         * inversion (getting a VFS lock after starting a transaction),
+         * but in that case we cannot possibly block on this lock because
+         * we just created the child and also hold a write lock on the
+         * parent, so nobody could be holding the lock yet.
+         */
         rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
                               child_res_id, LDLM_PLAIN, NULL, 0, child_mode,
                               &lock_flags, ldlm_completion_ast,
         rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
                               child_res_id, LDLM_PLAIN, NULL, 0, child_mode,
                               &lock_flags, ldlm_completion_ast,
@@ -346,15 +534,19 @@ int mds_open(struct mds_update_record *rec, int offset,
 
                 /* An append-only file must be opened in append mode for
                  * writing */
 
                 /* An append-only file must be opened in append mode for
                  * writing */
-                if (IS_APPEND(dchild->d_inode) &&
-                    (acc_mode & MAY_WRITE) != 0 &&
+                if (IS_APPEND(dchild->d_inode) && (acc_mode & MAY_WRITE) != 0 &&
                     ((rec->ur_flags & O_APPEND) == 0 ||
                      (rec->ur_flags & O_TRUNC) != 0))
                     ((rec->ur_flags & O_APPEND) == 0 ||
                      (rec->ur_flags & O_TRUNC) != 0))
-                        GOTO (cleanup, rc = -EPERM);
+                        GOTO(cleanup, rc = -EPERM);
 
                 rc = mds_pack_md(obd, req->rq_repmsg, 2, body, dchild->d_inode);
                 if (rc)
                         GOTO(cleanup, rc);
 
                 rc = mds_pack_md(obd, req->rq_repmsg, 2, body, dchild->d_inode);
                 if (rc)
                         GOTO(cleanup, rc);
+
+                /* If we have LOV EA data, the OST holds size, mtime */
+                if (!(body->valid & OBD_MD_FLEASIZE))
+                        body->valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
+                                        OBD_MD_FLATIME | OBD_MD_FLMTIME);
         }
 
         if (!created && (rec->ur_flags & O_CREAT) &&
         }
 
         if (!created && (rec->ur_flags & O_CREAT) &&
@@ -364,9 +556,9 @@ int mds_open(struct mds_update_record *rec, int offset,
                 GOTO(cleanup, rc = -EEXIST); // returns a lock to the client
         }
 
                 GOTO(cleanup, rc = -EEXIST); // returns a lock to the client
         }
 
-        /* If we're opening a file without an EA, the client needs a write
-         * lock. */
-        if (S_ISREG(dchild->d_inode->i_mode) &&
+        /* If we're opening a file without an EA for write, the client needs
+         * a write lock. */
+        if (S_ISREG(dchild->d_inode->i_mode) && (rec->ur_flags & O_ACCMODE) &&
             child_mode != LCK_PW && !(body->valid & OBD_MD_FLEASIZE)) {
                 ldlm_lock_decref(child_lockh, child_mode);
                 child_mode = LCK_PW;
             child_mode != LCK_PW && !(body->valid & OBD_MD_FLEASIZE)) {
                 ldlm_lock_decref(child_lockh, child_mode);
                 child_mode = LCK_PW;
@@ -381,15 +573,14 @@ int mds_open(struct mds_update_record *rec, int offset,
                 GOTO(cleanup, rc = -ENOTDIR);
 
         /* Step 5: mds_open it */
                 GOTO(cleanup, rc = -ENOTDIR);
 
         /* Step 5: mds_open it */
-        rep->lock_policy_res1 |= IT_OPEN_OPEN;
-
+        intent_set_disposition(rep, DISP_OPEN_OPEN);
+ openit:
         /* dentry_open does a dput(de) and mntput(mds->mds_vfsmnt) on error */
         mfd = mds_dentry_open(dchild, mds->mds_vfsmnt,
                               rec->ur_flags & ~(O_DIRECT | O_TRUNC), req);
         /* dentry_open does a dput(de) and mntput(mds->mds_vfsmnt) on error */
         mfd = mds_dentry_open(dchild, mds->mds_vfsmnt,
                               rec->ur_flags & ~(O_DIRECT | O_TRUNC), req);
-        if (!mfd) {
-                CERROR("mds: out of memory\n");
-                dchild = NULL; /* prevent a double dput in step 2 */
-                GOTO(cleanup, rc = -ENOMEM);
+        if (IS_ERR(mfd)) {
+                dchild = NULL; /* prevent a double dput in cleanup phase 2 */
+                GOTO(cleanup, rc = PTR_ERR(mfd));
         }
 
         cleanup_phase = 4; /* mfd allocated */
         }
 
         cleanup_phase = 4; /* mfd allocated */
@@ -401,6 +592,7 @@ int mds_open(struct mds_update_record *rec, int offset,
  cleanup:
         rc = mds_finish_transno(mds, dchild ? dchild->d_inode : NULL, handle,
                                 req, rc, rep->lock_policy_res1);
  cleanup:
         rc = mds_finish_transno(mds, dchild ? dchild->d_inode : NULL, handle,
                                 req, rc, rep->lock_policy_res1);
+        /* XXX what do we do here if mds_finish_transno itself failed? */
         switch (cleanup_phase) {
         case 4:
                 if (rc && !S_ISLNK(dchild->d_inode->i_mode))
         switch (cleanup_phase) {
         case 4:
                 if (rc && !S_ISLNK(dchild->d_inode->i_mode))
@@ -410,19 +602,22 @@ int mds_open(struct mds_update_record *rec, int offset,
                  * ldlm_intent_policy: if we found the dentry, or we tried to
                  * open it (meaning that we created, if it wasn't found), then
                  * we return the lock to the caller and client. */
                  * ldlm_intent_policy: if we found the dentry, or we tried to
                  * open it (meaning that we created, if it wasn't found), then
                  * we return the lock to the caller and client. */
-                if (!(rep->lock_policy_res1 & (IT_OPEN_OPEN | IT_OPEN_POS)))
+                if (intent_disposition(rep, DISP_LOOKUP_NEG) &&
+                    !intent_disposition(rep, DISP_OPEN_OPEN))
                         ldlm_lock_decref(child_lockh, child_mode);
         case 2:
                 if (rc || S_ISLNK(dchild->d_inode->i_mode))
                         l_dput(dchild);
         case 1:
                         ldlm_lock_decref(child_lockh, child_mode);
         case 2:
                 if (rc || S_ISLNK(dchild->d_inode->i_mode))
                         l_dput(dchild);
         case 1:
-                l_dput(parent);
-                if (rc) {
-                        ldlm_lock_decref(&parent_lockh, parent_mode);
-                } else {
-                        memcpy(&req->rq_ack_locks[0].lock, &parent_lockh,
-                               sizeof(parent_lockh));
-                        req->rq_ack_locks[0].mode = parent_mode;
+                if (parent) {
+                        l_dput(parent);
+                        if (rc) {
+                                ldlm_lock_decref(&parent_lockh, parent_mode);
+                        } else {
+                                memcpy(&req->rq_ack_locks[0].lock,&parent_lockh,
+                                       sizeof(parent_lockh));
+                                req->rq_ack_locks[0].mode = parent_mode;
+                        }
                 }
         }
         RETURN(rc);
                 }
         }
         RETURN(rc);
index 50949dd..61871d7 100644 (file)
 #include <linux/lustre_mds.h>
 #include <linux/lustre_dlm.h>
 #include <linux/lustre_fsfilt.h>
 #include <linux/lustre_mds.h>
 #include <linux/lustre_dlm.h>
 #include <linux/lustre_fsfilt.h>
+
 #include "mds_internal.h"
 
 #include "mds_internal.h"
 
-extern inline struct mds_obd *mds_req2mds(struct ptlrpc_request *req);
+void mds_commit_cb(struct obd_device *obd, __u64 transno, void *data,
+                   int error)
+{
+        obd_transno_commit_cb(obd, transno, error);
+}
+
+struct mds_logcancel_data {
+        struct lov_mds_md      *mlcd_lmm;
+        int                     mlcd_size;
+        int                     mlcd_cookielen;
+        int                     mlcd_eadatalen;
+        struct llog_cookie      mlcd_cookies[0];
+};
+
+/* Establish a connection to the OSC when we first need it.  We don't do
+ * this during MDS setup because that would introduce setup ordering issues. */
+static int mds_osc_connect(struct obd_device *obd, struct mds_obd *mds)
+{
+        int rc;
+        ENTRY;
+
+        if (IS_ERR(mds->mds_osc_obd))
+                RETURN(PTR_ERR(mds->mds_osc_obd));
+
+        if (mds->mds_osc_obd)
+                RETURN(0);
+
+        mds->mds_osc_obd = class_uuid2obd(&mds->mds_osc_uuid);
+        if (!mds->mds_osc_obd) {
+                CERROR("MDS cannot locate OSC/LOV %s - no logging!\n",
+                       mds->mds_osc_uuid.uuid);
+                mds->mds_osc_obd = ERR_PTR(-ENOTCONN);
+                RETURN(-ENOTCONN);
+        }
+
+        rc = obd_connect(&mds->mds_osc_conn, mds->mds_osc_obd, &obd->obd_uuid);
+        if (rc) {
+                CERROR("MDS cannot locate OSC/LOV %s - no logging!\n",
+                       mds->mds_osc_uuid.uuid);
+                mds->mds_osc_obd = ERR_PTR(rc);
+                RETURN(rc);
+        }
+
+        rc = obd_set_info(&mds->mds_osc_conn, strlen("mds_conn"), "mds_conn",
+                          0, NULL);
+        RETURN(rc);
+}
 
 
-static void mds_commit_cb(struct obd_device *obd, __u64 transno, int error)
+static void mds_cancel_cookies_cb(struct obd_device *obd, __u64 transno,
+                                  void *cb_data, int error)
 {
 {
+        struct mds_logcancel_data *mlcd = cb_data;
+        struct lov_stripe_md *lsm = NULL;
+        int rc;
+
         obd_transno_commit_cb(obd, transno, error);
         obd_transno_commit_cb(obd, transno, error);
+
+        CDEBUG(D_HA, "cancelling %d cookies\n",
+               (int)(mlcd->mlcd_cookielen / sizeof(*mlcd->mlcd_cookies)));
+
+        rc = obd_unpackmd(&obd->u.mds.mds_osc_conn, &lsm, mlcd->mlcd_lmm,
+                          mlcd->mlcd_eadatalen);
+        if (rc < 0) {
+                CERROR("bad LSM cancelling %d log cookies: rc %d\n",
+                       (int)(mlcd->mlcd_cookielen/sizeof(*mlcd->mlcd_cookies)),
+                       rc);
+        } else {
+                rc = obd_log_cancel(&obd->u.mds.mds_osc_conn, lsm,
+                                    mlcd->mlcd_cookielen /
+                                    sizeof(*mlcd->mlcd_cookies),
+                                    mlcd->mlcd_cookies, OBD_LLOG_FL_SENDNOW);
+                ///* XXX 0 normally, SENDNOW for debug */);
+                if (rc)
+                        CERROR("error cancelling %d log cookies: rc %d\n",
+                               (int)(mlcd->mlcd_cookielen /
+                                     sizeof(*mlcd->mlcd_cookies)), rc);
+        }
+
+        OBD_FREE(mlcd, mlcd->mlcd_size);
 }
 
 /* Assumes caller has already pushed us into the kernel context. */
 }
 
 /* Assumes caller has already pushed us into the kernel context. */
-int mds_finish_transno(struct mds_obd *mds, struct inode *i, void *handle,
-                       struct ptlrpc_request *req, int rc,
-                       __u32 op_data)
+int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle,
+                       struct ptlrpc_request *req, int rc, __u32 op_data)
 {
         struct mds_export_data *med = &req->rq_export->exp_mds_data;
         struct mds_client_data *mcd = med->med_mcd;
 {
         struct mds_export_data *med = &req->rq_export->exp_mds_data;
         struct mds_client_data *mcd = med->med_mcd;
@@ -70,15 +144,15 @@ int mds_finish_transno(struct mds_obd *mds, struct inode *i, void *handle,
 
         if (!handle) {
                 /* if we're starting our own xaction, use our own inode */
 
         if (!handle) {
                 /* if we're starting our own xaction, use our own inode */
-                i = mds->mds_rcvd_filp->f_dentry->d_inode;
-                handle = fsfilt_start(obd, i, FSFILT_OP_SETATTR);
+                inode = mds->mds_rcvd_filp->f_dentry->d_inode;
+                handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR, NULL);
                 if (IS_ERR(handle)) {
                         CERROR("fsfilt_start: %ld\n", PTR_ERR(handle));
                         GOTO(out, rc = PTR_ERR(handle));
                 }
         }
 
                 if (IS_ERR(handle)) {
                         CERROR("fsfilt_start: %ld\n", PTR_ERR(handle));
                         GOTO(out, rc = PTR_ERR(handle));
                 }
         }
 
-        off = MDS_LR_CLIENT + med->med_off * MDS_LR_SIZE;
+        off = med->med_off;
 
         transno = req->rq_reqmsg->transno;
         if (transno == 0) {
 
         transno = req->rq_reqmsg->transno;
         if (transno == 0) {
@@ -94,10 +168,11 @@ int mds_finish_transno(struct mds_obd *mds, struct inode *i, void *handle,
         mcd->mcd_last_data = cpu_to_le32(op_data);
 
         fsfilt_set_last_rcvd(req->rq_export->exp_obd, transno, handle,
         mcd->mcd_last_data = cpu_to_le32(op_data);
 
         fsfilt_set_last_rcvd(req->rq_export->exp_obd, transno, handle,
-                             mds_commit_cb);
-        written = lustre_fwrite(mds->mds_rcvd_filp, mcd, sizeof(*mcd), &off);
-        CDEBUG(D_INODE, "wrote trans "LPU64" client %s at #%u: written = "
-               LPSZ"\n", transno, mcd->mcd_uuid, med->med_off, written);
+                             mds_commit_cb, NULL);
+        written = fsfilt_write_record(obd, mds->mds_rcvd_filp,
+                                      (char *)mcd, sizeof(*mcd), &off);
+        CDEBUG(D_INODE, "wrote trans "LPU64" client %s at idx %u: written = "
+               LPSZ"\n", transno, mcd->mcd_uuid, med->med_idx, written);
 
         if (written != sizeof(*mcd)) {
                 CERROR("error writing to last_rcvd: rc = "LPSZ"\n", written);
 
         if (written != sizeof(*mcd)) {
                 CERROR("error writing to last_rcvd: rc = "LPSZ"\n", written);
@@ -110,7 +185,7 @@ int mds_finish_transno(struct mds_obd *mds, struct inode *i, void *handle,
         }
 
 commit:
         }
 
 commit:
-        err = fsfilt_commit(obd, i, handle, 0);
+        err = fsfilt_commit(obd, inode, handle, 0);
         if (err) {
                 CERROR("error committing transaction: %d\n", err);
                 if (!rc)
         if (err) {
                 CERROR("error committing transaction: %d\n", err);
                 if (!rc)
@@ -139,22 +214,29 @@ int mds_fix_attr(struct inode *inode, struct mds_update_record *rec)
         if (!(ia_valid & ATTR_RAW))
                 RETURN(0);
 
         if (!(ia_valid & ATTR_RAW))
                 RETURN(0);
 
-        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-                RETURN(-EPERM);
-
-        LTIME_S(attr->ia_ctime) = now;
+        if (!(ia_valid & ATTR_CTIME_SET))
+                LTIME_S(attr->ia_ctime) = now;
         if (!(ia_valid & ATTR_ATIME_SET))
                 LTIME_S(attr->ia_atime) = now;
         if (!(ia_valid & ATTR_MTIME_SET))
                 LTIME_S(attr->ia_mtime) = now;
 
         if (!(ia_valid & ATTR_ATIME_SET))
                 LTIME_S(attr->ia_atime) = now;
         if (!(ia_valid & ATTR_MTIME_SET))
                 LTIME_S(attr->ia_mtime) = now;
 
+        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+                RETURN(-EPERM);
+
         /* times */
         /* times */
-        if ((ia_valid & (ATTR_MTIME|ATTR_ATIME))==(ATTR_MTIME|ATTR_ATIME) &&
-             !(ia_valid & ATTR_ATIME_SET)) {
+        if ((ia_valid & (ATTR_MTIME|ATTR_ATIME))==(ATTR_MTIME|ATTR_ATIME)) {
                 if (rec->ur_fsuid != inode->i_uid &&
                     (error = permission(inode,MAY_WRITE)) != 0)
                         RETURN(error);
                 if (rec->ur_fsuid != inode->i_uid &&
                     (error = permission(inode,MAY_WRITE)) != 0)
                         RETURN(error);
-        } else if (ia_valid & ATTR_UID) {
+        }
+
+        if (ia_valid & ATTR_SIZE) {
+                if ((error = permission(inode,MAY_WRITE)) != 0)
+                        RETURN(error);
+        }
+
+        if (ia_valid & ATTR_UID) {
                 /* chown */
                 error = -EPERM;
                 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
                 /* chown */
                 error = -EPERM;
                 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
@@ -164,7 +246,6 @@ int mds_fix_attr(struct inode *inode, struct mds_update_record *rec)
                 if (attr->ia_gid == (gid_t) -1)
                         attr->ia_gid = inode->i_gid;
                 attr->ia_mode = inode->i_mode;
                 if (attr->ia_gid == (gid_t) -1)
                         attr->ia_gid = inode->i_gid;
                 attr->ia_mode = inode->i_mode;
-                attr->ia_valid =  ATTR_UID | ATTR_GID | ATTR_CTIME;
                 /*
                  * If the user or group of a non-directory has been
                  * changed by a non-root user, remove the setuid bit.
                 /*
                  * If the user or group of a non-directory has been
                  * changed by a non-root user, remove the setuid bit.
@@ -232,6 +313,14 @@ static void reconstruct_reint_setattr(struct mds_update_record *rec,
         mds_pack_inode2fid(&body->fid1, de->d_inode);
         mds_pack_inode2body(body, de->d_inode);
 
         mds_pack_inode2fid(&body->fid1, de->d_inode);
         mds_pack_inode2body(body, de->d_inode);
 
+        /* Don't return OST-specific attributes if we didn't just set them */
+        if (rec->ur_iattr.ia_valid & ATTR_SIZE)
+                body->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
+        if (rec->ur_iattr.ia_valid & (ATTR_MTIME | ATTR_MTIME_SET))
+                body->valid |= OBD_MD_FLMTIME;
+        if (rec->ur_iattr.ia_valid & (ATTR_ATIME | ATTR_ATIME_SET))
+                body->valid |= OBD_MD_FLATIME;
+
         l_dput(de);
 }
 
         l_dput(de);
 }
 
@@ -251,6 +340,7 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset,
         struct inode *inode = NULL;
         struct lustre_handle lockh;
         void *handle = NULL;
         struct inode *inode = NULL;
         struct lustre_handle lockh;
         void *handle = NULL;
+        struct mds_logcancel_data *mlcd = NULL;
         int rc = 0, cleanup_phase = 0, err, locked = 0;
         ENTRY;
 
         int rc = 0, cleanup_phase = 0, err, locked = 0;
         ENTRY;
 
@@ -279,21 +369,28 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset,
         OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_SETATTR_WRITE,
                        to_kdev_t(inode->i_sb->s_dev));
 
         OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_SETATTR_WRITE,
                        to_kdev_t(inode->i_sb->s_dev));
 
-        handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR);
+#ifdef ENABLE_ORPHANS
+        if (unlikely(mds->mds_osc_obd == NULL))
+                mds_osc_connect(obd, mds);
+#endif
+
+        handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR, NULL);
         if (IS_ERR(handle)) {
                 rc = PTR_ERR(handle);
                 handle = NULL;
                 GOTO(cleanup, rc);
         }
 
         if (IS_ERR(handle)) {
                 rc = PTR_ERR(handle);
                 handle = NULL;
                 GOTO(cleanup, rc);
         }
 
+        if (rec->ur_iattr.ia_valid & (ATTR_MTIME | ATTR_CTIME))
+                CDEBUG(D_INODE, "setting mtime %lu, ctime %lu\n",
+                       LTIME_S(rec->ur_iattr.ia_mtime),
+                       LTIME_S(rec->ur_iattr.ia_ctime));
         rc = mds_fix_attr(inode, rec);
         if (rc)
                 GOTO(cleanup, rc);
 
         rc = fsfilt_setattr(obd, de, handle, &rec->ur_iattr, 0);
         rc = mds_fix_attr(inode, rec);
         if (rc)
                 GOTO(cleanup, rc);
 
         rc = fsfilt_setattr(obd, de, handle, &rec->ur_iattr, 0);
-        if (rc == 0 &&
-            S_ISREG(inode->i_mode) &&
-            rec->ur_eadata != NULL) {
+        if (rc == 0 && S_ISREG(inode->i_mode) && rec->ur_eadata != NULL) {
                 rc = fsfilt_set_md(obd, inode, handle,
                                    rec->ur_eadata, rec->ur_eadatalen);
         }
                 rc = fsfilt_set_md(obd, inode, handle,
                                    rec->ur_eadata, rec->ur_eadatalen);
         }
@@ -302,10 +399,39 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset,
         mds_pack_inode2fid(&body->fid1, inode);
         mds_pack_inode2body(body, inode);
 
         mds_pack_inode2fid(&body->fid1, inode);
         mds_pack_inode2body(body, inode);
 
+        /* Don't return OST-specific attributes if we didn't just set them */
+        if (rec->ur_iattr.ia_valid & ATTR_SIZE)
+                body->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
+        if (rec->ur_iattr.ia_valid & (ATTR_MTIME | ATTR_MTIME_SET))
+                body->valid |= OBD_MD_FLMTIME;
+        if (rec->ur_iattr.ia_valid & (ATTR_ATIME | ATTR_ATIME_SET))
+                body->valid |= OBD_MD_FLATIME;
+
+        if (rc == 0 && rec->ur_cookielen && !IS_ERR(mds->mds_osc_obd)) {
+                OBD_ALLOC(mlcd, sizeof(*mlcd) + rec->ur_cookielen +
+                          rec->ur_eadatalen);
+                if (mlcd) {
+                        mlcd->mlcd_size = sizeof(*mlcd) + rec->ur_cookielen +
+                                rec->ur_eadatalen;
+                        mlcd->mlcd_eadatalen = rec->ur_eadatalen;
+                        mlcd->mlcd_cookielen = rec->ur_cookielen;
+                        mlcd->mlcd_lmm = (void *)&mlcd->mlcd_cookies +
+                                mlcd->mlcd_cookielen;
+                        memcpy(&mlcd->mlcd_cookies, rec->ur_logcookies,
+                               mlcd->mlcd_cookielen);
+                        memcpy(mlcd->mlcd_lmm, rec->ur_eadata,
+                               mlcd->mlcd_eadatalen);
+                } else {
+                        CERROR("unable to allocate log cancel data\n");
+                }
+        }
         EXIT;
  cleanup:
         EXIT;
  cleanup:
+        if (mlcd != NULL)
+                fsfilt_set_last_rcvd(req->rq_export->exp_obd, 0, handle,
+                                     mds_cancel_cookies_cb, mlcd);
         err = mds_finish_transno(mds, inode, handle, req, rc, 0);
         err = mds_finish_transno(mds, inode, handle, req, rc, 0);
-        switch(cleanup_phase) {
+        switch (cleanup_phase) {
         case 1:
                 l_dput(de);
                 if (locked) {
         case 1:
                 l_dput(de);
                 if (locked) {
@@ -418,7 +544,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset,
 
         switch (type) {
         case S_IFREG:{
 
         switch (type) {
         case S_IFREG:{
-                handle = fsfilt_start(obd, dir, FSFILT_OP_CREATE);
+                handle = fsfilt_start(obd, dir, FSFILT_OP_CREATE, NULL);
                 if (IS_ERR(handle))
                         GOTO(cleanup, rc = PTR_ERR(handle));
                 rc = vfs_create(dir, dchild, rec->ur_mode);
                 if (IS_ERR(handle))
                         GOTO(cleanup, rc = PTR_ERR(handle));
                 rc = vfs_create(dir, dchild, rec->ur_mode);
@@ -426,7 +552,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset,
                 break;
         }
         case S_IFDIR:{
                 break;
         }
         case S_IFDIR:{
-                handle = fsfilt_start(obd, dir, FSFILT_OP_MKDIR);
+                handle = fsfilt_start(obd, dir, FSFILT_OP_MKDIR, NULL);
                 if (IS_ERR(handle))
                         GOTO(cleanup, rc = PTR_ERR(handle));
                 rc = vfs_mkdir(dir, dchild, rec->ur_mode);
                 if (IS_ERR(handle))
                         GOTO(cleanup, rc = PTR_ERR(handle));
                 rc = vfs_mkdir(dir, dchild, rec->ur_mode);
@@ -434,7 +560,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset,
                 break;
         }
         case S_IFLNK:{
                 break;
         }
         case S_IFLNK:{
-                handle = fsfilt_start(obd, dir, FSFILT_OP_SYMLINK);
+                handle = fsfilt_start(obd, dir, FSFILT_OP_SYMLINK, NULL);
                 if (IS_ERR(handle))
                         GOTO(cleanup, rc = PTR_ERR(handle));
                 if (rec->ur_tgt == NULL)        /* no target supplied */
                 if (IS_ERR(handle))
                         GOTO(cleanup, rc = PTR_ERR(handle));
                 if (rec->ur_tgt == NULL)        /* no target supplied */
@@ -449,7 +575,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset,
         case S_IFIFO:
         case S_IFSOCK:{
                 int rdev = rec->ur_rdev;
         case S_IFIFO:
         case S_IFSOCK:{
                 int rdev = rec->ur_rdev;
-                handle = fsfilt_start(obd, dir, FSFILT_OP_MKNOD);
+                handle = fsfilt_start(obd, dir, FSFILT_OP_MKNOD, NULL);
                 if (IS_ERR(handle))
                         GOTO(cleanup, (handle = NULL, rc = PTR_ERR(handle)));
                 rc = vfs_mknod(dir, dchild, rec->ur_mode, rdev);
                 if (IS_ERR(handle))
                         GOTO(cleanup, (handle = NULL, rc = PTR_ERR(handle)));
                 rc = vfs_mknod(dir, dchild, rec->ur_mode, rdev);
@@ -458,13 +584,13 @@ static int mds_reint_create(struct mds_update_record *rec, int offset,
         }
         default:
                 CERROR("bad file type %o creating %s\n", type, rec->ur_name);
         }
         default:
                 CERROR("bad file type %o creating %s\n", type, rec->ur_name);
+                dchild->d_fsdata = NULL;
                 GOTO(cleanup, rc = -EINVAL);
         }
 
                 GOTO(cleanup, rc = -EINVAL);
         }
 
-        /* In case we stored the desired inum in here, we want to clean up.
-         * We also do this in the cleanup block, for the error cases.
-         */
-        dchild->d_fsdata = NULL;
+        /* In case we stored the desired inum in here, we want to clean up. */
+        if (dchild->d_fsdata == (void *)(unsigned long)rec->ur_fid2->id)
+                dchild->d_fsdata = NULL;
 
         if (rc) {
                 CDEBUG(D_INODE, "error during create: %d\n", rc);
 
         if (rc) {
                 CDEBUG(D_INODE, "error during create: %d\n", rc);
@@ -532,7 +658,6 @@ cleanup:
         }
         switch (cleanup_phase) {
         case 2: /* child dentry */
         }
         switch (cleanup_phase) {
         case 2: /* child dentry */
-                dchild->d_fsdata = NULL;
                 l_dput(dchild);
         case 1: /* locked parent dentry */
                 if (rc) {
                 l_dput(dchild);
         case 1: /* locked parent dentry */
                 if (rc) {
@@ -634,43 +759,134 @@ static void reconstruct_reint_unlink(struct mds_update_record *rec, int offset,
                   "can't get EA for reconstructed unlink, leaking OST inodes");
 }
 
                   "can't get EA for reconstructed unlink, leaking OST inodes");
 }
 
+/* If we are unlinking an open file/dir (i.e. creating an orphan) then
+ * we instead link the inode into the PENDING directory until it is
+ * finally released.  We can't simply call mds_reint_rename() or some
+ * part thereof, because we don't have the inode to check for link
+ * count/open status until after it is locked.
+ *
+ * For lock ordering, we always get the PENDING, then pending_child lock
+ * last to avoid deadlocks.
+ */
+static int mds_unlink_orphan(struct mds_update_record *rec,
+                             struct obd_device *obd, struct dentry *dparent,
+                             struct dentry *dchild, void **handle)
+{
+        struct mds_obd *mds = &obd->u.mds;
+        struct inode *pending_dir = mds->mds_pending_dir->d_inode;
+        struct dentry *pending_child;
+        char fidname[LL_FID_NAMELEN];
+        int fidlen = 0, rc;
+        ENTRY;
+
+        LASSERT(!mds_inode_is_orphan(dchild->d_inode));
+
+        down(&pending_dir->i_sem);
+        fidlen = ll_fid2str(fidname, dchild->d_inode->i_ino,
+                            dchild->d_inode->i_generation);
+
+        CDEBUG(D_ERROR, "pending destroy of %dx open file %s = %s\n",
+               mds_open_orphan_count(dchild->d_inode),
+               rec->ur_name, fidname);
+
+        pending_child = lookup_one_len(fidname, mds->mds_pending_dir, fidlen);
+        if (IS_ERR(pending_child))
+                GOTO(out_lock, rc = PTR_ERR(pending_child));
+
+        if (pending_child->d_inode != NULL) {
+                CERROR("re-destroying orphan file %s?\n", rec->ur_name);
+                LASSERT(pending_child->d_inode == dchild->d_inode);
+                GOTO(out_dput, rc = 0);
+        }
+
+        *handle = fsfilt_start(obd, pending_dir, FSFILT_OP_RENAME, NULL);
+        if (IS_ERR(*handle))
+                GOTO(out_dput, rc = PTR_ERR(*handle));
+
+        rc = vfs_rename(dparent->d_inode, dchild, pending_dir, pending_child);
+        if (rc)
+                CERROR("error renaming orphan %lu/%s to PENDING: rc = %d\n",
+                       dparent->d_inode->i_ino, rec->ur_name, rc);
+        else
+                mds_inode_set_orphan(dchild->d_inode);
+out_dput:
+        dput(pending_child);
+out_lock:
+        up(&pending_dir->i_sem);
+        RETURN(rc);
+}
+
+static int mds_log_op_unlink(struct obd_device *obd, struct mds_obd *mds,
+                             struct inode *inode, struct lustre_msg *repmsg,
+                             int offset)
+{
+        struct lov_stripe_md *lsm = NULL;
+        struct llog_unlink_rec *lur;
+        int rc;
+        ENTRY;
+
+        if (IS_ERR(mds->mds_osc_obd))
+                RETURN(PTR_ERR(mds->mds_osc_obd));
+
+        rc = obd_unpackmd(&mds->mds_osc_conn, &lsm,
+                          lustre_msg_buf(repmsg, offset, 0),
+                          repmsg->buflens[offset]);
+        if (rc < 0)
+                RETURN(rc);
+
+        OBD_ALLOC(lur, sizeof(*lur));
+        if (!lur)
+                RETURN(-ENOMEM);
+        lur->lur_hdr.lth_len = lur->lur_end_len = sizeof(*lur);
+        lur->lur_hdr.lth_type = MDS_UNLINK_REC;
+        lur->lur_oid = inode->i_ino;
+        lur->lur_ogen = inode->i_generation;
+
+        rc = obd_log_add(&mds->mds_osc_conn, mds->mds_catalog, &lur->lur_hdr,
+                         lsm, lustre_msg_buf(repmsg, offset + 1, 0),
+                         repmsg->buflens[offset+1]/sizeof(struct llog_cookie));
+
+        obd_free_memmd(&mds->mds_osc_conn, &lsm);
+        OBD_FREE(lur, sizeof(*lur));
+
+        RETURN(rc);
+}
+
 static int mds_reint_unlink(struct mds_update_record *rec, int offset,
                             struct ptlrpc_request *req,
 static int mds_reint_unlink(struct mds_update_record *rec, int offset,
                             struct ptlrpc_request *req,
-                            struct lustre_handle *child_lockh)
+                            struct lustre_handle *lh)
 {
 {
-        struct dentry *dir_de = NULL;
+        struct dentry *dparent = NULL;
         struct dentry *dchild = NULL;
         struct mds_obd *mds = mds_req2mds(req);
         struct obd_device *obd = req->rq_export->exp_obd;
         struct mds_body *body = NULL;
         struct dentry *dchild = NULL;
         struct mds_obd *mds = mds_req2mds(req);
         struct obd_device *obd = req->rq_export->exp_obd;
         struct mds_body *body = NULL;
-        struct inode *dir_inode = NULL, *child_inode;
-        struct lustre_handle parent_lockh;
+        struct inode *child_inode;
+        struct lustre_handle parent_lockh, child_lockh;
         void *handle = NULL;
         struct ldlm_res_id child_res_id = { .name = {0} };
         void *handle = NULL;
         struct ldlm_res_id child_res_id = { .name = {0} };
-        int rc = 0, flags = 0, return_lock = 0;
-        int cleanup_phase = 0;
+        int rc = 0, flags = 0, log_unlink = 0, cleanup_phase = 0;
         ENTRY;
 
         LASSERT(offset == 0 || offset == 2);
 
         MDS_CHECK_RESENT(req, reconstruct_reint_unlink(rec, offset, req,
         ENTRY;
 
         LASSERT(offset == 0 || offset == 2);
 
         MDS_CHECK_RESENT(req, reconstruct_reint_unlink(rec, offset, req,
-                                                       child_lockh));
+                                                       &child_lockh));
 
         if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_UNLINK))
                 GOTO(cleanup, rc = -ENOENT);
 
         /* Step 1: Lookup the parent by FID */
 
         if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_UNLINK))
                 GOTO(cleanup, rc = -ENOENT);
 
         /* Step 1: Lookup the parent by FID */
-        dir_de = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, LCK_PW,
-                                       &parent_lockh);
-        if (IS_ERR(dir_de))
-                GOTO(cleanup, rc = PTR_ERR(dir_de));
-        dir_inode = dir_de->d_inode;
-        LASSERT(dir_inode);
+        dparent = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, LCK_PW,
+                                        &parent_lockh);
+        if (IS_ERR(dparent))
+                GOTO(cleanup, rc = PTR_ERR(dparent));
+        LASSERT(dparent->d_inode);
 
         cleanup_phase = 1; /* Have parent dentry lock */
 
         /* Step 2: Lookup the child */
 
         cleanup_phase = 1; /* Have parent dentry lock */
 
         /* Step 2: Lookup the child */
-        dchild = ll_lookup_one_len(rec->ur_name, dir_de, rec->ur_namelen - 1);
+        dchild = ll_lookup_one_len(rec->ur_name, dparent, rec->ur_namelen - 1);
         if (IS_ERR(dchild))
                 GOTO(cleanup, rc = PTR_ERR(dchild));
 
         if (IS_ERR(dchild))
                 GOTO(cleanup, rc = PTR_ERR(dchild));
 
@@ -678,15 +894,13 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset,
 
         child_inode = dchild->d_inode;
         if (child_inode == NULL) {
 
         child_inode = dchild->d_inode;
         if (child_inode == NULL) {
-                CDEBUG(D_INODE,
-                       "child doesn't exist (dir %lu, name %s)\n",
-                       dir_inode->i_ino, rec->ur_name);
-                rc = -ENOENT;
-                GOTO(cleanup, rc);
+                CDEBUG(D_INODE, "child doesn't exist (dir %lu, name %s)\n",
+                       dparent->d_inode->i_ino, rec->ur_name);
+                GOTO(cleanup, rc = -ENOENT);
         }
 
         DEBUG_REQ(D_INODE, req, "parent ino %lu, child ino %lu",
         }
 
         DEBUG_REQ(D_INODE, req, "parent ino %lu, child ino %lu",
-                  dir_inode->i_ino, child_inode->i_ino);
+                  dparent->d_inode->i_ino, child_inode->i_ino);
 
         /* Step 3: Get a lock on the child */
         child_res_id.name[0] = child_inode->i_ino;
 
         /* Step 3: Get a lock on the child */
         child_res_id.name[0] = child_inode->i_ino;
@@ -695,14 +909,14 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset,
         rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
                               child_res_id, LDLM_PLAIN, NULL, 0, LCK_EX,
                               &flags, ldlm_completion_ast, mds_blocking_ast,
         rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
                               child_res_id, LDLM_PLAIN, NULL, 0, LCK_EX,
                               &flags, ldlm_completion_ast, mds_blocking_ast,
-                              NULL, child_lockh);
+                              NULL, &child_lockh);
         if (rc != ELDLM_OK)
                 GOTO(cleanup, rc);
 
         cleanup_phase = 3; /* child lock */
 
         OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_UNLINK_WRITE,
         if (rc != ELDLM_OK)
                 GOTO(cleanup, rc);
 
         cleanup_phase = 3; /* child lock */
 
         OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_UNLINK_WRITE,
-                       to_kdev_t(dir_inode->i_sb->s_dev));
+                       to_kdev_t(dparent->d_inode->i_sb->s_dev));
 
         /* ldlm_reply in buf[0] if called via intent */
         if (offset)
 
         /* ldlm_reply in buf[0] if called via intent */
         if (offset)
@@ -711,43 +925,89 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset,
         body = lustre_msg_buf(req->rq_repmsg, offset, sizeof (*body));
         LASSERT(body != NULL);
 
         body = lustre_msg_buf(req->rq_repmsg, offset, sizeof (*body));
         LASSERT(body != NULL);
 
-        /* Step 4: Do the unlink: client decides between rmdir/unlink!
-         * (bug 72) */
+#ifdef ENABLE_ORPHANS
+        if (unlikely(mds->mds_osc_obd == NULL))
+                mds_osc_connect(obd, mds);
+#endif
+
+        /* If this is the last reference to this inode, get the OBD EA
+         * data first so the client can destroy OST objects */
+        if (S_ISREG(child_inode->i_mode) && child_inode->i_nlink == 1) {
+                mds_pack_inode2fid(&body->fid1, child_inode);
+                mds_pack_inode2body(body, child_inode);
+                mds_pack_md(obd, req->rq_repmsg, offset + 1, body, child_inode);
+                if (!(body->valid & OBD_MD_FLEASIZE)) {
+                        body->valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
+                                        OBD_MD_FLATIME | OBD_MD_FLMTIME);
+                        log_unlink = 1;
+                }
+        }
+
+        /* We have to do these checks ourselves, in case we are making an
+         * orphan.  The client tells us whether rmdir() or unlink() was called,
+         * so we need to return appropriate errors (bug 72).
+         *
+         * We don't have to check permissions, because vfs_rename (called from
+         * mds_unlink_orphan) also calls may_delete. */
+        if ((rec->ur_mode & S_IFMT) == S_IFDIR) {
+                if (!S_ISDIR(child_inode->i_mode))
+                        GOTO(cleanup, rc = -ENOTDIR);
+        } else {
+                if (S_ISDIR(child_inode->i_mode))
+                        GOTO(cleanup, rc = -EISDIR);
+        }
+
+        if (mds_open_orphan_count(child_inode) > 0) {
+                rc = mds_unlink_orphan(rec, obd, dparent, dchild, &handle);
+#ifdef ENABLE_ORPHANS
+                if (!rc && mds_log_op_unlink(obd, mds, child_inode,
+                                             req->rq_repmsg, offset + 1) > 0)
+                        body->valid |= OBD_MD_FLCOOKIE;
+#endif
+                GOTO(cleanup, rc);
+        }
+
+        // Step 4: Do the unlink: client decides between rmdir/unlink! (bug 72)
         switch (rec->ur_mode & S_IFMT) {
         case S_IFDIR:
                 /* Drop any lingering child directories before we start our
                  * transaction, to avoid doing multiple inode dirty/delete
         switch (rec->ur_mode & S_IFMT) {
         case S_IFDIR:
                 /* Drop any lingering child directories before we start our
                  * transaction, to avoid doing multiple inode dirty/delete
-                 * in our compound transaction (bug 1321).
-                 */
+                 * in our compound transaction (bug 1321). */
                 shrink_dcache_parent(dchild);
                 shrink_dcache_parent(dchild);
-                handle = fsfilt_start(obd, dir_inode, FSFILT_OP_RMDIR);
+                handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_RMDIR,
+                                      NULL);
                 if (IS_ERR(handle))
                         GOTO(cleanup, rc = PTR_ERR(handle));
                 cleanup_phase = 4;
                 if (IS_ERR(handle))
                         GOTO(cleanup, rc = PTR_ERR(handle));
                 cleanup_phase = 4;
-                rc = vfs_rmdir(dir_inode, dchild);
+                rc = vfs_rmdir(dparent->d_inode, dchild);
                 break;
                 break;
-        case S_IFREG:
-                /* If this is the last reference to this inode, get the OBD EA
-                 * data first so the client can destroy OST objects */
-                if (S_ISREG(child_inode->i_mode) && child_inode->i_nlink == 1) {
-                        mds_pack_inode2fid(&body->fid1, child_inode);
-                        mds_pack_inode2body(body, child_inode);
-                        mds_pack_md(obd, req->rq_repmsg, offset + 1,
-                                    body, child_inode);
-                        if (body->valid & OBD_MD_FLEASIZE)
-                                return_lock = 1;
-                }
-                /* no break */
+        case S_IFREG: {
+                handle = fsfilt_start(obd, dparent->d_inode,
+                                      FSFILT_OP_UNLINK_LOG, NULL);
+                if (IS_ERR(handle))
+                        GOTO(cleanup, rc = PTR_ERR(handle));
+
+                cleanup_phase = 4;
+                rc = vfs_unlink(dparent->d_inode, dchild);
+#ifdef ENABLE_ORPHANS
+                if (!rc && log_unlink)
+                        if (mds_log_op_unlink(obd, mds, child_inode,
+                                              req->rq_repmsg, offset + 1) > 0)
+                                body->valid |= OBD_MD_FLCOOKIE;
+#endif
+                break;
+        }
         case S_IFLNK:
         case S_IFCHR:
         case S_IFBLK:
         case S_IFIFO:
         case S_IFSOCK:
         case S_IFLNK:
         case S_IFCHR:
         case S_IFBLK:
         case S_IFIFO:
         case S_IFSOCK:
-                handle = fsfilt_start(obd, dir_inode, FSFILT_OP_UNLINK);
+                handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_UNLINK,
+                                      NULL);
                 if (IS_ERR(handle))
                         GOTO(cleanup, rc = PTR_ERR(handle));
                 cleanup_phase = 4;
                 if (IS_ERR(handle))
                         GOTO(cleanup, rc = PTR_ERR(handle));
                 cleanup_phase = 4;
-                rc = vfs_unlink(dir_inode, dchild);
+                rc = vfs_unlink(dparent->d_inode, dchild);
                 break;
         default:
                 CERROR("bad file type %o unlinking %s\n", rec->ur_mode,
                 break;
         default:
                 CERROR("bad file type %o unlinking %s\n", rec->ur_mode,
@@ -758,29 +1018,29 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset,
 
  cleanup:
         switch(cleanup_phase) {
 
  cleanup:
         switch(cleanup_phase) {
-            case 4:
-                rc = mds_finish_transno(mds, dir_inode, handle, req, rc, 0);
-                if (rc && body) {
-                        /* Don't unlink the OST objects if the MDS unlink failed */
+        case 4:
+                rc = mds_finish_transno(mds, dparent->d_inode, handle, req,
+                                        rc, 0);
+                if (rc && body != NULL) {
+                        // Don't unlink the OST objects if the MDS unlink failed
                         body->valid = 0;
                 }
                         body->valid = 0;
                 }
-            case 3: /* child lock */
-                if (rc != 0 || return_lock == 0)
-                        ldlm_lock_decref(child_lockh, LCK_EX);
-            case 2: /* child dentry */
+        case 3: /* child lock */
+                ldlm_lock_decref(&child_lockh, LCK_EX);
+        case 2: /* child dentry */
                 l_dput(dchild);
                 l_dput(dchild);
-            case 1: /* parent dentry and lock */
+        case 1: /* parent dentry and lock */
                 if (rc) {
                 if (rc) {
-                        ldlm_lock_decref(&parent_lockh, LCK_EX);
+                        ldlm_lock_decref(&parent_lockh, LCK_PW);
                 } else {
                         memcpy(&req->rq_ack_locks[0].lock, &parent_lockh,
                                sizeof(parent_lockh));
                 } else {
                         memcpy(&req->rq_ack_locks[0].lock, &parent_lockh,
                                sizeof(parent_lockh));
-                        req->rq_ack_locks[0].mode = LCK_EX;
+                        req->rq_ack_locks[0].mode = LCK_PW;
                 }
                 }
-                l_dput(dir_de);
-            case 0:
+                l_dput(dparent);
+        case 0:
                 break;
                 break;
-            default:
+        default:
                 CERROR("invalid cleanup_phase %d\n", cleanup_phase);
                 LBUG();
         }
                 CERROR("invalid cleanup_phase %d\n", cleanup_phase);
                 LBUG();
         }
@@ -857,8 +1117,10 @@ static int mds_reint_link(struct mds_update_record *rec, int offset,
         /* Step 3: Lookup the child */
         dchild = ll_lookup_one_len(rec->ur_name, de_tgt_dir, rec->ur_namelen-1);
         if (IS_ERR(dchild)) {
         /* Step 3: Lookup the child */
         dchild = ll_lookup_one_len(rec->ur_name, de_tgt_dir, rec->ur_namelen-1);
         if (IS_ERR(dchild)) {
-                CERROR("child lookup error %ld\n", PTR_ERR(dchild));
-                GOTO(cleanup, rc = PTR_ERR(dchild));
+                rc = PTR_ERR(dchild);
+                if (rc != -EPERM && rc != -EACCES)
+                        CERROR("child lookup error %d\n", rc);
+                GOTO(cleanup, rc);
         }
 
         cleanup_phase = 4; /* child dentry */
         }
 
         cleanup_phase = 4; /* child dentry */
@@ -874,15 +1136,15 @@ static int mds_reint_link(struct mds_update_record *rec, int offset,
         OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_LINK_WRITE,
                        to_kdev_t(de_src->d_inode->i_sb->s_dev));
 
         OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_LINK_WRITE,
                        to_kdev_t(de_src->d_inode->i_sb->s_dev));
 
-        handle = fsfilt_start(obd, de_tgt_dir->d_inode, FSFILT_OP_LINK);
+        handle = fsfilt_start(obd, de_tgt_dir->d_inode, FSFILT_OP_LINK, NULL);
         if (IS_ERR(handle)) {
                 rc = PTR_ERR(handle);
                 GOTO(cleanup, rc);
         }
 
         rc = vfs_link(de_src, de_tgt_dir->d_inode, dchild);
         if (IS_ERR(handle)) {
                 rc = PTR_ERR(handle);
                 GOTO(cleanup, rc);
         }
 
         rc = vfs_link(de_src, de_tgt_dir->d_inode, dchild);
-        if (rc)
-                CERROR("link error %d\n", rc);
+        if (rc && rc != -EPERM && rc != -EACCES)
+                CERROR("vfs_link error %d\n", rc);
 cleanup:
         rc = mds_finish_transno(mds, de_tgt_dir ? de_tgt_dir->d_inode : NULL,
                                 handle, req, rc, 0);
 cleanup:
         rc = mds_finish_transno(mds, de_tgt_dir ? de_tgt_dir->d_inode : NULL,
                                 handle, req, rc, 0);
@@ -1057,13 +1319,12 @@ static int mds_reint_rename(struct mds_update_record *rec, int offset,
         OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_RENAME_WRITE,
                        to_kdev_t(de_srcdir->d_inode->i_sb->s_dev));
 
         OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_RENAME_WRITE,
                        to_kdev_t(de_srcdir->d_inode->i_sb->s_dev));
 
-        handle = fsfilt_start(obd, de_tgtdir->d_inode, FSFILT_OP_RENAME);
+        handle = fsfilt_start(obd, de_tgtdir->d_inode, FSFILT_OP_RENAME, NULL);
         if (IS_ERR(handle))
                 GOTO(cleanup, rc = PTR_ERR(handle));
 
         lock_kernel();
         if (IS_ERR(handle))
                 GOTO(cleanup, rc = PTR_ERR(handle));
 
         lock_kernel();
-        rc = vfs_rename(de_srcdir->d_inode, de_old, de_tgtdir->d_inode, de_new,
-                        NULL);
+        rc = vfs_rename(de_srcdir->d_inode, de_old, de_tgtdir->d_inode, de_new);
         unlock_kernel();
 
         EXIT;
         unlock_kernel();
 
         EXIT;
index e530020..49c6100 100644 (file)
@@ -6,3 +6,4 @@ Makefile
 Makefile.in
 .deps
 TAGS
 Makefile.in
 .deps
 TAGS
+.*.cmd
index 61f4bc2..06d60d4 100644 (file)
@@ -1,5 +1,3 @@
-# FIXME: we need to make it clear that obdclass.o depends on
-# lustre_build_version, or 'make -j2' breaks!
 DEFS=
 MODULE = obdclass
 
 DEFS=
 MODULE = obdclass
 
@@ -9,15 +7,13 @@ else
 FSMOD = fsfilt_ext3
 endif
 
 FSMOD = fsfilt_ext3
 endif
 
+class_obd.o: lustre_build_version
+
 if LIBLUSTRE
 lib_LIBRARIES = liblustreclass.a
 if LIBLUSTRE
 lib_LIBRARIES = liblustreclass.a
-liblustreclass_a_SOURCES = uuid.c statfs_pack.c genops.c debug.c class_obd.c lustre_handles.c lustre_peer.c lprocfs_status.c simple.c
-
-class_obd.o: lustre_version
-
-lustre_version:
-       echo '#define LUSTRE_VERSION 12' > $(top_builddir)/include/linux/lustre_build_version.h
-       echo '#define BUILD_VERSION "1"' >> $(top_builddir)/include/linux/lustre_build_version.h
+liblustreclass_a_SOURCES = uuid.c statfs_pack.c genops.c debug.c class_obd.c
+liblustreclass_a_SOURCES += lustre_handles.c lustre_peer.c lprocfs_status.c
+liblustreclass_a_SOURCES += simple.c recov_log.c obdo.c
 
 else
 modulefs_DATA = lustre_build_version obdclass.o $(FSMOD).o fsfilt_reiserfs.o
 
 else
 modulefs_DATA = lustre_build_version obdclass.o $(FSMOD).o fsfilt_reiserfs.o
@@ -25,15 +21,16 @@ EXTRA_PROGRAMS = obdclass $(FSMOD) fsfilt_reiserfs
 
 obdclass_SOURCES = class_obd.c debug.c genops.c sysctl.c uuid.c simple.c
 obdclass_SOURCES += lprocfs_status.c lustre_handles.c lustre_peer.c
 
 obdclass_SOURCES = class_obd.c debug.c genops.c sysctl.c uuid.c simple.c
 obdclass_SOURCES += lprocfs_status.c lustre_handles.c lustre_peer.c
-obdclass_SOURCES += fsfilt.c statfs_pack.c otree.c
+obdclass_SOURCES += fsfilt.c statfs_pack.c otree.c recov_log.c obdo.c
 endif
 
 include $(top_srcdir)/Rules
 
 endif
 
 include $(top_srcdir)/Rules
 
-# XXX I'm sure there's some automake mv-if-different helper for this.
 lustre_build_version:
        perl $(top_srcdir)/scripts/version_tag.pl $(top_srcdir) $(top_builddir) > tmpver
 lustre_build_version:
        perl $(top_srcdir)/scripts/version_tag.pl $(top_srcdir) $(top_builddir) > tmpver
+       echo #define LUSTRE_RELEASE @RELEASE@ >> tmpver
        cmp -s $(top_builddir)/include/linux/lustre_build_version.h tmpver \
        cmp -s $(top_builddir)/include/linux/lustre_build_version.h tmpver \
-               2> /dev/null &&                                            \
-               $(RM) tmpver ||                                            \
-               mv tmpver $(top_builddir)/include/linux/lustre_build_version.h
+                2> /dev/null &&                                            \
+                $(RM) tmpver ||                                            \
+                mv tmpver $(top_builddir)/include/linux/lustre_build_version.h
+
index b497aa3..8275ed8 100644 (file)
@@ -53,9 +53,7 @@
 #include <linux/miscdevice.h>
 #include <linux/smp_lock.h>
 #else
 #include <linux/miscdevice.h>
 #include <linux/smp_lock.h>
 #else
-
 # include <liblustre.h>
 # include <liblustre.h>
-
 #endif
 
 #include <linux/obd_support.h>
 #endif
 
 #include <linux/obd_support.h>
@@ -64,6 +62,7 @@
 #include <linux/lprocfs_status.h>
 #include <portals/lib-types.h> /* for PTL_MD_MAX_IOV */
 #include <linux/lustre_build_version.h>
 #include <linux/lprocfs_status.h>
 #include <portals/lib-types.h> /* for PTL_MD_MAX_IOV */
 #include <linux/lustre_build_version.h>
+#include <portals/list.h>
 
 struct semaphore obd_conf_sem;   /* serialize configuration commands */
 struct obd_device obd_dev[MAX_OBD_DEVICES];
 
 struct semaphore obd_conf_sem;   /* serialize configuration commands */
 struct obd_device obd_dev[MAX_OBD_DEVICES];
@@ -181,12 +180,10 @@ static inline void obd_conn2data(struct obd_ioctl_data *data,
 
 static void dump_exports(struct obd_device *obd)
 {
 
 static void dump_exports(struct obd_device *obd)
 {
-        struct list_head *tmp, *n;
+        struct obd_export *exp, *n;
 
 
-        list_for_each_safe(tmp, n, &obd->obd_exports) {
-                struct obd_export *exp = list_entry(tmp, struct obd_export,
-                                                    exp_obd_chain);
-                CDEBUG(D_ERROR, "%s: %p %s %d %d %p\n",
+        list_for_each_entry_safe(exp, n, &obd->obd_exports, exp_obd_chain) {
+                CERROR("%s: %p %s %d %d %p\n",
                        obd->obd_name, exp, exp->exp_client_uuid.uuid,
                        atomic_read(&exp->exp_refcount),
                        exp->exp_failed, exp->exp_outstanding_reply );
                        obd->obd_name, exp, exp->exp_client_uuid.uuid,
                        atomic_read(&exp->exp_refcount),
                        exp->exp_failed, exp->exp_outstanding_reply );
@@ -543,6 +540,7 @@ int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd,
                 obd->obd_type->typ_refcnt--;
                 class_put_type(obd->obd_type);
                 obd->obd_type = NULL;
                 obd->obd_type->typ_refcnt--;
                 class_put_type(obd->obd_type);
                 obd->obd_type = NULL;
+                memset(obd, 0, sizeof(*obd));
                 GOTO(out, err = 0);
         }
 
                 GOTO(out, err = 0);
         }
 
@@ -562,7 +560,7 @@ int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd,
 
                 atomic_set(&obd->obd_refcount, 0);
 
 
                 atomic_set(&obd->obd_refcount, 0);
 
-                if ( OBT(obd) && OBP(obd, setup) )
+                if (OBT(obd) && OBP(obd, setup))
                         err = obd_setup(obd, sizeof(*data), data);
 
                 if (!err) {
                         err = obd_setup(obd, sizeof(*data), data);
 
                 if (!err) {
@@ -574,8 +572,8 @@ int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd,
                 GOTO(out, err);
         }
         case OBD_IOC_CLEANUP: {
                 GOTO(out, err);
         }
         case OBD_IOC_CLEANUP: {
-                int force = 0, failover = 0;
-                char * flag;
+                int flags = 0;
+                char *flag;
 
                 if (!obd->obd_set_up) {
                         CERROR("Device %d not setup\n", obd->obd_minor);
 
                 if (!obd->obd_set_up) {
                         CERROR("Device %d not setup\n", obd->obd_minor);
@@ -586,18 +584,19 @@ int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd,
                         for (flag = data->ioc_inlbuf1; *flag != 0; flag++)
                                 switch (*flag) {
                                 case 'F':
                         for (flag = data->ioc_inlbuf1; *flag != 0; flag++)
                                 switch (*flag) {
                                 case 'F':
-                                        force = 1;
+                                        flags |= OBD_OPT_FORCE;
                                         break;
                                 case 'A':
                                         break;
                                 case 'A':
-                                        failover = 1;
+                                        flags |= OBD_OPT_FAILOVER;
                                         break;
                                 default:
                                         break;
                                 default:
-                                        CERROR("unrecognised flag '%c'\n", 
+                                        CERROR("unrecognised flag '%c'\n",
                                                *flag);
                                 }
                 }
                                                *flag);
                                 }
                 }
-                
-                if (atomic_read(&obd->obd_refcount) == 1 || force) {
+
+                if (atomic_read(&obd->obd_refcount) == 1 ||
+                    flags & OBD_OPT_FORCE) {
                         /* this will stop new connections, and need to
                            do it before class_disconnect_exports() */
                         obd->obd_stopping = 1;
                         /* this will stop new connections, and need to
                            do it before class_disconnect_exports() */
                         obd->obd_stopping = 1;
@@ -607,19 +606,19 @@ int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd,
                         struct l_wait_info lwi = LWI_TIMEOUT_INTR(60 * HZ, NULL,
                                                                   NULL, NULL);
                         int rc;
                         struct l_wait_info lwi = LWI_TIMEOUT_INTR(60 * HZ, NULL,
                                                                   NULL, NULL);
                         int rc;
-                        
-                        if (!force) {
+
+                        if (!(flags & OBD_OPT_FORCE)) {
                                 CERROR("OBD device %d (%p) has refcount %d\n",
                                 CERROR("OBD device %d (%p) has refcount %d\n",
-                                       obd->obd_minor, obd, 
+                                       obd->obd_minor, obd,
                                        atomic_read(&obd->obd_refcount));
                                 dump_exports(obd);
                                 GOTO(out, err = -EBUSY);
                         }
                                        atomic_read(&obd->obd_refcount));
                                 dump_exports(obd);
                                 GOTO(out, err = -EBUSY);
                         }
-                        class_disconnect_exports(obd, failover);
-                        CDEBUG(D_IOCTL, 
-                               "%s: waiting for obd refs to go away: %d\n", 
+                        class_disconnect_exports(obd, flags);
+                        CDEBUG(D_IOCTL,
+                               "%s: waiting for obd refs to go away: %d\n",
                                obd->obd_name, atomic_read(&obd->obd_refcount));
                                obd->obd_name, atomic_read(&obd->obd_refcount));
-                
+
                         rc = l_wait_event(obd->obd_refcount_waitq,
                                      atomic_read(&obd->obd_refcount) < 2, &lwi);
                         if (rc == 0) {
                         rc = l_wait_event(obd->obd_refcount_waitq,
                                      atomic_read(&obd->obd_refcount) < 2, &lwi);
                         if (rc == 0) {
@@ -630,12 +629,12 @@ int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd,
                                        atomic_read(&obd->obd_refcount));
                                 dump_exports(obd);
                         }
                                        atomic_read(&obd->obd_refcount));
                                 dump_exports(obd);
                         }
-                        CDEBUG(D_IOCTL, "%s: awake, now finishing cleanup\n", 
+                        CDEBUG(D_IOCTL, "%s: awake, now finishing cleanup\n",
                                obd->obd_name);
                 }
 
                 if (OBT(obd) && OBP(obd, cleanup))
                                obd->obd_name);
                 }
 
                 if (OBT(obd) && OBP(obd, cleanup))
-                        err = obd_cleanup(obd, force, failover);
+                        err = obd_cleanup(obd, flags);
 
                 if (!err) {
                         obd->obd_set_up = obd->obd_stopping = 0;
 
                 if (!err) {
                         obd->obd_set_up = obd->obd_stopping = 0;
@@ -807,10 +806,10 @@ EXPORT_SYMBOL(class_conn2cliimp);
 EXPORT_SYMBOL(class_conn2ldlmimp);
 EXPORT_SYMBOL(class_disconnect);
 EXPORT_SYMBOL(class_disconnect_exports);
 EXPORT_SYMBOL(class_conn2ldlmimp);
 EXPORT_SYMBOL(class_disconnect);
 EXPORT_SYMBOL(class_disconnect_exports);
-EXPORT_SYMBOL(lustre_uuid_to_peer);
 
 /* uuid.c */
 EXPORT_SYMBOL(class_uuid_unparse);
 
 /* uuid.c */
 EXPORT_SYMBOL(class_uuid_unparse);
+EXPORT_SYMBOL(lustre_uuid_to_peer);
 EXPORT_SYMBOL(client_tgtuuid2obd);
 
 EXPORT_SYMBOL(class_handle_hash);
 EXPORT_SYMBOL(client_tgtuuid2obd);
 
 EXPORT_SYMBOL(class_handle_hash);
@@ -831,12 +830,15 @@ int init_obdclass(void)
                       ", info@clusterfs.com\n");
 
         class_init_uuidlist();
                       ", info@clusterfs.com\n");
 
         class_init_uuidlist();
-        class_handle_init();
+        err = class_handle_init();
+        if (err)
+                return err;
 
         sema_init(&obd_conf_sem, 1);
         INIT_LIST_HEAD(&obd_types);
 
 
         sema_init(&obd_conf_sem, 1);
         INIT_LIST_HEAD(&obd_types);
 
-        if ((err = misc_register(&obd_psdev))) {
+        err = misc_register(&obd_psdev);
+        if (err) {
                 CERROR("cannot register %d err %d\n", OBD_MINOR, err);
                 return err;
         }
                 CERROR("cannot register %d err %d\n", OBD_MINOR, err);
                 return err;
         }
@@ -875,7 +877,7 @@ int obd_proc_read_version(char *page, char **start, off_t off, int count, int *e
 #endif
 
 #ifdef __KERNEL__
 #endif
 
 #ifdef __KERNEL__
-static void __exit cleanup_obdclass(void)
+static void /*__exit*/ cleanup_obdclass(void)
 #else
 static void cleanup_obdclass(void)
 #endif
 #else
 static void cleanup_obdclass(void)
 #endif
@@ -914,8 +916,8 @@ static void cleanup_obdclass(void)
  * kernel patch */
 #ifdef __KERNEL__
 #include <linux/lustre_version.h>
  * kernel patch */
 #ifdef __KERNEL__
 #include <linux/lustre_version.h>
-#define LUSTRE_MIN_VERSION 18
-#define LUSTRE_MAX_VERSION 19
+#define LUSTRE_MIN_VERSION 21
+#define LUSTRE_MAX_VERSION 21
 #if (LUSTRE_KERNEL_VERSION < LUSTRE_MIN_VERSION)
 # error Cannot continue: Your Lustre kernel patch is older than the sources
 #elif (LUSTRE_KERNEL_VERSION > LUSTRE_MAX_VERSION)
 #if (LUSTRE_KERNEL_VERSION < LUSTRE_MIN_VERSION)
 # error Cannot continue: Your Lustre kernel patch is older than the sources
 #elif (LUSTRE_KERNEL_VERSION > LUSTRE_MAX_VERSION)
index 4357b79..d0abdfe 100644 (file)
@@ -64,7 +64,7 @@ void fsfilt_unregister_ops(struct fsfilt_operations *fs_ops)
         /* unlock fsfilt_types list */
 }
 
         /* unlock fsfilt_types list */
 }
 
-struct fsfilt_operations *fsfilt_get_ops(char *type)
+struct fsfilt_operations *fsfilt_get_ops(const char *type)
 {
         struct fsfilt_operations *fs_ops;
 
 {
         struct fsfilt_operations *fs_ops;
 
@@ -89,7 +89,7 @@ struct fsfilt_operations *fsfilt_get_ops(char *type)
                        /* unlock fsfilt_types list */
                 }
         }
                        /* unlock fsfilt_types list */
                 }
         }
-        __MOD_INC_USE_COUNT(fs_ops->fs_owner);
+        try_module_get(fs_ops->fs_owner);
         /* unlock fsfilt_types list */
 
         return fs_ops;
         /* unlock fsfilt_types list */
 
         return fs_ops;
@@ -97,7 +97,7 @@ struct fsfilt_operations *fsfilt_get_ops(char *type)
 
 void fsfilt_put_ops(struct fsfilt_operations *fs_ops)
 {
 
 void fsfilt_put_ops(struct fsfilt_operations *fs_ops)
 {
-        __MOD_DEC_USE_COUNT(fs_ops->fs_owner);
+        module_put(fs_ops->fs_owner);
 }
 
 
 }
 
 
index 5f6322f..5dd196d 100644 (file)
 #include <linux/quotaops.h>
 #include <linux/ext3_fs.h>
 #include <linux/ext3_jbd.h>
 #include <linux/quotaops.h>
 #include <linux/ext3_fs.h>
 #include <linux/ext3_jbd.h>
-#include <linux/ext3_xattr.h>
+#include <linux/version.h>
+/* XXX ugh */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+ #include <linux/ext3_xattr.h>
+#else 
+ #include <linux/../../fs/ext3/xattr.h>
+#endif
 #include <linux/kp30.h>
 #include <linux/lustre_fsfilt.h>
 #include <linux/obd.h>
 #include <linux/kp30.h>
 #include <linux/lustre_fsfilt.h>
 #include <linux/obd.h>
@@ -43,10 +49,11 @@ static kmem_cache_t *fcb_cache;
 static atomic_t fcb_cache_count = ATOMIC_INIT(0);
 
 struct fsfilt_cb_data {
 static atomic_t fcb_cache_count = ATOMIC_INIT(0);
 
 struct fsfilt_cb_data {
-        struct journal_callback cb_jcb; /* data private to jbd */
+        struct journal_callback cb_jcb; /* jbd private data - MUST BE FIRST */
         fsfilt_cb_t cb_func;            /* MDS/OBD completion function */
         struct obd_device *cb_obd;      /* MDS/OBD completion device */
         __u64 cb_last_rcvd;             /* MDS/OST last committed operation */
         fsfilt_cb_t cb_func;            /* MDS/OBD completion function */
         struct obd_device *cb_obd;      /* MDS/OBD completion device */
         __u64 cb_last_rcvd;             /* MDS/OST last committed operation */
+        void *cb_data;                  /* MDS/OST completion function data */
 };
 
 #define EXT3_XATTR_INDEX_LUSTRE         5
 };
 
 #define EXT3_XATTR_INDEX_LUSTRE         5
@@ -58,13 +65,24 @@ struct fsfilt_cb_data {
  * the inode (which we will be changing anyways as part of this
  * transaction).
  */
  * the inode (which we will be changing anyways as part of this
  * transaction).
  */
-static void *fsfilt_ext3_start(struct inode *inode, int op)
+static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private)
 {
         /* For updates to the last recieved file */
         int nblocks = EXT3_DATA_TRANS_BLOCKS;
         void *handle;
 
         switch(op) {
 {
         /* For updates to the last recieved file */
         int nblocks = EXT3_DATA_TRANS_BLOCKS;
         void *handle;
 
         switch(op) {
+        case FSFILT_OP_CREATE_LOG:
+                nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS+EXT3_DATA_TRANS_BLOCKS;
+                op = FSFILT_OP_CREATE;
+                break;
+        case FSFILT_OP_UNLINK_LOG:
+                nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS+EXT3_DATA_TRANS_BLOCKS;
+                op = FSFILT_OP_UNLINK;
+                break;
+        }
+
+        switch(op) {
         case FSFILT_OP_RMDIR:
         case FSFILT_OP_UNLINK:
                 nblocks += EXT3_DELETE_TRANS_BLOCKS;
         case FSFILT_OP_RMDIR:
         case FSFILT_OP_UNLINK:
                 nblocks += EXT3_DELETE_TRANS_BLOCKS;
@@ -95,7 +113,7 @@ static void *fsfilt_ext3_start(struct inode *inode, int op)
                  LBUG();
         }
 
                  LBUG();
         }
 
-        LASSERT(!current->journal_info);
+        LASSERT(current->journal_info == desc_private);
         lock_kernel();
         handle = journal_start(EXT3_JOURNAL(inode), nblocks);
         unlock_kernel();
         lock_kernel();
         handle = journal_start(EXT3_JOURNAL(inode), nblocks);
         unlock_kernel();
@@ -185,14 +203,14 @@ static int fsfilt_ext3_credits_needed(int objcount, struct fsfilt_objinfo *fso)
  * the pages have been written.
  */
 static void *fsfilt_ext3_brw_start(int objcount, struct fsfilt_objinfo *fso,
  * the pages have been written.
  */
 static void *fsfilt_ext3_brw_start(int objcount, struct fsfilt_objinfo *fso,
-                                   int niocount, struct niobuf_remote *nb)
+                                   int niocount, void *desc_private)
 {
         journal_t *journal;
         handle_t *handle;
         int needed;
         ENTRY;
 
 {
         journal_t *journal;
         handle_t *handle;
         int needed;
         ENTRY;
 
-        LASSERT(!current->journal_info);
+        LASSERT(current->journal_info == desc_private);
         journal = EXT3_SB(fso->fso_dentry->d_inode->i_sb)->s_journal;
         needed = fsfilt_ext3_credits_needed(objcount, fso);
 
         journal = EXT3_SB(fso->fso_dentry->d_inode->i_sb)->s_journal;
         needed = fsfilt_ext3_credits_needed(objcount, fso);
 
@@ -218,6 +236,8 @@ static void *fsfilt_ext3_brw_start(int objcount, struct fsfilt_objinfo *fso,
         if (IS_ERR(handle))
                 CERROR("can't get handle for %d credits: rc = %ld\n", needed,
                        PTR_ERR(handle));
         if (IS_ERR(handle))
                 CERROR("can't get handle for %d credits: rc = %ld\n", needed,
                        PTR_ERR(handle));
+        else
+                LASSERT(handle->h_buffer_credits >= needed);
 
         RETURN(handle);
 }
 
         RETURN(handle);
 }
@@ -249,24 +269,26 @@ static int fsfilt_ext3_setattr(struct dentry *dentry, void *handle,
          * in the block pointers; this is really the "small" stripe MD data.
          * We can avoid further hackery by virtue of the MDS file size being
          * zero all the time (which doesn't invoke block truncate at unlink
          * in the block pointers; this is really the "small" stripe MD data.
          * We can avoid further hackery by virtue of the MDS file size being
          * zero all the time (which doesn't invoke block truncate at unlink
-         * time), so we assert we never change the MDS file size from zero.
-         */
+         * time), so we assert we never change the MDS file size from zero. */
         if (iattr->ia_valid & ATTR_SIZE && !do_trunc) {
                 /* ATTR_SIZE would invoke truncate: clear it */
                 iattr->ia_valid &= ~ATTR_SIZE;
         if (iattr->ia_valid & ATTR_SIZE && !do_trunc) {
                 /* ATTR_SIZE would invoke truncate: clear it */
                 iattr->ia_valid &= ~ATTR_SIZE;
-                inode->i_size = iattr->ia_size;
+                EXT3_I(inode)->i_disksize = inode->i_size = iattr->ia_size;
 
                 /* make sure _something_ gets set - so new inode
 
                 /* make sure _something_ gets set - so new inode
-                 * goes to disk (probably won't work over XFS
-                 */
-                if (!iattr->ia_valid & ATTR_MODE) {
+                 * goes to disk (probably won't work over XFS */
+                if (!(iattr->ia_valid & (ATTR_MODE | ATTR_MTIME | ATTR_CTIME))){
                         iattr->ia_valid |= ATTR_MODE;
                         iattr->ia_mode = inode->i_mode;
                 }
         }
                         iattr->ia_valid |= ATTR_MODE;
                         iattr->ia_mode = inode->i_mode;
                 }
         }
-        if (inode->i_op->setattr)
+
+        /* Don't allow setattr to change file type */
+        iattr->ia_mode = (inode->i_mode & S_IFMT)|(iattr->ia_mode & ~S_IFMT);
+
+        if (inode->i_op->setattr) {
                 rc = inode->i_op->setattr(dentry, iattr);
                 rc = inode->i_op->setattr(dentry, iattr);
-        else{
+        } else {
                 rc = inode_change_ok(inode, iattr);
                 if (!rc)
                         rc = inode_setattr(inode, iattr);
                 rc = inode_change_ok(inode, iattr);
                 if (!rc)
                         rc = inode_setattr(inode, iattr);
@@ -286,8 +308,8 @@ static int fsfilt_ext3_set_md(struct inode *inode, void *handle,
          * it will fit, because putting it in an EA currently kills the MDS
          * performance.  We'll fix this with "fast EAs" in the future.
          */
          * it will fit, because putting it in an EA currently kills the MDS
          * performance.  We'll fix this with "fast EAs" in the future.
          */
-        if (lmm_size <= sizeof(EXT3_I(inode)->i_data) -
-                        sizeof(EXT3_I(inode)->i_data[0])) {
+        if (inode->i_blocks == 0 && lmm_size <= sizeof(EXT3_I(inode)->i_data) -
+                                            sizeof(EXT3_I(inode)->i_data[0])) {
                 /* XXX old_size is debugging only */
                 int old_size = EXT3_I(inode)->i_data[0];
                 if (old_size != 0) {
                 /* XXX old_size is debugging only */
                 int old_size = EXT3_I(inode)->i_data[0];
                 if (old_size != 0) {
@@ -303,8 +325,15 @@ static int fsfilt_ext3_set_md(struct inode *inode, void *handle,
         } else {
                 down(&inode->i_sem);
                 lock_kernel();
         } else {
                 down(&inode->i_sem);
                 lock_kernel();
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
                 rc = ext3_xattr_set(handle, inode, EXT3_XATTR_INDEX_LUSTRE,
                                     XATTR_LUSTRE_MDS_OBJID, lmm, lmm_size, 0);
                 rc = ext3_xattr_set(handle, inode, EXT3_XATTR_INDEX_LUSTRE,
                                     XATTR_LUSTRE_MDS_OBJID, lmm, lmm_size, 0);
+#else
+                rc = ext3_xattr_set_handle(handle, inode, 
+                                           EXT3_XATTR_INDEX_LUSTRE,
+                                           XATTR_LUSTRE_MDS_OBJID, lmm, 
+                                           lmm_size, 0);
+#endif
                 unlock_kernel();
                 up(&inode->i_sem);
         }
                 unlock_kernel();
                 up(&inode->i_sem);
         }
@@ -319,7 +348,7 @@ static int fsfilt_ext3_get_md(struct inode *inode, void *lmm, int lmm_size)
 {
         int rc;
 
 {
         int rc;
 
-        if (EXT3_I(inode)->i_data[0]) {
+        if (inode->i_blocks == 0 && EXT3_I(inode)->i_data[0]) {
                 int size = le32_to_cpu(EXT3_I(inode)->i_data[0]);
                 LASSERT(size < sizeof(EXT3_I(inode)->i_data));
                 if (lmm) {
                 int size = le32_to_cpu(EXT3_I(inode)->i_data[0]);
                 LASSERT(size < sizeof(EXT3_I(inode)->i_data));
                 if (lmm) {
@@ -411,14 +440,15 @@ static void fsfilt_ext3_cb_func(struct journal_callback *jcb, int error)
 {
         struct fsfilt_cb_data *fcb = (struct fsfilt_cb_data *)jcb;
 
 {
         struct fsfilt_cb_data *fcb = (struct fsfilt_cb_data *)jcb;
 
-        fcb->cb_func(fcb->cb_obd, fcb->cb_last_rcvd, error);
+        fcb->cb_func(fcb->cb_obd, fcb->cb_last_rcvd, fcb->cb_data, error);
 
         OBD_SLAB_FREE(fcb, fcb_cache, sizeof *fcb);
         atomic_dec(&fcb_cache_count);
 }
 
 static int fsfilt_ext3_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
 
         OBD_SLAB_FREE(fcb, fcb_cache, sizeof *fcb);
         atomic_dec(&fcb_cache_count);
 }
 
 static int fsfilt_ext3_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
-                                     void *handle, fsfilt_cb_t cb_func)
+                                     void *handle, fsfilt_cb_t cb_func,
+                                     void *cb_data)
 {
         struct fsfilt_cb_data *fcb;
 
 {
         struct fsfilt_cb_data *fcb;
 
@@ -430,10 +460,10 @@ static int fsfilt_ext3_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
         fcb->cb_func = cb_func;
         fcb->cb_obd = obd;
         fcb->cb_last_rcvd = last_rcvd;
         fcb->cb_func = cb_func;
         fcb->cb_obd = obd;
         fcb->cb_last_rcvd = last_rcvd;
+        fcb->cb_data = cb_data;
 
         CDEBUG(D_EXT2, "set callback for last_rcvd: "LPD64"\n", last_rcvd);
         lock_kernel();
 
         CDEBUG(D_EXT2, "set callback for last_rcvd: "LPD64"\n", last_rcvd);
         lock_kernel();
-        /* Note that an "incompatible pointer" warning here is OK for now */
         journal_callback_set(handle, fsfilt_ext3_cb_func,
                              (struct journal_callback *)fcb);
         unlock_kernel();
         journal_callback_set(handle, fsfilt_ext3_cb_func,
                              (struct journal_callback *)fcb);
         unlock_kernel();
@@ -443,10 +473,11 @@ static int fsfilt_ext3_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
 
 static int fsfilt_ext3_journal_data(struct file *filp)
 {
 
 static int fsfilt_ext3_journal_data(struct file *filp)
 {
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+        /* bug 1576: enable data journaling on 2.5 when appropriate */
         struct inode *inode = filp->f_dentry->d_inode;
         struct inode *inode = filp->f_dentry->d_inode;
-
         EXT3_I(inode)->i_flags |= EXT3_JOURNAL_DATA_FL;
         EXT3_I(inode)->i_flags |= EXT3_JOURNAL_DATA_FL;
-
+#endif
         return 0;
 }
 
         return 0;
 }
 
@@ -459,7 +490,7 @@ static int fsfilt_ext3_journal_data(struct file *filp)
  */
 static int fsfilt_ext3_statfs(struct super_block *sb, struct obd_statfs *osfs)
 {
  */
 static int fsfilt_ext3_statfs(struct super_block *sb, struct obd_statfs *osfs)
 {
-        struct statfs sfs;
+        struct kstatfs sfs;
         int rc = vfs_statfs(sb, &sfs);
 
         if (!rc && sfs.f_bfree < sfs.f_ffree) {
         int rc = vfs_statfs(sb, &sfs);
 
         if (!rc && sfs.f_bfree < sfs.f_ffree) {
@@ -484,6 +515,110 @@ static int fsfilt_ext3_prep_san_write(struct inode *inode, long *blocks,
         return ext3_prep_san_write(inode, blocks, nblocks, newsize);
 }
 
         return ext3_prep_san_write(inode, blocks, nblocks, newsize);
 }
 
+/*
+ * Read one record of @size bytes located at *@offs in @file into @buf,
+ * fetching the containing block directly with ext3_bread().
+ *
+ * The record must lie entirely below i_size and must not straddle a
+ * block boundary; either violation is logged and fails with -EIO.
+ *
+ * Returns @size and advances *@offs by @size on success.  On failure a
+ * negative errno is returned and *@offs is left untouched.
+ */
+static int fsfilt_ext3_read_record(struct file * file, char *buf,
+                                   int size, loff_t *offs)
+{
+        struct buffer_head *bh;
+        unsigned long block, boffs;
+        struct inode *inode = file->f_dentry->d_inode;
+        int err;
+
+        if (inode->i_size < *offs + size) {
+                CERROR("file size %llu is too short for read %u@%llu\n",
+                       inode->i_size, size, *offs);
+                return -EIO;
+        }
+
+        block = *offs >> inode->i_blkbits;
+        bh = ext3_bread(NULL, inode, block, 0, &err);
+        if (!bh) {
+                CERROR("can't read block: %d\n", err);
+                /* Without a buffer nothing was read.  Never let err == 0
+                 * (presumably an unmapped block - TODO confirm) escape as
+                 * a "successful" 0-byte read. */
+                return err < 0 ? err : -EIO;
+        }
+
+        /* offset of the record inside its block */
+        boffs = (unsigned)*offs % bh->b_size;
+        if (boffs + size > bh->b_size) {
+                CERROR("request crosses block's border. offset %llu, size %u\n",
+                       *offs, size);
+                brelse(bh);
+                return -EIO;
+        }
+
+        memcpy(buf, bh->b_data + boffs, size);
+        brelse(bh);
+        *offs += size;
+        return size;
+}
+
+/*
+ * Write one record of @size bytes from @buf at *@offs in @file, inside
+ * its own journal transaction.
+ *
+ * If the record ends beyond the current i_size, i_size is first grown
+ * (under i_sem) to the end of the block containing *@offs.  The block is
+ * then read or created with ext3_bread(), and the copy is bracketed by
+ * ext3_journal_get_write_access() / ext3_journal_dirty_metadata().
+ * A record may not straddle a block boundary (-EIO).
+ *
+ * Returns @size on success, and advances *@offs by @size when @size > 0.
+ * On failure a negative errno is returned and *@offs is left untouched.
+ */
+static int fsfilt_ext3_write_record(struct file * file, char *buf,
+                                    int size, loff_t *offs)
+{
+        struct buffer_head *bh;
+        unsigned long block, boffs;
+        struct inode *inode = file->f_dentry->d_inode;
+        loff_t old_size = inode->i_size;
+        journal_t *journal;
+        handle_t *handle;
+        int err;
+
+        journal = EXT3_SB(inode->i_sb)->s_journal;
+        handle = journal_start(journal, EXT3_DATA_TRANS_BLOCKS + 2);
+        /* journal_start() signals failure with an ERR_PTR() value (this is
+         * how fsfilt_ext3_brw_start() checks its handle), so a plain NULL
+         * test would let an error pointer reach ext3_bread() and
+         * journal_stop() below. */
+        if (handle == NULL || IS_ERR(handle)) {
+                CERROR("can't start transaction\n");
+                return handle == NULL ? -EIO : PTR_ERR(handle);
+        }
+
+        block = *offs >> inode->i_blkbits;
+        if (*offs + size > inode->i_size) {
+                down(&inode->i_sem);
+                /* re-check now that i_sem is held */
+                if (*offs + size > inode->i_size)
+                        inode->i_size = ((loff_t)block + 1) << inode->i_blkbits;
+                up(&inode->i_sem);
+        }
+
+        bh = ext3_bread(handle, inode, block, 1, &err);
+        if (!bh) {
+                CERROR("can't read/create block: %d\n", err);
+                /* no buffer means nothing was written: never return >= 0 */
+                if (err >= 0)
+                        err = -EIO;
+                goto out;
+        }
+
+        /* This is a hack only needed because ext3_get_block_handle() updates
+         * i_disksize after marking the inode dirty in ext3_splice_branch().
+         * We will fix that when we get a chance, as ext3_mark_inode_dirty()
+         * is not without cost, nor is it even exported.
+         */
+        if (inode->i_size > old_size)
+                mark_inode_dirty(inode);
+
+        /* offset of the record inside its block */
+        boffs = (unsigned)*offs % bh->b_size;
+        if (boffs + size > bh->b_size) {
+                CERROR("request crosses block's border. offset %llu, size %u\n",
+                       *offs, size);
+                err = -EIO;
+                goto out;
+        }
+
+        err = ext3_journal_get_write_access(handle, bh);
+        if (err) {
+                CERROR("journal_get_write_access() returned error %d\n", err);
+                goto out;
+        }
+        memcpy(bh->b_data + boffs, buf, size);
+        err = ext3_journal_dirty_metadata(handle, bh);
+        if (err) {
+                CERROR("journal_dirty_metadata() returned error %d\n", err);
+                goto out;
+        }
+        err = size;
+out:
+        /* common exit: drop the buffer (if any) and close the transaction */
+        if (bh)
+                brelse(bh);
+        journal_stop(handle);
+        if (err > 0)
+                *offs += size;
+        return err;
+}
+
 static struct fsfilt_operations fsfilt_ext3_ops = {
         fs_type:                "ext3",
         fs_owner:               THIS_MODULE,
 static struct fsfilt_operations fsfilt_ext3_ops = {
         fs_type:                "ext3",
         fs_owner:               THIS_MODULE,
@@ -499,6 +634,8 @@ static struct fsfilt_operations fsfilt_ext3_ops = {
         fs_statfs:              fsfilt_ext3_statfs,
         fs_sync:                fsfilt_ext3_sync,
         fs_prep_san_write:      fsfilt_ext3_prep_san_write,
         fs_statfs:              fsfilt_ext3_statfs,
         fs_sync:                fsfilt_ext3_sync,
         fs_prep_san_write:      fsfilt_ext3_prep_san_write,
+        fs_write_record:        fsfilt_ext3_write_record,
+        fs_read_record:         fsfilt_ext3_read_record,
 };
 
 static int __init fsfilt_ext3_init(void)
 };
 
 static int __init fsfilt_ext3_init(void)
index 1fba0f4..80f7e50 100644 (file)
@@ -43,10 +43,11 @@ static kmem_cache_t *fcb_cache;
 static atomic_t fcb_cache_count = ATOMIC_INIT(0);
 
 struct fsfilt_cb_data {
 static atomic_t fcb_cache_count = ATOMIC_INIT(0);
 
 struct fsfilt_cb_data {
-        struct journal_callback cb_jcb; /* data private to jbd */
+        struct journal_callback cb_jcb; /* jbd private data - MUST BE FIRST */
         fsfilt_cb_t cb_func;            /* MDS/OBD completion function */
         struct obd_device *cb_obd;      /* MDS/OBD completion device */
         __u64 cb_last_rcvd;             /* MDS/OST last committed operation */
         fsfilt_cb_t cb_func;            /* MDS/OBD completion function */
         struct obd_device *cb_obd;      /* MDS/OBD completion device */
         __u64 cb_last_rcvd;             /* MDS/OST last committed operation */
+        void *cb_data;                  /* MDS/OST completion function data */
 };
 
 #define EXTN_XATTR_INDEX_LUSTRE         5
 };
 
 #define EXTN_XATTR_INDEX_LUSTRE         5
@@ -58,13 +59,24 @@ struct fsfilt_cb_data {
  * the inode (which we will be changing anyways as part of this
  * transaction).
  */
  * the inode (which we will be changing anyways as part of this
  * transaction).
  */
-static void *fsfilt_extN_start(struct inode *inode, int op)
+static void *fsfilt_extN_start(struct inode *inode, int op, void *desc_private)
 {
         /* For updates to the last recieved file */
         int nblocks = EXTN_DATA_TRANS_BLOCKS;
         void *handle;
 
         switch(op) {
 {
         /* For updates to the last recieved file */
         int nblocks = EXTN_DATA_TRANS_BLOCKS;
         void *handle;
 
         switch(op) {
+        case FSFILT_OP_CREATE_LOG:
+                nblocks += EXTN_INDEX_EXTRA_TRANS_BLOCKS+EXTN_DATA_TRANS_BLOCKS;
+                op = FSFILT_OP_CREATE;
+                break;
+        case FSFILT_OP_UNLINK_LOG:
+                nblocks += EXTN_INDEX_EXTRA_TRANS_BLOCKS+EXTN_DATA_TRANS_BLOCKS;
+                op = FSFILT_OP_UNLINK;
+                break;
+        }
+
+        switch(op) {
         case FSFILT_OP_RMDIR:
         case FSFILT_OP_UNLINK:
                 nblocks += EXTN_DELETE_TRANS_BLOCKS;
         case FSFILT_OP_RMDIR:
         case FSFILT_OP_UNLINK:
                 nblocks += EXTN_DELETE_TRANS_BLOCKS;
@@ -95,7 +107,7 @@ static void *fsfilt_extN_start(struct inode *inode, int op)
                  LBUG();
         }
 
                  LBUG();
         }
 
-        LASSERT(!current->journal_info);
+        LASSERT(current->journal_info == desc_private);
         lock_kernel();
         handle = journal_start(EXTN_JOURNAL(inode), nblocks);
         unlock_kernel();
         lock_kernel();
         handle = journal_start(EXTN_JOURNAL(inode), nblocks);
         unlock_kernel();
@@ -124,7 +136,7 @@ static void *fsfilt_extN_start(struct inode *inode, int op)
  * objcount inode blocks
  * 1 superblock
  * 2 * EXTN_SINGLEDATA_TRANS_BLOCKS for the quota files
  * objcount inode blocks
  * 1 superblock
  * 2 * EXTN_SINGLEDATA_TRANS_BLOCKS for the quota files
- * 
+ *
  * 1 EXTN_DATA_TRANS_BLOCKS for the last_rcvd update.
  */
 static int fsfilt_extN_credits_needed(int objcount, struct fsfilt_objinfo *fso)
  * 1 EXTN_DATA_TRANS_BLOCKS for the last_rcvd update.
  */
 static int fsfilt_extN_credits_needed(int objcount, struct fsfilt_objinfo *fso)
@@ -155,7 +167,7 @@ static int fsfilt_extN_credits_needed(int objcount, struct fsfilt_objinfo *fso)
                 ngdblocks = EXTN_SB(sb)->s_gdb_count;
 
         needed += nbitmaps + ngdblocks;
                 ngdblocks = EXTN_SB(sb)->s_gdb_count;
 
         needed += nbitmaps + ngdblocks;
-        
+
         /* last_rcvd update */
         needed += EXTN_DATA_TRANS_BLOCKS;
 
         /* last_rcvd update */
         needed += EXTN_DATA_TRANS_BLOCKS;
 
@@ -185,14 +197,14 @@ static int fsfilt_extN_credits_needed(int objcount, struct fsfilt_objinfo *fso)
  * the pages have been written.
  */
 static void *fsfilt_extN_brw_start(int objcount, struct fsfilt_objinfo *fso,
  * the pages have been written.
  */
 static void *fsfilt_extN_brw_start(int objcount, struct fsfilt_objinfo *fso,
-                                   int niocount, struct niobuf_remote *nb)
+                                   int niocount, void *desc_private)
 {
         journal_t *journal;
         handle_t *handle;
         int needed;
         ENTRY;
 
 {
         journal_t *journal;
         handle_t *handle;
         int needed;
         ENTRY;
 
-        LASSERT(!current->journal_info);
+        LASSERT(current->journal_info == desc_private);
         journal = EXTN_SB(fso->fso_dentry->d_inode->i_sb)->s_journal;
         needed = fsfilt_extN_credits_needed(objcount, fso);
 
         journal = EXTN_SB(fso->fso_dentry->d_inode->i_sb)->s_journal;
         needed = fsfilt_extN_credits_needed(objcount, fso);
 
@@ -218,6 +230,8 @@ static void *fsfilt_extN_brw_start(int objcount, struct fsfilt_objinfo *fso,
         if (IS_ERR(handle))
                 CERROR("can't get handle for %d credits: rc = %ld\n", needed,
                        PTR_ERR(handle));
         if (IS_ERR(handle))
                 CERROR("can't get handle for %d credits: rc = %ld\n", needed,
                        PTR_ERR(handle));
+        else
+                LASSERT(handle->h_buffer_credits >= needed);
 
         RETURN(handle);
 }
 
         RETURN(handle);
 }
@@ -249,24 +263,26 @@ static int fsfilt_extN_setattr(struct dentry *dentry, void *handle,
          * in the block pointers; this is really the "small" stripe MD data.
          * We can avoid further hackery by virtue of the MDS file size being
          * zero all the time (which doesn't invoke block truncate at unlink
          * in the block pointers; this is really the "small" stripe MD data.
          * We can avoid further hackery by virtue of the MDS file size being
          * zero all the time (which doesn't invoke block truncate at unlink
-         * time), so we assert we never change the MDS file size from zero.
-         */
+         * time), so we assert we never change the MDS file size from zero. */
         if (iattr->ia_valid & ATTR_SIZE && !do_trunc) {
                 /* ATTR_SIZE would invoke truncate: clear it */
                 iattr->ia_valid &= ~ATTR_SIZE;
         if (iattr->ia_valid & ATTR_SIZE && !do_trunc) {
                 /* ATTR_SIZE would invoke truncate: clear it */
                 iattr->ia_valid &= ~ATTR_SIZE;
-                inode->i_size = iattr->ia_size;
+                EXTN_I(inode)->i_disksize = inode->i_size = iattr->ia_size;
 
                 /* make sure _something_ gets set - so new inode
 
                 /* make sure _something_ gets set - so new inode
-                 * goes to disk (probably won't work over XFS
-                 */
-                if (!iattr->ia_valid & ATTR_MODE) {
+                 * goes to disk (probably won't work over XFS */
+                if (!(iattr->ia_valid & (ATTR_MODE | ATTR_MTIME | ATTR_CTIME))){
                         iattr->ia_valid |= ATTR_MODE;
                         iattr->ia_mode = inode->i_mode;
                 }
         }
                         iattr->ia_valid |= ATTR_MODE;
                         iattr->ia_mode = inode->i_mode;
                 }
         }
-        if (inode->i_op->setattr)
+
+        /* Don't allow setattr to change file type */
+        iattr->ia_mode = (inode->i_mode & S_IFMT)|(iattr->ia_mode & ~S_IFMT);
+
+        if (inode->i_op->setattr) {
                 rc = inode->i_op->setattr(dentry, iattr);
                 rc = inode->i_op->setattr(dentry, iattr);
-        else{
+        } else {
                 rc = inode_change_ok(inode, iattr);
                 if (!rc)
                         rc = inode_setattr(inode, iattr);
                 rc = inode_change_ok(inode, iattr);
                 if (!rc)
                         rc = inode_setattr(inode, iattr);
@@ -286,8 +302,8 @@ static int fsfilt_extN_set_md(struct inode *inode, void *handle,
          * it will fit, because putting it in an EA currently kills the MDS
          * performance.  We'll fix this with "fast EAs" in the future.
          */
          * it will fit, because putting it in an EA currently kills the MDS
          * performance.  We'll fix this with "fast EAs" in the future.
          */
-        if (lmm_size <= sizeof(EXTN_I(inode)->i_data) -
-                        sizeof(EXTN_I(inode)->i_data[0])) {
+        if (inode->i_blocks == 0 && lmm_size <= sizeof(EXTN_I(inode)->i_data) -
+                                            sizeof(EXTN_I(inode)->i_data[0])) {
                 /* XXX old_size is debugging only */
                 int old_size = EXTN_I(inode)->i_data[0];
                 if (old_size != 0) {
                 /* XXX old_size is debugging only */
                 int old_size = EXTN_I(inode)->i_data[0];
                 if (old_size != 0) {
@@ -319,7 +335,7 @@ static int fsfilt_extN_get_md(struct inode *inode, void *lmm, int lmm_size)
 {
         int rc;
 
 {
         int rc;
 
-        if (EXTN_I(inode)->i_data[0]) {
+        if (inode->i_blocks == 0 && EXTN_I(inode)->i_data[0]) {
                 int size = le32_to_cpu(EXTN_I(inode)->i_data[0]);
                 LASSERT(size < sizeof(EXTN_I(inode)->i_data));
                 if (lmm) {
                 int size = le32_to_cpu(EXTN_I(inode)->i_data[0]);
                 LASSERT(size < sizeof(EXTN_I(inode)->i_data));
                 if (lmm) {
@@ -411,14 +427,15 @@ static void fsfilt_extN_cb_func(struct journal_callback *jcb, int error)
 {
         struct fsfilt_cb_data *fcb = (struct fsfilt_cb_data *)jcb;
 
 {
         struct fsfilt_cb_data *fcb = (struct fsfilt_cb_data *)jcb;
 
-        fcb->cb_func(fcb->cb_obd, fcb->cb_last_rcvd, error);
+        fcb->cb_func(fcb->cb_obd, fcb->cb_last_rcvd, fcb->cb_data, error);
 
         OBD_SLAB_FREE(fcb, fcb_cache, sizeof *fcb);
         atomic_dec(&fcb_cache_count);
 }
 
 static int fsfilt_extN_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
 
         OBD_SLAB_FREE(fcb, fcb_cache, sizeof *fcb);
         atomic_dec(&fcb_cache_count);
 }
 
 static int fsfilt_extN_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
-                                     void *handle, fsfilt_cb_t cb_func)
+                                     void *handle, fsfilt_cb_t cb_func,
+                                     void *cb_data)
 {
         struct fsfilt_cb_data *fcb;
 
 {
         struct fsfilt_cb_data *fcb;
 
@@ -430,10 +447,10 @@ static int fsfilt_extN_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
         fcb->cb_func = cb_func;
         fcb->cb_obd = obd;
         fcb->cb_last_rcvd = last_rcvd;
         fcb->cb_func = cb_func;
         fcb->cb_obd = obd;
         fcb->cb_last_rcvd = last_rcvd;
+        fcb->cb_data = cb_data;
 
         CDEBUG(D_EXT2, "set callback for last_rcvd: "LPD64"\n", last_rcvd);
         lock_kernel();
 
         CDEBUG(D_EXT2, "set callback for last_rcvd: "LPD64"\n", last_rcvd);
         lock_kernel();
-        /* Note that an "incompatible pointer" warning here is OK for now */
         journal_callback_set(handle, fsfilt_extN_cb_func,
                              (struct journal_callback *)fcb);
         unlock_kernel();
         journal_callback_set(handle, fsfilt_extN_cb_func,
                              (struct journal_callback *)fcb);
         unlock_kernel();
@@ -459,7 +476,7 @@ static int fsfilt_extN_journal_data(struct file *filp)
  */
 static int fsfilt_extN_statfs(struct super_block *sb, struct obd_statfs *osfs)
 {
  */
 static int fsfilt_extN_statfs(struct super_block *sb, struct obd_statfs *osfs)
 {
-        struct statfs sfs;
+        struct kstatfs sfs;
         int rc = vfs_statfs(sb, &sfs);
 
         if (!rc && sfs.f_bfree < sfs.f_ffree) {
         int rc = vfs_statfs(sb, &sfs);
 
         if (!rc && sfs.f_bfree < sfs.f_ffree) {
@@ -484,6 +501,110 @@ static int fsfilt_extN_prep_san_write(struct inode *inode, long *blocks,
         return extN_prep_san_write(inode, blocks, nblocks, newsize);
 }
 
         return extN_prep_san_write(inode, blocks, nblocks, newsize);
 }
 
+/*
+ * Read one record of @size bytes located at *@offs in @file into @buf,
+ * fetching the containing block directly with extN_bread().
+ *
+ * The record must lie entirely below i_size and must not straddle a
+ * block boundary; either violation is logged and fails with -EIO.
+ *
+ * Returns @size and advances *@offs by @size on success.  On failure a
+ * negative errno is returned and *@offs is left untouched.
+ */
+static int fsfilt_extN_read_record(struct file * file, char *buf,
+                                   int size, loff_t *offs)
+{
+        struct buffer_head *bh;
+        unsigned long block, boffs;
+        struct inode *inode = file->f_dentry->d_inode;
+        int err;
+
+        if (inode->i_size < *offs + size) {
+                CERROR("file size %llu is too short for read %u@%llu\n",
+                       inode->i_size, size, *offs);
+                return -EIO;
+        }
+
+        block = *offs >> inode->i_blkbits;
+        bh = extN_bread(NULL, inode, block, 0, &err);
+        if (!bh) {
+                CERROR("can't read block: %d\n", err);
+                /* Without a buffer nothing was read.  Never let err == 0
+                 * (presumably an unmapped block - TODO confirm) escape as
+                 * a "successful" 0-byte read. */
+                return err < 0 ? err : -EIO;
+        }
+
+        /* offset of the record inside its block */
+        boffs = (unsigned)*offs % bh->b_size;
+        if (boffs + size > bh->b_size) {
+                CERROR("request crosses block's border. offset %llu, size %u\n",
+                       *offs, size);
+                brelse(bh);
+                return -EIO;
+        }
+
+        memcpy(buf, bh->b_data + boffs, size);
+        brelse(bh);
+        *offs += size;
+        return size;
+}
+
+/*
+ * Write one record of @size bytes from @buf at *@offs in @file, inside
+ * its own journal transaction.
+ *
+ * If the record ends beyond the current i_size, i_size is first grown
+ * (under i_sem) to the end of the block containing *@offs.  The block is
+ * then read or created with extN_bread(), and the copy is bracketed by
+ * extN_journal_get_write_access() / extN_journal_dirty_metadata().
+ * A record may not straddle a block boundary (-EIO).
+ *
+ * Returns @size on success, and advances *@offs by @size when @size > 0.
+ * On failure a negative errno is returned and *@offs is left untouched.
+ */
+static int fsfilt_extN_write_record(struct file * file, char *buf,
+                                    int size, loff_t *offs)
+{
+        struct buffer_head *bh;
+        unsigned long block, boffs;
+        struct inode *inode = file->f_dentry->d_inode;
+        loff_t old_size = inode->i_size;
+        journal_t *journal;
+        handle_t *handle;
+        int err;
+
+        journal = EXTN_SB(inode->i_sb)->s_journal;
+        handle = journal_start(journal, EXTN_DATA_TRANS_BLOCKS + 2);
+        /* journal_start() signals failure with an ERR_PTR() value (this is
+         * how fsfilt_extN_brw_start() checks its handle), so a plain NULL
+         * test would let an error pointer reach extN_bread() and
+         * journal_stop() below. */
+        if (handle == NULL || IS_ERR(handle)) {
+                CERROR("can't start transaction\n");
+                return handle == NULL ? -EIO : PTR_ERR(handle);
+        }
+
+        block = *offs >> inode->i_blkbits;
+        if (*offs + size > inode->i_size) {
+                down(&inode->i_sem);
+                /* re-check now that i_sem is held */
+                if (*offs + size > inode->i_size)
+                        inode->i_size = ((loff_t)block + 1) << inode->i_blkbits;
+                up(&inode->i_sem);
+        }
+
+        bh = extN_bread(handle, inode, block, 1, &err);
+        if (!bh) {
+                CERROR("can't read/create block: %d\n", err);
+                /* no buffer means nothing was written: never return >= 0 */
+                if (err >= 0)
+                        err = -EIO;
+                goto out;
+        }
+
+        /* This is a hack only needed because extN_get_block_handle() updates
+         * i_disksize after marking the inode dirty in extN_splice_branch().
+         * We will fix that when we get a chance, as extN_mark_inode_dirty()
+         * is not without cost, nor is it even exported.
+         */
+        if (inode->i_size > old_size)
+                mark_inode_dirty(inode);
+
+        /* offset of the record inside its block */
+        boffs = (unsigned)*offs % bh->b_size;
+        if (boffs + size > bh->b_size) {
+                CERROR("request crosses block's border. offset %llu, size %u\n",
+                       *offs, size);
+                err = -EIO;
+                goto out;
+        }
+
+        err = extN_journal_get_write_access(handle, bh);
+        if (err) {
+                CERROR("journal_get_write_access() returned error %d\n", err);
+                goto out;
+        }
+        memcpy(bh->b_data + boffs, buf, size);
+        err = extN_journal_dirty_metadata(handle, bh);
+        if (err) {
+                CERROR("journal_dirty_metadata() returned error %d\n", err);
+                goto out;
+        }
+        err = size;
+out:
+        /* common exit: drop the buffer (if any) and close the transaction */
+        if (bh)
+                brelse(bh);
+        journal_stop(handle);
+        if (err > 0)
+                *offs += size;
+        return err;
+}
+
 static struct fsfilt_operations fsfilt_extN_ops = {
         fs_type:                "extN",
         fs_owner:               THIS_MODULE,
 static struct fsfilt_operations fsfilt_extN_ops = {
         fs_type:                "extN",
         fs_owner:               THIS_MODULE,
@@ -499,6 +620,8 @@ static struct fsfilt_operations fsfilt_extN_ops = {
         fs_statfs:              fsfilt_extN_statfs,
         fs_sync:                fsfilt_extN_sync,
         fs_prep_san_write:      fsfilt_extN_prep_san_write,
         fs_statfs:              fsfilt_extN_statfs,
         fs_sync:                fsfilt_extN_sync,
         fs_prep_san_write:      fsfilt_extN_prep_san_write,
+        fs_write_record:        fsfilt_extN_write_record,
+        fs_read_record:         fsfilt_extN_read_record,
 };
 
 static int __init fsfilt_extN_init(void)
 };
 
 static int __init fsfilt_extN_init(void)
index ccefb92..3d118fc 100644 (file)
 #include <linux/obd_class.h>
 #include <linux/module.h>
 
 #include <linux/obd_class.h>
 #include <linux/module.h>
 
-static void *fsfilt_reiserfs_start(struct inode *inode, int op)
+static void *fsfilt_reiserfs_start(struct inode *inode, int op,
+                                   void *desc_private)
 {
         return (void *)0xf00f00be;
 }
 
 static void *fsfilt_reiserfs_brw_start(int objcount, struct fsfilt_objinfo *fso,
 {
         return (void *)0xf00f00be;
 }
 
 static void *fsfilt_reiserfs_brw_start(int objcount, struct fsfilt_objinfo *fso,
-                                   int niocount, struct niobuf_remote *nb)
+                                       int niocount, void *desc_private)
 {
         return (void *)0xf00f00be;
 }
 
 {
         return (void *)0xf00f00be;
 }
 
-static int fsfilt_reiserfs_commit(struct inode *inode, void *handle, 
+static int fsfilt_reiserfs_commit(struct inode *inode, void *handle,
                                   int force_sync)
 {
         if (handle != (void *)0xf00f00be) {
                                   int force_sync)
 {
         if (handle != (void *)0xf00f00be) {
@@ -131,8 +132,9 @@ static ssize_t fsfilt_reiserfs_readpage(struct file *file, char *buf, size_t cou
         return file->f_op->read(file, buf, count, offset);
 }
 
         return file->f_op->read(file, buf, count, offset);
 }
 
-static int fsfilt_reiserfs_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
-                                         void *handle, fsfilt_cb_t cb_func)
+static int fsfilt_reiserfs_set_last_rcvd(struct obd_device *obd,
+                                         __u64 last_rcvd, void *handle,
+                                         fsfilt_cb_t cb_func, void *cb_data)
 {
         static long next = 0;
 
 {
         static long next = 0;
 
@@ -141,7 +143,7 @@ static int fsfilt_reiserfs_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd
                 next = jiffies + 300 * HZ;
         }
 
                 next = jiffies + 300 * HZ;
         }
 
-        cb_func(obd, last_rcvd, 0);
+        cb_func(obd, last_rcvd, cb_data, 0);
 
         return 0;
 }
 
         return 0;
 }
index 4862cf3..bb48e5d 100644 (file)
 
 #define EXPORT_SYMTAB
 #define DEBUG_SUBSYSTEM S_CLASS
 
 #define EXPORT_SYMTAB
 #define DEBUG_SUBSYSTEM S_CLASS
-#ifdef __KERNEL__
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/version.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#include <asm/statfs.h>
-#endif
-#include <linux/seq_file.h>
 
 
-#else
-#include <liblustre.h>
+#ifdef __KERNEL__
+# include <linux/config.h>
+# include <linux/module.h>
+# include <linux/version.h>
+# include <linux/slab.h>
+# include <linux/types.h>
+# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#  include <asm/statfs.h>
+# endif
+# include <linux/seq_file.h>
+#else /* __KERNEL__ */
+# include <liblustre.h>
 #endif
 
 #include <linux/obd_class.h>
 #include <linux/lprocfs_status.h>
 #endif
 
 #include <linux/obd_class.h>
 #include <linux/lprocfs_status.h>
+#include <linux/lustre_fsfilt.h>
 
 #ifdef LPROCFS
 
 struct proc_dir_entry *lprocfs_srch(struct proc_dir_entry *head,
                                     const char *name)
 {
 
 #ifdef LPROCFS
 
 struct proc_dir_entry *lprocfs_srch(struct proc_dir_entry *head,
                                     const char *name)
 {
-        struct proc_dir_entry* temp;
+        struct proc_dir_entry *temp;
 
 
-        if (!head)
+        if (head == NULL)
                 return NULL;
 
         temp = head->subdir;
         while (temp != NULL) {
                 return NULL;
 
         temp = head->subdir;
         while (temp != NULL) {
-                if (!strcmp(temp->name, name))
+                if (strcmp(temp->name, name) == 0)
                         return temp;
 
                 temp = temp->next;
                         return temp;
 
                 temp = temp->next;
@@ -65,26 +66,30 @@ struct proc_dir_entry *lprocfs_srch(struct proc_dir_entry *head,
 int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *list,
                      void *data)
 {
 int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *list,
                      void *data)
 {
-        if ((root == NULL) || (list == NULL))
+        if (root == NULL || list == NULL)
                 return -EINVAL;
 
                 return -EINVAL;
 
-        while (list->name) {
+        while (list->name != NULL) {
                 struct proc_dir_entry *cur_root, *proc;
                 struct proc_dir_entry *cur_root, *proc;
-                char *pathcopy, *cur, *next;
-                int pathsize = strlen(list->name)+1;
+                char *pathcopy, *cur, *next, pathbuf[64];
+                int pathsize = strlen(list->name) + 1;
 
                 proc = NULL;
                 cur_root = root;
 
                 /* need copy of path for strsep */
 
                 proc = NULL;
                 cur_root = root;
 
                 /* need copy of path for strsep */
-                OBD_ALLOC(pathcopy, pathsize);
-                if (!pathcopy)
-                        return -ENOMEM;
+                if (strlen(list->name) > sizeof(pathbuf) - 1) {
+                        OBD_ALLOC(pathcopy, pathsize);
+                        if (pathcopy == NULL)
+                                return -ENOMEM;
+                } else {
+                        pathcopy = pathbuf;
+                }
 
                 next = pathcopy;
                 strcpy(pathcopy, list->name);
 
 
                 next = pathcopy;
                 strcpy(pathcopy, list->name);
 
-                while (cur_root && (cur = strsep(&next, "/"))) {
+                while (cur_root != NULL && (cur = strsep(&next, "/"))) {
                         if (*cur =='\0') /* skip double/trailing "/" */
                                 continue;
 
                         if (*cur =='\0') /* skip double/trailing "/" */
                                 continue;
 
@@ -92,10 +97,10 @@ int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *list,
                         CDEBUG(D_OTHER, "cur_root=%s, cur=%s, next=%s, (%s)\n",
                                cur_root->name, cur, next,
                                (proc ? "exists" : "new"));
                         CDEBUG(D_OTHER, "cur_root=%s, cur=%s, next=%s, (%s)\n",
                                cur_root->name, cur, next,
                                (proc ? "exists" : "new"));
-                        if (next)
+                        if (next != NULL) {
                                 cur_root = (proc ? proc :
                                 cur_root = (proc ? proc :
-                                                   proc_mkdir(cur, cur_root));
-                        else if (!proc) {
+                                            proc_mkdir(cur, cur_root));
+                        } else if (proc == NULL) {
                                 mode_t mode = 0444;
                                 if (list->write_fptr)
                                         mode = 0644;
                                 mode_t mode = 0444;
                                 if (list->write_fptr)
                                         mode = 0644;
@@ -103,9 +108,10 @@ int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *list,
                         }
                 }
 
                         }
                 }
 
+                if (pathcopy != pathbuf)
                 OBD_FREE(pathcopy, pathsize);
 
                 OBD_FREE(pathcopy, pathsize);
 
-                if ((cur_root == NULL) || (proc == NULL)) {
+                if (cur_root == NULL || proc == NULL) {
                         CERROR("LprocFS: No memory to create /proc entry %s",
                                list->name);
                         return -ENOMEM;
                         CERROR("LprocFS: No memory to create /proc entry %s",
                                list->name);
                         return -ENOMEM;
@@ -119,7 +125,7 @@ int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *list,
         return 0;
 }
 
         return 0;
 }
 
-void lprocfs_remove(struct proc_dir_entry* root)
+void lprocfs_remove(struct proc_dir_entry *root)
 {
         struct proc_dir_entry *temp = root;
         struct proc_dir_entry *rm_entry;
 {
         struct proc_dir_entry *temp = root;
         struct proc_dir_entry *rm_entry;
@@ -130,7 +136,7 @@ void lprocfs_remove(struct proc_dir_entry* root)
         LASSERT(parent != NULL);
 
         while (1) {
         LASSERT(parent != NULL);
 
         while (1) {
-                while (temp->subdir)
+                while (temp->subdir != NULL)
                         temp = temp->subdir;
 
                 rm_entry = temp;
                         temp = temp->subdir;
 
                 rm_entry = temp;
@@ -148,14 +154,14 @@ struct proc_dir_entry *lprocfs_register(const char *name,
         struct proc_dir_entry *newchild;
 
         newchild = lprocfs_srch(parent, name);
         struct proc_dir_entry *newchild;
 
         newchild = lprocfs_srch(parent, name);
-        if (newchild) {
+        if (newchild != NULL) {
                 CERROR(" Lproc: Attempting to register %s more than once \n",
                        name);
                 return ERR_PTR(-EALREADY);
         }
 
         newchild = proc_mkdir(name, parent);
                 CERROR(" Lproc: Attempting to register %s more than once \n",
                        name);
                 return ERR_PTR(-EALREADY);
         }
 
         newchild = proc_mkdir(name, parent);
-        if (newchild && list) {
+        if (newchild != NULL && list != NULL) {
                 int rc = lprocfs_add_vars(newchild, list, data);
                 if (rc) {
                         lprocfs_remove(newchild);
                 int rc = lprocfs_add_vars(newchild, list, data);
                 if (rc) {
                         lprocfs_remove(newchild);
@@ -175,10 +181,10 @@ int lprocfs_rd_u64(char *page, char **start, off_t off,
         return snprintf(page, count, LPU64"\n", *(__u64 *)data);
 }
 
         return snprintf(page, count, LPU64"\n", *(__u64 *)data);
 }
 
-int lprocfs_rd_uuid(char* page, char **start, off_t off, int count,
+int lprocfs_rd_uuid(char *page, char **start, off_t off, int count,
                     int *eof, void *data)
 {
                     int *eof, void *data)
 {
-        struct obd_device* dev = (struct obd_device*)data;
+        struct obd_device *dev = (struct obd_device*)data;
 
         LASSERT(dev != NULL);
         *eof = 1;
 
         LASSERT(dev != NULL);
         *eof = 1;
@@ -186,9 +192,9 @@ int lprocfs_rd_uuid(char* page, char **start, off_t off, int count,
 }
 
 int lprocfs_rd_name(char *page, char **start, off_t off, int count,
 }
 
 int lprocfs_rd_name(char *page, char **start, off_t off, int count,
-                    int *eof, void *data)
+                    int *eof, void* data)
 {
 {
-        struct obd_device* dev = (struct obd_device *)data;
+        struct obd_device *dev = (struct obd_device *)data;
 
         LASSERT(dev != NULL);
         LASSERT(dev->obd_name != NULL);
 
         LASSERT(dev != NULL);
         LASSERT(dev->obd_name != NULL);
@@ -196,72 +202,98 @@ int lprocfs_rd_name(char *page, char **start, off_t off, int count,
         return snprintf(page, count, "%s\n", dev->obd_name);
 }
 
         return snprintf(page, count, "%s\n", dev->obd_name);
 }
 
-int lprocfs_rd_blksize(char* page, char **start, off_t off, int count,
-                       int *eof, struct statfs *sfs)
+int lprocfs_rd_fstype(char *page, char **start, off_t off, int count, int *eof,
+                      void *data)
 {
 {
-        LASSERT(sfs != NULL);
-        *eof = 1;
-        return snprintf(page, count, "%lu\n", sfs->f_bsize);
+        struct obd_device *obd = (struct obd_device *)data;
+
+        LASSERT(obd != NULL);
+        LASSERT(obd->obd_fsops != NULL);
+        LASSERT(obd->obd_fsops->fs_type != NULL);
+        return snprintf(page, count, "%s\n", obd->obd_fsops->fs_type);
 }
 
 }
 
-int lprocfs_rd_kbytestotal(char* page, char **start, off_t off, int count,
-                           int *eof, struct statfs *sfs)
+int lprocfs_rd_blksize(char *page, char **start, off_t off, int count,
+                       int *eof, void *data)
 {
 {
-        __u32 blk_size;
-        __u64 result;
+        struct obd_statfs osfs;
+        int rc = obd_statfs(data, &osfs, jiffies - HZ);
+        if (!rc) {
+                *eof = 1;
+                rc = snprintf(page, count, "%u\n", osfs.os_bsize);
+        }
+        return rc;
+}
 
 
-        LASSERT(sfs != NULL);
-        blk_size = sfs->f_bsize >> 10;
-        result = sfs->f_blocks;
+int lprocfs_rd_kbytestotal(char *page, char **start, off_t off, int count,
+                           int *eof, void *data)
+{
+        struct obd_statfs osfs;
+        int rc = obd_statfs(data, &osfs, jiffies - HZ);
+        if (!rc) {
+                __u32 blk_size = osfs.os_bsize >> 10;
+                __u64 result = osfs.os_blocks;
 
 
-        while (blk_size >>= 1)
-                result <<= 1;
+                while (blk_size >>= 1)
+                        result <<= 1;
 
 
-        *eof = 1;
-        return snprintf(page, count, LPU64"\n", result);
+                *eof = 1;
+                rc = snprintf(page, count, LPU64"\n", result);
+        }
+        return rc;
 }
 
 }
 
-int lprocfs_rd_kbytesfree(char* page, char **start, off_t off, int count,
-                          int *eof, struct statfs *sfs)
+int lprocfs_rd_kbytesfree(char *page, char **start, off_t off, int count,
+                          int *eof, void *data)
 {
 {
-        __u32 blk_size;
-        __u64 result;
+        struct obd_statfs osfs;
+        int rc = obd_statfs(data, &osfs, jiffies - HZ);
+        if (!rc) {
+                __u32 blk_size = osfs.os_bsize >> 10;
+                __u64 result = osfs.os_bfree;
 
 
-        LASSERT(sfs != NULL);
-        blk_size = sfs->f_bsize >> 10;
-        result = sfs->f_bfree;
+                while (blk_size >>= 1)
+                        result <<= 1;
 
 
-        while (blk_size >>= 1)
-                result <<= 1;
-
-        *eof = 1;
-        return snprintf(page, count, LPU64"\n", result);
+                *eof = 1;
+                rc = snprintf(page, count, LPU64"\n", result);
+        }
+        return rc;
 }
 
 }
 
-int lprocfs_rd_filestotal(char* page, char **start, off_t off, int count,
-                          int *eof, struct statfs *sfs)
+int lprocfs_rd_filestotal(char *page, char **start, off_t off, int count,
+                          int *eof, void *data)
 {
 {
-        LASSERT(sfs != NULL);
-        *eof = 1;
-        return snprintf(page, count, "%ld\n", sfs->f_files);
+        struct obd_statfs osfs;
+        int rc = obd_statfs(data, &osfs, jiffies - HZ);
+        if (!rc) {
+                *eof = 1;
+                rc = snprintf(page, count, LPU64"\n", osfs.os_files);
+        }
+
+        return rc;
 }
 
 }
 
-int lprocfs_rd_filesfree(char* page, char **start, off_t off, int count,
-                         int *eof, struct statfs *sfs)
+int lprocfs_rd_filesfree(char *page, char **start, off_t off, int count,
+                         int *eof, void *data)
 {
 {
-        LASSERT(sfs != NULL);
-        *eof = 1;
-        return snprintf(page, count, "%ld\n", sfs->f_ffree);
+        struct obd_statfs osfs;
+        int rc = obd_statfs(data, &osfs, jiffies - HZ);
+        if (!rc) {
+                *eof = 1;
+                rc = snprintf(page, count, LPU64"\n", osfs.os_ffree);
+        }
+        return rc;
 }
 
 }
 
-int lprocfs_rd_filegroups(char* page, char **start, off_t off, int count,
-                          int *eof, struct statfs *sfs)
+int lprocfs_rd_filegroups(char *page, char **start, off_t off, int count,
+                          int *eof, void *data)
 {
         *eof = 1;
         return snprintf(page, count, "unimplemented\n");
 }
 
 {
         *eof = 1;
         return snprintf(page, count, "unimplemented\n");
 }
 
-int lprocfs_rd_server_uuid(char* page, char **start, off_t off, int count,
+int lprocfs_rd_server_uuid(char *page, char **start, off_t off, int count,
                            int *eof, void *data)
 {
         struct obd_device *obd = (struct obd_device *)data;
                            int *eof, void *data)
 {
         struct obd_device *obd = (struct obd_device *)data;
@@ -290,7 +322,7 @@ int lprocfs_rd_conn_uuid(char *page, char **start, off_t off, int count,
 int lprocfs_rd_numrefs(char *page, char **start, off_t off, int count,
                        int *eof, void *data)
 {
 int lprocfs_rd_numrefs(char *page, char **start, off_t off, int count,
                        int *eof, void *data)
 {
-        struct obd_type* class = (struct obd_type*) data;
+        struct obd_type *class = (struct obd_type*) data;
 
         LASSERT(class != NULL);
         *eof = 1;
 
         LASSERT(class != NULL);
         *eof = 1;
@@ -334,21 +366,21 @@ struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num)
         if (num == 0)
                 return NULL;
 
         if (num == 0)
                 return NULL;
 
-        OBD_ALLOC(stats, offsetof(typeof(*stats), ls_percpu[smp_num_cpus]));
+        OBD_ALLOC(stats, offsetof(typeof(*stats), ls_percpu[num_online_cpus()]));
         if (stats == NULL)
                 return NULL;
 
         percpusize = L1_CACHE_ALIGN(offsetof(typeof(*percpu), lp_cntr[num]));
         if (stats == NULL)
                 return NULL;
 
         percpusize = L1_CACHE_ALIGN(offsetof(typeof(*percpu), lp_cntr[num]));
-        stats->ls_percpu_size = smp_num_cpus * percpusize;
+        stats->ls_percpu_size = num_online_cpus() * percpusize;
         OBD_ALLOC(stats->ls_percpu[0], stats->ls_percpu_size);
         if (stats->ls_percpu[0] == NULL) {
                 OBD_FREE(stats, offsetof(typeof(*stats),
         OBD_ALLOC(stats->ls_percpu[0], stats->ls_percpu_size);
         if (stats->ls_percpu[0] == NULL) {
                 OBD_FREE(stats, offsetof(typeof(*stats),
-                                         ls_percpu[smp_num_cpus]));
+                                         ls_percpu[num_online_cpus()]));
                 return NULL;
         }
 
         stats->ls_num = num;
                 return NULL;
         }
 
         stats->ls_num = num;
-        for (i = 1; i < smp_num_cpus; i++)
+        for (i = 1; i < num_online_cpus(); i++)
                 stats->ls_percpu[i] = (void *)(stats->ls_percpu[i - 1]) +
                         percpusize;
 
                 stats->ls_percpu[i] = (void *)(stats->ls_percpu[i - 1]) +
                         percpusize;
 
@@ -361,7 +393,7 @@ void lprocfs_free_stats(struct lprocfs_stats *stats)
                 return;
 
         OBD_FREE(stats->ls_percpu[0], stats->ls_percpu_size);
                 return;
 
         OBD_FREE(stats->ls_percpu[0], stats->ls_percpu_size);
-        OBD_FREE(stats, offsetof(typeof(*stats), ls_percpu[smp_num_cpus]));
+        OBD_FREE(stats, offsetof(typeof(*stats), ls_percpu[num_online_cpus()]));
 }
 
 /* Reset counter under lock */
 }
 
 /* Reset counter under lock */
@@ -410,17 +442,18 @@ static int lprocfs_stats_seq_show(struct seq_file *p, void *v)
        }
        idx = cntr - &(stats->ls_percpu[0])->lp_cntr[0];
 
        }
        idx = cntr - &(stats->ls_percpu[0])->lp_cntr[0];
 
-       for (i = 0; i < smp_num_cpus; i++) {
+       for (i = 0; i < num_online_cpus(); i++) {
                struct lprocfs_counter *percpu_cntr =
                        &(stats->ls_percpu[i])->lp_cntr[idx];
                int centry;
                struct lprocfs_counter *percpu_cntr =
                        &(stats->ls_percpu[i])->lp_cntr[idx];
                int centry;
+
                do {
                do {
-                        centry = atomic_read(&percpu_cntr->lc_cntl.la_entry);
-                        t.lc_count = percpu_cntr->lc_count;
-                        t.lc_sum = percpu_cntr->lc_sum;
-                        t.lc_min = percpu_cntr->lc_min;
-                        t.lc_max = percpu_cntr->lc_max;
-                        t.lc_sumsquare = percpu_cntr->lc_sumsquare;
+                       centry = atomic_read(&percpu_cntr->lc_cntl.la_entry);
+                       t.lc_count = percpu_cntr->lc_count;
+                       t.lc_sum = percpu_cntr->lc_sum;
+                       t.lc_min = percpu_cntr->lc_min;
+                       t.lc_max = percpu_cntr->lc_max;
+                       t.lc_sumsquare = percpu_cntr->lc_sumsquare;
                } while (centry != atomic_read(&percpu_cntr->lc_cntl.la_entry) &&
                         centry != atomic_read(&percpu_cntr->lc_cntl.la_exit));
                ret.lc_count += t.lc_count;
                } while (centry != atomic_read(&percpu_cntr->lc_cntl.la_entry) &&
                         centry != atomic_read(&percpu_cntr->lc_cntl.la_exit));
                ret.lc_count += t.lc_count;
@@ -453,10 +486,10 @@ static int lprocfs_stats_seq_show(struct seq_file *p, void *v)
 }
 
 struct seq_operations lprocfs_stats_seq_sops = {
 }
 
 struct seq_operations lprocfs_stats_seq_sops = {
-        .start = lprocfs_stats_seq_start,
-        .stop = lprocfs_stats_seq_stop,
-        .next = lprocfs_stats_seq_next,
-        .show = lprocfs_stats_seq_show,
+        start: lprocfs_stats_seq_start,
+        stop:  lprocfs_stats_seq_stop,
+        next:  lprocfs_stats_seq_next,
+        show:  lprocfs_stats_seq_show,
 };
 
 static int lprocfs_stats_seq_open(struct inode *inode, struct file *file)
 };
 
 static int lprocfs_stats_seq_open(struct inode *inode, struct file *file)
@@ -474,13 +507,13 @@ static int lprocfs_stats_seq_open(struct inode *inode, struct file *file)
 }
 
 struct file_operations lprocfs_stats_seq_fops = {
 }
 
 struct file_operations lprocfs_stats_seq_fops = {
-        .open    = lprocfs_stats_seq_open,
-        .read    = seq_read,
-        .llseek  = seq_lseek,
-        .release = seq_release,
+        open:    lprocfs_stats_seq_open,
+        read:    seq_read,
+        llseek:  seq_lseek,
+        release: seq_release,
 };
 
 };
 
-int lprocfs_register_stats(struct proc_dir_entry *root, const char* name,
+int lprocfs_register_stats(struct proc_dir_entry *root, const char *name,
                            struct lprocfs_stats *stats)
 {
         struct proc_dir_entry *entry;
                            struct lprocfs_stats *stats)
 {
         struct proc_dir_entry *entry;
@@ -502,7 +535,7 @@ void lprocfs_counter_init(struct lprocfs_stats *stats, int index,
         int i;
 
         LASSERT(stats != NULL);
         int i;
 
         LASSERT(stats != NULL);
-        for (i = 0; i < smp_num_cpus; i++) {
+        for (i = 0; i < num_online_cpus(); i++) {
                 c = &(stats->ls_percpu[i]->lp_cntr[index]);
                 c->lc_config = conf;
                 c->lc_min = ~(__u64)0;
                 c = &(stats->ls_percpu[i]->lp_cntr[index]);
                 c->lc_config = conf;
                 c->lc_min = ~(__u64)0;
@@ -515,7 +548,7 @@ EXPORT_SYMBOL(lprocfs_counter_init);
 #define LPROCFS_OBD_OP_INIT(base, stats, op)                               \
 do {                                                                       \
         unsigned int coffset = base + OBD_COUNTER_OFFSET(op);              \
 #define LPROCFS_OBD_OP_INIT(base, stats, op)                               \
 do {                                                                       \
         unsigned int coffset = base + OBD_COUNTER_OFFSET(op);              \
-        LASSERT(coffset < stats->ls_num);                                     \
+        LASSERT(coffset < stats->ls_num);                                  \
         lprocfs_counter_init(stats, coffset, 0, #op, "reqs");              \
 } while (0)
 
         lprocfs_counter_init(stats, coffset, 0, #op, "reqs");              \
 } while (0)
 
@@ -529,10 +562,10 @@ int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats)
         LASSERT(obd->obd_proc_entry != NULL);
         LASSERT(obd->obd_cntr_base == 0);
 
         LASSERT(obd->obd_proc_entry != NULL);
         LASSERT(obd->obd_cntr_base == 0);
 
-        num_stats = 1 + OBD_COUNTER_OFFSET(destroy_export) +
+        num_stats = 1 + OBD_COUNTER_OFFSET(unpin) +
                 num_private_stats;
         stats = lprocfs_alloc_stats(num_stats);
                 num_private_stats;
         stats = lprocfs_alloc_stats(num_stats);
-        if (!stats)
+        if (stats == NULL)
                 return -ENOMEM;
 
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, iocontrol);
                 return -ENOMEM;
 
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, iocontrol);
@@ -569,16 +602,28 @@ int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats)
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, match);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel_unused);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, match);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel_unused);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, log_add);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, log_cancel);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, san_preprw);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, san_preprw);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, mark_page_dirty);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, clear_dirty_pages);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, last_dirty_offset);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, destroy_export);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, destroy_export);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, pin); 
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, unpin);
 
         for (i = num_private_stats; i < num_stats; i++) {
 
         for (i = num_private_stats; i < num_stats; i++) {
-                /* If this assertion failed, it is likely that an obd
+                /* If this LBUGs, it is likely that an obd
                  * operation was added to struct obd_ops in
                  * <linux/obd.h>, and that the corresponding line item
                  * LPROCFS_OBD_OP_INIT(.., .., opname)
                  * is missing from the list above. */
                  * operation was added to struct obd_ops in
                  * <linux/obd.h>, and that the corresponding line item
                  * LPROCFS_OBD_OP_INIT(.., .., opname)
                  * is missing from the list above. */
-                LASSERT(&(stats->ls_percpu[0])->lp_cntr[i].lc_name != NULL);
+                if (stats->ls_percpu[0]->lp_cntr[i].lc_name == NULL) {
+                        CERROR("Missing obd_stat initializer obd_op "
+                               "operation at offset %d. Aborting.\n",
+                               i - num_private_stats);
+                        LBUG();
+                }
         }
         rc = lprocfs_register_stats(obd->obd_proc_entry, "stats", stats);
         if (rc < 0) {
         }
         rc = lprocfs_register_stats(obd->obd_proc_entry, "stats", stats);
         if (rc < 0) {
@@ -617,6 +662,7 @@ EXPORT_SYMBOL(lprocfs_free_obd_stats);
 EXPORT_SYMBOL(lprocfs_rd_u64);
 EXPORT_SYMBOL(lprocfs_rd_uuid);
 EXPORT_SYMBOL(lprocfs_rd_name);
 EXPORT_SYMBOL(lprocfs_rd_u64);
 EXPORT_SYMBOL(lprocfs_rd_uuid);
 EXPORT_SYMBOL(lprocfs_rd_name);
+EXPORT_SYMBOL(lprocfs_rd_fstype);
 EXPORT_SYMBOL(lprocfs_rd_server_uuid);
 EXPORT_SYMBOL(lprocfs_rd_conn_uuid);
 EXPORT_SYMBOL(lprocfs_rd_numrefs);
 EXPORT_SYMBOL(lprocfs_rd_server_uuid);
 EXPORT_SYMBOL(lprocfs_rd_conn_uuid);
 EXPORT_SYMBOL(lprocfs_rd_numrefs);
index 06f86ad..bc07df9 100644 (file)
@@ -4,32 +4,31 @@
  * Copyright (C) 2002 Cluster File Systems, Inc.
  *   Author: Phil Schwan <phil@clusterfs.com>
  *
  * Copyright (C) 2002 Cluster File Systems, Inc.
  *   Author: Phil Schwan <phil@clusterfs.com>
  *
- *   This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *   This file is part of Lustre, http://www.lustre.org/
  *
  *
- *   Portals is free software; you can redistribute it and/or
- *   modify it under the terms of version 2.1 of the GNU Lesser General
- *   Public License as published by the Free Software Foundation.
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
  *
  *
- *   Portals is distributed in the hope that it will be useful,
+ *   Lustre is distributed in the hope that it will be useful,
  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU Lesser General Public License for more details.
+ *   GNU General Public License for more details.
  *
  *
- *   You should have received a copy of the GNU Lesser General Public
- *   License along with Portals; if not, write to the Free Software
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #define DEBUG_SUBSYSTEM S_CLASS
 #ifdef __KERNEL__
  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #define DEBUG_SUBSYSTEM S_CLASS
 #ifdef __KERNEL__
-#include <linux/types.h>
-#include <linux/random.h>
+# include <linux/types.h>
+# include <linux/random.h>
 #else 
 #else 
-#include <liblustre.h>
+# include <liblustre.h>
 #endif 
 
 #endif 
 
-
-#include <linux/kp30.h>
+#include <linux/obd_support.h>
 #include <linux/lustre_handles.h>
 
 static spinlock_t handle_lock = SPIN_LOCK_UNLOCKED;
 #include <linux/lustre_handles.h>
 
 static spinlock_t handle_lock = SPIN_LOCK_UNLOCKED;
@@ -118,7 +117,7 @@ int class_handle_init(void)
 
         LASSERT(handle_hash == NULL);
 
 
         LASSERT(handle_hash == NULL);
 
-        PORTAL_ALLOC(handle_hash, sizeof(*handle_hash) * HANDLE_HASH_SIZE);
+        OBD_VMALLOC(handle_hash, sizeof(*handle_hash) * HANDLE_HASH_SIZE);
         if (handle_hash == NULL)
                 return -ENOMEM;
 
         if (handle_hash == NULL)
                 return -ENOMEM;
 
@@ -158,7 +157,7 @@ void class_handle_cleanup(void)
                 cleanup_all_handles();
         }
 
                 cleanup_all_handles();
         }
 
-        PORTAL_FREE(handle_hash, sizeof(*handle_hash) * HANDLE_HASH_SIZE);
+        OBD_VFREE(handle_hash, sizeof(*handle_hash) * HANDLE_HASH_SIZE);
         handle_hash = NULL;
 
         if (handle_count)
         handle_hash = NULL;
 
         if (handle_count)
index 016354c..5987d2e 100644 (file)
@@ -64,8 +64,8 @@ void class_exit_uuidlist(void)
                 struct uuid_nid_data *data =
                         list_entry(tmp, struct uuid_nid_data, head);
 
                 struct uuid_nid_data *data =
                         list_entry(tmp, struct uuid_nid_data, head);
 
-                PORTAL_FREE(data->uuid, strlen(data->uuid) + 1);
-                PORTAL_FREE(data, sizeof(*data));
+                OBD_FREE(data->uuid, strlen(data->uuid) + 1);
+                OBD_FREE(data, sizeof(*data));
         }
 }
 
         }
 }
 
@@ -109,11 +109,11 @@ int class_add_uuid(char *uuid, __u64 nid, __u32 nal)
         }
 
         rc = -ENOMEM;
         }
 
         rc = -ENOMEM;
-        PORTAL_ALLOC(data, sizeof(*data));
+        OBD_ALLOC(data, sizeof(*data));
         if (data == NULL)
                 goto fail_0;
 
         if (data == NULL)
                 goto fail_0;
 
-        PORTAL_ALLOC(data->uuid, nob);
+        OBD_ALLOC(data->uuid, nob);
         if (data == NULL)
                 goto fail_1;
 
         if (data == NULL)
                 goto fail_1;
 
@@ -131,7 +131,7 @@ int class_add_uuid(char *uuid, __u64 nid, __u32 nal)
         return 0;
 
  fail_1:
         return 0;
 
  fail_1:
-        PORTAL_FREE (data, sizeof (*data));
+        OBD_FREE (data, sizeof (*data));
  fail_0:
         kportal_put_ni (nal);
         return (rc);
  fail_0:
         kportal_put_ni (nal);
         return (rc);
@@ -171,8 +171,8 @@ int class_del_uuid (char *uuid)
                 list_del (&data->head);
 
                 kportal_put_ni (data->nal);
                 list_del (&data->head);
 
                 kportal_put_ni (data->nal);
-                PORTAL_FREE(data->uuid, strlen(data->uuid) + 1);
-                PORTAL_FREE(data, sizeof(*data));
+                OBD_FREE(data->uuid, strlen(data->uuid) + 1);
+                OBD_FREE(data, sizeof(*data));
         } while (!list_empty (&deathrow));
 
         return 0;
         } while (!list_empty (&deathrow));
 
         return 0;
index 0ce54a3..bd1363a 100644 (file)
@@ -139,7 +139,6 @@ void pop_ctxt(struct obd_run_ctxt *saved, struct obd_run_ctxt *new_ctx,
                 current->fsgid = saved->ouc.ouc_fsgid;
                 current->cap_effective = saved->ouc.ouc_cap;
                 current->ngroups = saved->ngroups;
                 current->fsgid = saved->ouc.ouc_fsgid;
                 current->cap_effective = saved->ouc.ouc_cap;
                 current->ngroups = saved->ngroups;
-
                 current->groups[0] = saved->ouc.ouc_suppgid1;
                 current->groups[1] = saved->ouc.ouc_suppgid2;
         }
                 current->groups[0] = saved->ouc.ouc_suppgid1;
                 current->groups[1] = saved->ouc.ouc_suppgid2;
         }
@@ -167,7 +166,7 @@ struct dentry *simple_mknod(struct dentry *dir, char *name, int mode)
         ASSERT_KERNEL_CTXT("kernel doing mknod outside kernel context\n");
         CDEBUG(D_INODE, "creating file %*s\n", (int)strlen(name), name);
 
         ASSERT_KERNEL_CTXT("kernel doing mknod outside kernel context\n");
         CDEBUG(D_INODE, "creating file %*s\n", (int)strlen(name), name);
 
-        dchild = lookup_one_len(name, dir, strlen(name));
+        dchild = ll_lookup_one_len(name, dir, strlen(name));
         if (IS_ERR(dchild))
                 GOTO(out_up, dchild);
 
         if (IS_ERR(dchild))
                 GOTO(out_up, dchild);
 
@@ -201,7 +200,7 @@ struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode)
 
         ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n");
         CDEBUG(D_INODE, "creating directory %*s\n", (int)strlen(name), name);
 
         ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n");
         CDEBUG(D_INODE, "creating directory %*s\n", (int)strlen(name), name);
-        dchild = lookup_one_len(name, dir, strlen(name));
+        dchild = ll_lookup_one_len(name, dir, strlen(name));
         if (IS_ERR(dchild))
                 GOTO(out_up, dchild);
 
         if (IS_ERR(dchild))
                 GOTO(out_up, dchild);
 
index 786a768..8bb78cc 100644 (file)
@@ -31,7 +31,7 @@
 #else
 #include <linux/version.h>
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
 #else
 #include <linux/version.h>
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#include <asm/statfs.h>
+#include <linux/statfs.h>
 #endif
 #endif
 
 #endif
 #endif
 
@@ -40,8 +40,9 @@
 #include <linux/obd_support.h>
 #include <linux/obd_class.h>
 
 #include <linux/obd_support.h>
 #include <linux/obd_class.h>
 
-void statfs_pack(struct obd_statfs *osfs, struct statfs *sfs)
+void statfs_pack(struct obd_statfs *osfs, struct kstatfs *sfs)
 {
 {
+        memset(osfs, 0, sizeof(*osfs));
         osfs->os_type = sfs->f_type;
         osfs->os_blocks = sfs->f_blocks;
         osfs->os_bfree = sfs->f_bfree;
         osfs->os_type = sfs->f_type;
         osfs->os_blocks = sfs->f_blocks;
         osfs->os_bfree = sfs->f_bfree;
@@ -52,8 +53,9 @@ void statfs_pack(struct obd_statfs *osfs, struct statfs *sfs)
         osfs->os_namelen = sfs->f_namelen;
 }
 
         osfs->os_namelen = sfs->f_namelen;
 }
 
-void statfs_unpack(struct statfs *sfs, struct obd_statfs *osfs)
+void statfs_unpack(struct kstatfs *sfs, struct obd_statfs *osfs)
 {
 {
+        memset(sfs, 0, sizeof(*sfs));
         sfs->f_type = osfs->os_type;
         sfs->f_blocks = osfs->os_blocks;
         sfs->f_bfree = osfs->os_bfree;
         sfs->f_type = osfs->os_type;
         sfs->f_blocks = osfs->os_blocks;
         sfs->f_bfree = osfs->os_bfree;
@@ -64,39 +66,5 @@ void statfs_unpack(struct statfs *sfs, struct obd_statfs *osfs)
         sfs->f_namelen = osfs->os_namelen;
 }
 
         sfs->f_namelen = osfs->os_namelen;
 }
 
-int obd_self_statfs(struct obd_device *obd, struct statfs *sfs)
-{
-        struct obd_export *export, *my_export = NULL;
-        struct obd_statfs osfs = { 0 };
-        int rc;
-        ENTRY;
-
-        LASSERT( obd != NULL );
-
-        spin_lock(&obd->obd_dev_lock);
-        if (list_empty(&obd->obd_exports)) {
-                spin_unlock(&obd->obd_dev_lock);
-                export = my_export = class_new_export(obd);
-                if (export == NULL)
-                        RETURN(-ENOMEM);
-        } else {
-                export = list_entry(obd->obd_exports.next, typeof(*export),
-                                    exp_obd_chain);
-                export = class_export_get(export);
-                spin_unlock(&obd->obd_dev_lock);
-        }
-
-        rc = obd_statfs(export, &osfs);
-        if (!rc)
-                statfs_unpack(sfs, &osfs);
-
-        if (my_export)
-                class_unlink_export(my_export);
-
-        class_export_put(export);
-        RETURN(rc);
-}
-
 EXPORT_SYMBOL(statfs_pack);
 EXPORT_SYMBOL(statfs_unpack);
 EXPORT_SYMBOL(statfs_pack);
 EXPORT_SYMBOL(statfs_unpack);
-EXPORT_SYMBOL(obd_self_statfs);
index e530020..49c6100 100644 (file)
@@ -6,3 +6,4 @@ Makefile
 Makefile.in
 .deps
 TAGS
 Makefile.in
 .deps
 TAGS
+.*.cmd
index f89df07..887889a 100644 (file)
@@ -64,7 +64,7 @@ static int echo_connect(struct lustre_handle *conn, struct obd_device *obd,
         return class_connect(conn, obd, cluuid);
 }
 
         return class_connect(conn, obd, cluuid);
 }
 
-static int echo_disconnect(struct lustre_handle *conn, int failover)
+static int echo_disconnect(struct lustre_handle *conn, int flags)
 {
         struct obd_export *exp = class_conn2export(conn);
 
 {
         struct obd_export *exp = class_conn2export(conn);
 
@@ -72,7 +72,7 @@ static int echo_disconnect(struct lustre_handle *conn, int failover)
 
         ldlm_cancel_locks_for_export(exp);
         class_export_put(exp);
 
         ldlm_cancel_locks_for_export(exp);
         class_export_put(exp);
-        return (class_disconnect(conn, failover));
+        return class_disconnect(conn, flags);
 }
 
 static __u64 echo_next_id(struct obd_device *obddev)
 }
 
 static __u64 echo_next_id(struct obd_device *obddev)
@@ -235,7 +235,7 @@ static int echo_setattr(struct lustre_handle *conn, struct obdo *oa,
 int echo_preprw(int cmd, struct obd_export *export, struct obdo *oa,
                 int objcount, struct obd_ioobj *obj, int niocount,
                 struct niobuf_remote *nb, struct niobuf_local *res,
 int echo_preprw(int cmd, struct obd_export *export, struct obdo *oa,
                 int objcount, struct obd_ioobj *obj, int niocount,
                 struct niobuf_remote *nb, struct niobuf_local *res,
-                void **desc_private, struct obd_trans_info *oti)
+                struct obd_trans_info *oti)
 {
         struct obd_device *obd;
         struct niobuf_local *r = res;
 {
         struct obd_device *obd;
         struct niobuf_local *r = res;
@@ -253,7 +253,8 @@ int echo_preprw(int cmd, struct obd_export *export, struct obdo *oa,
         CDEBUG(D_PAGE, "%s %d obdos with %d IOs\n",
                cmd == OBD_BRW_READ ? "reading" : "writing", objcount, niocount);
 
         CDEBUG(D_PAGE, "%s %d obdos with %d IOs\n",
                cmd == OBD_BRW_READ ? "reading" : "writing", objcount, niocount);
 
-        *desc_private = (void *)DESC_PRIV;
+        if (oti)
+                oti->oti_handle = (void *)DESC_PRIV;
 
         for (i = 0; i < objcount; i++, obj++) {
                 int gfp_mask = (obj->ioo_id & 1) ? GFP_HIGHUSER : GFP_KERNEL;
 
         for (i = 0; i < objcount; i++, obj++) {
                 int gfp_mask = (obj->ioo_id & 1) ? GFP_HIGHUSER : GFP_KERNEL;
@@ -285,7 +286,7 @@ int echo_preprw(int cmd, struct obd_export *export, struct obdo *oa,
 
                         r->offset = nb->offset;
                         r->len = nb->len;
 
                         r->offset = nb->offset;
                         r->len = nb->len;
-                        LASSERT ((r->offset & (PAGE_SIZE - 1)) + r->len <= PAGE_SIZE);
+                        LASSERT((r->offset & ~PAGE_MASK) + r->len <= PAGE_SIZE);
 
                         CDEBUG(D_PAGE, "$$$$ get page %p @ "LPU64" for %d\n",
                                r->page, r->offset, r->len);
 
                         CDEBUG(D_PAGE, "$$$$ get page %p @ "LPU64" for %d\n",
                                r->page, r->offset, r->len);
@@ -339,9 +340,9 @@ preprw_cleanup:
         return rc;
 }
 
         return rc;
 }
 
-int echo_commitrw(int cmd, struct obd_export *export, int objcount,
-                  struct obd_ioobj *obj, int niocount, struct niobuf_local *res,
-                  void *desc_private, struct obd_trans_info *oti)
+int echo_commitrw(int cmd, struct obd_export *export, struct obdo *oa,
+                  int objcount, struct obd_ioobj *obj, int niocount,
+                  struct niobuf_local *res, struct obd_trans_info *oti)
 {
         struct obd_device *obd;
         struct niobuf_local *r = res;
 {
         struct obd_device *obd;
         struct niobuf_local *r = res;
@@ -365,7 +366,7 @@ int echo_commitrw(int cmd, struct obd_export *export, int objcount,
                 RETURN(-EINVAL);
         }
 
                 RETURN(-EINVAL);
         }
 
-        LASSERT(desc_private == (void *)DESC_PRIV);
+        LASSERT(oti == NULL || oti->oti_handle == (void *)DESC_PRIV);
 
         for (i = 0; i < objcount; i++, obj++) {
                 int verify = obj->ioo_id != 0;
 
         for (i = 0; i < objcount; i++, obj++) {
                 int verify = obj->ioo_id != 0;
@@ -437,7 +438,7 @@ static int echo_setup(struct obd_device *obddev, obd_count len, void *buf)
         RETURN(0);
 }
 
         RETURN(0);
 }
 
-static int echo_cleanup(struct obd_device *obddev, int force, int failover)
+static int echo_cleanup(struct obd_device *obddev, int flags)
 {
         ENTRY;
 
 {
         ENTRY;
 
@@ -453,7 +454,7 @@ int echo_attach(struct obd_device *obd, obd_count len, void *data)
         struct lprocfs_static_vars lvars;
         int rc;
 
         struct lprocfs_static_vars lvars;
         int rc;
 
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(echo, &lvars);
         rc = lprocfs_obd_attach(obd, lvars.obd_vars);
         if (rc != 0)
                 return rc;
         rc = lprocfs_obd_attach(obd, lvars.obd_vars);
         if (rc != 0)
                 return rc;
@@ -539,7 +540,7 @@ static int __init obdecho_init(void)
 
         printk(KERN_INFO "Lustre Echo OBD driver; info@clusterfs.com\n");
 
 
         printk(KERN_INFO "Lustre Echo OBD driver; info@clusterfs.com\n");
 
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(echo, &lvars);
 
         rc = echo_object0_pages_init ();
         if (rc != 0)
 
         rc = echo_object0_pages_init ();
         if (rc != 0)
@@ -561,7 +562,7 @@ static int __init obdecho_init(void)
         RETURN(rc);
 }
 
         RETURN(rc);
 }
 
-static void __exit obdecho_exit(void)
+static void /*__exit*/ obdecho_exit(void)
 {
         echo_client_cleanup();
         class_unregister_type(OBD_ECHO_DEVICENAME);
 {
         echo_client_cleanup();
         class_unregister_type(OBD_ECHO_DEVICENAME);
index 79da7ea..c010798 100644 (file)
@@ -484,7 +484,7 @@ echo_client_kbrw (struct obd_device *obd, int rw,
                 }
         }
 
                 }
         }
 
-        rc = obd_brw(rw, &ec->ec_conn, lsm, npages, pga, NULL);
+        rc = obd_brw(rw, &ec->ec_conn, oa, lsm, npages, pga, NULL);
 
  out:
         if (rc != 0)
 
  out:
         if (rc != 0)
@@ -568,7 +568,7 @@ static int echo_client_ubrw(struct obd_device *obd, int rw,
                 pgp->flag = 0;
         }
 
                 pgp->flag = 0;
         }
 
-        rc = obd_brw(rw, &ec->ec_conn, lsm, npages, pga, NULL);
+        rc = obd_brw(rw, &ec->ec_conn, oa, lsm, npages, pga, NULL);
 
         //        if (rw == OBD_BRW_READ)
         //                mark_dirty_kiobuf (kiobuf, count);
 
         //        if (rw == OBD_BRW_READ)
         //                mark_dirty_kiobuf (kiobuf, count);
@@ -1009,7 +1009,7 @@ static int echo_setup(struct obd_device *obddev, obd_count len, void *buf)
         RETURN(rc);
 }
 
         RETURN(rc);
 }
 
-static int echo_cleanup(struct obd_device * obddev, int force, int failover)
+static int echo_cleanup(struct obd_device *obddev, int flags)
 {
         struct list_head       *el;
         struct ec_object       *eco;
 {
         struct list_head       *el;
         struct ec_object       *eco;
@@ -1023,21 +1023,21 @@ static int echo_cleanup(struct obd_device * obddev, int force, int failover)
         }
 
         /* XXX assuming sole access */
         }
 
         /* XXX assuming sole access */
-        while (!list_empty (&ec->ec_objects)) {
+        while (!list_empty(&ec->ec_objects)) {
                 el = ec->ec_objects.next;
                 el = ec->ec_objects.next;
-                eco = list_entry (el, struct ec_object, eco_obj_chain);
+                eco = list_entry(el, struct ec_object, eco_obj_chain);
 
 
-                LASSERT (eco->eco_refcount == 0);
+                LASSERT(eco->eco_refcount == 0);
                 eco->eco_refcount = 1;
                 eco->eco_deleted = 1;
                 eco->eco_refcount = 1;
                 eco->eco_deleted = 1;
-                echo_put_object (eco);
+                echo_put_object(eco);
         }
 
         }
 
-        rc = obd_disconnect (&ec->ec_conn, 0);
+        rc = obd_disconnect(&ec->ec_conn, 0);
         if (rc != 0)
                 CERROR("fail to disconnect device: %d\n", rc);
 
         if (rc != 0)
                 CERROR("fail to disconnect device: %d\n", rc);
 
-        RETURN (rc);
+        RETURN(rc);
 }
 
 static int echo_connect(struct lustre_handle *conn, struct obd_device *src,
 }
 
 static int echo_connect(struct lustre_handle *conn, struct obd_device *src,
@@ -1057,7 +1057,7 @@ static int echo_connect(struct lustre_handle *conn, struct obd_device *src,
         RETURN (rc);
 }
 
         RETURN (rc);
 }
 
-static int echo_disconnect(struct lustre_handle *conn, int failover)
+static int echo_disconnect(struct lustre_handle *conn, int flags)
 {
         struct obd_export      *exp = class_conn2export (conn);
         struct obd_device      *obd;
 {
         struct obd_export      *exp = class_conn2export (conn);
         struct obd_device      *obd;
@@ -1128,7 +1128,7 @@ int echo_client_init(void)
 {
         struct lprocfs_static_vars lvars;
 
 {
         struct lprocfs_static_vars lvars;
 
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(echo, &lvars);
         return class_register_type(&echo_obd_ops, lvars.module_vars,
                                    OBD_ECHO_CLIENT_DEVICENAME);
 }
         return class_register_type(&echo_obd_ops, lvars.module_vars,
                                    OBD_ECHO_CLIENT_DEVICENAME);
 }
index 6a16001..c25d156 100644 (file)
 #include <linux/obd_class.h>
 
 #ifndef LPROCFS
 #include <linux/obd_class.h>
 
 #ifndef LPROCFS
-struct lprocfs_vars lprocfs_obd_vars[]  = { {0} };
-struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_obd_vars[]  = { {0} };
+static struct lprocfs_vars lprocfs_module_vars[] = { {0} };
 #else
 
 #else
 
-int rd_fstype(char* page, char **start, off_t off, int count, int *eof,
-              void *data)
-{
-        struct obd_device* dev = (struct obd_device*)data;
-        
-        LASSERT(dev != NULL);
-        *eof = 1;
-        return snprintf(page, count, "%s\n", dev->u.echo.eo_fstype);
-}
-
-struct lprocfs_vars lprocfs_obd_vars[] = {
-        { "uuid",     lprocfs_rd_uuid,    0, 0 },
-        { "fstype",   rd_fstype,          0, 0 },
+static struct lprocfs_vars lprocfs_obd_vars[] = {
+        { "uuid",         lprocfs_rd_uuid,        0, 0 },
         { 0 }
 };
 
         { 0 }
 };
 
-struct lprocfs_vars lprocfs_module_vars[] = {
-        { "num_refs", lprocfs_rd_numrefs, 0, 0 },
+static struct lprocfs_vars lprocfs_module_vars[] = {
+        { "num_refs",     lprocfs_rd_numrefs,     0, 0 },
         { 0 }
 };
 
 #endif /* LPROCFS */
         { 0 }
 };
 
 #endif /* LPROCFS */
-LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
+LPROCFS_INIT_VARS(echo, lprocfs_module_vars, lprocfs_obd_vars)
index e530020..49c6100 100644 (file)
@@ -6,3 +6,4 @@ Makefile
 Makefile.in
 .deps
 TAGS
 Makefile.in
 .deps
 TAGS
+.*.cmd
index b9addf1..ed4ca1e 100644 (file)
@@ -6,6 +6,7 @@
 MODULE = obdfilter
 modulefs_DATA = obdfilter.o
 EXTRA_PROGRAMS = obdfilter
 MODULE = obdfilter
 modulefs_DATA = obdfilter.o
 EXTRA_PROGRAMS = obdfilter
-obdfilter_SOURCES = filter.c lproc_obdfilter.c
+obdfilter_SOURCES = filter.c filter_io.c filter_log.c filter_san.c \
+lproc_obdfilter.c filter_internal.h
 
 include $(top_srcdir)/Rules
 
 include $(top_srcdir)/Rules
index 6f2d96c..b6c1bd9 100644 (file)
  *            threaded operation on the OST.
  */
 
  *            threaded operation on the OST.
  */
 
-#define EXPORT_SYMTAB
 #define DEBUG_SUBSYSTEM S_FILTER
 
 #include <linux/config.h>
 #include <linux/module.h>
 #define DEBUG_SUBSYSTEM S_FILTER
 
 #include <linux/config.h>
 #include <linux/module.h>
-#include <linux/pagemap.h> // XXX kill me soon
 #include <linux/fs.h>
 #include <linux/dcache.h>
 #include <linux/fs.h>
 #include <linux/dcache.h>
-#include <linux/obd_class.h>
-#include <linux/lustre_dlm.h>
-#include <linux/obd_filter.h>
 #include <linux/init.h>
 #include <linux/init.h>
-#include <linux/random.h>
-#include <linux/lustre_fsfilt.h>
-#include <linux/lprocfs_status.h>
 #include <linux/version.h>
 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
 #include <linux/version.h>
 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-#include <linux/mount.h>
+# include <linux/mount.h>
+# include <linux/buffer_head.h>
 #endif
 
 #endif
 
-enum {
-        LPROC_FILTER_READ_BYTES = 0,
-        LPROC_FILTER_WRITE_BYTES = 1,
-        LPROC_FILTER_LAST,
-};
+#include <linux/obd_class.h>
+#include <linux/lustre_dlm.h>
+#include <linux/lustre_fsfilt.h>
+#include <linux/lprocfs_status.h>
+#include <linux/lustre_log.h>
+#include <linux/lustre_commit_confd.h>
+
+#include "filter_internal.h"
 
 #define S_SHIFT 12
 static char *obd_type_by_mode[S_IFMT >> S_SHIFT] = {
 
 #define S_SHIFT 12
 static char *obd_type_by_mode[S_IFMT >> S_SHIFT] = {
@@ -132,19 +128,79 @@ static void filter_ffd_destroy(struct filter_file_data *ffd)
         filter_ffd_put(ffd);
 }
 
         filter_ffd_put(ffd);
 }
 
-static void filter_commit_cb(struct obd_device *obd, __u64 transno, int error)
+static void filter_commit_cb(struct obd_device *obd, __u64 transno,
+                             void *cb_data, int error)
 {
         obd_transno_commit_cb(obd, transno, error);
 }
 {
         obd_transno_commit_cb(obd, transno, error);
 }
-/* Assumes caller has already pushed us into the kernel context. */
-int filter_finish_transno(struct obd_export *export, void *handle,
-                          struct obd_trans_info *oti, int rc)
+
+static int filter_client_log_cancel(struct lustre_handle *conn,
+                                    struct lov_stripe_md *lsm, int count,
+                                    struct llog_cookie *cookies, int flags)
 {
 {
-        __u64 last_rcvd;
-        struct obd_device *obd = export->exp_obd;
+        struct obd_device *obd = class_conn2obd(conn);
+        struct llog_commit_data *llcd;
         struct filter_obd *filter = &obd->u.filter;
         struct filter_obd *filter = &obd->u.filter;
-        struct filter_export_data *fed = &export->exp_filter_data;
+        int rc = 0;
+        ENTRY;
+
+        if (count == 0 || cookies == NULL) {
+                down(&filter->fo_sem);
+                if (filter->fo_llcd == NULL || !(flags & OBD_LLOG_FL_SENDNOW))
+                        GOTO(out, rc);
+
+                llcd = filter->fo_llcd;
+                GOTO(send_now, rc);
+        }
+
+        down(&filter->fo_sem);
+        llcd = filter->fo_llcd;
+        if (llcd == NULL) {
+                llcd = llcd_grab();
+                if (llcd == NULL) {
+                        CERROR("couldn't get an llcd - dropped "LPX64":%x+%u\n",
+                               cookies->lgc_lgl.lgl_oid,
+                               cookies->lgc_lgl.lgl_ogen, cookies->lgc_index);
+                        GOTO(out, rc = -ENOMEM);
+                }
+                llcd->llcd_import = filter->fo_mdc_imp;
+                filter->fo_llcd = llcd;
+        }
+
+        memcpy(llcd->llcd_cookies + llcd->llcd_cookiebytes, cookies,
+               sizeof(*cookies));
+        llcd->llcd_cookiebytes += sizeof(*cookies);
+
+        GOTO(send_now, rc);
+send_now:
+        if ((PAGE_SIZE - llcd->llcd_cookiebytes < sizeof(*cookies) ||
+             flags & OBD_LLOG_FL_SENDNOW)) {
+                filter->fo_llcd = NULL;
+                llcd_send(llcd);
+        }
+out:
+        up(&filter->fo_sem);
+
+        return rc;
+}
+
+/* When this (destroy) operation is committed, return the cancel cookie */
+static void filter_cancel_cookies_cb(struct obd_device *obd, __u64 transno,
+                                     void *cb_data, int error)
+{
+        filter_client_log_cancel(&obd->u.filter.fo_mdc_conn, NULL, 1,
+                                 cb_data, OBD_LLOG_FL_SENDNOW);
+        OBD_FREE(cb_data, sizeof(struct llog_cookie));
+}
+
+/* Assumes caller has already pushed us into the kernel context. */
+int filter_finish_transno(struct obd_export *exp, struct obd_trans_info *oti,
+                          int rc)
+{
+        struct filter_obd *filter = &exp->exp_obd->u.filter;
+        struct filter_export_data *fed = &exp->exp_filter_data;
         struct filter_client_data *fcd = fed->fed_fcd;
         struct filter_client_data *fcd = fed->fed_fcd;
+        __u64 last_rcvd;
         loff_t off;
         ssize_t written;
 
         loff_t off;
         ssize_t written;
 
@@ -152,14 +208,14 @@ int filter_finish_transno(struct obd_export *export, void *handle,
         if (rc)
                 RETURN(rc);
 
         if (rc)
                 RETURN(rc);
 
-        if (!obd->obd_replayable)
+        if (!exp->exp_obd->obd_replayable)
                 RETURN(rc);
 
         /* we don't allocate new transnos for replayed requests */
                 RETURN(rc);
 
         /* we don't allocate new transnos for replayed requests */
-        if (oti && oti->oti_transno == 0) {
+        if (oti != NULL && oti->oti_transno == 0) {
                 spin_lock(&filter->fo_translock);
                 spin_lock(&filter->fo_translock);
-                last_rcvd = le64_to_cpu(filter->fo_fsd->fsd_last_rcvd) + 1;
-                filter->fo_fsd->fsd_last_rcvd = cpu_to_le64(last_rcvd);
+                last_rcvd = le64_to_cpu(filter->fo_fsd->fsd_last_transno) + 1;
+                filter->fo_fsd->fsd_last_transno = cpu_to_le64(last_rcvd);
                 spin_unlock(&filter->fo_translock);
                 oti->oti_transno = last_rcvd;
                 fcd->fcd_last_rcvd = cpu_to_le64(last_rcvd);
                 spin_unlock(&filter->fo_translock);
                 oti->oti_transno = last_rcvd;
                 fcd->fcd_last_rcvd = cpu_to_le64(last_rcvd);
@@ -169,27 +225,28 @@ int filter_finish_transno(struct obd_export *export, void *handle,
                 fcd->fcd_last_xid = 0;
 
                 off = fed->fed_lr_off;
                 fcd->fcd_last_xid = 0;
 
                 off = fed->fed_lr_off;
-                fsfilt_set_last_rcvd(obd, last_rcvd, handle, filter_commit_cb);
-                written = lustre_fwrite(filter->fo_rcvd_filp, (char *)fcd, 
-                                        sizeof(*fcd), &off);
+                fsfilt_set_last_rcvd(exp->exp_obd, last_rcvd, oti->oti_handle,
+                                     filter_commit_cb, NULL);
+                written = fsfilt_write_record(exp->exp_obd,
+                                              filter->fo_rcvd_filp, (char *)fcd,
+                                              sizeof(*fcd), &off);
                 CDEBUG(D_HA, "wrote trans #"LPD64" for client %s at #%d: "
                 CDEBUG(D_HA, "wrote trans #"LPD64" for client %s at #%d: "
-                       "written = "LPSZ"\n", last_rcvd, fcd->fcd_uuid, 
+                       "written = "LPSZ"\n", last_rcvd, fcd->fcd_uuid,
                        fed->fed_lr_idx, written);
 
                 if (written == sizeof(*fcd))
                         RETURN(0);
                        fed->fed_lr_idx, written);
 
                 if (written == sizeof(*fcd))
                         RETURN(0);
-                CERROR("error writing to last_rcvd file: rc = %d\n", 
+                CERROR("error writing to %s: rc = %d\n", LAST_RCVD,
                        (int)written);
                 if (written >= 0)
                        (int)written);
                 if (written >= 0)
-                        RETURN(-EIO);
-
+                        RETURN(-ENOSPC);
                 RETURN(written);
                 RETURN(written);
-        }                 
+        }
 
         RETURN(0);
 }
 
 
         RETURN(0);
 }
 
-static inline void f_dput(struct dentry *dentry)
+void f_dput(struct dentry *dentry)
 {
         /* Can't go inside filter_ddelete because it can block */
         CDEBUG(D_INODE, "putting %s: %p, count = %d\n",
 {
         /* Can't go inside filter_ddelete because it can block */
         CDEBUG(D_INODE, "putting %s: %p, count = %d\n",
@@ -207,26 +264,19 @@ static void filter_drelease(struct dentry *dentry)
 }
 
 struct dentry_operations filter_dops = {
 }
 
 struct dentry_operations filter_dops = {
-        .d_release = filter_drelease,
+        d_release: filter_drelease,
 };
 
 };
 
-#define LAST_RCVD "last_rcvd"
-#define INIT_OBJID 2
-
-/* This limit is arbitrary, but for now we fit it in 1 page (32k clients) */
-#define FILTER_LR_MAX_CLIENTS (PAGE_SIZE * 8)
-#define FILTER_LR_MAX_CLIENT_WORDS (FILTER_LR_MAX_CLIENTS/sizeof(unsigned long))
-
 /* Add client data to the FILTER.  We use a bitmap to locate a free space
  * in the last_rcvd file if cl_idx is -1 (i.e. a new client).
  * Otherwise, we have just read the data from the last_rcvd file and
 /* Add client data to the FILTER.  We use a bitmap to locate a free space
  * in the last_rcvd file if cl_idx is -1 (i.e. a new client).
  * Otherwise, we have just read the data from the last_rcvd file and
- * we know its offset.
- */
-int filter_client_add(struct obd_device *obd, struct filter_obd *filter,
-                      struct filter_export_data *fed, int cl_idx)
+ * we know its offset. */
+static int filter_client_add(struct obd_device *obd, struct filter_obd *filter,
+                             struct filter_export_data *fed, int cl_idx)
 {
         unsigned long *bitmap = filter->fo_last_rcvd_slots;
         int new_client = (cl_idx == -1);
 {
         unsigned long *bitmap = filter->fo_last_rcvd_slots;
         int new_client = (cl_idx == -1);
+        ENTRY;
 
         LASSERT(bitmap != NULL);
 
 
         LASSERT(bitmap != NULL);
 
@@ -242,7 +292,7 @@ int filter_client_add(struct obd_device *obd, struct filter_obd *filter,
         repeat:
                 if (cl_idx >= FILTER_LR_MAX_CLIENTS) {
                         CERROR("no client slots - fix FILTER_LR_MAX_CLIENTS\n");
         repeat:
                 if (cl_idx >= FILTER_LR_MAX_CLIENTS) {
                         CERROR("no client slots - fix FILTER_LR_MAX_CLIENTS\n");
-                        return -ENOMEM;
+                        RETURN(-ENOMEM);
                 }
                 if (test_and_set_bit(cl_idx, bitmap)) {
                         CERROR("FILTER client %d: found bit is set in bitmap\n",
                 }
                 if (test_and_set_bit(cl_idx, bitmap)) {
                         CERROR("FILTER client %d: found bit is set in bitmap\n",
@@ -270,23 +320,23 @@ int filter_client_add(struct obd_device *obd, struct filter_obd *filter,
         if (new_client) {
                 struct obd_run_ctxt saved;
                 loff_t off = fed->fed_lr_off;
         if (new_client) {
                 struct obd_run_ctxt saved;
                 loff_t off = fed->fed_lr_off;
-                ssize_t written;
+                int written;
                 void *handle;
 
                 CDEBUG(D_INFO, "writing client fcd at idx %u (%llu) (len %u)\n",
                        fed->fed_lr_idx,off,(unsigned int)sizeof(*fed->fed_fcd));
 
                 push_ctxt(&saved, &filter->fo_ctxt, NULL);
                 void *handle;
 
                 CDEBUG(D_INFO, "writing client fcd at idx %u (%llu) (len %u)\n",
                        fed->fed_lr_idx,off,(unsigned int)sizeof(*fed->fed_fcd));
 
                 push_ctxt(&saved, &filter->fo_ctxt, NULL);
-                /* Transaction eeded to fix for bug 1403 */
+                /* Transaction needed to fix bug 1403 */
                 handle = fsfilt_start(obd,
                                       filter->fo_rcvd_filp->f_dentry->d_inode,
                 handle = fsfilt_start(obd,
                                       filter->fo_rcvd_filp->f_dentry->d_inode,
-                                      FSFILT_OP_SETATTR);
+                                      FSFILT_OP_SETATTR, NULL);
                 if (IS_ERR(handle)) {
                         written = PTR_ERR(handle);
                         CERROR("unable to start transaction: rc %d\n",
                                (int)written);
                 } else {
                 if (IS_ERR(handle)) {
                         written = PTR_ERR(handle);
                         CERROR("unable to start transaction: rc %d\n",
                                (int)written);
                 } else {
-                        written = lustre_fwrite(filter->fo_rcvd_filp,
+                        written = fsfilt_write_record(obd, filter->fo_rcvd_filp,
                                                 (char *)fed->fed_fcd,
                                                 sizeof(*fed->fed_fcd), &off);
                         fsfilt_commit(obd,
                                                 (char *)fed->fed_fcd,
                                                 sizeof(*fed->fed_fcd), &off);
                         fsfilt_commit(obd,
@@ -296,32 +346,35 @@ int filter_client_add(struct obd_device *obd, struct filter_obd *filter,
                 pop_ctxt(&saved, &filter->fo_ctxt, NULL);
 
                 if (written != sizeof(*fed->fed_fcd)) {
                 pop_ctxt(&saved, &filter->fo_ctxt, NULL);
 
                 if (written != sizeof(*fed->fed_fcd)) {
+                        CERROR("error writing %s client idx %u: rc %d\n",
+                               LAST_RCVD, fed->fed_lr_idx, written);
                         if (written < 0)
                                 RETURN(written);
                         if (written < 0)
                                 RETURN(written);
-                        RETURN(-EIO);
+                        RETURN(-ENOSPC);
                 }
         }
                 }
         }
-        return 0;
+        RETURN(0);
 }
 
 }
 
-int filter_client_free(struct obd_export *exp, int failover)
+static int filter_client_free(struct obd_export *exp, int flags)
 {
         struct filter_export_data *fed = &exp->exp_filter_data;
         struct filter_obd *filter = &exp->exp_obd->u.filter;
 {
         struct filter_export_data *fed = &exp->exp_filter_data;
         struct filter_obd *filter = &exp->exp_obd->u.filter;
+        struct obd_device *obd = exp->exp_obd;
         struct filter_client_data zero_fcd;
         struct obd_run_ctxt saved;
         int written;
         loff_t off;
         ENTRY;
 
         struct filter_client_data zero_fcd;
         struct obd_run_ctxt saved;
         int written;
         loff_t off;
         ENTRY;
 
-        if (!fed->fed_fcd)
+        if (fed->fed_fcd == NULL)
                 RETURN(0);
 
                 RETURN(0);
 
-        if (failover != 0)
+        if (flags & OBD_OPT_FAILOVER)
                 GOTO(free, 0);
 
         /* XXX if fcd_uuid were a real obd_uuid, I could use obd_uuid_equals */
                 GOTO(free, 0);
 
         /* XXX if fcd_uuid were a real obd_uuid, I could use obd_uuid_equals */
-        if (!strcmp(fed->fed_fcd->fcd_uuid, "OBD_CLASS_UUID"))
+        if (strcmp(fed->fed_fcd->fcd_uuid, "OBD_CLASS_UUID") == 0)
                 GOTO(free, 0);
 
         LASSERT(filter->fo_last_rcvd_slots != NULL);
                 GOTO(free, 0);
 
         LASSERT(filter->fo_last_rcvd_slots != NULL);
@@ -339,8 +392,9 @@ int filter_client_free(struct obd_export *exp, int failover)
 
         memset(&zero_fcd, 0, sizeof zero_fcd);
         push_ctxt(&saved, &filter->fo_ctxt, NULL);
 
         memset(&zero_fcd, 0, sizeof zero_fcd);
         push_ctxt(&saved, &filter->fo_ctxt, NULL);
-        written = lustre_fwrite(filter->fo_rcvd_filp, (const char *)&zero_fcd,
-                                sizeof(zero_fcd), &off);
+        written = fsfilt_write_record(obd, filter->fo_rcvd_filp,
+                                      (char *)&zero_fcd, sizeof(zero_fcd),
+                                      &off);
 
         /* XXX: this write gets lost sometimes, unless this sync is here. */
         if (written > 0)
 
         /* XXX: this write gets lost sometimes, unless this sync is here. */
         if (written > 0)
@@ -374,29 +428,30 @@ static int filter_free_server_data(struct filter_obd *filter)
         return 0;
 }
 
         return 0;
 }
 
-
 /* assumes caller is already in kernel ctxt */
 /* assumes caller is already in kernel ctxt */
-static int filter_update_server_data(struct file *filp,
-                                     struct filter_server_data *fsd)
+int filter_update_server_data(struct obd_device *obd,
+                              struct file *filp, struct filter_server_data *fsd)
 {
         loff_t off = 0;
         int rc;
 {
         loff_t off = 0;
         int rc;
+        ENTRY;
 
         CDEBUG(D_INODE, "server uuid      : %s\n", fsd->fsd_uuid);
         CDEBUG(D_INODE, "server last_objid: "LPU64"\n",
                le64_to_cpu(fsd->fsd_last_objid));
         CDEBUG(D_INODE, "server last_rcvd : "LPU64"\n",
 
         CDEBUG(D_INODE, "server uuid      : %s\n", fsd->fsd_uuid);
         CDEBUG(D_INODE, "server last_objid: "LPU64"\n",
                le64_to_cpu(fsd->fsd_last_objid));
         CDEBUG(D_INODE, "server last_rcvd : "LPU64"\n",
-               le64_to_cpu(fsd->fsd_last_rcvd));
+               le64_to_cpu(fsd->fsd_last_transno));
         CDEBUG(D_INODE, "server last_mount: "LPU64"\n",
                le64_to_cpu(fsd->fsd_mount_count));
 
         CDEBUG(D_INODE, "server last_mount: "LPU64"\n",
                le64_to_cpu(fsd->fsd_mount_count));
 
-        rc = lustre_fwrite(filp, (char *)fsd, sizeof(*fsd), &off);
-        if (rc != sizeof(*fsd)) {
-                CDEBUG(D_INODE, "error writing filter_server_data: rc = %d\n",
-                       rc);
-                RETURN(-EIO);
-        }
-        RETURN(0);
+        rc = fsfilt_write_record(obd, filp, (char *)fsd, sizeof(*fsd), &off);
+        if (rc == sizeof(*fsd))
+                RETURN(0);
+
+        CDEBUG(D_INODE, "error writing filter_server_data: rc = %d\n", rc);
+        if (rc >= 0)
+                RETURN(-ENOSPC);
+        RETURN(rc);
 }
 
 /* assumes caller has already in kernel ctxt */
 }
 
 /* assumes caller has already in kernel ctxt */
@@ -432,11 +487,11 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp,
         }
 
         if (last_rcvd_size == 0) {
         }
 
         if (last_rcvd_size == 0) {
-                CERROR("%s: initializing new last_rcvd\n", obd->obd_name);
+                CWARN("%s: initializing new %s\n", obd->obd_name, LAST_RCVD);
 
                 memcpy(fsd->fsd_uuid, obd->obd_uuid.uuid,sizeof(fsd->fsd_uuid));
                 fsd->fsd_last_objid = cpu_to_le64(init_lastobjid);
 
                 memcpy(fsd->fsd_uuid, obd->obd_uuid.uuid,sizeof(fsd->fsd_uuid));
                 fsd->fsd_last_objid = cpu_to_le64(init_lastobjid);
-                fsd->fsd_last_rcvd = 0;
+                fsd->fsd_last_transno = 0;
                 mount_count = fsd->fsd_mount_count = 0;
                 fsd->fsd_server_size = cpu_to_le32(FILTER_LR_SERVER_SIZE);
                 fsd->fsd_client_start = cpu_to_le32(FILTER_LR_CLIENT_START);
                 mount_count = fsd->fsd_mount_count = 0;
                 fsd->fsd_server_size = cpu_to_le32(FILTER_LR_SERVER_SIZE);
                 fsd->fsd_client_start = cpu_to_le32(FILTER_LR_CLIENT_START);
@@ -444,15 +499,18 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp,
                 fsd->fsd_subdir_count = cpu_to_le16(FILTER_SUBDIR_COUNT);
                 filter->fo_subdir_count = FILTER_SUBDIR_COUNT;
         } else {
                 fsd->fsd_subdir_count = cpu_to_le16(FILTER_SUBDIR_COUNT);
                 filter->fo_subdir_count = FILTER_SUBDIR_COUNT;
         } else {
-                ssize_t retval = lustre_fread(filp, (char *)fsd, sizeof(*fsd),
-                                              &off);
+                int retval = fsfilt_read_record(obd, filp, (char *)fsd,
+                                                sizeof(*fsd), &off);
                 if (retval != sizeof(*fsd)) {
                 if (retval != sizeof(*fsd)) {
-                        CDEBUG(D_INODE,"OBD filter: error reading %s\n",
-                               LAST_RCVD);
+                        CDEBUG(D_INODE,"OBD filter: error reading %s: rc %d\n",
+                               LAST_RCVD, retval);
                         GOTO(err_fsd, rc = -EIO);
                 }
                 mount_count = le64_to_cpu(fsd->fsd_mount_count);
                 filter->fo_subdir_count = le16_to_cpu(fsd->fsd_subdir_count);
                         GOTO(err_fsd, rc = -EIO);
                 }
                 mount_count = le64_to_cpu(fsd->fsd_mount_count);
                 filter->fo_subdir_count = le16_to_cpu(fsd->fsd_subdir_count);
+                fsd->fsd_last_objid =
+                        cpu_to_le64(le64_to_cpu(fsd->fsd_last_objid) +
+                                    FILTER_SKIP_OBJID);
         }
 
         if (fsd->fsd_feature_incompat) {
         }
 
         if (fsd->fsd_feature_incompat) {
@@ -470,7 +528,7 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp,
         CDEBUG(D_INODE, "%s: server last_objid: "LPU64"\n",
                obd->obd_name, le64_to_cpu(fsd->fsd_last_objid));
         CDEBUG(D_INODE, "%s: server last_rcvd : "LPU64"\n",
         CDEBUG(D_INODE, "%s: server last_objid: "LPU64"\n",
                obd->obd_name, le64_to_cpu(fsd->fsd_last_objid));
         CDEBUG(D_INODE, "%s: server last_rcvd : "LPU64"\n",
-               obd->obd_name, le64_to_cpu(fsd->fsd_last_rcvd));
+               obd->obd_name, le64_to_cpu(fsd->fsd_last_transno));
         CDEBUG(D_INODE, "%s: server last_mount: "LPU64"\n",
                obd->obd_name, mount_count);
         CDEBUG(D_INODE, "%s: server data size: %u\n",
         CDEBUG(D_INODE, "%s: server last_mount: "LPU64"\n",
                obd->obd_name, mount_count);
         CDEBUG(D_INODE, "%s: server data size: %u\n",
@@ -482,13 +540,8 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp,
         CDEBUG(D_INODE, "%s: server subdir_count: %u\n",
                obd->obd_name, le16_to_cpu(fsd->fsd_subdir_count));
 
         CDEBUG(D_INODE, "%s: server subdir_count: %u\n",
                obd->obd_name, le16_to_cpu(fsd->fsd_subdir_count));
 
-        /*
-         * When we do a clean FILTER shutdown, we save the last_rcvd into
-         * the header.  If we find clients with higher last_rcvd values
-         * then those clients may need recovery done.
-         */
         if (!obd->obd_replayable) {
         if (!obd->obd_replayable) {
-                CERROR("%s: recovery support OFF\n", obd->obd_name);
+                CWARN("%s: recovery support OFF\n", obd->obd_name);
                 GOTO(out, rc = 0);
         }
 
                 GOTO(out, rc = 0);
         }
 
@@ -507,7 +560,8 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp,
                  */
                 off = le32_to_cpu(fsd->fsd_client_start) +
                         cl_idx * le16_to_cpu(fsd->fsd_client_size);
                  */
                 off = le32_to_cpu(fsd->fsd_client_start) +
                         cl_idx * le16_to_cpu(fsd->fsd_client_size);
-                rc = lustre_fread(filp, (char *)fcd, sizeof(*fcd), &off);
+                rc = fsfilt_read_record(obd, filp, (char *)fcd, sizeof(*fcd),
+                                        &off);
                 if (rc != sizeof(*fcd)) {
                         CERROR("error reading FILTER %s offset %d: rc = %d\n",
                                LAST_RCVD, cl_idx, rc);
                 if (rc != sizeof(*fcd)) {
                         CERROR("error reading FILTER %s offset %d: rc = %d\n",
                                LAST_RCVD, cl_idx, rc);
@@ -534,7 +588,7 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp,
                         CERROR("RCVRNG CLIENT uuid: %s idx: %d lr: "LPU64
                                " srv lr: "LPU64" mnt: "LPU64" last mount: "
                                LPU64"\n", fcd->fcd_uuid, cl_idx,
                         CERROR("RCVRNG CLIENT uuid: %s idx: %d lr: "LPU64
                                " srv lr: "LPU64" mnt: "LPU64" last mount: "
                                LPU64"\n", fcd->fcd_uuid, cl_idx,
-                               last_rcvd, le64_to_cpu(fsd->fsd_last_rcvd),
+                               last_rcvd, le64_to_cpu(fsd->fsd_last_transno),
                                le64_to_cpu(fcd->fcd_mount_count), mount_count);
                         if (exp == NULL) {
                                 /* XXX this rc is ignored  */
                                le64_to_cpu(fcd->fcd_mount_count), mount_count);
                         if (exp == NULL) {
                                 /* XXX this rc is ignored  */
@@ -563,15 +617,16 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp,
                 CDEBUG(D_OTHER, "client at idx %d has last_rcvd = "LPU64"\n",
                        cl_idx, last_rcvd);
 
                 CDEBUG(D_OTHER, "client at idx %d has last_rcvd = "LPU64"\n",
                        cl_idx, last_rcvd);
 
-                if (last_rcvd > le64_to_cpu(filter->fo_fsd->fsd_last_rcvd))
-                        filter->fo_fsd->fsd_last_rcvd = cpu_to_le64(last_rcvd);
+                if (last_rcvd > le64_to_cpu(filter->fo_fsd->fsd_last_transno))
+                        filter->fo_fsd->fsd_last_transno=cpu_to_le64(last_rcvd);
 
                 obd->obd_last_committed =
 
                 obd->obd_last_committed =
-                        le64_to_cpu(filter->fo_fsd->fsd_last_rcvd);
+                        le64_to_cpu(filter->fo_fsd->fsd_last_transno);
+
                 if (obd->obd_recoverable_clients) {
                         CERROR("RECOVERY: %d recoverable clients, last_rcvd "
                                LPU64"\n", obd->obd_recoverable_clients,
                 if (obd->obd_recoverable_clients) {
                         CERROR("RECOVERY: %d recoverable clients, last_rcvd "
                                LPU64"\n", obd->obd_recoverable_clients,
-                               le64_to_cpu(filter->fo_fsd->fsd_last_rcvd));
+                               le64_to_cpu(filter->fo_fsd->fsd_last_transno));
                         obd->obd_next_recovery_transno =
                                 obd->obd_last_committed + 1;
                         obd->obd_recovering = 1;
                         obd->obd_next_recovery_transno =
                                 obd->obd_last_committed + 1;
                         obd->obd_recovering = 1;
@@ -585,8 +640,8 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp,
 out:
         fsd->fsd_mount_count = cpu_to_le64(mount_count + 1);
 
 out:
         fsd->fsd_mount_count = cpu_to_le64(mount_count + 1);
 
-        /* save it,so mount count and last_recvd is current */
-        rc = filter_update_server_data(filp, filter->fo_fsd);
+        /* save it, so mount count and last_transno is current */
+        rc = filter_update_server_data(obd, filp, filter->fo_fsd);
 
         RETURN(rc);
 
 
         RETURN(rc);
 
@@ -639,7 +694,7 @@ static int filter_prep(struct obd_device *obd)
                 filter->fo_dentry_O_mode[mode] = dentry;
         }
 
                 filter->fo_dentry_O_mode[mode] = dentry;
         }
 
-        file = filp_open(LAST_RCVD, O_RDWR | O_CREAT, 0700);
+        file = filp_open(LAST_RCVD, O_RDWR | O_CREAT | O_LARGEFILE, 0700);
         if (!file || IS_ERR(file)) {
                 rc = PTR_ERR(file);
                 CERROR("OBD filter: cannot open/create %s: rc = %d\n",
         if (!file || IS_ERR(file)) {
                 rc = PTR_ERR(file);
                 CERROR("OBD filter: cannot open/create %s: rc = %d\n",
@@ -663,8 +718,15 @@ static int filter_prep(struct obd_device *obd)
         filter->fo_fop = file->f_op;
         filter->fo_iop = inode->i_op;
         filter->fo_aops = inode->i_mapping->a_ops;
         filter->fo_fop = file->f_op;
         filter->fo_iop = inode->i_op;
         filter->fo_aops = inode->i_mapping->a_ops;
+#ifdef I_SKIP_PDFLUSH
+        /*
+         * we need this to protect from deadlock
+         * pdflush vs. lustre_fwrite()
+         */
+        inode->i_flags |= I_SKIP_PDFLUSH;
+#endif
 
 
-        rc = filter_init_server_data(obd, file, INIT_OBJID);
+        rc = filter_init_server_data(obd, file, FILTER_INIT_OBJID);
         if (rc) {
                 CERROR("cannot read %s: rc = %d\n", LAST_RCVD, rc);
                 GOTO(err_client, rc);
         if (rc) {
                 CERROR("cannot read %s: rc = %d\n", LAST_RCVD, rc);
                 GOTO(err_client, rc);
@@ -740,9 +802,10 @@ static void filter_post(struct obd_device *obd)
          * from lastobjid */
 
         push_ctxt(&saved, &filter->fo_ctxt, NULL);
          * from lastobjid */
 
         push_ctxt(&saved, &filter->fo_ctxt, NULL);
-        rc = filter_update_server_data(filter->fo_rcvd_filp, filter->fo_fsd);
+        rc = filter_update_server_data(obd, filter->fo_rcvd_filp,
+                                       filter->fo_fsd);
         if (rc)
         if (rc)
-                CERROR("OBD filter: error writing lastobjid: rc = %ld\n", rc);
+                CERROR("error writing lastobjid: rc = %ld\n", rc);
 
 
         if (filter->fo_rcvd_filp) {
 
 
         if (filter->fo_rcvd_filp) {
@@ -751,7 +814,7 @@ static void filter_post(struct obd_device *obd)
                 filp_close(filter->fo_rcvd_filp, 0);
                 filter->fo_rcvd_filp = NULL;
                 if (rc)
                 filp_close(filter->fo_rcvd_filp, 0);
                 filter->fo_rcvd_filp = NULL;
                 if (rc)
-                        CERROR("last_rcvd file won't closed rc = %ld\n", rc);
+                        CERROR("error closing %s: rc = %ld\n", LAST_RCVD, rc);
         }
 
         if (filter->fo_subdir_count) {
         }
 
         if (filter->fo_subdir_count) {
@@ -777,8 +840,7 @@ static void filter_post(struct obd_device *obd)
         pop_ctxt(&saved, &filter->fo_ctxt, NULL);
 }
 
         pop_ctxt(&saved, &filter->fo_ctxt, NULL);
 }
 
-
-static __u64 filter_next_id(struct filter_obd *filter)
+__u64 filter_next_id(struct filter_obd *filter)
 {
         obd_id id;
         LASSERT(filter->fo_fsd != NULL);
 {
         obd_id id;
         LASSERT(filter->fo_fsd != NULL);
@@ -792,8 +854,9 @@ static __u64 filter_next_id(struct filter_obd *filter)
 }
 
 /* direct cut-n-paste of mds_blocking_ast() */
 }
 
 /* direct cut-n-paste of mds_blocking_ast() */
-int filter_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
-                     void *data, int flag)
+static int filter_blocking_ast(struct ldlm_lock *lock,
+                               struct ldlm_lock_desc *desc,
+                               void *data, int flag)
 {
         int do_ast;
         ENTRY;
 {
         int do_ast;
         ENTRY;
@@ -852,6 +915,7 @@ static int filter_lock_dentry(struct obd_device *obd, struct dentry *de,
         RETURN(rc == ELDLM_OK ? 0 : -ENOLCK);  /* XXX translate ldlm code */
 }
 
         RETURN(rc == ELDLM_OK ? 0 : -ENOLCK);  /* XXX translate ldlm code */
 }
 
+/* We never dget the object parent, so DON'T dput it either */
 static void filter_parent_unlock(struct dentry *dparent,
                                  struct lustre_handle *lockh,
                                  ldlm_mode_t lock_mode)
 static void filter_parent_unlock(struct dentry *dparent,
                                  struct lustre_handle *lockh,
                                  ldlm_mode_t lock_mode)
@@ -860,8 +924,8 @@ static void filter_parent_unlock(struct dentry *dparent,
 }
 
 /* We never dget the object parent, so DON'T dput it either */
 }
 
 /* We never dget the object parent, so DON'T dput it either */
-static inline struct dentry *filter_parent(struct obd_device *obd,
-                                           obd_mode mode, obd_id objid)
+struct dentry *filter_parent(struct obd_device *obd, obd_mode mode,
+                             obd_id objid)
 {
         struct filter_obd *filter = &obd->u.filter;
 
 {
         struct filter_obd *filter = &obd->u.filter;
 
@@ -873,10 +937,9 @@ static inline struct dentry *filter_parent(struct obd_device *obd,
 }
 
 /* We never dget the object parent, so DON'T dput it either */
 }
 
 /* We never dget the object parent, so DON'T dput it either */
-static inline struct dentry *filter_parent_lock(struct obd_device *obd,
-                                                obd_mode mode, obd_id objid,
-                                                ldlm_mode_t lock_mode,
-                                                struct lustre_handle *lockh)
+struct dentry *filter_parent_lock(struct obd_device *obd, obd_mode mode,
+                                  obd_id objid, ldlm_mode_t lock_mode,
+                                  struct lustre_handle *lockh)
 {
         unsigned long now = jiffies;
         struct dentry *de = filter_parent(obd, mode, objid);
 {
         unsigned long now = jiffies;
         struct dentry *de = filter_parent(obd, mode, objid);
@@ -886,7 +949,7 @@ static inline struct dentry *filter_parent_lock(struct obd_device *obd,
                 return de;
 
         rc = filter_lock_dentry(obd, de, lock_mode, lockh);
                 return de;
 
         rc = filter_lock_dentry(obd, de, lock_mode, lockh);
-        if (time_after(jiffies, now + 15*HZ))
+        if (time_after(jiffies, now + 15 * HZ))
                 CERROR("slow parent lock %lus\n", (jiffies - now) / HZ);
         return rc ? ERR_PTR(rc) : de;
 }
                 CERROR("slow parent lock %lus\n", (jiffies - now) / HZ);
         return rc ? ERR_PTR(rc) : de;
 }
@@ -897,13 +960,11 @@ static inline struct dentry *filter_parent_lock(struct obd_device *obd,
  * appropriately for this operation (normally a write lock).  If
  * dir_dentry is NULL, we do a read lock while we do the lookup to
  * avoid races with create/destroy and such changing the directory
  * appropriately for this operation (normally a write lock).  If
  * dir_dentry is NULL, we do a read lock while we do the lookup to
  * avoid races with create/destroy and such changing the directory
- * internal to the filesystem code.
- */
-static struct dentry *filter_fid2dentry(struct obd_device *obd,
-                                        struct dentry *dir_dentry,
-                                        obd_mode mode, obd_id id)
+ * internal to the filesystem code. */
+struct dentry *filter_fid2dentry(struct obd_device *obd,
+                                 struct dentry *dir_dentry,
+                                 obd_mode mode, obd_id id)
 {
 {
-        struct super_block *sb = obd->u.filter.fo_sb;
         struct lustre_handle lockh;
         struct dentry *dparent = dir_dentry;
         struct dentry *dchild;
         struct lustre_handle lockh;
         struct dentry *dparent = dir_dentry;
         struct dentry *dchild;
@@ -911,11 +972,6 @@ static struct dentry *filter_fid2dentry(struct obd_device *obd,
         int len;
         ENTRY;
 
         int len;
         ENTRY;
 
-        if (!sb || !sb->s_dev) {
-                CERROR("device not initialized.\n");
-                RETURN(ERR_PTR(-ENXIO));
-        }
-
         if (id == 0) {
                 CERROR("fatal: invalid object id 0\n");
                 LBUG();
         if (id == 0) {
                 CERROR("fatal: invalid object id 0\n");
                 LBUG();
@@ -923,7 +979,7 @@ static struct dentry *filter_fid2dentry(struct obd_device *obd,
         }
 
         len = sprintf(name, LPU64, id);
         }
 
         len = sprintf(name, LPU64, id);
-        if (!dir_dentry) {
+        if (dir_dentry == NULL) {
                 dparent = filter_parent_lock(obd, mode, id, LCK_PR, &lockh);
                 if (IS_ERR(dparent))
                         RETURN(dparent);
                 dparent = filter_parent_lock(obd, mode, id, LCK_PR, &lockh);
                 if (IS_ERR(dparent))
                         RETURN(dparent);
@@ -931,7 +987,7 @@ static struct dentry *filter_fid2dentry(struct obd_device *obd,
         CDEBUG(D_INODE, "looking up object O/%*s/%s\n",
                dparent->d_name.len, dparent->d_name.name, name);
         dchild = ll_lookup_one_len(name, dparent, len);
         CDEBUG(D_INODE, "looking up object O/%*s/%s\n",
                dparent->d_name.len, dparent->d_name.name, name);
         dchild = ll_lookup_one_len(name, dparent, len);
-        if (!dir_dentry)
+        if (dir_dentry == NULL)
                 filter_parent_unlock(dparent, &lockh, LCK_PR);
         if (IS_ERR(dchild)) {
                 CERROR("child lookup error %ld\n", PTR_ERR(dchild));
                 filter_parent_unlock(dparent, &lockh, LCK_PR);
         if (IS_ERR(dchild)) {
                 CERROR("child lookup error %ld\n", PTR_ERR(dchild));
@@ -947,13 +1003,12 @@ static struct dentry *filter_fid2dentry(struct obd_device *obd,
 }
 
 static struct file *filter_obj_open(struct obd_export *export,
 }
 
 static struct file *filter_obj_open(struct obd_export *export,
-                                    __u64 id, __u32 type,
-                                    ldlm_mode_t parent_mode,
+                                    struct obd_trans_info *oti,
+                                    __u64 id, __u32 type, int parent_mode,
                                     struct lustre_handle *parent_lockh)
 {
         struct obd_device *obd = export->exp_obd;
         struct filter_obd *filter = &obd->u.filter;
                                     struct lustre_handle *parent_lockh)
 {
         struct obd_device *obd = export->exp_obd;
         struct filter_obd *filter = &obd->u.filter;
-        struct super_block *sb = filter->fo_sb;
         struct dentry *dchild = NULL, *dparent = NULL;
         struct filter_export_data *fed = &export->exp_filter_data;
         struct filter_dentry_data *fdd = NULL;
         struct dentry *dchild = NULL, *dparent = NULL;
         struct filter_export_data *fed = &export->exp_filter_data;
         struct filter_dentry_data *fdd = NULL;
@@ -966,11 +1021,6 @@ static struct file *filter_obj_open(struct obd_export *export,
 
         push_ctxt(&saved, &filter->fo_ctxt, NULL);
 
 
         push_ctxt(&saved, &filter->fo_ctxt, NULL);
 
-        if (!sb || !sb->s_dev) {
-                CERROR("fatal: device not initialized.\n");
-                GOTO(cleanup, file = ERR_PTR(-ENXIO));
-        }
-
         if (!id) {
                 CERROR("fatal: invalid obdo "LPU64"\n", id);
                 GOTO(cleanup, file = ERR_PTR(-ESTALE));
         if (!id) {
                 CERROR("fatal: invalid obdo "LPU64"\n", id);
                 GOTO(cleanup, file = ERR_PTR(-ESTALE));
@@ -1014,6 +1064,7 @@ static struct file *filter_obj_open(struct obd_export *export,
 
         if (dchild->d_inode == NULL) {
                 CERROR("opening non-existent object %s - O_CREAT?\n", name);
 
         if (dchild->d_inode == NULL) {
                 CERROR("opening non-existent object %s - O_CREAT?\n", name);
+                /* dput(dchild); call filter_create_internal here */
                 file = ERR_PTR(-ENOENT);
                 GOTO(cleanup, file);
         }
                 file = ERR_PTR(-ENOENT);
                 GOTO(cleanup, file);
         }
@@ -1083,9 +1134,8 @@ cleanup:
 }
 
 /* Caller must hold LCK_PW on parent and push us into kernel context.
 }
 
 /* Caller must hold LCK_PW on parent and push us into kernel context.
- * Caller is also required to ensure that dchild->d_inode exists.
- */
-static int filter_destroy_internal(struct obd_device *obd,
+ * Caller is also required to ensure that dchild->d_inode exists. */
+static int filter_destroy_internal(struct obd_device *obd, obd_id objid,
                                    struct dentry *dparent,
                                    struct dentry *dchild)
 {
                                    struct dentry *dparent,
                                    struct dentry *dchild)
 {
@@ -1099,6 +1149,39 @@ static int filter_destroy_internal(struct obd_device *obd,
                        inode->i_nlink, atomic_read(&inode->i_count));
         }
 
                        inode->i_nlink, atomic_read(&inode->i_count));
         }
 
+        
+#if 0
+        /* Tell the clients that the object is gone now and that they should
+         * throw away any cached pages.  We don't need to wait until they're
+         * done, so just decref the lock right away and let ldlm_completion_ast
+         * clean up when it's all over. */
+        ldlm_cli_enqueue(..., LCK_PW, AST_INTENT_DESTROY, &lockh);
+        ldlm_lock_decref(&lockh, LCK_PW);
+#endif
+
+        if (0) {
+                struct lustre_handle lockh;
+                int flags = 0, rc;
+                struct ldlm_res_id res_id = { .name = { objid } };
+
+                /* This part is a wee bit iffy: we really only want to bust the
+                 * locks on our stripe, so that we don't end up bouncing
+                 * [0->EOF] locks around on each of the OSTs as the rest of the
+                 * destroys get processed.  Because we're only talking to
+                 * the local LDLM, though, we should only end up locking the 
+                 * whole of our stripe.  When bug 1425 (take all locks on OST
+                 * for stripe 0) is fixed, this code should be revisited. */
+                struct ldlm_extent extent = { 0, OBD_OBJECT_EOF };
+
+                rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
+                                      res_id, LDLM_EXTENT, &extent,
+                                      sizeof(extent), LCK_PW, &flags,
+                                      ldlm_completion_ast, filter_blocking_ast,
+                                      NULL, &lockh);
+                /* We only care about the side-effects, just drop the lock. */
+                ldlm_lock_decref(&lockh, LCK_PW);
+        }
+
         rc = vfs_unlink(dparent->d_inode, dchild);
 
         if (rc)
         rc = vfs_unlink(dparent->d_inode, dchild);
 
         if (rc)
@@ -1113,8 +1196,7 @@ static int filter_destroy_internal(struct obd_device *obd,
 */
 static int filter_close_internal(struct obd_export *exp,
                                  struct filter_file_data *ffd,
 */
 static int filter_close_internal(struct obd_export *exp,
                                  struct filter_file_data *ffd,
-                                 struct obd_trans_info *oti,
-                                 int failover)
+                                 struct obd_trans_info *oti, int flags)
 {
         struct obd_device *obd = exp->exp_obd;
         struct filter_obd *filter = &obd->u.filter;
 {
         struct obd_device *obd = exp->exp_obd;
         struct filter_obd *filter = &obd->u.filter;
@@ -1128,13 +1210,14 @@ static int filter_close_internal(struct obd_export *exp,
         ENTRY;
 
         LASSERT(filp->private_data == ffd);
         ENTRY;
 
         LASSERT(filp->private_data == ffd);
-        LASSERT(fdd);
+        LASSERT(fdd != NULL);
         LASSERT(fdd->fdd_magic == FILTER_DENTRY_MAGIC);
 
         rc = filp_close(filp, 0);
 
         if (atomic_dec_and_test(&fdd->fdd_open_count) &&
         LASSERT(fdd->fdd_magic == FILTER_DENTRY_MAGIC);
 
         rc = filp_close(filp, 0);
 
         if (atomic_dec_and_test(&fdd->fdd_open_count) &&
-            fdd->fdd_flags & FILTER_FLAG_DESTROY && !failover) {
+            (fdd->fdd_flags & FILTER_FLAG_DESTROY) &&
+            !(flags & OBD_OPT_FAILOVER)) {
                 void *handle;
 
                 push_ctxt(&saved, &filter->fo_ctxt, NULL);
                 void *handle;
 
                 push_ctxt(&saved, &filter->fo_ctxt, NULL);
@@ -1148,15 +1231,27 @@ static int filter_close_internal(struct obd_export *exp,
                 cleanup_phase = 2;
 
                 handle = fsfilt_start(obd, dparent->d_inode,
                 cleanup_phase = 2;
 
                 handle = fsfilt_start(obd, dparent->d_inode,
-                                      FSFILT_OP_UNLINK);
+                                      FSFILT_OP_UNLINK_LOG, oti);
                 if (IS_ERR(handle))
                         GOTO(cleanup, rc = PTR_ERR(handle));
 
                 if (IS_ERR(handle))
                         GOTO(cleanup, rc = PTR_ERR(handle));
 
+                if (oti != NULL) {
+                        if (oti->oti_handle == NULL)
+                                oti->oti_handle = handle;
+                        else
+                                LASSERT(oti->oti_handle == handle);
+                }
+
+#ifdef ENABLE_ORPHANS
+                /* Remove orphan unlink record from log */
+                llog_cancel_records(filter->fo_catalog, 1, &fdd->fdd_cookie);
+#endif
                 /* XXX unlink from PENDING directory now too */
                 /* XXX unlink from PENDING directory now too */
-                rc2 = filter_destroy_internal(obd, dparent, dchild);
+                rc2 = filter_destroy_internal(obd, fdd->fdd_objid, dparent,
+                                              dchild);
                 if (rc2 && !rc)
                         rc = rc2;
                 if (rc2 && !rc)
                         rc = rc2;
-                rc = filter_finish_transno(exp, handle, oti, rc);
+                rc = filter_finish_transno(exp, oti, rc);
                 rc2 = fsfilt_commit(obd, dparent->d_inode, handle, 0);
                 if (rc2) {
                         CERROR("error on commit, err = %d\n", rc2);
                 rc2 = fsfilt_commit(obd, dparent->d_inode, handle, 0);
                 if (rc2) {
                         CERROR("error on commit, err = %d\n", rc2);
@@ -1189,14 +1284,12 @@ cleanup:
         RETURN(rc);
 }
 
         RETURN(rc);
 }
 
-/* obd methods */
 /* mount the file system (secretly) */
 /* mount the file system (secretly) */
-static int filter_common_setup(struct obd_device *obd, obd_count len, void *buf,
-                               char *option)
+int filter_common_setup(struct obd_device *obd, obd_count len, void *buf,
+                        char *option)
 {
         struct obd_ioctl_data* data = buf;
         struct filter_obd *filter = &obd->u.filter;
 {
         struct obd_ioctl_data* data = buf;
         struct filter_obd *filter = &obd->u.filter;
-
         struct vfsmount *mnt;
         int rc = 0;
         ENTRY;
         struct vfsmount *mnt;
         int rc = 0;
         ENTRY;
@@ -1208,7 +1301,8 @@ static int filter_common_setup(struct obd_device *obd, obd_count len, void *buf,
         if (IS_ERR(obd->obd_fsops))
                 RETURN(PTR_ERR(obd->obd_fsops));
 
         if (IS_ERR(obd->obd_fsops))
                 RETURN(PTR_ERR(obd->obd_fsops));
 
-        mnt = do_kern_mount(data->ioc_inlbuf2, 0, data->ioc_inlbuf1, option);
+        mnt = do_kern_mount(data->ioc_inlbuf2, MS_NOATIME | MS_NODIRATIME,
+                            data->ioc_inlbuf1, option);
         rc = PTR_ERR(mnt);
         if (IS_ERR(mnt))
                 GOTO(err_ops, rc);
         rc = PTR_ERR(mnt);
         if (IS_ERR(mnt))
                 GOTO(err_ops, rc);
@@ -1257,14 +1351,27 @@ static int filter_common_setup(struct obd_device *obd, obd_count len, void *buf,
         spin_lock_init(&filter->fo_objidlock);
         INIT_LIST_HEAD(&filter->fo_export_list);
 
         spin_lock_init(&filter->fo_objidlock);
         INIT_LIST_HEAD(&filter->fo_export_list);
 
+        ptlrpc_init_client(MDS_REQUEST_PORTAL, MDC_REPLY_PORTAL,
+                           "filter_mdc", &filter->fo_mdc_client);
+        sema_init(&filter->fo_sem, 1);
+
         obd->obd_namespace = ldlm_namespace_new("filter-tgt",
                                                 LDLM_NAMESPACE_SERVER);
         obd->obd_namespace = ldlm_namespace_new("filter-tgt",
                                                 LDLM_NAMESPACE_SERVER);
-        if (!obd->obd_namespace)
+        if (obd->obd_namespace == NULL)
                 GOTO(err_post, rc = -ENOMEM);
 
         ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL,
                            "filter_ldlm_cb_client", &obd->obd_ldlm_client);
 
                 GOTO(err_post, rc = -ENOMEM);
 
         ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL,
                            "filter_ldlm_cb_client", &obd->obd_ldlm_client);
 
+        /* Create a non-replaying connection for recovery logging, so that
+         * we don't create a client entry for this local connection, and do
+         * not log or assign transaction numbers for logging operations. */
+#ifdef ENABLE_ORPHANS
+        filter->fo_catalog = filter_get_catalog(obd);
+        if (IS_ERR(filter->fo_catalog))
+                GOTO(err_post, rc = PTR_ERR(filter->fo_catalog));
+#endif
+
         RETURN(0);
 
 err_post:
         RETURN(0);
 
 err_post:
@@ -1284,82 +1391,67 @@ static int filter_setup(struct obd_device *obd, obd_count len, void *buf)
         struct obd_ioctl_data* data = buf;
         char *option = NULL;
 
         struct obd_ioctl_data* data = buf;
         char *option = NULL;
 
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+        /* bug 1577: implement async-delete for 2.5 */
         if (!strcmp(data->ioc_inlbuf2, "ext3"))
                 option = "asyncdel";
         if (!strcmp(data->ioc_inlbuf2, "ext3"))
                 option = "asyncdel";
+#endif
 
         return filter_common_setup(obd, len, buf, option);
 }
 
 
         return filter_common_setup(obd, len, buf, option);
 }
 
-/* sanobd setup methods - use a specific mount option */
-static int filter_san_setup(struct obd_device *obd, obd_count len, void *buf)
-{
-        struct obd_ioctl_data* data = buf;
-        char *option = NULL;
-
-        if (!data->ioc_inlbuf2)
-                RETURN(-EINVAL);
-
-        /* for extN/ext3 filesystem, we must mount it with 'writeback' mode */
-        if (!strcmp(data->ioc_inlbuf2, "extN"))
-                option = "data=writeback";
-        else if (!strcmp(data->ioc_inlbuf2, "ext3"))
-                option = "data=writeback,asyncdel";
-        else
-                LBUG(); /* just a reminder */
-
-        return filter_common_setup(obd, len, buf, option);
-}
-
-static int filter_cleanup(struct obd_device *obd, int force, int failover)
+static int filter_cleanup(struct obd_device *obd, int flags)
 {
 {
-        struct super_block *sb;
+        struct filter_obd *filter = &obd->u.filter;
         ENTRY;
 
         ENTRY;
 
-        if (failover)
+        if (flags & OBD_OPT_FAILOVER)
                 CERROR("%s: shutting down for failover; client state will"
                        " be preserved.\n", obd->obd_name);
 
         if (!list_empty(&obd->obd_exports)) {
                 CERROR("%s: still has clients!\n", obd->obd_name);
                 CERROR("%s: shutting down for failover; client state will"
                        " be preserved.\n", obd->obd_name);
 
         if (!list_empty(&obd->obd_exports)) {
                 CERROR("%s: still has clients!\n", obd->obd_name);
-                class_disconnect_exports(obd, failover);
+                class_disconnect_exports(obd, flags);
                 if (!list_empty(&obd->obd_exports)) {
                         CERROR("still has exports after forced cleanup?\n");
                         RETURN(-EBUSY);
                 }
         }
 
                 if (!list_empty(&obd->obd_exports)) {
                         CERROR("still has exports after forced cleanup?\n");
                         RETURN(-EBUSY);
                 }
         }
 
+#ifdef ENABLE_ORPHANS
+        filter_put_catalog(filter->fo_catalog);
+#endif
+
         ldlm_namespace_free(obd->obd_namespace);
 
         ldlm_namespace_free(obd->obd_namespace);
 
-        sb = obd->u.filter.fo_sb;
-        if (!sb)
+        if (filter->fo_sb == NULL)
                 RETURN(0);
 
         filter_post(obd);
 
                 RETURN(0);
 
         filter_post(obd);
 
-        shrink_dcache_parent(sb->s_root);
-        unlock_kernel();
+        shrink_dcache_parent(filter->fo_sb->s_root);
+        filter->fo_sb = 0;
 
 
-        if (atomic_read(&obd->u.filter.fo_vfsmnt->mnt_count) > 1){
+        if (atomic_read(&filter->fo_vfsmnt->mnt_count) > 1)
                 CERROR("%s: mount point busy, mnt_count: %d\n", obd->obd_name,
                 CERROR("%s: mount point busy, mnt_count: %d\n", obd->obd_name,
-                       atomic_read(&obd->u.filter.fo_vfsmnt->mnt_count));
-        }
-
-        mntput(obd->u.filter.fo_vfsmnt);
-        obd->u.filter.fo_sb = 0;
-/*        destroy_buffers(obd->u.filter.fo_sb->s_dev);*/
+                       atomic_read(&filter->fo_vfsmnt->mnt_count));
 
 
+        unlock_kernel();
+        mntput(filter->fo_vfsmnt);
+        //destroy_buffers(filter->fo_sb->s_dev);
+        filter->fo_sb = NULL;
         fsfilt_put_ops(obd->obd_fsops);
         lock_kernel();
 
         RETURN(0);
 }
 
         fsfilt_put_ops(obd->obd_fsops);
         lock_kernel();
 
         RETURN(0);
 }
 
-int filter_attach(struct obd_device *obd, obd_count len, void *data)
+static int filter_attach(struct obd_device *obd, obd_count len, void *data)
 {
         struct lprocfs_static_vars lvars;
         int rc;
 
 {
         struct lprocfs_static_vars lvars;
         int rc;
 
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(filter, &lvars);
         rc = lprocfs_obd_attach(obd, lvars.obd_vars);
         if (rc != 0)
                 return rc;
         rc = lprocfs_obd_attach(obd, lvars.obd_vars);
         if (rc != 0)
                 return rc;
@@ -1376,7 +1468,7 @@ int filter_attach(struct obd_device *obd, obd_count len, void *data)
         return rc;
 }
 
         return rc;
 }
 
-int filter_detach(struct obd_device *dev)
+static int filter_detach(struct obd_device *dev)
 {
         lprocfs_free_obd_stats(dev);
         return lprocfs_obd_detach(dev);
 {
         lprocfs_free_obd_stats(dev);
         return lprocfs_obd_detach(dev);
@@ -1391,17 +1483,16 @@ static int filter_connect(struct lustre_handle *conn, struct obd_device *obd,
         struct filter_client_data *fcd;
         struct filter_obd *filter = &obd->u.filter;
         int rc;
         struct filter_client_data *fcd;
         struct filter_obd *filter = &obd->u.filter;
         int rc;
-
         ENTRY;
 
         ENTRY;
 
-        if (!conn || !obd || !cluuid)
+        if (conn == NULL || obd == NULL || cluuid == NULL)
                 RETURN(-EINVAL);
 
         rc = class_connect(conn, obd, cluuid);
         if (rc)
                 RETURN(rc);
         exp = class_conn2export(conn);
                 RETURN(-EINVAL);
 
         rc = class_connect(conn, obd, cluuid);
         if (rc)
                 RETURN(rc);
         exp = class_conn2export(conn);
-        LASSERT(exp);
+        LASSERT(exp != NULL);
 
         fed = &exp->exp_filter_data;
         class_export_put(exp);
 
         fed = &exp->exp_filter_data;
         class_export_put(exp);
@@ -1450,37 +1541,37 @@ static void filter_destroy_export(struct obd_export *exp)
                 list_del(&ffd->ffd_export_list);
                 spin_unlock(&fed->fed_lock);
 
                 list_del(&ffd->ffd_export_list);
                 spin_unlock(&fed->fed_lock);
 
-                CERROR("force close file %*s (hdl %p:"LPX64") on disconnect\n",
-                       ffd->ffd_file->f_dentry->d_name.len,
+                CDEBUG(D_INFO, "force close file %*s (hdl %p:"LPX64") on "
+                       "disconnect\n", ffd->ffd_file->f_dentry->d_name.len,
                        ffd->ffd_file->f_dentry->d_name.name,
                        ffd, ffd->ffd_handle.h_cookie);
 
                        ffd->ffd_file->f_dentry->d_name.name,
                        ffd, ffd->ffd_handle.h_cookie);
 
-                filter_close_internal(exp, ffd, NULL, exp->exp_failover);
+                filter_close_internal(exp, ffd, NULL, exp->exp_flags);
                 spin_lock(&fed->fed_lock);
         }
         spin_unlock(&fed->fed_lock);
 
         if (exp->exp_obd->obd_replayable)
                 spin_lock(&fed->fed_lock);
         }
         spin_unlock(&fed->fed_lock);
 
         if (exp->exp_obd->obd_replayable)
-                filter_client_free(exp, exp->exp_failover);
+                filter_client_free(exp, exp->exp_flags);
         EXIT;
 }
 
 /* also incredibly similar to mds_disconnect */
         EXIT;
 }
 
 /* also incredibly similar to mds_disconnect */
-static int filter_disconnect(struct lustre_handle *conn, int failover)
+static int filter_disconnect(struct lustre_handle *conn, int flags)
 {
         struct obd_export *exp = class_conn2export(conn);
 {
         struct obd_export *exp = class_conn2export(conn);
+        unsigned long irqflags;
         int rc;
         int rc;
-        unsigned long flags;
         ENTRY;
 
         LASSERT(exp);
         ldlm_cancel_locks_for_export(exp);
 
         ENTRY;
 
         LASSERT(exp);
         ldlm_cancel_locks_for_export(exp);
 
-        spin_lock_irqsave(&exp->exp_lock, flags);
-        exp->exp_failover = failover;
-        spin_unlock_irqrestore(&exp->exp_lock, flags);
+        spin_lock_irqsave(&exp->exp_lock, irqflags);
+        exp->exp_flags = flags;
+        spin_unlock_irqrestore(&exp->exp_lock, irqflags);
 
 
-        rc = class_disconnect(conn, failover);
+        rc = class_disconnect(conn, flags);
 
         fsfilt_sync(exp->exp_obd, exp->exp_obd->u.filter.fo_sb);
         class_export_put(exp);
 
         fsfilt_sync(exp->exp_obd, exp->exp_obd->u.filter.fo_sb);
         class_export_put(exp);
@@ -1488,29 +1579,8 @@ static int filter_disconnect(struct lustre_handle *conn, int failover)
         RETURN(rc);
 }
 
         RETURN(rc);
 }
 
-static void filter_from_inode(struct obdo *oa, struct inode *inode, int valid)
-{
-        int type = oa->o_mode & S_IFMT;
-        ENTRY;
-
-        CDEBUG(D_INFO, "src inode %lu (%p), dst obdo "LPU64" valid 0x%08x\n",
-               inode->i_ino, inode, oa->o_id, valid);
-        /* Don't copy the inode number in place of the object ID */
-        obdo_from_inode(oa, inode, valid);
-        oa->o_mode &= ~S_IFMT;
-        oa->o_mode |= type;
-
-        if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
-                obd_rdev rdev = kdev_t_to_nr(inode->i_rdev);
-                oa->o_rdev = rdev;
-                oa->o_valid |= OBD_MD_FLRDEV;
-        }
-
-        EXIT;
-}
-
-static struct dentry *__filter_oa2dentry(struct lustre_handle *conn,
-                                         struct obdo *oa, char *what)
+struct dentry *__filter_oa2dentry(struct obd_device *obd,
+                                  struct obdo *oa, const char *what)
 {
         struct dentry *dchild = NULL;
 
 {
         struct dentry *dchild = NULL;
 
@@ -1525,22 +1595,14 @@ static struct dentry *__filter_oa2dentry(struct lustre_handle *conn,
                         LASSERT(fdd->fdd_magic == FILTER_DENTRY_MAGIC);
                         filter_ffd_put(ffd);
 
                         LASSERT(fdd->fdd_magic == FILTER_DENTRY_MAGIC);
                         filter_ffd_put(ffd);
 
-                        CDEBUG(D_INODE,
-                               "got child objid %*s: %p, count = %d\n",
-                               dchild->d_name.len, dchild->d_name.name,
+                        CDEBUG(D_INODE,"%s got child objid %*s: %p, count %d\n",
+                               what, dchild->d_name.len, dchild->d_name.name,
                                dchild, atomic_read(&dchild->d_count));
                 }
         }
 
                                dchild, atomic_read(&dchild->d_count));
                 }
         }
 
-        if (!dchild) {
-                struct obd_device *obd = class_conn2obd(conn);
-
-                if (!obd) {
-                        CERROR("invalid client cookie "LPX64"\n", conn->cookie);
-                        RETURN(ERR_PTR(-EINVAL));
-                }
+        if (!dchild)
                 dchild = filter_fid2dentry(obd, NULL, oa->o_mode, oa->o_id);
                 dchild = filter_fid2dentry(obd, NULL, oa->o_mode, oa->o_id);
-        }
 
         if (IS_ERR(dchild)) {
                 CERROR("%s error looking up object: "LPU64"\n", what, oa->o_id);
 
         if (IS_ERR(dchild)) {
                 CERROR("%s error looking up object: "LPU64"\n", what, oa->o_id);
@@ -1556,20 +1618,27 @@ static struct dentry *__filter_oa2dentry(struct lustre_handle *conn,
         return dchild;
 }
 
         return dchild;
 }
 
-#define filter_oa2dentry(conn, oa) __filter_oa2dentry(conn, oa, __FUNCTION__)
-
 static int filter_getattr(struct lustre_handle *conn, struct obdo *oa,
                           struct lov_stripe_md *md)
 {
         struct dentry *dentry = NULL;
 static int filter_getattr(struct lustre_handle *conn, struct obdo *oa,
                           struct lov_stripe_md *md)
 {
         struct dentry *dentry = NULL;
+        struct obd_device *obd;
         int rc = 0;
         ENTRY;
 
         int rc = 0;
         ENTRY;
 
-        dentry = filter_oa2dentry(conn, oa);
+        obd = class_conn2obd(conn);
+        if (obd == NULL) {
+                CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",conn->cookie);
+                RETURN(-EINVAL);
+        }
+
+        dentry = filter_oa2dentry(obd, oa);
         if (IS_ERR(dentry))
                 RETURN(PTR_ERR(dentry));
 
         if (IS_ERR(dentry))
                 RETURN(PTR_ERR(dentry));
 
-        filter_from_inode(oa, dentry->d_inode, oa->o_valid);
+        /* Limit the valid bits in the return data to what we actually use */
+        oa->o_valid = OBD_MD_FLID;
+        obdo_from_inode(oa, dentry->d_inode, FILTER_VALID_FLAGS);
 
         f_dput(dentry);
         RETURN(rc);
 
         f_dput(dentry);
         RETURN(rc);
@@ -1580,48 +1649,55 @@ static int filter_setattr(struct lustre_handle *conn, struct obdo *oa,
                           struct lov_stripe_md *md, struct obd_trans_info *oti)
 {
         struct obd_run_ctxt saved;
                           struct lov_stripe_md *md, struct obd_trans_info *oti)
 {
         struct obd_run_ctxt saved;
-        struct obd_export *export = class_conn2export(conn);
-        struct obd_device *obd = class_conn2obd(conn);
-        struct filter_obd *filter = &obd->u.filter;
+        struct obd_export *exp;
+        struct filter_obd *filter;
         struct dentry *dentry;
         struct iattr iattr;
         struct dentry *dentry;
         struct iattr iattr;
-        struct inode *inode;
-        void * handle;
+        void *handle;
         int rc, rc2;
         ENTRY;
 
         int rc, rc2;
         ENTRY;
 
-        dentry = filter_oa2dentry(conn, oa);
+        LASSERT(oti != NULL);
+        exp = class_conn2export(conn);
+        if (!exp) {
+                CERROR("invalid client cookie "LPX64"\n", conn->cookie);
+                RETURN(-EINVAL);
+        }
 
 
+        dentry = filter_oa2dentry(exp->exp_obd, oa);
         if (IS_ERR(dentry))
                 GOTO(out_exp, rc = PTR_ERR(dentry));
 
         if (IS_ERR(dentry))
                 GOTO(out_exp, rc = PTR_ERR(dentry));
 
+        filter = &exp->exp_obd->u.filter;
+
         iattr_from_obdo(&iattr, oa, oa->o_valid);
         iattr_from_obdo(&iattr, oa, oa->o_valid);
-        iattr.ia_mode = (iattr.ia_mode & ~S_IFMT) | S_IFREG;
-        inode = dentry->d_inode;
 
         push_ctxt(&saved, &filter->fo_ctxt, NULL);
         lock_kernel();
 
         push_ctxt(&saved, &filter->fo_ctxt, NULL);
         lock_kernel();
+
+        /* XXX this could be a rwsem instead, if filter_preprw played along */
         if (iattr.ia_valid & ATTR_SIZE)
         if (iattr.ia_valid & ATTR_SIZE)
-                down(&inode->i_sem);
+                down(&dentry->d_inode->i_sem);
 
 
-        handle = fsfilt_start(obd, dentry->d_inode, FSFILT_OP_SETATTR);
+        handle = fsfilt_start(exp->exp_obd, dentry->d_inode, FSFILT_OP_SETATTR,
+                              oti);
         if (IS_ERR(handle))
                 GOTO(out_unlock, rc = PTR_ERR(handle));
 
         if (IS_ERR(handle))
                 GOTO(out_unlock, rc = PTR_ERR(handle));
 
-        rc = fsfilt_setattr(obd, dentry, handle, &iattr, 1);
-        rc = filter_finish_transno(export, handle, oti, rc);
-        rc2 = fsfilt_commit(obd, dentry->d_inode, handle, 0);
+        rc = fsfilt_setattr(exp->exp_obd, dentry, handle, &iattr, 1);
+        rc = filter_finish_transno(exp, oti, rc);
+        rc2 = fsfilt_commit(exp->exp_obd, dentry->d_inode, handle, 0);
         if (rc2) {
                 CERROR("error on commit, err = %d\n", rc2);
                 if (!rc)
                         rc = rc2;
         }
 
         if (rc2) {
                 CERROR("error on commit, err = %d\n", rc2);
                 if (!rc)
                         rc = rc2;
         }
 
-        if (iattr.ia_valid & ATTR_SIZE) {
-                up(&inode->i_sem);
-                oa->o_valid = OBD_MD_FLBLOCKS | OBD_MD_FLCTIME | OBD_MD_FLMTIME;
-                obdo_from_inode(oa, inode, oa->o_valid);
-        }
+        if (iattr.ia_valid & ATTR_SIZE)
+                up(&dentry->d_inode->i_sem);
+
+        oa->o_valid = OBD_MD_FLID;
+        obdo_from_inode(oa, dentry->d_inode, FILTER_VALID_FLAGS);
 
 out_unlock:
         unlock_kernel();
 
 out_unlock:
         unlock_kernel();
@@ -1629,7 +1705,7 @@ out_unlock:
 
         f_dput(dentry);
  out_exp:
 
         f_dput(dentry);
  out_exp:
-        class_export_put(export);
+        class_export_put(exp);
         RETURN(rc);
 }
 
         RETURN(rc);
 }
 
@@ -1637,7 +1713,7 @@ static int filter_open(struct lustre_handle *conn, struct obdo *oa,
                        struct lov_stripe_md *ea, struct obd_trans_info *oti,
                        struct obd_client_handle *och)
 {
                        struct lov_stripe_md *ea, struct obd_trans_info *oti,
                        struct obd_client_handle *och)
 {
-        struct obd_export *export = NULL;
+        struct obd_export *exp;
         struct lustre_handle *handle;
         struct filter_file_data *ffd;
         struct file *filp;
         struct lustre_handle *handle;
         struct filter_file_data *ffd;
         struct file *filp;
@@ -1645,19 +1721,19 @@ static int filter_open(struct lustre_handle *conn, struct obdo *oa,
         int rc = 0;
         ENTRY;
 
         int rc = 0;
         ENTRY;
 
-        export = class_conn2export(conn);
-        if (!export) {
-                CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
-                       conn->cookie);
-                GOTO(out, rc = -EINVAL);
+        exp = class_conn2export(conn);
+        if (exp == NULL) {
+                CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",conn->cookie);
+                RETURN(-EINVAL);
         }
 
         }
 
-        filp = filter_obj_open(export, oa->o_id, oa->o_mode,
+        filp = filter_obj_open(exp, oti, oa->o_id, oa->o_mode,
                                LCK_PR, &parent_lockh);
         if (IS_ERR(filp))
                 GOTO(out, rc = PTR_ERR(filp));
 
                                LCK_PR, &parent_lockh);
         if (IS_ERR(filp))
                 GOTO(out, rc = PTR_ERR(filp));
 
-        filter_from_inode(oa, filp->f_dentry->d_inode, oa->o_valid);
+        oa->o_valid = OBD_MD_FLID;
+        obdo_from_inode(oa, filp->f_dentry->d_inode, FILTER_VALID_FLAGS);
 
         ffd = filp->private_data;
         handle = obdo_handle(oa);
 
         ffd = filp->private_data;
         handle = obdo_handle(oa);
@@ -1665,7 +1741,7 @@ static int filter_open(struct lustre_handle *conn, struct obdo *oa,
         oa->o_valid |= OBD_MD_FLHANDLE;
 
 out:
         oa->o_valid |= OBD_MD_FLHANDLE;
 
 out:
-        class_export_put(export);
+        class_export_put(exp);
         if (!rc) {
                 memcpy(&oti->oti_ack_locks[0].lock, &parent_lockh,
                        sizeof(parent_lockh));
         if (!rc) {
                 memcpy(&oti->oti_ack_locks[0].lock, &parent_lockh,
                        sizeof(parent_lockh));
@@ -1677,15 +1753,16 @@ out:
 static int filter_close(struct lustre_handle *conn, struct obdo *oa,
                         struct lov_stripe_md *ea, struct obd_trans_info *oti)
 {
 static int filter_close(struct lustre_handle *conn, struct obdo *oa,
                         struct lov_stripe_md *ea, struct obd_trans_info *oti)
 {
-        struct obd_export *exp = class_conn2export(conn);
+        struct obd_export *exp;
         struct filter_file_data *ffd;
         struct filter_export_data *fed;
         int rc;
         ENTRY;
 
         struct filter_file_data *ffd;
         struct filter_export_data *fed;
         int rc;
         ENTRY;
 
-        if (!exp) {
-                CDEBUG(D_IOCTL, "invalid client cookie"LPX64"\n", conn->cookie);
-                GOTO(out, rc = -EINVAL);
+        exp = class_conn2export(conn);
+        if (exp == NULL) {
+                CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",conn->cookie);
+                RETURN(-EINVAL);
         }
 
         if (!(oa->o_valid & OBD_MD_FLHANDLE)) {
         }
 
         if (!(oa->o_valid & OBD_MD_FLHANDLE)) {
@@ -1705,6 +1782,9 @@ static int filter_close(struct lustre_handle *conn, struct obdo *oa,
         list_del(&ffd->ffd_export_list);
         spin_unlock(&fed->fed_lock);
 
         list_del(&ffd->ffd_export_list);
         spin_unlock(&fed->fed_lock);
 
+        oa->o_valid = OBD_MD_FLID;
+        obdo_from_inode(oa,ffd->ffd_file->f_dentry->d_inode,FILTER_VALID_FLAGS);
+
         rc = filter_close_internal(exp, ffd, oti, 0);
         filter_ffd_put(ffd);
         GOTO(out, rc);
         rc = filter_close_internal(exp, ffd, oti, 0);
         filter_ffd_put(ffd);
         GOTO(out, rc);
@@ -1717,24 +1797,25 @@ static int filter_create(struct lustre_handle *conn, struct obdo *oa,
                          struct lov_stripe_md **ea, struct obd_trans_info *oti)
 {
         struct obd_export *exp;
                          struct lov_stripe_md **ea, struct obd_trans_info *oti)
 {
         struct obd_export *exp;
-        struct obd_device *obd = class_conn2obd(conn);
-        struct filter_obd *filter = &obd->u.filter;
+        struct obd_device *obd;
+        struct filter_obd *filter;
         struct obd_run_ctxt saved;
         struct lustre_handle parent_lockh;
         struct dentry *dparent;
         struct obd_run_ctxt saved;
         struct lustre_handle parent_lockh;
         struct dentry *dparent;
+        struct ll_fid mds_fid = { .id = 0 };
         struct dentry *dchild = NULL;
         struct dentry *dchild = NULL;
-        struct iattr;
         void *handle;
         int err, rc, cleanup_phase;
         ENTRY;
 
         void *handle;
         int err, rc, cleanup_phase;
         ENTRY;
 
-        if (!obd) {
-                CERROR("invalid client cookie "LPX64"\n", conn->cookie);
+        exp = class_conn2export(conn);
+        if (exp == NULL) {
+                CDEBUG(D_IOCTL,"invalid client cookie "LPX64"\n", conn->cookie);
                 RETURN(-EINVAL);
         }
 
                 RETURN(-EINVAL);
         }
 
-        exp = class_conn2export(conn);
-
+        obd = exp->exp_obd;
+        filter = &obd->u.filter;
         push_ctxt(&saved, &filter->fo_ctxt, NULL);
  retry:
         oa->o_id = filter_next_id(filter);
         push_ctxt(&saved, &filter->fo_ctxt, NULL);
  retry:
         oa->o_id = filter_next_id(filter);
@@ -1760,21 +1841,42 @@ static int filter_create(struct lustre_handle *conn, struct obdo *oa,
         }
 
         cleanup_phase = 2;
         }
 
         cleanup_phase = 2;
-        handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_CREATE);
+        handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_CREATE_LOG, oti);
         if (IS_ERR(handle))
                 GOTO(cleanup, rc = PTR_ERR(handle));
 
         rc = vfs_create(dparent->d_inode, dchild, oa->o_mode);
         if (IS_ERR(handle))
                 GOTO(cleanup, rc = PTR_ERR(handle));
 
         rc = vfs_create(dparent->d_inode, dchild, oa->o_mode);
-        if (rc)
+        if (rc) {
                 CERROR("create failed rc = %d\n", rc);
                 CERROR("create failed rc = %d\n", rc);
+        } else if (oa->o_valid & (OBD_MD_FLCTIME|OBD_MD_FLMTIME|OBD_MD_FLSIZE)){
+                struct iattr attr;
 
 
-        rc = filter_finish_transno(exp, handle, oti, rc);
-        err = filter_update_server_data(filter->fo_rcvd_filp, filter->fo_fsd);
-        if (err) {
-                CERROR("unable to write lastobjid but file created\n");
-                if (!rc)
-                        rc = err;
+                iattr_from_obdo(&attr, oa, oa->o_valid);
+                rc = fsfilt_setattr(obd, dchild, handle, &attr, 1);
+                if (rc)
+                        CERROR("create setattr failed rc = %d\n", rc);
         }
         }
+        rc = filter_finish_transno(exp, oti, rc);
+        err = filter_update_server_data(obd, filter->fo_rcvd_filp,
+                                        filter->fo_fsd);
+        if (err)
+                CERROR("unable to write lastobjid but file created\n");
+
+        /* Set flags for fields we have set in the inode struct */
+        if (!rc && mds_fid.id && (oa->o_valid & OBD_MD_FLCOOKIE)) {
+                err = filter_log_op_create(obd->u.filter.fo_catalog, &mds_fid,
+                                           dchild->d_inode->i_ino,
+                                           dchild->d_inode->i_generation,
+                                           oti->oti_logcookies);
+                if (err) {
+                        CERROR("error logging create record: rc %d\n", err);
+                        oa->o_valid = OBD_MD_FLID;
+                } else {
+                        oa->o_valid = OBD_MD_FLID | OBD_MD_FLCOOKIE;
+                }
+        } else
+                oa->o_valid = OBD_MD_FLID;
+
         err = fsfilt_commit(obd, dparent->d_inode, handle, 0);
         if (err) {
                 CERROR("error on commit, err = %d\n", err);
         err = fsfilt_commit(obd, dparent->d_inode, handle, 0);
         if (err) {
                 CERROR("error on commit, err = %d\n", err);
@@ -1786,9 +1888,7 @@ static int filter_create(struct lustre_handle *conn, struct obdo *oa,
                 GOTO(cleanup, rc);
 
         /* Set flags for fields we have set in the inode struct */
                 GOTO(cleanup, rc);
 
         /* Set flags for fields we have set in the inode struct */
-        oa->o_valid = OBD_MD_FLID | OBD_MD_FLBLKSZ | OBD_MD_FLBLOCKS |
-                 OBD_MD_FLMTIME | OBD_MD_FLATIME | OBD_MD_FLCTIME;
-        filter_from_inode(oa, dchild->d_inode, oa->o_valid);
+        obdo_from_inode(oa, dchild->d_inode, FILTER_VALID_FLAGS);
 
         EXIT;
 cleanup:
 
         EXIT;
 cleanup:
@@ -1819,24 +1919,25 @@ static int filter_destroy(struct lustre_handle *conn, struct obdo *oa,
                           struct lov_stripe_md *ea, struct obd_trans_info *oti)
 {
         struct obd_export *exp;
                           struct lov_stripe_md *ea, struct obd_trans_info *oti)
 {
         struct obd_export *exp;
-        struct obd_device *obd = class_conn2obd(conn);
-        struct filter_obd *filter = &obd->u.filter;
-        struct dentry *dparent, *dchild = NULL;
+        struct obd_device *obd;
+        struct filter_obd *filter;
+        struct dentry *dchild = NULL, *dparent = NULL;
         struct filter_dentry_data *fdd;
         struct obd_run_ctxt saved;
         void *handle = NULL;
         struct lustre_handle parent_lockh;
         struct filter_dentry_data *fdd;
         struct obd_run_ctxt saved;
         void *handle = NULL;
         struct lustre_handle parent_lockh;
+        struct llog_cookie *fcc = NULL;
         int rc, rc2, cleanup_phase = 0;
         ENTRY;
 
         int rc, rc2, cleanup_phase = 0;
         ENTRY;
 
-        if (!obd) {
-                CERROR("invalid client cookie "LPX64"\n", conn->cookie);
+        exp = class_conn2export(conn);
+        if (exp == NULL) {
+                CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",conn->cookie);
                 RETURN(-EINVAL);
         }
 
                 RETURN(-EINVAL);
         }
 
-        exp = class_conn2export(conn);
-
-        CDEBUG(D_INODE, "destroying objid "LPU64"\n", oa->o_id);
+        obd = exp->exp_obd;
+        filter = &obd->u.filter;
 
         push_ctxt(&saved, &filter->fo_ctxt, NULL);
         dparent = filter_parent_lock(obd, oa->o_mode, oa->o_id,
 
         push_ctxt(&saved, &filter->fo_ctxt, NULL);
         dparent = filter_parent_lock(obd, oa->o_mode, oa->o_id,
@@ -1850,38 +1951,53 @@ static int filter_destroy(struct lustre_handle *conn, struct obdo *oa,
                 GOTO(cleanup, rc = -ENOENT);
         cleanup_phase = 2;
 
                 GOTO(cleanup, rc = -ENOENT);
         cleanup_phase = 2;
 
-        if (!dchild->d_inode) {
+        if (dchild->d_inode == NULL) {
                 CERROR("destroying non-existent object "LPU64"\n", oa->o_id);
                 GOTO(cleanup, rc = -ENOENT);
         }
                 CERROR("destroying non-existent object "LPU64"\n", oa->o_id);
                 GOTO(cleanup, rc = -ENOENT);
         }
-
-        handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_UNLINK);
+        handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_UNLINK_LOG, oti);
         if (IS_ERR(handle))
                 GOTO(cleanup, rc = PTR_ERR(handle));
         cleanup_phase = 3;
 
         fdd = dchild->d_fsdata;
         if (IS_ERR(handle))
                 GOTO(cleanup, rc = PTR_ERR(handle));
         cleanup_phase = 3;
 
         fdd = dchild->d_fsdata;
-        if (fdd && atomic_read(&fdd->fdd_open_count)) {
-                LASSERT(fdd->fdd_magic = FILTER_DENTRY_MAGIC);
+
+        /* Our MDC connection is established by the MDS to us */
+        if ((oa->o_valid & OBD_MD_FLCOOKIE) && filter->fo_mdc_imp != NULL) {
+                OBD_ALLOC(fcc, sizeof(*fcc));
+                if (fcc != NULL)
+                        memcpy(fcc, obdo_logcookie(oa), sizeof(*fcc));
+        }
+
+        if (fdd != NULL && atomic_read(&fdd->fdd_open_count)) {
+                LASSERT(fdd->fdd_magic == FILTER_DENTRY_MAGIC);
                 if (!(fdd->fdd_flags & FILTER_FLAG_DESTROY)) {
                         fdd->fdd_flags |= FILTER_FLAG_DESTROY;
                 if (!(fdd->fdd_flags & FILTER_FLAG_DESTROY)) {
                         fdd->fdd_flags |= FILTER_FLAG_DESTROY;
-                        /* XXX put into PENDING directory in case of crash */
+
+#ifdef ENABLE_ORPHANS
+                        filter_log_op_orphan(filter->fo_catalog, oa->o_id,
+                                             oa->o_generation,&fdd->fdd_cookie);
+#endif
                         CDEBUG(D_INODE,
                                "defer destroy of %dx open objid "LPU64"\n",
                                atomic_read(&fdd->fdd_open_count), oa->o_id);
                         CDEBUG(D_INODE,
                                "defer destroy of %dx open objid "LPU64"\n",
                                atomic_read(&fdd->fdd_open_count), oa->o_id);
-                } else
+                } else {
                         CDEBUG(D_INODE,
                                "repeat destroy of %dx open objid "LPU64"\n",
                                atomic_read(&fdd->fdd_open_count), oa->o_id);
                         CDEBUG(D_INODE,
                                "repeat destroy of %dx open objid "LPU64"\n",
                                atomic_read(&fdd->fdd_open_count), oa->o_id);
+                }
                 GOTO(cleanup, rc = 0);
         }
 
                 GOTO(cleanup, rc = 0);
         }
 
-        rc = filter_destroy_internal(obd, dparent, dchild);
+        rc = filter_destroy_internal(obd, oa->o_id, dparent, dchild);
 
 cleanup:
         switch(cleanup_phase) {
         case 3:
 
 cleanup:
         switch(cleanup_phase) {
         case 3:
-                rc = filter_finish_transno(exp, handle, oti, rc);
+                if (fcc != NULL)
+                        fsfilt_set_last_rcvd(obd, 0, oti->oti_handle,
+                                             filter_cancel_cookies_cb, fcc);
+                rc = filter_finish_transno(exp, oti, rc);
                 rc2 = fsfilt_commit(obd, dparent->d_inode, handle, 0);
                 if (rc2) {
                         CERROR("error on commit, err = %d\n", rc2);
                 rc2 = fsfilt_commit(obd, dparent->d_inode, handle, 0);
                 if (rc2) {
                         CERROR("error on commit, err = %d\n", rc2);
@@ -1930,742 +2046,17 @@ static int filter_truncate(struct lustre_handle *conn, struct obdo *oa,
         RETURN(error);
 }
 
         RETURN(error);
 }
 
-static inline void lustre_put_page(struct page *page)
-{
-        page_cache_release(page);
-}
-
-static int filter_start_page_read(struct inode *inode, struct niobuf_local *lnb)
-{
-        struct address_space *mapping = inode->i_mapping;
-        struct page *page;
-        unsigned long index = lnb->offset >> PAGE_SHIFT;
-        int rc;
-
-        page = grab_cache_page(mapping, index); /* locked page */
-        if (IS_ERR(page))
-                return lnb->rc = PTR_ERR(page);
-
-        lnb->page = page;
-
-        if (inode->i_size < lnb->offset + lnb->len - 1)
-                lnb->rc = inode->i_size - lnb->offset;
-        else
-                lnb->rc = lnb->len;
-
-        if (PageUptodate(page)) {
-                unlock_page(page);
-                return 0;
-        }
-
-        rc = mapping->a_ops->readpage(NULL, page);
-        if (rc < 0) {
-                CERROR("page index %lu, rc = %d\n", index, rc);
-                lnb->page = NULL;
-                lustre_put_page(page);
-                return lnb->rc = rc;
-        }
-
-        return 0;
-}
-
-static int filter_finish_page_read(struct niobuf_local *lnb)
-{
-        if (lnb->page == NULL)
-                return 0;
-
-        if (PageUptodate(lnb->page))
-                return 0;
-
-        wait_on_page(lnb->page);
-        if (!PageUptodate(lnb->page)) {
-                CERROR("page index %lu/offset "LPX64" not uptodate\n",
-                       lnb->page->index, lnb->offset);
-                GOTO(err_page, lnb->rc = -EIO);
-        }
-        if (PageError(lnb->page)) {
-                CERROR("page index %lu/offset "LPX64" has error\n",
-                       lnb->page->index, lnb->offset);
-                GOTO(err_page, lnb->rc = -EIO);
-        }
-
-        return 0;
-
-err_page:
-        lustre_put_page(lnb->page);
-        lnb->page = NULL;
-        return lnb->rc;
-}
-
-static struct page *lustre_get_page_write(struct inode *inode,
-                                          unsigned long index)
-{
-        struct address_space *mapping = inode->i_mapping;
-        struct page *page;
-        int rc;
-
-        page = grab_cache_page(mapping, index); /* locked page */
-
-        if (!IS_ERR(page)) {
-                /* Note: Called with "O" and "PAGE_SIZE" this is essentially
-                 * a no-op for most filesystems, because we write the whole
-                 * page.  For partial-page I/O this will read in the page.
-                 */
-                rc = mapping->a_ops->prepare_write(NULL, page, 0, PAGE_SIZE);
-                if (rc) {
-                        CERROR("page index %lu, rc = %d\n", index, rc);
-                        if (rc != -ENOSPC)
-                                LBUG();
-                        GOTO(err_unlock, rc);
-                }
-                /* XXX not sure if we need this if we are overwriting page */
-                if (PageError(page)) {
-                        CERROR("error on page index %lu, rc = %d\n", index, rc);
-                        LBUG();
-                        GOTO(err_unlock, rc = -EIO);
-                }
-        }
-        return page;
-
-err_unlock:
-        unlock_page(page);
-        lustre_put_page(page);
-        return ERR_PTR(rc);
-}
-
-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-int waitfor_one_page(struct page *page)
-{
-        wait_on_page_locked(page);
-        return 0;
-}
-#endif
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-/* We should only change the file mtime (and not the ctime, like
- * update_inode_times() in generic_file_write()) when we only change data.
- */
-static inline void inode_update_time(struct inode *inode, int ctime_too)
-{
-        time_t now = CURRENT_TIME;
-        if (inode->i_mtime == now && (!ctime_too || inode->i_ctime == now))
-                return;
-        inode->i_mtime = now;
-        if (ctime_too)
-                inode->i_ctime = now;
-        mark_inode_dirty_sync(inode);
-}
-#endif
-
-static int lustre_commit_write(struct niobuf_local *lnb)
-{
-        struct page *page = lnb->page;
-        unsigned from = lnb->offset & ~PAGE_MASK;
-        unsigned to = from + lnb->len;
-        struct inode *inode = page->mapping->host;
-        int err;
-
-        LASSERT(to <= PAGE_SIZE);
-        err = page->mapping->a_ops->commit_write(NULL, page, from, to);
-        if (!err && IS_SYNC(inode))
-                err = waitfor_one_page(page);
-        //SetPageUptodate(page); // the client commit_write will do this
-
-        SetPageReferenced(page);
-        unlock_page(page);
-        lustre_put_page(page);
-        return err;
-}
-
-int filter_get_page_write(struct inode *inode, struct niobuf_local *lnb,
-                          int *pglocked)
-{
-        unsigned long index = lnb->offset >> PAGE_SHIFT;
-        struct address_space *mapping = inode->i_mapping;
-        struct page *page;
-        int rc;
-
-        //ASSERT_PAGE_INDEX(index, GOTO(err, rc = -EINVAL));
-        if (*pglocked)
-                page = grab_cache_page_nowait(mapping, index); /* locked page */
-        else
-                page = grab_cache_page(mapping, index); /* locked page */
-
-
-        /* This page is currently locked, so get a temporary page instead. */
-        if (!page) {
-                CDEBUG(D_ERROR,"ino %lu page %ld locked\n", inode->i_ino,index);
-                page = alloc_pages(GFP_KERNEL, 0); /* locked page */
-                if (!page) {
-                        CERROR("no memory for a temp page\n");
-                        GOTO(err, rc = -ENOMEM);
-                }
-                page->index = index;
-                lnb->page = page;
-                lnb->flags |= N_LOCAL_TEMP_PAGE;
-        } else if (!IS_ERR(page)) {
-                (*pglocked)++;
-
-                rc = mapping->a_ops->prepare_write(NULL, page,
-                                                   lnb->offset & ~PAGE_MASK,
-                                                   lnb->len);
-                if (rc) {
-                        if (rc != -ENOSPC)
-                                CERROR("page index %lu, rc = %d\n", index, rc);
-                        GOTO(err_unlock, rc);
-                }
-                /* XXX not sure if we need this if we are overwriting page */
-                if (PageError(page)) {
-                        CERROR("error on page index %lu, rc = %d\n", index, rc);
-                        LBUG();
-                        GOTO(err_unlock, rc = -EIO);
-                }
-                lnb->page = page;
-        }
-
-        return 0;
-
-err_unlock:
-        unlock_page(page);
-        lustre_put_page(page);
-err:
-        return lnb->rc = rc;
-}
-
-/*
- * We need to balance prepare_write() calls with commit_write() calls.
- * If the page has been prepared, but we have no data for it, we don't
- * want to overwrite valid data on disk, but we still need to zero out
- * data for space which was newly allocated.  Like part of what happens
- * in __block_prepare_write() for newly allocated blocks.
- *
- * XXX currently __block_prepare_write() creates buffers for all the
- *     pages, and the filesystems mark these buffers as BH_New if they
- *     were newly allocated from disk. We use the BH_New flag similarly.
- */
-static int filter_commit_write(struct niobuf_local *lnb, int err)
-{
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        if (err) {
-                unsigned block_start, block_end;
-                struct buffer_head *bh, *head = lnb->page->buffers;
-                unsigned blocksize = head->b_size;
-
-                /* debugging: just seeing if this ever happens */
-                CDEBUG(err == -ENOSPC ? D_INODE : D_ERROR,
-                       "called for ino %lu:%lu on err %d\n",
-                       lnb->page->mapping->host->i_ino, lnb->page->index, err);
-
-                /* Currently one buffer per page, but in the future... */
-                for (bh = head, block_start = 0; bh != head || !block_start;
-                     block_start = block_end, bh = bh->b_this_page) {
-                        block_end = block_start + blocksize;
-                        if (buffer_new(bh)) {
-                                memset(kmap(lnb->page) + block_start, 0,
-                                       blocksize);
-                                kunmap(lnb->page);
-                        }
-                }
-        }
-#endif
-        return lustre_commit_write(lnb);
-}
-
-static int filter_preprw(int cmd, struct obd_export *exp, struct obdo *obdo,
-                         int objcount, struct obd_ioobj *obj,
-                         int niocount, struct niobuf_remote *nb,
-                         struct niobuf_local *res, void **desc_private,
-                         struct obd_trans_info *oti)
-{
-        struct obd_run_ctxt saved;
-        struct obd_device *obd;
-        struct obd_ioobj *o;
-        struct niobuf_remote *rnb;
-        struct niobuf_local *lnb;
-        struct fsfilt_objinfo *fso;
-        struct dentry *dentry;
-        struct inode *inode;
-        int pglocked = 0, rc = 0, i, j, tot_bytes = 0;
-        unsigned long now = jiffies;
-        ENTRY;
-
-        memset(res, 0, niocount * sizeof(*res));
-
-        obd = exp->exp_obd;
-        if (obd == NULL)
-                RETURN(-EINVAL);
-
-        // theoretically we support multi-obj BRW RPCs, but until then...
-        LASSERT(objcount == 1);
-
-        OBD_ALLOC(fso, objcount * sizeof(*fso));
-        if (!fso)
-                RETURN(-ENOMEM);
-
-        push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
-
-        for (i = 0, o = obj; i < objcount; i++, o++) {
-                struct filter_dentry_data *fdd;
-
-                LASSERT(o->ioo_bufcnt);
-
-                dentry = filter_fid2dentry(obd, NULL, o->ioo_type, o->ioo_id);
-
-                if (IS_ERR(dentry))
-                        GOTO(out_objinfo, rc = PTR_ERR(dentry));
-
-                fso[i].fso_dentry = dentry;
-                fso[i].fso_bufcnt = o->ioo_bufcnt;
-
-                if (!dentry->d_inode) {
-                        CERROR("trying to BRW to non-existent file "LPU64"\n",
-                               o->ioo_id);
-                        f_dput(dentry);
-                        GOTO(out_objinfo, rc = -ENOENT);
-                }
-
-                /* If we ever start to support mutli-object BRW RPCs, we will
-                 * need to get locks on mulitple inodes (in order) or use the
-                 * DLM to do the locking for us (and use the same locking in
-                 * filter_setattr() for truncate).  That isn't all, because
-                 * there still exists the possibility of a truncate starting
-                 * a new transaction while holding the ext3 rwsem = write
-                 * while some writes (which have started their transactions
-                 * here) blocking on the ext3 rwsem = read => lock inversion.
-                 *
-                 * The handling gets very ugly when dealing with locked pages.
-                 * It may be easier to just get rid of the locked page code
-                 * (which has problems of its own) and either discover we do
-                 * not need it anymore (i.e. it was a symptom of another bug)
-                 * or ensure we get the page locks in an appropriate order.
-                 */
-                if (cmd & OBD_BRW_WRITE)
-                        down(&dentry->d_inode->i_sem);
-                fdd = dentry->d_fsdata;
-                if (!fdd || !atomic_read(&fdd->fdd_open_count))
-                        CDEBUG(D_PAGE, "I/O to unopened object "LPU64"\n",
-                               o->ioo_id);
-        }
-
-        if (time_after(jiffies, now + 15*HZ))
-                CERROR("slow prep setup %lus\n", (jiffies - now) / HZ);
-
-        if (cmd & OBD_BRW_WRITE) {
-                *desc_private = fsfilt_brw_start(obd, objcount, fso,
-                                                 niocount, nb);
-                if (IS_ERR(*desc_private)) {
-                        rc = PTR_ERR(*desc_private);
-                        CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR,
-                               "error starting transaction: rc = %d\n", rc);
-                        *desc_private = NULL;
-                        GOTO(out_objinfo, rc);
-                }
-        }
-
-        for (i = 0, o = obj, rnb = nb, lnb = res; i < objcount; i++, o++) {
-                dentry = fso[i].fso_dentry;
-                inode = dentry->d_inode;
-
-                for (j = 0; j < o->ioo_bufcnt; j++, rnb++, lnb++) {
-                        if (j == 0)
-                                lnb->dentry = dentry;
-                        else
-                                lnb->dentry = dget(dentry);
-
-                        lnb->offset = rnb->offset;
-                        lnb->len    = rnb->len;
-                        lnb->flags  = rnb->flags;
-                        lnb->start  = jiffies;
-
-                        if (cmd & OBD_BRW_WRITE) {
-                                rc = filter_get_page_write(inode,lnb,&pglocked);
-                                if (rc)
-                                        up(&dentry->d_inode->i_sem);
-                        } else if (inode->i_size <= rnb->offset) {
-                                /* If there's no more data, abort early.
-                                 * lnb->page == NULL and lnb->rc == 0, so it's
-                                 * easy to detect later. */
-                                f_dput(dentry);
-                                lnb->dentry = NULL;
-                                break;
-                        } else {
-                                rc = filter_start_page_read(inode, lnb);
-                        }
-
-                        if (rc) {
-                                CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR,
-                                       "page err %u@"LPU64" %u/%u %p: rc %d\n",
-                                       lnb->len, lnb->offset, j, o->ioo_bufcnt,
-                                       dentry, rc);
-                                f_dput(dentry);
-                                GOTO(out_pages, rc);
-                        }
-
-                        tot_bytes += lnb->len;
-
-                        if ((cmd & OBD_BRW_READ) && lnb->rc < lnb->len) {
-                                /* Likewise with a partial read */
-                                break;
-                        }
-                }
-        }
-
-        if (time_after(jiffies, now + 15*HZ))
-                CERROR("slow prep get page %lus\n", (jiffies - now) / HZ);
-
-        if (cmd & OBD_BRW_READ) {
-                lprocfs_counter_add(obd->obd_stats, LPROC_FILTER_READ_BYTES,
-                                    tot_bytes);
-                while (lnb-- > res) {
-                        rc = filter_finish_page_read(lnb);
-                        if (rc) {
-                                CERROR("error page %u@"LPU64" %u %p: rc %d\n",
-                                       lnb->len, lnb->offset, lnb - res,
-                                       lnb->dentry, rc);
-                                f_dput(lnb->dentry);
-                                GOTO(out_pages, rc);
-                        }
-                }
-        } else
-                lprocfs_counter_add(obd->obd_stats, LPROC_FILTER_WRITE_BYTES,
-                                    tot_bytes);
-
-        if (time_after(jiffies, now + 15*HZ))
-                CERROR("slow prep finish page %lus\n", (jiffies - now) / HZ);
-
-        EXIT;
-out:
-        OBD_FREE(fso, objcount * sizeof(*fso));
-        current->journal_info = NULL;
-        pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
-        return rc;
-
-out_pages:
-        while (lnb-- > res) {
-                if (cmd & OBD_BRW_WRITE) {
-                        filter_commit_write(lnb, rc);
-                        up(&lnb->dentry->d_inode->i_sem);
-                } else {
-                        lustre_put_page(lnb->page);
-                }
-                f_dput(lnb->dentry);
-        }
-        if (cmd & OBD_BRW_WRITE) {
-                filter_finish_transno(exp, *desc_private, oti, rc);
-                fsfilt_commit(obd,
-                              filter_parent(obd,S_IFREG,obj->ioo_id)->d_inode,
-                              *desc_private, 0);
-        }
-        goto out; /* dropped the dentry refs already (one per page) */
-
-out_objinfo:
-        for (i = 0; i < objcount && fso[i].fso_dentry; i++) {
-                if (cmd & OBD_BRW_WRITE)
-                        up(&fso[i].fso_dentry->d_inode->i_sem);
-                f_dput(fso[i].fso_dentry);
-        }
-        goto out;
-}
-
-static int filter_write_locked_page(struct niobuf_local *lnb)
-{
-        struct page *lpage;
-        void        *lpage_addr;
-        void        *lnb_addr;
-        int rc;
-        ENTRY;
-
-        lpage = lustre_get_page_write(lnb->dentry->d_inode, lnb->page->index);
-        if (IS_ERR(lpage)) {
-                /* It is highly unlikely that we would ever get an error here.
-                 * The page we want to get was previously locked, so it had to
-                 * have already allocated the space, and we were just writing
-                 * over the same data, so there would be no hole in the file.
-                 *
-                 * XXX: possibility of a race with truncate could exist, need
-                 *      to check that.  There are no guarantees w.r.t.
-                 *      write order even on a local filesystem, although the
-                 *      normal response would be to return the number of bytes
-                 *      successfully written and leave the rest to the app.
-                 */
-                rc = PTR_ERR(lpage);
-                CERROR("error getting locked page index %ld: rc = %d\n",
-                       lnb->page->index, rc);
-                LBUG();
-                lustre_commit_write(lnb);
-                RETURN(rc);
-        }
-
-        /* 2 kmaps == vanishingly small deadlock opportunity */
-        lpage_addr = kmap(lpage);
-        lnb_addr = kmap(lnb->page);
-
-        memcpy(lpage_addr, lnb_addr, PAGE_SIZE);
-
-        kunmap(lnb->page);
-        kunmap(lpage);
-
-        lustre_put_page(lnb->page);
-
-        lnb->page = lpage;
-        rc = lustre_commit_write(lnb);
-        if (rc)
-                CERROR("error committing locked page %ld: rc = %d\n",
-                       lnb->page->index, rc);
-
-        RETURN(rc);
-}
-
 static int filter_syncfs(struct obd_export *exp)
 {
 static int filter_syncfs(struct obd_export *exp)
 {
-        struct obd_device *obd = exp->exp_obd;
         ENTRY;
 
         ENTRY;
 
-        RETURN(fsfilt_sync(obd, obd->u.filter.fo_sb));
-}
-
-static int filter_commitrw(int cmd, struct obd_export *exp,
-                           int objcount, struct obd_ioobj *obj,
-                           int niocount, struct niobuf_local *res,
-                           void *desc_private, struct obd_trans_info *oti)
-{
-        struct obd_run_ctxt saved;
-        struct obd_ioobj *o;
-        struct niobuf_local *lnb;
-        struct obd_device *obd = exp->exp_obd;
-        int found_locked = 0, rc = 0, i;
-        unsigned long now = jiffies;  /* DEBUGGING OST TIMEOUTS */
-        ENTRY;
-
-        push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
-
-        LASSERT(!current->journal_info);
-        current->journal_info = desc_private;
-
-        for (i = 0, o = obj, lnb = res; i < objcount; i++, o++) {
-                int j;
-
-                if (cmd & OBD_BRW_WRITE) {
-                        inode_update_time(lnb->dentry->d_inode, 1);
-                        up(&lnb->dentry->d_inode->i_sem);
-                }
-                for (j = 0 ; j < o->ioo_bufcnt ; j++, lnb++) {
-                        if (lnb->page == NULL) {
-                                continue;
-                        }
-
-                        if (lnb->flags & N_LOCAL_TEMP_PAGE) {
-                                found_locked++;
-                                continue;
-                        }
-
-                        if (time_after(jiffies, lnb->start + 15*HZ))
-                                CERROR("slow commitrw %lus\n",
-                                       (jiffies - lnb->start) / HZ);
-
-                        if (cmd & OBD_BRW_WRITE) {
-                                int err = filter_commit_write(lnb, 0);
-
-                                if (!rc)
-                                        rc = err;
-                        } else {
-                                lustre_put_page(lnb->page);
-                        }
-
-                        f_dput(lnb->dentry);
-                        if (time_after(jiffies, lnb->start + 15*HZ))
-                                CERROR("slow commit_write %lus\n",
-                                       (jiffies - lnb->start) / HZ);
-                }
-        }
-
-        for (i = 0, o = obj, lnb = res; found_locked > 0 && i < objcount;
-             i++, o++) {
-                int j;
-                for (j = 0 ; j < o->ioo_bufcnt ; j++, lnb++) {
-                        int err;
-                        if (!(lnb->flags & N_LOCAL_TEMP_PAGE))
-                                continue;
-
-                        if (time_after(jiffies, lnb->start + 15*HZ))
-                                CERROR("slow commitrw locked %lus\n",
-                                       (jiffies - lnb->start) / HZ);
-
-                        err = filter_write_locked_page(lnb);
-                        if (!rc)
-                                rc = err;
-                        f_dput(lnb->dentry);
-                        found_locked--;
-
-                        if (time_after(jiffies, lnb->start + 15*HZ))
-                                CERROR("slow commit_write locked %lus\n",
-                                       (jiffies - lnb->start) / HZ);
-                }
-        }
-
-        if (cmd & OBD_BRW_WRITE) {
-                /* We just want any dentry for the commit, for now */
-                struct dentry *dparent = filter_parent(obd, S_IFREG, 0);
-                int err;
-
-                rc = filter_finish_transno(exp, desc_private, oti, rc);
-                err = fsfilt_commit(obd, dparent->d_inode, desc_private,
-                                    obd_sync_filter);
-                if (err)
-                        rc = err;
-                if (obd_sync_filter)
-                        LASSERT(oti->oti_transno <= obd->obd_last_committed);
-
-                if (time_after(jiffies, now + 15*HZ))
-                        CERROR("slow commitrw commit %lus\n", (jiffies-now)/HZ);
-        }
-
-        LASSERT(!current->journal_info);
-
-        pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
-        RETURN(rc);
+        RETURN(fsfilt_sync(exp->exp_obd, exp->exp_obd->u.filter.fo_sb));
 }
 
 }
 
-static int filter_brw(int cmd, struct lustre_handle *conn,
-                      struct lov_stripe_md *lsm, obd_count oa_bufs,
-                      struct brw_page *pga, struct obd_trans_info *oti)
+static int filter_statfs(struct obd_device *obd, struct obd_statfs *osfs,
+                         unsigned long max_age)
 {
 {
-        struct obd_export *export = class_conn2export(conn);
-        struct obd_ioobj        ioo;
-        struct niobuf_local     *lnb;
-        struct niobuf_remote    *rnb;
-        obd_count               i;
-        void                    *desc_private;
-        int                     ret = 0;
         ENTRY;
         ENTRY;
-
-        if (export == NULL)
-                RETURN(-EINVAL);
-
-        OBD_ALLOC(lnb, oa_bufs * sizeof(struct niobuf_local));
-        OBD_ALLOC(rnb, oa_bufs * sizeof(struct niobuf_remote));
-
-        if (lnb == NULL || rnb == NULL)
-                GOTO(out, ret = -ENOMEM);
-
-        for (i = 0; i < oa_bufs; i++) {
-                rnb[i].offset = pga[i].off;
-                rnb[i].len = pga[i].count;
-        }
-
-        ioo.ioo_id = lsm->lsm_object_id;
-        ioo.ioo_gr = 0;
-        ioo.ioo_type = S_IFREG;
-        ioo.ioo_bufcnt = oa_bufs;
-
-        ret = filter_preprw(cmd, export, NULL, 1, &ioo, oa_bufs, rnb, lnb,
-                            &desc_private, oti);
-        if (ret != 0)
-                GOTO(out, ret);
-
-        for (i = 0; i < oa_bufs; i++) {
-                void *virt = kmap(pga[i].pg);
-                obd_off off = pga[i].off & ~PAGE_MASK;
-                void *addr = kmap(lnb[i].page);
-
-                /* 2 kmaps == vanishingly small deadlock opportunity */
-
-                if (cmd & OBD_BRW_WRITE)
-                        memcpy(addr + off, virt + off, pga[i].count);
-                else
-                        memcpy(virt + off, addr + off, pga[i].count);
-
-                kunmap(addr);
-                kunmap(virt);
-        }
-
-        ret = filter_commitrw(cmd, export, 1, &ioo, oa_bufs, lnb, desc_private,
-                              oti);
-
-out:
-        if (lnb)
-                OBD_FREE(lnb, oa_bufs * sizeof(struct niobuf_local));
-        if (rnb)
-                OBD_FREE(rnb, oa_bufs * sizeof(struct niobuf_remote));
-        class_export_put(export);
-        RETURN(ret);
-}
-
-static int filter_san_preprw(int cmd, struct lustre_handle *conn,
-                             int objcount, struct obd_ioobj *obj,
-                             int niocount, struct niobuf_remote *nb)
-{
-        struct obd_device *obd;
-        struct obd_ioobj *o = obj;
-        struct niobuf_remote *rnb = nb;
-        int rc = 0;
-        int i;
-        ENTRY;
-
-        obd = class_conn2obd(conn);
-        if (!obd) {
-                CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
-                       conn->cookie);
-                RETURN(-EINVAL);
-        }
-
-        for (i = 0; i < objcount; i++, o++) {
-                struct dentry *dentry;
-                struct inode *inode;
-                int (*fs_bmap)(struct address_space *, long);
-                int j;
-
-                dentry = filter_fid2dentry(obd, NULL, o->ioo_type, o->ioo_id);
-                if (IS_ERR(dentry))
-                        GOTO(out, rc = PTR_ERR(dentry));
-                inode = dentry->d_inode;
-                if (!inode) {
-                        CERROR("trying to BRW to non-existent file "LPU64"\n",
-                               o->ioo_id);
-                        f_dput(dentry);
-                        GOTO(out, rc = -ENOENT);
-                }
-                fs_bmap = inode->i_mapping->a_ops->bmap;
-
-                for (j = 0; j < o->ioo_bufcnt; j++, rnb++) {
-                        long block;
-
-                        block = rnb->offset >> inode->i_blkbits;
-
-                        if (cmd == OBD_BRW_READ) {
-                                block = fs_bmap(inode->i_mapping, block);
-                        } else {
-                                loff_t newsize = rnb->offset + rnb->len;
-                                /* fs_prep_san_write will also update inode
-                                 * size for us:
-                                 * (1) new alloced block
-                                 * (2) existed block but size extented
-                                 */
-                                /* FIXME We could call fs_prep_san_write()
-                                 * only once for all the blocks allocation.
-                                 * Now call it once for each block, for
-                                 * simplicity. And if error happens, we
-                                 * probably need to release previous alloced
-                                 * block */
-                                rc = fs_prep_san_write(obd, inode, &block,
-                                                       1, newsize);
-                                if (rc)
-                                        break;
-                        }
-
-                        rnb->offset = block;
-                }
-                f_dput(dentry);
-        }
-out:
-        RETURN(rc);
-}
-
-static int filter_statfs(struct obd_export *exp, struct obd_statfs *osfs)
-{
-        struct obd_device *obd = exp->exp_obd;
-        ENTRY;
-
         RETURN(fsfilt_statfs(obd, obd->u.filter.fo_sb, osfs));
 }
 
         RETURN(fsfilt_statfs(obd, obd->u.filter.fo_sb, osfs));
 }
 
@@ -2676,7 +2067,7 @@ static int filter_get_info(struct lustre_handle *conn, __u32 keylen,
         ENTRY;
 
         obd = class_conn2obd(conn);
         ENTRY;
 
         obd = class_conn2obd(conn);
-        if (!obd) {
+        if (obd == NULL) {
                 CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
                        conn->cookie);
                 RETURN(-EINVAL);
                 CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
                        conn->cookie);
                 RETURN(-EINVAL);
@@ -2702,77 +2093,46 @@ static int filter_get_info(struct lustre_handle *conn, __u32 keylen,
         RETURN(-EINVAL);
 }
 
         RETURN(-EINVAL);
 }
 
-int filter_copy_data(struct lustre_handle *dst_conn, struct obdo *dst,
-                  struct lustre_handle *src_conn, struct obdo *src,
-                  obd_size count, obd_off offset, struct obd_trans_info *oti)
+static int filter_set_info(struct lustre_handle *conn, __u32 keylen,
+                           void *key, __u32 vallen, void *val)
 {
 {
-        struct page *page;
-        struct lov_stripe_md srcmd, dstmd;
-        unsigned long index = 0;
-        int err = 0;
-
-        LBUG(); /* THIS CODE IS NOT CORRECT -phil */
-
-        memset(&srcmd, 0, sizeof(srcmd));
-        memset(&dstmd, 0, sizeof(dstmd));
-        srcmd.lsm_object_id = src->o_id;
-        dstmd.lsm_object_id = dst->o_id;
-
+        struct obd_device *obd;
+        struct obd_export *exp;
+        struct obd_import *imp;
         ENTRY;
         ENTRY;
-        CDEBUG(D_INFO, "src: ino "LPU64" blocks "LPU64", size "LPU64
-               ", dst: ino "LPU64"\n",
-               src->o_id, src->o_blocks, src->o_size, dst->o_id);
-        page = alloc_page(GFP_USER);
-        if (page == NULL)
-                RETURN(-ENOMEM);
-
-        wait_on_page(page);
 
 
-        /* XXX with brw vector I/O, we could batch up reads and writes here,
-         *     all we need to do is allocate multiple pages to handle the I/Os
-         *     and arrays to handle the request parameters.
-         */
-        while (index < ((src->o_size + PAGE_SIZE - 1) >> PAGE_SHIFT)) {
-                struct brw_page pg;
-
-                pg.pg = page;
-                pg.count = PAGE_SIZE;
-                pg.off = (page->index) << PAGE_SHIFT;
-                pg.flag = 0;
-
-                page->index = index;
-                err = obd_brw(OBD_BRW_READ, src_conn, &srcmd, 1, &pg, NULL);
-                if (err) {
-                        EXIT;
-                        break;
-                }
+        obd = class_conn2obd(conn);
+        if (obd == NULL) {
+                CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
+                       conn->cookie);
+                RETURN(-EINVAL);
+        }
 
 
-                pg.flag = OBD_BRW_CREATE;
-                CDEBUG(D_INFO, "Read page %ld ...\n", page->index);
+        if (keylen < strlen("mds_conn") ||
+            memcmp(key, "mds_conn", keylen) != 0)
+                RETURN(-EINVAL);
 
 
-                err = obd_brw(OBD_BRW_WRITE, dst_conn, &dstmd, 1, &pg, oti);
+        CERROR("Received MDS connection ("LPX64")\n", conn->cookie);
+        memcpy(&obd->u.filter.fo_mdc_conn, conn, sizeof(*conn));
 
 
-                /* XXX should handle dst->o_size, dst->o_blocks here */
-                if (err) {
-                        EXIT;
-                        break;
-                }
+        imp = obd->u.filter.fo_mdc_imp = class_new_import();
 
 
-                CDEBUG(D_INFO, "Wrote page %ld ...\n", page->index);
+        exp = class_conn2export(conn);
+        imp->imp_connection = ptlrpc_connection_addref(exp->exp_connection);
+        class_export_put(exp);
 
 
-                index++;
-        }
-        dst->o_size = src->o_size;
-        dst->o_blocks = src->o_blocks;
-        dst->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
-        unlock_page(page);
-        __free_page(page);
+        imp->imp_client = &obd->u.filter.fo_mdc_client;
+        imp->imp_remote_handle = *conn;
+        imp->imp_obd = obd;
+        imp->imp_dlm_fake = 1; /* XXX rename imp_dlm_fake to something else */
+        imp->imp_level = LUSTRE_CONN_FULL;
+        class_import_put(imp);
 
 
-        RETURN(err);
+        RETURN(0);
 }
 
 int filter_iocontrol(unsigned int cmd, struct lustre_handle *conn,
 }
 
 int filter_iocontrol(unsigned int cmd, struct lustre_handle *conn,
-                  int len, void *karg, void *uarg)
+                     int len, void *karg, void *uarg)
 {
         struct obd_device *obd = class_conn2obd(conn);
 
 {
         struct obd_device *obd = class_conn2obd(conn);
 
@@ -2788,12 +2148,12 @@ int filter_iocontrol(unsigned int cmd, struct lustre_handle *conn,
         RETURN(0);
 }
 
         RETURN(0);
 }
 
-
 static struct obd_ops filter_obd_ops = {
         o_owner:          THIS_MODULE,
         o_attach:         filter_attach,
         o_detach:         filter_detach,
         o_get_info:       filter_get_info,
 static struct obd_ops filter_obd_ops = {
         o_owner:          THIS_MODULE,
         o_attach:         filter_attach,
         o_detach:         filter_detach,
         o_get_info:       filter_get_info,
+        o_set_info:       filter_set_info,
         o_setup:          filter_setup,
         o_cleanup:        filter_cleanup,
         o_connect:        filter_connect,
         o_setup:          filter_setup,
         o_cleanup:        filter_cleanup,
         o_connect:        filter_connect,
@@ -2810,15 +2170,9 @@ static struct obd_ops filter_obd_ops = {
         o_punch:          filter_truncate,
         o_preprw:         filter_preprw,
         o_commitrw:       filter_commitrw,
         o_punch:          filter_truncate,
         o_preprw:         filter_preprw,
         o_commitrw:       filter_commitrw,
+        o_log_cancel:     filter_log_cancel,
         o_destroy_export: filter_destroy_export,
         o_iocontrol:      filter_iocontrol,
         o_destroy_export: filter_destroy_export,
         o_iocontrol:      filter_iocontrol,
-#if 0
-        o_san_preprw:  filter_san_preprw,
-        o_preallocate: filter_preallocate_inodes,
-        o_migrate:     filter_migrate,
-        o_copy:        filter_copy_data,
-        o_iterate:     filter_iterate
-#endif
 };
 
 static struct obd_ops filter_sanobd_ops = {
 };
 
 static struct obd_ops filter_sanobd_ops = {
@@ -2826,6 +2180,7 @@ static struct obd_ops filter_sanobd_ops = {
         o_attach:         filter_attach,
         o_detach:         filter_detach,
         o_get_info:       filter_get_info,
         o_attach:         filter_attach,
         o_detach:         filter_detach,
         o_get_info:       filter_get_info,
+        o_set_info:       filter_set_info,
         o_setup:          filter_san_setup,
         o_cleanup:        filter_cleanup,
         o_connect:        filter_connect,
         o_setup:          filter_san_setup,
         o_cleanup:        filter_cleanup,
         o_connect:        filter_connect,
@@ -2841,18 +2196,12 @@ static struct obd_ops filter_sanobd_ops = {
         o_punch:          filter_truncate,
         o_preprw:         filter_preprw,
         o_commitrw:       filter_commitrw,
         o_punch:          filter_truncate,
         o_preprw:         filter_preprw,
         o_commitrw:       filter_commitrw,
+        o_log_cancel:     filter_log_cancel,
         o_san_preprw:     filter_san_preprw,
         o_destroy_export: filter_destroy_export,
         o_iocontrol:      filter_iocontrol,
         o_san_preprw:     filter_san_preprw,
         o_destroy_export: filter_destroy_export,
         o_iocontrol:      filter_iocontrol,
-#if 0
-        o_preallocate:  filter_preallocate_inodes,
-        o_migrate:      filter_migrate,
-        o_copy:         filter_copy_data,
-        o_iterate:      filter_iterate
-#endif
 };
 
 };
 
-
 static int __init obdfilter_init(void)
 {
         struct lprocfs_static_vars lvars;
 static int __init obdfilter_init(void)
 {
         struct lprocfs_static_vars lvars;
@@ -2860,7 +2209,7 @@ static int __init obdfilter_init(void)
 
         printk(KERN_INFO "Lustre Filtering OBD driver; info@clusterfs.com\n");
 
 
         printk(KERN_INFO "Lustre Filtering OBD driver; info@clusterfs.com\n");
 
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(filter, &lvars);
 
         rc = class_register_type(&filter_obd_ops, lvars.module_vars,
                                  OBD_FILTER_DEVICENAME);
 
         rc = class_register_type(&filter_obd_ops, lvars.module_vars,
                                  OBD_FILTER_DEVICENAME);
index 1319dbd..411a9fb 100644 (file)
 #define DEBUG_SUBSYSTEM S_CLASS
 
 #include <linux/version.h>
 #define DEBUG_SUBSYSTEM S_CLASS
 
 #include <linux/version.h>
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#include <asm/statfs.h>
-#endif
 #include <linux/lprocfs_status.h>
 #include <linux/obd.h>
 
 #ifndef LPROCFS
 #include <linux/lprocfs_status.h>
 #include <linux/obd.h>
 
 #ifndef LPROCFS
-struct lprocfs_vars lprocfs_obd_vars[]  = { {0} };
-struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_obd_vars[]  = { {0} };
+static struct lprocfs_vars lprocfs_module_vars[] = { {0} };
 #else
 
 #else
 
-static inline int lprocfs_filter_statfs(void *data, struct statfs *sfs)
-{
-        struct obd_device *dev = (struct obd_device *) data;
-        LASSERT(dev != NULL);
-        return vfs_statfs(dev->u.filter.fo_sb, sfs);
-}
-
-DEFINE_LPROCFS_STATFS_FCT(rd_blksize,     lprocfs_filter_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, lprocfs_filter_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree,  lprocfs_filter_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filestotal,  lprocfs_filter_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filesfree,   lprocfs_filter_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filegroups,  lprocfs_filter_statfs);
-
-int rd_fstype(char *page, char **start, off_t off, int count, int *eof,
-              void *data)
-{
-        struct obd_device *dev = (struct obd_device *)data;
-        LASSERT(dev != NULL);
-        return snprintf(page, count, "%s\n", dev->u.filter.fo_fstype);
-}
-
-int lprocfs_filter_rd_mntdev(char *page, char **start, off_t off, int count,
-                             int *eof, void *data)
+static int lprocfs_filter_rd_mntdev(char *page, char **start, off_t off,
+                                    int count, int *eof, void *data)
 {
         struct obd_device* obd = (struct obd_device *)data;
 
 {
         struct obd_device* obd = (struct obd_device *)data;
 
@@ -67,23 +42,23 @@ int lprocfs_filter_rd_mntdev(char *page, char **start, off_t off, int count,
                         obd->u.filter.fo_vfsmnt->mnt_devname);
 }
 
                         obd->u.filter.fo_vfsmnt->mnt_devname);
 }
 
-struct lprocfs_vars lprocfs_obd_vars[] = {
-        { "uuid",        lprocfs_rd_uuid,    0, 0 },
-        { "blocksize",   rd_blksize,         0, 0 },
-        { "kbytestotal", rd_kbytestotal,     0, 0 },
-        { "kbytesfree",  rd_kbytesfree,      0, 0 },
-        { "filestotal",  rd_filestotal,      0, 0 },
-        { "filesfree",   rd_filesfree,       0, 0 },
-        { "filegroups",  rd_filegroups,      0, 0 },
-        { "fstype",      rd_fstype,          0, 0 },
-        { "mntdev",      lprocfs_filter_rd_mntdev,    0, 0 },
+static struct lprocfs_vars lprocfs_obd_vars[] = {
+        { "uuid",         lprocfs_rd_uuid,          0, 0 },
+        { "blocksize",    lprocfs_rd_blksize,       0, 0 },
+        { "kbytestotal",  lprocfs_rd_kbytestotal,   0, 0 },
+        { "kbytesfree",   lprocfs_rd_kbytesfree,    0, 0 },
+        { "filestotal",   lprocfs_rd_filestotal,    0, 0 },
+        { "filesfree",    lprocfs_rd_filesfree,     0, 0 },
+        //{ "filegroups",   lprocfs_rd_filegroups,    0, 0 },
+        { "fstype",       lprocfs_rd_fstype,        0, 0 },
+        { "mntdev",       lprocfs_filter_rd_mntdev, 0, 0 },
         { 0 }
 };
 
         { 0 }
 };
 
-struct lprocfs_vars lprocfs_module_vars[] = {
-        { "num_refs",    lprocfs_rd_numrefs, 0, 0 },
+static struct lprocfs_vars lprocfs_module_vars[] = {
+        { "num_refs",     lprocfs_rd_numrefs,       0, 0 },
         { 0 }
 };
 
 #endif /* LPROCFS */
         { 0 }
 };
 
 #endif /* LPROCFS */
-LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
+LPROCFS_INIT_VARS(filter,lprocfs_module_vars, lprocfs_obd_vars)
index e530020..49c6100 100644 (file)
@@ -6,3 +6,4 @@ Makefile
 Makefile.in
 .deps
 TAGS
 Makefile.in
 .deps
 TAGS
+.*.cmd
index d5e4ec1..e9affd0 100644 (file)
 #include <linux/lprocfs_status.h>
 
 #ifndef LPROCFS
 #include <linux/lprocfs_status.h>
 
 #ifndef LPROCFS
-struct lprocfs_vars lprocfs_obd_vars[]  = { {0} };
-struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_obd_vars[]  = { {0} };
+static struct lprocfs_vars lprocfs_module_vars[] = { {0} };
 #else
 #else
-
-DEFINE_LPROCFS_STATFS_FCT(rd_blksize,     obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree,  obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filestotal,  obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filesfree,   obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filegroups,  obd_self_statfs);
-
-struct lprocfs_vars lprocfs_obd_vars[] = {
-        { "uuid",            lprocfs_rd_uuid, 0, 0 },
-        { "blocksize",       rd_blksize, 0, 0 },
-        { "kbytestotal",     rd_kbytestotal, 0, 0 },
-        { "kbytesfree",      rd_kbytesfree, 0, 0 },
-        { "filestotal",      rd_filestotal, 0, 0 },
-        { "filesfree",       rd_filesfree, 0, 0   },
-        { "filegroups",      rd_filegroups, 0, 0 },
+static struct lprocfs_vars lprocfs_obd_vars[] = {
+        { "uuid",            lprocfs_rd_uuid,        0, 0 },
+        { "blocksize",       lprocfs_rd_blksize,     0, 0 },
+        { "kbytestotal",     lprocfs_rd_kbytestotal, 0, 0 },
+        { "kbytesfree",      lprocfs_rd_kbytesfree,  0, 0 },
+        { "filestotal",      lprocfs_rd_filestotal,  0, 0 },
+        { "filesfree",       lprocfs_rd_filesfree,   0, 0 },
+        //{ "filegroups",      lprocfs_rd_filegroups,  0, 0 },
         { "ost_server_uuid", lprocfs_rd_server_uuid, 0, 0 },
         { "ost_server_uuid", lprocfs_rd_server_uuid, 0, 0 },
-        { "ost_conn_uuid",   lprocfs_rd_conn_uuid, 0, 0 },
+        { "ost_conn_uuid",   lprocfs_rd_conn_uuid,   0, 0 },
         { 0 }
 };
 
         { 0 }
 };
 
-struct lprocfs_vars lprocfs_module_vars[] = {
-        { "num_refs",        lprocfs_rd_numrefs, 0, 0 },
+static struct lprocfs_vars lprocfs_module_vars[] = {
+        { "num_refs",        lprocfs_rd_numrefs,     0, 0 },
         { 0 }
 };
 
 #endif /* LPROCFS */
         { 0 }
 };
 
 #endif /* LPROCFS */
-LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
+LPROCFS_INIT_VARS(osc,lprocfs_module_vars, lprocfs_obd_vars)
index aa04a1a..c8cd6ad 100644 (file)
@@ -34,8 +34,7 @@ static kdev_t path2dev(char *path)
 {
         struct dentry *dentry;
         struct nameidata nd;
 {
         struct dentry *dentry;
         struct nameidata nd;
-        kdev_t dev;
-        KDEVT_VAL(dev, 0);
+        kdev_t dev = KDEVT_INIT(0);
 
         if (!path_init(path, LOOKUP_FOLLOW, &nd))
                 return 0;
 
         if (!path_init(path, LOOKUP_FOLLOW, &nd))
                 return 0;
index 4bda8de..89061fd 100644 (file)
 #define DEBUG_SUBSYSTEM S_OSC
 
 #ifdef __KERNEL__
 #define DEBUG_SUBSYSTEM S_OSC
 
 #ifdef __KERNEL__
-#include <linux/version.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/highmem.h>
-#include <linux/lustre_dlm.h>
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#include <linux/workqueue.h>
-#include <linux/smp_lock.h>
-#else
-#include <linux/locks.h>
-#endif
-#else
-#include <liblustre.h>
+# include <linux/version.h>
+# include <linux/module.h>
+# include <linux/mm.h>
+# include <linux/highmem.h>
+# include <linux/lustre_dlm.h>
+# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#  include <linux/workqueue.h>
+#  include <linux/smp_lock.h>
+# else
+#  include <linux/locks.h>
+# endif
+#else /* __KERNEL__ */
+# include <liblustre.h>
 #endif
 
 #include <linux/kp30.h>
 #include <linux/lustre_mds.h> /* for mds_objid */
 #include <linux/lustre_otree.h>
 #include <linux/obd_ost.h>
 #endif
 
 #include <linux/kp30.h>
 #include <linux/lustre_mds.h> /* for mds_objid */
 #include <linux/lustre_otree.h>
 #include <linux/obd_ost.h>
+#include <linux/lustre_commit_confd.h>
 #include <linux/obd_lov.h>
 
 #ifndef  __CYGWIN__
 #include <linux/obd_lov.h>
 
 #ifndef  __CYGWIN__
-#include <linux/ctype.h>
-#include <linux/init.h>
+# include <linux/ctype.h>
+# include <linux/init.h>
 #else
 #else
-#include <ctype.h>
+# include <ctype.h>
 #endif
 
 #include <linux/lustre_ha.h>
 #endif
 
 #include <linux/lustre_ha.h>
 #include <portals/lib-types.h> /* for PTL_MD_MAX_IOV */
 #include <linux/lprocfs_status.h>
 
 #include <portals/lib-types.h> /* for PTL_MD_MAX_IOV */
 #include <linux/lprocfs_status.h>
 
+static struct llog_cookie zero_cookie = { { 0 } };
+
 static int osc_attach(struct obd_device *dev, obd_count len, void *data)
 {
         struct lprocfs_static_vars lvars;
 
 static int osc_attach(struct obd_device *dev, obd_count len, void *data)
 {
         struct lprocfs_static_vars lvars;
 
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(osc,&lvars);
         return lprocfs_obd_attach(dev, lvars.obd_vars);
 }
 
         return lprocfs_obd_attach(dev, lvars.obd_vars);
 }
 
@@ -119,29 +122,29 @@ static int osc_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
                 if (lmm_bytes < sizeof (*lmm)) {
                         CERROR("lov_mds_md too small: %d, need %d\n",
                                lmm_bytes, (int)sizeof(*lmm));
                 if (lmm_bytes < sizeof (*lmm)) {
                         CERROR("lov_mds_md too small: %d, need %d\n",
                                lmm_bytes, (int)sizeof(*lmm));
-                        RETURN (-EINVAL);
+                        RETURN(-EINVAL);
                 }
                 /* XXX LOV_MAGIC etc check? */
 
                 }
                 /* XXX LOV_MAGIC etc check? */
 
-                if (lmm->lmm_object_id == cpu_to_le64 (0)) {
-                        CERROR ("lov_mds_md: zero lmm_object_id\n");
-                        RETURN (-EINVAL);
+                if (lmm->lmm_object_id == cpu_to_le64(0)) {
+                        CERROR("lov_mds_md: zero lmm_object_id\n");
+                        RETURN(-EINVAL);
                 }
         }
 
         lsm_size = lov_stripe_md_size(1);
                 }
         }
 
         lsm_size = lov_stripe_md_size(1);
-        if (!lsmp)
+        if (lsmp == NULL)
                 RETURN(lsm_size);
 
                 RETURN(lsm_size);
 
-        if (*lsmp && !lmm) {
+        if (*lsmp != NULL && lmm == NULL) {
                 OBD_FREE(*lsmp, lsm_size);
                 *lsmp = NULL;
                 RETURN(0);
         }
 
                 OBD_FREE(*lsmp, lsm_size);
                 *lsmp = NULL;
                 RETURN(0);
         }
 
-        if (!*lsmp) {
+        if (*lsmp == NULL) {
                 OBD_ALLOC(*lsmp, lsm_size);
                 OBD_ALLOC(*lsmp, lsm_size);
-                if (!*lsmp)
+                if (*lsmp == NULL)
                         RETURN(-ENOMEM);
 
                 (*lsmp)->lsm_oinfo[0].loi_dirty_ot =
                         RETURN(-ENOMEM);
 
                 (*lsmp)->lsm_oinfo[0].loi_dirty_ot =
@@ -149,7 +152,7 @@ static int osc_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
                 ot_init((*lsmp)->lsm_oinfo[0].loi_dirty_ot);
         }
 
                 ot_init((*lsmp)->lsm_oinfo[0].loi_dirty_ot);
         }
 
-        if (lmm) {
+        if (lmm != NULL) {
                 /* XXX zero *lsmp? */
                 (*lsmp)->lsm_object_id = le64_to_cpu (lmm->lmm_object_id);
                 LASSERT((*lsmp)->lsm_object_id);
                 /* XXX zero *lsmp? */
                 (*lsmp)->lsm_object_id = le64_to_cpu (lmm->lmm_object_id);
                 LASSERT((*lsmp)->lsm_object_id);
@@ -167,29 +170,27 @@ static int osc_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
 static int osc_getattr_interpret(struct ptlrpc_request *req,
                                  struct osc_getattr_async_args *aa, int rc)
 {
 static int osc_getattr_interpret(struct ptlrpc_request *req,
                                  struct osc_getattr_async_args *aa, int rc)
 {
-        struct obdo     *oa = aa->aa_oa;
         struct ost_body *body;
         ENTRY;
 
         struct ost_body *body;
         ENTRY;
 
-        if (rc != 0) {
-                CERROR("failed: rc = %d\n", rc);
-                RETURN (rc);
-        }
-
-        body = lustre_swab_repbuf(req, 0, sizeof (*body), lustre_swab_ost_body);
-        if (body == NULL) {
-                CERROR ("can't unpack ost_body\n");
-                RETURN (-EPROTO);
-        }
+        if (rc != 0)
+                RETURN(rc);
 
 
-        CDEBUG(D_INODE, "mode: %o\n", body->oa.o_mode);
-        memcpy(oa, &body->oa, sizeof(*oa));
+        body = lustre_swab_repbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
+        if (body) {
+                CDEBUG(D_INODE, "mode: %o\n", body->oa.o_mode);
+                memcpy(aa->aa_oa, &body->oa, sizeof(*aa->aa_oa));
 
 
-        /* This should really be sent by the OST */
-        oa->o_blksize = OSC_BRW_MAX_SIZE;
-        oa->o_valid |= OBD_MD_FLBLKSZ;
+                /* This should really be sent by the OST */
+                aa->aa_oa->o_blksize = OSC_BRW_MAX_SIZE;
+                aa->aa_oa->o_valid |= OBD_MD_FLBLKSZ;
+        } else {
+                CERROR("can't unpack ost_body\n");
+                rc = -EPROTO;
+                aa->aa_oa->o_valid = 0;
+        }
 
 
-        RETURN (0);
+        RETURN(rc);
 }
 
 static int osc_getattr_async(struct lustre_handle *conn, struct obdo *oa,
 }
 
 static int osc_getattr_async(struct lustre_handle *conn, struct obdo *oa,
@@ -505,7 +506,7 @@ static int osc_create(struct lustre_handle *conn, struct obdo *oa,
                 GOTO(out, rc = -ENOMEM);
 
         body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof (*body));
                 GOTO(out, rc = -ENOMEM);
 
         body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof (*body));
-        memcpy(&body->oa, oa, sizeof(*oa));
+        memcpy(&body->oa, oa, sizeof(body->oa));
 
         request->rq_replen = lustre_msg_size(1, &size);
 
 
         request->rq_replen = lustre_msg_size(1, &size);
 
@@ -513,8 +514,8 @@ static int osc_create(struct lustre_handle *conn, struct obdo *oa,
         if (rc)
                 GOTO(out_req, rc);
 
         if (rc)
                 GOTO(out_req, rc);
 
-        body = lustre_swab_repbuf (request, 0, sizeof (*body),
-                                   lustre_swab_ost_body);
+        body = lustre_swab_repbuf(request, 0, sizeof(*body),
+                                  lustre_swab_ost_body);
         if (body == NULL) {
                 CERROR ("can't unpack ost_body\n");
                 GOTO (out_req, rc = -EPROTO);
         if (body == NULL) {
                 CERROR ("can't unpack ost_body\n");
                 GOTO (out_req, rc = -EPROTO);
@@ -531,13 +532,19 @@ static int osc_create(struct lustre_handle *conn, struct obdo *oa,
          * This needs to be fixed in a big way.
          */
         lsm->lsm_object_id = oa->o_id;
          * This needs to be fixed in a big way.
          */
         lsm->lsm_object_id = oa->o_id;
-        lsm->lsm_stripe_count = 0;
-        lsm->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES;
         *ea = lsm;
 
         *ea = lsm;
 
-        if (oti != NULL)
+        if (oti != NULL) {
                 oti->oti_transno = request->rq_repmsg->transno;
 
                 oti->oti_transno = request->rq_repmsg->transno;
 
+                if (oa->o_valid & OBD_MD_FLCOOKIE) {
+                        if (!oti->oti_logcookies)
+                                oti_alloc_cookies(oti, 1);
+                        memcpy(oti->oti_logcookies, obdo_logcookie(oa),
+                               sizeof(oti->oti_onecookie));
+                }
+        }
+
         CDEBUG(D_HA, "transno: "LPD64"\n", request->rq_repmsg->transno);
         EXIT;
 out_req:
         CDEBUG(D_HA, "transno: "LPD64"\n", request->rq_repmsg->transno);
         EXIT;
 out_req:
@@ -616,14 +623,20 @@ static int osc_destroy(struct lustre_handle *conn, struct obdo *oa,
         body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof (*body));
         memcpy(&body->oa, oa, sizeof(*oa));
 
         body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof (*body));
         memcpy(&body->oa, oa, sizeof(*oa));
 
+        if (oti && oa->o_valid & OBD_MD_FLCOOKIE) {
+                memcpy(obdo_logcookie(oa), oti->oti_logcookies,
+                       sizeof(*oti->oti_logcookies));
+                oti->oti_logcookies++;
+        }
+
         request->rq_replen = lustre_msg_size(1, &size);
 
         rc = ptlrpc_queue_wait(request);
         if (rc)
                 GOTO(out, rc);
 
         request->rq_replen = lustre_msg_size(1, &size);
 
         rc = ptlrpc_queue_wait(request);
         if (rc)
                 GOTO(out, rc);
 
-        body = lustre_swab_repbuf (request, 0, sizeof (*body),
-                                   lustre_swab_ost_body);
+        body = lustre_swab_repbuf(request, 0, sizeof(*body),
+                                  lustre_swab_ost_body);
         if (body == NULL) {
                 CERROR ("Can't unpack body\n");
                 GOTO (out, rc = -EPROTO);
         if (body == NULL) {
                 CERROR ("Can't unpack body\n");
                 GOTO (out, rc = -EPROTO);
@@ -663,7 +676,7 @@ static void osc_update_grant(struct client_obd *cli, struct ost_body *body)
                 return;
         }
 
                 return;
         }
 
-        CDEBUG(D_INODE, "got "LPU64" grant\n", body->oa.o_rdev);
+        CDEBUG(D_ERROR, "got "LPU64" grant\n", body->oa.o_rdev);
         down(&cli->cl_dirty_sem);
         cli->cl_dirty_granted = body->oa.o_rdev;
         /* XXX check for over-run and wake up the io thread that
         down(&cli->cl_dirty_sem);
         cli->cl_dirty_granted = body->oa.o_rdev;
         /* XXX check for over-run and wake up the io thread that
@@ -708,9 +721,8 @@ static void handle_short_read(int nob_read, obd_count page_count,
         }
 }
 
         }
 }
 
-static int check_write_rcs (struct ptlrpc_request *request,
-                            int niocount, obd_count page_count,
-                            struct brw_page *pga)
+static int check_write_rcs(struct ptlrpc_request *request, int niocount,
+                           obd_count page_count, struct brw_page *pga)
 {
         int    i;
         __u32 *remote_rcs;
 {
         int    i;
         __u32 *remote_rcs;
@@ -778,11 +790,10 @@ static obd_count cksum_pages(int nob, obd_count page_count,
 }
 #endif
 
 }
 #endif
 
-static int osc_brw_prep_request(struct obd_import *imp,
+static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa,
                                 struct lov_stripe_md *lsm, obd_count page_count,
                                 struct lov_stripe_md *lsm, obd_count page_count,
-                                struct brw_page *pga, int cmd,
-                                int *requested_nobp, int *niocountp,
-                                struct ptlrpc_request **reqp)
+                                struct brw_page *pga, int *requested_nobp,
+                                int *niocountp, struct ptlrpc_request **reqp)
 {
         struct ptlrpc_request   *req;
         struct ptlrpc_bulk_desc *desc;
 {
         struct ptlrpc_request   *req;
         struct ptlrpc_bulk_desc *desc;
@@ -804,11 +815,11 @@ static int osc_brw_prep_request(struct obd_import *imp,
                 if (!can_merge_pages (&pga[i - 1], &pga[i]))
                         niocount++;
 
                 if (!can_merge_pages (&pga[i - 1], &pga[i]))
                         niocount++;
 
-        size[0] = sizeof (*body);
-        size[1] = sizeof (*ioobj);
-        size[2] = niocount * sizeof (*niobuf);
+        size[0] = sizeof(*body);
+        size[1] = sizeof(*ioobj);
+        size[2] = niocount * sizeof(*niobuf);
 
 
-        req = ptlrpc_prep_req (imp, opc, 3, size, NULL);
+        req = ptlrpc_prep_req(imp, opc, 3, size, NULL);
         if (req == NULL)
                 return (-ENOMEM);
 
         if (req == NULL)
                 return (-ENOMEM);
 
@@ -819,16 +830,18 @@ static int osc_brw_prep_request(struct obd_import *imp,
                 desc = ptlrpc_prep_bulk_imp(req, BULK_PUT_SINK,
                                             OST_BULK_PORTAL);
         if (desc == NULL)
                 desc = ptlrpc_prep_bulk_imp(req, BULK_PUT_SINK,
                                             OST_BULK_PORTAL);
         if (desc == NULL)
-                GOTO (out, rc = -ENOMEM);
+                GOTO(out, rc = -ENOMEM);
         /* NB request now owns desc and will free it when it gets freed */
 
         body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*body));
         ioobj = lustre_msg_buf(req->rq_reqmsg, 1, sizeof(*ioobj));
         niobuf = lustre_msg_buf(req->rq_reqmsg, 2, niocount * sizeof(*niobuf));
 
         /* NB request now owns desc and will free it when it gets freed */
 
         body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*body));
         ioobj = lustre_msg_buf(req->rq_reqmsg, 1, sizeof(*ioobj));
         niobuf = lustre_msg_buf(req->rq_reqmsg, 2, niocount * sizeof(*niobuf));
 
-        ioobj->ioo_id = lsm->lsm_object_id;
-        ioobj->ioo_gr = 0;
-        ioobj->ioo_type = S_IFREG;
+        memcpy(&body->oa, oa, sizeof(*oa));
+
+        ioobj->ioo_id = oa->o_id;
+        ioobj->ioo_gr = oa->o_valid & 0 ? oa->o_gr : 0;
+        ioobj->ioo_type = oa->o_mode;
         ioobj->ioo_bufcnt = niocount;
 
         LASSERT (page_count > 0);
         ioobj->ioo_bufcnt = niocount;
 
         LASSERT (page_count > 0);
@@ -836,19 +849,18 @@ static int osc_brw_prep_request(struct obd_import *imp,
                 struct brw_page *pg = &pga[i];
                 struct brw_page *pg_prev = pg - 1;
 
                 struct brw_page *pg = &pga[i];
                 struct brw_page *pg_prev = pg - 1;
 
-                LASSERT (pg->count > 0);
-                LASSERT ((pg->off & (PAGE_SIZE - 1)) + pg->count <= PAGE_SIZE);
-                LASSERT (i == 0 || pg->off > pg_prev->off);
+                LASSERT(pg->count > 0);
+                LASSERT((pg->off & ~PAGE_MASK) + pg->count <= PAGE_SIZE);
+                LASSERT(i == 0 || pg->off > pg_prev->off);
 
 
-                rc = ptlrpc_prep_bulk_page (desc, pg->pg,
-                                            pg->off & (PAGE_SIZE - 1),
-                                            pg->count);
+                rc = ptlrpc_prep_bulk_page(desc, pg->pg, pg->off & ~PAGE_MASK,
+                                           pg->count);
                 if (rc != 0)
                 if (rc != 0)
-                        GOTO (out, rc);
+                        GOTO(out, rc);
 
                 requested_nob += pg->count;
 
 
                 requested_nob += pg->count;
 
-                if (i > 0 && can_merge_pages (pg_prev, pg)) {
+                if (i > 0 && can_merge_pages(pg_prev, pg)) {
                         niobuf--;
                         niobuf->len += pg->count;
                 } else {
                         niobuf--;
                         niobuf->len += pg->count;
                 } else {
@@ -858,17 +870,17 @@ static int osc_brw_prep_request(struct obd_import *imp,
                 }
         }
 
                 }
         }
 
-        LASSERT ((void *)(niobuf - niocount) ==
-                 lustre_msg_buf(req->rq_reqmsg, 2, niocount * sizeof(*niobuf)));
+        LASSERT((void *)(niobuf - niocount) ==
+                lustre_msg_buf(req->rq_reqmsg, 2, niocount * sizeof(*niobuf)));
 #if CHECKSUM_BULK
         body->oa.o_valid |= OBD_MD_FLCKSUM;
         if (opc == OST_BRW_WRITE)
 #if CHECKSUM_BULK
         body->oa.o_valid |= OBD_MD_FLCKSUM;
         if (opc == OST_BRW_WRITE)
-                body->oa.o_nlink = cksum_pages (requested_nob, page_count, pga);
+                body->oa.o_nlink = cksum_pages(requested_nob, page_count, pga);
 #endif
         osc_announce_cached(cli, body);
 #endif
         osc_announce_cached(cli, body);
-        spin_lock_irqsave (&req->rq_lock, flags);
+        spin_lock_irqsave(&req->rq_lock, flags);
         req->rq_no_resend = 1;
         req->rq_no_resend = 1;
-        spin_unlock_irqrestore (&req->rq_lock, flags);
+        spin_unlock_irqrestore(&req->rq_lock, flags);
 
         /* size[0] still sizeof (*body) */
         if (opc == OST_WRITE) {
 
         /* size[0] still sizeof (*body) */
         if (opc == OST_WRITE) {
@@ -890,21 +902,23 @@ static int osc_brw_prep_request(struct obd_import *imp,
         return (rc);
 }
 
         return (rc);
 }
 
-static int osc_brw_fini_request (struct ptlrpc_request *req,
-                                 int requested_nob, int niocount,
-                                 obd_count page_count, struct brw_page *pga,
-                                 int rc)
+static int osc_brw_fini_request(struct ptlrpc_request *req, struct obdo *oa,
+                                int requested_nob, int niocount,
+                                obd_count page_count, struct brw_page *pga,
+                                int rc)
 {
         struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
         struct ost_body *body;
 {
         struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
         struct ost_body *body;
+
         if (rc < 0)
                 return (rc);
 
         if (rc < 0)
                 return (rc);
 
-        body = lustre_swab_repbuf(req, 0, sizeof (*body), lustre_swab_ost_body);
+        body = lustre_swab_repbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
         if (body == NULL) {
                 CERROR ("Can't unpack body\n");
         if (body == NULL) {
                 CERROR ("Can't unpack body\n");
-                RETURN(-EPROTO);
+                return (-EPROTO);
         }
         }
+
         osc_update_grant(cli, body);
 
         if (req->rq_reqmsg->opc == OST_WRITE) {
         osc_update_grant(cli, body);
 
         if (req->rq_reqmsg->opc == OST_WRITE) {
@@ -913,22 +927,23 @@ static int osc_brw_fini_request (struct ptlrpc_request *req,
                         return (-EPROTO);
                 }
 
                         return (-EPROTO);
                 }
 
-                return (check_write_rcs(req, niocount, page_count, pga));
+                return(check_write_rcs(req, niocount, page_count, pga));
         }
 
         if (rc > requested_nob) {
         }
 
         if (rc > requested_nob) {
-                CERROR ("Unexpected rc %d (%d requested)\n",
-                        rc, requested_nob);
+                CERROR("Unexpected rc %d (%d requested)\n", rc, requested_nob);
                 return (-EPROTO);
         }
 
         if (rc < requested_nob)
                 handle_short_read(rc, page_count, pga);
 
                 return (-EPROTO);
         }
 
         if (rc < requested_nob)
                 handle_short_read(rc, page_count, pga);
 
+        memcpy(oa, &body->oa, sizeof(*oa));
+
 #if CHECKSUM_BULK
 #if CHECKSUM_BULK
-        if (body->oa.o_valid & OBD_MD_FLCKSUM) {
+        if (oa->o_valid & OBD_MD_FLCKSUM) {
                 static int cksum_counter;
                 static int cksum_counter;
-                obd_count server_cksum = body->oa.o_nlink;
+                obd_count server_cksum = oa->o_nlink;
                 obd_count cksum = cksum_pages(rc, page_count, pga);
 
                 cksum_counter++;
                 obd_count cksum = cksum_pages(rc, page_count, pga);
 
                 cksum_counter++;
@@ -937,6 +952,7 @@ static int osc_brw_fini_request (struct ptlrpc_request *req,
                                ", server NID "LPX64"\n", server_cksum, cksum,
                                imp->imp_connection->c_peer.peer_nid);
                         cksum_counter = 0;
                                ", server NID "LPX64"\n", server_cksum, cksum,
                                imp->imp_connection->c_peer.peer_nid);
                         cksum_counter = 0;
+                        oa->o_rdev = cksum;
                 } else if ((cksum_counter & (-cksum_counter)) == cksum_counter)
                         CERROR("Checksum %u from "LPX64" OK: %x\n",
                                cksum_counter,
                 } else if ((cksum_counter & (-cksum_counter)) == cksum_counter)
                         CERROR("Checksum %u from "LPX64" OK: %x\n",
                                cksum_counter,
@@ -953,9 +969,9 @@ static int osc_brw_fini_request (struct ptlrpc_request *req,
         return (0);
 }
 
         return (0);
 }
 
-static int osc_brw_internal(struct lustre_handle *conn,
+static int osc_brw_internal(int cmd, struct lustre_handle *conn,struct obdo *oa,
                             struct lov_stripe_md *lsm,
                             struct lov_stripe_md *lsm,
-                            obd_count page_count, struct brw_page *pga, int cmd)
+                            obd_count page_count, struct brw_page *pga)
 {
         int                    requested_nob;
         int                    niocount;
 {
         int                    requested_nob;
         int                    niocount;
@@ -964,8 +980,9 @@ static int osc_brw_internal(struct lustre_handle *conn,
         ENTRY;
 
 restart_bulk:
         ENTRY;
 
 restart_bulk:
-        rc = osc_brw_prep_request(class_conn2cliimp(conn), lsm, page_count, pga,
-                                  cmd, &requested_nob, &niocount, &request);
+        rc = osc_brw_prep_request(cmd, class_conn2cliimp(conn), oa, lsm,
+                                  page_count, pga, &requested_nob, &niocount,
+                                  &request);
         /* NB ^ sets rq_no_resend */
 
         if (rc != 0)
         /* NB ^ sets rq_no_resend */
 
         if (rc != 0)
@@ -979,8 +996,8 @@ restart_bulk:
                 goto restart_bulk;
         }
 
                 goto restart_bulk;
         }
 
-        rc = osc_brw_fini_request (request, requested_nob, niocount,
-                                   page_count, pga, rc);
+        rc = osc_brw_fini_request(request, oa, requested_nob, niocount,
+                                  page_count, pga, rc);
 
         ptlrpc_req_finished(request);
         RETURN (rc);
 
         ptlrpc_req_finished(request);
         RETURN (rc);
@@ -989,6 +1006,7 @@ restart_bulk:
 static int brw_interpret(struct ptlrpc_request *request,
                          struct osc_brw_async_args *aa, int rc)
 {
 static int brw_interpret(struct ptlrpc_request *request,
                          struct osc_brw_async_args *aa, int rc)
 {
+        struct obdo *oa      = aa->aa_oa;
         int requested_nob    = aa->aa_requested_nob;
         int niocount         = aa->aa_nio_count;
         obd_count page_count = aa->aa_page_count;
         int requested_nob    = aa->aa_requested_nob;
         int niocount         = aa->aa_nio_count;
         obd_count page_count = aa->aa_page_count;
@@ -1002,14 +1020,14 @@ static int brw_interpret(struct ptlrpc_request *request,
                 //goto restart_bulk;
         }
 
                 //goto restart_bulk;
         }
 
-        rc = osc_brw_fini_request (request, requested_nob, niocount,
-                                   page_count, pga, rc);
+        rc = osc_brw_fini_request(request, oa, requested_nob, niocount,
+                                  page_count, pga, rc);
         RETURN (rc);
 }
 
         RETURN (rc);
 }
 
-static int async_internal(struct lustre_handle *conn, struct lov_stripe_md *lsm,
-                          obd_count page_count, struct brw_page *pga,
-                          struct ptlrpc_request_set *set, int cmd)
+static int async_internal(int cmd, struct lustre_handle *conn, struct obdo *oa,
+                          struct lov_stripe_md *lsm, obd_count page_count,
+                          struct brw_page *pga, struct ptlrpc_request_set *set)
 {
         struct ptlrpc_request     *request;
         int                        requested_nob;
 {
         struct ptlrpc_request     *request;
         int                        requested_nob;
@@ -1018,14 +1036,15 @@ static int async_internal(struct lustre_handle *conn, struct lov_stripe_md *lsm,
         int                        rc;
         ENTRY;
 
         int                        rc;
         ENTRY;
 
-        rc = osc_brw_prep_request (class_conn2cliimp(conn),
-                                   lsm, page_count, pga, cmd,
-                                   &requested_nob, &nio_count, &request);
+        rc = osc_brw_prep_request(cmd, class_conn2cliimp(conn), oa, lsm,
+                                  page_count, pga, &requested_nob, &nio_count,
+                                  &request);
         /* NB ^ sets rq_no_resend */
 
         if (rc == 0) {
         /* NB ^ sets rq_no_resend */
 
         if (rc == 0) {
-                LASSERT (sizeof (*aa) <= sizeof (request->rq_async_args));
+                LASSERT(sizeof(*aa) <= sizeof(request->rq_async_args));
                 aa = (struct osc_brw_async_args *)&request->rq_async_args;
                 aa = (struct osc_brw_async_args *)&request->rq_async_args;
+                aa->aa_oa = oa;
                 aa->aa_requested_nob = requested_nob;
                 aa->aa_nio_count = nio_count;
                 aa->aa_page_count = page_count;
                 aa->aa_requested_nob = requested_nob;
                 aa->aa_nio_count = nio_count;
                 aa->aa_page_count = page_count;
@@ -1096,7 +1115,7 @@ static obd_count check_elan_limit(struct brw_page *pg, obd_count pages)
         return i;
 }
 
         return i;
 }
 
-static int osc_brw(int cmd, struct lustre_handle *conn,
+static int osc_brw(int cmd, struct lustre_handle *conn, struct obdo *oa,
                    struct lov_stripe_md *md, obd_count page_count,
                    struct brw_page *pga, struct obd_trans_info *oti)
 {
                    struct lov_stripe_md *md, obd_count page_count,
                    struct brw_page *pga, struct obd_trans_info *oti)
 {
@@ -1124,7 +1143,7 @@ static int osc_brw(int cmd, struct lustre_handle *conn,
                 sort_brw_pages(pga, pages_per_brw);
                 pages_per_brw = check_elan_limit(pga, pages_per_brw);
 
                 sort_brw_pages(pga, pages_per_brw);
                 pages_per_brw = check_elan_limit(pga, pages_per_brw);
 
-                rc = osc_brw_internal(conn, md, pages_per_brw, pga, cmd);
+                rc = osc_brw_internal(cmd, conn, oa, md, pages_per_brw, pga);
 
                 if (rc != 0)
                         RETURN(rc);
 
                 if (rc != 0)
                         RETURN(rc);
@@ -1135,7 +1154,7 @@ static int osc_brw(int cmd, struct lustre_handle *conn,
         RETURN(0);
 }
 
         RETURN(0);
 }
 
-static int osc_brw_async(int cmd, struct lustre_handle *conn,
+static int osc_brw_async(int cmd, struct lustre_handle *conn, struct obdo *oa,
                          struct lov_stripe_md *md, obd_count page_count,
                          struct brw_page *pga, struct ptlrpc_request_set *set,
                          struct obd_trans_info *oti)
                          struct lov_stripe_md *md, obd_count page_count,
                          struct brw_page *pga, struct ptlrpc_request_set *set,
                          struct obd_trans_info *oti)
@@ -1164,7 +1183,7 @@ static int osc_brw_async(int cmd, struct lustre_handle *conn,
                 sort_brw_pages(pga, pages_per_brw);
                 pages_per_brw = check_elan_limit(pga, pages_per_brw);
 
                 sort_brw_pages(pga, pages_per_brw);
                 pages_per_brw = check_elan_limit(pga, pages_per_brw);
 
-                rc = async_internal(conn, md, pages_per_brw, pga, set, cmd);
+                rc = async_internal(cmd, conn, oa, md, pages_per_brw, pga, set);
 
                 if (rc != 0)
                         RETURN(rc);
 
                 if (rc != 0)
                         RETURN(rc);
@@ -1178,9 +1197,8 @@ static int osc_brw_async(int cmd, struct lustre_handle *conn,
 #ifdef __KERNEL__
 /* Note: caller will lock/unlock, and set uptodate on the pages */
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
 #ifdef __KERNEL__
 /* Note: caller will lock/unlock, and set uptodate on the pages */
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-static int sanosc_brw_read(struct lustre_handle *conn,
-                           struct lov_stripe_md *lsm,
-                           obd_count page_count,
+static int sanosc_brw_read(struct lustre_handle *conn, struct obdo *oa,
+                           struct lov_stripe_md *lsm, obd_count page_count,
                            struct brw_page *pga)
 {
         struct ptlrpc_request *request = NULL;
                            struct brw_page *pga)
 {
         struct ptlrpc_request *request = NULL;
@@ -1201,14 +1219,16 @@ static int sanosc_brw_read(struct lustre_handle *conn,
         if (!request)
                 RETURN(-ENOMEM);
 
         if (!request)
                 RETURN(-ENOMEM);
 
-        body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof (*body));
-        iooptr = lustre_msg_buf(request->rq_reqmsg, 1, sizeof (*iooptr));
+        body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof(*body));
+        iooptr = lustre_msg_buf(request->rq_reqmsg, 1, sizeof(*iooptr));
         nioptr = lustre_msg_buf(request->rq_reqmsg, 2,
         nioptr = lustre_msg_buf(request->rq_reqmsg, 2,
-                                sizeof (*nioptr) * page_count);
+                                sizeof(*nioptr) * page_count);
+
+        memcpy(&body->oa, oa, sizeof(body->oa));
 
 
-        iooptr->ioo_id = lsm->lsm_object_id;
-        iooptr->ioo_gr = 0;
-        iooptr->ioo_type = S_IFREG;
+        iooptr->ioo_id = oa->o_id;
+        iooptr->ioo_gr = oa->o_valid & 0 ? oa->o_gr : 0;
+        iooptr->ioo_type = oa->o_mode;
         iooptr->ioo_bufcnt = page_count;
 
         for (mapped = 0; mapped < page_count; mapped++, nioptr++) {
         iooptr->ioo_bufcnt = page_count;
 
         for (mapped = 0; mapped < page_count; mapped++, nioptr++) {
@@ -1227,8 +1247,17 @@ static int sanosc_brw_read(struct lustre_handle *conn,
         if (rc)
                 GOTO(out_req, rc);
 
         if (rc)
                 GOTO(out_req, rc);
 
-        swab = lustre_msg_swabbed (request->rq_repmsg);
-        LASSERT_REPSWAB (request, 1);
+        body = lustre_swab_repbuf(request, 0, sizeof(*body),
+                                  lustre_swab_ost_body);
+        if (body == NULL) {
+                CERROR("Can't unpack body\n");
+                GOTO(out_req, rc = -EPROTO);
+        }
+
+        memcpy(oa, &body->oa, sizeof(*oa));
+
+        swab = lustre_msg_swabbed(request->rq_repmsg);
+        LASSERT_REPSWAB(request, 1);
         nioptr = lustre_msg_buf(request->rq_repmsg, 1, size[1]);
         if (!nioptr) {
                 /* nioptr missing or short */
         nioptr = lustre_msg_buf(request->rq_repmsg, 1, size[1]);
         if (!nioptr) {
                 /* nioptr missing or short */
@@ -1300,9 +1329,8 @@ out_req:
         RETURN(rc);
 }
 
         RETURN(rc);
 }
 
-static int sanosc_brw_write(struct lustre_handle *conn,
-                            struct lov_stripe_md *lsm,
-                            obd_count page_count,
+static int sanosc_brw_write(struct lustre_handle *conn, struct obdo *oa,
+                            struct lov_stripe_md *lsm, obd_count page_count,
                             struct brw_page *pga)
 {
         struct ptlrpc_request *request = NULL;
                             struct brw_page *pga)
 {
         struct ptlrpc_request *request = NULL;
@@ -1326,9 +1354,11 @@ static int sanosc_brw_write(struct lustre_handle *conn,
         nioptr = lustre_msg_buf(request->rq_reqmsg, 2,
                                 sizeof (*nioptr) * page_count);
 
         nioptr = lustre_msg_buf(request->rq_reqmsg, 2,
                                 sizeof (*nioptr) * page_count);
 
-        iooptr->ioo_id = lsm->lsm_object_id;
-        iooptr->ioo_gr = 0;
-        iooptr->ioo_type = S_IFREG;
+        memcpy(&body->oa, oa, sizeof(body->oa));
+
+        iooptr->ioo_id = oa->o_id;
+        iooptr->ioo_gr = oa->o_valid & 0 ? oa->o_gr : 0;
+        iooptr->ioo_type = oa->o_mode;
         iooptr->ioo_bufcnt = page_count;
 
         /* pack request */
         iooptr->ioo_bufcnt = page_count;
 
         /* pack request */
@@ -1414,7 +1444,7 @@ out_req:
         RETURN(rc);
 }
 
         RETURN(rc);
 }
 
-static int sanosc_brw(int cmd, struct lustre_handle *conn,
+static int sanosc_brw(int cmd, struct lustre_handle *conn, struct obdo *oa,
                       struct lov_stripe_md *lsm, obd_count page_count,
                       struct brw_page *pga, struct obd_trans_info *oti)
 {
                       struct lov_stripe_md *lsm, obd_count page_count,
                       struct brw_page *pga, struct obd_trans_info *oti)
 {
@@ -1430,9 +1460,9 @@ static int sanosc_brw(int cmd, struct lustre_handle *conn,
                         pages_per_brw = page_count;
 
                 if (cmd & OBD_BRW_WRITE)
                         pages_per_brw = page_count;
 
                 if (cmd & OBD_BRW_WRITE)
-                        rc = sanosc_brw_write(conn, lsm, pages_per_brw, pga);
+                        rc = sanosc_brw_write(conn, oa, lsm, pages_per_brw,pga);
                 else
                 else
-                        rc = sanosc_brw_read(conn, lsm, pages_per_brw, pga);
+                        rc = sanosc_brw_read(conn, oa, lsm, pages_per_brw, pga);
 
                 if (rc != 0)
                         RETURN(rc);
 
                 if (rc != 0)
                         RETURN(rc);
@@ -1445,7 +1475,7 @@ static int sanosc_brw(int cmd, struct lustre_handle *conn,
 #endif
 #endif
 
 #endif
 #endif
 
-static int osc_mark_page_dirty(struct lustre_handle *conn, 
+static int osc_mark_page_dirty(struct lustre_handle *conn,
                                struct lov_stripe_md *lsm, unsigned long offset)
 {
         struct client_obd *cli = &class_conn2obd(conn)->u.cli;
                                struct lov_stripe_md *lsm, unsigned long offset)
 {
         struct client_obd *cli = &class_conn2obd(conn)->u.cli;
@@ -1455,12 +1485,14 @@ static int osc_mark_page_dirty(struct lustre_handle *conn,
 
         down(&cli->cl_dirty_sem);
 
 
         down(&cli->cl_dirty_sem);
 
-        if (cli->cl_ost_can_grant && 
+#if 0
+        if (cli->cl_ost_can_grant &&
             (cli->cl_dirty + PAGE_CACHE_SIZE >= cli->cl_dirty_granted)) {
                 CDEBUG(D_INODE, "granted "LPU64" < "LPU64"\n",
                        cli->cl_dirty_granted, cli->cl_dirty + PAGE_CACHE_SIZE);
                 GOTO(out, rc = -EDQUOT);
         }
             (cli->cl_dirty + PAGE_CACHE_SIZE >= cli->cl_dirty_granted)) {
                 CDEBUG(D_INODE, "granted "LPU64" < "LPU64"\n",
                        cli->cl_dirty_granted, cli->cl_dirty + PAGE_CACHE_SIZE);
                 GOTO(out, rc = -EDQUOT);
         }
+#endif
 
         rc = ot_mark_offset(dirty_ot, offset);
         if (rc)
 
         rc = ot_mark_offset(dirty_ot, offset);
         if (rc)
@@ -1474,7 +1506,7 @@ out:
         RETURN(rc);
 }
 
         RETURN(rc);
 }
 
-static int osc_clear_dirty_pages(struct lustre_handle *conn, 
+static int osc_clear_dirty_pages(struct lustre_handle *conn,
                                  struct lov_stripe_md *lsm,
                                  unsigned long start, unsigned long end,
                                  unsigned long *cleared)
                                  struct lov_stripe_md *lsm,
                                  unsigned long start, unsigned long end,
                                  unsigned long *cleared)
@@ -1526,7 +1558,7 @@ static int osc_enqueue(struct lustre_handle *connh, struct lov_stripe_md *lsm,
                        struct lustre_handle *lockh)
 {
         struct ldlm_res_id res_id = { .name = {lsm->lsm_object_id} };
                        struct lustre_handle *lockh)
 {
         struct ldlm_res_id res_id = { .name = {lsm->lsm_object_id} };
-        struct obd_device *obddev = class_conn2obd(connh);
+        struct obd_device *obd = class_conn2obd(connh);
         struct ldlm_extent *extent = extentp;
         int rc;
         ENTRY;
         struct ldlm_extent *extent = extentp;
         int rc;
         ENTRY;
@@ -1537,7 +1569,7 @@ static int osc_enqueue(struct lustre_handle *connh, struct lov_stripe_md *lsm,
         extent->end |= ~PAGE_MASK;
 
         /* Next, search for already existing extent locks that will cover us */
         extent->end |= ~PAGE_MASK;
 
         /* Next, search for already existing extent locks that will cover us */
-        rc = ldlm_lock_match(obddev->obd_namespace, LDLM_FL_MATCH_DATA, &res_id,
+        rc = ldlm_lock_match(obd->obd_namespace, LDLM_FL_MATCH_DATA, &res_id,
                              type, extent, sizeof(extent), mode, data, lockh);
         if (rc == 1)
                 /* We already have a lock, and it's referenced */
                              type, extent, sizeof(extent), mode, data, lockh);
         if (rc == 1)
                 /* We already have a lock, and it's referenced */
@@ -1556,7 +1588,7 @@ static int osc_enqueue(struct lustre_handle *connh, struct lov_stripe_md *lsm,
          * locks out from other users right now, too. */
 
         if (mode == LCK_PR) {
          * locks out from other users right now, too. */
 
         if (mode == LCK_PR) {
-                rc = ldlm_lock_match(obddev->obd_namespace, LDLM_FL_MATCH_DATA,
+                rc = ldlm_lock_match(obd->obd_namespace, LDLM_FL_MATCH_DATA,
                                      &res_id, type, extent, sizeof(extent),
                                      LCK_PW, data, lockh);
                 if (rc == 1) {
                                      &res_id, type, extent, sizeof(extent),
                                      LCK_PW, data, lockh);
                 if (rc == 1) {
@@ -1570,7 +1602,7 @@ static int osc_enqueue(struct lustre_handle *connh, struct lov_stripe_md *lsm,
                 }
         }
 
                 }
         }
 
-        rc = ldlm_cli_enqueue(connh, NULL, obddev->obd_namespace, parent_lock,
+        rc = ldlm_cli_enqueue(connh, NULL, obd->obd_namespace, parent_lock,
                               res_id, type, extent, sizeof(extent), mode, flags,
                               ldlm_completion_ast, callback, data, lockh);
         RETURN(rc);
                               res_id, type, extent, sizeof(extent), mode, flags,
                               ldlm_completion_ast, callback, data, lockh);
         RETURN(rc);
@@ -1581,7 +1613,7 @@ static int osc_match(struct lustre_handle *connh, struct lov_stripe_md *lsm,
                        int *flags, void *data, struct lustre_handle *lockh)
 {
         struct ldlm_res_id res_id = { .name = {lsm->lsm_object_id} };
                        int *flags, void *data, struct lustre_handle *lockh)
 {
         struct ldlm_res_id res_id = { .name = {lsm->lsm_object_id} };
-        struct obd_device *obddev = class_conn2obd(connh);
+        struct obd_device *obd = class_conn2obd(connh);
         struct ldlm_extent *extent = extentp;
         int rc;
         ENTRY;
         struct ldlm_extent *extent = extentp;
         int rc;
         ENTRY;
@@ -1592,7 +1624,7 @@ static int osc_match(struct lustre_handle *connh, struct lov_stripe_md *lsm,
         extent->end |= ~PAGE_MASK;
 
         /* Next, search for already existing extent locks that will cover us */
         extent->end |= ~PAGE_MASK;
 
         /* Next, search for already existing extent locks that will cover us */
-        rc = ldlm_lock_match(obddev->obd_namespace, *flags, &res_id, type,
+        rc = ldlm_lock_match(obd->obd_namespace, *flags, &res_id, type,
                              extent, sizeof(extent), mode, data, lockh);
         if (rc)
                 RETURN(rc);
                              extent, sizeof(extent), mode, data, lockh);
         if (rc)
                 RETURN(rc);
@@ -1601,7 +1633,7 @@ static int osc_match(struct lustre_handle *connh, struct lov_stripe_md *lsm,
          * VFS and page cache already protect us locally, so lots of readers/
          * writers can share a single PW lock. */
         if (mode == LCK_PR) {
          * VFS and page cache already protect us locally, so lots of readers/
          * writers can share a single PW lock. */
         if (mode == LCK_PR) {
-                rc = ldlm_lock_match(obddev->obd_namespace, *flags, &res_id,
+                rc = ldlm_lock_match(obd->obd_namespace, *flags, &res_id,
                                      type, extent, sizeof(extent), LCK_PW,
                                      data, lockh);
                 if (rc == 1) {
                                      type, extent, sizeof(extent), LCK_PW,
                                      data, lockh);
                 if (rc == 1) {
@@ -1628,22 +1660,28 @@ static int osc_cancel(struct lustre_handle *oconn, struct lov_stripe_md *md,
 static int osc_cancel_unused(struct lustre_handle *connh,
                              struct lov_stripe_md *lsm, int flags, void *opaque)
 {
 static int osc_cancel_unused(struct lustre_handle *connh,
                              struct lov_stripe_md *lsm, int flags, void *opaque)
 {
-        struct obd_device *obddev = class_conn2obd(connh);
+        struct obd_device *obd = class_conn2obd(connh);
         struct ldlm_res_id res_id = { .name = {lsm->lsm_object_id} };
 
         struct ldlm_res_id res_id = { .name = {lsm->lsm_object_id} };
 
-        return ldlm_cli_cancel_unused(obddev->obd_namespace, &res_id, flags,
+        return ldlm_cli_cancel_unused(obd->obd_namespace, &res_id, flags,
                                       opaque);
 }
 
                                       opaque);
 }
 
-static int osc_statfs(struct obd_export *exp, struct obd_statfs *osfs)
+static int osc_statfs(struct obd_device *obd, struct obd_statfs *osfs,
+                      unsigned long max_age)
 {
         struct obd_statfs *msfs;
         struct ptlrpc_request *request;
         int rc, size = sizeof(*osfs);
         ENTRY;
 
 {
         struct obd_statfs *msfs;
         struct ptlrpc_request *request;
         int rc, size = sizeof(*osfs);
         ENTRY;
 
-        request = ptlrpc_prep_req(exp->exp_obd->u.cli.cl_import, OST_STATFS, 0, 
-                                  NULL, NULL);
+        /* We could possibly pass max_age in the request (as an absolute
+         * timestamp or a "seconds.usec ago") so the target can avoid doing
+         * extra calls into the filesystem if that isn't necessary (e.g.
+         * during mount that would help a bit).  Having relative timestamps
+         * is not so great if request processing is slow, while absolute
+         * timestamps are not ideal because they need time synchronization. */
+        request = ptlrpc_prep_req(obd->u.cli.cl_import, OST_STATFS,0,NULL,NULL);
         if (!request)
                 RETURN(-ENOMEM);
 
         if (!request)
                 RETURN(-ENOMEM);
 
@@ -1655,14 +1693,14 @@ static int osc_statfs(struct obd_export *exp, struct obd_statfs *osfs)
                 GOTO(out, rc);
         }
 
                 GOTO(out, rc);
         }
 
-        msfs = lustre_swab_repbuf (request, 0, sizeof (*msfs),
-                                   lustre_swab_obd_statfs);
+        msfs = lustre_swab_repbuf(request, 0, sizeof(*msfs),
+                                  lustre_swab_obd_statfs);
         if (msfs == NULL) {
         if (msfs == NULL) {
-                CERROR ("Can't unpack obd_statfs\n");
-                GOTO (out, rc = -EPROTO);
+                CERROR("Can't unpack obd_statfs\n");
+                GOTO(out, rc = -EPROTO);
         }
 
         }
 
-        memcpy (osfs, msfs, sizeof (*msfs));
+        memcpy(osfs, msfs, sizeof(*osfs));
 
         EXIT;
  out:
 
         EXIT;
  out:
@@ -1717,16 +1755,16 @@ static int osc_getstripe(struct lustre_handle *conn, struct lov_stripe_md *lsm,
 static int osc_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
                          void *karg, void *uarg)
 {
 static int osc_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
                          void *karg, void *uarg)
 {
-        struct obd_device *obddev = class_conn2obd(conn);
+        struct obd_device *obd = class_conn2obd(conn);
         struct obd_ioctl_data *data = karg;
         int err = 0;
         ENTRY;
 
         switch (cmd) {
         case IOC_OSC_REGISTER_LOV: {
         struct obd_ioctl_data *data = karg;
         int err = 0;
         ENTRY;
 
         switch (cmd) {
         case IOC_OSC_REGISTER_LOV: {
-                if (obddev->u.cli.cl_containing_lov)
+                if (obd->u.cli.cl_containing_lov)
                         GOTO(out, err = -EALREADY);
                         GOTO(out, err = -EALREADY);
-                obddev->u.cli.cl_containing_lov = (struct obd_device *)karg;
+                obd->u.cli.cl_containing_lov = (struct obd_device *)karg;
                 GOTO(out, err);
         }
         case OBD_IOC_LOV_GET_CONFIG: {
                 GOTO(out, err);
         }
         case OBD_IOC_LOV_GET_CONFIG: {
@@ -1758,9 +1796,9 @@ static int osc_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
                 desc->ld_default_stripe_size = 0;
                 desc->ld_default_stripe_offset = 0;
                 desc->ld_pattern = 0;
                 desc->ld_default_stripe_size = 0;
                 desc->ld_default_stripe_offset = 0;
                 desc->ld_pattern = 0;
-                memcpy(&desc->ld_uuid, &obddev->obd_uuid, sizeof(uuid));
+                memcpy(&desc->ld_uuid, &obd->obd_uuid, sizeof(uuid));
 
 
-                memcpy(data->ioc_inlbuf2, &obddev->obd_uuid, sizeof(uuid));
+                memcpy(data->ioc_inlbuf2, &obd->obd_uuid, sizeof(uuid));
 
                 err = copy_to_user((void *)uarg, buf, len);
                 if (err)
 
                 err = copy_to_user((void *)uarg, buf, len);
                 if (err)
@@ -1777,15 +1815,15 @@ static int osc_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
                 err = osc_getstripe(conn, karg, uarg);
                 GOTO(out, err);
         case OBD_IOC_CLIENT_RECOVER:
                 err = osc_getstripe(conn, karg, uarg);
                 GOTO(out, err);
         case OBD_IOC_CLIENT_RECOVER:
-                err = ptlrpc_recover_import(obddev->u.cli.cl_import,
+                err = ptlrpc_recover_import(obd->u.cli.cl_import,
                                             data->ioc_inlbuf1);
                 GOTO(out, err);
         case IOC_OSC_SET_ACTIVE:
                                             data->ioc_inlbuf1);
                 GOTO(out, err);
         case IOC_OSC_SET_ACTIVE:
-                err = ptlrpc_set_import_active(obddev->u.cli.cl_import,
+                err = ptlrpc_set_import_active(obd->u.cli.cl_import,
                                                data->ioc_offset);
                 GOTO(out, err);
         default:
                                                data->ioc_offset);
                 GOTO(out, err);
         default:
-                CERROR ("osc_ioctl(): unrecognised ioctl %#x\n", cmd);
+                CERROR("unrecognised ioctl %#x by %s\n", cmd, current->comm);
                 GOTO(out, err = -ENOTTY);
         }
 out:
                 GOTO(out, err = -ENOTTY);
         }
 out:
@@ -1809,6 +1847,104 @@ static int osc_get_info(struct lustre_handle *conn, obd_count keylen,
         RETURN(-EINVAL);
 }
 
         RETURN(-EINVAL);
 }
 
+/* Forward a "set info" key to the target over this connection's import.
+ *
+ * Only keys whose leading bytes are "mds_conn" are accepted; anything else
+ * is rejected with -EINVAL.  The key (keylen bytes) is sent as the single
+ * request buffer of an OST_SET_INFO request.  vallen and val are not
+ * referenced anywhere in this function.
+ *
+ * Returns -EINVAL for an unsupported key, -ENOMEM if the request cannot be
+ * prepared, otherwise whatever ptlrpc_queue_wait() returns. */
+static int osc_set_info(struct lustre_handle *conn, obd_count keylen,
+                        void *key, obd_count vallen, void *val)
+{
+        struct ptlrpc_request *req;
+        int rc, size = keylen;
+        char *bufs[1] = {key};
+        ENTRY;
+
+        /* prefix match: keylen may be longer than "mds_conn" itself */
+        if (keylen < strlen("mds_conn") ||
+            memcmp(key, "mds_conn", strlen("mds_conn")) != 0)
+                RETURN(-EINVAL);
+
+        /* one request buffer, of 'size' bytes, holding the key */
+        req = ptlrpc_prep_req(class_conn2cliimp(conn), OST_SET_INFO, 1,
+                              &size, bufs);
+        if (req == NULL)
+                RETURN(-ENOMEM);
+
+        /* reply length is computed for zero buffers */
+        req->rq_replen = lustre_msg_size(0, NULL);
+        rc = ptlrpc_queue_wait(req);
+        /* the request is finished whether or not the wait succeeded */
+        ptlrpc_req_finished(req);
+        RETURN(rc);
+}
+
+/* Queue one llog cookie on the client's pending llcd (cli->cl_llcd) and send
+ * the llcd once it is full or the caller passes OBD_LLOG_FL_SENDNOW.
+ *
+ * When there is nothing to queue (count == 0, cookies == NULL, or the first
+ * cookie compares equal to zero_cookie) no cookie is copied; an existing
+ * llcd is sent only if OBD_LLOG_FL_SENDNOW is set, otherwise the call is a
+ * no-op.
+ *
+ * Exactly sizeof(*cookies) bytes are copied per call, i.e. only the first
+ * cookie is queued regardless of 'count'.  'lsm' is not referenced.
+ * cli->cl_sem is taken on every path and released at 'out'.
+ *
+ * Returns 0, or -ENOMEM if llcd_grab() fails (the cookie is then dropped).
+ *
+ * NOTE(review): the function opens with ENTRY but leaves through a plain
+ * 'return', not RETURN() as the other functions in this file do. */
+static int osc_log_cancel(struct lustre_handle *conn, struct lov_stripe_md *lsm,
+                          int count, struct llog_cookie *cookies, int flags)
+{
+        struct obd_device *obd = class_conn2obd(conn);
+        struct llog_commit_data *llcd;
+        struct client_obd *cli;
+        int rc = 0;
+        ENTRY;
+
+        cli = &obd->u.cli;
+        /* no cookie supplied: at most flush what is already pending */
+        if ((count == 0 || cookies == NULL ||
+             memcmp(cookies, &zero_cookie, sizeof(*cookies)) == 0)) {
+                down(&cli->cl_sem);
+                if (cli->cl_llcd == NULL || !(flags & OBD_LLOG_FL_SENDNOW))
+                        GOTO(out, rc);
+
+                /* skip the copy below and go straight to the send check */
+                llcd = cli->cl_llcd;
+                GOTO(send_now, rc);
+        }
+
+        down(&cli->cl_sem);
+        llcd = cli->cl_llcd;
+        if (llcd == NULL) {
+                /* no pending llcd yet: grab one and attach it to the client */
+                llcd = llcd_grab();
+                if (llcd == NULL) {
+                        CERROR("couldn't get an llcd - dropped "LPX64":%x+%u\n",
+                               cookies->lgc_lgl.lgl_oid,
+                               cookies->lgc_lgl.lgl_ogen, cookies->lgc_index);
+                        GOTO(out, rc = -ENOMEM);
+                }
+                llcd->llcd_import = cli->cl_import;
+                cli->cl_llcd = llcd;
+        }
+
+        /* append the cookie after the bytes already queued */
+        memcpy(llcd->llcd_cookies + llcd->llcd_cookiebytes, cookies,
+               sizeof(*cookies));
+        llcd->llcd_cookiebytes += sizeof(*cookies);
+
+        /* If we can't fit any more cookies into the page, we need to send it */
+send_now:
+        if ((PAGE_SIZE - llcd->llcd_cookiebytes < sizeof(*cookies) ||
+             flags & OBD_LLOG_FL_SENDNOW)) {
+                /* detach before sending so the next cookie grabs a new llcd */
+                cli->cl_llcd = NULL;
+                llcd_send(llcd);
+        }
+out:
+        up(&cli->cl_sem);
+
+        return rc;
+}
+
+/* Disconnect handler for the OSC: if the client still has a pending llcd,
+ * ask osc_log_cancel() to send it (no new cookie, OBD_LLOG_FL_SENDNOW), then
+ * hand off to client_import_disconnect().
+ *
+ * The result of osc_log_cancel() is ignored; the return value is that of
+ * client_import_disconnect(). */
+static int osc_disconnect(struct lustre_handle *conn, int flags)
+{
+        struct obd_device *obd = class_conn2obd(conn);
+
+        /* flush any remaining cancel messages out to the target */
+        if (obd->u.cli.cl_llcd)
+                osc_log_cancel(conn, NULL, 0, NULL, OBD_LLOG_FL_SENDNOW);
+
+        return client_import_disconnect(conn, flags);
+}
+
+/* Add a log record through llog_add_record(cathandle, rec, logcookies).
+ *
+ * The caller must supply a non-NULL logcookies and numcookies > 0; this is
+ * enforced with LASSERT.  'conn' and 'lsm' are not referenced here.
+ *
+ * Always returns 1.
+ * NOTE(review): any result from llog_add_record() is not examined here --
+ * confirm whether it can fail and whether 1 means "one cookie used". */
+static int osc_log_add(struct lustre_handle *conn,
+                       struct llog_handle *cathandle,
+                       struct llog_trans_hdr *rec, struct lov_stripe_md *lsm,
+                       struct llog_cookie *logcookies, int numcookies)
+{
+        ENTRY;
+        LASSERT(logcookies && numcookies > 0);
+
+        llog_add_record(cathandle, rec, logcookies);
+
+        RETURN(1);
+}
+
 struct obd_ops osc_obd_ops = {
         o_owner:        THIS_MODULE,
         o_attach:       osc_attach,
 struct obd_ops osc_obd_ops = {
         o_owner:        THIS_MODULE,
         o_attach:       osc_attach,
@@ -1816,14 +1952,14 @@ struct obd_ops osc_obd_ops = {
         o_setup:        client_obd_setup,
         o_cleanup:      client_obd_cleanup,
         o_connect:      client_import_connect,
         o_setup:        client_obd_setup,
         o_cleanup:      client_obd_cleanup,
         o_connect:      client_import_connect,
-        o_disconnect:   client_import_disconnect,
+        o_disconnect:   osc_disconnect,
         o_statfs:       osc_statfs,
         o_packmd:       osc_packmd,
         o_unpackmd:     osc_unpackmd,
         o_create:       osc_create,
         o_destroy:      osc_destroy,
         o_getattr:      osc_getattr,
         o_statfs:       osc_statfs,
         o_packmd:       osc_packmd,
         o_unpackmd:     osc_unpackmd,
         o_create:       osc_create,
         o_destroy:      osc_destroy,
         o_getattr:      osc_getattr,
-        o_getattr_async: osc_getattr_async,
+        o_getattr_async:osc_getattr_async,
         o_setattr:      osc_setattr,
         o_open:         osc_open,
         o_close:        osc_close,
         o_setattr:      osc_setattr,
         o_open:         osc_open,
         o_close:        osc_close,
@@ -1833,14 +1969,18 @@ struct obd_ops osc_obd_ops = {
         o_enqueue:      osc_enqueue,
         o_match:        osc_match,
         o_cancel:       osc_cancel,
         o_enqueue:      osc_enqueue,
         o_match:        osc_match,
         o_cancel:       osc_cancel,
-        o_cancel_unused: osc_cancel_unused,
+        o_cancel_unused:osc_cancel_unused,
         o_iocontrol:    osc_iocontrol,
         o_get_info:     osc_get_info,
         o_iocontrol:    osc_iocontrol,
         o_get_info:     osc_get_info,
-        .o_mark_page_dirty =    osc_mark_page_dirty,
-        .o_clear_dirty_pages =  osc_clear_dirty_pages,
-        .o_last_dirty_offset =  osc_last_dirty_offset,
+        o_set_info:     osc_set_info,
+        o_log_cancel:   osc_log_cancel,
+        o_log_add:      osc_log_add,
+        o_mark_page_dirty:    osc_mark_page_dirty,
+        o_clear_dirty_pages:  osc_clear_dirty_pages,
+        o_last_dirty_offset:  osc_last_dirty_offset,
 };
 
 };
 
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
 struct obd_ops sanosc_obd_ops = {
         o_owner:        THIS_MODULE,
         o_attach:       osc_attach,
 struct obd_ops sanosc_obd_ops = {
         o_owner:        THIS_MODULE,
         o_attach:       osc_attach,
@@ -1858,48 +1998,54 @@ struct obd_ops sanosc_obd_ops = {
         o_setattr:      osc_setattr,
         o_open:         osc_open,
         o_close:        osc_close,
         o_setattr:      osc_setattr,
         o_open:         osc_open,
         o_close:        osc_close,
-#ifdef __KERNEL__
         o_setup:        client_sanobd_setup,
         o_brw:          sanosc_brw,
         o_setup:        client_sanobd_setup,
         o_brw:          sanosc_brw,
-#endif
         o_punch:        osc_punch,
         o_enqueue:      osc_enqueue,
         o_match:        osc_match,
         o_cancel:       osc_cancel,
         o_cancel_unused: osc_cancel_unused,
         o_iocontrol:    osc_iocontrol,
         o_punch:        osc_punch,
         o_enqueue:      osc_enqueue,
         o_match:        osc_match,
         o_cancel:       osc_cancel,
         o_cancel_unused: osc_cancel_unused,
         o_iocontrol:    osc_iocontrol,
-        .o_mark_page_dirty =    osc_mark_page_dirty,
-        .o_clear_dirty_pages =  osc_clear_dirty_pages,
-        .o_last_dirty_offset =  osc_last_dirty_offset,
+        o_log_cancel:   osc_log_cancel,
+        o_log_add:      osc_log_add,
+        o_mark_page_dirty:   osc_mark_page_dirty,
+        o_clear_dirty_pages: osc_clear_dirty_pages,
+        o_last_dirty_offset: osc_last_dirty_offset,
 };
 };
+#endif
 
 int __init osc_init(void)
 {
 
 int __init osc_init(void)
 {
-        struct lprocfs_static_vars lvars;
+        struct lprocfs_static_vars lvars, sanlvars;
         int rc;
         ENTRY;
 
         LASSERT(sizeof(struct obd_client_handle) <= FD_OSTDATA_SIZE);
         LASSERT(sizeof(struct obd_client_handle) <= OBD_INLINESZ);
 
         int rc;
         ENTRY;
 
         LASSERT(sizeof(struct obd_client_handle) <= FD_OSTDATA_SIZE);
         LASSERT(sizeof(struct obd_client_handle) <= OBD_INLINESZ);
 
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(osc,&lvars);
+        lprocfs_init_vars(osc,&sanlvars);
 
         rc = class_register_type(&osc_obd_ops, lvars.module_vars,
                                  LUSTRE_OSC_NAME);
         if (rc)
                 RETURN(rc);
 
 
         rc = class_register_type(&osc_obd_ops, lvars.module_vars,
                                  LUSTRE_OSC_NAME);
         if (rc)
                 RETURN(rc);
 
-        rc = class_register_type(&sanosc_obd_ops, lvars.module_vars,
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+        rc = class_register_type(&sanosc_obd_ops, sanlvars.module_vars,
                                  LUSTRE_SANOSC_NAME);
         if (rc)
                 class_unregister_type(LUSTRE_OSC_NAME);
                                  LUSTRE_SANOSC_NAME);
         if (rc)
                 class_unregister_type(LUSTRE_OSC_NAME);
+#endif
 
         RETURN(rc);
 }
 
 
         RETURN(rc);
 }
 
-static void __exit osc_exit(void)
+static void /*__exit*/ osc_exit(void)
 {
 {
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
         class_unregister_type(LUSTRE_SANOSC_NAME);
         class_unregister_type(LUSTRE_SANOSC_NAME);
+#endif
         class_unregister_type(LUSTRE_OSC_NAME);
 }
 
         class_unregister_type(LUSTRE_OSC_NAME);
 }
 
index e530020..49c6100 100644 (file)
@@ -6,3 +6,4 @@ Makefile
 Makefile.in
 .deps
 TAGS
 Makefile.in
 .deps
 TAGS
+.*.cmd
index c44093c..936706d 100644 (file)
 #include <linux/lprocfs_status.h>
 
 #ifndef LPROCFS
 #include <linux/lprocfs_status.h>
 
 #ifndef LPROCFS
-struct lprocfs_vars lprocfs_obd_vars[]  = { {0} };
-struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_obd_vars[]  = { {0} };
+static struct lprocfs_vars lprocfs_module_vars[] = { {0} };
 #else
 #else
-struct lprocfs_vars lprocfs_obd_vars[] = {
-        { "uuid",        lprocfs_rd_uuid,   0, 0 },
+static struct lprocfs_vars lprocfs_obd_vars[] = {
+        { "uuid",            lprocfs_rd_uuid,   0, 0 },
         { 0 }
 };
 
         { 0 }
 };
 
-struct lprocfs_vars lprocfs_module_vars[] = {
-        { "num_refs",   lprocfs_rd_numrefs, 0, 0 },
+static struct lprocfs_vars lprocfs_module_vars[] = {
+        { "num_refs",       lprocfs_rd_numrefs, 0, 0 },
         { 0 }
 };
 
 #endif /* LPROCFS */
         { 0 }
 };
 
 #endif /* LPROCFS */
-LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
+LPROCFS_INIT_VARS(ost, lprocfs_module_vars, lprocfs_obd_vars)
index 023deb2..6801e92 100644 (file)
 #include <linux/lustre_export.h>
 #include <linux/init.h>
 #include <linux/lprocfs_status.h>
 #include <linux/lustre_export.h>
 #include <linux/init.h>
 #include <linux/lprocfs_status.h>
+#include <linux/lustre_commit_confd.h>
+#include <portals/list.h>
 
 
-inline void oti_init(struct obd_trans_info *oti,
-                           struct ptlrpc_request *req)
+void oti_init(struct obd_trans_info *oti, struct ptlrpc_request *req)
 {
 {
-        if(oti == NULL)
+        if (oti == NULL)
                 return;
         memset(oti, 0, sizeof *oti);
 
                 return;
         memset(oti, 0, sizeof *oti);
 
-        
         if (req->rq_repmsg && req->rq_reqmsg != 0)
                 oti->oti_transno = req->rq_repmsg->transno;
         if (req->rq_repmsg && req->rq_reqmsg != 0)
                 oti->oti_transno = req->rq_repmsg->transno;
-
-        EXIT;
 }
 
 }
 
-inline void oti_to_request(struct obd_trans_info *oti,
-                           struct ptlrpc_request *req)
+void oti_to_request(struct obd_trans_info *oti, struct ptlrpc_request *req)
 {
 {
-        int i;
         struct oti_req_ack_lock *ack_lock;
         struct oti_req_ack_lock *ack_lock;
+        int i;
 
 
-        if(oti == NULL)
+        if (oti == NULL)
                 return;
 
         if (req->rq_repmsg)
                 return;
 
         if (req->rq_repmsg)
@@ -75,7 +72,6 @@ inline void oti_to_request(struct obd_trans_info *oti,
                        sizeof(req->rq_ack_locks[i].lock));
                 req->rq_ack_locks[i].mode = ack_lock->mode;
         }
                        sizeof(req->rq_ack_locks[i].lock));
                 req->rq_ack_locks[i].mode = ack_lock->mode;
         }
-        EXIT;
 }
 
 static int ost_destroy(struct ptlrpc_request *req, struct obd_trans_info *oti)
 }
 
 static int ost_destroy(struct ptlrpc_request *req, struct obd_trans_info *oti)
@@ -85,15 +81,16 @@ static int ost_destroy(struct ptlrpc_request *req, struct obd_trans_info *oti)
         int rc, size = sizeof(*body);
         ENTRY;
 
         int rc, size = sizeof(*body);
         ENTRY;
 
-        body = lustre_swab_reqbuf (req, 0, sizeof (*body),
-                                   lustre_swab_ost_body);
+        body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
         if (body == NULL)
         if (body == NULL)
-                RETURN (-EFAULT);
+                RETURN(-EFAULT);
 
         rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
         if (rc)
                 RETURN(rc);
 
 
         rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
         if (rc)
                 RETURN(rc);
 
+        if (body->oa.o_valid & OBD_MD_FLCOOKIE)
+                oti->oti_logcookies = obdo_logcookie(&body->oa);
         req->rq_status = obd_destroy(conn, &body->oa, NULL, oti);
         RETURN(0);
 }
         req->rq_status = obd_destroy(conn, &body->oa, NULL, oti);
         RETURN(0);
 }
@@ -105,16 +102,15 @@ static int ost_getattr(struct ptlrpc_request *req)
         int rc, size = sizeof(*body);
         ENTRY;
 
         int rc, size = sizeof(*body);
         ENTRY;
 
-        body = lustre_swab_reqbuf (req, 0, sizeof (*body),
-                                   lustre_swab_ost_body);
+        body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
         if (body == NULL)
         if (body == NULL)
-                RETURN (-EFAULT);
+                RETURN(-EFAULT);
 
         rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
         if (rc)
                 RETURN(rc);
 
 
         rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
         if (rc)
                 RETURN(rc);
 
-        repbody = lustre_msg_buf (req->rq_repmsg, 0, sizeof (*repbody));
+        repbody = lustre_msg_buf (req->rq_repmsg, 0, sizeof(*repbody));
         memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
         req->rq_status = obd_getattr(conn, &repbody->oa, NULL);
         RETURN(0);
         memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
         req->rq_status = obd_getattr(conn, &repbody->oa, NULL);
         RETURN(0);
@@ -130,10 +126,9 @@ static int ost_statfs(struct ptlrpc_request *req)
         if (rc)
                 RETURN(rc);
 
         if (rc)
                 RETURN(rc);
 
-        osfs = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*osfs));
-        memset(osfs, 0, size);
+        osfs = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*osfs));
 
 
-        req->rq_status = obd_statfs(req->rq_export, osfs);
+        req->rq_status = obd_statfs(req->rq_export->exp_obd, osfs, jiffies-HZ);
         if (req->rq_status != 0)
                 CERROR("ost: statfs failed: rc %d\n", req->rq_status);
 
         if (req->rq_status != 0)
                 CERROR("ost: statfs failed: rc %d\n", req->rq_status);
 
@@ -167,16 +162,15 @@ static int ost_open(struct ptlrpc_request *req, struct obd_trans_info *oti)
         int rc, size = sizeof(*repbody);
         ENTRY;
 
         int rc, size = sizeof(*repbody);
         ENTRY;
 
-        body = lustre_swab_reqbuf (req, 0, sizeof (*body),
-                                   lustre_swab_ost_body);
+        body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
         if (body == NULL)
         if (body == NULL)
-                return (-EFAULT);
+                RETURN(-EFAULT);
 
         rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
         if (rc)
                 RETURN(rc);
 
 
         rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
         if (rc)
                 RETURN(rc);
 
-        repbody = lustre_msg_buf (req->rq_repmsg, 0, sizeof (*repbody));
+        repbody = lustre_msg_buf (req->rq_repmsg, 0, sizeof(*repbody));
         memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
         req->rq_status = obd_open(conn, &repbody->oa, NULL, oti, NULL);
         RETURN(0);
         memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
         req->rq_status = obd_open(conn, &repbody->oa, NULL, oti, NULL);
         RETURN(0);
@@ -189,16 +183,15 @@ static int ost_close(struct ptlrpc_request *req, struct obd_trans_info *oti)
         int rc, size = sizeof(*repbody);
         ENTRY;
 
         int rc, size = sizeof(*repbody);
         ENTRY;
 
-        body = lustre_swab_reqbuf (req, 0, sizeof (*body),
-                                   lustre_swab_ost_body);
+        body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
         if (body == NULL)
         if (body == NULL)
-                RETURN (-EFAULT);
+                RETURN(-EFAULT);
 
         rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
         if (rc)
                 RETURN(rc);
 
 
         rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
         if (rc)
                 RETURN(rc);
 
-        repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*repbody));
+        repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*repbody));
         memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
         req->rq_status = obd_close(conn, &repbody->oa, NULL, oti);
         RETURN(0);
         memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
         req->rq_status = obd_close(conn, &repbody->oa, NULL, oti);
         RETURN(0);
@@ -211,18 +204,19 @@ static int ost_create(struct ptlrpc_request *req, struct obd_trans_info *oti)
         int rc, size = sizeof(*repbody);
         ENTRY;
 
         int rc, size = sizeof(*repbody);
         ENTRY;
 
-        body = lustre_swab_reqbuf (req, 0, sizeof (*body),
-                                   lustre_swab_ost_body);
+        body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
         if (body == NULL)
         if (body == NULL)
-                RETURN (-EFAULT);
+                RETURN(-EFAULT);
 
         rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
         if (rc)
                 RETURN(rc);
 
 
         rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
         if (rc)
                 RETURN(rc);
 
-        repbody = lustre_msg_buf (req->rq_repmsg, 0, sizeof (*repbody));
+        repbody = lustre_msg_buf (req->rq_repmsg, 0, sizeof(*repbody));
         memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
         memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
+        oti->oti_logcookies = obdo_logcookie(&repbody->oa);
         req->rq_status = obd_create(conn, &repbody->oa, NULL, oti);
         req->rq_status = obd_create(conn, &repbody->oa, NULL, oti);
+        //obd_log_cancel(conn, NULL, 1, oti->oti_logcookies, 0);
         RETURN(0);
 }
 
         RETURN(0);
 }
 
@@ -233,10 +227,9 @@ static int ost_punch(struct ptlrpc_request *req, struct obd_trans_info *oti)
         int rc, size = sizeof(*repbody);
         ENTRY;
 
         int rc, size = sizeof(*repbody);
         ENTRY;
 
-        body = lustre_swab_reqbuf (req, 0, sizeof (*body),
-                                   lustre_swab_ost_body);
+        body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
         if (body == NULL)
         if (body == NULL)
-                RETURN (-EFAULT);
+                RETURN(-EFAULT);
 
         if ((body->oa.o_valid & (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS)) !=
             (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS))
 
         if ((body->oa.o_valid & (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS)) !=
             (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS))
@@ -246,7 +239,7 @@ static int ost_punch(struct ptlrpc_request *req, struct obd_trans_info *oti)
         if (rc)
                 RETURN(rc);
 
         if (rc)
                 RETURN(rc);
 
-        repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*repbody));
+        repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*repbody));
         memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
         req->rq_status = obd_punch(conn, &repbody->oa, NULL, repbody->oa.o_size,
                                    repbody->oa.o_blocks, oti);
         memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
         req->rq_status = obd_punch(conn, &repbody->oa, NULL, repbody->oa.o_size,
                                    repbody->oa.o_blocks, oti);
@@ -260,16 +253,15 @@ static int ost_setattr(struct ptlrpc_request *req, struct obd_trans_info *oti)
         int rc, size = sizeof(*repbody);
         ENTRY;
 
         int rc, size = sizeof(*repbody);
         ENTRY;
 
-        body = lustre_swab_reqbuf (req, 0, sizeof (*body),
-                                   lustre_swab_ost_body);
+        body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
         if (body == NULL)
         if (body == NULL)
-                RETURN (-EFAULT);
+                RETURN(-EFAULT);
 
         rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
         if (rc)
                 RETURN(rc);
 
 
         rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
         if (rc)
                 RETURN(rc);
 
-        repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*repbody));
+        repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*repbody));
         memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
 
         req->rq_status = obd_setattr(conn, &repbody->oa, NULL, oti);
         memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
 
         req->rq_status = obd_setattr(conn, &repbody->oa, NULL, oti);
@@ -285,9 +277,9 @@ static int ost_bulk_timeout(void *data)
         RETURN(1);
 }
 
         RETURN(1);
 }
 
-static int get_per_page_niobufs (struct obd_ioobj *ioo, int nioo,
-                                 struct niobuf_remote *rnb, int nrnb,
-                                 struct niobuf_remote **pp_rnbp)
+static int get_per_page_niobufs(struct obd_ioobj *ioo, int nioo,
+                                struct niobuf_remote *rnb, int nrnb,
+                                struct niobuf_remote **pp_rnbp)
 {
         /* Copy a remote niobuf, splitting it into page-sized chunks
          * and setting ioo[i].ioo_bufcnt accordingly */
 {
         /* Copy a remote niobuf, splitting it into page-sized chunks
          * and setting ioo[i].ioo_bufcnt accordingly */
@@ -305,14 +297,14 @@ static int get_per_page_niobufs (struct obd_ioobj *ioo, int nioo,
                         obd_off p0 = offset >> PAGE_SHIFT;
                         obd_off pn = (offset + rnb[rnbidx].len - 1)>>PAGE_SHIFT;
 
                         obd_off p0 = offset >> PAGE_SHIFT;
                         obd_off pn = (offset + rnb[rnbidx].len - 1)>>PAGE_SHIFT;
 
-                        LASSERT (rnbidx < nrnb);
+                        LASSERT(rnbidx < nrnb);
 
                         npages += (pn + 1 - p0);
 
                         if (rnb[rnbidx].len == 0) {
                                 CERROR("zero len BRW: obj %d objid "LPX64
                                        " buf %u\n", i, ioo[i].ioo_id, j);
 
                         npages += (pn + 1 - p0);
 
                         if (rnb[rnbidx].len == 0) {
                                 CERROR("zero len BRW: obj %d objid "LPX64
                                        " buf %u\n", i, ioo[i].ioo_id, j);
-                                return (-EINVAL);
+                                return -EINVAL;
                         }
                         if (j > 0 &&
                             rnb[rnbidx].offset <= rnb[rnbidx-1].offset) {
                         }
                         if (j > 0 &&
                             rnb[rnbidx].offset <= rnb[rnbidx-1].offset) {
@@ -320,20 +312,20 @@ static int get_per_page_niobufs (struct obd_ioobj *ioo, int nioo,
                                        " buf %u offset "LPX64" <= "LPX64"\n",
                                        i, ioo[i].ioo_id, j, rnb[rnbidx].offset,
                                        rnb[rnbidx].offset);
                                        " buf %u offset "LPX64" <= "LPX64"\n",
                                        i, ioo[i].ioo_id, j, rnb[rnbidx].offset,
                                        rnb[rnbidx].offset);
-                                return (-EINVAL);
+                                return -EINVAL;
                         }
                 }
 
                         }
                 }
 
-        LASSERT (rnbidx == nrnb);
+        LASSERT(rnbidx == nrnb);
 
         if (npages == nrnb) {       /* all niobufs are for single pages */
                 *pp_rnbp = rnb;
 
         if (npages == nrnb) {       /* all niobufs are for single pages */
                 *pp_rnbp = rnb;
-                return (npages);
+                return npages;
         }
 
         }
 
-        OBD_ALLOC (pp_rnb, sizeof (*pp_rnb) * npages);
+        OBD_ALLOC(pp_rnb, sizeof(*pp_rnb) * npages);
         if (pp_rnb == NULL)
         if (pp_rnb == NULL)
-                return (-ENOMEM);
+                return -ENOMEM;
 
         /* now do the actual split */
         page = rnbidx = 0;
 
         /* now do the actual split */
         page = rnbidx = 0;
@@ -344,35 +336,35 @@ static int get_per_page_niobufs (struct obd_ioobj *ioo, int nioo,
                         obd_off off = rnb[rnbidx].offset;
                         int     nob = rnb[rnbidx].len;
 
                         obd_off off = rnb[rnbidx].offset;
                         int     nob = rnb[rnbidx].len;
 
-                        LASSERT (rnbidx < nrnb);
+                        LASSERT(rnbidx < nrnb);
                         do {
                                 obd_off  poff = off & (PAGE_SIZE - 1);
                                 int      pnob = (poff + nob > PAGE_SIZE) ?
                                                 PAGE_SIZE - poff : nob;
 
                         do {
                                 obd_off  poff = off & (PAGE_SIZE - 1);
                                 int      pnob = (poff + nob > PAGE_SIZE) ?
                                                 PAGE_SIZE - poff : nob;
 
-                                LASSERT (page < npages);
+                                LASSERT(page < npages);
                                 pp_rnb[page].len = pnob;
                                 pp_rnb[page].offset = off;
                                 pp_rnb[page].flags = rnb->flags;
 
                                 pp_rnb[page].len = pnob;
                                 pp_rnb[page].offset = off;
                                 pp_rnb[page].flags = rnb->flags;
 
-                                CDEBUG (D_PAGE, "   obj %d id "LPX64
-                                        "page %d(%d) "LPX64" for %d\n",
-                                        i, ioo[i].ioo_id, obj_pages, page,
-                                        pp_rnb[page].offset, pp_rnb[page].len);
+                                CDEBUG(D_PAGE, "   obj %d id "LPX64
+                                       "page %d(%d) "LPX64" for %d\n",
+                                       i, ioo[i].ioo_id, obj_pages, page,
+                                       pp_rnb[page].offset, pp_rnb[page].len);
                                 page++;
                                 obj_pages++;
 
                                 off += pnob;
                                 nob -= pnob;
                         } while (nob > 0);
                                 page++;
                                 obj_pages++;
 
                                 off += pnob;
                                 nob -= pnob;
                         } while (nob > 0);
-                        LASSERT (nob == 0);
+                        LASSERT(nob == 0);
                 }
                 ioo[i].ioo_bufcnt = obj_pages;
         }
                 }
                 ioo[i].ioo_bufcnt = obj_pages;
         }
-        LASSERT (page == npages);
+        LASSERT(page == npages);
 
         *pp_rnbp = pp_rnb;
 
         *pp_rnbp = pp_rnb;
-        return (npages);
+        return npages;
 }
 
 static void free_per_page_niobufs (int npages, struct niobuf_remote *pp_rnb,
 }
 
 static void free_per_page_niobufs (int npages, struct niobuf_remote *pp_rnb,
@@ -381,23 +373,19 @@ static void free_per_page_niobufs (int npages, struct niobuf_remote *pp_rnb,
         if (pp_rnb == rnb)                      /* didn't allocate above */
                 return;
 
         if (pp_rnb == rnb)                      /* didn't allocate above */
                 return;
 
-        OBD_FREE (pp_rnb, sizeof (*pp_rnb) * npages);
+        OBD_FREE(pp_rnb, sizeof(*pp_rnb) * npages);
 }
 
 #if CHECKSUM_BULK
 __u64 ost_checksum_bulk (struct ptlrpc_bulk_desc *desc)
 {
         __u64             cksum = 0;
 }
 
 #if CHECKSUM_BULK
 __u64 ost_checksum_bulk (struct ptlrpc_bulk_desc *desc)
 {
         __u64             cksum = 0;
-        struct list_head *tmp;
-        char             *ptr;
+        struct ptlrpc_bulk_page *bp;
 
 
-        list_for_each (tmp, &desc->bd_page_list) {
-                struct ptlrpc_bulk_page *bp;
-
-                bp = list_entry (tmp, struct ptlrpc_bulk_page, bp_link);
-                ptr = kmap (bp->bp_page);
-                ost_checksum (&cksum, ptr + bp->bp_pageoffset, bp->bp_buflen);
-                kunmap (bp->bp_page);
+        list_for_each_entry(bp, &desc->bd_page_list, bp_link) {
+                ost_checksum(&cksum, kmap(bp->bp_page) + bp->bp_pageoffset,
+                             bp->bp_buflen);
+                kunmap(bp->bp_page);
         }
 }
 #endif
         }
 }
 #endif
@@ -409,9 +397,9 @@ static int ost_brw_read(struct ptlrpc_request *req)
         struct niobuf_remote    *pp_rnb;
         struct niobuf_local     *local_nb;
         struct obd_ioobj        *ioo;
         struct niobuf_remote    *pp_rnb;
         struct niobuf_local     *local_nb;
         struct obd_ioobj        *ioo;
-        struct ost_body         *body;
+        struct ost_body         *body, *repbody;
         struct l_wait_info       lwi;
         struct l_wait_info       lwi;
-        void                    *desc_priv = NULL;
+        struct obd_trans_info    oti = { 0 };
         int                      size[1] = { sizeof(*body) };
         int                      comms_error = 0;
         int                      niocount;
         int                      size[1] = { sizeof(*body) };
         int                      comms_error = 0;
         int                      niocount;
@@ -426,35 +414,36 @@ static int ost_brw_read(struct ptlrpc_request *req)
 
         body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
         if (body == NULL) {
 
         body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
         if (body == NULL) {
-                CERROR ("Missing/short ost_body\n");
-                GOTO (out, rc = -EFAULT);
+                CERROR("Missing/short ost_body\n");
+                GOTO(out, rc = -EFAULT);
         }
 
         }
 
-        ioo = lustre_swab_reqbuf (req, 1, sizeof (*ioo),
-                                  lustre_swab_obd_ioobj);
+        ioo = lustre_swab_reqbuf(req, 1, sizeof(*ioo), lustre_swab_obd_ioobj);
         if (ioo == NULL) {
         if (ioo == NULL) {
-                CERROR ("Missing/short ioobj\n");
-                GOTO (out, rc = -EFAULT);
+                CERROR("Missing/short ioobj\n");
+                GOTO(out, rc = -EFAULT);
         }
 
         niocount = ioo->ioo_bufcnt;
         }
 
         niocount = ioo->ioo_bufcnt;
-        remote_nb = lustre_swab_reqbuf(req, 2, niocount * sizeof (*remote_nb),
+        remote_nb = lustre_swab_reqbuf(req, 2, niocount * sizeof(*remote_nb),
                                        lustre_swab_niobuf_remote);
         if (remote_nb == NULL) {
                                        lustre_swab_niobuf_remote);
         if (remote_nb == NULL) {
-                CERROR ("Missing/short niobuf\n");
-                GOTO (out, rc = -EFAULT);
+                CERROR("Missing/short niobuf\n");
+                GOTO(out, rc = -EFAULT);
         }
         }
-        if (lustre_msg_swabbed (req->rq_reqmsg)) { /* swab remaining niobufs */
+        if (lustre_msg_swabbed(req->rq_reqmsg)) { /* swab remaining niobufs */
                 for (i = 1; i < niocount; i++)
                         lustre_swab_niobuf_remote (&remote_nb[i]);
         }
 
                 for (i = 1; i < niocount; i++)
                         lustre_swab_niobuf_remote (&remote_nb[i]);
         }
 
+        size[0] = sizeof(*body);
         rc = lustre_pack_msg(1, size, NULL, &req->rq_replen, &req->rq_repmsg);
         if (rc)
                 GOTO(out, rc);
 
         rc = lustre_pack_msg(1, size, NULL, &req->rq_replen, &req->rq_repmsg);
         if (rc)
                 GOTO(out, rc);
 
+        /* FIXME all niobuf splitting should be done in obdfilter if needed */
         /* CAVEAT EMPTOR this sets ioo->ioo_bufcnt to # pages */
         /* CAVEAT EMPTOR this sets ioo->ioo_bufcnt to # pages */
-        npages = get_per_page_niobufs (ioo, 1, remote_nb, niocount, &pp_rnb);
+        npages = get_per_page_niobufs(ioo, 1, remote_nb, niocount, &pp_rnb);
         if (npages < 0)
                 GOTO(out, rc = npages);
 
         if (npages < 0)
                 GOTO(out, rc = npages);
 
@@ -462,12 +451,12 @@ static int ost_brw_read(struct ptlrpc_request *req)
         if (local_nb == NULL)
                 GOTO(out_pp_rnb, rc = -ENOMEM);
 
         if (local_nb == NULL)
                 GOTO(out_pp_rnb, rc = -ENOMEM);
 
-        desc = ptlrpc_prep_bulk_exp (req, BULK_PUT_SOURCE, OST_BULK_PORTAL);
+        desc = ptlrpc_prep_bulk_exp(req, BULK_PUT_SOURCE, OST_BULK_PORTAL);
         if (desc == NULL)
                 GOTO(out_local, rc = -ENOMEM);
 
         if (desc == NULL)
                 GOTO(out_local, rc = -ENOMEM);
 
-        rc = obd_preprw(OBD_BRW_READ, req->rq_export, NULL, 1, ioo, npages,
-                        pp_rnb, local_nb, &desc_priv, NULL);
+        rc = obd_preprw(OBD_BRW_READ, req->rq_export, &body->oa, 1,
+                        ioo, npages, pp_rnb, local_nb, &oti);
         if (rc != 0)
                 GOTO(out_bulk, rc);
 
         if (rc != 0)
                 GOTO(out_bulk, rc);
 
@@ -480,7 +469,7 @@ static int ost_brw_read(struct ptlrpc_request *req)
                         break;
                 }
 
                         break;
                 }
 
-                LASSERT (page_rc <= pp_rnb[i].len);
+                LASSERT(page_rc <= pp_rnb[i].len);
                 nob += page_rc;
                 if (page_rc != 0) {             /* some data! */
                         LASSERT (local_nb[i].page != NULL);
                 nob += page_rc;
                 if (page_rc != 0) {             /* some data! */
                         LASSERT (local_nb[i].page != NULL);
@@ -493,8 +482,8 @@ static int ost_brw_read(struct ptlrpc_request *req)
 
                 if (page_rc != pp_rnb[i].len) { /* short read */
                         /* All subsequent pages should be 0 */
 
                 if (page_rc != pp_rnb[i].len) { /* short read */
                         /* All subsequent pages should be 0 */
-                        while (++i < npages)
-                                LASSERT (local_nb[i].rc == 0);
+                        while(++i < npages)
+                                LASSERT(local_nb[i].rc == 0);
                         break;
                 }
         }
                         break;
                 }
         }
@@ -509,7 +498,7 @@ static int ost_brw_read(struct ptlrpc_request *req)
                         if (rc) {
                                 LASSERT(rc == -ETIMEDOUT);
                                 CERROR ("timeout waiting for bulk PUT\n");
                         if (rc) {
                                 LASSERT(rc == -ETIMEDOUT);
                                 CERROR ("timeout waiting for bulk PUT\n");
-                                ptlrpc_abort_bulk (desc);
+                                ptlrpc_abort_bulk(desc);
                         }
                 } else {
                         CERROR("ptlrpc_bulk_put failed RC: %d\n", rc);
                         }
                 } else {
                         CERROR("ptlrpc_bulk_put failed RC: %d\n", rc);
@@ -518,25 +507,27 @@ static int ost_brw_read(struct ptlrpc_request *req)
         }
 
         /* Must commit after prep above in all cases */
         }
 
         /* Must commit after prep above in all cases */
-        rc = obd_commitrw(OBD_BRW_READ, req->rq_export, 1, ioo, npages,
-                          local_nb, desc_priv, NULL);
+        rc = obd_commitrw(OBD_BRW_READ, req->rq_export, &body->oa, 1,
+                          ioo, npages, local_nb, &oti);
+
+        repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*repbody));
+        memcpy(&repbody->oa, &body->oa, sizeof(repbody->oa));
 
 #if CHECKSUM_BULK
         if (rc == 0) {
 
 #if CHECKSUM_BULK
         if (rc == 0) {
-                body = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*body));
-                body->oa.o_rdev = ost_checksum_bulk (desc);
-                body->oa.o_valid |= OBD_MD_FLCKSUM;
+                repbody->oa.o_rdev = ost_checksum_bulk(desc);
+                repbody->oa.o_valid |= OBD_MD_FLCKSUM;
         }
 #endif
 
  out_bulk:
         }
 #endif
 
  out_bulk:
-        ptlrpc_free_bulk (desc);
+        ptlrpc_free_bulk(desc);
  out_local:
         OBD_FREE(local_nb, sizeof(*local_nb) * npages);
  out_pp_rnb:
  out_local:
         OBD_FREE(local_nb, sizeof(*local_nb) * npages);
  out_pp_rnb:
-        free_per_page_niobufs (npages, pp_rnb, remote_nb);
+        free_per_page_niobufs(npages, pp_rnb, remote_nb);
  out:
  out:
-        LASSERT (rc <= 0);
+        LASSERT(rc <= 0);
         if (rc == 0) {
                 req->rq_status = nob;
                 ptlrpc_reply(req);
         if (rc == 0) {
                 req->rq_status = nob;
                 ptlrpc_reply(req);
@@ -547,7 +538,7 @@ static int ost_brw_read(struct ptlrpc_request *req)
         } else {
                 if (req->rq_repmsg != NULL) {
                         /* reply out callback would free */
         } else {
                 if (req->rq_repmsg != NULL) {
                         /* reply out callback would free */
-                        OBD_FREE (req->rq_repmsg, req->rq_replen);
+                        OBD_FREE(req->rq_repmsg, req->rq_replen);
                 }
                 CERROR("bulk IO comms error: evicting %s@%s nid "LPU64"\n",
                        req->rq_export->exp_client_uuid.uuid,
                 }
                 CERROR("bulk IO comms error: evicting %s@%s nid "LPU64"\n",
                        req->rq_export->exp_client_uuid.uuid,
@@ -566,11 +557,10 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
         struct niobuf_remote    *pp_rnb;
         struct niobuf_local     *local_nb;
         struct obd_ioobj        *ioo;
         struct niobuf_remote    *pp_rnb;
         struct niobuf_local     *local_nb;
         struct obd_ioobj        *ioo;
-        struct ost_body         *body;
+        struct ost_body         *body, *repbody;
         struct l_wait_info       lwi;
         struct l_wait_info       lwi;
-        void                    *desc_priv = NULL;
         __u32                   *rcs;
         __u32                   *rcs;
-        int                      size[2] = { sizeof (*body) };
+        int                      size[2] = { sizeof(*body) };
         int                      objcount, niocount, npages;
         int                      comms_error = 0;
         int                      rc, rc2, swab, i, j;
         int                      objcount, niocount, npages;
         int                      comms_error = 0;
         int                      rc, rc2, swab, i, j;
@@ -580,39 +570,38 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
                 GOTO(out, rc = -EIO);
 
         /* pause before transaction has been started */
                 GOTO(out, rc = -EIO);
 
         /* pause before transaction has been started */
-        OBD_FAIL_TIMEOUT(OBD_FAIL_OST_BRW_PAUSE_BULK | OBD_FAIL_ONCE, 
+        OBD_FAIL_TIMEOUT(OBD_FAIL_OST_BRW_PAUSE_BULK | OBD_FAIL_ONCE,
                          obd_timeout +1);
 
                          obd_timeout +1);
 
-        swab = lustre_msg_swabbed (req->rq_reqmsg);
-        body = lustre_swab_reqbuf (req, 0, sizeof (*body),
-                                   lustre_swab_ost_body);
+        swab = lustre_msg_swabbed(req->rq_reqmsg);
+        body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
         if (body == NULL) {
         if (body == NULL) {
-                CERROR ("Missing/short ost_body\n");
+                CERROR("Missing/short ost_body\n");
                 GOTO(out, rc = -EFAULT);
         }
 
                 GOTO(out, rc = -EFAULT);
         }
 
-        LASSERT_REQSWAB (req, 1);
+        LASSERT_REQSWAB(req, 1);
         objcount = req->rq_reqmsg->buflens[1] / sizeof(*ioo);
         if (objcount == 0) {
         objcount = req->rq_reqmsg->buflens[1] / sizeof(*ioo);
         if (objcount == 0) {
-                CERROR ("Missing/short ioobj\n");
-                GOTO (out, rc = -EFAULT);
+                CERROR("Missing/short ioobj\n");
+                GOTO(out, rc = -EFAULT);
         }
         }
-        ioo = lustre_msg_buf (req->rq_reqmsg, 1, objcount * sizeof (*ioo));
+        ioo = lustre_msg_buf (req->rq_reqmsg, 1, objcount * sizeof(*ioo));
         LASSERT (ioo != NULL);
         for (niocount = i = 0; i < objcount; i++) {
                 if (swab)
                         lustre_swab_obd_ioobj (&ioo[i]);
                 if (ioo[i].ioo_bufcnt == 0) {
         LASSERT (ioo != NULL);
         for (niocount = i = 0; i < objcount; i++) {
                 if (swab)
                         lustre_swab_obd_ioobj (&ioo[i]);
                 if (ioo[i].ioo_bufcnt == 0) {
-                        CERROR ("ioo[%d] has zero bufcnt\n", i);
-                        GOTO (out, rc = -EFAULT);
+                        CERROR("ioo[%d] has zero bufcnt\n", i);
+                        GOTO(out, rc = -EFAULT);
                 }
                 niocount += ioo[i].ioo_bufcnt;
         }
 
                 }
                 niocount += ioo[i].ioo_bufcnt;
         }
 
-        remote_nb = lustre_swab_reqbuf(req, 2, niocount * sizeof (*remote_nb),
+        remote_nb = lustre_swab_reqbuf(req, 2, niocount * sizeof(*remote_nb),
                                        lustre_swab_niobuf_remote);
         if (remote_nb == NULL) {
                                        lustre_swab_niobuf_remote);
         if (remote_nb == NULL) {
-                CERROR ("Missing/short niobuf\n");
+                CERROR("Missing/short niobuf\n");
                 GOTO(out, rc = -EFAULT);
         }
         if (swab) {                             /* swab the remaining niobufs */
                 GOTO(out, rc = -EFAULT);
         }
         if (swab) {                             /* swab the remaining niobufs */
@@ -620,30 +609,31 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
                         lustre_swab_niobuf_remote (&remote_nb[i]);
         }
 
                         lustre_swab_niobuf_remote (&remote_nb[i]);
         }
 
-        size[1] = niocount * sizeof (*rcs);
+        size[1] = niocount * sizeof(*rcs);
         rc = lustre_pack_msg(2, size, NULL, &req->rq_replen,
                              &req->rq_repmsg);
         if (rc != 0)
         rc = lustre_pack_msg(2, size, NULL, &req->rq_replen,
                              &req->rq_repmsg);
         if (rc != 0)
-                GOTO (out, rc);
-        rcs = lustre_msg_buf (req->rq_repmsg, 1, niocount * sizeof (*rcs));
+                GOTO(out, rc);
+        rcs = lustre_msg_buf(req->rq_repmsg, 1, niocount * sizeof(*rcs));
 
 
+        /* FIXME all niobuf splitting should be done in obdfilter if needed */
         /* CAVEAT EMPTOR this sets ioo->ioo_bufcnt to # pages */
         npages = get_per_page_niobufs(ioo, objcount,remote_nb,niocount,&pp_rnb);
         if (npages < 0)
         /* CAVEAT EMPTOR this sets ioo->ioo_bufcnt to # pages */
         npages = get_per_page_niobufs(ioo, objcount,remote_nb,niocount,&pp_rnb);
         if (npages < 0)
-                GOTO (out, rc = npages);
+                GOTO(out, rc = npages);
 
         OBD_ALLOC(local_nb, sizeof(*local_nb) * npages);
         if (local_nb == NULL)
                 GOTO(out_pp_rnb, rc = -ENOMEM);
 
 
         OBD_ALLOC(local_nb, sizeof(*local_nb) * npages);
         if (local_nb == NULL)
                 GOTO(out_pp_rnb, rc = -ENOMEM);
 
-        desc = ptlrpc_prep_bulk_exp (req, BULK_GET_SINK, OST_BULK_PORTAL);
+        desc = ptlrpc_prep_bulk_exp(req, BULK_GET_SINK, OST_BULK_PORTAL);
         if (desc == NULL)
                 GOTO(out_local, rc = -ENOMEM);
 
         if (desc == NULL)
                 GOTO(out_local, rc = -ENOMEM);
 
-        rc = obd_preprw(OBD_BRW_WRITE, req->rq_export, NULL, objcount, ioo,
-                        npages, pp_rnb, local_nb, &desc_priv, oti);
+        rc = obd_preprw(OBD_BRW_WRITE, req->rq_export, &body->oa, objcount,
+                        ioo, npages, pp_rnb, local_nb, oti);
         if (rc != 0)
         if (rc != 0)
-                GOTO (out_bulk, rc);
+                GOTO(out_bulk, rc);
 
         /* NB Having prepped, we must commit... */
 
 
         /* NB Having prepped, we must commit... */
 
@@ -664,8 +654,8 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
                                           ptlrpc_bulk_complete(desc), &lwi);
                         if (rc) {
                                 LASSERT(rc == -ETIMEDOUT);
                                           ptlrpc_bulk_complete(desc), &lwi);
                         if (rc) {
                                 LASSERT(rc == -ETIMEDOUT);
-                                CERROR ("timeout waiting for bulk GET\n");
-                                ptlrpc_abort_bulk (desc);
+                                CERROR("timeout waiting for bulk GET\n");
+                                ptlrpc_abort_bulk(desc);
                         }
                 } else {
                        CERROR("ptlrpc_bulk_get failed RC: %d\n", rc);
                         }
                 } else {
                        CERROR("ptlrpc_bulk_get failed RC: %d\n", rc);
@@ -673,17 +663,21 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
                comms_error = rc != 0;
         }
 
                comms_error = rc != 0;
         }
 
+        repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*repbody));
+        memcpy(&repbody->oa, &body->oa, sizeof(repbody->oa));
+
 #if CHECKSUM_BULK
         if (rc == 0 && (body->oa.o_valid & OBD_MD_FLCKSUM) != 0) {
                 static int cksum_counter;
                 __u64 client_cksum = body->oa.o_rdev;
 #if CHECKSUM_BULK
         if (rc == 0 && (body->oa.o_valid & OBD_MD_FLCKSUM) != 0) {
                 static int cksum_counter;
                 __u64 client_cksum = body->oa.o_rdev;
-                __u64 cksum = ost_checksum_bulk (desc);
+                __u64 cksum = ost_checksum_bulk(desc);
 
                 if (client_cksum != cksum) {
                         CERROR("Bad checksum: client "LPX64", server "LPX64
                                ", client NID "LPX64"\n", client_cksum, cksum,
                                req->rq_connection->c_peer.peer_nid);
                         cksum_counter = 1;
 
                 if (client_cksum != cksum) {
                         CERROR("Bad checksum: client "LPX64", server "LPX64
                                ", client NID "LPX64"\n", client_cksum, cksum,
                                req->rq_connection->c_peer.peer_nid);
                         cksum_counter = 1;
+                        repbody->oa.o_rdev = cksum;
                 } else {
                         cksum_counter++;
                         if ((cksum_counter & (-cksum_counter)) == cksum_counter)
                 } else {
                         cksum_counter++;
                         if ((cksum_counter & (-cksum_counter)) == cksum_counter)
@@ -695,8 +689,8 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
         }
 #endif
         /* Must commit after prep above in all cases */
         }
 #endif
         /* Must commit after prep above in all cases */
-        rc2 = obd_commitrw(OBD_BRW_WRITE, req->rq_export, objcount, ioo,
-                           npages, local_nb, desc_priv, oti);
+        rc2 = obd_commitrw(OBD_BRW_WRITE, req->rq_export, &repbody->oa,
+                           objcount, ioo, npages, local_nb, oti);
 
         if (rc == 0) {
                 /* set per-requested niobuf return codes */
 
         if (rc == 0) {
                 /* set per-requested niobuf return codes */
@@ -705,25 +699,25 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
 
                         rcs[i] = 0;
                         do {
 
                         rcs[i] = 0;
                         do {
-                                LASSERT (j < npages);
+                                LASSERT(j < npages);
                                 if (local_nb[j].rc < 0)
                                         rcs[i] = local_nb[j].rc;
                                 nob -= pp_rnb[j].len;
                                 j++;
                         } while (nob > 0);
                                 if (local_nb[j].rc < 0)
                                         rcs[i] = local_nb[j].rc;
                                 nob -= pp_rnb[j].len;
                                 j++;
                         } while (nob > 0);
-                        LASSERT (nob == 0);
+                        LASSERT(nob == 0);
                 }
                 }
-                LASSERT (j == npages);
+                LASSERT(j == npages);
         }
         if (rc == 0)
                 rc = rc2;
 
  out_bulk:
         }
         if (rc == 0)
                 rc = rc2;
 
  out_bulk:
-        ptlrpc_free_bulk (desc);
+        ptlrpc_free_bulk(desc);
  out_local:
         OBD_FREE(local_nb, sizeof(*local_nb) * npages);
  out_pp_rnb:
  out_local:
         OBD_FREE(local_nb, sizeof(*local_nb) * npages);
  out_pp_rnb:
-        free_per_page_niobufs (npages, pp_rnb, remote_nb);
+        free_per_page_niobufs(npages, pp_rnb, remote_nb);
  out:
         if (rc == 0) {
                 oti_to_request(oti, req);
  out:
         if (rc == 0) {
                 oti_to_request(oti, req);
@@ -748,10 +742,9 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
 
 static int ost_san_brw(struct ptlrpc_request *req, int cmd)
 {
 
 static int ost_san_brw(struct ptlrpc_request *req, int cmd)
 {
-        struct lustre_handle *conn = &req->rq_reqmsg->handle;
         struct niobuf_remote *remote_nb, *res_nb;
         struct obd_ioobj *ioo;
         struct niobuf_remote *remote_nb, *res_nb;
         struct obd_ioobj *ioo;
-        struct ost_body *body;
+        struct ost_body *body, *repbody;
         int rc, i, j, objcount, niocount, size[2] = {sizeof(*body)};
         int n;
         int swab;
         int rc, i, j, objcount, niocount, size[2] = {sizeof(*body)};
         int n;
         int swab;
@@ -759,19 +752,17 @@ static int ost_san_brw(struct ptlrpc_request *req, int cmd)
 
         /* XXX not set to use latest protocol */
 
 
         /* XXX not set to use latest protocol */
 
-        swab = lustre_msg_swabbed (req->rq_reqmsg);
-        body = lustre_swab_reqbuf (req, 0, sizeof (*body),
-                                   lustre_swab_ost_body);
+        swab = lustre_msg_swabbed(req->rq_reqmsg);
+        body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
         if (body == NULL) {
         if (body == NULL) {
-                CERROR ("Missing/short ost_body\n");
-                GOTO (out, rc = -EFAULT);
+                CERROR("Missing/short ost_body\n");
+                GOTO(out, rc = -EFAULT);
         }
 
         }
 
-        ioo = lustre_swab_reqbuf(req, 1, sizeof (*ioo),
-                                 lustre_swab_obd_ioobj);
+        ioo = lustre_swab_reqbuf(req, 1, sizeof(*ioo), lustre_swab_obd_ioobj);
         if (ioo == NULL) {
         if (ioo == NULL) {
-                CERROR ("Missing/short ioobj\n");
-                GOTO (out, rc = -EFAULT);
+                CERROR("Missing/short ioobj\n");
+                GOTO(out, rc = -EFAULT);
         }
         objcount = req->rq_reqmsg->buflens[1] / sizeof(*ioo);
         niocount = ioo[0].ioo_bufcnt;
         }
         objcount = req->rq_reqmsg->buflens[1] / sizeof(*ioo);
         niocount = ioo[0].ioo_bufcnt;
@@ -781,11 +772,11 @@ static int ost_san_brw(struct ptlrpc_request *req, int cmd)
                 niocount += ioo[i].ioo_bufcnt;
         }
 
                 niocount += ioo[i].ioo_bufcnt;
         }
 
-        remote_nb = lustre_swab_reqbuf(req, 2, niocount * sizeof (*remote_nb),
+        remote_nb = lustre_swab_reqbuf(req, 2, niocount * sizeof(*remote_nb),
                                        lustre_swab_niobuf_remote);
         if (remote_nb == NULL) {
                                        lustre_swab_niobuf_remote);
         if (remote_nb == NULL) {
-                CERROR ("Missing/short niobuf\n");
-                GOTO (out, rc = -EFAULT);
+                CERROR("Missing/short niobuf\n");
+                GOTO(out, rc = -EFAULT);
         }
         if (swab) {                             /* swab the remaining niobufs */
                 for (i = 1; i < niocount; i++)
         }
         if (swab) {                             /* swab the remaining niobufs */
                 for (i = 1; i < niocount; i++)
@@ -814,14 +805,17 @@ static int ost_san_brw(struct ptlrpc_request *req, int cmd)
         if (rc)
                 GOTO(out, rc);
 
         if (rc)
                 GOTO(out, rc);
 
-        req->rq_status = obd_san_preprw(cmd, conn, objcount, ioo,
-                                        niocount, remote_nb);
+        req->rq_status = obd_san_preprw(cmd, req->rq_export, &body->oa,
+                                        objcount, ioo, niocount, remote_nb);
 
         if (req->rq_status)
 
         if (req->rq_status)
-                GOTO (out, rc = 0);
+                GOTO(out, rc = 0);
+
+        repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*repbody));
+        memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
 
         res_nb = lustre_msg_buf(req->rq_repmsg, 1, size[1]);
 
         res_nb = lustre_msg_buf(req->rq_repmsg, 1, size[1]);
-        memcpy (res_nb, remote_nb, size[1]);
+        memcpy(res_nb, remote_nb, size[1]);
         rc = 0;
 out:
         if (rc) {
         rc = 0;
 out:
         if (rc) {
@@ -835,6 +829,57 @@ out:
         return rc;
 }
 
         return rc;
 }
 
+static int ost_log_cancel(struct ptlrpc_request *req)
+{
+        struct lustre_handle *conn;
+        struct llog_cookie *logcookies;
+        int num_cookies, rc = 0;
+        ENTRY;
+        /* Handler for a log-cancel request: request buffer 0 holds an array
+         * of llog cookies, which is passed on to obd_log_cancel().
+         *
+         * Returns -EFAULT when lustre_msg_buf() yields no buffer (presumably
+         * buffer 0 is missing or shorter than one cookie -- TODO confirm),
+         * the lustre_pack_msg() error if the reply cannot be packed, and
+         * otherwise the result of obd_log_cancel(). */
+        logcookies = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*logcookies));
+        if (logcookies == NULL) {
+                DEBUG_REQ(D_HA, req, "no cookies sent");
+                RETURN(-EFAULT);
+        }
+        /* whole cookies only: integer division drops a trailing partial one */
+        num_cookies = req->rq_reqmsg->buflens[0] / sizeof(*logcookies);
+
+        /* workaround until we don't need to send replies: pack a reply
+         * with zero buffers and mark its status as success before the
+         * cancel itself is attempted */
+        rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg);
+        if (rc)
+                RETURN(rc);
+        req->rq_repmsg->status = 0;
+        /* end workaround */
+        /* The handle embedded in the request message is used as the
+         * connection handle.  NOTE(review): the meaning of the NULL and 0
+         * arguments to obd_log_cancel() is not visible here -- confirm
+         * against its declaration. */
+        conn = (struct lustre_handle *)&req->rq_reqmsg->handle;
+        rc = obd_log_cancel(conn, NULL, num_cookies, logcookies, 0);
+
+        RETURN(rc);
+}
+
+static int ost_set_info(struct ptlrpc_request *req)
+{
+        struct lustre_handle *conn;
+        char *key;
+        int keylen, rc = 0;
+        ENTRY;
+        /* Handler for a set_info request: request buffer 0 is the key, and
+         * it is passed to obd_set_info() with no value (length 0, NULL).
+         *
+         * Returns -EFAULT when lustre_msg_buf() yields no buffer, the
+         * lustre_pack_msg() error if the reply cannot be packed, and
+         * otherwise the result of obd_set_info(). */
+        key = lustre_msg_buf(req->rq_reqmsg, 0, 1);
+        if (key == NULL) {
+                DEBUG_REQ(D_HA, req, "no set_info key");
+                RETURN(-EFAULT);
+        }
+        keylen = req->rq_reqmsg->buflens[0];
+        /* the reply carries no buffers */
+        rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg);
+        if (rc)
+                RETURN(rc);
+        /* The handle embedded in the request message is used as the
+         * connection handle.  The reply status is set to 0 whatever
+         * obd_set_info() returns; its result is reported only through the
+         * return value. */
+        conn = (struct lustre_handle *)&req->rq_reqmsg->handle;
+        rc = obd_set_info(conn, keylen, key, 0, NULL);
+        req->rq_repmsg->status = 0;
+        RETURN(rc);
+}
+
 static int filter_recovery_request(struct ptlrpc_request *req,
                                    struct obd_device *obd, int *process)
 {
 static int filter_recovery_request(struct ptlrpc_request *req,
                                    struct obd_device *obd, int *process)
 {
@@ -850,9 +895,10 @@ static int filter_recovery_request(struct ptlrpc_request *req,
         case OST_DESTROY:
         case OST_OPEN:
         case OST_PUNCH:
         case OST_DESTROY:
         case OST_OPEN:
         case OST_PUNCH:
-        case OST_SETATTR: 
+        case OST_SETATTR:
         case OST_SYNCFS:
         case OST_WRITE:
         case OST_SYNCFS:
         case OST_WRITE:
+        case OBD_LOG_CANCEL:
         case LDLM_ENQUEUE:
                 *process = target_queue_recovery_request(req, obd);
                 RETURN(0);
         case LDLM_ENQUEUE:
                 *process = target_queue_recovery_request(req, obd);
                 RETURN(0);
@@ -881,7 +927,7 @@ static int ost_handle(struct ptlrpc_request *req)
                 int abort_recovery, recovering;
 
                 if (req->rq_export == NULL) {
                 int abort_recovery, recovering;
 
                 if (req->rq_export == NULL) {
-                        CERROR("lustre_ost: operation %d on unconnected OST\n",
+                        CDEBUG(D_HA, "operation %d on unconnected OST\n",
                                req->rq_reqmsg->opc);
                         req->rq_status = -ENOTCONN;
                         GOTO(out, rc = -ENOTCONN);
                                req->rq_reqmsg->opc);
                         req->rq_status = -ENOTCONN;
                         GOTO(out, rc = -ENOTCONN);
@@ -901,7 +947,7 @@ static int ost_handle(struct ptlrpc_request *req)
                         if (rc || !should_process)
                                 RETURN(rc);
                 }
                         if (rc || !should_process)
                                 RETURN(rc);
                 }
-        } 
+        }
 
         if (strcmp(req->rq_obd->obd_type->typ_name, "ost") != 0)
                 GOTO(out, rc = -EINVAL);
 
         if (strcmp(req->rq_obd->obd_type->typ_name, "ost") != 0)
                 GOTO(out, rc = -EINVAL);
@@ -988,10 +1034,18 @@ static int ost_handle(struct ptlrpc_request *req)
                 OBD_FAIL_RETURN(OBD_FAIL_OST_SYNCFS_NET, 0);
                 rc = ost_syncfs(req);
                 break;
                 OBD_FAIL_RETURN(OBD_FAIL_OST_SYNCFS_NET, 0);
                 rc = ost_syncfs(req);
                 break;
+        case OST_SET_INFO:
+                DEBUG_REQ(D_INODE, req, "set_info");
+                rc = ost_set_info(req);
+                /* Must not fall through into OBD_PING below: that case
+                 * would run target_handle_ping() on this request and
+                 * overwrite rc with its result. */
+                break;
         case OBD_PING:
                 DEBUG_REQ(D_INODE, req, "ping");
                 rc = target_handle_ping(req);
                 break;
         case OBD_PING:
                 DEBUG_REQ(D_INODE, req, "ping");
                 rc = target_handle_ping(req);
                 break;
+        case OBD_LOG_CANCEL:
+                CDEBUG(D_INODE, "log cancel\n");
+                OBD_FAIL_RETURN(OBD_FAIL_OBD_LOG_CANCEL_NET, 0);
+                rc = ost_log_cancel(req);
+                break;
         case LDLM_ENQUEUE:
                 CDEBUG(D_INODE, "enqueue\n");
                 OBD_FAIL_RETURN(OBD_FAIL_LDLM_ENQUEUE, 0);
         case LDLM_ENQUEUE:
                 CDEBUG(D_INODE, "enqueue\n");
                 OBD_FAIL_RETURN(OBD_FAIL_LDLM_ENQUEUE, 0);
@@ -1058,17 +1112,22 @@ out:
 static int ost_setup(struct obd_device *obddev, obd_count len, void *buf)
 {
         struct ost_obd *ost = &obddev->u.ost;
 static int ost_setup(struct obd_device *obddev, obd_count len, void *buf)
 {
         struct ost_obd *ost = &obddev->u.ost;
-        int err;
-        int i;
+        int err, i;
         ENTRY;
 
         ENTRY;
 
+#ifdef ENABLE_ORPHANS
+        err = llog_start_commit_thread();
+        if (err < 0)
+                RETURN(err);
+#endif
+
         ost->ost_service = ptlrpc_init_svc(OST_NEVENTS, OST_NBUFS,
                                            OST_BUFSIZE, OST_MAXREQSIZE,
                                            OST_REQUEST_PORTAL, OSC_REPLY_PORTAL,
                                            ost_handle, "ost", obddev);
         if (!ost->ost_service) {
                 CERROR("failed to start service\n");
         ost->ost_service = ptlrpc_init_svc(OST_NEVENTS, OST_NBUFS,
                                            OST_BUFSIZE, OST_MAXREQSIZE,
                                            OST_REQUEST_PORTAL, OSC_REPLY_PORTAL,
                                            ost_handle, "ost", obddev);
         if (!ost->ost_service) {
                 CERROR("failed to start service\n");
-                GOTO(error_disc, err = -ENOMEM);
+                RETURN(-ENOMEM);
         }
 
         for (i = 0; i < OST_NUM_THREADS; i++) {
         }
 
         for (i = 0; i < OST_NUM_THREADS; i++) {
@@ -1077,17 +1136,14 @@ static int ost_setup(struct obd_device *obddev, obd_count len, void *buf)
                 err = ptlrpc_start_thread(obddev, ost->ost_service, name);
                 if (err) {
                         CERROR("error starting thread #%d: rc %d\n", i, err);
                 err = ptlrpc_start_thread(obddev, ost->ost_service, name);
                 if (err) {
                         CERROR("error starting thread #%d: rc %d\n", i, err);
-                        GOTO(error_disc, err = -EINVAL);
+                        RETURN(-EINVAL);
                 }
         }
 
         RETURN(0);
                 }
         }
 
         RETURN(0);
-
-error_disc:
-        RETURN(err);
 }
 
 }
 
-static int ost_cleanup(struct obd_device *obddev, int force, int failover)
+static int ost_cleanup(struct obd_device *obddev, int flags)
 {
         struct ost_obd *ost = &obddev->u.ost;
         int err = 0;
 {
         struct ost_obd *ost = &obddev->u.ost;
         int err = 0;
@@ -1106,7 +1162,7 @@ int ost_attach(struct obd_device *dev, obd_count len, void *data)
 {
         struct lprocfs_static_vars lvars;
 
 {
         struct lprocfs_static_vars lvars;
 
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(ost,&lvars);
         return lprocfs_obd_attach(dev, lvars.obd_vars);
 }
 
         return lprocfs_obd_attach(dev, lvars.obd_vars);
 }
 
@@ -1115,7 +1171,7 @@ int ost_detach(struct obd_device *dev)
         return lprocfs_obd_detach(dev);
 }
 
         return lprocfs_obd_detach(dev);
 }
 
-/* I don't think this function is ever used, since nothing 
+/* I don't think this function is ever used, since nothing
  * connects directly to this module.
  */
 static int ost_connect(struct lustre_handle *conn,
  * connects directly to this module.
  */
 static int ost_connect(struct lustre_handle *conn,
@@ -1153,12 +1209,12 @@ static int __init ost_init(void)
         struct lprocfs_static_vars lvars;
         ENTRY;
 
         struct lprocfs_static_vars lvars;
         ENTRY;
 
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(ost,&lvars);
         RETURN(class_register_type(&ost_obd_ops, lvars.module_vars,
                                    LUSTRE_OST_NAME));
 }
 
         RETURN(class_register_type(&ost_obd_ops, lvars.module_vars,
                                    LUSTRE_OST_NAME));
 }
 
-static void __exit ost_exit(void)
+static void /*__exit*/ ost_exit(void)
 {
         class_unregister_type(LUSTRE_OST_NAME);
 }
 {
         class_unregister_type(LUSTRE_OST_NAME);
 }
index 99ac885..c1a9bdf 100644 (file)
@@ -6,3 +6,4 @@ autom4te.cache
 config.log
 config.status
 configure
 config.log
 config.status
 configure
+.*.o.cmd
index 29a713f..7a48c58 100644 (file)
@@ -1 +1,6 @@
-EXTRA_CFLAGS= -Ifs/lustre/include -Ifs/lustre/portals/include
+EXTRA_CFLAGS := -Ifs/lustre/include -Ifs/lustre/portals/include
+# portals/utils/debug.c wants <linux/version.h> from userspace.  sigh.
+HOSTCFLAGS := -I@LINUX@/include $(EXTRA_CFLAGS)
+LIBREADLINE := @LIBREADLINE@
+# 2.5's makefiles aren't nice to cross dir libraries in host programs
+PTLCTLOBJS := debug.o l_ioctl.o parser.o portals.o
index 29a713f..7c66dfa 100644 (file)
@@ -1 +1,4 @@
-EXTRA_CFLAGS= -Ifs/lustre/include -Ifs/lustre/portals/include
+EXTRA_CFLAGS := -Ifs/lustre/include -Ifs/lustre/portals/include
+HOSTCFLAGS := $(EXTRA_CFLAGS)
+# the kernel doesn't want us to build archives for host binaries :/
+PTLCTLOBJS := debug.o l_ioctl.o parser.o portals.o
index be0e51a..73a19df 100644 (file)
@@ -1,6 +1,12 @@
-include fs/lustre/portals/Kernelenv
+include $(src)/Kernelenv
 
 
-obj-y += portals/
+# The ordering of these determines the order in which each subsystem's
+# module_init() functions are called.  If these are changed, make sure
+# they reflect the dependencies between each subsystem's _init functions.
 obj-y += libcfs/
 obj-y += libcfs/
-obj-y += knals/
+obj-y += portals/
 obj-y += router/
 obj-y += router/
+obj-y += knals/
+obj-y += tests/
+
+obj-m += utils/
index 7a4e05c..1a7741b 100644 (file)
@@ -11,8 +11,13 @@ AC_ARG_WITH(lib, [  --with-lib compile lustre library], host_cpu="lib")
 
 AC_ARG_WITH(linux, [  --with-linux=[path] set path to Linux source (default=/usr/src/linux)],LINUX=$with_linux,LINUX=/usr/src/linux)
 AC_SUBST(LINUX)
 
 AC_ARG_WITH(linux, [  --with-linux=[path] set path to Linux source (default=/usr/src/linux)],LINUX=$with_linux,LINUX=/usr/src/linux)
 AC_SUBST(LINUX)
+# For in-kernel builds, point $LINUX/fs/lustre at this source tree.
+if test x$enable_inkernel = xyes ; then
+        echo ln -s `pwd` $LINUX/fs/lustre
+        # -f: on a first configure run the link does not exist yet, and a
+        # plain rm would print an error for the missing file.
+        rm -f $LINUX/fs/lustre
+        ln -s `pwd` $LINUX/fs/lustre
+fi
 
 
-# --------- UML?  --------------------
+# --------- UML check --------------------
 AC_MSG_CHECKING(if you are running user mode linux for $host_cpu ...)
 if test $host_cpu = "lib" ; then 
         host_cpu="lib"
 AC_MSG_CHECKING(if you are running user mode linux for $host_cpu ...)
 if test $host_cpu = "lib" ; then 
         host_cpu="lib"
@@ -111,6 +116,13 @@ case ${host_cpu} in
         MOD_LINK=elf64_ia64
 ;;
 
         MOD_LINK=elf64_ia64
 ;;
 
+       x86_64 )
+       AC_MSG_RESULT($host_cpu)
+        KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -fomit-frame-pointer -mno-red-zone -mcmodel=kernel -pipe -fno-reorder-blocks -finline-limit=2000 -fno-strength-reduce -fno-asynchronous-unwind-tables'
+       KCPPFLAGS='-D__KERNEL__ -DMODULE'
+        MOD_LINK=elf_x86_64
+;;
+
        sparc64 )
        AC_MSG_RESULT($host_cpu)
         KCFLAGS='-O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -Wno-unused -m64 -pipe -mno-fpu -mcpu=ultrasparc -mcmodel=medlow -ffixed-g4 -fcall-used-g5 -fcall-used-g7 -Wno-sign-compare -Wa,--undeclared-regs'
        sparc64 )
        AC_MSG_RESULT($host_cpu)
         KCFLAGS='-O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -Wno-unused -m64 -pipe -mno-fpu -mcpu=ultrasparc -mcmodel=medlow -ffixed-g4 -fcall-used-g5 -fcall-used-g7 -Wno-sign-compare -Wa,--undeclared-regs'
@@ -160,21 +172,33 @@ if test $host_cpu != "lib" ; then
       AC_MSG_ERROR(** cannot find $LINUX/include/linux/autoconf.h. Run make config in $LINUX.)
   fi
 
       AC_MSG_ERROR(** cannot find $LINUX/include/linux/autoconf.h. Run make config in $LINUX.)
   fi
 
-# ------------ RELEASE and moduledir ------------------
+# ------------ LINUXRELEASE and moduledir ------------------
   AC_MSG_CHECKING(for Linux release)
   
   dnl We need to rid ourselves of the nasty [ ] quotes.
   changequote(, )
   dnl Get release from version.h
   AC_MSG_CHECKING(for Linux release)
   
   dnl We need to rid ourselves of the nasty [ ] quotes.
   changequote(, )
   dnl Get release from version.h
-  RELEASE="`sed -ne 's/.*UTS_RELEASE[ \"]*\([0-9.a-zA-Z_-]*\).*/\1/p' $LINUX/include/linux/version.h`"
+  LINUXRELEASE="`sed -ne 's/.*UTS_RELEASE[ \"]*\([0-9.a-zA-Z_-]*\).*/\1/p' $LINUX/include/linux/version.h`"
   changequote([, ])
   
   changequote([, ])
   
-  moduledir='$(libdir)/modules/'$RELEASE/kernel
+  moduledir='$(libdir)/modules/'$LINUXRELEASE/kernel
   AC_SUBST(moduledir)
   
   modulefsdir='$(moduledir)/fs/$(PACKAGE)'
   AC_SUBST(modulefsdir)
   
   AC_SUBST(moduledir)
   
   modulefsdir='$(moduledir)/fs/$(PACKAGE)'
   AC_SUBST(modulefsdir)
   
+  AC_MSG_RESULT($LINUXRELEASE)
+  AC_SUBST(LINUXRELEASE)
+
+# ------------ RELEASE --------------------------------
+  AC_MSG_CHECKING(lustre release)
+  
+  dnl We need to rid ourselves of the nasty [ ] quotes.
+  changequote(, )
+  dnl Get release from version.h
+  RELEASE="`sed -ne 's/-/_/g' -e 's/.*UTS_RELEASE[ \"]*\([0-9.a-zA-Z_]*\).*/\1/p' $LINUX/include/linux/version.h`_`date +%Y%m%d%H%M`"
+  changequote([, ])
+
   AC_MSG_RESULT($RELEASE)
   AC_SUBST(RELEASE)
 
   AC_MSG_RESULT($RELEASE)
   AC_SUBST(RELEASE)
 
@@ -302,7 +326,7 @@ AM_CONDITIONAL(LIBLUSTRE, test x$host_cpu = xlib)
 # This needs to run after we've defined the KCPPFLAGS
 
 AC_MSG_CHECKING(for kernel version)
 # This needs to run after we've defined the KCPPFLAGS
 
 AC_MSG_CHECKING(for kernel version)
-AC_TRY_LINK([#define __KERNEL__
+AC_TRY_COMPILE([#define __KERNEL__
              #include <linux/sched.h>],
             [struct task_struct p;
              p.sighand = NULL;],
              #include <linux/sched.h>],
             [struct task_struct p;
              p.sighand = NULL;],
@@ -313,5 +337,5 @@ if test $RH_2_4_20 = 1; then
        AC_MSG_RESULT(redhat-2.4.20)
        CPPFLAGS="$CPPFLAGS -DCONFIG_RH_2_4_20"
 else
        AC_MSG_RESULT(redhat-2.4.20)
        CPPFLAGS="$CPPFLAGS -DCONFIG_RH_2_4_20"
 else
-       AC_MSG_RESULT($RELEASE)
+       AC_MSG_RESULT($LINUXRELEASE)
 fi 
 fi 
index 3aa6909..f9605ab 100644 (file)
@@ -1,5 +1,11 @@
 /* portals/include/config.h.in.  Generated from configure.in by autoheader.  */
 
 /* portals/include/config.h.in.  Generated from configure.in by autoheader.  */
 
+/* Compile with orphan support */
+#undef ENABLE_ORPHANS
+
+/* Use the Pinger */
+#undef ENABLE_PINGER
+
 /* Define to 1 if you have the <inttypes.h> header file. */
 #undef HAVE_INTTYPES_H
 
 /* Define to 1 if you have the <inttypes.h> header file. */
 #undef HAVE_INTTYPES_H
 
index ee3b9fc..2133391 100644 (file)
@@ -4,7 +4,6 @@
 #ifndef _KP30_INCLUDED
 #define _KP30_INCLUDED
 
 #ifndef _KP30_INCLUDED
 #define _KP30_INCLUDED
 
-
 #define PORTAL_DEBUG
 
 #ifndef offsetof
 #define PORTAL_DEBUG
 
 #ifndef offsetof
 
 #define LOWEST_BIT_SET(x)      ((x) & ~((x) - 1))
 
 
 #define LOWEST_BIT_SET(x)      ((x) & ~((x) - 1))
 
-#ifndef CONFIG_SMP
-# define smp_processor_id() 0
-#endif
-
 /*
  *  Debugging
  */
 /*
  *  Debugging
  */
@@ -24,39 +19,34 @@ extern unsigned int portal_subsystem_debug;
 extern unsigned int portal_stack;
 extern unsigned int portal_debug;
 extern unsigned int portal_printk;
 extern unsigned int portal_stack;
 extern unsigned int portal_debug;
 extern unsigned int portal_printk;
-/* Debugging subsystems  (8 bit ID)
- *
- * If you add debug subsystem #32, you need to send email to phil, because
- * you're going to break kernel subsystem debug filtering. */
-#define S_UNDEFINED    (0 << 24)
-#define S_MDC          (1 << 24)
-#define S_MDS          (2 << 24)
-#define S_OSC          (3 << 24)
-#define S_OST          (4 << 24)
-#define S_CLASS        (5 << 24)
-#define S_OBDFS        (6 << 24) /* obsolete */
-#define S_LLITE        (7 << 24)
-#define S_RPC          (8 << 24)
-#define S_EXT2OBD      (9 << 24) /* obsolete */
-#define S_PORTALS     (10 << 24)
-#define S_SOCKNAL     (11 << 24)
-#define S_QSWNAL      (12 << 24)
-#define S_PINGER      (13 << 24)
-#define S_FILTER      (14 << 24)
-#define S_TRACE       (15 << 24) /* obsolete */
-#define S_ECHO        (16 << 24)
-#define S_LDLM        (17 << 24)
-#define S_LOV         (18 << 24)
-#define S_GMNAL       (19 << 24)
-#define S_PTLROUTER   (20 << 24)
-#define S_COBD        (21 << 24)
-#define S_PTLBD       (22 << 24)
-#define S_LOG         (23 << 24)
-
-/* If you change these values, please keep portals/linux/utils/debug.c
+/* Debugging subsystems (32 bits, non-overlapping) */
+#define S_UNDEFINED    (1 << 0)
+#define S_MDC          (1 << 1)
+#define S_MDS          (1 << 2)
+#define S_OSC          (1 << 3)
+#define S_OST          (1 << 4)
+#define S_CLASS        (1 << 5)
+#define S_LOG          (1 << 6)
+#define S_LLITE        (1 << 7)
+#define S_RPC          (1 << 8)
+#define S_MGMT         (1 << 9)
+#define S_PORTALS     (1 << 10)
+#define S_SOCKNAL     (1 << 11)
+#define S_QSWNAL      (1 << 12)
+#define S_PINGER      (1 << 13)
+#define S_FILTER      (1 << 14)
+#define S_PTLBD       (1 << 15)
+#define S_ECHO        (1 << 16)
+#define S_LDLM        (1 << 17)
+#define S_LOV         (1 << 18)
+#define S_GMNAL       (1 << 19)
+#define S_PTLROUTER   (1 << 20)
+#define S_COBD        (1 << 21)
+
+/* If you change these values, please keep portals/utils/debug.c
  * up to date! */
 
  * up to date! */
 
-/* Debugging masks (24 bits, non-overlapping) */
+/* Debugging masks (32 bits, non-overlapping) */
 #define D_TRACE     (1 << 0) /* ENTRY/EXIT markers */
 #define D_INODE     (1 << 1)
 #define D_SUPER     (1 << 2)
 #define D_TRACE     (1 << 0) /* ENTRY/EXIT markers */
 #define D_INODE     (1 << 1)
 #define D_SUPER     (1 << 2)
@@ -80,20 +70,23 @@ extern unsigned int portal_printk;
 #define D_RPCTRACE  (1 << 20) /* for distributed debugging */
 #define D_VFSTRACE  (1 << 21)
 
 #define D_RPCTRACE  (1 << 20) /* for distributed debugging */
 #define D_VFSTRACE  (1 << 21)
 
-#ifndef __KERNEL__
-#define THREAD_SIZE 8192
+#ifdef __KERNEL__
+# include <linux/sched.h> /* THREAD_SIZE */
+#else
+# define THREAD_SIZE 8192
 #endif
 #endif
-#ifdef  __ia64__
-#define CDEBUG_STACK() (THREAD_SIZE -                                      \
+
+#ifdef __KERNEL__
+# ifdef  __ia64__
+#  define CDEBUG_STACK (THREAD_SIZE -                                      \
                         ((unsigned long)__builtin_dwarf_cfa() &            \
                          (THREAD_SIZE - 1)))
                         ((unsigned long)__builtin_dwarf_cfa() &            \
                          (THREAD_SIZE - 1)))
-#else
-#define CDEBUG_STACK() (THREAD_SIZE -                                      \
+# else
+#  define CDEBUG_STACK (THREAD_SIZE -                                      \
                         ((unsigned long)__builtin_frame_address(0) &       \
                          (THREAD_SIZE - 1)))
                         ((unsigned long)__builtin_frame_address(0) &       \
                          (THREAD_SIZE - 1)))
-#endif
+# endif
 
 
-#ifdef __KERNEL__
 #define CHECK_STACK(stack)                                                    \
         do {                                                                  \
                 if ((stack) > 3*THREAD_SIZE/4 && (stack) > portal_stack) {    \
 #define CHECK_STACK(stack)                                                    \
         do {                                                                  \
                 if ((stack) > 3*THREAD_SIZE/4 && (stack) > portal_stack) {    \
@@ -105,20 +98,21 @@ extern unsigned int portal_printk;
                       /*panic("LBUG");*/                                      \
                 }                                                             \
         } while (0)
                       /*panic("LBUG");*/                                      \
                 }                                                             \
         } while (0)
-#else
+#else /* __KERNEL__ */
 #define CHECK_STACK(stack) do { } while(0)
 #define CHECK_STACK(stack) do { } while(0)
-#endif
+#define CDEBUG_STACK (0L)
+#endif /* __KERNEL__ */
 
 #if 1
 #define CDEBUG(mask, format, a...)                                            \
 do {                                                                          \
 
 #if 1
 #define CDEBUG(mask, format, a...)                                            \
 do {                                                                          \
-        CHECK_STACK(CDEBUG_STACK());                                          \
+        CHECK_STACK(CDEBUG_STACK);                                            \
         if (!(mask) || ((mask) & (D_ERROR | D_EMERG)) ||                      \
             (portal_debug & (mask) &&                                         \
         if (!(mask) || ((mask) & (D_ERROR | D_EMERG)) ||                      \
             (portal_debug & (mask) &&                                         \
-             portal_subsystem_debug & (1 << (DEBUG_SUBSYSTEM >> 24))))        \
+             portal_subsystem_debug & DEBUG_SUBSYSTEM))                       \
                 portals_debug_msg(DEBUG_SUBSYSTEM, mask,                      \
                                   __FILE__, __FUNCTION__, __LINE__,           \
                 portals_debug_msg(DEBUG_SUBSYSTEM, mask,                      \
                                   __FILE__, __FUNCTION__, __LINE__,           \
-                                  CDEBUG_STACK(), format , ## a);             \
+                                  CDEBUG_STACK, format, ## a);                \
 } while (0)
 
 #define CWARN(format, a...) CDEBUG(D_WARNING, format, ## a)
 } while (0)
 
 #define CWARN(format, a...) CDEBUG(D_WARNING, format, ## a)
@@ -162,7 +156,6 @@ do {                                                                    \
 #define EXIT                            do { } while (0)
 #endif
 
 #define EXIT                            do { } while (0)
 #endif
 
-
 #ifdef __KERNEL__
 # include <linux/vmalloc.h>
 # include <linux/time.h>
 #ifdef __KERNEL__
 # include <linux/vmalloc.h>
 # include <linux/time.h>
@@ -210,7 +203,8 @@ static inline void our_cond_resched(void)
 #endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) */
 
 #ifdef PORTAL_DEBUG
 #endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) */
 
 #ifdef PORTAL_DEBUG
-extern void kportal_assertion_failed(char *expr,char *file,char *func,int line);
+extern void kportal_assertion_failed(char *expr, char *file, const char *func,
+                                     const int line);
 #define LASSERT(e) ((e) ? 0 : kportal_assertion_failed( #e , __FILE__,  \
                                                         __FUNCTION__, __LINE__))
 #else
 #define LASSERT(e) ((e) ? 0 : kportal_assertion_failed( #e , __FILE__,  \
                                                         __FUNCTION__, __LINE__))
 #else
@@ -560,14 +554,14 @@ extern struct prof_ent prof_ents[MAX_PROFS];
 #endif /* PORTALS_PROFILING */
 
 /* debug.c */
 #endif /* PORTALS_PROFILING */
 
 /* debug.c */
-void portals_run_lbug_upcall(char * file, char *fn, int line);
+void portals_run_lbug_upcall(char * file, const char *fn, const int line);
 void portals_debug_dumplog(void);
 int portals_debug_init(unsigned long bufsize);
 int portals_debug_cleanup(void);
 int portals_debug_clear_buffer(void);
 int portals_debug_mark_buffer(char *text);
 int portals_debug_set_daemon(unsigned int cmd, unsigned int length,
 void portals_debug_dumplog(void);
 int portals_debug_init(unsigned long bufsize);
 int portals_debug_cleanup(void);
 int portals_debug_clear_buffer(void);
 int portals_debug_mark_buffer(char *text);
 int portals_debug_set_daemon(unsigned int cmd, unsigned int length,
-                char *file, unsigned int size);
+                             char *file, unsigned int size);
 __s32 portals_debug_copy_to_user(char *buf, unsigned long len);
 #if (__GNUC__)
 /* Use the special GNU C __attribute__ hack to have the compiler check the
 __s32 portals_debug_copy_to_user(char *buf, unsigned long len);
 #if (__GNUC__)
 /* Use the special GNU C __attribute__ hack to have the compiler check the
@@ -578,13 +572,14 @@ __s32 portals_debug_copy_to_user(char *buf, unsigned long len);
 # warning printf has been defined as a macro...
 # undef printf
 #endif
 # warning printf has been defined as a macro...
 # undef printf
 #endif
-void portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
-                        unsigned long stack, const char *format, ...)
+void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
+                       const int line, unsigned long stack,
+                       const char *format, ...)
         __attribute__ ((format (printf, 7, 8)));
 #else
         __attribute__ ((format (printf, 7, 8)));
 #else
-void portals_debug_msg (int subsys, int mask, char *file, char *fn,
-                        int line, unsigned long stack,
-                        const char *format, ...);
+void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
+                       const int line, unsigned long stack,
+                       const char *format, ...);
 #endif /* __GNUC__ */
 void portals_debug_set_level(unsigned int debug_level);
 
 #endif /* __GNUC__ */
 void portals_debug_set_level(unsigned int debug_level);
 
@@ -618,9 +613,9 @@ extern void kportal_blockallsigs (void);
 # define PORTAL_ALLOC(ptr, size) do { (ptr) = malloc(size); } while (0);
 # define PORTAL_FREE(a, b) do { free(a); } while (0);
 # define portals_debug_msg(subsys, mask, file, fn, line, stack, format, a...) \
 # define PORTAL_ALLOC(ptr, size) do { (ptr) = malloc(size); } while (0);
 # define PORTAL_FREE(a, b) do { free(a); } while (0);
 # define portals_debug_msg(subsys, mask, file, fn, line, stack, format, a...) \
-    printf ("%02x:%06x (@%lu %s:%s,l. %d %d %lu): " format,                    \
-            (subsys) >> 24, (mask), (long)time(0), file, fn, line,            \
-            getpid() , stack, ## a);
+    printf("%02x:%06x (@%lu %s:%s,l. %d %d %lu): " format,                    \
+           (subsys), (mask), (long)time(0), file, fn, line,                   \
+           getpid() , stack, ## a);
 #endif
 
 #ifndef CURRENT_TIME
 #endif
 
 #ifndef CURRENT_TIME
@@ -911,13 +906,13 @@ ptl_handle_ni_t *kportal_get_ni (int nal);
 void kportal_put_ni (int nal);
 
 #ifdef __CYGWIN__
 void kportal_put_ni (int nal);
 
 #ifdef __CYGWIN__
-#ifndef BITS_PER_LONG
-#if (~0UL) == 0xffffffffUL
-#define BITS_PER_LONG 32
-#else
-#define BITS_PER_LONG 64
-#endif
-#endif
+# ifndef BITS_PER_LONG
+#  if (~0UL) == 0xffffffffUL
+#   define BITS_PER_LONG 32
+#  else
+#   define BITS_PER_LONG 64
+#  endif
+# endif
 #endif
 
 #if (BITS_PER_LONG == 32 || __WORDSIZE == 32)
 #endif
 
 #if (BITS_PER_LONG == 32 || __WORDSIZE == 32)
index e28fbac..a7cb4d1 100644 (file)
@@ -1,13 +1,56 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef _PORTALS_COMPAT_H
+#define _PORTALS_COMPAT_H
+
+// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved
+#if SPINLOCK_DEBUG
+# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) || defined(CONFIG_RH_2_4_20)
+#  define SIGNAL_MASK_ASSERT() \
+   LASSERT(current->sighand->siglock.magic == SPINLOCK_MAGIC)
+# else
+#  define SIGNAL_MASK_ASSERT() \
+   LASSERT(current->sigmask_lock.magic == SPINLOCK_MAGIC)
+# endif
+#else
+# define SIGNAL_MASK_ASSERT()
+#endif
+// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved
+
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) || defined(CONFIG_RH_2_4_20)
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) || defined(CONFIG_RH_2_4_20)
-# define SIGNAL_MASK_LOCK(task, flags)                              \
+
+# define SIGNAL_MASK_LOCK(task, flags)                                  \
   spin_lock_irqsave(&task->sighand->siglock, flags)
   spin_lock_irqsave(&task->sighand->siglock, flags)
-# define SIGNAL_MASK_UNLOCK(task, flags)                            \
+# define SIGNAL_MASK_UNLOCK(task, flags)                                \
   spin_unlock_irqrestore(&task->sighand->siglock, flags)
   spin_unlock_irqrestore(&task->sighand->siglock, flags)
+# define USERMODEHELPER(path, argv, envp)                               \
+  call_usermodehelper(path, argv, envp, 1)
 # define RECALC_SIGPENDING         recalc_sigpending()
 # define RECALC_SIGPENDING         recalc_sigpending()
-#else
-# define SIGNAL_MASK_LOCK(task, flags)                              \
+# define CURRENT_SECONDS           get_seconds()
+
+#else /* 2.4.x */
+
+# define SIGNAL_MASK_LOCK(task, flags)                                  \
   spin_lock_irqsave(&task->sigmask_lock, flags)
   spin_lock_irqsave(&task->sigmask_lock, flags)
-# define SIGNAL_MASK_UNLOCK(task, flags)                            \
+# define SIGNAL_MASK_UNLOCK(task, flags)                                \
   spin_unlock_irqrestore(&task->sigmask_lock, flags)
   spin_unlock_irqrestore(&task->sigmask_lock, flags)
+# define USERMODEHELPER(path, argv, envp)                               \
+  call_usermodehelper(path, argv, envp)
 # define RECALC_SIGPENDING         recalc_sigpending(current)
 # define RECALC_SIGPENDING         recalc_sigpending(current)
+# define CURRENT_SECONDS           CURRENT_TIME
+
+#endif
+
+#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
+# define THREAD_NAME(comm, fmt, a...)                                   \
+        sprintf(comm, fmt "|%d", ## a, current->thread.extern_pid)
+#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+# define THREAD_NAME(comm, fmt, a...)                                   \
+        sprintf(comm, fmt "|%d", ## a, current->thread.mode.tt.extern_pid)
+#else
+# define THREAD_NAME(comm, fmt, a...)                                   \
+        sprintf(comm, fmt, ## a)
 #endif
 #endif
+
+#endif /* _PORTALS_COMPAT_H */
index 2b63312..78a1e2d 100644 (file)
@@ -1,6 +1,4 @@
 #ifndef _LINUX_LIST_H
 #ifndef _LINUX_LIST_H
-#define _LINUX_LIST_H
-
 
 /*
  * Simple doubly linked list implementation.
 
 /*
  * Simple doubly linked list implementation.
@@ -101,7 +99,9 @@ static inline void list_del_init(struct list_head *entry)
        __list_del(entry->prev, entry->next);
        INIT_LIST_HEAD(entry);
 }
        __list_del(entry->prev, entry->next);
        INIT_LIST_HEAD(entry);
 }
+#endif
 
 
+#ifndef list_for_each_entry
 /**
  * list_move - delete from one list and add as another's head
  * @list: the entry to move
 /**
  * list_move - delete from one list and add as another's head
  * @list: the entry to move
@@ -124,7 +124,10 @@ static inline void list_move_tail(struct list_head *list,
        __list_del(list->prev, list->next);
        list_add_tail(list, head);
 }
        __list_del(list->prev, list->next);
        list_add_tail(list, head);
 }
+#endif
 
 
+#ifndef _LINUX_LIST_H
+#define _LINUX_LIST_H
 /**
  * list_empty - tests whether a list is empty
  * @head: the list to test.
 /**
  * list_empty - tests whether a list is empty
  * @head: the list to test.
index 7d1b304..d389aab 100644 (file)
@@ -2,7 +2,7 @@
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
  * Compile with:
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
  * Compile with:
- * cc -I../../portals/include -o fio fio.c -L../../portals/linux/utils -lptlctl 
+ * cc -I../../portals/include -o fio fio.c -L../../portals/linux/utils -lptlctl
  */
 #ifndef __LTRACE_H_
 #define __LTRACE_H_
  */
 #ifndef __LTRACE_H_
 #define __LTRACE_H_
@@ -31,20 +31,20 @@ static inline int ltrace_write_file(char* fname)
         argv[0] = "debug_kernel";
         argv[1] = fname;
         argv[2] = "1";
         argv[0] = "debug_kernel";
         argv[1] = fname;
         argv[2] = "1";
-        
+
         fprintf(stderr, "[ptlctl] %s %s %s\n", argv[0], argv[1], argv[2]);
         fprintf(stderr, "[ptlctl] %s %s %s\n", argv[0], argv[1], argv[2]);
-        
+
         return jt_dbg_debug_kernel(3, argv);
 }
 
 static inline int ltrace_clear()
 {
         char* argv[1];
         return jt_dbg_debug_kernel(3, argv);
 }
 
 static inline int ltrace_clear()
 {
         char* argv[1];
-        
+
         argv[0] = "clear";
         argv[0] = "clear";
-        
+
         fprintf(stderr, "[ptlctl] %s\n", argv[0]);
         fprintf(stderr, "[ptlctl] %s\n", argv[0]);
-        
+
         return jt_dbg_clear_debug_buf(1, argv);
 }
 
         return jt_dbg_clear_debug_buf(1, argv);
 }
 
@@ -52,9 +52,9 @@ static inline int ltrace_mark(int indent_level, char* text)
 {
         char* argv[2];
         char mark_buf[PATH_MAX];
 {
         char* argv[2];
         char mark_buf[PATH_MAX];
-        
+
         snprintf(mark_buf, PATH_MAX, "====%d=%s", indent_level, text);
         snprintf(mark_buf, PATH_MAX, "====%d=%s", indent_level, text);
-        
+
         argv[0] = "mark";
         argv[1] = mark_buf;
         return jt_dbg_mark_debug_buf(2, argv);
         argv[0] = "mark";
         argv[1] = mark_buf;
         return jt_dbg_mark_debug_buf(2, argv);
@@ -65,9 +65,9 @@ static inline int ltrace_applymasks()
         char* argv[2];
         argv[0] = "list";
         argv[1] = "applymasks";
         char* argv[2];
         argv[0] = "list";
         argv[1] = "applymasks";
-        
+
         fprintf(stderr, "[ptlctl] %s %s\n", argv[0], argv[1]);
         fprintf(stderr, "[ptlctl] %s %s\n", argv[0], argv[1]);
-        
+
         return jt_dbg_list(2, argv);
 }
 
         return jt_dbg_list(2, argv);
 }
 
@@ -95,19 +95,19 @@ static inline int ltrace_start()
 #ifdef PORTALS_DEV_ID
         rc = register_ioc_dev(PORTALS_DEV_ID, PORTALS_DEV_PATH);
 #endif
 #ifdef PORTALS_DEV_ID
         rc = register_ioc_dev(PORTALS_DEV_ID, PORTALS_DEV_PATH);
 #endif
-        ltrace_filter("class"); 
+        ltrace_filter("class");
         ltrace_filter("socknal");
         ltrace_filter("socknal");
-        ltrace_filter("qswnal"); 
-        ltrace_filter("gmnal");  
-        ltrace_filter("portals");  
-        
-        ltrace_show("all_types");  
-        ltrace_filter("trace");  
-        ltrace_filter("malloc"); 
-        ltrace_filter("net"); 
-        ltrace_filter("page"); 
-        ltrace_filter("other"); 
-        ltrace_filter("info"); 
+        ltrace_filter("qswnal");
+        ltrace_filter("gmnal");
+        ltrace_filter("portals");
+
+        ltrace_show("all_types");
+        ltrace_filter("trace");
+        ltrace_filter("malloc");
+        ltrace_filter("net");
+        ltrace_filter("page");
+        ltrace_filter("other");
+        ltrace_filter("info");
         ltrace_applymasks();
 
         return rc;
         ltrace_applymasks();
 
         return rc;
@@ -146,7 +146,7 @@ static inline void ltrace_add_processnames(char* fname)
         struct timezone tz;
         int nob;
         int underuml = !not_uml();
         struct timezone tz;
         int nob;
         int underuml = !not_uml();
-        
+
         gettimeofday(&tv, &tz);
 
         nob = snprintf(cmdbuf, LTRACE_MAX_NOB, "ps --no-headers -eo \"");
         gettimeofday(&tv, &tz);
 
         nob = snprintf(cmdbuf, LTRACE_MAX_NOB, "ps --no-headers -eo \"");
@@ -167,7 +167,7 @@ static inline void ltrace_add_processnames(char* fname)
                                  "(%s:%d:%s() %d+%lu): ",
                                  "lltrace.h", __LINE__, __FUNCTION__, 0, 0L);
         }
                                  "(%s:%d:%s() %d+%lu): ",
                                  "lltrace.h", __LINE__, __FUNCTION__, 0, 0L);
         }
-         
+
         nob += snprintf(cmdbuf+nob, LTRACE_MAX_NOB, " %%p %%c\" >> %s", fname);
         system(cmdbuf);
 }
         nob += snprintf(cmdbuf+nob, LTRACE_MAX_NOB, " %%p %%c\" >> %s", fname);
         system(cmdbuf);
 }
index 12b1925..13790f7 100644 (file)
@@ -1,6 +1,3 @@
-/*
-*/
-
 #ifndef MYRNAL_H
 #define MYRNAL_H
 
 #ifndef MYRNAL_H
 #define MYRNAL_H
 
index 88be63c..7cb3ab7 100644 (file)
@@ -1,5 +1,3 @@
-/*
-*/
 #ifndef _NAL_H_
 #define _NAL_H_
 
 #ifndef _NAL_H_
 #define _NAL_H_
 
index 4727599..760f465 100644 (file)
@@ -1,6 +1,3 @@
-/*
- */
-
 #ifndef _INCppidh_
 #define _INCppidh_
 
 #ifndef _INCppidh_
 #define _INCppidh_
 
index d4038b6..0269290 100644 (file)
@@ -2,14 +2,19 @@
 #define _P30_TYPES_H_
 
 #ifdef __linux__
 #define _P30_TYPES_H_
 
 #ifdef __linux__
-#include <asm/types.h>
-#include <asm/timex.h>
+# include <asm/types.h>
+# include <asm/timex.h>
 #else
 #else
-#include <sys/types.h>
+# include <sys/types.h>
 typedef u_int32_t __u32;
 typedef u_int64_t __u64;
 typedef u_int32_t __u32;
 typedef u_int64_t __u64;
-typedef unsigned long long cycles_t;
-static inline cycles_t get_cycles(void) { return 0; }
+#endif
+
+#ifdef __KERNEL__
+# include <linux/time.h>
+#else
+# include <sys/time.h>
+# define do_gettimeofday(tv) gettimeofday(tv, NULL)
 #endif
 
 typedef __u64 ptl_nid_t;
 #endif
 
 typedef __u64 ptl_nid_t;
@@ -31,7 +36,7 @@ typedef ptl_handle_any_t ptl_handle_md_t;
 typedef ptl_handle_any_t ptl_handle_me_t;
 
 #define PTL_HANDLE_NONE \
 typedef ptl_handle_any_t ptl_handle_me_t;
 
 #define PTL_HANDLE_NONE \
-((const ptl_handle_any_t){.nal_idx = -1, .cookie = -1})
+    ((const ptl_handle_any_t){.nal_idx = -1, .cookie = -1})
 #define PTL_EQ_NONE PTL_HANDLE_NONE
 
 static inline int PtlHandleEqual (ptl_handle_any_t h1, ptl_handle_any_t h2)
 #define PTL_EQ_NONE PTL_HANDLE_NONE
 
 static inline int PtlHandleEqual (ptl_handle_any_t h1, ptl_handle_any_t h2)
@@ -108,17 +113,15 @@ typedef struct {
         ptl_handle_me_t unlinked_me;
         ptl_md_t mem_desc;
         ptl_hdr_data_t hdr_data;
         ptl_handle_me_t unlinked_me;
         ptl_md_t mem_desc;
         ptl_hdr_data_t hdr_data;
-        cycles_t  arrival_time;
+        struct timeval arrival_time;
         volatile ptl_seq_t sequence;
 } ptl_event_t;
 
         volatile ptl_seq_t sequence;
 } ptl_event_t;
 
-
 typedef enum {
         PTL_ACK_REQ,
         PTL_NOACK_REQ
 } ptl_ack_req_t;
 
 typedef enum {
         PTL_ACK_REQ,
         PTL_NOACK_REQ
 } ptl_ack_req_t;
 
-
 typedef struct {
         volatile ptl_seq_t sequence;
         ptl_size_t size;
 typedef struct {
         volatile ptl_seq_t sequence;
         ptl_size_t size;
@@ -130,7 +133,6 @@ typedef struct {
         ptl_eq_t *eq;
 } ptl_ni_t;
 
         ptl_eq_t *eq;
 } ptl_ni_t;
 
-
 typedef struct {
         int max_match_entries;    /* max number of match entries */
         int max_mem_descriptors;  /* max number of memory descriptors */
 typedef struct {
         int max_match_entries;    /* max number of match entries */
         int max_mem_descriptors;  /* max number of memory descriptors */
index 282522d..89a4aa6 100644 (file)
@@ -1,2 +1,3 @@
 Makefile
 Makefile.in
 Makefile
 Makefile.in
+.*.o.cmd
index ce40a60..cd5d9d6 100644 (file)
@@ -1,4 +1,4 @@
-include ../Kernelenv
+include $(obj)/../Kernelenv
 
 obj-y = socknal/
 
 obj-y = socknal/
-# more coming...
\ No newline at end of file
+# more coming...
index ceeea2a..0cffc15 100644 (file)
@@ -124,7 +124,7 @@ static nal_t *kgmnal_init(int interface, ptl_pt_index_t ptl_size,
         return &kgmnal_api;
 }
 
         return &kgmnal_api;
 }
 
-static void __exit
+static void /*__exit*/
 kgmnal_finalize(void)
 {
         struct list_head *tmp;
 kgmnal_finalize(void)
 {
         struct list_head *tmp;
index 1066d69..479cc2c 100644 (file)
@@ -112,7 +112,7 @@ static nal_t *kscimacnal_init(int interface, ptl_pt_index_t  ptl_size,
 
 
 /* Called by kernel at module unload time */
 
 
 /* Called by kernel at module unload time */
-static void __exit 
+static void /*__exit*/ 
 kscimacnal_finalize(void)
 {
         /* FIXME: How should the shutdown procedure really look? */
 kscimacnal_finalize(void)
 {
         /* FIXME: How should the shutdown procedure really look? */
index e995588..95973d6 100644 (file)
@@ -1,3 +1,4 @@
 .deps
 Makefile
 Makefile.in
 .deps
 Makefile
 Makefile.in
+.*.o.cmd
index 46edf01..5c1b366 100644 (file)
@@ -3,7 +3,7 @@
 # This code is issued under the GNU General Public License.
 # See the file COPYING in this distribution
 
 # This code is issued under the GNU General Public License.
 # See the file COPYING in this distribution
 
-include ../../Kernelenv
+include $(src)/../../Kernelenv
 
 obj-y += ksocknal.o
 ksocknal-objs    := socknal.o socknal_cb.o
 
 obj-y += ksocknal.o
 ksocknal-objs    := socknal.o socknal_cb.o
index 1f5dc38..77ee473 100644 (file)
@@ -379,7 +379,7 @@ ktoenal_cmd(struct portal_ioctl_data * data, void * private)
 }
 
 
 }
 
 
-void __exit
+void /*__exit*/
 ktoenal_module_fini (void)
 {
         CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
 ktoenal_module_fini (void)
 {
         CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
index ec37f6f..abd0731 100644 (file)
@@ -893,6 +893,7 @@ ktoenal_process_receive (ksock_conn_t *conn, long *irq_flags)
                                 spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags);
                                 goto get_fmb;   /* => go get a fwd msg buffer */
                         default:
                                 spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags);
                                 goto get_fmb;   /* => go get a fwd msg buffer */
                         default:
+                                break;
                         }
                         /* Not Reached */
                         LBUG ();
                         }
                         /* Not Reached */
                         LBUG ();
@@ -934,6 +935,7 @@ ktoenal_process_receive (ksock_conn_t *conn, long *irq_flags)
                 goto out;                       /* (later) */
 
         default:
                 goto out;                       /* (later) */
 
         default:
+                break;
         }
 
         /* Not Reached */
         }
 
         /* Not Reached */
index 67d1a3d..7fa686f 100644 (file)
@@ -2,3 +2,4 @@
 Makefile
 Makefile.in
 link-stamp
 Makefile
 Makefile.in
 link-stamp
+.*.o.cmd
index 3196ea2..9aa838f 100644 (file)
@@ -6,4 +6,4 @@
 include fs/lustre/portals/Kernelenv
 
 obj-y += libcfs.o
 include fs/lustre/portals/Kernelenv
 
 obj-y += libcfs.o
-licfs-objs    := module.o proc.o debug.o
\ No newline at end of file
+libcfs-objs    := module.o proc.o debug.o
index 8d26dbb..f37cd96 100644 (file)
@@ -571,8 +571,8 @@ int portals_debug_init(unsigned long bufsize)
         memset(debug_buf, 0, debug_size);
         debug_wrapped = 0;
 
         memset(debug_buf, 0, debug_size);
         debug_wrapped = 0;
 
-        printk(KERN_INFO "Portals: allocated %lu byte debug buffer at %p.\n",
-               bufsize, debug_buf);
+        //printk(KERN_INFO "Portals: allocated %lu byte debug buffer at %p.\n",
+               //bufsize, debug_buf);
         atomic_set(&debug_off_a, debug_off);
         notifier_chain_register(&panic_notifier_list, &lustre_panic_notifier);
         debug_size = bufsize;
         atomic_set(&debug_off_a, debug_off);
         notifier_chain_register(&panic_notifier_list, &lustre_panic_notifier);
         debug_size = bufsize;
@@ -632,9 +632,9 @@ int portals_debug_mark_buffer(char *text)
         if (debug_buf == NULL)
                 return -EINVAL;
 
         if (debug_buf == NULL)
                 return -EINVAL;
 
-        CDEBUG(0, "*******************************************************************************\n");
+        CDEBUG(0, "********************************************************\n");
         CDEBUG(0, "DEBUG MARKER: %s\n", text);
         CDEBUG(0, "DEBUG MARKER: %s\n", text);
-        CDEBUG(0, "*******************************************************************************\n");
+        CDEBUG(0, "********************************************************\n");
 
         return 0;
 }
 
         return 0;
 }
@@ -672,8 +672,8 @@ __s32 portals_debug_copy_to_user(char *buf, unsigned long len)
 
 /* FIXME: I'm not very smart; someone smarter should make this better. */
 void
 
 /* FIXME: I'm not very smart; someone smarter should make this better. */
 void
-portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
-                   unsigned long stack, const char *format, ...)
+portals_debug_msg(int subsys, int mask, char *file, const char *fn,
+                  const int line, unsigned long stack, const char *format, ...)
 {
         va_list       ap;
         unsigned long flags;
 {
         va_list       ap;
         unsigned long flags;
@@ -728,8 +728,8 @@ portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
         do_gettimeofday(&tv);
 
         prefix_nob = snprintf(debug_buf + debug_off, max_nob,
         do_gettimeofday(&tv);
 
         prefix_nob = snprintf(debug_buf + debug_off, max_nob,
-                              "%02x:%06x:%d:%lu.%06lu ",
-                              subsys >> 24, mask, smp_processor_id(),
+                              "%06x:%06x:%d:%lu.%06lu ",
+                              subsys, mask, smp_processor_id(),
                               tv.tv_sec, tv.tv_usec);
         max_nob -= prefix_nob;
 
                               tv.tv_sec, tv.tv_usec);
         max_nob -= prefix_nob;
 
@@ -752,7 +752,7 @@ portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
 
         va_start(ap, format);
         msg_nob += vsnprintf(debug_buf + debug_off + prefix_nob + msg_nob,
 
         va_start(ap, format);
         msg_nob += vsnprintf(debug_buf + debug_off + prefix_nob + msg_nob,
-                            max_nob, format, ap);
+                             max_nob, format, ap);
         max_nob -= msg_nob;
         va_end(ap);
 
         max_nob -= msg_nob;
         va_end(ap);
 
@@ -790,7 +790,7 @@ void portals_debug_set_level(unsigned int debug_level)
         portal_debug = debug_level;
 }
 
         portal_debug = debug_level;
 }
 
-void portals_run_lbug_upcall(char * file, char *fn, int line)
+void portals_run_lbug_upcall(char *file, const char *fn, const int line)
 {
         char *argv[6];
         char *envp[3];
 {
         char *argv[6];
         char *envp[3];
@@ -803,7 +803,7 @@ void portals_run_lbug_upcall(char * file, char *fn, int line)
         argv[0] = portals_upcall;
         argv[1] = "LBUG";
         argv[2] = file;
         argv[0] = portals_upcall;
         argv[1] = "LBUG";
         argv[2] = file;
-        argv[3] = fn;
+        argv[3] = (char *)fn;
         argv[4] = buf;
         argv[5] = NULL;
 
         argv[4] = buf;
         argv[5] = NULL;
 
index 14cc325..e8eb290 100644 (file)
@@ -62,10 +62,10 @@ static struct nal_cmd_handler nal_cmd[NAL_MAX_NR + 1];
 struct semaphore nal_cmd_sem;
 
 #ifdef PORTAL_DEBUG
 struct semaphore nal_cmd_sem;
 
 #ifdef PORTAL_DEBUG
-void
-kportal_assertion_failed (char *expr, char *file, char *func, int line)
+void kportal_assertion_failed(char *expr, char *file, const char *func,
+                              const int line)
 {
 {
-        portals_debug_msg(0, D_EMERG, file, func, line, CDEBUG_STACK(),
+        portals_debug_msg(0, D_EMERG, file, func, line, CDEBUG_STACK,
                           "ASSERTION(%s) failed\n", expr);
         LBUG_WITH_LOC(file, func, line);
 }
                           "ASSERTION(%s) failed\n", expr);
         LBUG_WITH_LOC(file, func, line);
 }
index e995588..95973d6 100644 (file)
@@ -1,3 +1,4 @@
 .deps
 Makefile
 Makefile.in
 .deps
 Makefile
 Makefile.in
+.*.o.cmd
index 5627ef7..7822846 100644 (file)
@@ -3,7 +3,10 @@
 # This code is issued under the GNU General Public License.
 # See the file COPYING in this distribution
 
 # This code is issued under the GNU General Public License.
 # See the file COPYING in this distribution
 
-include ../Kernelenv
+include $(src)/../Kernelenv
 
 obj-y += portals.o
 
 obj-y += portals.o
-portals-objs    := lib-dispatch.o lib-eq.o lib-init.o lib-md.o lib-me.o lib-move.o lib-msg.o lib-ni.o lib-not-impl.o lib-pid.o api-eq.o api-errno.o api-init.o api-md.o api-me.o api-ni.o api-wrap.o
+portals-objs    :=     lib-dispatch.o lib-eq.o lib-init.o lib-md.o lib-me.o \
+                       lib-move.o lib-msg.o lib-ni.o lib-pid.o \
+                       api-eq.o api-errno.o api-init.o api-me.o api-ni.o \
+                       api-wrap.o
index e59c922..dc1fead 100644 (file)
@@ -26,7 +26,7 @@
 #include <portals/api-support.h>
 
 int ptl_init;
 #include <portals/api-support.h>
 
 int ptl_init;
-unsigned int portal_subsystem_debug = 0xfff7e3ff;
+unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_QSWNAL | S_SOCKNAL | S_GMNAL);
 unsigned int portal_debug = ~0;
 unsigned int portal_printk;
 unsigned int portal_stack;
 unsigned int portal_debug = ~0;
 unsigned int portal_printk;
 unsigned int portal_stack;
index fde4f16..02f8b60 100644 (file)
@@ -544,7 +544,7 @@ get_new_msg (nal_cb_t *nal, lib_md_t *md)
         msg->send_ack = 0;
 
         msg->md = md;
         msg->send_ack = 0;
 
         msg->md = md;
-        msg->ev.arrival_time = get_cycles();
+        do_gettimeofday(&msg->ev.arrival_time);
         md->pending++;
         if (md->threshold != PTL_MD_THRESH_INF) {
                 LASSERT (md->threshold > 0);
         md->pending++;
         if (md->threshold != PTL_MD_THRESH_INF) {
                 LASSERT (md->threshold > 0);
index e995588..95973d6 100644 (file)
@@ -1,3 +1,4 @@
 .deps
 Makefile
 Makefile.in
 .deps
 Makefile
 Makefile.in
+.*.o.cmd
index 64bd09b..9b02c03 100644 (file)
@@ -3,7 +3,7 @@
 # This code is issued under the GNU General Public License.
 # See the file COPYING in this distribution
 
 # This code is issued under the GNU General Public License.
 # See the file COPYING in this distribution
 
-include ../Kernelenv
+include $(src)/../Kernelenv
 
 obj-y += kptlrouter.o
 kptlrouter-objs    := router.o proc.o
 
 obj-y += kptlrouter.o
 kptlrouter-objs    := router.o proc.o
index 6074c3c..27a7fba 100644 (file)
@@ -23,8 +23,8 @@
 
 #include "router.h"
 
 
 #include "router.h"
 
-struct list_head kpr_routes;
-struct list_head kpr_nals;
+LIST_HEAD(kpr_routes);
+LIST_HEAD(kpr_nals);
 
 unsigned long long kpr_fwd_bytes;
 unsigned long      kpr_fwd_packets;
 
 unsigned long long kpr_fwd_bytes;
 unsigned long      kpr_fwd_packets;
@@ -35,7 +35,7 @@ atomic_t           kpr_queue_depth;
  *
  * Once in a blue moon we register/deregister NALs and add/remove routing
  * entries (thread context only)... */
  *
  * Once in a blue moon we register/deregister NALs and add/remove routing
  * entries (thread context only)... */
-rwlock_t         kpr_rwlock;
+rwlock_t         kpr_rwlock = RW_LOCK_UNLOCKED;
 
 kpr_router_interface_t kpr_router_interface = {
        kprri_register:         kpr_register_nal,
 
 kpr_router_interface_t kpr_router_interface = {
        kprri_register:         kpr_register_nal,
@@ -55,7 +55,7 @@ kpr_control_interface_t kpr_control_interface = {
 int
 kpr_register_nal (kpr_nal_interface_t *nalif, void **argp)
 {
 int
 kpr_register_nal (kpr_nal_interface_t *nalif, void **argp)
 {
-       long               flags;
+       unsigned long      flags;
        struct list_head  *e;
        kpr_nal_entry_t   *ne;
 
        struct list_head  *e;
        kpr_nal_entry_t   *ne;
 
@@ -98,7 +98,7 @@ kpr_register_nal (kpr_nal_interface_t *nalif, void **argp)
 void
 kpr_shutdown_nal (void *arg)
 {
 void
 kpr_shutdown_nal (void *arg)
 {
-       long             flags;
+       unsigned long    flags;
        kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg;
 
         CDEBUG (D_OTHER, "Shutting down NAL %d\n", ne->kpne_interface.kprni_nalid);
        kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg;
 
         CDEBUG (D_OTHER, "Shutting down NAL %d\n", ne->kpne_interface.kprni_nalid);
@@ -123,7 +123,7 @@ kpr_shutdown_nal (void *arg)
 void
 kpr_deregister_nal (void *arg)
 {
 void
 kpr_deregister_nal (void *arg)
 {
-       long              flags;
+       unsigned long     flags;
        kpr_nal_entry_t  *ne = (kpr_nal_entry_t *)arg;
 
         CDEBUG (D_OTHER, "Deregister NAL %d\n", ne->kpne_interface.kprni_nalid);
        kpr_nal_entry_t  *ne = (kpr_nal_entry_t *)arg;
 
         CDEBUG (D_OTHER, "Deregister NAL %d\n", ne->kpne_interface.kprni_nalid);
@@ -296,7 +296,7 @@ int
 kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, ptl_nid_t lo_nid,
                ptl_nid_t hi_nid)
 {
 kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, ptl_nid_t lo_nid,
                ptl_nid_t hi_nid)
 {
-       long               flags;
+       unsigned long      flags;
        struct list_head  *e;
        kpr_route_entry_t *re;
 
        struct list_head  *e;
        kpr_route_entry_t *re;
 
@@ -345,7 +345,7 @@ kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, ptl_nid_t lo_nid,
 int
 kpr_del_route (ptl_nid_t nid)
 {
 int
 kpr_del_route (ptl_nid_t nid)
 {
-       long               flags;
+       unsigned long      flags;
        struct list_head  *e;
 
         CDEBUG(D_OTHER, "Del route "LPX64"\n", nid);
        struct list_head  *e;
 
         CDEBUG(D_OTHER, "Del route "LPX64"\n", nid);
@@ -398,7 +398,7 @@ kpr_get_route(int idx, int *gateway_nalid, ptl_nid_t *gateway_nid,
         return (-ENOENT);
 }
 
         return (-ENOENT);
 }
 
-static void __exit
+static void /*__exit*/
 kpr_finalise (void)
 {
         LASSERT (list_empty (&kpr_nals));
 kpr_finalise (void)
 {
         LASSERT (list_empty (&kpr_nals));
@@ -427,10 +427,6 @@ kpr_initialise (void)
         CDEBUG(D_MALLOC, "kpr_initialise: kmem %d\n",
                atomic_read(&portal_kmemory));
 
         CDEBUG(D_MALLOC, "kpr_initialise: kmem %d\n",
                atomic_read(&portal_kmemory));
 
-       rwlock_init(&kpr_rwlock);
-       INIT_LIST_HEAD(&kpr_routes);
-       INIT_LIST_HEAD(&kpr_nals);
-
         kpr_proc_init();
 
         PORTAL_SYMBOL_REGISTER(kpr_router_interface);
         kpr_proc_init();
 
         PORTAL_SYMBOL_REGISTER(kpr_router_interface);
index 051d1bd..d0c4c88 100644 (file)
@@ -1,3 +1,4 @@
 Makefile
 Makefile.in
 .deps
 Makefile
 Makefile.in
 .deps
+.*.o.cmd
index 389ffbb..4d04ffb 100644 (file)
@@ -260,7 +260,7 @@ pingcli_start(struct portal_ioctl_data *args)
 
 
 /* called by the portals_ioctl for ping requests */
 
 
 /* called by the portals_ioctl for ping requests */
-static int kping_client(struct portal_ioctl_data *args)
+int kping_client(struct portal_ioctl_data *args)
 {
         PORTAL_ALLOC (client, sizeof(struct pingcli_data));
         if (client == NULL)
 {
         PORTAL_ALLOC (client, sizeof(struct pingcli_data));
         if (client == NULL)
@@ -282,7 +282,7 @@ static int __init pingcli_init(void)
 } /* pingcli_init() */
 
 
 } /* pingcli_init() */
 
 
-static void __exit pingcli_cleanup(void)
+static void /*__exit*/ pingcli_cleanup(void)
 {
         PORTAL_SYMBOL_UNREGISTER (kping_client);
 } /* pingcli_cleanup() */
 {
         PORTAL_SYMBOL_UNREGISTER (kping_client);
 } /* pingcli_cleanup() */
index 1037d09..873e11c 100644 (file)
 #include <asm/semaphore.h>
 
 #define STDSIZE (sizeof(int) + sizeof(int) + sizeof(struct timeval))
 #include <asm/semaphore.h>
 
 #define STDSIZE (sizeof(int) + sizeof(int) + sizeof(struct timeval))
-#define MAXSIZE (16*1024*1024)
+#define MAXSIZE (16*1024)
 
 static unsigned ping_head_magic;
 static unsigned ping_bulk_magic;
 
 static unsigned ping_head_magic;
 static unsigned ping_bulk_magic;
-static int nal  = 0;                            // Your NAL,
+static int nal  = SOCKNAL;                            // Your NAL,
 static unsigned long packets_valid = 0;         // Valid packets 
 static int running = 1;
 atomic_t pkt;
 static unsigned long packets_valid = 0;         // Valid packets 
 static int running = 1;
 atomic_t pkt;
@@ -282,7 +282,7 @@ static int __init pingsrv_init(void)
 } /* pingsrv_init() */
 
 
 } /* pingsrv_init() */
 
 
-static void __exit pingsrv_cleanup(void)
+static void /*__exit*/ pingsrv_cleanup(void)
 {
         remove_proc_entry ("net/pingsrv", NULL);
         
 {
         remove_proc_entry ("net/pingsrv", NULL);
         
index 4cef08b..35e114b 100644 (file)
@@ -235,7 +235,7 @@ pingcli_start(struct portal_ioctl_data *args)
 
 
 /* called by the portals_ioctl for ping requests */
 
 
 /* called by the portals_ioctl for ping requests */
-static int kping_client(struct portal_ioctl_data *args)
+int kping_client(struct portal_ioctl_data *args)
 {
 
         PORTAL_ALLOC (client, sizeof(struct pingcli_data));
 {
 
         PORTAL_ALLOC (client, sizeof(struct pingcli_data));
@@ -258,7 +258,7 @@ static int __init pingcli_init(void)
 } /* pingcli_init() */
 
 
 } /* pingcli_init() */
 
 
-static void __exit pingcli_cleanup(void)
+static void /*__exit*/ pingcli_cleanup(void)
 {
         PORTAL_SYMBOL_UNREGISTER (kping_client);
 } /* pingcli_cleanup() */
 {
         PORTAL_SYMBOL_UNREGISTER (kping_client);
 } /* pingcli_cleanup() */
index a18ea35..2b45a46 100644 (file)
@@ -269,7 +269,7 @@ static int __init pingsrv_init(void)
 } /* pingsrv_init() */
 
 
 } /* pingsrv_init() */
 
 
-static void __exit pingsrv_cleanup(void)
+static void /*__exit*/ pingsrv_cleanup(void)
 {
         remove_proc_entry ("net/pingsrv", NULL);
         
 {
         remove_proc_entry ("net/pingsrv", NULL);
         
index 529bb2d..b73f042 100644 (file)
@@ -84,8 +84,8 @@ int portals_debug_copy_to_user(char *buf, unsigned long len)
 
 /* FIXME: I'm not very smart; someone smarter should make this better. */
 void
 
 /* FIXME: I'm not very smart; someone smarter should make this better. */
 void
-portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
-                   const char *format, ...)
+portals_debug_msg (int subsys, int mask, char *file, const char *fn, 
+                   const int line, const char *format, ...)
 {
         va_list       ap;
         unsigned long flags;
 {
         va_list       ap;
         unsigned long flags;
index 148310a..8e474ad 100644 (file)
@@ -5,4 +5,5 @@ debugctl
 ptlctl
 .deps
 routerstat
 ptlctl
 .deps
 routerstat
-wirecheck
\ No newline at end of file
+wirecheck
+.*.cmd
index 9ab1c73..0a009d2 100644 (file)
@@ -53,17 +53,18 @@ static char rawbuf[8192];
 static char *buf = rawbuf;
 static int max = 8192;
 //static int g_pfd = -1;
 static char *buf = rawbuf;
 static int max = 8192;
 //static int g_pfd = -1;
-static int subsystem_array[1 << 8];
+static int subsystem_mask = ~0;
 static int debug_mask = ~0;
 
 static const char *portal_debug_subsystems[] =
 static int debug_mask = ~0;
 
 static const char *portal_debug_subsystems[] =
-        {"undefined", "mdc", "mds", "osc", "ost", "class", "obdfs", "llite",
-         "rpc", "ext2obd", "portals", "socknal", "qswnal", "pinger", "filter",
-         "obdtrace", "echo", "ldlm", "lov", "gmnal", "router", "ptldb", NULL};
+        {"undefined", "mdc", "mds", "osc", "ost", "class", "log", "llite",
+         "rpc", "mgmt", "portals", "socknal", "qswnal", "pinger", "filter",
+         "ptlbd", "echo", "ldlm", "lov", "gmnal", "router", "cobd", NULL};
 static const char *portal_debug_masks[] =
         {"trace", "inode", "super", "ext2", "malloc", "cache", "info", "ioctl",
          "blocks", "net", "warning", "buffs", "other", "dentry", "portals",
 static const char *portal_debug_masks[] =
         {"trace", "inode", "super", "ext2", "malloc", "cache", "info", "ioctl",
          "blocks", "net", "warning", "buffs", "other", "dentry", "portals",
-         "page", "dlmtrace", "error", "emerg", "ha", "rpctrace", "vfstrace", NULL};
+         "page", "dlmtrace", "error", "emerg", "ha", "rpctrace", "vfstrace",
+         NULL};
 
 struct debug_daemon_cmd {
         char *cmd;
 
 struct debug_daemon_cmd {
         char *cmd;
@@ -88,7 +89,10 @@ static int do_debug_mask(char *name, int enable)
                         printf("%s output from subsystem \"%s\"\n",
                                 enable ? "Enabling" : "Disabling",
                                 portal_debug_subsystems[i]);
                         printf("%s output from subsystem \"%s\"\n",
                                 enable ? "Enabling" : "Disabling",
                                 portal_debug_subsystems[i]);
-                        subsystem_array[i] = enable;
+                        if (enable)
+                                subsystem_mask |= (1 << i);
+                        else
+                                subsystem_mask &= ~(1 << i);
                         found = 1;
                 }
         }
                         found = 1;
                 }
         }
@@ -111,7 +115,6 @@ static int do_debug_mask(char *name, int enable)
 
 int dbg_initialize(int argc, char **argv)
 {
 
 int dbg_initialize(int argc, char **argv)
 {
-        memset(subsystem_array, 1, sizeof(subsystem_array));
         return 0;
 }
 
         return 0;
 }
 
@@ -213,12 +216,7 @@ int jt_dbg_list(int argc, char **argv)
                 for (i = 0; portal_debug_masks[i] != NULL; i++)
                         printf(", %s", portal_debug_masks[i]);
                 printf("\n");
                 for (i = 0; portal_debug_masks[i] != NULL; i++)
                         printf(", %s", portal_debug_masks[i]);
                 printf("\n");
-        }
-        else if (strcasecmp(argv[1], "applymasks") == 0) {
-                unsigned int subsystem_mask = 0;
-                for (i = 0; portal_debug_subsystems[i] != NULL; i++) {
-                        if (subsystem_array[i]) subsystem_mask |= (1 << i);
-                }
+        } else if (strcasecmp(argv[1], "applymasks") == 0) {
                 applymask_all(subsystem_mask, debug_mask);
         }
         return 0;
                 applymask_all(subsystem_mask, debug_mask);
         }
         return 0;
@@ -230,12 +228,6 @@ static void dump_buffer(FILE *fd, char *buf, int size, int raw)
 {
         char *p, *z;
         unsigned long subsystem, debug, dropped = 0, kept = 0;
 {
         char *p, *z;
         unsigned long subsystem, debug, dropped = 0, kept = 0;
-        int max_sub, max_type;
-
-        for (max_sub = 0; portal_debug_subsystems[max_sub] != NULL; max_sub++)
-                ;
-        for (max_type = 0; portal_debug_masks[max_type] != NULL; max_type++)
-                ;
 
         while (size) {
                 p = memchr(buf, '\n', size);
 
         while (size) {
                 p = memchr(buf, '\n', size);
@@ -247,8 +239,7 @@ static void dump_buffer(FILE *fd, char *buf, int size, int raw)
                 z++;
                 /* for some reason %*s isn't working. */
                 *p = '\0';
                 z++;
                 /* for some reason %*s isn't working. */
                 *p = '\0';
-                if (subsystem < max_sub &&
-                    subsystem_array[subsystem] &&
+                if ((subsystem_mask & subsystem) &&
                     (!debug || (debug_mask & debug))) {
                         if (raw)
                                 fprintf(fd, "%s\n", buf);
                     (!debug || (debug_mask & debug))) {
                         if (raw)
                                 fprintf(fd, "%s\n", buf);
@@ -551,6 +542,8 @@ int jt_dbg_modules(int argc, char **argv)
                 {"mds_ext3", "lustre/mds"},
                 {"mds_extN", "lustre/mds"},
                 {"ptlbd", "lustre/ptlbd"},
                 {"mds_ext3", "lustre/mds"},
                 {"mds_extN", "lustre/mds"},
                 {"ptlbd", "lustre/ptlbd"},
+                {"mgmt_svc", "lustre/mgmt"},
+                {"mgmt_cli", "lustre/mgmt"},
                 {NULL, NULL}
         };
         char *path = "..";
                 {NULL, NULL}
         };
         char *path = "..";
index 90d66f5..a89f4f7 100644 (file)
@@ -22,6 +22,7 @@
 
 #include <stdio.h>
 #include <sys/types.h>
 
 #include <stdio.h>
 #include <sys/types.h>
+#include <netdb.h>
 #include <sys/socket.h>
 #include <netinet/tcp.h>
 #include <netdb.h>
 #include <sys/socket.h>
 #include <netinet/tcp.h>
 #include <netdb.h>
@@ -106,6 +107,27 @@ nal2name (int nal)
         return ((e == NULL) ? "???" : e->name);
 }
 
         return ((e == NULL) ? "???" : e->name);
 }
 
+static struct hostent *
+ptl_gethostbyname(char * hname) {
+        struct hostent *he;
+        he = gethostbyname(hname);
+        if (!he) {
+                switch(h_errno) {
+                case HOST_NOT_FOUND:
+                case NO_ADDRESS:
+                        fprintf(stderr, "Unable to resolve hostname: %s\n",
+                                hname);
+                        break;
+                default:
+                        fprintf(stderr, "gethostbyname error: %s\n",
+                                strerror(errno));
+                        break;
+                }
+                return NULL;
+        }
+        return he;
+}
+
 int
 ptl_parse_nid (ptl_nid_t *nidp, char *str)
 {
 int
 ptl_parse_nid (ptl_nid_t *nidp, char *str)
 {
@@ -127,7 +149,7 @@ ptl_parse_nid (ptl_nid_t *nidp, char *str)
         
         if ((('a' <= str[0] && str[0] <= 'z') ||
              ('A' <= str[0] && str[0] <= 'Z')) &&
         
         if ((('a' <= str[0] && str[0] <= 'z') ||
              ('A' <= str[0] && str[0] <= 'Z')) &&
-             (he = gethostbyname (str)) != NULL)
+             (he = ptl_gethostbyname (str)) != NULL)
         {
                 __u32 addr = *(__u32 *)he->h_addr;
 
         {
                 __u32 addr = *(__u32 *)he->h_addr;
 
@@ -351,12 +373,9 @@ int jt_ptl_connect(int argc, char **argv)
                         goto usage;
                 }
 
                         goto usage;
                 }
 
-                he = gethostbyname(argv[1]);
-                if (!he) {
-                        fprintf(stderr, "gethostbyname error: %s\n",
-                                strerror(errno));
+                he = ptl_gethostbyname(argv[1]);
+                if (!he)
                         return -1;
                         return -1;
-                }
 
                 g_port = atol(argv[2]);
 
 
                 g_port = atol(argv[2]);
 
@@ -525,12 +544,9 @@ int jt_ptl_disconnect(int argc, char **argv)
 
                 PORTAL_IOC_INIT(data);
                 if (argc == 2) {
 
                 PORTAL_IOC_INIT(data);
                 if (argc == 2) {
-                        he = gethostbyname(argv[1]);
-                        if (!he) {
-                                fprintf(stderr, "gethostbyname error: %s\n",
-                                        strerror(errno));
+                        he = ptl_gethostbyname(argv[1]);
+                        if (!he) 
                                 return -1;
                                 return -1;
-                        }
                         
                         data.ioc_nid = ntohl (*(__u32 *)he->h_addr); /* HOST byte order */
 
                         
                         data.ioc_nid = ntohl (*(__u32 *)he->h_addr); /* HOST byte order */
 
@@ -582,12 +598,9 @@ int jt_ptl_push_connection (int argc, char **argv)
 
                 PORTAL_IOC_INIT(data);
                 if (argc == 2) {
 
                 PORTAL_IOC_INIT(data);
                 if (argc == 2) {
-                        he = gethostbyname(argv[1]);
-                        if (!he) {
-                                fprintf(stderr, "gethostbyname error: %s\n",
-                                        strerror(errno));
+                        he = ptl_gethostbyname(argv[1]);
+                        if (!he)
                                 return -1;
                                 return -1;
-                        }
                         
                         data.ioc_nid = ntohl (*(__u32 *)he->h_addr); /* HOST byte order */
 
                         
                         data.ioc_nid = ntohl (*(__u32 *)he->h_addr); /* HOST byte order */
 
index af76523..0a6ad8f 100644 (file)
@@ -76,7 +76,7 @@ static int ptlbd_cl_setup(struct obd_device *obd, obd_count len, void *buf)
         RETURN(0);
 }
 
         RETURN(0);
 }
 
-static int ptlbd_cl_cleanup(struct obd_device *obd, int force, int failover)
+static int ptlbd_cl_cleanup(struct obd_device *obd, int flags)
 {
         struct ptlbd_obd *ptlbd = &obd->u.ptlbd;
         struct obd_import *imp;
 {
         struct ptlbd_obd *ptlbd = &obd->u.ptlbd;
         struct obd_import *imp;
@@ -99,9 +99,8 @@ static int ptlbd_cl_cleanup(struct obd_device *obd, int force, int failover)
 
 
 /* modelled after ptlrpc_import_connect() */
 
 
 /* modelled after ptlrpc_import_connect() */
-int ptlbd_cl_connect(struct lustre_handle *conn,
-                      struct obd_device *obd, 
-                      struct obd_uuid *target_uuid)
+int ptlbd_cl_connect(struct lustre_handle *conn, struct obd_device *obd,
+                     struct obd_uuid *target_uuid)
 {
         struct ptlbd_obd *ptlbd = &obd->u.ptlbd;
         struct obd_import *imp = ptlbd->bd_import;
 {
         struct ptlbd_obd *ptlbd = &obd->u.ptlbd;
         struct obd_import *imp = ptlbd->bd_import;
@@ -196,7 +195,7 @@ int ptlbd_cl_init(void)
 {
         struct lprocfs_static_vars lvars;
 
 {
         struct lprocfs_static_vars lvars;
 
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(ptlbd,&lvars);
         return class_register_type(&ptlbd_cl_obd_ops, lvars.module_vars,
                                    OBD_PTLBD_CL_DEVICENAME);
 }
         return class_register_type(&ptlbd_cl_obd_ops, lvars.module_vars,
                                    OBD_PTLBD_CL_DEVICENAME);
 }
index e3fde99..dc591f4 100644 (file)
@@ -57,7 +57,7 @@ out_cl:
         RETURN(ret);
 }
 
         RETURN(ret);
 }
 
-static void __exit ptlbd_exit(void)
+static void /*__exit*/ ptlbd_exit(void)
 {
         ENTRY;
         ptlbd_cl_exit();
 {
         ENTRY;
         ptlbd_cl_exit();
index 34ec737..d293a86 100644 (file)
@@ -74,7 +74,7 @@ out_filp:
         RETURN(rc);
 }
 
         RETURN(rc);
 }
 
-static int ptlbd_sv_cleanup(struct obd_device *obddev, int force, int failover)
+static int ptlbd_sv_cleanup(struct obd_device *obddev, int flags)
 {
         struct ptlbd_obd *ptlbd = &obddev->u.ptlbd;
         ENTRY;
 {
         struct ptlbd_obd *ptlbd = &obddev->u.ptlbd;
         ENTRY;
@@ -102,7 +102,7 @@ int ptlbd_sv_init(void)
 {
         struct lprocfs_static_vars lvars;
 
 {
         struct lprocfs_static_vars lvars;
 
-        lprocfs_init_vars(&lvars);
+        lprocfs_init_vars(ptlbd,&lvars);
         return class_register_type(&ptlbd_sv_obd_ops, lvars.module_vars,
                                    OBD_PTLBD_SV_DEVICENAME);
 }
         return class_register_type(&ptlbd_sv_obd_ops, lvars.module_vars,
                                    OBD_PTLBD_SV_DEVICENAME);
 }
index 067f05c..cf51f30 100644 (file)
@@ -7,3 +7,4 @@ Makefile.in
 .deps
 tags
 TAGS
 .deps
 tags
 TAGS
+.*.cmd
index eb44329..355d48c 100644 (file)
@@ -16,7 +16,7 @@ EXTRA_PROGRAMS = ptlrpc
 
 ptlrpc_SOURCES = recover.c connection.c ptlrpc_module.c events.c service.c \
 client.c niobuf.c pack_generic.c lproc_ptlrpc.c pinger.c ptlrpc_lib.c \
 
 ptlrpc_SOURCES = recover.c connection.c ptlrpc_module.c events.c service.c \
 client.c niobuf.c pack_generic.c lproc_ptlrpc.c pinger.c ptlrpc_lib.c \
-ptlrpc_internal.h
+ptlrpc_internal.h recov_thread.c
 endif
 
 include $(top_srcdir)/Rules
 endif
 
 include $(top_srcdir)/Rules
index a98af3e..50ea587 100644 (file)
@@ -78,13 +78,13 @@ void ptlrpc_readdress_connection(struct ptlrpc_connection *conn,
         struct ptlrpc_peer peer;
         int err;
 
         struct ptlrpc_peer peer;
         int err;
 
-        err = ptlrpc_uuid_to_peer (uuid, &peer);
+        err = ptlrpc_uuid_to_peer(uuid, &peer);
         if (err != 0) {
                 CERROR("cannot find peer %s!\n", uuid->uuid);
                 return;
         }
 
         if (err != 0) {
                 CERROR("cannot find peer %s!\n", uuid->uuid);
                 return;
         }
 
-        memcpy (&conn->c_peer, &peer, sizeof (peer));
+        memcpy(&conn->c_peer, &peer, sizeof (peer));
         return;
 }
 
         return;
 }
 
@@ -96,7 +96,7 @@ static inline struct ptlrpc_bulk_desc *new_bulk(void)
         if (!desc)
                 return NULL;
 
         if (!desc)
                 return NULL;
 
-        spin_lock_init (&desc->bd_lock);
+        spin_lock_init(&desc->bd_lock);
         init_waitqueue_head(&desc->bd_waitq);
         INIT_LIST_HEAD(&desc->bd_page_list);
         desc->bd_md_h = PTL_HANDLE_NONE;
         init_waitqueue_head(&desc->bd_waitq);
         INIT_LIST_HEAD(&desc->bd_page_list);
         desc->bd_md_h = PTL_HANDLE_NONE;
@@ -108,10 +108,10 @@ static inline struct ptlrpc_bulk_desc *new_bulk(void)
 struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp (struct ptlrpc_request *req,
                                                int type, int portal)
 {
 struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp (struct ptlrpc_request *req,
                                                int type, int portal)
 {
-        struct obd_import       *imp = req->rq_import;
+        struct obd_import *imp = req->rq_import;
         struct ptlrpc_bulk_desc *desc;
 
         struct ptlrpc_bulk_desc *desc;
 
-        LASSERT (type == BULK_PUT_SINK || type == BULK_GET_SOURCE);
+        LASSERT(type == BULK_PUT_SINK || type == BULK_GET_SOURCE);
 
         desc = new_bulk();
         if (desc == NULL)
 
         desc = new_bulk();
         if (desc == NULL)
@@ -132,10 +132,10 @@ struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp (struct ptlrpc_request *req,
 struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_exp (struct ptlrpc_request *req,
                                                int type, int portal)
 {
 struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_exp (struct ptlrpc_request *req,
                                                int type, int portal)
 {
-        struct obd_export       *exp = req->rq_export;
+        struct obd_export *exp = req->rq_export;
         struct ptlrpc_bulk_desc *desc;
 
         struct ptlrpc_bulk_desc *desc;
 
-        LASSERT (type == BULK_PUT_SOURCE || type == BULK_GET_SINK);
+        LASSERT(type == BULK_PUT_SOURCE || type == BULK_GET_SINK);
 
         desc = new_bulk();
         if (desc == NULL)
 
         desc = new_bulk();
         if (desc == NULL)
@@ -159,12 +159,12 @@ int ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc,
 
         OBD_ALLOC(bulk, sizeof(*bulk));
         if (bulk == NULL)
 
         OBD_ALLOC(bulk, sizeof(*bulk));
         if (bulk == NULL)
-                return (-ENOMEM);
+                return -ENOMEM;
 
 
-        LASSERT (page != NULL);
-        LASSERT (pageoffset >= 0);
-        LASSERT (len > 0);
-        LASSERT (pageoffset + len <= PAGE_SIZE);
+        LASSERT(page != NULL);
+        LASSERT(pageoffset >= 0);
+        LASSERT(len > 0);
+        LASSERT(pageoffset + len <= PAGE_SIZE);
 
         bulk->bp_page = page;
         bulk->bp_pageoffset = pageoffset;
 
         bulk->bp_page = page;
         bulk->bp_pageoffset = pageoffset;
@@ -181,9 +181,9 @@ void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc)
         struct list_head *tmp, *next;
         ENTRY;
 
         struct list_head *tmp, *next;
         ENTRY;
 
-        LASSERT (desc != NULL);
-        LASSERT (desc->bd_page_count != 0x5a5a5a5a); /* not freed already */
-        LASSERT (!desc->bd_network_rw);         /* network hands off or */
+        LASSERT(desc != NULL);
+        LASSERT(desc->bd_page_count != 0x5a5a5a5a); /* not freed already */
+        LASSERT(!desc->bd_network_rw);         /* network hands off or */
 
         list_for_each_safe(tmp, next, &desc->bd_page_list) {
                 struct ptlrpc_bulk_page *bulk;
 
         list_for_each_safe(tmp, next, &desc->bd_page_list) {
                 struct ptlrpc_bulk_page *bulk;
@@ -191,7 +191,7 @@ void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc)
                 ptlrpc_free_bulk_page(bulk);
         }
 
                 ptlrpc_free_bulk_page(bulk);
         }
 
-        LASSERT (desc->bd_page_count == 0);
+        LASSERT(desc->bd_page_count == 0);
         LASSERT((desc->bd_export != NULL) ^ (desc->bd_import != NULL));
 
         if (desc->bd_export)
         LASSERT((desc->bd_export != NULL) ^ (desc->bd_import != NULL));
 
         if (desc->bd_export)
@@ -205,7 +205,7 @@ void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc)
 
 void ptlrpc_free_bulk_page(struct ptlrpc_bulk_page *bulk)
 {
 
 void ptlrpc_free_bulk_page(struct ptlrpc_bulk_page *bulk)
 {
-        LASSERT (bulk != NULL);
+        LASSERT(bulk != NULL);
 
         list_del(&bulk->bp_link);
         bulk->bp_desc->bd_page_count--;
 
         list_del(&bulk->bp_link);
         bulk->bp_desc->bd_page_count--;
@@ -247,7 +247,7 @@ struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, int opcode,
 
         request->rq_connection = ptlrpc_connection_addref(imp->imp_connection);
 
 
         request->rq_connection = ptlrpc_connection_addref(imp->imp_connection);
 
-        spin_lock_init (&request->rq_lock);
+        spin_lock_init(&request->rq_lock);
         INIT_LIST_HEAD(&request->rq_list);
         init_waitqueue_head(&request->rq_wait_for_rep);
         request->rq_xid = ptlrpc_next_xid();
         INIT_LIST_HEAD(&request->rq_list);
         init_waitqueue_head(&request->rq_wait_for_rep);
         request->rq_xid = ptlrpc_next_xid();
@@ -289,18 +289,18 @@ void ptlrpc_set_destroy(struct ptlrpc_request_set *set)
                 struct ptlrpc_request *req =
                         list_entry(tmp, struct ptlrpc_request, rq_set_chain);
 
                 struct ptlrpc_request *req =
                         list_entry(tmp, struct ptlrpc_request, rq_set_chain);
 
-                LASSERT (req->rq_phase == expected_phase);
+                LASSERT(req->rq_phase == expected_phase);
                 n++;
         }
 
                 n++;
         }
 
-        LASSERT (set->set_remaining == 0 || set->set_remaining == n);
+        LASSERT(set->set_remaining == 0 || set->set_remaining == n);
 
         list_for_each_safe(tmp, next, &set->set_requests) {
                 struct ptlrpc_request *req =
                         list_entry(tmp, struct ptlrpc_request, rq_set_chain);
                 list_del_init(&req->rq_set_chain);
 
 
         list_for_each_safe(tmp, next, &set->set_requests) {
                 struct ptlrpc_request *req =
                         list_entry(tmp, struct ptlrpc_request, rq_set_chain);
                 list_del_init(&req->rq_set_chain);
 
-                LASSERT (req->rq_phase == expected_phase);
+                LASSERT(req->rq_phase == expected_phase);
 
                 if (req->rq_phase == RQ_PHASE_NEW) {
 
 
                 if (req->rq_phase == RQ_PHASE_NEW) {
 
@@ -312,7 +312,8 @@ void ptlrpc_set_destroy(struct ptlrpc_request_set *set)
                                 /* higher level (i.e. LOV) failed;
                                  * let the sub reqs clean up */
                                 req->rq_status = -EBADR;
                                 /* higher level (i.e. LOV) failed;
                                  * let the sub reqs clean up */
                                 req->rq_status = -EBADR;
-                                interpreter(req, &req->rq_async_args, req->rq_status);
+                                interpreter(req, &req->rq_async_args,
+                                            req->rq_status);
                         }
                         set->set_remaining--;
                 }
                         }
                         set->set_remaining--;
                 }
@@ -402,8 +403,8 @@ static int after_reply(struct ptlrpc_request *req, int *restartp)
         int rc;
         ENTRY;
 
         int rc;
         ENTRY;
 
-        LASSERT (!req->rq_receiving_reply);
-        LASSERT (req->rq_replied);
+        LASSERT(!req->rq_receiving_reply);
+        LASSERT(req->rq_replied);
 
         if (restartp != NULL)
                 *restartp = 0;
 
         if (restartp != NULL)
                 *restartp = 0;
@@ -418,14 +419,14 @@ static int after_reply(struct ptlrpc_request *req, int *restartp)
         rc = lustre_unpack_msg(req->rq_repmsg, req->rq_replen);
         if (rc) {
                 CERROR("unpack_rep failed: %d\n", rc);
         rc = lustre_unpack_msg(req->rq_repmsg, req->rq_replen);
         if (rc) {
                 CERROR("unpack_rep failed: %d\n", rc);
-                RETURN (-EPROTO);
+                RETURN(-EPROTO);
         }
 
         if (req->rq_repmsg->type != PTL_RPC_MSG_REPLY &&
             req->rq_repmsg->type != PTL_RPC_MSG_ERR) {
                 CERROR("invalid packet type received (type=%u)\n",
                        req->rq_repmsg->type);
         }
 
         if (req->rq_repmsg->type != PTL_RPC_MSG_REPLY &&
             req->rq_repmsg->type != PTL_RPC_MSG_ERR) {
                 CERROR("invalid packet type received (type=%u)\n",
                        req->rq_repmsg->type);
-                RETURN (-EPROTO);
+                RETURN(-EPROTO);
         }
 
         /* Store transno in reqmsg for replay. */
         }
 
         /* Store transno in reqmsg for replay. */
@@ -447,6 +448,9 @@ static int after_reply(struct ptlrpc_request *req, int *restartp)
                 if (req->rq_err)
                         RETURN(-EIO);
 
                 if (req->rq_err)
                         RETURN(-EIO);
 
+                if (req->rq_no_resend)
+                        RETURN(rc); /* -ENOTCONN */
+
                 if (req->rq_resend) {
                         if (restartp == NULL)
                                 LBUG(); /* async resend not supported yet */
                 if (req->rq_resend) {
                         if (restartp == NULL)
                                 LBUG(); /* async resend not supported yet */
@@ -456,7 +460,7 @@ static int after_reply(struct ptlrpc_request *req, int *restartp)
                         *restartp = 1;
                         lustre_msg_add_flags(req->rq_reqmsg, MSG_RESENT);
                         DEBUG_REQ(D_HA, req, "resending: ");
                         *restartp = 1;
                         lustre_msg_add_flags(req->rq_reqmsg, MSG_RESENT);
                         DEBUG_REQ(D_HA, req, "resending: ");
-                        RETURN (0);
+                        RETURN(0);
                 }
 
                 CERROR("request should be err or resend: %p\n", req);
                 }
 
                 CERROR("request should be err or resend: %p\n", req);
@@ -472,10 +476,9 @@ static int after_reply(struct ptlrpc_request *req, int *restartp)
                         imp->imp_max_transno = req->rq_transno;
 
                 /* Replay-enabled imports return commit-status information. */
                         imp->imp_max_transno = req->rq_transno;
 
                 /* Replay-enabled imports return commit-status information. */
-                if (req->rq_repmsg->last_committed) {
+                if (req->rq_repmsg->last_committed)
                         imp->imp_peer_committed_transno =
                                 req->rq_repmsg->last_committed;
                         imp->imp_peer_committed_transno =
                                 req->rq_repmsg->last_committed;
-                }
                 ptlrpc_free_committed(imp);
                 spin_unlock_irqrestore(&imp->imp_lock, flags);
         }
                 ptlrpc_free_committed(imp);
                 spin_unlock_irqrestore(&imp->imp_lock, flags);
         }
@@ -510,8 +513,8 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set)
                         continue;
 
                 if (req->rq_phase == RQ_PHASE_INTERPRET)
                         continue;
 
                 if (req->rq_phase == RQ_PHASE_INTERPRET)
-                        GOTO (interpret, req->rq_status);
-                
+                        GOTO(interpret, req->rq_status);
+
                 if (req->rq_err) {
                         ptlrpc_unregister_reply(req);
                         if (req->rq_status == 0)
                 if (req->rq_err) {
                         ptlrpc_unregister_reply(req);
                         if (req->rq_status == 0)
@@ -522,7 +525,7 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set)
                         list_del_init(&req->rq_list);
                         spin_unlock_irqrestore(&imp->imp_lock, flags);
 
                         list_del_init(&req->rq_list);
                         spin_unlock_irqrestore(&imp->imp_lock, flags);
 
-                        GOTO (interpret, req->rq_status);
+                        GOTO(interpret, req->rq_status);
                 }
 
                 if (req->rq_intr) {
                 }
 
                 if (req->rq_intr) {
@@ -535,7 +538,7 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set)
                         list_del_init(&req->rq_list);
                         spin_unlock_irqrestore(&imp->imp_lock, flags);
 
                         list_del_init(&req->rq_list);
                         spin_unlock_irqrestore(&imp->imp_lock, flags);
 
-                        GOTO (interpret, req->rq_status);
+                        GOTO(interpret, req->rq_status);
                 }
 
                 if (req->rq_phase == RQ_PHASE_RPC) {
                 }
 
                 if (req->rq_phase == RQ_PHASE_RPC) {
@@ -553,13 +556,13 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set)
                                 list_add_tail(&req->rq_list,
                                               &imp->imp_sending_list);
 
                                 list_add_tail(&req->rq_list,
                                               &imp->imp_sending_list);
 
-                                if (req->rq_import_generation < 
+                                if (req->rq_import_generation <
                                     imp->imp_generation) {
                                         req->rq_status = -EIO;
                                         req->rq_phase = RQ_PHASE_INTERPRET;
                                     imp->imp_generation) {
                                         req->rq_status = -EIO;
                                         req->rq_phase = RQ_PHASE_INTERPRET;
-                                        spin_unlock_irqrestore(&imp->imp_lock, 
+                                        spin_unlock_irqrestore(&imp->imp_lock,
                                                                flags);
                                                                flags);
-                                        GOTO (interpret, req->rq_status);
+                                        GOTO(interpret, req->rq_status);
                                 }
                                 spin_unlock_irqrestore(&imp->imp_lock, flags);
 
                                 }
                                 spin_unlock_irqrestore(&imp->imp_lock, flags);
 
@@ -571,16 +574,17 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set)
                                         req->rq_resend = 0;
                                         spin_unlock_irqrestore(&req->rq_lock,
                                                                flags);
                                         req->rq_resend = 0;
                                         spin_unlock_irqrestore(&req->rq_lock,
                                                                flags);
+
                                         ptlrpc_unregister_reply(req);
                                         if (req->rq_bulk)
                                                 ptlrpc_unregister_bulk(req);
                                         ptlrpc_unregister_reply(req);
                                         if (req->rq_bulk)
                                                 ptlrpc_unregister_bulk(req);
-                               }
+                                }
 
                                 rc = ptl_send_rpc(req);
                                 if (rc) {
                                         req->rq_status = rc;
                                         req->rq_phase = RQ_PHASE_INTERPRET;
 
                                 rc = ptl_send_rpc(req);
                                 if (rc) {
                                         req->rq_status = rc;
                                         req->rq_phase = RQ_PHASE_INTERPRET;
-                                        GOTO (interpret, req->rq_status);
+                                        GOTO(interpret, req->rq_status);
                                 }
 
                         }
                                 }
 
                         }
@@ -612,21 +616,21 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set)
                          */
                         if (req->rq_bulk == NULL || req->rq_status != 0) {
                                 req->rq_phase = RQ_PHASE_INTERPRET;
                          */
                         if (req->rq_bulk == NULL || req->rq_status != 0) {
                                 req->rq_phase = RQ_PHASE_INTERPRET;
-                                GOTO (interpret, req->rq_status);
+                                GOTO(interpret, req->rq_status);
                         }
 
                         req->rq_phase = RQ_PHASE_BULK;
                 }
 
                         }
 
                         req->rq_phase = RQ_PHASE_BULK;
                 }
 
-                LASSERT (req->rq_phase == RQ_PHASE_BULK);
+                LASSERT(req->rq_phase == RQ_PHASE_BULK);
                 if (!ptlrpc_bulk_complete (req->rq_bulk))
                         continue;
 
                 req->rq_phase = RQ_PHASE_INTERPRET;
 
         interpret:
                 if (!ptlrpc_bulk_complete (req->rq_bulk))
                         continue;
 
                 req->rq_phase = RQ_PHASE_INTERPRET;
 
         interpret:
-                LASSERT (req->rq_phase == RQ_PHASE_INTERPRET);
-                LASSERT (!req->rq_receiving_reply);
+                LASSERT(req->rq_phase == RQ_PHASE_INTERPRET);
+                LASSERT(!req->rq_receiving_reply);
 
                 ptlrpc_unregister_reply(req);
                 if (req->rq_bulk != NULL)
 
                 ptlrpc_unregister_reply(req);
                 if (req->rq_bulk != NULL)
@@ -651,7 +655,7 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set)
                 set->set_remaining--;
         }
 
                 set->set_remaining--;
         }
 
-        RETURN (set->set_remaining == 0);
+        RETURN(set->set_remaining == 0);
 }
 
 int ptlrpc_expire_one_request(struct ptlrpc_request *req)
 }
 
 int ptlrpc_expire_one_request(struct ptlrpc_request *req)
@@ -695,7 +699,7 @@ static int expired_set(void *data)
         time_t                     now = LTIME_S (CURRENT_TIME);
         ENTRY;
 
         time_t                     now = LTIME_S (CURRENT_TIME);
         ENTRY;
 
-        LASSERT (set != NULL);
+        LASSERT(set != NULL);
 
         /* A timeout expired; see which reqs it applies to... */
         list_for_each (tmp, &set->set_requests) {
 
         /* A timeout expired; see which reqs it applies to... */
         list_for_each (tmp, &set->set_requests) {
@@ -728,7 +732,7 @@ static void interrupted_set(void *data)
         struct list_head *tmp;
         unsigned long flags;
 
         struct list_head *tmp;
         unsigned long flags;
 
-        LASSERT (set != NULL);
+        LASSERT(set != NULL);
         CERROR("INTERRUPTED SET %p\n", set);
 
         list_for_each(tmp, &set->set_requests) {
         CERROR("INTERRUPTED SET %p\n", set);
 
         list_for_each(tmp, &set->set_requests) {
@@ -757,12 +761,13 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set)
         int                    timeout;
         ENTRY;
 
         int                    timeout;
         ENTRY;
 
+        SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */
         LASSERT(!list_empty(&set->set_requests));
         list_for_each(tmp, &set->set_requests) {
                 req = list_entry(tmp, struct ptlrpc_request, rq_set_chain);
 
         LASSERT(!list_empty(&set->set_requests));
         list_for_each(tmp, &set->set_requests) {
                 req = list_entry(tmp, struct ptlrpc_request, rq_set_chain);
 
-                LASSERT (req->rq_level == LUSTRE_CONN_FULL);
-                LASSERT (req->rq_phase == RQ_PHASE_NEW);
+                LASSERT(req->rq_level == LUSTRE_CONN_FULL);
+                LASSERT(req->rq_phase == RQ_PHASE_NEW);
                 req->rq_phase = RQ_PHASE_RPC;
 
                 imp = req->rq_import;
                 req->rq_phase = RQ_PHASE_RPC;
 
                 imp = req->rq_import;
@@ -789,7 +794,7 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set)
                         spin_lock (&req->rq_lock);
                         req->rq_waiting = 1;
                         spin_unlock (&req->rq_lock);
                         spin_lock (&req->rq_lock);
                         req->rq_waiting = 1;
                         spin_unlock (&req->rq_lock);
-                        LASSERT (list_empty (&req->rq_list));
+                        LASSERT(list_empty (&req->rq_list));
                         // list_del(&req->rq_list);
                         list_add_tail(&req->rq_list, &imp->imp_delayed_list);
                         spin_unlock_irqrestore(&imp->imp_lock, flags);
                         // list_del(&req->rq_list);
                         list_add_tail(&req->rq_list, &imp->imp_delayed_list);
                         spin_unlock_irqrestore(&imp->imp_lock, flags);
@@ -801,6 +806,7 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set)
                 list_add_tail(&req->rq_list, &imp->imp_sending_list);
                 spin_unlock_irqrestore(&imp->imp_lock, flags);
 
                 list_add_tail(&req->rq_list, &imp->imp_sending_list);
                 spin_unlock_irqrestore(&imp->imp_lock, flags);
 
+                req->rq_reqmsg->status = current->pid;
                 CDEBUG(D_RPCTRACE, "Sending RPC pname:cluuid:pid:xid:ni:nid:opc"
                        " %s:%s:%d:"LPU64":%s:"LPX64":%d\n", current->comm,
                        imp->imp_obd->obd_uuid.uuid, req->rq_reqmsg->status,
                 CDEBUG(D_RPCTRACE, "Sending RPC pname:cluuid:pid:xid:ni:nid:opc"
                        " %s:%s:%d:"LPU64":%s:"LPX64":%d\n", current->comm,
                        imp->imp_obd->obd_uuid.uuid, req->rq_reqmsg->status,
@@ -820,7 +826,8 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set)
                 now = LTIME_S (CURRENT_TIME);
                 timeout = 0;
                 list_for_each (tmp, &set->set_requests) {
                 now = LTIME_S (CURRENT_TIME);
                 timeout = 0;
                 list_for_each (tmp, &set->set_requests) {
-                        req = list_entry(tmp, struct ptlrpc_request, rq_set_chain);
+                        req = list_entry(tmp, struct ptlrpc_request,
+                                         rq_set_chain);
 
                         /* request in-flight? */
                         if (!((req->rq_phase == RQ_PHASE_RPC &&
 
                         /* request in-flight? */
                         if (!((req->rq_phase == RQ_PHASE_RPC &&
@@ -846,7 +853,7 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set)
                                        expired_set, interrupted_set, set);
                 rc = l_wait_event(set->set_waitq, ptlrpc_check_set(set), &lwi);
 
                                        expired_set, interrupted_set, set);
                 rc = l_wait_event(set->set_waitq, ptlrpc_check_set(set), &lwi);
 
-                LASSERT (rc == 0 || rc == -EINTR || rc == -ETIMEDOUT);
+                LASSERT(rc == 0 || rc == -EINTR || rc == -ETIMEDOUT);
 
                 /* -EINTR => all requests have been flagged rq_intr so next
                  * check completes.
 
                 /* -EINTR => all requests have been flagged rq_intr so next
                  * check completes.
@@ -857,13 +864,13 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set)
                  * the error cases -eeb. */
         } while (rc != 0);
 
                  * the error cases -eeb. */
         } while (rc != 0);
 
-        LASSERT (set->set_remaining == 0);
+        LASSERT(set->set_remaining == 0);
 
         rc = 0;
         list_for_each(tmp, &set->set_requests) {
                 req = list_entry(tmp, struct ptlrpc_request, rq_set_chain);
 
 
         rc = 0;
         list_for_each(tmp, &set->set_requests) {
                 req = list_entry(tmp, struct ptlrpc_request, rq_set_chain);
 
-                LASSERT (req->rq_phase == RQ_PHASE_COMPLETE);
+                LASSERT(req->rq_phase == RQ_PHASE_COMPLETE);
                 if (req->rq_status != 0)
                         rc = req->rq_status;
         }
                 if (req->rq_status != 0)
                         rc = req->rq_status;
         }
@@ -885,7 +892,7 @@ static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked)
                 return;
         }
 
                 return;
         }
 
-        LASSERT (!request->rq_receiving_reply);
+        LASSERT(!request->rq_receiving_reply);
 
         /* We must take it off the imp_replay_list first.  Otherwise, we'll set
          * request->rq_reqmsg to NULL while osc_close is dereferencing it. */
 
         /* We must take it off the imp_replay_list first.  Otherwise, we'll set
          * request->rq_reqmsg to NULL while osc_close is dereferencing it. */
@@ -940,7 +947,7 @@ static int __ptlrpc_req_finished(struct ptlrpc_request *request, int locked)
         if (request == NULL)
                 RETURN(1);
 
         if (request == NULL)
                 RETURN(1);
 
-        if (request == (void *)(long)(0x5a5a5a5a5a5a5a5a) || 
+        if (request == (void *)(long)(0x5a5a5a5a5a5a5a5a) ||
             request->rq_obd == (void *)(long)(0x5a5a5a5a5a5a5a5a)) {
                 CERROR("dereferencing freed request (bug 575)\n");
                 LBUG();
             request->rq_obd == (void *)(long)(0x5a5a5a5a5a5a5a5a)) {
                 CERROR("dereferencing freed request (bug 575)\n");
                 LBUG();
@@ -981,7 +988,7 @@ void ptlrpc_unregister_reply (struct ptlrpc_request *request)
         int           rc;
         ENTRY;
 
         int           rc;
         ENTRY;
 
-        LASSERT (!in_interrupt ());             /* might sleep */
+        LASSERT(!in_interrupt ());             /* might sleep */
 
         spin_lock_irqsave (&request->rq_lock, flags);
         if (!request->rq_receiving_reply) {     /* not waiting for a reply */
 
         spin_lock_irqsave (&request->rq_lock, flags);
         if (!request->rq_receiving_reply) {     /* not waiting for a reply */
@@ -991,7 +998,7 @@ void ptlrpc_unregister_reply (struct ptlrpc_request *request)
                 return;
         }
 
                 return;
         }
 
-        LASSERT (!request->rq_replied);         /* callback hasn't completed */
+        LASSERT(!request->rq_replied);         /* callback hasn't completed */
         spin_unlock_irqrestore (&request->rq_lock, flags);
 
         rc = PtlMDUnlink (request->rq_reply_md_h);
         spin_unlock_irqrestore (&request->rq_lock, flags);
 
         rc = PtlMDUnlink (request->rq_reply_md_h);
@@ -1000,8 +1007,8 @@ void ptlrpc_unregister_reply (struct ptlrpc_request *request)
                 LBUG ();
 
         case PTL_OK:                            /* unlinked before completion */
                 LBUG ();
 
         case PTL_OK:                            /* unlinked before completion */
-                LASSERT (request->rq_receiving_reply);
-                LASSERT (!request->rq_replied);
+                LASSERT(request->rq_receiving_reply);
+                LASSERT(!request->rq_replied);
                 spin_lock_irqsave (&request->rq_lock, flags);
                 request->rq_receiving_reply = 0;
                 spin_unlock_irqrestore (&request->rq_lock, flags);
                 spin_lock_irqsave (&request->rq_lock, flags);
                 request->rq_receiving_reply = 0;
                 spin_unlock_irqrestore (&request->rq_lock, flags);
@@ -1018,7 +1025,7 @@ void ptlrpc_unregister_reply (struct ptlrpc_request *request)
 
                         rc = l_wait_event (request->rq_wait_for_rep,
                                            request->rq_replied, &lwi);
 
                         rc = l_wait_event (request->rq_wait_for_rep,
                                            request->rq_replied, &lwi);
-                        LASSERT (rc == 0 || rc == -ETIMEDOUT);
+                        LASSERT(rc == 0 || rc == -ETIMEDOUT);
                         if (rc == 0) {
                                 spin_lock_irqsave (&request->rq_lock, flags);
                                 /* Ensure the callback has completed scheduling
                         if (rc == 0) {
                                 spin_lock_irqsave (&request->rq_lock, flags);
                                 /* Ensure the callback has completed scheduling
@@ -1032,8 +1039,8 @@ void ptlrpc_unregister_reply (struct ptlrpc_request *request)
                 /* fall through */
 
         case PTL_INV_MD:                        /* callback completed */
                 /* fall through */
 
         case PTL_INV_MD:                        /* callback completed */
-                LASSERT (!request->rq_receiving_reply);
-                LASSERT (request->rq_replied);
+                LASSERT(!request->rq_receiving_reply);
+                LASSERT(request->rq_replied);
                 EXIT;
                 return;
         }
                 EXIT;
                 return;
         }
@@ -1061,7 +1068,7 @@ void ptlrpc_free_committed(struct obd_import *imp)
                 req = list_entry(tmp, struct ptlrpc_request, rq_list);
 
                 /* XXX ok to remove when 1357 resolved - rread 05/29/03  */
                 req = list_entry(tmp, struct ptlrpc_request, rq_list);
 
                 /* XXX ok to remove when 1357 resolved - rread 05/29/03  */
-                LASSERT (req != last_req);
+                LASSERT(req != last_req);
                 last_req = req;
 
                 if (req->rq_import_generation < imp->imp_generation) {
                 last_req = req;
 
                 if (req->rq_import_generation < imp->imp_generation) {
@@ -1208,14 +1215,13 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req)
         struct l_wait_info lwi;
         struct obd_import *imp = req->rq_import;
         struct obd_device *obd = imp->imp_obd;
         struct l_wait_info lwi;
         struct obd_import *imp = req->rq_import;
         struct obd_device *obd = imp->imp_obd;
-        struct ptlrpc_connection *conn = imp->imp_connection;
-        unsigned int flags;
+        unsigned long flags;
         int do_restart = 0;
         int timeout = 0;
         ENTRY;
 
         int do_restart = 0;
         int timeout = 0;
         ENTRY;
 
-        LASSERT (req->rq_set == NULL);
-        LASSERT (!req->rq_receiving_reply);
+        LASSERT(req->rq_set == NULL);
+        LASSERT(!req->rq_receiving_reply);
 
         /* for distributed debugging */
         req->rq_reqmsg->status = current->pid;
 
         /* for distributed debugging */
         req->rq_reqmsg->status = current->pid;
@@ -1224,7 +1230,8 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req)
                "%s:%s:%d:"LPU64":%s:"LPX64":%d\n", current->comm,
                imp->imp_obd->obd_uuid.uuid,
                req->rq_reqmsg->status, req->rq_xid,
                "%s:%s:%d:"LPU64":%s:"LPX64":%d\n", current->comm,
                imp->imp_obd->obd_uuid.uuid,
                req->rq_reqmsg->status, req->rq_xid,
-               conn->c_peer.peer_ni->pni_name, conn->c_peer.peer_nid,
+               imp->imp_connection->c_peer.peer_ni->pni_name,
+               imp->imp_connection->c_peer.peer_nid,
                req->rq_reqmsg->opc);
 
         /* Mark phase here for a little debug help */
                req->rq_reqmsg->opc);
 
         /* Mark phase here for a little debug help */
@@ -1242,13 +1249,13 @@ restart:
         if (req->rq_import->imp_invalid && req->rq_level == LUSTRE_CONN_FULL) {
                 DEBUG_REQ(D_ERROR, req, "IMP_INVALID:");
                 spin_unlock_irqrestore(&imp->imp_lock, flags);
         if (req->rq_import->imp_invalid && req->rq_level == LUSTRE_CONN_FULL) {
                 DEBUG_REQ(D_ERROR, req, "IMP_INVALID:");
                 spin_unlock_irqrestore(&imp->imp_lock, flags);
-                GOTO (out, rc = -EIO);
+                GOTO(out, rc = -EIO);
         }
 
         if (req->rq_import_generation < imp->imp_generation) {
                 DEBUG_REQ(D_ERROR, req, "req old gen:");
                 spin_unlock_irqrestore(&imp->imp_lock, flags);
         }
 
         if (req->rq_import_generation < imp->imp_generation) {
                 DEBUG_REQ(D_ERROR, req, "req old gen:");
                 spin_unlock_irqrestore(&imp->imp_lock, flags);
-                GOTO (out, rc = -EIO);
+                GOTO(out, rc = -EIO);
         }
 
         if (req->rq_level > imp->imp_level) {
         }
 
         if (req->rq_level > imp->imp_level) {
@@ -1256,7 +1263,7 @@ restart:
                 if (req->rq_no_recov || obd->obd_no_recov ||
                     imp->imp_dlm_fake) {
                         spin_unlock_irqrestore(&imp->imp_lock, flags);
                 if (req->rq_no_recov || obd->obd_no_recov ||
                     imp->imp_dlm_fake) {
                         spin_unlock_irqrestore(&imp->imp_lock, flags);
-                        GOTO (out, rc = -EWOULDBLOCK);
+                        GOTO(out, rc = -EWOULDBLOCK);
                 }
 
                 list_add_tail(&req->rq_list, &imp->imp_delayed_list);
                 }
 
                 list_add_tail(&req->rq_list, &imp->imp_delayed_list);
@@ -1269,23 +1276,24 @@ restart:
                                   (req->rq_level <= imp->imp_level ||
                                    req->rq_err),
                                   &lwi);
                                   (req->rq_level <= imp->imp_level ||
                                    req->rq_err),
                                   &lwi);
-                DEBUG_REQ(D_HA, req, "\"%s\" awake: (%d > %d)",
-                          current->comm, req->rq_level, imp->imp_level);
+                DEBUG_REQ(D_HA, req, "\"%s\" awake: (%d > %d or %d == 1)",
+                          current->comm, imp->imp_level, req->rq_level,
+                          req->rq_err);
 
                 spin_lock_irqsave(&imp->imp_lock, flags);
                 list_del_init(&req->rq_list);
 
 
                 spin_lock_irqsave(&imp->imp_lock, flags);
                 list_del_init(&req->rq_list);
 
-                if (req->rq_err || 
+                if (req->rq_err ||
                     req->rq_import_generation < imp->imp_generation)
                         rc = -EIO;
 
 
                 if (rc) {
                         spin_unlock_irqrestore(&imp->imp_lock, flags);
                     req->rq_import_generation < imp->imp_generation)
                         rc = -EIO;
 
 
                 if (rc) {
                         spin_unlock_irqrestore(&imp->imp_lock, flags);
-                        GOTO (out, rc);
+                        GOTO(out, rc);
                 }
 
                 }
 
-                CERROR("process %d resumed\n", current->pid);
+                DEBUG_REQ(D_HA, req, "resumed");
         }
 
         /* XXX this is the same as ptlrpc_set_wait */
         }
 
         /* XXX this is the same as ptlrpc_set_wait */
@@ -1335,7 +1343,7 @@ restart:
                           &reply_ev);
                 reply_in_callback(&reply_ev);
 
                           &reply_ev);
                 reply_in_callback(&reply_ev);
 
-                LASSERT (reply_ev.mem_desc.user_ptr == (void *)req);
+                LASSERT(reply_ev.mem_desc.user_ptr == (void *)req);
                 // ptlrpc_check_reply(req);
                 // not required now it only tests
         }
                 // ptlrpc_check_reply(req);
                 // not required now it only tests
         }
@@ -1347,7 +1355,8 @@ restart:
                "%s:%s:%d:"LPU64":%s:"LPX64":%d\n", current->comm,
                imp->imp_obd->obd_uuid.uuid,
                req->rq_reqmsg->status, req->rq_xid,
                "%s:%s:%d:"LPU64":%s:"LPX64":%d\n", current->comm,
                imp->imp_obd->obd_uuid.uuid,
                req->rq_reqmsg->status, req->rq_xid,
-               conn->c_peer.peer_ni->pni_name, conn->c_peer.peer_nid,
+               imp->imp_connection->c_peer.peer_ni->pni_name,
+               imp->imp_connection->c_peer.peer_nid,
                req->rq_reqmsg->opc);
 
         spin_lock_irqsave(&imp->imp_lock, flags);
                req->rq_reqmsg->opc);
 
         spin_lock_irqsave(&imp->imp_lock, flags);
@@ -1421,7 +1430,7 @@ restart:
                                            ptlrpc_bulk_complete(req->rq_bulk),
                                            &lwi);
                         if (brc != 0) {
                                            ptlrpc_bulk_complete(req->rq_bulk),
                                            &lwi);
                         if (brc != 0) {
-                                LASSERT (brc == -ETIMEDOUT);
+                                LASSERT(brc == -ETIMEDOUT);
                                 CERROR ("Timed out waiting for bulk\n");
                                 rc = brc;
                         }
                                 CERROR ("Timed out waiting for bulk\n");
                                 rc = brc;
                         }
@@ -1429,14 +1438,14 @@ restart:
                 if (rc < 0) {
                         /* MDS blocks for put ACKs before replying */
                         /* OSC sets rq_no_resend for the time being */
                 if (rc < 0) {
                         /* MDS blocks for put ACKs before replying */
                         /* OSC sets rq_no_resend for the time being */
-                        LASSERT (req->rq_no_resend);
+                        LASSERT(req->rq_no_resend);
                         ptlrpc_unregister_bulk (req);
                 }
         }
 
                         ptlrpc_unregister_bulk (req);
                 }
         }
 
-        LASSERT (!req->rq_receiving_reply);
+        LASSERT(!req->rq_receiving_reply);
         req->rq_phase = RQ_PHASE_INTERPRET;
         req->rq_phase = RQ_PHASE_INTERPRET;
-        RETURN (rc);
+        RETURN(rc);
 }
 
 int ptlrpc_replay_req(struct ptlrpc_request *req)
 }
 
 int ptlrpc_replay_req(struct ptlrpc_request *req)
@@ -1450,7 +1459,7 @@ int ptlrpc_replay_req(struct ptlrpc_request *req)
          * state it was left in */
 
         /* Not handling automatic bulk replay yet (or ever?) */
          * state it was left in */
 
         /* Not handling automatic bulk replay yet (or ever?) */
-        LASSERT (req->rq_bulk == NULL);
+        LASSERT(req->rq_bulk == NULL);
 
         DEBUG_REQ(D_NET, req, "about to replay");
 
 
         DEBUG_REQ(D_NET, req, "about to replay");
 
index 07be1af..c4c47d3 100644 (file)
@@ -50,6 +50,7 @@ struct ll_rpc_opcode {
         { OST_SAN_READ,     "ost_san_read" },
         { OST_SAN_WRITE,    "ost_san_write" },
         { OST_SYNCFS,       "ost_syncfs" },
         { OST_SAN_READ,     "ost_san_read" },
         { OST_SAN_WRITE,    "ost_san_write" },
         { OST_SYNCFS,       "ost_syncfs" },
+        { OST_SET_INFO,     "ost_set_info" },
         { MDS_GETATTR,      "mds_getattr" },
         { MDS_GETATTR_NAME, "mds_getattr_name" },
         { MDS_CLOSE,        "mds_close" },
         { MDS_GETATTR,      "mds_getattr" },
         { MDS_GETATTR_NAME, "mds_getattr_name" },
         { MDS_CLOSE,        "mds_close" },
@@ -60,6 +61,8 @@ struct ll_rpc_opcode {
         { MDS_GETSTATUS,    "mds_getstatus" },
         { MDS_STATFS,       "mds_statfs" },
         { MDS_GETLOVINFO,   "mds_getlovinfo" },
         { MDS_GETSTATUS,    "mds_getstatus" },
         { MDS_STATFS,       "mds_statfs" },
         { MDS_GETLOVINFO,   "mds_getlovinfo" },
+        { MDS_PIN,          "mds_pin" },
+        { MDS_UNPIN,        "mds_unpin" },
         { LDLM_ENQUEUE,     "ldlm_enqueue" },
         { LDLM_CONVERT,     "ldlm_convert" },
         { LDLM_CANCEL,      "ldlm_cancel" },
         { LDLM_ENQUEUE,     "ldlm_enqueue" },
         { LDLM_CONVERT,     "ldlm_convert" },
         { LDLM_CANCEL,      "ldlm_cancel" },
@@ -71,7 +74,8 @@ struct ll_rpc_opcode {
         { PTLBD_FLUSH,      "ptlbd_flush" },
         { PTLBD_CONNECT,    "ptlbd_connect" },
         { PTLBD_DISCONNECT, "ptlbd_disconnect" },
         { PTLBD_FLUSH,      "ptlbd_flush" },
         { PTLBD_CONNECT,    "ptlbd_connect" },
         { PTLBD_DISCONNECT, "ptlbd_disconnect" },
-        { OBD_PING,         "obd_ping" }
+        { OBD_PING,         "obd_ping" },
+        { OBD_LOG_CANCEL,   "obd_log_cancel" },
 };
 
 const char* ll_opcode2str(__u32 opcode)
 };
 
 const char* ll_opcode2str(__u32 opcode)
@@ -119,7 +123,7 @@ void ptlrpc_lprocfs_register_service(struct obd_device *obddev,
         }
 
         lprocfs_counter_init(svc_stats, PTLRPC_REQWAIT_CNTR,
         }
 
         lprocfs_counter_init(svc_stats, PTLRPC_REQWAIT_CNTR,
-                             svc_counter_config, "req_waittime", "cycles");
+                             svc_counter_config, "req_waittime", "usec");
         /* Wait for b_eq branch
         lprocfs_counter_init(svc_stats, PTLRPC_SVCEQDEPTH_CNTR,
                              svc_counter_config, "svc_eqdepth", "reqs");
         /* Wait for b_eq branch
         lprocfs_counter_init(svc_stats, PTLRPC_SVCEQDEPTH_CNTR,
                              svc_counter_config, "svc_eqdepth", "reqs");
@@ -127,12 +131,12 @@ void ptlrpc_lprocfs_register_service(struct obd_device *obddev,
         /* no stddev on idletime */
         lprocfs_counter_init(svc_stats, PTLRPC_SVCIDLETIME_CNTR,
                              (LPROCFS_CNTR_EXTERNALLOCK|LPROCFS_CNTR_AVGMINMAX),
         /* no stddev on idletime */
         lprocfs_counter_init(svc_stats, PTLRPC_SVCIDLETIME_CNTR,
                              (LPROCFS_CNTR_EXTERNALLOCK|LPROCFS_CNTR_AVGMINMAX),
-                             "svc_idletime", "cycles");
+                             "svc_idletime", "usec");
         for (i = 0; i < LUSTRE_MAX_OPCODES; i++) {
                 __u32 opcode = ll_rpc_opcode_table[i].opcode;
                 lprocfs_counter_init(svc_stats, PTLRPC_LAST_CNTR + i,
                                      svc_counter_config, ll_opcode2str(opcode),
         for (i = 0; i < LUSTRE_MAX_OPCODES; i++) {
                 __u32 opcode = ll_rpc_opcode_table[i].opcode;
                 lprocfs_counter_init(svc_stats, PTLRPC_LAST_CNTR + i,
                                      svc_counter_config, ll_opcode2str(opcode),
-                                     "cycles");
+                                     "usec");
         }
 
         rc = lprocfs_register_stats(svc_procroot, "stats", svc_stats);
         }
 
         rc = lprocfs_register_stats(svc_procroot, "stats", svc_stats);
index 3811d2a..0e2d651 100644 (file)
@@ -187,14 +187,9 @@ void *lustre_msg_buf(struct lustre_msg *m, int n, int min_size)
         }
 
         buflen = m->buflens[n];
         }
 
         buflen = m->buflens[n];
-        if (buflen == 0) {
-                CERROR("msg %p buffer[%d] is zero length\n", m, n);
-                return NULL;
-        }
-
         if (buflen < min_size) {
                 CERROR("msg %p buffer[%d] size %d too small (required %d)\n",
         if (buflen < min_size) {
                 CERROR("msg %p buffer[%d] size %d too small (required %d)\n",
-                        m, n, buflen, min_size);
+                       m, n, buflen, min_size);
                 return NULL;
         }
 
                 return NULL;
         }
 
@@ -249,17 +244,16 @@ void *lustre_swab_reqbuf (struct ptlrpc_request *req, int index, int min_size,
 {
         void *ptr;
 
 {
         void *ptr;
 
-        LASSERT_REQSWAB (req, index);
+        LASSERT_REQSWAB(req, index);
 
         ptr = lustre_msg_buf(req->rq_reqmsg, index, min_size);
         if (ptr == NULL)
 
         ptr = lustre_msg_buf(req->rq_reqmsg, index, min_size);
         if (ptr == NULL)
-                return (NULL);
+                return NULL;
 
 
-        if (swabber != NULL &&
-            lustre_msg_swabbed (req->rq_reqmsg))
+        if (swabber != NULL && lustre_msg_swabbed(req->rq_reqmsg))
                 ((void (*)(void *))swabber)(ptr);
 
                 ((void (*)(void *))swabber)(ptr);
 
-        return (ptr);
+        return ptr;
 }
 
 /* Wrap up the normal fixed length case */
 }
 
 /* Wrap up the normal fixed length case */
@@ -268,17 +262,16 @@ void *lustre_swab_repbuf (struct ptlrpc_request *req, int index, int min_size,
 {
         void *ptr;
 
 {
         void *ptr;
 
-        LASSERT_REPSWAB (req, index);
+        LASSERT_REPSWAB(req, index);
 
 
-        ptr = lustre_msg_buf (req->rq_repmsg, index, min_size);
+        ptr = lustre_msg_buf(req->rq_repmsg, index, min_size);
         if (ptr == NULL)
         if (ptr == NULL)
-                return (NULL);
+                return NULL;
 
 
-        if (swabber != NULL &&
-            lustre_msg_swabbed (req->rq_repmsg))
+        if (swabber != NULL && lustre_msg_swabbed(req->rq_repmsg))
                 ((void (*)(void *))swabber)(ptr);
 
                 ((void (*)(void *))swabber)(ptr);
 
-        return (ptr);
+        return ptr;
 }
 
 /* byte flipping routines for all wire types declared in
 }
 
 /* byte flipping routines for all wire types declared in
@@ -638,12 +631,12 @@ void lustre_assert_wire_constants (void)
         LASSERT (REINT_RENAME == 5);
         LASSERT (REINT_OPEN == 6);
         LASSERT (REINT_MAX == 6);
         LASSERT (REINT_RENAME == 5);
         LASSERT (REINT_OPEN == 6);
         LASSERT (REINT_MAX == 6);
-        LASSERT (IT_INTENT_EXEC == 1);
-        LASSERT (IT_OPEN_LOOKUP == 2);
-        LASSERT (IT_OPEN_NEG == 4);
-        LASSERT (IT_OPEN_POS == 8);
-        LASSERT (IT_OPEN_CREATE == 16);
-        LASSERT (IT_OPEN_OPEN == 32);
+        LASSERT (DISP_IT_EXECD == 1);
+        LASSERT (DISP_LOOKUP_EXECD == 2);
+        LASSERT (DISP_LOOKUP_NEG == 4);
+        LASSERT (DISP_LOOKUP_POS == 8);
+        LASSERT (DISP_OPEN_CREATE == 16);
+        LASSERT (DISP_OPEN_OPEN == 32);
         LASSERT (MDS_STATUS_CONN == 1);
         LASSERT (MDS_STATUS_LOV == 2);
         LASSERT (MDS_OPEN_HAS_EA == 1);
         LASSERT (MDS_STATUS_CONN == 1);
         LASSERT (MDS_STATUS_LOV == 2);
         LASSERT (MDS_OPEN_HAS_EA == 1);
index ebc69e1..c81fb51 100644 (file)
@@ -47,12 +47,12 @@ void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
 
 int ptlrpc_pinger_add_import(struct obd_import *imp)
 {
 
 int ptlrpc_pinger_add_import(struct obd_import *imp)
 {
+#ifndef ENABLE_PINGER
+        return 0;
+#else
         int rc;
         ENTRY;
 
         int rc;
         ENTRY;
 
-#ifndef ENABLE_PINGER
-        RETURN(0);
-#else
         if (!list_empty(&imp->imp_pinger_chain))
                 RETURN(-EALREADY);
 
         if (!list_empty(&imp->imp_pinger_chain))
                 RETURN(-EALREADY);
 
@@ -77,12 +77,12 @@ int ptlrpc_pinger_add_import(struct obd_import *imp)
 
 int ptlrpc_pinger_del_import(struct obd_import *imp)
 {
 
 int ptlrpc_pinger_del_import(struct obd_import *imp)
 {
+#ifndef ENABLE_PINGER
+        return 0;
+#else
         int rc;
         ENTRY;
 
         int rc;
         ENTRY;
 
-#ifndef ENABLE_PINGER
-        RETURN(0);
-#else
         if (list_empty(&imp->imp_pinger_chain))
                 RETURN(-ENOENT);
 
         if (list_empty(&imp->imp_pinger_chain))
                 RETURN(-ENOENT);
 
@@ -118,14 +118,7 @@ static int ptlrpc_pinger_main(void *arg)
         RECALC_SIGPENDING;
         SIGNAL_MASK_UNLOCK(current, flags);
 
         RECALC_SIGPENDING;
         SIGNAL_MASK_UNLOCK(current, flags);
 
-#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
-        sprintf(current->comm, "%s|%d", data->name,current->thread.extern_pid);
-#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        sprintf(current->comm, "%s|%d", data->name,
-                current->thread.mode.tt.extern_pid);
-#else
-        strcpy(current->comm, data->name);
-#endif
+        THREAD_NAME(current->comm, "%s", data->name);
         unlock_kernel();
 
         /* Record that the thread is running */
         unlock_kernel();
 
         /* Record that the thread is running */
@@ -147,7 +140,8 @@ static int ptlrpc_pinger_main(void *arg)
                 down(&pinger_sem);
                 list_for_each(iter, &pinger_imports) {
                         struct obd_import *imp =
                 down(&pinger_sem);
                 list_for_each(iter, &pinger_imports) {
                         struct obd_import *imp =
-                                list_entry(iter, struct obd_import, imp_pinger_chain);
+                                list_entry(iter, struct obd_import,
+                                           imp_pinger_chain);
                         int generation, level;
                         unsigned long flags;
 
                         int generation, level;
                         unsigned long flags;
 
@@ -159,16 +153,19 @@ static int ptlrpc_pinger_main(void *arg)
                                 spin_unlock_irqrestore(&imp->imp_lock, flags);
 
                                 if (level != LUSTRE_CONN_FULL) {
                                 spin_unlock_irqrestore(&imp->imp_lock, flags);
 
                                 if (level != LUSTRE_CONN_FULL) {
-                                        CDEBUG(D_HA, "not pinging %s (in recovery)\n",
+                                        CDEBUG(D_HA,
+                                               "not pinging %s (in recovery)\n",
                                                imp->imp_target_uuid.uuid);
                                         continue;
                                 }
 
                                                imp->imp_target_uuid.uuid);
                                         continue;
                                 }
 
-                                req = ptlrpc_prep_req(imp, OBD_PING, 0, NULL, NULL);
+                                req = ptlrpc_prep_req(imp, OBD_PING, 0, NULL,
+                                                      NULL);
                                 if (!req) {
                                         CERROR("OOM trying to ping\n");
                                         break;
                                 }
                                 if (!req) {
                                         CERROR("OOM trying to ping\n");
                                         break;
                                 }
+                                req->rq_no_resend = 1;
                                 req->rq_replen = lustre_msg_size(0, NULL);
                                 req->rq_level = LUSTRE_CONN_FULL;
                                 req->rq_phase = RQ_PHASE_RPC;
                                 req->rq_replen = lustre_msg_size(0, NULL);
                                 req->rq_level = LUSTRE_CONN_FULL;
                                 req->rq_phase = RQ_PHASE_RPC;
index cb96c3c..8d66c88 100644 (file)
@@ -33,19 +33,22 @@ struct ptlrpc_request_set;
 /* ldlm hooks that we need, managed via inter_module_{get,put} */
 extern int (*ptlrpc_ldlm_namespace_cleanup)(struct ldlm_namespace *, int);
 extern int (*ptlrpc_ldlm_cli_cancel_unused)(struct ldlm_namespace *,
 /* ldlm hooks that we need, managed via inter_module_{get,put} */
 extern int (*ptlrpc_ldlm_namespace_cleanup)(struct ldlm_namespace *, int);
 extern int (*ptlrpc_ldlm_cli_cancel_unused)(struct ldlm_namespace *,
-                                     struct ldlm_res_id *, int);
+                                            struct ldlm_res_id *, int);
 extern int (*ptlrpc_ldlm_replay_locks)(struct obd_import *);
 
 int ptlrpc_get_ldlm_hooks(void);
 void ptlrpc_daemonize(void);
 
 void ptlrpc_request_handle_eviction(struct ptlrpc_request *);
 extern int (*ptlrpc_ldlm_replay_locks)(struct obd_import *);
 
 int ptlrpc_get_ldlm_hooks(void);
 void ptlrpc_daemonize(void);
 
 void ptlrpc_request_handle_eviction(struct ptlrpc_request *);
-void lustre_assert_wire_constants (void);
+void lustre_assert_wire_constants(void);
 
 void ptlrpc_lprocfs_register_service(struct obd_device *obddev,
                                      struct ptlrpc_service *svc);
 void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service *svc);
 
 
 void ptlrpc_lprocfs_register_service(struct obd_device *obddev,
                                      struct ptlrpc_service *svc);
 void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service *svc);
 
+/* recovd_thread.c */
+int llog_init_commit_master(void);
+int llog_cleanup_commit_master(int force);
 
 static inline int opcode_offset(__u32 opc) {
         if (opc < OST_LAST_OPC) {
 
 static inline int opcode_offset(__u32 opc) {
         if (opc < OST_LAST_OPC) {
@@ -66,9 +69,9 @@ static inline int opcode_offset(__u32 opc) {
                         (LDLM_LAST_OPC - LDLM_FIRST_OPC) +
                         (MDS_LAST_OPC - MDS_FIRST_OPC) +
                         (OST_LAST_OPC - OST_FIRST_OPC));
                         (LDLM_LAST_OPC - LDLM_FIRST_OPC) +
                         (MDS_LAST_OPC - MDS_FIRST_OPC) +
                         (OST_LAST_OPC - OST_FIRST_OPC));
-        } else if (opc == OBD_PING) {
+        } else if (opc < OBD_LAST_OPC) {
                 /* OBD Ping */
                 /* OBD Ping */
-                return (opc - OBD_PING +
+                return (opc - OBD_FIRST_OPC +
                         (PTLBD_LAST_OPC - PTLBD_FIRST_OPC) +
                         (LDLM_LAST_OPC - LDLM_FIRST_OPC) +
                         (MDS_LAST_OPC - MDS_FIRST_OPC) +
                         (PTLBD_LAST_OPC - PTLBD_FIRST_OPC) +
                         (LDLM_LAST_OPC - LDLM_FIRST_OPC) +
                         (MDS_LAST_OPC - MDS_FIRST_OPC) +
@@ -79,10 +82,11 @@ static inline int opcode_offset(__u32 opc) {
         }
 }
 
         }
 }
 
-#define LUSTRE_MAX_OPCODES (1 + (PTLBD_LAST_OPC - PTLBD_FIRST_OPC) \
-                              + (LDLM_LAST_OPC - LDLM_FIRST_OPC)   \
-                              + (MDS_LAST_OPC - MDS_FIRST_OPC)     \
-                              + (OST_LAST_OPC - OST_FIRST_OPC))
+#define LUSTRE_MAX_OPCODES ((PTLBD_LAST_OPC - PTLBD_FIRST_OPC) + \
+                            (LDLM_LAST_OPC - LDLM_FIRST_OPC)   + \
+                            (MDS_LAST_OPC - MDS_FIRST_OPC)     + \
+                            (OST_LAST_OPC - OST_FIRST_OPC)     + \
+                            (OBD_LAST_OPC - OBD_FIRST_OPC))
 
 enum {
         PTLRPC_REQWAIT_CNTR     = 0,
 
 enum {
         PTLRPC_REQWAIT_CNTR     = 0,
index ccc05dc..3dfec9a 100644 (file)
 
 #ifdef __KERNEL__
 # include <linux/module.h>
 
 #ifdef __KERNEL__
 # include <linux/module.h>
-#else 
+#else
 # include <liblustre.h>
 #endif
 #include <linux/obd.h>
 #include <linux/obd_ost.h>
 # include <liblustre.h>
 #endif
 #include <linux/obd.h>
 #include <linux/obd_ost.h>
+#include <linux/lustre_mgmt.h>
 #include <linux/lustre_net.h>
 #include <linux/lustre_dlm.h>
 
 #include <linux/lustre_net.h>
 #include <linux/lustre_dlm.h>
 
@@ -40,19 +41,27 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf)
         struct obd_import *imp;
         struct obd_uuid server_uuid;
         int rq_portal, rp_portal, connect_op;
         struct obd_import *imp;
         struct obd_uuid server_uuid;
         int rq_portal, rp_portal, connect_op;
-        char *name;
+        char *name = obddev->obd_type->typ_name;
         ENTRY;
 
         ENTRY;
 
-        if (obddev->obd_type->typ_ops->o_brw) {
+        /* In a more perfect world, we would hang a ptlrpc_client off of
+         * obd_type and just use the values from there. */
+        if (!strcmp(name, LUSTRE_OSC_NAME)) {
                 rq_portal = OST_REQUEST_PORTAL;
                 rp_portal = OSC_REPLY_PORTAL;
                 rq_portal = OST_REQUEST_PORTAL;
                 rp_portal = OSC_REPLY_PORTAL;
-                name = "osc";
                 connect_op = OST_CONNECT;
                 connect_op = OST_CONNECT;
-        } else {
+        } else if (!strcmp(name, LUSTRE_MDC_NAME)) {
                 rq_portal = MDS_REQUEST_PORTAL;
                 rp_portal = MDC_REPLY_PORTAL;
                 rq_portal = MDS_REQUEST_PORTAL;
                 rp_portal = MDC_REPLY_PORTAL;
-                name = "mdc";
                 connect_op = MDS_CONNECT;
                 connect_op = MDS_CONNECT;
+        } else if (!strcmp(name, LUSTRE_MGMTCLI_NAME)) {
+                rq_portal = MGMT_REQUEST_PORTAL;
+                rp_portal = MGMT_REPLY_PORTAL;
+                connect_op = MGMT_CONNECT;
+        } else {
+                CERROR("unknown client OBD type \"%s\", can't setup\n",
+                       name);
+                RETURN(-EINVAL);
         }
 
         if (data->ioc_inllen1 < 1) {
         }
 
         if (data->ioc_inllen1 < 1) {
@@ -108,18 +117,60 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf)
 
         cli->cl_import = imp;
         cli->cl_max_mds_easize = sizeof(struct lov_mds_md);
 
         cli->cl_import = imp;
         cli->cl_max_mds_easize = sizeof(struct lov_mds_md);
+        cli->cl_max_mds_cookiesize = sizeof(struct llog_cookie);
         cli->cl_sandev = to_kdev_t(0);
 
         cli->cl_sandev = to_kdev_t(0);
 
+        /* Register with management client if we need to. */
+        if (data->ioc_inllen3 > 0) {
+                char *mgmt_name = data->ioc_inlbuf3;
+                int rc;
+                struct obd_device *mgmt_obd;
+                mgmtcli_register_for_events_t register_f;
+
+                CDEBUG(D_HA, "%s registering with %s for events about %s\n",
+                       obddev->obd_name, mgmt_name, server_uuid.uuid);
+
+                mgmt_obd = class_name2obd(mgmt_name);
+                if (!mgmt_obd) {
+                        CERROR("can't find mgmtcli %s to register\n",
+                               mgmt_name);
+                        class_destroy_import(imp);
+                        RETURN(-ENOENT);
+                }
+                
+                register_f = inter_module_get("mgmtcli_register_for_events");
+                if (!register_f) {
+                        CERROR("can't i_m_g mgmtcli_register_for_events\n");
+                        class_destroy_import(imp);
+                        RETURN(-ENOSYS);
+                }
+                
+                rc = register_f(mgmt_obd, obddev, &imp->imp_target_uuid);
+                inter_module_put("mgmtcli_register_for_events");
+
+                if (!rc)
+                        cli->cl_mgmtcli_obd = mgmt_obd;
+
+                RETURN(rc);
+        }
+
         RETURN(0);
 }
 
         RETURN(0);
 }
 
-int client_obd_cleanup(struct obd_device *obddev, int force, int failover)
+int client_obd_cleanup(struct obd_device *obddev, int flags)
 {
 {
-        struct client_obd *client = &obddev->u.cli;
+        struct client_obd *cli = &obddev->u.cli;
 
 
-        if (!client->cl_import)
+        if (!cli->cl_import)
                 RETURN(-EINVAL);
                 RETURN(-EINVAL);
-        class_destroy_import(client->cl_import);
-        client->cl_import = NULL;
+        if (cli->cl_mgmtcli_obd) {
+                mgmtcli_deregister_for_events_t dereg_f;
+                
+                dereg_f = inter_module_get("mgmtcli_deregister_for_events");
+                dereg_f(cli->cl_mgmtcli_obd, obddev);
+                inter_module_put("mgmtcli_deregister_for_events");
+        }
+        class_destroy_import(cli->cl_import);
+        cli->cl_import = NULL;
         RETURN(0);
 }
         RETURN(0);
 }
index 57f3653..4b75026 100644 (file)
@@ -100,13 +100,14 @@ __init int ptlrpc_init(void)
         int rc;
         ENTRY;
 
         int rc;
         ENTRY;
 
-        lustre_assert_wire_constants ();
-        
+        lustre_assert_wire_constants();
+
         rc = ptlrpc_init_portals();
         if (rc)
                 RETURN(rc);
 
         ptlrpc_init_connection();
         rc = ptlrpc_init_portals();
         if (rc)
                 RETURN(rc);
 
         ptlrpc_init_connection();
+        llog_init_commit_master();
 
         ptlrpc_put_connection_superhack = ptlrpc_put_connection;
         ptlrpc_abort_inflight_superhack = ptlrpc_abort_inflight;
 
         ptlrpc_put_connection_superhack = ptlrpc_put_connection;
         ptlrpc_abort_inflight_superhack = ptlrpc_abort_inflight;
@@ -117,6 +118,9 @@ static void __exit ptlrpc_exit(void)
 {
         ptlrpc_exit_portals();
         ptlrpc_cleanup_connection();
 {
         ptlrpc_exit_portals();
         ptlrpc_cleanup_connection();
+#ifdef ENABLE_ORPHANS
+        llog_cleanup_commit_master(0);
+#endif
 }
 
 /* connection.c */
 }
 
 /* connection.c */
index ca2afad..70e9b5c 100644 (file)
 
 #define DEBUG_SUBSYSTEM S_RPC
 #ifdef __KERNEL__
 
 #define DEBUG_SUBSYSTEM S_RPC
 #ifdef __KERNEL__
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/kmod.h>
+# include <linux/config.h>
+# include <linux/module.h>
+# include <linux/kmod.h>
 #else
 #else
-#include <liblustre.h>
+# include <liblustre.h>
 #endif
 
 #include <linux/obd_support.h>
 #endif
 
 #include <linux/obd_support.h>
@@ -62,7 +62,7 @@ int ptlrpc_reconnect_import(struct obd_import *imp)
         struct lustre_handle old_hdl;
         __u64 committed_before_reconnect = imp->imp_peer_committed_transno;
 
         struct lustre_handle old_hdl;
         __u64 committed_before_reconnect = imp->imp_peer_committed_transno;
 
-        CERROR("reconnect handle "LPX64"\n", 
+        CERROR("reconnect handle "LPX64"\n",
                imp->imp_dlm_handle.cookie);
 
         req = ptlrpc_prep_req(imp, imp->imp_connect_op, 3, size, tmp);
                imp->imp_dlm_handle.cookie);
 
         req = ptlrpc_prep_req(imp, imp->imp_connect_op, 3, size, tmp);
@@ -89,7 +89,7 @@ int ptlrpc_reconnect_import(struct obd_import *imp)
                         GOTO(out_disc, rc = -ENOTCONN);
                 }
 
                         GOTO(out_disc, rc = -ENOTCONN);
                 }
 
-                if (memcmp(&imp->imp_remote_handle, &req->rq_repmsg->handle, 
+                if (memcmp(&imp->imp_remote_handle, &req->rq_repmsg->handle,
                            sizeof(imp->imp_remote_handle))) {
                         CERROR("%s@%s changed handle from "LPX64" to "LPX64
                                "; copying, but this may foreshadow disaster\n",
                            sizeof(imp->imp_remote_handle))) {
                         CERROR("%s@%s changed handle from "LPX64" to "LPX64
                                "; copying, but this may foreshadow disaster\n",
@@ -104,12 +104,13 @@ int ptlrpc_reconnect_import(struct obd_import *imp)
                 CERROR("reconnected to %s@%s after partition\n",
                        imp->imp_target_uuid.uuid, conn->c_remote_uuid.uuid);
                 GOTO(out_disc, rc = RECON_RESULT_RECONNECTED);
                 CERROR("reconnected to %s@%s after partition\n",
                        imp->imp_target_uuid.uuid, conn->c_remote_uuid.uuid);
                 GOTO(out_disc, rc = RECON_RESULT_RECONNECTED);
-        } else if (lustre_msg_get_op_flags(req->rq_repmsg) & MSG_CONNECT_RECOVERING) {
+        } else if (lustre_msg_get_op_flags(req->rq_repmsg) &
+                   MSG_CONNECT_RECOVERING) {
                 rc = RECON_RESULT_RECOVERING;
         } else {
                 rc = RECON_RESULT_EVICTED;
         }
                 rc = RECON_RESULT_RECOVERING;
         } else {
                 rc = RECON_RESULT_EVICTED;
         }
-        
+
         old_hdl = imp->imp_remote_handle;
         imp->imp_remote_handle = req->rq_repmsg->handle;
         CERROR("reconnected to %s@%s ("LPX64", was "LPX64")!\n",
         old_hdl = imp->imp_remote_handle;
         imp->imp_remote_handle = req->rq_repmsg->handle;
         CERROR("reconnected to %s@%s ("LPX64", was "LPX64")!\n",
@@ -150,9 +151,9 @@ void ptlrpc_run_recovery_over_upcall(struct obd_device *obd)
         rc = USERMODEHELPER(argv[0], argv, envp);
         if (rc < 0) {
                 CERROR("Error invoking recovery upcall %s %s %s: %d; check "
         rc = USERMODEHELPER(argv[0], argv, envp);
         if (rc < 0) {
                 CERROR("Error invoking recovery upcall %s %s %s: %d; check "
-                       "/proc/sys/lustre/upcall\n",                
+                       "/proc/sys/lustre/upcall\n",
                        argv[0], argv[1], argv[2], rc);
                        argv[0], argv[1], argv[2], rc);
-                
+
         } else {
                 CERROR("Invoked upcall %s %s %s",
                        argv[0], argv[1], argv[2]);
         } else {
                 CERROR("Invoked upcall %s %s %s",
                        argv[0], argv[1], argv[2]);
@@ -180,10 +181,10 @@ void ptlrpc_run_failed_import_upcall(struct obd_import* imp)
 
         rc = USERMODEHELPER(argv[0], argv, envp);
         if (rc < 0) {
 
         rc = USERMODEHELPER(argv[0], argv, envp);
         if (rc < 0) {
-                CERROR("Error invoking recovery upcall %s %s %s %s %s: %d; check "
-                       "/proc/sys/lustre/lustre_upcall\n",                
+                CERROR("Error invoking recovery upcall %s %s %s %s %s: %d; "
+                       "check /proc/sys/lustre/lustre_upcall\n",
                        argv[0], argv[1], argv[2], argv[3], argv[4],rc);
                        argv[0], argv[1], argv[2], argv[3], argv[4],rc);
-                
+
         } else {
                 CERROR("Invoked upcall %s %s %s %s %s\n",
                        argv[0], argv[1], argv[2], argv[3], argv[4]);
         } else {
                 CERROR("Invoked upcall %s %s %s %s %s\n",
                        argv[0], argv[1], argv[2], argv[3], argv[4]);
@@ -196,7 +197,6 @@ int ptlrpc_replay(struct obd_import *imp)
         struct list_head *tmp, *pos;
         struct ptlrpc_request *req;
         unsigned long flags;
         struct list_head *tmp, *pos;
         struct ptlrpc_request *req;
         unsigned long flags;
-        __u64 committed = imp->imp_peer_committed_transno;
         ENTRY;
 
         /* It might have committed some after we last spoke, so make sure we
         ENTRY;
 
         /* It might have committed some after we last spoke, so make sure we
@@ -207,7 +207,7 @@ int ptlrpc_replay(struct obd_import *imp)
         spin_unlock_irqrestore(&imp->imp_lock, flags);
 
         CDEBUG(D_HA, "import %p from %s has committed "LPD64"\n",
         spin_unlock_irqrestore(&imp->imp_lock, flags);
 
         CDEBUG(D_HA, "import %p from %s has committed "LPD64"\n",
-               imp, imp->imp_target_uuid.uuid, committed);
+               imp, imp->imp_target_uuid.uuid, imp->imp_peer_committed_transno);
 
         list_for_each(tmp, &imp->imp_replay_list) {
                 req = list_entry(tmp, struct ptlrpc_request, rq_list);
 
         list_for_each(tmp, &imp->imp_replay_list) {
                 req = list_entry(tmp, struct ptlrpc_request, rq_list);
@@ -221,7 +221,7 @@ int ptlrpc_replay(struct obd_import *imp)
          * than the one we're replaying (it can't be committed until it's
          * replayed, and we're doing that here).  l_f_e_safe protects against
          * problems with the current request being committed, in the unlikely
          * than the one we're replaying (it can't be committed until it's
          * replayed, and we're doing that here).  l_f_e_safe protects against
          * problems with the current request being committed, in the unlikely
-         * event of that race.  So, in conclusion, I think that it's safe to 
+         * event of that race.  So, in conclusion, I think that it's safe to
          * perform this list-walk without the imp_lock held.
          *
          * But, the {mdc,osc}_replay_open callbacks both iterate
          * perform this list-walk without the imp_lock held.
          *
          * But, the {mdc,osc}_replay_open callbacks both iterate
@@ -235,7 +235,7 @@ int ptlrpc_replay(struct obd_import *imp)
                 DEBUG_REQ(D_HA, req, "REPLAY:");
 
                 rc = ptlrpc_replay_req(req);
                 DEBUG_REQ(D_HA, req, "REPLAY:");
 
                 rc = ptlrpc_replay_req(req);
-        
+
                 if (rc) {
                         CERROR("recovery replay error %d for req "LPD64"\n",
                                rc, req->rq_xid);
                 if (rc) {
                         CERROR("recovery replay error %d for req "LPD64"\n",
                                rc, req->rq_xid);
@@ -307,7 +307,6 @@ inline void ptlrpc_invalidate_import_state(struct obd_import *imp)
         ptlrpc_abort_inflight(imp);
 }
 
         ptlrpc_abort_inflight(imp);
 }
 
-
 void ptlrpc_handle_failed_import(struct obd_import *imp)
 {
         ENTRY;
 void ptlrpc_handle_failed_import(struct obd_import *imp)
 {
         ENTRY;
@@ -329,7 +328,6 @@ void ptlrpc_request_handle_eviction(struct ptlrpc_request *failed_req)
         int rc;
         struct obd_import *imp= failed_req->rq_import;
         unsigned long flags;
         int rc;
         struct obd_import *imp= failed_req->rq_import;
         unsigned long flags;
-        struct ptlrpc_request *req;
         ENTRY;
 
         CDEBUG(D_HA, "import %s of %s@%s evicted: reconnecting\n",
         ENTRY;
 
         CDEBUG(D_HA, "import %s of %s@%s evicted: reconnecting\n",
@@ -347,7 +345,6 @@ void ptlrpc_request_handle_eviction(struct ptlrpc_request *failed_req)
                 failed_req->rq_err = 1;
                 spin_unlock_irqrestore (&failed_req->rq_lock, flags);
         }
                 failed_req->rq_err = 1;
                 spin_unlock_irqrestore (&failed_req->rq_lock, flags);
         }
-        ptlrpc_req_finished(req);
         EXIT;
 }
 
         EXIT;
 }
 
@@ -361,17 +358,23 @@ int ptlrpc_set_import_active(struct obd_import *imp, int active)
 
         notify_obd = imp->imp_obd->u.cli.cl_containing_lov;
 
 
         notify_obd = imp->imp_obd->u.cli.cl_containing_lov;
 
-        /* When deactivating, mark import invalid, and 
-           abort in-flight requests. */
+        /* When deactivating, mark import invalid, and abort in-flight
+         * requests. */
         if (!active) {
         if (!active) {
-                CDEBUG(D_ERROR, "setting import %s INVALID\n", imp->imp_target_uuid.uuid);
                 spin_lock_irqsave(&imp->imp_lock, flags);
                 spin_lock_irqsave(&imp->imp_lock, flags);
-                imp->imp_invalid = 1;
+                /* This is a bit of a hack, but invalidating replayable
+                 * imports makes a temporary reconnect failure into a much more
+                 * ugly -- and hard to remedy -- situation. */
+                if (!imp->imp_replayable) {
+                        CDEBUG(D_HA, "setting import %s INVALID\n",
+                               imp->imp_target_uuid.uuid);
+                        imp->imp_invalid = 1;
+                }
                 imp->imp_generation++;
                 spin_unlock_irqrestore(&imp->imp_lock, flags);
                 ptlrpc_invalidate_import_state(imp);
                 imp->imp_generation++;
                 spin_unlock_irqrestore(&imp->imp_lock, flags);
                 ptlrpc_invalidate_import_state(imp);
-//                ptlrpc_abort_inflight(imp);
-        } 
+                //ptlrpc_abort_inflight(imp);
+        }
 
         if (notify_obd == NULL)
                 GOTO(out, rc = 0);
 
         if (notify_obd == NULL)
                 GOTO(out, rc = 0);
@@ -403,8 +406,9 @@ int ptlrpc_set_import_active(struct obd_import *imp, int active)
 
 out:
         /* When activating, mark import valid */
 
 out:
         /* When activating, mark import valid */
-        if (active) {
-                CDEBUG(D_ERROR, "setting import %s VALID\n", imp->imp_target_uuid.uuid);
+        if (active && !rc) {
+                CDEBUG(D_HA, "setting import %s VALID\n",
+                       imp->imp_target_uuid.uuid);
                 spin_lock_irqsave(&imp->imp_lock, flags);
                 imp->imp_invalid = 0;
                 spin_unlock_irqrestore(&imp->imp_lock, flags);
                 spin_lock_irqsave(&imp->imp_lock, flags);
                 imp->imp_invalid = 0;
                 spin_unlock_irqrestore(&imp->imp_lock, flags);
@@ -420,7 +424,7 @@ void ptlrpc_fail_import(struct obd_import *imp, int generation)
         ENTRY;
 
         LASSERT (!imp->imp_dlm_fake);
         ENTRY;
 
         LASSERT (!imp->imp_dlm_fake);
-        
+
         spin_lock_irqsave(&imp->imp_lock, flags);
         if (imp->imp_level != LUSTRE_CONN_FULL)
                 in_recovery = 1;
         spin_lock_irqsave(&imp->imp_lock, flags);
         if (imp->imp_level != LUSTRE_CONN_FULL)
                 in_recovery = 1;
@@ -466,14 +470,14 @@ int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid)
         ENTRY;
 
         spin_lock_irqsave(&imp->imp_lock, flags);
         ENTRY;
 
         spin_lock_irqsave(&imp->imp_lock, flags);
-        if (imp->imp_level == LUSTRE_CONN_FULL || 
+        if (imp->imp_level == LUSTRE_CONN_FULL ||
             imp->imp_level == LUSTRE_CONN_NOTCONN)
                     imp->imp_level = LUSTRE_CONN_RECOVER;
         else
                 in_recover = 1;
         spin_unlock_irqrestore(&imp->imp_lock, flags);
 
             imp->imp_level == LUSTRE_CONN_NOTCONN)
                     imp->imp_level = LUSTRE_CONN_RECOVER;
         else
                 in_recover = 1;
         spin_unlock_irqrestore(&imp->imp_lock, flags);
 
-        if (in_recover == 1) 
+        if (in_recover == 1)
                 RETURN(-EALREADY);
 
         if (new_uuid) {
                 RETURN(-EALREADY);
 
         if (new_uuid) {
index f2a1089..22ccb09 100644 (file)
@@ -289,18 +289,24 @@ void ptlrpc_daemonize(void)
         reparent_to_init();
 }
 
         reparent_to_init();
 }
 
+static long timeval_sub(struct timeval *large, struct timeval *small)
+{
+        return (large->tv_sec - small->tv_sec) * 1000000 +
+                (large->tv_usec - small->tv_usec);
+}
+
 static int ptlrpc_main(void *arg)
 {
 static int ptlrpc_main(void *arg)
 {
-        struct ptlrpc_svc_data *data = (struct ptlrpc_svc_data *)arg;
+        struct ptlrpc_svc_data *data = arg;
         struct obd_device *obddev = data->dev;
         struct ptlrpc_service *svc = data->svc;
         struct ptlrpc_thread *thread = data->thread;
         struct ptlrpc_request *request;
         ptl_event_t *event;
         struct obd_device *obddev = data->dev;
         struct ptlrpc_service *svc = data->svc;
         struct ptlrpc_thread *thread = data->thread;
         struct ptlrpc_request *request;
         ptl_event_t *event;
-        int rc = 0;
         unsigned long flags;
         unsigned long flags;
-        cycles_t workdone_time = -1;
-        cycles_t svc_workcycles = -1;
+        struct timeval start_time, finish_time;
+        long total;
+        int rc = 0;
         ENTRY;
 
         lock_kernel();
         ENTRY;
 
         lock_kernel();
@@ -311,21 +317,14 @@ static int ptlrpc_main(void *arg)
         RECALC_SIGPENDING;
         SIGNAL_MASK_UNLOCK(current, flags);
 
         RECALC_SIGPENDING;
         SIGNAL_MASK_UNLOCK(current, flags);
 
-#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
-        sprintf(current->comm, "%s|%d", data->name,current->thread.extern_pid);
-#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        sprintf(current->comm, "%s|%d", data->name,
-                current->thread.mode.tt.extern_pid);
-#else
-        strcpy(current->comm, data->name);
-#endif
+        THREAD_NAME(current->comm, "%s", data->name);
         unlock_kernel();
 
         OBD_ALLOC(event, sizeof(*event));
         unlock_kernel();
 
         OBD_ALLOC(event, sizeof(*event));
-        if (!event)
+        if (event == NULL)
                 GOTO(out, rc = -ENOMEM);
         OBD_ALLOC(request, sizeof(*request));
                 GOTO(out, rc = -ENOMEM);
         OBD_ALLOC(request, sizeof(*request));
-        if (!request)
+        if (request == NULL)
                 GOTO(out_event, rc = -ENOMEM);
 
         /* Record that the thread is running */
                 GOTO(out_event, rc = -ENOMEM);
 
         /* Record that the thread is running */
@@ -334,14 +333,15 @@ static int ptlrpc_main(void *arg)
 
         /* XXX maintain a list of all managed devices: insert here */
 
 
         /* XXX maintain a list of all managed devices: insert here */
 
+        do_gettimeofday(&finish_time);
         /* And now, loop forever on requests */
         while (1) {
                 struct l_wait_info lwi = { 0 };
                 l_wait_event(svc->srv_waitq,
                              ptlrpc_check_event(svc, thread, event), &lwi);
 
         /* And now, loop forever on requests */
         while (1) {
                 struct l_wait_info lwi = { 0 };
                 l_wait_event(svc->srv_waitq,
                              ptlrpc_check_event(svc, thread, event), &lwi);
 
+                spin_lock(&svc->srv_lock);
                 if (thread->t_flags & SVC_STOPPING) {
                 if (thread->t_flags & SVC_STOPPING) {
-                        spin_lock(&svc->srv_lock);
                         thread->t_flags &= ~SVC_STOPPING;
                         spin_unlock(&svc->srv_lock);
 
                         thread->t_flags &= ~SVC_STOPPING;
                         spin_unlock(&svc->srv_lock);
 
@@ -349,65 +349,64 @@ static int ptlrpc_main(void *arg)
                         break;
                 }
 
                         break;
                 }
 
-                if (thread->t_flags & SVC_EVENT) {
-                        cycles_t  workstart_time;
-
-                        spin_lock(&svc->srv_lock);
-                        thread->t_flags &= ~SVC_EVENT;
-                        /* Update Service Statistics */
-                        workstart_time = get_cycles();
-                        if (workdone_time != -1 && svc->svc_stats != NULL) {
-                                /* Stats for req(n) are updated just before
-                                 * req(n+1) is executed. This avoids need to
-                                 * reacquire svc->srv_lock after
-                                 * call to handling_request().
-                                 */
-                                int opc;
-
-                                /* req_waittime */
-                                lprocfs_counter_add(svc->svc_stats,
-                                                    PTLRPC_REQWAIT_CNTR,
-                                                    (workstart_time -
-                                                     event->arrival_time));
-                                /* svc_eqdepth */
-                                /* Wait for b_eq branch
-                                lprocfs_counter_add(svc->svc_stats,
-                                                    PTLRPC_SVCEQDEPTH_CNTR,
-                                                    0);
-                                */
-                                /* svc_idletime */
-                                lprocfs_counter_add(svc->svc_stats,
-                                                    PTLRPC_SVCIDLETIME_CNTR,
-                                                    (workstart_time -
-                                                     workdone_time));
-                                /* previous request */
-                                opc = opcode_offset(request->rq_reqmsg->opc);
-                                if (opc > 0) {
-                                        LASSERT(opc < LUSTRE_MAX_OPCODES);
-                                        lprocfs_counter_add(svc->svc_stats, opc,
-                                                            PTLRPC_LAST_CNTR +
-                                                            svc_workcycles);
-                                }
-                        }
+                if (!(thread->t_flags & SVC_EVENT)) {
+                        CERROR("unknown flag in service");
                         spin_unlock(&svc->srv_lock);
                         spin_unlock(&svc->srv_lock);
+                        LBUG();
+                        EXIT;
+                        break;
+                }
+
+                thread->t_flags &= ~SVC_EVENT;
+                spin_unlock(&svc->srv_lock);
+
+                do_gettimeofday(&start_time);
+                total = timeval_sub(&start_time, &event->arrival_time);
+                if (svc->svc_stats != NULL) {
+                        lprocfs_counter_add(svc->svc_stats, PTLRPC_REQWAIT_CNTR,
+                                            total);
+                        lprocfs_counter_add(svc->svc_stats,
+                                            PTLRPC_SVCIDLETIME_CNTR,
+                                            timeval_sub(&start_time,
+                                                        &finish_time));
+#if 0 /* Wait for b_eq branch */
+                        lprocfs_counter_add(svc->svc_stats,
+                                            PTLRPC_SVCEQDEPTH_CNTR, 0);
+#endif
+                }
 
 
+                if (total / 1000000 > (long)obd_timeout) {
+                        CERROR("Dropping request from NID "LPX64" because it's "
+                               "%ld seconds old.\n", event->initiator.nid,
+                               total / 1000000); /* bug 1502 */
+                } else {
+                        CDEBUG(D_HA, "request from NID "LPX64" noticed after "
+                               "%ldus\n", event->initiator.nid, total);
                         rc = handle_incoming_request(obddev, svc, event,
                                                      request);
                         rc = handle_incoming_request(obddev, svc, event,
                                                      request);
-                        workdone_time = get_cycles();
-                        svc_workcycles = workdone_time - workstart_time;
-                        continue;
                 }
                 }
-
-                CERROR("unknown break in service");
-                LBUG();
-                EXIT;
-                break;
+                do_gettimeofday(&finish_time);
+                total = timeval_sub(&finish_time, &start_time);
+
+                CDEBUG((total / 1000000 > (long)obd_timeout) ? D_ERROR : D_HA,
+                       "request "LPU64" from NID "LPX64" processed in %ldus "
+                       "(%ldus total)\n", request->rq_xid, event->initiator.nid,
+                       total, timeval_sub(&finish_time, &event->arrival_time));
+
+                if (svc->svc_stats != NULL) {
+                        int opc = opcode_offset(request->rq_reqmsg->opc);
+                        if (opc > 0) {
+                                LASSERT(opc < LUSTRE_MAX_OPCODES);
+                                lprocfs_counter_add(svc->svc_stats,
+                                                    opc + PTLRPC_LAST_CNTR,
+                                                    total);
+                        }
+                }
         }
 
         /* NB should wait for all SENT callbacks to complete before exiting
          * here.  Unfortunately at this time there is no way to track this
         }
 
         /* NB should wait for all SENT callbacks to complete before exiting
          * here.  Unfortunately at this time there is no way to track this
-         * state.
-         */
+         * state. */
         OBD_FREE(request, sizeof(*request));
 out_event:
         OBD_FREE(event, sizeof(*event));
         OBD_FREE(request, sizeof(*request));
 out_event:
         OBD_FREE(event, sizeof(*event));
index a24a26a..1b2ba01 100644 (file)
@@ -1,17 +1,17 @@
 # lustre.spec
 %define version b_devel
 # lustre.spec
 %define version b_devel
-%define kversion @RELEASE@
+%define kversion @LINUXRELEASE@
 %define linuxdir @LINUX@
 %define linuxdir @LINUX@
-Release: 0306170928kernel
 
 Summary: Lustre Lite File System
 Name: lustre-lite
 Version: %{version}
 
 Summary: Lustre Lite File System
 Name: lustre-lite
 Version: %{version}
+Release: @RELEASE@
 Copyright: GPL
 Group: Utilities/System
 Requires: lustre-modules, PyXML
 Copyright: GPL
 Group: Utilities/System
 Requires: lustre-modules, PyXML
-BuildRoot: /var/tmp/lustre-%{version}-root
 Source: ftp://ftp.lustre.com/pub/lustre/lustre-%{version}.tar.gz
 Source: ftp://ftp.lustre.com/pub/lustre/lustre-%{version}.tar.gz
+BuildRoot: /var/tmp/lustre-%{version}-root
 
 %description
 The Lustre Lite Cluster File System: kernel drivers for file system,
 
 %description
 The Lustre Lite Cluster File System: kernel drivers for file system,
@@ -69,21 +69,10 @@ cd $RPM_BUILD_DIR/lustre-%{version}
 ./configure --with-linux='%{linuxdir}' 
 make
 
 ./configure --with-linux='%{linuxdir}' 
 make
 
-#%ifarch i386
-#cd $RPM_BUILD_DIR/lustre-%{version}-lib/lustre-%{version}
-#./configure --with-lib 
-#make
-#%endif
-
 %install
 cd $RPM_BUILD_DIR/lustre-%{version}
 make install prefix=$RPM_BUILD_ROOT
 
 %install
 cd $RPM_BUILD_DIR/lustre-%{version}
 make install prefix=$RPM_BUILD_ROOT
 
-#%ifarch i386
-#cd $RPM_BUILD_DIR/lustre-%{version}-lib/lustre-%{version}
-#make install prefix=$RPM_BUILD_ROOT
-#%endif
-
 %ifarch alpha
 # this hurts me
   conf_flag=
 %ifarch alpha
 # this hurts me
   conf_flag=
@@ -226,20 +215,20 @@ if [ ! -e /dev/portals ]; then
 fi
 depmod -ae || exit 0
 
 fi
 depmod -ae || exit 0
 
-grep -q obdclass /etc/modules.conf || \
-       echo 'alias char-major-10-241 obdclass' >> /etc/modules.conf
+#grep -q obdclass /etc/modules.conf || \
+#      echo 'alias char-major-10-241 obdclass' >> /etc/modules.conf
 
 
-grep -q '/dev/obd' /etc/modules.conf || \
-       echo 'alias /dev/obd obdclass' >> /etc/modules.conf
+#grep -q '/dev/obd' /etc/modules.conf || \
+#      echo 'alias /dev/obd obdclass' >> /etc/modules.conf
 
 
-grep -q '/dev/lustre' /etc/modules.conf || \
-       echo 'alias /dev/lustre obdclass' >> /etc/modules.conf
+#grep -q '/dev/lustre' /etc/modules.conf || \
+#      echo 'alias /dev/lustre obdclass' >> /etc/modules.conf
 
 
-grep -q portals /etc/modules.conf || \
-        echo 'alias char-major-10-240 portals' >> /etc/modules.conf
+#grep -q portals /etc/modules.conf || \
+#        echo 'alias char-major-10-240 portals' >> /etc/modules.conf
 
 
-grep -q '/dev/portals' /etc/modules.conf || \
-        echo 'alias /dev/portals portals' >> /etc/modules.conf
+#grep -q '/dev/portals' /etc/modules.conf || \
+#        echo 'alias /dev/portals portals' >> /etc/modules.conf
 
 %postun
 depmod -ae || exit 0
 
 %postun
 depmod -ae || exit 0
@@ -257,6 +246,7 @@ if grep -q slapd-lustre $slapd; then
    cp $tmp $slapd
    rm $tmp
 fi
    cp $tmp $slapd
    rm $tmp
 fi
+
 %clean
 #rm -rf $RPM_BUILD_ROOT
 
 %clean
 #rm -rf $RPM_BUILD_ROOT
 
index 2e5c1fe..21575d0 100644 (file)
@@ -41,5 +41,9 @@ runas
 openfile
 unlinkmany
 fchdir_test
 openfile
 unlinkmany
 fchdir_test
+*.cmd
 getdents
 o_directory
 getdents
 o_directory
+mkdirdeep
+utime
+small_write
index 064de98..6600962 100644 (file)
@@ -6,18 +6,21 @@ CFLAGS := -g -Wall
 # LDADD := -lreadline -ltermcap # -lefence
 EXTRA_DIST = $(pkgexample_SCRIPTS) $(noinst_SCRIPTS) $(noinst_DATA) \
        sanity.sh          rundbench    mcreate
 # LDADD := -lreadline -ltermcap # -lefence
 EXTRA_DIST = $(pkgexample_SCRIPTS) $(noinst_SCRIPTS) $(noinst_DATA) \
        sanity.sh          rundbench    mcreate
-pkgexample_SCRIPTS = llmount.sh llmountcleanup.sh llecho.sh llechocleanup.sh local.sh echo.sh uml.sh lov.sh
+pkgexample_SCRIPTS = llmount.sh llmountcleanup.sh llecho.sh llechocleanup.sh
+pkgexample_SCRIPTS += local.sh echo.sh uml.sh lov.sh
 noinst_DATA =
 noinst_DATA =
-noinst_SCRIPTS = leak_finder.pl llecho.sh llmount.sh llmountcleanup.sh tbox.sh \
-       llrmount.sh runfailure-mds runvmstat runfailure-net runfailure-ost \
-       runiozone runregression-net.sh runtests sanity.sh rundbench
+noinst_SCRIPTS = leak_finder.pl llecho.sh llmount.sh llmountcleanup.sh tbox.sh
+noinst_SCRIPTS += llrmount.sh runfailure-mds runvmstat runfailure-net
+noinst_SCRIPTS += runfailure-ost runiozone runregression-net.sh runtests
+noinst_SCRIPTS += sanity.sh rundbench
 noinst_PROGRAMS = openunlink testreq truncate directio openme writeme open_delay
 noinst_PROGRAMS = openunlink testreq truncate directio openme writeme open_delay
-noinst_PROGRAMS += munlink tchmod toexcl fsx test_brw openclose createdestroy
-noinst_PROGRAMS += stat createmany statmany multifstat createtest mlink
+noinst_PROGRAMS += tchmod toexcl fsx test_brw openclose createdestroy
+noinst_PROGRAMS += stat createmany statmany multifstat createtest mlink utime
 noinst_PROGRAMS += opendirunlink opendevunlink unlinkmany fchdir_test checkstat
 noinst_PROGRAMS += opendirunlink opendevunlink unlinkmany fchdir_test checkstat
-noinst_PROGRAMS += wantedi statone runas openfile getdents o_directory
+noinst_PROGRAMS += wantedi statone runas openfile getdents mkdirdeep o_directory
+noinst_PROGRAMS += small_write
 # noinst_PROGRAMS += ldaptest
 # noinst_PROGRAMS += ldaptest
-sbin_PROGRAMS = mcreate mkdirmany
+sbin_PROGRAMS = mcreate munlink mkdirmany
 
 # ldaptest_SOURCES = ldaptest.c
 tchmod_SOURCES = tchmod.c
 
 # ldaptest_SOURCES = ldaptest.c
 tchmod_SOURCES = tchmod.c
@@ -48,13 +51,15 @@ openfile_SOURCES = openfile.c
 wantedi_SOURCES = wantedi.c
 createtest_SOURCES = createtest.c
 open_delay_SOURCES = open_delay.c
 wantedi_SOURCES = wantedi.c
 createtest_SOURCES = createtest.c
 open_delay_SOURCES = open_delay.c
-opendirunlink_SOURCES=opendirunlink.c
-opendevunlink_SOURCES=opendirunlink.c
-fchdir_test_SOURCES=fchdir_test.c
+opendirunlink_SOURCES = opendirunlink.c
+opendevunlink_SOURCES = opendevunlink.c
+fchdir_test_SOURCES = fchdir_test.c
 getdents_SOURCES=getdents.c
 o_directory_SOURCES = o_directory.c
 getdents_SOURCES=getdents.c
 o_directory_SOURCES = o_directory.c
-#mkdirdeep_SOURCES= mkdirdeep.c
-#mkdirdeep_LDADD=-L../portals/util -lptlctl
-#mkdirdeep_CPPFLAGS=-I$(top_srcdir)/portals/include
+utime_SOURCES = utime.c
+mkdirdeep_SOURCES = mkdirdeep.c
+mkdirdeep_LDADD=-L$(top_builddir)/portals/utils -lptlctl
+mkdirdeep_CPPFLAGS=-I$(top_srcdir)/portals/include
+small_write_SOURCES = small_write.c
 
 include $(top_srcdir)/Rules
 
 include $(top_srcdir)/Rules
index f647a55..496f3b4 100644 (file)
@@ -8,6 +8,7 @@ set -e
 
 SRCDIR="`dirname $0`"
 CREATE=$SRCDIR/create.pl
 
 SRCDIR="`dirname $0`"
 CREATE=$SRCDIR/create.pl
+RENAME=$SRCDIR/rename.pl
 
 debug_client_on()
 {
 
 debug_client_on()
 {
@@ -23,118 +24,71 @@ MNT=${MNT:-/mnt/lustre}
 
 debug_client_on
 echo "create.pl, 2 mounts, 1 thread, 10 ops, debug on"
 
 debug_client_on
 echo "create.pl, 2 mounts, 1 thread, 10 ops, debug on"
-perl $CREATE -- $MNT 2 10
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=10
 echo "create.pl, 2 mounts, 1 thread, 100 ops, debug on"
 echo "create.pl, 2 mounts, 1 thread, 100 ops, debug on"
-perl $CREATE --silent -- $MNT 2 100
-echo "create.pl --mcreate=0, 2 mounts, 1 thread, 10 ops, debug on"
-perl $CREATE --mcreate=0 -- $MNT 2 10
-echo "create.pl --mcreate=0, 2 mounts, 1 thread, 100 ops, debug on"
-perl $CREATE --mcreate=0 --silent -- $MNT 2 100
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=100 --silent
+echo "create.pl --use_mcreate=0, 2 mounts, 1 thread, 10 ops, debug on"
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=10 --use_mcreate=0
+echo "create.pl --use_mcreate=0, 2 mounts, 1 thread, 100 ops, debug on"
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=100 --use_mcreate=0 --silent
 echo "rename.pl, 2 mounts, 1 thread, 10 ops, debug on"
 echo "rename.pl, 2 mounts, 1 thread, 10 ops, debug on"
-perl rename.pl --count=2 $MNT 10
+perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=10
 echo "rename.pl, 2 mounts, 1 thread, 100 ops, debug on"
 echo "rename.pl, 2 mounts, 1 thread, 100 ops, debug on"
-perl rename.pl --count=2 --silent $MNT 100
+perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=100 --silent
 
 debug_client_off
 echo "create.pl, 2 mounts, 1 thread, 1000 ops, debug off"
 
 debug_client_off
 echo "create.pl, 2 mounts, 1 thread, 1000 ops, debug off"
-perl $CREATE --silent -- $MNT 2 1000
-echo "create.pl --mcreate=0, 2 mounts, 1 thread, 1000 ops, debug off"
-perl $CREATE --silent --mcreate=0 -- $MNT 2 1000
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=1000 --silent
+echo "create.pl --use_mcreate=0, 2 mounts, 1 thread, 1000 ops, debug off"
+perl $CREATE --silent --use_mcreate=0 -- $MNT 2 1000
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=1000 --use_mcreate=0 --silent
 echo "rename.pl, 2 mounts, 1 thread, 1000 ops, debug off"
 echo "rename.pl, 2 mounts, 1 thread, 1000 ops, debug off"
-perl rename.pl --count=2 --silent $MNT 1000
+perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=1000 --silent
 
 debug_client_on
 echo "create.pl, 2 mounts, 2 threads, 100 ops, debug on"
 
 debug_client_on
 echo "create.pl, 2 mounts, 2 threads, 100 ops, debug on"
-perl $CREATE --silent -- $MNT 2 100 &
-perl $CREATE --silent -- $MNT 2 100 &
-wait
-echo "create.pl --mcreate=0, 2 mounts, 2 threads, 100 ops, debug on"
-perl $CREATE --silent --mcreate=0 -- $MNT 2 100 &
-perl $CREATE --silent --mcreate=0 -- $MNT 2 100 &
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=100 --num_threads=2 --silent
+echo "create.pl --use_mcreate=0, 2 mounts, 2 threads, 100 ops, debug on"
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=100 --num_threads=2 --use_mcreate=0 --silent
 echo "rename.pl, 2 mounts, 2 thread, 1000 ops, debug on"
 echo "rename.pl, 2 mounts, 2 thread, 1000 ops, debug on"
-perl rename.pl --count=2 --silent $MNT 1000 &
-perl rename.pl --count=2 --silent $MNT 1000 &
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=1000 --num_threads=2 --silent
 
 debug_client_off
 echo "create.pl, 2 mounts, 2 threads, 2000 ops, debug off"
 
 debug_client_off
 echo "create.pl, 2 mounts, 2 threads, 2000 ops, debug off"
-perl $CREATE --silent -- $MNT 2 2000 &
-perl $CREATE --silent -- $MNT 2 2000 &
-wait
-echo "create.pl --mcreate=0, 2 mounts, 2 threads, 2000 ops, debug off"
-perl $CREATE --silent --mcreate=0 -- $MNT 2 2000 &
-perl $CREATE --silent --mcreate=0 -- $MNT 2 2000 &
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=2 --silent
+echo "create.pl --use_mcreate=0, 2 mounts, 2 threads, 2000 ops, debug off"
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=2 --use_mcreate=0 --silent
 echo "rename.pl, 2 mounts, 2 threads, 2000 ops, debug off"
 echo "rename.pl, 2 mounts, 2 threads, 2000 ops, debug off"
-perl rename.pl --count=2 --silent $MNT 2000 &
-perl rename.pl --count=2 --silent $MNT 2000 &
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=2 --silent
 
 debug_client_on
 echo "create.pl, 2 mounts, 4 threads, 100 ops, debug on"
 
 debug_client_on
 echo "create.pl, 2 mounts, 4 threads, 100 ops, debug on"
-for i in `seq 1 4`; do
-  perl $CREATE --silent -- $MNT 2 100 &
-done
-wait
-echo "create.pl --mcreate=0, 2 mounts, 4 threads, 100 ops, debug on"
-for i in `seq 1 4`; do
-  perl $CREATE --silent --mcreate=0 -- $MNT 2 100 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=100 --num_threads=4 --silent
+echo "create.pl --use_mcreate=0, 2 mounts, 4 threads, 100 ops, debug on"
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=100 --num_threads=4 --use_mcreate=0 --silent
 echo "rename.pl, 2 mounts, 4 threads, 2000 ops, debug on"
 echo "rename.pl, 2 mounts, 4 threads, 2000 ops, debug on"
-for i in `seq 1 4`; do
-  perl rename.pl --count=2 --silent $MNT 2000 &
-done
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=4 --silent
 
 debug_client_off
 echo "create.pl, 2 mounts, 4 threads, 2000 ops, debug off"
 
 debug_client_off
 echo "create.pl, 2 mounts, 4 threads, 2000 ops, debug off"
-for i in `seq 1 4`; do
-  perl $CREATE --silent -- $MNT 2 2000 &
-done
-wait
-echo "create.pl --mcreate=0, 2 mounts, 4 threads, 2000 ops, debug off"
-for i in `seq 1 4`; do
-  perl $CREATE --silent --mcreate=0 -- $MNT 2 2000 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=4 --silent
+echo "create.pl --use_mcreate=0, 2 mounts, 4 threads, 2000 ops, debug off"
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=4 --use_mcreate=0 --silent
 echo "rename.pl, 2 mounts, 4 threads, 2000 ops, debug off"
 echo "rename.pl, 2 mounts, 4 threads, 2000 ops, debug off"
-for i in `seq 1 4`; do
-  perl rename.pl --count=2 --silent $MNT 2000 &
-done
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=4 --silent
 
 debug_client_on
 echo "create.pl, 2 mounts, 8 threads, 500 ops, debug on"
 
 debug_client_on
 echo "create.pl, 2 mounts, 8 threads, 500 ops, debug on"
-for i in `seq 1 8`; do
-  perl $CREATE --silent -- $MNT 2 500 &
-done
-wait
-echo "create.pl --mcreate=0, 2 mounts, 8 threads, 500 ops, debug on"
-for i in `seq 1 8`; do
-  perl $CREATE --silent --mcreate=0 -- $MNT 2 500 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=500 --num_threads=8 --silent
+echo "create.pl --use_mcreate=0, 2 mounts, 8 threads, 500 ops, debug on"
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=500 --num_threads=8 --use_mcreate=0 --silent
 echo "rename.pl, 2 mounts, 8 threads, 2000 ops, debug on"
 echo "rename.pl, 2 mounts, 8 threads, 2000 ops, debug on"
-for i in `seq 1 8`; do
-  perl rename.pl --count=2 --silent $MNT 2000 &
-done
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=8 --silent
 
 debug_client_off
 echo "create.pl, 2 mounts, 8 threads, 2000 ops, debug off"
 
 debug_client_off
 echo "create.pl, 2 mounts, 8 threads, 2000 ops, debug off"
-for i in `seq 1 8`; do
-  perl $CREATE --silent -- $MNT 2 2000 &
-done
-wait
-echo "create.pl --mcreate=0, 2 mounts, 8 threads, 2000 ops, debug off"
-for i in `seq 1 8`; do
-  perl $CREATE --silent --mcreate=0 -- $MNT 2 2000 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=8 --silent
+echo "create.pl --use_mcreate=0, 2 mounts, 8 threads, 2000 ops, debug off"
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=8 --use_mcreate=0 --silent
 echo "rename.pl, 2 mounts, 8 threads, 2000 ops, debug off"
 echo "rename.pl, 2 mounts, 8 threads, 2000 ops, debug off"
-for i in `seq 1 8`; do
-  perl rename.pl --count=2 --silent $MNT 2000 &
-done
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=8 --silent
index 53774e5..2bf0a53 100644 (file)
@@ -8,6 +8,7 @@ set -e
 
 SRCDIR="`dirname $0`"
 CREATE=$SRCDIR/create.pl
 
 SRCDIR="`dirname $0`"
 CREATE=$SRCDIR/create.pl
+RENAME=$SRCDIR/rename.pl
 
 debug_client_on()
 {
 
 debug_client_on()
 {
@@ -23,121 +24,75 @@ MNT=${MNT:-/mnt/lustre}
 
 debug_client_on
 echo "create.pl, 1 mount, 1 thread, 10 ops, debug on"
 
 debug_client_on
 echo "create.pl, 1 mount, 1 thread, 10 ops, debug on"
-perl $CREATE -- $MNT -1 10
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=10
 echo "create.pl, 1 mount, 1 thread, 100 ops, debug on"
 echo "create.pl, 1 mount, 1 thread, 100 ops, debug on"
-perl $CREATE --silent -- $MNT -1 100
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=100 --silent
 echo "create.pl --mcreate=0, 1 mount, 1 thread, 10 ops, debug on"
 echo "create.pl --mcreate=0, 1 mount, 1 thread, 10 ops, debug on"
-perl $CREATE --mcreate=0 -- $MNT -1 10
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=10 --use_mcreate=0
 echo "create.pl --mcreate=0, 1 mount, 1 thread, 100 ops, debug on"
 echo "create.pl --mcreate=0, 1 mount, 1 thread, 100 ops, debug on"
-perl $CREATE --mcreate=0 --silent -- $MNT -1 100
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=100 --use_mcreate=0 --silent
 echo "rename.pl, 1 mount, 1 thread, 10 ops, debug on"
 echo "rename.pl, 1 mount, 1 thread, 10 ops, debug on"
-perl rename.pl $MNT 10
+perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=10
 echo "rename.pl, 1 mount, 1 thread, 100 ops, debug on"
 echo "rename.pl, 1 mount, 1 thread, 100 ops, debug on"
-perl rename.pl --silent $MNT 100
+perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=100 --silent
 
 debug_client_off
 echo "create.pl, 1 mount, 1 thread, 1000 ops, debug off"
 
 debug_client_off
 echo "create.pl, 1 mount, 1 thread, 1000 ops, debug off"
-perl $CREATE --silent -- $MNT -1 1000
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=1000 --silent
 echo "create.pl --mcreate=0, 1 mount, 1 thread, 1000 ops, debug off"
 echo "create.pl --mcreate=0, 1 mount, 1 thread, 1000 ops, debug off"
-perl $CREATE --silent --mcreate=0 -- $MNT -1 1000
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=1000 --use_mcreate=0 --silent
 echo "rename.pl, 1 mount, 1 thread, 1000 ops, debug off"
 echo "rename.pl, 1 mount, 1 thread, 1000 ops, debug off"
-perl rename.pl --silent $MNT 1000
+perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=1000 --silent
 
 debug_client_on
 echo "create.pl, 1 mount, 2 threads, 100 ops, debug on"
 
 debug_client_on
 echo "create.pl, 1 mount, 2 threads, 100 ops, debug on"
-perl $CREATE --silent -- $MNT -1 100 &
-perl $CREATE --silent -- $MNT -1 100 &
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=100 --num_threads=2 --silent
 echo "create.pl --mcreate=0, 1 mount, 2 threads, 100 ops, debug on"
 echo "create.pl --mcreate=0, 1 mount, 2 threads, 100 ops, debug on"
-perl $CREATE --silent --mcreate=0 -- $MNT -1 100 &
-perl $CREATE --silent --mcreate=0 -- $MNT -1 100 &
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=100 --num_threads=2 --use_mcreate=0 --silent
 echo "rename.pl, 1 mount, 2 thread, 1000 ops, debug on"
 echo "rename.pl, 1 mount, 2 thread, 1000 ops, debug on"
-perl rename.pl --silent $MNT 1000 &
-perl rename.pl --silent $MNT 1000 &
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=1000 --num_threads=2 --silent
 
 debug_client_off
 echo "create.pl, 1 mount, 2 threads, 2000 ops, debug off"
 
 debug_client_off
 echo "create.pl, 1 mount, 2 threads, 2000 ops, debug off"
-perl $CREATE --silent -- $MNT -1 2000 &
-perl $CREATE --silent -- $MNT -1 2000 &
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=2 --silent
 echo "create.pl --mcreate=0, 1 mount, 2 threads, 2000 ops, debug off"
 echo "create.pl --mcreate=0, 1 mount, 2 threads, 2000 ops, debug off"
-perl $CREATE --silent --mcreate=0 -- $MNT -1 2000 &
-perl $CREATE --silent --mcreate=0 -- $MNT -1 2000 &
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=2 --use_mcreate=0  --silent
 wait
 echo "rename.pl, 1 mount, 2 threads, 2000 ops, debug off"
 wait
 echo "rename.pl, 1 mount, 2 threads, 2000 ops, debug off"
-perl rename.pl --silent $MNT 2000 &
-perl rename.pl --silent $MNT 2000 &
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=2 --silent
 
 debug_client_on
 echo "create.pl, 1 mount, 4 threads, 100 ops, debug on"
 
 debug_client_on
 echo "create.pl, 1 mount, 4 threads, 100 ops, debug on"
-for i in `seq 1 4`; do
-  perl $CREATE --silent -- $MNT -1 100 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=100 --num_threads=4 --silent
 echo "create.pl --mcreate=0, 1 mount, 4 threads, 100 ops, debug on"
 echo "create.pl --mcreate=0, 1 mount, 4 threads, 100 ops, debug on"
-for i in `seq 1 4`; do
-  perl $CREATE --silent --mcreate=0 -- $MNT -1 100 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=100 --num_threads=4 --use_mcreate=0 --silent
 echo "rename.pl, 1 mount, 4 threads, 2000 ops, debug on"
 echo "rename.pl, 1 mount, 4 threads, 2000 ops, debug on"
-for i in `seq 1 4`; do
-  perl rename.pl --silent $MNT 2000 &
-done
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=4 --silent
 
 debug_client_off
 echo "create.pl, 1 mount, 4 threads, 2000 ops, debug off"
 
 debug_client_off
 echo "create.pl, 1 mount, 4 threads, 2000 ops, debug off"
-for i in `seq 1 4`; do
-  perl $CREATE --silent -- $MNT -1 2000 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=4  --silent
 echo "create.pl --mcreate=0, 1 mount, 4 threads, 2000 ops, debug off"
 echo "create.pl --mcreate=0, 1 mount, 4 threads, 2000 ops, debug off"
-for i in `seq 1 4`; do
-  perl $CREATE --silent --mcreate=0 -- $MNT -1 2000 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=4  --use_mcreate=0 --silent
 echo "rename.pl, 1 mount, 4 threads, 2000 ops, debug off"
 echo "rename.pl, 1 mount, 4 threads, 2000 ops, debug off"
-for i in `seq 1 4`; do
-  perl rename.pl --silent $MNT 2000 &
-done
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=4 --silent
 
 debug_client_on
 echo "create.pl, 1 mount, 8 threads, 500 ops, debug on"
 
 debug_client_on
 echo "create.pl, 1 mount, 8 threads, 500 ops, debug on"
-for i in `seq 1 8`; do
-  perl $CREATE --silent -- $MNT -1 500 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=500 --num_threads=8  --silent
 echo "create.pl --mcreate=0, 1 mount, 8 threads, 500 ops, debug on"
 echo "create.pl --mcreate=0, 1 mount, 8 threads, 500 ops, debug on"
-for i in `seq 1 8`; do
-  perl $CREATE --silent --mcreate=0 -- $MNT -1 500 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=500 --num_threads=8  --use_mcreate=0 --silent
 echo "rename.pl, 1 mount, 8 threads, 2000 ops, debug on"
 echo "rename.pl, 1 mount, 8 threads, 2000 ops, debug on"
-for i in `seq 1 8`; do
-  perl rename.pl --silent $MNT 2000 &
-done
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=8 --silent
 
 debug_client_off
 echo "create.pl, 1 mount, 8 threads, 2000 ops, debug off"
 
 debug_client_off
 echo "create.pl, 1 mount, 8 threads, 2000 ops, debug off"
-for i in `seq 1 8`; do
-  perl $CREATE --silent -- $MNT -1 2000 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=8  --silent
 echo "create.pl --mcreate=0, 1 mount, 8 threads, 2000 ops, debug off"
 echo "create.pl --mcreate=0, 1 mount, 8 threads, 2000 ops, debug off"
-for i in `seq 1 8`; do
-  perl $CREATE --silent --mcreate=0 -- $MNT -1 2000 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=8  --use_mcreate=0 --silent
 echo "rename.pl, 1 mount, 8 threads, 2000 ops, debug off"
 echo "rename.pl, 1 mount, 8 threads, 2000 ops, debug off"
-for i in `seq 1 8`; do
-  perl rename.pl --silent $MNT 2000 &
-done
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=8 --silent
+
 sh rundbench 1
 sh rundbench 2
 sh rundbench 4
 sh rundbench 1
 sh rundbench 2
 sh rundbench 4
index 0d2d836..919ea1f 100755 (executable)
@@ -5,7 +5,7 @@ set -vxe
 
 [ "$CONFIGS" -a -z "$SANITYN" ] && SANITYN=no
 [ "$CONFIGS" ] || CONFIGS="local lov"
 
 [ "$CONFIGS" -a -z "$SANITYN" ] && SANITYN=no
 [ "$CONFIGS" ] || CONFIGS="local lov"
-[ "$MAX_THREADS" ] || MAX_THREADS=50
+[ "$MAX_THREADS" ] || MAX_THREADS=10
 if [ -z "$THREADS" ]; then
        KB=`awk '/MemTotal:/ { print $2 }' /proc/meminfo`
        THREADS=`expr $KB / 16384`
 if [ -z "$THREADS" ]; then
        KB=`awk '/MemTotal:/ { print $2 }' /proc/meminfo`
        THREADS=`expr $KB / 16384`
@@ -76,7 +76,7 @@ for NAME in $CONFIGS; do
        if [ "$IOZONE_DIR" != "no" ]; then
                mount | grep $MNT || sh llmount.sh
                SPACE=`df $MNT | tail -1 | awk '{ print $4 }'`
        if [ "$IOZONE_DIR" != "no" ]; then
                mount | grep $MNT || sh llmount.sh
                SPACE=`df $MNT | tail -1 | awk '{ print $4 }'`
-               IOZ_THREADS=`expr $SPACE / $SIZE`
+               IOZ_THREADS=`expr $SPACE / \( $SIZE + $SIZE / 1000 \)`
                [ $THREADS -lt $IOZ_THREADS ] && IOZ_THREADS=$THREADS
 
                $DEBUG_OFF
                [ $THREADS -lt $IOZ_THREADS ] && IOZ_THREADS=$THREADS
 
                $DEBUG_OFF
index cb4f94d..983df93 100755 (executable)
@@ -6,10 +6,11 @@ config=${1:-$(basename $0 .sh)}.xml
 LMC=${LMC:-../utils/lmc -m $config}
 TMP=${TMP:-/tmp}
 
 LMC=${LMC:-../utils/lmc -m $config}
 TMP=${TMP:-/tmp}
 
-MDSDEV=$TMP/mds1
+MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
 MDSSIZE=50000
 MDSSIZE=50000
+FSTYPE=${FSTYPE:-ext3}
 
 
-OSTDEV=$TMP/ost1
+OSTDEV=${OSTDEV:-$TMP/ost1-`hostname`}
 OSTSIZE=200000
 
 rm -f $config
 OSTSIZE=200000
 
 rm -f $config
@@ -18,12 +19,12 @@ ${LMC} --add node --node localhost || exit 10
 ${LMC} --add net --node  localhost --nid localhost --nettype tcp || exit 11
 
 # configure mds server
 ${LMC} --add net --node  localhost --nid localhost --nettype tcp || exit 11
 
 # configure mds server
-${LMC}  --add mds  --node localhost --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 20
+${LMC}  --add mds  --node localhost --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE || exit 20
 
 # configure ost
 
 # configure ost
-${LMC} --add ost --node localhost --obd obd1 --obdtype obdecho || exit 30
+${LMC} --add ost --node localhost --obd obd1 --fstype $FSTYPE --obdtype obdecho || exit 30
 # configure ost
 # configure ost
-${LMC} --add ost --node localhost --obd obd2 --obdtype obdecho || exit 30
+${LMC} --add ost --node localhost --obd obd2 --fstype $FSTYPE --obdtype obdecho || exit 30
 
 ${LMC} --add cobd --node localhost --real_obd obd1 --cache_obd obd2
 
 
 ${LMC} --add cobd --node localhost --real_obd obd1 --cache_obd obd2
 
index 6156869..c5f3f12 100644 (file)
-#!/usr/bin/perl
+#!/usr/bin/perl -w
+use strict;
+$|++;
+
+$ENV{PATH}="/bin:/usr/bin";
+$ENV{ENV}="";
+$ENV{BASH_ENV}="";
+use POSIX ":sys_wait_h";
+
+use diagnostics;
 use Getopt::Long;
 
 use Getopt::Long;
 
+use vars qw(
+           $MAX_THREADS
+           );
+
+# Don't try to run more than this many threads concurrently.
+$MAX_THREADS = 16;
+
+# Initialize variables
 my $silent = 0;
 my $silent = 0;
-my $mcreate = 1; # should we use mcreate or open?
-my $files = 5;
+my $use_mcreate = 1; # should we use mcreate or open?
+my $num_files = 5;   # number of files to create
+my $iterations = 1;
+my $num_threads = 1;
+my $mountpt;
+my $num_mounts = -1;
 
 
+# Get options from the command line.
 GetOptions("silent!" => \$silent,
 GetOptions("silent!" => \$silent,
-           "mcreate=i" => \$mcreate,
-           "files=i" => \$files);
+           "use_mcreate=i" => \$use_mcreate,
+           "num_files=i" => \$num_files,
+          "mountpt=s" => \$mountpt,
+          "num_mounts=i" => \$num_mounts,
+          "iterations=i" => \$iterations,
+          "num_threads=i" => \$num_threads,
+          ) || die &usage;
+
+# Check for mandatory args.
+if (!$mountpt || 
+    !$num_mounts) {
+    die &usage;
+}
+
+if ($num_threads > $MAX_THREADS) {
+    print "\nMAX_THREADS is currently set to $MAX_THREADS.\n\n";
+    print "You will have to change this in the source\n";
+    print "if you really want to run with $num_threads threads.\n\n";
+    exit 1;
+}
 
 
-my $mtpt = shift || usage();
-my $mount_count = shift || usage();
-my $i = shift || usage();
-my $count = $i;
+# Initialize rand() function.
+srand (time ^ $$ ^ unpack "%L*", `ps axww | gzip`);
+
+#########################################################################
+### MAIN
+
+for (my $i=1; $i<=$num_threads; $i++) {
+    my $status = &fork_and_create($i);
+    last if ($status != 0);
+}
+
+# Wait for all our threads to finish: block in waitpid() and keep reaping
+# until it returns -1, which means there are no children left.
+my $child = 0;
+do {
+    $child = waitpid(-1, 0);
+} until $child == -1;
+sleep 1;
+
+exit 0;
+
+#########################################################################
+### SUBROUTINES
 
 sub usage () {
 
 sub usage () {
-    print "Usage: $0 [--silent] [--mcreate=n] [--files=n] <mnt prefix> <mnt count> <iterations>\n";
-    print "example: $0 /mnt/lustre 2 50\n";
-    print "         will test in /mnt/lustre1 and /mnt/lustre2\n";
-    print "         $0 /mnt/lustre -1 50\n";
-    print "         will test in /mnt/lustre only\n";
+    print "\nUsage: $0 [--silent] [--use_mcreate=n] [--num_files=n] [--iterations=n] [--num_threads=n] --mountpt=/path/to/lustre/mount --num_mounts=n\n\n";
+    print "\t--silent\tminimal output\n";
+    print "\t--use_mcreate=n\tuse mcreate to create files, default=1 (yes)\n";
+    print "\t--num_files=n\tnumber of files to create per iteration, default=5\n";
+    print "\t--iterations=n\tnumber of iterations to perform, default=1\n";
+    print "\t--num_threads=n\tnumber of threads to run, default=1\n";
+    print "\t--mountpt\tlocation of lustre mount\n";
+    print "\t--num_mounts=n\tnumber of lustre mounts to test across, default=-1 (single mount point without numeric suffix)\n\n";
+    print "example: $0 --mountpt=/mnt/lustre --num_mounts=2 --iterations=50\n";
+    print "         will perform 50 iterations in /mnt/lustre1 and /mnt/lustre2\n";
+    print "         $0 --mountpt=/mnt/lustre --num_mounts=-1 --iterations=50\n";
+    print "         will perform 50 iterations in /mnt/lustre only\n\n";
     exit;
 }
 
     exit;
 }
 
-sub do_open($) {
-    my $path = shift;
+#########################################################################
+# fork_and_create($thread_num)
+#
+# Fork one worker.  The parent returns 0 once the fork has succeeded; it
+# retries after a pause on EAGAIN and dies on any other fork error.
+# The child runs $iterations rounds of $num_files create/unlink pairs on
+# files named thread<N>.<d> under the mount point(s), unlinks any of those
+# files that still exist, prints "Done." and exits without returning.
+sub fork_and_create ($) {
+    my ($thread_num) = @_;
+    
+  FORK: {
+      if (my $pid = fork) {
+         # parent here
+         # child process pid is available in $pid
+         return 0;
+      } elsif (defined $pid) { # $pid is zero here if defined
+         # rand() was seeded in the parent before the fork, so every child
+         # inherits the same generator state.  Reseed with our own pid so
+         # the workers do not all replay one identical sequence.
+         srand (time ^ ($$ + ($$ << 15)));
+
+         my $current_iteration=1;
+         while ($current_iteration <= $iterations) {
+             for (my $i=1; $i<=$num_files; $i++) {
+                 # Pick a random mount (when several are in use) and a
+                 # random file index, and create that file.
+                 my $which = "";
+                 if ($num_mounts > 0) {
+                     $which = int(rand() * $num_mounts) + 1;
+                 }
+                 my $d = int(rand() * $num_files);
+                 do_open("${mountpt}${which}/thread${thread_num}.${d}");
+                 
+                 # Pick again, independently, and unlink; the target may
+                 # not exist, so a failed unlink is only reported.
+                 if ($num_mounts > 0) {
+                     $which = int(rand() * $num_mounts) + 1;
+                 }
+                 $d = int(rand() * $num_files);
+                 my $path = "${mountpt}${which}/thread${thread_num}.${d}";
+                 print  "Thread $thread_num: Unlink $path start [" . $$."]...\n" if !$silent;
+                 if (unlink($path)) {
+                     print "Thread $thread_num: Unlink done [$$] $path: Success\n" if !$silent;
+                 } else {
+                     print "Thread $thread_num: Unlink done [$$] $path: $!\n"if !$silent;
+                 }
+             }
+             # Progress report on STDERR every 100 iterations.
+             if (($current_iteration) % 100 == 0) {
+                 print STDERR "Thread $thread_num: " . $current_iteration . " operations [" . $$ . "]\n";
+             }
+             $current_iteration++;
+         }
+         
+         # Clean up whatever this thread left behind, going through one
+         # randomly chosen mount when several are in use.
+         my $which = "";
+         if ($num_mounts > 0) {
+             $which = int(rand() * $num_mounts) + 1;
+         }
+         for (my $d = 0; $d < $num_files; $d++) {
+             my $path = "${mountpt}${which}/thread${thread_num}.${d}";
+             unlink("$path") if (-e $path);
+         }
+         
+         print "Thread $thread_num: Done.\n";
+         
+         exit 0;
+
+      } elsif ($!{EAGAIN}) {
+          # EAGAIN, supposedly recoverable fork error.  Test the errno
+          # itself: the message text differs between systems and locales.
+         sleep 5;
+         redo FORK;
+      } else {
+          # weird fork error
+         die "Can't fork: $!\n";
+      }
+  }
+
+}
+
+#########################################################################
+
+sub do_open ($) {
+    my ($path) = @_;;
 
 
-    if ($mcreate) {
+    if ($use_mcreate) {
         my $tmp = `./mcreate $path`;
         if ($tmp) {
             print  "Creating $path [" . $$."]...\n" if !$silent;
         my $tmp = `./mcreate $path`;
         if ($tmp) {
             print  "Creating $path [" . $$."]...\n" if !$silent;
@@ -37,42 +167,9 @@ sub do_open($) {
         }
     } else {
         print  "Opening $path [" . $$."]...\n"if !$silent;
         }
     } else {
         print  "Opening $path [" . $$."]...\n"if !$silent;
-        open(FH, ">$path") || die "open($PATH): $!";
+        open(FH, ">$path") || die "open($path): $!";
         print  "Open done [$$] $path: Success\n"if !$silent;
         close(FH) || die;
     }
 }
 
         print  "Open done [$$] $path: Success\n"if !$silent;
         close(FH) || die;
     }
 }
 
-while ($i--) {
-    my $which = "";
-    if ($mount_count > 0) {
-        $which = int(rand() * $mount_count) + 1;
-    }
-    $d = int(rand() * $files);
-    do_open("$mtpt$which/$d");
-
-    if ($mount_count > 0) {
-        $which = int(rand() * $mount_count) + 1;
-    }
-    $d = int(rand() * $files);
-    $path = "$mtpt$which/$d";
-    print  "Unlink $path start [" . $$."]...\n"if !$silent;
-    if (unlink($path)) {
-        print  "Unlink done [$$] $path: Success\n"if !$silent;
-    } else {
-        print  "Unlink done [$$] $path: $!\n"if !$silent;
-    }
-    if (($count - $i) % 100 == 0) {
-        print STDERR ($count - $i) . " operations [" . $$ . "]\n";
-    }
-}
-
-my $which = "";
-if ($mount_count > 0) {
-    $which = int(rand() * $mount_count) + 1;
-}
-for ($d = 0; $d < $files; $d++) {
-    unlink("$mtpt$which/$d");
-}
-
-print "Done.\n";
index e660ea4..cc92c80 100644 (file)
@@ -41,7 +41,7 @@ int main(int argc, char **argv)
                 return 1;
         }
 
                 return 1;
         }
 
-        printf("directio on %s for %dx%lu blocks \n", argv[1], blocks,
+        printf("directio on %s for %dx%lu bytes \n", argv[1], blocks,
                st.st_blksize);
 
         seek = (off64_t)seek_blocks * (off64_t)st.st_blksize;
                st.st_blksize);
 
         seek = (off64_t)seek_blocks * (off64_t)st.st_blksize;
@@ -75,5 +75,6 @@ int main(int argc, char **argv)
                 return 1;
         }
 
                 return 1;
         }
 
+       printf("PASS\n");
         return 0;
 }
         return 0;
 }
index 335db41..b4fe5a4 100755 (executable)
@@ -21,8 +21,9 @@ CLIENTNID=${CLIENTNID:-$CLIENT}
 
 
 # FIXME: make LMC not require MDS for obdecho LOV
 
 
 # FIXME: make LMC not require MDS for obdecho LOV
-MDSDEV=${MDSDEV:-$TMP/mds1}
+MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
 MDSSIZE=10000
 MDSSIZE=10000
+FSTYPE=${FSTYPE:-ext3}
 
 STRIPE_BYTES=65536
 STRIPES_PER_OBJ=2      # 0 means stripe over all OSTs
 
 STRIPE_BYTES=65536
 STRIPES_PER_OBJ=2      # 0 means stripe over all OSTs
@@ -33,7 +34,7 @@ $LMC --add node --node $SERVER  || exit 1
 $LMC --add net --node $SERVER --nid $SERVERNID --nettype $NET || exit 2
 
 if (($LOV)); then
 $LMC --add net --node $SERVER --nid $SERVERNID --nettype $NET || exit 2
 
 if (($LOV)); then
-    $LMC --add mds --node $SERVER --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 10
+    $LMC --add mds --node $SERVER --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE || exit 10
     $LMC --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPE_BYTES --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 || exit 11
     $LMC --add ost --node $SERVER --lov lov1 --osdtype=obdecho || exit 12
     $LMC --add ost --node $SERVER --lov lov1 --osdtype=obdecho || exit 13
     $LMC --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPE_BYTES --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 || exit 11
     $LMC --add ost --node $SERVER --lov lov1 --osdtype=obdecho || exit 12
     $LMC --add ost --node $SERVER --lov lov1 --osdtype=obdecho || exit 13
index a2b1d5e..92a2342 100644 (file)
@@ -294,9 +294,10 @@ save_buffer(char *buffer, off_t bufferlength, int fd)
                if (size_by_seek == (off_t)-1)
                        prterr("save_buffer: lseek eof");
                else if (bufferlength > size_by_seek) {
                if (size_by_seek == (off_t)-1)
                        prterr("save_buffer: lseek eof");
                else if (bufferlength > size_by_seek) {
-                       warn("save_buffer: .fsxgood file too short... will
-save 0x%llx bytes instead of 0x%llx\n", (unsigned long long)size_by_seek,
-                            (unsigned long long)bufferlength);
+                       warn("save_buffer: .fsxgood file too short... will "
+                               "save 0x%llx bytes instead of 0x%llx\n", 
+                               (unsigned long long)size_by_seek,
+                               (unsigned long long)bufferlength);
                        bufferlength = size_by_seek;
                }
        }
                        bufferlength = size_by_seek;
                }
        }
@@ -310,8 +311,8 @@ save 0x%llx bytes instead of 0x%llx\n", (unsigned long long)size_by_seek,
                if (byteswritten == -1)
                        prterr("save_buffer write");
                else
                if (byteswritten == -1)
                        prterr("save_buffer write");
                else
-                       warn("save_buffer: short write, 0x%x bytes instead
-of 0x%llx\n",
+                       warn("save_buffer: short write, 0x%x bytes instead "
+                               "of 0x%llx\n",
                             (unsigned)byteswritten,
                             (unsigned long long)bufferlength);
        }
                             (unsigned)byteswritten,
                             (unsigned long long)bufferlength);
        }
@@ -372,11 +373,11 @@ check_buffers(unsigned offset, unsigned size)
                if (n) {
                        prt("\t0x%5x\n", n);
                        if (bad)
                if (n) {
                        prt("\t0x%5x\n", n);
                        if (bad)
-                               prt("operation# (mod 256) for the bad data
-may be %u\n", ((unsigned)op & 0xff));
+                               prt("operation# (mod 256) for the bad data "
+                                       "may be %u\n", ((unsigned)op & 0xff));
                        else
                        else
-                               prt("operation# (mod 256) for the bad data
-unknown, check HOLE and EXTEND ops\n");
+                               prt("operation# (mod 256) for the bad data "
+                                       "unknown, check HOLE and EXTEND ops\n");
                } else
                        prt("????????????????\n");
                report_failure(110);
                } else
                        prt("????????????????\n");
                report_failure(110);
@@ -927,33 +928,33 @@ void
 usage(void)
 {
        fprintf(stdout, "usage: %s",
 usage(void)
 {
        fprintf(stdout, "usage: %s",
-               "fsx [-dnqLOW] [-b opnum] [-c Prob] [-l flen] [-m
-start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t
-truncbdy] [-w writebdy] [-D startingop] [-N numops] [-P dirpath] [-S seed]
-fname\n\
-       -b opnum: beginning operation number (default 1)\n\
-       -c P: 1 in P chance of file close+open at each op (default infinity)\n\
-       -d: debug output for all operations [-d -d = more debugging]\n\
-       -l flen: the upper bound on file size (default 262144)\n\
-       -m startop:endop: monitor (print debug output) specified byte range
-(default 0:infinity)\n\
-       -n: no verifications of file size\n\
-       -o oplen: the upper bound on operation size (default 65536)\n\
-       -p progressinterval: debug output at specified operation interval\n\
-       -q: quieter operation\n\
-       -r readbdy: 4096 would make reads page aligned (default 1)\n\
-       -s style: 1 gives smaller truncates (default 0)\n\
-       -t truncbdy: 4096 would make truncates page aligned (default 1)\n\
-       -w writebdy: 4096 would make writes page aligned (default 1)\n\
-       -D startingop: debug output starting at specified operation\n\
-       -L: fsxLite - no file creations & no file size changes\n\
-       -N numops: total # operations to do (default infinity)\n\
-       -O: use oplen (see -o flag) for every op (default random)\n\
-       -P: save .fsxlog and .fsxgood files in dirpath (default ./)\n\
-       -S seed: for random # generator (default 1) 0 gets timestamp\n\
-       -W: mapped write operations DISabled\n\
-        -R: read() system calls only (mapped reads disabled)\n\
-       fname: this filename is REQUIRED (no default)\n");
+               "fsx [-dnqLOW] [-b opnum] [-c Prob] [-l flen] [-m "
+"start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t "
+"truncbdy] [-w writebdy] [-D startingop] [-N numops] [-P dirpath] [-S seed] "
+"fname\n"
+"      -b opnum: beginning operation number (default 1)\n"
+"      -c P: 1 in P chance of file close+open at each op (default infinity)\n"
+"      -d: debug output for all operations [-d -d = more debugging]\n"
+"      -l flen: the upper bound on file size (default 262144)\n"
+"      -m startop:endop: monitor (print debug output) specified byte range "
+"(default 0:infinity)\n"
+"      -n: no verifications of file size\n"
+"      -o oplen: the upper bound on operation size (default 65536)\n"
+"      -p progressinterval: debug output at specified operation interval\n"
+"      -q: quieter operation\n"
+"      -r readbdy: 4096 would make reads page aligned (default 1)\n"
+"      -s style: 1 gives smaller truncates (default 0)\n"
+"      -t truncbdy: 4096 would make truncates page aligned (default 1)\n"
+"      -w writebdy: 4096 would make writes page aligned (default 1)\n"
+"      -D startingop: debug output starting at specified operation\n"
+"      -L: fsxLite - no file creations & no file size changes\n"
+"      -N numops: total # operations to do (default infinity)\n"
+"      -O: use oplen (see -o flag) for every op (default random)\n"
+"      -P: save .fsxlog and .fsxgood files in dirpath (default ./)\n"
+"      -S seed: for random # generator (default 1) 0 gets timestamp\n"
+"      -W: mapped write operations DISabled\n"
+"        -R: read() system calls only (mapped reads disabled)\n"
+"      fname: this filename is REQUIRED (no default)\n");
        exit(90);
 }
 
        exit(90);
 }
 
@@ -1020,8 +1021,8 @@ main(int argc, char **argv)
                case 'b':
                        simulatedopcount = getnum(optarg, &endp);
                        if (!quiet)
                case 'b':
                        simulatedopcount = getnum(optarg, &endp);
                        if (!quiet)
-                               fprintf(stdout, "Will begin at operation
-%ld\n",
+                               fprintf(stdout, "Will begin at operation "
+                                       "%ld\n",
                                        simulatedopcount);
                        if (simulatedopcount == 0)
                                usage();
                                        simulatedopcount);
                        if (simulatedopcount == 0)
                                usage();
@@ -1206,8 +1207,8 @@ main(int argc, char **argv)
                                prterr(fname);
                                warn("main: error on write");
                        } else
                                prterr(fname);
                                warn("main: error on write");
                        } else
-                               warn("main: short write, 0x%x bytes instead
-of 0x%x\n",
+                               warn("main: short write, 0x%x bytes instead "
+                                       "of 0x%x\n",
                                     (unsigned)written, maxfilelen);
                        exit(98);
                }
                                     (unsigned)written, maxfilelen);
                        exit(98);
                }
index b8d234b..745f113 100644 (file)
@@ -8,17 +8,21 @@ STDERR->autoflush(1);
 my ($line, $memory);
 my $debug_line = 0;
 
 my ($line, $memory);
 my $debug_line = 0;
 
+my $total = 0;
+my $max = 0;
+
 while ($line = <>) {
     $debug_line++;
     my ($file, $func, $lno, $name, $size, $addr, $type);
 while ($line = <>) {
     $debug_line++;
     my ($file, $func, $lno, $name, $size, $addr, $type);
-    if ($line =~ m/^.*\((.*):(\d+):(.*)\(\) (\d+ \| )?\d+\+\d+\): [vk](.*) '(.*)': (\d+) at (.*) \(tot .*$/) {
+    if ($line =~ m/^.*\((.*):(\d+):(.*)\(\) (\d+ \| )?\d+\+\d+\): (k|v|slab-)(.*) '(.*)': (\d+) at (.*) \(tot (.*)\).*$/) {
         $file = $1;
         $lno = $2;
         $func = $3;
         $file = $1;
         $lno = $2;
         $func = $3;
-        $type = $5;
-        $name = $6;
-        $size = $7;
-        $addr = $8;
+        $type = $6;
+        $name = $7;
+        $size = $8;
+        $addr = $9;
+        $tot = $10;
 
        # we can't dump the log after portals has exited, so skip "leaks"
        # from memory freed in the portals module unloading.
 
        # we can't dump the log after portals has exited, so skip "leaks"
        # from memory freed in the portals module unloading.
@@ -31,13 +35,24 @@ while ($line = <>) {
         next;
     }
 
         next;
     }
 
-    if ($type eq 'malloced') {
+    if (index($type, 'alloced') >= 0) {
+        if (defined($memory->{$addr})) {
+            print STDERR "*** Two allocs with the same address ($size bytes at $addr, $file:$func:$lno)\n";
+            print STDERR "    first malloc at $memory->{$addr}->{file}:$memory->{$addr}->{func}:$memory->{$addr}->{lno}, second at $file:$func:$lno\n";
+            next;
+        }
+
         $memory->{$addr}->{name} = $name;
         $memory->{$addr}->{size} = $size;
         $memory->{$addr}->{file} = $file;
         $memory->{$addr}->{func} = $func;
         $memory->{$addr}->{lno} = $lno;
         $memory->{$addr}->{debug_line} = $debug_line;
         $memory->{$addr}->{name} = $name;
         $memory->{$addr}->{size} = $size;
         $memory->{$addr}->{file} = $file;
         $memory->{$addr}->{func} = $func;
         $memory->{$addr}->{lno} = $lno;
         $memory->{$addr}->{debug_line} = $debug_line;
+
+        $total += $size;
+        if ($total > $max) {
+            $max = $total;
+        }
     } else {
         if (!defined($memory->{$addr})) {
             print STDERR "*** Free without malloc ($size bytes at $addr, $file:$func:$lno)\n";
     } else {
         if (!defined($memory->{$addr})) {
             print STDERR "*** Free without malloc ($size bytes at $addr, $file:$func:$lno)\n";
@@ -52,6 +67,11 @@ while ($line = <>) {
         }
 
         delete $memory->{$addr};
         }
 
         delete $memory->{$addr};
+        $total -= $size;
+    }
+    if ($total != int($tot)) {
+        print "kernel total $tot != my total $total\n";
+        $total = $tot;
     }
 }
 
     }
 }
 
@@ -66,4 +86,4 @@ foreach $key (@sorted) {
     print STDERR "*** Leak: $memory->{$key}->{size} bytes allocated at $key ($memory->{$key}->{file}:$memory->{$key}->{func}:$memory->{$key}->{lno}, debug file line $memory->{$key}->{debug_line})\n";
 }
 
     print STDERR "*** Leak: $memory->{$key}->{size} bytes allocated at $key ($memory->{$key}->{file}:$memory->{$key}->{func}:$memory->{$key}->{lno}, debug file line $memory->{$key}->{debug_line})\n";
 }
 
-print "Done.\n";
+print "maximum used: $max, amount leaked: $total\n";
index 20c8c20..dbfd7f0 100755 (executable)
@@ -4,10 +4,10 @@ LCMD=$TMP/lkcd-cmds-`hostname`
 echo "Storing LKCD module info in $LCMD"
 cat /tmp/ogdb-`hostname` | while read JUNK M JUNK; do
        MOD="../$M"
 echo "Storing LKCD module info in $LCMD"
 cat /tmp/ogdb-`hostname` | while read JUNK M JUNK; do
        MOD="../$M"
-       MAP=`echo $MOD | sed -e 's/\.o$/.map/'`
-       MODNAME=`basename $MOD | sed -e 's/\.o$//'`
+       MODNAME="`basename $MOD .o`"
+       MAP="$TMP/$MODNAME.map"
 
        nm $MOD > $MAP
        echo namelist -a $PWD/$MOD  | tee -a $LCMD
 
        nm $MOD > $MAP
        echo namelist -a $PWD/$MOD  | tee -a $LCMD
-       echo symtab -a $PWD/$MAP $MODNAME | tee -a $LCMD
+       echo symtab -a $MAP $MODNAME | tee -a $LCMD
 done
 done
index 5afade1..3e3e03b 100644 (file)
@@ -1,6 +1,8 @@
 #!/bin/sh
 
 #!/bin/sh
 
-LCONF=${LCONF:-../utils/lconf}
+PATH=`dirname $0`/../utils:$PATH
+
+LCONF=${LCONF:-lconf}
 NAME=${NAME:-echo}
 
 config=$NAME.xml
 NAME=${NAME:-echo}
 
 config=$NAME.xml
@@ -17,5 +19,5 @@ $LCONF $lustre_opt --reformat --gdb $OPTS $config || exit 4
 cat <<EOF
 
 run getattr tests as:
 cat <<EOF
 
 run getattr tests as:
-../utils/lctl --device '\$ECHO_$SERVER' test_getattr 1000000
+`dirname $0`../utils/lctl --device '\$ECHO_$SERVER' test_getattr 1000000
 EOF
 EOF
index 8e3b37b..d8f37c5 100755 (executable)
@@ -30,5 +30,5 @@ if [ "$1" = "-v" ]; then
   verbose="-v"
 fi
 
   verbose="-v"
 fi
 
-${LCONF} $portals_opt $lustre_opt $node_opt ${REFORMAT:---reformat} --gdb \
-    $verbose $conf_opt  || exit 2
+${LCONF} $portals_opt $lustre_opt $node_opt ${REFORMAT:---reformat} \
+    ${GDB:---gdb} $verbose $conf_opt  || exit 2
index 25d05d2..00f2391 100755 (executable)
@@ -7,12 +7,12 @@ config=${1:-local.xml}
 LMC="${LMC:-lmc} -m $config"
 TMP=${TMP:-/tmp}
 
 LMC="${LMC:-lmc} -m $config"
 TMP=${TMP:-/tmp}
 
-MDSDEV=${MDSDEV:-$TMP/mds1}
+MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
 MDSSIZE=${MDSSIZE:-50000}
 MDSSIZE=${MDSSIZE:-50000}
+FSTYPE=${FSTYPE:-ext3}
 
 
-OSTDEV=${OSTDEV:-$TMP/ost1}
+OSTDEV=${OSTDEV:-$TMP/ost1-`hostname`}
 OSTSIZE=${OSTSIZE:-200000}
 OSTSIZE=${OSTSIZE:-200000}
-FSTYPE=${FSTYPE:-ext3}
 
 rm -f $config
 
 
 rm -f $config
 
@@ -21,7 +21,7 @@ ${LMC} --add node --node localhost || exit 10
 ${LMC} --add net --node  localhost --nid localhost --nettype tcp || exit 11
 
 # configure mds server
 ${LMC} --add net --node  localhost --nid localhost --nettype tcp || exit 11
 
 # configure mds server
-${LMC} --add mds --nspath /mnt/mds_ns  --node localhost --mds mds1  --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE || exit 20
+${LMC} --add mds --nspath /mnt/mds_ns  --node localhost --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE || exit 20
 
 # configure ost
 ${LMC} --add ost --nspath /mnt/ost_ns --node localhost --ost ost1  --fstype $FSTYPE --dev $OSTDEV --size  $OSTSIZE || exit 30
 
 # configure ost
 ${LMC} --add ost --nspath /mnt/ost_ns --node localhost --ost ost1  --fstype $FSTYPE --dev $OSTDEV --size  $OSTSIZE || exit 30
index 3956f9e..79e9590 100755 (executable)
@@ -7,13 +7,16 @@ config=${1:-lov.xml}
 LMC=${LMC:-lmc}
 TMP=${TMP:-/tmp}
 
 LMC=${LMC:-lmc}
 TMP=${TMP:-/tmp}
 
-MDSDEV=${MDSDEV:-$TMP/mds1}
+MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
 MDSSIZE=${MDSSIZE:-50000}
 MDSSIZE=${MDSSIZE:-50000}
+FSTYPE=${FSTYPE:-ext3}
 
 
-OSTDEV1=${OSTDEV1:-$TMP/ost1}
-OSTDEV2=${OSTDEV2:-$TMP/ost2}
-OSTDEV3=${OSTDEV3:-$TMP/ost3}
+OSTDEV1=${OSTDEV1:-$TMP/ost1-`hostname`}
+OSTDEV2=${OSTDEV2:-$TMP/ost2-`hostname`}
+OSTDEV3=${OSTDEV3:-$TMP/ost3-`hostname`}
 OSTSIZE=${OSTSIZE:-100000}
 OSTSIZE=${OSTSIZE:-100000}
+# 1 to config an echo client instead of llite
+ECHO_CLIENT=${ECHO_CLIENT:-}
 
 STRIPE_BYTES=65536
 STRIPES_PER_OBJ=2      # 0 means stripe over all OSTs
 
 STRIPE_BYTES=65536
 STRIPES_PER_OBJ=2      # 0 means stripe over all OSTs
@@ -22,13 +25,17 @@ STRIPES_PER_OBJ=2   # 0 means stripe over all OSTs
 ${LMC} -o $config --add net --node localhost --nid localhost --nettype tcp || exit 1
 
 # configure mds server
 ${LMC} -o $config --add net --node localhost --nid localhost --nettype tcp || exit 1
 
 # configure mds server
-${LMC} -m $config --format --add mds --node localhost --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 10
+${LMC} -m $config --format --add mds --node localhost --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE || exit 10
 
 # configure ost
 ${LMC} -m $config --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPE_BYTES --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 || exit 20
 
 # configure ost
 ${LMC} -m $config --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPE_BYTES --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 || exit 20
-${LMC} -m $config --add ost --node localhost --lov lov1 --dev $OSTDEV1 --size $OSTSIZE || exit 21
-${LMC} -m $config --add ost --node localhost --lov lov1 --dev $OSTDEV2 --size $OSTSIZE || exit 22
-${LMC} -m $config --add ost --node localhost --lov lov1 --dev $OSTDEV3 --size $OSTSIZE || exit 23
-
-# create client config
-${LMC} -m $config  --add mtpt --node localhost --path /mnt/lustre --mds mds1 --lov lov1 || exit 30
+${LMC} -m $config --add ost --node localhost --lov lov1 --fstype $FSTYPE --dev $OSTDEV1 --size $OSTSIZE || exit 21
+${LMC} -m $config --add ost --node localhost --lov lov1 --fstype $FSTYPE --dev $OSTDEV2 --size $OSTSIZE || exit 22
+${LMC} -m $config --add ost --node localhost --lov lov1 --fstype $FSTYPE --dev $OSTDEV3 --size $OSTSIZE || exit 23
+
+if [ -z "$ECHO_CLIENT" ]; then
+       # create client config
+       ${LMC} -m $config  --add mtpt --node localhost --path /mnt/lustre --mds mds1 --lov lov1 || exit 30
+else
+       ${LMC} -m $config  --add echo_client --node localhost --ost lov1 || exit 31
+fi
index 07de3ed..40ef46a 100644 (file)
@@ -7,10 +7,11 @@ PATH=$SRCDIR:$SRCDIR/../utils:$PATH
 LMC="${LMC:-lmc} -m $config"
 TMP=${TMP:-/tmp}
 
 LMC="${LMC:-lmc} -m $config"
 TMP=${TMP:-/tmp}
 
-MDSDEV=${MDSDEV:-$TMP/mds1}
+MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
 MDSSIZE=${MDSSIZE:-50000}
 MDSSIZE=${MDSSIZE:-50000}
+FSTYPE=${FSTYPE:-ext3}
 
 
-OSTDEV=${OSTDEV:-$TMP/ost1}
+OSTDEV=${OSTDEV:-$TMP/ost1-`hostname`}
 OSTSIZE=${OSTSIZE:-200000}
 
 rm -f $config
 OSTSIZE=${OSTSIZE:-200000}
 
 rm -f $config
@@ -20,10 +21,10 @@ ${LMC} --add node --node localhost || exit 10
 ${LMC} --add net --node  localhost --nid localhost --nettype tcp || exit 11
 
 # configure mds server
 ${LMC} --add net --node  localhost --nid localhost --nettype tcp || exit 11
 
 # configure mds server
-${LMC} --add mds  --node localhost --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 20
+${LMC} --add mds  --node localhost --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE || exit 20
 
 # configure ost
 
 # configure ost
-${LMC} --add ost --node localhost --ost ost1 --dev $OSTDEV --size  $OSTSIZE || exit 30
+${LMC} --add ost --node localhost --ost ost1 --fstype $FSTYPE --dev $OSTDEV --size  $OSTSIZE || exit 30
 
 # create client config
 ${LMC} --add mtpt --node localhost --path /mnt/lustre1 --mds mds1 --ost ost1 || exit 40
 
 # create client config
 ${LMC} --add mtpt --node localhost --path /mnt/lustre1 --mds mds1 --ost ost1 || exit 40
index 27b570d..6d94362 100644 (file)
@@ -7,12 +7,13 @@ config=${1-mds-bug.xml}
 LMC=${LMC-../utils/lmc}
 TMP=${TMP:-/tmp}
 
 LMC=${LMC-../utils/lmc}
 TMP=${TMP:-/tmp}
 
-MDSDEV=$TMP/mds1
-MDSDEV2=$TMP/mds2
+MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
+MDSDEV2=${MDSDEV:-$TMP/mds2-`hostname`}
 MDSSIZE=50000
 MDSSIZE=50000
+FSTYPE=${FSTYPE:-ext3}
 
 
-OSTDEV1=$TMP/ost1
-OSTDEV2=$TMP/ost2
+OSTDEV1=${OSTDEV1:-$TMP/ost1-`hostname`}
+OSTDEV2=${OSTDEV2:-$TMP/ost2-`hostname`}
 OSTSIZE=100000
 
 MDSNODE=uml1
 OSTSIZE=100000
 
 MDSNODE=uml1
@@ -25,19 +26,15 @@ ${LMC} -m $config --add net --node $OSTNODE --nid $OSTNODE --nettype tcp || exit
 ${LMC} -m $config --add net --node $CLIENT --nid $CLIENT --nettype tcp || exit 3
 
 # configure mds server
 ${LMC} -m $config --add net --node $CLIENT --nid $CLIENT --nettype tcp || exit 3
 
 # configure mds server
-${LMC} -m $config --format --add mds --node $MDSNODE --mds mds1 --dev $MDSDEV --size $MDSSIZE ||exit 10
-${LMC} -m $config --format --add mds --node $MDSNODE --mds mds2 --dev $MDSDEV2 --size $MDSSIZE ||exit 10
+${LMC} -m $config --format --add mds --node $MDSNODE --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE ||exit 10
+${LMC} -m $config --format --add mds --node $MDSNODE --mds mds2 --fstype $FSTYPE --dev $MDSDEV2 --size $MDSSIZE ||exit 10
 
 # configure ost
 ${LMC} -m $config --add lov --lov lov1 --mds mds1 --stripe_sz 65536 --stripe_cnt 0 --stripe_pattern 0 || exit 20
 ${LMC} -m $config --add lov --lov lov2 --mds mds2 --stripe_sz 65536 --stripe_cnt 0 --stripe_pattern 0 || exit 20
 
 # configure ost
 ${LMC} -m $config --add lov --lov lov1 --mds mds1 --stripe_sz 65536 --stripe_cnt 0 --stripe_pattern 0 || exit 20
 ${LMC} -m $config --add lov --lov lov2 --mds mds2 --stripe_sz 65536 --stripe_cnt 0 --stripe_pattern 0 || exit 20
-${LMC} -m $config --add ost --node $OSTNODE --lov lov1 --dev $OSTDEV1 --size $OSTSIZE || exit 21
-${LMC} -m $config --add ost --node $OSTNODE --lov lov2 --dev $OSTDEV2 --size $OSTSIZE || exit 22
+${LMC} -m $config --add ost --node $OSTNODE --lov lov1 --fstype $FSTYPE --dev $OSTDEV1 --size $OSTSIZE || exit 21
+${LMC} -m $config --add ost --node $OSTNODE --lov lov2 --fstype $FSTYPE --dev $OSTDEV2 --size $OSTSIZE || exit 22
 
 # create client config
 ${LMC} -m $config --add mtpt --node $CLIENT --path /mnt/lustre --mds mds1 --lov lov1 || exit 30
 ${LMC} -m $config --add mtpt --node $CLIENT --path /mnt/lustre2 --mds mds2 --lov lov2 || exit 30
 
 # create client config
 ${LMC} -m $config --add mtpt --node $CLIENT --path /mnt/lustre --mds mds1 --lov lov1 || exit 30
 ${LMC} -m $config --add mtpt --node $CLIENT --path /mnt/lustre2 --mds mds2 --lov lov2 || exit 30
-
-
-
-
index fde7d36..8250f96 100644 (file)
@@ -11,6 +11,8 @@
 #include <sys/stat.h>
 #include <dirent.h>
 #include <string.h>
 #include <sys/stat.h>
 #include <dirent.h>
 #include <string.h>
+#include <unistd.h>
+#include <stdlib.h>
 
 int main(int argc, char **argv)
 {
 
 int main(int argc, char **argv)
 {
@@ -34,7 +36,7 @@ int main(int argc, char **argv)
         fprintf(stderr, "creating special file %s\n", dname1);
         rc = mknod(dname1, 0777|S_IFIFO, 0);
         if (rc == -1) {
         fprintf(stderr, "creating special file %s\n", dname1);
         rc = mknod(dname1, 0777|S_IFIFO, 0);
         if (rc == -1) {
-                fprintf(stderr, "creating %s fails: %s\n", 
+                fprintf(stderr, "creating %s fails: %s\n",
                         dname1, strerror(errno));
                 exit(1);
         }
                         dname1, strerror(errno));
                 exit(1);
         }
@@ -47,7 +49,7 @@ int main(int argc, char **argv)
                         dname1, strerror(errno));
                 exit(1);
         }
                         dname1, strerror(errno));
                 exit(1);
         }
-        
+
         // doesn't matter if the two dirs are the same??
         fddev2 = open(dname2, O_RDONLY | O_NONBLOCK);
         if (fddev2 == -1) {
         // doesn't matter if the two dirs are the same??
         fddev2 = open(dname2, O_RDONLY | O_NONBLOCK);
         if (fddev2 == -1) {
@@ -55,40 +57,38 @@ int main(int argc, char **argv)
                         dname2, strerror(errno));
                 exit(1);
         }
                         dname2, strerror(errno));
                 exit(1);
         }
-        
+
         // delete the special file
         fprintf (stderr, "unlinking %s\n", dname1);
         rc = unlink(dname1);
         if (rc) {
         // delete the special file
         fprintf (stderr, "unlinking %s\n", dname1);
         rc = unlink(dname1);
         if (rc) {
-                fprintf(stderr, "unlink %s error: %s\n", 
+                fprintf(stderr, "unlink %s error: %s\n",
                         dname1, strerror(errno));
                 exit(1);
         }
 
                         dname1, strerror(errno));
                 exit(1);
         }
 
-        if (access(dname2, F_OK) == 0){
+        if (access(dname2, F_OK) == 0) {
                 fprintf(stderr, "%s still exists\n", dname2);
                 exit(1);
         }
 
                 fprintf(stderr, "%s still exists\n", dname2);
                 exit(1);
         }
 
-        if (access(dname1, F_OK) == 0){
+        if (access(dname1, F_OK) == 0) {
                 fprintf(stderr, "%s still exists\n", dname1);
                 exit(1);
         }
 
         // fchmod one special file
         rc = fchmod (fddev1, 0777);
                 fprintf(stderr, "%s still exists\n", dname1);
                 exit(1);
         }
 
         // fchmod one special file
         rc = fchmod (fddev1, 0777);
-        if(rc == -1)
-        {
-                fprintf(stderr, "fchmod unlinked special file %s fails: %s\n", 
+        if (rc == -1) {
+                fprintf(stderr, "fchmod unlinked special file %s fails: %s\n",
                         dname1, strerror(errno));
                 exit(1);
         }
                         dname1, strerror(errno));
                 exit(1);
         }
-                
+
         // fstat two files to check if they are the same
         rc = fstat(fddev1, &st1);
         // fstat two files to check if they are the same
         rc = fstat(fddev1, &st1);
-        if(rc == -1)
-        {
-                fprintf(stderr, "fstat unlinked special file %s fails: %s\n", 
+        if (rc == -1) {
+                fprintf(stderr, "fstat unlinked special file %s fails: %s\n",
                         dname1, strerror(errno));
                 exit(1);
         }
                         dname1, strerror(errno));
                 exit(1);
         }
@@ -103,7 +103,7 @@ int main(int argc, char **argv)
         if (st1.st_mode != st2.st_mode) {  // can we do this?
                 fprintf(stderr, "fstat different value on %s and %s\n",                                 dname1, dname2);
                 exit(1);
         if (st1.st_mode != st2.st_mode) {  // can we do this?
                 fprintf(stderr, "fstat different value on %s and %s\n",                                 dname1, dname2);
                 exit(1);
-        }        
+        }
 
         fprintf(stderr, "Ok, everything goes well.\n");
         return 0;
 
         fprintf(stderr, "Ok, everything goes well.\n");
         return 0;
index 7d8cc6b..7b97309 100644 (file)
@@ -18,8 +18,8 @@
 #include <unistd.h>
 
 typedef struct flag_mapping {
 #include <unistd.h>
 
 typedef struct flag_mapping {
-       char string[20];
-       int  flag;
+       const char *string;
+       const int  flag;
 } FLAG_MAPPING;
 
 FLAG_MAPPING flag_table[] = {
 } FLAG_MAPPING;
 
 FLAG_MAPPING flag_table[] = {
@@ -67,13 +67,13 @@ int main(int argc, char** argv)
                 case 'f': {
                         char *tmp;
 
                 case 'f': {
                         char *tmp;
 
-                        cloned_flags = (char *)malloc(strlen(optarg));
+                        cloned_flags = (char *)malloc(strlen(optarg)+1);
                         if (cloned_flags == NULL) {
                                 fprintf(stderr, "Insufficient memory.\n");
                                 exit(-1);
                         }
 
                         if (cloned_flags == NULL) {
                                 fprintf(stderr, "Insufficient memory.\n");
                                 exit(-1);
                         }
 
-                        strncpy(cloned_flags, optarg, strlen(optarg));
+                        strncpy(cloned_flags, optarg, strlen(optarg)+1);
                         for (tmp = strtok(optarg, ":|"); tmp;
                              tmp = strtok(NULL, ":|")) {
                                 int i = 0;
                         for (tmp = strtok(optarg, ":|"); tmp;
                              tmp = strtok(NULL, ":|")) {
                                 int i = 0;
index e7671c8..96632a9 100644 (file)
@@ -3,16 +3,18 @@
 #include <string.h>
 #include <errno.h>
 #include <sys/types.h>
 #include <string.h>
 #include <errno.h>
 #include <sys/types.h>
+#include <sys/stat.h>
 #include <stdlib.h>
 #include <unistd.h>
 
 #include <stdlib.h>
 #include <unistd.h>
 
-#define T1 "write before unlink\n"
-#define T2 "write after unlink\n"
+#define T1 "write data before unlink\n"
+#define T2 "write data after unlink\n"
 char buf[128];
 
 int main(int argc, char **argv)
 {
 char buf[128];
 
 int main(int argc, char **argv)
 {
-       char *fname, *fname2;
+        char *fname, *fname2;
+        struct stat st;
         int fd, rc;
 
         if (argc < 2 || argc > 3) {
         int fd, rc;
 
         if (argc < 2 || argc > 3) {
@@ -20,11 +22,11 @@ int main(int argc, char **argv)
                 exit(1);
         }
 
                 exit(1);
         }
 
-       fname = argv[1];
-       if (argc == 3)
-               fname2 = argv[2];
-       else
-               fname2 = argv[1];
+        fname = argv[1];
+        if (argc == 3)
+                fname2 = argv[2];
+        else
+                fname2 = argv[1];
 
         fprintf(stderr, "opening\n");
         fd = open(fname, O_RDWR | O_TRUNC | O_CREAT, 0644);
 
         fprintf(stderr, "opening\n");
         fd = open(fname, O_RDWR | O_TRUNC | O_CREAT, 0644);
@@ -36,50 +38,67 @@ int main(int argc, char **argv)
         fprintf(stderr, "writing\n");
         rc = write(fd, T1, strlen(T1) + 1);
         if (rc != strlen(T1) + 1) {
         fprintf(stderr, "writing\n");
         rc = write(fd, T1, strlen(T1) + 1);
         if (rc != strlen(T1) + 1) {
-                fprintf(stderr, "write (normal) %s\n", strerror(errno));
+                fprintf(stderr, "write (normal) %s (rc %d)\n",
+                        strerror(errno), rc);
+                exit(1);
+        }
+
+        if (argc == 3) {
+                fprintf(stderr, "closing %s\n", fname);
+                rc = close(fd);
+                if (rc) {
+                        fprintf(stderr, "close (normal) %s\n", strerror(errno));
+                        exit(1);
+                }
+
+                fprintf(stderr, "opening %s\n", fname2);
+                fd = open(fname2, O_RDWR);
+                if (fd == -1) {
+                        fprintf(stderr, "open (unlink) %s\n", strerror(errno));
+                        exit(1);
+                }
+
+                fprintf (stderr, "unlinking %s\n", fname2);
+                rc = unlink(fname2);
+                if (rc) {
+                        fprintf(stderr, "unlink %s\n", strerror(errno));
+                        exit(1);
+                }
+
+                if (access(fname2, F_OK) == 0) {
+                        fprintf(stderr, "%s still exists\n", fname2);
+                        exit(1);
+                }
+        } else {
+                fprintf(stderr, "resetting fd offset\n");
+                rc = lseek(fd, 0, SEEK_SET);
+                if (rc) {
+                        fprintf(stderr, "seek %s\n", strerror(errno));
+                        exit(1);
+                }
+
+                printf("unlink %s and press enter\n", fname);
+                getc(stdin);
+        }
+
+        if (access(fname, F_OK) == 0) {
+                fprintf(stderr, "%s still exists\n", fname);
                 exit(1);
         }
 
                 exit(1);
         }
 
-       if (argc == 3) {
-               fprintf(stderr, "closing %s\n", fname);
-               rc = close(fd);
-               if (rc) {
-                       fprintf(stderr, "close (normal) %s\n", strerror(errno));
-                       exit(1);
-               }
-
-               fprintf(stderr, "opening %s\n", fname2);
-               fd = open(fname2, O_RDWR);
-               if (fd == -1) {
-                       fprintf(stderr, "open (unlink) %s\n", strerror(errno));
-                       exit(1);
-               }
-
-               fprintf (stderr, "unlinking %s\n", fname2);
-               rc = unlink(fname2);
-               if (rc) {
-                       fprintf(stderr, "unlink %s\n", strerror(errno));
-                       exit(1);
-               }
-
-               if (access(fname2, F_OK) == 0) {
-                       fprintf(stderr, "%s still exists\n", fname2);
-                       exit(1);
-               }
-       } else {
-               printf("unlink %s and press enter\n", fname);
-               getc(stdin);
-       }
-
-       if (access(fname, F_OK) == 0) {
-               fprintf(stderr, "%s still exists\n", fname);
-               exit(1);
-       }
+        fprintf(stderr, "fstating\n");
+        rc = fstat(fd, &st);
+        if (rc) {
+                fprintf(stderr, "fstat (unlink) %s\n", strerror(errno));
+                exit(1);
+        }
+        if (st.st_nlink != 0)
+                fprintf(stderr, "st_nlink = %d\n", (int)st.st_nlink);
 
         fprintf(stderr, "reading\n");
         rc = read(fd, buf, strlen(T1) + 1);
         if (rc != strlen(T1) + 1) {
 
         fprintf(stderr, "reading\n");
         rc = read(fd, buf, strlen(T1) + 1);
         if (rc != strlen(T1) + 1) {
-                fprintf(stderr, "read (unlink) %s rc %d\n",
+                fprintf(stderr, "read (unlink) %s (rc %d)\n",
                         strerror(errno), rc);
                 exit(1);
         }
                         strerror(errno), rc);
                 exit(1);
         }
@@ -92,7 +111,7 @@ int main(int argc, char **argv)
 
         fprintf(stderr, "truncating\n");
         rc = ftruncate(fd, 0);
 
         fprintf(stderr, "truncating\n");
         rc = ftruncate(fd, 0);
-        if (rc ) {
+        if (rc) {
                 fprintf(stderr, "truncate (unlink) %s\n", strerror(errno));
                 exit(1);
         }
                 fprintf(stderr, "truncate (unlink) %s\n", strerror(errno));
                 exit(1);
         }
@@ -124,8 +143,8 @@ int main(int argc, char **argv)
         fprintf(stderr, "reading again\n");
         rc = read(fd, buf, strlen(T2) + 1);
         if (rc != strlen(T2) + 1) {
         fprintf(stderr, "reading again\n");
         rc = read(fd, buf, strlen(T2) + 1);
         if (rc != strlen(T2) + 1) {
-                fprintf(stderr, "read (after unlink rewrite) %s\n",
-                        strerror(errno));
+                fprintf(stderr, "read (after unlink rewrite) %s (rc %d)\n",
+                        strerror(errno), rc);
                 exit(1);
         }
 
                 exit(1);
         }
 
@@ -135,7 +154,7 @@ int main(int argc, char **argv)
                 exit(1);
         }
 
                 exit(1);
         }
 
-        fprintf(stderr, "closing again\n");
+        fprintf(stderr, "closing\n");
         rc = close(fd);
         if (rc) {
                 fprintf(stderr, "close (unlink) %s\n", strerror(errno));
         rc = close(fd);
         if (rc) {
                 fprintf(stderr, "close (unlink) %s\n", strerror(errno));
index c8f85ee..fefd2d6 100755 (executable)
@@ -22,9 +22,10 @@ CLIENT=${CLIENT:-mdev8}
 NETWORKTYPE=${NETWORKTYPE:-tcp}
 MOUNTPT=${MOUNTPT:-/mnt/lustre}
 CONFIG=${CONFIG:-recovery-cleanup.xml}
 NETWORKTYPE=${NETWORKTYPE:-tcp}
 MOUNTPT=${MOUNTPT:-/mnt/lustre}
 CONFIG=${CONFIG:-recovery-cleanup.xml}
-MDSDEV=${MDSDEV:-/tmp/mds}
-OSTDEV=${OSTDEV:-/tmp/ost}
+MDSDEV=${MDSDEV:-/tmp/mds-`hostname`}
 MDSSIZE=${MDSSIZE:-100000}
 MDSSIZE=${MDSSIZE:-100000}
+FSTYPE=${FSTYPE:-ext3}
+OSTDEV=${OSTDEV:-/tmp/ost-`hostname`}
 OSTSIZE=${OSTSIZE:-100000}
 
 do_mds() {
 OSTSIZE=${OSTSIZE:-100000}
 
 do_mds() {
@@ -51,10 +52,10 @@ make_config() {
        lmc -m $CONFIG --add net --node $NODE --nid `h2$NETWORKTYPE $NODE` \
            --nettype $NETWORKTYPE || exit 4
     done
        lmc -m $CONFIG --add net --node $NODE --nid `h2$NETWORKTYPE $NODE` \
            --nettype $NETWORKTYPE || exit 4
     done
-    lmc -m $CONFIG --add mds --node $MDSNODE --mds mds1 --dev $MDSDEV \
-        --size $MDSSIZE || exit 5
-    lmc -m $CONFIG --add ost --node $OSTNODE --ost ost1 --dev $OSTDEV \
-        --size $OSTSIZE || exit 6
+    lmc -m $CONFIG --add mds --node $MDSNODE --mds mds1 --fstype $FSTYPE \
+       --dev $MDSDEV --size $MDSSIZE || exit 5
+    lmc -m $CONFIG --add ost --node $OSTNODE --ost ost1 --fstype $FSTYPE \
+       --dev $OSTDEV --size $OSTSIZE || exit 6
     lmc -m $CONFIG --add mtpt --node $CLIENT --path $MOUNTPT --mds mds1 \
         --ost ost1 || exit 7
 }
     lmc -m $CONFIG --add mtpt --node $CLIENT --path $MOUNTPT --mds mds1 \
         --ost ost1 || exit 7
 }
index ebf0a0c..bc6a9c1 100755 (executable)
@@ -25,9 +25,9 @@ CLIENT=${CLIENT:-mdev8}
 NETWORKTYPE=${NETWORKTYPE:-tcp}
 MOUNTPT=${MOUNTPT:-/mnt/lustre}
 CONFIG=${CONFIG:-recovery-small.xml}
 NETWORKTYPE=${NETWORKTYPE:-tcp}
 MOUNTPT=${MOUNTPT:-/mnt/lustre}
 CONFIG=${CONFIG:-recovery-small.xml}
-MDSDEV=${MDSDEV:-/tmp/mds}
-OSTDEV=${OSTDEV:-/tmp/ost}
+MDSDEV=${MDSDEV:-/tmp/mds-`hostname`}
 MDSSIZE=${MDSSIZE:-100000}
 MDSSIZE=${MDSSIZE:-100000}
+OSTDEV=${OSTDEV:-/tmp/ost-`hostname`}
 OSTSIZE=${OSTSIZE:-100000}
 UPCALL=${UPCALL:-$RPWD/recovery-small-upcall.sh}
 FSTYPE=${FSTYPE:-ext3}
 OSTSIZE=${OSTSIZE:-100000}
 UPCALL=${UPCALL:-$RPWD/recovery-small-upcall.sh}
 FSTYPE=${FSTYPE:-ext3}
index 3ba9368..4ea020f 100644 (file)
-#!/usr/bin/perl
+#!/usr/bin/perl -w
 use strict;
 use strict;
+$|++;
+
+$ENV{PATH}="/bin:/usr/bin";
+$ENV{ENV}="";
+$ENV{BASH_ENV}="";
+
 use diagnostics;
 use Getopt::Long;
 use diagnostics;
 use Getopt::Long;
+use POSIX ":sys_wait_h";
 
 
-sub usage () {
-    print "Usage: $0 <mount point prefix> <iterations>\n";
-    print "example: $0 --count=2 /mnt/lustre 50\n";
-    print "         will test in /mnt/lustre1 and /mnt/lustre2\n";
-    print "         $0 --count=0 /mnt/lustre 50\n";
-    print "         will test in /mnt/lustre only\n";
-    exit;
-}
-my ($j, $k, $d, $f1, $f2, $path, $silent);
-my $count = 0;
-my $create = 10;
+use vars qw(
+            $MAX_THREADS
+            );
+# Don't try to run more than this many threads concurrently.
+$MAX_THREADS = 16;
+
+# Initialize variables
+my $silent = 0;
+my $create_files = 1; # should we create files or not?
+my $use_mcreate = 1;  # should we use mcreate or open?
+my $num_dirs = 3;     # number of directories to create
+my $num_files = 6;    # number of files to create
+my $iterations = 1;
+my $num_threads = 1;
+my $mountpt;
+my $num_mounts = -1;
 
 GetOptions("silent!"=> \$silent,
 
 GetOptions("silent!"=> \$silent,
-           "count=i" => \$count,
-           "create=i" => \$create);
+          "use_mcreate=i" => \$use_mcreate,
+           "create_files=i" => \$create_files,
+          "use_mcreate=i" => \$use_mcreate,
+          "num_files=i" => \$num_files,
+          "num_dirs=i" => \$num_dirs,
+          "mountpt=s" => \$mountpt,
+           "num_mounts=i" => \$num_mounts,
+          "iterations=i" => \$iterations,
+           "num_threads=i" => \$num_threads,
+           ) || die &usage;
 
 
-my $mtpt = shift || usage();
-my $i = shift || usage();
-my $total = $i;
-my $files = 6;
-my $dirs = 3;
-my $mcreate = 0; # should we use mcreate or open?
+# Check for mandatory args.
+if (!$mountpt ||
+    !$num_mounts) {
+    die &usage;
+}
 
 
-my $which = "";
-if ($count > 0) {
-    $which = int(rand() * $count) + 1;
+if ($num_threads > $MAX_THREADS) {
+    print "\nMAX_THREADS is currently set to $MAX_THREADS.\n\n";
+    print "You will have to change this in the source\n";
+    print "if you really want to run with $num_threads threads.\n\n";
+    exit 1;
 }
 
 }
 
-$k = $dirs;
-if ($create == 0) {
-    $k = 0;
+# Initialize rand() function.
+srand (time ^ $$ ^ unpack "%L*", `ps axww | gzip`);
+
+#########################################################################
+### MAIN
+
+my $which = "";
+if ($num_mounts > 0) {
+    $which = int(rand() * $num_mounts) + 1;
 }
 }
-while ($k--) {
-    $path = "$mtpt$which/$k";
-    my $rc = mkdir $path, 0755;
-    print "mkdir $path failed: $!\n" if !$rc;
-    $j = $files;
-    while ($j--) {
-        `./mcreate $path/$j`;
+
+# Create files and directories (if necessary)
+if ($create_files) {
+    for (my $i=1; $i<=$num_threads;$i++) {
+       for (my $j=0; $j<$num_dirs;$j++) {
+           my $path = "${mountpt}${which}/${i}.${j}";
+           mkdir $path, 0755 || die "Can't mkdir $path: $!\n";
+           for (my $k=0; $k<$num_files; $k++) {
+               my $filepath = "${path}/${k}";
+               &create_file($filepath);
+               if (! -e $filepath) {
+                   die "Error creating $filepath\n";
+               }
+           }
+       }
     }
 }
 
     }
 }
 
-while ($i--) {
-    my $which = "";
-    if ($count > 0) {
-        $which = int(rand() * $count) + 1;
-    }
-    $d = int(rand() * $dirs);
-    $f1 = int(rand() * $files);
-    $f2 = int(rand() * $files);
-    print "[$$] $mtpt$which/$d/$f1 $mtpt$which/$d/$f2 ...\n" if !$silent;
-    my $rc = rename "$mtpt$which/$d/$f1", "$mtpt$which/$d/$f2";
-    print "[$$] done: $rc\n" if !$silent;
-    if (($total - $i) % 100 == 0) {
-        print STDERR "[" . $$ . "]" . ($total - $i) . " operations\n";
+for (my $i=1; $i<=$num_threads; $i++) {
+    my $status = &fork_and_rename($i);
+    last if ($status != 0);
+}
+
+# Wait for all our threads to finish.
+# Wait for all our threads to finish.
+my $child = 0;
+do {
+    $child = waitpid(-1, WNOHANG);
+} until $child > 0;
+sleep 1;
+
+# Unlink files and directories (if necessary)
+if ($create_files) {
+    for (my $i=1; $i<=$num_threads;$i++) {
+       for (my $j=0; $j<$num_dirs;$j++) {
+           my $path = "${mountpt}${which}/${i}.${j}";
+           for (my $k=0; $k<=$num_files; $k++) {
+               my $filepath = "${path}/${k}";
+               unlink("$filepath") if (-e $filepath);
+           }
+           my $rc = rmdir $path;
+           print "rmdir $path failed: $!\n" if !$rc;       
+       }
     }
 }
 
     }
 }
 
-$k = $dirs;
-if ($create == 0) {
-    $k = 0;
+exit 0;
+
+#########################################################################
+### SUBROUTINES
+
+sub usage () {
+    print "\nUsage: $0 [--silent] [--create_files=n] [--use_mcreate=n] [--num_dirs=n] [--num_files=n] [--iterations=n] [--num_threads=n] --num_mounts=n --mountpt=/path/to/lustre/mount\n\n";
+    print "\t--silent\tminimal output\n";
+    print "\t--create_files=n\create files at start, default=1 (yes)\n";
+    print "\t--use_mcreate=n\tuse mcreate to create files, default=1 (yes)\n";
+    print "\t--num_dirs=n\tnumber of directories to create per iteration, default=3\n";
+    print "\t--num_files=n\tnumber of files to create per directory, default=6\n";
+    print "\t--iterations=n\tnumber of iterations to perform, default=1\n";
+    print "\t--num_threads=n\tnumber of thread to run, default=1\n";
+    print "\t--mountpt\tlocation of lustre mount\n";
+    print "\t--num_mounts=n\tnumber of lustre mounts to test across, default=-1 (single mount point without numeric suffix)\n\n";
+    print "example: $0 --mountpt=/mnt/lustre --num_mounts=2 --iterations=50\n";
+    print "         will perform 50 interations in /mnt/lustre1 and /mnt/lustre2\n";
+    print "         $0 --mountpt=/mnt/lustre --num_mounts=-1 --iterations=50\n";
+    print "         will perform 50 iterations in /mnt/lustre only\n\n";
+    exit;
 }
 }
-while ($k--) {
-    $path = "$mtpt$which/$k";
-    $j = $files;
-    while ($j--) {
-        unlink "$path/$j";
+
+
+#########################################################################
+sub create_file ($) {
+    my ($path) = @_;;
+    
+    if ($use_mcreate) {
+        my $tmp = `./mcreate $path`;
+       if ($tmp =~ /.*error: (.*)\n/) {
+           die "Error mcreating $path: $!\n";
+       }
+    } else {
+        open(FH, ">$path") || die "Error opening $path: $!\n";
+        close(FH) || die;
     }
     }
-    my $rc = rmdir $path;
-    print "rmdir $path failed: $!\n" if !$rc;
+    return 0;
 }
 
 }
 
-print "Done.\n";
+#########################################################################
+sub fork_and_rename ($) {
+    my ($thread_num) = @_;
+    
+  FORK: {
+      if (my $pid = fork) {
+          # parent here
+          # child process pid is available in $pid
+         return 0;
+      } elsif (defined $pid) { # $pid is zero here if defined
+         
+         my $current_iteration=1;
+          while ($current_iteration <= $iterations) {
+             for (my $i=0; $i<$num_files; $i++) {
+                 my $which = "";
+                 if ($num_mounts > 0) {
+                     $which = int(rand() * $num_mounts) + 1;
+                 }
+                 
+                 my $d = int(rand() * $num_dirs);
+                 my $f1 = int(rand() * $num_files);
+                 my $f2 = int(rand() * $num_files);
+                 my $path_f1 = "${mountpt}${which}/${thread_num}.${d}/${f1}";
+                 my $path_f2 = "${mountpt}${which}/${thread_num}.${d}/${f2}";
+                 
+                 print "Thread $thread_num: [$$] $path_f1 $path_f2 ...\n" if !$silent;
+                 my $rc = rename $path_f1, $path_f2;
+                 print "Thread $thread_num: [$$] done: $rc\n" if !$silent;
+             }
+             if (($current_iteration) % 100 == 0) {
+                 print STDERR "Thread $thread_num: " . $current_iteration . " operations [" . $$ . "]\n";
+                 
+             }
+             $current_iteration++;
+         }
+
+         print "Thread $thread_num: Done.\n";
+
+         exit 0;
+
+      } elsif ($! =~ /No more process/) {
+          # EAGAIN, supposedly recoverable fork error
+          sleep 5;
+          redo FORK;
+      } else {
+          # weird fork error
+          die "Can't fork: $!\n";
+      }
+  }
+    
+}
index 20981e8..1e859aa 100644 (file)
@@ -7,40 +7,39 @@
 #include <string.h>
 #include <errno.h>
 #include <sys/types.h>
 #include <string.h>
 #include <errno.h>
 #include <sys/types.h>
+#include <grp.h>
 #include <sys/wait.h>
 
 #define DEBUG 0
 
 #include <sys/wait.h>
 
 #define DEBUG 0
 
-void Usage_and_abort(void)
+static const char usage[] =
+"Usage: %s -u user_id [-g grp_id ] [ -G ] command\n"
+"  -u user_id      switch to UID user_id\n"
+"  -g grp_id       switch to GID grp_id\n"
+"  -G              clear supplementary groups\n";
+
+void Usage_and_abort(const char *name)
 {
 {
-       fprintf(stderr, "Usage: runas -u user_id [ -g grp_id ]"
-               " command_to_be_run \n");
-       exit(-1);
+        fprintf(stderr, usage, name);
+        exit(-1);
 }
 
 }
 
-// Usage: runas -u user_id [ -g grp_id ] [--] command_to_be_run
-// return: the return value of "command_to_be_run"
-// NOTE: returning -1 might be the return code of this program itself or
-// the "command_to_be_run"
-
-// ROOT runs "runas" for free
-// Other users run "runas" requires  chmod 6755 "command_to_be_run"
-
 int main(int argc, char **argv)
 {
 int main(int argc, char **argv)
 {
-        char **my_argv;
+        char **my_argv, *name = argv[0];
         int status;
         int c,i;
         int gid_is_set = 0;
         int uid_is_set = 0;
         int status;
         int c,i;
         int gid_is_set = 0;
         int uid_is_set = 0;
+        int clear_supp_groups = 0;
         uid_t user_id;
         gid_t grp_id;
 
         if (argc == 1)
         uid_t user_id;
         gid_t grp_id;
 
         if (argc == 1)
-                Usage_and_abort();
+                Usage_and_abort(name);
 
         // get UID and GID
 
         // get UID and GID
-        while ((c = getopt (argc, argv, "+u:g:h")) != -1) {
+        while ((c = getopt (argc, argv, "+u:g:hG")) != -1) {
                 switch (c) {
                 case 'u':
                         user_id = (uid_t)atoi(optarg);
                 switch (c) {
                 case 'u':
                         user_id = (uid_t)atoi(optarg);
@@ -54,23 +53,23 @@ int main(int argc, char **argv)
                         gid_is_set = 1;
                         break;
 
                         gid_is_set = 1;
                         break;
 
-                case 'h':
-                        Usage_and_abort();
+                case 'G':
+                        clear_supp_groups = 1;
                         break;
 
                 default:
                         break;
 
                 default:
-                        //fprintf(stderr, "Bad parameters.\n");
-                        //Usage_and_abort ();
+                case 'h':
+                        Usage_and_abort(name);
                         break;
                 }
         }
 
         if (!uid_is_set)
                         break;
                 }
         }
 
         if (!uid_is_set)
-                Usage_and_abort();
+                Usage_and_abort(name);
 
         if (optind == argc) {
 
         if (optind == argc) {
-                fprintf(stderr, "Bad parameters.\n");
-                Usage_and_abort();
+                fputs("Must specify command to run.\n", stderr);
+                Usage_and_abort(name);
         }
 
         // assemble the command
         }
 
         // assemble the command
@@ -99,6 +98,14 @@ int main(int argc, char **argv)
                  exit(-1);
         }
 
                  exit(-1);
         }
 
+        if (clear_supp_groups) {
+                status = setgroups(0, NULL);
+                if (status == -1) {
+                        perror("clearing supplementary groups");
+                        exit(-1);
+                }
+        }
+        
         // set UID
         status = setreuid(user_id, user_id );
         if(status == -1) {
         // set UID
         status = setreuid(user_id, user_id );
         if(status == -1) {
@@ -107,8 +114,8 @@ int main(int argc, char **argv)
                   exit(-1);
         }
 
                   exit(-1);
         }
 
-
-        fprintf(stderr, "running as USER(%d), Grp (%d):  ", user_id, grp_id );
+        fprintf(stderr, "running as UID %d, GID %d%s:", user_id, grp_id,
+                clear_supp_groups ? ", cleared groups" : "");
 
         for (i = 0; i < argc - optind; i++)
                  fprintf(stderr, " [%s]", my_argv[i]);
 
         for (i = 0; i < argc - optind; i++)
                  fprintf(stderr, " [%s]", my_argv[i]);
index cb417d2..821ac46 100755 (executable)
@@ -1,6 +1,6 @@
 #!/bin/sh
 #!/bin/sh
-
-DIR=${DIR:-/mnt/lustre/`hostname`}
+MNT=${MNT:-/mnt/lustre}
+DIR=${DIR:-$MNT/`hostname`}
 #[ -e /proc/sys/portals/debug ] && echo 0 > /proc/sys/portals/debug 
 mkdir -p $DIR
 TGT=$DIR/client.txt
 #[ -e /proc/sys/portals/debug ] && echo 0 > /proc/sys/portals/debug 
 mkdir -p $DIR
 TGT=$DIR/client.txt
index 886ce8f..ad60d6d 100644 (file)
@@ -1,7 +1,7 @@
 #!/bin/sh
 PATH=`dirname $0`/../utils:$PATH
 
 #!/bin/sh
 PATH=`dirname $0`/../utils:$PATH
 
-obdstat filter 1 | while read LINE; do
+llobdstat.pl $1 1 | while read LINE; do
        echo "`date +s`: $LINE"
        echo "`date +s`: $LINE"
-       [ "$1" ] && echo "`date +s`: $LINE" >> $1
+       [ "$2" ] && echo "`date +s`: $LINE" >> $2
 done
 done
index 4d86248..395ceb5 100644 (file)
@@ -1,6 +1,6 @@
 #!/bin/sh
 SRCDIR="`dirname $0`/"
 #!/bin/sh
 SRCDIR="`dirname $0`/"
-export PATH=/sbin:/usr/sbin:$SRCDIR:$PATH
+export PATH=/sbin:/usr/sbin:$SRCDIR/../utils:$PATH
 
 LOOPS=${LOOPS:-1}
 COUNT=${COUNT:-1000000}
 
 LOOPS=${LOOPS:-1}
 COUNT=${COUNT:-1000000}
index e59f5f4..6a8aac8 100755 (executable)
@@ -35,41 +35,42 @@ while [ "$1" ]; do
        shift
 done
 
        shift
 done
 
-OSCMT="`mount | awk '/ lustre_lite / { print $3 }' | tail -1`"
-if [ -z "$OSCMT" ]; then
+MOUNT="`mount | awk '/ lustre_lite / { print $3 }' | tail -1`"
+if [ -z "$MOUNT" ]; then
        sh llmount.sh
        sh llmount.sh
-       OSCMT="`mount | awk '/ lustre_lite / { print $3 }' | tail -1`"
-       [ -z "$OSCMT" ] && fail "no lustre filesystem mounted" 1
+       MOUNT="`mount | awk '/ lustre_lite / { print $3 }' | tail -1`"
+       [ -z "$MOUNT" ] && fail "no lustre filesystem mounted" 1
        I_MOUNTED="yes"
 fi
 
        I_MOUNTED="yes"
 fi
 
-OSCTMP=`echo $OSCMT | tr "/" "."`
+OSCTMP=`echo $MOUNT | tr "/" "."`
 USED=`df | awk "/$OSCTMP/ { print \\$3 }" | tail -1`
 USED=`expr $USED + 16` # Some space for the status file
 
 # let's start slowly here...
 USED=`df | awk "/$OSCTMP/ { print \\$3 }" | tail -1`
 USED=`expr $USED + 16` # Some space for the status file
 
 # let's start slowly here...
-log "touching $OSCMT"
-touch $OSCMT || fail "can't touch $OSCMT" 2
-HOSTS=$OSCMT/hosts.$$
-
-# this will cause the following cp to trigger bug #620096
-log "create an empty file $HOSTS"
-mcreate $HOSTS
-
-log "copying /etc/hosts to $HOSTS"
-cp /etc/hosts $HOSTS || fail "can't cp /etc/hosts to $HOSTS" 3
-log "comparing /etc/hosts and $HOSTS"
-diff -u /etc/hosts $HOSTS || fail "$HOSTS different" 4
-log "renaming $HOSTS to $HOSTS.ren"
-mv $HOSTS $HOSTS.ren || fail "can't rename $HOSTS to $HOSTS.ren" 5
-log "copying /etc/hosts to $HOSTS again"
-cp /etc/hosts $HOSTS || fail "can't cp /etc/hosts to $HOSTS again" 6
-log "truncating $HOSTS"
-> $HOSTS || fail "can't truncate $HOSTS" 8
-log "removing $HOSTS"
-rm $HOSTS || fail "can't remove $HOSTS" 9
-
-DST=$OSCMT/runtest.$$
+log "touching $MOUNT"
+touch $MOUNT || fail "can't touch $MOUNT" 2
+HOSTS=$MOUNT/hosts.$$
+
+if [ $COUNT -gt 10 -o $COUNT -eq 0 ]; then
+       # this will cause the following cp to trigger bug #620096
+       log "create an empty file $HOSTS"
+       mcreate $HOSTS
+       log "copying /etc/hosts to $HOSTS"
+       cp /etc/hosts $HOSTS || fail "can't cp /etc/hosts to $HOSTS" 3
+       log "comparing /etc/hosts and $HOSTS"
+       diff -u /etc/hosts $HOSTS || fail "$HOSTS different" 4
+       log "renaming $HOSTS to $HOSTS.ren"
+       mv $HOSTS $HOSTS.ren || fail "can't rename $HOSTS to $HOSTS.ren" 5
+       log "copying /etc/hosts to $HOSTS again"
+       cp /etc/hosts $HOSTS || fail "can't cp /etc/hosts to $HOSTS again" 6
+       log "truncating $HOSTS"
+       > $HOSTS || fail "can't truncate $HOSTS" 8
+       log "removing $HOSTS"
+       rm $HOSTS || fail "can't remove $HOSTS" 9
+fi
+
+DST=$MOUNT/runtest.$$
 # let's start slowly here...
 log "creating $DST"
 mkdir $DST || fail "can't mkdir $DST" 10
 # let's start slowly here...
 log "creating $DST"
 mkdir $DST || fail "can't mkdir $DST" 10
@@ -102,27 +103,29 @@ done
 sh llmountcleanup.sh || exit 19
 sh llrmount.sh || exit 20
 
 sh llmountcleanup.sh || exit 19
 sh llrmount.sh || exit 20
 
-log "renaming $HOSTS.ren to $HOSTS"
-mv $HOSTS.ren $HOSTS || fail "can't rename $HOSTS.ren to $HOSTS" 32
-log "truncating $HOSTS"
-> $HOSTS || fail "can't truncate $HOSTS" 34
-log "removing $HOSTS"
-rm $HOSTS || fail "can't remove $HOSTS again" 36
 log "removing $DST"
 rm -r $V $DST || fail "can't remove $DST" 37
 
 log "removing $DST"
 rm -r $V $DST || fail "can't remove $DST" 37
 
+if [ $COUNT -gt 10 -o $COUNT -eq 0 ]; then
+       log "renaming $HOSTS.ren to $HOSTS"
+       mv $HOSTS.ren $HOSTS || fail "can't rename $HOSTS.ren to $HOSTS" 32
+       log "truncating $HOSTS"
+       > $HOSTS || fail "can't truncate $HOSTS" 34
+       log "removing $HOSTS"
+       rm $HOSTS || fail "can't remove $HOSTS again" 36
+fi
+
 # mkdirmany test (bug 589)
 # mkdirmany test (bug 589)
-log "running mkdirmany $OSCMT/base$$ 100"
-$MKDIRMANY $OSCMT/base$$ 100 || fail "mkdirmany failed"
+log "running mkdirmany $MOUNT/base$$ 100"
+$MKDIRMANY $MOUNT/base$$ 100 || fail "mkdirmany failed"
 log "removing mkdirmany directories"
 log "removing mkdirmany directories"
-rmdir $OSCMT/base$$* || fail "mkdirmany cleanup failed"
+rmdir $MOUNT/base$$* || fail "mkdirmany cleanup failed"
 
 log "done"
 
 NOWUSED=`df | awk "/$OSCTMP/ { print \\$3 }" | tail -1`
 
 log "done"
 
 NOWUSED=`df | awk "/$OSCTMP/ { print \\$3 }" | tail -1`
-if [ $NOWUSED -gt $USED ]; then
+if [ `expr $NOWUSED - $USED` -gt 1024 ]; then
        echo "Space not all freed: now ${NOWUSED}kB, was ${USED}kB." 1>&2
        echo "Space not all freed: now ${NOWUSED}kB, was ${USED}kB." 1>&2
-       echo "This is normal on BA OSTs, because of subdirectories." 1>&2
 fi
 
 if [ "$I_MOUNTED" = "yes" ]; then
 fi
 
 if [ "$I_MOUNTED" = "yes" ]; then
index b04d84c..f414ccc 100755 (executable)
@@ -1,6 +1,6 @@
 #!/bin/sh
 vmstat 1 | while read LINE ; do
        LINE="`date +%s`: $LINE"
 #!/bin/sh
 vmstat 1 | while read LINE ; do
        LINE="`date +%s`: $LINE"
-       echo $LINE
-       [ "$1" ] && echo $LINE >> $1
+       echo "$LINE"
+       [ "$1" ] && echo "$LINE" >> $1
 done
 done
index 46d0072..09eb8e9 100644 (file)
@@ -7,17 +7,19 @@
 set -e
 
 ONLY=${ONLY:-"$*"}
 set -e
 
 ONLY=${ONLY:-"$*"}
-ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"34 35"}        # bugs 1365 and 1360 respectively
+ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"35 32q 37 39"} # bugs 1360, 1504
 
 SRCDIR=`dirname $0`
 PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH
 
 
 SRCDIR=`dirname $0`
 PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH
 
-CHECKSTAT=${CHECKSTAT:-"./checkstat -v"}
+CHECKSTAT=${CHECKSTAT:-"checkstat -v"}
 CREATETEST=${CREATETEST:-createtest}
 LFIND=${LFIND:-lfind}
 LSTRIPE=${LSTRIPE:-lstripe}
 LCTL=${LCTL:-lctl}
 MCREATE=${MCREATE:-mcreate}
 CREATETEST=${CREATETEST:-createtest}
 LFIND=${LFIND:-lfind}
 LSTRIPE=${LSTRIPE:-lstripe}
 LCTL=${LCTL:-lctl}
 MCREATE=${MCREATE:-mcreate}
+OPENFILE=${OPENFILE:-openfile}
+OPENUNLINK=${OPENUNLINK:-openunlink}
 TOEXCL=${TOEXCL:-toexcl}
 TRUNCATE=${TRUNCATE:-truncate}
 
 TOEXCL=${TOEXCL:-toexcl}
 TRUNCATE=${TRUNCATE:-truncate}
 
@@ -29,22 +31,20 @@ else
        RUNAS=${RUNAS:-"runas -u $RUNAS_ID"}
 fi
 
        RUNAS=${RUNAS:-"runas -u $RUNAS_ID"}
 fi
 
-MOUNT=${MOUNT:-/mnt/lustre}
-DIR=${DIR:-$MOUNT}
-export NAME=$NAME
+export NAME=${NAME:-local}
 
 SAVE_PWD=$PWD
 
 clean() {
 
 SAVE_PWD=$PWD
 
 clean() {
-        echo -n "cln.."
-        sh llmountcleanup.sh > /dev/null || exit 20
+       echo -n "cln.."
+       sh llmountcleanup.sh > /dev/null || exit 20
 }
 }
-
 CLEAN=${CLEAN:-clean}
 CLEAN=${CLEAN:-clean}
+
 start() {
 start() {
-        echo -n "mnt.."
-        sh llrmount.sh > /dev/null || exit 10
-        echo "done"
+       echo -n "mnt.."
+       sh llrmount.sh > /dev/null || exit 10
+       echo "done"
 }
 START=${START:-start}
 
 }
 START=${START:-start}
 
@@ -54,7 +54,7 @@ log() {
 }
 
 run_one() {
 }
 
 run_one() {
-       if ! mount | grep -q $MOUNT; then
+       if ! mount | grep -q $DIR; then
                $START
        fi
        log "== test $1: $2"
                $START
        fi
        log "== test $1: $2"
@@ -87,23 +87,33 @@ run_test() {
 }
 
 error() { 
 }
 
 error() { 
-    echo FAIL
-    exit 1
+       echo "FAIL: $@"
+       exit 1
 }
 
 pass() { 
 }
 
 pass() { 
-    echo PASS
+       echo PASS
 }
 
 }
 
-if ! mount | grep $MOUNT; then
+MOUNT="`mount | awk '/^'$NAME' .* lustre_lite / { print $3 }'`"
+if [ -z "$MOUNT" ]; then
        sh llmount.sh
        sh llmount.sh
+       MOUNT="`mount | awk '/^'$NAME' .* lustre_lite / { print $3 }'`"
+       [ -z "$MOUNT" ] && error "NAME=$NAME not mounted"
        I_MOUNTED=yes
 fi
 
        I_MOUNTED=yes
 fi
 
+[ `echo $MOUNT | wc -w` -gt 1 ] && error "NAME=$NAME mounted more than once"
+
+DIR=${DIR:-$MOUNT}
+[ -z "`echo $DIR | grep $MOUNT`" ] && echo "$DIR not in $MOUNT" && exit 99
+
+rm -rf $DIR/[Rdfs][1-9]*
+
 echo preparing for tests involving mounts
 echo preparing for tests involving mounts
-EXT2_DEV=/tmp/SANITY.LOOP
-dd if=/dev/zero of=$EXT2_DEV bs=1k seek=1000 count=1 > /dev/null
-mke2fs -F $EXT2_DEV > /dev/null
+EXT2_DEV=${EXT2_DEV:-/tmp/SANITY.LOOP}
+touch $EXT2_DEV
+mke2fs -F $EXT2_DEV 1000 > /dev/null
 
 test_0() {
        touch $DIR/f
 
 test_0() {
        touch $DIR/f
@@ -178,12 +188,49 @@ test_5() {
 }
 run_test 5 "mkdir .../d5 .../d5/d2; chmod .../d5/d2 ============"
 
 }
 run_test 5 "mkdir .../d5 .../d5/d2; chmod .../d5/d2 ============"
 
-test_6() {
-       touch $DIR/f6
-       chmod 0666 $DIR/f6
-       $CHECKSTAT -t file -p 0666 $DIR/f6 || error
+test_6a() {
+       touch $DIR/f6a
+       chmod 0666 $DIR/f6a || error
+       $CHECKSTAT -t file -p 0666 -u \#$UID $DIR/f6a || error
+}
+run_test 6a "touch .../f6a; chmod .../f6a ======================"
+
+test_6b() {
+       [ $RUNAS_ID -eq $UID ] && echo "skipping test 6b" && return
+       $RUNAS chmod 0444 $DIR/f6a && error
+       $CHECKSTAT -t file -p 0666 -u \#$UID $DIR/f6a || error
+}
+run_test 6b "$RUNAS chmod .../f6a (should return error) =="
+
+test_6c() {
+       [ $RUNAS_ID -eq $UID ] && echo "skipping test 6c" && return
+       touch $DIR/f6c
+       chown $RUNAS_ID $DIR/f6c || error
+       $CHECKSTAT -t file -u \#$RUNAS_ID $DIR/f6c || error
+}
+run_test 6c "touch .../f6c; chown .../f6c ======================"
+
+test_6d() {
+       [ $RUNAS_ID -eq $UID ] && echo "skipping test 6d" && return
+       $RUNAS chown $UID $DIR/f6c && error
+       $CHECKSTAT -t file -u \#$RUNAS_ID $DIR/f6c || error
 }
 }
-run_test 6 "touch .../f6; chmod .../f6 ========================="
+run_test 6d "$RUNAS chown .../f6c (should return error) =="
+
+test_6e() {
+       [ $RUNAS_ID -eq $UID ] && echo "skipping test 6e" && return
+       touch $DIR/f6e
+       chgrp $RUNAS_ID $DIR/f6e || error
+       $CHECKSTAT -t file -u \#$UID -g \#$RUNAS_ID $DIR/f6e || error
+}
+run_test 6e "touch .../f6e; chgrp .../f6e ======================"
+
+test_6f() {
+       [ $RUNAS_ID -eq $UID ] && echo "skipping test 6f" && return
+       $RUNAS chgrp $UID $DIR/f6e && error
+       $CHECKSTAT -t file -u \#$UID -g \#$RUNAS_ID $DIR/f6e || error
+}
+run_test 6f "$RUNAS chgrp .../f6e (should return error) =="
 
 test_7a() {
        mkdir $DIR/d7
 
 test_7a() {
        mkdir $DIR/d7
@@ -357,7 +404,7 @@ test_23() {
 run_test 23 "O_CREAT|O_EXCL in subdir =========================="
 
 test_24a() {
 run_test 23 "O_CREAT|O_EXCL in subdir =========================="
 
 test_24a() {
-       echo '============ rename sanity ================================='
+       echo '== rename sanity =============================================='
        echo '-- same directory rename'
        mkdir $DIR/R1
        touch $DIR/R1/f
        echo '-- same directory rename'
        mkdir $DIR/R1
        touch $DIR/R1/f
@@ -440,7 +487,7 @@ test_24i() {
        $CHECKSTAT -t dir  $DIR/R9/a || error
        $CHECKSTAT -a file $DIR/R9/a/f || error
 }
        $CHECKSTAT -t dir  $DIR/R9/a || error
        $CHECKSTAT -a file $DIR/R9/a/f || error
 }
-run_test 24i "rename file to dir error: touch f ; mkdir a ; rename f a ====="
+run_test 24i "rename file to dir error: touch f ; mkdir a ; rename f a"
 
 test_24j() {
        mkdir $DIR/R10
 
 test_24j() {
        mkdir $DIR/R10
@@ -452,7 +499,7 @@ test_24j() {
 run_test 24j "source does not exist ============================" 
 
 test_25a() {
 run_test 24j "source does not exist ============================" 
 
 test_25a() {
-       echo '== symlink sanity ======================================='
+       echo '== symlink sanity ============================================='
        mkdir $DIR/d25
        ln -s d25 $DIR/s25
        touch $DIR/s25/foo || error
        mkdir $DIR/d25
        ln -s d25 $DIR/s25
        touch $DIR/s25/foo || error
@@ -473,7 +520,8 @@ test_26a() {
 run_test 26a "multiple component symlink ======================="
 
 test_26b() {
 run_test 26a "multiple component symlink ======================="
 
 test_26b() {
-       ln -s d26/d26-2/foo $DIR/s26-2
+       mkdir -p $DIR/d26b/d26-2
+       ln -s d26b/d26-2/foo $DIR/s26-2
        touch $DIR/s26-2 || error
 }
 run_test 26b "multiple component symlink at end of lookup ======"
        touch $DIR/s26-2 || error
 }
 run_test 26b "multiple component symlink at end of lookup ======"
@@ -500,12 +548,12 @@ test_26e() {
 run_test 26e "unlink multiple component recursive symlink ======"
 
 test_27a() {
 run_test 26e "unlink multiple component recursive symlink ======"
 
 test_27a() {
-       echo '== stripe sanity ========================================'
+       echo '== stripe sanity =============================================='
        mkdir $DIR/d27
        $LSTRIPE $DIR/d27/f0 8192 0 1
        $CHECKSTAT -t file $DIR/d27/f0
        pass
        mkdir $DIR/d27
        $LSTRIPE $DIR/d27/f0 8192 0 1
        $CHECKSTAT -t file $DIR/d27/f0
        pass
-       log "test_27b: write to one stripe file ========================="
+       log "== test_27b: write to one stripe file ========================="
        cp /etc/hosts $DIR/d27/f0
 }
 run_test 27a "one stripe file =================================="
        cp /etc/hosts $DIR/d27/f0
 }
 run_test 27a "one stripe file =================================="
@@ -513,7 +561,7 @@ run_test 27a "one stripe file =================================="
 test_27c() {
        $LSTRIPE $DIR/d27/f01 8192 0 2
        pass
 test_27c() {
        $LSTRIPE $DIR/d27/f01 8192 0 2
        pass
-       log "test_27d: write to two stripe file file f01 ================"
+       log "== test_27d: write to two stripe file file f01 ================"
        dd if=/dev/zero of=$DIR/d27/f01 bs=4k count=4
 }
 run_test 27c "create two stripe file f01 ======================="
        dd if=/dev/zero of=$DIR/d27/f01 bs=4k count=4
 }
 run_test 27c "create two stripe file f01 ======================="
@@ -537,14 +585,15 @@ run_test 27e "lstripe existing file (should return error) ======"
 test_27f() {
        $LSTRIPE $DIR/d27/fbad 100 1 2 || true
        dd if=/dev/zero of=$DIR/d27/f12 bs=4k count=4
 test_27f() {
        $LSTRIPE $DIR/d27/fbad 100 1 2 || true
        dd if=/dev/zero of=$DIR/d27/f12 bs=4k count=4
+       $LFIND $DIR/d27/fbad
 }
 run_test 27f "lstripe with bad stripe size (should return error on LOV)"
 
 test_27g() {
        $MCREATE $DIR/d27/fnone || error
        pass
 }
 run_test 27f "lstripe with bad stripe size (should return error on LOV)"
 
 test_27g() {
        $MCREATE $DIR/d27/fnone || error
        pass
-       log "test 27.9: lfind ============================================"
-       $LFIND $DIR/d27
+       log "== test 27h: lfind ============================================"
+       $LFIND $DIR/d27/fnone | grep -q "Has no stripe info" || error
 }
 run_test 27g "mcreate file without objects to test lfind ======="
 
 }
 run_test 27g "mcreate file without objects to test lfind ======="
 
@@ -586,7 +635,7 @@ test_30() {
 run_test 30 "run binary from Lustre (execve) ==================="
 
 test_31() {
 run_test 30 "run binary from Lustre (execve) ==================="
 
 test_31() {
-       ./openunlink $DIR/f31 $DIR/f31 || error
+       $OPENUNLINK $DIR/f31 $DIR/f31 || error
 }
 run_test 31 "open-unlink file =================================="
 
 }
 run_test 31 "open-unlink file =================================="
 
@@ -627,7 +676,7 @@ test_32d() {
        ls -al $DIR/d32d/ext2-mountpoint/../d2/test_dir || error
        umount $DIR/d32d/ext2-mountpoint || error
 }
        ls -al $DIR/d32d/ext2-mountpoint/../d2/test_dir || error
        umount $DIR/d32d/ext2-mountpoint || error
 }
-run_test 32d "open d32d/ext2-mountpoint/../d2/test_dir =========="
+run_test 32d "open d32d/ext2-mountpoint/../d2/test_dir ========="
 
 test_32e() {
        [ -e $DIR/d32e ] && rm -fr $DIR/d32e
 
 test_32e() {
        [ -e $DIR/d32e ] && rm -fr $DIR/d32e
@@ -638,7 +687,7 @@ test_32e() {
        $CHECKSTAT -t link $DIR/d32e/tmp/symlink11 || error
        $CHECKSTAT -t link $DIR/d32e/symlink01 || error
 }
        $CHECKSTAT -t link $DIR/d32e/tmp/symlink11 || error
        $CHECKSTAT -t link $DIR/d32e/symlink01 || error
 }
-run_test 32e "stat d32e/symlink->tmp/symlink->lustre-subdir ====="
+run_test 32e "stat d32e/symlink->tmp/symlink->lustre-subdir ===="
 
 test_32f() {
        [ -e $DIR/d32f ] && rm -fr $DIR/d32f
 
 test_32f() {
        [ -e $DIR/d32f ] && rm -fr $DIR/d32f
@@ -649,7 +698,7 @@ test_32f() {
        ls $DIR/d32f/tmp/symlink11  || error
        ls $DIR/d32f/symlink01 || error
 }
        ls $DIR/d32f/tmp/symlink11  || error
        ls $DIR/d32f/symlink01 || error
 }
-run_test 32f "open d32f/symlink->tmp/symlink->lustre-subdir ====="
+run_test 32f "open d32f/symlink->tmp/symlink->lustre-subdir ===="
 
 test_32g() {
        [ -e $DIR/d32g ] && rm -fr $DIR/d32g
 
 test_32g() {
        [ -e $DIR/d32g ] && rm -fr $DIR/d32g
@@ -687,7 +736,7 @@ test_32i() {
        $CHECKSTAT -t file $DIR/d32i/ext2-mountpoint/../test_file || error  
        umount $DIR/d32i/ext2-mountpoint || error
 }
        $CHECKSTAT -t file $DIR/d32i/ext2-mountpoint/../test_file || error  
        umount $DIR/d32i/ext2-mountpoint || error
 }
-run_test 32i "stat d32i/ext2-mountpoint/../test_file ============"
+run_test 32i "stat d32i/ext2-mountpoint/../test_file ==========="
 
 test_32j() {
        [ -e $DIR/d32j ] && rm -fr $DIR/d32j
 
 test_32j() {
        [ -e $DIR/d32j ] && rm -fr $DIR/d32j
@@ -697,10 +746,10 @@ test_32j() {
        cat $DIR/d32j/ext2-mountpoint/../test_file || error
        umount $DIR/d32j/ext2-mountpoint || error
 }
        cat $DIR/d32j/ext2-mountpoint/../test_file || error
        umount $DIR/d32j/ext2-mountpoint || error
 }
-run_test 32j "open d32j/ext2-mountpoint/../test_file ============"
+run_test 32j "open d32j/ext2-mountpoint/../test_file ==========="
 
 test_32k() {
 
 test_32k() {
-       [ -e $DIR/d32k ] && rm -fr $DIR/d32k
+       rm -fr $DIR/d32k
        mkdir -p $DIR/d32k/ext2-mountpoint 
        mount -t ext2 -o loop $EXT2_DEV $DIR/d32k/ext2-mountpoint  
        mkdir -p $DIR/d32k/d2
        mkdir -p $DIR/d32k/ext2-mountpoint 
        mount -t ext2 -o loop $EXT2_DEV $DIR/d32k/ext2-mountpoint  
        mkdir -p $DIR/d32k/d2
@@ -708,10 +757,10 @@ test_32k() {
        $CHECKSTAT -t file $DIR/d32k/ext2-mountpoint/../d2/test_file || error
        umount $DIR/d32k/ext2-mountpoint || error
 }
        $CHECKSTAT -t file $DIR/d32k/ext2-mountpoint/../d2/test_file || error
        umount $DIR/d32k/ext2-mountpoint || error
 }
-run_test 32k "stat d32k/ext2-mountpoint/../d2/test_file ========="
+run_test 32k "stat d32k/ext2-mountpoint/../d2/test_file ========"
 
 test_32l() {
 
 test_32l() {
-       [ -e $DIR/d32l ] && rm -fr $DIR/d32l
+       rm -fr $DIR/d32l
        mkdir -p $DIR/d32l/ext2-mountpoint 
        mount -t ext2 -o loop $EXT2_DEV $DIR/d32l/ext2-mountpoint || error
        mkdir -p $DIR/d32l/d2
        mkdir -p $DIR/d32l/ext2-mountpoint 
        mount -t ext2 -o loop $EXT2_DEV $DIR/d32l/ext2-mountpoint || error
        mkdir -p $DIR/d32l/d2
@@ -719,10 +768,10 @@ test_32l() {
        cat  $DIR/d32l/ext2-mountpoint/../d2/test_file || error
        umount $DIR/d32l/ext2-mountpoint || error
 }
        cat  $DIR/d32l/ext2-mountpoint/../d2/test_file || error
        umount $DIR/d32l/ext2-mountpoint || error
 }
-run_test 32l "open d32l/ext2-mountpoint/../d2/test_file ========="
+run_test 32l "open d32l/ext2-mountpoint/../d2/test_file ========"
 
 test_32m() {
 
 test_32m() {
-       [ -e $DIR/d32m ] && rm -fr $DIR/d32m
+       rm -fr $DIR/d32m
        mkdir -p $DIR/d32m/tmp    
        TMP_DIR=$DIR/d32m/tmp       
        ln -s $DIR $TMP_DIR/symlink11 
        mkdir -p $DIR/d32m/tmp    
        TMP_DIR=$DIR/d32m/tmp       
        ln -s $DIR $TMP_DIR/symlink11 
@@ -730,10 +779,10 @@ test_32m() {
        $CHECKSTAT -t link $DIR/d32m/tmp/symlink11 || error
        $CHECKSTAT -t link $DIR/d32m/symlink01 || error
 }
        $CHECKSTAT -t link $DIR/d32m/tmp/symlink11 || error
        $CHECKSTAT -t link $DIR/d32m/symlink01 || error
 }
-run_test 32m "stat d32m/symlink->tmp/symlink->lustre-root ======="
+run_test 32m "stat d32m/symlink->tmp/symlink->lustre-root ======"
 
 test_32n() {
 
 test_32n() {
-       [ -e $DIR/d32n ] && rm -fr $DIR/d32n
+       rm -fr $DIR/d32n
        mkdir -p $DIR/d32n/tmp    
        TMP_DIR=$DIR/d32n/tmp       
        ln -s $DIR $TMP_DIR/symlink11 
        mkdir -p $DIR/d32n/tmp    
        TMP_DIR=$DIR/d32n/tmp       
        ln -s $DIR $TMP_DIR/symlink11 
@@ -741,11 +790,11 @@ test_32n() {
        ls -l $DIR/d32n/tmp/symlink11  || error
        ls -l $DIR/d32n/symlink01 || error
 }
        ls -l $DIR/d32n/tmp/symlink11  || error
        ls -l $DIR/d32n/symlink01 || error
 }
-run_test 32n "open d32n/symlink->tmp/symlink->lustre-root ======="
+run_test 32n "open d32n/symlink->tmp/symlink->lustre-root ======"
 
 test_32o() {
 
 test_32o() {
-       [ -e $DIR/d32o ] && rm -fr $DIR/d32o
-       [ -e $DIR/test_file ] && rm -fr $DIR/test_file
+       rm -fr $DIR/d32o
+       rm -f $DIR/test_file
        touch $DIR/test_file 
        mkdir -p $DIR/d32o/tmp    
        TMP_DIR=$DIR/d32o/tmp       
        touch $DIR/test_file 
        mkdir -p $DIR/d32o/tmp    
        TMP_DIR=$DIR/d32o/tmp       
@@ -759,8 +808,8 @@ test_32o() {
 run_test 32o "stat d32o/symlink->tmp/symlink->lustre-root/test_file"
 
 test_32p() {
 run_test 32o "stat d32o/symlink->tmp/symlink->lustre-root/test_file"
 
 test_32p() {
-       [ -e $DIR/d32p ] && rm -fr $DIR/d32p
-       [ -e $DIR/test_file ] && rm -fr $DIR/test_file
+       rm -fr $DIR/d32p
+       rm -f $DIR/test_file
        touch $DIR/test_file 
        mkdir -p $DIR/d32p/tmp    
        TMP_DIR=$DIR/d32p/tmp       
        touch $DIR/test_file 
        mkdir -p $DIR/d32p/tmp    
        TMP_DIR=$DIR/d32p/tmp       
@@ -771,109 +820,220 @@ test_32p() {
 }
 run_test 32p "open d32p/symlink->tmp/symlink->lustre-root/test_file"
 
 }
 run_test 32p "open d32p/symlink->tmp/symlink->lustre-root/test_file"
 
+test_32q() {
+       [ -e $DIR/d32q ] && rm -fr $DIR/d32q
+       mkdir -p $DIR/d32q
+       mount -t ext2 -o loop $EXT2_DEV $DIR/d32q
+       ls $DIR/d32q || error
+       umount $DIR/d32q || error
+}
+run_test 32q "ls a mounted file system ========================="
+
 #   chmod 444 /mnt/lustre/somefile
 #   open(/mnt/lustre/somefile, O_RDWR)
 #   Should return -1
 test_33() {
 #   chmod 444 /mnt/lustre/somefile
 #   open(/mnt/lustre/somefile, O_RDWR)
 #   Should return -1
 test_33() {
-       [ -e $DIR/test_33_file ] && rm -fr $DIR/test_33_file
+       rm -f $DIR/test_33_file
        touch $DIR/test_33_file
        chmod 444 $DIR/test_33_file
        chown $RUNAS_ID $DIR/test_33_file
        touch $DIR/test_33_file
        chmod 444 $DIR/test_33_file
        chown $RUNAS_ID $DIR/test_33_file
-       $RUNAS openfile -f O_RDWR $DIR/test_33_file && error || true
+       $RUNAS $OPENFILE -f O_RDWR $DIR/test_33_file && error || true
 }
 run_test 33 "write file with mode 444 (should return error) ===="
 
 }
 run_test 33 "write file with mode 444 (should return error) ===="
 
-test_34() {
-       $MCREATE $DIR/f
-       $TRUNCATE $DIR/f 100
-       rm $DIR/f
+TEST_34_SIZE=${TEST_34_SIZE:-2000000000000}
+test_34a() {
+       rm -f $DIR/test_34_file
+       $MCREATE $DIR/test_34_file || error
+       $LFIND $DIR/test_34_file | grep -q "Has no stripe information" || error
+       $TRUNCATE $DIR/test_34_file $TEST_34_SIZE || error
+       $LFIND $DIR/test_34_file | grep -q "Has no stripe information" || error
+       $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error
+}
+run_test 34a "truncate file that has not been opened ==========="
+
+test_34b() {
+       $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error
+       $OPENFILE -f O_RDONLY $DIR/test_34_file
+       $LFIND $DIR/test_34_file | grep -q "Has no stripe information" || error
+       $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error
 }
 }
-run_test 34 "truncate file that has not been opened ============"
+run_test 34b "O_RDONLY opening file doesn't create objects ====="
+
+test_34c() {
+       $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error
+       $OPENFILE -f O_RDWR $DIR/test_34_file
+       $LFIND $DIR/test_34_file | grep -q "Has no stripe information" && error
+       $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error
+}
+run_test 34c "O_RDWR opening file-with-size works =============="
+
+test_34d() {
+       dd if=/dev/zero of=$DIR/test_34_file conv=notrunc bs=4k count=1 || error
+       $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error
+       rm $DIR/test_34_file
+}
+run_test 34d "write to sparse file ============================="
+
+test_34e() {
+       rm -f $DIR/test_34_file
+       $MCREATE $DIR/test_34_file || error
+       $TRUNCATE $DIR/test_34_file 1000 || error
+       $CHECKSTAT -s 1000 $DIR/test_34_file || error
+       $OPENFILE -f O_RDWR $DIR/test_34_file
+       $CHECKSTAT -s 1000 $DIR/test_34_file || error
+}
+run_test 34e "create objects, some with size and some without =="
 
 test_35() {
 
 test_35() {
-       [ -e $DIR/test_35_file ] && rm -fr $DIR/test_35_file
        cp /bin/sh $DIR/test_35_file
        chmod 444 $DIR/test_35_file
        chown $RUNAS_ID $DIR/test_35_file
        cp /bin/sh $DIR/test_35_file
        chmod 444 $DIR/test_35_file
        chown $RUNAS_ID $DIR/test_35_file
-       $DIR/test_35_file && error
-       return 0
+       $DIR/test_35_file && error || true
+       rm $DIR/test_35_file
 }
 run_test 35 "exec file with mode 444 (should return error) ====="
 
 test_36a() {
 }
 run_test 35 "exec file with mode 444 (should return error) ====="
 
 test_36a() {
-       log 36  "cvs operations ===================================="
-       mkdir -p $DIR/cvsroot
-       chown $RUNAS_ID $DIR/cvsroot
-       $RUNAS cvs -d $DIR/cvsroot init 
+       sleep 1         # we need a rest, or UMLs clock becomes skewed
+       rm -f $DIR/test_36_file
+       utime $DIR/test_36_file || error
 }
 }
-run_test 36a "cvs init ========================================="
+run_test 36a "MDS utime check (mknod, utime) ==================="
 
 test_36b() {
 
 test_36b() {
-       # on the LLNL clusters, runas will still pick up root's $TMP settings,
-        # which will not be writable for the runas user, and then you get a CVS
-       # error message with a corrupt path string (CVS bug) and panic.
-       # We're not using much space, so just stick it in /tmp, which is
-       # safe.
-       OLDTMPDIR=$TMPDIR
-       OLDTMP=$TMP
-       TMPDIR=/tmp
-       TMP=/tmp
-
-       cd /etc/init.d
-       $RUNAS cvs -d $DIR/cvsroot import -m "nomesg"  reposname vtag rtag
-
-       TMPDIR=$OLDTMPDIR
-       TMP=$OLDTMP
+       sleep 1
+       echo "" > $DIR/test_36_file
+       utime $DIR/test_36_file || error
 }
 }
-run_test 36b "cvs import ======================================="
+run_test 36b "OST utime check (open, utime) ===================="
 
 test_36c() {
 
 test_36c() {
-       cd $DIR
-       mkdir -p $DIR/reposname
-       chown $RUNAS_ID $DIR/reposname
-       $RUNAS cvs -d $DIR/cvsroot co reposname
+       sleep 1
+       rm -f $DIR/d36/test_36_file
+       mkdir $DIR/d36
+       chown $RUNAS_ID $DIR/d36
+       $RUNAS utime $DIR/d36/test_36_file || error
 }
 }
-run_test 36c "cvs checkout ====================================="
+run_test 36c "non-root MDS utime check (mknod, utime) =========="
 
 test_36d() {
 
 test_36d() {
-       cd $DIR/reposname
-       $RUNAS touch foo36
-       $RUNAS cvs add -m 'addmsg' foo36
+       sleep 1
+       echo "" > $DIR/d36/test_36_file
+       $RUNAS utime $DIR/d36/test_36_file || error
 }
 }
-run_test 36d "cvs add =========================================="
+run_test 36d "non-root OST utime check (open, utime) ==========="
 
 test_36e() {
 
 test_36e() {
-       cd $DIR/reposname
-       $RUNAS cvs update
-}
-run_test 36e "cvs update ======================================="
-
-# XXX change this: use a non root user
-test_36f() {
-       cd $DIR/reposname
-       $RUNAS cvs commit -m 'nomsg' foo36
+       sleep 1
+       [ $RUNAS_ID -eq $UID ] && return
+       touch $DIR/d36/test_36_file2
+       $RUNAS utime $DIR/d36/test_36_file2 && error || true
 }
 }
-run_test 36f "cvs commit ======================================="
+run_test 36e "utime on non-owned file (should return error) ===="
 
 test_37() {
        mkdir -p $DIR/dextra
        echo f > $DIR/dextra/fbugfile
 
 test_37() {
        mkdir -p $DIR/dextra
        echo f > $DIR/dextra/fbugfile
-       mount -t ext2 -o loop /$EXT2_DEV $DIR/dextra
-       ls $DIR/dextra |grep "\<fbugfile\>" && error
-       umount /$EXT2_DEV
-       rm -f DIR/dextra/fbugfile
+       mount -t ext2 -o loop $EXT2_DEV $DIR/dextra
+       ls $DIR/dextra | grep "\<fbugfile\>" && error
+       umount $DIR/dextra || error
+       rm -f $DIR/dextra/fbugfile || error
 }
 }
-run_test 37 "ls a mounted file system to check the old contents ====="
+run_test 37 "ls a mounted file system to check old content ====="
 
 # open(file, O_DIRECTORY) will leak a request and not cleanup (bug 1501)
 test_38() {
 
 # open(file, O_DIRECTORY) will leak a request and not cleanup (bug 1501)
 test_38() {
-        o_directory $DIR/test38
+       o_directory $DIR/test38
 }
 run_test 38 "open a regular file with O_DIRECTORY =============="
 }
 run_test 38 "open a regular file with O_DIRECTORY =============="
-        
+
+test_39() {
+       touch $DIR/test_39_file
+       touch $DIR/test_39_file2
+#      ls -l  $DIR/test_39_file $DIR/test_39_file2
+#      ls -lu  $DIR/test_39_file $DIR/test_39_file2
+#      ls -lc  $DIR/test_39_file $DIR/test_39_file2
+       sleep 2
+       $OPENFILE -f O_CREAT:O_TRUNC:O_WRONLY $DIR/test_39_file2
+#      ls -l  $DIR/test_39_file $DIR/test_39_file2
+#      ls -lu  $DIR/test_39_file $DIR/test_39_file2
+#      ls -lc  $DIR/test_39_file $DIR/test_39_file2
+       [ $DIR/test_39_file2 -nt $DIR/test_39_file ] || error
+}
+run_test 39 "mtime changed on create ==========================="
+
+test_40() {
+       dd if=/dev/zero of=$DIR/f40 bs=4096 count=1
+       $RUNAS $OPENFILE -f O_WRONLY:O_TRUNC $DIR/f40 && error
+       $CHECKSTAT -t file -s 4096 $DIR/f40 || error
+}
+run_test 40 "failed open(O_TRUNC) doesn't truncate ============="
+
+test_41() {
+       # bug 1553
+       small_write $DIR/f41 18
+}
+run_test 41 "test small file write + fstat ====================="
+
+# on the LLNL clusters, runas will still pick up root's $TMP settings,
+# which will not be writable for the runas user, and then you get a CVS
+# error message with a corrupt path string (CVS bug) and panic.
+# We're not using much space, so just stick it in /tmp, which is safe.
+OLDTMPDIR=$TMPDIR
+OLDTMP=$TMP
+TMPDIR=/tmp
+TMP=/tmp
+OLDHOME=$HOME
+[ $RUNAS_ID -ne $UID ] && HOME=/tmp
+
+test_99a() {
+       echo 99 "cvs operations ===================================="
+       mkdir -p $DIR/d99cvsroot
+       chown $RUNAS_ID $DIR/d99cvsroot
+       $RUNAS cvs -d $DIR/d99cvsroot init || error
+}
+run_test 99a "cvs init ========================================="
+
+test_99b() {
+       cd /etc/init.d
+       $RUNAS cvs -d $DIR/d99cvsroot import -m "nomesg" d99reposname vtag rtag
+}
+run_test 99b "cvs import ======================================="
+
+test_99c() {
+       cd $DIR
+       mkdir -p $DIR/d99reposname
+       chown $RUNAS_ID $DIR/d99reposname
+       $RUNAS cvs -d $DIR/d99cvsroot co d99reposname
+}
+run_test 99c "cvs checkout ====================================="
+
+test_99d() {
+       cd $DIR/d99reposname
+       $RUNAS touch foo99
+       $RUNAS cvs add -m 'addmsg' foo99
+}
+run_test 99d "cvs add =========================================="
+
+test_99e() {
+       cd $DIR/d99reposname
+       $RUNAS cvs update
+}
+run_test 99e "cvs update ======================================="
+
+test_99f() {
+       cd $DIR/d99reposname
+       $RUNAS cvs commit -m 'nomsg' foo99
+}
+run_test 99f "cvs commit ======================================="
+
+TMPDIR=$OLDTMPDIR
+TMP=$OLDTMP
+HOME=$OLDHOME
 
 log "cleanup: ======================================================"
 
 log "cleanup: ======================================================"
-rm -r $DIR/[Rdfs][1-9]*
+rm -rf $DIR/[Rdfs][1-9]*
 if [ "$I_MOUNTED" = "yes" ]; then
        sh llmountcleanup.sh || error
 fi
 if [ "$I_MOUNTED" = "yes" ]; then
        sh llmountcleanup.sh || error
 fi
index 8145e63..1895c8a 100644 (file)
 
 set -e
 
 
 set -e
 
-PATH=$PATH:.
+ONLY=${ONLY:-"$*"}
+ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"8"} # bug 1557
+
+SRCDIR=`dirname $0`
+PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH
 
 CHECKSTAT=${CHECKSTAT:-"checkstat -v"}
 
 CHECKSTAT=${CHECKSTAT:-"checkstat -v"}
-MOUNT1=${MOUNT1:-/mnt/lustre1}
-MOUNT2=${MOUNT2:-/mnt/lustre2}
+CREATETEST=${CREATETEST:-createtest}
+LFIND=${LFIND:-lfind}
+LSTRIPE=${LSTRIPE:-lstripe}
+LCTL=${LCTL:-lctl}
+MCREATE=${MCREATE:-mcreate}
+OPENFILE=${OPENFILE:-openfile}
+OPENUNLINK=${OPENUNLINK:-openunlink}
+TOEXCL=${TOEXCL:-toexcl}
+TRUNCATE=${TRUNCATE:-truncate}
+
+if [ $UID -ne 0 ]; then
+       RUNAS_ID="$UID"
+       RUNAS=""
+else
+       RUNAS_ID=${RUNAS_ID:-500}
+       RUNAS=${RUNAS:-"runas -u $RUNAS_ID"}
+fi
+
 export NAME=${NAME:-mount2}
 
 export NAME=${NAME:-mount2}
 
+SAVE_PWD=$PWD
+
 clean() {
 clean() {
-        echo -n "cln.."
-        sh llmountcleanup.sh > /dev/null
+       echo -n "cln.."
+       sh llmountcleanup.sh > /dev/null || exit 20
 }
 }
-
 CLEAN=${CLEAN:-clean}
 CLEAN=${CLEAN:-clean}
+
 start() {
 start() {
-        echo -n "mnt.."
-        sh llrmount.sh > /dev/null
-        echo -n "done"
+       echo -n "mnt.."
+       sh llrmount.sh > /dev/null || exit 10
+       echo "done"
 }
 START=${START:-start}
 
 }
 START=${START:-start}
 
-error () { 
-    echo FAIL
-    exit 1
-}
-
-pass() { 
-    echo PASS
-}
-
-mkdir -p $MOUNT2
-mount | grep $MOUNT1 || sh llmount.sh
-
-echo -n "test 1: check create on 2 mtpt's..."
-touch $MOUNT1/f1
-[ -f $MOUNT2/f1 ] || error
-pass
-
-echo "test 2: check attribute updates on 2 mtpt's..."
-chmod 777 $MOUNT2/f1
-$CHECKSTAT -t file -p 0777 $MOUNT1/f1 || error
-pass
-
-echo "test 2b: check cached attribute updates on 2 mtpt's..."
-touch $MOUNT1/f2b
-ls -l $MOUNT2/f2b
-chmod 777 $MOUNT2/f2b
-$CHECKSTAT -t file -p 0777 $MOUNT1/f2b || error
-pass
-
-echo "test 2c: check cached attribute updates on 2 mtpt's..."
-touch $MOUNT1/f2c
-ls -l $MOUNT2/f2c
-chmod 777 $MOUNT1/f2c
-$CHECKSTAT -t file -p 0777 $MOUNT2/f2c || error
-pass
-
-echo "test 3: check after remount attribute updates on 2 mtpt's..."
-chmod a-x $MOUNT2/f1
-$CLEAN
-$START
-$CHECKSTAT -t file -p 0666 $MOUNT1/f1 || error
-pass
-
-echo "test 4: unlink on one mountpoint removes file on other..."
-rm $MOUNT2/f1
-$CHECKSTAT -a $MOUNT1/f1 || error
-pass
-
-echo -n "test 5: symlink on one mtpt, readlink on another..."
-( cd $MOUNT1 ; ln -s this/is/good lnk )
-
-[ "this/is/good" = "`perl -e 'print readlink("/mnt/lustre2/lnk");'`" ] || error
-pass
-
-echo -n "test 6: fstat validation on multiple mount points..."
-./multifstat $MOUNT1/f6 $MOUNT2/f6
-pass
-
-if [ -n "$BUG_1365" ]; then
-echo -n "test 7: create a file on one mount, truncate it on the other..."
-mcreate $MOUNT1/f1
-truncate $MOUNT2/f1 100
-rm $MOUNT1/f1
-pass
-else
-echo "Skipping test for 1365: set \$BUG_1365 to run it (and crash, likely)."
-fi
+log() {
+       echo "$*"
+       lctl mark "$*" || true
+}
+
+run_one() {
+       if ! mount | grep -q $DIR1; then
+               $START
+       fi
+       log "== test $1: $2"
+       test_$1 || error
+       pass
+       cd $SAVE_PWD
+       $CLEAN
+}
+
+run_test() {
+       for O in $ONLY; do
+               if [ "`echo $1 | grep '\<'$O'[a-z]*\>'`" ]; then
+                       echo ""
+                       run_one $1 "$2"
+                       return $?
+               else
+                       echo -n "."
+               fi
+       done
+       for X in $EXCEPT $ALWAYS_EXCEPT; do
+               if [ "`echo $1 | grep '\<'$X'[a-z]*\>'`" ]; then
+                       echo "skipping excluded test $1"
+                       return 0
+               fi
+       done
+       if [ -z "$ONLY" ]; then
+               run_one $1 "$2"
+               return $?
+       fi
+}
+
+error () {
+       echo "FAIL: $@"
+       exit 1
+}
+
+pass() {
+       echo PASS
+}
+
+MOUNT1=`mount| awk '/^'$NAME' .* lustre_lite / { print $3 }'| head -1`
+MOUNT2=`mount| awk '/^'$NAME' .* lustre_lite / { print $3 }'| tail -1`
+[ -z "$MOUNT1" ] && error "NAME=$NAME not mounted once"
+[ "$MOUNT1" = "$MOUNT2" ] && error "NAME=$NAME not mounted twice"
+[ `mount| awk '/^'$NAME' .* lustre_lite / { print $3 }'| wc -l` -ne 2 ] && \
+       error "NAME=$NAME mounted more than twice"
+
+DIR1=${DIR1:-$MOUNT1}
+DIR2=${DIR2:-$MOUNT2}
+[ -z "`echo $DIR1 | grep $MOUNT1`" ] && echo "$DIR1 not in $MOUNT1" && exit 96
+[ -z "`echo $DIR2 | grep $MOUNT2`" ] && echo "$DIR2 not in $MOUNT2" && exit 95
+
+rm -f $DIR1/[df][0-9]* $DIR1/lnk
+
+test_1a() {
+       touch $DIR1/f1
+       [ -f $DIR2/f1 ] || error
+}
+run_test 1a "check create on 2 mtpt's =========================="
+
+test_1b() {
+       chmod 777 $DIR2/f1
+       $CHECKSTAT -t file -p 0777 $DIR1/f1 || error
+       chmod a-x $DIR2/f1
+}
+run_test 1b "check attribute updates on 2 mtpt's ==============="
+
+test_1c() {
+       $CHECKSTAT -t file -p 0666 $DIR1/f1 || error
+}
+run_test 1c "check after remount attribute updates on 2 mtpt's ="
+
+test_1d() {
+       rm $DIR2/f1
+       $CHECKSTAT -a $DIR1/f1 || error
+}
+run_test 1d "unlink on one mountpoint removes file on other ===="
+
+test_2a() {
+       touch $DIR1/f2a
+       ls -l $DIR2/f2a
+       chmod 777 $DIR2/f2a
+       $CHECKSTAT -t file -p 0777 $DIR1/f2a || error
+}
+run_test 2a "check cached attribute updates on 2 mtpt's ========"
+
+test_2b() {
+       touch $DIR1/f2b
+       ls -l $DIR2/f2b
+       chmod 777 $DIR1/f2b
+       $CHECKSTAT -t file -p 0777 $DIR2/f2b || error
+}
+run_test 2b "check cached attribute updates on 2 mtpt's ========"
+
+test_3() {
+       ( cd $DIR1 ; ln -s this/is/good lnk )
+       [ "this/is/good" = "`perl -e 'print readlink("'$DIR2/lnk'");'`" ] || \
+               error
+}
+run_test 3 "symlink on one mtpt, readlink on another ==========="
+
+test_4() {
+       ./multifstat $DIR1/f6 $DIR2/f6
+}
+run_test 4 "fstat validation on multiple mount points =========="
+
+test_5() {
+       mcreate $DIR1/f5
+       truncate $DIR2/f5 100
+       rm $DIR1/f5
+}
+run_test 5 "create a file on one mount, truncate it on the other"
+
+test_6() {
+       ./openunlink $DIR1/f6 $DIR2/f6 || error
+}
+run_test 6 "remove of open file on other node =================="
+
+test_7() {
+       ./opendirunlink $DIR1/d7 $DIR2/d7 || error
+}
+run_test 7 "remove of open directory on other node ============="
+
+test_8() {
+       ./opendevunlink $DIR1/dev8 $DIR2/dev8 || error
+}
+run_test 8 "remove of open special file on other node =========="
+
+test_9() {
+       MTPT=1
+       > $DIR2/f9
+       for C in a b c d e f g h i j k l; do
+               DIR=`eval echo \\$DIR$MTPT`
+               echo -n $C >> $DIR/f9
+               [ "$MTPT" -eq 1 ] && MTPT=2 || MTPT=1
+       done
+       [ "`cat $DIR1/f9`" = "abcdefghijkl" ] || error
+}
+run_test 9 "append of file with sub-page size on multiple mounts"
+
+test_10() {
+       MTPT=1
+       OFFSET=0
+       > $DIR2/f10
+       for C in a b c d e f g h i j k l; do
+               DIR=`eval echo \\$DIR$MTPT`
+               echo -n $C | dd of=$DIR/f10 bs=1 seek=$OFFSET count=1
+               [ "$MTPT" -eq 1 ] && MTPT=2 || MTPT=1
+               OFFSET=`expr $OFFSET + 1`
+       done
+       [ "`cat $DIR1/f10`" = "abcdefghijkl" ] || error
+}
+run_test 10 "write of file with sub-page size on multiple mounts "
 
 
-echo "test 9: remove of open file on other node..."
-./openunlink $MOUNT1/f9 $MOUNT2/f9 || error
-pass
-
-echo "test 9b: remove of open directory on other node..."
-./opendirunlink $MOUNT1/dir1 $MOUNT2/dir1 || error
-pass
-
-#echo "test 9c: remove of open special file on other node..."
-#./opendevunlink $MOUNT1/dev1 $MOUNT2/dev1 || error
-#pass
-
-echo -n "test 10: append of file with sub-page size on multiple mounts..."
-MTPT=1
-> $MOUNT2/f10
-for C in a b c d e f g h i j k l; do
-       MOUNT=`eval echo \\$MOUNT$MTPT`
-       echo -n $C >> $MOUNT/f10
-       [ "$MTPT" -eq 1 ] && MTPT=2 || MTPT=1
-done
-[ "`cat $MOUNT1/f10`" = "abcdefghijkl" ] && pass || error
-       
-echo -n "test 11: write of file with sub-page size on multiple mounts..."
-MTPT=1
-OFFSET=0
-> $MOUNT2/f11
-for C in a b c d e f g h i j k l; do
-       MOUNT=`eval echo \\$MOUNT$MTPT`
-       echo -n $C | dd of=$MOUNT/f11 bs=1 seek=$OFFSET count=1
-       [ "$MTPT" -eq 1 ] && MTPT=2 || MTPT=1
-       OFFSET=`expr $OFFSET + 1`
-done
-[ "`cat $MOUNT1/f11`" = "abcdefghijkl" ] && pass || error
-       
-rm -f $MOUNT1/f[0-9]* $MOUNT1/lnk
-
-$CLEAN
-
-exit
+rm -f $DIR1/f[0-9]* $DIR1/lnk
index 2b3adc3..f7a9241 100644 (file)
@@ -6,7 +6,7 @@ config=${1:-uml.xml}
 LMC=${LMC:-lmc}
 TMP=${TMP:-/tmp}
 
 LMC=${LMC:-lmc}
 TMP=${TMP:-/tmp}
 
-MDSDEV=${MDSDEV:-$TMP/mds1}
+MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
 MDSSIZE=${MDSSIZE:-50000}
 
 OSTDEVBASE=$TMP/ost
 MDSSIZE=${MDSSIZE:-50000}
 
 OSTDEVBASE=$TMP/ost
@@ -19,6 +19,7 @@ STRIPECNT=${STRIPECNT:-1}
 FSTYPE=${FSTYPE:-ext3}
 
 NETTYPE=${NETTYPE:-tcp}
 FSTYPE=${FSTYPE:-ext3}
 
 NETTYPE=${NETTYPE:-tcp}
+NIDTYPE=${NIDTYPE:-$NODETYPE}
 
 # NOTE - You can't have different MDS/OST nodes and also have clients on the
 #        MDS/OST nodes without using --endlevel and --startlevel during lconf.
 
 # NOTE - You can't have different MDS/OST nodes and also have clients on the
 #        MDS/OST nodes without using --endlevel and --startlevel during lconf.
@@ -50,6 +51,10 @@ CLIENTS=${CLIENTS:-"uml3"}
 
 rm -f $config
 
 
 rm -f $config
 
+h2localhost () {
+       echo localhost
+}
+       
 h2tcp () {
        case $1 in
        client) echo '\*' ;;
 h2tcp () {
        case $1 in
        client) echo '\*' ;;
@@ -68,7 +73,7 @@ h2elan () {
 echo -n "adding NET for:"
 for NODE in `echo $MDSNODE $OSTNODES $CLIENTS | tr -s " " "\n" | sort -u`; do
        echo -n " $NODE"
 echo -n "adding NET for:"
 for NODE in `echo $MDSNODE $OSTNODES $CLIENTS | tr -s " " "\n" | sort -u`; do
        echo -n " $NODE"
-       ${LMC} -m $config --add net --node $NODE --nid `h2$NETTYPE $NODE` --nettype $NETTYPE || exit 1
+       ${LMC} -m $config --add net --node $NODE --nid `h2$NIDTYPE $NODE` --nettype $NETTYPE || exit 1
 done
 
 # configure mds server
 done
 
 # configure mds server
@@ -82,7 +87,7 @@ echo -n "adding OST on:"
 for NODE in $OSTNODES; do
        eval OSTDEV=\$OSTDEV$COUNT
        echo -n " $NODE"
 for NODE in $OSTNODES; do
        eval OSTDEV=\$OSTDEV$COUNT
        echo -n " $NODE"
-       OSTDEV=${OSTDEV:-$OSTDEVBASE$COUNT}
+       OSTDEV=${OSTDEV:-$OSTDEVBASE$COUNT-`hostname`}
         ${LMC} -m $config --add ost --node $NODE --lov lov1 --fstype $FSTYPE --dev $OSTDEV --size $OSTSIZE || exit 21
        COUNT=`expr $COUNT + 1`
 done
         ${LMC} -m $config --add ost --node $NODE --lov lov1 --fstype $FSTYPE --dev $OSTDEV --size $OSTSIZE || exit 21
        COUNT=`expr $COUNT + 1`
 done
index c6a5d7d..9fe9f26 100644 (file)
@@ -30,7 +30,14 @@ int main(int argc, char *argv[])
        if (argc != 2)
                usage(argv[0]);
 
        if (argc != 2)
                usage(argv[0]);
 
-       before_mknod = time(0);
+       /* Adjust the before time back one second, because the kernel's
+        * CURRENT_TIME (lockless clock reading, used to set inode times)
+        * may drift against the do_gettimeofday() time (TSC-corrected and
+        * locked clock reading, used to return timestamps to user space).
+        * This means that the mknod time could be a second older than the
+        * before time, even for a local filesystem such as ext3.
+        */
+       before_mknod = time(0) - 1;
        rc = mknod(filename, 0700, S_IFREG);
        after_mknod = time(0);
        if (rc && errno != EEXIST) {
        rc = mknod(filename, 0700, S_IFREG);
        after_mknod = time(0);
        if (rc && errno != EEXIST) {
@@ -52,13 +59,15 @@ int main(int argc, char *argv[])
                        return 4;
                }
 
                        return 4;
                }
 
-               printf("%s: good mknod times %lu <= %lu <= %lu\n",
-                      prog, before_mknod, st.st_mtime, after_mknod);
+               printf("%s: good mknod times %lu%s <= %lu <= %lu\n",
+                      prog, before_mknod, before_mknod == st.st_mtime ? "*":"",
+                      st.st_mtime, after_mknod);
 
                sleep(5);
        }
 
 
                sleep(5);
        }
 
-       before_utime = time(0);
+       /* See above */
+       before_utime = time(0) - 1;
        rc = utime(filename, NULL);
        after_utime = time(0);
        if (rc) {
        rc = utime(filename, NULL);
        after_utime = time(0);
        if (rc) {
@@ -80,8 +89,9 @@ int main(int argc, char *argv[])
                return 7;
        }
 
                return 7;
        }
 
-       printf("%s: good utime times %lu <= %lu <= %lu\n",
-              prog, before_utime, st.st_mtime, after_utime);
+       printf("%s: good utime times %lu%s <= %lu <= %lu\n",
+              prog, before_utime, before_utime == st.st_mtime ? "*" : "",
+              st.st_mtime, after_utime);
 
        return 0;
 }
 
        return 0;
 }
index 06a1588..20f4185 100644 (file)
@@ -15,4 +15,6 @@ obdstat
 obdio
 obdbarrier
 lload
 obdio
 obdbarrier
 lload
-wirecheck
\ No newline at end of file
+wirecheck
+.*.cmd
+.*.d
index c1b93e6..7a21df3 100644 (file)
@@ -4,4 +4,4 @@ from lustredb import LustreDB, LustreDB_XML, LustreDB_LDAP
 from error import LconfError, OptionError
 from cmdline import Options
 
 from error import LconfError, OptionError
 from cmdline import Options
 
-CONFIG_VERSION="2003060501"
+CONFIG_VERSION="2003070801"
index a5e8580..04841eb 100644 (file)
@@ -31,6 +31,7 @@
 import sys, getopt, types
 import string, os
 import ldap
 import sys, getopt, types
 import string, os
 import ldap
+from stat import S_IROTH, S_IRGRP
 PYMOD_DIR = "/usr/lib/lustre/python"
 
 def development_mode():
 PYMOD_DIR = "/usr/lib/lustre/python"
 
 def development_mode():
@@ -43,13 +44,14 @@ if not development_mode():
     sys.path.append(PYMOD_DIR)
 
 import Lustre
     sys.path.append(PYMOD_DIR)
 
 import Lustre
+PARAM = Lustre.Options.PARAM
 
 lactive_options = [
 
 lactive_options = [
-    ('ldapurl',"LDAP server URL", Lustre.Options.PARAM,
-     "ldap://localhost"),
-    ('config', "Cluster config name used for LDAP query", Lustre.Options.PARAM),
-    ('group', "The group of devices to update", Lustre.Options.PARAM),
-    ('active', "The active node name", Lustre.Options.PARAM),
+    ('ldapurl',"LDAP server URL", PARAM, "ldap://localhost"),
+    ('config', "Cluster config name used for LDAP query", PARAM),
+    ('group', "The group of devices to update", PARAM),
+    ('active', "The active node name", PARAM),
+    ('pwfile', "File containing password", PARAM),
     ]
 
 def fatal(*args):
     ]
 
 def fatal(*args):
@@ -57,7 +59,6 @@ def fatal(*args):
     print "! " + msg
     sys.exit(1)
 
     print "! " + msg
     sys.exit(1)
 
-
 cl = Lustre.Options("lactive","", lactive_options)
 config, args = cl.parse(sys.argv[1:])
 
 cl = Lustre.Options("lactive","", lactive_options)
 config, args = cl.parse(sys.argv[1:])
 
@@ -66,10 +67,32 @@ if not (config.group or config.active):
 
 if not config.config:
     fatal("Missing config")
 
 if not config.config:
     fatal("Missing config")
-    
+
+if config.pwfile:
+    try:
+        pwperm = os.stat(config.pwfile)[0]
+        pwreadable = pwperm & (S_IRGRP | S_IROTH)
+        if pwreadable:
+            if pwreadable == (S_IRGRP | S_IROTH):
+                readable_by = "group and others"
+            elif pwreadable == S_IRGRP:
+                readable_by = "group"
+            else:
+                readable_by = "others"
+            print "WARNING: Password file %s is readable by %s" % (
+                config.pwfile, readable_by)
+                 
+        pwfile = open(config.pwfile, "r")
+        pw = string.strip(pwfile.readline())
+        pwfile.close()
+    except Exception, e:
+        fatal("Can't read secret from pwfile %s: %s" % (config.pwfile, e))
+else:
+    print "no pwfile specified, binding anonymously"
+    pw = ""
+
 base = "config=%s,fs=lustre" % (config.config,)
 base = "config=%s,fs=lustre" % (config.config,)
-db = Lustre.LustreDB_LDAP('', {}, base=base, pw = "secret",
-                          url = config.ldapurl)
+db = Lustre.LustreDB_LDAP('', {}, base=base, pw = pw, url = config.ldapurl)
 
 active_node = db.lookup_name(config.active)
 if not active_node:
 
 active_node = db.lookup_name(config.active)
 if not active_node:
index 15e5a2c..92ec8e2 100755 (executable)
@@ -1,7 +1,8 @@
 #!/usr/bin/env python
 #
 #!/usr/bin/env python
 #
-#  Copyright (C) 2002 Cluster File Systems, Inc.
-#   Author: Robert Read <rread@clusterfs.com>
+#  Copyright (C) 2002-2003 Cluster File Systems, Inc.
+#   Authors: Robert Read <rread@clusterfs.com>
+#            Mike Shaver <shaver@clusterfs.com>
 #   This file is part of Lustre, http://www.lustre.org.
 #
 #   Lustre is free software; you can redistribute it and/or
 #   This file is part of Lustre, http://www.lustre.org.
 #
 #   Lustre is free software; you can redistribute it and/or
@@ -26,7 +27,7 @@
 
 import sys, getopt, types
 import string, os, stat, popen2, socket, time, random, fcntl, select
 
 import sys, getopt, types
 import string, os, stat, popen2, socket, time, random, fcntl, select
-import re, exceptions, signal
+import re, exceptions, signal, traceback
 import xml.dom.minidom
 
 if sys.version[0] == '1':
 import xml.dom.minidom
 
 if sys.version[0] == '1':
@@ -57,7 +58,7 @@ MAX_LOOP_DEVICES = 256
 PORTALS_DIR = 'portals'
 
 
 PORTALS_DIR = 'portals'
 
 
-# Please keep these uptodate with the values in portals/kp30.h
+# Please keep these in sync with the values in portals/kp30.h
 ptldebug_names = { 
     "trace" :     (1 << 0),
     "inode" :     (1 << 1),
 ptldebug_names = { 
     "trace" :     (1 << 0),
     "inode" :     (1 << 1),
@@ -107,6 +108,8 @@ subsystem_names = {
     "ptlrouter" :   (20 << 24),
     "cobd" :        (21 << 24),
     "ptlbd" :       (22 << 24),
     "ptlrouter" :   (20 << 24),
     "cobd" :        (21 << 24),
     "ptlbd" :       (22 << 24),
+    "log" :         (23 << 24),
+    "mgmt" :        (24 << 24),
     }
 
 
     }
 
 
@@ -423,8 +426,11 @@ class LCTLInterface:
   add_route %s %s %s
   quit  """ % (net,
                gw, lo, hi)
   add_route %s %s %s
   quit  """ % (net,
                gw, lo, hi)
-        self.run(cmds)
-
+        try:
+            self.run(cmds)
+        except CommandError, e:
+            log ("ignore: ")
+            e.dump()
                 
     def del_route(self, net, gw, lo, hi):
         cmds =  """
                 
     def del_route(self, net, gw, lo, hi):
         cmds =  """
@@ -443,7 +449,11 @@ class LCTLInterface:
   quit """ % (net,
               uuid, tgt, net,
               gw, tgt)
   quit """ % (net,
               uuid, tgt, net,
               gw, tgt)
-        self.run(cmds)
+        try:
+            self.run(cmds)
+        except CommandError, e:
+            log ("ignore: ")
+            e.dump()
 
     # add a route to a range
     def del_route_host(self, net, uuid, gw, tgt):
 
     # add a route to a range
     def del_route_host(self, net, uuid, gw, tgt):
@@ -795,7 +805,6 @@ def get_local_address(net_type, wildcard):
         local=string.rstrip(local[0])
 
     return local
         local=string.rstrip(local[0])
 
     return local
-        
 
 # XXX: instead of device_list, ask for $name and see what we get
 def is_prepared(name):
 
 # XXX: instead of device_list, ask for $name and see what we get
 def is_prepared(name):
@@ -1020,8 +1029,8 @@ class Network(Module):
                             self_nid = self.nid
                         if gw_nid < self_nid:
                             try:
                             self_nid = self.nid
                         if gw_nid < self_nid:
                             try:
-                                lctl.disconnect(router.net_type, router.nid, router.port,
-                                                router.uuid)
+                                lctl.disconnect(gw.net_type, gw.nid, gw.port,
+                                                gw.uuid)
                             except CommandError, e:
                                 print "disconnectAll failed: ", self.name
                                 e.dump()
                             except CommandError, e:
                                 print "disconnectAll failed: ", self.name
                                 e.dump()
@@ -1087,6 +1096,27 @@ class RouteTable(Module):
                 e.dump()
                 cleanup_error(e.rc)
 
                 e.dump()
                 cleanup_error(e.rc)
 
+class Management(Module):
+    def __init__(self, db):
+        Module.__init__(self, 'MGMT', db)
+        self.add_lustre_module('obdclass', 'obdclass')
+        self.add_lustre_module('ptlrpc', 'ptlrpc')
+        self.add_lustre_module('ldlm', 'ldlm')
+        self.add_lustre_module('mgmt', 'mgmt_svc')
+
+    def prepare(self):
+        if is_prepared(self.name):
+            return
+        self.info()
+        lctl.newdev(attach="mgmt %s %s" % (self.name, self.uuid))
+
+    def safe_to_clean(self):
+        return 1
+
+    def cleanup(self):
+        if is_prepared(self.name):
+            Module.cleanup(self)
+
 class LDLM(Module):
     def __init__(self,db):
         Module.__init__(self, 'LDLM', db)
 class LDLM(Module):
     def __init__(self,db):
         Module.__init__(self, 'LDLM', db)
@@ -1109,7 +1139,7 @@ class LDLM(Module):
             Module.cleanup(self)
 
 class LOV(Module):
             Module.cleanup(self)
 
 class LOV(Module):
-    def __init__(self, db, uuid):
+    def __init__(self, db, uuid, fs_name):
         Module.__init__(self, 'LOV', db)
         self.add_lustre_module('mdc', 'mdc')
         self.add_lustre_module('lov', 'lov')
         Module.__init__(self, 'LOV', db)
         self.add_lustre_module('mdc', 'mdc')
         self.add_lustre_module('lov', 'lov')
@@ -1123,11 +1153,12 @@ class LOV(Module):
         self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist))
         self.osclist = []
         self.client_uuid = generate_client_uuid(self.name)
         self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist))
         self.osclist = []
         self.client_uuid = generate_client_uuid(self.name)
+        self.fs_name = fs_name
         self.mdc_name = ''
         self.mdc_name = ''
-        self.mdc = get_mdc(db, self.client_uuid, self.name, self.mds_uuid)
+        self.mdc = get_mdc(db, self.client_uuid, fs_name, self.mds_uuid)
         for obd_uuid in self.devlist:
             obd = self.db.lookup(obd_uuid)
         for obd_uuid in self.devlist:
             obd = self.db.lookup(obd_uuid)
-            osc = get_osc(obd, self.client_uuid, self.name)
+            osc = get_osc(obd, self.client_uuid, fs_name)
             if osc:
                 self.osclist.append(osc)
             else:
             if osc:
                 self.osclist.append(osc)
             else:
@@ -1142,7 +1173,7 @@ class LOV(Module):
                 # isn't implemented here yet.
                 osc.prepare(ignore_connect_failure=0)
             except CommandError, e:
                 # isn't implemented here yet.
                 osc.prepare(ignore_connect_failure=0)
             except CommandError, e:
-                print "Error preparing OSC %s (inactive)\n" % osc.uuid
+                print "Error preparing OSC %s\n" % osc.uuid
                 raise e
         self.mdc.prepare()
         self.mdc_name = self.mdc.name
                 raise e
         self.mdc.prepare()
         self.mdc_name = self.mdc.name
@@ -1156,7 +1187,7 @@ class LOV(Module):
             Module.cleanup(self)
         for osc in self.osclist:
             osc.cleanup()
             Module.cleanup(self)
         for osc in self.osclist:
             osc.cleanup()
-        mdc = get_mdc(self.db, self.client_uuid, self.name, self.mds_uuid)
+        mdc = get_mdc(self.db, self.client_uuid, self.fs_name, self.mds_uuid)
         mdc.cleanup()
 
     def load_module(self):
         mdc.cleanup()
 
     def load_module(self):
@@ -1172,12 +1203,12 @@ class LOV(Module):
             break
 
 class LOVConfig(Module):
             break
 
 class LOVConfig(Module):
-    def __init__(self,db):
+    def __init__(self, db):
         Module.__init__(self, 'LOVConfig', db)
 
         self.lov_uuid = self.db.get_first_ref('lov')
         l = self.db.lookup(self.lov_uuid)
         Module.__init__(self, 'LOVConfig', db)
 
         self.lov_uuid = self.db.get_first_ref('lov')
         l = self.db.lookup(self.lov_uuid)
-        self.lov = LOV(l, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
+        self.lov = LOV(l, "YOU_SHOULD_NEVER_SEE_THIS_UUID", '')
         
     def prepare(self):
         lov = self.lov
         
     def prepare(self):
         lov = self.lov
@@ -1410,9 +1441,20 @@ class OSD(Module):
         if not self.osdtype == 'obdecho':
             clean_loop(self.devpath)
 
         if not self.osdtype == 'obdecho':
             clean_loop(self.devpath)
 
+def mgmt_uuid_for_fs(mtpt_name):
+    if not mtpt_name:
+        return ''
+    mtpt_db = toplevel.lookup_name(mtpt_name)
+    fs_uuid = mtpt_db.get_first_ref('filesystem')
+    fs = toplevel.lookup(fs_uuid)
+    if not fs:
+        return ''
+    return fs.get_first_ref('mgmt')
+
 # Generic client module, used by OSC and MDC
 class Client(Module):
 # Generic client module, used by OSC and MDC
 class Client(Module):
-    def __init__(self, tgtdb, uuid, module, owner):
+    def __init__(self, tgtdb, uuid, module, fs_name, self_name=None,
+                 module_dir=None):
         self.target_name = tgtdb.getName()
         self.target_uuid = tgtdb.getUUID()
         self.db = tgtdb
         self.target_name = tgtdb.getName()
         self.target_uuid = tgtdb.getUUID()
         self.db = tgtdb
@@ -1427,11 +1469,22 @@ class Client(Module):
 
         self.module = module
         self.module_name = string.upper(module)
 
         self.module = module
         self.module_name = string.upper(module)
-        self.name = '%s_%s_%s_%s' % (self.module_name, socket.gethostname(),
-                                     self.target_name, owner)
+        if not self_name:
+            self.name = '%s_%s_%s_%s' % (self.module_name, socket.gethostname(),
+                                         self.target_name, fs_name)
+        else:
+            self.name = self_name
         self.uuid = uuid
         self.lookup_server(self.tgt_dev_uuid)
         self.uuid = uuid
         self.lookup_server(self.tgt_dev_uuid)
-        self.add_lustre_module(module, module)
+        mgmt_uuid = mgmt_uuid_for_fs(fs_name)
+        if mgmt_uuid:
+            self.mgmt_name = mgmtcli_name_for_uuid(mgmt_uuid)
+        else:
+            self.mgmt_name = ''
+        self.fs_name = fs_name
+        if not module_dir:
+            module_dir = module
+        self.add_lustre_module(module_dir, module)
 
     def lookup_server(self, srv_uuid):
         """ Lookup a server's network information """
 
     def lookup_server(self, srv_uuid):
         """ Lookup a server's network information """
@@ -1461,7 +1514,8 @@ class Client(Module):
                 raise e
         if srv:
             lctl.newdev(attach="%s %s %s" % (self.module, self.name, self.uuid),
                 raise e
         if srv:
             lctl.newdev(attach="%s %s %s" % (self.module, self.name, self.uuid),
-                        setup ="%s %s" %(self.target_uuid, srv.uuid))
+                        setup ="%s %s %s" % (self.target_uuid, srv.uuid,
+                                             self.mgmt_name))
 
     def cleanup(self):
         if is_prepared(self.name):
 
     def cleanup(self):
         if is_prepared(self.name):
@@ -1473,7 +1527,7 @@ class Client(Module):
                 else:
                     srv, r =  find_route(self.get_servers())
                     if srv:
                 else:
                     srv, r =  find_route(self.get_servers())
                     if srv:
-                        lctl.del_route_host(r[0], srv.uuid, r[1], r[2])
+                        lctl.del_route_host(r[0], srv.uuid, r[1], r[3])
             except CommandError, e:
                 log(self.module_name, "cleanup failed: ", self.name)
                 e.dump()
             except CommandError, e:
                 log(self.module_name, "cleanup failed: ", self.name)
                 e.dump()
@@ -1481,13 +1535,22 @@ class Client(Module):
 
 
 class MDC(Client):
 
 
 class MDC(Client):
-    def __init__(self, db, uuid, owner):
-         Client.__init__(self, db, uuid, 'mdc', owner)
+    def __init__(self, db, uuid, fs_name):
+         Client.__init__(self, db, uuid, 'mdc', fs_name)
+
 
 class OSC(Client):
 
 class OSC(Client):
-    def __init__(self, db, uuid, owner):
-         Client.__init__(self, db, uuid, 'osc', owner)
+    def __init__(self, db, uuid, fs_name):
+         Client.__init__(self, db, uuid, 'osc', fs_name)
 
 
+def mgmtcli_name_for_uuid(uuid):
+    return 'MGMTCLI_%s' % uuid
+
+class ManagementClient(Client):
+    def __init__(self, db, uuid):
+        Client.__init__(self, db, uuid, 'mgmt_cli', '',
+                        self_name = mgmtcli_name_for_uuid(db.getUUID()),
+                        module_dir = 'mgmt')
             
 class COBD(Module):
     def __init__(self, db):
             
 class COBD(Module):
     def __init__(self, db):
@@ -1509,12 +1572,12 @@ class COBD(Module):
 
 # virtual interface for  OSC and LOV
 class VOSC(Module):
 
 # virtual interface for  OSC and LOV
 class VOSC(Module):
-    def __init__(self, db, uuid, owner):
+    def __init__(self, db, uuid, fs_name):
         Module.__init__(self, 'VOSC', db)
         if db.get_class() == 'lov':
         Module.__init__(self, 'VOSC', db)
         if db.get_class() == 'lov':
-            self.osc = LOV(db, uuid)
+            self.osc = LOV(db, uuid, fs_name)
         else:
         else:
-            self.osc = get_osc(db, uuid, owner)
+            self.osc = get_osc(db, uuid, fs_name)
     def get_uuid(self):
         return self.osc.uuid
     def get_name(self):
     def get_uuid(self):
         return self.osc.uuid
     def get_name(self):
@@ -1560,10 +1623,12 @@ class ECHO_CLIENT(Module):
     def load_module(self):
         self.osc.load_module()
         Module.load_module(self)
     def load_module(self):
         self.osc.load_module()
         Module.load_module(self)
+
     def cleanup_module(self):
         Module.cleanup_module(self)
         self.osc.cleanup_module()
 
     def cleanup_module(self):
         Module.cleanup_module(self)
         self.osc.cleanup_module()
 
+
 def generate_client_uuid(name):
         client_uuid = '%05x_%.19s_%05x%05x' % (int(random.random() * 1048576),
                                                name,
 def generate_client_uuid(name):
         client_uuid = '%05x_%.19s_%05x%05x' % (int(random.random() * 1048576),
                                                name,
@@ -1571,6 +1636,7 @@ def generate_client_uuid(name):
                                                int(random.random() * 1048576))
         return client_uuid[:36]
 
                                                int(random.random() * 1048576))
         return client_uuid[:36]
 
+
 class Mountpoint(Module):
     def __init__(self,db):
         Module.__init__(self, 'MTPT', db)
 class Mountpoint(Module):
     def __init__(self,db):
         Module.__init__(self, 'MTPT', db)
@@ -1579,6 +1645,7 @@ class Mountpoint(Module):
         fs = self.db.lookup(self.fs_uuid)
         self.mds_uuid = fs.get_first_ref('mds')
         self.obd_uuid = fs.get_first_ref('obd')
         fs = self.db.lookup(self.fs_uuid)
         self.mds_uuid = fs.get_first_ref('mds')
         self.obd_uuid = fs.get_first_ref('obd')
+        self.mgmt_uuid = fs.get_first_ref('mgmt')
         obd = self.db.lookup(self.obd_uuid)
         client_uuid = generate_client_uuid(self.name)
         self.vosc = VOSC(obd, client_uuid, self.name)
         obd = self.db.lookup(self.obd_uuid)
         client_uuid = generate_client_uuid(self.name)
         self.vosc = VOSC(obd, client_uuid, self.name)
@@ -1586,12 +1653,18 @@ class Mountpoint(Module):
             self.add_lustre_module('mdc', 'mdc')
             self.mdc = get_mdc(db, client_uuid, self.name, self.mds_uuid)
         self.add_lustre_module('llite', 'llite')
             self.add_lustre_module('mdc', 'mdc')
             self.mdc = get_mdc(db, client_uuid, self.name, self.mds_uuid)
         self.add_lustre_module('llite', 'llite')
-
+        if self.mgmt_uuid:
+            self.mgmtcli = ManagementClient(db.lookup(self.mgmt_uuid),
+                                            client_uuid)
+        else:
+            self.mgmtcli = None
 
     def prepare(self):
         if fs_is_mounted(self.path):
             log(self.path, "already mounted.")
             return
 
     def prepare(self):
         if fs_is_mounted(self.path):
             log(self.path, "already mounted.")
             return
+        if self.mgmtcli:
+            self.mgmtcli.prepare()
         self.vosc.prepare()
         if self.vosc.need_mdc():
             self.mdc.prepare()
         self.vosc.prepare()
         if self.vosc.need_mdc():
             self.mdc.prepare()
@@ -1632,13 +1705,20 @@ class Mountpoint(Module):
         self.vosc.cleanup()
         if self.vosc.need_mdc():
             self.mdc.cleanup()
         self.vosc.cleanup()
         if self.vosc.need_mdc():
             self.mdc.cleanup()
+        if self.mgmtcli:
+            self.mgmtcli.cleanup()
 
     def load_module(self):
 
     def load_module(self):
+        if self.mgmtcli:
+            self.mgmtcli.load_module()
         self.vosc.load_module()
         Module.load_module(self)
         self.vosc.load_module()
         Module.load_module(self)
+
     def cleanup_module(self):
         Module.cleanup_module(self)
         self.vosc.cleanup_module()
     def cleanup_module(self):
         Module.cleanup_module(self)
         self.vosc.cleanup_module()
+        if self.mgmtcli:
+            self.mgmtcli.cleanup_module()
 
 
 # ============================================================
 
 
 # ============================================================
@@ -1670,6 +1750,8 @@ def getServiceLevel(self):
         ret = 6
     elif type in ('ldlm',):
         ret = 20
         ret = 6
     elif type in ('ldlm',):
         ret = 20
+    elif type in ('mgmt',):
+        ret = 25
     elif type in ('osd', 'cobd'):
         ret = 30
     elif type in ('mdsdev',):
     elif type in ('osd', 'cobd'):
         ret = 30
     elif type in ('mdsdev',):
@@ -1707,15 +1789,15 @@ def getServices(self):
 #
 # OSC is no longer in the xml, so we have to fake it.
 # this is getting ugly and begging for another refactoring
 #
 # OSC is no longer in the xml, so we have to fake it.
 # this is getting ugly and begging for another refactoring
-def get_osc(ost_db, uuid, owner):
-    osc = OSC(ost_db, uuid, owner)
+def get_osc(ost_db, uuid, fs_name):
+    osc = OSC(ost_db, uuid, fs_name)
     return osc
 
     return osc
 
-def get_mdc(db, uuid, owner, mds_uuid):
+def get_mdc(db, uuid, fs_name, mds_uuid):
     mds_db = db.lookup(mds_uuid);
     if not mds_db:
         panic("no mds:", mds_uuid)
     mds_db = db.lookup(mds_uuid);
     if not mds_db:
         panic("no mds:", mds_uuid)
-    mdc = MDC(mds_db, uuid, owner)
+    mdc = MDC(mds_db, uuid, fs_name)
     return mdc
 
 ############################################################
     return mdc
 
 ############################################################
@@ -1842,6 +1924,8 @@ def newService(db):
         n = Mountpoint(db)
     elif type == 'echoclient':
         n = ECHO_CLIENT(db)
         n = Mountpoint(db)
     elif type == 'echoclient':
         n = ECHO_CLIENT(db)
+    elif type == 'mgmt':
+        n = Management(db)
     else:
         panic ("unknown service type:", type)
     return n
     else:
         panic ("unknown service type:", type)
     return n
@@ -2060,7 +2144,7 @@ def sys_set_ptldebug():
 def sys_set_subsystem():
     if config.subsystem != None:
         try:
 def sys_set_subsystem():
     if config.subsystem != None:
         try:
-            val = eval(config.ptldebug, ptldebug_names)
+            val = eval(config.subsystem, subsystem_names)
             val = "0x%x" % (val,)
             sysctl('portals/subsystem_debug', val)
         except NameError, e:
             val = "0x%x" % (val,)
             sysctl('portals/subsystem_debug', val)
         except NameError, e:
@@ -2191,7 +2275,7 @@ lconf_options = [
     ]      
 
 def main():
     ]      
 
 def main():
-    global lctl, config
+    global lctl, config, toplevel
 
     # in the upcall this is set to SIG_IGN
     signal.signal(signal.SIGCHLD, signal.SIG_DFL)
 
     # in the upcall this is set to SIG_IGN
     signal.signal(signal.SIGCHLD, signal.SIG_DFL)
@@ -2241,9 +2325,12 @@ def main():
         dn = "config=%s,fs=lustre" % (config.config)
         db = Lustre.LustreDB_LDAP('', {}, base=dn, url = config.ldapurl)
     else:
         dn = "config=%s,fs=lustre" % (config.config)
         db = Lustre.LustreDB_LDAP('', {}, base=dn, url = config.ldapurl)
     else:
-        cl.usage()
+        print 'Missing config file or ldap URL.'
+        print 'see lconf --help for command summary'
         sys.exit(1)
 
         sys.exit(1)
 
+    toplevel = db
+
     ver = db.get_version()
     if not ver:
         panic("No version found in config data, please recreate.")
     ver = db.get_version()
     if not ver:
         panic("No version found in config data, please recreate.")
@@ -2277,6 +2364,8 @@ if __name__ == "__main__":
         main()
     except Lustre.LconfError, e:
         print e
         main()
     except Lustre.LconfError, e:
         print e
+#        traceback.print_exc(file=sys.stdout)
+        sys.exit(1)
     except CommandError, e:
         e.dump()
         sys.exit(e.rc)
     except CommandError, e:
         e.dump()
         sys.exit(e.rc)
index 80cdcf2..a4681ec 100644 (file)
@@ -205,6 +205,9 @@ command_t cmdlist[] = {
         {"debug_kernel", jt_dbg_debug_kernel, 0,
          "get debug buffer and dump to a file"
          "usage: debug_kernel [file] [raw]"},
         {"debug_kernel", jt_dbg_debug_kernel, 0,
          "get debug buffer and dump to a file"
          "usage: debug_kernel [file] [raw]"},
+        {"dk", jt_dbg_debug_kernel, 0,
+         "get debug buffer and dump to a file"
+         "usage: dk [file] [raw]"},
         {"debug_file", jt_dbg_debug_file, 0,
          "read debug buffer from input and dump to output"
          "usage: debug_file <input> [output] [raw]"},
         {"debug_file", jt_dbg_debug_file, 0,
          "read debug buffer from input and dump to output"
          "usage: debug_file <input> [output] [raw]"},
index 847dd4f..45f837c 100644 (file)
 #define MAX_LOV_UUID_COUNT     1000
 #define OBD_NOT_FOUND          (-1)
 
 #define MAX_LOV_UUID_COUNT     1000
 #define OBD_NOT_FOUND          (-1)
 
-char *         cmd;
-struct option  longOpts[] = {
+char           *cmd;
+struct option   longOpts[] = {
                        {"help", 0, 0, 'h'},
                        {"obd", 1, 0, 'o'},
                        {"query", 0, 0, 'q'},
                        {"verbose", 0, 0, 'v'},
                        {0, 0, 0, 0}
                        {"help", 0, 0, 'h'},
                        {"obd", 1, 0, 'o'},
                        {"query", 0, 0, 'q'},
                        {"verbose", 0, 0, 'v'},
                        {0, 0, 0, 0}
-               };
-int            query;
-int            verbose;
-char *         shortOpts = "ho:qv";
-char *         usageMsg = "[ --obd <obd uuid> | --query ] <dir|file> ...";
-
-int            max_ost_count = MAX_LOV_UUID_COUNT;
-struct obd_uuid *      obduuid;
-char *         buf;
-int            buflen;
-struct obd_uuid *      uuids;
+                };
+int             query;
+int             verbose;
+char            shortOpts[] = "ho:qv";
+char            usageMsg[] = "[ --obd <obd uuid> | --query ] <dir|file> ...";
+
+int             max_ost_count = MAX_LOV_UUID_COUNT;
+struct obd_uuid *obduuid;
+char           *buf;
+int             buflen;
+struct obd_uuid *uuids;
 struct obd_ioctl_data data;
 struct obd_ioctl_data data;
-struct lov_desc desc;
-int            uuidslen;
-int            cfglen;
+struct lov_desc  desc;
+int             uuidslen;
+int             cfglen;
 struct lov_mds_md *lmm;
 struct lov_mds_md *lmm;
-int            lmmlen;
+int             lmmlen;
+int             printed_UUIDs;
 
 void   init();
 void   usage(FILE *stream);
 void   errMsg(char *fmt, ...);
 
 void   init();
 void   usage(FILE *stream);
 void   errMsg(char *fmt, ...);
-void   processPath(const char *path);
+void   processPath(char *path);
 
 
-int
-main (int argc, char **argv) {
+int main (int argc, char **argv) {
        int c;
 
        cmd = basename(argv[0]);
        int c;
 
        cmd = basename(argv[0]);
@@ -61,8 +61,8 @@ main (int argc, char **argv) {
                switch (c) {
                case 'o':
                        if (obduuid) {
                switch (c) {
                case 'o':
                        if (obduuid) {
-                               errMsg("obd '%s' already specified: '%s'.",
-                                       obduuid, optarg);
+                               printf("obd '%s' already specified: '%s'\n",
+                                       obduuid->uuid, optarg);
                                exit(1);
                        }
 
                                exit(1);
                        }
 
@@ -81,7 +81,7 @@ main (int argc, char **argv) {
                        usage(stderr);
                        exit(1);
                default:
                        usage(stderr);
                        exit(1);
                default:
-                       errMsg("Internal error. Valid '%s' unrecognized.",
+                       printf("Internal error. Valid '%s' unrecognized\n",
                                argv[optind - 1]);
                        usage(stderr);
                        exit(1);
                                argv[optind - 1]);
                        usage(stderr);
                        exit(1);
@@ -105,8 +105,7 @@ main (int argc, char **argv) {
        exit (0);
 }
 
        exit (0);
 }
 
-void
-init()
+void init()
 {
        int datalen, desclen;
 
 {
        int datalen, desclen;
 
@@ -141,8 +140,7 @@ init()
        }
 
        if ((buf = malloc(buflen)) == NULL) {
        }
 
        if ((buf = malloc(buflen)) == NULL) {
-               errMsg("Unable to allocate %d bytes of memory for ioctl's.",
-                       buflen);
+               errMsg("Unable to allocate %d bytes of memory for ioctl's");
                exit(1);
        }
 
                exit(1);
        }
 
@@ -150,112 +148,120 @@ init()
        uuids = (struct obd_uuid *)buf;
 }
 
        uuids = (struct obd_uuid *)buf;
 }
 
-void
-usage(FILE *stream)
+void usage(FILE *stream)
 {
        fprintf(stream, "usage: %s %s\n", cmd, usageMsg);
 }
 
 {
        fprintf(stream, "usage: %s %s\n", cmd, usageMsg);
 }
 
-void
-errMsg(char *fmt, ...)
+void errMsg(char *fmt, ...)
 {
        va_list args;
 {
        va_list args;
+       int tmp_errno = errno;
 
        fprintf(stderr, "%s: ", cmd);
        va_start(args, fmt);
        vfprintf(stderr, fmt, args);
        va_end(args);
 
        fprintf(stderr, "%s: ", cmd);
        va_start(args, fmt);
        vfprintf(stderr, fmt, args);
        va_end(args);
-       fprintf(stderr, "\n");
+       fprintf(stderr, ": %s (%d)\n", strerror(tmp_errno), tmp_errno);
 }
 
 }
 
-void
-processPath(const char *path)
+void processPath(char *path)
 {
        int fd;
        int rc;
        int i;
 {
        int fd;
        int rc;
        int i;
-       int obdindex;
+       int obdindex = OBD_NOT_FOUND;
        int obdcount;
        struct obd_uuid *uuidp;
        int obdcount;
        struct obd_uuid *uuidp;
+       char *fname, *dirname;
 
 
-       if (query || verbose && !obduuid) {
+       if ((query || verbose) && !obduuid) {
                printf("%s\n", path);
        }
 
                printf("%s\n", path);
        }
 
-       if ((fd = open(path, O_RDONLY | O_LOV_DELAY_CREATE)) < 0) {
-               errMsg("open \"%.20s\" failed.", path);
-               perror("open");
+       fname = strrchr(path, '/');
+       if (fname != NULL && fname[1] != '\0') {
+               *fname = '\0';
+               fname++;
+               dirname = path;
+       } else if (fname != NULL && fname[1] == '\0') {
+               printf("need getdents support\n");
                return;
                return;
+       } else {
+               dirname = ".";
+               fname = path;
        }
 
        }
 
-       memset(&data, 0, sizeof(data));
-        data.ioc_inllen1 = sizeof(desc);
-        data.ioc_inlbuf1 = (char *)&desc;
-        data.ioc_inllen2 = uuidslen;
-        data.ioc_inlbuf2 = (char *)uuids;
+       if ((fd = open(dirname, O_RDONLY)) < 0) {
+               errMsg("open \"%.20s\" failed", dirname);
+               return;
+       }
 
 
-        memset(&desc, 0, sizeof(desc));
-        desc.ld_tgt_count = max_ost_count;
+       if (!printed_UUIDs) {
+               memset(&data, 0, sizeof(data));
+               data.ioc_inllen1 = sizeof(desc);
+               data.ioc_inlbuf1 = (char *)&desc;
+               data.ioc_inllen2 = uuidslen;
+               data.ioc_inlbuf2 = (char *)uuids;
 
 
-        if (obd_ioctl_pack(&data, &buf, buflen)) {
-                errMsg("internal buffering error.");
-               exit(1);
-        }
+               memset(&desc, 0, sizeof(desc));
+               desc.ld_tgt_count = max_ost_count;
+
+               if (obd_ioctl_pack(&data, &buf, buflen)) {
+                       errMsg("internal buffering error");
+                       exit(1);
+               }
 
 
-        rc = ioctl(fd, OBD_IOC_LOV_GET_CONFIG, buf);
-        if (rc) {
-               if (errno == ENOTTY) {
-                       if (!obduuid) {
-                               printf("Not a regular file or not Lustre file.\n\n");
+               rc = ioctl(fd, OBD_IOC_LOV_GET_CONFIG, buf);
+               if (rc) {
+                       if (errno == ENOTTY) {
+                               if (!obduuid) {
+                                       errMsg("error getting LOV config");
+                               }
+                               return;
                        }
                        }
-                       return;
+                       errMsg("OBD_IOC_LOV_GET_CONFIG ioctl failed: %s");
+                       exit(1);
                }
                }
-               errMsg("OBD_IOC_LOV_GET_CONFIG ioctl failed: %d.", errno);
-               perror("ioctl");
-               exit(1);
-        }
 
 
-       if (obd_ioctl_unpack(&data, buf, buflen)) {
-               errMsg("Invalid reply from ioctl.");
-                exit(1);
-       }
+               if (obd_ioctl_unpack(&data, buf, buflen)) {
+                       errMsg("Invalid reply from ioctl");
+                       exit(1);
+               }
 
 
-        obdcount = desc.ld_tgt_count;
-       if (obdcount == 0)
-               return;
+               obdcount = desc.ld_tgt_count;
+               if (obdcount == 0)
+                       return;
 
 
-       obdindex = OBD_NOT_FOUND;
+               obdindex = OBD_NOT_FOUND;
 
 
-       if (obduuid) {
-               for (i = 0, uuidp = uuids; i < obdcount; i++, uuidp++) {
-                       if (strncmp((const char *)obduuid, (const char *)uuidp,
-                                   sizeof(*uuidp)) == 0) {
-                               obdindex = i;
+               if (obduuid) {
+                       for (i = 0, uuidp = uuids; i < obdcount; i++, uuidp++) {
+                               if (strncmp((char *)obduuid, (char *)uuidp,
+                                       sizeof(*uuidp)) == 0) {
+                                       obdindex = i;
+                               }
                        }
                        }
-               }
 
 
-               if (obdindex == OBD_NOT_FOUND)
-                       return;
-       } else  if (query || verbose) {
-               printf("OBDS:\n");
-               for (i = 0, uuidp = uuids; i < obdcount; i++, uuidp++)
-                       printf("%4d: %s\n", i, (char *)uuidp);
+                       if (obdindex == OBD_NOT_FOUND)
+                               return;
+               } else if (query || verbose) {
+                       printf("OBDS:\n");
+                       for (i = 0, uuidp = uuids; i < obdcount; i++, uuidp++)
+                               printf("%4d: %s\n", i, (char *)uuidp);
+               }
+               printed_UUIDs = 1;
        }
 
        }
 
-       memset((void *)buf, 0, buflen);
-       lmm->lmm_magic = LOV_MAGIC;
-        lmm->lmm_ost_count = max_ost_count;
-
-       rc = ioctl(fd, LL_IOC_LOV_GETSTRIPE, (void *)lmm);
+       strcpy((char *)lmm, fname);
+       rc = ioctl(fd, IOC_MDC_GETSTRIPE, (void *)lmm);
        if (rc) {
                if (errno == ENODATA) {
        if (rc) {
                if (errno == ENODATA) {
-                       if(!obduuid) {
-                               printf("Has no stripe information.\n\n");
-                       }
+                       if (!obduuid)
+                               printf("Has no stripe information.\n");
                }
                else {
                }
                else {
-                       errMsg("LL_IOC_LOV_GETSTRIPE ioctl failed. %d", errno);
-                       perror("ioctl");
+                       errMsg("IOC_MDC_GETSTRIPE ioctl failed");
                }
                return;
        }
                }
                return;
        }
index eaaed71..1a1bbc9 100755 (executable)
@@ -19,9 +19,9 @@
 #
 
 """
 #
 
 """
-lmc - lustre configurtion data  manager
+lmc - lustre configuration data manager
 
 
-  See lustre book for documentation for lmc.
+  See Lustre book (http://www.lustre.org/docs/lustre.pdf) for documentation on lmc.
 
 """
 
 
 """
 
@@ -98,6 +98,10 @@ Object creation command summary:
   --path /mnt/point
   --mds mds_name
   --ost ost_name OR --lov lov_name
   --path /mnt/point
   --mds mds_name
   --ost ost_name OR --lov lov_name
+
+--add mgmt  - Management/monitoring service
+  --node node_name
+  --mgmt mgmt_service_name
 """
 
 PARAM = Lustre.Options.PARAM
 """
 
 PARAM = Lustre.Options.PARAM
@@ -123,10 +127,10 @@ lmc_options = [
     # network 
     ('nettype', "Specify the network type. This can be tcp/elan/gm/scimac.", PARAM),
     ('nid', "Give the network ID, e.g ElanID/IP Address as used by portals.", PARAM),
     # network 
     ('nettype', "Specify the network type. This can be tcp/elan/gm/scimac.", PARAM),
     ('nid', "Give the network ID, e.g ElanID/IP Address as used by portals.", PARAM),
-    ('tcpbuf', "Optional arguement to specify the TCP buffer size.", PARAM, "0"),
-    ('port', "Optional arguement to specify the TCP port number.", PARAM, DEFAULT_PORT),
-    ('nid_exchange', "Optional arguement to indicate if nid exchange should be done.", PARAM, 0),
-    ('irq_affinity', "Optional arguement.", PARAM, 0),
+    ('tcpbuf', "Optional argument to specify the TCP buffer size.", PARAM, "0"),
+    ('port', "Optional argument to specify the TCP port number.", PARAM, DEFAULT_PORT),
+    ('nid_exchange', "Optional argument to indicate if nid exchange should be done.", PARAM, 0),
+    ('irq_affinity', "Optional argument.", PARAM, 0),
     ('hostaddr', "", PARAM,""),
     ('cluster_id', "Specify the cluster ID", PARAM, "0"),
 
     ('hostaddr', "", PARAM,""),
     ('cluster_id', "Specify the cluster ID", PARAM, "0"),
 
@@ -143,12 +147,12 @@ lmc_options = [
     ('mds', "Specify MDS name.", PARAM),
     ('ost', "Specify the OST name.", PARAM,""),
     ('osdtype', "This could obdfilter or obdecho.", PARAM, "obdfilter"),
     ('mds', "Specify MDS name.", PARAM),
     ('ost', "Specify the OST name.", PARAM,""),
     ('osdtype', "This could obdfilter or obdecho.", PARAM, "obdfilter"),
-    ('failover', ""),
+    ('failover', "Enable failover support on OSTs or MDS?"),
     ('group', "", PARAM),
     ('dev', "Path of the device on local system.", PARAM,""),
     ('size', "Specify the size of the device if needed.", PARAM,"0"),
     ('journal_size', "Specify new journal size for underlying ext3 file system.", PARAM,"0"),
     ('group', "", PARAM),
     ('dev', "Path of the device on local system.", PARAM,""),
     ('size', "Specify the size of the device if needed.", PARAM,"0"),
     ('journal_size', "Specify new journal size for underlying ext3 file system.", PARAM,"0"),
-    ('fstype', "Optional arguement to specify the filesystem type.", PARAM, "ext3"),
+    ('fstype', "Optional argument to specify the filesystem type.", PARAM, "ext3"),
     ('ostuuid', "", PARAM,""),
     ('nspath', "Local mount point of server namespace.", PARAM,""),
     ('format', ""),
     ('ostuuid', "", PARAM,""),
     ('nspath', "Local mount point of server namespace.", PARAM,""),
     ('format', ""),
@@ -167,6 +171,8 @@ lmc_options = [
     # cobd
     ('real_obd', "", PARAM),
     ('cache_obd', "", PARAM),
     # cobd
     ('real_obd', "", PARAM),
     ('cache_obd', "", PARAM),
+
+    ('mgmt', "Specify management/monitoring service name.", PARAM, ""),
     ]
 
 def error(*args):
     ]
 
 def error(*args):
@@ -393,16 +399,25 @@ class GenConfig:
         mdd.appendChild(self.ref("target", mds_uuid))
         return mdd
 
         mdd.appendChild(self.ref("target", mds_uuid))
         return mdd
 
+    def mgmt(self, mgmt_name, mgmt_uuid, node_uuid):
+        mgmt = self.newService("mgmt", mgmt_name, mgmt_uuid)
+        mgmt.appendChild(self.ref("node", node_uuid))
+        # Placeholder until mgmt-service failover.
+        mgmt.appendChild(self.ref("active", mgmt_uuid))
+        return mgmt
+
     def mountpoint(self, name, uuid, fs_uuid, path):
         mtpt = self.newService("mountpoint", name, uuid)
         mtpt.appendChild(self.ref("filesystem", fs_uuid))
         self.addElement(mtpt, "path", path)
         return mtpt
 
     def mountpoint(self, name, uuid, fs_uuid, path):
         mtpt = self.newService("mountpoint", name, uuid)
         mtpt.appendChild(self.ref("filesystem", fs_uuid))
         self.addElement(mtpt, "path", path)
         return mtpt
 
-    def filesystem(self, name, uuid, mds_uuid, obd_uuid):
+    def filesystem(self, name, uuid, mds_uuid, obd_uuid, mgmt_uuid):
         fs = self.newService("filesystem", name, uuid)
         fs.appendChild(self.ref("mds", mds_uuid))
         fs.appendChild(self.ref("obd", obd_uuid))
         fs = self.newService("filesystem", name, uuid)
         fs.appendChild(self.ref("mds", mds_uuid))
         fs.appendChild(self.ref("obd", obd_uuid))
+        if mgmt_uuid:
+            fs.appendChild(self.ref("mgmt", mgmt_uuid))
         return fs
         
     def echo_client(self, name, uuid, osc_uuid):
         return fs
         
     def echo_client(self, name, uuid, osc_uuid):
@@ -660,6 +675,23 @@ def add_mds(gen, lustre, options):
     lustre.appendChild(mdd)
                    
 
     lustre.appendChild(mdd)
                    
 
+def add_mgmt(gen, lustre, options):
+    node_name = get_option(options, 'node')
+    node_uuid = name2uuid(lustre, node_name)
+    mgmt_name = get_option(options, 'mgmt')
+    if not mgmt_name:
+        mgmt_name = new_name('MGMT_' + node_name)
+    mgmt_uuid = name2uuid(lustre, mgmt_name, fatal=0)
+    if not mgmt_uuid:
+        mgmt_uuid = new_uuid(mgmt_name)
+        mgmt = gen.mgmt(mgmt_name, mgmt_uuid, node_uuid)
+        lustre.appendChild(mgmt)
+    else:
+        mgmt = lookup(lustre, mgmt_uuid)
+
+    node = findByName(lustre, node_name, "node")
+    node_add_profile(gen, node, 'mgmt', mgmt_uuid)
+
 def add_ost(gen, lustre, options):
     node_name = get_option(options, 'node')
     lovname = get_option(options, 'lov')
 def add_ost(gen, lustre, options):
     node_name = get_option(options, 'node')
     lovname = get_option(options, 'lov')
@@ -793,23 +825,27 @@ def add_lov(gen, lustre, options):
     lovconfig = gen.lovconfig(lovconfig_name, lovconfig_uuid, uuid)
     lustre.appendChild(lovconfig)
 
     lovconfig = gen.lovconfig(lovconfig_name, lovconfig_uuid, uuid)
     lustre.appendChild(lovconfig)
 
-def new_filesystem(gen, lustre, mds_uuid, obd_uuid):
+def new_filesystem(gen, lustre, mds_uuid, obd_uuid, mgmt_uuid):
     fs_name = new_name("FS_fsname")
     fs_uuid = new_uuid(fs_name)
     mds = lookup(lustre, mds_uuid)
     mds.appendChild(gen.ref("filesystem", fs_uuid))
     fs_name = new_name("FS_fsname")
     fs_uuid = new_uuid(fs_name)
     mds = lookup(lustre, mds_uuid)
     mds.appendChild(gen.ref("filesystem", fs_uuid))
-    fs = gen.filesystem(fs_name, fs_uuid, mds_uuid, obd_uuid)
+    fs = gen.filesystem(fs_name, fs_uuid, mds_uuid, obd_uuid, mgmt_uuid)
     lustre.appendChild(fs)
     return fs_uuid
 
     lustre.appendChild(fs)
     return fs_uuid
 
-def get_fs_uuid(gen, lustre, mds_name, obd_name):
+def get_fs_uuid(gen, lustre, mds_name, obd_name, mgmt_name):
     mds_uuid = name2uuid(lustre, mds_name, tag='mds')
     obd_uuid = name2uuid(lustre, obd_name, tag='lov', fatal=0)
     if not obd_uuid:
         obd_uuid = name2uuid(lustre, obd_name, tag='ost', fatal=1)
     mds_uuid = name2uuid(lustre, mds_name, tag='mds')
     obd_uuid = name2uuid(lustre, obd_name, tag='lov', fatal=0)
     if not obd_uuid:
         obd_uuid = name2uuid(lustre, obd_name, tag='ost', fatal=1)
+    if mgmt_name:
+        mgmt_uuid = name2uuid(lustre, mgmt_name, tag='mgmt', fatal=1)
+    else:
+        mgmt_uuid = ''
     fs_uuid = lookup_filesystem(lustre, mds_uuid, obd_uuid)
     if not fs_uuid:
     fs_uuid = lookup_filesystem(lustre, mds_uuid, obd_uuid)
     if not fs_uuid:
-        fs_uuid = new_filesystem(gen, lustre, mds_uuid, obd_uuid)
+        fs_uuid = new_filesystem(gen, lustre, mds_uuid, obd_uuid, mgmt_uuid)
     return fs_uuid
     
 def add_mtpt(gen, lustre, options):
     return fs_uuid
     
 def add_mtpt(gen, lustre, options):
@@ -825,7 +861,8 @@ def add_mtpt(gen, lustre, options):
             lov_name = get_option(options, 'ost')
             if lov_name == '':
                 error("--add mtpt requires either --filesystem or --mds with an  --lov lov_name or --ost ost_name")
             lov_name = get_option(options, 'ost')
             if lov_name == '':
                 error("--add mtpt requires either --filesystem or --mds with an  --lov lov_name or --ost ost_name")
-        fs_uuid = get_fs_uuid(gen, lustre, mds_name, lov_name)
+        mgmt_name = get_option(options, 'mgmt')
+        fs_uuid = get_fs_uuid(gen, lustre, mds_name, lov_name, mgmt_name)
     else:
         fs_uuid = name2uuid(lustre, fs_name, tag='filesystem')
 
     else:
         fs_uuid = name2uuid(lustre, fs_name, tag='filesystem')
 
@@ -910,6 +947,8 @@ def add(devtype, gen, lustre, options):
         add_echo_client(gen, lustre, options)
     elif devtype == 'cobd':
         add_cobd(gen, lustre, options)
         add_echo_client(gen, lustre, options)
     elif devtype == 'cobd':
         add_cobd(gen, lustre, options)
+    elif devtype == 'mgmt':
+        add_mgmt(gen, lustre, options)
     else:
         error("unknown device type:", devtype)
     
     else:
         error("unknown device type:", devtype)
     
index 4373071..8774cef 100644 (file)
@@ -214,9 +214,9 @@ main (int argc, char **argv)
                 }
         }
 
                 }
         }
 
-        free (b);
+        free(b);
 
 
-        obdio_disconnect (conn);
+        obdio_disconnect(conn, 0);
 
         return (rc == 0 ? 0 : 1);
 }
 
         return (rc == 0 ? 0 : 1);
 }
index 8264761..24b9e2d 100644 (file)
@@ -294,10 +294,10 @@ main (int argc, char **argv)
         if (conn == NULL)
                 return (1);
 
         if (conn == NULL)
                 return (1);
 
-        rc = obdio_test_fixed_extent (conn, myhid, mypid, reps, locked,
-                                      oid, base_offset, size);
+        rc = obdio_test_fixed_extent(conn, myhid, mypid, reps, locked,
+                                     oid, base_offset, size);
 
 
-        obdio_disconnect (conn);
+        obdio_disconnect(conn, 0);
 
         return (rc == 0 ? 0 : 1);
 }
 
         return (rc == 0 ? 0 : 1);
 }
index c871818..04dae88 100644 (file)
@@ -116,7 +116,7 @@ obdio_connect (int device)
 }
 
 void
 }
 
 void
-obdio_disconnect (struct obdio_conn *conn)
+obdio_disconnect (struct obdio_conn *conn, int flags)
 {
         close (conn->oc_fd);
         /* obdclass will automatically close on last ref */
 {
         close (conn->oc_fd);
         /* obdclass will automatically close on last ref */
index 3811b41..b2ec6b6 100644 (file)
@@ -48,22 +48,24 @@ struct obdio_barrier {
 };
 
 extern struct obdio_conn * obdio_connect (int device);
 };
 
 extern struct obdio_conn * obdio_connect (int device);
-extern void obdio_disconnect (struct obdio_conn *conn);
-extern int obdio_open (struct obdio_conn *conn, uint64_t oid,
+extern void obdio_disconnect(struct obdio_conn *conn, int flags);
+extern int obdio_open(struct obdio_conn *conn, uint64_t oid,
+                      struct lustre_handle *fh);
+extern int obdio_close(struct obdio_conn *conn, uint64_t oid,
                        struct lustre_handle *fh);
                        struct lustre_handle *fh);
-extern int obdio_close (struct obdio_conn *conn, uint64_t oid,
-                        struct lustre_handle *fh);
-extern int obdio_pread (struct obdio_conn *conn, uint64_t oid,
+extern int obdio_pread(struct obdio_conn *conn, uint64_t oid,
+                       char *buffer, uint32_t count, uint64_t offset);
+extern int obdio_pwrite(struct obdio_conn *conn, uint64_t oid,
                         char *buffer, uint32_t count, uint64_t offset);
                         char *buffer, uint32_t count, uint64_t offset);
-extern int obdio_pwrite (struct obdio_conn *conn, uint64_t oid,
-                         char *buffer, uint32_t count, uint64_t offset);
-extern int obdio_enqueue (struct obdio_conn *conn, uint64_t oid,
-                          int mode, uint64_t offset, uint32_t count,
-                          struct lustre_handle *lh);
-extern int obdio_cancel (struct obdio_conn *conn, struct lustre_handle *lh);
-extern void *obdio_alloc_aligned_buffer (void **spacep, int size);
-extern struct obdio_barrier *obdio_new_barrier (uint64_t oid, uint64_t id, int npeers) ;
-extern int obdio_setup_barrier (struct obdio_conn *conn, struct obdio_barrier *b);
-extern int obdio_barrier (struct obdio_conn *conn, struct obdio_barrier *b);
+extern int obdio_enqueue(struct obdio_conn *conn, uint64_t oid,
+                         int mode, uint64_t offset, uint32_t count,
+                         struct lustre_handle *lh);
+extern int obdio_cancel(struct obdio_conn *conn, struct lustre_handle *lh);
+extern void *obdio_alloc_aligned_buffer(void **spacep, int size);
+extern struct obdio_barrier *obdio_new_barrier(uint64_t oid, uint64_t id,
+                                               int npeers);
+extern int obdio_setup_barrier(struct obdio_conn *conn,
+                               struct obdio_barrier *b);
+extern int obdio_barrier(struct obdio_conn *conn, struct obdio_barrier *b);
 
 #endif
 
 #endif
index 5b6a589..86ae507 100644 (file)
@@ -518,12 +518,12 @@ main (int argc, char **argv)
        CHECK_VALUE (REINT_OPEN);
        CHECK_VALUE (REINT_MAX);
 
        CHECK_VALUE (REINT_OPEN);
        CHECK_VALUE (REINT_MAX);
 
-       CHECK_VALUE (IT_INTENT_EXEC);
-       CHECK_VALUE (IT_OPEN_LOOKUP);
-       CHECK_VALUE (IT_OPEN_NEG);
-       CHECK_VALUE (IT_OPEN_POS);
-       CHECK_VALUE (IT_OPEN_CREATE);
-       CHECK_VALUE (IT_OPEN_OPEN);
+       CHECK_VALUE (DISP_IT_EXECD);
+       CHECK_VALUE (DISP_LOOKUP_EXECD);
+       CHECK_VALUE (DISP_LOOKUP_NEG);
+       CHECK_VALUE (DISP_LOOKUP_POS);
+       CHECK_VALUE (DISP_OPEN_CREATE);
+       CHECK_VALUE (DISP_OPEN_OPEN);
 
        CHECK_VALUE (MDS_STATUS_CONN);
        CHECK_VALUE (MDS_STATUS_LOV);
 
        CHECK_VALUE (MDS_STATUS_CONN);
        CHECK_VALUE (MDS_STATUS_LOV);