Land b1_2_smallfix onto b1_2 (20040616_1009)

author adilger <adilger>

Wed, 16 Jun 2004 16:50:40 +0000 (16:50 +0000)

committer adilger <adilger>

Wed, 16 Jun 2004 16:50:40 +0000 (16:50 +0000)
author adilger <adilger>
Wed, 16 Jun 2004 16:50:40 +0000 (16:50 +0000)
committer adilger <adilger>
Wed, 16 Jun 2004 16:50:40 +0000 (16:50 +0000)
diff --git a/ldiskfs/ldiskfs/autoMakefile.am b/ldiskfs/ldiskfs/autoMakefile.am

index f81e6e7..eacc902 100644 (file)
--- a/ldiskfs/ldiskfs/autoMakefile.am
+++ b/ldiskfs/ldiskfs/autoMakefile.am
@@ -33,10 +33,17 @@ patches := @top_srcdir@/kernel_patches/patches
  sources: $(ext3_sources) $(ext3_headers) $(linux_headers) $(series)
         rm -rf linux-stage linux sources $(ldiskfs_SOURCES)
         mkdir -p linux-stage/fs/ext3 linux-stage/include/linux
-       cd linux-stage && quilt setup -l ../$(series) -d ../$(patches)
         cp $(ext3_sources) $(ext3_headers) $(ext3_extra) linux-stage/fs/ext3
         cp $(linux_headers) linux-stage/include/linux
+if USE_QUILT
+       cd linux-stage && quilt setup -l ../$(series) -d ../$(patches)
         cd linux-stage && quilt push -a -q
+else
+       @cd linux-stage && for i in $$(cat ../$(series)) ; do \
+               echo "patch -p1 < ../$(patches)/$$i" ; \
+               patch -p1 < ../$(patches)/$$i || exit 1 ; \
+       done
+endif
         mkdir linux
         @echo -n "Replacing 'ext3' with 'ldiskfs':"
         @for i in $(notdir $(ext3_headers) $(ext3_sources)) $(new_sources) ; do \
@@ -50,6 +57,7 @@ sources: $(ext3_sources) $(ext3_headers) $(linux_headers) $(series)
                         linux-stage/include/linux/ext3$$i \
                         > linux/ldiskfs$$i ; \
         done
+       @echo
         touch sources
  
  foo-check:
diff --git a/lnet/archdep.m4 b/lnet/archdep.m4

index 2a42368..c78fc34 100644 (file)
--- a/lnet/archdep.m4
+++ b/lnet/archdep.m4
@@ -92,6 +92,7 @@ AC_CHECK_FILE([$LINUX/include/linux/namei.h],
         [
                 linux25="yes"
                 KMODEXT=".ko"
+               enable_ldiskfs="yes"
         ],[
                 KMODEXT=".o"
                 linux25="no"
@@ -101,6 +102,16 @@ AC_MSG_RESULT([$linux25])
  AM_CONDITIONAL(LINUX25, test x$linux25 = xyes)
  AC_SUBST(KMODEXT)
  
+AC_PATH_PROG(PATCH, patch, [no])
+AC_PATH_PROG(QUILT, quilt, [no])
+AM_CONDITIONAL(USE_QUILT, test x$QUILT != xno)
+
+if test x$enable_ldiskfs$enable_modules = xyesyes ; then
+       if test x$PATCH$QUILT = xnono ; then
+               AC_MSG_ERROR([Quilt or patch are needed to build the ldiskfs module (for Linux 2.6)])
+       fi
+fi
+
  # -------  Makeflags ------------------
  
  CPPFLAGS="$CRAY_PORTALS_INCLUDE $CRAY_PORTALS_COMMANDLINE -I\$(top_srcdir)/include -I\$(top_srcdir)/portals/include"
@@ -135,7 +146,7 @@ _ACEOF
  AC_DEFUN([LUSTRE_MODULE_COMPILE_IFELSE],
  [m4_ifvaln([$1], [LUSTRE_MODULE_CONFTEST([$1])])dnl
  rm -f kernel-tests/conftest.o kernel-tests/conftest.mod.c kernel-tests/conftest.ko
-AS_IF([AC_TRY_COMMAND(cp conftest.c kernel-tests && make [$2] -f $PWD/kernel-tests/Makefile LUSTRE_LINUX_CONFIG=$LINUX_CONFIG -o tmp_include_depends -o scripts -o include/config/MARKER -C $LINUX EXTRA_CFLAGS="$EXTRA_KCFLAGS" $ARCH_UM SUBDIRS=$PWD/kernel-tests) >/dev/null && AC_TRY_COMMAND([$3])],
+AS_IF([AC_TRY_COMMAND(cp conftest.c kernel-tests && make [$2] -f $PWD/kernel-tests/Makefile LUSTRE_LINUX_CONFIG=$LINUX_CONFIG -o tmp_include_depends -o scripts -o include/config/MARKER -C $LINUX EXTRA_CFLAGS="-Werror-implicit-function-declaration $EXTRA_KCFLAGS" $ARCH_UM SUBDIRS=$PWD/kernel-tests) >/dev/null && AC_TRY_COMMAND([$3])],
         [$4],
         [_AC_MSG_LOG_CONFTEST
  m4_ifvaln([$5],[$5])dnl])dnl
@@ -446,7 +457,7 @@ LUSTRE_MODULE_TRY_COMPILE(
  # ---------- Red Hat 2.4.20 backports some 2.5 bits --------
  # This needs to run after we've defined the KCPPFLAGS
  
-AC_MSG_CHECKING([for kernel version])
+AC_MSG_CHECKING([if task_struct has a sighand field])
  LUSTRE_MODULE_TRY_COMPILE(
         [
                 #include <linux/sched.h>
@@ -455,9 +466,24 @@ LUSTRE_MODULE_TRY_COMPILE(
                 p.sighand = NULL;
         ],[
                 AC_DEFINE(CONFIG_RH_2_4_20, 1, [this kernel contains Red Hat 2.4.20 patches])
-               AC_MSG_RESULT([redhat-2.4.20])
+               AC_MSG_RESULT([yes])
         ],[
-               AC_MSG_RESULT([$LINUXRELEASE])
+               AC_MSG_RESULT([no])
+       ])
+
+# ---------- 2.4.20 introduced cond_resched --------------
+
+AC_MSG_CHECKING([if kernel offers cond_resched])
+LUSTRE_MODULE_TRY_COMPILE(
+       [
+               #include <linux/sched.h>
+       ],[
+               cond_resched();
+       ],[
+               AC_MSG_RESULT([yes])
+               AC_DEFINE(HAVE_COND_RESCHED, 1, [cond_resched found])
+       ],[
+               AC_MSG_RESULT([no])
         ])
  
  # ---------- Red Hat 2.4.21 backports some more 2.5 bits --------
diff --git a/lnet/autoMakefile.am b/lnet/autoMakefile.am

index bd57e6e..485ff04 100644 (file)
--- a/lnet/autoMakefile.am
+++ b/lnet/autoMakefile.am
@@ -3,6 +3,6 @@
  # This code is issued under the GNU General Public License.
  # See the file COPYING in this distribution
  
-EXTRA_DIST = archdep.m4 build.m4 include 
+EXTRA_DIST = archdep.m4 build.m4
  
-SUBDIRS = portals libcfs knals unals router tests doc utils
+SUBDIRS = portals libcfs knals unals router tests doc utils include
diff --git a/lnet/include/.cvsignore b/lnet/include/.cvsignore

index d45f796..94d3790 100644 (file)
--- a/lnet/include/.cvsignore
+++ b/lnet/include/.cvsignore
@@ -2,3 +2,5 @@ config.h
  stamp-h
  stamp-h1
  stamp-h.in
+Makefile
+Makefile.in
diff --git a/lnet/include/Makefile.am b/lnet/include/Makefile.am

new file mode 100644 (file)

index 0000000..2b3eb8c
--- /dev/null
+++ b/lnet/include/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = linux portals
+
+EXTRA_DIST = cygwin-ioctl.h
diff --git a/lnet/include/linux/.cvsignore b/lnet/include/linux/.cvsignore

new file mode 100644 (file)

index 0000000..282522d
--- /dev/null
+++ b/lnet/include/linux/.cvsignore
@@ -0,0 +1,2 @@
+Makefile
+Makefile.in
diff --git a/lnet/include/linux/Makefile.am b/lnet/include/linux/Makefile.am

new file mode 100644 (file)

index 0000000..3c28c6e
--- /dev/null
+++ b/lnet/include/linux/Makefile.am
@@ -0,0 +1,4 @@
+linuxdir = $(includedir)/linux
+
+EXTRA_DIST = kp30.h kpr.h libcfs.h lustre_list.h portals_compat25.h    \
+       portals_lib.h
diff --git a/lnet/include/linux/libcfs.h b/lnet/include/linux/libcfs.h

index efdc8fe..6772e82 100644 (file)
--- a/lnet/include/linux/libcfs.h
+++ b/lnet/include/linux/libcfs.h
@@ -2,7 +2,7 @@
   * vim:expandtab:shiftwidth=8:tabstop=8:
   */
  #ifndef _LIBCFS_H
-
+#define _LIBCFS_H
  
  #define PORTAL_DEBUG
  
diff --git a/lnet/include/lnet/.cvsignore b/lnet/include/lnet/.cvsignore

new file mode 100644 (file)

index 0000000..282522d
--- /dev/null
+++ b/lnet/include/lnet/.cvsignore
@@ -0,0 +1,2 @@
+Makefile
+Makefile.in
diff --git a/lnet/include/lnet/Makefile.am b/lnet/include/lnet/Makefile.am

new file mode 100644 (file)

index 0000000..5ed6090
--- /dev/null
+++ b/lnet/include/lnet/Makefile.am
@@ -0,0 +1,10 @@
+portalsdir=$(includedir)/portals
+
+if UTILS
+portals_HEADERS = list.h
+endif
+
+EXTRA_DIST = api.h api-support.h arg-blocks.h defines.h errno.h                \
+       internal.h lib-dispatch.h lib-nal.h lib-p30.h lib-types.h       \
+       list.h lltrace.h myrnal.h nal.h nalids.h p30.h ppid.h ptlctl.h  \
+       socknal.h stringtab.h types.h
diff --git a/lnet/include/lnet/types.h b/lnet/include/lnet/types.h

index 74ef493..80995e9 100644 (file)
--- a/lnet/include/lnet/types.h
+++ b/lnet/include/lnet/types.h
@@ -1,26 +1,15 @@
  #ifndef _P30_TYPES_H_
  #define _P30_TYPES_H_
  
-#ifdef __linux__
-# include <asm/types.h>
-# if defined(__powerpc__) && !defined(__KERNEL__)
-#  define __KERNEL__
-#  include <asm/timex.h>
-#  undef __KERNEL__
-# else
-#  include <asm/timex.h>
-# endif
-#else
-# include <sys/types.h>
-typedef u_int32_t __u32;
-typedef u_int64_t __u64;
-#endif
+#include <asm/types.h>
  
  #ifdef __KERNEL__
  # include <linux/time.h>
+# include <asm/timex.h>
  #else
  # include <sys/time.h>
  # define do_gettimeofday(tv) gettimeofday(tv, NULL);
+typedef unsigned long long cycles_t;
  #endif
  
  #include <portals/errno.h>
diff --git a/lnet/klnds/qswlnd/qswlnd_cb.c b/lnet/klnds/qswlnd/qswlnd_cb.c

index 6bff730..08453a0 100644 (file)
--- a/lnet/klnds/qswlnd/qswlnd_cb.c
+++ b/lnet/klnds/qswlnd/qswlnd_cb.c
@@ -585,7 +585,7 @@ kqswnal_launch (kqswnal_tx_t *ktx)
          /* Don't block for transmit descriptor if we're in interrupt context */
          int   attr = in_interrupt() ? (EP_NO_SLEEP | EP_NO_ALLOC) : 0;
          int   dest = kqswnal_nid2elanid (ktx->ktx_nid);
-        long  flags;
+        unsigned long flags;
          int   rc;
  
          ktx->ktx_launchtime = jiffies;
@@ -1429,7 +1429,7 @@ kqswnal_rx (kqswnal_rx_t *krx)
  void 
  kqswnal_rxhandler(EP_RXD *rxd)
  {
-        long          flags;
+        unsigned long flags;
          int           nob    = ep_rxd_len (rxd);
          int           status = ep_rxd_status (rxd);
          kqswnal_rx_t *krx    = (kqswnal_rx_t *)ep_rxd_arg (rxd);
@@ -1732,7 +1732,7 @@ kqswnal_scheduler (void *arg)
          kqswnal_rx_t    *krx;
          kqswnal_tx_t    *ktx;
          kpr_fwd_desc_t  *fwd;
-        long             flags;
+        unsigned long    flags;
          int              rc;
          int              counter = 0;
          int              shuttingdown = 0;
diff --git a/lnet/klnds/socklnd/socklnd_cb.c b/lnet/klnds/socklnd/socklnd_cb.c

index f02cbda..37695c9 100644 (file)
--- a/lnet/klnds/socklnd/socklnd_cb.c
+++ b/lnet/klnds/socklnd/socklnd_cb.c
@@ -1187,7 +1187,7 @@ ksocknal_fmb_callback (void *arg, int error)
  {
          ksock_fmb_t       *fmb = (ksock_fmb_t *)arg;
          ksock_fmb_pool_t  *fmp = fmb->fmb_pool;
-        ptl_hdr_t         *hdr = (ptl_hdr_t *)page_address(fmb->fmb_kiov[0].kiov_page);
+        ptl_hdr_t         *hdr = &fmb->fmb_hdr;
          ksock_conn_t      *conn = NULL;
          ksock_sched_t     *sched;
          unsigned long      flags;
diff --git a/lnet/ulnds/Makefile.am b/lnet/ulnds/Makefile.am

index 4c842a1..15080b0 100644 (file)
--- a/lnet/ulnds/Makefile.am
+++ b/lnet/ulnds/Makefile.am
@@ -2,7 +2,12 @@ if LIBLUSTRE
  noinst_LIBRARIES = libtcpnal.a
  endif
  
-pkginclude_HEADERS =  pqtimer.h dispatch.h table.h timer.h connection.h ipmap.h bridge.h procbridge.h
-libtcpnal_a_SOURCES = debug.c pqtimer.c select.c table.c pqtimer.h dispatch.h table.h timer.h address.c procapi.c proclib.c connection.c tcpnal.c connection.h
+noinst_HEADERS = pqtimer.h dispatch.h table.h timer.h connection.h     \
+       ipmap.h bridge.h procbridge.h
+
+libtcpnal_a_SOURCES = debug.c pqtimer.c select.c table.c pqtimer.h     \
+       dispatch.h table.h timer.h address.c procapi.c proclib.c        \
+       connection.c tcpnal.c connection.h
+
  libtcpnal_a_CPPFLAGS = $(LLCPPFLAGS)
  libtcpnal_a_CFLAGS = $(LLCFLAGS)
diff --git a/lnet/ulnds/socklnd/Makefile.am b/lnet/ulnds/socklnd/Makefile.am

index 4c842a1..15080b0 100644 (file)
--- a/lnet/ulnds/socklnd/Makefile.am
+++ b/lnet/ulnds/socklnd/Makefile.am
@@ -2,7 +2,12 @@ if LIBLUSTRE
  noinst_LIBRARIES = libtcpnal.a
  endif
  
-pkginclude_HEADERS =  pqtimer.h dispatch.h table.h timer.h connection.h ipmap.h bridge.h procbridge.h
-libtcpnal_a_SOURCES = debug.c pqtimer.c select.c table.c pqtimer.h dispatch.h table.h timer.h address.c procapi.c proclib.c connection.c tcpnal.c connection.h
+noinst_HEADERS = pqtimer.h dispatch.h table.h timer.h connection.h     \
+       ipmap.h bridge.h procbridge.h
+
+libtcpnal_a_SOURCES = debug.c pqtimer.c select.c table.c pqtimer.h     \
+       dispatch.h table.h timer.h address.c procapi.c proclib.c        \
+       connection.c tcpnal.c connection.h
+
  libtcpnal_a_CPPFLAGS = $(LLCPPFLAGS)
  libtcpnal_a_CFLAGS = $(LLCFLAGS)
diff --git a/lnet/utils/Makefile.am b/lnet/utils/Makefile.am

index 15c1774..851a8e1 100644 (file)
--- a/lnet/utils/Makefile.am
+++ b/lnet/utils/Makefile.am
@@ -14,8 +14,10 @@ libuptlctl_a_CPPFLAGS = $(LLCPPFLAGS)
  libuptlctl_a_CFLAGS = $(LLCFLAGS)
  endif
  
+if UTILS
  sbin_PROGRAMS = acceptor ptlctl debugctl routerstat wirecheck gmnalnid
  lib_LIBRARIES = libptlctl.a
+endif
  
  acceptor_SOURCES = acceptor.c
  
diff --git a/lustre/ChangeLog b/lustre/ChangeLog

index df7d863..30da8bf 100644 (file)
--- a/lustre/ChangeLog
+++ b/lustre/ChangeLog
@@ -5,13 +5,22 @@ tbd  Cluster File Systems, Inc. <info@clusterfs.com>
         - strip trailing '/'s before comparing paths with /proc/mounts (3486)
         - remove assertions to work around "in-flight rpcs" recovery bug (3063)
         - change init script to fail more clearly if not run as root (1528)
+       - allow clients to reconnect during replay (1742)
         - fix ns_lock/i_sem lock ordering deadlock for kms update (3477)
         - don't do DNS lookups on NIDs too small for IP addresses (3442)
+       - re-awaken ptlrpcd if new requests arrive during check_set  (3554)
+       - fix cond_resched  (3554)
+       - only evict unfinished clients after recovery   (3515)
+       - allow bulk resend, prevent data loss (3570)
         - dynamic ptlrpc request buffer allocation (2102)
         - don't allow unlinking open directory if it isn't empty (2904)
         - set MDS/OST threads to umask 0 to not clobber client modes (3359)
+       - remove extraneous obd dereference causing LASSERT failure (3334)
+       - don't use get_cycles() when creating temp. files on the mds (3156)
+       - hold i_sem when setting i_size in ll_extent_lock() (3564)
         * miscellania
         - servers can dump a log evicting a client - lustre.dump_on_timeout=1
+       - fix ksocknal_fmb_callback() error messages (2918)
  
  2004-05-27  Cluster File Systems, Inc. <info@clusterfs.com>
         * version 1.2.2
diff --git a/lustre/conf/Makefile.am b/lustre/conf/Makefile.am

index 6e3666b..627f2a2 100644 (file)
--- a/lustre/conf/Makefile.am
+++ b/lustre/conf/Makefile.am
@@ -6,7 +6,10 @@
  EXTRA_DIST = lustre.dtd lustre.schema slapd-lustre.conf lustre2ldif.xsl top.ldif
  ldapconfdir = $(sysconfdir)/openldap
  ldapschemadir = $(sysconfdir)/openldap/schema
+pkglibdir = '${exec_prefix}/usr/lib/$(PACKAGE)'
+
+if UTILS
  ldapconf_SCRIPTS = slapd-lustre.conf
  ldapschema_SCRIPTS = lustre.schema
-pkglibdir = '${exec_prefix}/usr/lib/$(PACKAGE)'
  pkglib_DATA = top.ldif lustre2ldif.xsl
+endif
diff --git a/lustre/configure.in b/lustre/configure.in

index 7b14e69..99a1347 100644 (file)
--- a/lustre/configure.in
+++ b/lustre/configure.in
@@ -5,7 +5,7 @@
  
  AC_INIT
  AC_CANONICAL_SYSTEM
-AM_INIT_AUTOMAKE(lustre, 1.2.2.3)
+AM_INIT_AUTOMAKE(lustre, 1.2.2.4)
  # AM_MAINTAINER_MODE
  
  # Four main targets: lustre kernel modules, utilities, tests, and liblustre
@@ -77,7 +77,6 @@ AC_ARG_ENABLE([ldiskfs],
                         [use ldiskfs for the Lustre backing FS]),
         [BACKINGFS='ldiskfs'],[enable_ldiskfs='no'])
  AC_MSG_RESULT([$enable_ldiskfs])
-AM_CONDITIONAL(LDISKFS, test x$enable_ldiskfs = xyes)
  
  AC_MSG_CHECKING([which backing filesystem to use])
  AC_MSG_RESULT([$BACKINGFS])
@@ -158,6 +157,8 @@ AM_CONDITIONAL(SMFS, test x$enable_smfs = xyes)
  sinclude(portals/build.m4)
  sinclude(portals/archdep.m4)
  
+AM_CONDITIONAL(LDISKFS, test x$enable_ldiskfs = xyes)
+
  if test x$enable_inkernel = xyes ; then
         find . -name Makefile.mk | sed 's/.mk$//' | xargs -n 1 \
                 sh -e -x -c '(cp -f $0.mk $0.in)'
@@ -217,6 +218,9 @@ portals/Kernelenv
  portals/Makefile
  portals/autoMakefile
  portals/doc/Makefile
+portals/include/Makefile
+portals/include/linux/Makefile
+portals/include/portals/Makefile
  portals/knals/Makefile
  portals/knals/autoMakefile
  portals/knals/gmnal/Makefile
diff --git a/lustre/include/linux/Makefile.am b/lustre/include/linux/Makefile.am

index cb75fe5..4c67b12 100644 (file)
--- a/lustre/include/linux/Makefile.am
+++ b/lustre/include/linux/Makefile.am
@@ -3,6 +3,12 @@
  # This code is issued under the GNU General Public License.
  # See the file COPYING in this distribution
  
+linuxdir = $(includedir)/linux
+
+if UTILS
+linux_HEADERS = lustre_idl.h
+endif
+
  EXTRA_DIST = lprocfs_status.h lustre_debug.h lustre_ha.h lustre_lib.h \
    lustre_mgmt.h obd_cache.h obd_lov.h lustre_dlm.h lustre_handles.h \
    lustre_net.h obd_class.h obd_ost.h obd_support.h lustre_commit_confd.h \
diff --git a/lustre/include/linux/lustre_compat25.h b/lustre/include/linux/lustre_compat25.h

index 13363bd..b9a295e 100644 (file)
--- a/lustre/include/linux/lustre_compat25.h
+++ b/lustre/include/linux/lustre_compat25.h
@@ -102,10 +102,6 @@ static inline int cleanup_group_info(void)
  
  #define smp_num_cpus    NR_CPUS
  
-#ifndef conditional_schedule
-#define conditional_schedule() cond_resched()
-#endif
-
  #include <linux/proc_fs.h>
  
  #else /* 2.4.. */
@@ -183,8 +179,14 @@ static inline int cleanup_group_info(void)
          return 0;
  }
  
-#ifndef conditional_schedule
-#define conditional_schedule() if (unlikely(need_resched())) schedule()
+#ifndef HAVE_COND_RESCHED
+static inline void cond_resched(void)
+{
+        if (unlikely(need_resched())) {
+                set_current_state(TASK_RUNNING);
+                schedule();
+        }
+}
  #endif
  
  /* to find proc_dir_entry from inode. 2.6 has native one -bzzz */
diff --git a/lustre/include/linux/lustre_export.h b/lustre/include/linux/lustre_export.h

index 9be781f..52b5c7a 100644 (file)
--- a/lustre/include/linux/lustre_export.h
+++ b/lustre/include/linux/lustre_export.h
@@ -73,8 +73,9 @@ struct obd_export {
          spinlock_t                exp_lock; /* protects flags int below */
          /* ^ protects exp_outstanding_replies too */
          int                       exp_flags;
-        int                       exp_failed:1;
-        int                       exp_libclient:1; /* liblustre client? */
+        int                       exp_failed:1,
+                                  exp_replay_needed:1,
+                                  exp_libclient:1; /* liblustre client? */
          union {
                  struct mds_export_data    eu_mds_data;
                  struct filter_export_data eu_filter_data;
diff --git a/lustre/include/linux/lustre_fsfilt.h b/lustre/include/linux/lustre_fsfilt.h

index 72f3817..b9beff5 100644 (file)
--- a/lustre/include/linux/lustre_fsfilt.h
+++ b/lustre/include/linux/lustre_fsfilt.h
@@ -132,7 +132,7 @@ static inline void *fsfilt_brw_start_log(struct obd_device *obd,
          void *parent_handle = oti ? oti->oti_handle : NULL;
          void *handle = obd->obd_fsops->fs_brw_start(objcount, fso, niocount, nb,
                                                      parent_handle, logs);
-        CDEBUG(D_HA, "started handle %p (%p)\n", handle, parent_handle);
+        CDEBUG(D_INFO, "started handle %p (%p)\n", handle, parent_handle);
  
          if (oti != NULL) {
                  if (parent_handle == NULL) {
@@ -177,7 +177,7 @@ static inline int fsfilt_commit_async(struct obd_device *obd,
          unsigned long now = jiffies;
          int rc = obd->obd_fsops->fs_commit_async(inode, handle, wait_handle);
  
-        CDEBUG(D_HA, "committing handle %p (async)\n", *wait_handle);
+        CDEBUG(D_INFO, "committing handle %p (async)\n", *wait_handle);
          if (time_after(jiffies, now + 15 * HZ))
                  CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
  
@@ -189,7 +189,7 @@ static inline int fsfilt_commit_wait(struct obd_device *obd,
  {
          unsigned long now = jiffies;
          int rc = obd->obd_fsops->fs_commit_wait(inode, handle);
-        CDEBUG(D_HA, "waiting for completion %p\n", handle);
+        CDEBUG(D_INFO, "waiting for completion %p\n", handle);
          if (time_after(jiffies, now + 15 * HZ))
                  CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
          return rc;
diff --git a/lustre/include/linux/lustre_import.h b/lustre/include/linux/lustre_import.h

index d2af141..74be113 100644 (file)
--- a/lustre/include/linux/lustre_import.h
+++ b/lustre/include/linux/lustre_import.h
@@ -83,7 +83,7 @@ struct obd_import {
          int                       imp_invalid:1, imp_replayable:1,
                                    imp_dlm_fake:1, imp_server_timeout:1,
                                    imp_initial_recov:1, imp_force_verify:1,
-                                  imp_pingable:1;
+                                  imp_pingable:1, imp_resend_replay:1;
          __u32                     imp_connect_op;
  };
  
diff --git a/lustre/include/linux/lustre_lib.h b/lustre/include/linux/lustre_lib.h

index a529860..c55e5ff 100644 (file)
--- a/lustre/include/linux/lustre_lib.h
+++ b/lustre/include/linux/lustre_lib.h
@@ -493,13 +493,9 @@ static inline void ost_checksum(obd_count *cksum, void *addr, int len)
  
  static inline int ll_insecure_random_int(void)
  {
-#ifdef __arch_um__
          struct timeval t;
          do_gettimeofday(&t);
          return (int)(t.tv_usec);
-#else
-        return (int)(get_cycles() >> 2);
-#endif
  }
  
  /*
diff --git a/lustre/include/linux/lustre_log.h b/lustre/include/linux/lustre_log.h

index 1d0ff9f..3eb75da 100644 (file)
--- a/lustre/include/linux/lustre_log.h
+++ b/lustre/include/linux/lustre_log.h
@@ -127,8 +127,8 @@ int obd_llog_finish(struct obd_device *obd, int count);
  
  /* llog_ioctl.c */
  int llog_ioctl(struct llog_ctxt *ctxt, int cmd, struct obd_ioctl_data *data);
-int llog_catlog_list(struct obd_device *obd, int count,
-                     struct obd_ioctl_data *data);
+int llog_catalog_list(struct obd_device *obd, int count,
+                      struct obd_ioctl_data *data);
  
  /* llog_net.c */
  int llog_initiator_connect(struct llog_ctxt *ctxt);
diff --git a/lustre/include/linux/obd_class.h b/lustre/include/linux/obd_class.h

index 1a577f0..8f2f9e2 100644 (file)
--- a/lustre/include/linux/obd_class.h
+++ b/lustre/include/linux/obd_class.h
@@ -137,6 +137,7 @@ int class_connect(struct lustre_handle *conn, struct obd_device *obd,
                    struct obd_uuid *cluuid);
  int class_disconnect(struct obd_export *exp, int failover);
  void class_disconnect_exports(struct obd_device *obddev, int failover);
+void class_disconnect_stale_exports(struct obd_device *obddev, int failover);
  /* generic operations shared by various OBD types */
  int class_multi_setup(struct obd_device *obddev, uint32_t len, void *data);
  int class_multi_cleanup(struct obd_device *obddev);
diff --git a/lustre/include/lustre/Makefile.am b/lustre/include/lustre/Makefile.am

index a785ada..6faa7cd 100644 (file)
--- a/lustre/include/lustre/Makefile.am
+++ b/lustre/include/lustre/Makefile.am
@@ -3,7 +3,8 @@
  # This code is issued under the GNU General Public License.
  # See the file COPYING in this distribution
  
-
+if UTILS
  pkginclude_HEADERS = lustre_user.h liblustreapi.h
+endif
  
-EXTRA_DIST = $(pkginclude_HEADERS)
+EXTRA_DIST = lustre_user.h liblustreapi.h
diff --git a/lustre/kernel_patches/patches/configurable-x86-stack-2.4.21-chaos.patch b/lustre/kernel_patches/patches/configurable-x86-stack-2.4.21-chaos.patch

new file mode 100644 (file)

index 0000000..431bdc7
--- /dev/null
+++ b/lustre/kernel_patches/patches/configurable-x86-stack-2.4.21-chaos.patch
@@ -0,0 +1,323 @@
+Index: linux-p4smp/arch/i386/kernel/entry.S
+===================================================================
+--- linux-p4smp.orig/arch/i386/kernel/entry.S  2004-06-14 13:13:07.000000000 -0700
++++ linux-p4smp/arch/i386/kernel/entry.S       2004-06-14 13:14:19.000000000 -0700
+@@ -46,6 +46,7 @@
+ #include <asm/segment.h>
+ #include <asm/page.h>
+ #include <asm/smp.h>
++#include <asm/current.h>
+ #include <asm/unistd.h>
+       
+ EBX           = 0x00
+@@ -94,10 +95,6 @@ pt_sys_exit = 8
+ 
+ ENOSYS = 38
+ 
+-#define GET_CURRENT(reg) \
+-      movl $-8192, reg; \
+-      andl %esp, reg
+-
+ #if CONFIG_X86_HIGH_ENTRY
+ 
+ #define call_SYMBOL_NAME_ABS(X) movl $X, %ebp; call *%ebp
+@@ -193,7 +190,7 @@ ENOSYS = 38
+       GET_CURRENT(%ebx);                              \
+       movl real_stack(%ebx), %edx;                    \
+       movl %esp, %ebx;                                \
+-      andl $0x1fff, %ebx;                             \
++      andl $(THREAD_SIZE-1), %ebx;                    \
+       orl %ebx, %edx;                                 \
+       movl %edx, %esp;
+ 
+@@ -228,7 +225,7 @@ ENOSYS = 38
+ return_path_start_marker:                             \
+       nop;                                            \
+       movl %esp, %ebx;                                \
+-      andl $0x1fff, %ebx;                             \
++      andl $(THREAD_SIZE-1), %ebx;                    \
+       orl %ebx, %edx;                                 \
+       movl %esp, %eax;                                \
+       movl %edx, %esp;                                \
+Index: linux-p4smp/arch/i386/kernel/smpboot.c
+===================================================================
+--- linux-p4smp.orig/arch/i386/kernel/smpboot.c        2004-06-14 13:13:07.000000000 -0700
++++ linux-p4smp/arch/i386/kernel/smpboot.c     2004-06-14 13:14:19.000000000 -0700
+@@ -814,7 +814,7 @@ static void __init do_boot_cpu (int apic
+ 
+       /* So we see what's up   */
+       printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
+-      stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle);
++      stack_start.esp = (void *)idle->thread.esp;
+ 
+       /*
+        * This grunge runs the startup process for
+@@ -887,7 +887,7 @@ static void __init do_boot_cpu (int apic
+                       Dprintk("CPU has booted.\n");
+               } else {
+                       boot_error= 1;
+-                      if (*((volatile unsigned char *)phys_to_virt(8192))
++                      if (*((volatile unsigned char *)phys_to_virt(THREAD_SIZE))
+                                       == 0xA5)
+                               /* trampoline started but...? */
+                               printk("Stuck ??\n");
+@@ -910,7 +910,7 @@ static void __init do_boot_cpu (int apic
+       }
+ 
+       /* mark "stuck" area as not stuck */
+-      *((volatile unsigned long *)phys_to_virt(8192)) = 0;
++      *((volatile unsigned long *)phys_to_virt(THREAD_SIZE)) = 0;
+ 
+       if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ) {
+               printk("Restoring NMI vector\n");
+Index: linux-p4smp/arch/i386/kernel/traps.c
+===================================================================
+--- linux-p4smp.orig/arch/i386/kernel/traps.c  2004-06-14 13:13:07.000000000 -0700
++++ linux-p4smp/arch/i386/kernel/traps.c       2004-06-14 13:14:19.000000000 -0700
+@@ -273,7 +273,7 @@ void show_trace_task(struct task_struct 
+       unsigned long esp = tsk->thread.esp;
+ 
+       /* User space on another CPU? */
+-      if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1))
++      if ((esp ^ (unsigned long)tsk) & ~(THREAD_SIZE - 1))
+               return;
+       show_trace((unsigned long *)esp);
+ }
+Index: linux-p4smp/arch/i386/kernel/head.S
+===================================================================
+--- linux-p4smp.orig/arch/i386/kernel/head.S   2004-06-14 13:13:07.000000000 -0700
++++ linux-p4smp/arch/i386/kernel/head.S        2004-06-14 13:14:19.000000000 -0700
+@@ -15,6 +15,7 @@
+ #include <asm/page.h>
+ #include <asm/pgtable.h>
+ #include <asm/desc.h>
++#include <asm/current.h>
+ 
+ #define OLD_CL_MAGIC_ADDR     0x90020
+ #define OLD_CL_MAGIC          0xA33F
+@@ -328,7 +329,7 @@ rp_sidt:
+       ret
+ 
+ ENTRY(stack_start)
+-      .long SYMBOL_NAME(init_task_union)+8192
++      .long SYMBOL_NAME(init_task_union)+THREAD_SIZE
+       .long __KERNEL_DS
+ 
+ /* This is the default interrupt "handler" :-) */
+Index: linux-p4smp/arch/i386/kernel/irq.c
+===================================================================
+--- linux-p4smp.orig/arch/i386/kernel/irq.c    2004-06-14 13:13:07.000000000 -0700
++++ linux-p4smp/arch/i386/kernel/irq.c 2004-06-14 13:14:19.000000000 -0700
+@@ -45,6 +45,7 @@
+ #include <asm/delay.h>
+ #include <asm/desc.h>
+ #include <asm/irq.h>
++#include <asm/current.h>
+ 
+ 
+ 
+@@ -585,7 +586,7 @@ asmlinkage unsigned int do_IRQ(struct pt
+       long esp;
+ 
+       /* Debugging check for stack overflow: is there less than 1KB free? */
+-      __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (8191));
++      __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (THREAD_SIZE-1));
+       if (unlikely(esp < (sizeof(struct task_struct) + 1024))) {
+               extern void show_stack(unsigned long *);
+ 
+Index: linux-p4smp/arch/i386/lib/getuser.S
+===================================================================
+--- linux-p4smp.orig/arch/i386/lib/getuser.S   2004-06-14 13:13:07.000000000 -0700
++++ linux-p4smp/arch/i386/lib/getuser.S        2004-06-14 13:14:19.000000000 -0700
+@@ -21,6 +21,10 @@
+  * as they get called from within inline assembly.
+  */
+ 
++/* linux/config.h first: asm/current.h derives THREAD_SIZE from CONFIG_STACK_SIZE_SHIFT */
++#include <linux/config.h>
++#include <asm/current.h>
++
+ addr_limit = 12
+ 
+ .text
+@@ -28,7 +32,7 @@ addr_limit = 12
+ .globl __get_user_1
+ __get_user_1:
+       movl %esp,%edx
+-      andl $0xffffe000,%edx
++      andl $~(THREAD_SIZE - 1),%edx
+       cmpl addr_limit(%edx),%eax
+       jae bad_get_user
+ 1:    movzbl (%eax),%edx
+@@ -41,7 +45,7 @@ __get_user_2:
+       addl $1,%eax
+       movl %esp,%edx
+       jc bad_get_user
+-      andl $0xffffe000,%edx
++      andl $~(THREAD_SIZE - 1),%edx
+       cmpl addr_limit(%edx),%eax
+       jae bad_get_user
+ 2:    movzwl -1(%eax),%edx
+@@ -54,7 +58,7 @@ __get_user_4:
+       addl $3,%eax
+       movl %esp,%edx
+       jc bad_get_user
+-      andl $0xffffe000,%edx
++      andl $~(THREAD_SIZE - 1),%edx
+       cmpl addr_limit(%edx),%eax
+       jae bad_get_user
+ 3:    movl -3(%eax),%edx
+Index: linux-p4smp/arch/i386/config.in
+===================================================================
+--- linux-p4smp.orig/arch/i386/config.in       2004-06-14 13:13:07.000000000 -0700
++++ linux-p4smp/arch/i386/config.in    2004-06-14 13:14:05.000000000 -0700
+@@ -310,6 +310,28 @@ if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86
+    define_bool CONFIG_HAVE_DEC_LOCK y
+ fi
+ 
++choice 'Bigger Stack Size Support' \
++     "off    CONFIG_NOBIGSTACK \
++      16KB   CONFIG_STACK_SIZE_16KB \
++      32KB   CONFIG_STACK_SIZE_32KB \
++      64KB   CONFIG_STACK_SIZE_64KB" off
++
++if [ "$CONFIG_NOBIGSTACK" = "y" ]; then
++   define_int CONFIG_STACK_SIZE_SHIFT 1
++else
++  if [ "$CONFIG_STACK_SIZE_16KB" = "y" ]; then
++     define_int CONFIG_STACK_SIZE_SHIFT 2
++  else
++    if [ "$CONFIG_STACK_SIZE_32KB" = "y" ]; then
++      define_int CONFIG_STACK_SIZE_SHIFT 3
++    else
++      if [ "$CONFIG_STACK_SIZE_64KB" = "y" ]; then
++        define_int CONFIG_STACK_SIZE_SHIFT 4
++      fi
++    fi
++  fi
++fi
++
+ source drivers/perfctr/Config.in
+ 
+ endmenu
+Index: linux-p4smp/include/asm-i386/current.h
+===================================================================
+--- linux-p4smp.orig/include/asm-i386/current.h        2004-06-14 13:13:07.000000000 -0700
++++ linux-p4smp/include/asm-i386/current.h     2004-06-14 13:41:19.000000000 -0700
+@@ -1,15 +1,64 @@
+ #ifndef _I386_CURRENT_H
+ #define _I386_CURRENT_H
++#include <asm/page.h>
++
++/*
++ * Configurable page sizes on i386, mainly for debugging purposes.
++ * (c) Balbir Singh
++ */
++
++/* enumerate the values, include/asm-i386/hw_irq.h in particular needs this */
++#if (PAGE_SIZE != 4096)
++#error PAGE_SIZE != 4096 unsupported
++#endif
++
++#if (CONFIG_STACK_SIZE_SHIFT == 0)
++#define THREAD_SIZE   4096
++#elif (CONFIG_STACK_SIZE_SHIFT == 1)
++#define THREAD_SIZE   8192
++#elif (CONFIG_STACK_SIZE_SHIFT == 2)
++#define THREAD_SIZE   16384
++#elif (CONFIG_STACK_SIZE_SHIFT == 3)
++#define THREAD_SIZE   32768
++#elif (CONFIG_STACK_SIZE_SHIFT == 4)
++#define THREAD_SIZE   65536
++#else
++#error CONFIG_STACK_SIZE_SHIFT > 4 unsupported
++#endif
++
++#if (CONFIG_STACK_SIZE_SHIFT != 1) && defined(CONFIG_X86_4G)
++#error Large stacks with 4G/4G split unsupported
++#endif
++
++#ifdef __ASSEMBLY__
++
++#define GET_CURRENT(reg) \
++      movl $-THREAD_SIZE, reg; \
++      andl %esp, reg
++
++#else /* __ASSEMBLY__ */
++
++#define __alloc_task_struct() \
++  ((struct task_struct *) __get_free_pages(GFP_KERNEL, CONFIG_STACK_SIZE_SHIFT))
++
++#define __free_task_struct(p) do { \
++  BUG_ON((p)->state < TASK_ZOMBIE); \
++  free_pages((unsigned long) (p), CONFIG_STACK_SIZE_SHIFT); \
++} while(0)
++
++#define INIT_TASK_SIZE THREAD_SIZE
+ 
+ struct task_struct;
+ 
+ static inline struct task_struct * get_current(void)
+ {
+       struct task_struct *current;
+-      __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~8191UL));
++      __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~(THREAD_SIZE - 1)));
+       return current;
+  }
+  
+ #define current get_current()
+ 
++#endif /* __ASSEMBLY__ */
++
+ #endif /* !(_I386_CURRENT_H) */
+Index: linux-p4smp/include/asm-i386/hw_irq.h
+===================================================================
+--- linux-p4smp.orig/include/asm-i386/hw_irq.h 2004-06-14 13:13:07.000000000 -0700
++++ linux-p4smp/include/asm-i386/hw_irq.h      2004-06-14 13:14:19.000000000 -0700
+@@ -136,21 +136,17 @@ extern char _stext, _etext;
+       "                                                               \
+       /* load the real stack - keep the offset */                     \
+                                                                       \
+-      movl $-8192, %ebx;                                              \
++      movl $- " STR(THREAD_SIZE) ", %ebx;                             \
+       andl %esp, %ebx;                                                \
+       movl 36(%ebx), %edx;                                            \
+       movl %esp, %ebx;                                                \
+-      andl $0x1fff, %ebx;                                             \
++      andl $( " STR(THREAD_SIZE) "-1), %ebx;                          \
+       orl %ebx, %edx;                                                 \
+       movl %edx, %esp;"
+ 
+ #define IRQ_NAME2(nr) nr##_interrupt(void)
+ #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
+ 
+-#define GET_CURRENT \
+-      "movl %esp, %ebx\n\t" \
+-      "andl $-8192, %ebx\n\t"
+-
+ /*
+  *    SMP has a few special interrupts for IPI messages
+  */
+Index: linux-p4smp/include/asm-i386/processor.h
+===================================================================
+--- linux-p4smp.orig/include/asm-i386/processor.h      2004-06-14 13:13:07.000000000 -0700
++++ linux-p4smp/include/asm-i386/processor.h   2004-06-14 13:14:19.000000000 -0700
+@@ -14,6 +14,7 @@
+ #include <asm/types.h>
+ #include <asm/sigcontext.h>
+ #include <asm/cpufeature.h>
++#include <asm/current.h>
+ #include <linux/cache.h>
+ #include <linux/config.h>
+ #include <linux/threads.h>
+@@ -498,10 +499,6 @@ unsigned long get_wchan(struct task_stru
+ #define KSTK_EIP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1019])
+ #define KSTK_ESP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1022])
+ 
+-#define THREAD_SIZE (2*PAGE_SIZE)
+-#define __alloc_task_struct() ((struct task_struct *) __get_free_pages(GFP_KERNEL,1))
+-#define __free_task_struct(p) do { BUG_ON((p)->state < TASK_ZOMBIE); free_pages((unsigned long) (p), 1); } while (0)
+-
+ #define init_task     (init_task_union.task)
+ #define init_stack    (init_task_union.stack)
+ 
diff --git a/lustre/kernel_patches/patches/ext-2.4-patch-1-chaos.patch b/lustre/kernel_patches/patches/ext-2.4-patch-1-chaos.patch

index 3de6a8f..f6b2f43 100644 (file)
--- a/lustre/kernel_patches/patches/ext-2.4-patch-1-chaos.patch
+++ b/lustre/kernel_patches/patches/ext-2.4-patch-1-chaos.patch
@@ -1395,7 +1395,7 @@
  +              struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs);
  +              rec_len = EXT3_DIR_REC_LEN(de->name_len);
  +              memcpy (to, de, rec_len);
-+              ((struct ext3_dir_entry_2 *) to)->rec_len = rec_len;
++              ((struct ext3_dir_entry_2 *)to)->rec_len = cpu_to_le16(rec_len);
  +              de->inode = 0;
  +              map++;
  +              to += rec_len;
@@ -1416,9 +1416,9 @@
  +                      rec_len = EXT3_DIR_REC_LEN(de->name_len);
  +                      if (de > to)
  +                              memmove(to, de, rec_len);
-+                      to->rec_len = rec_len;
++                      to->rec_len = cpu_to_le16(rec_len);
  +                      prev = to;
-+                      to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len);
++                      to = (struct ext3_dir_entry_2 *)((char *)to + rec_len);
  +              }
  +              de = next;
  +      }
@@ -1642,8 +1642,8 @@
  +      data1 = bh2->b_data;
  +
  +      /* The 0th block becomes the root, move the dirents out */
-+      de = (struct ext3_dir_entry_2 *) &root->dotdot;
-+      de = (struct ext3_dir_entry_2 *) ((char *)de + de->rec_len);
++      de = (struct ext3_dir_entry_2 *)&root->dotdot;
++      de = (struct ext3_dir_entry_2 *)((char *)de + le16_to_cpu(de->rec_len));
  +      len = ((char *) root) + blocksize - (char *) de;
  +      memcpy (data1, de, len);
  +      de = (struct ext3_dir_entry_2 *) data1;
diff --git a/lustre/kernel_patches/patches/ext-2.4-patch-1-suse.patch b/lustre/kernel_patches/patches/ext-2.4-patch-1-suse.patch

index 748671f..28a1ad6 100644 (file)
--- a/lustre/kernel_patches/patches/ext-2.4-patch-1-suse.patch
+++ b/lustre/kernel_patches/patches/ext-2.4-patch-1-suse.patch
@@ -1395,7 +1395,7 @@
  +              struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs);
  +              rec_len = EXT3_DIR_REC_LEN(de->name_len);
  +              memcpy (to, de, rec_len);
-+              ((struct ext3_dir_entry_2 *) to)->rec_len = rec_len;
++              ((struct ext3_dir_entry_2 *)to)->rec_len = cpu_to_le16(rec_len);
  +              de->inode = 0;
  +              map++;
  +              to += rec_len;
@@ -1416,9 +1416,9 @@
  +                      rec_len = EXT3_DIR_REC_LEN(de->name_len);
  +                      if (de > to)
  +                              memmove(to, de, rec_len);
-+                      to->rec_len = rec_len;
++                      to->rec_len = cpu_to_le16(rec_len);
  +                      prev = to;
-+                      to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len);
++                      to = (struct ext3_dir_entry_2 *)((char *)to + rec_len);
  +              }
  +              de = next;
  +      }
@@ -1642,8 +1642,8 @@
  +      data1 = bh2->b_data;
  +
  +      /* The 0th block becomes the root, move the dirents out */
-+      de = (struct ext3_dir_entry_2 *) &root->dotdot;
-+      de = (struct ext3_dir_entry_2 *) ((char *)de + de->rec_len);
++      de = (struct ext3_dir_entry_2 *)&root->dotdot;
++      de = (struct ext3_dir_entry_2 *)((char *)de + le16_to_cpu(de->rec_len));
  +      len = ((char *) root) + blocksize - (char *) de;
  +      memcpy (data1, de, len);
  +      de = (struct ext3_dir_entry_2 *) data1;
diff --git a/lustre/kernel_patches/patches/ext-2.4-patch-1.patch b/lustre/kernel_patches/patches/ext-2.4-patch-1.patch

index 748671f..28a1ad6 100644 (file)
--- a/lustre/kernel_patches/patches/ext-2.4-patch-1.patch
+++ b/lustre/kernel_patches/patches/ext-2.4-patch-1.patch
@@ -1395,7 +1395,7 @@
  +              struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs);
  +              rec_len = EXT3_DIR_REC_LEN(de->name_len);
  +              memcpy (to, de, rec_len);
-+              ((struct ext3_dir_entry_2 *) to)->rec_len = rec_len;
++              ((struct ext3_dir_entry_2 *)to)->rec_len = cpu_to_le16(rec_len);
  +              de->inode = 0;
  +              map++;
  +              to += rec_len;
@@ -1416,9 +1416,9 @@
  +                      rec_len = EXT3_DIR_REC_LEN(de->name_len);
  +                      if (de > to)
  +                              memmove(to, de, rec_len);
-+                      to->rec_len = rec_len;
++                      to->rec_len = cpu_to_le16(rec_len);
  +                      prev = to;
-+                      to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len);
++                      to = (struct ext3_dir_entry_2 *)((char *)to + rec_len);
  +              }
  +              de = next;
  +      }
@@ -1642,8 +1642,8 @@
  +      data1 = bh2->b_data;
  +
  +      /* The 0th block becomes the root, move the dirents out */
-+      de = (struct ext3_dir_entry_2 *) &root->dotdot;
-+      de = (struct ext3_dir_entry_2 *) ((char *)de + de->rec_len);
++      de = (struct ext3_dir_entry_2 *)&root->dotdot;
++      de = (struct ext3_dir_entry_2 *)((char *)de + le16_to_cpu(de->rec_len));
  +      len = ((char *) root) + blocksize - (char *) de;
  +      memcpy (data1, de, len);
  +      de = (struct ext3_dir_entry_2 *) data1;
diff --git a/lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.21-chaos.patch b/lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.21-chaos.patch

index 031e46d..63684c5 100644 (file)
--- a/lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.21-chaos.patch
+++ b/lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.21-chaos.patch
@@ -21,8 +21,8 @@ Index: linux-2.4.21-chaos/fs/ext3/ialloc.c
  +              EXT3_I(inode)->i_extra_isize = 0;
  +
         ei->i_state = EXT3_STATE_NEW;
-       err = ext3_get_inode_loc_new(inode, &iloc, 1);
-       if (err) goto fail;
+       err = ext3_get_inode_loc_new(inode, &iloc, 1);
+       if (err) goto fail;
  Index: linux-2.4.21-chaos/fs/ext3/inode.c
  ===================================================================
  --- linux-2.4.21-chaos.orig/fs/ext3/inode.c    2003-12-12 17:39:11.000000000 +0300
diff --git a/lustre/kernel_patches/patches/ext3-htree-2.4.19-pre1.patch b/lustre/kernel_patches/patches/ext3-htree-2.4.19-pre1.patch

index c168149..0806c38 100644 (file)
--- a/lustre/kernel_patches/patches/ext3-htree-2.4.19-pre1.patch
+++ b/lustre/kernel_patches/patches/ext3-htree-2.4.19-pre1.patch
@@ -1667,7 +1667,7 @@ Index: linux-2.4.19-pre1/fs/ext3/namei.c
  +      data1 = bh2->b_data;
  +
  +      /* The 0th block becomes the root, move the dirents out */
-+      de = (struct ext3_dir_entry_2 *) &root->dotdot;
++      de = (struct ext3_dir_entry_2 *)&root->dotdot;
  +      de = (struct ext3_dir_entry_2 *)((char *)de + le16_to_cpu(de->rec_len));
  +      len = ((char *) root) + blocksize - (char *) de;
  +      memcpy (data1, de, len);
diff --git a/lustre/kernel_patches/patches/ext3-htree-2.4.21-chaos.patch b/lustre/kernel_patches/patches/ext3-htree-2.4.21-chaos.patch

index b045d53..4b445f5 100644 (file)
--- a/lustre/kernel_patches/patches/ext3-htree-2.4.21-chaos.patch
+++ b/lustre/kernel_patches/patches/ext3-htree-2.4.21-chaos.patch
@@ -1667,7 +1667,7 @@ Index: linux-2.4.21-chaos/fs/ext3/namei.c
  +      data1 = bh2->b_data;
  +
  +      /* The 0th block becomes the root, move the dirents out */
-+      de = (struct ext3_dir_entry_2 *) &root->dotdot;
++      de = (struct ext3_dir_entry_2 *)&root->dotdot;
  +      de = (struct ext3_dir_entry_2 *)((char *)de + le16_to_cpu(de->rec_len));
  +      len = ((char *) root) + blocksize - (char *) de;
  +      memcpy (data1, de, len);
diff --git a/lustre/kernel_patches/patches/ext3-htree-2.4.22-rh.patch b/lustre/kernel_patches/patches/ext3-htree-2.4.22-rh.patch

index 853fb0c..ca2cacf 100644 (file)
--- a/lustre/kernel_patches/patches/ext3-htree-2.4.22-rh.patch
+++ b/lustre/kernel_patches/patches/ext3-htree-2.4.22-rh.patch
@@ -1657,7 +1657,7 @@
  +      data1 = bh2->b_data;
  +
  +      /* The 0th block becomes the root, move the dirents out */
-+      de = (struct ext3_dir_entry_2 *) &root->dotdot;
++      de = (struct ext3_dir_entry_2 *)&root->dotdot;
  +      de = (struct ext3_dir_entry_2 *)((char *)de + le16_to_cpu(de->rec_len));
  +      len = ((char *) root) + blocksize - (char *) de;
  +      memcpy (data1, de, len);
diff --git a/lustre/kernel_patches/patches/ext3-htree-suse.patch b/lustre/kernel_patches/patches/ext3-htree-suse.patch

index 1278f8f..3e5148e 100644 (file)
--- a/lustre/kernel_patches/patches/ext3-htree-suse.patch
+++ b/lustre/kernel_patches/patches/ext3-htree-suse.patch
@@ -1667,7 +1667,7 @@ Index: linux-2.4.21-suse/fs/ext3/namei.c
  +      data1 = bh2->b_data;
  +
  +      /* The 0th block becomes the root, move the dirents out */
-+      de = (struct ext3_dir_entry_2 *) &root->dotdot;
++      de = (struct ext3_dir_entry_2 *)&root->dotdot;
  +      de = (struct ext3_dir_entry_2 *)((char *)de + le16_to_cpu(de->rec_len));
  +      len = ((char *) root) + blocksize - (char *) de;
  +      memcpy (data1, de, len);
diff --git a/lustre/kernel_patches/patches/ext3-htree.patch b/lustre/kernel_patches/patches/ext3-htree.patch

index 86b0061..31f2ae3 100644 (file)
--- a/lustre/kernel_patches/patches/ext3-htree.patch
+++ b/lustre/kernel_patches/patches/ext3-htree.patch
@@ -1657,7 +1657,7 @@
  +      data1 = bh2->b_data;
  +
  +      /* The 0th block becomes the root, move the dirents out */
-+      de = (struct ext3_dir_entry_2 *) &root->dotdot;
++      de = (struct ext3_dir_entry_2 *)&root->dotdot;
  +      de = (struct ext3_dir_entry_2 *)((char *)de + le16_to_cpu(de->rec_len));
  +      len = ((char *) root) + blocksize - (char *) de;
  +      memcpy (data1, de, len);
diff --git a/lustre/kernel_patches/patches/ext3-pdirops-2.4.20-rh.patch b/lustre/kernel_patches/patches/ext3-pdirops-2.4.20-rh.patch

new file mode 100644 (file)

index 0000000..2733e7d
--- /dev/null
+++ b/lustre/kernel_patches/patches/ext3-pdirops-2.4.20-rh.patch
@@ -0,0 +1,1248 @@
+ fs/ext3/ialloc.c          |    3 
+ fs/ext3/inode.c           |    3 
+ fs/ext3/namei.c           |  582 +++++++++++++++++++++++++++++++++++++---------
+ fs/ext3/super.c           |   14 +
+ include/linux/ext3_fs.h   |    1 
+ include/linux/ext3_fs_i.h |    6 
+ 6 files changed, 500 insertions(+), 109 deletions(-)
+
+Index: linux-2.4.20/fs/ext3/namei.c
+===================================================================
+--- linux-2.4.20.orig/fs/ext3/namei.c  2004-05-27 15:10:40.000000000 -0400
++++ linux-2.4.20/fs/ext3/namei.c       2004-05-27 15:29:52.000000000 -0400
+@@ -51,6 +51,9 @@
+ {
+       struct buffer_head *bh;
+ 
++      /* with parallel dir operations all appends
++       * have to be serialized -bzzz */
++      down(&EXT3_I(inode)->i_append_sem);
+       *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
+ 
+       if ((bh = ext3_bread(handle, inode, *block, 1, err))) {
+@@ -58,6 +61,8 @@
+               EXT3_I(inode)->i_disksize = inode->i_size;
+               ext3_journal_get_write_access(handle,bh);
+       }
++      up(&EXT3_I(inode)->i_append_sem);
++      
+       return bh;
+ }
+ 
+@@ -134,6 +139,8 @@
+       struct buffer_head *bh;
+       struct dx_entry *entries;
+       struct dx_entry *at;
++      unsigned long leaf;
++      unsigned int curidx;
+ };
+ 
+ struct dx_map_entry
+@@ -142,6 +149,30 @@
+       u32 offs;
+ };
+ 
++/* Busy-wait lock on bit BH_DXLock of bh->b_state; a no-op without CONFIG_SMP.
++ * FIXME: this should be reworked using bb_spin_lock introduced in -mm tree
++ */
++#define BH_DXLock     25 /* bit number within bh->b_state */
++
++static inline void dx_lock_bh(struct buffer_head volatile *bh)
++{
++#ifdef CONFIG_SMP
++        while (test_and_set_bit(BH_DXLock, &bh->b_state)) {
++                while (test_bit(BH_DXLock, &bh->b_state))
++                        cpu_relax();
++        }
++#endif
++}
++
++static inline void dx_unlock_bh(struct buffer_head *bh)
++{
++#ifdef CONFIG_SMP
++        smp_mb__before_clear_bit();
++        clear_bit(BH_DXLock, &bh->b_state);
++#endif
++}
++
++
+ #ifdef CONFIG_EXT3_INDEX
+ static inline unsigned dx_get_block (struct dx_entry *entry);
+ static void dx_set_block (struct dx_entry *entry, unsigned value);
+@@ -153,7 +184,7 @@
+ static void dx_set_limit (struct dx_entry *entries, unsigned value);
+ static unsigned dx_root_limit (struct inode *dir, unsigned infosize);
+ static unsigned dx_node_limit (struct inode *dir);
+-static struct dx_frame *dx_probe(struct dentry *dentry,
++static struct dx_frame *dx_probe(struct qstr *name,
+                                struct inode *dir,
+                                struct dx_hash_info *hinfo,
+                                struct dx_frame *frame,
+@@ -165,15 +196,18 @@
+ static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to,
+               struct dx_map_entry *offsets, int count);
+ static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size);
+-static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block);
++static void dx_insert_block (struct inode *, struct dx_frame *, u32, u32, u32);
+ static int ext3_htree_next_block(struct inode *dir, __u32 hash,
+                                struct dx_frame *frame,
+                                struct dx_frame *frames, int *err,
+                                __u32 *start_hash);
+ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
+-                     struct ext3_dir_entry_2 **res_dir, int *err);
++                     struct ext3_dir_entry_2 **res_dir, int *err,
++                     int rwlock, void **lock);
+ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
+                            struct inode *inode);
++static inline void *ext3_lock_htree(struct inode *, unsigned long, int);
++static inline void ext3_unlock_htree(struct inode *, void *);
+ 
+ /*
+  * Future: use high four bits of block for coalesce-on-delete flags
+@@ -306,6 +340,94 @@
+ #endif /* DX_DEBUG */
+ 
+ /*
++ * dx_find_position
++ *
++ * Binary search of the index: returns the last entry in [1, count) whose
++ * hash is <= @hash, or entries[0] if none is (assumes hashes are sorted).
++ */
++
++struct dx_entry * dx_find_position(struct dx_entry * entries, u32 hash)
++{
++      struct dx_entry *p, *q, *m;
++      int count;
++
++      count = dx_get_count(entries);
++      p = entries + 1;
++      q = entries + count - 1;
++      while (p <= q)
++      {
++              m = p + (q - p)/2;
++              if (dx_get_hash(m) > hash)
++                      q = m - 1;
++              else
++                      p = m + 1;
++      }
++      return p - 1;
++}
++
++/*
++ * Returns 1 if the index in @frame still maps @hash to frame->leaf, else 0
++ */
++int dx_check_path(struct dx_frame *frame, u32 hash)
++{
++      struct dx_entry *p;
++      int ret = 1;
++
++      dx_lock_bh(frame->bh);
++      p = dx_find_position(frame->entries, hash);
++      if (frame->leaf != dx_get_block(p))
++              ret = 0;
++      dx_unlock_bh(frame->bh);
++      
++      return ret;
++}
++
++/*
++ * Re-check every probed level under its bh lock: returns 0 if any level
++ * now maps hinfo->hash to a block other than the recorded leaf, else 1
++ */
++static int
++dx_check_full_path(struct dx_frame *frames, struct dx_hash_info *hinfo)
++{
++      struct dx_entry *p;
++      struct dx_frame *frame = frames;
++      u32 leaf;
++
++      /* check first level */
++      dx_lock_bh(frame->bh);
++      p = dx_find_position(frame->entries, hinfo->hash);
++      leaf = dx_get_block(p);
++      dx_unlock_bh(frame->bh);
++      
++      if (leaf != frame->leaf) 
++              return 0;
++      
++      /* is there 2nd level? */
++      frame++;
++      if (frame->bh == NULL)
++              return 1;
++
++      /* check second level */
++      dx_lock_bh(frame->bh);
++
++      /* the 1st level may have changed meanwhile, so recheck it */
++      if (!dx_check_path(frames, hinfo->hash)) {
++              /* path changed */
++              dx_unlock_bh(frame->bh);
++              return 0;
++      }
++
++      p = dx_find_position(frame->entries, hinfo->hash);
++      leaf = dx_get_block(p);
++      dx_unlock_bh(frame->bh);
++      
++      if (leaf != frame->leaf)
++              return 0;
++
++      return 1;
++}
++
++/*
+  * Probe for a directory leaf block to search.
+  *
+  * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
+@@ -315,19 +437,20 @@
+  * back to userspace.
+  */
+ static struct dx_frame *
+-dx_probe(struct dentry *dentry, struct inode *dir,
++dx_probe(struct qstr *name, struct inode *dir,
+        struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
+ {
+-      unsigned count, indirect;
+-      struct dx_entry *at, *entries, *p, *q, *m;
++      unsigned indirect;
++      struct dx_entry *at, *entries;
+       struct dx_root *root;
+       struct buffer_head *bh;
+       struct dx_frame *frame = frame_in;
+       u32 hash;
++      unsigned int curidx;
+ 
+       frame->bh = NULL;
+-      if (dentry)
+-              dir = dentry->d_parent->d_inode;
++      frame[1].bh = NULL;
++
+       if (!(bh = ext3_bread (NULL,dir, 0, 0, err)))
+               goto fail;
+       root = (struct dx_root *) bh->b_data;
+@@ -343,8 +466,8 @@
+       }
+       hinfo->hash_version = root->info.hash_version;
+       hinfo->seed = dir->i_sb->u.ext3_sb.s_hash_seed;
+-      if (dentry)
+-              ext3fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo);
++      if (name)
++              ext3fs_dirhash(name->name, name->len, hinfo);
+       hash = hinfo->hash;
+ 
+       if (root->info.unused_flags & 1) {
+@@ -356,7 +479,19 @@
+               goto fail;
+       }
+ 
++repeat:
++      curidx = 0;
++      entries = (struct dx_entry *) (((char *)&root->info) +
++                                     root->info.info_length);
++      assert(dx_get_limit(entries) == dx_root_limit(dir,
++                                                    root->info.info_length));
++      dxtrace (printk("Look up %x", hash));
++      dx_lock_bh(bh);
++      /* indirect must be initialized under bh lock because
++       * 2nd level creation procedure may change it and dx_probe()
++       * will suggest htree is still single-level -bzzz */
+       if ((indirect = root->info.indirect_levels) > 1) {
++              dx_unlock_bh(bh);
+               ext3_warning(dir->i_sb, __FUNCTION__,
+                            "Unimplemented inode hash depth: %#06x",
+                            root->info.indirect_levels);
+@@ -364,56 +499,46 @@
+               *err = ERR_BAD_DX_DIR;
+               goto fail;
+       }
+-
+-      entries = (struct dx_entry *) (((char *)&root->info) +
+-                                     root->info.info_length);
+-      assert(dx_get_limit(entries) == dx_root_limit(dir,
+-                                                    root->info.info_length));
+-      dxtrace (printk("Look up %x", hash));
++      
+       while (1)
+       {
+-              count = dx_get_count(entries);
+-              assert (count && count <= dx_get_limit(entries));
+-              p = entries + 1;
+-              q = entries + count - 1;
+-              while (p <= q)
+-              {
+-                      m = p + (q - p)/2;
+-                      dxtrace(printk("."));
+-                      if (dx_get_hash(m) > hash)
+-                              q = m - 1;
+-                      else
+-                              p = m + 1;
+-              }
+-
+-              if (0) // linear search cross check
+-              {
+-                      unsigned n = count - 1;
+-                      at = entries;
+-                      while (n--)
+-                      {
+-                              dxtrace(printk(","));
+-                              if (dx_get_hash(++at) > hash)
+-                              {
+-                                      at--;
+-                                      break;
+-                              }
+-                      }
+-                      assert (at == p - 1);
+-              }
+-
+-              at = p - 1;
+-              dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at)));
++              at = dx_find_position(entries, hinfo->hash);
++              dxtrace(printk(" %x->%u\n",
++                              at == entries? 0: dx_get_hash(at),
++                              dx_get_block(at)));
+               frame->bh = bh;
+               frame->entries = entries;
+               frame->at = at;
+-              if (!indirect--) return frame;
+-              if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err)))
++              frame->curidx = curidx;
++              frame->leaf = dx_get_block(at);
++              if (!indirect--) {
++                      dx_unlock_bh(bh);
++                      return frame;
++              }
++              
++              /* step into next htree level */
++              curidx = dx_get_block(at);
++              dx_unlock_bh(bh);
++              if (!(bh = ext3_bread (NULL,dir, frame->leaf, 0, err)))
+                       goto fail2;
++              
++              dx_lock_bh(bh);
++              /* splitting may change root index block and move
++               * hash we're looking for into another index block,
++               * so recheck the path and restart from the beginning
++               * (dropping this block's reference) if it changed -bzzz */
++              if (!dx_check_path(frame, hash)) {
++                      dx_unlock_bh(bh);
++                      brelse(bh);
++                      bh = frame->bh;
++                      goto repeat;
++              }
++              
+               at = entries = ((struct dx_node *) bh->b_data)->entries;
+               assert (dx_get_limit(entries) == dx_node_limit (dir));
+               frame++;
+       }
++      dx_unlock_bh(bh);
+ fail2:
+       while (frame >= frame_in) {
+               brelse(frame->bh);
+@@ -427,8 +552,7 @@
+ {
+       if (frames[0].bh == NULL)
+               return;
+-
+-      if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels)
++      if (frames[1].bh != NULL)
+               brelse(frames[1].bh);
+       brelse(frames[0].bh);
+ }
+@@ -470,8 +594,10 @@
+        * nodes need to be read.
+        */
+       while (1) {
+-              if (++(p->at) < p->entries + dx_get_count(p->entries))
++              if (++(p->at) < p->entries + dx_get_count(p->entries)) {
++                      p->leaf = dx_get_block(p->at);
+                       break;
++              }
+               if (p == frames)
+                       return 0;
+               num_frames++;
+@@ -497,13 +623,17 @@
+        * block so no check is necessary
+        */
+       while (num_frames--) {
+-              if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at),
+-                                    0, err)))
++              u32 idx;
++              
++              idx = p->leaf = dx_get_block(p->at);
++              if (!(bh = ext3_bread(NULL, dir, idx, 0, err)))
+                       return -1; /* Failure */
+               p++;
+               brelse (p->bh);
+               p->bh = bh;
+               p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
++              p->curidx = idx;
++              p->leaf = dx_get_block(p->at);
+       }
+       return 1;
+ }
+@@ -543,7 +673,7 @@
+       dir = dir_file->f_dentry->d_inode;
+       hinfo.hash = start_hash;
+       hinfo.minor_hash = 0;
+-      frame = dx_probe(0, dir_file->f_dentry->d_inode, &hinfo, frames, &err);
++      frame = dx_probe(NULL, dir_file->f_dentry->d_inode, &hinfo, frames, &err);
+       if (!frame)
+               return err;
+ 
+@@ -625,7 +755,8 @@
+                       count++;
+               }
+               /* XXX: do we need to check rec_len == 0 case? -Chris */
+-              de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
++              de = (struct ext3_dir_entry_2 *)((char*)de +
++                              le16_to_cpu(de->rec_len));
+       }
+       return count;
+ }
+@@ -658,7 +789,8 @@
+       } while(more);
+ }
+ 
+-static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block)
++static void dx_insert_block(struct inode *dir, struct dx_frame *frame,
++                      u32 hash, u32 block, u32 idx)
+ {
+       struct dx_entry *entries = frame->entries;
+       struct dx_entry *old = frame->at, *new = old + 1;
+@@ -670,6 +802,7 @@
+       dx_set_hash(new, hash);
+       dx_set_block(new, block);
+       dx_set_count(entries, count + 1);
++      
+ }
+ #endif
+ 
+@@ -752,7 +885,8 @@
+ 
+       
+ static struct buffer_head * ext3_find_entry (struct dentry *dentry,
+-                                      struct ext3_dir_entry_2 ** res_dir)
++                                      struct ext3_dir_entry_2 ** res_dir,
++                                      int rwlock, void **lock)
+ {
+       struct super_block * sb;
+       struct buffer_head * bh_use[NAMEI_RA_SIZE];
+@@ -768,6 +902,7 @@
+       int namelen;
+       const u8 *name;
+       unsigned blocksize;
++      int do_not_use_dx = 0;
+ 
+       *res_dir = NULL;
+       sb = dir->i_sb;
+@@ -776,9 +911,10 @@
+       name = dentry->d_name.name;
+       if (namelen > EXT3_NAME_LEN)
+               return NULL;
++repeat:
+ #ifdef CONFIG_EXT3_INDEX
+       if (is_dx(dir)) {
+-              bh = ext3_dx_find_entry(dentry, res_dir, &err);
++              bh = ext3_dx_find_entry(dentry, res_dir, &err, rwlock, lock);
+               /*
+                * On success, or if the error was file not found,
+                * return.  Otherwise, fall back to doing a search the
+@@ -787,8 +923,14 @@
+               if (bh || (err != ERR_BAD_DX_DIR))
+                       return bh;
+               dxtrace(printk("ext3_find_entry: dx failed, falling back\n"));
++              do_not_use_dx = 1;
+       }
+ #endif
++      *lock = ext3_lock_htree(dir, 0, rwlock);
++      if (is_dx(dir) && !do_not_use_dx) {
++              ext3_unlock_htree(dir, *lock);
++              goto repeat;
++      }
+       nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb);
+       start = EXT3_I(dir)->i_dir_start_lookup;
+       if (start >= nblocks)
+@@ -860,12 +1002,17 @@
+       /* Clean up the read-ahead blocks */
+       for (; ra_ptr < ra_max; ra_ptr++)
+               brelse (bh_use[ra_ptr]);
++      if (!ret) {
++              ext3_unlock_htree(dir, *lock);
++              *lock = NULL;
++      }
+       return ret;
+ }
+ 
+ #ifdef CONFIG_EXT3_INDEX
+ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
+-                     struct ext3_dir_entry_2 **res_dir, int *err)
++                     struct ext3_dir_entry_2 **res_dir, int *err,
++                     int rwlock, void **lock)
+ {
+       struct super_block * sb;
+       struct dx_hash_info     hinfo;
+@@ -880,11 +1027,22 @@
+       struct inode *dir = dentry->d_parent->d_inode;
+       
+       sb = dir->i_sb;
+-      if (!(frame = dx_probe (dentry, 0, &hinfo, frames, err)))
++repeat:
++      if (!(frame = dx_probe (&dentry->d_name, dir, &hinfo, frames, err)))
+               return NULL;
++      
++      *lock = ext3_lock_htree(dir, frame->leaf, rwlock);
++      /* while locking leaf we just found may get splitted
++       * so, we need another leaf. check this */
++      if (!dx_check_full_path(frames, &hinfo)) {
++              ext3_unlock_htree(dir, *lock);
++              dx_release(frames);
++              goto repeat;
++      }
++
+       hash = hinfo.hash;
+       do {
+-              block = dx_get_block(frame->at);
++              block = frame->leaf;
+               if (!(bh = ext3_bread (NULL,dir, block, 0, err)))
+                       goto errout;
+               de = (struct ext3_dir_entry_2 *) bh->b_data;
+@@ -918,6 +1076,8 @@
+       *err = -ENOENT;
+ errout:
+       dxtrace(printk("%s not found\n", name));
++      ext3_unlock_htree(dir, *lock);
++      *lock = NULL;
+       dx_release (frames);
+       return NULL;
+ }
+@@ -928,6 +1088,7 @@
+       struct inode * inode;
+       struct ext3_dir_entry_2 * de;
+       struct buffer_head * bh;
++    void *lock = NULL;
+ 
+       if (dentry->d_name.len > EXT3_NAME_LEN)
+               return ERR_PTR(-ENAMETOOLONG);
+@@ -935,10 +1096,11 @@
+       if (ext3_check_for_iopen(dir, dentry))
+               return NULL;
+ 
+-      bh = ext3_find_entry(dentry, &de);
++      bh = ext3_find_entry(dentry, &de, 0, &lock);
+       inode = NULL;
+       if (bh) {
+               unsigned long ino = le32_to_cpu(de->inode);
++              ext3_unlock_htree(dir, lock);
+               brelse (bh);
+               inode = iget(dir->i_sb, ino);
+ 
+@@ -975,7 +1137,8 @@
+       unsigned rec_len = 0;
+ 
+       while (count--) {
+-              struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs);
++              struct ext3_dir_entry_2 *de =
++                      (struct ext3_dir_entry_2 *) (from + map->offs);
+               rec_len = EXT3_DIR_REC_LEN(de->name_len);
+               memcpy (to, de, rec_len);
+               ((struct ext3_dir_entry_2 *) to)->rec_len = rec_len;
+@@ -988,7 +1151,8 @@
+ 
+ static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
+ {
+-      struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base;
++      struct ext3_dir_entry_2 *next, *to, *prev;
++      struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) base;
+       unsigned rec_len = 0;
+ 
+       prev = to = de;
+@@ -1010,7 +1174,8 @@
+ 
+ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
+                       struct buffer_head **bh,struct dx_frame *frame,
+-                      struct dx_hash_info *hinfo, int *error)
++                      struct dx_hash_info *hinfo, void **target,
++                      int *error)
+ {
+       unsigned blocksize = dir->i_sb->s_blocksize;
+       unsigned count, continued;
+@@ -1057,23 +1222,30 @@
+       hash2 = map[split].hash;
+       continued = hash2 == map[split - 1].hash;
+       dxtrace(printk("Split block %i at %x, %i/%i\n",
+-              dx_get_block(frame->at), hash2, split, count-split));
+-
++              frame->leaf, hash2, split, count-split));
++      
+       /* Fancy dance to stay within two buffers */
+       de2 = dx_move_dirents(data1, data2, map + split, count - split);
+       de = dx_pack_dirents(data1,blocksize);
+       de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
+       de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2);
+-      dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data1, blocksize, 1));
+-      dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data2, blocksize, 1));
++      dxtrace(dx_show_leaf(hinfo,(struct ext3_dir_entry_2*) data1, blocksize, 1));
++      dxtrace(dx_show_leaf(hinfo,(struct ext3_dir_entry_2*) data2, blocksize, 1));
+ 
+       /* Which block gets the new entry? */
++      *target = NULL;
+       if (hinfo->hash >= hash2)
+       {
+               swap(*bh, bh2);
+               de = de2;
+-      }
+-      dx_insert_block (frame, hash2 + continued, newblock);
++
++              /* entry will be stored into new block
++               * we have to lock it before add_dirent_to_buf */
++              *target = ext3_lock_htree(dir, newblock, 1);
++      }
++      dx_lock_bh(frame->bh);
++      dx_insert_block (dir, frame, hash2 + continued, newblock, frame->curidx);
++      dx_unlock_bh(frame->bh);
+       err = ext3_journal_dirty_metadata (handle, bh2);
+       if (err)
+               goto journal_error;
+@@ -1147,7 +1319,8 @@
+       nlen = EXT3_DIR_REC_LEN(de->name_len);
+       rlen = le16_to_cpu(de->rec_len);
+       if (de->inode) {
+-              struct ext3_dir_entry_2 *de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen);
++              struct ext3_dir_entry_2 *de1 =
++                      (struct ext3_dir_entry_2 *)((char *)de + nlen);
+               de1->rec_len = cpu_to_le16(rlen - nlen);
+               de->rec_len = cpu_to_le16(nlen);
+               de = de1;
+@@ -1205,7 +1378,8 @@
+       unsigned        blocksize;
+       struct dx_hash_info hinfo;
+       u32             block;
+-              
++      void            *lock, *new_lock = NULL; /* do_split may fail early */
++
+       blocksize =  dir->i_sb->s_blocksize;
+       dxtrace(printk("Creating index\n"));
+       retval = ext3_journal_get_write_access(handle, bh);
+@@ -1216,7 +1390,6 @@
+       }
+       root = (struct dx_root *) bh->b_data;
+               
+-      EXT3_I(dir)->i_flags |= EXT3_INDEX_FL;
+       bh2 = ext3_append (handle, dir, &block, &retval);
+       if (!(bh2)) {
+               brelse(bh);
+@@ -1224,6 +1397,8 @@
+       }
+       data1 = bh2->b_data;
+ 
++      lock = ext3_lock_htree(dir, block, 1);
++
+       /* The 0th block becomes the root, move the dirents out */
+       de = (struct ext3_dir_entry_2 *) &root->dotdot;
+       de = (struct ext3_dir_entry_2 *) ((char *)de + de->rec_len);
+@@ -1253,13 +1428,25 @@
+       frame->entries = entries;
+       frame->at = entries;
+       frame->bh = bh;
++      frame->curidx = 0;
++      frame->leaf = 0;
++      frame[1].bh = NULL;
+       bh = bh2;
+-      de = do_split(handle,dir, &bh, frame, &hinfo, &retval);
++      de = do_split(handle,dir, &bh, frame, &hinfo, &new_lock, &retval);
+       dx_release (frames);
+       if (!(de))
+-              return retval;
++              goto cleanup;
++
++      retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
++cleanup:
++      if (new_lock)
++              ext3_unlock_htree(dir, new_lock);
++      /* we mark the directory as indexed only now, in order to
++       * avoid races while the htree is being created -bzzz */
++      EXT3_I(dir)->i_flags |= EXT3_INDEX_FL;
++      ext3_unlock_htree(dir, lock);
+ 
+-      return add_dirent_to_buf(handle, dentry, inode, de, bh);
++      return retval;
+ }
+ #endif
+ 
+@@ -1288,11 +1475,13 @@
+       unsigned blocksize;
+       unsigned nlen, rlen;
+       u32 block, blocks;
++      void *lock;
+ 
+       sb = dir->i_sb;
+       blocksize = sb->s_blocksize;
+       if (!dentry->d_name.len)
+               return -EINVAL;
++repeat:
+ #ifdef CONFIG_EXT3_INDEX
+       if (is_dx(dir)) {
+               retval = ext3_dx_add_entry(handle, dentry, inode);
+@@ -1303,36 +1492,53 @@
+               ext3_mark_inode_dirty(handle, dir);
+       }
+ #endif
++      lock = ext3_lock_htree(dir, 0, 1);
++      if (is_dx(dir)) {
++              /* we got lock for block 0
++               * probably previous holder of the lock
++               * created htree -bzzz */
++              ext3_unlock_htree(dir, lock);
++              goto repeat;
++      }
++      
+       blocks = dir->i_size >> sb->s_blocksize_bits;
+       for (block = 0, offset = 0; block < blocks; block++) {
+               bh = ext3_bread(handle, dir, block, 0, &retval);
+-              if(!bh)
++              if(!bh) {
++                      ext3_unlock_htree(dir, lock);
+                       return retval;
++              }
+               retval = add_dirent_to_buf(handle, dentry, inode, 0, bh);
+-              if (retval != -ENOSPC)
++              if (retval != -ENOSPC) {
++                      ext3_unlock_htree(dir, lock);
+                       return retval;
++              }
+ 
+ #ifdef CONFIG_EXT3_INDEX
+               if (blocks == 1 && !dx_fallback &&
+-                  EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX))
+-                      return make_indexed_dir(handle, dentry, inode, bh);
++                  EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX)) {
++                      retval = make_indexed_dir(handle, dentry, inode, bh);
++                      ext3_unlock_htree(dir, lock);
++                      return retval;
++              }
+ #endif
+               brelse(bh);
+       }
+       bh = ext3_append(handle, dir, &block, &retval);
+-      if (!bh)
++      if (!bh) {
++              ext3_unlock_htree(dir, lock);
+               return retval;
++      }
+       de = (struct ext3_dir_entry_2 *) bh->b_data;
+       de->inode = 0;
+       de->rec_len = cpu_to_le16(rlen = blocksize);
+       nlen = 0;
+-      return add_dirent_to_buf(handle, dentry, inode, de, bh);
++      retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
++      ext3_unlock_htree(dir, lock);
++      return retval;
+ }
+ 
+ #ifdef CONFIG_EXT3_INDEX
+-/*
+- * Returns 0 for success, or a negative error value
+- */
+ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
+                            struct inode *inode)
+ {
+@@ -1344,15 +1550,28 @@
+       struct super_block * sb = dir->i_sb;
+       struct ext3_dir_entry_2 *de;
+       int err;
+-
+-      frame = dx_probe(dentry, 0, &hinfo, frames, &err);
++      int curidx;
++      void *idx_lock, *leaf_lock, *newleaf_lock;
++      
++repeat:
++      frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err);
+       if (!frame)
+               return err;
+-      entries = frame->entries;
+-      at = frame->at;
+ 
+-      if (!(bh = ext3_bread(handle,dir, dx_get_block(frame->at), 0, &err)))
++      /* we're going to change the leaf, so lock it first */
++      leaf_lock = ext3_lock_htree(dir, frame->leaf, 1);
++
++      /* while we were taking the lock, the leaf we just found
++       * may have been split, so we need to check this */
++      if (!dx_check_full_path(frames, &hinfo)) {
++              ext3_unlock_htree(dir, leaf_lock);
++              dx_release(frames);
++              goto repeat;
++      }
++      if (!(bh = ext3_bread(handle,dir, frame->leaf, 0, &err))) {
++              printk("can't ext3_bread(%d) = %d\n", (int) frame->leaf, err);
+               goto cleanup;
++      }
+ 
+       BUFFER_TRACE(bh, "get_write_access");
+       err = ext3_journal_get_write_access(handle, bh);
+@@ -1365,6 +1584,35 @@
+               goto cleanup;
+       }
+ 
++      /* our leaf does not have enough space, hence we have to
++       * split it. so lock the index for this leaf first */
++      curidx = frame->curidx;
++      idx_lock = ext3_lock_htree(dir, curidx, 1);
++
++      /* now check whether the path has changed */
++      dx_release(frames);
++
++      frame = dx_probe(&dentry->d_name, dentry->d_parent->d_inode,
++                      &hinfo, frames, &err);
++      if (!frame) {
++              brelse(bh);
++              ext3_unlock_htree(dir, idx_lock);
++              ext3_unlock_htree(dir, leaf_lock); /* was leaked on this path */
++              return err;
++      }
++      
++      if (frame->curidx != curidx) {
++              /* path has been changed. we have to drop old lock
++               * and repeat */
++              brelse(bh);
++              ext3_unlock_htree(dir, idx_lock);
++              ext3_unlock_htree(dir, leaf_lock);
++              dx_release(frames);
++              goto repeat;
++      }
++      entries = frame->entries;
++      at = frame->at;
++
+       /* Block full, should compress but for now just split */
+       dxtrace(printk("using %u of %u node entries\n",
+                      dx_get_count(entries), dx_get_limit(entries)));
+@@ -1376,7 +1624,8 @@
+               struct dx_entry *entries2;
+               struct dx_node *node2;
+               struct buffer_head *bh2;
+-
++              void *nb_lock;
++              
+               if (levels && (dx_get_count(frames->entries) ==
+                              dx_get_limit(frames->entries))) {
+                       ext3_warning(sb, __FUNCTION__,
+@@ -1387,6 +1636,7 @@
+               bh2 = ext3_append (handle, dir, &newblock, &err);
+               if (!(bh2))
+                       goto cleanup;
++              nb_lock = ext3_lock_htree(dir, newblock, 1);
+               node2 = (struct dx_node *)(bh2->b_data);
+               entries2 = node2->entries;
+               node2->fake.rec_len = cpu_to_le16(sb->s_blocksize);
+@@ -1398,27 +1648,73 @@
+               if (levels) {
+                       unsigned icount1 = icount/2, icount2 = icount - icount1;
+                       unsigned hash2 = dx_get_hash(entries + icount1);
++                      void *ri_lock;
++
++                      /* we have to protect root htree index against
++                       * another dx_add_entry() which would want to
++                       * split it too -bzzz */
++                      ri_lock = ext3_lock_htree(dir, 0, 1);
++
++                      /* as root index block blocked we must repeat
++                       * searching for current position of our 2nd index -bzzz */
++                      dx_lock_bh(frame->bh);
++                      frames->at = dx_find_position(frames->entries, hinfo.hash);
++                      dx_unlock_bh(frame->bh);
++                      
+                       dxtrace(printk("Split index %i/%i\n", icount1, icount2));
+-                              
+-                      BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
++      
++                      BUFFER_TRACE(frame->bh, "get_write_access");
+                       err = ext3_journal_get_write_access(handle,
+                                                            frames[0].bh);
+                       if (err)
+                               goto journal_error;
+-                              
++                      
++                      /* copy index into new one */
+                       memcpy ((char *) entries2, (char *) (entries + icount1),
+                               icount2 * sizeof(struct dx_entry));
+-                      dx_set_count (entries, icount1);
+                       dx_set_count (entries2, icount2);
+                       dx_set_limit (entries2, dx_node_limit(dir));
+ 
+                       /* Which index block gets the new entry? */
+                       if (at - entries >= icount1) {
++                              /* unlock index we won't use */
++                              ext3_unlock_htree(dir, idx_lock);
++                              idx_lock = nb_lock;
+                               frame->at = at = at - entries - icount1 + entries2;
+-                              frame->entries = entries = entries2;
++                              frame->entries = entries2;
++                              frame->curidx = curidx = newblock;
+                               swap(frame->bh, bh2);
++                      } else {
++                              /* we'll use old index, so new one may be unlocked */
++                              ext3_unlock_htree(dir, nb_lock);
+                       }
+-                      dx_insert_block (frames + 0, hash2, newblock);
++              
++                      /* NOTE: very subtle piece of code
++                       * competing dx_probe() may find 2nd level index in root
++                       * index, then we insert new index here and set new count
++                       * in that 2nd level index. so, dx_probe() may see 2nd
++                       * level index w/o hash it looks for. the solution is
++                       * to check root index after we locked the just-found 2nd
++                       * level index -bzzz */
++                      dx_lock_bh(frames[0].bh);
++                      dx_insert_block (dir, frames + 0, hash2, newblock, 0);
++                      dx_unlock_bh(frames[0].bh);
++                      
++                      /* now old and new 2nd level index blocks contain
++                       * all pointers, so dx_probe() may find them in both.
++                       * it's OK -bzzz */
++                      
++                      dx_lock_bh(frame->bh);
++                      dx_set_count(entries, icount1);
++                      dx_unlock_bh(frame->bh);
++
++                      /* now old 2nd level index block points to first half
++                       * of leaves. it's important that dx_probe()
++                       * checks root index block for changes under
++                       * dx_lock_bh(frame->bh) -bzzz */
++
++                      ext3_unlock_htree(dir, ri_lock);
++              
+                       dxtrace(dx_show_index ("node", frames[1].entries));
+                       dxtrace(dx_show_index ("node",
+                              ((struct dx_node *) bh2->b_data)->entries));
+@@ -1427,38 +1723,61 @@
+                               goto journal_error;
+                       brelse (bh2);
+               } else {
++                      unsigned long leaf = frame->leaf;
++
+                       dxtrace(printk("Creating second level index...\n"));
+                       memcpy((char *) entries2, (char *) entries,
+                              icount * sizeof(struct dx_entry));
+                       dx_set_limit(entries2, dx_node_limit(dir));
+ 
+                       /* Set up root */
++                      dx_lock_bh(frames[0].bh);
+                       dx_set_count(entries, 1);
+                       dx_set_block(entries + 0, newblock);
+                       ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1;
++                      dx_unlock_bh(frames[0].bh);
+ 
+                       /* Add new access path frame */
+                       frame = frames + 1;
+                       frame->at = at = at - entries + entries2;
+                       frame->entries = entries = entries2;
+                       frame->bh = bh2;
++                      frame->curidx = newblock;
++                      frame->leaf = leaf;
+                       err = ext3_journal_get_write_access(handle,
+                                                            frame->bh);
+                       if (err)
+                               goto journal_error;
++
++                      /* first level index was root. it's already initialized */
++                      /* we may unlock it now */
++                      ext3_unlock_htree(dir, idx_lock);
++
++                      /* current index is just created 2nd level index */
++                      curidx = newblock;
++                      idx_lock = nb_lock;
+               }
+               ext3_journal_dirty_metadata(handle, frames[0].bh);
+       }
+-      de = do_split(handle, dir, &bh, frame, &hinfo, &err);
++      de = do_split(handle, dir, &bh, frame, &hinfo, &newleaf_lock, &err);
+       if (!de)
+               goto cleanup;
++
++      /* index has been split */
++      ext3_unlock_htree(dir, idx_lock);
++      
+       err = add_dirent_to_buf(handle, dentry, inode, de, bh);
++
++      if (newleaf_lock)
++              ext3_unlock_htree(dir, newleaf_lock);
++      
+       bh = 0;
+       goto cleanup;
+       
+ journal_error:
+       ext3_std_error(dir->i_sb, err);
+ cleanup:
++      ext3_unlock_htree(dir, leaf_lock);
+       if (bh)
+               brelse(bh);
+       dx_release(frames);
+@@ -1902,6 +2221,7 @@
+       struct buffer_head * bh;
+       struct ext3_dir_entry_2 * de;
+       handle_t *handle;
++      void *lock;
+ 
+       handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS);
+       if (IS_ERR(handle)) {
+@@ -1909,7 +2229,7 @@
+       }
+ 
+       retval = -ENOENT;
+-      bh = ext3_find_entry (dentry, &de);
++      bh = ext3_find_entry (dentry, &de, 1, &lock);
+       if (!bh)
+               goto end_rmdir;
+ 
+@@ -1920,14 +2240,19 @@
+       DQUOT_INIT(inode);
+ 
+       retval = -EIO;
+-      if (le32_to_cpu(de->inode) != inode->i_ino)
++      if (le32_to_cpu(de->inode) != inode->i_ino) {
++              ext3_unlock_htree(dir, lock);
+               goto end_rmdir;
++      }
+ 
+       retval = -ENOTEMPTY;
+-      if (!empty_dir (inode))
++      if (!empty_dir (inode)) {
++              ext3_unlock_htree(dir, lock);
+               goto end_rmdir;
++      }
+ 
+       retval = ext3_delete_entry(handle, dir, de, bh);
++      ext3_unlock_htree(dir, lock);
+       if (retval)
+               goto end_rmdir;
+       if (inode->i_nlink != 2)
+@@ -1956,6 +2281,7 @@
+       struct buffer_head * bh;
+       struct ext3_dir_entry_2 * de;
+       handle_t *handle;
++      void *lock;
+ 
+       handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS);
+       if (IS_ERR(handle)) {
+@@ -1966,7 +2292,7 @@
+               handle->h_sync = 1;
+ 
+       retval = -ENOENT;
+-      bh = ext3_find_entry (dentry, &de);
++      bh = ext3_find_entry (dentry, &de, 1, &lock);
+       if (!bh)
+               goto end_unlink;
+ 
+@@ -1974,8 +2300,10 @@
+       DQUOT_INIT(inode);
+ 
+       retval = -EIO;
+-      if (le32_to_cpu(de->inode) != inode->i_ino)
++      if (le32_to_cpu(de->inode) != inode->i_ino) {
++              ext3_unlock_htree(dir, lock);
+               goto end_unlink;
++      }
+       
+       if (!inode->i_nlink) {
+               ext3_warning (inode->i_sb, "ext3_unlink",
+@@ -1984,6 +2312,7 @@
+               inode->i_nlink = 1;
+       }
+       retval = ext3_delete_entry(handle, dir, de, bh);
++      ext3_unlock_htree(dir, lock);
+       if (retval)
+               goto end_unlink;
+       dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+@@ -2121,6 +2450,7 @@
+       struct buffer_head * old_bh, * new_bh, * dir_bh;
+       struct ext3_dir_entry_2 * old_de, * new_de;
+       int retval;
++      void *lock1 = NULL, *lock2 = NULL, *lock3 = NULL;
+ 
+       old_bh = new_bh = dir_bh = NULL;
+ 
+@@ -2133,7 +2463,10 @@
+       if (IS_SYNC(old_dir) || IS_SYNC(new_dir))
+               handle->h_sync = 1;
+ 
+-      old_bh = ext3_find_entry (old_dentry, &old_de);
++      if (old_dentry->d_parent == new_dentry->d_parent)
++              down(&EXT3_I(old_dentry->d_parent->d_inode)->i_rename_sem);
++
++      old_bh = ext3_find_entry (old_dentry, &old_de, 1, &lock1 /* FIXME */);
+       /*
+        *  Check for inode number is _not_ due to possible IO errors.
+        *  We might rmdir the source, keep it as pwd of some process
+@@ -2146,7 +2479,7 @@
+               goto end_rename;
+ 
+       new_inode = new_dentry->d_inode;
+-      new_bh = ext3_find_entry (new_dentry, &new_de);
++      new_bh = ext3_find_entry (new_dentry, &new_de, 1, &lock2 /* FIXME */);
+       if (new_bh) {
+               if (!new_inode) {
+                       brelse (new_bh);
+@@ -2213,7 +2546,7 @@
+               struct buffer_head *old_bh2;
+               struct ext3_dir_entry_2 *old_de2;
+ 
+-              old_bh2 = ext3_find_entry(old_dentry, &old_de2);
++              old_bh2 = ext3_find_entry(old_dentry, &old_de2, 1, &lock3 /* FIXME */);
+               if (old_bh2) {
+                       retval = ext3_delete_entry(handle, old_dir,
+                                                  old_de2, old_bh2);
+@@ -2256,6 +2589,14 @@
+       retval = 0;
+ 
+ end_rename:
++      if (lock1)
++              ext3_unlock_htree(old_dentry->d_parent->d_inode, lock1);
++      if (lock2)
++              ext3_unlock_htree(new_dentry->d_parent->d_inode, lock2);
++      if (lock3)
++              ext3_unlock_htree(old_dentry->d_parent->d_inode, lock3);
++      if (old_dentry->d_parent == new_dentry->d_parent)
++              up(&EXT3_I(old_dentry->d_parent->d_inode)->i_rename_sem);
+       brelse (dir_bh);
+       brelse (old_bh);
+       brelse (new_bh);
+@@ -2264,6 +2605,29 @@
+ }
+ 
+ /*
++ * these locking primitives are used to protect parts
++ * of dir's htree. the unit of protection is a block: leaf or index
++ */
++static inline void *ext3_lock_htree(struct inode *dir,
++                                      unsigned long value, int rwlock)
++{
++      void *lock;
++      /* NOTE: rwlock is unused; a literal 1 is passed to dynlock_lock() */
++      if (!test_opt(dir->i_sb, PDIROPS))
++              return NULL;    /* pdirops not enabled: no lock is taken */
++      lock = dynlock_lock(&EXT3_I(dir)->i_htree_lock, value, 1, GFP_KERNEL);
++      return lock;
++}
++
++static inline void ext3_unlock_htree(struct inode *dir,
++                                      void *lock)
++{
++      if (!test_opt(dir->i_sb, PDIROPS) || !lock)
++              return;         /* pdirops off or NULL lock: nothing to do */
++      dynlock_unlock(&EXT3_I(dir)->i_htree_lock, lock);
++}
++
++/*
+  * directories can handle most operations...
+  */
+ struct inode_operations ext3_dir_inode_operations = {
+Index: linux-2.4.20/fs/ext3/super.c
+===================================================================
+--- linux-2.4.20.orig/fs/ext3/super.c  2004-05-27 15:10:41.000000000 -0400
++++ linux-2.4.20/fs/ext3/super.c       2004-05-27 15:10:45.000000000 -0400
+@@ -796,6 +796,8 @@
+                               return 0;
+                       }
+               }
++              else if (!strcmp (this_char, "pdirops"))
++                      set_opt (sbi->s_mount_opt, PDIROPS);
+               else if (!strcmp (this_char, "grpid") ||
+                        !strcmp (this_char, "bsdgroups"))
+                       set_opt (*mount_options, GRPID);
+@@ -822,6 +824,9 @@
+                       if (want_numeric(value, "sb", sb_block))
+                               return 0;
+               }
++              else if (!strcmp (this_char, "pdirops")) {
++                      set_opt (sbi->s_mount_opt, PDIROPS);
++              }
+ #ifdef CONFIG_JBD_DEBUG
+               else if (!strcmp (this_char, "ro-after")) {
+                       unsigned long v;
+@@ -985,6 +990,10 @@
+               ext3_check_inodes_bitmap (sb);
+       }
+ #endif
++#ifdef S_PDIROPS
++      if (test_opt (sb, PDIROPS))
++              sb->s_flags |= S_PDIROPS;
++#endif
+       setup_ro_after(sb);
+       return res;
+ }
+@@ -1484,6 +1493,11 @@
+               test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
+               "writeback");
+ 
++      if (test_opt(sb, PDIROPS)) {
++              printk (KERN_INFO "EXT3-fs: mounted filesystem with parallel dirops\n");
++              sb->s_flags |= S_PDIROPS;
++      }
++              
+       return sb;
+ 
+ failed_mount3:
+Index: linux-2.4.20/fs/ext3/inode.c
+===================================================================
+--- linux-2.4.20.orig/fs/ext3/inode.c  2004-05-27 15:10:41.000000000 -0400
++++ linux-2.4.20/fs/ext3/inode.c       2004-05-27 15:10:45.000000000 -0400
+@@ -2435,6 +2435,9 @@
+       } else if (S_ISDIR(inode->i_mode)) {
+               inode->i_op = &ext3_dir_inode_operations;
+               inode->i_fop = &ext3_dir_operations;
++              dynlock_init(&EXT3_I(inode)->i_htree_lock);
++              sema_init(&EXT3_I(inode)->i_rename_sem, 1);
++              sema_init(&EXT3_I(inode)->i_append_sem, 1);
+       } else if (S_ISLNK(inode->i_mode)) {
+               if (ext3_inode_is_fast_symlink(inode))
+                       inode->i_op = &ext3_fast_symlink_inode_operations;
+Index: linux-2.4.20/fs/ext3/ialloc.c
+===================================================================
+--- linux-2.4.20.orig/fs/ext3/ialloc.c 2004-05-27 15:10:39.000000000 -0400
++++ linux-2.4.20/fs/ext3/ialloc.c      2004-05-27 15:10:45.000000000 -0400
+@@ -601,6 +601,9 @@
+               return ERR_PTR(-EDQUOT);
+       }
+       ext3_debug ("allocating inode %lu\n", inode->i_ino);
++      dynlock_init(&EXT3_I(inode)->i_htree_lock);
++      sema_init(&EXT3_I(inode)->i_rename_sem, 1);
++      sema_init(&EXT3_I(inode)->i_append_sem, 1);
+       return inode;
+ 
+ fail:
+Index: linux-2.4.20/include/linux/ext3_fs.h
+===================================================================
+--- linux-2.4.20.orig/include/linux/ext3_fs.h  2004-05-27 15:10:40.000000000 -0400
++++ linux-2.4.20/include/linux/ext3_fs.h       2004-05-27 15:10:45.000000000 -0400
+@@ -306,6 +306,7 @@
+ /*
+  * Mount flags
+  */
++#define EXT3_MOUNT_PDIROPS            0x800000/* Parallel dir operations */
+ #define EXT3_MOUNT_CHECK              0x0001  /* Do mount-time checks */
+ #define EXT3_MOUNT_GRPID              0x0004  /* Create files with directory's group */
+ #define EXT3_MOUNT_DEBUG              0x0008  /* Some debugging messages */
+Index: linux-2.4.20/include/linux/ext3_fs_i.h
+===================================================================
+--- linux-2.4.20.orig/include/linux/ext3_fs_i.h        2001-11-22 14:46:19.000000000 -0500
++++ linux-2.4.20/include/linux/ext3_fs_i.h     2004-05-27 15:10:45.000000000 -0400
+@@ -17,6 +17,7 @@
+ #define _LINUX_EXT3_FS_I
+ 
+ #include <linux/rwsem.h>
++#include <linux/dynlocks.h>
+ 
+ /*
+  * second extended file system inode data in memory
+@@ -73,6 +74,11 @@
+        * by other means, so we have truncate_sem.
+        */
+       struct rw_semaphore truncate_sem;
++
++      /* following fields for parallel directory operations -bzzz */
++      struct dynlock i_htree_lock;
++      struct semaphore i_append_sem;
++      struct semaphore i_rename_sem;
+ };
+ 
+ #endif        /* _LINUX_EXT3_FS_I */
diff --git a/lustre/kernel_patches/patches/ext3-trusted_ea-2.4.21-chaos.patch b/lustre/kernel_patches/patches/ext3-trusted_ea-2.4.21-chaos.patch

new file mode 100644 (file)

index 0000000..92753de
--- /dev/null
+++ b/lustre/kernel_patches/patches/ext3-trusted_ea-2.4.21-chaos.patch
@@ -0,0 +1,170 @@
+ fs/ext3/xattr.c            |   12 +++++-
+ fs/ext3/xattr_trusted.c    |   86 +++++++++++++++++++++++++++++++++++++++++++++
+ include/linux/ext3_xattr.h |    6 +++
+ 3 files changed, 102 insertions(+), 2 deletions(-)
+
+Index: linux-p4smp/fs/ext3/Makefile
+===================================================================
+--- linux-p4smp.orig/fs/ext3/Makefile  2004-06-14 13:46:11.000000000 -0700
++++ linux-p4smp/fs/ext3/Makefile       2004-06-14 13:50:46.000000000 -0700
+@@ -12,7 +12,8 @@ O_TARGET := ext3.o
+ export-objs := ext3-exports.o
+ 
+ obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
+-              ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o
++              ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o \
++              xattr_trusted.o
+ obj-m    := $(O_TARGET)
+ 
+ export-objs += xattr.o
+Index: linux-p4smp/fs/ext3/xattr.c
+===================================================================
+--- linux-p4smp.orig/fs/ext3/xattr.c   2004-06-14 13:46:44.000000000 -0700
++++ linux-p4smp/fs/ext3/xattr.c        2004-06-14 13:50:46.000000000 -0700
+@@ -1780,18 +1780,25 @@ static void ext3_xattr_rehash(struct ext
+ int __init
+ init_ext3_xattr(void)
+ {
++      int error;
++
+       ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL,
+               sizeof(struct mb_cache_entry) +
+               sizeof(struct mb_cache_entry_index), 1, 61);
+       if (!ext3_xattr_cache)
+               return -ENOMEM;
+ 
+-      return 0;
++      error = init_ext3_xattr_trusted();
++      if (error)
++              mb_cache_destroy(ext3_xattr_cache);
++
++      return error;
+ }
+ 
+ void
+ exit_ext3_xattr(void)
+ {
++      exit_ext3_xattr_trusted();
+       if (ext3_xattr_cache)
+               mb_cache_destroy(ext3_xattr_cache);
+       ext3_xattr_cache = NULL;
+@@ -1802,12 +1809,13 @@ exit_ext3_xattr(void)
+ int __init
+ init_ext3_xattr(void)
+ {
+-      return 0;
++      return init_ext3_xattr_trusted();
+ }
+ 
+ void
+ exit_ext3_xattr(void)
+ {
++      exit_ext3_xattr_trusted();
+ }
+ 
+ #endif  /* CONFIG_EXT3_FS_XATTR_SHARING */
+Index: linux-p4smp/fs/ext3/xattr_trusted.c
+===================================================================
+--- linux-p4smp.orig/fs/ext3/xattr_trusted.c   2004-06-14 13:41:58.000000000 -0700
++++ linux-p4smp/fs/ext3/xattr_trusted.c        2004-06-14 13:50:46.000000000 -0700
+@@ -0,0 +1,86 @@
++/*
++ * linux/fs/ext3/xattr_trusted.c
++ * Handler for trusted extended attributes.
++ *
++ * Copyright (C) 2003 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
++ */
++
++#include <linux/module.h>
++#include <linux/string.h>
++#include <linux/fs.h>
++#include <linux/ext3_jbd.h>
++#include <linux/ext3_fs.h>
++#include <linux/ext3_xattr.h>
++
++#define XATTR_TRUSTED_PREFIX "trusted."
++
++static size_t
++ext3_xattr_trusted_list(char *list, struct inode *inode,
++                      const char *name, int name_len)
++{
++      const int prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1; /* w/o NUL */
++
++      if (!capable(CAP_SYS_ADMIN))
++              return 0;       /* no CAP_SYS_ADMIN: report nothing */
++
++      if (list) {             /* NULL list: only the size is computed */
++              memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
++              memcpy(list+prefix_len, name, name_len);
++              list[prefix_len + name_len] = '\0';
++      }
++      return prefix_len + name_len + 1;       /* includes trailing NUL */
++}
++
++static int
++ext3_xattr_trusted_get(struct inode *inode, const char *name,
++                     void *buffer, size_t size)
++{
++      if (strcmp(name, "") == 0)
++              return -EINVAL; /* empty attribute name */
++      if (!capable(CAP_SYS_ADMIN))
++              return -EPERM;  /* caller lacks CAP_SYS_ADMIN */
++      return ext3_xattr_get(inode, EXT3_XATTR_INDEX_TRUSTED, name,
++                            buffer, size);
++}
++
++static int
++ext3_xattr_trusted_set(struct inode *inode, const char *name,
++                     const void *value, size_t size, int flags)
++{
++      handle_t *handle;
++      int error;
++
++      if (strcmp(name, "") == 0)
++              return -EINVAL; /* empty attribute name */
++      if (!capable(CAP_SYS_ADMIN))
++              return -EPERM;  /* caller lacks CAP_SYS_ADMIN */
++      handle = ext3_journal_start(inode, EXT3_XATTR_TRANS_BLOCKS);
++      if (IS_ERR(handle))
++              return PTR_ERR(handle);
++      error = ext3_xattr_set(handle, inode, EXT3_XATTR_INDEX_TRUSTED, name,
++                             value, size, flags);
++      ext3_journal_stop(handle, inode); /* stopped on success and on error */
++
++      return error;
++}
++
++struct ext3_xattr_handler ext3_xattr_trusted_handler = {
++      .prefix = XATTR_TRUSTED_PREFIX,
++      .list   = ext3_xattr_trusted_list,
++      .get    = ext3_xattr_trusted_get,
++      .set    = ext3_xattr_trusted_set,
++};
++
++int __init
++init_ext3_xattr_trusted(void)
++{
++      return ext3_xattr_register(EXT3_XATTR_INDEX_TRUSTED,
++                                 &ext3_xattr_trusted_handler);
++}
++
++void
++exit_ext3_xattr_trusted(void)
++{
++      ext3_xattr_unregister(EXT3_XATTR_INDEX_TRUSTED,
++                            &ext3_xattr_trusted_handler);
++}
+Index: linux-p4smp/include/linux/ext3_xattr.h
+===================================================================
+--- linux-p4smp.orig/include/linux/ext3_xattr.h        2004-06-14 13:41:58.000000000 -0700
++++ linux-p4smp/include/linux/ext3_xattr.h     2004-06-14 13:50:46.000000000 -0700
+@@ -93,6 +93,9 @@ extern void ext3_xattr_put_super(struct 
+ extern int init_ext3_xattr(void) __init;
+ extern void exit_ext3_xattr(void);
+ 
++extern int init_ext3_xattr_trusted(void) __init;
++extern void exit_ext3_xattr_trusted(void);
++
+ # else  /* CONFIG_EXT3_FS_XATTR */
+ #  define ext3_setxattr               NULL
+ #  define ext3_getxattr               NULL
diff --git a/lustre/kernel_patches/patches/iopen-2.4.19-suse.patch b/lustre/kernel_patches/patches/iopen-2.4.19-suse.patch

index ad213c9..c0940cf 100644 (file)
--- a/lustre/kernel_patches/patches/iopen-2.4.19-suse.patch
+++ b/lustre/kernel_patches/patches/iopen-2.4.19-suse.patch
@@ -70,11 +70,11 @@ Index: linux-2.4.19.SuSE/fs/ext3/inode.c
         if(ext3_get_inode_loc(inode, &iloc))
                 goto bad_inode;
         bh = iloc.bh;
-Index: linux-2.4.19.SuSE/fs/ext3/iopen.c
+Index: lum/fs/ext3/iopen.c
  ===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/iopen.c     Sun Nov 16 01:27:31 2003
-+++ linux-2.4.19.SuSE/fs/ext3/iopen.c  Sun Nov 16 01:27:31 2003
-@@ -0,0 +1,258 @@
+--- lum.orig/fs/ext3/iopen.c   2004-03-09 16:46:37.000000000 -0700
++++ lum/fs/ext3/iopen.c        2004-03-09 16:48:03.000000000 -0700
+@@ -0,0 +1,282 @@
  +/*
  + * linux/fs/ext3/iopen.c
  + *
@@ -211,13 +211,24 @@ Index: linux-2.4.19.SuSE/fs/ext3/iopen.c
  +
  +/* This function is spliced into ext3_lookup and does the move of a
  + * disconnected dentry (if it exists) to a connected dentry.
-+ * Caller must hold dcache_lock.
  + */
-+struct dentry *iopen_connect_dentry(struct dentry *de, struct inode *inode)
++struct dentry *iopen_connect_dentry(struct dentry *dentry, struct inode *inode,
++                                  int rehash)
  +{
  +      struct dentry *tmp, *goal = NULL;
  +      struct list_head *lp;
  +
++      /* verify this dentry is really new */
++      assert(dentry->d_inode == NULL);
++      assert(list_empty(&dentry->d_alias));           /* d_instantiate */
++      if (rehash)
++              assert(list_empty(&dentry->d_hash));    /* d_rehash */
++      assert(list_empty(&dentry->d_subdirs));
++
++      spin_lock(&dcache_lock);
++      if (!inode)
++              goto do_rehash;
++
  +      /* preferrably return a connected dentry */
  +      list_for_each(lp, &inode->i_dentry) {
  +              tmp = list_entry(lp, struct dentry, d_alias);
@@ -231,27 +242,40 @@ Index: linux-2.4.19.SuSE/fs/ext3/iopen.c
  +      }
  +
  +      if (!goal)
-+              return NULL;
++              goto do_instantiate;
  +
  +      /* Move the goal to the de hash queue - like d_move() */
  +      goal->d_flags &= ~DCACHE_NFSD_DISCONNECTED;
  +      list_del_init(&goal->d_hash);
  +
  +      list_del(&goal->d_child);
-+      list_del(&de->d_child);
++      list_del(&dentry->d_child);
  +
  +      /* Switch the parents and the names.. */
-+      switch_names(goal, de);
-+      do_switch(goal->d_parent, de->d_parent);
-+      do_switch(goal->d_name.len, de->d_name.len);
-+      do_switch(goal->d_name.hash, de->d_name.hash);
++      switch_names(goal, dentry);
++      do_switch(goal->d_parent, dentry->d_parent);
++      do_switch(goal->d_name.len, dentry->d_name.len);
++      do_switch(goal->d_name.hash, dentry->d_name.hash);
  +
  +      /* And add them back to the (new) parent lists */
  +      list_add(&goal->d_child, &goal->d_parent->d_subdirs);
-+      list_add(&de->d_child, &de->d_parent->d_subdirs);
++      list_add(&dentry->d_child, &dentry->d_parent->d_subdirs);
  +      __d_rehash(goal, 0);
++      spin_unlock(&dcache_lock);
++      iput(inode);
  +
  +      return goal;
++
++      /* d_add(), but don't drop dcache_lock before adding dentry to inode */
++do_instantiate:
++      list_add(&dentry->d_alias, &inode->i_dentry);   /* d_instantiate */
++      dentry->d_inode = inode;
++do_rehash:
++      if (rehash)
++              __d_rehash(dentry, 0);                  /* d_rehash */
++      spin_unlock(&dcache_lock);
++
++      return NULL;
  +}
  +
  +/*
@@ -333,10 +357,10 @@ Index: linux-2.4.19.SuSE/fs/ext3/iopen.c
  +
  +      return 1;
  +}
-Index: linux-2.4.19.SuSE/fs/ext3/iopen.h
+Index: lum/fs/ext3/iopen.h
  ===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/iopen.h     Sun Nov 16 01:27:31 2003
-+++ linux-2.4.19.SuSE/fs/ext3/iopen.h  Sun Nov 16 01:27:31 2003
+--- lum.orig/fs/ext3/iopen.h   2004-03-09 16:46:37.000000000 -0700
++++ lum/fs/ext3/iopen.h        2004-03-09 16:48:03.000000000 -0700
  @@ -0,0 +1,15 @@
  +/*
  + * iopen.h
@@ -351,8 +375,8 @@ Index: linux-2.4.19.SuSE/fs/ext3/iopen.h
  +
  +extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry);
  +extern int ext3_iopen_get_inode(struct inode *inode);
-+extern struct dentry *iopen_connect_dentry(struct dentry *de,
-+                                         struct inode *inode);
++extern struct dentry *iopen_connect_dentry(struct dentry *dentry,
++                                         struct inode *inode, int rehash);
  Index: linux-2.4.19.SuSE/fs/ext3/namei.c
  ===================================================================
  --- linux-2.4.19.SuSE.orig/fs/ext3/namei.c     Sun Nov 16 01:23:20 2003
@@ -366,12 +390,7 @@ Index: linux-2.4.19.SuSE/fs/ext3/namei.c
   
   /*
    * define how far ahead to read directories while searching them.
-@@ -922,10 +922,14 @@
-       struct inode * inode;
-       struct ext3_dir_entry_2 * de;
-       struct buffer_head * bh;
-+      struct dentry *alternate = NULL;
- 
+@@ -926,6 +927,9 @@
         if (dentry->d_name.len > EXT3_NAME_LEN)
                 return ERR_PTR(-ENAMETOOLONG);
   
@@ -381,36 +400,62 @@ Index: linux-2.4.19.SuSE/fs/ext3/namei.c
         bh = ext3_find_entry(dentry, &de);
         inode = NULL;
         if (bh) {
-@@ -943,7 +948,28 @@
+@@ -943,8 +948,8 @@
                         return ERR_PTR(-EACCES);
                 }
         }
  -      d_add(dentry, inode);
+-      return NULL;
  +
-+      /* verify this dentry is really new */
-+      assert(!dentry->d_inode);
-+      assert(list_empty(&dentry->d_alias));           /* d_instantiate */
-+      assert(list_empty(&dentry->d_hash));            /* d_rehash */
-+      assert(list_empty(&dentry->d_subdirs));
-+
-+      spin_lock(&dcache_lock);
-+      if (inode && (alternate = iopen_connect_dentry(dentry, inode))) {
-+              spin_unlock(&dcache_lock);
-+              iput(inode);
-+              return alternate;
++      return iopen_connect_dentry(dentry, inode, 1);
+ }
+ 
+ #define S_SHIFT 12
+@@ -1932,10 +1935,6 @@
+                             inode->i_nlink);
+       inode->i_version = ++event;
+       inode->i_nlink = 0;
+-      /* There's no need to set i_disksize: the fact that i_nlink is
+-       * zero will ensure that the right thing happens during any
+-       * recovery. */
+-      inode->i_size = 0;
+       ext3_orphan_add(handle, inode);
+       dir->i_nlink--;
+       inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+@@ -2086,6 +2085,23 @@
+       return err;
+ }
+ 
++/* Like ext3_add_nondir() except for call to iopen_connect_dentry */
++static int ext3_add_link(handle_t *handle, struct dentry *dentry,
++                       struct inode *inode)
++{
++      int err = ext3_add_entry(handle, dentry, inode);
++      if (!err) {
++              err = ext3_mark_inode_dirty(handle, inode);
++              if (err == 0) {
++                      dput(iopen_connect_dentry(dentry, inode, 0));
++                      return 0;
++              }
  +      }
++      ext3_dec_count(handle, inode);
++      iput(inode);
++      return err;
++}
  +
-+      /* d_add(), but don't drop dcache_lock before adding dentry to inode */
-+      if (inode)                                      /* d_instantiate */
-+              list_add(&dentry->d_alias, &inode->i_dentry);
-+      dentry->d_inode = inode;
-+
-+      __d_rehash(dentry, 0);                          /* d_rehash */
-+      spin_unlock(&dcache_lock);
-+
-       return NULL;
- }
+ static int ext3_link (struct dentry * old_dentry,
+               struct inode * dir, struct dentry *dentry)
+ {
+@@ -2113,7 +2129,8 @@
+       ext3_inc_count(handle, inode);
+       atomic_inc(&inode->i_count);
   
+-      err = ext3_add_nondir(handle, dentry, inode);
++      err = ext3_add_link(handle, dentry, inode);
++      ext3_orphan_del(handle, inode);
+       ext3_journal_stop(handle, dir);
+       return err;
+ }
  Index: linux-2.4.19.SuSE/fs/ext3/super.c
  ===================================================================
  --- linux-2.4.19.SuSE.orig/fs/ext3/super.c     Sun Nov 16 01:19:22 2003
diff --git a/lustre/kernel_patches/patches/iopen-2.4.21-chaos.patch b/lustre/kernel_patches/patches/iopen-2.4.21-chaos.patch

index 62bd8e1..3bed805 100644 (file)
--- a/lustre/kernel_patches/patches/iopen-2.4.21-chaos.patch
+++ b/lustre/kernel_patches/patches/iopen-2.4.21-chaos.patch
@@ -74,10 +74,17 @@ Index: linux-ia64/fs/ext3/iopen.c
  ===================================================================
  --- linux-ia64.orig/fs/ext3/iopen.c    2004-03-17 18:02:08.000000000 -0800
  +++ linux-ia64/fs/ext3/iopen.c 2004-03-17 18:10:58.000000000 -0800
-@@ -8,3 +8,275 @@
-  * This file may be redistributed under the terms of the GNU General
-  * Public License.
-  *
+@@ -0,0 +1,282 @@
++/*
++ * linux/fs/ext3/iopen.c
++ *
++ * Special support for open by inode number
++ *
++ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu).
++ *
++ * This file may be redistributed under the terms of the GNU General
++ * Public License.
++ *
  + *
  + * Invariants:
  + *   - there is only ever a single DCACHE_NFSD_DISCONNECTED dentry alias
@@ -427,7 +434,7 @@ Index: linux-ia64/fs/ext3/namei.c
  +      if (!err) {
  +              err = ext3_mark_inode_dirty(handle, inode);
  +              if (err == 0) {
-+                      (void)iopen_connect_dentry(dentry, inode, 0);
++                      dput(iopen_connect_dentry(dentry, inode, 0));
  +                      return 0;
  +              }
  +      }
diff --git a/lustre/kernel_patches/patches/vfs_intent-2.6-suse.patch b/lustre/kernel_patches/patches/vfs_intent-2.6-suse.patch

index 12436a7..ee976f6 100644 (file)
--- a/lustre/kernel_patches/patches/vfs_intent-2.6-suse.patch
+++ b/lustre/kernel_patches/patches/vfs_intent-2.6-suse.patch
@@ -1,7 +1,7 @@
  Index: linux-2.6.5-12.1/fs/exec.c
  ===================================================================
---- linux-2.6.5-12.1.orig/fs/exec.c    2004-05-10 19:21:56.000000000 +0300
-+++ linux-2.6.5-12.1/fs/exec.c 2004-05-25 17:32:14.038494200 +0300
+--- linux-2.6.5-12.1.orig/fs/exec.c    2004-05-10 12:21:56.000000000 -0400
++++ linux-2.6.5-12.1/fs/exec.c 2004-06-03 18:31:28.000000000 -0400
  @@ -125,9 +125,10 @@
         struct nameidata nd;
         int error;
@@ -47,8 +47,8 @@ Index: linux-2.6.5-12.1/fs/exec.c
                                         if (err) {
  Index: linux-2.6.5-12.1/fs/namei.c
  ===================================================================
---- linux-2.6.5-12.1.orig/fs/namei.c   2004-05-10 19:21:56.000000000 +0300
-+++ linux-2.6.5-12.1/fs/namei.c        2004-05-25 17:32:14.040493896 +0300
+--- linux-2.6.5-12.1.orig/fs/namei.c   2004-05-10 12:21:56.000000000 -0400
++++ linux-2.6.5-12.1/fs/namei.c        2004-06-03 18:42:17.000000000 -0400
  @@ -270,8 +270,19 @@
         return 0;
   }
@@ -136,25 +136,20 @@ Index: linux-2.6.5-12.1/fs/namei.c
                         dput(next.dentry);
                         mntput(next.mnt);
                         if (err)
-@@ -703,14 +749,29 @@
+@@ -703,14 +749,24 @@
                                 inode = nd->dentry->d_inode;
                                 /* fallthrough */
                         case 1:
  +                              nd->flags |= LOOKUP_LAST;
  +                              err = revalidate_special(nd);
  +                              nd->flags &= ~LOOKUP_LAST;
++                              if (!nd->dentry->d_inode)
++                                      err = -ENOENT;
  +                              if (err)
-+                                      break;
++                                      goto return_err;
                                 goto return_reval;
                 }
-+              
-+              if (err) {
-+                      if (!nd->dentry->d_inode)
-+                              err = -ENOENT;
-+                      
-+                      goto return_err;                        
-+              }
-+              
++
                 if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
                         err = nd->dentry->d_op->d_hash(nd->dentry, &this);
                         if (err < 0)
@@ -166,7 +161,7 @@ Index: linux-2.6.5-12.1/fs/namei.c
                 if (err)
                         break;
                 follow_mount(&next.mnt, &next.dentry);
-@@ -936,7 +997,7 @@
+@@ -936,7 +992,7 @@
   }
   
   /* SMP-safe */
@@ -175,7 +170,7 @@ Index: linux-2.6.5-12.1/fs/namei.c
   {
         unsigned long hash;
         struct qstr this;
-@@ -956,11 +1017,16 @@
+@@ -956,11 +1012,16 @@
         }
         this.hash = end_name_hash(hash);
   
@@ -193,7 +188,7 @@ Index: linux-2.6.5-12.1/fs/namei.c
   /*
    *    namei()
    *
-@@ -972,7 +1038,8 @@
+@@ -972,7 +1033,8 @@
    * that namei follows links, while lnamei does not.
    * SMP-safe
    */
@@ -203,12 +198,12 @@ Index: linux-2.6.5-12.1/fs/namei.c
   {
         char *tmp = getname(name);
         int err = PTR_ERR(tmp);
-@@ -987,6 +1054,13 @@
+@@ -987,6 +1049,13 @@
         return err;
   }
   
-+int __user_walk(const char __user *name, unsigned flags,
-+              struct nameidata *nd, const char **pname)
++int fastcall __user_walk(const char __user *name, unsigned flags,
++                       struct nameidata *nd, const char **pname)
  +{
  +      intent_init(&nd->intent, IT_LOOKUP);
  +      return __user_walk_it(name, flags, nd, pname);
@@ -217,7 +212,7 @@ Index: linux-2.6.5-12.1/fs/namei.c
   /*
    * It's inline, so penalty for filesystems that don't use sticky bit is
    * minimal.
-@@ -1259,8 +1333,8 @@
+@@ -1259,8 +1328,8 @@
                 acc_mode |= MAY_APPEND;
   
         /* Fill in the open() intent data */
@@ -228,7 +223,7 @@ Index: linux-2.6.5-12.1/fs/namei.c
   
         /*
          * The simplest case - just a plain lookup.
-@@ -1275,6 +1349,7 @@
+@@ -1275,6 +1344,7 @@
         /*
          * Create - we need to know the parent.
          */
@@ -236,7 +231,7 @@ Index: linux-2.6.5-12.1/fs/namei.c
         error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd);
         if (error)
                 return error;
-@@ -1291,7 +1366,9 @@
+@@ -1291,7 +1361,9 @@
         dir = nd->dentry;
         nd->flags &= ~LOOKUP_PARENT;
         down(&dir->d_inode->i_sem);
@@ -246,7 +241,7 @@ Index: linux-2.6.5-12.1/fs/namei.c
   
   do_last:
         error = PTR_ERR(dentry);
-@@ -1396,7 +1473,9 @@
+@@ -1396,7 +1468,9 @@
         }
         dir = nd->dentry;
         down(&dir->d_inode->i_sem);
@@ -256,7 +251,7 @@ Index: linux-2.6.5-12.1/fs/namei.c
         putname(nd->last.name);
         goto do_last;
   }
-@@ -2196,7 +2275,9 @@
+@@ -2196,7 +2270,9 @@
   __vfs_follow_link(struct nameidata *nd, const char *link)
   {
         int res = 0;
@@ -266,7 +261,7 @@ Index: linux-2.6.5-12.1/fs/namei.c
         if (IS_ERR(link))
                 goto fail;
   
-@@ -2206,6 +2287,10 @@
+@@ -2206,6 +2282,10 @@
                         /* weird __emul_prefix() stuff did it */
                         goto out;
         }
@@ -279,8 +274,8 @@ Index: linux-2.6.5-12.1/fs/namei.c
         if (current->link_count || res || nd->last_type!=LAST_NORM)
  Index: linux-2.6.5-12.1/fs/namespace.c
  ===================================================================
---- linux-2.6.5-12.1.orig/fs/namespace.c       2004-05-10 19:21:56.000000000 +0300
-+++ linux-2.6.5-12.1/fs/namespace.c    2004-05-25 17:33:44.385759328 +0300
+--- linux-2.6.5-12.1.orig/fs/namespace.c       2004-05-10 12:21:56.000000000 -0400
++++ linux-2.6.5-12.1/fs/namespace.c    2004-06-03 18:31:28.000000000 -0400
  @@ -108,6 +108,7 @@
   
   static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd)
@@ -316,8 +311,8 @@ Index: linux-2.6.5-12.1/fs/namespace.c
                 flags &= ~MS_MGC_MSK;
  Index: linux-2.6.5-12.1/fs/open.c
  ===================================================================
---- linux-2.6.5-12.1.orig/fs/open.c    2004-05-10 19:21:56.000000000 +0300
-+++ linux-2.6.5-12.1/fs/open.c 2004-05-25 17:32:14.042493592 +0300
+--- linux-2.6.5-12.1.orig/fs/open.c    2004-05-10 12:21:56.000000000 -0400
++++ linux-2.6.5-12.1/fs/open.c 2004-06-03 18:31:28.000000000 -0400
  @@ -227,12 +227,12 @@
         struct nameidata nd;
         struct inode * inode;
@@ -485,8 +480,8 @@ Index: linux-2.6.5-12.1/fs/open.c
    */
  Index: linux-2.6.5-12.1/fs/stat.c
  ===================================================================
---- linux-2.6.5-12.1.orig/fs/stat.c    2004-05-10 19:21:56.000000000 +0300
-+++ linux-2.6.5-12.1/fs/stat.c 2004-05-25 17:32:14.042493592 +0300
+--- linux-2.6.5-12.1.orig/fs/stat.c    2004-05-10 12:21:56.000000000 -0400
++++ linux-2.6.5-12.1/fs/stat.c 2004-06-03 18:31:28.000000000 -0400
  @@ -37,7 +37,7 @@
   
   EXPORT_SYMBOL(generic_fillattr);
@@ -563,8 +558,8 @@ Index: linux-2.6.5-12.1/fs/stat.c
   
  Index: linux-2.6.5-12.1/fs/nfs/dir.c
  ===================================================================
---- linux-2.6.5-12.1.orig/fs/nfs/dir.c 2004-05-10 19:21:53.000000000 +0300
-+++ linux-2.6.5-12.1/fs/nfs/dir.c      2004-05-25 17:32:14.043493440 +0300
+--- linux-2.6.5-12.1.orig/fs/nfs/dir.c 2004-05-10 12:21:53.000000000 -0400
++++ linux-2.6.5-12.1/fs/nfs/dir.c      2004-06-03 18:31:28.000000000 -0400
  @@ -709,7 +709,7 @@
                 return 0;
         if (!nd || (nd->flags & LOOKUP_CONTINUE) || !(nd->flags & LOOKUP_CREATE))
@@ -585,8 +580,8 @@ Index: linux-2.6.5-12.1/fs/nfs/dir.c
          * The 0 argument passed into the create function should one day
  Index: linux-2.6.5-12.1/fs/inode.c
  ===================================================================
---- linux-2.6.5-12.1.orig/fs/inode.c   2004-05-10 19:21:56.000000000 +0300
-+++ linux-2.6.5-12.1/fs/inode.c        2004-05-25 17:32:14.044493288 +0300
+--- linux-2.6.5-12.1.orig/fs/inode.c   2004-05-10 12:21:56.000000000 -0400
++++ linux-2.6.5-12.1/fs/inode.c        2004-06-03 18:31:28.000000000 -0400
  @@ -221,6 +221,7 @@
         inodes_stat.nr_unused--;
   }
@@ -597,8 +592,8 @@ Index: linux-2.6.5-12.1/fs/inode.c
    * @inode: inode to clear
  Index: linux-2.6.5-12.1/fs/super.c
  ===================================================================
---- linux-2.6.5-12.1.orig/fs/super.c   2004-05-10 19:21:56.000000000 +0300
-+++ linux-2.6.5-12.1/fs/super.c        2004-05-25 17:32:14.045493136 +0300
+--- linux-2.6.5-12.1.orig/fs/super.c   2004-05-10 12:21:56.000000000 -0400
++++ linux-2.6.5-12.1/fs/super.c        2004-06-03 18:31:28.000000000 -0400
  @@ -789,6 +789,8 @@
         return (struct vfsmount *)sb;
   }
@@ -608,10 +603,22 @@ Index: linux-2.6.5-12.1/fs/super.c
   struct vfsmount *kern_mount(struct file_system_type *type)
   {
         return do_kern_mount(type->name, 0, type->name, NULL);
+Index: linux-2.6.5-12.1/fs/block_dev.c
+===================================================================
+--- linux-2.6.5-12.1.orig/fs/block_dev.c       2004-05-10 12:21:55.000000000 -0400
++++ linux-2.6.5-12.1/fs/block_dev.c    2004-06-03 18:31:28.000000000 -0400
+@@ -834,6 +834,7 @@
+       if (!path || !*path)
+               return ERR_PTR(-EINVAL);
+ 
++      intent_init(&nd.intent, IT_LOOKUP);
+       error = path_lookup(path, LOOKUP_FOLLOW, &nd);
+       if (error)
+               return ERR_PTR(error);
  Index: linux-2.6.5-12.1/include/linux/dcache.h
  ===================================================================
---- linux-2.6.5-12.1.orig/include/linux/dcache.h       2004-04-04 06:38:24.000000000 +0300
-+++ linux-2.6.5-12.1/include/linux/dcache.h    2004-05-25 17:32:14.045493136 +0300
+--- linux-2.6.5-12.1.orig/include/linux/dcache.h       2004-04-03 22:38:24.000000000 -0500
++++ linux-2.6.5-12.1/include/linux/dcache.h    2004-06-03 18:31:28.000000000 -0400
  @@ -4,6 +4,7 @@
   #ifdef __KERNEL__
   
@@ -631,8 +638,8 @@ Index: linux-2.6.5-12.1/include/linux/dcache.h
         int nr_unused;
  Index: linux-2.6.5-12.1/include/linux/fs.h
  ===================================================================
---- linux-2.6.5-12.1.orig/include/linux/fs.h   2004-05-10 19:21:56.000000000 +0300
-+++ linux-2.6.5-12.1/include/linux/fs.h        2004-05-25 17:32:14.046492984 +0300
+--- linux-2.6.5-12.1.orig/include/linux/fs.h   2004-05-10 12:21:56.000000000 -0400
++++ linux-2.6.5-12.1/include/linux/fs.h        2004-06-03 18:31:28.000000000 -0400
  @@ -250,6 +250,8 @@
   #define ATTR_ATTR_FLAG        1024
   #define ATTR_KILL_SUID        2048
@@ -686,8 +693,8 @@ Index: linux-2.6.5-12.1/include/linux/fs.h
   
  Index: linux-2.6.5-12.1/include/linux/namei.h
  ===================================================================
---- linux-2.6.5-12.1.orig/include/linux/namei.h        2004-05-10 19:21:56.000000000 +0300
-+++ linux-2.6.5-12.1/include/linux/namei.h     2004-05-25 17:32:14.047492832 +0300
+--- linux-2.6.5-12.1.orig/include/linux/namei.h        2004-05-10 12:21:56.000000000 -0400
++++ linux-2.6.5-12.1/include/linux/namei.h     2004-06-03 18:31:28.000000000 -0400
  @@ -2,25 +2,55 @@
   #define _LINUX_NAMEI_H
   
@@ -783,32 +790,10 @@ Index: linux-2.6.5-12.1/include/linux/namei.h
   extern int follow_down(struct vfsmount **, struct dentry **);
   extern int follow_up(struct vfsmount **, struct dentry **);
   
-Index: linux-2.6.5-12.1/kernel/exit.c
-===================================================================
---- linux-2.6.5-12.1.orig/kernel/exit.c        2004-05-10 19:21:56.000000000 +0300
-+++ linux-2.6.5-12.1/kernel/exit.c     2004-05-25 17:32:14.047492832 +0300
-@@ -260,6 +260,8 @@
-       write_unlock_irq(&tasklist_lock);
- }
- 
-+EXPORT_SYMBOL(reparent_to_init);
-+
- void __set_special_pids(pid_t session, pid_t pgrp)
- {
-       struct task_struct *curr = current;
-@@ -429,6 +431,8 @@
-       __exit_files(tsk);
- }
- 
-+EXPORT_SYMBOL(exit_files);
-+
- static inline void __put_fs_struct(struct fs_struct *fs)
- {
-       /* No need to hold fs->lock if we are killing it */
  Index: linux-2.6.5-12.1/include/linux/fshooks.h
  ===================================================================
---- linux-2.6.5-12.1.orig/include/linux/fshooks.h      2004-05-10 19:21:56.000000000 +0300
-+++ linux-2.6.5-12.1/include/linux/fshooks.h   2004-05-25 17:32:14.048492680 +0300
+--- linux-2.6.5-12.1.orig/include/linux/fshooks.h      2004-05-10 12:21:56.000000000 -0400
++++ linux-2.6.5-12.1/include/linux/fshooks.h   2004-06-03 18:31:28.000000000 -0400
  @@ -90,12 +90,18 @@
   
   #define FSHOOK_BEGIN_USER_WALK(type, err, path, flags, nd, field, args...) \
@@ -847,15 +832,25 @@ Index: linux-2.6.5-12.1/include/linux/fshooks.h
   
   #define FSHOOK_END_USER_WALK(type, err, field) ((void)0);}
   
-Index: linux-2.6.5-12.1/fs/block_dev.c
+Index: linux-2.6.5-12.1/kernel/exit.c
  ===================================================================
---- linux-2.6.5-12.1.orig/fs/block_dev.c       2004-05-10 19:21:55.000000000 +0300
-+++ linux-2.6.5-12.1/fs/block_dev.c    2004-05-25 17:32:39.517620784 +0300
-@@ -834,6 +834,7 @@
-       if (!path || !*path)
-               return ERR_PTR(-EINVAL);
+--- linux-2.6.5-12.1.orig/kernel/exit.c        2004-05-10 12:21:56.000000000 -0400
++++ linux-2.6.5-12.1/kernel/exit.c     2004-06-03 18:31:28.000000000 -0400
+@@ -260,6 +260,8 @@
+       write_unlock_irq(&tasklist_lock);
+ }
   
-+      intent_init(&nd.intent, IT_LOOKUP);
-       error = path_lookup(path, LOOKUP_FOLLOW, &nd);
-       if (error)
-               return ERR_PTR(error);
++EXPORT_SYMBOL(reparent_to_init);
++
+ void __set_special_pids(pid_t session, pid_t pgrp)
+ {
+       struct task_struct *curr = current;
+@@ -429,6 +431,8 @@
+       __exit_files(tsk);
+ }
+ 
++EXPORT_SYMBOL(exit_files);
++
+ static inline void __put_fs_struct(struct fs_struct *fs)
+ {
+       /* No need to hold fs->lock if we are killing it */
diff --git a/lustre/kernel_patches/series/chaos-2.4.21 b/lustre/kernel_patches/series/chaos-2.4.21

index 0003912..b3e932f 100644 (file)
--- a/lustre/kernel_patches/series/chaos-2.4.21
+++ b/lustre/kernel_patches/series/chaos-2.4.21
@@ -1,3 +1,4 @@
+configurable-x86-stack-2.4.21-chaos.patch 
  dev_read_only_2.4.21-chaos.patch 
  exports_2.4.19-suse.patch
  lustre_version.patch
@@ -26,6 +27,7 @@ add_page_private.patch
  ext3-raw-lookup.patch
  nfs_export_kernel-2.4.21-chaos.patch 
  ext3-ea-in-inode-2.4.21-chaos.patch 
+ext3-trusted_ea-2.4.21-chaos.patch 
  listman-2.4.21-chaos.patch 
  gfp_memalloc-2.4.21-chaos.patch 
  ext3-xattr-ptr-arith-fix.patch
@@ -33,3 +35,4 @@ kernel_text_address-2.4.18-chaos.patch
  pagecache-lock-2.4.21-chaos.patch 
  ext3-truncate-buffer-head.patch
  inode-max-readahead-2.4.24.patch
+dcache_refcount_debug.patch
diff --git a/lustre/kernel_patches/series/rh-2.4.20 b/lustre/kernel_patches/series/rh-2.4.20

index 06b2642..22491a0 100644 (file)
--- a/lustre/kernel_patches/series/rh-2.4.20
+++ b/lustre/kernel_patches/series/rh-2.4.20
@@ -28,7 +28,7 @@ ext3-o_direct-1.2.4.20-rh.patch
  ext3-no-write-super-chaos.patch
  dynamic-locks-2.4.20-rh.patch 
  vfs-pdirops-2.4.20-rh.patch 
-ext3-pdirops-2.4.20-chaos.patch 
+ext3-pdirops-2.4.20-rh.patch 
  tcp_zero_copy_2.4.20_chaos.patch
  gpl_header-chaos-2.4.20.patch
  add_page_private.patch
diff --git a/lustre/kernel_patches/series/suse-2.4.19 b/lustre/kernel_patches/series/suse-2.4.19

index 9905491..8748256 100644 (file)
--- a/lustre/kernel_patches/series/suse-2.4.19
+++ b/lustre/kernel_patches/series/suse-2.4.19
@@ -10,7 +10,6 @@ ext-2.4-patch-1-chaos.patch
  ext-2.4-patch-2.patch
  ext-2.4-patch-3.patch
  ext-2.4-patch-4.patch
-linux-2.4.20-xattr-0.8.54-hp.patch 
  linux-2.4.19-xattr-0.8.54-suse.patch 
  ext3-2.4-ino_t.patch
  ext3-largefile.patch
diff --git a/lustre/kernel_patches/series/vanilla-2.4.20 b/lustre/kernel_patches/series/vanilla-2.4.20

index ae838ca..d11bec0 100644 (file)
--- a/lustre/kernel_patches/series/vanilla-2.4.20
+++ b/lustre/kernel_patches/series/vanilla-2.4.20
@@ -50,7 +50,5 @@ kernel_text_address-2.4.20-vanilla.patch
  ext3-xattr-ptr-arith-fix.patch
  gfp_memalloc-2.4.22.patch
  procfs-ndynamic-2.4.patch
-linux-2.4.20-tmpfs-xattr.patch
-linux-2.4.20-tmpfs-iopen.patch
  linux-2.4.20-filemap.patch
  ext3-truncate-buffer-head.patch
diff --git a/lustre/kernel_patches/targets/rh-2.4.target b/lustre/kernel_patches/targets/rh-2.4.target

index cca5324..70af4ab 100644 (file)
--- a/lustre/kernel_patches/targets/rh-2.4.target
+++ b/lustre/kernel_patches/targets/rh-2.4.target
@@ -1,7 +1,7 @@
-KERNEL=linux-2.4.20-28.9.tar.gz
+KERNEL=linux-2.4.20-31.9.tar.gz
  SERIES=rh-2.4.20
  VERSION=2.4.20
-EXTRA_VERSION=28.9_lustre
+EXTRA_VERSION=31.9_lustre.1.2.2
  RHBUILD=1
  
  BASE_ARCHS="i586"
@@ -11,3 +11,11 @@ JENSEN_ARCHS=""
  SMP_ARCHS="i586"
  UP_ARCHS=""
  SRC_ARCHS="i586"
+
+# the modules in this kernel do not build with gcc 3
+for cc in i386-redhat-linux-gcc-2.96 gcc296 gcc ; do
+    if which $cc >/dev/null 2>/dev/null ; then
+        CC=$cc
+        break
+    fi
+done
diff --git a/lustre/ldiskfs/autoMakefile.am b/lustre/ldiskfs/autoMakefile.am

index f81e6e7..eacc902 100644 (file)
--- a/lustre/ldiskfs/autoMakefile.am
+++ b/lustre/ldiskfs/autoMakefile.am
@@ -33,10 +33,17 @@ patches := @top_srcdir@/kernel_patches/patches
  sources: $(ext3_sources) $(ext3_headers) $(linux_headers) $(series)
         rm -rf linux-stage linux sources $(ldiskfs_SOURCES)
         mkdir -p linux-stage/fs/ext3 linux-stage/include/linux
-       cd linux-stage && quilt setup -l ../$(series) -d ../$(patches)
         cp $(ext3_sources) $(ext3_headers) $(ext3_extra) linux-stage/fs/ext3
         cp $(linux_headers) linux-stage/include/linux
+if USE_QUILT
+       cd linux-stage && quilt setup -l ../$(series) -d ../$(patches)
         cd linux-stage && quilt push -a -q
+else
+       @cd linux-stage && for i in $$(<../$(series)) ; do \
+               echo "patch -p1 < ../$(patches)/$$i" ; \
+               patch -p1 < ../$(patches)/$$i || exit 1 ; \
+       done
+endif
         mkdir linux
         @echo -n "Replacing 'ext3' with 'ldiskfs':"
         @for i in $(notdir $(ext3_headers) $(ext3_sources)) $(new_sources) ; do \
@@ -50,6 +57,7 @@ sources: $(ext3_sources) $(ext3_headers) $(linux_headers) $(series)
                         linux-stage/include/linux/ext3$$i \
                         > linux/ldiskfs$$i ; \
         done
+       @echo
         touch sources
  
  foo-check:
diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c

index 906090b..cdd3b07 100644 (file)
--- a/lustre/ldlm/ldlm_lib.c
+++ b/lustre/ldlm/ldlm_lib.c
@@ -481,11 +481,6 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler)
          if (rc && rc != EALREADY)
                  GOTO(out, rc);
  
-        /* XXX track this all the time? */
-        if (target->obd_recovering) {
-                target->obd_connected_clients++;
-        }
-
          req->rq_repmsg->handle = conn;
  
          /* If the client and the server are the same node, we will already
@@ -528,6 +523,9 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler)
                  GOTO(out, rc = 0);
          }
  
+        if (target->obd_recovering)
+                target->obd_connected_clients++;
+
          memcpy(&conn, lustre_msg_buf(req->rq_reqmsg, 2, sizeof conn),
                 sizeof conn);
  
@@ -580,21 +578,37 @@ void target_destroy_export(struct obd_export *exp)
   * Recovery functions
   */
  
-static void abort_delayed_replies(struct obd_device *obd)
+static void target_finish_recovery(struct obd_device *obd)
  {
-        struct ptlrpc_request *req;
          struct list_head *tmp, *n;
+        int rc;
+
+        CWARN("%s: sending delayed replies to recovered clients\n",
+              obd->obd_name);
+
+        ldlm_reprocess_all_ns(obd->obd_namespace);
+
+        /* when recovery finished, cleanup orphans on mds and ost */
+        if (OBT(obd) && OBP(obd, postrecov)) {
+                rc = OBP(obd, postrecov)(obd);
+                if (rc >= 0)
+                        CWARN("%s: all clients recovered, %d MDS "
+                              "orphans deleted\n", obd->obd_name, rc);
+                else
+                        CERROR("postrecov failed %d\n", rc);
+        }
+
          list_for_each_safe(tmp, n, &obd->obd_delayed_reply_queue) {
+                struct ptlrpc_request *req;
                  req = list_entry(tmp, struct ptlrpc_request, rq_list);
-                DEBUG_REQ(D_ERROR, req, "aborted:");
-                req->rq_status = -ENOTCONN;
-                req->rq_type = PTL_RPC_MSG_ERR;
+                DEBUG_REQ(D_ERROR, req, "delayed:");
                  ptlrpc_reply(req);
                  class_export_put(req->rq_export);
                  list_del(&req->rq_list);
                  OBD_FREE(req->rq_reqmsg, req->rq_reqlen);
                  OBD_FREE(req, sizeof *req);
          }
+        return;
  }
  
  static void abort_recovery_queue(struct obd_device *obd)
@@ -625,35 +639,24 @@ static void abort_recovery_queue(struct obd_device *obd)
  void target_abort_recovery(void *data)
  {
          struct obd_device *obd = data;
-        int rc;
  
-        CERROR("disconnecting clients and aborting recovery\n");
          spin_lock_bh(&obd->obd_processing_task_lock);
          if (!obd->obd_recovering) {
                  spin_unlock_bh(&obd->obd_processing_task_lock);
                  EXIT;
                  return;
          }
-
          obd->obd_recovering = obd->obd_abort_recovery = 0;
-
-        wake_up(&obd->obd_next_transno_waitq);
          target_cancel_recovery_timer(obd);
          spin_unlock_bh(&obd->obd_processing_task_lock);
  
-        class_disconnect_exports(obd, 0);
+        CERROR("%s: recovery period over; disconnecting unfinished clients.\n",
+               obd->obd_name);
+        class_disconnect_stale_exports(obd, 0);
+        abort_recovery_queue(obd);
  
-        /* when recovery was aborted, cleanup orphans on mds and ost */
-        if (OBT(obd) && OBP(obd, postrecov)) {
-                rc = OBP(obd, postrecov)(obd);
-                if (rc >= 0)
-                        CWARN("Cleanup %d orphans after recovery was aborted\n", rc);
-                else
-                        CERROR("postrecov failed %d\n", rc);
-        }
+        target_finish_recovery(obd);
  
-        abort_delayed_replies(obd);
-        abort_recovery_queue(obd);
          ptlrpc_run_recovery_over_upcall(obd);
  }
  
@@ -662,7 +665,8 @@ static void target_recovery_expired(unsigned long castmeharder)
          struct obd_device *obd = (struct obd_device *)castmeharder;
          CERROR("recovery timed out, aborting\n");
          spin_lock_bh(&obd->obd_processing_task_lock);
-        obd->obd_abort_recovery = 1;
+        if (obd->obd_recovering)
+                obd->obd_abort_recovery = 1;
          wake_up(&obd->obd_next_transno_waitq);
          spin_unlock_bh(&obd->obd_processing_task_lock);
  }
@@ -723,6 +727,9 @@ static int check_for_next_transno(struct obd_device *obd)
          queue_len = obd->obd_requests_queued_for_recovery;
          next_transno = obd->obd_next_recovery_transno;
  
+        CDEBUG(D_HA,"max: %d, connected: %d, completed: %d, queue_len: %d, "
+               "req_transno: "LPU64", next_transno: "LPU64"\n",
+               max, connected, completed, queue_len, req_transno, next_transno);
          if (obd->obd_abort_recovery) {
                  CDEBUG(D_HA, "waking for aborted recovery\n");
                  wake_up = 1;
@@ -836,6 +843,9 @@ int target_queue_recovery_request(struct ptlrpc_request *req,
           * Also, if this request has a transno less than the one we're waiting
           * for, we should process it now.  It could (and currently always will)
           * be an open request for a descriptor that was opened some time ago.
+         *
+         * Also, a resent, replayed request that has already been
+         * handled will pass through here and be processed immediately.
           */
          if (obd->obd_processing_task == current->pid ||
              transno < obd->obd_next_recovery_transno) {
@@ -847,6 +857,17 @@ int target_queue_recovery_request(struct ptlrpc_request *req,
                  return 1;
          }
  
+        /* A resent, replayed request that is still on the queue; just drop it.
+           The queued request will handle this. */
+        if ((lustre_msg_get_flags(req->rq_reqmsg) & (MSG_RESENT | MSG_REPLAY)) ==
+            (MSG_RESENT | MSG_REPLAY)) {
+                DEBUG_REQ(D_ERROR, req, "dropping resent queued req");
+                spin_unlock_bh(&obd->obd_processing_task_lock);
+                OBD_FREE(reqmsg, req->rq_reqlen);
+                OBD_FREE(saved_req, sizeof *saved_req);
+                return 0;
+        }
+
          memcpy(saved_req, req, sizeof *req);
          memcpy(reqmsg, req->rq_reqmsg, req->rq_reqlen);
          req = saved_req;
@@ -902,7 +923,6 @@ int target_queue_final_reply(struct ptlrpc_request *req, int rc)
          struct ptlrpc_request *saved_req;
          struct lustre_msg *reqmsg;
          int recovery_done = 0;
-        int rc2;
  
          LASSERT ((rc == 0) == (req->rq_reply_state != NULL));
  
@@ -932,39 +952,22 @@ int target_queue_final_reply(struct ptlrpc_request *req, int rc)
          list_add(&req->rq_list, &obd->obd_delayed_reply_queue);
  
          spin_lock_bh(&obd->obd_processing_task_lock);
-        --obd->obd_recoverable_clients;
+        /* only count the first "replay over" request from each
+           export */
+        if (req->rq_export->exp_replay_needed) {
+                --obd->obd_recoverable_clients;
+                req->rq_export->exp_replay_needed = 0;
+        }
          recovery_done = (obd->obd_recoverable_clients == 0);
          spin_unlock_bh(&obd->obd_processing_task_lock);
  
          if (recovery_done) {
-                struct list_head *tmp, *n;
-                ldlm_reprocess_all_ns(req->rq_export->exp_obd->obd_namespace);
-                CWARN("%s: all clients recovered, sending delayed replies\n",
-                       obd->obd_name);
                  spin_lock_bh(&obd->obd_processing_task_lock);
-                obd->obd_recovering = 0;
+                obd->obd_recovering = obd->obd_abort_recovery = 0;
                  target_cancel_recovery_timer(obd);
                  spin_unlock_bh(&obd->obd_processing_task_lock);
  
-                /* when recovery finished, cleanup orphans on mds and ost */
-                if (OBT(obd) && OBP(obd, postrecov)) {
-                        rc2 = OBP(obd, postrecov)(obd);
-                        if (rc2 >= 0)
-                                CWARN("%s: all clients recovered, %d MDS "
-                                      "orphans deleted\n", obd->obd_name, rc2);
-                        else
-                                CERROR("postrecov failed %d\n", rc2);
-                }
-
-                list_for_each_safe(tmp, n, &obd->obd_delayed_reply_queue) {
-                        req = list_entry(tmp, struct ptlrpc_request, rq_list);
-                        DEBUG_REQ(D_ERROR, req, "delayed:");
-                        ptlrpc_reply(req);
-                        class_export_put(req->rq_export);
-                        list_del(&req->rq_list);
-                        OBD_FREE(req->rq_reqmsg, req->rq_reqlen);
-                        OBD_FREE(req, sizeof *req);
-                }
+                target_finish_recovery(obd);
                  ptlrpc_run_recovery_over_upcall(obd);
          } else {
                  CWARN("%s: %d recoverable clients remain\n",
diff --git a/lustre/liblustre/rw.c b/lustre/liblustre/rw.c

index b55e91f..bacf759 100644 (file)
--- a/lustre/liblustre/rw.c
+++ b/lustre/liblustre/rw.c
@@ -193,7 +193,7 @@ int llu_glimpse_size(struct inode *inode)
          rc = obd_enqueue(sbi->ll_osc_exp, lli->lli_smd, LDLM_EXTENT, &policy,
                           LCK_PR, &flags, llu_extent_lock_callback,
                           ldlm_completion_ast, llu_glimpse_callback, inode,
-                         sizeof(*lvb), lustre_swab_ost_lvb, &lockh);
+                         sizeof(struct ost_lvb), lustre_swab_ost_lvb, &lockh);
          if (rc > 0)
                  RETURN(-EIO);
  
diff --git a/lustre/llite/file.c b/lustre/llite/file.c

index ae8034a..4918f98 100644 (file)
--- a/lustre/llite/file.c
+++ b/lustre/llite/file.c
@@ -384,7 +384,7 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm,
                          break;
                  }
  
-                conditional_schedule();
+                cond_resched();
  
                  page = find_get_page(inode->i_mapping, i);
                  if (page == NULL)
@@ -658,8 +658,19 @@ int ll_extent_lock(struct ll_file_data *fd, struct inode *inode,
                  rc = -EIO;
  
          if (policy->l_extent.start == 0 &&
-            policy->l_extent.end == OBD_OBJECT_EOF)
+            policy->l_extent.end == OBD_OBJECT_EOF) {
+                /* vmtruncate()->ll_truncate() first sets the i_size and then
+                 * the kms under both a DLM lock and the i_sem.  If we don't
+                 * get the i_sem here we can match the DLM lock and reset
+                 * i_size from the kms before the truncating path has updated
+                 * the kms.  generic_file_write can then trust the stale i_size
+                 * when doing appending writes and effectively cancel the
+                 * result of the truncate.  Getting the i_sem after the enqueue
+                 * maintains the DLM -> i_sem acquiry order. */
+                down(&inode->i_sem);
                  inode->i_size = lov_merge_size(lsm, 1);
+                up(&inode->i_sem);
+        }
  
          //inode->i_mtime = lov_merge_mtime(lsm, inode->i_mtime);
  
diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c

index 526eeb3..5bec189 100644 (file)
--- a/lustre/lov/lov_obd.c
+++ b/lustre/lov/lov_obd.c
@@ -274,7 +274,6 @@ static int lov_disconnect(struct obd_export *exp, int flags)
  static int lov_set_osc_active(struct lov_obd *lov, struct obd_uuid *uuid,
                                int activate)
  {
-        struct obd_device *obd;
          struct lov_tgt_desc *tgt;
          int i, rc = 0;
          ENTRY;
@@ -293,24 +292,14 @@ static int lov_set_osc_active(struct lov_obd *lov, struct obd_uuid *uuid,
          if (i == lov->desc.ld_tgt_count)
                  GOTO(out, rc = -EINVAL);
  
-        obd = class_exp2obd(tgt->ltd_exp);
-        if (obd == NULL) {
-                /* This can happen if OST failure races with node shutdown */
-                GOTO(out, rc = -ENOTCONN);
-        }
-
-        CDEBUG(D_INFO, "Found OBD %s=%s device %d (%p) type %s at LOV idx %d\n",
-               obd->obd_name, obd->obd_uuid.uuid, obd->obd_minor, obd,
-               obd->obd_type->typ_name, i);
-        LASSERT(strcmp(obd->obd_type->typ_name, "osc") == 0);
-
          if (tgt->active == activate) {
-                CDEBUG(D_INFO, "OBD %p already %sactive!\n", obd,
+                CDEBUG(D_INFO, "OSC %s already %sactive!\n", uuid->uuid,
                         activate ? "" : "in");
                  GOTO(out, rc);
          }
  
-        CDEBUG(D_INFO, "Marking OBD %p %sactive\n", obd, activate ? "" : "in");
+        CDEBUG(D_INFO, "Marking OSC %s %sactive\n", uuid->uuid, 
+               activate ? "" : "in");
  
          tgt->active = activate;
          if (activate)
@@ -2071,13 +2060,13 @@ static int lov_enqueue(struct obd_export *exp, struct lov_stripe_md *lsm,
                          if (tmp > lock->l_policy_data.l_extent.end)
                                  tmp = lock->l_policy_data.l_extent.end + 1;
                          if (tmp >= loi->loi_kms) {
-                                CDEBUG(D_INODE, "lock acquired, setting rss="
+                                CDEBUG(D_DLMTRACE, "lock acquired, setting rss="
                                         LPU64", kms="LPU64"\n", loi->loi_rss,
                                         tmp);
                                  loi->loi_kms = tmp;
                                  loi->loi_kms_valid = 1;
                          } else {
-                                CDEBUG(D_INODE, "lock acquired, setting rss="
+                                CDEBUG(D_DLMTRACE, "lock acquired, setting rss="
                                         LPU64"; leaving kms="LPU64", end="LPU64
                                         "\n", loi->loi_rss, loi->loi_kms,
                                         lock->l_policy_data.l_extent.end);
@@ -2089,8 +2078,9 @@ static int lov_enqueue(struct obd_export *exp, struct lov_stripe_md *lsm,
                          memset(lov_lockhp, 0, sizeof(*lov_lockhp));
                          loi->loi_rss = submd->lsm_oinfo->loi_rss;
                          loi->loi_blocks = submd->lsm_oinfo->loi_blocks;
-                        CDEBUG(D_INODE, "glimpsed, setting rss="LPU64"; leaving"
-                               " kms="LPU64"\n", loi->loi_rss, loi->loi_kms);
+                        CDEBUG(D_DLMTRACE, "glimpsed, setting rss="LPU64
+                               "; leaving kms="LPU64"\n", loi->loi_rss,
+                               loi->loi_kms);
                  } else {
                          memset(lov_lockhp, 0, sizeof(*lov_lockhp));
                          if (lov->tgts[loi->loi_ost_idx].active) {
diff --git a/lustre/mds/mds_fs.c b/lustre/mds/mds_fs.c

index 0c74ec0..5505329 100644 (file)
--- a/lustre/mds/mds_fs.c
+++ b/lustre/mds/mds_fs.c
@@ -182,7 +182,7 @@ static int mds_server_free_data(struct mds_obd *mds)
          return 0;
  }
  
-static int mds_read_last_rcvd(struct obd_device *obd, struct file *file)
+static int mds_init_server_data(struct obd_device *obd, struct file *file)
  {
          struct mds_obd *mds = &obd->u.mds;
          struct mds_server_data *msd;
@@ -326,6 +326,7 @@ static int mds_read_last_rcvd(struct obd_device *obd, struct file *file)
                  spin_lock_init(&med->med_open_lock);
  
                  mcd = NULL;
+                exp->exp_replay_needed = 1;
                  obd->obd_recoverable_clients++;
                  obd->obd_max_recoverable_clients++;
                  class_export_put(exp);
@@ -337,7 +338,11 @@ static int mds_read_last_rcvd(struct obd_device *obd, struct file *file)
                         mds->mds_last_transno = last_transno;
          }
  
+        if (mcd)
+                OBD_FREE(mcd, sizeof(*mcd));
+
          obd->obd_last_committed = mds->mds_last_transno;
+
          if (obd->obd_recoverable_clients) {
                  CWARN("RECOVERY: service %s, %d recoverable clients, "
                        "last_transno "LPU64"\n", obd->obd_name,
@@ -346,16 +351,15 @@ static int mds_read_last_rcvd(struct obd_device *obd, struct file *file)
                  obd->obd_recovering = 1;
          }
  
-        if (mcd)
-                OBD_FREE(mcd, sizeof(*mcd));
-        
          mds->mds_mount_count = mount_count + 1;
          msd->msd_mount_count = cpu_to_le64(mds->mds_mount_count);
  
          /* save it, so mount count and last_transno is current */
          rc = mds_update_server_data(obd, 1);
+        if (rc)
+                GOTO(err_client, rc);
  
-        RETURN(rc);
+        RETURN(0);
  
  err_client:
          class_disconnect_exports(obd, 0);
@@ -455,7 +459,7 @@ int mds_fs_setup(struct obd_device *obd, struct vfsmount *mnt)
                  GOTO(err_last_rcvd, rc = -ENOENT);
          }
  
-        rc = mds_read_last_rcvd(obd, file);
+        rc = mds_init_server_data(obd, file);
          if (rc) {
                  CERROR("cannot read %s: rc = %d\n", LAST_RCVD, rc);
                  GOTO(err_last_rcvd, rc);
@@ -562,8 +566,8 @@ int mds_obd_create(struct obd_export *exp, struct obdo *oa,
          ENTRY;
  
          push_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL);
-        
-        sprintf(fidname, "OBJECTS/%u", tmpname);
+
+        sprintf(fidname, "OBJECTS/%u.%u", tmpname, current->pid);
          filp = filp_open(fidname, O_CREAT | O_EXCL, 0644);
          if (IS_ERR(filp)) {
                  rc = PTR_ERR(filp);
diff --git a/lustre/mds/mds_lov.c b/lustre/mds/mds_lov.c

index d93ce0e..f0bf35b 100644 (file)
--- a/lustre/mds/mds_lov.c
+++ b/lustre/mds/mds_lov.c
@@ -435,7 +435,7 @@ int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
  
          case OBD_IOC_CATLOGLIST: {
                  int count = mds->mds_lov_desc.ld_tgt_count;
-                rc = llog_catlog_list(obd, count, data);
+                rc = llog_catalog_list(obd, count, data);
                  RETURN(rc);
  
          }
diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c

index ee096ac..2952fce 100644 (file)
--- a/lustre/obdclass/class_obd.c
+++ b/lustre/obdclass/class_obd.c
@@ -418,6 +418,7 @@ EXPORT_SYMBOL(class_exp2cliimp);
  EXPORT_SYMBOL(class_conn2cliimp);
  EXPORT_SYMBOL(class_disconnect);
  EXPORT_SYMBOL(class_disconnect_exports);
+EXPORT_SYMBOL(class_disconnect_stale_exports);
  
  EXPORT_SYMBOL(oig_init);
  EXPORT_SYMBOL(oig_release);
diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c

index a8db9cb..0429ceb 100644 (file)
--- a/lustre/obdclass/genops.c
+++ b/lustre/obdclass/genops.c
@@ -603,24 +603,17 @@ int class_disconnect(struct obd_export *export, int flags)
          RETURN(0);
  }
  
-void class_disconnect_exports(struct obd_device *obd, int flags)
+static void  class_disconnect_export_list(struct list_head *list, int flags)
  {
          int rc;
-        struct list_head *tmp, *n, work_list;
          struct lustre_handle fake_conn;
          struct obd_export *fake_exp, *exp;
          ENTRY;
  
-        /* Move all of the exports from obd_exports to a work list, en masse. */
-        spin_lock(&obd->obd_dev_lock);
-        list_add(&work_list, &obd->obd_exports);
-        list_del_init(&obd->obd_exports);
-        spin_unlock(&obd->obd_dev_lock);
-
-        CDEBUG(D_HA, "OBD device %d (%p) has exports, "
-               "disconnecting them\n", obd->obd_minor, obd);
-        list_for_each_safe(tmp, n, &work_list) {
-                exp = list_entry(tmp, struct obd_export, exp_obd_chain);
+        /* It's possible that an export may disconnect itself, but 
+         * nothing else will be added to this list. */
+        while(!list_empty(list)) {
+                exp = list_entry(list->next, struct obd_export, exp_obd_chain);
                  class_export_get(exp);
  
                  if (obd_uuid_equals(&exp->exp_client_uuid,
@@ -653,6 +646,51 @@ void class_disconnect_exports(struct obd_device *obd, int flags)
          EXIT;
  }
  
+void class_disconnect_exports(struct obd_device *obd, int flags)
+{
+        struct list_head work_list;
+        ENTRY;
+        /* Disconnect every export of this device: first move them all from
+         * obd_exports to a private work list, en masse, under obd_dev_lock. */
+        spin_lock(&obd->obd_dev_lock);
+        list_add(&work_list, &obd->obd_exports);  /* link new head into ring */
+        list_del_init(&obd->obd_exports);         /* unlink old head; now empty */
+        spin_unlock(&obd->obd_dev_lock);
+
+        CDEBUG(D_HA, "OBD device %d (%p) has exports, "
+               "disconnecting them\n", obd->obd_minor, obd);
+        class_disconnect_export_list(&work_list, flags); /* after unlocking */
+        EXIT;
+}
+
+/* Disconnect the exports of @obd that have not completed recovery, i.e. those
+ * still flagged exp_replay_needed; @flags is passed on to the disconnect. */
+void class_disconnect_stale_exports(struct obd_device *obd, int flags)
+{
+        struct list_head work_list;
+        struct list_head *pos, *n;
+        struct obd_export *exp;
+        int cnt = 0;                    /* exports moved onto work_list */
+        ENTRY;
+  
+        INIT_LIST_HEAD(&work_list);
+        spin_lock(&obd->obd_dev_lock);
+        /* _safe iteration: matching entries are unlinked while walking */
+        list_for_each_safe(pos, n, &obd->obd_exports) {
+                exp = list_entry(pos, struct obd_export, exp_obd_chain);
+                if (exp->exp_replay_needed) {
+                        list_del(&exp->exp_obd_chain);
+                        list_add(&exp->exp_obd_chain, &work_list);
+                        cnt++;
+                }
+        }
+        spin_unlock(&obd->obd_dev_lock);
+        /* logged and disconnected only after obd_dev_lock has been dropped */
+        CDEBUG(D_ERROR, "%s: disconnecting %d stale clients\n", 
+               obd->obd_name, cnt);
+        class_disconnect_export_list(&work_list, flags);
+        EXIT;
+}
+
  int oig_init(struct obd_io_group **oig_out)
  {
          struct obd_io_group *oig;
diff --git a/lustre/obdclass/llog_ioctl.c b/lustre/obdclass/llog_ioctl.c

index 6c060e7..6c53036 100644 (file)
--- a/lustre/obdclass/llog_ioctl.c
+++ b/lustre/obdclass/llog_ioctl.c
@@ -377,7 +377,7 @@ out:
  }
  EXPORT_SYMBOL(llog_ioctl);
  
-int llog_catlog_list(struct obd_device *obd, int count,
+int llog_catalog_list(struct obd_device *obd, int count,
                       struct obd_ioctl_data *data)
  {
          int size, i;
@@ -418,4 +418,4 @@ int llog_catlog_list(struct obd_device *obd, int count,
          RETURN(0);
  
  }
-EXPORT_SYMBOL(llog_catlog_list);
+EXPORT_SYMBOL(llog_catalog_list);
diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c

index cf4797b..dd4e563 100644 (file)
--- a/lustre/obdfilter/filter.c
+++ b/lustre/obdfilter/filter.c
@@ -477,6 +477,7 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp)
                  spin_lock_init(&fed->fed_lock);
  
                  fcd = NULL;
+                exp->exp_replay_needed = 1;
                  obd->obd_recoverable_clients++;
                  class_export_put(exp);
  
@@ -488,6 +489,9 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp)
  
          }
  
+        if (fcd)
+                OBD_FREE(fcd, sizeof(*fcd));
+
          obd->obd_last_committed = le64_to_cpu(fsd->fsd_last_transno);
  
          if (obd->obd_recoverable_clients) {
@@ -498,17 +502,16 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp)
                  obd->obd_recovering = 1;
          }
  
-        if (fcd)
-                OBD_FREE(fcd, sizeof(*fcd));
-
  out:
          filter->fo_mount_count = mount_count + 1;
          fsd->fsd_mount_count = cpu_to_le64(filter->fo_mount_count);
  
          /* save it, so mount count and last_transno is current */
          rc = filter_update_server_data(obd, filp, filter->fo_fsd, 1);
+        if (rc)
+                GOTO(err_client, rc);
  
-        RETURN(rc);
+        RETURN(0);
  
  err_client:
          class_disconnect_exports(obd, 0);
@@ -2336,7 +2339,7 @@ int filter_iocontrol(unsigned int cmd, struct obd_export *exp,
          }
  
          case OBD_IOC_CATLOGLIST: {
-                rc = llog_catlog_list(obd, 1, data);
+                rc = llog_catalog_list(obd, 1, data);
                  RETURN(rc);
          }
  
diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c

index ded86b3..da09be4 100644 (file)
--- a/lustre/osc/osc_request.c
+++ b/lustre/osc/osc_request.c
@@ -779,7 +779,6 @@ static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa,
                  lustre_msg_buf(req->rq_reqmsg, 2, niocount * sizeof(*niobuf)));
          osc_announce_cached(cli, &body->oa, opc == OST_WRITE ? requested_nob:0);
          spin_lock_irqsave(&req->rq_lock, flags);
-        req->rq_no_resend = 1;
          spin_unlock_irqrestore(&req->rq_lock, flags);
  
          /* size[0] still sizeof (*body) */
@@ -901,8 +900,6 @@ restart_bulk:
          rc = osc_brw_prep_request(cmd, class_exp2cliimp(exp), oa, lsm,
                                    page_count, pga, &requested_nob, &niocount,
                                    &request);
-        /* NB ^ sets rq_no_resend */
-
          if (rc != 0)
                  return (rc);
  
@@ -931,13 +928,6 @@ static int brw_interpret(struct ptlrpc_request *request,
          struct brw_page *pga = aa->aa_pga;
          ENTRY;
  
-        /* XXX bug 937 here */
-        if (rc == -ETIMEDOUT && request->rq_resend) {
-                DEBUG_REQ(D_HA, request,  "BULK TIMEOUT");
-                LBUG(); /* re-send.  later. */
-                //goto restart_bulk;
-        }
-
          rc = osc_brw_fini_request(request, oa, requested_nob, niocount,
                                    page_count, pga, rc);
          RETURN (rc);
@@ -957,7 +947,6 @@ static int async_internal(int cmd, struct obd_export *exp, struct obdo *oa,
          rc = osc_brw_prep_request(cmd, class_exp2cliimp(exp), oa, lsm,
                                    page_count, pga, &requested_nob, &nio_count,
                                    &request);
-        /* NB ^ sets rq_no_resend */
  
          if (rc == 0) {
                  LASSERT(sizeof(*aa) <= sizeof(request->rq_async_args));
diff --git a/lustre/portals/archdep.m4 b/lustre/portals/archdep.m4

index 2a42368..c78fc34 100644 (file)
--- a/lustre/portals/archdep.m4
+++ b/lustre/portals/archdep.m4
@@ -92,6 +92,7 @@ AC_CHECK_FILE([$LINUX/include/linux/namei.h],
         [
                 linux25="yes"
                 KMODEXT=".ko"
+               enable_ldiskfs="yes"
         ],[
                 KMODEXT=".o"
                 linux25="no"
@@ -101,6 +102,16 @@ AC_MSG_RESULT([$linux25])
  AM_CONDITIONAL(LINUX25, test x$linux25 = xyes)
  AC_SUBST(KMODEXT)
  
+AC_PATH_PROG(PATCH, patch, [no])
+AC_PATH_PROG(QUILT, quilt, [no])
+AM_CONDITIONAL(USE_QUILT, test x$QUILT != xno)
+# USE_QUILT is set only when quilt was found; with neither quilt nor patch, ldiskfs cannot be built.
+if test x$enable_ldiskfs$enable_modules = xyesyes ; then
+       if test x$PATCH$QUILT = xnono ; then
+               AC_MSG_ERROR([Quilt or patch are needed to build the ldiskfs module (for Linux 2.6)])
+       fi
+fi
+
  # -------  Makeflags ------------------
  
  CPPFLAGS="$CRAY_PORTALS_INCLUDE $CRAY_PORTALS_COMMANDLINE -I\$(top_srcdir)/include -I\$(top_srcdir)/portals/include"
@@ -135,7 +146,7 @@ _ACEOF
  AC_DEFUN([LUSTRE_MODULE_COMPILE_IFELSE],
  [m4_ifvaln([$1], [LUSTRE_MODULE_CONFTEST([$1])])dnl
  rm -f kernel-tests/conftest.o kernel-tests/conftest.mod.c kernel-tests/conftest.ko
-AS_IF([AC_TRY_COMMAND(cp conftest.c kernel-tests && make [$2] -f $PWD/kernel-tests/Makefile LUSTRE_LINUX_CONFIG=$LINUX_CONFIG -o tmp_include_depends -o scripts -o include/config/MARKER -C $LINUX EXTRA_CFLAGS="$EXTRA_KCFLAGS" $ARCH_UM SUBDIRS=$PWD/kernel-tests) >/dev/null && AC_TRY_COMMAND([$3])],
+AS_IF([AC_TRY_COMMAND(cp conftest.c kernel-tests && make [$2] -f $PWD/kernel-tests/Makefile LUSTRE_LINUX_CONFIG=$LINUX_CONFIG -o tmp_include_depends -o scripts -o include/config/MARKER -C $LINUX EXTRA_CFLAGS="-Werror-implicit-function-declaration $EXTRA_KCFLAGS" $ARCH_UM SUBDIRS=$PWD/kernel-tests) >/dev/null && AC_TRY_COMMAND([$3])],
         [$4],
         [_AC_MSG_LOG_CONFTEST
  m4_ifvaln([$5],[$5])dnl])dnl
@@ -446,7 +457,7 @@ LUSTRE_MODULE_TRY_COMPILE(
  # ---------- Red Hat 2.4.20 backports some 2.5 bits --------
  # This needs to run after we've defined the KCPPFLAGS
  
-AC_MSG_CHECKING([for kernel version])
+AC_MSG_CHECKING([if task_struct has a sighand field])
  LUSTRE_MODULE_TRY_COMPILE(
         [
                 #include <linux/sched.h>
@@ -455,9 +466,24 @@ LUSTRE_MODULE_TRY_COMPILE(
                 p.sighand = NULL;
         ],[
                 AC_DEFINE(CONFIG_RH_2_4_20, 1, [this kernel contains Red Hat 2.4.20 patches])
-               AC_MSG_RESULT([redhat-2.4.20])
+               AC_MSG_RESULT([yes])
         ],[
-               AC_MSG_RESULT([$LINUXRELEASE])
+               AC_MSG_RESULT([no])
+       ])
+
+# ---------- 2.4.20 introduced cond_resched --------------
+
+AC_MSG_CHECKING([if kernel offers cond_resched])
+LUSTRE_MODULE_TRY_COMPILE(
+       [
+               #include <linux/sched.h>
+       ],[
+               cond_resched();
+       ],[
+               AC_MSG_RESULT([yes])
+               AC_DEFINE(HAVE_COND_RESCHED, 1, [cond_resched found])
+       ],[
+               AC_MSG_RESULT([no])
         ])
  
  # ---------- Red Hat 2.4.21 backports some more 2.5 bits --------
diff --git a/lustre/portals/autoMakefile.am b/lustre/portals/autoMakefile.am

index bd57e6e..485ff04 100644 (file)
--- a/lustre/portals/autoMakefile.am
+++ b/lustre/portals/autoMakefile.am
@@ -3,6 +3,6 @@
  # This code is issued under the GNU General Public License.
  # See the file COPYING in this distribution
  
-EXTRA_DIST = archdep.m4 build.m4 include 
+EXTRA_DIST = archdep.m4 build.m4
  
-SUBDIRS = portals libcfs knals unals router tests doc utils
+SUBDIRS = portals libcfs knals unals router tests doc utils include
diff --git a/lustre/portals/include/.cvsignore b/lustre/portals/include/.cvsignore

index d45f796..94d3790 100644 (file)
--- a/lustre/portals/include/.cvsignore
+++ b/lustre/portals/include/.cvsignore
@@ -2,3 +2,5 @@ config.h
  stamp-h
  stamp-h1
  stamp-h.in
+Makefile
+Makefile.in
diff --git a/lustre/portals/include/Makefile.am b/lustre/portals/include/Makefile.am

new file mode 100644 (file)

index 0000000..2b3eb8c
--- /dev/null
+++ b/lustre/portals/include/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = linux portals
+
+EXTRA_DIST = cygwin-ioctl.h
diff --git a/lustre/portals/include/linux/.cvsignore b/lustre/portals/include/linux/.cvsignore

new file mode 100644 (file)

index 0000000..282522d
--- /dev/null
+++ b/lustre/portals/include/linux/.cvsignore
@@ -0,0 +1,2 @@
+Makefile
+Makefile.in
diff --git a/lustre/portals/include/linux/Makefile.am b/lustre/portals/include/linux/Makefile.am

new file mode 100644 (file)

index 0000000..3c28c6e
--- /dev/null
+++ b/lustre/portals/include/linux/Makefile.am
@@ -0,0 +1,4 @@
+linuxdir = $(includedir)/linux
+
+EXTRA_DIST = kp30.h kpr.h libcfs.h lustre_list.h portals_compat25.h    \
+       portals_lib.h
diff --git a/lustre/portals/include/linux/libcfs.h b/lustre/portals/include/linux/libcfs.h

index efdc8fe..6772e82 100644 (file)
--- a/lustre/portals/include/linux/libcfs.h
+++ b/lustre/portals/include/linux/libcfs.h
@@ -2,7 +2,7 @@
   * vim:expandtab:shiftwidth=8:tabstop=8:
   */
  #ifndef _LIBCFS_H
-
+#define _LIBCFS_H
  
  #define PORTAL_DEBUG
  
diff --git a/lustre/portals/include/portals/.cvsignore b/lustre/portals/include/portals/.cvsignore

new file mode 100644 (file)

index 0000000..282522d
--- /dev/null
+++ b/lustre/portals/include/portals/.cvsignore
@@ -0,0 +1,2 @@
+Makefile
+Makefile.in
diff --git a/lustre/portals/include/portals/Makefile.am b/lustre/portals/include/portals/Makefile.am

new file mode 100644 (file)

index 0000000..5ed6090
--- /dev/null
+++ b/lustre/portals/include/portals/Makefile.am
@@ -0,0 +1,10 @@
+portalsdir=$(includedir)/portals
+
+if UTILS
+portals_HEADERS = list.h
+endif
+
+EXTRA_DIST = api.h api-support.h arg-blocks.h defines.h errno.h                \
+       internal.h lib-dispatch.h lib-nal.h lib-p30.h lib-types.h       \
+       list.h lltrace.h myrnal.h nal.h nalids.h p30.h ppid.h ptlctl.h  \
+       socknal.h stringtab.h types.h
diff --git a/lustre/portals/include/portals/types.h b/lustre/portals/include/portals/types.h

index 74ef493..80995e9 100644 (file)
--- a/lustre/portals/include/portals/types.h
+++ b/lustre/portals/include/portals/types.h
@@ -1,26 +1,15 @@
  #ifndef _P30_TYPES_H_
  #define _P30_TYPES_H_
  
-#ifdef __linux__
-# include <asm/types.h>
-# if defined(__powerpc__) && !defined(__KERNEL__)
-#  define __KERNEL__
-#  include <asm/timex.h>
-#  undef __KERNEL__
-# else
-#  include <asm/timex.h>
-# endif
-#else
-# include <sys/types.h>
-typedef u_int32_t __u32;
-typedef u_int64_t __u64;
-#endif
+#include <asm/types.h>
  
  #ifdef __KERNEL__
  # include <linux/time.h>
+# include <asm/timex.h>
  #else
  # include <sys/time.h>
  # define do_gettimeofday(tv) gettimeofday(tv, NULL);
+typedef unsigned long long cycles_t;
  #endif
  
  #include <portals/errno.h>
diff --git a/lustre/portals/knals/qswnal/qswnal_cb.c b/lustre/portals/knals/qswnal/qswnal_cb.c

index 6bff730..08453a0 100644 (file)
--- a/lustre/portals/knals/qswnal/qswnal_cb.c
+++ b/lustre/portals/knals/qswnal/qswnal_cb.c
@@ -585,7 +585,7 @@ kqswnal_launch (kqswnal_tx_t *ktx)
          /* Don't block for transmit descriptor if we're in interrupt context */
          int   attr = in_interrupt() ? (EP_NO_SLEEP | EP_NO_ALLOC) : 0;
          int   dest = kqswnal_nid2elanid (ktx->ktx_nid);
-        long  flags;
+        unsigned long flags;
          int   rc;
  
          ktx->ktx_launchtime = jiffies;
@@ -1429,7 +1429,7 @@ kqswnal_rx (kqswnal_rx_t *krx)
  void 
  kqswnal_rxhandler(EP_RXD *rxd)
  {
-        long          flags;
+        unsigned long flags;
          int           nob    = ep_rxd_len (rxd);
          int           status = ep_rxd_status (rxd);
          kqswnal_rx_t *krx    = (kqswnal_rx_t *)ep_rxd_arg (rxd);
@@ -1732,7 +1732,7 @@ kqswnal_scheduler (void *arg)
          kqswnal_rx_t    *krx;
          kqswnal_tx_t    *ktx;
          kpr_fwd_desc_t  *fwd;
-        long             flags;
+        unsigned long    flags;
          int              rc;
          int              counter = 0;
          int              shuttingdown = 0;
diff --git a/lustre/portals/knals/socknal/socknal_cb.c b/lustre/portals/knals/socknal/socknal_cb.c

index f02cbda..37695c9 100644 (file)
--- a/lustre/portals/knals/socknal/socknal_cb.c
+++ b/lustre/portals/knals/socknal/socknal_cb.c
@@ -1187,7 +1187,7 @@ ksocknal_fmb_callback (void *arg, int error)
  {
          ksock_fmb_t       *fmb = (ksock_fmb_t *)arg;
          ksock_fmb_pool_t  *fmp = fmb->fmb_pool;
-        ptl_hdr_t         *hdr = (ptl_hdr_t *)page_address(fmb->fmb_kiov[0].kiov_page);
+        ptl_hdr_t         *hdr = &fmb->fmb_hdr;
          ksock_conn_t      *conn = NULL;
          ksock_sched_t     *sched;
          unsigned long      flags;
diff --git a/lustre/portals/unals/Makefile.am b/lustre/portals/unals/Makefile.am

index 4c842a1..15080b0 100644 (file)
--- a/lustre/portals/unals/Makefile.am
+++ b/lustre/portals/unals/Makefile.am
@@ -2,7 +2,12 @@ if LIBLUSTRE
  noinst_LIBRARIES = libtcpnal.a
  endif
  
-pkginclude_HEADERS =  pqtimer.h dispatch.h table.h timer.h connection.h ipmap.h bridge.h procbridge.h
-libtcpnal_a_SOURCES = debug.c pqtimer.c select.c table.c pqtimer.h dispatch.h table.h timer.h address.c procapi.c proclib.c connection.c tcpnal.c connection.h
+noinst_HEADERS = pqtimer.h dispatch.h table.h timer.h connection.h     \
+       ipmap.h bridge.h procbridge.h
+
+libtcpnal_a_SOURCES = debug.c pqtimer.c select.c table.c pqtimer.h     \
+       dispatch.h table.h timer.h address.c procapi.c proclib.c        \
+       connection.c tcpnal.c connection.h
+
  libtcpnal_a_CPPFLAGS = $(LLCPPFLAGS)
  libtcpnal_a_CFLAGS = $(LLCFLAGS)
diff --git a/lustre/portals/utils/Makefile.am b/lustre/portals/utils/Makefile.am

index 15c1774..851a8e1 100644 (file)
--- a/lustre/portals/utils/Makefile.am
+++ b/lustre/portals/utils/Makefile.am
@@ -14,8 +14,10 @@ libuptlctl_a_CPPFLAGS = $(LLCPPFLAGS)
  libuptlctl_a_CFLAGS = $(LLCFLAGS)
  endif
  
+if UTILS
  sbin_PROGRAMS = acceptor ptlctl debugctl routerstat wirecheck gmnalnid
  lib_LIBRARIES = libptlctl.a
+endif
  
  acceptor_SOURCES = acceptor.c
  
diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c

index 489100e..1db0606 100644 (file)
--- a/lustre/ptlrpc/client.c
+++ b/lustre/ptlrpc/client.c
@@ -1274,12 +1274,15 @@ void ptlrpc_retain_replayable_request(struct ptlrpc_request *req,
  
          LASSERT_SPIN_LOCKED(&imp->imp_lock);
  
+        /* clear this for new requests that were resent as well
+           as resent replayed requests. */
+        lustre_msg_clear_flags(req->rq_reqmsg, MSG_RESENT);
+
          /* don't re-add requests that have been replayed */
          if (!list_empty(&req->rq_replay_list))
                  return;
  
-        lustre_msg_add_flags(req->rq_reqmsg,
-                             MSG_REPLAY);
+        lustre_msg_add_flags(req->rq_reqmsg, MSG_REPLAY);
  
          LASSERT(imp->imp_replayable);
          /* Balanced in ptlrpc_free_committed, usually. */
@@ -1591,16 +1594,8 @@ int ptlrpc_replay_req(struct ptlrpc_request *req)
          aa->praa_old_state = req->rq_send_state;
          req->rq_send_state = LUSTRE_IMP_REPLAY;
          req->rq_phase = RQ_PHASE_NEW;
-        /*
-         * Q: "How can a req get on the replay list if it wasn't replied?"
-         * A: "If we failed during the replay of this request, it will still
-         *     be on the list, but rq_replied will have been reset to 0."
-         */
-        if (req->rq_replied) {
-                aa->praa_old_status = req->rq_repmsg->status;
-                req->rq_status = 0;
-                req->rq_replied = 0;
-        }
+        aa->praa_old_status = req->rq_repmsg->status;
+        req->rq_status = 0;
  
          req->rq_interpret_reply = ptlrpc_replay_interpret;
          atomic_inc(&req->rq_import->imp_replay_inflight);
diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c

index f2d034f..0942192 100644 (file)
--- a/lustre/ptlrpc/import.c
+++ b/lustre/ptlrpc/import.c
@@ -100,6 +100,10 @@ int ptlrpc_set_import_discon(struct obd_import *imp)
          spin_lock_irqsave(&imp->imp_lock, flags);
  
          if (imp->imp_state == LUSTRE_IMP_FULL) {
+                CERROR("%s: connection lost to %s@%s\n",
+                       imp->imp_obd->obd_name, 
+                       imp->imp_target_uuid.uuid,
+                       imp->imp_connection->c_remote_uuid.uuid);
                  IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_DISCON);
                  spin_unlock_irqrestore(&imp->imp_lock, flags);
                  obd_import_event(imp->imp_obd, imp, IMP_EVENT_DISCON);
@@ -250,7 +254,7 @@ int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid)
          IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_CONNECTING);
  
          imp->imp_conn_cnt++;
-        imp->imp_last_replay_transno = 0;
+        imp->imp_resend_replay = 0;
  
          if (imp->imp_remote_handle.cookie == 0) {
                  initial_connect = 1;
@@ -386,19 +390,27 @@ static int ptlrpc_connect_interpret(struct ptlrpc_request *request,
                                 request->rq_repmsg->handle.cookie);
                          imp->imp_remote_handle = request->rq_repmsg->handle;
                  } else {
-                        CERROR("reconnected to %s@%s after partition\n",
+                        CDEBUG(D_HA, "reconnected to %s@%s after partition\n",
                                 imp->imp_target_uuid.uuid,
                                 imp->imp_connection->c_remote_uuid.uuid);
                  }
  
-                if (imp->imp_invalid)
+                if (imp->imp_invalid) {
                          IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED);
-                else
+                } else if (MSG_CONNECT_RECOVERING & msg_flags) {
+                        CDEBUG(D_HA, "%s: reconnected to %s during replay\n",
+                               imp->imp_obd->obd_name, 
+                               imp->imp_target_uuid.uuid);
+                        imp->imp_resend_replay = 1;
+                        IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY);
+                } else {
                          IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER);
+                }
          } 
          else if ((MSG_CONNECT_RECOVERING & msg_flags) && !imp->imp_invalid) {
                  LASSERT(imp->imp_replayable);
                  imp->imp_remote_handle = request->rq_repmsg->handle;
+                imp->imp_last_replay_transno = 0;
                  IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY);
          } 
          else {
@@ -440,7 +452,7 @@ finish:
                  if (aa->pcaa_initial_connect && !imp->imp_initial_recov) {
                          ptlrpc_deactivate_import(imp);
                  }
-                CDEBUG(D_ERROR, "recovery of %s on %s failed (%d)\n",
+                CDEBUG(D_HA, "recovery of %s on %s failed (%d)\n",
                         imp->imp_target_uuid.uuid,
                         (char *)imp->imp_connection->c_remote_uuid.uuid, rc);
          }
@@ -453,7 +465,15 @@ static int completed_replay_interpret(struct ptlrpc_request *req,
                                      void * data, int rc)
  {
          atomic_dec(&req->rq_import->imp_replay_inflight);
-        ptlrpc_import_recovery_state_machine(req->rq_import);
+        if (req->rq_status == 0) {
+                ptlrpc_import_recovery_state_machine(req->rq_import);
+        } else {
+                CDEBUG(D_HA, "%s: LAST_REPLAY message error: %d, "
+                       "reconnecting\n", 
+                       req->rq_import->imp_obd->obd_name, req->rq_status);
+                ptlrpc_connect_import(req->rq_import, NULL);
+        }
+
          RETURN(0);
  }
  
@@ -534,6 +554,10 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
                          GOTO(out, rc);
                  IMPORT_SET_STATE(imp, LUSTRE_IMP_FULL);
                  ptlrpc_activate_import(imp);
+                CERROR("%s: connection restored to %s@%s\n",
+                       imp->imp_obd->obd_name, 
+                       imp->imp_target_uuid.uuid,
+                       imp->imp_connection->c_remote_uuid.uuid);
          } 
  
          if (imp->imp_state == LUSTRE_IMP_FULL) {
diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c

index 6c7c9a3..91a9e88 100644 (file)
--- a/lustre/ptlrpc/niobuf.c
+++ b/lustre/ptlrpc/niobuf.c
@@ -31,13 +31,12 @@
  #include <linux/obd.h>
  #include "ptlrpc_internal.h"
  
-static int ptl_send_buf (ptl_handle_md_t *mdh, void *base, int len, 
+static int ptl_send_buf (ptl_handle_md_t *mdh, void *base, int len,
                           ptl_ack_req_t ack, struct ptlrpc_cb_id *cbid,
                           struct ptlrpc_connection *conn, int portal, __u64 xid)
  {
          ptl_process_id_t remote_id;
          int              rc;
-        int              rc2;
          ptl_md_t         md;
          char str[PTL_NALFMT_SIZE];
          ENTRY;
@@ -78,15 +77,16 @@ static int ptl_send_buf (ptl_handle_md_t *mdh, void *base, int len,
          CDEBUG(D_NET, "Sending %d bytes to portal %d, xid "LPD64"\n",
                 len, portal, xid);
  
-        rc2 = PtlPut (*mdh, ack, remote_id, portal, 0, xid, 0, 0);
+        rc = PtlPut (*mdh, ack, remote_id, portal, 0, xid, 0, 0);
          if (rc != PTL_OK) {
+                int rc2;
                  /* We're going to get an UNLINK event when I unlink below,
                   * which will complete just like any other failed send, so
                   * I fall through and return success here! */
                  CERROR("PtlPut("LPU64", %d, "LPD64") failed: %d\n",
                         remote_id.nid, portal, xid, rc);
                  rc2 = PtlMDUnlink(*mdh);
-                LASSERT (rc2 == PTL_OK);
+                LASSERTF(rc2 == PTL_OK, "rc2 = %d\n", rc2);
          }
  
          RETURN (0);
diff --git a/lustre/ptlrpc/ptlrpcd.c b/lustre/ptlrpc/ptlrpcd.c

index 687f588..71cfdfd 100644 (file)
--- a/lustre/ptlrpc/ptlrpcd.c
+++ b/lustre/ptlrpc/ptlrpcd.c
@@ -42,7 +42,7 @@
  #include <linux/kp30.h>
  #include <linux/lustre_net.h>
  
-#ifndef  __CYGWIN__
+#ifdef __KERNEL__
  # include <linux/ctype.h>
  # include <linux/init.h>
  #else
@@ -135,6 +135,13 @@ static int ptlrpcd_check(struct ptlrpcd_ctl *pc)
                  }
          }
  
+        if (rc == 0) {
+                /* If new requests have been added, make sure to wake up */
+                spin_lock_irqsave(&pc->pc_set->set_new_req_lock, flags);
+                rc = !list_empty(&pc->pc_set->set_new_requests);
+                spin_unlock_irqrestore(&pc->pc_set->set_new_req_lock, flags);
+        }
+
          RETURN(rc);
  }
  
diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c

index ece3a47..a86679d 100644 (file)
--- a/lustre/ptlrpc/recover.c
+++ b/lustre/ptlrpc/recover.c
@@ -130,16 +130,16 @@ void ptlrpc_initiate_recovery(struct obd_import *imp)
          LASSERT (obd_lustre_upcall != NULL);
          
          if (strcmp(obd_lustre_upcall, "DEFAULT") == 0) {
-                CDEBUG(D_ERROR, "%s: starting recovery without upcall\n",
+                CDEBUG(D_HA, "%s: starting recovery without upcall\n",
                          imp->imp_target_uuid.uuid);
                  ptlrpc_connect_import(imp, NULL);
          } 
          else if (strcmp(obd_lustre_upcall, "NONE") == 0) {
-                CDEBUG(D_ERROR, "%s: recovery diabled\n",
+                CDEBUG(D_HA, "%s: recovery disabled\n",
                          imp->imp_target_uuid.uuid);
          } 
          else {
-                CDEBUG(D_ERROR, "%s: calling upcall to start recovery\n",
+                CDEBUG(D_HA, "%s: calling upcall to start recovery\n",
                          imp->imp_target_uuid.uuid);
                  ptlrpc_run_failed_import_upcall(imp);
          }
@@ -151,7 +151,7 @@ int ptlrpc_replay_next(struct obd_import *imp, int *inflight)
  {
          int rc = 0;
          struct list_head *tmp, *pos;
-        struct ptlrpc_request *req;
+        struct ptlrpc_request *req = NULL;
          unsigned long flags;
          __u64 last_transno;
          ENTRY;
@@ -187,16 +187,36 @@ int ptlrpc_replay_next(struct obd_import *imp, int *inflight)
           */
          list_for_each_safe(tmp, pos, &imp->imp_replay_list) {
                  req = list_entry(tmp, struct ptlrpc_request, rq_replay_list);
+
+                /* If we need to resend the last sent transno (because a
+                   reconnect has occurred), then stop on the matching
+                   req and send it again. If, however, the last sent
+                   transno has been committed then we continue replay
+                   from the next request. */
+                if (imp->imp_resend_replay && 
+                    req->rq_transno == last_transno) {
+                        lustre_msg_add_flags(req->rq_reqmsg, MSG_RESENT);
+                        break;
+                }
+
                  if (req->rq_transno > last_transno) {
-                        rc = ptlrpc_replay_req(req);
-                        if (rc) {
-                                CERROR("recovery replay error %d for req "
-                                       LPD64"\n", rc, req->rq_xid);
-                                RETURN(rc);
-                        }
-                        *inflight = 1;
+                        imp->imp_last_replay_transno = req->rq_transno;
                          break;
                  }
+
+                req = NULL;
+        }
+
+        imp->imp_resend_replay = 0;
+
+        if (req != NULL) {
+                rc = ptlrpc_replay_req(req);
+                if (rc) {
+                        CERROR("recovery replay error %d for req "
+                               LPD64"\n", rc, req->rq_xid);
+                        RETURN(rc);
+                }
+                *inflight = 1;
          }
          RETURN(rc);
  }
@@ -357,13 +377,13 @@ static int ptlrpc_recover_import_no_retry(struct obd_import *imp,
          if (rc)
                  RETURN(rc);
  
-        CDEBUG(D_ERROR, "%s: recovery started, waiting\n",
+        CDEBUG(D_HA, "%s: recovery started, waiting\n",
                 imp->imp_target_uuid.uuid);
  
          lwi = LWI_TIMEOUT(MAX(obd_timeout * HZ, 1), NULL, NULL);
          rc = l_wait_event(imp->imp_recovery_waitq,
                            !ptlrpc_import_in_recovery(imp), &lwi);
-        CDEBUG(D_ERROR, "%s: recovery finished\n",
+        CDEBUG(D_HA, "%s: recovery finished\n",
                 imp->imp_target_uuid.uuid);
  
          RETURN(rc);
diff --git a/lustre/scripts/Makefile.am b/lustre/scripts/Makefile.am

index 5e57916..fe13cc7 100644 (file)
--- a/lustre/scripts/Makefile.am
+++ b/lustre/scripts/Makefile.am
@@ -4,10 +4,12 @@
  # See the file COPYING in this distribution
  
  EXTRA_DIST = license-status maketags.sh lustre.spec version_tag.pl.in  \
-       $(initd_SCRIPTS) lustre.spec.in lustre-kernel-2.4.spec.in       \
+       lustre lustre.spec.in lustre-kernel-2.4.spec.in \
         lmake linux-merge-config.awk linux-merge-modules.awk            \
         linux-rhconfig.h
  
  initddir = $(sysconfdir)/init.d
+if UTILS
  initd_SCRIPTS = lustre
+endif
  
diff --git a/lustre/scripts/lbuild b/lustre/scripts/lbuild

index 1cd283e..9b934f0 100755 (executable)
--- a/lustre/scripts/lbuild
+++ b/lustre/scripts/lbuild
@@ -20,6 +20,10 @@ SERIES=
  CONFIG=
  VERSION=
  
+RHBUILD=0
+LINUX26=0
+SUSEBUILD=0
+
  BASE_ARCH=
  BIGMEM_ARCHS=
  BOOT_ARCHS=
@@ -182,12 +186,12 @@ load_target()
  
      CONFIG_FILE="$TOPDIR/lustre/kernel_patches/kernel_configs/$CONFIG"
      [ -r "$CONFIG_FILE" ] || \
-       fatal 1 "Target $TARGET's config file $CONFIG missing from $TOPDIR/lustre/kernel_patches/kernel_configs/configs."
+       fatal 1 "Target $TARGET's config file $CONFIG missing from $TOPDIR/lustre/kernel_patches/kernel_configs/."
  
      if [ "$EXTRA_VERSION_save" ] ; then
         EXTRA_VERSION="$EXTRA_VERSION_save"
      elif ! (( $RELEASE )) ; then
-       EXTRA_VERSION="${EXTRA_VERSION}-${TAG//_/}.${TIMESTAMP}"
+       EXTRA_VERSION="${EXTRA_VERSION}-${TAG}.${TIMESTAMP}"
      fi
      # EXTRA_VERSION=${EXTRA_VERSION//-/_}
  
@@ -195,7 +199,7 @@ load_target()
  
      BUILD_ARCHS=
      for arch in $(uniqify "$ALL_ARCHS") ; do
-       if [ -z "$TARGET_ARCHS" ] || echo "$TARGET_ARCHS" | grep -s "$arch" ; then
+       if [ -z "$TARGET_ARCHS" ] || echo "$TARGET_ARCHS" | grep "$arch" >/dev/null 2>/dev/null ; then
             BUILD_ARCHS="$BUILD_ARCHS $arch"
         fi
      done
@@ -270,9 +274,11 @@ patch_linux()
      popd >/dev/null
      echo "Full patch has been saved in ${FULL_PATCH##*/}."
      echo "Replacing .config files..."
-    [ -d linux/configs ] || mkdir linux/configs
+    [ -d linux/configs ] || mkdir linux/configs || \
+        fatal 1 "Error creating configs directory."
      rm -f linux/configs/*
-    cp -v lustre/kernel_patches/kernel_configs/kernel-${VERSION}-${TARGET}*.config linux/configs/
+    cp -v lustre/kernel_patches/kernel_configs/kernel-${VERSION}-${TARGET}*.config linux/configs/ || \
+       fatal 1 "Error copying in kernel configs."
  }
  
  pack_linux()
@@ -310,6 +316,8 @@ prep_build()
         -e "s/@SMP_ARCHS@/$SMP_ARCHS/g" \
         -e "s/@UP_ARCHS@/$UP_ARCHS/g" \
         -e "s/@RHBUILD@/$RHBUILD/g" \
+       -e "s/@LINUX26@/$LINUX26/g" \
+       -e "s/@SUSEBUILD@/$SUSEBUILD/g" \
         < $TOPDIR/lustre/scripts/lustre-kernel-2.4.spec.in \
         > lustre-kernel-2.4.spec
      [ -d SRPMS ] || mkdir SRPMS
diff --git a/lustre/scripts/lustre-kernel-2.4.spec.in b/lustre/scripts/lustre-kernel-2.4.spec.in

index f177c17..3ec63bb 100644 (file)
--- a/lustre/scripts/lustre-kernel-2.4.spec.in
+++ b/lustre/scripts/lustre-kernel-2.4.spec.in
@@ -355,7 +355,10 @@ BuildKernel()
         --kerneldir $RPM_SOURCE_DIR \
         -j $RPM_BUILD_NCPUS \
         --destdir $RPM_BUILD_ROOT \
-       -- @CONFIGURE_FLAGS@
+       -- --enable-modules \
+       --disable-doc --disable-tests \
+       --disable-utils --disable-liblustre \
+       @CONFIGURE_FLAGS@
  }
  
  BuildLustre()
@@ -371,7 +374,10 @@ BuildLustre()
         --kerneldir $RPM_SOURCE_DIR \
         -j $RPM_BUILD_NCPUS \
         --destdir $RPM_BUILD_ROOT \
-       -- @CONFIGURE_FLAGS@
+       -- --enable-utils \
+       --disable-doc --disable-tests \
+       --disable-modules --disable-liblustre \
+       @CONFIGURE_FLAGS@
  }
  
  SaveHeaders()
@@ -401,14 +407,12 @@ BuildKernel jensen
  BuildKernel smp
  %endif
  
-# we want this one last, so that it is the one populating /usr/bin
-%if %{buildup} && %{buildbase}
+%if %{buildup}
  BuildKernel
-%elseif %{buildbase}
-BuildLustre
  %endif
  
  %if %{buildbase}
+BuildLustre
  SaveHeaders
  %endif
  
@@ -520,14 +524,14 @@ if [ -f ../../savedheaders/%{_target_cpu}/up/version.h ] ; then
      HEADER_FILE=../../savedheaders/%{_target_cpu}/up/version.h
  else
      # test build not including uniprocessor, must get info from somewhere
-    HEADER_FILE=$(ls ../../savedheaders/*/*/version.h | head -1)
+    HEADER_FILE=$(ls ../../savedheaders/*/*/version.h | head -n 1)
  fi
  grep -v UTS_RELEASE $HEADER_FILE >> version.h
  rm -rf ../../savedheaders
  } ; popd
  touch $RPM_BUILD_ROOT/boot/kernel.h-%{kversion}
  
-rm -f $RPM_BUILD_ROOT/usr/include/linux
+# rm -f $RPM_BUILD_ROOT/usr/include/linux
  
  rm -rf $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}/savedheaders
  
@@ -822,7 +826,9 @@ exit 0
  /usr/bin/*
  /usr/lib/lustre/python
  /etc/init.d/lustre
-/usr/include/lustre
+/usr/include/lustre/*
+/usr/include/portals/*
+/usr/include/linux/*
  /lib/lib*.a
  
  #%files -n lustre-doc
diff --git a/lustre/scripts/lustre.spec.in b/lustre/scripts/lustre.spec.in

index 329ef4c..39ccc41 100644 (file)
--- a/lustre/scripts/lustre.spec.in
+++ b/lustre/scripts/lustre.spec.in
@@ -68,12 +68,23 @@ Configures openldap server for LDAP Lustre config database
  %endif
  
  %build
+# if RPM_BUILD_NCPUS unset, set it
+if [ -z "$RPM_BUILD_NCPUS" ] ; then
+    RPM_BUILD_NCPUS=$(egrep -c "^cpu[0-9]+" /proc/stat || :)
+    if [ $RPM_BUILD_NCPUS -eq 0 ] ; then
+        RPM_BUILD_NCPUS=1
+    fi
+    if [ $RPM_BUILD_NCPUS -gt 8 ] ; then
+        RPM_BUILD_NCPUS=8
+    fi
+fi
+
  rm -rf $RPM_BUILD_ROOT
  
  # Set an explicit path to our Linux tree, if we can.
  cd $RPM_BUILD_DIR/lustre-%{version}
  ./configure --with-linux='%{linuxdir}' %{disable_doc} --disable-liblustre
-make
+make -j $RPM_BUILD_NCPUS -s
  
  %install
  cd $RPM_BUILD_DIR/lustre-%{version}
diff --git a/lustre/tests/.cvsignore b/lustre/tests/.cvsignore

index 778e8f1..a27f828 100644 (file)
--- a/lustre/tests/.cvsignore
+++ b/lustre/tests/.cvsignore
@@ -63,3 +63,4 @@ logs
  ostactive
  ll_dirstripe_verify
  rename_many
+openfilleddirunlink
diff --git a/lustre/tests/Makefile.am b/lustre/tests/Makefile.am

index 13abda9..1c19ee4 100644 (file)
--- a/lustre/tests/Makefile.am
+++ b/lustre/tests/Makefile.am
@@ -3,16 +3,19 @@ AM_CPPFLAGS = $(LLCPPFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
  AM_CFLAGS = $(LLCFLAGS)
  # LDADD = -lldap
  # LDADD := -lreadline -ltermcap # -lefence
-EXTRA_DIST = $(pkgexample_SCRIPTS) $(noinst_SCRIPTS) $(noinst_DATA) \
-       sanity.sh rundbench
-if TESTS
-pkgexample_SCRIPTS = llmount.sh llmountcleanup.sh llecho.sh llechocleanup.sh
-pkgexample_SCRIPTS += local.sh echo.sh uml.sh lov.sh
+
+pkgexample_scripts = llmount.sh llmountcleanup.sh llecho.sh llechocleanup.sh
+pkgexample_scripts += local.sh echo.sh uml.sh lov.sh
  noinst_DATA =
  noinst_SCRIPTS = leak_finder.pl llecho.sh llmount.sh llmountcleanup.sh tbox.sh
  noinst_SCRIPTS += llrmount.sh runfailure-mds runvmstat runfailure-net
  noinst_SCRIPTS += runfailure-ost runiozone runregression-net.sh runtests
  noinst_SCRIPTS += sanity.sh rundbench
+
+EXTRA_DIST = $(pkgexample_scripts) $(noinst_SCRIPTS) $(noinst_DATA) \
+       sanity.sh rundbench
+if TESTS
+pkgexample_SCRIPTS = $(pkgexample_scripts)
  noinst_PROGRAMS = openunlink testreq truncate directio openme writeme open_delay
  noinst_PROGRAMS += tchmod toexcl fsx test_brw openclose createdestroy
  noinst_PROGRAMS += stat createmany statmany multifstat createtest mlink utime cmknod
diff --git a/lustre/tests/cfg/insanity-mdev.sh b/lustre/tests/cfg/insanity-mdev.sh

index d3f0c6e..fa15cd2 100644 (file)
--- a/lustre/tests/cfg/insanity-mdev.sh
+++ b/lustre/tests/cfg/insanity-mdev.sh
@@ -6,22 +6,25 @@ EXTRA_OSTS=${EXTRA_OSTS:-mdev7}
  client_HOST=client
  LIVE_CLIENT=${LIVE_CLIENT:-mdev6}
  # This should always be a list, not a regexp
-#FAIL_CLIENTS=${FAIL_CLIENTS:-mdev7}
-FAIL_CLIENTS=${FAIL_CLIENTS:-""}
+FAIL_CLIENTS=${FAIL_CLIENTS:-mdev8}
+#FAIL_CLIENTS=${FAIL_CLIENTS:-""}
  
  NETTYPE=${NETTYPE:-tcp}
  
  TIMEOUT=${TIMEOUT:-30}
-PTLDEBUG=${PTLDEBUG:-0}
-SUBSYSTEM=${SUBSYSTEM:-0}
+PTLDEBUG=${PTLDEBUG:-0x3f0400}
+SUBSYSTEM=${SUBSYSTEM:- 0xffb7e3ff}
  MOUNT=${MOUNT:-"/mnt/lustre"}
  UPCALL=${CLIENT_UPCALL:-`pwd`/replay-single-upcall.sh}
  
  MDSDEV=${MDSDEV:-/dev/sda1}
  MDSSIZE=${MDSSIZE:-50000}
+MDSJOURNALSIZE=${MDSJOURNALSIZE:-0}
  
  OSTDEV=${OSTDEV:-$TMP/ost%d-`hostname`}
-OSTSIZE=${OSTSIZE:=50000}
+OSTSIZE=${OSTSIZE:=500000}
+OSTJOURNALSIZE=${OSTJOURNALSIZE:-0}
+
  FSTYPE=${FSTYPE:-ext3}
  STRIPE_BYTES=${STRIPE_BYTES:-1048576} 
  STRIPES_PER_OBJ=${STRIPES_PER_OBJ:-0}
diff --git a/lustre/tests/cfg/local.sh b/lustre/tests/cfg/local.sh

index 14f2207..9af8621 100644 (file)
--- a/lustre/tests/cfg/local.sh
+++ b/lustre/tests/cfg/local.sh
@@ -25,7 +25,7 @@ OSTDEV=${OSTDEV:-$ROOT/tmp/ost1-`hostname`}
  OSTSIZE=${OSTSIZE:-50000}
  FSTYPE=${FSTYPE:-ext3}
  TIMEOUT=${TIMEOUT:-20}
-UPCALL=${UPCALL:-$PWD/replay-single-upcall.sh}
+UPCALL=${UPCALL:-DEFAULT}
  
  STRIPE_BYTES=${STRIPE_BYTES:-65536}
  STRIPES_PER_OBJ=${STRIPES_PER_OBJ:-0}
diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh

index 4212cab..2445e19 100644 (file)
--- a/lustre/tests/conf-sanity.sh
+++ b/lustre/tests/conf-sanity.sh
@@ -218,7 +218,7 @@ test_5b() {
         stop_mds || return 2
         stop_ost || return 3
  
-       lsmod | grep -q portals && return 3
+       lsmod | grep -q portals && return 4
         return 0
  
  }
@@ -230,7 +230,7 @@ test_5c() {
  
         [ -d $MOUNT ] || mkdir -p $MOUNT
         $LCONF --nosetup --node client_facet $XMLCONFIG > /dev/null
-       llmount $mds_HOST://wrong_mds_svc/client_facet $MOUNT  && exit 1
+       llmount $mds_HOST://wrong_mds_svc/client_facet $MOUNT  && return 1
  
         # cleanup client modules
         $LCONF --cleanup --nosetup --node client_facet $XMLCONFIG > /dev/null
@@ -238,12 +238,33 @@ test_5c() {
         stop_mds || return 2
         stop_ost || return 3
  
-       lsmod | grep -q portals && return 3
+       lsmod | grep -q portals && return 4
         return 0
  
  }
  run_test 5c "cleanup after failed mount (bug 2712)"
  
+test_5d() {
+       start_ost
+       start_mds
+       stop_ost --force
+
+       [ -d $MOUNT ] || mkdir -p $MOUNT
+       $LCONF --nosetup --node client_facet $XMLCONFIG > /dev/null
+       llmount $mds_HOST://mds_svc/client_facet $MOUNT  || return 1 
+
+       umount $MOUNT || return 2
+       # cleanup client modules
+       $LCONF --cleanup --nosetup --node client_facet $XMLCONFIG > /dev/null
+       
+       stop_mds || return 3
+
+       lsmod | grep -q portals && return 4
+       return 0
+
+}
+run_test 5d "ost down, don't crash during mount attempt"
+
  test_6() {
         setup
         manual_umount_client
diff --git a/lustre/tests/insanity.sh b/lustre/tests/insanity.sh

index 68d0ff9..9c05b27 100755 (executable)
--- a/lustre/tests/insanity.sh
+++ b/lustre/tests/insanity.sh
@@ -12,6 +12,9 @@ init_test_env $@
  
  ALWAYS_EXCEPT="10"
  
+SETUP=${SETUP:-"setup"}
+CLEANUP=${CLEANUP:-"cleanup"}
+
  build_test_filter
  
  assert_env mds_HOST ost1_HOST ost2_HOST client_HOST LIVE_CLIENT 
@@ -128,6 +131,8 @@ gen_config() {
  }
  
  setup() {
+    gen_config
+
      rm -rf logs/*
      for i in `seq $NUMOST`; do
         wait_for ost$i
@@ -205,20 +210,17 @@ node_to_ost() {
  
  
  if [ "$ONLY" == "cleanup" ]; then
-    cleanup
+    $CLEANUP
      exit
  fi
  
-if [ -z "$NOSETUP" ]; then
-    gen_config
-    setup
-fi
-
  if [ ! -z "$EVAL" ]; then
      eval "$EVAL"
      exit $?
  fi
  
+$SETUP
+
  if [ "$ONLY" == "setup" ]; then
      exit 0
  fi
@@ -615,4 +617,4 @@ test_10() {
  run_test 10 "Running Availability for 6 hours..."
  
  equals_msg "Done, cleaning up"
-cleanup
+$CLEANUP
diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh

index 882c716..8e7ca55 100755 (executable)
--- a/lustre/tests/recovery-small.sh
+++ b/lustre/tests/recovery-small.sh
@@ -7,7 +7,7 @@ ALWAYS_EXCEPT="20b"
  
  
  LUSTRE=${LUSTRE:-`dirname $0`/..}
-UPCALL=${UPCALL:-$PWD/recovery-small-upcall.sh}
+
  . $LUSTRE/tests/test-framework.sh
  
  init_test_env $@
@@ -342,7 +342,7 @@ test_20a() {        # bug 2983 - ldlm_handle_enqueue cleanup
         mkdir -p $DIR/$tdir
         multiop $DIR/$tdir/${tfile} O_wc &
         MULTI_PID=$!
-       usleep 500
+       sleep 1
         cancel_lru_locks OSC
  #define OBD_FAIL_LDLM_ENQUEUE_EXTENT_ERR 0x308
         do_facet ost sysctl -w lustre.fail_loc=0x80000308
diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh

index 9c1f1e1..77e66e7 100755 (executable)
--- a/lustre/tests/replay-dual.sh
+++ b/lustre/tests/replay-dual.sh
@@ -9,6 +9,9 @@ init_test_env $@
  
  . ${CONFIG:=$LUSTRE/tests/cfg/local.sh}
  
+SETUP=${SETUP:-"setup"}
+CLEANUP=${CLEANUP:-"cleanup"}
+
  gen_config() {
      rm -f $XMLCONFIG
      add_mds mds --dev $MDSDEV --size $MDSSIZE
@@ -35,8 +38,8 @@ cleanup() {
          fail mds
      fi
  
-    umount $MOUNT2
-    umount $MOUNT
+    umount $MOUNT2 || true
+    umount $MOUNT  || true
      rmmod llite
      stop mds ${FORCE}
      stop ost2 ${FORCE}
@@ -49,25 +52,18 @@ if [ "$ONLY" == "cleanup" ]; then
      exit
  fi
  
-gen_config
-start ost --reformat $OSTLCONFARGS 
-PINGER=`cat /proc/fs/lustre/pinger`
+setup() {
+    gen_config
+    start ost --reformat $OSTLCONFARGS 
+    start ost2 --reformat $OSTLCONFARGS 
+    start mds $MDSLCONFARGS --reformat
+    grep " $MOUNT " /proc/mounts || zconf_mount `hostname` $MOUNT
+    grep " $MOUNT2 " /proc/mounts || zconf_mount `hostname` $MOUNT2
  
-if [ "$PINGER" != "on" ]; then
-    echo "ERROR: Lustre must be built with --enable-pinger for replay-dual"
-    stop mds
-    exit 1
-fi
-
-start ost2 --reformat $OSTLCONFARGS 
-[ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
-start mds $MDSLCONFARGS --reformat
-grep " $MOUNT " /proc/mounts || zconf_mount `hostname` $MOUNT
-grep " $MOUNT2 " /proc/mounts || zconf_mount `hostname` $MOUNT2
-
-echo $TIMEOUT > /proc/sys/lustre/timeout
-echo $UPCALL > /proc/sys/lustre/upcall
+#    echo $TIMEOUT > /proc/sys/lustre/timeout
+}
  
+$SETUP
  [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
  
  test_1() {
@@ -175,7 +171,156 @@ test_6() {
  }
  run_test 6 "open1, open2, unlink |X| close1 [fail mds] close2"
  
+test_8() {
+    replay_barrier mds
+    drop_reint_reply "mcreate $MOUNT1/$tfile"    || return 1
+    fail mds
+    checkstat $MOUNT2/$tfile || return 2
+    rm $MOUNT1/$tfile || return 3
+
+    return 0
+}
+run_test 8 "replay of resent request"
+
+test_9() {
+    replay_barrier mds
+    mcreate $MOUNT1/$tfile-1
+    mcreate $MOUNT2/$tfile-2
+    # drop first reint reply
+    sysctl -w lustre.fail_loc=0x80000119
+    fail mds
+    sysctl -w lustre.fail_loc=0
+
+    rm $MOUNT1/$tfile-[1,2] || return 1
+
+    return 0
+}
+run_test 9 "resending a replayed create"
+
+test_10() {
+    mcreate $MOUNT1/$tfile-1
+    replay_barrier mds
+    munlink $MOUNT1/$tfile-1
+    mcreate $MOUNT2/$tfile-2
+    # drop first reint reply
+    sysctl -w lustre.fail_loc=0x80000119
+    fail mds
+    sysctl -w lustre.fail_loc=0
+
+    checkstat $MOUNT1/$tfile-1 && return 1
+    checkstat $MOUNT1/$tfile-2 || return 2
+    rm $MOUNT1/$tfile-2
+
+    return 0
+}
+run_test 10 "resending a replayed unlink"
+
+test_11() {
+    replay_barrier mds
+    mcreate $MOUNT1/$tfile-1
+    mcreate $MOUNT2/$tfile-2
+    mcreate $MOUNT1/$tfile-3
+    mcreate $MOUNT2/$tfile-4
+    mcreate $MOUNT1/$tfile-5
+    # drop all reint replies for a while
+    sysctl -w lustre.fail_loc=0x0119
+    facet_failover mds
+    # sleep for a while, let both clients reconnect and time out
+    sleep $((TIMEOUT * 2))
+    sysctl -w lustre.fail_loc=0
+
+    rm $MOUNT1/$tfile-[1-5] || return 1
+
+    return 0
+}
+run_test 11 "both clients timeout during replay"
+
+test_12() {
+    replay_barrier mds
+
+    multiop $DIR/$tfile mo_c &
+    MULTIPID=$!
+    sleep 5
+
+    # drop first enqueue
+    sysctl -w lustre.fail_loc=0x80000302
+    facet_failover mds
+    df $MOUNT || return 1
+    sysctl -w lustre.fail_loc=0
+
+    ls $DIR/$tfile
+    $CHECKSTAT -t file $DIR/$tfile || return 2
+    kill -USR1 $MULTIPID || return 3
+    wait $MULTIPID || return 4
+    rm $DIR/$tfile
+
+    return 0
+}
+run_test 12 "open resend timeout"
+
+test_13() {
+    multiop $DIR/$tfile mo_c &
+    MULTIPID=$!
+    sleep 5
+
+    replay_barrier mds
+
+    kill -USR1 $MULTIPID || return 3
+    wait $MULTIPID || return 4
+
+    # drop close 
+    sysctl -w lustre.fail_loc=0x80000115
+    facet_failover mds
+    df $MOUNT || return 1
+    sysctl -w lustre.fail_loc=0
+
+    ls $DIR/$tfile
+    $CHECKSTAT -t file $DIR/$tfile || return 2
+    rm $DIR/$tfile
+
+    return 0
+}
+run_test 13 "close resend timeout"
+
+test_14() {
+    replay_barrier mds
+    createmany -o $MOUNT1/$tfile- 25
+    createmany -o $MOUNT2/$tfile-2- 1
+    createmany -o $MOUNT1/$tfile-3- 25
+    umount $MOUNT2
+
+    facet_failover mds
+    # expect failover to fail
+    df $MOUNT && return 1
+
+    # first 25 files should have been
+    # replayed
+    unlinkmany $MOUNT1/$tfile- 25 || return 2
+
+    zconf_mount `hostname` $MOUNT2
+    return 0
+}
+run_test 14 "timeouts waiting for lost client during replay"
+
+test_15() {
+    replay_barrier mds
+    createmany -o $MOUNT1/$tfile- 25
+    createmany -o $MOUNT2/$tfile-2- 1
+    umount $MOUNT2
+
+    facet_failover mds
+    df $MOUNT || return 1
+
+    lctl dk dk 
+    unlinkmany $MOUNT1/$tfile- 25 || return 2
+
+    zconf_mount `hostname` $MOUNT2
+    return 0
+}
+run_test 15 "timeout waiting for lost client during replay, 1 client completes"
+
+
  if [ "$ONLY" != "setup" ]; then
         equals_msg test complete, cleaning up
-       cleanup
+       $CLEANUP
  fi
diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh

index 33f9786..327ea0b 100644 (file)
--- a/lustre/tests/test-framework.sh
+++ b/lustre/tests/test-framework.sh
@@ -232,6 +232,11 @@ facet_nid() {
  facet_active() {
      local facet=$1
      local activevar=${facet}active
+
+    if [ -f ./${facet}active ] ; then
+        source ./${facet}active
+    fi
+
      active=${!activevar}
      if [ -z "$active" ] ; then 
         echo -n ${facet}
diff --git a/lustre/utils/Lustre/Makefile.am b/lustre/utils/Lustre/Makefile.am

index e8e522f..c3d9a59 100644 (file)
--- a/lustre/utils/Lustre/Makefile.am
+++ b/lustre/utils/Lustre/Makefile.am
@@ -1,2 +1,4 @@
+if UTILS
  pymod_SCRIPTS = __init__.py lustredb.py error.py cmdline.py
-EXTRA_DIST = $(pymod_SCRIPTS)
+endif
+EXTRA_DIST = __init__.py lustredb.py error.py cmdline.py
diff --git a/lustre/utils/Makefile.am b/lustre/utils/Makefile.am

index 1f7a8b5..5704e85 100644 (file)
--- a/lustre/utils/Makefile.am
+++ b/lustre/utils/Makefile.am
@@ -6,17 +6,17 @@ AM_CFLAGS=$(LLCFLAGS)
  AM_CPPFLAGS=$(LLCPPFLAGS)
  AM_LDFLAGS := -L$(top_builddir)/portals/utils
  
+sbin_scripts = lconf lmc llanalyze llstat.pl llobdstat.pl lactive      \
+       load_ldap.sh lrun lwizard
+bin_scripts = lfind lstripe
+
  if UTILS
  rootsbin_SCRIPTS = mount.lustre
  sbin_PROGRAMS = lctl obdio obdbarrier lload wirecheck wiretest llmount
-sbin_SCRIPTS = lconf lmc llanalyze llstat.pl llobdstat.pl lactive load_ldap.sh lrun
-sbin_SCRIPTS += lwizard
-bin_SCRIPTS = lfind lstripe
  bin_PROGRAMS = lfs
  lib_LIBRARIES = liblustreapi.a
-if LIBLUSTRE
-sbin_SCRIPTS += lrun
-endif # LIBLUSTRE
+sbin_SCRIPTS = $(sbin_scripts)
+bin_SCRIPTS = $(bin_scripts)
  endif # UTILS
  
  lctl_LDADD := $(LIBREADLINE) -lptlctl
@@ -33,7 +33,7 @@ lfs_SOURCES = lfs.c
  llmount_SOURCES = llmount.c 
  llmount_LDADD = $(LIBREADLINE) -lptlctl
  
-EXTRA_DIST = $(bin_SCRIPTS) $(sbin_SCRIPTS)
+EXTRA_DIST = $(bin_scripts) $(sbin_scripts)
  
  # NOTE: this should only be run on i386.
  newwiretest: wirehdr.c wirecheck
author	adilger <adilger>
	Wed, 16 Jun 2004 16:50:40 +0000 (16:50 +0000)
committer	adilger <adilger>
	Wed, 16 Jun 2004 16:50:40 +0000 (16:50 +0000)
ldiskfs/ldiskfs/autoMakefile.am		patch \| blob \| history
lnet/archdep.m4		patch \| blob \| history
lnet/autoMakefile.am		patch \| blob \| history
lnet/include/.cvsignore		patch \| blob \| history
lnet/include/Makefile.am	[new file with mode: 0644]	patch \| blob
lnet/include/linux/.cvsignore	[new file with mode: 0644]	patch \| blob
lnet/include/linux/Makefile.am	[new file with mode: 0644]	patch \| blob
lnet/include/linux/libcfs.h		patch \| blob \| history
lnet/include/lnet/.cvsignore	[new file with mode: 0644]	patch \| blob
lnet/include/lnet/Makefile.am	[new file with mode: 0644]	patch \| blob
lnet/include/lnet/types.h		patch \| blob \| history
lnet/klnds/qswlnd/qswlnd_cb.c		patch \| blob \| history
lnet/klnds/socklnd/socklnd_cb.c		patch \| blob \| history
lnet/ulnds/Makefile.am		patch \| blob \| history
lnet/ulnds/socklnd/Makefile.am		patch \| blob \| history
lnet/utils/Makefile.am		patch \| blob \| history
lustre/ChangeLog		patch \| blob \| history
lustre/conf/Makefile.am		patch \| blob \| history
lustre/configure.in		patch \| blob \| history
lustre/include/linux/Makefile.am		patch \| blob \| history
lustre/include/linux/lustre_compat25.h		patch \| blob \| history
lustre/include/linux/lustre_export.h		patch \| blob \| history
lustre/include/linux/lustre_fsfilt.h		patch \| blob \| history
lustre/include/linux/lustre_import.h		patch \| blob \| history
lustre/include/linux/lustre_lib.h		patch \| blob \| history
lustre/include/linux/lustre_log.h		patch \| blob \| history
lustre/include/linux/obd_class.h		patch \| blob \| history
lustre/include/lustre/Makefile.am		patch \| blob \| history
lustre/kernel_patches/patches/configurable-x86-stack-2.4.21-chaos.patch	[new file with mode: 0644]	patch \| blob
lustre/kernel_patches/patches/ext-2.4-patch-1-chaos.patch		patch \| blob \| history
lustre/kernel_patches/patches/ext-2.4-patch-1-suse.patch		patch \| blob \| history
lustre/kernel_patches/patches/ext-2.4-patch-1.patch		patch \| blob \| history
lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.21-chaos.patch		patch \| blob \| history
lustre/kernel_patches/patches/ext3-htree-2.4.19-pre1.patch		patch \| blob \| history
lustre/kernel_patches/patches/ext3-htree-2.4.21-chaos.patch		patch \| blob \| history
lustre/kernel_patches/patches/ext3-htree-2.4.22-rh.patch		patch \| blob \| history
lustre/kernel_patches/patches/ext3-htree-suse.patch		patch \| blob \| history
lustre/kernel_patches/patches/ext3-htree.patch		patch \| blob \| history
lustre/kernel_patches/patches/ext3-pdirops-2.4.20-rh.patch	[new file with mode: 0644]	patch \| blob
lustre/kernel_patches/patches/ext3-trusted_ea-2.4.21-chaos.patch	[new file with mode: 0644]	patch \| blob
lustre/kernel_patches/patches/iopen-2.4.19-suse.patch		patch \| blob \| history
lustre/kernel_patches/patches/iopen-2.4.21-chaos.patch		patch \| blob \| history
lustre/kernel_patches/patches/vfs_intent-2.6-suse.patch		patch \| blob \| history
lustre/kernel_patches/series/chaos-2.4.21		patch \| blob \| history
lustre/kernel_patches/series/rh-2.4.20		patch \| blob \| history
lustre/kernel_patches/series/suse-2.4.19		patch \| blob \| history
lustre/kernel_patches/series/vanilla-2.4.20		patch \| blob \| history
lustre/kernel_patches/targets/rh-2.4.target		patch \| blob \| history
lustre/ldiskfs/autoMakefile.am		patch \| blob \| history
lustre/ldlm/ldlm_lib.c		patch \| blob \| history
lustre/liblustre/rw.c		patch \| blob \| history
lustre/llite/file.c		patch \| blob \| history
lustre/lov/lov_obd.c		patch \| blob \| history
lustre/mds/mds_fs.c		patch \| blob \| history
lustre/mds/mds_lov.c		patch \| blob \| history
lustre/obdclass/class_obd.c		patch \| blob \| history
lustre/obdclass/genops.c		patch \| blob \| history
lustre/obdclass/llog_ioctl.c		patch \| blob \| history
lustre/obdfilter/filter.c		patch \| blob \| history
lustre/osc/osc_request.c		patch \| blob \| history
lustre/portals/archdep.m4		patch \| blob \| history
lustre/portals/autoMakefile.am		patch \| blob \| history
lustre/portals/include/.cvsignore		patch \| blob \| history
lustre/portals/include/Makefile.am	[new file with mode: 0644]	patch \| blob
lustre/portals/include/linux/.cvsignore	[new file with mode: 0644]	patch \| blob
lustre/portals/include/linux/Makefile.am	[new file with mode: 0644]	patch \| blob
lustre/portals/include/linux/libcfs.h		patch \| blob \| history
lustre/portals/include/portals/.cvsignore	[new file with mode: 0644]	patch \| blob
lustre/portals/include/portals/Makefile.am	[new file with mode: 0644]	patch \| blob
lustre/portals/include/portals/types.h		patch \| blob \| history
lustre/portals/knals/qswnal/qswnal_cb.c		patch \| blob \| history
lustre/portals/knals/socknal/socknal_cb.c		patch \| blob \| history
lustre/portals/unals/Makefile.am		patch \| blob \| history
lustre/portals/utils/Makefile.am		patch \| blob \| history
lustre/ptlrpc/client.c		patch \| blob \| history
lustre/ptlrpc/import.c		patch \| blob \| history
lustre/ptlrpc/niobuf.c		patch \| blob \| history
lustre/ptlrpc/ptlrpcd.c		patch \| blob \| history
lustre/ptlrpc/recover.c		patch \| blob \| history
lustre/scripts/Makefile.am		patch \| blob \| history
lustre/scripts/lbuild		patch \| blob \| history
lustre/scripts/lustre-kernel-2.4.spec.in		patch \| blob \| history
lustre/scripts/lustre.spec.in		patch \| blob \| history
lustre/tests/.cvsignore		patch \| blob \| history
lustre/tests/Makefile.am		patch \| blob \| history
lustre/tests/cfg/insanity-mdev.sh		patch \| blob \| history
lustre/tests/cfg/local.sh		patch \| blob \| history
lustre/tests/conf-sanity.sh		patch \| blob \| history
lustre/tests/insanity.sh		patch \| blob \| history
lustre/tests/recovery-small.sh		patch \| blob \| history
lustre/tests/replay-dual.sh		patch \| blob \| history
lustre/tests/test-framework.sh		patch \| blob \| history
lustre/utils/Lustre/Makefile.am		patch \| blob \| history
lustre/utils/Makefile.am		patch \| blob \| history