Whamcloud - gitweb
merge b_devel into HEAD (20030626 merge tag) for 0.7.1
author: pschwan <pschwan>
Thu, 26 Jun 2003 14:34:02 +0000 (14:34 +0000)
committer: pschwan <pschwan>
Thu, 26 Jun 2003 14:34:02 +0000 (14:34 +0000)
80 files changed:
lnet/archdep.m4
lnet/include/config.h.in
lnet/include/linux/kp30.h
lnet/klnds/qswlnd/qswlnd.h
lnet/klnds/qswlnd/qswlnd_cb.c
lustre/configure.in
lustre/include/liblustre.h
lustre/include/linux/lprocfs_status.h
lustre/include/linux/lustre_dlm.h
lustre/include/linux/lustre_fsfilt.h
lustre/include/linux/lustre_idl.h
lustre/include/linux/lustre_lib.h
lustre/include/linux/lustre_lite.h
lustre/include/linux/lustre_net.h
lustre/include/linux/obd.h
lustre/include/linux/obd_class.h
lustre/include/linux/obd_filter.h
lustre/include/linux/obd_support.h
lustre/kernel_patches/patches/ext3-delete_thread-2.4.18.patch
lustre/kernel_patches/patches/ext3-delete_thread-2.4.20.patch
lustre/kernel_patches/patches/iopen-2.4.18.patch
lustre/ldlm/Makefile.am
lustre/ldlm/ldlm_lock.c
lustre/ldlm/ldlm_lockd.c
lustre/ldlm/ldlm_request.c
lustre/llite/Makefile.am
lustre/llite/dcache.c
lustre/llite/dir.c
lustre/llite/file.c
lustre/llite/iod.c
lustre/llite/lproc_llite.c
lustre/llite/namei.c
lustre/llite/rw.c
lustre/llite/super.c
lustre/llite/super25.c
lustre/lov/lov_obd.c
lustre/lov/lproc_lov.c
lustre/mdc/mdc_request.c
lustre/mds/handler.c
lustre/mds/mds_reint.c
lustre/obdclass/class_obd.c
lustre/obdclass/fsfilt_ext3.c
lustre/obdclass/fsfilt_extN.c
lustre/obdclass/fsfilt_reiserfs.c
lustre/obdclass/lprocfs_status.c
lustre/obdecho/echo.c
lustre/obdecho/echo_client.c
lustre/obdfilter/filter.c
lustre/obdfilter/lproc_obdfilter.c
lustre/osc/osc_request.c
lustre/ost/ost_handler.c
lustre/portals/archdep.m4
lustre/portals/include/config.h.in
lustre/portals/include/linux/kp30.h
lustre/portals/knals/qswnal/qswnal.h
lustre/portals/knals/qswnal/qswnal_cb.c
lustre/ptlrpc/client.c
lustre/ptlrpc/lproc_ptlrpc.c
lustre/ptlrpc/ptlrpc_internal.h
lustre/ptlrpc/service.c
lustre/tests/acceptance-small.sh
lustre/tests/crash-mod.sh [new file with mode: 0644]
lustre/tests/local.sh
lustre/tests/openfile.c
lustre/tests/recovery-small.sh
lustre/tests/runas.c
lustre/tests/rundbench
lustre/tests/runvmstat
lustre/tests/sanity.sh
lustre/tests/utime.c [new file with mode: 0644]
lustre/utils/Lustre/lustredb.py
lustre/utils/Makefile.am
lustre/utils/lactive
lustre/utils/lconf
lustre/utils/lfind.c
lustre/utils/llobdstat.pl [new file with mode: 0755]
lustre/utils/llstat.pl
lustre/utils/lmc
lustre/utils/obd.c
lustre/utils/obdstat.c [deleted file]

index 7cb00cf..7a4e05c 100644 (file)
@@ -106,7 +106,7 @@ case ${host_cpu} in
 
        ia64 )
        AC_MSG_RESULT($host_cpu)
-        KCFLAGS='-gstabs -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -pipe -ffixed-r13 -mfixed-range=f10-f15,f32-f127 -falign-functions=32 -mb-step'
+        KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -pipe -ffixed-r13 -mfixed-range=f10-f15,f32-f127 -falign-functions=32 -mb-step'
        KCPPFLAGS='-D__KERNEL__ -DMODULE'
         MOD_LINK=elf64_ia64
 ;;
index b05d0c4..3aa6909 100644 (file)
@@ -1,11 +1,58 @@
-/* ../include/config.h.in.  Generated automatically from configure.in by autoheader.  */
+/* portals/include/config.h.in.  Generated from configure.in by autoheader.  */
 
-/* Define if you have the readline library (-lreadline).  */
-#undef HAVE_LIBREADLINE
+/* Define to 1 if you have the <inttypes.h> header file. */
+#undef HAVE_INTTYPES_H
+
+/* Define to 1 if you have the <memory.h> header file. */
+#undef HAVE_MEMORY_H
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#undef HAVE_STDINT_H
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#undef HAVE_STDLIB_H
+
+/* Define to 1 if you have the <strings.h> header file. */
+#undef HAVE_STRINGS_H
+
+/* Define to 1 if you have the <string.h> header file. */
+#undef HAVE_STRING_H
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#undef HAVE_SYS_STAT_H
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#undef HAVE_SYS_TYPES_H
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#undef HAVE_UNISTD_H
+
+/* IOCTL Buffer Size */
+#undef OBD_MAX_IOCTL_BUFFER
 
 /* Name of package */
 #undef PACKAGE
 
+/* Define to the address where bug reports for this package should be sent. */
+#undef PACKAGE_BUGREPORT
+
+/* Define to the full name of this package. */
+#undef PACKAGE_NAME
+
+/* Define to the full name and version of this package. */
+#undef PACKAGE_STRING
+
+/* Define to the one symbol short name of this package. */
+#undef PACKAGE_TARNAME
+
+/* Define to the version of this package. */
+#undef PACKAGE_VERSION
+
+/* The size of a `unsigned long long', as computed by sizeof. */
+#undef SIZEOF_UNSIGNED_LONG_LONG
+
+/* Define to 1 if you have the ANSI C header files. */
+#undef STDC_HEADERS
+
 /* Version number of package */
 #undef VERSION
-
index 6d7f3f3..ee3b9fc 100644 (file)
@@ -262,28 +262,27 @@ do {                                                                          \
 
 #define PORTAL_ALLOC(ptr, size)                                           \
 do {                                                                      \
-        long s = size;                                                    \
         LASSERT (!in_interrupt());                                        \
-        if (s > PORTAL_VMALLOC_SIZE)                                      \
-                (ptr) = vmalloc(s);                                       \
+        if ((size) > PORTAL_VMALLOC_SIZE)                                 \
+                (ptr) = vmalloc(size);                                    \
         else                                                              \
-                (ptr) = kmalloc(s, GFP_NOFS);                             \
+                (ptr) = kmalloc((size), GFP_NOFS);                        \
         if ((ptr) == NULL)                                                \
-                CERROR("PORTALS: out of memory at %s:%d (tried to alloc"  \
-                       " '" #ptr "' = %ld)\n", __FILE__, __LINE__, s);    \
+                CERROR("PORTALS: out of memory at %s:%d (tried to alloc '"\
+                       #ptr "' = %d)\n", __FILE__, __LINE__, (int)(size));\
         else {                                                            \
-                portal_kmem_inc((ptr), s);                                \
-                memset((ptr), 0, s);                                      \
+                portal_kmem_inc((ptr), (size));                           \
+                memset((ptr), 0, (size));                                 \
         }                                                                 \
-        CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %ld at %p (tot %d).\n",   \
-               s, (ptr), atomic_read (&portal_kmemory));                  \
+        CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d).\n",    \
+               (int)(size), (ptr), atomic_read (&portal_kmemory));        \
 } while (0)
 
 #define PORTAL_FREE(ptr, size)                                          \
 do {                                                                    \
-        long s = (size);                                                \
+        int s = (size);                                                 \
         if ((ptr) == NULL) {                                            \
-                CERROR("PORTALS: free NULL '" #ptr "' (%ld bytes) at "  \
+                CERROR("PORTALS: free NULL '" #ptr "' (%d bytes) at "   \
                        "%s:%d\n", s, __FILE__, __LINE__);               \
                 break;                                                  \
         }                                                               \
@@ -292,39 +291,38 @@ do {                                                                    \
         else                                                            \
                 kfree(ptr);                                             \
         portal_kmem_dec((ptr), s);                                      \
-        CDEBUG(D_MALLOC, "kfreed '" #ptr "': %ld at %p (tot %d).\n",    \
-               s, (ptr), atomic_read (&portal_kmemory));                \
+        CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n",     \
+               s, (ptr), atomic_read(&portal_kmemory));                 \
 } while (0)
 
 #define PORTAL_SLAB_ALLOC(ptr, slab, size)                                \
 do {                                                                      \
-        long s = (size);                                                  \
-        LASSERT (!in_interrupt());                                        \
+        LASSERT(!in_interrupt());                                         \
         (ptr) = kmem_cache_alloc((slab), SLAB_KERNEL);                    \
         if ((ptr) == NULL) {                                              \
                 CERROR("PORTALS: out of memory at %s:%d (tried to alloc"  \
                        " '" #ptr "' from slab '" #slab "')\n", __FILE__,  \
                        __LINE__);                                         \
         } else {                                                          \
-                portal_kmem_inc((ptr), s);                                \
-                memset((ptr), 0, s);                                      \
+                portal_kmem_inc((ptr), (size));                           \
+                memset((ptr), 0, (size));                                 \
         }                                                                 \
         CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %ld at %p (tot %d).\n",   \
-               s, (ptr), atomic_read (&portal_kmemory));                  \
+               (int)(size), (ptr), atomic_read(&portal_kmemory));         \
 } while (0)
 
 #define PORTAL_SLAB_FREE(ptr, slab, size)                               \
 do {                                                                    \
-        long s = (size);                                                \
+        int s = (size);                                                 \
         if ((ptr) == NULL) {                                            \
-                CERROR("PORTALS: free NULL '" #ptr "' (%ld bytes) at "  \
+                CERROR("PORTALS: free NULL '" #ptr "' (%d bytes) at "   \
                        "%s:%d\n", s, __FILE__, __LINE__);               \
                 break;                                                  \
         }                                                               \
         memset((ptr), 0x5a, s);                                         \
         kmem_cache_free((slab), ptr);                                   \
         portal_kmem_dec((ptr), s);                                      \
-        CDEBUG(D_MALLOC, "kfreed '" #ptr "': %ld at %p (tot %d).\n",    \
+        CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n",     \
                s, (ptr), atomic_read (&portal_kmemory));                \
 } while (0)
 
index 88ab74f..85e585b 100644 (file)
@@ -104,7 +104,7 @@ typedef unsigned long kqsw_csum_t;
 #define KQSW_TX_MAXCONTIG              (1<<10) /* largest payload that gets made contiguous on transmit */
 
 #define KQSW_NTXMSGS                   8       /* # normal transmit messages */
-#define KQSW_NNBLK_TXMSGS              128     /* # reserved transmit messages if can't block */
+#define KQSW_NNBLK_TXMSGS              256     /* # reserved transmit messages if can't block */
 
 #define KQSW_NRXMSGS_LARGE             64      /* # large receive buffers */
 #define KQSW_EP_ENVELOPES_LARGE        128     /* # large ep envelopes */
index 3b47a25..c03d592 100644 (file)
@@ -617,6 +617,7 @@ kqswnal_sendmsg (nal_cb_t     *nal,
         if (ktx == NULL) {
                 kqswnal_cerror_hdr (hdr);
                 lib_finalize (&kqswnal_lib, private, cookie);
+                return (-1);
         }
 
         memcpy (ktx->ktx_buffer, hdr, sizeof (*hdr)); /* copy hdr from caller's stack */
index 0850115..8e12135 100644 (file)
@@ -15,7 +15,7 @@ AC_ARG_ENABLE(extN, [  --enable-extN use extN instead of ext3 for lustre backend
 AM_CONDITIONAL(EXTN, test x$enable_extN = xyes)
 
 AC_ARG_WITH(obd-buffer-size, [  --with-obd-buffer-size=[size] set lctl ioctl maximum (default=8K)],OBD_BUFFER_SIZE=$with_obd_buffer_size,OBD_BUFFER_SIZE=8192)
-AC_SUBST(OBD_BUFFER_SIZE)
+AC_DEFINE_UNQUOTED(OBD_MAX_IOCTL_BUFFER, $OBD_BUFFER_SIZE, [IOCTL Buffer Size])
 
 sinclude(portals/build.m4)
 sinclude(portals/archdep.m4)
index 017d5b6..202a761 100644 (file)
@@ -57,6 +57,14 @@ typedef unsigned short umode_t;
 
 #endif
 
+/* This is because lprocfs_status.h gets included here indirectly.  It would
+ * be much better to just avoid lprocfs being included into liblustre entirely
+ * but that requires more header surgery than I can handle right now.
+ */
+#ifndef smp_processor_id
+#define smp_processor_id() 0
+#endif
+
 /* always adopt 2.5 definitions */
 #define KERNEL_VERSION(a,b,c) ((a)*100+(b)*10+c)
 #define LINUX_VERSION_CODE (2*200+5*10+0)
index 5ce5e98..fb96bde 100644 (file)
 #define _LPROCFS_SNMP_H
 
 #ifdef __KERNEL__
+#include <linux/config.h>
 #include <linux/autoconf.h>
 #include <linux/proc_fs.h>
+#include <linux/smp.h>
 #endif
 
+#include <linux/kp30.h>
+
 #ifndef LPROCFS
 #ifdef  CONFIG_PROC_FS  /* Ensure that /proc is configured */
 #define LPROCFS
@@ -47,7 +51,7 @@ struct lprocfs_static_vars {
         struct lprocfs_vars *obd_vars;
 };
 
-/* Lprocfs counters are can be configured using the enum bit masks below.
+/* An lprocfs counter can be configured using the enum bit masks below.
  *
  * LPROCFS_CNTR_EXTERNALLOCK indicates that an external lock already
  * protects this counter from concurrent updates. If not specified,
@@ -69,98 +73,105 @@ struct lprocfs_static_vars {
  */
 
 enum {
-        LPROCFS_CNTR_EXTERNALLOCK = 1,
-        LPROCFS_CNTR_AVGMINMAX    = 2,
-        LPROCFS_CNTR_STDDEV       = 4,
+        LPROCFS_CNTR_EXTERNALLOCK = 0x0001,
+        LPROCFS_CNTR_AVGMINMAX    = 0x0002,
+        LPROCFS_CNTR_STDDEV       = 0x0004,
+
+        /* counter data type */
+        LPROCFS_TYPE_REGS         = 0x0100,
+        LPROCFS_TYPE_BYTES        = 0x0200,
+        LPROCFS_TYPE_PAGES        = 0x0400,
+        LPROCFS_TYPE_CYCLE        = 0x0800,
+};
+
+struct lprocfs_atomic {
+        atomic_t               la_entry;
+        atomic_t               la_exit;
 };
 
 struct lprocfs_counter {
-        union {
-                spinlock_t    internal; /* when there is no external lock */
-                spinlock_t   *external; /* external lock, when available */
-        } l;
-        unsigned int  config;
-        __u64         count;
-        __u64         sum;
-        __u64         min;
-        __u64         max;
-        __u64         sumsquare;
-        const char    *name;   /* must be static */
-        const char    *units;  /* must be static */
+        struct lprocfs_atomic  lc_cntl;  /* may need to move to per set */
+        unsigned int           lc_config;
+        __u64                  lc_count;
+        __u64                  lc_sum;
+        __u64                  lc_min;
+        __u64                  lc_max;
+        __u64                  lc_sumsquare;
+        const char            *lc_name;   /* must be static */
+        const char            *lc_units;  /* must be static */
 };
 
+struct lprocfs_percpu {
+        struct lprocfs_counter lp_cntr[0];
+};
 
-struct lprocfs_counters {
-        unsigned int           num;
-        unsigned int           padto8byteboundary;
-        struct lprocfs_counter cntr[0];
+
+struct lprocfs_stats {
+        unsigned int           ls_num;     /* # of counters */
+        unsigned int           ls_percpu_size;
+        struct lprocfs_percpu *ls_percpu[0];
 };
 
 
 /* class_obd.c */
 extern struct proc_dir_entry *proc_lustre_root;
+
+/* lproc_lov.c */
+extern struct file_operations ll_proc_target_fops;
 struct obd_device;
 
 #ifdef LPROCFS
 
-/* Two optimized LPROCFS counter increment macros are provided:
- *     LPROCFS_COUNTER_INCR(cntr, value) - use for multi-valued counters
- *     LPROCFS_COUNTER_INCBY1(cntr) - optimized for by-one counters
+/* Two optimized LPROCFS counter increment functions are provided:
+ *     lprocfs_counter_incr(stats, idx) - optimized for by-one counters
+ *     lprocfs_counter_add(stats, idx, amount) - use for multi-valued counters
  * Counter data layout allows config flag, counter lock and the
  * count itself to reside within a single cache line.
  */
 
-#define LPROCFS_COUNTER_INCR(cntr, value)                         \
-        do {                                                      \
-               struct lprocfs_counter *c = (cntr);                \
-               LASSERT(c != NULL);                                \
-               if (!(c->config & LPROCFS_CNTR_EXTERNALLOCK))      \
-                     spin_lock(&c->l.internal);                   \
-               c->count++;                                        \
-               if (c->config & LPROCFS_CNTR_AVGMINMAX) {          \
-                      __u64 val = (__u64) (value);                \
-                      c->sum += val;                              \
-                      if (c->config & LPROCFS_CNTR_STDDEV)        \
-                         c->sumsquare += (val*val);               \
-                      if (val < c->min) c->min = val;             \
-                      if (val > c->max) c->max = val;             \
-               }                                                  \
-               if (!(c->config & LPROCFS_CNTR_EXTERNALLOCK))      \
-                      spin_unlock(&c->l.internal);                \
-      } while (0)
-
-#define LPROCFS_COUNTER_INCBY1(cntr)                              \
-        do {                                                      \
-               struct lprocfs_counter *c = (cntr);                \
-               LASSERT(c != NULL);                                \
-               if (!(c->config & LPROCFS_CNTR_EXTERNALLOCK))      \
-                     spin_lock(&c->l.internal);                   \
-               c->count++;                                        \
-               if (!(c->config & LPROCFS_CNTR_EXTERNALLOCK))      \
-                      spin_unlock(&c->l.internal);                \
-      } while (0)
-
-#define LPROCFS_COUNTER_INIT(cntr, conf, lck, nam, un)                 \
-        do {                                                           \
-               struct lprocfs_counter *c = (cntr);                     \
-               LASSERT(c != NULL);                                     \
-               memset(c, 0, sizeof(struct lprocfs_counter));           \
-               if (conf & LPROCFS_CNTR_EXTERNALLOCK) c->l.external = (lck); \
-               else spin_lock_init(&c->l.internal);                    \
-               c->config = conf;                                       \
-               c->min = (~(__u64)0);                                   \
-               c->name = (nam);                                        \
-               c->units = (un);                                        \
-        } while (0)
-
-extern struct lprocfs_counters* lprocfs_alloc_counters(unsigned int num);
-extern void lprocfs_free_counters(struct lprocfs_counters* cntrs);
-extern int lprocfs_alloc_obd_counters(struct obd_device *obddev,
-                                      unsigned int num_private_counters);
-extern void lprocfs_free_obd_counters(struct obd_device *obddev);
-extern int lprocfs_register_counters(struct proc_dir_entry *root,
-                                     const char* name,
-                                     struct lprocfs_counters *cntrs);
+static inline void lprocfs_counter_add(struct lprocfs_stats *stats, int idx,
+                                       long amount)
+{
+        struct lprocfs_counter *percpu_cntr;
+
+        LASSERT(stats != NULL);
+        percpu_cntr = &(stats->ls_percpu[smp_processor_id()]->lp_cntr[idx]);
+        atomic_inc(&percpu_cntr->lc_cntl.la_entry);
+        percpu_cntr->lc_count++;
+
+        if (percpu_cntr->lc_config & LPROCFS_CNTR_AVGMINMAX) {
+                percpu_cntr->lc_sum += amount;
+                if (percpu_cntr->lc_config & LPROCFS_CNTR_STDDEV)
+                        percpu_cntr->lc_sumsquare += (__u64)amount * amount;
+                if (amount < percpu_cntr->lc_min)
+                        percpu_cntr->lc_min = amount;
+                if (amount > percpu_cntr->lc_max)
+                        percpu_cntr->lc_max = amount;
+        }
+        atomic_inc(&percpu_cntr->lc_cntl.la_exit);
+}
+
+static inline void lprocfs_counter_incr(struct lprocfs_stats *stats, int idx)
+{
+        struct lprocfs_counter *percpu_cntr;
+
+        LASSERT(stats != NULL);
+        percpu_cntr = &(stats->ls_percpu[smp_processor_id()]->lp_cntr[idx]);
+        atomic_inc(&percpu_cntr->lc_cntl.la_entry);
+        percpu_cntr->lc_count++;
+        atomic_inc(&percpu_cntr->lc_cntl.la_exit);
+}
+
+extern struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num);
+extern void lprocfs_free_stats(struct lprocfs_stats *stats);
+extern int lprocfs_alloc_obd_stats(struct obd_device *obddev,
+                                   unsigned int num_private_stats);
+extern void lprocfs_counter_init(struct lprocfs_stats *stats, int index,
+                                 unsigned conf, const char *name,
+                                 const char *units);
+extern void lprocfs_free_obd_stats(struct obd_device *obddev);
+extern int lprocfs_register_stats(struct proc_dir_entry *root, const char *name,
+                                  struct lprocfs_stats *stats);
 
 #define LPROCFS_INIT_MULTI_VARS(array, size)                              \
 void lprocfs_init_multi_vars(unsigned int idx,                            \
@@ -195,6 +206,9 @@ extern struct proc_dir_entry *lprocfs_register(const char *name,
 
 extern void lprocfs_remove(struct proc_dir_entry *root);
 
+extern struct proc_dir_entry *lprocfs_srch(struct proc_dir_entry *root,
+                                           const char *name);
+
 extern int lprocfs_obd_attach(struct obd_device *dev, struct lprocfs_vars *list);
 extern int lprocfs_obd_detach(struct obd_device *dev);
 
@@ -248,23 +262,28 @@ int fct_name(char *page, char **start, off_t off,                         \
 
 #else
 /* LPROCFS is not defined */
-#define LPROCFS_COUNTER_INCR(cntr, value)
-#define LPROCFS_COUNTER_INCBY1(cntr)
-#define LPROCFS_COUNTER_INIT(cntr, conf, lock, nam, un)
+static inline void lprocfs_counter_add(struct lprocfs_stats *stats,
+                                       int index, long amount) { return; }
+static inline void lprocfs_counter_incr(struct lprocfs_stats *stats,
+                                        int index) { return; }
+static inline void lprocfs_counter_init(struct lprocfs_stats *stats,
+                                        int index, unsigned conf,
+                                        const char *name, const char *units)
+{ return; }
 
-static inline struct lprocfs_counters* lprocfs_alloc_counters(unsigned int num)
+static inline struct lprocfs_stats* lprocfs_alloc_stats(unsigned int num)
 { return NULL; }
-static inline void lprocfs_free_counters(struct lprocfs_counters* cntrs)
+static inline void lprocfs_free_stats(struct lprocfs_stats *stats)
 { return; }
 
-static inline int lprocfs_register_counters(struct proc_dir_entry *root,
-                                            const charname,
-                                            struct lprocfs_counters *cntrs)
+static inline int lprocfs_register_stats(struct proc_dir_entry *root,
+                                            const char *name,
+                                            struct lprocfs_stats *stats)
 { return 0; }
-static inline int lprocfs_alloc_obd_counters(struct obd_device *obddev,
-                                             unsigned int num_private_counters)
+static inline int lprocfs_alloc_obd_stats(struct obd_device *obddev,
+                                             unsigned int num_private_stats)
 { return 0; }
-static inline void lprocfs_free_obd_counters(struct obd_device *obddev)
+static inline void lprocfs_free_obd_stats(struct obd_device *obddev)
 { return; }
 
 static inline struct proc_dir_entry *
@@ -279,6 +298,8 @@ static inline int lprocfs_add_vars(struct proc_dir_entry *root,
                                    struct lprocfs_vars *var,
                                    void *data) { return 0; }
 static inline void lprocfs_remove(struct proc_dir_entry *root) {};
+static inline struct proc_dir_entry *lprocfs_srch(struct proc_dir_entry *head,
+                                    const char *name) {return 0;}
 struct obd_device;
 static inline int lprocfs_obd_attach(struct obd_device *dev,
                                      struct lprocfs_vars *list) { return 0; }
index c2a54b9..2db4196 100644 (file)
@@ -63,6 +63,7 @@ typedef enum {
 #define LDLM_FL_CANCELING      (1 << 13) /* lock cancel has already been sent */
 #define LDLM_FL_LOCAL          (1 << 14) // a local lock (ie, no srv/cli split)
 #define LDLM_FL_WARN           (1 << 15) /* see ldlm_cli_cancel_unused */
+#define LDLM_FL_MATCH_DATA     (1 << 16) /* see ldlm_lock_match */
 
 /* The blocking callback is overloaded to perform two functions.  These flags
  * indicate which operation should be performed. */
@@ -371,7 +372,7 @@ void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode);
 void ldlm_grant_lock(struct ldlm_lock *lock, void *data, int datalen);
 int ldlm_lock_match(struct ldlm_namespace *ns, int flags, struct ldlm_res_id *,
                     __u32 type, void *cookie, int cookielen, ldlm_mode_t mode,
-                    struct lustre_handle *);
+                    void *data, struct lustre_handle *);
 struct ldlm_lock *
 ldlm_lock_create(struct ldlm_namespace *ns,
                  struct lustre_handle *parent_lock_handle, struct ldlm_res_id,
@@ -440,19 +441,6 @@ int ldlm_cli_enqueue(struct lustre_handle *conn,
                      ldlm_blocking_callback callback,
                      void *data,
                      struct lustre_handle *lockh);
-int ldlm_match_or_enqueue(struct lustre_handle *connh,
-                          struct ptlrpc_request *req,
-                          struct ldlm_namespace *ns,
-                          struct lustre_handle *parent_lock_handle,
-                          struct ldlm_res_id,
-                          __u32 type,
-                          void *cookie, int cookielen,
-                          ldlm_mode_t mode,
-                          int *flags,
-                          ldlm_completion_callback completion,
-                          ldlm_blocking_callback callback,
-                          void *data,
-                          struct lustre_handle *lockh);
 int ldlm_server_ast(struct lustre_handle *lockh, struct ldlm_lock_desc *new,
                     void *data, __u32 data_len);
 int ldlm_cli_convert(struct lustre_handle *, int new_mode, int *flags);
index f736d4b..fc00fe1 100644 (file)
@@ -46,7 +46,7 @@ struct fsfilt_operations {
                                  int niocount, struct niobuf_remote *nb);
         int     (* fs_commit)(struct inode *inode, void *handle,int force_sync);
         int     (* fs_setattr)(struct dentry *dentry, void *handle,
-                               struct iattr *iattr);
+                               struct iattr *iattr, int do_trunc);
         int     (* fs_set_md)(struct inode *inode, void *handle, void *md,
                               int size);
         int     (* fs_get_md)(struct inode *inode, void *md, int size);
@@ -79,8 +79,11 @@ extern void fsfilt_put_ops(struct fsfilt_operations *fs_ops);
 static inline void *fsfilt_start(struct obd_device *obd,
                                  struct inode *inode, int op)
 {
+        unsigned long now = jiffies;
         void *handle = obd->obd_fsops->fs_start(inode, op);
-        CDEBUG(D_HA, "starting handle %p\n", handle);
+        CDEBUG(D_HA, "started handle %p\n", handle);
+        if (time_after(jiffies, now + 15*HZ))
+                CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
         return handle;
 }
 
@@ -88,32 +91,33 @@ static inline void *fsfilt_brw_start(struct obd_device *obd, int objcount,
                                      struct fsfilt_objinfo *fso, int niocount,
                                      struct niobuf_remote *nb)
 {
+        unsigned long now = jiffies;
         void *handle = obd->obd_fsops->fs_brw_start(objcount, fso, niocount,nb);
-        CDEBUG(D_HA, "starting handle %p\n", handle);
+        CDEBUG(D_HA, "started handle %p\n", handle);
+        if (time_after(jiffies, now + 15*HZ))
+                CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
         return handle;
 }
 
 static inline int fsfilt_commit(struct obd_device *obd, struct inode *inode,
                                 void *handle, int force_sync)
 {
+        unsigned long now = jiffies;
+        int rc = obd->obd_fsops->fs_commit(inode, handle, force_sync);
         CDEBUG(D_HA, "committing handle %p\n", handle);
-        return obd->obd_fsops->fs_commit(inode, handle, force_sync);
+        if (time_after(jiffies, now + 15*HZ))
+                CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
+        return rc;
 }
 
 static inline int fsfilt_setattr(struct obd_device *obd, struct dentry *dentry,
-                                 void *handle, struct iattr *iattr)
+                                 void *handle, struct iattr *iattr,int do_trunc)
 {
+        unsigned long now = jiffies;
         int rc;
-        /*
-         * NOTE: we probably don't need to take i_sem here when changing
-         *       ATTR_SIZE because the MDS never needs to truncate a file.
-         *       The ext2/ext3 code never truncates a directory, and files
-         *       stored on the MDS are entirely sparse (no data blocks).
-         *       If we do need to get it, we can do it here.
-         */
-        lock_kernel();
-        rc = obd->obd_fsops->fs_setattr(dentry, handle, iattr);
-        unlock_kernel();
+        rc = obd->obd_fsops->fs_setattr(dentry, handle, iattr, do_trunc);
+        if (time_after(jiffies, now + 15*HZ))
+                CERROR("long setattr time %lus\n", (jiffies - now) / HZ);
 
         return rc;
 }
index b3acada..b49fd16 100644 (file)
@@ -216,27 +216,29 @@ static inline void lustre_msg_set_op_flags(struct lustre_msg *msg, int flags)
  */
 
 /* opcodes */
-#define OST_REPLY       0        /* reply ? */
-#define OST_GETATTR     1
-#define OST_SETATTR     2
-#define OST_READ        3
-#define OST_WRITE       4
-#define OST_CREATE      5
-#define OST_DESTROY     6
-#define OST_GET_INFO    7
-#define OST_CONNECT     8
-#define OST_DISCONNECT  9
-#define OST_PUNCH      10
-#define OST_OPEN       11
-#define OST_CLOSE      12
-#define OST_STATFS     13
-#define OST_SAN_READ   14
-#define OST_SAN_WRITE  15
-#define OST_SYNCFS     16
+typedef enum {
+        OST_REPLY      =  0,       /* reply ? */
+        OST_GETATTR    =  1,
+        OST_SETATTR    =  2,
+        OST_READ       =  3,
+        OST_WRITE      =  4,
+        OST_CREATE     =  5,
+        OST_DESTROY    =  6,
+        OST_GET_INFO   =  7,
+        OST_CONNECT    =  8,
+        OST_DISCONNECT =  9,
+        OST_PUNCH      = 10,
+        OST_OPEN       = 11,
+        OST_CLOSE      = 12,
+        OST_STATFS     = 13,
+        OST_SAN_READ   = 14,
+        OST_SAN_WRITE  = 15,
+        OST_SYNCFS     = 16,
+        OST_LAST_OPC
+} ost_cmd_t;
+#define OST_FIRST_OPC  OST_REPLY
 /* When adding OST RPC opcodes, please update 
  * LAST/FIRST macros used in ptlrpc/ptlrpc_internals.h */
-#define OST_LAST_OPC   (OST_SYNCFS+1)
-#define OST_FIRST_OPC  OST_REPLY
 
 
 typedef uint64_t        obd_id;
@@ -385,20 +387,23 @@ extern void lustre_swab_ost_body (struct ost_body *b);
  */
 
 /* opcodes */
-#define MDS_GETATTR      33
-#define MDS_GETATTR_NAME 34
-#define MDS_CLOSE        35
-#define MDS_REINT        36
-#define MDS_READPAGE     37
-#define MDS_CONNECT      38
-#define MDS_DISCONNECT   39
-#define MDS_GETSTATUS    40
-#define MDS_STATFS       41
-#define MDS_GETLOVINFO   42
+typedef enum {
+        MDS_GETATTR      = 33,
+        MDS_GETATTR_NAME = 34,
+        MDS_CLOSE        = 35,
+        MDS_REINT        = 36,
+        MDS_READPAGE     = 37,
+        MDS_CONNECT      = 38,
+        MDS_DISCONNECT   = 39,
+        MDS_GETSTATUS    = 40,
+        MDS_STATFS       = 41,
+        MDS_GETLOVINFO   = 42,
+        MDS_LAST_OPC               /* keep last: evaluates to highest opcode + 1 */
+} mds_cmd_t;
+#define MDS_FIRST_OPC    MDS_GETATTR /* lowest MDS opcode */
 /* When adding MDS RPC opcodes, please update 
  * LAST/FIRST macros used in ptlrpc/ptlrpc_internals.h */
-#define MDS_LAST_OPC     (MDS_GETLOVINFO+1)
-#define MDS_FIRST_OPC    MDS_GETATTR
+
 /*
  * Do not exceed 63 
  */
@@ -586,15 +591,17 @@ extern void lustre_swab_lov_desc (struct lov_desc *ld);
  *   LDLM requests:
  */
 /* opcodes -- MUST be distinct from OST/MDS opcodes */
-#define LDLM_ENQUEUE       101
-#define LDLM_CONVERT       102
-#define LDLM_CANCEL        103
-#define LDLM_BL_CALLBACK   104
-#define LDLM_CP_CALLBACK   105
+typedef enum {
+        LDLM_ENQUEUE     = 101,
+        LDLM_CONVERT     = 102,
+        LDLM_CANCEL      = 103,
+        LDLM_BL_CALLBACK = 104,
+        LDLM_CP_CALLBACK = 105,
+        LDLM_LAST_OPC              /* keep last: evaluates to highest opcode + 1 */
+} ldlm_cmd_t;
+#define LDLM_FIRST_OPC LDLM_ENQUEUE /* lowest LDLM opcode */
 /* When adding LDLM RPC opcodes, please update 
  * LAST/FIRST macros used in ptlrpc/ptlrpc_internals.h */
-#define LDLM_LAST_OPC      (LDLM_CP_CALLBACK+1)
-#define LDLM_FIRST_OPC     LDLM_ENQUEUE
 
 #define RES_NAME_SIZE 3
 #define RES_VERSION_SIZE 4
@@ -679,11 +686,11 @@ typedef enum {
         PTLBD_FLUSH = 203,
         PTLBD_CONNECT = 204,
         PTLBD_DISCONNECT = 205,
+        PTLBD_LAST_OPC
 } ptlbd_cmd_t;
+#define PTLBD_FIRST_OPC PTLBD_QUERY
 /* When adding PTLBD RPC opcodes, please update 
  * LAST/FIRST macros used in ptlrpc/ptlrpc_internals.h */
-#define PTLBD_LAST_OPC  (PTLBD_FLUSH+1)
-#define PTLBD_FIRST_OPC PTLBD_QUERY
 
 struct ptlbd_op {
         __u16 op_cmd;
index c43cf95..57e9620 100644 (file)
@@ -25,6 +25,8 @@
 #ifndef _LUSTRE_LIB_H
 #define _LUSTRE_LIB_H
 
+#include <config.h>
+
 #ifndef __KERNEL__
 # include <string.h>
 # include <sys/types.h>
@@ -195,7 +197,7 @@ static inline int ll_fid2str(char *str, __u64 id, __u32 generation)
 /*
  *   OBD IOCTLS
  */
-#define OBD_IOCTL_VERSION 0x00010002
+#define OBD_IOCTL_VERSION 0x00010003
 
 struct obd_ioctl_data {
         uint32_t ioc_len;
@@ -222,13 +224,15 @@ struct obd_ioctl_data {
         uint32_t ioc_plen2;
         char    *ioc_pbuf2;
 
-        /* two inline buffers */
+        /* inline buffers for various arguments */
         uint32_t ioc_inllen1;
         char    *ioc_inlbuf1;
         uint32_t ioc_inllen2;
         char    *ioc_inlbuf2;
         uint32_t ioc_inllen3;
         char    *ioc_inlbuf3;
+        uint32_t ioc_inllen4;
+        char    *ioc_inlbuf4;
 
         char    ioc_bulk[0];
 };
@@ -244,6 +248,7 @@ static inline int obd_ioctl_packlen(struct obd_ioctl_data *data)
         len += size_round(data->ioc_inllen1);
         len += size_round(data->ioc_inllen2);
         len += size_round(data->ioc_inllen3);
+        len += size_round(data->ioc_inllen4);
         return len;
 }
 
@@ -262,11 +267,14 @@ static inline int obd_ioctl_is_invalid(struct obd_ioctl_data *data)
                 printk("OBD ioctl: ioc_inllen2 larger than 1<<30\n");
                 return 1;
         }
-
         if (data->ioc_inllen3 > (1<<30)) {
                 printk("OBD ioctl: ioc_inllen3 larger than 1<<30\n");
                 return 1;
         }
+        if (data->ioc_inllen4 > (1<<30)) {
+                printk("OBD ioctl: ioc_inllen4 larger than 1<<30\n");
+                return 1;
+        }
         if (data->ioc_inlbuf1 && !data->ioc_inllen1) {
                 printk("OBD ioctl: inlbuf1 pointer but 0 length\n");
                 return 1;
@@ -279,6 +287,10 @@ static inline int obd_ioctl_is_invalid(struct obd_ioctl_data *data)
                 printk("OBD ioctl: inlbuf3 pointer but 0 length\n");
                 return 1;
         }
+        if (data->ioc_inlbuf4 && !data->ioc_inllen4) {
+                printk("OBD ioctl: inlbuf4 pointer but 0 length\n");
+                return 1;
+        }
         if (data->ioc_pbuf1 && !data->ioc_plen1) {
                 printk("OBD ioctl: pbuf1 pointer but 0 length\n");
                 return 1;
@@ -287,20 +299,6 @@ static inline int obd_ioctl_is_invalid(struct obd_ioctl_data *data)
                 printk("OBD ioctl: pbuf2 pointer but 0 length\n");
                 return 1;
         }
-        /*
-        if (data->ioc_inllen1 && !data->ioc_inlbuf1) {
-                printk("OBD ioctl: inllen1 set but NULL pointer\n");
-                return 1;
-        }
-        if (data->ioc_inllen2 && !data->ioc_inlbuf2) {
-                printk("OBD ioctl: inllen2 set but NULL pointer\n");
-                return 1;
-        }
-        if (data->ioc_inllen3 && !data->ioc_inlbuf3) {
-                printk("OBD ioctl: inllen3 set but NULL pointer\n");
-                return 1;
-        }
-        */
         if (data->ioc_plen1 && !data->ioc_pbuf1) {
                 printk("OBD ioctl: plen1 set but NULL pointer\n");
                 return 1;
@@ -314,24 +312,6 @@ static inline int obd_ioctl_is_invalid(struct obd_ioctl_data *data)
                        obd_ioctl_packlen(data), data->ioc_len);
                 return 1;
         }
-#if 0
-        if (data->ioc_inllen1 &&
-            data->ioc_bulk[data->ioc_inllen1 - 1] != '\0') {
-                printk("OBD ioctl: inlbuf1 not 0 terminated\n");
-                return 1;
-        }
-        if (data->ioc_inllen2 &&
-            data->ioc_bulk[size_round(data->ioc_inllen1) + data->ioc_inllen2 - 1] != '\0') {
-                printk("OBD ioctl: inlbuf2 not 0 terminated\n");
-                return 1;
-        }
-        if (data->ioc_inllen3 &&
-            data->ioc_bulk[size_round(data->ioc_inllen1) + size_round(data->ioc_inllen2)
-                           + data->ioc_inllen3 - 1] != '\0') {
-                printk("OBD ioctl: inlbuf3 not 0 terminated\n");
-                return 1;
-        }
-#endif
         return 0;
 }
 
@@ -361,6 +341,8 @@ static inline int obd_ioctl_pack(struct obd_ioctl_data *data, char **pbuf,
                 LOGL(data->ioc_inlbuf2, data->ioc_inllen2, ptr);
         if (data->ioc_inlbuf3)
                 LOGL(data->ioc_inlbuf3, data->ioc_inllen3, ptr);
+        if (data->ioc_inlbuf4)
+                LOGL(data->ioc_inlbuf4, data->ioc_inllen4, ptr);
         if (obd_ioctl_is_invalid(overlay))
                 return 1;
 
@@ -381,6 +363,7 @@ static inline int obd_ioctl_unpack(struct obd_ioctl_data *data, char *pbuf,
         overlay->ioc_inlbuf1 = data->ioc_inlbuf1;
         overlay->ioc_inlbuf2 = data->ioc_inlbuf2;
         overlay->ioc_inlbuf3 = data->ioc_inlbuf3;
+        overlay->ioc_inlbuf4 = data->ioc_inlbuf4;
 
         memcpy(data, pbuf, sizeof(*data));
 
@@ -391,6 +374,8 @@ static inline int obd_ioctl_unpack(struct obd_ioctl_data *data, char *pbuf,
                 LOGU(data->ioc_inlbuf2, data->ioc_inllen2, ptr);
         if (data->ioc_inlbuf3)
                 LOGU(data->ioc_inlbuf3, data->ioc_inllen3, ptr);
+        if (data->ioc_inlbuf4)
+                LOGU(data->ioc_inlbuf4, data->ioc_inllen4, ptr);
 
         return 0;
 }
@@ -398,8 +383,6 @@ static inline int obd_ioctl_unpack(struct obd_ioctl_data *data, char *pbuf,
 
 #include <linux/obd_support.h>
 
-#define OBD_MAX_IOCTL_BUFFER 8192
-
 /* buffer MUST be at least the size of obd_ioctl_hdr */
 static inline int obd_ioctl_getdata(char **buf, int *len, void *arg)
 {
@@ -467,6 +450,13 @@ static inline int obd_ioctl_getdata(char **buf, int *len, void *arg)
                         size_round(data->ioc_inllen2);
         }
 
+        if (data->ioc_inllen4) {
+                data->ioc_inlbuf4 = &data->ioc_bulk[0] +
+                        size_round(data->ioc_inllen1) +
+                        size_round(data->ioc_inllen2) + 
+                        size_round(data->ioc_inllen3) ;
+        }
+
         EXIT;
         return 0;
 }
index 35d4994..17ea754 100644 (file)
@@ -98,32 +98,6 @@ struct ll_read_extent {
 int ll_check_dirty( struct super_block *sb );
 int ll_batch_writepage( struct inode *inode, struct page *page );
 
-struct file_io_stats {
-        spinlock_t     fis_lock;
-        __u64   fis_dirty_pages;
-        __u64   fis_dirty_hits;
-        __u64   fis_dirty_misses;
-        __u64   fis_forced_pages;
-        __u64   fis_writepage_pages;
-        __u64   fis_wb_ok;
-        __u64   fis_wb_fail;
-        __u64   fis_wb_from_writepage;
-        __u64   fis_wb_from_pressure;
-};
-
-#define IO_STAT_ADD(FIS, STAT, VAL) do {        \
-        struct file_io_stats *_fis_ = (FIS);    \
-        spin_lock(&_fis_->fis_lock);            \
-        _fis_->fis_##STAT += VAL;               \
-        spin_unlock(&_fis_->fis_lock);          \
-} while (0)
-
-#define INODE_IO_STAT_ADD(INODE, STAT, VAL)        \
-        IO_STAT_ADD(&ll_i2sbi(INODE)->ll_iostats, STAT, VAL)
-
-#define PAGE_IO_STAT_ADD(PAGE, STAT, VAL)               \
-        INODE_IO_STAT_ADD((PAGE)->mapping, STAT, VAL)
-
 /* interpet return codes from intent lookup */
 #define LL_LOOKUP_POSITIVE 1
 #define LL_LOOKUP_NEGATIVE 2
@@ -155,7 +129,7 @@ struct ll_sb_info {
 
         struct list_head          ll_orphan_dentry_list; /*please don't ask -p*/
 
-        struct  file_io_stats     ll_iostats;
+        struct lprocfs_stats     *ll_stats; /* lprocfs stats counter */
 };
 
 static inline struct ll_sb_info *ll_s2sbi(struct super_block *sb)
@@ -306,6 +280,40 @@ do {                                                                           \
 
 #define LL_IT2STR(it) ((it) ? ldlm_it2str((it)->it_op) : "0")
 
+enum { /* llite counter indices; presumably index into ll_stats -- TODO confirm */
+         LPROC_LL_DIRTY_PAGES       = 0,
+         LPROC_LL_DIRTY_HITS,
+         LPROC_LL_DIRTY_MISSES,
+         LPROC_LL_WB_WRITEPAGE,
+         LPROC_LL_WB_PRESSURE,
+         LPROC_LL_WB_OK,
+         LPROC_LL_WB_FAIL,
+         LPROC_LL_READ_BYTES,
+         LPROC_LL_WRITE_BYTES,
+         LPROC_LL_BRW_READ,
+         LPROC_LL_BRW_WRITE,
+         LPROC_LL_IOCTL,
+         LPROC_LL_OPEN,
+         LPROC_LL_RELEASE,
+         LPROC_LL_MAP,
+         LPROC_LL_LLSEEK,
+         LPROC_LL_FSYNC,
+         LPROC_LL_SETATTR_RAW,
+         LPROC_LL_SETATTR,
+         LPROC_LL_TRUNC,
+
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+         LPROC_LL_GETATTR,
+#else
+         LPROC_LL_REVALIDATE,
+#endif /* exactly one of the two exists, so the values below are the same either way */
+         LPROC_LL_STAFS,
+         LPROC_LL_ALLOC_INODE,
+
+         LPROC_LL_DIRECT_READ,
+         LPROC_LL_DIRECT_WRITE,
+         LPROC_LL_FILE_OPCODES /* keep last: equals the number of entries above */
+};
 /* dcache.c */
 int ll_have_md_lock(struct dentry *de);
 
@@ -318,8 +326,8 @@ extern struct file_operations ll_file_operations;
 extern struct inode_operations ll_file_inode_operations;
 extern struct inode_operations ll_special_inode_operations;
 struct ldlm_lock;
-int ll_lock_callback(struct ldlm_lock *, struct ldlm_lock_desc *, void *data,
-                     int flag);
+int ll_extent_lock_callback(struct ldlm_lock *, struct ldlm_lock_desc *,
+                            void *data, int flag);
 int ll_extent_lock_no_validate(struct ll_file_data *fd, struct inode *inode,
                    struct lov_stripe_md *lsm, int mode,
                    struct ldlm_extent *extent, struct lustre_handle *lockh);
index ed5db88..f71802c 100644 (file)
@@ -412,8 +412,8 @@ struct ptlrpc_service {
         struct list_head srv_threads;
         int (*srv_handler)(struct ptlrpc_request *req);
         char *srv_name;  /* only statically allocated strings here; we don't clean them */
-        struct proc_dir_entry   *svc_procroot;    
-        struct lprocfs_counters *svc_counters;
+        struct proc_dir_entry *svc_procroot;
+        struct lprocfs_stats  *svc_stats;
 
         int                  srv_interface_rover;
         struct ptlrpc_srv_ni srv_interfaces[0];
index fe53974..2fb2c5c 100644 (file)
@@ -74,25 +74,26 @@ struct brw_page {
 struct ost_server_data;
 
 struct filter_obd {
-        char *fo_fstype;
-        struct super_block *fo_sb;
-        struct vfsmount *fo_vfsmnt;
-        struct obd_run_ctxt fo_ctxt;
-        struct dentry *fo_dentry_O;
-        struct dentry *fo_dentry_O_mode[16];
-        struct dentry **fo_dentry_O_sub;
-        spinlock_t fo_objidlock;        /* protects fo_lastobjid increment */
-        spinlock_t fo_translock;        /* protects fsd_last_rcvd increment */
-        struct file *fo_rcvd_filp;
+        const char          *fo_fstype;
+        char *fo_nspath;
+        struct super_block  *fo_sb;
+        struct vfsmount     *fo_vfsmnt;
+        struct obd_run_ctxt  fo_ctxt;
+        struct dentry       *fo_dentry_O;
+        struct dentry       *fo_dentry_O_mode[16];
+        struct dentry      **fo_dentry_O_sub;
+        spinlock_t           fo_objidlock; /* protect fo_lastobjid increment */
+        spinlock_t           fo_translock; /* protect fsd_last_rcvd increment */
+        struct file         *fo_rcvd_filp;
         struct filter_server_data *fo_fsd;
-        unsigned long *fo_last_rcvd_slots;
+        unsigned long       *fo_last_rcvd_slots;
 
         struct file_operations *fo_fop;
         struct inode_operations *fo_iop;
         struct address_space_operations *fo_aops;
-        struct list_head fo_export_list;
-        spinlock_t fo_fddlock;          /* protects setting dentry->d_fsdata */
-        int fo_subdir_count;
+        struct list_head     fo_export_list;
+        spinlock_t           fo_fddlock; /* protect setting dentry->d_fsdata */
+        int                  fo_subdir_count;
 };
 
 struct mds_server_data;
@@ -223,6 +224,7 @@ struct niobuf_local {
         __u32 rc;
         struct page *page;
         struct dentry *dentry;
+        unsigned long start;
 };
 
 /* Don't conflict with on-wire flags OBD_BRW_WRITE, etc */
@@ -287,8 +289,8 @@ struct obd_device {
                 struct ptlbd_obd ptlbd;
         } u;
        /* Fields used by LProcFS */
-        unsigned int cntr_base;
-        void *counters;
+        unsigned int           obd_cntr_base;
+        struct lprocfs_stats  *obd_stats;
 };
 
 struct obd_ops {
@@ -364,11 +366,11 @@ struct obd_ops {
         int (*o_enqueue)(struct lustre_handle *conn, struct lov_stripe_md *md,
                          struct lustre_handle *parent_lock,
                          __u32 type, void *cookie, int cookielen, __u32 mode,
-                         int *flags, void *cb, void *data, int datalen,
+                         int *flags, void *cb, void *data,
                          struct lustre_handle *lockh);
         int (*o_match)(struct lustre_handle *conn, struct lov_stripe_md *md,
                          __u32 type, void *cookie, int cookielen, __u32 mode,
-                         int *flags, struct lustre_handle *lockh);
+                         int *flags, void *data, struct lustre_handle *lockh);
         int (*o_cancel)(struct lustre_handle *, struct lov_stripe_md *md,
                         __u32 mode, struct lustre_handle *);
         int (*o_cancel_unused)(struct lustre_handle *, struct lov_stripe_md *,
index 64b0a68..e93032a 100644 (file)
@@ -210,17 +210,17 @@ do {                                                            \
           offsetof(struct obd_ops, o_iocontrol))                \
          / sizeof(((struct obd_ops *)(0))->o_iocontrol))
 
-#define OBD_COUNTER_INCREMENT(obd, op)                           \
-        if ((obd)->counters != NULL) {                           \
-            struct lprocfs_counters* cntrs = obd->counters;      \
-            unsigned int coffset;                                \
-            coffset = (obd)->cntr_base + OBD_COUNTER_OFFSET(op); \
-            LASSERT(coffset < cntrs->num);                       \
-            LPROCFS_COUNTER_INCBY1(&cntrs->cntr[coffset]);       \
+#define OBD_COUNTER_INCREMENT(obd, op) /* bump obd's counter for method op */ \
+        if ((obd)->obd_stats != NULL) { /* no-op when stats are not set up */ \
+                unsigned int coffset;                           \
+                coffset = (unsigned int)(obd)->obd_cntr_base +  \
+                        OBD_COUNTER_OFFSET(op);                 \
+                LASSERT(coffset < (obd)->obd_stats->ls_num);    \
+                lprocfs_counter_incr((obd)->obd_stats, coffset);/* NOTE: bare if; never use as an unbraced if/else body */ \
         }
 #else
-#define OBD_COUNTER_OFFSET(op) 
-#define OBD_COUNTER_INCREMENT(obd, op)           
+#define OBD_COUNTER_OFFSET(op)
+#define OBD_COUNTER_INCREMENT(obd, op)
 #endif
 
 #define OBD_CHECK_OP(obd, op)                                   \
@@ -230,7 +230,6 @@ do {                                                            \
                        obd->obd_minor);                         \
                 RETURN(-EOPNOTSUPP);                            \
         }                                                       \
-        OBD_COUNTER_INCREMENT(obd, op);                         \
 } while (0)
 
 static inline int obd_get_info(struct lustre_handle *conn, __u32 keylen,
@@ -242,6 +241,7 @@ static inline int obd_get_info(struct lustre_handle *conn, __u32 keylen,
 
         OBD_CHECK_ACTIVE(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, get_info);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, get_info);
 
         rc = OBP(exp->exp_obd, get_info)(conn, keylen, key, vallen, val);
         class_export_put(exp);
@@ -257,6 +257,7 @@ static inline int obd_set_info(struct lustre_handle *conn, obd_count keylen,
 
         OBD_CHECK_ACTIVE(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, set_info);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, set_info);
 
         rc = OBP(exp->exp_obd, set_info)(conn, keylen, key, vallen, val);
         class_export_put(exp);
@@ -269,6 +270,7 @@ static inline int obd_setup(struct obd_device *obd, int datalen, void *data)
         ENTRY;
 
         OBD_CHECK_OP(obd, setup);
+        OBD_COUNTER_INCREMENT(obd, setup);
 
         rc = OBP(obd, setup)(obd, datalen, data);
         RETURN(rc);
@@ -281,6 +283,7 @@ static inline int obd_cleanup(struct obd_device *obd, int force, int failover)
 
         OBD_CHECK_DEV_STOPPING(obd);
         OBD_CHECK_OP(obd, cleanup);
+        OBD_COUNTER_INCREMENT(obd, cleanup);
 
         rc = OBP(obd, cleanup)(obd, force, failover);
         RETURN(rc);
@@ -303,6 +306,7 @@ static inline int obd_packmd(struct lustre_handle *conn,
 
         OBD_CHECK_ACTIVE(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, packmd);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, packmd);
 
         rc = OBP(exp->exp_obd, packmd)(conn, disk_tgt, mem_src);
         class_export_put(exp);
@@ -350,6 +354,7 @@ static inline int obd_unpackmd(struct lustre_handle *conn,
 
         OBD_CHECK_ACTIVE(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, unpackmd);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, unpackmd);
 
         rc = OBP(exp->exp_obd, unpackmd)(conn, mem_tgt, disk_src, disk_len);
         class_export_put(exp);
@@ -390,6 +395,7 @@ static inline int obd_create(struct lustre_handle *conn, struct obdo *obdo,
 
         OBD_CHECK_ACTIVE(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, create);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, create);
 
         rc = OBP(exp->exp_obd, create)(conn, obdo, ea, oti);
         class_export_put(exp);
@@ -406,6 +412,7 @@ static inline int obd_destroy(struct lustre_handle *conn, struct obdo *obdo,
 
         OBD_CHECK_ACTIVE(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, destroy);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, destroy);
 
         rc = OBP(exp->exp_obd, destroy)(conn, obdo, ea, oti);
         class_export_put(exp);
@@ -421,14 +428,15 @@ static inline int obd_getattr(struct lustre_handle *conn, struct obdo *obdo,
 
         OBD_CHECK_ACTIVE(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, getattr);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, getattr);
 
         rc = OBP(exp->exp_obd, getattr)(conn, obdo, ea);
         class_export_put(exp);
         RETURN(rc);
 }
 
-static inline int obd_getattr_async(struct lustre_handle *conn, struct obdo *obdo,
-                                    struct lov_stripe_md *ea, 
+static inline int obd_getattr_async(struct lustre_handle *conn,
+                                    struct obdo *obdo, struct lov_stripe_md *ea,
                                     struct ptlrpc_request_set *set)
 {
         struct obd_export *exp;
@@ -437,6 +445,7 @@ static inline int obd_getattr_async(struct lustre_handle *conn, struct obdo *obd
 
         OBD_CHECK_SETUP(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, getattr);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, getattr);
 
         rc = OBP(exp->exp_obd, getattr_async)(conn, obdo, ea, set);
         class_export_put(exp);
@@ -453,6 +462,7 @@ static inline int obd_close(struct lustre_handle *conn, struct obdo *obdo,
 
         OBD_CHECK_ACTIVE(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, close);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, close);
 
         rc = OBP(exp->exp_obd, close)(conn, obdo, ea, oti);
         class_export_put(exp);
@@ -469,6 +479,7 @@ static inline int obd_open(struct lustre_handle *conn, struct obdo *obdo,
 
         OBD_CHECK_ACTIVE(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, open);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, open);
 
         rc = OBP(exp->exp_obd, open)(conn, obdo, ea, oti, och);
         class_export_put(exp);
@@ -485,6 +496,7 @@ static inline int obd_setattr(struct lustre_handle *conn, struct obdo *obdo,
 
         OBD_CHECK_ACTIVE(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, setattr);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, setattr);
 
         rc = OBP(exp->exp_obd, setattr)(conn, obdo, ea, oti);
         class_export_put(exp);
@@ -499,6 +511,7 @@ static inline int obd_connect(struct lustre_handle *conn,
 
         OBD_CHECK_DEV_ACTIVE(obd);
         OBD_CHECK_OP(obd, connect);
+        OBD_COUNTER_INCREMENT(obd, connect);
 
         rc = OBP(obd, connect)(conn, obd, cluuid);
         RETURN(rc);
@@ -512,6 +525,7 @@ static inline int obd_disconnect(struct lustre_handle *conn, int failover)
 
         OBD_CHECK_SETUP(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, disconnect);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, disconnect);
 
         rc = OBP(exp->exp_obd, disconnect)(conn, failover);
         class_export_put(exp);
@@ -534,6 +548,7 @@ static inline int obd_statfs(struct lustre_handle *conn,struct obd_statfs *osfs)
 
         OBD_CHECK_ACTIVE(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, statfs);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, statfs);
 
         rc = OBP(exp->exp_obd, statfs)(conn, osfs);
         class_export_put(exp);
@@ -546,6 +561,7 @@ static inline int obd_syncfs(struct obd_export *exp)
         ENTRY;
 
         OBD_CHECK_OP(exp->exp_obd, syncfs);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, syncfs);
 
         rc = OBP(exp->exp_obd, syncfs)(exp);
         RETURN(rc);
@@ -561,6 +577,7 @@ static inline int obd_punch(struct lustre_handle *conn, struct obdo *oa,
 
         OBD_CHECK_ACTIVE(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, punch);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, punch);
 
         rc = OBP(exp->exp_obd, punch)(conn, oa, ea, start, end, oti);
         class_export_put(exp);
@@ -577,6 +594,7 @@ static inline int obd_brw(int cmd, struct lustre_handle *conn,
 
         OBD_CHECK_ACTIVE(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, brw);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, brw);
 
         if (!(cmd & (OBD_BRW_RWMASK | OBD_BRW_CHECK))) {
                 CERROR("obd_brw: cmd must be OBD_BRW_READ, OBD_BRW_WRITE, "
@@ -601,6 +619,7 @@ static inline int obd_brw_async(int cmd, struct lustre_handle *conn,
 
         OBD_CHECK_ACTIVE(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, brw_async);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, brw_async);
 
         if (!(cmd & OBD_BRW_RWMASK)) {
                 CERROR("obd_brw: cmd must be OBD_BRW_READ or OBD_BRW_WRITE\n");
@@ -622,6 +641,7 @@ static inline int obd_preprw(int cmd, struct obd_export *exp,
         ENTRY;
 
         OBD_CHECK_OP(exp->exp_obd, preprw);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, preprw);
 
         rc = OBP(exp->exp_obd, preprw)(cmd, exp, objcount, obj, niocount,
                                        remote, local, desc_private, oti);
@@ -637,6 +657,7 @@ static inline int obd_commitrw(int cmd, struct obd_export *exp,
         ENTRY;
 
         OBD_CHECK_OP(exp->exp_obd, commitrw);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, commitrw);
 
         rc = OBP(exp->exp_obd, commitrw)(cmd, exp, objcount, obj, niocount,
                                          local, desc_private, oti);
@@ -652,6 +673,7 @@ static inline int obd_iocontrol(unsigned int cmd, struct lustre_handle *conn,
 
         OBD_CHECK_ACTIVE(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, iocontrol);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, iocontrol);
 
         rc = OBP(exp->exp_obd, iocontrol)(cmd, conn, len, karg, uarg);
         class_export_put(exp);
@@ -663,7 +685,7 @@ static inline int obd_enqueue(struct lustre_handle *conn,
                               struct lustre_handle *parent_lock,
                               __u32 type, void *cookie, int cookielen,
                               __u32 mode, int *flags, void *cb, void *data,
-                              int datalen, struct lustre_handle *lockh)
+                              struct lustre_handle *lockh)
 {
         struct obd_export *exp;
         int rc;
@@ -671,19 +693,19 @@ static inline int obd_enqueue(struct lustre_handle *conn,
 
         OBD_CHECK_ACTIVE(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, enqueue);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, enqueue);
 
         rc = OBP(exp->exp_obd, enqueue)(conn, ea, parent_lock, type,
                                         cookie, cookielen, mode, flags, cb,
-                                        data, datalen, lockh);
+                                        data, lockh);
         class_export_put(exp);
         RETURN(rc);
 }
 
 static inline int obd_match(struct lustre_handle *conn,
-                              struct lov_stripe_md *ea,
-                              __u32 type, void *cookie, int cookielen,
-                              __u32 mode, int *flags, 
-                              struct lustre_handle *lockh)
+                            struct lov_stripe_md *ea, __u32 type, void *cookie,
+                            int cookielen, __u32 mode, int *flags, void *data,
+                            struct lustre_handle *lockh)
 {
         struct obd_export *exp;
         int rc;
@@ -691,9 +713,10 @@ static inline int obd_match(struct lustre_handle *conn,
 
         OBD_CHECK_ACTIVE(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, match);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, match);
 
         rc = OBP(exp->exp_obd, match)(conn, ea, type, cookie, cookielen, mode,
-                                      flags, lockh);
+                                      flags, data, lockh);
         class_export_put(exp);
         RETURN(rc);
 }
@@ -709,6 +732,7 @@ static inline int obd_cancel(struct lustre_handle *conn,
 
         OBD_CHECK_ACTIVE(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, cancel);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, cancel);
 
         rc = OBP(exp->exp_obd, cancel)(conn, ea, mode, lockh);
         class_export_put(exp);
@@ -725,6 +749,7 @@ static inline int obd_cancel_unused(struct lustre_handle *conn,
 
         OBD_CHECK_ACTIVE(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, cancel_unused);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, cancel_unused);
 
         rc = OBP(exp->exp_obd, cancel_unused)(conn, ea, flags, opaque);
         class_export_put(exp);
@@ -740,6 +765,7 @@ static inline int obd_san_preprw(int cmd, struct lustre_handle *conn,
 
         OBD_CHECK_ACTIVE(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, preprw);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, preprw);
 
         rc = OBP(exp->exp_obd, san_preprw)(cmd, conn, objcount, obj,
                                            niocount, remote);
index 74bb784..3d8188a 100644 (file)
@@ -91,11 +91,13 @@ struct filter_file_data {
 };
 
 struct filter_dentry_data {
-        obd_id           fdd_objid;
-        atomic_t         fdd_open_count;
-        int              fdd_flags;
+        obd_id                  fdd_objid;
+        __u32                   fdd_magic;
+        atomic_t                fdd_open_count;
+        int                     fdd_flags;
 };
 
+#define FILTER_DENTRY_MAGIC 0x9efba101
 #define FILTER_FLAG_DESTROY 0x0001      /* destroy dentry on last file close */
 
 
index 69a47dc..aecef05 100644 (file)
@@ -96,6 +96,7 @@ extern unsigned long obd_sync_filter;
 #define OBD_FAIL_OST_ALL_REPLY_NET       0x211
 #define OBD_FAIL_OST_ALL_REQUESTS_NET    0x212
 #define OBD_FAIL_OST_LDLM_REPLY_NET      0x213
+#define OBD_FAIL_OST_BRW_PAUSE_BULK      0x214
 
 #define OBD_FAIL_LDLM                    0x300
 #define OBD_FAIL_LDLM_NAMESPACE_NEW      0x301
@@ -146,6 +147,19 @@ do {                                                                         \
         }                                                                    \
 } while(0)
 
+#define OBD_FAIL_TIMEOUT(id, secs) /* if OBD_FAIL_CHECK_ONCE(id) fires, log and sleep secs seconds */ \
+do {                                                                         \
+        if  (OBD_FAIL_CHECK_ONCE(id)) {                                      \
+                CERROR("obd_fail_timeout id %x sleeping for %ld secs\n",     \
+                       (id), (long)(secs)); /* cast so %ld matches any integer secs */ \
+                set_current_state(TASK_UNINTERRUPTIBLE);                     \
+                schedule_timeout((secs) * HZ);                               \
+                set_current_state(TASK_RUNNING);                             \
+                CERROR("obd_fail_timeout id %x awake\n",                     \
+                       (id));                                                \
+       }                                                                     \
+} while(0)
+
 #define fixme() CDEBUG(D_OTHER, "FIXME\n");
 
 #ifdef __KERNEL__
@@ -194,47 +208,39 @@ static inline void OBD_FAIL_WRITE(int id, kdev_t dev)
 #define LTIME_S(time) (time)
 #endif  /* __KERNEL__ */
 
-#define OBD_ALLOC(ptr, size)                                            \
-do {                                                                    \
-        void *lptr;                                                     \
-        int s = (size);                                                 \
-        (ptr) = lptr = kmalloc(s, GFP_KERNEL);                          \
-        if (lptr == NULL) {                                             \
-                CERROR("kmalloc of '" #ptr "' (%d bytes) failed "       \
-                       "at %s:%d\n", s, __FILE__, __LINE__);            \
-        } else {                                                        \
-                int obd_curmem;                                         \
-                memset(lptr, 0, s);                                     \
-                atomic_add(s, &obd_memory);                             \
-                obd_curmem = atomic_read(&obd_memory);                  \
-                if (obd_curmem > obd_memmax)                            \
-                        obd_memmax = obd_curmem;                        \
-                CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p "      \
-                       "(tot %d)\n", s, lptr, obd_curmem);              \
-        }                                                               \
+#define OBD_ALLOC(ptr, size)                                                  \
+do {    /* NB: ptr and size are each expanded more than once */               \
+        (ptr) = kmalloc(size, GFP_KERNEL);                                    \
+        if ((ptr) == NULL) {                                                  \
+                CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n",  \
+                       (int)(size), __FILE__, __LINE__);                      \
+        } else {  /* zero buffer, add size to obd_memory, track max */        \
+                memset(ptr, 0, size);                                         \
+                atomic_add(size, &obd_memory);                                \
+                if (atomic_read(&obd_memory) > obd_memmax)                    \
+                        obd_memmax = atomic_read(&obd_memory);                \
+                CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d)\n", \
+                       (int)(size), ptr, atomic_read(&obd_memory));           \
+        }                                                                     \
 } while (0)
 
 #ifdef __arch_um__
 # define OBD_VMALLOC(ptr, size) OBD_ALLOC(ptr, size)
 #else
-# define OBD_VMALLOC(ptr, size)                                         \
-do {                                                                    \
-        void *lptr;                                                     \
-        int s = (size);                                                 \
-        (ptr) = lptr = vmalloc(s);                                      \
-        if (lptr == NULL) {                                             \
-                CERROR("vmalloc of '" #ptr "' (%d bytes) failed "       \
-                       "at %s:%d\n", s, __FILE__, __LINE__);            \
-        } else {                                                        \
-                int obd_curmem;                                         \
-                memset(lptr, 0, s);                                     \
-                atomic_add(s, &obd_memory);                             \
-                obd_curmem = atomic_read(&obd_memory);                  \
-                if (obd_curmem > obd_memmax)                            \
-                        obd_memmax = obd_curmem;                        \
-                CDEBUG(D_MALLOC, "vmalloced '" #ptr "': %d at %p "      \
-                       "(tot %d)\n", s, lptr, obd_curmem);              \
-        }                                                               \
+# define OBD_VMALLOC(ptr, size)                                               \
+do {    /* NB: ptr and size are each expanded more than once */               \
+        (ptr) = vmalloc(size);                                                \
+        if ((ptr) == NULL) {                                                  \
+                CERROR("vmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n",  \
+                       (int)(size), __FILE__, __LINE__);                      \
+        } else {  /* zero buffer, add size to obd_memory, track max */        \
+                memset(ptr, 0, size);                                         \
+                atomic_add(size, &obd_memory);                                \
+                if (atomic_read(&obd_memory) > obd_memmax)                    \
+                        obd_memmax = atomic_read(&obd_memory);                \
+                CDEBUG(D_MALLOC, "vmalloced '" #ptr "': %d at %p (tot %d)\n", \
+                       (int)(size), ptr, atomic_read(&obd_memory));           \
+        }                                                                     \
 } while (0)
 #endif
 
@@ -244,68 +250,58 @@ do {                                                                    \
 #define POISON(lptr, c, s) memset(lptr, c, s)
 #endif
 
-#define OBD_FREE(ptr, size)                                             \
-do {                                                                    \
-        void *lptr = (ptr);                                             \
-        int s = (size);                                                 \
-        LASSERT(lptr);                                                  \
-        POISON(lptr, 0x5a, s);                                          \
-        kfree(lptr);                                                    \
-        atomic_sub(s, &obd_memory);                                     \
-        CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n",     \
-               s, lptr, atomic_read(&obd_memory));                      \
-        (ptr) = (void *)0xdeadbeef;                                     \
+#define OBD_FREE(ptr, size)                                                   \
+do {    /* NB: ptr and size are each expanded more than once */               \
+        LASSERT(ptr);                                                         \
+        atomic_sub(size, &obd_memory);                                        \
+        CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n",           \
+               (int)(size), ptr, atomic_read(&obd_memory));                   \
+        POISON(ptr, 0x5a, size);                                              \
+        kfree(ptr);                                                           \
+        (ptr) = (void *)0xdeadbeef;  /* invalidate caller's pointer */        \
 } while (0)
 
 #ifdef __arch_um__
 # define OBD_VFREE(ptr, size) OBD_FREE(ptr, size)
 #else
-# define OBD_VFREE(ptr, size)                                           \
-do {                                                                    \
-        void *lptr = (ptr);                                             \
-        int s = (size);                                                 \
-        LASSERT(lptr);                                                  \
-        POISON(lptr, 0x5a, s);                                          \
-        vfree(lptr);                                                    \
-        atomic_sub(s, &obd_memory);                                     \
-        CDEBUG(D_MALLOC, "vfreed '" #ptr "': %d at %p (tot %d).\n",     \
-               s, lptr, atomic_read(&obd_memory));                      \
-        (ptr) = (void *)0xdeadbeef;                                     \
+# define OBD_VFREE(ptr, size)                                                 \
+do {    /* NB: ptr and size are each expanded more than once */               \
+        LASSERT(ptr);                                                         \
+        atomic_sub(size, &obd_memory);                                        \
+        CDEBUG(D_MALLOC, "vfreed '" #ptr "': %d at %p (tot %d).\n",           \
+               (int)(size), ptr, atomic_read(&obd_memory));                   \
+        POISON(ptr, 0x5a, size);                                              \
+        vfree(ptr);                                                           \
+        (ptr) = (void *)0xdeadbeef;  /* invalidate caller's pointer */        \
 } while (0)
 #endif
 
-#define OBD_SLAB_ALLOC(ptr, slab, type, size)                             \
-do {                                                                      \
-        long s = (size);                                                  \
-        void *lptr;                                                       \
-        LASSERT (!in_interrupt());                                        \
-        (ptr) = lptr = kmem_cache_alloc((slab), type);                    \
-        if (lptr == NULL) {                                               \
-                CERROR("slab-alloc of '" #ptr "' (%ld bytes) failed "     \
-                       "at %s:%d\n", s, __FILE__, __LINE__);              \
-        } else {                                                          \
-                int obd_curmem;                                           \
-                memset(lptr, 0, s);                                       \
-                atomic_add(s, &obd_memory);                               \
-                obd_curmem = atomic_read(&obd_memory);                    \
-                if (obd_curmem > obd_memmax)                              \
-                        obd_memmax = obd_curmem;                          \
-                CDEBUG(D_MALLOC, "slab-alloced '" #ptr "': %ld at %p "    \
-                       "(tot %d)\n", s, lptr, obd_curmem);                \
-        }                                                                 \
+#define OBD_SLAB_ALLOC(ptr, slab, type, size)                                 \
+do {    /* NB: ptr and size are each expanded more than once */               \
+        LASSERT (!in_interrupt());                                            \
+        (ptr) = kmem_cache_alloc(slab, type);                                 \
+        if ((ptr) == NULL) {                                                  \
+                CERROR("slab-alloc of '"#ptr"' (%d bytes) failed at %s:%d\n", \
+                       (int)(size), __FILE__, __LINE__);                      \
+        } else {  /* zero buffer, add size to obd_memory, track max */        \
+                memset(ptr, 0, size);                                         \
+                atomic_add(size, &obd_memory);                                \
+                if (atomic_read(&obd_memory) > obd_memmax)                    \
+                        obd_memmax = atomic_read(&obd_memory);                \
+                CDEBUG(D_MALLOC, "slab-alloced '"#ptr"': %d at %p (tot %d)\n",\
+                       (int)(size), ptr, atomic_read(&obd_memory));           \
+        }                                                                     \
 } while (0)
 
-#define OBD_SLAB_FREE(ptr, slab, size)                                    \
-do {                                                                      \
-        long s = (size);                                                  \
-        void *lptr = (ptr);                                               \
-        LASSERT(lptr);                                                    \
-        POISON(lptr, 0x5a, s);                                            \
-        CDEBUG(D_MALLOC, "slab-freed '" #ptr "': %ld at %p (tot %d).\n",  \
-               s, lptr, atomic_read(&obd_memory));                        \
-        kmem_cache_free((slab), lptr);                                    \
-        atomic_sub(s, &obd_memory);                                       \
-        (ptr) = (void *)0xdeadbeef;                                       \
+#define OBD_SLAB_FREE(ptr, slab, size)                                        \
+do {                                                                          \
+        LASSERT(ptr);                                                         \
+        CDEBUG(D_MALLOC, "slab-freed '" #ptr "': %d at %p (tot %d).\n",       \
+               (int)(size), ptr, atomic_read(&obd_memory));                   \
+        atomic_sub(size, &obd_memory);                                        \
+        POISON(ptr, 0x5a, size);                                              \
+        kmem_cache_free(slab, ptr);                                           \
+        (ptr) = (void *)0xdeadbeef;                                           \
 } while (0)
 
 #endif
index 6b9a348..e01feca 100644 (file)
@@ -1,8 +1,11 @@
- 0 files changed
+ fs/ext3/super.c            |  229 +++++++++++++++++++++++++++++++++++++++++++++
+ include/linux/ext3_fs.h    |    2 
+ include/linux/ext3_fs_sb.h |   10 +
+ 3 files changed, 241 insertions(+)
 
---- linux-2.4.18-chaos52/fs/ext3/super.c~ext3-delete_thread-2.4.18     2003-06-01 03:24:13.000000000 +0800
-+++ linux-2.4.18-chaos52-root/fs/ext3/super.c  2003-06-03 17:01:49.000000000 +0800
-@@ -398,6 +398,210 @@ static void dump_orphan_list(struct supe
+--- linux-2.4.18-18.8.0-l15/fs/ext3/super.c~ext3-delete_thread-2.4.18  Tue Jun  3 17:26:21 2003
++++ linux-2.4.18-18.8.0-l15-adilger/fs/ext3/super.c    Wed Jun 18 11:59:14 2003
+@@ -396,6 +396,219 @@ static void dump_orphan_list(struct supe
        }
  }
  
 +
 +      INIT_LIST_HEAD(&sbi->s_delete_list);
 +      wake_up(&sbi->s_delete_waiter_queue);
-+      ext3_debug("EXT3-fs: delete thread on %s started\n",
-+             kdevname(sb->s_dev));
++      ext3_debug("delete thread on %s started\n", kdevname(sb->s_dev));
 +
 +      /* main loop */
 +      for (;;) {
-+              sleep_on(&sbi->s_delete_thread_queue);
++              wait_event_interruptible(sbi->s_delete_thread_queue,
++                                       !list_empty(&sbi->s_delete_list) ||
++                                       !test_opt(sb, ASYNCDEL));
 +              ext3_debug("%s woken up: %lu inodes, %lu blocks\n",
 +                         tsk->comm,sbi->s_delete_inodes,sbi->s_delete_blocks);
 +
 +              spin_lock(&sbi->s_delete_lock);
 +              if (list_empty(&sbi->s_delete_list)) {
++                      clear_opt(sbi->s_mount_opt, ASYNCDEL);
 +                      memset(&sbi->s_delete_list, 0,
 +                             sizeof(sbi->s_delete_list));
 +                      spin_unlock(&sbi->s_delete_lock);
-+                      ext3_debug("ext3 delete thread on %s exiting\n",
-+                             kdevname(sb->s_dev));
++                      ext3_debug("delete thread on %s exiting\n",
++                                 kdevname(sb->s_dev));
 +                      wake_up(&sbi->s_delete_waiter_queue);
 +                      break;
 +              }
 +                      sbi->s_delete_blocks -= blocks;
 +                      sbi->s_delete_inodes--;
 +              }
-+              if (sbi->s_delete_blocks != 0 || sbi->s_delete_inodes != 0)
++              if (sbi->s_delete_blocks != 0 || sbi->s_delete_inodes != 0) {
 +                      ext3_warning(sb, __FUNCTION__,
 +                                   "%lu blocks, %lu inodes on list?\n",
 +                                   sbi->s_delete_blocks,sbi->s_delete_inodes);
-+              sbi->s_delete_blocks = 0;
-+              sbi->s_delete_inodes = 0;
++                      sbi->s_delete_blocks = 0;
++                      sbi->s_delete_inodes = 0;
++              }
 +              spin_unlock(&sbi->s_delete_lock);
 +              wake_up(&sbi->s_delete_waiter_queue);
 +      }
 +      int rc;
 +
 +      spin_lock_init(&sbi->s_delete_lock);
-+      memset(&sbi->s_delete_list, 0, sizeof(sbi->s_delete_list));
 +      init_waitqueue_head(&sbi->s_delete_thread_queue);
 +      init_waitqueue_head(&sbi->s_delete_waiter_queue);
-+      sbi->s_delete_blocks = 0;
-+      sbi->s_delete_inodes = 0;
++
++      if (!test_opt(sb, ASYNCDEL))
++              return;
 +
 +      rc = kernel_thread(ext3_delete_thread, sb, CLONE_VM | CLONE_FILES);
 +      if (rc < 0)
 +
 +static void ext3_stop_delete_thread(struct ext3_sb_info *sbi)
 +{
++      if (sbi->s_delete_list.next == 0)       /* thread never started */
++              return;
++
++      clear_opt(sbi->s_mount_opt, ASYNCDEL);
 +      wake_up(&sbi->s_delete_thread_queue);
 +      wait_event(sbi->s_delete_waiter_queue, list_empty(&sbi->s_delete_list));
 +}
 +              clear_inode(old_inode);
 +              return;
 +      }
-+      
-+      if (!test_opt (old_inode->i_sb, ASYNCDEL)) {
++
++      if (!test_opt(old_inode->i_sb, ASYNCDEL)) {
 +              ext3_delete_inode(old_inode);
 +              return;
 +      }
 +              return;
 +      }
 +
-+      if (EXT3_I(old_inode)->i_state & EXT3_STATE_DELETE) {
++      if ((EXT3_I(old_inode)->i_state & EXT3_STATE_DELETE) ||
++          (EXT3_SB(old_inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) {
 +              ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
 +                         old_inode->i_ino, blocks);
 +              ext3_delete_inode(old_inode);
 +      }
 +      if (!new_inode) {
 +              up(&sbi->s_orphan_lock);
-+              ext3_debug(KERN_DEBUG "delete inode %lu directly (bad read)\n",
++              ext3_debug("delete inode %lu directly (bad read)\n",
 +                         old_inode->i_ino);
 +              ext3_delete_inode(old_inode);
 +              return;
 +
 +      clear_inode(old_inode);
 +
-+      ext3_debug("delete inode %lu (%lu blocks) by thread\n",
-+                 new_inode->i_ino, blocks);
 +      spin_lock(&sbi->s_delete_lock);
 +      J_ASSERT(list_empty(&new_inode->i_dentry));
 +      list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list);
 +      sbi->s_delete_inodes++;
 +      spin_unlock(&sbi->s_delete_lock);
 +
++      ext3_debug("delete inode %lu (%lu blocks) by thread\n",
++                 new_inode->i_ino, blocks);
++
 +      wake_up(&sbi->s_delete_thread_queue);
 +}
 +#else
  void ext3_put_super (struct super_block * sb)
  {
        struct ext3_sb_info *sbi = EXT3_SB(sb);
-@@ -405,6 +609,7 @@ void ext3_put_super (struct super_block 
+@@ -403,6 +615,7 @@ void ext3_put_super (struct super_block 
        kdev_t j_dev = sbi->s_journal->j_dev;
        int i;
  
        ext3_xattr_put_super(sb);
        journal_destroy(sbi->s_journal);
        if (!(sb->s_flags & MS_RDONLY)) {
-@@ -453,7 +658,11 @@ static struct super_operations ext3_sops
+@@ -451,7 +664,11 @@ static struct super_operations ext3_sops
        write_inode:    ext3_write_inode,       /* BKL not held.  Don't need */
        dirty_inode:    ext3_dirty_inode,       /* BKL not held.  We take it */
        put_inode:      ext3_put_inode,         /* BKL not held.  Don't need */
 +#endif
        put_super:      ext3_put_super,         /* BKL held */
        write_super:    ext3_write_super,       /* BKL held */
-       sync_fs:        ext3_sync_fs,
-@@ -514,6 +723,12 @@ static int parse_options (char * options
+       write_super_lockfs: ext3_write_super_lockfs, /* BKL not held. Take it */
+@@ -511,6 +728,14 @@ static int parse_options (char * options
             this_char = strtok (NULL, ",")) {
                if ((value = strchr (this_char, '=')) != NULL)
                        *value++ = 0;
 +#ifdef EXT3_DELETE_THREAD
 +              if (!strcmp(this_char, "asyncdel"))
 +                      set_opt(*mount_options, ASYNCDEL);
++              else if (!strcmp(this_char, "noasyncdel"))
++                      clear_opt(*mount_options, ASYNCDEL);
 +              else
 +#endif
 +
                if (!strcmp (this_char, "bsddf"))
                        clear_opt (*mount_options, MINIX_DF);
                else if (!strcmp (this_char, "nouid32")) {
-@@ -1209,6 +1424,7 @@ struct super_block * ext3_read_super (st
+@@ -1206,6 +1431,7 @@ struct super_block * ext3_read_super (st
        }
  
        ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
        /*
         * akpm: core read_super() calls in here with the superblock locked.
         * That deadlocks, because orphan cleanup needs to lock the superblock
---- linux-2.4.18-chaos52/include/linux/ext3_fs.h~ext3-delete_thread-2.4.18     2003-06-01 03:24:11.000000000 +0800
-+++ linux-2.4.18-chaos52-root/include/linux/ext3_fs.h  2003-06-03 17:03:28.000000000 +0800
+@@ -1648,6 +1874,9 @@ int ext3_remount (struct super_block * s
+       if (!parse_options(data, &tmp, sbi, &tmp, 1))
+               return -EINVAL;
++      if (!test_opt(sb, ASYNCDEL) || (*flags & MS_RDONLY))
++              ext3_stop_delete_thread(sbi);
++
+       if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
+               ext3_abort(sb, __FUNCTION__, "Abort forced by user");
+--- linux-2.4.18-18.8.0-l15/include/linux/ext3_fs.h~ext3-delete_thread-2.4.18  Tue Jun  3 17:26:20 2003
++++ linux-2.4.18-18.8.0-l15-adilger/include/linux/ext3_fs.h    Tue Jun 17 12:36:56 2003
 @@ -190,6 +190,7 @@ struct ext3_group_desc
   */
  #define EXT3_STATE_JDATA              0x00000001 /* journaled data exists */
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
  #ifndef _LINUX_EXT2_FS_H
---- linux-2.4.18-chaos52/include/linux/ext3_fs_sb.h~ext3-delete_thread-2.4.18  2003-06-01 03:24:13.000000000 +0800
-+++ linux-2.4.18-chaos52-root/include/linux/ext3_fs_sb.h       2003-06-03 16:59:24.000000000 +0800
+--- linux-2.4.18-18.8.0-l15/include/linux/ext3_fs_sb.h~ext3-delete_thread-2.4.18       Tue Jun  3 17:26:21 2003
++++ linux-2.4.18-18.8.0-l15-adilger/include/linux/ext3_fs_sb.h Tue Jun 17 12:36:56 2003
 @@ -29,6 +29,8 @@
  
  #define EXT3_MAX_GROUP_LOADED 32
index be2723c..34c5158 100644 (file)
@@ -1,7 +1,7 @@
 diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
 --- origin/fs/ext3/super.c     2003-05-04 17:23:52.000000000 +0400
 +++ linux/fs/ext3/super.c      2003-05-04 17:09:20.000000000 +0400
-@@ -398,6 +398,210 @@ static void dump_orphan_list(struct supe
+@@ -398,6 +398,219 @@ static void dump_orphan_list(struct supe
        }
  }
  
@@ -35,22 +35,24 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
 +
 +      INIT_LIST_HEAD(&sbi->s_delete_list);
 +      wake_up(&sbi->s_delete_waiter_queue);
-+      ext3_debug("EXT3-fs: delete thread on %s started\n",
-+             kdevname(sb->s_dev));
++      ext3_debug("delete thread on %s started\n", kdevname(sb->s_dev));
 +
 +      /* main loop */
 +      for (;;) {
-+              sleep_on(&sbi->s_delete_thread_queue);
++              wait_event_interruptible(sbi->s_delete_thread_queue,
++                                       !list_empty(&sbi->s_delete_list) ||
++                                       !test_opt(sb, ASYNCDEL));
 +              ext3_debug("%s woken up: %lu inodes, %lu blocks\n",
 +                         tsk->comm,sbi->s_delete_inodes,sbi->s_delete_blocks);
 +
 +              spin_lock(&sbi->s_delete_lock);
 +              if (list_empty(&sbi->s_delete_list)) {
++                      clear_opt(sbi->s_mount_opt, ASYNCDEL);
 +                      memset(&sbi->s_delete_list, 0,
 +                             sizeof(sbi->s_delete_list));
 +                      spin_unlock(&sbi->s_delete_lock);
-+                      ext3_debug("ext3 delete thread on %s exiting\n",
-+                             kdevname(sb->s_dev));
++                      ext3_debug("delete thread on %s exiting\n",
++                                 kdevname(sb->s_dev));
 +                      wake_up(&sbi->s_delete_waiter_queue);
 +                      break;
 +              }
@@ -72,12 +74,13 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
 +                      sbi->s_delete_blocks -= blocks;
 +                      sbi->s_delete_inodes--;
 +              }
-+              if (sbi->s_delete_blocks != 0 || sbi->s_delete_inodes != 0)
++              if (sbi->s_delete_blocks != 0 || sbi->s_delete_inodes != 0) {
 +                      ext3_warning(sb, __FUNCTION__,
 +                                   "%lu blocks, %lu inodes on list?\n",
 +                                   sbi->s_delete_blocks,sbi->s_delete_inodes);
-+              sbi->s_delete_blocks = 0;
-+              sbi->s_delete_inodes = 0;
++                      sbi->s_delete_blocks = 0;
++                      sbi->s_delete_inodes = 0;
++              }
 +              spin_unlock(&sbi->s_delete_lock);
 +              wake_up(&sbi->s_delete_waiter_queue);
 +      }
@@ -91,11 +94,11 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
 +      int rc;
 +
 +      spin_lock_init(&sbi->s_delete_lock);
-+      memset(&sbi->s_delete_list, 0, sizeof(sbi->s_delete_list));
 +      init_waitqueue_head(&sbi->s_delete_thread_queue);
 +      init_waitqueue_head(&sbi->s_delete_waiter_queue);
-+      sbi->s_delete_blocks = 0;
-+      sbi->s_delete_inodes = 0;
++
++      if (!test_opt(sb, ASYNCDEL))
++              return;
 +
 +      rc = kernel_thread(ext3_delete_thread, sb, CLONE_VM | CLONE_FILES);
 +      if (rc < 0)
@@ -107,6 +110,10 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
 +
 +static void ext3_stop_delete_thread(struct ext3_sb_info *sbi)
 +{
++      if (sbi->s_delete_list.next == 0)       /* thread never started */
++              return;
++
++      clear_opt(sbi->s_mount_opt, ASYNCDEL);
 +      wake_up(&sbi->s_delete_thread_queue);
 +      wait_event(sbi->s_delete_waiter_queue, list_empty(&sbi->s_delete_list));
 +}
@@ -134,8 +141,8 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
 +              clear_inode(old_inode);
 +              return;
 +      }
-+      
-+      if (!test_opt (old_inode->i_sb, ASYNCDEL)) {
++
++      if (!test_opt(old_inode->i_sb, ASYNCDEL)) {
 +              ext3_delete_inode(old_inode);
 +              return;
 +      }
@@ -147,7 +154,8 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
 +              return;
 +      }
 +
-+      if (EXT3_I(old_inode)->i_state & EXT3_STATE_DELETE) {
++      if ((EXT3_I(old_inode)->i_state & EXT3_STATE_DELETE) ||
++          (EXT3_SB(old_inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) {
 +              ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
 +                         old_inode->i_ino, blocks);
 +              ext3_delete_inode(old_inode);
@@ -173,7 +181,7 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
 +      }
 +      if (!new_inode) {
 +              up(&sbi->s_orphan_lock);
-+              ext3_debug(KERN_DEBUG "delete inode %lu directly (bad read)\n",
++              ext3_debug("delete inode %lu directly (bad read)\n",
 +                         old_inode->i_ino);
 +              ext3_delete_inode(old_inode);
 +              return;
@@ -193,8 +201,6 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
 +
 +      clear_inode(old_inode);
 +
-+      ext3_debug("delete inode %lu (%lu blocks) by thread\n",
-+                 new_inode->i_ino, blocks);
 +      spin_lock(&sbi->s_delete_lock);
 +      J_ASSERT(list_empty(&new_inode->i_dentry));
 +      list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list);
@@ -202,6 +208,9 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
 +      sbi->s_delete_inodes++;
 +      spin_unlock(&sbi->s_delete_lock);
 +
++      ext3_debug("delete inode %lu (%lu blocks) by thread\n",
++                 new_inode->i_ino, blocks);
++
 +      wake_up(&sbi->s_delete_thread_queue);
 +}
 +#else
@@ -232,13 +241,15 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
        put_super:      ext3_put_super,         /* BKL held */
        write_super:    ext3_write_super,       /* BKL held */
        write_super_lockfs: ext3_write_super_lockfs, /* BKL not held. Take it */
-@@ -514,6 +725,11 @@ static int parse_options (char * options
+@@ -514,6 +725,13 @@ static int parse_options (char * options
             this_char = strtok (NULL, ",")) {
                if ((value = strchr (this_char, '=')) != NULL)
                        *value++ = 0;
 +#ifdef EXT3_DELETE_THREAD
 +              if (!strcmp(this_char, "asyncdel"))
 +                      set_opt(*mount_options, ASYNCDEL);
++              else if (!strcmp(this_char, "noasyncdel"))
++                      clear_opt(*mount_options, ASYNCDEL);
 +              else
 +#endif
  #ifdef CONFIG_EXT3_FS_XATTR_USER
@@ -252,6 +263,16 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
        /*
         * akpm: core read_super() calls in here with the superblock locked.
         * That deadlocks, because orphan cleanup needs to lock the superblock
+@@ -1648,6 +1874,9 @@ int ext3_remount (struct super_block * s
+       if (!parse_options(data, &tmp, sbi, &tmp, 1))
+               return -EINVAL;
++      if (!test_opt(sb, ASYNCDEL) || (*flags & MS_RDONLY))
++              ext3_stop_delete_thread(sbi);
++
+       if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
+               ext3_abort(sb, __FUNCTION__, "Abort forced by user");
 diff -puNr origin/include/linux/ext3_fs.h linux/include/linux/ext3_fs.h
 --- origin/include/linux/ext3_fs.h     2003-05-04 17:22:49.000000000 +0400
 +++ linux/include/linux/ext3_fs.h      2003-05-04 15:06:10.000000000 +0400
index d8dbdfb..6eabe85 100644 (file)
  #define EXT3_MOUNT_INDEX              0x4000  /* Enable directory index */
 +#define EXT3_MOUNT_IOPEN              0x8000  /* Allow access via iopen */
 +#define EXT3_MOUNT_IOPEN_NOPRIV               0x10000 /* Make iopen world-readable */
- #define EXT3_MOUNT_ASYNCDEL          0x20000  /* Delayed deletion */
+ #define EXT3_MOUNT_ASYNCDEL           0x20000 /* Delayed deletion */
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
 
index 1ceb276..b4e4cea 100644 (file)
@@ -6,7 +6,7 @@
 DEFS= 
 
 LDLMSOURCES= l_lock.c ldlm_lock.c ldlm_resource.c ldlm_lib.c \
-ldlm_extent.c ldlm_request.c ldlm_lockd.c
+ldlm_extent.c ldlm_request.c ldlm_lockd.c ldlm_internal.h
 
 if LIBLUSTRE
 lib_LIBRARIES = libldlm.a
index 62272fa..c5f8873 100644 (file)
@@ -612,7 +612,8 @@ void ldlm_grant_lock(struct ldlm_lock *lock, void *data, int datalen)
  * comment above ldlm_lock_match */
 static struct ldlm_lock *search_queue(struct list_head *queue, ldlm_mode_t mode,
                                       struct ldlm_extent *extent,
-                                      struct ldlm_lock *old_lock, int flags)
+                                      struct ldlm_lock *old_lock, void *data,
+                                      int flags)
 {
         struct ldlm_lock *lock;
         struct list_head *tmp;
@@ -651,6 +652,9 @@ static struct ldlm_lock *search_queue(struct list_head *queue, ldlm_mode_t mode,
                     !(lock->l_flags & LDLM_FL_LOCAL))
                         continue;
 
+                if ((flags & LDLM_FL_MATCH_DATA) && lock->l_data != data)
+                        continue;
+
                 ldlm_lock_addref_internal(lock, mode);
                 return lock;
         }
@@ -672,13 +676,16 @@ static struct ldlm_lock *search_queue(struct list_head *queue, ldlm_mode_t mode,
  * If 'flags' contains LDLM_FL_CBPENDING, then locks that have been marked
  *     to be canceled can still be matched as long as they still have reader
  *     or writer references
+ * If 'flags' contains LDLM_FL_MATCH_DATA, then only match a lock if the opaque
+ *     data is the same.
  *
  * Returns 1 if it finds an already-existing lock that is compatible; in this
  * case, lockh is filled in with a addref()ed lock
  */
 int ldlm_lock_match(struct ldlm_namespace *ns, int flags,
                     struct ldlm_res_id *res_id, __u32 type, void *cookie,
-                    int cookielen, ldlm_mode_t mode,struct lustre_handle *lockh)
+                    int cookielen, ldlm_mode_t mode, void *data,
+                    struct lustre_handle *lockh)
 {
         struct ldlm_resource *res;
         struct ldlm_lock *lock, *old_lock = NULL;
@@ -703,15 +710,18 @@ int ldlm_lock_match(struct ldlm_namespace *ns, int flags,
 
         l_lock(&ns->ns_lock);
 
-        lock = search_queue(&res->lr_granted, mode, cookie, old_lock, flags);
+        lock = search_queue(&res->lr_granted, mode, cookie, old_lock, data,
+                            flags);
         if (lock != NULL)
                 GOTO(out, rc = 1);
         if (flags & LDLM_FL_BLOCK_GRANTED)
                 GOTO(out, rc = 0);
-        lock = search_queue(&res->lr_converting, mode, cookie, old_lock, flags);
+        lock = search_queue(&res->lr_converting, mode, cookie, old_lock, data,
+                            flags);
         if (lock != NULL)
                 GOTO(out, rc = 1);
-        lock = search_queue(&res->lr_waiting, mode, cookie, old_lock, flags);
+        lock = search_queue(&res->lr_waiting, mode, cookie, old_lock, data,
+                            flags);
         if (lock != NULL)
                 GOTO(out, rc = 1);
 
index 9d2857e..3f46618 100644 (file)
@@ -1080,7 +1080,6 @@ EXPORT_SYMBOL(ldlm_cli_convert);
 EXPORT_SYMBOL(ldlm_cli_enqueue);
 EXPORT_SYMBOL(ldlm_cli_cancel);
 EXPORT_SYMBOL(ldlm_cli_cancel_unused);
-EXPORT_SYMBOL(ldlm_match_or_enqueue);
 EXPORT_SYMBOL(ldlm_replay_locks);
 EXPORT_SYMBOL(ldlm_resource_foreach);
 EXPORT_SYMBOL(ldlm_namespace_foreach);
index e5d9c24..008adab 100644 (file)
@@ -60,6 +60,7 @@ int ldlm_expired_completion_wait(void *data)
 
 int ldlm_completion_ast(struct ldlm_lock *lock, int flags, void *data)
 {
+        /* XXX ALLOCATE - 160 bytes */
         struct lock_wait_data lwd;
         unsigned long irqflags;
         struct obd_device *obd;
@@ -373,43 +374,6 @@ int ldlm_cli_enqueue(struct lustre_handle *connh,
         return rc;
 }
 
-int ldlm_match_or_enqueue(struct lustre_handle *connh,
-                          struct ptlrpc_request *req,
-                          struct ldlm_namespace *ns,
-                          struct lustre_handle *parent_lock_handle,
-                          struct ldlm_res_id res_id,
-                          __u32 type,
-                          void *cookie, int cookielen,
-                          ldlm_mode_t mode,
-                          int *flags,
-                          ldlm_completion_callback completion,
-                          ldlm_blocking_callback blocking,
-                          void *data,
-                          struct lustre_handle *lockh)
-{
-        int rc;
-        ENTRY;
-        if (connh == NULL) {
-                /* Just to make sure that I understand things --phil */
-                LASSERT(*flags & LDLM_FL_LOCAL_ONLY);
-        }
-
-        LDLM_DEBUG_NOLOCK("resource "LPU64"/"LPU64, res_id.name[0],
-                          res_id.name[1]);
-        rc = ldlm_lock_match(ns, *flags, &res_id, type, cookie, cookielen, mode,
-                             lockh);
-        if (rc == 0) {
-                rc = ldlm_cli_enqueue(connh, req, ns, parent_lock_handle,
-                                      res_id, type, cookie, cookielen, mode,
-                                      flags, completion, blocking, data,
-                                      lockh);
-                if (rc != ELDLM_OK)
-                        CERROR("ldlm_cli_enqueue: err: %d\n", rc);
-                RETURN(rc);
-        }
-        RETURN(0);
-}
-
 int ldlm_cli_replay_enqueue(struct ldlm_lock *lock)
 {
         struct lustre_handle lockh;
@@ -666,16 +630,20 @@ static int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns,
                 struct ldlm_lock *lock;
                 lock = list_entry(tmp, struct ldlm_lock, l_res_link);
 
+                if (opaque != NULL && lock->l_data != opaque) {
+                        LDLM_ERROR(lock, "data %p doesn't match opaque %p res"
+                                  LPU64":"LPU64, lock->l_data, opaque,
+                                  res_id.name[0], res_id.name[1]);
+                        //LBUG();
+                        continue;
+                }
+
                 if (lock->l_readers || lock->l_writers) {
                         if (flags & LDLM_FL_WARN) {
                                 LDLM_ERROR(lock, "lock in use");
-                                LBUG();
+                                //LBUG();
                         }
-                }
-                if (opaque != NULL && lock->l_data != opaque) {
-                        LDLM_ERROR(lock, "data %p doesn't match opaque %p",
-                                   lock->l_data, opaque);
-                        LBUG();
+                        continue;
                 }
 
                 /* See CBPENDING comment in ldlm_cancel_lru */
index ddb9657..b6fc501 100644 (file)
@@ -11,6 +11,6 @@ EXTRA_PROGRAMS = llite
 
 llite_SOURCES = dcache.c commit_callback.c super.c rw.c iod.c super25.c
 llite_SOURCES += file.c dir.c sysctl.c symlink.c
-llite_SOURCES += namei.c lproc_llite.c
+llite_SOURCES += namei.c lproc_llite.c llite_internal.h
 
 include $(top_srcdir)/Rules
index 0c9fcf7..20924fc 100644 (file)
@@ -167,6 +167,7 @@ int ll_have_md_lock(struct dentry *de)
         struct lustre_handle lockh;
         struct ldlm_res_id res_id = { .name = {0} };
         struct obd_device *obddev;
+        int flags;
         ENTRY;
 
         if (!de->d_inode)
@@ -178,14 +179,15 @@ int ll_have_md_lock(struct dentry *de)
 
         CDEBUG(D_INFO, "trying to match res "LPU64"\n", res_id.name[0]);
 
-        if (ldlm_lock_match(obddev->obd_namespace, LDLM_FL_BLOCK_GRANTED,
-                            &res_id, LDLM_PLAIN, NULL, 0, LCK_PR, &lockh)) {
+        flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_MATCH_DATA;
+        if (ldlm_lock_match(obddev->obd_namespace, flags, &res_id, LDLM_PLAIN,
+                            NULL, 0, LCK_PR, de->d_inode, &lockh)) {
                 ldlm_lock_decref(&lockh, LCK_PR);
                 RETURN(1);
         }
 
-        if (ldlm_lock_match(obddev->obd_namespace, LDLM_FL_BLOCK_GRANTED,
-                            &res_id, LDLM_PLAIN, NULL, 0, LCK_PW, &lockh)) {
+        if (ldlm_lock_match(obddev->obd_namespace, flags, &res_id, LDLM_PLAIN,
+                            NULL, 0, LCK_PW, de->d_inode, &lockh)) {
                 ldlm_lock_decref(&lockh, LCK_PW);
                 RETURN(1);
         }
@@ -217,9 +219,11 @@ int ll_revalidate2(struct dentry *de, int flags, struct lookup_intent *it)
                 struct ldlm_res_id res_id =
                         { .name = {inode->i_ino, (__u64)inode->i_generation} };
                 struct lustre_handle lockh;
-                rc = ldlm_lock_match(obddev->obd_namespace,
-                                     LDLM_FL_BLOCK_GRANTED, &res_id,
-                                     LDLM_PLAIN, NULL, 0, LCK_PR, &lockh);
+                int flags;
+                flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_MATCH_DATA;
+                rc = ldlm_lock_match(obddev->obd_namespace, flags, &res_id,
+                                     LDLM_PLAIN, NULL, 0, LCK_PR, inode,
+                                     &lockh);
                 if (rc) {
                         de->d_flags &= ~DCACHE_LUSTRE_INVALID;
                         if (it && it->it_op == IT_GETATTR) {
@@ -232,9 +236,9 @@ int ll_revalidate2(struct dentry *de, int flags, struct lookup_intent *it)
                         }
                         RETURN(1);
                 }
-                rc = ldlm_lock_match(obddev->obd_namespace,
-                                     LDLM_FL_BLOCK_GRANTED, &res_id,
-                                     LDLM_PLAIN, NULL, 0, LCK_PW, &lockh);
+                rc = ldlm_lock_match(obddev->obd_namespace, flags, &res_id,
+                                     LDLM_PLAIN, NULL, 0, LCK_PW, inode,
+                                     &lockh);
                 if (rc) {
                         de->d_flags &= ~DCACHE_LUSTRE_INVALID;
                         if (it && it->it_op == IT_GETATTR) {
index 8759598..2d5954d 100644 (file)
@@ -35,7 +35,7 @@
 #include <asm/uaccess.h>
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
 #include <linux/locks.h>   // for wait_on_buffer
-#else 
+#else
 #include <linux/buffer_head.h>   // for wait_on_buffer
 #endif
 
@@ -117,7 +117,7 @@ static int ll_dir_readpage(struct file *file, struct page *page)
                 body = lustre_msg_buf(request->rq_repmsg, 0, sizeof (*body));
                 LASSERT (body != NULL);         /* checked by mdc_readpage() */
                 LASSERT_REPSWABBED (request, 0); /* swabbed by mdc_readpage() */
-                
+
                 inode->i_size = body->size;
         }
         ptlrpc_req_finished(request);
@@ -770,6 +770,9 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file,
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%u\n", inode->i_ino,
                inode->i_generation, inode, cmd);
 
+        if ((cmd & 0xffffff00) == ((int)'T') << 8) /* tty ioctls */
+                return -ENOTTY;
+
         switch(cmd) {
         case IOC_MDC_LOOKUP: {
                 struct ptlrpc_request *request = NULL;
@@ -803,9 +806,9 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file,
                 }
 
                 body = lustre_msg_buf(request->rq_repmsg, 0, sizeof (*body));
-                LASSERT (body != NULL);         /* checked by mdc_getattr_name() */
-                LASSERT_REPSWABBED (request, 0); /* swabbed by mdc_getattr_name() */
-                
+                LASSERT(body != NULL);         /* checked by mdc_getattr_name */
+                LASSERT_REPSWABBED(request, 0);/* swabbed by mdc_getattr_name */
+
                 /* surely there's a better way -phik */
                 data->ioc_obdo1.o_mode = body->mode;
                 data->ioc_obdo1.o_uid = body->uid;
index 3429b28..bd3fa7d 100644 (file)
@@ -132,6 +132,7 @@ int ll_file_release(struct inode *inode, struct file *file)
         if (inode->i_sb->s_root == file->f_dentry)
                 RETURN(0);
 
+        lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_RELEASE);
         fd = (struct ll_file_data *)file->private_data;
         if (!fd) /* no process opened the file after an mcreate */
                 RETURN(rc = 0);
@@ -345,6 +346,7 @@ int ll_file_open(struct inode *inode, struct file *file)
         if (inode->i_sb->s_root == file->f_dentry)
                 RETURN(0);
 
+        lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_OPEN);
         LL_GET_INTENT(file->f_dentry, it);
         rc = ll_it_open_error(IT_OPEN_OPEN, it);
         if (rc)
@@ -495,8 +497,8 @@ int ll_extent_lock_no_validate(struct ll_file_data *fd, struct inode *inode,
                inode->i_ino, extent->start, extent->end);
 
         rc = obd_enqueue(&sbi->ll_osc_conn, lsm, NULL, LDLM_EXTENT, extent,
-                         sizeof(extent), mode, &flags, ll_lock_callback,
-                         inode, sizeof(*inode), lockh);
+                         sizeof(extent), mode, &flags, ll_extent_lock_callback,
+                         inode, lockh);
 
         RETURN(rc);
 }
@@ -506,15 +508,13 @@ int ll_extent_lock_no_validate(struct ll_file_data *fd, struct inode *inode,
  * the OST is returning the file size with each lock acquisition.
  */
 int ll_extent_lock(struct ll_file_data *fd, struct inode *inode,
-                   struct lov_stripe_md *lsm,
-                   int mode, struct ldlm_extent *extent,
-                   struct lustre_handle *lockh)
+                   struct lov_stripe_md *lsm, int mode,
+                   struct ldlm_extent *extent, struct lustre_handle *lockh)
 {
         struct ll_inode_info *lli = ll_i2info(inode);
         struct ldlm_extent size_lock;
         struct lustre_handle match_lockh = {0};
-        int flags = LDLM_FL_CBPENDING | LDLM_FL_BLOCK_GRANTED;
-        int rc, matched;
+        int flags, rc, matched;
         ENTRY;
 
         rc = ll_extent_lock_no_validate(fd, inode, lsm, mode, extent, lockh);
@@ -534,9 +534,10 @@ int ll_extent_lock(struct ll_file_data *fd, struct inode *inode,
         size_lock.end = OBD_OBJECT_EOF;
 
         /* XXX I bet we should be checking the lock ignore flags.. */
+        flags = LDLM_FL_CBPENDING | LDLM_FL_BLOCK_GRANTED | LDLM_FL_MATCH_DATA;
         matched = obd_match(&ll_i2sbi(inode)->ll_osc_conn, lsm, LDLM_EXTENT,
-                       &size_lock, sizeof(size_lock), LCK_PR, &flags,
-                       &match_lockh);
+                            &size_lock, sizeof(size_lock), LCK_PR, &flags,
+                            inode, &match_lockh);
 
         /* hey, alright, we hold a size lock that covers the size we
          * just found, its not going to change for a while.. */
@@ -756,8 +757,8 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm,
         EXIT;
 }
 
-int ll_lock_callback(struct ldlm_lock *lock, struct ldlm_lock_desc *new,
-                     void *data, int flag)
+int ll_extent_lock_callback(struct ldlm_lock *lock, struct ldlm_lock_desc *new,
+                            void *data, int flag)
 {
         struct inode *inode = data;
         struct ll_inode_info *lli = ll_i2info(inode);
@@ -811,6 +812,8 @@ static ssize_t ll_file_read(struct file *filp, char *buf, size_t count,
         if (count == 0)
                 RETURN(0);
 
+        lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, LPROC_LL_READ_BYTES,
+                            count);
         /* grab a -> eof extent to push extending writes out of node's caches
          * so we can see them at the getattr after lock acquisition.  this will
          * turn into a seperate [*ppos + count, EOF] 'size intent' lock attempt
@@ -916,6 +919,8 @@ ll_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
 
 out:
         /* XXX errors? */
+        lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, LPROC_LL_WRITE_BYTES,
+                            retval);
         ll_extent_unlock(fd, inode, lsm, LCK_PW, &lockh);
         RETURN(retval);
 }
@@ -983,6 +988,7 @@ int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
         if ((cmd & 0xffffff00) == ((int)'T') << 8) /* tty ioctls */
                 return -ENOTTY;
 
+        lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_IOCTL);
         switch(cmd) {
         case LL_IOC_GETFLAGS:
                 /* Get the current value of the file flags */
@@ -1034,6 +1040,7 @@ loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
                inode->i_generation, inode,
                offset + ((origin==2) ? inode->i_size : file->f_pos));
 
+        lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_LLSEEK);
         if (origin == 2) { /* SEEK_END */
                 ldlm_error_t err;
                 struct ldlm_extent extent = {0, OBD_OBJECT_EOF};
@@ -1071,6 +1078,7 @@ int ll_fsync(struct file *file, struct dentry *dentry, int data)
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
                inode->i_generation, inode);
 
+        lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_FSYNC);
         /*
          * filemap_fdata{sync,wait} are also called at PW lock cancelation so
          * we know that they can only find data to writeback here if we are
@@ -1096,6 +1104,9 @@ int ll_inode_revalidate(struct dentry *dentry)
         }
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
                inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,5,0))
+        lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_REVALIDATE);
+#endif
 
         /* this is very tricky.  it is unsafe to call ll_have_md_lock
            when we have a referenced lock: because it may cause an RPC
@@ -1160,7 +1171,7 @@ int ll_inode_revalidate(struct dentry *dentry)
                                 ptlrpc_req_finished(req);
                                 RETURN(rc);
                         }
-                        LASSERT(rc >= sizeof (*lsm));
+                        LASSERT(rc >= sizeof(*lsm));
                 }
 
                 ll_update_inode(inode, body, lsm);
@@ -1201,6 +1212,7 @@ static int ll_getattr(struct vfsmount *mnt, struct dentry *de,
         int res = 0;
         struct inode *inode = de->d_inode;
 
+        lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_GETATTR);
         res = ll_inode_revalidate(de);
         if (res)
                 return res;
index f88ed87..836a9aa 100644 (file)
@@ -174,10 +174,11 @@ static void ll_writeback(struct inode *inode, struct ll_writeback_pages *llwp)
          */
         if (rc) {
                 CERROR("error from obd_brw_async: rc = %d\n", rc);
-                INODE_IO_STAT_ADD(inode, wb_fail, llwp->npgs);
-        } else {
-                INODE_IO_STAT_ADD(inode, wb_ok, llwp->npgs);
-        }
+                lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
+                                    LPROC_LL_WB_FAIL, llwp->npgs);
+        } else
+                lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
+                                    LPROC_LL_WB_OK, (llwp->npgs));
 
         for (i = 0 ; i < llwp->npgs ; i++) {
                 struct page *page = llwp->pga[i].pg;
@@ -314,11 +315,12 @@ int ll_check_dirty(struct super_block *sb)
                         llwp.npgs = 0;
                         ll_get_dirty_pages(inode, &llwp);
                         if (llwp.npgs) {
-                                INODE_IO_STAT_ADD(inode, wb_from_pressure,
-                                                  llwp.npgs);
-                                ll_writeback(inode, &llwp);
-                                rc += llwp.npgs;
-                                making_progress = 1;
+                               lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
+                                                   LPROC_LL_WB_PRESSURE,
+                                                   llwp.npgs);
+                               ll_writeback(inode, &llwp);
+                               rc += llwp.npgs;
+                               making_progress = 1;
                         }
                 } while (llwp.npgs && should_writeback());
 
@@ -384,7 +386,8 @@ int ll_batch_writepage(struct inode *inode, struct page *page)
                 ll_get_dirty_pages(inode, &llwp);
 
         if (llwp.npgs) {
-                INODE_IO_STAT_ADD(inode, wb_from_writepage, llwp.npgs);
+                lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
+                                    LPROC_LL_WB_WRITEPAGE, llwp.npgs);
                 ll_writeback(inode, &llwp);
         }
 
@@ -461,7 +464,8 @@ static inline void lldo_dirty_add(struct inode *inode,
                                   long val)
 {
         lldo->do_num_dirty += val;
-        INODE_IO_STAT_ADD(inode, dirty_pages, val);
+        lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, LPROC_LL_DIRTY_PAGES,
+                            val);
 }
 
 void ll_record_dirty(struct inode *inode, unsigned long offset)
@@ -624,76 +628,3 @@ void ll_lldo_init(struct ll_dirty_offsets *lldo)
         lldo->do_num_dirty = 0;
         lldo->do_root.rb_node = NULL;
 }
-
-/* seq file export of some page cache tracking stats */
-static int ll_pgcache_seq_show(struct seq_file *seq, void *v)
-{
-        struct timeval now;
-        struct ll_sb_info *sbi = seq->private;
-        do_gettimeofday(&now);
-
-        seq_printf(seq, "snapshot_time:            %lu:%lu (secs:usecs)\n",
-                   now.tv_sec, now.tv_usec);
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        seq_printf(seq, "VM_under_pressure:        %s\n",
-                   should_writeback() ? "yes" : "no");
-#endif        
-        seq_printf(seq, "dirty_pages:              "LPU64"\n",
-                   sbi->ll_iostats.fis_dirty_pages);
-        seq_printf(seq, "dirty_page_hits:          "LPU64"\n",
-                   sbi->ll_iostats.fis_dirty_hits);
-        seq_printf(seq, "dirty_page_misses:        "LPU64"\n",
-                   sbi->ll_iostats.fis_dirty_misses);
-        seq_printf(seq, "writeback_from_writepage: "LPU64"\n",
-                   sbi->ll_iostats.fis_wb_from_writepage);
-        seq_printf(seq, "writeback_from_pressure:  "LPU64"\n",
-                   sbi->ll_iostats.fis_wb_from_pressure);
-        seq_printf(seq, "writeback_ok_pages:       "LPU64"\n",
-                   sbi->ll_iostats.fis_wb_ok);
-        seq_printf(seq, "writeback_failed_pages:   "LPU64"\n",
-                   sbi->ll_iostats.fis_wb_fail);
-        return 0;
-}
-
-static void *ll_pgcache_seq_start(struct seq_file *p, loff_t *pos)
-{
-        if (*pos == 0)
-                return (void *)1;
-        return NULL;
-}
-static void *ll_pgcache_seq_next(struct seq_file *p, void *v, loff_t *pos)
-{
-        ++*pos;
-        return NULL;
-}
-static void ll_pgcache_seq_stop(struct seq_file *p, void *v)
-{
-}
-
-struct seq_operations ll_pgcache_seq_sops = {
-        .start = ll_pgcache_seq_start,
-        .stop = ll_pgcache_seq_stop,
-        .next = ll_pgcache_seq_next,
-        .show = ll_pgcache_seq_show,
-};
-
-static int ll_pgcache_seq_open(struct inode *inode, struct file *file)
-{
-        struct proc_dir_entry *dp = inode->u.generic_ip;
-        struct seq_file *seq;
-        int rc;
-
-        rc = seq_open(file, &ll_pgcache_seq_sops);
-        if (rc)
-                return rc;
-        seq = file->private_data;
-        seq->private = dp->data;
-        return 0;
-}
-
-struct file_operations ll_pgcache_seq_fops = {
-        .open    = ll_pgcache_seq_open,
-        .read    = seq_read,
-        .llseek  = seq_lseek,
-        .release = seq_release,
-};
index 59cec1f..14eac3f 100644 (file)
@@ -36,6 +36,7 @@ int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
 {
         return 0;
 }
+void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi){}
 #else
 
 #define LPROC_LLITE_STAT_FCT(fct_name, get_statfs_fct)                    \
@@ -100,16 +101,69 @@ struct lprocfs_vars lprocfs_obd_vars[] = {
 };
 
 #define MAX_STRING_SIZE 128
+
+struct llite_file_opcode {
+        __u32       opcode;
+        __u32       type;
+        const char *opname;
+} llite_opcode_table[LPROC_LL_FILE_OPCODES] = {
+        /* file operation */
+        { LPROC_LL_DIRTY_PAGES,    LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
+                                   "dirty_pages" },
+        { LPROC_LL_DIRTY_HITS,     LPROCFS_TYPE_REGS, "dirty_pages_hits" },
+        { LPROC_LL_DIRTY_MISSES,   LPROCFS_TYPE_REGS, "dirty_pages_misses" },
+        { LPROC_LL_WB_WRITEPAGE,   LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
+                                   "writeback_from_writepage" },
+        { LPROC_LL_WB_PRESSURE,    LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
+                                   "writeback_from_pressure" },
+        { LPROC_LL_WB_OK,          LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
+                                   "writeback_ok_pages" },
+        { LPROC_LL_WB_FAIL,        LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
+                                   "writeback_failed_pages" },
+        { LPROC_LL_READ_BYTES,     LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_BYTES,
+                                   "read_bytes" },
+        { LPROC_LL_WRITE_BYTES,    LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_BYTES,
+                                   "write_bytes" },
+        { LPROC_LL_BRW_READ,       LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
+                                   "brw_read" },
+        { LPROC_LL_BRW_WRITE,      LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
+                                   "brw_write" },
+
+        { LPROC_LL_IOCTL,          LPROCFS_TYPE_REGS, "ioctl" },
+        { LPROC_LL_OPEN,           LPROCFS_TYPE_REGS, "open" },
+        { LPROC_LL_RELEASE,        LPROCFS_TYPE_REGS, "close" },
+        { LPROC_LL_MAP,            LPROCFS_TYPE_REGS, "mmap" },
+        { LPROC_LL_LLSEEK,         LPROCFS_TYPE_REGS, "seek" },
+        { LPROC_LL_FSYNC,          LPROCFS_TYPE_REGS, "fsync" },
+        /* inode operation */
+        { LPROC_LL_SETATTR_RAW,    LPROCFS_TYPE_REGS, "setattr_raw" },
+        { LPROC_LL_SETATTR,        LPROCFS_TYPE_REGS, "setattr" },
+        { LPROC_LL_TRUNC,          LPROCFS_TYPE_REGS, "punch" },
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+        { LPROC_LL_GETATTR,        LPROCFS_TYPE_REGS, "getattr" },
+#else
+        { LPROC_LL_REVALIDATE,     LPROCFS_TYPE_REGS, "getattr" },
+#endif
+        /* special inode operation */
+        { LPROC_LL_STAFS,          LPROCFS_TYPE_REGS, "statfs" },
+        { LPROC_LL_ALLOC_INODE,    LPROCFS_TYPE_REGS, "alloc_inode" },
+        { LPROC_LL_DIRECT_READ,    LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
+                                   "direct_read" },
+        { LPROC_LL_DIRECT_WRITE,   LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
+                                   "direct_write" },
+
+};
+
 int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
                                 struct super_block *sb, char *osc, char *mdc)
 {
         struct lprocfs_vars lvars[2];
         struct ll_sb_info *sbi = ll_s2sbi(sb);
         struct obd_device *obd;
-        struct proc_dir_entry *entry;
         char name[MAX_STRING_SIZE + 1];
         struct obd_uuid uuid;
-        int err;
+        int err, id;
+        struct lprocfs_stats *svc_stats = NULL;
         ENTRY;
 
         memset(lvars, 0, sizeof(lvars));
@@ -131,17 +185,41 @@ int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
                 sbi->ll_proc_root = NULL;
                 RETURN(err);
         }
+
+        svc_stats = lprocfs_alloc_stats(LPROC_LL_FILE_OPCODES);
+        if (svc_stats == NULL) {
+                err = -ENOMEM;
+                goto out;
+        }
+        /* do counter init */
+        for (id = 0; id < LPROC_LL_FILE_OPCODES; id++) {
+                __u32 type = llite_opcode_table[id].type;
+                void *ptr = NULL;
+                if (type & LPROCFS_TYPE_REGS)
+                        ptr = "regs";
+                else {
+                        if (type & LPROCFS_TYPE_BYTES)
+                                ptr = "bytes";
+                        else {
+                                if (type & LPROCFS_TYPE_PAGES)
+                                        ptr = "pages";
+                        }
+                }
+                lprocfs_counter_init(svc_stats, llite_opcode_table[id].opcode,
+                                     (type & LPROCFS_CNTR_AVGMINMAX),
+                                     llite_opcode_table[id].opname, ptr);
+        }
+        err = lprocfs_register_stats(sbi->ll_proc_root, "stats", svc_stats);
+        if (err)
+                goto out;
+        else
+                sbi->ll_stats = svc_stats;
+        /* need place to keep svc_stats */
+
         /* Static configuration info */
         err = lprocfs_add_vars(sbi->ll_proc_root, lprocfs_obd_vars, sb);
         if (err)
-                RETURN(err);
-
-        /* llite page cache stats */
-        entry = create_proc_entry("pgcache", 0444, sbi->ll_proc_root);
-        if (entry == NULL)
-                RETURN(-ENOMEM);
-        entry->proc_fops = &ll_pgcache_seq_fops;
-        entry->data = sbi;
+                goto out;
 
         /* MDC info */
         strncpy(uuid.uuid, mdc, sizeof(uuid.uuid));
@@ -156,13 +234,13 @@ int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
         lvars[0].read_fptr = lprocfs_rd_name;
         err = lprocfs_add_vars(sbi->ll_proc_root, lvars, obd);
         if (err)
-                RETURN(err);
+                goto out;
 
         snprintf(name, MAX_STRING_SIZE, "%s/uuid", obd->obd_type->typ_name);
         lvars[0].read_fptr = lprocfs_rd_uuid;
         err = lprocfs_add_vars(sbi->ll_proc_root, lvars, obd);
-        if (err < 0)
-                RETURN(err);
+        if (err)
+                goto out;
 
         /* OSC */
         strncpy(uuid.uuid, osc, sizeof(uuid.uuid));
@@ -177,14 +255,32 @@ int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
         lvars[0].read_fptr = lprocfs_rd_name;
         err = lprocfs_add_vars(sbi->ll_proc_root, lvars, obd);
         if (err)
-                RETURN(err);
+                goto out;
 
         snprintf(name, MAX_STRING_SIZE, "%s/uuid", obd->obd_type->typ_name);
         lvars[0].read_fptr = lprocfs_rd_uuid;
         err = lprocfs_add_vars(sbi->ll_proc_root, lvars, obd);
-
+out:
+        if (err) {
+                if (svc_stats)
+                        lprocfs_free_stats(svc_stats);
+                if (sbi->ll_proc_root)
+                        lprocfs_remove(sbi->ll_proc_root);
+        }
         RETURN(err);
 }
 
+void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi)
+{
+        if (sbi->ll_proc_root) {
+                struct proc_dir_entry *file_stats =
+                        lprocfs_srch(sbi->ll_proc_root, "stats");
+
+                if (file_stats) {
+                        lprocfs_free_stats(sbi->ll_stats);
+                        lprocfs_remove(file_stats);
+                }
+        }
+}
 #undef MAX_STRING_SIZE
 #endif /* LPROCFS */
index 5e37d55..c14fd61 100644 (file)
@@ -217,9 +217,6 @@ int ll_mdc_blocking_ast(struct ldlm_lock *lock,
                 struct inode *inode = lock->l_data;
                 LASSERT(inode != NULL);
 
-                //if (inode->i_state & I_FREEING)
-                //        break;
-
                 if (S_ISDIR(inode->i_mode)) {
                         CDEBUG(D_INODE, "invalidating inode %lu\n",
                                inode->i_ino);
@@ -227,6 +224,7 @@ int ll_mdc_blocking_ast(struct ldlm_lock *lock,
                         ll_invalidate_inode_pages(inode);
                 }
 
+#warning FIXME: we should probably free this inode if there are no aliases
                 if (inode->i_sb->s_root &&
                     inode != inode->i_sb->s_root->d_inode)
                         d_unhash_aliases(inode);
@@ -375,7 +373,7 @@ int ll_intent_lock(struct inode *parent, struct dentry **de,
 
                 /*We were called from revalidate2: did we find the same inode?*/
                 if (inode && (ino != inode->i_ino ||
-                   mds_body->fid1.generation != inode->i_generation)) {
+                    mds_body->fid1.generation != inode->i_generation)) {
                         it->it_disposition |= IT_ENQ_COMPLETE;
                         RETURN(-ESTALE);
                 }
index cd1fa90..af90d66 100644 (file)
@@ -118,6 +118,12 @@ static int ll_brw(int cmd, struct inode *inode, struct page *page, int flags)
 
         pg.flag = flags;
 
+        if (cmd == OBD_BRW_WRITE)
+                lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
+                                    LPROC_LL_BRW_WRITE, pg.count);
+        else
+                lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
+                                    LPROC_LL_BRW_READ, pg.count);
         rc = obd_brw(cmd, ll_i2obdconn(inode), lsm, 1, &pg, NULL);
         if (rc)
                 CERROR("error from obd_brw: rc = %d\n", rc);
@@ -446,9 +452,11 @@ static int ll_commit_write(struct file *file, struct page *page,
         /* mark the page dirty, put it on mapping->dirty,
          * mark the inode PAGES_DIRTY, put it on sb->dirty */
         if (!PageDirty(page))
-                INODE_IO_STAT_ADD(inode, dirty_misses, 1);
+                lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats,
+                                     LPROC_LL_DIRTY_MISSES);
         else
-                INODE_IO_STAT_ADD(inode, dirty_hits, 1);
+                lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats,
+                                     LPROC_LL_DIRTY_HITS);
 
         size = (((obd_off)page->index) << PAGE_SHIFT) + to;
         if (size > inode->i_size)
@@ -531,6 +539,12 @@ static int ll_direct_IO(int rw, struct inode *inode, struct kiobuf *iobuf,
                 }
         }
 
+        if (rw == WRITE)
+                lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
+                                    LPROC_LL_DIRECT_WRITE, iobuf->length);
+        else
+                lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
+                                    LPROC_LL_DIRECT_READ, iobuf->length);
         rc = obd_brw_async(rw == WRITE ? OBD_BRW_WRITE : OBD_BRW_READ,
                            ll_i2obdconn(inode), lsm, iobuf->nr_pages, pga, set,
                            NULL);
index 66563c7..aef3c06 100644 (file)
@@ -43,6 +43,7 @@ struct super_operations ll_super_operations;
 /* /proc/lustre/llite root that tracks llite mount points */
 struct proc_dir_entry *proc_lustre_fs_root = NULL;
 /* lproc_llite.c */
+extern void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi);
 extern int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
                                        struct super_block *sb,
                                        char *osc, char *mdc);
@@ -141,7 +142,6 @@ static struct super_block *ll_read_super(struct super_block *sb,
         INIT_LIST_HEAD(&sbi->ll_conn_chain);
         INIT_LIST_HEAD(&sbi->ll_orphan_dentry_list);
         generate_random_uuid(uuid);
-        spin_lock_init(&sbi->ll_iostats.fis_lock);
         class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
 
         sb->u.generic_sbp = sbi;
@@ -266,6 +266,7 @@ out_osc:
 out_mdc:
         obd_disconnect(&sbi->ll_mdc_conn, 0);
 out_free:
+        lprocfs_unregister_mountpoint(sbi);
         OBD_FREE(sbi, sizeof(*sbi));
 
         goto out_dev;
@@ -293,6 +294,7 @@ static void ll_put_super(struct super_block *sb)
         if (!obd->obd_no_recov)
                 mdc_getstatus(&sbi->ll_mdc_conn, &rootfid);
 
+        lprocfs_unregister_mountpoint(sbi);
         if (sbi->ll_proc_root) {
                 lprocfs_remove(sbi->ll_proc_root);
                 sbi->ll_proc_root = NULL;
@@ -585,6 +587,7 @@ int ll_setattr(struct dentry *de, struct iattr *attr)
         CDEBUG(D_VFSTRACE, "VFS Op:name=%s\n", de->d_name.name);
         if (rc)
                 return rc;
+        lprocfs_counter_incr(ll_i2sbi(de->d_inode)->ll_stats, LPROC_LL_SETATTR);
 
         return ll_inode_setattr(de->d_inode, attr, 1);
 }
@@ -597,6 +600,7 @@ static int ll_statfs(struct super_block *sb, struct statfs *sfs)
         ENTRY;
 
         CDEBUG(D_VFSTRACE, "VFS Op:\n");
+        lprocfs_counter_incr(sbi->ll_stats, LPROC_LL_STAFS);
         memset(sfs, 0, sizeof(*sfs));
         rc = obd_statfs(&sbi->ll_mdc_conn, &osfs);
         statfs_unpack(sfs, &osfs);
index 680c47f..e942736 100644 (file)
@@ -271,6 +271,7 @@ out_osc:
 out_mdc:
         obd_disconnect(&sbi->ll_mdc_conn, 0);
 out_free:
+        lprocfs_unregister_mountpoint(sbi);
         OBD_FREE(sbi, sizeof(*sbi));
 
         goto out_dev;
@@ -286,6 +287,7 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr)
         ENTRY;
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino);
 
+        LPROC_COUNTER_INODE_INCBY1(inode, LPROC_LL_SETATTR);
         if ((attr->ia_valid & ATTR_SIZE)) {
                 /* writeback uses inode->i_size to determine how far out
                  * its cached pages go.  ll_truncate gets a PW lock, canceling
@@ -368,6 +370,7 @@ static void ll_put_super(struct super_block *sb)
          */
         mdc_getstatus(&sbi->ll_mdc_conn, &rootfid);
 
+        lprocfs_unregister_mountpoint(sbi);
         if (sbi->ll_proc_root) {
                 lprocfs_remove(sbi->ll_proc_root);
         sbi->ll_proc_root = NULL;
@@ -562,6 +565,7 @@ int ll_setattr(struct dentry *de, struct iattr *attr)
         if (rc)
                 return rc;
 
+        LPROC_COUNTER_INODE_INCBY1((de->d_inode), LPROC_LL_SETATTR);
         return ll_inode_setattr(de->d_inode, attr, 1);
 }
 
@@ -573,6 +577,7 @@ static int ll_statfs(struct super_block *sb, struct statfs *sfs)
         ENTRY;
         CDEBUG(D_VFSTRACE, "VFS Op:\n");
 
+        LPROC_COUNTER_SBI_INCBY1(sbi, LPROC_LL_STAFS);
         memset(sfs, 0, sizeof(*sfs));
         rc = obd_statfs(&sbi->ll_mdc_conn, &osfs);
         statfs_unpack(sfs, &osfs);
@@ -745,6 +750,7 @@ static kmem_cache_t *ll_inode_cachep;
 static struct inode *ll_alloc_inode(struct super_block *sb)
 {
         struct ll_inode_info *lli;
+        LPROC_COUNTER_SBI_INCBY1((ll_s2sbi(sb)), LL_ALLOC_INODE);
         OBD_SLAB_ALLOC(lli, ll_inode_cachep, SLAB_KERNEL, sizeof *lli);
         if (lli == NULL)
                 return NULL;
index 1a4f6c4..87c3fb9 100644 (file)
@@ -43,6 +43,7 @@
 #include <linux/lustre_mds.h>
 #include <linux/obd_class.h>
 #include <linux/obd_lov.h>
+#include <linux/seq_file.h>
 #include <linux/lprocfs_status.h>
 
 struct lov_file_handles {
@@ -169,9 +170,22 @@ static void lov_llh_destroy(struct lov_lock_handles *llh)
+/*
+ * Attach-time /proc setup for a LOV device.
+ *
+ * Registers the variables obtained from lprocfs_init_vars() through
+ * lprocfs_obd_attach(), then creates a read-only (0444) "target_obd" entry
+ * under dev->obd_proc_entry, served by ll_proc_target_fops with dev stored
+ * in the entry's data pointer.  len and data are not used here.
+ *
+ * Returns the lprocfs_obd_attach() error if that call fails, -ENOMEM if
+ * create_proc_entry() returns NULL, and 0 otherwise.
+ */
 int lov_attach(struct obd_device *dev, obd_count len, void *data)
 {
         struct lprocfs_static_vars lvars;
+        struct proc_dir_entry *entry;
+        int rc;
 
         lprocfs_init_vars(&lvars);
-        return lprocfs_obd_attach(dev, lvars.obd_vars);
+        rc = lprocfs_obd_attach(dev, lvars.obd_vars);
+        if (rc) 
+                return rc;
+
+        entry = create_proc_entry("target_obd", 0444, dev->obd_proc_entry);
+        /* NOTE(review): on this failure path whatever lprocfs_obd_attach()
+         * registered is not removed here -- confirm the caller detaches. */
+        if (entry == NULL) 
+                RETURN(-ENOMEM);
+        entry->proc_fops = &ll_proc_target_fops;
+        entry->data = dev;
+        
+        return rc;
+        
 }
 
 int lov_detach(struct obd_device *dev)
@@ -645,9 +659,9 @@ static int lov_create(struct lustre_handle *conn, struct obdo *oa,
                                                "err %d\n", err);
                                         err = -EIO;
                                 }
-                                if (!rc)
-                                        rc = err;
                         }
+                        if (!rc)
+                                rc = err;
                         continue;
                 }
                 loi->loi_id = tmp->o_id;
@@ -663,13 +677,15 @@ static int lov_create(struct lustre_handle *conn, struct obdo *oa,
                 ++loi;
 
                 /* If we have allocated enough objects, we are OK */
-                if (obj_alloc == lsm->lsm_stripe_count) {
-                        rc = 0;
-                        GOTO(out_done, rc);
-                }
+                if (obj_alloc == lsm->lsm_stripe_count)
+                        GOTO(out_done, rc = 0);
         }
 
         if (*ea != NULL) {
+                CERROR("can't lstripe objid "LPX64": have %u want %u, rc %d\n",
+                       lsm->lsm_object_id, obj_alloc, lsm->lsm_stripe_count,rc);
+                if (rc == 0)
+                        rc = -EFBIG;
                 GOTO(out_cleanup, rc);
         } else {
                 struct lov_stripe_md *lsm_new;
@@ -687,6 +703,8 @@ static int lov_create(struct lustre_handle *conn, struct obdo *oa,
                 /* XXX LOV STACKING call into osc for sizes */
                 OBD_FREE(lsm, lov_stripe_md_size(lsm->lsm_stripe_count));
                 lsm = lsm_new;
+
+                rc = 0;
         }
  out_done:
         *ea = lsm;
@@ -1700,7 +1718,7 @@ static int lov_brw_async(int cmd, struct lustre_handle *conn,
 static int lov_enqueue(struct lustre_handle *conn, struct lov_stripe_md *lsm,
                        struct lustre_handle *parent_lock,
                        __u32 type, void *cookie, int cookielen, __u32 mode,
-                       int *flags, void *cb, void *data, int datalen,
+                       int *flags, void *cb, void *data,
                        struct lustre_handle *lockh)
 {
         struct obd_export *export = class_conn2export(conn);
@@ -1764,7 +1782,7 @@ static int lov_enqueue(struct lustre_handle *conn, struct lov_stripe_md *lsm,
                 *flags = 0;
                 rc = obd_enqueue(&(lov->tgts[loi->loi_ost_idx].conn), &submd,
                                   parent_lock, type, &sub_ext, sizeof(sub_ext),
-                                  mode, flags, cb, data, datalen, lov_lockhp);
+                                  mode, flags, cb, data, lov_lockhp);
 
                 // XXX add a lock debug statement here
                 if (rc != ELDLM_OK) {
@@ -1812,8 +1830,8 @@ static int lov_enqueue(struct lustre_handle *conn, struct lov_stripe_md *lsm,
 }
 
 static int lov_match(struct lustre_handle *conn, struct lov_stripe_md *lsm,
-                       __u32 type, void *cookie, int cookielen, __u32 mode,
-                       int *flags, struct lustre_handle *lockh)
+                     __u32 type, void *cookie, int cookielen, __u32 mode,
+                     int *flags, void *data, struct lustre_handle *lockh)
 {
         struct obd_export *export = class_conn2export(conn);
         struct lov_lock_handles *lov_lockh = NULL;
@@ -1874,7 +1892,7 @@ static int lov_match(struct lustre_handle *conn, struct lov_stripe_md *lsm,
                 /* XXX submd is not fully initialized here */
                 rc = obd_match(&(lov->tgts[loi->loi_ost_idx].conn), &submd,
                                type, &sub_ext, sizeof(sub_ext), mode,
-                               &lov_flags, lov_lockhp);
+                               &lov_flags, data, lov_lockhp);
                 if (rc != 1)
                         break;
         }
index 630148a..e0b3adb 100644 (file)
@@ -27,6 +27,7 @@
 #endif
 #include <linux/lprocfs_status.h>
 #include <linux/obd_class.h>
+#include <linux/seq_file.h>
 
 #ifndef LPROCFS
 struct lprocfs_vars lprocfs_module_vars[] = { {0} };
@@ -113,42 +114,70 @@ int rd_activeobd(char *page, char **start, off_t off, int count, int *eof,
         return snprintf(page, count, "%u\n", desc->ld_active_tgt_count);
 }
 
-int rd_target(char *page, char **start, off_t off, int count, int *eof,
-              void *data)
+/*
+ * procfs read handler: writes the UUID string of the LOV's MDC device
+ * (lov->mdcobd->obd_uuid.uuid) followed by a newline into page, bounded by
+ * count, and sets *eof.
+ *
+ * data is the obd_device and is asserted non-NULL; start and off are unused.
+ * Returns the snprintf() result.
+ */
+int rd_mdc(char *page, char **start, off_t off, int count, int *eof, void *data)
 {
         struct obd_device *dev = (struct obd_device*) data;
-        int len = 0, i;
         struct lov_obd *lov;
-        struct lov_tgt_desc *tgts;
-        
+
         LASSERT(dev != NULL);
         lov = &dev->u.lov;
-        tgts = lov->tgts;
-        LASSERT(tgts != NULL);
+        *eof = 1;
+        return snprintf(page, count, "%s\n", lov->mdcobd->obd_uuid.uuid);
+}
 
-        for (i = 0; i < lov->desc.ld_tgt_count; i++, tgts++) {
-                int cur;
-                cur = snprintf(&page[len], count, "%d: %s %sACTIVE\n",
-                                i, tgts->uuid.uuid, tgts->active ? "" : "IN");
-                len += cur;
-                count -= cur;
-        }
+/*
+ * seq_file start operation for the LOV target list.
+ *
+ * p->private is the obd_device.  Returns a pointer to lov->tgts[*pos], or
+ * NULL once *pos has reached lov->desc.ld_tgt_count.
+ */
+static void *ll_tgt_seq_start(struct seq_file *p, loff_t *pos)
+{
+        struct obd_device *dev = p->private;
+        struct lov_obd *lov = &dev->u.lov;
+
+        return (*pos >= lov->desc.ld_tgt_count) ? NULL : &(lov->tgts[*pos]);
 
-        *eof = 1;
-        return len;
 }
+/*
+ * seq_file stop operation for the LOV target list: intentionally empty,
+ * since ll_tgt_seq_start() takes no lock and allocates nothing.
+ */
+static void ll_tgt_seq_stop(struct seq_file *p, void *v)
+{
 
-int rd_mdc(char *page, char **start, off_t off, int count, int *eof, void *data)
+}
+
+/*
+ * seq_file next operation for the LOV target list.
+ *
+ * Advances *pos by one and returns a pointer to lov->tgts[*pos], or NULL
+ * once *pos has reached lov->desc.ld_tgt_count.  v (the previous element)
+ * is not used; the position alone selects the element.
+ */
+static void *ll_tgt_seq_next(struct seq_file *p, void *v, loff_t *pos)
 {
-        struct obd_device *dev = (struct obd_device*) data;
-        struct lov_obd *lov;
+        struct obd_device *dev = p->private;
+        struct lov_obd *lov = &dev->u.lov;
 
-        LASSERT(dev != NULL);
-        lov = &dev->u.lov;
-        *eof = 1;
-        return snprintf(page, count, "%s\n", lov->mdcobd->obd_uuid.uuid);
+        ++*pos;
+        return (*pos >=lov->desc.ld_tgt_count) ? NULL : &(lov->tgts[*pos]);
+}
+
+/*
+ * seq_file show operation: prints one line for the target v points at, in
+ * the form "<n>: <uuid> ACTIVE" or "<n>: <uuid> INACTIVE".
+ *
+ * <n> is the element's index in lov->tgts plus one, i.e. numbering starts
+ * at 1; the rd_target() handler removed by this change printed the
+ * zero-based index.  Returns the seq_printf() result.
+ */
+static int ll_tgt_seq_show(struct seq_file *p, void *v)
+{
+        struct lov_tgt_desc *tgt = v;
+        struct obd_device *dev = p->private;
+        struct lov_obd *lov = &dev->u.lov;
+        /* index recovered from the element's offset within the array */
+        int idx = tgt - &(lov->tgts[0]);
+        return seq_printf(p, "%d: %s %sACTIVE\n", idx+1, tgt->uuid.uuid,
+                          tgt->active ? "" : "IN");
 }
 
+/* seq_file iterator over the LOV target array, in the order it is driven:
+ * start, then show/next per element, then stop. */
+struct seq_operations ll_tgt_sops = {
+        .start = ll_tgt_seq_start,
+        .show  = ll_tgt_seq_show,
+        .next  = ll_tgt_seq_next,
+        .stop  = ll_tgt_seq_stop,
+};
+
+/*
+ * open() for the target list proc file: starts a seq_file session using
+ * ll_tgt_sops and hands the proc entry's data pointer (lov_attach() stores
+ * the obd_device there) to the iterator through seq->private.
+ *
+ * Returns 0 on success or the seq_open() error.
+ */
+static int ll_target_seq_open(struct inode *inode, struct file *file)
+{
+        int rc;
+
+        rc = seq_open(file, &ll_tgt_sops);
+        if (rc == 0) {
+                struct proc_dir_entry *pde = inode->u.generic_ip;
+                struct seq_file *sf = file->private_data;
+
+                sf->private = pde->data;
+        }
+
+        return rc;
+}
 struct lprocfs_vars lprocfs_obd_vars[] = {
         { "uuid",         lprocfs_rd_uuid, 0, 0 },
         { "stripesize",   rd_stripesize,   0, 0 },
@@ -163,7 +192,6 @@ struct lprocfs_vars lprocfs_obd_vars[] = {
         { "blocksize",    rd_blksize,      0, 0 },
         { "kbytestotal",  rd_kbytestotal,  0, 0 },
         { "kbytesfree",   rd_kbytesfree,   0, 0 },
-        { "target_obd",   rd_target,       0, 0 },
         { "target_mdc",   rd_mdc,          0, 0 },
         { 0 }
 };
@@ -173,5 +201,12 @@ struct lprocfs_vars lprocfs_module_vars[] = {
         { 0 }
 };
 
+/* File operations installed on the "target_obd" proc entry by lov_attach();
+ * everything except open is the stock seq_file implementation. */
+struct file_operations ll_proc_target_fops = {
+        .open    = ll_target_seq_open,
+        .release = seq_release,
+        .read    = seq_read,
+        .llseek  = seq_lseek,
+};
+
 #endif /* LPROCFS */
 LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
index dfcd7af..dc90885 100644 (file)
@@ -130,14 +130,14 @@ int mdc_getlovinfo(struct obd_device *obd, struct lustre_handle *mdc_connh,
                 CERROR ("rcp failed\n");
                 GOTO (failed, rc);
         }
-        
+
         desc = lustre_swab_repbuf (req, 0, sizeof (*desc),
                                    lustre_swab_lov_desc);
         if (desc == NULL) {
                 CERROR ("Can't unpack lov_desc\n");
                 GOTO (failed, rc = -EPROTO);
         }
-        
+
         LASSERT_REPSWAB (req, 1);
         /* array of uuids byte-sex insensitive; just verify they are all
          * there and terminated */
@@ -150,7 +150,7 @@ int mdc_getlovinfo(struct obd_device *obd, struct lustre_handle *mdc_connh,
 
         for (i = 0; i < desc->ld_tgt_count; i++) {
                 int uid_len = strnlen (uuids[i].uuid, sizeof (uuids[i].uuid));
-                
+
                 if (uid_len == sizeof (uuids[i].uuid)) {
                         CERROR ("Unterminated uuid %d:%*s\n",
                                 i, (int)sizeof (uuids[i].uuid), uuids[i].uuid);
@@ -169,7 +169,7 @@ int mdc_getattr_common (struct lustre_handle *conn,
 {
         struct mds_body *body;
         void            *eadata;
-        int              rc; 
+        int              rc;
         int              size[2] = {sizeof(*body), 0};
         int              bufcount = 1;
         ENTRY;
@@ -188,7 +188,7 @@ int mdc_getattr_common (struct lustre_handle *conn,
         mdc_put_rpc_lock(&mdc_rpc_lock, NULL);
         if (rc != 0)
                 RETURN (rc);
-        
+
         body = lustre_swab_repbuf (req, 0, sizeof (*body),
                                    lustre_swab_mds_body);
         if (body == NULL) {
@@ -210,7 +210,7 @@ int mdc_getattr_common (struct lustre_handle *conn,
 
         RETURN (0);
 }
-                        
+
 int mdc_getattr(struct lustre_handle *conn, struct ll_fid *fid,
                 unsigned long valid, unsigned int ea_size,
                 struct ptlrpc_request **request)
@@ -289,7 +289,7 @@ void mdc_store_inode_generation(struct ptlrpc_request *req, int reqoff,
 
         LASSERT (rec != NULL);
         LASSERT (body != NULL);
-        
+
         memcpy(&rec->cr_replayfid, &body->fid1, sizeof rec->cr_replayfid);
         DEBUG_REQ(D_HA, req, "storing generation %x for ino "LPD64,
                   rec->cr_replayfid.generation, rec->cr_replayfid.id);
@@ -451,8 +451,10 @@ int mdc_enqueue(struct lustre_handle *conn,
                 LDLM_DEBUG(lock, "matching against this");
 
                 memcpy(&lockh2, lockh, sizeof(lockh2));
-                if (ldlm_lock_match(NULL, LDLM_FL_BLOCK_GRANTED, NULL,
-                                    LDLM_PLAIN, NULL, 0, LCK_NL, &lockh2)) {
+                if (ldlm_lock_match(NULL,
+                                    LDLM_FL_BLOCK_GRANTED | LDLM_FL_MATCH_DATA,
+                                    NULL, LDLM_PLAIN, NULL, 0, LCK_NL, cb_data,
+                                    &lockh2)) {
                         /* We already have a lock; cancel the new one */
                         ldlm_lock_decref_and_cancel(lockh, lock_mode);
                         memcpy(lockh, &lockh2, sizeof(lockh2));
@@ -463,7 +465,7 @@ int mdc_enqueue(struct lustre_handle *conn,
         dlm_rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*dlm_rep));
         LASSERT (dlm_rep != NULL);           /* checked by ldlm_cli_enqueue() */
         LASSERT_REPSWABBED (req, 0);         /* swabbed by ldlm_cli_enqueue() */
-        
+
         it->it_disposition = (int) dlm_rep->lock_policy_res1;
         it->it_status = (int) dlm_rep->lock_policy_res2;
         it->it_lock_mode = lock_mode;
@@ -485,7 +487,7 @@ int mdc_enqueue(struct lustre_handle *conn,
                         /* The eadata is opaque; just check that it is
                          * there.  Eventually, obd_unpackmd() will check
                          * the contents */
-                        eadata = lustre_swab_repbuf (req, 2, body->eadatasize, 
+                        eadata = lustre_swab_repbuf (req, 2, body->eadatasize,
                                                      NULL);
                         if (eadata == NULL) {
                                 CERROR ("Missing/short eadata\n");
@@ -493,7 +495,7 @@ int mdc_enqueue(struct lustre_handle *conn,
                         }
                 }
         }
-        
+
         RETURN(rc);
 }
 
@@ -507,7 +509,7 @@ static void mdc_replay_open(struct ptlrpc_request *req)
         body = lustre_swab_repbuf (req, 1, sizeof (*body),
                                    lustre_swab_mds_body);
         LASSERT (body != NULL);
-        
+
         memcpy(&old, file_fh, sizeof(old));
         CDEBUG(D_HA, "updating handle from "LPD64" to "LPD64"\n",
                file_fh->cookie, body->handle.cookie);
@@ -675,7 +677,7 @@ static int mdc_statfs(struct lustre_handle *conn, struct obd_statfs *osfs)
                 CERROR ("Can't unpack obd_statfs\n");
                 GOTO (out, rc = -EPROTO);
         }
-        
+
         memcpy (osfs, msfs, sizeof (*msfs));
         EXIT;
 out:
index 259a6bc..3c2aa89 100644 (file)
@@ -1477,6 +1477,18 @@ static int mds_setup(struct obd_device *obddev, obd_count len, void *buf)
         if (IS_ERR(obddev->obd_fsops))
                 RETURN(rc = PTR_ERR(obddev->obd_fsops));
 
+
+        if (data->ioc_inllen3 > 0 && data->ioc_inlbuf3) {
+                if (*data->ioc_inlbuf3 == '/') {
+                        CERROR("mds namespace mount: %s\n", 
+                               data->ioc_inlbuf3);
+//                        mds->mds_nspath = strdup(ioc->inlbuf4);
+                } else {
+                        CERROR("namespace mount must be absolute path: '%s'\n",
+                               data->ioc_inlbuf3);
+                }
+        }
+
        if (!(page = __get_free_page(GFP_KERNEL)))
                return -ENOMEM;
 
index 823a7a6..50949dd 100644 (file)
@@ -290,7 +290,7 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset,
         if (rc)
                 GOTO(cleanup, rc);
 
-        rc = fsfilt_setattr(obd, de, handle, &rec->ur_iattr);
+        rc = fsfilt_setattr(obd, de, handle, &rec->ur_iattr, 0);
         if (rc == 0 &&
             S_ISREG(inode->i_mode) &&
             rec->ur_eadata != NULL) {
@@ -494,7 +494,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset,
                                inode->i_ino, inode->i_generation);
                 }
 
-                rc = fsfilt_setattr(obd, dchild, handle, &iattr);
+                rc = fsfilt_setattr(obd, dchild, handle, &iattr, 0);
                 if (rc) {
                         CERROR("error on setattr: rc = %d\n", rc);
                         /* XXX should we abort here in case of error? */
@@ -715,9 +715,15 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset,
          * (bug 72) */
         switch (rec->ur_mode & S_IFMT) {
         case S_IFDIR:
+                /* Drop any lingering child directories before we start our
+                 * transaction, to avoid doing multiple inode dirty/delete
+                 * in our compound transaction (bug 1321).
+                 */
+                shrink_dcache_parent(dchild);
                 handle = fsfilt_start(obd, dir_inode, FSFILT_OP_RMDIR);
                 if (IS_ERR(handle))
                         GOTO(cleanup, rc = PTR_ERR(handle));
+                cleanup_phase = 4;
                 rc = vfs_rmdir(dir_inode, dchild);
                 break;
         case S_IFREG:
@@ -740,21 +746,24 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset,
                 handle = fsfilt_start(obd, dir_inode, FSFILT_OP_UNLINK);
                 if (IS_ERR(handle))
                         GOTO(cleanup, rc = PTR_ERR(handle));
+                cleanup_phase = 4;
                 rc = vfs_unlink(dir_inode, dchild);
                 break;
         default:
-                CERROR("bad file type %o unlinking %s\n", rec->ur_mode, rec->ur_name);
+                CERROR("bad file type %o unlinking %s\n", rec->ur_mode,
+                       rec->ur_name);
                 LBUG();
                 GOTO(cleanup, rc = -EINVAL);
         }
 
  cleanup:
-        rc = mds_finish_transno(mds, dir_inode, handle, req, rc, 0);
-        if (rc && body) {
-                /* Don't unlink the OST objects if the MDS unlink failed */
-                body->valid = 0;
-        }
         switch(cleanup_phase) {
+            case 4:
+                rc = mds_finish_transno(mds, dir_inode, handle, req, rc, 0);
+                if (rc && body) {
+                        /* Don't unlink the OST objects if the MDS unlink failed */
+                        body->valid = 0;
+                }
             case 3: /* child lock */
                 if (rc != 0 || return_lock == 0)
                         ldlm_lock_decref(child_lockh, LCK_EX);
index 1e180a8..9619861 100644 (file)
@@ -73,6 +73,9 @@ int obd_memmax;
 
 /* Root for /proc/lustre */
 struct proc_dir_entry *proc_lustre_root = NULL;
+/* "version" entry registered under proc_lustre_root by init_obdclass() */
+int obd_proc_read_version(char *page, char **start, off_t off, int count,
+                          int *eof, void *data);
+struct lprocfs_vars lprocfs_version[] = {
+        { "version", obd_proc_read_version, NULL, NULL },
+        { NULL, NULL, NULL, NULL }
+};
+/* lprocfs_add_vars() result for lprocfs_version; -1 when built w/o LPROCFS */
+int proc_version;
 
 /* The following are visible and mutable through /proc/sys/lustre/. */
 unsigned long obd_fail_loc;
@@ -244,7 +247,8 @@ int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd,
         case OBD_IOC_DEVICE: {
                 CDEBUG(D_IOCTL, "\n");
                 if (data->ioc_dev >= MAX_OBD_DEVICES || data->ioc_dev < 0) {
-                        CERROR("OBD ioctl: DEVICE insufficient devices\n");
+                        CERROR("OBD ioctl: DEVICE invalid device %d\n",
+                               data->ioc_dev);
                         GOTO(out, err = -EINVAL);
                 }
                 CDEBUG(D_IOCTL, "device %d\n", data->ioc_dev);
@@ -851,12 +855,23 @@ int init_obdclass(void)
         proc_lustre_root = proc_mkdir("lustre", proc_root_fs);
         if (!proc_lustre_root)
                 printk(KERN_ERR "error registering /proc/fs/lustre\n");
+        proc_version = lprocfs_add_vars(proc_lustre_root,lprocfs_version,NULL);
 #else
         proc_lustre_root = NULL;
+        proc_version = -1;
 #endif
         return 0;
 }
 
+/*
+ * procfs read handler for the "version" entry.
+ *
+ * With LPROCFS: sets *eof and writes BUILD_VERSION plus a newline into page,
+ * bounded by count, returning the snprintf() result.
+ * Without LPROCFS: a stub that touches nothing and returns 0.
+ */
+int obd_proc_read_version(char *page, char **start, off_t off, int count,
+                          int *eof, void *data)
+{
+#ifdef LPROCFS
+        *eof = 1;
+        return snprintf(page, count, "%s\n", BUILD_VERSION);
+#else
+        return 0;
+#endif
+}
+
 #ifdef __KERNEL__
 static void __exit cleanup_obdclass(void)
 #else
index a02f1f5..5f6322f 100644 (file)
@@ -124,7 +124,7 @@ static void *fsfilt_ext3_start(struct inode *inode, int op)
  * objcount inode blocks
  * 1 superblock
  * 2 * EXT3_SINGLEDATA_TRANS_BLOCKS for the quota files
- * 
+ *
  * 1 EXT3_DATA_TRANS_BLOCKS for the last_rcvd update.
  */
 static int fsfilt_ext3_credits_needed(int objcount, struct fsfilt_objinfo *fso)
@@ -155,7 +155,7 @@ static int fsfilt_ext3_credits_needed(int objcount, struct fsfilt_objinfo *fso)
                 ngdblocks = EXT3_SB(sb)->s_gdb_count;
 
         needed += nbitmaps + ngdblocks;
-        
+
         /* last_rcvd update */
         needed += EXT3_DATA_TRANS_BLOCKS;
 
@@ -238,7 +238,7 @@ static int fsfilt_ext3_commit(struct inode *inode, void *h, int force_sync)
 }
 
 static int fsfilt_ext3_setattr(struct dentry *dentry, void *handle,
-                               struct iattr *iattr)
+                               struct iattr *iattr, int do_trunc)
 {
         struct inode *inode = dentry->d_inode;
         int rc;
@@ -251,11 +251,7 @@ static int fsfilt_ext3_setattr(struct dentry *dentry, void *handle,
          * zero all the time (which doesn't invoke block truncate at unlink
          * time), so we assert we never change the MDS file size from zero.
          */
-        if (iattr->ia_valid & ATTR_SIZE) {
-                CERROR("hmm, setting %*s file size to %lld\n",
-                       dentry->d_name.len, dentry->d_name.name, iattr->ia_size);
-                LASSERT(iattr->ia_size == 0);
-#if 0
+        if (iattr->ia_valid & ATTR_SIZE && !do_trunc) {
                 /* ATTR_SIZE would invoke truncate: clear it */
                 iattr->ia_valid &= ~ATTR_SIZE;
                 inode->i_size = iattr->ia_size;
@@ -267,7 +263,6 @@ static int fsfilt_ext3_setattr(struct dentry *dentry, void *handle,
                         iattr->ia_valid |= ATTR_MODE;
                         iattr->ia_mode = inode->i_mode;
                 }
-#endif
         }
         if (inode->i_op->setattr)
                 rc = inode->i_op->setattr(dentry, iattr);
index ddec807..1fba0f4 100644 (file)
@@ -238,7 +238,7 @@ static int fsfilt_extN_commit(struct inode *inode, void *h, int force_sync)
 }
 
 static int fsfilt_extN_setattr(struct dentry *dentry, void *handle,
-                               struct iattr *iattr)
+                               struct iattr *iattr, int do_trunc)
 {
         struct inode *inode = dentry->d_inode;
         int rc;
@@ -251,11 +251,7 @@ static int fsfilt_extN_setattr(struct dentry *dentry, void *handle,
          * zero all the time (which doesn't invoke block truncate at unlink
          * time), so we assert we never change the MDS file size from zero.
          */
-        if (iattr->ia_valid & ATTR_SIZE) {
-                CERROR("hmm, setting %*s file size to %lld\n",
-                       dentry->d_name.len, dentry->d_name.name, iattr->ia_size);
-                LASSERT(iattr->ia_size == 0);
-#if 0
+        if (iattr->ia_valid & ATTR_SIZE && !do_trunc) {
                 /* ATTR_SIZE would invoke truncate: clear it */
                 iattr->ia_valid &= ~ATTR_SIZE;
                 inode->i_size = iattr->ia_size;
@@ -267,7 +263,6 @@ static int fsfilt_extN_setattr(struct dentry *dentry, void *handle,
                         iattr->ia_valid |= ATTR_MODE;
                         iattr->ia_mode = inode->i_mode;
                 }
-#endif
         }
         if (inode->i_op->setattr)
                 rc = inode->i_op->setattr(dentry, iattr);
index 2aba0f1..ccefb92 100644 (file)
@@ -71,7 +71,7 @@ static int fsfilt_reiserfs_commit(struct inode *inode, void *handle,
 }
 
 static int fsfilt_reiserfs_setattr(struct dentry *dentry, void *handle,
-                               struct iattr *iattr)
+                               struct iattr *iattr, int do_trunc)
 {
         struct inode *inode = dentry->d_inode;
         int rc;
@@ -84,11 +84,7 @@ static int fsfilt_reiserfs_setattr(struct dentry *dentry, void *handle,
          * zero all the time (which doesn't invoke block truncate at unlink
          * time), so we assert we never change the MDS file size from zero.
          */
-        if (iattr->ia_valid & ATTR_SIZE) {
-                CERROR("hmm, setting %*s file size to %llu\n",
-                       dentry->d_name.len, dentry->d_name.name, iattr->ia_size);
-                LASSERT(iattr->ia_size == 0);
-#if 0
+        if (iattr->ia_valid & ATTR_SIZE && !do_trunc) {
                 /* ATTR_SIZE would invoke truncate: clear it */
                 iattr->ia_valid &= ~ATTR_SIZE;
                 inode->i_size = iattr->ia_size;
@@ -100,7 +96,6 @@ static int fsfilt_reiserfs_setattr(struct dentry *dentry, void *handle,
                         iattr->ia_valid |= ATTR_MODE;
                         iattr->ia_mode = inode->i_mode;
                 }
-#endif
         }
         if (inode->i_op->setattr)
                 rc = inode->i_op->setattr(dentry, iattr);
index 2984e9c..8ec50d8 100644 (file)
@@ -320,80 +320,83 @@ int lprocfs_obd_detach(struct obd_device *dev)
         return 0;
 }
 
-struct lprocfs_counters* lprocfs_alloc_counters(unsigned int num)
+/*
+ * Allocate a stats object holding num counters for each of smp_num_cpus
+ * CPUs.
+ *
+ * Two allocations are made: the header, sized to carry smp_num_cpus
+ * ls_percpu pointers, and one contiguous buffer of
+ * smp_num_cpus * percpusize bytes, where percpusize is the
+ * L1_CACHE_ALIGN()ed size of a struct lprocfs_percpu with num counters.
+ * ls_percpu[0] points at the buffer and each following pointer is
+ * percpusize bytes further on.
+ *
+ * Returns the new object, or NULL if num is 0 or either allocation fails
+ * (the header is freed again if the buffer cannot be allocated).
+ */
+struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num)
 {
-        struct lprocfs_counters* cntrs;
-        int csize;
+        struct lprocfs_stats *stats;
+        /* only used as a typeof() operand below, never dereferenced */
+        struct lprocfs_percpu *percpu;
+        unsigned int percpusize;
+        unsigned int i;
+
         if (num == 0)
                 return NULL;
 
-        csize = offsetof(struct lprocfs_counters, cntr[num]);
-        OBD_ALLOC(cntrs, csize);
-        if (cntrs != NULL) {
-                cntrs->num = num;
+        OBD_ALLOC(stats, offsetof(typeof(*stats), ls_percpu[smp_num_cpus]));
+        if (stats == NULL)
+                return NULL;
+
+        percpusize = L1_CACHE_ALIGN(offsetof(typeof(*percpu), lp_cntr[num]));
+        stats->ls_percpu_size = smp_num_cpus * percpusize;
+        OBD_ALLOC(stats->ls_percpu[0], stats->ls_percpu_size);
+        if (stats->ls_percpu[0] == NULL) {
+                OBD_FREE(stats, offsetof(typeof(*stats),
+                                         ls_percpu[smp_num_cpus]));
+                return NULL;
         }
-        return cntrs;
+
+        stats->ls_num = num;
+        /* carve the single buffer into one slice per CPU */
+        for (i = 1; i < smp_num_cpus; i++)
+                stats->ls_percpu[i] = (void *)(stats->ls_percpu[i - 1]) +
+                        percpusize;
+
+        return stats;
 }
 
-void lprocfs_free_counters(struct lprocfs_counters* cntrs)
+/*
+ * Release a stats object obtained from lprocfs_alloc_stats(): frees the
+ * per-cpu counter buffer (ls_percpu[0], ls_percpu_size bytes) and then the
+ * header.
+ *
+ * A NULL stats pointer is accepted and ignored, as it was by the function
+ * this replaces; lprocfs_alloc_stats() returns NULL for num == 0 and on
+ * allocation failure, so callers can legitimately hold NULL.  An object
+ * whose ls_num is 0 is left untouched.
+ */
+void lprocfs_free_stats(struct lprocfs_stats *stats)
 {
-        if (cntrs != NULL) {
-                int csize = offsetof(struct lprocfs_counters, cntr[cntrs->num]);                OBD_FREE(cntrs, csize);
-        }
+        if (stats == NULL || stats->ls_num == 0)
+                return;
+
+        OBD_FREE(stats->ls_percpu[0], stats->ls_percpu_size);
+        OBD_FREE(stats, offsetof(typeof(*stats), ls_percpu[smp_num_cpus]));
 }
 
-/* Reset counter under lock */
+/*
+ * Write-side handler for the counters file (presumably installed as a
+ * write_proc callback -- confirm at the registration site).  Resetting the
+ * counters is not supported: all arguments are ignored and 0 is returned.
+ */
 int lprocfs_counter_write(struct file *file, const char *buffer,
                           unsigned long count, void *data)
 {
-        struct lprocfs_counters *cntrs = (struct lprocfs_counters*) data;
-        unsigned int i;
-        LASSERT(cntrs != NULL);
-
-        for (i = 0; i < cntrs->num; i++) {
-                struct lprocfs_counter *cntr = &(cntrs->cntr[i]);
-                spinlock_t *lock = (cntr->config & LPROCFS_CNTR_EXTERNALLOCK) ?
-                        cntr->l.external : &cntr->l.internal;
-
-                spin_lock(lock);
-                cntr->count     = 0;
-                cntr->sum       = 0;
-                cntr->min       = (~(__u64)0);
-                cntr->max       = 0;
-                cntr->sumsquare = 0;
-                spin_unlock(lock);
-        }
+        /* not supported */
         return 0;
 }
 
-static void *lprocfs_counters_seq_start(struct seq_file *p, loff_t *pos)
+/*
+ * seq_file start operation for a stats file.
+ *
+ * p->private is the struct lprocfs_stats.  Returns the address of counter
+ * *pos inside the first CPU's block (ls_percpu[0]), or NULL once *pos has
+ * reached ls_num.
+ */
+static void *lprocfs_stats_seq_start(struct seq_file *p, loff_t *pos)
 {
-        struct lprocfs_counters *cntrs = p->private;
-        return (*pos >= cntrs->num) ? NULL : (void*) &cntrs->cntr[*pos];
+        struct lprocfs_stats *stats = p->private;
+        /* the cursor always refers to the first CPU's copy of the counter */
+        return (*pos >= stats->ls_num) ? NULL :
+                &(stats->ls_percpu[0]->lp_cntr[*pos]);
 }
 
-static void lprocfs_counters_seq_stop(struct seq_file *p, void *v)
+/* seq_file stop operation for a stats file: intentionally empty, as
+ * lprocfs_stats_seq_start() takes no lock and allocates nothing. */
+static void lprocfs_stats_seq_stop(struct seq_file *p, void *v)
 {
 }
 
-static void *lprocfs_counters_seq_next(struct seq_file *p, void *v,
-                                       loff_t *pos)
+/*
+ * seq_file next operation for a stats file.
+ *
+ * Advances *pos by one and returns the address of counter *pos inside the
+ * first CPU's block (ls_percpu[0]), or NULL once *pos has reached ls_num.
+ * v (the previous element) is not used.
+ */
+static void *lprocfs_stats_seq_next(struct seq_file *p, void *v, loff_t *pos)
 {
-        struct lprocfs_counters *cntrs = p->private;
+        struct lprocfs_stats *stats = p->private;
         ++*pos;
-        return (*pos >= cntrs->num) ? NULL : (void*) &(cntrs->cntr[*pos]);
+        return (*pos >= stats->ls_num) ? NULL :
+                &(stats->ls_percpu[0]->lp_cntr[*pos]);
 }
 
 /* seq file export of one lprocfs counter */
-static int lprocfs_counters_seq_show(struct seq_file *p, void *v)
+static int lprocfs_stats_seq_show(struct seq_file *p, void *v)
 {
-       struct lprocfs_counters *cntrs = p->private;
+       struct lprocfs_stats *stats = p->private;
        struct lprocfs_counter  *cntr = v;
-       spinlock_t              *lock;
-       struct lprocfs_counter  c;
-       int rc = 0;
+       struct lprocfs_counter  t, ret = { .lc_min = ~(__u64)0 };
+       int i, idx, rc;
 
-       if (cntr == &(cntrs->cntr[0])) {
+       if (cntr == &(stats->ls_percpu[0])->lp_cntr[0]) {
                struct timeval now;
                do_gettimeofday(&now);
                rc = seq_printf(p, "%-25s %lu.%lu secs.usecs\n",
@@ -401,31 +404,42 @@ static int lprocfs_counters_seq_show(struct seq_file *p, void *v)
                if (rc < 0)
                        return rc;
        }
+       idx = cntr - &(stats->ls_percpu[0])->lp_cntr[0];
+
+       for (i = 0; i < smp_num_cpus; i++) {
+               struct lprocfs_counter *percpu_cntr =
+                       &(stats->ls_percpu[i])->lp_cntr[idx];
+               int centry;
+               do {
+                        centry = atomic_read(&percpu_cntr->lc_cntl.la_entry);
+                        t.lc_count = percpu_cntr->lc_count;
+                        t.lc_sum = percpu_cntr->lc_sum;
+                        t.lc_min = percpu_cntr->lc_min;
+                        t.lc_max = percpu_cntr->lc_max;
+                        t.lc_sumsquare = percpu_cntr->lc_sumsquare;
+               } while (centry != atomic_read(&percpu_cntr->lc_cntl.la_entry) &&
+                        centry != atomic_read(&percpu_cntr->lc_cntl.la_exit));
+               ret.lc_count += t.lc_count;
+               ret.lc_sum += t.lc_sum;
+               if (t.lc_min < ret.lc_min)
+                       ret.lc_min = t.lc_min;
+               if (t.lc_max > ret.lc_max)
+                       ret.lc_max = t.lc_max;
+               ret.lc_sumsquare += t.lc_sumsquare;
+       }
 
-       /* Take a snapshot of the counter under lock */
-       lock = (cntr->config & LPROCFS_CNTR_EXTERNALLOCK) ?
-               cntr->l.external : &cntr->l.internal;
-       spin_lock(lock);
-
-       c.count = cntr->count;
-       c.sum = cntr->sum;
-       c.min = cntr->min;
-       c.max = cntr->max;
-       c.sumsquare = cntr->sumsquare;
-
-       spin_unlock(lock);
-
-       rc = seq_printf(p, "%-25s "LPU64" samples [%s]", cntr->name, c.count,
-                       cntr->units);
+       rc = seq_printf(p, "%-25s "LPU64" samples [%s]", cntr->lc_name,
+                       ret.lc_count, cntr->lc_units);
        if (rc < 0)
                goto out;
 
-       if ((cntr->config & LPROCFS_CNTR_AVGMINMAX) && (c.count > 0)) {
-               rc = seq_printf(p, " "LPU64" "LPU64" "LPU64, c.min,c.max,c.sum);
+       if ((cntr->lc_config & LPROCFS_CNTR_AVGMINMAX) && (ret.lc_count > 0)) {
+               rc = seq_printf(p, " "LPU64" "LPU64" "LPU64,
+                               ret.lc_min, ret.lc_max, ret.lc_sum);
                if (rc < 0)
                        goto out;
-               if (cntr->config & LPROCFS_CNTR_STDDEV)
-                       rc = seq_printf(p, " "LPU64, c.sumsquare);
+               if (cntr->lc_config & LPROCFS_CNTR_STDDEV)
+                       rc = seq_printf(p, " "LPU64, ret.lc_sumsquare);
                if (rc < 0)
                        goto out;
        }
@@ -434,20 +448,20 @@ static int lprocfs_counters_seq_show(struct seq_file *p, void *v)
        return (rc < 0) ? rc : 0;
 }
 
-struct seq_operations lprocfs_counters_seq_sops = {
-        .start = lprocfs_counters_seq_start,
-        .stop = lprocfs_counters_seq_stop,
-        .next = lprocfs_counters_seq_next,
-        .show = lprocfs_counters_seq_show,
+struct seq_operations lprocfs_stats_seq_sops = {
+        .start = lprocfs_stats_seq_start,
+        .stop = lprocfs_stats_seq_stop,
+        .next = lprocfs_stats_seq_next,
+        .show = lprocfs_stats_seq_show,
 };
 
-static int lprocfs_counters_seq_open(struct inode *inode, struct file *file)
+static int lprocfs_stats_seq_open(struct inode *inode, struct file *file)
 {
         struct proc_dir_entry *dp = inode->u.generic_ip;
         struct seq_file *seq;
         int rc;
 
-        rc = seq_open(file, &lprocfs_counters_seq_sops);
+        rc = seq_open(file, &lprocfs_stats_seq_sops);
         if (rc)
                 return rc;
         seq = file->private_data;
@@ -455,15 +469,15 @@ static int lprocfs_counters_seq_open(struct inode *inode, struct file *file)
         return 0;
 }
 
-struct file_operations lprocfs_counters_seq_fops = {
-        .open    = lprocfs_counters_seq_open,
+struct file_operations lprocfs_stats_seq_fops = {
+        .open    = lprocfs_stats_seq_open,
         .read    = seq_read,
         .llseek  = seq_lseek,
         .release = seq_release,
 };
 
-int lprocfs_register_counters(struct proc_dir_entry *root, const char* name,
-                              struct lprocfs_counters *cntrs)
+int lprocfs_register_stats(struct proc_dir_entry *root, const char* name,
+                           struct lprocfs_stats *stats)
 {
         struct proc_dir_entry *entry;
         LASSERT(root != NULL);
@@ -471,112 +485,130 @@ int lprocfs_register_counters(struct proc_dir_entry *root, const char* name,
         entry = create_proc_entry(name, 0444, root);
         if (entry == NULL)
                 return -ENOMEM;
-        entry->proc_fops = &lprocfs_counters_seq_fops;
-        entry->data = (void*) cntrs;
+        entry->proc_fops = &lprocfs_stats_seq_fops;
+        entry->data = (void *)stats;
         entry->write_proc = lprocfs_counter_write;
         return 0;
 }
 
-#define LPROCFS_OBD_OP_INIT(base, cntrs, op)                               \
+void lprocfs_counter_init(struct lprocfs_stats *stats, int index,
+                          unsigned conf, const char *name, const char *units)
+{
+        struct lprocfs_counter *c;
+        int i;
+
+        LASSERT(stats != NULL);
+        for (i = 0; i < smp_num_cpus; i++) {
+                c = &(stats->ls_percpu[i]->lp_cntr[index]);
+                c->lc_config = conf;
+                c->lc_min = ~(__u64)0;
+                c->lc_name = name;
+                c->lc_units = units;
+        }
+}
+EXPORT_SYMBOL(lprocfs_counter_init);
+
+#define LPROCFS_OBD_OP_INIT(base, stats, op)                               \
 do {                                                                       \
         unsigned int coffset = base + OBD_COUNTER_OFFSET(op);              \
-        LASSERT(coffset < cntrs->num);                                     \
-        LPROCFS_COUNTER_INIT(&cntrs->cntr[coffset], 0, NULL, #op, "reqs"); \
+        LASSERT(coffset < stats->ls_num);                                     \
+        lprocfs_counter_init(stats, coffset, 0, #op, "reqs");              \
 } while (0)
 
-
-int lprocfs_alloc_obd_counters(struct obd_device *obddev,
-                               unsigned int num_private_counters)
+int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats)
 {
-        struct lprocfs_counters* obdops_cntrs;
-        unsigned int num_counters;
+        struct lprocfs_stats *stats;
+        unsigned int num_stats;
         int rc, i;
 
-        LASSERT(obddev->counters == NULL);
-        LASSERT(obddev->obd_proc_entry != NULL);
-        LASSERT(obddev->cntr_base == 0);
+        LASSERT(obd->obd_stats == NULL);
+        LASSERT(obd->obd_proc_entry != NULL);
+        LASSERT(obd->obd_cntr_base == 0);
 
-        num_counters = 1 + OBD_COUNTER_OFFSET(san_preprw)+num_private_counters;
-        obdops_cntrs = lprocfs_alloc_counters(num_counters);
-        if (!obdops_cntrs)
+        num_stats = 1 + OBD_COUNTER_OFFSET(destroy_export) +
+                num_private_stats;
+        stats = lprocfs_alloc_stats(num_stats);
+        if (!stats)
                 return -ENOMEM;
 
-        LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, iocontrol);
-        LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, get_info);
-        LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, set_info);
-        LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, attach);
-        LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, detach);
-        LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, setup);
-        LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, cleanup);
-        LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, connect);
-        LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, disconnect);
-        LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, statfs);
-        LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, syncfs);
-        LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, packmd);
-        LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, unpackmd);
-        LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, preallocate);
-        LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, create);
-        LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, destroy);
-        LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, setattr);
-        LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, getattr);
-        LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, getattr_async);
-        LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, open);
-        LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, close);
-        LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, brw);
-        LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, brw_async);
-        LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, punch);
-        LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, sync);
-        LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, migrate);
-        LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, copy);
-        LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, iterate);
-        LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, preprw);
-        LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, commitrw);
-        LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, enqueue);
-        LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, match);
-        LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, cancel);
-        LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, cancel_unused);
-        LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, san_preprw);
-
-        for (i = num_private_counters; i < num_counters; i++) {
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, iocontrol);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, get_info);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, set_info);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, attach);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, detach);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, setup);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, cleanup);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, connect);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, disconnect);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, statfs);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, syncfs);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, packmd);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, unpackmd);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, preallocate);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, create);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, destroy);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, setattr);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, getattr);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, getattr_async);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, open);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, close);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, brw);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, brw_async);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, punch);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, sync);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, migrate);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, copy);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, iterate);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, preprw);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, commitrw);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, enqueue);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, match);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel_unused);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, san_preprw);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, destroy_export);
+
+        for (i = num_private_stats; i < num_stats; i++) {
                 /* If this assertion failed, it is likely that an obd
                  * operation was added to struct obd_ops in
                  * <linux/obd.h>, and that the corresponding line item
                  * LPROCFS_OBD_OP_INIT(.., .., opname)
                  * is missing from the list above. */
-                LASSERT(obdops_cntrs->cntr[i].name != NULL);
+                LASSERT(stats->ls_percpu[0]->lp_cntr[i].lc_name != NULL);
         }
-        rc = lprocfs_register_counters(obddev->obd_proc_entry, "obd_stats",
-                                       obdops_cntrs);
+        rc = lprocfs_register_stats(obd->obd_proc_entry, "stats", stats);
         if (rc < 0) {
-                lprocfs_free_counters(obdops_cntrs);
+                lprocfs_free_stats(stats);
         } else {
-                obddev->counters  = obdops_cntrs;
-                obddev->cntr_base = num_private_counters;
+                obd->obd_stats  = stats;
+                obd->obd_cntr_base = num_private_stats;
         }
         return rc;
 }
 
-void lprocfs_free_obd_counters(struct obd_device *obddev)
+void lprocfs_free_obd_stats(struct obd_device *obd)
 {
-        struct lprocfs_counters* obdops_cntrs = obddev->counters;
-        if (obdops_cntrs != NULL) {
-                obddev->counters = NULL;
-                lprocfs_free_counters(obdops_cntrs);
+        struct lprocfs_stats *stats = obd->obd_stats;
+
+        if (stats != NULL) {
+                obd->obd_stats = NULL;
+                lprocfs_free_stats(stats);
         }
 }
 
 #endif /* LPROCFS*/
 
 EXPORT_SYMBOL(lprocfs_register);
+EXPORT_SYMBOL(lprocfs_srch);
 EXPORT_SYMBOL(lprocfs_remove);
 EXPORT_SYMBOL(lprocfs_add_vars);
 EXPORT_SYMBOL(lprocfs_obd_attach);
 EXPORT_SYMBOL(lprocfs_obd_detach);
-EXPORT_SYMBOL(lprocfs_alloc_counters);
-EXPORT_SYMBOL(lprocfs_free_counters);
-EXPORT_SYMBOL(lprocfs_register_counters);
-EXPORT_SYMBOL(lprocfs_alloc_obd_counters);
-EXPORT_SYMBOL(lprocfs_free_obd_counters);
+EXPORT_SYMBOL(lprocfs_alloc_stats);
+EXPORT_SYMBOL(lprocfs_free_stats);
+EXPORT_SYMBOL(lprocfs_register_stats);
+EXPORT_SYMBOL(lprocfs_alloc_obd_stats);
+EXPORT_SYMBOL(lprocfs_free_obd_stats);
 
 EXPORT_SYMBOL(lprocfs_rd_u64);
 EXPORT_SYMBOL(lprocfs_rd_uuid);
index 1eaa282..603a166 100644 (file)
 #define ECHO_OBJECT0_NPAGES  16
 static struct page *echo_object0_pages[ECHO_OBJECT0_NPAGES];
 
-/* should be generic per-obd stats... */
-struct xprocfs_io_stat {
-        __u64    st_read_bytes;
-        __u64    st_read_reqs;
-        __u64    st_write_bytes;
-        __u64    st_write_reqs;
-        __u64    st_getattr_reqs;
-        __u64    st_setattr_reqs;
-        __u64    st_create_reqs;
-        __u64    st_destroy_reqs;
-        __u64    st_statfs_reqs;
-        __u64    st_syncfs_reqs;
-        __u64    st_open_reqs;
-        __u64    st_close_reqs;
-        __u64    st_punch_reqs;
+enum {
+        LPROC_ECHO_READ_BYTES = 0, /* 0-based, so no unnamed slot 0 */
+        LPROC_ECHO_WRITE_BYTES = 1,
+        LPROC_ECHO_LAST = LPROC_ECHO_WRITE_BYTES + 1
+};
 
-static struct xprocfs_io_stat xprocfs_iostats[NR_CPUS];
-static struct proc_dir_entry *xprocfs_dir;
-
-#define XPROCFS_BUMP_MYCPU_IOSTAT(field, count)                 \
-do {                                                            \
-        xprocfs_iostats[smp_processor_id()].field += (count);   \
-} while (0)
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-#define DECLARE_XPROCFS_SUM_STAT(field)                 \
-static long long                                        \
-xprocfs_sum_##field (void)                              \
-{                                                       \
-        long long stat = 0;                             \
-        int       i;                                    \
-                                                        \
-        for (i = 0; i < smp_num_cpus; i++)              \
-                stat += xprocfs_iostats[i].field;       \
-        return (stat);                                  \
-}
-
-DECLARE_XPROCFS_SUM_STAT (st_read_bytes)
-DECLARE_XPROCFS_SUM_STAT (st_read_reqs)
-DECLARE_XPROCFS_SUM_STAT (st_write_bytes)
-DECLARE_XPROCFS_SUM_STAT (st_write_reqs)
-DECLARE_XPROCFS_SUM_STAT (st_getattr_reqs)
-DECLARE_XPROCFS_SUM_STAT (st_setattr_reqs)
-DECLARE_XPROCFS_SUM_STAT (st_create_reqs)
-DECLARE_XPROCFS_SUM_STAT (st_destroy_reqs)
-DECLARE_XPROCFS_SUM_STAT (st_statfs_reqs)
-DECLARE_XPROCFS_SUM_STAT (st_syncfs_reqs)
-DECLARE_XPROCFS_SUM_STAT (st_open_reqs)
-DECLARE_XPROCFS_SUM_STAT (st_close_reqs)
-DECLARE_XPROCFS_SUM_STAT (st_punch_reqs)
-#endif
-
-static int
-xprocfs_rd_stat (char *page, char **start, off_t off, int count,
-                 int  *eof, void *data)
-{
-        long long (*fn)(void) = (long long(*)(void))data;
-        int         len;
-
-        *eof = 1;
-        if (off != 0)
-                return (0);
-
-        len = snprintf (page, count, "%Ld\n", fn());
-        *start = page;
-        return (len);
-}
-
-
-static void
-xprocfs_add_stat(char *name, long long (*fn)(void))
-{
-        struct proc_dir_entry *entry;
-
-        entry = create_proc_entry (name, S_IFREG|S_IRUGO, xprocfs_dir);
-        if (entry == NULL) {
-                CERROR ("Can't add procfs stat %s\n", name);
-                return;
-        }
-
-        entry->data = fn;
-        entry->read_proc = xprocfs_rd_stat;
-        entry->write_proc = NULL;
-}
-
-static void
-xprocfs_init (char *name)
-{
-        char  dirname[64];
-
-        snprintf (dirname, sizeof (dirname), "sys/%s", name);
-
-        xprocfs_dir = proc_mkdir (dirname, NULL);
-        if (xprocfs_dir == NULL) {
-                CERROR ("Can't make procfs dir %s\n", dirname);
-                return;
-        }
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        xprocfs_add_stat ("read_bytes",   xprocfs_sum_st_read_bytes);
-        xprocfs_add_stat ("read_reqs",    xprocfs_sum_st_read_reqs);
-        xprocfs_add_stat ("write_bytes",  xprocfs_sum_st_write_bytes);
-        xprocfs_add_stat ("write_reqs",   xprocfs_sum_st_write_reqs);
-        xprocfs_add_stat ("getattr_reqs", xprocfs_sum_st_getattr_reqs);
-        xprocfs_add_stat ("setattr_reqs", xprocfs_sum_st_setattr_reqs);
-        xprocfs_add_stat ("create_reqs",  xprocfs_sum_st_create_reqs);
-        xprocfs_add_stat ("destroy_reqs", xprocfs_sum_st_destroy_reqs);
-        xprocfs_add_stat ("statfs_reqs",  xprocfs_sum_st_statfs_reqs);
-        xprocfs_add_stat ("syncfs_reqs",  xprocfs_sum_st_syncfs_reqs);
-        xprocfs_add_stat ("open_reqs",    xprocfs_sum_st_open_reqs);
-        xprocfs_add_stat ("close_reqs",   xprocfs_sum_st_close_reqs);
-        xprocfs_add_stat ("punch_reqs",   xprocfs_sum_st_punch_reqs);
-#endif
-}
-
-void xprocfs_fini (void)
-{
-        if (xprocfs_dir == NULL)
-                return;
-
-        remove_proc_entry ("read_bytes",   xprocfs_dir);
-        remove_proc_entry ("read_reqs",    xprocfs_dir);
-        remove_proc_entry ("write_bytes",  xprocfs_dir);
-        remove_proc_entry ("write_reqs",   xprocfs_dir);
-        remove_proc_entry ("getattr_reqs", xprocfs_dir);
-        remove_proc_entry ("setattr_reqs", xprocfs_dir);
-        remove_proc_entry ("create_reqs",  xprocfs_dir);
-        remove_proc_entry ("destroy_reqs", xprocfs_dir);
-        remove_proc_entry ("statfs_reqs",  xprocfs_dir);
-        remove_proc_entry ("syncfs_reqs",  xprocfs_dir);
-        remove_proc_entry ("open_reqs",    xprocfs_dir);
-        remove_proc_entry ("close_reqs",   xprocfs_dir);
-        remove_proc_entry ("punch_reqs",   xprocfs_dir);
-
-        remove_proc_entry (xprocfs_dir->name, xprocfs_dir->parent);
-        xprocfs_dir = NULL;
-}
-
 static int echo_connect(struct lustre_handle *conn, struct obd_device *obd,
                         struct obd_uuid *cluuid)
 {
@@ -224,8 +91,6 @@ int echo_create(struct lustre_handle *conn, struct obdo *oa,
 {
         struct obd_device *obd = class_conn2obd(conn);
 
-        XPROCFS_BUMP_MYCPU_IOSTAT (st_create_reqs, 1);
-
         if (!obd) {
                 CERROR("invalid client cookie "LPX64"\n", conn->cookie);
                 return -EINVAL;
@@ -253,8 +118,6 @@ int echo_destroy(struct lustre_handle *conn, struct obdo *oa,
 {
         struct obd_device *obd = class_conn2obd(conn);
 
-        XPROCFS_BUMP_MYCPU_IOSTAT (st_destroy_reqs, 1);
-
         if (!obd) {
                 CERROR("invalid client cookie "LPX64"\n", conn->cookie);
                 RETURN(-EINVAL);
@@ -282,8 +145,6 @@ static int echo_open(struct lustre_handle *conn, struct obdo *oa,
         struct lustre_handle *fh = obdo_handle (oa);
         struct obd_device    *obd = class_conn2obd (conn);
 
-        XPROCFS_BUMP_MYCPU_IOSTAT (st_open_reqs, 1);
-
         if (!obd) {
                 CERROR("invalid client cookie "LPX64"\n", conn->cookie);
                 return (-EINVAL);
@@ -306,8 +167,6 @@ static int echo_close(struct lustre_handle *conn, struct obdo *oa,
         struct lustre_handle *fh = obdo_handle (oa);
         struct obd_device    *obd = class_conn2obd(conn);
 
-        XPROCFS_BUMP_MYCPU_IOSTAT (st_close_reqs, 1);
-
         if (!obd) {
                 CERROR("invalid client cookie "LPX64"\n", conn->cookie);
                 return (-EINVAL);
@@ -332,8 +191,6 @@ static int echo_getattr(struct lustre_handle *conn, struct obdo *oa,
         struct obd_device *obd = class_conn2obd(conn);
         obd_id id = oa->o_id;
 
-        XPROCFS_BUMP_MYCPU_IOSTAT (st_getattr_reqs, 1);
-
         if (!obd) {
                 CERROR("invalid client cookie "LPX64"\n", conn->cookie);
                 RETURN(-EINVAL);
@@ -355,8 +212,6 @@ static int echo_setattr(struct lustre_handle *conn, struct obdo *oa,
 {
         struct obd_device *obd = class_conn2obd(conn);
 
-        XPROCFS_BUMP_MYCPU_IOSTAT (st_setattr_reqs, 1);
-
         if (!obd) {
                 CERROR("invalid client cookie "LPX64"\n", conn->cookie);
                 RETURN(-EINVAL);
@@ -384,15 +239,11 @@ int echo_preprw(int cmd, struct obd_export *export, int objcount,
 {
         struct obd_device *obd;
         struct niobuf_local *r = res;
+        int tot_bytes = 0;
         int rc = 0;
         int i;
         ENTRY;
 
-        if ((cmd & OBD_BRW_WRITE) != 0)
-                XPROCFS_BUMP_MYCPU_IOSTAT (st_write_reqs, 1);
-        else
-                XPROCFS_BUMP_MYCPU_IOSTAT (st_read_reqs, 1);
-
         obd = export->exp_obd;
         if (obd == NULL)
                 RETURN(-EINVAL);
@@ -428,6 +279,8 @@ int echo_preprw(int cmd, struct obd_export *export, int objcount,
                                 }
                         }
 
+                        tot_bytes += nb->len;
+
                         atomic_inc(&obd->u.echo.eo_prep);
 
                         r->offset = nb->offset;
@@ -437,9 +290,8 @@ int echo_preprw(int cmd, struct obd_export *export, int objcount,
                         CDEBUG(D_PAGE, "$$$$ get page %p @ "LPU64" for %d\n",
                                r->page, r->offset, r->len);
 
-                        if (cmd == OBD_BRW_READ) {
+                        if (cmd & OBD_BRW_READ) {
                                 r->rc = r->len;
-                                XPROCFS_BUMP_MYCPU_IOSTAT(st_read_bytes,r->len);
                                 if (verify) {
                                         page_debug_setup(kmap (r->page), r->len,
                                                          r->offset,obj->ioo_id);
@@ -447,8 +299,6 @@ int echo_preprw(int cmd, struct obd_export *export, int objcount,
                                 }
                                 r->rc = r->len;
                         } else {
-                                XPROCFS_BUMP_MYCPU_IOSTAT(st_write_bytes,
-                                                          r->len);
                                 if (verify) {
                                         page_debug_setup(kmap (r->page), r->len,
                                                          0xecc0ecc0ecc0ecc0,
@@ -458,6 +308,13 @@ int echo_preprw(int cmd, struct obd_export *export, int objcount,
                         }
                 }
         }
+        if (cmd & OBD_BRW_READ)
+                lprocfs_counter_add(obd->obd_stats, LPROC_ECHO_READ_BYTES,
+                                    tot_bytes);
+        else
+                lprocfs_counter_add(obd->obd_stats, LPROC_ECHO_WRITE_BYTES,
+                                    tot_bytes);
+
         CDEBUG(D_PAGE, "%d pages allocated after prep\n",
                atomic_read(&obd->u.echo.eo_prep));
 
@@ -518,14 +375,12 @@ int echo_commitrw(int cmd, struct obd_export *export, int objcount,
                         struct page *page = r->page;
                         void *addr;
 
-                        kmap (page);
-                        
-                        if (!page || !(addr = page_address(page)) ||
+                        if (!page || !(addr = kmap(page)) ||
                             !kern_addr_valid(addr)) {
 
                                 CERROR("bad page objid "LPU64":%p, buf %d/%d\n",
                                        obj->ioo_id, page, j, obj->ioo_bufcnt);
-                                kunmap (page);
+                                kunmap(page);
                                 GOTO(commitrw_cleanup, rc = -EFAULT);
                         }
 
@@ -593,16 +448,29 @@ static int echo_cleanup(struct obd_device *obddev, int force, int failover)
         RETURN(0);
 }
 
-int echo_attach(struct obd_device *dev, obd_count len, void *data)
+int echo_attach(struct obd_device *obd, obd_count len, void *data)
 {
         struct lprocfs_static_vars lvars;
+        int rc;
 
         lprocfs_init_vars(&lvars);
-        return lprocfs_obd_attach(dev, lvars.obd_vars);
+        rc = lprocfs_obd_attach(obd, lvars.obd_vars);
+        if (rc != 0)
+                return rc;
+        rc = lprocfs_alloc_obd_stats(obd, LPROC_ECHO_LAST);
+        if (rc != 0)
+                return rc;
+
+        lprocfs_counter_init(obd->obd_stats, LPROC_ECHO_READ_BYTES,
+                             LPROCFS_CNTR_AVGMINMAX, "read_bytes", "bytes");
+        lprocfs_counter_init(obd->obd_stats, LPROC_ECHO_WRITE_BYTES,
+                             LPROCFS_CNTR_AVGMINMAX, "write_bytes", "bytes");
+        return rc;
 }
 
 int echo_detach(struct obd_device *dev)
 {
+        lprocfs_free_obd_stats(dev);
         return lprocfs_obd_detach(dev);
 }
 
@@ -673,8 +541,6 @@ static int __init obdecho_init(void)
 
         lprocfs_init_vars(&lvars);
 
-        xprocfs_init ("echo");
-
         rc = echo_object0_pages_init ();
         if (rc != 0)
                 goto failed_0;
@@ -692,8 +558,6 @@ static int __init obdecho_init(void)
  failed_1:
         echo_object0_pages_fini ();
  failed_0:
-        xprocfs_fini ();
-
         RETURN(rc);
 }
 
@@ -702,7 +566,6 @@ static void __exit obdecho_exit(void)
         echo_client_cleanup();
         class_unregister_type(OBD_ECHO_DEVICENAME);
         echo_object0_pages_fini ();
-        xprocfs_fini ();
 }
 
 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
index 31f7334..1d8233b 100644 (file)
@@ -750,10 +750,10 @@ echo_enqueue (struct obd_export *exp, struct obdo *oa,
         ecl->ecl_extent.end = (nob == 0) ? ((obd_off) -1) : (offset + nob - 1);
 
         flags = 0;
-        rc = obd_enqueue (&ec->ec_conn, eco->eco_lsm, NULL, LDLM_EXTENT,
-                          &ecl->ecl_extent,sizeof(ecl->ecl_extent), mode,
-                          &flags, echo_ldlm_callback, eco, sizeof (*eco),
-                          &ecl->ecl_lock_handle);
+        rc = obd_enqueue(&ec->ec_conn, eco->eco_lsm, NULL, LDLM_EXTENT,
+                         &ecl->ecl_extent,sizeof(ecl->ecl_extent), mode,
+                         &flags, echo_ldlm_callback, eco,
+                         &ecl->ecl_lock_handle);
         if (rc != 0)
                 goto failed_1;
 
index 21d05ef..e6c223c 100644 (file)
 #endif
 
 enum {
-        LPROC_FILTER_READS = 0,
-        LPROC_FILTER_READ_BYTES = 1,
-        LPROC_FILTER_WRITES = 2,
-        LPROC_FILTER_WRITE_BYTES = 3,
-        LPROC_FILTER_LAST = LPROC_FILTER_WRITE_BYTES +1
+        LPROC_FILTER_READ_BYTES = 0,
+        LPROC_FILTER_WRITE_BYTES = 1,
+        LPROC_FILTER_LAST,
 };
 
-/* should be generic per-obd stats... */
-struct xprocfs_io_stat {
-        __u64    st_read_bytes;
-        __u64    st_read_reqs;
-        __u64    st_write_bytes;
-        __u64    st_write_reqs;
-        __u64    st_getattr_reqs;
-        __u64    st_setattr_reqs;
-        __u64    st_create_reqs;
-        __u64    st_destroy_reqs;
-        __u64    st_statfs_reqs;
-        __u64    st_syncfs_reqs;
-        __u64    st_open_reqs;
-        __u64    st_close_reqs;
-        __u64    st_punch_reqs;
-};
-
-static struct xprocfs_io_stat xprocfs_iostats[NR_CPUS];
-static struct proc_dir_entry *xprocfs_dir;
-
-#define XPROCFS_BUMP_MYCPU_IOSTAT(field, count)                 \
-do {                                                            \
-        xprocfs_iostats[smp_processor_id()].field += (count);   \
-} while (0)
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-#define DECLARE_XPROCFS_SUM_STAT(field)                 \
-static long long                                        \
-xprocfs_sum_##field (void)                              \
-{                                                       \
-        long long stat = 0;                             \
-        int       i;                                    \
-                                                        \
-        for (i = 0; i < smp_num_cpus; i++)              \
-                stat += xprocfs_iostats[i].field;       \
-        return (stat);                                  \
-}
-
-DECLARE_XPROCFS_SUM_STAT (st_read_bytes)
-DECLARE_XPROCFS_SUM_STAT (st_read_reqs)
-DECLARE_XPROCFS_SUM_STAT (st_write_bytes)
-DECLARE_XPROCFS_SUM_STAT (st_write_reqs)
-DECLARE_XPROCFS_SUM_STAT (st_getattr_reqs)
-DECLARE_XPROCFS_SUM_STAT (st_setattr_reqs)
-DECLARE_XPROCFS_SUM_STAT (st_create_reqs)
-DECLARE_XPROCFS_SUM_STAT (st_destroy_reqs)
-DECLARE_XPROCFS_SUM_STAT (st_statfs_reqs)
-DECLARE_XPROCFS_SUM_STAT (st_syncfs_reqs)
-DECLARE_XPROCFS_SUM_STAT (st_open_reqs)
-DECLARE_XPROCFS_SUM_STAT (st_close_reqs)
-DECLARE_XPROCFS_SUM_STAT (st_punch_reqs)
-#endif
-
-static int
-xprocfs_rd_stat (char *page, char **start, off_t off, int count,
-                 int  *eof, void *data)
-{
-        long long (*fn)(void) = (long long(*)(void))data;
-        int         len;
-
-        *eof = 1;
-        if (off != 0)
-                return (0);
-
-        len = snprintf (page, count, "%Ld\n", fn());
-        *start = page;
-        return (len);
-}
-
-
-static void
-xprocfs_add_stat(char *name, long long (*fn)(void))
-{
-        struct proc_dir_entry *entry;
-
-        entry = create_proc_entry (name, S_IFREG|S_IRUGO, xprocfs_dir);
-        if (entry == NULL) {
-                CERROR ("Can't add procfs stat %s\n", name);
-                return;
-        }
-
-        entry->data = fn;
-        entry->read_proc = xprocfs_rd_stat;
-        entry->write_proc = NULL;
-}
-
-static void
-xprocfs_init (char *name)
-{
-        char  dirname[64];
-
-        snprintf (dirname, sizeof (dirname), "sys/%s", name);
-
-        xprocfs_dir = proc_mkdir (dirname, NULL);
-        if (xprocfs_dir == NULL) {
-                CERROR ("Can't make procfs dir %s\n", dirname);
-                return;
-        }
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        xprocfs_add_stat ("read_bytes",   xprocfs_sum_st_read_bytes);
-        xprocfs_add_stat ("read_reqs",    xprocfs_sum_st_read_reqs);
-        xprocfs_add_stat ("write_bytes",  xprocfs_sum_st_write_bytes);
-        xprocfs_add_stat ("write_reqs",   xprocfs_sum_st_write_reqs);
-        xprocfs_add_stat ("getattr_reqs", xprocfs_sum_st_getattr_reqs);
-        xprocfs_add_stat ("setattr_reqs", xprocfs_sum_st_setattr_reqs);
-        xprocfs_add_stat ("create_reqs",  xprocfs_sum_st_create_reqs);
-        xprocfs_add_stat ("destroy_reqs", xprocfs_sum_st_destroy_reqs);
-        xprocfs_add_stat ("statfs_reqs",  xprocfs_sum_st_statfs_reqs);
-        xprocfs_add_stat ("syncfs_reqs",  xprocfs_sum_st_syncfs_reqs);
-        xprocfs_add_stat ("open_reqs",    xprocfs_sum_st_open_reqs);
-        xprocfs_add_stat ("close_reqs",   xprocfs_sum_st_close_reqs);
-        xprocfs_add_stat ("punch_reqs",   xprocfs_sum_st_punch_reqs);
-#endif
-}
-
-void xprocfs_fini (void)
-{
-        if (xprocfs_dir == NULL)
-                return;
-
-        remove_proc_entry ("read_bytes",   xprocfs_dir);
-        remove_proc_entry ("read_reqs",    xprocfs_dir);
-        remove_proc_entry ("write_bytes",  xprocfs_dir);
-        remove_proc_entry ("write_reqs",   xprocfs_dir);
-        remove_proc_entry ("getattr_reqs", xprocfs_dir);
-        remove_proc_entry ("setattr_reqs", xprocfs_dir);
-        remove_proc_entry ("create_reqs",  xprocfs_dir);
-        remove_proc_entry ("destroy_reqs", xprocfs_dir);
-        remove_proc_entry ("statfs_reqs",  xprocfs_dir);
-        remove_proc_entry ("syncfs_reqs",  xprocfs_dir);
-        remove_proc_entry ("open_reqs",    xprocfs_dir);
-        remove_proc_entry ("close_reqs",   xprocfs_dir);
-        remove_proc_entry ("punch_reqs",   xprocfs_dir);
-
-        remove_proc_entry (xprocfs_dir->name, xprocfs_dir->parent);
-        xprocfs_dir = NULL;
-}
-
 #define S_SHIFT 12
 static char *obd_type_by_mode[S_IFMT >> S_SHIFT] = {
         [0]                     NULL,
@@ -337,19 +196,6 @@ int filter_finish_transno(struct obd_export *export, void *handle,
         RETURN(written);
 }
 
-/* write the pathname into the string */
-static char *filter_id(char *buf, struct filter_obd *filter, obd_id id,
-                       obd_mode mode)
-{
-        if (!S_ISREG(mode) || filter->fo_subdir_count == 0)
-                sprintf(buf, "O/%s/"LPU64, obd_mode_to_type(mode), id);
-        else
-                sprintf(buf, "O/%s/d%d/"LPU64, obd_mode_to_type(mode),
-                       (int)id & (filter->fo_subdir_count - 1), id);
-
-        return buf;
-}
-
 static inline void f_dput(struct dentry *dentry)
 {
         /* Can't go inside filter_ddelete because it can block */
@@ -935,64 +781,19 @@ static void filter_post(struct obd_device *obd)
 }
 
 
-static __u64 filter_next_id(struct obd_device *obd)
+static __u64 filter_next_id(struct filter_obd *filter)
 {
         obd_id id;
-        LASSERT(obd->u.filter.fo_fsd != NULL);
+        LASSERT(filter->fo_fsd != NULL);
 
-        spin_lock(&obd->u.filter.fo_objidlock);
-        id = le64_to_cpu(obd->u.filter.fo_fsd->fsd_last_objid);
-        obd->u.filter.fo_fsd->fsd_last_objid = cpu_to_le64(id + 1);
-        spin_unlock(&obd->u.filter.fo_objidlock);
+        spin_lock(&filter->fo_objidlock); /* covers read and increment */
+        id = le64_to_cpu(filter->fo_fsd->fsd_last_objid); /* value returned */
+        filter->fo_fsd->fsd_last_objid = cpu_to_le64(id + 1); /* kept LE */
+        spin_unlock(&filter->fo_objidlock);
 
         return id;
 }
 
-/* how to get files, dentries, inodes from object id's */
-/* parent i_sem is already held if needed for exclusivity */
-static struct dentry *filter_fid2dentry(struct obd_device *obd,
-                                        struct dentry *dparent,
-                                        __u64 id, int lockit)
-{
-        struct super_block *sb = obd->u.filter.fo_sb;
-        struct dentry *dchild;
-        char name[32];
-        int len;
-        ENTRY;
-
-        if (!sb || !sb->s_dev) {
-                CERROR("fatal: device not initialized.\n");
-                RETURN(ERR_PTR(-ENXIO));
-        }
-
-        if (id == 0) {
-                CERROR("fatal: invalid object id 0\n");
-                LBUG();
-                RETURN(ERR_PTR(-ESTALE));
-        }
-
-        len = sprintf(name, LPU64, id);
-        CDEBUG(D_INODE, "looking up object O/%*s/%s\n",
-               dparent->d_name.len, dparent->d_name.name, name);
-        if (lockit)
-                down(&dparent->d_inode->i_sem);
-        dchild = lookup_one_len(name, dparent, len);
-        if (lockit)
-                up(&dparent->d_inode->i_sem);
-        if (IS_ERR(dchild)) {
-                CERROR("child lookup error %ld\n", PTR_ERR(dchild));
-                RETURN(dchild);
-        }
-
-        CDEBUG(D_INODE, "got child obj O/%*s/%s: %p, count = %d\n",
-               dparent->d_name.len, dparent->d_name.name, name, dchild,
-               atomic_read(&dchild->d_count));
-
-        LASSERT(atomic_read(&dchild->d_count) > 0);
-
-        RETURN(dchild);
-}
-
 /* direct cut-n-paste of mds_blocking_ast() */
 int filter_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
                      void *data, int flag)
@@ -1038,7 +839,7 @@ int filter_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
 }
 
 static int filter_lock_dentry(struct obd_device *obd, struct dentry *de,
-                              int lock_mode, struct lustre_handle *lockh)
+                              ldlm_mode_t lock_mode,struct lustre_handle *lockh)
 {
         struct ldlm_res_id res_id = { .name = {0} };
         int flags = 0, rc;
@@ -1054,6 +855,14 @@ static int filter_lock_dentry(struct obd_device *obd, struct dentry *de,
         RETURN(rc == ELDLM_OK ? 0 : -ENOLCK);  /* XXX translate ldlm code */
 }
 
+static void filter_parent_unlock(struct dentry *dparent,
+                                 struct lustre_handle *lockh,
+                                 ldlm_mode_t lock_mode)
+{
+        ldlm_lock_decref(lockh, lock_mode); /* dparent is not used here */
+}
+
+/* We never dget the object parent, so DON'T dput it either */
 static inline struct dentry *filter_parent(struct obd_device *obd,
                                            obd_mode mode, obd_id objid)
 {
@@ -1066,11 +875,13 @@ static inline struct dentry *filter_parent(struct obd_device *obd,
         return filter->fo_dentry_O_sub[objid & (filter->fo_subdir_count - 1)];
 }
 
+/* We never dget the object parent, so DON'T dput it either */
 static inline struct dentry *filter_parent_lock(struct obd_device *obd,
                                                 obd_mode mode, obd_id objid,
-                                                int lock_mode,
+                                                ldlm_mode_t lock_mode,
                                                 struct lustre_handle *lockh)
 {
+        unsigned long now = jiffies;
         struct dentry *de = filter_parent(obd, mode, objid);
         int rc;
 
@@ -1078,17 +889,75 @@ static inline struct dentry *filter_parent_lock(struct obd_device *obd,
                 return de;
 
         rc = filter_lock_dentry(obd, de, lock_mode, lockh);
+        if (time_after(jiffies, now + 15*HZ))
+                CERROR("slow parent lock %lus\n", (jiffies - now) / HZ);
         return rc ? ERR_PTR(rc) : de;
 }
 
+/* Look up the child dentry named by the decimal object id in its parent.
+ *
+ * If dir_dentry is passed, the caller has already locked the parent
+ * appropriately for this operation (normally a write lock).  If
+ * dir_dentry is NULL, we take an LCK_PR lock on the parent chosen from
+ * mode/id around the lookup, to avoid races with create/destroy.
+ * Returns an ERR_PTR() on failure; on success d_inode may still be NULL,
+ * so callers must check it. */
+static struct dentry *filter_fid2dentry(struct obd_device *obd,
+                                        struct dentry *dir_dentry,
+                                        obd_mode mode, obd_id id)
+{
+        struct super_block *sb = obd->u.filter.fo_sb;
+        struct lustre_handle lockh;
+        struct dentry *dparent = dir_dentry;
+        struct dentry *dchild;
+        char name[32]; /* a 64-bit id is at most 20 decimal digits */
+        int len;
+        ENTRY;
+
+        if (!sb || !sb->s_dev) {
+                CERROR("device not initialized.\n");
+                RETURN(ERR_PTR(-ENXIO));
+        }
+
+        if (id == 0) {
+                CERROR("fatal: invalid object id 0\n");
+                LBUG();
+                RETURN(ERR_PTR(-ESTALE));
+        }
+
+        len = snprintf(name, sizeof(name), LPU64, id);
+        if (!dir_dentry) {
+                dparent = filter_parent_lock(obd, mode, id, LCK_PR, &lockh);
+                if (IS_ERR(dparent))
+                        RETURN(dparent);
+        }
+        CDEBUG(D_INODE, "looking up object O/%.*s/%s\n",
+               dparent->d_name.len, dparent->d_name.name, name);
+        dchild = ll_lookup_one_len(name, dparent, len);
+        if (!dir_dentry)
+                filter_parent_unlock(dparent, &lockh, LCK_PR);
+        if (IS_ERR(dchild)) {
+                CERROR("child lookup error %ld\n", PTR_ERR(dchild));
+                RETURN(dchild);
+        }
+
+        CDEBUG(D_INODE, "got child objid %s: %p, count = %d\n",
+               name, dchild, atomic_read(&dchild->d_count));
+
+        LASSERT(atomic_read(&dchild->d_count) > 0);
+
+        RETURN(dchild);
+}
+
 static struct file *filter_obj_open(struct obd_export *export,
-                                    __u64 id, __u32 type, int parent_mode,
+                                    __u64 id, __u32 type,
+                                    ldlm_mode_t parent_mode,
                                     struct lustre_handle *parent_lockh)
 {
         struct obd_device *obd = export->exp_obd;
         struct filter_obd *filter = &obd->u.filter;
         struct super_block *sb = filter->fo_sb;
-        struct dentry *dchild = NULL,  *parent;
+        struct dentry *dchild = NULL, *dparent = NULL;
         struct filter_export_data *fed = &export->exp_filter_data;
         struct filter_dentry_data *fdd = NULL;
         struct filter_file_data *ffd = NULL;
@@ -1133,21 +1002,26 @@ static struct file *filter_obj_open(struct obd_export *export,
 
         cleanup_phase = 2;
 
-        parent = filter_parent_lock(obd, type, id, parent_mode, parent_lockh);
-        if (IS_ERR(parent))
-                GOTO(cleanup, file = (void *)parent);
+        dparent = filter_parent_lock(obd, type, id, parent_mode, parent_lockh);
+        if (IS_ERR(dparent))
+                GOTO(cleanup, file = (void *)dparent);
 
         cleanup_phase = 3;
 
         len = snprintf(name, sizeof(name), LPU64, id);
-        dchild = lookup_one_len(name, parent, len);
+        dchild = ll_lookup_one_len(name, dparent, len);
         if (IS_ERR(dchild))
                 GOTO(cleanup, file = (void *)dchild);
-        LASSERT(dchild->d_inode);
 
         cleanup_phase = 4;
 
-        /* dentry_open does a dput(de) and mntput(mds->mds_vfsmnt) on error */
+        if (dchild->d_inode == NULL) {
+                CERROR("opening non-existent object %s - O_CREAT?\n", name);
+                file = ERR_PTR(-ENOENT);
+                GOTO(cleanup, file);
+        }
+
+        /* dentry_open does a dput(dchild) and mntput(mnt) on error */
         mntget(filter->fo_vfsmnt);
         file = dentry_open(dchild, filter->fo_vfsmnt, O_RDWR | O_LARGEFILE);
         if (IS_ERR(file)) {
@@ -1161,12 +1035,14 @@ static struct file *filter_obj_open(struct obd_export *export,
                 spin_unlock(&filter->fo_fddlock);
                 OBD_FREE(fdd, sizeof *fdd);
                 fdd = dchild->d_fsdata;
+                LASSERT(fdd->fdd_magic == FILTER_DENTRY_MAGIC);
                 /* should only happen during client recovery */
                 if (fdd->fdd_flags & FILTER_FLAG_DESTROY)
                         CDEBUG(D_INODE,"opening destroyed object "LPU64"\n",id);
                 atomic_inc(&fdd->fdd_open_count);
         } else {
                 atomic_set(&fdd->fdd_open_count, 1);
+                fdd->fdd_magic = FILTER_DENTRY_MAGIC;
                 fdd->fdd_flags = 0;
                 fdd->fdd_objid = id;
                 /* If this is racy, then we can use {cmp}xchg and atomic_add */
@@ -1192,10 +1068,10 @@ cleanup:
         switch (cleanup_phase) {
         case 4:
                 if (IS_ERR(file))
-                        l_dput(dchild);
+                        f_dput(dchild);
         case 3:
                 if (IS_ERR(file))
-                        ldlm_lock_decref(parent_lockh, parent_mode);
+                        filter_parent_unlock(dparent, parent_lockh,parent_mode);
         case 2:
                 if (IS_ERR(file))
                         OBD_FREE(fdd, sizeof *fdd);
@@ -1209,29 +1085,28 @@ cleanup:
         RETURN(file);
 }
 
-/* Caller must hold i_sem on dir_dentry->d_inode */
-/* Caller must push us into kernel context */
+/* Unlink dchild from dparent via vfs_unlink(); returns its rc (errors and
+ * odd nlink/i_count are only logged).  Caller must hold LCK_PW on parent,
+ * push us into kernel context and ensure that dchild->d_inode exists. */
 static int filter_destroy_internal(struct obd_device *obd,
-                                   struct dentry *dir_dentry,
-                                   struct dentry *object_dentry)
+                                   struct dentry *dparent,
+                                   struct dentry *dchild)
 {
-        struct inode *inode = object_dentry->d_inode;
+        struct inode *inode = dchild->d_inode; /* caller guarantees non-NULL */
         int rc;
         ENTRY;
 
         if (inode->i_nlink != 1 || atomic_read(&inode->i_count) != 1) {
                 CERROR("destroying objid %*s nlink = %d, count = %d\n",
-                       object_dentry->d_name.len,
-                       object_dentry->d_name.name,
+                       dchild->d_name.len, dchild->d_name.name,
                        inode->i_nlink, atomic_read(&inode->i_count));
         }
 
-        rc = vfs_unlink(dir_dentry->d_inode, object_dentry);
+        rc = vfs_unlink(dparent->d_inode, dchild);
 
         if (rc)
                 CERROR("error unlinking objid %*s: rc %d\n",
-                       object_dentry->d_name.len,
-                       object_dentry->d_name.name, rc);
+                       dchild->d_name.len, dchild->d_name.name, rc);
 
         RETURN(rc);
 }
@@ -1239,24 +1114,25 @@ static int filter_destroy_internal(struct obd_device *obd,
 /* If closing because we are failing this device, then
    don't do the unlink on close.
 */
-static int filter_close_internal(struct obd_export *export,
+static int filter_close_internal(struct obd_export *exp,
                                  struct filter_file_data *ffd,
                                  struct obd_trans_info *oti,
                                  int failover)
 {
-        struct obd_device *obd = export->exp_obd;
+        struct obd_device *obd = exp->exp_obd;
         struct filter_obd *filter = &obd->u.filter;
         struct file *filp = ffd->ffd_file;
-        struct dentry *object_dentry = dget(filp->f_dentry);
-        struct filter_dentry_data *fdd = object_dentry->d_fsdata;
+        struct dentry *dchild = dget(filp->f_dentry);
+        struct filter_dentry_data *fdd = dchild->d_fsdata;
         struct lustre_handle parent_lockh;
         int rc, rc2, cleanup_phase = 0;
-        struct dentry *dir_dentry;
+        struct dentry *dparent;
         struct obd_run_ctxt saved;
         ENTRY;
 
         LASSERT(filp->private_data == ffd);
         LASSERT(fdd);
+        LASSERT(fdd->fdd_magic == FILTER_DENTRY_MAGIC);
 
         rc = filp_close(filp, 0);
 
@@ -1267,23 +1143,24 @@ static int filter_close_internal(struct obd_export *export,
                 push_ctxt(&saved, &filter->fo_ctxt, NULL);
                 cleanup_phase = 1;
 
-                dir_dentry = filter_parent_lock(obd, S_IFREG, fdd->fdd_objid,
-                                                LCK_PW, &parent_lockh);
-                if (IS_ERR(dir_dentry))
-                        GOTO(cleanup, rc = PTR_ERR(dir_dentry));
+                LASSERT(fdd->fdd_objid > 0);
+                dparent = filter_parent_lock(obd, S_IFREG, fdd->fdd_objid,
+                                             LCK_PW, &parent_lockh);
+                if (IS_ERR(dparent))
+                        GOTO(cleanup, rc = PTR_ERR(dparent));
                 cleanup_phase = 2;
 
-                handle = fsfilt_start(obd, dir_dentry->d_inode,
+                handle = fsfilt_start(obd, dparent->d_inode,
                                       FSFILT_OP_UNLINK);
                 if (IS_ERR(handle))
                         GOTO(cleanup, rc = PTR_ERR(handle));
 
                 /* XXX unlink from PENDING directory now too */
-                rc2 = filter_destroy_internal(obd, dir_dentry, object_dentry);
+                rc2 = filter_destroy_internal(obd, dparent, dchild);
                 if (rc2 && !rc)
                         rc = rc2;
-                rc = filter_finish_transno(export, handle, oti, rc);
-                rc2 = fsfilt_commit(obd, dir_dentry->d_inode, handle, 0);
+                rc = filter_finish_transno(exp, handle, oti, rc);
+                rc2 = fsfilt_commit(obd, dparent->d_inode, handle, 0);
                 if (rc2) {
                         CERROR("error on commit, err = %d\n", rc2);
                         if (!rc)
@@ -1295,7 +1172,7 @@ cleanup:
         switch(cleanup_phase) {
         case 2:
                 if (rc || oti == NULL) {
-                        ldlm_lock_decref(&parent_lockh, LCK_PW);
+                        filter_parent_unlock(dparent, &parent_lockh, LCK_PW);
                 } else {
                         memcpy(&oti->oti_ack_locks[0].lock, &parent_lockh,
                                sizeof(parent_lockh));
@@ -1304,7 +1181,7 @@ cleanup:
         case 1:
                 pop_ctxt(&saved, &filter->fo_ctxt, NULL);
         case 0:
-                f_dput(object_dentry);
+                f_dput(dchild);
                 filter_ffd_destroy(ffd);
                 break;
         default:
@@ -1321,7 +1198,8 @@ static int filter_common_setup(struct obd_device *obd, obd_count len, void *buf,
                                char *option)
 {
         struct obd_ioctl_data* data = buf;
-        struct filter_obd *filter;
+        struct filter_obd *filter = &obd->u.filter;
+
         struct vfsmount *mnt;
         int rc = 0;
         ENTRY;
@@ -1345,16 +1223,28 @@ static int filter_common_setup(struct obd_device *obd, obd_count len, void *buf,
                         CERROR("%s: configured for recovery and sync write\n",
                                obd->obd_name);
                 } else {
-                        CERROR("unrecognised flag '%c'\n",
-                               *data->ioc_inlbuf3);
+                        if (*data->ioc_inlbuf3 != 'n') {
+                                CERROR("unrecognised flag '%c'\n",
+                                       *data->ioc_inlbuf3);
+                        }
+                }
+        }
+
+        if (data->ioc_inllen4 > 0 && data->ioc_inlbuf4) {
+                if (*data->ioc_inlbuf4 == '/') {
+                        CERROR("filter namespace mount: %s\n",
+                               data->ioc_inlbuf4);
+                        filter->fo_nspath = strdup(data->ioc_inlbuf4);
+                } else {
+                        CERROR("namespace mount must be absolute path: '%s'\n",
+                               data->ioc_inlbuf4);
                 }
         }
 
-        filter = &obd->u.filter;
         filter->fo_vfsmnt = mnt;
-        filter->fo_fstype = strdup(data->ioc_inlbuf2);
-        filter->fo_sb = mnt->mnt_root->d_inode->i_sb;
-        CDEBUG(D_SUPER, "%s: mnt = %p\n", data->ioc_inlbuf1, mnt);
+        filter->fo_sb = mnt->mnt_sb;
+        filter->fo_fstype = mnt->mnt_sb->s_type->name;
+        CDEBUG(D_SUPER, "%s: mnt = %p\n", filter->fo_fstype, mnt);
 
         OBD_SET_CTXT_MAGIC(&filter->fo_ctxt);
         filter->fo_ctxt.pwdmnt = mnt;
@@ -1363,15 +1253,15 @@ static int filter_common_setup(struct obd_device *obd, obd_count len, void *buf,
 
         rc = filter_prep(obd);
         if (rc)
-                GOTO(err_kfree, rc);
+                GOTO(err_mntput, rc);
 
         spin_lock_init(&filter->fo_translock);
         spin_lock_init(&filter->fo_fddlock);
         spin_lock_init(&filter->fo_objidlock);
         INIT_LIST_HEAD(&filter->fo_export_list);
 
-        obd->obd_namespace =
-                ldlm_namespace_new("filter-tgt", LDLM_NAMESPACE_SERVER);
+        obd->obd_namespace = ldlm_namespace_new("filter-tgt",
+                                                LDLM_NAMESPACE_SERVER);
         if (!obd->obd_namespace)
                 GOTO(err_post, rc = -ENOMEM);
 
@@ -1382,10 +1272,9 @@ static int filter_common_setup(struct obd_device *obd, obd_count len, void *buf,
 
 err_post:
         filter_post(obd);
-err_kfree:
-        kfree(filter->fo_fstype);
+err_mntput:
         unlock_kernel();
-        mntput(filter->fo_vfsmnt);
+        mntput(mnt);
         filter->fo_sb = 0;
         lock_kernel();
 err_ops:
@@ -1445,7 +1334,7 @@ static int filter_cleanup(struct obd_device *obd, int force, int failover)
         ldlm_namespace_free(obd->obd_namespace);
 
         sb = obd->u.filter.fo_sb;
-        if (!obd->u.filter.fo_sb)
+        if (!sb)
                 RETURN(0);
 
         filter_post(obd);
@@ -1462,48 +1351,37 @@ static int filter_cleanup(struct obd_device *obd, int force, int failover)
         obd->u.filter.fo_sb = 0;
 /*        destroy_buffers(obd->u.filter.fo_sb->s_dev);*/
 
-        kfree(obd->u.filter.fo_fstype);
         fsfilt_put_ops(obd->obd_fsops);
-
         lock_kernel();
 
         RETURN(0);
 }
 
-int filter_attach(struct obd_device *dev, obd_count len, void *data)
+int filter_attach(struct obd_device *obd, obd_count len, void *data)
 {
         struct lprocfs_static_vars lvars;
-        struct lprocfs_counters* cntrs;
         int rc;
 
         lprocfs_init_vars(&lvars);
-        rc = lprocfs_obd_attach(dev, lvars.obd_vars);
+        rc = lprocfs_obd_attach(obd, lvars.obd_vars);
         if (rc != 0)
                 return rc;
 
-        rc = lprocfs_alloc_obd_counters(dev, LPROC_FILTER_LAST);
+        rc = lprocfs_alloc_obd_stats(obd, LPROC_FILTER_LAST);
         if (rc != 0)
                 return rc;
 
-        /* Init obdfilter private counters here */
-        cntrs = dev->counters;
-        LPROCFS_COUNTER_INIT(&cntrs->cntr[LPROC_FILTER_READS],
-                             0, NULL, "read", "reqs");
-        LPROCFS_COUNTER_INIT(&cntrs->cntr[LPROC_FILTER_READ_BYTES],
-                             LPROCFS_CNTR_AVGMINMAX,
-                             NULL, "read_bytes", "bytes");
-        LPROCFS_COUNTER_INIT(&cntrs->cntr[LPROC_FILTER_WRITES],
-                             0, NULL, "write", "reqs");
-
-        LPROCFS_COUNTER_INIT(&cntrs->cntr[LPROC_FILTER_WRITE_BYTES],
-                             LPROCFS_CNTR_AVGMINMAX,
-                             NULL, "write_bytes", "bytes");
+        /* obdfilter-private stats: only the two byte counters; rc == 0 here */
+        lprocfs_counter_init(obd->obd_stats, LPROC_FILTER_READ_BYTES,
+                             LPROCFS_CNTR_AVGMINMAX, "read_bytes", "bytes");
+        lprocfs_counter_init(obd->obd_stats, LPROC_FILTER_WRITE_BYTES,
+                             LPROCFS_CNTR_AVGMINMAX, "write_bytes", "bytes");
         return rc;
 }
 
 int filter_detach(struct obd_device *dev)
 {
-        lprocfs_free_obd_counters(dev);
+        lprocfs_free_obd_stats(dev); /* allocated in filter_attach() */
         return lprocfs_obd_detach(dev);
 }
 
@@ -1531,8 +1409,8 @@ static int filter_connect(struct lustre_handle *conn, struct obd_device *obd,
         fed = &exp->exp_filter_data;
         class_export_put(exp);
 
-        INIT_LIST_HEAD(&exp->exp_filter_data.fed_open_head);
-        spin_lock_init(&exp->exp_filter_data.fed_lock);
+        INIT_LIST_HEAD(&fed->fed_open_head);
+        spin_lock_init(&fed->fed_lock);
 
         if (!obd->obd_replayable)
                 RETURN(0);
@@ -1635,47 +1513,53 @@ static void filter_from_inode(struct obdo *oa, struct inode *inode, int valid)
 }
 
 static struct dentry *__filter_oa2dentry(struct lustre_handle *conn,
-                                         struct obdo *oa, int locked,char *what)
+                                         struct obdo *oa, char *what)
 {
-        struct dentry *dentry = NULL;
+        struct dentry *dchild = NULL;
 
         if (oa->o_valid & OBD_MD_FLHANDLE) {
                 struct lustre_handle *ost_handle = obdo_handle(oa);
                 struct filter_file_data *ffd = filter_handle2ffd(ost_handle);
 
                 if (ffd != NULL) {
-                        dentry = dget(ffd->ffd_file->f_dentry);
+                        struct filter_dentry_data *fdd;
+                        dchild = dget(ffd->ffd_file->f_dentry);
+                        fdd = dchild->d_fsdata;
+                        LASSERT(fdd->fdd_magic == FILTER_DENTRY_MAGIC);
                         filter_ffd_put(ffd);
+
+                        CDEBUG(D_INODE,
+                               "got child objid %*s: %p, count = %d\n",
+                               dchild->d_name.len, dchild->d_name.name,
+                               dchild, atomic_read(&dchild->d_count));
                 }
         }
 
-        if (!dentry) {
+        if (!dchild) {
                 struct obd_device *obd = class_conn2obd(conn);
+
                 if (!obd) {
                         CERROR("invalid client cookie "LPX64"\n", conn->cookie);
                         RETURN(ERR_PTR(-EINVAL));
                 }
-                dentry = filter_fid2dentry(obd, filter_parent(obd, oa->o_mode,
-                                                              oa->o_id),
-                                           oa->o_id, locked);
+                dchild = filter_fid2dentry(obd, NULL, oa->o_mode, oa->o_id);
         }
 
-        if (IS_ERR(dentry)) {
+        if (IS_ERR(dchild)) {
                 CERROR("%s error looking up object: "LPU64"\n", what, oa->o_id);
-                RETURN(dentry);
+                RETURN(dchild);
         }
 
-        if (!dentry->d_inode) {
+        if (!dchild->d_inode) {
                 CERROR("%s on non-existent object: "LPU64"\n", what, oa->o_id);
-                f_dput(dentry);
+                f_dput(dchild);
                 RETURN(ERR_PTR(-ENOENT));
         }
 
-        return dentry;
+        return dchild;
 }
 
-#define filter_oa2dentry(conn, oa, locked) __filter_oa2dentry(conn, oa, locked,\
-                                                              __FUNCTION__)
+#define filter_oa2dentry(conn, oa) __filter_oa2dentry(conn, oa, __FUNCTION__)
 
 static int filter_getattr(struct lustre_handle *conn, struct obdo *oa,
                           struct lov_stripe_md *md)
@@ -1684,9 +1568,7 @@ static int filter_getattr(struct lustre_handle *conn, struct obdo *oa,
         int rc = 0;
         ENTRY;
 
-        XPROCFS_BUMP_MYCPU_IOSTAT (st_getattr_reqs, 1);
-
-        dentry = filter_oa2dentry(conn, oa, 1);
+        dentry = filter_oa2dentry(conn, oa);
         if (IS_ERR(dentry))
                 RETURN(PTR_ERR(dentry));
 
@@ -1711,9 +1593,7 @@ static int filter_setattr(struct lustre_handle *conn, struct obdo *oa,
         int rc, rc2;
         ENTRY;
 
-        XPROCFS_BUMP_MYCPU_IOSTAT (st_setattr_reqs, 1);
-
-        dentry = filter_oa2dentry(conn, oa, 0);
+        dentry = filter_oa2dentry(conn, oa);
 
         if (IS_ERR(dentry))
                 GOTO(out_exp, rc = PTR_ERR(dentry));
@@ -1731,10 +1611,7 @@ static int filter_setattr(struct lustre_handle *conn, struct obdo *oa,
         if (IS_ERR(handle))
                 GOTO(out_unlock, rc = PTR_ERR(handle));
 
-        if (inode->i_op->setattr)
-                rc = inode->i_op->setattr(dentry, &iattr);
-        else
-                rc = inode_setattr(inode, &iattr);
+        rc = fsfilt_setattr(obd, dentry, handle, &iattr, 1);
         rc = filter_finish_transno(export, handle, oti, rc);
         rc2 = fsfilt_commit(obd, dentry->d_inode, handle, 0);
         if (rc2) {
@@ -1763,7 +1640,7 @@ static int filter_open(struct lustre_handle *conn, struct obdo *oa,
                        struct lov_stripe_md *ea, struct obd_trans_info *oti,
                        struct obd_client_handle *och)
 {
-        struct obd_export *export;
+        struct obd_export *export = NULL;
         struct lustre_handle *handle;
         struct filter_file_data *ffd;
         struct file *filp;
@@ -1778,8 +1655,6 @@ static int filter_open(struct lustre_handle *conn, struct obdo *oa,
                 GOTO(out, rc = -EINVAL);
         }
 
-        XPROCFS_BUMP_MYCPU_IOSTAT (st_open_reqs, 1);
-
         filp = filter_obj_open(export, oa->o_id, oa->o_mode,
                                LCK_PR, &parent_lockh);
         if (IS_ERR(filp))
@@ -1816,8 +1691,6 @@ static int filter_close(struct lustre_handle *conn, struct obdo *oa,
                 GOTO(out, rc = -EINVAL);
         }
 
-        XPROCFS_BUMP_MYCPU_IOSTAT (st_close_reqs, 1);
-
         if (!(oa->o_valid & OBD_MD_FLHANDLE)) {
                 CERROR("no handle for close of objid "LPU64"\n", oa->o_id);
                 GOTO(out, rc = -EINVAL);
@@ -1846,13 +1719,13 @@ static int filter_close(struct lustre_handle *conn, struct obdo *oa,
 static int filter_create(struct lustre_handle *conn, struct obdo *oa,
                          struct lov_stripe_md **ea, struct obd_trans_info *oti)
 {
-        struct obd_export *export;
+        struct obd_export *exp;
         struct obd_device *obd = class_conn2obd(conn);
         struct filter_obd *filter = &obd->u.filter;
         struct obd_run_ctxt saved;
-        struct dentry *dir_dentry;
         struct lustre_handle parent_lockh;
-        struct dentry *new = NULL;
+        struct dentry *dparent;
+        struct dentry *dchild = NULL;
         struct iattr;
         void *handle;
         int err, rc, cleanup_phase;
@@ -1863,53 +1736,49 @@ static int filter_create(struct lustre_handle *conn, struct obdo *oa,
                 RETURN(-EINVAL);
         }
 
-        export = class_conn2export(conn);
-        XPROCFS_BUMP_MYCPU_IOSTAT (st_create_reqs, 1);
-
-        oa->o_id = filter_next_id(obd);
+        exp = class_conn2export(conn);
 
         push_ctxt(&saved, &filter->fo_ctxt, NULL);
  retry:
+        oa->o_id = filter_next_id(filter);
+
         cleanup_phase = 0;
-        dir_dentry = filter_parent_lock(obd, S_IFREG, oa->o_id, LCK_PW,
-                                        &parent_lockh);
-        if (IS_ERR(dir_dentry))
-                GOTO(cleanup, rc = PTR_ERR(dir_dentry));
+        dparent = filter_parent_lock(obd, S_IFREG, oa->o_id, LCK_PW,
+                                     &parent_lockh);
+        if (IS_ERR(dparent))
+                GOTO(cleanup, rc = PTR_ERR(dparent));
         cleanup_phase = 1;
 
-        new = filter_fid2dentry(obd, dir_dentry, oa->o_id, 0);
-        if (IS_ERR(new))
-                GOTO(cleanup, rc = PTR_ERR(new));
-        if (new->d_inode) {
-                char buf[32];
-
+        dchild = filter_fid2dentry(obd, dparent, S_IFREG, oa->o_id);
+        if (IS_ERR(dchild))
+                GOTO(cleanup, rc = PTR_ERR(dchild));
+        if (dchild->d_inode) {
                 /* This would only happen if lastobjid was bad on disk */
-                CERROR("Serious error: objid %s already exists; is this "
+                CERROR("Serious error: objid %*s already exists; is this "
                        "filesystem corrupt?  I will try to work around it.\n",
-                       filter_id(buf, filter, oa->o_id, oa->o_mode));
-                f_dput(new);
-                ldlm_lock_decref(&parent_lockh, LCK_PW);
-                oa->o_id = filter_next_id(obd);
+                       dchild->d_name.len, dchild->d_name.name);
+                f_dput(dchild);
+                filter_parent_unlock(dparent, &parent_lockh, LCK_PW);
                 goto retry;
         }
 
         cleanup_phase = 2;
-        handle = fsfilt_start(obd, dir_dentry->d_inode, FSFILT_OP_CREATE);
+        handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_CREATE);
         if (IS_ERR(handle))
                 GOTO(cleanup, rc = PTR_ERR(handle));
 
-        rc = vfs_create(dir_dentry->d_inode, new, oa->o_mode);
+        rc = vfs_create(dparent->d_inode, dchild, oa->o_mode);
         if (rc)
                 CERROR("create failed rc = %d\n", rc);
 
-        rc = filter_finish_transno(export, handle, oti, rc);
+        rc = filter_finish_transno(exp, handle, oti, rc);
         err = filter_update_server_data(filter->fo_rcvd_filp, filter->fo_fsd);
         if (err) {
                 CERROR("unable to write lastobjid but file created\n");
                 if (!rc)
                         rc = err;
         }
-        err = fsfilt_commit(obd, dir_dentry->d_inode, handle, 0);
+        err = fsfilt_commit(obd, dparent->d_inode, handle, 0);
         if (err) {
                 CERROR("error on commit, err = %d\n", err);
                 if (!rc)
@@ -1922,16 +1791,16 @@ static int filter_create(struct lustre_handle *conn, struct obdo *oa,
         /* Set flags for fields we have set in the inode struct */
         oa->o_valid = OBD_MD_FLID | OBD_MD_FLBLKSZ | OBD_MD_FLBLOCKS |
                  OBD_MD_FLMTIME | OBD_MD_FLATIME | OBD_MD_FLCTIME;
-        filter_from_inode(oa, new->d_inode, oa->o_valid);
+        filter_from_inode(oa, dchild->d_inode, oa->o_valid);
 
         EXIT;
 cleanup:
         switch(cleanup_phase) {
         case 2:
-                f_dput(new);
+                f_dput(dchild);
         case 1: /* locked parent dentry */
                 if (rc || oti == NULL) {
-                        ldlm_lock_decref(&parent_lockh, LCK_PW);
+                        filter_parent_unlock(dparent, &parent_lockh, LCK_PW);
                 } else {
                         memcpy(&oti->oti_ack_locks[0].lock, &parent_lockh,
                                sizeof(parent_lockh));
@@ -1939,7 +1808,7 @@ cleanup:
                 }
         case 0:
                 pop_ctxt(&saved, &filter->fo_ctxt, NULL);
-                class_export_put(export);
+                class_export_put(exp);
                 break;
         default:
                 CERROR("invalid cleanup_phase %d\n", cleanup_phase);
@@ -1952,10 +1821,10 @@ cleanup:
 static int filter_destroy(struct lustre_handle *conn, struct obdo *oa,
                           struct lov_stripe_md *ea, struct obd_trans_info *oti)
 {
-        struct obd_export *export;
+        struct obd_export *exp;
         struct obd_device *obd = class_conn2obd(conn);
         struct filter_obd *filter = &obd->u.filter;
-        struct dentry *dir_dentry, *object_dentry = NULL;
+        struct dentry *dparent, *dchild = NULL;
         struct filter_dentry_data *fdd;
         struct obd_run_ctxt saved;
         void *handle = NULL;
@@ -1968,30 +1837,35 @@ static int filter_destroy(struct lustre_handle *conn, struct obdo *oa,
                 RETURN(-EINVAL);
         }
 
-        export = class_conn2export(conn);
-        XPROCFS_BUMP_MYCPU_IOSTAT (st_destroy_reqs, 1);
+        exp = class_conn2export(conn);
 
         CDEBUG(D_INODE, "destroying objid "LPU64"\n", oa->o_id);
 
         push_ctxt(&saved, &filter->fo_ctxt, NULL);
-        dir_dentry = filter_parent_lock(obd, oa->o_mode, oa->o_id,
-                                        LCK_PW, &parent_lockh);
-        if (IS_ERR(dir_dentry))
-                GOTO(cleanup, rc = PTR_ERR(dir_dentry));
+        dparent = filter_parent_lock(obd, oa->o_mode, oa->o_id,
+                                     LCK_PW, &parent_lockh);
+        if (IS_ERR(dparent))
+                GOTO(cleanup, rc = PTR_ERR(dparent));
         cleanup_phase = 1;
 
-        object_dentry = filter_oa2dentry(conn, oa, 0);
-        if (IS_ERR(object_dentry))
+        dchild = filter_fid2dentry(obd, dparent, S_IFREG, oa->o_id);
+        if (IS_ERR(dchild))
                 GOTO(cleanup, rc = -ENOENT);
         cleanup_phase = 2;
 
-        handle = fsfilt_start(obd, dir_dentry->d_inode, FSFILT_OP_UNLINK);
+        if (!dchild->d_inode) {
+                CERROR("destroying non-existent object "LPU64"\n", oa->o_id);
+                GOTO(cleanup, rc = -ENOENT);
+        }
+
+        handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_UNLINK);
         if (IS_ERR(handle))
                 GOTO(cleanup, rc = PTR_ERR(handle));
         cleanup_phase = 3;
 
-        fdd = object_dentry->d_fsdata;
+        fdd = dchild->d_fsdata;
         if (fdd && atomic_read(&fdd->fdd_open_count)) {
+                LASSERT(fdd->fdd_magic == FILTER_DENTRY_MAGIC);
                 if (!(fdd->fdd_flags & FILTER_FLAG_DESTROY)) {
                         fdd->fdd_flags |= FILTER_FLAG_DESTROY;
                         /* XXX put into PENDING directory in case of crash */
@@ -2005,23 +1879,23 @@ static int filter_destroy(struct lustre_handle *conn, struct obdo *oa,
                 GOTO(cleanup, rc = 0);
         }
 
-        rc = filter_destroy_internal(obd, dir_dentry, object_dentry);
+        rc = filter_destroy_internal(obd, dparent, dchild);
 
 cleanup:
         switch(cleanup_phase) {
         case 3:
-                rc = filter_finish_transno(export, handle, oti, rc);
-                rc2 = fsfilt_commit(obd, dir_dentry->d_inode, handle, 0);
+                rc = filter_finish_transno(exp, handle, oti, rc);
+                rc2 = fsfilt_commit(obd, dparent->d_inode, handle, 0);
                 if (rc2) {
                         CERROR("error on commit, err = %d\n", rc2);
                         if (!rc)
                                 rc = rc2;
                 }
         case 2:
-                f_dput(object_dentry);
+                f_dput(dchild);
         case 1:
                 if (rc || oti == NULL) {
-                        ldlm_lock_decref(&parent_lockh, LCK_PW);
+                        filter_parent_unlock(dparent, &parent_lockh, LCK_PW);
                 } else {
                         memcpy(&oti->oti_ack_locks[0].lock, &parent_lockh,
                                sizeof(parent_lockh));
@@ -2029,7 +1903,7 @@ cleanup:
                 }
         case 0:
                 pop_ctxt(&saved, &filter->fo_ctxt, NULL);
-                class_export_put(export);
+                class_export_put(exp);
                 break;
         default:
                 CERROR("invalid cleanup_phase %d\n", cleanup_phase);
@@ -2048,8 +1922,6 @@ static int filter_truncate(struct lustre_handle *conn, struct obdo *oa,
         int error;
         ENTRY;
 
-        XPROCFS_BUMP_MYCPU_IOSTAT (st_punch_reqs, 1);
-
         if (end != OBD_OBJECT_EOF)
                 CERROR("PUNCH not supported, only truncate: end = "LPX64"\n",
                        end);
@@ -2199,7 +2071,7 @@ static int lustre_commit_write(struct niobuf_local *lnb)
         LASSERT(to <= PAGE_SIZE);
         err = page->mapping->a_ops->commit_write(NULL, page, from, to);
         if (!err && IS_SYNC(inode))
-                waitfor_one_page(page);
+                err = waitfor_one_page(page);
         //SetPageUptodate(page); // the client commit_write will do this
 
         SetPageReferenced(page);
@@ -2225,15 +2097,12 @@ int filter_get_page_write(struct inode *inode, struct niobuf_local *lnb,
 
         /* This page is currently locked, so get a temporary page instead. */
         if (!page) {
-                unsigned long addr;
                 CDEBUG(D_ERROR,"ino %lu page %ld locked\n", inode->i_ino,index);
-                addr = __get_free_pages(GFP_KERNEL, 0); /* locked page */
-                if (!addr) {
+                page = alloc_pages(GFP_KERNEL, 0); /* locked page */
+                if (!page) {
                         CERROR("no memory for a temp page\n");
                         GOTO(err, rc = -ENOMEM);
                 }
-                POISON((void *)addr, 0xBA, PAGE_SIZE);
-                page = virt_to_page(addr);
                 page->index = index;
                 lnb->page = page;
                 lnb->flags |= N_LOCAL_TEMP_PAGE;
@@ -2305,7 +2174,7 @@ static int filter_commit_write(struct niobuf_local *lnb, int err)
         return lustre_commit_write(lnb);
 }
 
-static int filter_preprw(int cmd, struct obd_export *export,
+static int filter_preprw(int cmd, struct obd_export *exp,
                          int objcount, struct obd_ioobj *obj,
                          int niocount, struct niobuf_remote *nb,
                          struct niobuf_local *res, void **desc_private,
@@ -2319,28 +2188,16 @@ static int filter_preprw(int cmd, struct obd_export *export,
         struct fsfilt_objinfo *fso;
         struct dentry *dentry;
         struct inode *inode;
-        struct lprocfs_counters *cntrs;
-        int pglocked = 0, rc = 0, i, j;
-
+        int pglocked = 0, rc = 0, i, j, tot_bytes = 0;
+        unsigned long now = jiffies;
         ENTRY;
 
-        if ((cmd & OBD_BRW_WRITE) != 0)
-                XPROCFS_BUMP_MYCPU_IOSTAT (st_write_reqs, 1);
-        else
-                XPROCFS_BUMP_MYCPU_IOSTAT (st_read_reqs, 1);
-
         memset(res, 0, niocount * sizeof(*res));
 
-        obd = export->exp_obd;
+        obd = exp->exp_obd;
         if (obd == NULL)
                 RETURN(-EINVAL);
 
-        cntrs = obd->counters;
-        if ((cmd & OBD_BRW_WRITE) != 0)
-                LPROCFS_COUNTER_INCBY1(&cntrs->cntr[LPROC_FILTER_WRITES]);
-        else
-                LPROCFS_COUNTER_INCBY1(&cntrs->cntr[LPROC_FILTER_READS]);
-
         // theoretically we support multi-obj BRW RPCs, but until then...
         LASSERT(objcount == 1);
 
@@ -2355,9 +2212,7 @@ static int filter_preprw(int cmd, struct obd_export *export,
 
                 LASSERT(o->ioo_bufcnt);
 
-                dentry = filter_fid2dentry(obd, filter_parent(obd, S_IFREG,
-                                                              o->ioo_id),
-                                           o->ioo_id, 0);
+                dentry = filter_fid2dentry(obd, NULL, o->ioo_type, o->ioo_id);
 
                 if (IS_ERR(dentry))
                         GOTO(out_objinfo, rc = PTR_ERR(dentry));
@@ -2368,6 +2223,7 @@ static int filter_preprw(int cmd, struct obd_export *export,
                 if (!dentry->d_inode) {
                         CERROR("trying to BRW to non-existent file "LPU64"\n",
                                o->ioo_id);
+                        f_dput(dentry);
                         GOTO(out_objinfo, rc = -ENOENT);
                 }
 
@@ -2394,6 +2250,9 @@ static int filter_preprw(int cmd, struct obd_export *export,
                                o->ioo_id);
         }
 
+        if (time_after(jiffies, now + 15*HZ))
+                CERROR("slow prep setup %lus\n", (jiffies - now) / HZ);
+
         if (cmd & OBD_BRW_WRITE) {
                 *desc_private = fsfilt_brw_start(obd, objcount, fso,
                                                  niocount, nb);
@@ -2419,36 +2278,34 @@ static int filter_preprw(int cmd, struct obd_export *export,
                         lnb->offset = rnb->offset;
                         lnb->len    = rnb->len;
                         lnb->flags  = rnb->flags;
+                        lnb->start  = jiffies;
 
                         if (cmd & OBD_BRW_WRITE) {
                                 rc = filter_get_page_write(inode,lnb,&pglocked);
-
-                                XPROCFS_BUMP_MYCPU_IOSTAT(st_write_bytes,
-                                                          lnb->len);
-                                LPROCFS_COUNTER_INCR(&cntrs->cntr[LPROC_FILTER_WRITE_BYTES], lnb->len);
+                                if (rc)
+                                        up(&dentry->d_inode->i_sem);
                         } else if (inode->i_size <= rnb->offset) {
                                 /* If there's no more data, abort early.
                                  * lnb->page == NULL and lnb->rc == 0, so it's
                                  * easy to detect later. */
-                                f_dput(lnb->dentry);
+                                f_dput(dentry);
                                 lnb->dentry = NULL;
                                 break;
                         } else {
                                 rc = filter_start_page_read(inode, lnb);
-
-                                XPROCFS_BUMP_MYCPU_IOSTAT(st_read_bytes,
-                                                          lnb->len);
-                                LPROCFS_COUNTER_INCR(&cntrs->cntr[LPROC_FILTER_READ_BYTES], lnb->len);
                         }
 
                         if (rc) {
                                 CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR,
-                                       "error on page @"LPU64"%u/%u: rc = %d\n",
-                                       lnb->offset, j, o->ioo_bufcnt, rc);
+                                       "page err %u@"LPU64" %u/%u %p: rc %d\n",
+                                       lnb->len, lnb->offset, j, o->ioo_bufcnt,
+                                       dentry, rc);
                                 f_dput(dentry);
                                 GOTO(out_pages, rc);
                         }
 
+                        tot_bytes += lnb->len;
+
                         if ((cmd & OBD_BRW_READ) && lnb->rc < lnb->len) {
                                 /* Likewise with a partial read */
                                 break;
@@ -2456,15 +2313,29 @@ static int filter_preprw(int cmd, struct obd_export *export,
                 }
         }
 
-        while ((cmd & OBD_BRW_READ) && lnb-- > res) {
-                rc = filter_finish_page_read(lnb);
-                if (rc) {
-                        CERROR("error on page %u@"LPU64": rc = %d\n",
-                               lnb->len, lnb->offset, rc);
-                        f_dput(lnb->dentry);
-                        GOTO(out_pages, rc);
+        if (time_after(jiffies, now + 15*HZ))
+                CERROR("slow prep get page %lus\n", (jiffies - now) / HZ);
+
+        if (cmd & OBD_BRW_READ) {
+                lprocfs_counter_add(obd->obd_stats, LPROC_FILTER_READ_BYTES,
+                                    tot_bytes);
+                while (lnb-- > res) {
+                        rc = filter_finish_page_read(lnb);
+                        if (rc) {
+                                CERROR("error page %u@"LPU64" %u %p: rc %d\n",
+                                       lnb->len, lnb->offset,
+                                       (unsigned)(lnb - res), lnb->dentry, rc);
+                                f_dput(lnb->dentry);
+                                GOTO(out_pages, rc);
+                        }
                 }
-        }
+        } else
+                lprocfs_counter_add(obd->obd_stats, LPROC_FILTER_WRITE_BYTES,
+                                    tot_bytes);
+
+        if (time_after(jiffies, now + 15*HZ))
+                CERROR("slow prep finish page %lus\n", (jiffies - now) / HZ);
+
         EXIT;
 out:
         OBD_FREE(fso, objcount * sizeof(*fso));
@@ -2483,7 +2354,7 @@ out_pages:
                 f_dput(lnb->dentry);
         }
         if (cmd & OBD_BRW_WRITE) {
-                filter_finish_transno(export, *desc_private, oti, rc);
+                filter_finish_transno(exp, *desc_private, oti, rc);
                 fsfilt_commit(obd,
                               filter_parent(obd,S_IFREG,obj->ioo_id)->d_inode,
                               *desc_private, 0);
@@ -2553,12 +2424,10 @@ static int filter_syncfs(struct obd_export *exp)
         struct obd_device *obd = exp->exp_obd;
         ENTRY;
 
-        XPROCFS_BUMP_MYCPU_IOSTAT (st_syncfs_reqs, 1);
-
         RETURN(fsfilt_sync(obd, obd->u.filter.fo_sb));
 }
 
-static int filter_commitrw(int cmd, struct obd_export *export,
+static int filter_commitrw(int cmd, struct obd_export *exp,
                            int objcount, struct obd_ioobj *obj,
                            int niocount, struct niobuf_local *res,
                            void *desc_private, struct obd_trans_info *oti)
@@ -2566,8 +2435,9 @@ static int filter_commitrw(int cmd, struct obd_export *export,
         struct obd_run_ctxt saved;
         struct obd_ioobj *o;
         struct niobuf_local *lnb;
-        struct obd_device *obd = export->exp_obd;
+        struct obd_device *obd = exp->exp_obd;
         int found_locked = 0, rc = 0, i;
+        unsigned long now = jiffies;  /* DEBUGGING OST TIMEOUTS */
         ENTRY;
 
         push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
@@ -2586,11 +2456,16 @@ static int filter_commitrw(int cmd, struct obd_export *export,
                         if (lnb->page == NULL) {
                                 continue;
                         }
+
                         if (lnb->flags & N_LOCAL_TEMP_PAGE) {
                                 found_locked++;
                                 continue;
                         }
 
+                        if (time_after(jiffies, lnb->start + 15*HZ))
+                                CERROR("slow commitrw %lus\n",
+                                       (jiffies - lnb->start) / HZ);
+
                         if (cmd & OBD_BRW_WRITE) {
                                 int err = filter_commit_write(lnb, 0);
 
@@ -2601,6 +2476,9 @@ static int filter_commitrw(int cmd, struct obd_export *export,
                         }
 
                         f_dput(lnb->dentry);
+                        if (time_after(jiffies, lnb->start + 15*HZ))
+                                CERROR("slow commit_write %lus\n",
+                                       (jiffies - lnb->start) / HZ);
                 }
         }
 
@@