Whamcloud - gitweb
land b_md onto HEAD. the highlights:
author pschwan <pschwan>
Sat, 14 Dec 2002 19:40:17 +0000 (19:40 +0000)
committer pschwan <pschwan>
Sat, 14 Dec 2002 19:40:17 +0000 (19:40 +0000)
 - fstat didn't correctly synchronize attributes (399)
 - server must handle lock cancellation during blocking AST prep (487)
 - bulk descriptors were free()d too soon (511)
 - fix paths in lconf, which would load incorrect modules (451, 507)
 - fix confusing lconf 'host not found' error message (386)

53 files changed:
lustre/ChangeLog
lustre/configure.in
lustre/extN/htree-ext3-2.4.18.diff
lustre/include/linux/lustre_export.h
lustre/include/linux/lustre_fsfilt.h [new file with mode: 0644]
lustre/include/linux/lustre_idl.h
lustre/include/linux/lustre_lib.h
lustre/include/linux/lustre_mds.h
lustre/include/linux/lustre_net.h
lustre/include/linux/obd.h
lustre/include/linux/obd_support.h
lustre/ldlm/ldlm_lock.c
lustre/ldlm/ldlm_lockd.c
lustre/ldlm/ldlm_resource.c
lustre/lib/simple.c
lustre/llite/file.c
lustre/llite/namei.c
lustre/llite/rw.c
lustre/lov/lov_pack.c
lustre/mdc/mdc_request.c
lustre/mds/Makefile.am
lustre/mds/handler.c
lustre/mds/lproc_mds.c
lustre/mds/mds_ext2.c [deleted file]
lustre/mds/mds_extN.c [deleted file]
lustre/mds/mds_fs.c
lustre/mds/mds_reint.c
lustre/obdclass/Makefile.am
lustre/obdclass/class_obd.c
lustre/obdclass/fsfilt.c [new file with mode: 0644]
lustre/obdclass/fsfilt_ext3.c [moved from lustre/mds/mds_ext3.c with 55% similarity]
lustre/obdclass/fsfilt_extN.c [new file with mode: 0644]
lustre/obdclass/genops.c
lustre/obdecho/echo_client.c
lustre/obdfilter/filter.c
lustre/ost/ost_handler.c
lustre/ptlrpc/client.c
lustre/ptlrpc/events.c
lustre/ptlrpc/niobuf.c
lustre/scripts/lustre.spec.in
lustre/tests/.cvsignore
lustre/tests/Makefile.am
lustre/tests/checkstat.c [new file with mode: 0644]
lustre/tests/common.sh
lustre/tests/fsx.c
lustre/tests/llmodules.sh
lustre/tests/runslabinfo
lustre/tests/sanity.sh
lustre/tests/toexcl.c
lustre/utils/.cvsignore
lustre/utils/lconf.in [moved from lustre/utils/lconf with 96% similarity]
lustre/utils/llanalyze
lustre/utils/obd.c

diff --git a/lustre/ChangeLog b/lustre/ChangeLog
index bbf73c2..8495e29 100644 (file)
@@ -9,6 +9,11 @@ TBA
         - stop dereferencing request after dropping refcount (457)
         - don't LASSERT(spin_is_locked) on non-SMP (455)
         - fixes for many rename() bugs
+        - fstat didn't correctly synchronize attributes (399)
+        - server must handle lock cancellation during blocking AST prep (487)
+        - bulk descriptors were free()d too soon (511)
+        - fix paths in lconf, which would load incorrect modules (451, 507)
+        - fix confusing lconf 'host not found' error message (386)
 
 2002-12-02  Andreas Dilger  <adilger@clusterfs.com>
        * version v0_5_18
diff --git a/lustre/configure.in b/lustre/configure.in
index c40124e..47c3d35 100644 (file)
@@ -62,7 +62,15 @@ portalsdir_def='$(top_srcdir)/../portals'
 AC_ARG_WITH(portals, [  --with-portals=[path] set path to Portals source (default=../portals)], enable_portalsdir=$withval)
 AC_ARG_ENABLE(portalsdir, [  --enable-portalsdir=[path] (deprecated) set path to Portals source (default=$(top_srcdir)/../portals)],, enable_portalsdir=$portalsdir_def)
 PORTALS=$enable_portalsdir
+
+if test $PORTALS = $portalsdir_def; then
+       PORTALSLOC='../portals'
+else
+       PORTALSLOC=$PORTALS
+fi
+
 AC_SUBST(PORTALS)
+AC_SUBST(PORTALSLOC)
 
 portalslib_def=$enable_portalsdir/linux/utils
 AC_ARG_WITH(portalslib, [  --with-portalslib=[path] set path to Portals library (default=../portals/linux/utils)], enable_portalslib=$withval)
@@ -120,6 +128,6 @@ AC_SUBST(demodir)
 
 AC_OUTPUT(Makefile lib/Makefile ldlm/Makefile obdecho/Makefile ptlrpc/Makefile \
        lov/Makefile osc/Makefile mdc/Makefile mds/Makefile ost/Makefile \
-       utils/Makefile tests/Makefile obdfilter/Makefile obdclass/Makefile \
-       llite/Makefile doc/Makefile scripts/Makefile \
-       scripts/lustre.spec extN/Makefile)
+       utils/Makefile utils/lconf tests/Makefile obdfilter/Makefile \
+        obdclass/Makefile llite/Makefile doc/Makefile scripts/Makefile \
+       scripts/lustre.spec extN/Makefile, chmod +x utils/lconf)
diff --git a/lustre/extN/htree-ext3-2.4.18.diff b/lustre/extN/htree-ext3-2.4.18.diff
index de8bc8a..9eba30c 100644 (file)
@@ -48,7 +48,7 @@
  
  /*
   * define how far ahead to read directories while searching them.
-@@ -38,6 +42,433 @@
+@@ -38,6 +42,437 @@
  #define NAMEI_RA_SIZE        (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
  #define NAMEI_RA_INDEX(c,b)  (((c) * NAMEI_RA_BLOCKS) + (b))
  
@@ -81,7 +81,6 @@
 +
 +#define dxtrace_on(command) command
 +#define dxtrace_off(command)
-+#define dxtrace dxtrace_off
 +
 +struct fake_dirent
 +{
 +/*
 + * Debug
 + */
++#ifdef DX_DEBUG
++#define dxtrace dxtrace_on
 +static void dx_show_index (char * label, struct dx_entry *entries)
 +{
 +      int i, n = dx_get_count (entries);
 +                      names, space/bcount,(space/bcount)*100/blocksize);
 +      return (struct stats) { names, space, bcount};
 +}
++#else
++#define dxtrace dxtrace_off
++#endif
 +
 +/*
 + * Probe for a directory leaf block to search
diff --git a/lustre/include/linux/lustre_export.h b/lustre/include/linux/lustre_export.h
index dc2c0b5..ba9555c 100644 (file)
@@ -14,6 +14,7 @@
 
 #include <linux/lustre_idl.h>
 #include <linux/lustre_dlm.h>
+#include <linux/lustre_mds.h>
 #include <linux/obd_filter.h>
 
 struct lov_export_data {
diff --git a/lustre/include/linux/lustre_fsfilt.h b/lustre/include/linux/lustre_fsfilt.h
new file mode 100644 (file)
index 0000000..eeae647
--- /dev/null
@@ -0,0 +1,151 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2001 Cluster File Systems, Inc. <info@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Filesystem interface helper.
+ *
+ */
+
+#ifndef _LUSTRE_FSFILT_H
+#define _LUSTRE_FSFILT_H
+
+#ifdef __KERNEL__
+
+#include <linux/obd.h>
+#include <linux/fs.h>
+
+typedef void (*fsfilt_cb_t)(struct obd_device *obd, __u64 last_rcvd, int error);
+
+struct fsfilt_objinfo {
+        struct dentry *fso_dentry;
+        int fso_bufcnt;
+};
+
+struct fsfilt_operations {
+        struct list_head fs_list;
+        struct module *fs_owner;
+        char   *fs_type;
+        void   *(* fs_start)(struct inode *inode, int op);
+        void   *(* fs_brw_start)(int objcount, struct fsfilt_objinfo *fso,
+                                 int niocount, struct niobuf_remote *nb);
+        int     (* fs_commit)(struct inode *inode, void *handle);
+        int     (* fs_setattr)(struct dentry *dentry, void *handle,
+                               struct iattr *iattr);
+        int     (* fs_set_md)(struct inode *inode, void *handle, void *md,
+                              int size);
+        int     (* fs_get_md)(struct inode *inode, void *md, int size);
+        ssize_t (* fs_readpage)(struct file *file, char *buf, size_t count,
+                                loff_t *offset);
+        int     (* fs_journal_data)(struct file *file);
+        int     (* fs_set_last_rcvd)(struct obd_device *obd, __u64 last_rcvd,
+                                     void *handle, fsfilt_cb_t cb_func);
+        int     (* fs_statfs)(struct super_block *sb, struct obd_statfs *osfs);
+};
+
+extern int fsfilt_register_ops(struct fsfilt_operations *fs_ops);
+extern void fsfilt_unregister_ops(struct fsfilt_operations *fs_ops);
+extern struct fsfilt_operations *fsfilt_get_ops(char *type);
+extern void fsfilt_put_ops(struct fsfilt_operations *fs_ops);
+
+#define FSFILT_OP_UNLINK         1
+#define FSFILT_OP_RMDIR          2
+#define FSFILT_OP_RENAME         3
+#define FSFILT_OP_CREATE         4
+#define FSFILT_OP_MKDIR          5
+#define FSFILT_OP_SYMLINK        6
+#define FSFILT_OP_MKNOD          7
+#define FSFILT_OP_SETATTR        8
+#define FSFILT_OP_LINK           9
+
+static inline void *fsfilt_start(struct obd_device *obd,
+                                 struct inode *inode, int op)
+{
+        return obd->obd_fsops->fs_start(inode, op);
+}
+
+static inline void *fsfilt_brw_start(struct obd_device *obd, int objcount,
+                                     struct fsfilt_objinfo *fso, int niocount,
+                                     struct niobuf_remote *nb)
+{
+        return obd->obd_fsops->fs_brw_start(objcount, fso, niocount, nb);
+}
+
+static inline int fsfilt_commit(struct obd_device *obd, struct inode *inode,
+                                void *handle)
+{
+        return obd->obd_fsops->fs_commit(inode, handle);
+}
+
+static inline int fsfilt_setattr(struct obd_device *obd, struct dentry *dentry,
+                                 void *handle, struct iattr *iattr)
+{
+        int rc;
+        /*
+         * NOTE: we probably don't need to take i_sem here when changing
+         *       ATTR_SIZE because the MDS never needs to truncate a file.
+         *       The ext2/ext3 code never truncates a directory, and files
+         *       stored on the MDS are entirely sparse (no data blocks).
+         *       If we do need to get it, we can do it here.
+         */
+        lock_kernel();
+        rc = obd->obd_fsops->fs_setattr(dentry, handle, iattr);
+        unlock_kernel();
+
+        return rc;
+}
+
+static inline int fsfilt_set_md(struct obd_device *obd, struct inode *inode,
+                                void *handle, void *md, int size)
+{
+        return obd->obd_fsops->fs_set_md(inode, handle, md, size);
+}
+
+static inline int fsfilt_get_md(struct obd_device *obd, struct inode *inode,
+                                void *md, int size)
+{
+        return obd->obd_fsops->fs_get_md(inode, md, size);
+}
+
+static inline ssize_t fsfilt_readpage(struct obd_device *obd,
+                                      struct file *file, char *buf,
+                                      size_t count, loff_t *offset)
+{
+        return obd->obd_fsops->fs_readpage(file, buf, count, offset);
+}
+
+static inline int fsfilt_journal_data(struct obd_device *obd, struct file *file)
+{
+        return obd->obd_fsops->fs_journal_data(file);
+}
+
+static inline int fsfilt_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
+                                       void *handle, fsfilt_cb_t cb_func)
+{
+        return obd->obd_fsops->fs_set_last_rcvd(obd, last_rcvd,handle,cb_func);
+}
+
+static inline int fsfilt_statfs(struct obd_device *obd, struct super_block *fs,
+                                struct obd_statfs *osfs)
+{
+        return obd->obd_fsops->fs_statfs(fs, osfs);
+}
+
+#endif /* __KERNEL__ */
+
+#endif
diff --git a/lustre/include/linux/lustre_idl.h b/lustre/include/linux/lustre_idl.h
index ea75f08..8d6536f 100644 (file)
@@ -284,7 +284,6 @@ struct obd_statfs {
 struct obd_ioobj {
         obd_id               ioo_id;
         obd_gr               ioo_gr;
-        /* struct lustre_handle ioo_handle; XXX in the future */
         __u32                ioo_type;
         __u32                ioo_bufcnt;
 };
diff --git a/lustre/include/linux/lustre_lib.h b/lustre/include/linux/lustre_lib.h
index da5cc81..0372504 100644 (file)
 #else
 # include <asm/semaphore.h>
 #endif
-#include <linux/kp30.h> /* XXX just for LASSERT! */
+#include <linux/types.h>
 #include <linux/portals_lib.h>
+#include <linux/kp30.h> /* XXX just for LASSERT! */
 #include <linux/lustre_idl.h>
 
+#ifndef LPU64
+#if BITS_PER_LONG > 32
+#define LPU64 "%lu"
+#define LPD64 "%ld"
+#define LPX64 "%#lx"
+#else
+#define LPU64 "%Lu"
+#define LPD64 "%Ld"
+#define LPX64 "%#Lx"
+#endif
+#endif
+
 #ifdef __KERNEL__
 /* l_net.c */
 struct ptlrpc_request;
diff --git a/lustre/include/linux/lustre_mds.h b/lustre/include/linux/lustre_mds.h
index 0260ac8..558c10b 100644 (file)
@@ -153,6 +153,10 @@ int mds_reint(struct ptlrpc_request *req, int offset);
 int mds_pack_md(struct mds_obd *mds, struct ptlrpc_request *req,
                 int offset, struct mds_body *body, struct inode *inode);
 
+/* mds/mds_fs.c */
+int mds_fs_setup(struct obd_device *obddev, struct vfsmount *mnt);
+void mds_fs_cleanup(struct obd_device *obddev);
+
 /* mdc/mdc_request.c */
 int mdc_enqueue(struct lustre_handle *conn, int lock_type,
                 struct lookup_intent *it, int lock_mode, struct inode *dir,
@@ -198,41 +202,6 @@ int mds_client_add(struct mds_obd *mds, struct mds_export_data *med,
                    int cl_off);
 int mds_client_free(struct obd_export *exp);
 
-/* mds/mds_fs.c */
-struct mds_fs_operations {
-        struct module *fs_owner;
-        void   *(* fs_start)(struct inode *inode, int op);
-        int     (* fs_commit)(struct inode *inode, void *handle);
-        int     (* fs_setattr)(struct dentry *dentry, void *handle,
-                               struct iattr *iattr);
-        int     (* fs_set_md)(struct inode *inode, void *handle,
-                              struct lov_mds_md *md, int size);
-        int     (* fs_get_md)(struct inode *inode, struct lov_mds_md *md,
-                              int size);
-        ssize_t (* fs_readpage)(struct file *file, char *buf, size_t count,
-                                loff_t *offset);
-        void    (* fs_delete_inode)(struct inode *inode);
-        void    (* cl_delete_inode)(struct inode *inode);
-        int     (* fs_journal_data)(struct file *file);
-        int     (* fs_set_last_rcvd)(struct mds_obd *mds, void *handle);
-        int     (* fs_statfs)(struct super_block *sb, struct statfs *sfs);
-};
-
-extern int mds_register_fs_type(struct mds_fs_operations *op, const char *name);
-extern void mds_unregister_fs_type(const char *name);
-extern int mds_fs_setup(struct obd_device *obddev, struct vfsmount *mnt);
-extern void mds_fs_cleanup(struct obd_device *obddev);
-
-#define MDS_FSOP_UNLINK         1
-#define MDS_FSOP_RMDIR          2
-#define MDS_FSOP_RENAME         3
-#define MDS_FSOP_CREATE         4
-#define MDS_FSOP_MKDIR          5
-#define MDS_FSOP_SYMLINK        6
-#define MDS_FSOP_MKNOD          7
-#define MDS_FSOP_SETATTR        8
-#define MDS_FSOP_LINK           9
-
 #endif /* __KERNEL__ */
 
 /* ioctls for trying requests */
diff --git a/lustre/include/linux/lustre_net.h b/lustre/include/linux/lustre_net.h
index e539d8e..fb060d0 100644 (file)
@@ -344,12 +344,11 @@ void *lustre_msg_buf(struct lustre_msg *m, int n);
 
 static inline void ptlrpc_bulk_decref(struct ptlrpc_bulk_desc *desc)
 {
+        CDEBUG(D_PAGE, "%p -> %d\n", desc, atomic_read(&desc->bd_refcount) - 1);
+
         if (atomic_dec_and_test(&desc->bd_refcount)) {
                 CDEBUG(D_PAGE, "Released last ref on %p, freeing\n", desc);
                 ptlrpc_free_bulk(desc);
-        } else {
-                CDEBUG(D_PAGE, "%p -> %d\n", desc,
-                       atomic_read(&desc->bd_refcount));
         }
 }
 
diff --git a/lustre/include/linux/obd.h b/lustre/include/linux/obd.h
index cb72b5b..9612846 100644 (file)
@@ -123,15 +123,12 @@ struct client_obd {
 struct mds_obd {
         struct ptlrpc_service           *mds_service;
 
-        char                            *mds_fstype;
         struct super_block              *mds_sb;
-        struct super_operations         *mds_sop;
         struct vfsmount                 *mds_vfsmnt;
         struct obd_run_ctxt              mds_ctxt;
         struct file_operations          *mds_fop;
         struct inode_operations         *mds_iop;
         struct address_space_operations *mds_aops;
-        struct mds_fs_operations        *mds_fsops;
 
         int                              mds_max_mdsize;
         struct file                     *mds_rcvd_filp;
@@ -250,6 +247,7 @@ struct obd_device {
         struct ptlrpc_client   obd_ldlm_client; /* XXX OST/MDS only */
         /* a spinlock is OK for what we do now, may need a semaphore later */
         spinlock_t obd_dev_lock;
+        struct fsfilt_operations *obd_fsops;
         union {
                 struct ext2_obd ext2;
                 struct filter_obd filter;
@@ -257,7 +255,6 @@ struct obd_device {
                 struct client_obd cli;
                 struct ost_obd ost;
                 struct echo_client_obd echo_client;;
-                //                struct osc_obd osc;
                 struct ldlm_obd ldlm;
                 struct echo_obd echo;
                 struct recovd_obd recovd;
@@ -269,7 +266,7 @@ struct obd_device {
         } u;
        /* Fields used by LProcFS */
         unsigned int cntr_mem_size;
-        voidcounters;
+        void *counters;
 };
 
 struct obd_ops {
@@ -342,79 +339,6 @@ struct obd_ops {
                         __u32 mode, struct lustre_handle *);
         int (*o_cancel_unused)(struct lustre_handle *, struct lov_stripe_md *,
                                int local_only);
-        
 };
-
-static inline void *mds_fs_start(struct mds_obd *mds, struct inode *inode,
-                                 int op)
-{
-        return mds->mds_fsops->fs_start(inode, op);
-}
-
-static inline int mds_fs_commit(struct mds_obd *mds, struct inode *inode,
-                                void *handle)
-{
-        return mds->mds_fsops->fs_commit(inode, handle);
-}
-
-static inline int mds_fs_setattr(struct mds_obd *mds, struct dentry *dentry,
-                                 void *handle, struct iattr *iattr)
-{
-        int rc;
-        /*
-         * NOTE: we probably don't need to take i_sem here when changing
-         *       ATTR_SIZE because the MDS never needs to truncate a file.
-         *       The ext2/ext3 code never truncates a directory, and files
-         *       stored on the MDS are entirely sparse (no data blocks).
-         *       If we do need to get it, we can do it here.
-         */
-        lock_kernel();
-        rc = mds->mds_fsops->fs_setattr(dentry, handle, iattr);
-        unlock_kernel();
-
-        return rc;
-}
-
-static inline int mds_fs_set_md(struct mds_obd *mds, struct inode *inode,
-                                void *handle, struct lov_mds_md *md,
-                                int size)
-{
-        return mds->mds_fsops->fs_set_md(inode, handle, md, size);
-}
-
-static inline int mds_fs_get_md(struct mds_obd *mds, struct inode *inode,
-                                struct lov_mds_md *md, int size)
-{
-        return mds->mds_fsops->fs_get_md(inode, md, size);
-}
-
-static inline ssize_t mds_fs_readpage(struct mds_obd *mds, struct file *file,
-                                      char *buf, size_t count, loff_t *offset)
-{
-        return mds->mds_fsops->fs_readpage(file, buf, count, offset);
-}
-
-/* Set up callback to update mds->mds_last_committed with the current
- * value of mds->mds_last_recieved when this transaction is on disk.
- */
-static inline int mds_fs_set_last_rcvd(struct mds_obd *mds, void *handle)
-{
-        return mds->mds_fsops->fs_set_last_rcvd(mds, handle);
-}
-
-/* Enable data journaling on the given file */
-static inline ssize_t mds_fs_journal_data(struct mds_obd *mds,
-                                          struct file *file)
-{
-        return mds->mds_fsops->fs_journal_data(file);
-}
-
-static inline int mds_fs_statfs(struct mds_obd *mds, struct statfs *sfs)
-{
-        if (mds->mds_fsops->fs_statfs)
-                return mds->mds_fsops->fs_statfs(mds->mds_sb, sfs);
-
-        return vfs_statfs(mds->mds_sb, sfs);
-}
 #endif /* __KERNEL */
 #endif /* __OBD_H */
diff --git a/lustre/include/linux/obd_support.h b/lustre/include/linux/obd_support.h
index b95b266..f6c2770 100644 (file)
@@ -30,7 +30,7 @@
 #include <linux/kp30.h>
 
 /* global variables */
-extern unsigned long obd_memory;
+extern atomic_t obd_memory;
 extern unsigned long obd_fail_loc;
 extern unsigned long obd_timeout;
 extern char obd_recovery_upcall[128];
@@ -153,23 +153,23 @@ static inline void OBD_FAIL_WRITE(int id, kdev_t dev)
 #define OBD_ALLOC(ptr, size)                                            \
 do {                                                                    \
         void *lptr;                                                     \
-        long s = (size);                                                \
+        int s = (size);                                                 \
         (ptr) = lptr = kmalloc(s, GFP_KERNEL);                          \
         if (lptr == NULL) {                                             \
                 CERROR("kmalloc of '" #ptr "' (%ld bytes) failed "      \
                        "at %s:%d\n", s, __FILE__, __LINE__);            \
         } else {                                                        \
                 memset(lptr, 0, s);                                     \
-                obd_memory += s;                                        \
-                CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %ld at "        \
-                       "%p (tot %ld).\n", s, lptr, obd_memory);         \
+                atomic_add(s, &obd_memory);                             \
+                CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p "      \
+                       "(tot %d)\n", s, lptr, atomic_read(&obd_memory));\
         }                                                               \
 } while (0)
 
 #ifdef CONFIG_DEBUG_SLAB
 #define POISON(lptr, s) do {} while (0)
 #else
-#define POISON(lptr, s) memset(lptr, 0xb6, s)
+#define POISON(lptr, s) memset(lptr, 0x5a, s)
 #endif
 
 #define OBD_FREE(ptr, size)                                             \
@@ -179,9 +179,9 @@ do {                                                                    \
         LASSERT(lptr);                                                  \
         POISON(lptr, s);                                                \
         kfree(lptr);                                                    \
-        obd_memory -= s;                                                \
-        CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %ld).\n",    \
-               s, lptr, obd_memory);                                    \
+        atomic_sub(s, &obd_memory);                                     \
+        CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n",     \
+               s, lptr, atomic_read(&obd_memory));                      \
         (ptr) = (void *)0xdeadbeef;                                     \
 } while (0)
 
diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c
index 83a6661..9742eec 100644 (file)
@@ -175,8 +175,6 @@ void ldlm_lock_put(struct ldlm_lock *lock)
 
                 lock->l_resource = NULL;
                 lock->l_random = DEAD_HANDLE_MAGIC;
-                if (lock->l_export && lock->l_export->exp_connection)
-                        ptlrpc_put_connection(lock->l_export->exp_connection);
                 memset(lock, 0x5a, sizeof(*lock));
                 kmem_cache_free(ldlm_lock_slab, lock);
                 CDEBUG(D_MALLOC, "kfreed 'lock': %d at %p (tot 0).\n",
diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c
index bd2dd09..cbf67d0 100644 (file)
@@ -142,6 +142,15 @@ static int ldlm_server_blocking_ast(struct ldlm_lock *lock,
                 RETURN(0);
         }
 
+        LASSERT(lock);
+
+        l_lock(&lock->l_resource->lr_namespace->ns_lock);
+        if (lock->l_destroyed) {
+                /* What's the point? */
+                l_unlock(&lock->l_resource->lr_namespace->ns_lock);
+                RETURN(0);
+        }
+
         req = ptlrpc_prep_req(&lock->l_export->exp_ldlm_data.led_import,
                               LDLM_BL_CALLBACK, 1, &size, NULL);
         if (!req)
@@ -156,6 +165,8 @@ static int ldlm_server_blocking_ast(struct ldlm_lock *lock,
         req->rq_replen = 0; /* no reply needed */
 
         ldlm_add_waiting_lock(lock);
+        l_unlock(&lock->l_resource->lr_namespace->ns_lock);
+
         (void)ptl_send_rpc(req);
 
         /* not waiting for reply */
@@ -247,6 +258,13 @@ int ldlm_handle_enqueue(struct ptlrpc_request *req)
                sizeof(lock->l_remote_handle));
         LDLM_DEBUG(lock, "server-side enqueue handler, new lock created");
 
+        LASSERT(req->rq_export);
+        lock->l_export = req->rq_export;
+        l_lock(&lock->l_resource->lr_namespace->ns_lock);
+        list_add(&lock->l_export_chain,
+                 &lock->l_export->exp_ldlm_data.led_held_locks);
+        l_unlock(&lock->l_resource->lr_namespace->ns_lock);
+
         err = ldlm_lock_enqueue(lock, cookie, cookielen, &flags,
                                 ldlm_server_completion_ast,
                                 ldlm_server_blocking_ast);
@@ -266,14 +284,6 @@ int ldlm_handle_enqueue(struct ptlrpc_request *req)
                 dlm_rep->lock_mode = lock->l_req_mode;
         }
 
-        lock->l_export = req->rq_export;
-        if (lock->l_export) {
-                l_lock(&lock->l_resource->lr_namespace->ns_lock);
-                list_add(&lock->l_export_chain,
-                         &lock->l_export->exp_ldlm_data.led_held_locks);
-                l_unlock(&lock->l_resource->lr_namespace->ns_lock);
-        }
-
         EXIT;
  out:
         if (lock)
@@ -481,11 +491,16 @@ static int ldlm_callback_handler(struct ptlrpc_request *req)
         }
 
         if (req->rq_export == NULL) {
+                struct ldlm_request *dlm_req;
+
                 CERROR("operation %d with bad export (ptl req %d/rep %d)\n",
                        req->rq_reqmsg->opc, req->rq_request_portal,
                        req->rq_reply_portal);
                 CERROR("--> export addr: "LPX64", cookie: "LPX64"\n",
                        req->rq_reqmsg->addr, req->rq_reqmsg->cookie);
+                dlm_req = lustre_msg_buf(req->rq_reqmsg, 0);
+                CERROR("--> lock addr: "LPX64", cookie: "LPX64"\n",
+                       dlm_req->lock_handle1.addr,dlm_req->lock_handle1.cookie);
                 CERROR("--> ignoring this error as a temporary workaround!  "
                        "beware!\n");
                 //RETURN(-ENOTCONN);
@@ -554,7 +569,7 @@ static int ldlm_cancel_handler(struct ptlrpc_request *req)
 }
 
 
-static int ldlm_iocontrol(long cmd, struct lustre_handle *conn, int len,
+static int ldlm_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
                           void *karg, void *uarg)
 {
         struct obd_device *obddev = class_conn2obd(conn);
diff --git a/lustre/ldlm/ldlm_resource.c b/lustre/ldlm/ldlm_resource.c
index 64ec591..8bb5aa3 100644 (file)
@@ -68,23 +68,19 @@ struct ldlm_namespace *ldlm_namespace_new(char *name, __u32 client)
         ENTRY;
 
         OBD_ALLOC(ns, sizeof(*ns));
-        if (!ns) {
-                LBUG();
-                GOTO(out, NULL);
-        }
+        if (!ns)
+                RETURN(NULL);
 
         ns->ns_hash = vmalloc(sizeof(*ns->ns_hash) * RES_HASH_SIZE);
-        if (!ns->ns_hash) {
-                LBUG();
-                GOTO(out, ns);
-        }
-        obd_memory += sizeof(*ns->ns_hash) * RES_HASH_SIZE;
+        if (!ns->ns_hash)
+                GOTO(out_ns, NULL);
+
+        atomic_add(sizeof(*ns->ns_hash) * RES_HASH_SIZE, &obd_memory);
 
         OBD_ALLOC(ns->ns_name, strlen(name) + 1);
-        if (!ns->ns_name) {
-                LBUG();
-                GOTO(out, ns);
-        }
+        if (!ns->ns_name)
+                GOTO(out_hash, NULL);
+
         strcpy(ns->ns_name, name);
 
         INIT_LIST_HEAD(&ns->ns_root_list);
@@ -109,16 +105,12 @@ struct ldlm_namespace *ldlm_namespace_new(char *name, __u32 client)
         ldlm_proc_namespace(ns);
         RETURN(ns);
 
- out:
-        if (ns && ns->ns_hash) {
-                memset(ns->ns_hash, 0x5a, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
-                vfree(ns->ns_hash);
-                obd_memory -= sizeof(*ns->ns_hash) * RES_HASH_SIZE;
-        }
-        if (ns && ns->ns_name)
-                OBD_FREE(ns->ns_name, strlen(name) + 1);
-        if (ns)
-                OBD_FREE(ns, sizeof(*ns));
+out_hash:
+        memset(ns->ns_hash, 0x5a, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
+        vfree(ns->ns_hash);
+        atomic_sub(sizeof(*ns->ns_hash) * RES_HASH_SIZE, &obd_memory);
+out_ns:
+        OBD_FREE(ns, sizeof(*ns));
         return NULL;
 }
 
@@ -212,7 +204,7 @@ int ldlm_namespace_free(struct ldlm_namespace *ns)
 
         memset(ns->ns_hash, 0x5a, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
         vfree(ns->ns_hash /* , sizeof(*ns->ns_hash) * RES_HASH_SIZE */);
-        obd_memory -= sizeof(*ns->ns_hash) * RES_HASH_SIZE;
+        atomic_sub(sizeof(*ns->ns_hash) * RES_HASH_SIZE, &obd_memory);
         OBD_FREE(ns->ns_name, strlen(ns->ns_name) + 1);
         OBD_FREE(ns, sizeof(*ns));
 
@@ -411,7 +403,6 @@ int ldlm_resource_putref(struct ldlm_resource *res)
                 ENTRY;
                 CDEBUG(D_INFO, "putref res: %p count: %d\n", res,
                        atomic_read(&res->lr_refcount));
-        out:
                 LASSERT(atomic_read(&res->lr_refcount) >= 0);
         }
 
diff --git a/lustre/lib/simple.c b/lustre/lib/simple.c
index 4b423d4..cb4ccda 100644 (file)
@@ -139,25 +139,28 @@ struct dentry *simple_mknod(struct dentry *dir, char *name, int mode)
         down(&dir->d_inode->i_sem);
         dchild = lookup_one_len(name, dir, strlen(name));
         if (IS_ERR(dchild))
-                GOTO(out, PTR_ERR(dchild));
+                GOTO(out_up, dchild);
 
         if (dchild->d_inode) {
                 if ((dchild->d_inode->i_mode & S_IFMT) != S_IFREG)
-                        GOTO(out, err = -EEXIST);
+                        GOTO(out_err, err = -EEXIST);
 
-                GOTO(out, dchild);
+                GOTO(out_up, dchild);
         }
 
         err = vfs_create(dir->d_inode, dchild, (mode & ~S_IFMT) | S_IFREG);
-        EXIT;
-out:
-        up(&dir->d_inode->i_sem);
-        if (err) {
-                dput(dchild);
-                RETURN(ERR_PTR(err));
-        }
+        if (err)
+                GOTO(out_err, err);
 
+        up(&dir->d_inode->i_sem);
         RETURN(dchild);
+
+out_err:
+        dput(dchild);
+        dchild = ERR_PTR(err);
+out_up:
+        up(&dir->d_inode->i_sem);
+        return dchild;
 }
 
 /* utility to make a directory */
@@ -172,25 +175,28 @@ struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode)
         down(&dir->d_inode->i_sem);
         dchild = lookup_one_len(name, dir, strlen(name));
         if (IS_ERR(dchild))
-                GOTO(out, PTR_ERR(dchild));
+                GOTO(out_up, dchild);
 
         if (dchild->d_inode) {
                 if (!S_ISDIR(dchild->d_inode->i_mode))
-                        GOTO(out, err = -ENOTDIR);
+                        GOTO(out_err, err = -ENOTDIR);
 
-                GOTO(out, dchild);
+                GOTO(out_up, dchild);
         }
 
         err = vfs_mkdir(dir->d_inode, dchild, mode);
-        EXIT;
-out:
-        up(&dir->d_inode->i_sem);
-        if (err) {
-                dput(dchild);
-                RETURN(ERR_PTR(err));
-        }
+        if (err)
+                GOTO(out_err, err);
 
+        up(&dir->d_inode->i_sem);
         RETURN(dchild);
+
+out_err:
+        dput(dchild);
+        dchild = ERR_PTR(err);
+out_up:
+        up(&dir->d_inode->i_sem);
+        return dchild;
 }
 
 /*
diff --git a/lustre/llite/file.c b/lustre/llite/file.c
index 3d343a0..b265ffc 100644 (file)
@@ -804,25 +804,25 @@ static int ll_inode_revalidate(struct dentry *dentry)
         struct lov_stripe_md *lsm;
         ENTRY;
 
-        if (!inode) { 
+        if (!inode) {
                 CERROR("REPORT THIS LINE TO PETER\n");
                 RETURN(0);
         }
-        
-        if (! ll_have_md_lock(dentry)) { 
-                struct ptlrpc_request *req;
+
+        if (!ll_have_md_lock(dentry)) {
+                struct ptlrpc_request *req = NULL;
                 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
-                struct mds_body *body; 
-                int rc, datalen, valid; 
+                struct mds_body *body;
+                unsigned long valid = 0;
+                int datalen = 0;
+                int rc;
 
                 if (S_ISREG(inode->i_mode)) {
                         datalen = obd_size_wiremd(&sbi->ll_osc_conn, NULL);
                         valid |= OBD_MD_FLEASIZE;
                 }
-                rc = mdc_getattr(&sbi->ll_mdc_conn, 
-                                 inode->i_ino, 
-                                 inode->i_mode, valid,
-                                 datalen, &req);
+                rc = mdc_getattr(&sbi->ll_mdc_conn, inode->i_ino,
+                                 inode->i_mode, valid, datalen, &req);
                 if (rc) {
                         CERROR("failure %d inode "LPX64"\n", rc, inode->i_ino);
                         ptlrpc_req_finished(req);
@@ -832,9 +832,7 @@ static int ll_inode_revalidate(struct dentry *dentry)
                 body = lustre_msg_buf(req->rq_repmsg, 0);
                 ll_update_inode(inode, body);
                 ptlrpc_req_finished(req);
-        } 
-                
-        
+        }
 
         lsm = ll_i2info(inode)->lli_smd;
         if (!lsm)       /* object not yet allocated, don't validate size */
index 00a6aac..54a81a4 100644 (file)
@@ -377,7 +377,7 @@ int ll_intent_lock(struct inode *parent, struct dentry **de,
         RETURN(rc);
 
  drop_req:
-        ptlrpc_free_req(request);
+        ptlrpc_req_finished(request);
  drop_lock:
 #warning FIXME: must release lock here
         RETURN(rc);
index ee1631e..c572590 100644 (file)
@@ -241,23 +241,27 @@ static int ll_prepare_write(struct file *file, struct page *page, unsigned from,
         return rc;
 }
 
-/* returns the page unlocked, but with a reference */
-static int ll_writepage(struct page *page)
-{
-        struct inode *inode = page->mapping->host;
-        int err;
-        ENTRY;
+/* Write a page from kupdated or kswapd.
+ *
+ * We unlock the page even in the face of an error, otherwise dirty
+ * pages could OOM the system if they cannot be written.  Also, there
+ * is nobody to return an error code to from here - the application
+ * may not even be running anymore.
+ *
+ * Returns the page unlocked, but with a reference.
+ */
+static int ll_writepage(struct page *page) {
+        struct inode *inode = page->mapping->host; int err; ENTRY;
 
-        if (!PageLocked(page))
-                LBUG();
+        LASSERT(PageLocked(page));
 
+        /* XXX need to make sure we have LDLM lock on this page */
         err = ll_brw(OBD_BRW_WRITE, inode, page, 1);
-        if ( !err ) {
-                //SetPageUptodate(page);
-                set_page_clean(page);
-        } else {
+        if (err)
                 CERROR("ll_brw failure %d\n", err);
-        }
+        else
+                set_page_clean(page);
+
         unlock_page(page);
         RETURN(err);
 }
index d28a6c9..247015c 100644 (file)
@@ -59,8 +59,14 @@ int lov_packmd(struct lustre_handle *conn, struct lov_mds_md **lmmp,
         int i;
         ENTRY;
 
-        if (lsm)
+        if (lsm) {
+                if (lsm->lsm_magic != LOV_MAGIC) {
+                        CERROR("bad mem LOV MAGIC: %#08x != %#08x\n",
+                               lsm->lsm_magic, LOV_MAGIC);
+                        RETURN(-EINVAL);
+                }
                 stripe_count = lsm->lsm_stripe_count;
+        }
 
         /* XXX LOV STACKING call into osc for sizes */
         lmm_size = lov_mds_md_size(ost_count);
@@ -90,6 +96,7 @@ int lov_packmd(struct lustre_handle *conn, struct lov_mds_md **lmmp,
         /* XXX endianness */
         lmm->lmm_magic = (lsm->lsm_magic);
         lmm->lmm_object_id = (lsm->lsm_object_id);
+        LASSERT(lsm->lsm_object_id);
         lmm->lmm_stripe_size = (lsm->lsm_stripe_size);
         lmm->lmm_stripe_pattern = (lsm->lsm_stripe_pattern);
         lmm->lmm_stripe_offset = (lsm->lsm_stripe_offset);
@@ -97,9 +104,11 @@ int lov_packmd(struct lustre_handle *conn, struct lov_mds_md **lmmp,
 
         /* Only fill in the object ids which we are actually using.
          * Assumes lmm_objects is otherwise zero-filled. */
-        for (i = 0, loi = lsm->lsm_oinfo; i < stripe_count; i++, loi++)
+        for (i = 0, loi = lsm->lsm_oinfo; i < stripe_count; i++, loi++) {
                 /* XXX call down to osc_packmd() to do the packing */
+                LASSERT(loi->loi_id);
                 lmm->lmm_objects[loi->loi_ost_idx].l_object_id = (loi->loi_id);
+        }
 
         RETURN(lmm_size);
 }
@@ -118,9 +127,15 @@ int lov_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
         int i;
         ENTRY;
 
-        if (lmm)
+        if (lmm) {
                 /* endianness */
+                if (lmm->lmm_magic != LOV_MAGIC) {
+                        CERROR("bad wire LOV MAGIC: %#08x != %#08x\n",
+                               lmm->lmm_magic, LOV_MAGIC);
+                        RETURN(-EINVAL);
+                }
                 stripe_count = (lmm->lmm_stripe_count);
+        }
 
         if (!stripe_count)
                 stripe_count = lov->desc.ld_default_stripe_count;
@@ -156,6 +171,7 @@ int lov_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
         ost_offset = lsm->lsm_stripe_offset = (lmm->lmm_stripe_offset);
         lsm->lsm_magic = (lmm->lmm_magic);
         lsm->lsm_object_id = (lmm->lmm_object_id);
+        LASSERT(lsm->lsm_object_id);
         lsm->lsm_stripe_size = (lmm->lmm_stripe_size);
         lsm->lsm_stripe_pattern = (lmm->lmm_stripe_pattern);
 
@@ -171,6 +187,7 @@ int lov_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
                 loi->loi_ost_idx = ost_offset;
                 loi++;
         }
+        LASSERT(loi - lsm->lsm_oinfo == stripe_count);
 
         RETURN(lsm_size);
 }
index f8a5f0f..daeccf1 100644 (file)
@@ -203,7 +203,7 @@ static int mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
                         ll_invalidate_inode_pages(inode);
                 }
 
-                if ( inode != inode->i_sb->s_root->d_inode ) {
+                if (inode != inode->i_sb->s_root->d_inode) {
                         /* XXX should this igrab move up 12 lines? */
                         LASSERT(igrab(inode) == inode);
                         d_delete_aliases(inode);
@@ -569,6 +569,7 @@ int mdc_readpage(struct lustre_handle *conn, obd_id ino, int type, __u64 offset,
         bulk->bp_buflen = PAGE_SIZE;
         bulk->bp_buf = addr;
         bulk->bp_xid = req->rq_xid;
+        desc->bd_ptl_ev_hdlr = NULL;
         desc->bd_portal = MDS_BULK_PORTAL;
 
         rc = ptlrpc_register_bulk(desc);
@@ -591,7 +592,7 @@ int mdc_readpage(struct lustre_handle *conn, obd_id ino, int type, __u64 offset,
 
         EXIT;
  out2:
-        ptlrpc_free_bulk(desc);
+        ptlrpc_bulk_decref(desc);
  out:
         *request = req;
         return rc;
index 3332d0b..6a0855e 100644 (file)
@@ -5,15 +5,10 @@
 
 DEFS= 
 
-if LINUX25
-FSMOD = mds_ext3
-else
-FSMOD = mds_extN
-endif
-
 MODULE = mds
-modulefs_DATA = mds.o $(FSMOD).o
-EXTRA_PROGRAMS = mds $(FSMOD)
+
+modulefs_DATA = mds.o
+EXTRA_PROGRAMS = mds
 
 LINX= mds_updates.c simple.c ll_pack.c target.c
 
index 393b793..8eab05f 100644 (file)
@@ -40,6 +40,8 @@
 #include <linux/buffer_head.h>
 #endif
 #include <linux/obd_lov.h>
+#include <linux/lustre_mds.h>
+#include <linux/lustre_fsfilt.h>
 #include <linux/lprocfs_status.h>
 
 static kmem_cache_t *mds_file_cache;
@@ -73,12 +75,11 @@ static int mds_bulk_timeout(void *data)
 static int mds_sendpage(struct ptlrpc_request *req, struct file *file,
                         __u64 offset)
 {
-        int rc = 0;
-        struct mds_obd *mds = mds_req2mds(req);
         struct ptlrpc_bulk_desc *desc;
         struct ptlrpc_bulk_page *bulk;
         struct l_wait_info lwi;
         char *buf;
+        int rc = 0;
         ENTRY;
 
         desc = ptlrpc_prep_bulk(req->rq_connection);
@@ -93,7 +94,8 @@ static int mds_sendpage(struct ptlrpc_request *req, struct file *file,
         if (buf == NULL)
                 GOTO(cleanup_bulk, rc = -ENOMEM);
 
-        rc = mds_fs_readpage(mds, file, buf, PAGE_SIZE, (loff_t *)&offset);
+        rc = fsfilt_readpage(req->rq_export->exp_obd, file, buf, PAGE_SIZE,
+                             (loff_t *)&offset);
 
         if (rc != PAGE_SIZE)
                 GOTO(cleanup_buf, rc = -EIO);
@@ -101,6 +103,7 @@ static int mds_sendpage(struct ptlrpc_request *req, struct file *file,
         bulk->bp_xid = req->rq_xid;
         bulk->bp_buf = buf;
         bulk->bp_buflen = PAGE_SIZE;
+        desc->bd_ptl_ev_hdlr = NULL;
         desc->bd_portal = MDS_BULK_PORTAL;
 
         rc = ptlrpc_send_bulk(desc);
@@ -127,7 +130,7 @@ static int mds_sendpage(struct ptlrpc_request *req, struct file *file,
  cleanup_buf:
         OBD_FREE(buf, PAGE_SIZE);
  cleanup_bulk:
-        ptlrpc_free_bulk(desc);
+        ptlrpc_bulk_decref(desc);
  out:
         return rc;
 }
@@ -596,8 +599,9 @@ int mds_pack_md(struct mds_obd *mds, struct ptlrpc_request *req,
          * discarded right after unpacking, and the LOV can figure out the
          * size itself from the ost count.
          */
-        if ((rc = mds_fs_get_md(mds, inode, lmm, lmm_size)) < 0) {
-                CDEBUG(D_INFO, "No md for ino %lu: rc = %d\n", inode->i_ino, rc);
+        if ((rc = fsfilt_get_md(req->rq_export->exp_obd, inode,
+                                lmm, lmm_size)) < 0) {
+                CDEBUG(D_INFO, "No md for ino %lu: rc = %d\n", inode->i_ino,rc);
         } else if (rc > 0) {
                 body->valid |= OBD_MD_FLEASIZE;
                 rc = 0;
@@ -623,7 +627,7 @@ static int mds_getattr_internal(struct mds_obd *mds, struct dentry *dentry,
         mds_pack_inode2fid(&body->fid1, inode);
         mds_pack_inode2body(body, inode);
 
-        if (S_ISREG(inode->i_mode)) {
+        if (S_ISREG(inode->i_mode) /* && reqbody->valid & OBD_MD_FLEASIZE */) {
                 rc = mds_pack_md(mds, req, reply_off + 1, body, inode);
         } else if (S_ISLNK(inode->i_mode) && reqbody->valid & OBD_MD_LINKNAME) {
                 char *symname = lustre_msg_buf(req->rq_repmsg, reply_off + 1);
@@ -656,7 +660,7 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req)
         __u64 res_id[3] = {0, 0, 0};
         ENTRY;
 
-        LASSERT(!strcmp(req->rq_export->exp_obd->obd_type->typ_name, "mds"));
+        LASSERT(!strcmp(obd->obd_type->typ_name, "mds"));
 
         if (req->rq_reqmsg->bufcount <= offset + 1) {
                 LBUG();
@@ -748,7 +752,7 @@ static int mds_getattr(int offset, struct ptlrpc_request *req)
 
         inode = de->d_inode;
         if (S_ISREG(body->fid1.f_type)) {
-                int rc = mds_fs_get_md(mds, inode, NULL, 0);
+                int rc = fsfilt_get_md(req->rq_export->exp_obd, inode, NULL, 0);
                 CDEBUG(D_INODE, "got %d bytes MD data for inode %lu\n",
                        rc, inode->i_ino);
                 if (rc < 0) {
@@ -786,18 +790,18 @@ static int mds_getattr(int offset, struct ptlrpc_request *req)
 
         req->rq_status = mds_getattr_internal(mds, de, req, body, 0);
 
+        EXIT;
 out:
         l_dput(de);
 out_pop:
         pop_ctxt(&saved, &mds->mds_ctxt, &uc);
-        RETURN(rc);
+        return rc;
 }
 
 static int mds_statfs(struct ptlrpc_request *req)
 {
-        struct mds_obd *mds = mds_req2mds(req);
+        struct obd_device *obd = req->rq_export->exp_obd;
         struct obd_statfs *osfs;
-        struct statfs sfs;
         int rc, size = sizeof(*osfs);
         ENTRY;
 
@@ -807,24 +811,24 @@ static int mds_statfs(struct ptlrpc_request *req)
                 GOTO(out, rc);
         }
 
-        rc = mds_fs_statfs(mds, &sfs);
+        osfs = lustre_msg_buf(req->rq_repmsg, 0);
+        rc = fsfilt_statfs(obd, obd->u.mds.mds_sb, osfs);
         if (rc) {
                 CERROR("mds: statfs failed: rc %d\n", rc);
                 GOTO(out, rc);
         }
-        osfs = lustre_msg_buf(req->rq_repmsg, 0);
-        memset(osfs, 0, size);
-        statfs_pack(osfs, &sfs);
         obd_statfs_pack(osfs, osfs);
 
+        EXIT;
 out:
         req->rq_status = rc;
-        RETURN(0);
+        return 0;
 }
 
 static struct mds_file_data *mds_handle2mfd(struct lustre_handle *handle)
 {
         struct mds_file_data *mfd = NULL;
+        ENTRY;
 
         if (!handle || !handle->addr)
                 RETURN(NULL);
@@ -836,12 +840,13 @@ static struct mds_file_data *mds_handle2mfd(struct lustre_handle *handle)
         if (mfd->mfd_servercookie != handle->cookie)
                 RETURN(NULL);
 
-        return mfd;
+        RETURN(mfd);
 }
 
 static int mds_store_md(struct mds_obd *mds, struct ptlrpc_request *req,
                         int offset, struct mds_body *body, struct inode *inode)
 {
+        struct obd_device *obd = req->rq_export->exp_obd;
         struct lov_mds_md *lmm = lustre_msg_buf(req->rq_reqmsg, offset);
         int lmm_size = req->rq_reqmsg->buflens[offset];
         struct obd_run_ctxt saved;
@@ -866,17 +871,17 @@ static int mds_store_md(struct mds_obd *mds, struct ptlrpc_request *req,
         uc.ouc_cap = body->capability;
         push_ctxt(&saved, &mds->mds_ctxt, &uc);
         mds_start_transno(mds);
-        handle = mds_fs_start(mds, inode, MDS_FSOP_SETATTR);
+        handle = fsfilt_start(obd, inode,FSFILT_OP_SETATTR);
         if (IS_ERR(handle)) {
                 rc = PTR_ERR(handle);
                 mds_finish_transno(mds, handle, req, rc);
                 GOTO(out_ea, rc);
         }
 
-        rc = mds_fs_set_md(mds, inode, handle, lmm, lmm_size);
+        rc = fsfilt_set_md(obd, inode,handle,lmm,lmm_size);
         rc = mds_finish_transno(mds, handle, req, rc);
 
-        rc2 = mds_fs_commit(mds, inode, handle);
+        rc2 = fsfilt_commit(obd, inode, handle);
         if (rc2 && !rc)
                 rc = rc2;
 out_ea:
@@ -1096,7 +1101,7 @@ int mds_handle(struct ptlrpc_request *req);
 static int check_for_next_transno(struct mds_obd *mds)
 {
         struct ptlrpc_request *req;
-        req = list_entry(mds->mds_recovery_queue.next, 
+        req = list_entry(mds->mds_recovery_queue.next,
                          struct ptlrpc_request, rq_list);
         return req->rq_reqmsg->transno == mds->mds_next_recovery_transno;
 }
@@ -1104,10 +1109,10 @@ static int check_for_next_transno(struct mds_obd *mds)
 static void process_recovery_queue(struct mds_obd *mds)
 {
         struct ptlrpc_request *req;
-        
+
         for (;;) {
                 spin_lock(&mds->mds_processing_task_lock);
-                req = list_entry(mds->mds_recovery_queue.next, 
+                req = list_entry(mds->mds_recovery_queue.next,
                                  struct ptlrpc_request, rq_list);
 
                 if (req->rq_reqmsg->transno != mds->mds_next_recovery_transno) {
@@ -1121,7 +1126,7 @@ static void process_recovery_queue(struct mds_obd *mds)
 
                 DEBUG_REQ(D_HA, req, "");
                 mds_handle(req);
-                
+
                 if (list_empty(&mds->mds_recovery_queue))
                         break;
         }
@@ -1148,7 +1153,7 @@ static int queue_recovery_request(struct ptlrpc_request *req,
 
         /* XXX O(n^2) */
         list_for_each(tmp, &mds->mds_recovery_queue) {
-                struct ptlrpc_request *reqiter = 
+                struct ptlrpc_request *reqiter =
                         list_entry(tmp, struct ptlrpc_request, rq_list);
                 if (reqiter->rq_reqmsg->transno > transno) {
                         list_add_tail(&req->rq_list, &reqiter->rq_list);
@@ -1180,7 +1185,7 @@ static int queue_recovery_request(struct ptlrpc_request *req,
         return 0;
 }
 
-static int filter_recovery_request(struct ptlrpc_request *req, 
+static int filter_recovery_request(struct ptlrpc_request *req,
                                    struct mds_obd *mds, int *process)
 {
         switch (req->rq_reqmsg->opc) {
@@ -1189,13 +1194,13 @@ static int filter_recovery_request(struct ptlrpc_request *req,
         case MDS_OPEN:
                *process = 1;
                RETURN(0);
-            
+
         case MDS_GETSTATUS: /* used in unmounting */
         case MDS_REINT:
         case LDLM_ENQUEUE:
                 *process = queue_recovery_request(req, mds);
                 RETURN(0);
-                
+
         default:
                 DEBUG_REQ(D_ERROR, req, "not permitted during recovery");
                 *process = 0;
@@ -1324,13 +1329,13 @@ int mds_handle(struct ptlrpc_request *req)
 
         case MDS_REINT: {
                 int size = sizeof(struct mds_body);
-                int opc = *(u32 *)lustre_msg_buf(req->rq_reqmsg, 0), 
+                int opc = *(u32 *)lustre_msg_buf(req->rq_reqmsg, 0),
                         realopc = opc & REINT_OPCODE_MASK;
-                        
+
                 DEBUG_REQ(D_INODE, req, "reint (%s%s)",
                           reint_names[realopc],
                           opc & REINT_REPLAYING ? "|REPLAYING" : "");
-                          
+
                 OBD_FAIL_RETURN(OBD_FAIL_MDS_REINT_NET, 0);
 
                 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen,
@@ -1400,7 +1405,7 @@ int mds_handle(struct ptlrpc_request *req)
                 DEBUG_REQ(D_HA, req, "LAST_REPLAY, queuing reply");
                 return mds_queue_final_reply(req, rc);
         }
-        
+
         /* MDS_CONNECT / EALREADY (note: not -EALREADY!) isn't an error */
         if (rc && (req->rq_reqmsg->opc != MDS_CONNECT ||
                    rc != EALREADY)) {
@@ -1488,16 +1493,18 @@ static int mds_setup(struct obd_device *obddev, obd_count len, void *buf)
         if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2)
                 GOTO(err_dec, rc = -EINVAL);
 
-        mds->mds_fstype = strdup(data->ioc_inlbuf2);
+        obddev->obd_fsops = fsfilt_get_ops(data->ioc_inlbuf2);
+        if (IS_ERR(obddev->obd_fsops))
+                GOTO(err_dec, rc = PTR_ERR(obddev->obd_fsops));
 
-        mnt = do_kern_mount(mds->mds_fstype, 0, data->ioc_inlbuf1, NULL);
+        mnt = do_kern_mount(data->ioc_inlbuf2, 0, data->ioc_inlbuf1, NULL);
         if (IS_ERR(mnt)) {
                 rc = PTR_ERR(mnt);
                 CERROR("do_kern_mount failed: rc = %d\n", rc);
-                GOTO(err_kfree, rc);
+                GOTO(err_ops, rc);
         }
 
-        CERROR("%s: mnt is %p\n", data->ioc_inlbuf1, mnt);
+        CDEBUG(D_SUPER, "%s: mnt = %p\n", data->ioc_inlbuf1, mnt);
         mds->mds_sb = mnt->mnt_root->d_inode->i_sb;
         if (!mds->mds_sb)
                 GOTO(err_put, rc = -ENODEV);
@@ -1524,7 +1531,7 @@ static int mds_setup(struct obd_device *obddev, obd_count len, void *buf)
         mds->mds_processing_task = 0;
         INIT_LIST_HEAD(&mds->mds_recovery_queue);
         INIT_LIST_HEAD(&mds->mds_delayed_reply_queue);
-        
+
         RETURN(0);
 
 err_fs:
@@ -1534,8 +1541,8 @@ err_put:
         mntput(mds->mds_vfsmnt);
         mds->mds_sb = 0;
         lock_kernel();
-err_kfree:
-        kfree(mds->mds_fstype);
+err_ops:
+        fsfilt_put_ops(obddev->obd_fsops);
 err_dec:
         MOD_DEC_USE_COUNT;
         RETURN(rc);
@@ -1567,7 +1574,6 @@ static int mds_cleanup(struct obd_device *obddev)
         unlock_kernel();
         mntput(mds->mds_vfsmnt);
         mds->mds_sb = 0;
-        kfree(mds->mds_fstype);
 
         ldlm_namespace_free(obddev->obd_namespace);
 
@@ -1576,6 +1582,7 @@ static int mds_cleanup(struct obd_device *obddev)
         dev_clear_rdonly(2);
 #endif
         mds_fs_cleanup(obddev);
+        fsfilt_put_ops(obddev->obd_fsops);
 
         MOD_DEC_USE_COUNT;
         RETURN(0);
@@ -1806,7 +1813,6 @@ static struct obd_ops mdt_obd_ops = {
 
 static int __init mds_init(void)
 {
-
         mds_file_cache = kmem_cache_create("ll_mds_file_data",
                                            sizeof(struct mds_file_data),
                                            0, 0, NULL, NULL);
@@ -1816,20 +1822,17 @@ static int __init mds_init(void)
         class_register_type(&mds_obd_ops, status_class_var, LUSTRE_MDS_NAME);
         class_register_type(&mdt_obd_ops, 0, LUSTRE_MDT_NAME);
         ldlm_register_intent(ldlm_intent_policy);
-        return 0;
 
+        return 0;
 }
 
 static void __exit mds_exit(void)
 {
-
-
         ldlm_unregister_intent();
         class_unregister_type(LUSTRE_MDS_NAME);
         class_unregister_type(LUSTRE_MDT_NAME);
         if (kmem_cache_destroy(mds_file_cache))
                 CERROR("couldn't free MDS file cache\n");
-
 }
 
 MODULE_AUTHOR("Cluster File Systems <info@clusterfs.com>");
index 0fc96bd..7028603 100644 (file)
@@ -22,6 +22,7 @@
 #define DEBUG_SUBSYSTEM S_CLASS
 
 #include <linux/lustre_lite.h>
+#include <linux/lustre_fsfilt.h>
 #include <linux/lprocfs_status.h>
 
 int rd_uuid(char* page, char **start, off_t off, int count, int *eof, 
@@ -103,15 +104,12 @@ int rd_kbfree(char* page, char **start, off_t off, int count, int *eof,
         
 }
 
-int rd_fstype(char* page, char **start, off_t off, int count, int *eof, 
+int rd_fstype(char *page, char **start, off_t off, int count, int *eof,
               void *data)
-{               
-        struct obd_device* temp = (struct obd_device*)data;
-        struct mds_obd *mds = &temp->u.mds;
-        int len = 0;
-        len += snprintf(page, count, "%s\n", mds->mds_fstype); 
-        return len;  
+{
+        struct obd_device *obd = (struct obd_device *)data;
+
+        return snprintf(page, count, "%s\n", obd->obd_fsops->fs_type);
 }
 
 int rd_filestotal(char* page, char **start, off_t off, int count, int *eof, 
diff --git a/lustre/mds/mds_ext2.c b/lustre/mds/mds_ext2.c
deleted file mode 100644 (file)
index ef1d8e5..0000000
+++ /dev/null
@@ -1,145 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  lustre/mds/mds_ext2.c
- *
- *  Lustre Metadata Server (mds) journal abstraction routines
- *
- *  Copyright (C) 2002  Cluster File Systems, Inc.
- *  author: Andreas Dilger <adilger@clusterfs.com>
- *
- *  This code is issued under the GNU General Public License.
- *  See the file COPYING in this distribution
- *
- */
-
-#define DEBUG_SUBSYSTEM S_MDS
-
-#include <linux/fs.h>
-#include <linux/ext2_fs.h>
-#include <linux/lustre_mds.h>
-#include <linux/module.h>
-
-static void *mds_ext2_start(struct inode *inode, int nblocks)
-{
-        return (void *)1;
-}
-
-static int mds_ext2_stop(struct inode *inode, void *handle)
-{
-        return 0;
-}
-
-static int mds_ext2_setattr(struct dentry *dentry, void *handle,
-                            struct iattr *iattr)
-{
-        struct inode *inode = dentry->d_inode;
-
-        lock_kernel();
-
-        /* a _really_ horrible hack to avoid removing the data stored
-           in the block pointers; this data is the object id.
-           This will go into an extended attribute at some point.
-        */
-        if (iattr->ia_valid & ATTR_SIZE) {
-                /* ATTR_SIZE would invoke truncate: clear it */
-                iattr->ia_valid &= ~ATTR_SIZE;
-                inode->i_size = iattr->ia_size;
-
-                /* make sure _something_ gets set - so new inode
-                   goes to disk (probably won't work over XFS) */
-                if (!iattr->ia_valid & ATTR_MODE) {
-                        iattr->ia_valid |= ATTR_MODE;
-                        iattr->ia_mode = inode->i_mode;
-                }
-        }
-
-        if (inode->i_op->setattr)
-                rc = inode->i_op->setattr(dentry, iattr);
-        else
-                rc = inode_setattr(inode, iattr);
-
-        unlock_kernel();
-
-        return rc;
-}
-
-/*
- * FIXME: nasty hack - store the object id in the first two
- *        direct block spots.  This should be done with EAs...
- */
-static int mds_ext2_set_objid(struct inode *inode, void *handle, obd_id id)
-{
-        (__u64)(inode->u.ext2_i.i_data[0]) = cpu_to_le64(id);
-        return 0;
-}
-
-static int mds_ext2_get_objid(struct inode *inode, obd_id *id)
-{
-        *id = le64_to_cpu(inode->u.ext2_i.i_data[0]);
-
-        return 0;
-}
-
-static ssize_t mds_ext2_readpage(struct file *file, char *buf, size_t count,
-                                 loff_t *offset)
-{
-        if (S_ISREG(file->f_dentry->d_inode->i_mode))
-                return file->f_op->read(file, buf, count, offset);
-        else
-                return generic_file_read(file, buf, count, offset);
-}
-
-static struct mds_fs_operations mds_ext2_fs_ops;
-
-static void mds_ext2_delete_inode(struct inode *inode)
-{
-        if (S_ISREG(inode->i_mode))
-                mds_ext2_set_objid(inode, NULL, 0);
-
-        mds_ext2_fs_ops.cl_delete_inode(inode);
-}
-
-static int mds_ext2_set_last_rcvd(struct mds_obd *mds, void *handle)
-{
-        /* Bail for ext2 - can't tell when it is on disk anyways, sync? */
-        mds->mds_last_committed = mds->mds_last_rcvd;
-
-        return 0;
-}
-
-static int mds_ext2_journal_data(struct file *filp)
-{
-        return 0;
-}
-
-static struct mds_fs_operations mds_ext2_fs_ops = {
-        fs_owner:               THIS_MODULE,
-        fs_start:               mds_ext2_start,
-        fs_commit:              mds_ext2_stop,
-        fs_setattr:             mds_ext2_setattr,
-        fs_set_objid:           mds_ext2_set_objid,
-        fs_get_objid:           mds_ext2_get_objid,
-        fs_readpage:            mds_ext2_readpage,
-        fs_delete_inode:        mds_ext2_delete_inode,
-        cl_delete_inode:        clear_inode,
-        fs_journal_data:        mds_ext2_journal_data,
-        fs_set_last_rcvd:       mds_ext2_set_last_rcvd,
-};
-
-static int __init mds_ext2_init(void)
-{
-        return mds_register_fs_type(&mds_ext2_fs_ops, "ext2");
-}
-
-static void __exit mds_ext2_exit(void)
-{
-        mds_unregister_fs_type("ext2");
-}
-
-MODULE_AUTHOR("Cluster File Systems, Inc. <adilger@clusterfs.com>");
-MODULE_DESCRIPTION("Lustre MDS ext2 Filesystem Helper v0.1");
-MODULE_LICENSE("GPL");
-
-module_init(mds_ext2_init);
-module_exit(mds_ext2_exit);
diff --git a/lustre/mds/mds_extN.c b/lustre/mds/mds_extN.c
deleted file mode 100644 (file)
index 4091f8d..0000000
+++ /dev/null
@@ -1,356 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  lustre/mds/mds_extN.c
- *  Lustre Metadata Server (mds) journal abstraction routines
- *
- *  Copyright (C) 2002 Cluster File Systems, Inc.
- *   Author: Andreas Dilger <adilger@clusterfs.com>
- *
- *   This file is part of Lustre, http://www.lustre.org.
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_MDS
-
-#include <linux/fs.h>
-#include <linux/jbd.h>
-#include <linux/slab.h>
-#include <linux/extN_fs.h>
-#include <linux/extN_jbd.h>
-#include <linux/extN_xattr.h>
-#include <linux/kp30.h>
-#include <linux/lustre_mds.h>
-#include <linux/obd.h>
-#include <linux/module.h>
-
-static struct mds_fs_operations mds_extN_fs_ops;
-static kmem_cache_t *mcb_cache;
-static int mcb_cache_count;
-
-struct mds_cb_data {
-        struct journal_callback cb_jcb;
-        struct mds_obd *cb_mds;
-        __u64 cb_last_rcvd;
-};
-
-#define EXTN_XATTR_INDEX_LUSTRE         5
-#define XATTR_LUSTRE_MDS_OBJID          "system.lustre_mds_objid"
-
-/*
- * We don't currently need any additional blocks for rmdir and
- * unlink transactions because we are storing the OST oa_id inside
- * the inode (which we will be changing anyways as part of this
- * transaction).
- */
-static void *mds_extN_start(struct inode *inode, int op)
-{
-        /* For updates to the last received file */
-        int nblocks = EXTN_DATA_TRANS_BLOCKS;
-        void *handle;
-
-        switch(op) {
-        case MDS_FSOP_RMDIR:
-        case MDS_FSOP_UNLINK:
-                nblocks += EXTN_DELETE_TRANS_BLOCKS;
-                break;
-        case MDS_FSOP_RENAME:
-                /* We may be modifying two directories */
-                nblocks += EXTN_DATA_TRANS_BLOCKS;
-        case MDS_FSOP_SYMLINK:
-                /* Possible new block + block bitmap + GDT for long symlink */
-                nblocks += 3;
-        case MDS_FSOP_CREATE:
-        case MDS_FSOP_MKDIR:
-        case MDS_FSOP_MKNOD:
-                /* New inode + block bitmap + GDT for new file */
-                nblocks += 3;
-        case MDS_FSOP_LINK:
-                /* Change parent directory */
-                nblocks += EXTN_INDEX_EXTRA_TRANS_BLOCKS+EXTN_DATA_TRANS_BLOCKS;
-                break;
-        case MDS_FSOP_SETATTR:
-                /* Setattr on inode */
-                nblocks += 1;
-                break;
-        default: CERROR("unknown transaction start op %d\n", op);
-                 LBUG();
-        }
-
-        lock_kernel();
-        handle = journal_start(EXTN_JOURNAL(inode), nblocks);
-        unlock_kernel();
-
-        return handle;
-}
-
-static int mds_extN_commit(struct inode *inode, void *handle)
-{
-        int rc;
-
-        lock_kernel();
-        rc = journal_stop((handle_t *)handle);
-        unlock_kernel();
-
-        return rc;
-}
-
-static int mds_extN_setattr(struct dentry *dentry, void *handle,
-                            struct iattr *iattr)
-{
-        struct inode *inode = dentry->d_inode;
-        int rc;
-
-        lock_kernel();
-        if (inode->i_op->setattr)
-                rc = inode->i_op->setattr(dentry, iattr);
-        else
-                rc = inode_setattr(inode, iattr);
-
-        unlock_kernel();
-
-        return rc;
-}
-
-static int mds_extN_set_md(struct inode *inode, void *handle,
-                           struct lov_mds_md *lmm, int lmm_size)
-{
-        int rc;
-
-        down(&inode->i_sem);
-        lock_kernel();
-        rc = extN_xattr_set(handle, inode, EXTN_XATTR_INDEX_LUSTRE,
-                            XATTR_LUSTRE_MDS_OBJID, lmm, lmm_size, 0);
-        unlock_kernel();
-        up(&inode->i_sem);
-
-        if (rc) {
-                CERROR("error adding objectid "LPX64" to inode %lu: rc = %d\n",
-                       lmm->lmm_object_id, inode->i_ino, rc);
-                if (rc != -ENOSPC) LBUG();
-        }
-        return rc;
-}
-
-static int mds_extN_get_md(struct inode *inode, struct lov_mds_md *lmm,int size)
-{
-        int rc;
-
-        down(&inode->i_sem);
-        lock_kernel();
-        rc = extN_xattr_get(inode, EXTN_XATTR_INDEX_LUSTRE,
-                            XATTR_LUSTRE_MDS_OBJID, lmm, size);
-        unlock_kernel();
-        up(&inode->i_sem);
-
-        /* This gives us the MD size */
-        if (lmm == NULL)
-                return (rc == -ENODATA) ? 0 : rc;
-
-        if (rc < 0) {
-                CDEBUG(D_INFO, "error getting EA %s from MDS inode %lu: "
-                       "rc = %d\n", XATTR_LUSTRE_MDS_OBJID, inode->i_ino, rc);
-                memset(lmm, 0, size);
-                return (rc == -ENODATA) ? 0 : rc;
-        }
-
-        /* This field is byteswapped because it appears in the
-         * catalogue.  All others are opaque to the MDS */
-        lmm->lmm_object_id = le64_to_cpu(lmm->lmm_object_id);
-
-        return rc;
-}
-
-static ssize_t mds_extN_readpage(struct file *file, char *buf, size_t count,
-                                 loff_t *offset)
-{
-        struct inode *inode = file->f_dentry->d_inode;
-        int rc = 0;
-
-        if (S_ISREG(inode->i_mode))
-                rc = file->f_op->read(file, buf, count, offset);
-        else {
-                struct buffer_head *bh;
-
-                /* FIXME: this assumes the blocksize == count, but the calling
-                 *        function will detect this as an error for now */
-                bh = extN_bread(NULL, inode,
-                                *offset >> inode->i_sb->s_blocksize_bits,
-                                0, &rc);
-
-                if (bh) {
-                        memcpy(buf, bh->b_data, inode->i_blksize);
-                        brelse(bh);
-                        rc = inode->i_blksize;
-                }
-        }
-
-        return rc;
-}
-
-static void mds_extN_delete_inode(struct inode *inode)
-{
-        if (S_ISREG(inode->i_mode)) {
-                void *handle = mds_extN_start(inode, MDS_FSOP_UNLINK);
-
-                if (IS_ERR(handle)) {
-                        CERROR("unable to start transaction");
-                        EXIT;
-                        return;
-                }
-                if (mds_extN_set_md(inode, handle, NULL, 0))
-                        CERROR("error clearing objid on %lu\n", inode->i_ino);
-
-                if (mds_extN_fs_ops.cl_delete_inode)
-                        mds_extN_fs_ops.cl_delete_inode(inode);
-
-                if (mds_extN_commit(inode, handle))
-                        CERROR("error closing handle on %lu\n", inode->i_ino);
-        } else
-                mds_extN_fs_ops.cl_delete_inode(inode);
-}
-
-static void mds_extN_callback_status(struct journal_callback *jcb, int error)
-{
-        struct mds_cb_data *mcb = (struct mds_cb_data *)jcb;
-
-        CDEBUG(D_EXT2, "got callback for last_rcvd "LPD64": rc = %d\n",
-               mcb->cb_last_rcvd, error);
-        if (!error && mcb->cb_last_rcvd > mcb->cb_mds->mds_last_committed)
-                mcb->cb_mds->mds_last_committed = mcb->cb_last_rcvd;
-
-        kmem_cache_free(mcb_cache, mcb);
-        --mcb_cache_count;
-}
-
-static int mds_extN_set_last_rcvd(struct mds_obd *mds, void *handle)
-{
-        struct mds_cb_data *mcb;
-
-        mcb = kmem_cache_alloc(mcb_cache, GFP_NOFS);
-        if (!mcb)
-                RETURN(-ENOMEM);
-
-        ++mcb_cache_count;
-        mcb->cb_mds = mds;
-        mcb->cb_last_rcvd = mds->mds_last_rcvd;
-
-#ifdef HAVE_JOURNAL_CALLBACK_STATUS
-        CDEBUG(D_EXT2, "set callback for last_rcvd: "LPD64"\n",
-               mcb->cb_last_rcvd);
-        lock_kernel();
-        /* Note that an "incompatible pointer" warning here is OK for now */
-        journal_callback_set(handle, mds_extN_callback_status,
-                             (struct journal_callback *)mcb);
-        unlock_kernel();
-#else
-#warning "no journal callback kernel patch, faking it..."
-        {
-        static long next = 0;
-
-        if (time_after(jiffies, next)) {
-                CERROR("no journal callback kernel patch, faking it...\n");
-                next = jiffies + 300 * HZ;
-        }
-
-        mds_extN_callback_status((struct journal_callback *)mcb, 0);
-#endif
-
-        return 0;
-}
-
-static int mds_extN_journal_data(struct file *filp)
-{
-        struct inode *inode = filp->f_dentry->d_inode;
-
-        EXTN_I(inode)->i_flags |= EXTN_JOURNAL_DATA_FL;
-
-        return 0;
-}
-
-/*
- * We need to hack the return value for the free inode counts because
- * the current EA code requires one filesystem block per inode with EAs,
- * so it is possible to run out of blocks before we run out of inodes.
- *
- * This can be removed when the extN EA code is fixed.
- */
-static int mds_extN_statfs(struct super_block *sb, struct statfs *sfs)
-{
-        int rc = vfs_statfs(sb, sfs);
-
-        if (!rc && sfs->f_bfree < sfs->f_ffree)
-                sfs->f_ffree = sfs->f_bfree;
-
-        return rc;
-}
-
-static struct mds_fs_operations mds_extN_fs_ops = {
-        fs_owner:               THIS_MODULE,
-        fs_start:               mds_extN_start,
-        fs_commit:              mds_extN_commit,
-        fs_setattr:             mds_extN_setattr,
-        fs_set_md:              mds_extN_set_md,
-        fs_get_md:              mds_extN_get_md,
-        fs_readpage:            mds_extN_readpage,
-        fs_delete_inode:        mds_extN_delete_inode,
-        cl_delete_inode:        clear_inode,
-        fs_journal_data:        mds_extN_journal_data,
-        fs_set_last_rcvd:       mds_extN_set_last_rcvd,
-        fs_statfs:              mds_extN_statfs,
-};
-
-static int __init mds_extN_init(void)
-{
-        int rc;
-
-        //rc = extN_xattr_register();
-        mcb_cache = kmem_cache_create("mds_extN_mcb",
-                                      sizeof(struct mds_cb_data), 0,
-                                      0, NULL, NULL);
-        if (!mcb_cache) {
-                CERROR("error allocating MDS journal callback cache\n");
-                GOTO(out, rc = -ENOMEM);
-        }
-
-        rc = mds_register_fs_type(&mds_extN_fs_ops, "extN");
-
-        if (rc)
-                kmem_cache_destroy(mcb_cache);
-out:
-        return rc;
-}
-
-static void __exit mds_extN_exit(void)
-{
-        int rc;
-
-        mds_unregister_fs_type("extN");
-        rc = kmem_cache_destroy(mcb_cache);
-
-        if (rc || mcb_cache_count) {
-                CERROR("can't free MDS callback cache: count %d, rc = %d\n",
-                       mcb_cache_count, rc);
-        }
-
-        //rc = extN_xattr_unregister();
-}
-
-MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
-MODULE_DESCRIPTION("Lustre MDS extN Filesystem Helper v0.1");
-MODULE_LICENSE("GPL");
-
-module_init(mds_extN_init);
-module_exit(mds_extN_exit);
index 163a45f..83201aa 100644 (file)
 #include <linux/obd_class.h>
 #include <linux/obd_support.h>
 #include <linux/lustre_lib.h>
-
-LIST_HEAD(mds_fs_types);
-
-struct mds_fs_type {
-        struct list_head                 mft_list;
-        struct mds_fs_operations        *mft_ops;
-        char                            *mft_name;
-};
+#include <linux/lustre_fsfilt.h>
 
 /* This limit is arbitrary, but for now we fit it in 1 page (32k clients) */
 #define MDS_MAX_CLIENTS (PAGE_SIZE * 8)
@@ -322,7 +315,7 @@ static int mds_fs_prep(struct obd_device *obddev)
                 GOTO(err_pop, rc = -ENOENT);
         }
 
-        rc = mds_fs_journal_data(mds, f);
+        rc = fsfilt_journal_data(obddev, f);
         if (rc) {
                 CERROR("cannot journal data on %s: rc = %d\n", LAST_RCVD, rc);
                 GOTO(err_filp, rc);
@@ -347,113 +340,10 @@ err_filp:
         goto err_pop;
 }
 
-static struct mds_fs_operations *mds_search_fs_type(const char *name)
-{
-        struct list_head *p;
-        struct mds_fs_type *type;
-
-        /* lock mds_fs_types list */
-        list_for_each(p, &mds_fs_types) {
-                type = list_entry(p, struct mds_fs_type, mft_list);
-                if (!strcmp(type->mft_name, name)) {
-                        /* unlock mds_fs_types list */
-                        return type->mft_ops;
-                }
-        }
-        /* unlock mds_fs_types list */
-        return NULL;
-}
-
-int mds_register_fs_type(struct mds_fs_operations *ops, const char *name)
-{
-        struct mds_fs_operations *found;
-        struct mds_fs_type *type;
-
-        if ((found = mds_search_fs_type(name))) {
-                if (found != ops) {
-                        CERROR("different operations for type %s\n", name);
-                        RETURN(-EEXIST);
-                }
-                return 0;
-        }
-        OBD_ALLOC(type, sizeof(*type));
-        if (!type)
-                RETURN(-ENOMEM);
-
-        INIT_LIST_HEAD(&type->mft_list);
-        type->mft_ops = ops;
-        type->mft_name = strdup(name);
-        if (!type->mft_name) {
-                OBD_FREE(type, sizeof(*type));
-                RETURN(-ENOMEM);
-        }
-        MOD_INC_USE_COUNT;
-        list_add(&type->mft_list, &mds_fs_types);
-
-        return 0;
-}
-
-void mds_unregister_fs_type(const char *name)
-{
-        struct list_head *p;
-
-        /* lock mds_fs_types list */
-        list_for_each(p, &mds_fs_types) {
-                struct mds_fs_type *type;
-
-                type = list_entry(p, struct mds_fs_type, mft_list);
-                if (!strcmp(type->mft_name, name)) {
-                        list_del(p);
-                        kfree(type->mft_name);
-                        OBD_FREE(type, sizeof(*type));
-                        MOD_DEC_USE_COUNT;
-                        break;
-                }
-        }
-        /* unlock mds_fs_types list */
-}
-
-struct mds_fs_operations *mds_fs_get_ops(char *fstype)
-{
-        struct mds_fs_operations *fs_ops;
-
-        if (!(fs_ops = mds_search_fs_type(fstype))) {
-                char name[32];
-                int rc;
-
-                snprintf(name, sizeof(name) - 1, "mds_%s", fstype);
-                name[sizeof(name) - 1] = '\0';
-
-                if ((rc = request_module(name))) {
-                        fs_ops = mds_search_fs_type(fstype);
-                        CDEBUG(D_INFO, "Loaded module '%s'\n", name);
-                        if (!fs_ops)
-                                rc = -ENOENT;
-                }
-
-                if (rc) {
-                        CERROR("Can't find MDS fs interface '%s'\n", name);
-                        RETURN(ERR_PTR(rc));
-                }
-        }
-        __MOD_INC_USE_COUNT(fs_ops->fs_owner);
-
-        return fs_ops;
-}
-
-void mds_fs_put_ops(struct mds_fs_operations *fs_ops)
-{
-        __MOD_DEC_USE_COUNT(fs_ops->fs_owner);
-}
-
 int mds_fs_setup(struct obd_device *obddev, struct vfsmount *mnt)
 {
         struct mds_obd *mds = &obddev->u.mds;
-        int rc;
-
-        mds->mds_fsops = mds_fs_get_ops(mds->mds_fstype);
-        if (IS_ERR(mds->mds_fsops))
-                RETURN(PTR_ERR(mds->mds_fsops));
+        ENTRY;
 
         mds->mds_vfsmnt = mnt;
 
@@ -462,40 +352,7 @@ int mds_fs_setup(struct obd_device *obddev, struct vfsmount *mnt)
         mds->mds_ctxt.pwd = mnt->mnt_root;
         mds->mds_ctxt.fs = get_ds();
 
-        /*
-         * Replace the client filesystem delete_inode method with our own,
-         * so that we can clear the object ID before the inode is deleted.
-         * The fs_delete_inode method will call cl_delete_inode for us.
-         * We need to do this for the MDS superblock only, hence we install
-         * a modified copy of the original superblock method table.
-         *
-         * We still assume that there is only a single MDS client filesystem
-         * type, as we don't have access to the mds struct in delete_inode
-         * and store the client delete_inode method in a global table.  This
-         * will only become a problem if/when multiple MDSs are running on a
-         * single host with different underlying filesystems.
-         */
-        OBD_ALLOC(mds->mds_sop, sizeof(*mds->mds_sop));
-        if (!mds->mds_sop)
-                GOTO(out_dec, rc = -ENOMEM);
-
-        memcpy(mds->mds_sop, mds->mds_sb->s_op, sizeof(*mds->mds_sop));
-        mds->mds_fsops->cl_delete_inode = mds->mds_sop->delete_inode;
-        mds->mds_sop->delete_inode = mds->mds_fsops->fs_delete_inode;
-        mds->mds_sb->s_op = mds->mds_sop;
-
-        rc = mds_fs_prep(obddev);
-
-        if (rc)
-                GOTO(out_free, rc);
-
-        return 0;
-
-out_free:
-        OBD_FREE(mds->mds_sop, sizeof(*mds->mds_sop));
-out_dec:
-        mds_fs_put_ops(mds->mds_fsops);
-        return rc;
+        RETURN(mds_fs_prep(obddev));
 }
 
 void mds_fs_cleanup(struct obd_device *obddev)
@@ -504,10 +361,4 @@ void mds_fs_cleanup(struct obd_device *obddev)
 
         class_disconnect_all(obddev); /* this cleans up client info too */
         mds_server_free_data(mds);
-
-        OBD_FREE(mds->mds_sop, sizeof(*mds->mds_sop));
-        mds_fs_put_ops(mds->mds_fsops);
 }
-
-EXPORT_SYMBOL(mds_register_fs_type);
-EXPORT_SYMBOL(mds_unregister_fs_type);
index ca34445..689e424 100644 (file)
 #include <linux/lustre_idl.h>
 #include <linux/lustre_mds.h>
 #include <linux/lustre_dlm.h>
-#include <linux/obd_class.h>
+#include <linux/lustre_fsfilt.h>
 
 extern inline struct mds_obd *mds_req2mds(struct ptlrpc_request *req);
 
+/*
+ * Completion callback handed to fsfilt_set_last_rcvd() by the MDS.
+ *
+ * obd:       device whose embedded mds_obd is updated
+ * last_rcvd: transaction number this callback was registered for
+ * error:     status reported to the callback; non-zero means "do not advance"
+ *
+ * Advances mds_last_committed to last_rcvd, but only on success and only
+ * when last_rcvd is larger than the value already recorded.
+ * NOTE(review): presumably invoked by the filesystem helper once the
+ * journal transaction has committed -- confirm against the fsfilt backend.
+ */
+static void mds_last_rcvd_cb(struct obd_device *obd, __u64 last_rcvd, int error)
+{
+        struct mds_obd *mds = &obd->u.mds;
+
+        CDEBUG(D_HA, "got callback for last_rcvd "LPD64": rc = %d\n",
+               last_rcvd, error);
+
+        /* a failed transaction must never move the committed mark */
+        if (error)
+                return;
+
+        if (last_rcvd > mds->mds_last_committed)
+                mds->mds_last_committed = last_rcvd;
+}
+
 void mds_start_transno(struct mds_obd *mds)
 {
         ENTRY;
@@ -57,13 +67,11 @@ int mds_finish_transno(struct mds_obd *mds, void *handle,
 
         /* Propagate error code. */
         if (rc)
-                goto out;
+                GOTO(out, rc);
 
         /* we don't allocate new transnos for replayed requests */
-        if (req->rq_level == LUSTRE_CONN_RECOVD) {
-                rc = 0;
-                goto out;
-        }
+        if (req->rq_level == LUSTRE_CONN_RECOVD)
+                GOTO(out, rc = 0);
 
         off = MDS_LR_CLIENT + med->med_off * MDS_LR_SIZE;
 
@@ -73,7 +81,8 @@ int mds_finish_transno(struct mds_obd *mds, void *handle,
         mcd->mcd_mount_count = cpu_to_le64(mds->mds_mount_count);
         mcd->mcd_last_xid = cpu_to_le64(req->rq_xid);
 
-        mds_fs_set_last_rcvd(mds, handle);
+        fsfilt_set_last_rcvd(req->rq_export->exp_obd, last_rcvd, handle,
+                             mds_last_rcvd_cb);
         written = lustre_fwrite(mds->mds_rcvd_filp, (char *)mcd, sizeof(*mcd),
                                 &off);
         CDEBUG(D_INODE, "wrote trans #"LPD64" for client %s at #%d: written = "
@@ -87,8 +96,8 @@ int mds_finish_transno(struct mds_obd *mds, void *handle,
 
         rc = 0;
 
- out:
         EXIT;
+ out:
         up(&mds->mds_transno_sem);
         return rc;
 }
@@ -144,14 +153,14 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset,
                        to_kdev_t(inode->i_sb->s_dev));
 
         mds_start_transno(mds);
-        handle = mds_fs_start(mds, inode, MDS_FSOP_SETATTR);
+        handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR);
         if (IS_ERR(handle)) {
                 rc = PTR_ERR(handle);
                 (void)mds_finish_transno(mds, handle, req, rc);
                 GOTO(out_setattr_de, rc);
         }
 
-        rc = mds_fs_setattr(mds, de, handle, &rec->ur_iattr);
+        rc = fsfilt_setattr(obd, de, handle, &rec->ur_iattr);
 
         if (offset) {
                 body = lustre_msg_buf(req->rq_repmsg, 1);
@@ -161,7 +170,7 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset,
 
         rc = mds_finish_transno(mds, handle, req, rc);
 
-        err = mds_fs_commit(mds, de->d_inode, handle);
+        err = fsfilt_commit(obd, de->d_inode, handle);
         if (err) {
                 CERROR("error on commit: err = %d\n", err);
                 if (!rc)
@@ -264,7 +273,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset,
 
         switch (type) {
         case S_IFREG:{
-                handle = mds_fs_start(mds, dir, MDS_FSOP_CREATE);
+                handle = fsfilt_start(obd, dir, FSFILT_OP_CREATE);
                 if (IS_ERR(handle))
                         GOTO(out_transno_dchild, rc = PTR_ERR(handle));
                 rc = vfs_create(dir, dchild, rec->ur_mode);
@@ -272,7 +281,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset,
                 break;
         }
         case S_IFDIR:{
-                handle = mds_fs_start(mds, dir, MDS_FSOP_MKDIR);
+                handle = fsfilt_start(obd, dir, FSFILT_OP_MKDIR);
                 if (IS_ERR(handle))
                         GOTO(out_transno_dchild, rc = PTR_ERR(handle));
                 rc = vfs_mkdir(dir, dchild, rec->ur_mode);
@@ -280,7 +289,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset,
                 break;
         }
         case S_IFLNK:{
-                handle = mds_fs_start(mds, dir, MDS_FSOP_SYMLINK);
+                handle = fsfilt_start(obd, dir, FSFILT_OP_SYMLINK);
                 if (IS_ERR(handle))
                         GOTO(out_transno_dchild, rc = PTR_ERR(handle));
                 rc = vfs_symlink(dir, dchild, rec->ur_tgt);
@@ -292,7 +301,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset,
         case S_IFIFO:
         case S_IFSOCK:{
                 int rdev = rec->ur_rdev;
-                handle = mds_fs_start(mds, dir, MDS_FSOP_MKNOD);
+                handle = fsfilt_start(obd, dir, FSFILT_OP_MKNOD);
                 if (IS_ERR(handle))
                         GOTO(out_transno_dchild, rc = PTR_ERR(handle));
                 rc = vfs_mknod(dir, dchild, rec->ur_mode, rdev);
@@ -331,7 +340,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset,
                         CDEBUG(D_INODE, "created ino %lu\n", inode->i_ino);
                 }
 
-                rc = mds_fs_setattr(mds, dchild, handle, &iattr);
+                rc = fsfilt_setattr(obd, dchild, handle, &iattr);
                 if (rc) {
                         CERROR("error on setattr: rc = %d\n", rc);
                         /* XXX should we abort here in case of error? */
@@ -350,7 +359,7 @@ out_create_commit:
                 if (rc)
                         GOTO(out_create_unlink, rc);
         }
-        err = mds_fs_commit(mds, dir, handle);
+        err = fsfilt_commit(obd, dir, handle);
         if (err) {
                 CERROR("error on commit: err = %d\n", err);
                 if (!rc)
@@ -466,7 +475,7 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset,
         mds_start_transno(mds);
         switch (rec->ur_mode /* & S_IFMT ? */) {
         case S_IFDIR:
-                handle = mds_fs_start(mds, dir, MDS_FSOP_RMDIR);
+                handle = fsfilt_start(obd, dir, FSFILT_OP_RMDIR);
                 if (IS_ERR(handle))
                         GOTO(out_unlink_cancel_transno, rc = PTR_ERR(handle));
                 rc = vfs_rmdir(dir, dchild);
@@ -481,7 +490,7 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset,
         case S_IFBLK:
         case S_IFIFO:
         case S_IFSOCK:
-                handle = mds_fs_start(mds, dir, MDS_FSOP_UNLINK);
+                handle = fsfilt_start(obd, dir, FSFILT_OP_UNLINK);
                 if (IS_ERR(handle))
                         GOTO(out_unlink_cancel_transno, rc = PTR_ERR(handle));
                 rc = vfs_unlink(dir, dchild);
@@ -494,7 +503,7 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset,
         }
 
         rc = mds_finish_transno(mds, handle, req, rc);
-        err = mds_fs_commit(mds, dir, handle);
+        err = fsfilt_commit(obd, dir, handle);
         if (err) {
                 CERROR("error on commit: err = %d\n", err);
                 if (!rc)
@@ -626,7 +635,7 @@ static int mds_reint_link(struct mds_update_record *rec, int offset,
                        to_kdev_t(de_src->d_inode->i_sb->s_dev));
 
         mds_start_transno(mds);
-        handle = mds_fs_start(mds, de_tgt_dir->d_inode, MDS_FSOP_LINK);
+        handle = fsfilt_start(obd, de_tgt_dir->d_inode, FSFILT_OP_LINK);
         if (IS_ERR(handle)) {
                 rc = PTR_ERR(handle);
                 mds_finish_transno(mds, handle, req, rc);
@@ -638,7 +647,7 @@ static int mds_reint_link(struct mds_update_record *rec, int offset,
                 CERROR("link error %d\n", rc);
         rc = mds_finish_transno(mds, handle, req, rc);
 
-        err = mds_fs_commit(mds, de_tgt_dir->d_inode, handle);
+        err = fsfilt_commit(obd, de_tgt_dir->d_inode, handle);
         if (err) {
                 CERROR("error on commit: err = %d\n", err);
                 if (!rc)
@@ -760,7 +769,7 @@ static int mds_reint_rename(struct mds_update_record *rec, int offset,
                        to_kdev_t(de_srcdir->d_inode->i_sb->s_dev));
 
         mds_start_transno(mds);
-        handle = mds_fs_start(mds, de_tgtdir->d_inode, MDS_FSOP_RENAME);
+        handle = fsfilt_start(obd, de_tgtdir->d_inode, FSFILT_OP_RENAME);
         if (IS_ERR(handle)) {
                 rc = PTR_ERR(handle);
                 mds_finish_transno(mds, handle, req, rc);
@@ -774,7 +783,7 @@ static int mds_reint_rename(struct mds_update_record *rec, int offset,
 
         rc = mds_finish_transno(mds, handle, req, rc);
 
-        err = mds_fs_commit(mds, de_tgtdir->d_inode, handle);
+        err = fsfilt_commit(obd, de_tgtdir->d_inode, handle);
         if (err) {
                 CERROR("error on commit: err = %d\n", err);
                 if (!rc)
index d7df0bb..ed2b321 100644 (file)
@@ -2,9 +2,19 @@
 # lustre_build_version, or 'make -j2' breaks!
 DEFS=
 MODULE = obdclass
-modulefs_DATA = lustre_build_version obdclass.o
-EXTRA_PROGRAMS = obdclass
+
+if LINUX25
+FSMOD = fsfilt_ext3
+else
+FSMOD = fsfilt_extN
+endif
+
+modulefs_DATA = lustre_build_version obdclass.o $(FSMOD).o
+EXTRA_PROGRAMS = obdclass $(FSMOD)
+
 obdclass_SOURCES = debug.c genops.c class_obd.c sysctl.c uuid.c lprocfs_status.c
+obdclass_SOURCES += fsfilt.c
+
 include $(top_srcdir)/Rules
 lustre_build_version:
        perl $(top_srcdir)/scripts/version_tag.pl $(top_srcdir) > tmpver
index 420a3fb..03faf17 100644 (file)
@@ -51,7 +51,7 @@
 struct semaphore obd_conf_sem;   /* serialize configuration commands */
 struct obd_device obd_dev[MAX_OBD_DEVICES];
 struct list_head obd_types;
-unsigned long obd_memory;
+atomic_t obd_memory;
 
 /* The following are visible and mutable through /proc/sys/lustre/. */
 unsigned long obd_fail_loc;
diff --git a/lustre/obdclass/fsfilt.c b/lustre/obdclass/fsfilt.c
new file mode 100644 (file)
index 0000000..97a84df
--- /dev/null
@@ -0,0 +1,110 @@
+#define EXPORT_SYMTAB
+#define DEBUG_SUBSYSTEM S_FILTER
+
+#include <linux/fs.h>
+#include <linux/jbd.h>
+#include <linux/module.h>
+#include <linux/kmod.h>
+#include <linux/slab.h>
+#include <linux/extN_fs.h>
+#include <linux/extN_jbd.h>
+#include <linux/extN_xattr.h>
+#include <linux/kp30.h>
+#include <linux/lustre_fsfilt.h>
+
+LIST_HEAD(fsfilt_types);
+
+/*
+ * Look up a registered operations table by filesystem type name.
+ *
+ * Walks the fsfilt_types list and returns the first entry whose fs_type
+ * string compares equal to 'type', or NULL when no entry matches.
+ * No locking is done here.
+ */
+static struct fsfilt_operations *fsfilt_search_type(const char *type)
+{
+        struct list_head *pos;
+
+        list_for_each(pos, &fsfilt_types) {
+                struct fsfilt_operations *ops;
+
+                ops = list_entry(pos, struct fsfilt_operations, fs_list);
+                if (strcmp(ops->fs_type, type) == 0)
+                        return ops;
+        }
+
+        return NULL;
+}
+
+/*
+ * Add an operations table to the fsfilt_types registry.
+ *
+ * Returns 0 when fs_ops was added or was already registered, and -EEXIST
+ * when a different table is already registered under the same fs_type.
+ * A new registration also bumps the module use count via
+ * MOD_INC_USE_COUNT.
+ *
+ * The "lock"/"unlock" comments only mark where list locking would belong;
+ * no lock is actually taken in this function.
+ */
+int fsfilt_register_ops(struct fsfilt_operations *fs_ops)
+{
+        struct fsfilt_operations *existing;
+
+        /* lock fsfilt_types list */
+        existing = fsfilt_search_type(fs_ops->fs_type);
+        if (existing == NULL) {
+                /* first registration for this type name */
+                MOD_INC_USE_COUNT;
+                list_add(&fs_ops->fs_list, &fsfilt_types);
+                /* unlock fsfilt_types list */
+                return 0;
+        }
+
+        if (existing != fs_ops) {
+                CERROR("different operations for type %s\n",
+                       fs_ops->fs_type);
+                /* unlock fsfilt_types list */
+                RETURN(-EEXIST);
+        }
+
+        /* same table registered twice: nothing to do */
+        /* unlock fsfilt_types list */
+        return 0;
+}
+
+/*
+ * Remove an operations table from the fsfilt_types registry.
+ *
+ * Entries are matched by pointer identity, not by type name.  When fs_ops
+ * is found it is unlinked and MOD_DEC_USE_COUNT is applied; when it is not
+ * on the list the call does nothing.
+ *
+ * The "lock"/"unlock" comments only mark where list locking would belong;
+ * no lock is actually taken in this function.
+ */
+void fsfilt_unregister_ops(struct fsfilt_operations *fs_ops)
+{
+        struct list_head *pos;
+
+        /* lock fsfilt_types list */
+        list_for_each(pos, &fsfilt_types) {
+                struct fsfilt_operations *entry;
+
+                entry = list_entry(pos, struct fsfilt_operations, fs_list);
+                if (entry != fs_ops)
+                        continue;
+
+                /* leave the loop right after unlinking the current node */
+                list_del(pos);
+                MOD_DEC_USE_COUNT;
+                break;
+        }
+        /* unlock fsfilt_types list */
+}
+
+/*
+ * Find the operations table for filesystem 'type' and take a reference
+ * on the module that owns it.
+ *
+ * If the type is not registered yet, try to load the helper module named
+ * "fsfilt_<type>" and search the registry again.
+ *
+ * Returns the operations table on success.  On failure returns
+ * ERR_PTR(rc), where rc is the non-zero request_module() result or
+ * -ENOENT when the module loaded but did not register the type.
+ * A successful call must be paired with fsfilt_put_ops().
+ *
+ * The "lock"/"unlock" comments only mark where list locking would belong;
+ * no lock is actually taken in this function.
+ */
+struct fsfilt_operations *fsfilt_get_ops(char *type)
+{
+        struct fsfilt_operations *fs_ops;
+
+        /* lock fsfilt_types list */
+        if (!(fs_ops = fsfilt_search_type(type))) {
+                char name[32];
+                int rc;
+
+                snprintf(name, sizeof(name) - 1, "fsfilt_%s", type);
+                name[sizeof(name) - 1] = '\0';
+
+                /* request_module() returns 0 on success: only then is it
+                 * worth searching the registry a second time */
+                if (!(rc = request_module(name))) {
+                        fs_ops = fsfilt_search_type(type);
+                        CDEBUG(D_INFO, "Loaded module '%s'\n", name);
+                        if (!fs_ops)
+                                rc = -ENOENT;
+                }
+
+                if (rc) {
+                        /* 'name' already carries the "fsfilt_" prefix */
+                        CERROR("Can't find %s interface\n", name);
+                        /* unlock fsfilt_types list */
+                        RETURN(ERR_PTR(rc));
+                }
+        }
+        __MOD_INC_USE_COUNT(fs_ops->fs_owner);
+        /* unlock fsfilt_types list */
+
+        return fs_ops;
+}
+
+/*
+ * Release a reference obtained from fsfilt_get_ops(): decrements the use
+ * count of the module recorded in fs_ops->fs_owner.
+ */
+void fsfilt_put_ops(struct fsfilt_operations *fs_ops)
+{
+        __MOD_DEC_USE_COUNT(fs_ops->fs_owner);
+}
+
+
+EXPORT_SYMBOL(fsfilt_register_ops);
+EXPORT_SYMBOL(fsfilt_unregister_ops);
+EXPORT_SYMBOL(fsfilt_get_ops);
+EXPORT_SYMBOL(fsfilt_put_ops);
similarity index 55%
rename from lustre/mds/mds_ext3.c
rename to lustre/obdclass/fsfilt_ext3.c
index 7dede30..3878315 100644 (file)
@@ -1,8 +1,8 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  lustre/mds/mds_ext3.c
- *  Lustre Metadata Server (mds) journal abstraction routines
+ *  lustre/obdclass/fsfilt_ext3.c
+ *  Lustre filesystem abstraction routines
  *
  *  Copyright (C) 2002 Cluster File Systems, Inc.
  *   Author: Andreas Dilger <adilger@clusterfs.com>
@@ -23,7 +23,7 @@
  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
-#define DEBUG_SUBSYSTEM S_MDS
+#define DEBUG_SUBSYSTEM S_FILTER
 
 #include <linux/fs.h>
 #include <linux/jbd.h>
 #include <linux/init.h>
 #include <linux/ext3_fs.h>
 #include <linux/ext3_jbd.h>
-#include <../fs/ext3/xattr.h>
+#include <linux/ext3_xattr.h>
 #include <linux/kp30.h>
-#include <linux/lustre_mds.h>
+#include <linux/lustre_fsfilt.h>
 #include <linux/obd.h>
 #include <linux/module.h>
 
-static struct mds_fs_operations mds_ext3_fs_ops;
-static kmem_cache_t *mcb_cache;
-static int mcb_cache_count;
+static kmem_cache_t *fcb_cache;
+static int fcb_cache_count;
 
-struct mds_cb_data {
-        struct journal_callback cb_jcb;
-        struct mds_obd *cb_mds;
-        __u64 cb_last_rcvd;
+struct fsfilt_cb_data {
+        struct journal_callback cb_jcb; /* data private to jbd */
+        fsfilt_cb_t cb_func;            /* MDS/OBD completion function */
+        struct obd_device *cb_obd;      /* MDS/OBD completion device */
+        __u64 cb_last_rcvd;             /* MDS/OST last committed operation */
 };
 
 #define EXT3_XATTR_INDEX_LUSTRE         5
@@ -56,33 +56,33 @@ struct mds_cb_data {
  * the inode (which we will be changing anyways as part of this
  * transaction).
  */
-static void *mds_ext3_start(struct inode *inode, int op)
+static void *fsfilt_ext3_start(struct inode *inode, int op)
 {
         /* For updates to the last recieved file */
         int nblocks = EXT3_DATA_TRANS_BLOCKS;
         void *handle;
 
         switch(op) {
-        case MDS_FSOP_RMDIR:
-        case MDS_FSOP_UNLINK:
+        case FSFILT_OP_RMDIR:
+        case FSFILT_OP_UNLINK:
                 nblocks += EXT3_DELETE_TRANS_BLOCKS;
                 break;
-        case MDS_FSOP_RENAME:
+        case FSFILT_OP_RENAME:
                 /* We may be modifying two directories */
                 nblocks += EXT3_DATA_TRANS_BLOCKS;
-        case MDS_FSOP_SYMLINK:
+        case FSFILT_OP_SYMLINK:
                 /* Possible new block + block bitmap + GDT for long symlink */
                 nblocks += 3;
-        case MDS_FSOP_CREATE:
-        case MDS_FSOP_MKDIR:
-        case MDS_FSOP_MKNOD:
+        case FSFILT_OP_CREATE:
+        case FSFILT_OP_MKDIR:
+        case FSFILT_OP_MKNOD:
                 /* New inode + block bitmap + GDT for new file */
                 nblocks += 3;
-        case MDS_FSOP_LINK:
+        case FSFILT_OP_LINK:
                 /* Change parent directory */
                 nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS+EXT3_DATA_TRANS_BLOCKS;
                 break;
-        case MDS_FSOP_SETATTR:
+        case FSFILT_OP_SETATTR:
                 /* Setattr on inode */
                 nblocks += 1;
                 break;
@@ -97,7 +97,7 @@ static void *mds_ext3_start(struct inode *inode, int op)
         return handle;
 }
 
-static int mds_ext3_commit(struct inode *inode, void *handle)
+static int fsfilt_ext3_commit(struct inode *inode, void *handle)
 {
         int rc;
 
@@ -108,8 +108,8 @@ static int mds_ext3_commit(struct inode *inode, void *handle)
         return rc;
 }
 
-static int mds_ext3_setattr(struct dentry *dentry, void *handle,
-                            struct iattr *iattr)
+static int fsfilt_ext3_setattr(struct dentry *dentry, void *handle,
+                               struct iattr *iattr)
 {
         struct inode *inode = dentry->d_inode;
         int rc;
@@ -125,8 +125,8 @@ static int mds_ext3_setattr(struct dentry *dentry, void *handle,
         return rc;
 }
 
-static int mds_ext3_set_md(struct inode *inode, void *handle,
-                           struct lov_mds_md *lmm, int lmm_size)
+static int fsfilt_ext3_set_md(struct inode *inode, void *handle,
+                              void *lmm, int lmm_size)
 {
         int rc;
 
@@ -138,14 +138,14 @@ static int mds_ext3_set_md(struct inode *inode, void *handle,
         up(&inode->i_sem);
 
         if (rc) {
-                CERROR("error adding objectid "LPX64" to inode %lu: %d\n",
-                       lmm->lmm_object_id, inode->i_ino, rc);
+                CERROR("error adding MD data to inode %lu: rc = %d\n",
+                       inode->i_ino, rc);
                 if (rc != -ENOSPC) LBUG();
         }
         return rc;
 }
 
-static int mds_ext3_get_md(struct inode *inode, struct lov_mds_md *lmm,int size)
+static int fsfilt_ext3_get_md(struct inode *inode, void *lmm, int size)
 {
         int rc;
 
@@ -161,21 +161,17 @@ static int mds_ext3_get_md(struct inode *inode, struct lov_mds_md *lmm,int size)
                 return (rc == -ENODATA) ? 0 : rc;
 
         if (rc < 0) {
-                CDEBUG(D_INFO, "error getting EA %s from MDS inode %lu: "
+                CDEBUG(D_INFO, "error getting EA %s from inode %lu: "
                        "rc = %d\n", XATTR_LUSTRE_MDS_OBJID, inode->i_ino, rc);
                 memset(lmm, 0, size);
                 return (rc == -ENODATA) ? 0 : rc;
         }
 
-        /* This field is byteswapped because it appears in the
-         * catalogue.  All others are opaque to the MDS */
-        lmm->lmm_object_id = le64_to_cpu(lmm->lmm_object_id);
-
         return rc;
 }
 
-static ssize_t mds_ext3_readpage(struct file *file, char *buf, size_t count,
-                                 loff_t *offset)
+static ssize_t fsfilt_ext3_readpage(struct file *file, char *buf, size_t count,
+                                    loff_t *offset)
 {
         struct inode *inode = file->f_dentry->d_inode;
         int rc = 0;
@@ -201,64 +197,39 @@ static ssize_t mds_ext3_readpage(struct file *file, char *buf, size_t count,
         return rc;
 }
 
-static void mds_ext3_delete_inode(struct inode *inode)
-{
-        if (S_ISREG(inode->i_mode)) {
-                void *handle = mds_ext3_start(inode, MDS_FSOP_UNLINK);
-
-                if (IS_ERR(handle)) {
-                        CERROR("unable to start transaction");
-                        EXIT;
-                        return;
-                }
-                if (mds_ext3_set_md(inode, handle, NULL, 0))
-                        CERROR("error clearing objid on %lu\n", inode->i_ino);
-
-                if (mds_ext3_fs_ops.cl_delete_inode)
-                        mds_ext3_fs_ops.cl_delete_inode(inode);
-
-                if (mds_ext3_commit(inode, handle))
-                        CERROR("error closing handle on %lu\n", inode->i_ino);
-        } else
-                mds_ext3_fs_ops.cl_delete_inode(inode);
-}
-
-static void mds_ext3_callback_status(struct journal_callback *jcb, int error)
+static void fsfilt_ext3_cb_func(struct journal_callback *jcb, int error)
 {
-        struct mds_cb_data *mcb = (struct mds_cb_data *)jcb;
+        struct fsfilt_cb_data *fcb = (struct fsfilt_cb_data *)jcb;
 
-        CDEBUG(D_EXT2, "got callback for last_rcvd "LPD64": rc = %d\n",
-               mcb->cb_last_rcvd, error);
-        if (!error && mcb->cb_last_rcvd > mcb->cb_mds->mds_last_committed)
-                mcb->cb_mds->mds_last_committed = mcb->cb_last_rcvd;
+        fcb->cb_func(fcb->cb_obd, fcb->cb_last_rcvd, error);
 
-        kmem_cache_free(mcb_cache, mcb);
-        --mcb_cache_count;
+        kmem_cache_free(fcb_cache, fcb);
+        --fcb_cache_count;
 }
 
-static int mds_ext3_set_last_rcvd(struct mds_obd *mds, void *handle)
+static int fsfilt_ext3_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
+                                     void *handle, fsfilt_cb_t cb_func)
 {
-        struct mds_cb_data *mcb;
+#ifdef HAVE_JOURNAL_CALLBACK_STATUS
+        struct fsfilt_cb_data *fcb;
 
-        mcb = kmem_cache_alloc(mcb_cache, GFP_NOFS);
-        if (!mcb)
+        fcb = kmem_cache_alloc(fcb_cache, GFP_NOFS);
+        if (!fcb)
                 RETURN(-ENOMEM);
 
-        ++mcb_cache_count;
-        mcb->cb_mds = mds;
-        mcb->cb_last_rcvd = mds->mds_last_rcvd;
+        ++fcb_cache_count;
+        fcb->cb_func = cb_func;
+        fcb->cb_obd = obd;
+        fcb->cb_last_rcvd = last_rcvd;
 
-#ifdef HAVE_JOURNAL_CALLBACK_STATUS
-        CDEBUG(D_EXT2, "set callback for last_rcvd: "LPD64"\n",
-               mcb->cb_last_rcvd);
+        CDEBUG(D_EXT2, "set callback for last_rcvd: "LPD64"\n", last_rcvd);
         lock_kernel();
         /* Note that an "incompatible pointer" warning here is OK for now */
-        journal_callback_set(handle, mds_ext3_callback_status,
-                             (struct journal_callback *)mcb);
+        journal_callback_set(handle, fsfilt_ext3_cb_func,
+                             (struct journal_callback *)fcb);
         unlock_kernel();
 #else
 #warning "no journal callback kernel patch, faking it..."
-        {
         static long next = 0;
 
         if (time_after(jiffies, next)) {
@@ -266,13 +237,13 @@ static int mds_ext3_set_last_rcvd(struct mds_obd *mds, void *handle)
                 next = jiffies + 300 * HZ;
         }
 
-        mds_ext3_callback_status((struct journal_callback *)mcb, 0);
+        cb_func(obd, last_rcvd, 0);
 #endif
 
         return 0;
 }
 
-static int mds_ext3_journal_data(struct file *filp)
+static int fsfilt_ext3_journal_data(struct file *filp)
 {
         struct inode *inode = filp->f_dentry->d_inode;
 
@@ -288,7 +259,7 @@ static int mds_ext3_journal_data(struct file *filp)
  *
  * This can be removed when the ext3 EA code is fixed.
  */
-static int mds_ext3_statfs(struct super_block *sb, struct statfs *sfs)
+static int fsfilt_ext3_statfs(struct super_block *sb, struct statfs *sfs)
 {
         int rc = vfs_statfs(sb, sfs);
 
@@ -298,60 +269,59 @@ static int mds_ext3_statfs(struct super_block *sb, struct statfs *sfs)
         return rc;
 }
 
-static struct mds_fs_operations mds_ext3_fs_ops = {
+static struct fsfilt_operations fsfilt_ext3_ops = {
+        fs_type:                "ext3",
         fs_owner:               THIS_MODULE,
-        fs_start:               mds_ext3_start,
-        fs_commit:              mds_ext3_commit,
-        fs_setattr:             mds_ext3_setattr,
-        fs_set_md:              mds_ext3_set_md,
-        fs_get_md:              mds_ext3_get_md,
-        fs_readpage:            mds_ext3_readpage,
-        fs_delete_inode:        mds_ext3_delete_inode,
-        cl_delete_inode:        clear_inode,
-        fs_journal_data:        mds_ext3_journal_data,
-        fs_set_last_rcvd:       mds_ext3_set_last_rcvd,
-        fs_statfs:              mds_ext3_statfs,
+        fs_start:               fsfilt_ext3_start,
+        fs_commit:              fsfilt_ext3_commit,
+        fs_setattr:             fsfilt_ext3_setattr,
+        fs_set_md:              fsfilt_ext3_set_md,
+        fs_get_md:              fsfilt_ext3_get_md,
+        fs_readpage:            fsfilt_ext3_readpage,
+        fs_journal_data:        fsfilt_ext3_journal_data,
+        fs_set_last_rcvd:       fsfilt_ext3_set_last_rcvd,
+        fs_statfs:              fsfilt_ext3_statfs,
 };
 
+/*
+ * Module init: create the slab cache used for journal callback records and
+ * register the ext3 operations table (fsfilt_ext3_ops, defined above).
+ * The cache is destroyed again if registration fails.
+ * Returns 0 on success, -ENOMEM if the cache cannot be created, or the
+ * fsfilt_register_ops() error.
+ */
-static int __init mds_ext3_init(void)
+static int __init fsfilt_ext3_init(void)
 {
         int rc;
 
         //rc = ext3_xattr_register();
-        mcb_cache = kmem_cache_create("mds_ext3_mcb",
-                                      sizeof(struct mds_cb_data), 0,
+        fcb_cache = kmem_cache_create("fsfilt_ext3_fcb",
+                                      sizeof(struct fsfilt_cb_data), 0,
                                       0, NULL, NULL);
-        if (!mcb_cache) {
-                CERROR("error allocating MDS journal callback cache\n");
+        if (!fcb_cache) {
+                CERROR("error allocating fsfilt journal callback cache\n");
                 GOTO(out, rc = -ENOMEM);
         }
 
-        rc = mds_register_fs_type(&mds_ext3_fs_ops, "ext3");
+        /* must name the table defined in this file: fsfilt_ext3_ops */
+        rc = fsfilt_register_ops(&fsfilt_ext3_ops);
 
         if (rc)
-                kmem_cache_destroy(mcb_cache);
+                kmem_cache_destroy(fcb_cache);
 out:
         return rc;
 }
 
+/*
+ * Module exit: unregister the same operations table that init registered,
+ * then destroy the callback cache.  A non-zero destroy result or a non-zero
+ * outstanding-callback count is reported with CERROR.
+ */
-static void __exit mds_ext3_exit(void)
+static void __exit fsfilt_ext3_exit(void)
 {
         int rc;
 
-        mds_unregister_fs_type("ext3");
-        rc = kmem_cache_destroy(mcb_cache);
+        fsfilt_unregister_ops(&fsfilt_ext3_ops);
+        rc = kmem_cache_destroy(fcb_cache);
 
-        if (rc || mcb_cache_count) {
-                CERROR("can't free MDS callback cache: count %d, rc = %d\n",
-                       mcb_cache_count, rc);
+        if (rc || fcb_cache_count) {
+                CERROR("can't free fsfilt callback cache: count %d, rc = %d\n",
+                       fcb_cache_count, rc);
         }
 
         //rc = ext3_xattr_unregister();
 }
 
 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
-MODULE_DESCRIPTION("Lustre MDS ext3 Filesystem Helper v0.1");
+MODULE_DESCRIPTION("Lustre ext3 Filesystem Helper v0.1");
 MODULE_LICENSE("GPL");
 
-module_init(mds_ext3_init);
-module_exit(mds_ext3_exit);
+module_init(fsfilt_ext3_init);
+module_exit(fsfilt_ext3_exit);
diff --git a/lustre/obdclass/fsfilt_extN.c b/lustre/obdclass/fsfilt_extN.c
new file mode 100644 (file)
index 0000000..9b5a1f9
--- /dev/null
@@ -0,0 +1,449 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  lustre/obdclass/fsfilt_extN.c
+ *  Lustre filesystem abstraction routines
+ *
+ *  Copyright (C) 2002 Cluster File Systems, Inc.
+ *   Author: Andreas Dilger <adilger@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_FILTER
+
+#include <linux/fs.h>
+#include <linux/jbd.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <linux/quotaops.h>
+#include <linux/extN_fs.h>
+#include <linux/extN_jbd.h>
+#include <linux/extN_xattr.h>
+#include <linux/kp30.h>
+#include <linux/lustre_fsfilt.h>
+#include <linux/obd.h>
+#include <linux/module.h>
+
+static kmem_cache_t *fcb_cache;
+static int fcb_cache_count;
+
+/*
+ * Journal commit callback record.  Instances are allocated from fcb_cache in
+ * fsfilt_extN_set_last_rcvd() and freed in fsfilt_extN_cb_func().  Both
+ * functions cast between this type and struct journal_callback, so cb_jcb
+ * must stay the first member.
+ */
+struct fsfilt_cb_data {
+        struct journal_callback cb_jcb; /* data private to jbd */
+        fsfilt_cb_t cb_func;            /* MDS/OBD completion function */
+        struct obd_device *cb_obd;      /* MDS/OBD completion device */
+        __u64 cb_last_rcvd;             /* MDS/OST last committed operation */
+};
+
+#define EXTN_XATTR_INDEX_LUSTRE         5
+#define XATTR_LUSTRE_MDS_OBJID          "system.lustre_mds_objid"
+
+/*
+ * Start a journal transaction for metadata operation 'op' (one of the
+ * FSFILT_OP_* values handled below) on the journal of 'inode'.
+ *
+ * The credit count starts at EXTN_DATA_TRANS_BLOCKS and each case adds the
+ * blocks that operation may dirty.  RENAME, SYMLINK and CREATE/MKDIR/MKNOD
+ * fall through on purpose, so their additions accumulate down to the LINK
+ * case.  An unrecognised op is logged and hits LBUG().
+ *
+ * Returns whatever journal_start() returns; the result is not checked here.
+ *
+ * We don't currently need any additional blocks for rmdir and
+ * unlink transactions because we are storing the OST oa_id inside
+ * the inode (which we will be changing anyways as part of this
+ * transaction).
+ */
+static void *fsfilt_extN_start(struct inode *inode, int op)
+{
+        /* For updates to the last received file */
+        int nblocks = EXTN_DATA_TRANS_BLOCKS;
+        void *handle;
+
+        switch(op) {
+        case FSFILT_OP_RMDIR:
+        case FSFILT_OP_UNLINK:
+                nblocks += EXTN_DELETE_TRANS_BLOCKS;
+                break;
+        case FSFILT_OP_RENAME:
+                /* modify additional directory */
+                nblocks += EXTN_DATA_TRANS_BLOCKS;
+                /* no break */
+        case FSFILT_OP_SYMLINK:
+                /* additional block + block bitmap + GDT for long symlink */
+                nblocks += 3;
+                /* no break */
+        case FSFILT_OP_CREATE:
+        case FSFILT_OP_MKDIR:
+        case FSFILT_OP_MKNOD:
+                /* modify one inode + block bitmap + GDT */
+                nblocks += 3;
+                /* no break */
+        case FSFILT_OP_LINK:
+                /* modify parent directory */
+                nblocks += EXTN_INDEX_EXTRA_TRANS_BLOCKS+EXTN_DATA_TRANS_BLOCKS;
+                break;
+        case FSFILT_OP_SETATTR:
+                /* Setattr on inode */
+                nblocks += 1;
+                break;
+        default: CERROR("unknown transaction start op %d\n", op);
+                 LBUG();
+        }
+
+        /* the calling task must not already have journal_info set */
+        LASSERT(!current->journal_info);
+        lock_kernel();
+        handle = journal_start(EXTN_JOURNAL(inode), nblocks);
+        unlock_kernel();
+
+        return handle;
+}
+
+/*
+ * Estimate an upper bound on the journal credits needed to write the pages
+ * described by 'objcount' fsfilt_objinfo entries in one extN transaction.
+ * extN has no API for working this out in advance, so it is done here.
+ *
+ * See comment above extN_writepage_trans_blocks for details.  Data
+ * journaling is assumed to be off, and every page is allowed to be
+ * non-contiguous; guaranteed-contiguous pages would need far fewer
+ * (d)indirect blocks.
+ *
+ * Per object, with B = fso_bufcnt * (blocks per page):
+ *   B                               indirect blocks
+ *   min(B, addresses per page + 1)  dindirect blocks
+ *   1                               tindirect block
+ *
+ * For the whole filesystem:
+ *   bitmap blocks, capped at s_groups_count
+ *   group descriptor blocks, capped at s_gdb_count
+ *   objcount inode blocks and 1 superblock
+ *   EXTN_SINGLEDATA_TRANS_BLOCKS per active quota file (CONFIG_QUOTA only)
+ */
+static int fsfilt_extN_credits_needed(int objcount, struct fsfilt_objinfo *fso)
+{
+        struct super_block *sb = fso->fso_dentry->d_inode->i_sb;
+        int blockpp = 1 << (PAGE_CACHE_SHIFT - sb->s_blocksize_bits);
+        int addrpp = EXTN_ADDR_PER_BLOCK(sb) * blockpp;
+        int touched = 0;                /* indirect + data blocks, all objects */
+        int needed = objcount + 1;      /* inode block per object + superblock */
+        int nbitmaps;
+        int ngdblocks;
+        int idx;
+
+        for (idx = 0; idx < objcount; idx++) {
+                int nblocks = fso[idx].fso_bufcnt * blockpp;
+                int nindir = nblocks + min(nblocks, addrpp + 1) + 1;
+
+                touched += nindir + nblocks;
+                needed += nindir;
+        }
+
+        /* Every touched block may sit in a different group, but there are
+         * only so many bitmaps and group descriptor blocks to dirty. */
+        nbitmaps = touched;
+        if (nbitmaps > EXTN_SB(sb)->s_groups_count)
+                nbitmaps = EXTN_SB(sb)->s_groups_count;
+
+        ngdblocks = touched;
+        if (ngdblocks > EXTN_SB(sb)->s_gdb_count)
+                ngdblocks = EXTN_SB(sb)->s_gdb_count;
+
+        needed += nbitmaps + ngdblocks;
+
+#ifdef CONFIG_QUOTA
+        /* We assume that there will be 1 bit set in s_dquot.flags for each
+         * quota file that is active.  This is at least true for now.
+         */
+        needed += hweight32(sb_any_quota_enabled(sb)) *
+                EXTN_SINGLEDATA_TRANS_BLOCKS;
+#endif
+
+        return needed;
+}
+
+/* We have to start a huge journal transaction here to hold all of the
+ * metadata for the pages being written here.  This is necessitated by
+ * the fact that we do lots of prepare_write operations before we do
+ * any of the matching commit_write operations, so even if we split
+ * up to use "smaller" transactions none of them could complete until
+ * all of them were opened.  By having a single journal transaction,
+ * we eliminate duplicate reservations for common blocks like the
+ * superblock and group descriptors or bitmaps.
+ *
+ * We will start the transaction here, but each prepare_write will
+ * add a refcount to the transaction, and each commit_write will
+ * remove a refcount.  The transaction will be closed when all of
+ * the pages have been written.
+ *
+ * The credit count comes from fsfilt_extN_credits_needed(objcount, fso) and
+ * is capped at the journal's j_max_transaction_buffers.  'niocount' and 'nb'
+ * are not used by this implementation.  The journal is taken from the
+ * superblock of the first fso entry.
+ *
+ * Returns the journal_start() result; an IS_ERR() handle is logged and
+ * returned to the caller unchanged.
+ */
+static void *fsfilt_extN_brw_start(int objcount, struct fsfilt_objinfo *fso,
+                                   int niocount, struct niobuf_remote *nb)
+{
+        journal_t *journal;
+        handle_t *handle;
+        int needed;
+        ENTRY;
+
+        /* the calling task must not already have journal_info set */
+        LASSERT(!current->journal_info);
+        journal = EXTN_SB(fso->fso_dentry->d_inode->i_sb)->s_journal;
+        needed = fsfilt_extN_credits_needed(objcount, fso);
+
+        /* The number of blocks we could _possibly_ dirty can be very large.
+         * We reduce our request if it is absurd (and we couldn't get that
+         * many credits for a single handle anyways).
+         *
+         * At some point we have to limit the size of I/Os sent at one time,
+         * increase the size of the journal, or we have to calculate the
+         * actual journal requirements more carefully by checking all of
+         * the blocks instead of being maximally pessimistic.  It remains to
+         * be seen if this is a real problem or not.
+         */
+        if (needed > journal->j_max_transaction_buffers) {
+                CERROR("want too many journal credits (%d) using %d instead\n",
+                       needed, journal->j_max_transaction_buffers);
+                needed = journal->j_max_transaction_buffers;
+        }
+
+        lock_kernel();
+        handle = journal_start(journal, needed);
+        unlock_kernel();
+        if (IS_ERR(handle))
+                CERROR("can't get handle for %d credits: rc = %ld\n", needed,
+                       PTR_ERR(handle));
+
+        RETURN(handle);
+}
+
+/*
+ * Finish the transaction identified by 'handle' by calling journal_stop()
+ * under the big kernel lock.  'inode' is not used.
+ * Returns the journal_stop() result.
+ */
+static int fsfilt_extN_commit(struct inode *inode, void *handle)
+{
+        handle_t *jh = (handle_t *)handle;
+        int err;
+
+        lock_kernel();
+        err = journal_stop(jh);
+        unlock_kernel();
+
+        return err;
+}
+
+/*
+ * Apply 'iattr' to the inode behind 'dentry' under the big kernel lock.
+ * The inode's own ->setattr method is used when it has one, otherwise
+ * inode_setattr().  'handle' is not used here.
+ * Returns the result of whichever setattr routine ran.
+ */
+static int fsfilt_extN_setattr(struct dentry *dentry, void *handle,
+                               struct iattr *iattr)
+{
+        struct inode *inode = dentry->d_inode;
+        int rc;
+
+        lock_kernel();
+        rc = inode->i_op->setattr ? inode->i_op->setattr(dentry, iattr) :
+                                    inode_setattr(inode, iattr);
+        unlock_kernel();
+
+        return rc;
+}
+
+/*
+ * Store 'lmm_size' bytes at 'lmm' in the XATTR_LUSTRE_MDS_OBJID extended
+ * attribute of 'inode', as part of the transaction 'handle'.  The buffer is
+ * passed through to extN_xattr_set() without interpretation.
+ *
+ * i_sem is taken first and the big kernel lock inside it.
+ * Returns the extN_xattr_set() result.  Any failure is logged, and any
+ * failure other than -ENOSPC hits LBUG().
+ */
+static int fsfilt_extN_set_md(struct inode *inode, void *handle,
+                              void *lmm, int lmm_size)
+{
+        int rc;
+
+        down(&inode->i_sem);
+        lock_kernel();
+        rc = extN_xattr_set(handle, inode, EXTN_XATTR_INDEX_LUSTRE,
+                            XATTR_LUSTRE_MDS_OBJID, lmm, lmm_size, 0);
+        unlock_kernel();
+        up(&inode->i_sem);
+
+        if (rc) {
+                CERROR("error adding MD data to inode %lu: rc = %d\n",
+                       inode->i_ino, rc);
+                if (rc != -ENOSPC) LBUG();
+        }
+        return rc;
+}
+
+/*
+ * Read the XATTR_LUSTRE_MDS_OBJID extended attribute of 'inode' into 'lmm'
+ * (at most 'size' bytes), holding i_sem and then the big kernel lock around
+ * the extN_xattr_get() call.
+ *
+ * With lmm == NULL the extN_xattr_get() result is returned as the MD size.
+ * In both modes -ENODATA is reported as 0.  On any other error with a
+ * non-NULL buffer, the buffer is zeroed and the error returned.
+ */
+static int fsfilt_extN_get_md(struct inode *inode, void *lmm, int size)
+{
+        int rc;
+
+        down(&inode->i_sem);
+        lock_kernel();
+        rc = extN_xattr_get(inode, EXTN_XATTR_INDEX_LUSTRE,
+                            XATTR_LUSTRE_MDS_OBJID, lmm, size);
+        unlock_kernel();
+        up(&inode->i_sem);
+
+        /* This gives us the MD size */
+        if (lmm == NULL)
+                return (rc == -ENODATA) ? 0 : rc;
+
+        if (rc < 0) {
+                CDEBUG(D_INFO, "error getting EA %s from inode %lu: "
+                       "rc = %d\n", XATTR_LUSTRE_MDS_OBJID, inode->i_ino, rc);
+                /* never hand back a partially filled buffer */
+                memset(lmm, 0, size);
+                return (rc == -ENODATA) ? 0 : rc;
+        }
+
+        return rc;
+}
+
+/*
+ * Read from 'file' at '*offset' into 'buf'.
+ *
+ * Regular files go through file->f_op->read() with 'count'.  Anything else
+ * is read as a single block via extN_bread(), using the block index
+ * *offset >> s_blocksize_bits; inode->i_blksize bytes are copied and that
+ * count is returned.  If extN_bread() returns no buffer, rc is whatever it
+ * stored through its error pointer.  '*offset' is not advanced on this path.
+ *
+ * NOTE(review): the block path copies i_blksize bytes regardless of 'count'
+ * and of the buffer_head's actual size -- confirm callers always pass a
+ * buffer of at least i_blksize bytes and that i_blksize matches the block
+ * size.
+ */
+static ssize_t fsfilt_extN_readpage(struct file *file, char *buf, size_t count,
+                                    loff_t *offset)
+{
+        struct inode *inode = file->f_dentry->d_inode;
+        int rc = 0;
+
+        if (S_ISREG(inode->i_mode))
+                rc = file->f_op->read(file, buf, count, offset);
+        else {
+                struct buffer_head *bh;
+
+                /* FIXME: this assumes the blocksize == count, but the calling
+                 *        function will detect this as an error for now */
+                bh = extN_bread(NULL, inode,
+                                *offset >> inode->i_sb->s_blocksize_bits,
+                                0, &rc);
+
+                if (bh) {
+                        memcpy(buf, bh->b_data, inode->i_blksize);
+                        brelse(bh);
+                        rc = inode->i_blksize;
+                }
+        }
+
+        return rc;
+}
+
+/*
+ * Journal callback registered by fsfilt_extN_set_last_rcvd().  'jcb' is the
+ * leading member of a struct fsfilt_cb_data; the saved completion function
+ * is called with the saved device, last_rcvd value and 'error', and the
+ * record is then returned to fcb_cache.
+ */
+static void fsfilt_extN_cb_func(struct journal_callback *jcb, int error)
+{
+        struct fsfilt_cb_data *cb_data = (struct fsfilt_cb_data *)jcb;
+
+        cb_data->cb_func(cb_data->cb_obd, cb_data->cb_last_rcvd, error);
+
+        fcb_cache_count--;
+        kmem_cache_free(fcb_cache, cb_data);
+}
+
+/*
+ * Arrange for cb_func(obd, last_rcvd, error) to be called for the
+ * transaction 'handle'.
+ *
+ * With HAVE_JOURNAL_CALLBACK_STATUS, a struct fsfilt_cb_data is allocated
+ * from fcb_cache, filled in, and attached to the handle with
+ * journal_callback_set(); fsfilt_extN_cb_func() later runs the callback and
+ * frees the record.  Returns -ENOMEM if the allocation fails.
+ *
+ * Without it, cb_func is called immediately with error 0, and a CERROR
+ * reminder is printed at most once every 300 * HZ jiffies.
+ *
+ * Returns 0 otherwise.
+ */
+static int fsfilt_extN_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
+                                     void *handle, fsfilt_cb_t cb_func)
+{
+#ifdef HAVE_JOURNAL_CALLBACK_STATUS
+        struct fsfilt_cb_data *fcb;
+
+        fcb = kmem_cache_alloc(fcb_cache, GFP_NOFS);
+        if (!fcb)
+                RETURN(-ENOMEM);
+
+        /* counts outstanding records; checked at module exit */
+        ++fcb_cache_count;
+        fcb->cb_func = cb_func;
+        fcb->cb_obd = obd;
+        fcb->cb_last_rcvd = last_rcvd;
+
+        CDEBUG(D_EXT2, "set callback for last_rcvd: "LPD64"\n", last_rcvd);
+        lock_kernel();
+        /* Note that an "incompatible pointer" warning here is OK for now */
+        journal_callback_set(handle, fsfilt_extN_cb_func,
+                             (struct journal_callback *)fcb);
+        unlock_kernel();
+#else
+#warning "no journal callback kernel patch, faking it..."
+        /* jiffies value after which the reminder may be printed again */
+        static long next = 0;
+
+        if (time_after(jiffies, next)) {
+                CERROR("no journal callback kernel patch, faking it...\n");
+                next = jiffies + 300 * HZ;
+        }
+
+        cb_func(obd, last_rcvd, 0);
+#endif
+
+        return 0;
+}
+
+/*
+ * Set EXTN_JOURNAL_DATA_FL in the extN inode flags of the file's inode.
+ * Always returns 0.
+ */
+static int fsfilt_extN_journal_data(struct file *filp)
+{
+        struct inode *target = filp->f_dentry->d_inode;
+
+        EXTN_I(target)->i_flags = EXTN_I(target)->i_flags |
+                                  EXTN_JOURNAL_DATA_FL;
+
+        return 0;
+}
+
+/*
+ * Fill 'osfs' with the usage statistics of 'sb'.
+ *
+ * We need to hack the return value for the free inode counts because
+ * the current EA code requires one filesystem block per inode with EAs,
+ * so it is possible to run out of blocks before we run out of inodes.
+ *
+ * This can be removed when the extN EA code is fixed.
+ *
+ * Returns the vfs_statfs() result.  'osfs' is written only on success:
+ * when vfs_statfs() fails the local struct statfs may never have been
+ * filled in, so packing it would pass uninitialized stack data to the
+ * caller.
+ */
+static int fsfilt_extN_statfs(struct super_block *sb, struct obd_statfs *osfs)
+{
+        struct statfs sfs;
+        int rc = vfs_statfs(sb, &sfs);
+
+        if (rc)
+                return rc;
+
+        /* never report more free inodes than free blocks */
+        if (sfs.f_bfree < sfs.f_ffree)
+                sfs.f_ffree = sfs.f_bfree;
+
+        statfs_pack(osfs, &sfs);
+        return rc;
+}
+
+/*
+ * Operations table for the "extN" backing filesystem.  It is registered by
+ * fsfilt_extN_init() and unregistered by fsfilt_extN_exit().
+ */
+static struct fsfilt_operations fsfilt_extN_ops = {
+        fs_type:                "extN",
+        fs_owner:               THIS_MODULE,
+        fs_start:               fsfilt_extN_start,
+        fs_brw_start:           fsfilt_extN_brw_start,
+        fs_commit:              fsfilt_extN_commit,
+        fs_setattr:             fsfilt_extN_setattr,
+        fs_set_md:              fsfilt_extN_set_md,
+        fs_get_md:              fsfilt_extN_get_md,
+        fs_readpage:            fsfilt_extN_readpage,
+        fs_journal_data:        fsfilt_extN_journal_data,
+        fs_set_last_rcvd:       fsfilt_extN_set_last_rcvd,
+        fs_statfs:              fsfilt_extN_statfs,
+};
+
+/*
+ * Module init: create the slab cache for journal callback records, then
+ * register fsfilt_extN_ops.  The cache is destroyed again if registration
+ * fails.
+ * Returns 0 on success, -ENOMEM if the cache cannot be created, or the
+ * fsfilt_register_ops() error.
+ */
+static int __init fsfilt_extN_init(void)
+{
+        int rc;
+
+        //rc = extN_xattr_register();
+        fcb_cache = kmem_cache_create("fsfilt_extN_fcb",
+                                      sizeof(struct fsfilt_cb_data), 0,
+                                      0, NULL, NULL);
+        if (!fcb_cache) {
+                CERROR("error allocating fsfilt journal callback cache\n");
+                GOTO(out, rc = -ENOMEM);
+        }
+
+        rc = fsfilt_register_ops(&fsfilt_extN_ops);
+
+        /* registration failed: undo the cache creation */
+        if (rc)
+                kmem_cache_destroy(fcb_cache);
+out:
+        return rc;
+}
+
+/*
+ * Module exit: unregister fsfilt_extN_ops and destroy the callback cache.
+ * A non-zero kmem_cache_destroy() result, or a non-zero fcb_cache_count
+ * (records allocated but not yet freed by the journal callback), is
+ * reported with CERROR; nothing else is done about it.
+ */
+static void __exit fsfilt_extN_exit(void)
+{
+        int rc;
+
+        fsfilt_unregister_ops(&fsfilt_extN_ops);
+        rc = kmem_cache_destroy(fcb_cache);
+
+        if (rc || fcb_cache_count) {
+                CERROR("can't free fsfilt callback cache: count %d, rc = %d\n",
+                       fcb_cache_count, rc);
+        }
+
+        //rc = extN_xattr_unregister();
+}
+
+MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
+MODULE_DESCRIPTION("Lustre extN Filesystem Helper v0.1");
+MODULE_LICENSE("GPL");
+
+module_init(fsfilt_extN_init);
+module_exit(fsfilt_extN_exit);
index 926991a..8a0ed36 100644 (file)
@@ -84,25 +84,44 @@ int class_register_type(struct obd_ops *ops, struct lprocfs_vars *vars,
 
         ENTRY;
 
+        LASSERT (strnlen (nm, 1024) < 1024);    /* sanity check */
+        
         if (class_search_type(nm)) {
                 CDEBUG(D_IOCTL, "Type %s already registered\n", nm);
                 RETURN(-EEXIST);
         }
 
+        rc = -ENOMEM;
         OBD_ALLOC(type, sizeof(*type));
+        if (type == NULL)
+                RETURN(rc);
+
         OBD_ALLOC(type->typ_ops, sizeof(*type->typ_ops));
         OBD_ALLOC(type->typ_name, strlen(nm) + 1);
-        if (!type)
-                RETURN(-ENOMEM);
-        INIT_LIST_HEAD(&type->typ_chain);
+        if (type->typ_ops == NULL ||
+            type->typ_name == NULL)
+                GOTO (failed, rc);
+        
+        *(type->typ_ops) = *ops;
+        strcpy(type->typ_name, nm);
+        list_add(&type->typ_chain, &obd_types);
+
+        rc = lprocfs_reg_class(type, vars, type);
+        if (rc != 0) {
+                list_del (&type->typ_chain);
+                GOTO (failed, rc);
+        }
+        
         CDEBUG(D_INFO, "MOD_INC_USE for register_type: count = %d\n",
                atomic_read(&(THIS_MODULE)->uc.usecount));
         MOD_INC_USE_COUNT;
-        list_add(&type->typ_chain, &obd_types);
-        memcpy(type->typ_ops, ops, sizeof(*type->typ_ops));
-        strcpy(type->typ_name, nm);
-        rc = lprocfs_reg_class(type, vars, type);
+        RETURN (0);
 
+ failed:
+        if (type->typ_ops != NULL)
+                OBD_FREE (type->typ_name, strlen (nm) + 1);
+        if (type->typ_ops != NULL)
+                OBD_FREE (type->typ_ops, sizeof (*type->typ_ops));
         RETURN(rc);
 }
 
index 4ccc043..3d2f222 100644 (file)
@@ -57,8 +57,10 @@ static int echo_iocontrol(unsigned int cmd, struct lustre_handle *obdconn, int l
         case OBD_IOC_CREATE: {
                 struct lov_stripe_md *tmp_lsm = NULL;
                 rc = obd_create(&ec->conn, &data->ioc_obdo1, &tmp_lsm);
-                if (lsm)
+                if (lsm && tmp_lsm ) {
                         memcpy(lsm, tmp_lsm, sizeof(*tmp_lsm));
+                        data->ioc_conn2 = 1;
+                }
 
                 GOTO(out, rc);
         }
index 31047b5..a370e56 100644 (file)
  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+/*
+ * Invariant: get O/R i_sem for lookup, if needed, before any journal ops
+ *            (which need to get journal_lock, may block if journal full).
+ */
+
 #define EXPORT_SYMTAB
 #define DEBUG_SUBSYSTEM S_FILTER
 
+#include <linux/config.h>
 #include <linux/module.h>
-#include <linux/pagemap.h>
+#include <linux/pagemap.h> // XXX kill me soon
 #include <linux/fs.h>
 #include <linux/dcache.h>
 #include <linux/obd_class.h>
 #include <linux/lustre_dlm.h>
 #include <linux/obd_filter.h>
-#include <linux/ext3_jbd.h>
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-#include <linux/extN_jbd.h>
-#endif
-#include <linux/quotaops.h>
 #include <linux/init.h>
 #include <linux/random.h>
-#include <linux/stringify.h>
+#include <linux/lustre_fsfilt.h>
 #include <linux/lprocfs_status.h>
 
 extern struct lprocfs_vars status_class_var[];
@@ -49,9 +50,6 @@ extern struct lprocfs_vars status_var_nm_1[];
 static kmem_cache_t *filter_open_cache;
 static kmem_cache_t *filter_dentry_cache;
 
-#define FILTER_ROOTINO 2
-#define FILTER_ROOTINO_STR __stringify(FILTER_ROOTINO)
-
 #define S_SHIFT 12
 static char *obd_type_by_mode[S_IFMT >> S_SHIFT] = {
         [0]                     NULL,
@@ -96,13 +94,14 @@ struct dentry_operations filter_dops = {
         .d_release = filter_drelease,
 };
 
+#define LAST_RCVD "last_rcvd"
+
 /* setup the object store with correct subdirectories */
 static int filter_prep(struct obd_device *obd)
 {
         struct obd_run_ctxt saved;
         struct filter_obd *filter = &obd->u.filter;
         struct dentry *dentry;
-        struct dentry *root;
         struct file *file;
         struct inode *inode;
         int rc = 0;
@@ -118,58 +117,33 @@ static int filter_prep(struct obd_device *obd)
                 GOTO(out, rc);
         }
         filter->fo_dentry_O = dentry;
-        dentry = simple_mkdir(current->fs->pwd, "P", 0700);
-        CDEBUG(D_INODE, "got/created P: %p\n", dentry);
-        if (IS_ERR(dentry)) {
-                rc = PTR_ERR(dentry);
-                CERROR("cannot open/create P: rc = %d\n", rc);
-                GOTO(out_O, rc);
-        }
-        f_dput(dentry);
-        dentry = simple_mkdir(current->fs->pwd, "D", 0700);
-        CDEBUG(D_INODE, "got/created D: %p\n", dentry);
-        if (IS_ERR(dentry)) {
-                rc = PTR_ERR(dentry);
-                CERROR("cannot open/create D: rc = %d\n", rc);
-                GOTO(out_O, rc);
-        }
-
-        root = simple_mknod(dentry, FILTER_ROOTINO_STR, S_IFREG | 0755);
-        f_dput(dentry);
-        if (IS_ERR(root)) {
-                rc = PTR_ERR(root);
-                CERROR("OBD filter: cannot open/create root %d: rc = %d\n",
-                       FILTER_ROOTINO, rc);
-                GOTO(out_O, rc);
-        }
-        f_dput(root);
 
         /*
          * Create directories and/or get dentries for each object type.
          * This saves us from having to do multiple lookups for each one.
          */
         for (mode = 0; mode < (S_IFMT >> S_SHIFT); mode++) {
-                char *type = obd_type_by_mode[mode];
+                char *name = obd_type_by_mode[mode];
 
-                if (!type) {
+                if (!name) {
                         filter->fo_dentry_O_mode[mode] = NULL;
                         continue;
                 }
-                dentry = simple_mkdir(filter->fo_dentry_O, type, 0700);
-                CDEBUG(D_INODE, "got/created O/%s: %p\n", type, dentry);
+                dentry = simple_mkdir(filter->fo_dentry_O, name, 0700);
+                CDEBUG(D_INODE, "got/created O/%s: %p\n", name, dentry);
                 if (IS_ERR(dentry)) {
                         rc = PTR_ERR(dentry);
-                        CERROR("cannot create O/%s: rc = %d\n", type, rc);
+                        CERROR("cannot create O/%s: rc = %d\n", name, rc);
                         GOTO(out_O_mode, rc);
                 }
                 filter->fo_dentry_O_mode[mode] = dentry;
         }
 
-        file = filp_open("D/status", O_RDWR | O_CREAT, 0700);
+        file = filp_open(LAST_RCVD, O_RDWR | O_CREAT, 0700);
         if ( !file || IS_ERR(file) ) {
                 rc = PTR_ERR(file);
-                CERROR("OBD filter: cannot open/create status %s: rc = %d\n",
-                       "D/status", rc);
+                CERROR("OBD filter: cannot open/create %s: rc = %d\n",
+                       LAST_RCVD, rc);
                 GOTO(out_O_mode, rc);
         }
 
@@ -210,7 +184,7 @@ static int filter_prep(struct obd_device *obd)
 
         return(rc);
 
-out_O_mode:
+ out_O_mode:
         while (mode-- > 0) {
                 struct dentry *dentry = filter->fo_dentry_O_mode[mode];
                 if (dentry) {
@@ -218,7 +192,6 @@ out_O_mode:
                         filter->fo_dentry_O_mode[mode] = NULL;
                 }
         }
-out_O:
         f_dput(filter->fo_dentry_O);
         filter->fo_dentry_O = NULL;
         goto out;
@@ -235,9 +208,9 @@ static void filter_post(struct obd_device *obd)
         int mode;
 
         push_ctxt(&saved, &filter->fo_ctxt, NULL);
-        file = filp_open("D/status", O_RDWR | O_CREAT, 0700);
+        file = filp_open(LAST_RCVD, O_RDWR | O_CREAT, 0700);
         if (IS_ERR(file)) {
-                CERROR("OBD filter: cannot create status file\n");
+                CERROR("OBD filter: cannot create %s\n", LAST_RCVD);
                 goto out;
         }
 
@@ -273,7 +246,6 @@ static __u64 filter_next_id(struct obd_device *obd)
         id = ++obd->u.filter.fo_lastobjid;
         spin_unlock(&obd->u.filter.fo_objidlock);
 
-        /* FIXME: write the lastobjid to disk here */
         return id;
 }
 
@@ -281,7 +253,7 @@ static __u64 filter_next_id(struct obd_device *obd)
 /* parent i_sem is already held if needed for exclusivity */
 static struct dentry *filter_fid2dentry(struct obd_device *obd,
                                         struct dentry *dparent,
-                                        __u64 id, __u32 type, int locked)
+                                        __u64 id, int locked)
 {
         struct super_block *sb = obd->u.filter.fo_sb;
         struct dentry *dchild;
@@ -295,20 +267,14 @@ static struct dentry *filter_fid2dentry(struct obd_device *obd,
         }
 
         if (id == 0) {
-                CERROR("fatal: invalid object #0\n");
+                CERROR("fatal: invalid object id 0\n");
                 LBUG();
                 RETURN(ERR_PTR(-ESTALE));
         }
 
-        if (!(type & S_IFMT)) {
-                CERROR("OBD %s, object "LPU64" has bad type: %o\n",
-                       __FUNCTION__, id, type);
-                RETURN(ERR_PTR(-EINVAL));
-        }
-
         len = sprintf(name, LPU64, id);
-        CDEBUG(D_INODE, "opening object O/%s/%s\n", obd_mode_to_type(type),
-               name);
+        CDEBUG(D_INODE, "opening object O/%*s/%s\n",
+               dparent->d_name.len, dparent->d_name.name, name);
         if (!locked)
                 down(&dparent->d_inode->i_sem);
         dchild = lookup_one_len(name, dparent, len);
@@ -319,8 +285,8 @@ static struct dentry *filter_fid2dentry(struct obd_device *obd,
                 RETURN(dchild);
         }
 
-        CDEBUG(D_INODE, "got child obj O/%s/%s: %p, count = %d\n",
-               obd_mode_to_type(type), name, dchild,
+        CDEBUG(D_INODE, "got child obj O/%*s/%s: %p, count = %d\n",
+               dparent->d_name.len, dparent->d_name.name, name, dchild,
                atomic_read(&dchild->d_count));
 
         LASSERT(atomic_read(&dchild->d_count) > 0);
@@ -333,6 +299,7 @@ static inline struct dentry *filter_parent(struct obd_device *obd,
 {
         struct filter_obd *filter = &obd->u.filter;
 
+        LASSERT((mode & S_IFMT) == S_IFREG);   /* only regular files for now */
         return filter->fo_dentry_O_mode[(mode & S_IFMT) >> S_SHIFT];
 }
 
@@ -453,7 +420,6 @@ static int filter_destroy_internal(struct obd_device *obd,
 
         push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
         rc = vfs_unlink(dir_dentry->d_inode, object_dentry);
-        /* XXX unlink from PENDING directory now too */
         pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
 
         if (rc)
@@ -483,7 +449,10 @@ static int filter_close_internal(struct obd_device *obd,
                 struct dentry *dir_dentry = filter_parent(obd, S_IFREG);
 
                 down(&dir_dentry->d_inode->i_sem);
+                /* XXX start transaction */
+                /* XXX unlink from PENDING directory now too */
                 rc2 = filter_destroy_internal(obd, dir_dentry, object_dentry);
+                /* XXX finish transaction */
                 if (rc2 && !rc)
                         rc = rc2;
                 up(&dir_dentry->d_inode->i_sem);
@@ -496,126 +465,70 @@ static int filter_close_internal(struct obd_device *obd,
 }
 
 /* obd methods */
-static int filter_connect(struct lustre_handle *conn, struct obd_device *obd,
-                          obd_uuid_t cluuid, struct recovd_obd *recovd,
-                          ptlrpc_recovery_cb_t recover)
-{
-        struct obd_export *exp;
-        int rc;
-
-        ENTRY;
-        MOD_INC_USE_COUNT;
-        rc = class_connect(conn, obd, cluuid);
-        if (rc)
-                GOTO(out_dec, rc);
-        exp = class_conn2export(conn);
-        LASSERT(exp);
-
-        INIT_LIST_HEAD(&exp->exp_filter_data.fed_open_head);
-        spin_lock_init(&exp->exp_filter_data.fed_lock);
-out:
-        RETURN(rc);
-
-out_dec:
-        MOD_DEC_USE_COUNT;
-        goto out;
-}
-
-static int filter_disconnect(struct lustre_handle *conn)
-{
-        struct obd_export *exp = class_conn2export(conn);
-        struct filter_export_data *fed;
-        int rc;
-        ENTRY;
-
-        LASSERT(exp);
-        fed = &exp->exp_filter_data;
-        spin_lock(&fed->fed_lock);
-        while (!list_empty(&fed->fed_open_head)) {
-                struct filter_file_data *ffd;
-
-                ffd = list_entry(fed->fed_open_head.next, typeof(*ffd),
-                                 ffd_export_list);
-                list_del(&ffd->ffd_export_list);
-                spin_unlock(&fed->fed_lock);
-
-                CERROR("force closing file %*s on disconnect\n",
-                       ffd->ffd_file->f_dentry->d_name.len,
-                       ffd->ffd_file->f_dentry->d_name.name);
-
-                filter_close_internal(exp->exp_obd, ffd);
-                spin_lock(&fed->fed_lock);
-        }
-        spin_unlock(&fed->fed_lock);
-
-        ldlm_cancel_locks_for_export(exp);
-        rc = class_disconnect(conn);
-        if (!rc)
-                MOD_DEC_USE_COUNT;
-
-        /* XXX cleanup preallocated inodes */
-        RETURN(rc);
-}
-
 /* mount the file system (secretly) */
 static int filter_setup(struct obd_device *obd, obd_count len, void *buf)
 {
         struct obd_ioctl_data* data = buf;
         struct filter_obd *filter;
         struct vfsmount *mnt;
-        int err = 0;
+        int rc = 0; /* status handed back by the shared error exit at err_dec */
         ENTRY;
 
+        MOD_INC_USE_COUNT; /* taken first: every failure label falls through to err_dec */
         if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2)
-                RETURN(-EINVAL);
+                GOTO(err_dec, rc = -EINVAL); /* both inline buffers are required */
+
+        obd->obd_fsops = fsfilt_get_ops(data->ioc_inlbuf2); /* same string is the fs type for the mount below */
+        if (IS_ERR(obd->obd_fsops))
+                GOTO(err_dec, rc = PTR_ERR(obd->obd_fsops)); /* nothing to release yet except the use count */
 
-        MOD_INC_USE_COUNT;
         mnt = do_kern_mount(data->ioc_inlbuf2, 0, data->ioc_inlbuf1, NULL);
-        err = PTR_ERR(mnt);
+        rc = PTR_ERR(mnt); /* only meaningful when IS_ERR(mnt) is true */
         if (IS_ERR(mnt))
-                GOTO(err_dec, err);
+                GOTO(err_ops, rc); /* mount failed: release the fsfilt ops, then the use count */
 
         filter = &obd->u.filter;;
         filter->fo_vfsmnt = mnt;
         filter->fo_fstype = strdup(data->ioc_inlbuf2);
         filter->fo_sb = mnt->mnt_root->d_inode->i_sb;
-        CERROR("%s: mnt is %p\n", data->ioc_inlbuf1, filter->fo_vfsmnt);
-        /* XXX is this even possible if do_kern_mount succeeded? */
-        if (!filter->fo_sb)
-                GOTO(err_kfree, err = -ENODEV);
+        CDEBUG(D_SUPER, "%s: mnt = %p\n", data->ioc_inlbuf1, mnt); /* NOTE(review): strdup() result above is not checked */
 
         OBD_SET_CTXT_MAGIC(&filter->fo_ctxt);
         filter->fo_ctxt.pwdmnt = mnt;
         filter->fo_ctxt.pwd = mnt->mnt_root;
         filter->fo_ctxt.fs = get_ds();
 
-        err = filter_prep(obd);
-        if (err)
-                GOTO(err_kfree, err);
+        rc = filter_prep(obd);
+        if (rc)
+                GOTO(err_kfree, rc); /* frees fo_fstype and drops the mount */
+
         spin_lock_init(&filter->fo_fddlock);
         spin_lock_init(&filter->fo_objidlock);
         INIT_LIST_HEAD(&filter->fo_export_list);
 
         obd->obd_namespace =
                 ldlm_namespace_new("filter-tgt", LDLM_NAMESPACE_SERVER);
-        if (obd->obd_namespace == NULL)
-                LBUG();
+        if (!obd->obd_namespace)
+                GOTO(err_post, rc = -ENOMEM); /* a NULL namespace is reported as -ENOMEM */
 
         ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL,
                            "filter_ldlm_cb_client", &obd->obd_ldlm_client);
 
         RETURN(0);
 
+err_post: /* reached only after filter_prep() succeeded */
+        filter_post(obd); /* presumably the inverse of filter_prep() -- confirm */
 err_kfree:
         kfree(filter->fo_fstype);
         unlock_kernel();
         mntput(filter->fo_vfsmnt);
         filter->fo_sb = 0;
         lock_kernel();
-
+err_ops: /* entered directly when do_kern_mount() fails */
+        fsfilt_put_ops(obd->obd_fsops); /* pairs with fsfilt_get_ops() above */
 err_dec:
         MOD_DEC_USE_COUNT;
-        return err;
+        return rc; /* always non-zero here: success leaves through RETURN(0) */
 }
 
 
@@ -646,6 +559,7 @@ static int filter_cleanup(struct obd_device *obd)
         mntput(obd->u.filter.fo_vfsmnt);
         obd->u.filter.fo_sb = 0;
         kfree(obd->u.filter.fo_fstype);
+        fsfilt_put_ops(obd->obd_fsops);
 
         lock_kernel();
 
@@ -653,6 +567,76 @@ static int filter_cleanup(struct obd_device *obd)
         RETURN(0);
 }
 
+int filter_attach(struct obd_device *dev, obd_count len, void *data)
+{       /* only forwards dev to lprocfs_reg_obd(); len and data are unused */
+        return lprocfs_reg_obd(dev, status_var_nm_1, dev); /* dev is passed as first and last argument */
+}
+
+int filter_detach(struct obd_device *dev)
+{       /* only forwards dev to lprocfs_dereg_obd() */
+        return lprocfs_dereg_obd(dev); /* its result is returned unchanged */
+}
+
+static int filter_connect(struct lustre_handle *conn, struct obd_device *obd,
+                          obd_uuid_t cluuid, struct recovd_obd *recovd,
+                          ptlrpc_recovery_cb_t recover)
+{       /* recovd and recover are not referenced in this function */
+        struct obd_export *exp;
+        int rc;
+
+        ENTRY;
+        MOD_INC_USE_COUNT; /* dropped again at out_dec if class_connect() fails */
+        rc = class_connect(conn, obd, cluuid);
+        if (rc)
+                GOTO(out_dec, rc);
+        exp = class_conn2export(conn);
+        LASSERT(exp); /* an export is expected once class_connect() returned 0 */
+
+        INIT_LIST_HEAD(&exp->exp_filter_data.fed_open_head); /* list drained by filter_disconnect() */
+        spin_lock_init(&exp->exp_filter_data.fed_lock); /* held around fed_open_head accesses in filter_disconnect() */
+out:
+        RETURN(rc); /* 0 on success, class_connect()'s error otherwise */
+
+out_dec: /* failure path: undo the use-count increment, then leave via out */
+        MOD_DEC_USE_COUNT;
+        goto out;
+}
+
+static int filter_disconnect(struct lustre_handle *conn)
+{       /* force-close the export's open files, cancel its locks, then disconnect */
+        struct obd_export *exp = class_conn2export(conn);
+        struct filter_export_data *fed;
+        int rc;
+        ENTRY;
+
+        LASSERT(exp);
+        fed = &exp->exp_filter_data;
+        spin_lock(&fed->fed_lock);
+        while (!list_empty(&fed->fed_open_head)) { /* always takes the current head entry */
+                struct filter_file_data *ffd;
+
+                ffd = list_entry(fed->fed_open_head.next, typeof(*ffd),
+                                 ffd_export_list);
+                list_del(&ffd->ffd_export_list); /* unlinked while fed_lock is still held */
+                spin_unlock(&fed->fed_lock); /* lock is not held across the CERROR/close below */
+
+                CERROR("force closing file %.*s on disconnect\n",
+                       ffd->ffd_file->f_dentry->d_name.len, /* precision: print at most len bytes of the name */
+                       ffd->ffd_file->f_dentry->d_name.name);
+
+                filter_close_internal(exp->exp_obd, ffd); /* return value is ignored here */
+                spin_lock(&fed->fed_lock); /* re-taken before the list is tested again */
+        }
+        spin_unlock(&fed->fed_lock);
+
+        ldlm_cancel_locks_for_export(exp);
+        rc = class_disconnect(conn);
+        if (!rc) /* use count is released only when class_disconnect() succeeds */
+                MOD_DEC_USE_COUNT;
+
+        /* XXX cleanup preallocated inodes */
+        RETURN(rc); /* result of class_disconnect() */
+}
 
 static void filter_from_inode(struct obdo *oa, struct inode *inode, int valid)
 {
@@ -714,7 +698,7 @@ static struct dentry *__filter_oa2dentry(struct lustre_handle *conn,
                         RETURN(ERR_PTR(-EINVAL));
                 }
                 dentry = filter_fid2dentry(obd, filter_parent(obd, oa->o_mode),
-                                           oa->o_id, oa->o_mode, locked);
+                                           oa->o_id, locked);
         }
 
         if (IS_ERR(dentry)) {
@@ -751,6 +735,7 @@ static int filter_getattr(struct lustre_handle *conn, struct obdo *oa,
         RETURN(rc);
 }
 
+/* this is called from filter_truncate() until we have filter_punch() */
 static int filter_setattr(struct lustre_handle *conn, struct obdo *oa,
                           struct lov_stripe_md *md)
 {
@@ -771,21 +756,26 @@ static int filter_setattr(struct lustre_handle *conn, struct obdo *oa,
         iattr.ia_mode = (iattr.ia_mode & ~S_IFMT) | S_IFREG;
         inode = dentry->d_inode;
 
+        push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
         lock_kernel();
         if (iattr.ia_valid & ATTR_SIZE)
                 down(&inode->i_sem);
-        push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
+
+        /* XXX start transaction */
         if (inode->i_op->setattr)
                 rc = inode->i_op->setattr(dentry, &iattr);
         else
                 rc = inode_setattr(inode, &iattr);
-        pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
+        /* XXX update last_rcvd, finish transaction */
+
         if (iattr.ia_valid & ATTR_SIZE) {
                 up(&inode->i_sem);
                 oa->o_valid = OBD_MD_FLBLOCKS | OBD_MD_FLCTIME | OBD_MD_FLMTIME;
                 obdo_from_inode(oa, inode, oa->o_valid);
         }
+
         unlock_kernel();
+        pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
 
         f_dput(dentry);
         RETURN(rc);
@@ -865,10 +855,11 @@ static int filter_create(struct lustre_handle *conn, struct obdo *oa,
                          struct lov_stripe_md **ea)
 {
         struct obd_device *obd = class_conn2obd(conn);
-        char name[64];
         struct obd_run_ctxt saved;
+        struct dentry *dir_dentry;
         struct dentry *new;
         struct iattr;
+        int rc;
         ENTRY;
 
         if (!obd) {
@@ -876,31 +867,42 @@ static int filter_create(struct lustre_handle *conn, struct obdo *oa,
                 return -EINVAL;
         }
 
-        if (!(oa->o_mode & S_IFMT)) {
-                CERROR("OBD %s, object "LPU64" has bad type: %o\n",
-                       __FUNCTION__, oa->o_id, oa->o_mode);
-                return -ENOENT;
-        }
-
         oa->o_id = filter_next_id(obd);
 
-        //filter_id(name, oa->o_id, oa->o_mode);
-        sprintf(name, LPU64, oa->o_id);
         push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
-        new = simple_mknod(filter_parent(obd, oa->o_mode), name, oa->o_mode);
-        pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
-        if (IS_ERR(new)) {
-                CERROR("Error mknod obj %s, err %ld\n", name, PTR_ERR(new));
-                return -ENOENT;
+        dir_dentry = filter_parent(obd, oa->o_mode);
+        down(&dir_dentry->d_inode->i_sem);
+        new = filter_fid2dentry(obd, dir_dentry, oa->o_id, 1);
+        if (IS_ERR(new))
+                GOTO(out, rc = PTR_ERR(new));
+
+        if (new->d_inode) {
+                /* This would only happen if lastobjid was bad on disk */
+                CERROR("objid O/%*s/"LPU64" already exists\n",
+                       dir_dentry->d_name.len, dir_dentry->d_name.name,
+                       oa->o_id);
+                LBUG();
+                GOTO(out, rc = -EEXIST);
         }
 
+        /* XXX start transaction */
+        rc = vfs_create(dir_dentry->d_inode, new, oa->o_mode);
+        if (rc)
+                GOTO(out_put, rc);
+        /* XXX update last_rcvd+lastobjid on disk, finish transaction */
+
         /* Set flags for fields we have set in the inode struct */
         oa->o_valid = OBD_MD_FLID | OBD_MD_FLBLKSZ | OBD_MD_FLBLOCKS |
                  OBD_MD_FLMTIME | OBD_MD_FLATIME | OBD_MD_FLCTIME;
         filter_from_inode(oa, new->d_inode, oa->o_valid);
-        f_dput(new);
 
-        return 0;
+        EXIT;
+out_put:
+        f_dput(new);
+out:
+        up(&dir_dentry->d_inode->i_sem);
+        pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
+        return rc;
 }
 
 static int filter_destroy(struct lustre_handle *conn, struct obdo *oa,
@@ -927,6 +929,7 @@ static int filter_destroy(struct lustre_handle *conn, struct obdo *oa,
                 GOTO(out, rc = -ENOENT);
 
         fdd = object_dentry->d_fsdata;
+        /* XXX start transaction */
         if (fdd && atomic_read(&fdd->fdd_open_count)) {
                 if (!(fdd->fdd_flags & FILTER_FLAG_DESTROY)) {
                         fdd->fdd_flags |= FILTER_FLAG_DESTROY;
@@ -943,6 +946,7 @@ static int filter_destroy(struct lustre_handle *conn, struct obdo *oa,
 
         rc = filter_destroy_internal(obd, dir_dentry, object_dentry);
 out_dput:
+        /* XXX update last_rcvd on disk, finish transaction */
         f_dput(object_dentry);
 
         EXIT;
@@ -951,7 +955,7 @@ out:
         return rc;
 }
 
-/* NB count and offset are used for punch, but not truncate */
+/* NB start and end are used for punch, but not truncate */
 static int filter_truncate(struct lustre_handle *conn, struct obdo *oa,
                            struct lov_stripe_md *lsm,
                            obd_off start, obd_off end)
@@ -969,266 +973,6 @@ static int filter_truncate(struct lustre_handle *conn, struct obdo *oa,
         RETURN(error);
 }
 
-static int filter_pgcache_brw(int cmd, struct lustre_handle *conn,
-                              struct lov_stripe_md *lsm, obd_count oa_bufs,
-                              struct brw_page *pga, struct obd_brw_set *set)
-{
-        struct obd_export       *export = class_conn2export(conn);
-        struct obd_run_ctxt      saved;
-        struct super_block      *sb;
-        int                      pnum;          /* index to pages (bufs) */
-        unsigned long            retval;
-        int                      error;
-        struct file             *file;
-        int pg;
-        ENTRY;
-
-        if (!export) {
-                CDEBUG(D_IOCTL, "invalid client "LPX64"\n", conn->addr);
-                RETURN(-EINVAL);
-        }
-
-        sb = export->exp_obd->u.filter.fo_sb;
-        push_ctxt(&saved, &export->exp_obd->u.filter.fo_ctxt, NULL);
-        pnum = 0; /* pnum indexes buf 0..num_pages */
-
-        file = filter_obj_open(export, lsm->lsm_object_id, S_IFREG);
-        if (IS_ERR(file))
-                GOTO(out, retval = PTR_ERR(file));
-
-        /* count doubles as retval */
-        for (pg = 0; pg < oa_bufs; pg++) {
-                CDEBUG(D_INODE, "OP %d inode %lu pgno: (%d) "LPU64
-                       ") off count ("LPU64",%d)\n",
-                       cmd, file->f_dentry->d_inode->i_ino, pnum,
-                       pga[pnum].off >> PAGE_CACHE_SHIFT, pga[pnum].off,
-                       (int)pga[pnum].count);
-                if (cmd & OBD_BRW_WRITE) {
-                        loff_t off;
-                        char *buffer;
-                        off = pga[pnum].off;
-                        buffer = kmap(pga[pnum].pg);
-                        retval = file->f_op->write(file, buffer,
-                                                   pga[pnum].count,
-                                                   &off);
-                        kunmap(pga[pnum].pg);
-                        CDEBUG(D_INODE, "retval %ld\n", retval);
-                } else {
-                        loff_t off = pga[pnum].off;
-                        char *buffer = kmap(pga[pnum].pg);
-
-                        if (off >= file->f_dentry->d_inode->i_size) {
-                                memset(buffer, 0, pga[pnum].count);
-                                retval = pga[pnum].count;
-                        } else {
-                                retval = file->f_op->read(file, buffer,
-                                                          pga[pnum].count, &off);
-                        }
-                        kunmap(pga[pnum].pg);
-
-                        if (retval != pga[pnum].count) {
-                                filp_close(file, 0);
-                                GOTO(out, retval = -EIO);
-                        }
-                        CDEBUG(D_INODE, "retval %ld\n", retval);
-                }
-                pnum++;
-        }
-        /* sizes and blocks are set by generic_file_write */
-        /* ctimes/mtimes will follow with a setattr call */
-        filp_close(file, 0);
-
-        /* XXX: do something with callback if it is set? */
-
-        EXIT;
-out:
-        pop_ctxt(&saved, &export->exp_obd->u.filter.fo_ctxt, NULL);
-        error = (retval >= 0) ? 0 : retval;
-        return error;
-}
-
-/*
- * Calculate the number of buffer credits needed to write multiple pages in
- * a single ext3/extN transaction.  No, this shouldn't be here, but as yet
- * ext3 doesn't have a nice API for calculating this sort of thing in advance.
- *
- * See comment above ext3_writepage_trans_blocks for details.  We assume
- * no data journaling is being done, but it does allow for all of the pages
- * being non-contiguous.  If we are guaranteed contiguous pages we could
- * reduce the number of (d)indirect blocks a lot.
- *
- * With N blocks per page and P pages, for each inode we have at most:
- * N*P indirect
- * min(N*P, blocksize/4 + 1) dindirect blocks
- * 1 tindirect
- *
- * For the entire filesystem, we have at most:
- * min(sum(nindir + P), ngroups) bitmap blocks (from the above)
- * min(sum(nindir + P), gdblocks) group descriptor blocks (from the above)
- * 1 inode block
- * 1 superblock
- * 2 * EXT3_SINGLEDATA_TRANS_BLOCKS for the quota files
- */
-static int ext3_credits_needed(struct super_block *sb, int objcount,
-                               struct obd_ioobj *obj)
-{
-        struct obd_ioobj *o = obj;
-        int blockpp = 1 << (PAGE_CACHE_SHIFT - sb->s_blocksize_bits);
-        int addrpp = EXT3_ADDR_PER_BLOCK(sb) * blockpp;
-        int nbitmaps = 0;
-        int ngdblocks = 0;
-        int needed = objcount + 1;
-        int i;
-
-        for (i = 0; i < objcount; i++, o++) {
-                int nblocks = o->ioo_bufcnt * blockpp;
-                int ndindirect = min(nblocks, addrpp + 1);
-                int nindir = nblocks + ndindirect + 1;
-
-                nbitmaps += nindir + nblocks;
-                ngdblocks += nindir + nblocks;
-
-                needed += nindir;
-        }
-
-        /* Assumes ext3 and extN have same sb_info layout at the start. */
-        if (nbitmaps > EXT3_SB(sb)->s_groups_count)
-                nbitmaps = EXT3_SB(sb)->s_groups_count;
-        if (ngdblocks > EXT3_SB(sb)->s_gdb_count)
-                ngdblocks = EXT3_SB(sb)->s_gdb_count;
-
-        needed += nbitmaps + ngdblocks;
-
-#ifdef CONFIG_QUOTA
-        /* We assume that there will be 1 bit set in s_dquot.flags for each
-         * quota file that is active.  This is at least true for now.
-         */
-        needed += hweight32(sb_any_quota_enabled(sb)) *
-                EXT3_SINGLEDATA_TRANS_BLOCKS;
-#endif
-
-        return needed;
-}
-
-/* We have to start a huge journal transaction here to hold all of the
- * metadata for the pages being written here.  This is necessitated by
- * the fact that we do lots of prepare_write operations before we do
- * any of the matching commit_write operations, so even if we split
- * up to use "smaller" transactions none of them could complete until
- * all of them were opened.  By having a single journal transaction,
- * we eliminate duplicate reservations for common blocks like the
- * superblock and group descriptors or bitmaps.
- *
- * We will start the transaction here, but each prepare_write will
- * add a refcount to the transaction, and each commit_write will
- * remove a refcount.  The transaction will be closed when all of
- * the pages have been written.
- */
-static void *ext3_filter_journal_start(struct filter_obd *filter,
-                                       int objcount, struct obd_ioobj *obj,
-                                       int niocount, struct niobuf_remote *nb)
-{
-        journal_t *journal = NULL;
-        handle_t *handle = NULL;
-        int needed;
-
-        /* It appears that some kernels have different values for
-         * EXT*_MAX_GROUP_LOADED (either 8 or 32), so we cannot
-         * assume anything after s_inode_bitmap_number is the same.
-         */
-        if (!strcmp(filter->fo_fstype, "ext3"))
-                journal = EXT3_SB(filter->fo_sb)->s_journal;
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        else if (!strcmp(filter->fo_fstype, "extN"))
-                journal = EXTN_SB(filter->fo_sb)->s_journal;
-#endif
-        needed = ext3_credits_needed(filter->fo_sb, objcount, obj);
-
-        /* The number of blocks we could _possibly_ dirty can very large.
-         * We reduce our request if it is absurd (and we couldn't get that
-         * many credits for a single handle anyways).
-         *
-         * At some point we have to limit the size of I/Os sent at one time,
-         * increase the size of the journal, or we have to calculate the
-         * actual journal requirements more carefully by checking all of
-         * the blocks instead of being maximally pessimistic.  It remains to
-         * be seen if this is a real problem or not.
-         */
-        if (needed > journal->j_max_transaction_buffers) {
-                CERROR("want too many journal credits (%d) using %d instead\n",
-                       needed, journal->j_max_transaction_buffers);
-                needed = journal->j_max_transaction_buffers;
-        }
-
-        lock_kernel();
-        handle = journal_start(journal, needed);
-        unlock_kernel();
-        if (IS_ERR(handle))
-                CERROR("can't get handle for %d credits: rc = %ld\n", needed,
-                       PTR_ERR(handle));
-
-        return(handle);
-}
-
-static void *filter_journal_start(void **journal_save,
-                                  struct filter_obd *filter,
-                                  int objcount, struct obd_ioobj *obj,
-                                  int niocount, struct niobuf_remote *nb)
-{
-        void *handle = NULL;
-
-        /* This may not be necessary - we probably never have a
-         * transaction started when we enter here, so we can
-         * remove the saving of the journal state entirely.
-         * For now leave it in just to see if it ever happens.
-         */
-        *journal_save = current->journal_info;
-        if (*journal_save) {
-                CERROR("Already have handle %p???\n", *journal_save);
-                LBUG();
-                current->journal_info = NULL;
-        }
-
-        if (!strcmp(filter->fo_fstype, "ext3") ||
-            !strcmp(filter->fo_fstype, "extN"))
-                handle = ext3_filter_journal_start(filter, objcount, obj,
-                                                   niocount, nb);
-        return handle;
-}
-
-static int ext3_filter_journal_stop(void *handle)
-{
-        int rc;
-
-        /* We got a refcount on the handle for each call to prepare_write,
-         * so we can drop the "parent" handle here to avoid the need for
-         * osc to call back into filterobd to close the handle.  The
-         * remaining references will be dropped in commit_write.
-         */
-        lock_kernel();
-        rc = journal_stop((handle_t *)handle);
-        unlock_kernel();
-
-        return rc;
-}
-
-static int filter_journal_stop(void *journal_save, struct filter_obd *filter,
-                               void *handle)
-{
-        int rc = 0;
-
-        if (!strcmp(filter->fo_fstype, "ext3") ||
-            !strcmp(filter->fo_fstype, "extN"))
-                rc = ext3_filter_journal_stop(handle);
-
-        if (rc)
-                CERROR("error on journal stop: rc = %d\n", rc);
-
-        current->journal_info = journal_save;
-
-        return rc;
-}
-
 static inline void lustre_put_page(struct page *page)
 {
         kunmap(page);
@@ -1440,57 +1184,73 @@ static int filter_preprw(int cmd, struct lustre_handle *conn,
 {
         struct obd_run_ctxt saved;
         struct obd_device *obd;
-        struct obd_ioobj *o = obj;
+        struct obd_ioobj *o;
         struct niobuf_remote *rnb = nb;
         struct niobuf_local *lnb = res;
-        void *journal_save = NULL;
+        struct dentry *dir_dentry;
+        struct fsfilt_objinfo *fso;
         int pglocked = 0;
         int rc = 0;
         int i;
         ENTRY;
 
+        memset(res, 0, niocount * sizeof(*res));
+
         obd = class_conn2obd(conn);
         if (!obd) {
                 CDEBUG(D_IOCTL, "invalid client "LPX64"\n", conn->addr);
                 RETURN(-EINVAL);
         }
-        memset(res, 0, sizeof(*res) * niocount);
 
-        push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
+        LASSERT(objcount < 16); // theoretically we support multi-obj BRW
 
-        if (cmd & OBD_BRW_WRITE) {
-                *desc_private = filter_journal_start(&journal_save,
-                                                     &obd->u.filter,
-                                                     objcount, obj, niocount,
-                                                     nb);
-                if (IS_ERR(*desc_private))
-                        GOTO(out_ctxt, rc = PTR_ERR(*desc_private));
-        }
+        OBD_ALLOC(fso, objcount * sizeof(*fso));
+        if (!fso)
+                RETURN(-ENOMEM);
 
-        obd_kmap_get(niocount, 1);
+        push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
+        dir_dentry = filter_parent(obd, S_IFREG);
 
-        for (i = 0; i < objcount; i++, o++) {
+        for (i = 0, o = obj; i < objcount; i++, o++) {
                 struct filter_dentry_data *fdd;
                 struct dentry *dentry;
-                struct inode *inode;
-                int j;
 
-                dentry = filter_fid2dentry(obd, filter_parent(obd, S_IFREG),
-                                           o->ioo_id, S_IFREG, 0);
+                LASSERT(o->ioo_bufcnt);
 
-                if (!(fdd = dentry->d_fsdata) || !atomic_read(&fdd->fdd_open_count))
-                        CERROR("I/O to unopened object "LPX64"\n", o->ioo_id);
+                dentry = filter_fid2dentry(obd, dir_dentry, o->ioo_id, 0);
 
                 if (IS_ERR(dentry))
-                        GOTO(out_clean, rc = PTR_ERR(dentry));
-                inode = dentry->d_inode;
-                if (!inode) {
+                        GOTO(out_objinfo, rc = PTR_ERR(dentry));
+
+                fso[i].fso_dentry = dentry;
+                fso[i].fso_bufcnt = o->ioo_bufcnt;
+
+                if (!dentry->d_inode) {
                         CERROR("trying to BRW to non-existent file "LPU64"\n",
                                o->ioo_id);
-                        f_dput(dentry);
-                        GOTO(out_clean, rc = -ENOENT);
+                        GOTO(out_objinfo, rc = -ENOENT);
                 }
 
+                fdd = dentry->d_fsdata;
+                if (!fdd || !atomic_read(&fdd->fdd_open_count))
+                        CDEBUG(D_PAGE, "I/O to unopened object "LPX64"\n",
+                               o->ioo_id);
+        }
+
+        if (cmd & OBD_BRW_WRITE) {
+                *desc_private = fsfilt_brw_start(obd, objcount, fso,
+                                                 niocount, nb);
+                if (IS_ERR(*desc_private))
+                        GOTO(out_objinfo, rc = PTR_ERR(*desc_private));
+        }
+
+        obd_kmap_get(niocount, 1);
+
+        for (i = 0, o = obj; i < objcount; i++, o++) {
+                struct dentry *dentry = fso->fso_dentry;
+                struct inode *inode = dentry->d_inode;
+                int j;
+
                 for (j = 0; j < o->ioo_bufcnt; j++, rnb++, lnb++) {
                         struct page *page;
 
@@ -1506,8 +1266,11 @@ static int filter_preprw(int cmd, struct lustre_handle *conn,
                                 page = lustre_get_page_read(inode, rnb);
 
                         if (IS_ERR(page)) {
-                                f_dput(dentry);
-                                GOTO(out_clean, rc = PTR_ERR(page));
+                                if (cmd & OBD_BRW_WRITE)
+                                        fsfilt_commit(obd, dir_dentry->d_inode,
+                                                      *desc_private);
+
+                                GOTO(out_pages, rc = PTR_ERR(page));
                         }
 
                         lnb->addr = page_address(page);
@@ -1517,27 +1280,34 @@ static int filter_preprw(int cmd, struct lustre_handle *conn,
                 }
         }
 
-out_stop:
         if (cmd & OBD_BRW_WRITE) {
-                int err = filter_journal_stop(journal_save, &obd->u.filter,
-                                              *desc_private);
-                if (!rc)
-                        rc = err;
+                int err = fsfilt_commit(obd, dir_dentry->d_inode,
+                                        *desc_private);
+                if (err)
+                        GOTO(out_pages, rc = err);
         }
-out_ctxt:
+
+        EXIT;
+out:
+        OBD_FREE(fso, objcount * sizeof(*fso));
+        current->journal_info = NULL;
         pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
-        RETURN(rc);
-out_clean:
+        return rc;
+
+out_pages:
         while (lnb-- > res) {
                 CERROR("error cleanup on brw\n");
-                f_dput(lnb->dentry);
                 if (cmd & OBD_BRW_WRITE)
                         filter_commit_write(lnb->page, 0, PAGE_SIZE, rc);
                 else
                         lustre_put_page(lnb->page);
         }
         obd_kmap_put(niocount);
-        goto out_stop;
+out_objinfo:
+        for (i = 0; i < objcount && fso[i].fso_dentry; i++)
+                f_dput(fso[i].fso_dentry);
+
+        goto out;
 }
 
 static int filter_write_locked_page(struct niobuf_local *lnb)
@@ -1588,21 +1358,19 @@ static int filter_commitrw(int cmd, struct lustre_handle *conn,
         struct obd_ioobj *o;
         struct niobuf_local *r;
         struct obd_device *obd = class_conn2obd(conn);
-        void *journal_save;
         int found_locked = 0;
         int rc = 0;
         int i;
         ENTRY;
 
         push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
-        lock_kernel();
-        journal_save = current->journal_info;
-        LASSERT(!journal_save);
 
+        LASSERT(!current->journal_info);
         current->journal_info = private;
-        unlock_kernel();
+
         for (i = 0, o = obj, r = res; i < objcount; i++, o++) {
                 int j;
+
                 for (j = 0 ; j < o->ioo_bufcnt ; j++, r++) {
                         struct page *page = r->page;
 
@@ -1627,9 +1395,6 @@ static int filter_commitrw(int cmd, struct lustre_handle *conn,
                         f_dput(r->dentry);
                 }
         }
-        lock_kernel();
-        current->journal_info = journal_save;
-        unlock_kernel();
 
         if (!found_locked)
                 goto out_ctxt;
@@ -1650,22 +1415,76 @@ static int filter_commitrw(int cmd, struct lustre_handle *conn,
         }
 
 out_ctxt:
+        LASSERT(!current->journal_info);
+        current->journal_info = NULL;
+
         pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
         RETURN(rc);
 }
 
-static int filter_statfs(struct lustre_handle *conn, struct obd_statfs *osfs)
+/*
+ * Page-array bulk read/write entry point (installed as o_brw).
+ *
+ * Builds one obd_ioobj plus remote/local niobuf arrays describing the
+ * oa_bufs pages in 'pga', calls filter_preprw() to set up the local
+ * buffers, copies the data between the caller's pages and the local
+ * buffers (direction chosen by OBD_BRW_WRITE in 'cmd'), and finishes
+ * with filter_commitrw().
+ *
+ * 'set' is not used by this function.
+ *
+ * Returns 0 on success, -ENOMEM if either niobuf array cannot be
+ * allocated, or the error returned by filter_preprw()/filter_commitrw().
+ */
+static int filter_brw(int cmd, struct lustre_handle *conn,
+                              struct lov_stripe_md *lsm, obd_count oa_bufs,
+                              struct brw_page *pga, struct obd_brw_set *set)
 {
-        struct obd_device *obd = class_conn2obd(conn);
-        struct statfs sfs;
-        int rc;
+        struct obd_ioobj        ioo;
+        struct niobuf_local     *lnb;
+        struct niobuf_remote    *rnb;
+        obd_count               i;
+        void                    *desc_private;
+        int                     ret = 0;
+        ENTRY;
+
+        OBD_ALLOC(lnb, oa_bufs * sizeof(struct niobuf_local));
+        OBD_ALLOC(rnb, oa_bufs * sizeof(struct niobuf_remote));
+
+        /* Both allocations are attempted first; 'out' frees whichever
+         * one succeeded. */
+        if ( lnb == NULL || rnb == NULL )
+                GOTO(out, ret = -ENOMEM);
+
+        /* Describe every caller page as a remote niobuf. */
+        for ( i = 0 ; i < oa_bufs ; i++ ) {
+                rnb[i].offset = pga[i].off;
+                rnb[i].len = pga[i].count;
+        }
+
+        /* A single object covers all of the buffers. */
+        ioo.ioo_id = lsm->lsm_object_id;
+        ioo.ioo_gr = 0;
+        ioo.ioo_type = S_IFREG;
+        ioo.ioo_bufcnt = oa_bufs;
+
+        ret = filter_preprw(cmd, conn, 1, &ioo, oa_bufs, rnb, lnb,
+                                &desc_private);
+        if ( ret != 0 )
+                GOTO(out, ret);
+
+        for ( i = 0; i < oa_bufs ; i++ ) {
+                void *virt = kmap(pga[i].pg);
+                obd_off off = pga[i].off & ~PAGE_MASK;
+
+                if ( cmd & OBD_BRW_WRITE )
+                        memcpy(lnb[i].addr + off, virt + off, pga[i].count);
+                else
+                        memcpy(virt + off, lnb[i].addr + off, pga[i].count);
+
+                /* kunmap() takes the page that was mapped, not the
+                 * virtual address kmap() returned. */
+                kunmap(pga[i].pg);
+        }
 
+        ret = filter_commitrw(cmd, conn, 1, &ioo, oa_bufs, lnb, desc_private);
+
+out:
+        if ( lnb )
+                OBD_FREE(lnb, oa_bufs * sizeof(struct niobuf_local));
+        if ( rnb )
+                OBD_FREE(rnb, oa_bufs * sizeof(struct niobuf_remote));
+        RETURN(ret);
+}
+
+/*
+ * Look up the obd device behind 'conn' with class_conn2obd() and return
+ * whatever fsfilt_statfs() reports for that device's u.filter.fo_sb;
+ * 'osfs' is handed straight through to fsfilt_statfs().
+ *
+ * NOTE(review): obd is dereferenced without a NULL check -- confirm that
+ * class_conn2obd() cannot fail for connections that reach this method.
+ */
+static int filter_statfs(struct lustre_handle *conn, struct obd_statfs *osfs)
+{
+        struct obd_device *obd;
         ENTRY;
-        rc = vfs_statfs(obd->u.filter.fo_sb, &sfs);
-        if (!rc)
-                statfs_pack(osfs, &sfs);
 
-        return rc;
+        obd = class_conn2obd(conn);
+
+        RETURN(fsfilt_statfs(obd, obd->u.filter.fo_sb, osfs));
 }
 
 static int filter_get_info(struct lustre_handle *conn, obd_count keylen,
@@ -1694,13 +1513,6 @@ static int filter_get_info(struct lustre_handle *conn, obd_count keylen,
                 RETURN(0);
         }
 
-        if ( keylen == strlen("root_ino") &&
-             memcmp(key, "root_ino", keylen) == 0 ){
-                *vallen = sizeof(obd_id);
-                *val = (void *)(obd_id)FILTER_ROOTINO;
-                RETURN(0);
-        }
-
         CDEBUG(D_IOCTL, "invalid key\n");
         RETURN(-EINVAL);
 }
@@ -1794,15 +1606,7 @@ int filter_copy_data(struct lustre_handle *dst_conn, struct obdo *dst,
 
         RETURN(err);
 }
-int filter_attach(struct obd_device *dev, obd_count len, void *data)
-{
-        return lprocfs_reg_obd(dev, status_var_nm_1, dev);
-}
 
-int filter_detach(struct obd_device *dev)
-{
-        return lprocfs_dereg_obd(dev);
-}
 static struct obd_ops filter_obd_ops = {
         o_attach:      filter_attach,
         o_detach:      filter_detach,
@@ -1818,7 +1622,7 @@ static struct obd_ops filter_obd_ops = {
         o_destroy:     filter_destroy,
         o_open:        filter_open,
         o_close:       filter_close,
-        o_brw:         filter_pgcache_brw,
+        o_brw:         filter_brw,
         o_punch:       filter_truncate,
         o_preprw:      filter_preprw,
         o_commitrw:    filter_commitrw
index 2d47fcd..3dea05f 100644 (file)
@@ -270,6 +270,7 @@ static int ost_brw_read(struct ptlrpc_request *req)
         desc = ptlrpc_prep_bulk(req->rq_connection);
         if (desc == NULL)
                 GOTO(out_local, rc = -ENOMEM);
+        desc->bd_ptl_ev_hdlr = NULL;
         desc->bd_portal = OST_BULK_PORTAL;
 
         for (i = 0; i < niocount; i++) {
@@ -287,7 +288,8 @@ static int ost_brw_read(struct ptlrpc_request *req)
                 GOTO(out_bulk, rc);
 
         lwi = LWI_TIMEOUT(obd_timeout * HZ, ost_bulk_timeout, desc);
-        rc = l_wait_event(desc->bd_waitq, desc->bd_flags &PTL_BULK_FL_SENT, &lwi);
+        rc = l_wait_event(desc->bd_waitq, desc->bd_flags & PTL_BULK_FL_SENT,
+                          &lwi);
         if (rc) {
                 LASSERT(rc == -ETIMEDOUT);
                 GOTO(out_bulk, rc);
@@ -299,7 +301,7 @@ static int ost_brw_read(struct ptlrpc_request *req)
         rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
 
 out_bulk:
-        ptlrpc_free_bulk(desc);
+        ptlrpc_bulk_decref(desc);
 out_local:
         OBD_FREE(local_nb, sizeof(*local_nb) * niocount);
 out:
@@ -418,7 +420,7 @@ static int ost_brw_write(struct ptlrpc_request *req)
 
         rc = obd_commitrw(cmd, conn, objcount, tmp1, niocount, local_nb,
                           desc->bd_desc_private);
-        ptlrpc_free_bulk(desc);
+        ptlrpc_bulk_decref(desc);
         EXIT;
 out_free:
         OBD_FREE(local_nb, niocount * sizeof(*local_nb));
index af371d8..069fd2a 100644 (file)
@@ -131,6 +131,10 @@ void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc)
 
         LASSERT(list_empty(&desc->bd_set_chain));
 
+        if (atomic_read(&desc->bd_refcount) != 0)
+                CERROR("freeing desc %p with refcount %d!\n", desc,
+                       atomic_read(&desc->bd_refcount));
+
         list_for_each_safe(tmp, next, &desc->bd_page_list) {
                 struct ptlrpc_bulk_page *bulk;
                 bulk = list_entry(tmp, struct ptlrpc_bulk_page, bp_link);
@@ -253,11 +257,14 @@ int ll_brw_sync_wait(struct obd_brw_set *set, int phase)
 struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, int opcode,
                                        int count, int *lengths, char **bufs)
 {
-        struct ptlrpc_connection *conn = imp->imp_connection;
+        struct ptlrpc_connection *conn;
         struct ptlrpc_request *request;
         int rc;
         ENTRY;
 
+        LASSERT(imp);
+        conn = imp->imp_connection;
+
         OBD_ALLOC(request, sizeof(*request));
         if (!request) {
                 CERROR("request allocation out of memory\n");
@@ -726,8 +733,8 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req)
                         imp->imp_max_transno = req->rq_transno;
                 } else if (req->rq_transno != 0 &&
                            imp->imp_level == LUSTRE_CONN_FULL) {
-                        CERROR("got transno "LPD64" after "LPD64": recovery "
-                               "may not work\n", req->rq_transno,
+                        CDEBUG(D_HA, "got transno "LPD64" after "LPD64
+                               ": recovery may not work\n", req->rq_transno,
                                imp->imp_max_transno);
                 }
 
index 0bdf0d8..c260f5d 100644 (file)
@@ -170,6 +170,8 @@ static int bulk_source_callback(ptl_event_t *ev)
         LASSERT(ev->mem_desc.niov == desc->bd_page_count);
 
         if (atomic_dec_and_test(&desc->bd_source_callback_count)) {
+                void (*event_handler)(struct ptlrpc_bulk_desc *);
+
                 list_for_each_safe(tmp, next, &desc->bd_page_list) {
                         bulk = list_entry(tmp, struct ptlrpc_bulk_page,
                                           bp_link);
@@ -177,10 +179,18 @@ static int bulk_source_callback(ptl_event_t *ev)
                         if (bulk->bp_cb != NULL)
                                 bulk->bp_cb(bulk);
                 }
+
+                /* We need to make a note of whether there's an event handler
+                 * before we call wake_up, because if there is no event handler,
+                 * 'desc' might be freed before we're scheduled again. */
+                event_handler = desc->bd_ptl_ev_hdlr;
+
                 desc->bd_flags |= PTL_BULK_FL_SENT;
                 wake_up(&desc->bd_waitq);
-                if (desc->bd_ptl_ev_hdlr != NULL)
-                        desc->bd_ptl_ev_hdlr(desc);
+                if (event_handler) {
+                        LASSERT(desc->bd_ptl_ev_hdlr == event_handler);
+                        event_handler(desc);
+                }
         }
 
         RETURN(0);
@@ -193,35 +203,39 @@ static int bulk_sink_callback(ptl_event_t *ev)
         struct list_head        *tmp;
         struct list_head        *next;
         ptl_size_t               total = 0;
+        void                   (*event_handler)(struct ptlrpc_bulk_desc *);
         ENTRY;
 
-        if (ev->type == PTL_EVENT_PUT) {
-                /* put with zero offset */
-                LASSERT(ev->offset == 0);
-                /* used iovs */
-                LASSERT((ev->mem_desc.options & PTL_MD_IOV) != 0);
-                /* 1 fragment for each page always */
-                LASSERT(ev->mem_desc.niov == desc->bd_page_count);
-
-                list_for_each_safe (tmp, next, &desc->bd_page_list) {
-                        bulk = list_entry(tmp, struct ptlrpc_bulk_page,
-                                          bp_link);
+        LASSERT(ev->type == PTL_EVENT_PUT);
+
+        /* put with zero offset */
+        LASSERT(ev->offset == 0);
+        /* used iovs */
+        LASSERT((ev->mem_desc.options & PTL_MD_IOV) != 0);
+        /* 1 fragment for each page always */
+        LASSERT(ev->mem_desc.niov == desc->bd_page_count);
 
-                        total += bulk->bp_buflen;
+        list_for_each_safe (tmp, next, &desc->bd_page_list) {
+                bulk = list_entry(tmp, struct ptlrpc_bulk_page, bp_link);
 
-                        if (bulk->bp_cb != NULL)
-                                bulk->bp_cb(bulk);
-                }
+                total += bulk->bp_buflen;
+
+                if (bulk->bp_cb != NULL)
+                        bulk->bp_cb(bulk);
+        }
 
-                LASSERT(ev->mem_desc.length == total);
+        LASSERT(ev->mem_desc.length == total);
 
-                desc->bd_flags |= PTL_BULK_FL_RCVD;
-                wake_up(&desc->bd_waitq);
-                if (desc->bd_ptl_ev_hdlr != NULL)
-                        desc->bd_ptl_ev_hdlr(desc);
-        } else {
-                CERROR("Unexpected event type!\n");
-                LBUG();
+        /* We need to make a note of whether there's an event handler
+         * before we call wake_up, because if there is no event
+         * handler, 'desc' might be freed before we're scheduled again. */
+        event_handler = desc->bd_ptl_ev_hdlr;
+
+        desc->bd_flags |= PTL_BULK_FL_RCVD;
+        wake_up(&desc->bd_waitq);
+        if (event_handler) {
+                LASSERT(desc->bd_ptl_ev_hdlr == event_handler);
+                event_handler(desc);
         }
 
         RETURN(1);
index c2c2b32..bfd11bc 100644 (file)
@@ -280,7 +280,7 @@ int ptlrpc_abort_bulk(struct ptlrpc_bulk_desc *desc)
 
 void obd_brw_set_add(struct obd_brw_set *set, struct ptlrpc_bulk_desc *desc)
 {
-        atomic_inc(&desc->bd_refcount);
+        ptlrpc_bulk_addref(desc);
         atomic_inc(&set->brw_refcount);
         desc->bd_brw_set = set;
         list_add(&desc->bd_set_chain, &set->brw_desc_head);
index 2b5b786..dd1e33c 100644 (file)
@@ -92,7 +92,7 @@ make distdir distdir=lustre-source/lustre-%{version}
 %attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/llite.o
 %attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/mdc.o
 %attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/mds.o
-%attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/mds_extN.o
+%attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/fsfilt_extN.o
 %attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/obdclass.o
 %attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/obdecho.o
 %attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/obdfilter.o
index 5563923..e34f984 100644 (file)
@@ -27,3 +27,4 @@ lovstripe
 *.xml
 stat
 setuid
+multifstat
index 77c3039..12b7d52 100644 (file)
@@ -24,7 +24,9 @@ noinst_SCRIPTS += fs.sh intent-test.sh intent-test2.sh leak_finder.pl \
        runtests runvmstat snaprun.sh tbox.sh  common.sh
 noinst_PROGRAMS = openunlink testreq truncate directio openme writeme mcreate
 noinst_PROGRAMS += munlink tchmod toexcl fsx test_brw openclose createdestroy
-noinst_PROGRAMS += lovstripe stat createmany mkdirmany multifstat # ldaptest 
+noinst_PROGRAMS += lovstripe stat createmany mkdirmany multifstat
+# noinst_PROGRAMS += ldaptest 
+noinst_PROGRAMS += checkstat
 
 # ldaptest_SOURCES = ldaptest.c
 tchmod_SOURCES = tchmod.c
@@ -46,5 +48,6 @@ stat_SOURCES = stat.c
 createmany_SOURCES = createmany.c
 mkdirmany_SOURCES = mkdirmany.c
 multifstat_SOURCES = multifstat.c
+checkstat_SOURCES = checkstat.c
 
 include $(top_srcdir)/Rules
diff --git a/lustre/tests/checkstat.c b/lustre/tests/checkstat.c
new file mode 100644 (file)
index 0000000..ed97bd6
--- /dev/null
@@ -0,0 +1,315 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <pwd.h>
+#include <grp.h>
+
+/*
+ * Print the usage line for the program.
+ *
+ * argv0: the program path as invoked; only its last path component is
+ *        shown.
+ * help:  non-zero prints the usage line and the full option list to
+ *        stdout and returns; zero prints the usage line and a hint to
+ *        stderr and exits with status 1.
+ */
+void
+usage (char *argv0, int help)
+{
+       char *progname = strrchr(argv0, '/');
+
+       if (progname == NULL)
+               progname = argv0;
+       else
+               progname++;     /* step past the '/' itself */
+
+       fprintf (help ? stdout : stderr,
+                "Usage: %s [flags] file[s]\n",
+                progname);
+
+       if (!help)
+       {
+               fprintf (stderr, "   or try '-h' for help\n");
+               exit (1);
+       }
+
+       printf ("Check given files have...\n");
+       printf (" -p    permission       file must have required permissions\n");
+       printf (" -t    dir|file|link    file must be of the specified type\n");
+       printf (" -l    link_name        file must be a link to the given name\n");
+       printf (" -s    size             file must have the given size\n");
+       printf (" -u    user             file must be owned by given user\n");
+       printf (" -g    group            file must be owned by given group\n");
+       printf (" -f                     follow symlinks\n");
+       printf (" -a                     file must be absent\n");
+       printf (" -v                     increase verbosity\n");
+       printf (" -h                     print help\n");
+       printf (" Exit status is 0 on success, 1 on failure\n");
+}
+
+/*
+ * checkstat: stat every file named on the command line and verify the
+ * attributes requested by the option flags (see usage()).
+ *
+ * Returns 0 when every file passes every requested check, and 1 as soon
+ * as one check fails or an option argument cannot be parsed.  Failure
+ * details are only printed when -v is given.
+ */
+int
+main (int argc, char **argv)
+{
+       int           c;
+       struct stat64 buf;
+       int           perms = -1;
+       uid_t         uid = (uid_t)-1;
+       gid_t         gid = (gid_t)-1;
+       char         *type = NULL;
+       long          absent = 0;
+       char         *checklink = NULL;
+       int           verbose = 0;
+       long long     size = -1;
+       int           follow = 0;
+       char         *term;
+
+       while ((c = getopt (argc, argv, "p:t:l:s:u:g:avfh")) != -1)
+               switch (c)
+               {
+               case 'p':
+                       perms = (int)strtol (optarg, &term, 0);
+                       if (term == optarg)
+                       {
+                               fprintf (stderr, "Can't parse permission %s\n", optarg);
+                               return (1);
+                       }
+                       break;
+
+               case 'l':
+                       checklink = optarg;
+                       break;
+
+               case 's':
+                       size = strtoll (optarg, &term, 0);
+                       if (term == optarg)
+                       {
+                               fprintf (stderr, "Can't parse size %s\n", optarg);
+                               return (1);
+                       }
+                       break;
+
+               case 'u':
+                       /* "#N" is a numeric uid, anything else a user name */
+                       if (*optarg == '#')
+                       {
+                               uid = (uid_t)strtol (optarg + 1, &term, 0);
+                               if (term == optarg + 1)
+                               {
+                                       fprintf (stderr, "Can't parse numeric uid %s\n", optarg);
+                                       return (1);
+                               }
+                       } else {
+                               struct passwd *pw = getpwnam (optarg);
+
+                               if (pw == NULL)
+                               {
+                                       fprintf (stderr, "Can't find user %s\n", optarg);
+                                       return (1);
+                               }
+                               uid = pw->pw_uid;
+                       }
+                       break;
+
+               case 'g':
+                       /* "#N" is a numeric gid, anything else a group name */
+                       if (*optarg == '#')
+                       {
+                               gid = (gid_t)strtol (optarg + 1, &term, 0);
+                               if (term == optarg + 1)
+                               {
+                                       fprintf (stderr, "Can't parse numeric gid %s\n", optarg);
+                                       return (1);
+                               }
+                       } else {
+                               struct group *gr = getgrnam (optarg);
+
+                               if (gr == NULL)
+                               {
+                                       fprintf (stderr, "Can't find group %s\n", optarg);
+                                       return (1);
+                               }
+                               /* must land in gid (not uid) so that the
+                                * group check below is the one that runs */
+                               gid = gr->gr_gid;
+                       }
+                       break;
+
+               case 't':
+                       type = optarg;
+                       break;
+
+               case 'a':
+                       absent = 1;
+                       break;
+
+               case 'v':
+                       verbose++;
+                       break;
+
+               case 'f':
+                       follow++;
+                       break;
+
+               case 'h':
+                       usage (argv[0], 1);
+                       return (0);
+
+               default:
+                       /* usage() exits when called with help == 0 */
+                       usage (argv[0], 0);
+               }
+
+       if (optind == argc)
+               usage (argv[0], 0);
+
+       do
+       {
+               char *fname = argv[optind];
+               int rc = follow ? stat64 (fname, &buf) : lstat64 (fname, &buf);
+
+               if (rc != 0)
+               {
+                       /* a missing file is only acceptable under -a */
+                       if (!(absent && errno == ENOENT))
+                       {
+                               if (verbose)
+                                       printf ("Can't %sstat %s: %s\n",
+                                               follow ? "" : "l",
+                                               fname, strerror (errno));
+                               return (1);
+                       }
+
+                       if (verbose)
+                               printf ("%s: absent OK\n", fname);
+                       continue;
+               }
+
+               if (absent)
+               {
+                       if (verbose)
+                               printf ("%s exists\n", fname);
+                       return (1);
+               }
+
+               if (type != NULL)
+               {
+                       if (!strcmp (type, "d") ||
+                           !strcmp (type, "dir"))
+                       {
+                               if (!S_ISDIR (buf.st_mode))
+                               {
+                                       if (verbose)
+                                               printf ("%s is not a directory\n",
+                                                        fname);
+                                       return (1);
+                               }
+                       }
+                       else if (!strcmp (type, "f") ||
+                                !strcmp (type, "file"))
+                       {
+                               if (!S_ISREG (buf.st_mode))
+                               {
+                                       if (verbose)
+                                               printf ("%s is not a regular file\n",
+                                                       fname);
+                                       return (1);
+                               }
+                       }
+                       else if (!strcmp (type, "l") ||
+                                !strcmp (type, "link"))
+                       {
+                               if (!S_ISLNK (buf.st_mode))
+                               {
+                                       if (verbose)
+                                               printf ("%s is not a link\n",
+                                                       fname);
+                                       return (1);
+                               }
+                       }
+                       else
+                       {
+                               fprintf (stderr, "Can't parse file type %s\n", type);
+                               return (1);
+                       }
+
+                       if (verbose)
+                               printf ("%s has type %s OK\n", fname, type);
+               }
+
+               if (perms != -1)
+               {
+                       /* compare everything except the file-type bits */
+                       if ((buf.st_mode & ~S_IFMT) != perms)
+                       {
+                               if (verbose)
+                                       printf ("%s has perms 0%o, not 0%o\n",
+                                               fname, (buf.st_mode & ~S_IFMT), perms);
+                               return (1);
+                       }
+
+                       if (verbose)
+                               printf ("%s has perms 0%o OK\n",
+                                       fname, perms);
+               }
+
+               if (size != -1)
+               {
+                       if (buf.st_size != size)
+                       {
+                               if (verbose)
+                                       printf ("%s has size %lld, not %lld\n",
+                                               fname, (long long)buf.st_size, size);
+                               return (1);
+                       }
+
+                       if (verbose)
+                               printf ("%s has size %lld OK\n", fname, size);
+               }
+
+               if (checklink != NULL)
+               {
+                       static char lname[4<<10];
+
+                       /* leave room for the terminator: readlink() does
+                        * not add one */
+                       rc = readlink (fname, lname, sizeof (lname) - 1);
+
+                       if (rc < 0)
+                       {
+                               if (verbose)
+                                       printf ("%s: can't read link: %s\n",
+                                               fname, strerror (errno));
+                               return (1);
+                       }
+
+                       lname[rc] = 0;
+                       if (strcmp (checklink, lname))
+                       {
+                               if (verbose)
+                                       printf ("%s is a link to %s and not %s\n",
+                                               fname, lname, checklink);
+                               return (1);
+                       }
+
+                       if (verbose)
+                               printf ("%s links to %s OK\n", fname, checklink);
+               }
+
+               if (uid != (uid_t)-1)
+               {
+                       if (buf.st_uid != uid)
+                       {
+                               if (verbose)
+                                       printf ("%s is owned by user #%ld and not #%ld\n",
+                                               fname, (long)buf.st_uid, (long)uid);
+                               return (1);
+                       }
+
+                       if (verbose)
+                               printf ("%s is owned by user #%ld OK\n",
+                                       fname, (long)uid);
+               }
+
+               if (gid != (gid_t)-1)
+               {
+                       if (buf.st_gid != gid)
+                       {
+                               if (verbose)
+                                       printf ("%s is owned by group #%ld and not #%ld\n",
+                                               fname, (long)buf.st_gid, (long)gid);
+                               return (1);
+                       }
+
+                       if (verbose)
+                               printf ("%s is owned by group #%ld OK\n",
+                                       fname, (long)gid);
+               }
+
+       } while (++optind < argc);
+
+       return (0);
+}
index 34e3b83..6a4429e 100644 (file)
@@ -270,10 +270,10 @@ setup_lustre() {
        do_insmod $LUSTRE/extN/extN.o || \
                echo "info: can't load extN.o module, not fatal if using ext3"
        do_insmod $LUSTRE/mds/mds.o || exit -1
-       #do_insmod $LUSTRE/mds/mds_ext2.o || exit -1
-       #do_insmod $LUSTRE/mds/mds_ext3.o || exit -1
-       do_insmod $LUSTRE/mds/mds_extN.o || \
-               echo "info: can't load mds_extN.o module, needs extN.o"
+       #do_insmod $LUSTRE/obdclass/fsfilt_ext2.o || exit -1
+       #do_insmod $LUSTRE/obdclass/fsfilt_ext3.o || exit -1
+       do_insmod $LUSTRE/obdclass/fsfilt_extN.o || \
+               echo "info: can't load fsfilt_extN.o module, needs extN.o"
        do_insmod $LUSTRE/obdecho/obdecho.o || exit -1
        #do_insmod $LUSTRE/obdext2/obdext2.o || exit -1
        do_insmod $LUSTRE/obdfilter/obdfilter.o || exit -1
@@ -571,9 +571,9 @@ cleanup_lustre() {
        do_rmmod mdc
        do_rmmod osc
 
-       do_rmmod mds_extN
-       do_rmmod mds_ext3
-       do_rmmod mds_ext2
+       do_rmmod fsfilt_extN
+       do_rmmod fsfilt_ext3
+       do_rmmod fsfilt_ext2
        do_rmmod mds
        do_rmmod ost
        do_rmmod obdecho
index aba33d5..859f40a 100644 (file)
@@ -532,6 +532,13 @@ domapread(unsigned offset, unsigned size)
                prterr("domapread: mmap");
                report_failure(190);
        }
+       if (!quiet && (debug > 1 &&
+                       (monitorstart == -1 ||
+                        (offset + size > monitorstart &&
+                         (monitorend == -1 || offset <= monitorend))))) {
+               gettimeofday(&t, NULL);
+               prt("       %lu.%06lu mmap done\n", t.tv_sec, t.tv_usec);
+       }
        memcpy(temp_buf, p + pg_offset, size);
        if (!quiet && (debug > 1 &&
                        (monitorstart == -1 ||
@@ -683,6 +690,13 @@ domapwrite(unsigned offset, unsigned size)
                        prterr("domapwrite: ftruncate");
                        exit(201);
                }
+               if (!quiet && (debug > 1 &&
+                              (monitorstart == -1 ||
+                               (offset + size > monitorstart &&
+                                (monitorend == -1 || offset <= monitorend))))) {
+                       gettimeofday(&t, NULL);
+                       prt("       %lu.%06lu truncate done\n", t.tv_sec, t.tv_usec);
+       }
        }
        pg_offset = offset & page_mask;
        map_size  = pg_offset + size;
@@ -693,6 +707,13 @@ domapwrite(unsigned offset, unsigned size)
                prterr("domapwrite: mmap");
                report_failure(202);
        }
+       if (!quiet && (debug > 1 &&
+                       (monitorstart == -1 ||
+                        (offset + size > monitorstart &&
+                         (monitorend == -1 || offset <= monitorend))))) {
+               gettimeofday(&t, NULL);
+               prt("       %lu.%06lu mmap done\n", t.tv_sec, t.tv_usec);
+       }
        memcpy(p + pg_offset, good_buf + offset, size);
        if (!quiet && (debug > 1 &&
                        (monitorstart == -1 ||
@@ -821,7 +842,7 @@ docloseopen(void)
        }
        if (!quiet && debug > 1) {
                gettimeofday(&t, NULL);
-               prt("       %lu.%06lu opendone\n", t.tv_sec, t.tv_usec);
+               prt("       %lu.%06lu open done\n", t.tv_sec, t.tv_usec);
        }
 }
 
index 649e96d..a39b73c 100644 (file)
@@ -25,8 +25,8 @@ do_insmod $LUSTRE/ldlm/ldlm.o || exit -1
 do_insmod $LUSTRE/extN/extN.o || \
     echo "info: can't load extN.o module, not fatal if using ext3"
 do_insmod $LUSTRE/mds/mds.o || exit -1
-do_insmod $LUSTRE/mds/mds_extN.o || \
-    echo "info: can't load mds_extN.o module, needs extN.o"
+do_insmod $LUSTRE/obdclass/fsfilt_extN.o || \
+    echo "info: can't load fsfilt_extN.o module, needs extN.o"
 do_insmod $LUSTRE/obdecho/obdecho.o || exit -1
 do_insmod $LUSTRE/obdfilter/obdfilter.o || exit -1
 do_insmod $LUSTRE/ost/ost.o || exit -1
index 48d6602..eba407d 100755 (executable)
@@ -1,5 +1,5 @@
 #!/bin/sh
 while sleep 1 ; do
-       egrep "ll_|ldlm|filp|dentry|inode|portals|size-[0-9]* " /proc/slabinfo
         echo '-----------------------'
+       egrep "ll_|ldlm|filp|dentry|inode|portals|size-[0-9]* " /proc/slabinfo
 done
index cf305a4..cbd17ca 100644 (file)
@@ -1,17 +1,21 @@
 #!/bin/bash
 
+set -e
+
+CHECKSTAT=${CHECKSTAT:-"./checkstat -v"}
+MOUNT=${MOUNT:-/mnt/lustre}
 export NAME=$NAME
 clean() {
         echo -n "cleanup..."
         sh llmountcleanup.sh > /dev/null
 }
-CLEAN=clean
+CLEAN=${CLEAN:-clean}
 start() {
         echo -n "mounting..."
         sh llrmount.sh > /dev/null
         echo -n "mounted"
 }
-START=start
+START=${START:-start}
 
 error () { 
     echo FAIL
@@ -23,283 +27,373 @@ pass() {
 }
 
 echo '== touch .../f ; rm .../f ======================== test 0'
-touch /mnt/lustre/f
-[ -f /mnt/lustre/f ] || error 
-rm /mnt/lustre/f
-[ ! -f /mnt/lustre/f ] || error
+touch $MOUNT/f
+$CHECKSTAT -t file $MOUNT/f || error 
+rm $MOUNT/f
+$CHECKSTAT -a $MOUNT/f || error
 pass
 $CLEAN
 $START
 
 echo '== mkdir .../d1; mkdir .../d1/d2 ================= test 1'
-mkdir /mnt/lustre/d1
-mkdir /mnt/lustre/d1/d2
-[ -d /mnt/lustre/d1/d2 ] || error
+mkdir $MOUNT/d1
+mkdir $MOUNT/d1/d2
+$CHECKSTAT -t dir $MOUNT/d1/d2 || error
 pass
 $CLEAN
 $START
 
 echo '== rmdir .../d1/d2; rmdir .../d1 ================= test 1b'
-rmdir /mnt/lustre/d1/d2
-rmdir /mnt/lustre/d1
-[ ! -d /mnt/lustre/d1 ] || error
+rmdir $MOUNT/d1/d2
+rmdir $MOUNT/d1
+$CHECKSTAT -a $MOUNT/d1 || error
 pass
 $CLEAN
 $START
 
 echo '== mkdir .../d2; touch .../d2/f ================== test 2'
-mkdir /mnt/lustre/d2
-touch /mnt/lustre/d2/f
+mkdir $MOUNT/d2
+touch $MOUNT/d2/f
+$CHECKSTAT -t file $MOUNT/d2/f || error
+pass
 $CLEAN
 $START
 
 echo '== rm -r .../d2; touch .../d2/f ================== test 2b'
-rm -r /mnt/lustre/d2
+rm -r $MOUNT/d2
+$CHECKSTAT -a $MOUNT/d2 || error
+pass
 $CLEAN
 $START
 
 echo '== mkdir .../d3 ================================== test 3'
-mkdir /mnt/lustre/d3
+mkdir $MOUNT/d3
+$CHECKSTAT -t dir $MOUNT/d3 || error
+pass
 $CLEAN
 $START
 echo '== touch .../d3/f ================================ test 3b'
-touch /mnt/lustre/d3/f
+touch $MOUNT/d3/f
+$CHECKSTAT -t file $MOUNT/d3/f || error
+pass
 $CLEAN
 $START
 echo '== rm -r .../d3 ================================== test 3c'
-rm -r /mnt/lustre/d3
+rm -r $MOUNT/d3
+$CHECKSTAT -a $MOUNT/d3 || error
+pass
 $CLEAN
 $START
 
 echo '== mkdir .../d4 ================================== test 4'
-mkdir /mnt/lustre/d4
+mkdir $MOUNT/d4
+$CHECKSTAT -t dir $MOUNT/d4 || error
+pass
 $CLEAN
 $START
 echo '== mkdir .../d4/d2 =============================== test 4b'
-mkdir /mnt/lustre/d4/d2
+mkdir $MOUNT/d4/d2
+$CHECKSTAT -t dir $MOUNT/d4/d2 || error
+pass
 $CLEAN
 $START
 
 echo '== mkdir .../d5; mkdir .../d5/d2; chmod .../d5/d2 = test 5'
-mkdir /mnt/lustre/d5
-mkdir /mnt/lustre/d5/d2
-chmod 0666 /mnt/lustre/d5/d2
+mkdir $MOUNT/d5
+mkdir $MOUNT/d5/d2
+chmod 0666 $MOUNT/d5/d2
+$CHECKSTAT -t dir -p 0666 $MOUNT/d5/d2 || error
+pass
 $CLEAN
 $START
 
 echo '== touch .../f6; chmod .../f6 ==================== test 6'
-touch /mnt/lustre/f6
-chmod 0666 /mnt/lustre/f6
+touch $MOUNT/f6
+chmod 0666 $MOUNT/f6
+$CHECKSTAT -t file -p 0666 $MOUNT/f6 || error
+pass
 $CLEAN
 $START
 
 echo '== mkdir .../d7; mcreate .../d7/f; chmod .../d7/f = test 7'
-mkdir /mnt/lustre/d7
-./mcreate /mnt/lustre/d7/f
-chmod 0666 /mnt/lustre/d7/f
+mkdir $MOUNT/d7
+./mcreate $MOUNT/d7/f
+chmod 0666 $MOUNT/d7/f
+$CHECKSTAT -t file -p 0666 $MOUNT/d7/f || error
+pass
 $CLEAN
 $START
 
 echo '== mkdir .../d8; touch .../d8/f; chmod .../d8/f == test 8'
-mkdir /mnt/lustre/d8
-touch /mnt/lustre/d8/f
-chmod 0666 /mnt/lustre/d8/f
+mkdir $MOUNT/d8
+touch $MOUNT/d8/f
+chmod 0666 $MOUNT/d8/f
+$CHECKSTAT -t file -p 0666 $MOUNT/d8/f || error
+pass
 $CLEAN
 $START
 
 
-echo '== mkdir .../d9; mkdir .../d9/d2; mkdir .../d9/d2/d3 == test 9'
-mkdir /mnt/lustre/d9
-mkdir /mnt/lustre/d9/d2
-mkdir /mnt/lustre/d9/d2/d3
+echo '== mkdir .../d9 .../d9/d2 .../d9/d2/d3 =========== test 9'
+mkdir $MOUNT/d9
+mkdir $MOUNT/d9/d2
+mkdir $MOUNT/d9/d2/d3
+$CHECKSTAT -t dir $MOUNT/d9/d2/d3 || error
+pass
 $CLEAN
 $START
 
 
 echo '== mkdir .../d10; mkdir .../d10/d2; touch .../d10/d2/f = test 10'
-mkdir /mnt/lustre/d10
-mkdir /mnt/lustre/d10/d2
-touch /mnt/lustre/d10/d2/f
+mkdir $MOUNT/d10
+mkdir $MOUNT/d10/d2
+touch $MOUNT/d10/d2/f
+$CHECKSTAT -t file $MOUNT/d10/d2/f || error
+pass
 $CLEAN
 $START
 
-echo '=================================================== test 11'
-mkdir /mnt/lustre/d11
-mkdir /mnt/lustre/d11/d2
-chmod 0666 /mnt/lustre/d11/d2
-chmod 0555 /mnt/lustre/d11/d2
+echo '== mkdir .../d11 d11/d2; chmod .../d11/d2 .../d11/d2 == test 11'
+mkdir $MOUNT/d11
+mkdir $MOUNT/d11/d2
+chmod 0666 $MOUNT/d11/d2
+chmod 0555 $MOUNT/d11/d2
+$CHECKSTAT -t dir -p 0555 $MOUNT/d11/d2 || error
+pass
 $CLEAN
 $START
 
-echo '=================================================== test 12'
-mkdir /mnt/lustre/d12
-touch /mnt/lustre/d12/f
-chmod 0666 /mnt/lustre/d12/f
-chmod 0555 /mnt/lustre/d12/f
+echo '== mkdir .../d12; touch .../d12/f; chmod .../d12/f d12/f == test 12'
+mkdir $MOUNT/d12
+touch $MOUNT/d12/f
+chmod 0666 $MOUNT/d12/f
+chmod 0555 $MOUNT/d12/f
+$CHECKSTAT -t file -p 0555 $MOUNT/d12/f || error
+pass
 $CLEAN
 $START
 
-echo '=================================================== test 13'
-mkdir /mnt/lustre/d13
-cp /etc/passwd /mnt/lustre/d13/f
->  /mnt/lustre/d13/f
+echo '== mkdir .../d13; cp /etc/hosts .../d13/f; > .../d13/f == test 13'
+mkdir $MOUNT/d13
+cp /etc/hosts $MOUNT/d13/f
+>  $MOUNT/d13/f
+$CHECKSTAT -t file -s 0 $MOUNT/d13/f || error
+pass
 $CLEAN
 $START
 
 
-echo '=================================================== test 14'
-mkdir /mnt/lustre/d14
-touch /mnt/lustre/d14/f
-rm /mnt/lustre/d14/f
+echo '================================================== test 14'
+mkdir $MOUNT/d14
+touch $MOUNT/d14/f
+rm $MOUNT/d14/f
+$CHECKSTAT -a $MOUNT/d14/f || error
+pass
 $CLEAN
 $START
 
 
-echo '=================================================== test 15'
-mkdir /mnt/lustre/d15
-touch /mnt/lustre/d15/f
-mv /mnt/lustre/d15/f /mnt/lustre/d15/f2
+echo '================================================== test 15'
+mkdir $MOUNT/d15
+touch $MOUNT/d15/f
+mv $MOUNT/d15/f $MOUNT/d15/f2
+$CHECKSTAT -t file $MOUNT/d15/f2 || error
+pass
 $CLEAN
 $START
 
-echo '=================================================== test 16'
-mkdir /mnt/lustre/d16
-touch /mnt/lustre/d16/f
-rm -rf /mnt/lustre/d16/f
+echo '================================================== test 16'
+mkdir $MOUNT/d16
+touch $MOUNT/d16/f
+rm -rf $MOUNT/d16/f
+$CHECKSTAT -a $MOUNT/d16/f || error
+pass
 $CLEAN
 $START
 
-echo '== symlinks: create, remove (dangling and real) === test 17'
-mkdir /mnt/lustre/d17
-touch /mnt/lustre/d17/f
-ln -s /mnt/lustre/d17/f /mnt/lustre/d17/l-exist
-ln -s no-such-file /mnt/lustre/d17/l-dangle
-ls -l /mnt/lustre/d17
-rm -f /mnt/lustre/l-dangle
-rm -f /mnt/lustre/l-exist
+echo '== symlinks: create, remove (dangling and real) == test 17'
+mkdir $MOUNT/d17
+touch $MOUNT/d17/f
+ln -s $MOUNT/d17/f $MOUNT/d17/l-exist
+ln -s no-such-file $MOUNT/d17/l-dangle
+ls -l $MOUNT/d17
+$CHECKSTAT -l $MOUNT/d17/f $MOUNT/d17/l-exist || error
+$CHECKSTAT -f -t f $MOUNT/d17/l-exist || error
+$CHECKSTAT -l no-such-file $MOUNT/d17/l-dangle || error
+$CHECKSTAT -fa $MOUNT/d17/l-dangle || error
+rm -f $MOUNT/d17/l-dangle   # the links were created under d17/, not $MOUNT/
+rm -f $MOUNT/d17/l-exist
+$CHECKSTAT -a $MOUNT/d17/l-dangle || error
+$CHECKSTAT -a $MOUNT/d17/l-exist || error
+pass
 $CLEAN
 $START
 
-echo '== touch /mnt/lustre/f ; ls /mnt/lustre ========== test 18'
-touch /mnt/lustre/f
-ls /mnt/lustre
+echo "== touch $MOUNT/f ; ls $MOUNT ==================== test 18"
+touch $MOUNT/f
+ls $MOUNT || error
+pass
 $CLEAN
 $START
 
-echo '== touch /mnt/lustre/f ; ls -l /mnt/lustre ======= test 19'
-touch /mnt/lustre/f
-ls -l /mnt/lustre
-rm /mnt/lustre/f
+echo "== touch $MOUNT/f ; ls -l $MOUNT ================= test 19"
+touch $MOUNT/f
+ls -l $MOUNT
+rm $MOUNT/f
+$CHECKSTAT -a $MOUNT/f || error
+pass
 $CLEAN
 $START
 
-echo '== touch /mnt/lustre/f ; ls -l /mnt/lustre ======= test 20'
-touch /mnt/lustre/f
-rm /mnt/lustre/f
+echo "== touch $MOUNT/f ; ls -l $MOUNT ================= test 20"
+touch $MOUNT/f
+rm $MOUNT/f
 echo "1 done"
-touch /mnt/lustre/f
-rm /mnt/lustre/f
+touch $MOUNT/f
+rm $MOUNT/f
 echo "2 done"
-touch /mnt/lustre/f
-rm /mnt/lustre/f
+touch $MOUNT/f
+rm $MOUNT/f
 echo "3 done"
+$CHECKSTAT -a $MOUNT/f || error
+pass
 $CLEAN
 $START
 
-echo '== write to dangling link ======================= test 21'
-mkdir /mnt/lustre/d21
-ln -s dangle /mnt/lustre/d21/link
-echo foo >> /mnt/lustre/d21/link
-cat /mnt/lustre/d21/dangle
+echo '== write to dangling link ======================== test 21'
+mkdir $MOUNT/d21
+[ -f $MOUNT/d21/dangle ] && rm -f $MOUNT/d21/dangle
+ln -s dangle $MOUNT/d21/link
+echo foo >> $MOUNT/d21/link
+cat $MOUNT/d21/dangle
+$CHECKSTAT -t link $MOUNT/d21/link || error
+$CHECKSTAT -f -t file $MOUNT/d21/link || error
+pass
 $CLEAN
 $START
 
-echo '== unpack tar archive as nonroot user =========== test 22'
-mkdir /mnt/lustre/d22
-chown 4711 /mnt/lustre/d22
-sudo -u \#4711 tar cf - /etc/hosts /etc/sysconfig/network | sudo -u \#4711 tar xfC - /mnt/lustre/d22
-ls -lR /mnt/lustre/d22/etc
+echo '== unpack tar archive as non-root user =========== test 22'
+mkdir $MOUNT/d22
+which sudo && chown 4711 $MOUNT/d22
+SUDO=`which sudo 2> /dev/null` && SUDO="$SUDO -u #4711" || SUDO=""
+$SUDO tar cf - /etc/hosts /etc/sysconfig/network | $SUDO tar xfC - $MOUNT/d22
+ls -lR $MOUNT/d22/etc
+$CHECKSTAT -t dir $MOUNT/d22/etc || error
+[ -z "$SUDO" ] || $CHECKSTAT -u \#4711 $MOUNT/d22/etc || error
+pass
 $CLEAN
 $START
 
-echo '== O_CREAT|O_EXCL in subdir ===================== test 23'
-mkdir /mnt/lustre/d23
-./toexcl /mnt/lustre/d23/f23
-./toexcl /mnt/lustre/d23/f23
+echo '== O_CREAT|O_EXCL in subdir ====================== test 23'
+mkdir $MOUNT/d23
+./toexcl $MOUNT/d23/f23
+./toexcl -e $MOUNT/d23/f23 || error
+pass
 $CLEAN
 $START
 
-echo '== rename sanity ============================= test24'
+echo '== rename sanity ================================= test24'
 echo '-- same directory rename'
 echo '-- test 24-R1: touch a ; rename a b'
-mkdir /mnt/lustre/R1
-touch /mnt/lustre/R1/f
-mv /mnt/lustre/R1/f /mnt/lustre/R1/g
+mkdir $MOUNT/R1
+touch $MOUNT/R1/f
+mv $MOUNT/R1/f $MOUNT/R1/g
+$CHECKSTAT -t file $MOUNT/R1/g || error
+pass
 $CLEAN
 $START
 
 echo '-- test 24-R2: touch a b ; rename a b;'
-mkdir /mnt/lustre/R2
-touch /mnt/lustre/R2/{f,g}
-mv /mnt/lustre/R2/f /mnt/lustre/R2/g
+mkdir $MOUNT/R2
+touch $MOUNT/R2/{f,g}
+mv $MOUNT/R2/f $MOUNT/R2/g
+$CHECKSTAT -a $MOUNT/R2/f || error
+$CHECKSTAT -t file $MOUNT/R2/g || error
+pass
 $CLEAN
 $START
 
 echo '-- test 24-R3: mkdir a  ; rename a b;'
-mkdir /mnt/lustre/R3
-mkdir /mnt/lustre/R3/f
-mv /mnt/lustre/R3/f /mnt/lustre/R3/g
+mkdir $MOUNT/R3
+mkdir $MOUNT/R3/f
+mv $MOUNT/R3/f $MOUNT/R3/g
+$CHECKSTAT -a $MOUNT/R3/f || error
+$CHECKSTAT -t dir $MOUNT/R3/g || error
+pass
 $CLEAN
 $START
 
 echo '-- test 24-R4: mkdir a b ; rename a b;'
-mkdir /mnt/lustre/R4
-mkdir /mnt/lustre/R4/{f,g}
-perl -e 'rename "/mnt/lustre/R3/f", "/mnt/lustre/R3/g";'
+mkdir $MOUNT/R4
+mkdir $MOUNT/R4/{f,g}
+perl -e "rename \"$MOUNT/R4/f\", \"$MOUNT/R4/g\";"
+$CHECKSTAT -a $MOUNT/R4/f || error
+$CHECKSTAT -t dir $MOUNT/R4/g || error
+pass
 $CLEAN
 $START
 
 echo '-- cross directory renames --' 
 echo '-- test 24-R5: touch a ; rename a b'
-mkdir /mnt/lustre/R5{a,b}
-touch /mnt/lustre/R5a/f
-mv /mnt/lustre/R5a/f /mnt/lustre/R5b/g
+mkdir $MOUNT/R5{a,b}
+touch $MOUNT/R5a/f
+mv $MOUNT/R5a/f $MOUNT/R5b/g
+$CHECKSTAT -a $MOUNT/R5a/f || error
+$CHECKSTAT -t file $MOUNT/R5b/g || error
+pass
 $CLEAN
 $START
 
 echo '-- test 24-R6: touch a ; rename a b'
-mkdir /mnt/lustre/R6{a,b}
-touch /mnt/lustre/R6a/f /mnt/lustre/R6b/g
-mv /mnt/lustre/R6a/f /mnt/lustre/R6b/g
+mkdir $MOUNT/R6{a,b}
+touch $MOUNT/R6a/f $MOUNT/R6b/g
+mv $MOUNT/R6a/f $MOUNT/R6b/g
+$CHECKSTAT -a $MOUNT/R6a/f || error
+$CHECKSTAT -t file $MOUNT/R6b/g || error
+pass
 $CLEAN
 $START
 
 echo '-- test 24-R7: touch a ; rename a b'
-mkdir /mnt/lustre/R7{a,b}
-mkdir /mnt/lustre/R7a/f
-mv /mnt/lustre/R7a/f /mnt/lustre/R7b/g
+mkdir $MOUNT/R7{a,b}
+mkdir $MOUNT/R7a/f
+mv $MOUNT/R7a/f $MOUNT/R7b/g
+$CHECKSTAT -a $MOUNT/R7a/f || error
+$CHECKSTAT -t dir $MOUNT/R7b/g || error
+pass
 $CLEAN
 $START
 
 echo '-- test 24-R8: touch a ; rename a b'
-mkdir /mnt/lustre/R8{a,b}
-mkdir /mnt/lustre/R8a/f /mnt/lustre/R8b/g
-perl -e 'rename "/mnt/lustre/R8a/f", "/mnt/lustre/R8b/g";'
+mkdir $MOUNT/R8{a,b}
+mkdir $MOUNT/R8a/f $MOUNT/R8b/g
+perl -e "rename \"$MOUNT/R8a/f\", \"$MOUNT/R8b/g\";"
+$CHECKSTAT -a $MOUNT/R8a/f || error
+$CHECKSTAT -t dir $MOUNT/R8b/g || error
+pass
 $CLEAN
 $START
 
 echo "-- rename error cases"
 echo "-- test 24-R9 target error: touch f ; mkdir a ; rename f a"
-mkdir /mnt/lustre/R9
-mkdir /mnt/lustre/R9/a
-touch /mnt/lustre/R9/f
-perl -e 'rename "/mnt/lustre/R9/f", "/mnt/lustre/R9/a";'
+mkdir $MOUNT/R9
+mkdir $MOUNT/R9/a
+touch $MOUNT/R9/f
+perl -e "rename \"$MOUNT/R9/f\", \"$MOUNT/R9/a\";"
+$CHECKSTAT -t file $MOUNT/R9/f || error
+$CHECKSTAT -t dir  $MOUNT/R9/a || error
+$CHECKSTAT -a $MOUNT/R9/a/f || error   # f must not have been moved into a/
+pass
 $CLEAN
 $START
 
 echo "--test 24-R10 source does not exist" 
-mkdir /mnt/lustre/R10
-mv /mnt/lustre/R10/f /mnt/lustre/R10/g 
+mkdir $MOUNT/R10
+perl -e "rename \"$MOUNT/R10/f\", \"$MOUNT/R10/g\"" 
+$CHECKSTAT -t dir $MOUNT/R10 || error
+$CHECKSTAT -a $MOUNT/R10/f || error
+$CHECKSTAT -a $MOUNT/R10/g || error
+pass
 $CLEAN
 $START
 
index da13217..7f099e8 100644 (file)
@@ -5,20 +5,82 @@
 #include <fcntl.h>
 #include <errno.h>
 #include <string.h>
+#include <unistd.h>
+
+/*
+ * Print the usage line for this program.
+ *
+ * argv0: program path as invoked; only its final component is shown.
+ * help:  non-zero prints the full option help to stdout and returns;
+ *        zero prints a short hint to stderr and exits with status 1.
+ */
+void
+usage (char *argv0, int help)
+{
+       char *progname = strrchr(argv0, '/');
+
+       /* strrchr() points at the '/' itself; step past it */
+       progname = (progname == NULL) ? argv0 : progname + 1;
+       
+       fprintf (help ? stdout : stderr,
+                "Usage: %s [-e] file\n", progname);
+       
+       if (!help)
+       {
+               fprintf (stderr, "   or try '-h' for help\n");
+               exit (1);
+       }
+       
+       printf ("Create the given file with O_EXCL...\n");
+       printf (" -e    expect EEXIST\n");
+       printf (" -h    print help\n");
+       printf (" Exit status is 0 on success, 1 on failure\n");
+}
 
 int main(int argc, char **argv)
 {
         int rc;
-
-        if (argc != 2) { 
-                printf("usage: %s name\n", argv[0]);
+       int want_eexist = 0;
+       
+       while ((rc = getopt (argc, argv, "eh")) != -1)
+               switch (rc)
+               {
+               case 'e':
+                       want_eexist = 1;
+                       break;
+               case 'h':
+                       /* argv[0] is the program name; argv[1] is an argument */
+                       usage (argv[0], 1);
+                       return (0);
+               default:
+                       /* usage() exits when help == 0 */
+                       usage (argv[0], 0);
+               }
+       
+        if (optind != argc - 1) { 
+               usage (argv[0], 0);
                 return 1;
         }
 
-        rc = open(argv[1], O_CREAT|O_EXCL, 0644);
+        rc = open(argv[optind], O_CREAT|O_EXCL, 0644);
         if (rc == -1)
-                printf("open failed: %s\n", strerror(errno));
-        else
-                printf("open success.\n");
-        return 0;
+       {
+               if (want_eexist && errno == EEXIST)
+               {
+                       printf("open failed: %s (expected)\n", strerror(errno));
+                       return (0);
+               }
+               printf("open failed: %s\n", strerror(errno));
+               return (1);
+       } else {
+               if (want_eexist)
+               {
+                       printf("open success (expecting EEXIST).\n");
+                       return (1);
+               }
+               printf("open success.\n");
+               return (0);
+       }
+       
+       return ((rc == 0) ? 0 : 1);
 }
index 7695706..de7b425 100644 (file)
@@ -11,3 +11,4 @@ obdctl
 lctl
 lfind
 lstripe
+lconf
similarity index 96%
rename from lustre/utils/lconf
rename to lustre/utils/lconf.in
index d460503..d7ca788 100755 (executable)
@@ -37,6 +37,7 @@ DEFAULT_TCPBUF = 1048576
 # Maximum number of devices to search for.
 # (the /dev/loop* nodes need to be created beforehand)
 MAX_LOOP_DEVICES = 256
+PORTALS_DIR = '@PORTALSLOC@'
 
 first_cleanup_error = 0
 def cleanup_error(rc):
@@ -470,9 +471,10 @@ def find_prog(cmd):
     syspath = string.split(os.environ['PATH'], ':')
     cmdpath = os.path.dirname(sys.argv[0])
     syspath.insert(0, cmdpath);
-    syspath.insert(0, os.path.join(cmdpath, '../../portals/linux/utils/'))
+    syspath.insert(0, os.path.join(cmdpath, PORTALS_DIR+'/linux/utils/'))
     for d in syspath:
         prog = os.path.join(d,cmd)
+       debug(prog)
         if os.access(prog, os.X_OK):
             return prog
     return ''
@@ -489,9 +491,10 @@ def do_find_file(base, mod):
             if module:
                 return module
 
-def find_module(src_dir, dev_dir, modname):
+def find_module(dev_dir, modname):
     mod = '%s.o' % (modname)
-    module = src_dir +'/'+ dev_dir +'/'+ mod
+
+    module = dev_dir +'/'+ mod
     try: 
        if os.access(module, os.R_OK):
             return module
@@ -738,7 +741,7 @@ class Module:
                 continue
             log ('loading module:', mod)
             if config.src_dir():
-                module = find_module(config.src_dir(),dev_dir,  mod)
+                module = find_module(dev_dir,  mod)
                 if not module:
                     panic('module not found:', mod)
                 (rc, out)  = run('/sbin/insmod', module)
@@ -782,19 +785,19 @@ class Network(Module):
                 panic("unable to set nid for", self.net_type, self.nid)
             debug("nid:", self.nid)
 
-        self.add_module('portals/linux/oslib/', 'portals')
+        self.add_module(PORTALS_DIR+"/linux/oslib", 'portals')
         if node_needs_router():
-            self.add_module('portals/linux/router', 'kptlrouter')
+            self.add_module(PORTALS_DIR+"/linux/router", 'kptlrouter')
         if self.net_type == 'tcp':
-            self.add_module('portals/linux/socknal', 'ksocknal')
+            self.add_module(PORTALS_DIR+"/linux/socknal", 'ksocknal')
         if self.net_type == 'toe':
-            self.add_module('portals/linux/toenal', 'ktoenal')
+            self.add_module(PORTALS_DIR+"/linux/toenal", 'ktoenal')
         if self.net_type == 'elan':
-            self.add_module('portals/linux/rqswnal', 'kqswnal')
+            self.add_module(PORTALS_DIR+"/linux/rqswnal", 'kqswnal')
         if self.net_type == 'gm':
-            self.add_module('portals/linux/gmnal', 'kgmnal')
-        self.add_module('lustre/obdclass', 'obdclass')
-        self.add_module('lustre/ptlrpc', 'ptlrpc')
+            self.add_module(PORTALS_DIR+"/linux/gmnal", 'kgmnal')
+        self.add_module(config.src_dir()+'obdclass', 'obdclass')
+        self.add_module(config.src_dir()+'ptlrpc', 'ptlrpc')
 
     def prepare(self):
         self.info(self.net_type, self.nid, self.port)
@@ -868,7 +871,7 @@ class Network(Module):
 class LDLM(Module):
     def __init__(self,dom_node):
         Module.__init__(self, 'LDLM', dom_node)
-        self.add_module('lustre/ldlm', 'ldlm')
+        self.add_module(config.src_dir()+'ldlm', 'ldlm') 
     def prepare(self):
         if is_prepared(self.uuid):
             return
@@ -890,8 +893,8 @@ class LOV(Module):
             self.pattern = get_attr_int(dev_node, 'pattern', 0)
             self.devlist = get_all_refs(dev_node, 'osc')
             self.stripe_cnt = get_attr_int(dev_node, 'stripecount', len(self.devlist))
-        self.add_module('lustre/mdc', 'mdc')
-        self.add_module('lustre/lov', 'lov')
+        self.add_module(config.src_dir()+'mdc', 'mdc')
+        self.add_module(config.src_dir()+'lov', 'lov')
 
     def prepare(self):
         if is_prepared(self.uuid):
@@ -979,9 +982,9 @@ class MDS(Module):
         # FIXME: if fstype not set, then determine based on kernel version
         self.format = get_text(dom_node, 'autoformat', "no")
         if self.fstype == 'extN':
-            self.add_module('lustre/extN', 'extN') 
-        self.add_module('lustre/mds', 'mds')
-        self.add_module('lustre/mds', 'mds_%s' % (self.fstype))
+            self.add_module(config.src_dir()+'extN', 'extN') 
+        self.add_module(config.src_dir()+'mds', 'mds')
+        self.add_module(config.src_dir()+'obdclass', 'fsfilt_%s'%(self.fstype))
             
     def prepare(self):
         if is_prepared(self.uuid):
@@ -1023,7 +1026,7 @@ class MDC(Module):
                                       int(random.random() * 1048576))
 
         self.lookup_server(self.mds.uuid)
-        self.add_module('lustre/mdc', 'mdc')
+        self.add_module(config.src_dir()+'mdc', 'mdc')
 
     def prepare(self):
         if is_prepared(self.uuid):
@@ -1043,8 +1046,9 @@ class OBD(Module):
         # FIXME: if fstype not set, then determine based on kernel version
         self.format = get_text(dom_node, 'autoformat', 'yes')
         if self.fstype == 'extN':
-            self.add_module('lustre/extN', 'extN') 
-        self.add_module('lustre/' + self.obdtype, self.obdtype)
+            self.add_module(config.src_dir()+'extN', 'extN') 
+        self.add_module(config.src_dir()+'' + self.obdtype, self.obdtype)
+        self.add_module(config.src_dir()+'obdclass' , 'fsfilt_%s' % (self.fstype))
 
     # need to check /proc/mounts and /etc/mtab before
     # formatting anything.
@@ -1070,7 +1074,7 @@ class OST(Module):
     def __init__(self,dom_node):
         Module.__init__(self, 'OST', dom_node)
         self.obd_uuid = get_first_ref(dom_node, 'obd')
-        self.add_module('lustre/ost', 'ost')
+        self.add_module(config.src_dir()+'ost', 'ost')
 
     def prepare(self):
         if is_prepared(self.uuid):
@@ -1104,7 +1108,7 @@ class OSC(Module):
         self.obd_uuid = get_first_ref(dom_node, 'obd')
         self.ost_uuid = get_first_ref(dom_node, 'ost')
         self.lookup_server(self.ost_uuid)
-        self.add_module('lustre/osc', 'osc')
+        self.add_module(config.src_dir()+'osc', 'osc')
 
     def prepare(self, ignore_connect_failure = 0):
         if is_prepared(self.uuid):
@@ -1182,8 +1186,8 @@ class Mountpoint(Module):
         self.path = get_text(dom_node, 'path')
         self.mds_uuid = get_first_ref(dom_node, 'mds')
         self.lov_uuid = get_first_ref(dom_node, 'osc')
-        self.add_module('lustre/mdc', 'mdc')
-        self.add_module('lustre/llite', 'llite')
+        self.add_module(config.src_dir()+'mdc', 'mdc')
+        self.add_module(config.src_dir()+'llite', 'llite')
         l = lookup(self.dom_node.parentNode, self.lov_uuid)
         self.osc = VOSC(l)
 
@@ -1566,9 +1570,8 @@ def doHost(lustreNode, hosts):
         dom_node = getByName(lustreNode, h, 'node')
         if dom_node:
             break
-
     if not dom_node:
-        print 'No host entry found.'
+        print 'lconf: No host entry found in '+sys.argv[1]
         return
 
     if not get_attr(dom_node, 'router'):
@@ -1661,9 +1664,12 @@ def fetch(url):
     return data
 
 def setupModulePath(cmd):
+    global PORTALS_DIR
     base = os.path.dirname(cmd)
     if os.access(base+"/Makefile", os.R_OK):
-        config.src_dir(base + "/../../")
+        config.src_dir(base + "/../")  
+    if PORTALS_DIR[0] != '/':
+       PORTALS_DIR= config.src_dir()+PORTALS_DIR
 
 def sys_set_debug_path():
     debug("debug path: ", config.debug_path())
@@ -1720,6 +1726,8 @@ def sanitise_path():
 #
 def main():
     global TCP_ACCEPTOR, lctl, MAXTCPBUF
+    setupModulePath(sys.argv[0])
+
     host = socket.gethostname()
 
     # the PRNG is normally seeded with time(), which is not so good for starting
@@ -1769,7 +1777,6 @@ def main():
 
     lctl = LCTLInterface('lctl')
 
-    setupModulePath(sys.argv[0])
     sys_make_devices()
     sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
     sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
index d2c9273..56e58c8 100644 (file)
@@ -66,7 +66,16 @@ sub extractpid
     if ($line =~ m/\(\) ([0-9]*)\+[0-9]*\):/) {
        return $1;
     }
-    if ($line =~ m/\(\) ([0-9]*) | [0-9]*\+[0-9]*\):/) {
+    if ($line =~ m/\(\) ([0-9]*) \| [0-9]*\+[0-9]*\):/) {
+       return $1;
+    }
+}
+
+sub extracthostpid
+{
+    $line = shift;
+#    print "$_\n";
+    if ($line =~ m/\(\) [0-9]* \| ([0-9]*)\+[0-9]*\):/) {
        return $1;
     }
 }
@@ -213,6 +222,7 @@ sub unmatched_intents {
 
 while (<STDIN>) {
     $linepid = extractpid($_);
+    $linehpid = extracthostpid($_);
     $linemask = getmask($_);
     $linesubsys = getsubsys($_);
 
@@ -230,7 +240,7 @@ while (<STDIN>) {
         study_lock($_);
     }
 
-    if ( !$pid || $linepid == $pid) {
+    if ( !$pid || $linepid == $pid || $linehpid == $pid) {
         next if ($rpctrace && $linemask != $masks->{RPCTRACE});
         next if ($trace && $linemask != $masks->{TRACE});
 
index 4a3b336..ba22a9e 100644 (file)
@@ -810,7 +810,8 @@ int jt_obd_setup(int argc, char **argv)
 
 /* The ioctl API has been extended to provide the LOV stripe metadata to the
  * caller when applicable.  This utility, however, only saves the LSM for the
- * latest CREATE. */
+ * latest CREATE.   It only saves the LSM when the ioctl indicates that it
+ * is valid by overloading 'ioc_conn2' as a boolean. */
 int jt_obd_create(int argc, char **argv)
 {
         struct obd_ioctl_data data;
@@ -880,7 +881,7 @@ int jt_obd_create(int argc, char **argv)
                         break;
                 }
 
-                lsm_valid = 1;
+                lsm_valid = data.ioc_conn2;
 
                 if (be_verbose(verbose, &next_time, i, &next_count, count))
                         printf("%s: #%d is object id "LPX64"\n",