From 8a84461a7f46eff896466bfa269d1397a67a3dde Mon Sep 17 00:00:00 2001
From: adilger <adilger>
Date: Tue, 10 Dec 2002 08:25:59 +0000
Subject: [PATCH] At Peter's request, a work-in-progress check-in of the fix
 for bug 478 and 108.  Breaks everything, doesn't even compile.

The intention is to move the MDS fs interface methods so they can be used
by obdfilter also, for creating compound transactions, last_rcvd callbacks,
etc.

Hopefully I'll have it in shape in a couple of hours.
---
 lustre/include/linux/lustre_fsfilt.h | 127 ++++++++++++
 lustre/obdclass/fsfilt.c             |  96 ++++++++++
 lustre/obdclass/fsfilt_ext3.c        | 357 ++++++++++++++++++++++++++++++++++
 lustre/obdclass/fsfilt_extN.c        | 360 +++++++++++++++++++++++++++++++++++
 4 files changed, 940 insertions(+)
 create mode 100644 lustre/include/linux/lustre_fsfilt.h
 create mode 100644 lustre/obdclass/fsfilt.c
 create mode 100644 lustre/obdclass/fsfilt_ext3.c
 create mode 100644 lustre/obdclass/fsfilt_extN.c
diff --git a/lustre/include/linux/lustre_fsfilt.h b/lustre/include/linux/lustre_fsfilt.h
new file mode 100644
index 0000000..19057bd
--- /dev/null
+++ b/lustre/include/linux/lustre_fsfilt.h
@@ -0,0 +1,127 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2001 Cluster File Systems, Inc. <info@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Filesystem interface helper.
+ *
+ */
+
+#ifndef _LUSTRE_FSFILT_H
+#define _LUSTRE_FSFILT_H
+
+#ifdef __KERNEL__
+
+#include <linux/obd.h>
+#include <linux/fs.h>
+
+typedef void (*fsfilt_cb_t)(struct obd_device *obd, __u64 last_rcvd, int error);
+
+struct fsfilt_operations {
+        struct list_head fs_list;       /* link in the fsfilt_types list */
+        struct module *fs_owner;        /* use count taken in fsfilt_get_ops */
+        char   *fs_type;                /* name compared by strcmp on lookup */
+        void   *(* fs_start)(struct inode *inode, int op); /* op: FSFILT_OP_* */
+        int     (* fs_commit)(struct inode *inode, void *handle);
+        int     (* fs_setattr)(struct dentry *dentry, void *handle,
+                               struct iattr *iattr);
+        int     (* fs_set_md)(struct inode *inode, void *handle,
+                              void *md, int size);
+        int     (* fs_get_md)(struct inode *inode, void *md, int size);
+        ssize_t (* fs_readpage)(struct file *file, char *buf, size_t count,
+                                loff_t *offset);
+        void    (* fs_delete_inode)(struct inode *inode);
+        void    (* cl_delete_inode)(struct inode *inode);
+        int     (* fs_journal_data)(struct file *file);
+        int     (* fs_set_last_rcvd)(struct obd_device *obd, __u64 last_rcvd,
+                                     void *handle, fsfilt_cb_t *cb_func);
+        int     (* fs_statfs)(struct super_block *sb, struct statfs *sfs);
+};
+
+extern int fsfilt_register_fs_type(struct fsfilt_operations *op);
+extern void fsfilt_unregister_fs_type(const char *name);
+
+#define FSFILT_OP_UNLINK         1
+#define FSFILT_OP_RMDIR          2
+#define FSFILT_OP_RENAME         3
+#define FSFILT_OP_CREATE         4
+#define FSFILT_OP_MKDIR          5
+#define FSFILT_OP_SYMLINK        6
+#define FSFILT_OP_MKNOD          7
+#define FSFILT_OP_SETATTR        8
+#define FSFILT_OP_LINK           9
+
+static inline void *fsfilt_start(struct obd_device *obd,
+                                 struct inode *inode, int op)
+{
+        return obd->obd_fsops->fs_start(inode, op);
+}
+
+static inline int fsfilt_commit(struct obd_device *obd, struct inode *inode,
+                                   void *handle)
+{
+        return obd->obd_fsops->fs_commit(inode, handle);
+}
+
+static inline int fsfilt_setattr(struct obd_device *obd,
+                                    struct dentry *dentry,
+                                    void *handle, struct iattr *iattr)
+{
+        int rc;
+        /*
+         * NOTE: we probably don't need to take i_sem here when changing
+         *       ATTR_SIZE because the MDS never needs to truncate a file.
+         *       The ext2/ext3 code never truncates a directory, and files
+         *       stored on the MDS are entirely sparse (no data blocks).
+         *       If we do need to get it, we can do it here.
+         */
+        lock_kernel();
+        rc = obd->obd_fsops->fs_setattr(dentry, handle, iattr);
+        unlock_kernel();
+
+        return rc;
+}
+
+static inline int fsfilt_set_md(struct obd_device *obd, struct inode *inode,
+                                   void *handle, void *md, int size)
+{
+        return obd->obd_fsops->fs_set_md(inode, handle, md, size);
+}
+
+static inline int fsfilt_get_md(struct obd_device *obd, struct inode *inode,
+                                void *md, int size)
+{
+        return obd->obd_fsops->fs_get_md(inode, md, size);
+}
+
+static inline ssize_t fsfilt_readpage(struct obd_device *obd,
+                                      struct file *file, char *buf,
+                                      size_t count, loff_t *offset)
+{
+        return obd->obd_fsops->fs_readpage(file, buf, count, offset);
+}
+
+static inline int fsfilt_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
+                                       void *handle, fsfilt_cb_t *cb_func)
+{
+        return obd->obd_fsops->fs_set_last_rcvd(obd, last_rcvd,handle,cb_func);
+}
+
+#endif /* __KERNEL__ */
+
+#endif
diff --git a/lustre/obdclass/fsfilt.c b/lustre/obdclass/fsfilt.c
new file mode 100644
index 0000000..3000eb0
--- /dev/null
+++ b/lustre/obdclass/fsfilt.c
@@ -0,0 +1,96 @@
+#include <linux/lustre_fsfilt.h>
+
+LIST_HEAD(fsfilt_types);
+
+/* Return the registered ops whose fs_type equals @type, or NULL if none. */
+static struct fsfilt_operations *fsfilt_search_type(const char *type)
+{
+        struct fsfilt_operations *ops;
+        struct list_head *pos;
+
+        list_for_each(pos, &fsfilt_types) {
+                ops = list_entry(pos, struct fsfilt_operations, fs_list);
+                if (strcmp(ops->fs_type, type) == 0)
+                        return ops;
+        }
+        return NULL;
+}
+
+/* Add @ops to fsfilt_types; -EEXIST if other ops already own its fs_type. */
+int fsfilt_register_fs_type(struct fsfilt_operations *ops)
+{
+        struct fsfilt_operations *found;
+
+        /* TODO: lock fsfilt_types list (no locking is implemented yet) */
+        found = fsfilt_search_type(ops->fs_type);
+        if (found == NULL) {
+                /* First registration of this type name */
+                MOD_INC_USE_COUNT;
+                list_add(&ops->fs_list, &fsfilt_types);
+        } else if (found != ops) {
+                CERROR("different operations for type %s\n", ops->fs_type);
+                /* TODO: unlock fsfilt_types list */
+                return -EEXIST;
+        }
+
+        /* TODO: unlock fsfilt_types list */
+        return 0;
+}
+
+/* Remove the first fsfilt_types entry whose fs_type equals @type, if any. */
+void fsfilt_unregister_fs_type(const char *type)
+{
+        struct fsfilt_operations *found;
+        struct list_head *p;
+
+        /* TODO: lock fsfilt_types list (no locking is implemented yet) */
+        list_for_each(p, &fsfilt_types) {
+                found = list_entry(p, struct fsfilt_operations, fs_list);
+                if (!strcmp(found->fs_type, type)) {
+                        list_del(p);
+                        MOD_DEC_USE_COUNT;
+                        break;
+                }
+        }
+        /* TODO: unlock fsfilt_types list */
+}
+
+/* Find ops for @type, loading module "fsfilt_<type>" if needed; pins owner. */
+struct fsfilt_operations *fsfilt_get_ops(char *type)
+{
+        struct fsfilt_operations *fs_ops;
+
+        /* TODO: lock fsfilt_types list (no locking is implemented yet) */
+        fs_ops = fsfilt_search_type(type);
+        if (fs_ops == NULL) {
+                char name[32];
+                int rc;
+
+                snprintf(name, sizeof(name) - 1, "fsfilt_%s", type);
+                name[sizeof(name) - 1] = '\0';
+
+                /* request_module() returns 0 when the module was loaded */
+                rc = request_module(name);
+                if (rc == 0) {
+                        CDEBUG(D_INFO, "Loaded module '%s'\n", name);
+                        fs_ops = fsfilt_search_type(type);
+                }
+                if (fs_ops == NULL) {
+                        /* TODO: unlock fsfilt_types list */
+                        CERROR("Can't find %s interface\n", name);
+                        return ERR_PTR(rc < 0 ? rc : -ENOENT);
+                }
+        }
+        __MOD_INC_USE_COUNT(fs_ops->fs_owner);
+        /* TODO: unlock fsfilt_types list */
+        return fs_ops;
+}
+
+void fsfilt_put_ops(struct fsfilt_operations *fs_ops)
+{
+        __MOD_DEC_USE_COUNT(fs_ops->fs_owner);
+}
+
+
+EXPORT_SYMBOL(fsfilt_register_fs_type);
+EXPORT_SYMBOL(fsfilt_unregister_fs_type);
diff --git a/lustre/obdclass/fsfilt_ext3.c b/lustre/obdclass/fsfilt_ext3.c
new file mode 100644
index 0000000..e762e4f
--- /dev/null
+++ b/lustre/obdclass/fsfilt_ext3.c
@@ -0,0 +1,357 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  lustre/lib/fsfilt_ext3.c
+ *  Lustre filesystem abstraction routines
+ *
+ *  Copyright (C) 2002 Cluster File Systems, Inc.
+ *   Author: Andreas Dilger <adilger@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_FILTER
+
+#include <linux/fs.h>
+#include <linux/jbd.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/ext3_fs.h>
+#include <linux/ext3_jbd.h>
+#include <../fs/ext3/xattr.h>
+#include <linux/kp30.h>
+#include <linux/lustre_fsfilt.h>
+#include <linux/obd.h>
+#include <linux/module.h>
+
+static struct fsfilt_fs_operations fsfilt_ext3_fs_ops;
+static kmem_cache_t *fcb_cache;
+static int fcb_cache_count;
+
+struct fsfilt_cb_data {
+        struct journal_callback cb_jcb;
+        struct obd_device *cb_obd;
+        __u64 cb_last_rcvd;
+};
+
+#define EXT3_XATTR_INDEX_LUSTRE         5
+#define XATTR_LUSTRE_MDS_OBJID          "system.lustre_mds_objid"
+
+/*
+ * Start a journal transaction on @inode's filesystem, sized according to
+ * @op (one of FSFILT_OP_*), and return its handle.  We don't currently need
+ * any additional blocks for rmdir and unlink transactions because we are
+ * storing the OST oa_id inside the inode (which this transaction changes).
+ */
+static void *fsfilt_ext3_start(struct inode *inode, int op)
+{
+        /* For updates to the last received file */
+        int nblocks = EXT3_DATA_TRANS_BLOCKS;
+        void *handle;
+
+        switch(op) {
+        case FSFILT_OP_RMDIR:
+        case FSFILT_OP_UNLINK:
+                nblocks += EXT3_DELETE_TRANS_BLOCKS;
+                break;
+        case FSFILT_OP_RENAME:
+                /* We may be modifying two directories */
+                nblocks += EXT3_DATA_TRANS_BLOCKS;      /* fall through */
+        case FSFILT_OP_SYMLINK:
+                /* Possible new block + block bitmap + GDT for long symlink */
+                nblocks += 3;                           /* fall through */
+        case FSFILT_OP_CREATE:
+        case FSFILT_OP_MKDIR:
+        case FSFILT_OP_MKNOD:
+                /* New inode + block bitmap + GDT for new file */
+                nblocks += 3;                           /* fall through */
+        case FSFILT_OP_LINK:
+                /* Change parent directory */
+                nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS+EXT3_DATA_TRANS_BLOCKS;
+                break;
+        case FSFILT_OP_SETATTR:
+                /* Setattr on inode */
+                nblocks += 1;
+                break;
+        default: CERROR("unknown transaction start op %d\n", op);
+                 LBUG();
+        }
+
+        lock_kernel();
+        handle = journal_start(EXT3_JOURNAL(inode), nblocks);
+        unlock_kernel();
+
+        return handle;
+}
+
+static int fsfilt_ext3_commit(struct inode *inode, void *handle)
+{
+        int rc;
+
+        lock_kernel();
+        rc = journal_stop((handle_t *)handle);
+        unlock_kernel();
+
+        return rc;
+}
+
+static int fsfilt_ext3_setattr(struct dentry *dentry, void *handle,
+                            struct iattr *iattr)
+{
+        struct inode *inode = dentry->d_inode;
+        int rc;
+
+        lock_kernel();
+        if (inode->i_op->setattr)
+                rc = inode->i_op->setattr(dentry, iattr);
+        else
+                rc = inode_setattr(inode, iattr);
+
+        unlock_kernel();
+
+        return rc;
+}
+
+/* Set the MDS objid EA of @inode from @lmm; delete_inode passes NULL/0. */
+static int fsfilt_ext3_set_md(struct inode *inode, void *handle,
+                           struct lov_mds_md *lmm, int lmm_size)
+{
+        int rc;
+
+        down(&inode->i_sem);
+        lock_kernel();
+        rc = ext3_xattr_set(handle, inode, EXT3_XATTR_INDEX_LUSTRE,
+                            XATTR_LUSTRE_MDS_OBJID, lmm, lmm_size, 0);
+        unlock_kernel();
+        up(&inode->i_sem);
+        if (rc) {
+                CERROR("error adding objectid "LPX64" to inode %lu: rc = %d\n",
+                       lmm ? lmm->lmm_object_id : 0, inode->i_ino, rc);
+                if (rc != -ENOSPC) LBUG();
+        }
+        return rc;
+}
+
+/* Read the MDS objid EA into @lmm; a NULL @lmm only queries the EA size. */
+static int fsfilt_ext3_get_md(struct inode *inode, void *lmm, int size)
+{
+        struct lov_mds_md *md = lmm;    /* typed view; void * has no fields */
+        int rc;
+
+        down(&inode->i_sem);
+        lock_kernel();
+        rc = ext3_xattr_get(inode, EXT3_XATTR_INDEX_LUSTRE,
+                            XATTR_LUSTRE_MDS_OBJID, lmm, size);
+        unlock_kernel();
+        up(&inode->i_sem);
+
+        /* This gives us the MD size */
+        if (lmm == NULL)
+                return (rc == -ENODATA) ? 0 : rc;
+
+        if (rc < 0) {
+                CDEBUG(D_INFO, "error getting EA %s from inode %lu: "
+                       "rc = %d\n", XATTR_LUSTRE_MDS_OBJID, inode->i_ino, rc);
+                memset(lmm, 0, size);
+                return (rc == -ENODATA) ? 0 : rc;
+        }
+
+        /* Byteswapped since it appears in the catalogue; the rest is opaque */
+        md->lmm_object_id = le64_to_cpu(md->lmm_object_id);
+        return rc;
+}
+
+static ssize_t fsfilt_ext3_readpage(struct file *file, char *buf, size_t count,
+                                    loff_t *offset)
+{
+        struct inode *inode = file->f_dentry->d_inode;
+        int rc = 0;
+
+        if (S_ISREG(inode->i_mode))
+                rc = file->f_op->read(file, buf, count, offset);
+        else {
+                struct buffer_head *bh;
+
+                /* FIXME: this assumes the blocksize == count, but the calling
+                 *        function will detect this as an error for now */
+                bh = ext3_bread(NULL, inode,
+                                *offset >> inode->i_sb->s_blocksize_bits,
+                                0, &rc);
+
+                if (bh) {
+                        memcpy(buf, bh->b_data, inode->i_blksize);
+                        brelse(bh);
+                        rc = inode->i_blksize;
+                }
+        }
+
+        return rc;
+}
+
+/* Clear a regular file's objid EA in a transaction, then run cl_delete_inode. */
+static void fsfilt_ext3_delete_inode(struct inode *inode)
+{
+        if (S_ISREG(inode->i_mode)) {
+                void *handle = fsfilt_ext3_start(inode, FSFILT_OP_UNLINK);
+
+                if (IS_ERR(handle)) {
+                        CERROR("unable to start transaction\n");
+                        return;
+                }
+                if (fsfilt_ext3_set_md(inode, handle, NULL, 0))
+                        CERROR("error clearing objid on %lu\n", inode->i_ino);
+                if (fsfilt_ext3_fs_ops.cl_delete_inode)
+                        fsfilt_ext3_fs_ops.cl_delete_inode(inode);
+
+                if (fsfilt_ext3_commit(inode, handle))
+                        CERROR("error closing handle on %lu\n", inode->i_ino);
+        } else if (fsfilt_ext3_fs_ops.cl_delete_inode) {
+                /* Same NULL guard on the hook as the regular-file path */
+                fsfilt_ext3_fs_ops.cl_delete_inode(inode);
+        }
+}
+
+static void fsfilt_ext3_callback_status(struct journal_callback *jcb, int error)
+{
+        struct fsfilt_cb_data *fcb = (struct fsfilt_cb_data *)jcb;
+
+        CDEBUG(D_EXT2, "got callback for last_rcvd "LPD64": rc = %d\n",
+               fcb->cb_last_rcvd, error);
+        if (!error && fcb->cb_last_rcvd > fcb->cb_obd->mds_last_committed)
+                fcb->cb_obd->mds_last_committed = fcb->cb_last_rcvd;
+
+        kmem_cache_free(fcb_cache, fcb);
+        --fcb_cache_count;
+}
+
+/* Attach a journal callback to @handle carrying @obd's mds_last_rcvd. */
+static int fsfilt_ext3_set_last_rcvd(struct obd_device *obd, void *handle)
+{
+        struct fsfilt_cb_data *fcb;
+
+        fcb = kmem_cache_alloc(fcb_cache, GFP_NOFS);
+        if (!fcb)
+                RETURN(-ENOMEM);
+
+        ++fcb_cache_count;
+        fcb->cb_obd = obd;
+        fcb->cb_last_rcvd = obd->mds_last_rcvd;
+
+#ifdef HAVE_JOURNAL_CALLBACK_STATUS
+        CDEBUG(D_EXT2, "set callback for last_rcvd: "LPD64"\n",
+               fcb->cb_last_rcvd);
+        lock_kernel();
+        /* Note that an "incompatible pointer" warning here is OK for now */
+        journal_callback_set(handle, fsfilt_ext3_callback_status,
+                             (struct journal_callback *)fcb);
+        unlock_kernel();
+#else
+#warning "no journal callback kernel patch, faking it..."
+        {       /* block scope for the rate-limit timestamp declared below */
+        static long next = 0;
+
+        if (time_after(jiffies, next)) {
+                CERROR("no journal callback kernel patch, faking it...\n");
+                next = jiffies + 300 * HZ;
+        }
+        fsfilt_ext3_callback_status((struct journal_callback *)fcb, 0);
+        }
+#endif
+        return 0;
+}
+
+static int fsfilt_ext3_journal_data(struct file *filp)
+{
+        struct inode *inode = filp->f_dentry->d_inode;
+
+        EXT3_I(inode)->i_flags |= EXT3_JOURNAL_DATA_FL;
+
+        return 0;
+}
+
+/*
+ * We need to hack the return value for the free inode counts because
+ * the current EA code requires one filesystem block per inode with EAs,
+ * so it is possible to run out of blocks before we run out of inodes.
+ *
+ * This can be removed when the ext3 EA code is fixed.
+ */
+static int fsfilt_ext3_statfs(struct super_block *sb, struct statfs *sfs)
+{
+        int rc = vfs_statfs(sb, sfs);
+
+        if (!rc && sfs->f_bfree < sfs->f_ffree)
+                sfs->f_ffree = sfs->f_bfree;
+
+        return rc;
+}
+
+static struct fsfilt_fs_operations fsfilt_ext3_fs_ops = {
+        fs_owner:               THIS_MODULE,
+        fs_start:               fsfilt_ext3_start,
+        fs_commit:              fsfilt_ext3_commit,
+        fs_setattr:             fsfilt_ext3_setattr,
+        fs_set_md:              fsfilt_ext3_set_md,
+        fs_get_md:              fsfilt_ext3_get_md,
+        fs_readpage:            fsfilt_ext3_readpage,
+        fs_delete_inode:        fsfilt_ext3_delete_inode,
+        cl_delete_inode:        clear_inode,
+        fs_journal_data:        fsfilt_ext3_journal_data,
+        fs_set_last_rcvd:       fsfilt_ext3_set_last_rcvd,
+        fs_statfs:              fsfilt_ext3_statfs,
+};
+
+/* Module init: create the callback cache and register the "ext3" ops. */
+static int __init fsfilt_ext3_init(void)
+{
+        int rc;
+
+        //rc = ext3_xattr_register();
+        fcb_cache = kmem_cache_create("fsfilt_ext3_fcb",
+                                      sizeof(struct fsfilt_cb_data), 0,
+                                      0, NULL, NULL);
+        if (!fcb_cache) {
+                CERROR("error allocating fsfilt journal callback cache\n");
+                return -ENOMEM;
+        }
+
+        fsfilt_ext3_fs_ops.fs_type = "ext3";    /* not set in the initializer */
+        rc = fsfilt_register_fs_type(&fsfilt_ext3_fs_ops);
+        if (rc)
+                kmem_cache_destroy(fcb_cache);
+        return rc;
+}
+
+static void __exit fsfilt_ext3_exit(void)
+{
+        int rc;
+
+        fsfilt_unregister_fs_type("ext3");
+        rc = kmem_cache_destroy(fcb_cache);
+
+        if (rc || fcb_cache_count) {
+                CERROR("can't free fsfilt callback cache: count %d, rc = %d\n",
+                       fcb_cache_count, rc);
+        }
+
+        //rc = ext3_xattr_unregister();
+}
+
+MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
+MODULE_DESCRIPTION("Lustre ext3 Filesystem Helper v0.1");
+MODULE_LICENSE("GPL");
+
+module_init(fsfilt_ext3_init);
+module_exit(fsfilt_ext3_exit);
diff --git a/lustre/obdclass/fsfilt_extN.c b/lustre/obdclass/fsfilt_extN.c
new file mode 100644
index 0000000..2bbec46
--- /dev/null
+++ b/lustre/obdclass/fsfilt_extN.c
@@ -0,0 +1,360 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  lustre/lib/fsfilt_extN.c
+ *  Lustre filesystem abstraction routines
+ *
+ *  Copyright (C) 2002 Cluster File Systems, Inc.
+ *   Author: Andreas Dilger <adilger@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_FILTER
+
+#include <linux/fs.h>
+#include <linux/jbd.h>
+#include <linux/slab.h>
+#include <linux/extN_fs.h>
+#include <linux/extN_jbd.h>
+#include <linux/extN_xattr.h>
+#include <linux/kp30.h>
+#include <linux/lustre_fsfilt.h>
+#include <linux/obd.h>
+#include <linux/module.h>
+
+static struct fsfilt_fs_operations fsfilt_extN_fs_ops;
+static kmem_cache_t *fcb_cache;
+static int fcb_cache_count;
+
+struct fsfilt_cb_data {
+        struct journal_callback cb_jcb; /* data private to jbd */
+        fsfilt_cb_t *cb_func;           /* MDS/OBD completion function */
+        struct obd_device *cb_obd;      /* MDS/OBD completion device */
+        __u64 cb_last_rcvd;             /* MDS/OST last committed operation */
+};
+
+#define EXTN_XATTR_INDEX_LUSTRE         5
+#define XATTR_LUSTRE_MDS_OBJID          "system.lustre_mds_objid"
+
+/*
+ * Start a journal transaction on @inode's filesystem, sized according to
+ * @op (one of FSFILT_OP_*), and return its handle.  We don't currently need
+ * any additional blocks for rmdir and unlink transactions because we are
+ * storing the OST oa_id inside the inode (which this transaction changes).
+ */
+static void *fsfilt_extN_start(struct inode *inode, int op)
+{
+        /* For updates to the last received file */
+        int nblocks = EXTN_DATA_TRANS_BLOCKS;
+        void *handle;
+
+        switch(op) {
+        case FSFILT_OP_RMDIR:
+        case FSFILT_OP_UNLINK:
+                nblocks += EXTN_DELETE_TRANS_BLOCKS;
+                break;
+        case FSFILT_OP_RENAME:
+                /* We may be modifying two directories */
+                nblocks += EXTN_DATA_TRANS_BLOCKS;      /* fall through */
+        case FSFILT_OP_SYMLINK:
+                /* Possible new block + block bitmap + GDT for long symlink */
+                nblocks += 3;                           /* fall through */
+        case FSFILT_OP_CREATE:
+        case FSFILT_OP_MKDIR:
+        case FSFILT_OP_MKNOD:
+                /* New inode + block bitmap + GDT for new file */
+                nblocks += 3;                           /* fall through */
+        case FSFILT_OP_LINK:
+                /* Change parent directory */
+                nblocks += EXTN_INDEX_EXTRA_TRANS_BLOCKS+EXTN_DATA_TRANS_BLOCKS;
+                break;
+        case FSFILT_OP_SETATTR:
+                /* Setattr on inode */
+                nblocks += 1;
+                break;
+        default: CERROR("unknown transaction start op %d\n", op);
+                 LBUG();
+        }
+
+        lock_kernel();
+        handle = journal_start(EXTN_JOURNAL(inode), nblocks);
+        unlock_kernel();
+
+        return handle;
+}
+
+static int fsfilt_extN_commit(struct inode *inode, void *handle)
+{
+        int rc;
+
+        lock_kernel();
+        rc = journal_stop((handle_t *)handle);
+        unlock_kernel();
+
+        return rc;
+}
+
+static int fsfilt_extN_setattr(struct dentry *dentry, void *handle,
+                               struct iattr *iattr)
+{
+        struct inode *inode = dentry->d_inode;
+        int rc;
+
+        lock_kernel();
+        if (inode->i_op->setattr)
+                rc = inode->i_op->setattr(dentry, iattr);
+        else
+                rc = inode_setattr(inode, iattr);
+
+        unlock_kernel();
+
+        return rc;
+}
+
+/* Set the MDS objid EA of @inode from @lmm; delete_inode passes NULL/0. */
+static int fsfilt_extN_set_md(struct inode *inode, void *handle,
+                              struct lov_mds_md *lmm, int lmm_size)
+{
+        int rc;
+
+        down(&inode->i_sem);
+        lock_kernel();
+        rc = extN_xattr_set(handle, inode, EXTN_XATTR_INDEX_LUSTRE,
+                            XATTR_LUSTRE_MDS_OBJID, lmm, lmm_size, 0);
+        unlock_kernel();
+        up(&inode->i_sem);
+        if (rc) {
+                CERROR("error adding objectid "LPX64" to inode %lu: rc = %d\n",
+                       lmm ? lmm->lmm_object_id : 0, inode->i_ino, rc);
+                if (rc != -ENOSPC) LBUG();
+        }
+        return rc;
+}
+
+/* Read the MDS objid EA into @lmm; a NULL @lmm only queries the EA size. */
+static int fsfilt_extN_get_md(struct inode *inode, void *lmm, int size)
+{
+        struct lov_mds_md *md = lmm;    /* typed view; void * has no fields */
+        int rc;
+
+        down(&inode->i_sem);
+        lock_kernel();
+        rc = extN_xattr_get(inode, EXTN_XATTR_INDEX_LUSTRE,
+                            XATTR_LUSTRE_MDS_OBJID, lmm, size);
+        unlock_kernel();
+        up(&inode->i_sem);
+
+        /* This gives us the MD size */
+        if (lmm == NULL)
+                return (rc == -ENODATA) ? 0 : rc;
+
+        if (rc < 0) {
+                CDEBUG(D_INFO, "error getting EA %s from inode %lu: "
+                       "rc = %d\n", XATTR_LUSTRE_MDS_OBJID, inode->i_ino, rc);
+                memset(lmm, 0, size);
+                return (rc == -ENODATA) ? 0 : rc;
+        }
+
+        /* Byteswapped since it appears in the catalogue; the rest is opaque */
+        md->lmm_object_id = le64_to_cpu(md->lmm_object_id);
+        return rc;
+}
+
+static ssize_t fsfilt_extN_readpage(struct file *file, char *buf, size_t count,
+                                    loff_t *offset)
+{
+        struct inode *inode = file->f_dentry->d_inode;
+        int rc = 0;
+
+        if (S_ISREG(inode->i_mode))
+                rc = file->f_op->read(file, buf, count, offset);
+        else {
+                struct buffer_head *bh;
+
+                /* FIXME: this assumes the blocksize == count, but the calling
+                 *        function will detect this as an error for now */
+                bh = extN_bread(NULL, inode,
+                                *offset >> inode->i_sb->s_blocksize_bits,
+                                0, &rc);
+
+                if (bh) {
+                        memcpy(buf, bh->b_data, inode->i_blksize);
+                        brelse(bh);
+                        rc = inode->i_blksize;
+                }
+        }
+
+        return rc;
+}
+
+/* Clear a regular file's objid EA in a transaction, then run cl_delete_inode. */
+static void fsfilt_extN_delete_inode(struct inode *inode)
+{
+        if (S_ISREG(inode->i_mode)) {
+                void *handle = fsfilt_extN_start(inode, FSFILT_OP_UNLINK);
+
+                if (IS_ERR(handle)) {
+                        CERROR("unable to start transaction\n");
+                        return;
+                }
+                if (fsfilt_extN_set_md(inode, handle, NULL, 0))
+                        CERROR("error clearing objid on %lu\n", inode->i_ino);
+                if (fsfilt_extN_fs_ops.cl_delete_inode)
+                        fsfilt_extN_fs_ops.cl_delete_inode(inode);
+
+                if (fsfilt_extN_commit(inode, handle))
+                        CERROR("error closing handle on %lu\n", inode->i_ino);
+        } else if (fsfilt_extN_fs_ops.cl_delete_inode) {
+                /* Same NULL guard on the hook as the regular-file path */
+                fsfilt_extN_fs_ops.cl_delete_inode(inode);
+        }
+}
+
+/* Journal callback: call the stored hook, then return the fcb to its cache. */
+static void fsfilt_extN_cb_func(struct journal_callback *jcb, int error)
+{
+        struct fsfilt_cb_data *fcb = (struct fsfilt_cb_data *)jcb;
+        /* cb_func points at an fsfilt_cb_t, so dereference it to call */
+        (*fcb->cb_func)(fcb->cb_obd, fcb->cb_last_rcvd, error);
+        kmem_cache_free(fcb_cache, fcb);
+        --fcb_cache_count;
+}
+
+/* Register *@cb_func as a journal callback on @handle (or fake it at once). */
+static int fsfilt_extN_set_callback(struct obd_device *obd, __u64 last_rcvd,
+                                    void *handle, fsfilt_cb_t *cb_func)
+{
+#ifdef HAVE_JOURNAL_CALLBACK_STATUS
+        struct fsfilt_cb_data *fcb;
+
+        fcb = kmem_cache_alloc(fcb_cache, GFP_NOFS);
+        if (!fcb)
+                RETURN(-ENOMEM);
+
+        ++fcb_cache_count;
+        fcb->cb_func = cb_func;
+        fcb->cb_obd = obd;
+        fcb->cb_last_rcvd = last_rcvd;
+
+        CDEBUG(D_EXT2, "set callback for last_rcvd: "LPD64"\n", last_rcvd);
+        lock_kernel();
+        /* Note that an "incompatible pointer" warning here is OK for now */
+        journal_callback_set(handle, fsfilt_extN_cb_func,
+                             (struct journal_callback *)fcb);
+        unlock_kernel();
+#else
+#warning "no journal callback kernel patch, faking it..."
+        static long next = 0;
+
+        if (time_after(jiffies, next)) {
+                CERROR("no journal callback kernel patch, faking it...\n");
+                next = jiffies + 300 * HZ;
+        }
+        (*cb_func)(obd, last_rcvd, 0);  /* cb_func points at an fsfilt_cb_t */
+#endif
+
+        return 0;
+}
+
+static void fsfilt_extN_free_callback(struct fsfilt_cb_data *fcb)
+{
+        kmem_cache_free(fcb_cache, fcb);
+        --fcb_cache_count;
+}
+
+static int fsfilt_extN_journal_data(struct file *filp)
+{
+        struct inode *inode = filp->f_dentry->d_inode;
+
+        EXTN_I(inode)->i_flags |= EXTN_JOURNAL_DATA_FL;
+
+        return 0;
+}
+
+/*
+ * We need to hack the return value for the free inode counts because
+ * the current EA code requires one filesystem block per inode with EAs,
+ * so it is possible to run out of blocks before we run out of inodes.
+ *
+ * This can be removed when the extN EA code is fixed.
+ */
+static int fsfilt_extN_statfs(struct super_block *sb, struct statfs *sfs)
+{
+        int rc = vfs_statfs(sb, sfs);
+
+        if (!rc && sfs->f_bfree < sfs->f_ffree)
+                sfs->f_ffree = sfs->f_bfree;
+
+        return rc;
+}
+
+static struct fsfilt_fs_operations fsfilt_extN_fs_ops = { /* extN method table */
+        fs_type:                "extN",
+        fs_owner:               THIS_MODULE,
+        fs_start:               fsfilt_extN_start,
+        fs_commit:              fsfilt_extN_commit,
+        fs_setattr:             fsfilt_extN_setattr,
+        fs_set_md:              fsfilt_extN_set_md,
+        fs_get_md:              fsfilt_extN_get_md,
+        fs_readpage:            fsfilt_extN_readpage,
+        fs_delete_inode:        fsfilt_extN_delete_inode,
+        cl_delete_inode:        clear_inode,
+        fs_journal_data:        fsfilt_extN_journal_data,
+        fs_set_last_rcvd:       fsfilt_extN_set_callback, /* the defined name */
+        fs_statfs:              fsfilt_extN_statfs,
+};
+
+static int __init fsfilt_extN_init(void)
+{
+        int rc;
+
+        //rc = extN_xattr_register();
+        fcb_cache = kmem_cache_create("fsfilt_extN_fcb",
+                                      sizeof(struct fsfilt_cb_data), 0,
+                                      0, NULL, NULL);
+        if (!fcb_cache) {
+                CERROR("error allocating fsfilt journal callback cache\n");
+                GOTO(out, rc = -ENOMEM);
+        }
+
+        rc = fsfilt_register_fs_type(&fsfilt_extN_fs_ops);
+
+        if (rc)
+                kmem_cache_destroy(fcb_cache);
+out:
+        return rc;
+}
+
+static void __exit fsfilt_extN_exit(void)
+{
+        int rc;
+
+        fsfilt_unregister_fs_type("extN");
+        rc = kmem_cache_destroy(fcb_cache);
+
+        if (rc || fcb_cache_count) {
+                CERROR("can't free fsfilt callback cache: count %d, rc = %d\n",
+                       fcb_cache_count, rc);
+        }
+
+        //rc = extN_xattr_unregister();
+}
+
+MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
+MODULE_DESCRIPTION("Lustre extN Filesystem Helper v0.1");
+MODULE_LICENSE("GPL");
+
+module_init(fsfilt_extN_init);
+module_exit(fsfilt_extN_exit);
-- 
1.8.3.1