- 2.5 fixes for MDS

author braam <braam>

Mon, 21 Oct 2002 05:58:41 +0000 (05:58 +0000)

committer braam <braam>

Mon, 21 Oct 2002 05:58:41 +0000 (05:58 +0000)
author braam <braam>
Mon, 21 Oct 2002 05:58:41 +0000 (05:58 +0000)
committer braam <braam>
Mon, 21 Oct 2002 05:58:41 +0000 (05:58 +0000)
diff --git a/lustre/Makefile.am b/lustre/Makefile.am

index b228a89..59411e7 100644 (file)
--- a/lustre/Makefile.am
+++ b/lustre/Makefile.am
@@ -5,9 +5,15 @@
  
  AUTOMAKE_OPTIONS = foreign
  
+if LINUX25
+DIRS24 = mds
+else
+DIRS24 = extN mds
+endif
+
  # NOTE: keep extN before mds
-SUBDIRS = lov utils obdclass ldlm ptlrpc lib obdecho mdc osc ost llite obdfilter
-SUBDIRS+=  extN mds tests doc scripts 
+SUBDIRS = lov utils obdclass ldlm ptlrpc lib obdecho mdc osc ost llite 
+SUBDIRS+=   $(DIRS24) mds obdfilter tests doc scripts 
  
  DIST_SUBDIRS = $(SUBDIRS)
  EXTRA_DIST = BUGS FDL Rules include patches archdep.m4
diff --git a/lustre/archdep.m4 b/lustre/archdep.m4

index e23e75c..3a031ed 100644 (file)
--- a/lustre/archdep.m4
+++ b/lustre/archdep.m4
@@ -1,3 +1,13 @@
+AC_MSG_CHECKING(if you are running linux 2.5...)
+if test -e $LINUX/include/linux/namei.h ; then
+       linux25=yes
+       AC_MSG_RESULT(yes)
+else
+       linux25=no
+       AC_MSG_RESULT(no)
+fi
+AM_CONDITIONAL(LINUX25, test x$linux25 = xyes)
+
  AC_MSG_CHECKING(if you are running user mode linux for $host_alias..)
  if test -e $LINUX/include/asm-um ; then
  if test  X`ls -id $LINUX/include/asm | awk '{print $1}'` = X`ls -id $LINUX/include/asm-um | awk '{print $1}'` ; then
diff --git a/lustre/mds/Makefile.am b/lustre/mds/Makefile.am

index 4bc8070..24fdd57 100644 (file)
--- a/lustre/mds/Makefile.am
+++ b/lustre/mds/Makefile.am
@@ -5,9 +5,17 @@
  
  DEFS= 
  
+if LINUX25
+FSMOD = mds_ext3
+else
+FSMOD = mds_extN
+endif
+
+
+
  MODULE = mds
-modulefs_DATA = mds.o mds_extN.o # mds_ext2.o mds_ext3.o
-EXTRA_PROGRAMS = mds mds_extN # mds_ext2 mds_ext3
+modulefs_DATA = mds.o $(FSMOD).o
+EXTRA_PROGRAMS = mds $(FSMOD)
  
  LINX= mds_updates.c simple.c ll_pack.c lov_pack.c target.c
  
diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c

index 58d2882..44021d6 100644 (file)
--- a/lustre/mds/handler.c
+++ b/lustre/mds/handler.c
@@ -34,6 +34,9 @@
  #include <linux/init.h>
  #include <linux/obd_class.h>
  #include <linux/random.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#include <linux/buffer_head.h>
+#endif
  
  static kmem_cache_t *mds_file_cache;
  
@@ -196,6 +199,10 @@ struct dentry *mds_fid2locked_dentry(struct obd_device *obd, struct ll_fid *fid,
          RETURN(retval);
  }
  
+#ifndef DCACHE_DISCONNECTED
+#define DCACHE_DISCONNECTED DCACHE_NFSD_DISCONNECTED
+#endif
+
  /* Look up an entry by inode number. */
  struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid,
                                struct vfsmount **mnt)
@@ -235,7 +242,7 @@ struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid,
          spin_lock(&dcache_lock);
          list_for_each(lp, &inode->i_dentry) {
                  result = list_entry(lp, struct dentry, d_alias);
-                if (!(result->d_flags & DCACHE_NFSD_DISCONNECTED)) {
+                if (!(result->d_flags & DCACHE_DISCONNECTED)) {
                          dget_locked(result);
                          result->d_vfs_flags |= DCACHE_REFERENCED;
                          spin_unlock(&dcache_lock);
@@ -253,7 +260,7 @@ struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid,
          }
          if (mnt)
                  mntget(*mnt);
-        result->d_flags |= DCACHE_NFSD_DISCONNECTED;
+        result->d_flags |= DCACHE_DISCONNECTED;
          return result;
  }
  
@@ -1120,7 +1127,11 @@ int mds_update_server_data(struct mds_obd *mds)
                          RETURN(-EIO);
                  RETURN(rc);
          }
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
          rc = fsync_dev(filp->f_dentry->d_inode->i_rdev);
+#else
+        rc = file_fsync(filp,  filp->f_dentry, 1);
+#endif
          if (rc)
                  CERROR("error flushing MDS server data: rc = %d\n", rc);
  
diff --git a/lustre/mds/mds_ext3.c b/lustre/mds/mds_ext3.c

index d095e5e..4f98f44 100644 (file)
--- a/lustre/mds/mds_ext3.c
+++ b/lustre/mds/mds_ext3.c
@@ -4,7 +4,7 @@
   *  lustre/mds/mds_ext3.c
   *  Lustre Metadata Server (mds) journal abstraction routines
   *
- *  Copyright (C) 2002  Cluster File Systems, Inc.
+ *  Copyright (C) 2002 Cluster File Systems, Inc.
   *   Author: Andreas Dilger <adilger@clusterfs.com>
   *
   *   This file is part of Lustre, http://www.lustre.org.
@@ -21,17 +21,22 @@
   *   You should have received a copy of the GNU General Public License
   *   along with Lustre; if not, write to the Free Software
   *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
   */
  
  #define DEBUG_SUBSYSTEM S_MDS
  
  #include <linux/fs.h>
  #include <linux/jbd.h>
+#include <linux/slab.h>
+#include <linux/init.h>
  #include <linux/ext3_fs.h>
  #include <linux/ext3_jbd.h>
+#include <../fs/ext3/xattr.h>
+#include <linux/kp30.h>
  #include <linux/lustre_mds.h>
+#include <linux/obd.h>
  #include <linux/module.h>
+#include <linux/obd_lov.h>
  
  static struct mds_fs_operations mds_ext3_fs_ops;
  static kmem_cache_t *mcb_cache;
@@ -43,6 +48,11 @@ struct mds_cb_data {
          __u64 cb_last_rcvd;
  };
  
+#define EXT3_XATTR_INDEX_LUSTRE         5
+#define XATTR_LUSTRE_MDS_OBJID          "system.lustre_mds_objid"
+
+#define XATTR_MDS_MO_MAGIC              0xEA0BD047
+
  /*
   * We don't currently need any additional blocks for rmdir and
   * unlink transactions because we are storing the OST oa_id inside
@@ -73,7 +83,7 @@ static void *mds_ext3_start(struct inode *inode, int op)
                  nblocks += 3;
          case MDS_FSOP_LINK:
                  /* Change parent directory */
-                nblocks += EXT3_DATA_TRANS_BLOCKS;
+                nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS+EXT3_DATA_TRANS_BLOCKS;
                  break;
          case MDS_FSOP_SETATTR:
                  /* Setattr on inode */
@@ -108,33 +118,8 @@ static int mds_ext3_setattr(struct dentry *dentry, void *handle,
          int rc;
  
          lock_kernel();
-
-        /* a _really_ horrible hack to avoid removing the data stored
-           in the block pointers; this data is the object id
-           this will go into an extended attribute at some point.
-        */
-        if (iattr->ia_valid & ATTR_SIZE) {
-                /* ATTR_SIZE would invoke truncate: clear it */
-                iattr->ia_valid &= ~ATTR_SIZE;
-                inode->i_size = iattr->ia_size;
-
-                /* an _even_more_ horrible hack to make this hack work with
-                 * ext3.  This is because ext3 keeps a separate inode size
-                 * until the inode is committed to ensure consistency.  This
-                 * will also go away with the move to EAs.
-                 */
-                EXT3_I(inode)->i_disksize = inode->i_size;
-
-                /* make sure _something_ gets set - so new inode
-                   goes to disk (probably won't work over XFS */
-                if (!iattr->ia_valid & ATTR_MODE) {
-                        iattr->ia_valid |= ATTR_MODE;
-                        iattr->ia_mode = inode->i_mode;
-                }
-        }
-
          if (inode->i_op->setattr)
-                rc =  inode->i_op->setattr(dentry, iattr);
+                rc = inode->i_op->setattr(dentry, iattr);
          else
                  rc = inode_setattr(inode, iattr);
  
@@ -143,25 +128,55 @@ static int mds_ext3_setattr(struct dentry *dentry, void *handle,
          return rc;
  }
  
-/*
- * FIXME: nasty hack - store the object id in the first two
- *        direct block spots.  This should be done with EAs...
- *        Note also that this does not currently mark the inode
- *        dirty (it currently is used with other operations that
- *        subsequently also mark the inode dirty).
- */
  static int mds_ext3_set_md(struct inode *inode, void *handle,
-                           void *obd_md, int len)
+                           struct lov_mds_md *lmm)
  {
-        *((__u64 *)EXT3_I(inode)->i_data) = cpu_to_le64(id);
-        return 0;
+        int rc;
+
+        down(&inode->i_sem);
+        lock_kernel();
+        rc = ext3_xattr_set(handle, inode, EXT3_XATTR_INDEX_LUSTRE,
+                            XATTR_LUSTRE_MDS_OBJID, lmm,
+                            lmm ? lmm->lmm_easize : 0, 0);
+        unlock_kernel();
+        up(&inode->i_sem);
+
+        if (rc) {
+                CERROR("error adding objectid "LPX64" to inode %ld: %d\n",
+                       lmm->lmm_object_id, inode->i_ino, rc);
+                if (rc != -ENOSPC) LBUG();
+        }
+        return rc;
  }
  
-static int mds_ext3_get_objid(struct inode *inode, obd_id *id)
+static int mds_ext3_get_md(struct inode *inode, struct lov_mds_md *lmm)
  {
-        *id = le64_to_cpu(*((__u64 *)EXT3_I(inode)->i_data));
+        int rc;
+        int size = lmm ? lmm->lmm_easize : 0; /* NULL lmm: size query only */
  
-        return 0;
+        down(&inode->i_sem);
+        lock_kernel();
+        rc = ext3_xattr_get(inode, EXT3_XATTR_INDEX_LUSTRE,
+                            XATTR_LUSTRE_MDS_OBJID, lmm, size);
+        unlock_kernel();
+        up(&inode->i_sem);
+
+        /* This gives us the MD size */
+        if (lmm == NULL)
+                return rc;
+
+        if (rc < 0) {
+                CDEBUG(D_INFO, "error getting EA %s from MDS inode %ld: "
+                       "rc = %d\n", XATTR_LUSTRE_MDS_OBJID, inode->i_ino, rc);
+                memset(lmm, 0, size);
+                return rc;
+        }
+
+        /* This field is byteswapped because it appears in the
+         * catalogue.  All others are opaque to the MDS */
+        lmm->lmm_object_id = le64_to_cpu(lmm->lmm_object_id);
+
+        return rc;
  }
  
  static ssize_t mds_ext3_readpage(struct file *file, char *buf, size_t count,
@@ -201,7 +216,7 @@ static void mds_ext3_delete_inode(struct inode *inode)
                          EXIT;
                          return;
                  }
-                if (mds_ext3_set_objid(inode, handle, 0))
+                if (mds_ext3_set_md(inode, handle, NULL))
                          CERROR("error clearing objid on %ld\n", inode->i_ino);
  
                  if (mds_ext3_fs_ops.cl_delete_inode)
@@ -255,7 +270,7 @@ static int mds_ext3_set_last_rcvd(struct mds_obd *mds, void *handle)
                  CERROR("no journal callback kernel patch, faking it...\n");
                  next = jiffies + 300 * HZ;
          }
-        }
+
          mds_ext3_callback_status((struct journal_callback *)mcb, 0);
  #endif
  
@@ -271,24 +286,43 @@ static int mds_ext3_journal_data(struct file *filp)
          return 0;
  }
  
+/*
+ * We need to hack the return value for the free inode counts because
+ * the current EA code requires one filesystem block per inode with EAs,
+ * so it is possible to run out of blocks before we run out of inodes.
+ *
+ * This can be removed when the ext3 EA code is fixed.
+ */
+static int mds_ext3_statfs(struct super_block *sb, struct statfs *sfs)
+{
+        int err = vfs_statfs(sb, sfs);
+
+        /* Never report more free inodes than there are free blocks */
+        if (err == 0 && sfs->f_ffree > sfs->f_bfree)
+                sfs->f_ffree = sfs->f_bfree;
+        return err;
+}
+
  static struct mds_fs_operations mds_ext3_fs_ops = {
          fs_owner:               THIS_MODULE,
          fs_start:               mds_ext3_start,
          fs_commit:              mds_ext3_commit,
          fs_setattr:             mds_ext3_setattr,
-        fs_set_objid:           mds_ext3_set_objid,
-        fs_get_objid:           mds_ext3_get_objid,
+        fs_set_md:              mds_ext3_set_md,
+        fs_get_md:              mds_ext3_get_md,
          fs_readpage:            mds_ext3_readpage,
          fs_delete_inode:        mds_ext3_delete_inode,
          cl_delete_inode:        clear_inode,
          fs_journal_data:        mds_ext3_journal_data,
          fs_set_last_rcvd:       mds_ext3_set_last_rcvd,
+        fs_statfs:              mds_ext3_statfs,
  };
  
  static int __init mds_ext3_init(void)
  {
          int rc;
  
+        //rc = ext3_xattr_register();
          mcb_cache = kmem_cache_create("mds_ext3_mcb",
                                        sizeof(struct mds_cb_data), 0,
                                        0, NULL, NULL);
@@ -316,6 +350,8 @@ static void __exit mds_ext3_exit(void)
                  CERROR("can't free MDS callback cache: count %d, rc = %d\n",
                         mcb_cache_count, rc);
          }
+
+        //rc = ext3_xattr_unregister();
  }
  
  MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
diff --git a/lustre/mds/mds_reint.c b/lustre/mds/mds_reint.c

index eb9a157..aac0b47 100644 (file)
--- a/lustre/mds/mds_reint.c
+++ b/lustre/mds/mds_reint.c
@@ -127,7 +127,8 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset,
          inode = de->d_inode;
          CDEBUG(D_INODE, "ino %ld\n", inode->i_ino);
  
-        OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_SETATTR_WRITE, inode->i_sb->s_dev);
+        OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_SETATTR_WRITE, 
+                       to_kdev_t(inode->i_sb->s_dev));
  
          handle = mds_fs_start(mds, inode, MDS_FSOP_SETATTR);
          if (!handle)
@@ -280,7 +281,8 @@ static int mds_reint_create(struct mds_update_record *rec, int offset,
                  GOTO(out_create_dchild, rc = -EEXIST);
          }
  
-        OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_CREATE_WRITE, dir->i_sb->s_dev);
+        OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_CREATE_WRITE, 
+                       to_kdev_t(dir->i_sb->s_dev));
  
          if (dir->i_mode & S_ISGID) {
                  rec->ur_gid = dir->i_gid;
@@ -461,7 +463,8 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset,
                  mds_pack_inode2body(body, inode);
          }
  
-        OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_UNLINK_WRITE, dir->i_sb->s_dev);
+        OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_UNLINK_WRITE, 
+                       to_kdev_t(dir->i_sb->s_dev));
  
          switch (rec->ur_mode /* & S_IFMT ? */) {
          case S_IFDIR:
@@ -628,7 +631,7 @@ static int mds_reint_link(struct mds_update_record *rec, int offset,
          }
  
          OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_LINK_WRITE,
-                       de_src->d_inode->i_sb->s_dev);
+                       to_kdev_t(de_src->d_inode->i_sb->s_dev));
  
          handle = mds_fs_start(mds, de_tgt_dir->d_inode, MDS_FSOP_LINK);
          if (!handle)
@@ -725,8 +728,9 @@ static int mds_reint_rename(struct mds_update_record *rec, int offset,
          } else
                  ldlm_lock_dump((void *)(unsigned long)tgtlockh.addr);
  
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
          double_lock(de_tgtdir, de_srcdir);
-
+#endif
          de_old = lookup_one_len(rec->ur_name, de_srcdir, rec->ur_namelen - 1);
          if (IS_ERR(de_old)) {
                  CERROR("old child lookup error (%*s): %ld\n",
@@ -767,7 +771,7 @@ static int mds_reint_rename(struct mds_update_record *rec, int offset,
          }
  
          OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_RENAME_WRITE,
-                       de_srcdir->d_inode->i_sb->s_dev);
+                       to_kdev_t(de_srcdir->d_inode->i_sb->s_dev));
  
          handle = mds_fs_start(mds, de_tgtdir->d_inode, MDS_FSOP_RENAME);
          if (!handle)
@@ -817,7 +821,9 @@ out_rename_deold:
                                 LPD64": %d\n", res_id[0], rc);
          }
  out_rename_tgtdir:
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
          double_up(&de_srcdir->d_inode->i_sem, &de_tgtdir->d_inode->i_sem);
+#endif
          ldlm_lock_decref(&tgtlockh, lock_mode);
  out_rename_tgtput:
          l_dput(de_tgtdir);
author	braam <braam>
	Mon, 21 Oct 2002 05:58:41 +0000 (05:58 +0000)
committer	braam <braam>
	Mon, 21 Oct 2002 05:58:41 +0000 (05:58 +0000)
lustre/Makefile.am		patch \| blob \| history
lustre/archdep.m4		patch \| blob \| history
lustre/mds/Makefile.am		patch \| blob \| history
lustre/mds/handler.c		patch \| blob \| history
lustre/mds/mds_ext3.c		patch \| blob \| history
lustre/mds/mds_reint.c		patch \| blob \| history