* lustre/mds/mds_ext3.c
* Lustre Metadata Server (mds) journal abstraction routines
*
- * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Copyright (C) 2002 Cluster File Systems, Inc.
* Author: Andreas Dilger <adilger@clusterfs.com>
*
* This file is part of Lustre, http://www.lustre.org.
* You should have received a copy of the GNU General Public License
* along with Lustre; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
*/
#define DEBUG_SUBSYSTEM S_MDS
#include <linux/fs.h>
#include <linux/jbd.h>
+#include <linux/slab.h>
+#include <linux/init.h>
#include <linux/ext3_fs.h>
#include <linux/ext3_jbd.h>
+#include <../fs/ext3/xattr.h>
+#include <linux/kp30.h>
#include <linux/lustre_mds.h>
+#include <linux/obd.h>
#include <linux/module.h>
+#include <linux/obd_lov.h>
static struct mds_fs_operations mds_ext3_fs_ops;
static kmem_cache_t *mcb_cache;
__u64 cb_last_rcvd;
};
+#define EXT3_XATTR_INDEX_LUSTRE 5
+#define XATTR_LUSTRE_MDS_OBJID "system.lustre_mds_objid"
+
+#define XATTR_MDS_MO_MAGIC 0xEA0BD047
+
/*
* We don't currently need any additional blocks for rmdir and
* unlink transactions because we are storing the OST oa_id inside
nblocks += 3;
case MDS_FSOP_LINK:
/* Change parent directory */
- nblocks += EXT3_DATA_TRANS_BLOCKS;
+ nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS+EXT3_DATA_TRANS_BLOCKS;
break;
case MDS_FSOP_SETATTR:
/* Setattr on inode */
int rc;
lock_kernel();
-
- /* a _really_ horrible hack to avoid removing the data stored
- in the block pointers; this data is the object id
- this will go into an extended attribute at some point.
- */
- if (iattr->ia_valid & ATTR_SIZE) {
- /* ATTR_SIZE would invoke truncate: clear it */
- iattr->ia_valid &= ~ATTR_SIZE;
- inode->i_size = iattr->ia_size;
-
- /* an _even_more_ horrible hack to make this hack work with
- * ext3. This is because ext3 keeps a separate inode size
- * until the inode is committed to ensure consistency. This
- * will also go away with the move to EAs.
- */
- EXT3_I(inode)->i_disksize = inode->i_size;
-
- /* make sure _something_ gets set - so new inode
- goes to disk (probably won't work over XFS */
- if (!iattr->ia_valid & ATTR_MODE) {
- iattr->ia_valid |= ATTR_MODE;
- iattr->ia_mode = inode->i_mode;
- }
- }
-
if (inode->i_op->setattr)
- rc = inode->i_op->setattr(dentry, iattr);
+ rc = inode->i_op->setattr(dentry, iattr);
else
rc = inode_setattr(inode, iattr);
return rc;
}
-/*
- * FIXME: nasty hack - store the object id in the first two
- * direct block spots. This should be done with EAs...
- * Note also that this does not currently mark the inode
- * dirty (it currently is used with other operations that
- * subsequently also mark the inode dirty).
- */
+/*
+ * Store the MDS striping metadata for @inode in the
+ * XATTR_LUSTRE_MDS_OBJID extended attribute, inside the transaction
+ * identified by @handle.
+ *
+ * @lmm may be NULL (mds_ext3_delete_inode passes NULL to clear the EA);
+ * in that case a NULL value of length 0 is handed to ext3_xattr_set().
+ *
+ * Returns the ext3_xattr_set() result.  Any failure is logged, and any
+ * failure other than -ENOSPC triggers LBUG().
+ */
static int mds_ext3_set_md(struct inode *inode, void *handle,
- void *obd_md, int len)
+                           struct lov_mds_md *lmm)
{
- *((__u64 *)EXT3_I(inode)->i_data) = cpu_to_le64(id);
- return 0;
+        int rc;
+
+        down(&inode->i_sem);
+        lock_kernel();
+        rc = ext3_xattr_set(handle, inode, EXT3_XATTR_INDEX_LUSTRE,
+                            XATTR_LUSTRE_MDS_OBJID, lmm,
+                            lmm ? lmm->lmm_easize : 0, 0);
+        unlock_kernel();
+        up(&inode->i_sem);
+
+        if (rc) {
+                /* lmm is NULL when the EA is being cleared, so it must
+                 * not be dereferenced just to format the message. */
+                CERROR("error adding objectid "LPX64" to inode %ld: %d\n",
+                       lmm ? lmm->lmm_object_id : (__u64)0,
+                       inode->i_ino, rc);
+                if (rc != -ENOSPC)
+                        LBUG();
+        }
+        return rc;
}
-static int mds_ext3_get_objid(struct inode *inode, obd_id *id)
+/*
+ * Read the XATTR_LUSTRE_MDS_OBJID extended attribute of @inode into @lmm.
+ *
+ * On entry lmm->lmm_easize gives the size of the caller's buffer.
+ * @lmm may be NULL, in which case no data is copied and the
+ * ext3_xattr_get() result (the MD size) is returned unchanged.
+ *
+ * Returns the ext3_xattr_get() result.  On a negative result the buffer
+ * is zeroed; otherwise lmm_object_id is converted from little-endian to
+ * CPU byte order before returning.
+ */
+static int mds_ext3_get_md(struct inode *inode, struct lov_mds_md *lmm)
{
- *id = le64_to_cpu(*((__u64 *)EXT3_I(inode)->i_data));
+        int rc;
+        /* A NULL lmm is a pure size query: do not touch it, pass size 0. */
+        int size = lmm ? lmm->lmm_easize : 0;
- return 0;
+        down(&inode->i_sem);
+        lock_kernel();
+        rc = ext3_xattr_get(inode, EXT3_XATTR_INDEX_LUSTRE,
+                            XATTR_LUSTRE_MDS_OBJID, lmm, size);
+        unlock_kernel();
+        up(&inode->i_sem);
+
+        /* This gives us the MD size */
+        if (lmm == NULL)
+                return rc;
+
+        if (rc < 0) {
+                CDEBUG(D_INFO, "error getting EA %s from MDS inode %ld: "
+                       "rc = %d\n", XATTR_LUSTRE_MDS_OBJID, inode->i_ino, rc);
+                /* Never hand stale or partial data back to the caller. */
+                memset(lmm, 0, size);
+                return rc;
+        }
+
+        /* This field is byteswapped because it appears in the
+         * catalogue.  All others are opaque to the MDS */
+        lmm->lmm_object_id = le64_to_cpu(lmm->lmm_object_id);
+
+        return rc;
}
static ssize_t mds_ext3_readpage(struct file *file, char *buf, size_t count,
EXIT;
return;
}
- if (mds_ext3_set_objid(inode, handle, 0))
+ if (mds_ext3_set_md(inode, handle, NULL))
CERROR("error clearing objid on %ld\n", inode->i_ino);
if (mds_ext3_fs_ops.cl_delete_inode)
CERROR("no journal callback kernel patch, faking it...\n");
next = jiffies + 300 * HZ;
}
- }
+
mds_ext3_callback_status((struct journal_callback *)mcb, 0);
#endif
return 0;
}
+/*
+ * statfs wrapper that caps the reported free-inode count at the
+ * free-block count.
+ *
+ * The current EA code requires one filesystem block per inode with EAs,
+ * so the filesystem can run out of blocks before it runs out of inodes;
+ * reporting more free inodes than free blocks would be misleading.
+ *
+ * This can be removed when the ext3 EA code is fixed.
+ *
+ * Returns the vfs_statfs() result; @sfs is adjusted only on success.
+ */
+static int mds_ext3_statfs(struct super_block *sb, struct statfs *sfs)
+{
+        int err;
+
+        err = vfs_statfs(sb, sfs);
+        if (err)
+                return err;
+
+        /* Never advertise more free inodes than there are free blocks. */
+        if (sfs->f_ffree > sfs->f_bfree)
+                sfs->f_ffree = sfs->f_bfree;
+
+        return 0;
+}
+
+/*
+ * Operation table that plugs the ext3 implementations defined in this
+ * file into the mds_fs_operations interface.  The fs_set_md/fs_get_md
+ * entries take the place of the former fs_set_objid/fs_get_objid pair,
+ * and fs_statfs is a new entry.
+ */
static struct mds_fs_operations mds_ext3_fs_ops = {
fs_owner: THIS_MODULE,
fs_start: mds_ext3_start,
fs_commit: mds_ext3_commit,
fs_setattr: mds_ext3_setattr,
- fs_set_objid: mds_ext3_set_objid,
- fs_get_objid: mds_ext3_get_objid,
+ fs_set_md: mds_ext3_set_md,
+ fs_get_md: mds_ext3_get_md,
fs_readpage: mds_ext3_readpage,
fs_delete_inode: mds_ext3_delete_inode,
cl_delete_inode: clear_inode,
fs_journal_data: mds_ext3_journal_data,
fs_set_last_rcvd: mds_ext3_set_last_rcvd,
+ fs_statfs: mds_ext3_statfs,
};
static int __init mds_ext3_init(void)
{
int rc;
+ //rc = ext3_xattr_register();
mcb_cache = kmem_cache_create("mds_ext3_mcb",
sizeof(struct mds_cb_data), 0,
0, NULL, NULL);
CERROR("can't free MDS callback cache: count %d, rc = %d\n",
mcb_cache_count, rc);
}
+
+ //rc = ext3_xattr_unregister();
}
MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
inode = de->d_inode;
CDEBUG(D_INODE, "ino %ld\n", inode->i_ino);
- OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_SETATTR_WRITE, inode->i_sb->s_dev);
+ OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_SETATTR_WRITE,
+ to_kdev_t(inode->i_sb->s_dev));
handle = mds_fs_start(mds, inode, MDS_FSOP_SETATTR);
if (!handle)
GOTO(out_create_dchild, rc = -EEXIST);
}
- OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_CREATE_WRITE, dir->i_sb->s_dev);
+ OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_CREATE_WRITE,
+ to_kdev_t(dir->i_sb->s_dev));
if (dir->i_mode & S_ISGID) {
rec->ur_gid = dir->i_gid;
mds_pack_inode2body(body, inode);
}
- OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_UNLINK_WRITE, dir->i_sb->s_dev);
+ OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_UNLINK_WRITE,
+ to_kdev_t(dir->i_sb->s_dev));
switch (rec->ur_mode /* & S_IFMT ? */) {
case S_IFDIR:
}
OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_LINK_WRITE,
- de_src->d_inode->i_sb->s_dev);
+ to_kdev_t(de_src->d_inode->i_sb->s_dev));
handle = mds_fs_start(mds, de_tgt_dir->d_inode, MDS_FSOP_LINK);
if (!handle)
} else
ldlm_lock_dump((void *)(unsigned long)tgtlockh.addr);
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
double_lock(de_tgtdir, de_srcdir);
-
+#endif
de_old = lookup_one_len(rec->ur_name, de_srcdir, rec->ur_namelen - 1);
if (IS_ERR(de_old)) {
CERROR("old child lookup error (%*s): %ld\n",
}
OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_RENAME_WRITE,
- de_srcdir->d_inode->i_sb->s_dev);
+ to_kdev_t(de_srcdir->d_inode->i_sb->s_dev));
handle = mds_fs_start(mds, de_tgtdir->d_inode, MDS_FSOP_RENAME);
if (!handle)
LPD64": %d\n", res_id[0], rc);
}
out_rename_tgtdir:
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
double_up(&de_srcdir->d_inode->i_sem, &de_tgtdir->d_inode->i_sem);
+#endif
ldlm_lock_decref(&tgtlockh, lock_mode);
out_rename_tgtput:
l_dput(de_tgtdir);