From 8371d266869e8370d1a6b730798d38c71fb3a8a0 Mon Sep 17 00:00:00 2001 From: braam Date: Mon, 21 Oct 2002 05:58:41 +0000 Subject: [PATCH] - 2.5 fixes for MDS --- lustre/Makefile.am | 10 +++- lustre/archdep.m4 | 10 ++++ lustre/mds/Makefile.am | 12 ++++- lustre/mds/handler.c | 15 +++++- lustre/mds/mds_ext3.c | 128 +++++++++++++++++++++++++++++++------------------ lustre/mds/mds_reint.c | 18 ++++--- 6 files changed, 135 insertions(+), 58 deletions(-) diff --git a/lustre/Makefile.am b/lustre/Makefile.am index b228a89..59411e7 100644 --- a/lustre/Makefile.am +++ b/lustre/Makefile.am @@ -5,9 +5,15 @@ AUTOMAKE_OPTIONS = foreign +if LINUX25 +DIRS24 = mds +else +DIRS24 = extN mds +endif + # NOTE: keep extN before mds -SUBDIRS = lov utils obdclass ldlm ptlrpc lib obdecho mdc osc ost llite obdfilter -SUBDIRS+= extN mds tests doc scripts +SUBDIRS = lov utils obdclass ldlm ptlrpc lib obdecho mdc osc ost llite +SUBDIRS+= $(DIRS24) mds obdfilter tests doc scripts DIST_SUBDIRS = $(SUBDIRS) EXTRA_DIST = BUGS FDL Rules include patches archdep.m4 diff --git a/lustre/archdep.m4 b/lustre/archdep.m4 index e23e75c..3a031ed 100644 --- a/lustre/archdep.m4 +++ b/lustre/archdep.m4 @@ -1,3 +1,13 @@ +AC_MSG_CHECKING(if you are running linux 2.5...) +if test -e $LINUX/include/linux/namei.h ; then + linux25=yes + AC_MSG_RESULT(yes) +else + linux25=no + AC_MSG_RESULT(no) +fi +AM_CONDITIONAL(LINUX25, test x$linux25 = xyes) + AC_MSG_CHECKING(if you are running user mode linux for $host_alias..) if test -e $LINUX/include/asm-um ; then if test X`ls -id $LINUX/include/asm | awk '{print $1}'` = X`ls -id $LINUX/include/asm-um | awk '{print $1}'` ; then diff --git a/lustre/mds/Makefile.am b/lustre/mds/Makefile.am index 4bc8070..24fdd57 100644 --- a/lustre/mds/Makefile.am +++ b/lustre/mds/Makefile.am @@ -5,9 +5,17 @@ DEFS= +if LINUX25 +FSMOD = mds_ext3 +else +FSMOD = mds_extN +endif + + + MODULE = mds -modulefs_DATA = mds.o mds_extN.o # mds_ext2.o mds_ext3.o -EXTRA_PROGRAMS = mds mds_extN # mds_ext2 mds_ext3 +modulefs_DATA = mds.o $(FSMOD).o +EXTRA_PROGRAMS = mds $(FSMOD) LINX= mds_updates.c simple.c ll_pack.c lov_pack.c target.c diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index 58d2882..44021d6 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -34,6 +34,9 @@ #include #include #include +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) +#include +#endif static kmem_cache_t *mds_file_cache; @@ -196,6 +199,10 @@ struct dentry *mds_fid2locked_dentry(struct obd_device *obd, struct ll_fid *fid, RETURN(retval); } +#ifndef DCACHE_DISCONNECTED +#define DCACHE_DISCONNECTED DCACHE_NFSD_DISCONNECTED +#endif + /* Look up an entry by inode number. */ struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid, struct vfsmount **mnt) @@ -235,7 +242,7 @@ struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid, spin_lock(&dcache_lock); list_for_each(lp, &inode->i_dentry) { result = list_entry(lp, struct dentry, d_alias); - if (!(result->d_flags & DCACHE_NFSD_DISCONNECTED)) { + if (!(result->d_flags & DCACHE_DISCONNECTED)) { dget_locked(result); result->d_vfs_flags |= DCACHE_REFERENCED; spin_unlock(&dcache_lock); @@ -253,7 +260,7 @@ struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid, } if (mnt) mntget(*mnt); - result->d_flags |= DCACHE_NFSD_DISCONNECTED; + result->d_flags |= DCACHE_DISCONNECTED; return result; } @@ -1120,7 +1127,11 @@ int mds_update_server_data(struct mds_obd *mds) RETURN(-EIO); RETURN(rc); } +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) rc = fsync_dev(filp->f_dentry->d_inode->i_rdev); +#else + rc = file_fsync(filp, filp->f_dentry, 1); +#endif if (rc) CERROR("error flushing MDS server data: rc = %d\n", rc); diff --git a/lustre/mds/mds_ext3.c b/lustre/mds/mds_ext3.c index d095e5e..4f98f44 100644 --- a/lustre/mds/mds_ext3.c +++ b/lustre/mds/mds_ext3.c @@ -4,7 +4,7 @@ * lustre/mds/mds_ext3.c * Lustre Metadata Server (mds) journal abstraction routines * - * Copyright (C) 2002 Cluster File Systems, Inc. + * Copyright (C) 2002 Cluster File Systems, Inc. * Author: Andreas Dilger * * This file is part of Lustre, http://www.lustre.org. @@ -21,17 +21,22 @@ * You should have received a copy of the GNU General Public License * along with Lustre; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * */ #define DEBUG_SUBSYSTEM S_MDS #include #include +#include +#include #include #include +#include <../fs/ext3/xattr.h> +#include #include +#include #include +#include static struct mds_fs_operations mds_ext3_fs_ops; static kmem_cache_t *mcb_cache; @@ -43,6 +48,11 @@ struct mds_cb_data { __u64 cb_last_rcvd; }; +#define EXT3_XATTR_INDEX_LUSTRE 5 +#define XATTR_LUSTRE_MDS_OBJID "system.lustre_mds_objid" + +#define XATTR_MDS_MO_MAGIC 0xEA0BD047 + /* * We don't currently need any additional blocks for rmdir and * unlink transactions because we are storing the OST oa_id inside @@ -73,7 +83,7 @@ static void *mds_ext3_start(struct inode *inode, int op) nblocks += 3; case MDS_FSOP_LINK: /* Change parent directory */ - nblocks += EXT3_DATA_TRANS_BLOCKS; + nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS+EXT3_DATA_TRANS_BLOCKS; break; case MDS_FSOP_SETATTR: /* Setattr on inode */ @@ -108,33 +118,8 @@ static int mds_ext3_setattr(struct dentry *dentry, void *handle, int rc; lock_kernel(); - - /* a _really_ horrible hack to avoid removing the data stored - in the block pointers; this data is the object id - this will go into an extended attribute at some point. - */ - if (iattr->ia_valid & ATTR_SIZE) { - /* ATTR_SIZE would invoke truncate: clear it */ - iattr->ia_valid &= ~ATTR_SIZE; - inode->i_size = iattr->ia_size; - - /* an _even_more_ horrible hack to make this hack work with - * ext3. This is because ext3 keeps a separate inode size - * until the inode is committed to ensure consistency. This - * will also go away with the move to EAs. - */ - EXT3_I(inode)->i_disksize = inode->i_size; - - /* make sure _something_ gets set - so new inode - goes to disk (probably won't work over XFS */ - if (!iattr->ia_valid & ATTR_MODE) { - iattr->ia_valid |= ATTR_MODE; - iattr->ia_mode = inode->i_mode; - } - } - if (inode->i_op->setattr) - rc = inode->i_op->setattr(dentry, iattr); + rc = inode->i_op->setattr(dentry, iattr); else rc = inode_setattr(inode, iattr); @@ -143,25 +128,55 @@ static int mds_ext3_setattr(struct dentry *dentry, void *handle, return rc; } -/* - * FIXME: nasty hack - store the object id in the first two - * direct block spots. This should be done with EAs... - * Note also that this does not currently mark the inode - * dirty (it currently is used with other operations that - * subsequently also mark the inode dirty). - */ static int mds_ext3_set_md(struct inode *inode, void *handle, - void *obd_md, int len) + struct lov_mds_md *lmm) { - *((__u64 *)EXT3_I(inode)->i_data) = cpu_to_le64(id); - return 0; + int rc; + + down(&inode->i_sem); + lock_kernel(); + rc = ext3_xattr_set(handle, inode, EXT3_XATTR_INDEX_LUSTRE, + XATTR_LUSTRE_MDS_OBJID, lmm, + lmm ? lmm->lmm_easize : 0, 0); + unlock_kernel(); + up(&inode->i_sem); + + if (rc) { + CERROR("error adding objectid "LPX64" to inode %ld: %d\n", + lmm->lmm_object_id, inode->i_ino, rc); + if (rc != -ENOSPC) LBUG(); + } + return rc; } -static int mds_ext3_get_objid(struct inode *inode, obd_id *id) +static int mds_ext3_get_md(struct inode *inode, struct lov_mds_md *lmm) { - *id = le64_to_cpu(*((__u64 *)EXT3_I(inode)->i_data)); + int rc; + int size = lmm->lmm_easize; - return 0; + down(&inode->i_sem); + lock_kernel(); + rc = ext3_xattr_get(inode, EXT3_XATTR_INDEX_LUSTRE, + XATTR_LUSTRE_MDS_OBJID, lmm, size); + unlock_kernel(); + up(&inode->i_sem); + + /* This gives us the MD size */ + if (lmm == NULL) + return rc; + + if (rc < 0) { + CDEBUG(D_INFO, "error getting EA %s from MDS inode %ld: " + "rc = %d\n", XATTR_LUSTRE_MDS_OBJID, inode->i_ino, rc); + memset(lmm, 0, size); + return rc; + } + + /* This field is byteswapped because it appears in the + * catalogue. All others are opaque to the MDS */ + lmm->lmm_object_id = le64_to_cpu(lmm->lmm_object_id); + + return rc; } static ssize_t mds_ext3_readpage(struct file *file, char *buf, size_t count, @@ -201,7 +216,7 @@ static void mds_ext3_delete_inode(struct inode *inode) EXIT; return; } - if (mds_ext3_set_objid(inode, handle, 0)) + if (mds_ext3_set_md(inode, handle, NULL)) CERROR("error clearing objid on %ld\n", inode->i_ino); if (mds_ext3_fs_ops.cl_delete_inode) @@ -255,7 +270,7 @@ static int mds_ext3_set_last_rcvd(struct mds_obd *mds, void *handle) CERROR("no journal callback kernel patch, faking it...\n"); next = jiffies + 300 * HZ; } - } + mds_ext3_callback_status((struct journal_callback *)mcb, 0); #endif @@ -271,24 +286,43 @@ static int mds_ext3_journal_data(struct file *filp) return 0; } +/* + * We need to hack the return value for the free inode counts because + * the current EA code requires one filesystem block per inode with EAs, + * so it is possible to run out of blocks before we run out of inodes. + * + * This can be removed when the ext3 EA code is fixed. + */ +static int mds_ext3_statfs(struct super_block *sb, struct statfs *sfs) +{ + int rc = vfs_statfs(sb, sfs); + + if (!rc && sfs->f_bfree < sfs->f_ffree) + sfs->f_ffree = sfs->f_bfree; + + return rc; +} + static struct mds_fs_operations mds_ext3_fs_ops = { fs_owner: THIS_MODULE, fs_start: mds_ext3_start, fs_commit: mds_ext3_commit, fs_setattr: mds_ext3_setattr, - fs_set_objid: mds_ext3_set_objid, - fs_get_objid: mds_ext3_get_objid, + fs_set_md: mds_ext3_set_md, + fs_get_md: mds_ext3_get_md, fs_readpage: mds_ext3_readpage, fs_delete_inode: mds_ext3_delete_inode, cl_delete_inode: clear_inode, fs_journal_data: mds_ext3_journal_data, fs_set_last_rcvd: mds_ext3_set_last_rcvd, + fs_statfs: mds_ext3_statfs, }; static int __init mds_ext3_init(void) { int rc; + //rc = ext3_xattr_register(); mcb_cache = kmem_cache_create("mds_ext3_mcb", sizeof(struct mds_cb_data), 0, 0, NULL, NULL); @@ -316,6 +350,8 @@ static void __exit mds_ext3_exit(void) CERROR("can't free MDS callback cache: count %d, rc = %d\n", mcb_cache_count, rc); } + + //rc = ext3_xattr_unregister(); } MODULE_AUTHOR("Cluster File Systems, Inc. "); diff --git a/lustre/mds/mds_reint.c b/lustre/mds/mds_reint.c index eb9a157..aac0b47 100644 --- a/lustre/mds/mds_reint.c +++ b/lustre/mds/mds_reint.c @@ -127,7 +127,8 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset, inode = de->d_inode; CDEBUG(D_INODE, "ino %ld\n", inode->i_ino); - OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_SETATTR_WRITE, inode->i_sb->s_dev); + OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_SETATTR_WRITE, + to_kdev_t(inode->i_sb->s_dev)); handle = mds_fs_start(mds, inode, MDS_FSOP_SETATTR); if (!handle) @@ -280,7 +281,8 @@ static int mds_reint_create(struct mds_update_record *rec, int offset, GOTO(out_create_dchild, rc = -EEXIST); } - OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_CREATE_WRITE, dir->i_sb->s_dev); + OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_CREATE_WRITE, + to_kdev_t(dir->i_sb->s_dev)); if (dir->i_mode & S_ISGID) { rec->ur_gid = dir->i_gid; @@ -461,7 +463,8 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset, mds_pack_inode2body(body, inode); } - OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_UNLINK_WRITE, dir->i_sb->s_dev); + OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_UNLINK_WRITE, + to_kdev_t(dir->i_sb->s_dev)); switch (rec->ur_mode /* & S_IFMT ? */) { case S_IFDIR: @@ -628,7 +631,7 @@ static int mds_reint_link(struct mds_update_record *rec, int offset, } OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_LINK_WRITE, - de_src->d_inode->i_sb->s_dev); + to_kdev_t(de_src->d_inode->i_sb->s_dev)); handle = mds_fs_start(mds, de_tgt_dir->d_inode, MDS_FSOP_LINK); if (!handle) @@ -725,8 +728,9 @@ static int mds_reint_rename(struct mds_update_record *rec, int offset, } else ldlm_lock_dump((void *)(unsigned long)tgtlockh.addr); +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) double_lock(de_tgtdir, de_srcdir); - +#endif de_old = lookup_one_len(rec->ur_name, de_srcdir, rec->ur_namelen - 1); if (IS_ERR(de_old)) { CERROR("old child lookup error (%*s): %ld\n", @@ -767,7 +771,7 @@ static int mds_reint_rename(struct mds_update_record *rec, int offset, } OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_RENAME_WRITE, - de_srcdir->d_inode->i_sb->s_dev); + to_kdev_t(de_srcdir->d_inode->i_sb->s_dev)); handle = mds_fs_start(mds, de_tgtdir->d_inode, MDS_FSOP_RENAME); if (!handle) @@ -817,7 +821,9 @@ out_rename_deold: LPD64": %d\n", res_id[0], rc); } out_rename_tgtdir: +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) double_up(&de_srcdir->d_inode->i_sem, &de_tgtdir->d_inode->i_sem); +#endif ldlm_lock_decref(&tgtlockh, lock_mode); out_rename_tgtput: l_dput(de_tgtdir); -- 1.8.3.1