From 52b3216f3e0c493451b72c886994bd62a8c8f1d9 Mon Sep 17 00:00:00 2001 From: adilger Date: Fri, 29 Mar 2002 08:28:39 +0000 Subject: [PATCH] All of the "hack" and FIXME stuff in MDS is now handled by fs-specific methods which are selected at mount time. This includes journaling support. Changes OBD_FAIL_WRITE() to include the block device which should be disabled. We conditionally compile the "make device readonly" support. --- lustre/include/linux/obd_support.h | 31 ++++++++---- lustre/mds/handler.c | 90 ++++++++++++--------------------- lustre/mds/mds_reint.c | 100 ++++++++++++++----------------------- 3 files changed, 92 insertions(+), 129 deletions(-) diff --git a/lustre/include/linux/obd_support.h b/lustre/include/linux/obd_support.h index 14aad36..ceffa9a 100644 --- a/lustre/include/linux/obd_support.h +++ b/lustre/include/linux/obd_support.h @@ -86,23 +86,36 @@ enum { #define OBD_FAIL_MDS_ALL_NET 0x01000000 #define OBD_FAIL_OST_ALL_NET 0x02000000 -#define OBD_FAIL_CHECK(id) ((obd_fail_loc & OBD_FAIL_MASK_LOC) == (id)) +#define OBD_FAIL_CHECK(id) ((obd_fail_loc & OBD_FAIL_MASK_LOC) == (id) && \ + ((obd_fail_loc & (OBD_FAILED | OBD_FAIL_ONCE))!=\ + (OBD_FAILED | OBD_FAIL_ONCE))) #define OBD_FAIL_RETURN(id, ret) \ do { \ if (OBD_FAIL_CHECK(id)) { \ - CERROR("obd_fail_loc=%d, fail operation rc=%d\n", id, ret); \ + CERROR("obd_fail_loc=%x, fail operation rc=%d\n", id, ret); \ + obd_fail_loc |= OBD_FAILED; \ RETURN(ret); \ } \ } while(0) -#define OBD_FAIL_WRITE(id) \ -do { \ - if (OBD_FAIL_CHECK(id)) { \ - CERROR("obd_fail_loc=%d, fail write operation\n", id); \ - /* FIXME: do something bad here */ \ - } \ -} while (0) +#include + +static inline void OBD_FAIL_WRITE(int id, kdev_t dev) +{ + if (OBD_FAIL_CHECK(id)) { +#ifdef CONFIG_DEV_RDONLY + CERROR("obd_fail_loc=%x, fail write operation on %s\n", + id, bdevname(dev)); + dev_set_rdonly(dev, 2); +#else + CERROR("obd_fail_loc=%x, can't fail write operation on %s\n", + id, bdevname(dev)); +#endif + /* We set FAIL_ONCE because we never "un-fail" a device */ + obd_fail_loc |= OBD_FAILED | OBD_FAIL_ONCE; + } +} #define OBD_ALLOC(ptr, size) \ do { \ diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index 49452f3..00c47ab 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -34,9 +33,6 @@ #include #include -struct buffer_head *ext3_bread(void *handle, struct inode *inode, - int block, int create, int *err); - int mds_sendpage(struct ptlrpc_request *req, struct file *file, __u64 offset, struct niobuf *dst) { @@ -46,34 +42,12 @@ int mds_sendpage(struct ptlrpc_request *req, struct file *file, OBD_FAIL_RETURN(OBD_FAIL_MDS_SENDPAGE, -EIO); if (req->rq_peer.peer_nid == 0) { - struct inode *inode = file->f_dentry->d_inode; + /* dst->addr is a user address, but in a different task! */ char *buf = (char *)(long)dst->addr; - /* dst->addr is a user address, but in a different task! */ set_fs(KERNEL_DS); - /* FIXME: we need to use ext3_bread because ext3 does not - * have the directories in page cache yet. If we - * just use generic_file_read() then the pages we - * get are in a different address space than those - * used by the filesystem == cache incoherency. - */ - if (S_ISREG(inode->i_mode)) - rc = file->f_op->read(file, buf, PAGE_SIZE, &offset); - else if (!strcmp(inode->i_sb->s_type->name, "ext3")) { - struct buffer_head *bh; - - bh = ext3_bread(NULL, inode, - offset >> inode->i_sb->s_blocksize_bits, - 0, &rc); - - if (bh) { - memcpy(buf, bh->b_data, inode->i_blksize); - brelse(bh); - rc = inode->i_blksize; - } - } else - rc = generic_file_read(file, buf, PAGE_SIZE, &offset); - + rc = mds_fs_readpage(&req->rq_obd->u.mds, file, buf, PAGE_SIZE, + &offset); set_fs(oldfs); if (rc != PAGE_SIZE) { @@ -82,7 +56,6 @@ int mds_sendpage(struct ptlrpc_request *req, struct file *file, } EXIT; } else { - struct inode *inode = file->f_dentry->d_inode; struct ptlrpc_bulk_desc *bulk; char *buf; @@ -101,23 +74,8 @@ int mds_sendpage(struct ptlrpc_request *req, struct file *file, } set_fs(KERNEL_DS); - /* FIXME: see comments above */ - if (S_ISREG(inode->i_mode)) - rc = file->f_op->read(file, buf, PAGE_SIZE, &offset); - else if (!strcmp(inode->i_sb->s_type->name, "ext3")) { - struct buffer_head *bh; - - bh = ext3_bread(NULL, inode, offset >> inode->i_blkbits, - 0, &rc); - - if (bh) { - memcpy(buf, bh->b_data, inode->i_blksize); - brelse(bh); - rc = inode->i_blksize; - } - } else - rc = generic_file_read(file, buf, PAGE_SIZE, &offset); - + rc = mds_fs_readpage(&req->rq_obd->u.mds, file, buf, PAGE_SIZE, + &offset); set_fs(oldfs); if (rc != PAGE_SIZE) { @@ -210,17 +168,12 @@ struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid, return result; } -static inline void mds_get_objid(struct inode *inode, __u64 *id) -{ - /* FIXME: it is only by luck that this works on ext3 */ - memcpy(id, &inode->u.ext2_i.i_data, sizeof(*id)); -} - int mds_getattr(struct ptlrpc_request *req) { struct dentry *de; struct inode *inode; struct mds_rep *rep; + struct mds_obd *mds = &req->rq_obd->u.mds; int rc; rc = mds_pack_rep(NULL, 0, NULL, 0, &req->rq_rephdr, &req->rq_rep, @@ -234,7 +187,7 @@ int mds_getattr(struct ptlrpc_request *req) req->rq_rephdr->xid = req->rq_reqhdr->xid; rep = req->rq_rep.mds; - de = mds_fid2dentry(&req->rq_obd->u.mds, &req->rq_req.mds->fid1, NULL); + de = mds_fid2dentry(mds, &req->rq_req.mds->fid1, NULL); if (IS_ERR(de)) { req->rq_rephdr->status = -ENOENT; RETURN(0); @@ -252,7 +205,7 @@ int mds_getattr(struct ptlrpc_request *req) rep->mode = inode->i_mode; rep->nlink = inode->i_nlink; rep->valid = ~0; - mds_get_objid(inode, &rep->objid); + mds_fs_get_objid(mds, inode, &rep->objid); dput(de); return 0; } @@ -485,20 +438,43 @@ static int mds_setup(struct obd_device *obddev, obd_count len, void *buf) int err; ENTRY; + MOD_INC_USE_COUNT; mnt = do_kern_mount(data->ioc_inlbuf2, 0, data->ioc_inlbuf1, NULL); err = PTR_ERR(mnt); if (IS_ERR(mnt)) { CERROR("do_kern_mount failed: %d\n", err); + MOD_DEC_USE_COUNT; RETURN(err); } mds->mds_sb = mnt->mnt_root->d_inode->i_sb; - if (!mds->mds_sb) + if (!mds->mds_sb) { + MOD_DEC_USE_COUNT; RETURN(-ENODEV); + } mds->mds_vfsmnt = mnt; mds->mds_fstype = strdup(data->ioc_inlbuf2); + if (!strcmp(mds->mds_fstype, "ext3")) + mds->mds_fsops = &mds_ext3_fs_ops; + else if (!strcmp(mds->mds_fstype, "ext2")) + mds->mds_fsops = &mds_ext2_fs_ops; + else { + CERROR("unsupported MDS filesystem type %s\n", mds->mds_fstype); + kfree(mds->mds_fstype); + MOD_DEC_USE_COUNT; + RETURN(-EPERM); + } + + /* + * Replace the client filesystem delete_inode method with our own, + * so that we can clear the object ID before the inode is deleted. + * The fs_delete_inode method will call cl_delete_inode for us. + */ + mds->mds_fsops->cl_delete_inode = mds->mds_sb->s_op->delete_inode; + mds->mds_sb->s_op->delete_inode = mds->mds_fsops->fs_delete_inode; + mds->mds_ctxt.pwdmnt = mnt; mds->mds_ctxt.pwd = mnt->mnt_root; mds->mds_ctxt.fs = KERNEL_DS; @@ -514,8 +490,8 @@ static int mds_setup(struct obd_device *obddev, obd_count len, void *buf) err = ptlrpc_start_thread(obddev, mds->mds_service, "lustre_mds"); if (err) CERROR("cannot start thread\n"); + /* FIXME: do we need to MOD_DEC_USE_COUNT here? */ - MOD_INC_USE_COUNT; RETURN(0); } diff --git a/lustre/mds/mds_reint.c b/lustre/mds/mds_reint.c index 5ee12fb..dbfa18c 100644 --- a/lustre/mds/mds_reint.c +++ b/lustre/mds/mds_reint.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include @@ -43,8 +42,9 @@ static int mds_reint_setattr(struct mds_update_record *rec, struct ptlrpc_reques { struct dentry *de; struct inode *inode; + struct mds_obd *mds = &req->rq_obd->u.mds; - de = mds_fid2dentry(&req->rq_obd->u.mds, rec->ur_fid1, NULL); + de = mds_fid2dentry(mds, rec->ur_fid1, NULL); if (IS_ERR(de) || OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_SETATTR)) { req->rq_rephdr->status = -ESTALE; RETURN(0); @@ -53,70 +53,38 @@ static int mds_reint_setattr(struct mds_update_record *rec, struct ptlrpc_reques inode = de->d_inode; CDEBUG(D_INODE, "ino %ld\n", inode->i_ino); - /* a _really_ horrible hack to avoid removing the data stored - in the block pointers; this data is the object id - this will go into an extended attribute at some point. - */ - if ( rec->ur_iattr.ia_valid & ATTR_SIZE ) { - /* ATTR_SIZE would invoke truncate: clear it */ - rec->ur_iattr.ia_valid &= ~ATTR_SIZE; - inode->i_size = rec->ur_iattr.ia_size; - - /* an _even_more_ horrible hack to make this hack work with - * ext3. This is because ext3 keeps a separate inode size - * until the inode is committed to ensure consistency. This - * will also go away with the move to EAs. - */ - if (!strcmp(inode->i_sb->s_type->name, "ext3")) - inode->u.ext3_i.i_disksize = inode->i_size; - - /* make sure _something_ gets set - so new inode - goes to disk (probably won't work over XFS */ - if (!rec->ur_iattr.ia_valid & ATTR_MODE) { - rec->ur_iattr.ia_valid |= ATTR_MODE; - rec->ur_iattr.ia_mode = inode->i_mode; - } - } - OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_SETATTR_WRITE); - if ( inode->i_op->setattr ) { - req->rq_rephdr->status = - inode->i_op->setattr(de, &rec->ur_iattr); - } else { - req->rq_rephdr->status = - inode_setattr(inode, &rec->ur_iattr); - } + mds_fs_setattr(mds, inode, NULL, &rec->ur_iattr); + + OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_SETATTR_WRITE, inode->i_sb->s_dev); + + if (inode->i_op->setattr) + req->rq_rephdr->status = inode->i_op->setattr(de, &rec->ur_iattr); + else + req->rq_rephdr->status = inode_setattr(inode, &rec->ur_iattr); l_dput(de); RETURN(0); } -/* - XXX nasty hack: store the object id in the first two - direct block spots -*/ -static inline void mds_store_objid(struct inode *inode, __u64 *id) -{ - /* FIXME: it is only by luck that this works on ext3 */ - memcpy(&inode->u.ext2_i.i_data, id, sizeof(*id)); -} - - static int mds_reint_create(struct mds_update_record *rec, struct ptlrpc_request *req) { int type = rec->ur_mode & S_IFMT; struct dentry *de = NULL; struct mds_rep *rep = req->rq_rep.mds; + struct mds_obd *mds = &req->rq_obd->u.mds; struct dentry *dchild = NULL; + struct inode *dir; int rc = 0; ENTRY; - de = mds_fid2dentry(&req->rq_obd->u.mds, rec->ur_fid1, NULL); + de = mds_fid2dentry(mds, rec->ur_fid1, NULL); if (IS_ERR(de) || OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_CREATE)) { LBUG(); GOTO(out_reint_create, (rc = -ESTALE)); } - CDEBUG(D_INODE, "ino %ld\n", de->d_inode->i_ino); + dir = de->d_inode; + CDEBUG(D_INODE, "ino %ld\n", dir->i_ino); dchild = lookup_one_len(rec->ur_name, de, rec->ur_namelen - 1); if (IS_ERR(dchild)) { @@ -127,26 +95,26 @@ static int mds_reint_create(struct mds_update_record *rec, if (dchild->d_inode) { CERROR("child exists (dir %ld, name %s)\n", - de->d_inode->i_ino, rec->ur_name); + dir->i_ino, rec->ur_name); LBUG(); GOTO(out_reint_create, (rc = -EEXIST)); } - OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_CREATE_WRITE); + OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_CREATE_WRITE, dir->i_sb->s_dev); switch (type) { case S_IFREG: { - rc = vfs_create(de->d_inode, dchild, rec->ur_mode); + rc = vfs_create(dir, dchild, rec->ur_mode); EXIT; break; } case S_IFDIR: { - rc = vfs_mkdir(de->d_inode, dchild, rec->ur_mode); + rc = vfs_mkdir(dir, dchild, rec->ur_mode); EXIT; break; } case S_IFLNK: { - rc = vfs_symlink(de->d_inode, dchild, rec->ur_tgt); + rc = vfs_symlink(dir, dchild, rec->ur_tgt); EXIT; break; } @@ -155,7 +123,7 @@ static int mds_reint_create(struct mds_update_record *rec, case S_IFIFO: case S_IFSOCK: { int rdev = rec->ur_id; - rc = vfs_mknod(de->d_inode, dchild, rec->ur_mode, rdev); + rc = vfs_mknod(dir, dchild, rec->ur_mode, rdev); EXIT; break; } @@ -163,7 +131,8 @@ static int mds_reint_create(struct mds_update_record *rec, if (!rc) { if (type == S_IFREG) - mds_store_objid(dchild->d_inode, &rec->ur_id); + rc = mds_fs_set_objid(mds, dchild->d_inode, + NULL, rec->ur_id); dchild->d_inode->i_atime = rec->ur_time; dchild->d_inode->i_ctime = rec->ur_time; dchild->d_inode->i_mtime = rec->ur_time; @@ -188,15 +157,18 @@ static int mds_reint_unlink(struct mds_update_record *rec, { struct dentry *de = NULL; struct dentry *dchild = NULL; + struct mds_obd *mds = &req->rq_obd->u.mds; + struct inode *dir; int rc = 0; ENTRY; - de = mds_fid2dentry(&req->rq_obd->u.mds, rec->ur_fid1, NULL); + de = mds_fid2dentry(mds, rec->ur_fid1, NULL); if (IS_ERR(de) || OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_UNLINK)) { LBUG(); GOTO(out_unlink, (rc = -ESTALE)); } - CDEBUG(D_INODE, "ino %ld\n", de->d_inode->i_ino); + dir = de->d_inode; + CDEBUG(D_INODE, "ino %ld\n", dir->i_ino); dchild = lookup_one_len(rec->ur_name, de, rec->ur_namelen - 1); if (IS_ERR(dchild)) { @@ -207,7 +179,7 @@ static int mds_reint_unlink(struct mds_update_record *rec, if (!dchild->d_inode) { CERROR("child doesn't exist (dir %ld, name %s\n", - de->d_inode->i_ino, rec->ur_name); + dir->i_ino, rec->ur_name); LBUG(); GOTO(out_unlink, (rc = -ESTALE)); } @@ -217,15 +189,15 @@ static int mds_reint_unlink(struct mds_update_record *rec, if (dchild->d_inode->i_generation != rec->ur_fid2->generation) LBUG(); - OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_UNLINK_WRITE); + OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_UNLINK_WRITE, dir->i_sb->s_dev); switch (dchild->d_inode->i_mode & S_IFMT) { case S_IFDIR: - rc = vfs_rmdir(de->d_inode, dchild); + rc = vfs_rmdir(dir, dchild); EXIT; break; default: - rc = vfs_unlink(de->d_inode, dchild); + rc = vfs_unlink(dir, dchild); EXIT; break; } @@ -268,7 +240,8 @@ static int mds_reint_link(struct mds_update_record *rec, GOTO(out_link, (rc = -EEXIST)); } - OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_LINK_WRITE); + OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_LINK_WRITE, + dchild->d_inode->i_sb->s_dev); rc = vfs_link(de_src, de_tgt_dir->d_inode, dchild); EXIT; @@ -314,7 +287,8 @@ static int mds_reint_rename(struct mds_update_record *rec, GOTO(out_rename, (rc = -ESTALE)); } - OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_RENAME_WRITE); + OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_RENAME_WRITE, + de_srcdir->d_inode->i_sb->s_dev); rc = vfs_rename(de_srcdir->d_inode, de_old, de_tgtdir->d_inode, de_new); EXIT; @@ -335,7 +309,7 @@ static mds_reinter reinters[REINT_MAX+1] = { [REINT_CREATE] mds_reint_create, [REINT_UNLINK] mds_reint_unlink, [REINT_LINK] mds_reint_link, - [REINT_RENAME] mds_reint_rename + [REINT_RENAME] mds_reint_rename, }; int mds_reint_rec(struct mds_update_record *rec, struct ptlrpc_request *req) -- 1.8.3.1