#define OBD_FAIL_MDS_ALL_NET 0x01000000
#define OBD_FAIL_OST_ALL_NET 0x02000000
-#define OBD_FAIL_CHECK(id) ((obd_fail_loc & OBD_FAIL_MASK_LOC) == (id))
+#define OBD_FAIL_CHECK(id) ((obd_fail_loc & OBD_FAIL_MASK_LOC) == (id) && \
+ ((obd_fail_loc & (OBD_FAILED | OBD_FAIL_ONCE))!= /* false once both OBD_FAILED and OBD_FAIL_ONCE are set */ \
+ (OBD_FAILED | OBD_FAIL_ONCE)))
#define OBD_FAIL_RETURN(id, ret) \
do { \
if (OBD_FAIL_CHECK(id)) { \
- CERROR("obd_fail_loc=%d, fail operation rc=%d\n", id, ret); \
+ CERROR("obd_fail_loc=%x, fail operation rc=%d\n", id, ret); \
+ obd_fail_loc |= OBD_FAILED; /* record the hit; OBD_FAIL_CHECK tests this bit together with OBD_FAIL_ONCE */ \
RETURN(ret); \
} \
} while(0)
-#define OBD_FAIL_WRITE(id) \
-do { \
- if (OBD_FAIL_CHECK(id)) { \
- CERROR("obd_fail_loc=%d, fail write operation\n", id); \
- /* FIXME: do something bad here */ \
- } \
-} while (0)
+#include <linux/blkdev.h>
+/*
+ * Simulate a write failure on block device @dev when the current
+ * obd_fail_loc matches @id (see OBD_FAIL_CHECK).
+ *
+ * With CONFIG_DEV_RDONLY defined the device is handed to
+ * dev_set_rdonly(); without it only an error is logged and the device
+ * is left alone.  NOTE(review): the meaning of the second
+ * dev_set_rdonly() argument (2) is not visible here - confirm against
+ * the patch that provides CONFIG_DEV_RDONLY.
+ *
+ * In both cases OBD_FAILED | OBD_FAIL_ONCE are set in obd_fail_loc, so
+ * OBD_FAIL_CHECK() no longer matches while those bits remain set.
+ */
+static inline void OBD_FAIL_WRITE(int id, kdev_t dev)
+{
+ if (OBD_FAIL_CHECK(id)) {
+#ifdef CONFIG_DEV_RDONLY
+ CERROR("obd_fail_loc=%x, fail write operation on %s\n",
+ id, bdevname(dev));
+ dev_set_rdonly(dev, 2);
+#else
+ CERROR("obd_fail_loc=%x, can't fail write operation on %s\n",
+ id, bdevname(dev));
+#endif
+ /* We set FAIL_ONCE because we never "un-fail" a device */
+ obd_fail_loc |= OBD_FAILED | OBD_FAIL_ONCE;
+ }
+}
#define OBD_ALLOC(ptr, size) \
do { \
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/locks.h>
-#include <linux/ext2_fs.h>
#include <linux/quotaops.h>
#include <asm/unistd.h>
#include <asm/uaccess.h>
#include <linux/lustre_lib.h>
#include <linux/lustre_net.h>
-struct buffer_head *ext3_bread(void *handle, struct inode *inode,
- int block, int create, int *err);
-
int mds_sendpage(struct ptlrpc_request *req, struct file *file,
__u64 offset, struct niobuf *dst)
{
OBD_FAIL_RETURN(OBD_FAIL_MDS_SENDPAGE, -EIO);
if (req->rq_peer.peer_nid == 0) {
- struct inode *inode = file->f_dentry->d_inode;
+ /* dst->addr is a user address, but in a different task! */
char *buf = (char *)(long)dst->addr;
- /* dst->addr is a user address, but in a different task! */
set_fs(KERNEL_DS);
- /* FIXME: we need to use ext3_bread because ext3 does not
- * have the directories in page cache yet. If we
- * just use generic_file_read() then the pages we
- * get are in a different address space than those
- * used by the filesystem == cache incoherency.
- */
- if (S_ISREG(inode->i_mode))
- rc = file->f_op->read(file, buf, PAGE_SIZE, &offset);
- else if (!strcmp(inode->i_sb->s_type->name, "ext3")) {
- struct buffer_head *bh;
-
- bh = ext3_bread(NULL, inode,
- offset >> inode->i_sb->s_blocksize_bits,
- 0, &rc);
-
- if (bh) {
- memcpy(buf, bh->b_data, inode->i_blksize);
- brelse(bh);
- rc = inode->i_blksize;
- }
- } else
- rc = generic_file_read(file, buf, PAGE_SIZE, &offset);
-
+ rc = mds_fs_readpage(&req->rq_obd->u.mds, file, buf, PAGE_SIZE,
+ &offset);
set_fs(oldfs);
if (rc != PAGE_SIZE) {
}
EXIT;
} else {
- struct inode *inode = file->f_dentry->d_inode;
struct ptlrpc_bulk_desc *bulk;
char *buf;
}
set_fs(KERNEL_DS);
- /* FIXME: see comments above */
- if (S_ISREG(inode->i_mode))
- rc = file->f_op->read(file, buf, PAGE_SIZE, &offset);
- else if (!strcmp(inode->i_sb->s_type->name, "ext3")) {
- struct buffer_head *bh;
-
- bh = ext3_bread(NULL, inode, offset >> inode->i_blkbits,
- 0, &rc);
-
- if (bh) {
- memcpy(buf, bh->b_data, inode->i_blksize);
- brelse(bh);
- rc = inode->i_blksize;
- }
- } else
- rc = generic_file_read(file, buf, PAGE_SIZE, &offset);
-
+ rc = mds_fs_readpage(&req->rq_obd->u.mds, file, buf, PAGE_SIZE,
+ &offset);
set_fs(oldfs);
if (rc != PAGE_SIZE) {
return result;
}
-static inline void mds_get_objid(struct inode *inode, __u64 *id)
-{
- /* FIXME: it is only by luck that this works on ext3 */
- memcpy(id, &inode->u.ext2_i.i_data, sizeof(*id));
-}
-
int mds_getattr(struct ptlrpc_request *req)
{
struct dentry *de;
struct inode *inode;
struct mds_rep *rep;
+ struct mds_obd *mds = &req->rq_obd->u.mds;
int rc;
rc = mds_pack_rep(NULL, 0, NULL, 0, &req->rq_rephdr, &req->rq_rep,
req->rq_rephdr->xid = req->rq_reqhdr->xid;
rep = req->rq_rep.mds;
- de = mds_fid2dentry(&req->rq_obd->u.mds, &req->rq_req.mds->fid1, NULL);
+ de = mds_fid2dentry(mds, &req->rq_req.mds->fid1, NULL);
if (IS_ERR(de)) {
req->rq_rephdr->status = -ENOENT;
RETURN(0);
rep->mode = inode->i_mode;
rep->nlink = inode->i_nlink;
rep->valid = ~0;
- mds_get_objid(inode, &rep->objid);
+ mds_fs_get_objid(mds, inode, &rep->objid);
dput(de);
return 0;
}
int err;
ENTRY;
+ MOD_INC_USE_COUNT;
mnt = do_kern_mount(data->ioc_inlbuf2, 0, data->ioc_inlbuf1, NULL);
err = PTR_ERR(mnt);
if (IS_ERR(mnt)) {
CERROR("do_kern_mount failed: %d\n", err);
+ MOD_DEC_USE_COUNT;
RETURN(err);
}
mds->mds_sb = mnt->mnt_root->d_inode->i_sb;
- if (!mds->mds_sb)
+ if (!mds->mds_sb) {
+ MOD_DEC_USE_COUNT;
RETURN(-ENODEV);
+ }
mds->mds_vfsmnt = mnt;
mds->mds_fstype = strdup(data->ioc_inlbuf2);
+ if (!strcmp(mds->mds_fstype, "ext3"))
+ mds->mds_fsops = &mds_ext3_fs_ops;
+ else if (!strcmp(mds->mds_fstype, "ext2"))
+ mds->mds_fsops = &mds_ext2_fs_ops;
+ else {
+ CERROR("unsupported MDS filesystem type %s\n", mds->mds_fstype);
+ kfree(mds->mds_fstype);
+ MOD_DEC_USE_COUNT;
+ RETURN(-EPERM);
+ }
+
+ /*
+ * Replace the client filesystem delete_inode method with our own,
+ * so that we can clear the object ID before the inode is deleted.
+ * The fs_delete_inode method will call cl_delete_inode for us.
+ */
+ mds->mds_fsops->cl_delete_inode = mds->mds_sb->s_op->delete_inode;
+ mds->mds_sb->s_op->delete_inode = mds->mds_fsops->fs_delete_inode;
+
mds->mds_ctxt.pwdmnt = mnt;
mds->mds_ctxt.pwd = mnt->mnt_root;
mds->mds_ctxt.fs = KERNEL_DS;
err = ptlrpc_start_thread(obddev, mds->mds_service, "lustre_mds");
if (err)
CERROR("cannot start thread\n");
+ /* FIXME: do we need to MOD_DEC_USE_COUNT here? */
- MOD_INC_USE_COUNT;
RETURN(0);
}
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/locks.h>
-#include <linux/ext2_fs.h>
#include <linux/quotaops.h>
#include <asm/unistd.h>
#include <asm/uaccess.h>
{
struct dentry *de;
struct inode *inode;
+ struct mds_obd *mds = &req->rq_obd->u.mds;
- de = mds_fid2dentry(&req->rq_obd->u.mds, rec->ur_fid1, NULL);
+ de = mds_fid2dentry(mds, rec->ur_fid1, NULL);
if (IS_ERR(de) || OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_SETATTR)) {
req->rq_rephdr->status = -ESTALE;
RETURN(0);
inode = de->d_inode;
CDEBUG(D_INODE, "ino %ld\n", inode->i_ino);
- /* a _really_ horrible hack to avoid removing the data stored
- in the block pointers; this data is the object id
- this will go into an extended attribute at some point.
- */
- if ( rec->ur_iattr.ia_valid & ATTR_SIZE ) {
- /* ATTR_SIZE would invoke truncate: clear it */
- rec->ur_iattr.ia_valid &= ~ATTR_SIZE;
- inode->i_size = rec->ur_iattr.ia_size;
-
- /* an _even_more_ horrible hack to make this hack work with
- * ext3. This is because ext3 keeps a separate inode size
- * until the inode is committed to ensure consistency. This
- * will also go away with the move to EAs.
- */
- if (!strcmp(inode->i_sb->s_type->name, "ext3"))
- inode->u.ext3_i.i_disksize = inode->i_size;
-
- /* make sure _something_ gets set - so new inode
- goes to disk (probably won't work over XFS */
- if (!rec->ur_iattr.ia_valid & ATTR_MODE) {
- rec->ur_iattr.ia_valid |= ATTR_MODE;
- rec->ur_iattr.ia_mode = inode->i_mode;
- }
- }
- OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_SETATTR_WRITE);
- if ( inode->i_op->setattr ) {
- req->rq_rephdr->status =
- inode->i_op->setattr(de, &rec->ur_iattr);
- } else {
- req->rq_rephdr->status =
- inode_setattr(inode, &rec->ur_iattr);
- }
+ mds_fs_setattr(mds, inode, NULL, &rec->ur_iattr);
+
+ OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_SETATTR_WRITE, inode->i_sb->s_dev);
+
+ if (inode->i_op->setattr)
+ req->rq_rephdr->status = inode->i_op->setattr(de, &rec->ur_iattr);
+ else
+ req->rq_rephdr->status = inode_setattr(inode, &rec->ur_iattr);
l_dput(de);
RETURN(0);
}
-/*
- XXX nasty hack: store the object id in the first two
- direct block spots
-*/
-static inline void mds_store_objid(struct inode *inode, __u64 *id)
-{
- /* FIXME: it is only by luck that this works on ext3 */
- memcpy(&inode->u.ext2_i.i_data, id, sizeof(*id));
-}
-
-
static int mds_reint_create(struct mds_update_record *rec,
struct ptlrpc_request *req)
{
int type = rec->ur_mode & S_IFMT;
struct dentry *de = NULL;
struct mds_rep *rep = req->rq_rep.mds;
+ struct mds_obd *mds = &req->rq_obd->u.mds;
struct dentry *dchild = NULL;
+ struct inode *dir;
int rc = 0;
ENTRY;
- de = mds_fid2dentry(&req->rq_obd->u.mds, rec->ur_fid1, NULL);
+ de = mds_fid2dentry(mds, rec->ur_fid1, NULL);
if (IS_ERR(de) || OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_CREATE)) {
LBUG();
GOTO(out_reint_create, (rc = -ESTALE));
}
- CDEBUG(D_INODE, "ino %ld\n", de->d_inode->i_ino);
+ dir = de->d_inode;
+ CDEBUG(D_INODE, "ino %ld\n", dir->i_ino);
dchild = lookup_one_len(rec->ur_name, de, rec->ur_namelen - 1);
if (IS_ERR(dchild)) {
if (dchild->d_inode) {
CERROR("child exists (dir %ld, name %s)\n",
- de->d_inode->i_ino, rec->ur_name);
+ dir->i_ino, rec->ur_name);
LBUG();
GOTO(out_reint_create, (rc = -EEXIST));
}
- OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_CREATE_WRITE);
+ OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_CREATE_WRITE, dir->i_sb->s_dev);
switch (type) {
case S_IFREG: {
- rc = vfs_create(de->d_inode, dchild, rec->ur_mode);
+ rc = vfs_create(dir, dchild, rec->ur_mode);
EXIT;
break;
}
case S_IFDIR: {
- rc = vfs_mkdir(de->d_inode, dchild, rec->ur_mode);
+ rc = vfs_mkdir(dir, dchild, rec->ur_mode);
EXIT;
break;
}
case S_IFLNK: {
- rc = vfs_symlink(de->d_inode, dchild, rec->ur_tgt);
+ rc = vfs_symlink(dir, dchild, rec->ur_tgt);
EXIT;
break;
}
case S_IFIFO:
case S_IFSOCK: {
int rdev = rec->ur_id;
- rc = vfs_mknod(de->d_inode, dchild, rec->ur_mode, rdev);
+ rc = vfs_mknod(dir, dchild, rec->ur_mode, rdev);
EXIT;
break;
}
if (!rc) {
if (type == S_IFREG)
- mds_store_objid(dchild->d_inode, &rec->ur_id);
+ rc = mds_fs_set_objid(mds, dchild->d_inode,
+ NULL, rec->ur_id);
dchild->d_inode->i_atime = rec->ur_time;
dchild->d_inode->i_ctime = rec->ur_time;
dchild->d_inode->i_mtime = rec->ur_time;
{
struct dentry *de = NULL;
struct dentry *dchild = NULL;
+ struct mds_obd *mds = &req->rq_obd->u.mds;
+ struct inode *dir;
int rc = 0;
ENTRY;
- de = mds_fid2dentry(&req->rq_obd->u.mds, rec->ur_fid1, NULL);
+ de = mds_fid2dentry(mds, rec->ur_fid1, NULL);
if (IS_ERR(de) || OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_UNLINK)) {
LBUG();
GOTO(out_unlink, (rc = -ESTALE));
}
- CDEBUG(D_INODE, "ino %ld\n", de->d_inode->i_ino);
+ dir = de->d_inode;
+ CDEBUG(D_INODE, "ino %ld\n", dir->i_ino);
dchild = lookup_one_len(rec->ur_name, de, rec->ur_namelen - 1);
if (IS_ERR(dchild)) {
if (!dchild->d_inode) {
CERROR("child doesn't exist (dir %ld, name %s\n",
- de->d_inode->i_ino, rec->ur_name);
+ dir->i_ino, rec->ur_name);
LBUG();
GOTO(out_unlink, (rc = -ESTALE));
}
if (dchild->d_inode->i_generation != rec->ur_fid2->generation)
LBUG();
- OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_UNLINK_WRITE);
+ OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_UNLINK_WRITE, dir->i_sb->s_dev);
switch (dchild->d_inode->i_mode & S_IFMT) {
case S_IFDIR:
- rc = vfs_rmdir(de->d_inode, dchild);
+ rc = vfs_rmdir(dir, dchild);
EXIT;
break;
default:
- rc = vfs_unlink(de->d_inode, dchild);
+ rc = vfs_unlink(dir, dchild);
EXIT;
break;
}
GOTO(out_link, (rc = -EEXIST));
}
- OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_LINK_WRITE);
+ OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_LINK_WRITE,
+ dchild->d_inode->i_sb->s_dev);
rc = vfs_link(de_src, de_tgt_dir->d_inode, dchild);
EXIT;
GOTO(out_rename, (rc = -ESTALE));
}
- OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_RENAME_WRITE);
+ OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_RENAME_WRITE,
+ de_srcdir->d_inode->i_sb->s_dev);
rc = vfs_rename(de_srcdir->d_inode, de_old, de_tgtdir->d_inode, de_new);
EXIT;
[REINT_CREATE] mds_reint_create,
[REINT_UNLINK] mds_reint_unlink,
[REINT_LINK] mds_reint_link,
- [REINT_RENAME] mds_reint_rename
+ [REINT_RENAME] mds_reint_rename,
};
int mds_reint_rec(struct mds_update_record *rec, struct ptlrpc_request *req)