Whamcloud - gitweb
All of the "hack" and FIXME stuff in MDS is now handled by fs-specific methods
author: adilger <adilger>
Fri, 29 Mar 2002 08:28:39 +0000 (08:28 +0000)
committer: adilger <adilger>
Fri, 29 Mar 2002 08:28:39 +0000 (08:28 +0000)
which are selected at mount time.  This includes journaling support.

Changes OBD_FAIL_WRITE() to include the block device which should be
disabled.  We conditionally compile the "make device readonly" support.

lustre/include/linux/obd_support.h
lustre/mds/handler.c
lustre/mds/mds_reint.c

index 14aad36..ceffa9a 100644 (file)
@@ -86,23 +86,36 @@ enum {
 #define OBD_FAIL_MDS_ALL_NET 0x01000000
 #define OBD_FAIL_OST_ALL_NET 0x02000000
 
-#define OBD_FAIL_CHECK(id)      ((obd_fail_loc & OBD_FAIL_MASK_LOC) == (id))
+#define OBD_FAIL_CHECK(id)   ((obd_fail_loc & OBD_FAIL_MASK_LOC) == (id) &&  \
+                              ((obd_fail_loc & (OBD_FAILED | OBD_FAIL_ONCE))!=\
+                                (OBD_FAILED | OBD_FAIL_ONCE)))
 
 #define OBD_FAIL_RETURN(id, ret)                                             \
 do {                                                                         \
         if (OBD_FAIL_CHECK(id)) {                                            \
-                CERROR("obd_fail_loc=%d, fail operation rc=%d\n", id, ret);  \
+                CERROR("obd_fail_loc=%x, fail operation rc=%d\n", id, ret);  \
+                obd_fail_loc |= OBD_FAILED;                                  \
                 RETURN(ret);                                                 \
         }                                                                    \
 } while(0)
 
-#define OBD_FAIL_WRITE(id)                                                   \
-do {                                                                         \
-        if (OBD_FAIL_CHECK(id)) {                                            \
-                CERROR("obd_fail_loc=%d, fail write operation\n", id);       \
-                /* FIXME: do something bad here */                           \
-        }                                                                    \
-} while (0)
+#include <linux/blkdev.h>
+
+static inline void OBD_FAIL_WRITE(int id, kdev_t dev)
+{
+        if (OBD_FAIL_CHECK(id)) {
+#ifdef CONFIG_DEV_RDONLY
+                CERROR("obd_fail_loc=%x, fail write operation on %s\n",
+                       id, bdevname(dev));
+                dev_set_rdonly(dev, 2);
+#else
+                CERROR("obd_fail_loc=%x, can't fail write operation on %s\n",
+                       id, bdevname(dev));
+#endif
+                /* We set FAIL_ONCE because we never "un-fail" a device */
+                obd_fail_loc |= OBD_FAILED | OBD_FAIL_ONCE;
+        }
+}
 
 #define OBD_ALLOC(ptr, size)                                    \
 do {                                                            \
index 49452f3..00c47ab 100644 (file)
@@ -23,7 +23,6 @@
 #include <linux/fs.h>
 #include <linux/stat.h>
 #include <linux/locks.h>
-#include <linux/ext2_fs.h>
 #include <linux/quotaops.h>
 #include <asm/unistd.h>
 #include <asm/uaccess.h>
@@ -34,9 +33,6 @@
 #include <linux/lustre_lib.h>
 #include <linux/lustre_net.h>
 
-struct buffer_head *ext3_bread(void *handle, struct inode *inode,
-                               int block, int create, int *err);
-
 int mds_sendpage(struct ptlrpc_request *req, struct file *file,
                  __u64 offset, struct niobuf *dst)
 {
@@ -46,34 +42,12 @@ int mds_sendpage(struct ptlrpc_request *req, struct file *file,
         OBD_FAIL_RETURN(OBD_FAIL_MDS_SENDPAGE, -EIO);
 
         if (req->rq_peer.peer_nid == 0) {
-                struct inode *inode = file->f_dentry->d_inode;
+                /* dst->addr is a user address, but in a different task! */
                 char *buf = (char *)(long)dst->addr;
 
-                /* dst->addr is a user address, but in a different task! */
                 set_fs(KERNEL_DS);
-                /* FIXME: we need to use ext3_bread because ext3 does not
-                 *        have the directories in page cache yet.  If we
-                 *        just use generic_file_read() then the pages we
-                 *        get are in a different address space than those
-                 *        used by the filesystem == cache incoherency.
-                 */
-                if (S_ISREG(inode->i_mode))
-                        rc = file->f_op->read(file, buf, PAGE_SIZE, &offset);
-                else if (!strcmp(inode->i_sb->s_type->name, "ext3")) {
-                        struct buffer_head *bh;
-
-                        bh = ext3_bread(NULL, inode,
-                                        offset >> inode->i_sb->s_blocksize_bits,
-                                        0, &rc);
-
-                        if (bh) {
-                                memcpy(buf, bh->b_data, inode->i_blksize);
-                                brelse(bh);
-                                rc = inode->i_blksize;
-                        }
-                } else
-                        rc = generic_file_read(file, buf, PAGE_SIZE, &offset);
-
+                rc = mds_fs_readpage(&req->rq_obd->u.mds, file, buf, PAGE_SIZE,
+                                     &offset);
                 set_fs(oldfs);
 
                 if (rc != PAGE_SIZE) {
@@ -82,7 +56,6 @@ int mds_sendpage(struct ptlrpc_request *req, struct file *file,
                 }
                 EXIT;
         } else {
-                struct inode *inode = file->f_dentry->d_inode;
                 struct ptlrpc_bulk_desc *bulk;
                 char *buf;
 
@@ -101,23 +74,8 @@ int mds_sendpage(struct ptlrpc_request *req, struct file *file,
                 }
 
                 set_fs(KERNEL_DS);
-                /* FIXME: see comments above */
-                if (S_ISREG(inode->i_mode))
-                        rc = file->f_op->read(file, buf, PAGE_SIZE, &offset);
-                else if (!strcmp(inode->i_sb->s_type->name, "ext3")) {
-                        struct buffer_head *bh;
-
-                        bh = ext3_bread(NULL, inode, offset >> inode->i_blkbits,
-                                        0, &rc);
-
-                        if (bh) {
-                                memcpy(buf, bh->b_data, inode->i_blksize);
-                                brelse(bh);
-                                rc = inode->i_blksize;
-                        }
-                } else
-                        rc = generic_file_read(file, buf, PAGE_SIZE, &offset);
-
+                rc = mds_fs_readpage(&req->rq_obd->u.mds, file, buf, PAGE_SIZE,
+                                     &offset);
                 set_fs(oldfs);
 
                 if (rc != PAGE_SIZE) {
@@ -210,17 +168,12 @@ struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid,
         return result;
 }
 
-static inline void mds_get_objid(struct inode *inode, __u64 *id)
-{
-        /* FIXME: it is only by luck that this works on ext3 */
-        memcpy(id, &inode->u.ext2_i.i_data, sizeof(*id));
-}
-
 int mds_getattr(struct ptlrpc_request *req)
 {
         struct dentry *de;
         struct inode *inode;
         struct mds_rep *rep;
+        struct mds_obd *mds = &req->rq_obd->u.mds;
         int rc;
 
         rc = mds_pack_rep(NULL, 0, NULL, 0, &req->rq_rephdr, &req->rq_rep,
@@ -234,7 +187,7 @@ int mds_getattr(struct ptlrpc_request *req)
         req->rq_rephdr->xid = req->rq_reqhdr->xid;
         rep = req->rq_rep.mds;
 
-        de = mds_fid2dentry(&req->rq_obd->u.mds, &req->rq_req.mds->fid1, NULL);
+        de = mds_fid2dentry(mds, &req->rq_req.mds->fid1, NULL);
         if (IS_ERR(de)) {
                 req->rq_rephdr->status = -ENOENT;
                 RETURN(0);
@@ -252,7 +205,7 @@ int mds_getattr(struct ptlrpc_request *req)
         rep->mode = inode->i_mode;
         rep->nlink = inode->i_nlink;
         rep->valid = ~0;
-        mds_get_objid(inode, &rep->objid);
+        mds_fs_get_objid(mds, inode, &rep->objid);
         dput(de);
         return 0;
 }
@@ -485,20 +438,43 @@ static int mds_setup(struct obd_device *obddev, obd_count len, void *buf)
         int err;
         ENTRY;
 
+        MOD_INC_USE_COUNT;
         mnt = do_kern_mount(data->ioc_inlbuf2, 0, data->ioc_inlbuf1, NULL);
         err = PTR_ERR(mnt);
         if (IS_ERR(mnt)) {
                 CERROR("do_kern_mount failed: %d\n", err);
+                MOD_DEC_USE_COUNT;
                 RETURN(err);
         }
 
         mds->mds_sb = mnt->mnt_root->d_inode->i_sb;
-        if (!mds->mds_sb)
+        if (!mds->mds_sb) {
+                MOD_DEC_USE_COUNT;
                 RETURN(-ENODEV);
+        }
 
         mds->mds_vfsmnt = mnt;
         mds->mds_fstype = strdup(data->ioc_inlbuf2);
 
+        if (!strcmp(mds->mds_fstype, "ext3"))
+                mds->mds_fsops = &mds_ext3_fs_ops;
+        else if (!strcmp(mds->mds_fstype, "ext2"))
+                mds->mds_fsops = &mds_ext2_fs_ops;
+        else {
+                CERROR("unsupported MDS filesystem type %s\n", mds->mds_fstype);
+                kfree(mds->mds_fstype);
+                MOD_DEC_USE_COUNT;
+                RETURN(-EPERM);
+        }
+
+        /*
+         * Replace the client filesystem delete_inode method with our own,
+         * so that we can clear the object ID before the inode is deleted.
+         * The fs_delete_inode method will call cl_delete_inode for us.
+         */
+        mds->mds_fsops->cl_delete_inode = mds->mds_sb->s_op->delete_inode;
+        mds->mds_sb->s_op->delete_inode = mds->mds_fsops->fs_delete_inode;
+
         mds->mds_ctxt.pwdmnt = mnt;
         mds->mds_ctxt.pwd = mnt->mnt_root;
         mds->mds_ctxt.fs = KERNEL_DS;
@@ -514,8 +490,8 @@ static int mds_setup(struct obd_device *obddev, obd_count len, void *buf)
         err = ptlrpc_start_thread(obddev, mds->mds_service, "lustre_mds");
         if (err)
                 CERROR("cannot start thread\n");
+                /* FIXME: do we need to MOD_DEC_USE_COUNT here? */
 
-        MOD_INC_USE_COUNT;
         RETURN(0);
 }
 
index 5ee12fb..dbfa18c 100644 (file)
@@ -22,7 +22,6 @@
 #include <linux/fs.h>
 #include <linux/stat.h>
 #include <linux/locks.h>
-#include <linux/ext2_fs.h>
 #include <linux/quotaops.h>
 #include <asm/unistd.h>
 #include <asm/uaccess.h>
@@ -43,8 +42,9 @@ static int mds_reint_setattr(struct mds_update_record *rec, struct ptlrpc_reques
 {
         struct dentry *de;
         struct inode *inode;
+        struct mds_obd *mds = &req->rq_obd->u.mds;
 
-        de = mds_fid2dentry(&req->rq_obd->u.mds, rec->ur_fid1, NULL);
+        de = mds_fid2dentry(mds, rec->ur_fid1, NULL);
         if (IS_ERR(de) || OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_SETATTR)) {
                 req->rq_rephdr->status = -ESTALE;
                 RETURN(0);
@@ -53,70 +53,38 @@ static int mds_reint_setattr(struct mds_update_record *rec, struct ptlrpc_reques
         inode = de->d_inode;
         CDEBUG(D_INODE, "ino %ld\n", inode->i_ino);
 
-        /* a _really_ horrible hack to avoid removing the data stored
-           in the block pointers; this data is the object id
-           this will go into an extended attribute at some point.
-        */
-        if ( rec->ur_iattr.ia_valid & ATTR_SIZE ) {
-                /* ATTR_SIZE would invoke truncate: clear it */
-                rec->ur_iattr.ia_valid &= ~ATTR_SIZE;
-                inode->i_size = rec->ur_iattr.ia_size;
-
-                /* an _even_more_ horrible hack to make this hack work with
-                 * ext3.  This is because ext3 keeps a separate inode size
-                 * until the inode is committed to ensure consistency.  This
-                 * will also go away with the move to EAs.
-                 */
-                if (!strcmp(inode->i_sb->s_type->name, "ext3"))
-                        inode->u.ext3_i.i_disksize = inode->i_size;
-
-                /* make sure _something_ gets set - so new inode
-                   goes to disk (probably won't work over XFS */
-                if (!rec->ur_iattr.ia_valid & ATTR_MODE) {
-                        rec->ur_iattr.ia_valid |= ATTR_MODE;
-                        rec->ur_iattr.ia_mode = inode->i_mode;
-                }
-        }
-        OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_SETATTR_WRITE);
-        if ( inode->i_op->setattr ) {
-                req->rq_rephdr->status =
-                        inode->i_op->setattr(de, &rec->ur_iattr);
-        } else {
-                req->rq_rephdr->status =
-                        inode_setattr(inode, &rec->ur_iattr);
-        }
+        mds_fs_setattr(mds, inode, NULL, &rec->ur_iattr);
+
+        OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_SETATTR_WRITE, inode->i_sb->s_dev);
+
+        if (inode->i_op->setattr)
+                req->rq_rephdr->status = inode->i_op->setattr(de, &rec->ur_iattr);
+        else
+                req->rq_rephdr->status = inode_setattr(inode, &rec->ur_iattr);
 
         l_dput(de);
         RETURN(0);
 }
 
-/*
-   XXX nasty hack: store the object id in the first two
-   direct block spots
-*/
-static inline void mds_store_objid(struct inode *inode, __u64 *id)
-{
-        /* FIXME: it is only by luck that this works on ext3 */
-        memcpy(&inode->u.ext2_i.i_data, id, sizeof(*id));
-}
-
-
 static int mds_reint_create(struct mds_update_record *rec,
                             struct ptlrpc_request *req)
 {
         int type = rec->ur_mode & S_IFMT;
         struct dentry *de = NULL;
         struct mds_rep *rep = req->rq_rep.mds;
+        struct mds_obd *mds = &req->rq_obd->u.mds;
         struct dentry *dchild = NULL;
+        struct inode *dir;
         int rc = 0;
         ENTRY;
 
-        de = mds_fid2dentry(&req->rq_obd->u.mds, rec->ur_fid1, NULL);
+        de = mds_fid2dentry(mds, rec->ur_fid1, NULL);
         if (IS_ERR(de) || OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_CREATE)) {
                 LBUG();
                 GOTO(out_reint_create, (rc = -ESTALE));
         }
-        CDEBUG(D_INODE, "ino %ld\n", de->d_inode->i_ino);
+        dir = de->d_inode;
+        CDEBUG(D_INODE, "ino %ld\n", dir->i_ino);
 
         dchild = lookup_one_len(rec->ur_name, de, rec->ur_namelen - 1);
         if (IS_ERR(dchild)) {
@@ -127,26 +95,26 @@ static int mds_reint_create(struct mds_update_record *rec,
 
         if (dchild->d_inode) {
                 CERROR("child exists (dir %ld, name %s)\n",
-                       de->d_inode->i_ino, rec->ur_name);
+                       dir->i_ino, rec->ur_name);
                 LBUG();
                 GOTO(out_reint_create, (rc = -EEXIST));
         }
 
-        OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_CREATE_WRITE);
+        OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_CREATE_WRITE, dir->i_sb->s_dev);
 
         switch (type) {
         case S_IFREG: {
-                rc = vfs_create(de->d_inode, dchild, rec->ur_mode);
+                rc = vfs_create(dir, dchild, rec->ur_mode);
                 EXIT;
                 break;
         }
         case S_IFDIR: {
-                rc = vfs_mkdir(de->d_inode, dchild, rec->ur_mode);
+                rc = vfs_mkdir(dir, dchild, rec->ur_mode);
                 EXIT;
                 break;
         }
         case S_IFLNK: {
-                rc = vfs_symlink(de->d_inode, dchild, rec->ur_tgt);
+                rc = vfs_symlink(dir, dchild, rec->ur_tgt);
                 EXIT;
                 break;
         }
@@ -155,7 +123,7 @@ static int mds_reint_create(struct mds_update_record *rec,
         case S_IFIFO:
         case S_IFSOCK: {
                 int rdev = rec->ur_id;
-                rc = vfs_mknod(de->d_inode, dchild, rec->ur_mode, rdev);
+                rc = vfs_mknod(dir, dchild, rec->ur_mode, rdev);
                 EXIT;
                 break;
         }
@@ -163,7 +131,8 @@ static int mds_reint_create(struct mds_update_record *rec,
 
         if (!rc) {
                 if (type == S_IFREG)
-                        mds_store_objid(dchild->d_inode, &rec->ur_id);
+                        rc = mds_fs_set_objid(mds, dchild->d_inode,
+                                              NULL, rec->ur_id);
                 dchild->d_inode->i_atime = rec->ur_time;
                 dchild->d_inode->i_ctime = rec->ur_time;
                 dchild->d_inode->i_mtime = rec->ur_time;
@@ -188,15 +157,18 @@ static int mds_reint_unlink(struct mds_update_record *rec,
 {
         struct dentry *de = NULL;
         struct dentry *dchild = NULL;
+        struct mds_obd *mds = &req->rq_obd->u.mds;
+        struct inode *dir;
         int rc = 0;
         ENTRY;
 
-        de = mds_fid2dentry(&req->rq_obd->u.mds, rec->ur_fid1, NULL);
+        de = mds_fid2dentry(mds, rec->ur_fid1, NULL);
         if (IS_ERR(de) || OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_UNLINK)) {
                 LBUG();
                 GOTO(out_unlink, (rc = -ESTALE));
         }
-        CDEBUG(D_INODE, "ino %ld\n", de->d_inode->i_ino);
+        dir = de->d_inode;
+        CDEBUG(D_INODE, "ino %ld\n", dir->i_ino);
 
         dchild = lookup_one_len(rec->ur_name, de, rec->ur_namelen - 1);
         if (IS_ERR(dchild)) {
@@ -207,7 +179,7 @@ static int mds_reint_unlink(struct mds_update_record *rec,
 
         if (!dchild->d_inode) {
                 CERROR("child doesn't exist (dir %ld, name %s\n",
-                       de->d_inode->i_ino, rec->ur_name);
+                       dir->i_ino, rec->ur_name);
                 LBUG();
                 GOTO(out_unlink, (rc = -ESTALE));
         }
@@ -217,15 +189,15 @@ static int mds_reint_unlink(struct mds_update_record *rec,
         if (dchild->d_inode->i_generation != rec->ur_fid2->generation)
                 LBUG();
 
-        OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_UNLINK_WRITE);
+        OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_UNLINK_WRITE, dir->i_sb->s_dev);
 
         switch (dchild->d_inode->i_mode & S_IFMT) {
         case S_IFDIR:
-                rc = vfs_rmdir(de->d_inode, dchild);
+                rc = vfs_rmdir(dir, dchild);
                 EXIT;
                 break;
         default:
-                rc = vfs_unlink(de->d_inode, dchild);
+                rc = vfs_unlink(dir, dchild);
                 EXIT;
                 break;
         }
@@ -268,7 +240,8 @@ static int mds_reint_link(struct mds_update_record *rec,
                 GOTO(out_link, (rc = -EEXIST));
         }
 
-        OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_LINK_WRITE);
+        OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_LINK_WRITE,
+                       dchild->d_inode->i_sb->s_dev);
 
         rc = vfs_link(de_src, de_tgt_dir->d_inode, dchild);
         EXIT;
@@ -314,7 +287,8 @@ static int mds_reint_rename(struct mds_update_record *rec,
                 GOTO(out_rename, (rc = -ESTALE));
         }
 
-        OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_RENAME_WRITE);
+        OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_RENAME_WRITE,
+                       de_srcdir->d_inode->i_sb->s_dev);
 
         rc = vfs_rename(de_srcdir->d_inode, de_old, de_tgtdir->d_inode, de_new);
         EXIT;
@@ -335,7 +309,7 @@ static mds_reinter  reinters[REINT_MAX+1] = {
         [REINT_CREATE]    mds_reint_create,
         [REINT_UNLINK]    mds_reint_unlink,
         [REINT_LINK]      mds_reint_link,
-        [REINT_RENAME]    mds_reint_rename
+        [REINT_RENAME]    mds_reint_rename,
 };
 
 int mds_reint_rec(struct mds_update_record *rec, struct ptlrpc_request *req)