X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fmds%2Fmds_reint.c;h=2695aba409d5e58a203652922845d9d275556800;hb=bcb47c4c689a1f5935a915dc6b3f7267040965e4;hp=3389057bb8929b61248b8c12651f8c58cc43e57f;hpb=ba2815641da6dfff347b093bb5b5a235dfa2cd7a;p=fs%2Flustre-release.git diff --git a/lustre/mds/mds_reint.c b/lustre/mds/mds_reint.c index 3389057..2695aba 100644 --- a/lustre/mds/mds_reint.c +++ b/lustre/mds/mds_reint.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include @@ -37,116 +36,107 @@ #include #include -extern struct ptlrpc_request *mds_prep_req(int size, int opcode, int namelen, char *name, int tgtlen, char *tgt); +int mds_update_last_rcvd(struct mds_obd *mds, struct ptlrpc_request *req) +{ + return 0; +} -static int mds_reint_setattr(struct mds_update_record *rec, struct ptlrpc_request *req) +static int mds_reint_setattr(struct mds_update_record *rec, + struct ptlrpc_request *req) { + struct mds_obd *mds = &req->rq_obd->u.mds; struct dentry *de; - struct inode *inode; + void *handle; + int rc = 0; - de = mds_fid2dentry(&req->rq_obd->u.mds, rec->ur_fid1, NULL); + de = mds_fid2dentry(mds, rec->ur_fid1, NULL); if (IS_ERR(de) || OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_SETATTR)) { - req->rq_rephdr->status = -ESTALE; - RETURN(0); + GOTO(out_setattr, rc = -ESTALE); } - inode = de->d_inode; - CDEBUG(D_INODE, "ino %ld\n", inode->i_ino); - - /* a _really_ horrible hack to avoid removing the data stored - in the block pointers; this data is the object id - this will go into an extended attribute at some point. - */ - if ( rec->ur_iattr.ia_valid & ATTR_SIZE ) { - /* ATTR_SIZE would invoke truncate: clear it */ - rec->ur_iattr.ia_valid &= ~ATTR_SIZE; - inode->i_size = rec->ur_iattr.ia_size; - - /* an _even_more_ horrible hack to make this hack work with - * ext3. This is because ext3 keeps a separate inode size - * until the inode is committed to ensure consistency. This - * will also go away with the move to EAs. - */ - if (!strcmp(inode->i_sb->s_type->name, "ext3")) - inode->u.ext3_i.i_disksize = inode->i_size; - - /* make sure _something_ gets set - so new inode - goes to disk (probably won't work over XFS */ - if (!rec->ur_iattr.ia_valid & ATTR_MODE) { - rec->ur_iattr.ia_valid |= ATTR_MODE; - rec->ur_iattr.ia_mode = inode->i_mode; - } - } - OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_SETATTR_WRITE); - if ( inode->i_op->setattr ) { - req->rq_rephdr->status = - inode->i_op->setattr(de, &rec->ur_iattr); - } else { - req->rq_rephdr->status = - inode_setattr(inode, &rec->ur_iattr); - } + CDEBUG(D_INODE, "ino %ld\n", de->d_inode->i_ino); - l_dput(de); - RETURN(0); -} + OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_SETATTR_WRITE, + de->d_inode->i_sb->s_dev); -/* - XXX nasty hack: store the object id in the first two - direct block spots -*/ -static inline void mds_store_objid(struct inode *inode, __u64 *id) -{ - /* FIXME: it is only by luck that this works on ext3 */ - memcpy(&inode->u.ext2_i.i_data, id, sizeof(*id)); -} + handle = mds_fs_start(mds, de->d_inode, MDS_FSOP_SETATTR); + if (!handle) + GOTO(out_setattr_de, rc = PTR_ERR(handle)); + rc = mds_fs_setattr(mds, de, handle, &rec->ur_iattr); + + if (!rc) + rc = mds_update_last_rcvd(mds, req); + EXIT; + + /* FIXME: keep rc intact */ + rc = mds_fs_commit(mds, de->d_inode, handle); +out_setattr_de: + l_dput(de); +out_setattr: + req->rq_status = rc; + return(0); +} static int mds_reint_create(struct mds_update_record *rec, struct ptlrpc_request *req) { - int type = rec->ur_mode & S_IFMT; struct dentry *de = NULL; - struct mds_rep *rep = req->rq_rep.mds; + struct mds_obd *mds = &req->rq_obd->u.mds; struct dentry *dchild = NULL; - int rc; + struct inode *dir; + void *handle; + int rc = 0, type = rec->ur_mode & S_IFMT; ENTRY; - de = mds_fid2dentry(&req->rq_obd->u.mds, rec->ur_fid1, NULL); + de = mds_fid2dentry(mds, rec->ur_fid1, NULL); if (IS_ERR(de) || OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_CREATE)) { LBUG(); - GOTO(out_reint_create, (rc = -ESTALE)); + GOTO(out_create_de, rc = -ESTALE); } - CDEBUG(D_INODE, "ino %ld\n", de->d_inode->i_ino); + dir = de->d_inode; + CDEBUG(D_INODE, "ino %ld\n", dir->i_ino); + down(&dir->i_sem); dchild = lookup_one_len(rec->ur_name, de, rec->ur_namelen - 1); if (IS_ERR(dchild)) { CERROR("child lookup error %ld\n", PTR_ERR(dchild)); + up(&dir->i_sem); LBUG(); - GOTO(out_reint_create, (rc = -ESTALE)); + GOTO(out_create_dchild, rc = -ESTALE); } if (dchild->d_inode) { CERROR("child exists (dir %ld, name %s)\n", - de->d_inode->i_ino, rec->ur_name); + dir->i_ino, rec->ur_name); LBUG(); - GOTO(out_reint_create, (rc = -EEXIST)); + GOTO(out_create_dchild, rc = -EEXIST); } - OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_CREATE_WRITE); + OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_CREATE_WRITE, dir->i_sb->s_dev); switch (type) { case S_IFREG: { - rc = vfs_create(de->d_inode, dchild, rec->ur_mode); + handle = mds_fs_start(mds, dir, MDS_FSOP_CREATE); + if (!handle) + GOTO(out_create_dchild, PTR_ERR(handle)); + rc = vfs_create(dir, dchild, rec->ur_mode); EXIT; break; } case S_IFDIR: { - rc = vfs_mkdir(de->d_inode, dchild, rec->ur_mode); + handle = mds_fs_start(mds, dir, MDS_FSOP_MKDIR); + if (!handle) + GOTO(out_create_dchild, PTR_ERR(handle)); + rc = vfs_mkdir(dir, dchild, rec->ur_mode); EXIT; break; } case S_IFLNK: { - rc = vfs_symlink(de->d_inode, dchild, rec->ur_tgt); + handle = mds_fs_start(mds, dir, MDS_FSOP_SYMLINK); + if (!handle) + GOTO(out_create_dchild, PTR_ERR(handle)); + rc = vfs_symlink(dir, dchild, rec->ur_tgt); EXIT; break; } @@ -155,28 +145,63 @@ static int mds_reint_create(struct mds_update_record *rec, case S_IFIFO: case S_IFSOCK: { int rdev = rec->ur_id; - rc = vfs_mknod(de->d_inode, dchild, rec->ur_mode, rdev); + handle = mds_fs_start(mds, dir, MDS_FSOP_MKNOD); + if (!handle) + GOTO(out_create_dchild, PTR_ERR(handle)); + rc = vfs_mknod(dir, dchild, rec->ur_mode, rdev); EXIT; break; } + default: + CERROR("bad file type %d for create of %s\n",type,rec->ur_name); + GOTO(out_create_dchild, rc = -EINVAL); } - if (!rc) { - if (type == S_IFREG) - mds_store_objid(dchild->d_inode, &rec->ur_id); - dchild->d_inode->i_atime = rec->ur_time; - dchild->d_inode->i_ctime = rec->ur_time; - dchild->d_inode->i_mtime = rec->ur_time; - dchild->d_inode->i_uid = rec->ur_uid; - dchild->d_inode->i_gid = rec->ur_gid; - rep->ino = dchild->d_inode->i_ino; + if (rc) { + CERROR("error during create: %d\n", rc); + LBUG(); + GOTO(out_create_commit, rc); + } else { + struct iattr iattr; + struct inode *inode = dchild->d_inode; + struct mds_body *body; + + if (type == S_IFREG) { + rc = mds_fs_set_objid(mds, inode, handle, rec->ur_id); + if (rc) + CERROR("error %d setting objid for %ld\n", + rc, inode->i_ino); + } + + iattr.ia_atime = rec->ur_time; + iattr.ia_ctime = rec->ur_time; + iattr.ia_mtime = rec->ur_time; + iattr.ia_uid = rec->ur_uid; + iattr.ia_gid = rec->ur_gid; + iattr.ia_valid = ATTR_UID | ATTR_GID | ATTR_ATIME | + ATTR_MTIME | ATTR_CTIME; + + rc = mds_fs_setattr(mds, dchild, handle, &iattr); + /* XXX should we abort here in case of error? */ + + body = lustre_msg_buf(req->rq_repmsg, 0); + body->ino = inode->i_ino; + body->generation = inode->i_generation; } -out_reint_create: - req->rq_rephdr->status = rc; - l_dput(de); + if (!rc) + rc = mds_update_last_rcvd(mds, req); + +out_create_commit: + /* FIXME: keep rc intact */ + rc = mds_fs_commit(mds, dir, handle); +out_create_dchild: l_dput(dchild); - RETURN(0); + up(&dir->i_sem); +out_create_de: + l_dput(de); + req->rq_status = rc; + return 0; } static int mds_reint_unlink(struct mds_update_record *rec, @@ -184,48 +209,80 @@ static int mds_reint_unlink(struct mds_update_record *rec, { struct dentry *de = NULL; struct dentry *dchild = NULL; + struct mds_obd *mds = &req->rq_obd->u.mds; + struct inode *dir, *inode; + void *handle; int rc = 0; ENTRY; - de = mds_fid2dentry(&req->rq_obd->u.mds, rec->ur_fid1, NULL); + de = mds_fid2dentry(mds, rec->ur_fid1, NULL); if (IS_ERR(de) || OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_UNLINK)) { LBUG(); - GOTO(out_unlink, (rc = -ESTALE)); + GOTO(out_unlink, rc = -ESTALE); } - CDEBUG(D_INODE, "ino %ld\n", de->d_inode->i_ino); + dir = de->d_inode; + CDEBUG(D_INODE, "ino %ld\n", dir->i_ino); + down(&dir->i_sem); dchild = lookup_one_len(rec->ur_name, de, rec->ur_namelen - 1); if (IS_ERR(dchild)) { CERROR("child lookup error %ld\n", PTR_ERR(dchild)); LBUG(); - GOTO(out_unlink, (rc = -ESTALE)); + GOTO(out_unlink_de, rc = -ESTALE); } - if (!dchild->d_inode) { + inode = dchild->d_inode; + if (!inode) { CERROR("child doesn't exist (dir %ld, name %s\n", - de->d_inode->i_ino, rec->ur_name); + dir->i_ino, rec->ur_name); + LBUG(); + GOTO(out_unlink_dchild, rc = -ESTALE); + } + + if (inode->i_ino != rec->ur_fid2->id) { + CERROR("inode and FID ID do not match (%ld != %Ld)\n", + inode->i_ino, rec->ur_fid2->id); LBUG(); - GOTO(out_unlink, (rc = -ESTALE)); + GOTO(out_unlink_dchild, rc = -ESTALE); + } + if (inode->i_generation != rec->ur_fid2->generation) { + CERROR("inode and FID GENERATION do not match (%d != %d)\n", + inode->i_generation, rec->ur_fid2->generation); + LBUG(); + GOTO(out_unlink_dchild, rc = -ESTALE); } - OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_UNLINK_WRITE); + OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_UNLINK_WRITE, dir->i_sb->s_dev); switch (dchild->d_inode->i_mode & S_IFMT) { case S_IFDIR: - rc = vfs_rmdir(de->d_inode, dchild); - EXIT; + handle = mds_fs_start(mds, dir, MDS_FSOP_RMDIR); + if (!handle) + GOTO(out_unlink_dchild, rc = PTR_ERR(handle)); + rc = vfs_rmdir(dir, dchild); break; default: - rc = vfs_unlink(de->d_inode, dchild); - EXIT; + handle = mds_fs_start(mds, dir, MDS_FSOP_UNLINK); + if (!handle) + GOTO(out_unlink_dchild, rc = PTR_ERR(handle)); + rc = vfs_unlink(dir, dchild); break; } -out_unlink: - req->rq_rephdr->status = rc; - l_dput(de); + if (!rc) + rc = mds_update_last_rcvd(mds, req); + /* FIXME: keep rc intact */ + rc = mds_fs_commit(mds, dir, handle); + + EXIT; +out_unlink_dchild: l_dput(dchild); - RETURN(0); +out_unlink_de: + up(&dir->i_sem); + l_dput(de); +out_unlink: + req->rq_status = rc; + return 0; } static int mds_reint_link(struct mds_update_record *rec, @@ -234,45 +291,62 @@ static int mds_reint_link(struct mds_update_record *rec, struct dentry *de_src = NULL; struct dentry *de_tgt_dir = NULL; struct dentry *dchild = NULL; + struct mds_obd *mds = &req->rq_obd->u.mds; + void *handle; int rc = 0; ENTRY; - de_src = mds_fid2dentry(&req->rq_obd->u.mds, rec->ur_fid1, NULL); + de_src = mds_fid2dentry(mds, rec->ur_fid1, NULL); if (IS_ERR(de_src) || OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_LINK)) { - GOTO(out_link, (rc = -ESTALE)); + GOTO(out_link, rc = -ESTALE); } - de_tgt_dir = mds_fid2dentry(&req->rq_obd->u.mds, rec->ur_fid2, NULL); + de_tgt_dir = mds_fid2dentry(mds, rec->ur_fid2, NULL); if (IS_ERR(de_tgt_dir)) { - GOTO(out_link, (rc = -ESTALE)); + GOTO(out_link_de_src, rc = -ESTALE); } + down(&de_tgt_dir->d_inode->i_sem); dchild = lookup_one_len(rec->ur_name, de_tgt_dir, rec->ur_namelen - 1); if (IS_ERR(dchild)) { CERROR("child lookup error %ld\n", PTR_ERR(dchild)); - GOTO(out_link, (rc = -ESTALE)); + GOTO(out_link_de_tgt_dir, rc = -ESTALE); } if (dchild->d_inode) { CERROR("child exists (dir %ld, name %s\n", de_tgt_dir->d_inode->i_ino, rec->ur_name); - GOTO(out_link, (rc = -EEXIST)); + GOTO(out_link_dchild, rc = -EEXIST); } - OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_LINK_WRITE); + OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_LINK_WRITE, + dchild->d_inode->i_sb->s_dev); + + handle = mds_fs_start(mds, de_tgt_dir->d_inode, MDS_FSOP_LINK); + if (!handle) + GOTO(out_link_dchild, rc = PTR_ERR(handle)); rc = vfs_link(de_src, de_tgt_dir->d_inode, dchild); + + if (!rc) + rc = mds_update_last_rcvd(mds, req); + + /* FIXME: keep rc intact */ + rc = mds_fs_commit(mds, de_tgt_dir->d_inode, handle); EXIT; - out_link: - req->rq_rephdr->status = rc; - l_dput(de_src); - l_dput(de_tgt_dir); +out_link_dchild: l_dput(dchild); +out_link_de_tgt_dir: + up(&de_tgt_dir->d_inode->i_sem); + l_dput(de_tgt_dir); +out_link_de_src: + l_dput(de_src); +out_link: + req->rq_status = rc; return 0; } - static int mds_reint_rename(struct mds_update_record *rec, struct ptlrpc_request *req) { @@ -280,73 +354,87 @@ static int mds_reint_rename(struct mds_update_record *rec, struct dentry *de_tgtdir = NULL; struct dentry *de_old = NULL; struct dentry *de_new = NULL; + struct mds_obd *mds = &req->rq_obd->u.mds; + void *handle; int rc = 0; ENTRY; - de_srcdir = mds_fid2dentry(&req->rq_obd->u.mds, rec->ur_fid1, NULL); + de_srcdir = mds_fid2dentry(mds, rec->ur_fid1, NULL); if (IS_ERR(de_srcdir)) { - GOTO(out_rename, (rc = -ESTALE)); + GOTO(out_rename, rc = -ESTALE); } - de_tgtdir = mds_fid2dentry(&req->rq_obd->u.mds, rec->ur_fid2, NULL); + de_tgtdir = mds_fid2dentry(mds, rec->ur_fid2, NULL); if (IS_ERR(de_tgtdir)) { - GOTO(out_rename, (rc = -ESTALE)); + GOTO(out_rename_srcdir, rc = -ESTALE); } de_old = lookup_one_len(rec->ur_name, de_srcdir, rec->ur_namelen - 1); if (IS_ERR(de_old)) { - CERROR("child lookup error %ld\n", PTR_ERR(de_old)); - GOTO(out_rename, (rc = -ESTALE)); + CERROR("old child lookup error %ld\n", PTR_ERR(de_old)); + GOTO(out_rename_tgtdir, rc = -ESTALE); } de_new = lookup_one_len(rec->ur_tgt, de_tgtdir, rec->ur_tgtlen - 1); if (IS_ERR(de_new)) { - CERROR("child lookup error %ld\n", PTR_ERR(de_new)); - GOTO(out_rename, (rc = -ESTALE)); + CERROR("new child lookup error %ld\n", PTR_ERR(de_new)); + GOTO(out_rename_deold, rc = -ESTALE); } - OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_RENAME_WRITE); + OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_RENAME_WRITE, + de_srcdir->d_inode->i_sb->s_dev); + handle = mds_fs_start(mds, de_tgtdir->d_inode, MDS_FSOP_RENAME); + if (!handle) + GOTO(out_rename_denew, rc = PTR_ERR(handle)); rc = vfs_rename(de_srcdir->d_inode, de_old, de_tgtdir->d_inode, de_new); + + if (!rc) + rc = mds_update_last_rcvd(mds, req); + + /* FIXME: keep rc intact */ + rc = mds_fs_commit(mds, de_tgtdir->d_inode, handle); EXIT; - out_rename: - req->rq_rephdr->status = rc; +out_rename_denew: l_dput(de_new); +out_rename_deold: l_dput(de_old); +out_rename_tgtdir: l_dput(de_tgtdir); +out_rename_srcdir: l_dput(de_srcdir); +out_rename: + req->rq_status = rc; return 0; } typedef int (*mds_reinter)(struct mds_update_record *, struct ptlrpc_request*); -static mds_reinter reinters[REINT_MAX+1] = { +static mds_reinter reinters[REINT_MAX+1] = { [REINT_SETATTR] mds_reint_setattr, [REINT_CREATE] mds_reint_create, [REINT_UNLINK] mds_reint_unlink, [REINT_LINK] mds_reint_link, - [REINT_RENAME] mds_reint_rename + [REINT_RENAME] mds_reint_rename, }; int mds_reint_rec(struct mds_update_record *rec, struct ptlrpc_request *req) { - int rc; + int rc, size = sizeof(struct mds_body); - if (rec->ur_opcode < 0 || rec->ur_opcode > REINT_MAX) { + if (rec->ur_opcode < 1 || rec->ur_opcode > REINT_MAX) { CERROR("opcode %d not valid\n", rec->ur_opcode); rc = req->rq_status = -EINVAL; RETURN(rc); } - rc = mds_pack_rep(NULL, 0, NULL, 0, &req->rq_rephdr, &req->rq_rep, - &req->rq_replen, &req->rq_repbuf); + rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg); if (rc) { CERROR("mds: out of memory\n"); rc = req->rq_status = -ENOMEM; RETURN(rc); } - req->rq_rephdr->xid = req->rq_reqhdr->xid; rc = reinters[rec->ur_opcode](rec, req); return rc;