X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fllite%2Ffile.c;h=14e38cf7050c35306ea957d2579feb1ce9498d90;hb=a7811634d45319dc3812fa652c727367fa290a24;hp=8d04d4e277b1fa29e47520baf418d7a5197c5699;hpb=c42b426c87c3d3b1dc9eda612cc831293dc80d68;p=fs%2Flustre-release.git diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 8d04d4e..14e38cf 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -705,8 +705,7 @@ static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp, oinfo.oi_md = lsm; oinfo.oi_oa = obdo; - oinfo.oi_oa->o_id = lsm->lsm_object_id; - oinfo.oi_oa->o_seq = lsm->lsm_object_seq; + oinfo.oi_oa->o_oi = lsm->lsm_oi; oinfo.oi_oa->o_mode = S_IFREG; oinfo.oi_oa->o_ioepoch = ioepoch; oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | @@ -756,10 +755,11 @@ int ll_inode_getattr(struct inode *inode, struct obdo *obdo, capa, obdo, ioepoch, sync); capa_put(capa); if (rc == 0) { + struct ost_id *oi = lsm ? &lsm->lsm_oi : &obdo->o_oi; + obdo_refresh_inode(inode, obdo, obdo->o_valid); - CDEBUG(D_INODE, - "objid "LPX64" size %llu, blocks %llu, blksize %lu\n", - lsm ? 
lsm->lsm_object_id : 0, i_size_read(inode), + CDEBUG(D_INODE, "objid "DOSTID" size %llu, blocks %llu," + " blksize %lu\n", POSTID(oi), i_size_read(inode), (unsigned long long)inode->i_blocks, (unsigned long)ll_inode_blksize(inode)); } @@ -767,38 +767,47 @@ int ll_inode_getattr(struct inode *inode, struct obdo *obdo, RETURN(rc); } -int ll_merge_lvb(struct inode *inode) +int ll_merge_lvb(const struct lu_env *env, struct inode *inode) { struct ll_inode_info *lli = ll_i2info(inode); - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct lov_stripe_md *lsm; + struct cl_object *obj = lli->lli_clob; + struct cl_attr *attr = ccc_env_thread_attr(env); struct ost_lvb lvb; int rc = 0; ENTRY; - lsm = ccc_inode_lsm_get(inode); ll_inode_size_lock(inode); + /* merge timestamps the most recently obtained from mds with + timestamps obtained from osts */ + LTIME_S(inode->i_atime) = lli->lli_lvb.lvb_atime; + LTIME_S(inode->i_mtime) = lli->lli_lvb.lvb_mtime; + LTIME_S(inode->i_ctime) = lli->lli_lvb.lvb_ctime; inode_init_lvb(inode, &lvb); - /* merge timestamps the most resently obtained from mds with - timestamps obtained from osts */ - lvb.lvb_atime = lli->lli_lvb.lvb_atime; - lvb.lvb_mtime = lli->lli_lvb.lvb_mtime; - lvb.lvb_ctime = lli->lli_lvb.lvb_ctime; - if (lsm != NULL) { - rc = obd_merge_lvb(sbi->ll_dt_exp, lsm, &lvb, 0); - cl_isize_write_nolock(inode, lvb.lvb_size); + cl_object_attr_lock(obj); + rc = cl_object_attr_get(env, obj, attr); + cl_object_attr_unlock(obj); + + if (rc == 0) { + if (lvb.lvb_atime < attr->cat_atime) + lvb.lvb_atime = attr->cat_atime; + if (lvb.lvb_ctime < attr->cat_ctime) + lvb.lvb_ctime = attr->cat_ctime; + if (lvb.lvb_mtime < attr->cat_mtime) + lvb.lvb_mtime = attr->cat_mtime; CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n", - PFID(&lli->lli_fid), lvb.lvb_size); - inode->i_blocks = lvb.lvb_blocks; + PFID(&lli->lli_fid), attr->cat_size); + cl_isize_write_nolock(inode, attr->cat_size); + + inode->i_blocks = attr->cat_blocks; + + LTIME_S(inode->i_mtime) 
= lvb.lvb_mtime; + LTIME_S(inode->i_atime) = lvb.lvb_atime; + LTIME_S(inode->i_ctime) = lvb.lvb_ctime; } - LTIME_S(inode->i_mtime) = lvb.lvb_mtime; - LTIME_S(inode->i_atime) = lvb.lvb_atime; - LTIME_S(inode->i_ctime) = lvb.lvb_ctime; ll_inode_size_unlock(inode); - ccc_inode_lsm_put(inode, lsm); RETURN(rc); } @@ -827,7 +836,9 @@ void ll_io_init(struct cl_io *io, const struct file *file, int write) io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK; if (write) { io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND); - io->u.ci_wr.wr_sync = file->f_flags & O_SYNC || IS_SYNC(inode); + io->u.ci_wr.wr_sync = file->f_flags & O_SYNC || + file->f_flags & O_DIRECT || + IS_SYNC(inode); } io->ci_obj = ll_i2info(inode)->lli_clob; io->ci_lockreq = CILR_MAYBE; @@ -850,6 +861,7 @@ ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args, ssize_t result; ENTRY; +restart: io = ccc_env_thread_io(env); ll_io_init(io, file, iot == CIT_WRITE); @@ -908,6 +920,16 @@ ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args, GOTO(out, result); out: cl_io_fini(env, io); + /* If any bit has been read/written (result != 0), we just return + * short read/write instead of restarting the io. */ + if (result == 0 && io->ci_need_restart) { + CDEBUG(D_VFSTRACE, "Restart %s on %s from %lld, count:%zd\n", + iot == CIT_READ ? 
"read" : "write", + file->f_dentry->d_name.name, *ppos, count); + LASSERTF(io->u.ci_rw.crw_count == count, "%zd != %zd\n", + io->u.ci_rw.crw_count, count); + goto restart; + } if (iot == CIT_READ) { if (result >= 0) @@ -918,7 +940,7 @@ out: ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode), LPROC_LL_WRITE_BYTES, result); fd->fd_write_failed = false; - } else { + } else if (result != -ERESTARTSYS) { fd->fd_write_failed = true; } } @@ -1669,8 +1691,7 @@ int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap, !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER)) GOTO(out, rc = -EOPNOTSUPP); - fm_key.oa.o_id = lsm->lsm_object_id; - fm_key.oa.o_seq = lsm->lsm_object_seq; + fm_key.oa.o_oi = lsm->lsm_oi; fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP; obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLSIZE); @@ -1726,6 +1747,7 @@ int ll_fid2path(struct inode *inode, void *arg) rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL); if (rc) GOTO(gf_free, rc); + if (copy_to_user(arg, gfout, outsize)) rc = -EFAULT; @@ -1832,13 +1854,163 @@ int ll_data_version(struct inode *inode, __u64 *data_version, RETURN(rc); } -long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +struct ll_swap_stack { + struct iattr ia1, ia2; + __u64 dv1, dv2; + struct inode *inode1, *inode2; + bool check_dv1, check_dv2; +}; + +static int ll_swap_layouts(struct file *file1, struct file *file2, + struct lustre_swap_layouts *lsl) { - struct inode *inode = file->f_dentry->d_inode; - struct ll_file_data *fd = LUSTRE_FPRIVATE(file); - int flags; + struct mdc_swap_layouts msl; + struct md_op_data *op_data; + __u32 gid; + __u64 dv; + struct ll_swap_stack *llss = NULL; + int rc, rc1; - ENTRY; + OBD_ALLOC_PTR(llss); + if (llss == NULL) + RETURN(-ENOMEM); + + llss->inode1 = file1->f_dentry->d_inode; + llss->inode2 = file2->f_dentry->d_inode; + + if (!S_ISREG(llss->inode2->i_mode)) + GOTO(free, rc = -EINVAL); + + if (ll_permission(llss->inode1, MAY_WRITE, NULL) || + 
ll_permission(llss->inode2, MAY_WRITE, NULL)) + GOTO(free, rc = -EPERM); + + if (llss->inode2->i_sb != llss->inode1->i_sb) + GOTO(free, rc = -EXDEV); + + /* we use 2 bool because it is easier to swap than 2 bits */ + if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1) + llss->check_dv1 = true; + + if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2) + llss->check_dv2 = true; + + /* we cannot use lsl->sl_dvX directly because we may swap them */ + llss->dv1 = lsl->sl_dv1; + llss->dv2 = lsl->sl_dv2; + + rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2)); + if (rc == 0) /* same file, done! */ + GOTO(free, rc = 0); + + if (rc < 0) { /* sequentialize it */ + swap(llss->inode1, llss->inode2); + swap(file1, file2); + swap(llss->dv1, llss->dv2); + swap(llss->check_dv1, llss->check_dv2); + } + + gid = lsl->sl_gid; + if (gid != 0) { /* application asks to flush dirty cache */ + rc = ll_get_grouplock(llss->inode1, file1, gid); + if (rc < 0) + GOTO(free, rc); + + rc = ll_get_grouplock(llss->inode2, file2, gid); + if (rc < 0) { + ll_put_grouplock(llss->inode1, file1, gid); + GOTO(free, rc); + } + } + + /* to be able to restore mtime and atime after swap + * we need to first save them */ + if (lsl->sl_flags & + (SWAP_LAYOUTS_KEEP_MTIME | SWAP_LAYOUTS_KEEP_ATIME)) { + llss->ia1.ia_mtime = llss->inode1->i_mtime; + llss->ia1.ia_atime = llss->inode1->i_atime; + llss->ia1.ia_valid = ATTR_MTIME | ATTR_ATIME; + llss->ia2.ia_mtime = llss->inode2->i_mtime; + llss->ia2.ia_atime = llss->inode2->i_atime; + llss->ia2.ia_valid = ATTR_MTIME | ATTR_ATIME; + } + + /* ultimate check, before swapping the layouts we check if + * dataversion has changed (if requested) */ + if (llss->check_dv1) { + rc = ll_data_version(llss->inode1, &dv, 0); + if (rc) + GOTO(putgl, rc); + if (dv != llss->dv1) + GOTO(putgl, rc = -EAGAIN); + } + + if (llss->check_dv2) { + rc = ll_data_version(llss->inode2, &dv, 0); + if (rc) + GOTO(putgl, rc); + if (dv != llss->dv2) + GOTO(putgl, rc = -EAGAIN); + } + + /* struct 
md_op_data is used to send the swap args to the mdt + * only flags is missing, so we use struct mdc_swap_layouts + * through the md_op_data->op_data */ + /* flags from user space have to be converted before they are sent to + * server, no flag is sent today, they are only used on the client */ + msl.msl_flags = 0; + rc = -ENOMEM; + op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0, + 0, LUSTRE_OPC_ANY, &msl); + if (op_data != NULL) { + rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, + ll_i2mdexp(llss->inode1), + sizeof(*op_data), op_data, NULL); + ll_finish_md_op_data(op_data); + } + +putgl: + if (gid != 0) { + ll_put_grouplock(llss->inode2, file2, gid); + ll_put_grouplock(llss->inode1, file1, gid); + } + + /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */ + if (rc != 0) + GOTO(free, rc); + + /* clear useless flags */ + if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_MTIME)) { + llss->ia1.ia_valid &= ~ATTR_MTIME; + llss->ia2.ia_valid &= ~ATTR_MTIME; + } + + if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_ATIME)) { + llss->ia1.ia_valid &= ~ATTR_ATIME; + llss->ia2.ia_valid &= ~ATTR_ATIME; + } + + /* update time if requested */ + rc = rc1 = 0; + if (llss->ia2.ia_valid != 0) + rc = ll_setattr(file1->f_dentry, &llss->ia2); + + if (llss->ia1.ia_valid != 0) + rc1 = ll_setattr(file2->f_dentry, &llss->ia1); + +free: + if (llss != NULL) + OBD_FREE_PTR(llss); + + RETURN(rc); +} + +long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct inode *inode = file->f_dentry->d_inode; + struct ll_file_data *fd = LUSTRE_FPRIVATE(file); + int flags, rc; + ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino, inode->i_generation, inode, cmd); @@ -1878,6 +2050,27 @@ long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) RETURN(ll_lov_setstripe(inode, file, arg)); case LL_IOC_LOV_SETEA: RETURN(ll_lov_setea(inode, file, arg)); + case LL_IOC_LOV_SWAP_LAYOUTS: { + struct file *file2; + struct 
lustre_swap_layouts lsl; + + if (cfs_copy_from_user(&lsl, (char *)arg, + sizeof(struct lustre_swap_layouts))) + RETURN(-EFAULT); + + if ((file->f_flags & O_ACCMODE) == 0) /* O_RDONLY */ + RETURN(-EPERM); + + file2 = fget(lsl.sl_fd); + if (file2 == NULL) + RETURN(-EBADF); + + rc = -EPERM; + if ((file2->f_flags & O_ACCMODE) != 0) /* O_WRONLY or O_RDWR */ + rc = ll_swap_layouts(file, file2, &lsl); + fput(file2); + RETURN(rc); + } case LL_IOC_LOV_GETSTRIPE: RETURN(ll_lov_getstripe(inode, arg)); case LL_IOC_RECREATE_OBJ: @@ -2009,12 +2202,37 @@ long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) OBD_FREE_PTR(hss); RETURN(rc); } + case LL_IOC_HSM_ACTION: { + struct md_op_data *op_data; + struct hsm_current_action *hca; + int rc; + + OBD_ALLOC_PTR(hca); + if (hca == NULL) + RETURN(-ENOMEM); + + op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, + LUSTRE_OPC_ANY, hca); + if (op_data == NULL) { + OBD_FREE_PTR(hca); + RETURN(-ENOMEM); + } + rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data), + op_data, NULL); + + if (cfs_copy_to_user((char *)arg, hca, sizeof(*hca))) + rc = -EFAULT; + + ll_finish_md_op_data(op_data); + OBD_FREE_PTR(hca); + RETURN(rc); + } default: { int err; if (LLIOC_STOP == - ll_iocontrol_call(inode, file, cmd, arg, &err)) + ll_iocontrol_call(inode, file, cmd, arg, &err)) RETURN(err); RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL, @@ -2110,7 +2328,7 @@ loff_t ll_file_seek(struct file *file, loff_t offset, int origin) eof = i_size_read(inode); } - retval = generic_file_llseek_size(file, offset, origin, + retval = ll_generic_file_llseek_size(file, offset, origin, ll_file_maxbytes(inode), eof); RETURN(retval); } @@ -2193,15 +2411,25 @@ int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end, RETURN(result); } +/* + * When dentry is provided (the 'else' case), *file->f_dentry may be + * null and dentry must be used directly rather than pulled from + * *file->f_dentry as is done otherwise. 
+ */ + #ifdef HAVE_FILE_FSYNC_4ARGS int ll_fsync(struct file *file, loff_t start, loff_t end, int data) +{ + struct dentry *dentry = file->f_dentry; #elif defined(HAVE_FILE_FSYNC_2ARGS) int ll_fsync(struct file *file, int data) +{ + struct dentry *dentry = file->f_dentry; #else int ll_fsync(struct file *file, struct dentry *dentry, int data) -#endif { - struct inode *inode = file->f_dentry->d_inode; +#endif + struct inode *inode = dentry->d_inode; struct ll_inode_info *lli = ll_i2info(inode); struct ptlrpc_request *req; struct obd_capa *oc; @@ -2502,7 +2730,7 @@ int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it, /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC. * But under CMD case, it caused some lock issues, should be fixed * with new CMD ibits lock. See bug 12718 */ - if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) { + if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) { struct lookup_intent oit = { .it_op = IT_GETATTR }; struct md_op_data *op_data; @@ -3050,6 +3278,7 @@ static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode, rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm, lock->l_lvb_data, lock->l_lvb_len); if (rc >= 0) { + *gen = LL_LAYOUT_GEN_EMPTY; if (md.lsm != NULL) *gen = md.lsm->lsm_layout_gen; rc = 0; @@ -3125,11 +3354,11 @@ int ll_layout_refresh(struct inode *inode, __u32 *gen) .ei_mode = LCK_CR, .ei_cb_bl = ll_md_blocking_ast, .ei_cb_cp = ldlm_completion_ast, - .ei_cbdata = inode }; + .ei_cbdata = NULL }; int rc; ENTRY; - *gen = LL_LAYOUT_GEN_ZERO; + *gen = LL_LAYOUT_GEN_NONE; if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK)) RETURN(0); @@ -3188,6 +3417,8 @@ again: ll_finish_md_op_data(op_data); + md_set_lock_data(sbi->ll_md_exp, &it.d.lustre.it_lock_handle, inode, NULL); + mode = it.d.lustre.it_lock_mode; it.d.lustre.it_lock_mode = 0; ll_intent_drop_lock(&it);