X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fllite%2Ffile.c;h=3a66cab54ebe5fb1fc338f74402a7bdcdd7c8d99;hp=7aa00c9a8c30ea559e5acc3f1c14c37c9937c19f;hb=c084c6215851d238d14b0d414374b6b55c91f525;hpb=5d32447500107f5abd386400ec9cd8ab8a3d9706 diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 7aa00c9..3a66cab 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -1083,27 +1083,120 @@ static bool file_is_noatime(const struct file *file) return false; } -static void ll_io_init(struct cl_io *io, const struct file *file, int write) +static int ll_file_io_ptask(struct cfs_ptask *ptask); + +static void ll_io_init(struct cl_io *io, struct file *file, enum cl_io_type iot) { - struct inode *inode = file_inode((struct file *)file); - - io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK; - if (write) { - io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND); - io->u.ci_wr.wr_sync = file->f_flags & O_SYNC || - file->f_flags & O_DIRECT || - IS_SYNC(inode); - } - io->ci_obj = ll_i2info(inode)->lli_clob; - io->ci_lockreq = CILR_MAYBE; - if (ll_file_nolock(file)) { - io->ci_lockreq = CILR_NEVER; - io->ci_no_srvlock = 1; - } else if (file->f_flags & O_APPEND) { - io->ci_lockreq = CILR_MANDATORY; - } + struct inode *inode = file_inode(file); + memset(&io->u.ci_rw.rw_iter, 0, sizeof(io->u.ci_rw.rw_iter)); + init_sync_kiocb(&io->u.ci_rw.rw_iocb, file); + io->u.ci_rw.rw_file = file; + io->u.ci_rw.rw_ptask = ll_file_io_ptask; + io->u.ci_rw.rw_nonblock = !!(file->f_flags & O_NONBLOCK); + if (iot == CIT_WRITE) { + io->u.ci_rw.rw_append = !!(file->f_flags & O_APPEND); + io->u.ci_rw.rw_sync = !!(file->f_flags & O_SYNC || + file->f_flags & O_DIRECT || + IS_SYNC(inode)); + } + io->ci_obj = ll_i2info(inode)->lli_clob; + io->ci_lockreq = CILR_MAYBE; + if (ll_file_nolock(file)) { + io->ci_lockreq = CILR_NEVER; + io->ci_no_srvlock = 1; + } else if (file->f_flags & O_APPEND) { + io->ci_lockreq = CILR_MANDATORY; + } io->ci_noatime = 
file_is_noatime(file); + if (ll_i2sbi(inode)->ll_flags & LL_SBI_PIO) + io->ci_pio = !io->u.ci_rw.rw_append; + else + io->ci_pio = 0; +} + +static int ll_file_io_ptask(struct cfs_ptask *ptask) +{ + struct cl_io_pt *pt = ptask->pt_cbdata; + struct file *file = pt->cip_file; + struct lu_env *env; + struct cl_io *io; + loff_t pos = pt->cip_pos; + int rc; + __u16 refcheck; + ENTRY; + + env = cl_env_get(&refcheck); + if (IS_ERR(env)) + RETURN(PTR_ERR(env)); + + CDEBUG(D_VFSTRACE, "%s: %s range: [%llu, %llu)\n", + file_dentry(file)->d_name.name, + pt->cip_iot == CIT_READ ? "read" : "write", + pos, pos + pt->cip_count); + +restart: + io = vvp_env_thread_io(env); + ll_io_init(io, file, pt->cip_iot); + io->u.ci_rw.rw_iter = pt->cip_iter; + io->u.ci_rw.rw_iocb = pt->cip_iocb; + io->ci_pio = 0; /* It's already in parallel task */ + + rc = cl_io_rw_init(env, io, pt->cip_iot, pos, + pt->cip_count - pt->cip_result); + if (!rc) { + struct vvp_io *vio = vvp_env_io(env); + + vio->vui_io_subtype = IO_NORMAL; + vio->vui_fd = LUSTRE_FPRIVATE(file); + + ll_cl_add(file, env, io, LCC_RW); + rc = cl_io_loop(env, io); + ll_cl_remove(file, env); + } else { + /* cl_io_rw_init() handled IO */ + rc = io->ci_result; + } + + if (OBD_FAIL_CHECK_RESET(OBD_FAIL_LLITE_PTASK_IO_FAIL, 0)) { + if (io->ci_nob > 0) + io->ci_nob /= 2; + rc = -EIO; + } + + if (io->ci_nob > 0) { + pt->cip_result += io->ci_nob; + iov_iter_advance(&pt->cip_iter, io->ci_nob); + pos += io->ci_nob; + pt->cip_iocb.ki_pos = pos; +#ifdef HAVE_KIOCB_KI_LEFT + pt->cip_iocb.ki_left = pt->cip_count - pt->cip_result; +#elif defined(HAVE_KI_NBYTES) + pt->cip_iocb.ki_nbytes = pt->cip_count - pt->cip_result; +#endif + } + + cl_io_fini(env, io); + + if ((rc == 0 || rc == -ENODATA) && + pt->cip_result < pt->cip_count && + io->ci_need_restart) { + CDEBUG(D_VFSTRACE, + "%s: restart %s range: [%llu, %llu) ret: %zd, rc: %d\n", + file_dentry(file)->d_name.name, + pt->cip_iot == CIT_READ ? 
"read" : "write", + pos, pos + pt->cip_count - pt->cip_result, + pt->cip_result, rc); + goto restart; + } + + CDEBUG(D_VFSTRACE, "%s: %s ret: %zd, rc: %d\n", + file_dentry(file)->d_name.name, + pt->cip_iot == CIT_READ ? "read" : "write", + pt->cip_result, rc); + + cl_env_put(env, &refcheck); + RETURN(pt->cip_result > 0 ? 0 : rc); } static ssize_t @@ -1111,39 +1204,45 @@ ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args, struct file *file, enum cl_io_type iot, loff_t *ppos, size_t count) { + struct range_lock range; struct vvp_io *vio = vvp_env_io(env); struct inode *inode = file_inode(file); struct ll_inode_info *lli = ll_i2info(inode); struct ll_file_data *fd = LUSTRE_FPRIVATE(file); struct cl_io *io; + loff_t pos = *ppos; ssize_t result = 0; int rc = 0; - struct range_lock range; ENTRY; - CDEBUG(D_VFSTRACE, "file: %s, type: %d ppos: %llu, count: %zu\n", - file_dentry(file)->d_name.name, iot, *ppos, count); + CDEBUG(D_VFSTRACE, "%s: %s range: [%llu, %llu)\n", + file_dentry(file)->d_name.name, + iot == CIT_READ ? 
"read" : "write", pos, pos + count); restart: io = vvp_env_thread_io(env); - ll_io_init(io, file, iot == CIT_WRITE); + ll_io_init(io, file, iot); + if (args->via_io_subtype == IO_NORMAL) { + io->u.ci_rw.rw_iter = *args->u.normal.via_iter; + io->u.ci_rw.rw_iocb = *args->u.normal.via_iocb; + } else { + io->ci_pio = 0; + } - if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) { + if (cl_io_rw_init(env, io, iot, pos, count) == 0) { bool range_locked = false; if (file->f_flags & O_APPEND) range_lock_init(&range, 0, LUSTRE_EOF); else - range_lock_init(&range, *ppos, *ppos + count - 1); + range_lock_init(&range, pos, pos + count - 1); vio->vui_fd = LUSTRE_FPRIVATE(file); vio->vui_io_subtype = args->via_io_subtype; switch (vio->vui_io_subtype) { case IO_NORMAL: - vio->vui_iter = args->u.normal.via_iter; - vio->vui_iocb = args->u.normal.via_iocb; /* Direct IO reads must also take range lock, * or multiple reads will try to work on the same pages * See LU-6227 for details. */ @@ -1169,7 +1268,16 @@ restart: } ll_cl_add(file, env, io, LCC_RW); + if (io->ci_pio && iot == CIT_WRITE && !IS_NOSEC(inode) && + !lli->lli_inode_locked) { + inode_lock(inode); + lli->lli_inode_locked = 1; + } rc = cl_io_loop(env, io); + if (lli->lli_inode_locked) { + lli->lli_inode_locked = 0; + inode_unlock(inode); + } ll_cl_remove(file, env); if (range_locked) { @@ -1184,22 +1292,31 @@ restart: if (io->ci_nob > 0) { result += io->ci_nob; - count -= io->ci_nob; - *ppos = io->u.ci_wr.wr.crw_pos; /* for splice */ + count -= io->ci_nob; - /* prepare IO restart */ - if (count > 0 && args->via_io_subtype == IO_NORMAL) - args->u.normal.via_iter = vio->vui_iter; + if (args->via_io_subtype == IO_NORMAL) { + iov_iter_advance(args->u.normal.via_iter, io->ci_nob); + pos += io->ci_nob; + args->u.normal.via_iocb->ki_pos = pos; +#ifdef HAVE_KIOCB_KI_LEFT + args->u.normal.via_iocb->ki_left = count; +#elif defined(HAVE_KI_NBYTES) + args->u.normal.via_iocb->ki_nbytes = count; +#endif + } else { + /* for splice */ + 
pos = io->u.ci_rw.rw_range.cir_pos; + } } out: cl_io_fini(env, io); if ((rc == 0 || rc == -ENODATA) && count > 0 && io->ci_need_restart) { CDEBUG(D_VFSTRACE, - "%s: restart %s from %lld, count:%zu, result: %zd\n", - file_dentry(file)->d_name.name, - iot == CIT_READ ? "read" : "write", - *ppos, count, result); + "%s: restart %s range: [%llu, %llu) ret: %zd, rc: %d\n", + file_dentry(file)->d_name.name, + iot == CIT_READ ? "read" : "write", + pos, pos + count, result, rc); goto restart; } @@ -1223,7 +1340,11 @@ out: } } - CDEBUG(D_VFSTRACE, "iot: %d, result: %zd\n", iot, result); + CDEBUG(D_VFSTRACE, "%s: %s *ppos: %llu, pos: %llu, ret: %zd, rc: %d\n", + file_dentry(file)->d_name.name, + iot == CIT_READ ? "read" : "write", *ppos, pos, result, rc); + + *ppos = pos; RETURN(result > 0 ? result : rc); } @@ -1264,8 +1385,7 @@ out: * \retval - number of bytes have been read, or error code if error occurred. */ static ssize_t -ll_do_fast_read(const struct lu_env *env, struct kiocb *iocb, - struct iov_iter *iter) +ll_do_fast_read(struct kiocb *iocb, struct iov_iter *iter) { ssize_t result; @@ -1277,9 +1397,7 @@ ll_do_fast_read(const struct lu_env *env, struct kiocb *iocb, if (iocb->ki_filp->f_flags & O_DIRECT) return 0; - ll_cl_add(iocb->ki_filp, env, NULL, LCC_RW); result = generic_file_read_iter(iocb, iter); - ll_cl_remove(iocb->ki_filp, env); /* If the first page is not in cache, generic_file_aio_read() will be * returned with -ENODATA. 
@@ -1305,14 +1423,14 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to) ssize_t rc2; __u16 refcheck; + result = ll_do_fast_read(iocb, to); + if (result < 0 || iov_iter_count(to) == 0) + GOTO(out, result); + env = cl_env_get(&refcheck); if (IS_ERR(env)) return PTR_ERR(env); - result = ll_do_fast_read(env, iocb, to); - if (result < 0 || iov_iter_count(to) == 0) - GOTO(out, result); - args = ll_env_args(env, IO_NORMAL); args->u.normal.via_iter = to; args->u.normal.via_iocb = iocb; @@ -1324,8 +1442,8 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to) else if (result == 0) result = rc2; -out: cl_env_put(env, &refcheck); +out: return result; } @@ -1411,30 +1529,22 @@ static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov, static ssize_t ll_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct lu_env *env; struct iovec iov = { .iov_base = buf, .iov_len = count }; - struct kiocb *kiocb; + struct kiocb kiocb; ssize_t result; - __u16 refcheck; ENTRY; - env = cl_env_get(&refcheck); - if (IS_ERR(env)) - RETURN(PTR_ERR(env)); - - kiocb = &ll_env_info(env)->lti_kiocb; - init_sync_kiocb(kiocb, file); - kiocb->ki_pos = *ppos; + init_sync_kiocb(&kiocb, file); + kiocb.ki_pos = *ppos; #ifdef HAVE_KIOCB_KI_LEFT - kiocb->ki_left = count; + kiocb.ki_left = count; #elif defined(HAVE_KI_NBYTES) - kiocb->ki_nbytes = count; + kiocb.ki_nbytes = count; #endif - result = ll_file_aio_read(kiocb, &iov, 1, kiocb->ki_pos); - *ppos = kiocb->ki_pos; + result = ll_file_aio_read(&kiocb, &iov, 1, kiocb.ki_pos); + *ppos = kiocb.ki_pos; - cl_env_put(env, &refcheck); RETURN(result); } @@ -1642,7 +1752,7 @@ out: } static int ll_lov_setea(struct inode *inode, struct file *file, - unsigned long arg) + void __user *arg) { __u64 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE; struct lov_user_md *lump; @@ -1658,7 +1768,7 @@ static int ll_lov_setea(struct inode *inode, struct file *file, if (lump == NULL) 
RETURN(-ENOMEM); - if (copy_from_user(lump, (struct lov_user_md __user *)arg, lum_size)) + if (copy_from_user(lump, arg, lum_size)) GOTO(out_lump, rc = -EFAULT); rc = ll_lov_setstripe_ea_info(inode, file_dentry(file), flags, lump, @@ -1670,8 +1780,7 @@ out_lump: RETURN(rc); } -static int ll_file_getstripe(struct inode *inode, - struct lov_user_md __user *lum) +static int ll_file_getstripe(struct inode *inode, void __user *lum, size_t size) { struct lu_env *env; __u16 refcheck; @@ -1682,13 +1791,13 @@ static int ll_file_getstripe(struct inode *inode, if (IS_ERR(env)) RETURN(PTR_ERR(env)); - rc = cl_object_getstripe(env, ll_i2info(inode)->lli_clob, lum); + rc = cl_object_getstripe(env, ll_i2info(inode)->lli_clob, lum, size); cl_env_put(env, &refcheck); RETURN(rc); } static int ll_lov_setstripe(struct inode *inode, struct file *file, - unsigned long arg) + void __user *arg) { struct lov_user_md __user *lum = (struct lov_user_md __user *)arg; struct lov_user_md *klum; @@ -1703,7 +1812,22 @@ static int ll_lov_setstripe(struct inode *inode, struct file *file, lum_size = rc; rc = ll_lov_setstripe_ea_info(inode, file_dentry(file), flags, klum, lum_size); + if (!rc) { + __u32 gen; + + rc = put_user(0, &lum->lmm_stripe_count); + if (rc) + GOTO(out, rc); + + rc = ll_layout_refresh(inode, &gen); + if (rc) + GOTO(out, rc); + + rc = ll_file_getstripe(inode, arg, lum_size); + } cl_lov_delay_create_clear(&file->f_flags); + +out: OBD_FREE(klum, lum_size); RETURN(rc); } @@ -2461,16 +2585,17 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) fd->fd_flags &= ~flags; } RETURN(0); - case LL_IOC_LOV_SETSTRIPE: - RETURN(ll_lov_setstripe(inode, file, arg)); - case LL_IOC_LOV_SETEA: - RETURN(ll_lov_setea(inode, file, arg)); + case LL_IOC_LOV_SETSTRIPE: + case LL_IOC_LOV_SETSTRIPE_NEW: + RETURN(ll_lov_setstripe(inode, file, (void __user *)arg)); + case LL_IOC_LOV_SETEA: + RETURN(ll_lov_setea(inode, file, (void __user *)arg)); case LL_IOC_LOV_SWAP_LAYOUTS: { struct file 
*file2; struct lustre_swap_layouts lsl; if (copy_from_user(&lsl, (char __user *)arg, - sizeof(struct lustre_swap_layouts))) + sizeof(struct lustre_swap_layouts))) RETURN(-EFAULT); if ((file->f_flags & O_ACCMODE) == O_RDONLY) @@ -2511,8 +2636,8 @@ out: RETURN(rc); } case LL_IOC_LOV_GETSTRIPE: - RETURN(ll_file_getstripe(inode, - (struct lov_user_md __user *)arg)); + case LL_IOC_LOV_GETSTRIPE_NEW: + RETURN(ll_file_getstripe(inode, (void __user *)arg, 0)); case FSFILT_IOC_GETFLAGS: case FSFILT_IOC_SETFLAGS: RETURN(ll_iocontrol(inode, file, cmd, arg)); @@ -3007,6 +3132,7 @@ int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end, int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct dentry *dentry = file_dentry(file); + bool lock_inode; #elif defined(HAVE_FILE_FSYNC_2ARGS) int ll_fsync(struct file *file, int datasync) { @@ -3031,7 +3157,9 @@ int ll_fsync(struct file *file, struct dentry *dentry, int datasync) #ifdef HAVE_FILE_FSYNC_4ARGS rc = filemap_write_and_wait_range(inode->i_mapping, start, end); - inode_lock(inode); + lock_inode = !lli->lli_inode_locked; + if (lock_inode) + inode_lock(inode); #else /* fsync's caller has already called _fdata{sync,write}, we want * that IO to finish before calling the osc and mdc sync methods */ @@ -3071,7 +3199,8 @@ int ll_fsync(struct file *file, struct dentry *dentry, int datasync) } #ifdef HAVE_FILE_FSYNC_4ARGS - inode_unlock(inode); + if (lock_inode) + inode_unlock(inode); #endif RETURN(rc); } @@ -3688,7 +3817,7 @@ int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat) stat->atime = inode->i_atime; stat->mtime = inode->i_mtime; stat->ctime = inode->i_ctime; - stat->blksize = 1 << inode->i_blkbits; + stat->blksize = sbi->ll_stat_blksize ?: 1 << inode->i_blkbits; stat->nlink = inode->i_nlink; stat->size = i_size_read(inode); @@ -3749,6 +3878,59 @@ struct posix_acl *ll_get_acl(struct inode *inode, int type) RETURN(acl); } +#ifdef HAVE_IOP_SET_ACL +#ifdef 
CONFIG_FS_POSIX_ACL +int ll_set_acl(struct inode *inode, struct posix_acl *acl, int type) +{ + const char *name = NULL; + char *value = NULL; + size_t size = 0; + int rc = 0; + ENTRY; + + switch (type) { + case ACL_TYPE_ACCESS: + if (acl) { + rc = posix_acl_update_mode(inode, &inode->i_mode, &acl); + if (rc) + GOTO(out, rc); + } + name = XATTR_NAME_POSIX_ACL_ACCESS; + break; + case ACL_TYPE_DEFAULT: + if (!S_ISDIR(inode->i_mode)) + GOTO(out, rc = acl ? -EACCES : 0); + name = XATTR_NAME_POSIX_ACL_DEFAULT; + break; + default: + GOTO(out, rc = -EINVAL); + } + + if (acl) { + size = posix_acl_xattr_size(acl->a_count); + value = kmalloc(size, GFP_NOFS); + if (value == NULL) + GOTO(out, rc = -ENOMEM); + + rc = posix_acl_to_xattr(&init_user_ns, acl, value, size); + if (rc < 0) + GOTO(out_free, rc); + } + + /* dentry is only used for *.lov attributes so it's safe to be NULL */ + rc = __vfs_setxattr(NULL, inode, name, value, size, XATTR_CREATE); +out_free: + kfree(value); +out: + if (!rc) + set_cached_acl(inode, type, acl); + else + forget_cached_acl(inode, type); + RETURN(rc); +} +#endif /* CONFIG_FS_POSIX_ACL */ +#endif /* HAVE_IOP_SET_ACL */ + #ifndef HAVE_GENERIC_PERMISSION_2ARGS static int # ifdef HAVE_GENERIC_PERMISSION_4ARGS @@ -3942,14 +4124,19 @@ struct inode_operations ll_file_inode_operations = { .setattr = ll_setattr, .getattr = ll_getattr, .permission = ll_inode_permission, +#ifdef HAVE_IOP_XATTR .setxattr = ll_setxattr, .getxattr = ll_getxattr, - .listxattr = ll_listxattr, .removexattr = ll_removexattr, +#endif + .listxattr = ll_listxattr, .fiemap = ll_fiemap, #ifdef HAVE_IOP_GET_ACL .get_acl = ll_get_acl, #endif +#ifdef HAVE_IOP_SET_ACL + .set_acl = ll_set_acl, +#endif }; /* dynamic ioctl number support routins */