X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fllite%2Ffile.c;h=a9708882d319be200714f9555a62c665da5331d6;hp=41af22a11ac1d60e2885d1aa41a11ae202045a34;hb=f9fe2977d184fbc8e345c4a77f4596d059b7d118;hpb=3ffa5d680f0092ae51ffa84bd94a9983f9a8c99e diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 41af22a..a970888 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -105,7 +105,16 @@ static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data, op_data->op_attr.ia_atime = inode->i_atime; op_data->op_attr.ia_mtime = inode->i_mtime; op_data->op_attr.ia_ctime = inode->i_ctime; - op_data->op_attr.ia_size = i_size_read(inode); + /* In case of encrypted file without the key, visible size was rounded + * up to next LUSTRE_ENCRYPTION_UNIT_SIZE, and clear text size was + * stored into lli_lazysize in ll_merge_attr(), so set proper file size + * now that we are closing. + */ + if (llcrypt_require_key(inode) == -ENOKEY && + ll_i2info(inode)->lli_attr_valid & OBD_MD_FLLAZYSIZE) + op_data->op_attr.ia_size = ll_i2info(inode)->lli_lazysize; + else + op_data->op_attr.ia_size = i_size_read(inode); op_data->op_attr.ia_valid |= (ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET | ATTR_MTIME | ATTR_MTIME_SET | ATTR_CTIME); @@ -318,28 +327,39 @@ static int ll_md_close(struct inode *inode, struct file *file) if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED)) ll_put_grouplock(inode, file, fd->fd_grouplock.lg_gid); + mutex_lock(&lli->lli_och_mutex); if (fd->fd_lease_och != NULL) { bool lease_broken; + struct obd_client_handle *lease_och; + + lease_och = fd->fd_lease_och; + fd->fd_lease_och = NULL; + mutex_unlock(&lli->lli_och_mutex); /* Usually the lease is not released when the * application crashed, we need to release here. */ - rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken); + rc = ll_lease_close(lease_och, inode, &lease_broken); + + mutex_lock(&lli->lli_och_mutex); + CDEBUG_LIMIT(rc ? 
D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n", PFID(&lli->lli_fid), rc, lease_broken); - - fd->fd_lease_och = NULL; } if (fd->fd_och != NULL) { - rc = ll_close_inode_openhandle(inode, fd->fd_och, 0, NULL); + struct obd_client_handle *och; + + och = fd->fd_och; fd->fd_och = NULL; + mutex_unlock(&lli->lli_och_mutex); + + rc = ll_close_inode_openhandle(inode, och, 0, NULL); GOTO(out, rc); } /* Let's see if we have good enough OPEN lock on the file and if we can skip talking to MDS */ - mutex_lock(&lli->lli_och_mutex); if (fd->fd_omode & FMODE_WRITE) { lockmode = LCK_CW; LASSERT(lli->lli_open_fd_write_count); @@ -628,7 +648,7 @@ retry: } op_data = ll_prep_md_op_data(NULL, parent->d_inode, de->d_inode, - name, len, 0, LUSTRE_OPC_ANY, NULL); + name, len, 0, LUSTRE_OPC_OPEN, NULL); if (IS_ERR(op_data)) { kfree(name); RETURN(PTR_ERR(op_data)); @@ -636,6 +656,8 @@ retry: op_data->op_data = lmm; op_data->op_data_size = lmmsize; + OBD_FAIL_TIMEOUT(OBD_FAIL_LLITE_OPEN_DELAY, cfs_fail_val); + rc = md_intent_lock(sbi->ll_md_exp, op_data, itp, &req, &ll_md_blocking_ast, 0); kfree(name); @@ -671,8 +693,11 @@ retry: * of kernel will deal with that later. */ ll_set_lock_data(sbi->ll_md_exp, de->d_inode, itp, &bits); - if (bits & MDS_INODELOCK_LOOKUP) + if (bits & MDS_INODELOCK_LOOKUP) { d_lustre_revalidate(de); + ll_update_dir_depth(parent->d_inode, de->d_inode); + } + /* if DoM bit returned along with LAYOUT bit then there * can be read-on-open data returned. */ @@ -684,14 +709,20 @@ out: ptlrpc_req_finished(req); ll_intent_drop_lock(itp); - /* We did open by fid, but by the time we got to the server, - * the object disappeared. If this is a create, we cannot really - * tell the userspace that the file it was trying to create - * does not exist. Instead let's return -ESTALE, and the VFS will - * retry the create with LOOKUP_REVAL that we are going to catch - * in ll_revalidate_dentry() and use lookup then. 
+ /* We did open by fid, but by the time we got to the server, the object + * disappeared. This is possible if the object was unlinked, but it's + * also possible if the object was unlinked by a rename. In the case + * of an object renamed over our existing one, we can't fail this open. + * O_CREAT also goes through this path if we had an existing dentry, + * and it's obviously wrong to return ENOENT for O_CREAT. + * + * Instead let's return -ESTALE, and the VFS will retry the open with + * LOOKUP_REVAL, which we catch in ll_revalidate_dentry and fail to + * revalidate, causing a lookup. This causes extra lookups in the case + * where we had a dentry in cache but the file is being unlinked and we + * lose the race with unlink, but this should be very rare. */ - if (rc == -ENOENT && itp->it_op & IT_CREAT) + if (rc == -ENOENT) rc = -ESTALE; RETURN(rc); @@ -733,12 +764,6 @@ static int ll_local_open(struct file *file, struct lookup_intent *it, file->private_data = fd; ll_readahead_init(inode, &fd->fd_ras); fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC); - /* turn off the kernel's read-ahead */ - file->f_ra.ra_pages = 0; - - /* ll_cl_context initialize */ - rwlock_init(&fd->fd_lock); - INIT_LIST_HEAD(&fd->fd_lccs); RETURN(0); } @@ -798,9 +823,12 @@ int ll_file_open(struct inode *inode, struct file *file) file->private_data = NULL; /* prevent ll_local_open assertion */ if (S_ISREG(inode->i_mode)) { - rc = llcrypt_file_open(inode, file); - if (rc) + rc = ll_file_open_encrypt(inode, file); + if (rc) { + if (it && it->it_disposition) + ll_release_openhandle(file_dentry(file), it); GOTO(out_nofiledata, rc); + } } fd = ll_file_data_get(); @@ -1449,6 +1477,16 @@ int ll_merge_attr(const struct lu_env *env, struct inode *inode) CDEBUG(D_VFSTRACE, DFID" updating i_size %llu\n", PFID(&lli->lli_fid), attr->cat_size); + if (llcrypt_require_key(inode) == -ENOKEY) { + /* Without the key, round up encrypted file size to next + * LUSTRE_ENCRYPTION_UNIT_SIZE. 
Clear text size is put in + * lli_lazysize for proper file size setting at close time. + */ + lli->lli_attr_valid |= OBD_MD_FLLAZYSIZE; + lli->lli_lazysize = attr->cat_size; + attr->cat_size = round_up(attr->cat_size, + LUSTRE_ENCRYPTION_UNIT_SIZE); + } i_size_write(inode, attr->cat_size); inode->i_blocks = attr->cat_blocks; @@ -1684,9 +1722,9 @@ restart: range_locked = true; } - ll_cl_add(file, env, io, LCC_RW); + ll_cl_add(inode, env, io, LCC_RW); rc = cl_io_loop(env, io); - ll_cl_remove(file, env); + ll_cl_remove(inode, env); if (range_locked && !is_parallel_dio) { CDEBUG(D_VFSTRACE, "Range unlock "RL_FMT"\n", @@ -2003,8 +2041,6 @@ static ssize_t ll_do_tiny_write(struct kiocb *iocb, struct iov_iter *iter) if (result > 0) { ll_heat_add(inode, CIT_WRITE, result); - ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_WRITE_BYTES, - result); set_bit(LLIF_DATA_MODIFIED, &ll_i2info(inode)->lli_flags); } @@ -2272,7 +2308,7 @@ int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename, struct ptlrpc_request **request) { struct ll_sb_info *sbi = ll_i2sbi(inode); - struct mdt_body *body; + struct mdt_body *body; struct lov_mds_md *lmm = NULL; struct ptlrpc_request *req = NULL; struct md_op_data *op_data; @@ -2294,8 +2330,8 @@ int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename, rc = md_getattr_name(sbi->ll_md_exp, op_data, &req); ll_finish_md_op_data(op_data); if (rc < 0) { - CDEBUG(D_INFO, "md_getattr_name failed " - "on %s: rc %d\n", filename, rc); + CDEBUG(D_INFO, "md_getattr_name failed on %s: rc %d\n", + filename, rc); GOTO(out, rc); } @@ -2734,7 +2770,7 @@ int ll_fid2path(struct inode *inode, void __user *arg) ENTRY; if (!capable(CAP_DAC_READ_SEARCH) && - !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH)) + !test_bit(LL_SBI_USER_FID2PATH, ll_i2sbi(inode)->ll_flags)) RETURN(-EPERM); /* Only need to get the buflen */ @@ -3664,6 +3700,9 @@ static long ll_file_unlock_lease(struct file *file, struct ll_ioc_lease *ioc, if (ioc->lil_count != 1) 
RETURN(-EINVAL); + if (IS_ENCRYPTED(inode)) + RETURN(-EOPNOTSUPP); + arg += sizeof(*ioc); if (copy_from_user(&param.pa_archive_id, (void __user *)arg, sizeof(__u32))) @@ -4036,9 +4075,10 @@ out: RETURN(rc); } case LL_IOC_HSM_ACTION: { - struct md_op_data *op_data; - struct hsm_current_action *hca; - int rc; + struct md_op_data *op_data; + struct hsm_current_action *hca; + const char *action; + int rc; OBD_ALLOC_PTR(hca); if (hca == NULL) @@ -4053,10 +4093,26 @@ out: rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data), op_data, NULL); + if (rc < 0) + GOTO(skip_copy, rc); + + /* The hsm_current_action retrieved from the server could + * contain corrupt information. If it is incorrect data collect + * debug information. We still send the data even if incorrect + * to user land to handle. + */ + action = hsm_user_action2name(hca->hca_action); + if (strcmp(action, "UNKNOWN") == 0 || + hca->hca_state > HPS_DONE) { + CDEBUG(D_HSM, + "HSM current state %s action %s, offset = %llu, length %llu\n", + hsm_progress_state2name(hca->hca_state), action, + hca->hca_location.offset, hca->hca_location.length); + } if (copy_to_user((char __user *)arg, hca, sizeof(*hca))) rc = -EFAULT; - +skip_copy: ll_finish_md_op_data(op_data); OBD_FREE_PTR(hca); RETURN(rc); @@ -4384,6 +4440,12 @@ loff_t ll_lseek(struct file *file, loff_t offset, int whence) cl_env_put(env, &refcheck); + /* Without the key, SEEK_HOLE return value has to be + * rounded up to next LUSTRE_ENCRYPTION_UNIT_SIZE. 
+ */ + if (llcrypt_require_key(inode) == -ENOKEY && whence == SEEK_HOLE) + retval = round_up(retval, LUSTRE_ENCRYPTION_UNIT_SIZE); + RETURN(retval); } @@ -4526,7 +4588,6 @@ int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync) /* fsync's caller has already called _fdata{sync,write}, we want * that IO to finish before calling the osc and mdc sync methods */ rc = filemap_write_and_wait_range(inode->i_mapping, start, end); - inode_lock(inode); /* catch async errors that were recorded back when async writeback * failed for pages in this mapping. */ @@ -4567,8 +4628,6 @@ int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync) fd->fd_write_failed = false; } - inode_unlock(inode); - if (!rc) ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, ktime_us_delta(ktime_get(), kstart)); @@ -4726,10 +4785,10 @@ int ll_get_fid_by_name(struct inode *parent, const char *name, int namelen, struct lu_fid *fid, struct inode **inode) { - struct md_op_data *op_data = NULL; - struct mdt_body *body; - struct ptlrpc_request *req; - int rc; + struct md_op_data *op_data = NULL; + struct mdt_body *body; + struct ptlrpc_request *req; + int rc; ENTRY; op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen, 0, @@ -4757,7 +4816,7 @@ out_req: } int ll_migrate(struct inode *parent, struct file *file, struct lmv_user_md *lum, - const char *name) + const char *name, __u32 flags) { struct dentry *dchild = NULL; struct inode *child_inode = NULL; @@ -4769,6 +4828,7 @@ int ll_migrate(struct inode *parent, struct file *file, struct lmv_user_md *lum, __u64 data_version = 0; size_t namelen = strlen(name); int lumlen = lmv_user_md_size(lum->lum_stripe_count, lum->lum_magic); + bool oldformat = false; int rc; ENTRY; @@ -4819,21 +4879,18 @@ int ll_migrate(struct inode *parent, struct file *file, struct lmv_user_md *lum, if (is_root_inode(child_inode)) GOTO(out_iput, rc = -EINVAL); - if (IS_ENCRYPTED(child_inode)) { - rc = llcrypt_get_encryption_info(child_inode); - if 
(rc) - GOTO(out_iput, rc); - if (!llcrypt_has_encryption_key(child_inode)) { - CDEBUG(D_SEC, "no enc key for "DFID"\n", - PFID(ll_inode2fid(child_inode))); - GOTO(out_iput, rc = -ENOKEY); - } - if (unlikely(!llcrypt_policy_has_filename_enc(child_inode))) { - CDEBUG(D_SEC, - "cannot migrate old format encrypted "DFID", please move to new enc dir first\n", - PFID(ll_inode2fid(child_inode))); - GOTO(out_iput, rc = -EUCLEAN); - } + if (IS_ENCRYPTED(parent)) { + if (unlikely(!llcrypt_policy_has_filename_enc(parent))) + oldformat = true; + } else if (IS_ENCRYPTED(child_inode) && + unlikely(!llcrypt_policy_has_filename_enc(child_inode))) { + oldformat = true; + } + if (unlikely(oldformat)) { + CDEBUG(D_SEC, + "cannot migrate old format encrypted "DFID", please move to new enc dir first\n", + PFID(ll_inode2fid(child_inode))); + GOTO(out_iput, rc = -EUCLEAN); } op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen, @@ -4854,6 +4911,11 @@ int ll_migrate(struct inode *parent, struct file *file, struct lmv_user_md *lum, op_data->op_data = lum; op_data->op_data_size = lumlen; + /* migrate dirent only for subdirs if MDS_MIGRATE_NSONLY set */ + if (S_ISDIR(child_inode->i_mode) && (flags & MDS_MIGRATE_NSONLY) && + lmv_dir_layout_changing(ll_i2info(parent)->lli_lsm_md)) + op_data->op_bias |= MDS_MIGRATE_NSONLY; + again: if (S_ISREG(child_inode->i_mode)) { och = ll_lease_open(child_inode, NULL, FMODE_WRITE, 0); @@ -4878,8 +4940,9 @@ again: spin_unlock(&och->och_mod->mod_open_req->rq_lock); } - rc = md_rename(ll_i2sbi(parent)->ll_md_exp, op_data, name, namelen, - name, namelen, &request); + rc = md_rename(ll_i2sbi(parent)->ll_md_exp, op_data, + op_data->op_name, op_data->op_namelen, + op_data->op_name, op_data->op_namelen, &request); if (rc == 0) { LASSERT(request != NULL); ll_update_times(request, parent); @@ -4936,7 +4999,7 @@ ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock) */ if (!(fd->fd_flags & LL_FILE_FLOCK_WARNING)) { fd->fd_flags |= 
LL_FILE_FLOCK_WARNING; - CDEBUG_LIMIT(D_TTY | D_CONSOLE, + CDEBUG_LIMIT(D_CONSOLE, "flock disabled, mount with '-o [local]flock' to enable\r\n"); } RETURN(-ENOSYS); @@ -5116,7 +5179,10 @@ static int ll_merge_md_attr(struct inode *inode) if (rc != 0) RETURN(rc); + spin_lock(&inode->i_lock); set_nlink(inode, attr.cat_nlink); + spin_unlock(&inode->i_lock); + inode->i_blocks = attr.cat_blocks; i_size_write(inode, attr.cat_size); @@ -5297,7 +5363,8 @@ int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat) } #endif -int cl_falloc(struct inode *inode, int mode, loff_t offset, loff_t len) +int cl_falloc(struct file *file, struct inode *inode, int mode, loff_t offset, + loff_t len) { struct lu_env *env; struct cl_io *io; @@ -5313,12 +5380,22 @@ int cl_falloc(struct inode *inode, int mode, loff_t offset, loff_t len) io = vvp_env_thread_io(env); io->ci_obj = ll_i2info(inode)->lli_clob; + ll_io_set_mirror(io, file); + io->ci_verify_layout = 1; io->u.ci_setattr.sa_parent_fid = lu_object_fid(&io->ci_obj->co_lu); io->u.ci_setattr.sa_falloc_mode = mode; io->u.ci_setattr.sa_falloc_offset = offset; io->u.ci_setattr.sa_falloc_end = offset + len; io->u.ci_setattr.sa_subtype = CL_SETATTR_FALLOCATE; + + CDEBUG(D_INODE, "UID %u GID %u\n", + from_kuid(&init_user_ns, inode->i_uid), + from_kgid(&init_user_ns, inode->i_gid)); + + io->u.ci_setattr.sa_falloc_uid = from_kuid(&init_user_ns, inode->i_uid); + io->u.ci_setattr.sa_falloc_gid = from_kgid(&init_user_ns, inode->i_gid); + if (io->u.ci_setattr.sa_falloc_end > size) { loff_t newsize = io->u.ci_setattr.sa_falloc_end; @@ -5351,7 +5428,7 @@ out: long ll_fallocate(struct file *filp, int mode, loff_t offset, loff_t len) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); int rc; if (offset < 0 || len <= 0) @@ -5376,7 +5453,7 @@ long ll_fallocate(struct file *filp, int mode, loff_t offset, loff_t len) ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FALLOCATE, 1); - rc = 
cl_falloc(inode, mode, offset, len); + rc = cl_falloc(filp, inode, mode, offset, len); /* * ENOTSUPP (524) is an NFSv3 specific error code erroneously * used by Lustre in several places. Retuning it here would @@ -5470,7 +5547,7 @@ int ll_inode_permission(struct inode *inode, int mask) squash = &sbi->ll_squash; if (unlikely(squash->rsi_uid != 0 && uid_eq(current_fsuid(), GLOBAL_ROOT_UID) && - !(sbi->ll_flags & LL_SBI_NOROOTSQUASH))) { + !test_bit(LL_SBI_NOROOTSQUASH, sbi->ll_flags))) { squash_id = true; } if (squash_id) { @@ -5620,9 +5697,9 @@ const struct file_operations *ll_select_file_operations(struct ll_sb_info *sbi) { const struct file_operations *fops = &ll_file_operations_noflock; - if (sbi->ll_flags & LL_SBI_FLOCK) + if (test_bit(LL_SBI_FLOCK, sbi->ll_flags)) fops = &ll_file_operations_flock; - else if (sbi->ll_flags & LL_SBI_LOCALFLOCK) + else if (test_bit(LL_SBI_LOCALFLOCK, sbi->ll_flags)) fops = &ll_file_operations; return fops; @@ -5910,7 +5987,8 @@ int ll_layout_refresh(struct inode *inode, __u32 *gen) ENTRY; *gen = ll_layout_version_get(lli); - if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK) || *gen != CL_LAYOUT_GEN_NONE) + if (!test_bit(LL_SBI_LAYOUT_LOCK, sbi->ll_flags) || + *gen != CL_LAYOUT_GEN_NONE) RETURN(0); /* sanity checks */