X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fllite%2Ffile.c;h=b7bd2bf7eb14afcb39e5210f1937700c2d6ce1a2;hp=471ef97191837fef8bab574af19fa321a38ded7e;hb=9573911bfb4a2c3d7e2047c9d5f5440d9c7e7db5;hpb=5ccd7a4a556b1a847eb5bff8b2395522a6f4bca8 diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 471ef97..b7bd2bf 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -27,7 +27,7 @@ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2011, 2014, Intel Corporation. + * Copyright (c) 2011, 2015, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -50,10 +50,11 @@ # include #endif #include + #include +#include #include "cl_object.h" - #include "llite_internal.h" #include "vvp_internal.h" @@ -86,47 +87,36 @@ static void ll_file_data_put(struct ll_file_data *fd) OBD_SLAB_FREE_PTR(fd, ll_file_data_slab); } -void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data, - struct lustre_handle *fh) -{ - op_data->op_fid1 = ll_i2info(inode)->lli_fid; - op_data->op_attr.ia_mode = inode->i_mode; - op_data->op_attr.ia_atime = inode->i_atime; - op_data->op_attr.ia_mtime = inode->i_mtime; - op_data->op_attr.ia_ctime = inode->i_ctime; - op_data->op_attr.ia_size = i_size_read(inode); - op_data->op_attr_blocks = inode->i_blocks; - op_data->op_attr_flags = ll_inode_to_ext_flags(inode->i_flags); - if (fh) - op_data->op_handle = *fh; - op_data->op_capa1 = ll_mdscapa_get(inode); - - if (LLIF_DATA_MODIFIED & ll_i2info(inode)->lli_flags) - op_data->op_bias |= MDS_DATA_MODIFIED; -} - /** * Packs all the attributes into @op_data for the CLOSE rpc. */ static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data, struct obd_client_handle *och) { - ENTRY; - - op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET | - ATTR_MTIME | ATTR_MTIME_SET | - ATTR_CTIME | ATTR_CTIME_SET; + ENTRY; - if (!(och->och_flags & FMODE_WRITE)) - goto out; + ll_prep_md_op_data(op_data, inode, NULL, NULL, + 0, 0, LUSTRE_OPC_ANY, NULL); + + op_data->op_attr.ia_mode = inode->i_mode; + op_data->op_attr.ia_atime = inode->i_atime; + op_data->op_attr.ia_mtime = inode->i_mtime; + op_data->op_attr.ia_ctime = inode->i_ctime; + op_data->op_attr.ia_size = i_size_read(inode); + op_data->op_attr.ia_valid |= ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET | + ATTR_MTIME | ATTR_MTIME_SET | + ATTR_CTIME | ATTR_CTIME_SET; + op_data->op_attr_blocks = inode->i_blocks; + op_data->op_attr_flags = ll_inode_to_ext_flags(inode->i_flags); + op_data->op_handle = och->och_fh; - op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS; + if (och->och_flags & FMODE_WRITE && + ll_file_test_and_clear_flag(ll_i2info(inode), LLIF_DATA_MODIFIED)) + /* For HSM: if inode data has been modified, pack it so that + * MDT can set data dirty flag in the archive. */ + op_data->op_bias |= MDS_DATA_MODIFIED; -out: - ll_pack_inode2opdata(inode, op_data, &och->och_fh); - ll_prep_md_op_data(op_data, inode, NULL, NULL, - 0, 0, LUSTRE_OPC_ANY, NULL); - EXIT; + EXIT; } /** @@ -137,32 +127,28 @@ out: * If \a bias is MDS_CLOSE_LAYOUT_SWAP then \a data is a pointer to the inode to * swap layouts with. */ -static int ll_close_inode_openhandle(struct obd_export *md_exp, +static int ll_close_inode_openhandle(struct inode *inode, struct obd_client_handle *och, - struct inode *inode, - enum mds_op_bias bias, - void *data) + enum mds_op_bias bias, void *data) { - struct obd_export *exp = ll_i2mdexp(inode); - struct md_op_data *op_data; - struct ptlrpc_request *req = NULL; - struct obd_device *obd = class_exp2obd(exp); - int rc; + struct obd_export *md_exp = ll_i2mdexp(inode); + const struct ll_inode_info *lli = ll_i2info(inode); + struct md_op_data *op_data; + struct ptlrpc_request *req = NULL; + int rc; ENTRY; - if (obd == NULL) { - /* - * XXX: in case of LMV, is this correct to access - * ->exp_handle? - */ - CERROR("Invalid MDC connection handle "LPX64"\n", - ll_i2mdexp(inode)->exp_handle.h_cookie); + if (class_exp2obd(md_exp) == NULL) { + CERROR("%s: invalid MDC connection handle closing "DFID"\n", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(&lli->lli_fid)); GOTO(out, rc = 0); } OBD_ALLOC_PTR(op_data); + /* We leak openhandle and request here on error, but not much to be + * done in OOM case since app won't retry close on error either. */ if (op_data == NULL) - /* XXX We leak openhandle and request here. */ GOTO(out, rc = -ENOMEM); ll_prepare_close(inode, op_data, och); @@ -188,22 +174,10 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp, break; } - rc = md_close(md_exp, op_data, och->och_mod, &req); - if (rc) { + rc = md_close(md_exp, op_data, och->och_mod, &req); + if (rc != 0 && rc != -EINTR) CERROR("%s: inode "DFID" mdc close failed: rc = %d\n", - ll_i2mdexp(inode)->exp_obd->obd_name, - PFID(ll_inode2fid(inode)), rc); - } - - /* DATA_MODIFIED flag was successfully sent on close, cancel data - * modification flag. */ - if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) { - struct ll_inode_info *lli = ll_i2info(inode); - - spin_lock(&lli->lli_lock); - lli->lli_flags &= ~LLIF_DATA_MODIFIED; - spin_unlock(&lli->lli_lock); - } + md_exp->exp_obd->obd_name, PFID(&lli->lli_fid), rc); if (rc == 0 && op_data->op_bias & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP)) { @@ -222,9 +196,8 @@ out: och->och_fh.cookie = DEAD_HANDLE_MAGIC; OBD_FREE_PTR(och); - if (req) /* This is close request */ - ptlrpc_req_finished(req); - return rc; + ptlrpc_req_finished(req); /* This is close request */ + return rc; } int ll_md_real_close(struct inode *inode, fmode_t fmode) @@ -263,24 +236,22 @@ int ll_md_real_close(struct inode *inode, fmode_t fmode) if (och != NULL) { /* There might be a race and this handle may already * be closed. */ - rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, - och, inode, 0, NULL); + rc = ll_close_inode_openhandle(inode, och, 0, NULL); } RETURN(rc); } -static int ll_md_close(struct obd_export *md_exp, struct inode *inode, - struct file *file) +static int ll_md_close(struct inode *inode, struct file *file) { - ldlm_policy_data_t policy = { + union ldlm_policy_data policy = { .l_inodebits = { MDS_INODELOCK_OPEN }, }; __u64 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK; struct ll_file_data *fd = LUSTRE_FPRIVATE(file); struct ll_inode_info *lli = ll_i2info(inode); struct lustre_handle lockh; - int lockmode; + enum ldlm_mode lockmode; int rc = 0; ENTRY; @@ -301,8 +272,7 @@ static int ll_md_close(struct obd_export *md_exp, struct inode *inode, } if (fd->fd_och != NULL) { - rc = ll_close_inode_openhandle(md_exp, fd->fd_och, inode, 0, - NULL); + rc = ll_close_inode_openhandle(inode, fd->fd_och, 0, NULL); fd->fd_och = NULL; GOTO(out, rc); } @@ -325,14 +295,13 @@ static int ll_md_close(struct obd_export *md_exp, struct inode *inode, } mutex_unlock(&lli->lli_och_mutex); - if (!md_lock_match(md_exp, flags, ll_inode2fid(inode), + if (!md_lock_match(ll_i2mdexp(inode), flags, ll_inode2fid(inode), LDLM_IBITS, &policy, lockmode, &lockh)) rc = ll_md_real_close(inode, fd->fd_omode); out: LUSTRE_FPRIVATE(file) = NULL; ll_file_data_put(fd); - ll_capa_close(inode); RETURN(rc); } @@ -353,21 +322,7 @@ int ll_file_release(struct inode *inode, struct file *file) CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n", PFID(ll_inode2fid(inode)), inode); -#ifdef CONFIG_FS_POSIX_ACL - if (sbi->ll_flags & LL_SBI_RMT_CLIENT && - inode == inode->i_sb->s_root->d_inode) { - struct ll_file_data *fd = LUSTRE_FPRIVATE(file); - - LASSERT(fd != NULL); - if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) { - fd->fd_flags &= ~LL_FILE_RMTACL; - rct_del(&sbi->ll_rct, current_pid()); - et_search_free(&sbi->ll_et, current_pid()); - } - } -#endif - - if (inode->i_sb->s_root != file->f_path.dentry) + if (inode->i_sb->s_root != file_dentry(file)) ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1); fd = LUSTRE_FPRIVATE(file); LASSERT(fd != NULL); @@ -377,30 +332,30 @@ int ll_file_release(struct inode *inode, struct file *file) if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd) ll_deauthorize_statahead(inode, fd); - if (inode->i_sb->s_root == file->f_path.dentry) { - LUSTRE_FPRIVATE(file) = NULL; - ll_file_data_put(fd); - RETURN(0); - } + if (inode->i_sb->s_root == file_dentry(file)) { + LUSTRE_FPRIVATE(file) = NULL; + ll_file_data_put(fd); + RETURN(0); + } - if (!S_ISDIR(inode->i_mode)) { + if (!S_ISDIR(inode->i_mode)) { if (lli->lli_clob != NULL) lov_read_and_clear_async_rc(lli->lli_clob); - lli->lli_async_rc = 0; - } + lli->lli_async_rc = 0; + } - rc = ll_md_close(sbi->ll_md_exp, inode, file); + rc = ll_md_close(inode, file); - if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val)) - libcfs_debug_dumplog(); + if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val)) + libcfs_debug_dumplog(); - RETURN(rc); + RETURN(rc); } static int ll_intent_file_open(struct file *file, void *lmm, int lmmsize, struct lookup_intent *itp) { - struct dentry *de = file->f_path.dentry; + struct dentry *de = file_dentry(file); struct ll_sb_info *sbi = ll_i2sbi(de->d_inode); struct dentry *parent = de->d_parent; const char *name = NULL; @@ -452,26 +407,35 @@ static int ll_intent_file_open(struct file *file, void *lmm, int lmmsize, } rc = ll_prep_inode(&de->d_inode, req, NULL, itp); - if (!rc && itp->d.lustre.it_lock_mode) + if (!rc && itp->it_lock_mode) ll_set_lock_data(sbi->ll_md_exp, de->d_inode, itp, NULL); out: ptlrpc_req_finished(req); ll_intent_drop_lock(itp); + /* We did open by fid, but by the time we got to the server, + * the object disappeared. If this is a create, we cannot really + * tell the userspace that the file it was trying to create + * does not exist. Instead let's return -ESTALE, and the VFS will + * retry the create with LOOKUP_REVAL that we are going to catch + * in ll_revalidate_dentry() and use lookup then. + */ + if (rc == -ENOENT && itp->it_op & IT_CREAT) + rc = -ESTALE; + RETURN(rc); } static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it, struct obd_client_handle *och) { - struct ptlrpc_request *req = it->d.lustre.it_data; struct mdt_body *body; - body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); + body = req_capsule_server_get(&it->it_request->rq_pill, &RMF_MDT_BODY); och->och_fh = body->mbo_handle; och->och_fid = body->mbo_fid1; - och->och_lease_handle.cookie = it->d.lustre.it_lock_handle; + och->och_lease_handle.cookie = it->it_lock_handle; och->och_magic = OBD_CLIENT_HANDLE_MAGIC; och->och_flags = it->it_flags; @@ -481,7 +445,7 @@ static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it, static int ll_local_open(struct file *file, struct lookup_intent *it, struct ll_file_data *fd, struct obd_client_handle *och) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); ENTRY; LASSERT(!LUSTRE_FPRIVATE(file)); @@ -545,12 +509,12 @@ int ll_file_open(struct inode *inode, struct file *file) if (S_ISDIR(inode->i_mode)) ll_authorize_statahead(inode, fd); - if (inode->i_sb->s_root == file->f_path.dentry) { + if (inode->i_sb->s_root == file_dentry(file)) { LUSTRE_FPRIVATE(file) = fd; RETURN(0); } - if (!it || !it->d.lustre.it_disposition) { + if (!it || !it->it_disposition) { /* Convert f_flags into access mode. We cannot use file->f_mode, * because everything but O_ACCMODE mask was stripped from * there */ @@ -603,7 +567,7 @@ restart: GOTO(out_openerr, rc); } - ll_release_openhandle(file->f_path.dentry, it); + ll_release_openhandle(file_dentry(file), it); } (*och_usecount)++; @@ -615,7 +579,8 @@ restart: } } else { LASSERT(*och_usecount == 0); - if (!it->d.lustre.it_disposition) { + if (!it->it_disposition) { + struct ll_dentry_data *ldd = ll_d2d(file->f_path.dentry); /* We cannot just request lock handle now, new ELC code means that one of other OPEN locks for this file could be cancelled, and since blocking ast handler @@ -629,12 +594,24 @@ restart: * handle to be returned from LOOKUP|OPEN request, * for example if the target entry was a symlink. * - * Always fetch MDS_OPEN_LOCK if this is not setstripe. + * Only fetch MDS_OPEN_LOCK if this is in NFS path, + * marked by a bit set in ll_iget_for_nfs. Clear the + * bit so that it's not confusing later callers. * + * NB; when ldd is NULL, it must have come via normal + * lookup path only, since ll_iget_for_nfs always calls + * ll_d_init(). + */ + if (ldd && ldd->lld_nfs_dentry) { + ldd->lld_nfs_dentry = 0; + it->it_flags |= MDS_OPEN_LOCK; + } + + /* * Always specify MDS_OPEN_BY_FID because we don't want * to get file with different fid. */ - it->it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID; + it->it_flags |= MDS_OPEN_BY_FID; rc = ll_intent_file_open(file, NULL, 0, it); if (rc) GOTO(out_openerr, rc); @@ -658,7 +635,7 @@ restart: LASSERTF(it_disposition(it, DISP_ENQ_OPEN_REF), "inode %p: disposition %x, status %d\n", inode, - it_disposition(it, ~0), it->d.lustre.it_status); + it_disposition(it, ~0), it->it_status); rc = ll_local_open(file, it, fd, *och_p); if (rc) @@ -673,8 +650,6 @@ restart: if (!S_ISREG(inode->i_mode)) GOTO(out_och_free, rc); - ll_capa_open(inode); - cl_lov_delay_create_clear(&file->f_flags); GOTO(out_och_free, rc); @@ -697,7 +672,7 @@ out_openerr: } if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) { - ptlrpc_req_finished(it->d.lustre.it_data); + ptlrpc_req_finished(it->it_request); it_clear_disposition(it, DISP_ENQ_OPEN_REF); } @@ -728,6 +703,95 @@ static int ll_md_blocking_lease_ast(struct ldlm_lock *lock, } /** + * When setting a lease on a file, we take ownership of the lli_mds_*_och + * and save it as fd->fd_och so as to force client to reopen the file even + * if it has an open lock in cache already. + */ +static int ll_lease_och_acquire(struct inode *inode, struct file *file, + struct lustre_handle *old_handle) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct ll_file_data *fd = LUSTRE_FPRIVATE(file); + struct obd_client_handle **och_p; + __u64 *och_usecount; + int rc = 0; + ENTRY; + + /* Get the openhandle of the file */ + mutex_lock(&lli->lli_och_mutex); + if (fd->fd_lease_och != NULL) + GOTO(out_unlock, rc = -EBUSY); + + if (fd->fd_och == NULL) { + if (file->f_mode & FMODE_WRITE) { + LASSERT(lli->lli_mds_write_och != NULL); + och_p = &lli->lli_mds_write_och; + och_usecount = &lli->lli_open_fd_write_count; + } else { + LASSERT(lli->lli_mds_read_och != NULL); + och_p = &lli->lli_mds_read_och; + och_usecount = &lli->lli_open_fd_read_count; + } + + if (*och_usecount > 1) + GOTO(out_unlock, rc = -EBUSY); + + fd->fd_och = *och_p; + *och_usecount = 0; + *och_p = NULL; + } + + *old_handle = fd->fd_och->och_fh; + + EXIT; +out_unlock: + mutex_unlock(&lli->lli_och_mutex); + return rc; +} + +/** + * Release ownership on lli_mds_*_och when putting back a file lease. + */ +static int ll_lease_och_release(struct inode *inode, struct file *file) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct ll_file_data *fd = LUSTRE_FPRIVATE(file); + struct obd_client_handle **och_p; + struct obd_client_handle *old_och = NULL; + __u64 *och_usecount; + int rc = 0; + ENTRY; + + mutex_lock(&lli->lli_och_mutex); + if (file->f_mode & FMODE_WRITE) { + och_p = &lli->lli_mds_write_och; + och_usecount = &lli->lli_open_fd_write_count; + } else { + och_p = &lli->lli_mds_read_och; + och_usecount = &lli->lli_open_fd_read_count; + } + + /* The file may have been open by another process (broken lease) so + * *och_p is not NULL. In this case we should simply increase usecount + * and close fd_och. + */ + if (*och_p != NULL) { + old_och = fd->fd_och; + (*och_usecount)++; + } else { + *och_p = fd->fd_och; + *och_usecount = 1; + } + fd->fd_och = NULL; + mutex_unlock(&lli->lli_och_mutex); + + if (old_och != NULL) + rc = ll_close_inode_openhandle(inode, old_och, 0, NULL); + + RETURN(rc); +} + +/** * Acquire a lease and open the file. */ static struct obd_client_handle * @@ -748,45 +812,12 @@ ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode, RETURN(ERR_PTR(-EINVAL)); if (file != NULL) { - struct ll_inode_info *lli = ll_i2info(inode); - struct ll_file_data *fd = LUSTRE_FPRIVATE(file); - struct obd_client_handle **och_p; - __u64 *och_usecount; - if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC)) RETURN(ERR_PTR(-EPERM)); - /* Get the openhandle of the file */ - rc = -EBUSY; - mutex_lock(&lli->lli_och_mutex); - if (fd->fd_lease_och != NULL) { - mutex_unlock(&lli->lli_och_mutex); - RETURN(ERR_PTR(rc)); - } - - if (fd->fd_och == NULL) { - if (file->f_mode & FMODE_WRITE) { - LASSERT(lli->lli_mds_write_och != NULL); - och_p = &lli->lli_mds_write_och; - och_usecount = &lli->lli_open_fd_write_count; - } else { - LASSERT(lli->lli_mds_read_och != NULL); - och_p = &lli->lli_mds_read_och; - och_usecount = &lli->lli_open_fd_read_count; - } - if (*och_usecount == 1) { - fd->fd_och = *och_p; - *och_p = NULL; - *och_usecount = 0; - rc = 0; - } - } - mutex_unlock(&lli->lli_och_mutex); - if (rc < 0) /* more than 1 opener */ + rc = ll_lease_och_acquire(inode, file, &old_handle); + if (rc) RETURN(ERR_PTR(rc)); - - LASSERT(fd->fd_och != NULL); - old_handle = fd->fd_och->och_fh; } OBD_ALLOC_PTR(och); @@ -832,12 +863,12 @@ ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode, /* already get lease, handle lease lock */ ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL); - if (it.d.lustre.it_lock_mode == 0 || - it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) { + if (it.it_lock_mode == 0 || + it.it_lock_bits != MDS_INODELOCK_OPEN) { /* open lock must return for lease */ - CERROR(DFID "lease granted but no open lock, %d/"LPU64".\n", - PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode, - it.d.lustre.it_lock_bits); + CERROR(DFID "lease granted but no open lock, %d/%llu.\n", + PFID(ll_inode2fid(inode)), it.it_lock_mode, + it.it_lock_bits); GOTO(out_close, rc = -EPROTO); } @@ -846,13 +877,13 @@ ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode, out_close: /* Cancel open lock */ - if (it.d.lustre.it_lock_mode != 0) { + if (it.it_lock_mode != 0) { ldlm_lock_decref_and_cancel(&och->och_lease_handle, - it.d.lustre.it_lock_mode); - it.d.lustre.it_lock_mode = 0; + it.it_lock_mode); + it.it_lock_mode = 0; och->och_lease_handle.cookie = 0ULL; } - rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, och, inode, 0, NULL); + rc2 = ll_close_inode_openhandle(inode, och, 0, NULL); if (rc2 < 0) CERROR("%s: error closing file "DFID": %d\n", ll_get_fsname(inode->i_sb, NULL, 0), @@ -916,8 +947,8 @@ static int ll_swap_layouts_close(struct obd_client_handle *och, /* Close the file and swap layouts between inode & inode2. * NB: lease lock handle is released in mdc_close_layout_swap_pack() * because we still need it to pack l_remote_handle to MDT. */ - rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, och, inode, - MDS_CLOSE_LAYOUT_SWAP, inode2); + rc = ll_close_inode_openhandle(inode, och, MDS_CLOSE_LAYOUT_SWAP, + inode2); och = NULL; /* freed in ll_close_inode_openhandle() */ @@ -949,16 +980,15 @@ static int ll_lease_close(struct obd_client_handle *och, struct inode *inode, } CDEBUG(D_INODE, "lease for "DFID" broken? %d\n", - PFID(&ll_i2info(inode)->lli_fid), cancelled); + PFID(&ll_i2info(inode)->lli_fid), cancelled); if (!cancelled) ldlm_cli_cancel(&och->och_lease_handle, 0); + if (lease_broken != NULL) *lease_broken = cancelled; - rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, och, inode, - 0, NULL); - + rc = ll_close_inode_openhandle(inode, och, 0, NULL); RETURN(rc); } @@ -976,9 +1006,19 @@ int ll_merge_attr(const struct lu_env *env, struct inode *inode) ll_inode_size_lock(inode); - /* merge timestamps the most recently obtained from mds with - timestamps obtained from osts */ - LTIME_S(inode->i_atime) = lli->lli_atime; + /* Merge timestamps the most recently obtained from MDS with + * timestamps obtained from OSTs. + * + * Do not overwrite atime of inode because it may be refreshed + * by file_accessed() function. If the read was served by cache + * data, there is no RPC to be sent so that atime may not be + * transferred to OSTs at all. MDT only updates atime at close time + * if it's at least 'mdd.*.atime_diff' older. + * All in all, the atime in Lustre does not strictly comply with + * POSIX. Solving this problem needs to send an RPC to MDT for each + * read, this will hurt performance. */ + if (LTIME_S(inode->i_atime) < lli->lli_atime) + LTIME_S(inode->i_atime) = lli->lli_atime; LTIME_S(inode->i_mtime) = lli->lli_mtime; LTIME_S(inode->i_ctime) = lli->lli_ctime; @@ -1002,7 +1042,7 @@ int ll_merge_attr(const struct lu_env *env, struct inode *inode) if (mtime < attr->cat_mtime) mtime = attr->cat_mtime; - CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n", + CDEBUG(D_VFSTRACE, DFID" updating i_size %llu\n", PFID(&lli->lli_fid), attr->cat_size); i_size_write(inode, attr->cat_size); @@ -1021,7 +1061,7 @@ out_size_unlock: static bool file_is_noatime(const struct file *file) { const struct vfsmount *mnt = file->f_path.mnt; - const struct inode *inode = file->f_path.dentry->d_inode; + const struct inode *inode = file_inode((struct file *)file); /* Adapted from file_accessed() and touch_atime().*/ if (file->f_flags & O_NOATIME) @@ -1047,7 +1087,7 @@ static bool file_is_noatime(const struct file *file) static void ll_io_init(struct cl_io *io, const struct file *file, int write) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode((struct file *)file); io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK; if (write) { @@ -1074,7 +1114,7 @@ ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args, loff_t *ppos, size_t count) { struct vvp_io *vio = vvp_env_io(env); - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct ll_inode_info *lli = ll_i2info(inode); struct ll_file_data *fd = LUSTRE_FPRIVATE(file); struct cl_io *io; @@ -1084,8 +1124,8 @@ ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args, ENTRY; - CDEBUG(D_VFSTRACE, "file: %s, type: %d ppos: "LPU64", count: %zu\n", - file->f_path.dentry->d_name.name, iot, *ppos, count); + CDEBUG(D_VFSTRACE, "file: %s, type: %d ppos: %llu, count: %zu\n", + file_dentry(file)->d_name.name, iot, *ppos, count); restart: io = vvp_env_thread_io(env); @@ -1104,9 +1144,7 @@ restart: switch (vio->vui_io_subtype) { case IO_NORMAL: - vio->vui_iov = args->u.normal.via_iov; - vio->vui_nrsegs = args->u.normal.via_nrsegs; - vio->vui_tot_nrsegs = vio->vui_nrsegs; + vio->vui_iter = args->u.normal.via_iter; vio->vui_iocb = args->u.normal.via_iocb; /* Direct IO reads must also take range lock, * or multiple reads will try to work on the same pages @@ -1132,7 +1170,7 @@ restart: LBUG(); } - ll_cl_add(file, env, io); + ll_cl_add(file, env, io, LCC_RW); rc = cl_io_loop(env, io); ll_cl_remove(file, env); @@ -1152,10 +1190,8 @@ restart: *ppos = io->u.ci_wr.wr.crw_pos; /* for splice */ /* prepare IO restart */ - if (count > 0 && args->via_io_subtype == IO_NORMAL) { - args->u.normal.via_iov = vio->vui_iov; - args->u.normal.via_nrsegs = vio->vui_tot_nrsegs; - } + if (count > 0 && args->via_io_subtype == IO_NORMAL) + args->u.normal.via_iter = vio->vui_iter; } GOTO(out, rc); out: @@ -1164,7 +1200,7 @@ out: if ((rc == 0 || rc == -ENODATA) && count > 0 && io->ci_need_restart) { CDEBUG(D_VFSTRACE, "%s: restart %s from %lld, count:%zu, result: %zd\n", - file->f_path.dentry->d_name.name, + file_dentry(file)->d_name.name, iot == CIT_READ ? "read" : "write", *ppos, count, result); goto restart; @@ -1179,6 +1215,12 @@ out: ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_WRITE_BYTES, result); fd->fd_write_failed = false; + } else if (result == 0 && rc == 0) { + rc = io->ci_result; + if (rc < 0) + fd->fd_write_failed = true; + else + fd->fd_write_failed = false; } else if (rc != -ERESTARTSYS) { fd->fd_write_failed = true; } @@ -1189,81 +1231,182 @@ out: return result > 0 ? result : rc; } -/* - * XXX: exact copy from kernel code (__generic_file_aio_write_nolock) +/** + * The purpose of fast read is to overcome per I/O overhead and improve IOPS + * especially for small I/O. + * + * To serve a read request, CLIO has to create and initialize a cl_io and + * then request DLM lock. This has turned out to have siginificant overhead + * and affects the performance of small I/O dramatically. + * + * It's not necessary to create a cl_io for each I/O. Under the help of read + * ahead, most of the pages being read are already in memory cache and we can + * read those pages directly because if the pages exist, the corresponding DLM + * lock must exist so that page content must be valid. + * + * In fast read implementation, the llite speculatively finds and reads pages + * in memory cache. There are three scenarios for fast read: + * - If the page exists and is uptodate, kernel VM will provide the data and + * CLIO won't be intervened; + * - If the page was brought into memory by read ahead, it will be exported + * and read ahead parameters will be updated; + * - Otherwise the page is not in memory, we can't do fast read. Therefore, + * it will go back and invoke normal read, i.e., a cl_io will be created + * and DLM lock will be requested. + * + * POSIX compliance: posix standard states that read is intended to be atomic. + * Lustre read implementation is in line with Linux kernel read implementation + * and neither of them complies with POSIX standard in this matter. Fast read + * doesn't make the situation worse on single node but it may interleave write + * results from multiple nodes due to short read handling in ll_file_aio_read(). + * + * \param env - lu_env + * \param iocb - kiocb from kernel + * \param iter - user space buffers where the data will be copied + * + * \retval - number of bytes have been read, or error code if error occurred. */ -static int ll_file_get_iov_count(const struct iovec *iov, - unsigned long *nr_segs, size_t *count) +static ssize_t +ll_do_fast_read(const struct lu_env *env, struct kiocb *iocb, + struct iov_iter *iter) { - size_t cnt = 0; - unsigned long seg; + ssize_t result; - for (seg = 0; seg < *nr_segs; seg++) { - const struct iovec *iv = &iov[seg]; + if (!ll_sbi_has_fast_read(ll_i2sbi(file_inode(iocb->ki_filp)))) + return 0; - /* - * If any segment has a negative length, or the cumulative - * length ever wraps negative then return -EINVAL. - */ - cnt += iv->iov_len; - if (unlikely((ssize_t)(cnt|iv->iov_len) < 0)) - return -EINVAL; - if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len)) - continue; - if (seg == 0) - return -EFAULT; - *nr_segs = seg; - cnt -= iv->iov_len; /* This segment is no good */ - break; - } - *count = cnt; - return 0; + /* NB: we can't do direct IO for fast read because it will need a lock + * to make IO engine happy. */ + if (iocb->ki_filp->f_flags & O_DIRECT) + return 0; + + ll_cl_add(iocb->ki_filp, env, NULL, LCC_RW); + result = generic_file_read_iter(iocb, iter); + ll_cl_remove(iocb->ki_filp, env); + + /* If the first page is not in cache, generic_file_aio_read() will be + * returned with -ENODATA. + * See corresponding code in ll_readpage(). */ + if (result == -ENODATA) + result = 0; + + if (result > 0) + ll_stats_ops_tally(ll_i2sbi(file_inode(iocb->ki_filp)), + LPROC_LL_READ_BYTES, result); + + return result; } -static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +/* + * Read from a file (through the page cache). + */ +static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to) { - struct lu_env *env; + struct lu_env *env; struct vvp_io_args *args; - struct iovec *local_iov; - size_t count; - ssize_t result; - int refcheck; - ENTRY; + ssize_t result; + ssize_t rc2; + __u16 refcheck; - result = ll_file_get_iov_count(iov, &nr_segs, &count); - if (result) - RETURN(result); + env = cl_env_get(&refcheck); + if (IS_ERR(env)) + return PTR_ERR(env); - env = cl_env_get(&refcheck); - if (IS_ERR(env)) - RETURN(PTR_ERR(env)); + result = ll_do_fast_read(env, iocb, to); + if (result < 0 || iov_iter_count(to) == 0) + GOTO(out, result); - if (nr_segs == 1) { - local_iov = &ll_env_info(env)->lti_local_iov; - *local_iov = *iov; - } else { - OBD_ALLOC(local_iov, sizeof(*iov) * nr_segs); - if (local_iov == NULL) { - cl_env_put(env, &refcheck); - RETURN(-ENOMEM); - } + args = ll_env_args(env, IO_NORMAL); + args->u.normal.via_iter = to; + args->u.normal.via_iocb = iocb; - memcpy(local_iov, iov, sizeof(*iov) * nr_segs); - } + rc2 = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ, + &iocb->ki_pos, iov_iter_count(to)); + if (rc2 > 0) + result += rc2; + else if (result == 0) + result = rc2; + +out: + cl_env_put(env, &refcheck); + return result; +} + +/* + * Write to a file (through the page cache). + */ +static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from) +{ + struct vvp_io_args *args; + struct lu_env *env; + ssize_t result; + __u16 refcheck; + + env = cl_env_get(&refcheck); + if (IS_ERR(env)) + return PTR_ERR(env); args = ll_env_args(env, IO_NORMAL); - args->u.normal.via_iov = local_iov; - args->u.normal.via_nrsegs = nr_segs; + args->u.normal.via_iter = from; args->u.normal.via_iocb = iocb; - result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ, - &iocb->ki_pos, count); - + result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE, + &iocb->ki_pos, iov_iter_count(from)); cl_env_put(env, &refcheck); + return result; +} + +#ifndef HAVE_FILE_OPERATIONS_READ_WRITE_ITER +/* + * XXX: exact copy from kernel code (__generic_file_aio_write_nolock) + */ +static int ll_file_get_iov_count(const struct iovec *iov, + unsigned long *nr_segs, size_t *count) +{ + size_t cnt = 0; + unsigned long seg; + + for (seg = 0; seg < *nr_segs; seg++) { + const struct iovec *iv = &iov[seg]; + + /* + * If any segment has a negative length, or the cumulative + * length ever wraps negative then return -EINVAL. + */ + cnt += iv->iov_len; + if (unlikely((ssize_t)(cnt|iv->iov_len) < 0)) + return -EINVAL; + if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len)) + continue; + if (seg == 0) + return -EFAULT; + *nr_segs = seg; + cnt -= iv->iov_len; /* This segment is no good */ + break; + } + *count = cnt; + return 0; +} + +static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + struct iov_iter to; + size_t iov_count; + ssize_t result; + ENTRY; - if (nr_segs > 1) - OBD_FREE(local_iov, sizeof(*iov) * nr_segs); + result = ll_file_get_iov_count(iov, &nr_segs, &iov_count); + if (result) + RETURN(result); + +# ifdef HAVE_IOV_ITER_INIT_DIRECTION + iov_iter_init(&to, READ, iov, nr_segs, iov_count); +# else /* !HAVE_IOV_ITER_INIT_DIRECTION */ + iov_iter_init(&to, iov, nr_segs, iov_count, 0); +# endif /* HAVE_IOV_ITER_INIT_DIRECTION */ + + result = ll_file_read_iter(iocb, &to); RETURN(result); } @@ -1271,30 +1414,27 @@ static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov, static ssize_t ll_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct lu_env *env; struct iovec iov = { .iov_base = buf, .iov_len = count }; struct kiocb *kiocb; ssize_t result; - int refcheck; ENTRY; - env = cl_env_get(&refcheck); - if (IS_ERR(env)) - RETURN(PTR_ERR(env)); + OBD_ALLOC_PTR(kiocb); + if (kiocb == NULL) + RETURN(-ENOMEM); - kiocb = &ll_env_info(env)->lti_kiocb; init_sync_kiocb(kiocb, file); kiocb->ki_pos = *ppos; #ifdef HAVE_KIOCB_KI_LEFT - kiocb->ki_left = count; -#else - kiocb->ki_nbytes = count; + kiocb->ki_left = count; +#elif defined(HAVE_KI_NBYTES) + kiocb->ki_nbytes = count; #endif result = ll_file_aio_read(kiocb, &iov, 1, kiocb->ki_pos); *ppos = kiocb->ki_pos; - cl_env_put(env, &refcheck); + OBD_FREE_PTR(kiocb); RETURN(result); } @@ -1305,46 +1445,22 @@ static ssize_t ll_file_read(struct file *file, char __user *buf, size_t count, static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { - struct lu_env *env; - struct vvp_io_args *args; - struct iovec *local_iov; - size_t count; - ssize_t result; - int refcheck; - ENTRY; - - result = ll_file_get_iov_count(iov, &nr_segs, &count); - if (result) - RETURN(result); - - env = cl_env_get(&refcheck); - if (IS_ERR(env)) - RETURN(PTR_ERR(env)); - - if (nr_segs == 1) { - local_iov = &ll_env_info(env)->lti_local_iov; - *local_iov = *iov; - } else { - OBD_ALLOC(local_iov, sizeof(*iov) * nr_segs); - if (local_iov == NULL) { - cl_env_put(env, &refcheck); - RETURN(-ENOMEM); - } + struct iov_iter from; + size_t iov_count; + ssize_t result; + ENTRY; - memcpy(local_iov, iov, sizeof(*iov) * nr_segs); - } + result = ll_file_get_iov_count(iov, &nr_segs, &iov_count); + if (result) + RETURN(result); - args = ll_env_args(env, IO_NORMAL); - args->u.normal.via_iov = local_iov; - args->u.normal.via_nrsegs = nr_segs; - args->u.normal.via_iocb = iocb; +# ifdef HAVE_IOV_ITER_INIT_DIRECTION + iov_iter_init(&from, WRITE, iov, nr_segs, iov_count); +# else /* !HAVE_IOV_ITER_INIT_DIRECTION */ + iov_iter_init(&from, iov, nr_segs, iov_count, 0); +# endif /* HAVE_IOV_ITER_INIT_DIRECTION */ - result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE, - &iocb->ki_pos, count); - cl_env_put(env, &refcheck); - - if (nr_segs > 1) - OBD_FREE(local_iov, sizeof(*iov) * nr_segs); + result = ll_file_write_iter(iocb, &from); RETURN(result); } @@ -1357,7 +1473,7 @@ static ssize_t ll_file_write(struct file *file, const char __user *buf, .iov_len = count }; struct kiocb *kiocb; ssize_t result; - int refcheck; + __u16 refcheck; ENTRY; env = cl_env_get(&refcheck); @@ -1368,9 +1484,9 @@ static ssize_t ll_file_write(struct file *file, const char __user *buf, init_sync_kiocb(kiocb, file); kiocb->ki_pos = *ppos; #ifdef HAVE_KIOCB_KI_LEFT - kiocb->ki_left = count; -#else - kiocb->ki_nbytes = count; + kiocb->ki_left = count; +#elif defined(HAVE_KI_NBYTES) + kiocb->ki_nbytes = count; #endif result = ll_file_aio_write(kiocb, &iov, 1, kiocb->ki_pos); @@ -1379,6 +1495,7 @@ static ssize_t ll_file_write(struct file *file, const char __user *buf, cl_env_put(env, &refcheck); RETURN(result); } +#endif /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */ /* * Send file content (through pagecache) somewhere with helper @@ -1390,7 +1507,7 @@ static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos, struct lu_env *env; struct vvp_io_args *args; ssize_t result; - int refcheck; + __u16 refcheck; ENTRY; env = cl_env_get(&refcheck); @@ -1422,7 +1539,7 @@ int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file, if (rc < 0) GOTO(out_unlock, rc); - ll_release_openhandle(file->f_path.dentry, &oit); + ll_release_openhandle(file_dentry(file), &oit); out_unlock: ll_inode_size_unlock(inode); @@ -1534,13 +1651,12 @@ static int ll_lov_setea(struct inode *inode, struct file *file, if (lump == NULL) RETURN(-ENOMEM); - if (copy_from_user(lump, (struct lov_user_md __user *)arg, lum_size)) { - OBD_FREE_LARGE(lump, lum_size); - RETURN(-EFAULT); - } + if (copy_from_user(lump, (struct lov_user_md __user *)arg, lum_size)) + GOTO(out_lump, rc = -EFAULT); rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size); +out_lump: OBD_FREE_LARGE(lump, lum_size); RETURN(rc); } @@ -1549,7 +1665,7 @@ static int ll_file_getstripe(struct inode *inode, struct lov_user_md __user *lum) { struct lu_env *env; - int refcheck; + __u16 refcheck; int rc; ENTRY; @@ -1706,12 +1822,11 @@ int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it) ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och); - rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, - och, inode, 0, NULL); + rc = ll_close_inode_openhandle(inode, och, 0, NULL); out: /* this one is in place of ll_file_open */ if (it_disposition(it, DISP_ENQ_OPEN_REF)) { - ptlrpc_req_finished(it->d.lustre.it_data); + ptlrpc_req_finished(it->it_request); it_clear_disposition(it, DISP_ENQ_OPEN_REF); } RETURN(rc); @@ -1727,7 +1842,7 @@ static int ll_do_fiemap(struct inode *inode, struct fiemap *fiemap, size_t num_bytes) { struct lu_env *env; - int refcheck; + __u16 refcheck; int rc = 0; struct ll_fiemap_info_key fmkey = { .lfik_name = KEY_FIEMAP, }; ENTRY; @@ -1803,6 +1918,10 @@ int ll_fid2path(struct inode *inode, void __user *arg) if (copy_from_user(gfout, arg, sizeof(*gfout))) GOTO(gf_free, rc = -EFAULT); + /* append root FID after gfout to let MDT know the root FID so that it + * can lookup the correct path, this is mainly for fileset. + * old server without fileset mount support will ignore this. */ + *gfout->gf_u.gf_root_fid = *ll_inode2fid(inode); /* Call mdc_iocontrol */ rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL); @@ -1833,7 +1952,7 @@ int ll_data_version(struct inode *inode, __u64 *data_version, int flags) struct cl_object *obj = ll_i2info(inode)->lli_clob; struct lu_env *env; struct cl_io *io; - int refcheck; + __u16 refcheck; int result; ENTRY; @@ -1876,11 +1995,11 @@ restart: */ int ll_hsm_release(struct inode *inode) { - struct cl_env_nest nest; struct lu_env *env; struct obd_client_handle *och = NULL; __u64 data_version = 0; int rc; + __u16 refcheck; ENTRY; CDEBUG(D_INODE, "%s: Releasing file "DFID".\n", @@ -1896,18 +2015,18 @@ int ll_hsm_release(struct inode *inode) if (rc != 0) GOTO(out, rc); - env = cl_env_nested_get(&nest); + env = cl_env_get(&refcheck); if (IS_ERR(env)) GOTO(out, rc = PTR_ERR(env)); ll_merge_attr(env, inode); - cl_env_nested_put(&nest, env); + cl_env_put(env, &refcheck); /* Release the file. * NB: lease lock handle is released in mdc_hsm_release_pack() because * we still need it to pack l_remote_handle to MDT. */ - rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, och, inode, - MDS_HSM_RELEASE, &data_version); + rc = ll_close_inode_openhandle(inode, och, MDS_HSM_RELEASE, + &data_version); och = NULL; EXIT; @@ -1941,8 +2060,8 @@ static int ll_swap_layouts(struct file *file1, struct file *file2, if (llss == NULL) RETURN(-ENOMEM); - llss->inode1 = file1->f_path.dentry->d_inode; - llss->inode2 = file2->f_path.dentry->d_inode; + llss->inode1 = file_inode(file1); + llss->inode2 = file_inode(file2); rc = ll_check_swap_layouts_validity(llss->inode1, llss->inode2); if (rc < 0) @@ -2033,7 +2152,7 @@ free: RETURN(rc); } -static int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss) +int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss) { struct md_op_data *op_data; int rc; @@ -2109,13 +2228,13 @@ static int ll_hsm_import(struct inode *inode, struct file *file, ATTR_MTIME | ATTR_MTIME_SET | ATTR_ATIME | ATTR_ATIME_SET; - mutex_lock(&inode->i_mutex); + inode_lock(inode); - rc = ll_setattr_raw(file->f_path.dentry, attr, true); + rc = ll_setattr_raw(file_dentry(file), attr, true); if (rc == -ENODATA) rc = 0; - mutex_unlock(&inode->i_mutex); + inode_unlock(inode); out: if (hss != NULL) @@ -2133,10 +2252,95 @@ static inline long ll_lease_type_from_fmode(fmode_t fmode) ((fmode & FMODE_WRITE) ? LL_LEASE_WRLCK : 0); } +static int ll_file_futimes_3(struct file *file, const struct ll_futimes_3 *lfu) +{ + struct inode *inode = file_inode(file); + struct iattr ia = { + .ia_valid = ATTR_ATIME | ATTR_ATIME_SET | + ATTR_MTIME | ATTR_MTIME_SET | + ATTR_CTIME | ATTR_CTIME_SET, + .ia_atime = { + .tv_sec = lfu->lfu_atime_sec, + .tv_nsec = lfu->lfu_atime_nsec, + }, + .ia_mtime = { + .tv_sec = lfu->lfu_mtime_sec, + .tv_nsec = lfu->lfu_mtime_nsec, + }, + .ia_ctime = { + .tv_sec = lfu->lfu_ctime_sec, + .tv_nsec = lfu->lfu_ctime_nsec, + }, + }; + int rc; + ENTRY; + + if (!capable(CAP_SYS_ADMIN)) + RETURN(-EPERM); + + if (!S_ISREG(inode->i_mode)) + RETURN(-EINVAL); + + inode_lock(inode); + rc = ll_setattr_raw(file_dentry(file), &ia, false); + inode_unlock(inode); + + RETURN(rc); +} + +/* + * Give file access advices + * + * The ladvise interface is similar to Linux fadvise() system call, except it + * forwards the advices directly from Lustre client to server. The server side + * codes will apply appropriate read-ahead and caching techniques for the + * corresponding files. + * + * A typical workload for ladvise is e.g. a bunch of different clients are + * doing small random reads of a file, so prefetching pages into OSS cache + * with big linear reads before the random IO is a net benefit. Fetching + * all that data into each client cache with fadvise() may not be, due to + * much more data being sent to the client. + */ +static int ll_ladvise(struct inode *inode, struct file *file, __u64 flags, + struct llapi_lu_ladvise *ladvise) +{ + struct lu_env *env; + struct cl_io *io; + struct cl_ladvise_io *lio; + int rc; + __u16 refcheck; + ENTRY; + + env = cl_env_get(&refcheck); + if (IS_ERR(env)) + RETURN(PTR_ERR(env)); + + io = vvp_env_thread_io(env); + io->ci_obj = ll_i2info(inode)->lli_clob; + + /* initialize parameters for ladvise */ + lio = &io->u.ci_ladvise; + lio->li_start = ladvise->lla_start; + lio->li_end = ladvise->lla_end; + lio->li_fid = ll_inode2fid(inode); + lio->li_advice = ladvise->lla_advice; + lio->li_flags = flags; + + if (cl_io_init(env, io, CIT_LADVISE, io->ci_obj) == 0) + rc = cl_io_loop(env, io); + else + rc = io->ci_result; + + cl_io_fini(env, io); + cl_env_put(env, &refcheck); + RETURN(rc); +} + static long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct ll_file_data *fd = LUSTRE_FPRIVATE(file); int flags, rc; ENTRY; @@ -2215,7 +2419,7 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) mutex_unlock(&lli->lli_och_mutex); if (och == NULL) GOTO(out, rc = -ENOLCK); - inode2 = file2->f_path.dentry->d_inode; + inode2 = file_inode(file2); rc = ll_swap_layouts_close(och, inode, inode2); } else { rc = ll_swap_layouts(file, file2, &lsl); @@ -2395,6 +2599,10 @@ out: if (rc < 0) RETURN(rc); + rc = ll_lease_och_release(inode, file); + if (rc < 0) + RETURN(rc); + if (lease_broken) fmode = 0; @@ -2464,7 +2672,66 @@ out: OBD_FREE_PTR(hui); RETURN(rc); } + case LL_IOC_FUTIMES_3: { + struct ll_futimes_3 lfu; + + if (copy_from_user(&lfu, + (const struct ll_futimes_3 __user *)arg, + sizeof(lfu))) + RETURN(-EFAULT); + + RETURN(ll_file_futimes_3(file, &lfu)); + } + case LL_IOC_LADVISE: { + struct llapi_ladvise_hdr *ladvise_hdr; + int i; + int num_advise; + int alloc_size = sizeof(*ladvise_hdr); + + rc = 0; + OBD_ALLOC_PTR(ladvise_hdr); + if (ladvise_hdr == NULL) + RETURN(-ENOMEM); + if (copy_from_user(ladvise_hdr, + (const struct llapi_ladvise_hdr __user *)arg, + alloc_size)) + GOTO(out_ladvise, rc = -EFAULT); + + if (ladvise_hdr->lah_magic != LADVISE_MAGIC || + ladvise_hdr->lah_count < 1) + GOTO(out_ladvise, rc = -EINVAL); + + num_advise = ladvise_hdr->lah_count; + if (num_advise >= LAH_COUNT_MAX) + GOTO(out_ladvise, rc = -EFBIG); + + OBD_FREE_PTR(ladvise_hdr); + alloc_size = offsetof(typeof(*ladvise_hdr), + lah_advise[num_advise]); + OBD_ALLOC(ladvise_hdr, alloc_size); + if (ladvise_hdr == NULL) + RETURN(-ENOMEM); + + /* + * TODO: submit multiple advices to one server in a single RPC + */ + if (copy_from_user(ladvise_hdr, + (const struct llapi_ladvise_hdr __user *)arg, + alloc_size)) + GOTO(out_ladvise, rc = -EFAULT); + + for (i = 0; i < num_advise; i++) { + rc = ll_ladvise(inode, file, ladvise_hdr->lah_flags, + &ladvise_hdr->lah_advise[i]); + if (rc) + break; + } + +out_ladvise: + OBD_FREE(ladvise_hdr, alloc_size); + RETURN(rc); + } default: { int err; @@ -2498,7 +2765,7 @@ static loff_t generic_file_llseek_size(struct file *file, loff_t offset, int origin, loff_t maxsize, loff_t eof) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); switch (origin) { case SEEK_END: @@ -2518,9 +2785,9 @@ generic_file_llseek_size(struct file *file, loff_t offset, int origin, * SEEK_CURs. Note that parallel writes and reads behave * like SEEK_SET. */ - mutex_lock(&inode->i_mutex); + inode_lock(inode); offset = llseek_execute(file, file->f_pos + offset, maxsize); - mutex_unlock(&inode->i_mutex); + inode_unlock(inode); return offset; case SEEK_DATA: /* @@ -2547,7 +2814,7 @@ generic_file_llseek_size(struct file *file, loff_t offset, int origin, static loff_t ll_file_seek(struct file *file, loff_t offset, int origin) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); loff_t retval, eof = 0; ENTRY; @@ -2572,7 +2839,7 @@ static loff_t ll_file_seek(struct file *file, loff_t offset, int origin) static int ll_flush(struct file *file, fl_owner_t id) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct ll_inode_info *lli = ll_i2info(inode); struct ll_file_data *fd = LUSTRE_FPRIVATE(file); int rc, err; @@ -2605,18 +2872,18 @@ static int ll_flush(struct file *file, fl_owner_t id) int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end, enum cl_fsync_mode mode, int ignore_layout) { - struct cl_env_nest nest; struct lu_env *env; struct cl_io *io; struct cl_fsync_io *fio; int result; + __u16 refcheck; ENTRY; if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL && mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL) RETURN(-EINVAL); - env = cl_env_nested_get(&nest); + env = cl_env_get(&refcheck); if (IS_ERR(env)) RETURN(PTR_ERR(env)); @@ -2639,25 +2906,25 @@ int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end, if (result == 0) result = fio->fi_nr_written; cl_io_fini(env, io); - cl_env_nested_put(&nest, env); + cl_env_put(env, &refcheck); RETURN(result); } /* - * When dentry is provided (the 'else' case), *file->f_path.dentry may be + * When dentry is provided (the 'else' case), file_dentry() may be * null and dentry must be used directly rather than pulled from - * *file->f_path.dentry as is done otherwise. + * file_dentry() as is done otherwise. */ #ifdef HAVE_FILE_FSYNC_4ARGS int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync) { - struct dentry *dentry = file->f_path.dentry; + struct dentry *dentry = file_dentry(file); #elif defined(HAVE_FILE_FSYNC_2ARGS) int ll_fsync(struct file *file, int datasync) { - struct dentry *dentry = file->f_path.dentry; + struct dentry *dentry = file_dentry(file); loff_t start = 0; loff_t end = LLONG_MAX; #else @@ -2669,7 +2936,6 @@ int ll_fsync(struct file *file, struct dentry *dentry, int datasync) struct inode *inode = dentry->d_inode; struct ll_inode_info *lli = ll_i2info(inode); struct ptlrpc_request *req; - struct obd_capa *oc; int rc, err; ENTRY; @@ -2679,7 +2945,7 @@ int ll_fsync(struct file *file, struct dentry *dentry, int datasync) #ifdef HAVE_FILE_FSYNC_4ARGS rc = filemap_write_and_wait_range(inode->i_mapping, start, end); - mutex_lock(&inode->i_mutex); + inode_lock(inode); #else /* fsync's caller has already called _fdata{sync,write}, we want * that IO to finish before calling the osc and mdc sync methods */ @@ -2698,10 +2964,7 @@ int ll_fsync(struct file *file, struct dentry *dentry, int datasync) rc = err; } - oc = ll_mdscapa_get(inode); - err = md_fsync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc, - &req); - capa_put(oc); + err = md_fsync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), &req); if (!rc) rc = err; if (!err) @@ -2720,7 +2983,7 @@ int ll_fsync(struct file *file, struct dentry *dentry, int datasync) } #ifdef HAVE_FILE_FSYNC_4ARGS - mutex_unlock(&inode->i_mutex); + inode_unlock(inode); #endif RETURN(rc); } @@ -2728,7 +2991,7 @@ int ll_fsync(struct file *file, struct dentry *dentry, int datasync) static int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct ll_sb_info *sbi = ll_i2sbi(inode); struct ldlm_enqueue_info einfo = { .ei_type = LDLM_FLOCK, @@ -2736,8 +2999,8 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) .ei_cbdata = file_lock, }; struct md_op_data *op_data; - struct lustre_handle lockh = {0}; - ldlm_policy_data_t flock = {{0}}; + struct lustre_handle lockh = { 0 }; + union ldlm_policy_data flock = { { 0 } }; int fl_type = file_lock->fl_type; __u64 flags = 0; int rc; @@ -2830,8 +3093,8 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) if (IS_ERR(op_data)) RETURN(PTR_ERR(op_data)); - CDEBUG(D_DLMTRACE, "inode="DFID", pid=%u, flags="LPX64", mode=%u, " - "start="LPU64", end="LPU64"\n", PFID(ll_inode2fid(inode)), + CDEBUG(D_DLMTRACE, "inode="DFID", pid=%u, flags=%#llx, mode=%u, " + "start=%llu, end=%llu\n", PFID(ll_inode2fid(inode)), flock.l_flock.pid, flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end); @@ -2842,6 +3105,11 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) if (!(flags & LDLM_FL_TEST_LOCK)) file_lock->fl_type = fl_type; +#ifdef HAVE_LOCKS_LOCK_FILE_WAIT + if ((rc == 0 || file_lock->fl_type == F_UNLCK) && + !(flags & LDLM_FL_TEST_LOCK)) + rc2 = locks_lock_file_wait(file, file_lock); +#else if ((file_lock->fl_flags & FL_FLOCK) && (rc == 0 || file_lock->fl_type == F_UNLCK)) rc2 = flock_lock_file_wait(file, file_lock); @@ -2849,6 +3117,7 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) (rc == 0 || file_lock->fl_type == F_UNLCK) && !(flags & LDLM_FL_TEST_LOCK)) rc2 = posix_lock_file_wait(file, file_lock); +#endif /* HAVE_LOCKS_LOCK_FILE_WAIT */ if (rc2 && file_lock->fl_type != F_UNLCK) { einfo.ei_mode = LCK_NL; @@ -2863,7 +3132,8 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) } int ll_get_fid_by_name(struct inode *parent, const char *name, - int namelen, struct lu_fid *fid) + int namelen, struct lu_fid *fid, + struct inode **inode) { struct md_op_data *op_data = NULL; struct mdt_body *body; @@ -2876,7 +3146,7 @@ int ll_get_fid_by_name(struct inode *parent, const char *name, if (IS_ERR(op_data)) RETURN(PTR_ERR(op_data)); - op_data->op_valid = OBD_MD_FLID; + op_data->op_valid = OBD_MD_FLID | OBD_MD_FLTYPE; rc = md_getattr_name(ll_i2sbi(parent)->ll_md_exp, op_data, &req); ll_finish_md_op_data(op_data); if (rc < 0) @@ -2887,6 +3157,9 @@ int ll_get_fid_by_name(struct inode *parent, const char *name, GOTO(out_req, rc = -EFAULT); if (fid != NULL) *fid = body->mbo_fid1; + + if (inode != NULL) + rc = ll_prep_inode(inode, req, parent->i_sb, NULL); out_req: ptlrpc_req_finished(req); RETURN(rc); @@ -2899,8 +3172,11 @@ int ll_migrate(struct inode *parent, struct file *file, int mdtidx, struct inode *child_inode = NULL; struct md_op_data *op_data; struct ptlrpc_request *request = NULL; + struct obd_client_handle *och = NULL; struct qstr qstr; + struct mdt_body *body; int rc; + __u64 data_version = 0; ENTRY; CDEBUG(D_VFSTRACE, "migrate %s under "DFID" to MDT%04x\n", @@ -2915,39 +3191,68 @@ int ll_migrate(struct inode *parent, struct file *file, int mdtidx, qstr.hash = full_name_hash(name, namelen); qstr.name = name; qstr.len = namelen; - dchild = d_lookup(file->f_path.dentry, &qstr); + dchild = d_lookup(file_dentry(file), &qstr); if (dchild != NULL) { - if (dchild->d_inode != NULL) { + if (dchild->d_inode != NULL) child_inode = igrab(dchild->d_inode); - if (child_inode != NULL) { - mutex_lock(&child_inode->i_mutex); - op_data->op_fid3 = *ll_inode2fid(child_inode); - ll_invalidate_aliases(child_inode); - } - } dput(dchild); - } else { + } + + if (child_inode == NULL) { rc = ll_get_fid_by_name(parent, name, namelen, - &op_data->op_fid3); + &op_data->op_fid3, &child_inode); if (rc != 0) GOTO(out_free, rc); } + if (child_inode == NULL) + GOTO(out_free, rc = -EINVAL); + + /* + * lfs migrate command needs to be blocked on the client + * by checking the migrate FID against the FID of the + * filesystem root. + */ + if (child_inode == parent->i_sb->s_root->d_inode) + GOTO(out_iput, rc = -EINVAL); + + inode_lock(child_inode); + op_data->op_fid3 = *ll_inode2fid(child_inode); if (!fid_is_sane(&op_data->op_fid3)) { - CERROR("%s: migrate %s , but fid "DFID" is insane\n", + CERROR("%s: migrate %s, but FID "DFID" is insane\n", ll_get_fsname(parent->i_sb, NULL, 0), name, PFID(&op_data->op_fid3)); - GOTO(out_free, rc = -EINVAL); + GOTO(out_unlock, rc = -EINVAL); } rc = ll_get_mdt_idx_by_fid(ll_i2sbi(parent), &op_data->op_fid3); if (rc < 0) - GOTO(out_free, rc); + GOTO(out_unlock, rc); if (rc == mdtidx) { - CDEBUG(D_INFO, "%s:"DFID" is already on MDT%d.\n", name, + CDEBUG(D_INFO, "%s: "DFID" is already on MDT%04x\n", name, PFID(&op_data->op_fid3), mdtidx); - GOTO(out_free, rc = 0); + GOTO(out_unlock, rc = 0); + } +again: + if (S_ISREG(child_inode->i_mode)) { + och = ll_lease_open(child_inode, NULL, FMODE_WRITE, 0); + if (IS_ERR(och)) { + rc = PTR_ERR(och); + och = NULL; + GOTO(out_unlock, rc); + } + + rc = ll_data_version(child_inode, &data_version, + LL_DV_WR_FLUSH); + if (rc != 0) + GOTO(out_close, rc); + + op_data->op_handle = och->och_fh; + op_data->op_data = och->och_mod; + op_data->op_data_version = data_version; + op_data->op_lease_handle = och->och_lease_handle; + op_data->op_bias |= MDS_RENAME_MIGRATE; } op_data->op_mds = mdtidx; @@ -2957,17 +3262,42 @@ int ll_migrate(struct inode *parent, struct file *file, int mdtidx, if (rc == 0) ll_update_times(request, parent); - ptlrpc_req_finished(request); - if (rc != 0) - GOTO(out_free, rc); + if (request != NULL) { + body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY); + if (body == NULL) { + ptlrpc_req_finished(request); + GOTO(out_close, rc = -EPROTO); + } -out_free: - if (child_inode != NULL) { - clear_nlink(child_inode); - mutex_unlock(&child_inode->i_mutex); - iput(child_inode); + /* If the server does release layout lock, then we cleanup + * the client och here, otherwise release it in out_close: */ + if (och != NULL && + body->mbo_valid & OBD_MD_CLOSE_INTENT_EXECED) { + obd_mod_put(och->och_mod); + md_clear_open_replay_data(ll_i2sbi(parent)->ll_md_exp, + och); + och->och_fh.cookie = DEAD_HANDLE_MAGIC; + OBD_FREE_PTR(och); + och = NULL; + } + ptlrpc_req_finished(request); } + /* Try again if the file layout has changed. */ + if (rc == -EAGAIN && S_ISREG(child_inode->i_mode)) { + request = NULL; + goto again; + } +out_close: + if (och != NULL) /* close the file */ + ll_lease_close(och, child_inode, NULL); + if (rc == 0) + clear_nlink(child_inode); +out_unlock: + inode_unlock(child_inode); +out_iput: + iput(child_inode); +out_free: ll_finish_md_op_data(op_data); RETURN(rc); } @@ -2990,16 +3320,16 @@ ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock) * \param l_req_mode [IN] searched lock mode * \retval boolean, true iff all bits are found */ -int ll_have_md_lock(struct inode *inode, __u64 *bits, ldlm_mode_t l_req_mode) +int ll_have_md_lock(struct inode *inode, __u64 *bits, enum ldlm_mode l_req_mode) { - struct lustre_handle lockh; - ldlm_policy_data_t policy; - ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ? - (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode; - struct lu_fid *fid; + struct lustre_handle lockh; + union ldlm_policy_data policy; + enum ldlm_mode mode = (l_req_mode == LCK_MINMODE) ? + (LCK_CR | LCK_CW | LCK_PR | LCK_PW) : l_req_mode; + struct lu_fid *fid; __u64 flags; - int i; - ENTRY; + int i; + ENTRY; if (!inode) RETURN(0); @@ -3031,17 +3361,17 @@ int ll_have_md_lock(struct inode *inode, __u64 *bits, ldlm_mode_t l_req_mode) RETURN(*bits == 0); } -ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits, - struct lustre_handle *lockh, __u64 flags, - ldlm_mode_t mode) +enum ldlm_mode ll_take_md_lock(struct inode *inode, __u64 bits, + struct lustre_handle *lockh, __u64 flags, + enum ldlm_mode mode) { - ldlm_policy_data_t policy = { .l_inodebits = {bits}}; - struct lu_fid *fid; - ldlm_mode_t rc; - ENTRY; + union ldlm_policy_data policy = { .l_inodebits = { bits } }; + struct lu_fid *fid; + enum ldlm_mode rc; + ENTRY; - fid = &ll_i2info(inode)->lli_fid; - CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid)); + fid = &ll_i2info(inode)->lli_fid; + CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid)); rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags, fid, LDLM_IBITS, &policy, mode, lockh); @@ -3054,6 +3384,13 @@ static int ll_inode_revalidate_fini(struct inode *inode, int rc) /* Already unlinked. Just update nlink and return success */ if (rc == -ENOENT) { clear_nlink(inode); + /* If it is striped directory, and there is bad stripe + * Let's revalidate the dentry again, instead of returning + * error */ + if (S_ISDIR(inode->i_mode) && + ll_i2info(inode)->lli_lsm_md != NULL) + return 0; + /* This path cannot be hit for regular files unless in * case of obscure races, so no need to to validate * size. */ @@ -3119,8 +3456,11 @@ static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits) do_lookup() -> ll_revalidate_it(). We cannot use d_drop here to preserve get_cwd functionality on 2.6. Bug 10503 */ - if (!dentry->d_inode->i_nlink) + if (!dentry->d_inode->i_nlink) { + ll_lock_dcache(inode); d_lustre_invalidate(dentry, 0); + ll_unlock_dcache(inode); + } ll_lookup_finish_locks(&oit, dentry); } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) { @@ -3143,9 +3483,6 @@ static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits) RETURN(PTR_ERR(op_data)); op_data->op_valid = valid; - /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one - * capa for this inode. Because we only keep capas of dirs - * fresh. */ rc = md_getattr(sbi->ll_md_exp, op_data, &req); ll_finish_md_op_data(op_data); if (rc) { @@ -3213,7 +3550,7 @@ ll_inode_revalidate(struct dentry *dentry, __u64 ibits) * restore the MDT holds the layout lock so the glimpse will * block up to the end of restore (getattr will block) */ - if (!(ll_i2info(inode)->lli_flags & LLIF_FILE_RESTORING)) + if (!ll_file_test_flag(ll_i2info(inode), LLIF_FILE_RESTORING)) rc = ll_glimpse_size(inode); } RETURN(rc); @@ -3233,6 +3570,8 @@ int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat) if (res) return res; + OBD_FAIL_TIMEOUT(OBD_FAIL_GETATTR_DELAY, 30); + stat->dev = inode->i_sb->s_dev; if (ll_need_32bit_api(sbi)) stat->ino = cl_fid_build_ino(&lli->lli_fid, 1); @@ -3407,12 +3746,7 @@ int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd) } ll_stats_ops_tally(sbi, LPROC_LL_INODE_PERM, 1); - - if (sbi->ll_flags & LL_SBI_RMT_CLIENT) - rc = lustre_check_remote_perm(inode, mask); - else - rc = ll_generic_permission(inode, mask, flags, ll_check_acl); - + rc = ll_generic_permission(inode, mask, flags, ll_check_acl); /* restore current process's credentials and FS capability */ if (squash_id) { revert_creds(old_cred); @@ -3424,53 +3758,80 @@ int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd) /* -o localflock - only provides locally consistent flock locks */ struct file_operations ll_file_operations = { - .read = ll_file_read, - .aio_read = ll_file_aio_read, - .write = ll_file_write, - .aio_write = ll_file_aio_write, - .unlocked_ioctl = ll_file_ioctl, - .open = ll_file_open, - .release = ll_file_release, - .mmap = ll_file_mmap, - .llseek = ll_file_seek, - .splice_read = ll_file_splice_read, - .fsync = ll_fsync, - .flush = ll_flush +#ifdef HAVE_FILE_OPERATIONS_READ_WRITE_ITER +# ifdef HAVE_SYNC_READ_WRITE + .read = new_sync_read, + .write = new_sync_write, +# endif + .read_iter = ll_file_read_iter, + .write_iter = ll_file_write_iter, +#else /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */ + .read = ll_file_read, + .aio_read = ll_file_aio_read, + .write = ll_file_write, + .aio_write = ll_file_aio_write, +#endif /* HAVE_FILE_OPERATIONS_READ_WRITE_ITER */ + .unlocked_ioctl = ll_file_ioctl, + .open = ll_file_open, + .release = ll_file_release, + .mmap = ll_file_mmap, + .llseek = ll_file_seek, + .splice_read = ll_file_splice_read, + .fsync = ll_fsync, + .flush = ll_flush }; struct file_operations ll_file_operations_flock = { - .read = ll_file_read, - .aio_read = ll_file_aio_read, - .write = ll_file_write, - .aio_write = ll_file_aio_write, - .unlocked_ioctl = ll_file_ioctl, - .open = ll_file_open, - .release = ll_file_release, - .mmap = ll_file_mmap, - .llseek = ll_file_seek, - .splice_read = ll_file_splice_read, - .fsync = ll_fsync, - .flush = ll_flush, - .flock = ll_file_flock, - .lock = ll_file_flock +#ifdef HAVE_FILE_OPERATIONS_READ_WRITE_ITER +# ifdef HAVE_SYNC_READ_WRITE + .read = new_sync_read, + .write = new_sync_write, +# endif /* HAVE_SYNC_READ_WRITE */ + .read_iter = ll_file_read_iter, + .write_iter = ll_file_write_iter, +#else /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */ + .read = ll_file_read, + .aio_read = ll_file_aio_read, + .write = ll_file_write, + .aio_write = ll_file_aio_write, +#endif /* HAVE_FILE_OPERATIONS_READ_WRITE_ITER */ + .unlocked_ioctl = ll_file_ioctl, + .open = ll_file_open, + .release = ll_file_release, + .mmap = ll_file_mmap, + .llseek = ll_file_seek, + .splice_read = ll_file_splice_read, + .fsync = ll_fsync, + .flush = ll_flush, + .flock = ll_file_flock, + .lock = ll_file_flock }; /* These are for -o noflock - to return ENOSYS on flock calls */ struct file_operations ll_file_operations_noflock = { - .read = ll_file_read, - .aio_read = ll_file_aio_read, - .write = ll_file_write, - .aio_write = ll_file_aio_write, - .unlocked_ioctl = ll_file_ioctl, - .open = ll_file_open, - .release = ll_file_release, - .mmap = ll_file_mmap, - .llseek = ll_file_seek, - .splice_read = ll_file_splice_read, - .fsync = ll_fsync, - .flush = ll_flush, - .flock = ll_file_noflock, - .lock = ll_file_noflock +#ifdef HAVE_FILE_OPERATIONS_READ_WRITE_ITER +# ifdef HAVE_SYNC_READ_WRITE + .read = new_sync_read, + .write = new_sync_write, +# endif /* HAVE_SYNC_READ_WRITE */ + .read_iter = ll_file_read_iter, + .write_iter = ll_file_write_iter, +#else /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */ + .read = ll_file_read, + .aio_read = ll_file_aio_read, + .write = ll_file_write, + .aio_write = ll_file_aio_write, +#endif /* HAVE_FILE_OPERATIONS_READ_WRITE_ITER */ + .unlocked_ioctl = ll_file_ioctl, + .open = ll_file_open, + .release = ll_file_release, + .mmap = ll_file_mmap, + .llseek = ll_file_seek, + .splice_read = ll_file_splice_read, + .fsync = ll_fsync, + .flush = ll_flush, + .flock = ll_file_noflock, + .lock = ll_file_noflock }; struct inode_operations ll_file_inode_operations = { @@ -3592,15 +3953,15 @@ int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf) { struct ll_inode_info *lli = ll_i2info(inode); struct cl_object *obj = lli->lli_clob; - struct cl_env_nest nest; struct lu_env *env; int rc; + __u16 refcheck; ENTRY; if (obj == NULL) RETURN(0); - env = cl_env_nested_get(&nest); + env = cl_env_get(&refcheck); if (IS_ERR(env)) RETURN(PTR_ERR(env)); @@ -3635,7 +3996,7 @@ int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf) } out: - cl_env_nested_put(&nest, env); + cl_env_put(env, &refcheck); RETURN(rc); } @@ -3645,7 +4006,6 @@ static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock) { struct ll_sb_info *sbi = ll_i2sbi(inode); - struct obd_capa *oc; struct ptlrpc_request *req; struct mdt_body *body; void *lvbdata; @@ -3666,13 +4026,11 @@ static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock) * blocked and then granted via completion ast, we have to fetch * layout here. Please note that we can't use the LVB buffer in * completion AST because it doesn't have a large enough buffer */ - oc = ll_mdscapa_get(inode); rc = ll_get_default_mdsize(sbi, &lmmsize); if (rc == 0) - rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, + rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0, lmmsize, 0, &req); - capa_put(oc); if (rc < 0) RETURN(rc); @@ -3702,7 +4060,7 @@ static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock) } unlock_res_and_lock(lock); - if (lvbdata != NULL) + if (lvbdata) OBD_FREE_LARGE(lvbdata, lmmsize); EXIT; @@ -3716,7 +4074,7 @@ out: * Apply the layout to the inode. Layout lock is held and will be released * in this function. */ -static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode, +static int ll_layout_lock_set(struct lustre_handle *lockh, enum ldlm_mode mode, struct inode *inode) { struct ll_inode_info *lli = ll_i2info(inode); @@ -3738,7 +4096,7 @@ static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode, PFID(&lli->lli_fid), inode); /* in case this is a caching lock and reinstate with new inode */ - md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL); + md_set_lock_data(sbi->ll_md_exp, lockh, inode, NULL); lock_res_and_lock(lock); lvb_ready = ldlm_is_lvb_ready(lock); @@ -3800,9 +4158,9 @@ static int ll_layout_refresh_locked(struct inode *inode) struct ll_inode_info *lli = ll_i2info(inode); struct ll_sb_info *sbi = ll_i2sbi(inode); struct md_op_data *op_data; - struct lookup_intent it; - struct lustre_handle lockh; - ldlm_mode_t mode; + struct lookup_intent it; + struct lustre_handle lockh; + enum ldlm_mode mode; struct ldlm_enqueue_info einfo = { .ei_type = LDLM_IBITS, .ei_mode = LCK_CR, @@ -3840,14 +4198,14 @@ again: PFID(&lli->lli_fid), inode); rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL, &it, op_data, &lockh, 0); - if (it.d.lustre.it_data != NULL) - ptlrpc_req_finished(it.d.lustre.it_data); - it.d.lustre.it_data = NULL; + if (it.it_request != NULL) + ptlrpc_req_finished(it.it_request); + it.it_request = NULL; ll_finish_md_op_data(op_data); - mode = it.d.lustre.it_lock_mode; - it.d.lustre.it_lock_mode = 0; + mode = it.it_lock_mode; + it.it_lock_mode = 0; ll_intent_drop_lock(&it); if (rc == 0) {