Whamcloud - gitweb
LU-11595 mdt: fix read-on-open for big PAGE_SIZE
[fs/lustre-release.git] / lustre / llite / file.c
index 16fb348..f866fba 100644 (file)
@@ -99,14 +99,15 @@ static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
        op_data->op_attr.ia_mtime = inode->i_mtime;
        op_data->op_attr.ia_ctime = inode->i_ctime;
        op_data->op_attr.ia_size = i_size_read(inode);
-       op_data->op_attr.ia_valid |= ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
-                                    ATTR_MTIME | ATTR_MTIME_SET |
-                                    ATTR_CTIME | ATTR_CTIME_SET;
+       op_data->op_attr.ia_valid |= (ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
+                                     ATTR_MTIME | ATTR_MTIME_SET |
+                                     ATTR_CTIME);
+       op_data->op_xvalid |= OP_XVALID_CTIME_SET;
        op_data->op_attr_blocks = inode->i_blocks;
        op_data->op_attr_flags = ll_inode_to_ext_flags(inode->i_flags);
        if (ll_file_test_flag(ll_i2info(inode), LLIF_PROJECT_INHERIT))
                op_data->op_attr_flags |= LUSTRE_PROJINHERIT_FL;
-       op_data->op_handle = och->och_fh;
+       op_data->op_open_handle = och->och_open_handle;
 
        if (och->och_flags & FMODE_WRITE &&
            ll_file_test_and_clear_flag(ll_i2info(inode), LLIF_DATA_MODIFIED))
@@ -154,7 +155,8 @@ static int ll_close_inode_openhandle(struct inode *inode,
        case MDS_CLOSE_LAYOUT_MERGE:
                /* merge blocks from the victim inode */
                op_data->op_attr_blocks += ((struct inode *)data)->i_blocks;
-               op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
+               op_data->op_attr.ia_valid |= ATTR_SIZE;
+               op_data->op_xvalid |= OP_XVALID_BLOCKS;
        case MDS_CLOSE_LAYOUT_SPLIT:
        case MDS_CLOSE_LAYOUT_SWAP: {
                struct split_param *sp = data;
@@ -178,7 +180,8 @@ static int ll_close_inode_openhandle(struct inode *inode,
                LASSERT(data != NULL);
                op_data->op_attr_blocks +=
                        ioc->lil_count * op_data->op_attr_blocks;
-               op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
+               op_data->op_attr.ia_valid |= ATTR_SIZE;
+               op_data->op_xvalid |= OP_XVALID_BLOCKS;
                op_data->op_bias |= MDS_CLOSE_RESYNC_DONE;
 
                op_data->op_lease_handle = och->och_lease_handle;
@@ -193,7 +196,8 @@ static int ll_close_inode_openhandle(struct inode *inode,
                op_data->op_bias |= MDS_HSM_RELEASE;
                op_data->op_data_version = *(__u64 *)data;
                op_data->op_lease_handle = och->och_lease_handle;
-               op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
+               op_data->op_attr.ia_valid |= ATTR_SIZE;
+               op_data->op_xvalid |= OP_XVALID_BLOCKS;
                break;
 
        default:
@@ -202,9 +206,9 @@ static int ll_close_inode_openhandle(struct inode *inode,
        }
 
        if (!(op_data->op_attr.ia_valid & ATTR_SIZE))
-               op_data->op_attr.ia_valid |= MDS_ATTR_LSIZE;
-       if (!(op_data->op_attr.ia_valid & ATTR_BLOCKS))
-               op_data->op_attr.ia_valid |= MDS_ATTR_LBLOCKS;
+               op_data->op_xvalid |= OP_XVALID_LAZYSIZE;
+       if (!(op_data->op_xvalid & OP_XVALID_BLOCKS))
+               op_data->op_xvalid |= OP_XVALID_LAZYBLOCKS;
 
        rc = md_close(md_exp, op_data, och->och_mod, &req);
        if (rc != 0 && rc != -EINTR)
@@ -224,7 +228,7 @@ static int ll_close_inode_openhandle(struct inode *inode,
 out:
 
        md_clear_open_replay_data(md_exp, och);
-       och->och_fh.cookie = DEAD_HANDLE_MAGIC;
+       och->och_open_handle.cookie = DEAD_HANDLE_MAGIC;
        OBD_FREE_PTR(och);
 
        ptlrpc_req_finished(req);       /* This is close request */
@@ -410,14 +414,10 @@ void ll_dom_finish_open(struct inode *inode, struct ptlrpc_request *req,
        struct page *vmpage;
        struct niobuf_remote *rnb;
        char *data;
-       struct lu_env *env;
-       struct cl_io *io;
-       __u16 refcheck;
        struct lustre_handle lockh;
        struct ldlm_lock *lock;
        unsigned long index, start;
        struct niobuf_local lnb;
-       int rc;
        bool dom_lock = false;
 
        ENTRY;
@@ -432,33 +432,39 @@ void ll_dom_finish_open(struct inode *inode, struct ptlrpc_request *req,
                        dom_lock = ldlm_has_dom(lock);
                LDLM_LOCK_PUT(lock);
        }
-
        if (!dom_lock)
                RETURN_EXIT;
 
-       env = cl_env_get(&refcheck);
-       if (IS_ERR(env))
-               RETURN_EXIT;
-
        if (!req_capsule_has_field(&req->rq_pill, &RMF_NIOBUF_INLINE,
                                   RCL_SERVER))
-               GOTO(out_env, rc = -ENODATA);
+               RETURN_EXIT;
 
        rnb = req_capsule_server_get(&req->rq_pill, &RMF_NIOBUF_INLINE);
-       data = (char *)rnb + sizeof(*rnb);
-
        if (rnb == NULL || rnb->rnb_len == 0)
-               GOTO(out_env, rc = 0);
+               RETURN_EXIT;
 
-       CDEBUG(D_INFO, "Get data buffer along with open, len %i, i_size %llu\n",
-              rnb->rnb_len, i_size_read(inode));
+       /* LU-11595: The server may return the whole file, which is always
+        * usable, or just the file tail. A tail is usable only if its offset
+        * is aligned to the client PAGE_SIZE; if the server's PAGE_SIZE is
+        * smaller, the offset may be unaligned and the data is simply ignored.
+        */
+       if (rnb->rnb_offset % PAGE_SIZE)
+               RETURN_EXIT;
 
-       io = vvp_env_thread_io(env);
-       io->ci_obj = obj;
-       io->ci_ignore_layout = 1;
-       rc = cl_io_init(env, io, CIT_MISC, obj);
-       if (rc)
-               GOTO(out_io, rc);
+       /* Server returns whole file or just file tail if it fills in
+        * reply buffer, in both cases total size should be inode size.
+        */
+       if (rnb->rnb_offset + rnb->rnb_len < i_size_read(inode)) {
+               CERROR("%s: server returns off/len %llu/%u < i_size %llu\n",
+                      ll_get_fsname(inode->i_sb, NULL, 0), rnb->rnb_offset,
+                      rnb->rnb_len, i_size_read(inode));
+               RETURN_EXIT;
+       }
+
+       CDEBUG(D_INFO, "Get data along with open at %llu len %i, i_size %llu\n",
+              rnb->rnb_offset, rnb->rnb_len, i_size_read(inode));
+
+       data = (char *)rnb + sizeof(*rnb);
 
        lnb.lnb_file_offset = rnb->rnb_offset;
        start = lnb.lnb_file_offset / PAGE_SIZE;
@@ -466,8 +472,6 @@ void ll_dom_finish_open(struct inode *inode, struct ptlrpc_request *req,
        LASSERT(lnb.lnb_file_offset % PAGE_SIZE == 0);
        lnb.lnb_page_offset = 0;
        do {
-               struct cl_page *clp;
-
                lnb.lnb_data = data + (index << PAGE_SHIFT);
                lnb.lnb_len = rnb->rnb_len - (index << PAGE_SHIFT);
                if (lnb.lnb_len > PAGE_SIZE)
@@ -483,34 +487,10 @@ void ll_dom_finish_open(struct inode *inode, struct ptlrpc_request *req,
                              PTR_ERR(vmpage));
                        break;
                }
-               lock_page(vmpage);
-               if (vmpage->mapping == NULL) {
-                       unlock_page(vmpage);
-                       put_page(vmpage);
-                       /* page was truncated */
-                       GOTO(out_io, rc = -ENODATA);
-               }
-               clp = cl_page_find(env, obj, vmpage->index, vmpage,
-                                  CPT_CACHEABLE);
-               if (IS_ERR(clp)) {
-                       unlock_page(vmpage);
-                       put_page(vmpage);
-                       GOTO(out_io, rc = PTR_ERR(clp));
-               }
-
-               /* export page */
-               cl_page_export(env, clp, 1);
-               cl_page_put(env, clp);
-               unlock_page(vmpage);
                put_page(vmpage);
                index++;
        } while (rnb->rnb_len > (index << PAGE_SHIFT));
-       rc = 0;
        EXIT;
-out_io:
-       cl_io_fini(env, io);
-out_env:
-       cl_env_put(env, &refcheck);
 }
 
 static int ll_intent_file_open(struct dentry *de, void *lmm, int lmmsize,
@@ -596,7 +576,7 @@ static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
        struct mdt_body *body;
 
        body = req_capsule_server_get(&it->it_request->rq_pill, &RMF_MDT_BODY);
-       och->och_fh = body->mbo_handle;
+       och->och_open_handle = body->mbo_open_handle;
        och->och_fid = body->mbo_fid1;
        och->och_lease_handle.cookie = it->it_lock_handle;
        och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
@@ -686,12 +666,13 @@ int ll_file_open(struct inode *inode, struct file *file)
                 if (file->f_flags & O_TRUNC)
                         oit.it_flags |= FMODE_WRITE;
 
-                /* kernel only call f_op->open in dentry_open.  filp_open calls
-                 * dentry_open after call to open_namei that checks permissions.
-                 * Only nfsd_open call dentry_open directly without checking
-                 * permissions and because of that this code below is safe. */
-                if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
-                        oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
+               /* The kernel only calls f_op->open from dentry_open. filp_open
+                * calls dentry_open after open_namei has checked permissions.
+                * Only nfsd_open calls dentry_open directly without checking
+                * permissions, and because of that the code below is safe.
+                */
+               if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
+                       oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
 
                 /* We do not want O_EXCL here, presumably we opened the file
                  * already? XXX - NFS implications? */
@@ -873,7 +854,7 @@ static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
  * if it has an open lock in cache already.
  */
 static int ll_lease_och_acquire(struct inode *inode, struct file *file,
-                               struct lustre_handle *old_handle)
+                               struct lustre_handle *old_open_handle)
 {
        struct ll_inode_info *lli = ll_i2info(inode);
        struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
@@ -906,7 +887,7 @@ static int ll_lease_och_acquire(struct inode *inode, struct file *file,
                *och_p = NULL;
        }
 
-       *old_handle = fd->fd_och->och_fh;
+       *old_open_handle = fd->fd_och->och_open_handle;
 
        EXIT;
 out_unlock:
@@ -967,7 +948,7 @@ ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
        struct ll_sb_info *sbi = ll_i2sbi(inode);
        struct md_op_data *op_data;
        struct ptlrpc_request *req = NULL;
-       struct lustre_handle old_handle = { 0 };
+       struct lustre_handle old_open_handle = { 0 };
        struct obd_client_handle *och = NULL;
        int rc;
        int rc2;
@@ -980,7 +961,7 @@ ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
                if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
                        RETURN(ERR_PTR(-EPERM));
 
-               rc = ll_lease_och_acquire(inode, file, &old_handle);
+               rc = ll_lease_och_acquire(inode, file, &old_open_handle);
                if (rc)
                        RETURN(ERR_PTR(rc));
        }
@@ -995,7 +976,7 @@ ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
                GOTO(out, rc = PTR_ERR(op_data));
 
        /* To tell the MDT this openhandle is from the same owner */
-       op_data->op_handle = old_handle;
+       op_data->op_open_handle = old_open_handle;
 
        it.it_flags = fmode | open_flags;
        it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
@@ -1174,10 +1155,11 @@ static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
  * After lease is taken, send the RPC MDS_REINT_RESYNC to the MDT
  */
 static int ll_lease_file_resync(struct obd_client_handle *och,
-                               struct inode *inode)
+                               struct inode *inode, unsigned long arg)
 {
        struct ll_sb_info *sbi = ll_i2sbi(inode);
        struct md_op_data *op_data;
+       struct ll_ioc_lease_id ioc;
        __u64 data_version_unused;
        int rc;
        ENTRY;
@@ -1187,6 +1169,10 @@ static int ll_lease_file_resync(struct obd_client_handle *och,
        if (IS_ERR(op_data))
                RETURN(PTR_ERR(op_data));
 
+       if (copy_from_user(&ioc, (struct ll_ioc_lease_id __user *)arg,
+                          sizeof(ioc)))
+               RETURN(-EFAULT);
+
        /* before starting file resync, it's necessary to clean up page cache
         * in client memory, otherwise once the layout version is increased,
         * writing back cached data will be denied the OSTs. */
@@ -1194,7 +1180,8 @@ static int ll_lease_file_resync(struct obd_client_handle *och,
        if (rc)
                GOTO(out, rc);
 
-       op_data->op_handle = och->och_lease_handle;
+       op_data->op_lease_handle = och->och_lease_handle;
+       op_data->op_mirror_id = ioc.lil_mirror_id;
        rc = md_file_resync(sbi->ll_md_exp, op_data);
        if (rc)
                GOTO(out, rc);
@@ -2745,7 +2732,7 @@ static int ll_hsm_import(struct inode *inode, struct file *file,
 
        inode_lock(inode);
 
-       rc = ll_setattr_raw(file_dentry(file), attr, true);
+       rc = ll_setattr_raw(file_dentry(file), attr, 0, true);
        if (rc == -ENODATA)
                rc = 0;
 
@@ -2773,7 +2760,7 @@ static int ll_file_futimes_3(struct file *file, const struct ll_futimes_3 *lfu)
        struct iattr ia = {
                .ia_valid = ATTR_ATIME | ATTR_ATIME_SET |
                            ATTR_MTIME | ATTR_MTIME_SET |
-                           ATTR_CTIME | ATTR_CTIME_SET,
+                           ATTR_CTIME,
                .ia_atime = {
                        .tv_sec = lfu->lfu_atime_sec,
                        .tv_nsec = lfu->lfu_atime_nsec,
@@ -2797,7 +2784,8 @@ static int ll_file_futimes_3(struct file *file, const struct ll_futimes_3 *lfu)
                RETURN(-EINVAL);
 
        inode_lock(inode);
-       rc = ll_setattr_raw(file_dentry(file), &ia, false);
+       rc = ll_setattr_raw(file_dentry(file), &ia, OP_XVALID_CTIME_SET,
+                           false);
        inode_unlock(inode);
 
        RETURN(rc);
@@ -3065,6 +3053,30 @@ int ll_ioctl_fsgetxattr(struct inode *inode, unsigned int cmd,
        RETURN(0);
 }
 
+int ll_ioctl_check_project(struct inode *inode, struct fsxattr *fa)
+{
+       /*
+        * Callers in the init user namespace are always allowed (return 0).
+        * Any other namespace gets -EINVAL unless the requested project ID
+        * and PROJINHERIT flag both match what the inode already records.
+        */
+       if (current_user_ns() == &init_user_ns)
+               return 0;
+
+       if (ll_i2info(inode)->lli_projid != fa->fsx_projid)
+               return -EINVAL;
+
+       if (ll_file_test_flag(ll_i2info(inode), LLIF_PROJECT_INHERIT)) {
+               if (!(fa->fsx_xflags & FS_XFLAG_PROJINHERIT))
+                       return -EINVAL;
+       } else {
+               if (fa->fsx_xflags & FS_XFLAG_PROJINHERIT)
+                       return -EINVAL;
+       }
+
+       return 0;
+}
+
 int ll_ioctl_fssetxattr(struct inode *inode, unsigned int cmd,
                        unsigned long arg)
 {
@@ -3077,26 +3089,26 @@ int ll_ioctl_fssetxattr(struct inode *inode, unsigned int cmd,
        struct iattr *attr;
        int flags;
 
-       /* only root could change project ID */
-       if (!cfs_capable(CFS_CAP_SYS_ADMIN))
-               RETURN(-EPERM);
+       if (copy_from_user(&fsxattr,
+                          (const struct fsxattr __user *)arg,
+                          sizeof(fsxattr)))
+               RETURN(-EFAULT);
+
+       rc = ll_ioctl_check_project(inode, &fsxattr);
+       if (rc)
+               RETURN(rc);
 
        op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
                                     LUSTRE_OPC_ANY, NULL);
        if (IS_ERR(op_data))
                RETURN(PTR_ERR(op_data));
 
-       if (copy_from_user(&fsxattr,
-                          (const struct fsxattr __user *)arg,
-                          sizeof(fsxattr)))
-               GOTO(out_fsxattr, rc = -EFAULT);
-
        flags = ll_xflags_to_inode_flags(fsxattr.fsx_xflags);
        op_data->op_attr_flags = ll_inode_to_ext_flags(flags);
        if (fsxattr.fsx_xflags & FS_XFLAG_PROJINHERIT)
                op_data->op_attr_flags |= LUSTRE_PROJINHERIT_FL;
        op_data->op_projid = fsxattr.fsx_projid;
-       op_data->op_attr.ia_valid |= (MDS_ATTR_PROJID | ATTR_ATTR_FLAG);
+       op_data->op_xvalid |= OP_XVALID_PROJID | OP_XVALID_FLAGS;
        rc = md_setattr(ll_i2sbi(inode)->ll_md_exp, op_data, NULL,
                        0, &req);
        ptlrpc_req_finished(req);
@@ -3111,8 +3123,8 @@ int ll_ioctl_fssetxattr(struct inode *inode, unsigned int cmd,
        if (attr == NULL)
                GOTO(out_fsxattr, rc = -ENOMEM);
 
-       attr->ia_valid = ATTR_ATTR_FLAG;
-       rc = cl_setattr_ost(obj, attr, fsxattr.fsx_xflags);
+       rc = cl_setattr_ost(obj, attr, OP_XVALID_FLAGS,
+                           fsxattr.fsx_xflags);
        OBD_FREE_PTR(attr);
 out_fsxattr:
        ll_finish_md_op_data(op_data);
@@ -3286,7 +3298,7 @@ static long ll_file_set_lease(struct file *file, struct ll_ioc_lease *ioc,
                RETURN(PTR_ERR(och));
 
        if (ioc->lil_flags & LL_LEASE_RESYNC) {
-               rc = ll_lease_file_resync(och, inode);
+               rc = ll_lease_file_resync(och, inode, arg);
                if (rc) {
                        ll_lease_close(och, inode, NULL);
                        RETURN(rc);
@@ -4224,7 +4236,7 @@ again:
                if (rc != 0)
                        GOTO(out_close, rc);
 
-               op_data->op_handle = och->och_fh;
+               op_data->op_open_handle = och->och_open_handle;
                op_data->op_data_version = data_version;
                op_data->op_lease_handle = och->och_lease_handle;
                op_data->op_bias |= MDS_CLOSE_MIGRATE;
@@ -4249,7 +4261,7 @@ again:
                        obd_mod_put(och->och_mod);
                        md_clear_open_replay_data(ll_i2sbi(parent)->ll_md_exp,
                                                  och);
-                       och->och_fh.cookie = DEAD_HANDLE_MAGIC;
+                       och->och_open_handle.cookie = DEAD_HANDLE_MAGIC;
                        OBD_FREE_PTR(och);
                        och = NULL;
                }
@@ -4434,12 +4446,15 @@ out:
 
 static int ll_merge_md_attr(struct inode *inode)
 {
+       struct ll_inode_info *lli = ll_i2info(inode);
        struct cl_attr attr = { 0 };
        int rc;
 
-       LASSERT(ll_i2info(inode)->lli_lsm_md != NULL);
+       LASSERT(lli->lli_lsm_md != NULL);
+       down_read(&lli->lli_lsm_sem);
        rc = md_merge_attr(ll_i2mdexp(inode), ll_i2info(inode)->lli_lsm_md,
                           &attr, ll_md_blocking_ast);
+       up_read(&lli->lli_lsm_sem);
        if (rc != 0)
                RETURN(rc);
 
@@ -4917,7 +4932,6 @@ static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
 {
        struct ll_sb_info *sbi = ll_i2sbi(inode);
        struct ptlrpc_request *req;
-       struct mdt_body *body;
        void *lvbdata;
        void *lmm;
        int lmmsize;
@@ -4937,17 +4951,16 @@ static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
         * layout here. Please note that we can't use the LVB buffer in
         * completion AST because it doesn't have a large enough buffer */
        rc = ll_get_default_mdsize(sbi, &lmmsize);
-       if (rc == 0)
-               rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode),
-                               OBD_MD_FLXATTR, XATTR_NAME_LOV, lmmsize, &req);
        if (rc < 0)
                RETURN(rc);
 
-       body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
-       if (body == NULL)
-               GOTO(out, rc = -EPROTO);
+       rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), OBD_MD_FLXATTR,
+                        XATTR_NAME_LOV, lmmsize, &req);
+       if (rc < 0)
+               RETURN(rc);
 
-       lmmsize = body->mbo_eadatasize;
+       lmmsize = rc;
+       rc = 0;
        if (lmmsize == 0) /* empty layout */
                GOTO(out, rc = 0);