Whamcloud - gitweb
LU-2675 obd: decruft md_enqueue() and md_intent_lock()
[fs/lustre-release.git] / lustre / llite / file.c
index cc3ecaa..2450897 100644 (file)
 #include <lustre_lite.h>
 #include <linux/pagemap.h>
 #include <linux/file.h>
+#include <linux/sched.h>
 #include "llite_internal.h"
 #include <lustre/ll_fiemap.h>
+#include <lustre_ioctl.h>
 
 #include "cl_object.h"
 
@@ -415,7 +417,7 @@ static int ll_intent_file_open(struct file *file, void *lmm,
         const char *name = file->f_dentry->d_name.name;
         const int len = file->f_dentry->d_name.len;
         struct md_op_data *op_data;
-        struct ptlrpc_request *req;
+       struct ptlrpc_request *req = NULL;
         __u32 opc = LUSTRE_OPC_ANY;
         int rc;
         ENTRY;
@@ -444,9 +446,12 @@ static int ll_intent_file_open(struct file *file, void *lmm,
         if (IS_ERR(op_data))
                 RETURN(PTR_ERR(op_data));
 
+       op_data->op_data = lmm;
+       op_data->op_data_size = lmmsize;
+
        itp->it_flags |= MDS_OPEN_BY_FID;
-        rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
-                            0 /*unused */, &req, ll_md_blocking_ast, 0);
+       rc = md_intent_lock(sbi->ll_md_exp, op_data, itp, &req,
+                           &ll_md_blocking_ast, 0);
         ll_finish_md_op_data(op_data);
         if (rc == -ESTALE) {
                 /* reason for keep own exit path - don`t flood log
@@ -774,7 +779,7 @@ ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
        struct lookup_intent it = { .it_op = IT_OPEN };
        struct ll_sb_info *sbi = ll_i2sbi(inode);
        struct md_op_data *op_data;
-       struct ptlrpc_request *req;
+       struct ptlrpc_request *req = NULL;
        struct lustre_handle old_handle = { 0 };
        struct obd_client_handle *och = NULL;
        int rc;
@@ -840,15 +845,15 @@ ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
 
        it.it_flags = fmode | open_flags;
        it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
-       rc = md_intent_lock(sbi->ll_md_exp, op_data, NULL, 0, &it, 0, &req,
-                               ll_md_blocking_lease_ast,
+       rc = md_intent_lock(sbi->ll_md_exp, op_data, &it, &req,
+                           &ll_md_blocking_lease_ast,
        /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
         * it can be cancelled which may mislead applications that the lease is
         * broken;
         * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
         * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
         * doesn't deal with openhandle, so normal openhandle will be leaked. */
-                               LDLM_FL_NO_LRU | LDLM_FL_EXCL);
+                           LDLM_FL_NO_LRU | LDLM_FL_EXCL);
        ll_finish_md_op_data(op_data);
        ptlrpc_req_finished(req);
        if (rc < 0)
@@ -872,7 +877,7 @@ ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
        if (it.d.lustre.it_lock_mode == 0 ||
            it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
                /* open lock must return for lease */
-               CERROR(DFID "lease granted but no open lock, %d/%Lu.\n",
+               CERROR(DFID "lease granted but no open lock, %d/"LPU64".\n",
                        PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
                        it.d.lustre.it_lock_bits);
                GOTO(out_close, rc = -EPROTO);
@@ -1822,12 +1827,12 @@ out:
  * Make the FIEMAP get_info call and returns the result.
  */
 static int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
-                       int num_bytes)
+                       size_t num_bytes)
 {
        struct obd_export *exp = ll_i2dtexp(inode);
        struct lov_stripe_md *lsm = NULL;
         struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
-        int vallen = num_bytes;
+       __u32 vallen = num_bytes;
         int rc;
         ENTRY;
 
@@ -2746,7 +2751,7 @@ static int ll_flush(struct file *file, fl_owner_t id)
 
 /**
  * Called to make sure a portion of file has been written out.
- * if @local_only is not true, it will send OST_SYNC RPCs to ost.
+ * if @mode is not CL_FSYNC_LOCAL, it will send OST_SYNC RPCs to OST.
  *
  * Return how many pages have been written.
  */
@@ -2812,9 +2817,13 @@ int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 int ll_fsync(struct file *file, int datasync)
 {
        struct dentry *dentry = file->f_dentry;
+       loff_t start = 0;
+       loff_t end = LLONG_MAX;
 #else
 int ll_fsync(struct file *file, struct dentry *dentry, int datasync)
 {
+       loff_t start = 0;
+       loff_t end = LLONG_MAX;
 #endif
        struct inode *inode = dentry->d_inode;
        struct ll_inode_info *lli = ll_i2info(inode);
@@ -2860,8 +2869,7 @@ int ll_fsync(struct file *file, struct dentry *dentry, int datasync)
        if (S_ISREG(inode->i_mode)) {
                struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
 
-               err = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF,
-                               CL_FSYNC_ALL, 0);
+               err = cl_sync_file_range(inode, start, end, CL_FSYNC_ALL, 0);
                if (rc == 0 && err < 0)
                        rc = err;
                if (rc < 0)
@@ -2889,6 +2897,7 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
        struct md_op_data *op_data;
        struct lustre_handle lockh = {0};
        ldlm_policy_data_t flock = {{0}};
+       int fl_type = file_lock->fl_type;
        __u64 flags = 0;
        int rc;
        int rc2 = 0;
@@ -2924,7 +2933,7 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
        if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
                flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
 
-        switch (file_lock->fl_type) {
+       switch (fl_type) {
         case F_RDLCK:
                 einfo.ei_mode = LCK_PR;
                 break;
@@ -2943,8 +2952,7 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
                 einfo.ei_mode = LCK_PW;
                 break;
         default:
-                CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n",
-                        file_lock->fl_type);
+               CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n", fl_type);
                 RETURN (-ENOTSUPP);
         }
 
@@ -2966,15 +2974,16 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
         case F_GETLK64:
 #endif
                 flags = LDLM_FL_TEST_LOCK;
-                /* Save the old mode so that if the mode in the lock changes we
-                 * can decrement the appropriate reader or writer refcount. */
-                file_lock->fl_type = einfo.ei_mode;
                 break;
         default:
                 CERROR("unknown fcntl lock command: %d\n", cmd);
                 RETURN (-EINVAL);
         }
 
+       /* Save the old mode so that if the mode in the lock changes we
+        * can decrement the appropriate reader or writer refcount. */
+       file_lock->fl_type = einfo.ei_mode;
+
         op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
                                      LUSTRE_OPC_ANY, NULL);
         if (IS_ERR(op_data))
@@ -2985,8 +2994,12 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
               flock.l_flock.pid, flags, einfo.ei_mode,
               flock.l_flock.start, flock.l_flock.end);
 
-        rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
-                        op_data, &lockh, &flock, 0, NULL /* req */, flags);
+       rc = md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data, &lockh,
+                       flags);
+
+       /* Restore the file lock type if not TEST lock. */
+       if (!(flags & LDLM_FL_TEST_LOCK))
+               file_lock->fl_type = fl_type;
 
         if ((file_lock->fl_flags & FL_FLOCK) &&
             (rc == 0 || file_lock->fl_type == F_UNLCK))
@@ -2998,8 +3011,8 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
 
        if (rc2 && file_lock->fl_type != F_UNLCK) {
                einfo.ei_mode = LCK_NL;
-               md_enqueue(sbi->ll_md_exp, &einfo, NULL,
-                       op_data, &lockh, &flock, 0, NULL /* req */, flags);
+               md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data,
+                          &lockh, flags);
                rc = rc2;
        }
 
@@ -3244,11 +3257,8 @@ static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
                         RETURN(PTR_ERR(op_data));
 
                 oit.it_create_mode |= M_CHECK_STALE;
-                rc = md_intent_lock(exp, op_data, NULL, 0,
-                                    /* we are not interested in name
-                                       based lookup */
-                                    &oit, 0, &req,
-                                    ll_md_blocking_ast, 0);
+               rc = md_intent_lock(exp, op_data, &oit, &req,
+                                   &ll_md_blocking_ast, 0);
                 ll_finish_md_op_data(op_data);
                 oit.it_create_mode &= ~M_CHECK_STALE;
                 if (rc < 0) {
@@ -3425,18 +3435,21 @@ static int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
         fiemap->fm_extent_count = fieinfo->fi_extents_max;
         fiemap->fm_start = start;
         fiemap->fm_length = len;
-        memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
-               sizeof(struct ll_fiemap_extent));
+       if (extent_count > 0)
+               memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
+                      sizeof(struct ll_fiemap_extent));
 
-        rc = ll_do_fiemap(inode, fiemap, num_bytes);
+       rc = ll_do_fiemap(inode, fiemap, num_bytes);
 
-        fieinfo->fi_flags = fiemap->fm_flags;
-        fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
-        memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
-               fiemap->fm_mapped_extents * sizeof(struct ll_fiemap_extent));
+       fieinfo->fi_flags = fiemap->fm_flags;
+       fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
+       if (extent_count > 0)
+               memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
+                      fiemap->fm_mapped_extents *
+                      sizeof(struct ll_fiemap_extent));
 
-        OBD_FREE_LARGE(fiemap, num_bytes);
-        return rc;
+       OBD_FREE_LARGE(fiemap, num_bytes);
+       return rc;
 }
 
 struct posix_acl *ll_get_acl(struct inode *inode, int type)
@@ -3495,8 +3508,14 @@ int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
 # endif
 #endif
 {
-        int rc = 0;
-        ENTRY;
+       int rc = 0;
+       struct ll_sb_info *sbi;
+       struct root_squash_info *squash;
+       struct cred *cred = NULL;
+       const struct cred *old_cred = NULL;
+       cfs_cap_t cap;
+       bool squash_id = false;
+       ENTRY;
 
 #ifdef MAY_NOT_BLOCK
        if (mask & MAY_NOT_BLOCK)
@@ -3519,11 +3538,46 @@ int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
        CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n",
               PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask);
 
-       if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
-               return lustre_check_remote_perm(inode, mask);
+       /* squash fsuid/fsgid if needed */
+       sbi = ll_i2sbi(inode);
+       squash = &sbi->ll_squash;
+       if (unlikely(squash->rsi_uid != 0 &&
+                    current_fsuid() == 0 &&
+                    !(sbi->ll_flags & LL_SBI_NOROOTSQUASH))) {
+                       squash_id = true;
+       }
+       if (squash_id) {
+               CDEBUG(D_OTHER, "squash creds (%d:%d)=>(%d:%d)\n",
+                      current_fsuid(), current_fsgid(),
+                      squash->rsi_uid, squash->rsi_gid);
+
+               /* update current process's credentials
+                * and FS capability */
+               cred = prepare_creds();
+               if (cred == NULL)
+                       RETURN(-ENOMEM);
+
+               cred->fsuid = squash->rsi_uid;
+               cred->fsgid = squash->rsi_gid;
+               for (cap = 0; cap < sizeof(cfs_cap_t) * 8; cap++) {
+                       if ((1 << cap) & CFS_CAP_FS_MASK)
+                               cap_lower(cred->cap_effective, cap);
+               }
+               old_cred = override_creds(cred);
+       }
+
+       ll_stats_ops_tally(sbi, LPROC_LL_INODE_PERM, 1);
 
-       ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
-       rc = ll_generic_permission(inode, mask, flags, ll_check_acl);
+       if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
+               rc = lustre_check_remote_perm(inode, mask);
+       else
+               rc = ll_generic_permission(inode, mask, flags, ll_check_acl);
+
+       /* restore current process's credentials and FS capability */
+       if (squash_id) {
+               revert_creds(old_cred);
+               put_cred(cred);
+       }
 
        RETURN(rc);
 }
@@ -3596,15 +3650,15 @@ struct inode_operations ll_file_inode_operations = {
 /* dynamic ioctl number support routins */
 static struct llioc_ctl_data {
        struct rw_semaphore     ioc_sem;
-        cfs_list_t              ioc_head;
+       struct list_head        ioc_head;
 } llioc = {
-        __RWSEM_INITIALIZER(llioc.ioc_sem),
-        CFS_LIST_HEAD_INIT(llioc.ioc_head)
+       __RWSEM_INITIALIZER(llioc.ioc_sem),
+       LIST_HEAD_INIT(llioc.ioc_head)
 };
 
 
 struct llioc_data {
-        cfs_list_t              iocd_list;
+       struct list_head        iocd_list;
         unsigned int            iocd_size;
         llioc_callback_t        iocd_cb;
         unsigned int            iocd_count;
@@ -3633,7 +3687,7 @@ void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
         memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
 
        down_write(&llioc.ioc_sem);
-        cfs_list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
+       list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
        up_write(&llioc.ioc_sem);
 
         RETURN(in_data);
@@ -3647,11 +3701,11 @@ void ll_iocontrol_unregister(void *magic)
                 return;
 
        down_write(&llioc.ioc_sem);
-        cfs_list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
+       list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
                 if (tmp == magic) {
                         unsigned int size = tmp->iocd_size;
 
-                        cfs_list_del(&tmp->iocd_list);
+                       list_del(&tmp->iocd_list);
                        up_write(&llioc.ioc_sem);
 
                         OBD_FREE(tmp, size);
@@ -3675,7 +3729,7 @@ ll_iocontrol_call(struct inode *inode, struct file *file,
         int rc = -EINVAL, i;
 
        down_read(&llioc.ioc_sem);
-        cfs_list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
+       list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
                 for (i = 0; i < data->iocd_count; i++) {
                         if (cmd != data->iocd_cmd[i])
                                 continue;
@@ -3718,11 +3772,24 @@ int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
                LASSERT(lock != NULL);
                LASSERT(ldlm_has_layout(lock));
                if (result == 0) {
+                       struct lustre_md *md = conf->u.coc_md;
+                       __u32 gen = LL_LAYOUT_GEN_EMPTY;
+
                        /* it can only be allowed to match after layout is
                         * applied to inode otherwise false layout would be
                         * seen. Applying layout shoud happen before dropping
                         * the intent lock. */
                        ldlm_lock_allow_match(lock);
+
+                       lli->lli_has_smd = lsm_has_objects(md->lsm);
+                       if (md->lsm != NULL)
+                               gen = md->lsm->lsm_layout_gen;
+
+                       CDEBUG(D_VFSTRACE,
+                              DFID ": layout version change: %u -> %u\n",
+                              PFID(&lli->lli_fid), ll_layout_version_get(lli),
+                              gen);
+                       ll_layout_version_set(lli, gen);
                }
        }
        RETURN(result);
@@ -3931,8 +3998,8 @@ int ll_layout_refresh(struct inode *inode, __u32 *gen)
        struct ldlm_enqueue_info einfo = {
                .ei_type = LDLM_IBITS,
                .ei_mode = LCK_CR,
-               .ei_cb_bl = ll_md_blocking_ast,
-               .ei_cb_cp = ldlm_completion_ast,
+               .ei_cb_bl = &ll_md_blocking_ast,
+               .ei_cb_cp = &ldlm_completion_ast,
        };
        int rc;
        ENTRY;
@@ -3978,8 +4045,7 @@ again:
                          ll_get_fsname(inode->i_sb, NULL, 0),
                          PFID(&lli->lli_fid), inode);
 
-       rc = md_enqueue(sbi->ll_md_exp, &einfo, &it, op_data, &lockh,
-                       NULL, 0, NULL, 0);
+       rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL, &it, op_data, &lockh, 0);
        if (it.d.lustre.it_data != NULL)
                ptlrpc_req_finished(it.d.lustre.it_data);
        it.d.lustre.it_data = NULL;