Whamcloud - gitweb
LU-2900 llite: Null deref in ll_fsync:mkdir on NFSmounted Lus
[fs/lustre-release.git] / lustre / llite / file.c
index 8d04d4e..14e38cf 100644 (file)
@@ -705,8 +705,7 @@ static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
 
         oinfo.oi_md = lsm;
         oinfo.oi_oa = obdo;
-        oinfo.oi_oa->o_id = lsm->lsm_object_id;
-        oinfo.oi_oa->o_seq = lsm->lsm_object_seq;
+       oinfo.oi_oa->o_oi = lsm->lsm_oi;
         oinfo.oi_oa->o_mode = S_IFREG;
         oinfo.oi_oa->o_ioepoch = ioepoch;
         oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
@@ -756,10 +755,11 @@ int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
                            capa, obdo, ioepoch, sync);
        capa_put(capa);
        if (rc == 0) {
+               struct ost_id *oi = lsm ? &lsm->lsm_oi : &obdo->o_oi;
+
                obdo_refresh_inode(inode, obdo, obdo->o_valid);
-               CDEBUG(D_INODE,
-                      "objid "LPX64" size %llu, blocks %llu, blksize %lu\n",
-                      lsm ? lsm->lsm_object_id : 0, i_size_read(inode),
+               CDEBUG(D_INODE, "objid "DOSTID" size %llu, blocks %llu,"
+                      " blksize %lu\n", POSTID(oi), i_size_read(inode),
                       (unsigned long long)inode->i_blocks,
                       (unsigned long)ll_inode_blksize(inode));
        }
@@ -767,38 +767,47 @@ int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
        RETURN(rc);
 }
 
-int ll_merge_lvb(struct inode *inode)
+int ll_merge_lvb(const struct lu_env *env, struct inode *inode)
 {
        struct ll_inode_info *lli = ll_i2info(inode);
-       struct ll_sb_info *sbi = ll_i2sbi(inode);
-       struct lov_stripe_md *lsm;
+       struct cl_object *obj = lli->lli_clob;
+       struct cl_attr *attr = ccc_env_thread_attr(env);
        struct ost_lvb lvb;
        int rc = 0;
 
        ENTRY;
 
-       lsm = ccc_inode_lsm_get(inode);
        ll_inode_size_lock(inode);
+       /* merge timestamps the most recently obtained from mds with
+          timestamps obtained from osts */
+       LTIME_S(inode->i_atime) = lli->lli_lvb.lvb_atime;
+       LTIME_S(inode->i_mtime) = lli->lli_lvb.lvb_mtime;
+       LTIME_S(inode->i_ctime) = lli->lli_lvb.lvb_ctime;
        inode_init_lvb(inode, &lvb);
 
-       /* merge timestamps the most resently obtained from mds with
-          timestamps obtained from osts */
-       lvb.lvb_atime = lli->lli_lvb.lvb_atime;
-       lvb.lvb_mtime = lli->lli_lvb.lvb_mtime;
-       lvb.lvb_ctime = lli->lli_lvb.lvb_ctime;
-       if (lsm != NULL) {
-               rc = obd_merge_lvb(sbi->ll_dt_exp, lsm, &lvb, 0);
-               cl_isize_write_nolock(inode, lvb.lvb_size);
+       cl_object_attr_lock(obj);
+       rc = cl_object_attr_get(env, obj, attr);
+       cl_object_attr_unlock(obj);
+
+       if (rc == 0) {
+               if (lvb.lvb_atime < attr->cat_atime)
+                       lvb.lvb_atime = attr->cat_atime;
+               if (lvb.lvb_ctime < attr->cat_ctime)
+                       lvb.lvb_ctime = attr->cat_ctime;
+               if (lvb.lvb_mtime < attr->cat_mtime)
+                       lvb.lvb_mtime = attr->cat_mtime;
 
                CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
-                               PFID(&lli->lli_fid), lvb.lvb_size);
-               inode->i_blocks = lvb.lvb_blocks;
+                               PFID(&lli->lli_fid), attr->cat_size);
+               cl_isize_write_nolock(inode, attr->cat_size);
+
+               inode->i_blocks = attr->cat_blocks;
+
+               LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
+               LTIME_S(inode->i_atime) = lvb.lvb_atime;
+               LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
        }
-       LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
-       LTIME_S(inode->i_atime) = lvb.lvb_atime;
-       LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
        ll_inode_size_unlock(inode);
-       ccc_inode_lsm_put(inode, lsm);
 
        RETURN(rc);
 }
@@ -827,7 +836,9 @@ void ll_io_init(struct cl_io *io, const struct file *file, int write)
         io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
        if (write) {
                io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
-               io->u.ci_wr.wr_sync = file->f_flags & O_SYNC || IS_SYNC(inode);
+               io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
+                                     file->f_flags & O_DIRECT ||
+                                     IS_SYNC(inode);
        }
         io->ci_obj     = ll_i2info(inode)->lli_clob;
         io->ci_lockreq = CILR_MAYBE;
@@ -850,6 +861,7 @@ ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
         ssize_t               result;
         ENTRY;
 
+restart:
         io = ccc_env_thread_io(env);
         ll_io_init(io, file, iot == CIT_WRITE);
 
@@ -908,6 +920,16 @@ ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
         GOTO(out, result);
 out:
         cl_io_fini(env, io);
+       /* If any data has been read/written (result != 0), we just return
+        * a short read/write instead of restarting the io. */
+       if (result == 0 && io->ci_need_restart) {
+               CDEBUG(D_VFSTRACE, "Restart %s on %s from %lld, count:%zd\n",
+                      iot == CIT_READ ? "read" : "write",
+                      file->f_dentry->d_name.name, *ppos, count);
+               LASSERTF(io->u.ci_rw.crw_count == count, "%zd != %zd\n",
+                        io->u.ci_rw.crw_count, count);
+               goto restart;
+       }
 
         if (iot == CIT_READ) {
                 if (result >= 0)
@@ -918,7 +940,7 @@ out:
                         ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
                                            LPROC_LL_WRITE_BYTES, result);
                        fd->fd_write_failed = false;
-               } else {
+               } else if (result != -ERESTARTSYS) {
                        fd->fd_write_failed = true;
                }
        }
@@ -1669,8 +1691,7 @@ int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
            !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
                GOTO(out, rc = -EOPNOTSUPP);
 
-        fm_key.oa.o_id = lsm->lsm_object_id;
-        fm_key.oa.o_seq = lsm->lsm_object_seq;
+       fm_key.oa.o_oi = lsm->lsm_oi;
         fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
 
         obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLSIZE);
@@ -1726,6 +1747,7 @@ int ll_fid2path(struct inode *inode, void *arg)
        rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
        if (rc)
                GOTO(gf_free, rc);
+
        if (copy_to_user(arg, gfout, outsize))
                rc = -EFAULT;
 
@@ -1832,13 +1854,163 @@ int ll_data_version(struct inode *inode, __u64 *data_version,
        RETURN(rc);
 }
 
-long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+struct ll_swap_stack {
+       struct iattr             ia1, ia2;
+       __u64                    dv1, dv2;
+       struct inode            *inode1, *inode2;
+       bool                     check_dv1, check_dv2;
+};
+
+/* Swap the layouts of @file1 and @file2 through the MDT, optionally under
+ * group lock @lsl->sl_gid; returns 0 on success or a negative errno. */
+static int ll_swap_layouts(struct file *file1, struct file *file2,
+                          struct lustre_swap_layouts *lsl)
 {
-        struct inode *inode = file->f_dentry->d_inode;
-        struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
-        int flags;
+       struct mdc_swap_layouts  msl;
+       struct md_op_data       *op_data;
+       __u32                    gid;
+       __u64                    dv;
+       struct ll_swap_stack    *llss = NULL;
+       int                      rc, rc1;
 
-        ENTRY;
+       OBD_ALLOC_PTR(llss);
+       if (llss == NULL)
+               RETURN(-ENOMEM);
+
+       llss->inode1 = file1->f_dentry->d_inode;
+       llss->inode2 = file2->f_dentry->d_inode;
+
+       if (!S_ISREG(llss->inode2->i_mode))
+               GOTO(free, rc = -EINVAL);
+
+       if (ll_permission(llss->inode1, MAY_WRITE, NULL) ||
+           ll_permission(llss->inode2, MAY_WRITE, NULL))
+               GOTO(free, rc = -EPERM);
+
+       if (llss->inode2->i_sb != llss->inode1->i_sb)
+               GOTO(free, rc = -EXDEV);
+
+       /* we use 2 bool because it is easier to swap than 2 bits */
+       if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
+               llss->check_dv1 = true;
+       if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
+               llss->check_dv2 = true;
+
+       /* we cannot use lsl->sl_dvX directly because we may swap them */
+       llss->dv1 = lsl->sl_dv1;
+       llss->dv2 = lsl->sl_dv2;
+
+       rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
+       if (rc == 0) /* same file, done! */
+               GOTO(free, rc = 0);
+
+       if (rc < 0) { /* sequentialize it */
+               swap(llss->inode1, llss->inode2);
+               swap(file1, file2);
+               swap(llss->dv1, llss->dv2);
+               swap(llss->check_dv1, llss->check_dv2);
+       }
+
+       gid = lsl->sl_gid;
+       if (gid != 0) { /* application asks to flush dirty cache */
+               rc = ll_get_grouplock(llss->inode1, file1, gid);
+               if (rc < 0)
+                       GOTO(free, rc);
+
+               rc = ll_get_grouplock(llss->inode2, file2, gid);
+               if (rc < 0) {
+                       ll_put_grouplock(llss->inode1, file1, gid);
+                       GOTO(free, rc);
+               }
+       }
+
+       /* to be able to restore mtime and atime after swap
+        * we need to first save them */
+       if (lsl->sl_flags &
+           (SWAP_LAYOUTS_KEEP_MTIME | SWAP_LAYOUTS_KEEP_ATIME)) {
+               llss->ia1.ia_mtime = llss->inode1->i_mtime;
+               llss->ia1.ia_atime = llss->inode1->i_atime;
+               llss->ia1.ia_valid = ATTR_MTIME | ATTR_ATIME;
+               llss->ia2.ia_mtime = llss->inode2->i_mtime;
+               llss->ia2.ia_atime = llss->inode2->i_atime;
+               llss->ia2.ia_valid = ATTR_MTIME | ATTR_ATIME;
+       }
+
+       /* ultimate check, before swapping the layouts we check if
+        * dataversion has changed (if requested) */
+       if (llss->check_dv1) {
+               rc = ll_data_version(llss->inode1, &dv, 0);
+               if (rc)
+                       GOTO(putgl, rc);
+               if (dv != llss->dv1)
+                       GOTO(putgl, rc = -EAGAIN);
+       }
+
+       if (llss->check_dv2) {
+               rc = ll_data_version(llss->inode2, &dv, 0);
+               if (rc)
+                       GOTO(putgl, rc);
+               if (dv != llss->dv2)
+                       GOTO(putgl, rc = -EAGAIN);
+       }
+
+       /* struct md_op_data is used to send the swap args to the mdt
+        * only flags is missing, so we use struct mdc_swap_layouts
+        * through the md_op_data->op_data */
+       /* flags from user space have to be converted before they are sent to
+        * server, no flag is sent today, they are only used on the client */
+       msl.msl_flags = 0;
+       rc = -ENOMEM;
+       op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
+                                    0, LUSTRE_OPC_ANY, &msl);
+       if (op_data != NULL) {
+               rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS,
+                                  ll_i2mdexp(llss->inode1),
+                                  sizeof(*op_data), op_data, NULL);
+               ll_finish_md_op_data(op_data);
+       }
+
+putgl:
+       if (gid != 0) {
+               ll_put_grouplock(llss->inode2, file2, gid);
+               ll_put_grouplock(llss->inode1, file1, gid);
+       }
+
+       /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */
+       if (rc != 0)
+               GOTO(free, rc);
+
+       /* clear useless flags */
+       if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_MTIME)) {
+               llss->ia1.ia_valid &= ~ATTR_MTIME;
+               llss->ia2.ia_valid &= ~ATTR_MTIME;
+       }
+       if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_ATIME)) {
+               llss->ia1.ia_valid &= ~ATTR_ATIME;
+               llss->ia2.ia_valid &= ~ATTR_ATIME;
+       }
+
+       /* update time if requested */
+       rc = rc1 = 0;
+       if (llss->ia2.ia_valid != 0)
+               rc = ll_setattr(file1->f_dentry, &llss->ia2);
+       if (llss->ia1.ia_valid != 0)
+               rc1 = ll_setattr(file2->f_dentry, &llss->ia1);
+       if (rc == 0) /* do not lose a failure of the second setattr */
+               rc = rc1;
+free:
+       if (llss != NULL)
+               OBD_FREE_PTR(llss);
+
+       RETURN(rc);
+}
+
+long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+       struct inode            *inode = file->f_dentry->d_inode;
+       struct ll_file_data     *fd = LUSTRE_FPRIVATE(file);
+       int                      flags, rc;
+       ENTRY;
 
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
                inode->i_generation, inode, cmd);
@@ -1878,6 +2050,27 @@ long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                 RETURN(ll_lov_setstripe(inode, file, arg));
         case LL_IOC_LOV_SETEA:
                 RETURN(ll_lov_setea(inode, file, arg));
+       case LL_IOC_LOV_SWAP_LAYOUTS: {
+               struct file *file2;
+               struct lustre_swap_layouts lsl;
+
+               if (cfs_copy_from_user(&lsl, (char *)arg,
+                                      sizeof(struct lustre_swap_layouts)))
+                       RETURN(-EFAULT);
+
+               if ((file->f_flags & O_ACCMODE) == 0) /* O_RDONLY */
+                       RETURN(-EPERM);
+
+               file2 = fget(lsl.sl_fd);
+               if (file2 == NULL)
+                       RETURN(-EBADF);
+
+               rc = -EPERM;
+               if ((file2->f_flags & O_ACCMODE) != 0) /* O_WRONLY or O_RDWR */
+                       rc = ll_swap_layouts(file, file2, &lsl);
+               fput(file2);
+               RETURN(rc);
+       }
         case LL_IOC_LOV_GETSTRIPE:
                 RETURN(ll_lov_getstripe(inode, arg));
         case LL_IOC_RECREATE_OBJ:
@@ -2009,12 +2202,37 @@ long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                OBD_FREE_PTR(hss);
                RETURN(rc);
        }
+       case LL_IOC_HSM_ACTION: {
+               struct md_op_data               *op_data;
+               struct hsm_current_action       *hca;
+               int                              rc;
+
+               OBD_ALLOC_PTR(hca);
+               if (hca == NULL)
+                       RETURN(-ENOMEM);
+
+               op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
+                                            LUSTRE_OPC_ANY, hca);
+               if (op_data == NULL) {
+                       OBD_FREE_PTR(hca);
+                       RETURN(-ENOMEM);
+               }
 
+               rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
+                                  op_data, NULL);
+
+               if (cfs_copy_to_user((char *)arg, hca, sizeof(*hca)))
+                       rc = -EFAULT;
+
+               ll_finish_md_op_data(op_data);
+               OBD_FREE_PTR(hca);
+               RETURN(rc);
+       }
        default: {
                int err;
 
                if (LLIOC_STOP ==
-                       ll_iocontrol_call(inode, file, cmd, arg, &err))
+                    ll_iocontrol_call(inode, file, cmd, arg, &err))
                        RETURN(err);
 
                RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
@@ -2110,7 +2328,7 @@ loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
                eof = i_size_read(inode);
        }
 
-       retval = generic_file_llseek_size(file, offset, origin,
+       retval = ll_generic_file_llseek_size(file, offset, origin,
                                          ll_file_maxbytes(inode), eof);
        RETURN(retval);
 }
@@ -2193,15 +2411,25 @@ int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
        RETURN(result);
 }
 
+/*
+ * When dentry is provided (the 'else' case), *file->f_dentry may be
+ * null and dentry must be used directly rather than pulled from
+ * *file->f_dentry as is done otherwise.
+ */
+
 #ifdef HAVE_FILE_FSYNC_4ARGS
 int ll_fsync(struct file *file, loff_t start, loff_t end, int data)
+{
+       struct dentry *dentry = file->f_dentry;
 #elif defined(HAVE_FILE_FSYNC_2ARGS)
 int ll_fsync(struct file *file, int data)
+{
+       struct dentry *dentry = file->f_dentry;
 #else
 int ll_fsync(struct file *file, struct dentry *dentry, int data)
-#endif
 {
-        struct inode *inode = file->f_dentry->d_inode;
+#endif
+        struct inode *inode = dentry->d_inode;
         struct ll_inode_info *lli = ll_i2info(inode);
         struct ptlrpc_request *req;
         struct obd_capa *oc;
@@ -2502,7 +2730,7 @@ int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
         /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
          *      But under CMD case, it caused some lock issues, should be fixed
          *      with new CMD ibits lock. See bug 12718 */
-        if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) {
+       if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
                 struct lookup_intent oit = { .it_op = IT_GETATTR };
                 struct md_op_data *op_data;
 
@@ -3050,6 +3278,7 @@ static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
                rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm,
                                  lock->l_lvb_data, lock->l_lvb_len);
                if (rc >= 0) {
+                       *gen = LL_LAYOUT_GEN_EMPTY;
                        if (md.lsm != NULL)
                                *gen = md.lsm->lsm_layout_gen;
                        rc = 0;
@@ -3125,11 +3354,11 @@ int ll_layout_refresh(struct inode *inode, __u32 *gen)
                                           .ei_mode = LCK_CR,
                                           .ei_cb_bl = ll_md_blocking_ast,
                                           .ei_cb_cp = ldlm_completion_ast,
-                                          .ei_cbdata = inode };
+                                          .ei_cbdata = NULL };
        int rc;
        ENTRY;
 
-       *gen = LL_LAYOUT_GEN_ZERO;
+       *gen = LL_LAYOUT_GEN_NONE;
        if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK))
                RETURN(0);
 
@@ -3188,6 +3417,8 @@ again:
 
        ll_finish_md_op_data(op_data);
 
+       md_set_lock_data(sbi->ll_md_exp, &it.d.lustre.it_lock_handle, inode, NULL);
+
        mode = it.d.lustre.it_lock_mode;
        it.d.lustre.it_lock_mode = 0;
        ll_intent_drop_lock(&it);