Whamcloud - gitweb
LU-2900 llite: Null deref in ll_fsync:mkdir on NFSmounted Lus
[fs/lustre-release.git] / lustre / llite / file.c
index 39a9edd..14e38cf 100644 (file)
@@ -705,8 +705,7 @@ static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
 
         oinfo.oi_md = lsm;
         oinfo.oi_oa = obdo;
-        oinfo.oi_oa->o_id = lsm->lsm_object_id;
-        oinfo.oi_oa->o_seq = lsm->lsm_object_seq;
+       oinfo.oi_oa->o_oi = lsm->lsm_oi;
         oinfo.oi_oa->o_mode = S_IFREG;
         oinfo.oi_oa->o_ioepoch = ioepoch;
         oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
@@ -756,10 +755,11 @@ int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
                            capa, obdo, ioepoch, sync);
        capa_put(capa);
        if (rc == 0) {
+               struct ost_id *oi = lsm ? &lsm->lsm_oi : &obdo->o_oi;
+
                obdo_refresh_inode(inode, obdo, obdo->o_valid);
-               CDEBUG(D_INODE,
-                      "objid "LPX64" size %llu, blocks %llu, blksize %lu\n",
-                      lsm ? lsm->lsm_object_id : 0, i_size_read(inode),
+               CDEBUG(D_INODE, "objid "DOSTID" size %llu, blocks %llu,"
+                      " blksize %lu\n", POSTID(oi), i_size_read(inode),
                       (unsigned long long)inode->i_blocks,
                       (unsigned long)ll_inode_blksize(inode));
        }
@@ -767,38 +767,47 @@ int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
        RETURN(rc);
 }
 
-int ll_merge_lvb(struct inode *inode)
+int ll_merge_lvb(const struct lu_env *env, struct inode *inode)
 {
        struct ll_inode_info *lli = ll_i2info(inode);
-       struct ll_sb_info *sbi = ll_i2sbi(inode);
-       struct lov_stripe_md *lsm;
+       struct cl_object *obj = lli->lli_clob;
+       struct cl_attr *attr = ccc_env_thread_attr(env);
        struct ost_lvb lvb;
        int rc = 0;
 
        ENTRY;
 
-       lsm = ccc_inode_lsm_get(inode);
        ll_inode_size_lock(inode);
+       /* merge the timestamps most recently obtained from the mds with
+          the timestamps obtained from the osts */
+       LTIME_S(inode->i_atime) = lli->lli_lvb.lvb_atime;
+       LTIME_S(inode->i_mtime) = lli->lli_lvb.lvb_mtime;
+       LTIME_S(inode->i_ctime) = lli->lli_lvb.lvb_ctime;
        inode_init_lvb(inode, &lvb);
 
-       /* merge timestamps the most resently obtained from mds with
-          timestamps obtained from osts */
-       lvb.lvb_atime = lli->lli_lvb.lvb_atime;
-       lvb.lvb_mtime = lli->lli_lvb.lvb_mtime;
-       lvb.lvb_ctime = lli->lli_lvb.lvb_ctime;
-       if (lsm != NULL) {
-               rc = obd_merge_lvb(sbi->ll_dt_exp, lsm, &lvb, 0);
-               cl_isize_write_nolock(inode, lvb.lvb_size);
+       cl_object_attr_lock(obj);
+       rc = cl_object_attr_get(env, obj, attr);
+       cl_object_attr_unlock(obj);
+
+       if (rc == 0) {
+               if (lvb.lvb_atime < attr->cat_atime)
+                       lvb.lvb_atime = attr->cat_atime;
+               if (lvb.lvb_ctime < attr->cat_ctime)
+                       lvb.lvb_ctime = attr->cat_ctime;
+               if (lvb.lvb_mtime < attr->cat_mtime)
+                       lvb.lvb_mtime = attr->cat_mtime;
 
                CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
-                               PFID(&lli->lli_fid), lvb.lvb_size);
-               inode->i_blocks = lvb.lvb_blocks;
+                               PFID(&lli->lli_fid), attr->cat_size);
+               cl_isize_write_nolock(inode, attr->cat_size);
+
+               inode->i_blocks = attr->cat_blocks;
+
+               LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
+               LTIME_S(inode->i_atime) = lvb.lvb_atime;
+               LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
        }
-       LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
-       LTIME_S(inode->i_atime) = lvb.lvb_atime;
-       LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
        ll_inode_size_unlock(inode);
-       ccc_inode_lsm_put(inode, lsm);
 
        RETURN(rc);
 }
@@ -827,7 +836,9 @@ void ll_io_init(struct cl_io *io, const struct file *file, int write)
         io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
        if (write) {
                io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
-               io->u.ci_wr.wr_sync = file->f_flags & O_SYNC || IS_SYNC(inode);
+               io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
+                                     file->f_flags & O_DIRECT ||
+                                     IS_SYNC(inode);
        }
         io->ci_obj     = ll_i2info(inode)->lli_clob;
         io->ci_lockreq = CILR_MAYBE;
@@ -850,6 +861,7 @@ ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
         ssize_t               result;
         ENTRY;
 
+restart:
         io = ccc_env_thread_io(env);
         ll_io_init(io, file, iot == CIT_WRITE);
 
@@ -908,6 +920,16 @@ ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
         GOTO(out, result);
 out:
         cl_io_fini(env, io);
+       /* If any data has been read/written (result != 0), we just return
+        * a short read/write instead of restarting the io. */
+       if (result == 0 && io->ci_need_restart) {
+               CDEBUG(D_VFSTRACE, "Restart %s on %s from %lld, count:%zd\n",
+                      iot == CIT_READ ? "read" : "write",
+                      file->f_dentry->d_name.name, *ppos, count);
+               LASSERTF(io->u.ci_rw.crw_count == count, "%zd != %zd\n",
+                        io->u.ci_rw.crw_count, count);
+               goto restart;
+       }
 
         if (iot == CIT_READ) {
                 if (result >= 0)
@@ -918,7 +940,7 @@ out:
                         ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
                                            LPROC_LL_WRITE_BYTES, result);
                        fd->fd_write_failed = false;
-               } else {
+               } else if (result != -ERESTARTSYS) {
                        fd->fd_write_failed = true;
                }
        }
@@ -1285,37 +1307,36 @@ out:
 
 static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg)
 {
-        struct ll_recreate_obj ucreat;
-        ENTRY;
+       struct ll_recreate_obj ucreat;
+       ENTRY;
 
-        if (!cfs_capable(CFS_CAP_SYS_ADMIN))
-                RETURN(-EPERM);
+       if (!cfs_capable(CFS_CAP_SYS_ADMIN))
+               RETURN(-EPERM);
 
-        if (cfs_copy_from_user(&ucreat, (struct ll_recreate_obj *)arg,
-                               sizeof(struct ll_recreate_obj)))
-                RETURN(-EFAULT);
+       if (copy_from_user(&ucreat, (struct ll_recreate_obj *)arg,
+                          sizeof(ucreat)))
+               RETURN(-EFAULT);
 
-        RETURN(ll_lov_recreate(inode, ucreat.lrc_id, 0,
-                               ucreat.lrc_ost_idx));
+       RETURN(ll_lov_recreate(inode, ucreat.lrc_id, 0,
+                              ucreat.lrc_ost_idx));
 }
 
 static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
 {
-        struct lu_fid fid;
-        obd_id id;
-        obd_count ost_idx;
+       struct lu_fid   fid;
+       obd_id          id;
+       obd_count       ost_idx;
         ENTRY;
 
-        if (!cfs_capable(CFS_CAP_SYS_ADMIN))
-                RETURN(-EPERM);
+       if (!cfs_capable(CFS_CAP_SYS_ADMIN))
+               RETURN(-EPERM);
 
-        if (cfs_copy_from_user(&fid, (struct lu_fid *)arg,
-                               sizeof(struct lu_fid)))
-                RETURN(-EFAULT);
+       if (copy_from_user(&fid, (struct lu_fid *)arg, sizeof(fid)))
+               RETURN(-EFAULT);
 
-        id = fid_oid(&fid) | ((fid_seq(&fid) & 0xffff) << 32);
-        ost_idx = (fid_seq(&fid) >> 16) & 0xffff;
-        RETURN(ll_lov_recreate(inode, id, 0, ost_idx));
+       id = fid_oid(&fid) | ((fid_seq(&fid) & 0xffff) << 32);
+       ost_idx = (fid_seq(&fid) >> 16) & 0xffff;
+       RETURN(ll_lov_recreate(inode, id, 0, ost_idx));
 }
 
 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
@@ -1435,56 +1456,55 @@ out:
 static int ll_lov_setea(struct inode *inode, struct file *file,
                             unsigned long arg)
 {
-        int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
-        struct lov_user_md  *lump;
-        int lum_size = sizeof(struct lov_user_md) +
-                       sizeof(struct lov_user_ost_data);
-        int rc;
-        ENTRY;
+       int                      flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
+       struct lov_user_md      *lump;
+       int                      lum_size = sizeof(struct lov_user_md) +
+                                           sizeof(struct lov_user_ost_data);
+       int                      rc;
+       ENTRY;
 
-        if (!cfs_capable(CFS_CAP_SYS_ADMIN))
-                RETURN(-EPERM);
+       if (!cfs_capable(CFS_CAP_SYS_ADMIN))
+               RETURN(-EPERM);
 
-        OBD_ALLOC_LARGE(lump, lum_size);
-        if (lump == NULL) {
+       OBD_ALLOC_LARGE(lump, lum_size);
+       if (lump == NULL)
                 RETURN(-ENOMEM);
-        }
-        if (cfs_copy_from_user(lump, (struct lov_user_md  *)arg, lum_size)) {
-                OBD_FREE_LARGE(lump, lum_size);
-                RETURN(-EFAULT);
-        }
 
-        rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
+       if (copy_from_user(lump, (struct lov_user_md  *)arg, lum_size)) {
+               OBD_FREE_LARGE(lump, lum_size);
+               RETURN(-EFAULT);
+       }
 
-        OBD_FREE_LARGE(lump, lum_size);
-        RETURN(rc);
+       rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
+
+       OBD_FREE_LARGE(lump, lum_size);
+       RETURN(rc);
 }
 
 static int ll_lov_setstripe(struct inode *inode, struct file *file,
-                            unsigned long arg)
-{
-        struct lov_user_md_v3 lumv3;
-        struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
-        struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
-        struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
-        int lum_size;
-        int rc;
-        int flags = FMODE_WRITE;
-        ENTRY;
+                           unsigned long arg)
+{
+       struct lov_user_md_v3    lumv3;
+       struct lov_user_md_v1   *lumv1 = (struct lov_user_md_v1 *)&lumv3;
+       struct lov_user_md_v1   *lumv1p = (struct lov_user_md_v1 *)arg;
+       struct lov_user_md_v3   *lumv3p = (struct lov_user_md_v3 *)arg;
+       int                      lum_size, rc;
+       int                      flags = FMODE_WRITE;
+       ENTRY;
 
-        /* first try with v1 which is smaller than v3 */
-        lum_size = sizeof(struct lov_user_md_v1);
-        if (cfs_copy_from_user(lumv1, lumv1p, lum_size))
-                RETURN(-EFAULT);
+       /* first try with v1 which is smaller than v3 */
+       lum_size = sizeof(struct lov_user_md_v1);
+       if (copy_from_user(lumv1, lumv1p, lum_size))
+               RETURN(-EFAULT);
 
-        if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
-                lum_size = sizeof(struct lov_user_md_v3);
-                if (cfs_copy_from_user(&lumv3, lumv3p, lum_size))
-                        RETURN(-EFAULT);
-        }
+       if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
+               lum_size = sizeof(struct lov_user_md_v3);
+               if (copy_from_user(&lumv3, lumv3p, lum_size))
+                       RETURN(-EFAULT);
+       }
 
-        rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
-        if (rc == 0) {
+       rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
+       if (rc == 0) {
                struct lov_stripe_md *lsm;
                __u32 gen;
 
@@ -1671,8 +1691,7 @@ int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
            !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
                GOTO(out, rc = -EOPNOTSUPP);
 
-        fm_key.oa.o_id = lsm->lsm_object_id;
-        fm_key.oa.o_seq = lsm->lsm_object_seq;
+       fm_key.oa.o_oi = lsm->lsm_oi;
         fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
 
         obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLSIZE);
@@ -1697,43 +1716,44 @@ out:
 
 int ll_fid2path(struct inode *inode, void *arg)
 {
-       struct obd_export *exp = ll_i2mdexp(inode);
-        struct getinfo_fid2path *gfout, *gfin;
-        int outsize, rc;
-        ENTRY;
+       struct obd_export       *exp = ll_i2mdexp(inode);
+       struct getinfo_fid2path *gfout, *gfin;
+       int                      outsize, rc;
+       ENTRY;
 
        if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) &&
            !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
                RETURN(-EPERM);
 
-        /* Need to get the buflen */
-        OBD_ALLOC_PTR(gfin);
-        if (gfin == NULL)
-                RETURN(-ENOMEM);
-        if (cfs_copy_from_user(gfin, arg, sizeof(*gfin))) {
-                OBD_FREE_PTR(gfin);
-                RETURN(-EFAULT);
-        }
+       /* Need to get the buflen */
+       OBD_ALLOC_PTR(gfin);
+       if (gfin == NULL)
+               RETURN(-ENOMEM);
+       if (copy_from_user(gfin, arg, sizeof(*gfin))) {
+               OBD_FREE_PTR(gfin);
+               RETURN(-EFAULT);
+       }
 
-        outsize = sizeof(*gfout) + gfin->gf_pathlen;
-        OBD_ALLOC(gfout, outsize);
-        if (gfout == NULL) {
-                OBD_FREE_PTR(gfin);
-                RETURN(-ENOMEM);
-        }
-        memcpy(gfout, gfin, sizeof(*gfout));
-        OBD_FREE_PTR(gfin);
+       outsize = sizeof(*gfout) + gfin->gf_pathlen;
+       OBD_ALLOC(gfout, outsize);
+       if (gfout == NULL) {
+               OBD_FREE_PTR(gfin);
+               RETURN(-ENOMEM);
+       }
+       memcpy(gfout, gfin, sizeof(*gfout));
+       OBD_FREE_PTR(gfin);
 
-        /* Call mdc_iocontrol */
-        rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
-        if (rc)
-                GOTO(gf_free, rc);
-        if (cfs_copy_to_user(arg, gfout, outsize))
-                rc = -EFAULT;
+       /* Call mdc_iocontrol */
+       rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
+       if (rc)
+               GOTO(gf_free, rc);
+
+       if (copy_to_user(arg, gfout, outsize))
+               rc = -EFAULT;
 
 gf_free:
-        OBD_FREE(gfout, outsize);
-        RETURN(rc);
+       OBD_FREE(gfout, outsize);
+       RETURN(rc);
 }
 
 static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
@@ -1755,10 +1775,10 @@ static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
         if (fiemap_s == NULL)
                 RETURN(-ENOMEM);
 
-        /* get the fiemap value */
-        if (copy_from_user(fiemap_s,(struct ll_user_fiemap __user *)arg,
-                           sizeof(*fiemap_s)))
-                GOTO(error, rc = -EFAULT);
+       /* get the fiemap value */
+       if (copy_from_user(fiemap_s, (struct ll_user_fiemap __user *)arg,
+                          sizeof(*fiemap_s)))
+               GOTO(error, rc = -EFAULT);
 
         /* If fm_extent_count is non-zero, read the first extent since
          * it is used to calculate end_offset and device from previous
@@ -1780,8 +1800,8 @@ static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
                 ret_bytes += (fiemap_s->fm_mapped_extents *
                                  sizeof(struct ll_fiemap_extent));
 
-        if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
-                rc = -EFAULT;
+       if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
+               rc = -EFAULT;
 
 error:
         OBD_FREE_LARGE(fiemap_s, num_bytes);
@@ -1834,13 +1854,163 @@ int ll_data_version(struct inode *inode, __u64 *data_version,
        RETURN(rc);
 }
 
-long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+struct ll_swap_stack {
+       struct iattr             ia1, ia2;
+       __u64                    dv1, dv2;
+       struct inode            *inode1, *inode2;
+       bool                     check_dv1, check_dv2;
+};
+
+static int ll_swap_layouts(struct file *file1, struct file *file2,
+                          struct lustre_swap_layouts *lsl)
 {
-        struct inode *inode = file->f_dentry->d_inode;
-        struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
-        int flags;
+       struct mdc_swap_layouts  msl;
+       struct md_op_data       *op_data;
+       __u32                    gid;
+       __u64                    dv;
+       struct ll_swap_stack    *llss = NULL;
+       int                      rc, rc1;
 
-        ENTRY;
+       OBD_ALLOC_PTR(llss);
+       if (llss == NULL)
+               RETURN(-ENOMEM);
+
+       llss->inode1 = file1->f_dentry->d_inode;
+       llss->inode2 = file2->f_dentry->d_inode;
+
+       if (!S_ISREG(llss->inode2->i_mode))
+               GOTO(free, rc = -EINVAL);
+
+       if (ll_permission(llss->inode1, MAY_WRITE, NULL) ||
+           ll_permission(llss->inode2, MAY_WRITE, NULL))
+               GOTO(free, rc = -EPERM);
+
+       if (llss->inode2->i_sb != llss->inode1->i_sb)
+               GOTO(free, rc = -EXDEV);
+
+       /* we use 2 bools because they are easier to swap than 2 bits */
+       if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
+               llss->check_dv1 = true;
+
+       if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
+               llss->check_dv2 = true;
+
+       /* we cannot use lsl->sl_dvX directly because we may swap them */
+       llss->dv1 = lsl->sl_dv1;
+       llss->dv2 = lsl->sl_dv2;
+
+       rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
+       if (rc == 0) /* same file, done! */
+               GOTO(free, rc = 0);
+
+       if (rc < 0) { /* sequentialize it */
+               swap(llss->inode1, llss->inode2);
+               swap(file1, file2);
+               swap(llss->dv1, llss->dv2);
+               swap(llss->check_dv1, llss->check_dv2);
+       }
+
+       gid = lsl->sl_gid;
+       if (gid != 0) { /* application asks to flush dirty cache */
+               rc = ll_get_grouplock(llss->inode1, file1, gid);
+               if (rc < 0)
+                       GOTO(free, rc);
+
+               rc = ll_get_grouplock(llss->inode2, file2, gid);
+               if (rc < 0) {
+                       ll_put_grouplock(llss->inode1, file1, gid);
+                       GOTO(free, rc);
+               }
+       }
+
+       /* to be able to restore mtime and atime after swap
+        * we need to first save them */
+       if (lsl->sl_flags &
+           (SWAP_LAYOUTS_KEEP_MTIME | SWAP_LAYOUTS_KEEP_ATIME)) {
+               llss->ia1.ia_mtime = llss->inode1->i_mtime;
+               llss->ia1.ia_atime = llss->inode1->i_atime;
+               llss->ia1.ia_valid = ATTR_MTIME | ATTR_ATIME;
+               llss->ia2.ia_mtime = llss->inode2->i_mtime;
+               llss->ia2.ia_atime = llss->inode2->i_atime;
+               llss->ia2.ia_valid = ATTR_MTIME | ATTR_ATIME;
+       }
+
+       /* ultimate check, before swapping the layouts we check if
+        * dataversion has changed (if requested) */
+       if (llss->check_dv1) {
+               rc = ll_data_version(llss->inode1, &dv, 0);
+               if (rc)
+                       GOTO(putgl, rc);
+               if (dv != llss->dv1)
+                       GOTO(putgl, rc = -EAGAIN);
+       }
+
+       if (llss->check_dv2) {
+               rc = ll_data_version(llss->inode2, &dv, 0);
+               if (rc)
+                       GOTO(putgl, rc);
+               if (dv != llss->dv2)
+                       GOTO(putgl, rc = -EAGAIN);
+       }
+
+       /* struct md_op_data is used to send the swap args to the mdt
+        * only flags is missing, so we use struct mdc_swap_layouts
+        * through the md_op_data->op_data */
+       /* flags from user space have to be converted before they are sent to
+        * server, no flag is sent today, they are only used on the client */
+       msl.msl_flags = 0;
+       rc = -ENOMEM;
+       op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
+                                    0, LUSTRE_OPC_ANY, &msl);
+       if (op_data != NULL) {
+               rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS,
+                                  ll_i2mdexp(llss->inode1),
+                                  sizeof(*op_data), op_data, NULL);
+               ll_finish_md_op_data(op_data);
+       }
+
+putgl:
+       if (gid != 0) {
+               ll_put_grouplock(llss->inode2, file2, gid);
+               ll_put_grouplock(llss->inode1, file1, gid);
+       }
+
+       /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */
+       if (rc != 0)
+               GOTO(free, rc);
+
+       /* clear useless flags */
+       if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_MTIME)) {
+               llss->ia1.ia_valid &= ~ATTR_MTIME;
+               llss->ia2.ia_valid &= ~ATTR_MTIME;
+       }
+
+       if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_ATIME)) {
+               llss->ia1.ia_valid &= ~ATTR_ATIME;
+               llss->ia2.ia_valid &= ~ATTR_ATIME;
+       }
+
+       /* update time if requested */
+       rc = rc1 = 0;
+       if (llss->ia2.ia_valid != 0)
+               rc = ll_setattr(file1->f_dentry, &llss->ia2);
+
+       if (llss->ia1.ia_valid != 0)
+               rc1 = ll_setattr(file2->f_dentry, &llss->ia1);
+
+free:
+       if (llss != NULL)
+               OBD_FREE_PTR(llss);
+
+       RETURN(rc);
+}
+
+long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+       struct inode            *inode = file->f_dentry->d_inode;
+       struct ll_file_data     *fd = LUSTRE_FPRIVATE(file);
+       int                      flags, rc;
+       ENTRY;
 
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
                inode->i_generation, inode, cmd);
@@ -1880,6 +2050,27 @@ long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                 RETURN(ll_lov_setstripe(inode, file, arg));
         case LL_IOC_LOV_SETEA:
                 RETURN(ll_lov_setea(inode, file, arg));
+       case LL_IOC_LOV_SWAP_LAYOUTS: {
+               struct file *file2;
+               struct lustre_swap_layouts lsl;
+
+               if (cfs_copy_from_user(&lsl, (char *)arg,
+                                      sizeof(struct lustre_swap_layouts)))
+                       RETURN(-EFAULT);
+
+               if ((file->f_flags & O_ACCMODE) == 0) /* O_RDONLY */
+                       RETURN(-EPERM);
+
+               file2 = fget(lsl.sl_fd);
+               if (file2 == NULL)
+                       RETURN(-EBADF);
+
+               rc = -EPERM;
+               if ((file2->f_flags & O_ACCMODE) != 0) /* O_WRONLY or O_RDWR */
+                       rc = ll_swap_layouts(file, file2, &lsl);
+               fput(file2);
+               RETURN(rc);
+       }
         case LL_IOC_LOV_GETSTRIPE:
                 RETURN(ll_lov_getstripe(inode, arg));
         case LL_IOC_RECREATE_OBJ:
@@ -1907,33 +2098,32 @@ long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
         case FSFILT_IOC_SETVERSION_OLD:
         case FSFILT_IOC_SETVERSION:
         */
-        case LL_IOC_FLUSHCTX:
-                RETURN(ll_flush_ctx(inode));
-        case LL_IOC_PATH2FID: {
-                if (cfs_copy_to_user((void *)arg, ll_inode2fid(inode),
-                                     sizeof(struct lu_fid)))
-                        RETURN(-EFAULT);
+       case LL_IOC_FLUSHCTX:
+               RETURN(ll_flush_ctx(inode));
+       case LL_IOC_PATH2FID: {
+               if (copy_to_user((void *)arg, ll_inode2fid(inode),
+                                sizeof(struct lu_fid)))
+                       RETURN(-EFAULT);
 
-                RETURN(0);
-        }
-        case OBD_IOC_FID2PATH:
+               RETURN(0);
+       }
+       case OBD_IOC_FID2PATH:
                RETURN(ll_fid2path(inode, (void *)arg));
-        case LL_IOC_DATA_VERSION: {
-                struct ioc_data_version idv;
-                int rc;
+       case LL_IOC_DATA_VERSION: {
+               struct ioc_data_version idv;
+               int                     rc;
 
-                if (cfs_copy_from_user(&idv, (char *)arg, sizeof(idv)))
-                        RETURN(-EFAULT);
+               if (copy_from_user(&idv, (char *)arg, sizeof(idv)))
+                       RETURN(-EFAULT);
 
-                rc = ll_data_version(inode, &idv.idv_version,
-                                     !(idv.idv_flags & LL_DV_NOFLUSH));
+               rc = ll_data_version(inode, &idv.idv_version,
+                               !(idv.idv_flags & LL_DV_NOFLUSH));
 
-                if (rc == 0 &&
-                    cfs_copy_to_user((char *) arg, &idv, sizeof(idv)))
-                        RETURN(-EFAULT);
+               if (rc == 0 && copy_to_user((char *) arg, &idv, sizeof(idv)))
+                       RETURN(-EFAULT);
 
-                RETURN(rc);
-        }
+               RETURN(rc);
+       }
 
         case LL_IOC_GET_MDTIDX: {
                 int mdtidx;
@@ -1947,20 +2137,108 @@ long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 
                 RETURN(0);
         }
-        case OBD_IOC_GETDTNAME:
-        case OBD_IOC_GETMDNAME:
-                RETURN(ll_get_obd_name(inode, cmd, arg));
-        default: {
-                int err;
-
-                if (LLIOC_STOP ==
-                    ll_iocontrol_call(inode, file, cmd, arg, &err))
-                        RETURN(err);
-
-                RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
-                                     (void *)arg));
-        }
-        }
+       case OBD_IOC_GETDTNAME:
+       case OBD_IOC_GETMDNAME:
+               RETURN(ll_get_obd_name(inode, cmd, arg));
+       case LL_IOC_HSM_STATE_GET: {
+               struct md_op_data       *op_data;
+               struct hsm_user_state   *hus;
+               int                      rc;
+
+               OBD_ALLOC_PTR(hus);
+               if (hus == NULL)
+                       RETURN(-ENOMEM);
+
+               op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
+                                            LUSTRE_OPC_ANY, hus);
+               if (op_data == NULL) {
+                       OBD_FREE_PTR(hus);
+                       RETURN(-ENOMEM);
+               }
+
+               rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
+                                  op_data, NULL);
+
+               if (copy_to_user((void *)arg, hus, sizeof(*hus)))
+                       rc = -EFAULT;
+
+               ll_finish_md_op_data(op_data);
+               OBD_FREE_PTR(hus);
+               RETURN(rc);
+       }
+       case LL_IOC_HSM_STATE_SET: {
+               struct md_op_data       *op_data;
+               struct hsm_state_set    *hss;
+               int                      rc;
+
+               OBD_ALLOC_PTR(hss);
+               if (hss == NULL)
+                       RETURN(-ENOMEM);
+               if (copy_from_user(hss, (char *)arg, sizeof(*hss))) {
+                       OBD_FREE_PTR(hss);
+                       RETURN(-EFAULT);
+               }
+
+               /* Non-root users are forbidden to set or clear flags which are
+                * NOT defined in HSM_USER_MASK. */
+               if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK)
+                   && !cfs_capable(CFS_CAP_SYS_ADMIN)) {
+                       OBD_FREE_PTR(hss);
+                       RETURN(-EPERM);
+               }
+
+               op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
+                                            LUSTRE_OPC_ANY, hss);
+               if (op_data == NULL) {
+                       OBD_FREE_PTR(hss);
+                       RETURN(-ENOMEM);
+               }
+
+               rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
+                                  op_data, NULL);
+
+               ll_finish_md_op_data(op_data);
+
+               OBD_FREE_PTR(hss);
+               RETURN(rc);
+       }
+       case LL_IOC_HSM_ACTION: {
+               struct md_op_data               *op_data;
+               struct hsm_current_action       *hca;
+               int                              rc;
+
+               OBD_ALLOC_PTR(hca);
+               if (hca == NULL)
+                       RETURN(-ENOMEM);
+
+               op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
+                                            LUSTRE_OPC_ANY, hca);
+               if (op_data == NULL) {
+                       OBD_FREE_PTR(hca);
+                       RETURN(-ENOMEM);
+               }
+
+               rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
+                                  op_data, NULL);
+
+               if (cfs_copy_to_user((char *)arg, hca, sizeof(*hca)))
+                       rc = -EFAULT;
+
+               ll_finish_md_op_data(op_data);
+               OBD_FREE_PTR(hca);
+               RETURN(rc);
+       }
+       default: {
+               int err;
+
+               if (LLIOC_STOP ==
+                    ll_iocontrol_call(inode, file, cmd, arg, &err))
+                       RETURN(err);
+
+               RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
+                                    (void *)arg));
+       }
+       }
 }
 
 #ifndef HAVE_FILE_LLSEEK_SIZE
@@ -2050,7 +2328,7 @@ loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
                eof = i_size_read(inode);
        }
 
-       retval = generic_file_llseek_size(file, offset, origin,
+       retval = ll_generic_file_llseek_size(file, offset, origin,
                                          ll_file_maxbytes(inode), eof);
        RETURN(retval);
 }
@@ -2133,15 +2411,25 @@ int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
        RETURN(result);
 }
 
+/*
+ * When dentry is provided (the 'else' case), *file->f_dentry may be
+ * null and dentry must be used directly rather than pulled from
+ * *file->f_dentry as is done otherwise.
+ */
+
 #ifdef HAVE_FILE_FSYNC_4ARGS
 int ll_fsync(struct file *file, loff_t start, loff_t end, int data)
+{
+       struct dentry *dentry = file->f_dentry;
 #elif defined(HAVE_FILE_FSYNC_2ARGS)
 int ll_fsync(struct file *file, int data)
+{
+       struct dentry *dentry = file->f_dentry;
 #else
 int ll_fsync(struct file *file, struct dentry *dentry, int data)
-#endif
 {
-        struct inode *inode = file->f_dentry->d_inode;
+#endif
+        struct inode *inode = dentry->d_inode;
         struct ll_inode_info *lli = ll_i2info(inode);
         struct ptlrpc_request *req;
         struct obd_capa *oc;
@@ -2442,7 +2730,7 @@ int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
         /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
          *      But under CMD case, it caused some lock issues, should be fixed
          *      with new CMD ibits lock. See bug 12718 */
-        if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) {
+       if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
                 struct lookup_intent oit = { .it_op = IT_GETATTR };
                 struct md_op_data *op_data;
 
@@ -2990,6 +3278,7 @@ static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
                rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm,
                                  lock->l_lvb_data, lock->l_lvb_len);
                if (rc >= 0) {
+                       *gen = LL_LAYOUT_GEN_EMPTY;
                        if (md.lsm != NULL)
                                *gen = md.lsm->lsm_layout_gen;
                        rc = 0;
@@ -3065,11 +3354,11 @@ int ll_layout_refresh(struct inode *inode, __u32 *gen)
                                           .ei_mode = LCK_CR,
                                           .ei_cb_bl = ll_md_blocking_ast,
                                           .ei_cb_cp = ldlm_completion_ast,
-                                          .ei_cbdata = inode };
+                                          .ei_cbdata = NULL };
        int rc;
        ENTRY;
 
-       *gen = LL_LAYOUT_GEN_ZERO;
+       *gen = LL_LAYOUT_GEN_NONE;
        if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK))
                RETURN(0);
 
@@ -3128,6 +3417,8 @@ again:
 
        ll_finish_md_op_data(op_data);
 
+       md_set_lock_data(sbi->ll_md_exp, &it.d.lustre.it_lock_handle, inode, NULL);
+
        mode = it.d.lustre.it_lock_mode;
        it.d.lustre.it_lock_mode = 0;
        ll_intent_drop_lock(&it);