Whamcloud - gitweb
LU-3205 llite: Set layout_gen before compatibility check
[fs/lustre-release.git] / lustre / llite / file.c
index 45f2131..e61b696 100644 (file)
@@ -705,8 +705,7 @@ static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
 
         oinfo.oi_md = lsm;
         oinfo.oi_oa = obdo;
-        oinfo.oi_oa->o_id = lsm->lsm_object_id;
-        oinfo.oi_oa->o_seq = lsm->lsm_object_seq;
+       oinfo.oi_oa->o_oi = lsm->lsm_oi;
         oinfo.oi_oa->o_mode = S_IFREG;
         oinfo.oi_oa->o_ioepoch = ioepoch;
         oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
@@ -756,10 +755,11 @@ int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
                            capa, obdo, ioepoch, sync);
        capa_put(capa);
        if (rc == 0) {
+               struct ost_id *oi = lsm ? &lsm->lsm_oi : &obdo->o_oi;
+
                obdo_refresh_inode(inode, obdo, obdo->o_valid);
-               CDEBUG(D_INODE,
-                      "objid "LPX64" size %llu, blocks %llu, blksize %lu\n",
-                      lsm ? lsm->lsm_object_id : 0, i_size_read(inode),
+               CDEBUG(D_INODE, "objid "DOSTID" size %llu, blocks %llu,"
+                      " blksize %lu\n", POSTID(oi), i_size_read(inode),
                       (unsigned long long)inode->i_blocks,
                       (unsigned long)ll_inode_blksize(inode));
        }
@@ -836,7 +836,9 @@ void ll_io_init(struct cl_io *io, const struct file *file, int write)
         io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
        if (write) {
                io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
-               io->u.ci_wr.wr_sync = file->f_flags & O_SYNC || IS_SYNC(inode);
+               io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
+                                     file->f_flags & O_DIRECT ||
+                                     IS_SYNC(inode);
        }
         io->ci_obj     = ll_i2info(inode)->lli_clob;
         io->ci_lockreq = CILR_MAYBE;
@@ -859,6 +861,7 @@ ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
         ssize_t               result;
         ENTRY;
 
+restart:
         io = ccc_env_thread_io(env);
         ll_io_init(io, file, iot == CIT_WRITE);
 
@@ -917,6 +920,15 @@ ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
         GOTO(out, result);
 out:
         cl_io_fini(env, io);
+       /* If any bytes have been read/written (result != 0), we just return
+        * a short read/write instead of restarting the io. */
+       if (result == 0 && io->ci_need_restart) {
+               CDEBUG(D_VFSTRACE, "Restart %s on %s from %lld, count:%zd\n",
+                      iot == CIT_READ ? "read" : "write",
+                      file->f_dentry->d_name.name, *ppos, count);
+               LASSERTF(io->ci_nob == 0, "%zd", io->ci_nob);
+               goto restart;
+       }
 
         if (iot == CIT_READ) {
                 if (result >= 0)
@@ -927,7 +939,7 @@ out:
                         ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
                                            LPROC_LL_WRITE_BYTES, result);
                        fd->fd_write_failed = false;
-               } else {
+               } else if (result != -ERESTARTSYS) {
                        fd->fd_write_failed = true;
                }
        }
@@ -1245,7 +1257,7 @@ static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
 }
 #endif
 
-static int ll_lov_recreate(struct inode *inode, obd_id id, obd_seq seq,
+static int ll_lov_recreate(struct inode *inode, struct ost_id *oi,
                            obd_count ost_idx)
 {
        struct obd_export *exp = ll_i2dtexp(inode);
@@ -1271,8 +1283,7 @@ static int ll_lov_recreate(struct inode *inode, obd_id id, obd_seq seq,
         if (lsm2 == NULL)
                 GOTO(out, rc = -ENOMEM);
 
-        oa->o_id = id;
-        oa->o_seq = seq;
+       oa->o_oi = *oi;
         oa->o_nlink = ost_idx;
         oa->o_flags |= OBD_FL_RECREATE_OBJS;
         oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
@@ -1295,6 +1306,7 @@ out:
 static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg)
 {
        struct ll_recreate_obj ucreat;
+       struct ost_id           oi;
        ENTRY;
 
        if (!cfs_capable(CFS_CAP_SYS_ADMIN))
@@ -1304,14 +1316,15 @@ static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg)
                           sizeof(ucreat)))
                RETURN(-EFAULT);
 
-       RETURN(ll_lov_recreate(inode, ucreat.lrc_id, 0,
-                              ucreat.lrc_ost_idx));
+       ostid_set_seq_mdt0(&oi);
+       ostid_set_id(&oi, ucreat.lrc_id);
+       RETURN(ll_lov_recreate(inode, &oi, ucreat.lrc_ost_idx));
 }
 
 static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
 {
        struct lu_fid   fid;
-       obd_id          id;
+       struct ost_id   oi;
        obd_count       ost_idx;
         ENTRY;
 
@@ -1321,9 +1334,9 @@ static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
        if (copy_from_user(&fid, (struct lu_fid *)arg, sizeof(fid)))
                RETURN(-EFAULT);
 
-       id = fid_oid(&fid) | ((fid_seq(&fid) & 0xffff) << 32);
+       fid_to_ostid(&fid, &oi);
        ost_idx = (fid_seq(&fid) >> 16) & 0xffff;
-       RETURN(ll_lov_recreate(inode, id, 0, ost_idx));
+       RETURN(ll_lov_recreate(inode, &oi, ost_idx));
 }
 
 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
@@ -1678,8 +1691,7 @@ int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
            !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
                GOTO(out, rc = -EOPNOTSUPP);
 
-        fm_key.oa.o_id = lsm->lsm_object_id;
-        fm_key.oa.o_seq = lsm->lsm_object_seq;
+       fm_key.oa.o_oi = lsm->lsm_oi;
         fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
 
         obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLSIZE);
@@ -1735,6 +1747,7 @@ int ll_fid2path(struct inode *inode, void *arg)
        rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
        if (rc)
                GOTO(gf_free, rc);
+
        if (copy_to_user(arg, gfout, outsize))
                rc = -EFAULT;
 
@@ -1841,13 +1854,173 @@ int ll_data_version(struct inode *inode, __u64 *data_version,
        RETURN(rc);
 }
 
-long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+struct ll_swap_stack { /* working state of ll_swap_layouts() */
+       struct iattr             ia1, ia2; /* saved [am]time of inode1/2 */
+       __u64                    dv1, dv2; /* expected data versions */
+       struct inode            *inode1, *inode2; /* the two files' inodes */
+       bool                     check_dv1, check_dv2; /* compare dv1/dv2? */
+};
+
+static int ll_swap_layouts(struct file *file1, struct file *file2,
+                          struct lustre_swap_layouts *lsl) /* 0 or -errno */
 {
-        struct inode *inode = file->f_dentry->d_inode;
-        struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
-        int flags;
+       struct mdc_swap_layouts  msl;
+       struct md_op_data       *op_data;
+       __u32                    gid;
+       __u64                    dv;
+       struct ll_swap_stack    *llss = NULL;
+       int                      rc; /* NOTE(review): no ENTRY for RETURN() */
 
-        ENTRY;
+       OBD_ALLOC_PTR(llss); /* working state, released at "free:" */
+       if (llss == NULL)
+               RETURN(-ENOMEM);
+
+       llss->inode1 = file1->f_dentry->d_inode;
+       llss->inode2 = file2->f_dentry->d_inode;
+
+       if (!S_ISREG(llss->inode2->i_mode)) /* only inode2 is checked */
+               GOTO(free, rc = -EINVAL);
+
+       if (ll_permission(llss->inode1, MAY_WRITE, NULL) ||
+           ll_permission(llss->inode2, MAY_WRITE, NULL))
+               GOTO(free, rc = -EPERM);
+
+       if (llss->inode2->i_sb != llss->inode1->i_sb)
+               GOTO(free, rc = -EXDEV);
+
+       /* we use 2 bool because it is easier to swap than 2 bits */
+       if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
+               llss->check_dv1 = true;
+
+       if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
+               llss->check_dv2 = true;
+
+       /* we cannot use lsl->sl_dvX directly because we may swap them */
+       llss->dv1 = lsl->sl_dv1;
+       llss->dv2 = lsl->sl_dv2;
+
+       rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
+       if (rc == 0) /* same file, done! */
+               GOTO(free, rc = 0);
+
+       if (rc < 0) { /* sequentialize it */
+               swap(llss->inode1, llss->inode2);
+               swap(file1, file2);
+               swap(llss->dv1, llss->dv2);
+               swap(llss->check_dv1, llss->check_dv2);
+       }
+
+       gid = lsl->sl_gid;
+       if (gid != 0) { /* application asks to flush dirty cache */
+               rc = ll_get_grouplock(llss->inode1, file1, gid);
+               if (rc < 0)
+                       GOTO(free, rc);
+
+               rc = ll_get_grouplock(llss->inode2, file2, gid);
+               if (rc < 0) {
+                       ll_put_grouplock(llss->inode1, file1, gid);
+                       GOTO(free, rc);
+               }
+       }
+
+       /* to be able to restore mtime and atime after swap
+        * we need to first save them */
+       if (lsl->sl_flags &
+           (SWAP_LAYOUTS_KEEP_MTIME | SWAP_LAYOUTS_KEEP_ATIME)) {
+               llss->ia1.ia_mtime = llss->inode1->i_mtime;
+               llss->ia1.ia_atime = llss->inode1->i_atime;
+               llss->ia1.ia_valid = ATTR_MTIME | ATTR_ATIME;
+               llss->ia2.ia_mtime = llss->inode2->i_mtime;
+               llss->ia2.ia_atime = llss->inode2->i_atime;
+               llss->ia2.ia_valid = ATTR_MTIME | ATTR_ATIME;
+       }
+
+       /* ultimate check, before swapping the layouts we check if
+        * dataversion has changed (if requested) */
+       if (llss->check_dv1) {
+               rc = ll_data_version(llss->inode1, &dv, 0);
+               if (rc)
+                       GOTO(putgl, rc);
+               if (dv != llss->dv1)
+                       GOTO(putgl, rc = -EAGAIN);
+       }
+
+       if (llss->check_dv2) {
+               rc = ll_data_version(llss->inode2, &dv, 0);
+               if (rc)
+                       GOTO(putgl, rc);
+               if (dv != llss->dv2)
+                       GOTO(putgl, rc = -EAGAIN);
+       }
+
+       /* struct md_op_data is used to send the swap args to the mdt
+        * only flags is missing, so we use struct mdc_swap_layouts
+        * through the md_op_data->op_data */
+       /* flags from user space have to be converted before they are sent to
+        * server, no flag is sent today, they are only used on the client */
+       msl.msl_flags = 0;
+       rc = -ENOMEM; /* NOTE(review): confirm failure is NULL, not ERR_PTR */
+       op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
+                                    0, LUSTRE_OPC_ANY, &msl);
+       if (op_data != NULL) {
+               rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS,
+                                  ll_i2mdexp(llss->inode1),
+                                  sizeof(*op_data), op_data, NULL);
+               ll_finish_md_op_data(op_data);
+       }
+
+putgl:
+       if (gid != 0) { /* release in reverse order of acquisition */
+               ll_put_grouplock(llss->inode2, file2, gid);
+               ll_put_grouplock(llss->inode1, file1, gid);
+       }
+
+       /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */
+       if (rc != 0)
+               GOTO(free, rc);
+
+       /* clear useless flags */
+       if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_MTIME)) {
+               llss->ia1.ia_valid &= ~ATTR_MTIME;
+               llss->ia2.ia_valid &= ~ATTR_MTIME;
+       }
+
+       if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_ATIME)) {
+               llss->ia1.ia_valid &= ~ATTR_ATIME;
+               llss->ia2.ia_valid &= ~ATTR_ATIME;
+       }
+
+       /* if requested, give each inode the times saved from the other */
+       rc = 0;
+       if (llss->ia2.ia_valid != 0) {
+               mutex_lock(&llss->inode1->i_mutex);
+               rc = ll_setattr(file1->f_dentry, &llss->ia2);
+               mutex_unlock(&llss->inode1->i_mutex);
+       }
+
+       if (llss->ia1.ia_valid != 0) {
+               int rc1;
+
+               mutex_lock(&llss->inode2->i_mutex);
+               rc1 = ll_setattr(file2->f_dentry, &llss->ia1);
+               mutex_unlock(&llss->inode2->i_mutex);
+               if (rc == 0) /* report the first failure only */
+                       rc = rc1;
+       }
+
+free:
+       if (llss != NULL) /* always true here: NULL returned early above */
+               OBD_FREE_PTR(llss);
+
+       RETURN(rc);
+}
+
+long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+       struct inode            *inode = file->f_dentry->d_inode;
+       struct ll_file_data     *fd = LUSTRE_FPRIVATE(file);
+       int                      flags, rc;
+       ENTRY;
 
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
                inode->i_generation, inode, cmd);
@@ -1887,6 +2060,27 @@ long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                 RETURN(ll_lov_setstripe(inode, file, arg));
         case LL_IOC_LOV_SETEA:
                 RETURN(ll_lov_setea(inode, file, arg));
+       case LL_IOC_LOV_SWAP_LAYOUTS: {
+               struct file *file2;
+               struct lustre_swap_layouts lsl;
+
+               if (cfs_copy_from_user(&lsl, (char *)arg,
+                                      sizeof(struct lustre_swap_layouts)))
+                       RETURN(-EFAULT);
+
+               if ((file->f_flags & O_ACCMODE) == 0) /* O_RDONLY */
+                       RETURN(-EPERM);
+
+               file2 = fget(lsl.sl_fd);
+               if (file2 == NULL)
+                       RETURN(-EBADF);
+
+               rc = -EPERM;
+               if ((file2->f_flags & O_ACCMODE) != 0) /* O_WRONLY or O_RDWR */
+                       rc = ll_swap_layouts(file, file2, &lsl);
+               fput(file2);
+               RETURN(rc);
+       }
         case LL_IOC_LOV_GETSTRIPE:
                 RETURN(ll_lov_getstripe(inode, arg));
         case LL_IOC_RECREATE_OBJ:
@@ -2144,7 +2338,7 @@ loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
                eof = i_size_read(inode);
        }
 
-       retval = generic_file_llseek_size(file, offset, origin,
+       retval = ll_generic_file_llseek_size(file, offset, origin,
                                          ll_file_maxbytes(inode), eof);
        RETURN(retval);
 }
@@ -2227,15 +2421,25 @@ int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
        RETURN(result);
 }
 
+/*
+ * When dentry is passed in (the '#else' case), file->f_dentry may be
+ * NULL, so dentry must be used directly rather than read from
+ * file->f_dentry as is done in the other two cases.
+ */
+
 #ifdef HAVE_FILE_FSYNC_4ARGS
-int ll_fsync(struct file *file, loff_t start, loff_t end, int data)
+int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+{
+       struct dentry *dentry = file->f_dentry;
 #elif defined(HAVE_FILE_FSYNC_2ARGS)
-int ll_fsync(struct file *file, int data)
+int ll_fsync(struct file *file, int datasync)
+{
+       struct dentry *dentry = file->f_dentry;
 #else
-int ll_fsync(struct file *file, struct dentry *dentry, int data)
-#endif
+int ll_fsync(struct file *file, struct dentry *dentry, int datasync)
 {
-        struct inode *inode = file->f_dentry->d_inode;
+#endif
+        struct inode *inode = dentry->d_inode;
         struct ll_inode_info *lli = ll_i2info(inode);
         struct ptlrpc_request *req;
         struct obd_capa *oc;
@@ -2276,7 +2480,7 @@ int ll_fsync(struct file *file, struct dentry *dentry, int data)
         if (!err)
                 ptlrpc_req_finished(req);
 
-       if (data) {
+       if (datasync && S_ISREG(inode->i_mode)) {
                struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
 
                err = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF,
@@ -2536,7 +2740,7 @@ int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
         /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
          *      But under CMD case, it caused some lock issues, should be fixed
          *      with new CMD ibits lock. See bug 12718 */
-        if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) {
+       if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
                 struct lookup_intent oit = { .it_op = IT_GETATTR };
                 struct md_op_data *op_data;
 
@@ -3032,6 +3236,72 @@ int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
        RETURN(result);
 }
 
+/* If lock has no LVB data yet, fetch the layout from the MDT via getxattr */
+static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
+
+{
+       struct ll_sb_info *sbi = ll_i2sbi(inode);
+       struct obd_capa *oc;
+       struct ptlrpc_request *req;
+       struct mdt_body *body;
+       void *lvbdata;
+       void *lmm;
+       int lmmsize;
+       int rc;
+       ENTRY;
+
+       if (lock->l_lvb_data != NULL) /* layout already attached */
+               RETURN(0);
+
+       /* if layout lock was granted right away, the layout is returned
+        * within DLM_LVB of dlm reply; otherwise if the lock was ever
+        * blocked and then granted via completion ast, we have to fetch
+        * layout here. Please note that we can't use the LVB buffer in
+        * completion AST because it doesn't have a large enough buffer */
+       oc = ll_mdscapa_get(inode);
+       rc = ll_get_max_mdsize(sbi, &lmmsize);
+       if (rc == 0)
+               rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
+                               OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
+                               lmmsize, 0, &req);
+       capa_put(oc);
+       if (rc < 0)
+               RETURN(rc);
+
+       body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+       if (body == NULL || body->eadatasize > lmmsize) /* oversized reply */
+               GOTO(out, rc = -EPROTO);
+
+       lmmsize = body->eadatasize;
+       if (lmmsize == 0) /* empty layout */
+               GOTO(out, rc = 0);
+
+       lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
+       if (lmm == NULL)
+               GOTO(out, rc = -EFAULT);
+
+       OBD_ALLOC_LARGE(lvbdata, lmmsize);
+       if (lvbdata == NULL)
+               GOTO(out, rc = -ENOMEM);
+
+       memcpy(lvbdata, lmm, lmmsize); /* private copy of the reply's EA */
+       lock_res_and_lock(lock);
+       if (lock->l_lvb_data == NULL) { /* recheck under res lock */
+               lock->l_lvb_data = lvbdata;
+               lock->l_lvb_len = lmmsize;
+               lvbdata = NULL; /* buffer now belongs to the lock */
+       }
+       unlock_res_and_lock(lock);
+
+       if (lvbdata != NULL) /* lock already had data; drop our copy */
+               OBD_FREE_LARGE(lvbdata, lmmsize);
+       EXIT;
+
+out:
+       ptlrpc_req_finished(req);
+       return rc; /* 0 on success, else the error code set above */
+}
+
 /**
  * Apply the layout to the inode. Layout lock is held and will be released
  * in this function.
@@ -3046,6 +3316,7 @@ static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
        struct cl_object_conf conf;
        int rc = 0;
        bool lvb_ready;
+       bool wait_layout = false;
        ENTRY;
 
        LASSERT(lustre_handle_is_used(lockh));
@@ -3063,8 +3334,6 @@ static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
        /* checking lvb_ready is racy but this is okay. The worst case is
         * that multi processes may configure the file on the same time. */
        if (lvb_ready || !reconf) {
-               LDLM_LOCK_PUT(lock);
-
                rc = -ENODATA;
                if (lvb_ready) {
                        /* layout_gen must be valid if layout lock is not
@@ -3072,10 +3341,13 @@ static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
                        *gen = lli->lli_layout_gen;
                        rc = 0;
                }
-               ldlm_lock_decref(lockh, mode);
-               RETURN(rc);
+               GOTO(out, rc);
        }
 
+       rc = ll_layout_fetch(inode, lock);
+       if (rc < 0)
+               GOTO(out, rc);
+
        /* for layout lock, lmm is returned in lock's lvb.
         * lvb_data is immutable if the lock is held so it's safe to access it
         * without res lock. See the description in ldlm_lock_decref_internal()
@@ -3084,6 +3356,7 @@ static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
                rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm,
                                  lock->l_lvb_data, lock->l_lvb_len);
                if (rc >= 0) {
+                       *gen = LL_LAYOUT_GEN_EMPTY;
                        if (md.lsm != NULL)
                                *gen = md.lsm->lsm_layout_gen;
                        rc = 0;
@@ -3093,11 +3366,8 @@ static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
                                PFID(&lli->lli_fid), rc);
                }
        }
-       if (rc < 0) {
-               LDLM_LOCK_PUT(lock);
-               ldlm_lock_decref(lockh, mode);
-               RETURN(rc);
-       }
+       if (rc < 0)
+               GOTO(out, rc);
 
        /* set layout to file. Unlikely this will fail as old layout was
         * surely eliminated */
@@ -3107,15 +3377,20 @@ static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
        conf.coc_lock = lock;
        conf.u.coc_md = &md;
        rc = ll_layout_conf(inode, &conf);
-       LDLM_LOCK_PUT(lock);
-
-       ldlm_lock_decref(lockh, mode);
 
        if (md.lsm != NULL)
                obd_free_memmd(sbi->ll_dt_exp, &md.lsm);
 
+       /* refresh layout failed, need to wait */
+       wait_layout = rc == -EBUSY;
+       EXIT;
+
+out:
+       LDLM_LOCK_PUT(lock);
+       ldlm_lock_decref(lockh, mode);
+
        /* wait for IO to complete if it's still being used. */
-       if (rc == -EBUSY) {
+       if (wait_layout) {
                CDEBUG(D_INODE, "%s: %p/"DFID" wait for layout reconf.\n",
                        ll_get_fsname(inode->i_sb, NULL, 0),
                        inode, PFID(&lli->lli_fid));
@@ -3130,7 +3405,6 @@ static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
                CDEBUG(D_INODE, "file: "DFID" waiting layout return: %d.\n",
                        PFID(&lli->lli_fid), rc);
        }
-
        RETURN(rc);
 }
 
@@ -3159,11 +3433,11 @@ int ll_layout_refresh(struct inode *inode, __u32 *gen)
                                           .ei_mode = LCK_CR,
                                           .ei_cb_bl = ll_md_blocking_ast,
                                           .ei_cb_cp = ldlm_completion_ast,
-                                          .ei_cbdata = inode };
+                                          .ei_cbdata = NULL };
        int rc;
        ENTRY;
 
-       *gen = LL_LAYOUT_GEN_ZERO;
+       *gen = lli->lli_layout_gen;
        if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK))
                RETURN(0);
 
@@ -3222,6 +3496,8 @@ again:
 
        ll_finish_md_op_data(op_data);
 
+       md_set_lock_data(sbi->ll_md_exp, &it.d.lustre.it_lock_handle, inode, NULL);
+
        mode = it.d.lustre.it_lock_mode;
        it.d.lustre.it_lock_mode = 0;
        ll_intent_drop_lock(&it);