bool cl_is_released;
};
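+/*
+ * Opcodes for cl_object_inode_ops(): ask the VVP layer to take or
+ * release the inode lock (i_rwsem) or the inode size lock
+ * (lli_size_mutex).
+ */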
+enum coo_inode_opc {
+ COIO_INODE_LOCK,
+ COIO_INODE_UNLOCK,
+ COIO_SIZE_LOCK,
+ COIO_SIZE_UNLOCK,
+};
+
/**
* Operations implemented for each cl object layer.
*
int (*coo_object_flush)(const struct lu_env *env,
struct cl_object *obj,
struct ldlm_lock *lock);
+ /**
+ * Operate on the inode. Used by LOV to lock/unlock the inode at the
+ * VVP layer.
+ */
+ int (*coo_inode_ops)(const struct lu_env *env, struct cl_object *obj,
+ enum coo_inode_opc opc, void *data);
};
/**
loff_t cl_object_maxbytes(struct cl_object *obj);
int cl_object_flush(const struct lu_env *env, struct cl_object *obj,
struct ldlm_lock *lock);
+int cl_object_inode_ops(const struct lu_env *env, struct cl_object *obj,
+ enum coo_inode_opc opc, void *data);
/**
loff_t ret = -EINVAL;
ENTRY;
- inode_lock(inode);
+ ll_inode_lock(inode);
switch (origin) {
case SEEK_SET:
break;
GOTO(out, ret);
out:
- inode_unlock(inode);
+ ll_inode_unlock(inode);
return ret;
}
op_data->op_open_handle = och->och_open_handle;
if (och->och_flags & FMODE_WRITE &&
- test_and_clear_bit(LLIF_DATA_MODIFIED, &ll_i2info(inode)->lli_flags))
+ test_and_clear_bit(LLIF_DATA_MODIFIED,
+ &ll_i2info(inode)->lli_flags))
/* For HSM: if inode data has been modified, pack it so that
* MDT can set data dirty flag in the archive. */
op_data->op_bias |= MDS_DATA_MODIFIED;
RETURN(0);
if (unlikely(lock_inode))
- inode_lock(inode);
+ ll_inode_lock(inode);
result = __generic_file_write_iter(iocb, iter);
if (unlikely(lock_inode))
- inode_unlock(inode);
+ ll_inode_unlock(inode);
/* If the page is not already dirty, ll_tiny_write_begin returns
* -ENODATA. We continue on to normal write.
if (enckey == 0 || nameenc == 0)
continue;
- inode_lock(parent);
+ ll_inode_lock(parent);
de = lookup_one_len(p, de_parent, len);
- inode_unlock(parent);
+ ll_inode_unlock(parent);
if (IS_ERR_OR_NULL(de) || !de->d_inode) {
dput(de_parent);
rc = -ENODATA;
ATTR_ATIME | ATTR_ATIME_SET;
inode_lock(inode);
-
+ /* inode lock owner set in ll_setattr_raw() */
rc = ll_setattr_raw(file_dentry(file), attr, 0, true);
if (rc == -ENODATA)
rc = 0;
-
inode_unlock(inode);
out:
RETURN(-EINVAL);
inode_lock(inode);
+ /* inode lock owner set in ll_setattr_raw() */
rc = ll_setattr_raw(file_dentry(file), &ia, OP_XVALID_CTIME_SET,
false);
inode_unlock(inode);
/* apply child dentry if name is valid */
name_len = strnlen(lu_project.project_name, NAME_MAX);
if (name_len > 0 && name_len <= NAME_MAX) {
- inode_lock(inode);
+ ll_inode_lock(inode);
child_dentry = lookup_one_len(lu_project.project_name,
dentry, name_len);
- inode_unlock(inode);
+ ll_inode_unlock(inode);
if (IS_ERR(child_dentry)) {
rc = PTR_ERR(child_dentry);
goto out;
if (IS_ERR(op_data))
GOTO(out_iput, rc = PTR_ERR(op_data));
- inode_lock(child_inode);
+ ll_inode_lock(child_inode);
op_data->op_fid3 = *ll_inode2fid(child_inode);
if (!fid_is_sane(&op_data->op_fid3)) {
CERROR("%s: migrate %s, but FID "DFID" is insane\n",
if (!rc)
clear_nlink(child_inode);
out_unlock:
- inode_unlock(child_inode);
+ ll_inode_unlock(child_inode);
ll_finish_md_op_data(op_data);
out_iput:
iput(child_inode);
/* for non-directory */
struct {
struct mutex lli_size_mutex;
- char *lli_symlink_name;
+ struct task_struct *lli_size_lock_owner;
+ char *lli_symlink_name;
struct ll_trunc_sem lli_trunc_sem;
struct range_lock_tree lli_write_tree;
struct mutex lli_setattr_mutex;
struct list_head lli_xattrs; /* ll_xattr_entry->xe_list */
struct list_head lli_lccs; /* list of ll_cl_context */
seqlock_t lli_page_inv_lock;
+
+ struct task_struct *lli_inode_lock_owner;
};
#ifndef HAVE_USER_NAMESPACE_ARG
spin_unlock(&lli->lli_layout_lock);
}
-enum ll_file_flags {
+enum ll_inode_flags {
/* File data is modified. */
LLIF_DATA_MODIFIED = 0,
/* File is being restored */
LLIF_UPDATE_ATIME = 4,
/* foreign file/dir can be unlinked unconditionally */
LLIF_FOREIGN_REMOVABLE = 5,
+ /* flag bit 6 is unused for now */
/* Xattr cache is filled */
LLIF_XATTR_CACHE_FILLED = 7,
return ll_i2info(inode)->lli_pcc_inode;
}
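+
+/*
+ * lli_inode_lock_owner records which task holds this inode's i_rwsem so
+ * that lower layers (see vvp_prune() and vvp_inode_ops()) can detect
+ * whether the current task already owns the lock and avoid
+ * self-deadlock.
+ */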
+static inline void ll_set_inode_lock_owner(struct inode *inode)
+{
+ ll_i2info(inode)->lli_inode_lock_owner = current;
+}
+
+static inline void ll_clear_inode_lock_owner(struct inode *inode)
+{
+ ll_i2info(inode)->lli_inode_lock_owner = NULL;
+}
+
+static inline struct task_struct *ll_get_inode_lock_owner(struct inode *inode)
+{
+ return ll_i2info(inode)->lli_inode_lock_owner;
+}
+
+/* lock the inode and set the inode lock owner */
+static inline void ll_inode_lock(struct inode *inode)
+{
+ inode_lock(inode);
+ ll_set_inode_lock_owner(inode);
+}
+
+/* clear the inode lock owner and unlock the inode */
+static inline void ll_inode_unlock(struct inode *inode)
+{
+ ll_clear_inode_lock_owner(inode);
+ inode_unlock(inode);
+}
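+
+/*
+ * VFS entry points where i_rwsem is already held by the caller only
+ * record ownership with ll_set_inode_lock_owner()/
+ * ll_clear_inode_lock_owner(); paths that take the lock themselves use
+ * ll_inode_lock()/ll_inode_unlock().
+ */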
+
/* default to use at least 16M for fast read if possible */
#define RA_REMAIN_WINDOW_MIN MiB_TO_PAGES(16UL)
void ll_update_inode_flags(struct inode *inode, unsigned int ext_flags);
void ll_update_dir_depth_dmv(struct inode *dir, struct dentry *de);
int ll_read_inode2(struct inode *inode, void *opaque);
-void ll_truncate_inode_pages_final(struct inode *inode, struct cl_io *io);
+void ll_truncate_inode_pages_final(struct inode *inode);
void ll_delete_inode(struct inode *inode);
int ll_iocontrol(struct inode *inode, struct file *file,
unsigned int cmd, void __user *uarg);
/* ll_cl_context initialize */
INIT_LIST_HEAD(&lli->lli_lccs);
seqlock_init(&lli->lli_page_inv_lock);
+ lli->lli_inode_lock_owner = NULL;
}
#define MAX_STRING_SIZE 128
* cache is not cleared yet. */
op_data->op_attr.ia_valid &= ~(TIMES_SET_FLAGS | ATTR_SIZE);
if (S_ISREG(inode->i_mode))
- inode_lock(inode);
+ ll_inode_lock(inode);
rc = simple_setattr(&init_user_ns, dentry, &op_data->op_attr);
if (S_ISREG(inode->i_mode))
- inode_unlock(inode);
+ ll_inode_unlock(inode);
op_data->op_attr.ia_valid = ia_valid;
rc = ll_update_inode(inode, &md);
ENTRY;
+ /* VFS has locked the inode before calling this */
+ ll_set_inode_lock_owner(inode);
+
CDEBUG(D_VFSTRACE, "%s: setattr inode "DFID"(%p) from %llu to %llu, "
"valid %x, hsm_import %d\n",
ll_i2sbi(inode)->ll_fsname, PFID(&lli->lli_fid),
hsm_import);
if (attr->ia_valid & ATTR_SIZE) {
- /* Check new size against VFS/VM file size limit and rlimit */
- rc = inode_newsize_ok(inode, attr->ia_size);
- if (rc)
- RETURN(rc);
-
- /* The maximum Lustre file size is variable, based on the
- * OST maximum object size and number of stripes. This
- * needs another check in addition to the VFS check above. */
- if (attr->ia_size > ll_file_maxbytes(inode)) {
+ /* Check new size against VFS/VM file size limit and rlimit */
+ rc = inode_newsize_ok(inode, attr->ia_size);
+ if (rc)
+ GOTO(clear, rc);
+
+ /* The maximum Lustre file size is variable, based on the
+ * OST maximum object size and number of stripes. This
+ * needs another check in addition to the VFS check above. */
+ if (attr->ia_size > ll_file_maxbytes(inode)) {
CDEBUG(D_INODE,"file "DFID" too large %llu > %llu\n",
- PFID(&lli->lli_fid), attr->ia_size,
- ll_file_maxbytes(inode));
- RETURN(-EFBIG);
- }
+ PFID(&lli->lli_fid), attr->ia_size,
+ ll_file_maxbytes(inode));
+ GOTO(clear, rc = -EFBIG);
+ }
- attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
- }
+ attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
+ }
/* POSIX: check before ATTR_*TIME_SET set (from inode_change_ok) */
if (attr->ia_valid & TIMES_SET_FLAGS) {
if ((!uid_eq(current_fsuid(), inode->i_uid)) &&
!capable(CAP_FOWNER))
- RETURN(-EPERM);
+ GOTO(clear, rc = -EPERM);
}
/* We mark all of the fields "set" so MDS/OST does not re-set them */
if (!(attr->ia_valid & ATTR_MTIME_SET) &&
(attr->ia_valid & ATTR_MTIME)) {
attr->ia_mtime = current_time(inode);
- attr->ia_valid |= ATTR_MTIME_SET;
- }
+ attr->ia_valid |= ATTR_MTIME_SET;
+ }
if (attr->ia_valid & (ATTR_MTIME | ATTR_CTIME))
CDEBUG(D_INODE, "setting mtime %lld, ctime %lld, now = %lld\n",
ktime_get_real_seconds());
if (S_ISREG(inode->i_mode))
- inode_unlock(inode);
+ ll_inode_unlock(inode);
/* We always do an MDS RPC, even if we're only changing the size;
* only the MDS knows whether truncate() should fail with -ETXTBUSY */
ll_finish_md_op_data(op_data);
if (S_ISREG(inode->i_mode)) {
- inode_lock(inode);
+ ll_inode_lock(inode);
if ((attr->ia_valid & ATTR_SIZE) && !hsm_import)
inode_dio_wait(inode);
/* Once we've got the i_mutex, it's safe to set the S_NOSEC
ll_stats_ops_tally(ll_i2sbi(inode), attr->ia_valid & ATTR_SIZE ?
LPROC_LL_TRUNC : LPROC_LL_SETATTR,
ktime_us_delta(ktime_get(), kstart));
+clear:
+ ll_clear_inode_lock_owner(inode);
RETURN(rc);
}
lli = ll_i2info(inode);
mutex_lock(&lli->lli_size_mutex);
+ lli->lli_size_lock_owner = current;
}
void ll_inode_size_unlock(struct inode *inode)
struct ll_inode_info *lli;
lli = ll_i2info(inode);
+ lli->lli_size_lock_owner = NULL;
mutex_unlock(&lli->lli_size_mutex);
}
lli->lli_inherit_depth);
}
-void ll_truncate_inode_pages_final(struct inode *inode, struct cl_io *io)
+void ll_truncate_inode_pages_final(struct inode *inode)
{
struct address_space *mapping = &inode->i_data;
unsigned long nrpages;
unsigned long flags;
- LASSERTF(io == NULL || inode_is_locked(inode), "io %p (type %d)\n",
- io, io ? io->ci_type : 0);
+ LASSERTF((inode->i_state & I_FREEING) || inode_is_locked(inode),
+ DFID ":inode %p state %#lx, lli_flags %#lx\n",
+ PFID(ll_inode2fid(inode)), inode, inode->i_state,
+ ll_i2info(inode)->lli_flags);
truncate_inode_pages_final(mapping);
} /* Workaround end */
LASSERTF(nrpages == 0, "%s: inode="DFID"(%p) nrpages=%lu "
- "io %p (io_type %d), "
+ "state %#lx, lli_flags %#lx, "
"see https://jira.whamcloud.com/browse/LU-118\n",
ll_i2sbi(inode)->ll_fsname,
PFID(ll_inode2fid(inode)), inode, nrpages,
- io, io ? io->ci_type : 0);
+ inode->i_state, ll_i2info(inode)->lli_flags);
}
int ll_read_inode2(struct inode *inode, void *opaque)
CL_FSYNC_LOCAL : CL_FSYNC_DISCARD, 1);
}
- ll_truncate_inode_pages_final(inode, NULL);
+ ll_truncate_inode_pages_final(inode);
ll_clear_inode(inode);
clear_inode(inode);
if (IS_ERR(op_data))
GOTO(out, rc = PTR_ERR(op_data));
- inode_lock(dir);
+ ll_inode_lock(dir);
#ifdef HAVE_DIR_CONTEXT
rc = ll_dir_read(dir, &pos, op_data, &lgd.ctx, NULL);
#else
rc = ll_dir_read(dir, &pos, op_data, &lgd, ll_nfs_get_name_filldir,
NULL);
#endif
- inode_unlock(dir);
+ ll_inode_unlock(dir);
ll_finish_md_op_data(op_data);
if (!rc && !lgd.lgd_found)
rc = -ENOENT;
unsigned int flags)
{
struct lookup_intent *itp, it = { .it_op = IT_GETATTR };
- struct dentry *de;
+ struct dentry *de = NULL;
+
+ /* VFS has locked the inode before calling this */
+ ll_set_inode_lock_owner(parent);
CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p), flags=%u\n",
dentry, PFID(ll_inode2fid(parent)), parent, flags);
if ((flags & LOOKUP_CREATE) && !(flags & LOOKUP_OPEN) &&
(inode_permission(&init_user_ns,
parent, MAY_WRITE | MAY_EXEC) == 0))
- return NULL;
+ goto clear;
if (flags & (LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE))
itp = NULL;
if (itp != NULL)
ll_intent_release(itp);
+clear:
+ ll_clear_inode_lock_owner(parent);
+
return de;
}
int rc = 0;
ENTRY;
+ /* VFS has locked the inode before calling this */
+ ll_set_inode_lock_owner(dir);
+
CDEBUG(D_VFSTRACE,
"VFS Op:name=%pd, dir="DFID"(%p), file %p, open_flags %x, mode %x opened %d\n",
dentry, PFID(ll_inode2fid(dir)), dir, file, open_flags, mode,
* Either way it's a valid race to just return -ENOENT here.
*/
if (!(open_flags & O_CREAT))
- return -ENOENT;
+ GOTO(clear, rc = -ENOENT);
/* Otherwise we just unhash it to be rehashed afresh via
* lookup if necessary
OBD_ALLOC(it, sizeof(*it));
if (!it)
- RETURN(-ENOMEM);
+ GOTO(clear, rc = -ENOMEM);
it->it_op = IT_OPEN;
if (open_flags & O_CREAT) {
out_release:
ll_intent_release(it);
OBD_FREE(it, sizeof(*it));
+clear:
+ ll_clear_inode_lock_owner(dir);
RETURN(rc);
}
int err;
ENTRY;
+ /* VFS has locked the inode before calling this */
+ ll_set_inode_lock_owner(dir);
+
CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p) mode %o dev %x\n",
dchild, PFID(ll_inode2fid(dir)), dir, mode, rdev);
if (!err)
ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_MKNOD,
ktime_us_delta(ktime_get(), kstart));
+ ll_clear_inode_lock_owner(dir);
RETURN(err);
}
ktime_t kstart = ktime_get();
int rc;
+ /* VFS has locked the inode before calling this */
+ ll_set_inode_lock_owner(dir);
+
CFS_FAIL_TIMEOUT(OBD_FAIL_LLITE_CREATE_FILE_PAUSE, cfs_fail_val);
CDEBUG(D_VFSTRACE,
ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_CREATE,
ktime_us_delta(ktime_get(), kstart));
+ ll_clear_inode_lock_owner(dir);
+
return rc;
}
int err;
ENTRY;
+ /* VFS has locked the inode before calling this */
+ ll_set_inode_lock_owner(dir);
+
CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p), target=%.*s\n",
dchild, PFID(ll_inode2fid(dir)), dir, 3000, oldpath);
err = llcrypt_prepare_symlink(dir, oldpath, len, dir->i_sb->s_blocksize,
&disk_link);
if (err)
- RETURN(err);
+ GOTO(out, err);
err = ll_new_node(dir, dchild, oldpath, S_IFLNK | S_IRWXUGO,
(__u64)&disk_link, LUSTRE_OPC_SYMLINK);
ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_SYMLINK,
ktime_us_delta(ktime_get(), kstart));
+out:
+ ll_clear_inode_lock_owner(dir);
+
RETURN(err);
}
int err;
ENTRY;
+ /* VFS has locked the inodes before calling this */
+ ll_set_inode_lock_owner(src);
+ ll_set_inode_lock_owner(dir);
+
CDEBUG(D_VFSTRACE,
"VFS Op: inode="DFID"(%p), dir="DFID"(%p), target=%pd\n",
PFID(ll_inode2fid(src)), src,
err = llcrypt_prepare_link(old_dentry, dir, new_dentry);
if (err)
- RETURN(err);
+ GOTO(clear, err);
op_data = ll_prep_md_op_data(NULL, src, dir, name->name, name->len,
0, LUSTRE_OPC_ANY, NULL);
if (IS_ERR(op_data))
- RETURN(PTR_ERR(op_data));
+ GOTO(clear, err = PTR_ERR(op_data));
err = md_link(sbi->ll_md_exp, op_data, &request);
ll_finish_md_op_data(op_data);
EXIT;
out:
ptlrpc_req_finished(request);
+clear:
+ ll_clear_inode_lock_owner(src);
+ ll_clear_inode_lock_owner(dir);
+
RETURN(err);
}
int err;
ENTRY;
+ /* VFS has locked the inode before calling this */
+ ll_set_inode_lock_owner(dir);
+
CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p)\n",
dchild, PFID(ll_inode2fid(dir)), dir);
ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_MKDIR,
ktime_us_delta(ktime_get(), kstart));
+ ll_clear_inode_lock_owner(dir);
+
RETURN(err);
}
ENTRY;
+ /* VFS has locked the inodes before calling this */
+ ll_set_inode_lock_owner(dir);
+ ll_set_inode_lock_owner(dchild->d_inode);
+
CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p)\n",
dchild, PFID(ll_inode2fid(dir)), dir);
if (unlikely(d_mountpoint(dchild)))
- RETURN(-EBUSY);
+ GOTO(out, rc = -EBUSY);
/* some foreign dir may not be allowed to be removed */
if (!ll_foreign_is_removable(dchild, false))
- RETURN(-EPERM);
+ GOTO(out, rc = -EPERM);
op_data = ll_prep_md_op_data(NULL, dir, NULL, name->name, name->len,
S_IFDIR, LUSTRE_OPC_ANY, NULL);
if (IS_ERR(op_data))
- RETURN(PTR_ERR(op_data));
+ GOTO(out, rc = PTR_ERR(op_data));
if (dchild->d_inode != NULL)
op_data->op_fid3 = *ll_inode2fid(dchild->d_inode);
}
ptlrpc_req_finished(request);
+out:
+ ll_clear_inode_lock_owner(dir);
+ ll_clear_inode_lock_owner(dchild->d_inode);
RETURN(rc);
}
ENTRY;
+ /* VFS has locked the inodes before calling this */
+ ll_set_inode_lock_owner(dir);
+ ll_set_inode_lock_owner(dchild->d_inode);
+
CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p)\n",
dchild, PFID(ll_inode2fid(dir)), dir);
* just check it as vfs_unlink does.
*/
if (unlikely(d_mountpoint(dchild)))
- RETURN(-EBUSY);
+ GOTO(clear, rc = -EBUSY);
/* some foreign file/dir may not be allowed to be unlinked */
if (!ll_foreign_is_removable(dchild, false))
- RETURN(-EPERM);
+ GOTO(clear, rc = -EPERM);
op_data = ll_prep_md_op_data(NULL, dir, NULL, name->name, name->len, 0,
LUSTRE_OPC_ANY, NULL);
if (IS_ERR(op_data))
- RETURN(PTR_ERR(op_data));
+ GOTO(clear, rc = PTR_ERR(op_data));
op_data->op_fid3 = *ll_inode2fid(dchild->d_inode);
/* notify lower layer if inode has dirty pages */
if (!rc)
ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_UNLINK,
ktime_us_delta(ktime_get(), kstart));
+clear:
+ ll_clear_inode_lock_owner(dir);
+ ll_clear_inode_lock_owner(dchild->d_inode);
RETURN(rc);
}
int err;
ENTRY;
+ /* VFS has locked the inodes before calling this */
+ ll_set_inode_lock_owner(src);
+ ll_set_inode_lock_owner(tgt);
+ if (tgt_dchild->d_inode)
+ ll_set_inode_lock_owner(tgt_dchild->d_inode);
+
#if defined(HAVE_USER_NAMESPACE_ARG) || defined(HAVE_IOPS_RENAME_WITH_FLAGS)
if (flags)
- return -EINVAL;
+ GOTO(out, err = -EINVAL);
#endif
CDEBUG(D_VFSTRACE,
tgt_dchild, PFID(ll_inode2fid(tgt)), tgt);
if (unlikely(d_mountpoint(src_dchild) || d_mountpoint(tgt_dchild)))
- RETURN(-EBUSY);
+ GOTO(out, err = -EBUSY);
#if defined(HAVE_USER_NAMESPACE_ARG) || defined(HAVE_IOPS_RENAME_WITH_FLAGS)
err = llcrypt_prepare_rename(src, src_dchild, tgt, tgt_dchild, flags);
err = llcrypt_prepare_rename(src, src_dchild, tgt, tgt_dchild, 0);
#endif
if (err)
- RETURN(err);
+ GOTO(out, err);
/* we prevent an encrypted file from being renamed
* into an unencrypted dir
*/
if (IS_ENCRYPTED(src) && !IS_ENCRYPTED(tgt))
- RETURN(-EXDEV);
+ GOTO(out, err = -EXDEV);
if (src_dchild->d_inode)
mode = src_dchild->d_inode->i_mode;
op_data = ll_prep_md_op_data(NULL, src, tgt, NULL, 0, mode,
LUSTRE_OPC_ANY, NULL);
if (IS_ERR(op_data))
- RETURN(PTR_ERR(op_data));
+ GOTO(out, err = PTR_ERR(op_data));
/* If the client is using a subdir mount and does a rename to what it
* sees as /.fscrypt, interpret it as the .fscrypt dir at fs root.
err = ll_setup_filename(src, &src_dchild->d_name, 1, &foldname, NULL);
if (err)
- RETURN(err);
+ GOTO(out, err);
err = ll_setup_filename(tgt, &tgt_dchild->d_name, 1, &fnewname, NULL);
if (err) {
llcrypt_free_filename(&foldname);
- RETURN(err);
+ GOTO(out, err);
}
err = md_rename(sbi->ll_md_exp, op_data,
foldname.disk_name.name, foldname.disk_name.len,
ll_stats_ops_tally(sbi, LPROC_LL_RENAME,
ktime_us_delta(ktime_get(), kstart));
}
-
+out:
+ ll_clear_inode_lock_owner(src);
+ ll_clear_inode_lock_owner(tgt);
+ if (tgt_dchild->d_inode)
+ ll_clear_inode_lock_owner(tgt_dchild->d_inode);
RETURN(err);
}
iter = *vio->vui_iter;
if (unlikely(lock_inode))
- inode_lock(inode);
+ ll_inode_lock(inode);
result = __generic_file_write_iter(vio->vui_iocb, &iter);
if (unlikely(lock_inode))
- inode_unlock(inode);
+ ll_inode_unlock(inode);
written = result;
if (result > 0)
struct inode *inode = vvp_object_inode(io->ci_obj);
__u64 start = io->u.ci_lseek.ls_start;
- inode_lock(inode);
+ ll_inode_lock(inode);
inode_dio_wait(inode);
/* At the moment we have DLM lock so just update inode
if (io->u.ci_lseek.ls_result > i_size_read(inode))
io->u.ci_lseek.ls_result = -ENXIO;
- inode_unlock(inode);
+ ll_inode_unlock(inode);
}
static const struct cl_io_operations vvp_io_ops = {
static int vvp_prune(const struct lu_env *env, struct cl_object *obj)
{
- struct cl_io *io = vvp_env_io(env)->vui_cl.cis_io;
struct inode *inode = vvp_object_inode(obj);
int rc;
ENTRY;
RETURN(rc);
}
- if (io != NULL)
- inode_lock(inode);
+ if (ll_get_inode_lock_owner(inode) != current)
+ /* ask LOV to take the inode lock, then lo_type_guard */
+ RETURN(-EAGAIN);
- ll_truncate_inode_pages_final(inode, io);
- mapping_clear_exiting(inode->i_mapping);
+ LASSERTF(inode_is_locked(inode), DFID ":inode %p lli_flags %#lx\n",
+ PFID(lu_object_fid(&obj->co_lu)), inode,
+ ll_i2info(inode)->lli_flags);
- if (io != NULL)
- inode_unlock(inode);
+ ll_truncate_inode_pages_final(inode);
+ mapping_clear_exiting(inode->i_mapping);
RETURN(0);
}
memcpy(attr->cra_jobid, &lli->lli_jobid, sizeof(attr->cra_jobid));
}
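+/*
+ * Take or release the inode lock or the inode size lock for a lower
+ * layer: returns -EALREADY if the current task already owns the
+ * requested lock (so the caller must not release it later), and
+ * -ENOLCK if an unlock is requested by a task that is not the owner.
+ */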
+static int vvp_inode_ops(const struct lu_env *env, struct cl_object *obj,
+ enum coo_inode_opc opc, void *data)
+{
+ struct inode *inode = vvp_object_inode(obj);
+ int rc = 0;
+
+ ENTRY;
+ switch (opc) {
+ case COIO_INODE_LOCK:
+ if (ll_get_inode_lock_owner(inode) != current)
+ ll_inode_lock(inode);
+ else
+ rc = -EALREADY;
+ break;
+ case COIO_INODE_UNLOCK:
+ if (ll_get_inode_lock_owner(inode) == current)
+ ll_inode_unlock(inode);
+ else
+ rc = -ENOLCK;
+ break;
+ case COIO_SIZE_LOCK:
+ if (ll_i2info(inode)->lli_size_lock_owner != current)
+ ll_inode_size_lock(inode);
+ else
+ rc = -EALREADY;
+ break;
+ case COIO_SIZE_UNLOCK:
+ if (ll_i2info(inode)->lli_size_lock_owner == current)
+ ll_inode_size_unlock(inode);
+ else
+ rc = -ENOLCK;
+ break;
+ default:
+ rc = -EINVAL;
+ break;
+ }
+
+ RETURN(rc);
+}
+
static const struct cl_object_operations vvp_ops = {
.coo_page_init = vvp_page_init,
.coo_io_init = vvp_io_init,
.coo_conf_set = vvp_conf_set,
.coo_prune = vvp_prune,
.coo_glimpse = vvp_object_glimpse,
- .coo_req_attr_set = vvp_req_attr_set
+ .coo_req_attr_set = vvp_req_attr_set,
+ .coo_inode_ops = vvp_inode_ops,
};
static int vvp_object_init0(const struct lu_env *env,
int rc;
ENTRY;
+ /* VFS has locked the inode before calling this */
+ ll_set_inode_lock_owner(inode);
+
/* When setxattr() is called with a size of 0 the value is
* unconditionally replaced by "". When removexattr() is
* called we get a NULL value and XATTR_REPLACE for flags. */
/* FIXME: enable IMA when the conditions are ready */
if (handler->flags == XATTR_SECURITY_T &&
(!strcmp(name, "ima") || !strcmp(name, "evm")))
- RETURN(-EOPNOTSUPP);
+ GOTO(out, rc = -EOPNOTSUPP);
rc = xattr_type_filter(sbi, handler);
if (rc)
- RETURN(rc);
+ GOTO(out, rc);
if ((handler->flags == XATTR_ACL_ACCESS_T ||
handler->flags == XATTR_ACL_DEFAULT_T) &&
!inode_owner_or_capable(mnt_userns, inode))
- RETURN(-EPERM);
+ GOTO(out, rc = -EPERM);
/* b10667: ignore lustre special xattr for now */
if (!strcmp(name, "hsm") ||
((handler->flags == XATTR_TRUSTED_T && !strcmp(name, "lov")) ||
(handler->flags == XATTR_LUSTRE_T && !strcmp(name, "lov"))))
- RETURN(0);
+ GOTO(out, rc = 0);
rc = ll_security_secctx_name_filter(sbi, handler->flags, name);
if (rc)
- RETURN(rc);
+ GOTO(out, rc);
/*
* In user.* namespace, only regular files and directories can have
*/
if (handler->flags == XATTR_USER_T) {
if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
- RETURN(-EPERM);
+ GOTO(out, rc = -EPERM);
}
/* This check is required for compatibility with 2.14, in which
* context is set directly in the create request.
*/
if (handler->flags == XATTR_SECURITY_T && strcmp(name, "c") == 0)
- RETURN(-EPERM);
+ GOTO(out, rc = -EPERM);
fullname = kasprintf(GFP_KERNEL, "%s%s", xattr_prefix(handler), name);
if (!fullname)
- RETURN(-ENOMEM);
+ GOTO(out, rc = -ENOMEM);
rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode), valid, fullname,
pv, size, flags, ll_i2suppgid(inode), &req);
LCONSOLE_INFO("Disabling user_xattr feature because it is not supported on the server\n");
clear_bit(LL_SBI_USER_XATTR, sbi->ll_flags);
}
- RETURN(rc);
+ GOTO(out, rc);
}
ll_i2info(inode)->lli_synced_to_mds = false;
ll_stats_ops_tally(ll_i2sbi(inode), valid == OBD_MD_FLXATTRRM ?
LPROC_LL_REMOVEXATTR : LPROC_LL_SETXATTR,
ktime_us_delta(ktime_get(), kstart));
+out:
+ ll_clear_inode_lock_owner(inode);
- RETURN(0);
+ RETURN(rc);
}
static int get_hsm_state(struct inode *inode, u32 *hus_states)
ktime_t kstart = ktime_get();
int op_type = flags == XATTR_REPLACE ? LPROC_LL_REMOVEXATTR :
LPROC_LL_SETXATTR;
- int rc;
+ int rc = 0;
LASSERT(inode);
LASSERT(name);
+ /* VFS has locked the inode before calling this */
+ ll_set_inode_lock_owner(inode);
+
CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p), xattr %s\n",
PFID(ll_inode2fid(inode)), inode, name);
size);
ll_stats_ops_tally(ll_i2sbi(inode), op_type,
ktime_us_delta(ktime_get(), kstart));
- return rc;
+ goto out;
} else if (!strcmp(name, "lma") || !strcmp(name, "link")) {
ll_stats_ops_tally(ll_i2sbi(inode), op_type,
ktime_us_delta(ktime_get(), kstart));
- return 0;
+ goto out;
}
if (strncmp(name, "lov.", 4) == 0 &&
le32_to_cpu(LOV_MAGIC_MASK)) == le32_to_cpu(LOV_MAGIC_MAGIC))
lustre_swab_lov_user_md((struct lov_user_md *)value, 0);
- return ll_xattr_set_common(handler, mnt_userns, dentry, inode, name,
- value, size, flags);
+ rc = ll_xattr_set_common(handler, mnt_userns, dentry, inode, name,
+ value, size, flags);
+out:
+ ll_clear_inode_lock_owner(inode);
+
+ return rc;
}
int ll_xattr_list(struct inode *inode, const char *name, int type, void *buffer,
LASSERT(r0->lo_sub[idx] == NULL);
}
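+/*
+ * Delete a raid0 layout entry. Returns 0 or the error from
+ * cl_object_prune(); in particular vvp_prune() returns -EAGAIN when
+ * the inode lock must be taken first, see lov_conf_set().
+ */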
-static void lov_delete_raid0(const struct lu_env *env, struct lov_object *lov,
+static int lov_delete_raid0(const struct lu_env *env, struct lov_object *lov,
struct lov_layout_entry *lle)
{
struct lov_layout_raid0 *r0 = &lle->lle_raid0;
+ int rc;
ENTRY;
struct lovsub_object *los = r0->lo_sub[i];
if (los != NULL) {
- cl_object_prune(env, &los->lso_cl);
+ rc = cl_object_prune(env, &los->lso_cl);
+ if (rc)
+ RETURN(rc);
/*
* If top-level object is to be evicted from
* the cache, so are its sub-objects.
}
}
- EXIT;
+ RETURN(0);
}
static void lov_fini_raid0(const struct lu_env *env,
union lov_layout_state *state)
{
struct lov_layout_entry *entry;
+ int rc;
ENTRY;
if (entry->lle_lsme && lsme_is_foreign(entry->lle_lsme))
continue;
- lov_delete_raid0(env, lov, entry);
+ rc = lov_delete_raid0(env, lov, entry);
+ if (rc)
+ RETURN(rc);
}
RETURN(0);
struct lov_stripe_md *lsm = NULL;
struct lov_object *lov = cl2lov(obj);
int result = 0;
+ struct cl_object *top = cl_object_top(obj);
+ bool unlock_inode = false;
+ bool lock_inode_size = false;
ENTRY;
if (conf->coc_opc == OBJECT_CONF_SET &&
GOTO(out_lsm, result = 0);
}
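+ /*
+ * lov_layout_change() returns -EAGAIN (from vvp_prune()) when this
+ * task does not own the inode lock; in that case drop the lov conf
+ * lock, take the inode (and size) locks in order, and retry.
+ */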
+retry:
lov_conf_lock(lov);
if (conf->coc_opc == OBJECT_CONF_WAIT) {
if (test_bit(LO_LAYOUT_INVALID, &lov->lo_obj_flags) &&
}
result = lov_layout_change(env, lov, lsm, conf);
- if (result)
+ if (result) {
+ if (result == -EAGAIN) {
+ /*
+ * We need to unlock the lov conf and take the inode lock.
+ * It's possible we have already taken the inode's size
+ * mutex, so keep that lock order, lest a deadlock
+ * happen:
+ * inode lock (ll_inode_lock())
+ * inode size lock (ll_inode_size_lock())
+ * lov conf lock (lov_conf_lock())
+ *
+ * e.g.
+ * vfs_setxattr inode locked
+ * ll_lov_setstripe_ea_info inode size locked
+ * ll_prep_inode
+ * ll_file_inode_init
+ * cl_conf_set
+ * lov_conf_set lov conf locked
+ */
+ lov_conf_unlock(lov);
+ if (cl_object_inode_ops(
+ env, top, COIO_SIZE_UNLOCK, NULL) == 0)
+ lock_inode_size = true;
+
+ /* take lock in order */
+ if (cl_object_inode_ops(
+ env, top, COIO_INODE_LOCK, NULL) == 0)
+ unlock_inode = true;
+ if (lock_inode_size)
+ cl_object_inode_ops(
+ env, top, COIO_SIZE_LOCK, NULL);
+ goto retry;
+ }
set_bit(LO_LAYOUT_INVALID, &lov->lo_obj_flags);
- else
+ } else {
clear_bit(LO_LAYOUT_INVALID, &lov->lo_obj_flags);
+ }
EXIT;
out:
lov_conf_unlock(lov);
+ if (unlock_inode)
+ cl_object_inode_ops(env, top, COIO_INODE_UNLOCK, NULL);
out_lsm:
lov_lsm_put(lsm);
CDEBUG(D_INODE, DFID" lo_layout_invalid=%u\n",
}
EXPORT_SYMBOL(cl_object_flush);
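+/**
+ * Pass an inode operation down the cl object layer stack.
+ *
+ * Calls ->coo_inode_ops() on each layer that implements it, from top
+ * to bottom, stopping at the first non-zero return. Only the VVP
+ * layer provides an implementation (vvp_inode_ops()).
+ */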
+int cl_object_inode_ops(const struct lu_env *env, struct cl_object *top,
+ enum coo_inode_opc opc, void *data)
+{
+ struct cl_object *obj;
+ int rc = 0;
+
+ ENTRY;
+
+ cl_object_for_each(obj, top) {
+ if (obj->co_ops->coo_inode_ops) {
+ rc = obj->co_ops->coo_inode_ops(env, obj, opc, data);
+ if (rc)
+ break;
+ }
+ }
+ RETURN(rc);
+}
+EXPORT_SYMBOL(cl_object_inode_ops);
+
/**
* Helper function removing all object locks, and marking object for
* deletion. All object pages must have been deleted at this point.