Whamcloud - gitweb
LU-15535 llite: deadlock on lli_lsm_sem 89/50489/11
author Vitaly Fertman <c17818@cray.com>
Fri, 31 Mar 2023 18:04:44 +0000 (21:04 +0300)
committer Oleg Drokin <green@whamcloud.com>
Wed, 19 Jul 2023 16:47:29 +0000 (16:47 +0000)
It may happen that one process is doing a lookup and, after the reply,
while still holding the LDLM lock, is trying to update the LSM/default
LSM of a dir under the write lli_lsm_sem.

Another process has taken the read lli_lsm_sem (taken for all the MD
ops in ll_prep_md_op_data()) and is waiting for a conflicting PW LDLM
lock on the server for its modification of this dir.

This may happen on restriping with LSM or on changing the default LSM,
but an even more common way to hit it is a racer run, even without
striped dirs:
- racer does LFS mkdir -i $i <subdir> for each MDS, which creates a default
  LSM on these subdirs that is inherited endlessly, to keep the MDS index;
- racer also does mkdir -p <path>, in which case we do:
ll_new_node - create a parent dir, no RMF_DEFAULT_MDT_MD in reply
ll_lookup parent it=open - no RMF_DEFAULT_MDT_MD in reply
ll_new_node - create a child
The default LSM is inherited on parent creation; however, as those RPCs
have neither a lookup LDLM lock nor the data, the default layout is not
set for the parent in its inode at the time of child creation. Thus a
parallel lookup which gets the LSM deadlocks with this ll_new_node().

At the same time, similar to CLIO, we need to hold neither a sem nor an
LDLM lock over the whole operation to avoid LSM modification on the
server; we just need to take an up-to-date LSM (this is a subject for
LU-16320) and to guarantee that this op works on this LSM on the client
for the whole operation.

The solution is to let MD ops work on a copy of the LSM, thereby letting
others modify the LSM attached to the inode in parallel if needed.

HPE-bug-id: LUS-10725
Signed-off-by: Vitaly Fertman <vitaly.fertman@hpe.com>
Change-Id: I3137300b5bcce2e890994ce8751cdf7fce2f3f54
Reviewed-on: https://es-gerrit.hpc.amslabs.hpecorp.net/161525
Reviewed-by: Alexey Lyashkov <alexey.lyashkov@hpe.com>
Reviewed-by: Andriy Skulysh <c17819@cray.com>
Tested-by: Vitaly Fertman <c17818@cray.com>
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/50489
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Lai Siyao <lai.siyao@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
14 files changed:
lustre/include/lustre_lmv.h
lustre/include/obd.h
lustre/include/obd_class.h
lustre/llite/dir.c
lustre/llite/file.c
lustre/llite/llite_foreign.c
lustre/llite/llite_foreign_symlink.c
lustre/llite/llite_internal.h
lustre/llite/llite_lib.c
lustre/llite/namei.c
lustre/lmv/lmv_intent.c
lustre/lmv/lmv_internal.h
lustre/lmv/lmv_obd.c
lustre/mdc/mdc_request.c

index 96ae7f6..dd833b5 100644 (file)
@@ -54,31 +54,39 @@ struct lmv_stripe_md {
        struct lmv_oinfo lsm_md_oinfo[0];
 };
 
-static inline bool lmv_dir_striped(const struct lmv_stripe_md *lsm)
+struct lmv_stripe_object {
+       atomic_t                        lso_refs;
+       union {
+               struct lmv_stripe_md    lso_lsm;
+               struct lmv_foreign_md   lso_lfm;
+       };
+};
+
+static inline bool lmv_dir_striped(const struct lmv_stripe_object *lso)
 {
-       return lsm && lsm->lsm_md_magic == LMV_MAGIC;
+       return lso && lso->lso_lsm.lsm_md_magic == LMV_MAGIC;
 }
 
-static inline bool lmv_dir_foreign(const struct lmv_stripe_md *lsm)
+static inline bool lmv_dir_foreign(const struct lmv_stripe_object *lso)
 {
-       return lsm && lsm->lsm_md_magic == LMV_MAGIC_FOREIGN;
+       return lso && lso->lso_lsm.lsm_md_magic == LMV_MAGIC_FOREIGN;
 }
 
-static inline bool lmv_dir_layout_changing(const struct lmv_stripe_md *lsm)
+static inline bool lmv_dir_layout_changing(const struct lmv_stripe_object *lso)
 {
-       return lmv_dir_striped(lsm) &&
-              lmv_hash_is_layout_changing(lsm->lsm_md_hash_type);
+       return lmv_dir_striped(lso) &&
+              lmv_hash_is_layout_changing(lso->lso_lsm.lsm_md_hash_type);
 }
 
-static inline bool lmv_dir_bad_hash(const struct lmv_stripe_md *lsm)
+static inline bool lmv_dir_bad_hash(const struct lmv_stripe_object *lso)
 {
-       if (!lmv_dir_striped(lsm))
+       if (!lmv_dir_striped(lso))
                return false;
 
-       if (lsm->lsm_md_hash_type & LMV_HASH_FLAG_BAD_TYPE)
+       if (lso->lso_lsm.lsm_md_hash_type & LMV_HASH_FLAG_BAD_TYPE)
                return true;
 
-       return !lmv_is_known_hash_type(lsm->lsm_md_hash_type);
+       return !lmv_is_known_hash_type(lso->lso_lsm.lsm_md_hash_type);
 }
 
 static inline __u8 lmv_inherit_next(__u8 inherit)
@@ -108,9 +116,11 @@ static inline bool lmv_is_inheritable(__u8 inherit)
               (inherit > LMV_INHERIT_END && inherit <= LMV_INHERIT_MAX);
 }
 
-static inline bool
-lsm_md_eq(const struct lmv_stripe_md *lsm1, const struct lmv_stripe_md *lsm2)
+static inline bool lsm_md_eq(const struct lmv_stripe_object *lso1,
+                            const struct lmv_stripe_object *lso2)
 {
+       const struct lmv_stripe_md *lsm1 = &lso1->lso_lsm;
+       const struct lmv_stripe_md *lsm2 = &lso2->lso_lsm;
        __u32 idx;
 
        if (lsm1->lsm_md_magic != lsm2->lsm_md_magic ||
@@ -130,7 +140,7 @@ lsm_md_eq(const struct lmv_stripe_md *lsm1, const struct lmv_stripe_md *lsm2)
                    sizeof(lsm1->lsm_md_pool_name)) != 0)
                return false;
 
-       if (lmv_dir_striped(lsm1)) {
+       if (lmv_dir_striped(lso1)) {
                for (idx = 0; idx < lsm1->lsm_md_stripe_count; idx++) {
                        if (!lu_fid_eq(&lsm1->lsm_md_oinfo[idx].lmo_fid,
                                       &lsm2->lsm_md_oinfo[idx].lmo_fid))
@@ -147,14 +157,16 @@ lsm_md_eq(const struct lmv_stripe_md *lsm1, const struct lmv_stripe_md *lsm2)
        return true;
 }
 
-static inline void lsm_md_dump(int mask, const struct lmv_stripe_md *lsm)
+static inline void
+lmv_stripe_object_dump(int mask, const struct lmv_stripe_object *lsmo)
 {
+       const struct lmv_stripe_md *lsm = &lsmo->lso_lsm;
        int i;
 
        CDEBUG_LIMIT(mask,
-              "dump LMV: magic=%#x count=%u index=%u hash=%s:%#x max_inherit=%hhu max_inherit_rr=%hhu version=%u migrate_offset=%u migrate_hash=%s:%x pool=%.*s\n",
-              lsm->lsm_md_magic, lsm->lsm_md_stripe_count,
-              lsm->lsm_md_master_mdt_index,
+              "dump LMV: refs %u magic=%#x count=%u index=%u hash=%s:%#x max_inherit=%hhu max_inherit_rr=%hhu version=%u migrate_offset=%u migrate_hash=%s:%x pool=%.*s\n",
+              lsm->lsm_md_magic, atomic_read(&lsmo->lso_refs),
+              lsm->lsm_md_stripe_count, lsm->lsm_md_master_mdt_index,
               lmv_is_known_hash_type(lsm->lsm_md_hash_type) ?
                mdt_hash_name[lsm->lsm_md_hash_type & LMV_HASH_TYPE_MASK] :
                "invalid", lsm->lsm_md_hash_type,
@@ -165,7 +177,7 @@ static inline void lsm_md_dump(int mask, const struct lmv_stripe_md *lsm)
                "invalid", lsm->lsm_md_migrate_hash,
               LOV_MAXPOOLNAME, lsm->lsm_md_pool_name);
 
-       if (!lmv_dir_striped(lsm))
+       if (!lmv_dir_striped(lsmo))
                return;
 
        for (i = 0; i < lsm->lsm_md_stripe_count; i++)
@@ -174,24 +186,34 @@ static inline void lsm_md_dump(int mask, const struct lmv_stripe_md *lsm)
 }
 
 static inline bool
-lsm_md_inherited(const struct lmv_stripe_md *plsm,
-                const struct lmv_stripe_md *clsm)
+lmv_object_inherited(const struct lmv_stripe_object *plsm,
+                    const struct lmv_stripe_object *clsm)
 {
        return plsm && clsm &&
-              plsm->lsm_md_magic == clsm->lsm_md_magic &&
-              plsm->lsm_md_stripe_count == clsm->lsm_md_stripe_count &&
-              plsm->lsm_md_master_mdt_index ==
-                       clsm->lsm_md_master_mdt_index &&
-              plsm->lsm_md_hash_type == clsm->lsm_md_hash_type &&
-              lmv_inherit_next(plsm->lsm_md_max_inherit) ==
-                       clsm->lsm_md_max_inherit &&
-              lmv_inherit_rr_next(plsm->lsm_md_max_inherit_rr) ==
-                       clsm->lsm_md_max_inherit_rr;
+              plsm->lso_lsm.lsm_md_magic ==
+                       clsm->lso_lsm.lsm_md_magic &&
+              plsm->lso_lsm.lsm_md_stripe_count ==
+                       clsm->lso_lsm.lsm_md_stripe_count &&
+              plsm->lso_lsm.lsm_md_master_mdt_index ==
+                       clsm->lso_lsm.lsm_md_master_mdt_index &&
+              plsm->lso_lsm.lsm_md_hash_type ==
+                       clsm->lso_lsm.lsm_md_hash_type &&
+              lmv_inherit_next(plsm->lso_lsm.lsm_md_max_inherit) ==
+                       clsm->lso_lsm.lsm_md_max_inherit &&
+              lmv_inherit_rr_next(plsm->lso_lsm.lsm_md_max_inherit_rr) ==
+                       clsm->lso_lsm.lsm_md_max_inherit_rr;
 }
 
 union lmv_mds_md;
 
-void lmv_free_memmd(struct lmv_stripe_md *lsm);
+struct lmv_stripe_object *lmv_stripe_object_alloc(__u32 magic,
+                                                 const union lmv_mds_md *lmm,
+                                                 size_t lmm_size);
+
+void lmv_stripe_object_put(struct lmv_stripe_object **lsm_obj);
+
+struct lmv_stripe_object *
+       lmv_stripe_object_get(struct lmv_stripe_object *lsm_obj);
 
 static inline void lmv1_le_to_cpu(struct lmv_mds_md_v1 *lmv_dst,
                                  const struct lmv_mds_md_v1 *lmv_src)
index 64e1ea3..bbe2bf2 100644 (file)
@@ -894,11 +894,9 @@ struct md_op_data {
        s64                     op_mod_time;
        const char              *op_name;
        size_t                  op_namelen;
-       struct rw_semaphore     *op_mea1_sem;
-       struct rw_semaphore     *op_mea2_sem;
-       struct lmv_stripe_md    *op_mea1;
-       struct lmv_stripe_md    *op_mea2;
-       struct lmv_stripe_md    *op_default_mea1;       /* default LMV */
+       struct lmv_stripe_object *op_lso1;
+       struct lmv_stripe_object *op_lso2;
+       struct lmv_stripe_object *op_default_lso1; /* default LMV */
        __u32                   op_suppgids[2];
        __u32                   op_fsuid;
        __u32                   op_fsgid;
@@ -1156,15 +1154,12 @@ struct obd_ops {
 
 /* lmv structures */
 struct lustre_md {
-       struct mdt_body         *body;
-       struct lu_buf            layout;
-       union {
-               struct lmv_stripe_md    *lmv;
-               struct lmv_foreign_md   *lfm;
-       };
-       struct lmv_stripe_md    *default_lmv;
+       struct mdt_body                 *body;
+       struct lu_buf                   layout;
+       struct lmv_stripe_object        *lsm_obj;
+       struct lmv_stripe_object        *def_lsm_obj;
 #ifdef CONFIG_LUSTRE_FS_POSIX_ACL
-       struct posix_acl        *posix_acl;
+       struct posix_acl                *posix_acl;
 #endif
 };
 
@@ -1276,10 +1271,10 @@ struct md_ops {
                               struct obd_export *, struct obd_export *,
                               struct lustre_md *);
 
-       int (*m_free_lustre_md)(struct obd_export *, struct lustre_md *);
+       int (*m_put_lustre_md)(struct obd_export *, struct lustre_md *);
 
        int (*m_merge_attr)(struct obd_export *,
-                           const struct lmv_stripe_md *lsm,
+                           const struct lmv_stripe_object *,
                            struct cl_attr *attr, ldlm_blocking_callback);
 
        int (*m_set_open_replay_data)(struct obd_export *,
@@ -1302,11 +1297,13 @@ struct md_ops {
                               enum ldlm_cancel_flags flags, void *opaque);
 
        int (*m_get_fid_from_lsm)(struct obd_export *,
-                                 const struct lmv_stripe_md *,
+                                 const struct lmv_stripe_object *,
                                  const char *name, int namelen,
                                  struct lu_fid *fid);
-       int (*m_unpackmd)(struct obd_export *exp, struct lmv_stripe_md **plsm,
-                         const union lmv_mds_md *lmv, size_t lmv_size);
+       int (*m_stripe_object_create)(struct obd_export *exp,
+                                     struct lmv_stripe_object **plso,
+                                     const union lmv_mds_md *lmv,
+                                     size_t lmv_size);
        int (*m_rmfid)(struct obd_export *exp, struct fid_array *fa, int *rcs,
                       struct ptlrpc_request_set *set);
        struct lu_batch *(*m_batch_create)(struct obd_export *exp,
index f1085a2..0cb6d0a 100644 (file)
@@ -1605,7 +1605,7 @@ static inline int md_get_lustre_md(struct obd_export *exp,
        return MDP(exp->exp_obd, get_lustre_md)(exp, pill, dt_exp, md_exp, md);
 }
 
-static inline int md_free_lustre_md(struct obd_export *exp,
+static inline int md_put_lustre_md(struct obd_export *exp,
                                     struct lustre_md *md)
 {
        int rc;
@@ -1614,11 +1614,11 @@ static inline int md_free_lustre_md(struct obd_export *exp,
        if (rc)
                return rc;
 
-       return MDP(exp->exp_obd, free_lustre_md)(exp, md);
+       return MDP(exp->exp_obd, put_lustre_md)(exp, md);
 }
 
 static inline int md_merge_attr(struct obd_export *exp,
-                               const struct lmv_stripe_md *lsm,
+                               const struct lmv_stripe_object *lso,
                                struct cl_attr *attr,
                                ldlm_blocking_callback cb)
 {
@@ -1628,7 +1628,7 @@ static inline int md_merge_attr(struct obd_export *exp,
        if (rc)
                return rc;
 
-       return MDP(exp->exp_obd, merge_attr)(exp, lsm, attr, cb);
+       return MDP(exp->exp_obd, merge_attr)(exp, lso, attr, cb);
 }
 
 static inline int md_setxattr(struct obd_export *exp, const struct lu_fid *fid,
@@ -1782,7 +1782,7 @@ static inline int md_revalidate_lock(struct obd_export *exp,
 }
 
 static inline int md_get_fid_from_lsm(struct obd_export *exp,
-                                     const struct lmv_stripe_md *lsm,
+                                     const struct lmv_stripe_object *lso,
                                      const char *name, int namelen,
                                      struct lu_fid *fid)
 {
@@ -1792,7 +1792,7 @@ static inline int md_get_fid_from_lsm(struct obd_export *exp,
        if (rc)
                return rc;
 
-       return MDP(exp->exp_obd, get_fid_from_lsm)(exp, lsm, name, namelen,
+       return MDP(exp->exp_obd, get_fid_from_lsm)(exp, lso, name, namelen,
                                                   fid);
 }
 
@@ -1802,9 +1802,10 @@ static inline int md_get_fid_from_lsm(struct obd_export *exp,
  * If *plsm != NULL and lmm == NULL then *lsm will be freed.
  * If *plsm == NULL then it will be allocated.
  */
-static inline int md_unpackmd(struct obd_export *exp,
-                             struct lmv_stripe_md **plsm,
-                             const union lmv_mds_md *lmm, size_t lmm_size)
+static inline int md_stripe_object_create(struct obd_export *exp,
+                                         struct lmv_stripe_object **lsop,
+                                         const union lmv_mds_md *lmm,
+                                         size_t lmm_size)
 {
        int rc;
 
@@ -1812,7 +1813,7 @@ static inline int md_unpackmd(struct obd_export *exp,
        if (rc)
                return rc;
 
-       return MDP(exp->exp_obd, unpackmd)(exp, plsm, lmm, lmm_size);
+       return MDP(exp->exp_obd, stripe_object_create)(exp, lsop, lmm, lmm_size);
 }
 
 static inline int md_rmfid(struct obd_export *exp, struct fid_array *fa,
index 2a1393b..342c65f 100644 (file)
@@ -376,8 +376,7 @@ static int ll_readdir(struct file *filp, void *cookie, filldir_t filldir)
                GOTO(out, rc = PTR_ERR(op_data));
 
        /* foreign dirs are browsed out of Lustre */
-       if (unlikely(op_data->op_mea1 != NULL &&
-                    op_data->op_mea1->lsm_md_magic == LMV_MAGIC_FOREIGN)) {
+       if (unlikely(lmv_dir_foreign(op_data->op_lso1))) {
                ll_finish_md_op_data(op_data);
                RETURN(-ENODATA);
        }
@@ -688,9 +687,9 @@ static int ll_dir_get_default_lmv(struct inode *inode, struct lmv_user_md *lum)
 
        ENTRY;
 retry:
-       if (lli->lli_default_lsm_md) {
+       if (lli->lli_def_lsm_obj) {
                down_read(&lli->lli_lsm_sem);
-               lsm = lli->lli_default_lsm_md;
+               lsm = &lli->lli_def_lsm_obj->lso_lsm;
                if (lsm) {
                        lum->lum_magic = lsm->lsm_md_magic;
                        lum->lum_stripe_count = lsm->lsm_md_stripe_count;
@@ -2015,7 +2014,7 @@ out_rmdir:
                         * instead of the client.
                         */
                        if (cmd == LL_IOC_MDC_GETINFO_V2 &&
-                           ll_i2info(inode)->lli_lsm_md != NULL)
+                           ll_dir_striped(inode))
                                valid &= ~(OBD_MD_FLSIZE | OBD_MD_FLBLOCKS);
 
                        if (flagsp && copy_to_user(flagsp, &valid,
index a2d74ed..8004643 100644 (file)
@@ -5172,7 +5172,7 @@ int ll_migrate(struct inode *parent, struct file *file, struct lmv_user_md *lum,
 
        /* migrate dirent only for subdirs if MDS_MIGRATE_NSONLY set */
        if (S_ISDIR(child_inode->i_mode) && (flags & MDS_MIGRATE_NSONLY) &&
-           lmv_dir_layout_changing(ll_i2info(parent)->lli_lsm_md))
+           lmv_dir_layout_changing(op_data->op_lso1))
                op_data->op_bias |= MDS_MIGRATE_NSONLY;
 
 again:
@@ -5424,18 +5424,22 @@ out:
 static int ll_merge_md_attr(struct inode *inode)
 {
        struct ll_inode_info *lli = ll_i2info(inode);
+       struct lmv_stripe_object *lsm_obj;
        struct cl_attr attr = { 0 };
        int rc;
 
-       LASSERT(lli->lli_lsm_md != NULL);
-
-       if (!lmv_dir_striped(lli->lli_lsm_md))
+       if (!ll_dir_striped(inode))
                RETURN(0);
 
        down_read(&lli->lli_lsm_sem);
-       rc = md_merge_attr(ll_i2mdexp(inode), ll_i2info(inode)->lli_lsm_md,
-                          &attr, ll_md_blocking_ast);
+       LASSERT(lli->lli_lsm_obj != NULL);
+
+       lsm_obj = lmv_stripe_object_get(lli->lli_lsm_obj);
        up_read(&lli->lli_lsm_sem);
+
+       rc = md_merge_attr(ll_i2mdexp(inode), lsm_obj,
+                          &attr, ll_md_blocking_ast);
+       lmv_stripe_object_put(&lsm_obj);
        if (rc != 0)
                RETURN(rc);
 
@@ -5531,7 +5535,7 @@ int ll_getattr_dentry(struct dentry *de, struct kstat *stat, u32 request_mask,
        } else {
                /* If object isn't regular a file then don't validate size. */
                /* foreign dir is not striped dir */
-               if (ll_dir_striped(inode) && !foreign) {
+               if (!foreign) {
                        rc = ll_merge_md_attr(inode);
                        if (rc < 0)
                                RETURN(rc);
index 9e2a7cb..54485f1 100644 (file)
@@ -115,17 +115,16 @@ int ll_manage_foreign(struct inode *inode, struct lustre_md *lmd)
                        cl_env_put(env, &refcheck);
                }
        } else if (S_ISDIR((inode)->i_mode)) {
-               if (lmd->lfm != NULL &&
-                   lmd->lfm->lfm_magic == LMV_MAGIC_FOREIGN) {
-                       ll_manage_foreign_dir(inode, lmd->lfm);
+               if (lmv_dir_foreign(lmd->lsm_obj)) {
+                       ll_manage_foreign_dir(inode, &lmd->lsm_obj->lso_lfm);
                } else {
                        struct ll_inode_info *lli = ll_i2info(inode);
-                       struct lmv_foreign_md *lfm;
+                       struct lmv_stripe_object *lsm_obj;
 
                        down_read(&lli->lli_lsm_sem);
-                       lfm = (struct lmv_foreign_md *)(lli->lli_lsm_md);
-                       if (lfm &&  lfm->lfm_magic == LMV_MAGIC_FOREIGN)
-                               ll_manage_foreign_dir(inode, lfm);
+                       lsm_obj = lli->lli_lsm_obj;
+                       if (lmv_dir_foreign(lsm_obj))
+                               ll_manage_foreign_dir(inode, &lsm_obj->lso_lfm);
                        up_read(&lli->lli_lsm_sem);
                }
        }
@@ -253,18 +252,19 @@ bool ll_foreign_is_removable(struct dentry *dentry, bool unset)
                }
        } else if (S_ISDIR(inode->i_mode)) {
                struct ll_inode_info *lli = ll_i2info(inode);
-               struct lmv_foreign_md *lfm;
+               struct lmv_stripe_object *lsm_obj;
 
                down_read(&lli->lli_lsm_sem);
-               lfm = (struct lmv_foreign_md *)(lli->lli_lsm_md);
-               if (!lfm)
+               lsm_obj = lli->lli_lsm_obj;
+               if (!lsm_obj)
                        CDEBUG(D_INFO,
                               "unable to check if dir (%.*s, "DFID") is foreign...\n",
                               name->len, name->name,
                               PFID(ll_inode2fid(inode)));
-               else if (lfm->lfm_magic == LMV_MAGIC_FOREIGN)
-                       preserve_foreign = should_preserve_foreign_dir(lfm, lli,
-                                                                      unset);
+               else if (lmv_dir_foreign(lsm_obj))
+                       preserve_foreign =
+                               should_preserve_foreign_dir(&lsm_obj->lso_lfm,
+                                                           lli, unset);
                up_read(&lli->lli_lsm_sem);
                if (preserve_foreign) {
                        CDEBUG(D_INFO,
index 9b7f813..e5d28cf 100644 (file)
@@ -219,6 +219,7 @@ static int ll_foreign_readlink_internal(struct inode *inode, char **symname)
 {
        struct ll_inode_info *lli = ll_i2info(inode);
        struct ll_sb_info *sbi = ll_i2sbi(inode);
+       struct lmv_stripe_object *lsm_obj = NULL;
        struct lov_foreign_md *lfm = NULL;
        char *destname = NULL;
        size_t lfm_size = 0;
@@ -278,8 +279,11 @@ static int ll_foreign_readlink_internal(struct inode *inode, char **symname)
                 * and LMV formats are identical, and then we also only need
                 * one set of parsing routines for both foreign files and dirs!
                 */
-               lfm = (struct lov_foreign_md *)(lli->lli_lsm_md);
-               if (lfm != NULL) {
+               lsm_obj = lmv_stripe_object_get(lli->lli_lsm_obj);
+               up_read(&lli->lli_lsm_sem);
+
+               if (lsm_obj != NULL) {
+                       lfm = (struct lov_foreign_md *)&lsm_obj->lso_lfm;
                        CDEBUG(D_INFO, "%s: inode "DFID": LMV cached found\n",
                               sbi->ll_fsname, PFID(ll_inode2fid(inode)));
                } else {
@@ -299,7 +303,7 @@ static int ll_foreign_readlink_internal(struct inode *inode, char **symname)
 
 failed:
        if (S_ISDIR(inode->i_mode))
-               up_read(&lli->lli_lsm_sem);
+               lmv_stripe_object_put(&lsm_obj);
 
        if (S_ISREG(inode->i_mode) && lfm)
                OBD_FREE(lfm, lfm_size);
index cf80970..dd0f989 100644 (file)
@@ -192,15 +192,15 @@ struct ll_inode_info {
                         * is for old server, or default LMV is set by
                         * "lfs setdirstripe -D".
                         */
-                                                       lli_default_lmv_set:1;
+                                                       lli_def_lsm_obj_set:1;
                        /* generation for statahead */
                        unsigned int                    lli_sa_generation;
                        /* rw lock protects lli_lsm_md */
                        struct rw_semaphore             lli_lsm_sem;
                        /* directory stripe information */
-                       struct lmv_stripe_md            *lli_lsm_md;
+                       struct lmv_stripe_object        *lli_lsm_obj;
                        /* directory default LMV */
-                       struct lmv_stripe_md            *lli_default_lsm_md;
+                       struct lmv_stripe_object        *lli_def_lsm_obj;
                };
 
                /* for non-directory */
@@ -1546,9 +1546,18 @@ static inline struct lu_fid *ll_inode2fid(struct inode *inode)
 
 static inline bool ll_dir_striped(struct inode *inode)
 {
+       bool rc;
        LASSERT(inode);
-       return S_ISDIR(inode->i_mode) &&
-              lmv_dir_striped(ll_i2info(inode)->lli_lsm_md);
+
+       if (!S_ISDIR(inode->i_mode))
+               return false;
+
+       down_read(&ll_i2info(inode)->lli_lsm_sem);
+       rc = !!(ll_i2info(inode)->lli_lsm_obj &&
+               lmv_dir_striped(ll_i2info(inode)->lli_lsm_obj));
+       up_read(&ll_i2info(inode)->lli_lsm_sem);
+
+       return rc;
 }
 
 static inline loff_t ll_file_maxbytes(struct inode *inode)
index 4767355..5da7d20 100644 (file)
@@ -733,7 +733,7 @@ retry_connect:
        LASSERT(fid_is_sane(&sbi->ll_root_fid));
        api32 = test_bit(LL_SBI_32BIT_API, sbi->ll_flags);
        root = ll_iget(sb, cl_fid_build_ino(&sbi->ll_root_fid, api32), &lmd);
-       md_free_lustre_md(sbi->ll_md_exp, &lmd);
+       md_put_lustre_md(sbi->ll_md_exp, &lmd);
 
        if (IS_ERR(root)) {
                lmd_clear_acl(&lmd);
@@ -1600,16 +1600,10 @@ void ll_dir_clear_lsm_md(struct inode *inode)
        struct ll_inode_info *lli = ll_i2info(inode);
 
        LASSERT(S_ISDIR(inode->i_mode));
-
-       if (lli->lli_lsm_md) {
-               lmv_free_memmd(lli->lli_lsm_md);
-               lli->lli_lsm_md = NULL;
-       }
-
-       if (lli->lli_default_lsm_md) {
-               lmv_free_memmd(lli->lli_default_lsm_md);
-               lli->lli_default_lsm_md = NULL;
-       }
+       down_write(&lli->lli_lsm_sem);
+       lmv_stripe_object_put(&lli->lli_lsm_obj);
+       lmv_stripe_object_put(&lli->lli_def_lsm_obj);
+       up_write(&lli->lli_lsm_sem);
 }
 
 static struct inode *ll_iget_anon_dir(struct super_block *sb,
@@ -1624,7 +1618,7 @@ static struct inode *ll_iget_anon_dir(struct super_block *sb,
 
        ENTRY;
 
-       LASSERT(md->lmv);
+       LASSERT(md->lsm_obj);
        ino = cl_fid_build_ino(fid, test_bit(LL_SBI_32BIT_API, sbi->ll_flags));
        inode = iget_locked(sb, ino);
        if (inode == NULL) {
@@ -1674,17 +1668,18 @@ static struct inode *ll_iget_anon_dir(struct super_block *sb,
 static int ll_init_lsm_md(struct inode *inode, struct lustre_md *md)
 {
        struct lu_fid *fid;
-       struct lmv_stripe_md *lsm = md->lmv;
+       struct lmv_stripe_md *lsm;
        struct ll_inode_info *lli = ll_i2info(inode);
        int i;
 
-       LASSERT(lsm != NULL);
+       LASSERT(md->lsm_obj != NULL);
+       lsm = &md->lsm_obj->lso_lsm;
 
        CDEBUG(D_INODE, "%s: "DFID" set dir layout:\n",
               ll_i2sbi(inode)->ll_fsname, PFID(&lli->lli_fid));
-       lsm_md_dump(D_INODE, lsm);
+       lmv_stripe_object_dump(D_INODE, md->lsm_obj);
 
-       if (!lmv_dir_striped(lsm))
+       if (!lmv_dir_striped(md->lsm_obj))
                goto out;
 
        /* XXX sigh, this lsm_root initialization should be in
@@ -1715,51 +1710,50 @@ static int ll_init_lsm_md(struct inode *inode, struct lustre_md *md)
                }
        }
 out:
-       lli->lli_lsm_md = lsm;
-
+       /* move lsm_obj to lli */
+       lli->lli_lsm_obj = md->lsm_obj;
+       md->lsm_obj = NULL;
        return 0;
 }
 
 static void ll_update_default_lsm_md(struct inode *inode, struct lustre_md *md)
 {
        struct ll_inode_info *lli = ll_i2info(inode);
-
+       struct lmv_stripe_object *lsm_obj = md->def_lsm_obj;
        ENTRY;
 
-       if (!md->default_lmv) {
+       if (!lsm_obj) {
                /* clear default lsm */
-               if (lli->lli_default_lsm_md && lli->lli_default_lmv_set) {
+               if (lli->lli_def_lsm_obj && lli->lli_def_lsm_obj_set) {
                        down_write(&lli->lli_lsm_sem);
-                       if (lli->lli_default_lsm_md &&
-                           lli->lli_default_lmv_set) {
-                               lmv_free_memmd(lli->lli_default_lsm_md);
-                               lli->lli_default_lsm_md = NULL;
+                       if (lli->lli_def_lsm_obj_set) {
+                               lmv_stripe_object_put(&lli->lli_def_lsm_obj);
                                lli->lli_inherit_depth = 0;
-                               lli->lli_default_lmv_set = 0;
+                               lli->lli_def_lsm_obj_set = 0;
                        }
                        up_write(&lli->lli_lsm_sem);
                }
                RETURN_EXIT;
        }
 
-       if (lli->lli_default_lsm_md) {
+       if (lli->lli_def_lsm_obj) {
                /* do nonthing if default lsm isn't changed */
                down_read(&lli->lli_lsm_sem);
-               if (lli->lli_default_lsm_md &&
-                   lsm_md_eq(lli->lli_default_lsm_md, md->default_lmv)) {
+               if (lli->lli_def_lsm_obj &&
+                   lsm_md_eq(lli->lli_def_lsm_obj, lsm_obj)) {
                        up_read(&lli->lli_lsm_sem);
                        RETURN_EXIT;
                }
                up_read(&lli->lli_lsm_sem);
        }
-
        down_write(&lli->lli_lsm_sem);
-       if (lli->lli_default_lsm_md)
-               lmv_free_memmd(lli->lli_default_lsm_md);
-       lli->lli_default_lsm_md = md->default_lmv;
-       lli->lli_default_lmv_set = 1;
-       lsm_md_dump(D_INODE, md->default_lmv);
-       md->default_lmv = NULL;
+
+       /* update default lsm. */
+       lmv_stripe_object_put(&lli->lli_def_lsm_obj);
+       lli->lli_def_lsm_obj = lsm_obj;
+       lli->lli_def_lsm_obj_set = 1;
+       lmv_stripe_object_dump(D_INODE, lsm_obj);
+       md->def_lsm_obj = NULL;
        up_write(&lli->lli_lsm_sem);
        RETURN_EXIT;
 }
@@ -1767,18 +1761,18 @@ static void ll_update_default_lsm_md(struct inode *inode, struct lustre_md *md)
 static int ll_update_lsm_md(struct inode *inode, struct lustre_md *md)
 {
        struct ll_inode_info *lli = ll_i2info(inode);
-       struct lmv_stripe_md *lsm = md->lmv;
+       struct lmv_stripe_object *lsm_obj = md->lsm_obj;
        struct cl_attr  *attr;
        int rc = 0;
 
        ENTRY;
 
        LASSERT(S_ISDIR(inode->i_mode));
-       CDEBUG(D_INODE, "update lsm %p of "DFID"\n", lli->lli_lsm_md,
+       CDEBUG(D_INODE, "update lsm_obj %p of "DFID"\n", lli->lli_lsm_obj,
               PFID(ll_inode2fid(inode)));
 
        /* update default LMV */
-       if (md->default_lmv)
+       if (md->def_lsm_obj)
                ll_update_default_lsm_md(inode, md);
 
        /* after dir migration/restripe, a stripe may be turned into a
@@ -1791,7 +1785,7 @@ static int ll_update_lsm_md(struct inode *inode, struct lustre_md *md)
         * no striped information from request, lustre_md from req does not
         * include stripeEA, see ll_md_setattr()
         */
-       if (!lsm)
+       if (!lsm_obj)
                RETURN(0);
 
        /*
@@ -1801,7 +1795,16 @@ static int ll_update_lsm_md(struct inode *inode, struct lustre_md *md)
        down_read(&lli->lli_lsm_sem);
 
        /* some current lookup initialized lsm, and unchanged */
-       if (lli->lli_lsm_md && lsm_md_eq(lli->lli_lsm_md, lsm))
+       if (lli->lli_lsm_obj && lsm_md_eq(lli->lli_lsm_obj, lsm_obj)) {
+               up_read(&lli->lli_lsm_sem);
+               RETURN(0);
+       }
+
+       up_read(&lli->lli_lsm_sem);
+       down_write(&lli->lli_lsm_sem);
+
+       /* check again in case of a race */
+       if (lli->lli_lsm_obj && lsm_md_eq(lli->lli_lsm_obj, lsm_obj))
                GOTO(unlock, rc = 0);
 
        /* if dir layout doesn't match, check whether version is increased,
@@ -1810,49 +1813,35 @@ static int ll_update_lsm_md(struct inode *inode, struct lustre_md *md)
         *
         * foreign LMV should not change.
         */
-       if (lli->lli_lsm_md && lmv_dir_striped(lli->lli_lsm_md) &&
-           lsm->lsm_md_layout_version <=
-           lli->lli_lsm_md->lsm_md_layout_version) {
+       if (lli->lli_lsm_obj && lmv_dir_striped(lli->lli_lsm_obj) &&
+           lsm_obj->lso_lsm.lsm_md_layout_version <=
+           lli->lli_lsm_obj->lso_lsm.lsm_md_layout_version) {
                CERROR("%s: "DFID" dir layout mismatch:\n",
                       ll_i2sbi(inode)->ll_fsname, PFID(&lli->lli_fid));
-               lsm_md_dump(D_ERROR, lli->lli_lsm_md);
-               lsm_md_dump(D_ERROR, lsm);
+               lmv_stripe_object_dump(D_ERROR, lli->lli_lsm_obj);
+               lmv_stripe_object_dump(D_ERROR, lsm_obj);
                GOTO(unlock, rc = -EINVAL);
        }
 
-       up_read(&lli->lli_lsm_sem);
-       down_write(&lli->lli_lsm_sem);
        /* clear existing lsm */
-       if (lli->lli_lsm_md) {
-               lmv_free_memmd(lli->lli_lsm_md);
-               lli->lli_lsm_md = NULL;
-       }
+       lmv_stripe_object_put(&lli->lli_lsm_obj);
 
        rc = ll_init_lsm_md(inode, md);
-       up_write(&lli->lli_lsm_sem);
-
        if (rc)
-               RETURN(rc);
+               GOTO(unlock, rc);
 
-       /* set md->lmv to NULL, so the following free lustre_md will not free
-        * this lsm.
-        */
-       md->lmv = NULL;
-
-       /* md_merge_attr() may take long, since lsm is already set, switch to
-        * read lock.
-        */
-       down_read(&lli->lli_lsm_sem);
-
-       if (!lmv_dir_striped(lli->lli_lsm_md))
+       if (!lmv_dir_striped(lli->lli_lsm_obj))
                GOTO(unlock, rc = 0);
 
+       lsm_obj = lmv_stripe_object_get(lli->lli_lsm_obj);
+       up_write(&lli->lli_lsm_sem);
+
        OBD_ALLOC_PTR(attr);
        if (!attr)
-               GOTO(unlock, rc = -ENOMEM);
+               GOTO(err, rc = -ENOMEM);
 
        /* validate the lsm */
-       rc = md_merge_attr(ll_i2mdexp(inode), lli->lli_lsm_md, attr,
+       rc = md_merge_attr(ll_i2mdexp(inode), lsm_obj, attr,
                           ll_md_blocking_ast);
        if (!rc) {
                if (md->body->mbo_valid & OBD_MD_FLNLINK)
@@ -1868,10 +1857,12 @@ static int ll_update_lsm_md(struct inode *inode, struct lustre_md *md)
        }
 
        OBD_FREE_PTR(attr);
-       GOTO(unlock, rc);
+       EXIT;
+err:
+       lmv_stripe_object_put(&lsm_obj);
+       return rc;
 unlock:
-       up_read(&lli->lli_lsm_sem);
-
+       up_write(&lli->lli_lsm_sem);
        return rc;
 }
 
@@ -2002,6 +1993,7 @@ static int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data)
 
        rc = ll_update_inode(inode, &md);
        ptlrpc_req_finished(request);
+       md_put_lustre_md(sbi->ll_md_exp, &md);
 
        RETURN(rc);
 }
@@ -2866,8 +2858,8 @@ static int ll_dir_default_lmv_inherit(struct inode *dir, struct inode *inode)
 {
        struct ll_inode_info *plli = ll_i2info(dir);
        struct ll_inode_info *lli = ll_i2info(inode);
-       struct lmv_stripe_md *plsm;
-       struct lmv_stripe_md *lsm;
+       struct lmv_stripe_object *plsm_obj;
+       struct lmv_stripe_object *lsm_obj;
        int rc = 0;
 
        ENTRY;
@@ -2879,25 +2871,24 @@ static int ll_dir_default_lmv_inherit(struct inode *dir, struct inode *inode)
                RETURN(0);
 
        /* nothing to do if no default LMV on both */
-       if (!plli->lli_default_lsm_md && !lli->lli_default_lsm_md)
+       if (!plli->lli_def_lsm_obj && !lli->lli_def_lsm_obj)
                RETURN(0);
 
        /* subdir default LMV comes from disk */
-       if (lli->lli_default_lsm_md && lli->lli_default_lmv_set)
+       if (lli->lli_def_lsm_obj && lli->lli_def_lsm_obj_set)
                RETURN(0);
 
        /* delete subdir default LMV if parent's is deleted or becomes
         * uninheritable.
         */
        down_read(&plli->lli_lsm_sem);
-       plsm = plli->lli_default_lsm_md;
-       if (!plsm || !lmv_is_inheritable(plsm->lsm_md_max_inherit)) {
-               if (lli->lli_default_lsm_md && !lli->lli_default_lmv_set) {
+       plsm_obj = plli->lli_def_lsm_obj;
+       if (!plsm_obj ||
+           !lmv_is_inheritable(plsm_obj->lso_lsm.lsm_md_max_inherit)) {
+               if (lli->lli_def_lsm_obj && !lli->lli_def_lsm_obj_set) {
                        down_write(&lli->lli_lsm_sem);
-                       if (lli->lli_default_lsm_md &&
-                           !lli->lli_default_lmv_set) {
-                               lmv_free_memmd(lli->lli_default_lsm_md);
-                               lli->lli_default_lsm_md = NULL;
+                       if (!lli->lli_def_lsm_obj_set) {
+                               lmv_stripe_object_put(&lli->lli_def_lsm_obj);
                                lli->lli_inherit_depth = 0;
                        }
                        up_write(&lli->lli_lsm_sem);
@@ -2906,11 +2897,12 @@ static int ll_dir_default_lmv_inherit(struct inode *dir, struct inode *inode)
        }
 
        /* do nothing if inherited LMV is unchanged */
-       if (lli->lli_default_lsm_md) {
+       if (lli->lli_def_lsm_obj) {
                rc = 1;
                down_read(&lli->lli_lsm_sem);
-               if (!lli->lli_default_lmv_set)
-                       rc = lsm_md_inherited(plsm, lli->lli_default_lsm_md);
+               if (!lli->lli_def_lsm_obj_set)
+                       rc = lmv_object_inherited(plsm_obj,
+                                                 lli->lli_def_lsm_obj);
                up_read(&lli->lli_lsm_sem);
                if (rc == 1)
                        GOTO(unlock_parent, rc = 0);
@@ -2918,26 +2910,27 @@ static int ll_dir_default_lmv_inherit(struct inode *dir, struct inode *inode)
 
        /* inherit default LMV */
        down_write(&lli->lli_lsm_sem);
-       if (lli->lli_default_lsm_md) {
+       if (lli->lli_def_lsm_obj) {
                /* checked above, but in case of race, check again with lock */
-               if (lli->lli_default_lmv_set)
+               if (lli->lli_def_lsm_obj_set)
                        GOTO(unlock_child, rc = 0);
                /* always update subdir default LMV in case parent's changed */
-               lsm = lli->lli_default_lsm_md;
+               lsm_obj = lli->lli_def_lsm_obj;
        } else {
-               OBD_ALLOC_PTR(lsm);
-               if (!lsm)
-                       GOTO(unlock_child, rc = -ENOMEM);
-               lli->lli_default_lsm_md = lsm;
+               lsm_obj = lmv_stripe_object_alloc(LMV_USER_MAGIC, NULL, 0);
+               if (IS_ERR(lsm_obj))
+                       GOTO(unlock_child, rc = PTR_ERR(lsm_obj));
+               lli->lli_def_lsm_obj = lsm_obj;
        }
 
-       *lsm = *plsm;
-       lsm->lsm_md_max_inherit = lmv_inherit_next(plsm->lsm_md_max_inherit);
-       lsm->lsm_md_max_inherit_rr =
-                       lmv_inherit_rr_next(plsm->lsm_md_max_inherit_rr);
+       lsm_obj->lso_lsm = plsm_obj->lso_lsm;
+       lsm_obj->lso_lsm.lsm_md_max_inherit =
+               lmv_inherit_next(plsm_obj->lso_lsm.lsm_md_max_inherit);
+       lsm_obj->lso_lsm.lsm_md_max_inherit_rr =
+               lmv_inherit_rr_next(plsm_obj->lso_lsm.lsm_md_max_inherit_rr);
        lli->lli_inherit_depth = plli->lli_inherit_depth + 1;
 
-       lsm_md_dump(D_INODE, lsm);
+       lmv_stripe_object_dump(D_INODE, lsm_obj);
 
        EXIT;
 unlock_child:
@@ -2977,12 +2970,12 @@ void ll_update_dir_depth_dmv(struct inode *dir, struct dentry *de)
        plli = ll_i2info(dir);
        lli = ll_i2info(inode);
        lli->lli_dir_depth = plli->lli_dir_depth + 1;
-       if (lli->lli_default_lsm_md && lli->lli_default_lmv_set) {
-               if (plli->lli_default_lsm_md) {
+       if (lli->lli_def_lsm_obj && lli->lli_def_lsm_obj_set) {
+               if (plli->lli_def_lsm_obj) {
                        down_read_nested(&plli->lli_lsm_sem, LSM_SEM_PARENT);
                        down_read_nested(&lli->lli_lsm_sem, LSM_SEM_CHILD);
-                       if (lsm_md_inherited(plli->lli_default_lsm_md,
-                                            lli->lli_default_lsm_md))
+                       if (lmv_object_inherited(plli->lli_def_lsm_obj,
+                                                lli->lli_def_lsm_obj))
                                lli->lli_inherit_depth =
                                        plli->lli_inherit_depth + 1;
                        else
@@ -2998,11 +2991,11 @@ void ll_update_dir_depth_dmv(struct inode *dir, struct dentry *de)
                ll_dir_default_lmv_inherit(dir, inode);
        }
 
-       if (lli->lli_default_lsm_md)
+       if (lli->lli_def_lsm_obj)
                CDEBUG(D_INODE,
                       "%s "DFID" depth %hu %s default LMV inherit depth %hu\n",
                       de->d_name.name, PFID(&lli->lli_fid), lli->lli_dir_depth,
-                      lli->lli_default_lmv_set ? "server" : "client",
+                      lli->lli_def_lsm_obj_set ? "server" : "client",
                       lli->lli_inherit_depth);
 }
 
@@ -3515,7 +3508,7 @@ static int ll_fileset_default_lmv_fixup(struct inode *inode,
 
        LASSERT(is_root_inode(inode));
        LASSERT(!fid_is_root(&sbi->ll_root_fid));
-       LASSERT(!md->default_lmv);
+       LASSERT(!md->def_lsm_obj);
 
        rc = ll_dir_get_default_layout(inode, (void **)&lmm, &size, &req,
                                       OBD_MD_DEFAULT_MEA,
@@ -3525,7 +3518,8 @@ static int ll_fileset_default_lmv_fixup(struct inode *inode,
 
        rc = 0;
        if (lmm && size) {
-               rc = md_unpackmd(sbi->ll_md_exp, &md->default_lmv, lmm, size);
+               rc = md_stripe_object_create(sbi->ll_md_exp, &md->def_lsm_obj,
+                                            lmm, size);
                if (rc < 0)
                        GOTO(out, rc);
 
@@ -3561,7 +3555,7 @@ int ll_prep_inode(struct inode **inode, struct req_capsule *pill,
         * ll_update_lsm_md() may change md.
         */
        if (it && (it->it_op & (IT_LOOKUP | IT_GETATTR)) &&
-           S_ISDIR(md.body->mbo_mode) && !md.default_lmv) {
+           S_ISDIR(md.body->mbo_mode) && !md.def_lsm_obj) {
                if (unlikely(*inode && is_root_inode(*inode) &&
                             !fid_is_root(&sbi->ll_root_fid))) {
                        rc = ll_fileset_default_lmv_fixup(*inode, &md);
@@ -3569,7 +3563,7 @@ int ll_prep_inode(struct inode **inode, struct req_capsule *pill,
                                GOTO(out, rc);
                }
 
-               if (!md.default_lmv)
+               if (!md.def_lsm_obj)
                        default_lmv_deleted = true;
        }
 
@@ -3645,7 +3639,7 @@ int ll_prep_inode(struct inode **inode, struct req_capsule *pill,
 
 out:
        /* cleanup will be done if necessary */
-       md_free_lustre_md(sbi->ll_md_exp, &md);
+       md_put_lustre_md(sbi->ll_md_exp, &md);
 
        if (rc != 0 && it != NULL && it->it_op & IT_OPEN) {
                ll_intent_drop_lock(it);
@@ -3704,15 +3698,9 @@ out_statfs:
  */
 void ll_unlock_md_op_lsm(struct md_op_data *op_data)
 {
-       if (op_data->op_mea2_sem) {
-               up_read_non_owner(op_data->op_mea2_sem);
-               op_data->op_mea2_sem = NULL;
-       }
-
-       if (op_data->op_mea1_sem) {
-               up_read_non_owner(op_data->op_mea1_sem);
-               op_data->op_mea1_sem = NULL;
-       }
+       lmv_stripe_object_put(&op_data->op_lso2);
+       lmv_stripe_object_put(&op_data->op_lso1);
+       lmv_stripe_object_put(&op_data->op_default_lso1);
 }
 
 /* this function prepares md_op_data hint for passing it down to MD stack. */
@@ -3762,24 +3750,21 @@ struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data,
                op_data->op_fid1 = *ll_inode2fid(i1);
 
        if (S_ISDIR(i1->i_mode)) {
-               down_read_non_owner(&ll_i2info(i1)->lli_lsm_sem);
-               op_data->op_mea1_sem = &ll_i2info(i1)->lli_lsm_sem;
-               op_data->op_mea1 = ll_i2info(i1)->lli_lsm_md;
-               op_data->op_default_mea1 = ll_i2info(i1)->lli_default_lsm_md;
+               down_read(&ll_i2info(i1)->lli_lsm_sem);
+               op_data->op_lso1 =
+                       lmv_stripe_object_get(ll_i2info(i1)->lli_lsm_obj);
+               op_data->op_default_lso1 =
+                       lmv_stripe_object_get(ll_i2info(i1)->lli_def_lsm_obj);
+               up_read(&ll_i2info(i1)->lli_lsm_sem);
        }
 
        if (i2) {
                op_data->op_fid2 = *ll_inode2fid(i2);
                if (S_ISDIR(i2->i_mode)) {
-                       if (i2 != i1) {
-                               /* i2 is typically a child of i1, and MUST be
-                                * further from the root to avoid deadlocks.
-                                */
-                               down_read_non_owner(&ll_i2info(i2)->lli_lsm_sem);
-                               op_data->op_mea2_sem =
-                                               &ll_i2info(i2)->lli_lsm_sem;
-                       }
-                       op_data->op_mea2 = ll_i2info(i2)->lli_lsm_md;
+                       down_read(&ll_i2info(i2)->lli_lsm_sem);
+                       op_data->op_lso2 =
+                           lmv_stripe_object_get(ll_i2info(i2)->lli_lsm_obj);
+                       up_read(&ll_i2info(i2)->lli_lsm_sem);
                }
        } else {
                fid_zero(&op_data->op_fid2);
index 0a58e96..548bd1f 100644 (file)
@@ -776,10 +776,12 @@ static int ll_lookup_it_finish(struct ptlrpc_request *request,
 
                /* If it is striped directory, get the real stripe parent */
                if (unlikely(ll_dir_striped(parent))) {
+                       down_read(&ll_i2info(parent)->lli_lsm_sem);
                        rc = md_get_fid_from_lsm(ll_i2mdexp(parent),
-                                                ll_i2info(parent)->lli_lsm_md,
+                                                ll_i2info(parent)->lli_lsm_obj,
                                                 (*de)->d_name.name,
                                                 (*de)->d_name.len, &fid);
+                       up_read(&ll_i2info(parent)->lli_lsm_sem);
                        if (rc != 0)
                                GOTO(out, rc);
                }
@@ -1502,11 +1504,11 @@ static void ll_qos_mkdir_prep(struct md_op_data *op_data, struct inode *dir)
        depth = lli->lli_dir_depth;
 
        /* parent directory is striped */
-       if (unlikely(lli->lli_lsm_md))
+       if (unlikely(ll_dir_striped(dir)))
                return;
 
        /* default LMV set on parent directory */
-       if (unlikely(lli->lli_default_lsm_md))
+       if (unlikely(lli->lli_def_lsm_obj))
                return;
 
        /* parent is ROOT */
@@ -1514,13 +1516,13 @@ static void ll_qos_mkdir_prep(struct md_op_data *op_data, struct inode *dir)
                return;
 
        /* default LMV not set on ROOT */
-       if (!rlli->lli_default_lsm_md)
+       if (!rlli->lli_def_lsm_obj)
                return;
 
        down_read(&rlli->lli_lsm_sem);
-       lsm = rlli->lli_default_lsm_md;
-       if (!lsm)
+       if (!rlli->lli_def_lsm_obj)
                goto unlock;
+       lsm = &rlli->lli_def_lsm_obj->lso_lsm;
 
        /* not space balanced */
        if (lsm->lsm_md_master_mdt_index != LMV_OFFSET_DEFAULT)
@@ -1580,7 +1582,7 @@ again:
                ll_qos_mkdir_prep(op_data, dir);
                if ((exp_connect_flags2(ll_i2mdexp(dir)) &
                     OBD_CONNECT2_DMV_IMP_INHERIT) &&
-                   op_data->op_default_mea1 && !lum) {
+                   op_data->op_default_lso1 && !lum) {
                        const struct lmv_stripe_md *lsm;
 
                        /* once DMV_IMP_INHERIT is set, pack default LMV in
@@ -1590,7 +1592,7 @@ again:
                        if (!lum)
                                GOTO(err_exit, err = -ENOMEM);
 
-                       lsm = op_data->op_default_mea1;
+                       lsm = &op_data->op_default_lso1->lso_lsm;
                        lum->lum_magic = cpu_to_le32(lsm->lsm_md_magic);
                        lum->lum_stripe_count =
                                cpu_to_le32(lsm->lsm_md_stripe_count);
@@ -1697,40 +1699,41 @@ again:
                if (err2 == 0) {
                        struct lustre_md md = { NULL };
 
+
                        md.body = req_capsule_server_get(&request->rq_pill,
                                                         &RMF_MDT_BODY);
                        if (!md.body)
                                GOTO(err_exit, err = -EPROTO);
 
-                       OBD_ALLOC_PTR(md.default_lmv);
-                       if (!md.default_lmv)
+                       OBD_ALLOC_PTR(md.def_lsm_obj);
+                       if (!md.def_lsm_obj)
                                GOTO(err_exit, err = -ENOMEM);
 
-                       md.default_lmv->lsm_md_magic = lum->lum_magic;
-                       md.default_lmv->lsm_md_stripe_count =
+                       md.def_lsm_obj->lso_lsm.lsm_md_magic = lum->lum_magic;
+                       md.def_lsm_obj->lso_lsm.lsm_md_stripe_count =
                                lum->lum_stripe_count;
-                       md.default_lmv->lsm_md_master_mdt_index =
+                       md.def_lsm_obj->lso_lsm.lsm_md_master_mdt_index =
                                lum->lum_stripe_offset;
-                       md.default_lmv->lsm_md_hash_type = lum->lum_hash_type;
-                       md.default_lmv->lsm_md_max_inherit =
+                       md.def_lsm_obj->lso_lsm.lsm_md_hash_type =
+                               lum->lum_hash_type;
+                       md.def_lsm_obj->lso_lsm.lsm_md_max_inherit =
                                lum->lum_max_inherit;
-                       md.default_lmv->lsm_md_max_inherit_rr =
+                       md.def_lsm_obj->lso_lsm.lsm_md_max_inherit_rr =
                                lum->lum_max_inherit_rr;
+                       atomic_set(&md.def_lsm_obj->lso_refs, 1);
 
                        err = ll_update_inode(dir, &md);
-                       md_free_lustre_md(sbi->ll_md_exp, &md);
+                       md_put_lustre_md(sbi->ll_md_exp, &md);
                        if (err)
                                GOTO(err_exit, err);
-               } else if (err2 == -ENODATA && lli->lli_default_lsm_md) {
+               } else if (err2 == -ENODATA && lli->lli_def_lsm_obj) {
                        /*
                         * If there are no default stripe EA on the MDT, but the
                         * client has default stripe, then it probably means
                         * default stripe EA has just been deleted.
                         */
                        down_write(&lli->lli_lsm_sem);
-                       if (lli->lli_default_lsm_md)
-                               OBD_FREE_PTR(lli->lli_default_lsm_md);
-                       lli->lli_default_lsm_md = NULL;
+                       lmv_stripe_object_put(&lli->lli_def_lsm_obj);
                        up_write(&lli->lli_lsm_sem);
                } else {
                        GOTO(err_exit, err);
index 97f1d9f..1acdd84 100644 (file)
@@ -301,15 +301,15 @@ static int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data,
        ENTRY;
 
        /* do not allow file creation in foreign dir */
-       if ((it->it_op & IT_CREAT) && lmv_dir_foreign(op_data->op_mea1))
+       if ((it->it_op & IT_CREAT) && lmv_dir_foreign(op_data->op_lso1))
                RETURN(-ENODATA);
 
        if ((it->it_op & IT_CREAT) && !(flags & MDS_OPEN_BY_FID)) {
                /* don't allow create under dir with bad hash */
-               if (lmv_dir_bad_hash(op_data->op_mea1))
+               if (lmv_dir_bad_hash(op_data->op_lso1))
                        RETURN(-EBADF);
 
-               if (lmv_dir_layout_changing(op_data->op_mea1)) {
+               if (lmv_dir_layout_changing(op_data->op_lso1)) {
                        if (flags & O_EXCL) {
                                /*
                                 * open(O_CREAT | O_EXCL) needs to check
@@ -340,7 +340,7 @@ retry:
                /* for striped directory, we can't know parent stripe fid
                 * without name, but we can set it to child fid, and MDT
                 * will obtain it from linkea in open in such case. */
-               if (lmv_dir_striped(op_data->op_mea1))
+               if (lmv_dir_striped(op_data->op_lso1))
                        op_data->op_fid1 = op_data->op_fid2;
 
                tgt = lmv_fid2tgt(lmv, &op_data->op_fid2);
@@ -438,7 +438,7 @@ lmv_intent_lookup(struct obd_export *exp, struct md_op_data *op_data,
        ENTRY;
 
        /* foreign dir is not striped */
-       if (lmv_dir_foreign(op_data->op_mea1)) {
+       if (lmv_dir_foreign(op_data->op_lso1)) {
                /* only allow getattr/lookup for itself */
                if (op_data->op_name != NULL)
                        RETURN(-ENODATA);
@@ -499,8 +499,9 @@ retry:
        if (*reqp == NULL) {
                /* If RPC happens, lsm information will be revalidated
                 * during update_inode process (see ll_update_lsm_md) */
-               if (lmv_dir_striped(op_data->op_mea2)) {
-                       rc = lmv_revalidate_slaves(exp, op_data->op_mea2,
+               if (lmv_dir_striped(op_data->op_lso2)) {
+                       rc = lmv_revalidate_slaves(exp,
+                                                  &op_data->op_lso2->lso_lsm,
                                                   cb_blocking,
                                                   extra_lock_flags);
                        if (rc != 0)
index 010535a..6340605 100644 (file)
@@ -166,13 +166,15 @@ static inline int lmv_stripe_md_size(int stripe_count)
 
 /* for file under migrating directory, return the target stripe info */
 static inline const struct lmv_oinfo *
-lsm_name_to_stripe_info(const struct lmv_stripe_md *lsm, const char *name,
+lsm_name_to_stripe_info(const struct lmv_stripe_object *lso, const char *name,
                        int namelen, bool new_layout)
 {
+       const struct lmv_stripe_md *lsm;
        int stripe_index;
 
-       LASSERT(lmv_dir_striped(lsm));
+       LASSERT(lmv_dir_striped(lso));
 
+       lsm = &lso->lso_lsm;
        stripe_index = __lmv_name_to_stripe_index(lsm->lsm_md_hash_type,
                                                  lsm->lsm_md_stripe_count,
                                                  lsm->lsm_md_migrate_hash,
@@ -186,18 +188,18 @@ lsm_name_to_stripe_info(const struct lmv_stripe_md *lsm, const char *name,
 
 static inline bool lmv_dir_retry_check_update(struct md_op_data *op_data)
 {
-       const struct lmv_stripe_md *lsm = op_data->op_mea1;
+       const struct lmv_stripe_object *lso = op_data->op_lso1;
 
-       if (!lsm)
+       if (!lso)
                return false;
 
-       if (lmv_dir_layout_changing(lsm) && !op_data->op_new_layout) {
+       if (lmv_dir_layout_changing(lso) && !op_data->op_new_layout) {
                op_data->op_new_layout = true;
                return true;
        }
 
-       if (lmv_dir_bad_hash(lsm) &&
-           op_data->op_stripe_index < lsm->lsm_md_stripe_count - 1) {
+       if (lmv_dir_bad_hash(lso) &&
+           op_data->op_stripe_index < lso->lso_lsm.lsm_md_stripe_count - 1) {
                op_data->op_stripe_index++;
                return true;
        }
index 967270f..7336d09 100644 (file)
@@ -1701,14 +1701,14 @@ unlock:
  * which stripe its dirent is stored.
  */
 static struct lmv_tgt_desc *
-lmv_locate_tgt_by_name(struct lmv_obd *lmv, struct lmv_stripe_md *lsm,
+lmv_locate_tgt_by_name(struct lmv_obd *lmv, struct lmv_stripe_object *lso,
                       const char *name, int namelen, struct lu_fid *fid,
                       __u32 *mds, bool new_layout)
 {
        struct lmv_tgt_desc *tgt;
        const struct lmv_oinfo *oinfo;
 
-       if (!lmv_dir_striped(lsm) || !namelen) {
+       if (!lmv_dir_striped(lso) || !namelen) {
                tgt = lmv_fid2tgt(lmv, fid);
                if (IS_ERR(tgt))
                        return tgt;
@@ -1718,11 +1718,11 @@ lmv_locate_tgt_by_name(struct lmv_obd *lmv, struct lmv_stripe_md *lsm,
        }
 
        if (CFS_FAIL_CHECK(OBD_FAIL_LFSCK_BAD_NAME_HASH)) {
-               if (cfs_fail_val >= lsm->lsm_md_stripe_count)
+               if (cfs_fail_val >= lso->lso_lsm.lsm_md_stripe_count)
                        return ERR_PTR(-EBADF);
-               oinfo = &lsm->lsm_md_oinfo[cfs_fail_val];
+               oinfo = &lso->lso_lsm.lsm_md_oinfo[cfs_fail_val];
        } else {
-               oinfo = lsm_name_to_stripe_info(lsm, name, namelen, new_layout);
+               oinfo = lsm_name_to_stripe_info(lso, name, namelen, new_layout);
                if (IS_ERR(oinfo))
                        return ERR_CAST(oinfo);
        }
@@ -1752,8 +1752,8 @@ lmv_locate_tgt_by_name(struct lmv_obd *lmv, struct lmv_stripe_md *lsm,
  *
  * \param[in] lmv              LMV device
  * \param[in/out] op_data      client MD stack parameters, name, namelen etc,
- *                             op_mds and op_fid1 will be updated if op_mea1
- *                             indicates fid1 represents a striped directory.
+ *                             op_mds and op_fid1 will be updated if op_lso1
+ *                             indicates fid1 represents a striped directory.
  *
  * retval              pointer to the lmv_tgt_desc if succeed.
  *                      ERR_PTR(errno) if failed.
@@ -1761,11 +1761,11 @@ lmv_locate_tgt_by_name(struct lmv_obd *lmv, struct lmv_stripe_md *lsm,
 struct lmv_tgt_desc *
 lmv_locate_tgt(struct lmv_obd *lmv, struct md_op_data *op_data)
 {
-       struct lmv_stripe_md *lsm = op_data->op_mea1;
+       struct lmv_stripe_md *lsm;
        struct lmv_oinfo *oinfo;
        struct lmv_tgt_desc *tgt;
 
-       if (lmv_dir_foreign(lsm))
+       if (lmv_dir_foreign(op_data->op_lso1))
                return ERR_PTR(-ENODATA);
 
        /* During creating VOLATILE file, it should honor the mdt
@@ -1777,10 +1777,11 @@ lmv_locate_tgt(struct lmv_obd *lmv, struct md_op_data *op_data)
                if (!tgt)
                        return ERR_PTR(-ENODEV);
 
-               if (lmv_dir_striped(lsm)) {
+               if (lmv_dir_striped(op_data->op_lso1)) {
                        int i;
 
                        /* refill the right parent fid */
+                       lsm = &op_data->op_lso1->lso_lsm;
                        for (i = 0; i < lsm->lsm_md_stripe_count; i++) {
                                oinfo = &lsm->lsm_md_oinfo[i];
                                if (oinfo->lmo_mds == op_data->op_mds) {
@@ -1792,7 +1793,9 @@ lmv_locate_tgt(struct lmv_obd *lmv, struct md_op_data *op_data)
                        if (i == lsm->lsm_md_stripe_count)
                                op_data->op_fid1 = lsm->lsm_md_oinfo[0].lmo_fid;
                }
-       } else if (lmv_dir_bad_hash(lsm)) {
+       } else if (lmv_dir_bad_hash(op_data->op_lso1)) {
+               lsm = &op_data->op_lso1->lso_lsm;
+
                LASSERT(op_data->op_stripe_index < lsm->lsm_md_stripe_count);
                oinfo = &lsm->lsm_md_oinfo[op_data->op_stripe_index];
 
@@ -1802,7 +1805,7 @@ lmv_locate_tgt(struct lmv_obd *lmv, struct md_op_data *op_data)
                if (!tgt)
                        return ERR_PTR(-ENODEV);
        } else {
-               tgt = lmv_locate_tgt_by_name(lmv, op_data->op_mea1,
+               tgt = lmv_locate_tgt_by_name(lmv, op_data->op_lso1,
                                op_data->op_name, op_data->op_namelen,
                                &op_data->op_fid1, &op_data->op_mds,
                                op_data->op_new_layout);
@@ -1819,26 +1822,26 @@ lmv_locate_tgt2(struct lmv_obd *lmv, struct md_op_data *op_data)
        int rc;
 
        LASSERT(op_data->op_name);
-       if (lmv_dir_layout_changing(op_data->op_mea2)) {
+       if (lmv_dir_layout_changing(op_data->op_lso2)) {
                struct lu_fid fid1 = op_data->op_fid1;
-               struct lmv_stripe_md *lsm1 = op_data->op_mea1;
+               struct lmv_stripe_object *lso1 = op_data->op_lso1;
                struct ptlrpc_request *request = NULL;
 
                /*
                 * avoid creating new file under old layout of migrating
                 * directory, check it here.
                 */
-               tgt = lmv_locate_tgt_by_name(lmv, op_data->op_mea2,
+               tgt = lmv_locate_tgt_by_name(lmv, op_data->op_lso2,
                                op_data->op_name, op_data->op_namelen,
                                &op_data->op_fid2, &op_data->op_mds, false);
                if (IS_ERR(tgt))
                        RETURN(tgt);
 
                op_data->op_fid1 = op_data->op_fid2;
-               op_data->op_mea1 = op_data->op_mea2;
+               op_data->op_lso1 = op_data->op_lso2;
                rc = md_getattr_name(tgt->ltd_exp, op_data, &request);
                op_data->op_fid1 = fid1;
-               op_data->op_mea1 = lsm1;
+               op_data->op_lso1 = lso1;
                if (!rc) {
                        ptlrpc_req_finished(request);
                        RETURN(ERR_PTR(-EEXIST));
@@ -1848,9 +1851,10 @@ lmv_locate_tgt2(struct lmv_obd *lmv, struct md_op_data *op_data)
                        RETURN(ERR_PTR(rc));
        }
 
-       return lmv_locate_tgt_by_name(lmv, op_data->op_mea2, op_data->op_name,
-                               op_data->op_namelen, &op_data->op_fid2,
-                               &op_data->op_mds, true);
+       return lmv_locate_tgt_by_name(lmv, op_data->op_lso2,
+                                     op_data->op_name, op_data->op_namelen,
+                                     &op_data->op_fid2, &op_data->op_mds,
+                                     true);
 }
 
 int lmv_old_layout_lookup(struct lmv_obd *lmv, struct md_op_data *op_data)
@@ -1859,7 +1863,7 @@ int lmv_old_layout_lookup(struct lmv_obd *lmv, struct md_op_data *op_data)
        struct ptlrpc_request *request;
        int rc;
 
-       LASSERT(lmv_dir_layout_changing(op_data->op_mea1));
+       LASSERT(lmv_dir_layout_changing(op_data->op_lso1));
        LASSERT(!op_data->op_new_layout);
 
        tgt = lmv_locate_tgt(lmv, op_data);
@@ -1887,7 +1891,7 @@ static inline bool lmv_op_user_qos_mkdir(const struct md_op_data *op_data)
        if (op_data->op_code != LUSTRE_OPC_MKDIR)
                return false;
 
-       if (lmv_dir_striped(op_data->op_mea1))
+       if (lmv_dir_striped(op_data->op_lso1))
                return false;
 
        return (op_data->op_cli_flags & CLI_SET_MEA) && lum &&
@@ -1898,16 +1902,17 @@ static inline bool lmv_op_user_qos_mkdir(const struct md_op_data *op_data)
 /* mkdir by QoS if either ROOT or parent default LMV is space balanced. */
 static inline bool lmv_op_default_qos_mkdir(const struct md_op_data *op_data)
 {
-       const struct lmv_stripe_md *lsm = op_data->op_default_mea1;
+       const struct lmv_stripe_object *lso = op_data->op_default_lso1;
 
        if (op_data->op_code != LUSTRE_OPC_MKDIR)
                return false;
 
-       if (lmv_dir_striped(op_data->op_mea1))
+       if (lmv_dir_striped(op_data->op_lso1))
                return false;
 
        return (op_data->op_flags & MF_QOS_MKDIR) ||
-              (lsm && lsm->lsm_md_master_mdt_index == LMV_OFFSET_DEFAULT);
+              (lso && lso->lso_lsm.lsm_md_master_mdt_index ==
+               LMV_OFFSET_DEFAULT);
 }
 
 /* if parent default LMV is space balanced, and
@@ -1919,11 +1924,11 @@ static inline bool lmv_op_default_qos_mkdir(const struct md_op_data *op_data)
  */
 static inline bool lmv_op_default_rr_mkdir(const struct md_op_data *op_data)
 {
-       const struct lmv_stripe_md *lsm = op_data->op_default_mea1;
+       const struct lmv_stripe_object *lso = op_data->op_default_lso1;
 
+       /*
+        * True when the op carries MF_RR_MKDIR, when a default LMV is present
+        * (lso may be NULL) with lsm_md_max_inherit_rr other than
+        * LMV_INHERIT_RR_NONE, or when fid_is_root() accepts op_fid1.
+        */
        return (op_data->op_flags & MF_RR_MKDIR) ||
-              (lsm && lsm->lsm_md_max_inherit_rr != LMV_INHERIT_RR_NONE) ||
-              fid_is_root(&op_data->op_fid1);
+              (lso && lso->lso_lsm.lsm_md_max_inherit_rr !=
+               LMV_INHERIT_RR_NONE) || fid_is_root(&op_data->op_fid1);
 }
 
 /* 'lfs mkdir -i <specific_MDT>' */
@@ -1943,8 +1948,8 @@ static inline bool
 lmv_op_default_specific_mkdir(const struct md_op_data *op_data)
 {
        return op_data->op_code == LUSTRE_OPC_MKDIR &&
-              op_data->op_default_mea1 &&
-              op_data->op_default_mea1->lsm_md_master_mdt_index !=
+              op_data->op_default_lso1 &&
+              op_data->op_default_lso1->lso_lsm.lsm_md_master_mdt_index !=
                        LMV_OFFSET_DEFAULT;
 }
 
@@ -1996,10 +2001,10 @@ int lmv_create(struct obd_export *exp, struct md_op_data *op_data,
        if (!lmv->lmv_mdt_descs.ltd_lmv_desc.ld_active_tgt_count)
                RETURN(-EIO);
 
-       if (lmv_dir_bad_hash(op_data->op_mea1))
+       if (lmv_dir_bad_hash(op_data->op_lso1))
                RETURN(-EBADF);
 
-       if (lmv_dir_layout_changing(op_data->op_mea1)) {
+       if (lmv_dir_layout_changing(op_data->op_lso1)) {
                /*
                 * if parent is migrating, create() needs to lookup existing
                 * name in both old and new layout, check old layout on client.
@@ -2033,8 +2038,9 @@ int lmv_create(struct obd_export *exp, struct md_op_data *op_data,
                if (IS_ERR(tgt))
                        RETURN(PTR_ERR(tgt));
        } else if (lmv_op_default_specific_mkdir(op_data)) {
-               op_data->op_mds =
-                       op_data->op_default_mea1->lsm_md_master_mdt_index;
+               struct lmv_stripe_md *lsm = &op_data->op_default_lso1->lso_lsm;
+
+               op_data->op_mds = lsm->lsm_md_master_mdt_index;
                tgt = lmv_tgt(lmv, op_data->op_mds);
                if (!tgt)
                        RETURN(-ENODEV);
@@ -2257,7 +2263,7 @@ static inline bool lmv_op_topdir_migrate(const struct md_op_data *op_data)
        if (!S_ISDIR(op_data->op_mode))
                return false;
 
-       if (lmv_dir_layout_changing(op_data->op_mea1))
+       if (lmv_dir_layout_changing(op_data->op_lso1))
                return false;
 
        return true;
@@ -2292,7 +2298,7 @@ static inline bool lmv_subdir_specific_migrate(const struct md_op_data *op_data)
        if (!S_ISDIR(op_data->op_mode))
                return false;
 
-       if (!lmv_dir_layout_changing(op_data->op_mea1))
+       if (!lmv_dir_layout_changing(op_data->op_lso1))
                return false;
 
        return le32_to_cpu(lum->lum_stripe_offset) != LMV_OFFSET_DEFAULT;
@@ -2304,7 +2310,7 @@ static int lmv_migrate(struct obd_export *exp, struct md_op_data *op_data,
 {
        struct obd_device *obd = exp->exp_obd;
        struct lmv_obd *lmv = &obd->u.lmv;
-       struct lmv_stripe_md *lsm = op_data->op_mea1;
+       struct lmv_stripe_object *lso = op_data->op_lso1;
        struct lmv_tgt_desc *parent_tgt;
        struct lmv_tgt_desc *sp_tgt;
        struct lmv_tgt_desc *tp_tgt = NULL;
@@ -2328,10 +2334,10 @@ static int lmv_migrate(struct obd_export *exp, struct md_op_data *op_data,
        if (IS_ERR(parent_tgt))
                RETURN(PTR_ERR(parent_tgt));
 
-       if (lmv_dir_striped(lsm)) {
+       if (lmv_dir_striped(lso)) {
                const struct lmv_oinfo *oinfo;
 
-               oinfo = lsm_name_to_stripe_info(lsm, name, namelen, false);
+               oinfo = lsm_name_to_stripe_info(lso, name, namelen, false);
                if (IS_ERR(oinfo))
                        RETURN(PTR_ERR(oinfo));
 
@@ -2345,8 +2351,8 @@ static int lmv_migrate(struct obd_export *exp, struct md_op_data *op_data,
                 * if parent is being migrated too, fill op_fid2 with target
                 * stripe fid, otherwise the target stripe is not created yet.
                 */
-               if (lmv_dir_layout_changing(lsm)) {
-                       oinfo = lsm_name_to_stripe_info(lsm, name, namelen,
+               if (lmv_dir_layout_changing(lso)) {
+                       oinfo = lsm_name_to_stripe_info(lso, name, namelen,
                                                        true);
                        if (IS_ERR(oinfo))
                                RETURN(PTR_ERR(oinfo));
@@ -2783,7 +2789,7 @@ static struct lu_dirent *stripe_dirent_load(struct lmv_dir_ctxt *ctxt,
                        hash = end;
                }
 
-               oinfo = &op_data->op_mea1->lsm_md_oinfo[stripe_index];
+               oinfo = &op_data->op_lso1->lso_lsm.lsm_md_oinfo[stripe_index];
                if (!oinfo->lmo_root) {
                        rc = -ENOENT;
                        break;
@@ -2969,7 +2975,7 @@ static int lmv_striped_read_page(struct obd_export *exp,
        last_ent = ent;
 
        /* initialize dir read context */
-       stripe_count = op_data->op_mea1->lsm_md_stripe_count;
+       stripe_count = op_data->op_lso1->lso_lsm.lsm_md_stripe_count;
        OBD_ALLOC(ctxt, offsetof(typeof(*ctxt), ldc_stripes[stripe_count]));
        if (!ctxt)
                GOTO(free_page, rc = -ENOMEM);
@@ -3058,10 +3064,10 @@ static int lmv_read_page(struct obd_export *exp, struct md_op_data *op_data,
 
        ENTRY;
 
-       if (unlikely(lmv_dir_foreign(op_data->op_mea1)))
+       if (unlikely(lmv_dir_foreign(op_data->op_lso1)))
                RETURN(-ENODATA);
 
-       if (unlikely(lmv_dir_striped(op_data->op_mea1))) {
+       if (unlikely(lmv_dir_striped(op_data->op_lso1))) {
                rc = lmv_striped_read_page(exp, op_data, mrinfo, offset, ppage);
                RETURN(rc);
        }
@@ -3470,119 +3476,188 @@ static inline int lmv_unpack_user_md(struct obd_export *exp,
        return 0;
 }
 
-static int lmv_unpackmd(struct obd_export *exp, struct lmv_stripe_md **lsmp,
-                       const union lmv_mds_md *lmm, size_t lmm_size)
+/**
+ * Allocate an lmv_stripe_object large enough for the layout in \a lmm.
+ *
+ * Only sizes the object and validates \a lmm_size against what \a lmm claims
+ * to contain; nothing is unpacked here. The object is returned holding one
+ * reference.
+ *
+ * \param[in] magic     layout magic, already converted to CPU byte order
+ * \param[in] lmm       wire layout (little-endian); required for
+ *                      LMV_MAGIC_FOREIGN and LMV_MAGIC_V1, may be NULL for
+ *                      any other magic, in which case no size check is made
+ * \param[in] lmm_size  number of bytes available at \a lmm
+ *
+ * \retval              new object with lso_refs set to 1
+ * \retval              ERR_PTR(-EPROTO) if \a lmm is missing or too short
+ * \retval              ERR_PTR(-ENOMEM) if the allocation failed
+ */
+struct lmv_stripe_object *lmv_stripe_object_alloc(__u32 magic,
+                                                 const union lmv_mds_md *lmm,
+                                                 size_t lmm_size)
 {
-       struct lmv_stripe_md     *lsm;
-       int                      lsm_size;
-       int                      rc;
-       bool                     allocated = false;
-       ENTRY;
+       struct lmv_stripe_object *lsm_obj;
+       size_t size;
+       ENTRY;
 
-       LASSERT(lsmp != NULL);
+       if (magic == LMV_MAGIC_FOREIGN) {
+               struct lmv_foreign_md *lfm;
+               __u32 len;
 
-       lsm = *lsmp;
-       /* Free memmd */
-       if (lsm != NULL && lmm == NULL) {
-               int i;
-               struct lmv_foreign_md *lfm = (struct lmv_foreign_md *)lsm;
+               size = offsetof(typeof(*lfm), lfm_value[0]);
+               if (!lmm || lmm_size < size)
+                       RETURN(ERR_PTR(-EPROTO));
 
-               if (lfm->lfm_magic == LMV_MAGIC_FOREIGN) {
-                       size_t lfm_size;
+               /*
+                * Check lfm_length against the bytes left after the header
+                * rather than adding it to the header size: the sum could
+                * wrap and let an oversized length through.
+                */
+               len = le32_to_cpu(lmm->lmv_foreign_md.lfm_length);
+               if (lmm_size - size < len)
+                       RETURN(ERR_PTR(-EPROTO));
 
-                       lfm_size = lfm->lfm_length + offsetof(typeof(*lfm),
-                                                             lfm_value[0]);
-                       OBD_FREE_LARGE(lfm, lfm_size);
-                       RETURN(0);
-               }
+               /*
+                * NOTE(review): lmv_stripe_object_put() sizes the foreign free
+                * as header + lfm_length, which matches this allocation only
+                * when lmm_size is exact - confirm with callers.
+                */
+               OBD_ALLOC_LARGE(lsm_obj, lmm_size +
+                               offsetof(typeof(*lsm_obj), lso_lfm));
+       } else {
+               if (magic == LMV_MAGIC_V1) {
+                       int count;
+
+                       size = offsetof(struct lmv_mds_md_v1,
+                                       lmv_stripe_fids[0]);
+                       if (!lmm || lmm_size < size)
+                               RETURN(ERR_PTR(-EPROTO));
 
-               if (lmv_dir_striped(lsm)) {
-                       for (i = 0; i < lsm->lsm_md_stripe_count; i++)
-                               iput(lsm->lsm_md_oinfo[i].lmo_root);
-                       lsm_size = lmv_stripe_md_size(lsm->lsm_md_stripe_count);
+                       /*
+                        * Bound the stripe count by the FIDs that fit in the
+                        * remaining bytes; multiplying an unchecked count
+                        * could overflow and defeat the size check.
+                        */
+                       count = lmv_mds_md_stripe_count_get(lmm);
+                       if (count < 0 || (size_t)count >
+                           (lmm_size - size) / sizeof(struct lu_fid))
+                               RETURN(ERR_PTR(-EPROTO));
+
+                       size = lmv_stripe_md_size(count);
                } else {
-                       lsm_size = lmv_stripe_md_size(0);
+                       if (lmm && lmm_size < sizeof(struct lmv_user_md))
+                               RETURN(ERR_PTR(-EPROTO));
+
+                       /**
+                        * Unpack default dirstripe(lmv_user_md) to
+                        * lmv_stripe_md, stripecount should be 0 then.
+                        */
+                       size = lmv_stripe_md_size(0);
                }
-               OBD_FREE(lsm, lsm_size);
-               *lsmp = NULL;
-               RETURN(0);
+               size += offsetof(typeof(*lsm_obj), lso_lsm);
+               OBD_ALLOC(lsm_obj, size);
        }
 
-       /* foreign lmv case */
-       if (le32_to_cpu(lmm->lmv_magic) == LMV_MAGIC_FOREIGN) {
-               struct lmv_foreign_md *lfm = (struct lmv_foreign_md *)lsm;
-
-               if (lfm == NULL) {
-                       OBD_ALLOC_LARGE(lfm, lmm_size);
-                       if (lfm == NULL)
-                               RETURN(-ENOMEM);
-                       *lsmp = (struct lmv_stripe_md *)lfm;
-               }
-               lfm->lfm_magic = le32_to_cpu(lmm->lmv_foreign_md.lfm_magic);
-               lfm->lfm_length = le32_to_cpu(lmm->lmv_foreign_md.lfm_length);
-               lfm->lfm_type = le32_to_cpu(lmm->lmv_foreign_md.lfm_type);
-               lfm->lfm_flags = le32_to_cpu(lmm->lmv_foreign_md.lfm_flags);
-               memcpy(&lfm->lfm_value, &lmm->lmv_foreign_md.lfm_value,
-                      lfm->lfm_length);
-               RETURN(lmm_size);
+       /* the caller owns the initial reference */
+       if (lsm_obj) {
+               atomic_set(&lsm_obj->lso_refs, 1);
+               RETURN(lsm_obj);
        }
 
-       if (le32_to_cpu(lmm->lmv_magic) == LMV_MAGIC_STRIPE)
+       RETURN(ERR_PTR(-ENOMEM));
+}
+EXPORT_SYMBOL(lmv_stripe_object_alloc);
+
+/**
+ * Build an in-memory lmv_stripe_object from the wire layout \a lmm.
+ *
+ * \param[in]  exp      export, used in error messages and by the unpackers
+ * \param[out] lsop     must point to a NULL pointer on entry; receives the
+ *                      new object (one reference) on success and is left
+ *                      NULL on failure
+ * \param[in]  lmm      wire layout, little-endian
+ * \param[in]  lmm_size number of bytes at \a lmm
+ *
+ * \retval 0            success
+ * \retval -EPROTO      \a lmm is missing or too short to hold a magic
+ * \retval -EPERM       magic is LMV_MAGIC_STRIPE
+ * \retval -EIO         magic is none of V1, USER or FOREIGN
+ * \retval negative     error from lmv_stripe_object_alloc() or the unpacker
+ */
+static int lmv_stripe_object_create(struct obd_export *exp,
+                                   struct lmv_stripe_object **lsop,
+                                   const union lmv_mds_md *lmm,
+                                   size_t lmm_size)
+{
+       struct lmv_stripe_object *lsm_obj;
+       __u32 magic;
+       int rc;
+       ENTRY;
+
+       LASSERT(lsop != NULL && *lsop == NULL);
+
+       /* the magic is read below, so the buffer must at least contain it */
+       if (lmm == NULL || lmm_size < sizeof(lmm->lmv_magic))
+               RETURN(-EPROTO);
+
+       magic = le32_to_cpu(lmm->lmv_magic);
+       if (magic == LMV_MAGIC_STRIPE)
                RETURN(-EPERM);
 
-       /* Unpack memmd */
-       if (le32_to_cpu(lmm->lmv_magic) != LMV_MAGIC_V1 &&
-           le32_to_cpu(lmm->lmv_magic) != LMV_USER_MAGIC) {
+       if (magic != LMV_MAGIC_V1 && magic != LMV_USER_MAGIC &&
+           magic != LMV_MAGIC_FOREIGN) {
                CERROR("%s: invalid lmv magic %x: rc = %d\n",
-                      exp->exp_obd->obd_name, le32_to_cpu(lmm->lmv_magic),
-                      -EIO);
+                      exp->exp_obd->obd_name, magic, -EIO);
                RETURN(-EIO);
        }
 
-       if (le32_to_cpu(lmm->lmv_magic) == LMV_MAGIC_V1)
-               lsm_size = lmv_stripe_md_size(lmv_mds_md_stripe_count_get(lmm));
-       else
-               /**
-                * Unpack default dirstripe(lmv_user_md) to lmv_stripe_md,
-                * stripecount should be 0 then.
-                */
-               lsm_size = lmv_stripe_md_size(0);
+       /* foreign lmv case */
+       if (magic == LMV_MAGIC_FOREIGN) {
+               struct lmv_foreign_md *lfm;
 
-       if (lsm == NULL) {
-               OBD_ALLOC(lsm, lsm_size);
-               if (lsm == NULL)
-                       RETURN(-ENOMEM);
-               allocated = true;
-               *lsmp = lsm;
+               lsm_obj = lmv_stripe_object_alloc(magic, lmm, lmm_size);
+               if (IS_ERR(lsm_obj))
+                       RETURN(PTR_ERR(lsm_obj));
+
+               *lsop = lsm_obj;
+               lfm = &lsm_obj->lso_lfm;
+               lfm->lfm_magic = magic;
+               lfm->lfm_length = le32_to_cpu(lmm->lmv_foreign_md.lfm_length);
+               lfm->lfm_type = le32_to_cpu(lmm->lmv_foreign_md.lfm_type);
+               lfm->lfm_flags = le32_to_cpu(lmm->lmv_foreign_md.lfm_flags);
+               /* lfm_length was checked against lmm_size by the allocator */
+               memcpy(&lfm->lfm_value, &lmm->lmv_foreign_md.lfm_value,
+                      lfm->lfm_length);
+               RETURN(0);
        }
 
-       switch (le32_to_cpu(lmm->lmv_magic)) {
+       /* Unpack memmd */
+       lsm_obj = lmv_stripe_object_alloc(magic, lmm, lmm_size);
+       if (IS_ERR(lsm_obj))
+               RETURN(PTR_ERR(lsm_obj));
+
+       switch (magic) {
        case LMV_MAGIC_V1:
-               rc = lmv_unpack_md_v1(exp, lsm, &lmm->lmv_md_v1);
+               rc = lmv_unpack_md_v1(exp, &lsm_obj->lso_lsm, &lmm->lmv_md_v1);
                break;
        case LMV_USER_MAGIC:
-               rc = lmv_unpack_user_md(exp, lsm, &lmm->lmv_user_md);
+               rc = lmv_unpack_user_md(exp, &lsm_obj->lso_lsm,
+                                       &lmm->lmv_user_md);
                break;
        default:
                CERROR("%s: unrecognized magic %x\n", exp->exp_obd->obd_name,
-                      le32_to_cpu(lmm->lmv_magic));
+                      magic);
                rc = -EINVAL;
                break;
        }
 
-       if (rc != 0 && allocated) {
-               OBD_FREE(lsm, lsm_size);
-               *lsmp = NULL;
-               lsm_size = rc;
-       }
-       RETURN(lsm_size);
+       /* put() resets lsm_obj to NULL, so *lsop ends up NULL on failure */
+       if (rc != 0)
+               lmv_stripe_object_put(&lsm_obj);
+
+       *lsop = lsm_obj;
+       RETURN(rc);
+}
+
+/**
+ * Take an additional reference on \a lsm_obj.
+ *
+ * \param[in] lsm_obj   object to reference; NULL is accepted and ignored
+ *
+ * \retval              \a lsm_obj itself, so the call can be used inline
+ */
+struct lmv_stripe_object *
+lmv_stripe_object_get(struct lmv_stripe_object *lsm_obj)
+{
+       if (lsm_obj != NULL) {
+               atomic_inc(&lsm_obj->lso_refs);
+               CDEBUG(D_INODE, "get %p %u\n", lsm_obj,
+                      atomic_read(&lsm_obj->lso_refs));
+       }
+
+       return lsm_obj;
 }
+EXPORT_SYMBOL(lmv_stripe_object_get);
 
-void lmv_free_memmd(struct lmv_stripe_md *lsm)
+/**
+ * Drop one reference on \a *lsop and reset the caller's pointer to NULL.
+ *
+ * The object is freed when the last reference goes away; for a striped
+ * layout each stripe's lmo_root is passed to iput() first.
+ *
+ * \param[in,out] lsop  address of the pointer to release; must not be NULL
+ *                      itself, but may point to NULL (then nothing is done)
+ */
+void lmv_stripe_object_put(struct lmv_stripe_object **lsop)
 {
-       lmv_unpackmd(NULL, &lsm, NULL, 0);
+       struct lmv_stripe_object *lsm_obj;
+       size_t bytes;
+       int idx;
+
+       LASSERT(lsop != NULL);
+
+       lsm_obj = *lsop;
+       if (lsm_obj == NULL)
+               return;
+
+       /* the caller's pointer is cleared whether or not the object dies */
+       *lsop = NULL;
+       CDEBUG(D_INODE, "put %p %u\n",
+              lsm_obj, atomic_read(&lsm_obj->lso_refs) - 1);
+
+       /* only the holder of the last reference tears the object down */
+       if (!atomic_dec_and_test(&lsm_obj->lso_refs))
+               return;
+
+       if (lmv_dir_foreign(lsm_obj)) {
+               bytes = offsetof(typeof(*lsm_obj), lso_lsm) +
+                       offsetof(typeof(lsm_obj->lso_lfm), lfm_value[0]) +
+                       lsm_obj->lso_lfm.lfm_length;
+               OBD_FREE_LARGE(lsm_obj, bytes);
+               return;
+       }
+
+       bytes = offsetof(typeof(*lsm_obj), lso_lsm);
+       if (!lmv_dir_striped(lsm_obj)) {
+               bytes += lmv_stripe_md_size(0);
+       } else {
+               struct lmv_stripe_md *lsm = &lsm_obj->lso_lsm;
+
+               for (idx = 0; idx < lsm->lsm_md_stripe_count; idx++)
+                       iput(lsm->lsm_md_oinfo[idx].lmo_root);
+               bytes += lmv_stripe_md_size(lsm->lsm_md_stripe_count);
+       }
+       OBD_FREE(lsm_obj, bytes);
 }
-EXPORT_SYMBOL(lmv_free_memmd);
+EXPORT_SYMBOL(lmv_stripe_object_put);
 
 static int lmv_cancel_unused(struct obd_export *exp, const struct lu_fid *fid,
                             union ldlm_policy_data *policy,
@@ -3691,25 +3766,19 @@ lmv_get_lustre_md(struct obd_export *exp, struct req_capsule *pill,
        return md_get_lustre_md(tgt->ltd_exp, pill, dt_exp, md_exp, md);
 }
 
-static int lmv_free_lustre_md(struct obd_export *exp, struct lustre_md *md)
+/*
+ * Release the stripe objects referenced by md (def_lsm_obj and lsm_obj).
+ * Returns 0, or -EINVAL when target 0 or its export is missing.
+ */
+static int lmv_put_lustre_md(struct obd_export *exp, struct lustre_md *md)
 {
        struct obd_device *obd = exp->exp_obd;
        struct lmv_obd *lmv = &obd->u.lmv;
        struct lmv_tgt_desc *tgt = lmv_tgt(lmv, 0);
-
        ENTRY;
 
-       if (md->default_lmv) {
-               lmv_free_memmd(md->default_lmv);
-               md->default_lmv = NULL;
-       }
-       if (md->lmv != NULL) {
-               lmv_free_memmd(md->lmv);
-               md->lmv = NULL;
-       }
+       /*
+        * Drop the references held by md; lmv_stripe_object_put() also resets
+        * both pointers to NULL. This happens before the target check below,
+        * so it is done even when -EINVAL is returned.
+        */
+       lmv_stripe_object_put(&md->def_lsm_obj);
+       lmv_stripe_object_put(&md->lsm_obj);
+
        if (!tgt || !tgt->ltd_exp)
                RETURN(-EINVAL);
-       RETURN(md_free_lustre_md(tgt->ltd_exp, md));
+       RETURN(0);
 }
 
 static int lmv_set_open_replay_data(struct obd_export *exp,
@@ -3799,15 +3868,14 @@ static int lmv_revalidate_lock(struct obd_export *exp, struct lookup_intent *it,
 }
 
 static int lmv_get_fid_from_lsm(struct obd_export *exp,
-                               const struct lmv_stripe_md *lsm,
+                               const struct lmv_stripe_object *lso,
                                const char *name, int namelen,
                                struct lu_fid *fid)
 {
        const struct lmv_oinfo *oinfo;
 
-       LASSERT(lmv_dir_striped(lsm));
-
-       oinfo = lsm_name_to_stripe_info(lsm, name, namelen, false);
+       LASSERT(lmv_dir_striped(lso));
+       oinfo = lsm_name_to_stripe_info(lso, name, namelen, false);
        if (IS_ERR(oinfo))
                return PTR_ERR(oinfo);
 
@@ -3867,14 +3935,15 @@ static int lmv_quotactl(struct obd_device *unused, struct obd_export *exp,
 }
 
 static int lmv_merge_attr(struct obd_export *exp,
-                         const struct lmv_stripe_md *lsm,
+                         const struct lmv_stripe_object *lso,
                          struct cl_attr *attr,
                          ldlm_blocking_callback cb_blocking)
 {
+       const struct lmv_stripe_md *lsm = &lso->lso_lsm;
        int rc;
        int i;
 
-       if (!lmv_dir_striped(lsm))
+       if (!lmv_dir_striped(lso))
                return 0;
 
        rc = lmv_revalidate_slaves(exp, lsm, cb_blocking, 0);
@@ -4158,14 +4227,14 @@ static const struct md_ops lmv_md_ops = {
         .m_set_lock_data        = lmv_set_lock_data,
         .m_lock_match           = lmv_lock_match,
        .m_get_lustre_md        = lmv_get_lustre_md,
-       .m_free_lustre_md       = lmv_free_lustre_md,
+       .m_put_lustre_md        = lmv_put_lustre_md,
        .m_merge_attr           = lmv_merge_attr,
         .m_set_open_replay_data = lmv_set_open_replay_data,
         .m_clear_open_replay_data = lmv_clear_open_replay_data,
         .m_intent_getattr_async = lmv_intent_getattr_async,
        .m_revalidate_lock      = lmv_revalidate_lock,
        .m_get_fid_from_lsm     = lmv_get_fid_from_lsm,
-       .m_unpackmd             = lmv_unpackmd,
+       .m_stripe_object_create = lmv_stripe_object_create,
        .m_rmfid                = lmv_rmfid,
        .m_batch_create         = lmv_batch_create,
        .m_batch_add            = lmv_batch_add,
index 3bcdd7a..086d13e 100644 (file)
@@ -609,45 +609,27 @@ static int mdc_get_lustre_md(struct obd_export *exp, struct req_capsule *pill,
                        if (lmv == NULL)
                                GOTO(out, rc = -EPROTO);
 
-                       rc = md_unpackmd(md_exp, &md->lmv, lmv, lmv_size);
+                       rc = md_stripe_object_create(md_exp, &md->lsm_obj,
+                                                    lmv, lmv_size);
                        if (rc < 0)
                                GOTO(out, rc);
-
-                       if (rc < (int)sizeof(*md->lmv)) {
-                               struct lmv_foreign_md *lfm = md->lfm;
-
-                               /* short (< sizeof(struct lmv_stripe_md))
-                                * foreign LMV case
-                                */
-                               if (lfm->lfm_magic != LMV_MAGIC_FOREIGN) {
-                                       CDEBUG(D_INFO,
-                                              "lmv size too small: %d < %d\n",
-                                              rc, (int)sizeof(*md->lmv));
-                                       GOTO(out, rc = -EPROTO);
-                               }
-                       }
                }
 
                /* since 2.12.58 intent_getattr fetches default LMV */
                if (md->body->mbo_valid & OBD_MD_DEFAULT_MEA) {
-                       lmv_size = sizeof(struct lmv_user_md);
+                       lmv_size = req_capsule_get_size(pill,
+                                                       &RMF_DEFAULT_MDT_MD,
+                                                       RCL_SERVER);
                        lmv = req_capsule_server_sized_get(pill,
                                                           &RMF_DEFAULT_MDT_MD,
                                                           lmv_size);
                        if (!lmv)
                                GOTO(out, rc = -EPROTO);
 
-                       rc = md_unpackmd(md_exp, &md->default_lmv, lmv,
-                                        lmv_size);
+                       rc = md_stripe_object_create(md_exp, &md->def_lsm_obj,
+                                                    lmv, lmv_size);
                        if (rc < 0)
                                GOTO(out, rc);
-
-                       if (rc < (int)sizeof(*md->default_lmv)) {
-                               CDEBUG(D_INFO,
-                                      "default lmv size too small: %d < %d\n",
-                                       rc, (int)sizeof(*md->default_lmv));
-                               GOTO(out, rc = -EPROTO);
-                       }
                }
        }
        rc = 0;
@@ -665,18 +647,14 @@ out_acl:
 
        EXIT;
 out:
-       if (rc)
+       if (rc) {
                lmd_clear_acl(md);
+               md_put_lustre_md(md_exp, md);
+       }
 
        return rc;
 }
 
-static int mdc_free_lustre_md(struct obd_export *exp, struct lustre_md *md)
-{
-       ENTRY;
-       RETURN(0);
-}
-
 void mdc_replay_open(struct ptlrpc_request *req)
 {
        struct md_open_data *mod = req->rq_cb_data;
@@ -3037,7 +3015,6 @@ static const struct md_ops mdc_md_ops = {
        .m_set_lock_data    = mdc_set_lock_data,
        .m_lock_match       = mdc_lock_match,
        .m_get_lustre_md    = mdc_get_lustre_md,
-       .m_free_lustre_md   = mdc_free_lustre_md,
        .m_set_open_replay_data = mdc_set_open_replay_data,
        .m_clear_open_replay_data = mdc_clear_open_replay_data,
        .m_intent_getattr_async = mdc_intent_getattr_async,