Whamcloud - gitweb
LU-11213 lmv: mkdir with balanced space usage 60/34360/15
author Lai Siyao <lai.siyao@whamcloud.com>
Fri, 15 Feb 2019 14:07:56 +0000 (22:07 +0800)
committer Oleg Drokin <green@whamcloud.com>
Fri, 7 Jun 2019 04:08:50 +0000 (04:08 +0000)
If the default LMV hash type of a plain directory is "space", create
its subdirs across all MDTs with balanced space usage:
* On the client, mkdir allocates the FID on an MDT chosen for balanced
  space usage (the space QoS code is in the next patch).
* The MDT allows mkdir on a different MDT from its parent if the parent
  has the "space" hash type in its default LMV; this is normally
  rejected because mkdir shouldn't create a remote directory.

Signed-off-by: Lai Siyao <lai.siyao@whamcloud.com>
Change-Id: I284e21f334c07462211be4c8e38e965722d1e8a8
Reviewed-on: https://review.whamcloud.com/34360
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Jenkins
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Hongchao Zhang <hongchao@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
13 files changed:
lustre/include/lustre_lmv.h
lustre/llite/dir.c
lustre/llite/file.c
lustre/llite/llite_internal.h
lustre/llite/llite_lib.c
lustre/llite/namei.c
lustre/lmv/lmv_intent.c
lustre/lmv/lmv_internal.h
lustre/lmv/lmv_obd.c
lustre/lod/lod_internal.h
lustre/lod/lod_lov.c
lustre/lod/lod_object.c
lustre/mdd/mdd_dir.c

index e683e6c..a3a5120 100644 (file)
@@ -54,6 +54,47 @@ struct lmv_stripe_md {
        struct lmv_oinfo lsm_md_oinfo[0];
 };
 
        struct lmv_oinfo lsm_md_oinfo[0];
 };
 
+/* NB: LMV_HASH_TYPE_SPACE is set in default LMV only */
+static inline bool lmv_is_known_hash_type(__u32 type)
+{
+       return (type & LMV_HASH_TYPE_MASK) == LMV_HASH_TYPE_FNV_1A_64 ||
+              (type & LMV_HASH_TYPE_MASK) == LMV_HASH_TYPE_ALL_CHARS;
+}
+
+static inline bool lmv_dir_striped(const struct lmv_stripe_md *lsm)
+{
+       return lsm && lsm->lsm_md_magic == LMV_MAGIC;
+}
+
+static inline bool lmv_dir_foreign(const struct lmv_stripe_md *lsm)
+{
+       return lsm && lsm->lsm_md_magic == LMV_MAGIC_FOREIGN;
+}
+
+static inline bool lmv_dir_migrating(const struct lmv_stripe_md *lsm)
+{
+       return lmv_dir_striped(lsm) &&
+              lsm->lsm_md_hash_type & LMV_HASH_FLAG_MIGRATION;
+}
+
+static inline bool lmv_dir_bad_hash(const struct lmv_stripe_md *lsm)
+{
+       if (!lmv_dir_striped(lsm))
+               return false;
+
+       if (lmv_dir_migrating(lsm) &&
+           lsm->lsm_md_stripe_count - lsm->lsm_md_migrate_offset <= 1)
+               return false;
+
+       return !lmv_is_known_hash_type(lsm->lsm_md_hash_type);
+}
+
+/* NB, this is checking directory default LMV */
+static inline bool lmv_dir_space_hashed(const struct lmv_stripe_md *lsm)
+{
+       return lsm && lsm->lsm_md_hash_type == LMV_HASH_TYPE_SPACE;
+}
+
 static inline bool
 lsm_md_eq(const struct lmv_stripe_md *lsm1, const struct lmv_stripe_md *lsm2)
 {
 static inline bool
 lsm_md_eq(const struct lmv_stripe_md *lsm1, const struct lmv_stripe_md *lsm2)
 {
@@ -74,7 +115,7 @@ lsm_md_eq(const struct lmv_stripe_md *lsm1, const struct lmv_stripe_md *lsm2)
                      lsm2->lsm_md_pool_name) != 0)
                return false;
 
                      lsm2->lsm_md_pool_name) != 0)
                return false;
 
-       if (lsm1->lsm_md_magic == LMV_MAGIC_V1) {
+       if (lmv_dir_striped(lsm1)) {
                for (idx = 0; idx < lsm1->lsm_md_stripe_count; idx++) {
                        if (!lu_fid_eq(&lsm1->lsm_md_oinfo[idx].lmo_fid,
                                       &lsm2->lsm_md_oinfo[idx].lmo_fid))
                for (idx = 0; idx < lsm1->lsm_md_stripe_count; idx++) {
                        if (!lu_fid_eq(&lsm1->lsm_md_oinfo[idx].lmo_fid,
                                       &lsm2->lsm_md_oinfo[idx].lmo_fid))
@@ -96,7 +137,7 @@ static inline void lsm_md_dump(int mask, const struct lmv_stripe_md *lsm)
                lsm->lsm_md_layout_version, lsm->lsm_md_migrate_offset,
                lsm->lsm_md_migrate_hash, lsm->lsm_md_pool_name);
 
                lsm->lsm_md_layout_version, lsm->lsm_md_migrate_offset,
                lsm->lsm_md_migrate_hash, lsm->lsm_md_pool_name);
 
-       if (lsm->lsm_md_magic != LMV_MAGIC_V1)
+       if (!lmv_dir_striped(lsm))
                return;
 
        for (i = 0; i < lsm->lsm_md_stripe_count; i++)
                return;
 
        for (i = 0; i < lsm->lsm_md_stripe_count; i++)
@@ -190,12 +231,6 @@ static inline int lmv_name_to_stripe_index(__u32 lmv_hash_type,
        return idx;
 }
 
        return idx;
 }
 
-static inline bool lmv_is_known_hash_type(__u32 type)
-{
-       return (type & LMV_HASH_TYPE_MASK) == LMV_HASH_TYPE_FNV_1A_64 ||
-              (type & LMV_HASH_TYPE_MASK) == LMV_HASH_TYPE_ALL_CHARS;
-}
-
 static inline bool lmv_magic_supported(__u32 lum_magic)
 {
        return lum_magic == LMV_USER_MAGIC ||
 static inline bool lmv_magic_supported(__u32 lum_magic)
 {
        return lum_magic == LMV_USER_MAGIC ||
index 914fff0..06f092e 100644 (file)
@@ -163,8 +163,7 @@ void ll_release_page(struct inode *inode, struct page *page,
 
        /* Always remove the page for striped dir, because the page is
         * built from temporarily in LMV layer */
 
        /* Always remove the page for striped dir, because the page is
         * built from temporarily in LMV layer */
-       if (inode != NULL && S_ISDIR(inode->i_mode) &&
-           ll_i2info(inode)->lli_lsm_md != NULL) {
+       if (inode && ll_dir_striped(inode)) {
                __free_page(page);
                return;
        }
                __free_page(page);
                return;
        }
@@ -341,7 +340,7 @@ static int ll_readdir(struct file *filp, void *cookie, filldir_t filldir)
                 */
                GOTO(out, rc = 0);
 
                 */
                GOTO(out, rc = 0);
 
-       if (unlikely(ll_i2info(inode)->lli_lsm_md != NULL)) {
+       if (unlikely(ll_dir_striped(inode))) {
                /*
                 * This is only needed for striped dir to fill ..,
                 * see lmv_read_page()
                /*
                 * This is only needed for striped dir to fill ..,
                 * see lmv_read_page()
index 6f26724..d7c1e9e 100644 (file)
@@ -4216,7 +4216,7 @@ int ll_migrate(struct inode *parent, struct file *file, struct lmv_user_md *lum,
        if (!(exp_connect_flags2(ll_i2sbi(parent)->ll_md_exp) &
              OBD_CONNECT2_DIR_MIGRATE)) {
                if (le32_to_cpu(lum->lum_stripe_count) > 1 ||
        if (!(exp_connect_flags2(ll_i2sbi(parent)->ll_md_exp) &
              OBD_CONNECT2_DIR_MIGRATE)) {
                if (le32_to_cpu(lum->lum_stripe_count) > 1 ||
-                   ll_i2info(child_inode)->lli_lsm_md) {
+                   ll_dir_striped(child_inode)) {
                        CERROR("%s: MDT doesn't support stripe directory "
                               "migration!\n", ll_i2sbi(parent)->ll_fsname);
                        GOTO(out_iput, rc = -EOPNOTSUPP);
                        CERROR("%s: MDT doesn't support stripe directory "
                               "migration!\n", ll_i2sbi(parent)->ll_fsname);
                        GOTO(out_iput, rc = -EOPNOTSUPP);
@@ -4403,8 +4403,7 @@ static int ll_inode_revalidate_fini(struct inode *inode, int rc)
                /* If it is striped directory, and there is bad stripe
                 * Let's revalidate the dentry again, instead of returning
                 * error */
                /* If it is striped directory, and there is bad stripe
                 * Let's revalidate the dentry again, instead of returning
                 * error */
-               if (S_ISDIR(inode->i_mode) &&
-                   ll_i2info(inode)->lli_lsm_md != NULL)
+               if (ll_dir_striped(inode))
                        return 0;
 
                /* This path cannot be hit for regular files unless in
                        return 0;
 
                /* This path cannot be hit for regular files unless in
@@ -4481,8 +4480,7 @@ static int ll_merge_md_attr(struct inode *inode)
 
        LASSERT(lli->lli_lsm_md != NULL);
 
 
        LASSERT(lli->lli_lsm_md != NULL);
 
-       /* foreign dir is not striped dir */
-       if (lli->lli_lsm_md->lsm_md_magic == LMV_MAGIC_FOREIGN)
+       if (!lmv_dir_striped(lli->lli_lsm_md))
                RETURN(0);
 
        down_read(&lli->lli_lsm_sem);
                RETURN(0);
 
        down_read(&lli->lli_lsm_sem);
@@ -4550,8 +4548,7 @@ int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
                }
        } else {
                /* If object isn't regular a file then don't validate size. */
                }
        } else {
                /* If object isn't regular a file then don't validate size. */
-               if (S_ISDIR(inode->i_mode) &&
-                   lli->lli_lsm_md != NULL) {
+               if (ll_dir_striped(inode)) {
                        rc = ll_merge_md_attr(inode);
                        if (rc < 0)
                                RETURN(rc);
                        rc = ll_merge_md_attr(inode);
                        if (rc < 0)
                                RETURN(rc);
index e824afb..9ff4cc0 100644 (file)
@@ -1165,6 +1165,13 @@ static inline struct lu_fid *ll_inode2fid(struct inode *inode)
         return fid;
 }
 
         return fid;
 }
 
+static inline bool ll_dir_striped(struct inode *inode)
+{
+       LASSERT(inode);
+       return S_ISDIR(inode->i_mode) &&
+              lmv_dir_striped(ll_i2info(inode)->lli_lsm_md);
+}
+
 static inline loff_t ll_file_maxbytes(struct inode *inode)
 {
        struct cl_object *obj = ll_i2info(inode)->lli_clob;
 static inline loff_t ll_file_maxbytes(struct inode *inode)
 {
        struct cl_object *obj = ll_i2info(inode)->lli_clob;
index 8e6512b..cc84e32 100644 (file)
@@ -1367,6 +1367,9 @@ static int ll_init_lsm_md(struct inode *inode, struct lustre_md *md)
               ll_i2sbi(inode)->ll_fsname, PFID(&lli->lli_fid));
        lsm_md_dump(D_INODE, lsm);
 
               ll_i2sbi(inode)->ll_fsname, PFID(&lli->lli_fid));
        lsm_md_dump(D_INODE, lsm);
 
+       if (!lmv_dir_striped(lsm))
+               goto out;
+
        /* XXX sigh, this lsm_root initialization should be in
         * LMV layer, but it needs ll_iget right now, so we
         * put this here right now. */
        /* XXX sigh, this lsm_root initialization should be in
         * LMV layer, but it needs ll_iget right now, so we
         * put this here right now. */
@@ -1394,7 +1397,7 @@ static int ll_init_lsm_md(struct inode *inode, struct lustre_md *md)
                        return rc;
                }
        }
                        return rc;
                }
        }
-
+out:
        lli->lli_lsm_md = lsm;
 
        return 0;
        lli->lli_lsm_md = lsm;
 
        return 0;
@@ -1478,10 +1481,9 @@ static int ll_update_lsm_md(struct inode *inode, struct lustre_md *md)
         *
         * foreign LMV should not change.
         */
         *
         * foreign LMV should not change.
         */
-       if (lli->lli_lsm_md &&
-           lli->lli_lsm_md->lsm_md_magic != LMV_MAGIC_FOREIGN &&
-          !lsm_md_eq(lli->lli_lsm_md, lsm)) {
-               if (lsm->lsm_md_layout_version <=
+       if (lli->lli_lsm_md && !lsm_md_eq(lli->lli_lsm_md, lsm)) {
+               if (lmv_dir_striped(lli->lli_lsm_md) &&
+                   lsm->lsm_md_layout_version <=
                    lli->lli_lsm_md->lsm_md_layout_version) {
                        CERROR("%s: "DFID" dir layout mismatch:\n",
                               ll_i2sbi(inode)->ll_fsname,
                    lli->lli_lsm_md->lsm_md_layout_version) {
                        CERROR("%s: "DFID" dir layout mismatch:\n",
                               ll_i2sbi(inode)->ll_fsname,
@@ -1501,15 +1503,6 @@ static int ll_update_lsm_md(struct inode *inode, struct lustre_md *md)
        if (!lli->lli_lsm_md) {
                struct cl_attr  *attr;
 
        if (!lli->lli_lsm_md) {
                struct cl_attr  *attr;
 
-               if (lsm->lsm_md_magic == LMV_MAGIC_FOREIGN) {
-                       /* set md->lmv to NULL, so the following free lustre_md
-                        * will not free this lsm */
-                       md->lmv = NULL;
-                       lli->lli_lsm_md = lsm;
-                       up_write(&lli->lli_lsm_sem);
-                       RETURN(0);
-               }
-
                rc = ll_init_lsm_md(inode, md);
                up_write(&lli->lli_lsm_sem);
                if (rc != 0)
                rc = ll_init_lsm_md(inode, md);
                up_write(&lli->lli_lsm_sem);
                if (rc != 0)
@@ -1525,6 +1518,9 @@ static int ll_update_lsm_md(struct inode *inode, struct lustre_md *md)
                 */
                down_read(&lli->lli_lsm_sem);
 
                 */
                down_read(&lli->lli_lsm_sem);
 
+               if (!lmv_dir_striped(lli->lli_lsm_md))
+                       GOTO(unlock, rc);
+
                OBD_ALLOC_PTR(attr);
                if (attr == NULL)
                        GOTO(unlock, rc = -ENOMEM);
                OBD_ALLOC_PTR(attr);
                if (attr == NULL)
                        GOTO(unlock, rc = -ENOMEM);
index 27e7bff..4734210 100644 (file)
@@ -230,6 +230,7 @@ int ll_dom_lock_cancel(struct inode *inode, struct ldlm_lock *lock)
 void ll_lock_cancel_bits(struct ldlm_lock *lock, __u64 to_cancel)
 {
        struct inode *inode = ll_inode_from_resource_lock(lock);
 void ll_lock_cancel_bits(struct ldlm_lock *lock, __u64 to_cancel)
 {
        struct inode *inode = ll_inode_from_resource_lock(lock);
+       struct ll_inode_info *lli;
        __u64 bits = to_cancel;
        int rc;
 
        __u64 bits = to_cancel;
        int rc;
 
@@ -317,15 +318,12 @@ void ll_lock_cancel_bits(struct ldlm_lock *lock, __u64 to_cancel)
                               PFID(ll_inode2fid(inode)), rc);
        }
 
                               PFID(ll_inode2fid(inode)), rc);
        }
 
-       if (bits & MDS_INODELOCK_UPDATE) {
-               struct ll_inode_info *lli = ll_i2info(inode);
+       lli = ll_i2info(inode);
 
 
+       if (bits & MDS_INODELOCK_UPDATE)
                lli->lli_update_atime = 1;
                lli->lli_update_atime = 1;
-       }
 
        if ((bits & MDS_INODELOCK_UPDATE) && S_ISDIR(inode->i_mode)) {
 
        if ((bits & MDS_INODELOCK_UPDATE) && S_ISDIR(inode->i_mode)) {
-               struct ll_inode_info *lli = ll_i2info(inode);
-
                CDEBUG(D_INODE, "invalidating inode "DFID" lli = %p, "
                       "pfid  = "DFID"\n", PFID(ll_inode2fid(inode)),
                       lli, PFID(&lli->lli_pfid));
                CDEBUG(D_INODE, "invalidating inode "DFID" lli = %p, "
                       "pfid  = "DFID"\n", PFID(ll_inode2fid(inode)),
                       lli, PFID(&lli->lli_pfid));
@@ -702,7 +700,7 @@ static int ll_lookup_it_finish(struct ptlrpc_request *request,
                struct lu_fid   fid = ll_i2info(parent)->lli_fid;
 
                /* If it is striped directory, get the real stripe parent */
                struct lu_fid   fid = ll_i2info(parent)->lli_fid;
 
                /* If it is striped directory, get the real stripe parent */
-               if (unlikely(ll_i2info(parent)->lli_lsm_md != NULL)) {
+               if (unlikely(ll_dir_striped(parent))) {
                        rc = md_get_fid_from_lsm(ll_i2mdexp(parent),
                                                 ll_i2info(parent)->lli_lsm_md,
                                                 (*de)->d_name.name,
                        rc = md_get_fid_from_lsm(ll_i2mdexp(parent),
                                                 ll_i2info(parent)->lli_lsm_md,
                                                 (*de)->d_name.name,
index 75be491..2ee7aa9 100644 (file)
@@ -292,16 +292,15 @@ static int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data,
        ENTRY;
 
        /* do not allow file creation in foreign dir */
        ENTRY;
 
        /* do not allow file creation in foreign dir */
-       if ((it->it_op & IT_CREAT) && op_data->op_mea1 != NULL &&
-           op_data->op_mea1->lsm_md_magic == LMV_MAGIC_FOREIGN)
+       if ((it->it_op & IT_CREAT) && lmv_dir_foreign(op_data->op_mea1))
                RETURN(-ENODATA);
 
        if ((it->it_op & IT_CREAT) && !(flags & MDS_OPEN_BY_FID)) {
                /* don't allow create under dir with bad hash */
                RETURN(-ENODATA);
 
        if ((it->it_op & IT_CREAT) && !(flags & MDS_OPEN_BY_FID)) {
                /* don't allow create under dir with bad hash */
-               if (lmv_is_dir_bad_hash(op_data->op_mea1))
+               if (lmv_dir_bad_hash(op_data->op_mea1))
                        RETURN(-EBADF);
 
                        RETURN(-EBADF);
 
-               if (lmv_is_dir_migrating(op_data->op_mea1)) {
+               if (lmv_dir_migrating(op_data->op_mea1)) {
                        if (flags & O_EXCL) {
                                /*
                                 * open(O_CREAT | O_EXCL) needs to check
                        if (flags & O_EXCL) {
                                /*
                                 * open(O_CREAT | O_EXCL) needs to check
@@ -310,8 +309,7 @@ static int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data,
                                 * file under old layout, check old layout on
                                 * client side.
                                 */
                                 * file under old layout, check old layout on
                                 * client side.
                                 */
-                               tgt = lmv_locate_tgt(lmv, op_data,
-                                                    &op_data->op_fid1);
+                               tgt = lmv_locate_tgt(lmv, op_data);
                                if (IS_ERR(tgt))
                                        RETURN(PTR_ERR(tgt));
 
                                if (IS_ERR(tgt))
                                        RETURN(PTR_ERR(tgt));
 
@@ -345,7 +343,7 @@ retry:
                /* for striped directory, we can't know parent stripe fid
                 * without name, but we can set it to child fid, and MDT
                 * will obtain it from linkea in open in such case. */
                /* for striped directory, we can't know parent stripe fid
                 * without name, but we can set it to child fid, and MDT
                 * will obtain it from linkea in open in such case. */
-               if (op_data->op_mea1 != NULL)
+               if (lmv_dir_striped(op_data->op_mea1))
                        op_data->op_fid1 = op_data->op_fid2;
 
                tgt = lmv_find_target(lmv, &op_data->op_fid2);
                        op_data->op_fid1 = op_data->op_fid2;
 
                tgt = lmv_find_target(lmv, &op_data->op_fid2);
@@ -358,7 +356,7 @@ retry:
                LASSERT(fid_is_zero(&op_data->op_fid2));
                LASSERT(op_data->op_name != NULL);
 
                LASSERT(fid_is_zero(&op_data->op_fid2));
                LASSERT(op_data->op_name != NULL);
 
-               tgt = lmv_locate_tgt(lmv, op_data, &op_data->op_fid1);
+               tgt = lmv_locate_tgt(lmv, op_data);
                if (IS_ERR(tgt))
                        RETURN(PTR_ERR(tgt));
        }
                if (IS_ERR(tgt))
                        RETURN(PTR_ERR(tgt));
        }
@@ -443,8 +441,7 @@ lmv_intent_lookup(struct obd_export *exp, struct md_op_data *op_data,
        ENTRY;
 
        /* foreign dir is not striped */
        ENTRY;
 
        /* foreign dir is not striped */
-       if (op_data->op_mea1 &&
-           op_data->op_mea1->lsm_md_magic == LMV_MAGIC_FOREIGN) {
+       if (lmv_dir_foreign(op_data->op_mea1)) {
                /* only allow getattr/lookup for itself */
                if (op_data->op_name != NULL)
                        RETURN(-ENODATA);
                /* only allow getattr/lookup for itself */
                if (op_data->op_name != NULL)
                        RETURN(-ENODATA);
@@ -452,7 +449,7 @@ lmv_intent_lookup(struct obd_export *exp, struct md_op_data *op_data,
        }
 
 retry:
        }
 
 retry:
-       tgt = lmv_locate_tgt(lmv, op_data, &op_data->op_fid1);
+       tgt = lmv_locate_tgt(lmv, op_data);
        if (IS_ERR(tgt))
                RETURN(PTR_ERR(tgt));
 
        if (IS_ERR(tgt))
                RETURN(PTR_ERR(tgt));
 
@@ -475,7 +472,7 @@ retry:
        if (*reqp == NULL) {
                /* If RPC happens, lsm information will be revalidated
                 * during update_inode process (see ll_update_lsm_md) */
        if (*reqp == NULL) {
                /* If RPC happens, lsm information will be revalidated
                 * during update_inode process (see ll_update_lsm_md) */
-               if (op_data->op_mea2 != NULL) {
+               if (lmv_dir_striped(op_data->op_mea2)) {
                        rc = lmv_revalidate_slaves(exp, op_data->op_mea2,
                                                   cb_blocking,
                                                   extra_lock_flags);
                        rc = lmv_revalidate_slaves(exp, op_data->op_mea2,
                                                   cb_blocking,
                                                   extra_lock_flags);
index 44debf0..7017e94 100644 (file)
@@ -136,6 +136,8 @@ lsm_name_to_stripe_info(const struct lmv_stripe_md *lsm, const char *name,
        __u32 stripe_count = lsm->lsm_md_stripe_count;
        int stripe_index;
 
        __u32 stripe_count = lsm->lsm_md_stripe_count;
        int stripe_index;
 
+       LASSERT(lmv_dir_striped(lsm));
+
        if (hash_type & LMV_HASH_FLAG_MIGRATION) {
                if (post_migrate) {
                        hash_type &= ~LMV_HASH_FLAG_MIGRATION;
        if (hash_type & LMV_HASH_FLAG_MIGRATION) {
                if (post_migrate) {
                        hash_type &= ~LMV_HASH_FLAG_MIGRATION;
@@ -166,26 +168,6 @@ lsm_name_to_stripe_info(const struct lmv_stripe_md *lsm, const char *name,
        return &lsm->lsm_md_oinfo[stripe_index];
 }
 
        return &lsm->lsm_md_oinfo[stripe_index];
 }
 
-static inline bool lmv_is_dir_migrating(const struct lmv_stripe_md *lsm)
-{
-       return lsm ? lsm->lsm_md_hash_type & LMV_HASH_FLAG_MIGRATION : false;
-}
-
-static inline bool lmv_is_dir_bad_hash(const struct lmv_stripe_md *lsm)
-{
-       if (!lsm)
-               return false;
-
-       if (lmv_is_dir_migrating(lsm)) {
-               if (lsm->lsm_md_stripe_count - lsm->lsm_md_migrate_offset > 1)
-                       return !lmv_is_known_hash_type(
-                                       lsm->lsm_md_migrate_hash);
-               return false;
-       }
-
-       return !lmv_is_known_hash_type(lsm->lsm_md_hash_type);
-}
-
 static inline bool lmv_dir_retry_check_update(struct md_op_data *op_data)
 {
        const struct lmv_stripe_md *lsm = op_data->op_mea1;
 static inline bool lmv_dir_retry_check_update(struct md_op_data *op_data)
 {
        const struct lmv_stripe_md *lsm = op_data->op_mea1;
@@ -193,12 +175,12 @@ static inline bool lmv_dir_retry_check_update(struct md_op_data *op_data)
        if (!lsm)
                return false;
 
        if (!lsm)
                return false;
 
-       if (lmv_is_dir_migrating(lsm) && !op_data->op_post_migrate) {
+       if (lmv_dir_migrating(lsm) && !op_data->op_post_migrate) {
                op_data->op_post_migrate = true;
                return true;
        }
 
                op_data->op_post_migrate = true;
                return true;
        }
 
-       if (lmv_is_dir_bad_hash(lsm) &&
+       if (lmv_dir_bad_hash(lsm) &&
            op_data->op_stripe_index < lsm->lsm_md_stripe_count - 1) {
                op_data->op_stripe_index++;
                return true;
            op_data->op_stripe_index < lsm->lsm_md_stripe_count - 1) {
                op_data->op_stripe_index++;
                return true;
@@ -208,8 +190,8 @@ static inline bool lmv_dir_retry_check_update(struct md_op_data *op_data)
 }
 
 struct lmv_tgt_desc *lmv_locate_tgt(struct lmv_obd *lmv,
 }
 
 struct lmv_tgt_desc *lmv_locate_tgt(struct lmv_obd *lmv,
-                                   struct md_op_data *op_data,
-                                   struct lu_fid *fid);
+                                   struct md_op_data *op_data);
+
 /* lproc_lmv.c */
 int lmv_tunables_init(struct obd_device *obd);
 
 /* lproc_lmv.c */
 int lmv_tunables_init(struct obd_device *obd);
 
index da0a710..440bceb 100644 (file)
@@ -1158,26 +1158,26 @@ hsm_req_err:
 /**
  * This is _inode_ placement policy function (not name).
  */
 /**
  * This is _inode_ placement policy function (not name).
  */
-static int lmv_placement_policy(struct obd_device *obd,
-                               struct md_op_data *op_data, u32 *mds)
+static u32 lmv_placement_policy(struct obd_device *obd,
+                               struct md_op_data *op_data)
 {
 {
-       struct lmv_obd     *lmv = &obd->u.lmv;
+       struct lmv_obd *lmv = &obd->u.lmv;
        struct lmv_user_md *lum;
        struct lmv_user_md *lum;
+       u32 mdt;
 
        ENTRY;
 
 
        ENTRY;
 
-       LASSERT(mds != NULL);
-
-       if (lmv->desc.ld_tgt_count == 1) {
-               *mds = 0;
+       if (lmv->desc.ld_tgt_count == 1)
                RETURN(0);
                RETURN(0);
-       }
 
        lum = op_data->op_data;
 
        lum = op_data->op_data;
-       /* Choose MDS by
+       /*
+        * Choose MDT by
         * 1. See if the stripe offset is specified by lum.
         * 1. See if the stripe offset is specified by lum.
-        * 2. Then check if there is default stripe offset.
-        * 3. Finally choose MDS by name hash if the parent
+        * 2. If parent has default LMV, and its hash type is "space", choose
+        *    MDT with QoS. (see lmv_locate_tgt_qos()).
+        * 3. Then check if default LMV stripe offset is not -1.
+        * 4. Finally choose MDS by name hash if the parent
         *    is striped directory. (see lmv_locate_tgt()).
         *
         * presently explicit MDT location is not supported
         *    is striped directory. (see lmv_locate_tgt()).
         *
         * presently explicit MDT location is not supported
@@ -1188,18 +1188,22 @@ static int lmv_placement_policy(struct obd_device *obd,
        if (op_data->op_cli_flags & CLI_SET_MEA && lum != NULL &&
            le32_to_cpu(lum->lum_magic != LMV_MAGIC_FOREIGN) &&
            le32_to_cpu(lum->lum_stripe_offset) != (__u32)-1) {
        if (op_data->op_cli_flags & CLI_SET_MEA && lum != NULL &&
            le32_to_cpu(lum->lum_magic != LMV_MAGIC_FOREIGN) &&
            le32_to_cpu(lum->lum_stripe_offset) != (__u32)-1) {
-               *mds = le32_to_cpu(lum->lum_stripe_offset);
+               mdt = le32_to_cpu(lum->lum_stripe_offset);
+       } else if (op_data->op_code == LUSTRE_OPC_MKDIR &&
+                  !lmv_dir_striped(op_data->op_mea1) &&
+                  lmv_dir_space_hashed(op_data->op_default_mea1)) {
+               mdt = op_data->op_mds;
        } else if (op_data->op_code == LUSTRE_OPC_MKDIR &&
                   op_data->op_default_mea1 &&
                   op_data->op_default_mea1->lsm_md_master_mdt_index !=
        } else if (op_data->op_code == LUSTRE_OPC_MKDIR &&
                   op_data->op_default_mea1 &&
                   op_data->op_default_mea1->lsm_md_master_mdt_index !=
-                        (__u32)-1) {
-               *mds = op_data->op_default_mea1->lsm_md_master_mdt_index;
-               op_data->op_mds = *mds;
+                       (__u32)-1) {
+               mdt = op_data->op_default_mea1->lsm_md_master_mdt_index;
+               op_data->op_mds = mdt;
        } else {
        } else {
-               *mds = op_data->op_mds;
+               mdt = op_data->op_mds;
        }
 
        }
 
-       RETURN(0);
+       RETURN(mdt);
 }
 
 int __lmv_fid_alloc(struct lmv_obd *lmv, struct lu_fid *fid, u32 mds)
 }
 
 int __lmv_fid_alloc(struct lmv_obd *lmv, struct lu_fid *fid, u32 mds)
@@ -1230,38 +1234,32 @@ int __lmv_fid_alloc(struct lmv_obd *lmv, struct lu_fid *fid, u32 mds)
                rc = 0;
        }
 
                rc = 0;
        }
 
-        EXIT;
+       EXIT;
 out:
        mutex_unlock(&tgt->ltd_fid_mutex);
 out:
        mutex_unlock(&tgt->ltd_fid_mutex);
-        return rc;
+       return rc;
 }
 
 int lmv_fid_alloc(const struct lu_env *env, struct obd_export *exp,
                  struct lu_fid *fid, struct md_op_data *op_data)
 {
 }
 
 int lmv_fid_alloc(const struct lu_env *env, struct obd_export *exp,
                  struct lu_fid *fid, struct md_op_data *op_data)
 {
-        struct obd_device     *obd = class_exp2obd(exp);
-        struct lmv_obd        *lmv = &obd->u.lmv;
-       u32                    mds = 0;
-        int                    rc;
-        ENTRY;
+       struct obd_device *obd = class_exp2obd(exp);
+       struct lmv_obd *lmv = &obd->u.lmv;
+       u32 mds;
+       int rc;
 
 
-        LASSERT(op_data != NULL);
-        LASSERT(fid != NULL);
+       ENTRY;
 
 
-        rc = lmv_placement_policy(obd, op_data, &mds);
-        if (rc) {
-                CERROR("Can't get target for allocating fid, "
-                       "rc %d\n", rc);
-                RETURN(rc);
-        }
+       LASSERT(op_data != NULL);
+       LASSERT(fid != NULL);
 
 
-        rc = __lmv_fid_alloc(lmv, fid, mds);
-        if (rc) {
-                CERROR("Can't alloc new fid, rc %d\n", rc);
-                RETURN(rc);
-        }
+       mds = lmv_placement_policy(obd, op_data);
 
 
-        RETURN(rc);
+       rc = __lmv_fid_alloc(lmv, fid, mds);
+       if (rc)
+               CERROR("Can't alloc new fid, rc %d\n", rc);
+
+       RETURN(rc);
 }
 
 static int lmv_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
 }
 
 static int lmv_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
@@ -1615,20 +1613,30 @@ static int lmv_close(struct obd_export *exp, struct md_op_data *op_data,
         RETURN(rc);
 }
 
         RETURN(rc);
 }
 
-struct lmv_tgt_desc*
-__lmv_locate_tgt(struct lmv_obd *lmv, struct lmv_stripe_md *lsm,
-                const char *name, int namelen, struct lu_fid *fid, u32 *mds,
-                bool post_migrate)
+static struct lmv_tgt_desc *lmv_locate_tgt_qos(struct lmv_obd *lmv, __u32 *mdt)
+{
+       static unsigned int rr_index;
+
+       /* locate MDT round-robin is the first step */
+       *mdt = rr_index % lmv->tgts_size;
+       rr_index++;
+
+       return lmv->tgts[*mdt];
+}
+
+static struct lmv_tgt_desc *
+lmv_locate_tgt_by_name(struct lmv_obd *lmv, struct lmv_stripe_md *lsm,
+                      const char *name, int namelen, struct lu_fid *fid,
+                      __u32 *mds, bool post_migrate)
 {
        struct lmv_tgt_desc *tgt;
        const struct lmv_oinfo *oinfo;
 
 {
        struct lmv_tgt_desc *tgt;
        const struct lmv_oinfo *oinfo;
 
-       if (lsm == NULL || namelen == 0) {
+       if (!lmv_dir_striped(lsm) || !namelen) {
                tgt = lmv_find_target(lmv, fid);
                if (IS_ERR(tgt))
                        return tgt;
 
                tgt = lmv_find_target(lmv, fid);
                if (IS_ERR(tgt))
                        return tgt;
 
-               LASSERT(mds);
                *mds = tgt->ltd_idx;
                return tgt;
        }
                *mds = tgt->ltd_idx;
                return tgt;
        }
@@ -1644,48 +1652,41 @@ __lmv_locate_tgt(struct lmv_obd *lmv, struct lmv_stripe_md *lsm,
                        return ERR_CAST(oinfo);
        }
 
                        return ERR_CAST(oinfo);
        }
 
-       if (fid != NULL)
-               *fid = oinfo->lmo_fid;
-       if (mds != NULL)
-               *mds = oinfo->lmo_mds;
-
+       *fid = oinfo->lmo_fid;
+       *mds = oinfo->lmo_mds;
        tgt = lmv_get_target(lmv, oinfo->lmo_mds, NULL);
 
        tgt = lmv_get_target(lmv, oinfo->lmo_mds, NULL);
 
-       CDEBUG(D_INFO, "locate on mds %u "DFID"\n", oinfo->lmo_mds,
-              PFID(&oinfo->lmo_fid));
+       CDEBUG(D_INODE, "locate MDT %u parent "DFID"\n", *mds, PFID(fid));
 
        return tgt;
 }
 
 
        return tgt;
 }
 
-
 /**
 /**
- * Locate mdt by fid or name
+ * Locate MDT of op_data->op_fid1
  *
  * For striped directory, it will locate the stripe by name hash, if hash_type
  * is unknown, it will return the stripe specified by 'op_data->op_stripe_index'
  * which is set outside, and if dir is migrating, 'op_data->op_post_migrate'
  * indicates whether old or new layout is used to locate.
  *
  *
  * For striped directory, it will locate the stripe by name hash, if hash_type
  * is unknown, it will return the stripe specified by 'op_data->op_stripe_index'
  * which is set outside, and if dir is migrating, 'op_data->op_post_migrate'
  * indicates whether old or new layout is used to locate.
  *
- * For normal direcotry, it will locate MDS by FID directly.
+ * For plain directory, normally it will locate MDT by FID, but if this
+ * directory has default LMV, and its hash type is "space", locate MDT with QoS.
  *
  * \param[in] lmv      LMV device
  * \param[in] op_data  client MD stack parameters, name, namelen
  *                      mds_num etc.
  *
  * \param[in] lmv      LMV device
  * \param[in] op_data  client MD stack parameters, name, namelen
  *                      mds_num etc.
- * \param[in] fid      object FID used to locate MDS.
  *
  * retval              pointer to the lmv_tgt_desc if succeed.
  *                      ERR_PTR(errno) if failed.
  */
  *
  * retval              pointer to the lmv_tgt_desc if succeed.
  *                      ERR_PTR(errno) if failed.
  */
-struct lmv_tgt_desc*
-lmv_locate_tgt(struct lmv_obd *lmv, struct md_op_data *op_data,
-              struct lu_fid *fid)
+struct lmv_tgt_desc *
+lmv_locate_tgt(struct lmv_obd *lmv, struct md_op_data *op_data)
 {
        struct lmv_stripe_md *lsm = op_data->op_mea1;
        struct lmv_oinfo *oinfo;
        struct lmv_tgt_desc *tgt;
 
 {
        struct lmv_stripe_md *lsm = op_data->op_mea1;
        struct lmv_oinfo *oinfo;
        struct lmv_tgt_desc *tgt;
 
-       /* foreign dir is not striped dir */
-       if (lsm && lsm->lsm_md_magic == LMV_MAGIC_FOREIGN)
+       if (lmv_dir_foreign(lsm))
                return ERR_PTR(-ENODATA);
 
        /* During creating VOLATILE file, it should honor the mdt
                return ERR_PTR(-ENODATA);
 
        /* During creating VOLATILE file, it should honor the mdt
@@ -1697,62 +1698,122 @@ lmv_locate_tgt(struct lmv_obd *lmv, struct md_op_data *op_data,
                if (IS_ERR(tgt))
                        return tgt;
 
                if (IS_ERR(tgt))
                        return tgt;
 
-               if (lsm) {
+               if (lmv_dir_striped(lsm)) {
                        int i;
 
                        /* refill the right parent fid */
                        for (i = 0; i < lsm->lsm_md_stripe_count; i++) {
                                oinfo = &lsm->lsm_md_oinfo[i];
                                if (oinfo->lmo_mds == op_data->op_mds) {
                        int i;
 
                        /* refill the right parent fid */
                        for (i = 0; i < lsm->lsm_md_stripe_count; i++) {
                                oinfo = &lsm->lsm_md_oinfo[i];
                                if (oinfo->lmo_mds == op_data->op_mds) {
-                                       *fid = oinfo->lmo_fid;
+                                       op_data->op_fid1 = oinfo->lmo_fid;
                                        break;
                                }
                        }
 
                        if (i == lsm->lsm_md_stripe_count)
                                        break;
                                }
                        }
 
                        if (i == lsm->lsm_md_stripe_count)
-                               *fid = lsm->lsm_md_oinfo[0].lmo_fid;
+                               op_data->op_fid1 = lsm->lsm_md_oinfo[0].lmo_fid;
                }
                }
-       } else if (lmv_is_dir_bad_hash(lsm)) {
+       } else if (lmv_dir_bad_hash(lsm)) {
                LASSERT(op_data->op_stripe_index < lsm->lsm_md_stripe_count);
                oinfo = &lsm->lsm_md_oinfo[op_data->op_stripe_index];
 
                LASSERT(op_data->op_stripe_index < lsm->lsm_md_stripe_count);
                oinfo = &lsm->lsm_md_oinfo[op_data->op_stripe_index];
 
-               *fid = oinfo->lmo_fid;
+               op_data->op_fid1 = oinfo->lmo_fid;
                op_data->op_mds = oinfo->lmo_mds;
                tgt = lmv_get_target(lmv, oinfo->lmo_mds, NULL);
                op_data->op_mds = oinfo->lmo_mds;
                tgt = lmv_get_target(lmv, oinfo->lmo_mds, NULL);
+       } else if (op_data->op_code == LUSTRE_OPC_MKDIR &&
+                  lmv_dir_space_hashed(op_data->op_default_mea1) &&
+                  !lmv_dir_striped(lsm)) {
+               tgt = lmv_locate_tgt_qos(lmv, &op_data->op_mds);
+               /*
+                * only update statfs when mkdir under dir with "space" hash,
+                * this means the cached statfs may be stale, and current mkdir
+                * may not follow QoS accurately, but it's not serious, and it
+                * avoids periodic statfs when client doesn't mkdir under
+                * "space" hashed directories.
+                */
+               if (!IS_ERR(tgt)) {
+                       struct obd_device *obd;
+
+                       obd = container_of(lmv, struct obd_device, u.lmv);
+                       lmv_statfs_check_update(obd, tgt);
+               }
        } else {
        } else {
-               tgt = __lmv_locate_tgt(lmv, lsm, op_data->op_name,
-                                      op_data->op_namelen, fid,
-                                      &op_data->op_mds,
-                                      op_data->op_post_migrate);
+               tgt = lmv_locate_tgt_by_name(lmv, op_data->op_mea1,
+                               op_data->op_name, op_data->op_namelen,
+                               &op_data->op_fid1, &op_data->op_mds,
+                               op_data->op_post_migrate);
        }
 
        return tgt;
 }
 
        }
 
        return tgt;
 }
 
+/*
+ * Locate MDT of op_data->op_fid2 for link/rename
+ *
+ * The target is located by op_data->op_name under the directory described
+ * by op_data->op_mea2; op_data->op_fid2 and op_data->op_mds are passed by
+ * address to lmv_locate_tgt_by_name() and may be updated by it.
+ *
+ * If that directory is migrating, the name is first looked up with
+ * post_migrate == false; an existing entry there makes the call fail with
+ * -EEXIST. op_fid1 and op_mea1 are borrowed for that lookup and restored
+ * before returning.
+ *
+ * This function does not use ENTRY, so it returns with plain 'return'
+ * like lmv_locate_tgt() does.
+ *
+ * \param[in] lmv      LMV device
+ * \param[in] op_data  client MD stack parameters, op_name must be set
+ *
+ * retval              pointer to the lmv_tgt_desc if succeed.
+ *                     ERR_PTR(errno) if failed.
+ */
+static struct lmv_tgt_desc *
+lmv_locate_tgt2(struct lmv_obd *lmv, struct md_op_data *op_data)
+{
+       struct lmv_tgt_desc *tgt;
+
+       LASSERT(op_data->op_name);
+       if (lmv_dir_migrating(op_data->op_mea2)) {
+               struct lu_fid fid1 = op_data->op_fid1;
+               struct lmv_stripe_md *lsm1 = op_data->op_mea1;
+               struct ptlrpc_request *request = NULL;
+               int rc;
+
+               /*
+                * avoid creating new file under old layout of migrating
+                * directory, check it here.
+                */
+               tgt = lmv_locate_tgt_by_name(lmv, op_data->op_mea2,
+                               op_data->op_name, op_data->op_namelen,
+                               &op_data->op_fid2, &op_data->op_mds, false);
+               if (IS_ERR(tgt))
+                       return tgt;
+
+               /* look the name up under fid2/mea2, then restore fid1/mea1 */
+               op_data->op_fid1 = op_data->op_fid2;
+               op_data->op_mea1 = op_data->op_mea2;
+               rc = md_getattr_name(tgt->ltd_exp, op_data, &request);
+               op_data->op_fid1 = fid1;
+               op_data->op_mea1 = lsm1;
+               if (!rc) {
+                       /* lookup succeeded: the name already exists */
+                       ptlrpc_req_finished(request);
+                       return ERR_PTR(-EEXIST);
+               }
+
+               if (rc != -ENOENT)
+                       return ERR_PTR(rc);
+       }
+
+       return lmv_locate_tgt_by_name(lmv, op_data->op_mea2, op_data->op_name,
+                               op_data->op_namelen, &op_data->op_fid2,
+                               &op_data->op_mds, true);
+}
+
 int lmv_create(struct obd_export *exp, struct md_op_data *op_data,
                const void *data, size_t datalen, umode_t mode, uid_t uid,
                gid_t gid, cfs_cap_t cap_effective, __u64 rdev,
                struct ptlrpc_request **request)
 {
 int lmv_create(struct obd_export *exp, struct md_op_data *op_data,
                const void *data, size_t datalen, umode_t mode, uid_t uid,
                gid_t gid, cfs_cap_t cap_effective, __u64 rdev,
                struct ptlrpc_request **request)
 {
-       struct obd_device       *obd = exp->exp_obd;
-       struct lmv_obd          *lmv = &obd->u.lmv;
-       struct lmv_tgt_desc     *tgt;
-       int                      rc;
+       struct obd_device *obd = exp->exp_obd;
+       struct lmv_obd *lmv = &obd->u.lmv;
+       struct lmv_tgt_desc *tgt;
+       int rc;
+
        ENTRY;
 
        if (!lmv->desc.ld_active_tgt_count)
                RETURN(-EIO);
 
        ENTRY;
 
        if (!lmv->desc.ld_active_tgt_count)
                RETURN(-EIO);
 
-       if (lmv_is_dir_bad_hash(op_data->op_mea1))
+       if (lmv_dir_bad_hash(op_data->op_mea1))
                RETURN(-EBADF);
 
                RETURN(-EBADF);
 
-       if (lmv_is_dir_migrating(op_data->op_mea1)) {
+       if (lmv_dir_migrating(op_data->op_mea1)) {
                /*
                 * if parent is migrating, create() needs to lookup existing
                 * name, to avoid creating new file under old layout of
                 * migrating directory, check old layout here.
                 */
                /*
                 * if parent is migrating, create() needs to lookup existing
                 * name, to avoid creating new file under old layout of
                 * migrating directory, check old layout here.
                 */
-               tgt = lmv_locate_tgt(lmv, op_data, &op_data->op_fid1);
+               tgt = lmv_locate_tgt(lmv, op_data);
                if (IS_ERR(tgt))
                        RETURN(PTR_ERR(tgt));
 
                if (IS_ERR(tgt))
                        RETURN(PTR_ERR(tgt));
 
@@ -1769,7 +1830,7 @@ int lmv_create(struct obd_export *exp, struct md_op_data *op_data,
                op_data->op_post_migrate = true;
        }
 
                op_data->op_post_migrate = true;
        }
 
-       tgt = lmv_locate_tgt(lmv, op_data, &op_data->op_fid1);
+       tgt = lmv_locate_tgt(lmv, op_data);
        if (IS_ERR(tgt))
                RETURN(PTR_ERR(tgt));
 
        if (IS_ERR(tgt))
                RETURN(PTR_ERR(tgt));
 
@@ -1789,8 +1850,6 @@ int lmv_create(struct obd_export *exp, struct md_op_data *op_data,
                        RETURN(PTR_ERR(tgt));
 
                op_data->op_mds = tgt->ltd_idx;
                        RETURN(PTR_ERR(tgt));
 
                op_data->op_mds = tgt->ltd_idx;
-       } else {
-               CDEBUG(D_CONFIG, "Server doesn't support striped dirs\n");
        }
 
        CDEBUG(D_INODE, "CREATE obj "DFID" -> mds #%x\n",
        }
 
        CDEBUG(D_INODE, "CREATE obj "DFID" -> mds #%x\n",
@@ -1846,7 +1905,7 @@ lmv_getattr_name(struct obd_export *exp,struct md_op_data *op_data,
        ENTRY;
 
 retry:
        ENTRY;
 
 retry:
-       tgt = lmv_locate_tgt(lmv, op_data, &op_data->op_fid1);
+       tgt = lmv_locate_tgt(lmv, op_data);
        if (IS_ERR(tgt))
                RETURN(PTR_ERR(tgt));
 
        if (IS_ERR(tgt))
                RETURN(PTR_ERR(tgt));
 
@@ -1947,39 +2006,7 @@ static int lmv_link(struct obd_export *exp, struct md_op_data *op_data,
        op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid());
        op_data->op_cap = cfs_curproc_cap_pack();
 
        op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid());
        op_data->op_cap = cfs_curproc_cap_pack();
 
-       if (lmv_is_dir_migrating(op_data->op_mea2)) {
-               struct lu_fid fid1 = op_data->op_fid1;
-               struct lmv_stripe_md *lsm1 = op_data->op_mea1;
-
-               /*
-                * avoid creating new file under old layout of migrating
-                * directory, check it here.
-                */
-               tgt = __lmv_locate_tgt(lmv, op_data->op_mea2, op_data->op_name,
-                                      op_data->op_namelen, &op_data->op_fid2,
-                                      &op_data->op_mds, false);
-               tgt = lmv_locate_tgt(lmv, op_data, &op_data->op_fid1);
-               if (IS_ERR(tgt))
-                       RETURN(PTR_ERR(tgt));
-
-               op_data->op_fid1 = op_data->op_fid2;
-               op_data->op_mea1 = op_data->op_mea2;
-               rc = md_getattr_name(tgt->ltd_exp, op_data, request);
-               op_data->op_fid1 = fid1;
-               op_data->op_mea1 = lsm1;
-               if (!rc) {
-                       ptlrpc_req_finished(*request);
-                       *request = NULL;
-                       RETURN(-EEXIST);
-               }
-
-               if (rc != -ENOENT)
-                       RETURN(rc);
-       }
-
-       tgt = __lmv_locate_tgt(lmv, op_data->op_mea2, op_data->op_name,
-                              op_data->op_namelen, &op_data->op_fid2,
-                              &op_data->op_mds, true);
+       tgt = lmv_locate_tgt2(lmv, op_data);
        if (IS_ERR(tgt))
                RETURN(PTR_ERR(tgt));
 
        if (IS_ERR(tgt))
                RETURN(PTR_ERR(tgt));
 
@@ -2027,7 +2054,7 @@ static int lmv_migrate(struct obd_export *exp, struct md_op_data *op_data,
        if (IS_ERR(parent_tgt))
                RETURN(PTR_ERR(parent_tgt));
 
        if (IS_ERR(parent_tgt))
                RETURN(PTR_ERR(parent_tgt));
 
-       if (lsm) {
+       if (lmv_dir_striped(lsm)) {
                __u32 hash_type = lsm->lsm_md_hash_type;
                __u32 stripe_count = lsm->lsm_md_stripe_count;
 
                __u32 hash_type = lsm->lsm_md_hash_type;
                __u32 stripe_count = lsm->lsm_md_stripe_count;
 
@@ -2035,7 +2062,7 @@ static int lmv_migrate(struct obd_export *exp, struct md_op_data *op_data,
                 * old stripes are appended after new stripes for migrating
                 * directory.
                 */
                 * old stripes are appended after new stripes for migrating
                 * directory.
                 */
-               if (lsm->lsm_md_hash_type & LMV_HASH_FLAG_MIGRATION) {
+               if (lmv_dir_migrating(lsm)) {
                        hash_type = lsm->lsm_md_migrate_hash;
                        stripe_count -= lsm->lsm_md_migrate_offset;
                }
                        hash_type = lsm->lsm_md_migrate_hash;
                        stripe_count -= lsm->lsm_md_migrate_offset;
                }
@@ -2045,7 +2072,7 @@ static int lmv_migrate(struct obd_export *exp, struct md_op_data *op_data,
                if (rc < 0)
                        RETURN(rc);
 
                if (rc < 0)
                        RETURN(rc);
 
-               if (lsm->lsm_md_hash_type & LMV_HASH_FLAG_MIGRATION)
+               if (lmv_dir_migrating(lsm))
                        rc += lsm->lsm_md_migrate_offset;
 
                /* save it in fid4 temporarily for early cancel */
                        rc += lsm->lsm_md_migrate_offset;
 
                /* save it in fid4 temporarily for early cancel */
@@ -2059,7 +2086,7 @@ static int lmv_migrate(struct obd_export *exp, struct md_op_data *op_data,
                 * if parent is being migrated too, fill op_fid2 with target
                 * stripe fid, otherwise the target stripe is not created yet.
                 */
                 * if parent is being migrated too, fill op_fid2 with target
                 * stripe fid, otherwise the target stripe is not created yet.
                 */
-               if (lsm->lsm_md_hash_type & LMV_HASH_FLAG_MIGRATION) {
+               if (lmv_dir_migrating(lsm)) {
                        hash_type = lsm->lsm_md_hash_type &
                                    ~LMV_HASH_FLAG_MIGRATION;
                        stripe_count = lsm->lsm_md_migrate_offset;
                        hash_type = lsm->lsm_md_hash_type &
                                    ~LMV_HASH_FLAG_MIGRATION;
                        stripe_count = lsm->lsm_md_migrate_offset;
@@ -2188,44 +2215,10 @@ static int lmv_rename(struct obd_export *exp, struct md_op_data *op_data,
        op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid());
        op_data->op_cap = cfs_curproc_cap_pack();
 
        op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid());
        op_data->op_cap = cfs_curproc_cap_pack();
 
-       if (lmv_is_dir_migrating(op_data->op_mea2)) {
-               struct lu_fid fid1 = op_data->op_fid1;
-               struct lmv_stripe_md *lsm1 = op_data->op_mea1;
+       op_data->op_name = new;
+       op_data->op_namelen = newlen;
 
 
-               /*
-                * we avoid creating new file under old layout of migrating
-                * directory, if there is an existing file with new name under
-                * old layout, we can't unlink file in old layout and rename to
-                * new layout in one transaction, so return -EBUSY here.`
-                */
-               tgt = __lmv_locate_tgt(lmv, op_data->op_mea2, new, newlen,
-                                      &op_data->op_fid2, &op_data->op_mds,
-                                      false);
-               if (IS_ERR(tgt))
-                       RETURN(PTR_ERR(tgt));
-
-               op_data->op_fid1 = op_data->op_fid2;
-               op_data->op_mea1 = op_data->op_mea2;
-               op_data->op_name = new;
-               op_data->op_namelen = newlen;
-               rc = md_getattr_name(tgt->ltd_exp, op_data, request);
-               op_data->op_fid1 = fid1;
-               op_data->op_mea1 = lsm1;
-               op_data->op_name = NULL;
-               op_data->op_namelen = 0;
-               if (!rc) {
-                       ptlrpc_req_finished(*request);
-                       *request = NULL;
-                       RETURN(-EBUSY);
-               }
-
-               if (rc != -ENOENT)
-                       RETURN(rc);
-       }
-
-       /* rename to new layout for migrating directory */
-       tp_tgt = __lmv_locate_tgt(lmv, op_data->op_mea2, new, newlen,
-                                 &op_data->op_fid2, &op_data->op_mds, true);
+       tp_tgt = lmv_locate_tgt2(lmv, op_data);
        if (IS_ERR(tp_tgt))
                RETURN(PTR_ERR(tp_tgt));
 
        if (IS_ERR(tp_tgt))
                RETURN(PTR_ERR(tp_tgt));
 
@@ -2275,10 +2268,10 @@ static int lmv_rename(struct obd_export *exp, struct md_op_data *op_data,
                        RETURN(rc);
        }
 
                        RETURN(rc);
        }
 
+       op_data->op_name = old;
+       op_data->op_namelen = oldlen;
 retry:
 retry:
-       sp_tgt = __lmv_locate_tgt(lmv, op_data->op_mea1, old, oldlen,
-                                 &op_data->op_fid1, &op_data->op_mds,
-                                 op_data->op_post_migrate);
+       sp_tgt = lmv_locate_tgt(lmv, op_data);
        if (IS_ERR(sp_tgt))
                RETURN(PTR_ERR(sp_tgt));
 
        if (IS_ERR(sp_tgt))
                RETURN(PTR_ERR(sp_tgt));
 
@@ -2748,18 +2741,17 @@ int lmv_read_page(struct obd_export *exp, struct md_op_data *op_data,
                  struct md_callback *cb_op, __u64 offset,
                  struct page **ppage)
 {
                  struct md_callback *cb_op, __u64 offset,
                  struct page **ppage)
 {
-       struct obd_device       *obd = exp->exp_obd;
-       struct lmv_obd          *lmv = &obd->u.lmv;
-       struct lmv_stripe_md    *lsm = op_data->op_mea1;
-       struct lmv_tgt_desc     *tgt;
-       int                     rc;
+       struct obd_device *obd = exp->exp_obd;
+       struct lmv_obd *lmv = &obd->u.lmv;
+       struct lmv_tgt_desc *tgt;
+       int rc;
+
        ENTRY;
 
        ENTRY;
 
-       if (unlikely(lsm != NULL)) {
-               /* foreign dir is not striped dir */
-               if (lsm->lsm_md_magic == LMV_MAGIC_FOREIGN)
-                       return -ENODATA;
+       if (unlikely(lmv_dir_foreign(op_data->op_mea1)))
+               RETURN(-ENODATA);
 
 
+       if (unlikely(lmv_dir_striped(op_data->op_mea1))) {
                rc = lmv_striped_read_page(exp, op_data, cb_op, offset, ppage);
                RETURN(rc);
        }
                rc = lmv_striped_read_page(exp, op_data, cb_op, offset, ppage);
                RETURN(rc);
        }
@@ -2814,7 +2806,7 @@ static int lmv_unlink(struct obd_export *exp, struct md_op_data *op_data,
        op_data->op_cap = cfs_curproc_cap_pack();
 
 retry:
        op_data->op_cap = cfs_curproc_cap_pack();
 
 retry:
-       parent_tgt = lmv_locate_tgt(lmv, op_data, &op_data->op_fid1);
+       parent_tgt = lmv_locate_tgt(lmv, op_data);
        if (IS_ERR(parent_tgt))
                RETURN(PTR_ERR(parent_tgt));
 
        if (IS_ERR(parent_tgt))
                RETURN(PTR_ERR(parent_tgt));
 
@@ -3110,7 +3102,7 @@ static int lmv_unpackmd(struct obd_export *exp, struct lmv_stripe_md **lsmp,
                        RETURN(0);
                }
 
                        RETURN(0);
                }
 
-               if (lsm->lsm_md_magic == LMV_MAGIC) {
+               if (lmv_dir_striped(lsm)) {
                        for (i = 0; i < lsm->lsm_md_stripe_count; i++) {
                                if (lsm->lsm_md_oinfo[i].lmo_root)
                                        iput(lsm->lsm_md_oinfo[i].lmo_root);
                        for (i = 0; i < lsm->lsm_md_stripe_count; i++) {
                                if (lsm->lsm_md_oinfo[i].lmo_root)
                                        iput(lsm->lsm_md_oinfo[i].lmo_root);
@@ -3402,7 +3394,8 @@ int lmv_get_fid_from_lsm(struct obd_export *exp,
 {
        const struct lmv_oinfo *oinfo;
 
 {
        const struct lmv_oinfo *oinfo;
 
-       LASSERT(lsm != NULL);
+       LASSERT(lmv_dir_striped(lsm));
+
        oinfo = lsm_name_to_stripe_info(lsm, name, namelen, false);
        if (IS_ERR(oinfo))
                return PTR_ERR(oinfo);
        oinfo = lsm_name_to_stripe_info(lsm, name, namelen, false);
        if (IS_ERR(oinfo))
                return PTR_ERR(oinfo);
@@ -3473,8 +3466,7 @@ static int lmv_merge_attr(struct obd_export *exp,
        int rc;
        int i;
 
        int rc;
        int i;
 
-       /* foreign dir is not striped dir */
-       if (lsm->lsm_md_magic == LMV_MAGIC_FOREIGN)
+       if (!lmv_dir_striped(lsm))
                return 0;
 
        rc = lmv_revalidate_slaves(exp, lsm, cb_blocking, 0);
                return 0;
 
        rc = lmv_revalidate_slaves(exp, lsm, cb_blocking, 0);
index 46fc4c0..a9f5ffe 100644 (file)
@@ -327,8 +327,9 @@ struct lod_object {
                                        /* foreign directory */
                                        ldo_dir_is_foreign;
                        /*
                                        /* foreign directory */
                                        ldo_dir_is_foreign;
                        /*
-                        * default striping is not cached, so this field is
-                        * invalid after create, make sure it's used by
+                        * This default LMV is parent default LMV, which will be
+                        * used in child creation, and it's not cached, so this
+                        * field is invalid after create, make sure it's used by
                         * lod_dir_striping_create_internal() only.
                         */
                        struct lod_default_striping     *ldo_def_striping;
                         * lod_dir_striping_create_internal() only.
                         */
                        struct lod_default_striping     *ldo_def_striping;
index c139e49..c711e39 100644 (file)
@@ -1526,7 +1526,7 @@ int lod_striping_load(const struct lu_env *env, struct lod_object *lo)
                        }
                }
 
                        }
                }
 
-               if (rc < (typeof(rc))sizeof(struct lmv_mds_md_v1)) {
+               if (rc < (int)sizeof(struct lmv_mds_md_v1)) {
                        /* Let's set stripe_loaded to avoid further
                         * stripe loading especially for non-stripe directory,
                         * which can hurt performance. (See LU-9840)
                        /* Let's set stripe_loaded to avoid further
                         * stripe loading especially for non-stripe directory,
                         * which can hurt performance. (See LU-9840)
index 0304a05..cfa8d03 100644 (file)
@@ -3672,8 +3672,9 @@ static int lod_xattr_set_default_lmv_on_dir(const struct lu_env *env,
                                            const char *name, int fl,
                                            struct thandle *th)
 {
                                            const char *name, int fl,
                                            struct thandle *th)
 {
-       struct lmv_user_md_v1   *lum;
-       int                      rc;
+       struct lmv_user_md_v1 *lum;
+       int rc;
+
        ENTRY;
 
        LASSERT(buf != NULL && buf->lb_buf != NULL);
        ENTRY;
 
        LASSERT(buf != NULL && buf->lb_buf != NULL);
@@ -4558,13 +4559,12 @@ static int lod_get_default_lov_striping(const struct lu_env *env,
        struct lov_user_md_v1 *v1 = NULL;
        struct lov_user_md_v3 *v3 = NULL;
        struct lov_comp_md_v1 *comp_v1 = NULL;
        struct lov_user_md_v1 *v1 = NULL;
        struct lov_user_md_v3 *v3 = NULL;
        struct lov_comp_md_v1 *comp_v1 = NULL;
-       __u16   comp_cnt;
-       __u16   mirror_cnt;
-       bool    composite;
-       int     rc, i, j;
-       ENTRY;
+       __u16 comp_cnt;
+       __u16 mirror_cnt;
+       bool composite;
+       int rc, i, j;
 
 
-       lds->lds_def_striping_set = 0;
+       ENTRY;
 
        rc = lod_get_lov_ea(env, lo);
        if (rc < 0)
 
        rc = lod_get_lov_ea(env, lo);
        if (rc < 0)
@@ -4695,27 +4695,30 @@ static int lod_get_default_lmv_striping(const struct lu_env *env,
                                        struct lod_object *lo,
                                        struct lod_default_striping *lds)
 {
                                        struct lod_object *lo,
                                        struct lod_default_striping *lds)
 {
-       struct lod_thread_info  *info = lod_env_info(env);
-       struct lmv_user_md_v1   *v1 = NULL;
-       int                      rc;
-       ENTRY;
+       struct lmv_user_md *lmu;
+       int rc;
 
        lds->lds_dir_def_striping_set = 0;
 
        lds->lds_dir_def_striping_set = 0;
+
        rc = lod_get_default_lmv_ea(env, lo);
        if (rc < 0)
        rc = lod_get_default_lmv_ea(env, lo);
        if (rc < 0)
-               RETURN(rc);
+               return rc;
 
 
-       if (rc < (typeof(rc))sizeof(struct lmv_user_md))
-               RETURN(0);
+       if (rc >= (int)sizeof(*lmu)) {
+               struct lod_thread_info *info = lod_env_info(env);
 
 
-       v1 = info->lti_ea_store;
+               lmu = info->lti_ea_store;
 
 
-       lds->lds_dir_def_stripe_count = le32_to_cpu(v1->lum_stripe_count);
-       lds->lds_dir_def_stripe_offset = le32_to_cpu(v1->lum_stripe_offset);
-       lds->lds_dir_def_hash_type = le32_to_cpu(v1->lum_hash_type);
-       lds->lds_dir_def_striping_set = 1;
+               lds->lds_dir_def_stripe_count =
+                               le32_to_cpu(lmu->lum_stripe_count);
+               lds->lds_dir_def_stripe_offset =
+                               le32_to_cpu(lmu->lum_stripe_offset);
+               lds->lds_dir_def_hash_type =
+                               le32_to_cpu(lmu->lum_hash_type);
+               lds->lds_dir_def_striping_set = 1;
+       }
 
 
-       RETURN(0);
+       return 0;
 }
 
 /**
 }
 
 /**
@@ -5238,6 +5241,36 @@ out:
        RETURN(rc);
 }
 
        RETURN(rc);
 }
 
+/*
+ * Check whether the default LMV EA of directory \a dt has hash type "space".
+ *
+ * \param[in] env      execution environment
+ * \param[in] dev      device whose slice of the object is looked up
+ * \param[in] dt       directory object to check
+ *
+ * \retval             1 if the default LMV hash type is LMV_HASH_TYPE_SPACE
+ * \retval             0 if the default LMV EA is missing, too short, or has
+ *                     another hash type
+ * \retval             negative errno on failure
+ */
+static inline int dt_object_space_hashed(const struct lu_env *env,
+                                        struct lu_device *dev,
+                                        struct dt_object *dt)
+{
+       struct lod_thread_info *info;
+       struct lmv_user_md *lmu;
+       struct lu_object *obj;
+       int rc;
+
+       obj = lu_object_find_slice(env, dev, lu_object_fid(&dt->do_lu), NULL);
+       if (IS_ERR(obj))
+               return PTR_ERR(obj);
+
+       rc = lod_get_default_lmv_ea(env, lu2lod_obj(obj));
+       /*
+        * Drop the reference taken by lu_object_find_slice() before any
+        * return, so that the error path does not leak it.
+        * NOTE(review): the put goes through 'dt' rather than 'obj', as in
+        * the original code; this presumes both are slices of the same
+        * object - confirm.
+        */
+       dt_object_put(env, dt);
+       if (rc < 0)
+               return rc;
+
+       /* a missing or truncated EA must not be reported as "space" hashed */
+       if (rc < (int)sizeof(*lmu))
+               return 0;
+
+       info = lod_env_info(env);
+       lmu = info->lti_ea_store;
+
+       return le32_to_cpu(lmu->lum_hash_type) == LMV_HASH_TYPE_SPACE;
+}
+
 /**
  * Implementation of dt_object_operations::do_declare_create.
  *
 /**
  * Implementation of dt_object_operations::do_declare_create.
  *
@@ -5302,10 +5335,18 @@ static int lod_declare_create(const struct lu_env *env, struct dt_object *dt,
                                GOTO(out, rc = -EREMOTE);
 
                        if (lo->ldo_dir_stripe_offset == -1) {
                                GOTO(out, rc = -EREMOTE);
 
                        if (lo->ldo_dir_stripe_offset == -1) {
-                               /* child and parent should be in the same MDT */
-                               if (hint->dah_parent != NULL &&
-                                   dt_object_remote(hint->dah_parent))
-                                       GOTO(out, rc = -EREMOTE);
+                               /*
+                                * child and parent should be in the same MDT,
+                                * unless parent default LMV hash is "space".
+                                */
+                               if (hint->dah_parent &&
+                                   dt_object_remote(hint->dah_parent)) {
+                                       rc = dt_object_space_hashed(env,
+                                                      lo->ldo_obj.do_lu.lo_dev,
+                                                      hint->dah_parent);
+                                       if (rc <= 0)
+                                               GOTO(out, rc ? rc : -EREMOTE);
+                               }
                        } else if (lo->ldo_dir_stripe_offset !=
                                   ss->ss_node_id) {
                                struct lod_device *lod;
                        } else if (lo->ldo_dir_stripe_offset !=
                                   ss->ss_node_id) {
                                struct lod_device *lod;
index 551c370..df65484 100644 (file)
@@ -4501,7 +4501,7 @@ static int mdd_migrate(const struct lu_env *env, struct md_object *md_pobj,
        if (rc)
                GOTO(out, rc);
 
        if (rc)
                GOTO(out, rc);
 
-       mdd_object_make_hint(env, NULL, tobj, attr, spec, hint);
+       mdd_object_make_hint(env, tpobj, tobj, attr, spec, hint);
 
        handle = mdd_trans_create(env, mdd);
        if (IS_ERR(handle))
 
        handle = mdd_trans_create(env, mdd);
        if (IS_ERR(handle))