Whamcloud - gitweb
LU-15971 llite: implicit default LMV inherit 89/47789/19
authorLai Siyao <lai.siyao@whamcloud.com>
Sun, 5 Mar 2023 13:43:08 +0000 (08:43 -0500)
committerOleg Drokin <green@whamcloud.com>
Fri, 9 Jun 2023 05:24:32 +0000 (05:24 +0000)
With implicit default LMV inherit, the inherited default LMV is
not stored on disk, but maintained on client side.

Benefits:
* a change to a directory's default LMV will be propagated to all sub
  levels at runtime.
* default LMV will be packed into mkdir request, therefore MDT doesn't
  need to read it from disk, which will improve mkdir performance.

Caveats:
* to disable inherited default LMV on subdir, a default LMV needs to be
  set on this subdir explicitly like this:
        "lfs setdirstripe -D -i <subdir_mdt_index> --max-inherit 0"

Changes on client side:
* update inherited default LMV after lookup/open/revalidate.
* pack default LMV in mkdir request.
* add "--raw" option for "lfs getdirstripe -D" to print default LMV
  stored in inode; if directory doesn't have default LMV, or its
  default LMV is implicitly inherited, nothing will be printed.

Changes on MDT side:
* use the default LMV from client in lod_ah_init() to mkdir.
* don't save inherited default LMV in mkdir.

Add sanityn 114.

Test-Parameters: clientversion=2.14 testlist=sanity mdtcount=4 mdscount=2 env=SANITY_EXCEPT="39l 39r 134b 150b 160a 205a 208 220 230e 230p 300g 807"
Test-Parameters: serverversion=2.14 testlist=sanity mdtcount=4 mdscount=2 env=SANITY_EXCEPT="27Cg 39r 65n 413a 413b 905"
Signed-off-by: Lai Siyao <lai.siyao@whamcloud.com>
Change-Id: Iae109a0ef35a273175c70dd0b394e721a5ce0c45
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/47789
Tested-by: Maloo <maloo@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Hongchao Zhang <hongchao@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
28 files changed:
lustre/include/dt_object.h
lustre/include/lustre_lmv.h
lustre/include/md_object.h
lustre/include/uapi/linux/lustre/lustre_idl.h
lustre/include/uapi/linux/lustre/lustre_user.h
lustre/llite/dcache.c
lustre/llite/dir.c
lustre/llite/file.c
lustre/llite/llite_internal.h
lustre/llite/llite_lib.c
lustre/llite/namei.c
lustre/llite/statahead.c
lustre/lod/lod_object.c
lustre/mdc/mdc_lib.c
lustre/mdd/mdd_dir.c
lustre/mdd/mdd_object.c
lustre/mdt/mdt_handler.c
lustre/mdt/mdt_internal.h
lustre/mdt/mdt_lib.c
lustre/mdt/mdt_lproc.c
lustre/mdt/mdt_reint.c
lustre/obdclass/lprocfs_status.c
lustre/ptlrpc/wiretest.c
lustre/tests/sanityn.sh
lustre/utils/lfs.c
lustre/utils/liblustreapi.c
lustre/utils/wirecheck.c
lustre/utils/wiretest.c

index c4e2d62..1484a85 100644 (file)
@@ -409,7 +409,11 @@ struct dt_allocation_hint {
        int                     dah_eadata_len;
        int                     dah_append_stripe_count;
        int                     dah_acl_len;
-       bool                    dah_can_block;
+       unsigned int            dah_can_block:1,
+                               /* implicit default LMV inherit is enabled? */
+                               dah_dmv_imp_inherit:1,
+                               /* eadata is default LMV sent from client  */
+                               dah_eadata_is_dmv:1;
 };
 
 /**
index f29dc50..96ae7f6 100644 (file)
@@ -81,6 +81,33 @@ static inline bool lmv_dir_bad_hash(const struct lmv_stripe_md *lsm)
        return !lmv_is_known_hash_type(lsm->lsm_md_hash_type);
 }
 
+static inline __u8 lmv_inherit_next(__u8 inherit)
+{
+       if (inherit == LMV_INHERIT_END || inherit == LMV_INHERIT_NONE)
+               return LMV_INHERIT_NONE;
+
+       if (inherit == LMV_INHERIT_UNLIMITED || inherit > LMV_INHERIT_MAX)
+               return inherit;
+
+       return inherit - 1;
+}
+
+static inline __u8 lmv_inherit_rr_next(__u8 inherit_rr)
+{
+       if (inherit_rr == LMV_INHERIT_RR_NONE ||
+           inherit_rr == LMV_INHERIT_RR_UNLIMITED ||
+           inherit_rr > LMV_INHERIT_RR_MAX)
+               return inherit_rr;
+
+       return inherit_rr - 1;
+}
+
+static inline bool lmv_is_inheritable(__u8 inherit)
+{
+       return inherit == LMV_INHERIT_UNLIMITED ||
+              (inherit > LMV_INHERIT_END && inherit <= LMV_INHERIT_MAX);
+}
+
 static inline bool
 lsm_md_eq(const struct lmv_stripe_md *lsm1, const struct lmv_stripe_md *lsm2)
 {
@@ -146,6 +173,22 @@ static inline void lsm_md_dump(int mask, const struct lmv_stripe_md *lsm)
                       i, PFID(&lsm->lsm_md_oinfo[i].lmo_fid));
 }
 
+static inline bool
+lsm_md_inherited(const struct lmv_stripe_md *plsm,
+                const struct lmv_stripe_md *clsm)
+{
+       return plsm && clsm &&
+              plsm->lsm_md_magic == clsm->lsm_md_magic &&
+              plsm->lsm_md_stripe_count == clsm->lsm_md_stripe_count &&
+              plsm->lsm_md_master_mdt_index ==
+                       clsm->lsm_md_master_mdt_index &&
+              plsm->lsm_md_hash_type == clsm->lsm_md_hash_type &&
+              lmv_inherit_next(plsm->lsm_md_max_inherit) ==
+                       clsm->lsm_md_max_inherit &&
+              lmv_inherit_rr_next(plsm->lsm_md_max_inherit_rr) ==
+                       clsm->lsm_md_max_inherit_rr;
+}
+
 union lmv_mds_md;
 
 void lmv_free_memmd(struct lmv_stripe_md *lsm);
@@ -523,25 +566,4 @@ static inline bool lmv_is_fixed(const struct lmv_mds_md_v1 *lmv)
        return cpu_to_le32(lmv->lmv_hash_type) & LMV_HASH_FLAG_FIXED;
 }
 
-static inline __u8 lmv_inherit_next(__u8 inherit)
-{
-       if (inherit == LMV_INHERIT_END || inherit == LMV_INHERIT_NONE)
-               return LMV_INHERIT_NONE;
-
-       if (inherit == LMV_INHERIT_UNLIMITED || inherit > LMV_INHERIT_MAX)
-               return inherit;
-
-       return inherit - 1;
-}
-
-static inline __u8 lmv_inherit_rr_next(__u8 inherit_rr)
-{
-       if (inherit_rr == LMV_INHERIT_RR_NONE ||
-           inherit_rr == LMV_INHERIT_RR_UNLIMITED ||
-           inherit_rr > LMV_INHERIT_RR_MAX)
-               return inherit_rr;
-
-       return inherit_rr - 1;
-}
-
 #endif
index 0481ca9..020cba8 100644 (file)
@@ -172,7 +172,8 @@ struct md_op_spec {
                     sp_rm_entry:1,  /* only remove name entry */
                     sp_permitted:1, /* do not check permission */
                     sp_migrate_close:1, /* close the file during migrate */
-                    sp_migrate_nsonly:1; /* migrate dirent only */
+                    sp_migrate_nsonly:1, /* migrate dirent only */
+                    sp_dmv_imp_inherit:1; /* implicit default LMV inherit */
 
        /** to create directory */
        const struct dt_index_features *sp_feat;
index 0ace06f..8cbe132 100644 (file)
@@ -913,7 +913,8 @@ struct ptlrpc_body_v2 {
                                OBD_CONNECT2_ATOMIC_OPEN_LOCK | \
                                OBD_CONNECT2_BATCH_RPC | \
                                OBD_CONNECT2_ENCRYPT_NAME | \
-                               OBD_CONNECT2_ENCRYPT_FID2PATH)
+                               OBD_CONNECT2_ENCRYPT_FID2PATH | \
+                               OBD_CONNECT2_DMV_IMP_INHERIT)
 
 #define OST_CONNECT_SUPPORTED  (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \
                                OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \
@@ -1976,6 +1977,8 @@ enum mds_op_bias {
        MDS_FID_OP              = 1 << 22,
        /* migrate dirent only */
        MDS_MIGRATE_NSONLY      = 1 << 23,
+       /* create with default LMV from client */
+       MDS_CREATE_DEFAULT_LMV  = 1 << 24,
 };
 
 #define MDS_CLOSE_INTENT (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP |         \
index 25a3642..3b954e3 100644 (file)
@@ -1209,6 +1209,8 @@ static inline __u32 lmv_foreign_to_md_stripes(__u32 size)
  */
 enum lmv_type {
        LMV_TYPE_DEFAULT = 0x0000,
+       /* fetch raw default LMV set on directory inode */
+       LMV_TYPE_RAW     = 0x0001,
 };
 
 /* lum_max_inherit will be decreased by 1 after each inheritance if it's not
@@ -1630,7 +1632,9 @@ enum la_valid {
 #define MDS_OPEN_PCC      010000000000000ULL /* PCC: auto RW-PCC cache attach
                                              * for newly created file */
 #define MDS_OP_WITH_FID   020000000000000ULL /* operation carried out by FID */
-#define MDS_OPEN_DEFAULT_LMV  040000000000000ULL /* open fetches default LMV */
+#define MDS_OPEN_DEFAULT_LMV  040000000000000ULL /* open fetches default LMV,
+                                                 * or mkdir with default LMV
+                                                 */
 
 /* lustre internal open flags, which should not be set from user space */
 #define MDS_OPEN_FL_INTERNAL (MDS_OPEN_HAS_EA | MDS_OPEN_HAS_OBJS |    \
index 495092f..7746563 100644 (file)
@@ -274,10 +274,11 @@ int ll_revalidate_it_finish(struct ptlrpc_request *request,
        ll_set_lock_data(ll_i2sbi(inode)->ll_md_exp, inode, it,
                         &bits);
        if (bits & MDS_INODELOCK_LOOKUP) {
-               ll_update_dir_depth(de->d_parent->d_inode, inode);
                if (!ll_d_setup(de, true))
                        RETURN(-ENOMEM);
                d_lustre_revalidate(de);
+               if (S_ISDIR(inode->i_mode))
+                       ll_update_dir_depth_dmv(de->d_parent->d_inode, de);
        }
 
        RETURN(rc);
@@ -318,8 +319,11 @@ static int ll_revalidate_dentry(struct dentry *dentry,
        /* If this is intermediate component path lookup and we were able to get
         * to this dentry, then its lock has not been revoked and the
         * path component is valid. */
-       if (lookup_flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))
+       if (lookup_flags & (LOOKUP_CONTINUE | LOOKUP_PARENT)) {
+               if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode))
+                       ll_update_dir_depth_dmv(dir, dentry);
                return 1;
+       }
 
        /* Symlink - always valid as long as the dentry was found */
        /* only special case is to prevent ELOOP error from VFS during open
@@ -350,6 +354,9 @@ static int ll_revalidate_dentry(struct dentry *dentry,
        if (dentry_may_statahead(dir, dentry))
                ll_revalidate_statahead(dir, &dentry, dentry->d_inode == NULL);
 
+       if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode))
+               ll_update_dir_depth_dmv(dir, dentry);
+
        return 1;
 }
 
index 1377697..235d25f 100644 (file)
@@ -691,6 +691,64 @@ int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
        RETURN(rc);
 }
 
+/* get default LMV from client cache */
+static int ll_dir_get_default_lmv(struct inode *inode, struct lmv_user_md *lum)
+{
+       struct ll_inode_info *lli = ll_i2info(inode);
+       const struct lmv_stripe_md *lsm;
+       bool fs_dmv_got = false;
+       int rc = -ENODATA;
+
+       ENTRY;
+retry:
+       if (lli->lli_default_lsm_md) {
+               down_read(&lli->lli_lsm_sem);
+               lsm = lli->lli_default_lsm_md;
+               if (lsm) {
+                       lum->lum_magic = lsm->lsm_md_magic;
+                       lum->lum_stripe_count = lsm->lsm_md_stripe_count;
+                       lum->lum_stripe_offset = lsm->lsm_md_master_mdt_index;
+                       lum->lum_hash_type = lsm->lsm_md_hash_type;
+                       lum->lum_max_inherit = lsm->lsm_md_max_inherit;
+                       lum->lum_max_inherit_rr = lsm->lsm_md_max_inherit_rr;
+                       rc = 0;
+               }
+               up_read(&lli->lli_lsm_sem);
+       }
+
+       if (rc == -ENODATA && !is_root_inode(inode) && !fs_dmv_got) {
+               lli = ll_i2info(inode->i_sb->s_root->d_inode);
+               fs_dmv_got = true;
+               goto retry;
+       }
+
+       if (!rc && fs_dmv_got) {
+               lli = ll_i2info(inode);
+               if (lum->lum_max_inherit != LMV_INHERIT_UNLIMITED) {
+                       if (lum->lum_max_inherit == LMV_INHERIT_NONE ||
+                           lum->lum_max_inherit < LMV_INHERIT_END ||
+                           lum->lum_max_inherit > LMV_INHERIT_MAX ||
+                           lum->lum_max_inherit <= lli->lli_dir_depth)
+                               GOTO(out, rc = -ENODATA);
+
+                       lum->lum_max_inherit -= lli->lli_dir_depth;
+               }
+
+               if (lum->lum_max_inherit_rr != LMV_INHERIT_RR_UNLIMITED) {
+                       if (lum->lum_max_inherit_rr == LMV_INHERIT_NONE ||
+                           lum->lum_max_inherit_rr < LMV_INHERIT_RR_END ||
+                           lum->lum_max_inherit_rr > LMV_INHERIT_RR_MAX ||
+                           lum->lum_max_inherit_rr <= lli->lli_dir_depth)
+                               lum->lum_max_inherit_rr = LMV_INHERIT_RR_NONE;
+
+                       if (lum->lum_max_inherit_rr > lli->lli_dir_depth)
+                               lum->lum_max_inherit_rr -= lli->lli_dir_depth;
+               }
+       }
+out:
+       RETURN(rc);
+}
+
 int ll_dir_get_default_layout(struct inode *inode, void **plmm, int *plmm_size,
                              struct ptlrpc_request **request, u64 valid,
                              enum get_default_layout_type type)
@@ -1658,7 +1716,6 @@ out:
                struct lmv_user_md __user *ulmv = uarg;
                struct lmv_user_md lum;
                struct ptlrpc_request *request = NULL;
-               struct ptlrpc_request *root_request = NULL;
                union lmv_mds_md *lmm = NULL;
                int lmmsize;
                u64 valid = 0;
@@ -1673,6 +1730,19 @@ out:
                if (copy_from_user(&lum, ulmv, sizeof(*ulmv)))
                        RETURN(-EFAULT);
 
+               /* get default LMV */
+               if (lum.lum_magic == LMV_USER_MAGIC &&
+                   lum.lum_type != LMV_TYPE_RAW) {
+                       rc = ll_dir_get_default_lmv(inode, &lum);
+                       if (rc)
+                               RETURN(rc);
+
+                       if (copy_to_user(ulmv, &lum, sizeof(lum)))
+                               RETURN(-EFAULT);
+
+                       RETURN(0);
+               }
+
                max_stripe_count = lum.lum_stripe_count;
                /* lum_magic will indicate which stripe the ioctl will like
                 * to get, LMV_MAGIC_V1 is for normal LMV stripe, LMV_USER_MAGIC
@@ -1685,61 +1755,14 @@ out:
                        RETURN(-EINVAL);
 
                rc = ll_dir_getstripe_default(inode, (void **)&lmm, &lmmsize,
-                                             &request, &root_request, valid);
+                                             &request, NULL, valid);
                if (rc != 0)
                        GOTO(finish_req, rc);
 
-               /* Get default LMV EA */
+               /* get default LMV in raw mode */
                if (lum.lum_magic == LMV_USER_MAGIC) {
-                       struct lmv_user_md *lum;
-                       struct ll_inode_info *lli;
-
-                       if (lmmsize > sizeof(*ulmv))
-                               GOTO(finish_req, rc = -EINVAL);
-
-                       lum = (struct lmv_user_md *)lmm;
-                       if (lum->lum_max_inherit == LMV_INHERIT_NONE)
-                               GOTO(finish_req, rc = -ENODATA);
-
-                       if (root_request != NULL) {
-                               lli = ll_i2info(inode);
-                               if (lum->lum_max_inherit !=
-                                   LMV_INHERIT_UNLIMITED) {
-                                       if (lum->lum_max_inherit <
-                                               LMV_INHERIT_END ||
-                                           lum->lum_max_inherit >
-                                               LMV_INHERIT_MAX ||
-                                           lum->lum_max_inherit <=
-                                               lli->lli_dir_depth)
-                                               GOTO(finish_req, rc = -ENODATA);
-
-                                       lum->lum_max_inherit -=
-                                               lli->lli_dir_depth;
-                               }
-
-                               if (lum->lum_max_inherit_rr !=
-                                       LMV_INHERIT_RR_UNLIMITED) {
-                                       if (lum->lum_max_inherit_rr ==
-                                               LMV_INHERIT_NONE ||
-                                           lum->lum_max_inherit_rr <
-                                               LMV_INHERIT_RR_END ||
-                                           lum->lum_max_inherit_rr >
-                                               LMV_INHERIT_RR_MAX ||
-                                           lum->lum_max_inherit_rr <=
-                                               lli->lli_dir_depth) {
-                                               lum->lum_max_inherit_rr =
-                                                       LMV_INHERIT_RR_NONE;
-                                               goto out_copy;
-                                       }
-
-                                       lum->lum_max_inherit_rr -=
-                                               lli->lli_dir_depth;
-                               }
-                       }
-out_copy:
                        if (copy_to_user(ulmv, lmm, lmmsize))
                                GOTO(finish_req, rc = -EFAULT);
-
                        GOTO(finish_req, rc);
                }
 
@@ -1818,7 +1841,6 @@ out_tmp:
                OBD_FREE(tmp, lum_size);
 finish_req:
                ptlrpc_req_finished(request);
-               ptlrpc_req_finished(root_request);
                return rc;
        }
        case LL_IOC_REMOVE_ENTRY: {
index e3f0ce5..8ac0027 100644 (file)
@@ -708,10 +708,8 @@ retry:
                 * of kernel will deal with that later.
                 */
                ll_set_lock_data(sbi->ll_md_exp, de->d_inode, itp, &bits);
-               if (bits & MDS_INODELOCK_LOOKUP) {
+               if (bits & MDS_INODELOCK_LOOKUP)
                        d_lustre_revalidate(de);
-                       ll_update_dir_depth(parent->d_inode, de->d_inode);
-               }
 
                /* if DoM bit returned along with LAYOUT bit then there
                 * can be read-on-open data returned.
@@ -719,6 +717,11 @@ retry:
                if (bits & MDS_INODELOCK_DOM && bits & MDS_INODELOCK_LAYOUT)
                        ll_dom_finish_open(de->d_inode, req);
        }
+       /* open may not fetch LOOKUP lock, update dir depth and default LMV
+        * anyway.
+        */
+       if (!rc && S_ISDIR(de->d_inode->i_mode))
+               ll_update_dir_depth_dmv(parent->d_inode, de);
 
 out:
        ptlrpc_req_finished(req);
index a709728..9b98b94 100644 (file)
@@ -187,7 +187,12 @@ struct ll_inode_info {
                         * set upon dir open, and cleared when dir is closed,
                         * statahead hit ratio is too low, or start statahead
                         * thread failed. */
-                       unsigned short                  lli_sa_enabled:1;
+                       unsigned short                  lli_sa_enabled:1,
+                       /* default LMV is explicitly set in inode on MDT, this
+                        * is for old server, or default LMV is set by
+                        * "lfs setdirstripe -D".
+                        */
+                                                       lli_default_lmv_set:1;
                        /* generation for statahead */
                        unsigned int                    lli_sa_generation;
                        /* rw lock protects lli_lsm_md */
@@ -1315,7 +1320,7 @@ int ll_statfs_internal(struct ll_sb_info *sbi, struct obd_statfs *osfs,
                       u32 flags);
 int ll_update_inode(struct inode *inode, struct lustre_md *md);
 void ll_update_inode_flags(struct inode *inode, unsigned int ext_flags);
-void ll_update_dir_depth(struct inode *dir, struct inode *inode);
+void ll_update_dir_depth_dmv(struct inode *dir, struct dentry *de);
 int ll_read_inode2(struct inode *inode, void *opaque);
 void ll_truncate_inode_pages_final(struct inode *inode, struct cl_io *io);
 void ll_delete_inode(struct inode *inode);
index fab2ee7..652b215 100644 (file)
@@ -351,7 +351,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt)
                                   OBD_CONNECT2_DOM_LVB |
                                   OBD_CONNECT2_REP_MBITS |
                                   OBD_CONNECT2_ATOMIC_OPEN_LOCK |
-                                  OBD_CONNECT2_BATCH_RPC;
+                                  OBD_CONNECT2_BATCH_RPC |
+                                  OBD_CONNECT2_DMV_IMP_INHERIT;
 
 #ifdef HAVE_LRU_RESIZE_SUPPORT
        if (test_bit(LL_SBI_LRU_RESIZE, sbi->ll_flags))
@@ -1724,13 +1725,15 @@ static void ll_update_default_lsm_md(struct inode *inode, struct lustre_md *md)
 
        if (!md->default_lmv) {
                /* clear default lsm */
-               if (lli->lli_default_lsm_md) {
+               if (lli->lli_default_lsm_md && lli->lli_default_lmv_set) {
                        down_write(&lli->lli_lsm_sem);
-                       if (lli->lli_default_lsm_md) {
+                       if (lli->lli_default_lsm_md &&
+                           lli->lli_default_lmv_set) {
                                lmv_free_memmd(lli->lli_default_lsm_md);
                                lli->lli_default_lsm_md = NULL;
+                               lli->lli_inherit_depth = 0;
+                               lli->lli_default_lmv_set = 0;
                        }
-                       lli->lli_inherit_depth = 0;
                        up_write(&lli->lli_lsm_sem);
                }
                RETURN_EXIT;
@@ -1751,6 +1754,7 @@ static void ll_update_default_lsm_md(struct inode *inode, struct lustre_md *md)
        if (lli->lli_default_lsm_md)
                lmv_free_memmd(lli->lli_default_lsm_md);
        lli->lli_default_lsm_md = md->default_lmv;
+       lli->lli_default_lmv_set = 1;
        lsm_md_dump(D_INODE, md->default_lmv);
        md->default_lmv = NULL;
        up_write(&lli->lli_lsm_sem);
@@ -2846,38 +2850,146 @@ static inline bool ll_default_lmv_inherited(struct lmv_stripe_md *pdmv,
        return true;
 }
 
-/* update directory depth to ROOT, called after LOOKUP lock is fetched. */
-void ll_update_dir_depth(struct inode *dir, struct inode *inode)
+/* if default LMV is implicitly inherited, subdir default LMV is maintained on
+ * client side.
+ */
+int ll_dir_default_lmv_inherit(struct inode *dir, struct inode *inode)
 {
+       struct ll_inode_info *plli = ll_i2info(dir);
+       struct ll_inode_info *lli = ll_i2info(inode);
+       struct lmv_stripe_md *plsm;
+       struct lmv_stripe_md *lsm;
+       int rc = 0;
+
+       ENTRY;
+
+       /* ROOT default LMV is not inherited */
+       if (is_root_inode(dir) ||
+           !(exp_connect_flags2(ll_i2mdexp(dir)) &
+                                OBD_CONNECT2_DMV_IMP_INHERIT))
+               RETURN(0);
+
+       /* nothing to do if no default LMV on both */
+       if (!plli->lli_default_lsm_md && !lli->lli_default_lsm_md)
+               RETURN(0);
+
+       /* subdir default LMV comes from disk */
+       if (lli->lli_default_lsm_md && lli->lli_default_lmv_set)
+               RETURN(0);
+
+       /* delete subdir default LMV if parent's is deleted or becomes
+        * uninheritable.
+        */
+       down_read(&plli->lli_lsm_sem);
+       plsm = plli->lli_default_lsm_md;
+       if (!plsm || !lmv_is_inheritable(plsm->lsm_md_max_inherit)) {
+               if (lli->lli_default_lsm_md && !lli->lli_default_lmv_set) {
+                       down_write(&lli->lli_lsm_sem);
+                       if (lli->lli_default_lsm_md &&
+                           !lli->lli_default_lmv_set) {
+                               lmv_free_memmd(lli->lli_default_lsm_md);
+                               lli->lli_default_lsm_md = NULL;
+                               lli->lli_inherit_depth = 0;
+                       }
+                       up_write(&lli->lli_lsm_sem);
+               }
+               GOTO(unlock_parent, rc = 0);
+       }
+
+       /* do nothing if inherited LMV is unchanged */
+       if (lli->lli_default_lsm_md) {
+               rc = 1;
+               down_read(&lli->lli_lsm_sem);
+               if (!lli->lli_default_lmv_set)
+                       rc = lsm_md_inherited(plsm, lli->lli_default_lsm_md);
+               up_read(&lli->lli_lsm_sem);
+               if (rc == 1)
+                       GOTO(unlock_parent, rc = 0);
+       }
+
+       /* inherit default LMV */
+       down_write(&lli->lli_lsm_sem);
+       if (lli->lli_default_lsm_md) {
+               /* checked above, but in case of race, check again with lock */
+               if (lli->lli_default_lmv_set)
+                       GOTO(unlock_child, rc = 0);
+               /* always update subdir default LMV in case parent's changed */
+               lsm = lli->lli_default_lsm_md;
+       } else {
+               OBD_ALLOC_PTR(lsm);
+               if (!lsm)
+                       GOTO(unlock_child, rc = -ENOMEM);
+               lli->lli_default_lsm_md = lsm;
+       }
+
+       *lsm = *plsm;
+       lsm->lsm_md_max_inherit = lmv_inherit_next(plsm->lsm_md_max_inherit);
+       lsm->lsm_md_max_inherit_rr =
+                       lmv_inherit_rr_next(plsm->lsm_md_max_inherit_rr);
+       lli->lli_inherit_depth = plli->lli_inherit_depth + 1;
+
+       lsm_md_dump(D_INODE, lsm);
+
+       EXIT;
+unlock_child:
+       up_write(&lli->lli_lsm_sem);
+unlock_parent:
+       up_read(&plli->lli_lsm_sem);
+
+       return rc;
+}
+
+/**
+ * Update directory depth and default LMV
+ *
+ * Update directory depth to ROOT and inherit default LMV from parent if
+ * parent's default LMV is inheritable. The default LMV set with command
+ * "lfs setdirstripe -D ..." is stored on MDT, while the inherited default LMV
+ * is generated at runtime on client side.
+ *
+ * \param[in]  dir     parent directory inode
+ * \param[in]  de      dentry
+ */
+void ll_update_dir_depth_dmv(struct inode *dir, struct dentry *de)
+{
+       struct inode *inode = de->d_inode;
        struct ll_inode_info *plli;
        struct ll_inode_info *lli;
 
-       if (!S_ISDIR(inode->i_mode))
-               return;
-
+       LASSERT(S_ISDIR(inode->i_mode));
        if (inode == dir)
                return;
 
        plli = ll_i2info(dir);
        lli = ll_i2info(inode);
        lli->lli_dir_depth = plli->lli_dir_depth + 1;
-       if (plli->lli_default_lsm_md && lli->lli_default_lsm_md) {
-               down_read(&plli->lli_lsm_sem);
-               down_read(&lli->lli_lsm_sem);
-               if (ll_default_lmv_inherited(plli->lli_default_lsm_md,
+       if (lli->lli_default_lsm_md && lli->lli_default_lmv_set) {
+               if (plli->lli_default_lsm_md) {
+                       down_read(&plli->lli_lsm_sem);
+                       down_read(&lli->lli_lsm_sem);
+                       if (lsm_md_inherited(plli->lli_default_lsm_md,
                                             lli->lli_default_lsm_md))
-                       lli->lli_inherit_depth =
-                               plli->lli_inherit_depth + 1;
-               else
+                               lli->lli_inherit_depth =
+                                       plli->lli_inherit_depth + 1;
+                       else
+                               /* in case parent default LMV changed */
+                               lli->lli_inherit_depth = 0;
+                       up_read(&lli->lli_lsm_sem);
+                       up_read(&plli->lli_lsm_sem);
+               } else {
+                       /* in case parent default LMV deleted */
                        lli->lli_inherit_depth = 0;
-               up_read(&lli->lli_lsm_sem);
-               up_read(&plli->lli_lsm_sem);
+               }
        } else {
-               lli->lli_inherit_depth = 0;
+               ll_dir_default_lmv_inherit(dir, inode);
        }
 
-       CDEBUG(D_INODE, DFID" depth %hu default LMV depth %hu\n",
-              PFID(&lli->lli_fid), lli->lli_dir_depth, lli->lli_inherit_depth);
+       if (lli->lli_default_lsm_md)
+               CDEBUG(D_INODE,
+                      "%s "DFID" depth %hu %s default LMV inherit depth %hu\n",
+                      de->d_name.name, PFID(&lli->lli_fid), lli->lli_dir_depth,
+                      lli->lli_default_lmv_set ? "server" : "client",
+                      lli->lli_inherit_depth);
 }
 
 void ll_truncate_inode_pages_final(struct inode *inode, struct cl_io *io)
index 15f6cea..bfa031b 100644 (file)
@@ -750,10 +750,13 @@ static int ll_lookup_it_finish(struct ptlrpc_request *request,
 
        if (!it_disposition(it, DISP_LOOKUP_NEG)) {
                /* We have the "lookup" lock, so unhide dentry */
-               if (bits & MDS_INODELOCK_LOOKUP) {
+               if (bits & MDS_INODELOCK_LOOKUP)
                        d_lustre_revalidate(*de);
-                       ll_update_dir_depth(parent, (*de)->d_inode);
-               }
+               /* open may not fetch LOOKUP lock, update dir depth/dmv anyway
+                * in case it's used uninitialized.
+                */
+               if (S_ISDIR(inode->i_mode))
+                       ll_update_dir_depth_dmv(parent, *de);
 
                if (encrypt) {
                        rc = llcrypt_prepare_readdir(inode);
@@ -1449,7 +1452,8 @@ static int ll_create_it(struct inode *dir, struct dentry *dentry,
        ll_set_lock_data(ll_i2sbi(dir)->ll_md_exp, inode, it, &bits);
        if (bits & MDS_INODELOCK_LOOKUP) {
                d_lustre_revalidate(dentry);
-               ll_update_dir_depth(dir, inode);
+               if (S_ISDIR(inode->i_mode))
+                       ll_update_dir_depth_dmv(dir, dentry);
        }
 
        RETURN(0);
@@ -1543,6 +1547,9 @@ static int ll_new_node(struct inode *dir, struct dentry *dchild,
        struct ll_sb_info *sbi = ll_i2sbi(dir);
        struct llcrypt_str *disk_link = NULL;
        bool encrypt = false;
+       struct lmv_user_md *lum = NULL;
+       const void *data = NULL;
+       size_t datalen = 0;
        int err;
 
        ENTRY;
@@ -1551,6 +1558,8 @@ static int ll_new_node(struct inode *dir, struct dentry *dchild,
                rdev = 0;
                if (!disk_link)
                        RETURN(-EINVAL);
+               data = disk_link->name;
+               datalen = disk_link->len;
        }
 
 again:
@@ -1559,8 +1568,36 @@ again:
        if (IS_ERR(op_data))
                GOTO(err_exit, err = PTR_ERR(op_data));
 
-       if (S_ISDIR(mode))
+       if (S_ISDIR(mode)) {
                ll_qos_mkdir_prep(op_data, dir);
+               if ((exp_connect_flags2(ll_i2mdexp(dir)) &
+                    OBD_CONNECT2_DMV_IMP_INHERIT) &&
+                   op_data->op_default_mea1 && !lum) {
+                       const struct lmv_stripe_md *lsm;
+
+                       /* once DMV_IMP_INHERIT is set, pack default LMV in
+                        * create request.
+                        */
+                       OBD_ALLOC_PTR(lum);
+                       if (!lum)
+                               GOTO(err_exit, err = -ENOMEM);
+
+                       lsm = op_data->op_default_mea1;
+                       lum->lum_magic = cpu_to_le32(lsm->lsm_md_magic);
+                       lum->lum_stripe_count =
+                               cpu_to_le32(lsm->lsm_md_stripe_count);
+                       lum->lum_stripe_offset =
+                               cpu_to_le32(lsm->lsm_md_master_mdt_index);
+                       lum->lum_hash_type =
+                               cpu_to_le32(lsm->lsm_md_hash_type);
+                       lum->lum_max_inherit = lsm->lsm_md_max_inherit;
+                       lum->lum_max_inherit_rr = lsm->lsm_md_max_inherit_rr;
+                       lum->lum_pool_name[0] = 0;
+                       op_data->op_bias |= MDS_CREATE_DEFAULT_LMV;
+                       data = lum;
+                       datalen = sizeof(*lum);
+               }
+       }
 
        if (test_bit(LL_SBI_FILE_SECCTX, sbi->ll_flags)) {
                err = ll_dentry_init_security(dchild, mode, &dchild->d_name,
@@ -1621,11 +1658,13 @@ again:
                        dchild->d_sb->s_op->destroy_inode(fakeinode);
                        if (err)
                                GOTO(err_exit, err);
+
+                       data = disk_link->name;
+                       datalen = disk_link->len;
                }
        }
 
-       err = md_create(sbi->ll_md_exp, op_data, tgt ? disk_link->name : NULL,
-                       tgt ? disk_link->len : 0, mode,
+       err = md_create(sbi->ll_md_exp, op_data, data, datalen, mode,
                        from_kuid(&init_user_ns, current_fsuid()),
                        from_kgid(&init_user_ns, current_fsgid()),
                        current_cap(), rdev, &request);
@@ -1750,9 +1789,10 @@ again:
 err_exit:
        if (request != NULL)
                ptlrpc_req_finished(request);
-
        if (!IS_ERR_OR_NULL(op_data))
                ll_finish_md_op_data(op_data);
+       if (lum)
+               OBD_FREE_PTR(lum);
 
        RETURN(err);
 }
index b546b68..6d056fc 100644 (file)
@@ -1625,10 +1625,10 @@ static int revalidate_statahead_dentry(struct inode *dir,
                                GOTO(out, rc = -ESTALE);
                        }
 
-                       if ((bits & MDS_INODELOCK_LOOKUP) &&
-                           d_lustre_invalid(*dentryp)) {
+                       if (bits & MDS_INODELOCK_LOOKUP) {
                                d_lustre_revalidate(*dentryp);
-                               ll_update_dir_depth(dir, (*dentryp)->d_inode);
+                               if (S_ISDIR(inode->i_mode))
+                                       ll_update_dir_depth_dmv(dir, *dentryp);
                        }
 
                        ll_intent_release(&it);
index d069041..bf1982d 100644 (file)
@@ -5412,6 +5412,17 @@ static int lod_get_default_lov_striping(const struct lu_env *env,
        RETURN(rc);
 }
 
+static inline void lod_lum2lds(struct lod_default_striping *lds,
+                              const struct lmv_user_md *lum)
+{
+       lds->lds_dir_def_stripe_count = le32_to_cpu(lum->lum_stripe_count);
+       lds->lds_dir_def_stripe_offset = le32_to_cpu(lum->lum_stripe_offset);
+       lds->lds_dir_def_hash_type = le32_to_cpu(lum->lum_hash_type);
+       lds->lds_dir_def_max_inherit = lum->lum_max_inherit;
+       lds->lds_dir_def_max_inherit_rr = lum->lum_max_inherit_rr;
+       lds->lds_dir_def_striping_set = 1; /* lds now holds a dir default */
+}
+
 /**
  * Get default directory striping.
  *
@@ -5439,16 +5450,7 @@ static int lod_get_default_lmv_striping(const struct lu_env *env,
                struct lod_thread_info *info = lod_env_info(env);
 
                lmu = info->lti_ea_store;
-
-               lds->lds_dir_def_stripe_count =
-                               le32_to_cpu(lmu->lum_stripe_count);
-               lds->lds_dir_def_stripe_offset =
-                               le32_to_cpu(lmu->lum_stripe_offset);
-               lds->lds_dir_def_hash_type =
-                               le32_to_cpu(lmu->lum_hash_type);
-               lds->lds_dir_def_max_inherit = lmu->lum_max_inherit;
-               lds->lds_dir_def_max_inherit_rr = lmu->lum_max_inherit_rr;
-               lds->lds_dir_def_striping_set = 1;
+               lod_lum2lds(lds, lmu);
        }
 
        return 0;
@@ -5468,6 +5470,7 @@ static int lod_get_default_lmv_striping(const struct lu_env *env,
  */
 static int lod_get_default_striping(const struct lu_env *env,
                                    struct lod_object *lo,
+                                   struct dt_allocation_hint *ah,
                                    struct lod_default_striping *lds)
 {
        int rc, rc1;
@@ -5482,9 +5485,15 @@ static int lod_get_default_striping(const struct lu_env *env,
                        lds->lds_def_striping_set = 0;
        }
 
-       rc1 = lod_get_default_lmv_striping(env, lo, lds);
-       if (rc == 0 && rc1 < 0)
-               rc = rc1;
+       if (ah->dah_eadata_is_dmv) {
+               lod_lum2lds(lds, ah->dah_eadata);
+       } else if (ah->dah_dmv_imp_inherit) {
+               lds->lds_dir_def_striping_set = 0;
+       } else {
+               rc1 = lod_get_default_lmv_striping(env, lo, lds);
+               if (rc == 0 && rc1 < 0)
+                       rc = rc1;
+       }
 
        return rc;
 }
@@ -5683,14 +5692,15 @@ static void lod_ah_init(const struct lu_env *env,
                }
 
                if (likely(lp != NULL))
-                       lod_get_default_striping(env, lp, lds);
+                       lod_get_default_striping(env, lp, ah, lds);
 
                /* It should always honour the specified stripes */
                /* Note: old client (< 2.7)might also do lfs mkdir, whose EA
                 * will have old magic. In this case, we should ignore the
                 * stripe count and try to create dir by default stripe.
                 */
-               if (ah->dah_eadata != NULL && ah->dah_eadata_len != 0 &&
+               if (ah->dah_eadata && ah->dah_eadata_len &&
+                   !ah->dah_eadata_is_dmv &&
                    (le32_to_cpu(lum1->lum_magic) == LMV_USER_MAGIC ||
                     le32_to_cpu(lum1->lum_magic) == LMV_USER_MAGIC_SPECIFIC)) {
                        lc->ldo_dir_stripe_count =
@@ -5761,6 +5771,13 @@ static void lod_ah_init(const struct lu_env *env,
                        /* set count 0 to create normal directory */
                        if (lc->ldo_dir_stripe_count == 1)
                                lc->ldo_dir_stripe_count = 0;
+
+                       /* do not save default LMV on server */
+                       if (ah->dah_dmv_imp_inherit) {
+                               lds->lds_dir_def_striping_set = 0;
+                               if (!lds->lds_def_striping_set)
+                                       lc->ldo_def_striping = NULL;
+                       }
                }
 
                /* shrink the stripe count to max_mdt_stripecount if it is -1
index 5d571d3..39dbd21 100644 (file)
@@ -224,6 +224,13 @@ void mdc_create_pack(struct req_capsule *pill, struct md_op_data *op_data,
                 * create only, and don't restripe if object exists.
                 */
                flags |= MDS_OPEN_CREAT;
+       if (op_data->op_bias & MDS_CREATE_DEFAULT_LMV) {
+               /* borrow MDS_OPEN_DEFAULT_LMV flag to indicate parent default
+                * LMV is packed in create request.
+                */
+               flags |= MDS_OPEN_DEFAULT_LMV;
+               LASSERT(data);
+       }
        set_mrc_cr_flags(rec, flags);
        rec->cr_bias     = op_data->op_bias;
        rec->cr_umask    = current_umask();
index 46ae4de..e8aa9da 100644 (file)
@@ -2777,12 +2777,15 @@ use_bigger_buffer:
        if (rc < 0)
                GOTO(out_stop, rc);
 
-       if (S_ISDIR(attr->la_mode)) {
+       /* adjust stripe count to 0 for 'lfs mkdir -c 1 ...' to avoid creating
+        * 1-stripe directory, MDS_OPEN_DEFAULT_LMV means ea is default LMV.
+        */
+       if (unlikely(S_ISDIR(attr->la_mode) && spec->u.sp_ea.eadata &&
+                    !(spec->sp_cr_flags & MDS_OPEN_DEFAULT_LMV))) {
                struct lmv_user_md *lmu = spec->u.sp_ea.eadata;
 
-               /*
-                * migrate may create 1-stripe directory, so lod_ah_init()
-                * doesn't adjust stripe count from lmu.
+               /* migrate may create 1-stripe directory, adjust stripe count
+                * before lod_ah_init().
                 */
                if (lmu && lmu->lum_stripe_count == cpu_to_le32(1))
                        lmu->lum_stripe_count = 0;
index 5243a64..e0cf2db 100644 (file)
@@ -3232,10 +3232,14 @@ void mdd_object_make_hint(const struct lu_env *env, struct mdd_object *parent,
 
        /* For striped directory, give striping EA to lod_ah_init, which will
         * decide the stripe_offset and stripe count by it. */
-       if (S_ISDIR(attr->la_mode) &&
-           unlikely(spec != NULL && spec->sp_cr_flags & MDS_OPEN_HAS_EA)) {
-               hint->dah_eadata = spec->u.sp_ea.eadata;
-               hint->dah_eadata_len = spec->u.sp_ea.eadatalen;
+       if (S_ISDIR(attr->la_mode) && spec) {
+               if (unlikely(spec->sp_cr_flags & MDS_OPEN_HAS_EA)) {
+                       hint->dah_eadata = spec->u.sp_ea.eadata;
+                       hint->dah_eadata_len = spec->u.sp_ea.eadatalen;
+                       if (spec->sp_cr_flags & MDS_OPEN_DEFAULT_LMV)
+                               hint->dah_eadata_is_dmv = 1;
+               }
+               hint->dah_dmv_imp_inherit = spec->sp_dmv_imp_inherit;
        } else if (S_ISREG(attr->la_mode) &&
                   spec->sp_cr_flags & MDS_OPEN_APPEND) {
                hint->dah_append_stripe_count = mdd->mdd_append_stripe_count;
index 28334a9..a8f86b9 100644 (file)
@@ -6219,11 +6219,13 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m,
        m->mdt_enable_remote_dir_gid = 0;
        m->mdt_enable_remote_rename = 1;
        m->mdt_enable_striped_dir = 1;
+       m->mdt_enable_dmv_implicit_inherit = 1;
        m->mdt_dir_restripe_nsonly = 1;
        m->mdt_max_mod_rpcs_in_flight = OBD_MAX_RIF_DEFAULT;
 
        atomic_set(&m->mdt_mds_mds_conns, 0);
        atomic_set(&m->mdt_async_commit_count, 0);
+       atomic_set(&m->mdt_dmv_old_client_count, 0);
 
        m->mdt_lu_dev.ld_ops = &mdt_lu_ops;
        m->mdt_lu_dev.ld_obd = obd;
@@ -6882,6 +6884,12 @@ static int mdt_connect_internal(const struct lu_env *env,
        if (!mdt->mdt_lut.lut_dt_conf.ddp_has_lseek_data_hole)
                data->ocd_connect_flags2 &= ~OBD_CONNECT2_LSEEK;
 
+       if (!OCD_HAS_FLAG(data, MDS_MDS) && !OCD_HAS_FLAG(data, LIGHTWEIGHT) &&
+           !OCD_HAS_FLAG2(data, DMV_IMP_INHERIT)) {
+               atomic_inc(&mdt->mdt_dmv_old_client_count);
+               mdt->mdt_enable_dmv_implicit_inherit = 0;
+       }
+
        return 0;
 }
 
@@ -7002,6 +7010,8 @@ static int mdt_export_cleanup(struct obd_export *exp)
 
 static int mdt_obd_disconnect(struct obd_export *exp)
 {
+       struct obd_connect_data *data = &exp->exp_connect_data;
+       struct mdt_device *mdt = mdt_dev(exp->exp_obd->obd_lu_dev);
        int rc;
 
        ENTRY;
@@ -7012,13 +7022,14 @@ static int mdt_obd_disconnect(struct obd_export *exp)
        if (!(exp->exp_flags & OBD_OPT_FORCE))
                tgt_grant_sanity_check(exp->exp_obd, __func__);
 
-       if ((exp_connect_flags(exp) & OBD_CONNECT_MDS_MDS) &&
-           !(exp_connect_flags(exp) & OBD_CONNECT_LIGHTWEIGHT)) {
-               struct mdt_device *mdt = mdt_dev(exp->exp_obd->obd_lu_dev);
+       if (OCD_HAS_FLAG(data, MDS_MDS) && !OCD_HAS_FLAG(data, LIGHTWEIGHT) &&
+           atomic_dec_and_test(&mdt->mdt_mds_mds_conns))
+               mdt_disable_slc(mdt);
 
-               if (atomic_dec_and_test(&mdt->mdt_mds_mds_conns))
-                       mdt_disable_slc(mdt);
-       }
+       if (!OCD_HAS_FLAG(data, MDS_MDS) && !OCD_HAS_FLAG(data, LIGHTWEIGHT) &&
+           !OCD_HAS_FLAG2(data, DMV_IMP_INHERIT) &&
+           atomic_dec_and_test(&mdt->mdt_dmv_old_client_count))
+               mdt->mdt_enable_dmv_implicit_inherit = 1;
 
        rc = server_disconnect_export(exp);
        if (rc != 0)
index b11a29f..342f8d3 100644 (file)
@@ -309,7 +309,12 @@ struct mdt_device {
                                   mdt_readonly:1,
                                   mdt_skip_lfsck:1,
                                   /* dir restripe migrate dirent only */
-                                  mdt_dir_restripe_nsonly:1;
+                                  mdt_dir_restripe_nsonly:1,
+                                  /* this is enabled by default, but once an
+                                   * old client joins, disable this to handle
+                                   * inherited default LMV on server.
+                                   */
+                                  mdt_enable_dmv_implicit_inherit:1;
 
                                   /* user with gid can create remote/striped
                                    * dir, and set default dir stripe */
@@ -346,6 +351,9 @@ struct mdt_device {
        struct mdt_object         *mdt_md_root;
 
        struct mdt_dir_restriper   mdt_restriper;
+
+       /* count of old clients that don't support DMV implicit inherit */
+       atomic_t                   mdt_dmv_old_client_count;
 };
 
 #define MDT_SERVICE_WATCHDOG_FACTOR    (2)
index 9ec750d..7b0d2bc 100644 (file)
@@ -1361,14 +1361,29 @@ static int mdt_create_unpack(struct mdt_thread_info *info)
                        RETURN(-EFAULT);
        } else {
                req_capsule_extend(pill, &RQF_MDS_REINT_CREATE_ACL);
-               if (S_ISDIR(attr->la_mode) &&
-                   req_capsule_get_size(pill, &RMF_EADATA, RCL_CLIENT) > 0) {
-                       sp->u.sp_ea.eadata =
-                               req_capsule_client_get(pill, &RMF_EADATA);
-                       sp->u.sp_ea.eadatalen =
-                               req_capsule_get_size(pill, &RMF_EADATA,
-                                                    RCL_CLIENT);
-                       sp->sp_cr_flags |= MDS_OPEN_HAS_EA;
+               if (S_ISDIR(attr->la_mode)) {
+                       struct obd_export *exp = mdt_info_req(info)->rq_export;
+
+                       sp->sp_dmv_imp_inherit =
+                               info->mti_mdt->mdt_enable_dmv_implicit_inherit;
+                       if (req_capsule_get_size(pill, &RMF_EADATA, RCL_CLIENT)
+                           > 0) {
+                               sp->u.sp_ea.eadata =
+                                       req_capsule_client_get(pill,
+                                                              &RMF_EADATA);
+                               sp->u.sp_ea.eadatalen =
+                                       req_capsule_get_size(pill, &RMF_EADATA,
+                                                            RCL_CLIENT);
+                               sp->sp_cr_flags |= MDS_OPEN_HAS_EA;
+                       }
+                       if (OCD_HAS_FLAG2(&exp->exp_connect_data,
+                                         DMV_IMP_INHERIT)) {
+                               if ((sp->sp_cr_flags & MDS_OPEN_DEFAULT_LMV) &&
+                                   !(sp->sp_cr_flags & MDS_OPEN_HAS_EA))
+                                       RETURN(-EPROTO);
+                       } else if (sp->sp_cr_flags & MDS_OPEN_DEFAULT_LMV) {
+                               RETURN(-EPROTO);
+                       }
                }
        }
 
index b5634ef..c813c05 100644 (file)
@@ -1366,6 +1366,38 @@ static ssize_t enable_remote_subdir_mount_store(struct kobject *kobj,
 }
 LUSTRE_RW_ATTR(enable_remote_subdir_mount);
 
+static ssize_t enable_dmv_implicit_inherit_show(struct kobject *kobj,
+                                               struct attribute *attr,
+                                               char *buf)
+{
+       struct obd_device *obd = container_of(kobj, struct obd_device,
+                                             obd_kset.kobj);
+       struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
+       /* report the current MDT flag as an unsigned number plus newline */
+       return scnprintf(buf, PAGE_SIZE, "%u\n",
+                        mdt->mdt_enable_dmv_implicit_inherit);
+}
+
+static ssize_t enable_dmv_implicit_inherit_store(struct kobject *kobj,
+                                                struct attribute *attr,
+                                                const char *buffer,
+                                                size_t count)
+{
+       struct obd_device *obd = container_of(kobj, struct obd_device,
+                                             obd_kset.kobj);
+       struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
+       bool val;
+       int rc;
+       /* parse a boolean; a parse error is returned to the writer as-is */
+       rc = kstrtobool(buffer, &val);
+       if (rc)
+               return rc;
+
+       mdt->mdt_enable_dmv_implicit_inherit = val;
+       return count;
+}
+LUSTRE_RW_ATTR(enable_dmv_implicit_inherit);
+
 /**
  * Show if the OFD enforces T10PI checksum.
  *
@@ -1578,6 +1610,7 @@ static struct attribute *mdt_attrs[] = {
        &lustre_attr_checksum_t10pi_enforce.attr,
        &lustre_attr_enable_remote_subdir_mount.attr,
        &lustre_attr_max_mod_rpcs_in_flight.attr,
+       &lustre_attr_enable_dmv_implicit_inherit.attr,
        NULL,
 };
 
index 0007a46..ce0e657 100644 (file)
@@ -539,8 +539,12 @@ static int mdt_create(struct mdt_thread_info *info)
        if (!fid_is_md_operative(rr->rr_fid1))
                RETURN(-EPERM);
 
+       /* MDS_OPEN_DEFAULT_LMV means eadata is parent default LMV, which is set
+        * if client maintains inherited default LMV
+        */
        if (S_ISDIR(ma->ma_attr.la_mode) &&
-           spec->u.sp_ea.eadata != NULL && spec->u.sp_ea.eadatalen != 0) {
+           spec->u.sp_ea.eadata != NULL && spec->u.sp_ea.eadatalen != 0 &&
+           !(spec->sp_cr_flags & MDS_OPEN_DEFAULT_LMV)) {
                const struct lmv_user_md *lum = spec->u.sp_ea.eadata;
                struct obd_export *exp = mdt_info_req(info)->rq_export;
 
index 6c25239..9ed044d 100644 (file)
@@ -649,7 +649,7 @@ static const char *const obd_connect_names[] = {
        "atomic_open_lock",             /* 0x4000000 */
        "name_encryption",              /* 0x8000000 */
        "mkdir_replay",                 /* 0x10000000 */
-       "dmv_inherit",                  /* 0x20000000 */
+       "dmv_imp_inherit",              /* 0x20000000 */
        "encryption_fid2path",          /* 0x40000000 */
        NULL
 };
index 63ff323..88b67a3 100644 (file)
@@ -2458,6 +2458,8 @@ void lustre_assert_wire_constants(void)
                (unsigned)MDS_FID_OP);
        LASSERTF(MDS_MIGRATE_NSONLY == 0x00800000UL, "found 0x%.8xUL\n",
                (unsigned)MDS_MIGRATE_NSONLY);
+       LASSERTF(MDS_CREATE_DEFAULT_LMV == 0x01000000UL, "found 0x%.8xUL\n",
+               (unsigned)MDS_CREATE_DEFAULT_LMV);
 
        /* Checks for struct mdt_body */
        LASSERTF((int)sizeof(struct mdt_body) == 216, "found %lld\n",
index 78dd8bb..eda0b4b 100755 (executable)
@@ -6162,6 +6162,116 @@ test_113 () {
 }
 run_test 113 "check servers of specified fs"
 
+check_default_lmv() {
+       local dir=$1
+       # usage: check_default_lmv DIR INDEX COUNT MAX_INHERIT [MAX_INHERIT_RR]
+       local enabled
+       local dmv
+       local index
+       local count
+       local inherit
+       local inherit_rr
+       local raw
+
+       enabled=$(do_facet mds1 \
+               $LCTL get_param -n mdt.*-MDT0000*.enable_dmv_implicit_inherit)
+
+       dmv=$($LFS getdirstripe -D $dir)
+       echo $dir $dmv
+       index=$(echo $dmv | awk '{ print $4 }')
+       (( index == $2 )) || error "$dir default stripe index $index != $2"
+
+       count=$(echo $dmv | awk '{ print $2 }')
+       (( count == $3 )) || error "$dir default stripe count $count != $3"
+
+       inherit=$(echo $dmv | awk '{ print $8 }')
+       (( inherit == $4 )) || error "$dir default max-inherit $inherit != $4"
+
+       if [ $index -eq -1 ]; then
+               inherit_rr=$(echo $dmv | awk '{ print $10 }')
+               (( inherit_rr == $5 )) ||
+                       error "$dir default max-inherit-rr $inherit_rr != $5"
+       fi
+
+       # with --raw, print default LMV stored in inode, otherwise print nothing
+       raw=$($LFS getdirstripe -D --raw $dir)
+       if (( enabled == 1 )); then
+               [ -z "$raw" ] ||
+                       error "implicit inherited DMV is printed with --raw"
+       else
+               # if disabled, dmv is stored in inode, which will always
+               # print max-inherit-rr
+               echo $dir $raw
+               [[ $raw =~ $dmv.* ]] || error "$dir raw $raw != dmv $dmv"
+       fi
+}
+
+test_dmv_imp_inherit() {
+       # Check default LMV inheritance down $tdir/l1/l2/l3 via both mounts.
+       # The runtime-propagation checks at the end only run when the MDT
+       # reports enable_dmv_implicit_inherit=1.
+       local dmv
+       local raw
+       local enabled
+
+       rm -rf $DIR/$tdir || error "rm $tdir failed"
+       mkdir -p $DIR/$tdir || error "mkdir $tdir failed"
+
+       # set dir default LMV
+       $LFS setdirstripe -D -c1 -X4 --max-inherit-rr 2 $DIR/$tdir ||
+               error "setdirstripe -D $tdir failed"
+       dmv=$($LFS getdirstripe -D $DIR/$tdir)
+       raw=$($LFS getdirstripe -D --raw $DIR/$tdir)
+       [ "$dmv" == "$raw" ] || error "$dmv != $raw"
+
+       mkdir -p $DIR/$tdir/l1/l2/l3 || error "mkdir $DIR/$tdir/l1/l2/l3 failed"
+       check_default_lmv $DIR/$tdir/l1/l2/l3 -1 1 1 0
+       check_default_lmv $DIR2/$tdir/l1/l2/l3 -1 1 1 0
+
+       # below tests are valid only when this feature is enabled
+       enabled=$(do_facet mds1 \
+               $LCTL get_param -n mdt.*-MDT0000*.enable_dmv_implicit_inherit)
+
+       (( enabled == 1 )) || return 0
+
+       # set l2 default LMV, dmv of l3 should change immediately
+       $LFS setdirstripe -D -i1 -c2 -X4 $DIR/$tdir/l1/l2 ||
+               error "setdirstripe -D $tdir/l1/l2 failed"
+
+       check_default_lmv $DIR/$tdir/l1/l2/l3 1 2 3
+       check_default_lmv $DIR2/$tdir/l1/l2/l3 1 2 3
+
+       # change tdir default LMV, dmv of l3 should be unchanged because dmv
+       # of l2 is explicitly set
+       $LFS setdirstripe -D -i2 -c2 -X3 $DIR/$tdir ||
+               error "setdirstripe -D $tdir failed"
+
+       check_default_lmv $DIR/$tdir/l1 2 2 2
+       check_default_lmv $DIR2/$tdir/l1 2 2 2
+       check_default_lmv $DIR/$tdir/l1/l2/l3 1 2 3
+       check_default_lmv $DIR2/$tdir/l1/l2/l3 1 2 3
+}
+
+test_114() {
+       (( MDSCOUNT >= 2 )) ||
+               skip "We need at least 2 MDTs for this test"
+
+       (( MDS1_VERSION >= $(version_code 2.15.55.45) )) ||
+               skip "Need server version at least 2.15.55.45"
+
+       test_dmv_imp_inherit
+
+       # disable dmv_imp_inherit to simulate old client
+       local mdts=$(comma_list $(mdts_nodes))
+
+       do_nodes $mdts $LCTL set_param -n \
+               mdt.*MDT*.enable_dmv_implicit_inherit=0
+       test_dmv_imp_inherit
+       do_nodes $mdts $LCTL set_param -n \
+               mdt.*MDT*.enable_dmv_implicit_inherit=1
+}
+run_test 114 "implicit default LMV inherit"
+
 log "cleanup: ======================================================"
 
 # kill and wait in each test only guarentee script finish, but command in script
index b405982..d34b0ec 100644 (file)
@@ -378,8 +378,8 @@ command_t cmdlist[] = {
         "or recursively for all directories in a directory tree.\n"
         "usage: getdirstripe [--mdt-count|-c] [--mdt-index|-m|-i]\n"
         "                    [--help|-h] [--hex-idx] [--mdt-hash|-H]\n"
-        "                    [--obd|-O UUID] [--recursive|-r] [--yaml|-y]\n"
-        "                    [--verbose|-v] [--default|-D]\n"
+        "                    [--obd|-O UUID] [--recursive|-r] [--raw|-R]\n"
+        "                    [--yaml|-y] [--verbose|-v] [--default|-D]\n"
         "                    [--max-inherit|-X]\n"
         "                    [--max-inherit-rr] <dir> ..."},
        {"mkdir", lfs_setdirstripe, 0,
@@ -6447,6 +6447,7 @@ static int lfs_getdirstripe(int argc, char **argv)
        { .val = 'm',   .name = "mdt-index",     .has_arg = no_argument },
        { .val = 'O',   .name = "obd",           .has_arg = required_argument },
        { .val = 'r',   .name = "recursive",     .has_arg = no_argument },
+       { .val = 'R',   .name = "raw",          .has_arg = no_argument },
        { .val = 'T',   .name = "mdt-count",     .has_arg = no_argument },
        { .val = 'v',   .name = "verbose",       .has_arg = no_argument },
        { .val = 'X',   .name = "max-inherit",   .has_arg = no_argument },
@@ -6459,7 +6460,7 @@ static int lfs_getdirstripe(int argc, char **argv)
        param.fp_get_lmv = 1;
 
        while ((c = getopt_long(argc, argv,
-                               "cDhHimO:rtTvXy", long_opts, NULL)) != -1) {
+                               "cDhHimO:rRtTvXy", long_opts, NULL)) != -1) {
                switch (c) {
                case 'c':
                case 'T':
@@ -6497,6 +6498,9 @@ static int lfs_getdirstripe(int argc, char **argv)
                case 'r':
                        param.fp_recursive = 1;
                        break;
+               case 'R':
+                       param.fp_raw = 1;
+                       break;
                case 'v':
                        param.fp_verbose |= VERBOSE_DEFAULT;
                        param.fp_verbose |= VERBOSE_DETAIL;
index d4887db..b9a2f8e 100644 (file)
@@ -1561,6 +1561,8 @@ again:
        } else {
                param->fp_lmv_md->lum_magic = LMV_MAGIC_V1;
        }
+       if (param->fp_raw)
+               param->fp_lmv_md->lum_type = LMV_TYPE_RAW;
 
        ret = ioctl(*d, LL_IOC_LMV_GETSTRIPE, param->fp_lmv_md);
 
@@ -6146,6 +6148,8 @@ static int cb_getstripe(char *path, int p, int *dp, void *data,
                        if (param->fp_get_default_lmv) {
                                struct lmv_user_md *lum = param->fp_lmv_md;
 
+                               if (param->fp_raw)
+                                       goto out;
                                lum->lum_magic = LMV_USER_MAGIC;
                                lum->lum_stripe_count = 0;
                                lum->lum_stripe_offset = LMV_OFFSET_DEFAULT;
index 15fb0ec..faf0c0b 100644 (file)
@@ -1195,6 +1195,7 @@ check_mds_op_bias(void)
        CHECK_VALUE_X(MDS_SETSTRIPE_CREATE);
        CHECK_VALUE_X(MDS_FID_OP);
        CHECK_VALUE_X(MDS_MIGRATE_NSONLY);
+       CHECK_VALUE_X(MDS_CREATE_DEFAULT_LMV);
 }
 
 static void
index 9822479..c5dd604 100644 (file)
@@ -2521,6 +2521,8 @@ void lustre_assert_wire_constants(void)
                (unsigned)MDS_FID_OP);
        LASSERTF(MDS_MIGRATE_NSONLY == 0x00800000UL, "found 0x%.8xUL\n",
                (unsigned)MDS_MIGRATE_NSONLY);
+       LASSERTF(MDS_CREATE_DEFAULT_LMV == 0x01000000UL, "found 0x%.8xUL\n",
+               (unsigned)MDS_CREATE_DEFAULT_LMV);
 
        /* Checks for struct mdt_body */
        LASSERTF((int)sizeof(struct mdt_body) == 216, "found %lld\n",