From 388a185eace0e6b16a5f7fa3a373c9c93cac12bc Mon Sep 17 00:00:00 2001 From: Lai Siyao Date: Sun, 5 Mar 2023 08:43:08 -0500 Subject: [PATCH] LU-15971 llite: implicit default LMV inherit With implicit default LMV inherit, the inherited default LMV is not stored on disk, but maintained on client side. Benefits: * change of directory default LMV will be propagated to all sub levels at runtime. * default LMV will be packed into mkdir request, therefore MDT doesn't need to read it from disk, which will improve mkdir performance. Caveats: * to disable inherited default LMV on subdir, a default LMV needs to be set on this subdir explicitly like this: "lfs setdirstripe -D -i --max-inherit 0" Changes on client side: * update inherited default LMV after lookup/open/revalidate. * pack default LMV in mkdir request. * add "--raw" option for "lfs getdirstripe -D" to print default LMV stored in inode; if the directory doesn't have default LMV, or its default LMV is implicitly inherited, nothing will be printed. Changes on MDT side: * use the default LMV from client in lod_ah_init() to mkdir. * don't save inherited default LMV in mkdir. Add sanityn 114. 
Test-Parameters: clientversion=2.14 testlist=sanity mdtcount=4 mdscount=2 env=SANITY_EXCEPT="39l 39r 134b 150b 160a 205a 208 220 230e 230p 300g 807" Test-Parameters: serverversion=2.14 testlist=sanity mdtcount=4 mdscount=2 env=SANITY_EXCEPT="27Cg 39r 65n 413a 413b 905" Signed-off-by: Lai Siyao Change-Id: Iae109a0ef35a273175c70dd0b394e721a5ce0c45 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/47789 Tested-by: Maloo Tested-by: jenkins Reviewed-by: Andreas Dilger Reviewed-by: Hongchao Zhang Reviewed-by: Oleg Drokin --- lustre/include/dt_object.h | 6 +- lustre/include/lustre_lmv.h | 64 ++++++---- lustre/include/md_object.h | 3 +- lustre/include/uapi/linux/lustre/lustre_idl.h | 5 +- lustre/include/uapi/linux/lustre/lustre_user.h | 6 +- lustre/llite/dcache.c | 11 +- lustre/llite/dir.c | 124 ++++++++++++-------- lustre/llite/file.c | 9 +- lustre/llite/llite_internal.h | 9 +- lustre/llite/llite_lib.c | 154 +++++++++++++++++++++---- lustre/llite/namei.c | 56 +++++++-- lustre/llite/statahead.c | 6 +- lustre/lod/lod_object.c | 47 +++++--- lustre/mdc/mdc_lib.c | 7 ++ lustre/mdd/mdd_dir.c | 11 +- lustre/mdd/mdd_object.c | 12 +- lustre/mdt/mdt_handler.c | 23 +++- lustre/mdt/mdt_internal.h | 10 +- lustre/mdt/mdt_lib.c | 31 +++-- lustre/mdt/mdt_lproc.c | 33 ++++++ lustre/mdt/mdt_reint.c | 6 +- lustre/obdclass/lprocfs_status.c | 2 +- lustre/ptlrpc/wiretest.c | 2 + lustre/tests/sanityn.sh | 110 ++++++++++++++++++ lustre/utils/lfs.c | 10 +- lustre/utils/liblustreapi.c | 4 + lustre/utils/wirecheck.c | 1 + lustre/utils/wiretest.c | 2 + 28 files changed, 606 insertions(+), 158 deletions(-) diff --git a/lustre/include/dt_object.h b/lustre/include/dt_object.h index c4e2d62..1484a85 100644 --- a/lustre/include/dt_object.h +++ b/lustre/include/dt_object.h @@ -409,7 +409,11 @@ struct dt_allocation_hint { int dah_eadata_len; int dah_append_stripe_count; int dah_acl_len; - bool dah_can_block; + unsigned int dah_can_block:1, + /* implicit default LMV inherit is enabled? 
*/ + dah_dmv_imp_inherit:1, + /* eadata is default LMV sent from client */ + dah_eadata_is_dmv:1; }; /** diff --git a/lustre/include/lustre_lmv.h b/lustre/include/lustre_lmv.h index f29dc50..96ae7f6 100644 --- a/lustre/include/lustre_lmv.h +++ b/lustre/include/lustre_lmv.h @@ -81,6 +81,33 @@ static inline bool lmv_dir_bad_hash(const struct lmv_stripe_md *lsm) return !lmv_is_known_hash_type(lsm->lsm_md_hash_type); } +static inline __u8 lmv_inherit_next(__u8 inherit) +{ + if (inherit == LMV_INHERIT_END || inherit == LMV_INHERIT_NONE) + return LMV_INHERIT_NONE; + + if (inherit == LMV_INHERIT_UNLIMITED || inherit > LMV_INHERIT_MAX) + return inherit; + + return inherit - 1; +} + +static inline __u8 lmv_inherit_rr_next(__u8 inherit_rr) +{ + if (inherit_rr == LMV_INHERIT_RR_NONE || + inherit_rr == LMV_INHERIT_RR_UNLIMITED || + inherit_rr > LMV_INHERIT_RR_MAX) + return inherit_rr; + + return inherit_rr - 1; +} + +static inline bool lmv_is_inheritable(__u8 inherit) +{ + return inherit == LMV_INHERIT_UNLIMITED || + (inherit > LMV_INHERIT_END && inherit <= LMV_INHERIT_MAX); +} + static inline bool lsm_md_eq(const struct lmv_stripe_md *lsm1, const struct lmv_stripe_md *lsm2) { @@ -146,6 +173,22 @@ static inline void lsm_md_dump(int mask, const struct lmv_stripe_md *lsm) i, PFID(&lsm->lsm_md_oinfo[i].lmo_fid)); } +static inline bool +lsm_md_inherited(const struct lmv_stripe_md *plsm, + const struct lmv_stripe_md *clsm) +{ + return plsm && clsm && + plsm->lsm_md_magic == clsm->lsm_md_magic && + plsm->lsm_md_stripe_count == clsm->lsm_md_stripe_count && + plsm->lsm_md_master_mdt_index == + clsm->lsm_md_master_mdt_index && + plsm->lsm_md_hash_type == clsm->lsm_md_hash_type && + lmv_inherit_next(plsm->lsm_md_max_inherit) == + clsm->lsm_md_max_inherit && + lmv_inherit_rr_next(plsm->lsm_md_max_inherit_rr) == + clsm->lsm_md_max_inherit_rr; +} + union lmv_mds_md; void lmv_free_memmd(struct lmv_stripe_md *lsm); @@ -523,25 +566,4 @@ static inline bool lmv_is_fixed(const struct 
lmv_mds_md_v1 *lmv) return cpu_to_le32(lmv->lmv_hash_type) & LMV_HASH_FLAG_FIXED; } -static inline __u8 lmv_inherit_next(__u8 inherit) -{ - if (inherit == LMV_INHERIT_END || inherit == LMV_INHERIT_NONE) - return LMV_INHERIT_NONE; - - if (inherit == LMV_INHERIT_UNLIMITED || inherit > LMV_INHERIT_MAX) - return inherit; - - return inherit - 1; -} - -static inline __u8 lmv_inherit_rr_next(__u8 inherit_rr) -{ - if (inherit_rr == LMV_INHERIT_RR_NONE || - inherit_rr == LMV_INHERIT_RR_UNLIMITED || - inherit_rr > LMV_INHERIT_RR_MAX) - return inherit_rr; - - return inherit_rr - 1; -} - #endif diff --git a/lustre/include/md_object.h b/lustre/include/md_object.h index 0481ca9..020cba8 100644 --- a/lustre/include/md_object.h +++ b/lustre/include/md_object.h @@ -172,7 +172,8 @@ struct md_op_spec { sp_rm_entry:1, /* only remove name entry */ sp_permitted:1, /* do not check permission */ sp_migrate_close:1, /* close the file during migrate */ - sp_migrate_nsonly:1; /* migrate dirent only */ + sp_migrate_nsonly:1, /* migrate dirent only */ + sp_dmv_imp_inherit:1; /* implicit default LMV inherit */ /** to create directory */ const struct dt_index_features *sp_feat; diff --git a/lustre/include/uapi/linux/lustre/lustre_idl.h b/lustre/include/uapi/linux/lustre/lustre_idl.h index 0ace06f..8cbe132 100644 --- a/lustre/include/uapi/linux/lustre/lustre_idl.h +++ b/lustre/include/uapi/linux/lustre/lustre_idl.h @@ -913,7 +913,8 @@ struct ptlrpc_body_v2 { OBD_CONNECT2_ATOMIC_OPEN_LOCK | \ OBD_CONNECT2_BATCH_RPC | \ OBD_CONNECT2_ENCRYPT_NAME | \ - OBD_CONNECT2_ENCRYPT_FID2PATH) + OBD_CONNECT2_ENCRYPT_FID2PATH | \ + OBD_CONNECT2_DMV_IMP_INHERIT) #define OST_CONNECT_SUPPORTED (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \ OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \ @@ -1976,6 +1977,8 @@ enum mds_op_bias { MDS_FID_OP = 1 << 22, /* migrate dirent only */ MDS_MIGRATE_NSONLY = 1 << 23, + /* create with default LMV from client */ + MDS_CREATE_DEFAULT_LMV = 1 << 24, }; #define MDS_CLOSE_INTENT 
(MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP | \ diff --git a/lustre/include/uapi/linux/lustre/lustre_user.h b/lustre/include/uapi/linux/lustre/lustre_user.h index 25a3642..3b954e3 100644 --- a/lustre/include/uapi/linux/lustre/lustre_user.h +++ b/lustre/include/uapi/linux/lustre/lustre_user.h @@ -1209,6 +1209,8 @@ static inline __u32 lmv_foreign_to_md_stripes(__u32 size) */ enum lmv_type { LMV_TYPE_DEFAULT = 0x0000, + /* fetch raw default LMV set on directory inode */ + LMV_TYPE_RAW = 0x0001, }; /* lum_max_inherit will be decreased by 1 after each inheritance if it's not @@ -1630,7 +1632,9 @@ enum la_valid { #define MDS_OPEN_PCC 010000000000000ULL /* PCC: auto RW-PCC cache attach * for newly created file */ #define MDS_OP_WITH_FID 020000000000000ULL /* operation carried out by FID */ -#define MDS_OPEN_DEFAULT_LMV 040000000000000ULL /* open fetches default LMV */ +#define MDS_OPEN_DEFAULT_LMV 040000000000000ULL /* open fetches default LMV, + * or mkdir with default LMV + */ /* lustre internal open flags, which should not be set from user space */ #define MDS_OPEN_FL_INTERNAL (MDS_OPEN_HAS_EA | MDS_OPEN_HAS_OBJS | \ diff --git a/lustre/llite/dcache.c b/lustre/llite/dcache.c index 495092f..7746563 100644 --- a/lustre/llite/dcache.c +++ b/lustre/llite/dcache.c @@ -274,10 +274,11 @@ int ll_revalidate_it_finish(struct ptlrpc_request *request, ll_set_lock_data(ll_i2sbi(inode)->ll_md_exp, inode, it, &bits); if (bits & MDS_INODELOCK_LOOKUP) { - ll_update_dir_depth(de->d_parent->d_inode, inode); if (!ll_d_setup(de, true)) RETURN(-ENOMEM); d_lustre_revalidate(de); + if (S_ISDIR(inode->i_mode)) + ll_update_dir_depth_dmv(de->d_parent->d_inode, de); } RETURN(rc); @@ -318,8 +319,11 @@ static int ll_revalidate_dentry(struct dentry *dentry, /* If this is intermediate component path lookup and we were able to get * to this dentry, then its lock has not been revoked and the * path component is valid. 
*/ - if (lookup_flags & (LOOKUP_CONTINUE | LOOKUP_PARENT)) + if (lookup_flags & (LOOKUP_CONTINUE | LOOKUP_PARENT)) { + if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) + ll_update_dir_depth_dmv(dir, dentry); return 1; + } /* Symlink - always valid as long as the dentry was found */ /* only special case is to prevent ELOOP error from VFS during open @@ -350,6 +354,9 @@ static int ll_revalidate_dentry(struct dentry *dentry, if (dentry_may_statahead(dir, dentry)) ll_revalidate_statahead(dir, &dentry, dentry->d_inode == NULL); + if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) + ll_update_dir_depth_dmv(dir, dentry); + return 1; } diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c index 1377697..235d25f 100644 --- a/lustre/llite/dir.c +++ b/lustre/llite/dir.c @@ -691,6 +691,64 @@ int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump, RETURN(rc); } +/* get default LMV from client cache */ +static int ll_dir_get_default_lmv(struct inode *inode, struct lmv_user_md *lum) +{ + struct ll_inode_info *lli = ll_i2info(inode); + const struct lmv_stripe_md *lsm; + bool fs_dmv_got = false; + int rc = -ENODATA; + + ENTRY; +retry: + if (lli->lli_default_lsm_md) { + down_read(&lli->lli_lsm_sem); + lsm = lli->lli_default_lsm_md; + if (lsm) { + lum->lum_magic = lsm->lsm_md_magic; + lum->lum_stripe_count = lsm->lsm_md_stripe_count; + lum->lum_stripe_offset = lsm->lsm_md_master_mdt_index; + lum->lum_hash_type = lsm->lsm_md_hash_type; + lum->lum_max_inherit = lsm->lsm_md_max_inherit; + lum->lum_max_inherit_rr = lsm->lsm_md_max_inherit_rr; + rc = 0; + } + up_read(&lli->lli_lsm_sem); + } + + if (rc == -ENODATA && !is_root_inode(inode) && !fs_dmv_got) { + lli = ll_i2info(inode->i_sb->s_root->d_inode); + fs_dmv_got = true; + goto retry; + } + + if (!rc && fs_dmv_got) { + lli = ll_i2info(inode); + if (lum->lum_max_inherit != LMV_INHERIT_UNLIMITED) { + if (lum->lum_max_inherit == LMV_INHERIT_NONE || + lum->lum_max_inherit < LMV_INHERIT_END || + lum->lum_max_inherit 
> LMV_INHERIT_MAX || + lum->lum_max_inherit <= lli->lli_dir_depth) + GOTO(out, rc = -ENODATA); + + lum->lum_max_inherit -= lli->lli_dir_depth; + } + + if (lum->lum_max_inherit_rr != LMV_INHERIT_RR_UNLIMITED) { + if (lum->lum_max_inherit_rr == LMV_INHERIT_NONE || + lum->lum_max_inherit_rr < LMV_INHERIT_RR_END || + lum->lum_max_inherit_rr > LMV_INHERIT_RR_MAX || + lum->lum_max_inherit_rr <= lli->lli_dir_depth) + lum->lum_max_inherit_rr = LMV_INHERIT_RR_NONE; + + if (lum->lum_max_inherit_rr > lli->lli_dir_depth) + lum->lum_max_inherit_rr -= lli->lli_dir_depth; + } + } +out: + RETURN(rc); +} + int ll_dir_get_default_layout(struct inode *inode, void **plmm, int *plmm_size, struct ptlrpc_request **request, u64 valid, enum get_default_layout_type type) @@ -1658,7 +1716,6 @@ out: struct lmv_user_md __user *ulmv = uarg; struct lmv_user_md lum; struct ptlrpc_request *request = NULL; - struct ptlrpc_request *root_request = NULL; union lmv_mds_md *lmm = NULL; int lmmsize; u64 valid = 0; @@ -1673,6 +1730,19 @@ out: if (copy_from_user(&lum, ulmv, sizeof(*ulmv))) RETURN(-EFAULT); + /* get default LMV */ + if (lum.lum_magic == LMV_USER_MAGIC && + lum.lum_type != LMV_TYPE_RAW) { + rc = ll_dir_get_default_lmv(inode, &lum); + if (rc) + RETURN(rc); + + if (copy_to_user(ulmv, &lum, sizeof(lum))) + RETURN(-EFAULT); + + RETURN(0); + } + max_stripe_count = lum.lum_stripe_count; /* lum_magic will indicate which stripe the ioctl will like * to get, LMV_MAGIC_V1 is for normal LMV stripe, LMV_USER_MAGIC @@ -1685,61 +1755,14 @@ out: RETURN(-EINVAL); rc = ll_dir_getstripe_default(inode, (void **)&lmm, &lmmsize, - &request, &root_request, valid); + &request, NULL, valid); if (rc != 0) GOTO(finish_req, rc); - /* Get default LMV EA */ + /* get default LMV in raw mode */ if (lum.lum_magic == LMV_USER_MAGIC) { - struct lmv_user_md *lum; - struct ll_inode_info *lli; - - if (lmmsize > sizeof(*ulmv)) - GOTO(finish_req, rc = -EINVAL); - - lum = (struct lmv_user_md *)lmm; - if (lum->lum_max_inherit == 
LMV_INHERIT_NONE) - GOTO(finish_req, rc = -ENODATA); - - if (root_request != NULL) { - lli = ll_i2info(inode); - if (lum->lum_max_inherit != - LMV_INHERIT_UNLIMITED) { - if (lum->lum_max_inherit < - LMV_INHERIT_END || - lum->lum_max_inherit > - LMV_INHERIT_MAX || - lum->lum_max_inherit <= - lli->lli_dir_depth) - GOTO(finish_req, rc = -ENODATA); - - lum->lum_max_inherit -= - lli->lli_dir_depth; - } - - if (lum->lum_max_inherit_rr != - LMV_INHERIT_RR_UNLIMITED) { - if (lum->lum_max_inherit_rr == - LMV_INHERIT_NONE || - lum->lum_max_inherit_rr < - LMV_INHERIT_RR_END || - lum->lum_max_inherit_rr > - LMV_INHERIT_RR_MAX || - lum->lum_max_inherit_rr <= - lli->lli_dir_depth) { - lum->lum_max_inherit_rr = - LMV_INHERIT_RR_NONE; - goto out_copy; - } - - lum->lum_max_inherit_rr -= - lli->lli_dir_depth; - } - } -out_copy: if (copy_to_user(ulmv, lmm, lmmsize)) GOTO(finish_req, rc = -EFAULT); - GOTO(finish_req, rc); } @@ -1818,7 +1841,6 @@ out_tmp: OBD_FREE(tmp, lum_size); finish_req: ptlrpc_req_finished(request); - ptlrpc_req_finished(root_request); return rc; } case LL_IOC_REMOVE_ENTRY: { diff --git a/lustre/llite/file.c b/lustre/llite/file.c index e3f0ce5..8ac0027 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -708,10 +708,8 @@ retry: * of kernel will deal with that later. */ ll_set_lock_data(sbi->ll_md_exp, de->d_inode, itp, &bits); - if (bits & MDS_INODELOCK_LOOKUP) { + if (bits & MDS_INODELOCK_LOOKUP) d_lustre_revalidate(de); - ll_update_dir_depth(parent->d_inode, de->d_inode); - } /* if DoM bit returned along with LAYOUT bit then there * can be read-on-open data returned. @@ -719,6 +717,11 @@ retry: if (bits & MDS_INODELOCK_DOM && bits & MDS_INODELOCK_LAYOUT) ll_dom_finish_open(de->d_inode, req); } + /* open may not fetch LOOKUP lock, update dir depth and default LMV + * anyway. 
+ */ + if (!rc && S_ISDIR(de->d_inode->i_mode)) + ll_update_dir_depth_dmv(parent->d_inode, de); out: ptlrpc_req_finished(req); diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index a709728..9b98b94 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -187,7 +187,12 @@ struct ll_inode_info { * set upon dir open, and cleared when dir is closed, * statahead hit ratio is too low, or start statahead * thread failed. */ - unsigned short lli_sa_enabled:1; + unsigned short lli_sa_enabled:1, + /* default LMV is explicitly set in inode on MDT, this + * is for old server, or default LMV is set by + * "lfs setdirstripe -D". + */ + lli_default_lmv_set:1; /* generation for statahead */ unsigned int lli_sa_generation; /* rw lock protects lli_lsm_md */ @@ -1315,7 +1320,7 @@ int ll_statfs_internal(struct ll_sb_info *sbi, struct obd_statfs *osfs, u32 flags); int ll_update_inode(struct inode *inode, struct lustre_md *md); void ll_update_inode_flags(struct inode *inode, unsigned int ext_flags); -void ll_update_dir_depth(struct inode *dir, struct inode *inode); +void ll_update_dir_depth_dmv(struct inode *dir, struct dentry *de); int ll_read_inode2(struct inode *inode, void *opaque); void ll_truncate_inode_pages_final(struct inode *inode, struct cl_io *io); void ll_delete_inode(struct inode *inode); diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index fab2ee7..652b215 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -351,7 +351,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt) OBD_CONNECT2_DOM_LVB | OBD_CONNECT2_REP_MBITS | OBD_CONNECT2_ATOMIC_OPEN_LOCK | - OBD_CONNECT2_BATCH_RPC; + OBD_CONNECT2_BATCH_RPC | + OBD_CONNECT2_DMV_IMP_INHERIT; #ifdef HAVE_LRU_RESIZE_SUPPORT if (test_bit(LL_SBI_LRU_RESIZE, sbi->ll_flags)) @@ -1724,13 +1725,15 @@ static void ll_update_default_lsm_md(struct inode *inode, struct lustre_md *md) if (!md->default_lmv) { /* clear 
default lsm */ - if (lli->lli_default_lsm_md) { + if (lli->lli_default_lsm_md && lli->lli_default_lmv_set) { down_write(&lli->lli_lsm_sem); - if (lli->lli_default_lsm_md) { + if (lli->lli_default_lsm_md && + lli->lli_default_lmv_set) { lmv_free_memmd(lli->lli_default_lsm_md); lli->lli_default_lsm_md = NULL; + lli->lli_inherit_depth = 0; + lli->lli_default_lmv_set = 0; } - lli->lli_inherit_depth = 0; up_write(&lli->lli_lsm_sem); } RETURN_EXIT; @@ -1751,6 +1754,7 @@ static void ll_update_default_lsm_md(struct inode *inode, struct lustre_md *md) if (lli->lli_default_lsm_md) lmv_free_memmd(lli->lli_default_lsm_md); lli->lli_default_lsm_md = md->default_lmv; + lli->lli_default_lmv_set = 1; lsm_md_dump(D_INODE, md->default_lmv); md->default_lmv = NULL; up_write(&lli->lli_lsm_sem); @@ -2846,38 +2850,146 @@ static inline bool ll_default_lmv_inherited(struct lmv_stripe_md *pdmv, return true; } -/* update directory depth to ROOT, called after LOOKUP lock is fetched. */ -void ll_update_dir_depth(struct inode *dir, struct inode *inode) +/* if default LMV is implicitly inherited, subdir default LMV is maintained on + * client side. + */ +int ll_dir_default_lmv_inherit(struct inode *dir, struct inode *inode) { + struct ll_inode_info *plli = ll_i2info(dir); + struct ll_inode_info *lli = ll_i2info(inode); + struct lmv_stripe_md *plsm; + struct lmv_stripe_md *lsm; + int rc = 0; + + ENTRY; + + /* ROOT default LMV is not inherited */ + if (is_root_inode(dir) || + !(exp_connect_flags2(ll_i2mdexp(dir)) & + OBD_CONNECT2_DMV_IMP_INHERIT)) + RETURN(0); + + /* nothing to do if no default LMV on both */ + if (!plli->lli_default_lsm_md && !lli->lli_default_lsm_md) + RETURN(0); + + /* subdir default LMV comes from disk */ + if (lli->lli_default_lsm_md && lli->lli_default_lmv_set) + RETURN(0); + + /* delete subdir default LMV if parent's is deleted or becomes + * uninheritable. 
+ */ + down_read(&plli->lli_lsm_sem); + plsm = plli->lli_default_lsm_md; + if (!plsm || !lmv_is_inheritable(plsm->lsm_md_max_inherit)) { + if (lli->lli_default_lsm_md && !lli->lli_default_lmv_set) { + down_write(&lli->lli_lsm_sem); + if (lli->lli_default_lsm_md && + !lli->lli_default_lmv_set) { + lmv_free_memmd(lli->lli_default_lsm_md); + lli->lli_default_lsm_md = NULL; + lli->lli_inherit_depth = 0; + } + up_write(&lli->lli_lsm_sem); + } + GOTO(unlock_parent, rc = 0); + } + + /* do nothing if inherited LMV is unchanged */ + if (lli->lli_default_lsm_md) { + rc = 1; + down_read(&lli->lli_lsm_sem); + if (!lli->lli_default_lmv_set) + rc = lsm_md_inherited(plsm, lli->lli_default_lsm_md); + up_read(&lli->lli_lsm_sem); + if (rc == 1) + GOTO(unlock_parent, rc = 0); + } + + /* inherit default LMV */ + down_write(&lli->lli_lsm_sem); + if (lli->lli_default_lsm_md) { + /* checked above, but in case of race, check again with lock */ + if (lli->lli_default_lmv_set) + GOTO(unlock_child, rc = 0); + /* always update subdir default LMV in case parent's changed */ + lsm = lli->lli_default_lsm_md; + } else { + OBD_ALLOC_PTR(lsm); + if (!lsm) + GOTO(unlock_child, rc = -ENOMEM); + lli->lli_default_lsm_md = lsm; + } + + *lsm = *plsm; + lsm->lsm_md_max_inherit = lmv_inherit_next(plsm->lsm_md_max_inherit); + lsm->lsm_md_max_inherit_rr = + lmv_inherit_rr_next(plsm->lsm_md_max_inherit_rr); + lli->lli_inherit_depth = plli->lli_inherit_depth + 1; + + lsm_md_dump(D_INODE, lsm); + + EXIT; +unlock_child: + up_write(&lli->lli_lsm_sem); +unlock_parent: + up_read(&plli->lli_lsm_sem); + + return rc; +} + +/** + * Update directory depth and default LMV + * + * Update directory depth to ROOT and inherit default LMV from parent if + * parent's default LMV is inheritable. The default LMV set with command + * "lfs setdirstripe -D ..." is stored on MDT, while the inherited default LMV + * is generated at runtime on client side. 
+ * + * \param[in] dir parent directory inode + * \param[in] de dentry + */ +void ll_update_dir_depth_dmv(struct inode *dir, struct dentry *de) +{ + struct inode *inode = de->d_inode; struct ll_inode_info *plli; struct ll_inode_info *lli; - if (!S_ISDIR(inode->i_mode)) - return; - + LASSERT(S_ISDIR(inode->i_mode)); if (inode == dir) return; plli = ll_i2info(dir); lli = ll_i2info(inode); lli->lli_dir_depth = plli->lli_dir_depth + 1; - if (plli->lli_default_lsm_md && lli->lli_default_lsm_md) { - down_read(&plli->lli_lsm_sem); - down_read(&lli->lli_lsm_sem); - if (ll_default_lmv_inherited(plli->lli_default_lsm_md, + if (lli->lli_default_lsm_md && lli->lli_default_lmv_set) { + if (plli->lli_default_lsm_md) { + down_read(&plli->lli_lsm_sem); + down_read(&lli->lli_lsm_sem); + if (lsm_md_inherited(plli->lli_default_lsm_md, lli->lli_default_lsm_md)) - lli->lli_inherit_depth = - plli->lli_inherit_depth + 1; - else + lli->lli_inherit_depth = + plli->lli_inherit_depth + 1; + else + /* in case parent default LMV changed */ + lli->lli_inherit_depth = 0; + up_read(&lli->lli_lsm_sem); + up_read(&plli->lli_lsm_sem); + } else { + /* in case parent default LMV deleted */ lli->lli_inherit_depth = 0; - up_read(&lli->lli_lsm_sem); - up_read(&plli->lli_lsm_sem); + } } else { - lli->lli_inherit_depth = 0; + ll_dir_default_lmv_inherit(dir, inode); } - CDEBUG(D_INODE, DFID" depth %hu default LMV depth %hu\n", - PFID(&lli->lli_fid), lli->lli_dir_depth, lli->lli_inherit_depth); + if (lli->lli_default_lsm_md) + CDEBUG(D_INODE, + "%s "DFID" depth %hu %s default LMV inherit depth %hu\n", + de->d_name.name, PFID(&lli->lli_fid), lli->lli_dir_depth, + lli->lli_default_lmv_set ? 
"server" : "client", + lli->lli_inherit_depth); } void ll_truncate_inode_pages_final(struct inode *inode, struct cl_io *io) diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index 15f6cea..bfa031b 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -750,10 +750,13 @@ static int ll_lookup_it_finish(struct ptlrpc_request *request, if (!it_disposition(it, DISP_LOOKUP_NEG)) { /* We have the "lookup" lock, so unhide dentry */ - if (bits & MDS_INODELOCK_LOOKUP) { + if (bits & MDS_INODELOCK_LOOKUP) d_lustre_revalidate(*de); - ll_update_dir_depth(parent, (*de)->d_inode); - } + /* open may not fetch LOOKUP lock, update dir depth/dmv anyway + * in case it's used uninitialized. + */ + if (S_ISDIR(inode->i_mode)) + ll_update_dir_depth_dmv(parent, *de); if (encrypt) { rc = llcrypt_prepare_readdir(inode); @@ -1449,7 +1452,8 @@ static int ll_create_it(struct inode *dir, struct dentry *dentry, ll_set_lock_data(ll_i2sbi(dir)->ll_md_exp, inode, it, &bits); if (bits & MDS_INODELOCK_LOOKUP) { d_lustre_revalidate(dentry); - ll_update_dir_depth(dir, inode); + if (S_ISDIR(inode->i_mode)) + ll_update_dir_depth_dmv(dir, dentry); } RETURN(0); @@ -1543,6 +1547,9 @@ static int ll_new_node(struct inode *dir, struct dentry *dchild, struct ll_sb_info *sbi = ll_i2sbi(dir); struct llcrypt_str *disk_link = NULL; bool encrypt = false; + struct lmv_user_md *lum = NULL; + const void *data = NULL; + size_t datalen = 0; int err; ENTRY; @@ -1551,6 +1558,8 @@ static int ll_new_node(struct inode *dir, struct dentry *dchild, rdev = 0; if (!disk_link) RETURN(-EINVAL); + data = disk_link->name; + datalen = disk_link->len; } again: @@ -1559,8 +1568,36 @@ again: if (IS_ERR(op_data)) GOTO(err_exit, err = PTR_ERR(op_data)); - if (S_ISDIR(mode)) + if (S_ISDIR(mode)) { ll_qos_mkdir_prep(op_data, dir); + if ((exp_connect_flags2(ll_i2mdexp(dir)) & + OBD_CONNECT2_DMV_IMP_INHERIT) && + op_data->op_default_mea1 && !lum) { + const struct lmv_stripe_md *lsm; + + /* once DMV_IMP_INHERIT is set, pack 
default LMV in + * create request. + */ + OBD_ALLOC_PTR(lum); + if (!lum) + GOTO(err_exit, err = -ENOMEM); + + lsm = op_data->op_default_mea1; + lum->lum_magic = cpu_to_le32(lsm->lsm_md_magic); + lum->lum_stripe_count = + cpu_to_le32(lsm->lsm_md_stripe_count); + lum->lum_stripe_offset = + cpu_to_le32(lsm->lsm_md_master_mdt_index); + lum->lum_hash_type = + cpu_to_le32(lsm->lsm_md_hash_type); + lum->lum_max_inherit = lsm->lsm_md_max_inherit; + lum->lum_max_inherit_rr = lsm->lsm_md_max_inherit_rr; + lum->lum_pool_name[0] = 0; + op_data->op_bias |= MDS_CREATE_DEFAULT_LMV; + data = lum; + datalen = sizeof(*lum); + } + } if (test_bit(LL_SBI_FILE_SECCTX, sbi->ll_flags)) { err = ll_dentry_init_security(dchild, mode, &dchild->d_name, @@ -1621,11 +1658,13 @@ again: dchild->d_sb->s_op->destroy_inode(fakeinode); if (err) GOTO(err_exit, err); + + data = disk_link->name; + datalen = disk_link->len; } } - err = md_create(sbi->ll_md_exp, op_data, tgt ? disk_link->name : NULL, - tgt ? disk_link->len : 0, mode, + err = md_create(sbi->ll_md_exp, op_data, data, datalen, mode, from_kuid(&init_user_ns, current_fsuid()), from_kgid(&init_user_ns, current_fsgid()), current_cap(), rdev, &request); @@ -1750,9 +1789,10 @@ again: err_exit: if (request != NULL) ptlrpc_req_finished(request); - if (!IS_ERR_OR_NULL(op_data)) ll_finish_md_op_data(op_data); + if (lum) + OBD_FREE_PTR(lum); RETURN(err); } diff --git a/lustre/llite/statahead.c b/lustre/llite/statahead.c index b546b68..6d056fc 100644 --- a/lustre/llite/statahead.c +++ b/lustre/llite/statahead.c @@ -1625,10 +1625,10 @@ static int revalidate_statahead_dentry(struct inode *dir, GOTO(out, rc = -ESTALE); } - if ((bits & MDS_INODELOCK_LOOKUP) && - d_lustre_invalid(*dentryp)) { + if (bits & MDS_INODELOCK_LOOKUP) { d_lustre_revalidate(*dentryp); - ll_update_dir_depth(dir, (*dentryp)->d_inode); + if (S_ISDIR(inode->i_mode)) + ll_update_dir_depth_dmv(dir, *dentryp); } ll_intent_release(&it); diff --git a/lustre/lod/lod_object.c 
b/lustre/lod/lod_object.c index d069041..bf1982d 100644 --- a/lustre/lod/lod_object.c +++ b/lustre/lod/lod_object.c @@ -5412,6 +5412,17 @@ static int lod_get_default_lov_striping(const struct lu_env *env, RETURN(rc); } +static inline void lod_lum2lds(struct lod_default_striping *lds, + const struct lmv_user_md *lum) +{ + lds->lds_dir_def_stripe_count = le32_to_cpu(lum->lum_stripe_count); + lds->lds_dir_def_stripe_offset = le32_to_cpu(lum->lum_stripe_offset); + lds->lds_dir_def_hash_type = le32_to_cpu(lum->lum_hash_type); + lds->lds_dir_def_max_inherit = lum->lum_max_inherit; + lds->lds_dir_def_max_inherit_rr = lum->lum_max_inherit_rr; + lds->lds_dir_def_striping_set = 1; +} + /** * Get default directory striping. * @@ -5439,16 +5450,7 @@ static int lod_get_default_lmv_striping(const struct lu_env *env, struct lod_thread_info *info = lod_env_info(env); lmu = info->lti_ea_store; - - lds->lds_dir_def_stripe_count = - le32_to_cpu(lmu->lum_stripe_count); - lds->lds_dir_def_stripe_offset = - le32_to_cpu(lmu->lum_stripe_offset); - lds->lds_dir_def_hash_type = - le32_to_cpu(lmu->lum_hash_type); - lds->lds_dir_def_max_inherit = lmu->lum_max_inherit; - lds->lds_dir_def_max_inherit_rr = lmu->lum_max_inherit_rr; - lds->lds_dir_def_striping_set = 1; + lod_lum2lds(lds, lmu); } return 0; @@ -5468,6 +5470,7 @@ static int lod_get_default_lmv_striping(const struct lu_env *env, */ static int lod_get_default_striping(const struct lu_env *env, struct lod_object *lo, + struct dt_allocation_hint *ah, struct lod_default_striping *lds) { int rc, rc1; @@ -5482,9 +5485,15 @@ static int lod_get_default_striping(const struct lu_env *env, lds->lds_def_striping_set = 0; } - rc1 = lod_get_default_lmv_striping(env, lo, lds); - if (rc == 0 && rc1 < 0) - rc = rc1; + if (ah->dah_eadata_is_dmv) { + lod_lum2lds(lds, ah->dah_eadata); + } else if (ah->dah_dmv_imp_inherit) { + lds->lds_dir_def_striping_set = 0; + } else { + rc1 = lod_get_default_lmv_striping(env, lo, lds); + if (rc == 0 && rc1 < 0) + rc = 
rc1; + } return rc; } @@ -5683,14 +5692,15 @@ static void lod_ah_init(const struct lu_env *env, } if (likely(lp != NULL)) - lod_get_default_striping(env, lp, lds); + lod_get_default_striping(env, lp, ah, lds); /* It should always honour the specified stripes */ /* Note: old client (< 2.7)might also do lfs mkdir, whose EA * will have old magic. In this case, we should ignore the * stripe count and try to create dir by default stripe. */ - if (ah->dah_eadata != NULL && ah->dah_eadata_len != 0 && + if (ah->dah_eadata && ah->dah_eadata_len && + !ah->dah_eadata_is_dmv && (le32_to_cpu(lum1->lum_magic) == LMV_USER_MAGIC || le32_to_cpu(lum1->lum_magic) == LMV_USER_MAGIC_SPECIFIC)) { lc->ldo_dir_stripe_count = @@ -5761,6 +5771,13 @@ static void lod_ah_init(const struct lu_env *env, /* set count 0 to create normal directory */ if (lc->ldo_dir_stripe_count == 1) lc->ldo_dir_stripe_count = 0; + + /* do not save default LMV on server */ + if (ah->dah_dmv_imp_inherit) { + lds->lds_dir_def_striping_set = 0; + if (!lds->lds_def_striping_set) + lc->ldo_def_striping = NULL; + } } /* shrink the stripe count to max_mdt_stripecount if it is -1 diff --git a/lustre/mdc/mdc_lib.c b/lustre/mdc/mdc_lib.c index 5d571d3..39dbd21 100644 --- a/lustre/mdc/mdc_lib.c +++ b/lustre/mdc/mdc_lib.c @@ -224,6 +224,13 @@ void mdc_create_pack(struct req_capsule *pill, struct md_op_data *op_data, * create only, and don't restripe if object exists. */ flags |= MDS_OPEN_CREAT; + if (op_data->op_bias & MDS_CREATE_DEFAULT_LMV) { + /* borrow MDS_OPEN_DEFAULT_LMV flag to indicate parent default + * LMV is packed in create request. 
+ */ + flags |= MDS_OPEN_DEFAULT_LMV; + LASSERT(data); + } set_mrc_cr_flags(rec, flags); rec->cr_bias = op_data->op_bias; rec->cr_umask = current_umask(); diff --git a/lustre/mdd/mdd_dir.c b/lustre/mdd/mdd_dir.c index 46ae4de..e8aa9da 100644 --- a/lustre/mdd/mdd_dir.c +++ b/lustre/mdd/mdd_dir.c @@ -2777,12 +2777,15 @@ use_bigger_buffer: if (rc < 0) GOTO(out_stop, rc); - if (S_ISDIR(attr->la_mode)) { + /* adjust stripe count to 0 for 'lfs mkdir -c 1 ...' to avoid creating + * 1-stripe directory, MDS_OPEN_DEFAULT_LMV means ea is default LMV. + */ + if (unlikely(S_ISDIR(attr->la_mode) && spec->u.sp_ea.eadata && + !(spec->sp_cr_flags & MDS_OPEN_DEFAULT_LMV))) { struct lmv_user_md *lmu = spec->u.sp_ea.eadata; - /* - * migrate may create 1-stripe directory, so lod_ah_init() - * doesn't adjust stripe count from lmu. + /* migrate may create 1-stripe directory, adjust stripe count + * before lod_ah_init(). */ if (lmu && lmu->lum_stripe_count == cpu_to_le32(1)) lmu->lum_stripe_count = 0; diff --git a/lustre/mdd/mdd_object.c b/lustre/mdd/mdd_object.c index 5243a64..e0cf2db 100644 --- a/lustre/mdd/mdd_object.c +++ b/lustre/mdd/mdd_object.c @@ -3232,10 +3232,14 @@ void mdd_object_make_hint(const struct lu_env *env, struct mdd_object *parent, /* For striped directory, give striping EA to lod_ah_init, which will * decide the stripe_offset and stripe count by it. 
*/ - if (S_ISDIR(attr->la_mode) && - unlikely(spec != NULL && spec->sp_cr_flags & MDS_OPEN_HAS_EA)) { - hint->dah_eadata = spec->u.sp_ea.eadata; - hint->dah_eadata_len = spec->u.sp_ea.eadatalen; + if (S_ISDIR(attr->la_mode) && spec) { + if (unlikely(spec->sp_cr_flags & MDS_OPEN_HAS_EA)) { + hint->dah_eadata = spec->u.sp_ea.eadata; + hint->dah_eadata_len = spec->u.sp_ea.eadatalen; + if (spec->sp_cr_flags & MDS_OPEN_DEFAULT_LMV) + hint->dah_eadata_is_dmv = 1; + } + hint->dah_dmv_imp_inherit = spec->sp_dmv_imp_inherit; } else if (S_ISREG(attr->la_mode) && spec->sp_cr_flags & MDS_OPEN_APPEND) { hint->dah_append_stripe_count = mdd->mdd_append_stripe_count; diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index 28334a9..a8f86b9 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -6219,11 +6219,13 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m, m->mdt_enable_remote_dir_gid = 0; m->mdt_enable_remote_rename = 1; m->mdt_enable_striped_dir = 1; + m->mdt_enable_dmv_implicit_inherit = 1; m->mdt_dir_restripe_nsonly = 1; m->mdt_max_mod_rpcs_in_flight = OBD_MAX_RIF_DEFAULT; atomic_set(&m->mdt_mds_mds_conns, 0); atomic_set(&m->mdt_async_commit_count, 0); + atomic_set(&m->mdt_dmv_old_client_count, 0); m->mdt_lu_dev.ld_ops = &mdt_lu_ops; m->mdt_lu_dev.ld_obd = obd; @@ -6882,6 +6884,12 @@ static int mdt_connect_internal(const struct lu_env *env, if (!mdt->mdt_lut.lut_dt_conf.ddp_has_lseek_data_hole) data->ocd_connect_flags2 &= ~OBD_CONNECT2_LSEEK; + if (!OCD_HAS_FLAG(data, MDS_MDS) && !OCD_HAS_FLAG(data, LIGHTWEIGHT) && + !OCD_HAS_FLAG2(data, DMV_IMP_INHERIT)) { + atomic_inc(&mdt->mdt_dmv_old_client_count); + mdt->mdt_enable_dmv_implicit_inherit = 0; + } + return 0; } @@ -7002,6 +7010,8 @@ static int mdt_export_cleanup(struct obd_export *exp) static int mdt_obd_disconnect(struct obd_export *exp) { + struct obd_connect_data *data = &exp->exp_connect_data; + struct mdt_device *mdt = mdt_dev(exp->exp_obd->obd_lu_dev); int rc; 
ENTRY; @@ -7012,13 +7022,14 @@ static int mdt_obd_disconnect(struct obd_export *exp) if (!(exp->exp_flags & OBD_OPT_FORCE)) tgt_grant_sanity_check(exp->exp_obd, __func__); - if ((exp_connect_flags(exp) & OBD_CONNECT_MDS_MDS) && - !(exp_connect_flags(exp) & OBD_CONNECT_LIGHTWEIGHT)) { - struct mdt_device *mdt = mdt_dev(exp->exp_obd->obd_lu_dev); + if (OCD_HAS_FLAG(data, MDS_MDS) && !OCD_HAS_FLAG(data, LIGHTWEIGHT) && + atomic_dec_and_test(&mdt->mdt_mds_mds_conns)) + mdt_disable_slc(mdt); - if (atomic_dec_and_test(&mdt->mdt_mds_mds_conns)) - mdt_disable_slc(mdt); - } + if (!OCD_HAS_FLAG(data, MDS_MDS) && !OCD_HAS_FLAG(data, LIGHTWEIGHT) && + !OCD_HAS_FLAG2(data, DMV_IMP_INHERIT) && + atomic_dec_and_test(&mdt->mdt_dmv_old_client_count)) + mdt->mdt_enable_dmv_implicit_inherit = 1; rc = server_disconnect_export(exp); if (rc != 0) diff --git a/lustre/mdt/mdt_internal.h b/lustre/mdt/mdt_internal.h index b11a29f..342f8d3 100644 --- a/lustre/mdt/mdt_internal.h +++ b/lustre/mdt/mdt_internal.h @@ -309,7 +309,12 @@ struct mdt_device { mdt_readonly:1, mdt_skip_lfsck:1, /* dir restripe migrate dirent only */ - mdt_dir_restripe_nsonly:1; + mdt_dir_restripe_nsonly:1, + /* this is enabled by default, but once an + * old client joins, disable this to handle + * inherited default LMV on server. 
+ */ + mdt_enable_dmv_implicit_inherit:1; /* user with gid can create remote/striped * dir, and set default dir stripe */ @@ -346,6 +351,9 @@ struct mdt_device { struct mdt_object *mdt_md_root; struct mdt_dir_restriper mdt_restriper; + + /* count of old clients that don't support DMV implicit inherit */ + atomic_t mdt_dmv_old_client_count; }; #define MDT_SERVICE_WATCHDOG_FACTOR (2) diff --git a/lustre/mdt/mdt_lib.c b/lustre/mdt/mdt_lib.c index 9ec750d..7b0d2bc 100644 --- a/lustre/mdt/mdt_lib.c +++ b/lustre/mdt/mdt_lib.c @@ -1361,14 +1361,29 @@ static int mdt_create_unpack(struct mdt_thread_info *info) RETURN(-EFAULT); } else { req_capsule_extend(pill, &RQF_MDS_REINT_CREATE_ACL); - if (S_ISDIR(attr->la_mode) && - req_capsule_get_size(pill, &RMF_EADATA, RCL_CLIENT) > 0) { - sp->u.sp_ea.eadata = - req_capsule_client_get(pill, &RMF_EADATA); - sp->u.sp_ea.eadatalen = - req_capsule_get_size(pill, &RMF_EADATA, - RCL_CLIENT); - sp->sp_cr_flags |= MDS_OPEN_HAS_EA; + if (S_ISDIR(attr->la_mode)) { + struct obd_export *exp = mdt_info_req(info)->rq_export; + + sp->sp_dmv_imp_inherit = + info->mti_mdt->mdt_enable_dmv_implicit_inherit; + if (req_capsule_get_size(pill, &RMF_EADATA, RCL_CLIENT) + > 0) { + sp->u.sp_ea.eadata = + req_capsule_client_get(pill, + &RMF_EADATA); + sp->u.sp_ea.eadatalen = + req_capsule_get_size(pill, &RMF_EADATA, + RCL_CLIENT); + sp->sp_cr_flags |= MDS_OPEN_HAS_EA; + } + if (OCD_HAS_FLAG2(&exp->exp_connect_data, + DMV_IMP_INHERIT)) { + if ((sp->sp_cr_flags & MDS_OPEN_DEFAULT_LMV) && + !(sp->sp_cr_flags & MDS_OPEN_HAS_EA)) + RETURN(-EPROTO); + } else if (sp->sp_cr_flags & MDS_OPEN_DEFAULT_LMV) { + RETURN(-EPROTO); + } } } diff --git a/lustre/mdt/mdt_lproc.c b/lustre/mdt/mdt_lproc.c index b5634ef..c813c05 100644 --- a/lustre/mdt/mdt_lproc.c +++ b/lustre/mdt/mdt_lproc.c @@ -1366,6 +1366,38 @@ static ssize_t enable_remote_subdir_mount_store(struct kobject *kobj, } LUSTRE_RW_ATTR(enable_remote_subdir_mount); +static ssize_t 
enable_dmv_implicit_inherit_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct obd_device *obd = container_of(kobj, struct obd_device, + obd_kset.kobj); + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + + return scnprintf(buf, PAGE_SIZE, "%u\n", + mdt->mdt_enable_dmv_implicit_inherit); +} + +static ssize_t enable_dmv_implicit_inherit_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, + size_t count) +{ + struct obd_device *obd = container_of(kobj, struct obd_device, + obd_kset.kobj); + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + bool val; + int rc; + + rc = kstrtobool(buffer, &val); + if (rc) + return rc; + + mdt->mdt_enable_dmv_implicit_inherit = val; + return count; +} +LUSTRE_RW_ATTR(enable_dmv_implicit_inherit); + /** * Show if the OFD enforces T10PI checksum. * @@ -1578,6 +1610,7 @@ static struct attribute *mdt_attrs[] = { &lustre_attr_checksum_t10pi_enforce.attr, &lustre_attr_enable_remote_subdir_mount.attr, &lustre_attr_max_mod_rpcs_in_flight.attr, + &lustre_attr_enable_dmv_implicit_inherit.attr, NULL, }; diff --git a/lustre/mdt/mdt_reint.c b/lustre/mdt/mdt_reint.c index 0007a46..ce0e657 100644 --- a/lustre/mdt/mdt_reint.c +++ b/lustre/mdt/mdt_reint.c @@ -539,8 +539,12 @@ static int mdt_create(struct mdt_thread_info *info) if (!fid_is_md_operative(rr->rr_fid1)) RETURN(-EPERM); + /* MDS_OPEN_DEFAULT_LMV means eadata is parent default LMV, which is set + * if client maintains inherited default LMV + */ if (S_ISDIR(ma->ma_attr.la_mode) && - spec->u.sp_ea.eadata != NULL && spec->u.sp_ea.eadatalen != 0) { + spec->u.sp_ea.eadata != NULL && spec->u.sp_ea.eadatalen != 0 && + !(spec->sp_cr_flags & MDS_OPEN_DEFAULT_LMV)) { const struct lmv_user_md *lum = spec->u.sp_ea.eadata; struct obd_export *exp = mdt_info_req(info)->rq_export; diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index 6c25239..9ed044d 100644 --- a/lustre/obdclass/lprocfs_status.c +++ 
b/lustre/obdclass/lprocfs_status.c @@ -649,7 +649,7 @@ static const char *const obd_connect_names[] = { "atomic_open_lock", /* 0x4000000 */ "name_encryption", /* 0x8000000 */ "mkdir_replay", /* 0x10000000 */ - "dmv_inherit", /* 0x20000000 */ + "dmv_imp_inherit", /* 0x20000000 */ "encryption_fid2path", /* 0x40000000 */ NULL }; diff --git a/lustre/ptlrpc/wiretest.c b/lustre/ptlrpc/wiretest.c index 63ff323..88b67a3 100644 --- a/lustre/ptlrpc/wiretest.c +++ b/lustre/ptlrpc/wiretest.c @@ -2458,6 +2458,8 @@ void lustre_assert_wire_constants(void) (unsigned)MDS_FID_OP); LASSERTF(MDS_MIGRATE_NSONLY == 0x00800000UL, "found 0x%.8xUL\n", (unsigned)MDS_MIGRATE_NSONLY); + LASSERTF(MDS_CREATE_DEFAULT_LMV == 0x01000000UL, "found 0x%.8xUL\n", + (unsigned)MDS_CREATE_DEFAULT_LMV); /* Checks for struct mdt_body */ LASSERTF((int)sizeof(struct mdt_body) == 216, "found %lld\n", diff --git a/lustre/tests/sanityn.sh b/lustre/tests/sanityn.sh index 78dd8bb..eda0b4b 100755 --- a/lustre/tests/sanityn.sh +++ b/lustre/tests/sanityn.sh @@ -6162,6 +6162,116 @@ test_113 () { } run_test 113 "check servers of specified fs" +check_default_lmv() { + local dir=$1 + + local enabled + local dmv + local index + local count + local inherit + local inherit_rr + local raw + + enabled=$(do_facet mds1 \ + $LCTL get_param -n mdt.*-MDT0000*.enable_dmv_implicit_inherit) + + dmv=$($LFS getdirstripe -D $dir) + echo $dir $dmv + index=$(echo $dmv | awk '{ print $4 }') + (( index == $2 )) || error "$dir default stripe index $index != $2" + + count=$(echo $dmv | awk '{ print $2 }') + (( count == $3 )) || error "$dir default stripe count $count != $3" + + inherit=$(echo $dmv | awk '{ print $8 }') + (( inherit == $4 )) || error "$dir default max-inherit $inherit != $4" + + if [ $index -eq -1 ]; then + inherit_rr=$(echo $dmv | awk '{ print $10 }') + (( inherit_rr == $5 )) || + error "$dir default max-inherit-rr $inherit_rr != $5" + fi + + # with --raw, print default LMV stored in inode, otherwise print nothing + 
raw=$($LFS getdirstripe -D --raw $dir) + if (( enabled == 1 )); then + [ -z $raw ] || + error "implicit inherited DMV is printed with --raw" + else + # if disabled, dmv is stored in inode, which will always + # print max-inherit-rr + echo $dir $raw + [[ $raw =~ $dmv.* ]] || error "$dir raw $raw != dmv $dmv" + fi +} + +test_dmv_imp_inherit() { + local dmv + local raw + local index + local count + local inherit + local inherit_rr + + rm -rf $DIR/$tdir || error "rm $tdir failed" + mkdir -p $DIR/$tdir || error "mkdir $tdir failed" + + # set dir default LMV + $LFS setdirstripe -D -c1 -X4 --max-inherit-rr 2 $DIR/$tdir || + error "setdirstripe -D $tdir failed" + dmv=$($LFS getdirstripe -D $DIR/$tdir) + raw=$($LFS getdirstripe -D --raw $DIR/$tdir) + [ "$dmv" == "$raw" ] || error "$dmv != $raw" + + mkdir -p $DIR/$tdir/l1/l2/l3 || error "mkdir $DIR/$tdir/l1/l2/l3 failed" + check_default_lmv $DIR/$tdir/l1/l2/l3 -1 1 1 0 + check_default_lmv $DIR2/$tdir/l1/l2/l3 -1 1 1 0 + + # below tests are valid only when this feature is enabled + local enabled=$(do_facet mds1 \ + $LCTL get_param -n mdt.*-MDT0000*.enable_dmv_implicit_inherit) + + (( enabled == 1 )) || return 0 + + # set l2 default LMV, dmv of l3 should change immediately + $LFS setdirstripe -D -i1 -c2 -X4 $DIR/$tdir/l1/l2 || + error "setdirstripe -D $tdir/l1/l2 failed" + + check_default_lmv $DIR/$tdir/l1/l2/l3 1 2 3 + check_default_lmv $DIR2/$tdir/l1/l2/l3 1 2 3 + + # change tdir default LMV, dmv of l3 should be unchanged because dmv + # of l2 is explicitly set + $LFS setdirstripe -D -i2 -c2 -X3 $DIR/$tdir || + error "setdirstripe -D $tdir failed" + + check_default_lmv $DIR/$tdir/l1 2 2 2 + check_default_lmv $DIR2/$tdir/l1 2 2 2 + check_default_lmv $DIR/$tdir/l1/l2/l3 1 2 3 + check_default_lmv $DIR2/$tdir/l1/l2/l3 1 2 3 +} + +test_114() { + (( MDSCOUNT >= 2 )) || + skip "We need at least 2 MDTs for this test" + + (( MDS1_VERSION >= $(version_code 2.15.55.45) )) || + skip "Need server version at least 2.15.55.45" + 
test_dmv_imp_inherit + + # disable dmv_imp_inherit to simulate old client + local mdts=$(comma_list $(mdts_nodes)) + + do_nodes $mdts $LCTL set_param -n \ + mdt.*MDT*.enable_dmv_implicit_inherit=0 + test_dmv_imp_inherit + do_nodes $mdts $LCTL set_param -n \ + mdt.*MDT*.enable_dmv_implicit_inherit=1 +} +run_test 114 "implicit default LMV inherit" + log "cleanup: ======================================================" # kill and wait in each test only guarentee script finish, but command in script diff --git a/lustre/utils/lfs.c b/lustre/utils/lfs.c index b405982..d34b0ec 100644 --- a/lustre/utils/lfs.c +++ b/lustre/utils/lfs.c @@ -378,8 +378,8 @@ command_t cmdlist[] = { "or recursively for all directories in a directory tree.\n" "usage: getdirstripe [--mdt-count|-c] [--mdt-index|-m|-i]\n" " [--help|-h] [--hex-idx] [--mdt-hash|-H]\n" - " [--obd|-O UUID] [--recursive|-r] [--yaml|-y]\n" - " [--verbose|-v] [--default|-D]\n" + " [--obd|-O UUID] [--recursive|-r] [--raw|-R]\n" + " [--yaml|-y] [--verbose|-v] [--default|-D]\n" " [--max-inherit|-X]\n" " [--max-inherit-rr] ..."}, {"mkdir", lfs_setdirstripe, 0, @@ -6447,6 +6447,7 @@ static int lfs_getdirstripe(int argc, char **argv) { .val = 'm', .name = "mdt-index", .has_arg = no_argument }, { .val = 'O', .name = "obd", .has_arg = required_argument }, { .val = 'r', .name = "recursive", .has_arg = no_argument }, + { .val = 'R', .name = "raw", .has_arg = no_argument }, { .val = 'T', .name = "mdt-count", .has_arg = no_argument }, { .val = 'v', .name = "verbose", .has_arg = no_argument }, { .val = 'X', .name = "max-inherit", .has_arg = no_argument }, @@ -6459,7 +6460,7 @@ static int lfs_getdirstripe(int argc, char **argv) param.fp_get_lmv = 1; while ((c = getopt_long(argc, argv, - "cDhHimO:rtTvXy", long_opts, NULL)) != -1) { + "cDhHimO:rRtTvXy", long_opts, NULL)) != -1) { switch (c) { case 'c': case 'T': @@ -6497,6 +6498,9 @@ static int lfs_getdirstripe(int argc, char **argv) case 'r': param.fp_recursive = 1; break; + case 'R': + 
param.fp_raw = 1; + break; case 'v': param.fp_verbose |= VERBOSE_DEFAULT; param.fp_verbose |= VERBOSE_DETAIL; diff --git a/lustre/utils/liblustreapi.c b/lustre/utils/liblustreapi.c index d4887db..b9a2f8e 100644 --- a/lustre/utils/liblustreapi.c +++ b/lustre/utils/liblustreapi.c @@ -1561,6 +1561,8 @@ again: } else { param->fp_lmv_md->lum_magic = LMV_MAGIC_V1; } + if (param->fp_raw) + param->fp_lmv_md->lum_type = LMV_TYPE_RAW; ret = ioctl(*d, LL_IOC_LMV_GETSTRIPE, param->fp_lmv_md); @@ -6146,6 +6148,8 @@ static int cb_getstripe(char *path, int p, int *dp, void *data, if (param->fp_get_default_lmv) { struct lmv_user_md *lum = param->fp_lmv_md; + if (param->fp_raw) + goto out; lum->lum_magic = LMV_USER_MAGIC; lum->lum_stripe_count = 0; lum->lum_stripe_offset = LMV_OFFSET_DEFAULT; diff --git a/lustre/utils/wirecheck.c b/lustre/utils/wirecheck.c index 15fb0ec..faf0c0b 100644 --- a/lustre/utils/wirecheck.c +++ b/lustre/utils/wirecheck.c @@ -1195,6 +1195,7 @@ check_mds_op_bias(void) CHECK_VALUE_X(MDS_SETSTRIPE_CREATE); CHECK_VALUE_X(MDS_FID_OP); CHECK_VALUE_X(MDS_MIGRATE_NSONLY); + CHECK_VALUE_X(MDS_CREATE_DEFAULT_LMV); } static void diff --git a/lustre/utils/wiretest.c b/lustre/utils/wiretest.c index 9822479..c5dd604 100644 --- a/lustre/utils/wiretest.c +++ b/lustre/utils/wiretest.c @@ -2521,6 +2521,8 @@ void lustre_assert_wire_constants(void) (unsigned)MDS_FID_OP); LASSERTF(MDS_MIGRATE_NSONLY == 0x00800000UL, "found 0x%.8xUL\n", (unsigned)MDS_MIGRATE_NSONLY); + LASSERTF(MDS_CREATE_DEFAULT_LMV == 0x01000000UL, "found 0x%.8xUL\n", + (unsigned)MDS_CREATE_DEFAULT_LMV); /* Checks for struct mdt_body */ LASSERTF((int)sizeof(struct mdt_body) == 216, "found %lld\n", -- 1.8.3.1