From ed0a3ec46fa73b8287ce99e1f307462ef5ee6970 Mon Sep 17 00:00:00 2001 From: Lai Siyao Date: Sat, 4 Nov 2023 09:32:59 -0400 Subject: [PATCH] LU-17307 mdt: get dirent count by request Add MA_DIRENT_CNT/LA_DIRENT_CNT to notify osd to get dirent count. Set it in mdt_getattr_name_lock() only when auto-split is enabled, so it won't cause overhead when auto-split is disabled. Also change the oo_dirent_count type to atomic_t so the result does not become inaccurate over time from repeated addition/removal (in the future the count may be used to tell whether a directory is empty or to compare directories). In osd_dirent_count() set oo_dirent_count to 0 before iteration to avoid multiple threads iterating at the same time; this means the result may be inaccurate in that case, but it will eventually become accurate. Lustre-change: https://review.whamcloud.com/53229 Lustre-commit: TBD (from 50080036674faecfe8a94ebcbb0bdbdbeddac53d) Fixes: 03a4431dac ("LU-11025 osd: osd_attr_get() returns dirent count") Signed-off-by: Lai Siyao Change-Id: I2be6c0dcfda1c98995a269585c5d8d781a8a3b42 Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/53275 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger --- lustre/include/lu_object.h | 2 +- lustre/include/md_object.h | 1 + lustre/include/uapi/linux/lustre/lustre_user.h | 1 + lustre/mdt/mdt_handler.c | 9 +++- lustre/osd-ldiskfs/osd_handler.c | 71 +++++++++++++++----------- lustre/osd-ldiskfs/osd_internal.h | 2 +- 6 files changed, 53 insertions(+), 33 deletions(-) diff --git a/lustre/include/lu_object.h b/lustre/include/lu_object.h index a8c72d2..6a696a4 100644 --- a/lustre/include/lu_object.h +++ b/lustre/include/lu_object.h @@ -460,7 +460,7 @@ struct lu_attr { __u64 la_dirent_count; }; -#define LU_DIRENT_COUNT_UNSET ~0ULL +#define LU_DIRENT_COUNT_UNSET -1 /** * Layer in the layered object. 
diff --git a/lustre/include/md_object.h b/lustre/include/md_object.h index 039abce..8c8b4b9 100644 --- a/lustre/include/md_object.h +++ b/lustre/include/md_object.h @@ -76,6 +76,7 @@ enum ma_valid { MA_LMV_DEF = BIT(8), MA_SOM = BIT(9), MA_FORCE_LOG = BIT(10), /* forced close logged in mdt_mfd_close */ + MA_DIRENT_CNT = BIT(11), }; typedef enum { diff --git a/lustre/include/uapi/linux/lustre/lustre_user.h b/lustre/include/uapi/linux/lustre/lustre_user.h index 9a0caeb..9d8b626 100644 --- a/lustre/include/uapi/linux/lustre/lustre_user.h +++ b/lustre/include/uapi/linux/lustre/lustre_user.h @@ -1569,6 +1569,7 @@ enum la_valid { LA_LSIZE = 1 << 17, /* 0x20000 */ LA_LBLOCKS = 1 << 18, /* 0x40000 */ LA_BTIME = 1 << 19, /* 0x80000 */ + LA_DIRENT_CNT = 1 << 20, /* 0x100000 */ /** * Attributes must be transmitted to OST objects */ diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index abc29b6..179549b 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -1245,6 +1245,10 @@ int mdt_attr_get_complex(struct mdt_thread_info *info, if (need & MA_INODE) { ma->ma_need = MA_INODE; + if (need & MA_DIRENT_CNT) + ma->ma_attr.la_valid |= LA_DIRENT_CNT; + else + ma->ma_attr.la_valid &= ~LA_DIRENT_CNT; rc = mo_attr_get(env, next, ma); if (rc) GOTO(out, rc); @@ -2059,6 +2063,9 @@ static int mdt_getattr_name_lock(struct mdt_thread_info *info, if (parent == NULL) RETURN(-ENOENT); + if (info->mti_mdt->mdt_enable_dir_auto_split) + ma_need |= MA_DIRENT_CNT; + if (info->mti_cross_ref) { /* Only getattr on the child. Parent is on another node. 
*/ mdt_set_disposition(info, ldlm_rep, @@ -2100,7 +2107,7 @@ static int mdt_getattr_name_lock(struct mdt_thread_info *info, RETURN(-ENOENT); } - rc = mdt_getattr_internal(info, child, 0); + rc = mdt_getattr_internal(info, child, ma_need); if (unlikely(rc != 0)) { mdt_object_unlock(info, child, lhc, 1); RETURN(rc); diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index ca767a2..5d3e97e 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -1563,7 +1563,7 @@ static int osd_object_init(const struct lu_env *env, struct lu_object *l, result = 0; } } - obj->oo_dirent_count = LU_DIRENT_COUNT_UNSET; + atomic_set(&obj->oo_dirent_count, LU_DIRENT_COUNT_UNSET); LINVRNT(osd_invariant(obj)); return result; @@ -2871,34 +2871,36 @@ static int osd_dirent_count(const struct lu_env *env, struct dt_object *dt, LASSERT(S_ISDIR(obj->oo_inode->i_mode)); LASSERT(fid_is_namespace_visible(lu_object_fid(&obj->oo_dt.do_lu))); - if (obj->oo_dirent_count != LU_DIRENT_COUNT_UNSET) { - *count = obj->oo_dirent_count; - RETURN(0); - } - /* directory not initialized yet */ if (!dt->do_index_ops) { *count = 0; RETURN(0); } + spin_lock(&obj->oo_guard); + *count = atomic_read(&obj->oo_dirent_count); + if (*count == LU_DIRENT_COUNT_UNSET) + atomic_set(&obj->oo_dirent_count, 0); + spin_unlock(&obj->oo_guard); + if (*count != LU_DIRENT_COUNT_UNSET) + RETURN(0); + + *count = 0; iops = &dt->do_index_ops->dio_it; it = iops->init(env, dt, LUDA_64BITHASH); if (IS_ERR(it)) - RETURN(PTR_ERR(it)); + GOTO(out, rc = PTR_ERR(it)); rc = iops->load(env, it, 0); if (rc < 0) { - if (rc == -ENODATA) { + if (rc == -ENODATA) rc = 0; - *count = 0; - } - GOTO(out, rc); + GOTO(put, rc); } if (rc > 0) rc = iops->next(env, it); - for (*count = 0; rc == 0 || rc == -ESTALE; rc = iops->next(env, it)) { + for (; rc == 0 || rc == -ESTALE; rc = iops->next(env, it)) { if (rc == -ESTALE) continue; @@ -2907,13 +2909,22 @@ static int osd_dirent_count(const struct lu_env 
*env, struct dt_object *dt, (*count)++; } - if (rc == 1) { - obj->oo_dirent_count = *count; + if (rc == 1 || rc == -ESTALE) rc = 0; - } -out: +put: iops->put(env, it); iops->fini(env, it); +out: + /* If counting dirents failed, use the current count (if any). + * + * At worst this means the directory will not be split until the + * count can be completed successfully (remount or oo_dirent_count + * incremented by adding new entries). This avoids re-walking + * the whole directory on each access and hitting the same error. + */ + if (rc && *count == 0) + *count = LU_DIRENT_COUNT_UNSET; + atomic_set(&obj->oo_dirent_count, *count); RETURN(rc); } @@ -2945,8 +2956,11 @@ static int osd_attr_get(const struct lu_env *env, struct dt_object *dt, spin_unlock(&obj->oo_guard); if (S_ISDIR(obj->oo_inode->i_mode) && + (attr->la_valid & LA_DIRENT_CNT) && fid_is_namespace_visible(lu_object_fid(&dt->do_lu))) rc = osd_dirent_count(env, dt, &attr->la_dirent_count); + else + attr->la_valid &= ~LA_DIRENT_CNT; return rc; } @@ -3512,7 +3526,7 @@ static int osd_mkdir(struct osd_thread_info *info, struct osd_object *obj, oth = container_of(th, struct osd_thandle, ot_super); LASSERT(oth->ot_handle->h_transaction != NULL); if (fid_is_namespace_visible(lu_object_fid(&obj->oo_dt.do_lu))) - obj->oo_dirent_count = 0; + atomic_set(&obj->oo_dirent_count, 0); result = osd_mkfile(info, obj, mode, hint, th, attr); return result; @@ -5739,16 +5753,8 @@ static int osd_index_ea_delete(const struct lu_env *env, struct dt_object *dt, rc = PTR_ERR(bh); } - if (!rc && fid_is_namespace_visible(lu_object_fid(&dt->do_lu)) && - obj->oo_dirent_count != LU_DIRENT_COUNT_UNSET) { - /* NB, dirent count may not be accurate, because it's counted - * without lock. 
- */ - if (obj->oo_dirent_count) - obj->oo_dirent_count--; - else - obj->oo_dirent_count = LU_DIRENT_COUNT_UNSET; - } + if (!rc && fid_is_namespace_visible(lu_object_fid(&dt->do_lu))) + atomic_dec_if_positive(&obj->oo_dirent_count); if (hlock != NULL) ldiskfs_htree_unlock(hlock); else @@ -6097,9 +6103,14 @@ static int osd_ea_add_rec(const struct lu_env *env, struct osd_object *pobj, hlock, th); } } - if (!rc && fid_is_namespace_visible(lu_object_fid(&pobj->oo_dt.do_lu)) - && pobj->oo_dirent_count != LU_DIRENT_COUNT_UNSET) - pobj->oo_dirent_count++; + if (!rc && fid_is_namespace_visible(lu_object_fid(&pobj->oo_dt.do_lu))){ + int dirent_count = atomic_read(&pobj->oo_dirent_count); + + /* avoid extremely unlikely 2B-entry directory overflow case */ + if (dirent_count != LU_DIRENT_COUNT_UNSET && + likely(dirent_count < INT_MAX - NR_CPUS)) + atomic_inc(&pobj->oo_dirent_count); + } if (hlock != NULL) ldiskfs_htree_unlock(hlock); diff --git a/lustre/osd-ldiskfs/osd_internal.h b/lustre/osd-ldiskfs/osd_internal.h index d00c6a5..380cc2b 100644 --- a/lustre/osd-ldiskfs/osd_internal.h +++ b/lustre/osd-ldiskfs/osd_internal.h @@ -158,12 +158,12 @@ struct osd_object { /* the i_flags in LMA */ __u32 oo_lma_flags; + atomic_t oo_dirent_count; const struct lu_env *oo_owner; struct list_head oo_xattr_list; struct lu_object_header *oo_header; - __u64 oo_dirent_count; }; struct osd_obj_seq { -- 1.8.3.1