From 915239f279f9ae3a5d510aed4763a263a9156628 Mon Sep 17 00:00:00 2001
From: Alexey Lyashkov
Date: Wed, 23 Apr 2025 11:23:53 +0300
Subject: [PATCH] LU-18942 obdclass: rework limits for zfs

The ZFS ARC is not controlled by the Linux memory subsystem, so its
size should be accounted for by all caches, not just the lu_object
cache. Let's limit the number of objects freed in a single batch to
avoid high CPU usage in the ARC prune threads and increased latency
in providing free space.

Test-Parameters: trivial
HPE-bug-id: LUS-12814, LUS-12813
Fixes: 79b4ae9139c ("LU-1305 osd: osd_handler")
Fixes: 0123baecc4e ("LU-5164 osd: Limit lu_object cache")
Signed-off-by: Alexey Lyashkov
Change-Id: I5342149b185c61c56087d970f26eb4f197a597ef
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/58918
Tested-by: jenkins
Tested-by: Maloo
Reviewed-by: Oleg Drokin
---
 libcfs/include/libcfs/linux/linux-mem.h | 10 +---------
 libcfs/libcfs/linux/linux-prim.c        | 19 +++++++++++++++++++
 lustre/include/lu_object.h              |  2 ++
 lustre/obdclass/lu_object.c             | 36 +++++++++++++++---------------------
 lustre/osd-zfs/osd_handler.c            |  2 +-
 5 files changed, 38 insertions(+), 31 deletions(-)

diff --git a/libcfs/include/libcfs/linux/linux-mem.h b/libcfs/include/libcfs/linux/linux-mem.h
index 18a900e..fa35779 100644
--- a/libcfs/include/libcfs/linux/linux-mem.h
+++ b/libcfs/include/libcfs/linux/linux-mem.h
@@ -28,15 +28,7 @@
 #include
 #endif
 
-#ifdef HAVE_TOTALRAM_PAGES_AS_FUNC
- #ifndef cfs_totalram_pages
-  #define cfs_totalram_pages()	totalram_pages()
- #endif
-#else
- #ifndef cfs_totalram_pages
-  #define cfs_totalram_pages()	totalram_pages
- #endif
-#endif
+unsigned long cfs_totalram_pages(void);
 
 #ifndef HAVE_MEMALLOC_RECLAIM
 static inline unsigned int memalloc_noreclaim_save(void)
diff --git a/libcfs/libcfs/linux/linux-prim.c b/libcfs/libcfs/linux/linux-prim.c
index d50db3d..bbb24c7 100644
--- a/libcfs/libcfs/linux/linux-prim.c
+++ b/libcfs/libcfs/linux/linux-prim.c
@@ -408,3 +408,22 @@ char *nla_strdup(const struct nlattr *nla, gfp_t flags)
 }
 EXPORT_SYMBOL(nla_strdup);
 #endif /* !HAVE_NLA_STRDUP */
+
+static unsigned int libcfs_reserved_cache;
+module_param(libcfs_reserved_cache, int, 0644);
+MODULE_PARM_DESC(libcfs_reserved_cache, "system page cache reservation in mbytes (for arc cache)");
+
+#ifdef HAVE_TOTALRAM_PAGES_AS_FUNC
+ #define _totalram_pages() totalram_pages()
+#else
+ #define _totalram_pages() totalram_pages
+#endif
+
+unsigned long cfs_totalram_pages(void)
+{
+	if (libcfs_reserved_cache > _totalram_pages() / 2)
+		libcfs_reserved_cache = _totalram_pages() / 2;
+
+	return _totalram_pages() - libcfs_reserved_cache;
+}
+EXPORT_SYMBOL(cfs_totalram_pages);
diff --git a/lustre/include/lu_object.h b/lustre/include/lu_object.h
index 1e83626..4b57649 100644
--- a/lustre/include/lu_object.h
+++ b/lustre/include/lu_object.h
@@ -737,6 +737,8 @@ void lu_object_unhash(const struct lu_env *env, struct lu_object *o);
 int lu_site_purge_objects(const struct lu_env *env, struct lu_site *s,
			  int nr, int canblock);
 
+void lu_site_limit(const struct lu_env *env, struct lu_site *s, u64 limit);
+
 static inline int lu_site_purge(const struct lu_env *env, struct lu_site *s,
				int nr)
 {
diff --git a/lustre/obdclass/lu_object.c b/lustre/obdclass/lu_object.c
index b77dec4..61b6a01 100644
--- a/lustre/obdclass/lu_object.c
+++ b/lustre/obdclass/lu_object.c
@@ -62,11 +62,9 @@ enum {
 	LU_CACHE_PERCENT_DEFAULT = 20
 };
 
-#define LU_CACHE_NR_MAX_ADJUST		512
+#define LU_CACHE_NR_MAX_ADJUST		1024
 #define LU_CACHE_NR_UNLIMITED		-1
 #define LU_CACHE_NR_DEFAULT		LU_CACHE_NR_UNLIMITED
-/** This is set to roughly (20 * OSS_NTHRS_MAX) to prevent thrashing */
-#define LU_CACHE_NR_ZFS_LIMIT		10240
 #define LU_CACHE_NR_MIN			4096
 #define LU_CACHE_NR_MAX			0x80000000UL
 
@@ -622,23 +620,30 @@ int lu_object_invariant(const struct lu_object *o)
  * maximum number of objects is capped by LU_CACHE_MAX_ADJUST. This ensures
  * that many concurrent threads will not accidentally purge the entire cache.
  */
-static void lu_object_limit(const struct lu_env *env,
-			    struct lu_device *dev)
+void lu_site_limit(const struct lu_env *env, struct lu_site *s,
+		   u64 nr)
 {
-	u64 size, nr;
+	u64 size;
 
-	if (lu_cache_nr == LU_CACHE_NR_UNLIMITED)
+	if (nr == LU_CACHE_NR_UNLIMITED)
 		return;
 
-	size = atomic_read(&dev->ld_site->ls_obj_hash.nelems);
-	nr = (u64)lu_cache_nr;
+	size = atomic_read(&s->ls_obj_hash.nelems);
 	if (size <= nr)
 		return;
 
-	lu_site_purge_objects(env, dev->ld_site,
+	lu_site_purge_objects(env, s,
			      min_t(u64, size - nr, LU_CACHE_NR_MAX_ADJUST),
			      0);
 }
+EXPORT_SYMBOL(lu_site_limit);
+
+static void lu_object_limit(const struct lu_env *env,
+			    struct lu_device *dev)
+{
+	lu_site_limit(env, dev->ld_site, (u64)lu_cache_nr);
+}
+
 static struct lu_object *htable_lookup(const struct lu_env *env,
				       struct lu_device *dev,
@@ -1001,17 +1006,6 @@ static void lu_htable_limits(struct lu_device *top)
 	unsigned long cache_size;
 
 	/*
-	 * For ZFS based OSDs the cache should be disabled by default. This
-	 * allows the ZFS ARC maximum flexibility in determining what buffers
-	 * to cache. If Lustre has objects or buffer which it wants to ensure
-	 * always stay cached it must maintain a hold on them.
-	 */
-	if (strcmp(top->ld_type->ldt_name, LUSTRE_OSD_ZFS_NAME) == 0) {
-		lu_cache_nr = LU_CACHE_NR_ZFS_LIMIT;
-		return;
-	}
-
-	/*
 	 * Calculate hash table size, assuming that we want reasonable
 	 * performance when 20% of total memory is occupied by cache of
 	 * lu_objects.
diff --git a/lustre/osd-zfs/osd_handler.c b/lustre/osd-zfs/osd_handler.c
index 1853b4c..6ef6fd5 100644
--- a/lustre/osd-zfs/osd_handler.c
+++ b/lustre/osd-zfs/osd_handler.c
@@ -88,7 +88,7 @@ static void arc_prune_func(int64_t bytes, void *private)
 		return;
 	}
 
-	lu_site_purge(&env, site, (bytes >> 10));
+	lu_site_limit(&env, site, (bytes >> 10));
 
 	lu_env_fini(&env);
 }
-- 
1.8.3.1
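
Usage note (not part of the patch): the new libcfs_reserved_cache module
parameter is registered with mode 0644, so it can be set when libcfs is
loaded or adjusted on a running system through sysfs. The value below is
hypothetical; note that cfs_totalram_pages() subtracts the raw value from
the page count, so the arithmetic treats it as pages even though the
MODULE_PARM_DESC string says "mbytes".

  # /etc/modprobe.d/lustre.conf: reserve RAM for the ZFS ARC at load time
  # (4194304 pages = 16 GiB with 4 KiB pages; pick a value for your node)
  options libcfs libcfs_reserved_cache=4194304

  # or adjust on a live system; cfs_totalram_pages() clamps the value to
  # half of total RAM on its next call
  echo 4194304 > /sys/module/libcfs/parameters/libcfs_reserved_cache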