LU-18942 obdclass: rework limits for zfs

author Alexey Lyashkov <alexey.lyashkov@hpe.com>

Wed, 23 Apr 2025 08:23:53 +0000 (11:23 +0300)

committer Oleg Drokin <green@whamcloud.com>

Tue, 17 Jun 2025 02:49:14 +0000 (02:49 +0000)
author Alexey Lyashkov <alexey.lyashkov@hpe.com>
Wed, 23 Apr 2025 08:23:53 +0000 (11:23 +0300)
committer Oleg Drokin <green@whamcloud.com>
Tue, 17 Jun 2025 02:49:14 +0000 (02:49 +0000)
diff --git a/libcfs/include/libcfs/linux/linux-mem.h b/libcfs/include/libcfs/linux/linux-mem.h

index 18a900e..fa35779 100644 (file)
--- a/libcfs/include/libcfs/linux/linux-mem.h
+++ b/libcfs/include/libcfs/linux/linux-mem.h
@@ -28,15 +28,7 @@
  #include <linux/sched/mm.h>
  #endif
  
-#ifdef HAVE_TOTALRAM_PAGES_AS_FUNC
- #ifndef cfs_totalram_pages
-  #define cfs_totalram_pages() totalram_pages()
- #endif
-#else
- #ifndef cfs_totalram_pages
-  #define cfs_totalram_pages() totalram_pages
- #endif
-#endif
+unsigned long cfs_totalram_pages(void); /* total RAM less reserved cache */
  
  #ifndef HAVE_MEMALLOC_RECLAIM
  static inline unsigned int memalloc_noreclaim_save(void)
diff --git a/libcfs/libcfs/linux/linux-prim.c b/libcfs/libcfs/linux/linux-prim.c

index d50db3d..bbb24c7 100644 (file)
--- a/libcfs/libcfs/linux/linux-prim.c
+++ b/libcfs/libcfs/linux/linux-prim.c
@@ -408,3 +408,39 @@ char *nla_strdup(const struct nlattr *nla, gfp_t flags)
  }
  EXPORT_SYMBOL(nla_strdup);
  #endif /* !HAVE_NLA_STRDUP */
+
+/* Amount of RAM, in mbytes, that cfs_totalram_pages() hides from callers */
+static unsigned int libcfs_reserved_cache;
+module_param(libcfs_reserved_cache, uint, 0644);
+MODULE_PARM_DESC(libcfs_reserved_cache, "system page cache reservation in mbytes (for arc cache)");
+
+#ifdef HAVE_TOTALRAM_PAGES_AS_FUNC
+  #define _totalram_pages() totalram_pages()
+#else
+  #define _totalram_pages() totalram_pages
+#endif
+
+/**
+ * Report the kernel's total RAM page count less the libcfs_reserved_cache
+ * reservation.
+ *
+ * \retval number of pages; never less than half of the kernel's total,
+ *         because the reservation is clamped to half of RAM
+ */
+unsigned long cfs_totalram_pages(void)
+{
+       /* read once so the clamp and the subtraction agree */
+       unsigned long total = _totalram_pages();
+       /* mbytes -> pages; 64-bit so the shift cannot overflow */
+       u64 reserved = ((u64)libcfs_reserved_cache << 20) >> PAGE_SHIFT;
+
+       /*
+        * Clamp a local copy only, so the module parameter keeps the
+        * value in mbytes that the administrator set.
+        */
+       if (reserved > total / 2)
+               reserved = total / 2;
+
+       return total - reserved;
+}
+EXPORT_SYMBOL(cfs_totalram_pages);
diff --git a/lustre/include/lu_object.h b/lustre/include/lu_object.h

index 1e83626..4b57649 100644 (file)
--- a/lustre/include/lu_object.h
+++ b/lustre/include/lu_object.h
@@ -737,6 +737,8 @@ void lu_object_unhash(const struct lu_env *env, struct lu_object *o);
  int lu_site_purge_objects(const struct lu_env *env, struct lu_site *s, int nr,
                           int canblock);
  
+void lu_site_limit(const struct lu_env *env, struct lu_site *s, u64 limit);
+
  static inline int lu_site_purge(const struct lu_env *env, struct lu_site *s,
                                 int nr)
  {
diff --git a/lustre/obdclass/lu_object.c b/lustre/obdclass/lu_object.c

index b77dec4..61b6a01 100644 (file)
--- a/lustre/obdclass/lu_object.c
+++ b/lustre/obdclass/lu_object.c
@@ -62,11 +62,9 @@ enum {
         LU_CACHE_PERCENT_DEFAULT = 20
  };
  
-#define        LU_CACHE_NR_MAX_ADJUST          512
+#define        LU_CACHE_NR_MAX_ADJUST          1024
  #define        LU_CACHE_NR_UNLIMITED           -1
  #define        LU_CACHE_NR_DEFAULT             LU_CACHE_NR_UNLIMITED
-/** This is set to roughly (20 * OSS_NTHRS_MAX) to prevent thrashing */
-#define        LU_CACHE_NR_ZFS_LIMIT           10240
  
  #define        LU_CACHE_NR_MIN                 4096
  #define        LU_CACHE_NR_MAX                 0x80000000UL
@@ -622,23 +620,30 @@ int lu_object_invariant(const struct lu_object *o)
   * maximum number of objects is capped by LU_CACHE_MAX_ADJUST.  This ensures
   * that many concurrent threads will not accidentally purge the entire cache.
   */
-static void lu_object_limit(const struct lu_env *env,
-                           struct lu_device *dev)
+void lu_site_limit(const struct lu_env *env, struct lu_site *s,
+                  u64 nr)
  {
-       u64 size, nr;
+       u64 size;
  
-       if (lu_cache_nr == LU_CACHE_NR_UNLIMITED)
+       if (nr == (u64)LU_CACHE_NR_UNLIMITED)
                 return;
  
-       size = atomic_read(&dev->ld_site->ls_obj_hash.nelems);
-       nr = (u64)lu_cache_nr;
+       size = atomic_read(&s->ls_obj_hash.nelems);
         if (size <= nr)
                 return;
  
-       lu_site_purge_objects(env, dev->ld_site,
+       lu_site_purge_objects(env, s,
                               min_t(u64, size - nr, LU_CACHE_NR_MAX_ADJUST),
                               0);
  }
+EXPORT_SYMBOL(lu_site_limit);
+
+/* Purge excess objects from the site of @dev, limited by lu_cache_nr. */
+static void lu_object_limit(const struct lu_env *env,
+                           struct lu_device *dev)
+{
+       lu_site_limit(env, dev->ld_site, (u64)lu_cache_nr);
+}
  
  static struct lu_object *htable_lookup(const struct lu_env *env,
                                        struct lu_device *dev,
@@ -1001,17 +1006,6 @@ static void lu_htable_limits(struct lu_device *top)
         unsigned long cache_size;
  
         /*
-        * For ZFS based OSDs the cache should be disabled by default.  This
-        * allows the ZFS ARC maximum flexibility in determining what buffers
-        * to cache.  If Lustre has objects or buffer which it wants to ensure
-        * always stay cached it must maintain a hold on them.
-        */
-       if (strcmp(top->ld_type->ldt_name, LUSTRE_OSD_ZFS_NAME) == 0) {
-               lu_cache_nr = LU_CACHE_NR_ZFS_LIMIT;
-               return;
-       }
-
-       /*
          * Calculate hash table size, assuming that we want reasonable
          * performance when 20% of total memory is occupied by cache of
          * lu_objects.
diff --git a/lustre/osd-zfs/osd_handler.c b/lustre/osd-zfs/osd_handler.c

index 1853b4c..6ef6fd5 100644 (file)
--- a/lustre/osd-zfs/osd_handler.c
+++ b/lustre/osd-zfs/osd_handler.c
@@ -88,7 +88,7 @@ static void arc_prune_func(int64_t bytes, void *private)
                 return;
         }
  
-       lu_site_purge(&env, site, (bytes >> 10));
+       lu_site_limit(&env, site, (bytes >> 10));
  
         lu_env_fini(&env);
  }
author	Alexey Lyashkov <alexey.lyashkov@hpe.com>
	Wed, 23 Apr 2025 08:23:53 +0000 (11:23 +0300)
committer	Oleg Drokin <green@whamcloud.com>
	Tue, 17 Jun 2025 02:49:14 +0000 (02:49 +0000)
libcfs/include/libcfs/linux/linux-mem.h		patch | blob | history
libcfs/libcfs/linux/linux-prim.c		patch | blob | history
lustre/include/lu_object.h		patch | blob | history
lustre/obdclass/lu_object.c		patch | blob | history
lustre/osd-zfs/osd_handler.c		patch | blob | history