From 39a465f6b3c11cbd892d456f68fdb867044d693c Mon Sep 17 00:00:00 2001 From: Qian Yingjin Date: Mon, 26 Aug 2024 15:50:14 +0800 Subject: [PATCH] LU-18169 osc: do not use deprecated NR_UNSTABLE_NFS A performance test script hung the whole system on a NUMA configuration running SLES15sp3. However, when we disable the unstable_check for unstable pages, the test passes: lctl set_param llite.*.unstable_stats=0 The root cause was finally found: we are using NR_UNSTABLE_NFS wrongly. It was deprecated on SLES15sp3 after being backported from the upstream Linux kernel with some modifications of its own: NR_UNSTABLE_NFS, /* NFS unstable pages - DEPRECATED DO NOT USE */ This is a bug specific to the SLES15 kernel. Thus, during autoconf checking, we check whether NR_UNSTABLE_NFS is defined but deprecated. Moreover, cgroups (memcg) accounting does not work on newer kernels, because NR_UNSTABLE_NFS was removed and the code wrongly uses NR_ZONE_WRITE_PENDING for memory accounting. According to the kernel patch: "mm/writeback: discard NR_UNSTABLE_NFS, use NR_WRITEBACK instead" kernel v5.8-rc1 commit: 8d92890bd6b8502d6aee4b37430ae6444ade7a8c it should account unstable pages in NR_WRITEBACK and WB_WRITEBACK. This patch fixes it accordingly. This patch also removes the unnecessary NR_ZONE_WRITE_PENDING check. 
Fixes: d4094475c99(LU-16699 osc: Prefer NR_ZONE_WRITE_PENDING) Signed-off-by: Qian Yingjin Change-Id: I28e36aa152f1c683ad717a94c779069caab6c54c Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/56162 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Shaun Tancheff Reviewed-by: Andreas Dilger Reviewed-by: James Simmons Reviewed-by: Oleg Drokin --- libcfs/autoconf/lustre-libcfs.m4 | 66 ++++++++++++++++++++++------------- lustre/include/lustre_compat.h | 4 +-- lustre/osc/osc_page.c | 74 +++++++++++++++++++++++++++++++--------- 3 files changed, 101 insertions(+), 43 deletions(-) diff --git a/libcfs/autoconf/lustre-libcfs.m4 b/libcfs/autoconf/lustre-libcfs.m4 index 91542f9..a79e451 100644 --- a/libcfs/autoconf/lustre-libcfs.m4 +++ b/libcfs/autoconf/lustre-libcfs.m4 @@ -858,28 +858,6 @@ AC_DEFUN([LIBCFS_RHASHTABLE_INSERT_FAST], [ ]) # LIBCFS_RHASHTABLE_INSERT_FAST # -# LIBCFS_HAVE_NR_ZONE_WRITE_PENDING -# -# kernel v4.7-5966-g5a1c84b404a7 -# mm: remove reclaim and compaction retry approximations -# -AC_DEFUN([LIBCFS_SRC_HAVE_NR_ZONE_WRITE_PENDING], [ - LB2_LINUX_TEST_SRC([nr_zone_write_pending_exists], [ - #include - ],[ - enum zone_stat_item item = NR_ZONE_WRITE_PENDING; - (void)item; - ],[-Werror]) -]) -AC_DEFUN([LIBCFS_HAVE_NR_ZONE_WRITE_PENDING], [ - LB2_MSG_LINUX_TEST_RESULT([if NR_ZONE_WRITE_PENDING enum is available], - [nr_zone_write_pending_exists], [ - AC_DEFINE(HAVE_NR_ZONE_WRITE_PENDING, 1, - [NR_ZONE_WRITE_PENDING is still in use.]) - ]) -]) # LIBCFS_HAVE_NR_ZONE_WRITE_PENDING - -# # Kernel version 4.7-rc1 commit 8f6fd83c6c5ec66a4a70c728535ddcdfef4f3697 # added 3rd arg to rhashtable_walk_init # @@ -1041,6 +1019,29 @@ AC_DEFUN([LIBCFS_HOTPLUG_STATE_MACHINE], [ ]) # LIBCFS_HOTPLUG_STATE_MACHINE # +# LIBCFS_HAVE_NODE_NR_WRITEBACK +# +# kernel v4.10-rc1 commit 11fb998986a72aa7e997d96d63d52582a01228c5 +# mm: move most file-based accounting to the node +# i.e. 
NR_UNSTABLE_NFS and NR_WRITEBACK are moved into node_stat_item enum +# +AC_DEFUN([LIBCFS_SRC_HAVE_NODE_NR_WRITEBACK], [ + LB2_LINUX_TEST_SRC([node_nr_writeback_exists], [ + #include + ],[ + enum node_stat_item item = NR_WRITEBACK; + (void)item; + ],[-Werror]) +]) +AC_DEFUN([LIBCFS_HAVE_NODE_NR_WRITEBACK], [ + LB2_MSG_LINUX_TEST_RESULT([if NR_WRITEBACK node_stat_item enum is available], + [node_nr_writeback_exists], [ + AC_DEFINE(HAVE_NODE_NR_WRITEBACK, 1, + [NR_WRITEBACK is moved into the node.]) + ]) +]) # LIBCFS_HAVE_NODE_NR_WRITEBACK + +# # LIBCFS_REFCOUNT_T # # Kernel version 4.10-rc3 commit f405df5de3170c00e5c54f8b7cf4766044a032ba @@ -2022,6 +2023,22 @@ AC_DEFUN([LIBCFS_HAVE_NR_UNSTABLE_NFS], [ ]) # LIBCFS_HAVE_NR_UNSTABLE_NFS # +# LIBCFS_NR_UNSTABLE_NFS_DEPRECATED +# +# SLES15 still defines NR_UNSTABLE_NFS, but DEPRECATED it +# +AC_DEFUN([LIBCFS_NR_UNSTABLE_NFS_DEPRECATED], [ + AC_MSG_CHECKING([if NR_UNSTABLE_NFS is defined but DEPRECATED]) + AS_IF([grep -q -E "NFS unstable pages - DEPRECATED DO NOT USE" "$LINUX/include/linux/mmzone.h" 2>/dev/null], [ + AC_DEFINE([HAVE_NR_UNSTABLE_NFS_DEPRECATED], 1, + [NR_UNSTABLE_NFS is defined but deprecated]) + AC_MSG_RESULT(yes) + ],[ + AC_MSG_RESULT(no) + ]) +]) # LIBCFS_NR_UNSTABLE_NFS_DEPRECATED + +# # LIBCFS_HAVE_MMAP_LOCK # # kernel v5.8-rc1~83^2~24 @@ -2496,7 +2513,6 @@ AC_DEFUN([LIBCFS_PROG_LINUX_SRC], [ LIBCFS_SRC_RHASHTABLE_INSERT_FAST LIBCFS_SRC_RHASHTABLE_WALK_INIT_3ARG # 4.8 - LIBCFS_SRC_HAVE_NR_ZONE_WRITE_PENDING LIBCFS_SRC_RHASHTABLE_LOOKUP LIBCFS_SRC_RHLTABLE LIBCFS_SRC_STACKTRACE_OPS @@ -2506,6 +2522,7 @@ AC_DEFUN([LIBCFS_PROG_LINUX_SRC], [ # 4.10 LIBCFS_SRC_HOTPLUG_STATE_MACHINE LIBCFS_SRC_NLA_PUT_U64_64BIT + LIBCFS_SRC_HAVE_NODE_NR_WRITEBACK # 4.11 LIBCFS_SRC_NL_EXT_ACK LIBCFS_SRC_RHASHTABLE_LOOKUP_GET_INSERT_FAST @@ -2648,7 +2665,6 @@ AC_DEFUN([LIBCFS_PROG_LINUX_RESULTS], [ LIBCFS_RHASHTABLE_INSERT_FAST LIBCFS_RHASHTABLE_WALK_INIT_3ARG # 4.8 - LIBCFS_HAVE_NR_ZONE_WRITE_PENDING LIBCFS_RHASHTABLE_LOOKUP 
LIBCFS_RHLTABLE LIBCFS_STACKTRACE_OPS @@ -2658,6 +2674,7 @@ AC_DEFUN([LIBCFS_PROG_LINUX_RESULTS], [ # 4.10 LIBCFS_HOTPLUG_STATE_MACHINE LIBCFS_NLA_PUT_U64_64BIT + LIBCFS_HAVE_NODE_NR_WRITEBACK # 4.11 LIBCFS_NL_EXT_ACK LIBCFS_RHASHTABLE_LOOKUP_GET_INSERT_FAST @@ -2717,6 +2734,7 @@ AC_DEFUN([LIBCFS_PROG_LINUX_RESULTS], [ LIBCFS_IP_SET_TOS LIBCFS_VMALLOC_2ARGS LIBCFS_HAVE_NR_UNSTABLE_NFS + LIBCFS_NR_UNSTABLE_NFS_DEPRECATED LIBCFS_KERNEL_SETSOCKOPT LIBCFS_KEY_NEED_UNLINK LIBCFS_SEC_RELEASE_SECCTX diff --git a/lustre/include/lustre_compat.h b/lustre/include/lustre_compat.h index e77df24..2f4a74a 100644 --- a/lustre/include/lustre_compat.h +++ b/lustre/include/lustre_compat.h @@ -772,8 +772,8 @@ ll_shrinker_create(struct ll_shrinker_ops *ops, unsigned int flags, #define ll_access_ok(ptr, len) access_ok(ptr, len) #endif -#ifdef HAVE_WB_STAT_MOD -#define __add_wb_stat(wb, item, amount) wb_stat_mod(wb, item, amount) +#ifndef HAVE_WB_STAT_MOD +#define wb_stat_mod(wb, item, amount) __add_wb_stat(wb, item, amount) #endif #ifdef HAVE_SEC_RELEASE_SECCTX_1ARG diff --git a/lustre/osc/osc_page.c b/lustre/osc/osc_page.c index 57a23a7..287ef8b 100644 --- a/lustre/osc/osc_page.c +++ b/lustre/osc/osc_page.c @@ -1129,38 +1129,28 @@ long osc_unevict_cache_shrink(const struct lu_env *env, struct client_obd *cli) RETURN(rc); } +#if defined(HAVE_NR_UNSTABLE_NFS) && !defined(HAVE_NODE_NR_WRITEBACK) + +/* NR_UNSTABLE_NFS is still in enum zone_stat_item */ /** * Atomic operations are expensive. We accumulate the accounting for the * same page zone to get better performance. * In practice this can work pretty good because the pages in the same RPC * are likely from the same page zone. */ -#ifdef HAVE_NR_UNSTABLE_NFS -/* Old kernels use a separate counter for unstable pages, - * newer kernels treat them like any other writeback. 
- * (see Linux commit: v5.7-467-g8d92890bd6b8) - */ -#define NR_ZONE_WRITE_PENDING ((enum zone_stat_item)NR_UNSTABLE_NFS) -#elif !defined(HAVE_NR_ZONE_WRITE_PENDING) -#define NR_ZONE_WRITE_PENDING ((enum zone_stat_item)NR_WRITEBACK) -#endif - static inline void unstable_page_accounting(struct ptlrpc_bulk_desc *desc, int factor) { - int page_count; void *zone = NULL; int count = 0; int i; ENTRY; - page_count = desc->bd_iov_count; - CDEBUG(D_PAGE, "%s %d unstable pages\n", - factor == 1 ? "adding" : "removing", page_count); + factor == 1 ? "adding" : "removing", desc->bd_iov_count); - for (i = 0; i < page_count; i++) { + for (i = 0; i < desc->bd_iov_count; i++) { void *pz = page_zone(desc->bd_vec[i].bv_page); if (likely(pz == zone)) { @@ -1169,7 +1159,7 @@ static inline void unstable_page_accounting(struct ptlrpc_bulk_desc *desc, } if (count > 0) { - mod_zone_page_state(zone, NR_ZONE_WRITE_PENDING, + mod_zone_page_state(zone, NR_UNSTABLE_NFS, factor * count); count = 0; } @@ -1177,11 +1167,61 @@ static inline void unstable_page_accounting(struct ptlrpc_bulk_desc *desc, ++count; } if (count > 0) - mod_zone_page_state(zone, NR_ZONE_WRITE_PENDING, + mod_zone_page_state(zone, NR_UNSTABLE_NFS, factor * count); + EXIT; +} + +#else + +#if defined(HAVE_NR_UNSTABLE_NFS) && !defined(HAVE_NR_UNSTABLE_NFS_DEPRECATED) +/* NR_UNSTABLE_NFS is moved into enum node_stat_item and not deprecated. */ +#define __NR_WRITEBACK NR_UNSTABLE_NFS +#else +/* + * NR_UNSTABLE_NFS was removed or defined but deprecated (i.e. SLES15), use + * NR_WRITEBACK instead. + */ +#define __NR_WRITEBACK NR_WRITEBACK +#endif + +/* TODO: add WB_WRITEBACK accounting. */ +static inline void unstable_page_accounting(struct ptlrpc_bulk_desc *desc, + int factor) +{ + void *node = NULL; + int count = 0; + int i; + + ENTRY; + + CDEBUG(D_PAGE, "%s %d unstable pages\n", + factor == 1 ? 
"adding" : "removing", desc->bd_iov_count); + + for (i = 0; i < desc->bd_iov_count; i++) { + void *pn = page_pgdat(desc->bd_vec[i].bv_page); + + if (likely(pn == node)) { + ++count; + continue; + } + + if (count > 0) { + mod_node_page_state(node, __NR_WRITEBACK, + factor * count); + count = 0; + } + + node = pn; + ++count; + } + + if (count > 0) + mod_node_page_state(node, __NR_WRITEBACK, factor * count); EXIT; } +#endif static inline void add_unstable_pages(struct ptlrpc_bulk_desc *desc) { -- 1.8.3.1