From b698abd415bc4a810f307611fe984e50e007581e Mon Sep 17 00:00:00 2001 From: Jian Yu Date: Wed, 3 Apr 2024 00:38:47 -0700 Subject: [PATCH] LU-17504 build: fix array-index-out-of-bounds warning On Linux kernel 6.5, due to commit 2d47c6956ab3 ("ubsan: Tighten UBSAN_BOUNDS on GCC"), flexible trailing arrays declared like 'lc_array_sum[1];' will generate warnings when CONFIG_UBSAN & co. is enabled: UBSAN: array-index-out-of-bounds in lprocfs_status.c:1609:17 index 1 is out of range for type '__s64 [1]' Since LPROCFS_STATS_FLAG_IRQ_SAFE flag is only used in one place - obd_memory() counter, we can just remove it and change obd_memory over to a regular percpu_counter. This would both simplify the lprocfs_counter() code, move over to using more kernel functionality instead of libcfs, as well as reduce overhead slightly for the memory accounting code. Change-Id: Ic461c4b30317bfd2b1e9f5b6be84c4a7fb4e3eb9 Signed-off-by: Jian Yu Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54365 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: James Simmons Reviewed-by: Oleg Drokin --- lustre/autoconf/lustre-core.m4 | 23 ++++++++++++++++ lustre/include/lprocfs_status.h | 18 +------------ lustre/include/obd_support.h | 54 ++++++++++++++++---------------------- lustre/obdclass/class_obd.c | 41 +++++++---------------------- lustre/obdclass/lprocfs_counters.c | 20 ++------------ lustre/obdclass/lprocfs_status.c | 38 ++++----------------------- 6 files changed, 64 insertions(+), 130 deletions(-) diff --git a/lustre/autoconf/lustre-core.m4 b/lustre/autoconf/lustre-core.m4 index 6b6c0fb..dbaeea3 100644 --- a/lustre/autoconf/lustre-core.m4 +++ b/lustre/autoconf/lustre-core.m4 @@ -2360,6 +2360,27 @@ AC_DEFUN([LC_HAVE_FSMAP_HEADER], [ ]) # LC_HAVE_FSMAP_HEADER # +# LC_HAVE_PERCPU_COUNTER_ADD_BATCH +# +# Linux commit v4.11-12447-g104b4e5139fe +# percpu_counter: Rename __percpu_counter_add to percpu_counter_add_batch +# +AC_DEFUN([LC_SRC_HAVE_PERCPU_COUNTER_ADD_BATCH], [ + LB2_LINUX_TEST_SRC([percpu_counter_add_batch_exists], [ + #include + ],[ + (void)percpu_counter_add_batch(NULL, 0, 0); + ],[-Werror]) +]) +AC_DEFUN([LC_HAVE_PERCPU_COUNTER_ADD_BATCH], [ + LB2_MSG_LINUX_TEST_RESULT([if 'percpu_counter_add_batch()' exists], + [percpu_counter_add_batch_exists], [ + AC_DEFINE(HAVE_PERCPU_COUNTER_ADD_BATCH, 1, + ['percpu_counter_add_batch()' exists]) + ]) +]) # LC_HAVE_PERCPU_COUNTER_ADD_BATCH + +# # Kernel version 4.12 commit 47f38c539e9a42344ff5a664942075bd4df93876 # CURRENT_TIME is not 64 bit time safe so it was replaced with # current_time() @@ -4606,6 +4627,7 @@ AC_DEFUN([LC_PROG_LINUX_SRC], [ LC_SRC_HAVE_KEY_USAGE_REFCOUNT LC_SRC_HAVE_CRYPTO_MAX_ALG_NAME_128 LC_SRC_HAVE_FSMAP_HEADER + LC_SRC_HAVE_PERCPU_COUNTER_ADD_BATCH # 4.12 LC_SRC_CURRENT_TIME @@ -4903,6 +4925,7 @@ AC_DEFUN([LC_PROG_LINUX_RESULTS], [ LC_HAVE_KEY_USAGE_REFCOUNT LC_HAVE_CRYPTO_MAX_ALG_NAME_128 LC_HAVE_FSMAP_HEADER + LC_HAVE_PERCPU_COUNTER_ADD_BATCH # 4.12 LC_CURRENT_TIME diff --git a/lustre/include/lprocfs_status.h b/lustre/include/lprocfs_status.h index 95a5492..572564ab 100644 --- a/lustre/include/lprocfs_status.h +++ b/lustre/include/lprocfs_status.h @@ -169,17 +169,9 @@ struct lprocfs_counter { __s64 lc_count; __s64 lc_min; __s64 lc_max; + __s64 lc_sum; __s64 lc_sumsquare; - /* - * Every counter has lc_array_sum[0], while lc_array_sum[1] is only - * for irq context counter, i.e. stats with - * LPROCFS_STATS_FLAG_IRQ_SAFE flag, its counter need - * lc_array_sum[1] - */ - __s64 lc_array_sum[1]; }; -#define lc_sum lc_array_sum[0] -#define lc_sum_irq lc_array_sum[1] struct lprocfs_percpu { struct lprocfs_counter lp_cntr[0]; @@ -193,7 +185,6 @@ enum lprocfs_stats_lock_ops { enum lprocfs_stats_flags { LPROCFS_STATS_FLAG_NONE = 0x0000, /* per cpu counter */ LPROCFS_STATS_FLAG_NOPERCPU = 0x0001, /* need locking(no percpu area) */ - LPROCFS_STATS_FLAG_IRQ_SAFE = 0x0002, /* alloc need irq safe */ }; enum lprocfs_fields_flags { @@ -477,10 +468,6 @@ lprocfs_stats_counter_size(struct lprocfs_stats *stats) percpusize = offsetof(struct lprocfs_percpu, lp_cntr[stats->ls_num]); - /* irq safe stats need lc_array_sum[1] */ - if ((stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0) - percpusize += stats->ls_num * sizeof(__s64); - if ((stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) == 0) percpusize = L1_CACHE_ALIGN(percpusize); @@ -495,9 +482,6 @@ lprocfs_stats_counter_get(struct lprocfs_stats *stats, unsigned int cpuid, cntr = &stats->ls_percpu[cpuid]->lp_cntr[index]; - if ((stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0) - cntr = (void *)cntr + index * sizeof(__s64); - return cntr; } diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 4d88f21..e94e8b3 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -44,11 +45,7 @@ #include /* global variables */ -extern struct lprocfs_stats *obd_memory; -enum { - OBD_MEMORY_STAT = 0, - OBD_STATS_NUM, -}; +extern struct percpu_counter obd_memory; extern unsigned int obd_debug_peer_on_timeout; extern unsigned int obd_dump_on_timeout; @@ -784,41 +781,36 @@ extern bool obd_enable_health_write; extern atomic64_t libcfs_kmem; -#ifdef CONFIG_PROC_FS -#define obd_memory_add(size) \ - lprocfs_counter_add(obd_memory, OBD_MEMORY_STAT, (long)(size)) -#define obd_memory_sub(size) \ - lprocfs_counter_sub(obd_memory, OBD_MEMORY_STAT, (long)(size)) -#define obd_memory_sum() \ - lprocfs_stats_collector(obd_memory, OBD_MEMORY_STAT, \ - LPROCFS_FIELDS_FLAGS_SUM) - -extern void obd_update_maxusage(void); -extern __u64 obd_memory_max(void); - -#else /* CONFIG_PROC_FS */ - -extern __u64 obd_alloc; +/* OBD_MEMORY_BATCH is the maximum error allowed per CPU core. Since + * obd_memory_sum() is calling percpu_counter_sum_positive(), it adds + * up the per-core local delta anyway, so the per-core batch size is + * can be large. This could be percpu_counter_add_local(), but that + * only exists in kernel 6.0 and later, and just uses a larger batch. + */ +#define OBD_MEMORY_BATCH (16 * 1024 * 1024) -extern __u64 obd_max_alloc; +#ifndef HAVE_PERCPU_COUNTER_ADD_BATCH +#define percpu_counter_add_batch(fbc, amount, batch) \ + __percpu_counter_add(fbc, amount, batch) +#endif -static inline void obd_memory_add(long size) +static inline void obd_memory_add(size_t size) { - obd_alloc += size; - if (obd_alloc > obd_max_alloc) - obd_max_alloc = obd_alloc; + percpu_counter_add_batch(&obd_memory, size, OBD_MEMORY_BATCH); } -static inline void obd_memory_sub(long size) +static inline void obd_memory_sub(size_t size) { - obd_alloc -= size; + percpu_counter_add_batch(&obd_memory, -size, OBD_MEMORY_BATCH); } -#define obd_memory_sum() (obd_alloc) - -#define obd_memory_max() (obd_max_alloc) +static inline s64 obd_memory_sum(void) +{ + return percpu_counter_sum_positive(&obd_memory); +} -#endif /* !CONFIG_PROC_FS */ +extern void obd_update_maxusage(void); +extern __u64 obd_memory_max(void); #define OBD_DEBUG_MEMUSAGE (1) diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index 43deebd..5d95590 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -52,11 +52,7 @@ #include "llog_internal.h" #include -#ifdef CONFIG_PROC_FS static __u64 obd_max_alloc; -#else -__u64 obd_max_alloc; -#endif static DEFINE_SPINLOCK(obd_updatemax_lock); @@ -102,19 +98,15 @@ EXPORT_SYMBOL(at_early_margin); int at_extra = 30; EXPORT_SYMBOL(at_extra); -#ifdef CONFIG_PROC_FS -struct lprocfs_stats *obd_memory = NULL; +struct percpu_counter obd_memory; EXPORT_SYMBOL(obd_memory); -#endif static int obdclass_oom_handler(struct notifier_block *self, unsigned long notused, void *nfreed) { -#ifdef CONFIG_PROC_FS /* in bytes */ pr_info("obd_memory max: %llu, obd_memory current: %llu\n", obd_memory_max(), obd_memory_sum()); -#endif /* CONFIG_PROC_FS */ return NOTIFY_OK; } @@ -715,19 +707,13 @@ static int __init obdclass_init(void) if (err) return err; -#ifdef CONFIG_PROC_FS - obd_memory = lprocfs_stats_alloc(OBD_STATS_NUM, - LPROCFS_STATS_FLAG_NONE | - LPROCFS_STATS_FLAG_IRQ_SAFE); - if (obd_memory == NULL) { - CERROR("kmalloc of 'obd_memory' failed\n"); - return -ENOMEM; + err = percpu_counter_init(&obd_memory, 0, GFP_KERNEL); + if (err < 0) { + CERROR("obdclass: initializing 'obd_memory' failed: rc = %d\n", + err); + return err; } - lprocfs_counter_init(obd_memory, OBD_MEMORY_STAT, - LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_BYTES, - "memused"); -#endif err = libcfs_kkuc_init(); if (err) goto cleanup_obd_memory; @@ -742,7 +728,7 @@ static int __init obdclass_init(void) err = misc_register(&obd_psdev); if (err) { - CERROR("cannot register OBD miscdevice: err = %d\n", err); + CERROR("cannot register OBD miscdevice: rc = %d\n", err); goto cleanup_class_handle; } @@ -830,9 +816,7 @@ cleanup_kkuc: libcfs_kkuc_fini(); cleanup_obd_memory: -#ifdef CONFIG_PROC_FS - lprocfs_stats_free(&obd_memory); -#endif + percpu_counter_destroy(&obd_memory); unregister_oom_notifier(&obdclass_oom); return err; @@ -851,7 +835,6 @@ void obd_update_maxusage(void) } EXPORT_SYMBOL(obd_update_maxusage); -#ifdef CONFIG_PROC_FS __u64 obd_memory_max(void) { __u64 ret; @@ -863,14 +846,12 @@ __u64 obd_memory_max(void) return ret; } -#endif /* CONFIG_PROC_FS */ +EXPORT_SYMBOL(obd_memory_max); static void __exit obdclass_exit(void) { -#ifdef CONFIG_PROC_FS __u64 memory_leaked; __u64 memory_max; -#endif /* CONFIG_PROC_FS */ ENTRY; misc_deregister(&obd_psdev); @@ -891,16 +872,14 @@ static void __exit obdclass_exit(void) obd_zombie_impexp_stop(); libcfs_kkuc_fini(); -#ifdef CONFIG_PROC_FS memory_leaked = obd_memory_sum(); memory_max = obd_memory_max(); - lprocfs_stats_free(&obd_memory); + percpu_counter_destroy(&obd_memory); /* the below message is checked in test-framework.sh check_mem_leak() */ CDEBUG((memory_leaked) ? D_ERROR : D_INFO, "obd_memory max: %llu, leaked: %llu\n", memory_max, memory_leaked); -#endif /* CONFIG_PROC_FS */ unregister_oom_notifier(&obdclass_oom); diff --git a/lustre/obdclass/lprocfs_counters.c b/lustre/obdclass/lprocfs_counters.c index 22e6bf3..9b773e1 100644 --- a/lustre/obdclass/lprocfs_counters.c +++ b/lustre/obdclass/lprocfs_counters.c @@ -70,16 +70,8 @@ void lprocfs_counter_add(struct lprocfs_stats *stats, int idx, long amount) * as memory allocation could trigger memory shrinker call * ldlm_pool_shrink(), which calls lprocfs_counter_add(). * LU-1727. - * - * Only obd_memory uses LPROCFS_STATS_FLAG_IRQ_SAFE - * flag, because it needs accurate counting lest memory leak - * check reports error. */ - if (in_interrupt() && - (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0) - percpu_cntr->lc_sum_irq += amount; - else - percpu_cntr->lc_sum += amount; + percpu_cntr->lc_sum += amount; if (header->lc_config & LPROCFS_CNTR_STDDEV) percpu_cntr->lc_sumsquare += (__s64)amount * amount; @@ -132,16 +124,8 @@ void lprocfs_counter_sub(struct lprocfs_stats *stats, int idx, long amount) * softirq context - right now that's the only case we're in * softirq context here, use separate counter for that. * bz20650. - * - * Only obd_memory uses LPROCFS_STATS_FLAG_IRQ_SAFE - * flag, because it needs accurate counting lest memory leak - * check reports error. */ - if (in_interrupt() && - (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0) - percpu_cntr->lc_sum_irq -= amount; - else - percpu_cntr->lc_sum -= amount; + percpu_cntr->lc_sum -= amount; } lprocfs_stats_unlock(stats, LPROCFS_GET_SMP_ID, &flags); } diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index e9cf370..5437c9c 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -444,10 +444,7 @@ int lprocfs_stats_lock(struct lprocfs_stats *stats, unsigned long *flags) { if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) { - if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) - spin_lock_irqsave(&stats->ls_lock, *flags); - else - spin_lock(&stats->ls_lock); + spin_lock(&stats->ls_lock); return opc == LPROCFS_GET_NUM_CPU ? 1 : 0; } @@ -490,10 +487,7 @@ void lprocfs_stats_unlock(struct lprocfs_stats *stats, unsigned long *flags) { if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) { - if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) - spin_unlock_irqrestore(&stats->ls_lock, *flags); - else - spin_unlock(&stats->ls_lock); + spin_unlock(&stats->ls_lock); } else if (opc == LPROCFS_GET_SMP_ID) { put_cpu(); } @@ -1200,7 +1194,6 @@ int lprocfs_stats_alloc_one(struct lprocfs_stats *stats, unsigned int cpuid) struct lprocfs_counter *cntr; unsigned int percpusize; int rc = -ENOMEM; - unsigned long flags = 0; int i; LASSERT(stats->ls_percpu[cpuid] == NULL); @@ -1211,17 +1204,10 @@ int lprocfs_stats_alloc_one(struct lprocfs_stats *stats, unsigned int cpuid) if (stats->ls_percpu[cpuid]) { rc = 0; if (unlikely(stats->ls_biggest_alloc_num <= cpuid)) { - if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) - spin_lock_irqsave(&stats->ls_lock, flags); - else - spin_lock(&stats->ls_lock); + spin_lock(&stats->ls_lock); if (stats->ls_biggest_alloc_num <= cpuid) stats->ls_biggest_alloc_num = cpuid + 1; - if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) { - spin_unlock_irqrestore(&stats->ls_lock, flags); - } else { - spin_unlock(&stats->ls_lock); - } + spin_unlock(&stats->ls_lock); } /* initialize the ls_percpu[cpuid] non-zero counter */ for (i = 0; i < stats->ls_num; ++i) { @@ -1238,7 +1224,6 @@ struct lprocfs_stats *lprocfs_stats_alloc(unsigned int num, struct lprocfs_stats *stats; unsigned int num_entry; unsigned int percpusize = 0; - int i; if (num == 0) return NULL; @@ -1273,11 +1258,6 @@ struct lprocfs_stats *lprocfs_stats_alloc(unsigned int num, if (!stats->ls_percpu[0]) goto fail; stats->ls_biggest_alloc_num = 1; - } else if ((flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0) { - /* alloc all percpu data, currently only obd_memory use this */ - for (i = 0; i < num_entry; ++i) - if (lprocfs_stats_alloc_one(stats, i) < 0) - goto fail; } return stats; @@ -1373,8 +1353,6 @@ void lprocfs_stats_clear(struct lprocfs_stats *stats) percpu_cntr->lc_max = 0; percpu_cntr->lc_sumsquare = 0; percpu_cntr->lc_sum = 0; - if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) - percpu_cntr->lc_sum_irq = 0; } } stats->ls_init = ktime_get_real(); @@ -1601,8 +1579,6 @@ void lprocfs_counter_init_units(struct lprocfs_stats *stats, int index, percpu_cntr->lc_max = 0; percpu_cntr->lc_sumsquare = 0; percpu_cntr->lc_sum = 0; - if ((stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0) - percpu_cntr->lc_sum_irq = 0; } lprocfs_stats_unlock(stats, LPROCFS_GET_NUM_CPU, &flags); } @@ -1724,8 +1700,6 @@ __s64 lprocfs_read_helper(struct lprocfs_counter *lc, break; case LPROCFS_FIELDS_FLAGS_SUM: ret = lc->lc_sum; - if ((flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0) - ret += lc->lc_sum_irq; break; case LPROCFS_FIELDS_FLAGS_MIN: ret = lc->lc_min; @@ -1734,9 +1708,7 @@ __s64 lprocfs_read_helper(struct lprocfs_counter *lc, ret = lc->lc_max; break; case LPROCFS_FIELDS_FLAGS_AVG: - ret = div64_u64((flags & LPROCFS_STATS_FLAG_IRQ_SAFE ? - lc->lc_sum_irq : 0) + lc->lc_sum, - lc->lc_count); + ret = div64_u64(lc->lc_sum, lc->lc_count); break; case LPROCFS_FIELDS_FLAGS_SUMSQUARE: ret = lc->lc_sumsquare; -- 1.8.3.1