From: Amir Shehata Date: Wed, 7 Oct 2020 21:27:14 +0000 (-0700) Subject: LU-14016 libcfs: use atomic64_t for libcfs_kmem X-Git-Tag: 2.13.57~116 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=9397f8cf866ad363bc3908b248f41adfa5bd8f1c LU-14016 libcfs: use atomic64_t for libcfs_kmem libcfs_kmem keeps track of LNet's memory usage. It uses an int type, so it could wrap around if usage grows beyond 2.14 GB. Use atomic64_t to avoid this issue. Signed-off-by: Amir Shehata Change-Id: If96fb8391c6ffb1924e47cef3dfca02eabc5f912 Reviewed-on: https://review.whamcloud.com/40168 Tested-by: jenkins Reviewed-by: John L. Hammond Reviewed-by: Serguei Smirnov Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Oleg Drokin --- diff --git a/libcfs/include/libcfs/libcfs_private.h b/libcfs/include/libcfs/libcfs_private.h index daae959..cbc003d 100644 --- a/libcfs/include/libcfs/libcfs_private.h +++ b/libcfs/include/libcfs/libcfs_private.h @@ -125,20 +125,20 @@ do { \ */ #ifdef LIBCFS_DEBUG -extern atomic_t libcfs_kmemory; +extern atomic64_t libcfs_kmem; # define libcfs_kmem_inc(ptr, size) \ do { \ - atomic_add(size, &libcfs_kmemory); \ + atomic64_add(size, &libcfs_kmem); \ } while (0) # define libcfs_kmem_dec(ptr, size) \ do { \ - atomic_sub(size, &libcfs_kmemory); \ + atomic64_sub(size, &libcfs_kmem); \ } while (0) # define libcfs_kmem_read() \ - atomic_read(&libcfs_kmemory) + (long long)atomic64_read(&libcfs_kmem) #else # define libcfs_kmem_inc(ptr, size) do {} while (0) @@ -154,21 +154,21 @@ do { \ do { \ LASSERT(!in_interrupt() || \ ((size) <= LIBCFS_VMALLOC_SIZE && \ - ((mask) & GFP_ATOMIC)) != 0); \ + ((mask) & GFP_ATOMIC)) != 0); \ } while (0) -#define LIBCFS_ALLOC_POST(ptr, size) \ -do { \ - if (unlikely((ptr) == NULL)) { \ - CERROR("LNET: out of memory at %s:%d (tried to alloc '" \ - #ptr "' = %d)\n", __FILE__, __LINE__, (int)(size)); \ - CERROR("LNET: %d total bytes allocated by lnet\n", \ - libcfs_kmem_read()); \ - } else { \ - libcfs_kmem_inc((ptr), (size)); \ - CDEBUG(D_MALLOC, "alloc '" #ptr "': %d at %p (tot %d).\n", \ - (int)(size), (ptr), libcfs_kmem_read()); \ - } \ +#define LIBCFS_ALLOC_POST(ptr, size) \ +do { \ + if (unlikely((ptr) == NULL)) { \ + CERROR("LNET: out of memory at %s:%d (tried to alloc '" \ + #ptr "' = %d)\n", __FILE__, __LINE__, (int)(size)); \ + CERROR("LNET: %lld total bytes allocated by lnet\n", \ + libcfs_kmem_read()); \ + } else { \ + libcfs_kmem_inc((ptr), (size)); \ + CDEBUG(D_MALLOC, "alloc '" #ptr "': %d at %p (tot %lld).\n", \ + (int)(size), (ptr), libcfs_kmem_read()); \ + } \ } while (0) /** @@ -216,16 +216,16 @@ do { \ #define LIBCFS_FREE(ptr, size) \ do { \ - int s = (size); \ - if (unlikely((ptr) == NULL)) { \ - CERROR("LIBCFS: free NULL '" #ptr "' (%d bytes) at " \ - "%s:%d\n", s, __FILE__, __LINE__); \ - break; \ - } \ - libcfs_kmem_dec((ptr), s); \ - CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n", \ + int s = (size); \ + if (unlikely((ptr) == NULL)) { \ + CERROR("LIBCFS: free NULL '" #ptr "' (%d bytes) at " \ + "%s:%d\n", s, __FILE__, __LINE__); \ + break; \ + } \ + libcfs_kmem_dec((ptr), s); \ + CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %lld).\n", \ s, (ptr), libcfs_kmem_read()); \ - if (unlikely(s > LIBCFS_VMALLOC_SIZE)) \ + if (unlikely(s > LIBCFS_VMALLOC_SIZE)) \ vfree(ptr); \ else \ kfree(ptr); \ diff --git a/libcfs/libcfs/debug.c b/libcfs/libcfs/debug.c index c45ef08..512f64f 100644 --- a/libcfs/libcfs/debug.c +++ b/libcfs/libcfs/debug.c @@ -244,8 +244,8 @@ unsigned int libcfs_panic_on_lbug = 1; module_param(libcfs_panic_on_lbug, uint, 0644); MODULE_PARM_DESC(libcfs_panic_on_lbug, "Lustre kernel panic on LBUG"); -atomic_t libcfs_kmemory = ATOMIC_INIT(0); -EXPORT_SYMBOL(libcfs_kmemory); +atomic64_t libcfs_kmem = ATOMIC64_INIT(0); +EXPORT_SYMBOL(libcfs_kmem); static DECLARE_COMPLETION(debug_complete); diff --git a/libcfs/libcfs/module.c b/libcfs/libcfs/module.c index c908d34..ca2a797 100644 --- a/libcfs/libcfs/module.c +++ b/libcfs/libcfs/module.c @@ -531,10 +531,10 @@ static struct ctl_table lnet_table[] = { }, { .procname = "lnet_memused", - .data = (int *)&libcfs_kmemory.counter, - .maxlen = sizeof(int), + .data = (u64 *)&libcfs_kmem.counter, + .maxlen = sizeof(u64), .mode = 0444, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_doulongvec_minmax, }, { .procname = "catastrophe", @@ -788,8 +788,8 @@ static void __exit libcfs_exit(void) debugfs_remove_recursive(lnet_debugfs_root); lnet_debugfs_root = NULL; - CDEBUG(D_MALLOC, "before Portals cleanup: kmem %d\n", - atomic_read(&libcfs_kmemory)); + CDEBUG(D_MALLOC, "before Portals cleanup: kmem %lld\n", + libcfs_kmem_read()); llcrypt_exit(); @@ -806,9 +806,9 @@ static void __exit libcfs_exit(void) cfs_cpu_fini(); /* the below message is checked in test-framework.sh check_mem_leak() */ - if (atomic_read(&libcfs_kmemory) != 0) - CERROR("Portals memory leaked: %d bytes\n", - atomic_read(&libcfs_kmemory)); + if (libcfs_kmem_read() != 0) + CERROR("Portals memory leaked: %lld bytes\n", + libcfs_kmem_read()); rc = libcfs_debug_cleanup(); if (rc) diff --git a/lnet/klnds/gnilnd/gnilnd.c b/lnet/klnds/gnilnd/gnilnd.c index f1a7734..2548fde 100644 --- a/lnet/klnds/gnilnd/gnilnd.c +++ b/lnet/klnds/gnilnd/gnilnd.c @@ -2117,7 +2117,7 @@ kgnilnd_dev_fini(kgn_device_t *dev) int kgnilnd_base_startup(void) { struct timeval tv; - int pkmem = atomic_read(&libcfs_kmemory); + long long pkmem = libcfs_kmem_read(); int rc; int i; kgn_device_t *dev; @@ -2410,7 +2410,7 @@ int kgnilnd_base_startup(void) kgnilnd_data.kgn_init = GNILND_INIT_ALL; /*****************************************************/ - CDEBUG(D_MALLOC, "initial kmem %d\n", pkmem); + CDEBUG(D_MALLOC, "initial kmem %lld\n", pkmem); RETURN(0); failed: @@ -2582,8 +2582,8 @@ kgnilnd_base_shutdown(void) kfree(kgnilnd_data.kgn_cksum_map_pages); } - CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n", - atomic_read(&libcfs_kmemory)); + CDEBUG(D_MALLOC, "after NAL cleanup: kmem %lld\n", + libcfs_kmem_read()); kgnilnd_data.kgn_init = GNILND_INIT_NOTHING; module_put(THIS_MODULE); @@ -2704,8 +2704,8 @@ kgnilnd_shutdown(struct lnet_ni *ni) /* Serialize with startup. */ mutex_lock(&kgnilnd_data.kgn_quiesce_mutex); - CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n", - atomic_read(&libcfs_kmemory)); + CDEBUG(D_MALLOC, "before NAL cleanup: kmem %lld\n", + libcfs_kmem_read()); if (net == NULL) { CERROR("got NULL net for ni %p\n", ni); @@ -2783,8 +2783,8 @@ out: kgnilnd_base_shutdown(); } } - CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n", - atomic_read(&libcfs_kmemory)); + CDEBUG(D_MALLOC, "after NAL cleanup: kmem %lld\n", + libcfs_kmem_read()); mutex_unlock(&kgnilnd_data.kgn_quiesce_mutex); EXIT; diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c index e846e9d..dbd11e95 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.c +++ b/lnet/klnds/o2iblnd/o2iblnd.c @@ -2901,8 +2901,8 @@ kiblnd_base_shutdown(void) LASSERT(list_empty(&kiblnd_data.kib_devs)); - CDEBUG(D_MALLOC, "before LND base cleanup: kmem %d\n", - atomic_read(&libcfs_kmemory)); + CDEBUG(D_MALLOC, "before LND base cleanup: kmem %lld\n", + libcfs_kmem_read()); switch (kiblnd_data.kib_init) { default: @@ -2948,8 +2948,8 @@ kiblnd_base_shutdown(void) if (kiblnd_data.kib_scheds != NULL) cfs_percpt_free(kiblnd_data.kib_scheds); - CDEBUG(D_MALLOC, "after LND base cleanup: kmem %d\n", - atomic_read(&libcfs_kmemory)); + CDEBUG(D_MALLOC, "after LND base cleanup: kmem %lld\n", + libcfs_kmem_read()); kiblnd_data.kib_init = IBLND_INIT_NOTHING; module_put(THIS_MODULE); @@ -2967,8 +2967,8 @@ kiblnd_shutdown(struct lnet_ni *ni) if (net == NULL) goto out; - CDEBUG(D_MALLOC, "before LND net cleanup: kmem %d\n", - atomic_read(&libcfs_kmemory)); + CDEBUG(D_MALLOC, "before LND net cleanup: kmem %lld\n", + libcfs_kmem_read()); write_lock_irqsave(g_lock, flags); net->ibn_shutdown = 1; @@ -3009,8 +3009,8 @@ kiblnd_shutdown(struct lnet_ni *ni) break; } - CDEBUG(D_MALLOC, "after LND net cleanup: kmem %d\n", - atomic_read(&libcfs_kmemory)); + CDEBUG(D_MALLOC, "after LND net cleanup: kmem %lld\n", + libcfs_kmem_read()); net->ibn_init = IBLND_INIT_NOTHING; ni->ni_data = NULL; diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index 19f4b27..0321b25 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -2221,8 +2221,8 @@ ksocknal_base_shutdown(void) struct ksock_peer_ni *peer_ni; int i; - CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n", - atomic_read (&libcfs_kmemory)); + CDEBUG(D_MALLOC, "before NAL cleanup: kmem %lld\n", + libcfs_kmem_read()); LASSERT (ksocknal_data.ksnd_nnets == 0); switch (ksocknal_data.ksnd_init) { @@ -2274,8 +2274,8 @@ ksocknal_base_shutdown(void) break; } - CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n", - atomic_read (&libcfs_kmemory)); + CDEBUG(D_MALLOC, "after NAL cleanup: kmem %lld\n", + libcfs_kmem_read()); module_put(THIS_MODULE); } diff --git a/lnet/klnds/socklnd/socklnd_cb.c b/lnet/klnds/socklnd/socklnd_cb.c index 815a8a2..47f0095 100644 --- a/lnet/klnds/socklnd/socklnd_cb.c +++ b/lnet/klnds/socklnd/socklnd_cb.c @@ -565,8 +565,8 @@ ksocknal_process_transmit(struct ksock_conn *conn, struct ksock_tx *tx, counter++; /* exponential backoff warnings */ if ((counter & (-counter)) == counter) - CWARN("%u ENOMEM tx %p (%u allocated)\n", - counter, conn, atomic_read(&libcfs_kmemory)); + CWARN("%u ENOMEM tx %p (%lld allocated)\n", + counter, conn, libcfs_kmem_read()); /* Queue on ksnd_enomem_conns for retry after a timeout */ spin_lock_bh(&ksocknal_data.ksnd_reaper_lock); diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index bed99d1..62d6266 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -726,7 +726,7 @@ extern char obd_jobid_var[]; #define LUT_FAIL_MDT LUT_FAIL_CLASS(OBD_FAIL_MDS) #define LUT_FAIL_OST LUT_FAIL_CLASS(OBD_FAIL_OST) -extern atomic_t libcfs_kmemory; +extern atomic64_t libcfs_kmem; #ifdef CONFIG_PROC_FS #define obd_memory_add(size) \ @@ -829,8 +829,8 @@ do { \ if (unlikely((ptr) == NULL)) { \ CERROR("vmalloc of '" #ptr "' (%d bytes) failed\n", \ (int)(size)); \ - CERROR("%llu total bytes allocated by Lustre, %d by LNET\n", \ - obd_memory_sum(), atomic_read(&libcfs_kmemory)); \ + CERROR("%llu total bytes allocated by Lustre, %lld by LNET\n",\ + obd_memory_sum(), libcfs_kmem_read());\ } else { \ OBD_ALLOC_POST(ptr, size, "vmalloced"); \ } \ diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 0599697..6cfde4d 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -809,16 +809,16 @@ load_modules () { } check_mem_leak () { - LEAK_LUSTRE=$(dmesg | tail -n 30 | grep "obd_memory.*leaked" || true) - LEAK_PORTALS=$(dmesg | tail -n 20 | grep "Portals memory leaked" || true) - if [ "$LEAK_LUSTRE" -o "$LEAK_PORTALS" ]; then - echo "$LEAK_LUSTRE" 1>&2 - echo "$LEAK_PORTALS" 1>&2 - mv $TMP/debug $TMP/debug-leak.`date +%s` || true - echo "Memory leaks detected" - [ -n "$IGNORE_LEAK" ] && { echo "ignoring leaks" && return 0; } || true - return 1 - fi + LEAK_LUSTRE=$(dmesg | tail -n 30 | grep "obd_memory.*leaked" || true) + LEAK_PORTALS=$(dmesg | tail -n 20 | egrep -i "libcfs.*memory leaked" || true) + if [ "$LEAK_LUSTRE" -o "$LEAK_PORTALS" ]; then + echo "$LEAK_LUSTRE" 1>&2 + echo "$LEAK_PORTALS" 1>&2 + mv $TMP/debug $TMP/debug-leak.`date +%s` || true + echo "Memory leaks detected" + [ -n "$IGNORE_LEAK" ] && { echo "ignoring leaks" && return 0; } || true + return 1 + fi } unload_modules() {