Whamcloud - gitweb
LU-14016 libcfs: use atomic64_t for libcfs_kmem 68/40168/5
author Amir Shehata <ashehata@whamcloud.com>
Wed, 7 Oct 2020 21:27:14 +0000 (14:27 -0700)
committer Oleg Drokin <green@whamcloud.com>
Thu, 29 Oct 2020 04:30:36 +0000 (04:30 +0000)
libcfs_kmemory keeps track of LNet's memory usage. It is an atomic_t,
whose counter is an int, so it could wrap around if usage grows beyond
INT_MAX bytes (2 GiB, about 2.15 GB). Replace it with an atomic64_t
named libcfs_kmem to avoid this issue.

Signed-off-by: Amir Shehata <ashehata@whamcloud.com>
Change-Id: If96fb8391c6ffb1924e47cef3dfca02eabc5f912
Reviewed-on: https://review.whamcloud.com/40168
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: John L. Hammond <jhammond@whamcloud.com>
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
libcfs/include/libcfs/libcfs_private.h
libcfs/libcfs/debug.c
libcfs/libcfs/module.c
lnet/klnds/gnilnd/gnilnd.c
lnet/klnds/o2iblnd/o2iblnd.c
lnet/klnds/socklnd/socklnd.c
lnet/klnds/socklnd/socklnd_cb.c
lustre/include/obd_support.h
lustre/tests/test-framework.sh

index daae959..cbc003d 100644 (file)
@@ -125,20 +125,20 @@ do {                                                                    \
  */
 #ifdef LIBCFS_DEBUG
 
  */
 #ifdef LIBCFS_DEBUG
 
-extern atomic_t libcfs_kmemory;
+extern atomic64_t libcfs_kmem;
 
 # define libcfs_kmem_inc(ptr, size)            \
 do {                                           \
 
 # define libcfs_kmem_inc(ptr, size)            \
 do {                                           \
-       atomic_add(size, &libcfs_kmemory);      \
+       atomic64_add(size, &libcfs_kmem);       \
 } while (0)
 
 # define libcfs_kmem_dec(ptr, size)            \
 do {                                           \
 } while (0)
 
 # define libcfs_kmem_dec(ptr, size)            \
 do {                                           \
-       atomic_sub(size, &libcfs_kmemory);      \
+       atomic64_sub(size, &libcfs_kmem);       \
 } while (0)
 
 # define libcfs_kmem_read()                    \
 } while (0)
 
 # define libcfs_kmem_read()                    \
-       atomic_read(&libcfs_kmemory)
+       (long long)atomic64_read(&libcfs_kmem)
 
 #else
 # define libcfs_kmem_inc(ptr, size) do {} while (0)
 
 #else
 # define libcfs_kmem_inc(ptr, size) do {} while (0)
@@ -154,21 +154,21 @@ do {                                              \
 do {                                                                       \
        LASSERT(!in_interrupt() ||                                          \
                ((size) <= LIBCFS_VMALLOC_SIZE &&                           \
 do {                                                                       \
        LASSERT(!in_interrupt() ||                                          \
                ((size) <= LIBCFS_VMALLOC_SIZE &&                           \
-                ((mask) & GFP_ATOMIC)) != 0);                      \
+                ((mask) & GFP_ATOMIC)) != 0);                              \
 } while (0)
 
 } while (0)
 
-#define LIBCFS_ALLOC_POST(ptr, size)                                       \
-do {                                                                       \
-       if (unlikely((ptr) == NULL)) {                                      \
-               CERROR("LNET: out of memory at %s:%d (tried to alloc '"     \
-                      #ptr "' = %d)\n", __FILE__, __LINE__, (int)(size));  \
-               CERROR("LNET: %d total bytes allocated by lnet\n",          \
-                      libcfs_kmem_read());                                 \
-       } else {                                                            \
-               libcfs_kmem_inc((ptr), (size));                             \
-               CDEBUG(D_MALLOC, "alloc '" #ptr "': %d at %p (tot %d).\n",  \
-                      (int)(size), (ptr), libcfs_kmem_read());             \
-       }                                                                   \
+#define LIBCFS_ALLOC_POST(ptr, size)                                         \
+do {                                                                         \
+       if (unlikely((ptr) == NULL)) {                                        \
+               CERROR("LNET: out of memory at %s:%d (tried to alloc '"       \
+                      #ptr "' = %d)\n", __FILE__, __LINE__, (int)(size));    \
+               CERROR("LNET: %lld total bytes allocated by lnet\n",          \
+                      libcfs_kmem_read());                           \
+       } else {                                                              \
+               libcfs_kmem_inc((ptr), (size));                               \
+               CDEBUG(D_MALLOC, "alloc '" #ptr "': %d at %p (tot %lld).\n",  \
+                      (int)(size), (ptr), libcfs_kmem_read());               \
+       }                                                                     \
 } while (0)
 
 /**
 } while (0)
 
 /**
@@ -216,16 +216,16 @@ do {                                                                          \
 
 #define LIBCFS_FREE(ptr, size)                                         \
 do {                                                                   \
 
 #define LIBCFS_FREE(ptr, size)                                         \
 do {                                                                   \
-       int s = (size);                                                 \
-       if (unlikely((ptr) == NULL)) {                                  \
-               CERROR("LIBCFS: free NULL '" #ptr "' (%d bytes) at "    \
-                      "%s:%d\n", s, __FILE__, __LINE__);               \
-               break;                                                  \
-       }                                                               \
-       libcfs_kmem_dec((ptr), s);                                      \
-       CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n",     \
+       int s = (size);                                                 \
+       if (unlikely((ptr) == NULL)) {                                  \
+               CERROR("LIBCFS: free NULL '" #ptr "' (%d bytes) at "    \
+                      "%s:%d\n", s, __FILE__, __LINE__);               \
+               break;                                                  \
+       }                                                               \
+       libcfs_kmem_dec((ptr), s);                                      \
+       CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %lld).\n",   \
               s, (ptr), libcfs_kmem_read());                           \
               s, (ptr), libcfs_kmem_read());                           \
-       if (unlikely(s > LIBCFS_VMALLOC_SIZE))                          \
+       if (unlikely(s > LIBCFS_VMALLOC_SIZE))                          \
                vfree(ptr);                                             \
        else                                                            \
                kfree(ptr);                                             \
                vfree(ptr);                                             \
        else                                                            \
                kfree(ptr);                                             \
index c45ef08..512f64f 100644 (file)
@@ -244,8 +244,8 @@ unsigned int libcfs_panic_on_lbug = 1;
 module_param(libcfs_panic_on_lbug, uint, 0644);
 MODULE_PARM_DESC(libcfs_panic_on_lbug, "Lustre kernel panic on LBUG");
 
 module_param(libcfs_panic_on_lbug, uint, 0644);
 MODULE_PARM_DESC(libcfs_panic_on_lbug, "Lustre kernel panic on LBUG");
 
-atomic_t libcfs_kmemory = ATOMIC_INIT(0);
-EXPORT_SYMBOL(libcfs_kmemory);
+atomic64_t libcfs_kmem = ATOMIC64_INIT(0);
+EXPORT_SYMBOL(libcfs_kmem);
 
 static DECLARE_COMPLETION(debug_complete);
 
 
 static DECLARE_COMPLETION(debug_complete);
 
index c908d34..ca2a797 100644 (file)
@@ -531,10 +531,10 @@ static struct ctl_table lnet_table[] = {
        },
        {
                .procname       = "lnet_memused",
        },
        {
                .procname       = "lnet_memused",
-               .data           = (int *)&libcfs_kmemory.counter,
-               .maxlen         = sizeof(int),
+               .data           = (u64 *)&libcfs_kmem.counter,
+               .maxlen         = sizeof(u64),
                .mode           = 0444,
                .mode           = 0444,
-               .proc_handler   = &proc_dointvec,
+               .proc_handler   = &proc_doulongvec_minmax,
        },
        {
                .procname       = "catastrophe",
        },
        {
                .procname       = "catastrophe",
@@ -788,8 +788,8 @@ static void __exit libcfs_exit(void)
        debugfs_remove_recursive(lnet_debugfs_root);
        lnet_debugfs_root = NULL;
 
        debugfs_remove_recursive(lnet_debugfs_root);
        lnet_debugfs_root = NULL;
 
-       CDEBUG(D_MALLOC, "before Portals cleanup: kmem %d\n",
-              atomic_read(&libcfs_kmemory));
+       CDEBUG(D_MALLOC, "before Portals cleanup: kmem %lld\n",
+              libcfs_kmem_read());
 
        llcrypt_exit();
 
 
        llcrypt_exit();
 
@@ -806,9 +806,9 @@ static void __exit libcfs_exit(void)
        cfs_cpu_fini();
 
        /* the below message is checked in test-framework.sh check_mem_leak() */
        cfs_cpu_fini();
 
        /* the below message is checked in test-framework.sh check_mem_leak() */
-       if (atomic_read(&libcfs_kmemory) != 0)
-               CERROR("Portals memory leaked: %d bytes\n",
-                      atomic_read(&libcfs_kmemory));
+       if (libcfs_kmem_read() != 0)
+               CERROR("Portals memory leaked: %lld bytes\n",
+                      libcfs_kmem_read());
 
        rc = libcfs_debug_cleanup();
        if (rc)
 
        rc = libcfs_debug_cleanup();
        if (rc)
index f1a7734..2548fde 100644 (file)
@@ -2117,7 +2117,7 @@ kgnilnd_dev_fini(kgn_device_t *dev)
 int kgnilnd_base_startup(void)
 {
        struct timeval       tv;
 int kgnilnd_base_startup(void)
 {
        struct timeval       tv;
-       int                  pkmem = atomic_read(&libcfs_kmemory);
+       long long            pkmem = libcfs_kmem_read();
        int                  rc;
        int                  i;
        kgn_device_t        *dev;
        int                  rc;
        int                  i;
        kgn_device_t        *dev;
@@ -2410,7 +2410,7 @@ int kgnilnd_base_startup(void)
        kgnilnd_data.kgn_init = GNILND_INIT_ALL;
        /*****************************************************/
 
        kgnilnd_data.kgn_init = GNILND_INIT_ALL;
        /*****************************************************/
 
-       CDEBUG(D_MALLOC, "initial kmem %d\n", pkmem);
+       CDEBUG(D_MALLOC, "initial kmem %lld\n", pkmem);
        RETURN(0);
 
 failed:
        RETURN(0);
 
 failed:
@@ -2582,8 +2582,8 @@ kgnilnd_base_shutdown(void)
                kfree(kgnilnd_data.kgn_cksum_map_pages);
        }
 
                kfree(kgnilnd_data.kgn_cksum_map_pages);
        }
 
-       CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
-              atomic_read(&libcfs_kmemory));
+       CDEBUG(D_MALLOC, "after NAL cleanup: kmem %lld\n",
+              libcfs_kmem_read());
 
        kgnilnd_data.kgn_init = GNILND_INIT_NOTHING;
        module_put(THIS_MODULE);
 
        kgnilnd_data.kgn_init = GNILND_INIT_NOTHING;
        module_put(THIS_MODULE);
@@ -2704,8 +2704,8 @@ kgnilnd_shutdown(struct lnet_ni *ni)
 
        /* Serialize with startup. */
        mutex_lock(&kgnilnd_data.kgn_quiesce_mutex);
 
        /* Serialize with startup. */
        mutex_lock(&kgnilnd_data.kgn_quiesce_mutex);
-       CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
-              atomic_read(&libcfs_kmemory));
+       CDEBUG(D_MALLOC, "before NAL cleanup: kmem %lld\n",
+              libcfs_kmem_read());
 
        if (net == NULL) {
                CERROR("got NULL net for ni %p\n", ni);
 
        if (net == NULL) {
                CERROR("got NULL net for ni %p\n", ni);
@@ -2783,8 +2783,8 @@ out:
                        kgnilnd_base_shutdown();
                }
        }
                        kgnilnd_base_shutdown();
                }
        }
-       CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
-              atomic_read(&libcfs_kmemory));
+       CDEBUG(D_MALLOC, "after NAL cleanup: kmem %lld\n",
+              libcfs_kmem_read());
 
        mutex_unlock(&kgnilnd_data.kgn_quiesce_mutex);
        EXIT;
 
        mutex_unlock(&kgnilnd_data.kgn_quiesce_mutex);
        EXIT;
index e846e9d..dbd11e9 100644 (file)
@@ -2901,8 +2901,8 @@ kiblnd_base_shutdown(void)
 
        LASSERT(list_empty(&kiblnd_data.kib_devs));
 
 
        LASSERT(list_empty(&kiblnd_data.kib_devs));
 
-        CDEBUG(D_MALLOC, "before LND base cleanup: kmem %d\n",
-              atomic_read(&libcfs_kmemory));
+       CDEBUG(D_MALLOC, "before LND base cleanup: kmem %lld\n",
+              libcfs_kmem_read());
 
         switch (kiblnd_data.kib_init) {
         default:
 
         switch (kiblnd_data.kib_init) {
         default:
@@ -2948,8 +2948,8 @@ kiblnd_base_shutdown(void)
        if (kiblnd_data.kib_scheds != NULL)
                cfs_percpt_free(kiblnd_data.kib_scheds);
 
        if (kiblnd_data.kib_scheds != NULL)
                cfs_percpt_free(kiblnd_data.kib_scheds);
 
-        CDEBUG(D_MALLOC, "after LND base cleanup: kmem %d\n",
-              atomic_read(&libcfs_kmemory));
+       CDEBUG(D_MALLOC, "after LND base cleanup: kmem %lld\n",
+              libcfs_kmem_read());
 
        kiblnd_data.kib_init = IBLND_INIT_NOTHING;
        module_put(THIS_MODULE);
 
        kiblnd_data.kib_init = IBLND_INIT_NOTHING;
        module_put(THIS_MODULE);
@@ -2967,8 +2967,8 @@ kiblnd_shutdown(struct lnet_ni *ni)
         if (net == NULL)
                 goto out;
 
         if (net == NULL)
                 goto out;
 
-        CDEBUG(D_MALLOC, "before LND net cleanup: kmem %d\n",
-              atomic_read(&libcfs_kmemory));
+       CDEBUG(D_MALLOC, "before LND net cleanup: kmem %lld\n",
+              libcfs_kmem_read());
 
        write_lock_irqsave(g_lock, flags);
        net->ibn_shutdown = 1;
 
        write_lock_irqsave(g_lock, flags);
        net->ibn_shutdown = 1;
@@ -3009,8 +3009,8 @@ kiblnd_shutdown(struct lnet_ni *ni)
                 break;
         }
 
                 break;
         }
 
-        CDEBUG(D_MALLOC, "after LND net cleanup: kmem %d\n",
-              atomic_read(&libcfs_kmemory));
+       CDEBUG(D_MALLOC, "after LND net cleanup: kmem %lld\n",
+              libcfs_kmem_read());
 
         net->ibn_init = IBLND_INIT_NOTHING;
         ni->ni_data = NULL;
 
         net->ibn_init = IBLND_INIT_NOTHING;
         ni->ni_data = NULL;
index 19f4b27..0321b25 100644 (file)
@@ -2221,8 +2221,8 @@ ksocknal_base_shutdown(void)
        struct ksock_peer_ni *peer_ni;
        int i;
 
        struct ksock_peer_ni *peer_ni;
        int i;
 
-       CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
-              atomic_read (&libcfs_kmemory));
+       CDEBUG(D_MALLOC, "before NAL cleanup: kmem %lld\n",
+              libcfs_kmem_read());
        LASSERT (ksocknal_data.ksnd_nnets == 0);
 
        switch (ksocknal_data.ksnd_init) {
        LASSERT (ksocknal_data.ksnd_nnets == 0);
 
        switch (ksocknal_data.ksnd_init) {
@@ -2274,8 +2274,8 @@ ksocknal_base_shutdown(void)
                break;
        }
 
                break;
        }
 
-       CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
-              atomic_read (&libcfs_kmemory));
+       CDEBUG(D_MALLOC, "after NAL cleanup: kmem %lld\n",
+              libcfs_kmem_read());
 
        module_put(THIS_MODULE);
 }
 
        module_put(THIS_MODULE);
 }
index 815a8a2..47f0095 100644 (file)
@@ -565,8 +565,8 @@ ksocknal_process_transmit(struct ksock_conn *conn, struct ksock_tx *tx,
 
                counter++;   /* exponential backoff warnings */
                if ((counter & (-counter)) == counter)
 
                counter++;   /* exponential backoff warnings */
                if ((counter & (-counter)) == counter)
-                       CWARN("%u ENOMEM tx %p (%u allocated)\n",
-                             counter, conn, atomic_read(&libcfs_kmemory));
+                       CWARN("%u ENOMEM tx %p (%lld allocated)\n",
+                             counter, conn, libcfs_kmem_read());
 
                /* Queue on ksnd_enomem_conns for retry after a timeout */
                spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
 
                /* Queue on ksnd_enomem_conns for retry after a timeout */
                spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
index bed99d1..62d6266 100644 (file)
@@ -726,7 +726,7 @@ extern char obd_jobid_var[];
 #define LUT_FAIL_MDT                           LUT_FAIL_CLASS(OBD_FAIL_MDS)
 #define LUT_FAIL_OST                           LUT_FAIL_CLASS(OBD_FAIL_OST)
 
 #define LUT_FAIL_MDT                           LUT_FAIL_CLASS(OBD_FAIL_MDS)
 #define LUT_FAIL_OST                           LUT_FAIL_CLASS(OBD_FAIL_OST)
 
-extern atomic_t libcfs_kmemory;
+extern atomic64_t libcfs_kmem;
 
 #ifdef CONFIG_PROC_FS
 #define obd_memory_add(size)                                                  \
 
 #ifdef CONFIG_PROC_FS
 #define obd_memory_add(size)                                                  \
@@ -829,8 +829,8 @@ do {                                                                              \
        if (unlikely((ptr) == NULL)) {                                        \
                CERROR("vmalloc of '" #ptr "' (%d bytes) failed\n",           \
                       (int)(size));                                          \
        if (unlikely((ptr) == NULL)) {                                        \
                CERROR("vmalloc of '" #ptr "' (%d bytes) failed\n",           \
                       (int)(size));                                          \
-               CERROR("%llu total bytes allocated by Lustre, %d by LNET\n", \
-                      obd_memory_sum(), atomic_read(&libcfs_kmemory));       \
+               CERROR("%llu total bytes allocated by Lustre, %lld by LNET\n",\
+                      obd_memory_sum(), libcfs_kmem_read());\
        } else {                                                              \
                OBD_ALLOC_POST(ptr, size, "vmalloced");                       \
        }                                                                     \
        } else {                                                              \
                OBD_ALLOC_POST(ptr, size, "vmalloced");                       \
        }                                                                     \
index 0599697..6cfde4d 100755 (executable)
@@ -809,16 +809,16 @@ load_modules () {
 }
 
 check_mem_leak () {
 }
 
 check_mem_leak () {
-    LEAK_LUSTRE=$(dmesg | tail -n 30 | grep "obd_memory.*leaked" || true)
-    LEAK_PORTALS=$(dmesg | tail -n 20 | grep "Portals memory leaked" || true)
-    if [ "$LEAK_LUSTRE" -o "$LEAK_PORTALS" ]; then
-        echo "$LEAK_LUSTRE" 1>&2
-        echo "$LEAK_PORTALS" 1>&2
-        mv $TMP/debug $TMP/debug-leak.`date +%s` || true
-        echo "Memory leaks detected"
-        [ -n "$IGNORE_LEAK" ] && { echo "ignoring leaks" && return 0; } || true
-        return 1
-    fi
+       LEAK_LUSTRE=$(dmesg | tail -n 30 | grep "obd_memory.*leaked" || true)
+       LEAK_PORTALS=$(dmesg | tail -n 20 | egrep -i "libcfs.*memory leaked" || true)
+       if [ "$LEAK_LUSTRE" -o "$LEAK_PORTALS" ]; then
+               echo "$LEAK_LUSTRE" 1>&2
+               echo "$LEAK_PORTALS" 1>&2
+               mv $TMP/debug $TMP/debug-leak.`date +%s` || true
+               echo "Memory leaks detected"
+               [ -n "$IGNORE_LEAK" ] && { echo "ignoring leaks" && return 0; } || true
+               return 1
+       fi
 }
 
 unload_modules() {
 }
 
 unload_modules() {