Whamcloud - gitweb
Branch HEAD
authorbwzhou <bwzhou>
Mon, 5 Nov 2007 15:45:48 +0000 (15:45 +0000)
committerbwzhou <bwzhou>
Mon, 5 Nov 2007 15:45:48 +0000 (15:45 +0000)
b=11622
i=adilger
i=johann

1. add page allocation statistics for lustre;
2. use percpu counters for both memory and page accounting;
3. update leak-finder to understand the new format of debug info.

15 files changed:
lustre/include/lprocfs_status.h
lustre/include/obd_support.h
lustre/liblustre/dir.c
lustre/llite/lproc_llite.c
lustre/lvfs/lvfs_linux.c
lustre/mds/handler.c
lustre/obdclass/class_obd.c
lustre/obdclass/linux/linux-sysctl.c
lustre/obdclass/obd_mount.c
lustre/obdecho/echo.c
lustre/obdecho/echo_client.c
lustre/obdfilter/filter.c
lustre/ost/ost_handler.c
lustre/ptlrpc/pinger.c
lustre/tests/leak_finder.pl

index 44d1a2a..3444250 100644 (file)
@@ -123,11 +123,11 @@ struct lprocfs_atomic {
 struct lprocfs_counter {
         struct lprocfs_atomic  lc_cntl;  /* may need to move to per set */
         unsigned int           lc_config;
-        __u64                  lc_count;
-        __u64                  lc_sum;
-        __u64                  lc_min;
-        __u64                  lc_max;
-        __u64                  lc_sumsquare;
+        __s64                  lc_count;
+        __s64                  lc_sum;
+        __s64                  lc_min;
+        __s64                  lc_max;
+        __s64                  lc_sumsquare;
         const char            *lc_name;   /* must be static */
         const char            *lc_units;  /* must be static */
 };
@@ -140,10 +140,21 @@ struct lprocfs_percpu {
 #define LPROCFS_GET_SMP_ID  0x0002
 
 enum lprocfs_stats_flags {
+        LPROCFS_STATS_FLAG_PERCPU   = 0x0000, /* per cpu counter */
         LPROCFS_STATS_FLAG_NOPERCPU = 0x0001, /* stats have no percpu
                                                * area and need locking */
 };
 
+/*
+ * Selects which member of a struct lprocfs_counter a reader wants;
+ * lprocfs_read_helper() switches on these values.  Despite the "FLAGS"
+ * in the name, the values are consecutive selectors, not a bit mask.
+ */
+enum lprocfs_fields_flags {
+        LPROCFS_FIELDS_FLAGS_CONFIG     = 0x0001,
+        LPROCFS_FIELDS_FLAGS_SUM        = 0x0002,
+        LPROCFS_FIELDS_FLAGS_MIN        = 0x0003,
+        LPROCFS_FIELDS_FLAGS_MAX        = 0x0004,
+        LPROCFS_FIELDS_FLAGS_AVG        = 0x0005,
+        LPROCFS_FIELDS_FLAGS_SUMSQUARE  = 0x0006,
+        LPROCFS_FIELDS_FLAGS_COUNT      = 0x0007,
+};
+
 struct lprocfs_stats {
         unsigned int           ls_num;     /* # of counters */
         unsigned int           ls_percpu_size;
@@ -225,7 +236,11 @@ static inline void lprocfs_counter_add(struct lprocfs_stats *stats, int idx,
         lprocfs_stats_unlock(stats);
 }
 
-static inline void lprocfs_counter_incr(struct lprocfs_stats *stats, int idx)
+/* Add one to counter idx; shorthand for lprocfs_counter_add(stats, idx, 1). */
+#define lprocfs_counter_incr(stats, idx) \
+        lprocfs_counter_add(stats, idx, 1)
+
+static inline void lprocfs_counter_sub(struct lprocfs_stats *stats, int idx,
+                                       long amount)
 {
         struct lprocfs_counter *percpu_cntr;
         int smp_id;
@@ -233,14 +248,35 @@ static inline void lprocfs_counter_incr(struct lprocfs_stats *stats, int idx)
         if (stats == NULL)
                 return;
 
+        /* With per-client stats, statistics are allocated only for a
+         * single CPU area, so smp_id should always be 0. */
         smp_id = lprocfs_stats_lock(stats, LPROCFS_GET_SMP_ID);
-        percpu_cntr = &(stats->ls_percpu[smp_id]->lp_cntr[idx]);
 
+        percpu_cntr = &(stats->ls_percpu[smp_id]->lp_cntr[idx]);
         atomic_inc(&percpu_cntr->lc_cntl.la_entry);
-        percpu_cntr->lc_count++;
+        if (percpu_cntr->lc_config & LPROCFS_CNTR_AVGMINMAX)
+                percpu_cntr->lc_sum -= amount;
         atomic_inc(&percpu_cntr->lc_cntl.la_exit);
         lprocfs_stats_unlock(stats);
 }
+/* Take one off counter idx; shorthand for lprocfs_counter_sub(stats, idx, 1). */
+#define lprocfs_counter_decr(stats, idx) \
+        lprocfs_counter_sub(stats, idx, 1)
+
+extern __s64 lprocfs_read_helper(struct lprocfs_counter *lc, 
+                                 enum lprocfs_fields_flags field);
+/*
+ * Add up one field of counter idx over every possible CPU slot.
+ *
+ * Each per-cpu counter is read through lprocfs_read_helper() and the
+ * results are accumulated into an unsigned 64-bit total, which is returned.
+ * stats must not be NULL (asserted).
+ *
+ * NOTE(review): ls_percpu[] is indexed up to num_possible_cpus(); confirm
+ * that callers never pass stats allocated with LPROCFS_STATS_FLAG_NOPERCPU,
+ * which presumably only have a single area.
+ */
+static inline __u64 lprocfs_stats_collector(struct lprocfs_stats *stats,
+                                            int idx,
+                                            enum lprocfs_fields_flags field)
+{
+        struct lprocfs_counter *cntr;
+        __u64 total = 0;
+        int cpu;
+
+        LASSERT(stats != NULL);
+        for (cpu = 0; cpu < num_possible_cpus(); cpu++) {
+                cntr = &(stats->ls_percpu[cpu]->lp_cntr[idx]);
+                total += lprocfs_read_helper(cntr, field);
+        }
+        return total;
+}
 
 extern struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num,
                                                  enum lprocfs_stats_flags flags);
@@ -440,13 +476,19 @@ static inline void lprocfs_counter_add(struct lprocfs_stats *stats,
                                        int index, long amount) { return; }
 static inline void lprocfs_counter_incr(struct lprocfs_stats *stats,
                                         int index) { return; }
+/* Stub: subtracting from a counter does nothing in this configuration. */
+static inline void lprocfs_counter_sub(struct lprocfs_stats *stats,
+                                       int index, long amount) { return; }
 static inline void lprocfs_counter_init(struct lprocfs_stats *stats,
                                         int index, unsigned conf,
                                         const char *name, const char *units)
 { return; }
 
+/*
+ * Stub counter-field reader: always reports 0.
+ *
+ * The real reader is declared as __s64 lprocfs_read_helper(); the stub is
+ * provided under that name and signature so callers build unchanged in
+ * this (presumably !LPROCFS) branch.
+ */
+static inline __s64 lprocfs_read_helper(struct lprocfs_counter *lc,
+                                        enum lprocfs_fields_flags field)
+{ return 0; }
+
+/* Older spelling of the stub, kept so existing references keep compiling. */
+static inline __u64 lc_read_helper(struct lprocfs_counter *lc,
+                                   enum lprocfs_fields_flags field)
+{ return 0; }
+
 static inline struct lprocfs_stats* lprocfs_alloc_stats(unsigned int num,
-                                                        int client_stat)
+                                                        enum lprocfs_stats_flags flags)
 { return NULL; }
 static inline void lprocfs_clear_stats(struct lprocfs_stats *stats)
 { return; }
@@ -551,6 +593,11 @@ static inline
 int lprocfs_counter_write(struct file *file, const char *buffer,
                           unsigned long count, void *data) { return 0; }
 
+/* Stub collector: there is nothing to add up, so the total is always 0. */
+static inline __u64
+lprocfs_stats_collector(struct lprocfs_stats *stats, int idx,
+                        enum lprocfs_fields_flags field)
+{
+        return 0;
+}
+
 #define LPROCFS_ENTRY()
 #define LPROCFS_EXIT()
 #define LPROCFS_ENTRY_AND_CHECK(dp)
index 6d5efb3..a883129 100644 (file)
 #define _OBD_SUPPORT
 
 #include <libcfs/kp30.h>
+#include <lprocfs_status.h>
 
 /* global variables */
-extern atomic_t obd_memory;
-extern int obd_memmax;
+extern struct lprocfs_stats *obd_memory;
+/*
+ * Counter indices inside the obd_memory stats set: one counter for bytes,
+ * one for pages.  OBD_STATS_NUM is the number of counters.
+ */
+enum {
+        OBD_MEMORY_STAT = 0,
+        OBD_MEMORY_PAGES_STAT = 1,
+        OBD_STATS_NUM,
+};
+
 extern unsigned int obd_fail_loc;
 extern unsigned int obd_fail_val;
 extern unsigned int obd_debug_peer_on_timeout;
@@ -361,6 +367,68 @@ do {                                                                         \
 
 extern atomic_t libcfs_kmemory;
 
+#ifdef LPROCFS 
+/*
+ * Memory/page accounting on top of the obd_memory lprocfs counters.
+ * The *_add/*_sub macros adjust the byte counter by size, or the page
+ * counter by (1 << order) pages; the *_sum macros collect the SUM field
+ * of the corresponding counter across CPUs.
+ */
+#define obd_memory_add(size)                                                  \
+        lprocfs_counter_add(obd_memory, OBD_MEMORY_STAT, (long)(size))
+#define obd_memory_sub(size)                                                  \
+        lprocfs_counter_sub(obd_memory, OBD_MEMORY_STAT, (long)(size))
+#define obd_memory_sum()                                                      \
+        lprocfs_stats_collector(obd_memory, OBD_MEMORY_STAT,                  \
+                                LPROCFS_FIELDS_FLAGS_SUM)
+#define obd_pages_add(order)                                                  \
+        lprocfs_counter_add(obd_memory, OBD_MEMORY_PAGES_STAT,                \
+                            (long)(1 << (order)))
+#define obd_pages_sub(order)                                                  \
+        lprocfs_counter_sub(obd_memory, OBD_MEMORY_PAGES_STAT,                \
+                            (long)(1 << (order)))
+#define obd_pages_sum()                                                       \
+        lprocfs_stats_collector(obd_memory, OBD_MEMORY_PAGES_STAT,            \
+                                LPROCFS_FIELDS_FLAGS_SUM)
+
+extern void obd_update_maxusage(void);
+extern __u64 obd_memory_max(void);
+extern __u64 obd_pages_max(void);
+
+#else
+
+extern __u64 obd_alloc;
+extern __u64 obd_pages;
+
+extern __u64 obd_max_alloc;
+extern __u64 obd_max_pages;
+
+/*
+ * Fallback byte accounting: add size to the running total and raise the
+ * recorded maximum when the new total exceeds it.
+ */
+static inline void obd_memory_add(long size)
+{
+        obd_alloc += size;
+        if (obd_max_alloc < obd_alloc)
+                obd_max_alloc = obd_alloc;
+}
+
+/* Fallback byte accounting: take size off the running total. */
+static inline void obd_memory_sub(long size)
+{
+        obd_alloc = obd_alloc - size;
+}
+
+/*
+ * Fallback page accounting: add (1 << order) pages to the running total
+ * and raise the recorded maximum when the new total exceeds it.
+ */
+static inline void obd_pages_add(int order)
+{
+        obd_pages += 1 << order;
+        if (obd_max_pages < obd_pages)
+                obd_max_pages = obd_pages;
+}
+
+/* Fallback page accounting: take (1 << order) pages off the running total. */
+static inline void obd_pages_sub(int order)
+{
+        obd_pages -= 1 << order;
+}
+
+#define obd_memory_sum() (obd_alloc)
+#define obd_pages_sum()  (obd_pages)
+
+#define obd_memory_max() (obd_max_alloc)
+#define obd_pages_max() (obd_max_pages)
+
+#endif
+
 #if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__)
 
 #define OBD_MT_WRONG_SIZE    (1 << 0)
@@ -476,19 +544,17 @@ __put_mem_track(void *ptr, int size,
 
 #if OBD_DEBUG_MEMUSAGE
 #define OBD_ALLOC_POST(ptr, size, name)                                 \
-                atomic_add(size, &obd_memory);                          \
-                if (atomic_read(&obd_memory) > obd_memmax)              \
-                        obd_memmax = atomic_read(&obd_memory);          \
+                obd_memory_add(size);                                   \
                 get_mem_track((ptr), (size), __FILE__, __LINE__);       \
-                CDEBUG(D_MALLOC, name " '" #ptr "': %d at %p (tot %d)\n", \
-                       (int)(size), ptr, atomic_read(&obd_memory))
+                CDEBUG(D_MALLOC, name " '" #ptr "': %d at %p.\n",       \
+                       (int)(size), ptr)
 
 #define OBD_FREE_PRE(ptr, size, name)                                   \
         LASSERT(ptr);                                                   \
         put_mem_track((ptr), (size), __FILE__, __LINE__);               \
-        atomic_sub(size, &obd_memory);                                  \
-        CDEBUG(D_MALLOC, name " '" #ptr "': %d at %p (tot %d).\n",      \
-               (int)(size), ptr, atomic_read(&obd_memory));             \
+        obd_memory_sub(size);                                           \
+        CDEBUG(D_MALLOC, name " '" #ptr "': %d at %p.\n",               \
+               (int)(size), ptr);                                       \
         POISON(ptr, 0x5a, size)
 
 #else /* !OBD_DEBUG_MEMUSAGE */
@@ -516,10 +582,10 @@ do {                                                                          \
 do {                                                                          \
         (ptr) = cfs_alloc(size, (gfp_mask));                                  \
         if (unlikely((ptr) == NULL)) {                                        \
-                CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n",  \
-                       (int)(size), __FILE__, __LINE__);                      \
-                CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \
-                       atomic_read(&obd_memory), atomic_read(&libcfs_kmemory));\
+                CERROR("kmalloc of '" #ptr "' (%d bytes) failed\n",           \
+                       (int)(size));                                          \
+                CERROR(LPU64" total bytes allocated by Lustre, %d by LNET\n", \
+                       obd_memory_sum(), atomic_read(&libcfs_kmemory));       \
         } else {                                                              \
                 memset(ptr, 0, size);                                         \
                 OBD_ALLOC_POST(ptr, size, "kmalloced");                       \
@@ -543,10 +609,10 @@ do {                                                                          \
 do {                                                                          \
         (ptr) = cfs_alloc_large(size);                                        \
         if (unlikely((ptr) == NULL)) {                                        \
-                CERROR("vmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n",  \
-                       (int)(size), __FILE__, __LINE__);                      \
-                CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \
-                       atomic_read(&obd_memory), atomic_read(&libcfs_kmemory));\
+                CERROR("vmalloc of '" #ptr "' (%d bytes) failed\n",           \
+                       (int)(size));                                          \
+                CERROR(LPU64" total bytes allocated by Lustre, %d by LNET\n", \
+                       obd_memory_sum(), atomic_read(&libcfs_kmemory));       \
         } else {                                                              \
                 memset(ptr, 0, size);                                         \
                 OBD_ALLOC_POST(ptr, size, "vmalloced");                       \
@@ -625,10 +691,10 @@ do {                                                                          \
         LASSERT(!in_interrupt());                                             \
         (ptr) = cfs_mem_cache_alloc(slab, (type));                            \
         if (unlikely((ptr) == NULL)) {                                        \
-                CERROR("slab-alloc of '"#ptr"' (%d bytes) failed at %s:%d\n", \
-                       (int)(size), __FILE__, __LINE__);                      \
-                CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \
-                       atomic_read(&obd_memory), atomic_read(&libcfs_kmemory));\
+                CERROR("slab-alloc of '"#ptr"' (%d bytes) failed\n",          \
+                       (int)(size));                                          \
+                CERROR(LPU64" total bytes allocated by Lustre, %d by LNET\n", \
+                       obd_memory_sum(), atomic_read(&libcfs_kmemory));       \
         } else {                                                              \
                 memset(ptr, 0, size);                                         \
                 OBD_ALLOC_POST(ptr, size, "slab-alloced");                    \
@@ -651,6 +717,47 @@ do {                                                                          \
 
 #define KEY_IS(str) (keylen >= strlen(str) && strcmp(key, str) == 0)
 
+/*
+ * Wrapper for contiguous page frame allocation.
+ *
+ * Stores the result of cfs_alloc_pages(gfp_mask, order) in ptr.
+ * On success, (1 << order) pages are added to the Lustre page counter
+ * via obd_pages_add() and a D_MALLOC debug line is emitted.
+ * On failure, ptr is NULL and two CERROR lines report the failed request
+ * and the current totals: bytes allocated, pages allocated, those pages
+ * expressed in bytes, and the LNET byte total.  The arguments follow the
+ * format string in exactly that order.
+ */
+#define OBD_PAGES_ALLOC(ptr, order, gfp_mask)                                 \
+do {                                                                          \
+        (ptr) = cfs_alloc_pages(gfp_mask, order);                             \
+        if (unlikely((ptr) == NULL)) {                                        \
+                CERROR("alloc_pages of '" #ptr "' %d page(s) / "LPU64" bytes "\
+                       "failed\n", (int)(1 << (order)),                       \
+                       (__u64)((1 << (order)) << CFS_PAGE_SHIFT));            \
+                CERROR(LPU64" total bytes and "LPU64" total pages "           \
+                       "("LPU64" bytes) allocated by Lustre, "                \
+                       "%d total bytes by LNET\n",                            \
+                       obd_memory_sum(),                                      \
+                       obd_pages_sum(),                                       \
+                       obd_pages_sum() << CFS_PAGE_SHIFT,                     \
+                       atomic_read(&libcfs_kmemory));                         \
+        } else {                                                              \
+                obd_pages_add(order);                                         \
+                CDEBUG(D_MALLOC, "alloc_pages '" #ptr "': %d page(s) / "      \
+                       LPU64" bytes at %p.\n",                                \
+                       (int)(1 << (order)),                                   \
+                       (__u64)((1 << (order)) << CFS_PAGE_SHIFT), ptr);       \
+        }                                                                     \
+} while (0)
+
+/* Single-page form of OBD_PAGES_ALLOC (order 0). */
+#define OBD_PAGE_ALLOC(ptr, gfp_mask)                                         \
+        OBD_PAGES_ALLOC(ptr, 0, gfp_mask)
+
+/*
+ * Counterpart of OBD_PAGES_ALLOC: asserts ptr is non-NULL, removes
+ * (1 << order) pages from the Lustre page counter, logs a D_MALLOC line
+ * and releases the pages with __cfs_free_pages().  Afterwards ptr is
+ * overwritten with 0xdeadbeef (presumably so a stale use is easy to
+ * spot), so ptr must be an assignable expression.
+ */
+#define OBD_PAGES_FREE(ptr, order)                                            \
+do {                                                                          \
+        LASSERT(ptr);                                                         \
+        obd_pages_sub(order);                                                 \
+        CDEBUG(D_MALLOC, "free_pages '" #ptr "': %d page(s) / "LPU64" bytes " \
+               "at %p.\n",                                                    \
+               (int)(1 << (order)), (__u64)((1 << (order)) << CFS_PAGE_SHIFT),\
+               ptr);                                                          \
+        __cfs_free_pages(ptr, order);                                         \
+        (ptr) = (void *)0xdeadbeef;                                           \
+} while (0)
+
+/* Single-page form of OBD_PAGES_FREE (order 0). */
+#define OBD_PAGE_FREE(ptr) OBD_PAGES_FREE(ptr, 0)
+
 #if defined(__linux__)
 #include <linux/obd_support.h>
 #elif defined(__APPLE__)
index 0f2bc90..88c0f3e 100644 (file)
@@ -126,16 +126,14 @@ static struct page *llu_dir_read_page(struct inode *ino, unsigned long pgidx)
         int rc;
         ENTRY;
 
-        page = cfs_alloc_page(0);
-        if (!page) {
-                CERROR("alloc page failed\n");
+        OBD_PAGE_ALLOC(page, 0);
+        if (!page)
                 RETURN(ERR_PTR(-ENOMEM));
-        }
         page->index = pgidx;
 
         rc = llu_dir_do_readpage(ino, page);
         if (rc) {
-                free_page(page);
+                OBD_PAGE_FREE(page);
                 RETURN(ERR_PTR(rc));
         }
 
@@ -248,7 +246,7 @@ ssize_t llu_iop_filldirentries(struct inode *ino, _SYSIO_OFF_T *basep,
                                                 le16_to_cpu(de->rec_len),
                                                 le32_to_cpu(de->inode), d_type, &filled);
                                 if (over) {
-                                        free_page(page);
+                                        OBD_PAGE_FREE(page);
                                         /*
                                          * if buffer overflow with no data
                                          * returned yet, then report error
@@ -261,7 +259,7 @@ ssize_t llu_iop_filldirentries(struct inode *ino, _SYSIO_OFF_T *basep,
                         }
                 }
                 
-                free_page(page);
+                OBD_PAGE_FREE(page);
         }
 done:
         lli->lli_dir_pos = pgidx << CFS_PAGE_SHIFT | offset;
index e172e78..82b166d 100644 (file)
@@ -620,7 +620,8 @@ int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
         entry->data = sbi;
 
         /* File operations stats */
-        sbi->ll_stats = lprocfs_alloc_stats(LPROC_LL_FILE_OPCODES, 0);
+        sbi->ll_stats = lprocfs_alloc_stats(LPROC_LL_FILE_OPCODES, 
+                                            LPROCFS_STATS_FLAG_PERCPU);
         if (sbi->ll_stats == NULL)
                 GOTO(out, err = -ENOMEM);
         /* do counter init */
index 7d26d82..9bdcf35 100644 (file)
 #include <lustre_lib.h>
 #include <lustre_quota.h>
 
-atomic_t obd_memory;
-int obd_memmax;
+__u64 obd_max_pages = 0;
+__u64 obd_max_alloc = 0;
+struct lprocfs_stats *obd_memory = NULL;
+spinlock_t obd_updatemax_lock = SPIN_LOCK_UNLOCKED;
+/* refine later and change to seqlock or similar from libcfs */
 
 /* Debugging check only needed during development */
 #ifdef OBD_CTXT_DEBUG
@@ -446,8 +449,6 @@ long l_readdir(struct file *file, struct list_head *dentry_list)
         return 0;
 }
 EXPORT_SYMBOL(l_readdir);
-EXPORT_SYMBOL(obd_memory);
-EXPORT_SYMBOL(obd_memmax);
 
 #if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__)
 static spinlock_t obd_memlist_lock = SPIN_LOCK_UNLOCKED;
@@ -606,19 +607,12 @@ void lvfs_memdbg_show(void)
         struct obd_mem_track *mt;
         int header = 0;
 #endif
-        int leaked;
        
 #if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__)
        int i;
 #endif
 
-        leaked = atomic_read(&obd_memory);
-
-        if (leaked > 0) {
-                CWARN("Memory leaks detected (max %d, leaked %d)\n",
-                      obd_memmax, leaked);
-        }
-        
+       
 #if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__)
         spin_lock(&obd_memlist_lock);
         for (i = 0, head = obd_memtable; i < obd_memtable_size; i++, head++) {
@@ -687,6 +681,94 @@ int lvfs_check_io_health(struct obd_device *obd, struct file *file)
 EXPORT_SYMBOL(lvfs_check_io_health);
 #endif /* LUSTRE_KERNEL_VERSION */
 
+/*
+ * Sample the current page and byte totals and raise the recorded maxima
+ * (obd_max_pages / obd_max_alloc) if they have been exceeded.  The totals
+ * are collected before obd_updatemax_lock is taken; only the compare and
+ * store happen under the lock.
+ */
+void obd_update_maxusage(void)
+{
+        __u64 pages_now, bytes_now;
+
+        pages_now = obd_pages_sum();
+        bytes_now = obd_memory_sum();
+
+        spin_lock(&obd_updatemax_lock);
+        if (pages_now > obd_max_pages)
+                obd_max_pages = pages_now;
+        if (bytes_now > obd_max_alloc)
+                obd_max_alloc = bytes_now;
+        spin_unlock(&obd_updatemax_lock);
+}
+
+/* Return the recorded byte maximum, read under obd_updatemax_lock. */
+__u64 obd_memory_max(void)
+{
+        __u64 peak;
+
+        spin_lock(&obd_updatemax_lock);
+        peak = obd_max_alloc;
+        spin_unlock(&obd_updatemax_lock);
+
+        return peak;
+}
+
+/* Return the recorded page maximum, read under obd_updatemax_lock. */
+__u64 obd_pages_max(void)
+{
+        __u64 peak;
+
+        spin_lock(&obd_updatemax_lock);
+        peak = obd_max_pages;
+        spin_unlock(&obd_updatemax_lock);
+
+        return peak;
+}
+
+EXPORT_SYMBOL(obd_update_maxusage);
+EXPORT_SYMBOL(obd_pages_max);
+EXPORT_SYMBOL(obd_memory_max);
+EXPORT_SYMBOL(obd_memory);
+
+#ifdef LPROCFS
+/*
+ * Read a single field of a counter.
+ *
+ * lc:    counter to read; NULL yields 0.
+ * field: which member to return (enum lprocfs_fields_flags); a value not
+ *        handled by the switch yields 0.
+ *
+ * The read is retried for as long as la_entry has changed since it was
+ * sampled and the sample also differs from la_exit.
+ *
+ * NOTE(review): LPROCFS_FIELDS_FLAGS_AVG returns (lc_max - lc_min) / 2,
+ * which is neither lc_sum / lc_count nor a midpoint -- confirm the intent.
+ */
+__s64 lprocfs_read_helper(struct lprocfs_counter *lc,
+                          enum lprocfs_fields_flags field)
+{
+        /* signed, like the lc_* members and the declared return type */
+        __s64 ret = 0;
+        int centry;
+
+        if (!lc)
+                RETURN(0);
+        do {
+                centry = atomic_read(&lc->lc_cntl.la_entry);
+
+                switch (field) {
+                        case LPROCFS_FIELDS_FLAGS_CONFIG:
+                                ret = lc->lc_config;
+                                break;
+                        case LPROCFS_FIELDS_FLAGS_SUM:
+                                ret = lc->lc_sum;
+                                break;
+                        case LPROCFS_FIELDS_FLAGS_MIN:
+                                ret = lc->lc_min;
+                                break;
+                        case LPROCFS_FIELDS_FLAGS_MAX:
+                                ret = lc->lc_max;
+                                break;
+                        case LPROCFS_FIELDS_FLAGS_AVG:
+                                ret = (lc->lc_max - lc->lc_min)/2;
+                                break;
+                        case LPROCFS_FIELDS_FLAGS_SUMSQUARE:
+                                ret = lc->lc_sumsquare;
+                                break;
+                        case LPROCFS_FIELDS_FLAGS_COUNT:
+                                ret = lc->lc_count;
+                                break;
+                        default:
+                                break;
+                }
+        } while (centry != atomic_read(&lc->lc_cntl.la_entry) &&
+                 centry != atomic_read(&lc->lc_cntl.la_exit));
+
+        RETURN(ret);
+}
+EXPORT_SYMBOL(lprocfs_read_helper);
+#endif /* LPROCFS */
+
 static int __init lvfs_linux_init(void)
 {
         ENTRY;
index 571d85b..76821e4 100644 (file)
@@ -93,7 +93,7 @@ static int mds_sendpage(struct ptlrpc_request *req, struct file *file,
         for (i = 0, tmpcount = count; i < npages; i++, tmpcount -= tmpsize) {
                 tmpsize = tmpcount > CFS_PAGE_SIZE ? CFS_PAGE_SIZE : tmpcount;
 
-                pages[i] = cfs_alloc_page(CFS_ALLOC_STD);
+                OBD_PAGE_ALLOC(pages[i], CFS_ALLOC_STD);
                 if (pages[i] == NULL)
                         GOTO(cleanup_buf, rc = -ENOMEM);
 
@@ -152,7 +152,7 @@ static int mds_sendpage(struct ptlrpc_request *req, struct file *file,
  cleanup_buf:
         for (i = 0; i < npages; i++)
                 if (pages[i])
-                        __cfs_free_page(pages[i]);
+                        OBD_PAGE_FREE(pages[i]);
 
         ptlrpc_free_bulk(desc);
  out_free:
index 84753fb..2e87394 100644 (file)
@@ -51,9 +51,12 @@ atomic_t libcfs_kmemory = {0};
 struct obd_device *obd_devs[MAX_OBD_DEVICES];
 struct list_head obd_types;
 spinlock_t obd_dev_lock = SPIN_LOCK_UNLOCKED;
+
 #ifndef __KERNEL__
-atomic_t obd_memory;
-int obd_memmax;
+__u64 obd_max_pages = 0;
+__u64 obd_max_alloc = 0;
+__u64 obd_alloc;
+__u64 obd_pages;
 #endif
 
 /* The following are visible and mutable through /proc/sys/lustre/. */
@@ -545,7 +548,21 @@ int init_obdclass(void)
         spin_lock_init(&obd_types_lock);
         cfs_waitq_init(&obd_race_waitq);
         obd_zombie_impexp_init();
-
+#ifdef LPROCFS
+        obd_memory = lprocfs_alloc_stats(OBD_STATS_NUM, 
+                                         LPROCFS_STATS_FLAG_PERCPU);
+        if (obd_memory == NULL) {
+                CERROR("kmalloc of 'obd_memory' failed\n");
+                RETURN(-ENOMEM);
+        }
+        lprocfs_counter_init(obd_memory, OBD_MEMORY_STAT,
+                             LPROCFS_CNTR_AVGMINMAX, 
+                             "memused", "bytes");
+        lprocfs_counter_init(obd_memory, OBD_MEMORY_PAGES_STAT,
+                             LPROCFS_CNTR_AVGMINMAX, 
+                             "pagesused", "pages");
+#endif
         err = obd_init_checks();
         if (err == -EOVERFLOW)
                 return err;
@@ -594,6 +611,8 @@ static void cleanup_obdclass(void)
 {
         int i;
         int lustre_unregister_fs(void);
+        __u64 memory_leaked, pages_leaked;
+        __u64 memory_max, pages_max;
         ENTRY;
 
         lustre_unregister_fs();
@@ -617,6 +636,23 @@ static void cleanup_obdclass(void)
 
         class_handle_cleanup();
         class_exit_uuidlist();
+
+        memory_leaked = obd_memory_sum();
+        pages_leaked = obd_pages_sum();
+        
+        memory_max = obd_memory_max();
+        pages_max = obd_pages_max();
+
+        lprocfs_free_stats(&obd_memory);
+        if (memory_leaked > 0) {
+                CWARN("Memory leaks detected (max "LPU64", leaked "LPD64")\n",
+                      memory_max, memory_leaked);
+        }
+        if (pages_leaked > 0) {
+                CWARN("Page leaks detected (max "LPU64", leaked "LPU64")\n",
+                      pages_max, pages_leaked);
+        }
         EXIT;
 }
 
index ba3d186..05b4baf 100644 (file)
@@ -53,6 +53,9 @@ enum {
         OBD_TIMEOUT,            /* RPC timeout before recovery/intr */
         OBD_DUMP_ON_TIMEOUT,    /* dump kernel debug log upon eviction */
         OBD_MEMUSED,            /* bytes currently OBD_ALLOCated */
+        OBD_PAGESUSED,          /* pages currently OBD_PAGE_ALLOCated */
+        OBD_MAXMEMUSED,         /* maximum bytes OBD_ALLOCated concurrently */
+        OBD_MAXPAGESUSED,       /* maximum pages OBD_PAGE_ALLOCated concurrently */
         OBD_SYNCFILTER,         /* XXX temporary, as we play with sync osts.. */
         OBD_LDLM_TIMEOUT,       /* LDLM timeout for ASTs before client eviction */
         OBD_DUMP_ON_EVICTION,   /* dump kernel debug log upon eviction */
@@ -80,6 +83,102 @@ int LL_PROC_PROTO(proc_set_timeout)
         return rc;
 }
 
+/*
+ * Handler that reports obd_memory_sum() as one decimal line.
+ *
+ * A zero-length request, or a read at a non-zero position, yields no data
+ * and returns 0; any other write is rejected with -EINVAL.  Otherwise the
+ * text is truncated to the caller's length, copied to the user buffer
+ * (-EFAULT on failure), and *lenp / *ppos are advanced by the bytes copied.
+ */
+int LL_PROC_PROTO(proc_memory_alloc)
+{
+        /* presumably sized for a 20-digit value, newline and NUL */
+        char text[22];
+        int nbytes;
+        DECLARE_LL_PROC_PPOS_DECL;
+
+        if (!*lenp || (*ppos && !write)) {
+                *lenp = 0;
+                return 0;
+        }
+        if (write)
+                return -EINVAL;
+
+        nbytes = snprintf(text, sizeof(text), LPU64"\n", obd_memory_sum());
+        if (nbytes > *lenp)
+                nbytes = *lenp;
+        text[nbytes] = '\0';
+        if (copy_to_user(buffer, text, nbytes))
+                return -EFAULT;
+        *lenp = nbytes;
+        *ppos += *lenp;
+        return 0;
+}
+
+/*
+ * Handler that reports obd_pages_sum() as one decimal line.
+ *
+ * A zero-length request, or a read at a non-zero position, yields no data
+ * and returns 0; any other write is rejected with -EINVAL.  Otherwise the
+ * text is truncated to the caller's length, copied to the user buffer
+ * (-EFAULT on failure), and *lenp / *ppos are advanced by the bytes copied.
+ */
+int LL_PROC_PROTO(proc_pages_alloc)
+{
+        /* presumably sized for a 20-digit value, newline and NUL */
+        char text[22];
+        int nbytes;
+        DECLARE_LL_PROC_PPOS_DECL;
+
+        if (!*lenp || (*ppos && !write)) {
+                *lenp = 0;
+                return 0;
+        }
+        if (write)
+                return -EINVAL;
+
+        nbytes = snprintf(text, sizeof(text), LPU64"\n", obd_pages_sum());
+        if (nbytes > *lenp)
+                nbytes = *lenp;
+        text[nbytes] = '\0';
+        if (copy_to_user(buffer, text, nbytes))
+                return -EFAULT;
+        *lenp = nbytes;
+        *ppos += *lenp;
+        return 0;
+}
+
+/*
+ * Handler that reports obd_memory_max() as one decimal line.
+ *
+ * A zero-length request, or a read at a non-zero position, yields no data
+ * and returns 0; any other write is rejected with -EINVAL.  Otherwise the
+ * text is truncated to the caller's length, copied to the user buffer
+ * (-EFAULT on failure), and *lenp / *ppos are advanced by the bytes copied.
+ */
+int LL_PROC_PROTO(proc_mem_max)
+{
+        /* presumably sized for a 20-digit value, newline and NUL */
+        char text[22];
+        int nbytes;
+        DECLARE_LL_PROC_PPOS_DECL;
+
+        if (!*lenp || (*ppos && !write)) {
+                *lenp = 0;
+                return 0;
+        }
+        if (write)
+                return -EINVAL;
+
+        nbytes = snprintf(text, sizeof(text), LPU64"\n", obd_memory_max());
+        if (nbytes > *lenp)
+                nbytes = *lenp;
+        text[nbytes] = '\0';
+        if (copy_to_user(buffer, text, nbytes))
+                return -EFAULT;
+        *lenp = nbytes;
+        *ppos += *lenp;
+        return 0;
+}
+
+/*
+ * Handler that reports obd_pages_max() as one decimal line.
+ *
+ * A zero-length request, or a read at a non-zero position, yields no data
+ * and returns 0; any other write is rejected with -EINVAL.  Otherwise the
+ * text is truncated to the caller's length, copied to the user buffer
+ * (-EFAULT on failure), and *lenp / *ppos are advanced by the bytes copied.
+ */
+int LL_PROC_PROTO(proc_pages_max)
+{
+        /* presumably sized for a 20-digit value, newline and NUL */
+        char text[22];
+        int nbytes;
+        DECLARE_LL_PROC_PPOS_DECL;
+
+        if (!*lenp || (*ppos && !write)) {
+                *lenp = 0;
+                return 0;
+        }
+        if (write)
+                return -EINVAL;
+
+        nbytes = snprintf(text, sizeof(text), LPU64"\n", obd_pages_max());
+        if (nbytes > *lenp)
+                nbytes = *lenp;
+        text[nbytes] = '\0';
+        if (copy_to_user(buffer, text, nbytes))
+                return -EFAULT;
+        *lenp = nbytes;
+        *ppos += *lenp;
+        return 0;
+}
+
 static cfs_sysctl_table_t obd_table[] = {
         {
                 .ctl_name = OBD_FAIL_LOC,
@@ -132,10 +231,34 @@ static cfs_sysctl_table_t obd_table[] = {
         {
                 .ctl_name = OBD_MEMUSED,
                 .procname = "memused",
-                .data     = (int *)&obd_memory.counter,
-                .maxlen   = sizeof(int),
-                .mode     = 0644,
-                .proc_handler = &proc_dointvec
+                .data     = NULL,
+                .maxlen   = 0,
+                .mode     = 0444,
+                .proc_handler = &proc_memory_alloc
+        },
+        {
+                .ctl_name = OBD_PAGESUSED,
+                .procname = "pagesused",
+                .data     = NULL,
+                .maxlen   = 0,
+                .mode     = 0444,
+                .proc_handler = &proc_pages_alloc
+        },
+        {
+                .ctl_name = OBD_MAXMEMUSED,
+                .procname = "memused_max",
+                .data     = NULL,
+                .maxlen   = 0,
+                .mode     = 0444,
+                .proc_handler = &proc_mem_max
+        },
+        {
+                .ctl_name = OBD_MAXPAGESUSED,
+                .procname = "pagesused_max",
+                .data     = NULL,
+                .maxlen   = 0,
+                .mode     = 0444,
+                .proc_handler = &proc_pages_max
         },
         {
                 .ctl_name = OBD_LDLM_TIMEOUT,
index 89572b5..04e2924 100644 (file)
@@ -1305,6 +1305,7 @@ static struct vfsmount *server_kernel_mount(struct super_block *sb)
         struct vfsmount *mnt;
         char *options = NULL;
         unsigned long page, s_flags;
+        struct page *__page;
         int rc;
         ENTRY;
 
@@ -1343,9 +1344,10 @@ static struct vfsmount *server_kernel_mount(struct super_block *sb)
         /* Done with our pre-mount, now do the real mount. */
 
         /* Glom up mount options */
-        page = __get_free_page(GFP_KERNEL);
-        if (!page)
+        OBD_PAGE_ALLOC(__page, CFS_ALLOC_STD);
+        if (!__page)
                 GOTO(out_free, rc = -ENOMEM);
+        page = (unsigned long)cfs_page_address(__page);
 
         options = (char *)page;
         memset(options, 0, CFS_PAGE_SIZE);
@@ -1367,7 +1369,7 @@ static struct vfsmount *server_kernel_mount(struct super_block *sb)
                MT_STR(ldd), lmd->lmd_dev, options);
         mnt = ll_kern_mount(MT_STR(ldd), s_flags, lmd->lmd_dev,
                             (void *)options);
-        free_page(page);
+        OBD_PAGE_FREE(__page);
         if (IS_ERR(mnt)) {
                 rc = PTR_ERR(mnt);
                 CERROR("ll_kern_mount failed: rc = %d\n", rc);
index e47ee66..6ac884e 100644 (file)
@@ -311,7 +311,7 @@ int echo_preprw(int cmd, struct obd_export *export, struct obdo *oa,
                                 /* Take extra ref so __free_pages() can be called OK */
                                 cfs_get_page (r->page);
                         } else {
-                                r->page = cfs_alloc_page(gfp_mask);
+                                OBD_PAGE_ALLOC(r->page, gfp_mask);
                                 if (r->page == NULL) {
                                         CERROR("can't get page %u/%u for id "
                                                LPU64"\n",
@@ -362,7 +362,7 @@ preprw_cleanup:
                 cfs_kunmap(r->page);
                 /* NB if this is a persistent page, __free_pages will just
                  * lose the extra ref gained above */
-                cfs_free_page(r->page);
+                OBD_PAGE_FREE(r->page);
                 atomic_dec(&obd->u.echo.eo_prep);
         }
         memset(res, 0, sizeof(*res) * niocount);
@@ -433,7 +433,7 @@ int echo_commitrw(int cmd, struct obd_export *export, struct obdo *oa,
 
                         cfs_kunmap(page);
                         /* NB see comment above regarding persistent pages */
-                        cfs_free_page(page);
+                        OBD_PAGE_FREE(page);
                         atomic_dec(&obd->u.echo.eo_prep);
                 }
         }
@@ -448,7 +448,7 @@ commitrw_cleanup:
                 cfs_page_t *page = r->page;
 
                 /* NB see comment above regarding persistent pages */
-                cfs_free_page(page);
+                OBD_PAGE_FREE(page);
                 atomic_dec(&obd->u.echo.eo_prep);
         }
         return rc;
@@ -544,7 +544,7 @@ echo_persistent_pages_fini (void)
 
         for (i = 0; i < ECHO_PERSISTENT_PAGES; i++)
                 if (echo_persistent_pages[i] != NULL) {
-                        cfs_free_page (echo_persistent_pages[i]);
+                        OBD_PAGE_FREE(echo_persistent_pages[i]);
                         echo_persistent_pages[i] = NULL;
                 }
 }
@@ -559,7 +559,7 @@ echo_persistent_pages_init (void)
                 int gfp_mask = (i < ECHO_PERSISTENT_PAGES/2) ?
                         CFS_ALLOC_STD : CFS_ALLOC_HIGHUSER;
 
-                pg = cfs_alloc_page (gfp_mask);
+                OBD_PAGE_ALLOC(pg, gfp_mask);
                 if (pg == NULL) {
                         echo_persistent_pages_fini ();
                         return (-ENOMEM);
index c5d32ab..00cd063 100644 (file)
@@ -543,7 +543,7 @@ static int echo_client_kbrw(struct obd_device *obd, int rw, struct obdo *oa,
                 LASSERT (pgp->pg == NULL);      /* for cleanup */
 
                 rc = -ENOMEM;
-                pgp->pg = cfs_alloc_page (gfp_mask);
+                OBD_PAGE_ALLOC(pgp->pg, gfp_mask);
                 if (pgp->pg == NULL)
                         goto out;
 
@@ -575,7 +575,7 @@ static int echo_client_kbrw(struct obd_device *obd, int rw, struct obdo *oa,
                         if (vrc != 0 && rc == 0)
                                 rc = vrc;
                 }
-                cfs_free_page(pgp->pg);
+                OBD_PAGE_FREE(pgp->pg);
         }
         OBD_FREE(pga, npages * sizeof(*pga));
         return (rc);
@@ -734,13 +734,14 @@ static int echo_client_async_page(struct obd_export *exp, int rw,
         /* prepare the group of pages that we're going to be keeping
          * in flight */
         for (i = 0; i < npages; i++) {
-                cfs_page_t *page = cfs_alloc_page(CFS_ALLOC_STD);
+                cfs_page_t *page;
+                OBD_PAGE_ALLOC(page, CFS_ALLOC_STD);
                 if (page == NULL)
                         GOTO(out, rc = -ENOMEM);
 
                 OBD_ALLOC(eap, sizeof(*eap));
                 if (eap == NULL) {
-                        cfs_free_page(page);
+                        OBD_PAGE_FREE(page);
                         GOTO(out, rc = -ENOMEM);
                 }
 
@@ -835,7 +836,7 @@ out:
                                 obd_teardown_async_page(exp, lsm, NULL,
                                                         eap->eap_cookie);
                         OBD_FREE(eap, sizeof(*eap));
-                        cfs_free_page(page);
+                        OBD_PAGE_FREE(page);
                 }
                 OBD_FREE(aps, npages * sizeof aps[0]);
         }
index 3bf974d..bcdd5f5 100644 (file)
@@ -2015,7 +2015,8 @@ err_mntput:
 static int filter_setup(struct obd_device *obd, struct lustre_cfg* lcfg)
 {
         struct lprocfs_static_vars lvars;
-        unsigned long page;
+        unsigned long addr;
+        struct page *page;
         int rc;
 
         CLASSERT(offsetof(struct obd_device, u.obt) ==
@@ -2025,9 +2026,11 @@ static int filter_setup(struct obd_device *obd, struct lustre_cfg* lcfg)
                 RETURN(-EINVAL);
 
         /* 2.6.9 selinux wants a full option page for do_kern_mount (bug6471) */
-        page = get_zeroed_page(GFP_KERNEL);
+        OBD_PAGE_ALLOC(page, CFS_ALLOC_STD);
         if (!page)
                 RETURN(-ENOMEM);
+        addr = (unsigned long)cfs_page_address(page);
+        clear_page((void *)addr);
 
         /* lprocfs must be setup before the filter so state can be safely added
          * to /proc incrementally as the filter is setup */
@@ -2047,10 +2050,10 @@ static int filter_setup(struct obd_device *obd, struct lustre_cfg* lcfg)
                                                    obd->obd_proc_entry);
         }
 
-        memcpy((void *)page, lustre_cfg_buf(lcfg, 4),
+        memcpy((void *)addr, lustre_cfg_buf(lcfg, 4),
                LUSTRE_CFG_BUFLEN(lcfg, 4));
-        rc = filter_common_setup(obd, lcfg, (void *)page);
-        free_page(page);
+        rc = filter_common_setup(obd, lcfg, (void *)addr);
+        OBD_PAGE_FREE(page);
 
         if (rc) {
                 lprocfs_obd_cleanup(obd);
index 833efef..27e25bb 100644 (file)
@@ -1698,7 +1698,7 @@ static void ost_thread_done(struct ptlrpc_thread *thread)
         if (tls != NULL) {
                 for (i = 0; i < OST_THREAD_POOL_SIZE; ++ i) {
                         if (tls->page[i] != NULL)
-                                __cfs_free_page(tls->page[i]);
+                                OBD_PAGE_FREE(tls->page[i]);
                 }
                 OBD_FREE_PTR(tls);
                 thread->t_data = NULL;
@@ -1729,7 +1729,7 @@ static int ost_thread_init(struct ptlrpc_thread *thread)
                  * populate pool
                  */
                 for (i = 0; i < OST_THREAD_POOL_SIZE; ++ i) {
-                        tls->page[i] = cfs_alloc_page(OST_THREAD_POOL_GFP);
+                        OBD_PAGE_ALLOC(tls->page[i], OST_THREAD_POOL_GFP);
                         if (tls->page[i] == NULL) {
                                 ost_thread_done(thread);
                                 result = -ENOMEM;
index 4d5a9c9..aa65e6b 100644 (file)
@@ -222,6 +222,8 @@ static int ptlrpc_pinger_main(void *arg)
                                 ptlrpc_update_next_ping(imp);
                 }
                 mutex_up(&pinger_sem);
+                /* update memory usage info */
+                obd_update_maxusage();
 
                 /* Wait until the next ping time, or until we're stopped. */
                 time_to_next_ping = cfs_time_sub(cfs_time_add(this_ping, 
index 5219996..aa783bb 100644 (file)
@@ -14,7 +14,7 @@ my $max = 0;
 while ($line = <>) {
     $debug_line++;
     my ($file, $func, $lno, $name, $size, $addr, $type);
-    if ($line =~ m/^.*(\.).*\((.*):(\d+):(.*)\(\)\) (k|v|slab-)(.*) '(.*)': (\d+) at (.*) \(tot (.*)\).*$/){
+    if ($line =~ m/^.*(\.).*\((.*):(\d+):(.*)\(\)\) (k|v|slab-)(.*) '(.*)': (\d+) at (.*)\..*$/){
         $file = $2;
         $lno  = $3;
         $func = $4;
@@ -22,7 +22,6 @@ while ($line = <>) {
         $name = $7;
         $size = $8;
         $addr = $9;
-        $tot  = $10;
 
        # we can't dump the log after portals has exited, so skip "leaks"
        # from memory freed in the portals module unloading.
@@ -69,10 +68,6 @@ while ($line = <>) {
         delete $memory->{$addr};
         $total -= $size;
     }
-    if ($total != int($tot)) {
-        print "kernel total $tot != my total $total\n";
-        $total = $tot;
-    }
 }
 
 # Sort leak output by allocation time