From: bwzhou Date: Mon, 5 Nov 2007 15:45:48 +0000 (+0000) Subject: Branch HEAD X-Git-Tag: v1_7_0_51~535 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=8ff60aa7ec26d864eb3d260a6e27e4d2958a827a Branch HEAD b=11622 i=adilger i=johann 1. add page allocation statistics for lustre; 2. use percpu counters for both memory and page accounting; 3. update leak-finder to understand the new format of debug info. --- diff --git a/lustre/include/lprocfs_status.h b/lustre/include/lprocfs_status.h index 44d1a2a..3444250 100644 --- a/lustre/include/lprocfs_status.h +++ b/lustre/include/lprocfs_status.h @@ -123,11 +123,11 @@ struct lprocfs_atomic { struct lprocfs_counter { struct lprocfs_atomic lc_cntl; /* may need to move to per set */ unsigned int lc_config; - __u64 lc_count; - __u64 lc_sum; - __u64 lc_min; - __u64 lc_max; - __u64 lc_sumsquare; + __s64 lc_count; + __s64 lc_sum; + __s64 lc_min; + __s64 lc_max; + __s64 lc_sumsquare; const char *lc_name; /* must be static */ const char *lc_units; /* must be static */ }; @@ -140,10 +140,21 @@ struct lprocfs_percpu { #define LPROCFS_GET_SMP_ID 0x0002 enum lprocfs_stats_flags { + LPROCFS_STATS_FLAG_PERCPU = 0x0000, /* per cpu counter */ LPROCFS_STATS_FLAG_NOPERCPU = 0x0001, /* stats have no percpu * area and need locking */ }; +enum lprocfs_fields_flags { + LPROCFS_FIELDS_FLAGS_CONFIG = 0x0001, + LPROCFS_FIELDS_FLAGS_SUM = 0x0002, + LPROCFS_FIELDS_FLAGS_MIN = 0x0003, + LPROCFS_FIELDS_FLAGS_MAX = 0x0004, + LPROCFS_FIELDS_FLAGS_AVG = 0x0005, + LPROCFS_FIELDS_FLAGS_SUMSQUARE = 0x0006, + LPROCFS_FIELDS_FLAGS_COUNT = 0x0007, +}; + struct lprocfs_stats { unsigned int ls_num; /* # of counters */ unsigned int ls_percpu_size; @@ -225,7 +236,11 @@ static inline void lprocfs_counter_add(struct lprocfs_stats *stats, int idx, lprocfs_stats_unlock(stats); } -static inline void lprocfs_counter_incr(struct lprocfs_stats *stats, int idx) +#define lprocfs_counter_incr(stats, idx) \ + lprocfs_counter_add(stats, idx, 1) + +static inline void lprocfs_counter_sub(struct lprocfs_stats *stats, int idx, + long amount) { struct lprocfs_counter *percpu_cntr; int smp_id; @@ -233,14 +248,35 @@ static inline void lprocfs_counter_incr(struct lprocfs_stats *stats, int idx) if (stats == NULL) return; + /* With per-client stats, statistics are allocated only for + * single CPU area, so the smp_id should be 0 always. */ smp_id = lprocfs_stats_lock(stats, LPROCFS_GET_SMP_ID); - percpu_cntr = &(stats->ls_percpu[smp_id]->lp_cntr[idx]); + percpu_cntr = &(stats->ls_percpu[smp_id]->lp_cntr[idx]); atomic_inc(&percpu_cntr->lc_cntl.la_entry); - percpu_cntr->lc_count++; + if (percpu_cntr->lc_config & LPROCFS_CNTR_AVGMINMAX) + percpu_cntr->lc_sum -= amount; atomic_inc(&percpu_cntr->lc_cntl.la_exit); lprocfs_stats_unlock(stats); } +#define lprocfs_counter_decr(stats, idx) \ + lprocfs_counter_sub(stats, idx, 1) + +extern __s64 lprocfs_read_helper(struct lprocfs_counter *lc, + enum lprocfs_fields_flags field); +static inline __u64 lprocfs_stats_collector(struct lprocfs_stats *stats, + int idx, + enum lprocfs_fields_flags field) +{ + __u64 ret = 0; + int i; + + LASSERT(stats != NULL); + for (i = 0; i < num_possible_cpus(); i++) + ret += lprocfs_read_helper(&(stats->ls_percpu[i]->lp_cntr[idx]), + field); + return ret; +} extern struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num, enum lprocfs_stats_flags flags); @@ -440,13 +476,19 @@ static inline void lprocfs_counter_add(struct lprocfs_stats *stats, int index, long amount) { return; } static inline void lprocfs_counter_incr(struct lprocfs_stats *stats, int index) { return; } +static inline void lprocfs_counter_sub(struct lprocfs_stats *stats, + int index, long amount) { return; } static inline void lprocfs_counter_init(struct lprocfs_stats *stats, int index, unsigned conf, const char *name, const char *units) { return; } +static inline __u64 lc_read_helper(struct lprocfs_counter *lc, + enum lprocfs_fields_flags field) +{ return 0; } + static inline struct lprocfs_stats* lprocfs_alloc_stats(unsigned int num, - int client_stat) + enum lprocfs_stats_flags flags) { return NULL; } static inline void lprocfs_clear_stats(struct lprocfs_stats *stats) { return; } @@ -551,6 +593,11 @@ static inline int lprocfs_counter_write(struct file *file, const char *buffer, unsigned long count, void *data) { return 0; } +static inline +__u64 lprocfs_stats_collector(struct lprocfs_stats *stats, int idx, + enum lprocfs_fields_flags field) +{ return (__u64)0; } + #define LPROCFS_ENTRY() #define LPROCFS_EXIT() #define LPROCFS_ENTRY_AND_CHECK(dp) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 6d5efb3..a883129 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -24,10 +24,16 @@ #define _OBD_SUPPORT #include +#include /* global variables */ -extern atomic_t obd_memory; -extern int obd_memmax; +extern struct lprocfs_stats *obd_memory; +enum { + OBD_MEMORY_STAT = 0, + OBD_MEMORY_PAGES_STAT = 1, + OBD_STATS_NUM, +}; + extern unsigned int obd_fail_loc; extern unsigned int obd_fail_val; extern unsigned int obd_debug_peer_on_timeout; @@ -361,6 +367,68 @@ do { \ extern atomic_t libcfs_kmemory; +#ifdef LPROCFS +#define obd_memory_add(size) \ + lprocfs_counter_add(obd_memory, OBD_MEMORY_STAT, (long)(size)) +#define obd_memory_sub(size) \ + lprocfs_counter_sub(obd_memory, OBD_MEMORY_STAT, (long)(size)) +#define obd_memory_sum() \ + lprocfs_stats_collector(obd_memory, OBD_MEMORY_STAT, \ + LPROCFS_FIELDS_FLAGS_SUM) +#define obd_pages_add(order) \ + lprocfs_counter_add(obd_memory, OBD_MEMORY_PAGES_STAT, \ + (long)(1 << (order))) +#define obd_pages_sub(order) \ + lprocfs_counter_sub(obd_memory, OBD_MEMORY_PAGES_STAT, \ + (long)(1 << (order))) +#define obd_pages_sum() \ + lprocfs_stats_collector(obd_memory, OBD_MEMORY_PAGES_STAT, \ + LPROCFS_FIELDS_FLAGS_SUM) + +extern void obd_update_maxusage(void); +extern __u64 obd_memory_max(void); +extern __u64 obd_pages_max(void); + +#else + +extern __u64 obd_alloc; +extern __u64 obd_pages; + +extern __u64 obd_max_alloc; +extern __u64 obd_max_pages; + +static inline void obd_memory_add(long size) +{ + obd_alloc += size; + if (obd_alloc > obd_max_alloc) + obd_max_alloc = obd_alloc; +} + +static inline void obd_memory_sub(long size) +{ + obd_alloc -= size; +} + +static inline void obd_pages_add(int order) +{ + obd_pages += 1<< order; + if (obd_pages > obd_max_pages) + obd_max_pages = obd_pages; +} + +static inline void obd_pages_sub(int order) +{ + obd_pages -= 1<< order; +} + +#define obd_memory_sum() (obd_alloc) +#define obd_pages_sum() (obd_pages) + +#define obd_memory_max() (obd_max_alloc) +#define obd_pages_max() (obd_max_pages) + +#endif + #if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__) #define OBD_MT_WRONG_SIZE (1 << 0) @@ -476,19 +544,17 @@ __put_mem_track(void *ptr, int size, #if OBD_DEBUG_MEMUSAGE #define OBD_ALLOC_POST(ptr, size, name) \ - atomic_add(size, &obd_memory); \ - if (atomic_read(&obd_memory) > obd_memmax) \ - obd_memmax = atomic_read(&obd_memory); \ + obd_memory_add(size); \ get_mem_track((ptr), (size), __FILE__, __LINE__); \ - CDEBUG(D_MALLOC, name " '" #ptr "': %d at %p (tot %d)\n", \ - (int)(size), ptr, atomic_read(&obd_memory)) + CDEBUG(D_MALLOC, name " '" #ptr "': %d at %p.\n", \ + (int)(size), ptr) #define OBD_FREE_PRE(ptr, size, name) \ LASSERT(ptr); \ put_mem_track((ptr), (size), __FILE__, __LINE__); \ - atomic_sub(size, &obd_memory); \ - CDEBUG(D_MALLOC, name " '" #ptr "': %d at %p (tot %d).\n", \ - (int)(size), ptr, atomic_read(&obd_memory)); \ + obd_memory_sub(size); \ + CDEBUG(D_MALLOC, name " '" #ptr "': %d at %p.\n", \ + (int)(size), ptr); \ POISON(ptr, 0x5a, size) #else /* !OBD_DEBUG_MEMUSAGE */ @@ -516,10 +582,10 @@ do { \ do { \ (ptr) = cfs_alloc(size, (gfp_mask)); \ if (unlikely((ptr) == NULL)) { \ - CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \ - (int)(size), __FILE__, __LINE__); \ - CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \ - atomic_read(&obd_memory), atomic_read(&libcfs_kmemory));\ + CERROR("kmalloc of '" #ptr "' (%d bytes) failed\n", \ + (int)(size)); \ + CERROR(LPU64" total bytes allocated by Lustre, %d by LNET\n", \ + obd_memory_sum(), atomic_read(&libcfs_kmemory)); \ } else { \ memset(ptr, 0, size); \ OBD_ALLOC_POST(ptr, size, "kmalloced"); \ @@ -543,10 +609,10 @@ do { \ do { \ (ptr) = cfs_alloc_large(size); \ if (unlikely((ptr) == NULL)) { \ - CERROR("vmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \ - (int)(size), __FILE__, __LINE__); \ - CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \ - atomic_read(&obd_memory), atomic_read(&libcfs_kmemory));\ + CERROR("vmalloc of '" #ptr "' (%d bytes) failed\n", \ + (int)(size)); \ + CERROR(LPU64" total bytes allocated by Lustre, %d by LNET\n", \ + obd_memory_sum(), atomic_read(&libcfs_kmemory)); \ } else { \ memset(ptr, 0, size); \ OBD_ALLOC_POST(ptr, size, "vmalloced"); \ @@ -625,10 +691,10 @@ do { \ LASSERT(!in_interrupt()); \ (ptr) = cfs_mem_cache_alloc(slab, (type)); \ if (unlikely((ptr) == NULL)) { \ - CERROR("slab-alloc of '"#ptr"' (%d bytes) failed at %s:%d\n", \ - (int)(size), __FILE__, __LINE__); \ - CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \ - atomic_read(&obd_memory), atomic_read(&libcfs_kmemory));\ + CERROR("slab-alloc of '"#ptr"' (%d bytes) failed\n", \ + (int)(size)); \ + CERROR(LPU64" total bytes allocated by Lustre, %d by LNET\n", \ + obd_memory_sum(), atomic_read(&libcfs_kmemory)); \ } else { \ memset(ptr, 0, size); \ OBD_ALLOC_POST(ptr, size, "slab-alloced"); \ @@ -651,6 +717,47 @@ do { \ #define KEY_IS(str) (keylen >= strlen(str) && strcmp(key, str) == 0) +/* Wrapper for contiguous page frame allocation */ +#define OBD_PAGES_ALLOC(ptr, order, gfp_mask) \ +do { \ + (ptr) = cfs_alloc_pages(gfp_mask, order); \ + if (unlikely((ptr) == NULL)) { \ + CERROR("alloc_pages of '" #ptr "' %d page(s) / "LPU64" bytes "\ + "failed\n", (int)(1 << (order)), \ + (__u64)((1 << (order)) << CFS_PAGE_SHIFT)); \ + CERROR(LPU64" total bytes and "LPU64" total pages " \ + "("LPU64" bytes) allocated by Lustre, " \ + "%d total bytes by LNET\n", \ + obd_memory_sum(), \ + obd_pages_sum() << CFS_PAGE_SHIFT, \ + obd_pages_sum(), \ + atomic_read(&libcfs_kmemory)); \ + } else { \ + obd_pages_add(order); \ + CDEBUG(D_MALLOC, "alloc_pages '" #ptr "': %d page(s) / " \ + LPU64" bytes at %p.\n", \ + (int)(1 << (order)), \ + (__u64)((1 << (order)) << CFS_PAGE_SHIFT), ptr); \ + } \ +} while (0) + +#define OBD_PAGE_ALLOC(ptr, gfp_mask) \ + OBD_PAGES_ALLOC(ptr, 0, gfp_mask) + +#define OBD_PAGES_FREE(ptr, order) \ +do { \ + LASSERT(ptr); \ + obd_pages_sub(order); \ + CDEBUG(D_MALLOC, "free_pages '" #ptr "': %d page(s) / "LPU64" bytes " \ + "at %p.\n", \ + (int)(1 << (order)), (__u64)((1 << (order)) << CFS_PAGE_SHIFT),\ + ptr); \ + __cfs_free_pages(ptr, order); \ + (ptr) = (void *)0xdeadbeef; \ +} while (0) + +#define OBD_PAGE_FREE(ptr) OBD_PAGES_FREE(ptr, 0) + #if defined(__linux__) #include #elif defined(__APPLE__) diff --git a/lustre/liblustre/dir.c b/lustre/liblustre/dir.c index 0f2bc90..88c0f3e 100644 --- a/lustre/liblustre/dir.c +++ b/lustre/liblustre/dir.c @@ -126,16 +126,14 @@ static struct page *llu_dir_read_page(struct inode *ino, unsigned long pgidx) int rc; ENTRY; - page = cfs_alloc_page(0); - if (!page) { - CERROR("alloc page failed\n"); + OBD_PAGE_ALLOC(page, 0); + if (!page) RETURN(ERR_PTR(-ENOMEM)); - } page->index = pgidx; rc = llu_dir_do_readpage(ino, page); if (rc) { - free_page(page); + OBD_PAGE_FREE(page); RETURN(ERR_PTR(rc)); } @@ -248,7 +246,7 @@ ssize_t llu_iop_filldirentries(struct inode *ino, _SYSIO_OFF_T *basep, le16_to_cpu(de->rec_len), le32_to_cpu(de->inode), d_type, &filled); if (over) { - free_page(page); + OBD_PAGE_FREE(page); /* * if buffer overflow with no data * returned yet, then report error @@ -261,7 +259,7 @@ ssize_t llu_iop_filldirentries(struct inode *ino, _SYSIO_OFF_T *basep, } } - free_page(page); + OBD_PAGE_FREE(page); } done: lli->lli_dir_pos = pgidx << CFS_PAGE_SHIFT | offset; diff --git a/lustre/llite/lproc_llite.c b/lustre/llite/lproc_llite.c index e172e78..82b166d 100644 --- a/lustre/llite/lproc_llite.c +++ b/lustre/llite/lproc_llite.c @@ -620,7 +620,8 @@ int lprocfs_register_mountpoint(struct proc_dir_entry *parent, entry->data = sbi; /* File operations stats */ - sbi->ll_stats = lprocfs_alloc_stats(LPROC_LL_FILE_OPCODES, 0); + sbi->ll_stats = lprocfs_alloc_stats(LPROC_LL_FILE_OPCODES, + LPROCFS_STATS_FLAG_PERCPU); if (sbi->ll_stats == NULL) GOTO(out, err = -ENOMEM); /* do counter init */ diff --git a/lustre/lvfs/lvfs_linux.c b/lustre/lvfs/lvfs_linux.c index 7d26d82..9bdcf35 100644 --- a/lustre/lvfs/lvfs_linux.c +++ b/lustre/lvfs/lvfs_linux.c @@ -50,8 +50,11 @@ #include #include -atomic_t obd_memory; -int obd_memmax; +__u64 obd_max_pages = 0; +__u64 obd_max_alloc = 0; +struct lprocfs_stats *obd_memory = NULL; +spinlock_t obd_updatemax_lock = SPIN_LOCK_UNLOCKED; +/* refine later and change to seqlock or simlar from libcfs */ /* Debugging check only needed during development */ #ifdef OBD_CTXT_DEBUG @@ -446,8 +449,6 @@ long l_readdir(struct file *file, struct list_head *dentry_list) return 0; } EXPORT_SYMBOL(l_readdir); -EXPORT_SYMBOL(obd_memory); -EXPORT_SYMBOL(obd_memmax); #if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__) static spinlock_t obd_memlist_lock = SPIN_LOCK_UNLOCKED; @@ -606,19 +607,12 @@ void lvfs_memdbg_show(void) struct obd_mem_track *mt; int header = 0; #endif - int leaked; #if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__) int i; #endif - leaked = atomic_read(&obd_memory); - - if (leaked > 0) { - CWARN("Memory leaks detected (max %d, leaked %d)\n", - obd_memmax, leaked); - } - + #if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__) spin_lock(&obd_memlist_lock); for (i = 0, head = obd_memtable; i < obd_memtable_size; i++, head++) { @@ -687,6 +681,94 @@ int lvfs_check_io_health(struct obd_device *obd, struct file *file) EXPORT_SYMBOL(lvfs_check_io_health); #endif /* LUSTRE_KERNEL_VERSION */ +void obd_update_maxusage() +{ + __u64 max1, max2; + + max1 = obd_pages_sum(); + max2 = obd_memory_sum(); + + spin_lock(&obd_updatemax_lock); + if (max1 > obd_max_pages) + obd_max_pages = max1; + if (max2 > obd_max_alloc) + obd_max_alloc = max2; + spin_unlock(&obd_updatemax_lock); + +} + +__u64 obd_memory_max(void) +{ + __u64 ret; + + spin_lock(&obd_updatemax_lock); + ret = obd_max_alloc; + spin_unlock(&obd_updatemax_lock); + + return ret; +} + +__u64 obd_pages_max(void) +{ + __u64 ret; + + spin_lock(&obd_updatemax_lock); + ret = obd_max_pages; + spin_unlock(&obd_updatemax_lock); + + return ret; +} + +EXPORT_SYMBOL(obd_update_maxusage); +EXPORT_SYMBOL(obd_pages_max); +EXPORT_SYMBOL(obd_memory_max); +EXPORT_SYMBOL(obd_memory); + +#ifdef LPROCFS +__s64 lprocfs_read_helper(struct lprocfs_counter *lc, + enum lprocfs_fields_flags field) +{ + __u64 ret = 0; + int centry; + + if (!lc) + RETURN(0); + do { + centry = atomic_read(&lc->lc_cntl.la_entry); + + switch (field) { + case LPROCFS_FIELDS_FLAGS_CONFIG: + ret = lc->lc_config; + break; + case LPROCFS_FIELDS_FLAGS_SUM: + ret = lc->lc_sum; + break; + case LPROCFS_FIELDS_FLAGS_MIN: + ret = lc->lc_min; + break; + case LPROCFS_FIELDS_FLAGS_MAX: + ret = lc->lc_max; + break; + case LPROCFS_FIELDS_FLAGS_AVG: + ret = (lc->lc_max - lc->lc_min)/2; + break; + case LPROCFS_FIELDS_FLAGS_SUMSQUARE: + ret = lc->lc_sumsquare; + break; + case LPROCFS_FIELDS_FLAGS_COUNT: + ret = lc->lc_count; + break; + default: + break; + }; + } while (centry != atomic_read(&lc->lc_cntl.la_entry) && + centry != atomic_read(&lc->lc_cntl.la_exit)); + + RETURN(ret); +} +EXPORT_SYMBOL(lprocfs_read_helper); +#endif /* LPROCFS */ + static int __init lvfs_linux_init(void) { ENTRY; diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index 571d85b..76821e4 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -93,7 +93,7 @@ static int mds_sendpage(struct ptlrpc_request *req, struct file *file, for (i = 0, tmpcount = count; i < npages; i++, tmpcount -= tmpsize) { tmpsize = tmpcount > CFS_PAGE_SIZE ? CFS_PAGE_SIZE : tmpcount; - pages[i] = cfs_alloc_page(CFS_ALLOC_STD); + OBD_PAGE_ALLOC(pages[i], CFS_ALLOC_STD); if (pages[i] == NULL) GOTO(cleanup_buf, rc = -ENOMEM); @@ -152,7 +152,7 @@ static int mds_sendpage(struct ptlrpc_request *req, struct file *file, cleanup_buf: for (i = 0; i < npages; i++) if (pages[i]) - __cfs_free_page(pages[i]); + OBD_PAGE_FREE(pages[i]); ptlrpc_free_bulk(desc); out_free: diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index 84753fb..2e87394 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -51,9 +51,12 @@ atomic_t libcfs_kmemory = {0}; struct obd_device *obd_devs[MAX_OBD_DEVICES]; struct list_head obd_types; spinlock_t obd_dev_lock = SPIN_LOCK_UNLOCKED; + #ifndef __KERNEL__ -atomic_t obd_memory; -int obd_memmax; +__u64 obd_max_pages = 0; +__u64 obd_max_alloc = 0; +__u64 obd_alloc; +__u64 obd_pages; #endif /* The following are visible and mutable through /proc/sys/lustre/. */ @@ -545,7 +548,21 @@ int init_obdclass(void) spin_lock_init(&obd_types_lock); cfs_waitq_init(&obd_race_waitq); obd_zombie_impexp_init(); - +#ifdef LPROCFS + obd_memory = lprocfs_alloc_stats(OBD_STATS_NUM, + LPROCFS_STATS_FLAG_PERCPU); + if (obd_memory == NULL) { + CERROR("kmalloc of 'obd_memory' failed\n"); + RETURN(-ENOMEM); + } + + lprocfs_counter_init(obd_memory, OBD_MEMORY_STAT, + LPROCFS_CNTR_AVGMINMAX, + "memused", "bytes"); + lprocfs_counter_init(obd_memory, OBD_MEMORY_PAGES_STAT, + LPROCFS_CNTR_AVGMINMAX, + "pagesused", "pages"); +#endif err = obd_init_checks(); if (err == -EOVERFLOW) return err; @@ -594,6 +611,8 @@ static void cleanup_obdclass(void) { int i; int lustre_unregister_fs(void); + __u64 memory_leaked, pages_leaked; + __u64 memory_max, pages_max; ENTRY; lustre_unregister_fs(); @@ -617,6 +636,23 @@ static void cleanup_obdclass(void) class_handle_cleanup(); class_exit_uuidlist(); + + memory_leaked = obd_memory_sum(); + pages_leaked = obd_pages_sum(); + + memory_max = obd_memory_max(); + pages_max = obd_pages_max(); + + lprocfs_free_stats(&obd_memory); + if (memory_leaked > 0) { + CWARN("Memory leaks detected (max "LPU64", leaked "LPD64")\n", + memory_max, memory_leaked); + } + if (pages_leaked > 0) { + CWARN("Page leaks detected (max "LPU64", leaked "LPU64")\n", + pages_max, pages_leaked); + } + EXIT; } diff --git a/lustre/obdclass/linux/linux-sysctl.c b/lustre/obdclass/linux/linux-sysctl.c index ba3d186..05b4baf 100644 --- a/lustre/obdclass/linux/linux-sysctl.c +++ b/lustre/obdclass/linux/linux-sysctl.c @@ -53,6 +53,9 @@ enum { OBD_TIMEOUT, /* RPC timeout before recovery/intr */ OBD_DUMP_ON_TIMEOUT, /* dump kernel debug log upon eviction */ OBD_MEMUSED, /* bytes currently OBD_ALLOCated */ + OBD_PAGESUSED, /* pages currently OBD_PAGE_ALLOCated */ + OBD_MAXMEMUSED, /* maximum bytes OBD_ALLOCated concurrently */ + OBD_MAXPAGESUSED, /* maximum pages OBD_PAGE_ALLOCated concurrently */ OBD_SYNCFILTER, /* XXX temporary, as we play with sync osts.. */ OBD_LDLM_TIMEOUT, /* LDLM timeout for ASTs before client eviction */ OBD_DUMP_ON_EVICTION, /* dump kernel debug log upon eviction */ @@ -80,6 +83,102 @@ int LL_PROC_PROTO(proc_set_timeout) return rc; } +int LL_PROC_PROTO(proc_memory_alloc) +{ + char buf[22]; + int len; + DECLARE_LL_PROC_PPOS_DECL; + + if (!*lenp || (*ppos && !write)) { + *lenp = 0; + return 0; + } + if (write) + return -EINVAL; + + len = snprintf(buf, sizeof(buf), LPU64"\n", obd_memory_sum()); + if (len > *lenp) + len = *lenp; + buf[len] = '\0'; + if (copy_to_user(buffer, buf, len)) + return -EFAULT; + *lenp = len; + *ppos += *lenp; + return 0; +} + +int LL_PROC_PROTO(proc_pages_alloc) +{ + char buf[22]; + int len; + DECLARE_LL_PROC_PPOS_DECL; + + if (!*lenp || (*ppos && !write)) { + *lenp = 0; + return 0; + } + if (write) + return -EINVAL; + + len = snprintf(buf, sizeof(buf), LPU64"\n", obd_pages_sum()); + if (len > *lenp) + len = *lenp; + buf[len] = '\0'; + if (copy_to_user(buffer, buf, len)) + return -EFAULT; + *lenp = len; + *ppos += *lenp; + return 0; +} + +int LL_PROC_PROTO(proc_mem_max) +{ + char buf[22]; + int len; + DECLARE_LL_PROC_PPOS_DECL; + + if (!*lenp || (*ppos && !write)) { + *lenp = 0; + return 0; + } + if (write) + return -EINVAL; + + len = snprintf(buf, sizeof(buf), LPU64"\n", obd_memory_max()); + if (len > *lenp) + len = *lenp; + buf[len] = '\0'; + if (copy_to_user(buffer, buf, len)) + return -EFAULT; + *lenp = len; + *ppos += *lenp; + return 0; +} + +int LL_PROC_PROTO(proc_pages_max) +{ + char buf[22]; + int len; + DECLARE_LL_PROC_PPOS_DECL; + + if (!*lenp || (*ppos && !write)) { + *lenp = 0; + return 0; + } + if (write) + return -EINVAL; + + len = snprintf(buf, sizeof(buf), LPU64"\n", obd_pages_max()); + if (len > *lenp) + len = *lenp; + buf[len] = '\0'; + if (copy_to_user(buffer, buf, len)) + return -EFAULT; + *lenp = len; + *ppos += *lenp; + return 0; +} + static cfs_sysctl_table_t obd_table[] = { { .ctl_name = OBD_FAIL_LOC, @@ -132,10 +231,34 @@ static cfs_sysctl_table_t obd_table[] = { { .ctl_name = OBD_MEMUSED, .procname = "memused", - .data = (int *)&obd_memory.counter, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec + .data = NULL, + .maxlen = 0, + .mode = 0444, + .proc_handler = &proc_memory_alloc + }, + { + .ctl_name = OBD_PAGESUSED, + .procname = "pagesused", + .data = NULL, + .maxlen = 0, + .mode = 0444, + .proc_handler = &proc_pages_alloc + }, + { + .ctl_name = OBD_MAXMEMUSED, + .procname = "memused_max", + .data = NULL, + .maxlen = 0, + .mode = 0444, + .proc_handler = &proc_mem_max + }, + { + .ctl_name = OBD_MAXPAGESUSED, + .procname = "pagesused_max", + .data = NULL, + .maxlen = 0, + .mode = 0444, + .proc_handler = &proc_pages_max }, { .ctl_name = OBD_LDLM_TIMEOUT, diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c index 89572b5..04e2924 100644 --- a/lustre/obdclass/obd_mount.c +++ b/lustre/obdclass/obd_mount.c @@ -1305,6 +1305,7 @@ static struct vfsmount *server_kernel_mount(struct super_block *sb) struct vfsmount *mnt; char *options = NULL; unsigned long page, s_flags; + struct page *__page; int rc; ENTRY; @@ -1343,9 +1344,10 @@ static struct vfsmount *server_kernel_mount(struct super_block *sb) /* Done with our pre-mount, now do the real mount. */ /* Glom up mount options */ - page = __get_free_page(GFP_KERNEL); - if (!page) + OBD_PAGE_ALLOC(__page, CFS_ALLOC_STD); + if (!__page) GOTO(out_free, rc = -ENOMEM); + page = (unsigned long)cfs_page_address(__page); options = (char *)page; memset(options, 0, CFS_PAGE_SIZE); @@ -1367,7 +1369,7 @@ static struct vfsmount *server_kernel_mount(struct super_block *sb) MT_STR(ldd), lmd->lmd_dev, options); mnt = ll_kern_mount(MT_STR(ldd), s_flags, lmd->lmd_dev, (void *)options); - free_page(page); + OBD_PAGE_FREE(__page); if (IS_ERR(mnt)) { rc = PTR_ERR(mnt); CERROR("ll_kern_mount failed: rc = %d\n", rc); diff --git a/lustre/obdecho/echo.c b/lustre/obdecho/echo.c index e47ee66..6ac884e 100644 --- a/lustre/obdecho/echo.c +++ b/lustre/obdecho/echo.c @@ -311,7 +311,7 @@ int echo_preprw(int cmd, struct obd_export *export, struct obdo *oa, /* Take extra ref so __free_pages() can be called OK */ cfs_get_page (r->page); } else { - r->page = cfs_alloc_page(gfp_mask); + OBD_PAGE_ALLOC(r->page, gfp_mask); if (r->page == NULL) { CERROR("can't get page %u/%u for id " LPU64"\n", @@ -362,7 +362,7 @@ preprw_cleanup: cfs_kunmap(r->page); /* NB if this is a persistent page, __free_pages will just * lose the extra ref gained above */ - cfs_free_page(r->page); + OBD_PAGE_FREE(r->page); atomic_dec(&obd->u.echo.eo_prep); } memset(res, 0, sizeof(*res) * niocount); @@ -433,7 +433,7 @@ int echo_commitrw(int cmd, struct obd_export *export, struct obdo *oa, cfs_kunmap(page); /* NB see comment above regarding persistent pages */ - cfs_free_page(page); + OBD_PAGE_FREE(page); atomic_dec(&obd->u.echo.eo_prep); } } @@ -448,7 +448,7 @@ commitrw_cleanup: cfs_page_t *page = r->page; /* NB see comment above regarding persistent pages */ - cfs_free_page(page); + OBD_PAGE_FREE(page); atomic_dec(&obd->u.echo.eo_prep); } return rc; @@ -544,7 +544,7 @@ echo_persistent_pages_fini (void) for (i = 0; i < ECHO_PERSISTENT_PAGES; i++) if (echo_persistent_pages[i] != NULL) { - cfs_free_page (echo_persistent_pages[i]); + OBD_PAGE_FREE(echo_persistent_pages[i]); echo_persistent_pages[i] = NULL; } } @@ -559,7 +559,7 @@ echo_persistent_pages_init (void) int gfp_mask = (i < ECHO_PERSISTENT_PAGES/2) ? CFS_ALLOC_STD : CFS_ALLOC_HIGHUSER; - pg = cfs_alloc_page (gfp_mask); + OBD_PAGE_ALLOC(pg, gfp_mask); if (pg == NULL) { echo_persistent_pages_fini (); return (-ENOMEM); diff --git a/lustre/obdecho/echo_client.c b/lustre/obdecho/echo_client.c index c5d32ab..00cd063 100644 --- a/lustre/obdecho/echo_client.c +++ b/lustre/obdecho/echo_client.c @@ -543,7 +543,7 @@ static int echo_client_kbrw(struct obd_device *obd, int rw, struct obdo *oa, LASSERT (pgp->pg == NULL); /* for cleanup */ rc = -ENOMEM; - pgp->pg = cfs_alloc_page (gfp_mask); + OBD_PAGE_ALLOC(pgp->pg, gfp_mask); if (pgp->pg == NULL) goto out; @@ -575,7 +575,7 @@ static int echo_client_kbrw(struct obd_device *obd, int rw, struct obdo *oa, if (vrc != 0 && rc == 0) rc = vrc; } - cfs_free_page(pgp->pg); + OBD_PAGE_FREE(pgp->pg); } OBD_FREE(pga, npages * sizeof(*pga)); return (rc); @@ -734,13 +734,14 @@ static int echo_client_async_page(struct obd_export *exp, int rw, /* prepare the group of pages that we're going to be keeping * in flight */ for (i = 0; i < npages; i++) { - cfs_page_t *page = cfs_alloc_page(CFS_ALLOC_STD); + cfs_page_t *page; + OBD_PAGE_ALLOC(page, CFS_ALLOC_STD); if (page == NULL) GOTO(out, rc = -ENOMEM); OBD_ALLOC(eap, sizeof(*eap)); if (eap == NULL) { - cfs_free_page(page); + OBD_PAGE_FREE(page); GOTO(out, rc = -ENOMEM); } @@ -835,7 +836,7 @@ out: obd_teardown_async_page(exp, lsm, NULL, eap->eap_cookie); OBD_FREE(eap, sizeof(*eap)); - cfs_free_page(page); + OBD_PAGE_FREE(page); } OBD_FREE(aps, npages * sizeof aps[0]); } diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index 3bf974d..bcdd5f5 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -2015,7 +2015,8 @@ err_mntput: static int filter_setup(struct obd_device *obd, struct lustre_cfg* lcfg) { struct lprocfs_static_vars lvars; - unsigned long page; + unsigned long addr; + struct page *page; int rc; CLASSERT(offsetof(struct obd_device, u.obt) == @@ -2025,9 +2026,11 @@ static int filter_setup(struct obd_device *obd, struct lustre_cfg* lcfg) RETURN(-EINVAL); /* 2.6.9 selinux wants a full option page for do_kern_mount (bug6471) */ - page = get_zeroed_page(GFP_KERNEL); + OBD_PAGE_ALLOC(page, CFS_ALLOC_STD); if (!page) RETURN(-ENOMEM); + addr = (unsigned long)cfs_page_address(page); + clear_page((void *)addr); /* lprocfs must be setup before the filter so state can be safely added * to /proc incrementally as the filter is setup */ @@ -2047,10 +2050,10 @@ static int filter_setup(struct obd_device *obd, struct lustre_cfg* lcfg) obd->obd_proc_entry); } - memcpy((void *)page, lustre_cfg_buf(lcfg, 4), + memcpy((void *)addr, lustre_cfg_buf(lcfg, 4), LUSTRE_CFG_BUFLEN(lcfg, 4)); - rc = filter_common_setup(obd, lcfg, (void *)page); - free_page(page); + rc = filter_common_setup(obd, lcfg, (void *)addr); + OBD_PAGE_FREE(page); if (rc) { lprocfs_obd_cleanup(obd); diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index 833efef..27e25bb 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -1698,7 +1698,7 @@ static void ost_thread_done(struct ptlrpc_thread *thread) if (tls != NULL) { for (i = 0; i < OST_THREAD_POOL_SIZE; ++ i) { if (tls->page[i] != NULL) - __cfs_free_page(tls->page[i]); + OBD_PAGE_FREE(tls->page[i]); } OBD_FREE_PTR(tls); thread->t_data = NULL; @@ -1729,7 +1729,7 @@ static int ost_thread_init(struct ptlrpc_thread *thread) * populate pool */ for (i = 0; i < OST_THREAD_POOL_SIZE; ++ i) { - tls->page[i] = cfs_alloc_page(OST_THREAD_POOL_GFP); + OBD_PAGE_ALLOC(tls->page[i], OST_THREAD_POOL_GFP); if (tls->page[i] == NULL) { ost_thread_done(thread); result = -ENOMEM; diff --git a/lustre/ptlrpc/pinger.c b/lustre/ptlrpc/pinger.c index 4d5a9c9..aa65e6b 100644 --- a/lustre/ptlrpc/pinger.c +++ b/lustre/ptlrpc/pinger.c @@ -222,6 +222,8 @@ static int ptlrpc_pinger_main(void *arg) ptlrpc_update_next_ping(imp); } mutex_up(&pinger_sem); + /* update memory usage info */ + obd_update_maxusage(); /* Wait until the next ping time, or until we're stopped. */ time_to_next_ping = cfs_time_sub(cfs_time_add(this_ping, diff --git a/lustre/tests/leak_finder.pl b/lustre/tests/leak_finder.pl index 5219996..aa783bb 100644 --- a/lustre/tests/leak_finder.pl +++ b/lustre/tests/leak_finder.pl @@ -14,7 +14,7 @@ my $max = 0; while ($line = <>) { $debug_line++; my ($file, $func, $lno, $name, $size, $addr, $type); - if ($line =~ m/^.*(\.).*\((.*):(\d+):(.*)\(\)\) (k|v|slab-)(.*) '(.*)': (\d+) at (.*) \(tot (.*)\).*$/){ + if ($line =~ m/^.*(\.).*\((.*):(\d+):(.*)\(\)\) (k|v|slab-)(.*) '(.*)': (\d+) at (.*)\..*$/){ $file = $2; $lno = $3; $func = $4; @@ -22,7 +22,6 @@ while ($line = <>) { $name = $7; $size = $8; $addr = $9; - $tot = $10; # we can't dump the log after portals has exited, so skip "leaks" # from memory freed in the portals module unloading. @@ -69,10 +68,6 @@ while ($line = <>) { delete $memory->{$addr}; $total -= $size; } - if ($total != int($tot)) { - print "kernel total $tot != my total $total\n"; - $total = $tot; - } } # Sort leak output by allocation time