Whamcloud - gitweb
LU-1282 lprocfs: reduce lprocfs stats memory use
author Bobi Jam <bobijam.xu@intel.com>
Fri, 18 Jan 2013 16:54:32 +0000 (00:54 +0800)
committer Oleg Drokin <green@whamcloud.com>
Thu, 31 Jan 2013 06:37:43 +0000 (01:37 -0500)
* Move the data common to all percpu counters out of lprocfs_counter,
  so it is no longer stored redundantly in every percpu counter.
* The LPROCFS_STATS_FLAG_IRQ_SAFE flag implies three things:
  1. lprocfs_counter needs lc_sum_irq.
  2. When the stats are percpu stats, all of their percpu counters
     get allocated along with the stats structure itself.
  3. When the stats are non-percpu stats, lprocfs_stats_lock() needs
     to disable irqs.
* Change lprocfs_counter so that counters of non-irq-safe stats do not
  carry the lc_sum_irq field, which saves memory.
* Right now, only the obd_memory stats use the
  LPROCFS_STATS_FLAG_IRQ_SAFE flag.

Signed-off-by: Bobi Jam <bobijam.xu@intel.com>
Change-Id: I2eb037f9dcda983844857fc068c428c4fa387e7a
Reviewed-on: http://review.whamcloud.com/3246
Reviewed-by: Jinshan Xiong <jinshan.xiong@intel.com>
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
lustre/include/lprocfs_status.h
lustre/lvfs/lvfs_lib.c
lustre/lvfs/lvfs_linux.c
lustre/obdclass/lprocfs_jobstats.c
lustre/obdclass/lprocfs_status.c

index f5312b3..2904728 100644 (file)
@@ -146,17 +146,27 @@ enum {
 
 #define LC_MIN_INIT ((~(__u64)0) >> 1)
 
+struct lprocfs_counter_header {
+       unsigned int            lc_config;
+       const char              *lc_name;   /* must be static */
+       const char              *lc_units;  /* must be static */
+};
+
 struct lprocfs_counter {
-        unsigned int           lc_config;
-        __s64                  lc_count;
-        __s64                  lc_sum;
-        __s64                  lc_sum_irq;
-        __s64                  lc_min;
-        __s64                  lc_max;
-        __s64                  lc_sumsquare;
-        const char            *lc_name;   /* must be static */
-        const char            *lc_units;  /* must be static */
+       __s64   lc_count;
+       __s64   lc_min;
+       __s64   lc_max;
+       __s64   lc_sumsquare;
+       /*
+        * Every counter has lc_array_sum[0] (lc_sum). lc_array_sum[1]
+        * (lc_sum_irq) exists only for counters updated in irq context,
+        * i.e. counters of stats created with the
+        * LPROCFS_STATS_FLAG_IRQ_SAFE flag.
+        */
+       __s64   lc_array_sum[1];
 };
+#define lc_sum         lc_array_sum[0]
+#define lc_sum_irq     lc_array_sum[1]
 
 struct lprocfs_percpu {
 #ifndef __GNUC__
@@ -186,16 +196,18 @@ enum lprocfs_fields_flags {
 };
 
 struct lprocfs_stats {
-       unsigned short          ls_num; /* # of counters */
-       unsigned short          ls_biggest_alloc_num;
-                                       /* 1 + the highest slot index which has
-                                        * been allocated, the 0th entry is
-                                        * a statically intialized template */
-       int                     ls_flags; /* See LPROCFS_STATS_FLAG_* */
+       /* # of counters */
+       unsigned short                  ls_num;
+       /* 1 + the biggest cpu # whose ls_percpu slot has been allocated */
+       unsigned short                  ls_biggest_alloc_num;
+       enum lprocfs_stats_flags        ls_flags;
        /* Lock used when there are no percpu stats areas; For percpu stats,
         * it is used to protect ls_biggest_alloc_num change */
-       spinlock_t              ls_lock;
-       struct lprocfs_percpu   *ls_percpu[0];
+       spinlock_t                      ls_lock;
+
+       /* has ls_num of counter headers */
+       struct lprocfs_counter_header   *ls_cnt_header;
+       struct lprocfs_percpu           *ls_percpu[0];
 };
 
 #define OPC_RANGE(seg) (seg ## _LAST_OPC - seg ## _FIRST_OPC)
@@ -378,46 +390,41 @@ static inline int lprocfs_stats_lock(struct lprocfs_stats *stats, int opc,
                                     unsigned long *flags)
 {
        int             rc = 0;
-       unsigned int    cpuid;
 
        switch (opc) {
        default:
                LBUG();
 
        case LPROCFS_GET_SMP_ID:
-               /* percpu counter stats */
-               if ((stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) == 0) {
-                       cpuid = cfs_get_cpu();
+               if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) {
+                       if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE)
+                               spin_lock_irqsave(&stats->ls_lock, *flags);
+                       else
+                               spin_lock(&stats->ls_lock);
+                       return 0;
+               } else {
+                       unsigned int cpuid = cfs_get_cpu();
 
-                       if (unlikely(stats->ls_percpu[cpuid + 1] == NULL)) {
-                               rc = lprocfs_stats_alloc_one(stats, cpuid + 1);
+                       if (unlikely(stats->ls_percpu[cpuid] == NULL)) {
+                               rc = lprocfs_stats_alloc_one(stats, cpuid);
                                if (rc < 0) {
                                        cfs_put_cpu();
                                        return rc;
                                }
                        }
-
-                       return cpuid + 1;
+                       return cpuid;
                }
 
-               /* non-percpu counter stats */
-               if ((stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0)
-                       spin_lock_irqsave(&stats->ls_lock, *flags);
-               else
-                       spin_lock(&stats->ls_lock);
-               return 0;
-
        case LPROCFS_GET_NUM_CPU:
-               /* percpu counter stats */
-               if ((stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) == 0)
+               if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) {
+                       if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE)
+                               spin_lock_irqsave(&stats->ls_lock, *flags);
+                       else
+                               spin_lock(&stats->ls_lock);
+                       return 1;
+               } else {
                        return stats->ls_biggest_alloc_num;
-
-               /* non-percpu counter stats */
-               if ((stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0)
-                       spin_lock_irqsave(&stats->ls_lock, *flags);
-               else
-                       spin_lock(&stats->ls_lock);
-               return 1;
+               }
        }
 }
 
@@ -454,6 +461,37 @@ static inline void lprocfs_stats_unlock(struct lprocfs_stats *stats, int opc,
        }
 }
 
+static inline unsigned int
+lprocfs_stats_counter_size(struct lprocfs_stats *stats)
+{
+       unsigned int percpusize;
+
+       percpusize = offsetof(struct lprocfs_percpu, lp_cntr[stats->ls_num]);
+
+       /* irq safe stats need lc_array_sum[1] */
+       if ((stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0)
+               percpusize += stats->ls_num * sizeof(__s64);
+
+       if ((stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) == 0)
+               percpusize = CFS_L1_CACHE_ALIGN(percpusize);
+
+       return percpusize;
+}
+
+static inline struct lprocfs_counter *
+lprocfs_stats_counter_get(struct lprocfs_stats *stats, unsigned int cpuid,
+                         int index)
+{
+       struct lprocfs_counter *cntr;
+
+       cntr = &stats->ls_percpu[cpuid]->lp_cntr[index];
+
+       if ((stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0)
+               cntr = (void *)cntr + index * sizeof(__s64);
+
+       return cntr;
+}
+
 /* Two optimized LPROCFS counter increment functions are provided:
  *     lprocfs_counter_incr(cntr, value) - optimized for by-one counters
  *     lprocfs_counter_add(cntr) - use for multi-valued counters
@@ -472,7 +510,9 @@ extern void lprocfs_counter_sub(struct lprocfs_stats *stats, int idx,
         lprocfs_counter_sub(stats, idx, 1)
 
 extern __s64 lprocfs_read_helper(struct lprocfs_counter *lc,
-                                 enum lprocfs_fields_flags field);
+                                struct lprocfs_counter_header *header,
+                                enum lprocfs_stats_flags flags,
+                                enum lprocfs_fields_flags field);
 static inline __u64 lprocfs_stats_collector(struct lprocfs_stats *stats,
                                             int idx,
                                             enum lprocfs_fields_flags field)
@@ -488,8 +528,10 @@ static inline __u64 lprocfs_stats_collector(struct lprocfs_stats *stats,
        for (i = 0; i < num_cpu; i++) {
                if (stats->ls_percpu[i] == NULL)
                        continue;
-               ret += lprocfs_read_helper(&(stats->ls_percpu[i]->lp_cntr[idx]),
-                                          field);
+               ret += lprocfs_read_helper(
+                               lprocfs_stats_counter_get(stats, i, idx),
+                               &stats->ls_cnt_header[idx], stats->ls_flags,
+                               field);
        }
        lprocfs_stats_unlock(stats, LPROCFS_GET_NUM_CPU, &flags);
        return ret;
index 2c4c696..04f87ba 100644 (file)
@@ -71,15 +71,18 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type,
 EXPORT_SYMBOL(obd_alloc_fail);
 
 #ifdef LPROCFS
-void lprocfs_counter_add(struct lprocfs_stats *stats, int idx,
-                                       long amount)
+void lprocfs_counter_add(struct lprocfs_stats *stats, int idx, long amount)
 {
-       struct lprocfs_counter *percpu_cntr;
-       int                     smp_id;
-       unsigned long           flags = 0;
+       struct lprocfs_counter          *percpu_cntr;
+       struct lprocfs_counter_header   *header;
+       int                             smp_id;
+       unsigned long                   flags = 0;
+
+       if (stats == NULL)
+               return;
 
-        if (stats == NULL)
-                return;
+       LASSERT(ergo((stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) == 0,
+                    !cfs_in_interrupt()));
 
        /* With per-client stats, statistics are allocated only for
         * single CPU area, so the smp_id should be 0 always. */
@@ -87,9 +90,11 @@ void lprocfs_counter_add(struct lprocfs_stats *stats, int idx,
        if (smp_id < 0)
                return;
 
-        percpu_cntr = &(stats->ls_percpu[smp_id]->lp_cntr[idx]);
-        percpu_cntr->lc_count++;
-        if (percpu_cntr->lc_config & LPROCFS_CNTR_AVGMINMAX) {
+       header = &stats->ls_cnt_header[idx];
+       percpu_cntr = lprocfs_stats_counter_get(stats, smp_id, idx);
+       percpu_cntr->lc_count++;
+
+       if (header->lc_config & LPROCFS_CNTR_AVGMINMAX) {
                /*
                 * lprocfs_counter_add() can be called in interrupt context,
                 * as memory allocation could trigger memory shrinker call
@@ -100,25 +105,30 @@ void lprocfs_counter_add(struct lprocfs_stats *stats, int idx,
                        percpu_cntr->lc_sum_irq += amount;
                else
                        percpu_cntr->lc_sum += amount;
-                if (percpu_cntr->lc_config & LPROCFS_CNTR_STDDEV)
-                        percpu_cntr->lc_sumsquare += (__s64)amount * amount;
-                if (amount < percpu_cntr->lc_min)
-                        percpu_cntr->lc_min = amount;
-                if (amount > percpu_cntr->lc_max)
-                        percpu_cntr->lc_max = amount;
-        }
-        lprocfs_stats_unlock(stats, LPROCFS_GET_SMP_ID, &flags);
+
+               if (header->lc_config & LPROCFS_CNTR_STDDEV)
+                       percpu_cntr->lc_sumsquare += (__s64)amount * amount;
+               if (amount < percpu_cntr->lc_min)
+                       percpu_cntr->lc_min = amount;
+               if (amount > percpu_cntr->lc_max)
+                       percpu_cntr->lc_max = amount;
+       }
+       lprocfs_stats_unlock(stats, LPROCFS_GET_SMP_ID, &flags);
 }
 EXPORT_SYMBOL(lprocfs_counter_add);
 
 void lprocfs_counter_sub(struct lprocfs_stats *stats, int idx, long amount)
 {
-       struct lprocfs_counter *percpu_cntr;
-       int                     smp_id;
-       unsigned long           flags = 0;
+       struct lprocfs_counter          *percpu_cntr;
+       struct lprocfs_counter_header   *header;
+       int                             smp_id;
+       unsigned long                   flags = 0;
+
+       if (stats == NULL)
+               return;
 
-        if (stats == NULL)
-                return;
+       LASSERT(ergo((stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) == 0,
+                    !cfs_in_interrupt()));
 
        /* With per-client stats, statistics are allocated only for
         * single CPU area, so the smp_id should be 0 always. */
@@ -126,8 +136,9 @@ void lprocfs_counter_sub(struct lprocfs_stats *stats, int idx, long amount)
        if (smp_id < 0)
                return;
 
-        percpu_cntr = &(stats->ls_percpu[smp_id]->lp_cntr[idx]);
-        if (percpu_cntr->lc_config & LPROCFS_CNTR_AVGMINMAX) {
+       header = &stats->ls_cnt_header[idx];
+       percpu_cntr = lprocfs_stats_counter_get(stats, smp_id, idx);
+       if (header->lc_config & LPROCFS_CNTR_AVGMINMAX) {
                /*
                 * Sometimes we use RCU callbacks to free memory which calls
                 * lprocfs_counter_sub(), and RCU callbacks may execute in
@@ -135,50 +146,48 @@ void lprocfs_counter_sub(struct lprocfs_stats *stats, int idx, long amount)
                 * softirq context here, use separate counter for that.
                 * bz20650.
                 */
-                if (cfs_in_interrupt())
-                        percpu_cntr->lc_sum_irq -= amount;
-                else
-                        percpu_cntr->lc_sum -= amount;
-        }
-        lprocfs_stats_unlock(stats, LPROCFS_GET_SMP_ID, &flags);
+               if (cfs_in_interrupt())
+                       percpu_cntr->lc_sum_irq -= amount;
+               else
+                       percpu_cntr->lc_sum -= amount;
+       }
+       lprocfs_stats_unlock(stats, LPROCFS_GET_SMP_ID, &flags);
 }
 EXPORT_SYMBOL(lprocfs_counter_sub);
 
-int lprocfs_stats_alloc_one(struct lprocfs_stats *stats, unsigned int idx)
+int lprocfs_stats_alloc_one(struct lprocfs_stats *stats, unsigned int cpuid)
 {
-       unsigned int    percpusize;
-       int             rc      = -ENOMEM;
-       unsigned long   flags   = 0;
-
-       /* the 1st percpu entry was statically allocated in
-        * lprocfs_alloc_stats() */
-       LASSERT(idx != 0 && stats->ls_percpu[0] != NULL);
-       LASSERT(stats->ls_percpu[idx] == NULL);
+       struct lprocfs_counter  *cntr;
+       unsigned int            percpusize;
+       int                     rc = -ENOMEM;
+       unsigned long           flags = 0;
+       int                     i;
+
+       LASSERT(stats->ls_percpu[cpuid] == NULL);
        LASSERT((stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) == 0);
 
-       percpusize = CFS_L1_CACHE_ALIGN(offsetof(struct lprocfs_percpu,
-                                                lp_cntr[stats->ls_num]));
-       OBD_ALLOC_GFP(stats->ls_percpu[idx], percpusize, CFS_ALLOC_ATOMIC);
-       if (stats->ls_percpu[idx] != NULL) {
+       percpusize = lprocfs_stats_counter_size(stats);
+       LIBCFS_ALLOC_ATOMIC(stats->ls_percpu[cpuid], percpusize);
+       if (stats->ls_percpu[cpuid] != NULL) {
                rc = 0;
-               if (unlikely(stats->ls_biggest_alloc_num <= idx)) {
+               if (unlikely(stats->ls_biggest_alloc_num <= cpuid)) {
                        if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE)
                                spin_lock_irqsave(&stats->ls_lock, flags);
                        else
                                spin_lock(&stats->ls_lock);
-                       if (stats->ls_biggest_alloc_num <= idx)
-                               stats->ls_biggest_alloc_num = idx + 1;
+                       if (stats->ls_biggest_alloc_num <= cpuid)
+                               stats->ls_biggest_alloc_num = cpuid + 1;
                        if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) {
                                spin_unlock_irqrestore(&stats->ls_lock, flags);
                        } else {
                                spin_unlock(&stats->ls_lock);
                        }
                }
-
-               /* initialize the ls_percpu[idx] by copying the 0th template
-                * entry */
-               memcpy(stats->ls_percpu[idx], stats->ls_percpu[0],
-                      percpusize);
+               /* initialize the ls_percpu[cpuid] non-zero counter */
+               for (i = 0; i < stats->ls_num; ++i) {
+                       cntr = lprocfs_stats_counter_get(stats, cpuid, i);
+                       cntr->lc_min = LC_MIN_INIT;
+               }
        }
 
        return rc;
index 14042b1..eb73a06 100644 (file)
@@ -368,19 +368,23 @@ EXPORT_SYMBOL(obd_pages_max);
 
 #ifdef LPROCFS
 __s64 lprocfs_read_helper(struct lprocfs_counter *lc,
-                          enum lprocfs_fields_flags field)
+                         struct lprocfs_counter_header *header,
+                         enum lprocfs_stats_flags flags,
+                         enum lprocfs_fields_flags field)
 {
        __s64 ret = 0;
 
-       if (lc == NULL)
+       if (lc == NULL || header == NULL)
                RETURN(0);
 
        switch (field) {
                case LPROCFS_FIELDS_FLAGS_CONFIG:
-                       ret = lc->lc_config;
+                       ret = header->lc_config;
                        break;
                case LPROCFS_FIELDS_FLAGS_SUM:
-                       ret = lc->lc_sum + lc->lc_sum_irq;
+                       ret = lc->lc_sum;
+                       if ((flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0)
+                               ret += lc->lc_sum_irq;
                        break;
                case LPROCFS_FIELDS_FLAGS_MIN:
                        ret = lc->lc_min;
index aa8ba78..16fbcef 100644 (file)
@@ -366,10 +366,12 @@ static int inline width(const char *str, int len)
 
 static int lprocfs_jobstats_seq_show(struct seq_file *p, void *v)
 {
-       struct job_stat *job = v;
-       struct lprocfs_stats *s;
-       struct lprocfs_counter ret, *cntr;
-       int i;
+       struct job_stat                 *job = v;
+       struct lprocfs_stats            *s;
+       struct lprocfs_counter          ret;
+       struct lprocfs_counter          *cntr;
+       struct lprocfs_counter_header   *cntr_header;
+       int                             i;
 
        if (v == SEQ_START_TOKEN) {
                seq_printf(p, "job_stats:\n");
@@ -381,23 +383,25 @@ static int lprocfs_jobstats_seq_show(struct seq_file *p, void *v)
 
        s = job->js_stats;
        for (i = 0; i < s->ls_num; i++) {
-               cntr = &(s->ls_percpu[0]->lp_cntr[i]);
+               cntr = lprocfs_stats_counter_get(s, 0, i);
+               cntr_header = &s->ls_cnt_header[i];
                lprocfs_stats_collect(s, i, &ret);
 
                seq_printf(p, "  %s:%.*s { samples: %11"LPF64"u",
-                          cntr->lc_name, width(cntr->lc_name, 15), spaces,
+                          cntr_header->lc_name,
+                          width(cntr_header->lc_name, 15), spaces,
                           ret.lc_count);
-               if (cntr->lc_units[0] != '\0')
-                       seq_printf(p, ", unit: %5s", cntr->lc_units);
+               if (cntr_header->lc_units[0] != '\0')
+                       seq_printf(p, ", unit: %5s", cntr_header->lc_units);
 
-               if (cntr->lc_config & LPROCFS_CNTR_AVGMINMAX) {
+               if (cntr_header->lc_config & LPROCFS_CNTR_AVGMINMAX) {
                        seq_printf(p, ", min:%8"LPF64"u, max:%8"LPF64"u,"
                                   " sum:%16"LPF64"u",
                                   ret.lc_count ? ret.lc_min : 0,
                                   ret.lc_count ? ret.lc_max : 0,
                                   ret.lc_count ? ret.lc_sum : 0);
                }
-               if (cntr->lc_config & LPROCFS_CNTR_STDDEV) {
+               if (cntr_header->lc_config & LPROCFS_CNTR_STDDEV) {
                        seq_printf(p, ", sumsq: %18"LPF64"u",
                                   ret.lc_count ? ret.lc_sumsquare : 0);
                }
index 039d810..026076b 100644 (file)
@@ -863,12 +863,13 @@ EXPORT_SYMBOL(lprocfs_rd_conn_uuid);
 
 /** add up per-cpu counters */
 void lprocfs_stats_collect(struct lprocfs_stats *stats, int idx,
-                           struct lprocfs_counter *cnt)
+                          struct lprocfs_counter *cnt)
 {
-       struct lprocfs_counter *ptr;
-       unsigned int            num_entry;
-       int                     i;
-       unsigned long           flags = 0;
+       unsigned int                    num_entry;
+       struct lprocfs_counter          *percpu_cntr;
+       struct lprocfs_counter_header   *cntr_header;
+       int                             i;
+       unsigned long                   flags = 0;
 
        memset(cnt, 0, sizeof(*cnt));
 
@@ -885,18 +886,18 @@ void lprocfs_stats_collect(struct lprocfs_stats *stats, int idx,
        for (i = 0; i < num_entry; i++) {
                if (stats->ls_percpu[i] == NULL)
                        continue;
-
-               ptr = &(stats->ls_percpu[i])->lp_cntr[idx];
-               cnt->lc_count += ptr->lc_count;
-               cnt->lc_sum += ptr->lc_sum;
-               if (ptr->lc_min < cnt->lc_min)
-                       cnt->lc_min = ptr->lc_min;
-               if (ptr->lc_max > cnt->lc_max)
-                       cnt->lc_max = ptr->lc_max;
-               cnt->lc_sumsquare += ptr->lc_sumsquare;
+               cntr_header = &stats->ls_cnt_header[idx];
+               percpu_cntr = lprocfs_stats_counter_get(stats, i, idx);
+
+               cnt->lc_count += percpu_cntr->lc_count;
+               cnt->lc_sum += percpu_cntr->lc_sum;
+               if (percpu_cntr->lc_min < cnt->lc_min)
+                       cnt->lc_min = percpu_cntr->lc_min;
+               if (percpu_cntr->lc_max > cnt->lc_max)
+                       cnt->lc_max = percpu_cntr->lc_max;
+               cnt->lc_sumsquare += percpu_cntr->lc_sumsquare;
        }
 
-       cnt->lc_units = stats->ls_percpu[0]->lp_cntr[idx].lc_units;
        lprocfs_stats_unlock(stats, LPROCFS_GET_NUM_CPU, &flags);
 }
 EXPORT_SYMBOL(lprocfs_stats_collect);
@@ -998,11 +999,15 @@ EXPORT_SYMBOL(obd_connect_flags2str);
 int lprocfs_rd_import(char *page, char **start, off_t off, int count,
                       int *eof, void *data)
 {
-        struct lprocfs_counter ret;
-        struct obd_device *obd = (struct obd_device *)data;
-        struct obd_import *imp;
-        struct obd_import_conn *conn;
-        int i, j, k, rw = 0;
+       struct lprocfs_counter          ret;
+       struct lprocfs_counter_header   *header;
+       struct obd_device               *obd    = (struct obd_device *)data;
+       struct obd_import               *imp;
+       struct obd_import_conn          *conn;
+       int                             i;
+       int                             j;
+       int                             k;
+       int                             rw      = 0;
 
         LASSERT(obd != NULL);
         LPROCFS_CLIMP_CHECK(obd);
@@ -1052,6 +1057,7 @@ int lprocfs_rd_import(char *page, char **start, off_t off, int count,
                      cfs_atomic_read(&imp->imp_inval_count));
        spin_unlock(&imp->imp_lock);
 
+       header = &obd->obd_svc_stats->ls_cnt_header[PTLRPC_REQWAIT_CNTR];
         lprocfs_stats_collect(obd->obd_svc_stats, PTLRPC_REQWAIT_CNTR, &ret);
         if (ret.lc_count != 0) {
                 /* first argument to do_div MUST be __u64 */
@@ -1069,7 +1075,7 @@ int lprocfs_rd_import(char *page, char **start, off_t off, int count,
                       cfs_atomic_read(&imp->imp_inflight),
                       cfs_atomic_read(&imp->imp_unregistering),
                       cfs_atomic_read(&imp->imp_timeouts),
-                      ret.lc_sum, ret.lc_units);
+                     ret.lc_sum, header->lc_units);
 
         k = 0;
         for(j = 0; j < IMP_AT_MAX_PORTALS; j++) {
@@ -1112,6 +1118,7 @@ int lprocfs_rd_import(char *page, char **start, off_t off, int count,
                 }
                 k = (int)ret.lc_sum;
                 j = opcode_offset(OST_READ + rw) + EXTRA_MAX_OPCODES;
+               header = &obd->obd_svc_stats->ls_cnt_header[j];
                 lprocfs_stats_collect(obd->obd_svc_stats, j, &ret);
                 if (ret.lc_sum > 0 && ret.lc_count != 0) {
                         /* first argument to do_div MUST be __u64 */
@@ -1120,7 +1127,7 @@ int lprocfs_rd_import(char *page, char **start, off_t off, int count,
                         ret.lc_sum = sum;
                         i += snprintf(page + i, count - i,
                                       "       %s_per_rpc: "LPU64"\n",
-                                      ret.lc_units, ret.lc_sum);
+                                     header->lc_units, ret.lc_sum);
                         j = (int)ret.lc_sum;
                         if (j > 0)
                                 i += snprintf(page + i, count - i,
@@ -1357,9 +1364,10 @@ EXPORT_SYMBOL(lprocfs_free_per_client_stats);
 struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num,
                                           enum lprocfs_stats_flags flags)
 {
-       struct lprocfs_stats *stats;
-       unsigned int percpusize;
-       unsigned int num_entry;
+       struct lprocfs_stats    *stats;
+       unsigned int            num_entry;
+       unsigned int            percpusize = 0;
+       int                     i;
 
         if (num == 0)
                 return NULL;
@@ -1370,31 +1378,42 @@ struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num,
        if (flags & LPROCFS_STATS_FLAG_NOPERCPU)
                num_entry = 1;
        else
-               num_entry = cfs_num_possible_cpus() + 1;
+               num_entry = cfs_num_possible_cpus();
 
        /* alloc percpu pointers for all possible cpu slots */
-       OBD_ALLOC(stats, offsetof(struct lprocfs_stats, ls_percpu[num_entry]));
+       LIBCFS_ALLOC(stats, offsetof(typeof(*stats), ls_percpu[num_entry]));
        if (stats == NULL)
                return NULL;
 
        stats->ls_num = num;
-       stats->ls_biggest_alloc_num = 1;
        stats->ls_flags = flags;
        spin_lock_init(&stats->ls_lock);
 
-       percpusize = offsetof(struct lprocfs_percpu, lp_cntr[num]);
-       if (num_entry > 1)
-               percpusize = CFS_L1_CACHE_ALIGN(percpusize);
-
-       /* for no percpu area, the 0th entry is for real use,
-        * for percpu area, the 0th entry is for intialized entry template */
-       OBD_ALLOC(stats->ls_percpu[0], percpusize);
-       if (stats->ls_percpu[0] == NULL) {
-               OBD_FREE(stats,
-                        offsetof(struct lprocfs_stats, ls_percpu[num_entry]));
-               stats = NULL;
+       /* alloc num of counter headers */
+       LIBCFS_ALLOC(stats->ls_cnt_header,
+                    stats->ls_num * sizeof(struct lprocfs_counter_header));
+       if (stats->ls_cnt_header == NULL)
+               goto fail;
+
+       if ((flags & LPROCFS_STATS_FLAG_NOPERCPU) != 0) {
+               /* contains only one set of counters */
+               percpusize = lprocfs_stats_counter_size(stats);
+               LIBCFS_ALLOC_ATOMIC(stats->ls_percpu[0], percpusize);
+               if (stats->ls_percpu[0] == NULL)
+                       goto fail;
+               stats->ls_biggest_alloc_num = 1;
+       } else if ((flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0) {
+               /* alloc all percpu data, currently only obd_memory use this */
+               for (i = 0; i < num_entry; ++i)
+                       if (lprocfs_stats_alloc_one(stats, i) < 0)
+                               goto fail;
        }
+
        return stats;
+
+fail:
+       lprocfs_free_stats(&stats);
+       return NULL;
 }
 EXPORT_SYMBOL(lprocfs_alloc_stats);
 
@@ -1412,25 +1431,27 @@ void lprocfs_free_stats(struct lprocfs_stats **statsh)
        if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU)
                num_entry = 1;
        else
-               num_entry = cfs_num_possible_cpus() + 1;
+               num_entry = cfs_num_possible_cpus();
 
-       percpusize = offsetof(struct lprocfs_percpu, lp_cntr[stats->ls_num]);
-       if (num_entry > 1)
-               percpusize = CFS_L1_CACHE_ALIGN(percpusize);
+       percpusize = lprocfs_stats_counter_size(stats);
        for (i = 0; i < num_entry; i++)
                if (stats->ls_percpu[i] != NULL)
-                       OBD_FREE(stats->ls_percpu[i], percpusize);
-       OBD_FREE(stats, offsetof(typeof(*stats), ls_percpu[num_entry]));
+                       LIBCFS_FREE(stats->ls_percpu[i], percpusize);
+       if (stats->ls_cnt_header != NULL)
+               LIBCFS_FREE(stats->ls_cnt_header, stats->ls_num *
+                                       sizeof(struct lprocfs_counter_header));
+       LIBCFS_FREE(stats, offsetof(typeof(*stats), ls_percpu[num_entry]));
 }
 EXPORT_SYMBOL(lprocfs_free_stats);
 
 void lprocfs_clear_stats(struct lprocfs_stats *stats)
 {
-       struct lprocfs_counter *percpu_cntr;
-       int                     i;
-       int                     j;
-       unsigned int            num_entry;
-       unsigned long           flags = 0;
+       struct lprocfs_counter          *percpu_cntr;
+       struct lprocfs_counter_header   *header;
+       int                             i;
+       int                             j;
+       unsigned int                    num_entry;
+       unsigned long                   flags = 0;
 
        num_entry = lprocfs_stats_lock(stats, LPROCFS_GET_NUM_CPU, &flags);
 
@@ -1438,12 +1459,15 @@ void lprocfs_clear_stats(struct lprocfs_stats *stats)
                if (stats->ls_percpu[i] == NULL)
                        continue;
                for (j = 0; j < stats->ls_num; j++) {
-                       percpu_cntr = &(stats->ls_percpu[i])->lp_cntr[j];
-                       percpu_cntr->lc_count = 0;
-                       percpu_cntr->lc_sum = 0;
-                       percpu_cntr->lc_min = LC_MIN_INIT;
-                       percpu_cntr->lc_max = 0;
-                       percpu_cntr->lc_sumsquare = 0;
+                       header = &stats->ls_cnt_header[j];
+                       percpu_cntr = lprocfs_stats_counter_get(stats, i, j);
+                       percpu_cntr->lc_count           = 0;
+                       percpu_cntr->lc_min             = LC_MIN_INIT;
+                       percpu_cntr->lc_max             = 0;
+                       percpu_cntr->lc_sumsquare       = 0;
+                       percpu_cntr->lc_sum             = 0;
+                       if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE)
+                               percpu_cntr->lc_sum_irq = 0;
                }
        }
 
@@ -1467,7 +1491,7 @@ static void *lprocfs_stats_seq_start(struct seq_file *p, loff_t *pos)
         struct lprocfs_stats *stats = p->private;
         /* return 1st cpu location */
         return (*pos >= stats->ls_num) ? NULL :
-                &(stats->ls_percpu[0]->lp_cntr[*pos]);
+               lprocfs_stats_counter_get(stats, 0, *pos);
 }
 
 static void lprocfs_stats_seq_stop(struct seq_file *p, void *v)
@@ -1479,51 +1503,60 @@ static void *lprocfs_stats_seq_next(struct seq_file *p, void *v, loff_t *pos)
         struct lprocfs_stats *stats = p->private;
         ++*pos;
         return (*pos >= stats->ls_num) ? NULL :
-                &(stats->ls_percpu[0]->lp_cntr[*pos]);
+               lprocfs_stats_counter_get(stats, 0, *pos);
 }
 
 /* seq file export of one lprocfs counter */
 static int lprocfs_stats_seq_show(struct seq_file *p, void *v)
 {
-       struct lprocfs_stats *stats = p->private;
-       struct lprocfs_counter *cntr = v;
-       struct lprocfs_counter ret;
-       int idx, rc = 0;
-
-       if (cntr == &(stats->ls_percpu[0])->lp_cntr[0]) {
-               struct timeval now;
-               cfs_gettimeofday(&now);
-               rc = seq_printf(p, "%-25s %lu.%lu secs.usecs\n",
-                               "snapshot_time", now.tv_sec, now.tv_usec);
-               if (rc < 0)
-                       return rc;
-       }
-       idx = cntr - &(stats->ls_percpu[0])->lp_cntr[0];
-
-       lprocfs_stats_collect(stats, idx, &ret);
-
-       if (ret.lc_count == 0)
-               goto out;
-
-       rc = seq_printf(p, "%-25s "LPD64" samples [%s]", cntr->lc_name,
-                       ret.lc_count, cntr->lc_units);
-
-       if (rc < 0)
-               goto out;
-
-       if ((cntr->lc_config & LPROCFS_CNTR_AVGMINMAX) && (ret.lc_count > 0)) {
-               rc = seq_printf(p, " "LPD64" "LPD64" "LPD64,
-                               ret.lc_min, ret.lc_max, ret.lc_sum);
-               if (rc < 0)
-                       goto out;
-               if (cntr->lc_config & LPROCFS_CNTR_STDDEV)
-                       rc = seq_printf(p, " "LPD64, ret.lc_sumsquare);
-               if (rc < 0)
-                       goto out;
-       }
-       rc = seq_printf(p, "\n");
+       struct lprocfs_stats            *stats  = p->private;
+       struct lprocfs_counter          *cntr   = v;
+       struct lprocfs_counter          ret;
+       struct lprocfs_counter_header   *header;
+       int                             entry_size;
+       int                             idx;
+       int                             rc      = 0;
+
+       if (cntr == &(stats->ls_percpu[0])->lp_cntr[0]) {
+               struct timeval now;
+               cfs_gettimeofday(&now);
+               rc = seq_printf(p, "%-25s %lu.%lu secs.usecs\n",
+                               "snapshot_time", now.tv_sec, now.tv_usec);
+               if (rc < 0)
+                       return rc;
+       }
+       entry_size = sizeof(*cntr);
+       if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE)
+               entry_size += sizeof(__s64);
+       idx = ((void *)cntr - (void *)&(stats->ls_percpu[0])->lp_cntr[0]) /
+               entry_size;
+
+       header = &stats->ls_cnt_header[idx];
+       lprocfs_stats_collect(stats, idx, &ret);
+
+       if (ret.lc_count == 0)
+               goto out;
+
+       rc = seq_printf(p, "%-25s "LPD64" samples [%s]", header->lc_name,
+                       ret.lc_count, header->lc_units);
+
+       if (rc < 0)
+               goto out;
+
+       if ((header->lc_config & LPROCFS_CNTR_AVGMINMAX) &&
+           (ret.lc_count > 0)) {
+               rc = seq_printf(p, " "LPD64" "LPD64" "LPD64,
+                               ret.lc_min, ret.lc_max, ret.lc_sum);
+               if (rc < 0)
+                       goto out;
+               if (header->lc_config & LPROCFS_CNTR_STDDEV)
+                       rc = seq_printf(p, " "LPD64, ret.lc_sumsquare);
+               if (rc < 0)
+                       goto out;
+       }
+       rc = seq_printf(p, "\n");
  out:
-       return (rc < 0) ? rc : 0;
+       return (rc < 0) ? rc : 0;
 }
 
 struct seq_operations lprocfs_stats_seq_sops = {
@@ -1586,22 +1619,35 @@ EXPORT_SYMBOL(lprocfs_register_stats);
 void lprocfs_counter_init(struct lprocfs_stats *stats, int index,
                          unsigned conf, const char *name, const char *units)
 {
-       struct lprocfs_counter *c;
-       unsigned long           flags = 0;
+       struct lprocfs_counter_header   *header;
+       struct lprocfs_counter          *percpu_cntr;
+       unsigned long                   flags = 0;
+       unsigned int                    i;
+       unsigned int                    num_cpu;
 
        LASSERT(stats != NULL);
-       LASSERT(stats->ls_percpu[0] != NULL);
-
-       c = &(stats->ls_percpu[0]->lp_cntr[index]);
-
-       lprocfs_stats_lock(stats, LPROCFS_GET_NUM_CPU, &flags);
-       c->lc_config = conf;
-       c->lc_count = 0;
-       c->lc_sum = 0;
-       c->lc_min = LC_MIN_INIT;
-       c->lc_max = 0;
-       c->lc_name = name;
-       c->lc_units = units;
+
+       header = &stats->ls_cnt_header[index];
+       LASSERTF(header != NULL, "Failed to allocate stats header:[%d]%s/%s\n",
+                index, name, units);
+
+       header->lc_config = conf;
+       header->lc_name   = name;
+       header->lc_units  = units;
+
+       num_cpu = lprocfs_stats_lock(stats, LPROCFS_GET_NUM_CPU, &flags);
+       for (i = 0; i < num_cpu; ++i) {
+               if (stats->ls_percpu[i] == NULL)
+                       continue;
+               percpu_cntr = lprocfs_stats_counter_get(stats, i, index);
+               percpu_cntr->lc_count           = 0;
+               percpu_cntr->lc_min             = LC_MIN_INIT;
+               percpu_cntr->lc_max             = 0;
+               percpu_cntr->lc_sumsquare       = 0;
+               percpu_cntr->lc_sum             = 0;
+               if ((stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0)
+                       percpu_cntr->lc_sum_irq = 0;
+       }
        lprocfs_stats_unlock(stats, LPROCFS_GET_NUM_CPU, &flags);
 }
 EXPORT_SYMBOL(lprocfs_counter_init);
@@ -1709,7 +1755,7 @@ int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats)
                  * <obd.h>, and that the corresponding line item
                  * LPROCFS_OBD_OP_INIT(.., .., opname)
                  * is missing from the list above. */
-                LASSERTF(stats->ls_percpu[0]->lp_cntr[i].lc_name != NULL,
+               LASSERTF(stats->ls_cnt_header[i].lc_name != NULL,
                          "Missing obd_stat initializer obd_op "
                          "operation at offset %d.\n", i - num_private_stats);
         }
@@ -1795,7 +1841,7 @@ int lprocfs_alloc_md_stats(struct obd_device *obd,
         lprocfs_init_mps_stats(num_private_stats, stats);
 
         for (i = num_private_stats; i < num_stats; i++) {
-                if (stats->ls_percpu[0]->lp_cntr[i].lc_name == NULL) {
+               if (stats->ls_cnt_header[i].lc_name == NULL) {
                         CERROR("Missing md_stat initializer md_op "
                                "operation at offset %d. Aborting.\n",
                                i - num_private_stats);