From fde40ce32c91c804cb85be085f2aaf06170047b6 Mon Sep 17 00:00:00 2001 From: Lei Feng Date: Wed, 17 Aug 2022 08:48:33 +0800 Subject: [PATCH] LU-16087 lprocfs: add histogram to stats counter Add histogram to stats counter. Enable histogram for read/write_bytes in mdt/obdfilter job stats. Sample job_stats: - job_id: md5sum.0 snapshot_time : 3143196.864165417 secs.nsecs start_time : 3143196.707206168 secs.nsecs elapsed_time : 0.156959249 secs.nsecs read_bytes: { samples: 2, ..., hist: { 32K: 1, 1M: 1 } } write_bytes: { samples: 1, ..., hist: { 1K: 1 } } Signed-off-by: Lei Feng Change-Id: I75b6909c8b63f08b74c3c411ff3dcd27881bb839 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/48278 Reviewed-by: Andreas Dilger Reviewed-by: Jian Yu Reviewed-by: Shuichi Ihara Reviewed-by: James Simmons Reviewed-by: Oleg Drokin Tested-by: jenkins Tested-by: Maloo --- lustre/include/lprocfs_status.h | 11 +++++++++-- lustre/mdt/mdt_fs.c | 2 +- lustre/mdt/mdt_internal.h | 3 ++- lustre/mdt/mdt_lproc.c | 13 +++++++++---- lustre/obdclass/lprocfs_counters.c | 21 ++++++++++++++++---- lustre/obdclass/lprocfs_jobstats.c | 40 ++++++++++++++++++++++++++++++++++---- lustre/obdclass/lprocfs_status.c | 27 ++++++++++++++++++++++--- lustre/ofd/lproc_ofd.c | 37 +++++++++++++++++++---------------- lustre/ofd/ofd_internal.h | 9 ++++++--- lustre/ofd/ofd_io.c | 2 +- lustre/ofd/ofd_obd.c | 2 +- 11 files changed, 126 insertions(+), 41 deletions(-) diff --git a/lustre/include/lprocfs_status.h b/lustre/include/lprocfs_status.h index 52b2440..4d2df22 100644 --- a/lustre/include/lprocfs_status.h +++ b/lustre/include/lprocfs_status.h @@ -126,12 +126,16 @@ struct obd_hist_pcpu { * squares (for multi-valued counter samples only). This allows * external computation of standard deviation, but involves a 64-bit * multiply per counter increment. + * + * LPROCFS_CNTR_HISTOGRAM indicates that the counter should track an + * exponential histogram. 
*/ enum lprocfs_counter_config { LPROCFS_CNTR_EXTERNALLOCK = 0x0001, LPROCFS_CNTR_AVGMINMAX = 0x0002, LPROCFS_CNTR_STDDEV = 0x0004, + LPROCFS_CNTR_HISTOGRAM = 0x0008, /* counter unit type */ LPROCFS_TYPE_REQS = 0x0000, /* default if config = 0 */ @@ -149,6 +153,8 @@ enum lprocfs_counter_config { LPROCFS_TYPE_BYTES_FULL = LPROCFS_TYPE_BYTES | LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, + LPROCFS_TYPE_BYTES_FULL_HISTOGRAM = LPROCFS_TYPE_BYTES_FULL | + LPROCFS_CNTR_HISTOGRAM, }; #define LC_MIN_INIT ((~(__u64)0) >> 1) @@ -156,6 +162,7 @@ struct lprocfs_counter_header { enum lprocfs_counter_config lc_config; const char *lc_name; /* must be static */ const char *lc_units; /* must be static */ + struct obd_histogram *lc_hist; }; struct lprocfs_counter { @@ -438,8 +445,8 @@ struct obd_device; #define JOBSTATS_SESSION "session" typedef void (*cntr_init_callback)(struct lprocfs_stats *stats, - unsigned int offset); - + unsigned int offset, + enum lprocfs_counter_config cntr_umask); struct obd_job_stats { struct cfs_hash *ojs_hash; /* hash of jobids */ struct list_head ojs_list; /* list of job_stat structs */ diff --git a/lustre/mdt/mdt_fs.c b/lustre/mdt/mdt_fs.c index ac9d7c6..7aaf5d1 100644 --- a/lustre/mdt/mdt_fs.c +++ b/lustre/mdt/mdt_fs.c @@ -85,7 +85,7 @@ int mdt_export_stats_init(struct obd_device *obd, struct obd_export *exp, if (stats->nid_stats == NULL) RETURN(-ENOMEM); - mdt_stats_counter_init(stats->nid_stats, 0); + mdt_stats_counter_init(stats->nid_stats, 0, LPROCFS_CNTR_HISTOGRAM); rc = lprocfs_register_stats(stats->nid_proc, "stats", stats->nid_stats); if (rc != 0) { diff --git a/lustre/mdt/mdt_internal.h b/lustre/mdt/mdt_internal.h index 978f676..559138a 100644 --- a/lustre/mdt/mdt_internal.h +++ b/lustre/mdt/mdt_internal.h @@ -1323,7 +1323,8 @@ enum mdt_stat_idx { }; void mdt_counter_incr(struct ptlrpc_request *req, int opcode, long amount); -void mdt_stats_counter_init(struct lprocfs_stats *stats, unsigned int offset); +void mdt_stats_counter_init(struct 
lprocfs_stats *stats, unsigned int offset, + enum lprocfs_counter_config cntr_umask); int mdt_tunables_init(struct mdt_device *mdt, const char *name); void mdt_tunables_fini(struct mdt_device *mdt); diff --git a/lustre/mdt/mdt_lproc.c b/lustre/mdt/mdt_lproc.c index 0965d34..029bbbd 100644 --- a/lustre/mdt/mdt_lproc.c +++ b/lustre/mdt/mdt_lproc.c @@ -1659,7 +1659,8 @@ static const char * const mdt_stats[] = { [LPROC_MDT_FALLOCATE] = "fallocate", }; -void mdt_stats_counter_init(struct lprocfs_stats *stats, unsigned int offset) +void mdt_stats_counter_init(struct lprocfs_stats *stats, unsigned int offset, + enum lprocfs_counter_config cntr_umask) { int array_size = ARRAY_SIZE(mdt_stats); int oidx; /* obd_md_stats index */ @@ -1672,10 +1673,13 @@ void mdt_stats_counter_init(struct lprocfs_stats *stats, unsigned int offset) if (midx == LPROC_MDT_IO_READ_BYTES || midx == LPROC_MDT_IO_WRITE_BYTES) lprocfs_counter_init(stats, oidx, - LPROCFS_TYPE_BYTES_FULL, + LPROCFS_TYPE_BYTES_FULL_HISTOGRAM & + (~cntr_umask), mdt_stats[midx]); else - lprocfs_counter_init(stats, oidx, LPROCFS_TYPE_LATENCY, + lprocfs_counter_init(stats, oidx, + LPROCFS_TYPE_LATENCY & + (~cntr_umask), mdt_stats[midx]); } } @@ -1722,7 +1726,8 @@ int mdt_tunables_init(struct mdt_device *mdt, const char *name) return rc; /* add additional MDT md_stats after the default ones */ - mdt_stats_counter_init(obd->obd_md_stats, LPROC_MD_LAST_OPC); + mdt_stats_counter_init(obd->obd_md_stats, LPROC_MD_LAST_OPC, + LPROCFS_CNTR_HISTOGRAM); rc = lprocfs_job_stats_init(obd, ARRAY_SIZE(mdt_stats), mdt_stats_counter_init); diff --git a/lustre/obdclass/lprocfs_counters.c b/lustre/obdclass/lprocfs_counters.c index 521e59c..22e6bf3 100644 --- a/lustre/obdclass/lprocfs_counters.c +++ b/lustre/obdclass/lprocfs_counters.c @@ -42,10 +42,11 @@ #ifdef CONFIG_PROC_FS void lprocfs_counter_add(struct lprocfs_stats *stats, int idx, long amount) { - struct lprocfs_counter *percpu_cntr; - struct lprocfs_counter_header *header; - int 
smp_id; - unsigned long flags = 0; + struct lprocfs_counter *percpu_cntr; + struct lprocfs_counter_header *header; + int smp_id; + unsigned long flags = 0; + struct obd_histogram *hist; if (stats == NULL) return; @@ -87,6 +88,18 @@ void lprocfs_counter_add(struct lprocfs_stats *stats, int idx, long amount) if (amount > percpu_cntr->lc_max) percpu_cntr->lc_max = amount; } + /* no counter in interrupt has historgram for now */ + hist = stats->ls_cnt_header[idx].lc_hist; + if (hist != NULL) { + unsigned int val = 0; + + if (likely(amount != 0)) + val = min(fls(amount - 1), OBD_HIST_MAX - 1); + spin_lock(&hist->oh_lock); + hist->oh_buckets[val]++; + spin_unlock(&hist->oh_lock); + } + lprocfs_stats_unlock(stats, LPROCFS_GET_SMP_ID, &flags); } EXPORT_SYMBOL(lprocfs_counter_add); diff --git a/lustre/obdclass/lprocfs_jobstats.c b/lustre/obdclass/lprocfs_jobstats.c index df1e152..d123ee9 100644 --- a/lustre/obdclass/lprocfs_jobstats.c +++ b/lustre/obdclass/lprocfs_jobstats.c @@ -260,7 +260,7 @@ static struct job_stat *job_alloc(char *jobid, struct obd_job_stats *jobs) return NULL; } - jobs->ojs_cntr_init_fn(job->js_stats, 0); + jobs->ojs_cntr_init_fn(job->js_stats, 0, 0); memcpy(job->js_jobid, jobid, sizeof(job->js_jobid)); job->js_time_init = ktime_get(); @@ -444,7 +444,7 @@ static int lprocfs_jobstats_seq_show(struct seq_file *p, void *v) int i, joblen = 0; if (v == SEQ_START_TOKEN) { - seq_printf(p, "job_stats:\n"); + seq_puts(p, "job_stats:\n"); return 0; } @@ -473,6 +473,8 @@ static int lprocfs_jobstats_seq_show(struct seq_file *p, void *v) s = job->js_stats; for (i = 0; i < s->ls_num; i++) { + struct obd_histogram *hist; + cntr_header = &s->ls_cnt_header[i]; lprocfs_stats_collect(s, i, &ret); @@ -494,8 +496,38 @@ static int lprocfs_jobstats_seq_show(struct seq_file *p, void *v) ret.lc_count ? 
ret.lc_sumsquare : 0); } - seq_printf(p, " }\n"); - + /* show obd_histogram */ + hist = s->ls_cnt_header[i].lc_hist; + if (hist != NULL) { + bool first = true; + int j; + + seq_puts(p, ", hist: { "); + for (j = 0; j < ARRAY_SIZE(hist->oh_buckets); j++) { + unsigned long val = hist->oh_buckets[j]; + + if (val == 0) + continue; + if (first) + first = false; + else + seq_puts(p, ", "); + + if (j < 10) + seq_printf(p, "%lu: %lu", BIT(j), val); + else if (j < 20) + seq_printf(p, "%luK: %lu", BIT(j - 10), + val); + else if (j < 30) + seq_printf(p, "%luM: %lu", BIT(j - 20), + val); + else + seq_printf(p, "%luG: %lu", BIT(j - 30), + val); + } + seq_puts(p, " }"); + } + seq_puts(p, " }\n"); } return 0; diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index 3a9b351..b3f2830 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -1269,8 +1269,14 @@ void lprocfs_free_stats(struct lprocfs_stats **statsh) for (i = 0; i < num_entry; i++) if (stats->ls_percpu[i]) LIBCFS_FREE(stats->ls_percpu[i], percpusize); - if (stats->ls_cnt_header) + + if (stats->ls_cnt_header) { + for (i = 0; i < stats->ls_num; i++) + if (stats->ls_cnt_header[i].lc_hist != NULL) + CFS_FREE_PTR(stats->ls_cnt_header[i].lc_hist); CFS_FREE_PTR_ARRAY(stats->ls_cnt_header, stats->ls_num); + } + LIBCFS_FREE(stats, offsetof(typeof(*stats), ls_percpu[num_entry])); } EXPORT_SYMBOL(lprocfs_free_stats); @@ -1304,13 +1310,20 @@ EXPORT_SYMBOL(lprocfs_stats_collector); void lprocfs_clear_stats(struct lprocfs_stats *stats) { struct lprocfs_counter *percpu_cntr; - int i; - int j; + int i, j; unsigned int num_entry; unsigned long flags = 0; num_entry = lprocfs_stats_lock(stats, LPROCFS_GET_NUM_CPU, &flags); + /* clear histogram if exists */ + for (j = 0; j < stats->ls_num; j++) { + struct obd_histogram *hist = stats->ls_cnt_header[j].lc_hist; + + if (hist != NULL) + lprocfs_oh_clear(hist); + } + for (i = 0; i < num_entry; i++) { if (!stats->ls_percpu[i]) 
continue; @@ -1506,6 +1519,14 @@ void lprocfs_counter_init_units(struct lprocfs_stats *stats, int index, header->lc_name = name; header->lc_units = units; + if (config & LPROCFS_CNTR_HISTOGRAM) { + CFS_ALLOC_PTR(stats->ls_cnt_header[index].lc_hist); + if (stats->ls_cnt_header[index].lc_hist == NULL) + CERROR("LprocFS: Failed to allocate histogram:[%d]%s/%s\n", + index, name, units); + else + spin_lock_init(&stats->ls_cnt_header[index].lc_hist->oh_lock); + } num_cpu = lprocfs_stats_lock(stats, LPROCFS_GET_NUM_CPU, &flags); for (i = 0; i < num_cpu; ++i) { if (!stats->ls_percpu[i]) diff --git a/lustre/ofd/lproc_ofd.c b/lustre/ofd/lproc_ofd.c index fbcac34..32ff9d3 100644 --- a/lustre/ofd/lproc_ofd.c +++ b/lustre/ofd/lproc_ofd.c @@ -1052,40 +1052,43 @@ struct lprocfs_vars lprocfs_ofd_obd_vars[] = { * * param[in] stats statistics counters */ -void ofd_stats_counter_init(struct lprocfs_stats *stats, unsigned int offset) +void ofd_stats_counter_init(struct lprocfs_stats *stats, unsigned int offset, + enum lprocfs_counter_config cntr_umask) { LASSERT(stats && stats->ls_num >= LPROC_OFD_STATS_LAST); lprocfs_counter_init(stats, LPROC_OFD_STATS_READ_BYTES, - LPROCFS_TYPE_BYTES_FULL, "read_bytes"); + LPROCFS_TYPE_BYTES_FULL_HISTOGRAM & (~cntr_umask), + "read_bytes"); lprocfs_counter_init(stats, LPROC_OFD_STATS_WRITE_BYTES, - LPROCFS_TYPE_BYTES_FULL, "write_bytes"); + LPROCFS_TYPE_BYTES_FULL_HISTOGRAM & (~cntr_umask), + "write_bytes"); lprocfs_counter_init(stats, LPROC_OFD_STATS_READ, - LPROCFS_TYPE_LATENCY, "read"); + LPROCFS_TYPE_LATENCY & (~cntr_umask), "read"); lprocfs_counter_init(stats, LPROC_OFD_STATS_WRITE, - LPROCFS_TYPE_LATENCY, "write"); + LPROCFS_TYPE_LATENCY & (~cntr_umask), "write"); lprocfs_counter_init(stats, LPROC_OFD_STATS_GETATTR, - LPROCFS_TYPE_LATENCY, "getattr"); + LPROCFS_TYPE_LATENCY & (~cntr_umask), "getattr"); lprocfs_counter_init(stats, LPROC_OFD_STATS_SETATTR, - LPROCFS_TYPE_LATENCY, "setattr"); + LPROCFS_TYPE_LATENCY & (~cntr_umask), "setattr"); 
lprocfs_counter_init(stats, LPROC_OFD_STATS_PUNCH, - LPROCFS_TYPE_LATENCY, "punch"); + LPROCFS_TYPE_LATENCY & (~cntr_umask), "punch"); lprocfs_counter_init(stats, LPROC_OFD_STATS_SYNC, - LPROCFS_TYPE_LATENCY, "sync"); + LPROCFS_TYPE_LATENCY & (~cntr_umask), "sync"); lprocfs_counter_init(stats, LPROC_OFD_STATS_DESTROY, - LPROCFS_TYPE_LATENCY, "destroy"); + LPROCFS_TYPE_LATENCY & (~cntr_umask), "destroy"); lprocfs_counter_init(stats, LPROC_OFD_STATS_CREATE, - LPROCFS_TYPE_LATENCY, "create"); + LPROCFS_TYPE_LATENCY & (~cntr_umask), "create"); lprocfs_counter_init(stats, LPROC_OFD_STATS_STATFS, - LPROCFS_TYPE_LATENCY, "statfs"); + LPROCFS_TYPE_LATENCY & (~cntr_umask), "statfs"); lprocfs_counter_init(stats, LPROC_OFD_STATS_GET_INFO, - LPROCFS_TYPE_LATENCY, "get_info"); + LPROCFS_TYPE_LATENCY & (~cntr_umask), "get_info"); lprocfs_counter_init(stats, LPROC_OFD_STATS_SET_INFO, - LPROCFS_TYPE_LATENCY, "set_info"); + LPROCFS_TYPE_LATENCY & (~cntr_umask), "set_info"); lprocfs_counter_init(stats, LPROC_OFD_STATS_QUOTACTL, - LPROCFS_TYPE_LATENCY, "quotactl"); + LPROCFS_TYPE_LATENCY & (~cntr_umask), "quotactl"); lprocfs_counter_init(stats, LPROC_OFD_STATS_PREALLOC, - LPROCFS_TYPE_LATENCY, "prealloc"); + LPROCFS_TYPE_LATENCY & (~cntr_umask), "prealloc"); } LPROC_SEQ_FOPS(lprocfs_nid_stats_clear); @@ -1185,7 +1188,7 @@ int ofd_tunables_init(struct ofd_device *ofd) GOTO(obd_free_stats, rc); } - ofd_stats_counter_init(obd->obd_stats, 0); + ofd_stats_counter_init(obd->obd_stats, 0, LPROCFS_CNTR_HISTOGRAM); rc = lprocfs_job_stats_init(obd, LPROC_OFD_STATS_LAST, ofd_stats_counter_init); diff --git a/lustre/ofd/ofd_internal.h b/lustre/ofd/ofd_internal.h index 06d1934..7970ed4 100644 --- a/lustre/ofd/ofd_internal.h +++ b/lustre/ofd/ofd_internal.h @@ -365,10 +365,13 @@ int ofd_txn_stop_cb(const struct lu_env *env, struct thandle *txn, /* lproc_ofd.c */ int ofd_tunables_init(struct ofd_device *ofd); #ifdef CONFIG_PROC_FS -void ofd_stats_counter_init(struct lprocfs_stats *stats, unsigned 
int offset); +void ofd_stats_counter_init(struct lprocfs_stats *stats, unsigned int offset, + enum lprocfs_counter_config cntr_umask); #else -static inline void ofd_stats_counter_init(struct lprocfs_stats *stats, - unsigned int offset) {} +static inline void +ofd_stats_counter_init(struct lprocfs_stats *stats, + unsigned int offset, + enum lprocfs_counter_config cntr_umask) {} #endif /* ofd_objects.c */ diff --git a/lustre/ofd/ofd_io.c b/lustre/ofd/ofd_io.c index 27da3ed..dd84015 100644 --- a/lustre/ofd/ofd_io.c +++ b/lustre/ofd/ofd_io.c @@ -1539,7 +1539,7 @@ int ofd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp, /* see comment on LPROC_OFD_STATS_WRITE_BYTES usage above */ ofd_counter_incr(exp, LPROC_OFD_STATS_READ_BYTES, jobid, nob); ofd_counter_incr(exp, LPROC_OFD_STATS_READ, jobid, - ktime_us_delta(ktime_get(), kstart)); + ktime_us_delta(ktime_get(), kstart)); rc = ofd_commitrw_read(env, ofd, fid, objcount, npages, lnb); diff --git a/lustre/ofd/ofd_obd.c b/lustre/ofd/ofd_obd.c index b6768be..c921a21 100644 --- a/lustre/ofd/ofd_obd.c +++ b/lustre/ofd/ofd_obd.c @@ -93,7 +93,7 @@ static int ofd_export_stats_init(struct ofd_device *ofd, if (!stats->nid_stats) RETURN(-ENOMEM); - ofd_stats_counter_init(stats->nid_stats, 0); + ofd_stats_counter_init(stats->nid_stats, 0, LPROCFS_CNTR_HISTOGRAM); rc = lprocfs_register_stats(stats->nid_proc, "stats", stats->nid_stats); if (rc != 0) { -- 1.8.3.1