From 5efb892396e394e71340ac8c2c37abcb419327a1 Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Wed, 19 Sep 2018 17:08:47 -0400 Subject: [PATCH] LU-11407 obdclass: add start time to stats files When the stats files are initialized or reset, store the current timestamp with the stats. That allows computing average IO and RPC rates over the accumulated stats lifetime, in addition to the normal incremental operation rates found by comparing successive values read from the stats file with the read interval. Any stats that currently print the "snapshot_time:" header will now also print "start_time:" and "elapsed_time:" fields as well. Consolodate this printing into a helper function instead of duplicating very similar code in many different functions. Output can't be exactly the same for all callers, because these fields are embedded into different types of output files, but it is very close. Change struct rename_stats and brw_stats to use a common name prefix. Change the obd_job_stats timestamps to ktime_t so that we can use the common helper function for printing the header. It is easier to store ojs_cleanup_interval internally as 1/2 of the maximum stats age, since since division is more easily done when the value is initially set as seconds compared to when it is ktime_t. This may also be a tiny bit more efficient since we don't do a divide/shift on each access. Lustre-change: https://review.whamcloud.com/33201 Lustre-commit: ea2cd3af7bfabfa6876727ee44495f4c331bea8e LU-16231 misc: fix stats snapshot_time to use wallclock merged into this patch here to avoid landing broken stats. Some "init" times were not initialized when stats were allocated or cleared, do this for all stats shown by lprocfs_stats_header(). Rename struct osc_device fields from od_ to osc_ to avoid confusion with struct osd_device. Having two od_stats was especially confusing. Add a test case to verify snapshot_time, start_time, elapsed_time. Lustre-change: https://review.whamcloud.com/48821 Lustre-commit: e42efe35eec7b9725f7f4fff86aaee04093366b0 LU-16110 lprocfs: make job_stats and rename_stats valid YAML related to lprocfs_stats_header() are include here that were not present when that patch was backported. This fixes the output to correctly indent the items to follow YAML formatting rules. Lustre-change: https://review.whamcloud.com/48417 Lustre-commit: e96cb6ff1fea7a2bc62a6c0786fb0e07cbfda81a Signed-off-by: Andreas Dilger Change-Id: Iacefa17def455ef53a28fd14b6d4c670463ebbe5 Reviewed-by: Ben Evans Reviewed-by: Arshad Hussain Reviewed-by: Feng Lei Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/49745 Tested-by: jenkins Tested-by: Maloo --- lustre/include/lprocfs_status.h | 11 +++- lustre/include/lustre_osc.h | 17 +++--- lustre/include/obd.h | 4 +- lustre/ldlm/ldlm_lib.c | 2 + lustre/llite/llite_internal.h | 21 ++++--- lustre/llite/lproc_llite.c | 34 +++++------ lustre/mdc/lproc_mdc.c | 14 ++--- lustre/mdc/mdc_dev.c | 13 ++-- lustre/mdt/mdt_lproc.c | 114 +++++++++++++++++------------------ lustre/obdclass/genops.c | 9 +-- lustre/obdclass/lprocfs_jobstats.c | 87 +++++++++++++++++---------- lustre/obdclass/lprocfs_status.c | 49 ++++++++++++--- lustre/osc/lproc_osc.c | 23 ++++--- lustre/osc/osc_cache.c | 2 +- lustre/osc/osc_dev.c | 55 ++++++++--------- lustre/osd-ldiskfs/osd_io.c | 8 +-- lustre/osd-ldiskfs/osd_lproc.c | 119 ++++++++++++++++++------------------- lustre/osd-zfs/osd_io.c | 6 +- lustre/osd-zfs/osd_lproc.c | 40 ++++++------- lustre/tests/sanity.sh | 48 +++++++++++++-- lustre/utils/llobdstat | 2 +- lustre/utils/llstat | 2 +- 22 files changed, 386 insertions(+), 294 deletions(-) diff --git a/lustre/include/lprocfs_status.h b/lustre/include/lprocfs_status.h index 06d9041..e593a69 100644 --- a/lustre/include/lprocfs_status.h +++ b/lustre/include/lprocfs_status.h @@ -122,7 +122,8 @@ enum { }; struct brw_stats { - struct obd_histogram hist[BRW_LAST]; + ktime_t bs_init; + struct obd_histogram bs_hist[BRW_LAST]; }; /* An lprocfs counter can be configured using the enum bit masks below. @@ -220,6 +221,7 @@ struct lprocfs_stats { /* 1 + the biggest cpu # whose ls_percpu slot has been allocated */ unsigned short ls_biggest_alloc_num; enum lprocfs_stats_flags ls_flags; + ktime_t ls_init; /* Lock used when there are no percpu stats areas; For percpu stats, * it is used to protect ls_biggest_alloc_num change */ spinlock_t ls_lock; @@ -410,8 +412,8 @@ struct obd_job_stats { struct cfs_hash *ojs_hash; /* hash of jobids */ struct list_head ojs_list; /* list of job_stat structs */ rwlock_t ojs_lock; /* protect ojs_list/js_list */ - unsigned int ojs_cleanup_interval;/* seconds before expiry */ - time64_t ojs_last_cleanup; /* previous cleanup time */ + ktime_t ojs_cleanup_interval;/* 1/2 expiry seconds */ + ktime_t ojs_cleanup_last;/* previous cleanup time */ cntr_init_callback ojs_cntr_init_fn;/* lprocfs_stats initializer */ unsigned short ojs_cntr_num; /* number of stats in struct */ bool ojs_cleaning; /* currently expiring stats */ @@ -553,6 +555,9 @@ extern int lprocfs_seq_create(struct proc_dir_entry *parent, const char *name, extern int lprocfs_obd_seq_create(struct obd_device *obd, const char *name, mode_t mode, const struct proc_ops *seq_fops, void *data); +extern void lprocfs_stats_header(struct seq_file *seq, ktime_t now, + ktime_t ts_init, int width, const char *colon, + bool show_units, const char *prefix); /* Generic callbacks */ extern int lprocfs_uuid_seq_show(struct seq_file *m, void *data); diff --git a/lustre/include/lustre_osc.h b/lustre/include/lustre_osc.h index 4a30995..fc1474a 100644 --- a/lustre/include/lustre_osc.h +++ b/lustre/include/lustre_osc.h @@ -101,17 +101,18 @@ static inline struct osc_async_page *brw_page2oap(struct brw_page *pga) } struct osc_device { - struct cl_device od_cl; - struct obd_export *od_exp; + struct cl_device osc_cl; + struct obd_export *osc_exp; /* Write stats is actually protected by client_obd's lock. */ struct osc_stats { + ktime_t os_init; uint64_t os_lockless_writes; /* by bytes */ uint64_t os_lockless_reads; /* by bytes */ - } od_stats; + } osc_stats; /* configuration item(s) */ - time64_t od_contention_time; + time64_t osc_contention_time; }; struct osc_extent; @@ -755,12 +756,12 @@ static inline struct osc_io *osc_env_io(const struct lu_env *env) static inline struct osc_device *lu2osc_dev(const struct lu_device *d) { - return container_of_safe(d, struct osc_device, od_cl.cd_lu_dev); + return container_of_safe(d, struct osc_device, osc_cl.cd_lu_dev); } static inline struct obd_export *osc_export(const struct osc_object *obj) { - return lu2osc_dev(obj->oo_cl.co_lu.lo_dev)->od_exp; + return lu2osc_dev(obj->oo_cl.co_lu.lo_dev)->osc_exp; } static inline struct client_obd *osc_cli(const struct osc_object *obj) @@ -781,12 +782,12 @@ static inline struct cl_object *osc2cl(const struct osc_object *obj) static inline struct osc_device *obd2osc_dev(const struct obd_device *obd) { return container_of_safe(obd->obd_lu_dev, struct osc_device, - od_cl.cd_lu_dev); + osc_cl.cd_lu_dev); } static inline struct lu_device *osc2lu_dev(struct osc_device *osc) { - return &osc->od_cl.cd_lu_dev; + return &osc->osc_cl.cd_lu_dev; } static inline struct lu_object *osc2lu(struct osc_object *osc) diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 4798197..96098d4 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -282,6 +282,7 @@ struct client_obd { u32 cl_max_pages_per_rpc; u32 cl_max_rpcs_in_flight; u32 cl_max_short_io_bytes; + ktime_t cl_stats_init; struct obd_histogram cl_read_rpc_hist; struct obd_histogram cl_write_rpc_hist; struct obd_histogram cl_read_page_hist; @@ -336,9 +337,10 @@ struct client_obd { __u16 cl_close_rpcs_in_flight; wait_queue_head_t cl_mod_rpcs_waitq; unsigned long *cl_mod_tag_bitmap; + ktime_t cl_mod_rpcs_init; struct obd_histogram cl_mod_rpcs_hist; - /* mgc datastruct */ + /* mgc datastruct */ struct mutex cl_mgc_mutex; struct local_oid_storage *cl_mgc_los; struct dt_object *cl_mgc_configs_dir; diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 4bb4feb..ba87e12 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -440,6 +440,7 @@ int client_obd_setup(struct obd_device *obd, struct lustre_cfg *lcfg) cli->cl_r_in_flight = 0; cli->cl_w_in_flight = 0; + cli->cl_stats_init = ktime_get_real(); spin_lock_init(&cli->cl_read_rpc_hist.oh_lock); spin_lock_init(&cli->cl_write_rpc_hist.oh_lock); spin_lock_init(&cli->cl_read_page_hist.oh_lock); @@ -514,6 +515,7 @@ int client_obd_setup(struct obd_device *obd, struct lustre_cfg *lcfg) cli->cl_mod_rpcs_in_flight = 0; cli->cl_close_rpcs_in_flight = 0; init_waitqueue_head(&cli->cl_mod_rpcs_waitq); + cli->cl_mod_rpcs_init = ktime_get_real(); cli->cl_mod_tag_bitmap = NULL; INIT_LIST_HEAD(&cli->cl_chg_dev_linkage); diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index e3a6651..feac50e 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -690,14 +690,15 @@ struct ra_io_arg { #define LL_HIST_START 12 /* buckets start at 2^12 = 4k */ #define LL_PROCESS_HIST_MAX 10 struct per_process_info { - pid_t pid; - struct obd_histogram pp_r_hist; - struct obd_histogram pp_w_hist; + pid_t pid; + struct obd_histogram pp_r_hist; + struct obd_histogram pp_w_hist; }; /* pp_extents[LL_PROCESS_HIST_MAX] will hold the combined process info */ struct ll_rw_extents_info { - struct per_process_info pp_extents[LL_PROCESS_HIST_MAX + 1]; + ktime_t pp_init; + struct per_process_info pp_extents[LL_PROCESS_HIST_MAX + 1]; }; #define LL_OFFSET_HIST_MAX 100 @@ -844,16 +845,18 @@ struct ll_sb_info { struct lu_site *ll_site; struct cl_device *ll_cl; + /* Statistics */ struct ll_rw_extents_info ll_rw_extents_info; - int ll_extent_process_count; + int ll_extent_process_count; + unsigned int ll_offset_process_count; + ktime_t ll_process_stats_init; struct ll_rw_process_info ll_rw_process_info[LL_PROCESS_HIST_MAX]; - unsigned int ll_offset_process_count; struct ll_rw_process_info ll_rw_offset_info[LL_OFFSET_HIST_MAX]; - unsigned int ll_rw_offset_entry_count; - int ll_stats_track_id; + unsigned int ll_rw_offset_entry_count; + int ll_stats_track_id; enum stats_track_type ll_stats_track_type; - int ll_rw_stats_on; + int ll_rw_stats_on; /* metadata stat-ahead */ unsigned int ll_sa_running_max;/* max concurrent diff --git a/lustre/llite/lproc_llite.c b/lustre/llite/lproc_llite.c index 1588b0c..65e3654 100644 --- a/lustre/llite/lproc_llite.c +++ b/lustre/llite/lproc_llite.c @@ -2365,23 +2365,20 @@ static void ll_display_extents_info(struct ll_rw_extents_info *io_extents, static int ll_rw_extents_stats_pp_seq_show(struct seq_file *seq, void *v) { - struct timespec64 now; struct ll_sb_info *sbi = seq->private; struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info; int k; - ktime_get_real_ts64(&now); - if (!sbi->ll_rw_stats_on) { seq_puts(seq, "disabled\n write anything to this file to activate, then '0' or 'disable' to deactivate\n"); return 0; } - seq_printf(seq, "snapshot_time: %llu.%09lu (secs.nsecs)\n", - (s64)now.tv_sec, now.tv_nsec); - seq_printf(seq, "%15s %19s | %20s\n", " ", "read", "write"); - seq_printf(seq, "%13s %14s %4s %4s | %14s %4s %4s\n", - "extents", "calls", "%", "cum%", - "calls", "%", "cum%"); + lprocfs_stats_header(seq, ktime_get_real(), io_extents->pp_init, 25, + ":", true, ""); + seq_printf(seq, "%15s %19s | %20s\n", " ", "read", "write"); + seq_printf(seq, "%13s %14s %4s %4s | %14s %4s %4s\n", + "extents", "calls", "%", "cum%", + "calls", "%", "cum%"); spin_lock(&sbi->ll_pp_extent_lock); for (k = 0; k < LL_PROCESS_HIST_MAX; k++) { if (io_extents->pp_extents[k].pid != 0) { @@ -2416,6 +2413,7 @@ static ssize_t ll_rw_extents_stats_pp_seq_write(struct file *file, sbi->ll_rw_stats_on = 1; spin_lock(&sbi->ll_pp_extent_lock); + io_extents->pp_init = ktime_get_real(); for (i = 0; i < LL_PROCESS_HIST_MAX; i++) { io_extents->pp_extents[i].pid = 0; lprocfs_oh_clear(&io_extents->pp_extents[i].pp_r_hist); @@ -2429,18 +2427,16 @@ LDEBUGFS_SEQ_FOPS(ll_rw_extents_stats_pp); static int ll_rw_extents_stats_seq_show(struct seq_file *seq, void *v) { - struct timespec64 now; struct ll_sb_info *sbi = seq->private; struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info; - ktime_get_real_ts64(&now); - if (!sbi->ll_rw_stats_on) { seq_puts(seq, "disabled\n write anything to this file to activate, then '0' or 'disable' to deactivate\n"); return 0; } - seq_printf(seq, "snapshot_time: %llu.%09lu (secs.nsecs)\n", - (s64)now.tv_sec, now.tv_nsec); + + lprocfs_stats_header(seq, ktime_get_real(), io_extents->pp_init, 25, + ":", true, ""); seq_printf(seq, "%15s %19s | %20s\n", " ", "read", "write"); seq_printf(seq, "%13s %14s %4s %4s | %14s %4s %4s\n", @@ -2474,6 +2470,7 @@ static ssize_t ll_rw_extents_stats_seq_write(struct file *file, sbi->ll_rw_stats_on = 1; spin_lock(&sbi->ll_pp_extent_lock); + io_extents->pp_init = ktime_get_real(); for (i = 0; i <= LL_PROCESS_HIST_MAX; i++) { io_extents->pp_extents[i].pid = 0; lprocfs_oh_clear(&io_extents->pp_extents[i].pp_r_hist); @@ -2591,22 +2588,20 @@ void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid, static int ll_rw_offset_stats_seq_show(struct seq_file *seq, void *v) { - struct timespec64 now; struct ll_sb_info *sbi = seq->private; struct ll_rw_process_info *offset = sbi->ll_rw_offset_info; struct ll_rw_process_info *process = sbi->ll_rw_process_info; int i; - ktime_get_real_ts64(&now); - if (!sbi->ll_rw_stats_on) { seq_puts(seq, "disabled\n write anything to this file to activate, then '0' or 'disable' to deactivate\n"); return 0; } + spin_lock(&sbi->ll_process_lock); + lprocfs_stats_header(seq, ktime_get_real(), sbi->ll_process_stats_init, + 25, ":", true, ""); - seq_printf(seq, "snapshot_time: %llu.%09lu (secs.nsecs)\n", - (s64)now.tv_sec, now.tv_nsec); seq_printf(seq, "%3s %10s %14s %14s %17s %17s %14s\n", "R/W", "PID", "RANGE START", "RANGE END", "SMALLEST EXTENT", "LARGEST EXTENT", "OFFSET"); @@ -2666,6 +2661,7 @@ static ssize_t ll_rw_offset_stats_seq_write(struct file *file, spin_lock(&sbi->ll_process_lock); sbi->ll_offset_process_count = 0; sbi->ll_rw_offset_entry_count = 0; + sbi->ll_process_stats_init = ktime_get_real(); memset(process_info, 0, sizeof(struct ll_rw_process_info) * LL_PROCESS_HIST_MAX); memset(offset_info, 0, sizeof(struct ll_rw_process_info) * diff --git a/lustre/mdc/lproc_mdc.c b/lustre/mdc/lproc_mdc.c index b493bd9..f0b79e9 100644 --- a/lustre/mdc/lproc_mdc.c +++ b/lustre/mdc/lproc_mdc.c @@ -413,6 +413,7 @@ static ssize_t mdc_rpc_stats_seq_write(struct file *file, lprocfs_oh_clear(&cli->cl_write_page_hist); lprocfs_oh_clear(&cli->cl_read_offset_hist); lprocfs_oh_clear(&cli->cl_write_offset_hist); + cli->cl_mod_rpcs_init = ktime_get_real(); return len; } @@ -513,14 +514,11 @@ LPROC_SEQ_FOPS(mdc_rpc_stats); static int mdc_stats_seq_show(struct seq_file *seq, void *v) { - struct timespec64 now; struct obd_device *obd = seq->private; - struct osc_stats *stats = &obd2osc_dev(obd)->od_stats; + struct osc_stats *stats = &obd2osc_dev(obd)->osc_stats; - ktime_get_real_ts64(&now); - - seq_printf(seq, "snapshot_time: %lld.%09lu (secs.nsecs)\n", - (s64)now.tv_sec, now.tv_nsec); + lprocfs_stats_header(seq, ktime_get_real(), stats->os_init, 25, ":", + true, ""); seq_printf(seq, "lockless_write_bytes\t\t%llu\n", stats->os_lockless_writes); seq_printf(seq, "lockless_read_bytes\t\t%llu\n", @@ -534,9 +532,11 @@ static ssize_t mdc_stats_seq_write(struct file *file, { struct seq_file *seq = file->private_data; struct obd_device *obd = seq->private; - struct osc_stats *stats = &obd2osc_dev(obd)->od_stats; + struct osc_stats *stats = &obd2osc_dev(obd)->osc_stats; memset(stats, 0, sizeof(*stats)); + stats->os_init = ktime_get_real(); + return len; } LPROC_SEQ_FOPS(mdc_stats); diff --git a/lustre/mdc/mdc_dev.c b/lustre/mdc/mdc_dev.c index 8a0687b..96e896c 100644 --- a/lustre/mdc/mdc_dev.c +++ b/lustre/mdc/mdc_dev.c @@ -1569,16 +1569,16 @@ static struct lu_device *mdc_device_alloc(const struct lu_env *env, struct lustre_cfg *cfg) { struct lu_device *d; - struct osc_device *od; + struct osc_device *oc; struct obd_device *obd; int rc; - OBD_ALLOC_PTR(od); - if (od == NULL) + OBD_ALLOC_PTR(oc); + if (oc == NULL) RETURN(ERR_PTR(-ENOMEM)); - cl_device_init(&od->od_cl, t); - d = osc2lu_dev(od); + cl_device_init(&oc->osc_cl, t); + d = osc2lu_dev(oc); d->ld_ops = &mdc_lu_ops; /* Setup MDC OBD */ @@ -1591,7 +1591,8 @@ static struct lu_device *mdc_device_alloc(const struct lu_env *env, osc_device_free(env, d); RETURN(ERR_PTR(rc)); } - od->od_exp = obd->obd_self_export; + oc->osc_exp = obd->obd_self_export; + oc->osc_stats.os_init = ktime_get_real(); RETURN(d); } diff --git a/lustre/mdt/mdt_lproc.c b/lustre/mdt/mdt_lproc.c index 798cf94..aac0834 100644 --- a/lustre/mdt/mdt_lproc.c +++ b/lustre/mdt/mdt_lproc.c @@ -62,7 +62,9 @@ /** * The rename stats output would be YAML formats, like * rename_stats: - * - snapshot_time: 1234567890.123456 + * - snapshot_time: 1234567890.123456789 + * - start_time: 1234567880.987654321 + * - elapsed_time: 9.135802468 * - same_dir: * 4kB: { samples: 1230, pct: 33, cum_pct: 45 } * 8kB: { samples: 1242, pct: 33, cum_pct: 78 } @@ -78,76 +80,69 @@ **/ static void display_rename_stats(struct seq_file *seq, char *name, - struct obd_histogram *hist) -{ - unsigned long tot, t, cum = 0; - int i; - - tot = lprocfs_oh_sum(hist); - if (tot > 0) - seq_printf(seq, "- %s:\n", name); - /* dir size start from 4K, start i from 10(2^10) here */ - for (i = 0; i < OBD_HIST_MAX; i++) { - t = hist->oh_buckets[i]; - cum += t; - if (cum == 0) - continue; - - if (i < 10) - seq_printf(seq, "%6s%d%s", " ", 1<< i, "bytes:"); - else if (i < 20) - seq_printf(seq, "%6s%d%s", " ", 1<<(i-10), "KB:"); - else - seq_printf(seq, "%6s%d%s", " ", 1<<(i-20), "MB:"); + struct obd_histogram *rs_hist) +{ + unsigned long tot, t, cum = 0; + int i; - seq_printf(seq, " { sample: %3lu, pct: %3u, cum_pct: %3u }\n", - t, pct(t, tot), pct(cum, tot)); + tot = lprocfs_oh_sum(rs_hist); + if (tot > 0) + seq_printf(seq, "- %s:\n", name); - if (cum == tot) - break; - } -} + for (i = 0; i < OBD_HIST_MAX; i++) { + t = rs_hist->oh_buckets[i]; + cum += t; + if (cum == 0) + continue; -static void rename_stats_show(struct seq_file *seq, - struct rename_stats *rename_stats) -{ - struct timespec64 now; + if (i < 10) + seq_printf(seq, "%6s%d%s", " ", 1 << i, "bytes:"); + else if (i < 20) + seq_printf(seq, "%6s%d%s", " ", 1 << (i - 10), "KB:"); + else + seq_printf(seq, "%6s%d%s", " ", 1 << (i - 20), "MB:"); - /* this sampling races with updates */ - ktime_get_real_ts64(&now); - seq_printf(seq, "rename_stats:\n-\n"); - seq_printf(seq, " %-15s %llu.%9lu\n", "snapshot_time:", - (s64)now.tv_sec, now.tv_nsec); + seq_printf(seq, " { sample: %3lu, pct: %3u, cum_pct: %3u }\n", + t, pct(t, tot), pct(cum, tot)); - display_rename_stats(seq, "same_dir", - &rename_stats->rs_hist[RENAME_SAMEDIR_SIZE]); - display_rename_stats(seq, "crossdir_src", - &rename_stats->rs_hist[RENAME_CROSSDIR_SRC_SIZE]); - display_rename_stats(seq, "crossdir_tgt", - &rename_stats->rs_hist[RENAME_CROSSDIR_TGT_SIZE]); + if (cum == tot) + break; + } } static int mdt_rename_stats_seq_show(struct seq_file *seq, void *v) { - struct mdt_device *mdt = seq->private; + struct mdt_device *mdt = seq->private; + struct rename_stats *rename_stats = &mdt->mdt_rename_stats; + + /* this sampling races with updates */ + seq_puts(seq, "rename_stats:\n-\n"); + lprocfs_stats_header(seq, ktime_get_real(), rename_stats->rs_init, 15, + ":", false, " "); - rename_stats_show(seq, &mdt->mdt_rename_stats); + display_rename_stats(seq, "same_dir", + &rename_stats->rs_hist[RENAME_SAMEDIR_SIZE]); + display_rename_stats(seq, "crossdir_src", + &rename_stats->rs_hist[RENAME_CROSSDIR_SRC_SIZE]); + display_rename_stats(seq, "crossdir_tgt", + &rename_stats->rs_hist[RENAME_CROSSDIR_TGT_SIZE]); - return 0; + return 0; } static ssize_t mdt_rename_stats_seq_write(struct file *file, const char __user *buf, size_t len, loff_t *off) { - struct seq_file *seq = file->private_data; - struct mdt_device *mdt = seq->private; - int i; + struct seq_file *seq = file->private_data; + struct mdt_device *mdt = seq->private; + int i; - for (i = 0; i < RENAME_LAST; i++) - lprocfs_oh_clear(&mdt->mdt_rename_stats.rs_hist[i]); + for (i = 0; i < RENAME_LAST; i++) + lprocfs_oh_clear(&mdt->mdt_rename_stats.rs_hist[i]); + mdt->mdt_rename_stats.rs_init = ktime_get_real(); - return len; + return len; } LPROC_SEQ_FOPS(mdt_rename_stats); @@ -157,6 +152,7 @@ static int lproc_mdt_attach_rename_seqstat(struct mdt_device *mdt) for (i = 0; i < RENAME_LAST; i++) spin_lock_init(&mdt->mdt_rename_stats.rs_hist[i].oh_lock); + mdt->mdt_rename_stats.rs_init = ktime_get_real(); return lprocfs_obd_seq_create(mdt2obd_dev(mdt), "rename_stats", 0644, &mdt_rename_stats_fops, mdt); @@ -168,9 +164,9 @@ void mdt_rename_counter_tally(struct mdt_thread_info *info, struct mdt_object *src, struct mdt_object *tgt, enum mdt_stat_idx msi, s64 ktime_delta) { - struct md_attr *ma = &info->mti_attr; - struct rename_stats *rstats = &mdt->mdt_rename_stats; - int rc; + struct md_attr *ma = &info->mti_attr; + struct rename_stats *rstats = &mdt->mdt_rename_stats; + int rc; mdt_counter_incr(req, LPROC_MDT_RENAME, ktime_delta); @@ -180,8 +176,8 @@ void mdt_rename_counter_tally(struct mdt_thread_info *info, if (rc) { CERROR("%s: "DFID" attr_get, rc = %d\n", mdt_obd_name(mdt), PFID(mdt_object_fid(src)), rc); - return; - } + return; + } if (src == tgt) { mdt_counter_incr(req, LPROC_MDT_RENAME_SAMEDIR, ktime_delta); @@ -202,8 +198,8 @@ void mdt_rename_counter_tally(struct mdt_thread_info *info, if (rc) { CERROR("%s: "DFID" attr_get, rc = %d\n", mdt_obd_name(mdt), PFID(mdt_object_fid(tgt)), rc); - return; - } + return; + } lprocfs_oh_tally_log2(&rstats->rs_hist[RENAME_CROSSDIR_TGT_SIZE], (unsigned int)ma->ma_attr.la_size); diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 35bd981..75d68dd 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -2198,15 +2198,11 @@ int obd_mod_rpc_stats_seq_show(struct client_obd *cli, struct seq_file *seq) { unsigned long mod_tot = 0, mod_cum; - struct timespec64 now; int i; - ktime_get_real_ts64(&now); - spin_lock(&cli->cl_mod_rpcs_lock); - - seq_printf(seq, "snapshot_time: %llu.%9lu (secs.nsecs)\n", - (s64)now.tv_sec, now.tv_nsec); + lprocfs_stats_header(seq, ktime_get_real(), cli->cl_mod_rpcs_init, 25, + ":", true, ""); seq_printf(seq, "modify_RPCs_in_flight: %hu\n", cli->cl_mod_rpcs_in_flight); @@ -2218,6 +2214,7 @@ int obd_mod_rpc_stats_seq_show(struct client_obd *cli, mod_cum = 0; for (i = 0; i < OBD_HIST_MAX; i++) { unsigned long mod = cli->cl_mod_rpcs_hist.oh_buckets[i]; + mod_cum += mod; seq_printf(seq, "%d:\t\t%10lu %3u %3u\n", i, mod, pct(mod, mod_tot), diff --git a/lustre/obdclass/lprocfs_jobstats.c b/lustre/obdclass/lprocfs_jobstats.c index 7870da5..a577201 100644 --- a/lustre/obdclass/lprocfs_jobstats.c +++ b/lustre/obdclass/lprocfs_jobstats.c @@ -66,7 +66,7 @@ struct job_stat { struct list_head js_list; /* on ojs_list, with ojs_lock */ atomic_t js_refcount; /* num users of this struct */ char js_jobid[LUSTRE_JOBID_SIZE]; /* job name + NUL*/ - time64_t js_timestamp; /* seconds of most recent stat*/ + ktime_t js_time_latest; /* time of most recent stat*/ struct lprocfs_stats *js_stats; /* per-job statistics */ struct obd_job_stats *js_jobstats; /* for accessing ojs_lock */ }; @@ -162,11 +162,11 @@ static int job_cleanup_iter_callback(struct cfs_hash *hs, struct cfs_hash_bd *bd, struct hlist_node *hnode, void *data) { - time64_t oldest_time = *((time64_t *)data); + ktime_t oldest_time = *((ktime_t *)data); struct job_stat *job; job = hlist_entry(hnode, struct job_stat, js_hash); - if (job->js_timestamp < oldest_time) + if (ktime_before(job->js_time_latest, oldest_time)) cfs_hash_bd_del_locked(hs, bd, hnode); return 0; @@ -177,30 +177,37 @@ static int job_cleanup_iter_callback(struct cfs_hash *hs, * * Since this function may be called frequently, do not scan all of the * jobstats on each call, only twice per cleanup interval. That means stats - * may be around on average cleanup_interval / 4 longer than necessary, - * but that is not considered harmful. + * may be on average around cleanup_interval / 4 older than the cleanup + * interval, but that is not considered harmful. * - * If \a before is negative then this will force clean up all jobstats due - * to the expiry time being in the future (e.g. at shutdown). + * The value stored in ojs_cleanup_interval is how often to perform a cleanup + * scan, and 1/2 of the maximum age of the individual statistics. This is + * done rather than dividing the interval by two each time, because it is + * much easier to do the division when the value is initially set (in seconds) + * rather than after it has been converted to ktime_t, and maybe a bit faster. + * + * If \a clear is true then this will force clean up all jobstats + * (e.g. at shutdown). * * If there is already another thread doing jobstats cleanup, don't try to * do this again in the current thread unless this is a force cleanup. * * \param[in] stats stucture tracking all job stats for this device - * \param[in] before expire jobstats updated more than this many seconds ago + * \param[in] clear clear all job stats if true */ -static void lprocfs_job_cleanup(struct obd_job_stats *stats, int before) +static void lprocfs_job_cleanup(struct obd_job_stats *stats, bool clear) { - time64_t now = ktime_get_real_seconds(); - time64_t oldest; - - if (likely(before >= 0)) { - unsigned int cleanup_interval = stats->ojs_cleanup_interval; + ktime_t cleanup_interval = stats->ojs_cleanup_interval; + ktime_t now = ktime_get_real(); + ktime_t oldest; - if (cleanup_interval == 0 || before == 0) + if (likely(!clear)) { + /* ojs_cleanup_interval of zero means never clean up stats */ + if (ktime_to_ns(cleanup_interval) == 0) return; - if (now < stats->ojs_last_cleanup + cleanup_interval / 2) + if (ktime_before(now, ktime_add(stats->ojs_cleanup_last, + cleanup_interval))) return; if (stats->ojs_cleaning) @@ -208,7 +215,7 @@ static void lprocfs_job_cleanup(struct obd_job_stats *stats, int before) } write_lock(&stats->ojs_lock); - if (before >= 0 && stats->ojs_cleaning) { + if (!clear && stats->ojs_cleaning) { write_unlock(&stats->ojs_lock); return; } @@ -225,14 +232,16 @@ static void lprocfs_job_cleanup(struct obd_job_stats *stats, int before) * Holding ojs_lock isn't necessary for safety of the hash iteration, * since locking of the hash is handled internally, but there isn't * any benefit to having multiple threads doing cleanup at one time. + * + * Subtract twice the cleanup_interval, since it is 1/2 the maximum age. */ - oldest = now - before; + oldest = ktime_sub(now, ktime_add(cleanup_interval, cleanup_interval)); cfs_hash_for_each_safe(stats->ojs_hash, job_cleanup_iter_callback, &oldest); write_lock(&stats->ojs_lock); stats->ojs_cleaning = false; - stats->ojs_last_cleanup = ktime_get_real_seconds(); + stats->ojs_cleanup_last = ktime_get_real(); write_unlock(&stats->ojs_lock); } @@ -253,7 +262,7 @@ static struct job_stat *job_alloc(char *jobid, struct obd_job_stats *jobs) jobs->ojs_cntr_init_fn(job->js_stats, 0); memcpy(job->js_jobid, jobid, sizeof(job->js_jobid)); - job->js_timestamp = ktime_get_real_seconds(); + job->js_time_latest = job->js_stats->ls_init; job->js_jobstats = jobs; INIT_HLIST_NODE(&job->js_hash); INIT_LIST_HEAD(&job->js_list); @@ -289,7 +298,7 @@ int lprocfs_job_stats_log(struct obd_device *obd, char *jobid, if (job) goto found; - lprocfs_job_cleanup(stats, stats->ojs_cleanup_interval); + lprocfs_job_cleanup(stats, false); job = job_alloc(jobid, stats); if (job == NULL) @@ -314,7 +323,7 @@ int lprocfs_job_stats_log(struct obd_device *obd, char *jobid, found: LASSERT(stats == job->js_jobstats); - job->js_timestamp = ktime_get_real_seconds(); + job->js_time_latest = ktime_get_real(); lprocfs_counter_add(job->js_stats, event, amount); job_putref(job); @@ -330,7 +339,7 @@ void lprocfs_job_stats_fini(struct obd_device *obd) if (stats->ojs_hash == NULL) return; - lprocfs_job_cleanup(stats, -99); + lprocfs_job_cleanup(stats, true); cfs_hash_putref(stats->ojs_hash); stats->ojs_hash = NULL; LASSERT(list_empty(&stats->ojs_list)); @@ -384,7 +393,9 @@ static void *lprocfs_jobstats_seq_next(struct seq_file *p, void *v, loff_t *pos) * * job_stats: * - job_id: dd.4854 - * snapshot_time: 1322494486 + * snapshot_time: 1322494486.123456789 + * start_time: 1322494476.012345678 + * elapsed_time: 10.111111111 * open: { samples: 1, unit: reqs } * close: { samples: 1, unit: reqs } * mknod: { samples: 0, unit: reqs } @@ -404,7 +415,9 @@ static void *lprocfs_jobstats_seq_next(struct seq_file *p, void *v, loff_t *pos) * * job_stats: * - job_id dd.4854 - * snapshot_time: 1322494602 + * snapshot_time: 1322494602.123456789 + * start_time: 1322494592.987654321 + * elapsed_time: 9.135802468 * read: { samples: 0, unit: bytes, min: 0, max: 0, sum: 0 } * write: { samples: 1, unit: bytes, min: 4096, max: 4096, sum: 4096 } * setattr: { samples: 0, unit: reqs } @@ -458,7 +471,8 @@ static int lprocfs_jobstats_seq_show(struct seq_file *p, void *v) seq_printf(p, "- %-16s %s%*s%s\n", "job_id:", quote, joblen, escaped, quote); - seq_printf(p, " %-16s %lld\n", "snapshot_time:", job->js_timestamp); + lprocfs_stats_header(p, job->js_time_latest, job->js_stats->ls_init, + 16, ":", true, " "); s = job->js_stats; for (i = 0; i < s->ls_num; i++) { @@ -534,7 +548,7 @@ static ssize_t lprocfs_jobstats_seq_write(struct file *file, jobid[len - 1] = 0; if (strcmp(jobid, "clear") == 0) { - lprocfs_job_cleanup(stats, -99); + lprocfs_job_cleanup(stats, true); return len; } @@ -569,7 +583,7 @@ static int lprocfs_jobstats_seq_release(struct inode *inode, struct file *file) struct seq_file *seq = file->private_data; struct obd_job_stats *stats = seq->private; - lprocfs_job_cleanup(stats, stats->ojs_cleanup_interval); + lprocfs_job_cleanup(stats, false); return lprocfs_seq_release(inode, file); } @@ -624,8 +638,11 @@ int lprocfs_job_stats_init(struct obd_device *obd, int cntr_num, rwlock_init(&stats->ojs_lock); stats->ojs_cntr_num = cntr_num; stats->ojs_cntr_init_fn = init_fn; - stats->ojs_cleanup_interval = 600; /* 10 mins by default */ - stats->ojs_last_cleanup = ktime_get_real_seconds(); + /* Store 1/2 the actual interval, since we use that the most, and + * it is easier to work with. + */ + stats->ojs_cleanup_interval = ktime_set(600 / 2, 0); /* default 10 min*/ + stats->ojs_cleanup_last = ktime_get_real(); entry = lprocfs_add_simple(obd->obd_proc_entry, "job_stats", stats, &lprocfs_jobstats_seq_fops); @@ -644,9 +661,12 @@ ssize_t job_cleanup_interval_show(struct kobject *kobj, struct attribute *attr, struct obd_device *obd = container_of(kobj, struct obd_device, obd_kset.kobj); struct obd_job_stats *stats; + struct timespec64 ts; stats = &obd->u.obt.obt_jobstats; - return scnprintf(buf, PAGE_SIZE, "%d\n", stats->ojs_cleanup_interval); + ts = ktime_to_timespec64(stats->ojs_cleanup_interval); + + return scnprintf(buf, PAGE_SIZE, "%lld\n", (long long)ts.tv_sec * 2); } EXPORT_SYMBOL(job_cleanup_interval_show); @@ -666,8 +686,9 @@ ssize_t job_cleanup_interval_store(struct kobject *kobj, if (rc) return rc; - stats->ojs_cleanup_interval = val; - lprocfs_job_cleanup(stats, stats->ojs_cleanup_interval); + stats->ojs_cleanup_interval = ktime_set(val / 2, 0); + lprocfs_job_cleanup(stats, false); + return count; } EXPORT_SYMBOL(job_cleanup_interval_store); diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index e7e9e62..5a1d4b2 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -1225,6 +1225,7 @@ struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num, stats->ls_num = num; stats->ls_flags = flags; + stats->ls_init = ktime_get_real(); spin_lock_init(&stats->ls_lock); /* alloc num of counter headers */ @@ -1330,6 +1331,7 @@ void lprocfs_clear_stats(struct lprocfs_stats *stats) percpu_cntr->lc_sum_irq = 0; } } + stats->ls_init = ktime_get_real(); lprocfs_stats_unlock(stats, LPROCFS_GET_NUM_CPU, &flags); } @@ -1365,6 +1367,43 @@ static void *lprocfs_stats_seq_next(struct seq_file *p, void *v, loff_t *pos) return lprocfs_stats_seq_start(p, pos); } +/** + * print header of stats including snapshot_time, start_time and elapsed_time. + * + * \param seq the file to print content to + * \param now end time to calculate elapsed_time + * \param ts_init start time to calculate elapsed_time + * \param width the width of key to align them well + * \param colon "" or ":" + * \param show_units show units or not + * \param prefix prefix (indent) before printing each line of header + * to align them with other content + */ +void lprocfs_stats_header(struct seq_file *seq, ktime_t now, + ktime_t ts_init, int width, const char *colon, + bool show_units, const char *prefix) +{ + const char *units = show_units ? " secs.nsecs" : ""; + struct timespec64 ts; + const char *field; + + field = (colon && colon[0]) ? "snapshot_time:" : "snapshot_time"; + ts = ktime_to_timespec64(now); + seq_printf(seq, "%s%-*s %llu.%09lu%s\n", prefix, width, field, + (s64)ts.tv_sec, ts.tv_nsec, units); + + field = (colon && colon[0]) ? "start_time:" : "start_time"; + ts = ktime_to_timespec64(ts_init); + seq_printf(seq, "%s%-*s %llu.%09lu%s\n", prefix, width, field, + (s64)ts.tv_sec, ts.tv_nsec, units); + + field = (colon && colon[0]) ? "elapsed_time:" : "elapsed_time"; + ts = ktime_to_timespec64(ktime_sub(now, ts_init)); + seq_printf(seq, "%s%-*s %llu.%09lu%s\n", prefix, width, field, + (s64)ts.tv_sec, ts.tv_nsec, units); +} +EXPORT_SYMBOL(lprocfs_stats_header); + /* seq file export of one lprocfs counter */ static int lprocfs_stats_seq_show(struct seq_file *p, void *v) { @@ -1373,13 +1412,9 @@ static int lprocfs_stats_seq_show(struct seq_file *p, void *v) struct lprocfs_counter ctr; int idx = *(loff_t *)v; - if (idx == 0) { - struct timespec64 now; - - ktime_get_real_ts64(&now); - seq_printf(p, "%-25s %llu.%09lu secs.nsecs\n", - "snapshot_time", (s64)now.tv_sec, now.tv_nsec); - } + if (idx == 0) + lprocfs_stats_header(p, ktime_get_real(), stats->ls_init, 25, + "", true, ""); hdr = &stats->ls_cnt_header[idx]; lprocfs_stats_collect(stats, idx, &ctr); diff --git a/lustre/osc/lproc_osc.c b/lustre/osc/lproc_osc.c index 4ed63fc..a61300e 100644 --- a/lustre/osc/lproc_osc.c +++ b/lustre/osc/lproc_osc.c @@ -703,18 +703,15 @@ struct lprocfs_vars lprocfs_osc_obd_vars[] = { static int osc_rpc_stats_seq_show(struct seq_file *seq, void *v) { - struct timespec64 now; struct obd_device *obd = seq->private; struct client_obd *cli = &obd->u.cli; unsigned long read_tot = 0, write_tot = 0, read_cum, write_cum; int i; - ktime_get_real_ts64(&now); - spin_lock(&cli->cl_loi_list_lock); - seq_printf(seq, "snapshot_time: %lld.%09lu (secs.nsecs)\n", - (s64)now.tv_sec, now.tv_nsec); + lprocfs_stats_header(seq, ktime_get_real(), cli->cl_stats_init, 25, + ":", true, ""); seq_printf(seq, "read RPCs in flight: %d\n", cli->cl_r_in_flight); seq_printf(seq, "write RPCs in flight: %d\n", @@ -800,7 +797,7 @@ static int osc_rpc_stats_seq_show(struct seq_file *seq, void *v) static ssize_t osc_rpc_stats_seq_write(struct file *file, const char __user *buf, - size_t len, loff_t *off) + size_t len, loff_t *off) { struct seq_file *seq = file->private_data; struct obd_device *obd = seq->private; @@ -812,6 +809,7 @@ static ssize_t osc_rpc_stats_seq_write(struct file *file, lprocfs_oh_clear(&cli->cl_write_page_hist); lprocfs_oh_clear(&cli->cl_read_offset_hist); lprocfs_oh_clear(&cli->cl_write_offset_hist); + cli->cl_stats_init = ktime_get_real(); return len; } @@ -819,14 +817,11 @@ LPROC_SEQ_FOPS(osc_rpc_stats); static int osc_stats_seq_show(struct seq_file *seq, void *v) { - struct timespec64 now; struct obd_device *obd = seq->private; - struct osc_stats *stats = &obd2osc_dev(obd)->od_stats; - - ktime_get_real_ts64(&now); + struct osc_stats *stats = &obd2osc_dev(obd)->osc_stats; - seq_printf(seq, "snapshot_time: %lld.%09lu (secs.nsecs)\n", - (s64)now.tv_sec, now.tv_nsec); + lprocfs_stats_header(seq, ktime_get_real(), stats->os_init, 25, ":", + true, ""); seq_printf(seq, "lockless_write_bytes\t\t%llu\n", stats->os_lockless_writes); seq_printf(seq, "lockless_read_bytes\t\t%llu\n", @@ -840,9 +835,11 @@ static ssize_t osc_stats_seq_write(struct file *file, { struct seq_file *seq = file->private_data; struct obd_device *obd = seq->private; - struct osc_stats *stats = &obd2osc_dev(obd)->od_stats; + struct osc_stats *stats = &obd2osc_dev(obd)->osc_stats; memset(stats, 0, sizeof(*stats)); + stats->os_init = ktime_get_real(); + return len; } diff --git a/lustre/osc/osc_cache.c b/lustre/osc/osc_cache.c index 66bf03b..e98d3a6 100644 --- a/lustre/osc/osc_cache.c +++ b/lustre/osc/osc_cache.c @@ -1351,7 +1351,7 @@ static int osc_completion(const struct lu_env *env, struct osc_async_page *oap, /* statistic */ if (rc == 0 && srvlock) { struct lu_device *ld = opg->ops_cl.cpl_obj->co_lu.lo_dev; - struct osc_stats *stats = &lu2osc_dev(ld)->od_stats; + struct osc_stats *stats = &lu2osc_dev(ld)->osc_stats; size_t bytes = oap->oap_count; if (crt == CRT_READ) diff --git a/lustre/osc/osc_dev.c b/lustre/osc/osc_dev.c index 68d8a17..0686bed 100644 --- a/lustre/osc/osc_dev.c +++ b/lustre/osc/osc_dev.c @@ -191,41 +191,42 @@ EXPORT_SYMBOL(osc_device_fini); struct lu_device *osc_device_free(const struct lu_env *env, struct lu_device *d) { - struct osc_device *od = lu2osc_dev(d); + struct osc_device *oc = lu2osc_dev(d); cl_device_fini(lu2cl_dev(d)); - OBD_FREE_PTR(od); + OBD_FREE_PTR(oc); return NULL; } EXPORT_SYMBOL(osc_device_free); static struct lu_device *osc_device_alloc(const struct lu_env *env, - struct lu_device_type *t, - struct lustre_cfg *cfg) + struct lu_device_type *t, + struct lustre_cfg *cfg) { - struct lu_device *d; - struct osc_device *od; - struct obd_device *obd; - int rc; - - OBD_ALLOC_PTR(od); - if (od == NULL) - RETURN(ERR_PTR(-ENOMEM)); - - cl_device_init(&od->od_cl, t); - d = osc2lu_dev(od); - d->ld_ops = &osc_lu_ops; - - /* Setup OSC OBD */ - obd = class_name2obd(lustre_cfg_string(cfg, 0)); - LASSERT(obd != NULL); - rc = osc_setup(obd, cfg); - if (rc) { - osc_device_free(env, d); - RETURN(ERR_PTR(rc)); - } - od->od_exp = obd->obd_self_export; - RETURN(d); + struct lu_device *d; + struct osc_device *osc; + struct obd_device *obd; + int rc; + + OBD_ALLOC_PTR(osc); + if (osc == NULL) + RETURN(ERR_PTR(-ENOMEM)); + + cl_device_init(&osc->osc_cl, t); + d = osc2lu_dev(osc); + d->ld_ops = &osc_lu_ops; + + /* Setup OSC OBD */ + obd = class_name2obd(lustre_cfg_string(cfg, 0)); + LASSERT(obd != NULL); + rc = osc_setup(obd, cfg); + if (rc) { + osc_device_free(env, d); + RETURN(ERR_PTR(rc)); + } + osc->osc_exp = obd->obd_self_export; + osc->osc_stats.os_init = ktime_get_real(); + RETURN(d); } static const struct lu_device_type_operations osc_device_type_ops = { diff --git a/lustre/osd-ldiskfs/osd_io.c b/lustre/osd-ldiskfs/osd_io.c index c9e074c..39ec7cb 100644 --- a/lustre/osd-ldiskfs/osd_io.c +++ b/lustre/osd-ldiskfs/osd_io.c @@ -151,9 +151,9 @@ void osd_fini_iobuf(struct osd_device *d, struct osd_iobuf *iobuf) iobuf->dr_elapsed_valid = 0; LASSERT(iobuf->dr_dev == d); LASSERT(iobuf->dr_frags > 0); - lprocfs_oh_tally(&d->od_brw_stats.hist[BRW_R_DIO_FRAGS+rw], + lprocfs_oh_tally(&d->od_brw_stats.bs_hist[BRW_R_DIO_FRAGS + rw], iobuf->dr_frags); - lprocfs_oh_tally_log2(&d->od_brw_stats.hist[BRW_R_IO_TIME+rw], + lprocfs_oh_tally_log2(&d->od_brw_stats.bs_hist[BRW_R_IO_TIME+rw], ktime_to_ms(iobuf->dr_elapsed)); } } @@ -230,8 +230,8 @@ static void dio_complete_routine(struct bio *bio, int error) static void record_start_io(struct osd_iobuf *iobuf, int size) { - struct osd_device *osd = iobuf->dr_dev; - struct obd_histogram *h = osd->od_brw_stats.hist; + struct osd_device *osd = iobuf->dr_dev; + struct obd_histogram *h = osd->od_brw_stats.bs_hist; iobuf->dr_frags++; atomic_inc(&iobuf->dr_numreqs); diff --git a/lustre/osd-ldiskfs/osd_lproc.c b/lustre/osd-ldiskfs/osd_lproc.c index 4175157..0a9c8b3 100644 --- a/lustre/osd-ldiskfs/osd_lproc.c +++ b/lustre/osd-ldiskfs/osd_lproc.c @@ -44,38 +44,38 @@ void osd_brw_stats_update(struct osd_device *osd, struct osd_iobuf *iobuf) { - struct brw_stats *s = &osd->od_brw_stats; + struct brw_stats *bs = &osd->od_brw_stats; sector_t *last_block = NULL; - struct page **pages = iobuf->dr_pages; - struct page *last_page = NULL; - unsigned long discont_pages = 0; - unsigned long discont_blocks = 0; + struct page **pages = iobuf->dr_pages; + struct page *last_page = NULL; + unsigned long discont_pages = 0; + unsigned long discont_blocks = 0; sector_t *blocks = iobuf->dr_blocks; - int i, nr_pages = iobuf->dr_npages; - int blocks_per_page; - int rw = iobuf->dr_rw; + int i, nr_pages = iobuf->dr_npages; + int blocks_per_page; + int rw = iobuf->dr_rw; - if (unlikely(nr_pages == 0)) - return; + if (unlikely(nr_pages == 0)) + return; blocks_per_page = PAGE_SIZE >> osd_sb(osd)->s_blocksize_bits; - lprocfs_oh_tally_log2(&s->hist[BRW_R_PAGES+rw], nr_pages); - - while (nr_pages-- > 0) { - if (last_page && (*pages)->index != (last_page->index + 1)) - discont_pages++; - last_page = *pages; - pages++; - for (i = 0; i < blocks_per_page; i++) { - if (last_block && *blocks != (*last_block + 1)) - discont_blocks++; - last_block = blocks++; - } - } + lprocfs_oh_tally_log2(&bs->bs_hist[BRW_R_PAGES + rw], nr_pages); + + while (nr_pages-- > 0) { + if (last_page && (*pages)->index != (last_page->index + 1)) + discont_pages++; + last_page = *pages; + pages++; + for (i = 0; i < blocks_per_page; i++) { + if (last_block && *blocks != (*last_block + 1)) + discont_blocks++; + last_block = blocks++; + } + } - lprocfs_oh_tally(&s->hist[BRW_R_DISCONT_PAGES+rw], discont_pages); - lprocfs_oh_tally(&s->hist[BRW_R_DISCONT_BLOCKS+rw], discont_blocks); + lprocfs_oh_tally(&bs->bs_hist[BRW_R_DISCONT_PAGES+rw], discont_pages); + lprocfs_oh_tally(&bs->bs_hist[BRW_R_DISCONT_BLOCKS+rw], discont_blocks); } static void display_brw_stats(struct seq_file *seq, char *name, char *units, @@ -118,41 +118,37 @@ static void display_brw_stats(struct seq_file *seq, char *name, char *units, static void brw_stats_show(struct seq_file *seq, struct brw_stats *brw_stats) { - struct timespec64 now; - /* this sampling races with updates */ - ktime_get_real_ts64(&now); + lprocfs_stats_header(seq, ktime_get_real(), brw_stats->bs_init, 25, + ":", true, ""); - seq_printf(seq, "snapshot_time: %lld.%09ld (secs.nsecs)\n", - (s64)now.tv_sec, now.tv_nsec); + display_brw_stats(seq, "pages per bulk r/w", "rpcs", + &brw_stats->bs_hist[BRW_R_PAGES], + &brw_stats->bs_hist[BRW_W_PAGES], 1); - display_brw_stats(seq, "pages per bulk r/w", "rpcs", - &brw_stats->hist[BRW_R_PAGES], - &brw_stats->hist[BRW_W_PAGES], 1); + display_brw_stats(seq, "discontiguous pages", "rpcs", + &brw_stats->bs_hist[BRW_R_DISCONT_PAGES], + &brw_stats->bs_hist[BRW_W_DISCONT_PAGES], 0); - display_brw_stats(seq, "discontiguous pages", "rpcs", - &brw_stats->hist[BRW_R_DISCONT_PAGES], - &brw_stats->hist[BRW_W_DISCONT_PAGES], 0); + display_brw_stats(seq, "discontiguous blocks", "rpcs", + &brw_stats->bs_hist[BRW_R_DISCONT_BLOCKS], + &brw_stats->bs_hist[BRW_W_DISCONT_BLOCKS], 0); - display_brw_stats(seq, "discontiguous blocks", "rpcs", - &brw_stats->hist[BRW_R_DISCONT_BLOCKS], - &brw_stats->hist[BRW_W_DISCONT_BLOCKS], 0); + display_brw_stats(seq, "disk fragmented I/Os", "ios", + &brw_stats->bs_hist[BRW_R_DIO_FRAGS], + &brw_stats->bs_hist[BRW_W_DIO_FRAGS], 0); - display_brw_stats(seq, "disk fragmented I/Os", "ios", - &brw_stats->hist[BRW_R_DIO_FRAGS], - &brw_stats->hist[BRW_W_DIO_FRAGS], 0); - - display_brw_stats(seq, "disk I/Os in flight", "ios", - &brw_stats->hist[BRW_R_RPC_HIST], - &brw_stats->hist[BRW_W_RPC_HIST], 0); + display_brw_stats(seq, "disk I/Os in flight", "ios", + &brw_stats->bs_hist[BRW_R_RPC_HIST], + &brw_stats->bs_hist[BRW_W_RPC_HIST], 0); display_brw_stats(seq, "I/O time (1/1000s)", "ios", - &brw_stats->hist[BRW_R_IO_TIME], - &brw_stats->hist[BRW_W_IO_TIME], 1); + &brw_stats->bs_hist[BRW_R_IO_TIME], + &brw_stats->bs_hist[BRW_W_IO_TIME], 1); - display_brw_stats(seq, "disk I/O size", "ios", - &brw_stats->hist[BRW_R_DISK_IOSIZE], - &brw_stats->hist[BRW_W_DISK_IOSIZE], 1); + display_brw_stats(seq, "disk I/O size", "ios", + &brw_stats->bs_hist[BRW_R_DISK_IOSIZE], + &brw_stats->bs_hist[BRW_W_DISK_IOSIZE], 1); } static int osd_brw_stats_seq_show(struct seq_file *seq, void *v) @@ -169,24 +165,27 @@ static ssize_t osd_brw_stats_seq_write(struct file *file, size_t len, loff_t *off) { struct seq_file *seq = file->private_data; - struct osd_device *osd = seq->private; - int i; + struct osd_device *osd = seq->private; + int i; - for (i = 0; i < BRW_LAST; i++) - lprocfs_oh_clear(&osd->od_brw_stats.hist[i]); + for (i = 0; i < BRW_LAST; i++) + lprocfs_oh_clear(&osd->od_brw_stats.bs_hist[i]); + osd->od_brw_stats.bs_init = ktime_get_real(); - return len; + return len; } LPROC_SEQ_FOPS(osd_brw_stats); static int osd_stats_init(struct osd_device *osd) { - int i, result; - ENTRY; + int i, result; + + ENTRY; + osd->od_brw_stats.bs_init = ktime_get_real(); - for (i = 0; i < BRW_LAST; i++) - spin_lock_init(&osd->od_brw_stats.hist[i].oh_lock); + for (i = 0; i < BRW_LAST; i++) + spin_lock_init(&osd->od_brw_stats.bs_hist[i].oh_lock); osd->od_stats = lprocfs_alloc_stats(LPROC_OSD_LAST, 0); if (osd->od_stats != NULL) { diff --git a/lustre/osd-zfs/osd_io.c b/lustre/osd-zfs/osd_io.c index cec9127..b183a1e 100644 --- a/lustre/osd-zfs/osd_io.c +++ b/lustre/osd-zfs/osd_io.c @@ -71,27 +71,25 @@ static void dbuf_set_pending_evict(dmu_buf_t *db) static void record_start_io(struct osd_device *osd, int rw, int discont_pages) { - struct obd_histogram *h = osd->od_brw_stats.hist; + struct obd_histogram *h = osd->od_brw_stats.bs_hist; if (rw == READ) { atomic_inc(&osd->od_r_in_flight); lprocfs_oh_tally(&h[BRW_R_RPC_HIST], atomic_read(&osd->od_r_in_flight)); lprocfs_oh_tally(&h[BRW_R_DISCONT_PAGES], discont_pages); - } else { atomic_inc(&osd->od_w_in_flight); lprocfs_oh_tally(&h[BRW_W_RPC_HIST], atomic_read(&osd->od_w_in_flight)); lprocfs_oh_tally(&h[BRW_W_DISCONT_PAGES], discont_pages); - } } static void record_end_io(struct osd_device *osd, int rw, unsigned long elapsed, int disksize, int npages) { - struct obd_histogram *h = osd->od_brw_stats.hist; + struct obd_histogram *h = osd->od_brw_stats.bs_hist; if (rw == READ) atomic_dec(&osd->od_r_in_flight); diff --git a/lustre/osd-zfs/osd_lproc.c b/lustre/osd-zfs/osd_lproc.c index 0fa9f88..ed2f1dd 100644 --- a/lustre/osd-zfs/osd_lproc.c +++ b/lustre/osd-zfs/osd_lproc.c @@ -87,39 +87,37 @@ static void display_brw_stats(struct seq_file *seq, char *name, char *units, static void brw_stats_show(struct seq_file *seq, struct brw_stats *brw_stats) { - struct timespec64 now; - /* this sampling races with updates */ - ktime_get_real_ts64(&now); - seq_printf(seq, "snapshot_time: %llu.%09lu (secs.nsecs)\n", - (s64)now.tv_sec, now.tv_nsec); + lprocfs_stats_header(seq, ktime_get_real(), brw_stats->bs_init, 25, + ":", true, ""); display_brw_stats(seq, "pages per bulk r/w", "rpcs", - &brw_stats->hist[BRW_R_PAGES], - &brw_stats->hist[BRW_W_PAGES], 1); + &brw_stats->bs_hist[BRW_R_PAGES], + &brw_stats->bs_hist[BRW_W_PAGES], 1); + display_brw_stats(seq, "discontiguous pages", "rpcs", - &brw_stats->hist[BRW_R_DISCONT_PAGES], - &brw_stats->hist[BRW_W_DISCONT_PAGES], 0); + &brw_stats->bs_hist[BRW_R_DISCONT_PAGES], + &brw_stats->bs_hist[BRW_W_DISCONT_PAGES], 0); #if 0 display_brw_stats(seq, "discontiguous blocks", "rpcs", - &brw_stats->hist[BRW_R_DISCONT_BLOCKS], - &brw_stats->hist[BRW_W_DISCONT_BLOCKS], 0); + &brw_stats->bs_hist[BRW_R_DISCONT_BLOCKS], + &brw_stats->bs_hist[BRW_W_DISCONT_BLOCKS], 0); display_brw_stats(seq, "disk fragmented I/Os", "ios", - &brw_stats->hist[BRW_R_DIO_FRAGS], - &brw_stats->hist[BRW_W_DIO_FRAGS], 0); + &brw_stats->bs_hist[BRW_R_DIO_FRAGS], + &brw_stats->bs_hist[BRW_W_DIO_FRAGS], 0); #endif display_brw_stats(seq, "disk I/Os in flight", "ios", - &brw_stats->hist[BRW_R_RPC_HIST], - &brw_stats->hist[BRW_W_RPC_HIST], 0); + &brw_stats->bs_hist[BRW_R_RPC_HIST], + &brw_stats->bs_hist[BRW_W_RPC_HIST], 0); display_brw_stats(seq, "I/O time (1/1000s)", "ios", - &brw_stats->hist[BRW_R_IO_TIME], - &brw_stats->hist[BRW_W_IO_TIME], 1); + &brw_stats->bs_hist[BRW_R_IO_TIME], + &brw_stats->bs_hist[BRW_W_IO_TIME], 1); display_brw_stats(seq, "disk I/O size", "ios", - &brw_stats->hist[BRW_R_DISK_IOSIZE], - &brw_stats->hist[BRW_W_DISK_IOSIZE], 1); + &brw_stats->bs_hist[BRW_R_DISK_IOSIZE], + &brw_stats->bs_hist[BRW_W_DISK_IOSIZE], 1); } static int osd_brw_stats_seq_show(struct seq_file *seq, void *v) @@ -140,7 +138,7 @@ static ssize_t osd_brw_stats_seq_write(struct file *file, int i; for (i = 0; i < BRW_LAST; i++) - lprocfs_oh_clear(&osd->od_brw_stats.hist[i]); + lprocfs_oh_clear(&osd->od_brw_stats.bs_hist[i]); return len; } @@ -153,7 +151,7 @@ static int osd_stats_init(struct osd_device *osd) ENTRY; for (i = 0; i < BRW_LAST; i++) - spin_lock_init(&osd->od_brw_stats.hist[i].oh_lock); + spin_lock_init(&osd->od_brw_stats.bs_hist[i].oh_lock); osd->od_stats = lprocfs_alloc_stats(LPROC_OSD_LAST, 0); if (osd->od_stats != NULL) { diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 100bd0c..c003379 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -13232,20 +13232,44 @@ run_test 126 "check that the fsgid provided by the client is taken into account" test_127a() { # bug 15521 [ $PARALLEL == "yes" ] && skip "skip parallel run" local name count samp unit min max sum sumsq + local tmpfile=$TMP/$tfile.tmp $LFS setstripe -i 0 -c 1 $DIR/$tfile || error "setstripe failed" echo "stats before reset" - $LCTL get_param osc.*.stats + stack_trap "rm -f $tmpfile" + local now=$(date +%s) + + $LCTL get_param osc.*.stats | tee $tmpfile + + local snapshot_time=$(awk '/snapshot_time/ { print $2; exit }' $tmpfile) + local start_time=$(awk '/start_time/ { print $2; exit }' $tmpfile) + local elapsed=$(awk '/elapsed_time/ { print $2; exit }' $tmpfile) + local uptime=$(awk '{ print $1 }' /proc/uptime) + + # snapshot_time should match POSIX epoch time, allow some delta for VMs + (( ${snapshot_time%\.*} >= $now - 5 && + ${snapshot_time%\.*} <= $now + 5 )) || + error "snapshot_time=$snapshot_time != now=$now" + # elapsed _should_ be from mount, but at least less than uptime + (( ${elapsed%\.*} < ${uptime%\.*} )) || + error "elapsed=$elapsed > uptime=$uptime" + (( ${snapshot_time%\.*} - ${start_time%\.*} >= ${elapsed%\.*} - 2 && + ${snapshot_time%\.*} - ${start_time%\.*} <= ${elapsed%\.*} + 2 )) || + error "elapsed=$elapsed != $snapshot_time - $start_time" + $LCTL set_param osc.*.stats=0 + local reset=$(date +%s) local fsize=$((2048 * 1024)) dd if=/dev/zero of=$DIR/$tfile bs=$fsize count=1 cancel_lru_locks osc dd if=$DIR/$tfile of=/dev/null bs=$fsize - $LCTL get_param osc.*0000-osc-*.stats | grep samples > $DIR/$tfile.tmp - stack_trap "rm -f $TMP/$tfile.tmp" + now=$(date +%s) + $LCTL get_param osc.*0000-osc-*.stats > $tmpfile while read name count samp unit min max sum sumsq; do + [[ "$samp" == "samples" ]] || continue + echo "got name=$name count=$count unit=$unit min=$min max=$max" [ ! $min ] && error "Missing min value for $name proc entry" eval $name=$count || error "Wrong proc format" @@ -13270,13 +13294,29 @@ test_127a() { # bug 15521 ;; *) ;; esac - done < $DIR/$tfile.tmp + done < $tmpfile #check that we actually got some stats [ "$read_bytes" ] || error "Missing read_bytes stats" [ "$write_bytes" ] || error "Missing write_bytes stats" [ "$read_bytes" != 0 ] || error "no read done" [ "$write_bytes" != 0 ] || error "no write done" + + snapshot_time=$(awk '/snapshot_time/ { print $2; exit }' $tmpfile) + start_time=$(awk '/start_time/ { print $2; exit }' $tmpfile) + elapsed=$(awk '/elapsed_time/ { print $2; exit }' $tmpfile) + + # snapshot_time should match POSIX epoch time, allow some delta for VMs + (( ${snapshot_time%\.*} >= $now - 5 && + ${snapshot_time%\.*} <= $now + 5 )) || + error "reset snapshot_time=$snapshot_time != now=$now" + # elapsed should be from time of stats reset + (( ${elapsed%\.*} >= $now - $reset - 2 && + ${elapsed%\.*} <= $now - $reset + 2 )) || + error "reset elapsed=$elapsed > $now - $reset" + (( ${snapshot_time%\.*} - ${start_time%\.*} >= ${elapsed%\.*} - 2 && + ${snapshot_time%\.*} - ${start_time%\.*} <= ${elapsed%\.*} + 2 )) || + error "reset elapsed=$elapsed != $snapshot_time - $start_time" } run_test 127a "verify the client stats are sane" diff --git a/lustre/utils/llobdstat b/lustre/utils/llobdstat index 7a3a4ba..ddd1514 100755 --- a/lustre/utils/llobdstat +++ b/lustre/utils/llobdstat @@ -142,7 +142,7 @@ sub readstat() if (defined($prevcount)) { $last{$name} = $prevcount; } - if ($name =~ /^$timestamp$/) { + if ($name =~ /^$timestamp/) { $cur{$name} = $iodata[1]; } elsif ($name =~ /^$read_bytes$/) { if (defined($cur{"read_ops"})) { diff --git a/lustre/utils/llstat b/lustre/utils/llstat index 63dea6a..adbf028 100755 --- a/lustre/utils/llstat +++ b/lustre/utils/llstat @@ -159,7 +159,7 @@ sub readstat() $prevcount = $cumulhash->{$name}; if (defined($prevcount)) { $diff = $cumulcount - $prevcount; - if ($name eq "snapshot_time") { + if ($name =~ /^snapshot_time/) { $tdiff = int($diff); &print_headings($cumulcount); $| = 1; -- 1.8.3.1