Whamcloud - gitweb
New tag 2.15.63
[fs/lustre-release.git] / lustre / obdclass / lprocfs_jobstats.c
index 0072feb..c2d27e2 100644 (file)
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
 /*
- * Copyright (c) 2011 Whamcloud, Inc.
+ * Copyright (c) 2012, 2016, Intel Corporation.
  * Use is subject to license terms.
  *
  * Author: Niu Yawei <niu@whamcloud.com>
  * lustre/obdclass/lprocfs_jobstats.c
  */
 
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
 #define DEBUG_SUBSYSTEM S_CLASS
 
-#ifndef __KERNEL__
-# include <liblustre.h>
-#endif
-
 #include <obd_class.h>
 #include <lprocfs_status.h>
-#include <lustre/lustre_idl.h>
 
-#if defined(LPROCFS)
+#ifdef CONFIG_PROC_FS
 
 /*
  * JobID formats & JobID environment variable names for supported
  */
 
 struct job_stat {
-       cfs_hlist_node_t      js_hash;
-       cfs_list_t            js_list;
-       cfs_atomic_t          js_refcount;
-       char                  js_jobid[JOBSTATS_JOBID_SIZE];
-       time_t                js_timestamp; /* seconds */
-       struct lprocfs_stats *js_stats;
-       struct obd_job_stats *js_jobstats;
+       struct hlist_node       js_hash;        /* hash struct for this jobid */
+       struct list_head        js_list;        /* on ojs_list, with ojs_lock */
+       struct kref             js_refcount;    /* num users of this struct */
+       char                    js_jobid[LUSTRE_JOBID_SIZE]; /* job name + NUL*/
+       ktime_t                 js_time_init;   /* time of initial stat*/
+       ktime_t                 js_time_latest; /* time of most recent stat*/
+       struct lprocfs_stats    *js_stats;      /* per-job statistics */
+       struct obd_job_stats    *js_jobstats;   /* for accessing ojs_lock */
+       struct rcu_head         js_rcu;         /* RCU head for job_reclaim_rcu*/
 };
 
-static unsigned job_stat_hash(cfs_hash_t *hs, const void *key, unsigned mask)
+static unsigned int
+job_stat_hash(struct cfs_hash *hs, const void *key, const unsigned int bits)
 {
-       return cfs_hash_djb2_hash(key, strlen(key), mask);
+       return cfs_hash_djb2_hash(key, strlen(key), bits);
 }
 
-static void *job_stat_key(cfs_hlist_node_t *hnode)
+static void *job_stat_key(struct hlist_node *hnode)
 {
        struct job_stat *job;
-       job = cfs_hlist_entry(hnode, struct job_stat, js_hash);
+       job = hlist_entry(hnode, struct job_stat, js_hash);
        return job->js_jobid;
 }
 
-static int job_stat_keycmp(const void *key, cfs_hlist_node_t *hnode)
+static int job_stat_keycmp(const void *key, struct hlist_node *hnode)
 {
        struct job_stat *job;
-       job = cfs_hlist_entry(hnode, struct job_stat, js_hash);
+       job = hlist_entry(hnode, struct job_stat, js_hash);
        return (strlen(job->js_jobid) == strlen(key)) &&
               !strncmp(job->js_jobid, key, strlen(key));
 }
 
-static void *job_stat_object(cfs_hlist_node_t *hnode)
+static void *job_stat_object(struct hlist_node *hnode)
 {
-       return cfs_hlist_entry(hnode, struct job_stat, js_hash);
+       return hlist_entry(hnode, struct job_stat, js_hash);
 }
 
-static void job_stat_get(cfs_hash_t *hs, cfs_hlist_node_t *hnode)
+static bool job_getref_try(struct job_stat *job)
 {
-       struct job_stat *job;
-       job = cfs_hlist_entry(hnode, struct job_stat, js_hash);
-       cfs_atomic_inc(&job->js_refcount);
+       return kref_get_unless_zero(&job->js_refcount);
 }
 
-static void job_free(struct job_stat *job)
+static void job_stat_get(struct cfs_hash *hs, struct hlist_node *hnode)
 {
-       LASSERT(atomic_read(&job->js_refcount) == 0);
-       LASSERT(job->js_jobstats);
+       struct job_stat *job;
+       job = hlist_entry(hnode, struct job_stat, js_hash);
+       kref_get(&job->js_refcount);
+}
 
-       cfs_write_lock(&job->js_jobstats->ojs_lock);
-       cfs_list_del_init(&job->js_list);
-       cfs_write_unlock(&job->js_jobstats->ojs_lock);
+static void job_reclaim_rcu(struct rcu_head *head)
+{
+       struct job_stat *job = container_of(head, typeof(*job), js_rcu);
 
-       lprocfs_free_stats(&job->js_stats);
+       lprocfs_stats_free(&job->js_stats);
        OBD_FREE_PTR(job);
 }
 
+static void job_free(struct kref *kref)
+{
+       struct job_stat *job = container_of(kref, struct job_stat,
+                                           js_refcount);
+
+       LASSERT(job->js_jobstats != NULL);
+       spin_lock(&job->js_jobstats->ojs_lock);
+       list_del_rcu(&job->js_list);
+       spin_unlock(&job->js_jobstats->ojs_lock);
+
+       call_rcu(&job->js_rcu, job_reclaim_rcu);
+}
+
 static void job_putref(struct job_stat *job)
 {
-       LASSERT(atomic_read(&job->js_refcount) > 0);
-       if (atomic_dec_and_test(&job->js_refcount))
-               job_free(job);
+       LASSERT(kref_read(&job->js_refcount) > 0);
+       kref_put(&job->js_refcount, job_free);
 }
 
-static void job_stat_put_locked(cfs_hash_t *hs, cfs_hlist_node_t *hnode)
+static void job_stat_put_locked(struct cfs_hash *hs, struct hlist_node *hnode)
 {
        struct job_stat *job;
-       job = cfs_hlist_entry(hnode, struct job_stat, js_hash);
+
+       job = hlist_entry(hnode, struct job_stat, js_hash);
        job_putref(job);
 }
 
-static void job_stat_exit(cfs_hash_t *hs, cfs_hlist_node_t *hnode)
+static void job_stat_exit(struct cfs_hash *hs, struct hlist_node *hnode)
 {
-       CERROR("Should not have any items!");
+       CERROR("should not have any items\n");
 }
 
-static cfs_hash_ops_t job_stats_hash_ops = {
+static struct cfs_hash_ops job_stats_hash_ops = {
        .hs_hash       = job_stat_hash,
        .hs_key        = job_stat_key,
        .hs_keycmp     = job_stat_keycmp,
@@ -153,30 +161,132 @@ static cfs_hash_ops_t job_stats_hash_ops = {
        .hs_exit       = job_stat_exit,
 };
 
-static struct job_stat *job_alloc(char *jobid, struct obd_job_stats *jobs)
+/**
+ * Jobstats expiry iterator to clean up old jobids
+ *
+ * Called for each job_stat structure on this device, it should delete stats
+ * older than the specified \a oldest_time in seconds.  If \a oldest_time is
+ * in the future then this will delete all statistics (e.g. during shutdown).
+ *
+ * \param[in] hs       hash of all jobids on this device
+ * \param[in] bd       hash bucket containing this jobid
+ * \param[in] hnode    hash structure for this jobid
+ * \param[in] data     pointer to stats expiry time in seconds
+ */
+static int job_cleanup_iter_callback(struct cfs_hash *hs,
+                                    struct cfs_hash_bd *bd,
+                                    struct hlist_node *hnode, void *data)
 {
+       ktime_t oldest_time = *((ktime_t *)data);
        struct job_stat *job;
 
-       LASSERT(jobs->ojs_cntr_num && jobs->ojs_cntr_init_fn);
+       job = hlist_entry(hnode, struct job_stat, js_hash);
+       if (ktime_before(job->js_time_latest, oldest_time))
+               cfs_hash_bd_del_locked(hs, bd, hnode);
+
+       return 0;
+}
+
+/**
+ * Clean up jobstats that were updated more than \a before seconds ago.
+ *
+ * Since this function may be called frequently, do not scan all of the
+ * jobstats on each call, only twice per cleanup interval.  That means stats
+ * may be on average around cleanup_interval / 4 older than the cleanup
+ * interval, but that is not considered harmful.
+ *
+ * The value stored in ojs_cleanup_interval is how often to perform a cleanup
+ * scan, and 1/2 of the maximum age of the individual statistics.  This is
+ * done rather than dividing the interval by two each time, because it is
+ * much easier to do the division when the value is initially set (in seconds)
+ * rather than after it has been converted to ktime_t, and maybe a bit faster.
+ *
+ * If \a clear is true then this will force clean up all jobstats
+ * (e.g. at shutdown).
+ *
+ * If there is already another thread doing jobstats cleanup, don't try to
+ * do this again in the current thread unless this is a force cleanup.
+ *
+ * \param[in] stats    stucture tracking all job stats for this device
+ * \param[in] clear    clear all job stats if true
+ */
+static void lprocfs_job_cleanup(struct obd_job_stats *stats, bool clear)
+{
+       ktime_t cleanup_interval = stats->ojs_cleanup_interval;
+       ktime_t now = ktime_get_real();
+       ktime_t oldest;
+
+       if (likely(!clear)) {
+               /* ojs_cleanup_interval of zero means never clean up stats */
+               if (ktime_to_ns(cleanup_interval) == 0)
+                       return;
+
+               if (ktime_before(now, ktime_add(stats->ojs_cleanup_last,
+                                               cleanup_interval)))
+                       return;
+
+               if (stats->ojs_cleaning)
+                       return;
+       }
+
+       spin_lock(&stats->ojs_lock);
+       if (!clear && stats->ojs_cleaning) {
+               spin_unlock(&stats->ojs_lock);
+               return;
+       }
+
+       stats->ojs_cleaning = true;
+       spin_unlock(&stats->ojs_lock);
+
+       /* Can't hold ojs_lock over hash iteration, since it is grabbed by
+        * job_cleanup_iter_callback()
+        *   ->cfs_hash_bd_del_locked()
+        *     ->job_putref()
+        *       ->job_free()
+        *
+        * Holding ojs_lock isn't necessary for safety of the hash iteration,
+        * since locking of the hash is handled internally, but there isn't
+        * any benefit to having multiple threads doing cleanup at one time.
+        *
+        * Subtract or add twice the cleanup_interval, since it is 1/2 the
+        * maximum age.  When clearing all stats, push oldest into the future.
+        */
+       cleanup_interval = ktime_add(cleanup_interval, cleanup_interval);
+       if (likely(!clear))
+               oldest = ktime_sub(now, cleanup_interval);
+       else
+               oldest = ktime_add(now, cleanup_interval);
+       cfs_hash_for_each_safe(stats->ojs_hash, job_cleanup_iter_callback,
+                              &oldest);
+
+       spin_lock(&stats->ojs_lock);
+       stats->ojs_cleaning = false;
+       stats->ojs_cleanup_last = ktime_get_real();
+       spin_unlock(&stats->ojs_lock);
+}
+
+static struct job_stat *job_alloc(char *jobid, struct obd_job_stats *jobs)
+{
+       struct job_stat *job;
 
        OBD_ALLOC_PTR(job);
        if (job == NULL)
                return NULL;
 
-       job->js_stats = lprocfs_alloc_stats(jobs->ojs_cntr_num, 0);
+       job->js_stats = lprocfs_stats_alloc(jobs->ojs_cntr_num, 0);
        if (job->js_stats == NULL) {
                OBD_FREE_PTR(job);
                return NULL;
        }
 
-       jobs->ojs_cntr_init_fn(job->js_stats);
+       jobs->ojs_cntr_init_fn(job->js_stats, 0, 0);
 
-       memcpy(job->js_jobid, jobid, JOBSTATS_JOBID_SIZE);
-       job->js_timestamp = cfs_time_current_sec();
+       memcpy(job->js_jobid, jobid, sizeof(job->js_jobid));
+       job->js_time_latest = job->js_stats->ls_init;
        job->js_jobstats = jobs;
-       CFS_INIT_HLIST_NODE(&job->js_hash);
-       CFS_INIT_LIST_HEAD(&job->js_list);
-       cfs_atomic_set(&job->js_refcount, 1);
+       INIT_HLIST_NODE(&job->js_hash);
+       INIT_LIST_HEAD(&job->js_list);
+       kref_init(&job->js_refcount);
 
        return job;
 }
@@ -184,18 +294,23 @@ static struct job_stat *job_alloc(char *jobid, struct obd_job_stats *jobs)
 int lprocfs_job_stats_log(struct obd_device *obd, char *jobid,
                          int event, long amount)
 {
-       struct obd_job_stats *stats = &obd->u.obt.obt_jobstats;
+       struct obd_job_stats *stats = &obd2obt(obd)->obt_jobstats;
        struct job_stat *job, *job2;
        ENTRY;
 
-       LASSERT(stats && stats->ojs_hash);
+       LASSERT(stats != NULL);
+       LASSERT(stats->ojs_hash != NULL);
 
-       if (!jobid || !strlen(jobid))
+       if (event >= stats->ojs_cntr_num)
                RETURN(-EINVAL);
 
-       if (strlen(jobid) >= JOBSTATS_JOBID_SIZE) {
-               CERROR("Invalid jobid size (%lu), expect(%d)\n",
-                      (unsigned long)strlen(jobid) + 1, JOBSTATS_JOBID_SIZE);
+       if (jobid == NULL || strlen(jobid) == 0)
+               RETURN(0);
+
+       /* unterminated jobid should be handled in lustre_msg_get_jobid() */
+       if (strlen(jobid) >= LUSTRE_JOBID_SIZE) {
+               CERROR("%s: invalid jobid size %lu, expect %d\n", obd->obd_name,
+                      (unsigned long)strlen(jobid) + 1, LUSTRE_JOBID_SIZE);
                RETURN(-EINVAL);
        }
 
@@ -203,6 +318,8 @@ int lprocfs_job_stats_log(struct obd_device *obd, char *jobid,
        if (job)
                goto found;
 
+       lprocfs_job_cleanup(stats, false);
+
        job = job_alloc(jobid, stats);
        if (job == NULL)
                RETURN(-ENOMEM);
@@ -212,64 +329,67 @@ int lprocfs_job_stats_log(struct obd_device *obd, char *jobid,
        if (job2 != job) {
                job_putref(job);
                job = job2;
-               LASSERT(!cfs_list_empty(&job->js_list));
+               /* We cannot LASSERT(!list_empty(&job->js_list)) here,
+                * since we just lost the race for inserting "job" into the
+                * ojs_list, and some other thread is doing it _right_now_.
+                * Instead, be content the other thread is doing this, since
+                * "job2" was initialized in job_alloc() already. LU-2163 */
        } else {
-               LASSERT(cfs_list_empty(&job->js_list));
-               cfs_write_lock(&stats->ojs_lock);
-               cfs_list_add_tail(&job->js_list, &stats->ojs_list);
-               cfs_write_unlock(&stats->ojs_lock);
+               LASSERT(list_empty(&job->js_list));
+               spin_lock(&stats->ojs_lock);
+               list_add_tail_rcu(&job->js_list, &stats->ojs_list);
+               spin_unlock(&stats->ojs_lock);
        }
 
 found:
        LASSERT(stats == job->js_jobstats);
-       LASSERT(stats->ojs_cntr_num > event);
-       job->js_timestamp = cfs_time_current_sec();
+       job->js_time_latest = ktime_get_real();
        lprocfs_counter_add(job->js_stats, event, amount);
 
        job_putref(job);
+
        RETURN(0);
 }
 EXPORT_SYMBOL(lprocfs_job_stats_log);
 
-static int job_iter_callback(cfs_hash_t *hs, cfs_hash_bd_t *bd,
-                            cfs_hlist_node_t *hnode, void *data)
-{
-       time_t oldest = *((time_t *)data);
-       struct job_stat *job;
-
-       job = cfs_hlist_entry(hnode, struct job_stat, js_hash);
-       if (!oldest || job->js_timestamp < oldest)
-               cfs_hash_bd_del_locked(hs, bd, hnode);
-
-       return 0;
-}
-
 void lprocfs_job_stats_fini(struct obd_device *obd)
 {
-       struct obd_job_stats *stats = &obd->u.obt.obt_jobstats;
-       time_t oldest = 0;
+       struct obd_job_stats *stats = &obd2obt(obd)->obt_jobstats;
 
        if (stats->ojs_hash == NULL)
                return;
-       cfs_timer_disarm(&stats->ojs_cleanup_timer);
-       cfs_hash_for_each_safe(stats->ojs_hash, job_iter_callback, &oldest);
+
+       lprocfs_job_cleanup(stats, true);
        cfs_hash_putref(stats->ojs_hash);
        stats->ojs_hash = NULL;
-       LASSERT(cfs_list_empty(&stats->ojs_list));
+       LASSERT(list_empty(&stats->ojs_list));
 }
 EXPORT_SYMBOL(lprocfs_job_stats_fini);
 
+
+struct lprocfs_jobstats_data {
+       struct obd_job_stats *pjd_stats;
+       loff_t pjd_last_pos;
+       struct job_stat *pjd_last_job;
+};
+
 static void *lprocfs_jobstats_seq_start(struct seq_file *p, loff_t *pos)
 {
-       struct obd_job_stats *stats = p->private;
+       struct lprocfs_jobstats_data *data = p->private;
+       struct obd_job_stats *stats = data->pjd_stats;
        loff_t off = *pos;
        struct job_stat *job;
 
-       cfs_read_lock(&stats->ojs_lock);
+       rcu_read_lock();
        if (off == 0)
                return SEQ_START_TOKEN;
+
+       /* if pos matches the offset of last saved job, start from saved job */
+       if (data->pjd_last_job && data->pjd_last_pos == off)
+               return data->pjd_last_job;
+
        off--;
-       cfs_list_for_each_entry(job, &stats->ojs_list, js_list) {
+       list_for_each_entry_rcu(job, &stats->ojs_list, js_list) {
                if (!off--)
                        return job;
        }
@@ -278,44 +398,66 @@ static void *lprocfs_jobstats_seq_start(struct seq_file *p, loff_t *pos)
 
 static void lprocfs_jobstats_seq_stop(struct seq_file *p, void *v)
 {
-       struct obd_job_stats *stats = p->private;
+       struct lprocfs_jobstats_data *data = p->private;
+       struct job_stat *job = NULL;
+
+       /* try to get a ref on current job (not deleted) */
+       if (v && v != SEQ_START_TOKEN && job_getref_try(v))
+               job = v;
+
+       rcu_read_unlock();
+
+       /* drop the ref on the old saved job */
+       if (data->pjd_last_job) {
+               job_putref(data->pjd_last_job);
+               data->pjd_last_job = NULL;
+       }
 
-       cfs_read_unlock(&stats->ojs_lock);
+       /* save the current job for the next read */
+       if (job)
+               data->pjd_last_job = job;
 }
 
 static void *lprocfs_jobstats_seq_next(struct seq_file *p, void *v, loff_t *pos)
 {
-       struct obd_job_stats *stats = p->private;
+       struct lprocfs_jobstats_data *data = p->private;
+       struct obd_job_stats *stats = data->pjd_stats;
        struct job_stat *job;
-       cfs_list_t *next;
+       struct list_head *cur;
 
        ++*pos;
+       data->pjd_last_pos = *pos;
        if (v == SEQ_START_TOKEN) {
-               next = stats->ojs_list.next;
+               cur = &stats->ojs_list;
        } else {
                job = (struct job_stat *)v;
-               next = job->js_list.next;
+               cur = &job->js_list;
        }
 
-       return next == &stats->ojs_list ? NULL :
-               cfs_list_entry(next, struct job_stat, js_list);
+       job = list_entry_rcu(cur->next, struct job_stat, js_list);
+       if (&job->js_list == &stats->ojs_list)
+               return NULL;
+
+       return job;
 }
 
 /*
  * Example of output on MDT:
  *
  * job_stats:
- * - job_id:        test_id.222.25844
- *   snapshot_time: 1322494486
- *   open:          { samples:        3, unit: reqs }
- *   close:         { samples:        3, unit: reqs }
+ * - job_id:        dd.4854
+ *   snapshot_time: 1322494486.123456789
+ *   start_time:    1322494476.012345678
+ *   elapsed_time:  10.111111111
+ *   open:          { samples:        1, unit: reqs }
+ *   close:         { samples:        1, unit: reqs }
  *   mknod:         { samples:        0, unit: reqs }
  *   link:          { samples:        0, unit: reqs }
  *   unlink:        { samples:        0, unit: reqs }
  *   mkdir:         { samples:        0, unit: reqs }
  *   rmdir:         { samples:        0, unit: reqs }
- *   rename:        { samples:        1, unit: reqs }
- *   getattr:       { samples:        7, unit: reqs }
+ *   rename:        { samples:        0, unit: reqs }
+ *   getattr:       { samples:        1, unit: reqs }
  *   setattr:       { samples:        0, unit: reqs }
  *   getxattr:      { samples:        0, unit: reqs }
  *   setxattr:      { samples:        0, unit: reqs }
@@ -325,13 +467,15 @@ static void *lprocfs_jobstats_seq_next(struct seq_file *p, void *v, loff_t *pos)
  * Example of output on OST:
  *
  * job_stats:
- * - job_id         4854
- *   snapshot_time: 1322494602
- *   read:          { samples:  0, unit: bytes, min:  0, max:  0, sum:  0 }
- *   write:         { samples:  1, unit: bytes, min: 10, max: 10, sum: 10 }
- *   setattr:       { samples:  0, unit: reqs }
- *   punch:         { samples:  0, unit: reqs }
- *   sync:          { samples:  0, unit: reqs }
+ * - job_id         dd.4854
+ *   snapshot_time: 1322494602.123456789
+ *   start_time:    1322494592.987654321
+ *   elapsed_time:  9.135802468
+ *   read:          { samples: 0, unit: bytes, min:  0, max:  0, sum:  0 }
+ *   write:         { samples: 1, unit: bytes, min: 4096, max: 4096, sum: 4096 }
+ *   setattr:       { samples: 0, unit: reqs }
+ *   punch:         { samples: 0, unit: reqs }
+ *   sync:          { samples: 0, unit: reqs }
  */
 
 static const char spaces[] = "                    ";
@@ -345,103 +489,190 @@ static int lprocfs_jobstats_seq_show(struct seq_file *p, void *v)
 {
        struct job_stat *job = v;
        struct lprocfs_stats *s;
-       struct lprocfs_counter ret, *cntr;
-       int i;
+       struct lprocfs_counter ret;
+       struct lprocfs_counter_header *cntr_header;
+       char escaped[LUSTRE_JOBID_SIZE * 4] = "";
+       char *quote = "", *c, *end;
+       int i, joblen = 0;
 
        if (v == SEQ_START_TOKEN) {
-               seq_printf(p, "job_stats:\n");
+               seq_puts(p, "job_stats:\n");
                return 0;
        }
 
-       seq_printf(p, "- %-16s %s\n", "job_id:", job->js_jobid);
-       seq_printf(p, "  %-16s %ld\n", "snapshot_time:", job->js_timestamp);
+       /* Quote and escape jobid characters to escape hex codes "\xHH" if
+        * it contains any non-standard characters (space, newline, etc),
+        * so it will be confined to single line and not break parsing.
+        */
+       for (c = job->js_jobid, end = job->js_jobid + sizeof(job->js_jobid);
+            c < end && *c != '\0';
+            c++, joblen++) {
+               if (!isalnum(*c) && strchr(".@-_:/", *c) == NULL) {
+                       quote = "\"";
+                       snprintf(escaped + joblen, sizeof(escaped), "\\x%02X",
+                                (unsigned char)*c);
+                       joblen += 3;
+               } else {
+                       escaped[joblen] = *c;
+                       /* if jobid has ':', it should be quoted too */
+                       if (*c == ':')
+                               quote = "\"";
+               }
+       }
+       /* '@' is reserved in YAML, so it cannot start a bare string. */
+       if (escaped[0] == '@')
+               quote = "\"";
+
+       seq_printf(p, "- %-16s %s%*s%s\n",
+                  "job_id:", quote, joblen, escaped, quote);
+       lprocfs_stats_header(p, job->js_time_latest, job->js_stats->ls_init,
+                            16, ":", true, "  ");
 
        s = job->js_stats;
        for (i = 0; i < s->ls_num; i++) {
-               cntr = &(s->ls_percpu[0]->lp_cntr[i]);
+               struct obd_histogram *hist;
+
+               cntr_header = &s->ls_cnt_header[i];
                lprocfs_stats_collect(s, i, &ret);
 
-               seq_printf(p, "  %s:%.*s { samples: %11"LPF64"u",
-                          cntr->lc_name, width(cntr->lc_name, 15), spaces,
+               seq_printf(p, "  %s:%.*s { samples: %11llu",
+                          cntr_header->lc_name,
+                          width(cntr_header->lc_name, 15), spaces,
                           ret.lc_count);
-               if (cntr->lc_units[0] != '\0')
-                       seq_printf(p, ", unit: %5s", cntr->lc_units);
+               if (cntr_header->lc_units[0] != '\0')
+                       seq_printf(p, ", unit: %5s", cntr_header->lc_units);
 
-               if (cntr->lc_config & LPROCFS_CNTR_AVGMINMAX) {
-                       seq_printf(p, ", min:%8"LPF64"u, max:%8"LPF64"u,"
-                                  " sum:%16"LPF64"u",
+               if (cntr_header->lc_config & LPROCFS_CNTR_AVGMINMAX) {
+                       seq_printf(p, ", min: %8llu, max: %8llu, sum: %16llu",
                                   ret.lc_count ? ret.lc_min : 0,
                                   ret.lc_count ? ret.lc_max : 0,
                                   ret.lc_count ? ret.lc_sum : 0);
                }
-               if (cntr->lc_config & LPROCFS_CNTR_STDDEV) {
-                       seq_printf(p, ", sumsq: %18"LPF64"u",
+               if (cntr_header->lc_config & LPROCFS_CNTR_STDDEV) {
+                       seq_printf(p, ", sumsq: %18llu",
                                   ret.lc_count ? ret.lc_sumsquare : 0);
                }
 
-               seq_printf(p, " }\n");
-
+               /* show obd_histogram */
+               hist = s->ls_cnt_header[i].lc_hist;
+               if (hist != NULL) {
+                       bool first = true;
+                       int j;
+
+                       seq_puts(p, ", hist: { ");
+                       for (j = 0; j < ARRAY_SIZE(hist->oh_buckets); j++) {
+                               unsigned long val = hist->oh_buckets[j];
+
+                               if (val == 0)
+                                       continue;
+                               if (first)
+                                       first = false;
+                               else
+                                       seq_puts(p, ", ");
+
+                               if (j < 10)
+                                       seq_printf(p, "%lu: %lu", BIT(j), val);
+                               else if (j < 20)
+                                       seq_printf(p, "%luK: %lu", BIT(j - 10),
+                                                  val);
+                               else if (j < 30)
+                                       seq_printf(p, "%luM: %lu", BIT(j - 20),
+                                                  val);
+                               else
+                                       seq_printf(p, "%luG: %lu", BIT(j - 30),
+                                                  val);
+                       }
+                       seq_puts(p, " }");
+               }
+               seq_puts(p, " }\n");
        }
+
        return 0;
 }
 
-struct seq_operations lprocfs_jobstats_seq_sops = {
-       start: lprocfs_jobstats_seq_start,
-       stop:  lprocfs_jobstats_seq_stop,
-       next:  lprocfs_jobstats_seq_next,
-       show:  lprocfs_jobstats_seq_show,
+static const struct seq_operations lprocfs_jobstats_seq_sops = {
+       .start  = lprocfs_jobstats_seq_start,
+       .stop   = lprocfs_jobstats_seq_stop,
+       .next   = lprocfs_jobstats_seq_next,
+       .show   = lprocfs_jobstats_seq_show,
 };
 
 static int lprocfs_jobstats_seq_open(struct inode *inode, struct file *file)
 {
-       struct proc_dir_entry *dp = PDE(inode);
+       struct lprocfs_jobstats_data *data = NULL;
        struct seq_file *seq;
        int rc;
 
-       if (LPROCFS_ENTRY_AND_CHECK(dp))
-               return -ENOENT;
-
        rc = seq_open(file, &lprocfs_jobstats_seq_sops);
-       if (rc) {
-               LPROCFS_EXIT();
+       if (rc)
                return rc;
-       }
+
+       OBD_ALLOC_PTR(data);
+       if (!data)
+               return -ENOMEM;
+
+       data->pjd_stats = pde_data(inode);
+       data->pjd_last_job = NULL;
+       data->pjd_last_pos = 0;
        seq = file->private_data;
-       seq->private = dp->data;
+       seq->private = data;
        return 0;
 }
 
-static ssize_t lprocfs_jobstats_seq_write(struct file *file, const char *buf,
+static ssize_t lprocfs_jobstats_seq_write(struct file *file,
+                                         const char __user *buf,
                                          size_t len, loff_t *off)
 {
        struct seq_file *seq = file->private_data;
-       struct obd_job_stats *stats = seq->private;
-       char jobid[JOBSTATS_JOBID_SIZE];
-       int all = 0;
+       struct lprocfs_jobstats_data *data = seq->private;
+       struct obd_job_stats *stats = data->pjd_stats;
+       char jobid[4 * LUSTRE_JOBID_SIZE]; /* all escaped chars, plus ""\n\0 */
+       char *p1, *p2, *last;
+       unsigned int c;
        struct job_stat *job;
 
-       if (!memcmp(buf, "clear", strlen("clear"))) {
-               all = 1;
-       } else if (len < JOBSTATS_JOBID_SIZE) {
-               memset(jobid, 0, JOBSTATS_JOBID_SIZE);
-               /* Trim '\n' if any */
-               if (buf[len - 1] == '\n')
-                       memcpy(jobid, buf, len - 1);
-               else
-                       memcpy(jobid, buf, len);
-       } else {
+       if (len == 0 || len >= 4 * LUSTRE_JOBID_SIZE)
                return -EINVAL;
+
+       if (stats->ojs_hash == NULL)
+               return -ENODEV;
+
+       if (copy_from_user(jobid, buf, len))
+               return -EFAULT;
+       jobid[len] = 0;
+       last = jobid + len - 1;
+
+       /* Trim '\n' if any */
+       if (*last == '\n')
+               *(last--) = 0;
+
+       /* decode escaped chars if jobid is a quoted string */
+       if (jobid[0] == '"' && *last == '"') {
+               last--;
+
+               for (p1 = jobid, p2 = jobid + 1; p2 <= last; p1++, p2++) {
+                       if (*p2 != '\\') {
+                               *p1 = *p2;
+                       } else if (p2 + 3 <= last && *(p2 + 1) == 'x' &&
+                                sscanf(p2 + 2, "%02X", &c) == 1) {
+                               *p1 = c;
+                               p2 += 3;
+                       } else {
+                               return -EINVAL;
+                       }
+               }
+               *p1 = 0;
+
        }
+       jobid[LUSTRE_JOBID_SIZE - 1] = 0;
+
+       if (strcmp(jobid, "clear") == 0) {
+               lprocfs_job_cleanup(stats, true);
 
-       LASSERT(stats->ojs_hash);
-       if (all) {
-               time_t oldest = 0;
-               cfs_hash_for_each_safe(stats->ojs_hash, job_iter_callback,
-                                      &oldest);
                return len;
        }
 
-       if (!strlen(jobid))
+       if (strlen(jobid) == 0)
                return -EINVAL;
 
        job = cfs_hash_lookup(stats->ojs_hash, jobid);
@@ -454,29 +685,45 @@ static ssize_t lprocfs_jobstats_seq_write(struct file *file, const char *buf,
        return len;
 }
 
-struct file_operations lprocfs_jobstats_seq_fops = {
-       .owner   = THIS_MODULE,
-       .open    = lprocfs_jobstats_seq_open,
-       .read    = seq_read,
-       .write   = lprocfs_jobstats_seq_write,
-       .llseek  = seq_lseek,
-       .release = lprocfs_seq_release,
-};
-
-static void job_cleanup_callback(unsigned long data)
+/**
+ * Clean up the seq file state when the /proc file is closed.
+ *
+ * This also expires old job stats from the cache after they have been
+ * printed in case the system is idle and not generating new jobstats.
+ *
+ * \param[in] inode    struct inode for seq file being closed
+ * \param[in] file     struct file for seq file being closed
+ *
+ * \retval             0 on success
+ * \retval             negative errno on failure
+ */
+static int lprocfs_jobstats_seq_release(struct inode *inode, struct file *file)
 {
-       struct obd_job_stats *stats = (struct obd_job_stats *)data;
-       time_t oldest;
-
-       if (stats->ojs_cleanup_interval) {
-               oldest = cfs_time_current_sec() - stats->ojs_cleanup_interval;
-               cfs_hash_for_each_safe(stats->ojs_hash, job_iter_callback,
-                                      &oldest);
-               cfs_timer_arm(&stats->ojs_cleanup_timer,
-                             cfs_time_shift(stats->ojs_cleanup_interval));
+       struct seq_file *seq = file->private_data;
+       struct lprocfs_jobstats_data *data = seq->private;
+
+       /* drop the ref of last saved job */
+       if (data->pjd_last_job) {
+               job_putref(data->pjd_last_job);
+               data->pjd_last_pos = 0;
+               data->pjd_last_job = NULL;
        }
+
+       lprocfs_job_cleanup(data->pjd_stats, false);
+       OBD_FREE_PTR(data);
+
+       return lprocfs_seq_release(inode, file);
 }
 
+static const struct proc_ops lprocfs_jobstats_seq_fops = {
+       PROC_OWNER(THIS_MODULE)
+       .proc_open      = lprocfs_jobstats_seq_open,
+       .proc_read      = seq_read,
+       .proc_write     = lprocfs_jobstats_seq_write,
+       .proc_lseek     = seq_lseek,
+       .proc_release   = lprocfs_jobstats_seq_release,
+};
+
 int lprocfs_job_stats_init(struct obd_device *obd, int cntr_num,
                           cntr_init_callback init_fn)
 {
@@ -487,12 +734,20 @@ int lprocfs_job_stats_init(struct obd_device *obd, int cntr_num,
        LASSERT(obd->obd_proc_entry != NULL);
        LASSERT(obd->obd_type->typ_name);
 
-       if (strcmp(obd->obd_type->typ_name, LUSTRE_MDT_NAME) &&
-           strcmp(obd->obd_type->typ_name, LUSTRE_OST_NAME)) {
-               CERROR("Invalid obd device type.\n");
+       if (cntr_num <= 0)
+               RETURN(-EINVAL);
+
+       if (init_fn == NULL)
+               RETURN(-EINVAL);
+
+       /* Currently needs to be a target due to the use of obt_jobstats. */
+       if (strcmp(obd->obd_type->typ_name, LUSTRE_MDT_NAME) != 0 &&
+           strcmp(obd->obd_type->typ_name, LUSTRE_OST_NAME) != 0) {
+               CERROR("%s: invalid device type %s for job stats: rc = %d\n",
+                      obd->obd_name, obd->obd_type->typ_name, -EINVAL);
                RETURN(-EINVAL);
        }
-       stats = &obd->u.obt.obt_jobstats;
+       stats = &obd2obt(obd)->obt_jobstats;
 
        LASSERT(stats->ojs_hash == NULL);
        stats->ojs_hash = cfs_hash_create("JOB_STATS",
@@ -506,66 +761,61 @@ int lprocfs_job_stats_init(struct obd_device *obd, int cntr_num,
        if (stats->ojs_hash == NULL)
                RETURN(-ENOMEM);
 
-       CFS_INIT_LIST_HEAD(&stats->ojs_list);
-       cfs_rwlock_init(&stats->ojs_lock);
+       INIT_LIST_HEAD(&stats->ojs_list);
+       spin_lock_init(&stats->ojs_lock);
        stats->ojs_cntr_num = cntr_num;
        stats->ojs_cntr_init_fn = init_fn;
-       cfs_timer_init(&stats->ojs_cleanup_timer, job_cleanup_callback, stats);
-       stats->ojs_cleanup_interval = 600; /* 10 mins by default */
-       cfs_timer_arm(&stats->ojs_cleanup_timer,
-                     cfs_time_shift(stats->ojs_cleanup_interval));
-
-       LPROCFS_WRITE_ENTRY();
-       entry = create_proc_entry("job_stats", 0644, obd->obd_proc_entry);
-       LPROCFS_WRITE_EXIT();
-       if (entry) {
-               entry->proc_fops = &lprocfs_jobstats_seq_fops;
-               entry->data = stats;
-               RETURN(0);
-       } else {
+       /* Store 1/2 the actual interval, since we use that the most, and
+        * it is easier to work with.
+        */
+       stats->ojs_cleanup_interval = ktime_set(600 / 2, 0); /* default 10 min*/
+       stats->ojs_cleanup_last = ktime_get_real();
+
+       entry = lprocfs_add_simple(obd->obd_proc_entry, "job_stats", stats,
+                                  &lprocfs_jobstats_seq_fops);
+       if (IS_ERR(entry)) {
                lprocfs_job_stats_fini(obd);
                RETURN(-ENOMEM);
        }
+       RETURN(0);
 }
 EXPORT_SYMBOL(lprocfs_job_stats_init);
+#endif /* CONFIG_PROC_FS*/
 
-int lprocfs_rd_job_interval(char *page, char **start, off_t off,
-                           int count, int *eof, void *data)
+ssize_t job_cleanup_interval_show(struct kobject *kobj, struct attribute *attr,
+                                 char *buf)
 {
-       struct obd_device *obd = (struct obd_device *)data;
+       struct obd_device *obd = container_of(kobj, struct obd_device,
+                                             obd_kset.kobj);
        struct obd_job_stats *stats;
+       struct timespec64 ts;
+
+       stats = &obd2obt(obd)->obt_jobstats;
+       ts = ktime_to_timespec64(stats->ojs_cleanup_interval);
 
-       LASSERT(obd != NULL);
-       stats = &obd->u.obt.obt_jobstats;
-       *eof = 1;
-       return snprintf(page, count, "%d\n", stats->ojs_cleanup_interval);
+       return scnprintf(buf, PAGE_SIZE, "%lld\n", (long long)ts.tv_sec * 2);
 }
-EXPORT_SYMBOL(lprocfs_rd_job_interval);
+EXPORT_SYMBOL(job_cleanup_interval_show);
 
-int lprocfs_wr_job_interval(struct file *file, const char *buffer,
-                           unsigned long count, void *data)
+ssize_t job_cleanup_interval_store(struct kobject *kobj,
+                                  struct attribute *attr,
+                                  const char *buffer, size_t count)
 {
-       struct obd_device *obd = (struct obd_device *)data;
+       struct obd_device *obd = container_of(kobj, struct obd_device,
+                                             obd_kset.kobj);
        struct obd_job_stats *stats;
-       int val, rc;
+       unsigned int val;
+       int rc;
 
-       LASSERT(obd != NULL);
-       stats = &obd->u.obt.obt_jobstats;
+       stats = &obd2obt(obd)->obt_jobstats;
 
-       rc = lprocfs_write_helper(buffer, count, &val);
+       rc = kstrtouint(buffer, 0, &val);
        if (rc)
                return rc;
 
-       stats->ojs_cleanup_interval = val;
-       if (!stats->ojs_cleanup_interval)
-               cfs_timer_disarm(&stats->ojs_cleanup_timer);
-       else
-               cfs_timer_arm(&stats->ojs_cleanup_timer,
-                             cfs_time_shift(stats->ojs_cleanup_interval));
+       stats->ojs_cleanup_interval = ktime_set(val / 2, 0);
+       lprocfs_job_cleanup(stats, false);
 
        return count;
-
 }
-EXPORT_SYMBOL(lprocfs_wr_job_interval);
-
-#endif /* LPROCFS*/
+EXPORT_SYMBOL(job_cleanup_interval_store);