Whamcloud - gitweb
LU-17705 ptlrpc: replace synchronize_rcu() with rcu_barrier()
[fs/lustre-release.git] / lustre / obdclass / lprocfs_status.c
index 780ab60..5437c9c 100644 (file)
@@ -27,7 +27,6 @@
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
  *
  * lustre/obdclass/lprocfs_status.c
  *
@@ -41,6 +40,9 @@
 
 #ifdef CONFIG_PROC_FS
 
+/* enable start/elapsed_time in stats headers by default */
+unsigned int obd_enable_stats_header = 1;
+
 static int lprocfs_no_percpu_stats = 0;
 module_param(lprocfs_no_percpu_stats, int, 0644);
 MODULE_PARM_DESC(lprocfs_no_percpu_stats, "Do not alloc percpu data for lprocfs stats");
@@ -59,43 +61,29 @@ int lprocfs_seq_release(struct inode *inode, struct file *file)
 }
 EXPORT_SYMBOL(lprocfs_seq_release);
 
-struct dentry *ldebugfs_add_simple(struct dentry *root,
-                                  char *name, void *data,
-                                  const struct file_operations *fops)
+static umode_t default_mode(const struct proc_ops *ops)
 {
-       struct dentry *entry;
        umode_t mode = 0;
 
-       if (!root || !name || !fops)
-               return ERR_PTR(-EINVAL);
-
-       if (fops->read)
+       if (ops->proc_read)
                mode = 0444;
-       if (fops->write)
+       if (ops->proc_write)
                mode |= 0200;
-       entry = debugfs_create_file(name, mode, root, data, fops);
-       if (IS_ERR_OR_NULL(entry)) {
-               CERROR("LprocFS: No memory to create <debugfs> entry %s", name);
-               return entry ?: ERR_PTR(-ENOMEM);
-       }
-       return entry;
+
+       return mode;
 }
-EXPORT_SYMBOL(ldebugfs_add_simple);
 
 struct proc_dir_entry *
 lprocfs_add_simple(struct proc_dir_entry *root, char *name,
-                  void *data, const struct file_operations *fops)
+                  void *data, const struct proc_ops *fops)
 {
        struct proc_dir_entry *proc;
-       mode_t mode = 0;
+       umode_t mode;
 
        if (!root || !name || !fops)
                return ERR_PTR(-EINVAL);
 
-       if (fops->read)
-               mode = 0444;
-       if (fops->write)
-               mode |= 0200;
+       mode = default_mode(fops);
        proc = proc_create_data(name, mode, root, fops, data);
        if (!proc) {
                CERROR("LprocFS: No memory to create /proc entry %s\n",
@@ -135,9 +123,9 @@ struct proc_dir_entry *lprocfs_add_symlink(const char *name,
 }
 EXPORT_SYMBOL(lprocfs_add_symlink);
 
-static const struct file_operations lprocfs_generic_fops = { };
+static const struct file_operations ldebugfs_empty_ops = { };
 
-void ldebugfs_add_vars(struct dentry *parent, struct lprocfs_vars *list,
+void ldebugfs_add_vars(struct dentry *parent, struct ldebugfs_vars *list,
                       void *data)
 {
        if (IS_ERR_OR_NULL(parent) || IS_ERR_OR_NULL(list))
@@ -156,12 +144,14 @@ void ldebugfs_add_vars(struct dentry *parent, struct lprocfs_vars *list,
                }
                debugfs_create_file(list->name, mode, parent,
                                    list->data ? : data,
-                                   list->fops ? : &lprocfs_generic_fops);
+                                   list->fops ? : &ldebugfs_empty_ops);
                list++;
        }
 }
 EXPORT_SYMBOL_GPL(ldebugfs_add_vars);
 
+static const struct proc_ops lprocfs_empty_ops = { };
+
 /**
  * Add /proc entries.
  *
@@ -182,18 +172,14 @@ lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *list,
 
        while (list->name) {
                struct proc_dir_entry *proc;
-               mode_t mode = 0;
+               umode_t mode = 0;
 
-               if (list->proc_mode != 0000) {
+               if (list->proc_mode)
                        mode = list->proc_mode;
-               } else if (list->fops) {
-                       if (list->fops->read)
-                               mode = 0444;
-                       if (list->fops->write)
-                               mode |= 0200;
-               }
+               else if (list->fops)
+                       mode = default_mode(list->fops);
                proc = proc_create_data(list->name, mode, root,
-                                       list->fops ?: &lprocfs_generic_fops,
+                                       list->fops ?: &lprocfs_empty_ops,
                                        list->data ?: data);
                if (!proc)
                        return -ENOMEM;
@@ -410,7 +396,7 @@ int lprocfs_server_uuid_seq_show(struct seq_file *m, void *data)
 {
        struct obd_device *obd = data;
        struct obd_import *imp;
-       char *imp_state_name = NULL;
+       const char *imp_state_name = NULL;
        int rc = 0;
 
        LASSERT(obd != NULL);
@@ -458,10 +444,7 @@ int lprocfs_stats_lock(struct lprocfs_stats *stats,
                       unsigned long *flags)
 {
        if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) {
-               if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE)
-                       spin_lock_irqsave(&stats->ls_lock, *flags);
-               else
-                       spin_lock(&stats->ls_lock);
+               spin_lock(&stats->ls_lock);
                return opc == LPROCFS_GET_NUM_CPU ? 1 : 0;
        }
 
@@ -504,10 +487,7 @@ void lprocfs_stats_unlock(struct lprocfs_stats *stats,
                          unsigned long *flags)
 {
        if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) {
-               if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE)
-                       spin_unlock_irqrestore(&stats->ls_lock, *flags);
-               else
-                       spin_unlock(&stats->ls_lock);
+               spin_unlock(&stats->ls_lock);
        } else if (opc == LPROCFS_GET_SMP_ID) {
                put_cpu();
        }
@@ -571,89 +551,108 @@ static void obd_import_flags2str(struct obd_import *imp, struct seq_file *m)
        flag2str(imp, connect_tried);
 }
 
-static const char *obd_connect_names[] = {
-       /* flags names  */
-       "read_only",
-       "lov_index",
-       "connect_from_mds",
-       "write_grant",
-       "server_lock",
-       "version",
-       "request_portal",
-       "acl",
-       "xattr",
-       "create_on_write",
-       "truncate_lock",
-       "initial_transno",
-       "inode_bit_locks",
-       "barrier",
-       "getattr_by_fid",
-       "no_oh_for_devices",
-       "remote_client",
-       "remote_client_by_force",
-       "max_byte_per_rpc",
-       "64bit_qdata",
-       "mds_capability",
-       "oss_capability",
-       "early_lock_cancel",
-       "som",
-       "adaptive_timeouts",
-       "lru_resize",
-       "mds_mds_connection",
-       "real_conn",
-       "change_qunit_size",
-       "alt_checksum_algorithm",
-       "fid_is_enabled",
-       "version_recovery",
-       "pools",
-       "grant_shrink",
-       "skip_orphan",
-       "large_ea",
-       "full20",
-       "layout_lock",
-       "64bithash",
-       "object_max_bytes",
-       "imp_recov",
-       "jobstats",
-       "umask",
-       "einprogress",
-       "grant_param",
-       "flock_owner",
-       "lvb_type",
-       "nanoseconds_times",
-       "lightweight_conn",
-       "short_io",
-       "pingless",
-       "flock_deadlock",
-       "disp_stripe",
-       "open_by_fid",
-       "lfsck",
-       "unknown",
-       "unlink_close",
-       "multi_mod_rpcs",
-       "dir_stripe",
-       "subtree",
-       "lockahead",
-       "bulk_mbits",
-       "compact_obdo",
-       "second_flags",
-       /* flags2 names */
-       "file_secctx",  /* 0x01 */
-       "lockaheadv2",  /* 0x02 */
-       "dir_migrate",  /* 0x04 */
-       "sum_statfs",   /* 0x08 */
-       "overstriping", /* 0x10 */
-       "flr",          /* 0x20 */
-       "wbc",          /* 0x40 */
-       "lock_convert",  /* 0x80 */
-       "archive_id_array",     /* 0x100 */
-       "increasing_xid",       /* 0x200 */
-       "selinux_policy",       /* 0x400 */
-       "lsom",                 /* 0x800 */
-       "pcc",                  /* 0x1000 */
-       "crush",                /* 0x2000 */
-       "async_discard",        /* 0x4000 */
-       "client_encryption",    /* 0x8000 */
+static const char *const obd_connect_names[] = {
+       "read_only",                    /* 0x01 */
+       "lov_index",                    /* 0x02 */
+       "connect_from_mds",             /* 0x04 */
+       "write_grant",                  /* 0x08 */
+       "server_lock",                  /* 0x10 */
+       "version",                      /* 0x20 */
+       "request_portal",               /* 0x40 */
+       "acl",                          /* 0x80 */
+       "xattr",                        /* 0x100 */
+       "create_on_write",              /* 0x200 */
+       "truncate_lock",                /* 0x400 */
+       "initial_transno",              /* 0x800 */
+       "inode_bit_locks",              /* 0x1000 */
+       "barrier",                      /* 0x2000 */
+       "getattr_by_fid",               /* 0x4000 */
+       "no_oh_for_devices",            /* 0x8000 */
+       "remote_client",                /* 0x10000 */
+       "remote_client_by_force",       /* 0x20000 */
+       "max_byte_per_rpc",             /* 0x40000 */
+       "64bit_qdata",                  /* 0x80000 */
+       "mds_capability",               /* 0x100000 */
+       "oss_capability",               /* 0x200000 */
+       "early_lock_cancel",            /* 0x400000 */
+       "som",                          /* 0x800000 */
+       "adaptive_timeouts",            /* 0x1000000 */
+       "lru_resize",                   /* 0x2000000 */
+       "mds_mds_connection",           /* 0x4000000 */
+       "real_conn",                    /* 0x8000000 */
+       "change_qunit_size",            /* 0x10000000 */
+       "alt_checksum_algorithm",       /* 0x20000000 */
+       "fid_is_enabled",               /* 0x40000000 */
+       "version_recovery",             /* 0x80000000 */
+       "pools",                        /* 0x100000000 */
+       "grant_shrink",                 /* 0x200000000 */
+       "skip_orphan",                  /* 0x400000000 */
+       "large_ea",                     /* 0x800000000 */
+       "full20",                       /* 0x1000000000 */
+       "layout_lock",                  /* 0x2000000000 */
+       "64bithash",                    /* 0x4000000000 */
+       "object_max_bytes",             /* 0x8000000000 */
+       "imp_recov",                    /* 0x10000000000 */
+       "jobstats",                     /* 0x20000000000 */
+       "umask",                        /* 0x40000000000 */
+       "einprogress",                  /* 0x80000000000 */
+       "grant_param",                  /* 0x100000000000 */
+       "flock_owner",                  /* 0x200000000000 */
+       "lvb_type",                     /* 0x400000000000 */
+       "nanoseconds_times",            /* 0x800000000000 */
+       "lightweight_conn",             /* 0x1000000000000 */
+       "short_io",                     /* 0x2000000000000 */
+       "pingless",                     /* 0x4000000000000 */
+       "flock_deadlock",               /* 0x8000000000000 */
+       "disp_stripe",                  /* 0x10000000000000 */
+       "open_by_fid",                  /* 0x20000000000000 */
+       "lfsck",                        /* 0x40000000000000 */
+       "unknown",                      /* 0x80000000000000 */
+       "unlink_close",                 /* 0x100000000000000 */
+       "multi_mod_rpcs",               /* 0x200000000000000 */
+       "dir_stripe",                   /* 0x400000000000000 */
+       "subtree",                      /* 0x800000000000000 */
+       "lockahead",                    /* 0x1000000000000000 */
+       "bulk_mbits",                   /* 0x2000000000000000 */
+       "compact_obdo",                 /* 0x4000000000000000 */
+       "second_flags",                 /* 0x8000000000000000 */
+       /* ocd_connect_flags2 names */
+       "file_secctx",                  /* 0x01 */
+       "lockaheadv2",                  /* 0x02 */
+       "dir_migrate",                  /* 0x04 */
+       "sum_statfs",                   /* 0x08 */
+       "overstriping",                 /* 0x10 */
+       "flr",                          /* 0x20 */
+       "wbc",                          /* 0x40 */
+       "lock_convert",                 /* 0x80 */
+       "archive_id_array",             /* 0x100 */
+       "increasing_xid",               /* 0x200 */
+       "selinux_policy",               /* 0x400 */
+       "lsom",                         /* 0x800 */
+       "pcc",                          /* 0x1000 */
+       "crush",                        /* 0x2000 */
+       "async_discard",                /* 0x4000 */
+       "client_encryption",            /* 0x8000 */
+       "fidmap",                       /* 0x10000 */
+       "getattr_pfid",                 /* 0x20000 */
+       "lseek",                        /* 0x40000 */
+       "dom_lvb",                      /* 0x80000 */
+       "reply_mbits",                  /* 0x100000 */
+       "mode_convert",                 /* 0x200000 */
+       "batch_rpc",                    /* 0x400000 */
+       "pcc_ro",                       /* 0x800000 */
+       "mne_nid_type",                 /* 0x1000000 */
+       "lock_contend",                 /* 0x2000000 */
+       "atomic_open_lock",             /* 0x4000000 */
+       "name_encryption",              /* 0x8000000 */
+       "mkdir_replay",                 /* 0x10000000 */
+       "dmv_imp_inherit",              /* 0x20000000 */
+       "encryption_fid2path",          /* 0x40000000 */
+       "replay_create",                /* 0x80000000 */
+       "large_nid",                    /* 0x100000000 */
+       "compressed_file",              /* 0x200000000 */
+       "unaligned_dio",                /* 0x400000000 */
+       "conn_policy",                  /* 0x800000000 */
        NULL
 };
 
@@ -824,18 +823,41 @@ static void lprocfs_import_seq_show_locked(struct seq_file *m,
        spin_lock(&imp->imp_lock);
        j = 0;
        list_for_each_entry(conn, &imp->imp_conn_list, oic_item) {
-               libcfs_nid2str_r(conn->oic_conn->c_peer.nid,
-                                nidstr, sizeof(nidstr));
-               seq_printf(m, "%s%s", j ? ", " : "", nidstr);
+               libcfs_nidstr_r(&conn->oic_conn->c_peer.nid,
+                                 nidstr, sizeof(nidstr));
+               if (j)
+                       seq_puts(m, ", ");
+               /* Place nidstr in quotes */
+               seq_printf(m, "\"%s\"", nidstr);
                j++;
        }
        if (imp->imp_connection)
-               libcfs_nid2str_r(imp->imp_connection->c_peer.nid,
-                                nidstr, sizeof(nidstr));
+               libcfs_nidstr_r(&imp->imp_connection->c_peer.nid,
+                                 nidstr, sizeof(nidstr));
        else
                strncpy(nidstr, "<none>", sizeof(nidstr));
        seq_printf(m, " ]\n"
-                  "       current_connection: %s\n"
+                  "       nids_stats:");
+       list_for_each_entry(conn, &imp->imp_conn_list, oic_item) {
+               libcfs_nidstr_r(&conn->oic_conn->c_peer.nid,
+                                 nidstr, sizeof(nidstr));
+               seq_printf(m, "\n          \"%s\": { connects: %u, replied: %u,"
+                          " uptodate: %s, sec_ago: ",
+                          nidstr, conn->oic_attempts, conn->oic_replied,
+                          conn->oic_uptodate ? "true" : "false");
+               if (conn->oic_last_attempt)
+                       seq_printf(m, "%lld }", ktime_get_seconds() -
+                                  conn->oic_last_attempt);
+               else
+                       seq_puts(m, "never }");
+       }
+       if (imp->imp_connection)
+               libcfs_nidstr_r(&imp->imp_connection->c_peer.nid,
+                                 nidstr, sizeof(nidstr));
+       else
+               strncpy(nidstr, "<none>", sizeof(nidstr));
+       seq_printf(m, "\n"
+                  "       current_connection: \"%s\"\n"
                   "       connection_attempts: %u\n"
                   "       generation: %u\n"
                   "       in-progress_invalidations: %u\n"
@@ -871,13 +893,14 @@ static void lprocfs_import_seq_show_locked(struct seq_file *m,
                if (imp->imp_at.iat_portal[j] == 0)
                        break;
                k = max_t(unsigned int, k,
-                         at_get(&imp->imp_at.iat_service_estimate[j]));
+                         obd_at_get(imp->imp_obd,
+                                    &imp->imp_at.iat_service_estimate[j]));
        }
        seq_printf(m, "    service_estimates:\n"
                   "       services: %u sec\n"
-                  "       network: %u sec\n",
+                  "       network: %d sec\n",
                   k,
-                  at_get(&imp->imp_at.iat_net_latency));
+                  obd_at_get(imp->imp_obd, &imp->imp_at.iat_net_latency));
 
        seq_printf(m, "    transactions:\n"
                   "       last_replay: %llu\n"
@@ -970,8 +993,8 @@ static void lprocfs_timeouts_seq_show_locked(struct seq_file *m,
                                             struct obd_device *obd,
                                             struct obd_import *imp)
 {
-       unsigned int cur, worst;
-       time64_t now, worstt;
+       timeout_t cur_timeout, worst_timeout;
+       time64_t now, worst_timestamp;
        int i;
 
        LASSERT(obd != NULL);
@@ -983,23 +1006,29 @@ static void lprocfs_timeouts_seq_show_locked(struct seq_file *m,
                   "last reply", (s64)imp->imp_last_reply_time,
                   (s64)(now - imp->imp_last_reply_time));
 
-       cur = at_get(&imp->imp_at.iat_net_latency);
-       worst = imp->imp_at.iat_net_latency.at_worst_ever;
-       worstt = imp->imp_at.iat_net_latency.at_worst_time;
+       cur_timeout = obd_at_get(imp->imp_obd, &imp->imp_at.iat_net_latency);
+       worst_timeout = imp->imp_at.iat_net_latency.at_worst_timeout_ever;
+       worst_timestamp = imp->imp_at.iat_net_latency.at_worst_timestamp;
        seq_printf(m, "%-10s : cur %3u  worst %3u (at %lld, %llds ago) ",
-                  "network", cur, worst, (s64)worstt, (s64)(now - worstt));
+                  "network", cur_timeout, worst_timeout, worst_timestamp,
+                  now - worst_timestamp);
        lprocfs_at_hist_helper(m, &imp->imp_at.iat_net_latency);
 
        for(i = 0; i < IMP_AT_MAX_PORTALS; i++) {
+               struct adaptive_timeout *service_est;
+
                if (imp->imp_at.iat_portal[i] == 0)
                        break;
-               cur = at_get(&imp->imp_at.iat_service_estimate[i]);
-               worst = imp->imp_at.iat_service_estimate[i].at_worst_ever;
-               worstt = imp->imp_at.iat_service_estimate[i].at_worst_time;
+
+               service_est = &imp->imp_at.iat_service_estimate[i];
+               cur_timeout = obd_at_get(imp->imp_obd, service_est);
+               worst_timeout = service_est->at_worst_timeout_ever;
+               worst_timestamp = service_est->at_worst_timestamp;
                seq_printf(m, "portal %-2d  : cur %3u  worst %3u (at %lld, %llds ago) ",
-                          imp->imp_at.iat_portal[i], cur, worst, (s64)worstt,
-                          (s64)(now - worstt));
-               lprocfs_at_hist_helper(m, &imp->imp_at.iat_service_estimate[i]);
+                          imp->imp_at.iat_portal[i], cur_timeout,
+                          worst_timeout, worst_timestamp,
+                          now - worst_timestamp);
+               lprocfs_at_hist_helper(m, service_est);
        }
 }
 
@@ -1062,7 +1091,7 @@ static void obd_sysfs_release(struct kobject *kobj)
 
 int lprocfs_obd_setup(struct obd_device *obd, bool uuid_only)
 {
-       struct lprocfs_vars *debugfs_vars = NULL;
+       struct ldebugfs_vars *debugfs_vars = NULL;
        int rc;
 
        if (!obd || obd->obd_magic != OBD_DEVICE_MAGIC)
@@ -1094,7 +1123,7 @@ int lprocfs_obd_setup(struct obd_device *obd, bool uuid_only)
        }
 
        if (!obd->obd_type->typ_procroot)
-               debugfs_vars = obd->obd_vars;
+               debugfs_vars = obd->obd_debugfs_vars;
        obd->obd_debugfs_entry = debugfs_create_dir(
                obd->obd_name, obd->obd_type->typ_debugfs_entry);
        ldebugfs_add_vars(obd->obd_debugfs_entry, debugfs_vars, obd);
@@ -1128,6 +1157,9 @@ int lprocfs_obd_cleanup(struct obd_device *obd)
        if (!obd)
                return -EINVAL;
 
+       debugfs_remove_recursive(obd->obd_debugfs_gss_dir);
+       obd->obd_debugfs_gss_dir = NULL;
+
        if (obd->obd_proc_exports_entry) {
                /* Should be no exports left */
                lprocfs_remove(&obd->obd_proc_exports_entry);
@@ -1162,7 +1194,6 @@ int lprocfs_stats_alloc_one(struct lprocfs_stats *stats, unsigned int cpuid)
        struct lprocfs_counter *cntr;
        unsigned int percpusize;
        int rc = -ENOMEM;
-       unsigned long flags = 0;
        int i;
 
        LASSERT(stats->ls_percpu[cpuid] == NULL);
@@ -1173,17 +1204,10 @@ int lprocfs_stats_alloc_one(struct lprocfs_stats *stats, unsigned int cpuid)
        if (stats->ls_percpu[cpuid]) {
                rc = 0;
                if (unlikely(stats->ls_biggest_alloc_num <= cpuid)) {
-                       if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE)
-                               spin_lock_irqsave(&stats->ls_lock, flags);
-                       else
-                               spin_lock(&stats->ls_lock);
+                       spin_lock(&stats->ls_lock);
                        if (stats->ls_biggest_alloc_num <= cpuid)
                                stats->ls_biggest_alloc_num = cpuid + 1;
-                       if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) {
-                               spin_unlock_irqrestore(&stats->ls_lock, flags);
-                       } else {
-                               spin_unlock(&stats->ls_lock);
-                       }
+                       spin_unlock(&stats->ls_lock);
                }
                /* initialize the ls_percpu[cpuid] non-zero counter */
                for (i = 0; i < stats->ls_num; ++i) {
@@ -1194,13 +1218,12 @@ int lprocfs_stats_alloc_one(struct lprocfs_stats *stats, unsigned int cpuid)
        return rc;
 }
 
-struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num,
+struct lprocfs_stats *lprocfs_stats_alloc(unsigned int num,
                                           enum lprocfs_stats_flags flags)
 {
        struct lprocfs_stats *stats;
        unsigned int num_entry;
        unsigned int percpusize = 0;
-       int i;
 
        if (num == 0)
                return NULL;
@@ -1220,6 +1243,7 @@ struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num,
 
        stats->ls_num = num;
        stats->ls_flags = flags;
+       stats->ls_init = ktime_get_real();
        spin_lock_init(&stats->ls_lock);
 
        /* alloc num of counter headers */
@@ -1234,22 +1258,17 @@ struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num,
                if (!stats->ls_percpu[0])
                        goto fail;
                stats->ls_biggest_alloc_num = 1;
-       } else if ((flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0) {
-               /* alloc all percpu data, currently only obd_memory use this */
-               for (i = 0; i < num_entry; ++i)
-                       if (lprocfs_stats_alloc_one(stats, i) < 0)
-                               goto fail;
        }
 
        return stats;
 
 fail:
-       lprocfs_free_stats(&stats);
+       lprocfs_stats_free(&stats);
        return NULL;
 }
-EXPORT_SYMBOL(lprocfs_alloc_stats);
+EXPORT_SYMBOL(lprocfs_stats_alloc);
 
-void lprocfs_free_stats(struct lprocfs_stats **statsh)
+void lprocfs_stats_free(struct lprocfs_stats **statsh)
 {
        struct lprocfs_stats *stats = *statsh;
        unsigned int num_entry;
@@ -1269,11 +1288,17 @@ void lprocfs_free_stats(struct lprocfs_stats **statsh)
        for (i = 0; i < num_entry; i++)
                if (stats->ls_percpu[i])
                        LIBCFS_FREE(stats->ls_percpu[i], percpusize);
-       if (stats->ls_cnt_header)
+
+       if (stats->ls_cnt_header) {
+               for (i = 0; i < stats->ls_num; i++)
+                       if (stats->ls_cnt_header[i].lc_hist != NULL)
+                               CFS_FREE_PTR(stats->ls_cnt_header[i].lc_hist);
                CFS_FREE_PTR_ARRAY(stats->ls_cnt_header, stats->ls_num);
+       }
+
        LIBCFS_FREE(stats, offsetof(typeof(*stats), ls_percpu[num_entry]));
 }
-EXPORT_SYMBOL(lprocfs_free_stats);
+EXPORT_SYMBOL(lprocfs_stats_free);
 
 u64 lprocfs_stats_collector(struct lprocfs_stats *stats, int idx,
                            enum lprocfs_fields_flags field)
@@ -1301,16 +1326,23 @@ u64 lprocfs_stats_collector(struct lprocfs_stats *stats, int idx,
 }
 EXPORT_SYMBOL(lprocfs_stats_collector);
 
-void lprocfs_clear_stats(struct lprocfs_stats *stats)
+void lprocfs_stats_clear(struct lprocfs_stats *stats)
 {
        struct lprocfs_counter *percpu_cntr;
-       int i;
-       int j;
        unsigned int num_entry;
        unsigned long flags = 0;
+       int i, j;
 
        num_entry = lprocfs_stats_lock(stats, LPROCFS_GET_NUM_CPU, &flags);
 
+       /* clear histogram if exists */
+       for (j = 0; j < stats->ls_num; j++) {
+               struct obd_histogram *hist = stats->ls_cnt_header[j].lc_hist;
+
+               if (hist != NULL)
+                       lprocfs_oh_clear(hist);
+       }
+
        for (i = 0; i < num_entry; i++) {
                if (!stats->ls_percpu[i])
                        continue;
@@ -1321,14 +1353,13 @@ void lprocfs_clear_stats(struct lprocfs_stats *stats)
                        percpu_cntr->lc_max             = 0;
                        percpu_cntr->lc_sumsquare       = 0;
                        percpu_cntr->lc_sum             = 0;
-                       if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE)
-                               percpu_cntr->lc_sum_irq = 0;
                }
        }
+       stats->ls_init = ktime_get_real();
 
        lprocfs_stats_unlock(stats, LPROCFS_GET_NUM_CPU, &flags);
 }
-EXPORT_SYMBOL(lprocfs_clear_stats);
+EXPORT_SYMBOL(lprocfs_stats_clear);
 
 static ssize_t lprocfs_stats_seq_write(struct file *file,
                                       const char __user *buf,
@@ -1337,7 +1368,7 @@ static ssize_t lprocfs_stats_seq_write(struct file *file,
        struct seq_file *seq = file->private_data;
        struct lprocfs_stats *stats = seq->private;
 
-       lprocfs_clear_stats(stats);
+       lprocfs_stats_clear(stats);
 
        return len;
 }
@@ -1360,6 +1391,46 @@ static void *lprocfs_stats_seq_next(struct seq_file *p, void *v, loff_t *pos)
        return lprocfs_stats_seq_start(p, pos);
 }
 
+/**
+ * Print the stats header: snapshot_time, plus start_time and elapsed_time
+ * when obd_enable_stats_header is set.  Seconds/nanoseconds print as signed.
+ *
+ * \param seq          the file to print content to
+ * \param now          end time to calculate elapsed_time
+ * \param ts_init      start time to calculate elapsed_time
+ * \param width        minimum (left-aligned) width of the key column
+ * \param colon        non-empty to append ':' to each key, NULL/"" for none
+ * \param show_units   append " secs.nsecs" to each line when true
+ * \param prefix       prefix (indent) before printing each line of header
+ */
+void lprocfs_stats_header(struct seq_file *seq, ktime_t now, ktime_t ts_init,
+                         int width, const char *colon, bool show_units,
+                         const char *prefix)
+{
+       const char *units = show_units ? " secs.nsecs" : "";
+       struct timespec64 ts;
+       const char *field;
+
+       field = (colon && colon[0]) ? "snapshot_time:" : "snapshot_time";
+       ts = ktime_to_timespec64(now);
+       seq_printf(seq, "%s%-*s %lld.%09ld%s\n", prefix, width, field,
+                  (s64)ts.tv_sec, ts.tv_nsec, units);
+
+       if (!obd_enable_stats_header)
+               return;
+
+       field = (colon && colon[0]) ? "start_time:" : "start_time";
+       ts = ktime_to_timespec64(ts_init);
+       seq_printf(seq, "%s%-*s %lld.%09ld%s\n", prefix, width, field,
+                  (s64)ts.tv_sec, ts.tv_nsec, units);
+
+       field = (colon && colon[0]) ? "elapsed_time:" : "elapsed_time";
+       ts = ktime_to_timespec64(ktime_sub(now, ts_init));
+       seq_printf(seq, "%s%-*s %lld.%09ld%s\n", prefix, width, field,
+                  (s64)ts.tv_sec, ts.tv_nsec, units);
+}
+EXPORT_SYMBOL(lprocfs_stats_header);
+
 /* seq file export of one lprocfs counter */
 static int lprocfs_stats_seq_show(struct seq_file *p, void *v)
 {
@@ -1368,13 +1439,9 @@ static int lprocfs_stats_seq_show(struct seq_file *p, void *v)
        struct lprocfs_counter ctr;
        int idx = *(loff_t *)v;
 
-       if (idx == 0) {
-               struct timespec64 now;
-
-               ktime_get_real_ts64(&now);
-               seq_printf(p, "%-25s %llu.%09lu secs.nsecs\n",
-                          "snapshot_time", (s64)now.tv_sec, now.tv_nsec);
-       }
+       if (idx == 0)
+               lprocfs_stats_header(p, ktime_get_real(), stats->ls_init, 25,
+                                    "", true, "");
 
        hdr = &stats->ls_cnt_header[idx];
        lprocfs_stats_collect(stats, idx, &ctr);
@@ -1411,11 +1478,11 @@ static int lprocfs_stats_seq_open(struct inode *inode, struct file *file)
        if (rc)
                return rc;
        seq = file->private_data;
-       seq->private = inode->i_private ? inode->i_private : PDE_DATA(inode);
+       seq->private = inode->i_private ? inode->i_private : pde_data(inode);
        return 0;
 }
 
-const struct file_operations lprocfs_stats_seq_fops = {
+const struct file_operations ldebugfs_stats_seq_fops = {
        .owner   = THIS_MODULE,
        .open    = lprocfs_stats_seq_open,
        .read    = seq_read,
@@ -1423,24 +1490,60 @@ const struct file_operations lprocfs_stats_seq_fops = {
        .llseek  = seq_lseek,
        .release = lprocfs_seq_release,
 };
-EXPORT_SYMBOL(lprocfs_stats_seq_fops);
+EXPORT_SYMBOL(ldebugfs_stats_seq_fops);
+
+static const struct proc_ops lprocfs_stats_seq_fops = {
+       PROC_OWNER(THIS_MODULE)
+       .proc_open      = lprocfs_stats_seq_open,
+       .proc_read      = seq_read,
+       .proc_write     = lprocfs_stats_seq_write, /* write clears stats */
+       .proc_lseek     = seq_lseek,
+       .proc_release   = lprocfs_seq_release,
+};
 
-int lprocfs_register_stats(struct proc_dir_entry *root, const char *name,
-                           struct lprocfs_stats *stats)
+int lprocfs_stats_register(struct proc_dir_entry *root, const char *name,
+                          struct lprocfs_stats *stats)
 {
        struct proc_dir_entry *entry;
-       LASSERT(root != NULL);
 
+       LASSERT(root != NULL);
        entry = proc_create_data(name, 0644, root,
                                 &lprocfs_stats_seq_fops, stats);
        if (!entry)
                return -ENOMEM;
+
        return 0;
 }
-EXPORT_SYMBOL(lprocfs_register_stats);
+EXPORT_SYMBOL(lprocfs_stats_register);
 
-void lprocfs_counter_init(struct lprocfs_stats *stats, int index,
-                         unsigned conf, const char *name, const char *units)
+/*
+ * Map the type bits of @config to a unit label; @name is not consulted.
+ */
+static const char *lprocfs_counter_config_units(const char *name,
+                                        enum lprocfs_counter_config config)
+{
+       switch (config & LPROCFS_TYPE_MASK) {
+       case LPROCFS_TYPE_BYTES:
+               return "bytes";
+       case LPROCFS_TYPE_PAGES:
+               return "pages";
+       case LPROCFS_TYPE_LOCKS:
+               return "locks";
+       case LPROCFS_TYPE_LOCKSPS:
+               return "locks/s";
+       case LPROCFS_TYPE_SECS:
+               return "secs";
+       case LPROCFS_TYPE_USECS:
+               return "usecs";
+       default:
+               /* any other type value is reported as requests */
+               return "reqs";
+       }
+}
+
+void lprocfs_counter_init_units(struct lprocfs_stats *stats, int index,
+                               enum lprocfs_counter_config config,
+                               const char *name, const char *units)
 {
        struct lprocfs_counter_header *header;
        struct lprocfs_counter *percpu_cntr;
@@ -1454,10 +1557,18 @@ void lprocfs_counter_init(struct lprocfs_stats *stats, int index,
        LASSERTF(header != NULL, "Failed to allocate stats header:[%d]%s/%s\n",
                 index, name, units);
 
-       header->lc_config = conf;
-       header->lc_name   = name;
-       header->lc_units  = units;
+       header->lc_config = config;
+       header->lc_name = name;
+       header->lc_units = units;
 
+       if (config & LPROCFS_CNTR_HISTOGRAM) {
+               CFS_ALLOC_PTR(stats->ls_cnt_header[index].lc_hist);
+               if (stats->ls_cnt_header[index].lc_hist == NULL)
+                       CERROR("LprocFS: Failed to allocate histogram:[%d]%s/%s\n",
+                              index, name, units);
+               else
+                       spin_lock_init(&stats->ls_cnt_header[index].lc_hist->oh_lock);
+       }
        num_cpu = lprocfs_stats_lock(stats, LPROCFS_GET_NUM_CPU, &flags);
        for (i = 0; i < num_cpu; ++i) {
                if (!stats->ls_percpu[i])
@@ -1468,11 +1579,18 @@ void lprocfs_counter_init(struct lprocfs_stats *stats, int index,
                percpu_cntr->lc_max             = 0;
                percpu_cntr->lc_sumsquare       = 0;
                percpu_cntr->lc_sum             = 0;
-               if ((stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0)
-                       percpu_cntr->lc_sum_irq = 0;
        }
        lprocfs_stats_unlock(stats, LPROCFS_GET_NUM_CPU, &flags);
 }
+EXPORT_SYMBOL(lprocfs_counter_init_units);
+
+/**
+ * Initialize counter @index of @stats without an explicit unit string:
+ * the units are derived from @config by lprocfs_counter_config_units()
+ * and everything is handed on to lprocfs_counter_init_units().
+ */
+void lprocfs_counter_init(struct lprocfs_stats *stats, int index,
+                         enum lprocfs_counter_config config,
+                         const char *name)
+{
+       const char *units = lprocfs_counter_config_units(name, config);
+
+       lprocfs_counter_init_units(stats, index, config, name, units);
+}
 EXPORT_SYMBOL(lprocfs_counter_init);
 
 static const char * const mps_stats[] = {
@@ -1513,12 +1631,13 @@ int lprocfs_alloc_md_stats(struct obd_device *obd,
        LASSERT(obd->obd_md_stats == NULL);
 
        num_stats = ARRAY_SIZE(mps_stats) + num_private_stats;
-       stats = lprocfs_alloc_stats(num_stats, 0);
+       stats = lprocfs_stats_alloc(num_stats, 0);
        if (!stats)
                return -ENOMEM;
 
        for (i = 0; i < ARRAY_SIZE(mps_stats); i++) {
-               lprocfs_counter_init(stats, i, 0, mps_stats[i], "reqs");
+               lprocfs_counter_init(stats, i, LPROCFS_TYPE_REQS,
+                                    mps_stats[i]);
                if (!stats->ls_cnt_header[i].lc_name) {
                        CERROR("Missing md_stat initializer md_op operation at offset %d. Aborting.\n",
                               i);
@@ -1526,9 +1645,9 @@ int lprocfs_alloc_md_stats(struct obd_device *obd,
                }
        }
 
-       rc = lprocfs_register_stats(obd->obd_proc_entry, "md_stats", stats);
+       rc = lprocfs_stats_register(obd->obd_proc_entry, "md_stats", stats);
        if (rc < 0) {
-               lprocfs_free_stats(&stats);
+               lprocfs_stats_free(&stats);
        } else {
                obd->obd_md_stats = stats;
        }
@@ -1543,31 +1662,25 @@ void lprocfs_free_md_stats(struct obd_device *obd)
 
        if (stats) {
                obd->obd_md_stats = NULL;
-               lprocfs_free_stats(&stats);
+               lprocfs_stats_free(&stats);
        }
 }
 EXPORT_SYMBOL(lprocfs_free_md_stats);
 
+/**
+ * Set up the six LDLM request counters in @ldlm_stats.
+ *
+ * Each counter is stored at index (opcode - LDLM_FIRST_OPC), is typed
+ * LPROCFS_TYPE_REQS and is named after its opcode in lower case.
+ */
 void lprocfs_init_ldlm_stats(struct lprocfs_stats *ldlm_stats)
 {
-       lprocfs_counter_init(ldlm_stats,
-                            LDLM_ENQUEUE - LDLM_FIRST_OPC,
-                            0, "ldlm_enqueue", "reqs");
-       lprocfs_counter_init(ldlm_stats,
-                            LDLM_CONVERT - LDLM_FIRST_OPC,
-                            0, "ldlm_convert", "reqs");
-       lprocfs_counter_init(ldlm_stats,
-                            LDLM_CANCEL - LDLM_FIRST_OPC,
-                            0, "ldlm_cancel", "reqs");
-       lprocfs_counter_init(ldlm_stats,
-                            LDLM_BL_CALLBACK - LDLM_FIRST_OPC,
-                            0, "ldlm_bl_callback", "reqs");
-       lprocfs_counter_init(ldlm_stats,
-                            LDLM_CP_CALLBACK - LDLM_FIRST_OPC,
-                            0, "ldlm_cp_callback", "reqs");
-       lprocfs_counter_init(ldlm_stats,
-                            LDLM_GL_CALLBACK - LDLM_FIRST_OPC,
-                            0, "ldlm_gl_callback", "reqs");
+       lprocfs_counter_init(ldlm_stats, LDLM_ENQUEUE - LDLM_FIRST_OPC,
+                            LPROCFS_TYPE_REQS, "ldlm_enqueue");
+       lprocfs_counter_init(ldlm_stats, LDLM_CONVERT - LDLM_FIRST_OPC,
+                            LPROCFS_TYPE_REQS, "ldlm_convert");
+       lprocfs_counter_init(ldlm_stats, LDLM_CANCEL - LDLM_FIRST_OPC,
+                            LPROCFS_TYPE_REQS, "ldlm_cancel");
+       lprocfs_counter_init(ldlm_stats, LDLM_BL_CALLBACK - LDLM_FIRST_OPC,
+                            LPROCFS_TYPE_REQS, "ldlm_bl_callback");
+       lprocfs_counter_init(ldlm_stats, LDLM_CP_CALLBACK - LDLM_FIRST_OPC,
+                            LPROCFS_TYPE_REQS, "ldlm_cp_callback");
+       lprocfs_counter_init(ldlm_stats, LDLM_GL_CALLBACK - LDLM_FIRST_OPC,
+                            LPROCFS_TYPE_REQS, "ldlm_gl_callback");
 }
 EXPORT_SYMBOL(lprocfs_init_ldlm_stats);
 
@@ -1587,8 +1700,6 @@ __s64 lprocfs_read_helper(struct lprocfs_counter *lc,
                        break;
                case LPROCFS_FIELDS_FLAGS_SUM:
                        ret = lc->lc_sum;
-                       if ((flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0)
-                               ret += lc->lc_sum_irq;
                        break;
                case LPROCFS_FIELDS_FLAGS_MIN:
                        ret = lc->lc_min;
@@ -1597,7 +1708,7 @@ __s64 lprocfs_read_helper(struct lprocfs_counter *lc,
                        ret = lc->lc_max;
                        break;
                case LPROCFS_FIELDS_FLAGS_AVG:
-                       ret = (lc->lc_max - lc->lc_min) / 2;
+                       ret = div64_u64(lc->lc_sum, lc->lc_count);
                        break;
                case LPROCFS_FIELDS_FLAGS_SUMSQUARE:
                        ret = lc->lc_sumsquare;
@@ -1849,14 +1960,14 @@ EXPORT_SYMBOL(lprocfs_find_named_value);
 int lprocfs_seq_create(struct proc_dir_entry *parent,
                       const char *name,
                       mode_t mode,
-                      const struct file_operations *seq_fops,
+                      const struct proc_ops *seq_fops,
                       void *data)
 {
        struct proc_dir_entry *entry;
        ENTRY;
 
        /* Disallow secretly (un)writable entries. */
-       LASSERT((seq_fops->write == NULL) == ((mode & 0222) == 0));
+       LASSERT(!seq_fops->proc_write == !(mode & 0222));
 
        entry = proc_create_data(name, mode, parent, seq_fops, data);
 
@@ -1870,7 +1981,7 @@ EXPORT_SYMBOL(lprocfs_seq_create);
 int lprocfs_obd_seq_create(struct obd_device *obd,
                           const char *name,
                           mode_t mode,
-                          const struct file_operations *seq_fops,
+                          const struct proc_ops *seq_fops,
                           void *data)
 {
        return lprocfs_seq_create(obd->obd_proc_entry, name,
@@ -1919,6 +2030,95 @@ void lprocfs_oh_clear(struct obd_histogram *oh)
 }
 EXPORT_SYMBOL(lprocfs_oh_clear);
 
+/**
+ * Count one sample in bucket @value of the per-CPU histogram @oh.
+ * A @value at or beyond OBD_HIST_MAX is accounted in the last bucket.
+ */
+void lprocfs_oh_tally_pcpu(struct obd_hist_pcpu *oh,
+                          unsigned int value)
+{
+       unsigned int bucket;
+
+       bucket = value < OBD_HIST_MAX ? value : OBD_HIST_MAX - 1;
+       percpu_counter_inc(&oh->oh_pc_buckets[bucket]);
+}
+EXPORT_SYMBOL(lprocfs_oh_tally_pcpu);
+
+/**
+ * Count one sample of @value in the per-CPU histogram @oh, using a
+ * bucket index of fls(@value - 1) capped at OBD_HIST_MAX.  A @value
+ * of zero goes into bucket 0.  The final clamping to a valid bucket is
+ * left to lprocfs_oh_tally_pcpu().
+ */
+void lprocfs_oh_tally_log2_pcpu(struct obd_hist_pcpu *oh,
+                               unsigned int value)
+{
+       unsigned int bucket;
+
+       bucket = likely(value != 0) ? min(fls(value - 1), OBD_HIST_MAX) : 0;
+
+       lprocfs_oh_tally_pcpu(oh, bucket);
+}
+EXPORT_SYMBOL(lprocfs_oh_tally_log2_pcpu);
+
+/**
+ * Return the summed count of bucket @value of the per-CPU histogram @oh.
+ * @value is used as the bucket index as-is; it is not range-checked here.
+ */
+unsigned long lprocfs_oh_counter_pcpu(struct obd_hist_pcpu *oh,
+                                     unsigned int value)
+{
+       unsigned long total = percpu_counter_sum(&oh->oh_pc_buckets[value]);
+
+       return total;
+}
+EXPORT_SYMBOL(lprocfs_oh_counter_pcpu);
+
+/**
+ * Return the number of samples in the per-CPU histogram @oh, i.e. the
+ * sum of percpu_counter_sum() over all OBD_HIST_MAX buckets.
+ */
+unsigned long lprocfs_oh_sum_pcpu(struct obd_hist_pcpu *oh)
+{
+       unsigned long total = 0;
+       int bucket = 0;
+
+       while (bucket < OBD_HIST_MAX) {
+               total += percpu_counter_sum(&oh->oh_pc_buckets[bucket]);
+               bucket++;
+       }
+
+       return total;
+}
+EXPORT_SYMBOL(lprocfs_oh_sum_pcpu);
+
+/**
+ * Initialize all OBD_HIST_MAX bucket counters of the per-CPU histogram
+ * @oh to zero and mark it initialized.  Calling this on a histogram
+ * that is already marked initialized does nothing.
+ *
+ * \retval 0           @oh is ready for use
+ * \retval non-zero    error from percpu_counter_init(); the buckets set
+ *                     up before the failure have been destroyed again
+ */
+int lprocfs_oh_alloc_pcpu(struct obd_hist_pcpu *oh)
+{
+       int idx;
+
+       if (oh->oh_initialized)
+               return 0;
+
+       for (idx = 0; idx < OBD_HIST_MAX; idx++) {
+               int rc = percpu_counter_init(&oh->oh_pc_buckets[idx], 0,
+                                            GFP_KERNEL);
+
+               if (rc == 0)
+                       continue;
+
+               /* undo the buckets that were set up before this one */
+               while (--idx >= 0)
+                       percpu_counter_destroy(&oh->oh_pc_buckets[idx]);
+
+               return rc;
+       }
+
+       oh->oh_initialized = true;
+
+       return 0;
+}
+EXPORT_SYMBOL(lprocfs_oh_alloc_pcpu);
+
+/**
+ * Reset every bucket of the per-CPU histogram @oh to zero.
+ *
+ * Nothing is done unless @oh is marked initialized, matching the checks
+ * in lprocfs_oh_alloc_pcpu() and lprocfs_oh_release_pcpu(): the bucket
+ * counters are only valid between those two calls and must not be
+ * written outside of that window.
+ */
+void lprocfs_oh_clear_pcpu(struct obd_hist_pcpu *oh)
+{
+       int i;
+
+       if (!oh->oh_initialized)
+               return;
+
+       for (i = 0; i < OBD_HIST_MAX; i++)
+               percpu_counter_set(&oh->oh_pc_buckets[i], 0);
+}
+EXPORT_SYMBOL(lprocfs_oh_clear_pcpu);
+
+/**
+ * Destroy all bucket counters of the per-CPU histogram @oh and clear
+ * its initialized mark.  A histogram that is not marked initialized is
+ * left untouched, so the call may be repeated.
+ */
+void lprocfs_oh_release_pcpu(struct obd_hist_pcpu *oh)
+{
+       int bucket;
+
+       if (oh->oh_initialized) {
+               for (bucket = 0; bucket < OBD_HIST_MAX; bucket++)
+                       percpu_counter_destroy(&oh->oh_pc_buckets[bucket]);
+
+               oh->oh_initialized = false;
+       }
+}
+EXPORT_SYMBOL(lprocfs_oh_release_pcpu);
+
 ssize_t lustre_attr_show(struct kobject *kobj,
                         struct attribute *attr, char *buf)
 {
@@ -2156,7 +2356,7 @@ int lprocfs_wr_nosquash_nids(const char __user *buffer, unsigned long count,
                RETURN(count);
        }
 
-       if (cfs_parse_nidlist(kernbuf, count, &tmp) <= 0) {
+       if (cfs_parse_nidlist(kernbuf, &tmp) < 0) {
                errmsg = "can't parse";
                GOTO(failed, rc = -EINVAL);
        }