Whamcloud - gitweb
LU-11850 obd: use netlink to get lustre stats 56/34256/28
authorJames Simmons <jsimmons@infradead.org>
Sun, 18 Aug 2024 00:18:18 +0000 (20:18 -0400)
committerOleg Drokin <green@whamcloud.com>
Mon, 2 Dec 2024 05:41:02 +0000 (05:41 +0000)
This adds the ability to collect performance metrics from lustre
by a means other than proc / debugfs files. The move to debugfs
has limited the scope of access to root only. Additionally, there
is a high cost to accessing many virtual file system files to
collect that data. Netlink will scale much better in this case and
also offers a much more flexible API.

The new ldebugfs_stats_alloc() replaces lprocfs_stats_alloc() and
registers the stats to be accessible BOTH through debugfs AND
through Netlink. The new global "lstats_list" contains a list of
all registered sets of statistics, so it mirrors a subset of
debugfs. Netlink access can report on any statistics registered in
lstats_list.

Test-Parameters: trivial
Change-Id: If2d662baa62348fe6f0dd5c8d77344650c2a27d8
Signed-off-by: James Simmons <jsimmons@infradead.org>
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/34256
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Shaun Tancheff <shaun.tancheff@hpe.com>
Reviewed-by: Neil Brown <neilb@suse.de>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
20 files changed:
lnet/include/lnet/lib-types.h
lustre/include/lprocfs_status.h
lustre/include/lustre_kernelcomm.h
lustre/include/uapi/linux/lustre/lustre_kernelcomm.h
lustre/ldlm/ldlm_pool.c
lustre/llite/lproc_llite.c
lustre/mdt/mdt_lproc.c
lustre/mgs/lproc_mgs.c
lustre/obdclass/kernelcomm.c
lustre/obdclass/lprocfs_status.c
lustre/obdclass/lprocfs_status_server.c
lustre/obdecho/echo.c
lustre/ofd/lproc_ofd.c
lustre/ofd/ofd_dev.c
lustre/osd-ldiskfs/osd_lproc.c
lustre/osd-zfs/osd_lproc.c
lustre/ptlrpc/lproc_ptlrpc.c
lustre/utils/liblustreapi_param.c
lustre/utils/lustre_param.c
lustre/utils/lustreapi_internal.h

index 6e8d5b9..8a13cc1 100644 (file)
@@ -2074,12 +2074,6 @@ struct lnet {
        atomic_t                        ln_pb_update_ready;
 };
 
-struct genl_filter_list {
-       struct list_head         lp_list;
-       void                    *lp_cursor;
-       bool                     lp_first;
-};
-
 static const struct nla_policy scalar_attr_policy[LN_SCALAR_MAX + 1] = {
        [LN_SCALAR_ATTR_LIST]           = { .type = NLA_NESTED },
        [LN_SCALAR_ATTR_LIST_SIZE]      = { .type = NLA_U16 },
index 3bd5611..a7a9e9f 100644 (file)
@@ -179,8 +179,14 @@ enum lprocfs_fields_flags {
 };
 
 struct lprocfs_stats {
+       /* source for the stats */
+       char                            *ls_source;
+       /* index in Xarray */
+       unsigned int                    ls_index;
        /* # of counters */
        unsigned short                  ls_num;
+       /* track reference */
+       struct kref                     ls_refcount;
        /* 1 + the biggest cpu # whose ls_percpu slot has been allocated */
        unsigned short                  ls_biggest_alloc_num;
        enum lprocfs_stats_flags        ls_flags;
@@ -398,9 +404,8 @@ struct brw_stats {
 int lprocfs_init_brw_stats(struct brw_stats *brw_stats);
 void lprocfs_fini_brw_stats(struct brw_stats *brw_stats);
 
-void ldebugfs_register_osd_stats(struct dentry *parent,
-                                struct brw_stats *brw_stats,
-                                struct lprocfs_stats *stats);
+void ldebugfs_register_brw_stats(struct dentry *parent,
+                                struct brw_stats *brw_stats);
 #endif /* HAVE_SERVER_SUPPORT */
 
 #define EXTRA_FIRST_OPC LDLM_GLIMPSE_ENQUEUE
@@ -495,8 +500,12 @@ lprocfs_stats_alloc(unsigned int num, enum lprocfs_stats_flags flags);
 extern void lprocfs_stats_clear(struct lprocfs_stats *stats);
 extern void lprocfs_stats_free(struct lprocfs_stats **stats);
 extern void lprocfs_init_ldlm_stats(struct lprocfs_stats *ldlm_stats);
-extern int lprocfs_alloc_obd_stats(struct obd_device *obd,
-                                  unsigned int num_stats);
+struct lprocfs_stats *ldebugfs_stats_alloc(int num, char *name,
+                                          struct dentry *entry,
+                                          struct kobject *kobj,
+                                          enum lprocfs_stats_flags flags);
+extern int ldebugfs_alloc_obd_stats(struct obd_device *obd,
+                                   unsigned int num_stats);
 extern int lprocfs_alloc_md_stats(struct obd_device *obd,
                                  unsigned int num_private_stats);
 extern void lprocfs_counter_init(struct lprocfs_stats *stats, int index,
@@ -505,7 +514,7 @@ extern void lprocfs_counter_init(struct lprocfs_stats *stats, int index,
 extern void lprocfs_counter_init_units(struct lprocfs_stats *stats, int index,
                                       enum lprocfs_counter_config config,
                                       const char *name, const char *units);
-extern void lprocfs_free_obd_stats(struct obd_device *obd);
+extern void ldebugfs_free_obd_stats(struct obd_device *obd);
 extern void lprocfs_free_md_stats(struct obd_device *obd);
 struct obd_export;
 struct nid_stat;
@@ -979,22 +988,12 @@ static inline void lprocfs_init_ldlm_stats(struct lprocfs_stats *ldlm_stats)
 {
 }
 
-static inline int lprocfs_alloc_obd_stats(struct obd_device *obd,
-                                         unsigned int num_stats)
-{
-       return 0;
-}
-
 static inline int lprocfs_alloc_md_stats(struct obd_device *obd,
                                         unsigned int num_private_stats)
 {
        return 0;
 }
 
-static inline void lprocfs_free_obd_stats(struct obd_device *obd)
-{
-}
-
 static inline void lprocfs_free_md_stats(struct obd_device *obd)
 {
 }
index bb1e82f..cb85267 100644 (file)
@@ -19,6 +19,9 @@
 #ifndef __LUSTRE_KERNELCOMM_H__
 #define __LUSTRE_KERNELCOMM_H__
 
+#include <linux/generic-radix-tree.h>
+#include <net/genetlink.h>
+#include <net/sock.h>
 /* For declarations shared with userspace */
 #include <uapi/linux/lustre/lustre_kernelcomm.h>
 
@@ -56,6 +59,106 @@ enum lustre_device_attrs {
 
 #define LUSTRE_DEVICE_ATTR_MAX (__LUSTRE_DEVICE_ATTR_MAX_PLUS_ONE - 1)
 
+/**
+ * enum lustre_param_list_attrs              - General header to list all sources
+ *                                     supporting a specific query.
+ *
+ * @LUSTRE_PARAM_ATTR_UNSPEC:          unspecified attribute to catch errors
+ *
+ * @LUSTRE_PARAM_ATTR_HDR:             groups params belong to (NLA_NUL_STRING)
+ * @LUSTRE_PARAM_ATTR_SOURCE:          source of the params (NLA_STRING)
+ */
+enum lustre_param_list_attrs {
+       LUSTRE_PARAM_ATTR_UNSPEC = 0,
+
+       LUSTRE_PARAM_ATTR_HDR,
+       LUSTRE_PARAM_ATTR_SOURCE,
+
+       __LUSTRE_PARAM_ATTR_MAX_PLUS_ONE
+};
+
+#define LUSTRE_PARAM_ATTR_MAX (__LUSTRE_PARAM_ATTR_MAX_PLUS_ONE - 1)
+
+/**
+ * enum lustre_stats_attrs          - Lustre stats netlink attributes used
+ *                                    to compose messages for sending or
+ *                                    receiving.
+ *
+ * @LUSTRE_STATS_ATTR_UNSPEC:         unspecified attribute to catch errors
+ * @LUSTRE_STATS_ATTR_PAD:            padding for 64-bit attributes, ignore
+ *
+ * @LUSTRE_STATS_ATTR_HDR:            groups stats belong to (NLA_NUL_STRING)
+ * @LUSTRE_STATS_ATTR_SOURCE:         source of the stats (NLA_STRING)
+ * @LUSTRE_STATS_ATTR_TIMESTAMP:       time of collection in nanoseconds
+ *                                    (NLA_S64)
+ * @LUSTRE_STATS_ATTR_START_TIME:      start time of collection (NLA_S64)
+ * @LUSTRE_STATS_ATTR_ELAPSE_TIME:     elapsed time of collection (NLA_S64)
+ * @LUSTRE_STATS_ATTR_DATASET:        bookmarks for that stats data
+ *                                    (NLA_NESTED)
+ */
+enum lustre_stats_attrs {
+       LUSTRE_STATS_ATTR_UNSPEC = 0,
+       LUSTRE_STATS_ATTR_PAD = LUSTRE_STATS_ATTR_UNSPEC,
+
+       LUSTRE_STATS_ATTR_HDR,
+       LUSTRE_STATS_ATTR_SOURCE,
+       LUSTRE_STATS_ATTR_TIMESTAMP,
+       LUSTRE_STATS_ATTR_START_TIME,
+       LUSTRE_STATS_ATTR_ELAPSE_TIME,
+       LUSTRE_STATS_ATTR_DATASET,
+
+       __LUSTRE_STATS_ATTR_MAX_PLUS_ONE,
+};
+
+#define LUSTRE_STATS_ATTR_MAX  (__LUSTRE_STATS_ATTR_MAX_PLUS_ONE - 1)
+
+/**
+ * enum lustre_stats_dataset_attrs    - Lustre stats counter's netlink
+ *                                     attributes used to compose messages
+ *                                     for sending or receiving.
+ *
+ * @LUSTRE_STATS_ATTR_DATASET_UNSPEC:  unspecified attribute to catch errors
+ * @LUSTRE_STATS_ATTR_DATASET_PAD:     padding for 64-bit attributes, ignore
+ *
+ * @LUSTRE_STATS_ATTR_DATASET_NAME:    name of counter (NLA_NUL_STRING)
+ * @LUSTRE_STATS_ATTR_DATASET_COUNT:   counter iteration (NLA_U64)
+ * @LUSTRE_STATS_ATTR_DATASET_UNITS:   units of counter values (NLA_STRING)
+ * @LUSTRE_STATS_ATTR_DATASET_MINIMUM: smallest counter value collected
+ *                                     (NLA_U64)
+ * @LUSTRE_STATS_ATTR_DATASET_MAXIMUM: largest count value collected (NLA_U64)
+ * @LUSTRE_STATS_ATTR_DATASET_SUM:     total of all values of the counter
+ *                                     (NLA_U64)
+ * @LUSTRE_STATS_ATTR_DATASET_SUMSQUARE: Sum of the square of all values.
+ *                                      Allows user land apps to calculate
+ *                                      standard deviation. (NLA_U64)
+ */
+enum lustre_stats_dataset_attrs {
+       LUSTRE_STATS_ATTR_DATASET_UNSPEC = 0,
+       LUSTRE_STATS_ATTR_DATASET_PAD = LUSTRE_STATS_ATTR_DATASET_UNSPEC,
+
+       LUSTRE_STATS_ATTR_DATASET_NAME,
+       LUSTRE_STATS_ATTR_DATASET_COUNT,
+       LUSTRE_STATS_ATTR_DATASET_UNITS,
+       LUSTRE_STATS_ATTR_DATASET_MINIMUM,
+       LUSTRE_STATS_ATTR_DATASET_MAXIMUM,
+       LUSTRE_STATS_ATTR_DATASET_SUM,
+       LUSTRE_STATS_ATTR_DATASET_SUMSQUARE,
+
+       __LUSTRE_STATS_ATTR_DATASET_MAX_PLUS_ONE,
+};
+
+#define LUSTRE_STATS_ATTR_DATASET_MAX  (__LUSTRE_STATS_ATTR_DATASET_MAX_PLUS_ONE - 1)
+
+struct lustre_stats_list {
+       GENRADIX(struct lprocfs_stats *)        gfl_list;
+       unsigned int                            gfl_count;
+       unsigned int                            gfl_index;
+};
+
+unsigned int lustre_stats_scan(struct lustre_stats_list *slist, const char *filter);
+int lustre_stats_dump(struct sk_buff *msg, struct netlink_callback *cb);
+int lustre_stats_done(struct netlink_callback *cb);
+
 /* prototype for callback function on kuc groups */
 typedef int (*libcfs_kkuc_cb_t)(void *data, void *cb_arg);
 
index ad52cf4..71c0d1b 100644 (file)
 #include <linux/types.h>
 
 #define LUSTRE_GENL_NAME               "lustre"
-#define LUSTRE_GENL_VERSION            0x1
+#define LUSTRE_GENL_VERSION            0x2
 
 /*
  * enum lustre_commands                      - Supported Lustre Netlink commands
  *
  * @LUSTRE_CMD_UNSPEC:                 unspecified command to catch errors
+ *
  * @LUSTRE_CMD_DEVICES:                        command to manage the Lustre devices
+ * @LUSTRE_CMD_STATS:                  Lustre stats collection command
  */
 enum lustre_commands {
        LUSTRE_CMD_UNSPEC       = 0,
+
        LUSTRE_CMD_DEVICES      = 1,
+       LUSTRE_CMD_STATS        = 5,
 
        __LUSTRE_CMD_MAX_PLUS_ONE
 };
index f84c88f..ab24521 100644 (file)
@@ -820,8 +820,10 @@ static int ldlm_pool_debugfs_init(struct ldlm_pool *pl)
        ldlm_add_var(&pool_vars[0], pl->pl_debugfs_entry, "state", pl,
                     &lprocfs_pool_state_fops);
 
-       pl->pl_stats = lprocfs_stats_alloc(LDLM_POOL_LAST_STAT -
-                                          LDLM_POOL_FIRST_STAT, 0);
+       pl->pl_stats = ldebugfs_stats_alloc(LDLM_POOL_LAST_STAT -
+                                           LDLM_POOL_FIRST_STAT, "stats",
+                                           pl->pl_debugfs_entry,
+                                           &pl->pl_kobj, 0);
        if (!pl->pl_stats)
                GOTO(out, rc = -ENOMEM);
 
@@ -857,8 +859,6 @@ static int ldlm_pool_debugfs_init(struct ldlm_pool *pl)
        lprocfs_counter_init(pl->pl_stats, LDLM_POOL_TIMING_STAT,
                             LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_SECS,
                             "recalc_timing");
-       debugfs_create_file("stats", 0644, pl->pl_debugfs_entry,
-                           pl->pl_stats, &ldebugfs_stats_seq_fops);
 
        EXIT;
 out:
@@ -912,11 +912,11 @@ int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns,
                pl->pl_recalc_period = LDLM_POOL_CLI_DEF_RECALC_PERIOD;
        }
        pl->pl_client_lock_volume = 0;
-       rc = ldlm_pool_debugfs_init(pl);
+       rc = ldlm_pool_sysfs_init(pl);
        if (rc)
                RETURN(rc);
 
-       rc = ldlm_pool_sysfs_init(pl);
+       rc = ldlm_pool_debugfs_init(pl);
        if (rc)
                RETURN(rc);
 
index 7d2b3a4..790a389 100644 (file)
@@ -2566,8 +2566,22 @@ int ll_debugfs_register_super(struct super_block *sb, const char *name)
        ENTRY;
        LASSERT(sbi);
 
+       /* Yes we also register sysfs mount kset here as well */
+       sbi->ll_kset.kobj.parent = llite_kobj;
+       sbi->ll_kset.kobj.ktype = &sbi_ktype;
+       init_completion(&sbi->ll_kobj_unregister);
+       err = kobject_set_name(&sbi->ll_kset.kobj, "%s", name);
+       if (err)
+               RETURN(err);
+
+       err = kset_register(&sbi->ll_kset);
+       if (err)
+               RETURN(err);
+
+       lsi->lsi_kobj = kobject_get(&sbi->ll_kset.kobj);
+
        if (IS_ERR_OR_NULL(llite_root))
-               goto out_ll_kset;
+               RETURN(0);
 
        sbi->ll_debugfs_entry = debugfs_create_dir(name, llite_root);
        ldebugfs_add_vars(sbi->ll_debugfs_entry, lprocfs_llite_obd_vars, sb);
@@ -2586,9 +2600,11 @@ int ll_debugfs_register_super(struct super_block *sb, const char *name)
                            &ll_rw_offset_stats_fops);
 
        /* File operations stats */
-       sbi->ll_stats = lprocfs_stats_alloc(LPROC_LL_FILE_OPCODES,
-                                           LPROCFS_STATS_FLAG_NONE);
-       if (sbi->ll_stats == NULL)
+       sbi->ll_stats = ldebugfs_stats_alloc(LPROC_LL_FILE_OPCODES, "stats",
+                                            sbi->ll_debugfs_entry,
+                                            &sbi->ll_kset.kobj,
+                                            LPROCFS_STATS_FLAG_NONE);
+       if (!sbi->ll_stats)
                GOTO(out_debugfs, err = -ENOMEM);
 
        /* do counter init */
@@ -2598,9 +2614,6 @@ int ll_debugfs_register_super(struct super_block *sb, const char *name)
                                     llite_opcode_table[id].lfo_config,
                                     llite_opcode_table[id].lfo_opname);
 
-       debugfs_create_file("stats", 0644, sbi->ll_debugfs_entry,
-                           sbi->ll_stats, &ldebugfs_stats_seq_fops);
-
        sbi->ll_ra_stats = lprocfs_stats_alloc(ARRAY_SIZE(ra_stat_string),
                                               LPROCFS_STATS_FLAG_NONE);
        if (sbi->ll_ra_stats == NULL)
@@ -2621,24 +2634,7 @@ int ll_debugfs_register_super(struct super_block *sb, const char *name)
        debugfs_create_file("read_ahead_stats", 0644, sbi->ll_debugfs_entry,
                            sbi->ll_ra_stats, &ldebugfs_stats_seq_fops);
 
-out_ll_kset:
-       /* Yes we also register sysfs mount kset here as well */
-       sbi->ll_kset.kobj.parent = llite_kobj;
-       sbi->ll_kset.kobj.ktype = &sbi_ktype;
-       init_completion(&sbi->ll_kobj_unregister);
-       err = kobject_set_name(&sbi->ll_kset.kobj, "%s", name);
-       if (err)
-               GOTO(out_ra_stats, err);
-
-       err = kset_register(&sbi->ll_kset);
-       if (err)
-               GOTO(out_ra_stats, err);
-
-       lsi->lsi_kobj = kobject_get(&sbi->ll_kset.kobj);
-
        RETURN(0);
-out_ra_stats:
-       lprocfs_stats_free(&sbi->ll_ra_stats);
 out_stats:
        lprocfs_stats_free(&sbi->ll_stats);
 out_debugfs:
index fc335ef..3fe23f1 100644 (file)
@@ -1635,6 +1635,6 @@ void mdt_tunables_fini(struct mdt_device *mdt)
        tgt_tunables_fini(&mdt->mdt_lut);
        lprocfs_obd_cleanup(obd);
        lprocfs_free_md_stats(obd);
-       lprocfs_free_obd_stats(obd);
+       ldebugfs_free_obd_stats(obd);
        lprocfs_job_stats_fini(obd);
 }
index 69a9335..ca03239 100644 (file)
@@ -360,7 +360,7 @@ void lproc_mgs_cleanup(struct mgs_device *mgs)
 
        lprocfs_free_per_client_stats(obd);
        lprocfs_obd_cleanup(obd);
-       lprocfs_free_obd_stats(obd);
+       ldebugfs_free_obd_stats(obd);
        lprocfs_free_md_stats(obd);
 }
 
index 64cb167..f130d0f 100644 (file)
@@ -38,8 +38,7 @@
 
 #include <linux/file.h>
 #include <linux/glob.h>
-#include <net/genetlink.h>
-#include <net/sock.h>
+#include <linux/types.h>
 
 #include <libcfs/linux/linux-net.h>
 #include <obd_class.h>
@@ -289,8 +288,532 @@ static int lustre_device_done(struct netlink_callback *cb)
        return 0;
 }
 
+struct ln_key_list stats_params = {
+       .lkl_maxattr    = LUSTRE_PARAM_ATTR_MAX,
+       .lkl_list       = {
+               [LUSTRE_PARAM_ATTR_HDR] = {
+                       .lkp_value      = "stats",
+                       .lkp_key_format = LNKF_SEQUENCE | LNKF_MAPPING,
+                       .lkp_data_type  = NLA_NUL_STRING,
+               },
+               [LUSTRE_PARAM_ATTR_SOURCE] = {
+                       .lkp_value      = "source",
+                       .lkp_data_type  = NLA_STRING,
+               },
+       },
+};
+
+static const struct ln_key_list stats_list = {
+       .lkl_maxattr                    = LUSTRE_STATS_ATTR_MAX,
+       .lkl_list                       = {
+               [LUSTRE_STATS_ATTR_HDR] = {
+                       .lkp_value              = "stats",
+                       .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
+                       .lkp_data_type          = NLA_NUL_STRING,
+               },
+               [LUSTRE_STATS_ATTR_SOURCE]      = {
+                       .lkp_value              = "source",
+                       .lkp_data_type          = NLA_STRING,
+               },
+               [LUSTRE_STATS_ATTR_TIMESTAMP]   = {
+                       .lkp_value              = "snapshot_time",
+                       .lkp_data_type          = NLA_S64,
+               },
+               [LUSTRE_STATS_ATTR_START_TIME]  = {
+                       .lkp_value              = "start_time",
+                       .lkp_data_type          = NLA_S64,
+               },
+               [LUSTRE_STATS_ATTR_ELAPSE_TIME] = {
+                       .lkp_value              = "elapsed_time",
+                       .lkp_data_type          = NLA_S64,
+               },
+               [LUSTRE_STATS_ATTR_DATASET]     = {
+                       .lkp_key_format         = LNKF_FLOW | LNKF_MAPPING,
+                       .lkp_data_type          = NLA_NESTED,
+               },
+       },
+};
+
+static const struct ln_key_list stats_dataset_list = {
+       .lkl_maxattr                            = LUSTRE_STATS_ATTR_DATASET_MAX,
+       .lkl_list                               = {
+               [LUSTRE_STATS_ATTR_DATASET_NAME]        = {
+                       .lkp_data_type                  = NLA_NUL_STRING,
+               },
+               [LUSTRE_STATS_ATTR_DATASET_COUNT]       = {
+                       .lkp_value                      = "samples",
+                       .lkp_data_type                  = NLA_U64,
+               },
+               [LUSTRE_STATS_ATTR_DATASET_UNITS]       = {
+                       .lkp_value                      = "units",
+                       .lkp_data_type                  = NLA_STRING,
+               },
+               [LUSTRE_STATS_ATTR_DATASET_MINIMUM]     = {
+                       .lkp_value                      = "min",
+                       .lkp_data_type                  = NLA_U64,
+               },
+               [LUSTRE_STATS_ATTR_DATASET_MAXIMUM]     = {
+                       .lkp_value                      = "max",
+                       .lkp_data_type                  = NLA_U64,
+               },
+               [LUSTRE_STATS_ATTR_DATASET_SUM]         = {
+                       .lkp_value                      = "sum",
+                       .lkp_data_type                  = NLA_U64,
+               },
+               [LUSTRE_STATS_ATTR_DATASET_SUMSQUARE]   = {
+                       .lkp_value                      = "stddev",
+                       .lkp_data_type                  = NLA_U64,
+               },
+       },
+};
+
+#ifndef HAVE_GENL_DUMPIT_INFO
+static struct cfs_genl_dumpit_info service_info = {
+       .family         = &lustre_family,
+};
+#endif
+
+static inline struct lustre_stats_list *
+stats_dump_ctx(struct netlink_callback *cb)
+{
+       return (struct lustre_stats_list *)cb->args[0];
+}
+
+int lustre_stats_done(struct netlink_callback *cb)
+{
+       struct lustre_stats_list *list = stats_dump_ctx(cb);
+
+       if (list) {
+               genradix_free(&list->gfl_list);
+               OBD_FREE(list, sizeof(*list));
+               cb->args[0] = 0;
+       }
+       return 0;
+}
+EXPORT_SYMBOL(lustre_stats_done);
+
+/* Min size for key table and its matching values:
+ *     header          strlen("stats")
+ *     source          strlen("source") + MAX_OBD_NAME * 2
+ *     timestamp       strlen("snapshot_time") + s64
+ *     start time      strlen("start time") + s64
+ *     elapsed_time    strlen("elapse time") + s64
+ */
+#define STATS_MSG_MIN_SIZE     (267 + 58)
+
+/* key table + values for each dataset entry:
+ *     dataset name    25
+ *     dataset count   strlen("samples") + u64
+ *     dataset units   strlen("units") + 5
+ *     dataset min     strlen("min") + u64
+ *     dataset max     strlen("max") + u64
+ *     dataset sum     strlen("sum") + u64
+ *     dataset stdev   strlen("stddev") + u64
+ */
+#define STATS_MSG_DATASET_SIZE (97)
+
+static int lustre_stats_start(struct netlink_callback *cb)
+{
+       struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
+       unsigned long len = STATS_MSG_MIN_SIZE;
+#ifdef HAVE_NL_PARSE_WITH_EXT_ACK
+       struct netlink_ext_ack *extack = NULL;
+#endif
+       struct lustre_stats_list *slist;
+       int msg_len = genlmsg_len(gnlh);
+       int rc = 0;
+
+#ifdef HAVE_NL_DUMP_WITH_EXT_ACK
+       extack = cb->extack;
+#endif
+#ifndef HAVE_GENL_DUMPIT_INFO
+       cb->args[1] = (unsigned long)&service_info;
+#endif
+       OBD_ALLOC(slist, sizeof(*slist));
+       if (!slist) {
+               NL_SET_ERR_MSG(extack, "failed to setup obd list");
+               return -ENOMEM;
+       }
+
+       genradix_init(&slist->gfl_list);
+       slist->gfl_index = 0;
+       cb->args[0] = (long)slist;
+
+       if (msg_len > 0) {
+               struct nlattr *params = genlmsg_data(gnlh);
+               struct nlattr *dev;
+               int rem;
+
+               if (!(nla_type(params) & LN_SCALAR_ATTR_LIST)) {
+                       NL_SET_ERR_MSG(extack, "no configuration");
+                       GOTO(report_err, rc);
+               }
+
+               nla_for_each_nested(dev, params, rem) {
+                       struct nlattr *item;
+                       int rem2;
+
+                       nla_for_each_nested(item, dev, rem2) {
+                               char filter[MAX_OBD_NAME * 2];
+
+                               if (nla_type(item) != LN_SCALAR_ATTR_VALUE ||
+                                   nla_strcmp(item, "source") != 0)
+                                       continue;
+
+                               item = nla_next(item, &rem2);
+                               if (nla_type(item) != LN_SCALAR_ATTR_VALUE) {
+                                       NL_SET_ERR_MSG(extack,
+                                                      "source has invalid value");
+                                       GOTO(report_err, rc = -EINVAL);
+                               }
+
+                               memset(filter, 0, sizeof(filter));
+                               rc = nla_strscpy(filter, item, sizeof(filter));
+                               if (rc < 0) {
+                                       NL_SET_ERR_MSG(extack,
+                                                      "source key string is invalud");
+                                       GOTO(report_err, rc);
+                               }
+
+                               rc = lustre_stats_scan(slist, filter);
+                               if (rc < 0) {
+                                       NL_SET_ERR_MSG(extack,
+                                                      "stat scan failure");
+                                       GOTO(report_err, rc);
+                               }
+
+                               if (gnlh->version)
+                                       len += STATS_MSG_DATASET_SIZE * rc;
+                               rc = 0;
+                       }
+               }
+       } else {
+               rc = lustre_stats_scan(slist, NULL);
+               if (rc < 0) {
+                       NL_SET_ERR_MSG(extack, "stat scan failure");
+                       GOTO(report_err, rc);
+               }
+
+               if (gnlh->version)
+                       len += STATS_MSG_DATASET_SIZE * rc;
+               rc = 0;
+       }
+
+       /* Older kernels only support 64K. Our stats can be huge. */
+       if (len >= (1UL << (sizeof(cb->min_dump_alloc) << 3))) {
+               struct lprocfs_stats **stats;
+               struct genradix_iter iter;
+
+               genradix_for_each(&slist->gfl_list, iter, stats)
+                       lprocfs_stats_free(stats);
+               NL_SET_ERR_MSG(extack, "Netlink msg is too large");
+               rc = -EMSGSIZE;
+       } else {
+               cb->min_dump_alloc = len;
+       }
+report_err:
+       if (rc < 0)
+               lustre_stats_done(cb);
+
+       return rc;
+}
+
+int lustre_stats_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+       const struct cfs_genl_dumpit_info *info = lnet_genl_dumpit_info(cb);
+       struct lustre_stats_list *slist = stats_dump_ctx(cb);
+       struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
+#ifdef HAVE_NL_PARSE_WITH_EXT_ACK
+       struct netlink_ext_ack *extack = NULL;
+#endif
+       int portid = NETLINK_CB(cb->skb).portid;
+       struct lprocfs_stats *prev = NULL;
+       int seq = cb->nlh->nlmsg_seq;
+       int idx = slist->gfl_index;
+       int count, i, rc = 0;
+       bool started = true;
+
+#ifdef HAVE_NL_DUMP_WITH_EXT_ACK
+       extack = cb->extack;
+#endif
+       while (idx < slist->gfl_count) {
+               struct lprocfs_stats **tmp, *stats;
+               struct nlattr *dataset = NULL;
+               void *ghdr = NULL;
+               char *src;
+
+               tmp = genradix_ptr(&slist->gfl_list, idx);
+               stats = tmp[0];
+               if (!stats)
+                       continue;
+
+               if (gnlh->version &&
+                  (!idx || (prev && strcmp(stats->ls_cnt_header[2].lc_name,
+                                           prev->ls_cnt_header[2].lc_name) != 0))) {
+                       size_t len = sizeof(struct ln_key_list);
+                       int flags = NLM_F_CREATE | NLM_F_MULTI;
+                       const struct ln_key_list **all;
+                       struct ln_key_list *start;
+
+                       /* LUSTRE_STATS_ATTR_MAX includes one stat entry
+                        * by default since we need to define what a stat
+                        * entry is.
+                        */
+                       count = LUSTRE_STATS_ATTR_MAX + stats->ls_num - 1;
+                       len += sizeof(struct ln_key_props) * count;
+                       OBD_ALLOC(start, len);
+                       if (!start) {
+                               NL_SET_ERR_MSG(extack,
+                                              "first key list allocation failure");
+                               GOTO(out_cancel, rc = -ENOMEM);
+                       }
+                       *start = stats_list; /* Set initial values */
+                       start->lkl_maxattr += stats->ls_num;
+                       for (i = LUSTRE_STATS_ATTR_MAX + 1;
+                            i <= start->lkl_maxattr; i++)
+                               start->lkl_list[i] =
+                                       stats_list.lkl_list[LUSTRE_STATS_ATTR_DATASET];
+
+                       OBD_ALLOC_PTR_ARRAY(all, stats->ls_num + 2);
+                       if (!all) {
+                               NL_SET_ERR_MSG(extack,
+                                              "key list allocation failure");
+                               OBD_FREE(start, len);
+                               GOTO(out_cancel, rc = -ENOMEM);
+                       }
+
+                       all[0] = start;
+                       for (i = 1; i <= stats->ls_num; i++)
+                               all[i] = &stats_dataset_list;
+                       all[i] = NULL;
+
+                       if (idx)
+                               flags |= NLM_F_REPLACE;
+                       rc = lnet_genl_send_scalar_list(msg, portid, seq,
+                                                       info->family, flags,
+                                                       gnlh->cmd, all);
+                       OBD_FREE_PTR_ARRAY(all, stats->ls_num + 2);
+                       OBD_FREE(start, len);
+                       if (rc < 0) {
+                               NL_SET_ERR_MSG(extack,
+                                              "failed to send key table");
+                               GOTO(out_cancel, rc);
+                       }
+               } else if (!gnlh->version && !idx) {
+                       /* We just want the source of the stats */
+                       const struct ln_key_list *all[] = {
+                               &stats_params, NULL
+                       };
+
+                       rc = lnet_genl_send_scalar_list(msg, portid, seq,
+                                                       info->family,
+                                                       NLM_F_CREATE | NLM_F_MULTI,
+                                                       gnlh->cmd, all);
+                       if (rc < 0) {
+                               NL_SET_ERR_MSG(extack,
+                                              "failed to send key table");
+                               GOTO(out_cancel, rc);
+                       }
+               }
+               prev = stats;
+
+               ghdr = genlmsg_put(msg, portid, seq, info->family, NLM_F_MULTI,
+                                 gnlh->cmd);
+               if (!ghdr)
+                       GOTO(out_cancel, rc = -EMSGSIZE);
+
+               if (started) {
+                       nla_put_string(msg, LUSTRE_STATS_ATTR_HDR, "");
+                       started = false;
+               }
+
+               src = stats->ls_source;
+               if (strstarts(stats->ls_source, ".fs.lustre."))
+                       src += strlen(".fs.lustre.");
+               nla_put_string(msg, LUSTRE_STATS_ATTR_SOURCE, src);
+
+               if (!gnlh->version) { /* We just want the source of the stats */
+                       idx++;
+                       GOTO(out_cancel, rc = 0);
+               }
+
+               rc = nla_put_s64(msg, LUSTRE_STATS_ATTR_TIMESTAMP,
+                                ktime_get_real_ns(), LUSTRE_STATS_ATTR_PAD);
+               if (rc < 0)
+                       GOTO(out_cancel, rc);
+
+               if (gnlh->version > 1) {
+                       rc = nla_put_s64(msg, LUSTRE_STATS_ATTR_START_TIME,
+                                        ktime_to_ns(stats->ls_init),
+                                        LUSTRE_STATS_ATTR_PAD);
+                       if (rc < 0)
+                               GOTO(out_cancel, rc);
+
+                       rc = nla_put_s64(msg, LUSTRE_STATS_ATTR_ELAPSE_TIME,
+                                        ktime_to_ns(ktime_sub(stats->ls_init,
+                                                              ktime_get())),
+                                        LUSTRE_STATS_ATTR_PAD);
+                       if (rc < 0)
+                               GOTO(out_cancel, rc);
+               }
+
+               i = 0;
+               for (count = 0; count < stats->ls_num; count++) {
+                       struct lprocfs_counter_header *hdr;
+                       struct lprocfs_counter ctr;
+                       struct nlattr *stat_attr;
+
+                       lprocfs_stats_collect(stats, count, &ctr);
+
+                       if (ctr.lc_count == 0)
+                               continue;
+
+                       hdr = &stats->ls_cnt_header[count];
+                       dataset = nla_nest_start(msg,
+                                                LUSTRE_STATS_ATTR_DATASET + i++);
+                       stat_attr = nla_nest_start(msg, 0);
+
+                       nla_put_string(msg, LUSTRE_STATS_ATTR_DATASET_NAME,
+                                      hdr->lc_name);
+                       nla_put_u64_64bit(msg, LUSTRE_STATS_ATTR_DATASET_COUNT,
+                                         ctr.lc_count,
+                                         LUSTRE_STATS_ATTR_DATASET_PAD);
+
+                       nla_put_string(msg, LUSTRE_STATS_ATTR_DATASET_UNITS,
+                                      hdr->lc_units);
+
+                       if (hdr->lc_config & LPROCFS_CNTR_AVGMINMAX) {
+                               nla_put_u64_64bit(msg,
+                                                 LUSTRE_STATS_ATTR_DATASET_MINIMUM,
+                                                 ctr.lc_min,
+                                                 LUSTRE_STATS_ATTR_DATASET_PAD);
+
+                               nla_put_u64_64bit(msg,
+                                                 LUSTRE_STATS_ATTR_DATASET_MAXIMUM,
+                                                 ctr.lc_max,
+                                                 LUSTRE_STATS_ATTR_DATASET_PAD);
+
+                               nla_put_u64_64bit(msg,
+                                                 LUSTRE_STATS_ATTR_DATASET_SUM,
+                                                 ctr.lc_sum,
+                                                 LUSTRE_STATS_ATTR_DATASET_PAD);
+
+                               if (hdr->lc_config & LPROCFS_CNTR_STDDEV) {
+                                       nla_put_u64_64bit(msg,
+                                                         LUSTRE_STATS_ATTR_DATASET_SUMSQUARE,
+                                                         ctr.lc_sumsquare,
+                                                         LUSTRE_STATS_ATTR_DATASET_PAD);
+                               }
+                       }
+                       nla_nest_end(msg, stat_attr);
+                       nla_nest_end(msg, dataset);
+               }
+               idx++;
+out_cancel:
+               lprocfs_stats_free(&stats);
+               if (rc < 0) {
+                       genlmsg_cancel(msg, ghdr);
+                       return rc;
+               }
+               genlmsg_end(msg, ghdr);
+       }
+       slist->gfl_index = idx;
+
+       return rc;
+}
+EXPORT_SYMBOL(lustre_stats_dump);
+
+#ifndef HAVE_NETLINK_CALLBACK_START
+/* Dump entry point used when the netlink layer provides no separate
+ * ->start() callback (HAVE_NETLINK_CALLBACK_START undefined). If no dump
+ * context exists yet, lustre_stats_start() is run first; a failure there is
+ * reported to the requester through lnet_nl_send_error(). Otherwise the
+ * request is handed to lustre_stats_dump().
+ */
+int lustre_old_stats_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+       int rc;
+
+       /* stats_dump_ctx() already has a list: go straight to the dump */
+       if (stats_dump_ctx(cb))
+               return lustre_stats_dump(msg, cb);
+
+       rc = lustre_stats_start(cb);
+       if (rc >= 0)
+               return lustre_stats_dump(msg, cb);
+
+       return lnet_nl_send_error(cb->skb, NETLINK_CB(cb->skb).portid,
+                                 cb->nlh->nlmsg_seq, rc);
+}
+#endif
+
+/* ->doit() handler for LUSTRE_CMD_STATS: clear every registered set of
+ * statistics whose source matches one of the "source" patterns carried in
+ * the request.
+ *
+ * The request is a nested scalar list; each "source" key must be followed by
+ * a value attribute holding the pattern handed to lustre_stats_scan().
+ *
+ * Every entry lustre_stats_scan() stores in the list carries a reference
+ * (taken with kref_get_unless_zero()), so all of them are dropped here and
+ * the radix tree is released on both the success and the error paths.
+ *
+ * Returns 0 on success or a negative errno; statistics are only cleared
+ * when the whole request was processed without error.
+ */
+static int lustre_stats_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+       struct nlmsghdr *nlh = nlmsg_hdr(skb);
+       struct genlmsghdr *gnlh = nlmsg_data(nlh);
+       struct nlattr *params = genlmsg_data(gnlh);
+       int msg_len, rem, idx = 0, rc = 0;
+       struct lustre_stats_list slist;
+       struct nlattr *attr;
+
+       msg_len = genlmsg_len(gnlh);
+       if (!msg_len) {
+               GENL_SET_ERR_MSG(info, "no configuration");
+               GOTO(report_err, rc = -ENOMSG);
+       }
+
+       if (!(nla_type(params) & LN_SCALAR_ATTR_LIST)) {
+               GENL_SET_ERR_MSG(info, "invalid configuration");
+               GOTO(report_err, rc = -EINVAL);
+       }
+
+       genradix_init(&slist.gfl_list);
+       slist.gfl_count = 0;
+
+       nla_for_each_nested(attr, params, rem) {
+               struct nlattr *prop;
+               int rem2;
+
+               if (nla_type(attr) != LN_SCALAR_ATTR_LIST)
+                       continue;
+
+               nla_for_each_nested(prop, attr, rem2) {
+                       char source[MAX_OBD_NAME * 2];
+
+                       if (nla_type(prop) != LN_SCALAR_ATTR_VALUE ||
+                           nla_strcmp(prop, "source") != 0)
+                               continue;
+
+                       /* The pattern is the attribute after the key; make
+                        * sure it really lies inside the nest before
+                        * looking at it.
+                        */
+                       prop = nla_next(prop, &rem2);
+                       if (!nla_ok(prop, rem2) ||
+                           nla_type(prop) != LN_SCALAR_ATTR_VALUE)
+                               GOTO(out_release, rc = -EINVAL);
+
+                       memset(source, 0, sizeof(source));
+                       rc = nla_strscpy(source, prop, sizeof(source));
+                       if (rc < 0)
+                               GOTO(out_release, rc);
+
+                       rc = lustre_stats_scan(&slist, source);
+                       if (rc < 0) {
+                               GENL_SET_ERR_MSG(info,
+                                                "stat scan failure");
+                               GOTO(out_release, rc);
+                       }
+                       rc = 0;
+               }
+       }
+
+out_release:
+       while (idx < slist.gfl_count) {
+               struct lprocfs_stats **stats;
+
+               stats = genradix_ptr(&slist.gfl_list, idx++);
+               if (!stats || !stats[0])
+                       continue;
+
+               if (rc == 0)
+                       lprocfs_stats_clear(stats[0]);
+               /* drop the reference taken by lustre_stats_scan() */
+               lprocfs_stats_free(&stats[0]);
+       }
+       genradix_free(&slist.gfl_list);
+report_err:
+       return rc;
+}
+
 static const struct genl_multicast_group lustre_mcast_grps[] = {
        { .name         = "devices",            },
+       { .name         = "stats",              },
 };
 
 static const struct genl_ops lustre_genl_ops[] = {
@@ -304,6 +827,17 @@ static const struct genl_ops lustre_genl_ops[] = {
 #endif
                .done           = lustre_device_done,
        },
+       {
+               .cmd            = LUSTRE_CMD_STATS,
+#ifdef HAVE_NETLINK_CALLBACK_START
+               .start          = lustre_stats_start,
+               .dumpit         = lustre_stats_dump,
+#else
+               .dumpit         = lustre_old_stats_dump,
+#endif
+               .done           = lustre_stats_done,
+               .doit           = lustre_stats_cmd,
+       },
 };
 
 static struct genl_family lustre_family = {
index 9f35021..5a22fc1 100644 (file)
 
 #define DEBUG_SUBSYSTEM S_CLASS
 
+#include <linux/glob.h>
 #include <obd_class.h>
 #include <lprocfs_status.h>
+#include <lustre_kernelcomm.h>
 
 #ifdef CONFIG_PROC_FS
 
@@ -1262,6 +1264,9 @@ struct lprocfs_stats *lprocfs_stats_alloc(unsigned int num,
        stats->ls_flags = flags;
        stats->ls_init = ktime_get_real();
        spin_lock_init(&stats->ls_lock);
+       kref_init(&stats->ls_refcount);
+       stats->ls_source = NULL;
+       stats->ls_index = -1;
 
        /* alloc num of counter headers */
        CFS_ALLOC_PTR_ARRAY(stats->ls_cnt_header, stats->ls_num);
@@ -1285,16 +1290,63 @@ fail:
 }
 EXPORT_SYMBOL(lprocfs_stats_alloc);
 
-void lprocfs_stats_free(struct lprocfs_stats **statsh)
+/* lstats_list is a mirror of those parts of debugfs which contain lustre
+ * statistics. It is used to provide netlink access to those statistics.
+ * Any lustre module can register or deregister a set of statistics.
+ */
+static atomic_t lstats_count = ATOMIC_INIT(0);
+static DEFINE_XARRAY_ALLOC(lstats_list);
+
+/* Allocate a set of @num counters and make it reachable both through
+ * debugfs (file @name under @debugfs_entry) and through the lstats_list
+ * xarray.
+ *
+ * The source name is the path of @kobj with every '/' replaced by '.'.
+ * It is built before the object is inserted into lstats_list so that a
+ * concurrent walker of the list never sees an entry without ls_source.
+ *
+ * Returns the new statistics, or NULL if any allocation fails.
+ */
+struct lprocfs_stats *ldebugfs_stats_alloc(int num, char *name,
+                                          struct dentry *debugfs_entry,
+                                          struct kobject *kobj,
+                                          enum lprocfs_stats_flags flags)
 {
-       struct lprocfs_stats *stats = *statsh;
+       struct lprocfs_stats *stats = lprocfs_stats_alloc(num, flags);
+       char *param;
+       int rc;
+
+       if (!stats)
+               return NULL;
+
+       /* ls_index is still -1 here, so a failure does not touch the list */
+       stats->ls_source = kobject_get_path(kobj, GFP_KERNEL);
+       if (!stats->ls_source) {
+               lprocfs_stats_free(&stats);
+               return NULL;
+       }
+
+       param = stats->ls_source;
+       while ((param = strchr(param, '/')) != NULL)
+               *param = '.';
+
+       xa_lock(&lstats_list);
+       rc = __xa_alloc(&lstats_list, &stats->ls_index, stats, xa_limit_31b,
+                       GFP_KERNEL);
+       if (rc < 0) {
+               /* Nothing was stored: mark the object as unregistered so
+                * the release path neither erases somebody else's slot nor
+                * decrements lstats_count.
+                */
+               stats->ls_index = -1;
+               xa_unlock(&lstats_list);
+               lprocfs_stats_free(&stats);
+               return NULL;
+       }
+       atomic_inc(&lstats_count);
+       xa_unlock(&lstats_list);
+
+       debugfs_create_file(name, 0644, debugfs_entry, stats,
+                           &ldebugfs_stats_seq_fops);
+       return stats;
+}
+EXPORT_SYMBOL(ldebugfs_stats_alloc);
+
+static void stats_free(struct kref *kref)
+{
+       struct lprocfs_stats *stats = container_of(kref, struct lprocfs_stats,
+                                                  ls_refcount);
        unsigned int num_entry;
        unsigned int percpusize;
        unsigned int i;
 
        if (!stats || stats->ls_num == 0)
                return;
-       *statsh = NULL;
 
        if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU)
                num_entry = 1;
@@ -1313,10 +1365,111 @@ void lprocfs_stats_free(struct lprocfs_stats **statsh)
                CFS_FREE_PTR_ARRAY(stats->ls_cnt_header, stats->ls_num);
        }
 
+       if (stats->ls_index != -1) {
+               xa_lock(&lstats_list);
+               __xa_erase(&lstats_list, stats->ls_index);
+               atomic_dec(&lstats_count);
+               xa_unlock(&lstats_list);
+       }
+
+       kfree(stats->ls_source); /* allocated by kobject_get_path */
+
        LIBCFS_FREE(stats, offsetof(typeof(*stats), ls_percpu[num_entry]));
 }
+
+/* Drop one reference on the statistics pointed to by @statsh. A NULL
+ * *@statsh is ignored. stats_free() runs when the last reference goes
+ * away, and only in that case is *@statsh reset to NULL.
+ *
+ * NOTE(review): while other references remain, *@statsh keeps pointing at
+ * the object although the caller's reference is gone - confirm that no
+ * caller relies on the handle being cleared unconditionally.
+ */
+void lprocfs_stats_free(struct lprocfs_stats **statsh)
+{
+       struct lprocfs_stats *stats = *statsh;
+
+       if (stats && kref_put(&stats->ls_refcount, stats_free))
+               *statsh = NULL;
+}
 EXPORT_SYMBOL(lprocfs_stats_free);
 
+/* Count the '.' separators in @str. */
+static unsigned int stats_count_dots(const char *str)
+{
+       unsigned int dots = 0;
+
+       for (; *str; str++)
+               if (*str == '.')
+                       dots++;
+
+       return dots;
+}
+
+/* Append to @slist every entry of lstats_list whose source matches the
+ * glob pattern @source (all entries with a non-empty source when @source
+ * is NULL). A leading ".fs.lustre." is skipped in each entry's source
+ * before matching.
+ *
+ * Each entry stored in @slist holds a reference obtained with
+ * kref_get_unless_zero(); entries that are not stored have theirs dropped
+ * again before moving on.
+ *
+ * Returns the sum of ls_num over the entries added by this call, or a
+ * negative errno (-ENOMEM, -E2BIG, or -ENOENT when @slist is still empty).
+ * The return type is unsigned, so callers must assign the result to a
+ * signed int before testing for errors.
+ */
+unsigned int lustre_stats_scan(struct lustre_stats_list *slist, const char *source)
+{
+       struct lprocfs_stats *item, **stats;
+       unsigned int cnt = 0, snum = 0;
+       unsigned long idx = 0;
+
+       if (source)
+               snum = stats_count_dots(source);
+
+       xa_for_each(&lstats_list, idx, item) {
+               if (!kref_get_unless_zero(&item->ls_refcount))
+                       continue;
+
+               if (!item->ls_source || strlen(item->ls_source) == 0) {
+                       lprocfs_stats_free(&item);
+                       continue;
+               }
+
+               if (source) {
+                       char filter[PATH_MAX / 8], *src = item->ls_source;
+
+                       if (strstarts(src, ".fs.lustre."))
+                               src += strlen(".fs.lustre.");
+
+                       /* glob_match() has a hard time telling *.* from
+                        * *.*.* so we need to compare the number of '.'
+                        * and filter on that as well. This actually avoids
+                        * the overhead of calling glob_match() every time.
+                        */
+                       if (snum != stats_count_dots(src)) {
+                               lprocfs_stats_free(&item);
+                               continue;
+                       }
+
+                       /* glob_match() does not like *.--- patterns so
+                        * we have to do special handling in this case.
+                        * Replace '*.' with obd_type names.
+                        */
+                       if (strstarts(source, "*.")) {
+                               char *start = strchr(src, '.');
+                               int len;
+
+                               /* If start is NULL this means its a top
+                                * level stats. We are looking for "*."
+                                * which is one level down. Let's skip it.
+                                */
+                               if (!start) {
+                                       lprocfs_stats_free(&item);
+                                       continue;
+                               }
+
+                               /* We know src -> start is the obd_type */
+                               len = start - src;
+                               snprintf(filter, sizeof(filter), "%.*s%s",
+                                        len, src, source + 1);
+                               filter[strlen(filter) - 1] = '\0';
+                       } else if (strscpy(filter, source,
+                                          sizeof(filter)) < 0) {
+                               /* The copy is bounded by the destination;
+                                * a truncated pattern could match entries
+                                * the caller never asked for, so refuse it.
+                                */
+                               lprocfs_stats_free(&item);
+                               return -E2BIG;
+                       }
+                       if (!glob_match(filter, src)) {
+                               lprocfs_stats_free(&item);
+                               continue;
+                       }
+               }
+               stats = genradix_ptr_alloc(&slist->gfl_list, slist->gfl_count,
+                                          GFP_ATOMIC);
+               if (!stats) {
+                       lprocfs_stats_free(&item);
+                       return -ENOMEM;
+               }
+               *stats = item;
+               /* only count slots that really exist in the radix tree */
+               slist->gfl_count++;
+               cnt += item->ls_num;
+       }
+
+       return slist->gfl_count ? cnt : -ENOENT;
+}
+
 u64 lprocfs_stats_collector(struct lprocfs_stats *stats, int idx,
                            enum lprocfs_fields_flags field)
 {
index ae88ccd..6cb68dc 100644 (file)
@@ -704,34 +704,22 @@ int lprocfs_exp_cleanup(struct obd_export *exp)
        return 0;
 }
 
-int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned int num_stats)
+/* Allocate @num_stats counters for @obd through ldebugfs_stats_alloc(),
+ * exposed as "stats" under obd->obd_debugfs_entry, and store the result in
+ * obd->obd_stats (which must not be set yet). The kobject passed for the
+ * source name is the one embedded in obd->obd_type.
+ *
+ * Returns 0 on success or -ENOMEM if the allocation failed.
+ */
+int ldebugfs_alloc_obd_stats(struct obd_device *obd, unsigned int num_stats)
 {
-       struct lprocfs_stats *stats;
-       int rc;
-
-       LASSERT(obd->obd_stats == NULL);
-       LASSERT(obd->obd_proc_entry != NULL);
-
-       stats = lprocfs_stats_alloc(num_stats, 0);
-       if (stats == NULL)
-               return -ENOMEM;
-
-       rc = lprocfs_stats_register(obd->obd_proc_entry, "stats", stats);
-       if (rc < 0)
-               lprocfs_stats_free(&stats);
-       else
-               obd->obd_stats = stats;
-
-       return rc;
+       LASSERT(!obd->obd_stats);
+       obd->obd_stats = ldebugfs_stats_alloc(num_stats, "stats",
+                                             obd->obd_debugfs_entry,
+                                             &obd->obd_type->typ_kobj, 0);
+       return obd->obd_stats ? 0 : -ENOMEM;
 }
-EXPORT_SYMBOL(lprocfs_alloc_obd_stats);
+EXPORT_SYMBOL(ldebugfs_alloc_obd_stats);
 
-void lprocfs_free_obd_stats(struct obd_device *obd)
+/* Hand obd->obd_stats, when set, to lprocfs_stats_free(). */
+void ldebugfs_free_obd_stats(struct obd_device *obd)
 {
        if (obd->obd_stats)
                lprocfs_stats_free(&obd->obd_stats);
 }
-EXPORT_SYMBOL(lprocfs_free_obd_stats);
+EXPORT_SYMBOL(ldebugfs_free_obd_stats);
 
 static void display_brw_stats(struct seq_file *seq, const char *name,
                              const char *units, struct obd_hist_pcpu *read,
@@ -866,9 +854,8 @@ void lprocfs_fini_brw_stats(struct brw_stats *brw_stats)
 }
 EXPORT_SYMBOL(lprocfs_fini_brw_stats);
 
-void ldebugfs_register_osd_stats(struct dentry *parent,
-                                struct brw_stats *brw_stats,
-                                struct lprocfs_stats *stats)
+void ldebugfs_register_brw_stats(struct dentry *parent,
+                                struct brw_stats *brw_stats)
 {
        int i;
 
@@ -889,12 +876,8 @@ void ldebugfs_register_osd_stats(struct dentry *parent,
 
        debugfs_create_file("brw_stats", 0644, parent, brw_stats,
                            &brw_stats_fops);
-
-       if (stats)
-               debugfs_create_file("stats", 0644, parent, stats,
-                                   &ldebugfs_stats_seq_fops);
 }
-EXPORT_SYMBOL(ldebugfs_register_osd_stats);
+EXPORT_SYMBOL(ldebugfs_register_brw_stats);
 
 int lprocfs_hash_seq_show(struct seq_file *m, void *data)
 {
index 6243709..88b802a 100644 (file)
@@ -778,7 +778,7 @@ static int echo_srv_init0(const struct lu_env *env,
 
        obd->obd_vars = lprocfs_echo_obd_vars;
        if (!lprocfs_obd_setup(obd, true) &&
-           lprocfs_alloc_obd_stats(obd, LPROC_ECHO_LAST) == 0) {
+           ldebugfs_alloc_obd_stats(obd, LPROC_ECHO_LAST) == 0) {
                lprocfs_counter_init(obd->obd_stats, LPROC_ECHO_READ_BYTES,
                                     LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_BYTES,
                                     "read_bytes");
@@ -807,7 +807,7 @@ err_out:
        obd->obd_namespace = NULL;
 
        lprocfs_obd_cleanup(obd);
-       lprocfs_free_obd_stats(obd);
+       ldebugfs_free_obd_stats(obd);
        RETURN(rc);
 }
 
@@ -845,7 +845,7 @@ static void echo_srv_fini(const struct lu_env *env,
        }
 
        lprocfs_obd_cleanup(obd);
-       lprocfs_free_obd_stats(obd);
+       ldebugfs_free_obd_stats(obd);
 
        leaked = atomic_read(&obd2echo(obd)->eo_prep);
        if (leaked != 0)
index b0c4210..91e7094 100644 (file)
@@ -1082,7 +1082,7 @@ int ofd_tunables_init(struct ofd_device *ofd)
                GOTO(obd_cleanup, rc);
        }
 
-       rc = lprocfs_alloc_obd_stats(obd, LPROC_OFD_STATS_LAST);
+       rc = ldebugfs_alloc_obd_stats(obd, LPROC_OFD_STATS_LAST);
        if (rc) {
                CERROR("%s: lprocfs_alloc_obd_stats failed: %d.\n",
                       obd->obd_name, rc);
@@ -1123,7 +1123,7 @@ int ofd_tunables_init(struct ofd_device *ofd)
        RETURN(0);
 
 obd_free_stats:
-       lprocfs_free_obd_stats(obd);
+       ldebugfs_free_obd_stats(obd);
 tgt_cleanup:
        tgt_tunables_fini(&ofd->ofd_lut);
 obd_cleanup:
index 1ed08f2..2ab4ead 100644 (file)
@@ -721,7 +721,7 @@ static void ofd_procfs_fini(struct ofd_device *ofd)
        tgt_tunables_fini(&ofd->ofd_lut);
        lprocfs_free_per_client_stats(obd);
        lprocfs_obd_cleanup(obd);
-       lprocfs_free_obd_stats(obd);
+       ldebugfs_free_obd_stats(obd);
        lprocfs_job_stats_fini(obd);
 }
 
index 70166a2..4d3e9c7 100644 (file)
@@ -81,7 +81,9 @@ static int osd_stats_init(struct osd_device *osd)
        int result = -ENOMEM;
 
        ENTRY;
-       osd->od_stats = lprocfs_stats_alloc(LPROC_OSD_LAST, 0);
+       osd->od_stats = ldebugfs_stats_alloc(LPROC_OSD_LAST, "stats",
+                                            osd->od_dt_dev.dd_debugfs_entry,
+                                            &osd->od_dt_dev.dd_kobj, 0);
        if (osd->od_stats) {
                lprocfs_counter_init(osd->od_stats, LPROC_OSD_GET_PAGE,
                                     LPROCFS_TYPE_LATENCY, "get_page");
@@ -114,8 +116,8 @@ static int osd_stats_init(struct osd_device *osd)
                result = 0;
        }
 
-       ldebugfs_register_osd_stats(osd->od_dt_dev.dd_debugfs_entry,
-                                   &osd->od_brw_stats, osd->od_stats);
+       ldebugfs_register_brw_stats(osd->od_dt_dev.dd_debugfs_entry,
+                                   &osd->od_brw_stats);
 
 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 17, 53, 0)
        osd_symlink_brw_stats(osd);
index 58c6383..37b091d 100644 (file)
@@ -69,7 +69,9 @@ static int osd_stats_init(struct osd_device *osd)
        int result = -ENOMEM;
 
        ENTRY;
-       osd->od_stats = lprocfs_stats_alloc(LPROC_OSD_LAST, 0);
+       osd->od_stats = ldebugfs_stats_alloc(LPROC_OSD_LAST, "stats",
+                                            osd->od_dt_dev.dd_debugfs_entry,
+                                            &osd->od_dt_dev.dd_kobj, 0);
        if (osd->od_stats) {
                lprocfs_counter_init(osd->od_stats, LPROC_OSD_GET_PAGE,
                                LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV |
@@ -109,8 +111,8 @@ static int osd_stats_init(struct osd_device *osd)
                result = 0;
        }
 
-       ldebugfs_register_osd_stats(osd->od_dt_dev.dd_debugfs_entry,
-                                   &osd->od_brw_stats, osd->od_stats);
+       ldebugfs_register_brw_stats(osd->od_dt_dev.dd_debugfs_entry,
+                                   &osd->od_brw_stats);
 
 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 17, 53, 0)
        osd_symlink_brw_stats(osd);
index 54b0e8d..62f1a30 100644 (file)
@@ -205,6 +205,7 @@ static const char *ll_eopcode2str(__u32 opcode)
 static void
 ptlrpc_ldebugfs_register(struct dentry *root, char *dir, char *name,
                         struct dentry **debugfs_root_ret,
+                        struct kobject *kobj,
                         struct lprocfs_stats **stats_ret)
 {
        struct dentry *svc_debugfs_entry;
@@ -216,16 +217,16 @@ ptlrpc_ldebugfs_register(struct dentry *root, char *dir, char *name,
        LASSERT(!*debugfs_root_ret);
        LASSERT(!*stats_ret);
 
-       svc_stats = lprocfs_stats_alloc(EXTRA_MAX_OPCODES + LUSTRE_MAX_OPCODES,
-                                       0);
-       if (!svc_stats)
-               return;
-
        if (dir)
                svc_debugfs_entry = debugfs_create_dir(dir, root);
        else
                svc_debugfs_entry = root;
 
+       svc_stats = ldebugfs_stats_alloc(EXTRA_MAX_OPCODES + LUSTRE_MAX_OPCODES,
+                                        name, svc_debugfs_entry, kobj, 0);
+       if (!svc_stats)
+               return;
+
        lprocfs_counter_init(svc_stats, PTLRPC_REQWAIT_CNTR,
                             config | LPROCFS_TYPE_USECS, "req_waittime");
        lprocfs_counter_init(svc_stats, PTLRPC_REQQDEPTH_CNTR,
@@ -256,9 +257,6 @@ ptlrpc_ldebugfs_register(struct dentry *root, char *dir, char *name,
                                     ll_opcode2str(opcode));
        }
 
-       debugfs_create_file(name, 0644, svc_debugfs_entry, svc_stats,
-                           &ldebugfs_stats_seq_fops);
-
        if (dir)
                *debugfs_root_ret = svc_debugfs_entry;
        *stats_ret = svc_stats;
@@ -1243,7 +1241,8 @@ void ptlrpc_ldebugfs_register_service(struct dentry *entry,
        };
 
        ptlrpc_ldebugfs_register(entry, svc->srv_name, "stats",
-                                &svc->srv_debugfs_entry, &svc->srv_stats);
+                                &svc->srv_debugfs_entry,
+                                &svc->srv_kobj, &svc->srv_stats);
        if (!svc->srv_debugfs_entry)
                return;
 
@@ -1257,6 +1256,7 @@ void ptlrpc_lprocfs_register_obd(struct obd_device *obd)
 {
        ptlrpc_ldebugfs_register(obd->obd_debugfs_entry, NULL, "stats",
                                 &obd->obd_svc_debugfs_entry,
+                                &obd->obd_kset.kobj,
                                 &obd->obd_svc_stats);
 }
 EXPORT_SYMBOL(ptlrpc_lprocfs_register_obd);
index 49b0773..1153ca2 100644 (file)
@@ -514,8 +514,137 @@ static int print_out_devices(yaml_parser_t *reply, enum lctl_param_flags flags)
        return rc;
 }
 
+/* Return a static copy of @key when it names one of the timestamp fields
+ * of a stats reply, NULL otherwise. A static copy is needed because the
+ * event owning @key is deleted before the label is printed.
+ */
+static const char *stats_time_label(const char *key)
+{
+       static const char *const labels[] = {
+               "snapshot_time", "start_time", "elapsed_time",
+       };
+       size_t i;
+
+       for (i = 0; i < sizeof(labels) / sizeof(labels[0]); i++) {
+               if (strcmp(key, labels[i]) == 0)
+                       return labels[i];
+       }
+
+       return NULL;
+}
+
+/* Delete *@event and replace it with the next event of @reply. Returns the
+ * yaml_parser_parse() status (0 on failure). *@value is set to the text of
+ * the new event when it is a scalar and to NULL otherwise.
+ */
+static int stats_next_value(yaml_parser_t *reply, yaml_event_t *event,
+                           char **value)
+{
+       int rc;
+
+       *value = NULL;
+       yaml_event_delete(event);
+       rc = yaml_parser_parse(reply, event);
+       if (rc != 0 && event->type == YAML_SCALAR_EVENT)
+               *value = (char *)event->data.scalar.value;
+
+       return rc;
+}
+
+/* Advance the write position @pos by the snprintf() result @len without
+ * ever moving past the terminating NUL of a buffer ending at @end.
+ */
+static char *stats_buf_advance(char *pos, char *end, int len)
+{
+       if (len < 0)
+               return pos;
+       if (len >= end - pos)
+               return end - 1;
+
+       return pos + len;
+}
+
+/* Print the "stats" section of a YAML reply to stdout.
+ *
+ * Timestamp keys (snapshot_time, start_time, elapsed_time) are printed on
+ * their own line as "<secs>.<nsecs> secs.nsecs". The "source" value is
+ * printed as "<source>.stats" (with a trailing '=' when @version is
+ * non-zero) if PARAM_FLAGS_SHOW_SOURCE is set in @flags. All other scalars
+ * are accumulated into one line that is flushed at the end of each mapping.
+ *
+ * Returns the status of the last yaml_parser_parse() call: 0 if parsing
+ * failed, non-zero once the document end has been reached.
+ */
+static int print_out_stats(yaml_parser_t *reply, int version, int flags)
+{
+       bool show_path = flags & PARAM_FLAGS_SHOW_SOURCE;
+       char buf[64 * 1024], *tmp = buf;
+       char *end = buf + sizeof(buf);
+       yaml_event_t event;
+       bool done = false;
+       int index = 0;
+       int rc;
+
+       bzero(buf, sizeof(buf));
+
+       while (!done) {
+               rc = yaml_parser_parse(reply, &event);
+               if (rc == 0)
+                       break;
+
+               if (event.type == YAML_MAPPING_END_EVENT) {
+                       size_t len = strlen(buf);
+
+                       if (len > 0) {
+                               /* eat last white space
+                                * NOTE(review): this drops the final
+                                * character whatever it is - confirm the
+                                * accumulated line always ends in a space.
+                                */
+                               buf[len - 1] = '\0';
+                               printf("%s\n",  buf);
+                       }
+                       bzero(buf, sizeof(buf));
+                       tmp = buf;
+                       index = 0;
+               }
+
+               if (event.type == YAML_SEQUENCE_START_EVENT) {
+                       bzero(buf, sizeof(buf));
+                       tmp = buf;
+                       index = 0;
+               }
+
+               if (event.type == YAML_SCALAR_EVENT) {
+                       char *value = (char *)event.data.scalar.value;
+                       const char *label = stats_time_label(value);
+
+                       if (label) {
+                               rc = stats_next_value(reply, &event, &value);
+                               if (rc == 0)
+                                       break;
+
+                               if (value) {
+                                       long long num = strtoll(value, NULL,
+                                                               10);
+
+                                       fprintf(stdout,
+                                               "%-25s %lld.%09lld secs.nsecs\n",
+                                               label, num / 1000000000LL,
+                                               num % 1000000000LL);
+                               }
+                       } else if (strcmp(value, "source") == 0) {
+                               rc = stats_next_value(reply, &event, &value);
+                               if (rc == 0)
+                                       break;
+
+                               if (show_path && value) {
+                                       if (version) {
+                                               fprintf(stdout, "%s.stats=\n",
+                                                       value);
+                                       } else {
+                                               fprintf(stdout, "%s.stats\n",
+                                                       value);
+                                       }
+                               }
+                       } else if (strcmp(value, "samples") == 0) {
+                               rc = stats_next_value(reply, &event, &value);
+                               if (rc == 0)
+                                       break;
+
+                               if (value) {
+                                       int len = snprintf(tmp, end - tmp,
+                                                          "%s samples",
+                                                          value);
+
+                                       tmp = stats_buf_advance(tmp, end, len);
+                               }
+                       } else {
+                               /* The first scalar of a line is printed as
+                                * is; for later ones the scalar just read is
+                                * a key and the text comes from the event
+                                * that follows it.
+                                */
+                               bool first = (tmp == buf);
+
+                               if (!first) {
+                                       rc = stats_next_value(reply, &event,
+                                                             &value);
+                                       if (rc == 0)
+                                               break;
+                               }
+
+                               if (value) {
+                                       int len;
+
+                                       if (first)
+                                               len = snprintf(tmp, end - tmp,
+                                                              "%-26s", value);
+                                       else if (index == 1)
+                                               len = snprintf(tmp, end - tmp,
+                                                              " [%s]", value);
+                                       else
+                                               len = snprintf(tmp, end - tmp,
+                                                              " %s", value);
+                                       tmp = stats_buf_advance(tmp, end, len);
+                                       index++;
+                               }
+                       }
+               }
+
+               done = (event.type == YAML_DOCUMENT_END_EVENT);
+               yaml_event_delete(&event);
+       }
+
+       return rc;
+}
+
 static int lcfg_param_get_yaml(yaml_parser_t *reply, struct nl_sock *sk,
-                              int version, char *pattern)
+                              int version, int flags, char *pattern)
 {
        char source[PATH_MAX / 2], group[GENL_NAMSIZ + 1];
        char *family = "lustre", *tmp;
@@ -542,6 +671,8 @@ static int lcfg_param_get_yaml(yaml_parser_t *reply, struct nl_sock *sk,
 
        if (strcmp(group, "devices") == 0)
                cmd = LUSTRE_CMD_DEVICES;
+       else if (strcmp(group, "stats") == 0)
+               cmd = LUSTRE_CMD_STATS;
 
        if (!cmd)
                return -EOPNOTSUPP;
@@ -559,7 +690,7 @@ static int lcfg_param_get_yaml(yaml_parser_t *reply, struct nl_sock *sk,
        yaml_emitter_initialize(&request);
        rc = yaml_emitter_set_output_netlink(&request, sk,
                                             family, version,
-                                            cmd, NLM_F_DUMP);
+                                            cmd, flags);
        if (rc == 0)
                goto error;
 
@@ -678,7 +809,7 @@ int llapi_param_display_value(char *path, int version,
        if (!sk)
                return -ENOMEM;
 
-       rc = lcfg_param_get_yaml(&reply, sk, version, path);
+       rc = lcfg_param_get_yaml(&reply, sk, version, NLM_F_DUMP, path);
        if (rc < 0)
                return rc;
 
@@ -723,6 +854,9 @@ int llapi_param_display_value(char *path, int version,
 
                                if (strcmp(value, "devices") == 0)
                                        rc = print_out_devices(&reply, flags);
+                               else if (strcmp(value, "stats") == 0)
+                                       rc = print_out_stats(&reply, version,
+                                                            flags);
                                if (rc == 0)
                                        break;
                        }
@@ -742,6 +876,46 @@ free_reply:
        return rc == 1 ? 0 : rc;
 }
 
+/**
+ * Change a parameter setting through the Netlink interface.
+ *
+ * Only clearing statistics is handled: \a path must contain "/stats" and
+ * \a value must be the string "clear". The request is sent with
+ * NLM_F_REPLACE and the reply document is loaded only to detect errors;
+ * its contents are discarded.
+ *
+ * \param[in] path     parameter path to modify
+ * \param[in] value    new setting; only "clear" is accepted
+ * \param[in] version  version handed to the Netlink request
+ * \param[in] flags    currently unused
+ * \param[in] fp       currently unused
+ *
+ * \retval 0         on success
+ * \retval -ENOENT   \a path does not refer to a stats parameter
+ * \retval -EINVAL   \a path or \a value is NULL, \a value is not "clear",
+ *                   or the reply could not be parsed
+ * \retval -ENOMEM   the Netlink socket could not be allocated
+ * \retval negative  any error reported by lcfg_param_get_yaml()
+ */
+int llapi_param_set_value(char *path, char *value, int version,
+                         enum lctl_param_flags flags, FILE *fp)
+{
+       yaml_document_t results;
+       yaml_parser_t reply;
+       struct nl_sock *sk;
+       int rc;
+
+       /* strstr()/strcmp() must never be handed a NULL pointer */
+       if (!path || !value)
+               return -EINVAL;
+
+       /* Currently only stats allow changing settings */
+       if (!strstr(path, "/stats"))
+               return -ENOENT;
+
+       /* Only clear is currently supported */
+       if (strcmp(value, "clear") != 0)
+               return -EINVAL;
+
+       sk = nl_socket_alloc();
+       if (!sk)
+               return -ENOMEM;
+
+       rc = lcfg_param_get_yaml(&reply, sk, version, NLM_F_REPLACE, path);
+       if (rc < 0) {
+               /* The socket is owned by this function, so release it on
+                * failure as well. The parser is deliberately left alone.
+                * NOTE(review): confirm lcfg_param_get_yaml() leaves nothing
+                * in reply that needs releasing when it fails.
+                */
+               goto free_sock;
+       }
+
+       /* load the reply results */
+       rc = yaml_parser_load(&reply, &results);
+       if (rc == 0) {
+               yaml_parser_log_error(&reply, stderr, "set_param: ");
+               yaml_document_delete(&results);
+               rc = -EINVAL;
+               goto free_reply;
+       }
+
+       yaml_document_delete(&results);
+free_reply:
+       yaml_parser_delete(&reply);
+free_sock:
+       nl_socket_free(sk);
+       /* yaml_parser_load() reports success as 1; map that to 0 */
+       return rc == 1 ? 0 : rc;
+}
+
 /**
  * Read the value of the file with location \a path
  * into a buffer.
index 758baf9..6a65e32 100644 (file)
@@ -687,7 +687,8 @@ int jt_lcfg_listparam(int argc, char **argv)
                                rc = rc2;
 
                        if (rc2 == -ENOENT && getuid() != 0)
-                               rc2 = llapi_param_display_value(path, 0, 0,
+                               rc2 = llapi_param_display_value(path, 0,
+                                                               PARAM_FLAGS_SHOW_SOURCE,
                                                                stdout);
                        if (rc2 < 0) {
                                fprintf(stderr, "error: %s: listing '%s': %s\n",
@@ -958,10 +959,20 @@ int jt_lcfg_setparam(int argc, char **argv)
                        break;
 
                rc = do_param_op(&popt, path, value, SET_PARAM, wq_ptr);
-               if (rc < 0)
-                       fprintf(stderr, "error: %s: setting '%s'='%s': %s\n",
-                               jt_cmdname(argv[0]), path, value,
-                               strerror(-rc));
+               if (rc < 0) {
+                       if (rc == -ENOENT && getuid() != 0) {
+                               rc = llapi_param_set_value(path, value,
+                                                          LUSTRE_GENL_VERSION,
+                                                          0, stdout);
+                       }
+
+                       if (rc < 0) {
+                               fprintf(stderr,
+                                       "error: %s: setting '%s'='%s': %s\n",
+                                       jt_cmdname(argv[0]), path, value,
+                                       strerror(-rc));
+                       }
+               }
        }
 
        if (popt_is_parallel(popt)) {
index a67e745..cf08f0b 100644 (file)
@@ -197,6 +197,8 @@ enum lctl_param_flags {
 
 int llapi_param_display_value(char *path, int version,
                              enum lctl_param_flags flags, FILE *fp);
+int llapi_param_set_value(char *path, char *value, int version,
+                         enum lctl_param_flags flags, FILE *fp);
 
 enum get_lmd_info_type {
        GET_LMD_INFO = 1,