Whamcloud - gitweb
LU-14927 osd: share brw_stats code between OSD back ends.
[fs/lustre-release.git] / lustre / obdclass / lprocfs_status_server.c
index 30e551f..7a78959 100644 (file)
@@ -27,7 +27,6 @@
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
  *
  * lustre/obdclass/lprocfs_status_server.c
  */
@@ -72,11 +71,6 @@ struct dentry *ldebugfs_add_symlink(const char *name, const char *target,
        va_end(ap);
 
        entry = debugfs_create_symlink(name, parent, dest);
-       if (IS_ERR_OR_NULL(entry)) {
-               CERROR("LdebugFS: Could not create symbolic link from %s to %s\n",
-                      name, dest);
-               entry = NULL;
-       }
 
        OBD_FREE(dest, MAX_STRING_SIZE + 1);
 no_entry:
@@ -129,7 +123,8 @@ lprocfs_evict_client_seq_write(struct file *file, const char __user *buffer,
                count = -EFAULT;
                goto out;
        }
-       tmpbuf = cfs_firststr(kbuf, min_t(unsigned long, BUFLEN - 1, count));
+       tmpbuf = skip_spaces(kbuf);
+       tmpbuf = strsep(&tmpbuf, " \t\n\f\v\r");
        class_incref(obd, __func__, current);
 
        if (strncmp(tmpbuf, "nid:", 4) == 0)
@@ -167,16 +162,15 @@ static int obd_export_flags2str(struct obd_export *exp, struct seq_file *m)
        flag2str(exp, in_recovery);
        flag2str(exp, disconnected);
        flag2str(exp, connecting);
+       flag2str(exp, no_recovery);
 
        return 0;
 }
 
 static int
-lprocfs_exp_print_export_seq(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-                            struct hlist_node *hnode, void *cb_data)
+lprocfs_exp_print_export_seq(struct obd_export *exp, void *cb_data)
 {
        struct seq_file         *m = cb_data;
-       struct obd_export       *exp = cfs_hash_object(hs, hnode);
        struct obd_device       *obd;
        struct obd_connect_data *ocd;
 
@@ -201,6 +195,19 @@ lprocfs_exp_print_export_seq(struct cfs_hash *hs, struct cfs_hash_bd *bd,
        obd_export_flags2str(exp, m);
        seq_printf(m, " ]\n");
 
+       if (obd->obd_type &&
+           strcmp(obd->obd_type->typ_name, "obdfilter") == 0) {
+               struct filter_export_data *fed = &exp->exp_filter_data;
+
+               seq_printf(m, "    grant:\n");
+               seq_printf(m, "       granted: %ld\n",
+                       fed->fed_ted.ted_grant);
+               seq_printf(m, "       dirty: %ld\n",
+                       fed->fed_ted.ted_dirty);
+               seq_printf(m, "       pending: %ld\n",
+                       fed->fed_ted.ted_pending);
+       }
+
 out:
        return 0;
 }
@@ -231,11 +238,9 @@ out:
 static int lprocfs_exp_export_seq_show(struct seq_file *m, void *data)
 {
        struct nid_stat *stats = m->private;
-       struct obd_device *obd = stats->nid_obd;
 
-       cfs_hash_for_each_key(obd->obd_nid_hash, &stats->nid,
-                             lprocfs_exp_print_export_seq, m);
-       return 0;
+       return obd_nid_export_for_each(stats->nid_obd, stats->nid,
+                                      lprocfs_exp_print_export_seq, m);
 }
 LPROC_SEQ_FOPS_RO(lprocfs_exp_export);
 
@@ -258,7 +263,6 @@ static void lprocfs_free_client_stats(struct nid_stat *client_stat)
                lprocfs_free_stats(&client_stat->nid_ldlm_stats);
 
        OBD_FREE_PTR(client_stat);
-       return;
 }
 
 void lprocfs_free_per_client_stats(struct obd_device *obd)
@@ -281,64 +285,94 @@ void lprocfs_free_per_client_stats(struct obd_device *obd)
 EXPORT_SYMBOL(lprocfs_free_per_client_stats);
 
 static int
-lprocfs_exp_print_uuid_seq(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-                          struct hlist_node *hnode, void *cb_data)
+lprocfs_exp_print_nodemap_seq(struct obd_export *exp, void *cb_data)
 {
+       struct lu_nodemap *nodemap = exp->exp_target_data.ted_nodemap;
        struct seq_file *m = cb_data;
-       struct obd_export *exp = cfs_hash_object(hs, hnode);
 
-       if (exp->exp_nid_stats != NULL)
-               seq_printf(m, "%s\n", obd_uuid2str(&exp->exp_client_uuid));
+       if (nodemap)
+               seq_printf(m, "%s\n", nodemap->nm_name);
        return 0;
 }
 
 static int
-lprocfs_exp_print_nodemap_seq(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-                             struct hlist_node *hnode, void *cb_data)
+lprocfs_exp_nodemap_seq_show(struct seq_file *m, void *data)
 {
-       struct seq_file *m = cb_data;
-       struct obd_export *exp = cfs_hash_object(hs, hnode);
-       struct lu_nodemap *nodemap = exp->exp_target_data.ted_nodemap;
+       struct nid_stat *stats = m->private;
 
-       if (nodemap != NULL)
-               seq_printf(m, "%s\n", nodemap->nm_name);
-       return 0;
+       return obd_nid_export_for_each(stats->nid_obd, stats->nid,
+                                      lprocfs_exp_print_nodemap_seq, m);
 }
+LPROC_SEQ_FOPS_RO(lprocfs_exp_nodemap);
 
 static int
-lprocfs_exp_nodemap_seq_show(struct seq_file *m, void *data)
+lprocfs_exp_print_uuid_seq(struct obd_export *exp, void *cb_data)
 {
-       struct nid_stat *stats = m->private;
-       struct obd_device *obd = stats->nid_obd;
+       struct seq_file *m = cb_data;
 
-       cfs_hash_for_each_key(obd->obd_nid_hash, &stats->nid,
-                             lprocfs_exp_print_nodemap_seq, m);
+       if (exp->exp_nid_stats)
+               seq_printf(m, "%s\n", obd_uuid2str(&exp->exp_client_uuid));
        return 0;
 }
-LPROC_SEQ_FOPS_RO(lprocfs_exp_nodemap);
 
 static int lprocfs_exp_uuid_seq_show(struct seq_file *m, void *data)
 {
        struct nid_stat *stats = m->private;
-       struct obd_device *obd = stats->nid_obd;
 
-       cfs_hash_for_each_key(obd->obd_nid_hash, &stats->nid,
-                               lprocfs_exp_print_uuid_seq, m);
-       return 0;
+       return obd_nid_export_for_each(stats->nid_obd, stats->nid,
+                                      lprocfs_exp_print_uuid_seq, m);
 }
 LPROC_SEQ_FOPS_RO(lprocfs_exp_uuid);
 
+#define HASH_NAME_LEN  16
+
+/*
+ * Print one row of statistics for rhashtable @ht, labelled @name, to @m.
+ *
+ * Columns, in order: name, current bucket count, configured min size,
+ * max size (UINT_MAX when unlimited), load factor (elements per bucket
+ * with three decimals), two constant threshold columns, flags (the
+ * automatic_shrinking parameter), a constant 0 rehash count, element
+ * count, the longest chain seen, and a '/'-separated histogram of chain
+ * lengths where slot k counts buckets with fls(length) == k (capped at 7).
+ *
+ * The bucket table is walked under rcu_read_lock() only, so the values
+ * are a snapshot that may race with concurrent inserts and removals.
+ */
+static void ldebugfs_rhash_seq_show(const char *name, struct rhashtable *ht,
+                                   struct seq_file *m)
+{
+       unsigned int max_size = ht->p.max_size ? ht->p.max_size : UINT_MAX;
+       struct bucket_table *tbl;
+       int dist[8] = { 0, };
+       int maxdep = 0;
+       int nelems;
+       int i;
+
+       rcu_read_lock();
+       /* ht->mutex is not held here, so the _rcu accessors must be used */
+       tbl = rht_dereference_rcu(ht->tbl, ht);
+       for (i = 0; i < tbl->size; i++) {
+               struct rhash_head *pos;
+               int count = 0;
+
+               rht_for_each_rcu(pos, tbl, i)
+                       count++;
+
+               if (count)
+                       maxdep = max(maxdep, count);
+
+               dist[min(fls(count), 7)]++;
+       }
+
+       /* read the element count once so all columns agree with each other */
+       nelems = atomic_read(&ht->nelems);
+
+       /* theta is printed as <integer part>.<three fractional digits> */
+       seq_printf(m, "%-*s %5d %5d %10u %d.%03d 0.300 0.750 0x%03x %7d %7d %7d ",
+                  HASH_NAME_LEN, name, tbl->size, ht->p.min_size, max_size,
+                  nelems / tbl->size,
+                  (nelems % tbl->size) * 1000 / tbl->size,
+                  ht->p.automatic_shrinking, 0,
+                  nelems, maxdep);
+       rcu_read_unlock();
+
+       for (i = 0; i < 8; i++)
+               seq_printf(m, "%d%c",  dist[i], (i == 7) ? '\n' : '/');
+}
+
+/*
+ * Per-export callback for the per-NID "hash" file.
+ *
+ * For an export that has a lock hash, print the rhashtable column header
+ * followed by one row describing the device's NID hash.  The header must
+ * list the same columns that ldebugfs_rhash_seq_show() emits, including
+ * "maxdep", so it is kept identical to the one in lprocfs_hash_seq_show().
+ *
+ * @exp:     export being visited
+ * @cb_data: the struct seq_file to print into
+ *
+ * Always returns 0.
+ */
 static int
-lprocfs_exp_print_hash_seq(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-                          struct hlist_node *hnode, void *cb_data)
+lprocfs_exp_print_hash_seq(struct obd_export *exp, void *cb_data)
 
 {
+       struct obd_device *obd = exp->exp_obd;
        struct seq_file *m = cb_data;
-       struct obd_export *exp = cfs_hash_object(hs, hnode);
 
        if (exp->exp_lock_hash != NULL) {
-               cfs_hash_debug_header(m);
-               cfs_hash_debug_str(hs, m);
+               seq_printf(m, "%-*s   cur   min        max theta t-min t-max flags  rehash   count  maxdep distribution\n",
+                          HASH_NAME_LEN, "name");
+               ldebugfs_rhash_seq_show("NID_HASH", &obd->obd_nid_hash.ht, m);
        }
        return 0;
 }
@@ -346,19 +380,15 @@ lprocfs_exp_print_hash_seq(struct cfs_hash *hs, struct cfs_hash_bd *bd,
 static int lprocfs_exp_hash_seq_show(struct seq_file *m, void *data)
 {
        struct nid_stat *stats = m->private;
-       struct obd_device *obd = stats->nid_obd;
 
-       cfs_hash_for_each_key(obd->obd_nid_hash, &stats->nid,
-                               lprocfs_exp_print_hash_seq, m);
-       return 0;
+       return obd_nid_export_for_each(stats->nid_obd, stats->nid,
+                                      lprocfs_exp_print_hash_seq, m);
 }
 LPROC_SEQ_FOPS_RO(lprocfs_exp_hash);
 
-int lprocfs_exp_print_replydata_seq(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-                                   struct hlist_node *hnode, void *cb_data)
+int lprocfs_exp_print_replydata_seq(struct obd_export *exp, void *cb_data)
 
 {
-       struct obd_export *exp = cfs_hash_object(hs, hnode);
        struct seq_file *m = cb_data;
        struct tg_export_data *ted = &exp->exp_target_data;
 
@@ -376,19 +406,14 @@ int lprocfs_exp_print_replydata_seq(struct cfs_hash *hs, struct cfs_hash_bd *bd,
 int lprocfs_exp_replydata_seq_show(struct seq_file *m, void *data)
 {
        struct nid_stat *stats = m->private;
-       struct obd_device *obd = stats->nid_obd;
 
-       cfs_hash_for_each_key(obd->obd_nid_hash, &stats->nid,
-                               lprocfs_exp_print_replydata_seq, m);
-       return 0;
+       return obd_nid_export_for_each(stats->nid_obd, stats->nid,
+                                      lprocfs_exp_print_replydata_seq, m);
 }
 LPROC_SEQ_FOPS_RO(lprocfs_exp_replydata);
 
-int lprocfs_exp_print_fmd_count_seq(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-                                   struct hlist_node *hnode, void *cb_data)
-
+int lprocfs_exp_print_fmd_count_seq(struct obd_export *exp, void *cb_data)
 {
-       struct obd_export *exp = cfs_hash_object(hs, hnode);
        struct seq_file *m = cb_data;
        struct tg_export_data *ted = &exp->exp_target_data;
 
@@ -400,11 +425,9 @@ int lprocfs_exp_print_fmd_count_seq(struct cfs_hash *hs, struct cfs_hash_bd *bd,
 int lprocfs_exp_fmd_count_seq_show(struct seq_file *m, void *data)
 {
        struct nid_stat *stats = m->private;
-       struct obd_device *obd = stats->nid_obd;
 
-       cfs_hash_for_each_key(obd->obd_nid_hash, &stats->nid,
-                             lprocfs_exp_print_fmd_count_seq, m);
-       return 0;
+       return obd_nid_export_for_each(stats->nid_obd, stats->nid,
+                                      lprocfs_exp_print_fmd_count_seq, m);
 }
 LPROC_SEQ_FOPS_RO(lprocfs_exp_fmd_count);
 
@@ -442,9 +465,8 @@ lprocfs_nid_stats_clear_seq_write(struct file *file, const char __user *buffer,
        struct seq_file *m = file->private_data;
        struct obd_device *obd = m->private;
        struct nid_stat *client_stat;
-       struct list_head free_list;
+       LIST_HEAD(free_list);
 
-       INIT_LIST_HEAD(&free_list);
        cfs_hash_cond_del(obd->obd_nid_stats_hash,
                          lprocfs_nid_stats_clear_write_cb, &free_list);
 
@@ -648,6 +670,142 @@ void lprocfs_free_obd_stats(struct obd_device *obd)
 }
 EXPORT_SYMBOL(lprocfs_free_obd_stats);
 
+/*
+ * Print one read/write histogram table to @seq.
+ *
+ * @name and @units label the table header.  @read and @write are the two
+ * histograms shown side by side.  When @scale is true a bucket index i is
+ * labelled as the power of two BIT(i), with a K or M suffix from index 10
+ * and 20 upwards; otherwise the raw index is printed.
+ *
+ * Only the header is printed when both histograms are empty.  Leading
+ * buckets are skipped until a count has been seen, and output stops once
+ * both cumulative counts have reached their totals.
+ */
+static void display_brw_stats(struct seq_file *seq, const char *name,
+                             const char *units, struct obd_histogram *read,
+                             struct obd_histogram *write, bool scale)
+{
+       unsigned long rd_total, wr_total;
+       unsigned long rd_running = 0;
+       unsigned long wr_running = 0;
+       unsigned int bucket;
+
+       seq_printf(seq, "\n%26s read      |     write\n", " ");
+       seq_printf(seq, "%-22s %-5s %% cum %% |  %-11s %% cum %%\n",
+                  name, units, units);
+
+       rd_total = lprocfs_oh_sum(read);
+       wr_total = lprocfs_oh_sum(write);
+       if (rd_total == 0 && wr_total == 0)
+               return;
+
+       for (bucket = 0; bucket < OBD_HIST_MAX; bucket++) {
+               unsigned long rd = read->oh_buckets[bucket];
+               unsigned long wr = write->oh_buckets[bucket];
+
+               rd_running += rd;
+               wr_running += wr;
+               /* nothing counted so far: do not print empty leading rows */
+               if (!rd_running && !wr_running)
+                       continue;
+
+               /* row label: raw index, or a power of two with K/M suffix */
+               if (scale && bucket >= 20)
+                       seq_printf(seq, "%luM", BIT(bucket - 20));
+               else if (scale && bucket >= 10)
+                       seq_printf(seq, "%luK", BIT(bucket - 10));
+               else if (scale)
+                       seq_printf(seq, "%lu", BIT(bucket));
+               else
+                       seq_printf(seq, "%u", bucket);
+
+               seq_printf(seq, ":\t\t%10lu %3u %3u   | %4lu %3u %3u\n",
+                          rd, pct(rd, rd_total), pct(rd_running, rd_total),
+                          wr, pct(wr, wr_total), pct(wr_running, wr_total));
+
+               /* every remaining bucket is empty on both sides */
+               if (rd_running == rd_total && wr_running == wr_total)
+                       break;
+       }
+}
+
+/*
+ * Label, unit name and scaling flag for each brw_stats table.
+ *
+ * Entry k is copied into bs_props[k] by ldebugfs_register_osd_stats() and
+ * describes the histogram pair bs_hist[2 * k] / bs_hist[2 * k + 1] printed
+ * by brw_stats_seq_show(), so the order of the entries matters.
+ */
+static const struct brw_stats_props brw_props[] = {
+       {
+               .bsp_name       = "pages per bulk r/w",
+               .bsp_units      = "rpcs",
+               .bsp_scale      = true,
+       },
+       {
+               .bsp_name       = "discontiguous pages",
+               .bsp_units      = "rpcs",
+               .bsp_scale      = false,
+       },
+       {
+               .bsp_name       = "discontiguous blocks",
+               .bsp_units      = "rpcs",
+               .bsp_scale      = false,
+       },
+       {
+               .bsp_name       = "disk fragmented I/Os",
+               .bsp_units      = "ios",
+               .bsp_scale      = false,
+       },
+       {
+               .bsp_name       = "disk I/Os in flight",
+               .bsp_units      = "ios",
+               .bsp_scale      = false,
+       },
+       {
+               .bsp_name       = "I/O time (1/1000s)",
+               .bsp_units      = "ios",
+               .bsp_scale      = true,
+       },
+       {
+               .bsp_name       = "disk I/O size",
+               .bsp_units      = "ios",
+               .bsp_scale      = true,
+       },
+};
+
+/*
+ * seq_file show handler for "brw_stats".
+ *
+ * Prints the stats header and then one table per named entry of bs_props,
+ * pairing entry k with histograms bs_hist[2 * k] (passed as the read
+ * histogram) and bs_hist[2 * k + 1] (passed as the write histogram).
+ * Always returns 0.
+ */
+static int brw_stats_seq_show(struct seq_file *seq, void *v)
+{
+       struct brw_stats *bs = seq->private;
+       int idx;
+
+       /* the histograms may be updated while they are being printed */
+       lprocfs_stats_header(seq, ktime_get(), bs->bs_init, 25, ":", 1);
+
+       for (idx = 0; idx < ARRAY_SIZE(bs->bs_props); idx++) {
+               const struct brw_stats_props *p = &bs->bs_props[idx];
+
+               /* entries without a name are not displayed */
+               if (p->bsp_name)
+                       display_brw_stats(seq, p->bsp_name, p->bsp_units,
+                                         &bs->bs_hist[idx * 2],
+                                         &bs->bs_hist[idx * 2 + 1],
+                                         p->bsp_scale);
+       }
+
+       return 0;
+}
+
+/*
+ * Write handler for "brw_stats": any write clears all histograms.
+ *
+ * The user buffer @buf and offset @off are not examined; @len is returned
+ * so the whole write is reported as consumed.
+ */
+static ssize_t brw_stats_seq_write(struct file *file,
+                                  const char __user *buf,
+                                  size_t len, loff_t *off)
+{
+       struct seq_file *m = file->private_data;
+       struct brw_stats *bs = m->private;
+       int n = 0;
+
+       while (n < BRW_RW_STATS_NUM) {
+               lprocfs_oh_clear(&bs->bs_hist[n]);
+               n++;
+       }
+
+       return len;
+}
+
+LDEBUGFS_SEQ_FOPS(brw_stats);
+
+/*
+ * Initialise @brw_stats and expose OSD statistics under debugfs.
+ *
+ * Records the init time, initialises the lock of every histogram and
+ * fills bs_props from the static brw_props table (one entry per pair of
+ * histograms).  When @parent is non-NULL a "brw_stats" file is created
+ * under it, plus a "stats" file when @stats is non-NULL.
+ *
+ * @parent:    debugfs directory to populate; may be NULL to only initialise
+ * @brw_stats: histogram container to set up; must not be NULL
+ * @stats:     optional generic stats to expose as "stats"
+ */
+void ldebugfs_register_osd_stats(struct dentry *parent,
+                                struct brw_stats *brw_stats,
+                                struct lprocfs_stats *stats)
+{
+       struct brw_stats_props *props;
+       int i;
+
+       LASSERT(brw_stats);
+       brw_stats->bs_init = ktime_get();
+       props = brw_stats->bs_props;
+
+       for (i = 0; i < BRW_RW_STATS_NUM; i++)
+               spin_lock_init(&brw_stats->bs_hist[i].oh_lock);
+
+       /* one property entry describes a pair of histograms */
+       for (i = 0; i < BRW_RW_STATS_NUM / 2; i++) {
+               props[i].bsp_name = brw_props[i].bsp_name;
+               props[i].bsp_units = brw_props[i].bsp_units;
+               props[i].bsp_scale = brw_props[i].bsp_scale;
+       }
+
+       if (parent) {
+               debugfs_create_file("brw_stats", 0644, parent, brw_stats,
+                                   &brw_stats_fops);
+
+               if (stats)
+                       debugfs_create_file("stats", 0644, parent, stats,
+                                           &ldebugfs_stats_seq_fops);
+       }
+}
+EXPORT_SYMBOL(ldebugfs_register_osd_stats);
+
 int lprocfs_hash_seq_show(struct seq_file *m, void *data)
 {
        struct obd_device *obd = m->private;
@@ -655,9 +813,13 @@ int lprocfs_hash_seq_show(struct seq_file *m, void *data)
        if (obd == NULL)
                return 0;
 
+       /* header for rhashtable state */
+       seq_printf(m, "%-*s   cur   min        max theta t-min t-max flags  rehash   count  maxdep distribution\n",
+                  HASH_NAME_LEN, "name");
+       ldebugfs_rhash_seq_show("UUID_HASH", &obd->obd_uuid_hash, m);
+       ldebugfs_rhash_seq_show("NID_HASH", &obd->obd_nid_hash.ht, m);
+
        cfs_hash_debug_header(m);
-       cfs_hash_debug_str(obd->obd_uuid_hash, m);
-       cfs_hash_debug_str(obd->obd_nid_hash, m);
        cfs_hash_debug_str(obd->obd_nid_stats_hash, m);
        return 0;
 }
@@ -676,15 +838,19 @@ int lprocfs_recovery_status_seq_show(struct seq_file *m, void *data)
                goto out;
        }
 
-       /* sampled unlocked, but really... */
-       if (obd->obd_recovering == 0) {
+       /* There is gap between client data read from storage and setting
+        * obd_recovering so check obd_recovery_end as well to make sure
+        * recovery is really finished
+        */
+       if (obd->obd_recovery_end > 0 && !obd->obd_recovering) {
                seq_printf(m, "COMPLETE\n");
                seq_printf(m, "recovery_start: %lld\n",
-                          (s64)obd->obd_recovery_start);
+                          (s64)ktime_get_real_seconds() -
+                          (ktime_get_seconds() - obd->obd_recovery_start));
                seq_printf(m, "recovery_duration: %lld\n",
                           obd->obd_recovery_end ?
                           obd->obd_recovery_end - obd->obd_recovery_start :
-                          ktime_get_real_seconds() - obd->obd_recovery_start);
+                          ktime_get_seconds() - obd->obd_recovery_start);
                /* Number of clients that have completed recovery */
                seq_printf(m, "completed_clients: %d/%d\n",
                           atomic_read(&obd->obd_max_recoverable_clients) -
@@ -715,9 +881,11 @@ int lprocfs_recovery_status_seq_show(struct seq_file *m, void *data)
                        seq_printf(m, "non-ready MDTs: %s\n",
                                   buf ? buf : "unknown (not enough RAM)");
                        seq_printf(m, "recovery_start: %lld\n",
-                                  (s64)obd->obd_recovery_start);
+                                  (s64)ktime_get_real_seconds() -
+                                  (ktime_get_seconds() -
+                                   obd->obd_recovery_start));
                        seq_printf(m, "time_waited: %lld\n",
-                                  (s64)(ktime_get_real_seconds() -
+                                  (s64)(ktime_get_seconds() -
                                         obd->obd_recovery_start));
                }
 
@@ -735,14 +903,15 @@ int lprocfs_recovery_status_seq_show(struct seq_file *m, void *data)
        }
 
        seq_printf(m, "RECOVERING\n");
-       seq_printf(m, "recovery_start: %lld\n", (s64)obd->obd_recovery_start);
+       seq_printf(m, "recovery_start: %lld\n", (s64)ktime_get_real_seconds() -
+                  (ktime_get_seconds() - obd->obd_recovery_start));
        seq_printf(m, "time_remaining: %lld\n",
-                  ktime_get_real_seconds() >=
+                  ktime_get_seconds() >=
                   obd->obd_recovery_start +
                   obd->obd_recovery_timeout ? 0 :
                   (s64)(obd->obd_recovery_start +
                         obd->obd_recovery_timeout -
-                        ktime_get_real_seconds()));
+                        ktime_get_seconds()));
        seq_printf(m, "connected_clients: %d/%d\n",
                   atomic_read(&obd->obd_connected_clients),
                   atomic_read(&obd->obd_max_recoverable_clients));
@@ -830,7 +999,7 @@ ssize_t recovery_time_soft_show(struct kobject *kobj, struct attribute *attr,
        struct obd_device *obd = container_of(kobj, struct obd_device,
                                              obd_kset.kobj);
 
-       return scnprintf(buf, PAGE_SIZE, "%ld\n", obd->obd_recovery_timeout);
+       return scnprintf(buf, PAGE_SIZE, "%d\n", obd->obd_recovery_timeout);
 }
 EXPORT_SYMBOL(recovery_time_soft_show);
 
@@ -858,7 +1027,7 @@ ssize_t recovery_time_hard_show(struct kobject *kobj, struct attribute *attr,
        struct obd_device *obd = container_of(kobj, struct obd_device,
                                              obd_kset.kobj);
 
-       return scnprintf(buf, PAGE_SIZE, "%ld\n", obd->obd_recovery_time_hard);
+       return scnprintf(buf, PAGE_SIZE, "%d\n", obd->obd_recovery_time_hard);
 }
 EXPORT_SYMBOL(recovery_time_hard_show);