Whamcloud - gitweb
LU-14927 osd: share brw_stats code between OSD back ends.
[fs/lustre-release.git] / lustre / osd-ldiskfs / osd_lproc.c
index b264b75..e2cc801 100644 (file)
@@ -27,7 +27,6 @@
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
  *
  * lustre/osd/osd_lproc.c
  *
 
 #include "osd_internal.h"
 
-#ifdef CONFIG_PROC_FS
-
 void osd_brw_stats_update(struct osd_device *osd, struct osd_iobuf *iobuf)
 {
-        struct brw_stats *s = &osd->od_brw_stats;
+       struct brw_stats *bs = &osd->od_brw_stats;
        sector_t         *last_block = NULL;
-        struct page     **pages = iobuf->dr_pages;
-        struct page      *last_page = NULL;
-        unsigned long     discont_pages = 0;
-        unsigned long     discont_blocks = 0;
+       struct page     **pages = iobuf->dr_pages;
+       struct page      *last_page = NULL;
+       unsigned long     discont_pages = 0;
+       unsigned long     discont_blocks = 0;
        sector_t         *blocks = iobuf->dr_blocks;
-        int               i, nr_pages = iobuf->dr_npages;
-        int               blocks_per_page;
-        int               rw = iobuf->dr_rw;
+       int               i, nr_pages = iobuf->dr_npages;
+       int               blocks_per_page;
+       int               rw = iobuf->dr_rw;
 
-        if (unlikely(nr_pages == 0))
-                return;
+       if (unlikely(nr_pages == 0))
+               return;
 
        blocks_per_page = PAGE_SIZE >> osd_sb(osd)->s_blocksize_bits;
 
-        lprocfs_oh_tally_log2(&s->hist[BRW_R_PAGES+rw], nr_pages);
-
-        while (nr_pages-- > 0) {
-                if (last_page && (*pages)->index != (last_page->index + 1))
-                        discont_pages++;
-                last_page = *pages;
-                pages++;
-                for (i = 0; i < blocks_per_page; i++) {
-                        if (last_block && *blocks != (*last_block + 1))
-                                discont_blocks++;
-                        last_block = blocks++;
-                }
-        }
-
-        lprocfs_oh_tally(&s->hist[BRW_R_DISCONT_PAGES+rw], discont_pages);
-        lprocfs_oh_tally(&s->hist[BRW_R_DISCONT_BLOCKS+rw], discont_blocks);
-}
-
-static void display_brw_stats(struct seq_file *seq, char *name, char *units,
-        struct obd_histogram *read, struct obd_histogram *write, int scale)
-{
-        unsigned long read_tot, write_tot, r, w, read_cum = 0, write_cum = 0;
-        int i;
-
-        seq_printf(seq, "\n%26s read      |     write\n", " ");
-        seq_printf(seq, "%-22s %-5s %% cum %% |  %-11s %% cum %%\n",
-                   name, units, units);
-
-        read_tot = lprocfs_oh_sum(read);
-        write_tot = lprocfs_oh_sum(write);
-        for (i = 0; i < OBD_HIST_MAX; i++) {
-                r = read->oh_buckets[i];
-                w = write->oh_buckets[i];
-                read_cum += r;
-                write_cum += w;
-                if (read_cum == 0 && write_cum == 0)
-                        continue;
-
-                if (!scale)
-                        seq_printf(seq, "%u", i);
-                else if (i < 10)
-                        seq_printf(seq, "%u", scale << i);
-                else if (i < 20)
-                        seq_printf(seq, "%uK", scale << (i-10));
-                else
-                        seq_printf(seq, "%uM", scale << (i-20));
-
-               seq_printf(seq, ":\t\t%10lu %3u %3u   | %4lu %3u %3u\n",
-                           r, pct(r, read_tot), pct(read_cum, read_tot),
-                           w, pct(w, write_tot), pct(write_cum, write_tot));
-
-                if (read_cum == read_tot && write_cum == write_tot)
-                        break;
-        }
-}
-
-static void brw_stats_show(struct seq_file *seq, struct brw_stats *brw_stats)
-{
-       struct timespec64 now;
-
-       /* this sampling races with updates */
-       ktime_get_real_ts64(&now);
-
-       seq_printf(seq, "snapshot_time:         %lld.%09ld (secs.nsecs)\n",
-                  (s64)now.tv_sec, now.tv_nsec);
-
-        display_brw_stats(seq, "pages per bulk r/w", "rpcs",
-                          &brw_stats->hist[BRW_R_PAGES],
-                          &brw_stats->hist[BRW_W_PAGES], 1);
-
-        display_brw_stats(seq, "discontiguous pages", "rpcs",
-                          &brw_stats->hist[BRW_R_DISCONT_PAGES],
-                          &brw_stats->hist[BRW_W_DISCONT_PAGES], 0);
-
-        display_brw_stats(seq, "discontiguous blocks", "rpcs",
-                          &brw_stats->hist[BRW_R_DISCONT_BLOCKS],
-                          &brw_stats->hist[BRW_W_DISCONT_BLOCKS], 0);
-
-        display_brw_stats(seq, "disk fragmented I/Os", "ios",
-                          &brw_stats->hist[BRW_R_DIO_FRAGS],
-                          &brw_stats->hist[BRW_W_DIO_FRAGS], 0);
-
-        display_brw_stats(seq, "disk I/Os in flight", "ios",
-                          &brw_stats->hist[BRW_R_RPC_HIST],
-                          &brw_stats->hist[BRW_W_RPC_HIST], 0);
-
-       display_brw_stats(seq, "I/O time (1/1000s)", "ios",
-                         &brw_stats->hist[BRW_R_IO_TIME],
-                         &brw_stats->hist[BRW_W_IO_TIME], 1);
-
-        display_brw_stats(seq, "disk I/O size", "ios",
-                          &brw_stats->hist[BRW_R_DISK_IOSIZE],
-                          &brw_stats->hist[BRW_W_DISK_IOSIZE], 1);
-}
-
-static int osd_brw_stats_seq_show(struct seq_file *seq, void *v)
-{
-        struct osd_device *osd = seq->private;
-
-        brw_stats_show(seq, &osd->od_brw_stats);
-
-        return 0;
-}
-
-static ssize_t osd_brw_stats_seq_write(struct file *file,
-                                      const char __user *buf,
-                                      size_t len, loff_t *off)
-{
-       struct seq_file *seq = file->private_data;
-        struct osd_device *osd = seq->private;
-        int i;
-
-        for (i = 0; i < BRW_LAST; i++)
-                lprocfs_oh_clear(&osd->od_brw_stats.hist[i]);
+       lprocfs_oh_tally_log2(&bs->bs_hist[BRW_R_PAGES + rw], nr_pages);
+
+       while (nr_pages-- > 0) {
+               if (last_page && (*pages)->index != (last_page->index + 1))
+                       discont_pages++;
+               last_page = *pages;
+               pages++;
+               for (i = 0; i < blocks_per_page; i++) {
+                       if (last_block && *blocks != (*last_block + 1))
+                               discont_blocks++;
+                       last_block = blocks++;
+               }
+       }
 
-        return len;
+       lprocfs_oh_tally(&bs->bs_hist[BRW_R_DISCONT_PAGES+rw], discont_pages);
+       lprocfs_oh_tally(&bs->bs_hist[BRW_R_DISCONT_BLOCKS+rw], discont_blocks);
 }
 
-LPROC_SEQ_FOPS(osd_brw_stats);
-
 static int osd_stats_init(struct osd_device *osd)
 {
-        int i, result;
-        ENTRY;
-
-        for (i = 0; i < BRW_LAST; i++)
-               spin_lock_init(&osd->od_brw_stats.hist[i].oh_lock);
+       int result = -ENOMEM;
 
+       ENTRY;
         osd->od_stats = lprocfs_alloc_stats(LPROC_OSD_LAST, 0);
-        if (osd->od_stats != NULL) {
-                result = lprocfs_register_stats(osd->od_proc_entry, "stats",
-                                                osd->od_stats);
-                if (result)
-                        GOTO(out, result);
-
+       if (osd->od_stats) {
                 lprocfs_counter_init(osd->od_stats, LPROC_OSD_GET_PAGE,
                                      LPROCFS_CNTR_AVGMINMAX|LPROCFS_CNTR_STDDEV,
                                      "get_page", "usec");
@@ -221,13 +108,13 @@ static int osd_stats_init(struct osd_device *osd)
                                      LPROCFS_CNTR_AVGMINMAX,
                                      "thandle closing", "usec");
 #endif
-               result = lprocfs_seq_create(osd->od_proc_entry, "brw_stats",
-                                           0644, &osd_brw_stats_fops, osd);
-        } else
-                result = -ENOMEM;
+               result = 0;
+       }
 
-out:
-        RETURN(result);
+       ldebugfs_register_osd_stats(osd->od_dt_dev.dd_debugfs_entry,
+                                   &osd->od_brw_stats, osd->od_stats);
+
+       RETURN(result);
 }
 
 static ssize_t fstype_show(struct kobject *kobj, struct attribute *attr,
@@ -285,7 +172,7 @@ static ssize_t read_cache_enable_store(struct kobject *kobj,
        if (rc)
                return rc;
 
-       osd->od_read_cache = val;
+       osd->od_read_cache = !!val;
        return count;
 }
 LUSTRE_RW_ATTR(read_cache_enable);
@@ -324,11 +211,58 @@ static ssize_t writethrough_cache_enable_store(struct kobject *kobj,
        if (rc)
                return rc;
 
-       osd->od_writethrough_cache = val;
+       osd->od_writethrough_cache = !!val;
        return count;
 }
 LUSTRE_RW_ATTR(writethrough_cache_enable);
 
+/*
+ * Show handler paired with fallocate_zero_blocks_store(): prints the
+ * device's od_fallocate_zero_blocks value as a signed decimal followed
+ * by a newline.
+ *
+ * Returns the number of bytes placed in @buf, or -EINPROGRESS while the
+ * device has no od_mnt set.
+ */
+static ssize_t fallocate_zero_blocks_show(struct kobject *kobj,
+                                         struct attribute *attr,
+                                         char *buf)
+{
+       struct osd_device *osd;
+
+       /* kobj is embedded in the dt_device as dd_kobj */
+       osd = osd_dt_dev(container_of(kobj, struct dt_device, dd_kobj));
+       LASSERT(osd);
+
+       if (unlikely(osd->od_mnt == NULL))
+               return -EINPROGRESS;
+
+       return scnprintf(buf, PAGE_SIZE, "%d\n", osd->od_fallocate_zero_blocks);
+}
+
+/*
+ * Select how fallocate() is handled on the backing filesystem.
+ * Accepted values:
+ *   -1: fallocate is disabled and returns -EOPNOTSUPP
+ *    0: fallocate allocates unwritten extents (like ext4)
+ *    1: fallocate zeroes allocated extents on disk
+ *
+ * Returns @count on success, -EINPROGRESS while the device has no od_mnt
+ * set, the kstrtol() error if @buffer does not parse as an integer, or
+ * -EINVAL when the parsed value is outside [-1, 1].
+ */
+static ssize_t fallocate_zero_blocks_store(struct kobject *kobj,
+                                          struct attribute *attr,
+                                          const char *buffer, size_t count)
+{
+       struct osd_device *osd;
+       long mode;
+       int rc;
+
+       /* kobj is embedded in the dt_device as dd_kobj */
+       osd = osd_dt_dev(container_of(kobj, struct dt_device, dd_kobj));
+       LASSERT(osd);
+
+       if (unlikely(osd->od_mnt == NULL))
+               return -EINPROGRESS;
+
+       /* base 0: kstrtol() picks the radix from the input's prefix */
+       rc = kstrtol(buffer, 0, &mode);
+       if (rc != 0)
+               return rc;
+
+       if (!(mode >= -1 && mode <= 1))
+               return -EINVAL;
+
+       osd->od_fallocate_zero_blocks = mode;
+
+       return count;
+}
+LUSTRE_RW_ATTR(fallocate_zero_blocks);
+
 ssize_t force_sync_store(struct kobject *kobj, struct attribute *attr,
                         const char *buffer, size_t count)
 {
@@ -423,7 +357,8 @@ static ssize_t auto_scrub_show(struct kobject *kobj, struct attribute *attr,
        if (unlikely(!dev->od_mnt))
                return -EINPROGRESS;
 
-       return sprintf(buf, "%lld\n", dev->od_auto_scrub_interval);
+       return scnprintf(buf, PAGE_SIZE, "%lld\n",
+                        dev->od_scrub.os_scrub.os_auto_scrub_interval);
 }
 
 static ssize_t auto_scrub_store(struct kobject *kobj, struct attribute *attr,
@@ -443,7 +378,7 @@ static ssize_t auto_scrub_store(struct kobject *kobj, struct attribute *attr,
        if (rc)
                return rc;
 
-       dev->od_auto_scrub_interval = val;
+       dev->od_scrub.os_scrub.os_auto_scrub_interval = val;
        return count;
 }
 LUSTRE_RW_ATTR(auto_scrub);
@@ -528,6 +463,24 @@ static ssize_t full_scrub_threshold_rate_store(struct kobject *kobj,
 }
 LUSTRE_RW_ATTR(full_scrub_threshold_rate);
 
+/*
+ * Show handler for the read-only attribute declared below via
+ * LUSTRE_RO_ATTR(extent_bytes_allocation): prints the smallest
+ * od_extent_bytes_percpu value found across the online CPUs, as an
+ * unsigned decimal followed by a newline.
+ *
+ * Returns the number of bytes placed in @buf.
+ */
+static ssize_t extent_bytes_allocation_show(struct kobject *kobj,
+                                           struct attribute *attr, char *buf)
+{
+       struct dt_device *dt = container_of(kobj, struct dt_device,
+                                           dd_kobj);
+       struct osd_device *dev = osd_dt_dev(dt);
+       int i;
+       /* start from the largest unsigned value so any sample lowers it */
+       unsigned int min = (unsigned int)(~0), cur;
+
+       for_each_online_cpu(i) {
+               cur = *per_cpu_ptr(dev->od_extent_bytes_percpu, i);
+               if (cur < min)
+                       min = cur;
+       }
+
+       /*
+        * Use scnprintf() like the other show handlers in this file: it
+        * returns the bytes actually stored in @buf, which is what a
+        * show handler is expected to return.
+        */
+       return scnprintf(buf, PAGE_SIZE, "%u\n", min);
+}
+LUSTRE_RO_ATTR(extent_bytes_allocation);
+
 static int ldiskfs_osd_oi_scrub_seq_show(struct seq_file *m, void *data)
 {
        struct osd_device *dev = osd_dt_dev((struct dt_device *)m->private);
@@ -561,6 +514,7 @@ ldiskfs_osd_readcache_seq_write(struct file *file, const char __user *buffer,
        struct seq_file *m = file->private_data;
        struct dt_device *dt = m->private;
        struct osd_device *osd = osd_dt_dev(dt);
+       char kernbuf[22] = "";
        u64 val;
        int rc;
 
@@ -568,8 +522,15 @@ ldiskfs_osd_readcache_seq_write(struct file *file, const char __user *buffer,
        if (unlikely(osd->od_mnt == NULL))
                return -EINPROGRESS;
 
-       rc = lprocfs_str_with_units_to_u64(buffer, count, &val, '1');
-       if (rc)
+       if (count >= sizeof(kernbuf))
+               return -EINVAL;
+
+       if (copy_from_user(kernbuf, buffer, count))
+               return -EFAULT;
+       kernbuf[count] = 0;
+
+       rc = sysfs_memparse(kernbuf, count, &val, "B");
+       if (rc < 0)
                return rc;
 
        osd->od_readcache_max_filesize = val > OSD_MAX_CACHE_SIZE ?
@@ -599,18 +560,24 @@ ldiskfs_osd_readcache_max_io_seq_write(struct file *file,
        struct seq_file *m = file->private_data;
        struct dt_device *dt = m->private;
        struct osd_device *osd = osd_dt_dev(dt);
-       s64 val;
+       char kernbuf[22] = "";
+       u64 val;
        int rc;
 
        LASSERT(osd != NULL);
        if (unlikely(osd->od_mnt == NULL))
                return -EINPROGRESS;
 
-       rc = lprocfs_str_with_units_to_s64(buffer, count, &val, 'M');
-       if (rc)
+       if (count >= sizeof(kernbuf))
+               return -EINVAL;
+
+       if (copy_from_user(kernbuf, buffer, count))
+               return -EFAULT;
+       kernbuf[count] = 0;
+
+       rc = sysfs_memparse(kernbuf, count, &val, "MiB");
+       if (rc < 0)
                return rc;
-       if (val < 0)
-               return -ERANGE;
 
        if (val > PTLRPC_MAX_BRW_SIZE)
                return -ERANGE;
@@ -641,18 +608,24 @@ ldiskfs_osd_writethrough_max_io_seq_write(struct file *file,
        struct seq_file *m = file->private_data;
        struct dt_device *dt = m->private;
        struct osd_device *osd = osd_dt_dev(dt);
-       s64 val;
+       char kernbuf[22] = "";
+       u64 val;
        int rc;
 
        LASSERT(osd != NULL);
        if (unlikely(osd->od_mnt == NULL))
                return -EINPROGRESS;
 
-       rc = lprocfs_str_with_units_to_s64(buffer, count, &val, 'M');
-       if (rc)
+       if (count >= sizeof(kernbuf))
+               return -EINVAL;
+
+       if (copy_from_user(kernbuf, buffer, count))
+               return -EFAULT;
+       kernbuf[count] = 0;
+
+       rc = sysfs_memparse(kernbuf, count, &val, "MiB");
+       if (rc < 0)
                return rc;
-       if (val < 0)
-               return -ERANGE;
 
        if (val > PTLRPC_MAX_BRW_SIZE)
                return -ERANGE;
@@ -772,7 +745,7 @@ ssize_t index_backup_store(struct kobject *kobj, struct attribute *attr,
 }
 LUSTRE_RW_ATTR(index_backup);
 
-struct lprocfs_vars lprocfs_osd_obd_vars[] = {
+struct ldebugfs_vars ldebugfs_osd_obd_vars[] = {
        { .name =       "oi_scrub",
          .fops =       &ldiskfs_osd_oi_scrub_fops      },
        { .name =       "readcache_max_filesize",
@@ -789,6 +762,7 @@ static struct attribute *ldiskfs_attrs[] = {
        &lustre_attr_writethrough_cache_enable.attr,
        &lustre_attr_fstype.attr,
        &lustre_attr_mntdev.attr,
+       &lustre_attr_fallocate_zero_blocks.attr,
        &lustre_attr_force_sync.attr,
        &lustre_attr_nonrotational.attr,
        &lustre_attr_index_backup.attr,
@@ -796,6 +770,7 @@ static struct attribute *ldiskfs_attrs[] = {
        &lustre_attr_pdo.attr,
        &lustre_attr_full_scrub_ratio.attr,
        &lustre_attr_full_scrub_threshold_rate.attr,
+       &lustre_attr_extent_bytes_allocation.attr,
        NULL,
 };
 
@@ -814,14 +789,14 @@ int osd_procfs_init(struct osd_device *osd, const char *name)
        LASSERT(name);
        LASSERT(type);
 
-       LCONSOLE_INFO("osd-ldiskfs create tunables for %s\n", name);
+       CDEBUG(D_CONFIG, "%s: register osd-ldiskfs tunable parameters\n", name);
 
        /* put reference taken by class_search_type */
        kobject_put(&type->typ_kobj);
 
        osd->od_dt_dev.dd_ktype.default_attrs = ldiskfs_attrs;
        rc = dt_tunables_init(&osd->od_dt_dev, type, name,
-                             lprocfs_osd_obd_vars);
+                             ldebugfs_osd_obd_vars);
        if (rc) {
                CERROR("%s: cannot setup sysfs / debugfs entry: %d\n",
                       name, rc);
@@ -861,4 +836,3 @@ int osd_procfs_fini(struct osd_device *osd)
 
        return dt_tunables_fini(&osd->od_dt_dev);
 }
-#endif