4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2011, 2015, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
32 * lustre/osd/osd_lproc.c
34 * Author: Mikhail Pershin <tappro@sun.com>
37 #define DEBUG_SUBSYSTEM S_OSD
39 #include <lprocfs_status.h>
41 #include "osd_internal.h"
/*
 * Account one bulk I/O, described by @iobuf, in the histograms kept in
 * osd->od_brw_stats: pages per I/O, discontiguous pages and discontiguous
 * blocks.
 *
 * Every tally indexes the histogram array with <BRW_R_* index> + iobuf->dr_rw.
 * NOTE(review): that presumes dr_rw is 0 for reads and 1 for writes and that
 * each BRW_W_* index directly follows its BRW_R_* counterpart - confirm
 * against the definitions of struct osd_iobuf and the BRW_* indices.
 */
45 void osd_brw_stats_update(struct osd_device *osd, struct osd_iobuf *iobuf)
47 struct brw_stats *s = &osd->od_brw_stats;
48 sector_t *last_block = NULL;
49 struct page **pages = iobuf->dr_pages;
50 struct page *last_page = NULL;
51 unsigned long discont_pages = 0;
52 unsigned long discont_blocks = 0;
53 sector_t *blocks = iobuf->dr_blocks;
54 int i, nr_pages = iobuf->dr_npages;
56 int rw = iobuf->dr_rw;
/* an I/O without any pages is the unlikely special case */
58 if (unlikely(nr_pages == 0))
/* number of filesystem blocks that make up one page */
61 blocks_per_page = PAGE_SIZE >> osd_sb(osd)->s_blocksize_bits;
/* pages per I/O is recorded through the log2 tally helper */
63 lprocfs_oh_tally_log2(&s->hist[BRW_R_PAGES+rw], nr_pages);
65 while (nr_pages-- > 0) {
/* a page is discontiguous when its index is not the previous index + 1 */
66 if (last_page && (*pages)->index != (last_page->index + 1))
70 for (i = 0; i < blocks_per_page; i++) {
/* a block is discontiguous when it is not the previous block + 1 */
71 if (last_block && *blocks != (*last_block + 1))
/* remember the block just examined and step to the next entry */
73 last_block = blocks++;
/* record the discontinuity counters for this I/O */
77 lprocfs_oh_tally(&s->hist[BRW_R_DISCONT_PAGES+rw], discont_pages);
78 lprocfs_oh_tally(&s->hist[BRW_R_DISCONT_BLOCKS+rw], discont_blocks);
/*
 * Print one histogram table to @seq, with the @read histogram in the left
 * column group and the @write histogram in the right one.
 *
 * Each printed row shows the bucket count, its percentage of the column
 * total and the cumulative percentage, for reads and then for writes.
 * The row label is either the bucket index i or a value derived from
 * @scale: scale << i, scale << (i-10) with a "K" suffix, or
 * scale << (i-20) with an "M" suffix.
 * NOTE(review): @name and @units are presumably the labels substituted into
 * the second header line - confirm against the seq_printf() arguments.
 */
81 static void display_brw_stats(struct seq_file *seq, char *name, char *units,
82 struct obd_histogram *read, struct obd_histogram *write, int scale)
84 unsigned long read_tot, write_tot, r, w, read_cum = 0, write_cum = 0;
87 seq_printf(seq, "\n%26s read | write\n", " ");
88 seq_printf(seq, "%-22s %-5s %% cum %% | %-11s %% cum %%\n",
/* column totals, used as the denominators of the percentages below */
91 read_tot = lprocfs_oh_sum(read);
92 write_tot = lprocfs_oh_sum(write);
93 for (i = 0; i < OBD_HIST_MAX; i++) {
94 r = read->oh_buckets[i];
95 w = write->oh_buckets[i];
/* nothing has been accumulated in either column up to this bucket */
98 if (read_cum == 0 && write_cum == 0)
/* bucket label: raw index, or @scale shifted by the bucket index */
102 seq_printf(seq, "%u", i);
104 seq_printf(seq, "%u", scale << i);
106 seq_printf(seq, "%uK", scale << (i-10));
108 seq_printf(seq, "%uM", scale << (i-20));
110 seq_printf(seq, ":\t\t%10lu %3u %3u | %4lu %3u %3u\n",
111 r, pct(r, read_tot), pct(read_cum, read_tot),
112 w, pct(w, write_tot), pct(write_cum, write_tot));
/* both cumulative counts have reached their totals: no samples remain */
114 if (read_cum == read_tot && write_cum == write_tot)
/*
 * Dump all bulk read/write histograms of @brw_stats to @seq.
 *
 * Output starts with a "snapshot_time" line taken from the real-time clock,
 * followed by seven tables, each pairing the BRW_R_* histogram with its
 * BRW_W_* counterpart. The final argument of display_brw_stats() is its
 * @scale: 1 for "pages per bulk r/w", "I/O time" and "disk I/O size",
 * 0 for the other tables.
 */
119 static void brw_stats_show(struct seq_file *seq, struct brw_stats *brw_stats)
121 struct timespec64 now;
123 /* this sampling races with updates */
124 ktime_get_real_ts64(&now);
126 seq_printf(seq, "snapshot_time: %lld.%09ld (secs.nsecs)\n",
127 (s64)now.tv_sec, now.tv_nsec);
129 display_brw_stats(seq, "pages per bulk r/w", "rpcs",
130 &brw_stats->hist[BRW_R_PAGES],
131 &brw_stats->hist[BRW_W_PAGES], 1);
133 display_brw_stats(seq, "discontiguous pages", "rpcs",
134 &brw_stats->hist[BRW_R_DISCONT_PAGES],
135 &brw_stats->hist[BRW_W_DISCONT_PAGES], 0);
137 display_brw_stats(seq, "discontiguous blocks", "rpcs",
138 &brw_stats->hist[BRW_R_DISCONT_BLOCKS],
139 &brw_stats->hist[BRW_W_DISCONT_BLOCKS], 0);
141 display_brw_stats(seq, "disk fragmented I/Os", "ios",
142 &brw_stats->hist[BRW_R_DIO_FRAGS],
143 &brw_stats->hist[BRW_W_DIO_FRAGS], 0);
145 display_brw_stats(seq, "disk I/Os in flight", "ios",
146 &brw_stats->hist[BRW_R_RPC_HIST],
147 &brw_stats->hist[BRW_W_RPC_HIST], 0);
149 display_brw_stats(seq, "I/O time (1/1000s)", "ios",
150 &brw_stats->hist[BRW_R_IO_TIME],
151 &brw_stats->hist[BRW_W_IO_TIME], 1);
153 display_brw_stats(seq, "disk I/O size", "ios",
154 &brw_stats->hist[BRW_R_DISK_IOSIZE],
155 &brw_stats->hist[BRW_W_DISK_IOSIZE], 1);
/*
 * seq_file show handler for the "brw_stats" entry: the osd_device is taken
 * from seq->private and its od_brw_stats histograms are printed.
 */
158 static int osd_brw_stats_seq_show(struct seq_file *seq, void *v)
160 struct osd_device *osd = seq->private;
162 brw_stats_show(seq, &osd->od_brw_stats);
/*
 * Write handler for the "brw_stats" entry: clears every histogram in
 * osd->od_brw_stats (indices 0 .. BRW_LAST - 1). The osd_device is reached
 * through the seq_file stored in file->private_data.
 */
167 static ssize_t osd_brw_stats_seq_write(struct file *file,
168 const char __user *buf,
169 size_t len, loff_t *off)
171 struct seq_file *seq = file->private_data;
172 struct osd_device *osd = seq->private;
175 for (i = 0; i < BRW_LAST; i++)
176 lprocfs_oh_clear(&osd->od_brw_stats.hist[i]);
/*
 * NOTE(review): presumably generates osd_brw_stats_fops (used when the
 * "brw_stats" entry is created in osd_stats_init()) from the _seq_show and
 * _seq_write handlers - confirm against the macro definition.
 */
181 LPROC_SEQ_FOPS(osd_brw_stats);
/*
 * Set up the statistics of @osd:
 *  - initialise the spinlock of every brw histogram,
 *  - allocate od_stats with LPROC_OSD_LAST counters and register it as
 *    "stats" under osd->od_proc_entry,
 *  - initialise the individual counters (names/units as listed below),
 *  - create the "brw_stats" entry (mode 0644) backed by osd_brw_stats_fops.
 *
 * The three "thandle ..." counters are only set up when OSD_THANDLE_STATS
 * is non-zero.
 */
183 static int osd_stats_init(struct osd_device *osd)
188 for (i = 0; i < BRW_LAST; i++)
189 spin_lock_init(&osd->od_brw_stats.hist[i].oh_lock);
191 osd->od_stats = lprocfs_alloc_stats(LPROC_OSD_LAST, 0);
/* counters are only registered when the allocation succeeded */
192 if (osd->od_stats != NULL) {
193 result = lprocfs_register_stats(osd->od_proc_entry, "stats",
198 lprocfs_counter_init(osd->od_stats, LPROC_OSD_GET_PAGE,
199 LPROCFS_CNTR_AVGMINMAX|LPROCFS_CNTR_STDDEV,
201 lprocfs_counter_init(osd->od_stats, LPROC_OSD_NO_PAGE,
202 LPROCFS_CNTR_AVGMINMAX,
203 "get_page_failures", "num");
204 lprocfs_counter_init(osd->od_stats, LPROC_OSD_CACHE_ACCESS,
205 LPROCFS_CNTR_AVGMINMAX,
206 "cache_access", "pages");
207 lprocfs_counter_init(osd->od_stats, LPROC_OSD_CACHE_HIT,
208 LPROCFS_CNTR_AVGMINMAX,
209 "cache_hit", "pages");
210 lprocfs_counter_init(osd->od_stats, LPROC_OSD_CACHE_MISS,
211 LPROCFS_CNTR_AVGMINMAX,
212 "cache_miss", "pages");
213 #if OSD_THANDLE_STATS
214 lprocfs_counter_init(osd->od_stats, LPROC_OSD_THANDLE_STARTING,
215 LPROCFS_CNTR_AVGMINMAX,
216 "thandle starting", "usec");
217 lprocfs_counter_init(osd->od_stats, LPROC_OSD_THANDLE_OPEN,
218 LPROCFS_CNTR_AVGMINMAX,
219 "thandle open", "usec");
220 lprocfs_counter_init(osd->od_stats, LPROC_OSD_THANDLE_CLOSING,
221 LPROCFS_CNTR_AVGMINMAX,
222 "thandle closing", "usec");
/* expose the brw histograms; the osd_device is passed as the entry's data */
224 result = lprocfs_seq_create(osd->od_proc_entry, "brw_stats",
225 0644, &osd_brw_stats_fops, osd);
/*
 * sysfs show handler for "fstype": always emits the fixed string "ldiskfs\n"
 * and returns the value reported by sprintf().
 */
233 static ssize_t fstype_show(struct kobject *kobj, struct attribute *attr,
236 return sprintf(buf, "ldiskfs\n");
/* read-only attribute; referenced as lustre_attr_fstype in ldiskfs_attrs[] */
238 LUSTRE_RO_ATTR(fstype);
/*
 * sysfs show handler for "mntdev": prints osd->od_mntdev followed by a
 * newline. The osd_device is derived from the dt_device that embeds @kobj.
 */
240 static ssize_t mntdev_show(struct kobject *kobj, struct attribute *attr,
243 struct dt_device *dt = container_of(kobj, struct dt_device,
245 struct osd_device *osd = osd_dt_dev(dt);
/* od_mnt not being set is checked for before any device state is reported */
248 if (unlikely(!osd->od_mnt))
251 return sprintf(buf, "%s\n", osd->od_mntdev);
/* read-only attribute; referenced as lustre_attr_mntdev in ldiskfs_attrs[] */
253 LUSTRE_RO_ATTR(mntdev);
/*
 * sysfs show handler for "read_cache_enable": prints osd->od_read_cache as
 * an unsigned decimal followed by a newline.
 */
255 static ssize_t read_cache_enable_show(struct kobject *kobj,
256 struct attribute *attr,
259 struct dt_device *dt = container_of(kobj, struct dt_device,
261 struct osd_device *osd = osd_dt_dev(dt);
/* od_mnt not being set is checked for before any device state is reported */
264 if (unlikely(!osd->od_mnt))
267 return sprintf(buf, "%u\n", osd->od_read_cache);
/*
 * sysfs store handler for "read_cache_enable": parses @buffer as a boolean
 * with kstrtobool() and stores the result in osd->od_read_cache.
 */
270 static ssize_t read_cache_enable_store(struct kobject *kobj,
271 struct attribute *attr,
272 const char *buffer, size_t count)
274 struct dt_device *dt = container_of(kobj, struct dt_device,
276 struct osd_device *osd = osd_dt_dev(dt);
/* od_mnt not being set is checked for before the input is parsed */
281 if (unlikely(!osd->od_mnt))
284 rc = kstrtobool(buffer, &val);
288 osd->od_read_cache = val;
/* read-write attribute; referenced as lustre_attr_read_cache_enable */
291 LUSTRE_RW_ATTR(read_cache_enable);
/*
 * sysfs show handler for "writethrough_cache_enable": prints
 * osd->od_writethrough_cache as an unsigned decimal followed by a newline.
 */
293 static ssize_t writethrough_cache_enable_show(struct kobject *kobj,
294 struct attribute *attr,
297 struct dt_device *dt = container_of(kobj, struct dt_device,
299 struct osd_device *osd = osd_dt_dev(dt);
/* od_mnt not being set is checked for before any device state is reported */
302 if (unlikely(!osd->od_mnt))
305 return sprintf(buf, "%u\n", osd->od_writethrough_cache);
/*
 * sysfs store handler for "writethrough_cache_enable": parses @buffer as a
 * boolean with kstrtobool() and stores it in osd->od_writethrough_cache.
 */
308 static ssize_t writethrough_cache_enable_store(struct kobject *kobj,
309 struct attribute *attr,
313 struct dt_device *dt = container_of(kobj, struct dt_device,
315 struct osd_device *osd = osd_dt_dev(dt);
/* od_mnt not being set is checked for before the input is parsed */
320 if (unlikely(!osd->od_mnt))
323 rc = kstrtobool(buffer, &val);
327 osd->od_writethrough_cache = val;
/* read-write attribute; referenced as lustre_attr_writethrough_cache_enable */
330 LUSTRE_RW_ATTR(writethrough_cache_enable);
/*
 * sysfs store handler for "force_sync": any write triggers dt_sync() on the
 * device. A temporary lu_env is initialised with LCT_LOCAL for the call.
 * The content of @buffer is not referenced by the statements shown here.
 *
 * Returns @count when rc is 0, otherwise rc.
 *
 * NOTE(review): unlike the neighbouring attribute handlers this function is
 * not declared static - confirm whether external linkage is intended.
 */
332 ssize_t force_sync_store(struct kobject *kobj, struct attribute *attr,
333 const char *buffer, size_t count)
335 struct dt_device *dt = container_of(kobj, struct dt_device,
337 struct osd_device *osd = osd_dt_dev(dt);
/* od_mnt not being set is checked for before the sync is attempted */
342 if (unlikely(!osd->od_mnt))
345 rc = lu_env_init(&env, LCT_LOCAL);
349 rc = dt_sync(&env, dt);
352 return rc == 0 ? count : rc;
/* write-only attribute; referenced as lustre_attr_force_sync */
354 LUSTRE_WO_ATTR(force_sync);
/*
 * sysfs show handler for "nonrotational": prints osd->od_nonrotational as an
 * unsigned decimal followed by a newline.
 */
356 static ssize_t nonrotational_show(struct kobject *kobj, struct attribute *attr,
359 struct dt_device *dt = container_of(kobj, struct dt_device,
361 struct osd_device *osd = osd_dt_dev(dt);
/* od_mnt not being set is checked for before any device state is reported */
364 if (unlikely(!osd->od_mnt))
367 return sprintf(buf, "%u\n", osd->od_nonrotational);
/*
 * sysfs store handler for "nonrotational": parses @buffer as a boolean with
 * kstrtobool() and stores the result in osd->od_nonrotational.
 */
370 static ssize_t nonrotational_store(struct kobject *kobj,
371 struct attribute *attr, const char *buffer,
374 struct dt_device *dt = container_of(kobj, struct dt_device,
376 struct osd_device *osd = osd_dt_dev(dt);
/* od_mnt not being set is checked for before the input is parsed */
381 if (unlikely(!osd->od_mnt))
384 rc = kstrtobool(buffer, &val);
388 osd->od_nonrotational = val;
/* read-write attribute; referenced as lustre_attr_nonrotational */
391 LUSTRE_RW_ATTR(nonrotational);
/*
 * sysfs show handler for "pdo": prints "ON" when ldiskfs_pdo is non-zero and
 * "OFF" otherwise, followed by a newline. This reads the ldiskfs_pdo
 * variable rather than a field of the device.
 */
393 static ssize_t pdo_show(struct kobject *kobj, struct attribute *attr,
396 return sprintf(buf, "%s\n", ldiskfs_pdo ? "ON" : "OFF");
/*
 * sysfs store handler for "pdo": parses @buffer as a boolean into the local
 * `pdo` with kstrtobool().
 * NOTE(review): the parsed value is presumably copied to ldiskfs_pdo, which
 * pdo_show() reports - confirm against the remainder of the function.
 */
399 static ssize_t pdo_store(struct kobject *kobj, struct attribute *attr,
400 const char *buffer, size_t count)
405 rc = kstrtobool(buffer, &pdo);
/*
 * sysfs show handler for "auto_scrub": prints dev->od_auto_scrub_interval as
 * a signed 64-bit decimal followed by a newline.
 */
415 static ssize_t auto_scrub_show(struct kobject *kobj, struct attribute *attr,
418 struct dt_device *dt = container_of(kobj, struct dt_device,
420 struct osd_device *dev = osd_dt_dev(dt);
/* od_mnt not being set is checked for before any device state is reported */
423 if (unlikely(!dev->od_mnt))
426 return sprintf(buf, "%lld\n", dev->od_auto_scrub_interval);
/*
 * sysfs store handler for "auto_scrub": parses @buffer as a signed 64-bit
 * integer (kstrtoll() with base 0, i.e. the base is auto-detected from the
 * prefix) and stores it in dev->od_auto_scrub_interval.
 */
429 static ssize_t auto_scrub_store(struct kobject *kobj, struct attribute *attr,
430 const char *buffer, size_t count)
432 struct dt_device *dt = container_of(kobj, struct dt_device,
434 struct osd_device *dev = osd_dt_dev(dt);
/* od_mnt not being set is checked for before the input is parsed */
439 if (unlikely(!dev->od_mnt))
442 rc = kstrtoll(buffer, 0, &val);
446 dev->od_auto_scrub_interval = val;
/* read-write attribute; referenced as lustre_attr_auto_scrub */
449 LUSTRE_RW_ATTR(auto_scrub);
/*
 * sysfs show handler for "full_scrub_ratio": prints
 * dev->od_full_scrub_ratio as an unsigned 64-bit decimal plus a newline.
 */
451 static ssize_t full_scrub_ratio_show(struct kobject *kobj,
452 struct attribute *attr,
455 struct dt_device *dt = container_of(kobj, struct dt_device,
457 struct osd_device *dev = osd_dt_dev(dt);
/* od_mnt not being set is checked for before any device state is reported */
460 if (unlikely(!dev->od_mnt))
463 return sprintf(buf, "%llu\n", dev->od_full_scrub_ratio);
/*
 * sysfs store handler for "full_scrub_ratio": parses @buffer with kstrtoll()
 * (base 0) and stores the value in dev->od_full_scrub_ratio.
 *
 * NOTE(review): the input is parsed as a signed value while the show handler
 * prints the field with %llu - confirm that negative input is rejected
 * before the assignment.
 */
466 static ssize_t full_scrub_ratio_store(struct kobject *kobj,
467 struct attribute *attr,
468 const char *buffer, size_t count)
470 struct dt_device *dt = container_of(kobj, struct dt_device,
472 struct osd_device *dev = osd_dt_dev(dt);
/* od_mnt not being set is checked for before the input is parsed */
477 if (unlikely(!dev->od_mnt))
480 rc = kstrtoll(buffer, 0, &val);
487 dev->od_full_scrub_ratio = val;
/* read-write attribute; referenced as lustre_attr_full_scrub_ratio */
490 LUSTRE_RW_ATTR(full_scrub_ratio);
/*
 * sysfs show handler for "full_scrub_threshold_rate": prints
 * dev->od_full_scrub_threshold_rate followed by the fixed annotation
 * " (bad OI mappings/minute)" and a newline.
 */
492 static ssize_t full_scrub_threshold_rate_show(struct kobject *kobj,
493 struct attribute *attr,
496 struct dt_device *dt = container_of(kobj, struct dt_device,
498 struct osd_device *dev = osd_dt_dev(dt);
/* od_mnt not being set is checked for before any device state is reported */
501 if (unlikely(!dev->od_mnt))
504 return sprintf(buf, "%llu (bad OI mappings/minute)\n",
505 dev->od_full_scrub_threshold_rate);
/*
 * sysfs store handler for "full_scrub_threshold_rate": parses @buffer as an
 * unsigned 64-bit integer (kstrtoull() with base 0) and stores it in
 * dev->od_full_scrub_threshold_rate.
 */
508 static ssize_t full_scrub_threshold_rate_store(struct kobject *kobj,
509 struct attribute *attr,
510 const char *buffer, size_t count)
512 struct dt_device *dt = container_of(kobj, struct dt_device,
514 struct osd_device *dev = osd_dt_dev(dt);
/* od_mnt not being set is checked for before the input is parsed */
519 if (unlikely(!dev->od_mnt))
522 rc = kstrtoull(buffer, 0, &val);
526 dev->od_full_scrub_threshold_rate = val;
/* read-write attribute; referenced as lustre_attr_full_scrub_threshold_rate */
529 LUSTRE_RW_ATTR(full_scrub_threshold_rate);
/*
 * seq_file show handler for the "oi_scrub" entry: m->private is treated as
 * the dt_device, converted to its osd_device, and the scrub state is written
 * to @m by osd_scrub_dump().
 */
531 static int ldiskfs_osd_oi_scrub_seq_show(struct seq_file *m, void *data)
533 struct osd_device *dev = osd_dt_dev((struct dt_device *)m->private);
535 LASSERT(dev != NULL);
/* od_mnt not being set is checked for before the dump */
536 if (unlikely(dev->od_mnt == NULL))
539 osd_scrub_dump(m, dev);
/* read-only fops; referenced as ldiskfs_osd_oi_scrub_fops in lprocfs_osd_obd_vars[] */
543 LDEBUGFS_SEQ_FOPS_RO(ldiskfs_osd_oi_scrub);
/*
 * seq_file show handler for "readcache_max_filesize": prints
 * osd->od_readcache_max_filesize as an unsigned 64-bit decimal.
 */
545 static int ldiskfs_osd_readcache_seq_show(struct seq_file *m, void *data)
547 struct osd_device *osd = osd_dt_dev((struct dt_device *)m->private);
549 LASSERT(osd != NULL);
/* od_mnt not being set is checked for before the value is printed */
550 if (unlikely(osd->od_mnt == NULL))
553 seq_printf(m, "%llu\n", osd->od_readcache_max_filesize);
/*
 * Write handler for "readcache_max_filesize".
 *
 * The user data is copied into a 22-byte, zero-initialised stack buffer and
 * parsed by sysfs_memparse() with "B" as its unit argument. The stored value
 * is clamped: anything above OSD_MAX_CACHE_SIZE is replaced by
 * OSD_MAX_CACHE_SIZE.
 */
558 ldiskfs_osd_readcache_seq_write(struct file *file, const char __user *buffer,
559 size_t count, loff_t *off)
561 struct seq_file *m = file->private_data;
562 struct dt_device *dt = m->private;
563 struct osd_device *osd = osd_dt_dev(dt);
564 char kernbuf[22] = "";
568 LASSERT(osd != NULL);
/* od_mnt not being set is checked for before the input is read */
569 if (unlikely(osd->od_mnt == NULL))
/* input that would not leave room for a terminating NUL is not accepted */
572 if (count >= sizeof(kernbuf))
575 if (copy_from_user(kernbuf, buffer, count))
579 rc = sysfs_memparse(kernbuf, count, &val, "B");
583 osd->od_readcache_max_filesize = val > OSD_MAX_CACHE_SIZE ?
584 OSD_MAX_CACHE_SIZE : val;
/* fops referenced as ldiskfs_osd_readcache_fops in lprocfs_osd_obd_vars[] */
588 LDEBUGFS_SEQ_FOPS(ldiskfs_osd_readcache);
/*
 * seq_file show handler for "readcache_max_io_mb": prints
 * osd->od_readcache_max_iosize shifted right by 20 bits, i.e. divided by
 * 2^20 (MiB if the field is kept in bytes).
 */
590 static int ldiskfs_osd_readcache_max_io_seq_show(struct seq_file *m, void *data)
592 struct osd_device *osd = osd_dt_dev((struct dt_device *)m->private);
594 LASSERT(osd != NULL);
/* od_mnt not being set is checked for before the value is printed */
595 if (unlikely(osd->od_mnt == NULL))
598 seq_printf(m, "%lu\n", osd->od_readcache_max_iosize >> 20);
/*
 * Write handler for "readcache_max_io_mb".
 *
 * The user data is copied into a 22-byte, zero-initialised stack buffer and
 * parsed by sysfs_memparse() with "MiB" as its unit argument. The parsed
 * value is compared against PTLRPC_MAX_BRW_SIZE before being stored in
 * osd->od_readcache_max_iosize.
 */
603 ldiskfs_osd_readcache_max_io_seq_write(struct file *file,
604 const char __user *buffer,
605 size_t count, loff_t *off)
607 struct seq_file *m = file->private_data;
608 struct dt_device *dt = m->private;
609 struct osd_device *osd = osd_dt_dev(dt);
610 char kernbuf[22] = "";
614 LASSERT(osd != NULL);
/* od_mnt not being set is checked for before the input is read */
615 if (unlikely(osd->od_mnt == NULL))
/* input that would not leave room for a terminating NUL is not accepted */
618 if (count >= sizeof(kernbuf))
621 if (copy_from_user(kernbuf, buffer, count))
625 rc = sysfs_memparse(kernbuf, count, &val, "MiB");
/* upper bound on the accepted I/O size */
629 if (val > PTLRPC_MAX_BRW_SIZE)
631 osd->od_readcache_max_iosize = val;
/* fops referenced as ldiskfs_osd_readcache_max_io_fops in lprocfs_osd_obd_vars[] */
635 LDEBUGFS_SEQ_FOPS(ldiskfs_osd_readcache_max_io);
/*
 * seq_file show handler for "writethrough_max_io_mb": prints
 * osd->od_writethrough_max_iosize shifted right by 20 bits, i.e. divided by
 * 2^20 (MiB if the field is kept in bytes).
 */
637 static int ldiskfs_osd_writethrough_max_io_seq_show(struct seq_file *m,
640 struct osd_device *osd = osd_dt_dev((struct dt_device *)m->private);
642 LASSERT(osd != NULL);
/* od_mnt not being set is checked for before the value is printed */
643 if (unlikely(osd->od_mnt == NULL))
646 seq_printf(m, "%lu\n", osd->od_writethrough_max_iosize >> 20);
/*
 * Write handler for "writethrough_max_io_mb".
 *
 * The user data is copied into a 22-byte, zero-initialised stack buffer and
 * parsed by sysfs_memparse() with "MiB" as its unit argument. The parsed
 * value is compared against PTLRPC_MAX_BRW_SIZE before being stored in
 * osd->od_writethrough_max_iosize.
 */
651 ldiskfs_osd_writethrough_max_io_seq_write(struct file *file,
652 const char __user *buffer,
653 size_t count, loff_t *off)
655 struct seq_file *m = file->private_data;
656 struct dt_device *dt = m->private;
657 struct osd_device *osd = osd_dt_dev(dt);
658 char kernbuf[22] = "";
662 LASSERT(osd != NULL);
/* od_mnt not being set is checked for before the input is read */
663 if (unlikely(osd->od_mnt == NULL))
/* input that would not leave room for a terminating NUL is not accepted */
666 if (count >= sizeof(kernbuf))
669 if (copy_from_user(kernbuf, buffer, count))
673 rc = sysfs_memparse(kernbuf, count, &val, "MiB");
/* upper bound on the accepted I/O size */
677 if (val > PTLRPC_MAX_BRW_SIZE)
679 osd->od_writethrough_max_iosize = val;
/* fops referenced as ldiskfs_osd_writethrough_max_io_fops in lprocfs_osd_obd_vars[] */
683 LDEBUGFS_SEQ_FOPS(ldiskfs_osd_writethrough_max_io);
685 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 52, 0)
/*
 * sysfs show handler for "index_in_idif": prints dev->od_index_in_idif,
 * cast to int, followed by a newline.
 */
686 static ssize_t index_in_idif_show(struct kobject *kobj, struct attribute *attr,
689 struct dt_device *dt = container_of(kobj, struct dt_device,
691 struct osd_device *dev = osd_dt_dev(dt);
/* od_mnt not being set is checked for before any device state is reported */
694 if (unlikely(!dev->od_mnt))
697 return sprintf(buf, "%d\n", (int)(dev->od_index_in_idif));
/*
 * sysfs store handler for "index_in_idif".
 *
 * @buffer is parsed as a boolean. When the feature is already enabled
 * (dev->od_index_in_idif set) a console warning states that it cannot be
 * reverted. Otherwise the OBD_ROCOMPAT_IDX_IN_IDIF flag is OR-ed into the
 * target's lsd_feature_rocompat, the server data is updated through
 * tgt_server_data_update() under a temporary LCT_DT_THREAD environment, and
 * dev->od_index_in_idif is set to 1.
 *
 * NOTE(review): the rocompat flag is set in memory before
 * tgt_server_data_update() is called - confirm how a failure of that call is
 * handled.
 */
700 static ssize_t index_in_idif_store(struct kobject *kobj,
701 struct attribute *attr,
702 const char *buffer, size_t count)
704 struct dt_device *dt = container_of(kobj, struct dt_device,
706 struct osd_device *dev = osd_dt_dev(dt);
707 struct lu_target *tgt;
/* od_mnt not being set is checked for before the input is parsed */
713 if (unlikely(!dev->od_mnt))
716 rc = kstrtobool(buffer, &val);
/* the feature has been switched on earlier */
720 if (dev->od_index_in_idif) {
724 LCONSOLE_WARN("%s: OST-index in IDIF has been enabled, "
725 "it cannot be reverted back.\n", osd_name(dev));
732 rc = lu_env_init(&env, LCT_DT_THREAD);
/* the target is reached through the lu_site of this device */
736 tgt = dev->od_dt_dev.dd_lu_dev.ld_site->ls_tgt;
737 tgt->lut_lsd.lsd_feature_rocompat |= OBD_ROCOMPAT_IDX_IN_IDIF;
738 rc = tgt_server_data_update(&env, tgt, 1);
743 LCONSOLE_INFO("%s: enable OST-index in IDIF successfully, "
744 "it cannot be reverted back.\n", osd_name(dev));
746 dev->od_index_in_idif = 1;
/* read-write attribute; registered by osd_register_proc_index_in_idif() */
749 LUSTRE_RW_ATTR(index_in_idif);
/*
 * Create the "index_in_idif" sysfs file on the kobject embedded in the
 * dt_device of @osd. Returns the result of sysfs_create_file().
 */
751 int osd_register_proc_index_in_idif(struct osd_device *osd)
753 struct dt_device *dt = &osd->od_dt_dev;
755 return sysfs_create_file(&dt->dd_kobj, &lustre_attr_index_in_idif.attr);
/*
 * sysfs show handler for "index_backup": prints
 * dev->od_index_backup_policy as a signed decimal followed by a newline.
 */
759 static ssize_t index_backup_show(struct kobject *kobj, struct attribute *attr,
762 struct dt_device *dt = container_of(kobj, struct dt_device,
764 struct osd_device *dev = osd_dt_dev(dt);
/* od_mnt not being set is checked for before any device state is reported */
767 if (unlikely(!dev->od_mnt))
770 return sprintf(buf, "%d\n", dev->od_index_backup_policy);
/*
 * sysfs store handler for "index_backup": parses @buffer as an int
 * (kstrtoint() with base 0) and stores it in dev->od_index_backup_policy.
 *
 * NOTE(review): unlike most attribute handlers in this file this function is
 * not declared static - confirm whether external linkage is intended.
 */
773 ssize_t index_backup_store(struct kobject *kobj, struct attribute *attr,
774 const char *buffer, size_t count)
776 struct dt_device *dt = container_of(kobj, struct dt_device,
778 struct osd_device *dev = osd_dt_dev(dt);
/* od_mnt not being set is checked for before the input is parsed */
783 if (unlikely(!dev->od_mnt))
786 rc = kstrtoint(buffer, 0, &val);
790 dev->od_index_backup_policy = val;
/* read-write attribute; referenced as lustre_attr_index_backup */
793 LUSTRE_RW_ATTR(index_backup);
/*
 * Table mapping tunable entry names to their file operations. It is handed
 * to dt_tunables_init() by osd_procfs_init().
 */
795 struct lprocfs_vars lprocfs_osd_obd_vars[] = {
796 { .name = "oi_scrub",
797 .fops = &ldiskfs_osd_oi_scrub_fops },
798 { .name = "readcache_max_filesize",
799 .fops = &ldiskfs_osd_readcache_fops },
800 { .name = "readcache_max_io_mb",
801 .fops = &ldiskfs_osd_readcache_max_io_fops },
802 { .name = "writethrough_max_io_mb",
803 .fops = &ldiskfs_osd_writethrough_max_io_fops },
/*
 * sysfs attributes of an osd-ldiskfs device. osd_procfs_init() installs this
 * array as dd_ktype.default_attrs of the device. The lustre_attr_* objects
 * correspond to the LUSTRE_RO_ATTR / LUSTRE_RW_ATTR / LUSTRE_WO_ATTR
 * invocations earlier in this file.
 */
807 static struct attribute *ldiskfs_attrs[] = {
808 &lustre_attr_read_cache_enable.attr,
809 &lustre_attr_writethrough_cache_enable.attr,
810 &lustre_attr_fstype.attr,
811 &lustre_attr_mntdev.attr,
812 &lustre_attr_force_sync.attr,
813 &lustre_attr_nonrotational.attr,
814 &lustre_attr_index_backup.attr,
815 &lustre_attr_auto_scrub.attr,
816 &lustre_attr_pdo.attr,
817 &lustre_attr_full_scrub_ratio.attr,
818 &lustre_attr_full_scrub_threshold_rate.attr,
/*
 * Create the tunables and statistics entries for the device @osd under the
 * name @name.
 *
 * Steps, in order:
 *  - look up the obd_type registered as LUSTRE_OSD_LDISKFS_NAME and drop the
 *    reference that lookup took,
 *  - install ldiskfs_attrs as the default sysfs attributes and call
 *    dt_tunables_init() with lprocfs_osd_obd_vars,
 *  - register a proc entry named @name under type->typ_procroot (a failure
 *    is logged and od_proc_entry is reset to NULL),
 *  - initialise the statistics through osd_stats_init().
 * osd_procfs_fini() is invoked from this function as well, presumably to
 * undo a partial setup after a failure.
 *
 * NOTE(review): `type` is still dereferenced (type->typ_procroot) after
 * kobject_put() has released the lookup reference - confirm that the
 * obd_type is guaranteed to outlive this call.
 */
822 int osd_procfs_init(struct osd_device *osd, const char *name)
824 struct obd_type *type;
829 /* at the moment there is no linkage between lu_type
830 * and obd_type, so we lookup obd_type this way
832 type = class_search_type(LUSTRE_OSD_LDISKFS_NAME);
837 LCONSOLE_INFO("osd-ldiskfs create tunables for %s\n", name);
839 /* put reference taken by class_search_type */
840 kobject_put(&type->typ_kobj);
/* sysfs attributes must be in place before the tunables are created */
842 osd->od_dt_dev.dd_ktype.default_attrs = ldiskfs_attrs;
843 rc = dt_tunables_init(&osd->od_dt_dev, type, name,
844 lprocfs_osd_obd_vars);
846 CERROR("%s: cannot setup sysfs / debugfs entry: %d\n",
/* a proc entry that already exists is not registered a second time */
851 if (osd->od_proc_entry)
854 /* Find the type procroot and add the proc entry for this device */
855 osd->od_proc_entry = lprocfs_register(name, type->typ_procroot,
856 NULL, &osd->od_dt_dev);
857 if (IS_ERR(osd->od_proc_entry)) {
858 rc = PTR_ERR(osd->od_proc_entry);
859 CERROR("Error %d setting up lprocfs for %s\n",
/* never leave an ERR_PTR behind in od_proc_entry */
861 osd->od_proc_entry = NULL;
865 rc = osd_stats_init(osd);
870 osd_procfs_fini(osd);
/*
 * Tear down what osd_procfs_init() set up for @osd: free od_stats, remove
 * the proc entry when one is registered, and release the tunables.
 * Returns the result of dt_tunables_fini().
 */
874 int osd_procfs_fini(struct osd_device *osd)
877 lprocfs_free_stats(&osd->od_stats);
/* od_proc_entry is NULL when registration never happened or failed */
879 if (osd->od_proc_entry)
880 lprocfs_remove(&osd->od_proc_entry);
882 return dt_tunables_fini(&osd->od_dt_dev);