4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
24 * Use is subject to license terms.
26 * Copyright (c) 2012, 2016, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
32 #define DEBUG_SUBSYSTEM S_CLASS
34 #include <lprocfs_status.h>
35 #include <obd_class.h>
36 #include <linux/seq_file.h>
37 #include "lod_internal.h"
38 #include <uapi/linux/lustre/lustre_param.h>
41 * Notice, all the functions below (except for lod_procfs_init() and
42 * lod_procfs_fini()) are not supposed to be used directly. They are
43 * called by Linux kernel's procfs.
49 * Show default stripe size.
51 * \param[in] m seq file
52 * \param[in] v unused for single entry
54 * \retval 0 on success
55 * \retval negative error code if failed
57 static int lod_stripesize_seq_show(struct seq_file *m, void *v)
/* The seq_file private pointer holds the OBD device this entry belongs to. */
59 struct obd_device *dev = m->private;
60 struct lod_device *lod;
/* Convert the OBD's lu_device into the LOD device via lu2lod_dev(). */
63 lod = lu2lod_dev(dev->obd_lu_dev);
/* Print the descriptor's default stripe size as an unsigned decimal line. */
64 seq_printf(m, "%llu\n",
65 lod->lod_desc.ld_default_stripe_size);
70 * Set default stripe size.
72 * \param[in] file proc file
73 * \param[in] buffer string containing the maximum number of bytes stored in
74 * each object before moving to the next object in the
76 * \param[in] count @buffer length
77 * \param[in] off unused for single entry
79 * \retval @count on success
80 * \retval negative error code if failed
83 lod_stripesize_seq_write(struct file *file, const char __user *buffer,
84 size_t count, loff_t *off)
/* file->private_data is the seq_file; its private pointer is the OBD device. */
86 struct seq_file *m = file->private_data;
87 struct obd_device *dev = m->private;
88 struct lod_device *lod;
93 lod = lu2lod_dev(dev->obd_lu_dev);
/*
 * Parse the user buffer into val.
 * NOTE(review): the '1' argument presumably selects the default unit used
 * when the string has no suffix -- confirm against
 * lprocfs_str_with_units_to_s64().
 */
94 rc = lprocfs_str_with_units_to_s64(buffer, count, &val, '1');
/* lod_fix_desc_stripe_size() gets a pointer, so it may adjust val in place
 * before the value is stored in the descriptor. */
100 lod_fix_desc_stripe_size(&val);
101 lod->lod_desc.ld_default_stripe_size = val;
/* Instantiate the fops for this entry; the lprocfs_lod_obd_vars table
 * references them as lod_stripesize_fops. */
105 LPROC_SEQ_FOPS(lod_stripesize);
108 * Show default stripe offset.
110 * \param[in] m seq file
111 * \param[in] v unused for single entry
113 * \retval 0 on success
114 * \retval negative error code if failed
116 static int lod_stripeoffset_seq_show(struct seq_file *m, void *v)
118 struct obd_device *dev = m->private;
119 struct lod_device *lod;
/* Assert the OBD device before it is dereferenced on the next line. */
121 LASSERT(dev != NULL);
122 lod = lu2lod_dev(dev->obd_lu_dev);
/* The offset is printed with a signed format ("%lld"). */
123 seq_printf(m, "%lld\n", lod->lod_desc.ld_default_stripe_offset);
128 * Set default stripe offset.
130 * Usually contains -1, allowing Lustre to balance objects among OSTs;
131 * any other value may cause severe OST imbalance.
133 * \param[in] file proc file
134 * \param[in] buffer string describing starting OST index for new files
135 * \param[in] count @buffer length
136 * \param[in] off unused for single entry
138 * \retval @count on success
139 * \retval negative error code if failed
142 lod_stripeoffset_seq_write(struct file *file, const char __user *buffer,
143 size_t count, loff_t *off)
/* file->private_data is the seq_file; its private pointer is the OBD device. */
145 struct seq_file *m = file->private_data;
146 struct obd_device *dev = m->private;
147 struct lod_device *lod;
151 LASSERT(dev != NULL);
152 lod = lu2lod_dev(dev->obd_lu_dev);
/* Parse the user buffer as a plain signed 64-bit integer (no unit suffix). */
153 rc = lprocfs_str_to_s64(buffer, count, &val);
/* Store the parsed value as the new default stripe offset. */
159 lod->lod_desc.ld_default_stripe_offset = val;
/* Instantiate the fops referenced by the table as lod_stripeoffset_fops. */
163 LPROC_SEQ_FOPS(lod_stripeoffset);
166 * Show default striping pattern (LOV_PATTERN_*).
168 * \param[in] m seq file
169 * \param[in] v unused for single entry
171 * \retval 0 on success
172 * \retval negative error code if failed
174 static int lod_stripetype_seq_show(struct seq_file *m, void *v)
176 struct obd_device *dev = m->private;
177 struct lod_device *lod;
/* Assert the OBD device before it is dereferenced on the next line. */
179 LASSERT(dev != NULL);
180 lod = lu2lod_dev(dev->obd_lu_dev);
/* The pattern is printed as a raw unsigned number, not as a name. */
181 seq_printf(m, "%u\n", lod->lod_desc.ld_pattern);
186 * Set default striping pattern (a number, not a human-readable string).
188 * \param[in] file proc file
189 * \param[in] buffer string containing the default striping pattern for new
190 * files. This is an integer LOV_PATTERN_* value
191 * \param[in] count @buffer length
192 * \param[in] off unused for single entry
194 * \retval @count on success
195 * \retval negative error code if failed
198 lod_stripetype_seq_write(struct file *file, const char __user *buffer,
199 size_t count, loff_t *off)
/* file->private_data is the seq_file; its private pointer is the OBD device. */
201 struct seq_file *m = file->private_data;
202 struct obd_device *dev = m->private;
203 struct lod_device *lod;
208 LASSERT(dev != NULL);
209 lod = lu2lod_dev(dev->obd_lu_dev);
/* Parse the user buffer as a signed 64-bit integer into val. */
210 rc = lprocfs_str_to_s64(buffer, count, &val);
/* lod_fix_desc_pattern() gets a pointer, so it may adjust pattern in place
 * before it becomes the descriptor's default pattern. */
217 lod_fix_desc_pattern(&pattern);
218 lod->lod_desc.ld_pattern = pattern;
/* Instantiate the fops referenced by the table as lod_stripetype_fops. */
222 LPROC_SEQ_FOPS(lod_stripetype);
225 * Show default number of stripes.
227 * \param[in] m seq file
228 * \param[in] v unused for single entry
230 * \retval 0 on success
231 * \retval negative error code if failed
233 static int lod_stripecount_seq_show(struct seq_file *m, void *v)
235 struct obd_device *dev = m->private;
236 struct lod_device *lod;
238 LASSERT(dev != NULL);
239 lod = lu2lod_dev(dev->obd_lu_dev);
/*
 * The count is incremented, truncated to __s16 and decremented again.
 * Assuming __s16 is a signed 16-bit type, a stored count whose low 16 bits
 * are all ones prints as -1, while small counts print unchanged.
 */
240 seq_printf(m, "%d\n",
241 (__s16)(lod->lod_desc.ld_default_stripe_count + 1) - 1);
246 * Set default number of stripes.
248 * \param[in] file proc file
249 * \param[in] buffer string containing the default number of stripes
251 * \param[in] count @buffer length
252 * \param[in] off unused for single entry
254 * \retval @count on success
255 * \retval negative error code otherwise
258 lod_stripecount_seq_write(struct file *file, const char __user *buffer,
259 size_t count, loff_t *off)
/* file->private_data is the seq_file; its private pointer is the OBD device. */
261 struct seq_file *m = file->private_data;
262 struct obd_device *dev = m->private;
263 struct lod_device *lod;
268 LASSERT(dev != NULL);
269 lod = lu2lod_dev(dev->obd_lu_dev);
/* Parse the user buffer as a signed 64-bit integer into val. */
270 rc = lprocfs_str_to_s64(buffer, count, &val);
/* lod_fix_desc_stripe_count() gets a pointer, so it may adjust stripe_count
 * in place before it becomes the descriptor's default stripe count. */
277 lod_fix_desc_stripe_count(&stripe_count);
278 lod->lod_desc.ld_default_stripe_count = stripe_count;
/* Instantiate the fops referenced by the table as lod_stripecount_fops. */
282 LPROC_SEQ_FOPS(lod_stripecount);
285 * Show number of targets.
287 * \param[in] m seq file
288 * \param[in] v unused for single entry
290 * \retval 0 on success
291 * \retval negative error code if failed
293 static int lod_numobd_seq_show(struct seq_file *m, void *v)
295 struct obd_device *dev = m->private;
296 struct lod_device *lod;
298 LASSERT(dev != NULL);
299 lod = lu2lod_dev(dev->obd_lu_dev);
/* Print the descriptor's target count (ld_tgt_count) as an unsigned decimal. */
300 seq_printf(m, "%u\n", lod->lod_desc.ld_tgt_count);
/* Only a show handler exists for this entry, so the read-only fops macro
 * is used. */
303 LPROC_SEQ_FOPS_RO(lod_numobd);
306 * Show number of active targets.
308 * \param[in] m seq file
309 * \param[in] v unused for single entry
311 * \retval 0 on success
312 * \retval negative error code if failed
314 static int lod_activeobd_seq_show(struct seq_file *m, void *v)
316 struct obd_device *dev = m->private;
317 struct lod_device *lod;
319 LASSERT(dev != NULL);
320 lod = lu2lod_dev(dev->obd_lu_dev);
/* Print the descriptor's active target count as an unsigned decimal. */
321 seq_printf(m, "%u\n", lod->lod_desc.ld_active_tgt_count);
/* Only a show handler exists for this entry, so the read-only fops macro
 * is used. */
324 LPROC_SEQ_FOPS_RO(lod_activeobd);
327 * Show UUID of LOD device.
329 * \param[in] m seq file
330 * \param[in] v unused for single entry
332 * \retval 0 on success
333 * \retval negative error code if failed
335 static int lod_desc_uuid_seq_show(struct seq_file *m, void *v)
337 struct obd_device *dev = m->private;
338 struct lod_device *lod;
340 LASSERT(dev != NULL);
341 lod = lu2lod_dev(dev->obd_lu_dev);
/* The UUID is printed through "%s", so ld_uuid.uuid is treated as a
 * NUL-terminated string here. */
342 seq_printf(m, "%s\n", lod->lod_desc.ld_uuid.uuid);
/* Only a show handler exists for this entry, so the read-only fops macro
 * is used. */
345 LPROC_SEQ_FOPS_RO(lod_desc_uuid);
348 * Show QoS priority parameter.
350 * The printed value is a percentage value (0-100%) indicating the priority
351 * of free space compared to performance. 0% means select OSTs equally
352 * regardless of their free space, 100% means select OSTs only by their free
353 * space even if it results in very imbalanced load on the OSTs.
355 * \param[in] m seq file
356 * \param[in] v unused for single entry
358 * \retval 0 on success
359 * \retval negative error code if failed
361 static int lod_qos_priofree_seq_show(struct seq_file *m, void *v)
363 struct obd_device *dev = m->private;
364 struct lod_device *lod = lu2lod_dev(dev->obd_lu_dev);
366 LASSERT(lod != NULL);
367 seq_printf(m, "%d%%\n",
368 (lod->lod_qos.lq_prio_free * 100 + 255) >> 8);
373 * Set QoS free space priority parameter.
375 * Set the relative priority of free OST space compared to OST load when OSTs
376 * are space imbalanced. See lod_qos_priofree_seq_show() for description of
377 * this parameter. See lod_qos_thresholdrr_seq_write() and lq_threshold_rr to
378 * determine what constitutes "space imbalanced" OSTs.
380 * \param[in] file proc file
381 * \param[in] buffer string which contains the free space priority (0-100)
382 * \param[in] count @buffer length
383 * \param[in] off unused for single entry
385 * \retval @count on success
386 * \retval negative error code if failed
389 lod_qos_priofree_seq_write(struct file *file, const char __user *buffer,
390 size_t count, loff_t *off)
/* file->private_data is the seq_file; its private pointer is the OBD device. */
392 struct seq_file *m = file->private_data;
393 struct obd_device *dev = m->private;
394 struct lod_device *lod;
398 LASSERT(dev != NULL);
399 lod = lu2lod_dev(dev->obd_lu_dev);
/* Parse the user buffer as a signed 64-bit integer into val. */
401 rc = lprocfs_str_to_s64(buffer, count, &val);
/* The accepted range is a percentage, 0..100 inclusive. */
405 if (val < 0 || val > 100)
/* Convert the percentage to a fraction of 256: 0..100 maps onto 0..256. */
407 lod->lod_qos.lq_prio_free = (val << 8) / 100;
/*
 * NOTE(review): lq_dirty and lq_reset presumably make the QoS code
 * recompute its state with the new priority -- confirm against the
 * consumers of these flags.
 */
408 lod->lod_qos.lq_dirty = 1;
409 lod->lod_qos.lq_reset = 1;
/* Instantiate the fops referenced by the table as lod_qos_priofree_fops. */
413 LPROC_SEQ_FOPS(lod_qos_priofree);
416 * Show threshold for "same space on all OSTs" rule.
418 * \param[in] m seq file
419 * \param[in] v unused for single entry
421 * \retval 0 on success
422 * \retval negative error code if failed
424 static int lod_qos_thresholdrr_seq_show(struct seq_file *m, void *v)
426 struct obd_device *dev = m->private;
427 struct lod_device *lod;
429 LASSERT(dev != NULL);
430 lod = lu2lod_dev(dev->obd_lu_dev);
/*
 * lq_threshold_rr is stored as a fraction of 256 (the write handler stores
 * (val << 8) / 100). Adding 255 before the shift rounds up, which undoes
 * the truncation of that division so a written percentage reads back
 * unchanged. The output carries a literal '%' suffix.
 */
431 seq_printf(m, "%d%%\n",
432 (lod->lod_qos.lq_threshold_rr * 100 + 255) >> 8);
437 * Set threshold for "same space on all OSTs" rule.
439 * This sets the maximum percentage difference of free space between the most
440 * full and most empty OST in the currently available OSTs. If this percentage
441 * is exceeded, use the QoS allocator to select OSTs based on their available
442 * space so that more full OSTs are chosen less often, otherwise use the
443 * round-robin allocator for efficiency and performance.
445 * \param[in] file proc file
446 * \param[in] buffer string containing percentage difference of free space
447 * \param[in] count @buffer length
448 * \param[in] off unused for single entry
450 * \retval @count on success
451 * \retval negative error code if failed
454 lod_qos_thresholdrr_seq_write(struct file *file, const char __user *buffer,
455 size_t count, loff_t *off)
/* file->private_data is the seq_file; its private pointer is the OBD device. */
457 struct seq_file *m = file->private_data;
458 struct obd_device *dev = m->private;
459 struct lod_device *lod;
463 LASSERT(dev != NULL);
464 lod = lu2lod_dev(dev->obd_lu_dev);
/*
 * Parse the user buffer into val.
 * NOTE(review): the '%' argument presumably lets the input carry a percent
 * suffix, matching what the show handler prints -- confirm against
 * lprocfs_str_with_units_to_s64().
 */
466 rc = lprocfs_str_with_units_to_s64(buffer, count, &val, '%');
/* The accepted range is a percentage, 0..100 inclusive. */
470 if (val > 100 || val < 0)
/* Convert the percentage to a fraction of 256: 0..100 maps onto 0..256. */
473 lod->lod_qos.lq_threshold_rr = (val << 8) / 100;
/* Unlike the qos_prio_free writer, only lq_dirty is set here (no lq_reset). */
474 lod->lod_qos.lq_dirty = 1;
/* Instantiate the fops referenced by the table as lod_qos_thresholdrr_fops. */
478 LPROC_SEQ_FOPS(lod_qos_thresholdrr);
481 * Show expiration period used to refresh cached statfs data, which
482 * is used to implement QoS/RR striping allocation algorithm.
484 * \param[in] m seq file
485 * \param[in] v unused for single entry
487 * \retval 0 on success
488 * \retval negative error code if failed
490 static int lod_qos_maxage_seq_show(struct seq_file *m, void *v)
492 struct obd_device *dev = m->private;
493 struct lod_device *lod;
495 LASSERT(dev != NULL);
496 lod = lu2lod_dev(dev->obd_lu_dev);
/* The value is printed with a " Sec" suffix, so readers of this file get
 * "<number> Sec", not a bare number. */
497 seq_printf(m, "%u Sec\n", lod->lod_desc.ld_qos_maxage);
502 * Set expiration period used to refresh cached statfs data.
504 * \param[in] file proc file
505 * \param[in] buffer string containing the maximum age of statfs data in seconds
506 * \param[in] count @buffer length
507 * \param[in] off unused for single entry
509 * \retval @count on success
510 * \retval negative error code if failed
513 lod_qos_maxage_seq_write(struct file *file, const char __user *buffer,
514 size_t count, loff_t *off)
/* file->private_data is the seq_file; its private pointer is the OBD device. */
516 struct seq_file *m = file->private_data;
517 struct obd_device *dev = m->private;
518 struct lustre_cfg_bufs bufs;
519 struct lod_device *lod;
520 struct lu_device *next;
521 struct lustre_cfg *lcfg;
527 LASSERT(dev != NULL);
528 lod = lu2lod_dev(dev->obd_lu_dev);
/* Parse the user buffer as a signed 64-bit integer into val. */
530 rc = lprocfs_str_to_s64(buffer, count, &val);
/* Record the new max age locally first; it is then pushed to the targets. */
536 lod->lod_desc.ld_qos_maxage = val;
539 * propogate the value down to OSPs
/*
 * Build an LCFG_PARAM record whose buffer 1 holds "<PARAM_OSP>maxage=<val>".
 * NOTE(review): the snprintf() size is the literal 32; it must match the
 * declared size of str -- sizeof(str) would keep the two in sync.
 */
541 lustre_cfg_bufs_reset(&bufs, NULL);
542 snprintf(str, 32, "%smaxage=%u", PARAM_OSP, (__u32)val);
543 lustre_cfg_bufs_set_string(&bufs, 1, str);
/* The record is sized from the buffer counts/lengths gathered in bufs. */
544 OBD_ALLOC(lcfg, lustre_cfg_len(bufs.lcfg_bufcount, bufs.lcfg_buflen));
547 lustre_cfg_init(lcfg, LCFG_PARAM, &bufs);
/* Hold a reference on the OST descriptor table while walking it. */
549 lod_getref(&lod->lod_ost_descs);
550 lod_foreach_ost(lod, i) {
/* Hand the record to each target's lu_device through ldo_process_config();
 * NULL is passed as the environment. */
551 next = &OST_TGT(lod,i)->ltd_ost->dd_lu_dev;
552 rc = next->ld_ops->ldo_process_config(NULL, next, lcfg);
/* A failure on one target is logged; rc is overwritten on the next
 * iteration. */
554 CERROR("can't set maxage on #%d: %d\n", i, rc);
556 lod_putref(lod, &lod->lod_ost_descs);
/* Free with the length recomputed from the record itself. */
557 OBD_FREE(lcfg, lustre_cfg_len(lcfg->lcfg_bufcount, lcfg->lcfg_buflens));
/* Instantiate the fops referenced by the table as lod_qos_maxage_fops. */
561 LPROC_SEQ_FOPS(lod_qos_maxage);
/*
 * seq_file start callback for the target listing: take a reference on the
 * OST table and position the iterator on the first set bit at or after *pos.
 */
563 static void *lod_osts_seq_start(struct seq_file *p, loff_t *pos)
565 struct obd_device *dev = p->private;
566 struct lod_device *lod;
568 LASSERT(dev != NULL);
569 lod = lu2lod_dev(dev->obd_lu_dev);
/* The reference is taken before the bounds check below, i.e. on every call. */
571 lod_getref(&lod->lod_ost_descs); /* released in lod_osts_seq_stop */
/* A position at or beyond the bitmap size cannot name a target. */
572 if (*pos >= lod->lod_ost_bitmap->size)
/* Move *pos to the first set bit at or after the requested position. */
575 *pos = find_next_bit(lod->lod_ost_bitmap->data,
576 lod->lod_ost_bitmap->size, *pos);
/* A result below the bitmap size means a set bit was found: return that
 * target. */
577 if (*pos < lod->lod_ost_bitmap->size)
578 return OST_TGT(lod,*pos);
/* seq_file stop callback: drop the OST table reference taken in
 * lod_osts_seq_start(). */
583 static void lod_osts_seq_stop(struct seq_file *p, void *v)
585 struct obd_device *dev = p->private;
586 struct lod_device *lod;
588 LASSERT(dev != NULL);
589 lod = lu2lod_dev(dev->obd_lu_dev);
590 lod_putref(lod, &lod->lod_ost_descs);
/* seq_file next callback: advance *pos to the next set bit in the OST
 * bitmap. */
593 static void *lod_osts_seq_next(struct seq_file *p, void *v, loff_t *pos)
595 struct obd_device *dev = p->private;
/* NOTE(review): dev is dereferenced here without the LASSERT(dev != NULL)
 * that lod_osts_seq_start() and lod_osts_seq_stop() perform. */
596 struct lod_device *lod = lu2lod_dev(dev->obd_lu_dev);
/* Already at or past the last bitmap index: no later bit can exist. */
598 if (*pos >= lod->lod_ost_bitmap->size - 1)
/* Search strictly after the current position. */
601 *pos = find_next_bit(lod->lod_ost_bitmap->data,
602 lod->lod_ost_bitmap->size, *pos + 1);
/* A result below the bitmap size means a set bit was found: return that
 * target. */
603 if (*pos < lod->lod_ost_bitmap->size)
604 return OST_TGT(lod,*pos);
610 * Show active/inactive status for OST found by lod_osts_seq_next().
612 * \param[in] p seq file
613 * \param[in] v OST descriptor (struct lod_ost_desc) returned by the iterator
615 * \retval 0 on success
616 * \retval negative error code if failed
618 static int lod_osts_seq_show(struct seq_file *p, void *v)
620 struct obd_device *obd = p->private;
/* v is the record handed over by the iterator: the target's lod_ost_desc. */
621 struct lod_ost_desc *ost_desc = v;
622 struct lod_device *lod;
624 struct dt_device *next;
625 struct obd_statfs sfs;
627 LASSERT(obd->obd_lu_dev);
628 lod = lu2lod_dev(obd->obd_lu_dev);
/* Look the target up again by its index to reach its dt_device. */
630 idx = ost_desc->ltd_index;
631 next = OST_TGT(lod,idx)->ltd_ost;
635 /* XXX: should be non-NULL env, but it's very expensive */
/* Probe the target with a statfs call; sfs only receives the result here. */
637 rc = dt_statfs(NULL, next, &sfs);
/* -ENOTCONN is singled out from other statfs results. */
638 if (rc == -ENOTCONN) {
/* Output line: "<index>: <uuid> <prefix>ACTIVE". */
644 seq_printf(p, "%d: %s %sACTIVE\n", idx,
645 obd_uuid2str(&ost_desc->ltd_uuid),
/* Iterator callbacks for the multi-record target listing; passed to
 * seq_open() by lod_osts_seq_open(). */
650 static const struct seq_operations lod_osts_sops = {
651 .start = lod_osts_seq_start,
652 .stop = lod_osts_seq_stop,
653 .next = lod_osts_seq_next,
654 .show = lod_osts_seq_show,
/* Open handler for the target listing: attach the iterator and hand it the
 * per-entry data. */
657 static int lod_osts_seq_open(struct inode *inode, struct file *file)
659 struct seq_file *seq;
/* seq_open() creates the seq_file and stores it in file->private_data. */
662 rc = seq_open(file, &lod_osts_sops);
/* Hand the proc entry's data pointer to the iterator callbacks, which read
 * it back from seq->private as the OBD device. */
666 seq = file->private_data;
667 seq->private = PDE_DATA(inode);
/* Read-only fops for the dt_* statistics entries; the lprocfs_lod_osd_vars
 * table references them as lod_dt_*_fops. */
671 LPROC_SEQ_FOPS_RO_TYPE(lod, dt_blksize);
672 LPROC_SEQ_FOPS_RO_TYPE(lod, dt_kbytestotal);
673 LPROC_SEQ_FOPS_RO_TYPE(lod, dt_kbytesfree);
674 LPROC_SEQ_FOPS_RO_TYPE(lod, dt_kbytesavail);
675 LPROC_SEQ_FOPS_RO_TYPE(lod, dt_filestotal);
676 LPROC_SEQ_FOPS_RO_TYPE(lod, dt_filesfree);
679 * Show whether special failout mode for testing is enabled or not.
681 * \param[in] m seq file
682 * \param[in] v unused for single entry
684 * \retval 0 on success
685 * \retval negative error code if failed
687 static int lod_lmv_failout_seq_show(struct seq_file *m, void *v)
689 struct obd_device *dev = m->private;
690 struct lod_device *lod;
692 LASSERT(dev != NULL);
693 lod = lu2lod_dev(dev->obd_lu_dev);
/* Any non-zero flag value is reported as 1. */
695 seq_printf(m, "%d\n", lod->lod_lmv_failout ? 1 : 0);
700 * Enable/disable a special failout mode for testing.
702 * This determines whether the LMV will try to continue processing a striped
703 * directory even if it has a (partly) corrupted entry in the master directory,
704 * or if it will abort upon finding a corrupted slave directory entry.
706 * \param[in] file proc file
707 * \param[in] buffer string: 0 or non-zero to disable or enable LMV failout
708 * \param[in] count @buffer length
709 * \param[in] off unused for single entry
711 * \retval @count on success
712 * \retval negative error code if failed
715 lod_lmv_failout_seq_write(struct file *file, const char __user *buffer,
716 size_t count, loff_t *off)
/* file->private_data is the seq_file; its private pointer is the OBD device. */
718 struct seq_file *m = file->private_data;
719 struct obd_device *dev = m->private;
720 struct lod_device *lod;
724 LASSERT(dev != NULL);
725 lod = lu2lod_dev(dev->obd_lu_dev);
/* Parse the user buffer as a signed 64-bit integer into val. */
727 rc = lprocfs_str_to_s64(buffer, count, &val);
/* !! collapses any non-zero input (including negatives) to 1. */
731 lod->lod_lmv_failout = !!val;
/* Instantiate the fops referenced by the table as lod_lmv_failout_fops. */
735 LPROC_SEQ_FOPS(lod_lmv_failout);
/* Name-to-fops table for the LOD tunables; lod_procfs_init() installs it as
 * obd->obd_vars before calling lprocfs_obd_setup(). */
737 static struct lprocfs_vars lprocfs_lod_obd_vars[] = {
738 { .name = "stripesize",
739 .fops = &lod_stripesize_fops },
740 { .name = "stripeoffset",
741 .fops = &lod_stripeoffset_fops },
742 { .name = "stripecount",
743 .fops = &lod_stripecount_fops },
744 { .name = "stripetype",
745 .fops = &lod_stripetype_fops },
747 .fops = &lod_numobd_fops },
748 { .name = "activeobd",
749 .fops = &lod_activeobd_fops },
750 { .name = "desc_uuid",
751 .fops = &lod_desc_uuid_fops },
752 { .name = "qos_prio_free",
753 .fops = &lod_qos_priofree_fops },
754 { .name = "qos_threshold_rr",
755 .fops = &lod_qos_thresholdrr_fops },
756 { .name = "qos_maxage",
757 .fops = &lod_qos_maxage_fops },
758 { .name = "lmv_failout",
759 .fops = &lod_lmv_failout_fops },
/* Name-to-fops table for the dt_* statistics entries; lod_procfs_init() adds
 * it to the device's proc directory with lprocfs_add_vars(). */
763 static struct lprocfs_vars lprocfs_lod_osd_vars[] = {
764 { .name = "blocksize", .fops = &lod_dt_blksize_fops },
765 { .name = "kbytestotal", .fops = &lod_dt_kbytestotal_fops },
766 { .name = "kbytesfree", .fops = &lod_dt_kbytesfree_fops },
767 { .name = "kbytesavail", .fops = &lod_dt_kbytesavail_fops },
768 { .name = "filestotal", .fops = &lod_dt_filestotal_fops },
769 { .name = "filesfree", .fops = &lod_dt_filesfree_fops },
/* file_operations for the "target_obd" entry created in lod_procfs_init();
 * opening it goes through lod_osts_seq_open(). */
773 static const struct file_operations lod_proc_target_fops = {
774 .owner = THIS_MODULE,
775 .open = lod_osts_seq_open,
778 .release = lprocfs_seq_release,
782 * Initialize procfs entries for LOD.
784 * \param[in] lod LOD device
786 * \retval 0 on success
787 * \retval negative error code if failed
789 int lod_procfs_init(struct lod_device *lod)
791 struct obd_device *obd = lod2obd(lod);
792 struct proc_dir_entry *lov_proc_dir = NULL;
793 struct obd_type *type;
/* Step 1: register the tunables table and create the device's proc
 * directory. */
796 obd->obd_vars = lprocfs_lod_obd_vars;
797 rc = lprocfs_obd_setup(obd, true);
799 CERROR("%s: cannot setup procfs entry: %d\n",
/* Step 2: add the dt_* statistics entries to the same directory. */
804 rc = lprocfs_add_vars(obd->obd_proc_entry, lprocfs_lod_osd_vars,
807 CERROR("%s: cannot setup procfs entry: %d\n",
/* Step 3: create the read-only (0444) "target_obd" listing, with obd as its
 * data. */
812 rc = lprocfs_seq_create(obd->obd_proc_entry, "target_obd",
813 0444, &lod_proc_target_fops, obd);
815 CWARN("%s: Error adding the target_obd file %d\n",
/* Step 4: register the "pools" directory; an ERR_PTR result becomes rc and
 * the stored pointer is reset to NULL. */
820 lod->lod_pool_proc_entry = lprocfs_register("pools",
823 if (IS_ERR(lod->lod_pool_proc_entry)) {
824 rc = PTR_ERR(lod->lod_pool_proc_entry);
825 lod->lod_pool_proc_entry = NULL;
826 CWARN("%s: Failed to create pool proc file: %d\n",
831 /* If the real LOV is present which is the case for setups
832 * with both server and clients on the same node then use
833 * the LOV's proc root */
/* Step 5: choose the directory that will hold the compatibility symlink. */
834 type = class_search_type(LUSTRE_LOV_NAME);
835 if (type != NULL && type->typ_procroot != NULL)
836 lov_proc_dir = type->typ_procroot;
/* Alternative source for the directory: this OBD type's typ_procsym. */
838 lov_proc_dir = obd->obd_type->typ_procsym;
/* No directory is available to host the symlink. */
840 if (lov_proc_dir == NULL)
843 /* for compatibility we link old procfs's LOV entries to lod ones */
/* The symlink is named after the device and targets "../lod/<obd_name>". */
844 lod->lod_symlink = lprocfs_add_symlink(obd->obd_name, lov_proc_dir,
845 "../lod/%s", obd->obd_name);
/* A missing symlink is only logged. */
846 if (lod->lod_symlink == NULL)
847 CERROR("cannot create LOV symlink for /proc/fs/lustre/lod/%s\n",
/* Undo the procfs setup done by lprocfs_obd_setup(). */
852 lprocfs_obd_cleanup(obd);
858 * Clean up procfs entries registered for LOD.
860 * \param[in] lod LOD device
862 void lod_procfs_fini(struct lod_device *lod)
864 struct obd_device *obd = lod2obd(lod);
/* Remove the compatibility symlink first, if lod_procfs_init() created one. */
866 if (lod->lod_symlink != NULL) {
867 lprocfs_remove(&lod->lod_symlink);
/* The pointer is cleared explicitly after removal. */
868 lod->lod_symlink = NULL;
/* Then remove the "pools" directory, if it was registered. */
871 if (lod->lod_pool_proc_entry != NULL) {
872 lprocfs_remove(&lod->lod_pool_proc_entry);
873 lod->lod_pool_proc_entry = NULL;
/* Finally tear down the per-device proc directory itself. */
876 lprocfs_obd_cleanup(obd);
879 #endif /* CONFIG_PROC_FS */