Whamcloud - gitweb
LU-8066 obd: make health_check sysfs compliant
[fs/lustre-release.git] / lustre / obdclass / obd_sysfs.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2017, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  *
32  * lustre/obdclass/obd_sysfs.c
33  *
34  * Object Devices Class Driver
35  * These are the only exported functions, they provide some generic
36  * infrastructure for managing object devices
37  */
38
39 #define DEBUG_SUBSYSTEM S_CLASS
40
41 #include <linux/module.h>
42 #include <linux/errno.h>
43 #include <linux/kernel.h>
44 #include <linux/sched.h>
45 #include <linux/lp.h>
46 #include <linux/slab.h>
47 #include <linux/ioport.h>
48 #include <linux/fcntl.h>
49 #include <linux/delay.h>
50 #include <linux/skbuff.h>
51 #include <linux/proc_fs.h>
52 #include <linux/fs.h>
53 #include <linux/poll.h>
54 #include <linux/init.h>
55 #include <linux/list.h>
56 #include <linux/highmem.h>
57 #include <asm/io.h>
58 #include <asm/ioctls.h>
59 #include <asm/poll.h>
60 #include <asm/uaccess.h>
61 #include <linux/miscdevice.h>
62 #include <linux/seq_file.h>
63 #include <linux/kobject.h>
64
65 #include <libcfs/libcfs.h>
66 #include <obd_support.h>
67 #include <obd_class.h>
68 #include <lprocfs_status.h>
69 #include <uapi/linux/lnet/lnetctl.h>
70 #include <uapi/linux/lustre/lustre_ioctl.h>
71 #include <uapi/linux/lustre/lustre_ver.h>
72
73 struct static_lustre_uintvalue_attr {
74         struct {
75                 struct attribute attr;
76                 ssize_t (*show)(struct kobject *kobj, struct attribute *attr,
77                                 char *buf);
78                 ssize_t (*store)(struct kobject *kobj, struct attribute *attr,
79                                  const char *buf, size_t len);
80         } u;
81         int *value;
82 };
83
84 static ssize_t static_uintvalue_show(struct kobject *kobj,
85                                      struct attribute *attr,
86                                      char *buf)
87 {
88         struct static_lustre_uintvalue_attr *lattr = (void *)attr;
89
90         return sprintf(buf, "%d\n", *lattr->value);
91 }
92
93 static ssize_t static_uintvalue_store(struct kobject *kobj,
94                                       struct attribute *attr,
95                                       const char *buffer, size_t count)
96 {
97         struct static_lustre_uintvalue_attr *lattr = (void *)attr;
98         unsigned int val;
99         int rc;
100
101         rc = kstrtouint(buffer, 10, &val);
102         if (rc)
103                 return rc;
104
105         *lattr->value = val;
106
107         return count;
108 }
109
110 #define LUSTRE_STATIC_UINT_ATTR(name, value)                            \
111 static struct static_lustre_uintvalue_attr lustre_sattr_##name =        \
112         { __ATTR(name, 0644, static_uintvalue_show,                     \
113                  static_uintvalue_store), value }
114
115 LUSTRE_STATIC_UINT_ATTR(timeout, &obd_timeout);
116 LUSTRE_STATIC_UINT_ATTR(debug_peer_on_timeout, &obd_debug_peer_on_timeout);
117 LUSTRE_STATIC_UINT_ATTR(dump_on_timeout, &obd_dump_on_timeout);
118 LUSTRE_STATIC_UINT_ATTR(dump_on_eviction, &obd_dump_on_eviction);
119 LUSTRE_STATIC_UINT_ATTR(at_min, &at_min);
120 LUSTRE_STATIC_UINT_ATTR(at_max, &at_max);
121 LUSTRE_STATIC_UINT_ATTR(at_extra, &at_extra);
122 LUSTRE_STATIC_UINT_ATTR(at_early_margin, &at_early_margin);
123 LUSTRE_STATIC_UINT_ATTR(at_history, &at_history);
124 LUSTRE_STATIC_UINT_ATTR(lbug_on_eviction, &obd_lbug_on_eviction);
125
126 #ifdef HAVE_SERVER_SUPPORT
127 LUSTRE_STATIC_UINT_ATTR(ldlm_timeout, &ldlm_timeout);
128 LUSTRE_STATIC_UINT_ATTR(bulk_timeout, &bulk_timeout);
129 #endif
130
/*
 * Read-only "memused" attribute: prints the value of obd_memory_sum() as a
 * decimal number followed by a newline.
 *
 * \retval number of characters written to \a buf
 */
static ssize_t memused_show(struct kobject *kobj, struct attribute *attr,
			    char *buf)
{
	return sprintf(buf, "%llu\n", obd_memory_sum());
}
LUSTRE_RO_ATTR(memused);
137
/*
 * Read-only "memused_max" attribute: prints the value of obd_memory_max()
 * as a decimal number followed by a newline.
 *
 * \retval number of characters written to \a buf
 */
static ssize_t memused_max_show(struct kobject *kobj, struct attribute *attr,
				char *buf)
{
	return sprintf(buf, "%llu\n", obd_memory_max());
}
LUSTRE_RO_ATTR(memused_max);
144
145 static ssize_t max_dirty_mb_show(struct kobject *kobj, struct attribute *attr,
146                                  char *buf)
147 {
148         return sprintf(buf, "%lu\n",
149                        obd_max_dirty_pages / (1 << (20 - PAGE_SHIFT)));
150 }
151
152 static ssize_t max_dirty_mb_store(struct kobject *kobj, struct attribute *attr,
153                                   const char *buffer, size_t count)
154 {
155         unsigned long val;
156         int rc;
157
158         rc = kstrtoul(buffer, 10, &val);
159         if (rc)
160                 return rc;
161
162         val *= 1 << (20 - PAGE_SHIFT); /* convert to pages */
163
164         if (val > ((totalram_pages / 10) * 9)) {
165                 /* Somebody wants to assign too much memory to dirty pages */
166                 return -EINVAL;
167         }
168
169         if (val < 4 << (20 - PAGE_SHIFT)) {
170                 /* Less than 4 Mb for dirty cache is also bad */
171                 return -EINVAL;
172         }
173
174         obd_max_dirty_pages = val;
175
176         return count;
177 }
178 LUSTRE_RW_ATTR(max_dirty_mb);
179
180 static ssize_t version_show(struct kobject *kobj, struct attribute *attr,
181                             char *buf)
182 {
183         return sprintf(buf, "%s\n", LUSTRE_VERSION_STRING);
184 }
185
/*
 * Show handler for "pinger": prints "on" when the code was compiled with
 * ENABLE_PINGER defined and "off" otherwise.
 *
 * \retval number of characters written to \a buf
 */
static ssize_t pinger_show(struct kobject *kobj, struct attribute *attr,
			   char *buf)
{
#ifdef ENABLE_PINGER
	return sprintf(buf, "%s\n", "on");
#else
	return sprintf(buf, "%s\n", "off");
#endif
}
196
197 /**
198  * Check all obd devices health
199  *
200  * \param kobj
201  * \param buf [in]
202  *
203  * \retval number of characters printed if healthy
204  */
205 static ssize_t
206 health_check_show(struct kobject *kobj, struct attribute *attr, char *buf)
207 {
208         bool healthy = true;
209         size_t len = 0;
210         int i;
211
212         if (libcfs_catastrophe)
213                 return sprintf(buf, "LBUG\n");
214
215         read_lock(&obd_dev_lock);
216         for (i = 0; i < class_devno_max(); i++) {
217                 struct obd_device *obd;
218
219                 obd = class_num2obd(i);
220                 if (obd == NULL || !obd->obd_attached || !obd->obd_set_up)
221                         continue;
222
223                 LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
224                 if (obd->obd_stopping)
225                         continue;
226
227                 class_incref(obd, __func__, current);
228                 read_unlock(&obd_dev_lock);
229
230                 if (obd_health_check(NULL, obd))
231                         healthy = false;
232
233                 class_decref(obd, __func__, current);
234                 read_lock(&obd_dev_lock);
235
236                 if (!healthy)
237                         break;
238         }
239         read_unlock(&obd_dev_lock);
240
241         if (healthy)
242                 len = sprintf(buf, "healthy\n");
243         else
244                 len = sprintf(buf, "NOT HEALTHY\n");
245
246         return len;
247 }
248
249 static ssize_t jobid_var_show(struct kobject *kobj, struct attribute *attr,
250                               char *buf)
251 {
252         int rc = 0;
253
254         if (strlen(obd_jobid_var))
255                 rc = snprintf(buf, PAGE_SIZE, "%s\n", obd_jobid_var);
256         return rc;
257 }
258
259 static ssize_t jobid_var_store(struct kobject *kobj, struct attribute *attr,
260                                const char *buffer, size_t count)
261 {
262         if (!count || count > JOBSTATS_JOBID_VAR_MAX_LEN)
263                 return -EINVAL;
264
265         memset(obd_jobid_var, 0, JOBSTATS_JOBID_VAR_MAX_LEN + 1);
266
267         memcpy(obd_jobid_var, buffer, count);
268
269         /* Trim the trailing '\n' if any */
270         if (obd_jobid_var[count - 1] == '\n')
271                 obd_jobid_var[count - 1] = 0;
272
273         return count;
274 }
275
276 static ssize_t jobid_name_show(struct kobject *kobj, struct attribute *attr,
277                                char *buf)
278 {
279         int rc = 0;
280
281         if (strlen(obd_jobid_name))
282                 rc = snprintf(buf, PAGE_SIZE, "%s\n", obd_jobid_name);
283         return rc;
284 }
285
286 static ssize_t jobid_name_store(struct kobject *kobj, struct attribute *attr,
287                                 const char *buffer, size_t count)
288 {
289         if (!count || count > LUSTRE_JOBID_SIZE)
290                 return -EINVAL;
291
292         if (strcmp(obd_jobid_var, JOBSTATS_NODELOCAL) != 0 &&
293             !strchr(buffer, '%')) {
294                 lustre_jobid_clear(buffer);
295                 return count;
296         }
297
298         /* clear previous value */
299         memset(obd_jobid_name, 0, LUSTRE_JOBID_SIZE);
300
301         memcpy(obd_jobid_name, buffer, count);
302
303         /* Trim the trailing '\n' if any */
304         if (obd_jobid_name[count - 1] == '\n') {
305                 /* Don't echo just a newline */
306                 if (count == 1)
307                         return -EINVAL;
308                 obd_jobid_name[count - 1] = 0;
309         }
310
311         return count;
312 }
313
/* Directory entry of /sys/kernel/debug/lustre, set by class_procfs_init() */
struct dentry *debugfs_lustre_root;
EXPORT_SYMBOL_GPL(debugfs_lustre_root);

#ifndef CONFIG_PROC_FS
#define lprocfs_base NULL
#else
/* Directory entry of /proc/fs/lustre, set by class_procfs_init() */
struct proc_dir_entry *proc_lustre_root;
EXPORT_SYMBOL(proc_lustre_root);
#endif /* !CONFIG_PROC_FS */
325
326 LUSTRE_RO_ATTR(version);
327 LUSTRE_RO_ATTR(pinger);
328 LUSTRE_RO_ATTR(health_check);
329 LUSTRE_RW_ATTR(jobid_var);
330 LUSTRE_RW_ATTR(jobid_name);
331
332 static struct attribute *lustre_attrs[] = {
333         &lustre_attr_version.attr,
334         &lustre_attr_pinger.attr,
335         &lustre_attr_health_check.attr,
336         &lustre_attr_jobid_name.attr,
337         &lustre_attr_jobid_var.attr,
338         &lustre_sattr_timeout.u.attr,
339         &lustre_attr_max_dirty_mb.attr,
340         &lustre_sattr_debug_peer_on_timeout.u.attr,
341         &lustre_sattr_dump_on_timeout.u.attr,
342         &lustre_sattr_dump_on_eviction.u.attr,
343         &lustre_sattr_at_min.u.attr,
344         &lustre_sattr_at_max.u.attr,
345         &lustre_sattr_at_extra.u.attr,
346         &lustre_sattr_at_early_margin.u.attr,
347         &lustre_sattr_at_history.u.attr,
348         &lustre_attr_memused_max.attr,
349         &lustre_attr_memused.attr,
350 #ifdef HAVE_SERVER_SUPPORT
351         &lustre_sattr_ldlm_timeout.u.attr,
352         &lustre_sattr_bulk_timeout.u.attr,
353 #endif
354         &lustre_sattr_lbug_on_eviction.u.attr,
355         NULL,
356 };
357
358 static void *obd_device_list_seq_start(struct seq_file *p, loff_t *pos)
359 {
360         if (*pos >= class_devno_max())
361                 return NULL;
362
363         return pos;
364 }
365
/* seq_file stop callback for the "devices" file: there is nothing to undo. */
static void obd_device_list_seq_stop(struct seq_file *p, void *v)
{
	/* intentionally empty */
}
369
370 static void *obd_device_list_seq_next(struct seq_file *p, void *v, loff_t *pos)
371 {
372         ++*pos;
373         if (*pos >= class_devno_max())
374                 return NULL;
375
376         return pos;
377 }
378
379 static int obd_device_list_seq_show(struct seq_file *p, void *v)
380 {
381         loff_t index = *(loff_t *)v;
382         struct obd_device *obd = class_num2obd((int)index);
383         char *status;
384
385         if (obd == NULL)
386                 return 0;
387
388         LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
389         if (obd->obd_stopping)
390                 status = "ST";
391         else if (obd->obd_inactive)
392                 status = "IN";
393         else if (obd->obd_set_up)
394                 status = "UP";
395         else if (obd->obd_attached)
396                 status = "AT";
397         else
398                 status = "--";
399
400         seq_printf(p, "%3d %s %s %s %s %d\n",
401                    (int)index, status, obd->obd_type->typ_name,
402                    obd->obd_name, obd->obd_uuid.uuid,
403                    atomic_read(&obd->obd_refcount));
404         return 0;
405 }
406
407 static const struct seq_operations obd_device_list_sops = {
408         .start = obd_device_list_seq_start,
409         .stop = obd_device_list_seq_stop,
410         .next = obd_device_list_seq_next,
411         .show = obd_device_list_seq_show,
412 };
413
414 static int obd_device_list_open(struct inode *inode, struct file *file)
415 {
416         struct seq_file *seq;
417         int rc = seq_open(file, &obd_device_list_sops);
418
419         if (rc)
420                 return rc;
421
422         seq = file->private_data;
423         seq->private = inode->i_private;
424         return 0;
425 }
426
427 static const struct file_operations obd_device_list_fops = {
428         .owner   = THIS_MODULE,
429         .open    = obd_device_list_open,
430         .read    = seq_read,
431         .llseek  = seq_lseek,
432         .release = seq_release,
433 };
434
435 static int
436 health_check_seq_show(struct seq_file *m, void *unused)
437 {
438         int i;
439
440         read_lock(&obd_dev_lock);
441         for (i = 0; i < class_devno_max(); i++) {
442                 struct obd_device *obd;
443
444                 obd = class_num2obd(i);
445                 if (obd == NULL || !obd->obd_attached || !obd->obd_set_up)
446                         continue;
447
448                 LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
449                 if (obd->obd_stopping)
450                         continue;
451
452                 class_incref(obd, __func__, current);
453                 read_unlock(&obd_dev_lock);
454
455                 if (obd_health_check(NULL, obd)) {
456                         seq_printf(m, "device %s reported unhealthy\n",
457                                    obd->obd_name);
458                 }
459                 class_decref(obd, __func__, current);
460                 read_lock(&obd_dev_lock);
461         }
462         read_unlock(&obd_dev_lock);
463
464         return 0;
465 }
466
467 LDEBUGFS_SEQ_FOPS_RO(health_check);
468
469 struct kset *lustre_kset;
470 EXPORT_SYMBOL_GPL(lustre_kset);
471
472 static struct attribute_group lustre_attr_group = {
473         .attrs = lustre_attrs,
474 };
475
476 ssize_t class_set_global(const char *param)
477 {
478         const char *value = strchr(param, '=') + 1;
479         size_t off = value - param - 1;
480         ssize_t count = -ENOENT;
481         int i;
482
483         for (i = 0; lustre_attrs[i]; i++) {
484                 if (!strncmp(lustre_attrs[i]->name, param, off)) {
485                         count = lustre_attr_store(&lustre_kset->kobj,
486                                                   lustre_attrs[i], value,
487                                                   strlen(value));
488                         break;
489                 }
490         }
491         return count;
492 }
493
494 int class_procfs_init(void)
495 {
496         struct proc_dir_entry *entry;
497         struct dentry *file;
498         int rc = -ENOMEM;
499
500         ENTRY;
501
502         lustre_kset = kset_create_and_add("lustre", NULL, fs_kobj);
503         if (!lustre_kset)
504                 goto out;
505
506         /* Create the files associated with this kobject */
507         rc = sysfs_create_group(&lustre_kset->kobj, &lustre_attr_group);
508         if (rc) {
509                 kset_unregister(lustre_kset);
510                 goto out;
511         }
512
513         rc = jobid_cache_init();
514         if (rc) {
515                 kset_unregister(lustre_kset);
516                 goto out;
517         }
518
519         debugfs_lustre_root = debugfs_create_dir("lustre", NULL);
520         if (IS_ERR_OR_NULL(debugfs_lustre_root)) {
521                 rc = debugfs_lustre_root ? PTR_ERR(debugfs_lustre_root)
522                                          : -ENOMEM;
523                 debugfs_lustre_root = NULL;
524                 kset_unregister(lustre_kset);
525                 goto out;
526         }
527
528         file = debugfs_create_file("devices", 0444, debugfs_lustre_root, NULL,
529                                    &obd_device_list_fops);
530         if (IS_ERR_OR_NULL(file)) {
531                 rc = file ? PTR_ERR(file) : -ENOMEM;
532                 debugfs_remove(debugfs_lustre_root);
533                 kset_unregister(lustre_kset);
534                 goto out;
535         }
536
537         file = debugfs_create_file("health_check", 0444, debugfs_lustre_root,
538                                    NULL, &health_check_fops);
539         if (IS_ERR_OR_NULL(file)) {
540                 rc = file ? PTR_ERR(file) : -ENOMEM;
541                 debugfs_remove_recursive(debugfs_lustre_root);
542                 kset_unregister(lustre_kset);
543                 goto out;
544         }
545
546         entry = lprocfs_register("fs/lustre", NULL, NULL, NULL);
547         if (IS_ERR(entry)) {
548                 rc = PTR_ERR(entry);
549                 CERROR("cannot create '/proc/fs/lustre': rc = %d\n", rc);
550                 debugfs_remove_recursive(debugfs_lustre_root);
551                 kset_unregister(lustre_kset);
552                 goto out;
553         }
554
555         proc_lustre_root = entry;
556 out:
557         RETURN(rc);
558 }
559
560 int class_procfs_clean(void)
561 {
562         ENTRY;
563
564         debugfs_remove_recursive(debugfs_lustre_root);
565
566         debugfs_lustre_root = NULL;
567         jobid_cache_fini();
568
569         if (proc_lustre_root)
570                 lprocfs_remove(&proc_lustre_root);
571
572         sysfs_remove_group(&lustre_kset->kobj, &lustre_attr_group);
573
574         kset_unregister(lustre_kset);
575
576         RETURN(0);
577 }