From 5d368bd0b203aee8011426fd147fad3e42ac9f7f Mon Sep 17 00:00:00 2001 From: James Simmons Date: Fri, 15 Mar 2019 14:10:42 -0400 Subject: [PATCH] LU-8066 obd: make health_check sysfs compliant The patch http://review.whamcloud.com/16721 was ported to the upstream client but was rejected since it violated the sysfs one item rule. Change the reporting of LBUG plus unhealthy to just reporting LBUG. Move the reporting of which device is unhealthy to a new debugfs file that mirrors the sysfs file. Change-Id: Ie1640399e97902272000313bb7ccdcbd2be6daf6 Signed-off-by: James Simmons Reviewed-on: https://review.whamcloud.com/25631 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Emoly Liu Reviewed-by: Oleg Drokin --- lustre/obdclass/obd_sysfs.c | 62 +++++++++++++++++++++++++++++++------- lustre/utils/lustre_cfg.c | 73 ++++++++++++++++++++++----------------------- 2 files changed, 87 insertions(+), 48 deletions(-) diff --git a/lustre/obdclass/obd_sysfs.c b/lustre/obdclass/obd_sysfs.c index 32f3a13..f83c6f3 100644 --- a/lustre/obdclass/obd_sysfs.c +++ b/lustre/obdclass/obd_sysfs.c @@ -209,10 +209,8 @@ health_check_show(struct kobject *kobj, struct attribute *attr, char *buf) size_t len = 0; int i; - if (libcfs_catastrophe) { - len = sprintf(buf, "LBUG\n"); - healthy = false; - } + if (libcfs_catastrophe) + return sprintf(buf, "LBUG\n"); read_lock(&obd_dev_lock); for (i = 0; i < class_devno_max(); i++) { @@ -226,16 +224,17 @@ health_check_show(struct kobject *kobj, struct attribute *attr, char *buf) if (obd->obd_stopping) continue; - class_incref(obd, __FUNCTION__, current); + class_incref(obd, __func__, current); read_unlock(&obd_dev_lock); - if (obd_health_check(NULL, obd)) { - len = sprintf(buf, "device %s reported unhealthy\n", - obd->obd_name); + if (obd_health_check(NULL, obd)) healthy = false; - } - class_decref(obd, __FUNCTION__, current); + + class_decref(obd, __func__, current); read_lock(&obd_dev_lock); + + if (!healthy) + break; } 
read_unlock(&obd_dev_lock); @@ -433,6 +432,40 @@ static const struct file_operations obd_device_list_fops = { .release = seq_release, }; +static int +health_check_seq_show(struct seq_file *m, void *unused) +{ + int i; + + read_lock(&obd_dev_lock); + for (i = 0; i < class_devno_max(); i++) { + struct obd_device *obd; + + obd = class_num2obd(i); + if (obd == NULL || !obd->obd_attached || !obd->obd_set_up) + continue; + + LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC); + if (obd->obd_stopping) + continue; + + class_incref(obd, __func__, current); + read_unlock(&obd_dev_lock); + + if (obd_health_check(NULL, obd)) { + seq_printf(m, "device %s reported unhealthy\n", + obd->obd_name); + } + class_decref(obd, __func__, current); + read_lock(&obd_dev_lock); + } + read_unlock(&obd_dev_lock); + + return 0; +} + +LDEBUGFS_SEQ_FOPS_RO(health_check); + struct kset *lustre_kset; EXPORT_SYMBOL_GPL(lustre_kset); @@ -501,6 +534,15 @@ int class_procfs_init(void) goto out; } + file = debugfs_create_file("health_check", 0444, debugfs_lustre_root, + NULL, &health_check_fops); + if (IS_ERR_OR_NULL(file)) { + rc = file ? 
PTR_ERR(file) : -ENOMEM; + debugfs_remove_recursive(debugfs_lustre_root); + kset_unregister(lustre_kset); + goto out; + } + entry = lprocfs_register("fs/lustre", NULL, NULL, NULL); if (IS_ERR(entry)) { rc = PTR_ERR(entry); diff --git a/lustre/utils/lustre_cfg.c b/lustre/utils/lustre_cfg.c index a29aae7..02f3fa1 100644 --- a/lustre/utils/lustre_cfg.c +++ b/lustre/utils/lustre_cfg.c @@ -969,8 +969,8 @@ static int param_display(struct param_opts *popt, char *pattern, char *value, enum parameter_operation mode) { - int dir_count = 0; - char **dir_cache; + int dup_count = 0; + char **dup_cache; glob_t paths; char *opname = parameter_opname[mode]; int rc, i; @@ -985,11 +985,11 @@ param_display(struct param_opts *popt, char *pattern, char *value, return rc; } - dir_cache = calloc(paths.gl_pathc, sizeof(char *)); - if (dir_cache == NULL) { + dup_cache = calloc(paths.gl_pathc, sizeof(char *)); + if (dup_cache == NULL) { rc = -ENOMEM; fprintf(stderr, - "error: %s: allocating '%s' dir_cache[%zd]: %s\n", + "error: %s: allocating '%s' dup_cache[%zd]: %s\n", opname, pattern, paths.gl_pathc, strerror(-rc)); goto out_param; } @@ -998,7 +998,7 @@ param_display(struct param_opts *popt, char *pattern, char *value, char *param_name = NULL, *tmp; char pathname[PATH_MAX]; struct stat st; - int rc2; + int rc2, j; if (stat(paths.gl_pathv[i], &st) == -1) { fprintf(stderr, "error: %s: stat '%s': %s\n", @@ -1021,35 +1021,6 @@ param_display(struct param_opts *popt, char *pattern, char *value, continue; } - /** - * For the upstream client the parameter files locations - * are split between under both /sys/kernel/debug/lustre - * and /sys/fs/lustre. The parameter files containing - * small amounts of data, less than a page in size, are - * located under /sys/fs/lustre and in the case of large - * parameter data files, think stats for example, are - * located in the debugfs tree. 
Since the files are split - * across two trees the directories are often duplicated - * which means these directories are listed twice which - * leads to duplicate output to the user. To avoid scanning - * a directory twice we have to cache any directory and - * check if a search has been requested twice. - */ - if (S_ISDIR(st.st_mode)) { - int j; - - for (j = 0; j < dir_count; j++) { - if (!strcmp(dir_cache[j], param_name)) - break; - } - if (j != dir_count) { - free(param_name); - param_name = NULL; - continue; - } - dir_cache[dir_count++] = strdup(param_name); - } - switch (mode) { case GET_PARAM: /* Read the contents of file to stdout */ @@ -1069,6 +1040,32 @@ param_display(struct param_opts *popt, char *pattern, char *value, } break; case LIST_PARAM: + /** + * For the upstream client the parameter files locations + * are split between under both /sys/kernel/debug/lustre + * and /sys/fs/lustre. The parameter files containing + * small amounts of data, less than a page in size, are + * located under /sys/fs/lustre and in the case of large + * parameter data files, think stats for example, are + * located in the debugfs tree. Since the files are split + * across two trees the directories are often duplicated + * which means these directories are listed twice which + * leads to duplicate output to the user. To avoid + * scanning a directory twice we have to cache any + * directory and check if a search has been requested + * twice. 
+ */ + for (j = 0; j < dup_count; j++) { + if (!strcmp(dup_cache[j], param_name)) + break; + } + if (j != dup_count) { + free(param_name); + param_name = NULL; + continue; + } + dup_cache[dup_count++] = strdup(param_name); + if (popt->po_show_path) printf("%s\n", param_name); break; @@ -1131,9 +1128,9 @@ param_display(struct param_opts *popt, char *pattern, char *value, } } - for (i = 0; i < dir_count; i++) - free(dir_cache[i]); - free(dir_cache); + for (i = 0; i < dup_count; i++) + free(dup_cache[i]); + free(dup_cache); out_param: cfs_free_param_data(&paths); return rc; -- 1.8.3.1