Whamcloud - gitweb
LU-8066 obd: make health_check sysfs compliant 31/25631/7
author James Simmons <uja.ornl@yahoo.com>
Fri, 15 Mar 2019 18:10:42 +0000 (14:10 -0400)
committer Oleg Drokin <green@whamcloud.com>
Mon, 1 Apr 2019 07:23:34 +0000 (07:23 +0000)
The patch http://review.whamcloud.com/16721 was
ported to the upstream client but was rejected
since it violates the sysfs rule of one item per
file. Change the reporting of LBUG plus unhealthy
to just reporting LBUG. Move the reporting of which
device is unhealthy to a new debugfs file that
mirrors the sysfs file.

Change-Id: Ie1640399e97902272000313bb7ccdcbd2be6daf6
Signed-off-by: James Simmons <uja.ornl@yahoo.com>
Reviewed-on: https://review.whamcloud.com/25631
Tested-by: Jenkins
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Emoly Liu <emoly@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/obdclass/obd_sysfs.c
lustre/utils/lustre_cfg.c

index 32f3a13..f83c6f3 100644 (file)
@@ -209,10 +209,8 @@ health_check_show(struct kobject *kobj, struct attribute *attr, char *buf)
        size_t len = 0;
        int i;
 
        size_t len = 0;
        int i;
 
-       if (libcfs_catastrophe) {
-               len = sprintf(buf, "LBUG\n");
-               healthy = false;
-       }
+       if (libcfs_catastrophe)
+               return sprintf(buf, "LBUG\n");
 
        read_lock(&obd_dev_lock);
        for (i = 0; i < class_devno_max(); i++) {
 
        read_lock(&obd_dev_lock);
        for (i = 0; i < class_devno_max(); i++) {
@@ -226,16 +224,17 @@ health_check_show(struct kobject *kobj, struct attribute *attr, char *buf)
                if (obd->obd_stopping)
                        continue;
 
                if (obd->obd_stopping)
                        continue;
 
-               class_incref(obd, __FUNCTION__, current);
+               class_incref(obd, __func__, current);
                read_unlock(&obd_dev_lock);
 
                read_unlock(&obd_dev_lock);
 
-               if (obd_health_check(NULL, obd)) {
-                       len = sprintf(buf, "device %s reported unhealthy\n",
-                                     obd->obd_name);
+               if (obd_health_check(NULL, obd))
                        healthy = false;
                        healthy = false;
-               }
-               class_decref(obd, __FUNCTION__, current);
+
+               class_decref(obd, __func__, current);
                read_lock(&obd_dev_lock);
                read_lock(&obd_dev_lock);
+
+               if (!healthy)
+                       break;
        }
        read_unlock(&obd_dev_lock);
 
        }
        read_unlock(&obd_dev_lock);
 
@@ -433,6 +432,40 @@ static const struct file_operations obd_device_list_fops = {
        .release = seq_release,
 };
 
        .release = seq_release,
 };
 
+static int
+health_check_seq_show(struct seq_file *m, void *unused)
+{
+       int i;
+
+       read_lock(&obd_dev_lock);
+       for (i = 0; i < class_devno_max(); i++) {
+               struct obd_device *obd;
+
+               obd = class_num2obd(i);
+               if (obd == NULL || !obd->obd_attached || !obd->obd_set_up)
+                       continue;
+
+               LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
+               if (obd->obd_stopping)
+                       continue;
+
+               class_incref(obd, __func__, current);
+               read_unlock(&obd_dev_lock);
+
+               if (obd_health_check(NULL, obd)) {
+                       seq_printf(m, "device %s reported unhealthy\n",
+                                  obd->obd_name);
+               }
+               class_decref(obd, __func__, current);
+               read_lock(&obd_dev_lock);
+       }
+       read_unlock(&obd_dev_lock);
+
+       return 0;
+}
+
+LDEBUGFS_SEQ_FOPS_RO(health_check);
+
 struct kset *lustre_kset;
 EXPORT_SYMBOL_GPL(lustre_kset);
 
 struct kset *lustre_kset;
 EXPORT_SYMBOL_GPL(lustre_kset);
 
@@ -501,6 +534,15 @@ int class_procfs_init(void)
                goto out;
        }
 
                goto out;
        }
 
+       file = debugfs_create_file("health_check", 0444, debugfs_lustre_root,
+                                  NULL, &health_check_fops);
+       if (IS_ERR_OR_NULL(file)) {
+               rc = file ? PTR_ERR(file) : -ENOMEM;
+               debugfs_remove_recursive(debugfs_lustre_root);
+               kset_unregister(lustre_kset);
+               goto out;
+       }
+
        entry = lprocfs_register("fs/lustre", NULL, NULL, NULL);
        if (IS_ERR(entry)) {
                rc = PTR_ERR(entry);
        entry = lprocfs_register("fs/lustre", NULL, NULL, NULL);
        if (IS_ERR(entry)) {
                rc = PTR_ERR(entry);
index a29aae7..02f3fa1 100644 (file)
@@ -969,8 +969,8 @@ static int
 param_display(struct param_opts *popt, char *pattern, char *value,
              enum parameter_operation mode)
 {
 param_display(struct param_opts *popt, char *pattern, char *value,
              enum parameter_operation mode)
 {
-       int dir_count = 0;
-       char **dir_cache;
+       int dup_count = 0;
+       char **dup_cache;
        glob_t paths;
        char *opname = parameter_opname[mode];
        int rc, i;
        glob_t paths;
        char *opname = parameter_opname[mode];
        int rc, i;
@@ -985,11 +985,11 @@ param_display(struct param_opts *popt, char *pattern, char *value,
                return rc;
        }
 
                return rc;
        }
 
-       dir_cache = calloc(paths.gl_pathc, sizeof(char *));
-       if (dir_cache == NULL) {
+       dup_cache = calloc(paths.gl_pathc, sizeof(char *));
+       if (dup_cache == NULL) {
                rc = -ENOMEM;
                fprintf(stderr,
                rc = -ENOMEM;
                fprintf(stderr,
-                       "error: %s: allocating '%s' dir_cache[%zd]: %s\n",
+                       "error: %s: allocating '%s' dup_cache[%zd]: %s\n",
                        opname, pattern, paths.gl_pathc, strerror(-rc));
                goto out_param;
        }
                        opname, pattern, paths.gl_pathc, strerror(-rc));
                goto out_param;
        }
@@ -998,7 +998,7 @@ param_display(struct param_opts *popt, char *pattern, char *value,
                char *param_name = NULL, *tmp;
                char pathname[PATH_MAX];
                struct stat st;
                char *param_name = NULL, *tmp;
                char pathname[PATH_MAX];
                struct stat st;
-               int rc2;
+               int rc2, j;
 
                if (stat(paths.gl_pathv[i], &st) == -1) {
                        fprintf(stderr, "error: %s: stat '%s': %s\n",
 
                if (stat(paths.gl_pathv[i], &st) == -1) {
                        fprintf(stderr, "error: %s: stat '%s': %s\n",
@@ -1021,35 +1021,6 @@ param_display(struct param_opts *popt, char *pattern, char *value,
                        continue;
                }
 
                        continue;
                }
 
-               /**
-                * For the upstream client the parameter files locations
-                * are split between under both /sys/kernel/debug/lustre
-                * and /sys/fs/lustre. The parameter files containing
-                * small amounts of data, less than a page in size, are
-                * located under /sys/fs/lustre and in the case of large
-                * parameter data files, think stats for example, are
-                * located in the debugfs tree. Since the files are split
-                * across two trees the directories are often duplicated
-                * which means these directories are listed twice which
-                * leads to duplicate output to the user. To avoid scanning
-                * a directory twice we have to cache any directory and
-                * check if a search has been requested twice.
-                */
-               if (S_ISDIR(st.st_mode)) {
-                       int j;
-
-                       for (j = 0; j < dir_count; j++) {
-                               if (!strcmp(dir_cache[j], param_name))
-                                       break;
-                       }
-                       if (j != dir_count) {
-                               free(param_name);
-                               param_name = NULL;
-                               continue;
-                       }
-                       dir_cache[dir_count++] = strdup(param_name);
-               }
-
                switch (mode) {
                case GET_PARAM:
                        /* Read the contents of file to stdout */
                switch (mode) {
                case GET_PARAM:
                        /* Read the contents of file to stdout */
@@ -1069,6 +1040,32 @@ param_display(struct param_opts *popt, char *pattern, char *value,
                        }
                        break;
                case LIST_PARAM:
                        }
                        break;
                case LIST_PARAM:
+                       /**
+                        * For the upstream client the parameter files locations
+                        * are split between under both /sys/kernel/debug/lustre
+                        * and /sys/fs/lustre. The parameter files containing
+                        * small amounts of data, less than a page in size, are
+                        * located under /sys/fs/lustre and in the case of large
+                        * parameter data files, think stats for example, are
+                        * located in the debugfs tree. Since the files are split
+                        * across two trees the directories are often duplicated
+                        * which means these directories are listed twice which
+                        * leads to duplicate output to the user. To avoid
+                        * scanning a directory twice we have to cache any
+                        * directory and check if a search has been requested
+                        * twice.
+                        */
+                       for (j = 0; j < dup_count; j++) {
+                               if (!strcmp(dup_cache[j], param_name))
+                                       break;
+                       }
+                       if (j != dup_count) {
+                               free(param_name);
+                               param_name = NULL;
+                               continue;
+                       }
+                       dup_cache[dup_count++] = strdup(param_name);
+
                        if (popt->po_show_path)
                                printf("%s\n", param_name);
                        break;
                        if (popt->po_show_path)
                                printf("%s\n", param_name);
                        break;
@@ -1131,9 +1128,9 @@ param_display(struct param_opts *popt, char *pattern, char *value,
                }
        }
 
                }
        }
 
-       for (i = 0; i < dir_count; i++)
-               free(dir_cache[i]);
-       free(dir_cache);
+       for (i = 0; i < dup_count; i++)
+               free(dup_cache[i]);
+       free(dup_cache);
 out_param:
        cfs_free_param_data(&paths);
        return rc;
 out_param:
        cfs_free_param_data(&paths);
        return rc;