Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lnet / utils / lst.c
index 07d4b07..e7d0d55 100644 (file)
@@ -68,6 +68,7 @@ free_lstrs(lstr_t **list)
         }
 }
 
+void
 new_lstrs(lstr_t **list, char *prefix, char *postfix,
           int lo, int hi, int stride)
 {
@@ -160,7 +161,7 @@ expand_strs(char *str, lstr_t **head)
         lstr_t  *list = NULL;
         lstr_t  *nlist;
         lstr_t  *l;
-        int      rc;
+        int      rc = 0;
         int      expanded;
 
         l = alloc_lstr(strlen(str) + 1);
@@ -370,7 +371,7 @@ lst_free_rpcent(struct list_head *head)
         }
 }
 
-int
+void
 lst_reset_rpcent(struct list_head *head)
 {
         lstcon_rpc_ent_t *ent;
@@ -679,6 +680,16 @@ lst_ping_ioctl(char *str, int type, int timeout,
         return lst_ioctl (LSTIO_DEBUG, &args, sizeof(args));
 }
 
+int lst_info_batch_ioctl(char *batch, int test, int server,
+                        lstcon_test_batch_ent_t *entp, int *idxp,
+                        int *ndentp, lstcon_node_ent_t *dentsp);
+
+int lst_info_group_ioctl(char *name, lstcon_ndlist_ent_t *gent,
+                         int *idx, int *count, lstcon_node_ent_t *dents);
+
+int lst_query_batch_ioctl(char *batch, int test, int server,
+                          int timeout, struct list_head *head);
+
 int
 lst_get_node_count(int type, char *str, int *countp, lnet_process_id_t **idspp)
 {
@@ -734,7 +745,6 @@ jt_lst_ping(int argc,  char **argv)
         int                type    = 0;
         int                rc      = 0;
         int                c;
-        int                i;
 
         static struct option ping_opts[] =
         {
@@ -1240,7 +1250,7 @@ jt_lst_list_group(int argc, char **argv)
         int               i;
         int               j;
         int               c;
-        int               rc;
+        int               rc = 0;
 
         static struct option list_group_opts[] =
         {
@@ -1598,8 +1608,6 @@ lst_print_lnet_stat(char *name, int bwrt, int rdwr, int type)
         int     end1   = 1;
         int     start2 = 0;
         int     end2   = 1;
-        int     start3 = 0;
-        int     end3   = 2;
         int     i;
         int     j;
 
@@ -1737,11 +1745,9 @@ jt_lst_stat(int argc, char **argv)
 {
         struct list_head      head;
         lst_stat_req_param_t *srp;
-        lstcon_rpc_ent_t     *ent;
-        char                 *name;
+        char                 *name    = NULL;
         time_t                last    = 0;
         int                   optidx  = 0;
-        int                   count   = 0;
         int                   timeout = 5; /* default timeout, 5 sec */
         int                   delay   = 5; /* default delay, 5 sec */
         int                   lnet    = 1; /* lnet stat by default */
@@ -1750,7 +1756,6 @@ jt_lst_stat(int argc, char **argv)
         int                   type    = -1;
         int                   idx     = 0;
         int                   rc;
-        int                   i;
         int                   c;
 
         static struct option stat_opts[] =
@@ -1896,6 +1901,131 @@ out:
 }
 
+/*
+ * "lst show_error": query the counters of a group (-g) or node list (-n)
+ * and print every node that failed or logged RPC/brw/ping errors.
+ *
+ * Returns 0 on success, -1 on bad usage or node lookup failure, otherwise
+ * the non-zero status of the allocation or stat ioctl that failed.
+ */
 int
+jt_lst_show_error(int argc, char **argv)
+{
+        struct list_head      head;
+        lstcon_rpc_ent_t     *ent;
+        sfw_counters_t       *sfwk;
+        srpc_counters_t      *srpc;
+        lnet_process_id_t    *idsp   = NULL;
+        char                 *name   = NULL;
+        int                   optidx = 0;
+        int                   count  = 0;
+        int                   type   = 0;
+        int                   timeout = 5;
+        int                   ecount = 0;
+        int                   rc;
+        int                   c;
+
+        static struct option show_error_opts[] =
+        {
+                {"group"  , required_argument, 0, 'g' },
+                {"nodes"  , required_argument, 0, 'n' },
+                {0,         0,                 0,  0  }
+        };
+
+        if (session_key == 0) {
+                fprintf(stderr,
+                        "Can't find env LST_SESSION or value is not valid\n");
+                return -1;
+        }
+
+        while ((c = getopt_long(argc, argv, "g:n:",
+                                show_error_opts, &optidx)) != -1) {
+                switch (c) {
+                case 'g':
+                        type = LST_OPC_GROUP;
+                        name = optarg;
+                        break;
+                case 'n':
+                        type = LST_OPC_NODES;
+                        name = optarg;
+                        break;
+                default:
+                        lst_print_usage(argv[0]);
+                        return -1;
+                }
+        }
+
+        if (optind != argc || type == 0) {
+                lst_print_usage(argv[0]);
+                return -1;
+        }
+
+        if (name == NULL) {
+                fprintf(stderr, "Missing name of target (group | nodes)\n");
+                return -1;
+        }
+
+        rc = lst_get_node_count(type, name, &count, &idsp);
+        if (rc < 0) {
+                fprintf(stderr, "Failed to get count of nodes from %s: %s\n",
+                        name, strerror(errno));
+                return -1;
+        }
+
+        CFS_INIT_LIST_HEAD(&head);
+
+        rc = lst_alloc_rpcent(&head, count, sizeof(sfw_counters_t) +
+                                            sizeof(srpc_counters_t) +
+                                            sizeof(lnet_counters_t));
+        if (rc != 0) {
+                fprintf(stderr, "Out of memory\n");
+                goto out;
+        }
+
+        rc = lst_stat_ioctl(name, count, idsp, timeout, &head);
+        if (rc == -1) {
+                lst_print_error(name, "Failed to show errors of %s: %s\n",
+                                name, strerror(errno));
+                goto out;
+        }
+
+        list_for_each_entry(ent, &head, rpe_link) {
+                if (ent->rpe_rpc_errno != 0) {
+                        ecount ++;
+                        fprintf(stderr, "RPC failure, can't show error on %s\n",
+                                libcfs_id2str(ent->rpe_peer));
+                        continue;
+                }
+
+                if (ent->rpe_fwk_errno != 0) {
+                        ecount ++;
+                        fprintf(stderr, "Framework failure, can't show error on %s\n",
+                                libcfs_id2str(ent->rpe_peer));
+                        continue;
+                }
+
+                /* payload starts with the sfw counters, srpc counters follow */
+                sfwk = (sfw_counters_t *)&ent->rpe_payload[0];
+                srpc = (srpc_counters_t *)((char *)sfwk + sizeof(*sfwk));
+
+                if (srpc->errors == 0 &&
+                    sfwk->brw_errors == 0 && sfwk->ping_errors == 0)
+                        continue;
+
+                ecount ++;
+                fprintf(stderr, "[%s]: %d RPC errors, %d brw errors, %d ping errors\n",
+                        libcfs_id2str(ent->rpe_peer), srpc->errors,
+                        sfwk->brw_errors, sfwk->ping_errors);
+        }
+
+        fprintf(stdout, "Total %d errors in %s\n", ecount, name);
+out:
+        lst_free_rpcent(&head);
+        free(idsp);
+
+        return rc;
+}
+
+int
 lst_add_batch_ioctl (char *name)
 {
         lstio_batch_add_args_t  args = {
@@ -2287,7 +2417,6 @@ jt_lst_list_batch(int argc, char **argv)
         int                  ntest   = 0;
         int                  test    = 0;
         int                  c       = 0;
-        int                  i;
         int                  rc;
 
         static struct option list_batch_opts[] =
@@ -2460,11 +2589,9 @@ jt_lst_query_batch(int argc, char **argv)
 {
         lstcon_test_batch_ent_t ent;
         struct list_head     head;
-        lstcon_rpc_ent_t    *rent    = NULL;
         char                *batch   = NULL;
         time_t               last    = 0;
         int                  optidx  = 0;
-        int                  index   = 0;
         int                  verbose = 0;
         int                  server  = 0;
         int                  timeout = 5; /* default 5 seconds */
@@ -2662,7 +2789,6 @@ int
 lst_get_test_param(char *test, int argc, char **argv, void **param, int *plen)
 {
         lst_test_bulk_param_t *bulk = NULL;
-        lst_test_ping_param_t *ping = NULL;
         int                    type;
         int                    i = 0;
 
@@ -2737,7 +2863,7 @@ lst_get_test_param(char *test, int argc, char **argv, void **param, int *plen)
 int
 lst_add_test_ioctl(char *batch, int type, int loop, int concur,
                    int dist, int span, char *sgrp, char *dgrp,
-                   void *param, int plen, struct list_head *resultp)
+                   void *param, int plen, int *retp, struct list_head *resultp)
 {
         lstio_test_args_t args = {
                 .lstio_tes_key          = session_key,
@@ -2754,6 +2880,7 @@ lst_add_test_ioctl(char *batch, int type, int loop, int concur,
                 .lstio_tes_dgrp_name    = dgrp,
                 .lstio_tes_param_len    = plen,
                 .lstio_tes_param        = param,
+                .lstio_tes_retp         = retp,
                 .lstio_tes_resultp      = resultp,
         };
 
@@ -2778,6 +2905,7 @@ jt_lst_add_test(int argc, char **argv)
         int               plen   = 0;
         int               fcount = 0;
         int               tcount = 0;
+        int               ret    = 0;
         int               type;
         int               rc;
         int               c;
@@ -2888,10 +3016,16 @@ jt_lst_add_test(int argc, char **argv)
         }
 
         rc = lst_add_test_ioctl(batch, type, loop, concur,
-                                dist, span, from, to, param, plen, &head);
+                                dist, span, from, to, param, plen, &ret, &head);
 
         if (rc == 0) {
                 fprintf(stdout, "Test was added successfully\n");
+                if (ret != 0) {
+                        fprintf(stdout, "Server group contains userland test "
+                                "nodes, old version of tcplnd can't accept "
+                                "connection request\n");
+                }
+
                 goto out;
         }
 
@@ -2911,7 +3045,7 @@ out:
         return rc;
 }
 
-command_t lst_cmdlist[] = {
+static command_t lst_cmdlist[] = {
        {"new_session",         jt_lst_new_session,     NULL,
          "Usage: lst new_session [--timeout TIME] [--force] [NAME]"                    },
        {"end_session",         jt_lst_end_session,     NULL,
@@ -2931,6 +3065,8 @@ command_t lst_cmdlist[] = {
         {"stat",                jt_lst_stat,            NULL,
          "Usage: lst stat [--bw] [--rate] [--read] [--write] [--max] [--min] [--avg] "
          " [--timeout #] [--delay #] GROUP [GROUP]"                                     },
+        {"show_error",          jt_lst_show_error,      NULL,
+         "Usage: lst show_error [--group NAME] | [--nodes IDS]"                         },
         {"add_batch",           jt_lst_add_batch,       NULL,
          "Usage: lst add_batch NAME"                                                    },
         {"run",                 jt_lst_start_batch,     NULL,
@@ -2968,8 +3104,6 @@ lst_initialize(void)
 int
 main(int argc, char **argv)
 {
-        int     rc;
-
         setlinebuf(stdout);
 
         if (lst_initialize() < 0)