X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lnet%2Futils%2Flst.c;h=e7d0d554feff1be83586723ee07ce35a9be86bd8;hb=2f5681ce80a3faa1241a8d0dca7f71f83aadb745;hp=07d4b075097a26afaec0afc22f3ec00cb058bf5c;hpb=776615e6825e2c90c2635c8b55e7bb02da33726c;p=fs%2Flustre-release.git diff --git a/lnet/utils/lst.c b/lnet/utils/lst.c index 07d4b07..e7d0d55 100644 --- a/lnet/utils/lst.c +++ b/lnet/utils/lst.c @@ -68,6 +68,7 @@ free_lstrs(lstr_t **list) } } +void new_lstrs(lstr_t **list, char *prefix, char *postfix, int lo, int hi, int stride) { @@ -160,7 +161,7 @@ expand_strs(char *str, lstr_t **head) lstr_t *list = NULL; lstr_t *nlist; lstr_t *l; - int rc; + int rc = 0; int expanded; l = alloc_lstr(strlen(str) + 1); @@ -370,7 +371,7 @@ lst_free_rpcent(struct list_head *head) } } -int +void lst_reset_rpcent(struct list_head *head) { lstcon_rpc_ent_t *ent; @@ -679,6 +680,16 @@ lst_ping_ioctl(char *str, int type, int timeout, return lst_ioctl (LSTIO_DEBUG, &args, sizeof(args)); } +int lst_info_batch_ioctl(char *batch, int test, int server, + lstcon_test_batch_ent_t *entp, int *idxp, + int *ndentp, lstcon_node_ent_t *dentsp); + +int lst_info_group_ioctl(char *name, lstcon_ndlist_ent_t *gent, + int *idx, int *count, lstcon_node_ent_t *dents); + +int lst_query_batch_ioctl(char *batch, int test, int server, + int timeout, struct list_head *head); + int lst_get_node_count(int type, char *str, int *countp, lnet_process_id_t **idspp) { @@ -734,7 +745,6 @@ jt_lst_ping(int argc, char **argv) int type = 0; int rc = 0; int c; - int i; static struct option ping_opts[] = { @@ -1240,7 +1250,7 @@ jt_lst_list_group(int argc, char **argv) int i; int j; int c; - int rc; + int rc = 0; static struct option list_group_opts[] = { @@ -1598,8 +1608,6 @@ lst_print_lnet_stat(char *name, int bwrt, int rdwr, int type) int end1 = 1; int start2 = 0; int end2 = 1; - int start3 = 0; - int end3 = 2; int i; int j; @@ -1737,11 +1745,9 @@ jt_lst_stat(int argc, char **argv) { struct list_head head; lst_stat_req_param_t *srp; - lstcon_rpc_ent_t *ent; - char *name; + char *name = NULL; time_t last = 0; int optidx = 0; - int count = 0; int timeout = 5; /* default timeout, 5 sec */ int delay = 5; /* default delay, 5 sec */ int lnet = 1; /* lnet stat by default */ @@ -1750,7 +1756,6 @@ jt_lst_stat(int argc, char **argv) int type = -1; int idx = 0; int rc; - int i; int c; static struct option stat_opts[] = @@ -1896,6 +1901,131 @@ out: } int +jt_lst_show_error(int argc, char **argv) +{ + struct list_head head; + lstcon_rpc_ent_t *ent; + sfw_counters_t *sfwk; + srpc_counters_t *srpc; + lnet_counters_t *lnet; + lnet_process_id_t *idsp = NULL; + char *name = NULL; + int optidx = 0; + int count = 0; + int type = 0; + int timeout = 5; + int ecount = 0; + int rc; + int c; + + + static struct option show_error_opts[] = + { + {"group" , required_argument, 0, 'g' }, + {"nodes" , required_argument, 0, 'n' }, + {0, 0, 0, 0 } + }; + + if (session_key == 0) { + fprintf(stderr, + "Can't find env LST_SESSION or value is not valid\n"); + return -1; + } + + while (1) { + c = getopt_long(argc, argv, "g:n:", show_error_opts, &optidx); + + if (c == -1) + break; + + switch (c) { + case 'g': + type = LST_OPC_GROUP; + name = optarg; + break; + case 'n': + type = LST_OPC_NODES; + name = optarg; + break; + default: + lst_print_usage(argv[0]); + return -1; + } + } + + if (optind != argc || type == 0) { + lst_print_usage(argv[0]); + return -1; + } + + if (name == NULL) { + fprintf(stderr, "Missing name of target (group | nodes)\n"); + return -1; + } + + rc = lst_get_node_count(type, name, &count, &idsp); + if (rc < 0) { + fprintf(stderr, "Failed to get count of nodes from %s: %s\n", + name, strerror(errno)); + return -1; + } + + CFS_INIT_LIST_HEAD(&head); + + rc = lst_alloc_rpcent(&head, count, sizeof(sfw_counters_t) + + sizeof(srpc_counters_t) + + sizeof(lnet_counters_t)); + if (rc != 0) { + fprintf(stderr, "Out of memory\n"); + goto out; + } + + rc = lst_stat_ioctl(name, count, idsp, timeout, &head); + if (rc == -1) { + lst_print_error(name, "Failed to show errors of %s: %s\n", + name, strerror(errno)); + goto out; + } + + list_for_each_entry(ent, &head, rpe_link) { + if (ent->rpe_rpc_errno != 0) { + ecount ++; + fprintf(stderr, "RPC failure, can't show error on %s\n", + libcfs_id2str(ent->rpe_peer)); + continue; + } + + if (ent->rpe_fwk_errno != 0) { + ecount ++; + fprintf(stderr, "Framework failure, can't show error on %s\n", + libcfs_id2str(ent->rpe_peer)); + continue; + } + + sfwk = (sfw_counters_t *)&ent->rpe_payload[0]; + srpc = (srpc_counters_t *)((char *)sfwk + sizeof(*sfwk)); + lnet = (lnet_counters_t *)((char *)srpc + sizeof(*srpc)); + + if (srpc->errors == 0 && + sfwk->brw_errors == 0 && sfwk->ping_errors == 0) + continue; + + ecount ++; + fprintf(stderr, "[%s]: %d RPC errors, %d brw errors, %d ping errors\n", + libcfs_id2str(ent->rpe_peer), srpc->errors, + sfwk->brw_errors, sfwk->ping_errors); + } + + fprintf(stdout, "Total %d errors in %s\n", ecount, name); +out: + lst_free_rpcent(&head); + if (idsp != NULL) + free(idsp); + + return 0; +} + +int lst_add_batch_ioctl (char *name) { lstio_batch_add_args_t args = { @@ -2287,7 +2417,6 @@ jt_lst_list_batch(int argc, char **argv) int ntest = 0; int test = 0; int c = 0; - int i; int rc; static struct option list_batch_opts[] = @@ -2460,11 +2589,9 @@ jt_lst_query_batch(int argc, char **argv) { lstcon_test_batch_ent_t ent; struct list_head head; - lstcon_rpc_ent_t *rent = NULL; char *batch = NULL; time_t last = 0; int optidx = 0; - int index = 0; int verbose = 0; int server = 0; int timeout = 5; /* default 5 seconds */ @@ -2662,7 +2789,6 @@ int lst_get_test_param(char *test, int argc, char **argv, void **param, int *plen) { lst_test_bulk_param_t *bulk = NULL; - lst_test_ping_param_t *ping = NULL; int type; int i = 0; @@ -2737,7 +2863,7 @@ lst_get_test_param(char *test, int argc, char **argv, void **param, int *plen) int lst_add_test_ioctl(char *batch, int type, int loop, int concur, int dist, int span, char *sgrp, char *dgrp, - void *param, int plen, struct list_head *resultp) + void *param, int plen, int *retp, struct list_head *resultp) { lstio_test_args_t args = { .lstio_tes_key = session_key, @@ -2754,6 +2880,7 @@ lst_add_test_ioctl(char *batch, int type, int loop, int concur, .lstio_tes_dgrp_name = dgrp, .lstio_tes_param_len = plen, .lstio_tes_param = param, + .lstio_tes_retp = retp, .lstio_tes_resultp = resultp, }; @@ -2778,6 +2905,7 @@ jt_lst_add_test(int argc, char **argv) int plen = 0; int fcount = 0; int tcount = 0; + int ret = 0; int type; int rc; int c; @@ -2888,10 +3016,16 @@ jt_lst_add_test(int argc, char **argv) } rc = lst_add_test_ioctl(batch, type, loop, concur, - dist, span, from, to, param, plen, &head); + dist, span, from, to, param, plen, &ret, &head); if (rc == 0) { fprintf(stdout, "Test was added successfully\n"); + if (ret != 0) { + fprintf(stdout, "Server group contains userland test " + "nodes, old version of tcplnd can't accept " + "connection request\n"); + } + goto out; } @@ -2911,7 +3045,7 @@ out: return rc; } -command_t lst_cmdlist[] = { +static command_t lst_cmdlist[] = { {"new_session", jt_lst_new_session, NULL, "Usage: lst new_session [--timeout TIME] [--force] [NAME]" }, {"end_session", jt_lst_end_session, NULL, @@ -2931,6 +3065,8 @@ command_t lst_cmdlist[] = { {"stat", jt_lst_stat, NULL, "Usage: lst stat [--bw] [--rate] [--read] [--write] [--max] [--min] [--avg] " " [--timeout #] [--delay #] GROUP [GROUP]" }, + {"show_error", jt_lst_show_error, NULL, + "Usage: lst show_error [--group NAME] | [--nodes IDS]" }, {"add_batch", jt_lst_add_batch, NULL, "Usage: lst add_batch NAME" }, {"run", jt_lst_start_batch, NULL, @@ -2968,8 +3104,6 @@ lst_initialize(void) int main(int argc, char **argv) { - int rc; - setlinebuf(stdout); if (lst_initialize() < 0)