X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lnet%2Futils%2Fportals.c;h=bd670ceffa81644faeaf92e69fe974fcce4bcd64;hp=1a57433b9ac26f6cc1bb1f66d51987ef3147e2d2;hb=c6aab2ca77831852db22b7dc39baed4d06405b7e;hpb=e5c35288861cf8ca4aecc30747b61069f9d80bb5;ds=sidebyside diff --git a/lnet/utils/portals.c b/lnet/utils/portals.c index 1a57433..bd670ce 100644 --- a/lnet/utils/portals.c +++ b/lnet/utils/portals.c @@ -1,9 +1,9 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. * - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2013, 2014, Intel Corporation. * - * This file is part of Portals, http://www.sf.net/projects/lustre/ + * This file is part of Lustre, https://wiki.hpdd.intel.com/ * * Portals is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -19,11 +19,24 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * */ - -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include +#include unsigned int libcfs_debug; unsigned int libcfs_printk = D_CANTMASK; @@ -31,6 +44,10 @@ unsigned int libcfs_printk = D_CANTMASK; static int g_net_set; static __u32 g_net; +#define IOC_BUF_SIZE 8192 +static char local_buf[IOC_BUF_SIZE]; +static char *ioc_buf = local_buf; + /* Convert a string boolean to an int; "enable" -> 1 */ int lnet_parse_bool (int *b, char *str) @@ -144,7 +161,7 @@ lnet_parse_ipaddr (__u32 *ipaddrp, char *str) } char * -ptl_ipaddr_2_str (__u32 ipaddr, char *str, int lookup) +ptl_ipaddr_2_str(__u32 ipaddr, char *str, size_t strsize, int lookup) { #ifdef HAVE_GETHOSTBYNAME __u32 net_ip; @@ -154,7 +171,7 @@ ptl_ipaddr_2_str (__u32 ipaddr, char *str, int lookup) net_ip = htonl (ipaddr); he = gethostbyaddr (&net_ip, sizeof (net_ip), AF_INET); if (he != NULL) { - strcpy(str, he->h_name); + strlcpy(str, he->h_name, strsize); return (str); } } @@ -195,6 +212,19 @@ lnet_parse_time (time_t *t, char *str) return (0); } +int +lnet_parse_nid(char *nid_str, lnet_process_id_t *id_ptr) +{ + id_ptr->pid = LNET_PID_ANY; + id_ptr->nid = libcfs_str2nid(nid_str); + if (id_ptr->nid == LNET_NID_ANY) { + fprintf (stderr, "Can't parse nid \"%s\"\n", nid_str); + return -1; + } + + return 0; +} + int g_net_is_set (char *cmd) { if (g_net_set) @@ -293,6 +323,14 @@ int jt_ptl_network(int argc, char **argv) return -1; } + if (LNET_NETTYP(net) == CIBLND || + LNET_NETTYP(net) == OPENIBLND || + LNET_NETTYP(net) == IIBLND || + LNET_NETTYP(net) == VIBLND) { + fprintf(stderr, "Net %s obsoleted\n", libcfs_lnd2str(net)); + return -1; + } + g_net_set = 1; g_net = net; return 0; @@ -412,7 +450,7 @@ int jt_ptl_print_interfaces (int argc, char **argv) { struct libcfs_ioctl_data data; - char buffer[3][64]; + char buffer[3][HOST_NAME_MAX + 1]; int index; int rc; @@ -429,9 +467,12 @@ jt_ptl_print_interfaces (int argc, char **argv) break; printf ("%s: (%s/%s) npeer %d nroute %d\n", - ptl_ipaddr_2_str(data.ioc_u32[0], buffer[2], 1), - ptl_ipaddr_2_str(data.ioc_u32[0], buffer[0], 0), - ptl_ipaddr_2_str(data.ioc_u32[1], buffer[1], 0), + ptl_ipaddr_2_str(data.ioc_u32[0], buffer[2], + sizeof(buffer[2]), 1), + ptl_ipaddr_2_str(data.ioc_u32[0], buffer[0], + sizeof(buffer[0]), 0), + ptl_ipaddr_2_str(data.ioc_u32[1], buffer[1], + sizeof(buffer[1]), 0), data.ioc_u32[2], data.ioc_u32[3]); } @@ -539,12 +580,11 @@ jt_ptl_print_peers (int argc, char **argv) { struct libcfs_ioctl_data data; lnet_process_id_t id; - char buffer[2][64]; + char buffer[2][HOST_NAME_MAX + 1]; int index; int rc; - if (!g_net_is_compatible (argv[0], SOCKLND, RALND, PTLLND, MXLND, - OPENIBLND, CIBLND, IIBLND, VIBLND, O2IBLND, 0)) + if (!g_net_is_compatible (argv[0], SOCKLND, O2IBLND, GNILND, 0)) return -1; for (index = 0;;index++) { @@ -560,37 +600,36 @@ jt_ptl_print_peers (int argc, char **argv) id.nid = data.ioc_nid; id.pid = data.ioc_u32[4]; printf ("%-20s [%d]%s->%s:%d #%d\n", - libcfs_id2str(id), + libcfs_id2str(id), data.ioc_count, /* persistence */ - ptl_ipaddr_2_str (data.ioc_u32[2], buffer[0], 1), /* my ip */ - ptl_ipaddr_2_str (data.ioc_u32[0], buffer[1], 1), /* peer ip */ + /* my ip */ + ptl_ipaddr_2_str(data.ioc_u32[2], buffer[0], + sizeof(buffer[0]), 1), + /* peer ip */ + ptl_ipaddr_2_str(data.ioc_u32[0], buffer[1], + sizeof(buffer[1]), 1), data.ioc_u32[1], /* peer port */ data.ioc_u32[3]); /* conn_count */ - } else if (g_net_is_compatible(NULL, PTLLND, 0)) { - id.nid = data.ioc_nid; - id.pid = data.ioc_u32[4]; - printf ("%-20s s %d%s [%d] "LPD64".%06d" - " m "LPD64"/"LPD64" q %d/%d c %d/%d\n", - libcfs_id2str(id), - data.ioc_net, /* state */ - data.ioc_flags ? "" : " ~!h", /* sent_hello */ - data.ioc_count, /* refcount */ - data.ioc_u64[0]/1000000, /* incarnation secs */ - (int)(data.ioc_u64[0]%1000000), /* incarnation usecs */ - (((__u64)data.ioc_u32[1])<<32) | - ((__u64)data.ioc_u32[0]), /* next_matchbits */ - (((__u64)data.ioc_u32[3])<<32) | - ((__u64)data.ioc_u32[2]), /* last_matchbits_seen */ - data.ioc_u32[5] >> 16, /* nsendq */ - data.ioc_u32[5] & 0xffff, /* nactiveq */ - data.ioc_u32[6] >> 16, /* credits */ - data.ioc_u32[6] & 0xffff); /* outstanding_credits */ - } else if (g_net_is_compatible(NULL, RALND, OPENIBLND, CIBLND, VIBLND, 0)) { - printf ("%-20s [%d]@%s:%d\n", - libcfs_nid2str(data.ioc_nid), /* peer nid */ - data.ioc_count, /* peer persistence */ - ptl_ipaddr_2_str (data.ioc_u32[0], buffer[1], 1), /* peer ip */ - data.ioc_u32[1]); /* peer port */ + } else if (g_net_is_compatible(NULL, GNILND, 0)) { + int disconn = data.ioc_flags >> 16; + char *state; + + if (disconn) + state = "D"; + else + state = data.ioc_flags & 0xffff ? "C" : "U"; + + printf ("%-20s (%d) %s [%d] "LPU64" " + "sq %d/%d tx %d/%d/%d\n", + libcfs_nid2str(data.ioc_nid), /* peer nid */ + data.ioc_net, /* gemini device id */ + state, /* peer is Connecting, Up, or Down */ + data.ioc_count, /* peer refcount */ + data.ioc_u64[0], /* peerstamp */ + data.ioc_u32[2], data.ioc_u32[3], /* tx and rx seq */ + /* fmaq, nfma, nrdma */ + data.ioc_u32[0], data.ioc_u32[1], data.ioc_u32[4] + ); } else { printf ("%-20s [%d]\n", libcfs_nid2str(data.ioc_nid), data.ioc_count); @@ -618,24 +657,12 @@ jt_ptl_add_peer (int argc, char **argv) int port = 0; int rc; - if (!g_net_is_compatible (argv[0], SOCKLND, RALND, - OPENIBLND, CIBLND, IIBLND, VIBLND, 0)) + if (!g_net_is_compatible(argv[0], SOCKLND, GNILND, 0)) return -1; - if (g_net_is_compatible(NULL, SOCKLND, OPENIBLND, CIBLND, RALND, 0)) { - if (argc != 4) { - fprintf (stderr, "usage(tcp,openib,cib,ra): %s nid ipaddr port\n", - argv[0]); - return 0; - } - } else if (g_net_is_compatible(NULL, VIBLND, 0)) { - if (argc != 3) { - fprintf (stderr, "usage(vib): %s nid ipaddr\n", - argv[0]); - return 0; - } - } else if (argc != 2) { - fprintf (stderr, "usage(iib): %s nid\n", argv[0]); + if (argc != 4) { + fprintf (stderr, "usage(tcp,ra,gni): %s nid ipaddr port\n", + argv[0]); return 0; } @@ -645,14 +672,12 @@ jt_ptl_add_peer (int argc, char **argv) return -1; } - if (g_net_is_compatible (NULL, SOCKLND, OPENIBLND, CIBLND, VIBLND, RALND, 0) && - lnet_parse_ipaddr (&ip, argv[2]) != 0) { + if (lnet_parse_ipaddr (&ip, argv[2]) != 0) { fprintf (stderr, "Can't parse ip addr: %s\n", argv[2]); return -1; } - if (g_net_is_compatible (NULL, SOCKLND, OPENIBLND, CIBLND, RALND, 0) && - lnet_parse_port (&port, argv[3]) != 0) { + if (lnet_parse_port (&port, argv[3]) != 0) { fprintf (stderr, "Can't parse port: %s\n", argv[3]); return -1; } @@ -680,11 +705,9 @@ jt_ptl_del_peer (int argc, char **argv) lnet_nid_t nid = LNET_NID_ANY; lnet_pid_t pid = LNET_PID_ANY; __u32 ip = 0; - char *end; int rc; - if (!g_net_is_compatible (argv[0], SOCKLND, RALND, MXLND, PTLLND, - OPENIBLND, CIBLND, IIBLND, VIBLND, O2IBLND, 0)) + if (!g_net_is_compatible(argv[0], SOCKLND, O2IBLND, GNILND, 0)) return -1; if (g_net_is_compatible(NULL, SOCKLND, 0)) { @@ -693,12 +716,6 @@ jt_ptl_del_peer (int argc, char **argv) argv[0]); return 0; } - } else if (g_net_is_compatible(NULL, PTLLND, 0)) { - if (argc > 3) { - fprintf (stderr, "usage: %s [nid] [pid]\n", - argv[0]); - return 0; - } } else if (argc > 2) { fprintf (stderr, "usage: %s [nid]\n", argv[0]); return 0; @@ -717,15 +734,6 @@ jt_ptl_del_peer (int argc, char **argv) argv[2]); return -1; } - } else if (g_net_is_compatible(NULL, PTLLND, 0)) { - if (argc > 2) { - pid = strtol(argv[2], &end, 0); - if (end == argv[2] || *end == 0) { - fprintf(stderr, "Can't parse pid %s\n", - argv[2]); - return -1; - } - } } LIBCFS_IOC_INIT(data); @@ -749,12 +757,11 @@ jt_ptl_print_connections (int argc, char **argv) { struct libcfs_ioctl_data data; lnet_process_id_t id; - char buffer[2][64]; + char buffer[2][HOST_NAME_MAX + 1]; int index; int rc; - if (!g_net_is_compatible (argv[0], SOCKLND, RALND, MXLND, - OPENIBLND, CIBLND, IIBLND, VIBLND, O2IBLND, 0)) + if (!g_net_is_compatible(argv[0], SOCKLND, O2IBLND, GNILND, 0)) return -1; for (index = 0; ; index++) { @@ -776,16 +783,24 @@ jt_ptl_print_connections (int argc, char **argv) (data.ioc_u32[3] == SOCKLND_CONN_BULK_IN) ? "I" : (data.ioc_u32[3] == SOCKLND_CONN_BULK_OUT) ? "O" : "?", data.ioc_u32[4], /* scheduler */ - ptl_ipaddr_2_str (data.ioc_u32[2], buffer[0], 1), /* local IP addr */ - ptl_ipaddr_2_str (data.ioc_u32[0], buffer[1], 1), /* remote IP addr */ + /* local IP addr */ + ptl_ipaddr_2_str(data.ioc_u32[2], buffer[0], + sizeof(buffer[0]), 1), + /* remote IP addr */ + ptl_ipaddr_2_str(data.ioc_u32[0], buffer[1], + sizeof(buffer[1]), 1), data.ioc_u32[1], /* remote port */ data.ioc_count, /* tx buffer size */ data.ioc_u32[5], /* rx buffer size */ data.ioc_flags ? "nagle" : "nonagle"); - } else if (g_net_is_compatible (NULL, RALND, 0)) { - printf ("%-20s [%d]\n", + } else if (g_net_is_compatible (NULL, O2IBLND, 0)) { + printf ("%s mtu %d\n", libcfs_nid2str(data.ioc_nid), - data.ioc_u32[0] /* device id */); + data.ioc_u32[0]); /* path MTU */ + } else if (g_net_is_compatible (NULL, GNILND, 0)) { + printf ("%-20s [%d]\n", + libcfs_nid2str(data.ioc_nid), + data.ioc_u32[0] /* device id */); } else { printf ("%s\n", libcfs_nid2str(data.ioc_nid)); } @@ -815,8 +830,7 @@ int jt_ptl_disconnect(int argc, char **argv) return 0; } - if (!g_net_is_compatible (NULL, SOCKLND, RALND, MXLND, - OPENIBLND, CIBLND, IIBLND, VIBLND, O2IBLND, 0)) + if (!g_net_is_compatible(NULL, SOCKLND, O2IBLND, GNILND, 0)) return 0; if (argc >= 2 && @@ -858,7 +872,7 @@ int jt_ptl_push_connection (int argc, char **argv) return 0; } - if (!g_net_is_compatible (argv[0], SOCKLND, 0)) + if (!g_net_is_compatible (argv[0], SOCKLND, GNILND, 0)) return -1; if (argc > 1 && @@ -881,49 +895,6 @@ int jt_ptl_push_connection (int argc, char **argv) return 0; } -int -jt_ptl_print_active_txs (int argc, char **argv) -{ - struct libcfs_ioctl_data data; - int index; - int rc; - - if (!g_net_is_compatible (argv[0], QSWLND, 0)) - return -1; - - for (index = 0;;index++) { - LIBCFS_IOC_INIT(data); - data.ioc_net = g_net; - data.ioc_count = index; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_TXDESC, &data); - if (rc != 0) - break; - - printf ("type %u payload %6d to %s via %s by pid %6d: " - "%s, %s, state %d\n", - data.ioc_u32[0], - data.ioc_count, - libcfs_nid2str(data.ioc_nid), - libcfs_nid2str(data.ioc_u64[0]), - data.ioc_u32[1], - (data.ioc_flags & 1) ? "delayed" : "immediate", - (data.ioc_flags & 2) ? "nblk" : "normal", - data.ioc_flags >> 2); - } - - if (index == 0) { - if (errno == ENOENT) { - printf ("\n"); - } else { - fprintf(stderr, "Error getting active transmits list: " - "%s: check dmesg.\n", - strerror(errno)); - } - } - return 0; -} - int jt_ptl_ping(int argc, char **argv) { int rc; @@ -942,12 +913,9 @@ int jt_ptl_ping(int argc, char **argv) sep = strchr(argv[1], '-'); if (sep == NULL) { - id.pid = LNET_PID_ANY; - id.nid = libcfs_str2nid(argv[1]); - if (id.nid == LNET_NID_ANY) { - fprintf (stderr, "Can't parse nid \"%s\"\n", argv[1]); + rc = lnet_parse_nid(argv[1], &id); + if (rc != 0) return -1; - } } else { char *end; @@ -957,12 +925,19 @@ int jt_ptl_ping(int argc, char **argv) else id.pid = strtoul(argv[1], &end, 0); - id.nid = libcfs_str2nid(sep + 1); + if (end != sep) { /* assuming '-' is part of hostname */ + rc = lnet_parse_nid(argv[1], &id); + if (rc != 0) + return -1; + } else { + id.nid = libcfs_str2nid(sep + 1); - if (end != sep || - id.nid == LNET_NID_ANY) { - fprintf(stderr, "Can't parse process id \"%s\"\n", argv[1]); - return -1; + if (id.nid == LNET_NID_ANY) { + fprintf(stderr, + "Can't parse process id \"%s\"\n", + argv[1]); + return -1; + } } } @@ -1032,7 +1007,7 @@ jt_ptl_fail_nid (int argc, char **argv) { int rc; lnet_nid_t nid; - unsigned int threshold; + int threshold; struct libcfs_ioctl_data data; if (argc < 2 || argc > 3) @@ -1049,7 +1024,7 @@ jt_ptl_fail_nid (int argc, char **argv) if (argc < 3) { threshold = LNET_MD_THRESH_INF; - } else if (sscanf (argv[2], "%i", &threshold) != 1) { + } else if (sscanf(argv[2], "%i", &threshold) != 1) { fprintf (stderr, "Can't parse count \"%s\"\n", argv[2]); return (-1); } @@ -1071,79 +1046,92 @@ jt_ptl_fail_nid (int argc, char **argv) int jt_ptl_add_route (int argc, char **argv) { - struct libcfs_ioctl_data data; - lnet_nid_t gateway_nid; - unsigned int hops = 1; - char *end; - int rc; - - if (argc < 2 || argc > 3) - { - fprintf (stderr, "usage: %s gateway [hopcount]\n", argv[0]); - return (0); - } - - if (!g_net_is_set(argv[0])) - return (-1); - - gateway_nid = libcfs_str2nid(argv[1]); - if (gateway_nid == LNET_NID_ANY) { - fprintf (stderr, "Can't parse gateway NID \"%s\"\n", argv[1]); - return (-1); - } - - if (argc == 3) { - hops = strtoul(argv[2], &end, 0); - if (hops >= 256 || *end != 0) { - fprintf (stderr, "Can't parse hopcount \"%s\"\n", argv[2]); - return -1; - } - } - - LIBCFS_IOC_INIT(data); - data.ioc_net = g_net; - data.ioc_count = hops; - data.ioc_nid = gateway_nid; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_ADD_ROUTE, &data); - if (rc != 0) { - fprintf (stderr, "IOC_LIBCFS_ADD_ROUTE failed: %s\n", strerror (errno)); - return (-1); - } - - return (0); + struct lnet_ioctl_config_data data; + lnet_nid_t gateway_nid; + unsigned int hops = 1; + unsigned int priority = 0; + char *end; + int rc; + + if (argc < 2 || argc > 4) { + fprintf(stderr, "usage: %s gateway [hopcount [priority]]\n", + argv[0]); + return -1; + } + + if (g_net_is_set(argv[0]) == 0) + return -1; + + gateway_nid = libcfs_str2nid(argv[1]); + if (gateway_nid == LNET_NID_ANY) { + fprintf(stderr, "Can't parse gateway NID \"%s\"\n", argv[1]); + return -1; + } + + if (argc > 2) { + hops = strtoul(argv[2], &end, 0); + if (hops == 0 || hops >= 256 || (end != NULL && *end != 0)) { + fprintf(stderr, "Can't parse hopcount \"%s\"\n", + argv[2]); + return -1; + } + if (argc == 4) { + priority = strtoul(argv[3], &end, 0); + if (end != NULL && *end != 0) { + fprintf(stderr, + "Can't parse priority \"%s\"\n", + argv[3]); + return -1; + } + } + } + + LIBCFS_IOC_INIT_V2(data, cfg_hdr); + data.cfg_net = g_net; + data.cfg_config_u.cfg_route.rtr_hop = hops; + data.cfg_nid = gateway_nid; + data.cfg_config_u.cfg_route.rtr_priority = priority; + + rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_ADD_ROUTE, &data); + if (rc != 0) { + fprintf(stderr, "IOC_LIBCFS_ADD_ROUTE failed: %s\n", + strerror(errno)); + return -1; + } + + return 0; } int jt_ptl_del_route (int argc, char **argv) { - struct libcfs_ioctl_data data; - lnet_nid_t nid; - int rc; - - if (argc != 2) { - fprintf (stderr, "usage: %s gatewayNID\n", argv[0]); - return (0); - } - - if (!libcfs_str2anynid(&nid, argv[1])) { - fprintf (stderr, "Can't parse gateway NID " - "\"%s\"\n", argv[1]); - return -1; - } - - LIBCFS_IOC_INIT(data); - data.ioc_net = g_net_set ? g_net : LNET_NIDNET(LNET_NID_ANY); - data.ioc_nid = nid; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_DEL_ROUTE, &data); - if (rc != 0) { - fprintf (stderr, "IOC_LIBCFS_DEL_ROUTE (%s) failed: %s\n", - libcfs_nid2str(nid), strerror (errno)); - return (-1); - } - - return (0); + struct lnet_ioctl_config_data data; + lnet_nid_t nid; + int rc; + + if (argc != 2) { + fprintf(stderr, "usage: %s gatewayNID\n", argv[0]); + return 0; + } + + if (libcfs_str2anynid(&nid, argv[1]) == 0) { + fprintf(stderr, "Can't parse gateway NID " + "\"%s\"\n", argv[1]); + return -1; + } + + LIBCFS_IOC_INIT_V2(data, cfg_hdr); + data.cfg_net = g_net_set ? g_net : LNET_NIDNET(LNET_NID_ANY); + data.cfg_nid = nid; + + rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_DEL_ROUTE, &data); + if (rc != 0) { + fprintf(stderr, "IOC_LIBCFS_DEL_ROUTE (%s) failed: %s\n", + libcfs_nid2str(nid), strerror(errno)); + return -1; + } + + return 0; } int @@ -1208,182 +1196,454 @@ jt_ptl_notify_router (int argc, char **argv) int jt_ptl_print_routes (int argc, char **argv) { - struct libcfs_ioctl_data data; - int rc; - int index; - __u32 net; - lnet_nid_t nid; - unsigned int hops; - int alive; - - for (index = 0;;index++) - { - LIBCFS_IOC_INIT(data); - data.ioc_count = index; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_ROUTE, &data); - if (rc != 0) - break; - - net = data.ioc_net; - hops = data.ioc_count; - nid = data.ioc_nid; - alive = data.ioc_flags; - - printf ("net %18s hops %u gw %32s %s\n", - libcfs_net2str(net), hops, - libcfs_nid2str(nid), alive ? "up" : "down"); - } - - if (errno != ENOENT) - fprintf(stderr, "Error getting routes: %s: check dmesg.\n", - strerror(errno)); - - return (0); + struct lnet_ioctl_config_data data; + int rc; + int index; + __u32 net; + lnet_nid_t nid; + unsigned int hops; + int alive; + unsigned int pri; + + for (index = 0; ; index++) { + LIBCFS_IOC_INIT_V2(data, cfg_hdr); + data.cfg_count = index; + + rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_ROUTE, &data); + if (rc != 0) + break; + + net = data.cfg_net; + hops = data.cfg_config_u.cfg_route.rtr_hop; + nid = data.cfg_nid; + alive = data.cfg_config_u.cfg_route.rtr_flags; + pri = data.cfg_config_u.cfg_route.rtr_priority; + + printf("net %18s hops %u gw %32s %s pri %u\n", + libcfs_net2str(net), hops, + libcfs_nid2str(nid), alive ? "up" : "down", pri); + } + + if (errno != ENOENT) + fprintf(stderr, "Error getting routes: %s: check dmesg.\n", + strerror(errno)); + + return 0; } static int -lwt_control(int enable, int clear) +fault_attr_nid_parse(char *str, lnet_nid_t *nid_p) { - struct libcfs_ioctl_data data; - int rc; - - LIBCFS_IOC_INIT(data); - data.ioc_flags = (enable ? 1 : 0) | (clear ? 2 : 0); - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_LWT_CONTROL, &data); - if (rc == 0) - return (0); - - fprintf(stderr, "IOC_LIBCFS_LWT_CONTROL failed: %s\n", - strerror(errno)); - return (-1); + lnet_nid_t nid; + __u32 net; + int rc = 0; + + /* NB: can't support range ipaddress except * and *@net */ + if (strlen(str) > 2 && str[0] == '*' && str[1] == '@') { + net = libcfs_str2net(str + 2); + if (net == LNET_NIDNET(LNET_NID_ANY)) + goto failed; + + nid = LNET_MKNID(net, LNET_NIDADDR(LNET_NID_ANY)); + } else { + rc = libcfs_str2anynid(&nid, str); + if (!rc) + goto failed; + } + + *nid_p = nid; + return 0; +failed: + fprintf(stderr, "Invalid NID : %s\n", str); + return -1; } static int -lwt_snapshot(cycles_t *now, int *ncpu, int *totalsize, - lwt_event_t *events, int size) +fault_attr_msg_parse(char *msg_str, __u32 *mask_p) { - struct libcfs_ioctl_data data; - int rc; + if (!strcasecmp(msg_str, "put")) { + *mask_p |= LNET_PUT_BIT; + return 0; - LIBCFS_IOC_INIT(data); - data.ioc_pbuf1 = (char *)events; - data.ioc_plen1 = size; + } else if (!strcasecmp(msg_str, "ack")) { + *mask_p |= LNET_ACK_BIT; + return 0; - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_LWT_SNAPSHOT, &data); - if (rc != 0) { - fprintf(stderr, "IOC_LIBCFS_LWT_SNAPSHOT failed: %s\n", - strerror(errno)); - return (-1); - } + } else if (!strcasecmp(msg_str, "get")) { + *mask_p |= LNET_GET_BIT; + return 0; - /* crappy overloads */ - if (data.ioc_u32[2] != sizeof(lwt_event_t) || - data.ioc_u32[3] != offsetof(lwt_event_t, lwte_where)) { - fprintf(stderr,"kernel/user LWT event mismatch %d(%d),%d(%d)\n", - (int)data.ioc_u32[2], (int)sizeof(lwt_event_t), - (int)data.ioc_u32[3], - (int)offsetof(lwt_event_t, lwte_where)); - return (-1); - } + } else if (!strcasecmp(msg_str, "reply")) { + *mask_p |= LNET_REPLY_BIT; + return 0; + } - if (now != NULL) - *now = data.ioc_u64[0]; + fprintf(stderr, "unknown message type %s\n", msg_str); + return -1; +} - LASSERT (data.ioc_u32[0] != 0); - if (ncpu != NULL) - *ncpu = data.ioc_u32[0]; +static int +fault_attr_ptl_parse(char *ptl_str, __u64 *mask_p) +{ + unsigned long rc = strtoul(optarg, NULL, 0); - LASSERT (data.ioc_u32[1] != 0); - if (totalsize != NULL) - *totalsize = data.ioc_u32[1]; + if (rc >= 64) { + fprintf(stderr, "invalid portal: %lu\n", rc); + return -1; + } - return (0); + *mask_p |= (1ULL << rc); + return 0; } -static char * -lwt_get_string(char *kstr) +static int +fault_simul_rule_add(__u32 opc, char *name, int argc, char **argv) { - char *ustr; - struct libcfs_ioctl_data data; - int size; - int rc; + struct libcfs_ioctl_data data = {{0}}; + struct lnet_fault_attr attr; + char *optstr; + int rc; + + static struct option opts[] = { + {"source", required_argument, 0, 's'}, + {"dest", required_argument, 0, 'd'}, + {"rate", required_argument, 0, 'r'}, + {"interval", required_argument, 0, 'i'}, + {"latency", required_argument, 0, 'l'}, + {"portal", required_argument, 0, 'p'}, + {"message", required_argument, 0, 'm'}, + {0, 0, 0, 0} + }; + + if (argc == 1) { + fprintf(stderr, "Failed, please provide source, destination " + "and rate of rule\n"); + return -1; + } + + optstr = opc == LNET_CTL_DROP_ADD ? "s:d:r:i:p:m:" : "s:d:r:l:p:m:"; + memset(&attr, 0, sizeof(attr)); + while (1) { + char c = getopt_long(argc, argv, optstr, opts, NULL); + + if (c == -1) + break; + + switch (c) { + case 's': /* source NID/NET */ + rc = fault_attr_nid_parse(optarg, &attr.fa_src); + if (rc != 0) + goto getopt_failed; + break; + + case 'd': /* dest NID/NET */ + rc = fault_attr_nid_parse(optarg, &attr.fa_dst); + if (rc != 0) + goto getopt_failed; + break; + + case 'r': /* drop rate */ + if (opc == LNET_CTL_DROP_ADD) + attr.u.drop.da_rate = strtoul(optarg, NULL, 0); + else + attr.u.delay.la_rate = strtoul(optarg, NULL, 0); + break; + + case 'i': /* time interval (# seconds) for message drop */ + if (opc == LNET_CTL_DROP_ADD) + attr.u.drop.da_interval = strtoul(optarg, + NULL, 0); + else + attr.u.delay.la_interval = strtoul(optarg, + NULL, 0); + break; + + case 'l': /* seconds to wait before activating rule */ + attr.u.delay.la_latency = strtoul(optarg, NULL, 0); + break; + + case 'p': /* portal to filter */ + rc = fault_attr_ptl_parse(optarg, &attr.fa_ptl_mask); + if (rc != 0) + goto getopt_failed; + break; + + case 'm': /* message types to filter */ + rc = fault_attr_msg_parse(optarg, &attr.fa_msg_mask); + if (rc != 0) + goto getopt_failed; + break; + + default: + fprintf(stderr, "error: %s: option '%s' " + "unrecognized\n", argv[0], argv[optind - 1]); + goto getopt_failed; + } + } + optind = 1; + + if (opc == LNET_CTL_DROP_ADD) { + /* NB: drop rate and interval are exclusive to each other */ + if (!((attr.u.drop.da_rate == 0) ^ + (attr.u.drop.da_interval == 0))) { + fprintf(stderr, + "please provide either drop rate or interval " + "but not both at the same time.\n"); + return -1; + } + } else if (opc == LNET_CTL_DELAY_ADD) { + if (!((attr.u.delay.la_rate == 0) ^ + (attr.u.delay.la_interval == 0))) { + fprintf(stderr, + "please provide either delay rate or interval " + "but not both at the same time.\n"); + return -1; + } + + if (attr.u.delay.la_latency == 0) { + fprintf(stderr, "latency cannot be zero\n"); + return -1; + } + } + + if (attr.fa_src == 0 || attr.fa_dst == 0) { + fprintf(stderr, "Please provide both source and destination " + "of %s rule\n", name); + return -1; + } + + data.ioc_flags = opc; + data.ioc_inllen1 = sizeof(attr); + data.ioc_inlbuf1 = (char *)&attr; + if (libcfs_ioctl_pack(&data, &ioc_buf, IOC_BUF_SIZE) != 0) { + fprintf(stderr, "libcfs_ioctl_pack failed\n"); + return -1; + } + + rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_LNET_FAULT, ioc_buf); + if (rc != 0) { + fprintf(stderr, "add %s rule %s->%s failed: %s\n", + name, libcfs_nid2str(attr.fa_src), + libcfs_nid2str(attr.fa_dst), strerror(errno)); + return -1; + } + + printf("Added %s rule %s->%s (1/%d)\n", + name, libcfs_nid2str(attr.fa_src), libcfs_nid2str(attr.fa_dst), + opc == LNET_CTL_DROP_ADD ? + attr.u.drop.da_rate : attr.u.delay.la_rate); + return 0; + +getopt_failed: + optind = 1; + return -1; +} - /* FIXME: this could maintain a symbol table since we expect to be - * looking up the same strings all the time... */ +int +jt_ptl_drop_add(int argc, char **argv) +{ + return fault_simul_rule_add(LNET_CTL_DROP_ADD, "drop", argc, argv); +} - LIBCFS_IOC_INIT(data); - data.ioc_pbuf1 = kstr; - data.ioc_plen1 = 1; /* non-zero just to fool portal_ioctl_is_invalid() */ - data.ioc_pbuf2 = NULL; - data.ioc_plen2 = 0; +int +jt_ptl_delay_add(int argc, char **argv) +{ + return fault_simul_rule_add(LNET_CTL_DELAY_ADD, "delay", argc, argv); +} - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_LWT_LOOKUP_STRING, &data); - if (rc != 0) { - fprintf(stderr, "IOC_LIBCFS_LWT_LOOKUP_STRING failed: %s\n", - strerror(errno)); - return (NULL); - } +static int +fault_simul_rule_del(__u32 opc, char *name, int argc, char **argv) +{ + struct libcfs_ioctl_data data = {{0}}; + struct lnet_fault_attr attr; + bool all = false; + int rc; + + static struct option opts[] = { + {"source", required_argument, 0, 's'}, + {"dest", required_argument, 0, 'd'}, + {"all", no_argument, 0, 'a'}, + {0, 0, 0, 0} + }; + + if (argc == 1) { + fprintf(stderr, "Failed, please provide source and " + "destination of rule\n"); + return -1; + } + + memset(&attr, 0, sizeof(attr)); + while (1) { + char c = getopt_long(argc, argv, "s:d:a", opts, NULL); + + if (c == -1 || all) + break; + + switch (c) { + case 's': + rc = fault_attr_nid_parse(optarg, &attr.fa_src); + if (rc != 0) + goto getopt_failed; + break; + case 'd': + rc = fault_attr_nid_parse(optarg, &attr.fa_dst); + if (rc != 0) + goto getopt_failed; + break; + case 'a': + attr.fa_src = attr.fa_dst = 0; + all = true; + break; + default: + fprintf(stderr, "error: %s: option '%s' " + "unrecognized\n", argv[0], argv[optind - 1]); + goto getopt_failed; + } + } + optind = 1; + + data.ioc_flags = opc; + data.ioc_inllen1 = sizeof(attr); + data.ioc_inlbuf1 = (char *)&attr; + if (libcfs_ioctl_pack(&data, &ioc_buf, IOC_BUF_SIZE) != 0) { + fprintf(stderr, "libcfs_ioctl_pack failed\n"); + return -1; + } + + rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_LNET_FAULT, ioc_buf); + if (rc != 0) { + fprintf(stderr, "remove %s rule %s->%s failed: %s\n", name, + all ? "all" : libcfs_nid2str(attr.fa_src), + all ? "all" : libcfs_nid2str(attr.fa_dst), + strerror(errno)); + return -1; + } + + libcfs_ioctl_unpack(&data, ioc_buf); + printf("Removed %d %s rules\n", data.ioc_count, name); + return 0; + +getopt_failed: + optind = 1; + return -1; +} - size = data.ioc_count; - ustr = (char *)malloc(size); - if (ustr == NULL) { - fprintf(stderr, "Can't allocate string storage of size %d\n", - size); - return (NULL); - } +int +jt_ptl_drop_del(int argc, char **argv) +{ + return fault_simul_rule_del(LNET_CTL_DROP_DEL, "drop", argc, argv); +} - LIBCFS_IOC_INIT(data); - data.ioc_pbuf1 = kstr; - data.ioc_plen1 = 1; /* non-zero just to fool portal_ioctl_is_invalid() */ - data.ioc_pbuf2 = ustr; - data.ioc_plen2 = size; +int +jt_ptl_delay_del(int argc, char **argv) +{ + return fault_simul_rule_del(LNET_CTL_DELAY_DEL, "delay", argc, argv); +} - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_LWT_LOOKUP_STRING, &data); - if (rc != 0) { - fprintf(stderr, "IOC_LIBCFS_LWT_LOOKUP_STRING failed: %s\n", - strerror(errno)); - return (NULL); - } +static int +fault_simul_rule_reset(__u32 opc, char *name, int argc, char **argv) +{ + struct libcfs_ioctl_data data = {{0}}; + int rc; + + LIBCFS_IOC_INIT(data); + data.ioc_flags = opc; + + rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_LNET_FAULT, &data); + if (rc != 0) { + fprintf(stderr, "failed to reset %s stats: %s\n", + name, strerror(errno)); + return -1; + } + return 0; +} - LASSERT(strlen(ustr) == size - 1); - return (ustr); +int +jt_ptl_drop_reset(int argc, char **argv) +{ + return fault_simul_rule_reset(LNET_CTL_DROP_RESET, "drop", argc, argv); } -static void -lwt_put_string(char *ustr) +int +jt_ptl_delay_reset(int argc, char **argv) { - free(ustr); + return fault_simul_rule_reset(LNET_CTL_DELAY_RESET, "delay", + argc, argv); } static int -lwt_print(FILE *f, cycles_t t0, cycles_t tlast, double mhz, int cpu, lwt_event_t *e) +fault_simul_rule_list(__u32 opc, char *name, int argc, char **argv) { -#ifndef __WORDSIZE -# error "__WORDSIZE not defined" -#elif __WORDSIZE == 32 -# define XFMT "%#010lx" -#elif __WORDSIZE== 64 -# define XFMT "%#018lx" -#else -# error "Unexpected __WORDSIZE" -#endif - char *where = lwt_get_string(e->lwte_where); - - if (where == NULL) - return (-1); - - fprintf(f, XFMT" "XFMT" "XFMT" "XFMT": "XFMT" %2d %10.6f %10.2f %s\n", - e->lwte_p1, e->lwte_p2, e->lwte_p3, e->lwte_p4, - (long)e->lwte_task, cpu, (e->lwte_when - t0) / (mhz * 1000000.0), - (t0 == e->lwte_when) ? 0.0 : (e->lwte_when - tlast) / mhz, - where); + struct libcfs_ioctl_data data = {{0}}; + struct lnet_fault_attr attr; + struct lnet_fault_stat stat; + int pos; + + printf("LNet %s rules:\n", name); + for (pos = 0;; pos++) { + int rc; + + memset(&attr, 0, sizeof(attr)); + memset(&stat, 0, sizeof(stat)); + + data.ioc_count = pos; + data.ioc_flags = opc; + data.ioc_inllen1 = sizeof(attr); + data.ioc_inlbuf1 = (char *)&attr; + data.ioc_inllen2 = sizeof(stat); + data.ioc_inlbuf2 = (char *)&stat; + if (libcfs_ioctl_pack(&data, &ioc_buf, IOC_BUF_SIZE) != 0) { + fprintf(stderr, "libcfs_ioctl_pack failed\n"); + return -1; + } + + rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_LNET_FAULT, ioc_buf); + if (rc != 0) + break; + + libcfs_ioctl_unpack(&data, ioc_buf); + + if (opc == LNET_CTL_DROP_LIST) { + printf("%s->%s (1/%d | %d) ptl "LPX64", msg %x, " + LPU64"/"LPU64", PUT "LPU64", ACK "LPU64", GET " + LPU64", REP "LPU64"\n", + libcfs_nid2str(attr.fa_src), + libcfs_nid2str(attr.fa_dst), + attr.u.drop.da_rate, attr.u.drop.da_interval, + attr.fa_ptl_mask, attr.fa_msg_mask, + stat.u.drop.ds_dropped, stat.fs_count, + stat.fs_put, stat.fs_ack, + stat.fs_get, stat.fs_reply); + + } else if (opc == LNET_CTL_DELAY_LIST) { + printf("%s->%s (1/%d | %d, latency %d) ptl "LPX64 + ", msg %x, "LPU64"/"LPU64", PUT "LPU64 + ", ACK "LPU64", GET "LPU64", REP "LPU64"\n", + libcfs_nid2str(attr.fa_src), + libcfs_nid2str(attr.fa_dst), + attr.u.delay.la_rate, attr.u.delay.la_interval, + attr.u.delay.la_latency, + attr.fa_ptl_mask, attr.fa_msg_mask, + stat.u.delay.ls_delayed, stat.fs_count, + stat.fs_put, stat.fs_ack, stat.fs_get, + stat.fs_reply); + } + } + printf("found total %d\n", pos); + + return 0; +} - lwt_put_string(where); +int +jt_ptl_drop_list(int argc, char **argv) +{ + return fault_simul_rule_list(LNET_CTL_DROP_LIST, "drop", argc, argv); +} - return (0); -#undef XFMT +int +jt_ptl_delay_list(int argc, char **argv) +{ + return fault_simul_rule_list(LNET_CTL_DELAY_LIST, "delay", argc, argv); } double @@ -1406,206 +1666,6 @@ get_cycles_per_usec () return (1000.0); } -#define LWT_MAX_CPUS (32) - -int -jt_ptl_lwt(int argc, char **argv) -{ - int ncpus; - int totalspace; - int nevents_per_cpu; - lwt_event_t *events; - lwt_event_t *cpu_event[LWT_MAX_CPUS + 1]; - lwt_event_t *next_event[LWT_MAX_CPUS]; - lwt_event_t *first_event[LWT_MAX_CPUS]; - int cpu; - lwt_event_t *e; - int rc; - int i; - double mhz; - cycles_t t0; - cycles_t tlast; - cycles_t tnow; - struct timeval tvnow; - int printed_date = 0; - int nlines = 0; - FILE *f = stdout; - - if (argc < 2 || - (strcmp(argv[1], "start") && - strcmp(argv[1], "stop"))) { - fprintf(stderr, - "usage: %s start\n" - " %s stop [fname]\n", argv[0], argv[0]); - return (-1); - } - - if (!strcmp(argv[1], "start")) { - /* disable */ - if (lwt_control(0, 0) != 0) - return (-1); - - /* clear */ - if (lwt_control(0, 1) != 0) - return (-1); - - /* enable */ - if (lwt_control(1, 0) != 0) - return (-1); - - return (0); - } - - if (lwt_snapshot(NULL, &ncpus, &totalspace, NULL, 0) != 0) - return (-1); - - if (ncpus > LWT_MAX_CPUS) { - fprintf(stderr, "Too many cpus: %d (%d)\n", - ncpus, LWT_MAX_CPUS); - return (-1); - } - - events = (lwt_event_t *)malloc(totalspace); - if (events == NULL) { - fprintf(stderr, "Can't allocate %d\n", totalspace); - return (-1); - } - - if (lwt_control(0, 0) != 0) { /* disable */ - free(events); - return (-1); - } - - if (lwt_snapshot(&tnow, NULL, NULL, events, totalspace)) { - free(events); - return (-1); - } - - /* we want this time to be sampled at snapshot time */ - gettimeofday(&tvnow, NULL); - - if (argc > 2) { - f = fopen (argv[2], "w"); - if (f == NULL) { - fprintf(stderr, "Can't open %s for writing: %s\n", argv[2], strerror (errno)); - free(events); - return (-1); - } - } - - mhz = get_cycles_per_usec(); - - /* carve events into per-cpu slices */ - nevents_per_cpu = totalspace / (ncpus * sizeof(lwt_event_t)); - for (cpu = 0; cpu <= ncpus; cpu++) - cpu_event[cpu] = &events[cpu * nevents_per_cpu]; - - /* find the earliest event on each cpu */ - for (cpu = 0; cpu < ncpus; cpu++) { - first_event[cpu] = NULL; - - for (e = cpu_event[cpu]; e < cpu_event[cpu + 1]; e++) { - - if (e->lwte_where == NULL) /* not an event */ - continue; - - if (first_event[cpu] == NULL || - first_event[cpu]->lwte_when > e->lwte_when) - first_event[cpu] = e; - } - - next_event[cpu] = first_event[cpu]; - } - - t0 = tlast = 0; - for (cpu = 0; cpu < ncpus; cpu++) { - e = first_event[cpu]; - if (e == NULL) /* no events this cpu */ - continue; - - if (e == cpu_event[cpu]) - e = cpu_event[cpu + 1] - 1; - else - e = e - 1; - - /* If there's an event immediately before the first one, this - * cpu wrapped its event buffer */ - if (e->lwte_where == NULL) - continue; - - /* We should only start outputting events from the most recent - * first event in any wrapped cpu. Events before this time on - * other cpus won't have any events from this CPU to interleave - * with. */ - if (t0 < first_event[cpu]->lwte_when) - t0 = first_event[cpu]->lwte_when; - } - - for (;;) { - /* find which cpu has the next event */ - cpu = -1; - for (i = 0; i < ncpus; i++) { - - if (next_event[i] == NULL) /* this cpu exhausted */ - continue; - - if (cpu < 0 || - next_event[i]->lwte_when < next_event[cpu]->lwte_when) - cpu = i; - } - - if (cpu < 0) /* all cpus exhausted */ - break; - - if (t0 == 0) { - /* no wrapped cpus and this is he first ever event */ - t0 = next_event[cpu]->lwte_when; - } - - if (t0 <= next_event[cpu]->lwte_when) { - /* on or after the first event */ - if (!printed_date) { - cycles_t du = (tnow - t0) / mhz; - time_t then = tvnow.tv_sec - du/1000000; - - if (du % 1000000 > tvnow.tv_usec) - then--; - - fprintf(f, "%s", ctime(&then)); - printed_date = 1; - } - - rc = lwt_print(f, t0, tlast, mhz, cpu, next_event[cpu]); - if (rc != 0) - break; - - if (++nlines % 10000 == 0 && f != stdout) { - /* show some activity... */ - printf("."); - fflush (stdout); - } - } - - tlast = next_event[cpu]->lwte_when; - - next_event[cpu]++; - if (next_event[cpu] == cpu_event[cpu + 1]) - next_event[cpu] = cpu_event[cpu]; - - if (next_event[cpu]->lwte_where == NULL || - next_event[cpu] == first_event[cpu]) - next_event[cpu] = NULL; - } - - if (f != stdout) { - printf("\n"); - fclose(f); - } - - free(events); - return (0); -} - int jt_ptl_memhog(int argc, char **argv) { static int gfp = 0; /* sticky! */