From: eeb Date: Fri, 30 Sep 2005 05:51:17 +0000 (+0000) Subject: * Added the 'ip2nets' lnet module parameter. It may be specified X-Git-Tag: v1_7_100~1^25~6^2~125 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=4aacd70c43c08bba7cd78c428f77fd28eafa4426;p=fs%2Flustre-release.git * Added the 'ip2nets' lnet module parameter. It may be specified instead of 'networks', and it consists of a list of networks and IP match expressions. If any IP addresses on the node match the IP match expression, the given network is added to the list of local networks. This (along with recent route table fixes etc) allows a single site-wide modprobe.conf * Included a recursion check in lnet_finalize(). Recursion through lnet_return_credits_locked() was possible if a set of blocked buffers suddenly all complete in-line (e.g. when a peer dies). The recursion check limits the number of threads actually processing finalized messages to the number of CPUs. * Cosmetic changes to /proc/lnet/{buffers,nis,peers} to make the buffer/credit stats more readable. * Fixed configure-on-load (it used to deadlock in modprobe when lnet loaded LNDs) and made it the default. * Added a shell script 'lnetunload' to unload lnet and any loaded LNDs. 
--- diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index 167b25c..8ff67d0 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -605,6 +605,7 @@ int lnet_acceptor_port(void); int lnet_acceptor_start(void); void lnet_acceptor_stop(void); +int lnet_parse_ip2nets (char **networksp, char *ip2nets); int lnet_parse_routes (char *route_str, int *im_a_router); int lnet_parse_networks (struct list_head *nilist, char *networks); diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h index 93457e0..dc063fb 100644 --- a/lnet/include/lnet/lib-types.h +++ b/lnet/include/lnet/lib-types.h @@ -455,7 +455,14 @@ typedef struct char *ln_network_tokens; /* space for network names */ int ln_network_tokens_nob; - + + struct list_head ln_finalizeq; /* msgs waiting to complete finalizing */ +#ifdef __KERNEL__ + void **ln_finalizers; /* threads doing finalization */ + int ln_nfinalizers; /* max # threads finalizing */ +#else + int ln_finalizing; +#endif struct list_head ln_test_peers; /* failure simulation */ #ifdef LNET_USE_LIB_FREELIST diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index c8b20d6..026a679 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -26,10 +26,13 @@ lnet_t the_lnet; /* THE state of the network */ #ifdef __KERNEL__ -#define DEFAULT_NETWORKS "tcp" -static char *networks = DEFAULT_NETWORKS; +static char *ip2nets = ""; +CFS_MODULE_PARM(ip2nets, "s", charp, 0444, + "LNET network <- IP table"); + +static char *networks = ""; CFS_MODULE_PARM(networks, "s", charp, 0444, - "local networks (default='"DEFAULT_NETWORKS"')"); + "local networks"); static char *routes = ""; CFS_MODULE_PARM(routes, "s", charp, 0444, @@ -48,7 +51,45 @@ lnet_get_routes(void) char * lnet_get_networks(void) { - return networks; + if (*networks != 0 && *ip2nets != 0) { + LCONSOLE_ERROR("Please specify EITHER 'networks' or 'ip2nets'" + " but not both at once\n"); + return NULL; + } + + if (*networks != 0) + return 
networks; + + if (*ip2nets != 0) { + int rc = lnet_parse_ip2nets(&networks, ip2nets); + + switch (rc) { + case 0: + return networks; + + case -ENOENT: + LCONSOLE_ERROR("Can't match any networks in " + "ip2nets\n"); + break; + + case -ENOMEM: + LCONSOLE_ERROR("Out of memory parsing ip2nets\n"); + break; + + case -EINVAL: + LCONSOLE_ERROR("Can't parse ip2nets\n"); + break; + + default: + LCONSOLE_ERROR("Unexpected error %d parsing ip2nets\n", + rc); + break; + } + + return NULL; + } + + return "tcp"; } int @@ -533,6 +574,50 @@ lnet_invalidate_handle (lnet_libhandle_t *lh) } int +lnet_init_finalizers(void) +{ +#ifdef __KERNEL__ + int i; + + the_lnet.ln_nfinalizers = num_online_cpus(); + + LIBCFS_ALLOC(the_lnet.ln_finalizers, + the_lnet.ln_nfinalizers * + sizeof(*the_lnet.ln_finalizers)); + if (the_lnet.ln_finalizers == NULL) { + CERROR("Can't allocate ln_finalizers\n"); + return -ENOMEM; + } + + for (i = 0; i < the_lnet.ln_nfinalizers; i++) + the_lnet.ln_finalizers[i] = NULL; +#else + the_lnet.ln_finalizing = 0; +#endif + + CFS_INIT_LIST_HEAD(&the_lnet.ln_finalizeq); + return 0; +} + +void +lnet_fini_finalizers(void) +{ +#ifdef __KERNEL__ + int i; + + for (i = 0; i < the_lnet.ln_nfinalizers; i++) + LASSERT (the_lnet.ln_finalizers[i] == NULL); + + LIBCFS_FREE(the_lnet.ln_finalizers, + the_lnet.ln_nfinalizers * + sizeof(*the_lnet.ln_finalizers)); +#else + LASSERT (!the_lnet.ln_finalizing); +#endif + LASSERT (list_empty(&the_lnet.ln_finalizeq)); +} + +int lnet_prepare(lnet_pid_t requested_pid) { /* Prepare to bring up the network */ @@ -567,7 +652,7 @@ lnet_prepare(lnet_pid_t requested_pid) the_lnet.ln_interface_cookie = lnet_create_interface_cookie(); lnet_init_rtrpools(); - + rc = lnet_setup_handle_hash (); if (rc != 0) goto failed0; @@ -576,13 +661,17 @@ lnet_prepare(lnet_pid_t requested_pid) if (rc != 0) goto failed1; + rc = lnet_init_finalizers(); + if (rc != 0) + goto failed2; + the_lnet.ln_nportals = MAX_PORTALS; LIBCFS_ALLOC(the_lnet.ln_portals, 
the_lnet.ln_nportals * sizeof(*the_lnet.ln_portals)); if (the_lnet.ln_portals == NULL) { rc = -ENOMEM; - goto failed2; + goto failed3; } for (i = 0; i < the_lnet.ln_nportals; i++) @@ -590,6 +679,8 @@ lnet_prepare(lnet_pid_t requested_pid) return 0; + failed3: + lnet_fini_finalizers(); failed2: lnet_destroy_peer_table(); failed1: @@ -659,6 +750,7 @@ lnet_unprepare (void) the_lnet.ln_nportals * sizeof(*the_lnet.ln_portals)); lnet_free_rtrpools(); + lnet_fini_finalizers(); lnet_destroy_peer_table(); lnet_cleanup_handle_hash(); lnet_descriptor_cleanup(); @@ -867,9 +959,14 @@ lnet_startup_lndnis (void) int rc = 0; int lnd_type; int nicount = 0; + char *nets = lnet_get_networks(); INIT_LIST_HEAD(&nilist); - rc = lnet_parse_networks(&nilist, lnet_get_networks()); + + if (nets == NULL) + goto failed; + + rc = lnet_parse_networks(&nilist, nets); if (rc != 0) goto failed; diff --git a/lnet/lnet/config.c b/lnet/lnet/config.c index 4d66206..4f6531c 100644 --- a/lnet/lnet/config.c +++ b/lnet/lnet/config.c @@ -23,34 +23,34 @@ #include typedef struct { /* tmp struct for parsing routes */ - struct list_head ptb_list; /* stash on lists */ - int ptb_size; /* allocated size */ - char ptb_text[0]; /* text buffer */ + struct list_head ltb_list; /* stash on lists */ + int ltb_size; /* allocated size */ + char ltb_text[0]; /* text buffer */ } lnet_text_buf_t; static int lnet_tbnob = 0; /* track text buf allocation */ #define LNET_MAX_TEXTBUF_NOB (64<<10) /* bound allocation */ #define LNET_SINGLE_TEXTBUF_NOB (4<<10) +typedef struct { + struct list_head lre_list; /* stash in a list */ + int lre_min; /* min value */ + int lre_max; /* max value */ + int lre_stride; /* stride */ +} lnet_range_expr_t; + +static int lnet_re_alloc = 0; /* track expr allocation */ + void lnet_syntax(char *name, char *str, int offset, int width) { - const char *dots = "................................" - "................................" - "................................" - "................................" 
- "................................" - "................................" - "................................" - "................................"; - const char *dashes = "--------------------------------" - "--------------------------------" - "--------------------------------" - "--------------------------------" - "--------------------------------" - "--------------------------------" - "--------------------------------" - "--------------------------------"; + static char dots[LNET_SINGLE_TEXTBUF_NOB]; + static char dashes[LNET_SINGLE_TEXTBUF_NOB]; + + memset(dots, '.', sizeof(dots)); + dots[sizeof(dots)-1] = 0; + memset(dashes, '-', sizeof(dashes)); + dashes[sizeof(dashes)-1] = 0; LCONSOLE_ERROR("Error parsing '%s=\"%s\"'\n", name, str); LCONSOLE_ERROR("here...........%.*s..%.*s|%.*s|\n", @@ -306,10 +306,10 @@ lnet_parse_networks(struct list_head *nilist, char *networks) lnet_text_buf_t * lnet_new_text_buf (int str_len) { - lnet_text_buf_t *ptb; + lnet_text_buf_t *ltb; int nob; - nob = offsetof(lnet_text_buf_t, ptb_text[str_len + 1]); + nob = offsetof(lnet_text_buf_t, ltb_text[str_len + 1]); if (nob > LNET_SINGLE_TEXTBUF_NOB) { /* _way_ conservative for "route net gateway..." 
*/ CERROR("text buffer too big\n"); @@ -321,45 +321,45 @@ lnet_new_text_buf (int str_len) return NULL; } - LIBCFS_ALLOC(ptb, nob); - if (ptb == NULL) + LIBCFS_ALLOC(ltb, nob); + if (ltb == NULL) return NULL; - ptb->ptb_size = nob; + ltb->ltb_size = nob; lnet_tbnob += nob; - return ptb; + return ltb; } void -lnet_free_text_buf (lnet_text_buf_t *ptb) +lnet_free_text_buf (lnet_text_buf_t *ltb) { - LIBCFS_FREE(ptb, ptb->ptb_size); - lnet_tbnob -= ptb->ptb_size; + LIBCFS_FREE(ltb, ltb->ltb_size); + lnet_tbnob -= ltb->ltb_size; } void lnet_free_text_bufs(struct list_head *tbs) { - lnet_text_buf_t *ptb; + lnet_text_buf_t *ltb; while (!list_empty(tbs)) { - ptb = list_entry(tbs->next, lnet_text_buf_t, ptb_list); + ltb = list_entry(tbs->next, lnet_text_buf_t, ltb_list); - list_del(&ptb->ptb_list); - lnet_free_text_buf(ptb); + list_del(&ltb->ltb_list); + lnet_free_text_buf(ltb); } } void lnet_print_text_bufs(struct list_head *tbs) { - struct list_head *tmp; - lnet_text_buf_t *ptb; + struct list_head *tmp; + lnet_text_buf_t *ltb; list_for_each (tmp, tbs) { - ptb = list_entry(tmp, lnet_text_buf_t, ptb_list); + ltb = list_entry(tmp, lnet_text_buf_t, ltb_list); - CDEBUG(D_WARNING, "%s\n", ptb->ptb_text); + CDEBUG(D_WARNING, "%s\n", ltb->ltb_text); } CDEBUG(D_WARNING, "%d allocated\n", lnet_tbnob); @@ -372,7 +372,7 @@ lnet_str2tbs_sep (struct list_head *tbs, char *str) char *sep; int nob; int i; - lnet_text_buf_t *ptb; + lnet_text_buf_t *ltb; INIT_LIST_HEAD(&pending); @@ -389,21 +389,21 @@ lnet_str2tbs_sep (struct list_head *tbs, char *str) nob = sep - str; if (nob > 0) { - ptb = lnet_new_text_buf(nob + 1); - if (ptb == NULL) { + ltb = lnet_new_text_buf(nob + 1); + if (ltb == NULL) { lnet_free_text_bufs(&pending); return -1; } for (i = 0; i < nob; i++) if (lnet_iswhite(str[i])) - ptb->ptb_text[i] = ' '; + ltb->ltb_text[i] = ' '; else - ptb->ptb_text[i] = str[i]; + ltb->ltb_text[i] = str[i]; - ptb->ptb_text[nob] = 0; + ltb->ltb_text[nob] = 0; - list_add_tail(&ptb->ptb_list, 
&pending); + list_add_tail(&ltb->ltb_list, &pending); } if (*sep == '#') { @@ -428,23 +428,23 @@ lnet_expand1tb (struct list_head *list, char *str, char *sep1, char *sep2, char *item, int itemlen) { - int len1 = sep1 - str; - int len2 = strlen(sep2 + 1); - lnet_text_buf_t *ptb; + int len1 = sep1 - str; + int len2 = strlen(sep2 + 1); + lnet_text_buf_t *ltb; LASSERT (*sep1 == '['); LASSERT (*sep2 == ']'); - ptb = lnet_new_text_buf(len1 + itemlen + len2 + 1); - if (ptb == NULL) + ltb = lnet_new_text_buf(len1 + itemlen + len2 + 1); + if (ltb == NULL) return -ENOMEM; - memcpy(ptb->ptb_text, str, len1); - memcpy(&ptb->ptb_text[len1], item, itemlen); - memcpy(&ptb->ptb_text[len1+itemlen], sep2 + 1, len2); - ptb->ptb_text[len1 + itemlen + len2] = 0; + memcpy(ltb->ltb_text, str, len1); + memcpy(&ltb->ltb_text[len1], item, itemlen); + memcpy(&ltb->ltb_text[len1+itemlen], sep2 + 1, len2); + ltb->ltb_text[len1 + itemlen + len2] = 0; - list_add_tail(&ptb->ptb_list, list); + list_add_tail(&ltb->ltb_list, list); return 0; } @@ -552,7 +552,7 @@ lnet_parse_route (char *str, int *im_a_router) struct list_head *tmp2; __u32 net; lnet_nid_t nid; - lnet_text_buf_t *ptb; + lnet_text_buf_t *ltb; int rc; char *sep; char *token = str; @@ -561,8 +561,8 @@ lnet_parse_route (char *str, int *im_a_router) unsigned int hops; int got_hops = 0; - INIT_LIST_HEAD(&gateways); - INIT_LIST_HEAD(&nets); + CFS_INIT_LIST_HEAD(&gateways); + CFS_INIT_LIST_HEAD(&nets); /* save a copy of the string for error messages */ strncpy(cmd, str, sizeof(cmd) - 1); @@ -598,36 +598,36 @@ lnet_parse_route (char *str, int *im_a_router) tmp2 = &gateways; /* expanding gateways */ } - ptb = lnet_new_text_buf(strlen(token)); - if (ptb == NULL) + ltb = lnet_new_text_buf(strlen(token)); + if (ltb == NULL) goto out; - strcpy(ptb->ptb_text, token); - tmp1 = &ptb->ptb_list; + strcpy(ltb->ltb_text, token); + tmp1 = &ltb->ltb_list; list_add_tail(tmp1, tmp2); while (tmp1 != tmp2) { - ptb = list_entry(tmp1, lnet_text_buf_t, ptb_list); + ltb = 
list_entry(tmp1, lnet_text_buf_t, ltb_list); - rc = lnet_str2tbs_expand(tmp1->next, ptb->ptb_text); + rc = lnet_str2tbs_expand(tmp1->next, ltb->ltb_text); if (rc < 0) goto token_error; tmp1 = tmp1->next; if (rc > 0) { /* expanded! */ - list_del(&ptb->ptb_list); - lnet_free_text_buf(ptb); + list_del(&ltb->ltb_list); + lnet_free_text_buf(ltb); continue; } if (ntokens == 1) { - net = libcfs_str2net(ptb->ptb_text); + net = libcfs_str2net(ltb->ltb_text); if (net == LNET_NIDNET(LNET_NID_ANY) || LNET_NETTYP(net) == LOLND) goto token_error; } else { - nid = libcfs_str2nid(ptb->ptb_text); + nid = libcfs_str2nid(ltb->ltb_text); if (nid == LNET_NID_ANY || LNET_NETTYP(LNET_NIDNET(nid)) == LOLND) goto token_error; @@ -642,13 +642,13 @@ lnet_parse_route (char *str, int *im_a_router) LASSERT (!list_empty(&gateways)); list_for_each (tmp1, &nets) { - ptb = list_entry(tmp1, lnet_text_buf_t, ptb_list); - net = libcfs_str2net(ptb->ptb_text); + ltb = list_entry(tmp1, lnet_text_buf_t, ltb_list); + net = libcfs_str2net(ltb->ltb_text); LASSERT (net != LNET_NIDNET(LNET_NID_ANY)); list_for_each (tmp2, &gateways) { - ptb = list_entry(tmp2, lnet_text_buf_t, ptb_list); - nid = libcfs_str2nid(ptb->ptb_text); + ltb = list_entry(tmp2, lnet_text_buf_t, ltb_list); + nid = libcfs_str2nid(ltb->ltb_text); LASSERT (nid != LNET_NID_ANY); if (lnet_islocalnid(nid)) { @@ -681,18 +681,18 @@ lnet_parse_route (char *str, int *im_a_router) int lnet_parse_route_tbs(struct list_head *tbs, int *im_a_router) { - lnet_text_buf_t *ptb; + lnet_text_buf_t *ltb; while (!list_empty(tbs)) { - ptb = list_entry(tbs->next, lnet_text_buf_t, ptb_list); + ltb = list_entry(tbs->next, lnet_text_buf_t, ltb_list); - if (lnet_parse_route(ptb->ptb_text, im_a_router) < 0) { + if (lnet_parse_route(ltb->ltb_text, im_a_router) < 0) { lnet_free_text_bufs(tbs); return -EINVAL; } - list_del(&ptb->ptb_list); - lnet_free_text_buf(ptb); + list_del(&ltb->ltb_list); + lnet_free_text_buf(ltb); } return 0; @@ -714,7 +714,7 @@ lnet_parse_routes (char 
*routes, int *im_a_router) return -EINVAL; } - INIT_LIST_HEAD(&tbs); + CFS_INIT_LIST_HEAD(&tbs); if (lnet_str2tbs_sep(&tbs, routes) < 0) { CERROR("Error parsing routes\n"); @@ -727,7 +727,433 @@ lnet_parse_routes (char *routes, int *im_a_router) return rc; } +void +lnet_print_range_exprs(struct list_head *exprs) +{ + struct list_head *e; + lnet_range_expr_t *lre; + + list_for_each(e, exprs) { + lre = list_entry(exprs->next, lnet_range_expr_t, lre_list); + + CDEBUG(D_WARNING, "%d-%d/%d\n", + lre->lre_min, lre->lre_max, lre->lre_stride); + } + + CDEBUG(D_WARNING, "%d allocated\n", lnet_re_alloc); +} + +int +lnet_new_range_expr(struct list_head *exprs, int min, int max, int stride) +{ + lnet_range_expr_t *lre; + + CDEBUG(D_NET, "%d-%d/%d\n", min, max, stride); + + if (min < 0 || min > 255 || min > max || stride < 0) + return -EINVAL; + + LIBCFS_ALLOC(lre, sizeof(*lre)); + if (lre == NULL) + return -ENOMEM; + + lnet_re_alloc++; + + lre->lre_min = min; + lre->lre_max = max; + lre->lre_stride = stride; + + list_add(&lre->lre_list, exprs); + return 0; +} + +void +lnet_destroy_range_exprs(struct list_head *exprs) +{ + lnet_range_expr_t *lre; + + while (!list_empty(exprs)) { + lre = list_entry(exprs->next, lnet_range_expr_t, lre_list); + + list_del(&lre->lre_list); + LIBCFS_FREE(lre, sizeof(*lre)); + lnet_re_alloc--; + } +} + +int +lnet_parse_range_expr(struct list_head *exprs, char *str) +{ + int nob = strlen(str); + char *sep; + int n; + int x; + int y; + int z; + int rc; + + if (nob == 0) + return -EINVAL; + + if (!strcmp(str, "*")) /* match all */ + return lnet_new_range_expr(exprs, 0, 255, 1); + + n = nob; + if (sscanf(str, "%u%n", &x, &n) >= 1 && n == nob) { + /* simple number */ + return lnet_new_range_expr(exprs, x, x, 1); + } + + /* Has to be an expansion */ + if (!(str[0] == '[' && nob > 2 && str[nob-1] == ']')) + return -EINVAL; + + nob -= 2; + str++; + str[nob] = 0; + + do { + /* Comma separated list of expressions... 
*/ + sep = strchr(str, ','); + if (sep != NULL) + *sep++ = 0; + + nob = strlen(str); + n = nob; + if (sscanf(str, "%u%n", &x, &n) >= 1 && n == nob) { + /* simple number */ + rc = lnet_new_range_expr(exprs, x, x, 1); + if (rc != 0) + return rc; + + continue; + } + + n = nob; + if (sscanf(str, "%u-%u%n", &x, &y, &n) >= 2 && n == nob) { + /* simple range */ + rc = lnet_new_range_expr(exprs, x, y, 1); + if (rc != 0) + return rc; + continue; + } + + n = nob; + if (sscanf(str, "%u-%u/%u%n", &x, &y, &z, &n) >= 3 && n == nob) { + /* strided range */ + rc = lnet_new_range_expr(exprs, x, y, z); + if (rc != 0) + return rc; + continue; + } + + return -EINVAL; + + } while ((str = sep) != NULL); + + return 0; +} + +int +lnet_match_network_token(char *token, __u32 *ipaddrs, int nip) +{ + struct list_head exprs[4]; + struct list_head *e; + lnet_range_expr_t *re; + char *str; + int i; + int j; + __u32 ip; + int n; + int match; + int rc; + + for (i = 0; i < 4; i++) + CFS_INIT_LIST_HEAD(&exprs[i]); + + for (i = 0; i < 4; i++) { + str = token; + if (i != 3) { + token = strchr(token, '.'); + if (token == NULL) { + rc = -EINVAL; + goto out; + } + *token++ = 0; + } + + rc = lnet_parse_range_expr(&exprs[i], str); + if (rc != 0) { + LASSERT (rc < 0); + goto out; + } + } + + for (match = i = 0; !match && i < nip; i++) { + ip = ipaddrs[i]; + + for (match = 1, j = 0; match && j < 4; j++) { + n = (ip >> (8 * (3 - j))) & 0xff; + match = 0; + + list_for_each(e, &exprs[j]) { + re = list_entry(e, lnet_range_expr_t, lre_list); + + if (re->lre_min <= n && + re->lre_max >= n && + (n - re->lre_min) % re->lre_stride == 0) { + match = 1; + break; + } + } + } + } + + rc = match ? 
1 : 0; + + out: + for (i = 0; i < 4; i++) + lnet_destroy_range_exprs(&exprs[i]); + LASSERT (lnet_re_alloc == 0); + + return rc; +} + +int +lnet_match_network_tokens(char *net_entry, __u32 *ipaddrs, int nip) +{ + static char tokens[LNET_SINGLE_TEXTBUF_NOB]; + + int matched = 0; + int ntokens = 0; + int len; + char *net = NULL; + char *sep; + char *token; + int rc; + + LASSERT (strlen(net_entry) < sizeof(tokens)); + + /* work on a copy of the string */ + strcpy(tokens, net_entry); + sep = tokens; + for (;;) { + /* scan for token start */ + while (lnet_iswhite(*sep)) + sep++; + if (*sep == 0) + break; + + token = sep++; + + /* scan for token end */ + while (*sep != 0 && !lnet_iswhite(*sep)) + sep++; + if (*sep != 0) + *sep++ = 0; + + if (ntokens++ == 0) { + net = token; + continue; + } + + len = strlen(token); + + rc = lnet_match_network_token(token, ipaddrs, nip); + if (rc < 0) { + lnet_syntax("ip2nets", net_entry, + token - tokens, len); + return rc; + } + + matched |= (rc != 0); + } + + if (!matched) + return 0; + + strcpy(net_entry, net); /* replace with matched net */ + return 1; +} + +int +lnet_match_networks (char **networksp, char *ip2nets, __u32 *ipaddrs, int nip) +{ + static char networks[LNET_SINGLE_TEXTBUF_NOB]; + + struct list_head raw; + struct list_head matched; + struct list_head *t; + lnet_text_buf_t *tb; + lnet_text_buf_t *tb2; + int len; + int dup; + int rc; + + CFS_INIT_LIST_HEAD(&raw); + + if (lnet_str2tbs_sep(&raw, ip2nets) < 0) { + CERROR("Error parsing ip2nets\n"); + LASSERT (lnet_tbnob == 0); + return -EINVAL; + } + + CFS_INIT_LIST_HEAD(&matched); + networks[0] = 0; + len = 0; + rc = 0; + + while (!list_empty(&raw)) { + tb = list_entry(raw.next, lnet_text_buf_t, ltb_list); + + rc = lnet_match_network_tokens(tb->ltb_text, ipaddrs, nip); + if (rc < 0) + break; + + list_del(&tb->ltb_list); + + if (rc == 0) { /* no match */ + lnet_free_text_buf(tb); + continue; + } + + dup = 0; + list_for_each(t, &matched) { + tb2 = list_entry(t, lnet_text_buf_t, 
ltb_list); + + if (!strcmp(tb->ltb_text, tb2->ltb_text)) { + dup = 1; + break; + } + } + + if (dup) { + lnet_free_text_buf(tb); + continue; + } + + list_add_tail(&tb->ltb_list, &matched); + + len += snprintf(networks + len, sizeof(networks) - len, + "%s%s", (len == 0) ? "" : ",", tb->ltb_text); + + if (len >= sizeof(networks)) { + CERROR("Too many matched networks\n"); + rc = -E2BIG; + break; + } + } + + lnet_free_text_bufs(&raw); + lnet_free_text_bufs(&matched); + LASSERT (lnet_tbnob == 0); + + if (rc < 0) + return rc; + + if (len == 0) + return -ENOENT; + + *networksp = networks; + return 0; +} + #ifdef __KERNEL__ +void +lnet_ipaddr_free_enumeration(__u32 *ipaddrs, int nip) +{ + LIBCFS_FREE(ipaddrs, nip * sizeof(*ipaddrs)); +} + +int +lnet_ipaddr_enumerate (__u32 **ipaddrsp) +{ + int up; + __u32 netmask; + __u32 *ipaddrs; + __u32 *ipaddrs2; + int nip; + char **ifnames; + int nif = libcfs_ipif_enumerate(&ifnames); + int i; + int rc; + + if (nif <= 0) + return nif; + + LIBCFS_ALLOC(ipaddrs, nif * sizeof(*ipaddrs)); + if (ipaddrs == NULL) { + CERROR("Can't allocate ipaddrs[%d]\n", nif); + libcfs_ipif_free_enumeration(ifnames, nif); + return -ENOMEM; + } + + for (i = nip = 0; i < nif; i++) { + if (!strcmp(ifnames[i], "lo")) + continue; + + rc = libcfs_ipif_query(ifnames[i], &up, + &ipaddrs[nip], &netmask); + if (rc != 0) { + CWARN("Can't query interface %s: %d\n", + ifnames[i], rc); + continue; + } + + if (!up) { + CWARN("Ignoring interface %s: it's down\n", + ifnames[i]); + continue; + } + + nip++; + } + + libcfs_ipif_free_enumeration(ifnames, nif); + + if (nip == nif) { + *ipaddrsp = ipaddrs; + } else { + if (nip > 0) { + LIBCFS_ALLOC(ipaddrs2, nip * sizeof(*ipaddrs2)); + if (ipaddrs2 == NULL) { + CERROR("Can't allocate ipaddrs[%d]\n", nip); + nip = -ENOMEM; + } else { + memcpy(ipaddrs2, ipaddrs, + nip * sizeof(*ipaddrs)); + *ipaddrsp = ipaddrs2; + rc = nip; + } + } + lnet_ipaddr_free_enumeration(ipaddrs, nif); + } + return nip; +} + +int +lnet_parse_ip2nets (char 
**networksp, char *ip2nets) +{ + __u32 *ipaddrs; + int nip = lnet_ipaddr_enumerate(&ipaddrs); + int rc; + + if (nip == 0) { + CERROR("I have no IP addresses\n"); + return -ENOENT; + } + + if (nip <= 0) { + CERROR("Can't enumerate IP interfaces: %d\n", nip); + return nip; + } + + rc = lnet_match_networks (networksp, ip2nets, ipaddrs, nip); + lnet_ipaddr_free_enumeration(ipaddrs, nip); + + return rc; +} + int lnet_set_ip_niaddr (lnet_ni_t *ni) { @@ -805,7 +1231,6 @@ lnet_set_ip_niaddr (lnet_ni_t *ni) libcfs_ipif_free_enumeration(names, n); return -ENOENT; } - EXPORT_SYMBOL(lnet_set_ip_niaddr); #endif diff --git a/lnet/lnet/lib-msg.c b/lnet/lnet/lib-msg.c index a8a2a25..21cecab 100644 --- a/lnet/lnet/lib-msg.c +++ b/lnet/lnet/lib-msg.c @@ -67,18 +67,83 @@ lnet_enq_event_locked (lnet_eq_t *eq, lnet_event_t *ev) } void -lnet_finalize (lnet_ni_t *ni, lnet_msg_t *msg, int status) +lnet_complete_msg_locked(lnet_msg_t *msg) { lnet_handle_wire_t ack_wmd; + int rc; + int status = msg->msg_ev.status; + + LASSERT (msg->msg_onactivelist); + + if (status == 0 && msg->msg_ack) { + /* Only send an ACK if the PUT completed successfully */ + + lnet_return_credits_locked(msg); + + msg->msg_ack = 0; + LNET_UNLOCK(); + + LASSERT(msg->msg_ev.type == LNET_EVENT_PUT); + LASSERT(!msg->msg_routing); + + ack_wmd = msg->msg_hdr.msg.put.ack_wmd; + + + lnet_prep_send(msg, LNET_MSG_ACK, msg->msg_ev.initiator, 0, 0); + + msg->msg_hdr.msg.ack.dst_wmd = ack_wmd; + msg->msg_hdr.msg.ack.match_bits = msg->msg_ev.match_bits; + msg->msg_hdr.msg.ack.mlength = cpu_to_le32(msg->msg_ev.mlength); + + LASSERT(!in_interrupt()); + rc = lnet_send(msg->msg_ev.target.nid, msg); + LASSERT(!in_interrupt()); + + LNET_LOCK(); + + if (rc == 0) + return; + } else if (status == 0 && /* OK so far */ + (msg->msg_routing && !msg->msg_sending)) { /* not forwarded */ + + LASSERT (!msg->msg_receiving); /* called back recv already */ + + LNET_UNLOCK(); + + LASSERT(!in_interrupt()); + rc = lnet_send(LNET_NID_ANY, msg); + 
LASSERT(!in_interrupt()); + + LNET_LOCK(); + + if (rc == 0) + return; + } + + lnet_return_credits_locked(msg); + + LASSERT (msg->msg_onactivelist); + msg->msg_onactivelist = 0; + list_del (&msg->msg_activelist); + the_lnet.ln_counters.msgs_alloc--; + lnet_msg_free(msg); +} + + +void +lnet_finalize (lnet_ni_t *ni, lnet_msg_t *msg, int status) +{ +#ifdef __KERNEL__ + int i; + int my_slot; +#endif lnet_libmd_t *md; int unlink; - int rc; LASSERT (!in_interrupt ()); if (msg == NULL) return; - #if 0 CDEBUG(D_WARNING, "%s msg->%s Flags:%s%s%s%s%s%s%s%s%s%s%s%s txp %s rxp %s\n", lnet_msgtyp2str(msg->msg_type), libcfs_id2str(msg->msg_target), @@ -101,6 +166,8 @@ lnet_finalize (lnet_ni_t *ni, lnet_msg_t *msg, int status) LASSERT (msg->msg_onactivelist); + msg->msg_ev.status = status; + md = msg->msg_md; if (md != NULL) { /* Now it's safe to drop my caller's ref */ @@ -117,7 +184,6 @@ lnet_finalize (lnet_ni_t *ni, lnet_msg_t *msg, int status) else unlink = lnet_md_exhausted(md); - msg->msg_ev.status = status; msg->msg_ev.unlinked = unlink; if (md->md_eq != NULL) @@ -129,57 +195,46 @@ lnet_finalize (lnet_ni_t *ni, lnet_msg_t *msg, int status) msg->msg_md = NULL; } - if (status == 0 && msg->msg_ack) { - /* Only send an ACK if the PUT completed successfully */ + list_add_tail (&msg->msg_list, &the_lnet.ln_finalizeq); - lnet_return_credits_locked(msg); + /* Recursion breaker. 
Don't complete the message here if I am (or + * enough other threads are) already completing messages */ - msg->msg_ack = 0; - LNET_UNLOCK(); - - LASSERT(msg->msg_ev.type == LNET_EVENT_PUT); - LASSERT(!msg->msg_routing); - - ack_wmd = msg->msg_hdr.msg.put.ack_wmd; - - lnet_prep_send(msg, LNET_MSG_ACK, msg->msg_ev.initiator, 0, 0); - - msg->msg_hdr.msg.ack.dst_wmd = ack_wmd; - msg->msg_hdr.msg.ack.match_bits = msg->msg_ev.match_bits; - msg->msg_hdr.msg.ack.mlength = cpu_to_le32(msg->msg_ev.mlength); - - LASSERT(!in_interrupt()); - rc = lnet_send(ni->ni_nid, msg); - LASSERT(!in_interrupt()); - if (rc == 0) - return; +#ifdef __KERNEL__ + my_slot = -1; + for (i = 0; i < the_lnet.ln_nfinalizers; i++) { + if (the_lnet.ln_finalizers[i] == cfs_current()) + goto out; + if (my_slot < 0 && the_lnet.ln_finalizers[i] == NULL) + my_slot = i; + } + if (my_slot < 0) + goto out; - LNET_LOCK(); + the_lnet.ln_finalizers[my_slot] = cfs_current(); +#else + if (the_lnet.ln_finalizing) + goto out; +#endif - } else if (status == 0 && /* OK so far */ - (msg->msg_routing && !msg->msg_sending)) { /* not forwarded */ + while (!list_empty(&the_lnet.ln_finalizeq)) { + msg = list_entry(the_lnet.ln_finalizeq.next, + lnet_msg_t, msg_list); - LASSERT (!msg->msg_receiving); /* called back recv already */ - - LNET_UNLOCK(); - - LASSERT(!in_interrupt()); - rc = lnet_send(LNET_NID_ANY, msg); - LASSERT(!in_interrupt()); - if (rc == 0) - return; + list_del(&msg->msg_list); - LNET_LOCK(); + /* NB drops and regains the lnet lock if it actually does + * anything, so my finalizing friends can chomp along too */ + lnet_complete_msg_locked(msg); } - lnet_return_credits_locked(msg); +#ifdef __KERNEL__ + the_lnet.ln_finalizers[my_slot] = NULL; +#else + the_lnet.ln_finalizing = 0; +#endif - LASSERT (msg->msg_onactivelist); - msg->msg_onactivelist = 0; - list_del (&msg->msg_activelist); - the_lnet.ln_counters.msgs_alloc--; - lnet_msg_free(msg); - + out: LNET_UNLOCK(); } diff --git a/lnet/lnet/module.c 
b/lnet/lnet/module.c index 4851822..40dd2de 100644 --- a/lnet/lnet/module.c +++ b/lnet/lnet/module.c @@ -25,7 +25,7 @@ #define DEBUG_SUBSYSTEM S_LNET #include -static int config_on_load = 0; +static int config_on_load = 1; CFS_MODULE_PARM(config_on_load, "i", int, 0444, "configure network at module load"); @@ -70,9 +70,27 @@ static int lnet_ioctl(unsigned int cmd, struct libcfs_ioctl_data *data) DECLARE_IOCTL_HANDLER(lnet_ioctl_handler, lnet_ioctl); +void +lnet_configure (void *arg) +{ + int rc; + + LNET_MUTEX_DOWN(&the_lnet.ln_api_mutex); + the_lnet.ln_niinit_self = 1; + LNET_MUTEX_UP(&the_lnet.ln_api_mutex); + + rc = LNetNIInit(LUSTRE_SRV_LNET_PID); + if (rc != 0) { + LNET_MUTEX_DOWN(&the_lnet.ln_api_mutex); + the_lnet.ln_niinit_self = 0; + LNET_MUTEX_UP(&the_lnet.ln_api_mutex); + } +} + static int init_lnet(void) { - int rc; + static work_struct_t work; + int rc; ENTRY; rc = LNetInit(); @@ -81,24 +99,16 @@ static int init_lnet(void) RETURN(rc); } - if (config_on_load) { - LNET_MUTEX_DOWN(&the_lnet.ln_api_mutex); - the_lnet.ln_niinit_self = 1; - LNET_MUTEX_UP(&the_lnet.ln_api_mutex); - - rc = LNetNIInit(LUSTRE_SRV_LNET_PID); - if (rc != 0) { - /* Can't LNetFini or fail now if I loaded NALs */ - LNET_MUTEX_DOWN(&the_lnet.ln_api_mutex); - the_lnet.ln_niinit_self = 0; - LNET_MUTEX_UP(&the_lnet.ln_api_mutex); - } - } - rc = libcfs_register_ioctl(&lnet_ioctl_handler); LASSERT (rc == 0); - RETURN(rc); + if (config_on_load) { + /* Have to schedule a task to avoid deadlocking modload */ + prepare_work(&work, lnet_configure, NULL); + schedule_work(&work); + } + + RETURN(0); } static void fini_lnet(void) diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index a48df00..c6c8b83 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -197,7 +197,7 @@ lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid) /* As a first approximation; allow this peer the same number of router * buffers as it is allowed outstanding sends */ - lp->lp_rtrcredits = lp->lp_txcredits; + 
lp->lp_rtrcredits = lp->lp_minrtrcredits = lp->lp_txcredits; LASSERT (!the_lnet.ln_shutdown); /* can't add peers after shutdown starts */ diff --git a/lnet/lnet/router.c b/lnet/lnet/router.c index 42b6a0d..35e4cff 100644 --- a/lnet/lnet/router.c +++ b/lnet/lnet/router.c @@ -35,7 +35,7 @@ CFS_MODULE_PARM(tiny_router_buffers, "i", int, 0444, static int small_router_buffers = 256; CFS_MODULE_PARM(small_router_buffers, "i", int, 0444, "# of small (1 page) messages to buffer in the router"); -static int large_router_buffers = 16; +static int large_router_buffers = 32; CFS_MODULE_PARM(large_router_buffers, "i", int, 0444, "# of large messages to buffer in the router"); @@ -463,7 +463,7 @@ lnet_get_route (int idx, __u32 *net, __u32 *hops, #ifdef __KERNEL__ void -lnet_destory_rtrbuf(lnet_rtrbuf_t *rb, int npages) +lnet_destroy_rtrbuf(lnet_rtrbuf_t *rb, int npages) { int sz = offsetof(lnet_rtrbuf_t, rb_kiov[npages]); @@ -520,7 +520,7 @@ lnet_rtrpool_free_bufs(lnet_rtrbufpool_t *rbp) rb = list_entry(rbp->rbp_bufs.next, lnet_rtrbuf_t, rb_list); list_del(&rb->rb_list); - lnet_destory_rtrbuf(rb, npages); + lnet_destroy_rtrbuf(rb, npages); nbuffers++; } @@ -552,6 +552,7 @@ lnet_rtrpool_alloc_bufs(lnet_rtrbufpool_t *rbp, int nbufs) rbp->rbp_nbuffers++; rbp->rbp_credits++; + rbp->rbp_mincredits++; list_add(&rb->rb_list, &rbp->rbp_bufs); /* No allocation "under fire" */ diff --git a/lnet/lnet/router_proc.c b/lnet/lnet/router_proc.c index b51f5f2..1013a9d 100644 --- a/lnet/lnet/router_proc.c +++ b/lnet/lnet/router_proc.c @@ -94,6 +94,13 @@ lnet_router_seq_seek (lnet_route_seq_iterator_t *lrsi, loff_t off) struct list_head *r; int rc; loff_t here; + + if (off == 0) { + lrsi->lrsi_net = NULL; + lrsi->lrsi_route = NULL; + lrsi->lrsi_off = 0; + return 0; + } LNET_LOCK(); @@ -108,7 +115,7 @@ lnet_router_seq_seek (lnet_route_seq_iterator_t *lrsi, loff_t off) /* search from start */ n = the_lnet.ln_remote_nets.next; r = NULL; - here = 0; + here = 1; } else { /* continue search */ n = 
&lrsi->lrsi_net->lrn_list; @@ -208,6 +215,14 @@ lnet_router_seq_show (struct seq_file *s, void *iter) lnet_nid_t nid; int alive; + if (lrsi->lrsi_off == 0) { + seq_printf(s, "Routing %s\n", + the_lnet.ln_routing ? "enabled" : "disabled"); + seq_printf(s, "%-8s %4s %7s %s\n", + "net", "hops", "state", "router"); + return 0; + } + LASSERT (lrsi->lrsi_net != NULL); LASSERT (lrsi->lrsi_route != NULL); @@ -225,7 +240,7 @@ lnet_router_seq_show (struct seq_file *s, void *iter) LNET_UNLOCK(); - seq_printf(s, "%-8s %2u %7s %s\n", libcfs_net2str(net), hops, + seq_printf(s, "%-8s %4u %7s %s\n", libcfs_net2str(net), hops, alive ? "up" : "down", libcfs_nid2str(nid)); return 0; } @@ -276,6 +291,13 @@ lnet_peer_seq_seek (lnet_peer_seq_iterator_t *lpsi, loff_t off) loff_t here; int rc; + if (off == 0) { + lpsi->lpsi_idx = 0; + lpsi->lpsi_peer = NULL; + lpsi->lpsi_off = 0; + return 0; + } + LNET_LOCK(); if (lpsi->lpsi_peer != NULL && @@ -290,7 +312,7 @@ lnet_peer_seq_seek (lnet_peer_seq_iterator_t *lpsi, loff_t off) /* search from start */ idx = 0; p = NULL; - here = 0; + here = 1; } else { /* continue search */ idx = lpsi->lpsi_idx; @@ -392,6 +414,13 @@ lnet_peer_seq_show (struct seq_file *s, void *iter) int alive; int txqnob; int nrefs; + + if (lpsi->lpsi_off == 0) { + seq_printf(s, "%-16s %4s %5s %5s %5s %5s %5s %5s %s\n", + "nid", "refs", "state", "max", + "rtr", "min", "tx", "min", "queue"); + return 0; + } LASSERT (lpsi->lpsi_peer != NULL); @@ -416,7 +445,7 @@ lnet_peer_seq_show (struct seq_file *s, void *iter) LNET_UNLOCK(); - seq_printf(s, "%-16s [%3d] %4s %3d rtr %3d %3d tx %3d %3d # %d\n", + seq_printf(s, "%-16s %4d %5s %5d %5d %5d %5d %5d %d\n", libcfs_nid2str(nid), nrefs, alive ? 
"up" : "down", maxcr, rtrcr, minrtrcr, txcr, mintxcr, txqnob); return 0; @@ -464,6 +493,12 @@ lnet_buffer_seq_seek (lnet_buffer_seq_iterator_t *lbsi, loff_t off) int idx; loff_t here; int rc; + + if (off == 0) { + lbsi->lbsi_idx = -1; + lbsi->lbsi_off = 0; + return 0; + } LNET_LOCK(); @@ -471,7 +506,7 @@ lnet_buffer_seq_seek (lnet_buffer_seq_iterator_t *lbsi, loff_t off) lbsi->lbsi_off > off) { /* search from start */ idx = 0; - here = 0; + here = 1; } else { /* continue search */ idx = lbsi->lbsi_idx; @@ -552,6 +587,12 @@ lnet_buffer_seq_show (struct seq_file *s, void *iter) int cr; int mincr; + if (lbsi->lbsi_off == 0) { + seq_printf(s, "%5s %5s %7s %7s\n", + "pages", "count", "credits", "min"); + return 0; + } + LASSERT (lbsi->lbsi_idx >= 0 && lbsi->lbsi_idx < LNET_PEER_HASHSIZE); LNET_LOCK(); @@ -565,7 +606,7 @@ lnet_buffer_seq_show (struct seq_file *s, void *iter) LNET_UNLOCK(); - seq_printf(s, "[%d] %4d x %3d %5d %5d\n", lbsi->lbsi_idx, + seq_printf(s, "%5d %5d %7d %7d\n", npages, nbuf, cr, mincr); return 0; } @@ -612,20 +653,27 @@ lnet_ni_seq_seek (lnet_ni_seq_iterator_t *lnsi, loff_t off) struct list_head *n; loff_t here; int rc; + + if (off == 0) { + lnsi->lnsi_ni = NULL; + lnsi->lnsi_off = 0; + return 0; + } LNET_LOCK(); - if (lnsi->lnsi_off > off) { + if (lnsi->lnsi_ni == NULL || + lnsi->lnsi_off > off) { /* search from start */ n = NULL; - here = 0; + here = 1; } else { /* continue search */ n = &lnsi->lnsi_ni->ni_list; here = lnsi->lnsi_off; } - lnsi->lnsi_off = off; + lnsi->lnsi_off = off; if (n == NULL) n = the_lnet.ln_nis.next; @@ -704,6 +752,12 @@ lnet_ni_seq_show (struct seq_file *s, void *iter) lnet_nid_t nid; int nref; + if (lnsi->lnsi_off == 0) { + seq_printf(s, "%-16s %4s %4s %5s %5s %5s\n", + "nid", "refs", "peer", "max", "tx", "min"); + return 0; + } + LASSERT (lnsi->lnsi_ni != NULL); LNET_LOCK(); @@ -719,8 +773,9 @@ lnet_ni_seq_show (struct seq_file *s, void *iter) LNET_UNLOCK(); - seq_printf(s, "%-16s [%3d] %4d %5d %5d %5d\n", - 
libcfs_nid2str(nid), nref, npeertxcr, maxtxcr, txcr, mintxcr); + seq_printf(s, "%-16s %4d %4d %5d %5d %5d\n", + libcfs_nid2str(nid), nref, + npeertxcr, maxtxcr, txcr, mintxcr); return 0; } diff --git a/lnet/utils/lbstats b/lnet/utils/lbstats index 5b77ad4..0baae3e 100755 --- a/lnet/utils/lbstats +++ b/lnet/utils/lbstats @@ -1,14 +1,11 @@ #!/bin/bash -echo "=== Router Buffers ===========================" -echo +echo "=== Router Buffers =======" test -e /proc/sys/lnet/buffers && cat /proc/sys/lnet/buffers echo -echo "=== NIs ======================================" -echo +echo "=== NIs ====================================" test -e /proc/sys/lnet/nis && cat /proc/sys/lnet/nis echo -echo "=== Peers ====================================" -echo +echo "=== Peers =====================================================" test -e /proc/sys/lnet/peers && cat /proc/sys/lnet/peers - +echo diff --git a/lnet/utils/lnetunload b/lnet/utils/lnetunload new file mode 100755 index 0000000..ffea00b --- /dev/null +++ b/lnet/utils/lnetunload @@ -0,0 +1,13 @@ +#!/bin/sh + +lnds="ksocklnd kqswlnd kgmlnd kopeniblnd kiiblnd kviblnd kralnd kptllnd" + +if lctl network down > /dev/null 2>&1; then + for mod in $lnds; do + if grep "^$mod" /proc/modules >/dev/null 2>&1; then + rmmod $mod + fi + done + + rmmod lnet libcfs +fi \ No newline at end of file