From 9aff811bde056335538685a9c96d0877b0980c87 Mon Sep 17 00:00:00 2001 From: Chris Horn Date: Thu, 3 Nov 2022 09:37:05 -0600 Subject: [PATCH 1/1] LU-16466 kfilnd: Allow custom traffic class per-NI Allow a traffic class to be specified per-NI. Per-NI values can be specified via yaml configuration or the lnetctl CLI. A new kfilnd module parameter, traffic_class, defines the default traffic class when a per-NI value is not otherwise specified. Here are the valid values and their associated mapping to kfabric: - best_effort -> KFI_TC_BEST_EFFORT - low_latency -> KFI_TC_LOW_LATENCY - dedicated_access -> KFI_TC_DEDICATED_ACCESS - bulk_data -> KFI_TC_BULK_DATA - scavenger -> KFI_TC_SCAVENGER - network_ctrl -> KFI_TC_NETWORK_CTRL The default value of the traffic_class kfilnd parameter is "best_effort". Here's an example yaml configuration: net: - net type: kfi1 local NI(s): - interfaces: 0: cxi0 tunables: lnd tunables: traffic_class: bulk_data - net type: kfi2 local NI(s): - interfaces: 0: cxi1 tunables: lnd tunables: traffic_class: low_latency Here's an example of setting per-NI values using the lnetctl CLI: $ lnetctl net add --net kfi1 --if cxi0 --traffic-class bulk_data $ lnetctl net add --net kfi2 --if cxi0 --traffic-class low_latency Both the string and numeric representations of the traffic class are shown in the output of lnetctl net show. e.g. $ lnetctl net show -v | egrep -e kfi -e traffic - net type: kfi1 - nid: 0@kfi1 traffic_class: bulk_data traffic_class_num: 515 - net type: kfi2 - nid: 0@kfi2 traffic_class: low_latency traffic_class_num: 513 $ This is simply a debug tool to ensure the feature is working correctly. "traffic_class_num" is ignored if it is specified as part of a yaml configuration, and it is omitted from lnetctl export output when the --backup option is specified. 
HPE-bug-id: LUS-10197 Test-Parameters: trivial Signed-off-by: Chris Horn Change-Id: I014a323e675d608ee5b506d8676fcc2cc78e8c4a Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/50490 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Ian Ziemba Reviewed-by: Ron Gredvig Reviewed-by: James Simmons Reviewed-by: Oleg Drokin --- lnet/include/uapi/linux/lnet/lnet-dlc.h | 2 ++ lnet/klnds/kfilnd/kfilnd.c | 17 +++++++++-- lnet/klnds/kfilnd/kfilnd.h | 2 ++ lnet/klnds/kfilnd/kfilnd_dom.c | 2 ++ lnet/klnds/kfilnd/kfilnd_modparams.c | 45 ++++++++++++++++++++++++++++ lnet/utils/lnetconfig/liblnd.h | 2 +- lnet/utils/lnetconfig/liblnetconfig.c | 2 +- lnet/utils/lnetconfig/liblnetconfig_lnd.c | 26 ++++++++++++++-- lnet/utils/lnetctl.c | 49 +++++++++++++++++++++++++++++-- 9 files changed, 137 insertions(+), 10 deletions(-) diff --git a/lnet/include/uapi/linux/lnet/lnet-dlc.h b/lnet/include/uapi/linux/lnet/lnet-dlc.h index a1f4bfe..10e4a19 100644 --- a/lnet/include/uapi/linux/lnet/lnet-dlc.h +++ b/lnet/include/uapi/linux/lnet/lnet-dlc.h @@ -111,6 +111,8 @@ struct lnet_ioctl_config_kfilnd_tunables { __u32 lnd_prov_major_version; __u32 lnd_prov_minor_version; __u32 lnd_auth_key; + char lnd_traffic_class_str[LNET_MAX_STR_LEN]; + __u32 lnd_traffic_class; }; struct lnet_ioctl_config_socklnd_tunables { diff --git a/lnet/klnds/kfilnd/kfilnd.c b/lnet/klnds/kfilnd/kfilnd.c index 1a311e0..1145de4 100644 --- a/lnet/klnds/kfilnd/kfilnd.c +++ b/lnet/klnds/kfilnd/kfilnd.c @@ -366,6 +366,10 @@ static const struct ln_key_list kfilnd_tunables_keys = { .lkp_value = "auth_key", .lkp_data_type = NLA_S32 }, + [LNET_NET_KFILND_TUNABLES_ATTR_TRAFFIC_CLASS] = { + .lkp_value = "traffic_class", + .lkp_data_type = NLA_STRING, + }, }, }; @@ -373,20 +377,27 @@ static int kfilnd_nl_set(int cmd, struct nlattr *attr, int type, void *data) { struct lnet_lnd_tunables *tunables = data; + struct lnet_ioctl_config_kfilnd_tunables *lnd_kfi; int rc = 0; if (cmd != LNET_CMD_NETS) return -EOPNOTSUPP; + lnd_kfi = 
&tunables->lnd_tun_u.lnd_kfi; + switch (type) { case LNET_NET_KFILND_TUNABLES_ATTR_PROV_MAJOR: - tunables->lnd_tun_u.lnd_kfi.lnd_prov_major_version = nla_get_s64(attr); + lnd_kfi->lnd_prov_major_version = nla_get_s64(attr); break; case LNET_NET_KFILND_TUNABLES_ATTR_PROV_MINOR: - tunables->lnd_tun_u.lnd_kfi.lnd_prov_minor_version = nla_get_s64(attr); + lnd_kfi->lnd_prov_minor_version = nla_get_s64(attr); break; case LNET_NET_KFILND_TUNABLES_ATTR_AUTH_KEY: - tunables->lnd_tun_u.lnd_kfi.lnd_auth_key = nla_get_s64(attr); + lnd_kfi->lnd_auth_key = nla_get_s64(attr); + break; + case LNET_NET_KFILND_TUNABLES_ATTR_TRAFFIC_CLASS: + rc = nla_strscpy(lnd_kfi->lnd_traffic_class_str, attr, + sizeof(lnd_kfi->lnd_traffic_class_str)); break; default: rc = -EINVAL; diff --git a/lnet/klnds/kfilnd/kfilnd.h b/lnet/klnds/kfilnd/kfilnd.h index 0939877..9165081 100644 --- a/lnet/klnds/kfilnd/kfilnd.h +++ b/lnet/klnds/kfilnd/kfilnd.h @@ -70,6 +70,7 @@ #define DEBUG_SUBSYSTEM S_LND #include +#include #include #include "kfi_endpoint.h" #include "kfi_errno.h" @@ -132,6 +133,7 @@ enum kfilnd_ni_lnd_tunables_attr { LNET_NET_KFILND_TUNABLES_ATTR_PROV_MAJOR, LNET_NET_KFILND_TUNABLES_ATTR_PROV_MINOR, LNET_NET_KFILND_TUNABLES_ATTR_AUTH_KEY, + LNET_NET_KFILND_TUNABLES_ATTR_TRAFFIC_CLASS, __LNET_NET_KFILND_TUNABLES_ATTR_MAX_PLUS_ONE, }; diff --git a/lnet/klnds/kfilnd/kfilnd_dom.c b/lnet/klnds/kfilnd/kfilnd_dom.c index a5149aa..afbf2ef 100644 --- a/lnet/klnds/kfilnd/kfilnd_dom.c +++ b/lnet/klnds/kfilnd/kfilnd_dom.c @@ -337,6 +337,8 @@ struct kfilnd_dom *kfilnd_dom_get(struct lnet_ni *ni, const char *node, hints->domain_attr->mr_iov_limit = 256; /* 1 MiB LNet message */ hints->domain_attr->mr_key_size = sizeof(int); hints->domain_attr->resource_mgmt = KFI_RM_DISABLED; + hints->domain_attr->tclass = + ni->ni_lnd_tunables.lnd_tun_u.lnd_kfi.lnd_traffic_class; hints->ep_attr->max_msg_size = LNET_MAX_PAYLOAD; hints->rx_attr->op_flags = KFI_COMPLETION | KFI_MULTI_RECV; hints->rx_attr->iov_limit = 256; /* 1 
MiB LNet message */ diff --git a/lnet/klnds/kfilnd/kfilnd_modparams.c b/lnet/klnds/kfilnd/kfilnd_modparams.c index ac36cd7..22c84c9 100644 --- a/lnet/klnds/kfilnd/kfilnd_modparams.c +++ b/lnet/klnds/kfilnd/kfilnd_modparams.c @@ -98,6 +98,28 @@ static unsigned int auth_key = 255; module_param(auth_key, uint, 0444); MODULE_PARM_DESC(auth_key, "Default authorization key to be used for LNet NIs"); +static char *traffic_class = "best_effort"; +module_param(traffic_class, charp, 0444); +MODULE_PARM_DESC(traffic_class, "Traffic class - default is \"best_effort\""); + +static int +kfilnd_tcstr2num(char *tcstr) +{ + if (!strcmp(tcstr, "best_effort")) + return KFI_TC_BEST_EFFORT; + if (!strcmp(tcstr, "low_latency")) + return KFI_TC_LOW_LATENCY; + if (!strcmp(tcstr, "dedicated_access")) + return KFI_TC_DEDICATED_ACCESS; + if (!strcmp(tcstr, "bulk_data")) + return KFI_TC_BULK_DATA; + if (!strcmp(tcstr, "scavenger")) + return KFI_TC_SCAVENGER; + if (!strcmp(tcstr, "network_ctrl")) + return KFI_TC_NETWORK_CTRL; + return -1; +} + int kfilnd_tunables_setup(struct lnet_ni *ni) { struct lnet_ioctl_config_lnd_cmn_tunables *net_tunables; @@ -128,6 +150,9 @@ int kfilnd_tunables_setup(struct lnet_ni *ni) kfilnd_tunables->lnd_prov_major_version = prov_major_version; kfilnd_tunables->lnd_prov_minor_version = prov_minor_version; kfilnd_tunables->lnd_auth_key = auth_key; + if (strlen(traffic_class) < LNET_MAX_STR_LEN) + strcpy(&kfilnd_tunables->lnd_traffic_class_str[0], + traffic_class); } /* Treat kfilnd_tunables set to zero as uninitialized. 
*/ @@ -140,6 +165,14 @@ int kfilnd_tunables_setup(struct lnet_ni *ni) if (kfilnd_tunables->lnd_auth_key == 0) kfilnd_tunables->lnd_auth_key = auth_key; + if (strlen(kfilnd_tunables->lnd_traffic_class_str) == 0 && + strlen(traffic_class) < LNET_MAX_STR_LEN) + strcpy(&kfilnd_tunables->lnd_traffic_class_str[0], + traffic_class); + + kfilnd_tunables->lnd_traffic_class = + kfilnd_tcstr2num(kfilnd_tunables->lnd_traffic_class_str); + if (net_tunables->lct_max_tx_credits > KFILND_EP_KEY_MAX) { CERROR("Credits cannot exceed %lu\n", KFILND_EP_KEY_MAX); return -EINVAL; @@ -156,6 +189,12 @@ int kfilnd_tunables_setup(struct lnet_ni *ni) return -EINVAL; } + if (kfilnd_tunables->lnd_traffic_class == -1) { + CERROR("Invalid traffic_class \"%s\" - Valid values are: best_effort, low_latency, dedicated_access, bulk_data, scavenger, and network_ctrl\n", + kfilnd_tunables->lnd_traffic_class_str); + return -EINVAL; + } + return 0; } @@ -196,5 +235,11 @@ int kfilnd_tunables_init(void) return -EINVAL; } + if (kfilnd_tcstr2num(traffic_class) == -1) { + CERROR("Invalid traffic_class \"%s\" - Valid values are: best_effort, low_latency, dedicated_access, bulk_data, scavenger, and network_ctrl\n", + traffic_class); + return -EINVAL; + } + return 0; } diff --git a/lnet/utils/lnetconfig/liblnd.h b/lnet/utils/lnetconfig/liblnd.h index 825ee05..29592f8 100644 --- a/lnet/utils/lnetconfig/liblnd.h +++ b/lnet/utils/lnetconfig/liblnd.h @@ -37,7 +37,7 @@ lustre_net_show_tunables(struct cYAML *tunables, int lustre_ni_show_tunables(struct cYAML *lnd_tunables, __u32 net_type, - struct lnet_lnd_tunables *lnd); + struct lnet_lnd_tunables *lnd, bool backup); void lustre_yaml_extract_lnd_tunables(struct cYAML *tree, diff --git a/lnet/utils/lnetconfig/liblnetconfig.c b/lnet/utils/lnetconfig/liblnetconfig.c index 208743d..b198a6d 100644 --- a/lnet/utils/lnetconfig/liblnetconfig.c +++ b/lnet/utils/lnetconfig/liblnetconfig.c @@ -2403,7 +2403,7 @@ continue_without_msg_stats: rc = lustre_ni_show_tunables(tunables, 
LNET_NETTYP(rc_net), - &lnd->lt_tun); + &lnd->lt_tun, backup); if (rc != LUSTRE_CFG_RC_NO_ERR && rc != LUSTRE_CFG_RC_NO_MATCH) goto out; diff --git a/lnet/utils/lnetconfig/liblnetconfig_lnd.c b/lnet/utils/lnetconfig/liblnetconfig_lnd.c index 23c0815..251e7da 100644 --- a/lnet/utils/lnetconfig/liblnetconfig_lnd.c +++ b/lnet/utils/lnetconfig/liblnetconfig_lnd.c @@ -88,7 +88,8 @@ lustre_socklnd_show_tun(struct cYAML *lndparams, #ifdef HAVE_KFILND static int lustre_kfilnd_show_tun(struct cYAML *lndparams, - struct lnet_ioctl_config_kfilnd_tunables *lnd_cfg) + struct lnet_ioctl_config_kfilnd_tunables *lnd_cfg, + bool backup) { if (cYAML_create_number(lndparams, "prov_major_version", lnd_cfg->lnd_prov_major_version) == NULL) @@ -102,6 +103,15 @@ lustre_kfilnd_show_tun(struct cYAML *lndparams, lnd_cfg->lnd_auth_key) == NULL) return LUSTRE_CFG_RC_OUT_OF_MEM; + if (cYAML_create_string(lndparams, "traffic_class", + lnd_cfg->lnd_traffic_class_str) == NULL) + return LUSTRE_CFG_RC_OUT_OF_MEM; + + if (!backup && + cYAML_create_number(lndparams, "traffic_class_num", + lnd_cfg->lnd_traffic_class) == NULL) + return LUSTRE_CFG_RC_OUT_OF_MEM; + return LUSTRE_CFG_RC_NO_ERR; } #endif @@ -140,7 +150,8 @@ out: int lustre_ni_show_tunables(struct cYAML *lnd_tunables, __u32 net_type, - struct lnet_lnd_tunables *lnd) + struct lnet_lnd_tunables *lnd, + bool backup) { int rc = LUSTRE_CFG_RC_NO_MATCH; @@ -153,8 +164,10 @@ lustre_ni_show_tunables(struct cYAML *lnd_tunables, #ifdef HAVE_KFILND else if (net_type == KFILND) rc = lustre_kfilnd_show_tun(lnd_tunables, - &lnd->lnd_tun_u.lnd_kfi); + &lnd->lnd_tun_u.lnd_kfi, + backup); #endif + return rc; } @@ -209,6 +222,7 @@ yaml_extract_kfi_tun(struct cYAML *tree, struct cYAML *prov_major_version = NULL; struct cYAML *prov_minor_version = NULL; struct cYAML *auth_key = NULL; + struct cYAML *traffic_class = NULL; struct cYAML *lndparams = NULL; lndparams = cYAML_get_object_item(tree, "lnd tunables"); @@ -228,6 +242,12 @@ yaml_extract_kfi_tun(struct 
cYAML *tree, auth_key = cYAML_get_object_item(lndparams, "auth_key"); lnd_cfg->lnd_auth_key = (auth_key) ? auth_key->cy_valueint : 0; + + traffic_class = cYAML_get_object_item(lndparams, "traffic_class"); + if (traffic_class && traffic_class->cy_valuestring && + strlen(traffic_class->cy_valuestring) < LNET_MAX_STR_LEN) + strcpy(&lnd_cfg->lnd_traffic_class_str[0], + traffic_class->cy_valuestring); } #endif diff --git a/lnet/utils/lnetctl.c b/lnet/utils/lnetctl.c index 72ed1a3..d72ec2e 100644 --- a/lnet/utils/lnetctl.c +++ b/lnet/utils/lnetctl.c @@ -167,7 +167,8 @@ command_t net_cmds[] = { "\t--cpt: CPU Partitions configured net uses (e.g. [0,1]\n" "\t--conns-per-peer: number of connections per peer\n" "\t--skip-mr-route-setup: do not add linux route for the ni\n" - "\t--auth-key: Network authorization key (kfilnd only)\n"}, + "\t--auth-key: Network authorization key (kfilnd only)\n" + "\t--traffic-class: Traffic class (kfilnd only)\n"}, {"del", jt_del_ni, 0, "delete a network\n" "\t--net: net name (e.g. tcp0)\n" "\t--if: physical interface (e.g. 
eth0)\n"}, @@ -1315,6 +1316,7 @@ skip_general_settings: if (tunables->lt_tun.lnd_tun_u.lnd_sock.lnd_conns_per_peer > 0 || #ifdef HAVE_KFILND tunables->lt_tun.lnd_tun_u.lnd_kfi.lnd_auth_key > 0 || + tunables->lt_tun.lnd_tun_u.lnd_kfi.lnd_traffic_class_str[0] || #endif tunables->lt_tun.lnd_tun_u.lnd_o2ib.lnd_conns_per_peer > 0) { yaml_scalar_event_initialize(&event, NULL, @@ -1355,6 +1357,29 @@ skip_general_settings: if (rc == 0) goto error; } + + if (tunables->lt_tun.lnd_tun_u.lnd_kfi.lnd_traffic_class_str[0]) { + char *tc = &tunables->lt_tun.lnd_tun_u.lnd_kfi.lnd_traffic_class_str[0]; + + yaml_scalar_event_initialize(&event, NULL, + (yaml_char_t *)YAML_STR_TAG, + (yaml_char_t *)"traffic_class", + strlen("traffic_class"), 1, 0, + YAML_PLAIN_SCALAR_STYLE); + rc = yaml_emitter_emit(output, &event); + if (rc == 0) + goto error; + + yaml_scalar_event_initialize(&event, NULL, + (yaml_char_t *)YAML_INT_TAG, + (yaml_char_t *)tc, + strlen(tc), 1, 0, + YAML_PLAIN_SCALAR_STYLE); + + rc = yaml_emitter_emit(output, &event); + if (rc == 0) + goto error; + } #endif if (tunables->lt_tun.lnd_tun_u.lnd_sock.lnd_conns_per_peer > 0 || tunables->lt_tun.lnd_tun_u.lnd_o2ib.lnd_conns_per_peer > 0) { @@ -1692,6 +1717,7 @@ static int jt_add_ni(int argc, char **argv) { char *ip2net = NULL; long int pto = -1, pc = -1, pbc = -1, cre = -1, cpp = -1, auth_key = -1; + char *traffic_class = NULL; struct cYAML *err_rc = NULL; int rc, opt, cpt_rc = -1; struct lnet_dlc_network_descr nw_descr; @@ -1699,7 +1725,7 @@ static int jt_add_ni(int argc, char **argv) struct lnet_ioctl_config_lnd_tunables tunables; bool found = false; bool skip_mr_route_setup = false; - const char *const short_options = "a:b:c:i:k:m:n:p:r:s:t:"; + const char *const short_options = "a:b:c:i:k:m:n:p:r:s:t:T:"; static const struct option long_options[] = { { .name = "auth-key", .has_arg = required_argument, .val = 'a' }, { .name = "peer-buffer-credits", @@ -1715,6 +1741,7 @@ static int jt_add_ni(int argc, char **argv) { .name = 
"credits", .has_arg = required_argument, .val = 'r' }, { .name = "cpt", .has_arg = required_argument, .val = 's' }, { .name = "peer-timeout", .has_arg = required_argument, .val = 't' }, + { .name = "traffic-class", .has_arg = required_argument, .val = 'T' }, { .name = NULL } }; char *net_id = NULL; @@ -1801,6 +1828,17 @@ static int jt_add_ni(int argc, char **argv) continue; } break; + case 'T': + traffic_class = optarg; + if (strlen(traffic_class) == 0 || + strlen(traffic_class) >= LNET_MAX_STR_LEN) { + cYAML_build_error(-1, -1, "ni", "add", + "Invalid traffic-class argument", + &err_rc); + rc = LUSTRE_CFG_RC_BAD_PARAM; + goto failed; + } + break; case '?': print_help(net_cmds, "net", "add"); default: @@ -1812,6 +1850,13 @@ static int jt_add_ni(int argc, char **argv) tunables.lt_tun.lnd_tun_u.lnd_kfi.lnd_auth_key = auth_key; found = true; } + + if (traffic_class && LNET_NETTYP(nw_descr.nw_id) == KFILND && + strlen(traffic_class) < LNET_MAX_STR_LEN) { + strcpy(&tunables.lt_tun.lnd_tun_u.lnd_kfi.lnd_traffic_class_str[0], + traffic_class); + found = true; + } #endif if (LNET_NETTYP(nw_descr.nw_id) == SOCKLND && (cpp > -1)) { -- 1.8.3.1