Whamcloud - gitweb
LU-16466 kfilnd: Allow custom traffic class per-NI 90/50490/4
author Chris Horn <chris.horn@hpe.com>
Thu, 3 Nov 2022 15:37:05 +0000 (09:37 -0600)
committer Oleg Drokin <green@whamcloud.com>
Fri, 19 May 2023 07:06:41 +0000 (07:06 +0000)
Allow a traffic class to be specified per-NI. Per-NI values can be
specified via yaml configuration or the lnetctl CLI. A new kfilnd
module parameter, traffic_class, defines the default traffic class
when a per-NI value is not otherwise specified.

Here are the valid values and their associated mapping to kfabric:
 - best_effort -> KFI_TC_BEST_EFFORT
 - low_latency -> KFI_TC_LOW_LATENCY
 - dedicated_access -> KFI_TC_DEDICATED_ACCESS
 - bulk_data -> KFI_TC_BULK_DATA
 - scavenger -> KFI_TC_SCAVENGER
 - network_ctrl -> KFI_TC_NETWORK_CTRL

The default value of the traffic_class kfilnd parameter is
"best_effort".

Here's an example yaml configuration:
net:
    - net type: kfi1
      local NI(s):
        - interfaces:
              0: cxi0
          tunables:
          lnd tunables:
              traffic_class: bulk_data
    - net type: kfi2
      local NI(s):
        - interfaces:
              0: cxi1
          tunables:
          lnd tunables:
              traffic_class: low_latency

Here's an example of setting per-NI values using the lnetctl CLI:

$ lnetctl net add --net kfi1 --if cxi0 --traffic-class bulk_data
$ lnetctl net add --net kfi2 --if cxi0 --traffic-class low_latency

Both the string and numeric representations of the traffic class are
shown in the output of lnetctl net show, e.g.:

$ lnetctl net show -v | egrep -e kfi -e traffic
    - net type: kfi1
        - nid: 0@kfi1
              traffic_class: bulk_data
              traffic_class_num: 515
    - net type: kfi2
        - nid: 0@kfi2
              traffic_class: low_latency
              traffic_class_num: 513
$

The numeric "traffic_class_num" field is simply a debug aid to confirm
that the feature is working correctly. It is ignored if it is specified
as part of a yaml configuration, and it is omitted from lnetctl export
output when the --backup option is specified.

HPE-bug-id: LUS-10197
Test-Parameters: trivial
Signed-off-by: Chris Horn <chris.horn@hpe.com>
Change-Id: I014a323e675d608ee5b506d8676fcc2cc78e8c4a
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/50490
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Ian Ziemba <ian.ziemba@hpe.com>
Reviewed-by: Ron Gredvig <ron.gredvig@hpe.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/include/uapi/linux/lnet/lnet-dlc.h
lnet/klnds/kfilnd/kfilnd.c
lnet/klnds/kfilnd/kfilnd.h
lnet/klnds/kfilnd/kfilnd_dom.c
lnet/klnds/kfilnd/kfilnd_modparams.c
lnet/utils/lnetconfig/liblnd.h
lnet/utils/lnetconfig/liblnetconfig.c
lnet/utils/lnetconfig/liblnetconfig_lnd.c
lnet/utils/lnetctl.c

index a1f4bfe..10e4a19 100644 (file)
@@ -111,6 +111,8 @@ struct lnet_ioctl_config_kfilnd_tunables {
        __u32 lnd_prov_major_version;
        __u32 lnd_prov_minor_version;
        __u32 lnd_auth_key;
+       char lnd_traffic_class_str[LNET_MAX_STR_LEN];
+       __u32 lnd_traffic_class;
 };
 
 struct lnet_ioctl_config_socklnd_tunables {
index 1a311e0..1145de4 100644 (file)
@@ -366,6 +366,10 @@ static const struct ln_key_list kfilnd_tunables_keys = {
                        .lkp_value      = "auth_key",
                        .lkp_data_type  = NLA_S32
                },
+               [LNET_NET_KFILND_TUNABLES_ATTR_TRAFFIC_CLASS]  = {
+                       .lkp_value      = "traffic_class",
+                       .lkp_data_type  = NLA_STRING,
+               },
        },
 };
 
@@ -373,20 +377,27 @@ static int
 kfilnd_nl_set(int cmd, struct nlattr *attr, int type, void *data)
 {
        struct lnet_lnd_tunables *tunables = data;
+       struct lnet_ioctl_config_kfilnd_tunables *lnd_kfi;
        int rc = 0;
 
        if (cmd != LNET_CMD_NETS)
                return -EOPNOTSUPP;
 
+       lnd_kfi = &tunables->lnd_tun_u.lnd_kfi;
+
        switch (type) {
        case LNET_NET_KFILND_TUNABLES_ATTR_PROV_MAJOR:
-               tunables->lnd_tun_u.lnd_kfi.lnd_prov_major_version = nla_get_s64(attr);
+               lnd_kfi->lnd_prov_major_version = nla_get_s64(attr);
                break;
        case LNET_NET_KFILND_TUNABLES_ATTR_PROV_MINOR:
-               tunables->lnd_tun_u.lnd_kfi.lnd_prov_minor_version = nla_get_s64(attr);
+               lnd_kfi->lnd_prov_minor_version = nla_get_s64(attr);
                break;
        case LNET_NET_KFILND_TUNABLES_ATTR_AUTH_KEY:
-               tunables->lnd_tun_u.lnd_kfi.lnd_auth_key = nla_get_s64(attr);
+               lnd_kfi->lnd_auth_key = nla_get_s64(attr);
+               break;
+       case LNET_NET_KFILND_TUNABLES_ATTR_TRAFFIC_CLASS:
+               rc = nla_strscpy(lnd_kfi->lnd_traffic_class_str, attr,
+                                sizeof(lnd_kfi->lnd_traffic_class_str));
                break;
        default:
                rc = -EINVAL;
index 0939877..9165081 100644 (file)
@@ -70,6 +70,7 @@
 #define DEBUG_SUBSYSTEM S_LND
 
 #include <libcfs/libcfs.h>
+#include <libcfs/linux/linux-net.h>
 #include <lnet/lib-lnet.h>
 #include "kfi_endpoint.h"
 #include "kfi_errno.h"
@@ -132,6 +133,7 @@ enum kfilnd_ni_lnd_tunables_attr {
        LNET_NET_KFILND_TUNABLES_ATTR_PROV_MAJOR,
        LNET_NET_KFILND_TUNABLES_ATTR_PROV_MINOR,
        LNET_NET_KFILND_TUNABLES_ATTR_AUTH_KEY,
+       LNET_NET_KFILND_TUNABLES_ATTR_TRAFFIC_CLASS,
        __LNET_NET_KFILND_TUNABLES_ATTR_MAX_PLUS_ONE,
 };
 
index a5149aa..afbf2ef 100644 (file)
@@ -337,6 +337,8 @@ struct kfilnd_dom *kfilnd_dom_get(struct lnet_ni *ni, const char *node,
        hints->domain_attr->mr_iov_limit = 256; /* 1 MiB LNet message */
        hints->domain_attr->mr_key_size = sizeof(int);
        hints->domain_attr->resource_mgmt = KFI_RM_DISABLED;
+       hints->domain_attr->tclass =
+               ni->ni_lnd_tunables.lnd_tun_u.lnd_kfi.lnd_traffic_class;
        hints->ep_attr->max_msg_size = LNET_MAX_PAYLOAD;
        hints->rx_attr->op_flags = KFI_COMPLETION | KFI_MULTI_RECV;
        hints->rx_attr->iov_limit = 256; /* 1 MiB LNet message */
index ac36cd7..22c84c9 100644 (file)
@@ -98,6 +98,28 @@ static unsigned int auth_key = 255;
 module_param(auth_key, uint, 0444);
 MODULE_PARM_DESC(auth_key, "Default authorization key to be used for LNet NIs");
 
+static char *traffic_class = "best_effort";
+module_param(traffic_class, charp, 0444);
+MODULE_PARM_DESC(traffic_class, "Traffic class - default is \"best_effort\"");
+
+static int
+kfilnd_tcstr2num(char *tcstr)
+{
+       if (!strcmp(tcstr, "best_effort"))
+               return KFI_TC_BEST_EFFORT;
+       if (!strcmp(tcstr, "low_latency"))
+               return KFI_TC_LOW_LATENCY;
+       if (!strcmp(tcstr, "dedicated_access"))
+               return KFI_TC_DEDICATED_ACCESS;
+       if (!strcmp(tcstr, "bulk_data"))
+               return KFI_TC_BULK_DATA;
+       if (!strcmp(tcstr, "scavenger"))
+               return KFI_TC_SCAVENGER;
+       if (!strcmp(tcstr, "network_ctrl"))
+               return KFI_TC_NETWORK_CTRL;
+       return -1;
+}
+
 int kfilnd_tunables_setup(struct lnet_ni *ni)
 {
        struct lnet_ioctl_config_lnd_cmn_tunables *net_tunables;
@@ -128,6 +150,9 @@ int kfilnd_tunables_setup(struct lnet_ni *ni)
                kfilnd_tunables->lnd_prov_major_version = prov_major_version;
                kfilnd_tunables->lnd_prov_minor_version = prov_minor_version;
                kfilnd_tunables->lnd_auth_key = auth_key;
+               if (strlen(traffic_class) < LNET_MAX_STR_LEN)
+                       strcpy(&kfilnd_tunables->lnd_traffic_class_str[0],
+                              traffic_class);
        }
 
        /* Treat kfilnd_tunables set to zero as uninitialized. */
@@ -140,6 +165,14 @@ int kfilnd_tunables_setup(struct lnet_ni *ni)
        if (kfilnd_tunables->lnd_auth_key == 0)
                kfilnd_tunables->lnd_auth_key = auth_key;
 
+       if (strlen(kfilnd_tunables->lnd_traffic_class_str) == 0 &&
+           strlen(traffic_class) < LNET_MAX_STR_LEN)
+               strcpy(&kfilnd_tunables->lnd_traffic_class_str[0],
+                      traffic_class);
+
+       kfilnd_tunables->lnd_traffic_class =
+               kfilnd_tcstr2num(kfilnd_tunables->lnd_traffic_class_str);
+
        if (net_tunables->lct_max_tx_credits > KFILND_EP_KEY_MAX) {
                CERROR("Credits cannot exceed %lu\n", KFILND_EP_KEY_MAX);
                return -EINVAL;
@@ -156,6 +189,12 @@ int kfilnd_tunables_setup(struct lnet_ni *ni)
                return -EINVAL;
        }
 
+       if (kfilnd_tunables->lnd_traffic_class == -1) {
+               CERROR("Invalid traffic_class \"%s\" - Valid values are: best_effort, low_latency, dedicated_access, bulk_data, scavenger, and network_ctrl\n",
+                      kfilnd_tunables->lnd_traffic_class_str);
+               return -EINVAL;
+       }
+
        return 0;
 }
 
@@ -196,5 +235,11 @@ int kfilnd_tunables_init(void)
                return -EINVAL;
        }
 
+       if (kfilnd_tcstr2num(traffic_class) == -1) {
+               CERROR("Invalid traffic_class \"%s\" - Valid values are: best_effort, low_latency, dedicated_access, bulk_data, scavenger, and network_ctrl\n",
+                      traffic_class);
+               return -EINVAL;
+       }
+
        return 0;
 }
index 825ee05..29592f8 100644 (file)
@@ -37,7 +37,7 @@ lustre_net_show_tunables(struct cYAML *tunables,
 int
 lustre_ni_show_tunables(struct cYAML *lnd_tunables,
                        __u32 net_type,
-                       struct lnet_lnd_tunables *lnd);
+                       struct lnet_lnd_tunables *lnd, bool backup);
 
 void
 lustre_yaml_extract_lnd_tunables(struct cYAML *tree,
index 208743d..b198a6d 100644 (file)
@@ -2403,7 +2403,7 @@ continue_without_msg_stats:
 
                        rc = lustre_ni_show_tunables(tunables,
                                                     LNET_NETTYP(rc_net),
-                                                    &lnd->lt_tun);
+                                                    &lnd->lt_tun, backup);
                        if (rc != LUSTRE_CFG_RC_NO_ERR &&
                            rc != LUSTRE_CFG_RC_NO_MATCH)
                                goto out;
index 23c0815..251e7da 100644 (file)
@@ -88,7 +88,8 @@ lustre_socklnd_show_tun(struct cYAML *lndparams,
 #ifdef HAVE_KFILND
 static int
 lustre_kfilnd_show_tun(struct cYAML *lndparams,
-                       struct lnet_ioctl_config_kfilnd_tunables *lnd_cfg)
+                      struct lnet_ioctl_config_kfilnd_tunables *lnd_cfg,
+                      bool backup)
 {
        if (cYAML_create_number(lndparams, "prov_major_version",
                                lnd_cfg->lnd_prov_major_version) == NULL)
@@ -102,6 +103,15 @@ lustre_kfilnd_show_tun(struct cYAML *lndparams,
                                lnd_cfg->lnd_auth_key) == NULL)
                return LUSTRE_CFG_RC_OUT_OF_MEM;
 
+       if (cYAML_create_string(lndparams, "traffic_class",
+                               lnd_cfg->lnd_traffic_class_str) == NULL)
+               return LUSTRE_CFG_RC_OUT_OF_MEM;
+
+       if (!backup &&
+           cYAML_create_number(lndparams, "traffic_class_num",
+                               lnd_cfg->lnd_traffic_class) == NULL)
+               return LUSTRE_CFG_RC_OUT_OF_MEM;
+
        return LUSTRE_CFG_RC_NO_ERR;
 }
 #endif
@@ -140,7 +150,8 @@ out:
 int
 lustre_ni_show_tunables(struct cYAML *lnd_tunables,
                        __u32 net_type,
-                       struct lnet_lnd_tunables *lnd)
+                       struct lnet_lnd_tunables *lnd,
+                       bool backup)
 {
        int rc = LUSTRE_CFG_RC_NO_MATCH;
 
@@ -153,8 +164,10 @@ lustre_ni_show_tunables(struct cYAML *lnd_tunables,
 #ifdef HAVE_KFILND
        else if (net_type == KFILND)
                rc = lustre_kfilnd_show_tun(lnd_tunables,
-                                           &lnd->lnd_tun_u.lnd_kfi);
+                                           &lnd->lnd_tun_u.lnd_kfi,
+                                           backup);
 #endif
+
        return rc;
 }
 
@@ -209,6 +222,7 @@ yaml_extract_kfi_tun(struct cYAML *tree,
        struct cYAML *prov_major_version = NULL;
        struct cYAML *prov_minor_version = NULL;
        struct cYAML *auth_key = NULL;
+       struct cYAML *traffic_class = NULL;
        struct cYAML *lndparams = NULL;
 
        lndparams = cYAML_get_object_item(tree, "lnd tunables");
@@ -228,6 +242,12 @@ yaml_extract_kfi_tun(struct cYAML *tree,
        auth_key = cYAML_get_object_item(lndparams, "auth_key");
        lnd_cfg->lnd_auth_key =
                (auth_key) ? auth_key->cy_valueint : 0;
+
+       traffic_class = cYAML_get_object_item(lndparams, "traffic_class");
+       if (traffic_class && traffic_class->cy_valuestring &&
+           strlen(traffic_class->cy_valuestring) < LNET_MAX_STR_LEN)
+               strcpy(&lnd_cfg->lnd_traffic_class_str[0],
+                      traffic_class->cy_valuestring);
 }
 #endif
 
index 72ed1a3..d72ec2e 100644 (file)
@@ -167,7 +167,8 @@ command_t net_cmds[] = {
         "\t--cpt: CPU Partitions configured net uses (e.g. [0,1]\n"
         "\t--conns-per-peer: number of connections per peer\n"
         "\t--skip-mr-route-setup: do not add linux route for the ni\n"
-        "\t--auth-key: Network authorization key (kfilnd only)\n"},
+        "\t--auth-key: Network authorization key (kfilnd only)\n"
+        "\t--traffic-class: Traffic class (kfilnd only)\n"},
        {"del", jt_del_ni, 0, "delete a network\n"
         "\t--net: net name (e.g. tcp0)\n"
         "\t--if: physical interface (e.g. eth0)\n"},
@@ -1315,6 +1316,7 @@ skip_general_settings:
        if (tunables->lt_tun.lnd_tun_u.lnd_sock.lnd_conns_per_peer > 0 ||
 #ifdef HAVE_KFILND
            tunables->lt_tun.lnd_tun_u.lnd_kfi.lnd_auth_key > 0 ||
+           tunables->lt_tun.lnd_tun_u.lnd_kfi.lnd_traffic_class_str[0] ||
 #endif
            tunables->lt_tun.lnd_tun_u.lnd_o2ib.lnd_conns_per_peer > 0) {
                yaml_scalar_event_initialize(&event, NULL,
@@ -1355,6 +1357,29 @@ skip_general_settings:
                        if (rc == 0)
                                goto error;
                }
+
+               if (tunables->lt_tun.lnd_tun_u.lnd_kfi.lnd_traffic_class_str[0]) {
+                       char *tc = &tunables->lt_tun.lnd_tun_u.lnd_kfi.lnd_traffic_class_str[0];
+
+                       yaml_scalar_event_initialize(&event, NULL,
+                                                    (yaml_char_t *)YAML_STR_TAG,
+                                                    (yaml_char_t *)"traffic_class",
+                                                    strlen("traffic_class"), 1, 0,
+                                                    YAML_PLAIN_SCALAR_STYLE);
+                       rc = yaml_emitter_emit(output, &event);
+                       if (rc == 0)
+                               goto error;
+
+                       yaml_scalar_event_initialize(&event, NULL,
+                                                    (yaml_char_t *)YAML_INT_TAG,
+                                                    (yaml_char_t *)tc,
+                                                    strlen(tc), 1, 0,
+                                                    YAML_PLAIN_SCALAR_STYLE);
+
+                       rc = yaml_emitter_emit(output, &event);
+                       if (rc == 0)
+                               goto error;
+               }
 #endif
                if (tunables->lt_tun.lnd_tun_u.lnd_sock.lnd_conns_per_peer > 0 ||
                    tunables->lt_tun.lnd_tun_u.lnd_o2ib.lnd_conns_per_peer > 0) {
@@ -1692,6 +1717,7 @@ static int jt_add_ni(int argc, char **argv)
 {
        char *ip2net = NULL;
        long int pto = -1, pc = -1, pbc = -1, cre = -1, cpp = -1, auth_key = -1;
+       char *traffic_class = NULL;
        struct cYAML *err_rc = NULL;
        int rc, opt, cpt_rc = -1;
        struct lnet_dlc_network_descr nw_descr;
@@ -1699,7 +1725,7 @@ static int jt_add_ni(int argc, char **argv)
        struct lnet_ioctl_config_lnd_tunables tunables;
        bool found = false;
        bool skip_mr_route_setup = false;
-       const char *const short_options = "a:b:c:i:k:m:n:p:r:s:t:";
+       const char *const short_options = "a:b:c:i:k:m:n:p:r:s:t:T:";
        static const struct option long_options[] = {
        { .name = "auth-key",     .has_arg = required_argument, .val = 'a' },
        { .name = "peer-buffer-credits",
@@ -1715,6 +1741,7 @@ static int jt_add_ni(int argc, char **argv)
        { .name = "credits",      .has_arg = required_argument, .val = 'r' },
        { .name = "cpt",          .has_arg = required_argument, .val = 's' },
        { .name = "peer-timeout", .has_arg = required_argument, .val = 't' },
+       { .name = "traffic-class", .has_arg = required_argument, .val = 'T' },
        { .name = NULL } };
        char *net_id = NULL;
 
@@ -1801,6 +1828,17 @@ static int jt_add_ni(int argc, char **argv)
                                continue;
                        }
                        break;
+               case 'T':
+                       traffic_class = optarg;
+                       if (strlen(traffic_class) == 0 ||
+                           strlen(traffic_class) >= LNET_MAX_STR_LEN) {
+                               cYAML_build_error(-1, -1, "ni", "add",
+                                                 "Invalid traffic-class argument",
+                                                 &err_rc);
+                               rc = LUSTRE_CFG_RC_BAD_PARAM;
+                               goto failed;
+                       }
+                       break;
                case '?':
                        print_help(net_cmds, "net", "add");
                default:
@@ -1812,6 +1850,13 @@ static int jt_add_ni(int argc, char **argv)
                tunables.lt_tun.lnd_tun_u.lnd_kfi.lnd_auth_key = auth_key;
                found = true;
        }
+
+       if (traffic_class && LNET_NETTYP(nw_descr.nw_id) == KFILND &&
+           strlen(traffic_class) < LNET_MAX_STR_LEN) {
+               strcpy(&tunables.lt_tun.lnd_tun_u.lnd_kfi.lnd_traffic_class_str[0],
+                      traffic_class);
+               found = true;
+       }
 #endif
 
        if (LNET_NETTYP(nw_descr.nw_id) == SOCKLND && (cpp > -1)) {