Whamcloud - gitweb
LU-12452 o2iblnd: allow setting IP ToS value (RoCE) 79/54079/5
authorEtienne AUJAMES <eaujames@ddn.com>
Sat, 23 Mar 2024 19:41:38 +0000 (15:41 -0400)
committerOleg Drokin <green@whamcloud.com>
Mon, 8 Apr 2024 15:35:42 +0000 (15:35 +0000)
This patch adds a new tunable to set the IP "Type of Service" (ToS) value
for RoCE QoS.

It adds the module parameter "tos":
...
options ko2iblnd tos=106

tos=-1 means "disable": the LND will not try to set the ToS value.

Test-Parameters: trivial testlist=sanity-lnet
Signed-off-by: Etienne AUJAMES <eaujames@ddn.com>
Change-Id: I453d2eb690ea7d1997c6151669e12085e75b629b
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54079
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Reviewed-by: Frank Sehr <fsehr@whamcloud.com>
Reviewed-by: Cyril Bordage <cbordage@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/include/uapi/linux/lnet/lnet-dlc.h
lnet/klnds/o2iblnd/o2iblnd.c
lnet/klnds/o2iblnd/o2iblnd.h
lnet/klnds/o2iblnd/o2iblnd_cb.c
lnet/klnds/o2iblnd/o2iblnd_modparams.c

index 9270145..99cd63f 100644 (file)
@@ -110,6 +110,7 @@ struct lnet_ioctl_config_o2iblnd_tunables {
        __u16 lnd_conns_per_peer;
        __u16 lnd_ntx;
        __u32 lnd_timeout;
+       __s16 lnd_tos;
 };
 
 struct lnet_ioctl_config_kfilnd_tunables {
index 03fce9f..be69163 100644 (file)
@@ -1255,6 +1255,10 @@ static const struct ln_key_list kiblnd_tunables_keys = {
                        .lkp_value      = "timeout",
                        .lkp_data_type  = NLA_U32,
                },
+               [LNET_NET_O2IBLND_TUNABLES_ATTR_LND_TOS] = {
+                       .lkp_value      = "tos",
+                       .lkp_data_type  = NLA_S16,
+               },
        },
 };
 
@@ -1290,6 +1294,8 @@ kiblnd_nl_get(int cmd, struct sk_buff *msg, int type, void *data)
                    tuns->lnd_conns_per_peer);
        nla_put_u32(msg, LNET_NET_O2IBLND_TUNABLES_ATTR_LND_TIMEOUT,
                    kiblnd_timeout());
+       nla_put_s16(msg, LNET_NET_O2IBLND_TUNABLES_ATTR_LND_TOS,
+                   tuns->lnd_tos);
 
        return 0;
 }
@@ -1386,6 +1392,10 @@ kiblnd_nl_set(int cmd, struct nlattr *attr, int type, void *data)
                        tunables->lnd_tun_u.lnd_o2ib.lnd_conns_per_peer = num;
                else
                        rc = -ERANGE;
+               break;
+       case LNET_NET_O2IBLND_TUNABLES_ATTR_LND_TOS:
+               num = nla_get_s64(attr);
+               tunables->lnd_tun_u.lnd_o2ib.lnd_tos = num;
                fallthrough;
        default:
                break;
index e7a28ab..4d01da9 100644 (file)
@@ -112,6 +112,7 @@ enum kiblnd_ni_lnd_tunables_attr {
        LNET_NET_O2IBLND_TUNABLES_ATTR_NTX,
        LNET_NET_O2IBLND_TUNABLES_ATTR_CONNS_PER_PEER,
        LNET_NET_O2IBLND_TUNABLES_ATTR_LND_TIMEOUT,
+       LNET_NET_O2IBLND_TUNABLES_ATTR_LND_TOS,
        __LNET_NET_O2IBLND_TUNABLES_ATTR_MAX_PLUS_ONE,
 };
 
index 956649a..5c58a5c 100644 (file)
@@ -3254,6 +3254,20 @@ kiblnd_active_connect(struct rdma_cm_id *cmid)
         return 0;
 }
 
+/* Set the IP ToS ("Type of Service") used by the RoCE QoS on @cmid, taken from the tunables of the NI behind cmid->context; a negative lnd_tos means "do not set". */
+static void
+kiblnd_set_tos(struct rdma_cm_id *cmid)
+{
+       struct kib_peer_ni *peer_ni = cmid->context; /* the cmid context is read as the peer_ni */
+       struct lnet_ioctl_config_o2iblnd_tunables *t;
+
+       t = &peer_ni->ibp_ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib; /* o2iblnd tunables of the peer's NI */
+       if (t->lnd_tos < 0) /* ToS disabled: leave the cmid untouched */
+               return;
+
+       rdma_set_service_type(cmid, t->lnd_tos);
+}
+
 int
 kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event)
 {
@@ -3295,6 +3309,7 @@ kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event)
                                event->status, cmid);
                         rc = event->status;
                } else {
+                       kiblnd_set_tos(cmid);
                        rc = rdma_resolve_route(
                                cmid, kiblnd_timeout() * 1000);
                        if (rc == 0) {
index 711583c..ffb68e5 100644 (file)
@@ -184,6 +184,22 @@ static unsigned int wrq_sge = 2;
 module_param(wrq_sge, uint, 0444);
 MODULE_PARM_DESC(wrq_sge, "# scatter/gather element per work request");
 
+static int tos = -1; /* module-wide default IP ToS; -1 means "do not set a ToS" */
+static int param_set_tos(const char *val, cfs_kernel_param_arg_t *kp); /* validating setter, defined later in this file */
+#ifdef HAVE_KERNEL_PARAM_OPS
+static const struct kernel_param_ops param_ops_tos = { /* custom setter paired with the stock int getter */
+       .set = param_set_tos,
+       .get = param_get_int,
+};
+
+#define param_check_tos(name, p) \
+       __param_check(name, p, int)
+module_param(tos, tos, 0444); /* 2nd argument is the parameter type name, matching param_ops_tos / param_check_tos above */
+#else
+module_param_call(tos, param_set_tos, param_get_int, &tos, 0444); /* fallback when HAVE_KERNEL_PARAM_OPS is not defined: same setter/getter pair */
+#endif
+MODULE_PARM_DESC(tos, "Set the type of service (=-1 to disable)");
+
 struct kib_tunables kiblnd_tunables = {
         .kib_dev_failover           = &dev_failover,
         .kib_service                = &service,
@@ -203,6 +219,25 @@ struct kib_tunables kiblnd_tunables = {
 
 struct lnet_ioctl_config_o2iblnd_tunables kib_default_tunables;
 
+static int param_set_tos(const char *val, cfs_kernel_param_arg_t *kp)
+{ /* Setter for the "tos" module parameter: parses @val as an int, accepts only -1..0xff and stores it through kp->arg. Returns 0, -EINVAL, -ERANGE or the kstrtoint() error. */
+       int rc, t;
+
+       if (!val) /* parameter given without a value */
+               return -EINVAL;
+
+       rc = kstrtoint(val, 0, &t); /* base 0: radix is auto-detected from the string prefix */
+       if (rc)
+               return rc;
+
+       if (t < -1 || t > 0xff) /* valid values: -1 (disable) or 0..0xff */
+               return -ERANGE;
+
+       *((int *)kp->arg) = t; /* kp->arg is written as the int backing this parameter */
+
+       return 0;
+}
+
 /* # messages/RDMAs in-flight */
 int
 kiblnd_msg_queue_size(int version, struct lnet_ni *ni)
@@ -312,10 +347,11 @@ kiblnd_tunables_setup(struct lnet_ni *ni)
                tunables->lnd_fmr_cache = fmr_cache;
        if (!tunables->lnd_ntx)
                tunables->lnd_ntx = ntx;
-       if (!tunables->lnd_conns_per_peer) {
+       if (!tunables->lnd_conns_per_peer)
                tunables->lnd_conns_per_peer = (conns_per_peer) ?
                        conns_per_peer : 1;
-       }
+       if (tunables->lnd_tos < 0)
+               tunables->lnd_tos = tos;
 
        tunables->lnd_timeout = kiblnd_timeout();
 
@@ -334,5 +370,6 @@ kiblnd_tunables_init(void)
        kib_default_tunables.lnd_fmr_cache = fmr_cache;
        kib_default_tunables.lnd_ntx = ntx;
        kib_default_tunables.lnd_conns_per_peer = conns_per_peer;
+       kib_default_tunables.lnd_tos = tos;
        return 0;
 }