From 4c809f7621425985e294c7d1da0ad6932be460af Mon Sep 17 00:00:00 2001 From: Etienne AUJAMES Date: Sat, 23 Mar 2024 15:41:38 -0400 Subject: [PATCH] LU-12452 o2iblnd: allow setting IP ToS value (RoCE) This patch adds a new tunable to set the IP "Type of Service" value for RoCE QoS. It adds the module parameter "tos": ... options ko2iblnd tos=106 tos=-1 means "disable": the LND will not try to set the ToS value. Test-Parameters: trivial testlist=sanity-lnet Signed-off-by: Etienne AUJAMES Change-Id: I453d2eb690ea7d1997c6151669e12085e75b629b Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54079 Tested-by: jenkins Tested-by: Maloo Reviewed-by: James Simmons Reviewed-by: Serguei Smirnov Reviewed-by: Frank Sehr Reviewed-by: Cyril Bordage Reviewed-by: Oleg Drokin --- lnet/include/uapi/linux/lnet/lnet-dlc.h | 1 + lnet/klnds/o2iblnd/o2iblnd.c | 10 ++++++++ lnet/klnds/o2iblnd/o2iblnd.h | 1 + lnet/klnds/o2iblnd/o2iblnd_cb.c | 15 ++++++++++++ lnet/klnds/o2iblnd/o2iblnd_modparams.c | 41 +++++++++++++++++++++++++++++++-- 5 files changed, 66 insertions(+), 2 deletions(-) diff --git a/lnet/include/uapi/linux/lnet/lnet-dlc.h b/lnet/include/uapi/linux/lnet/lnet-dlc.h index 9270145..99cd63f 100644 --- a/lnet/include/uapi/linux/lnet/lnet-dlc.h +++ b/lnet/include/uapi/linux/lnet/lnet-dlc.h @@ -110,6 +110,7 @@ struct lnet_ioctl_config_o2iblnd_tunables { __u16 lnd_conns_per_peer; __u16 lnd_ntx; __u32 lnd_timeout; + __s16 lnd_tos; }; struct lnet_ioctl_config_kfilnd_tunables { diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c index 03fce9f..be69163 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.c +++ b/lnet/klnds/o2iblnd/o2iblnd.c @@ -1255,6 +1255,10 @@ static const struct ln_key_list kiblnd_tunables_keys = { .lkp_value = "timeout", .lkp_data_type = NLA_U32, }, + [LNET_NET_O2IBLND_TUNABLES_ATTR_LND_TOS] = { + .lkp_value = "tos", + .lkp_data_type = NLA_S16, + }, }, }; @@ -1290,6 +1294,8 @@ kiblnd_nl_get(int cmd, struct sk_buff *msg, int type, void *data) 
tuns->lnd_conns_per_peer); nla_put_u32(msg, LNET_NET_O2IBLND_TUNABLES_ATTR_LND_TIMEOUT, kiblnd_timeout()); + nla_put_s16(msg, LNET_NET_O2IBLND_TUNABLES_ATTR_LND_TOS, + tuns->lnd_tos); return 0; } @@ -1386,6 +1392,10 @@ kiblnd_nl_set(int cmd, struct nlattr *attr, int type, void *data) tunables->lnd_tun_u.lnd_o2ib.lnd_conns_per_peer = num; else rc = -ERANGE; + break; + case LNET_NET_O2IBLND_TUNABLES_ATTR_LND_TOS: + num = nla_get_s64(attr); + tunables->lnd_tun_u.lnd_o2ib.lnd_tos = num; fallthrough; default: break; diff --git a/lnet/klnds/o2iblnd/o2iblnd.h b/lnet/klnds/o2iblnd/o2iblnd.h index e7a28ab..4d01da9 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.h +++ b/lnet/klnds/o2iblnd/o2iblnd.h @@ -112,6 +112,7 @@ enum kiblnd_ni_lnd_tunables_attr { LNET_NET_O2IBLND_TUNABLES_ATTR_NTX, LNET_NET_O2IBLND_TUNABLES_ATTR_CONNS_PER_PEER, LNET_NET_O2IBLND_TUNABLES_ATTR_LND_TIMEOUT, + LNET_NET_O2IBLND_TUNABLES_ATTR_LND_TOS, __LNET_NET_O2IBLND_TUNABLES_ATTR_MAX_PLUS_ONE, }; diff --git a/lnet/klnds/o2iblnd/o2iblnd_cb.c b/lnet/klnds/o2iblnd/o2iblnd_cb.c index 956649a..5c58a5c 100644 --- a/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -3254,6 +3254,20 @@ kiblnd_active_connect(struct rdma_cm_id *cmid) return 0; } +/* set the IP ToS ("Type of Service") used by the RoCE QoS */ +static void +kiblnd_set_tos(struct rdma_cm_id *cmid) +{ + struct kib_peer_ni *peer_ni = cmid->context; + struct lnet_ioctl_config_o2iblnd_tunables *t; + + t = &peer_ni->ibp_ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib; + if (t->lnd_tos < 0) + return; + + rdma_set_service_type(cmid, t->lnd_tos); +} + int kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event) { @@ -3295,6 +3309,7 @@ kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event) event->status, cmid); rc = event->status; } else { + kiblnd_set_tos(cmid); rc = rdma_resolve_route( cmid, kiblnd_timeout() * 1000); if (rc == 0) { diff --git a/lnet/klnds/o2iblnd/o2iblnd_modparams.c 
b/lnet/klnds/o2iblnd/o2iblnd_modparams.c index 711583c..ffb68e5 100644 --- a/lnet/klnds/o2iblnd/o2iblnd_modparams.c +++ b/lnet/klnds/o2iblnd/o2iblnd_modparams.c @@ -184,6 +184,22 @@ static unsigned int wrq_sge = 2; module_param(wrq_sge, uint, 0444); MODULE_PARM_DESC(wrq_sge, "# scatter/gather element per work request"); +static int tos = -1; +static int param_set_tos(const char *val, cfs_kernel_param_arg_t *kp); +#ifdef HAVE_KERNEL_PARAM_OPS +static const struct kernel_param_ops param_ops_tos = { + .set = param_set_tos, + .get = param_get_int, +}; + +#define param_check_tos(name, p) \ + __param_check(name, p, int) +module_param(tos, tos, 0444); +#else +module_param_call(tos, param_set_tos, param_get_int, &tos, 0444); +#endif +MODULE_PARM_DESC(tos, "Set the type of service (=-1 to disable)"); + struct kib_tunables kiblnd_tunables = { .kib_dev_failover = &dev_failover, .kib_service = &service, @@ -203,6 +219,25 @@ struct kib_tunables kiblnd_tunables = { struct lnet_ioctl_config_o2iblnd_tunables kib_default_tunables; +static int param_set_tos(const char *val, cfs_kernel_param_arg_t *kp) +{ + int rc, t; + + if (!val) + return -EINVAL; + + rc = kstrtoint(val, 0, &t); + if (rc) + return rc; + + if (t < -1 || t > 0xff) + return -ERANGE; + + *((int *)kp->arg) = t; + + return 0; +} + /* # messages/RDMAs in-flight */ int kiblnd_msg_queue_size(int version, struct lnet_ni *ni) @@ -312,10 +347,11 @@ kiblnd_tunables_setup(struct lnet_ni *ni) tunables->lnd_fmr_cache = fmr_cache; if (!tunables->lnd_ntx) tunables->lnd_ntx = ntx; - if (!tunables->lnd_conns_per_peer) { + if (!tunables->lnd_conns_per_peer) tunables->lnd_conns_per_peer = (conns_per_peer) ? 
conns_per_peer : 1; - } + if (tunables->lnd_tos < 0) + tunables->lnd_tos = tos; tunables->lnd_timeout = kiblnd_timeout(); @@ -334,5 +370,6 @@ kiblnd_tunables_init(void) kib_default_tunables.lnd_fmr_cache = fmr_cache; kib_default_tunables.lnd_ntx = ntx; kib_default_tunables.lnd_conns_per_peer = conns_per_peer; + kib_default_tunables.lnd_tos = tos; return 0; } -- 1.8.3.1