From 774c3d2883c86117e9e2da992843756a5b721064 Mon Sep 17 00:00:00 2001 From: James Simmons Date: Fri, 10 Nov 2023 09:30:22 -0500 Subject: [PATCH] LU-10391 lnet: missing some peer functionality For peers, if we encounter a bad setup in the peer nis settings during creation, we need to clean up the entire peer setup. For the peers API, if one of the peer nis is the same as the primary nid, then treat it as tearing down all peer nis in the peer deletion case. Change-Id: I57d2a63a9e31860a5ad7587f73f159a9cad2b3c9 Test-Parameters: trivial testlist=sanity-lnet Fixes: 8a0fdfa0b28 ("LU-10391 lnet: migrate peer NI control to Netlink") Signed-off-by: James Simmons Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/53018 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Chris Horn Reviewed-by: Serguei Smirnov Reviewed-by: Oleg Drokin --- lnet/lnet/api-ni.c | 33 +++++++++++++++++++++------------ lustre/tests/sanity-lnet.sh | 6 ++++++ 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index d1b98df..d0cacdd 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -6211,12 +6211,12 @@ out: } /* Called with ln_api_mutex */ -static int lnet_parse_peer_nis(struct nlattr *rlist, struct lnet_nid *pnid, - bool mr, struct genl_info *info) +static int lnet_parse_peer_nis(struct nlattr *rlist, struct genl_info *info, + struct lnet_nid *pnid, bool mr, + bool *create_some) { struct lnet_nid snid = LNET_ANY_NID; struct nlattr *props; - bool all = false; int rem, rc = 0; s64 num = -1; @@ -6247,8 +6247,8 @@ static int lnet_parse_peer_nis(struct nlattr *rlist, struct lnet_nid *pnid, GOTO(report_err, rc); } - if (LNET_NID_IS_ANY(&snid)) - all = true; + if (LNET_NID_IS_ANY(&snid) || nid_same(&snid, pnid)) + *create_some = false; } else if (nla_strcmp(props, "health stats") == 0) { struct nlattr *health; int rem2; @@ -6284,7 +6284,7 @@ static int lnet_parse_peer_nis(struct nlattr *rlist, struct lnet_nid *pnid, } if 
(info->nlhdr->nlmsg_flags & NLM_F_REPLACE && num != -1) { - lnet_peer_ni_set_healthv(pnid, num, all); + lnet_peer_ni_set_healthv(pnid, num, !*create_some); } else if (info->nlhdr->nlmsg_flags & NLM_F_CREATE) { bool lock_prim = info->nlhdr->nlmsg_flags & NLM_F_EXCL; @@ -6292,7 +6292,7 @@ static int lnet_parse_peer_nis(struct nlattr *rlist, struct lnet_nid *pnid, if (rc < 0) GENL_SET_ERR_MSG(info, "failed to add peer"); - } else if (!(info->nlhdr->nlmsg_flags & NLM_F_CREATE)) { + } else if (!(info->nlhdr->nlmsg_flags & NLM_F_CREATE) && *create_some) { bool force = info->nlhdr->nlmsg_flags & NLM_F_EXCL; rc = lnet_del_peer_ni(pnid, &snid, force); @@ -6310,6 +6310,7 @@ static int lnet_peer_ni_cmd(struct sk_buff *skb, struct genl_info *info) struct genlmsghdr *gnlh = nlmsg_data(nlh); struct nlattr *params = genlmsg_data(gnlh); int msg_len, rem, rc = 0; + struct lnet_nid pnid; struct nlattr *attr; mutex_lock(&the_lnet.ln_api_mutex); @@ -6333,7 +6334,6 @@ static int lnet_peer_ni_cmd(struct sk_buff *skb, struct genl_info *info) } nla_for_each_nested(attr, params, rem) { - struct lnet_nid pnid = LNET_ANY_NID; bool parse_peer_nis = false; struct nlattr *pnid_prop; int rem2; @@ -6341,6 +6341,7 @@ static int lnet_peer_ni_cmd(struct sk_buff *skb, struct genl_info *info) if (nla_type(attr) != LN_SCALAR_ATTR_LIST) continue; + pnid = LNET_ANY_NID; nla_for_each_nested(pnid_prop, attr, rem2) { bool mr = true; @@ -6439,13 +6440,14 @@ static int lnet_peer_ni_cmd(struct sk_buff *skb, struct genl_info *info) GOTO(report_err, rc = -EINVAL); } + parse_peer_nis = true; nla_for_each_nested(rlist, pnid_prop, rem3) { - rc = lnet_parse_peer_nis(rlist, &pnid, - mr, info); + rc = lnet_parse_peer_nis(rlist, info, + &pnid, mr, + &parse_peer_nis); if (rc < 0) GOTO(report_err, rc); } - parse_peer_nis = true; } } @@ -6465,12 +6467,19 @@ static int lnet_peer_ni_cmd(struct sk_buff *skb, struct genl_info *info) force); if (rc < 0) { GENL_SET_ERR_MSG(info, - "failed to del peer"); + "failed to del primary 
peer"); GOTO(report_err, rc); } } } report_err: + /* If we failed on creation and encounter a latter error then + * delete the primary nid. + */ + if (rc < 0 && info->nlhdr->nlmsg_flags & NLM_F_CREATE && + !LNET_NID_IS_ANY(&pnid)) + lnet_del_peer_ni(&pnid, &LNET_ANY_NID, + info->nlhdr->nlmsg_flags & NLM_F_EXCL); mutex_unlock(&the_lnet.ln_api_mutex); return rc; diff --git a/lustre/tests/sanity-lnet.sh b/lustre/tests/sanity-lnet.sh index 356ca68..c8db85c 100755 --- a/lustre/tests/sanity-lnet.sh +++ b/lustre/tests/sanity-lnet.sh @@ -451,6 +451,12 @@ test_7() { --nid [8-12]@gni,7.7.7.[1-4]@tcp,7.7.7.[5-9]@o2ib,[1-5]@kfi || error "Peer add failed $?" compare_peer_del "7@gni" + + echo "Delete peer that contains nid equal to primary nid" + do_lnetctl peer add --prim 1.1.1.1@tcp \ + --nid 1.1.1.1@tcp,2.2.2.2@tcp,3.3.3.3@tcp,4.4.4.4@tcp || + error "Peer add failed $?" + compare_peer_del "1.1.1.1@tcp" "1.1.1.1@tcp,2.2.2.2@tcp,3.3.3.3@tcp,4.4.4.4@tcp" } run_test 7 "Various peer delete tests" -- 1.8.3.1