Whamcloud - gitweb
LU-10391 lnet: missing some peer functionality 18/53018/3
author James Simmons <jsimmons@infradead.org>
Fri, 10 Nov 2023 14:30:22 +0000 (09:30 -0500)
committer Oleg Drokin <green@whamcloud.com>
Wed, 29 Nov 2023 21:25:08 +0000 (21:25 +0000)
For peers, if we encounter a bad setup in the peer NI
settings during creation, we need to clean up the entire
peer setup.

For the peers API, if one of the peer NIs is the same as
the primary NID, then in the peer deletion case treat it
as a request to tear down all of the peer's NIs.

Change-Id: I57d2a63a9e31860a5ad7587f73f159a9cad2b3c9
Test-Parameters: trivial testlist=sanity-lnet
Fixes: 8a0fdfa0b28 ("LU-10391 lnet: migrate peer NI control to Netlink")
Signed-off-by: James Simmons <jsimmons@infradead.org>
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/53018
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Chris Horn <chris.horn@hpe.com>
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/lnet/api-ni.c
lustre/tests/sanity-lnet.sh

index d1b98df..d0cacdd 100644 (file)
@@ -6211,12 +6211,12 @@ out:
 }
 
 /* Called with ln_api_mutex */
-static int lnet_parse_peer_nis(struct nlattr *rlist, struct lnet_nid *pnid,
-                              bool mr, struct genl_info *info)
+static int lnet_parse_peer_nis(struct nlattr *rlist, struct genl_info *info,
+                              struct lnet_nid *pnid, bool mr,
+                              bool *create_some)
 {
        struct lnet_nid snid = LNET_ANY_NID;
        struct nlattr *props;
-       bool all = false;
        int rem, rc = 0;
        s64 num = -1;
 
@@ -6247,8 +6247,8 @@ static int lnet_parse_peer_nis(struct nlattr *rlist, struct lnet_nid *pnid,
                                GOTO(report_err, rc);
                        }
 
-                       if (LNET_NID_IS_ANY(&snid))
-                               all = true;
+                       if (LNET_NID_IS_ANY(&snid) || nid_same(&snid, pnid))
+                               *create_some = false;
                } else if (nla_strcmp(props, "health stats") == 0) {
                        struct nlattr *health;
                        int rem2;
@@ -6284,7 +6284,7 @@ static int lnet_parse_peer_nis(struct nlattr *rlist, struct lnet_nid *pnid,
        }
 
        if (info->nlhdr->nlmsg_flags & NLM_F_REPLACE && num != -1) {
-               lnet_peer_ni_set_healthv(pnid, num, all);
+               lnet_peer_ni_set_healthv(pnid, num, !*create_some);
        } else if (info->nlhdr->nlmsg_flags & NLM_F_CREATE) {
                bool lock_prim = info->nlhdr->nlmsg_flags & NLM_F_EXCL;
 
@@ -6292,7 +6292,7 @@ static int lnet_parse_peer_nis(struct nlattr *rlist, struct lnet_nid *pnid,
                if (rc < 0)
                        GENL_SET_ERR_MSG(info,
                                         "failed to add peer");
-       } else if (!(info->nlhdr->nlmsg_flags & NLM_F_CREATE)) {
+       } else if (!(info->nlhdr->nlmsg_flags & NLM_F_CREATE) && *create_some) {
                bool force = info->nlhdr->nlmsg_flags & NLM_F_EXCL;
 
                rc = lnet_del_peer_ni(pnid, &snid, force);
@@ -6310,6 +6310,7 @@ static int lnet_peer_ni_cmd(struct sk_buff *skb, struct genl_info *info)
        struct genlmsghdr *gnlh = nlmsg_data(nlh);
        struct nlattr *params = genlmsg_data(gnlh);
        int msg_len, rem, rc = 0;
+       struct lnet_nid pnid;
        struct nlattr *attr;
 
        mutex_lock(&the_lnet.ln_api_mutex);
@@ -6333,7 +6334,6 @@ static int lnet_peer_ni_cmd(struct sk_buff *skb, struct genl_info *info)
        }
 
        nla_for_each_nested(attr, params, rem) {
-               struct lnet_nid pnid = LNET_ANY_NID;
                bool parse_peer_nis = false;
                struct nlattr *pnid_prop;
                int rem2;
@@ -6341,6 +6341,7 @@ static int lnet_peer_ni_cmd(struct sk_buff *skb, struct genl_info *info)
                if (nla_type(attr) != LN_SCALAR_ATTR_LIST)
                        continue;
 
+               pnid = LNET_ANY_NID;
                nla_for_each_nested(pnid_prop, attr, rem2) {
                        bool mr = true;
 
@@ -6439,13 +6440,14 @@ static int lnet_peer_ni_cmd(struct sk_buff *skb, struct genl_info *info)
                                        GOTO(report_err, rc = -EINVAL);
                                }
 
+                               parse_peer_nis = true;
                                nla_for_each_nested(rlist, pnid_prop, rem3) {
-                                       rc = lnet_parse_peer_nis(rlist, &pnid,
-                                                                mr, info);
+                                       rc = lnet_parse_peer_nis(rlist, info,
+                                                                &pnid, mr,
+                                                                &parse_peer_nis);
                                        if (rc < 0)
                                                GOTO(report_err, rc);
                                }
-                               parse_peer_nis = true;
                        }
                }
 
@@ -6465,12 +6467,19 @@ static int lnet_peer_ni_cmd(struct sk_buff *skb, struct genl_info *info)
                                              force);
                        if (rc < 0) {
                                GENL_SET_ERR_MSG(info,
-                                                "failed to del peer");
+                                                "failed to del primary peer");
                                GOTO(report_err, rc);
                        }
                }
        }
 report_err:
+       /* If we are creating a peer and encounter an error later on,
+        * then delete the primary nid.
+        */
+       if (rc < 0 && info->nlhdr->nlmsg_flags & NLM_F_CREATE &&
+           !LNET_NID_IS_ANY(&pnid))
+               lnet_del_peer_ni(&pnid, &LNET_ANY_NID,
+                                info->nlhdr->nlmsg_flags & NLM_F_EXCL);
        mutex_unlock(&the_lnet.ln_api_mutex);
 
        return rc;
index 356ca68..c8db85c 100755 (executable)
@@ -451,6 +451,12 @@ test_7() {
                --nid [8-12]@gni,7.7.7.[1-4]@tcp,7.7.7.[5-9]@o2ib,[1-5]@kfi ||
                error "Peer add failed $?"
        compare_peer_del "7@gni"
+
+       echo "Delete peer that contains nid equal to primary nid"
+       do_lnetctl peer add --prim 1.1.1.1@tcp \
+               --nid 1.1.1.1@tcp,2.2.2.2@tcp,3.3.3.3@tcp,4.4.4.4@tcp ||
+               error "Peer add failed $?"
+       compare_peer_del "1.1.1.1@tcp" "1.1.1.1@tcp,2.2.2.2@tcp,3.3.3.3@tcp,4.4.4.4@tcp"
 }
 run_test 7 "Various peer delete tests"