Whamcloud - gitweb
LU-16709 lnet: fix locking multiple NIDs of the MR peer 86/55486/3
authorSerguei Smirnov <ssmirnov@whamcloud.com>
Tue, 4 Apr 2023 21:02:51 +0000 (14:02 -0700)
committerOleg Drokin <green@whamcloud.com>
Sat, 22 Jun 2024 06:40:32 +0000 (06:40 +0000)
If Lustre identifies the same peer with multiple NIDs,
as a result of peer discovery it is possible that
the discovered peer is found to contain a NID which is locked
as primary by a different existing peer record.
In this case it is safe to merge the peer records,
but the NID which got locked the earliest should be
kept as primary.

This allows for the first of the two locked NIDs
to stay primary as intended for the purpose of communicating
with Lustre even if peer discovery succeeded
using a different NID of MR peer.

Lustre-change: https://review.whamcloud.com/50530
Lustre-commit: 3b7a02ee4d656b7b3e044713681da2f56dddb152

Fixes: aacb16191a ("LU-14668 lnet: Lock primary NID logic")
Signed-off-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Change-Id: Iec9f8b70053fe24cddee552358500dfad0234b7f
Reviewed-by: Cyril Bordage <cbordage@whamcloud.com>
Reviewed-by: Frank Sehr <fsehr@whamcloud.com>
Reviewed-by: Chris Horn <chris.horn@hpe.com>
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/55486
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/include/lnet/lib-types.h
lnet/include/uapi/linux/lnet/libcfs_ioctl.h
lnet/lnet/api-ni.c
lnet/lnet/peer.c
lnet/utils/lnetconfig/liblnetconfig.c
lnet/utils/lnetconfig/liblnetconfig.h
lnet/utils/lnetctl.c
lustre/tests/sanity-lnet.sh

index 0df6857..800837c 100644 (file)
@@ -757,6 +757,9 @@ struct lnet_peer {
 
        /* cached peer aliveness */
        bool                    lp_alive;
+
+       /* timestamp of primary nid lock */
+       __u64                   lp_prim_lock_ts;
 };
 
 /*
index 1bcf47b..d1ae4dc 100644 (file)
@@ -155,7 +155,8 @@ struct libcfs_ioctl_data {
 #define IOC_LIBCFS_GET_CONST_UDSP_INFO    _IOWR(IOC_LIBCFS_TYPE, 109, IOCTL_CONFIG_SIZE)
 #define IOC_LIBCFS_RESET_LNET_STATS       _IOWR(IOC_LIBCFS_TYPE, 110, IOCTL_CONFIG_SIZE)
 #define IOC_LIBCFS_SET_CONNS_PER_PEER     _IOWR(IOC_LIBCFS_TYPE, 111, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_MAX_NR                                        111
+#define IOC_LIBCFS_SET_PEER               _IOWR(IOC_LIBCFS_TYPE, 112, IOCTL_CONFIG_SIZE)
+#define IOC_LIBCFS_MAX_NR                                        112
 
 extern int libcfs_ioctl_data_adjust(struct libcfs_ioctl_data *data);
 
index fa7d702..1f9369e 100644 (file)
@@ -4304,6 +4304,30 @@ LNetCtl(unsigned int cmd, void *arg)
                return 0;
        }
 
+       case IOC_LIBCFS_SET_PEER: {
+               struct lnet_ioctl_peer_cfg *cfg = arg;
+               struct lnet_peer *lp;
+
+               if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
+                       return -EINVAL;
+
+               mutex_lock(&the_lnet.ln_api_mutex);
+               lnet_nid4_to_nid(cfg->prcfg_prim_nid, &nid);
+               lp = lnet_find_peer(&nid);
+               if (!lp) {
+                       mutex_unlock(&the_lnet.ln_api_mutex);
+                       return -ENOENT;
+               }
+               spin_lock(&lp->lp_lock);
+               lp->lp_state = cfg->prcfg_state;
+               spin_unlock(&lp->lp_lock);
+               lnet_peer_decref_locked(lp);
+               mutex_unlock(&the_lnet.ln_api_mutex);
+               CDEBUG(D_NET, "Set peer %s state to %u\n",
+                      libcfs_nid2str(cfg->prcfg_prim_nid), cfg->prcfg_state);
+               return 0;
+       }
+
        case IOC_LIBCFS_SET_CONNS_PER_PEER: {
                struct lnet_ioctl_reset_conns_per_peer_cfg *cfg = arg;
                int value;
index c2a54a5..7064452 100644 (file)
@@ -1498,8 +1498,10 @@ LNetPrimaryNID(lnet_nid_t nid)
         */
 again:
        spin_lock(&lp->lp_lock);
-       if (!(lp->lp_state & LNET_PEER_LOCK_PRIMARY) && lock_prim_nid)
+       if (!(lp->lp_state & LNET_PEER_LOCK_PRIMARY) && lock_prim_nid) {
                lp->lp_state |= LNET_PEER_LOCK_PRIMARY;
+               lp->lp_prim_lock_ts = ktime_get_ns();
+       }
 
        /* DD disabled, nothing to do */
        if (lnet_peer_discovery_disabled) {
@@ -1646,8 +1648,10 @@ lnet_peer_attach_peer_ni(struct lnet_peer *lp,
                        lnet_peer_clr_non_mr_pref_nids(lp);
                }
        }
-       if (flags & LNET_PEER_LOCK_PRIMARY)
+       if (flags & LNET_PEER_LOCK_PRIMARY) {
                lp->lp_state |= LNET_PEER_LOCK_PRIMARY;
+               lp->lp_prim_lock_ts = ktime_get_ns();
+       }
        spin_unlock(&lp->lp_lock);
 
        lp->lp_nnis++;
@@ -1836,24 +1840,53 @@ lnet_peer_add_nid(struct lnet_peer *lp, lnet_nid_t nid4, unsigned int flags)
                        struct lnet_peer *lp2 =
                                lpni->lpni_peer_net->lpn_peer;
                        int rtr_refcount = lp2->lp_rtr_refcount;
-
-                       /* If the new peer that this NID belongs to is
-                        * a primary NID for another peer which we're
-                        * suppose to preserve the Primary for then we
-                        * don't want to mess with it. But the
-                        * configuration is wrong at this point, so we
-                        * should flag both of these peers as in a bad
+                       unsigned int peer2_state;
+                       __u64 peer2_prim_lock_ts;
+
+                       /* If there's another peer that this NID belongs to
+                        * and the primary NID for that peer is locked,
+                        * then, unless it is the only NID, we don't want
+                        * to mess with it.
+                        * But the configuration is wrong at this point,
+                        * so we should flag both of these peers as in a bad
                         * state
                         */
-                       if (lp2->lp_state & LNET_PEER_LOCK_PRIMARY) {
+                       spin_lock(&lp2->lp_lock);
+                       if (lp2->lp_state & LNET_PEER_LOCK_PRIMARY &&
+                           lp2->lp_nnis > 1) {
+                               lp2->lp_state |= LNET_PEER_BAD_CONFIG;
+                               spin_unlock(&lp2->lp_lock);
                                spin_lock(&lp->lp_lock);
                                lp->lp_state |= LNET_PEER_BAD_CONFIG;
                                spin_unlock(&lp->lp_lock);
-                               spin_lock(&lp2->lp_lock);
-                               lp2->lp_state |= LNET_PEER_BAD_CONFIG;
-                               spin_unlock(&lp2->lp_lock);
+                               CERROR("Peer %s NID %s is already locked with peer %s\n",
+                                       libcfs_nidstr(&lp->lp_primary_nid),
+                                       libcfs_nidstr(&nid),
+                                       libcfs_nidstr(&lp2->lp_primary_nid));
                                goto out_free_lpni;
                        }
+                       peer2_state = lp2->lp_state;
+                       peer2_prim_lock_ts = lp2->lp_prim_lock_ts;
+                       spin_unlock(&lp2->lp_lock);
+
+                       /* NID which got locked the earliest should be
+                        * kept as primary. In case if the peers were
+                        * created by Lustre, this allows the
+                        * first listed NID to stay primary as intended
+                        * for the purpose of communicating with Lustre
+                        * even if peer discovery succeeded using
+                        * a different NID of MR peer.
+                        */
+                       spin_lock(&lp->lp_lock);
+                       if (peer2_state & LNET_PEER_LOCK_PRIMARY &&
+                           ((lp->lp_state & LNET_PEER_LOCK_PRIMARY &&
+                           peer2_prim_lock_ts < lp->lp_prim_lock_ts) ||
+                            !(lp->lp_state & LNET_PEER_LOCK_PRIMARY))) {
+                               lp->lp_prim_lock_ts = peer2_prim_lock_ts;
+                               lp->lp_primary_nid = nid;
+                               lp->lp_state |= LNET_PEER_LOCK_PRIMARY;
+                       }
+                       spin_unlock(&lp->lp_lock);
                        /*
                         * if we're trying to delete a router it means
                         * we're moving this peer NI to a new peer so must
index bbf5d38..eea8d42 100644 (file)
@@ -1838,6 +1838,31 @@ lustre_lnet_config_healthv(int value, bool all, lnet_nid_t nid,
        return rc;
 }
 
+static int
+lustre_lnet_config_peer(int state, lnet_nid_t nid, char *name,
+                       int seq_no, struct cYAML **err_rc)
+{
+       struct lnet_ioctl_peer_cfg data;
+       int rc = LUSTRE_CFG_RC_NO_ERR;
+       char err_str[LNET_MAX_STR_LEN] = "\"success\"";
+
+       LIBCFS_IOC_INIT_V2(data, prcfg_hdr);
+       data.prcfg_state = state;
+       data.prcfg_prim_nid = nid;
+       data.prcfg_cfg_nid = LNET_NID_ANY;
+
+       rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_SET_PEER, &data);
+       if (rc != 0) {
+               rc = -errno;
+               snprintf(err_str,
+                        sizeof(err_str), "Can not set peer property: %s",
+                        strerror(errno));
+       }
+
+       cYAML_build_error(rc, seq_no, ADD_CMD, name, err_str, err_rc);
+
+       return rc;
+}
 
 static int
 lustre_lnet_config_conns_per_peer(int value, bool all, lnet_nid_t nid,
@@ -1900,6 +1925,19 @@ int lustre_lnet_config_peer_ni_healthv(int value, bool all, char *lpni_nid,
                                          "peer_ni healthv", seq_no, err_rc);
 }
 
+int lustre_lnet_set_peer_state(int state, char *lpni_nid, int seq_no,
+                              struct cYAML **err_rc)
+{
+       lnet_nid_t nid;
+
+       if (lpni_nid)
+               nid = libcfs_str2nid(lpni_nid);
+       else
+               nid = LNET_NID_ANY;
+       return lustre_lnet_config_peer(state, nid, "peer state", seq_no,
+                                      err_rc);
+}
+
 int lustre_lnet_config_ni_conns_per_peer(int value, bool all, char *ni_nid,
                                         int seq_no, struct cYAML **err_rc)
 {
index 2c5397d..81bd6ff 100644 (file)
@@ -933,4 +933,14 @@ int lustre_lnet_del_udsp(unsigned int idx, int seq_no, struct cYAML **err_rc);
 int lustre_lnet_show_udsp(int idx, int seq_no, struct cYAML **show_rc,
                          struct cYAML **err_rc);
 
+/* lustre_lnet_set_peer_state
+ *     set peer state
+ *     lpni_nid - primary nid of the peer
+ *     seq_no - sequence number of the request
+ *     err_rc - [OUT] struct cYAML tree describing the error. Freed by
+ *     caller
+ */
+int lustre_lnet_set_peer_state(int state, char *lpni_nid, int seq_no,
+                              struct cYAML **err_rc);
+
 #endif /* LIB_LNET_CONFIG_API_H */
index 0f89e20..cae48d8 100644 (file)
@@ -274,7 +274,8 @@ command_t peer_cmds[] = {
        {"set", jt_set_peer_ni_value, 0, "set peer ni specific parameter\n"
         "\t--nid: Peer NI NID to set the\n"
         "\t--health: specify health value to set\n"
-        "\t--all: set all peer_nis values to the one specified\n"},
+        "\t--all: set all peer_nis values to the one specified\n"
+        "\t--state: set peer state (DANGEROUS: for test/debug only)"},
        { 0, 0, 0, NULL }
 };
 
@@ -1334,13 +1335,15 @@ static int set_value_helper(int argc, char **argv,
        char *nid = NULL;
        long int healthv = -1;
        bool all = false;
+       long int state = -1;
        int rc, opt;
        struct cYAML *err_rc = NULL;
 
-       const char *const short_options = "t:n:a";
+       const char *const short_options = "t:n:s:a";
        static const struct option long_options[] = {
                { .name = "nid", .has_arg = required_argument, .val = 'n' },
                { .name = "health", .has_arg = required_argument, .val = 't' },
+               { .name = "state", .has_arg = required_argument, .val = 's' },
                { .name = "all", .has_arg = no_argument, .val = 'a' },
                { .name = NULL } };
 
@@ -1358,6 +1361,10 @@ static int set_value_helper(int argc, char **argv,
                        if (parse_long(optarg, &healthv) != 0)
                                healthv = -1;
                        break;
+               case 's':
+                       if (parse_long(optarg, &state) != 0)
+                               state = -1;
+                       break;
                case 'a':
                        all = true;
                        break;
@@ -1366,7 +1373,10 @@ static int set_value_helper(int argc, char **argv,
                }
        }
 
-       rc = cb(healthv, all, nid, -1, &err_rc);
+       if (state > -1)
+               rc = lustre_lnet_set_peer_state(state, nid, -1, &err_rc);
+       else
+               rc = cb(healthv, all, nid, -1, &err_rc);
 
        if (rc != LUSTRE_CFG_RC_NO_ERR)
                cYAML_print_tree2file(stderr, err_rc);
index 40e3a24..588c576 100755 (executable)
@@ -2946,6 +2946,74 @@ test_300() {
 }
 run_test 300 "packaged LNet UAPI headers can be compiled"
 
+test_304() {
+       [[ ${NETTYPE} == tcp* ]] || skip "Need tcp NETTYPE"
+
+       cleanup_netns || error "Failed to cleanup netns before test execution"
+       cleanup_lnet || error "Failed to unload modules before test execution"
+
+       setup_fakeif || error "Failed to add fake IF"
+       have_interface "$FAKE_IF" ||
+               error "Expect $FAKE_IF configured but not found"
+
+       reinit_dlc || return $?
+
+       add_net "tcp" "${INTERFACES[0]}" || return $?
+       add_net "tcp" "$FAKE_IF" || return $?
+
+       local nid1=$(lctl list_nids | head -n 1)
+       local nid2=$(lctl list_nids | tail --lines 1)
+
+       check_ni_status "$nid1" up
+       check_ni_status "$nid2" up
+
+       do_lnetctl peer add --prim_nid ${nid2} --lock_prim ||
+               error "peer add failed $?"
+       local locked_peer_state=($(do_lnetctl peer show -v 4 --nid ${nid2} |
+               awk '/peer state/{print $NF}'))
+
+       # Expect peer state bits:
+       #   LNET_PEER_MULTI_RAIL(0) | LNET_PEER_CONFIGURED(3) |
+       #   LNET_PEER_LOCK_PRIMARY(20)
+       (( $locked_peer_state != "1048585")) &&
+               error "Wrong peer state \"$locked_peer_state\" expected 1048585"
+
+       # Clear LNET_PEER_CONFIGURED bit and verify
+       do_lnetctl peer set --nid ${nid2} --state 1048577 ||
+               error "peer add failed $?"
+       locked_peer_state=($(do_lnetctl peer show -v 4 --nid ${nid2} |
+               awk '/peer state/{print $NF}'))
+       (( $locked_peer_state != "1048577")) &&
+               error "Wrong peer state \"$locked_peer_state\" expected 1048577"
+       do_lnetctl discover ${nid1} ||
+               error "Failed to discover peer"
+
+       # Expect nid2 and nid1 peer entries to be consolidated,
+       # nid2 to stay primary
+       cat <<EOF >> $TMP/sanity-lnet-$testnum-expected.yaml
+peer:
+    - primary nid: ${nid2}
+      Multi-Rail: True
+      peer ni:
+        - nid: ${nid1}
+          state: NA
+        - nid: ${nid2}
+          state: NA
+EOF
+       $LNETCTL peer show > $TMP/sanity-lnet-$testnum-actual.yaml
+       compare_yaml_files ||
+               error "Unexpected peer configuration"
+
+       locked_peer_state=($(do_lnetctl peer show -v 4 --nid ${nid2} |
+               awk '/peer state/{print $NF}'))
+       # Expect peer state bits to be added:
+       #   LNET_PEER_DISCOVERED(4) | LNET_PEER_NIDS_UPTODATE(8)
+       (( $locked_peer_state != "1048849")) &&
+               error "Wrong peer state \"$locked_peer_state\" expected 1048849"
+       return 0
+}
+run_test 304 "Check locked primary peer nid consolidation"
+
 complete $SECONDS
 
 cleanup_testsuite