Whamcloud - gitweb
LU-14939 lnet: Allow specifying a source NID for lnetctl ping 27/44727/5
author Chris Horn <chris.horn@hpe.com>
Thu, 12 Aug 2021 16:26:07 +0000 (11:26 -0500)
committer Oleg Drokin <green@whamcloud.com>
Tue, 30 Nov 2021 03:47:22 +0000 (03:47 +0000)
Add a new --source option for lnetctl ping command. This allows the
user to specify a local NI from which to send the ping. Specifying a
source also ensures that the specified destination NID is used. Otherwise,
pings to multi-rail peers may end up going to a different peer NI
based on the multi-rail selection algorithm. The ability to specify
a source NI, and thus fix the destination NI, is a great help in
troubleshooting communication issues between multi-rail peers.

Add a test to exercise the lnetctl ping --source option.

HPE-bug-id: LUS-10296
Test-Parameters: trivial testlist=sanity-lnet
Signed-off-by: Chris Horn <chris.horn@hpe.com>
Change-Id: I454217b30a92414de537880f076a11a693b1f0b3
Reviewed-on: https://review.whamcloud.com/44727
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andriy Skulysh <andriy.skulysh@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/include/uapi/linux/lnet/lnet-dlc.h
lnet/lnet/api-ni.c
lnet/utils/lnetconfig/liblnetconfig.c
lnet/utils/lnetconfig/liblnetconfig.h
lnet/utils/lnetctl.c
lustre/tests/sanity-lnet.sh

index fccadbe..2b2c05f 100644 (file)
@@ -132,6 +132,7 @@ struct lnet_ioctl_ping_data {
        __u32 mr_info;
        struct lnet_process_id ping_id;
        struct lnet_process_id __user *ping_buf;
        __u32 mr_info;
        struct lnet_process_id ping_id;
        struct lnet_process_id __user *ping_buf;
+       lnet_nid_t ping_src;
 };
 
 struct lnet_ioctl_config_data {
 };
 
 struct lnet_ioctl_config_data {
index f7be169..7598c8d 100644 (file)
@@ -261,8 +261,9 @@ static void lnet_set_lnd_timeout(void)
  */
 static atomic_t lnet_dlc_seq_no = ATOMIC_INIT(0);
 
  */
 static atomic_t lnet_dlc_seq_no = ATOMIC_INIT(0);
 
-static int lnet_ping(struct lnet_process_id id, signed long timeout,
-                    struct lnet_process_id __user *ids, int n_ids);
+static int lnet_ping(struct lnet_process_id id, lnet_nid_t src_nid,
+                    signed long timeout, struct lnet_process_id __user *ids,
+                    int n_ids);
 
 static int lnet_discover(struct lnet_process_id id, __u32 force,
                         struct lnet_process_id __user *ids, int n_ids);
 
 static int lnet_discover(struct lnet_process_id id, __u32 force,
                         struct lnet_process_id __user *ids, int n_ids);
@@ -4334,7 +4335,7 @@ LNetCtl(unsigned int cmd, void *arg)
                else
                        timeout = nsecs_to_jiffies(data->ioc_u32[1] * NSEC_PER_MSEC);
 
                else
                        timeout = nsecs_to_jiffies(data->ioc_u32[1] * NSEC_PER_MSEC);
 
-               rc = lnet_ping(id, timeout, data->ioc_pbuf1,
+               rc = lnet_ping(id, LNET_NID_ANY, timeout, data->ioc_pbuf1,
                               data->ioc_plen1 / sizeof(struct lnet_process_id));
 
                if (rc < 0)
                               data->ioc_plen1 / sizeof(struct lnet_process_id));
 
                if (rc < 0)
@@ -4348,6 +4349,18 @@ LNetCtl(unsigned int cmd, void *arg)
                struct lnet_ioctl_ping_data *ping = arg;
                struct lnet_peer *lp;
                signed long timeout;
                struct lnet_ioctl_ping_data *ping = arg;
                struct lnet_peer *lp;
                signed long timeout;
+               lnet_nid_t src_nid = LNET_NID_ANY;
+
+               /* Check if the supplied ping data supports source nid
+                * NB: This check is sufficient if lnet_ioctl_ping_data has
+                * additional fields added, but if they are re-ordered or
+                * fields removed then this will break. It is expected that
+                * these ioctls will be replaced with netlink implementation, so
+                * it is probably not worth coming up with a more robust version
+                * compatibility scheme.
+                */
+               if (ping->ping_hdr.ioc_len >= sizeof(struct lnet_ioctl_ping_data))
+                       src_nid = ping->ping_src;
 
                /* If timeout is negative then set default of 3 minutes */
                if (((s32)ping->op_param) <= 0 ||
 
                /* If timeout is negative then set default of 3 minutes */
                if (((s32)ping->op_param) <= 0 ||
@@ -4356,7 +4369,7 @@ LNetCtl(unsigned int cmd, void *arg)
                else
                        timeout = nsecs_to_jiffies(ping->op_param * NSEC_PER_MSEC);
 
                else
                        timeout = nsecs_to_jiffies(ping->op_param * NSEC_PER_MSEC);
 
-               rc = lnet_ping(ping->ping_id, timeout,
+               rc = lnet_ping(ping->ping_id, src_nid, timeout,
                               ping->ping_buf,
                               ping->ping_count);
                if (rc < 0)
                               ping->ping_buf,
                               ping->ping_count);
                if (rc < 0)
@@ -4622,8 +4635,9 @@ lnet_ping_event_handler(struct lnet_event *event)
                complete(&pd->completion);
 }
 
                complete(&pd->completion);
 }
 
-static int lnet_ping(struct lnet_process_id id, signed long timeout,
-                    struct lnet_process_id __user *ids, int n_ids)
+static int lnet_ping(struct lnet_process_id id, lnet_nid_t src_nid,
+                    signed long timeout, struct lnet_process_id __user *ids,
+                    int n_ids)
 {
        struct lnet_md md = { NULL };
        struct ping_data pd = { 0 };
 {
        struct lnet_md md = { NULL };
        struct ping_data pd = { 0 };
@@ -4669,7 +4683,7 @@ static int lnet_ping(struct lnet_process_id id, signed long timeout,
                goto fail_ping_buffer_decref;
        }
 
                goto fail_ping_buffer_decref;
        }
 
-       rc = LNetGet(LNET_NID_ANY, pd.mdh, id,
+       rc = LNetGet(src_nid, pd.mdh, id,
                     LNET_RESERVED_PORTAL,
                     LNET_PROTO_PING_MATCHBITS, 0, false);
 
                     LNET_RESERVED_PORTAL,
                     LNET_PROTO_PING_MATCHBITS, 0, false);
 
index e9541f4..0b8b110 100644 (file)
@@ -420,9 +420,9 @@ static int dispatch_peer_ni_cmd(__u32 cmd, struct lnet_ioctl_peer_cfg *data,
        return rc;
 }
 
        return rc;
 }
 
-static int infra_ping_nid(char *ping_nids, char *oper, int param, int ioc_call,
-                         int seq_no, struct cYAML **show_rc,
-                         struct cYAML **err_rc)
+static int infra_ping_nid(char *ping_nids, char *src_nidstr, char *oper,
+                         int param, int ioc_call, int seq_no,
+                         struct cYAML **show_rc, struct cYAML **err_rc)
 {
        void *data = NULL;
        struct lnet_ioctl_ping_data ping;
 {
        void *data = NULL;
        struct lnet_ioctl_ping_data ping;
@@ -436,6 +436,7 @@ static int infra_ping_nid(char *ping_nids, char *oper, int param, int ioc_call,
        int rc = LUSTRE_CFG_RC_OUT_OF_MEM;
        int i;
        bool flag = false;
        int rc = LUSTRE_CFG_RC_OUT_OF_MEM;
        int i;
        bool flag = false;
+       lnet_nid_t src;
 
        len = (sizeof(struct lnet_process_id) * LNET_INTERFACES_MAX_DEFAULT);
 
 
        len = (sizeof(struct lnet_process_id) * LNET_INTERFACES_MAX_DEFAULT);
 
@@ -452,6 +453,21 @@ static int infra_ping_nid(char *ping_nids, char *oper, int param, int ioc_call,
        if (ping_node == NULL)
                goto out;
 
        if (ping_node == NULL)
                goto out;
 
+       if (src_nidstr) {
+               src = libcfs_str2nid(src_nidstr);
+               if (src == LNET_NID_ANY) {
+                       snprintf(err_str, sizeof(err_str),
+                                "\"cannot parse source NID '%s'\"",
+                                src_nidstr);
+                       rc = LUSTRE_CFG_RC_BAD_PARAM;
+                       cYAML_build_error(rc, seq_no, MANAGE_CMD,
+                                         oper, err_str, err_rc);
+                       goto out;
+               }
+       } else {
+               src = LNET_NID_ANY;
+       }
+
        /* tokenise each nid in string ping_nids */
        token = strtok(ping_nids, ",");
 
        /* tokenise each nid in string ping_nids */
        token = strtok(ping_nids, ",");
 
@@ -516,6 +532,7 @@ static int infra_ping_nid(char *ping_nids, char *oper, int param, int ioc_call,
                LIBCFS_IOC_INIT_V2(ping, ping_hdr);
                ping.ping_hdr.ioc_len = sizeof(ping);
                ping.ping_id          = id;
                LIBCFS_IOC_INIT_V2(ping, ping_hdr);
                ping.ping_hdr.ioc_len = sizeof(ping);
                ping.ping_id          = id;
+               ping.ping_src         = src;
                ping.op_param         = param;
                ping.ping_count       = LNET_INTERFACES_MAX_DEFAULT;
                ping.ping_buf         = data;
                ping.op_param         = param;
                ping.ping_count       = LNET_INTERFACES_MAX_DEFAULT;
                ping.ping_buf         = data;
@@ -591,13 +608,14 @@ out:
        return rc;
 }
 
        return rc;
 }
 
-int lustre_lnet_ping_nid(char *ping_nids, int timeout, int seq_no,
-                        struct cYAML **show_rc, struct cYAML **err_rc)
+int lustre_lnet_ping_nid(char *ping_nids, char *src_nidstr, int timeout,
+                        int seq_no, struct cYAML **show_rc,
+                        struct cYAML **err_rc)
 {
        int rc;
 
 {
        int rc;
 
-       rc = infra_ping_nid(ping_nids, "ping", timeout, IOC_LIBCFS_PING_PEER,
-                           seq_no, show_rc, err_rc);
+       rc = infra_ping_nid(ping_nids, src_nidstr, "ping", timeout,
+                           IOC_LIBCFS_PING_PEER, seq_no, show_rc, err_rc);
        return rc;
 }
 
        return rc;
 }
 
@@ -606,8 +624,8 @@ int lustre_lnet_discover_nid(char *ping_nids, int force, int seq_no,
 {
        int rc;
 
 {
        int rc;
 
-       rc = infra_ping_nid(ping_nids, "discover", force, IOC_LIBCFS_DISCOVER,
-                           seq_no, show_rc, err_rc);
+       rc = infra_ping_nid(ping_nids, NULL, "discover", force,
+                           IOC_LIBCFS_DISCOVER, seq_no, show_rc, err_rc);
        return rc;
 }
 
        return rc;
 }
 
@@ -5183,13 +5201,15 @@ static int handle_yaml_show_global_settings(struct cYAML *tree,
 static int handle_yaml_ping(struct cYAML *tree, struct cYAML **show_rc,
                            struct cYAML **err_rc)
 {
 static int handle_yaml_ping(struct cYAML *tree, struct cYAML **show_rc,
                            struct cYAML **err_rc)
 {
-       struct cYAML *seq_no, *nid, *timeout;
+       struct cYAML *seq_no, *nid, *timeout, *src_nid;
 
        seq_no = cYAML_get_object_item(tree, "seq_no");
        nid = cYAML_get_object_item(tree, "primary nid");
        timeout = cYAML_get_object_item(tree, "timeout");
 
        seq_no = cYAML_get_object_item(tree, "seq_no");
        nid = cYAML_get_object_item(tree, "primary nid");
        timeout = cYAML_get_object_item(tree, "timeout");
+       src_nid = cYAML_get_object_item(tree, "source_nid");
 
        return lustre_lnet_ping_nid((nid) ? nid->cy_valuestring : NULL,
 
        return lustre_lnet_ping_nid((nid) ? nid->cy_valuestring : NULL,
+                                   (src_nid) ? src_nid->cy_valuestring : NULL,
                                    (timeout) ? timeout->cy_valueint : 1000,
                                    (seq_no) ? seq_no->cy_valueint : -1,
                                    show_rc, err_rc);
                                    (timeout) ? timeout->cy_valueint : 1000,
                                    (seq_no) ? seq_no->cy_valueint : -1,
                                    show_rc, err_rc);
index 67403ec..dae9268 100644 (file)
@@ -698,13 +698,14 @@ int lustre_lnet_list_peer(int seq_no,
  *   Ping the nid list, pnids.
  *
  *    pnids - NID list to ping.
  *   Ping the nid list, pnids.
  *
  *    pnids - NID list to ping.
+ *    src_nidstr - source NID
 *    timeout - timeout (milliseconds) for ping.
  *    seq_no - sequence number of the command.
  *    show_rc - YAML structure of the resultant show.
 *    err_rc - YAML structure of the resultant return code.
  *
  */
 *    timeout - timeout (milliseconds) for ping.
  *    seq_no - sequence number of the command.
  *    show_rc - YAML structure of the resultant show.
 *    err_rc - YAML structure of the resultant return code.
  *
  */
-int lustre_lnet_ping_nid(char *pnid, int timeout, int seq_no,
+int lustre_lnet_ping_nid(char *pnid, char *src_nidstr, int timeout, int seq_no,
                        struct cYAML **show_rc, struct cYAML **err_rc);
 
 /* lustre_lnet_discover_nid
                        struct cYAML **show_rc, struct cYAML **err_rc);
 
 /* lustre_lnet_discover_nid
index 23888fb..6c76812 100644 (file)
@@ -2178,21 +2178,27 @@ static int jt_ping(int argc, char **argv)
        struct cYAML *show_rc = NULL;
        int timeout = 1000;
        int rc = 0, opt;
        struct cYAML *show_rc = NULL;
        int timeout = 1000;
        int rc = 0, opt;
+       char *src_nidstr = NULL;
 
 
-       const char *const short_options = "ht:";
+       const char *const short_options = "hs:t:";
        const struct option long_options[] = {
        { .name = "help",       .has_arg = no_argument,         .val = 'h' },
        { .name = "timeout",    .has_arg = required_argument,   .val = 't' },
        const struct option long_options[] = {
        { .name = "help",       .has_arg = no_argument,         .val = 'h' },
        { .name = "timeout",    .has_arg = required_argument,   .val = 't' },
+       { .name = "source",     .has_arg = required_argument,   .val = 's' },
        { .name = NULL } };
 
        while ((opt = getopt_long(argc, argv, short_options,
                                  long_options, NULL)) != -1) {
                switch (opt) {
        { .name = NULL } };
 
        while ((opt = getopt_long(argc, argv, short_options,
                                  long_options, NULL)) != -1) {
                switch (opt) {
+               case 's':
+                       src_nidstr = optarg;
+                       break;
                case 't':
                        timeout = 1000 * atol(optarg);
                        break;
                case 'h':
                        printf("ping nid[,nid,...]\n"
                case 't':
                        timeout = 1000 * atol(optarg);
                        break;
                case 'h':
                        printf("ping nid[,nid,...]\n"
+                              "\t --source: source nid\n"
                               "\t --timeout: ping timeout\n"
                               "\t --help: display this help\n");
                        return 0;
                               "\t --timeout: ping timeout\n"
                               "\t --help: display this help\n");
                        return 0;
@@ -2202,7 +2208,8 @@ static int jt_ping(int argc, char **argv)
        }
 
        for (; optind < argc; optind++)
        }
 
        for (; optind < argc; optind++)
-               rc = lustre_lnet_ping_nid(argv[optind], timeout, -1, &show_rc, &err_rc);
+               rc = lustre_lnet_ping_nid(argv[optind], src_nidstr, timeout, -1,
+                                         &show_rc, &err_rc);
 
        if (show_rc)
                cYAML_print_tree(show_rc);
 
        if (show_rc)
                cYAML_print_tree(show_rc);
index f64fcec..ea76ab1 100755 (executable)
@@ -2072,6 +2072,126 @@ test_214() {
 }
 run_test 214 "Check local NI status when link is downed"
 
 }
 run_test 214 "Check local NI status when link is downed"
 
+get_ni_stat() {
+       local nid=$1
+       local stat=$2
+
+       $LNETCTL net show -v 2 |
+               egrep -e nid -e $stat |
+               grep -wA 1 $nid |
+               awk '/'$stat':/{print $NF}'
+}
+
+ni_stats_pre() {
+       local nidvar s
+       for nidvar in nid1 nid2; do
+               for stat in send_count recv_count; do
+                       s=$(get_ni_stat ${!nidvar} $stat)
+                       eval ${nidvar}_pre_${stat}=$s
+               done
+       done
+}
+
+ni_stats_post() {
+       local nidvar s
+       for nidvar in nid1 nid2; do
+               for stat in send_count recv_count; do
+                       s=$(get_ni_stat ${!nidvar} $stat)
+                       eval ${nidvar}_post_${stat}=$s
+               done
+       done
+}
+
+ni_stat_changed() {
+       local nidvar=$1
+       local stat=$2
+
+       local pre post
+       eval pre=\${${nidvar}_pre_${stat}}
+       eval post=\${${nidvar}_post_${stat}}
+
+       echo "${!nidvar} pre ${stat} $pre post ${stat} $post"
+
+       [[ $pre -ne $post ]]
+}
+
+test_215() {
+       have_interface "eth0" || skip "Need eth0 interface with ipv4 configured"
+
+       cleanup_netns || error "Failed to cleanup netns before test execution"
+       cleanup_lnet || error "Failed to unload modules before test execution"
+
+       reinit_dlc || return $?
+
+       add_net "tcp1" "eth0" || return $?
+       add_net "tcp2" "eth0" || return $?
+
+       local nid1=$($LCTL list_nids | head -n 1)
+       local nid2=$($LCTL list_nids | tail --lines 1)
+
+       do_lnetctl peer add --prim $nid1 --nid $nid2 ||
+               error "Failed to add peer"
+
+       local npings=25
+
+       for nidvarA in nid1 nid2; do
+               src=${!nidvarA}
+               dst=${!nidvarA}
+               for nidvarB in nid1 nid2; do
+                       [[ $nidvarA == $nidvarB ]] && continue
+
+                       ni_stats_pre
+
+                       echo "$LNETCTL ping $dst x $npings"
+                       for i in $(seq 1 $npings); do
+                               $LNETCTL ping $dst &>/dev/null ||
+                                       error "$LNETCTL ping $dst failed"
+                       done
+
+                       ni_stats_post
+
+                       # No source specified, sends to either NID should cause
+                       # counts to increase across both NIs
+                       for nidvar in nid1 nid2; do
+                               for stat in send_count recv_count; do
+                                       ni_stat_changed $nidvar $stat ||
+                                               error "$stat unchanged for ${!nidvar}"
+                               done
+                       done
+
+                       ni_stats_pre
+
+                       echo "$LNETCTL ping --source $src $dst x $npings"
+                       for i in $(seq 1 $npings); do
+                               $LNETCTL ping --source $src $dst &>/dev/null ||
+                                       error "$LNETCTL ping --source $src $dst failed"
+                       done
+
+                       ni_stats_post
+
+                       # src nid == dest nid means stats for the _other_ NI
+                       # should be unchanged
+                       for nidvar in nid1 nid2; do
+                               for stat in send_count recv_count; do
+                                       if [[ ${!nidvar} == $src ]]; then
+                                               ni_stat_changed $nidvar $stat ||
+                                                       error "$stat unchanged for ${!nidvar}"
+                                       else
+                                               ni_stat_changed $nidvar $stat &&
+                                                       error "$stat changed for ${!nidvar}"
+                                       fi
+                               done
+                       done
+               done
+               # Double number of pings for next iteration because the net
+               # sequence numbers will have diverged
+               npings=$(($npings * 2))
+       done
+
+       return 0
+}
+run_test 215 "Test lnetctl ping --source option"
+
 test_230() {
        # LU-12815
        echo "Check valid values; Should succeed"
 test_230() {
        # LU-12815
        echo "Check valid values; Should succeed"