Whamcloud - gitweb
LU-16393 o2iblnd: add IBLND_REJECT_EARLY reject reason 51/51651/3
author Serguei Smirnov <ssmirnov@whamcloud.com>
Thu, 13 Jul 2023 00:29:56 +0000 (17:29 -0700)
committer Oleg Drokin <green@whamcloud.com>
Sat, 19 Aug 2023 05:39:26 +0000 (05:39 +0000)
Add the IBLND_REJECT_EARLY reason for rejecting a connection request.
It is used when the device doesn't have any nets added yet, or when
there are no active NIs on the net to handle the connection.
These conditions are expected to occur only while the LNI is being
added/initialized, so they are reported with CNETERR rather than CERROR.

In lnet, set NI state to ACTIVE only after it has been added
to the list of NIs for the net, so that LND can know that
the NI can be used to accept connections.

Test-parameters: trivial
Signed-off-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Change-Id: I59efb2fdf5d5ceabb6ff23f638ec85da82d57b99
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/51651
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Cyril Bordage <cbordage@whamcloud.com>
Reviewed-by: Frank Sehr <fsehr@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/klnds/o2iblnd/o2iblnd-idl.h
lnet/klnds/o2iblnd/o2iblnd_cb.c
lnet/lnet/api-ni.c

index 35df50b..544bab8 100644 (file)
@@ -149,6 +149,7 @@ struct kib_rej {
 /* peer_ni's msg queue size doesn't match mine */
 #define IBLND_REJECT_MSG_QUEUE_SIZE  7
 #define IBLND_REJECT_INVALID_SRV_ID  8
+#define IBLND_REJECT_EARLY          9          /* NI not initialized yet */
 
 /***********************************************************************/
 
index 32e67bf..74006aa 100644 (file)
@@ -2572,6 +2572,31 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
        if (ni != NULL) {
                net = (struct kib_net *)ni->ni_data;
                rej.ibr_incarnation = net->ibn_incarnation;
+       } else {
+               if (ibdev->ibd_nnets == 0) {
+                       rej.ibr_why = IBLND_REJECT_EARLY;
+                       CNETERR("Can't accept conn from %s on %s (%s:%d:%pI4h): net for nid %s not added yet\n",
+                               libcfs_nid2str(nid),
+                               libcfs_nidstr(&net->ibn_ni->ni_nid),
+                               ibdev->ibd_ifname, ibdev->ibd_nnets,
+                               &ibdev->ibd_ifip,
+                               libcfs_nid2str(reqmsg->ibm_dstnid));
+                       goto failed;
+               }
+               list_for_each_entry(net, &ibdev->ibd_nets, ibn_list) {
+                       if ((net->ibn_dev == ibdev) &&
+                           (net->ibn_ni != NULL) &&
+                           (net->ibn_ni->ni_state != LNET_NI_STATE_ACTIVE)) {
+                               rej.ibr_why = IBLND_REJECT_EARLY;
+                               CNETERR("Can't accept conn from %s on %s (%s:%d:%pI4h): nid %s not ready\n",
+                                      libcfs_nid2str(nid),
+                                      libcfs_nidstr(&net->ibn_ni->ni_nid),
+                                      ibdev->ibd_ifname, ibdev->ibd_nnets,
+                                      &ibdev->ibd_ifip,
+                                      libcfs_nid2str(reqmsg->ibm_dstnid));
+                               goto failed;
+                       }
+               }
        }
 
        if (ni == NULL ||                       /* no matching net */
@@ -3037,6 +3062,11 @@ kiblnd_rejected(struct kib_conn *conn, int reason, void *priv, int priv_nob)
                                       libcfs_nid2str(peer_ni->ibp_nid));
                                break;
 
+                       case IBLND_REJECT_EARLY:
+                               CNETERR("%s rejected: tried too early\n",
+                                      libcfs_nid2str(peer_ni->ibp_nid));
+                               break;
+
                        default:
                                CERROR("%s rejected: o2iblnd reason %d\n",
                                       libcfs_nid2str(peer_ni->ibp_nid),
index 8251b81..bc82b02 100644 (file)
@@ -2550,10 +2550,6 @@ lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
                goto failed0;
        }
 
-       lnet_ni_lock(ni);
-       ni->ni_state = LNET_NI_STATE_ACTIVE;
-       lnet_ni_unlock(ni);
-
        /* We keep a reference on the loopback net through the loopback NI */
        if (net->net_lnd->lnd_type == LOLND) {
                lnet_ni_addref(ni);
@@ -2729,6 +2725,14 @@ lnet_startup_lndnet(struct lnet_net *net, struct lnet_lnd_tunables *tun)
        lnet_net_lock(LNET_LOCK_EX);
        list_splice_tail(&local_ni_list, &net_l->net_ni_list);
        lnet_incr_dlc_seq();
+
+       list_for_each_entry(ni, &net_l->net_ni_list, ni_netlist) {
+               if (!ni)
+                       break;
+               lnet_ni_lock(ni);
+               ni->ni_state = LNET_NI_STATE_ACTIVE;
+               lnet_ni_unlock(ni);
+       }
        lnet_net_unlock(LNET_LOCK_EX);
 
        /* if the network is not unique then we don't want to keep