Whamcloud - gitweb
LU-7734 lnet: fix routing selection
[fs/lustre-release.git] / lnet / lnet / peer.c
index d5d4986..a1f6990 100644 (file)
@@ -56,12 +56,15 @@ lnet_peer_net_added(struct lnet_net *net)
 
                if (LNET_NIDNET(lpni->lpni_nid) == net->net_id) {
                        lpni->lpni_net = net;
+
+                       spin_lock(&lpni->lpni_lock);
                        lpni->lpni_txcredits =
-                       lpni->lpni_mintxcredits =
                                lpni->lpni_net->net_tunables.lct_peer_tx_credits;
+                       lpni->lpni_mintxcredits = lpni->lpni_txcredits;
                        lpni->lpni_rtrcredits =
-                       lpni->lpni_minrtrcredits =
                                lnet_peer_buffer_credits(lpni->lpni_net);
+                       lpni->lpni_minrtrcredits = lpni->lpni_rtrcredits;
+                       spin_unlock(&lpni->lpni_lock);
 
                        lnet_peer_remove_from_remote_list(lpni);
                }
@@ -255,11 +258,18 @@ lnet_try_destroy_peer_hierarchy_locked(struct lnet_peer_ni *lpni)
 }
 
 /* called with lnet_net_lock LNET_LOCK_EX held */
-static void
+static int
 lnet_peer_ni_del_locked(struct lnet_peer_ni *lpni)
 {
        struct lnet_peer_table *ptable = NULL;
 
+       /* don't remove a peer_ni if it's also a gateway */
+       if (lpni->lpni_rtr_refcount > 0) {
+               CERROR("Peer NI %s is a gateway. Can not delete it\n",
+                      libcfs_nid2str(lpni->lpni_nid));
+               return -EBUSY;
+       }
+
        lnet_peer_remove_from_remote_list(lpni);
 
        /* remove peer ni from the hash list. */
@@ -290,6 +300,8 @@ lnet_peer_ni_del_locked(struct lnet_peer_ni *lpni)
 
        /* decrement reference on peer */
        lnet_peer_ni_decref_locked(lpni);
+
+       return 0;
 }
 
 void lnet_peer_uninit()
@@ -308,17 +320,22 @@ void lnet_peer_uninit()
        lnet_net_unlock(LNET_LOCK_EX);
 }
 
+/*
+ * Delete every peer_ni that belongs to @peer.
+ *
+ * lnet_peer_ni_del_locked() is documented as being called with
+ * lnet_net_lock LNET_LOCK_EX held, so the same applies to callers of
+ * this function.
+ *
+ * A failed deletion (lnet_peer_ni_del_locked() returns -EBUSY when the
+ * peer_ni is also a gateway) does not stop the walk; the remaining
+ * peer_nis are still processed.
+ *
+ * Returns 0 if every deletion succeeded, otherwise the return code of
+ * the last deletion that failed.
+ */
-static void
+static int
 lnet_peer_del_locked(struct lnet_peer *peer)
 {
        struct lnet_peer_ni *lpni = NULL, *lpni2;
+       int rc = 0, rc2 = 0;
 
        lpni = lnet_get_next_peer_ni_locked(peer, NULL, lpni);
        while (lpni != NULL) {
+               /* fetch the successor before lpni is handed to the delete */
                lpni2 = lnet_get_next_peer_ni_locked(peer, NULL, lpni);
-               lnet_peer_ni_del_locked(lpni);
+               rc = lnet_peer_ni_del_locked(lpni);
+               /* remember the failure, but keep walking the other peer_nis */
+               if (rc != 0)
+                       rc2 = rc;
                lpni = lpni2;
        }
+
+       return rc2;
 }
 
 static void
@@ -326,26 +343,32 @@ lnet_peer_table_cleanup_locked(struct lnet_net *net,
                               struct lnet_peer_table *ptable)
 {
        int                      i;
+       struct lnet_peer_ni     *next;
        struct lnet_peer_ni     *lpni;
-       struct lnet_peer_ni     *tmp;
        struct lnet_peer        *peer;
 
        for (i = 0; i < LNET_PEER_HASH_SIZE; i++) {
-               list_for_each_entry_safe(lpni, tmp, &ptable->pt_hash[i],
+               list_for_each_entry_safe(lpni, next, &ptable->pt_hash[i],
                                         lpni_hashlist) {
                        if (net != NULL && net != lpni->lpni_net)
                                continue;
 
-                       /*
-                        * check if by removing this peer ni we should be
-                        * removing the entire peer.
-                        */
                        peer = lpni->lpni_peer_net->lpn_peer;
-
-                       if (peer->lp_primary_nid == lpni->lpni_nid)
-                               lnet_peer_del_locked(peer);
-                       else
+                       if (peer->lp_primary_nid != lpni->lpni_nid) {
                                lnet_peer_ni_del_locked(lpni);
+                               continue;
+                       }
+                       /*
+                        * Removing the primary NID implies removing
+                        * the entire peer. Advance next beyond any
+                        * peer_ni that belongs to the same peer.
+                        */
+                       list_for_each_entry_from(next, &ptable->pt_hash[i],
+                                                lpni_hashlist) {
+                               if (next->lpni_peer_net->lpn_peer != peer)
+                                       break;
+                       }
+                       lnet_peer_del_locked(peer);
                }
        }
 }
@@ -884,6 +907,7 @@ lnet_del_peer_ni_from_peer(lnet_nid_t key_nid, lnet_nid_t nid)
        lnet_nid_t local_nid;
        struct lnet_peer *peer;
        struct lnet_peer_ni *lpni;
+       int rc;
 
        if (key_nid == LNET_NID_ANY)
                return -EINVAL;
@@ -904,17 +928,17 @@ lnet_del_peer_ni_from_peer(lnet_nid_t key_nid, lnet_nid_t nid)
                 * entire peer
                 */
                lnet_net_lock(LNET_LOCK_EX);
-               lnet_peer_del_locked(peer);
+               rc = lnet_peer_del_locked(peer);
                lnet_net_unlock(LNET_LOCK_EX);
 
-               return 0;
+               return rc;
        }
 
        lnet_net_lock(LNET_LOCK_EX);
-       lnet_peer_ni_del_locked(lpni);
+       rc = lnet_peer_ni_del_locked(lpni);
        lnet_net_unlock(LNET_LOCK_EX);
 
-       return 0;
+       return rc;
 }
 
 void
@@ -940,35 +964,73 @@ lnet_destroy_peer_ni_locked(struct lnet_peer_ni *lpni)
 }
 
+/*
+ * Return the peer_ni for @nid, adding it first if no peer_ni exists yet.
+ *
+ * The slow path calls lnet_net_unlock(cpt) and later lnet_net_lock(cpt),
+ * so the function expects to be entered with lnet_net_lock(cpt) held and
+ * returns with it held again. The lock is dropped in between, so callers
+ * must not assume state protected by it is unchanged across this call.
+ *
+ * Unlike lnet_nid2peerni_locked(), this variant does not take
+ * the_lnet.ln_api_mutex itself.
+ * NOTE(review): presumably callers already hold it - confirm.
+ *
+ * Returns the peer_ni on success, ERR_PTR(-ESHUTDOWN) if LNet is shutting
+ * down, or ERR_PTR(rc) with the error from lnet_peer_ni_traffic_add().
+ */
 struct lnet_peer_ni *
+lnet_nid2peerni_ex(lnet_nid_t nid, int cpt)
+{
+       struct lnet_peer_ni *lpni = NULL;
+       int rc;
+
+       if (the_lnet.ln_shutdown) /* it's shutting down */
+               return ERR_PTR(-ESHUTDOWN);
+
+       /*
+        * find if a peer_ni already exists.
+        * If so then just return that.
+        */
+       lpni = lnet_find_peer_ni_locked(nid);
+       if (lpni)
+               return lpni;
+
+       /* slow path: the net lock is released around the add */
+       lnet_net_unlock(cpt);
+
+       rc = lnet_peer_ni_traffic_add(nid);
+       if (rc) {
+               lpni = ERR_PTR(rc);
+               goto out_net_relock;
+       }
+
+       /*
+        * The add succeeded, so the lookup is expected to find the
+        * peer_ni now; the LASSERT enforces that.
+        * NOTE(review): this lookup runs before lnet_net_lock(cpt) is
+        * retaken below - confirm what protects it here.
+        */
+       lpni = lnet_find_peer_ni_locked(nid);
+       LASSERT(lpni);
+
+out_net_relock:
+       lnet_net_lock(cpt);
+
+       return lpni;
+}
+
+struct lnet_peer_ni *
 lnet_nid2peerni_locked(lnet_nid_t nid, int cpt)
 {
-       struct lnet_peer_table  *ptable;
-       struct lnet_peer_ni     *lpni = NULL;
-       int                     cpt2;
-       int                     rc;
+       struct lnet_peer_ni *lpni = NULL;
+       int rc;
 
        if (the_lnet.ln_shutdown) /* it's shutting down */
                return ERR_PTR(-ESHUTDOWN);
 
        /*
-        * calculate cpt2 with the standard hash function
-        * This cpt2 is the slot where we'll find or create the peer.
+        * find if a peer_ni already exists.
+        * If so then just return that.
         */
-       cpt2 = lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
-       ptable = the_lnet.ln_peer_tables[cpt2];
-       lpni = lnet_get_peer_ni_locked(ptable, nid);
+       lpni = lnet_find_peer_ni_locked(nid);
        if (lpni)
                return lpni;
 
-       /* Slow path: serialized using the ln_api_mutex. */
+       /*
+        * Slow path:
+        * use the ln_api_mutex to serialize the creation of the peer_ni
+        * and the creation/deletion of the local ni/net. When a local ni is
+        * created, if there exists a set of peer_nis on that network,
+        * they need to be traversed and updated. When a local NI is
+        * deleted, which could result in a network being deleted, then
+        * all peer nis on that network need to be removed as well.
+        *
+        * Creation through traffic should also be serialized with
+        * creation through DLC.
+        */
        lnet_net_unlock(cpt);
        mutex_lock(&the_lnet.ln_api_mutex);
        /*
         * Shutdown is only set under the ln_api_mutex, so a single
         * check here is sufficient.
-        *
-        * lnet_add_nid_to_peer() also handles the case where we've
-        * raced and a different thread added the NID.
         */
        if (the_lnet.ln_shutdown) {
                lpni = ERR_PTR(-ESHUTDOWN);
@@ -981,7 +1043,7 @@ lnet_nid2peerni_locked(lnet_nid_t nid, int cpt)
                goto out_mutex_unlock;
        }
 
-       lpni = lnet_get_peer_ni_locked(ptable, nid);
+       lpni = lnet_find_peer_ni_locked(nid);
        LASSERT(lpni);
 
 out_mutex_unlock:
@@ -1112,7 +1174,8 @@ int lnet_get_peer_info(__u32 idx, lnet_nid_t *primary_nid, lnet_nid_t *nid,
                lpni->lpni_net->net_tunables.lct_peer_tx_credits : 0;
        peer_ni_info->cr_peer_tx_credits = lpni->lpni_txcredits;
        peer_ni_info->cr_peer_rtr_credits = lpni->lpni_rtrcredits;
-       peer_ni_info->cr_peer_min_rtr_credits = lpni->lpni_mintxcredits;
+       peer_ni_info->cr_peer_min_rtr_credits = lpni->lpni_minrtrcredits;
+       peer_ni_info->cr_peer_min_tx_credits = lpni->lpni_mintxcredits;
        peer_ni_info->cr_peer_tx_qnob = lpni->lpni_txqnob;
 
        peer_ni_stats->send_count = atomic_read(&lpni->lpni_stats.send_count);