Whamcloud - gitweb
LU-15509 lnet: Ping buffer ref leak in lnet_peer_data_present
[fs/lustre-release.git] / lnet / lnet / peer.c
index 4725a01..ae71e25 100644 (file)
@@ -637,7 +637,7 @@ lnet_peer_table_del_rtrs_locked(struct lnet_net *net,
 {
        struct lnet_peer_ni     *lp;
        struct lnet_peer_ni     *tmp;
-       lnet_nid_t              gw_nid;
+       struct lnet_nid         gw_nid;
        int                     i;
 
        for (i = 0; i < LNET_PEER_HASH_SIZE; i++) {
@@ -649,12 +649,10 @@ lnet_peer_table_del_rtrs_locked(struct lnet_net *net,
                        if (!lnet_isrouter(lp))
                                continue;
 
-                       /* FIXME handle large-addr nid */
-                       gw_nid = lnet_nid_to_nid4(
-                               &lp->lpni_peer_net->lpn_peer->lp_primary_nid);
+                       gw_nid = lp->lpni_peer_net->lpn_peer->lp_primary_nid;
 
                        lnet_net_unlock(LNET_LOCK_EX);
-                       lnet_del_route(LNET_NET_ANY, gw_nid);
+                       lnet_del_route(LNET_NET_ANY, &gw_nid);
                        lnet_net_lock(LNET_LOCK_EX);
                }
        }
@@ -776,7 +774,7 @@ lnet_peer_ni_get_locked(struct lnet_peer *lp, struct lnet_nid *nid)
 }
 
 struct lnet_peer *
-lnet_find_peer(lnet_nid_t nid)
+lnet_find_peer4(lnet_nid_t nid)
 {
        struct lnet_peer_ni *lpni;
        struct lnet_peer *lp = NULL;
@@ -794,6 +792,25 @@ lnet_find_peer(lnet_nid_t nid)
        return lp;
 }
 
+struct lnet_peer *
+lnet_find_peer(struct lnet_nid *nid)
+{
+       struct lnet_peer_ni *lpni;
+       struct lnet_peer *lp = NULL;
+       int cpt;
+
+       cpt = lnet_net_lock_current();
+       lpni = lnet_peer_ni_find_locked(nid);
+       if (lpni) {
+               lp = lpni->lpni_peer_net->lpn_peer;
+               lnet_peer_addref_locked(lp);
+               lnet_peer_ni_decref_locked(lpni);
+       }
+       lnet_net_unlock(cpt);
+
+       return lp;
+}
+
 struct lnet_peer_net *
 lnet_get_next_peer_net_locked(struct lnet_peer *lp, __u32 prev_lpn_id)
 {
@@ -1334,21 +1351,17 @@ lnet_peer_clr_pref_nids(struct lnet_peer_ni *lpni)
        }
 }
 
-lnet_nid_t
-lnet_peer_primary_nid_locked(lnet_nid_t nid)
+void
+lnet_peer_primary_nid_locked(struct lnet_nid *nid, struct lnet_nid *result)
 {
-       /* FIXME handle large-addr nid */
        struct lnet_peer_ni *lpni;
-       lnet_nid_t primary_nid = nid;
 
-       lpni = lnet_find_peer_ni_locked(nid);
+       *result = *nid;
+       lpni = lnet_peer_ni_find_locked(nid);
        if (lpni) {
-               primary_nid = lnet_nid_to_nid4(
-                       &lpni->lpni_peer_net->lpn_peer->lp_primary_nid);
+               *result = lpni->lpni_peer_net->lpn_peer->lp_primary_nid;
                lnet_peer_ni_decref_locked(lpni);
        }
-
-       return primary_nid;
 }
 
 bool
@@ -1426,21 +1439,20 @@ unlock:
 }
 EXPORT_SYMBOL(LNetAddPeer);
 
-/* FIXME support large-addr nid */
-lnet_nid_t
-LNetPrimaryNID(lnet_nid_t nid)
+void LNetPrimaryNID(struct lnet_nid *nid)
 {
        struct lnet_peer *lp;
        struct lnet_peer_ni *lpni;
-       lnet_nid_t primary_nid = nid;
+       struct lnet_nid orig;
        int rc = 0;
        int cpt;
 
-       if (nid == LNET_NID_LO_0)
-               return LNET_NID_LO_0;
+       if (!nid || nid_is_lo0(nid))
+               return;
+       orig = *nid;
 
        cpt = lnet_net_lock_current();
-       lpni = lnet_nid2peerni_locked(nid, LNET_NID_ANY, cpt);
+       lpni = lnet_peerni_by_nid_locked(nid, NULL, cpt);
        if (IS_ERR(lpni)) {
                rc = PTR_ERR(lpni);
                goto out_unlock;
@@ -1467,7 +1479,7 @@ LNetPrimaryNID(lnet_nid_t nid)
                 * and lookup the lpni again
                 */
                lnet_peer_ni_decref_locked(lpni);
-               lpni = lnet_find_peer_ni_locked(nid);
+               lpni = lnet_peer_ni_find_locked(nid);
                if (!lpni) {
                        rc = -ENOENT;
                        goto out_unlock;
@@ -1482,15 +1494,14 @@ LNetPrimaryNID(lnet_nid_t nid)
                if (lnet_is_discovery_disabled(lp))
                        break;
        }
-       primary_nid = lnet_nid_to_nid4(&lp->lp_primary_nid);
+       *nid = lp->lp_primary_nid;
 out_decref:
        lnet_peer_ni_decref_locked(lpni);
 out_unlock:
        lnet_net_unlock(cpt);
 
-       CDEBUG(D_NET, "NID %s primary NID %s rc %d\n", libcfs_nid2str(nid),
-              libcfs_nid2str(primary_nid), rc);
-       return primary_nid;
+       CDEBUG(D_NET, "NID %s primary NID %s rc %d\n", libcfs_nidstr(&orig),
+              libcfs_nidstr(nid), rc);
 }
 EXPORT_SYMBOL(LNetPrimaryNID);
 
@@ -1833,19 +1844,22 @@ out:
 
 /*
  * lpni creation initiated due to traffic either sending or receiving.
+ * The caller must hold the_lnet.ln_api_mutex.
+ * A reference is taken on the lnet_peer_ni returned by this function.
  */
-static int
+static struct lnet_peer_ni *
 lnet_peer_ni_traffic_add(struct lnet_nid *nid, struct lnet_nid *pref)
+__must_hold(&the_lnet.ln_api_mutex)
 {
-       struct lnet_peer *lp;
-       struct lnet_peer_net *lpn;
+       struct lnet_peer *lp = NULL;
+       struct lnet_peer_net *lpn = NULL;
        struct lnet_peer_ni *lpni;
        unsigned flags = 0;
        int rc = 0;
 
        if (LNET_NID_IS_ANY(nid)) {
                rc = -EINVAL;
-               goto out;
+               goto out_err;
        }
 
        /* lnet_net_lock is not needed here because ln_api_lock is held */
@@ -1857,7 +1871,6 @@ lnet_peer_ni_traffic_add(struct lnet_nid *nid, struct lnet_nid *pref)
                 * traffic, we just assume everything is ok and
                 * return.
                 */
-               lnet_peer_ni_decref_locked(lpni);
                goto out;
        }
 
@@ -1865,24 +1878,31 @@ lnet_peer_ni_traffic_add(struct lnet_nid *nid, struct lnet_nid *pref)
        rc = -ENOMEM;
        lp = lnet_peer_alloc(nid);
        if (!lp)
-               goto out;
+               goto out_err;
        lpn = lnet_peer_net_alloc(LNET_NID_NET(nid));
        if (!lpn)
-               goto out_free_lp;
+               goto out_err;
        lpni = lnet_peer_ni_alloc(nid);
        if (!lpni)
-               goto out_free_lpn;
+               goto out_err;
        lnet_peer_ni_set_non_mr_pref_nid(lpni, pref);
 
-       return lnet_peer_attach_peer_ni(lp, lpn, lpni, flags);
+       /* lnet_peer_attach_peer_ni() always returns 0 */
+       rc = lnet_peer_attach_peer_ni(lp, lpn, lpni, flags);
 
-out_free_lpn:
-       LIBCFS_FREE(lpn, sizeof(*lpn));
-out_free_lp:
-       LIBCFS_FREE(lp, sizeof(*lp));
+       lnet_peer_ni_addref_locked(lpni);
+
+out_err:
+       if (rc) {
+               if (lpn)
+                       LIBCFS_FREE(lpn, sizeof(*lpn));
+               if (lp)
+                       LIBCFS_FREE(lp, sizeof(*lp));
+               lpni = ERR_PTR(rc);
+       }
 out:
        CDEBUG(D_NET, "peer %s: %d\n", libcfs_nidstr(nid), rc);
-       return rc;
+       return lpni;
 }
 
 /*
@@ -2051,10 +2071,10 @@ lnet_destroy_peer_ni_locked(struct kref *ref)
 }
 
 struct lnet_peer_ni *
-lnet_nid2peerni_ex(struct lnet_nid *nid, int cpt)
+lnet_nid2peerni_ex(struct lnet_nid *nid)
+__must_hold(&the_lnet.ln_api_mutex)
 {
        struct lnet_peer_ni *lpni = NULL;
-       int rc;
 
        if (the_lnet.ln_state != LNET_STATE_RUNNING)
                return ERR_PTR(-ESHUTDOWN);
@@ -2067,19 +2087,11 @@ lnet_nid2peerni_ex(struct lnet_nid *nid, int cpt)
        if (lpni)
                return lpni;
 
-       lnet_net_unlock(cpt);
-
-       rc = lnet_peer_ni_traffic_add(nid, NULL);
-       if (rc) {
-               lpni = ERR_PTR(rc);
-               goto out_net_relock;
-       }
+       lnet_net_unlock(LNET_LOCK_EX);
 
-       lpni = lnet_peer_ni_find_locked(nid);
-       LASSERT(lpni);
+       lpni = lnet_peer_ni_traffic_add(nid, NULL);
 
-out_net_relock:
-       lnet_net_lock(cpt);
+       lnet_net_lock(LNET_LOCK_EX);
 
        return lpni;
 }
@@ -2093,7 +2105,6 @@ lnet_peerni_by_nid_locked(struct lnet_nid *nid,
                        struct lnet_nid *pref, int cpt)
 {
        struct lnet_peer_ni *lpni = NULL;
-       int rc;
 
        if (the_lnet.ln_state != LNET_STATE_RUNNING)
                return ERR_PTR(-ESHUTDOWN);
@@ -2121,30 +2132,18 @@ lnet_peerni_by_nid_locked(struct lnet_nid *nid,
        lnet_net_unlock(cpt);
        mutex_lock(&the_lnet.ln_api_mutex);
        /*
-        * Shutdown is only set under the ln_api_lock, so a single
+        * the_lnet.ln_state is only modified under the ln_api_lock, so a single
         * check here is sufficent.
         */
-       if (the_lnet.ln_state != LNET_STATE_RUNNING) {
-               lpni = ERR_PTR(-ESHUTDOWN);
-               goto out_mutex_unlock;
-       }
+       if (the_lnet.ln_state == LNET_STATE_RUNNING)
+               lpni = lnet_peer_ni_traffic_add(nid, pref);
 
-       rc = lnet_peer_ni_traffic_add(nid, pref);
-       if (rc) {
-               lpni = ERR_PTR(rc);
-               goto out_mutex_unlock;
-       }
-
-       lpni = lnet_peer_ni_find_locked(nid);
-       LASSERT(lpni);
-
-out_mutex_unlock:
        mutex_unlock(&the_lnet.ln_api_mutex);
        lnet_net_lock(cpt);
 
        /* Lock has been dropped, check again for shutdown. */
        if (the_lnet.ln_state != LNET_STATE_RUNNING) {
-               if (!IS_ERR(lpni))
+               if (!IS_ERR_OR_NULL(lpni))
                        lnet_peer_ni_decref_locked(lpni);
                lpni = ERR_PTR(-ESHUTDOWN);
        }
@@ -2275,7 +2274,7 @@ static int lnet_peer_queue_for_discovery(struct lnet_peer *lp)
  * Discovery of a peer is complete. Wake all waiters on the peer.
  * Call with lnet_net_lock/EX held.
  */
-static void lnet_peer_discovery_complete(struct lnet_peer *lp)
+static void lnet_peer_discovery_complete(struct lnet_peer *lp, int dc_error)
 {
        struct lnet_msg *msg, *tmp;
        int rc = 0;
@@ -2286,6 +2285,11 @@ static void lnet_peer_discovery_complete(struct lnet_peer *lp)
 
        list_del_init(&lp->lp_dc_list);
        spin_lock(&lp->lp_lock);
+       if (dc_error) {
+               lp->lp_dc_error = dc_error;
+               lp->lp_state &= ~LNET_PEER_DISCOVERING;
+               lp->lp_state |= LNET_PEER_REDISCOVER;
+       }
        list_splice_init(&lp->lp_dc_pendq, &pending_msgs);
        spin_unlock(&lp->lp_lock);
        wake_up(&lp->lp_dc_waitq);
@@ -2298,20 +2302,20 @@ static void lnet_peer_discovery_complete(struct lnet_peer *lp)
        /* iterate through all pending messages and send them again */
        list_for_each_entry_safe(msg, tmp, &pending_msgs, msg_list) {
                list_del_init(&msg->msg_list);
-               if (lp->lp_dc_error) {
-                       lnet_finalize(msg, lp->lp_dc_error);
+               if (dc_error) {
+                       lnet_finalize(msg, dc_error);
                        continue;
                }
 
                CDEBUG(D_NET, "sending pending message %s to target %s\n",
                       lnet_msgtyp2str(msg->msg_type),
-                      libcfs_id2str(msg->msg_target));
-               rc = lnet_send(msg->msg_src_nid_param, msg,
-                              msg->msg_rtr_nid_param);
+                      libcfs_idstr(&msg->msg_target));
+               rc = lnet_send(&msg->msg_src_nid_param, msg,
+                              &msg->msg_rtr_nid_param);
                if (rc < 0) {
                        CNETERR("Error sending %s to %s: %d\n",
                               lnet_msgtyp2str(msg->msg_type),
-                              libcfs_id2str(msg->msg_target), rc);
+                              libcfs_idstr(&msg->msg_target), rc);
                        lnet_finalize(msg, rc);
                }
        }
@@ -2331,11 +2335,11 @@ void lnet_peer_push_event(struct lnet_event *ev)
        pbuf = LNET_PING_INFO_TO_BUFFER(ev->md_start + ev->offset);
 
        /* lnet_find_peer() adds a refcount */
-       lp = lnet_find_peer(ev->source.nid);
+       lp = lnet_find_peer(&ev->source.nid);
        if (!lp) {
                CDEBUG(D_NET, "Push Put from unknown %s (source %s). Ignoring...\n",
-                      libcfs_nid2str(ev->initiator.nid),
-                      libcfs_nid2str(ev->source.nid));
+                      libcfs_nidstr(&ev->initiator.nid),
+                      libcfs_nidstr(&ev->source.nid));
                pbuf->pb_needs_post = true;
                return;
        }
@@ -2354,7 +2358,7 @@ void lnet_peer_push_event(struct lnet_event *ev)
                CDEBUG(D_NET, "Push Put error %d from %s (source %s)\n",
                       ev->status,
                       libcfs_nidstr(&lp->lp_primary_nid),
-                      libcfs_nid2str(ev->source.nid));
+                      libcfs_nidstr(&ev->source.nid));
                goto out;
        }
 
@@ -2575,6 +2579,8 @@ again:
                        break;
                if (lnet_peer_is_uptodate(lp))
                        break;
+               if (lp->lp_state & LNET_PEER_MARK_DELETED)
+                       break;
                lnet_peer_queue_for_discovery(lp);
                count++;
                CDEBUG(D_NET, "Discovery attempt # %d\n", count);
@@ -2619,7 +2625,9 @@ again:
                rc = lp->lp_dc_error;
        else if (!block)
                CDEBUG(D_NET, "non-blocking discovery\n");
-       else if (!lnet_peer_is_uptodate(lp) && !lnet_is_discovery_disabled(lp))
+       else if (!lnet_peer_is_uptodate(lp) &&
+                !(lnet_is_discovery_disabled(lp) ||
+                  (lp->lp_state & LNET_PEER_MARK_DELETED)))
                goto again;
 
        CDEBUG(D_NET, "peer %s NID %s: %d. %s\n",
@@ -2659,8 +2667,8 @@ lnet_discovery_event_reply(struct lnet_peer *lp, struct lnet_event *ev)
 
        spin_lock(&lp->lp_lock);
 
-       lnet_nid4_to_nid(ev->target.nid, &lp->lp_disc_src_nid);
-       lnet_nid4_to_nid(ev->source.nid, &lp->lp_disc_dst_nid);
+       lp->lp_disc_src_nid = ev->target.nid;
+       lp->lp_disc_dst_nid = ev->source.nid;
 
        /*
         * If some kind of error happened the contents of message
@@ -2672,7 +2680,7 @@ lnet_discovery_event_reply(struct lnet_peer *lp, struct lnet_event *ev)
                CDEBUG(D_NET, "Ping Reply error %d from %s (source %s)\n",
                       ev->status,
                       libcfs_nidstr(&lp->lp_primary_nid),
-                      libcfs_nid2str(ev->source.nid));
+                      libcfs_nidstr(&ev->source.nid));
                goto out;
        }
 
@@ -2859,7 +2867,7 @@ lnet_discovery_event_send(struct lnet_peer *lp, struct lnet_event *ev)
 out:
        CDEBUG(D_NET, "%s Send to %s: %d\n",
                (ev->msg_type == LNET_MSG_GET ? "Ping" : "Push"),
-               libcfs_nid2str(ev->target.nid), rc);
+               libcfs_nidstr(&ev->target.nid), rc);
        return rc;
 }
 
@@ -3240,12 +3248,15 @@ __must_hold(&lp->lp_lock)
        if (lp->lp_state & LNET_PEER_MARK_DELETED)
                return 0;
 
-       if (the_lnet.ln_dc_state != LNET_DC_STATE_RUNNING)
-               return -ESHUTDOWN;
-
        spin_unlock(&lp->lp_lock);
 
        mutex_lock(&the_lnet.ln_api_mutex);
+       if (the_lnet.ln_state != LNET_STATE_RUNNING ||
+           the_lnet.ln_dc_state != LNET_DC_STATE_RUNNING) {
+               mutex_unlock(&the_lnet.ln_api_mutex);
+               spin_lock(&lp->lp_lock);
+               return -ESHUTDOWN;
+       }
 
        lnet_net_lock(LNET_LOCK_EX);
        /* remove the peer from the discovery work
@@ -3323,8 +3334,10 @@ __must_hold(&lp->lp_lock)
         * down, and our reference count may be all that is keeping it
         * alive. Don't do any work on it.
         */
-       if (list_empty(&lp->lp_peer_list))
+       if (list_empty(&lp->lp_peer_list)) {
+               lnet_ping_buffer_decref(pbuf);
                goto out;
+       }
 
        flags = LNET_PEER_DISCOVERED;
        if (pbuf->pb_info.pi_features & LNET_PING_FEAT_MULTI_RAIL)
@@ -3344,12 +3357,16 @@ __must_hold(&lp->lp_lock)
         * primary NID to the correct value here. Moreover, this peer
         * can show up with only the loopback NID in the ping buffer.
         */
-       if (pbuf->pb_info.pi_nnis <= 1)
+       if (pbuf->pb_info.pi_nnis <= 1) {
+               lnet_ping_buffer_decref(pbuf);
                goto out;
+       }
        nid = pbuf->pb_info.pi_ni[1].ns_nid;
        if (nid_is_lo0(&lp->lp_primary_nid)) {
                rc = lnet_peer_set_primary_nid(lp, nid, flags);
-               if (!rc)
+               if (rc)
+                       lnet_ping_buffer_decref(pbuf);
+               else
                        rc = lnet_peer_merge_data(lp, pbuf);
        /*
         * if the primary nid of the peer is present in the ping info returned
@@ -3372,6 +3389,7 @@ __must_hold(&lp->lp_lock)
                                CERROR("Primary NID error %s versus %s: %d\n",
                                       libcfs_nidstr(&lp->lp_primary_nid),
                                       libcfs_nid2str(nid), rc);
+                               lnet_ping_buffer_decref(pbuf);
                        } else {
                                rc = lnet_peer_merge_data(lp, pbuf);
                        }
@@ -3470,8 +3488,7 @@ __must_hold(&lp->lp_lock)
 
        nnis = max(lp->lp_data_nnis, LNET_INTERFACES_MIN);
 
-       rc = lnet_send_ping(lnet_nid_to_nid4(&lp->lp_primary_nid),
-                           &lp->lp_ping_mdh, nnis, lp,
+       rc = lnet_send_ping(&lp->lp_primary_nid, &lp->lp_ping_mdh, nnis, lp,
                            the_lnet.ln_dc_handler, false);
 
        /*
@@ -3555,7 +3572,7 @@ static int lnet_peer_send_push(struct lnet_peer *lp)
 __must_hold(&lp->lp_lock)
 {
        struct lnet_ping_buffer *pbuf;
-       struct lnet_process_id id;
+       struct lnet_processid id;
        struct lnet_md md;
        int cpt;
        int rc;
@@ -3602,13 +3619,13 @@ __must_hold(&lp->lp_lock)
        lnet_peer_addref_locked(lp);
        id.pid = LNET_PID_LUSTRE;
        if (!LNET_NID_IS_ANY(&lp->lp_disc_dst_nid))
-               id.nid = lnet_nid_to_nid4(&lp->lp_disc_dst_nid);
+               id.nid = lp->lp_disc_dst_nid;
        else
-               id.nid = lnet_nid_to_nid4(&lp->lp_primary_nid);
+               id.nid = lp->lp_primary_nid;
        lnet_net_unlock(cpt);
 
-       rc = LNetPut(lnet_nid_to_nid4(&lp->lp_disc_src_nid), lp->lp_push_mdh,
-                    LNET_ACK_REQ, id, LNET_RESERVED_PORTAL,
+       rc = LNetPut(&lp->lp_disc_src_nid, lp->lp_push_mdh,
+                    LNET_ACK_REQ, &id, LNET_RESERVED_PORTAL,
                     LNET_PROTO_PING_MATCHBITS, 0, 0);
 
        /*
@@ -3647,22 +3664,6 @@ fail_error:
 }
 
 /*
- * An unrecoverable error was encountered during discovery.
- * Set error status in peer and abort discovery.
- */
-static void lnet_peer_discovery_error(struct lnet_peer *lp, int error)
-{
-       CDEBUG(D_NET, "Discovery error %s: %d\n",
-              libcfs_nidstr(&lp->lp_primary_nid), error);
-
-       spin_lock(&lp->lp_lock);
-       lp->lp_dc_error = error;
-       lp->lp_state &= ~LNET_PEER_DISCOVERING;
-       lp->lp_state |= LNET_PEER_REDISCOVER;
-       spin_unlock(&lp->lp_lock);
-}
-
-/*
  * Wait for work to be queued or some other change that must be
  * attended to. Returns non-zero if the discovery thread should shut
  * down.
@@ -3739,12 +3740,12 @@ static void lnet_resend_msgs(void)
 
        list_for_each_entry_safe(msg, tmp, &resend, msg_list) {
                list_del_init(&msg->msg_list);
-               rc = lnet_send(msg->msg_src_nid_param, msg,
-                              msg->msg_rtr_nid_param);
+               rc = lnet_send(&msg->msg_src_nid_param, msg,
+                              &msg->msg_rtr_nid_param);
                if (rc < 0) {
                        CNETERR("Error sending %s to %s: %d\n",
                               lnet_msgtyp2str(msg->msg_type),
-                              libcfs_id2str(msg->msg_target), rc);
+                              libcfs_idstr(&msg->msg_target), rc);
                        lnet_finalize(msg, rc);
                }
        }
@@ -3840,17 +3841,22 @@ static int lnet_peer_discovery(void *arg)
                        CDEBUG(D_NET, "peer %s(%p) state %#x rc %d\n",
                                libcfs_nidstr(&lp->lp_primary_nid), lp,
                                lp->lp_state, rc);
-                       spin_unlock(&lp->lp_lock);
 
-                       lnet_net_lock(LNET_LOCK_EX);
                        if (rc == LNET_REDISCOVER_PEER) {
+                               spin_unlock(&lp->lp_lock);
+                               lnet_net_lock(LNET_LOCK_EX);
                                list_move(&lp->lp_dc_list,
                                          &the_lnet.ln_dc_request);
-                       } else if (rc) {
-                               lnet_peer_discovery_error(lp, rc);
+                       } else if (rc ||
+                                  !(lp->lp_state & LNET_PEER_DISCOVERING)) {
+                               spin_unlock(&lp->lp_lock);
+                               lnet_net_lock(LNET_LOCK_EX);
+                               lnet_peer_discovery_complete(lp, rc);
+                       } else {
+                               spin_unlock(&lp->lp_lock);
+                               lnet_net_lock(LNET_LOCK_EX);
                        }
-                       if (!(lp->lp_state & LNET_PEER_DISCOVERING))
-                               lnet_peer_discovery_complete(lp);
+
                        if (the_lnet.ln_dc_state == LNET_DC_STATE_STOPPING)
                                break;
 
@@ -3888,8 +3894,7 @@ static int lnet_peer_discovery(void *arg)
        while (!list_empty(&the_lnet.ln_dc_request)) {
                lp = list_first_entry(&the_lnet.ln_dc_request,
                                      struct lnet_peer, lp_dc_list);
-               lnet_peer_discovery_error(lp, -ESHUTDOWN);
-               lnet_peer_discovery_complete(lp);
+               lnet_peer_discovery_complete(lp, -ESHUTDOWN);
        }
        lnet_net_unlock(LNET_LOCK_EX);
 
@@ -3947,8 +3952,10 @@ void lnet_peer_discovery_stop(void)
        else
                wake_up(&the_lnet.ln_dc_waitq);
 
+       mutex_unlock(&the_lnet.ln_api_mutex);
        wait_event(the_lnet.ln_dc_waitq,
                   the_lnet.ln_dc_state == LNET_DC_STATE_SHUTDOWN);
+       mutex_lock(&the_lnet.ln_api_mutex);
 
        LASSERT(list_empty(&the_lnet.ln_dc_request));
        LASSERT(list_empty(&the_lnet.ln_dc_working));
@@ -4070,7 +4077,7 @@ int lnet_get_peer_info(struct lnet_ioctl_peer_cfg *cfg, void __user *bulk)
        __u32 size;
        int rc;
 
-       lp = lnet_find_peer(cfg->prcfg_prim_nid);
+       lp = lnet_find_peer4(cfg->prcfg_prim_nid);
 
        if (!lp) {
                rc = -ENOENT;