Whamcloud - gitweb
LU-13277 lnet: Discovery thread can deadlock on shutdown
[fs/lustre-release.git] / lnet / lnet / peer.c
index 8e3f8f6..fe4f028 100644 (file)
@@ -104,7 +104,7 @@ lnet_peer_tables_destroy(void)
                for (j = 0; j < LNET_PEER_HASH_SIZE; j++)
                        LASSERT(list_empty(&hash[j]));
 
-               LIBCFS_FREE(hash, LNET_PEER_HASH_SIZE * sizeof(*hash));
+               CFS_FREE_PTR_ARRAY(hash, LNET_PEER_HASH_SIZE);
        }
 
        cfs_percpt_free(the_lnet.ln_peer_tables);
@@ -258,6 +258,10 @@ lnet_peer_alloc(lnet_nid_t nid)
        init_waitqueue_head(&lp->lp_dc_waitq);
        spin_lock_init(&lp->lp_lock);
        lp->lp_primary_nid = nid;
+       if (lnet_peers_start_down())
+               lp->lp_alive = false;
+       else
+               lp->lp_alive = true;
 
        /*
         * all peers created on a router should have health on
@@ -400,8 +404,6 @@ lnet_peer_ni_del_locked(struct lnet_peer_ni *lpni, bool force)
 
        /* decrement the ref count on the peer table */
        ptable = the_lnet.ln_peer_tables[lpni->lpni_cpt];
-       LASSERT(ptable->pt_number > 0);
-       ptable->pt_number--;
 
        /*
         * The peer_ni can no longer be found with a lookup. But there
@@ -585,8 +587,7 @@ lnet_peer_ni_finalize_wait(struct lnet_peer_table *ptable)
                               "Waiting for %d zombies on peer table\n",
                               ptable->pt_zombies);
                }
-               set_current_state(TASK_UNINTERRUPTIBLE);
-               schedule_timeout(cfs_time_seconds(1) >> 1);
+               schedule_timeout_uninterruptible(cfs_time_seconds(1) >> 1);
                spin_lock(&ptable->pt_zombie_lock);
        }
        spin_unlock(&ptable->pt_zombie_lock);
@@ -651,7 +652,8 @@ lnet_get_peer_ni_locked(struct lnet_peer_table *ptable, lnet_nid_t nid)
        struct list_head        *peers;
        struct lnet_peer_ni     *lp;
 
-       LASSERT(the_lnet.ln_state == LNET_STATE_RUNNING);
+       if (the_lnet.ln_state != LNET_STATE_RUNNING)
+               return NULL;
 
        peers = &ptable->pt_hash[lnet_nid2peerhash(nid)];
        list_for_each_entry(lp, peers, lpni_hashlist) {
@@ -1047,7 +1049,7 @@ lnet_peer_add_pref_nid(struct lnet_peer_ni *lpni, lnet_nid_t nid)
 
        if (oldnids) {
                size = sizeof(*nids) * (lpni->lpni_pref_nnids - 1);
-               LIBCFS_FREE(oldnids, sizeof(*oldnids) * size);
+               CFS_FREE_PTR_ARRAY(oldnids, size);
        }
 out:
        if (rc == -EEXIST && (lpni->lpni_state & LNET_PEER_NI_NON_MR_PREF)) {
@@ -1127,7 +1129,7 @@ lnet_peer_del_pref_nid(struct lnet_peer_ni *lpni, lnet_nid_t nid)
 
        if (oldnids) {
                size = sizeof(*nids) * (lpni->lpni_pref_nnids + 1);
-               LIBCFS_FREE(oldnids, sizeof(*oldnids) * size);
+               CFS_FREE_PTR_ARRAY(oldnids, size);
        }
 out:
        CDEBUG(D_NET, "peer %s nid %s: %d\n",
@@ -1152,6 +1154,7 @@ lnet_peer_primary_nid_locked(lnet_nid_t nid)
 
 bool
 lnet_is_discovery_disabled_locked(struct lnet_peer *lp)
+__must_hold(&lp->lp_lock)
 {
        if (lnet_peer_discovery_disabled)
                return true;
@@ -1256,7 +1259,6 @@ lnet_peer_attach_peer_ni(struct lnet_peer *lp,
                ptable = the_lnet.ln_peer_tables[lpni->lpni_cpt];
                list_add_tail(&lpni->lpni_hashlist, &ptable->pt_hash[hash]);
                ptable->pt_version++;
-               ptable->pt_number++;
                /* This is the 1st refcount on lpni. */
                atomic_inc(&lpni->lpni_refcount);
        }
@@ -1273,6 +1275,7 @@ lnet_peer_attach_peer_ni(struct lnet_peer *lp,
        /* Add peer_ni to peer_net */
        lpni->lpni_peer_net = lpn;
        list_add_tail(&lpni->lpni_peer_nis, &lpn->lpn_peer_nis);
+       lnet_update_peer_net_healthv(lpni);
        lnet_peer_net_addref_locked(lpn);
 
        /* Add peer_net to peer */
@@ -1527,11 +1530,7 @@ lnet_peer_ni_traffic_add(lnet_nid_t nid, lnet_nid_t pref)
        struct lnet_peer *lp;
        struct lnet_peer_net *lpn;
        struct lnet_peer_ni *lpni;
-       /*
-        * Assume peer is Multi-Rail capable and let discovery find out
-        * otherwise.
-        */
-       unsigned flags = LNET_PEER_MULTI_RAIL;
+       unsigned flags = 0;
        int rc = 0;
 
        if (nid == LNET_NID_ANY) {
@@ -1721,10 +1720,9 @@ lnet_destroy_peer_ni_locked(struct lnet_peer_ni *lpni)
        ptable->pt_zombies--;
        spin_unlock(&ptable->pt_zombie_lock);
 
-       if (lpni->lpni_pref_nnids > 1) {
-               LIBCFS_FREE(lpni->lpni_pref.nids,
-                       sizeof(*lpni->lpni_pref.nids) * lpni->lpni_pref_nnids);
-       }
+       if (lpni->lpni_pref_nnids > 1)
+               CFS_FREE_PTR_ARRAY(lpni->lpni_pref.nids, lpni->lpni_pref_nnids);
+
        LIBCFS_FREE(lpni, sizeof(*lpni));
 
        lnet_peer_net_decref_locked(lpn);
@@ -1844,6 +1842,17 @@ lnet_peer_gw_discovery(struct lnet_peer *lp)
        return rc;
 }
 
+bool
+lnet_peer_is_uptodate(struct lnet_peer *lp)
+{
+       bool rc;
+
+       spin_lock(&lp->lp_lock);
+       rc = lnet_peer_is_uptodate_locked(lp);
+       spin_unlock(&lp->lp_lock);
+       return rc;
+}
+
 /*
  * Is a peer uptodate from the point of view of discovery?
  *
@@ -1853,11 +1862,11 @@ lnet_peer_gw_discovery(struct lnet_peer *lp)
  * Otherwise look at whether the peer needs rediscovering.
  */
 bool
-lnet_peer_is_uptodate(struct lnet_peer *lp)
+lnet_peer_is_uptodate_locked(struct lnet_peer *lp)
+__must_hold(&lp->lp_lock)
 {
        bool rc;
 
-       spin_lock(&lp->lp_lock);
        if (lp->lp_state & (LNET_PEER_DISCOVERING |
                            LNET_PEER_FORCE_PING |
                            LNET_PEER_FORCE_PUSH)) {
@@ -1874,7 +1883,6 @@ lnet_peer_is_uptodate(struct lnet_peer *lp)
        } else {
                rc = false;
        }
-       spin_unlock(&lp->lp_lock);
 
        return rc;
 }
@@ -1915,9 +1923,7 @@ static void lnet_peer_discovery_complete(struct lnet_peer *lp)
 {
        struct lnet_msg *msg, *tmp;
        int rc = 0;
-       struct list_head pending_msgs;
-
-       INIT_LIST_HEAD(&pending_msgs);
+       LIST_HEAD(pending_msgs);
 
        CDEBUG(D_NET, "Discovery complete. Dequeue peer %s\n",
               libcfs_nid2str(lp->lp_primary_nid));
@@ -2301,20 +2307,6 @@ lnet_discovery_event_reply(struct lnet_peer *lp, struct lnet_event *ev)
 
 
        /*
-        * Only enable the multi-rail feature on the peer if both sides of
-        * the connection have discovery on
-        */
-       if (pbuf->pb_info.pi_features & LNET_PING_FEAT_MULTI_RAIL) {
-               CDEBUG(D_NET, "Peer %s has Multi-Rail feature enabled\n",
-                      libcfs_nid2str(lp->lp_primary_nid));
-               lp->lp_state |= LNET_PEER_MULTI_RAIL;
-       } else {
-               CDEBUG(D_NET, "Peer %s has Multi-Rail feature disabled\n",
-                      libcfs_nid2str(lp->lp_primary_nid));
-               lp->lp_state &= ~LNET_PEER_MULTI_RAIL;
-       }
-
-       /*
         * The peer may have discovery disabled at its end. Set
         * NO_DISCOVERY as appropriate.
         */
@@ -2336,22 +2328,24 @@ lnet_discovery_event_reply(struct lnet_peer *lp, struct lnet_event *ev)
         */
        if (pbuf->pb_info.pi_features & LNET_PING_FEAT_MULTI_RAIL) {
                if (lp->lp_state & LNET_PEER_MULTI_RAIL) {
-                       /* Everything's fine */
+                       CDEBUG(D_NET, "peer %s(%p) is MR\n",
+                              libcfs_nid2str(lp->lp_primary_nid), lp);
                } else if (lp->lp_state & LNET_PEER_CONFIGURED) {
                        CWARN("Reply says %s is Multi-Rail, DLC says not\n",
                              libcfs_nid2str(lp->lp_primary_nid));
+               } else if (lnet_peer_discovery_disabled) {
+                       CDEBUG(D_NET,
+                              "peer %s(%p) not MR: DD disabled locally\n",
+                              libcfs_nid2str(lp->lp_primary_nid), lp);
+               } else if (lp->lp_state & LNET_PEER_NO_DISCOVERY) {
+                       CDEBUG(D_NET,
+                              "peer %s(%p) not MR: DD disabled remotely\n",
+                              libcfs_nid2str(lp->lp_primary_nid), lp);
                } else {
-                       /*
-                        * if discovery is disabled then we don't want to
-                        * update the state of the peer. All we'll do is
-                        * update the peer_nis which were reported back in
-                        * the initial ping
-                        */
-
-                       if (!lnet_is_discovery_disabled_locked(lp)) {
-                               lp->lp_state |= LNET_PEER_MULTI_RAIL;
-                               lnet_peer_clr_non_mr_pref_nids(lp);
-                       }
+                       CDEBUG(D_NET, "peer %s(%p) is MR capable\n",
+                              libcfs_nid2str(lp->lp_primary_nid), lp);
+                       lp->lp_state |= LNET_PEER_MULTI_RAIL;
+                       lnet_peer_clr_non_mr_pref_nids(lp);
                }
        } else if (lp->lp_state & LNET_PEER_MULTI_RAIL) {
                if (lp->lp_state & LNET_PEER_CONFIGURED) {
@@ -2414,6 +2408,15 @@ lnet_discovery_event_reply(struct lnet_peer *lp, struct lnet_event *ev)
 out:
        lp->lp_state &= ~LNET_PEER_PING_SENT;
        spin_unlock(&lp->lp_lock);
+
+       lnet_net_lock(LNET_LOCK_EX);
+       /*
+        * If this peer is a gateway, call the routing callback to
+        * handle the ping reply
+        */
+       if (lp->lp_rtr_refcount > 0)
+               lnet_router_discovery_ping_reply(lp);
+       lnet_net_unlock(LNET_LOCK_EX);
 }
 
 /*
@@ -2579,10 +2582,10 @@ static int lnet_peer_merge_data(struct lnet_peer *lp,
                lp->lp_state &= ~LNET_PEER_ROUTER_ENABLED;
        spin_unlock(&lp->lp_lock);
 
-       nnis = MAX(lp->lp_nnis, pbuf->pb_info.pi_nnis);
-       LIBCFS_ALLOC(curnis, nnis * sizeof(*curnis));
-       LIBCFS_ALLOC(addnis, nnis * sizeof(*addnis));
-       LIBCFS_ALLOC(delnis, nnis * sizeof(*delnis));
+       nnis = max_t(int, lp->lp_nnis, pbuf->pb_info.pi_nnis);
+       CFS_ALLOC_PTR_ARRAY(curnis, nnis);
+       CFS_ALLOC_PTR_ARRAY(addnis, nnis);
+       CFS_ALLOC_PTR_ARRAY(delnis, nnis);
        if (!curnis || !addnis || !delnis) {
                rc = -ENOMEM;
                goto out;
@@ -2686,9 +2689,9 @@ static int lnet_peer_merge_data(struct lnet_peer *lp,
         */
        rc = 0;
 out:
-       LIBCFS_FREE(curnis, nnis * sizeof(*curnis));
-       LIBCFS_FREE(addnis, nnis * sizeof(*addnis));
-       LIBCFS_FREE(delnis, nnis * sizeof(*delnis));
+       CFS_FREE_PTR_ARRAY(curnis, nnis);
+       CFS_FREE_PTR_ARRAY(addnis, nnis);
+       CFS_FREE_PTR_ARRAY(delnis, nnis);
        lnet_ping_buffer_decref(pbuf);
        CDEBUG(D_NET, "peer %s (%p): %d\n", libcfs_nid2str(lp->lp_primary_nid), lp, rc);
 
@@ -2973,10 +2976,10 @@ __must_hold(&lp->lp_lock)
        pnid = lnet_peer_select_nid(lp);
        lnet_net_unlock(cpt);
 
-       nnis = MAX(lp->lp_data_nnis, LNET_INTERFACES_MIN);
+       nnis = max(lp->lp_data_nnis, LNET_INTERFACES_MIN);
 
        rc = lnet_send_ping(pnid, &lp->lp_ping_mdh, nnis, lp,
-                           the_lnet.ln_dc_eqh, false);
+                           the_lnet.ln_dc_eq, false);
 
        /*
         * if LNetMDBind in lnet_send_ping fails we need to decrement the
@@ -3068,7 +3071,7 @@ __must_hold(&lp->lp_lock)
        md.threshold = 2; /* Put/Ack */
        md.max_size  = 0;
        md.options   = 0;
-       md.eq_handle = the_lnet.ln_dc_eqh;
+       md.eq_handle = the_lnet.ln_dc_eq;
        md.user_ptr  = lp;
 
        rc = LNetMDBind(md, LNET_UNLINK, &lp->lp_push_mdh);
@@ -3248,11 +3251,9 @@ static int lnet_peer_discovery_wait_for_work(void)
 static void lnet_resend_msgs(void)
 {
        struct lnet_msg *msg, *tmp;
-       struct list_head resend;
+       LIST_HEAD(resend);
        int rc;
 
-       INIT_LIST_HEAD(&resend);
-
        spin_lock(&the_lnet.ln_msg_resend_lock);
        list_splice(&the_lnet.ln_msg_resend, &resend);
        spin_unlock(&the_lnet.ln_msg_resend_lock);
@@ -3276,8 +3277,9 @@ static int lnet_peer_discovery(void *arg)
        struct lnet_peer *lp;
        int rc;
 
+       wait_for_completion(&the_lnet.ln_started);
+
        CDEBUG(D_NET, "started\n");
-       cfs_block_allsigs();
 
        for (;;) {
                if (lnet_peer_discovery_wait_for_work())
@@ -3289,8 +3291,10 @@ static int lnet_peer_discovery(void *arg)
                        lnet_push_target_resize();
 
                lnet_net_lock(LNET_LOCK_EX);
-               if (the_lnet.ln_dc_state == LNET_DC_STATE_STOPPING)
+               if (the_lnet.ln_dc_state == LNET_DC_STATE_STOPPING) {
+                       lnet_net_unlock(LNET_LOCK_EX);
                        break;
+               }
 
                /*
                 * Process all incoming discovery work requests.  When
@@ -3385,7 +3389,7 @@ static int lnet_peer_discovery(void *arg)
 
        /* Queue cleanup 2: wait for the expired queue to clear. */
        while (!list_empty(&the_lnet.ln_dc_expired))
-               schedule_timeout(cfs_time_seconds(1));
+               schedule_timeout_uninterruptible(cfs_time_seconds(1));
 
        /* Queue cleanup 3: clear the request queue. */
        lnet_net_lock(LNET_LOCK_EX);
@@ -3397,8 +3401,8 @@ static int lnet_peer_discovery(void *arg)
        }
        lnet_net_unlock(LNET_LOCK_EX);
 
-       LNetEQFree(the_lnet.ln_dc_eqh);
-       LNetInvalidateEQHandle(&the_lnet.ln_dc_eqh);
+       LNetEQFree(the_lnet.ln_dc_eq);
+       the_lnet.ln_dc_eq = NULL;
 
        the_lnet.ln_dc_state = LNET_DC_STATE_SHUTDOWN;
        wake_up(&the_lnet.ln_dc_waitq);
@@ -3412,13 +3416,14 @@ static int lnet_peer_discovery(void *arg)
 int lnet_peer_discovery_start(void)
 {
        struct task_struct *task;
-       int rc;
+       int rc = 0;
 
        if (the_lnet.ln_dc_state != LNET_DC_STATE_SHUTDOWN)
                return -EALREADY;
 
-       rc = LNetEQAlloc(0, lnet_discovery_event_handler, &the_lnet.ln_dc_eqh);
-       if (rc != 0) {
+       the_lnet.ln_dc_eq = LNetEQAlloc(lnet_discovery_event_handler);
+       if (IS_ERR(the_lnet.ln_dc_eq)) {
+               rc = PTR_ERR(the_lnet.ln_dc_eq);
                CERROR("Can't allocate discovery EQ: %d\n", rc);
                return rc;
        }
@@ -3429,8 +3434,8 @@ int lnet_peer_discovery_start(void)
                rc = PTR_ERR(task);
                CERROR("Can't start peer discovery thread: %d\n", rc);
 
-               LNetEQFree(the_lnet.ln_dc_eqh);
-               LNetInvalidateEQHandle(&the_lnet.ln_dc_eqh);
+               LNetEQFree(the_lnet.ln_dc_eq);
+               the_lnet.ln_dc_eq = NULL;
 
                the_lnet.ln_dc_state = LNET_DC_STATE_SHUTDOWN;
        }
@@ -3448,7 +3453,14 @@ void lnet_peer_discovery_stop(void)
 
        LASSERT(the_lnet.ln_dc_state == LNET_DC_STATE_RUNNING);
        the_lnet.ln_dc_state = LNET_DC_STATE_STOPPING;
-       wake_up(&the_lnet.ln_dc_waitq);
+
+       /* In the LNetNIInit() path we may be stopping discovery before it
+        * has entered its work loop, i.e. while it still waits on ln_started
+        */
+       if (!completion_done(&the_lnet.ln_started))
+               complete(&the_lnet.ln_started);
+       else
+               wake_up(&the_lnet.ln_dc_waitq);
 
        wait_event(the_lnet.ln_dc_waitq,
                   the_lnet.ln_dc_state == LNET_DC_STATE_SHUTDOWN);
@@ -3690,7 +3702,7 @@ lnet_peer_ni_add_to_recoveryq_locked(struct lnet_peer_ni *lpni)
 
        if (list_empty(&lpni->lpni_recovery) &&
            atomic_read(&lpni->lpni_healthv) < LNET_MAX_HEALTH_VALUE) {
-               CERROR("lpni %s added to recovery queue. Health = %d\n",
+               CDEBUG(D_NET, "lpni %s added to recovery queue. Health = %d\n",
                        libcfs_nid2str(lpni->lpni_nid),
                        atomic_read(&lpni->lpni_healthv));
                list_add_tail(&lpni->lpni_recovery, &the_lnet.ln_mt_peerNIRecovq);