Whamcloud - gitweb
LU-11514 lnet: separate ni state from recovery
[fs/lustre-release.git] / lnet / lnet / lib-move.c
index d5f1132..e217e95 100644 (file)
@@ -957,10 +957,9 @@ lnet_post_send_locked(struct lnet_msg *msg, int do_send)
 
                CNETERR("Dropping message for %s: peer not alive\n",
                        libcfs_id2str(msg->msg_target));
-               if (do_send) {
-                       msg->msg_health_status = LNET_MSG_STATUS_LOCAL_DROPPED;
+               msg->msg_health_status = LNET_MSG_STATUS_LOCAL_DROPPED;
+               if (do_send)
                        lnet_finalize(msg, -EHOSTUNREACH);
-               }
 
                lnet_net_lock(cpt);
                return -EHOSTUNREACH;
@@ -1610,6 +1609,7 @@ lnet_handle_send(struct lnet_send_data *sd)
        __u32 send_case = sd->sd_send_case;
        int rc;
        __u32 routing = send_case & REMOTE_DST;
+        struct lnet_rsp_tracker *rspt;
 
        /*
         * Increment sequence number of the selected peer so that we
@@ -1702,6 +1702,19 @@ lnet_handle_send(struct lnet_send_data *sd)
                msg->msg_hdr.dest_nid = cpu_to_le64(msg->msg_txpeer->lpni_nid);
        }
 
+       /*
+        * If the MD has a response tracker attached, record the next-hop
+        * NID in it.
+        */
+       if (msg->msg_md) {
+               rspt = msg->msg_md->md_rspt_ptr;
+               if (rspt) {
+                       rspt->rspt_next_hop_nid = msg->msg_txpeer->lpni_nid;
+                       CDEBUG(D_NET, "rspt_next_hop_nid = %s\n",
+                              libcfs_nid2str(rspt->rspt_next_hop_nid));
+               }
+       }
+
        rc = lnet_post_send_locked(msg, 0);
 
        if (!rc)
@@ -2734,6 +2747,9 @@ lnet_finalize_expired_responses(bool force)
 
                        if (ktime_compare(ktime_get(), rspt->rspt_deadline) >= 0 ||
                            force) {
+                               struct lnet_peer_ni *lpni;
+                               lnet_nid_t nid;
+
                                md = lnet_handle2md(&rspt->rspt_mdh);
                                if (!md) {
                                        LNetInvalidateMDHandle(&rspt->rspt_mdh);
@@ -2752,9 +2768,25 @@ lnet_finalize_expired_responses(bool force)
 
                                list_del_init(&rspt->rspt_on_list);
 
-                               CNETERR("Response timed out: md = %p\n", md);
+                               nid = rspt->rspt_next_hop_nid;
+
+                               CNETERR("Response timed out: md = %p: nid = %s\n",
+                                       md, libcfs_nid2str(nid));
                                LNetMDUnlink(rspt->rspt_mdh);
                                lnet_rspt_free(rspt, i);
+
+                               /*
+                                * If there is a timeout on the response
+                                * from the next hop decrement its health
+                                * value so that we don't use it
+                                */
+                               lnet_net_lock(0);
+                               lpni = lnet_find_peer_ni_locked(nid);
+                               if (lpni) {
+                                       lnet_handle_remote_failure_locked(lpni);
+                                       lnet_peer_ni_decref_locked(lpni);
+                               }
+                               lnet_net_unlock(0);
                        } else {
                                lnet_res_unlock(i);
                                break;
@@ -2852,13 +2884,14 @@ lnet_resend_pending_msgs(void)
 
 /* called with cpt and ni_lock held */
 static void
-lnet_unlink_ni_recovery_mdh_locked(struct lnet_ni *ni, int cpt)
+lnet_unlink_ni_recovery_mdh_locked(struct lnet_ni *ni, int cpt, bool force)
 {
        struct lnet_handle_md recovery_mdh;
 
        LNetInvalidateMDHandle(&recovery_mdh);
 
-       if (ni->ni_state & LNET_NI_STATE_RECOVERY_PENDING) {
+       if (ni->ni_recovery_state & LNET_NI_RECOVERY_PENDING ||
+           force) {
                recovery_mdh = ni->ni_ping_mdh;
                LNetInvalidateMDHandle(&ni->ni_ping_mdh);
        }
@@ -2911,15 +2944,26 @@ lnet_recover_local_nis(void)
 
                lnet_net_lock(0);
                lnet_ni_lock(ni);
-               if (!(ni->ni_state & LNET_NI_STATE_ACTIVE) ||
+               if (ni->ni_state != LNET_NI_STATE_ACTIVE ||
                    healthv == LNET_MAX_HEALTH_VALUE) {
                        list_del_init(&ni->ni_recovery);
-                       lnet_unlink_ni_recovery_mdh_locked(ni, 0);
+                       lnet_unlink_ni_recovery_mdh_locked(ni, 0, false);
                        lnet_ni_unlock(ni);
                        lnet_ni_decref_locked(ni, 0);
                        lnet_net_unlock(0);
                        continue;
                }
+
+               /*
+                * if the local NI failed recovery we must unlink the md.
+                * But we want to keep the local_ni on the recovery queue
+                * so we can continue the attempts to recover it.
+                */
+               if (ni->ni_recovery_state & LNET_NI_RECOVERY_FAILED) {
+                       lnet_unlink_ni_recovery_mdh_locked(ni, 0, true);
+                       ni->ni_recovery_state &= ~LNET_NI_RECOVERY_FAILED;
+               }
+
                lnet_ni_unlock(ni);
                lnet_net_unlock(0);
 
@@ -2928,8 +2972,8 @@ lnet_recover_local_nis(void)
                       libcfs_nid2str(ni->ni_nid));
 
                lnet_ni_lock(ni);
-               if (!(ni->ni_state & LNET_NI_STATE_RECOVERY_PENDING)) {
-                       ni->ni_state |= LNET_NI_STATE_RECOVERY_PENDING;
+               if (!(ni->ni_recovery_state & LNET_NI_RECOVERY_PENDING)) {
+                       ni->ni_recovery_state |= LNET_NI_RECOVERY_PENDING;
                        lnet_ni_unlock(ni);
 
                        LIBCFS_ALLOC(ev_info, sizeof(*ev_info));
@@ -2937,7 +2981,8 @@ lnet_recover_local_nis(void)
                                CERROR("out of memory. Can't recover %s\n",
                                       libcfs_nid2str(ni->ni_nid));
                                lnet_ni_lock(ni);
-                               ni->ni_state &= ~LNET_NI_STATE_RECOVERY_PENDING;
+                               ni->ni_recovery_state &=
+                                 ~LNET_NI_RECOVERY_PENDING;
                                lnet_ni_unlock(ni);
                                continue;
                        }
@@ -3009,7 +3054,7 @@ lnet_recover_local_nis(void)
 
                        lnet_ni_lock(ni);
                        if (rc)
-                               ni->ni_state &= ~LNET_NI_STATE_RECOVERY_PENDING;
+                               ni->ni_recovery_state &= ~LNET_NI_RECOVERY_PENDING;
                }
                lnet_ni_unlock(ni);
        }
@@ -3073,7 +3118,7 @@ lnet_clean_local_ni_recoveryq(void)
                                struct lnet_ni, ni_recovery);
                list_del_init(&ni->ni_recovery);
                lnet_ni_lock(ni);
-               lnet_unlink_ni_recovery_mdh_locked(ni, 0);
+               lnet_unlink_ni_recovery_mdh_locked(ni, 0, true);
                lnet_ni_unlock(ni);
                lnet_ni_decref_locked(ni, 0);
        }
@@ -3082,13 +3127,14 @@ lnet_clean_local_ni_recoveryq(void)
 }
 
 static void
-lnet_unlink_lpni_recovery_mdh_locked(struct lnet_peer_ni *lpni, int cpt)
+lnet_unlink_lpni_recovery_mdh_locked(struct lnet_peer_ni *lpni, int cpt,
+                                    bool force)
 {
        struct lnet_handle_md recovery_mdh;
 
        LNetInvalidateMDHandle(&recovery_mdh);
 
-       if (lpni->lpni_state & LNET_PEER_NI_RECOVERY_PENDING) {
+       if (lpni->lpni_state & LNET_PEER_NI_RECOVERY_PENDING || force) {
                recovery_mdh = lpni->lpni_recovery_ping_mdh;
                LNetInvalidateMDHandle(&lpni->lpni_recovery_ping_mdh);
        }
@@ -3111,7 +3157,7 @@ lnet_clean_peer_ni_recoveryq(void)
                                 lpni_recovery) {
                list_del_init(&lpni->lpni_recovery);
                spin_lock(&lpni->lpni_lock);
-               lnet_unlink_lpni_recovery_mdh_locked(lpni, LNET_LOCK_EX);
+               lnet_unlink_lpni_recovery_mdh_locked(lpni, LNET_LOCK_EX, true);
                spin_unlock(&lpni->lpni_lock);
                lnet_peer_ni_decref_locked(lpni);
        }
@@ -3179,12 +3225,23 @@ lnet_recover_peer_nis(void)
                if (lpni->lpni_state & LNET_PEER_NI_DELETING ||
                    healthv == LNET_MAX_HEALTH_VALUE) {
                        list_del_init(&lpni->lpni_recovery);
-                       lnet_unlink_lpni_recovery_mdh_locked(lpni, 0);
+                       lnet_unlink_lpni_recovery_mdh_locked(lpni, 0, false);
                        spin_unlock(&lpni->lpni_lock);
                        lnet_peer_ni_decref_locked(lpni);
                        lnet_net_unlock(0);
                        continue;
                }
+
+               /*
+                * If the peer NI has failed recovery we must unlink the
+                * md. But we want to keep the peer ni on the recovery
+                * queue so we can try to continue recovering it
+                */
+               if (lpni->lpni_state & LNET_PEER_NI_RECOVERY_FAILED) {
+                       lnet_unlink_lpni_recovery_mdh_locked(lpni, 0, true);
+                       lpni->lpni_state &= ~LNET_PEER_NI_RECOVERY_FAILED;
+               }
+
                spin_unlock(&lpni->lpni_lock);
                lnet_net_unlock(0);
 
@@ -3269,7 +3326,10 @@ lnet_recover_peer_nis(void)
 static int
 lnet_monitor_thread(void *arg)
 {
-       int wakeup_counter = 0;
+       time64_t recovery_timeout = 0;
+       time64_t rsp_timeout = 0;
+       int interval;
+       time64_t now;
 
        /*
         * The monitor thread takes care of the following:
@@ -3284,20 +3344,23 @@ lnet_monitor_thread(void *arg)
        cfs_block_allsigs();
 
        while (the_lnet.ln_mt_state == LNET_MT_STATE_RUNNING) {
+               now = ktime_get_real_seconds();
+
                if (lnet_router_checker_active())
                        lnet_check_routers();
 
                lnet_resend_pending_msgs();
 
-               wakeup_counter++;
-               if (wakeup_counter >= lnet_transaction_timeout / 2) {
+               if (now >= rsp_timeout) {
                        lnet_finalize_expired_responses(false);
-                       wakeup_counter = 0;
+                       rsp_timeout = now + (lnet_transaction_timeout / 2);
                }
 
-               lnet_recover_local_nis();
-
-               lnet_recover_peer_nis();
+               if (now >= recovery_timeout) {
+                       lnet_recover_local_nis();
+                       lnet_recover_peer_nis();
+                       recovery_timeout = now + lnet_recovery_interval;
+               }
 
                /*
                 * TODO do we need to check if we should sleep without
@@ -3308,9 +3371,11 @@ lnet_monitor_thread(void *arg)
                 * cases where we get a complaint that an idle thread
                 * is waking up unnecessarily.
                 */
+               interval = min(lnet_recovery_interval,
+                              lnet_transaction_timeout / 2);
                wait_event_interruptible_timeout(the_lnet.ln_mt_waitq,
                                                false,
-                                               cfs_time_seconds(1));
+                                               cfs_time_seconds(interval));
        }
 
        /* clean up the router checker */
@@ -3404,12 +3469,15 @@ lnet_handle_recovery_reply(struct lnet_mt_event_info *ev_info,
                        return;
                }
                lnet_ni_lock(ni);
-               ni->ni_state &= ~LNET_NI_STATE_RECOVERY_PENDING;
+               ni->ni_recovery_state &= ~LNET_NI_RECOVERY_PENDING;
+               if (status)
+                       ni->ni_recovery_state |= LNET_NI_RECOVERY_FAILED;
                lnet_ni_unlock(ni);
                lnet_net_unlock(0);
 
                if (status != 0) {
-                       CERROR("local NI recovery failed with %d\n", status);
+                       CERROR("local NI (%s) recovery failed with %d\n",
+                              libcfs_nid2str(nid), status);
                        return;
                }
                /*
@@ -3432,12 +3500,15 @@ lnet_handle_recovery_reply(struct lnet_mt_event_info *ev_info,
                }
                spin_lock(&lpni->lpni_lock);
                lpni->lpni_state &= ~LNET_PEER_NI_RECOVERY_PENDING;
+               if (status)
+                       lpni->lpni_state |= LNET_PEER_NI_RECOVERY_FAILED;
                spin_unlock(&lpni->lpni_lock);
                lnet_peer_ni_decref_locked(lpni);
                lnet_net_unlock(cpt);
 
                if (status != 0)
-                       CERROR("peer NI recovery failed with %d\n", status);
+                       CERROR("peer NI (%s) recovery failed with %d\n",
+                              libcfs_nid2str(nid), status);
        }
 }
 
@@ -3467,6 +3538,7 @@ lnet_mt_event_handler(struct lnet_event *event)
                               libcfs_nid2str(ev_info->mt_nid),
                               (event->status) ? "unsuccessfully" :
                               "successfully", event->status);
+               lnet_handle_recovery_reply(ev_info, event->status);
                break;
        default:
                CERROR("Unexpected event: %d\n", event->type);