Whamcloud - gitweb
LU-11273 lnet: update logging 44/33044/2
author Amir Shehata <ashehata@whamcloud.com>
Tue, 21 Aug 2018 19:29:27 +0000 (12:29 -0700)
committer Oleg Drokin <green@whamcloud.com>
Tue, 4 Sep 2018 03:48:51 +0000 (03:48 +0000)
Add the retry count when logging message sending/resending.
Make timed out responses visible on net error.
Log cases when a message is not resent.

Test-Parameters: trivial
Signed-off-by: Amir Shehata <ashehata@whamcloud.com>
Change-Id: I0908d495c8ba54754fa77b0fc3b5df59317bb2e8
Reviewed-on: https://review.whamcloud.com/33044
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Olaf Weber <olaf.weber@hpe.com>
Reviewed-by: Doug Oucharek <dougso@me.com>
Reviewed-by: Sonia Sharma <sharmaso@whamcloud.com>
Reviewed-by: James Simmons <uja.ornl@yahoo.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/lnet/lib-move.c
lnet/lnet/lib-msg.c

index eee5901..c572751 100644 (file)
@@ -1704,14 +1704,14 @@ lnet_handle_send(struct lnet_send_data *sd)
        rc = lnet_post_send_locked(msg, 0);
 
        if (!rc)
-               CDEBUG(D_NET, "TRACE: %s(%s:%s) -> %s(%s:%s) : %s\n",
+               CDEBUG(D_NET, "TRACE: %s(%s:%s) -> %s(%s:%s) : %s try# %d\n",
                       libcfs_nid2str(msg->msg_hdr.src_nid),
                       libcfs_nid2str(msg->msg_txni->ni_nid),
                       libcfs_nid2str(sd->sd_src_nid),
                       libcfs_nid2str(msg->msg_hdr.dest_nid),
                       libcfs_nid2str(sd->sd_dst_nid),
                       libcfs_nid2str(msg->msg_txpeer->lpni_nid),
-                      lnet_msgtyp2str(msg->msg_type));
+                      lnet_msgtyp2str(msg->msg_type), msg->msg_retry_count);
 
        return rc;
 }
@@ -2751,7 +2751,7 @@ lnet_finalize_expired_responses(bool force)
 
                                list_del_init(&rspt->rspt_on_list);
 
-                               CDEBUG(D_NET, "Response timed out: md = %p\n", md);
+                               CNETERR("Response timed out: md = %p\n", md);
                                LNetMDUnlink(rspt->rspt_mdh);
                                lnet_rspt_free(rspt, i);
                        } else {
@@ -2816,11 +2816,12 @@ lnet_resend_pending_msgs_locked(struct list_head *resendq, int cpt)
                        lnet_peer_ni_decref_locked(lpni);
 
                        lnet_net_unlock(cpt);
-                       CDEBUG(D_NET, "resending %s->%s: %s recovery %d\n",
+                       CDEBUG(D_NET, "resending %s->%s: %s recovery %d try# %d\n",
                               libcfs_nid2str(src_nid),
                               libcfs_id2str(msg->msg_target),
                               lnet_msgtyp2str(msg->msg_type),
-                              msg->msg_recovery);
+                              msg->msg_recovery,
+                              msg->msg_retry_count);
                        rc = lnet_send(src_nid, msg, LNET_NID_ANY);
                        if (rc) {
                                CERROR("Error sending %s to %s: %d\n",
index 8fed27f..3bd6946 100644 (file)
@@ -691,19 +691,34 @@ lnet_health_check(struct lnet_msg *msg)
 
 resend:
        /* don't resend recovery messages */
-       if (msg->msg_recovery)
+       if (msg->msg_recovery) {
+               CDEBUG(D_NET, "msg %s->%s is a recovery ping. retry# %d\n",
+                       libcfs_nid2str(msg->msg_from),
+                       libcfs_nid2str(msg->msg_target.nid),
+                       msg->msg_retry_count);
                return -1;
+       }
 
        /*
         * if we explicitly indicated we don't want to resend then just
         * return
         */
-       if (msg->msg_no_resend)
+       if (msg->msg_no_resend) {
+               CDEBUG(D_NET, "msg %s->%s requested no resend. retry# %d\n",
+                       libcfs_nid2str(msg->msg_from),
+                       libcfs_nid2str(msg->msg_target.nid),
+                       msg->msg_retry_count);
                return -1;
+       }
 
        /* check if the message has exceeded the number of retries */
-       if (msg->msg_retry_count >= lnet_retry_count)
+       if (msg->msg_retry_count >= lnet_retry_count) {
+               CNETERR("msg %s->%s exceeded retry count %d\n",
+                       libcfs_nid2str(msg->msg_from),
+                       libcfs_nid2str(msg->msg_target.nid),
+                       msg->msg_retry_count);
                return -1;
+       }
        msg->msg_retry_count++;
 
        lnet_net_lock(msg->msg_tx_cpt);