Whamcloud - gitweb
LU-56 lnet: eliminate a few locking dances in LNet
author Liang Zhen <liang@whamcloud.com>
Tue, 29 May 2012 07:34:13 +0000 (15:34 +0800)
committer Andreas Dilger <adilger@whamcloud.com>
Thu, 21 Jun 2012 03:05:34 +0000 (23:05 -0400)
This patch gets rid of a few unnecessary locking dances in LNet.
It also changes the type of lnet_ni_t::ni_last_alive from jiffies to
seconds, because per-second checking should be good enough.

Signed-off-by: Liang Zhen <liang@whamcloud.com>
Change-Id: I0e3269f0a5ae0cfecd9611584d5e213a670e38f6
Reviewed-on: http://review.whamcloud.com/2933
Tested-by: Hudson
Reviewed-by: Doug Oucharek <doug@whamcloud.com>
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Bobi Jam <bobijam@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lnet/include/lnet/lib-types.h
lnet/lnet/api-ni.c
lnet/lnet/lib-move.c
lnet/lnet/router.c

index 8febd47..2e5133a 100644 (file)
@@ -401,9 +401,11 @@ typedef struct lnet_ni {
         void             *ni_data;              /* instance-specific data */
         lnd_t            *ni_lnd;               /* procedural interface */
         int               ni_refcount;          /* reference count */
-        cfs_time_t        ni_last_alive;        /* when I was last alive */
-        lnet_ni_status_t *ni_status;            /* my health status */
-        char             *ni_interfaces[LNET_MAX_INTERFACES]; /* equivalent interfaces to use */
+       /* when I was last alive */
+       long                    ni_last_alive;
+       lnet_ni_status_t        *ni_status;     /* my health status */
+       /* equivalent interfaces to use */
+       char                    *ni_interfaces[LNET_MAX_INTERFACES];
 } lnet_ni_t;
 
 #define LNET_PROTO_PING_MATCHBITS     0x8000000000000000LL
index 4912be7..f2b4b2b 100644 (file)
@@ -1182,7 +1182,7 @@ LNetNIInit(lnet_pid_t requested_pid)
         /* Now I may use my own API functions... */
 
         /* NB router checker needs the_lnet.ln_ping_info in
-         * lnet_router_checker -> lnet_update_ni_status */
+        * lnet_router_checker -> lnet_update_ni_status_locked */
         rc = lnet_ping_target_init();
         if (rc != 0)
                 goto failed3;
index c760a92..96fb6db 100644 (file)
@@ -1854,17 +1854,16 @@ lnet_parse(lnet_ni_t *ni, lnet_hdr_t *hdr, lnet_nid_t from_nid,
                 return -EPROTO;
         }
 
-        if (the_lnet.ln_routing) {
-                cfs_time_t now = cfs_time_current();
-
-                LNET_LOCK();
-
-                ni->ni_last_alive = now;
-                if (ni->ni_status != NULL &&
-                    ni->ni_status->ns_status == LNET_NI_STATUS_DOWN)
-                        ni->ni_status->ns_status = LNET_NI_STATUS_UP;
+       if (the_lnet.ln_routing &&
+           ni->ni_last_alive != cfs_time_current_sec()) {
+               LNET_LOCK();
 
-                LNET_UNLOCK();
+               /* NB: so far here is the only place to set NI status to "up" */
+               ni->ni_last_alive = cfs_time_current_sec();
+               if (ni->ni_status != NULL &&
+                   ni->ni_status->ns_status == LNET_NI_STATUS_DOWN)
+                       ni->ni_status->ns_status = LNET_NI_STATUS_UP;
+               LNET_UNLOCK();
         }
 
         /* Regard a bad destination NID as a protocol error.  Senders should
index 08bb57c..86d24ad 100644 (file)
@@ -736,40 +736,35 @@ lnet_wait_known_routerstate(void)
 }
 
 void
-lnet_update_ni_status(void)
+lnet_update_ni_status_locked(void)
 {
-        cfs_time_t now = cfs_time_current();
-        lnet_ni_t *ni;
-        int        status;
-        int        timeout;
-
-        LASSERT (the_lnet.ln_routing);
+       lnet_ni_t       *ni;
+       long            now;
+       int             timeout;
 
-        timeout = router_ping_timeout +
-                  MAX(live_router_check_interval, dead_router_check_interval);
+       LASSERT(the_lnet.ln_routing);
 
-        LNET_LOCK();
+       timeout = router_ping_timeout +
+                 MAX(live_router_check_interval, dead_router_check_interval);
 
-        cfs_list_for_each_entry (ni, &the_lnet.ln_nis, ni_list) {
-                lnet_ni_status_t *ns = ni->ni_status;
+       now = cfs_time_current_sec();
+       cfs_list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
+               if (ni->ni_lnd->lnd_type == LOLND)
+                       continue;
 
-                LASSERT (ns != NULL);
+               if (now < ni->ni_last_alive + timeout)
+                       continue;
 
-                status = LNET_NI_STATUS_UP;
-                if (ni->ni_lnd->lnd_type != LOLND &&  /* @lo forever alive */
-                    cfs_time_after(now, cfs_time_add(ni->ni_last_alive,
-                                                     cfs_time_seconds(timeout))))
-                        status = LNET_NI_STATUS_DOWN;
+               LASSERT(ni->ni_status != NULL);
 
-                if (ns->ns_status != status) {
-                        ns->ns_status = status;
-                        CDEBUG(D_NET, "NI(%s:%d) status changed to %s\n",
-                               libcfs_nid2str(ni->ni_nid), timeout,
-                               status == LNET_NI_STATUS_UP ? "up" : "down");
-                }
-        }
-
-        LNET_UNLOCK();
+               if (ni->ni_status->ns_status != LNET_NI_STATUS_DOWN) {
+                       CDEBUG(D_NET, "NI(%s:%d) status changed to down\n",
+                              libcfs_nid2str(ni->ni_nid), timeout);
+                       /* NB: so far, this is the only place to set
+                        * NI status to "down" */
+                       ni->ni_status->ns_status = LNET_NI_STATUS_DOWN;
+               }
+       }
 }
 
 void
@@ -1196,10 +1191,10 @@ rescan:
                         }
                 }
 
-                LNET_UNLOCK();
+               if (the_lnet.ln_routing)
+                       lnet_update_ni_status_locked();
 
-                if (the_lnet.ln_routing)
-                        lnet_update_ni_status();
+               LNET_UNLOCK();
 
                lnet_prune_rc_data(0); /* don't wait for UNLINK */