Whamcloud - gitweb
LU-4780 lnet: NI shutdown may loop forever 89/10489/4
author Liang Zhen <liang.zhen@intel.com>
Tue, 18 Mar 2014 12:55:28 +0000 (20:55 +0800)
committer Oleg Drokin <oleg.drokin@intel.com>
Wed, 11 Jun 2014 19:35:10 +0000 (19:35 +0000)
lnet_shutdown_lndnis() may enter an endless loop if there is a busy NI.
This bug was introduced by the LNet SMP improvements and is fixed by this patch.

Lustre-change: http://review.whamcloud.com/9706
Lustre-commit: 3efa31eacc612261ea3019fbd7243e573714042c

Signed-off-by: Liang Zhen <liang.zhen@intel.com>
Reviewed-by: Isaac Huang <he.huang@intel.com>
Reviewed-by: Bobi Jam <bobijam@gmail.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
Change-Id: If6bf6a8362d8808cd924d6fe79dbe43151f41f30
Reviewed-on: http://review.whamcloud.com/10489
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
lnet/lnet/api-ni.c

index eb220b3..317b254 100644 (file)
@@ -1100,7 +1100,7 @@ lnet_shutdown_lndnis (void)
                int     j;
 
                ni = cfs_list_entry(the_lnet.ln_nis_zombie.next,
-                                   lnet_ni_t, ni_list);
+                               lnet_ni_t, ni_list);
                cfs_list_del_init(&ni->ni_list);
                cfs_percpt_for_each(ref, j, ni->ni_refs) {
                        if (*ref == 0)
@@ -1110,12 +1110,11 @@ lnet_shutdown_lndnis (void)
                        break;
                }
 
-               while (!cfs_list_empty(&ni->ni_list)) {
+               if (!cfs_list_empty(&ni->ni_list)) {
                        lnet_net_unlock(LNET_LOCK_EX);
                        ++i;
                        if ((i & (-i)) == i) {
-                               CDEBUG(D_WARNING,
-                                      "Waiting for zombie LNI %s\n",
+                               CDEBUG(D_WARNING, "Waiting for zombie LNI %s\n",
                                       libcfs_nid2str(ni->ni_nid));
                        }
                        cfs_pause(cfs_time_seconds(1));
@@ -1134,11 +1133,13 @@ lnet_shutdown_lndnis (void)
                /* can't deref lnd anymore now; it might have unregistered
                 * itself...  */
 
-                if (!islo)
-                        CDEBUG(D_LNI, "Removed LNI %s\n",
-                               libcfs_nid2str(ni->ni_nid));
+               if (!islo)
+                       CDEBUG(D_LNI, "Removed LNI %s\n",
+                              libcfs_nid2str(ni->ni_nid));
 
                lnet_ni_free(ni);
+               i = 2;
+
                lnet_net_lock(LNET_LOCK_EX);
        }