Whamcloud - gitweb
LU-4780 lnet: NI shutdown may loop forever 06/9706/2
author Liang Zhen <liang.zhen@intel.com>
Tue, 18 Mar 2014 12:55:28 +0000 (20:55 +0800)
committer Oleg Drokin <oleg.drokin@intel.com>
Tue, 25 Mar 2014 23:51:18 +0000 (23:51 +0000)
lnet_shutdown_lndnis() may enter an endless loop if there is a busy NI;
this bug was introduced by the LNet SMP improvements. This patch fixes it.

Signed-off-by: Liang Zhen <liang.zhen@intel.com>
Change-Id: I612ab9ff15aa40c05d6bd4cfee7cd107715f41b4
Reviewed-on: http://review.whamcloud.com/9706
Tested-by: Jenkins
Reviewed-by: Isaac Huang <he.huang@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Bobi Jam <bobijam@gmail.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lnet/lnet/api-ni.c

index d233ba6..2d5f0f3 100644 (file)
@@ -1095,27 +1095,26 @@ lnet_shutdown_lndnis (void)
        /* Now wait for the NI's I just nuked to show up on ln_zombie_nis
         * and shut them down in guaranteed thread context */
        i = 2;
        /* Now wait for the NI's I just nuked to show up on ln_zombie_nis
         * and shut them down in guaranteed thread context */
        i = 2;
-       while (!cfs_list_empty(&the_lnet.ln_nis_zombie)) {
+       while (!list_empty(&the_lnet.ln_nis_zombie)) {
                int     *ref;
                int     j;
 
                int     *ref;
                int     j;
 
-               ni = cfs_list_entry(the_lnet.ln_nis_zombie.next,
-                                   lnet_ni_t, ni_list);
-               cfs_list_del_init(&ni->ni_list);
+               ni = list_entry(the_lnet.ln_nis_zombie.next,
+                               lnet_ni_t, ni_list);
+               list_del_init(&ni->ni_list);
                cfs_percpt_for_each(ref, j, ni->ni_refs) {
                        if (*ref == 0)
                                continue;
                        /* still busy, add it back to zombie list */
                cfs_percpt_for_each(ref, j, ni->ni_refs) {
                        if (*ref == 0)
                                continue;
                        /* still busy, add it back to zombie list */
-                       cfs_list_add(&ni->ni_list, &the_lnet.ln_nis_zombie);
+                       list_add(&ni->ni_list, &the_lnet.ln_nis_zombie);
                        break;
                }
 
                        break;
                }
 
-               while (!cfs_list_empty(&ni->ni_list)) {
+               if (!list_empty(&ni->ni_list)) {
                        lnet_net_unlock(LNET_LOCK_EX);
                        ++i;
                        if ((i & (-i)) == i) {
                        lnet_net_unlock(LNET_LOCK_EX);
                        ++i;
                        if ((i & (-i)) == i) {
-                               CDEBUG(D_WARNING,
-                                      "Waiting for zombie LNI %s\n",
+                               CDEBUG(D_WARNING, "Waiting for zombie LNI %s\n",
                                       libcfs_nid2str(ni->ni_nid));
                        }
                        cfs_pause(cfs_time_seconds(1));
                                       libcfs_nid2str(ni->ni_nid));
                        }
                        cfs_pause(cfs_time_seconds(1));
@@ -1134,11 +1133,13 @@ lnet_shutdown_lndnis (void)
                /* can't deref lnd anymore now; it might have unregistered
                 * itself...  */
 
                /* can't deref lnd anymore now; it might have unregistered
                 * itself...  */
 
-                if (!islo)
-                        CDEBUG(D_LNI, "Removed LNI %s\n",
-                               libcfs_nid2str(ni->ni_nid));
+               if (!islo)
+                       CDEBUG(D_LNI, "Removed LNI %s\n",
+                              libcfs_nid2str(ni->ni_nid));
 
                lnet_ni_free(ni);
 
                lnet_ni_free(ni);
+               i = 2;
+
                lnet_net_lock(LNET_LOCK_EX);
        }
 
                lnet_net_lock(LNET_LOCK_EX);
        }