Whamcloud - gitweb
Branch HEAD
author dzogin <dzogin>
Tue, 25 Aug 2009 00:20:06 +0000 (00:20 +0000)
committer dzogin <dzogin>
Tue, 25 Aug 2009 00:20:06 +0000 (00:20 +0000)
 b=19566
 i=oleg.drokin
 i=nathan.rutman
 Modified Files:
  lustre/ChangeLog lustre/obdclass/obd_mount.c
Bugzilla   : 19566
Description: Prevent inconsistencies between Linux and Lustre mount structures.
Details    : Wait indefinitely in server_wait_finished() until mnt_count drops.
     Make the sleep interruptible.

lustre/ChangeLog
lustre/obdclass/obd_mount.c

index fd386f3..6c1c647 100644 (file)
@@ -13,6 +13,12 @@ tbd  Sun Microsystems, Inc.
         removed cwd "./" (refer to Bugzilla 14399).
        * File join has been disabled in this release, refer to Bugzilla 16929.
 
+Severity   : normal
+Bugzilla   : 19566
+Description: Prevent inconsistencies between Linux and Lustre mount structures.
+Details    : Wait indefinitely in server_wait_finished() until mnt_count drops.
+            Make the sleep interruptible.
+
 Severity   : enhancement
 Bugzilla   : 19955
 Description: provide server to client comms path
index 72b5128..b6e68f7 100644 (file)
@@ -1373,28 +1373,42 @@ out_free:
         RETURN(ERR_PTR(rc));
 }
 
+/* Wait here forever until the mount refcount is 0 before completing umount,
+ * else we risk dereferencing a null pointer.
+ * LNET may take e.g. 165s before killing zombies.
+ */
 static void server_wait_finished(struct vfsmount *mnt)
 {
-        wait_queue_head_t   waitq;
-        struct l_wait_info  lwi;
-        int                 retries = 330;
-
-        init_waitqueue_head(&waitq);
-
-        while ((atomic_read(&mnt->mnt_count) > 1) && (retries > 0)) {
-                LCONSOLE_WARN("%s: Mount still busy with %d refs, waiting for "
-                              "%d secs...\n", mnt->mnt_devname,
-                              atomic_read(&mnt->mnt_count), retries);
-
-                /* Wait for a bit */
-                retries -= 5;
-                lwi = LWI_TIMEOUT(5 * HZ, NULL, NULL);
-                l_wait_event(waitq, 0, &lwi);
-        }
-        if (atomic_read(&mnt->mnt_count) > 1) {
-                CERROR("%s: Mount still busy (%d refs), giving up.\n",
-                       mnt->mnt_devname, atomic_read(&mnt->mnt_count));
-        }
+       cfs_waitq_t             waitq;
+       int                     rc, waited = 0;
+       cfs_sigset_t            blocked;
+
+       cfs_waitq_init(&waitq);
+
+       while (cfs_atomic_read(&mnt->mnt_count) > 1) {
+               if (waited && (waited % 30 == 0))
+                       LCONSOLE_WARN("Mount still busy with %d refs after "
+                                      "%d secs.\n",
+                                      atomic_read(&mnt->mnt_count),
+                                      waited);
+               /* Cannot use l_event_wait() for an interruptible sleep. */
+               waited += 3;
+               blocked = l_w_e_set_sigs(sigmask(SIGKILL));
+               cfs_waitq_wait_event_interruptible_timeout(
+                       waitq,
+                       (cfs_atomic_read(&mnt->mnt_count) == 1),
+                       cfs_time_seconds(3),
+                       rc);
+               cfs_block_sigs(blocked);
+               if (rc < 0) {
+                       LCONSOLE_EMERG("Danger: interrupted umount %s with "
+                                      "%d refs!\n",
+                                     mnt->mnt_devname,
+                                      atomic_read(&mnt->mnt_count));
+                       break;
+               }
+
+       }
 }
 
 static void server_put_super(struct super_block *sb)