From bf04dd4aa56b516ff7d2c3d4e3b27cce79241446 Mon Sep 17 00:00:00 2001 From: dzogin Date: Tue, 25 Aug 2009 00:20:06 +0000 Subject: [PATCH] Branch HEAD b=19566 i=oleg.drokin i=nathan.rutman Modified Files: lustre/ChangeLog lustre/obdclass/obd_mount.c Bugzilla : 19566 Description: Prevent inconsistencies between linux and lustre mount structures. Details : Wait indefinitely in server_wait_finished() until mnt_count drops. Make the sleep interruptible. --- lustre/ChangeLog | 6 +++++ lustre/obdclass/obd_mount.c | 54 ++++++++++++++++++++++++++++----------------- 2 files changed, 40 insertions(+), 20 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index fd386f3..6c1c647 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -13,6 +13,12 @@ tbd Sun Microsystems, Inc. removed cwd "./" (refer to Bugzilla 14399). * File join has been disabled in this release, refer to Bugzilla 16929. +Severity : normal +Bugzilla : 19566 +Description: Prevent inconsistencies between linux and lustre mount structures. +Details : Wait indefinitely in server_wait_finished() until mnt_count drops. + Make the sleep interruptible. + Severity : enhancement Bugzilla : 19955 Description: provide server to client comms path diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c index 72b5128..b6e68f7 100644 --- a/lustre/obdclass/obd_mount.c +++ b/lustre/obdclass/obd_mount.c @@ -1373,28 +1373,42 @@ out_free: RETURN(ERR_PTR(rc)); } +/* Wait here forever until the mount refcount drops to 1 before completing umount, + * else we risk dereferencing a null pointer. + * LNET may take e.g. 165s before killing zombies.
+ */ static void server_wait_finished(struct vfsmount *mnt) { - wait_queue_head_t waitq; - struct l_wait_info lwi; - int retries = 330; - - init_waitqueue_head(&waitq); - - while ((atomic_read(&mnt->mnt_count) > 1) && (retries > 0)) { - LCONSOLE_WARN("%s: Mount still busy with %d refs, waiting for " - "%d secs...\n", mnt->mnt_devname, - atomic_read(&mnt->mnt_count), retries); - - /* Wait for a bit */ - retries -= 5; - lwi = LWI_TIMEOUT(5 * HZ, NULL, NULL); - l_wait_event(waitq, 0, &lwi); - } - if (atomic_read(&mnt->mnt_count) > 1) { - CERROR("%s: Mount still busy (%d refs), giving up.\n", - mnt->mnt_devname, atomic_read(&mnt->mnt_count)); - } + cfs_waitq_t waitq; + int rc, waited = 0; + cfs_sigset_t blocked; + + cfs_waitq_init(&waitq); + + while (cfs_atomic_read(&mnt->mnt_count) > 1) { + if (waited && (waited % 30 == 0)) + LCONSOLE_WARN("Mount still busy with %d refs after " + "%d secs.\n", + atomic_read(&mnt->mnt_count), + waited); + /* Cannot use l_wait_event() for an interruptible sleep. */ + waited += 3; + blocked = l_w_e_set_sigs(sigmask(SIGKILL)); + cfs_waitq_wait_event_interruptible_timeout( + waitq, + (cfs_atomic_read(&mnt->mnt_count) == 1), + cfs_time_seconds(3), + rc); + cfs_block_sigs(blocked); + if (rc < 0) { + LCONSOLE_EMERG("Danger: interrupted umount %s with " + "%d refs!\n", + mnt->mnt_devname, + atomic_read(&mnt->mnt_count)); + break; + } + + } } static void server_put_super(struct super_block *sb) -- 1.8.3.1