From 64441fe62f7819a4437da90205aaf3227f83e35a Mon Sep 17 00:00:00 2001 From: dzogin Date: Sat, 22 Aug 2009 16:07:33 +0000 Subject: [PATCH] Branch b1_8 b=19566 i=oleg.drokin i=nathan.rutman Modified Files: Tag: b1_8 lustre/ChangeLog lustre/obdclass/obd_mount.c Description: Prevent inconsistencies between linux and lustre mount structures. Details : Wait indefinitely in server_wait_finished() until mnt_count drops. Make the sleep interruptible. --- lustre/ChangeLog | 6 ++++++ lustre/obdclass/obd_mount.c | 48 ++++++++++++++++++++++++++++----------------- 2 files changed, 36 insertions(+), 18 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 3d46722..2fc3569 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -15,6 +15,12 @@ tbd Sun Microsystems, Inc. more information, please refer to bugzilla 17630. Severity : normal +Bugzilla : 19566 +Description: Prevent inconsistencies between linux and lustre mount structures. +Details : Wait indefinitely in server_wait_finished() until mnt_count drops. + Make the sleep interruptible. + +Severity : normal Bugzilla : 20146 Description: Increase of the size of the LDLM resource hash. Details : Bump up RES_HASH_BITS=12. diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c index 0bc3729..b573f13 100644 --- a/lustre/obdclass/obd_mount.c +++ b/lustre/obdclass/obd_mount.c @@ -1350,26 +1350,38 @@ out_free: RETURN(ERR_PTR(rc)); } +/* Wait here forever until the mount refcount is 0 before completing umount, + * else we risk dereferencing a null pointer. + * LNET may take e.g. 165s before killing zombies. 
+*/ static void server_wait_finished(struct vfsmount *mnt) { - wait_queue_head_t waitq; - struct l_wait_info lwi; - int retries = 330; - - init_waitqueue_head(&waitq); - - while ((atomic_read(&mnt->mnt_count) > 1) && (retries > 0)) { - LCONSOLE_WARN("Mount still busy with %d refs, waiting for " - "%d secs...\n", - atomic_read(&mnt->mnt_count), retries); - /* Wait for a bit */ - retries -= 5; - lwi = LWI_TIMEOUT(cfs_time_seconds(5), NULL, NULL); - l_wait_event(waitq, 0, &lwi); - } - if (atomic_read(&mnt->mnt_count) > 1) { - CERROR("Mount %p is still busy (%d refs), giving up.\n", - mnt, atomic_read(&mnt->mnt_count)); + cfs_waitq_t waitq; + int rc, waited = 0; + cfs_sigset_t blocked; + + cfs_waitq_init(&waitq); + + while (cfs_atomic_read(&mnt->mnt_count) > 1) { + if (waited && (waited % 30 == 0)) + LCONSOLE_WARN("Mount still busy with %d refs after " + "%d secs.\n", + atomic_read(&mnt->mnt_count), + waited); + /* Cannot use l_wait_event() for an interruptible sleep. */ + waited += 3; + blocked = l_w_e_set_sigs(sigmask(SIGKILL)); + rc = cfs_waitq_wait_event_interruptible_timeout( + waitq, + (cfs_atomic_read(&mnt->mnt_count) == 1), + cfs_time_seconds(3)); + cfs_block_sigs(blocked); + if (rc < 0) { + LCONSOLE_EMERG("Danger: interrupted umount %p with " + "%d refs!\n", + mnt, atomic_read(&mnt->mnt_count)); + break; + } } } -- 1.8.3.1