more information, please refer to bugzilla 17630.
Severity : normal
+Bugzilla : 19566
+Description: Prevent inconsistencies between linux and lustre mount structures.
+Details : Wait indefinitely in server_wait_finished() until mnt_count drops.
+ Make the sleep interruptible.
+
+Severity : normal
Bugzilla : 20146
Description: Increase of the size of the LDLM resource hash.
Details : Bump up RES_HASH_BITS=12.
RETURN(ERR_PTR(rc));
}
+/* Wait with no retry limit until mnt_count drops to 1 before completing
+ * umount, else we risk dereferencing a null pointer. Interruptible sleep.
+ * LNET may take e.g. 165s before killing zombies.
+*/
static void server_wait_finished(struct vfsmount *mnt)
{
- wait_queue_head_t waitq;
- struct l_wait_info lwi;
- int retries = 330;
-
- init_waitqueue_head(&waitq);
-
- while ((atomic_read(&mnt->mnt_count) > 1) && (retries > 0)) {
- LCONSOLE_WARN("Mount still busy with %d refs, waiting for "
- "%d secs...\n",
- atomic_read(&mnt->mnt_count), retries);
- /* Wait for a bit */
- retries -= 5;
- lwi = LWI_TIMEOUT(cfs_time_seconds(5), NULL, NULL);
- l_wait_event(waitq, 0, &lwi);
- }
- if (atomic_read(&mnt->mnt_count) > 1) {
- CERROR("Mount %p is still busy (%d refs), giving up.\n",
- mnt, atomic_read(&mnt->mnt_count));
+ cfs_waitq_t waitq;
+ int rc, waited = 0;
+ cfs_sigset_t blocked;
+
+ cfs_waitq_init(&waitq);
+
+ while (cfs_atomic_read(&mnt->mnt_count) > 1) {
+ if (waited && (waited % 30 == 0))
+ LCONSOLE_WARN("Mount still busy with %d refs after "
+ "%d secs.\n",
+ atomic_read(&mnt->mnt_count),
+ waited);
+ /* Cannot use l_wait_event() for an interruptible sleep. */
+ waited += 3;
+ blocked = l_w_e_set_sigs(sigmask(SIGKILL));
+ rc = cfs_waitq_wait_event_interruptible_timeout(
+ waitq,
+ (cfs_atomic_read(&mnt->mnt_count) == 1),
+ cfs_time_seconds(3));
+ cfs_block_sigs(blocked);
+ if (rc < 0) {
+ LCONSOLE_EMERG("Danger: interrupted umount %p with "
+ "%d refs!\n",
+ mnt, atomic_read(&mnt->mnt_count));
+ break;
+ }
}
}