From 205a57df278733d1c733defe5c80de0d745e1bd5 Mon Sep 17 00:00:00 2001
From: Rahul Deshmukh
Date: Fri, 9 Sep 2016 00:59:57 +0530
Subject: [PATCH] LU-1882 llite: Adding timed wait in ll_umount_begin

There exists a timing race between umount and another thread that
increments the reference count on the mnt, e.g. getattr. If the
umount thread loses the race, umount fails with an EBUSY error. To
avoid this, a timed wait is added so that the umount thread waits
for the other user to decrement the mnt reference count.

This patch also fixes the following two problems:

1. The fail_loc in conf-sanity test 45 is ORed with OBD_FAIL_ONCE
   (0x80000000) to prevent the fail_loc from looping forever.

2. The fail_loc name is corrected from OBD_FAIL_PTLRPC_LONG_UNLINK
   to OBD_FAIL_PTLRPC_LONG_REPL_UNLINK.

Signed-off-by: Rahul Deshmukh
Signed-off-by: Lokesh Nagappa Jaliminche
Signed-off-by: Jian Yu
Change-Id: Icab9b560cadcb8623c8592bfc4c5a842277ad266
Seagate-bug-id: MRP-1192
Reviewed-on: http://review.whamcloud.com/20061
Tested-by: Jenkins
Tested-by: Maloo
Reviewed-by: Andreas Dilger
Reviewed-by: Lai Siyao
Reviewed-by: Oleg Drokin
---
 lustre/llite/llite_internal.h |  1 +
 lustre/llite/llite_lib.c      | 23 +++++++++++++++--------
 lustre/tests/conf-sanity.sh   |  4 ++--
 3 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h
index 96770ad..e1d5670 100644
--- a/lustre/llite/llite_internal.h
+++ b/lustre/llite/llite_internal.h
@@ -530,6 +530,7 @@ struct ll_sb_info {
 					 * clustred nfs */
 	/* root squash */
 	struct root_squash_info	 ll_squash;
+	struct path		 ll_mnt;
 };
 
 /*
diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c
index f395747..0aec9fd 100644
--- a/lustre/llite/llite_lib.c
+++ b/lustre/llite/llite_lib.c
@@ -326,11 +326,12 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
 	}
 
 	LASSERT(osfs->os_bsize);
-	sb->s_blocksize = osfs->os_bsize;
-	sb->s_blocksize_bits = log2(osfs->os_bsize);
-	sb->s_magic = LL_SUPER_MAGIC;
-	sb->s_maxbytes = MAX_LFS_FILESIZE;
-	sbi->ll_namelen = osfs->os_namelen;
+	sb->s_blocksize = osfs->os_bsize;
+	sb->s_blocksize_bits = log2(osfs->os_bsize);
+	sb->s_magic = LL_SUPER_MAGIC;
+	sb->s_maxbytes = MAX_LFS_FILESIZE;
+	sbi->ll_namelen = osfs->os_namelen;
+	sbi->ll_mnt.mnt = current->fs->root.mnt;
 
 	if ((sbi->ll_flags & LL_SBI_USER_XATTR) &&
 	    !(data->ocd_connect_flags & OBD_CONNECT_XATTR)) {
@@ -2132,6 +2133,8 @@ void ll_umount_begin(struct super_block *sb)
 	struct ll_sb_info *sbi = ll_s2sbi(sb);
 	struct obd_device *obd;
 	struct obd_ioctl_data *ioc_data;
+	struct l_wait_info lwi;
+	wait_queue_head_t waitq;
 	ENTRY;
 
 	CDEBUG(D_VFSTRACE, "VFS Op: superblock %p count %d active %d\n", sb,
@@ -2167,10 +2170,14 @@ void ll_umount_begin(struct super_block *sb)
 	}
 
 	/* Really, we'd like to wait until there are no requests outstanding,
-	 * and then continue. For now, we just invalidate the requests,
-	 * schedule() and sleep one second if needed, and hope.
+	 * and then continue. For now, we just periodically check whether
+	 * the VFS has dropped the mnt refcount, and give up after 10 sec.
 	 */
-	schedule();
+	init_waitqueue_head(&waitq);
+	lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(10),
+				   cfs_time_seconds(1), NULL, NULL);
+	l_wait_event(waitq, may_umount(sbi->ll_mnt.mnt), &lwi);
+
 	EXIT;
 }
 
diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh
index 24ab71e..1c038c8 100755
--- a/lustre/tests/conf-sanity.sh
+++ b/lustre/tests/conf-sanity.sh
@@ -3209,8 +3209,8 @@ test_45() { #17310
 	df -h $MOUNT &
 	log "sleep 60 sec"
 	sleep 60
-#define OBD_FAIL_PTLRPC_LONG_UNLINK 0x50f
-	do_facet client "$LCTL set_param fail_loc=0x50f fail_val=0"
+	#define OBD_FAIL_PTLRPC_LONG_REPL_UNLINK 0x50f
+	do_facet client "$LCTL set_param fail_loc=0x8000050f"
 	log "sleep 10 sec"
 	sleep 10
 	manual_umount_client --force || error "manual_umount_client failed"
-- 
1.8.3.1
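
For context, the wait the patch introduces is essentially a bounded
poll: re-test a "may umount now?" condition once per interval, and
give up when the total timeout expires. Below is a minimal userspace
C sketch of that pattern, for illustration only; the names
mnt_refcount and mnt_is_idle() are hypothetical stand-ins, not Lustre
or kernel APIs, and in the real code the condition is may_umount()
driven by l_wait_event() with LWI_TIMEOUT_INTERVAL(10s, 1s).

#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static int mnt_refcount = 3;	/* pretend other threads still hold the mount */

/* hypothetical stand-in for may_umount(): true once the mount is idle */
static bool mnt_is_idle(void)
{
	return --mnt_refcount <= 0;	/* simulate users dropping references */
}

int main(void)
{
	const int timeout_sec = 10;	/* total wait, like cfs_time_seconds(10) */
	const int interval_sec = 1;	/* poll period, like cfs_time_seconds(1) */
	int waited;

	for (waited = 0; waited < timeout_sec; waited += interval_sec) {
		if (mnt_is_idle()) {
			puts("mount idle, umount can proceed");
			return 0;
		}
		sleep(interval_sec);	/* pre-patch code did one schedule() and hoped */
	}
	puts("mount still busy after timeout, umount may return EBUSY");
	return 1;
}

Bounding the wait at 10 seconds gives transient holders of the mnt
reference, such as a concurrent getattr, time to finish, while a
genuinely busy mount still fails umount with EBUSY instead of
blocking forever.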