From e4d63c854d774792f8a77b8d1e575ccc2d8c3c8b Mon Sep 17 00:00:00 2001
From: Wang Shilong
Date: Tue, 25 Feb 2020 18:51:43 +0800
Subject: [PATCH] LU-13096 llite: serialize max_cached_mb write operation

It is possible for two threads to change max_cached_mb concurrently.
If both need to reclaim LRU pages, one thread may finish while the
other loops forever without ever reclaiming enough LRU pages.

Moreover, the write operation updates two values that must be changed
together to remain consistent, and the whole operation can be slow, so
this patch introduces a dedicated mutex to serialize it.

The patch also avoids reclaiming too many LRU pages on systems with a
large amount of memory.

Change-Id: I1e88e37607f9e8db2ba67f1f7e5670f25c136465
Signed-off-by: Wang Shilong
Reviewed-on: https://review.whamcloud.com/37710
Reviewed-by: Andreas Dilger
Tested-by: jenkins
Reviewed-by: Emoly Liu
Tested-by: Maloo
---
 lustre/include/cl_object.h |  4 ++++
 lustre/llite/lproc_llite.c | 37 ++++++++++++++++++++++---------------
 lustre/obdclass/cl_page.c  |  1 +
 3 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/lustre/include/cl_object.h b/lustre/include/cl_object.h
index 61f214a..8edb653 100644
--- a/lustre/include/cl_object.h
+++ b/lustre/include/cl_object.h
@@ -2288,6 +2288,10 @@ struct cl_client_cache {
 	 * Used at umounting time and signaled on BRW commit
 	 */
 	wait_queue_head_t	ccc_unstable_waitq;
+	/**
+	 * Serialize max_cache_mb write operation
+	 */
+	struct mutex		ccc_max_cache_mb_lock;
 };
 /**
  * cl_cache functions
diff --git a/lustre/llite/lproc_llite.c b/lustre/llite/lproc_llite.c
index e015ba6..f1b0884 100644
--- a/lustre/llite/lproc_llite.c
+++ b/lustre/llite/lproc_llite.c
@@ -467,8 +467,10 @@ static int ll_max_cached_mb_seq_show(struct seq_file *m, void *v)
 	long max_cached_mb;
 	long unused_mb;
 
+	mutex_lock(&cache->ccc_max_cache_mb_lock);
 	max_cached_mb = PAGES_TO_MiB(cache->ccc_lru_max);
 	unused_mb = PAGES_TO_MiB(atomic_long_read(&cache->ccc_lru_left));
+	mutex_unlock(&cache->ccc_max_cache_mb_lock);
 	seq_printf(m, "users: %d\n"
 		   "max_cached_mb: %ld\n"
 		   "used_mb: %ld\n"
@@ -522,9 +524,8 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file,
 	/* Allow enough cache so clients can make well-formed RPCs */
 	pages_number = max_t(long, pages_number, PTLRPC_MAX_BRW_PAGES);
 
-	spin_lock(&sbi->ll_lock);
+	mutex_lock(&cache->ccc_max_cache_mb_lock);
 	diff = pages_number - cache->ccc_lru_max;
-	spin_unlock(&sbi->ll_lock);
 
 	/* easy - add more LRU slots. */
 	if (diff >= 0) {
@@ -534,7 +535,7 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file,
 
 	env = cl_env_get(&refcheck);
 	if (IS_ERR(env))
-		RETURN(PTR_ERR(env));
+		GOTO(out_unlock, rc = PTR_ERR(env));
 
 	diff = -diff;
 	while (diff > 0) {
@@ -542,18 +543,21 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file,
 
 		/* reduce LRU budget from free slots. */
 		do {
-			long ov, nv, retv;
+			long lru_left_old, lru_left_new, lru_left_ret;
 
-			ov = atomic_long_read(&cache->ccc_lru_left);
-			if (ov == 0)
+			lru_left_old = atomic_long_read(&cache->ccc_lru_left);
+			if (lru_left_old == 0)
 				break;
 
-			nv = ov > diff ? ov - diff : 0;
-			retv = atomic_long_cmpxchg(&cache->ccc_lru_left,
-						   ov, nv);
-			if (likely(ov == retv)) {
-				diff -= ov - nv;
-				nrpages += ov - nv;
+			lru_left_new = lru_left_old > diff ?
+					lru_left_old - diff : 0;
+			lru_left_ret =
+				atomic_long_cmpxchg(&cache->ccc_lru_left,
+						    lru_left_old,
+						    lru_left_new);
+			if (likely(lru_left_old == lru_left_ret)) {
+				diff -= lru_left_old - lru_left_new;
+				nrpages += lru_left_old - lru_left_new;
 				break;
 			}
 		} while (1);
@@ -566,8 +570,11 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file,
 			break;
 		}
 
+		/* Request extra free slots to avoid them all being used
+		 * by other processes before this can continue shrinking.
+		 */
+		tmp = diff + min_t(long, diff, MiB_TO_PAGES(1024));
 		/* difficult - have to ask OSCs to drop LRU slots. */
-		tmp = diff << 1;
 		rc = obd_set_info_async(env, sbi->ll_dt_exp,
 					sizeof(KEY_CACHE_LRU_SHRINK),
 					KEY_CACHE_LRU_SHRINK,
@@ -579,13 +586,13 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file,
 
 out:
 	if (rc >= 0) {
-		spin_lock(&sbi->ll_lock);
 		cache->ccc_lru_max = pages_number;
-		spin_unlock(&sbi->ll_lock);
 		rc = count;
 	} else {
 		atomic_long_add(nrpages, &cache->ccc_lru_left);
 	}
+out_unlock:
+	mutex_unlock(&cache->ccc_max_cache_mb_lock);
 	return rc;
 }
 
 LDEBUGFS_SEQ_FOPS(ll_max_cached_mb);
diff --git a/lustre/obdclass/cl_page.c b/lustre/obdclass/cl_page.c
index 2508c0c..de21f79 100644
--- a/lustre/obdclass/cl_page.c
+++ b/lustre/obdclass/cl_page.c
@@ -1140,6 +1140,7 @@ struct cl_client_cache *cl_cache_init(unsigned long lru_page_max)
 	cache->ccc_unstable_check = 0;
 	atomic_long_set(&cache->ccc_unstable_nr, 0);
 	init_waitqueue_head(&cache->ccc_unstable_waitq);
+	mutex_init(&cache->ccc_max_cache_mb_lock);
 
 	RETURN(cache);
 }
-- 
1.8.3.1
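As an aside on the technique in the lproc_llite.c shrink path above: the free LRU budget is carved up with an atomic_long_cmpxchg() retry loop rather than by holding a lock across the subtraction. Below is a minimal standalone userspace sketch of that pattern, assuming C11 <stdatomic.h> in place of the kernel atomic_long_t API; the names lru_left and take_from_lru_left() and the starting budget of 4096 slots are illustrative only and are not part of the patch.

#include <stdatomic.h>
#include <stdio.h>

static atomic_long lru_left = 4096;	/* illustrative free LRU budget */

/*
 * Take up to "diff" slots from the free budget; returns how many slots
 * were actually taken (0 if the budget is already empty).  Mirrors the
 * do { ... atomic_long_cmpxchg() ... } while (1) loop in
 * ll_max_cached_mb_seq_write().
 */
static long take_from_lru_left(long diff)
{
	long lru_left_old, lru_left_new;

	do {
		lru_left_old = atomic_load(&lru_left);
		if (lru_left_old == 0)
			return 0;
		lru_left_new = lru_left_old > diff ?
				lru_left_old - diff : 0;
		/* retry if another thread changed lru_left in between */
	} while (!atomic_compare_exchange_strong(&lru_left, &lru_left_old,
						 lru_left_new));

	return lru_left_old - lru_left_new;
}

int main(void)
{
	long taken = take_from_lru_left(6000);

	printf("took %ld slots, %ld left\n", taken, atomic_load(&lru_left));
	return 0;
}

The compare-and-exchange loop itself stays lock-free; the mutex added by the patch serializes whole max_cached_mb write operations so that two writers cannot both shrink the budget and then wait indefinitely for slots the other writer already claimed.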