From d9e57a765e73e1bc3046124433eb6e2186f7e07c Mon Sep 17 00:00:00 2001
From: Bruno Faccini
Date: Tue, 19 Sep 2017 00:55:01 +0200
Subject: [PATCH] LU-9372 ptlrpc: allow to limit number of service's rqbds

This patch provides a way to limit the number of rqbds per service.
This should help to avoid OOMs under heavy client request load, such
as during target failover/recovery for thousands of clients.

This change is still required after the first patch for LU-9372
(ptlrpc: drain "ptlrpc_request_buffer_desc" objects), which already
allowed unused rqbds allocated during heavy load to be drained, but
was not effective when the load lasted too long.

Signed-off-by: Bruno Faccini
Change-Id: Ib43f3e07741b9fcecdfae24a3753128a939d2196
Reviewed-on: https://review.whamcloud.com/29064
Tested-by: Jenkins
Tested-by: Maloo
Reviewed-by: Andreas Dilger
Reviewed-by: Dmitry Eremin
Reviewed-by: Alex Zhuravlev
Reviewed-by: Oleg Drokin
---
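Note: the limit semantics are identical at each site changed in
service.c below and amount to the minimal sketch that follows; the
helper name is hypothetical and not part of the patch. srv_nrqbds_max
== 0 keeps the historical "no limit" behaviour, while a non-zero value
stops rqbd growth once a partition's total exceeds it:

#include <linux/types.h>	/* bool */

/* hypothetical helper mirroring the checks added in service.c;
 * a zero limit means "unlimited", matching the default set in
 * ptlrpc_register_service() */
static inline bool rqbd_limit_reached(int nrqbds_total, int nrqbds_max)
{
	return nrqbds_max != 0 && nrqbds_total > nrqbds_max;
}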
 lustre/include/lustre_net.h  |  2 ++
 lustre/ptlrpc/lproc_ptlrpc.c | 49 +++++++++++++++++++++++++++++++++++++++++---
 lustre/ptlrpc/service.c      | 14 ++++++++++---
 3 files changed, 59 insertions(+), 6 deletions(-)

diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h
index 6a77af7..3a33211 100644
--- a/lustre/include/lustre_net.h
+++ b/lustre/include/lustre_net.h
@@ -1710,6 +1710,8 @@ struct ptlrpc_service {
 	/** under unregister_service */
 	unsigned			srv_is_stopping:1;
+	/** max # request buffers */
+	int				srv_nrqbds_max;
 	/** max # request buffers in history per partition */
 	int				srv_hist_nrqbds_cpt_max;
 	/** number of CPTs this service bound on */
 	int				srv_ncpts;
diff --git a/lustre/ptlrpc/lproc_ptlrpc.c b/lustre/ptlrpc/lproc_ptlrpc.c
index cd86d8c..0221c5e 100644
--- a/lustre/ptlrpc/lproc_ptlrpc.c
+++ b/lustre/ptlrpc/lproc_ptlrpc.c
@@ -318,10 +318,14 @@ ptlrpc_lprocfs_req_history_max_seq_write(struct file *file,
 
 	/* This sanity check is more of an insanity check; we can still
 	 * hose a kernel by allowing the request history to grow too
-	 * far. */
-	bufpages = (svc->srv_buf_size + PAGE_SIZE - 1) >>
+	 * far. The roundup to the next power of two is an empirical way
+	 * to account for the request buffer being allocated from the slab,
+	 * where its size is rounded up to the next power of two. */
+	bufpages = (roundup_pow_of_two(svc->srv_buf_size) + PAGE_SIZE - 1) >>
 			PAGE_SHIFT;
-	if (val > totalram_pages/(2 * bufpages))
+	/* do not allow history to consume more than half the max # of rqbds */
+	if ((svc->srv_nrqbds_max == 0 && val > totalram_pages / (2 * bufpages)) ||
+	    (svc->srv_nrqbds_max != 0 && val > svc->srv_nrqbds_max / 2))
 		return -ERANGE;
 
 	spin_lock(&svc->srv_lock);
@@ -338,6 +342,45 @@ ptlrpc_lprocfs_req_history_max_seq_write(struct file *file,
 }
 LPROC_SEQ_FOPS(ptlrpc_lprocfs_req_history_max);
 
+static int
+ptlrpc_lprocfs_req_buffers_max_seq_show(struct seq_file *m, void *n)
+{
+	struct ptlrpc_service *svc = m->private;
+
+	seq_printf(m, "%d\n", svc->srv_nrqbds_max);
+	return 0;
+}
+
+static ssize_t
+ptlrpc_lprocfs_req_buffers_max_seq_write(struct file *file,
+					 const char __user *buffer,
+					 size_t count, loff_t *off)
+{
+	struct seq_file *m = file->private_data;
+	struct ptlrpc_service *svc = m->private;
+	__s64 val;
+	int rc;
+
+	rc = lprocfs_str_to_s64(buffer, count, &val);
+	if (rc < 0)
+		return rc;
+
+	if (val < 0 || val > INT_MAX)
+		return -ERANGE;
+
+	if (val < svc->srv_nbuf_per_group && val != 0)
+		return -ERANGE;
+
+	spin_lock(&svc->srv_lock);
+
+	svc->srv_nrqbds_max = (uint)val;
+
+	spin_unlock(&svc->srv_lock);
+
+	return count;
+}
+LPROC_SEQ_FOPS(ptlrpc_lprocfs_req_buffers_max);
+
 static ssize_t threads_min_show(struct kobject *kobj, struct attribute *attr,
 				char *buf)
 {
diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c
index 5c94c4f..0bb0175 100644
--- a/lustre/ptlrpc/service.c
+++ b/lustre/ptlrpc/service.c
@@ -140,7 +140,10 @@ ptlrpc_grow_req_bufs(struct ptlrpc_service_part *svcpt, int post)
 	for (i = 0; i < svc->srv_nbuf_per_group; i++) {
 		/* NB: another thread might have recycled enough rqbds, we
 		 * need to make sure it wouldn't over-allocate, see LU-1212. */
-		if (svcpt->scp_nrqbds_posted >= svc->srv_nbuf_per_group)
+		if (test_req_buffer_pressure ||
+		    svcpt->scp_nrqbds_posted >= svc->srv_nbuf_per_group ||
+		    (svc->srv_nrqbds_max != 0 &&
+		     svcpt->scp_nrqbds_total > svc->srv_nrqbds_max))
 			break;
 
 		rqbd = ptlrpc_alloc_rqbd(svcpt);
@@ -760,6 +763,9 @@ ptlrpc_register_service(struct ptlrpc_service_conf *conf,
 	/* buffer configuration */
 	service->srv_nbuf_per_group	= test_req_buffer_pressure ?
 					  1 : conf->psc_buf.bc_nbufs;
+	/* do not limit max number of rqbds by default */
+	service->srv_nrqbds_max		= 0;
+
 	service->srv_max_req_size	= conf->psc_buf.bc_req_max_size +
 					  SPTLRPC_MAX_PAYLOAD;
 	service->srv_buf_size		= conf->psc_buf.bc_buf_size;
@@ -947,8 +953,10 @@ void ptlrpc_server_drop_request(struct ptlrpc_request *req)
 		 */
 		LASSERT(atomic_read(&rqbd->rqbd_req.rq_refcount) == 0);
 		if (svcpt->scp_nrqbds_posted >=
-		    svc->srv_nbuf_per_group &&
-		    !test_req_buffer_pressure) {
+		    svc->srv_nbuf_per_group ||
+		    (svc->srv_nrqbds_max != 0 &&
+		     svcpt->scp_nrqbds_total > svc->srv_nrqbds_max) ||
+		    test_req_buffer_pressure) {
 			/* like in ptlrpc_free_rqbd() */
 			svcpt->scp_nrqbds_total--;
 			OBD_FREE_LARGE(rqbd->rqbd_buffer,
-- 
1.8.3.1
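For completeness, the bound enforced in
ptlrpc_lprocfs_req_history_max_seq_write() can be exercised in
isolation with the standalone userspace sketch below. The page count,
buffer size, limit, and requested history size are made-up stand-ins,
and roundup_pow_of_two_ul() is a userspace substitute for the kernel's
roundup_pow_of_two():

#include <stdbool.h>
#include <stdio.h>

/* userspace stand-in for the kernel's roundup_pow_of_two() */
static unsigned long roundup_pow_of_two_ul(unsigned long x)
{
	unsigned long p = 1;

	while (p < x)
		p <<= 1;
	return p;
}

int main(void)
{
	const unsigned long page_size = 4096;
	const unsigned long totalram_pages = 4UL << 20; /* 16 GiB in 4K pages */
	unsigned long buf_size = 17408;		/* stand-in srv_buf_size */
	unsigned long bufpages =
		(roundup_pow_of_two_ul(buf_size) + page_size - 1) / page_size;
	long nrqbds_max = 1024;			/* stand-in srv_nrqbds_max */
	long val = 600;				/* requested req_history_max */
	bool rejected;

	if (nrqbds_max == 0)
		/* no rqbd limit set: cap history at a fraction of RAM */
		rejected = val > (long)(totalram_pages / (2 * bufpages));
	else
		/* limit set: history may use at most half of it */
		rejected = val > nrqbds_max / 2;

	/* prints "bufpages=8, val=600: rejected (-ERANGE)" */
	printf("bufpages=%lu, val=%ld: %s\n", bufpages, val,
	       rejected ? "rejected (-ERANGE)" : "accepted");
	return 0;
}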