From d9e57a765e73e1bc3046124433eb6e2186f7e07c Mon Sep 17 00:00:00 2001
From: Bruno Faccini
Date: Tue, 19 Sep 2017 00:55:01 +0200
Subject: [PATCH] LU-9372 ptlrpc: allow to limit number of service's rqbds

This patch provides a way to limit the number of rqbds per service.
This should help to avoid OOMs under heavy client request load, such
as during target failover/recovery for thousands of clients.

This change is still required after the first patch for LU-9372
(ptlrpc: drain "ptlrpc_request_buffer_desc" objects), which already
allowed unused rqbds allocated during heavy load to be drained, but
was not effective when the load lasted too long.

Signed-off-by: Bruno Faccini
Change-Id: Ib43f3e07741b9fcecdfae24a3753128a939d2196
Reviewed-on: https://review.whamcloud.com/29064
Tested-by: Jenkins
Tested-by: Maloo
Reviewed-by: Andreas Dilger
Reviewed-by: Dmitry Eremin
Reviewed-by: Alex Zhuravlev
Reviewed-by: Oleg Drokin
---
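Note: the limit semantics are identical at each site changed in
service.c below and amount to the minimal sketch that follows; the
helper name is hypothetical and not part of the patch. srv_nrqbds_max
== 0 keeps the historical "no limit" behaviour, while a non-zero value
stops rqbd growth once a partition's total exceeds it:

#include <linux/types.h>	/* bool */

/* hypothetical helper mirroring the checks added in service.c;
 * a zero limit means "unlimited", matching the default set in
 * ptlrpc_register_service() */
static inline bool rqbd_limit_reached(int nrqbds_total, int nrqbds_max)
{
	return nrqbds_max != 0 && nrqbds_total > nrqbds_max;
}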
 lustre/include/lustre_net.h  |  2 ++
 lustre/ptlrpc/lproc_ptlrpc.c | 49 +++++++++++++++++++++++++++++++++++++++++---
 lustre/ptlrpc/service.c      | 14 ++++++++++---
 3 files changed, 59 insertions(+), 6 deletions(-)

diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h
index 6a77af7..3a33211 100644
--- a/lustre/include/lustre_net.h
+++ b/lustre/include/lustre_net.h
@@ -1710,6 +1710,8 @@ struct ptlrpc_service {
 	/** under unregister_service */
 	unsigned			srv_is_stopping:1;
+	/** max # request buffers */
+	int				srv_nrqbds_max;
 	/** max # request buffers in history per partition */
 	int				srv_hist_nrqbds_cpt_max;
 	/** number of CPTs this service bound on */
 	int				srv_ncpts;
diff --git a/lustre/ptlrpc/lproc_ptlrpc.c b/lustre/ptlrpc/lproc_ptlrpc.c
index cd86d8c..0221c5e 100644
--- a/lustre/ptlrpc/lproc_ptlrpc.c
+++ b/lustre/ptlrpc/lproc_ptlrpc.c
@@ -318,10 +318,14 @@ ptlrpc_lprocfs_req_history_max_seq_write(struct file *file,
 
 	/* This sanity check is more of an insanity check; we can still
 	 * hose a kernel by allowing the request history to grow too
-	 * far. */
-	bufpages = (svc->srv_buf_size + PAGE_SIZE - 1) >>
+	 * far. The roundup to the next power of two is an empirical way
+	 * to account for the request buffer being allocated from the slab,
+	 * where its size is rounded up to the next power of two. */
+	bufpages = (roundup_pow_of_two(svc->srv_buf_size) + PAGE_SIZE - 1) >>
 			PAGE_SHIFT;
-	if (val > totalram_pages/(2 * bufpages))
+	/* do not allow history to consume more than half the max # of rqbds */
+	if ((svc->srv_nrqbds_max == 0 && val > totalram_pages / (2 * bufpages)) ||
+	    (svc->srv_nrqbds_max != 0 && val > svc->srv_nrqbds_max / 2))
 		return -ERANGE;
 
 	spin_lock(&svc->srv_lock);
@@ -338,6 +342,45 @@ ptlrpc_lprocfs_req_history_max_seq_write(struct file *file,
 }
 LPROC_SEQ_FOPS(ptlrpc_lprocfs_req_history_max);
 
+static int
+ptlrpc_lprocfs_req_buffers_max_seq_show(struct seq_file *m, void *n)
+{
+	struct ptlrpc_service *svc = m->private;
+
+	seq_printf(m, "%d\n", svc->srv_nrqbds_max);
+	return 0;
+}
+
+static ssize_t
+ptlrpc_lprocfs_req_buffers_max_seq_write(struct file *file,
+					 const char __user *buffer,
+					 size_t count, loff_t *off)
+{
+	struct seq_file *m = file->private_data;
+	struct ptlrpc_service *svc = m->private;
+	__s64 val;
+	int rc;
+
+	rc = lprocfs_str_to_s64(buffer, count, &val);
+	if (rc < 0)
+		return rc;
+
+	if (val < 0 || val > INT_MAX)
+		return -ERANGE;
+
+	if (val < svc->srv_nbuf_per_group && val != 0)
+		return -ERANGE;
+
+	spin_lock(&svc->srv_lock);
+
+	svc->srv_nrqbds_max = (uint)val;
+
+	spin_unlock(&svc->srv_lock);
+
+	return count;
+}
+LPROC_SEQ_FOPS(ptlrpc_lprocfs_req_buffers_max);
+
 static ssize_t threads_min_show(struct kobject *kobj, struct attribute *attr,
 				char *buf)
 {
diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c
index 5c94c4f..0bb0175 100644
--- a/lustre/ptlrpc/service.c
+++ b/lustre/ptlrpc/service.c
@@ -140,7 +140,10 @@ ptlrpc_grow_req_bufs(struct ptlrpc_service_part *svcpt, int post)
 	for (i = 0; i < svc->srv_nbuf_per_group; i++) {
 		/* NB: another thread might have recycled enough rqbds, we
 		 * need to make sure it wouldn't over-allocate, see LU-1212. */
-		if (svcpt->scp_nrqbds_posted >= svc->srv_nbuf_per_group)
+		if (test_req_buffer_pressure ||
+		    svcpt->scp_nrqbds_posted >= svc->srv_nbuf_per_group ||
+		    (svc->srv_nrqbds_max != 0 &&
+		     svcpt->scp_nrqbds_total > svc->srv_nrqbds_max))
 			break;
 
 		rqbd = ptlrpc_alloc_rqbd(svcpt);
@@ -760,6 +763,9 @@ ptlrpc_register_service(struct ptlrpc_service_conf *conf,
 	/* buffer configuration */
 	service->srv_nbuf_per_group	= test_req_buffer_pressure ?
 					  1 : conf->psc_buf.bc_nbufs;
+	/* do not limit max number of rqbds by default */
+	service->srv_nrqbds_max		= 0;
+
 	service->srv_max_req_size	= conf->psc_buf.bc_req_max_size +
 					  SPTLRPC_MAX_PAYLOAD;
 	service->srv_buf_size		= conf->psc_buf.bc_buf_size;
@@ -947,8 +953,10 @@ void ptlrpc_server_drop_request(struct ptlrpc_request *req)
 		 */
 		LASSERT(atomic_read(&rqbd->rqbd_req.rq_refcount) == 0);
 		if (svcpt->scp_nrqbds_posted >=
-		    svc->srv_nbuf_per_group &&
-		    !test_req_buffer_pressure) {
+		    svc->srv_nbuf_per_group ||
+		    (svc->srv_nrqbds_max != 0 &&
+		     svcpt->scp_nrqbds_total > svc->srv_nrqbds_max) ||
+		    test_req_buffer_pressure) {
 			/* like in ptlrpc_free_rqbd() */
 			svcpt->scp_nrqbds_total--;
 			OBD_FREE_LARGE(rqbd->rqbd_buffer,
-- 
1.8.3.1
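For completeness, the bound enforced in
ptlrpc_lprocfs_req_history_max_seq_write() can be exercised in
isolation with the standalone userspace sketch below. The page count,
buffer size, limit, and requested history size are made-up stand-ins,
and roundup_pow_of_two_ul() is a userspace substitute for the kernel's
roundup_pow_of_two():

#include <stdbool.h>
#include <stdio.h>

/* userspace stand-in for the kernel's roundup_pow_of_two() */
static unsigned long roundup_pow_of_two_ul(unsigned long x)
{
	unsigned long p = 1;

	while (p < x)
		p <<= 1;
	return p;
}

int main(void)
{
	const unsigned long page_size = 4096;
	const unsigned long totalram_pages = 4UL << 20; /* 16 GiB in 4K pages */
	unsigned long buf_size = 17408;		/* stand-in srv_buf_size */
	unsigned long bufpages =
		(roundup_pow_of_two_ul(buf_size) + page_size - 1) / page_size;
	long nrqbds_max = 1024;			/* stand-in srv_nrqbds_max */
	long val = 600;				/* requested req_history_max */
	bool rejected;

	if (nrqbds_max == 0)
		/* no rqbd limit set: cap history at a fraction of RAM */
		rejected = val > (long)(totalram_pages / (2 * bufpages));
	else
		/* limit set: history may use at most half of it */
		rejected = val > nrqbds_max / 2;

	/* prints "bufpages=8, val=600: rejected (-ERANGE)" */
	printf("bufpages=%lu, val=%ld: %s\n", bufpages, val,
	       rejected ? "rejected (-ERANGE)" : "accepted");
	return 0;
}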