From 5b787cb7a375372c7a4f3c405d38137a7a867677 Mon Sep 17 00:00:00 2001 From: Chuck Fossen Date: Mon, 1 Feb 2016 18:46:00 -0500 Subject: [PATCH] LU-7578 gnilnd: Add module parameter reg_fail_timeout During network outages on very large machines, it is possible to use up all of GART space with connections that are in purgatory waiting to be freed when we finally make a new connection. This mod adds a timeout parameter so that when we fail registering memory for fma blocks for a period of time, we can bring the node down so it is not stuck in a state of being up but unusable. This can only happen on service nodes as there can potentially be 10s of thousands of connections. A recommended setting for reg_fail_timeout would be 60 - 300 seconds. The default setting for reg_fail_timeout is -1 (disabled). Set fail_loc 0xf002 which fails memory registrations and see that we BUG after the required timeout. Test that transient registration failures within the timeout period do not cause BUG. Signed-off-by: Chris Horn Signed-off-by: Chuck Fossen Change-Id: I214b5e5a297c547f3c4675fcc263e5dd8aaed24f Reviewed-on: http://review.whamcloud.com/17664 Reviewed-by: James Simmons Tested-by: James Simmons Tested-by: Jenkins Reviewed-by: Dmitry Eremin Tested-by: Maloo Reviewed-by: Oleg Drokin --- lnet/klnds/gnilnd/gnilnd.h | 4 ++++ lnet/klnds/gnilnd/gnilnd_conn.c | 19 ++++++++++++++++--- lnet/klnds/gnilnd/gnilnd_modparams.c | 13 +++++++++++++ 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/lnet/klnds/gnilnd/gnilnd.h b/lnet/klnds/gnilnd/gnilnd.h index 2dfc91d..c27c48f 100644 --- a/lnet/klnds/gnilnd/gnilnd.h +++ b/lnet/klnds/gnilnd/gnilnd.h @@ -165,6 +165,9 @@ #define GNILND_LASTRX(conn) (time_after(conn->gnc_last_rx, conn->gnc_last_rx_cq) \ ? 
conn->gnc_last_rx : conn->gnc_last_rx_cq) +/* fmablk registration failures timeout before failing node */ +#define GNILND_REGFAILTO_DISABLE -1 + /************************************************************************ * Enum, flag and tag data */ @@ -485,6 +488,7 @@ typedef struct kgn_tunables { int *kgn_fast_reconn; /* fast reconnection on conn timeout */ int *kgn_efault_lbug; /* LBUG on receiving an EFAULT */ int *kgn_max_purgatory; /* # conns/peer to keep in purgatory */ + int *kgn_reg_fail_timeout; /* registration failure timeout */ int *kgn_thread_affinity; /* bind scheduler threads to cpus */ int *kgn_thread_safe; /* use thread safe kgni API */ #if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM diff --git a/lnet/klnds/gnilnd/gnilnd_conn.c b/lnet/klnds/gnilnd/gnilnd_conn.c index 7ec96a2..e00a8f9 100644 --- a/lnet/klnds/gnilnd/gnilnd_conn.c +++ b/lnet/klnds/gnilnd/gnilnd_conn.c @@ -38,6 +38,8 @@ kgnilnd_map_fmablk(kgn_device_t *device, kgn_fma_memblock_t *fma_blk) { gni_return_t rrc; __u32 flags = GNI_MEM_READWRITE; + static unsigned long reg_to; + int rfto = *kgnilnd_tunables.kgn_reg_fail_timeout; if (fma_blk->gnm_state == GNILND_FMABLK_PHYS) { flags |= GNI_MEM_PHYS_CONT; @@ -52,14 +54,25 @@ kgnilnd_map_fmablk(kgn_device_t *device, kgn_fma_memblock_t *fma_blk) fma_blk->gnm_blk_size, device->gnd_rcv_fma_cqh, flags, &fma_blk->gnm_hndl); if (rrc != GNI_RC_SUCCESS) { - /* XXX Nic: need a way to silence this for runtime stuff that is ok to fail - * -- like when under MDD or GART pressure on big systems - */ + if (rfto != GNILND_REGFAILTO_DISABLE) { + if (reg_to == 0) { + reg_to = jiffies + cfs_time_seconds(rfto); + } else if (time_after(jiffies, reg_to)) { + CERROR("FATAL:fmablk registration has failed " + "for %ld seconds.\n", + cfs_duration_sec(jiffies - reg_to) + + rfto); + LBUG(); + } + } + CNETERR("register fmablk failed 0x%p mbox_size %d flags %u\n", fma_blk, fma_blk->gnm_mbox_size, flags); RETURN(-ENOMEM); } + reg_to = 0; + /* PHYS_CONT memory isn't really mapped, 
at least not in GART - * but all mappings chew up a MDD */ diff --git a/lnet/klnds/gnilnd/gnilnd_modparams.c b/lnet/klnds/gnilnd/gnilnd_modparams.c index 8747848..7026b00 100644 --- a/lnet/klnds/gnilnd/gnilnd_modparams.c +++ b/lnet/klnds/gnilnd/gnilnd_modparams.c @@ -198,6 +198,10 @@ static int thread_safe = GNILND_TS_ENABLE; CFS_MODULE_PARM(thread_safe, "i", int, 0444, "Use kgni thread safe API if available"); +static int reg_fail_timeout = GNILND_REGFAILTO_DISABLE; +CFS_MODULE_PARM(reg_fail_timeout, "i", int, 0644, + "fmablk registration timeout LBUG"); + kgn_tunables_t kgnilnd_tunables = { .kgn_min_reconnect_interval = &min_reconnect_interval, .kgn_max_reconnect_interval = &max_reconnect_interval, @@ -238,6 +242,7 @@ kgn_tunables_t kgnilnd_tunables = { .kgn_efault_lbug = &efault_lbug, .kgn_thread_affinity = &thread_affinity, .kgn_thread_safe = &thread_safe, + .kgn_reg_fail_timeout = &reg_fail_timeout, .kgn_max_purgatory = &max_conn_purg }; @@ -548,6 +553,14 @@ static struct ctl_table kgnilnd_ctl_table[] = { }, { INIT_CTL_NAME + .procname = "reg_fail_timeout" + .data = &reg_fail_timeout, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + INIT_CTL_NAME .procname = "max_conn_purg" .data = &max_conn_purg, .maxlen = sizeof(int), -- 1.8.3.1