During network outages on very large machines, it is possible to use
up all of GART space with connections that are in purgatory waiting
to be freed when we finally make a new connection.
This mod adds a timeout parameter so that when we fail registering
memory for fma blocks for a period of time, we can bring the node down
so it is not stuck in a state of being up but unusable.
This can only happen on service nodes as there can potentially be tens
of thousands of connections.
A recommended setting for reg_fail_timeout would be 60 - 300 seconds.
The default setting for reg_fail_timeout is -1 (disabled).
Set fail_loc 0xf002, which fails memory registrations, and see that we
LBUG after the required timeout.
Test that transient registration failures within the timeout period
do not cause an LBUG.
Signed-off-by: Chris Horn <hornc@cray.com>
Signed-off-by: Chuck Fossen <chuckf@cray.com>
Change-Id: I214b5e5a297c547f3c4675fcc263e5dd8aaed24f
Reviewed-on: http://review.whamcloud.com/17664
Reviewed-by: James Simmons <uja.ornl@yahoo.com>
Tested-by: James Simmons <uja.ornl@yahoo.com>
Tested-by: Jenkins
Reviewed-by: Dmitry Eremin <dmitry.eremin@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
#define GNILND_LASTRX(conn) (time_after(conn->gnc_last_rx, conn->gnc_last_rx_cq) \
? conn->gnc_last_rx : conn->gnc_last_rx_cq)
#define GNILND_LASTRX(conn) (time_after(conn->gnc_last_rx, conn->gnc_last_rx_cq) \
? conn->gnc_last_rx : conn->gnc_last_rx_cq)
+/* fmablk registration failures timeout before failing node */
+#define GNILND_REGFAILTO_DISABLE -1
+
/************************************************************************
* Enum, flag and tag data
*/
/************************************************************************
* Enum, flag and tag data
*/
int *kgn_fast_reconn; /* fast reconnection on conn timeout */
int *kgn_efault_lbug; /* LBUG on receiving an EFAULT */
int *kgn_max_purgatory; /* # conns/peer to keep in purgatory */
int *kgn_fast_reconn; /* fast reconnection on conn timeout */
int *kgn_efault_lbug; /* LBUG on receiving an EFAULT */
int *kgn_max_purgatory; /* # conns/peer to keep in purgatory */
+ int *kgn_reg_fail_timeout; /* registration failure timeout */
int *kgn_thread_affinity; /* bind scheduler threads to cpus */
int *kgn_thread_safe; /* use thread safe kgni API */
#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM
int *kgn_thread_affinity; /* bind scheduler threads to cpus */
int *kgn_thread_safe; /* use thread safe kgni API */
#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM
{
gni_return_t rrc;
__u32 flags = GNI_MEM_READWRITE;
{
gni_return_t rrc;
__u32 flags = GNI_MEM_READWRITE;
+ static unsigned long reg_to;
+ int rfto = *kgnilnd_tunables.kgn_reg_fail_timeout;
if (fma_blk->gnm_state == GNILND_FMABLK_PHYS) {
flags |= GNI_MEM_PHYS_CONT;
if (fma_blk->gnm_state == GNILND_FMABLK_PHYS) {
flags |= GNI_MEM_PHYS_CONT;
fma_blk->gnm_blk_size, device->gnd_rcv_fma_cqh,
flags, &fma_blk->gnm_hndl);
if (rrc != GNI_RC_SUCCESS) {
fma_blk->gnm_blk_size, device->gnd_rcv_fma_cqh,
flags, &fma_blk->gnm_hndl);
if (rrc != GNI_RC_SUCCESS) {
- /* XXX Nic: need a way to silence this for runtime stuff that is ok to fail
- * -- like when under MDD or GART pressure on big systems
- */
+ if (rfto != GNILND_REGFAILTO_DISABLE) {
+ if (reg_to == 0) {
+ reg_to = jiffies + cfs_time_seconds(rfto);
+ } else if (time_after(jiffies, reg_to)) {
+ CERROR("FATAL:fmablk registration has failed "
+ "for %ld seconds.\n",
+ cfs_duration_sec(jiffies - reg_to) +
+ rfto);
+ LBUG();
+ }
+ }
+
CNETERR("register fmablk failed 0x%p mbox_size %d flags %u\n",
fma_blk, fma_blk->gnm_mbox_size, flags);
RETURN(-ENOMEM);
}
CNETERR("register fmablk failed 0x%p mbox_size %d flags %u\n",
fma_blk, fma_blk->gnm_mbox_size, flags);
RETURN(-ENOMEM);
}
/* PHYS_CONT memory isn't really mapped, at least not in GART -
* but all mappings chew up a MDD
*/
/* PHYS_CONT memory isn't really mapped, at least not in GART -
* but all mappings chew up a MDD
*/
CFS_MODULE_PARM(thread_safe, "i", int, 0444,
"Use kgni thread safe API if available");
CFS_MODULE_PARM(thread_safe, "i", int, 0444,
"Use kgni thread safe API if available");
+static int reg_fail_timeout = GNILND_REGFAILTO_DISABLE;
+CFS_MODULE_PARM(reg_fail_timeout, "i", int, 0644,
+ "fmablk registration timeout LBUG");
+
kgn_tunables_t kgnilnd_tunables = {
.kgn_min_reconnect_interval = &min_reconnect_interval,
.kgn_max_reconnect_interval = &max_reconnect_interval,
kgn_tunables_t kgnilnd_tunables = {
.kgn_min_reconnect_interval = &min_reconnect_interval,
.kgn_max_reconnect_interval = &max_reconnect_interval,
.kgn_efault_lbug = &efault_lbug,
.kgn_thread_affinity = &thread_affinity,
.kgn_thread_safe = &thread_safe,
.kgn_efault_lbug = &efault_lbug,
.kgn_thread_affinity = &thread_affinity,
.kgn_thread_safe = &thread_safe,
+ .kgn_reg_fail_timeout = &reg_fail_timeout,
.kgn_max_purgatory = &max_conn_purg
};
.kgn_max_purgatory = &max_conn_purg
};
+ .procname = "reg_fail_timeout"
+ .data = &reg_fail_timeout,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ INIT_CTL_NAME
.procname = "max_conn_purg"
.data = &max_conn_purg,
.maxlen = sizeof(int),
.procname = "max_conn_purg"
.data = &max_conn_purg,
.maxlen = sizeof(int),