From 94ceb39ba458564ea99a8e7194cdb48661edc673 Mon Sep 17 00:00:00 2001 From: Chris Horn Date: Mon, 29 Jun 2015 13:50:24 -0500 Subject: [PATCH] LU-6261 gnilnd: Use trylock for conn mutex. When converting to the thread-safe implementation, I missed the need for the conn mutex to use trylock because we may be holding the kgn_peer_conn lock. Change conn mutex to use a trylock in kgnilnd_sendmsg_trylock(). Add the module parameter thread_safe. Disable thread-safe mode on gemini. In kgnilnd_create_conn(), use the NOFS flag when vmalloc'ing the tx_ref_table to avoid possible hangs because of an OOM condition. Clean up info message for down event. Signed-off-by: Chris Horn Change-Id: I1896c12b421ae35f65d1816bbe3eb5599b664498 Reviewed-on: http://review.whamcloud.com/15434 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: James Simmons Reviewed-by: James Shimek Reviewed-by: Doug Oucharek Reviewed-by: Oleg Drokin --- lnet/klnds/gnilnd/gnilnd.c | 6 ++++-- lnet/klnds/gnilnd/gnilnd.h | 38 ++++++++++++++++++++++++------------ lnet/klnds/gnilnd/gnilnd_api_wrap.h | 5 +++++ lnet/klnds/gnilnd/gnilnd_aries.h | 1 + lnet/klnds/gnilnd/gnilnd_cb.c | 5 +++-- lnet/klnds/gnilnd/gnilnd_gemini.h | 1 + lnet/klnds/gnilnd/gnilnd_modparams.c | 15 ++++++++++++++- 7 files changed, 54 insertions(+), 17 deletions(-) diff --git a/lnet/klnds/gnilnd/gnilnd.c b/lnet/klnds/gnilnd/gnilnd.c index 7e67339..d29749e 100644 --- a/lnet/klnds/gnilnd/gnilnd.c +++ b/lnet/klnds/gnilnd/gnilnd.c @@ -248,7 +248,8 @@ kgnilnd_create_conn(kgn_conn_t **connp, kgn_device_t *dev) return -ENOMEM; } - LIBCFS_ALLOC(conn->gnc_tx_ref_table, GNILND_MAX_MSG_ID * sizeof(void *)); + conn->gnc_tx_ref_table = + kgnilnd_vmalloc(GNILND_MAX_MSG_ID * sizeof(void *)); if (conn->gnc_tx_ref_table == NULL) { CERROR("Can't allocate conn tx_ref_table\n"); GOTO(failed, rc = -ENOMEM); @@ -1789,7 +1790,8 @@ kgnilnd_report_node_state(lnet_nid_t nid, int down) */ kgnilnd_txlist_done(&zombies, -ENETRESET); kgnilnd_peer_notify(peer, -ECONNRESET, 0); - LCONSOLE_INFO("Recieved 
down event for nid %lld\n", nid); + LCONSOLE_INFO("Received down event for nid %d\n", + LNET_NIDADDR(nid)); } return 0; diff --git a/lnet/klnds/gnilnd/gnilnd.h b/lnet/klnds/gnilnd/gnilnd.h index a878522..2b9c039 100644 --- a/lnet/klnds/gnilnd/gnilnd.h +++ b/lnet/klnds/gnilnd/gnilnd.h @@ -484,6 +484,7 @@ typedef struct kgn_tunables { int *kgn_efault_lbug; /* LBUG on receiving an EFAULT */ int *kgn_max_purgatory; /* # conns/peer to keep in purgatory */ int *kgn_thread_affinity; /* bind scheduler threads to cpus */ + int *kgn_thread_safe; /* use thread safe kgni API */ #if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM cfs_sysctl_table_header_t *kgn_sysctl; /* sysctl interface */ #endif @@ -893,14 +894,6 @@ kgnilnd_thread_fini(void) atomic_dec(&kgnilnd_data.kgn_nthreads); } -static inline int kgnilnd_gl_mutex_trylock(struct mutex *lock) -{ - if (kgnilnd_data.kgn_enable_gl_mutex) - return mutex_trylock(lock); - else - return 1; -} - static inline void kgnilnd_gl_mutex_lock(struct mutex *lock) { if (kgnilnd_data.kgn_enable_gl_mutex) @@ -938,14 +931,11 @@ static inline void kgnilnd_conn_mutex_unlock(struct mutex *lock) * This function must not be used in interrupt context. The * mutex must be released by the same task that acquired it. 
*/ -static inline int kgnilnd_mutex_trylock(struct mutex *lock) +static inline int __kgnilnd_mutex_trylock(struct mutex *lock) { int ret; unsigned long timeout; - if (!kgnilnd_data.kgn_enable_gl_mutex) - return 1; - LASSERT(!in_interrupt()); for (timeout = jiffies + 1; time_before(jiffies, timeout);) { @@ -957,6 +947,30 @@ static inline int kgnilnd_mutex_trylock(struct mutex *lock) return 0; } +static inline int kgnilnd_mutex_trylock(struct mutex *lock) +{ + if (!kgnilnd_data.kgn_enable_gl_mutex) + return 1; + + return __kgnilnd_mutex_trylock(lock); +} + +static inline int kgnilnd_trylock(struct mutex *cq_lock, + struct mutex *c_lock) +{ + if (kgnilnd_data.kgn_enable_gl_mutex) + return __kgnilnd_mutex_trylock(cq_lock); + else + return __kgnilnd_mutex_trylock(c_lock); +} + +static inline void *kgnilnd_vmalloc(int size) +{ + void *ret = __vmalloc(size, __GFP_HIGHMEM | GFP_NOFS, PAGE_KERNEL); + LIBCFS_ALLOC_POST(ret, size); + return ret; +} + /* Copied from DEBUG_REQ in Lustre - the dance is needed to save stack space */ extern void diff --git a/lnet/klnds/gnilnd/gnilnd_api_wrap.h b/lnet/klnds/gnilnd/gnilnd_api_wrap.h index 7f2a5e3..a22c665 100644 --- a/lnet/klnds/gnilnd/gnilnd_api_wrap.h +++ b/lnet/klnds/gnilnd/gnilnd_api_wrap.h @@ -128,6 +128,11 @@ kgnilnd_check_kgni_version(void) } symbol_put(kgni_driver_version); + + if (!*kgnilnd_tunables.kgn_thread_safe) { + return; + } + /* Use thread-safe locking */ kgnilnd_data.kgn_enable_gl_mutex = 0; } diff --git a/lnet/klnds/gnilnd/gnilnd_aries.h b/lnet/klnds/gnilnd/gnilnd_aries.h index ce187ed..50ef0b8 100644 --- a/lnet/klnds/gnilnd/gnilnd_aries.h +++ b/lnet/klnds/gnilnd/gnilnd_aries.h @@ -56,6 +56,7 @@ /* Thread-safe kgni implemented in minor ver 45, code rev 0xb9 */ #define GNILND_KGNI_TS_MINOR_VER 0x45 +#define GNILND_TS_ENABLE 1 /* plug in our functions for use on the simulator */ #if !defined(GNILND_USE_RCA) diff --git a/lnet/klnds/gnilnd/gnilnd_cb.c b/lnet/klnds/gnilnd/gnilnd_cb.c index 7175f0d..de84a5b 100644 --- 
a/lnet/klnds/gnilnd/gnilnd_cb.c +++ b/lnet/klnds/gnilnd/gnilnd_cb.c @@ -1471,7 +1471,6 @@ kgnilnd_sendmsg_nolock(kgn_tx_t *tx, void *immediate, unsigned int immediatenob, */ msg->gnm_connstamp = conn->gnc_my_connstamp; msg->gnm_payload_len = immediatenob; - kgnilnd_conn_mutex_lock(&conn->gnc_smsg_mutex); msg->gnm_seq = atomic_read(&conn->gnc_tx_seq); /* always init here - kgn_checksum is a /sys module tunable @@ -1586,6 +1585,7 @@ kgnilnd_sendmsg(kgn_tx_t *tx, void *immediate, unsigned int immediatenob, timestamp = jiffies; kgnilnd_gl_mutex_lock(&dev->gnd_cq_mutex); + kgnilnd_conn_mutex_lock(&tx->tx_conn->gnc_smsg_mutex); /* delay in jiffies - we are really concerned only with things that * result in a schedule() or really holding this off for long times . * NB - mutex_lock could spin for 2 jiffies before going to sleep to wait */ @@ -1630,7 +1630,8 @@ kgnilnd_sendmsg_trylock(kgn_tx_t *tx, void *immediate, unsigned int immediatenob rc = 0; } else { atomic_inc(&conn->gnc_device->gnd_fast_try); - rc = kgnilnd_gl_mutex_trylock(&conn->gnc_device->gnd_cq_mutex); + rc = kgnilnd_trylock(&conn->gnc_device->gnd_cq_mutex, + &conn->gnc_smsg_mutex); } if (!rc) { rc = -EAGAIN; diff --git a/lnet/klnds/gnilnd/gnilnd_gemini.h b/lnet/klnds/gnilnd/gnilnd_gemini.h index 1e7e2f6..d405bcf 100644 --- a/lnet/klnds/gnilnd/gnilnd_gemini.h +++ b/lnet/klnds/gnilnd/gnilnd_gemini.h @@ -40,5 +40,6 @@ /* Thread-safe kgni implemented in minor ver 44, code rev 0xb9 */ #define GNILND_KGNI_TS_MINOR_VER 0x44 +#define GNILND_TS_ENABLE 0 #endif /* _GNILND_GEMINI_H */ diff --git a/lnet/klnds/gnilnd/gnilnd_modparams.c b/lnet/klnds/gnilnd/gnilnd_modparams.c index d89dc3e..26814aa 100644 --- a/lnet/klnds/gnilnd/gnilnd_modparams.c +++ b/lnet/klnds/gnilnd/gnilnd_modparams.c @@ -192,7 +192,11 @@ CFS_MODULE_PARM(max_conn_purg, "i", int, 0644, static int thread_affinity = 0; CFS_MODULE_PARM(thread_affinity, "i", int, 0444, - "scheduler thread affinity default 0 (diabled)"); + "scheduler thread affinity default 
0 (disabled)"); + +static int thread_safe = GNILND_TS_ENABLE; +CFS_MODULE_PARM(thread_safe, "i", int, 0444, + "Use kgni thread safe API if available"); kgn_tunables_t kgnilnd_tunables = { .kgn_min_reconnect_interval = &min_reconnect_interval, @@ -233,6 +237,7 @@ kgn_tunables_t kgnilnd_tunables = { .kgn_fast_reconn = &fast_reconn, .kgn_efault_lbug = &efault_lbug, .kgn_thread_affinity = &thread_affinity, + .kgn_thread_safe = &thread_safe, .kgn_max_purgatory = &max_conn_purg }; @@ -535,6 +540,14 @@ static struct ctl_table kgnilnd_ctl_table[] = { }, { INIT_CTL_NAME + .procname = "thread_safe" + .data = &thread_safe, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = &proc_dointvec + }, + { + INIT_CTL_NAME .procname = "max_conn_purg" .data = &max_conn_purg, .maxlen = sizeof(int), -- 1.8.3.1