i=isaac
Landing a patch fixing deadlock in usocklnd on HEAD. The patch also includes:
- minor code cleanup suggested by Isaac in comment #6 of bug #18844
- trivial cleanup patch for acceptor.c (att #21983 of bug #14132)
- minor one-line fix for handling EINTR error of poll(2) - inspected by Shadow.
+Severity : normal
+Bugzilla : 18844
+Description: Fixing deadlock in usocklnd
+Details : A deadlock was possible in usocklnd due to race condition while
+             tearing a connection down. The problem resulted from the
+             erroneous assumption that lnet_finalize() could be called
+             while holding some lnd-level locks.
+
Severity : major
Bugzilla : 13621, 15983
Description: Protocol V2 of o2iblnd
Severity : major
Bugzilla : 13621, 15983
Description: Protocol V2 of o2iblnd
CFS_MODULE_PARM(accept_timeout, "i", int, 0644,
"Acceptor's timeout (seconds)");
CFS_MODULE_PARM(accept_timeout, "i", int, 0644,
"Acceptor's timeout (seconds)");
+static char *accept_type = NULL;
+
+int
+lnet_acceptor_get_tunables(void)
+{
+ /* Userland acceptor uses 'accept_type' instead of 'accept', due to
+ * conflict with 'accept(2)', but kernel acceptor still uses 'accept'
+ * for compatibility. Hence the trick. */
+ accept_type = accept;
+ return 0;
+}
+
int
lnet_acceptor_timeout(void)
{
int
lnet_acceptor_timeout(void)
{
#else /* below is multi-threaded user-space code */
#else /* below is multi-threaded user-space code */
-static char *accept_type = "secure";
+static char *accept_type = "secure";
int
lnet_acceptor_get_tunables()
int
lnet_acceptor_get_tunables()
lnet_acceptor_state.pta_sock = NULL;
} else {
lnet_acceptor_state.pta_sock = NULL;
} else {
-#ifdef __KERNEL__
- LCONSOLE(0, "Accept %s, port %d\n", accept, accept_port);
-#else
LCONSOLE(0, "Accept %s, port %d\n", accept_type, accept_port);
LCONSOLE(0, "Accept %s, port %d\n", accept_type, accept_port);
}
/* set init status and unblock parent */
}
/* set init status and unblock parent */
LASSERT (lnet_acceptor_state.pta_sock == NULL);
LASSERT (lnet_acceptor_state.pta_sock == NULL);
-#ifndef __KERNEL__
- /* kernel version uses CFS_MODULE_PARM */
rc = lnet_acceptor_get_tunables();
if (rc != 0)
return rc;
rc = lnet_acceptor_get_tunables();
if (rc != 0)
return rc;
/* Do nothing if we're liblustre clients */
if ((the_lnet.ln_pid & LNET_PID_USERFLAG) != 0)
return 0;
#endif
cfs_init_completion(&lnet_acceptor_state.pta_signal);
/* Do nothing if we're liblustre clients */
if ((the_lnet.ln_pid & LNET_PID_USERFLAG) != 0)
return 0;
#endif
cfs_init_completion(&lnet_acceptor_state.pta_signal);
-#ifdef __KERNEL__
- rc = accept2secure(accept, &secure);
-#else
rc = accept2secure(accept_type, &secure);
rc = accept2secure(accept_type, &secure);
if (rc <= 0) {
cfs_fini_completion(&lnet_acceptor_state.pta_signal);
return rc;
if (rc <= 0) {
cfs_fini_completion(&lnet_acceptor_state.pta_signal);
return rc;
lnet_process_id_t id;
int decref_flag = 0;
int killall_flag = 0;
lnet_process_id_t id;
int decref_flag = 0;
int killall_flag = 0;
+ void *rx_lnetmsg = NULL;
+ CFS_LIST_HEAD (zombie_txs);
if (peer == NULL) /* nothing to tear */
return;
if (peer == NULL) /* nothing to tear */
return;
if (conn->uc_rx_state == UC_RX_LNET_PAYLOAD) {
/* change state not to finalize twice */
conn->uc_rx_state = UC_RX_KSM_HEADER;
if (conn->uc_rx_state == UC_RX_LNET_PAYLOAD) {
/* change state not to finalize twice */
conn->uc_rx_state = UC_RX_KSM_HEADER;
- lnet_finalize(peer->up_ni, conn->uc_rx_lnetmsg, -EIO);
+ /* stash lnetmsg while holding locks */
+ rx_lnetmsg = conn->uc_rx_lnetmsg;
- usocklnd_destroy_txlist(peer->up_ni,
- &conn->uc_tx_list);
+ /* we cannot finalize txs right now (bug #18844) */
+ list_splice_init(&conn->uc_tx_list, &zombie_txs);
peer->up_conns[idx] = NULL;
conn->uc_peer = NULL;
peer->up_conns[idx] = NULL;
conn->uc_peer = NULL;
if(conn->uc_errored && !peer->up_errored)
peer->up_errored = killall_flag = 1;
if(conn->uc_errored && !peer->up_errored)
peer->up_errored = killall_flag = 1;
+
+ /* prevent queueing new txs to this conn */
+ conn->uc_errored = 1;
}
pthread_mutex_unlock(&conn->uc_lock);
}
pthread_mutex_unlock(&conn->uc_lock);
if (!decref_flag)
return;
if (!decref_flag)
return;
+ if (rx_lnetmsg != NULL)
+ lnet_finalize(ni, rx_lnetmsg, -EIO);
+
+ usocklnd_destroy_txlist(ni, &zombie_txs);
+
usocklnd_conn_decref(conn);
usocklnd_peer_decref(peer);
usocklnd_conn_decref(conn);
usocklnd_peer_decref(peer);
LASSERT(tx == NULL || zc_ack == NULL);
if (tx != NULL) {
LASSERT(tx == NULL || zc_ack == NULL);
if (tx != NULL) {
+ /* usocklnd_tear_peer_conn() could signal us stop queueing */
+ if (conn->uc_errored) {
+ rc = -EIO;
+ pthread_mutex_unlock(&conn->uc_lock);
+ goto find_or_create_conn_failed;
+ }
+
usocklnd_enqueue_tx(conn, tx, send_immediately);
} else {
rc = usocklnd_enqueue_zcack(conn, zc_ack);
usocklnd_enqueue_tx(conn, tx, send_immediately);
} else {
rc = usocklnd_enqueue_zcack(conn, zc_ack);
nob = libcfs_sock_readv(conn->uc_sock,
conn->uc_rx_iov, conn->uc_rx_niov);
if (nob <= 0) {/* read nothing or error */
nob = libcfs_sock_readv(conn->uc_sock,
conn->uc_rx_iov, conn->uc_rx_niov);
if (nob <= 0) {/* read nothing or error */
+ if (nob < 0)
+ conn->uc_errored = 1;
pt_data->upt_nfds,
usock_tuns.ut_poll_timeout * 1000);
pt_data->upt_nfds,
usock_tuns.ut_poll_timeout * 1000);
+ if (rc < 0 && errno != EINTR) {
CERROR("Cannot poll(2): errno=%d\n", errno);
break;
}
CERROR("Cannot poll(2): errno=%d\n", errno);
break;
}