X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lnet%2Fklnds%2Fsocklnd%2Fsocklnd_cb.c;h=ac5088dc902e74122884e22a24fbe2839aa73c2d;hp=669e491ec2851ce1a6c5995de46bd1a8c18659fe;hb=96bbc5b674d8c766e375c4266a3a7859a47b172e;hpb=a594ec4212c0ef4a619a3a3f932b30d0a700b96d diff --git a/lnet/klnds/socklnd/socklnd_cb.c b/lnet/klnds/socklnd/socklnd_cb.c index 669e491..ac5088d 100644 --- a/lnet/klnds/socklnd/socklnd_cb.c +++ b/lnet/klnds/socklnd/socklnd_cb.c @@ -1271,8 +1271,9 @@ ksocknal_process_receive (ksock_conn_t *conn) id = &conn->ksnc_peer->ksnp_id; rc = conn->ksnc_proto->pro_handle_zcreq(conn, - conn->ksnc_msg.ksm_zc_cookies[0], - le64_to_cpu(lhdr->src_nid) != id->nid); + conn->ksnc_msg.ksm_zc_cookies[0], + *ksocknal_tunables.ksnd_nonblk_zcack || + le64_to_cpu(lhdr->src_nid) != id->nid); } lnet_finalize(conn->ksnc_peer->ksnp_ni, conn->ksnc_cookie, rc); @@ -2302,6 +2303,7 @@ ksocknal_check_peer_timeouts (int idx) cfs_list_t *peers = &ksocknal_data.ksnd_peers[idx]; ksock_peer_t *peer; ksock_conn_t *conn; + ksock_tx_t *tx; again: /* NB. We expect to have a look at all the peers and not find any @@ -2310,6 +2312,10 @@ ksocknal_check_peer_timeouts (int idx) cfs_read_lock (&ksocknal_data.ksnd_global_lock); cfs_list_for_each_entry_typed(peer, peers, ksock_peer_t, ksnp_list) { + cfs_time_t deadline = 0; + int resid = 0; + int n = 0; + if (ksocknal_send_keepalive_locked(peer) != 0) { cfs_read_unlock (&ksocknal_data.ksnd_global_lock); goto again; @@ -2348,30 +2354,47 @@ ksocknal_check_peer_timeouts (int idx) goto again; } } - } - /* print out warnings about stale ZC_REQs */ - cfs_list_for_each_entry_typed(peer, peers, ksock_peer_t, ksnp_list) { - ksock_tx_t *tx; - int n = 0; + if (cfs_list_empty(&peer->ksnp_zc_req_list)) + continue; + cfs_spin_lock(&peer->ksnp_lock); cfs_list_for_each_entry_typed(tx, &peer->ksnp_zc_req_list, ksock_tx_t, tx_zc_list) { if (!cfs_time_aftereq(cfs_time_current(), tx->tx_deadline)) break; + /* ignore the TX if connection is being closed */ + if (tx->tx_conn->ksnc_closing) + continue; n++; } - if (n != 0) { - tx = cfs_list_entry (peer->ksnp_zc_req_list.next, - ksock_tx_t, tx_zc_list); - CWARN("Stale ZC_REQs for peer %s detected: %d; the " - "oldest (%p) timed out %ld secs ago\n", - libcfs_nid2str(peer->ksnp_id.nid), n, tx, - cfs_duration_sec(cfs_time_current() - - tx->tx_deadline)); + if (n == 0) { + cfs_spin_unlock(&peer->ksnp_lock); + continue; } + + tx = cfs_list_entry(peer->ksnp_zc_req_list.next, + ksock_tx_t, tx_zc_list); + deadline = tx->tx_deadline; + resid = tx->tx_resid; + conn = tx->tx_conn; + ksocknal_conn_addref(conn); + + cfs_spin_unlock(&peer->ksnp_lock); + cfs_read_unlock (&ksocknal_data.ksnd_global_lock); + + CERROR("Total %d stale ZC_REQs for peer %s detected; the " + "oldest(%p) timed out %ld secs ago, " + "resid: %d, wmem: %d\n", + n, libcfs_nid2str(peer->ksnp_id.nid), tx, + cfs_duration_sec(cfs_time_current() - deadline), + resid, libcfs_sock_wmem_queued(conn->ksnc_sock)); + + ksocknal_close_conn_and_siblings (conn, -ETIMEDOUT); + ksocknal_conn_decref(conn); + goto again; } cfs_read_unlock (&ksocknal_data.ksnd_global_lock);