A general protection fault (GPF) occurred in ksocknal_find_timed_out_conn()
while processing the ksnc_tx_queue list.
Protect accesses to this list by taking the connection scheduler's
kss_lock (spin_lock_bh) around them.
Change-Id: I1f76683e5798c5015f11e3fa285db9613b1af906
Signed-off-by: Artem Blagodarenko <artem.blagodarenko@hpe.com>
HPE-bug-id: LUS-10248
Fixes: 25c1cb2c4d ("LU-9120 lnet: handle socklnd tx failure")
Reviewed-by: Chris Horn <hornc@cray.com>
Reviewed-by: Alexander Boyko <alexander.boyko@hpe.com>
Reviewed-on: https://review.whamcloud.com/45179
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Chris Horn <chris.horn@hpe.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
/* We're called with a shared lock on ksnd_global_lock */
struct ksock_conn *conn;
struct ksock_tx *tx;
/* We're called with a shared lock on ksnd_global_lock */
struct ksock_conn *conn;
struct ksock_tx *tx;
+ struct ksock_sched *sched;
list_for_each_entry(conn, &peer_ni->ksnp_conns, ksnc_list) {
int error;
/* Don't need the {get,put}connsock dance to deref ksnc_sock */
LASSERT (!conn->ksnc_closing);
list_for_each_entry(conn, &peer_ni->ksnp_conns, ksnc_list) {
int error;
/* Don't need the {get,put}connsock dance to deref ksnc_sock */
LASSERT (!conn->ksnc_closing);
+ sched = conn->ksnc_scheduler;
error = conn->ksnc_sock->sk->sk_err;
if (error != 0) {
error = conn->ksnc_sock->sk->sk_err;
if (error != 0) {
+ spin_lock_bh(&sched->kss_lock);
if ((!list_empty(&conn->ksnc_tx_queue) ||
conn->ksnc_sock->sk->sk_wmem_queued != 0) &&
ktime_get_seconds() >= conn->ksnc_tx_deadline) {
if ((!list_empty(&conn->ksnc_tx_queue) ||
conn->ksnc_sock->sk->sk_wmem_queued != 0) &&
ktime_get_seconds() >= conn->ksnc_tx_deadline) {
CNETERR("Timeout sending data to %s (%pISp) the network or that node may be down.\n",
libcfs_idstr(&peer_ni->ksnp_id),
&conn->ksnc_peeraddr);
CNETERR("Timeout sending data to %s (%pISp) the network or that node may be down.\n",
libcfs_idstr(&peer_ni->ksnp_id),
&conn->ksnc_peeraddr);
+ spin_unlock_bh(&sched->kss_lock);
+ spin_unlock_bh(&sched->kss_lock);