diff --git a/lnet/klnds/o2iblnd/o2iblnd_cb.c b/lnet/klnds/o2iblnd/o2iblnd_cb.c
index 49433c1..e22f946 100644
--- a/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ b/lnet/klnds/o2iblnd/o2iblnd_cb.c
@@ -588,6 +588,7 @@ kiblnd_fmr_map_tx(struct kib_net *net, struct kib_tx *tx,
 		return -EPROTONOSUPPORT;
 	}
 
+#ifdef HAVE_FMR_POOL_API
 	/*
 	 * FMR does not support gaps but the tx has gaps then
 	 * we should make sure that the number of fragments we'll be sending
@@ -606,6 +607,7 @@ kiblnd_fmr_map_tx(struct kib_net *net, struct kib_tx *tx,
 			return -EFBIG;
 		}
 	}
+#endif
 
 	fps = net->ibn_fmr_ps[cpt];
 	rc = kiblnd_fmr_pool_map(fps, tx, rd, nob, 0, &tx->tx_fmr);
@@ -624,11 +626,17 @@ kiblnd_fmr_map_tx(struct kib_net *net, struct kib_tx *tx,
 	 * for FastReg or FMR with no gaps we can accumulate all
 	 * the fragments in one FastReg or FMR fragment.
 	 */
-	if (((dev->ibd_dev_caps & IBLND_DEV_CAPS_FMR_ENABLED) && !tx->tx_gaps) ||
+	if (
+#ifdef HAVE_FMR_POOL_API
+	    ((dev->ibd_dev_caps & IBLND_DEV_CAPS_FMR_ENABLED)
+	     && !tx->tx_gaps) ||
+#endif
 	    (dev->ibd_dev_caps & IBLND_DEV_CAPS_FASTREG_ENABLED)) {
 		/* FMR requires zero based address */
+#ifdef HAVE_FMR_POOL_API
 		if (dev->ibd_dev_caps & IBLND_DEV_CAPS_FMR_ENABLED)
 			rd->rd_frags[0].rf_addr &= ~hdev->ibh_page_mask;
+#endif
 		rd->rd_frags[0].rf_nob = nob;
 		rd->rd_nfrags = 1;
 	} else {
@@ -649,7 +657,11 @@ kiblnd_fmr_map_tx(struct kib_net *net, struct kib_tx *tx,
 static void
 kiblnd_unmap_tx(struct kib_tx *tx)
 {
-	if (tx->tx_fmr.fmr_pfmr || tx->tx_fmr.fmr_frd)
+	if (
+#ifdef HAVE_FMR_POOL_API
+	    tx->tx_fmr.fmr_pfmr ||
+#endif
+	    tx->tx_fmr.fmr_frd)
 		kiblnd_fmr_pool_unmap(&tx->tx_fmr, tx->tx_status);
 
 	if (tx->tx_nfrags != 0) {
@@ -676,8 +688,11 @@ kiblnd_find_rd_dma_mr(struct lnet_ni *ni, struct kib_rdma_desc *rd)
	 * dead in the water and fail the operation.
*/ if (tunables->lnd_map_on_demand && - (net->ibn_dev->ibd_dev_caps & IBLND_DEV_CAPS_FASTREG_ENABLED || - net->ibn_dev->ibd_dev_caps & IBLND_DEV_CAPS_FMR_ENABLED)) + (net->ibn_dev->ibd_dev_caps & IBLND_DEV_CAPS_FASTREG_ENABLED +#ifdef HAVE_FMR_POOL_API + || net->ibn_dev->ibd_dev_caps & IBLND_DEV_CAPS_FMR_ENABLED +#endif + )) return NULL; /* @@ -1293,10 +1308,11 @@ kiblnd_queue_tx(struct kib_tx *tx, struct kib_conn *conn) spin_unlock(&conn->ibc_lock); } -static int kiblnd_resolve_addr(struct rdma_cm_id *cmid, - struct sockaddr_in *srcaddr, - struct sockaddr_in *dstaddr, - int timeout_ms) +static int +kiblnd_resolve_addr_cap(struct rdma_cm_id *cmid, + struct sockaddr_in *srcaddr, + struct sockaddr_in *dstaddr, + int timeout_ms) { unsigned short port; int rc; @@ -1326,8 +1342,36 @@ static int kiblnd_resolve_addr(struct rdma_cm_id *cmid, } } - CERROR("Failed to bind to a free privileged port\n"); - return rc; + CERROR("cannot bind to a free privileged port: rc = %d\n", rc); + + return rc; +} + +static int +kiblnd_resolve_addr(struct rdma_cm_id *cmid, + struct sockaddr_in *srcaddr, + struct sockaddr_in *dstaddr, + int timeout_ms) +{ + const struct cred *old_creds = NULL; + struct cred *new_creds; + int rc; + + if (!capable(CAP_NET_BIND_SERVICE)) { + new_creds = prepare_kernel_cred(NULL); + if (!new_creds) + return -ENOMEM; + + cap_raise(new_creds->cap_effective, CAP_NET_BIND_SERVICE); + old_creds = override_creds(new_creds); + } + + rc = kiblnd_resolve_addr_cap(cmid, srcaddr, dstaddr, timeout_ms); + + if (old_creds) + revert_creds(old_creds); + + return rc; } static void @@ -1450,47 +1494,49 @@ kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid) struct kib_peer_ni *peer2; struct kib_conn *conn; rwlock_t *g_lock = &kiblnd_data.kib_global_lock; - unsigned long flags; - int rc; - int i; + unsigned long flags; + int rc; + int i; struct lnet_ioctl_config_o2iblnd_tunables *tunables; - /* If I get here, I've committed to send, so I complete the tx with - * failure on any problems */ + /* If I get here, I've committed to send, so I complete the tx with + * failure on any problems + */ - LASSERT (tx == NULL || tx->tx_conn == NULL); /* only set when assigned a conn */ - LASSERT (tx == NULL || tx->tx_nwrq > 0); /* work items have been set up */ + LASSERT(!tx || !tx->tx_conn); /* only set when assigned a conn */ + LASSERT(!tx || tx->tx_nwrq > 0); /* work items have been set up */ - /* First time, just use a read lock since I expect to find my peer_ni - * connected */ + /* First time, just use a read lock since I expect to find my peer_ni + * connected + */ read_lock_irqsave(g_lock, flags); - peer_ni = kiblnd_find_peer_locked(ni, nid); + peer_ni = kiblnd_find_peer_locked(ni, nid); if (peer_ni != NULL && !list_empty(&peer_ni->ibp_conns)) { - /* Found a peer_ni with an established connection */ - conn = kiblnd_get_conn_locked(peer_ni); - kiblnd_conn_addref(conn); /* 1 ref for me... */ + /* Found a peer_ni with an established connection */ + conn = kiblnd_get_conn_locked(peer_ni); + kiblnd_conn_addref(conn); /* 1 ref for me... 
*/ read_unlock_irqrestore(g_lock, flags); - if (tx != NULL) - kiblnd_queue_tx(tx, conn); - kiblnd_conn_decref(conn); /* ...to here */ - return; - } + if (tx != NULL) + kiblnd_queue_tx(tx, conn); + kiblnd_conn_decref(conn); /* ...to here */ + return; + } read_unlock(g_lock); /* Re-try with a write lock */ write_lock(g_lock); - peer_ni = kiblnd_find_peer_locked(ni, nid); - if (peer_ni != NULL) { + peer_ni = kiblnd_find_peer_locked(ni, nid); + if (peer_ni != NULL) { if (list_empty(&peer_ni->ibp_conns)) { - /* found a peer_ni, but it's still connecting... */ + /* found a peer_ni, but it's still connecting... */ LASSERT(kiblnd_peer_connecting(peer_ni)); - if (tx != NULL) + if (tx != NULL) list_add_tail(&tx->tx_list, - &peer_ni->ibp_tx_queue); + &peer_ni->ibp_tx_queue); write_unlock_irqrestore(g_lock, flags); } else { conn = kiblnd_get_conn_locked(peer_ni); @@ -1498,12 +1544,12 @@ kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid) write_unlock_irqrestore(g_lock, flags); - if (tx != NULL) - kiblnd_queue_tx(tx, conn); - kiblnd_conn_decref(conn); /* ...to here */ - } - return; - } + if (tx != NULL) + kiblnd_queue_tx(tx, conn); + kiblnd_conn_decref(conn); /* ...to here */ + } + return; + } write_unlock_irqrestore(g_lock, flags); @@ -1522,14 +1568,14 @@ kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid) write_lock_irqsave(g_lock, flags); - peer2 = kiblnd_find_peer_locked(ni, nid); - if (peer2 != NULL) { + peer2 = kiblnd_find_peer_locked(ni, nid); + if (peer2 != NULL) { if (list_empty(&peer2->ibp_conns)) { - /* found a peer_ni, but it's still connecting... */ + /* found a peer_ni, but it's still connecting... */ LASSERT(kiblnd_peer_connecting(peer2)); - if (tx != NULL) + if (tx != NULL) list_add_tail(&tx->tx_list, - &peer2->ibp_tx_queue); + &peer2->ibp_tx_queue); write_unlock_irqrestore(g_lock, flags); } else { conn = kiblnd_get_conn_locked(peer2); @@ -1537,14 +1583,14 @@ kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid) write_unlock_irqrestore(g_lock, flags); - if (tx != NULL) - kiblnd_queue_tx(tx, conn); - kiblnd_conn_decref(conn); /* ...to here */ - } + if (tx != NULL) + kiblnd_queue_tx(tx, conn); + kiblnd_conn_decref(conn); /* ...to here */ + } - kiblnd_peer_decref(peer_ni); - return; - } + kiblnd_peer_decref(peer_ni); + return; + } /* Brand new peer_ni */ LASSERT(peer_ni->ibp_connecting == 0); @@ -1557,14 +1603,14 @@ kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid) if (tx != NULL) list_add_tail(&tx->tx_list, &peer_ni->ibp_tx_queue); - kiblnd_peer_addref(peer_ni); - list_add_tail(&peer_ni->ibp_list, kiblnd_nid2peerlist(nid)); + kiblnd_peer_addref(peer_ni); + hash_add(kiblnd_data.kib_peers, &peer_ni->ibp_list, nid); write_unlock_irqrestore(g_lock, flags); for (i = 0; i < tunables->lnd_conns_per_peer; i++) kiblnd_connect_peer(peer_ni); - kiblnd_peer_decref(peer_ni); + kiblnd_peer_decref(peer_ni); } int @@ -1889,7 +1935,7 @@ kiblnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg, int kiblnd_thread_start(int (*fn)(void *arg), void *arg, char *name) { - struct task_struct *task = kthread_run(fn, arg, name); + struct task_struct *task = kthread_run(fn, arg, "%s", name); if (IS_ERR(task)) return PTR_ERR(task); @@ -2089,6 +2135,10 @@ kiblnd_abort_txs(struct kib_conn *conn, struct list_head *txs) if (tx->tx_sending == 0) { tx->tx_queued = 0; list_move(&tx->tx_list, &zombies); + } else { + /* keep tx until cq destroy */ + list_move(&tx->tx_list, &conn->ibc_zombie_txs); + conn->ibc_waits ++; } } @@ -2103,6 
+2153,31 @@ kiblnd_abort_txs(struct kib_conn *conn, struct list_head *txs) kiblnd_txlist_done(&zombies, -ECONNABORTED, LNET_MSG_STATUS_OK); } +static bool +kiblnd_tx_may_discard(struct kib_conn *conn) +{ + bool rc = false; + struct kib_tx *nxt; + struct kib_tx *tx; + + spin_lock(&conn->ibc_lock); + + list_for_each_entry_safe(tx, nxt, &conn->ibc_zombie_txs, tx_list) { + if (tx->tx_sending > 0 && tx->tx_lntmsg[0] && + lnet_md_discarded(tx->tx_lntmsg[0]->msg_md)) { + tx->tx_sending --; + if (tx->tx_sending == 0) { + kiblnd_conn_decref(tx->tx_conn); + tx->tx_conn = NULL; + rc = true; + } + } + } + + spin_unlock(&conn->ibc_lock); + return rc; +} + static void kiblnd_finalise_conn(struct kib_conn *conn) { @@ -2133,10 +2208,11 @@ kiblnd_peer_connect_failed(struct kib_peer_ni *peer_ni, int active, int error) { LIST_HEAD(zombies); - unsigned long flags; + unsigned long flags; + enum lnet_msg_hstatus hstatus; - LASSERT (error != 0); - LASSERT (!in_interrupt()); + LASSERT(error != 0); + LASSERT(!in_interrupt()); write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); @@ -2179,8 +2255,20 @@ kiblnd_peer_connect_failed(struct kib_peer_ni *peer_ni, int active, CNETERR("Deleting messages for %s: connection failed\n", libcfs_nid2str(peer_ni->ibp_nid)); - kiblnd_txlist_done(&zombies, error, - LNET_MSG_STATUS_LOCAL_DROPPED); + switch (error) { + case -EHOSTUNREACH: + case -ETIMEDOUT: + hstatus = LNET_MSG_STATUS_NETWORK_TIMEOUT; + break; + case -ECONNREFUSED: + hstatus = LNET_MSG_STATUS_REMOTE_DROPPED; + break; + default: + hstatus = LNET_MSG_STATUS_LOCAL_DROPPED; + break; + } + + kiblnd_txlist_done(&zombies, error, hstatus); } static void @@ -2204,22 +2292,25 @@ kiblnd_connreq_done(struct kib_conn *conn, int status) (conn->ibc_state == IBLND_CONN_PASSIVE_WAIT && peer_ni->ibp_accepting > 0)); - LIBCFS_FREE(conn->ibc_connvars, sizeof(*conn->ibc_connvars)); - conn->ibc_connvars = NULL; + LIBCFS_FREE(conn->ibc_connvars, sizeof(*conn->ibc_connvars)); + conn->ibc_connvars = NULL; - if (status != 0) { - /* failed to establish connection */ - kiblnd_peer_connect_failed(peer_ni, active, status); - kiblnd_finalise_conn(conn); - return; - } + if (status != 0) { + /* failed to establish connection */ + kiblnd_peer_connect_failed(peer_ni, active, status); + kiblnd_finalise_conn(conn); + return; + } - /* connection established */ + /* connection established */ write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); + /* reset retry count */ + peer_ni->ibp_retries = 0; + conn->ibc_last_send = ktime_get(); - kiblnd_set_conn_state(conn, IBLND_CONN_ESTABLISHED); - kiblnd_peer_alive(peer_ni); + kiblnd_set_conn_state(conn, IBLND_CONN_ESTABLISHED); + kiblnd_peer_alive(peer_ni); /* Add conn to peer_ni's list and nuke any dangling conns from a different * peer_ni instance... 
*/ @@ -2293,7 +2384,11 @@ kiblnd_reject(struct rdma_cm_id *cmid, struct kib_rej *rej) { int rc; +#ifdef HAVE_RDMA_REJECT_4ARGS + rc = rdma_reject(cmid, rej, sizeof(*rej), IB_CM_REJ_CONSUMER_DEFINED); +#else rc = rdma_reject(cmid, rej, sizeof(*rej)); +#endif if (rc != 0) CWARN("Error %d sending reject\n", rc); @@ -2302,7 +2397,7 @@ kiblnd_reject(struct rdma_cm_id *cmid, struct kib_rej *rej) static int kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) { - rwlock_t *g_lock = &kiblnd_data.kib_global_lock; + rwlock_t *g_lock = &kiblnd_data.kib_global_lock; struct kib_msg *reqmsg = priv; struct kib_msg *ackmsg; struct kib_dev *ibdev; @@ -2311,27 +2406,27 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) struct kib_conn *conn; struct lnet_ni *ni = NULL; struct kib_net *net = NULL; - lnet_nid_t nid; - struct rdma_conn_param cp; + lnet_nid_t nid; + struct rdma_conn_param cp; struct kib_rej rej; - int version = IBLND_MSG_VERSION; - unsigned long flags; - int rc; - struct sockaddr_in *peer_addr; - LASSERT (!in_interrupt()); + int version = IBLND_MSG_VERSION; + unsigned long flags; + int rc; + struct sockaddr_in *peer_addr; + LASSERT(!in_interrupt()); /* cmid inherits 'context' from the corresponding listener id */ ibdev = cmid->context; LASSERT(ibdev); - memset(&rej, 0, sizeof(rej)); - rej.ibr_magic = IBLND_MSG_MAGIC; - rej.ibr_why = IBLND_REJECT_FATAL; - rej.ibr_cp.ibcp_max_msg_size = IBLND_MSG_SIZE; + memset(&rej, 0, sizeof(rej)); + rej.ibr_magic = IBLND_MSG_MAGIC; + rej.ibr_why = IBLND_REJECT_FATAL; + rej.ibr_cp.ibcp_max_msg_size = IBLND_MSG_SIZE; - peer_addr = (struct sockaddr_in *)&(cmid->route.addr.dst_addr); - if (*kiblnd_tunables.kib_require_priv_port && - ntohs(peer_addr->sin_port) >= PROT_SOCK) { + peer_addr = (struct sockaddr_in *)&(cmid->route.addr.dst_addr); + if (*kiblnd_tunables.kib_require_priv_port && + ntohs(peer_addr->sin_port) >= PROT_SOCK) { __u32 ip = ntohl(peer_addr->sin_addr.s_addr); CERROR("peer_ni's port (%pI4h:%hu) is not privileged\n", &ip, ntohs(peer_addr->sin_port)); @@ -2378,17 +2473,16 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) if (ni == NULL || /* no matching net */ ni->ni_nid != reqmsg->ibm_dstnid || /* right NET, wrong NID! */ net->ibn_dev != ibdev) { /* wrong device */ - CERROR("Can't accept conn from %s on %s (%s:%d:%pI4h): " - "bad dst nid %s\n", libcfs_nid2str(nid), - ni == NULL ? "NA" : libcfs_nid2str(ni->ni_nid), + CERROR("Can't accept conn from %s on %s (%s:%d:%pI4h): bad dst nid %s\n", libcfs_nid2str(nid), + ni ? 
libcfs_nid2str(ni->ni_nid) : "NA", ibdev->ibd_ifname, ibdev->ibd_nnets, - &ibdev->ibd_ifip, + &ibdev->ibd_ifip, libcfs_nid2str(reqmsg->ibm_dstnid)); goto failed; } - /* check time stamp as soon as possible */ + /* check time stamp as soon as possible */ if (reqmsg->ibm_dststamp != 0 && reqmsg->ibm_dststamp != net->ibn_incarnation) { CWARN("Stale connection request\n"); @@ -2407,8 +2501,7 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) if (reqmsg->ibm_u.connparams.ibcp_queue_depth > kiblnd_msg_queue_size(version, ni)) { - CERROR("Can't accept conn from %s, queue depth too large: " - " %d (<=%d wanted)\n", + CERROR("Can't accept conn from %s, queue depth too large: %d (<=%d wanted)\n", libcfs_nid2str(nid), reqmsg->ibm_u.connparams.ibcp_queue_depth, kiblnd_msg_queue_size(version, ni)); @@ -2421,8 +2514,7 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) if (reqmsg->ibm_u.connparams.ibcp_max_frags > IBLND_MAX_RDMA_FRAGS) { - CWARN("Can't accept conn from %s (version %x): " - "max_frags %d too large (%d wanted)\n", + CWARN("Can't accept conn from %s (version %x): max_frags %d too large (%d wanted)\n", libcfs_nid2str(nid), version, reqmsg->ibm_u.connparams.ibcp_max_frags, IBLND_MAX_RDMA_FRAGS); @@ -2434,9 +2526,7 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) } else if (reqmsg->ibm_u.connparams.ibcp_max_frags < IBLND_MAX_RDMA_FRAGS && net->ibn_fmr_ps == NULL) { - CWARN("Can't accept conn from %s (version %x): " - "max_frags %d incompatible without FMR pool " - "(%d wanted)\n", + CWARN("Can't accept conn from %s (version %x): max_frags %d incompatible without FMR pool (%d wanted)\n", libcfs_nid2str(nid), version, reqmsg->ibm_u.connparams.ibcp_max_frags, IBLND_MAX_RDMA_FRAGS); @@ -2447,13 +2537,13 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) goto failed; } - if (reqmsg->ibm_u.connparams.ibcp_max_msg_size > IBLND_MSG_SIZE) { - CERROR("Can't accept %s: message size %d too big (%d max)\n", - libcfs_nid2str(nid), - reqmsg->ibm_u.connparams.ibcp_max_msg_size, - IBLND_MSG_SIZE); - goto failed; - } + if (reqmsg->ibm_u.connparams.ibcp_max_msg_size > IBLND_MSG_SIZE) { + CERROR("Can't accept %s: message size %d too big (%d max)\n", + libcfs_nid2str(nid), + reqmsg->ibm_u.connparams.ibcp_max_msg_size, + IBLND_MSG_SIZE); + goto failed; + } /* assume 'nid' is a new peer_ni; create */ rc = kiblnd_create_peer(ni, &peer_ni, nid); @@ -2469,16 +2559,16 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) write_lock_irqsave(g_lock, flags); - peer2 = kiblnd_find_peer_locked(ni, nid); - if (peer2 != NULL) { - if (peer2->ibp_version == 0) { - peer2->ibp_version = version; - peer2->ibp_incarnation = reqmsg->ibm_srcstamp; - } + peer2 = kiblnd_find_peer_locked(ni, nid); + if (peer2 != NULL) { + if (peer2->ibp_version == 0) { + peer2->ibp_version = version; + peer2->ibp_incarnation = reqmsg->ibm_srcstamp; + } - /* not the guy I've talked with */ - if (peer2->ibp_incarnation != reqmsg->ibm_srcstamp || - peer2->ibp_version != version) { + /* not the guy I've talked with */ + if (peer2->ibp_incarnation != reqmsg->ibm_srcstamp || + peer2->ibp_version != version) { kiblnd_close_peer_conns_locked(peer2, -ESTALE); if (kiblnd_peer_active(peer2)) { @@ -2491,10 +2581,10 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) libcfs_nid2str(nid), peer2->ibp_version, version, peer2->ibp_incarnation, reqmsg->ibm_srcstamp); - kiblnd_peer_decref(peer_ni); - rej.ibr_why = 
IBLND_REJECT_CONN_STALE; - goto failed; - } + kiblnd_peer_decref(peer_ni); + rej.ibr_why = IBLND_REJECT_CONN_STALE; + goto failed; + } /* Tie-break connection race in favour of the higher NID. * If we keep running into a race condition multiple times, @@ -2536,78 +2626,80 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) peer2->ibp_queue_depth = peer_ni->ibp_queue_depth; write_unlock_irqrestore(g_lock, flags); - kiblnd_peer_decref(peer_ni); - peer_ni = peer2; - } else { - /* Brand new peer_ni */ - LASSERT (peer_ni->ibp_accepting == 0); - LASSERT (peer_ni->ibp_version == 0 && - peer_ni->ibp_incarnation == 0); + kiblnd_peer_decref(peer_ni); + peer_ni = peer2; + } else { + /* Brand new peer_ni */ + LASSERT(peer_ni->ibp_accepting == 0); + LASSERT(peer_ni->ibp_version == 0 && + peer_ni->ibp_incarnation == 0); - peer_ni->ibp_accepting = 1; - peer_ni->ibp_version = version; - peer_ni->ibp_incarnation = reqmsg->ibm_srcstamp; + peer_ni->ibp_accepting = 1; + peer_ni->ibp_version = version; + peer_ni->ibp_incarnation = reqmsg->ibm_srcstamp; - /* I have a ref on ni that prevents it being shutdown */ - LASSERT (net->ibn_shutdown == 0); + /* I have a ref on ni that prevents it being shutdown */ + LASSERT(net->ibn_shutdown == 0); - kiblnd_peer_addref(peer_ni); - list_add_tail(&peer_ni->ibp_list, kiblnd_nid2peerlist(nid)); + kiblnd_peer_addref(peer_ni); + hash_add(kiblnd_data.kib_peers, &peer_ni->ibp_list, nid); write_unlock_irqrestore(g_lock, flags); - } + } - conn = kiblnd_create_conn(peer_ni, cmid, IBLND_CONN_PASSIVE_WAIT, version); - if (conn == NULL) { - kiblnd_peer_connect_failed(peer_ni, 0, -ENOMEM); - kiblnd_peer_decref(peer_ni); - rej.ibr_why = IBLND_REJECT_NO_RESOURCES; - goto failed; - } + conn = kiblnd_create_conn(peer_ni, cmid, IBLND_CONN_PASSIVE_WAIT, + version); + if (!conn) { + kiblnd_peer_connect_failed(peer_ni, 0, -ENOMEM); + kiblnd_peer_decref(peer_ni); + rej.ibr_why = IBLND_REJECT_NO_RESOURCES; + goto failed; + } - /* conn now "owns" cmid, so I return success from here on to ensure the - * CM callback doesn't destroy cmid. */ + /* conn now "owns" cmid, so I return success from here on to ensure the + * CM callback doesn't destroy cmid. 
+ */ conn->ibc_incarnation = reqmsg->ibm_srcstamp; conn->ibc_credits = conn->ibc_queue_depth; conn->ibc_reserved_credits = conn->ibc_queue_depth; LASSERT(conn->ibc_credits + conn->ibc_reserved_credits + IBLND_OOB_MSGS(version) <= IBLND_RX_MSGS(conn)); - ackmsg = &conn->ibc_connvars->cv_msg; - memset(ackmsg, 0, sizeof(*ackmsg)); + ackmsg = &conn->ibc_connvars->cv_msg; + memset(ackmsg, 0, sizeof(*ackmsg)); - kiblnd_init_msg(ackmsg, IBLND_MSG_CONNACK, - sizeof(ackmsg->ibm_u.connparams)); + kiblnd_init_msg(ackmsg, IBLND_MSG_CONNACK, + sizeof(ackmsg->ibm_u.connparams)); ackmsg->ibm_u.connparams.ibcp_queue_depth = conn->ibc_queue_depth; ackmsg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags; ackmsg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE; - kiblnd_pack_msg(ni, ackmsg, version, 0, nid, reqmsg->ibm_srcstamp); + kiblnd_pack_msg(ni, ackmsg, version, 0, nid, reqmsg->ibm_srcstamp); - memset(&cp, 0, sizeof(cp)); - cp.private_data = ackmsg; - cp.private_data_len = ackmsg->ibm_nob; - cp.responder_resources = 0; /* No atomic ops or RDMA reads */ - cp.initiator_depth = 0; - cp.flow_control = 1; - cp.retry_count = *kiblnd_tunables.kib_retry_count; - cp.rnr_retry_count = *kiblnd_tunables.kib_rnr_retry_count; + memset(&cp, 0, sizeof(cp)); + cp.private_data = ackmsg; + cp.private_data_len = ackmsg->ibm_nob; + cp.responder_resources = 0; /* No atomic ops or RDMA reads */ + cp.initiator_depth = 0; + cp.flow_control = 1; + cp.retry_count = *kiblnd_tunables.kib_retry_count; + cp.rnr_retry_count = *kiblnd_tunables.kib_rnr_retry_count; - CDEBUG(D_NET, "Accept %s\n", libcfs_nid2str(nid)); + CDEBUG(D_NET, "Accept %s\n", libcfs_nid2str(nid)); - rc = rdma_accept(cmid, &cp); - if (rc != 0) { - CERROR("Can't accept %s: %d\n", libcfs_nid2str(nid), rc); - rej.ibr_version = version; - rej.ibr_why = IBLND_REJECT_FATAL; + rc = rdma_accept(cmid, &cp); + if (rc != 0) { + CERROR("Can't accept %s: %d\n", libcfs_nid2str(nid), rc); + rej.ibr_version = version; + rej.ibr_why = IBLND_REJECT_FATAL; - kiblnd_reject(cmid, &rej); - kiblnd_connreq_done(conn, rc); - kiblnd_conn_decref(conn); - } + kiblnd_reject(cmid, &rej); + kiblnd_connreq_done(conn, rc); + kiblnd_conn_decref(conn); + } - lnet_ni_decref(ni); - return 0; + lnet_ni_decref(ni); + return 0; failed: if (ni != NULL) { @@ -2661,10 +2753,15 @@ kiblnd_check_reconnect(struct kib_conn *conn, int version, goto out; } - switch (why) { - default: - reason = "Unknown"; - break; + if (peer_ni->ibp_retries > *kiblnd_tunables.kib_retry_count) { + reason = "retry count exceeded due to no listener"; + goto out; + } + + switch (why) { + default: + reason = "Unknown"; + break; case IBLND_REJECT_RDMA_FRAGS: { struct lnet_ioctl_config_o2iblnd_tunables *tunables; @@ -2758,117 +2855,121 @@ kiblnd_rejected(struct kib_conn *conn, int reason, void *priv, int priv_nob) IBLND_REJECT_CONN_STALE, NULL); break; - case IB_CM_REJ_INVALID_SERVICE_ID: + case IB_CM_REJ_INVALID_SERVICE_ID: + peer_ni->ibp_retries++; kiblnd_check_reconnect(conn, IBLND_MSG_VERSION, 0, IBLND_REJECT_INVALID_SRV_ID, NULL); - CNETERR("%s rejected: no listener at %d\n", - libcfs_nid2str(peer_ni->ibp_nid), - *kiblnd_tunables.kib_service); - break; + CNETERR("%s rejected: no listener at %d\n", + libcfs_nid2str(peer_ni->ibp_nid), + *kiblnd_tunables.kib_service); + break; - case IB_CM_REJ_CONSUMER_DEFINED: + case IB_CM_REJ_CONSUMER_DEFINED: if (priv_nob >= offsetof(struct kib_rej, ibr_padding)) { struct kib_rej *rej = priv; struct kib_connparams *cp = NULL; - int flip = 0; - __u64 incarnation = -1; - - /* NB. 
default incarnation is -1 because: - * a) V1 will ignore dst incarnation in connreq. - * b) V2 will provide incarnation while rejecting me, - * -1 will be overwrote. - * - * if I try to connect to a V1 peer_ni with V2 protocol, - * it rejected me then upgrade to V2, I have no idea - * about the upgrading and try to reconnect with V1, - * in this case upgraded V2 can find out I'm trying to - * talk to the old guy and reject me(incarnation is -1). - */ - - if (rej->ibr_magic == __swab32(IBLND_MSG_MAGIC) || - rej->ibr_magic == __swab32(LNET_PROTO_MAGIC)) { - __swab32s(&rej->ibr_magic); - __swab16s(&rej->ibr_version); - flip = 1; - } + bool flip = false; + __u64 incarnation = -1; + + /* NB. default incarnation is -1 because: + * a) V1 will ignore dst incarnation in connreq. + * b) V2 will provide incarnation while rejecting me, + * -1 will be overwrote. + * + * if I try to connect to a V1 peer_ni with V2 protocol, + * it rejected me then upgrade to V2, I have no idea + * about the upgrading and try to reconnect with V1, + * in this case upgraded V2 can find out I'm trying to + * talk to the old guy and reject me(incarnation is -1). + */ + + if (rej->ibr_magic == __swab32(IBLND_MSG_MAGIC) || + rej->ibr_magic == __swab32(LNET_PROTO_MAGIC)) { + __swab32s(&rej->ibr_magic); + __swab16s(&rej->ibr_version); + flip = true; + } if (priv_nob >= sizeof(struct kib_rej) && - rej->ibr_version > IBLND_MSG_VERSION_1) { - /* priv_nob is always 148 in current version - * of OFED, so we still need to check version. - * (define of IB_CM_REJ_PRIVATE_DATA_SIZE) */ - cp = &rej->ibr_cp; - - if (flip) { - __swab64s(&rej->ibr_incarnation); - __swab16s(&cp->ibcp_queue_depth); - __swab16s(&cp->ibcp_max_frags); - __swab32s(&cp->ibcp_max_msg_size); - } - - incarnation = rej->ibr_incarnation; - } - - if (rej->ibr_magic != IBLND_MSG_MAGIC && - rej->ibr_magic != LNET_PROTO_MAGIC) { - CERROR("%s rejected: consumer defined fatal error\n", - libcfs_nid2str(peer_ni->ibp_nid)); - break; - } - - if (rej->ibr_version != IBLND_MSG_VERSION && - rej->ibr_version != IBLND_MSG_VERSION_1) { - CERROR("%s rejected: o2iblnd version %x error\n", - libcfs_nid2str(peer_ni->ibp_nid), - rej->ibr_version); - break; - } - - if (rej->ibr_why == IBLND_REJECT_FATAL && - rej->ibr_version == IBLND_MSG_VERSION_1) { - CDEBUG(D_NET, "rejected by old version peer_ni %s: %x\n", - libcfs_nid2str(peer_ni->ibp_nid), rej->ibr_version); - - if (conn->ibc_version != IBLND_MSG_VERSION_1) - rej->ibr_why = IBLND_REJECT_CONN_UNCOMPAT; - } - - switch (rej->ibr_why) { - case IBLND_REJECT_CONN_RACE: - case IBLND_REJECT_CONN_STALE: - case IBLND_REJECT_CONN_UNCOMPAT: + rej->ibr_version > IBLND_MSG_VERSION_1) { + /* priv_nob is always 148 in current version + * of OFED, so we still need to check version. 
+ * (define of IB_CM_REJ_PRIVATE_DATA_SIZE) + */ + cp = &rej->ibr_cp; + + if (flip) { + __swab64s(&rej->ibr_incarnation); + __swab16s(&cp->ibcp_queue_depth); + __swab16s(&cp->ibcp_max_frags); + __swab32s(&cp->ibcp_max_msg_size); + } + + incarnation = rej->ibr_incarnation; + } + + if (rej->ibr_magic != IBLND_MSG_MAGIC && + rej->ibr_magic != LNET_PROTO_MAGIC) { + CERROR("%s rejected: consumer defined fatal error\n", + libcfs_nid2str(peer_ni->ibp_nid)); + break; + } + + if (rej->ibr_version != IBLND_MSG_VERSION && + rej->ibr_version != IBLND_MSG_VERSION_1) { + CERROR("%s rejected: o2iblnd version %x error\n", + libcfs_nid2str(peer_ni->ibp_nid), + rej->ibr_version); + break; + } + + if (rej->ibr_why == IBLND_REJECT_FATAL && + rej->ibr_version == IBLND_MSG_VERSION_1) { + CDEBUG(D_NET, "rejected by old version peer_ni %s: %x\n", + libcfs_nid2str(peer_ni->ibp_nid), + rej->ibr_version); + + if (conn->ibc_version != IBLND_MSG_VERSION_1) + rej->ibr_why = IBLND_REJECT_CONN_UNCOMPAT; + } + + switch (rej->ibr_why) { + case IBLND_REJECT_CONN_RACE: + case IBLND_REJECT_CONN_STALE: + case IBLND_REJECT_CONN_UNCOMPAT: case IBLND_REJECT_MSG_QUEUE_SIZE: case IBLND_REJECT_RDMA_FRAGS: kiblnd_check_reconnect(conn, rej->ibr_version, - incarnation, rej->ibr_why, cp); - break; - - case IBLND_REJECT_NO_RESOURCES: - CERROR("%s rejected: o2iblnd no resources\n", - libcfs_nid2str(peer_ni->ibp_nid)); - break; - - case IBLND_REJECT_FATAL: - CERROR("%s rejected: o2iblnd fatal error\n", - libcfs_nid2str(peer_ni->ibp_nid)); - break; - - default: - CERROR("%s rejected: o2iblnd reason %d\n", - libcfs_nid2str(peer_ni->ibp_nid), - rej->ibr_why); - break; - } - break; - } - /* fall through */ - default: - CNETERR("%s rejected: reason %d, size %d\n", - libcfs_nid2str(peer_ni->ibp_nid), reason, priv_nob); - break; - } + incarnation, + rej->ibr_why, cp); + break; + + case IBLND_REJECT_NO_RESOURCES: + CERROR("%s rejected: o2iblnd no resources\n", + libcfs_nid2str(peer_ni->ibp_nid)); + break; + + case IBLND_REJECT_FATAL: + CERROR("%s rejected: o2iblnd fatal error\n", + libcfs_nid2str(peer_ni->ibp_nid)); + break; - kiblnd_connreq_done(conn, -ECONNREFUSED); + default: + CERROR("%s rejected: o2iblnd reason %d\n", + libcfs_nid2str(peer_ni->ibp_nid), + rej->ibr_why); + break; + } + break; + } + /* fall through */ + default: + CNETERR("%s rejected: reason %d, size %d\n", + libcfs_nid2str(peer_ni->ibp_nid), reason, priv_nob); + break; + } + + kiblnd_connreq_done(conn, -ECONNREFUSED); } static void @@ -3026,8 +3127,7 @@ kiblnd_active_connect(struct rdma_cm_id *cmid) LASSERT(cmid->context == (void *)conn); LASSERT(conn->ibc_cmid == cmid); - - rc = rdma_connect(cmid, &cp); + rc = rdma_connect_locked(cmid, &cp); if (rc != 0) { CERROR("Can't connect to %s: %d\n", libcfs_nid2str(peer_ni->ibp_nid), rc); @@ -3236,8 +3336,10 @@ kiblnd_check_txs_locked(struct kib_conn *conn, struct list_head *txs) } if (ktime_compare(ktime_get(), tx->tx_deadline) >= 0) { - CERROR("Timed out tx: %s, %lld seconds\n", + CERROR("Timed out tx: %s(WSQ:%d%d%d), %lld seconds\n", kiblnd_queue2str(conn, txs), + tx->tx_waiting, tx->tx_sending, tx->tx_queued, + kiblnd_timeout() + ktime_ms_delta(ktime_get(), tx->tx_deadline) / MSEC_PER_SEC); return 1; @@ -3263,22 +3365,20 @@ kiblnd_check_conns (int idx) LIST_HEAD(closes); LIST_HEAD(checksends); LIST_HEAD(timedout_txs); - struct list_head *peers = &kiblnd_data.kib_peers[idx]; - struct list_head *ptmp; + struct hlist_head *peers = &kiblnd_data.kib_peers[idx]; struct kib_peer_ni *peer_ni; - struct kib_conn *conn; + struct kib_conn 
*conn; struct kib_tx *tx, *tx_tmp; struct list_head *ctmp; - unsigned long flags; + unsigned long flags; /* NB. We expect to have a look at all the peers and not find any * RDMAs to time out, so we just use a shared lock while we - * take a look... */ + * take a look... + */ write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - list_for_each(ptmp, peers) { - peer_ni = list_entry(ptmp, struct kib_peer_ni, ibp_list); - + hlist_for_each_entry(peer_ni, peers, ibp_list) { /* Check tx_deadline */ list_for_each_entry_safe(tx, tx_tmp, &peer_ni->ibp_tx_queue, tx_list) { if (ktime_compare(ktime_get(), tx->tx_deadline) >= 0) { @@ -3308,10 +3408,10 @@ kiblnd_check_conns (int idx) } if (timedout) { - CERROR("Timed out RDMA with %s (%lld): " - "c: %u, oc: %u, rc: %u\n", + CERROR("Timed out RDMA with %s (%lld): c: %u, oc: %u, rc: %u\n", libcfs_nid2str(peer_ni->ibp_nid), - ktime_get_seconds() - peer_ni->ibp_last_alive, + ktime_get_seconds() + - peer_ni->ibp_last_alive, conn->ibc_credits, conn->ibc_outstanding_credits, conn->ibc_reserved_credits); @@ -3330,11 +3430,12 @@ kiblnd_check_conns (int idx) if (!list_empty(&timedout_txs)) kiblnd_txlist_done(&timedout_txs, -ETIMEDOUT, - LNET_MSG_STATUS_LOCAL_TIMEOUT); + LNET_MSG_STATUS_NETWORK_TIMEOUT); /* Handle timeout by closing the whole * connection. We can only be sure RDMA activity - * has ceased once the QP has been modified. */ + * has ceased once the QP has been modified. + */ while (!list_empty(&closes)) { conn = list_entry(closes.next, struct kib_conn, ibc_connd_list); @@ -3345,7 +3446,8 @@ kiblnd_check_conns (int idx) /* In case we have enough credits to return via a * NOOP, but there were no non-blocking tx descs - * free to do it last time... */ + * free to do it last time... + */ while (!list_empty(&checksends)) { conn = list_entry(checksends.next, struct kib_conn, ibc_connd_list); @@ -3386,17 +3488,17 @@ kiblnd_disconnect_conn(struct kib_conn *conn) int kiblnd_connd (void *arg) { - spinlock_t *lock= &kiblnd_data.kib_connd_lock; + spinlock_t *lock = &kiblnd_data.kib_connd_lock; wait_queue_entry_t wait; - unsigned long flags; + unsigned long flags; struct kib_conn *conn; - int timeout; - int i; - int dropped_lock; - int peer_index = 0; - unsigned long deadline = jiffies; + int timeout; + int i; + bool dropped_lock; + int peer_index = 0; + unsigned long deadline = jiffies; - init_waitqueue_entry(&wait, current); + init_wait(&wait); kiblnd_data.kib_connd = current; spin_lock_irqsave(lock, flags); @@ -3404,7 +3506,7 @@ kiblnd_connd (void *arg) while (!kiblnd_data.kib_shutdown) { int reconn = 0; - dropped_lock = 0; + dropped_lock = false; if (!list_empty(&kiblnd_data.kib_connd_zombies)) { struct kib_peer_ni *peer_ni = NULL; @@ -3418,7 +3520,7 @@ kiblnd_connd (void *arg) } spin_unlock_irqrestore(lock, flags); - dropped_lock = 1; + dropped_lock = true; kiblnd_destroy_conn(conn); @@ -3438,18 +3540,25 @@ kiblnd_connd (void *arg) } if (!list_empty(&kiblnd_data.kib_connd_conns)) { + int wait; conn = list_entry(kiblnd_data.kib_connd_conns.next, struct kib_conn, ibc_list); list_del(&conn->ibc_list); spin_unlock_irqrestore(lock, flags); - dropped_lock = 1; + dropped_lock = true; kiblnd_disconnect_conn(conn); - kiblnd_conn_decref(conn); + wait = conn->ibc_waits; + if (wait == 0) /* keep ref for connd_wait, see below */ + kiblnd_conn_decref(conn); spin_lock_irqsave(lock, flags); - } + + if (wait) + list_add_tail(&conn->ibc_list, + &kiblnd_data.kib_connd_waits); + } while (reconn < KIB_RECONN_BREAK) { if (kiblnd_data.kib_reconn_sec != @@ -3467,7 +3576,7 @@ 
kiblnd_connd (void *arg) list_del(&conn->ibc_list); spin_unlock_irqrestore(lock, flags); - dropped_lock = 1; + dropped_lock = true; reconn += kiblnd_reconnect_peer(conn->ibc_peer); kiblnd_peer_decref(conn->ibc_peer); @@ -3476,24 +3585,41 @@ kiblnd_connd (void *arg) spin_lock_irqsave(lock, flags); } - /* careful with the jiffy wrap... */ - timeout = (int)(deadline - jiffies); - if (timeout <= 0) { - const int n = 4; - const int p = 1; - int chunk = kiblnd_data.kib_peer_hash_size; + if (!list_empty(&kiblnd_data.kib_connd_waits)) { + conn = list_entry(kiblnd_data.kib_connd_waits.next, + struct kib_conn, ibc_list); + list_del(&conn->ibc_list); + spin_unlock_irqrestore(lock, flags); + + dropped_lock = kiblnd_tx_may_discard(conn); + if (dropped_lock) + kiblnd_conn_decref(conn); + + spin_lock_irqsave(lock, flags); + if (!dropped_lock) + list_add_tail(&conn->ibc_list, + &kiblnd_data.kib_connd_waits); + } + + /* careful with the jiffy wrap... */ + timeout = (int)(deadline - jiffies); + if (timeout <= 0) { + const int n = 4; + const int p = 1; + int chunk = HASH_SIZE(kiblnd_data.kib_peers); unsigned int lnd_timeout; spin_unlock_irqrestore(lock, flags); - dropped_lock = 1; + dropped_lock = true; - /* Time to check for RDMA timeouts on a few more - * peers: I do checks every 'p' seconds on a - * proportion of the peer_ni table and I need to check - * every connection 'n' times within a timeout - * interval, to ensure I detect a timeout on any - * connection within (n+1)/n times the timeout - * interval. */ + /* Time to check for RDMA timeouts on a few more + * peers: I do checks every 'p' seconds on a + * proportion of the peer_ni table and I need to check + * every connection 'n' times within a timeout + * interval, to ensure I detect a timeout on any + * connection within (n+1)/n times the timeout + * interval. 
+ */ lnd_timeout = kiblnd_timeout(); if (lnd_timeout > n * p) @@ -3504,7 +3630,7 @@ kiblnd_connd (void *arg) for (i = 0; i < chunk; i++) { kiblnd_check_conns(peer_index); peer_index = (peer_index + 1) % - kiblnd_data.kib_peer_hash_size; + HASH_SIZE(kiblnd_data.kib_peers); } deadline += cfs_time_seconds(p); @@ -3645,25 +3771,22 @@ kiblnd_cq_event(struct ib_event *event, void *arg) int kiblnd_scheduler(void *arg) { - long id = (long)arg; - struct kib_sched_info *sched; + long id = (long)arg; + struct kib_sched_info *sched; struct kib_conn *conn; - wait_queue_entry_t wait; - unsigned long flags; - struct ib_wc wc; - int did_something; - int rc; + wait_queue_entry_t wait; + unsigned long flags; + struct ib_wc wc; + bool did_something; + int rc; - init_waitqueue_entry(&wait, current); + init_wait(&wait); sched = kiblnd_data.kib_scheds[KIB_THREAD_CPT(id)]; rc = cfs_cpt_bind(lnet_cpt_table(), sched->ibs_cpt); if (rc != 0) { - CWARN("Unable to bind on CPU partition %d, please verify " - "whether all CPUs are healthy and reload modules if " - "necessary, otherwise your system might under risk of " - "low performance\n", sched->ibs_cpt); + CWARN("Unable to bind on CPU partition %d, please verify whether all CPUs are healthy and reload modules if necessary, otherwise your system might under risk of low performance\n", sched->ibs_cpt); } spin_lock_irqsave(&sched->ibs_lock, flags); @@ -3677,7 +3800,7 @@ kiblnd_scheduler(void *arg) spin_lock_irqsave(&sched->ibs_lock, flags); } - did_something = 0; + did_something = false; if (!list_empty(&sched->ibs_conns)) { conn = list_entry(sched->ibs_conns.next, @@ -3691,18 +3814,17 @@ kiblnd_scheduler(void *arg) wc.wr_id = IBLND_WID_INVAL; - rc = ib_poll_cq(conn->ibc_cq, 1, &wc); - if (rc == 0) { - rc = ib_req_notify_cq(conn->ibc_cq, - IB_CQ_NEXT_COMP); - if (rc < 0) { - CWARN("%s: ib_req_notify_cq failed: %d, " - "closing connection\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), rc); - kiblnd_close_conn(conn, -EIO); - kiblnd_conn_decref(conn); + rc = ib_poll_cq(conn->ibc_cq, 1, &wc); + if (rc == 0) { + rc = ib_req_notify_cq(conn->ibc_cq, + IB_CQ_NEXT_COMP); + if (rc < 0) { + CWARN("%s: ib_req_notify_cq failed: %d, closing connection\n", + libcfs_nid2str(conn->ibc_peer->ibp_nid), rc); + kiblnd_close_conn(conn, -EIO); + kiblnd_conn_decref(conn); spin_lock_irqsave(&sched->ibs_lock, - flags); + flags); continue; } @@ -3723,8 +3845,7 @@ kiblnd_scheduler(void *arg) } if (rc < 0) { - CWARN("%s: ib_poll_cq failed: %d, " - "closing connection\n", + CWARN("%s: ib_poll_cq failed: %d, closing connection\n", libcfs_nid2str(conn->ibc_peer->ibp_nid), rc); kiblnd_close_conn(conn, -EIO); @@ -3742,7 +3863,7 @@ kiblnd_scheduler(void *arg) /* +1 ref for sched_conns */ kiblnd_conn_addref(conn); list_add_tail(&conn->ibc_sched_list, - &sched->ibs_conns); + &sched->ibs_conns); if (waitqueue_active(&sched->ibs_waitq)) wake_up(&sched->ibs_waitq); } else { @@ -3754,14 +3875,14 @@ kiblnd_scheduler(void *arg) kiblnd_complete(&wc); spin_lock_irqsave(&sched->ibs_lock, flags); - } + } - kiblnd_conn_decref(conn); /* ...drop my ref from above */ - did_something = 1; - } + kiblnd_conn_decref(conn); /* ..drop my ref from above */ + did_something = true; + } - if (did_something) - continue; + if (did_something) + continue; set_current_state(TASK_INTERRUPTIBLE); add_wait_queue_exclusive(&sched->ibs_waitq, &wait); @@ -3783,58 +3904,58 @@ kiblnd_scheduler(void *arg) int kiblnd_failover_thread(void *arg) { - rwlock_t *glock = &kiblnd_data.kib_global_lock; + rwlock_t *glock = &kiblnd_data.kib_global_lock; 
 	struct kib_dev *dev;
 	struct net *ns = arg;
 	wait_queue_entry_t wait;
-	unsigned long flags;
-	int rc;
+	unsigned long flags;
+	int rc;
 
 	LASSERT(*kiblnd_tunables.kib_dev_failover != 0);
 
-	init_waitqueue_entry(&wait, current);
+	init_wait(&wait);
 	write_lock_irqsave(glock, flags);
 
-	while (!kiblnd_data.kib_shutdown) {
-		int do_failover = 0;
-		int long_sleep;
+	while (!kiblnd_data.kib_shutdown) {
+		bool do_failover = false;
+		int long_sleep;
 
 		list_for_each_entry(dev, &kiblnd_data.kib_failed_devs,
-				    ibd_fail_list) {
+				    ibd_fail_list) {
 			if (ktime_get_seconds() < dev->ibd_next_failover)
-				continue;
-			do_failover = 1;
-			break;
-		}
+				continue;
+			do_failover = true;
+			break;
+		}
 
-		if (do_failover) {
+		if (do_failover) {
 			list_del_init(&dev->ibd_fail_list);
-			dev->ibd_failover = 1;
+			dev->ibd_failover = 1;
 			write_unlock_irqrestore(glock, flags);
 
 			rc = kiblnd_dev_failover(dev, ns);
 
 			write_lock_irqsave(glock, flags);
 
-			LASSERT (dev->ibd_failover);
-			dev->ibd_failover = 0;
-			if (rc >= 0) { /* Device is OK or failover succeed */
+			LASSERT(dev->ibd_failover);
+			dev->ibd_failover = 0;
+			if (rc >= 0) { /* Device is OK or failover succeeded */
 				dev->ibd_next_failover = ktime_get_seconds() + 3;
-				continue;
-			}
+				continue;
+			}
 
-			/* failed to failover, retry later */
+			/* failed to failover, retry later */
 			dev->ibd_next_failover = ktime_get_seconds() +
-				min(dev->ibd_failed_failover, 10);
-			if (kiblnd_dev_can_failover(dev)) {
+				min(dev->ibd_failed_failover, 10);
+			if (kiblnd_dev_can_failover(dev)) {
 				list_add_tail(&dev->ibd_fail_list,
-					      &kiblnd_data.kib_failed_devs);
-			}
+					      &kiblnd_data.kib_failed_devs);
+			}
 
-			continue;
-		}
+			continue;
+		}
 
-		/* long sleep if no more pending failover */
+		/* long sleep if no more pending failover */
 		long_sleep = list_empty(&kiblnd_data.kib_failed_devs);
 
 		set_current_state(TASK_INTERRUPTIBLE);
@@ -3842,28 +3963,29 @@ kiblnd_failover_thread(void *arg)
 		write_unlock_irqrestore(glock, flags);
 
 		rc = schedule_timeout(long_sleep ? cfs_time_seconds(10) :
-						   cfs_time_seconds(1));
+				      cfs_time_seconds(1));
 		set_current_state(TASK_RUNNING);
 		remove_wait_queue(&kiblnd_data.kib_failover_waitq, &wait);
 		write_lock_irqsave(glock, flags);
 
-		if (!long_sleep || rc != 0)
-			continue;
+		if (!long_sleep || rc != 0)
+			continue;
 
-		/* have a long sleep, routine check all active devices,
-		 * we need checking like this because if there is not active
-		 * connection on the dev and no SEND from local, we may listen
-		 * on wrong HCA for ever while there is a bonding failover */
+		/* have a long sleep: routinely check all active devices,
+		 * we need a check like this because if there is no active
+		 * connection on the dev and no SEND from the local node,
+		 * we may listen on the wrong HCA forever during a bonding failover
+		 */
 		list_for_each_entry(dev, &kiblnd_data.kib_devs, ibd_list) {
-			if (kiblnd_dev_can_failover(dev)) {
+			if (kiblnd_dev_can_failover(dev)) {
 				list_add_tail(&dev->ibd_fail_list,
-					      &kiblnd_data.kib_failed_devs);
-			}
-		}
-	}
+					      &kiblnd_data.kib_failed_devs);
+			}
+		}
+	}
 
 	write_unlock_irqrestore(glock, flags);
 
-	kiblnd_thread_fini();
-	return 0;
+	kiblnd_thread_fini();
+	return 0;
 }
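
The kiblnd_resolve_addr() split above exists because binding to a privileged port (< PROT_SOCK) requires CAP_NET_BIND_SERVICE, which the calling thread may not hold. The patch uses the standard kernel credential-override sequence: prepare_kernel_cred(), cap_raise(), override_creds(), then revert_creds() once the bind attempts are done. A minimal self-contained sketch of the same pattern (the helper name run_with_net_bind_service() is illustrative, not part of the patch; on kernels from 6.2 onward prepare_kernel_cred() must be given &init_task rather than NULL):

#include <linux/cred.h>
#include <linux/capability.h>
#include <linux/errno.h>

/* Run @fn with CAP_NET_BIND_SERVICE raised in the effective set,
 * restoring the caller's credentials afterwards. */
static int run_with_net_bind_service(int (*fn)(void *), void *arg)
{
	const struct cred *old_creds = NULL;
	struct cred *new_creds = NULL;
	int rc;

	if (!capable(CAP_NET_BIND_SERVICE)) {
		new_creds = prepare_kernel_cred(NULL);
		if (!new_creds)
			return -ENOMEM;

		cap_raise(new_creds->cap_effective, CAP_NET_BIND_SERVICE);
		old_creds = override_creds(new_creds);
	}

	rc = fn(arg);

	if (old_creds) {
		revert_creds(old_creds);
		/* revert_creds() only drops the override reference taken by
		 * override_creds(); drop prepare_kernel_cred()'s reference
		 * too so the temporary cred is freed. */
		put_cred(new_creds);
	}

	return rc;
}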
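
The peer-table changes (hash_add() in kiblnd_launch_tx() and kiblnd_passive_connect(), hlist_for_each_entry() in kiblnd_check_conns(), and HASH_SIZE() replacing kib_peer_hash_size) migrate the driver from an open-coded array of list_heads plus a kiblnd_nid2peerlist() bucket function to the generic <linux/hashtable.h> API, which hashes the key itself. A reduced sketch of that API, with illustrative struct and field names:

#include <linux/hashtable.h>
#include <linux/types.h>

#define PEER_HASH_BITS 7			/* 2^7 = 128 buckets */

static DEFINE_HASHTABLE(peer_table, PEER_HASH_BITS);

struct peer {
	struct hlist_node pl_list;		/* bucket linkage */
	u64 pl_nid;				/* hash key */
};

static void peer_add(struct peer *p, u64 nid)
{
	p->pl_nid = nid;
	hash_add(peer_table, &p->pl_list, nid);	/* no nid2bucket helper */
}

static struct peer *peer_find(u64 nid)
{
	struct peer *p;

	/* walk only the bucket that @nid hashes to */
	hash_for_each_possible(peer_table, p, pl_list, nid)
		if (p->pl_nid == nid)
			return p;

	return NULL;
}

Removal is hash_del(&p->pl_list), and HASH_SIZE() yields the bucket count, which is why the connd peer sweep now takes its chunk size and modulus from HASH_SIZE(kiblnd_data.kib_peers).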
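
The small change in kiblnd_thread_start() is a format-string fix: the third argument of kthread_run() is a printf-style format, so a caller-supplied name containing '%' would be interpreted as conversion specifiers. Passing it through "%s" makes the name inert data; a sketch of the same shape:

#include <linux/kthread.h>
#include <linux/err.h>

/* @name may come from module parameters or network configuration,
 * so never use it as the format string itself. */
static int thread_start(int (*fn)(void *), void *arg, const char *name)
{
	struct task_struct *task = kthread_run(fn, arg, "%s", name);

	return IS_ERR(task) ? PTR_ERR(task) : 0;
}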
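
HAVE_RDMA_REJECT_4ARGS in kiblnd_reject() tracks the kernel 5.8 API change that added a reject reason to rdma_reject(). The same compatibility can be kept behind a small wrapper instead of an #ifdef at the call site; a sketch assuming the same configure-detected macro:

#include <rdma/rdma_cm.h>
#include <rdma/ib_cm.h>

/* One call site for both rdma_reject() signatures; kernels >= 5.8
 * take an ib_cm reject reason as a fourth argument. */
static int compat_rdma_reject(struct rdma_cm_id *cmid,
			      const void *priv, u8 priv_len)
{
#ifdef HAVE_RDMA_REJECT_4ARGS
	return rdma_reject(cmid, priv, priv_len, IB_CM_REJ_CONSUMER_DEFINED);
#else
	return rdma_reject(cmid, priv, priv_len);
#endif
}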
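
Similarly, the switch from rdma_connect() to rdma_connect_locked() in kiblnd_active_connect() reflects a kernel 5.10 change: when connecting from inside the CM event handler, where the cm_id's handler_mutex is already held, rdma_connect_locked() is required. The patch changes the call unconditionally; a tree that still builds against older kernels would need a guard like the hypothetical HAVE_RDMA_CONNECT_LOCKED below:

#include <rdma/rdma_cm.h>

/* kiblnd_active_connect() runs in CM event handler context. */
static int compat_rdma_connect(struct rdma_cm_id *cmid,
			       struct rdma_conn_param *cp)
{
#ifdef HAVE_RDMA_CONNECT_LOCKED	/* hypothetical configure check */
	return rdma_connect_locked(cmid, cp);
#else
	return rdma_connect(cmid, cp);
#endif
}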