X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lnet%2Fklnds%2Fo2iblnd%2Fo2iblnd_cb.c;h=eacc5251edfdb2de225895d4e11e1c10efb15892;hp=e79f6ce1b9726dda041e36714b6e15fd72b8474d;hb=7c8ad11ef08f0f2f886004ae4a56f67722c16d5c;hpb=7a74d382d5e8867785f662aede54a3e399168325

diff --git a/lnet/klnds/o2iblnd/o2iblnd_cb.c b/lnet/klnds/o2iblnd/o2iblnd_cb.c
index e79f6ce..eacc525 100644
--- a/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ b/lnet/klnds/o2iblnd/o2iblnd_cb.c
@@ -610,7 +610,8 @@ kiblnd_fmr_map_tx(struct kib_net *net, struct kib_tx *tx,
 	fps = net->ibn_fmr_ps[cpt];
 	rc = kiblnd_fmr_pool_map(fps, tx, rd, nob, 0, &tx->tx_fmr);
 	if (rc != 0) {
-		CERROR("Can't map %u pages: %d\n", nob, rc);
+		CERROR("Can't map %u bytes (%u/%u frags): %d\n", nob,
+		       tx->tx_nfrags, rd->rd_nfrags, rc);
 		return rc;
 	}
 
@@ -730,80 +731,6 @@ static int kiblnd_map_tx(struct lnet_ni *ni, struct kib_tx *tx,
 	return -EINVAL;
 }
 
-static int kiblnd_setup_rd_iov(struct lnet_ni *ni, struct kib_tx *tx,
-			       struct kib_rdma_desc *rd, unsigned int niov,
-			       struct kvec *iov, int offset, int nob)
-{
-	struct kib_net *net = ni->ni_data;
-	struct page *page;
-	struct scatterlist *sg;
-	unsigned long vaddr;
-	int fragnob;
-	int page_offset;
-	unsigned int max_niov;
-
-	LASSERT (nob > 0);
-	LASSERT (niov > 0);
-	LASSERT (net != NULL);
-
-	while (offset >= iov->iov_len) {
-		offset -= iov->iov_len;
-		niov--;
-		iov++;
-		LASSERT (niov > 0);
-	}
-
-	max_niov = niov;
-
-	sg = tx->tx_frags;
-	do {
-		LASSERT(niov > 0);
-
-		vaddr = ((unsigned long)iov->iov_base) + offset;
-		page_offset = vaddr & (PAGE_SIZE - 1);
-		page = lnet_kvaddr_to_page(vaddr);
-		if (page == NULL) {
-			CERROR("Can't find page\n");
-			return -EFAULT;
-		}
-
-		fragnob = min((int)(iov->iov_len - offset), nob);
-		fragnob = min(fragnob, (int)PAGE_SIZE - page_offset);
-
-		/*
-		 * We're allowed to start at a non-aligned page offset in
-		 * the first fragment and end at a non-aligned page offset
-		 * in the last fragment.
-		 */
-		if ((fragnob < (int)PAGE_SIZE - page_offset) &&
-		    (niov < max_niov) && nob > fragnob) {
-			CDEBUG(D_NET, "fragnob %d < available page %d: with"
-			       " remaining %d iovs with %d nob left\n",
-			       fragnob, (int)PAGE_SIZE - page_offset, niov,
-			       nob);
-			tx->tx_gaps = true;
-		}
-
-		sg_set_page(sg, page, fragnob, page_offset);
-		sg = sg_next(sg);
-		if (!sg) {
-			CERROR("lacking enough sg entries to map tx\n");
-			return -EFAULT;
-		}
-
-		if (offset + fragnob < iov->iov_len) {
-			offset += fragnob;
-		} else {
-			offset = 0;
-			iov++;
-			niov--;
-		}
-		nob -= fragnob;
-	} while (nob > 0);
-
-	return kiblnd_map_tx(ni, tx, rd, sg - tx->tx_frags);
-}
-
 static int kiblnd_setup_rd_kiov(struct lnet_ni *ni, struct kib_tx *tx,
 				struct kib_rdma_desc *rd, int nkiov,
 				struct bio_vec *kiov, int offset, int nob)
@@ -1094,24 +1021,28 @@ kiblnd_check_sends_locked(struct kib_conn *conn)
 static void
 kiblnd_tx_complete(struct kib_tx *tx, int status)
 {
-        int           failed = (status != IB_WC_SUCCESS);
+	int failed = (status != IB_WC_SUCCESS);
 	struct kib_conn *conn = tx->tx_conn;
-        int           idle;
+	int idle;
 
-        LASSERT (tx->tx_sending > 0);
+	if (tx->tx_sending <= 0) {
+		CERROR("Received an event on a freed tx: %p status %d\n",
+		       tx, tx->tx_status);
+		return;
+	}
 
-        if (failed) {
-                if (conn->ibc_state == IBLND_CONN_ESTABLISHED)
+	if (failed) {
+		if (conn->ibc_state == IBLND_CONN_ESTABLISHED)
 			CNETERR("Tx -> %s cookie %#llx"
-				" sending %d waiting %d: failed %d\n",
-				libcfs_nid2str(conn->ibc_peer->ibp_nid),
-				tx->tx_cookie, tx->tx_sending, tx->tx_waiting,
-				status);
+			" sending %d waiting %d: failed %d\n",
+			libcfs_nid2str(conn->ibc_peer->ibp_nid),
+			tx->tx_cookie, tx->tx_sending, tx->tx_waiting,
+			status);
 
-                kiblnd_close_conn(conn, -EIO);
-        } else {
-                kiblnd_peer_alive(conn->ibc_peer);
-        }
+		kiblnd_close_conn(conn, -EIO);
+	} else {
+		kiblnd_peer_alive(conn->ibc_peer);
+	}
 
 	spin_lock(&conn->ibc_lock);
 
@@ -1308,7 +1239,7 @@ kiblnd_queue_tx_locked(struct kib_tx *tx, struct kib_conn *conn)
 		return;
 	}
 
-	timeout_ns = lnet_get_lnd_timeout() * NSEC_PER_SEC;
+	timeout_ns = kiblnd_timeout() * NSEC_PER_SEC;
 	tx->tx_queued = 1;
 	tx->tx_deadline = ktime_add_ns(ktime_get(), timeout_ns);
 
@@ -1362,14 +1293,17 @@ kiblnd_queue_tx(struct kib_tx *tx, struct kib_conn *conn)
 	spin_unlock(&conn->ibc_lock);
 }
 
-static int kiblnd_resolve_addr(struct rdma_cm_id *cmid,
-			       struct sockaddr_in *srcaddr,
-			       struct sockaddr_in *dstaddr,
-			       int timeout_ms)
+static int
+kiblnd_resolve_addr_cap(struct rdma_cm_id *cmid,
+			struct sockaddr_in *srcaddr,
+			struct sockaddr_in *dstaddr,
+			int timeout_ms)
 {
 	unsigned short port;
 	int rc;
 
+	LASSERT(capable(CAP_NET_BIND_SERVICE));
+
 	/* allow the port to be reused */
 	rc = rdma_set_reuseaddr(cmid, 1);
 	if (rc != 0) {
@@ -1399,6 +1333,33 @@ static int kiblnd_resolve_addr(struct rdma_cm_id *cmid,
 	return rc;
 }
 
+static int
+kiblnd_resolve_addr(struct rdma_cm_id *cmid,
+		    struct sockaddr_in *srcaddr,
+		    struct sockaddr_in *dstaddr,
+		    int timeout_ms)
+{
+	const struct cred *old_creds = NULL;
+	struct cred *new_creds;
+	int rc;
+
+	if (!capable(CAP_NET_BIND_SERVICE)) {
+		new_creds = prepare_creds();
+		if (!new_creds)
+			return -ENOMEM;
+
+		cap_raise(new_creds->cap_effective, CAP_NET_BIND_SERVICE);
+		old_creds = override_creds(new_creds);
+	}
+
+	rc = kiblnd_resolve_addr_cap(cmid, srcaddr, dstaddr, timeout_ms);
+
+	if (old_creds)
+		revert_creds(old_creds);
+
+	return rc;
+}
+
 static void
 kiblnd_connect_peer(struct kib_peer_ni *peer_ni)
 {
@@ -1437,12 +1398,12 @@ kiblnd_connect_peer(struct kib_peer_ni *peer_ni)
 
 	if (*kiblnd_tunables.kib_use_priv_port) {
 		rc = kiblnd_resolve_addr(cmid, &srcaddr, &dstaddr,
-					 lnet_get_lnd_timeout() * 1000);
+					 kiblnd_timeout() * 1000);
 	} else {
 		rc = rdma_resolve_addr(cmid,
 				       (struct sockaddr *)&srcaddr,
 				       (struct sockaddr *)&dstaddr,
-				       lnet_get_lnd_timeout() * 1000);
+				       kiblnd_timeout() * 1000);
 	}
 	if (rc != 0) {
 		/* Can't initiate address resolution: */
@@ -1645,7 +1606,6 @@ kiblnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg)
 	int target_is_router = lntmsg->msg_target_is_router;
 	int routing = lntmsg->msg_routing;
 	unsigned int payload_niov = lntmsg->msg_niov;
-	struct kvec *payload_iov = lntmsg->msg_iov;
 	struct bio_vec *payload_kiov = lntmsg->msg_kiov;
 	unsigned int payload_offset = lntmsg->msg_offset;
 	unsigned int payload_nob = lntmsg->msg_len;
@@ -1665,8 +1625,6 @@ kiblnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg)
 
 	/* Thread context */
 	LASSERT (!in_interrupt());
-	/* payload is either all vaddrs or all pages */
-	LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
 
 	switch (type) {
 	default:
@@ -1695,16 +1653,10 @@ kiblnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg)
 		ibmsg = tx->tx_msg;
 		rd = &ibmsg->ibm_u.get.ibgm_rd;
 
-		if ((lntmsg->msg_md->md_options & LNET_MD_KIOV) == 0)
-			rc = kiblnd_setup_rd_iov(ni, tx, rd,
-						 lntmsg->msg_md->md_niov,
-						 lntmsg->msg_md->md_iov.iov,
-						 0, lntmsg->msg_md->md_length);
-		else
-			rc = kiblnd_setup_rd_kiov(ni, tx, rd,
-						  lntmsg->msg_md->md_niov,
-						  lntmsg->msg_md->md_iov.kiov,
-						  0, lntmsg->msg_md->md_length);
+		rc = kiblnd_setup_rd_kiov(ni, tx, rd,
+					  lntmsg->msg_md->md_niov,
+					  lntmsg->msg_md->md_kiov,
+					  0, lntmsg->msg_md->md_length);
 		if (rc != 0) {
 			CERROR("Can't setup GET sink for %s: %d\n",
 			       libcfs_nid2str(target.nid), rc);
@@ -1747,14 +1699,9 @@ kiblnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg)
 			return -ENOMEM;
 		}
 
-		if (payload_kiov == NULL)
-			rc = kiblnd_setup_rd_iov(ni, tx, tx->tx_rd,
-						 payload_niov, payload_iov,
-						 payload_offset, payload_nob);
-		else
-			rc = kiblnd_setup_rd_kiov(ni, tx, tx->tx_rd,
-						  payload_niov, payload_kiov,
-						  payload_offset, payload_nob);
+		rc = kiblnd_setup_rd_kiov(ni, tx, tx->tx_rd,
+					  payload_niov, payload_kiov,
+					  payload_offset, payload_nob);
 		if (rc != 0) {
 			CERROR("Can't setup PUT src for %s: %d\n",
 			       libcfs_nid2str(target.nid), rc);
@@ -1788,16 +1735,11 @@ kiblnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg)
 	ibmsg = tx->tx_msg;
 	ibmsg->ibm_u.immediate.ibim_hdr = *hdr;
 
-	if (payload_kiov != NULL)
-		lnet_copy_kiov2flat(IBLND_MSG_SIZE, ibmsg,
-				    offsetof(struct kib_msg, ibm_u.immediate.ibim_payload),
-				    payload_niov, payload_kiov,
-				    payload_offset, payload_nob);
-	else
-		lnet_copy_iov2flat(IBLND_MSG_SIZE, ibmsg,
-				   offsetof(struct kib_msg, ibm_u.immediate.ibim_payload),
-				   payload_niov, payload_iov,
-				   payload_offset, payload_nob);
+	lnet_copy_kiov2flat(IBLND_MSG_SIZE, ibmsg,
+			    offsetof(struct kib_msg,
+				     ibm_u.immediate.ibim_payload),
+			    payload_niov, payload_kiov,
+			    payload_offset, payload_nob);
 
 	nob = offsetof(struct kib_immediate_msg, ibim_payload[payload_nob]);
 	kiblnd_init_tx_msg(ni, tx, IBLND_MSG_IMMEDIATE, nob);
@@ -1811,13 +1753,12 @@ static void
 kiblnd_reply(struct lnet_ni *ni, struct kib_rx *rx, struct lnet_msg *lntmsg)
 {
 	struct lnet_process_id target = lntmsg->msg_target;
-        unsigned int      niov = lntmsg->msg_niov;
-        struct kvec      *iov = lntmsg->msg_iov;
-        struct bio_vec   *kiov = lntmsg->msg_kiov;
-        unsigned int      offset = lntmsg->msg_offset;
-        unsigned int      nob = lntmsg->msg_len;
+	unsigned int niov = lntmsg->msg_niov;
+	struct bio_vec *kiov = lntmsg->msg_kiov;
+	unsigned int offset = lntmsg->msg_offset;
+	unsigned int nob = lntmsg->msg_len;
 	struct kib_tx *tx;
-        int               rc;
+	int rc;
 
 	tx = kiblnd_get_idle_tx(ni, rx->rx_conn->ibc_peer->ibp_nid);
 	if (tx == NULL) {
@@ -1828,9 +1769,6 @@ kiblnd_reply(struct lnet_ni *ni, struct kib_rx *rx, struct lnet_msg *lntmsg)
 
 	if (nob == 0)
 		rc = 0;
-	else if (kiov == NULL)
-		rc = kiblnd_setup_rd_iov(ni, tx, tx->tx_rd,
-					 niov, iov, offset, nob);
 	else
 		rc = kiblnd_setup_rd_kiov(ni, tx, tx->tx_rd,
 					  niov, kiov, offset, nob);
@@ -1873,7 +1811,7 @@ failed_0:
 
 int
 kiblnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
-	    int delayed, unsigned int niov, struct kvec *iov, struct bio_vec *kiov,
+	    int delayed, unsigned int niov, struct bio_vec *kiov,
 	    unsigned int offset, unsigned int mlen, unsigned int rlen)
 {
 	struct kib_rx *rx = private;
@@ -1887,8 +1825,6 @@ kiblnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
 
 	LASSERT (mlen <= rlen);
 	LASSERT (!in_interrupt());
-	/* Either all pages or all vaddrs */
-	LASSERT (!(kiov != NULL && iov != NULL));
 
 	switch (rxmsg->ibm_type) {
 	default:
@@ -1904,16 +1840,11 @@ kiblnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
 			break;
 		}
 
-		if (kiov != NULL)
-			lnet_copy_flat2kiov(niov, kiov, offset,
-					    IBLND_MSG_SIZE, rxmsg,
-					    offsetof(struct kib_msg, ibm_u.immediate.ibim_payload),
-					    mlen);
-		else
-			lnet_copy_flat2iov(niov, iov, offset,
-					   IBLND_MSG_SIZE, rxmsg,
-					   offsetof(struct kib_msg, ibm_u.immediate.ibim_payload),
-					   mlen);
+		lnet_copy_flat2kiov(niov, kiov, offset,
+				    IBLND_MSG_SIZE, rxmsg,
+				    offsetof(struct kib_msg,
+					     ibm_u.immediate.ibim_payload),
+				    mlen);
 
 		lnet_finalize(lntmsg, 0);
 		break;
@@ -1940,12 +1871,8 @@ kiblnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
 		txmsg = tx->tx_msg;
 		rd = &txmsg->ibm_u.putack.ibpam_rd;
 
-		if (kiov == NULL)
-			rc = kiblnd_setup_rd_iov(ni, tx, rd,
-						 niov, iov, offset, mlen);
-		else
-			rc = kiblnd_setup_rd_kiov(ni, tx, rd,
-						  niov, kiov, offset, mlen);
+		rc = kiblnd_setup_rd_kiov(ni, tx, rd,
+					  niov, kiov, offset, mlen);
 		if (rc != 0) {
 			CERROR("Can't setup PUT sink for %s: %d\n",
 			       libcfs_nid2str(conn->ibc_peer->ibp_nid), rc);
@@ -2150,15 +2077,12 @@ void
 kiblnd_abort_txs(struct kib_conn *conn, struct list_head *txs)
 {
 	LIST_HEAD(zombies);
-	struct list_head *tmp;
-	struct list_head *nxt;
+	struct kib_tx *nxt;
 	struct kib_tx *tx;
 
 	spin_lock(&conn->ibc_lock);
 
-	list_for_each_safe(tmp, nxt, txs) {
-		tx = list_entry(tmp, struct kib_tx, tx_list);
-
+	list_for_each_entry_safe(tx, nxt, txs, tx_list) {
 		if (txs == &conn->ibc_active_txs) {
 			LASSERT(!tx->tx_queued);
 			LASSERT(tx->tx_waiting ||
@@ -2310,22 +2234,25 @@ kiblnd_connreq_done(struct kib_conn *conn, int status)
 		(conn->ibc_state == IBLND_CONN_PASSIVE_WAIT &&
 		 peer_ni->ibp_accepting > 0));
 
-        LIBCFS_FREE(conn->ibc_connvars, sizeof(*conn->ibc_connvars));
-        conn->ibc_connvars = NULL;
+	LIBCFS_FREE(conn->ibc_connvars, sizeof(*conn->ibc_connvars));
+	conn->ibc_connvars = NULL;
 
-        if (status != 0) {
-                /* failed to establish connection */
-                kiblnd_peer_connect_failed(peer_ni, active, status);
-                kiblnd_finalise_conn(conn);
-                return;
-        }
+	if (status != 0) {
+		/* failed to establish connection */
+		kiblnd_peer_connect_failed(peer_ni, active, status);
+		kiblnd_finalise_conn(conn);
+		return;
+	}
 
-        /* connection established */
+	/* connection established */
 	write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
 
+	/* reset retry count */
+	peer_ni->ibp_retries = 0;
+
 	conn->ibc_last_send = ktime_get();
-        kiblnd_set_conn_state(conn, IBLND_CONN_ESTABLISHED);
-        kiblnd_peer_alive(peer_ni);
+	kiblnd_set_conn_state(conn, IBLND_CONN_ESTABLISHED);
+	kiblnd_peer_alive(peer_ni);
 
 	/* Add conn to peer_ni's list and nuke any dangling conns from a different
 	 * peer_ni instance... */
@@ -2399,7 +2326,11 @@ kiblnd_reject(struct rdma_cm_id *cmid, struct kib_rej *rej)
 {
 	int rc;
 
+#ifdef HAVE_RDMA_REJECT_4ARGS
+	rc = rdma_reject(cmid, rej, sizeof(*rej), IB_CM_REJ_CONSUMER_DEFINED);
+#else
 	rc = rdma_reject(cmid, rej, sizeof(*rej));
+#endif
 	if (rc != 0)
 		CWARN("Error %d sending reject\n", rc);
 
@@ -2767,10 +2698,15 @@ kiblnd_check_reconnect(struct kib_conn *conn, int version,
 		goto out;
 	}
 
-        switch (why) {
-        default:
-                reason = "Unknown";
-                break;
+	if (peer_ni->ibp_retries > *kiblnd_tunables.kib_retry_count) {
+		reason = "retry count exceeded due to no listener";
+		goto out;
+	}
+
+	switch (why) {
+	default:
+		reason = "Unknown";
+		break;
 
 	case IBLND_REJECT_RDMA_FRAGS: {
 		struct lnet_ioctl_config_o2iblnd_tunables *tunables;
@@ -2864,13 +2800,14 @@ kiblnd_rejected(struct kib_conn *conn, int reason, void *priv, int priv_nob)
 				       IBLND_REJECT_CONN_STALE, NULL);
 		break;
 
-        case IB_CM_REJ_INVALID_SERVICE_ID:
+	case IB_CM_REJ_INVALID_SERVICE_ID:
+		peer_ni->ibp_retries++;
 		kiblnd_check_reconnect(conn, IBLND_MSG_VERSION, 0,
 				       IBLND_REJECT_INVALID_SRV_ID, NULL);
-                CNETERR("%s rejected: no listener at %d\n",
-                        libcfs_nid2str(peer_ni->ibp_nid),
-                        *kiblnd_tunables.kib_service);
-                break;
+		CNETERR("%s rejected: no listener at %d\n",
+			libcfs_nid2str(peer_ni->ibp_nid),
+			*kiblnd_tunables.kib_service);
+		break;
 
 	case IB_CM_REJ_CONSUMER_DEFINED:
 		if (priv_nob >= offsetof(struct kib_rej, ibr_padding)) {
@@ -3185,7 +3122,7 @@ kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event)
 			rc = event->status;
 		} else {
 			rc = rdma_resolve_route(
-				cmid, lnet_get_lnd_timeout() * 1000);
+				cmid, kiblnd_timeout() * 1000);
 			if (rc == 0) {
 				struct kib_net *net = peer_ni->ibp_ni->ni_data;
 				struct kib_dev *dev = net->ibn_dev;
@@ -3601,7 +3538,7 @@ kiblnd_connd (void *arg)
 			 * connection within (n+1)/n times the timeout
 			 * interval.
 			 */
-			lnd_timeout = lnet_get_lnd_timeout();
+			lnd_timeout = kiblnd_timeout();
 			if (lnd_timeout > n * p)
 				chunk = (chunk * n * p) / lnd_timeout;
 			if (chunk == 0)
@@ -3758,7 +3695,6 @@ kiblnd_scheduler(void *arg)
 	unsigned long flags;
 	struct ib_wc wc;
 	int did_something;
-	int busy_loops = 0;
 	int rc;
 
 	init_waitqueue_entry(&wait, current);
@@ -3776,11 +3712,10 @@ kiblnd_scheduler(void *arg)
 	spin_lock_irqsave(&sched->ibs_lock, flags);
 
 	while (!kiblnd_data.kib_shutdown) {
-		if (busy_loops++ >= IBLND_RESCHED) {
+		if (need_resched()) {
 			spin_unlock_irqrestore(&sched->ibs_lock, flags);
 
 			cond_resched();
-			busy_loops = 0;
 
 			spin_lock_irqsave(&sched->ibs_lock, flags);
 		}
@@ -3876,7 +3811,6 @@ kiblnd_scheduler(void *arg)
 		spin_unlock_irqrestore(&sched->ibs_lock, flags);
 
 		schedule();
-		busy_loops = 0;
 
 		remove_wait_queue(&sched->ibs_waitq, &wait);
 		set_current_state(TASK_RUNNING);
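
A note on the credential override the new kiblnd_resolve_addr() wrapper above
relies on: binding a reserved port (below 1024) requires CAP_NET_BIND_SERVICE,
so the patch temporarily raises that one capability around
kiblnd_resolve_addr_cap() rather than requiring every caller to hold it. Below
is a minimal standalone sketch of the same kernel pattern;
run_with_net_bind_service() and privileged_port_op() are hypothetical names
introduced here for illustration only, not part of this patch.

#include <linux/cred.h>
#include <linux/capability.h>
#include <linux/errno.h>

/* Hypothetical stand-in for a privileged operation, e.g. binding a
 * port below 1024; always succeeds in this sketch. */
static int privileged_port_op(void *arg)
{
	(void)arg;
	return 0;
}

static int run_with_net_bind_service(void *arg)
{
	const struct cred *old_creds = NULL;
	struct cred *new_creds;
	int rc;

	if (!capable(CAP_NET_BIND_SERVICE)) {
		/* Clone the current task's credentials for modification. */
		new_creds = prepare_creds();
		if (!new_creds)
			return -ENOMEM;

		/* Raise only the one capability the operation needs. */
		cap_raise(new_creds->cap_effective, CAP_NET_BIND_SERVICE);

		/* Install the new credentials; the old set is returned. */
		old_creds = override_creds(new_creds);
	}

	rc = privileged_port_op(arg);

	/* Restore the caller's original credentials. */
	if (old_creds)
		revert_creds(old_creds);

	return rc;
}

The wrapper approach keeps the LASSERT(capable(CAP_NET_BIND_SERVICE)) in
kiblnd_resolve_addr_cap() honest: the privileged helper can assume the
capability is present, while the policy of when to raise it lives in one place.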