From 9f307f92e9f652221fb0edff35bda03c955bf3dd Mon Sep 17 00:00:00 2001 From: eeb Date: Fri, 21 Oct 2005 15:22:28 +0000 Subject: [PATCH] * iiblnd fixes (mid-way through changing Infinicon API) * viblnd fix (tx_waiting not cleared on RDMA ops initiated on a new connection that fails triggers an assertion failure). * some prep for userspace ip2nets * router selection round robins if other selection criteria are equal * local_nid_dist_zero LNET module param for single-node LND testing. * reformat LNET /proc buffer displays * rename userspace tcplnd env params TCPNAL_xxx -> TCPLND_xxx --- lnet/klnds/iiblnd/iiblnd.c | 53 +++++++++-------- lnet/klnds/iiblnd/iiblnd.h | 35 ++++++----- lnet/klnds/iiblnd/iiblnd_cb.c | 111 ++++++++++++++--------------------- lnet/klnds/iiblnd/iiblnd_modparams.c | 2 +- lnet/klnds/viblnd/viblnd_cb.c | 1 + lnet/lnet/api-ni.c | 75 +++++++++++------------ lnet/lnet/config.c | 22 ++++--- lnet/lnet/lib-move.c | 44 +++++++++++--- lnet/lnet/router_proc.c | 8 +-- lnet/ulnds/socklnd/connection.c | 6 +- lnet/utils/lbstats | 4 +- lnet/utils/portals.c | 2 +- 12 files changed, 188 insertions(+), 175 deletions(-) diff --git a/lnet/klnds/iiblnd/iiblnd.c b/lnet/klnds/iiblnd/iiblnd.c index 281b9dc..e78a06c 100644 --- a/lnet/klnds/iiblnd/iiblnd.c +++ b/lnet/klnds/iiblnd/iiblnd.c @@ -350,6 +350,7 @@ kibnal_create_cep(lnet_nid_t nid) return NULL; } +#define IBNAL_CHECK_ADVERT 1 #if IBNAL_CHECK_ADVERT void kibnal_service_query_done (void *arg, QUERY *qry, @@ -358,7 +359,7 @@ kibnal_service_query_done (void *arg, QUERY *qry, int *rcp = arg; FSTATUS frc = qry_result->Status; SERVICE_RECORD_RESULTS *svc_rslt; - SERVICE_RECORD *svc; + IB_SERVICE_RECORD *svc; lnet_nid_t nid; if (frc != FSUCCESS || qry_result->ResultDataSize == 0) { @@ -372,7 +373,7 @@ kibnal_service_query_done (void *arg, QUERY *qry, if (svc_rslt->NumServiceRecords < 1) { CERROR("Check advert: %d records\n", - svc->NumServiceRecords); + svc_rslt->NumServiceRecords); *rcp = -ENOENT; goto out; } @@ -380,15
+381,21 @@ kibnal_service_query_done (void *arg, QUERY *qry, svc = &svc_rslt->ServiceRecords[0]; nid = le64_to_cpu(*kibnal_service_nid_field(svc)); + CDEBUG(D_NET, "Check advert: %s "LPX64" "LPX64":%04x\n", + libcfs_nid2str(nid), svc->RID.ServiceID, + svc->RID.ServiceGID.Type.Global.InterfaceID, + svc->RID.ServiceP_Key); + if (nid != kibnal_data.kib_ni->ni_nid) { CERROR("Check advert: Bad NID %s (%s expected)\n", - nid, kibnal_data.kib_ni->ni_nid); + libcfs_nid2str(nid), + libcfs_nid2str(kibnal_data.kib_ni->ni_nid)); *rcp = -EINVAL; goto out; } if (svc->RID.ServiceID != *kibnal_tunables.kib_service_number) { - CERROR("Check advert: Bad ServiceID "LPX64" ("LPX64" expected)\n", + CERROR("Check advert: Bad ServiceID "LPX64" (%x expected)\n", svc->RID.ServiceID, *kibnal_tunables.kib_service_number); *rcp = -EINVAL; @@ -411,7 +418,7 @@ kibnal_service_query_done (void *arg, QUERY *qry, goto out; } - CDEBUG(D_WARNING, "Check advert OK\n"); + CDEBUG(D_NET, "Check advert OK\n"); *rcp = 0; out: @@ -499,7 +506,7 @@ kibnal_advertise (void) sizeof(svc->ServiceName)) { CERROR("Service name '%s' too long (%d chars max)\n", *kibnal_tunables.kib_service_name, - sizeof(svc->ServiceName) - 1); + (int)sizeof(svc->ServiceName) - 1); return -EINVAL; } @@ -565,7 +572,9 @@ kibnal_unadvertise (int expect_success) down (&kibnal_data.kib_listener_signal); - if ((frc2 -= FSUCCESS) == !!expect_success) + CDEBUG(D_NET, "Unadvertise rc: %d\n", frc2); + + if ((frc2 == FSUCCESS) == !!expect_success) return; if (expect_success) @@ -619,14 +628,10 @@ kibnal_start_listener(void) unsigned long flags; int rc; FSTATUS frc; - __u32 u32val; LASSERT (kibnal_data.kib_listener_cep == NULL); init_MUTEX_LOCKED (&kibnal_data.kib_listener_signal); - /* remove any previous advert (crashed node etc) */ - kibnal_unadvertise(0); - cep = kibnal_create_cep(LNET_NID_ANY); if (cep == NULL) return -ENOMEM; @@ -751,9 +756,8 @@ kibnal_find_peer_locked (lnet_nid_t nid) if (peer->ibp_nid != nid) continue; - CDEBUG(D_NET, 
"got peer [%p] -> %s (%d)\n", - peer, libcfs_nid2str(nid), - atomic_read (&peer->ibp_refcount)); + CDEBUG(D_NET, "got peer %s (%d)\n", + libcfs_nid2str(nid), atomic_read (&peer->ibp_refcount)); return (peer); } return (NULL); @@ -898,7 +902,7 @@ kibnal_del_peer (lnet_nid_t nid) rc = 0; /* matched something */ } } - out: + write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); return (rc); @@ -1149,6 +1153,7 @@ kibnal_create_conn (lnet_nid_t nid) /* 1 ref for caller */ atomic_set (&conn->ibc_refcount, 1); + CDEBUG(D_WARNING, "New conn %p\n", conn); return (conn); failed: @@ -1159,12 +1164,13 @@ kibnal_create_conn (lnet_nid_t nid) void kibnal_destroy_conn (kib_conn_t *conn) { - int rc; FSTATUS frc; LASSERT (!in_interrupt()); - CDEBUG (D_NET, "connection %p\n", conn); + CDEBUG (D_NET, "connection %s\n", + (conn->ibc_peer) == NULL ? "" : + libcfs_nid2str(conn->ibc_peer->ibp_nid)); LASSERT (atomic_read (&conn->ibc_refcount) == 0); LASSERT (list_empty(&conn->ibc_early_rxs)); @@ -1554,7 +1560,6 @@ kibnal_register_all_memory(void) IB_MR_PHYS_BUFFER phys; IB_ACCESS_CONTROL access; FSTATUS frc; - int rc; memset(&access, 0, sizeof(access)); access.s.MWBindable = 1; @@ -1572,9 +1577,6 @@ kibnal_register_all_memory(void) si_meminfo(&si); total = ((__u64)si.totalram) * si.mem_unit; - if (total < ((__u64)max_mapnr) * PAGE_SIZE) - total = ((__u64)max_mapnr) * PAGE_SIZE; - if (total == 0) { CERROR("Can't determine memory size\n"); return -ENOMEM; @@ -1786,7 +1788,7 @@ kibnal_startup (lnet_ni_t *ni) /* Find IP address from */ snprintf(ipif_name, sizeof(ipif_name), "%s%d", - *kibnal_tunables.kib_ipif_basename, kibnal_data.kib_hca_idx); + *kibnal_tunables.kib_ipif_basename, kibnal_data.kib_hca_idx + 1); if (strlen(ipif_name) == sizeof(ipif_name - 1)) { CERROR("IPoIB interface name %s truncated\n", ipif_name); return -EINVAL; @@ -1859,9 +1861,10 @@ kibnal_startup (lnet_ni_t *ni) *kibnal_tunables.kib_sd_retries; for (i = 0; i < IBNAL_N_SCHED; i++) { - rc = kibnal_thread_start 
(kibnal_scheduler, (void *)i); + rc = kibnal_thread_start (kibnal_scheduler, + (void *)(unsigned long)i); if (rc != 0) { - CERROR("Can't spawn iibnal scheduler[%d]: %d\n", + CERROR("Can't spawn iib scheduler[%d]: %d\n", i, rc); goto failed; } @@ -1869,7 +1872,7 @@ kibnal_startup (lnet_ni_t *ni) rc = kibnal_thread_start (kibnal_connd, NULL); if (rc != 0) { - CERROR ("Can't spawn iibnal connd: %d\n", rc); + CERROR ("Can't spawn iib connd: %d\n", rc); goto failed; } diff --git a/lnet/klnds/iiblnd/iiblnd.h b/lnet/klnds/iiblnd/iiblnd.h index 6d796b9..c081417 100644 --- a/lnet/klnds/iiblnd/iiblnd.h +++ b/lnet/klnds/iiblnd/iiblnd.h @@ -78,7 +78,7 @@ /* defaults for modparams/tunables */ #define IBNAL_IPIF_BASENAME "ib" /* IPoIB interface basename */ -#define IBNAL_SERVICE_NAME "iibnal" /* global service name */ +#define IBNAL_SERVICE_NAME "iiblnd" /* global service name */ #define IBNAL_SERVICE_NUMBER 0x11b9a2 /* global service number */ #define IBNAL_MIN_RECONNECT_INTERVAL 1 /* first failed connection retry... */ #define IBNAL_MAX_RECONNECT_INTERVAL 60 /* ...exponentially increasing to this */ @@ -353,7 +353,7 @@ typedef struct kib_rx /* receive message */ int rx_nob; /* # bytes received (-1 while posted) */ __u64 rx_hca_msg; /* pre-mapped buffer (hca vaddr) */ kib_msg_t *rx_msg; /* pre-mapped buffer (host vaddr) */ - IB_WORK_REQ rx_wrq; + IB_WORK_REQ2 rx_wrq; IB_LOCAL_DATASEGMENT rx_gl; /* and its memory */ } kib_rx_t; @@ -373,13 +373,13 @@ typedef struct kib_tx /* transmit message */ __u64 tx_hca_msg; /* pre-mapped buffer (HCA vaddr) */ int tx_nwrq; /* # send work items */ #if IBNAL_USE_FMR - IB_WORK_REQ tx_wrq[2]; /* send work items... */ + IB_WORK_REQ2 tx_wrq[2]; /* send work items... */ IB_LOCAL_DATASEGMENT tx_gl[2]; /* ...and their memory */ kib_rdma_desc_t tx_rd[1]; /* rdma descriptor */ kib_md_t tx_md; /* mapping */ __u64 *tx_pages; /* page phys addrs */ #else - IB_WORK_REQ *tx_wrq; /* send work items... */ + IB_WORK_REQ2 *tx_wrq; /* send work items... 
*/ IB_LOCAL_DATASEGMENT *tx_gl; /* ...and their memory */ kib_rdma_desc_t *tx_rd; /* rdma descriptor (src buffers) */ #endif @@ -605,16 +605,19 @@ iibt_qp_destroy(IB_HANDLE qp_handle) return IIBT_IF.Vpi.DestroyQP(qp_handle); } + static inline FSTATUS -iibt_postrecv(IB_HANDLE qp_handle, IB_WORK_REQ *work_req) +iibt_postrecv2(IB_HANDLE qp_handle, IB_WORK_REQ2 *work_req, + IB_WORK_REQ2 **failed_work_req) { - return IIBT_IF.Vpi.PostRecv(qp_handle, work_req); + return IIBT_IF.Vpi.PostRecv2(qp_handle, work_req, failed_work_req); } static inline FSTATUS -iibt_postsend(IB_HANDLE qp_handle, IB_WORK_REQ *work_req) +iibt_postsend2(IB_HANDLE qp_handle, IB_WORK_REQ2 *work_req, + IB_WORK_REQ2 **failed_work_req) { - return IIBT_IF.Vpi.PostSend(qp_handle, work_req); + return IIBT_IF.Vpi.PostSend2(qp_handle, work_req, failed_work_req); } static inline FSTATUS @@ -712,12 +715,6 @@ iibt_cm_connect (IB_HANDLE cep, CM_REQUEST_INFO *req, return IIBT_IF.Cmi.CmConnect (cep, req, callback, arg); } -static inline int wrq_signals_completion(IB_WORK_REQ *wrq) -{ - return wrq->Req.SendRC.Options.s.SignaledCompletion == 1; -} - - /******************************************************************************/ /* these are purposely avoiding using local vars so they don't increase @@ -892,12 +889,22 @@ kibnal_wreqid2type (__u64 wreqid) return (wreqid & IBNAL_WID_MASK); } +#if 0 static inline void kibnal_set_conn_state (kib_conn_t *conn, int state) { + CDEBUG(D_WARNING,"%p state %d\n", conn, state); conn->ibc_state = state; mb(); } +#else +#define kibnal_set_conn_state(conn, state) \ +do { \ + CDEBUG(D_WARNING,"%p state %d\n", conn, state); \ + conn->ibc_state = state; \ + mb(); \ +} while (0) +#endif #if IBNAL_USE_FMR diff --git a/lnet/klnds/iiblnd/iiblnd_cb.c b/lnet/klnds/iiblnd/iiblnd_cb.c index 472840a..c486116 100644 --- a/lnet/klnds/iiblnd/iiblnd_cb.c +++ b/lnet/klnds/iiblnd/iiblnd_cb.c @@ -58,7 +58,6 @@ kibnal_tx_done (kib_tx_t *tx) { int rc = tx->tx_status; int i; - FSTATUS frc; LASSERT 
(!in_interrupt()); LASSERT (!tx->tx_queued); /* mustn't be queued for sending */ @@ -131,7 +130,6 @@ kibnal_post_rx (kib_rx_t *rx, int credit) { kib_conn_t *conn = rx->rx_conn; int rc = 0; - unsigned long flags; FSTATUS frc; LASSERT (!in_interrupt()); @@ -142,7 +140,8 @@ kibnal_post_rx (kib_rx_t *rx, int credit) .Length = IBNAL_MSG_SIZE, }; - rx->rx_wrq = (IB_WORK_REQ) { + rx->rx_wrq = (IB_WORK_REQ2) { + .Next = NULL, .WorkReqId = kibnal_ptr2wreqid(rx, IBNAL_WID_RX), .MessageLen = IBNAL_MSG_SIZE, .DSList = &rx->rx_gl, @@ -167,7 +166,7 @@ kibnal_post_rx (kib_rx_t *rx, int credit) rx->rx_nob = -1; /* flag posted */ mb(); - frc = iibt_postrecv(conn->ibc_qp, &rx->rx_wrq); + frc = iibt_postrecv2(conn->ibc_qp, &rx->rx_wrq, NULL); if (frc == FSUCCESS) { if (credit) { spin_lock(&conn->ibc_lock); @@ -425,7 +424,6 @@ kibnal_rx_complete (IB_WORK_COMPLETION *wc, __u64 rxseq) unsigned long flags; int rc; - CDEBUG(D_NET, "rx %p conn %p\n", rx, conn); LASSERT (rx->rx_nob < 0); /* was posted */ rx->rx_nob = 0; /* isn't now */ mb(); @@ -486,7 +484,6 @@ kibnal_rx_complete (IB_WORK_COMPLETION *wc, __u64 rxseq) return; failed: - CDEBUG(D_NET, "rx %p conn %p\n", rx, conn); kibnal_close_conn(conn, -EIO); ignore: /* Don't re-post rx & drop its ref on conn */ @@ -805,7 +802,6 @@ kibnal_check_sends (kib_conn_t *conn) int rc; int done; int i; - int nwork; LASSERT (conn->ibc_state >= IBNAL_CONN_ESTABLISHED); @@ -894,29 +890,18 @@ kibnal_check_sends (kib_conn_t *conn) list_add (&tx->tx_list, &conn->ibc_active_txs); - /* Drop the lock while I send (this can re-order sends) */ - spin_unlock(&conn->ibc_lock); - LASSERT (tx->tx_nwrq > 0); - rc = -ECONNABORTED; + rc = 0; frc = FSUCCESS; - nwork = 0; - if (conn->ibc_state == IBNAL_CONN_ESTABLISHED) { - /* Driver only accepts 1 item at a time */ - for (i = 0; i < tx->tx_nwrq; i++) { - frc = iibt_postsend(conn->ibc_qp, - &tx->tx_wrq[i]); - if (frc != FSUCCESS) { - rc = -EIO; - break; - } - CDEBUG(D_NET, "posted tx wrq %p\n", - &tx->tx_wrq[i]); - } +
if (conn->ibc_state != IBNAL_CONN_ESTABLISHED) { + rc = -ECONNABORTED; + } else { + frc = iibt_postsend2(conn->ibc_qp, tx->tx_wrq, NULL); + if (frc != FSUCCESS) + rc = -EIO; } - spin_lock(&conn->ibc_lock); if (rc != 0) { /* NB credits are transferred in the actual * message, which can only be the last work item */ @@ -960,8 +945,9 @@ kibnal_tx_complete (IB_WORK_COMPLETION *wc) int failed = wc->Status != WRStatusSuccess; int idle; - CDEBUG(D_NET, "tx %p conn %p sending %d nwrq %d status %d\n", - tx, conn, tx->tx_sending, tx->tx_nwrq, wc->Status); + CDEBUG(D_NET, "%s: sending %d nwrq %d status %d\n", + libcfs_nid2str(conn->ibc_peer->ibp_nid), + tx->tx_sending, tx->tx_nwrq, wc->Status); LASSERT (tx->tx_sending > 0); @@ -1012,7 +998,7 @@ void kibnal_init_tx_msg (kib_tx_t *tx, int type, int body_nob) { IB_LOCAL_DATASEGMENT *gl = &tx->tx_gl[tx->tx_nwrq]; - IB_WORK_REQ *wrq = &tx->tx_wrq[tx->tx_nwrq]; + IB_WORK_REQ2 *wrq = &tx->tx_wrq[tx->tx_nwrq]; int nob = offsetof (kib_msg_t, ibm_u) + body_nob; LASSERT (tx->tx_nwrq >= 0 && @@ -1027,6 +1013,8 @@ kibnal_init_tx_msg (kib_tx_t *tx, int type, int body_nob) .Lkey = kibnal_data.kib_whole_mem.md_lkey, }; + wrq->Next = NULL; /* This is the last one */ + wrq->WorkReqId = kibnal_ptr2wreqid(tx, IBNAL_WID_TX); wrq->Operation = WROpSend; wrq->DSList = gl; @@ -1062,6 +1050,7 @@ kibnal_init_rdma (kib_tx_t *tx, int type, int nob, wrq = &tx->tx_wrq[0]; + wrq->Next = wrq + 1; wrq->WorkReqId = kibnal_ptr2wreqid(tx, IBNAL_WID_RDMA); wrq->Operation = WROpRdmaWrite; wrq->DSList = gl; @@ -1129,6 +1118,7 @@ kibnal_init_rdma (kib_tx_t *tx, int type, int nob, wrq = &tx->tx_wrq[tx->tx_nwrq]; + wrq->Next = wrq + 1; wrq->WorkReqId = kibnal_ptr2wreqid(tx, IBNAL_WID_RDMA); wrq->Operation = WROpRdmaWrite; wrq->DSList = gl; @@ -1802,7 +1792,6 @@ kibnal_conn_disconnected(kib_conn_t *conn) struct list_head *nxt; kib_tx_t *tx; FSTATUS frc; - int done; LASSERT (conn->ibc_state >= IBNAL_CONN_INIT_QP); @@ -1923,6 +1912,7 @@ kibnal_peer_connect_failed 
(kib_peer_t *peer, int active, int rc) list_del (&tx->tx_list); /* complete now */ + tx->tx_waiting = 0; tx->tx_status = -EHOSTUNREACH; kibnal_tx_done (tx); } while (!list_empty (&zombies)); @@ -1935,7 +1925,6 @@ kibnal_connreq_done (kib_conn_t *conn, int active, int status) struct list_head txs; kib_tx_t *tx; unsigned long flags; - int i; LASSERT (conn->ibc_state >= IBNAL_CONN_INIT_QP); LASSERT (conn->ibc_state < IBNAL_CONN_ESTABLISHED); @@ -1956,11 +1945,14 @@ kibnal_connreq_done (kib_conn_t *conn, int active, int status) LASSERT(conn->ibc_state == IBNAL_CONN_CONNECTING); kibnal_set_conn_state(conn, IBNAL_CONN_ESTABLISHED); - CDEBUG(D_WARNING, "Connection %p -> %s ESTABLISHED\n", - conn, libcfs_nid2str(conn->ibc_peer->ibp_nid)); + CDEBUG(D_WARNING, "Connection %s ESTABLISHED\n", + libcfs_nid2str(conn->ibc_peer->ibp_nid)); write_lock_irqsave(&kibnal_data.kib_global_lock, flags); + kibnal_conn_addref(conn); /* +1 ref for ibc_list */ + list_add_tail(&conn->ibc_list, &peer->ibp_conns); + if (!kibnal_peer_active(peer)) { /* peer has been deleted */ kibnal_close_conn_locked(conn, -ECONNABORTED); @@ -1975,9 +1967,7 @@ kibnal_connreq_done (kib_conn_t *conn, int active, int status) peer->ibp_connecting--; peer->ibp_reconnect_interval = 0; /* OK to reconnect at any time */ - /* Add conn to peer's list and nuke any dangling conns from a different - * peer instance... */ - kibnal_conn_addref(conn); /* +1 ref for ibc_list */ + /* Nuke any dangling conns from a different peer instance... 
*/ kibnal_close_stale_conns_locked(peer, conn->ibc_incarnation); /* grab txs blocking for a conn */ @@ -2023,7 +2013,6 @@ kibnal_check_connreject(kib_conn_t *conn, int active, CM_REJECT_INFO *rej) { kib_peer_t *peer = conn->ibc_peer; unsigned long flags; - FSTATUS frc; if (rej->Reason != RC_STALE_CONN) { CERROR("%s connection to %s rejected: %d\n", @@ -2058,8 +2047,10 @@ kibnal_check_connreject(kib_conn_t *conn, int active, CM_REJECT_INFO *rej) void kibnal_cm_disconnect_callback(kib_conn_t *conn, CM_CONN_INFO *info) { - CDEBUG(D_NET, "status 0x%x\n", info->Status); - + CDEBUG(D_WARNING, "%s: state %d, status 0x%x\n", + libcfs_nid2str(conn->ibc_peer->ibp_nid), + conn->ibc_state, info->Status); + LASSERT (conn->ibc_state >= IBNAL_CONN_ESTABLISHED); switch (info->Status) { @@ -2067,21 +2058,15 @@ kibnal_cm_disconnect_callback(kib_conn_t *conn, CM_CONN_INFO *info) LBUG(); break; - case FCM_DISCONNECT_REPLY: - /* You can't get this if you set TIMEWAIT */ - CERROR("Unexpected FCM_DISCONNECT_REPLY for %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - LBUG(); - break; - case FCM_DISCONNECT_REQUEST: /* Schedule conn to iibt_cm_disconnect() if it wasn't already */ kibnal_close_conn (conn, 0); break; - case FCM_DISCONNECTED: - CDEBUG(D_NET, "Connection %p -> %s disconnected.\n", - conn, libcfs_nid2str(conn->ibc_peer->ibp_nid)); + case FCM_DISCONNECT_REPLY: /* peer acks my disconnect req */ + case FCM_DISCONNECTED: /* end of TIME_WAIT */ + CDEBUG(D_NET, "Connection %s disconnected.\n", + libcfs_nid2str(conn->ibc_peer->ibp_nid)); kibnal_conn_decref(conn); /* Lose CM's ref */ break; } @@ -2093,14 +2078,12 @@ kibnal_cm_passive_callback(IB_HANDLE cep, CM_CONN_INFO *info, void *arg) kib_conn_t *conn = arg; CDEBUG(D_NET, "status 0x%x\n", info->Status); - kibnal_set_conn_state(conn, IBNAL_CONN_CONNECTING); /* Established Connection Notifier */ switch (info->Status) { default: - CERROR("Unexpected status %d on Connection %p -> %s\n", - info->Status, conn, - 
libcfs_nid2str(conn->ibc_peer->ibp_nid)); + CERROR("Unexpected status %d on Connection %s\n", + info->Status, libcfs_nid2str(conn->ibc_peer->ibp_nid)); LBUG(); break; @@ -2342,8 +2325,8 @@ kibnal_check_connreply(kib_conn_t *conn, CM_REPLY_INFO *rep) return; } - CDEBUG(D_NET, "Connection %p -> %s REP_RECEIVED.\n", - conn, libcfs_nid2str(conn->ibc_peer->ibp_nid)); + CDEBUG(D_NET, "Connection %s REP_RECEIVED.\n", + libcfs_nid2str(conn->ibc_peer->ibp_nid)); conn->ibc_incarnation = msg->ibm_srcstamp; conn->ibc_credits = IBNAL_MSG_QUEUE_SIZE; @@ -2365,12 +2348,13 @@ kibnal_check_connreply(kib_conn_t *conn, CM_REPLY_INFO *rep) &conn->ibc_cvars->cv_cmci, NULL, NULL, NULL, NULL); - if (frc == FCM_CONNECT_ESTABLISHED) + if (frc == FCM_CONNECT_ESTABLISHED) { kibnal_connreq_done(conn, 1, 0); - - - CERROR("Connection %p -> %s CMAccept failed: %d\n", - conn, libcfs_nid2str(conn->ibc_peer->ibp_nid), frc); + return; + } + + CERROR("Connection %s CMAccept failed: %d\n", + libcfs_nid2str(conn->ibc_peer->ibp_nid), frc); kibnal_connreq_done(conn, 1, -ECONNABORTED); } @@ -2383,9 +2367,8 @@ kibnal_cm_active_callback(IB_HANDLE cep, CM_CONN_INFO *info, void *arg) switch (info->Status) { default: - CERROR("unknown status %d on Connection %p -> %s\n", - info->Status, conn, - libcfs_nid2str(conn->ibc_peer->ibp_nid)); + CERROR("unknown status %d on Connection %s\n", + info->Status, libcfs_nid2str(conn->ibc_peer->ibp_nid)); LBUG(); break; @@ -2527,7 +2510,6 @@ kibnal_service_get_callback (void *arg, QUERY *qry, kib_conn_t *conn = arg; SERVICE_RECORD_RESULTS *svc; FSTATUS frc; - lnet_nid_t nid; if (qrslt->Status != FSUCCESS || qrslt->ResultDataSize < sizeof(*svc)) { @@ -2888,10 +2870,7 @@ kibnal_scheduler(void *arg) FSTATUS frc2; IB_WORK_COMPLETION wc; kib_rx_t *rx; - kib_tx_t *tx; unsigned long flags; - int rc; - int did_something; __u64 rxseq = 0; int busy_loops = 0; diff --git a/lnet/klnds/iiblnd/iiblnd_modparams.c b/lnet/klnds/iiblnd/iiblnd_modparams.c index ab9f0d3..41287c8 100644 --- 
a/lnet/klnds/iiblnd/iiblnd_modparams.c +++ b/lnet/klnds/iiblnd/iiblnd_modparams.c @@ -68,7 +68,7 @@ CFS_MODULE_PARM(peer_credits, "i", int, 0444, "# concurrent sends to 1 peer"); static int sd_retries = IBNAL_SD_RETRIES; -CFS_MODULE_PARM(sa_retries, "i", int, 0444, +CFS_MODULE_PARM(sd_retries, "i", int, 0444, "# times to retry SD queries"); kib_tunables_t kibnal_tunables = { diff --git a/lnet/klnds/viblnd/viblnd_cb.c b/lnet/klnds/viblnd/viblnd_cb.c index 1c83872..64dbad4 100644 --- a/lnet/klnds/viblnd/viblnd_cb.c +++ b/lnet/klnds/viblnd/viblnd_cb.c @@ -1985,6 +1985,7 @@ kibnal_peer_connect_failed (kib_peer_t *peer, int active) list_del (&tx->tx_list); /* complete now */ + tx->tx_waiting = 0; tx->tx_status = -EHOSTUNREACH; kibnal_tx_done (tx); } while (!list_empty (&zombies)); diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index 0dfaae1..660ed62 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -51,44 +51,22 @@ lnet_get_routes(void) char * lnet_get_networks(void) { + int rc; + if (*networks != 0 && *ip2nets != 0) { LCONSOLE_ERROR("Please specify EITHER 'networks' or 'ip2nets'" " but not both at once\n"); return NULL; } - if (*networks != 0) - return networks; - if (*ip2nets != 0) { - int rc = lnet_parse_ip2nets(&networks, ip2nets); - - switch (rc) { - case 0: - return networks; - - case -ENOENT: - LCONSOLE_ERROR("Can't match any networks in " - "ip2nets\n"); - break; - - case -ENOMEM: - LCONSOLE_ERROR("Out of memory parsing ip2nets\n"); - break; - - case -EINVAL: - LCONSOLE_ERROR("Can't parse ip2nets\n"); - break; - - default: - LCONSOLE_ERROR("Unexpected error %d parsing ip2nets\n", - rc); - break; - } - - return NULL; + rc = lnet_parse_ip2nets(&networks, ip2nets); + return (rc == 0) ? 
networks : NULL; } + if (*networks != 0) + return networks; + return "tcp"; } @@ -142,15 +120,32 @@ char * lnet_get_networks (void) { static char default_networks[256]; + char *networks = getenv ("LNET_NETWORKS"); + char *ip2nets = getenv ("LNET_IP2NETS"); char *str; char *sep; int len; int nob; + int rc; struct list_head *tmp; - str = getenv ("LNET_NETWORKS"); - if (str != NULL) - return str; +#if NOT_YET + if (networks != NULL && ip2nets != NULL) { + LCONSOLE_ERROR("Please set EITHER 'LNET_NETWORKS' or " + "'LNET_IP2NETS' but not both at once\n"); + return NULL; + } + + if (ip2nets != NULL) { + rc = lnet_parse_ip2nets(&networks, ip2nets); + return (rc == 0) ? networks : NULL; + } +#else + ip2nets = NULL; + rc = 0; +#endif + if (networks != NULL) + return networks; /* In userland, the default 'networks=' is the list of known net types */ @@ -343,9 +338,7 @@ lnet_register_lnd (lnd_t *lnd) list_add_tail (&lnd->lnd_list, &the_lnet.ln_lnds); lnd->lnd_refcount = 0; - if (lnd->lnd_type != LOLND) - LCONSOLE(0, "%s LND registered\n", - libcfs_lnd2str(lnd->lnd_type)); + CDEBUG(D_NET, "%s LND registered\n", libcfs_lnd2str(lnd->lnd_type)); LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex); } @@ -360,9 +353,7 @@ lnet_unregister_lnd (lnd_t *lnd) LASSERT (lnd->lnd_refcount == 0); list_del (&lnd->lnd_list); - if (lnd->lnd_type != LOLND) - LCONSOLE(0, "%s LND unregistered\n", - libcfs_lnd2str(lnd->lnd_type)); + CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type)); LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex); } @@ -931,7 +922,7 @@ lnet_shutdown_lndnis (void) * itself... 
*/ if (!islo) - LCONSOLE(0, "Removed NI %s\n", + LCONSOLE(0, "Removed LNI %s\n", libcfs_nid2str(ni->ni_nid)); LIBCFS_FREE(ni, sizeof(*ni)); @@ -1021,7 +1012,7 @@ lnet_startup_lndnis (void) LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex); if (rc != 0) { - LCONSOLE_ERROR("Error %d starting up NI %s\n", + LCONSOLE_ERROR("Error %d starting up LNI %s\n", rc, libcfs_lnd2str(lnd->lnd_type)); LNET_LOCK(); lnd->lnd_refcount--; @@ -1059,7 +1050,7 @@ lnet_startup_lndnis (void) #endif if (ni->ni_peertxcredits == 0 || ni->ni_maxtxcredits == 0) { - LCONSOLE_ERROR("NI %s has no %scredits\n", + LCONSOLE_ERROR("LNI %s has no %scredits\n", libcfs_lnd2str(lnd->lnd_type), ni->ni_peertxcredits == 0 ? "" : "per-peer "); @@ -1068,7 +1059,7 @@ lnet_startup_lndnis (void) ni->ni_txcredits = ni->ni_mintxcredits = ni->ni_maxtxcredits; - LCONSOLE(0, "Added NI %s [%d/%d]\n", + LCONSOLE(0, "Added LNI %s [%d/%d]\n", libcfs_nid2str(ni->ni_nid), ni->ni_peertxcredits, ni->ni_txcredits); diff --git a/lnet/lnet/config.c b/lnet/lnet/config.c index 46ff65a..e872ddf 100644 --- a/lnet/lnet/config.c +++ b/lnet/lnet/config.c @@ -1166,19 +1166,25 @@ lnet_parse_ip2nets (char **networksp, char *ip2nets) int nip = lnet_ipaddr_enumerate(&ipaddrs); int rc; - if (nip == 0) { - CERROR("I have no IP addresses\n"); - return -ENOENT; - } - - if (nip <= 0) { - CERROR("Can't enumerate IP interfaces: %d\n", nip); + if (nip <=0) { + if (nip < 0) { + rc = nip; + CERROR("Can't enumerate IP interfaces: %d\n", nip); + } else { + rc = -ENOENT; + CERROR("No local IP interfaces\n"); + } + + LCONSOLE_ERROR("Can't match networks in ip2nets\n"); - return nip; + return rc; /* not nip: nip == 0 must fail with -ENOENT */ } rc = lnet_match_networks (networksp, ip2nets, ipaddrs, nip); lnet_ipaddr_free_enumeration(ipaddrs, nip); - + + if (rc != 0) + LCONSOLE_ERROR("Error %d parsing ip2nets\n", rc); + return rc; } diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index 0224d4a..1bd9c4e 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -26,6 +26,10 @@ #include +static int
local_nid_dist_zero = 1; +CFS_MODULE_PARM(local_nid_dist_zero, "i", int, 0444, + "Reserved"); + /* forward ref */ static void lnet_commit_md (lnet_libmd_t *md, lnet_msg_t *msg); @@ -732,21 +736,28 @@ lnet_ni_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed, int lnet_compare_routers(lnet_peer_t *p1, lnet_peer_t *p2) { - /* Go for the one with more available credits. - * Otherwise go for the minimum queue depth */ + /* FIRST compare available send credits + * (sends block immediately when peer credits are <= 0) + * THEN compare queue depth (the shorter queue wins) */ if (p1->lp_txcredits > 0) { if (p1->lp_txcredits > p2->lp_txcredits) return 1; if (p1->lp_txcredits < p2->lp_txcredits) - return 0; + return -1; } else if (p2->lp_txcredits > 0) { - return 0; + return -1; } + + if (p1->lp_txqnob < p2->lp_txqnob) + return 1; - return (p1->lp_txqnob > p2->lp_txqnob); + if (p1->lp_txqnob > p2->lp_txqnob) + return -1; + + return 0; } @@ -1148,6 +1159,7 @@ lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg) lnet_ni_t *local_ni; lnet_remotenet_t *rnet; lnet_route_t *route; + lnet_route_t *best_route; struct list_head *tmp; lnet_peer_t *lp; lnet_peer_t *lp2; @@ -1237,14 +1249,17 @@ lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg) /* Find the best gateway I can use */ lp = NULL; + best_route = NULL; list_for_each(tmp, &rnet->lrn_routes) { route = list_entry(tmp, lnet_route_t, lr_list); lp2 = route->lr_gateway; if (lp2->lp_alive && (src_ni == NULL || lp2->lp_ni == src_ni) && - (lp == NULL || lnet_compare_routers(lp2, lp))) + (lp == NULL || lnet_compare_routers(lp2, lp) > 0)) { + best_route = route; lp = lp2; + } } if (lp == NULL) { @@ -1256,6 +1271,11 @@ lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg) return -EHOSTUNREACH; } + /* Place selected route at the end of the route list to ensure + * fairness; everything else being equal...
*/ + list_del(&best_route->lr_list); + list_add_tail(&best_route->lr_list, &rnet->lrn_routes); + if (src_ni == NULL) { src_ni = lp->lp_ni; src_nid = src_ni->ni_nid; @@ -2130,13 +2150,18 @@ LNetDist (lnet_nid_t dstnid, lnet_nid_t *srcnidp, int *orderp) lnet_remotenet_t *rnet; __u32 dstnet = LNET_NIDNET(dstnid); int hops; - int order = 0; + int order = 2; + + /* if !local_nid_dist_zero, I don't return a distance of 0 ever + * (when lustre sees a distance of 0, it substitutes 0@lo), so I + * keep order 0 free for 0@lo and order 1 free for a local NID + * match */ LASSERT (the_lnet.ln_init); LASSERT (the_lnet.ln_refcount > 0); LNET_LOCK(); - + list_for_each (e, &the_lnet.ln_nis) { ni = list_entry(e, lnet_ni_t, ni_list); @@ -2154,7 +2179,8 @@ LNetDist (lnet_nid_t dstnid, lnet_nid_t *srcnidp, int *orderp) *orderp = 1; } LNET_UNLOCK(); - return 0; + + return local_nid_dist_zero ? 0 : 1; } if (LNET_NIDNET(ni->ni_nid) == dstnet || diff --git a/lnet/lnet/router_proc.c b/lnet/lnet/router_proc.c index 1013a9d..1cdab53 100644 --- a/lnet/lnet/router_proc.c +++ b/lnet/lnet/router_proc.c @@ -416,7 +416,7 @@ lnet_peer_seq_show (struct seq_file *s, void *iter) int nrefs; if (lpsi->lpsi_off == 0) { - seq_printf(s, "%-16s %4s %5s %5s %5s %5s %5s %5s %s\n", + seq_printf(s, "%-24s %4s %5s %5s %5s %5s %5s %5s %s\n", "nid", "refs", "state", "max", "rtr", "min", "tx", "min", "queue"); return 0; @@ -445,7 +445,7 @@ lnet_peer_seq_show (struct seq_file *s, void *iter) LNET_UNLOCK(); - seq_printf(s, "%-16s %4d %5s %5d %5d %5d %5d %5d %d\n", + seq_printf(s, "%-24s %4d %5s %5d %5d %5d %5d %5d %d\n", libcfs_nid2str(nid), nrefs, alive ? 
"up" : "down", maxcr, rtrcr, minrtrcr, txcr, mintxcr, txqnob); return 0; @@ -753,7 +753,7 @@ lnet_ni_seq_show (struct seq_file *s, void *iter) int nref; if (lnsi->lnsi_off == 0) { - seq_printf(s, "%-16s %4s %4s %5s %5s %5s\n", + seq_printf(s, "%-24s %4s %4s %5s %5s %5s\n", "nid", "refs", "peer", "max", "tx", "min"); return 0; } @@ -773,7 +773,7 @@ lnet_ni_seq_show (struct seq_file *s, void *iter) LNET_UNLOCK(); - seq_printf(s, "%-16s %4d %4d %5d %5d %5d\n", + seq_printf(s, "%-24s %4d %4d %5d %5d %5d\n", libcfs_nid2str(nid), nref, npeertxcr, maxtxcr, txcr, mintxcr); return 0; diff --git a/lnet/ulnds/socklnd/connection.c b/lnet/ulnds/socklnd/connection.c index 092d564..93d63e6c 100644 --- a/lnet/ulnds/socklnd/connection.c +++ b/lnet/ulnds/socklnd/connection.c @@ -75,11 +75,11 @@ tcpnal_env_param (char *name, int *val) int tcpnal_set_global_params (void) { - return tcpnal_env_param("TCPNAL_ACCEPTOR_PORT", + return tcpnal_env_param("TCPLND_PORT", &tcpnal_acceptor_port) && - tcpnal_env_param("TCPNAL_BUFFER_SIZE", + tcpnal_env_param("TCPLND_BUFFER_SIZE", &tcpnal_buffer_size) && - tcpnal_env_param("TCPNAL_NAGLE", + tcpnal_env_param("TCPLND_NAGLE", &tcpnal_nagle); } diff --git a/lnet/utils/lbstats b/lnet/utils/lbstats index 0baae3e..a8f0857 100755 --- a/lnet/utils/lbstats +++ b/lnet/utils/lbstats @@ -3,9 +3,9 @@ echo "=== Router Buffers =======" test -e /proc/sys/lnet/buffers && cat /proc/sys/lnet/buffers echo -echo "=== NIs ====================================" +echo "=== NIs ============================================" test -e /proc/sys/lnet/nis && cat /proc/sys/lnet/nis echo -echo "=== Peers =====================================================" +echo "=== Peers =============================================================" test -e /proc/sys/lnet/peers && cat /proc/sys/lnet/peers echo diff --git a/lnet/utils/portals.c b/lnet/utils/portals.c index 53e11f3..b0397ec 100644 --- a/lnet/utils/portals.c +++ b/lnet/utils/portals.c @@ -1260,7 +1260,7 @@ lwt_snapshot(cycles_t 
*now, int *ncpu, int *totalsize, if (data.ioc_u32[2] != sizeof(lwt_event_t) || data.ioc_u32[3] != offsetof(lwt_event_t, lwte_where)) { fprintf(stderr,"kernel/user LWT event mismatch %d(%d),%d(%d)\n", - (int)data.ioc_u32[2], sizeof(lwt_event_t), + (int)data.ioc_u32[2], (int)sizeof(lwt_event_t), (int)data.ioc_u32[3], (int)offsetof(lwt_event_t, lwte_where)); return (-1); -- 1.8.3.1