From ef9303096d41d9f237962335662dfcd7df00b600 Mon Sep 17 00:00:00 2001 From: eeb Date: Thu, 28 Oct 2004 12:03:25 +0000 Subject: [PATCH] * Landed b1_2_singleportals --- lnet/klnds/openiblnd/openiblnd.c | 971 ++++++++++++------------ lnet/klnds/openiblnd/openiblnd.h | 480 ++++++------ lnet/klnds/openiblnd/openiblnd_cb.c | 1387 ++++++++++++++++++----------------- 3 files changed, 1445 insertions(+), 1393 deletions(-) diff --git a/lnet/klnds/openiblnd/openiblnd.c b/lnet/klnds/openiblnd/openiblnd.c index 6f66143..652eb34 100644 --- a/lnet/klnds/openiblnd/openiblnd.c +++ b/lnet/klnds/openiblnd/openiblnd.c @@ -23,26 +23,25 @@ #include "openibnal.h" -nal_t koibnal_api; -ptl_handle_ni_t koibnal_ni; -koib_data_t koibnal_data; -koib_tunables_t koibnal_tunables; +nal_t kibnal_api; +ptl_handle_ni_t kibnal_ni; +kib_data_t kibnal_data; +kib_tunables_t kibnal_tunables; #ifdef CONFIG_SYSCTL -#define OPENIBNAL_SYSCTL 202 +#define IBNAL_SYSCTL 202 -#define OPENIBNAL_SYSCTL_TIMEOUT 1 -#define OPENIBNAL_SYSCTL_ZERO_COPY 2 +#define IBNAL_SYSCTL_TIMEOUT 1 -static ctl_table koibnal_ctl_table[] = { - {OPENIBNAL_SYSCTL_TIMEOUT, "timeout", - &koibnal_tunables.koib_io_timeout, sizeof (int), +static ctl_table kibnal_ctl_table[] = { + {IBNAL_SYSCTL_TIMEOUT, "timeout", + &kibnal_tunables.kib_io_timeout, sizeof (int), 0644, NULL, &proc_dointvec}, { 0 } }; -static ctl_table koibnal_top_ctl_table[] = { - {OPENIBNAL_SYSCTL, "openibnal", NULL, 0, 0555, koibnal_ctl_table}, +static ctl_table kibnal_top_ctl_table[] = { + {IBNAL_SYSCTL, "openibnal", NULL, 0, 0555, kibnal_ctl_table}, { 0 } }; #endif @@ -66,167 +65,183 @@ print_service(struct ib_common_attrib_service *service, char *tag, int rc) "service id: "LPX64"\n" "name : %s\n" "NID : "LPX64"\n", tag, rc, - service->service_id, name, service->service_data64[0]); + service->service_id, name, + *kibnal_service_nid_field(service)); } void -koibnal_service_setunset_done (tTS_IB_CLIENT_QUERY_TID tid, int status, +kibnal_service_setunset_done (tTS_IB_CLIENT_QUERY_TID tid, int status, struct ib_common_attrib_service *service, void *arg) { *(int *)arg = status; - up (&koibnal_data.koib_nid_signal); + up (&kibnal_data.kib_nid_signal); } +#if IBNAL_CHECK_ADVERT +void +kibnal_check_advert (void) +{ + struct ib_common_attrib_service *svc; + __u64 tid; + int rc; + int rc2; + + PORTAL_ALLOC(svc, sizeof(*svc)); + if (svc == NULL) + return; + + memset (svc, 0, sizeof (*svc)); + kibnal_set_service_keys(svc, kibnal_data.kib_nid); + + rc = ib_service_get (kibnal_data.kib_device, + kibnal_data.kib_port, + svc, + KIBNAL_SERVICE_KEY_MASK, + kibnal_tunables.kib_io_timeout * HZ, + kibnal_service_setunset_done, &rc2, + &tid); + + if (rc != 0) { + CERROR ("Immediate error %d checking SM service\n", rc); + } else { + down (&kibnal_data.kib_nid_signal); + rc = rc2; + + if (rc != 0) + CERROR ("Error %d checking SM service\n", rc); + } + + PORTAL_FREE(svc, sizeof(*svc)); +} +#endif + int -koibnal_advertise (void) +kibnal_advertise (void) { + struct ib_common_attrib_service *svc; __u64 tid; int rc; int rc2; - LASSERT (koibnal_data.koib_nid != PTL_NID_ANY); + LASSERT (kibnal_data.kib_nid != PTL_NID_ANY); + + PORTAL_ALLOC(svc, sizeof(*svc)); + if (svc == NULL) + return (-ENOMEM); - memset (&koibnal_data.koib_service, 0, - sizeof (koibnal_data.koib_service)); + memset (svc, 0, sizeof (*svc)); - koibnal_data.koib_service.service_id - = koibnal_data.koib_cm_service_id; + svc->service_id = kibnal_data.kib_service_id; - rc = ib_cached_gid_get(koibnal_data.koib_device, - koibnal_data.koib_port, + rc = 
ib_cached_gid_get(kibnal_data.kib_device, + kibnal_data.kib_port, 0, - koibnal_data.koib_service.service_gid); + svc->service_gid); if (rc != 0) { CERROR ("Can't get port %d GID: %d\n", - koibnal_data.koib_port, rc); - return (rc); + kibnal_data.kib_port, rc); + goto out; } - rc = ib_cached_pkey_get(koibnal_data.koib_device, - koibnal_data.koib_port, + rc = ib_cached_pkey_get(kibnal_data.kib_device, + kibnal_data.kib_port, 0, - &koibnal_data.koib_service.service_pkey); + &svc->service_pkey); if (rc != 0) { CERROR ("Can't get port %d PKEY: %d\n", - koibnal_data.koib_port, rc); - return (rc); + kibnal_data.kib_port, rc); + goto out; } - koibnal_data.koib_service.service_lease = 0xffffffff; + svc->service_lease = 0xffffffff; - koibnal_set_service_keys(&koibnal_data.koib_service, koibnal_data.koib_nid); + kibnal_set_service_keys(svc, kibnal_data.kib_nid); CDEBUG(D_NET, "Advertising service id "LPX64" %s:"LPX64"\n", - koibnal_data.koib_service.service_id, - koibnal_data.koib_service.service_name, - *koibnal_service_nid_field(&koibnal_data.koib_service)); + svc->service_id, + svc->service_name, *kibnal_service_nid_field(svc)); - rc = ib_service_set (koibnal_data.koib_device, - koibnal_data.koib_port, - &koibnal_data.koib_service, + rc = ib_service_set (kibnal_data.kib_device, + kibnal_data.kib_port, + svc, IB_SA_SERVICE_COMP_MASK_ID | IB_SA_SERVICE_COMP_MASK_GID | IB_SA_SERVICE_COMP_MASK_PKEY | IB_SA_SERVICE_COMP_MASK_LEASE | - KOIBNAL_SERVICE_KEY_MASK, - koibnal_tunables.koib_io_timeout * HZ, - koibnal_service_setunset_done, &rc2, &tid); + KIBNAL_SERVICE_KEY_MASK, + kibnal_tunables.kib_io_timeout * HZ, + kibnal_service_setunset_done, &rc2, &tid); - if (rc == 0) { - down (&koibnal_data.koib_nid_signal); - rc = rc2; + if (rc != 0) { + CERROR ("Immediate error %d advertising NID "LPX64"\n", + rc, kibnal_data.kib_nid); + goto out; } - - if (rc != 0) - CERROR ("Error %d advertising SM service\n", rc); + down (&kibnal_data.kib_nid_signal); + + rc = rc2; + if (rc != 0) + CERROR ("Error %d advertising NID "LPX64"\n", + rc, kibnal_data.kib_nid); + out: + PORTAL_FREE(svc, sizeof(*svc)); return (rc); } -int -koibnal_unadvertise (int expect_success) +void +kibnal_unadvertise (int expect_success) { + struct ib_common_attrib_service *svc; __u64 tid; int rc; int rc2; - LASSERT (koibnal_data.koib_nid != PTL_NID_ANY); + LASSERT (kibnal_data.kib_nid != PTL_NID_ANY); - memset (&koibnal_data.koib_service, 0, - sizeof (koibnal_data.koib_service)); + PORTAL_ALLOC(svc, sizeof(*svc)); + if (svc == NULL) + return; - koibnal_set_service_keys(&koibnal_data.koib_service, koibnal_data.koib_nid); + memset (svc, 0, sizeof(*svc)); + + kibnal_set_service_keys(svc, kibnal_data.kib_nid); CDEBUG(D_NET, "Unadvertising service %s:"LPX64"\n", - koibnal_data.koib_service.service_name, - *koibnal_service_nid_field(&koibnal_data.koib_service)); - - rc = ib_service_delete (koibnal_data.koib_device, - koibnal_data.koib_port, - &koibnal_data.koib_service, - KOIBNAL_SERVICE_KEY_MASK, - koibnal_tunables.koib_io_timeout * HZ, - koibnal_service_setunset_done, &rc2, &tid); + svc->service_name, *kibnal_service_nid_field(svc)); + + rc = ib_service_delete (kibnal_data.kib_device, + kibnal_data.kib_port, + svc, + KIBNAL_SERVICE_KEY_MASK, + kibnal_tunables.kib_io_timeout * HZ, + kibnal_service_setunset_done, &rc2, &tid); if (rc != 0) { CERROR ("Immediate error %d unadvertising NID "LPX64"\n", - rc, koibnal_data.koib_nid); - return (rc); + rc, kibnal_data.kib_nid); + goto out; } - down (&koibnal_data.koib_nid_signal); + down 
(&kibnal_data.kib_nid_signal); if ((rc2 == 0) == !!expect_success) - return (0); + goto out; /* success: rc == 0 */ if (expect_success) CERROR("Error %d unadvertising NID "LPX64"\n", - rc, koibnal_data.koib_nid); + rc, kibnal_data.kib_nid); else CWARN("Removed conflicting NID "LPX64"\n", - koibnal_data.koib_nid); - - return (rc); -} - -int -koibnal_check_advert (void) -{ - __u64 tid; - int rc; - int rc2; - - static struct ib_common_attrib_service srv; - - memset (&srv, 0, sizeof (srv)); - - koibnal_set_service_keys(&srv, koibnal_data.koib_nid); - - rc = ib_service_get (koibnal_data.koib_device, - koibnal_data.koib_port, - &srv, - KOIBNAL_SERVICE_KEY_MASK, - koibnal_tunables.koib_io_timeout * HZ, - koibnal_service_setunset_done, &rc2, - &tid); - - if (rc != 0) { - CERROR ("Immediate error %d checking SM service\n", rc); - } else { - down (&koibnal_data.koib_nid_signal); - rc = rc2; - - if (rc != 0) - CERROR ("Error %d checking SM service\n", rc); - } - - return (rc); + kibnal_data.kib_nid); + out: + PORTAL_FREE(svc, sizeof(*svc)); } int -koibnal_set_mynid(ptl_nid_t nid) +kibnal_set_mynid(ptl_nid_t nid) { struct timeval tv; - lib_ni_t *ni = &koibnal_lib.libnal_ni; + lib_ni_t *ni = &kibnal_lib.libnal_ni; int rc; CDEBUG(D_IOCTL, "setting mynid to "LPX64" (old nid="LPX64")\n", @@ -234,75 +249,76 @@ koibnal_set_mynid(ptl_nid_t nid) do_gettimeofday(&tv); - down (&koibnal_data.koib_nid_mutex); + down (&kibnal_data.kib_nid_mutex); - if (nid == koibnal_data.koib_nid) { + if (nid == kibnal_data.kib_nid) { /* no change of NID */ - up (&koibnal_data.koib_nid_mutex); + up (&kibnal_data.kib_nid_mutex); return (0); } CDEBUG(D_NET, "NID "LPX64"("LPX64")\n", - koibnal_data.koib_nid, nid); + kibnal_data.kib_nid, nid); - if (koibnal_data.koib_nid != PTL_NID_ANY) { + if (kibnal_data.kib_nid != PTL_NID_ANY) { - koibnal_unadvertise (1); + kibnal_unadvertise (1); - rc = ib_cm_listen_stop (koibnal_data.koib_listen_handle); + rc = ib_cm_listen_stop (kibnal_data.kib_listen_handle); if (rc != 0) CERROR ("Error %d stopping listener\n", rc); } - koibnal_data.koib_nid = ni->ni_pid.nid = nid; - koibnal_data.koib_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; + kibnal_data.kib_nid = ni->ni_pid.nid = nid; + kibnal_data.kib_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; /* Delete all existing peers and their connections after new * NID/incarnation set to ensure no old connections in our brave * new world. 
*/ - koibnal_del_peer (PTL_NID_ANY, 0); - - rc = 0; - if (koibnal_data.koib_nid != PTL_NID_ANY) { - /* New NID installed */ + kibnal_del_peer (PTL_NID_ANY, 0); - /* remove any previous advert (crashed node etc) */ - koibnal_unadvertise(0); + if (kibnal_data.kib_nid == PTL_NID_ANY) { + /* No new NID to install */ + up (&kibnal_data.kib_nid_mutex); + return (0); + } + + /* remove any previous advert (crashed node etc) */ + kibnal_unadvertise(0); - /* Assign new service number */ - koibnal_data.koib_cm_service_id = ib_cm_service_assign(); - CDEBUG(D_NET, "service_id "LPX64"\n", koibnal_data.koib_cm_service_id); + /* Assign new service number */ + kibnal_data.kib_service_id = ib_cm_service_assign(); + CDEBUG(D_NET, "service_id "LPX64"\n", kibnal_data.kib_service_id); - rc = ib_cm_listen(koibnal_data.koib_cm_service_id, - TS_IB_CM_SERVICE_EXACT_MASK, - koibnal_passive_conn_callback, NULL, - &koibnal_data.koib_listen_handle); - if (rc != 0) { - CERROR ("ib_cm_listen error: %d\n", rc); - goto out; + rc = ib_cm_listen(kibnal_data.kib_service_id, + TS_IB_CM_SERVICE_EXACT_MASK, + kibnal_passive_conn_callback, NULL, + &kibnal_data.kib_listen_handle); + if (rc == 0) { + rc = kibnal_advertise(); + if (rc == 0) { +#if IBNAL_CHECK_ADVERT + kibnal_check_advert(); +#endif + up (&kibnal_data.kib_nid_mutex); + return (0); } - rc = koibnal_advertise(); - - koibnal_check_advert(); - } - - out: - if (rc != 0) { - koibnal_data.koib_nid = PTL_NID_ANY; + ib_cm_listen_stop(kibnal_data.kib_listen_handle); /* remove any peers that sprung up while I failed to * advertise myself */ - koibnal_del_peer (PTL_NID_ANY, 0); + kibnal_del_peer (PTL_NID_ANY, 0); } - - up (&koibnal_data.koib_nid_mutex); - return (0); + + kibnal_data.kib_nid = PTL_NID_ANY; + up (&kibnal_data.kib_nid_mutex); + return (rc); } -koib_peer_t * -koibnal_create_peer (ptl_nid_t nid) +kib_peer_t * +kibnal_create_peer (ptl_nid_t nid) { - koib_peer_t *peer; + kib_peer_t *peer; LASSERT (nid != PTL_NID_ANY); @@ -320,20 +336,20 @@ koibnal_create_peer (ptl_nid_t nid) INIT_LIST_HEAD (&peer->ibp_tx_queue); peer->ibp_reconnect_time = jiffies; - peer->ibp_reconnect_interval = OPENIBNAL_MIN_RECONNECT_INTERVAL; + peer->ibp_reconnect_interval = IBNAL_MIN_RECONNECT_INTERVAL; - atomic_inc (&koibnal_data.koib_npeers); + atomic_inc (&kibnal_data.kib_npeers); return (peer); } void -koibnal_destroy_peer (koib_peer_t *peer) +kibnal_destroy_peer (kib_peer_t *peer) { CDEBUG (D_NET, "peer "LPX64" %p deleted\n", peer->ibp_nid, peer); LASSERT (atomic_read (&peer->ibp_refcount) == 0); LASSERT (peer->ibp_persistence == 0); - LASSERT (!koibnal_peer_active(peer)); + LASSERT (!kibnal_peer_active(peer)); LASSERT (peer->ibp_connecting == 0); LASSERT (list_empty (&peer->ibp_conns)); LASSERT (list_empty (&peer->ibp_tx_queue)); @@ -344,11 +360,11 @@ koibnal_destroy_peer (koib_peer_t *peer) * they are destroyed, so we can be assured that _all_ state to do * with this peer has been cleaned up when its refcount drops to * zero. 
*/ - atomic_dec (&koibnal_data.koib_npeers); + atomic_dec (&kibnal_data.kib_npeers); } void -koibnal_put_peer (koib_peer_t *peer) +kibnal_put_peer (kib_peer_t *peer) { CDEBUG (D_OTHER, "putting peer[%p] -> "LPX64" (%d)\n", peer, peer->ibp_nid, @@ -358,19 +374,19 @@ koibnal_put_peer (koib_peer_t *peer) if (!atomic_dec_and_test (&peer->ibp_refcount)) return; - koibnal_destroy_peer (peer); + kibnal_destroy_peer (peer); } -koib_peer_t * -koibnal_find_peer_locked (ptl_nid_t nid) +kib_peer_t * +kibnal_find_peer_locked (ptl_nid_t nid) { - struct list_head *peer_list = koibnal_nid2peerlist (nid); + struct list_head *peer_list = kibnal_nid2peerlist (nid); struct list_head *tmp; - koib_peer_t *peer; + kib_peer_t *peer; list_for_each (tmp, peer_list) { - peer = list_entry (tmp, koib_peer_t, ibp_list); + peer = list_entry (tmp, kib_peer_t, ibp_list); LASSERT (peer->ibp_persistence != 0 || /* persistent peer */ peer->ibp_connecting != 0 || /* creating conns */ @@ -386,46 +402,46 @@ koibnal_find_peer_locked (ptl_nid_t nid) return (NULL); } -koib_peer_t * -koibnal_get_peer (ptl_nid_t nid) +kib_peer_t * +kibnal_get_peer (ptl_nid_t nid) { - koib_peer_t *peer; + kib_peer_t *peer; - read_lock (&koibnal_data.koib_global_lock); - peer = koibnal_find_peer_locked (nid); + read_lock (&kibnal_data.kib_global_lock); + peer = kibnal_find_peer_locked (nid); if (peer != NULL) /* +1 ref for caller? */ atomic_inc (&peer->ibp_refcount); - read_unlock (&koibnal_data.koib_global_lock); + read_unlock (&kibnal_data.kib_global_lock); return (peer); } void -koibnal_unlink_peer_locked (koib_peer_t *peer) +kibnal_unlink_peer_locked (kib_peer_t *peer) { LASSERT (peer->ibp_persistence == 0); LASSERT (list_empty(&peer->ibp_conns)); - LASSERT (koibnal_peer_active(peer)); + LASSERT (kibnal_peer_active(peer)); list_del_init (&peer->ibp_list); /* lose peerlist's ref */ - koibnal_put_peer (peer); + kibnal_put_peer (peer); } int -koibnal_get_peer_info (int index, ptl_nid_t *nidp, int *persistencep) +kibnal_get_peer_info (int index, ptl_nid_t *nidp, int *persistencep) { - koib_peer_t *peer; + kib_peer_t *peer; struct list_head *ptmp; int i; - read_lock (&koibnal_data.koib_global_lock); + read_lock (&kibnal_data.kib_global_lock); - for (i = 0; i < koibnal_data.koib_peer_hash_size; i++) { + for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) { - list_for_each (ptmp, &koibnal_data.koib_peers[i]) { + list_for_each (ptmp, &kibnal_data.kib_peers[i]) { - peer = list_entry (ptmp, koib_peer_t, ibp_list); + peer = list_entry (ptmp, kib_peer_t, ibp_list); LASSERT (peer->ibp_persistence != 0 || peer->ibp_connecting != 0 || !list_empty (&peer->ibp_conns)); @@ -436,53 +452,53 @@ koibnal_get_peer_info (int index, ptl_nid_t *nidp, int *persistencep) *nidp = peer->ibp_nid; *persistencep = peer->ibp_persistence; - read_unlock (&koibnal_data.koib_global_lock); + read_unlock (&kibnal_data.kib_global_lock); return (0); } } - read_unlock (&koibnal_data.koib_global_lock); + read_unlock (&kibnal_data.kib_global_lock); return (-ENOENT); } int -koibnal_add_persistent_peer (ptl_nid_t nid) +kibnal_add_persistent_peer (ptl_nid_t nid) { unsigned long flags; - koib_peer_t *peer; - koib_peer_t *peer2; + kib_peer_t *peer; + kib_peer_t *peer2; if (nid == PTL_NID_ANY) return (-EINVAL); - peer = koibnal_create_peer (nid); + peer = kibnal_create_peer (nid); if (peer == NULL) return (-ENOMEM); - write_lock_irqsave (&koibnal_data.koib_global_lock, flags); + write_lock_irqsave (&kibnal_data.kib_global_lock, flags); - peer2 = koibnal_find_peer_locked (nid); + peer2 = 
kibnal_find_peer_locked (nid); if (peer2 != NULL) { - koibnal_put_peer (peer); + kibnal_put_peer (peer); peer = peer2; } else { /* peer table takes existing ref on peer */ list_add_tail (&peer->ibp_list, - koibnal_nid2peerlist (nid)); + kibnal_nid2peerlist (nid)); } peer->ibp_persistence++; - write_unlock_irqrestore (&koibnal_data.koib_global_lock, flags); + write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); return (0); } void -koibnal_del_peer_locked (koib_peer_t *peer, int single_share) +kibnal_del_peer_locked (kib_peer_t *peer, int single_share) { struct list_head *ctmp; struct list_head *cnxt; - koib_conn_t *conn; + kib_conn_t *conn; if (!single_share) peer->ibp_persistence = 0; @@ -493,38 +509,38 @@ koibnal_del_peer_locked (koib_peer_t *peer, int single_share) return; list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) { - conn = list_entry(ctmp, koib_conn_t, ibc_list); + conn = list_entry(ctmp, kib_conn_t, ibc_list); - koibnal_close_conn_locked (conn, 0); + kibnal_close_conn_locked (conn, 0); } /* NB peer unlinks itself when last conn is closed */ } int -koibnal_del_peer (ptl_nid_t nid, int single_share) +kibnal_del_peer (ptl_nid_t nid, int single_share) { unsigned long flags; struct list_head *ptmp; struct list_head *pnxt; - koib_peer_t *peer; + kib_peer_t *peer; int lo; int hi; int i; int rc = -ENOENT; - write_lock_irqsave (&koibnal_data.koib_global_lock, flags); + write_lock_irqsave (&kibnal_data.kib_global_lock, flags); if (nid != PTL_NID_ANY) - lo = hi = koibnal_nid2peerlist(nid) - koibnal_data.koib_peers; + lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers; else { lo = 0; - hi = koibnal_data.koib_peer_hash_size - 1; + hi = kibnal_data.kib_peer_hash_size - 1; } for (i = lo; i <= hi; i++) { - list_for_each_safe (ptmp, pnxt, &koibnal_data.koib_peers[i]) { - peer = list_entry (ptmp, koib_peer_t, ibp_list); + list_for_each_safe (ptmp, pnxt, &kibnal_data.kib_peers[i]) { + peer = list_entry (ptmp, kib_peer_t, ibp_list); LASSERT (peer->ibp_persistence != 0 || peer->ibp_connecting != 0 || !list_empty (&peer->ibp_conns)); @@ -532,7 +548,7 @@ koibnal_del_peer (ptl_nid_t nid, int single_share) if (!(nid == PTL_NID_ANY || peer->ibp_nid == nid)) continue; - koibnal_del_peer_locked (peer, single_share); + kibnal_del_peer_locked (peer, single_share); rc = 0; /* matched something */ if (single_share) @@ -540,26 +556,26 @@ koibnal_del_peer (ptl_nid_t nid, int single_share) } } out: - write_unlock_irqrestore (&koibnal_data.koib_global_lock, flags); + write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); return (rc); } -koib_conn_t * -koibnal_get_conn_by_idx (int index) +kib_conn_t * +kibnal_get_conn_by_idx (int index) { - koib_peer_t *peer; + kib_peer_t *peer; struct list_head *ptmp; - koib_conn_t *conn; + kib_conn_t *conn; struct list_head *ctmp; int i; - read_lock (&koibnal_data.koib_global_lock); + read_lock (&kibnal_data.kib_global_lock); - for (i = 0; i < koibnal_data.koib_peer_hash_size; i++) { - list_for_each (ptmp, &koibnal_data.koib_peers[i]) { + for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) { + list_for_each (ptmp, &kibnal_data.kib_peers[i]) { - peer = list_entry (ptmp, koib_peer_t, ibp_list); + peer = list_entry (ptmp, kib_peer_t, ibp_list); LASSERT (peer->ibp_persistence > 0 || peer->ibp_connecting != 0 || !list_empty (&peer->ibp_conns)); @@ -568,25 +584,25 @@ koibnal_get_conn_by_idx (int index) if (index-- > 0) continue; - conn = list_entry (ctmp, koib_conn_t, ibc_list); + conn = list_entry (ctmp, kib_conn_t, ibc_list); CDEBUG(D_NET, "++conn[%p] 
state %d -> "LPX64" (%d)\n", conn, conn->ibc_state, conn->ibc_peer->ibp_nid, atomic_read (&conn->ibc_refcount)); atomic_inc (&conn->ibc_refcount); - read_unlock (&koibnal_data.koib_global_lock); + read_unlock (&kibnal_data.kib_global_lock); return (conn); } } } - read_unlock (&koibnal_data.koib_global_lock); + read_unlock (&kibnal_data.kib_global_lock); return (NULL); } -koib_conn_t * -koibnal_create_conn (void) +kib_conn_t * +kibnal_create_conn (void) { - koib_conn_t *conn; + kib_conn_t *conn; int i; __u64 vaddr = 0; __u64 vaddr_base; @@ -608,57 +624,57 @@ koibnal_create_conn (void) memset (conn, 0, sizeof (*conn)); INIT_LIST_HEAD (&conn->ibc_tx_queue); - INIT_LIST_HEAD (&conn->ibc_rdma_queue); + INIT_LIST_HEAD (&conn->ibc_active_txs); spin_lock_init (&conn->ibc_lock); - atomic_inc (&koibnal_data.koib_nconns); + atomic_inc (&kibnal_data.kib_nconns); /* well not really, but I call destroy() on failure, which decrements */ - PORTAL_ALLOC (conn->ibc_rxs, OPENIBNAL_RX_MSGS * sizeof (koib_rx_t)); + PORTAL_ALLOC (conn->ibc_rxs, IBNAL_RX_MSGS * sizeof (kib_rx_t)); if (conn->ibc_rxs == NULL) goto failed; - memset (conn->ibc_rxs, 0, OPENIBNAL_RX_MSGS * sizeof(koib_rx_t)); + memset (conn->ibc_rxs, 0, IBNAL_RX_MSGS * sizeof(kib_rx_t)); - rc = koibnal_alloc_pages(&conn->ibc_rx_pages, - OPENIBNAL_RX_MSG_PAGES, - IB_ACCESS_LOCAL_WRITE); + rc = kibnal_alloc_pages(&conn->ibc_rx_pages, + IBNAL_RX_MSG_PAGES, + IB_ACCESS_LOCAL_WRITE); if (rc != 0) goto failed; - vaddr_base = vaddr = conn->ibc_rx_pages->oibp_vaddr; + vaddr_base = vaddr = conn->ibc_rx_pages->ibp_vaddr; - for (i = ipage = page_offset = 0; i < OPENIBNAL_RX_MSGS; i++) { - struct page *page = conn->ibc_rx_pages->oibp_pages[ipage]; - koib_rx_t *rx = &conn->ibc_rxs[i]; + for (i = ipage = page_offset = 0; i < IBNAL_RX_MSGS; i++) { + struct page *page = conn->ibc_rx_pages->ibp_pages[ipage]; + kib_rx_t *rx = &conn->ibc_rxs[i]; rx->rx_conn = conn; rx->rx_vaddr = vaddr; - rx->rx_msg = (koib_msg_t *)(((char *)page_address(page)) + page_offset); + rx->rx_msg = (kib_msg_t *)(((char *)page_address(page)) + page_offset); - vaddr += OPENIBNAL_MSG_SIZE; - LASSERT (vaddr <= vaddr_base + OPENIBNAL_RX_MSG_BYTES); + vaddr += IBNAL_MSG_SIZE; + LASSERT (vaddr <= vaddr_base + IBNAL_RX_MSG_BYTES); - page_offset += OPENIBNAL_MSG_SIZE; + page_offset += IBNAL_MSG_SIZE; LASSERT (page_offset <= PAGE_SIZE); if (page_offset == PAGE_SIZE) { page_offset = 0; ipage++; - LASSERT (ipage <= OPENIBNAL_RX_MSG_PAGES); + LASSERT (ipage <= IBNAL_RX_MSG_PAGES); } } params.qp_create = (struct ib_qp_create_param) { .limit = { /* Sends have an optional RDMA */ - .max_outstanding_send_request = 2 * OPENIBNAL_MSG_QUEUE_SIZE, - .max_outstanding_receive_request = OPENIBNAL_MSG_QUEUE_SIZE, + .max_outstanding_send_request = 2 * IBNAL_MSG_QUEUE_SIZE, + .max_outstanding_receive_request = IBNAL_MSG_QUEUE_SIZE, .max_send_gather_element = 1, .max_receive_scatter_element = 1, }, - .pd = koibnal_data.koib_pd, - .send_queue = koibnal_data.koib_tx_cq, - .receive_queue = koibnal_data.koib_rx_cq, + .pd = kibnal_data.kib_pd, + .send_queue = kibnal_data.kib_cq, + .receive_queue = kibnal_data.kib_cq, .send_policy = IB_WQ_SIGNAL_SELECTABLE, .receive_policy = IB_WQ_SIGNAL_SELECTABLE, .rd_domain = 0, @@ -673,11 +689,11 @@ koibnal_create_conn (void) } /* Mark QP created */ - conn->ibc_state = OPENIBNAL_CONN_INIT_QP; + conn->ibc_state = IBNAL_CONN_INIT_QP; params.qp_attr = (struct ib_qp_attribute) { .state = IB_QP_STATE_INIT, - .port = koibnal_data.koib_port, + .port = kibnal_data.kib_port, .enable_rdma_read = 1, 
.enable_rdma_write = 1, .valid_fields = (IB_QP_ATTRIBUTE_STATE | @@ -696,12 +712,12 @@ koibnal_create_conn (void) return (conn); failed: - koibnal_destroy_conn (conn); + kibnal_destroy_conn (conn); return (NULL); } void -koibnal_destroy_conn (koib_conn_t *conn) +kibnal_destroy_conn (kib_conn_t *conn) { int rc; @@ -709,21 +725,21 @@ koibnal_destroy_conn (koib_conn_t *conn) LASSERT (atomic_read (&conn->ibc_refcount) == 0); LASSERT (list_empty(&conn->ibc_tx_queue)); - LASSERT (list_empty(&conn->ibc_rdma_queue)); + LASSERT (list_empty(&conn->ibc_active_txs)); LASSERT (conn->ibc_nsends_posted == 0); LASSERT (conn->ibc_connreq == NULL); switch (conn->ibc_state) { - case OPENIBNAL_CONN_ZOMBIE: + case IBNAL_CONN_ZOMBIE: /* called after connection sequence initiated */ - case OPENIBNAL_CONN_INIT_QP: + case IBNAL_CONN_INIT_QP: rc = ib_qp_destroy(conn->ibc_qp); if (rc != 0) CERROR("Can't destroy QP: %d\n", rc); /* fall through */ - case OPENIBNAL_CONN_INIT_NOTHING: + case IBNAL_CONN_INIT_NOTHING: break; default: @@ -731,30 +747,30 @@ koibnal_destroy_conn (koib_conn_t *conn) } if (conn->ibc_rx_pages != NULL) - koibnal_free_pages(conn->ibc_rx_pages); + kibnal_free_pages(conn->ibc_rx_pages); if (conn->ibc_rxs != NULL) PORTAL_FREE(conn->ibc_rxs, - OPENIBNAL_RX_MSGS * sizeof(koib_rx_t)); + IBNAL_RX_MSGS * sizeof(kib_rx_t)); if (conn->ibc_peer != NULL) - koibnal_put_peer(conn->ibc_peer); + kibnal_put_peer(conn->ibc_peer); PORTAL_FREE(conn, sizeof (*conn)); - atomic_dec(&koibnal_data.koib_nconns); + atomic_dec(&kibnal_data.kib_nconns); - if (atomic_read (&koibnal_data.koib_nconns) == 0 && - koibnal_data.koib_shutdown) { + if (atomic_read (&kibnal_data.kib_nconns) == 0 && + kibnal_data.kib_shutdown) { /* I just nuked the last connection on shutdown; wake up * everyone so they can exit. 
*/ - wake_up_all(&koibnal_data.koib_sched_waitq); - wake_up_all(&koibnal_data.koib_connd_waitq); + wake_up_all(&kibnal_data.kib_sched_waitq); + wake_up_all(&kibnal_data.kib_connd_waitq); } } void -koibnal_put_conn (koib_conn_t *conn) +kibnal_put_conn (kib_conn_t *conn) { unsigned long flags; @@ -767,44 +783,44 @@ koibnal_put_conn (koib_conn_t *conn) return; /* last ref only goes on zombies */ - LASSERT (conn->ibc_state == OPENIBNAL_CONN_ZOMBIE); + LASSERT (conn->ibc_state == IBNAL_CONN_ZOMBIE); - spin_lock_irqsave (&koibnal_data.koib_connd_lock, flags); + spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags); - list_add (&conn->ibc_list, &koibnal_data.koib_connd_conns); - wake_up (&koibnal_data.koib_connd_waitq); + list_add (&conn->ibc_list, &kibnal_data.kib_connd_conns); + wake_up (&kibnal_data.kib_connd_waitq); - spin_unlock_irqrestore (&koibnal_data.koib_connd_lock, flags); + spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags); } int -koibnal_close_peer_conns_locked (koib_peer_t *peer, int why) +kibnal_close_peer_conns_locked (kib_peer_t *peer, int why) { - koib_conn_t *conn; + kib_conn_t *conn; struct list_head *ctmp; struct list_head *cnxt; int count = 0; list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) { - conn = list_entry (ctmp, koib_conn_t, ibc_list); + conn = list_entry (ctmp, kib_conn_t, ibc_list); count++; - koibnal_close_conn_locked (conn, why); + kibnal_close_conn_locked (conn, why); } return (count); } int -koibnal_close_stale_conns_locked (koib_peer_t *peer, __u64 incarnation) +kibnal_close_stale_conns_locked (kib_peer_t *peer, __u64 incarnation) { - koib_conn_t *conn; + kib_conn_t *conn; struct list_head *ctmp; struct list_head *cnxt; int count = 0; list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) { - conn = list_entry (ctmp, koib_conn_t, ibc_list); + conn = list_entry (ctmp, kib_conn_t, ibc_list); if (conn->ibc_incarnation == incarnation) continue; @@ -813,17 +829,17 @@ koibnal_close_stale_conns_locked (koib_peer_t *peer, __u64 incarnation) peer->ibp_nid, conn->ibc_incarnation, incarnation); count++; - koibnal_close_conn_locked (conn, -ESTALE); + kibnal_close_conn_locked (conn, -ESTALE); } return (count); } int -koibnal_close_matching_conns (ptl_nid_t nid) +kibnal_close_matching_conns (ptl_nid_t nid) { unsigned long flags; - koib_peer_t *peer; + kib_peer_t *peer; struct list_head *ptmp; struct list_head *pnxt; int lo; @@ -831,19 +847,19 @@ koibnal_close_matching_conns (ptl_nid_t nid) int i; int count = 0; - write_lock_irqsave (&koibnal_data.koib_global_lock, flags); + write_lock_irqsave (&kibnal_data.kib_global_lock, flags); if (nid != PTL_NID_ANY) - lo = hi = koibnal_nid2peerlist(nid) - koibnal_data.koib_peers; + lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers; else { lo = 0; - hi = koibnal_data.koib_peer_hash_size - 1; + hi = kibnal_data.kib_peer_hash_size - 1; } for (i = lo; i <= hi; i++) { - list_for_each_safe (ptmp, pnxt, &koibnal_data.koib_peers[i]) { + list_for_each_safe (ptmp, pnxt, &kibnal_data.kib_peers[i]) { - peer = list_entry (ptmp, koib_peer_t, ibp_list); + peer = list_entry (ptmp, kib_peer_t, ibp_list); LASSERT (peer->ibp_persistence != 0 || peer->ibp_connecting != 0 || !list_empty (&peer->ibp_conns)); @@ -851,11 +867,11 @@ koibnal_close_matching_conns (ptl_nid_t nid) if (!(nid == PTL_NID_ANY || nid == peer->ibp_nid)) continue; - count += koibnal_close_peer_conns_locked (peer, 0); + count += kibnal_close_peer_conns_locked (peer, 0); } } - write_unlock_irqrestore (&koibnal_data.koib_global_lock, flags); + write_unlock_irqrestore 
(&kibnal_data.kib_global_lock, flags); /* wildcards always succeed */ if (nid == PTL_NID_ANY) @@ -865,7 +881,7 @@ koibnal_close_matching_conns (ptl_nid_t nid) } int -koibnal_cmd(struct portals_cfg *pcfg, void * private) +kibnal_cmd(struct portals_cfg *pcfg, void * private) { int rc = -EINVAL; @@ -876,8 +892,8 @@ koibnal_cmd(struct portals_cfg *pcfg, void * private) ptl_nid_t nid = 0; int share_count = 0; - rc = koibnal_get_peer_info(pcfg->pcfg_count, - &nid, &share_count); + rc = kibnal_get_peer_info(pcfg->pcfg_count, + &nid, &share_count); pcfg->pcfg_nid = nid; pcfg->pcfg_size = 0; pcfg->pcfg_id = 0; @@ -887,17 +903,17 @@ koibnal_cmd(struct portals_cfg *pcfg, void * private) break; } case NAL_CMD_ADD_PEER: { - rc = koibnal_add_persistent_peer (pcfg->pcfg_nid); + rc = kibnal_add_persistent_peer (pcfg->pcfg_nid); break; } case NAL_CMD_DEL_PEER: { - rc = koibnal_del_peer (pcfg->pcfg_nid, + rc = kibnal_del_peer (pcfg->pcfg_nid, /* flags == single_share */ pcfg->pcfg_flags != 0); break; } case NAL_CMD_GET_CONN: { - koib_conn_t *conn = koibnal_get_conn_by_idx (pcfg->pcfg_count); + kib_conn_t *conn = kibnal_get_conn_by_idx (pcfg->pcfg_count); if (conn == NULL) rc = -ENOENT; @@ -907,19 +923,19 @@ koibnal_cmd(struct portals_cfg *pcfg, void * private) pcfg->pcfg_id = 0; pcfg->pcfg_misc = 0; pcfg->pcfg_flags = 0; - koibnal_put_conn (conn); + kibnal_put_conn (conn); } break; } case NAL_CMD_CLOSE_CONNECTION: { - rc = koibnal_close_matching_conns (pcfg->pcfg_nid); + rc = kibnal_close_matching_conns (pcfg->pcfg_nid); break; } case NAL_CMD_REGISTER_MYNID: { if (pcfg->pcfg_nid == PTL_NID_ANY) rc = -EINVAL; else - rc = koibnal_set_mynid (pcfg->pcfg_nid); + rc = kibnal_set_mynid (pcfg->pcfg_nid); break; } } @@ -928,47 +944,47 @@ koibnal_cmd(struct portals_cfg *pcfg, void * private) } void -koibnal_free_pages (koib_pages_t *p) +kibnal_free_pages (kib_pages_t *p) { - int npages = p->oibp_npages; + int npages = p->ibp_npages; int rc; int i; - if (p->oibp_mapped) { - rc = ib_memory_deregister(p->oibp_handle); + if (p->ibp_mapped) { + rc = ib_memory_deregister(p->ibp_handle); if (rc != 0) CERROR ("Deregister error: %d\n", rc); } for (i = 0; i < npages; i++) - if (p->oibp_pages[i] != NULL) - __free_page(p->oibp_pages[i]); + if (p->ibp_pages[i] != NULL) + __free_page(p->ibp_pages[i]); - PORTAL_FREE (p, offsetof(koib_pages_t, oibp_pages[npages])); + PORTAL_FREE (p, offsetof(kib_pages_t, ibp_pages[npages])); } int -koibnal_alloc_pages (koib_pages_t **pp, int npages, int access) +kibnal_alloc_pages (kib_pages_t **pp, int npages, int access) { - koib_pages_t *p; + kib_pages_t *p; struct ib_physical_buffer *phys_pages; int i; int rc; - PORTAL_ALLOC(p, offsetof(koib_pages_t, oibp_pages[npages])); + PORTAL_ALLOC(p, offsetof(kib_pages_t, ibp_pages[npages])); if (p == NULL) { CERROR ("Can't allocate buffer %d\n", npages); return (-ENOMEM); } - memset (p, 0, offsetof(koib_pages_t, oibp_pages[npages])); - p->oibp_npages = npages; + memset (p, 0, offsetof(kib_pages_t, ibp_pages[npages])); + p->ibp_npages = npages; for (i = 0; i < npages; i++) { - p->oibp_pages[i] = alloc_page (GFP_KERNEL); - if (p->oibp_pages[i] == NULL) { + p->ibp_pages[i] = alloc_page (GFP_KERNEL); + if (p->ibp_pages[i] == NULL) { CERROR ("Can't allocate page %d of %d\n", i, npages); - koibnal_free_pages(p); + kibnal_free_pages(p); return (-ENOMEM); } } @@ -976,96 +992,96 @@ koibnal_alloc_pages (koib_pages_t **pp, int npages, int access) PORTAL_ALLOC(phys_pages, npages * sizeof(*phys_pages)); if (phys_pages == NULL) { CERROR ("Can't allocate physarray for 
%d pages\n", npages); - koibnal_free_pages(p); + kibnal_free_pages(p); return (-ENOMEM); } for (i = 0; i < npages; i++) { phys_pages[i].size = PAGE_SIZE; phys_pages[i].address = - koibnal_page2phys(p->oibp_pages[i]); + kibnal_page2phys(p->ibp_pages[i]); } - p->oibp_vaddr = 0; - rc = ib_memory_register_physical(koibnal_data.koib_pd, + p->ibp_vaddr = 0; + rc = ib_memory_register_physical(kibnal_data.kib_pd, phys_pages, npages, - &p->oibp_vaddr, + &p->ibp_vaddr, npages * PAGE_SIZE, 0, access, - &p->oibp_handle, - &p->oibp_lkey, - &p->oibp_rkey); + &p->ibp_handle, + &p->ibp_lkey, + &p->ibp_rkey); PORTAL_FREE(phys_pages, npages * sizeof(*phys_pages)); if (rc != 0) { CERROR ("Error %d mapping %d pages\n", rc, npages); - koibnal_free_pages(p); + kibnal_free_pages(p); return (rc); } - p->oibp_mapped = 1; + p->ibp_mapped = 1; *pp = p; return (0); } int -koibnal_setup_tx_descs (void) +kibnal_setup_tx_descs (void) { int ipage = 0; int page_offset = 0; __u64 vaddr; __u64 vaddr_base; struct page *page; - koib_tx_t *tx; + kib_tx_t *tx; int i; int rc; /* pre-mapped messages are not bigger than 1 page */ - LASSERT (OPENIBNAL_MSG_SIZE <= PAGE_SIZE); + LASSERT (IBNAL_MSG_SIZE <= PAGE_SIZE); /* No fancy arithmetic when we do the buffer calculations */ - LASSERT (PAGE_SIZE % OPENIBNAL_MSG_SIZE == 0); + LASSERT (PAGE_SIZE % IBNAL_MSG_SIZE == 0); - rc = koibnal_alloc_pages(&koibnal_data.koib_tx_pages, - OPENIBNAL_TX_MSG_PAGES, - 0); /* local read access only */ + rc = kibnal_alloc_pages(&kibnal_data.kib_tx_pages, + IBNAL_TX_MSG_PAGES, + 0); /* local read access only */ if (rc != 0) return (rc); - vaddr = vaddr_base = koibnal_data.koib_tx_pages->oibp_vaddr; + vaddr = vaddr_base = kibnal_data.kib_tx_pages->ibp_vaddr; - for (i = 0; i < OPENIBNAL_TX_MSGS; i++) { - page = koibnal_data.koib_tx_pages->oibp_pages[ipage]; - tx = &koibnal_data.koib_tx_descs[i]; + for (i = 0; i < IBNAL_TX_MSGS; i++) { + page = kibnal_data.kib_tx_pages->ibp_pages[ipage]; + tx = &kibnal_data.kib_tx_descs[i]; memset (tx, 0, sizeof(*tx)); /* zero flags etc */ - tx->tx_msg = (koib_msg_t *)(((char *)page_address(page)) + page_offset); + tx->tx_msg = (kib_msg_t *)(((char *)page_address(page)) + page_offset); tx->tx_vaddr = vaddr; - tx->tx_isnblk = (i >= OPENIBNAL_NTX); - tx->tx_mapped = KOIB_TX_UNMAPPED; + tx->tx_isnblk = (i >= IBNAL_NTX); + tx->tx_mapped = KIB_TX_UNMAPPED; CDEBUG(D_NET, "Tx[%d] %p->%p - "LPX64"\n", i, tx, tx->tx_msg, tx->tx_vaddr); if (tx->tx_isnblk) list_add (&tx->tx_list, - &koibnal_data.koib_idle_nblk_txs); + &kibnal_data.kib_idle_nblk_txs); else list_add (&tx->tx_list, - &koibnal_data.koib_idle_txs); + &kibnal_data.kib_idle_txs); - vaddr += OPENIBNAL_MSG_SIZE; - LASSERT (vaddr <= vaddr_base + OPENIBNAL_TX_MSG_BYTES); + vaddr += IBNAL_MSG_SIZE; + LASSERT (vaddr <= vaddr_base + IBNAL_TX_MSG_BYTES); - page_offset += OPENIBNAL_MSG_SIZE; + page_offset += IBNAL_MSG_SIZE; LASSERT (page_offset <= PAGE_SIZE); if (page_offset == PAGE_SIZE) { page_offset = 0; ipage++; - LASSERT (ipage <= OPENIBNAL_TX_MSG_PAGES); + LASSERT (ipage <= IBNAL_TX_MSG_PAGES); } } @@ -1073,7 +1089,7 @@ koibnal_setup_tx_descs (void) } void -koibnal_api_shutdown (nal_t *nal) +kibnal_api_shutdown (nal_t *nal) { int i; int rc; @@ -1087,119 +1103,113 @@ koibnal_api_shutdown (nal_t *nal) CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n", atomic_read (&portal_kmemory)); - LASSERT(nal == &koibnal_api); + LASSERT(nal == &kibnal_api); - switch (koibnal_data.koib_init) { + switch (kibnal_data.kib_init) { default: - CERROR ("Unexpected state %d\n", 
koibnal_data.koib_init); + CERROR ("Unexpected state %d\n", kibnal_data.kib_init); LBUG(); - case OPENIBNAL_INIT_ALL: + case IBNAL_INIT_ALL: /* stop calls to nal_cmd */ libcfs_nal_cmd_unregister(OPENIBNAL); /* No new peers */ /* resetting my NID to unadvertises me, removes my * listener and nukes all current peers */ - koibnal_set_mynid (PTL_NID_ANY); + kibnal_set_mynid (PTL_NID_ANY); /* Wait for all peer state to clean up */ i = 2; - while (atomic_read (&koibnal_data.koib_npeers) != 0) { + while (atomic_read (&kibnal_data.kib_npeers) != 0) { i++; CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */ "waiting for %d peers to close down\n", - atomic_read (&koibnal_data.koib_npeers)); + atomic_read (&kibnal_data.kib_npeers)); set_current_state (TASK_INTERRUPTIBLE); schedule_timeout (HZ); } /* fall through */ - case OPENIBNAL_INIT_TX_CQ: - rc = ib_cq_destroy (koibnal_data.koib_tx_cq); - if (rc != 0) - CERROR ("Destroy tx CQ error: %d\n", rc); - /* fall through */ - - case OPENIBNAL_INIT_RX_CQ: - rc = ib_cq_destroy (koibnal_data.koib_rx_cq); + case IBNAL_INIT_CQ: + rc = ib_cq_destroy (kibnal_data.kib_cq); if (rc != 0) - CERROR ("Destroy rx CQ error: %d\n", rc); + CERROR ("Destroy CQ error: %d\n", rc); /* fall through */ - case OPENIBNAL_INIT_TXD: - koibnal_free_pages (koibnal_data.koib_tx_pages); + case IBNAL_INIT_TXD: + kibnal_free_pages (kibnal_data.kib_tx_pages); /* fall through */ -#if OPENIBNAL_FMR - case OPENIBNAL_INIT_FMR: - rc = ib_fmr_pool_destroy (koibnal_data.koib_fmr_pool); +#if IBNAL_FMR + case IBNAL_INIT_FMR: + rc = ib_fmr_pool_destroy (kibnal_data.kib_fmr_pool); if (rc != 0) CERROR ("Destroy FMR pool error: %d\n", rc); /* fall through */ #endif - case OPENIBNAL_INIT_PD: - rc = ib_pd_destroy(koibnal_data.koib_pd); + case IBNAL_INIT_PD: + rc = ib_pd_destroy(kibnal_data.kib_pd); if (rc != 0) CERROR ("Destroy PD error: %d\n", rc); /* fall through */ - case OPENIBNAL_INIT_LIB: - lib_fini(&koibnal_lib); + case IBNAL_INIT_LIB: + lib_fini(&kibnal_lib); /* fall through */ - case OPENIBNAL_INIT_DATA: + case IBNAL_INIT_DATA: /* Module refcount only gets to zero when all peers * have been closed so all lists must be empty */ - LASSERT (atomic_read (&koibnal_data.koib_npeers) == 0); - LASSERT (koibnal_data.koib_peers != NULL); - for (i = 0; i < koibnal_data.koib_peer_hash_size; i++) { - LASSERT (list_empty (&koibnal_data.koib_peers[i])); + LASSERT (atomic_read (&kibnal_data.kib_npeers) == 0); + LASSERT (kibnal_data.kib_peers != NULL); + for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) { + LASSERT (list_empty (&kibnal_data.kib_peers[i])); } - LASSERT (atomic_read (&koibnal_data.koib_nconns) == 0); - LASSERT (list_empty (&koibnal_data.koib_sched_rxq)); - LASSERT (list_empty (&koibnal_data.koib_sched_txq)); - LASSERT (list_empty (&koibnal_data.koib_connd_conns)); - LASSERT (list_empty (&koibnal_data.koib_connd_peers)); + LASSERT (atomic_read (&kibnal_data.kib_nconns) == 0); + LASSERT (list_empty (&kibnal_data.kib_sched_rxq)); + LASSERT (list_empty (&kibnal_data.kib_sched_txq)); + LASSERT (list_empty (&kibnal_data.kib_connd_conns)); + LASSERT (list_empty (&kibnal_data.kib_connd_peers)); /* flag threads to terminate; wake and wait for them to die */ - koibnal_data.koib_shutdown = 1; - wake_up_all (&koibnal_data.koib_sched_waitq); - wake_up_all (&koibnal_data.koib_connd_waitq); + kibnal_data.kib_shutdown = 1; + wake_up_all (&kibnal_data.kib_sched_waitq); + wake_up_all (&kibnal_data.kib_connd_waitq); i = 2; - while (atomic_read (&koibnal_data.koib_nthreads) != 0) { + while 
(atomic_read (&kibnal_data.kib_nthreads) != 0) { i++; CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */ "Waiting for %d threads to terminate\n", - atomic_read (&koibnal_data.koib_nthreads)); + atomic_read (&kibnal_data.kib_nthreads)); set_current_state (TASK_INTERRUPTIBLE); schedule_timeout (HZ); } /* fall through */ - case OPENIBNAL_INIT_NOTHING: + case IBNAL_INIT_NOTHING: break; } - if (koibnal_data.koib_tx_descs != NULL) - PORTAL_FREE (koibnal_data.koib_tx_descs, - OPENIBNAL_TX_MSGS * sizeof(koib_tx_t)); + if (kibnal_data.kib_tx_descs != NULL) + PORTAL_FREE (kibnal_data.kib_tx_descs, + IBNAL_TX_MSGS * sizeof(kib_tx_t)); - if (koibnal_data.koib_peers != NULL) - PORTAL_FREE (koibnal_data.koib_peers, + if (kibnal_data.kib_peers != NULL) + PORTAL_FREE (kibnal_data.kib_peers, sizeof (struct list_head) * - koibnal_data.koib_peer_hash_size); + kibnal_data.kib_peer_hash_size); CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n", atomic_read (&portal_kmemory)); printk(KERN_INFO "Lustre: OpenIB NAL unloaded (final mem %d)\n", atomic_read(&portal_kmemory)); - koibnal_data.koib_init = OPENIBNAL_INIT_NOTHING; + kibnal_data.kib_init = IBNAL_INIT_NOTHING; } int -koibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid, +kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid, ptl_ni_limits_t *requested_limits, ptl_ni_limits_t *actual_limits) { @@ -1208,65 +1218,66 @@ koibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid, int rc; int i; - LASSERT (nal == &koibnal_api); + LASSERT (nal == &kibnal_api); if (nal->nal_refct != 0) { if (actual_limits != NULL) - *actual_limits = koibnal_lib.libnal_ni.ni_actual_limits; + *actual_limits = kibnal_lib.libnal_ni.ni_actual_limits; /* This module got the first ref */ PORTAL_MODULE_USE; return (PTL_OK); } - LASSERT (koibnal_data.koib_init == OPENIBNAL_INIT_NOTHING); + LASSERT (kibnal_data.kib_init == IBNAL_INIT_NOTHING); - memset (&koibnal_data, 0, sizeof (koibnal_data)); /* zero pointers, flags etc */ + memset (&kibnal_data, 0, sizeof (kibnal_data)); /* zero pointers, flags etc */ - init_MUTEX (&koibnal_data.koib_nid_mutex); - init_MUTEX_LOCKED (&koibnal_data.koib_nid_signal); - koibnal_data.koib_nid = PTL_NID_ANY; + init_MUTEX (&kibnal_data.kib_nid_mutex); + init_MUTEX_LOCKED (&kibnal_data.kib_nid_signal); + kibnal_data.kib_nid = PTL_NID_ANY; - rwlock_init(&koibnal_data.koib_global_lock); + rwlock_init(&kibnal_data.kib_global_lock); - koibnal_data.koib_peer_hash_size = OPENIBNAL_PEER_HASH_SIZE; - PORTAL_ALLOC (koibnal_data.koib_peers, - sizeof (struct list_head) * koibnal_data.koib_peer_hash_size); - if (koibnal_data.koib_peers == NULL) { + kibnal_data.kib_peer_hash_size = IBNAL_PEER_HASH_SIZE; + PORTAL_ALLOC (kibnal_data.kib_peers, + sizeof (struct list_head) * kibnal_data.kib_peer_hash_size); + if (kibnal_data.kib_peers == NULL) { goto failed; } - for (i = 0; i < koibnal_data.koib_peer_hash_size; i++) - INIT_LIST_HEAD(&koibnal_data.koib_peers[i]); - - spin_lock_init (&koibnal_data.koib_connd_lock); - INIT_LIST_HEAD (&koibnal_data.koib_connd_peers); - INIT_LIST_HEAD (&koibnal_data.koib_connd_conns); - init_waitqueue_head (&koibnal_data.koib_connd_waitq); - - spin_lock_init (&koibnal_data.koib_sched_lock); - INIT_LIST_HEAD (&koibnal_data.koib_sched_txq); - INIT_LIST_HEAD (&koibnal_data.koib_sched_rxq); - init_waitqueue_head (&koibnal_data.koib_sched_waitq); - - spin_lock_init (&koibnal_data.koib_tx_lock); - INIT_LIST_HEAD (&koibnal_data.koib_idle_txs); - INIT_LIST_HEAD (&koibnal_data.koib_idle_nblk_txs); - 
init_waitqueue_head(&koibnal_data.koib_idle_tx_waitq); - - PORTAL_ALLOC (koibnal_data.koib_tx_descs, - OPENIBNAL_TX_MSGS * sizeof(koib_tx_t)); - if (koibnal_data.koib_tx_descs == NULL) { + for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) + INIT_LIST_HEAD(&kibnal_data.kib_peers[i]); + + spin_lock_init (&kibnal_data.kib_connd_lock); + INIT_LIST_HEAD (&kibnal_data.kib_connd_peers); + INIT_LIST_HEAD (&kibnal_data.kib_connd_conns); + init_waitqueue_head (&kibnal_data.kib_connd_waitq); + + spin_lock_init (&kibnal_data.kib_sched_lock); + INIT_LIST_HEAD (&kibnal_data.kib_sched_txq); + INIT_LIST_HEAD (&kibnal_data.kib_sched_rxq); + init_waitqueue_head (&kibnal_data.kib_sched_waitq); + + spin_lock_init (&kibnal_data.kib_tx_lock); + INIT_LIST_HEAD (&kibnal_data.kib_idle_txs); + INIT_LIST_HEAD (&kibnal_data.kib_idle_nblk_txs); + init_waitqueue_head(&kibnal_data.kib_idle_tx_waitq); + + PORTAL_ALLOC (kibnal_data.kib_tx_descs, + IBNAL_TX_MSGS * sizeof(kib_tx_t)); + if (kibnal_data.kib_tx_descs == NULL) { CERROR ("Can't allocate tx descs\n"); goto failed; } /* lists/ptrs/locks initialised */ - koibnal_data.koib_init = OPENIBNAL_INIT_DATA; + kibnal_data.kib_init = IBNAL_INIT_DATA; /*****************************************************/ + process_id.pid = requested_pid; - process_id.nid = koibnal_data.koib_nid; + process_id.nid = kibnal_data.kib_nid; - rc = lib_init(&koibnal_lib, nal, process_id, + rc = lib_init(&kibnal_lib, nal, process_id, requested_limits, actual_limits); if (rc != PTL_OK) { CERROR("lib_init failed: error %d\n", rc); @@ -1274,11 +1285,11 @@ koibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid, } /* lib interface initialised */ - koibnal_data.koib_init = OPENIBNAL_INIT_LIB; + kibnal_data.kib_init = IBNAL_INIT_LIB; /*****************************************************/ - for (i = 0; i < OPENIBNAL_N_SCHED; i++) { - rc = koibnal_thread_start (koibnal_scheduler, (void *)i); + for (i = 0; i < IBNAL_N_SCHED; i++) { + rc = kibnal_thread_start (kibnal_scheduler, (void *)i); if (rc != 0) { CERROR("Can't spawn openibnal scheduler[%d]: %d\n", i, rc); @@ -1286,56 +1297,56 @@ koibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid, } } - rc = koibnal_thread_start (koibnal_connd, NULL); + rc = kibnal_thread_start (kibnal_connd, NULL); if (rc != 0) { CERROR ("Can't spawn openibnal connd: %d\n", rc); goto failed; } - koibnal_data.koib_device = ib_device_get_by_index(0); - if (koibnal_data.koib_device == NULL) { + kibnal_data.kib_device = ib_device_get_by_index(0); + if (kibnal_data.kib_device == NULL) { CERROR ("Can't open ib device 0\n"); goto failed; } - rc = ib_device_properties_get(koibnal_data.koib_device, - &koibnal_data.koib_device_props); + rc = ib_device_properties_get(kibnal_data.kib_device, + &kibnal_data.kib_device_props); if (rc != 0) { CERROR ("Can't get device props: %d\n", rc); goto failed; } CDEBUG(D_NET, "Max Initiator: %d Max Responder %d\n", - koibnal_data.koib_device_props.max_initiator_per_qp, - koibnal_data.koib_device_props.max_responder_per_qp); + kibnal_data.kib_device_props.max_initiator_per_qp, + kibnal_data.kib_device_props.max_responder_per_qp); - koibnal_data.koib_port = 0; + kibnal_data.kib_port = 0; for (i = 1; i <= 2; i++) { - rc = ib_port_properties_get(koibnal_data.koib_device, i, - &koibnal_data.koib_port_props); + rc = ib_port_properties_get(kibnal_data.kib_device, i, + &kibnal_data.kib_port_props); if (rc == 0) { - koibnal_data.koib_port = i; + kibnal_data.kib_port = i; break; } } - if (koibnal_data.koib_port == 0) { + if (kibnal_data.kib_port == 0) { 
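                /* neither port 1 nor port 2 of the device answered
                 * ib_port_properties_get() in the probe loop above, so
                 * there is no usable IB port to bring the NAL up on */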
CERROR ("Can't find a port\n"); goto failed; } - rc = ib_pd_create(koibnal_data.koib_device, - NULL, &koibnal_data.koib_pd); + rc = ib_pd_create(kibnal_data.kib_device, + NULL, &kibnal_data.kib_pd); if (rc != 0) { CERROR ("Can't create PD: %d\n", rc); goto failed; } /* flag PD initialised */ - koibnal_data.koib_init = OPENIBNAL_INIT_PD; + kibnal_data.kib_init = IBNAL_INIT_PD; /*****************************************************/ -#if OPENIBNAL_FMR +#if IBNAL_FMR { - const int pool_size = OPENIBNAL_NTX + OPENIBNAL_NTX_NBLK; + const int pool_size = IBNAL_NTX + IBNAL_NTX_NBLK; struct ib_fmr_pool_param params = { .max_pages_per_fmr = PTL_MTU/PAGE_SIZE, .access = (IB_ACCESS_LOCAL_WRITE | @@ -1347,8 +1358,8 @@ koibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid, .flush_arg = NULL, .cache = 1, }; - rc = ib_fmr_pool_create(koibnal_data.koib_pd, ¶ms, - &koibnal_data.koib_fmr_pool); + rc = ib_fmr_pool_create(kibnal_data.kib_pd, ¶ms, + &kibnal_data.kib_fmr_pool); if (rc != 0) { CERROR ("Can't create FMR pool size %d: %d\n", pool_size, rc); @@ -1357,84 +1368,56 @@ koibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid, } /* flag FMR pool initialised */ - koibnal_data.koib_init = OPENIBNAL_INIT_FMR; + kibnal_data.kib_init = IBNAL_INIT_FMR; #endif /*****************************************************/ - rc = koibnal_setup_tx_descs(); + rc = kibnal_setup_tx_descs(); if (rc != 0) { CERROR ("Can't register tx descs: %d\n", rc); goto failed; } /* flag TX descs initialised */ - koibnal_data.koib_init = OPENIBNAL_INIT_TXD; + kibnal_data.kib_init = IBNAL_INIT_TXD; /*****************************************************/ { struct ib_cq_callback callback = { - .context = OPENIBNAL_CALLBACK_CTXT, + .context = IBNAL_CALLBACK_CTXT, .policy = IB_CQ_PROVIDER_REARM, .function = { - .entry = koibnal_rx_callback, + .entry = kibnal_callback, }, .arg = NULL, }; - int nentries = OPENIBNAL_RX_CQ_ENTRIES; + int nentries = IBNAL_CQ_ENTRIES; - rc = ib_cq_create (koibnal_data.koib_device, + rc = ib_cq_create (kibnal_data.kib_device, &nentries, &callback, NULL, - &koibnal_data.koib_rx_cq); + &kibnal_data.kib_cq); if (rc != 0) { - CERROR ("Can't create RX CQ: %d\n", rc); + CERROR ("Can't create CQ: %d\n", rc); goto failed; } /* I only want solicited events */ - rc = ib_cq_request_notification(koibnal_data.koib_rx_cq, 1); + rc = ib_cq_request_notification(kibnal_data.kib_cq, 1); LASSERT (rc == 0); } - /* flag RX CQ initialised */ - koibnal_data.koib_init = OPENIBNAL_INIT_RX_CQ; - /*****************************************************/ - - { - struct ib_cq_callback callback = { - .context = OPENIBNAL_CALLBACK_CTXT, - .policy = IB_CQ_PROVIDER_REARM, - .function = { - .entry = koibnal_tx_callback, - }, - .arg = NULL, - }; - int nentries = OPENIBNAL_TX_CQ_ENTRIES; - - rc = ib_cq_create (koibnal_data.koib_device, - &nentries, &callback, NULL, - &koibnal_data.koib_tx_cq); - if (rc != 0) { - CERROR ("Can't create RX CQ: %d\n", rc); - goto failed; - } - - /* I only want solicited events */ - rc = ib_cq_request_notification(koibnal_data.koib_tx_cq, 1); - LASSERT (rc == 0); - } - - /* flag TX CQ initialised */ - koibnal_data.koib_init = OPENIBNAL_INIT_TX_CQ; + /* flag CQ initialised */ + kibnal_data.kib_init = IBNAL_INIT_CQ; /*****************************************************/ - rc = libcfs_nal_cmd_register(OPENIBNAL, &koibnal_cmd, NULL); + rc = libcfs_nal_cmd_register(OPENIBNAL, &kibnal_cmd, NULL); if (rc != 0) { CERROR ("Can't initialise command interface (rc = %d)\n", rc); goto failed; } /* flag everything initialised */ - 
koibnal_data.koib_init = OPENIBNAL_INIT_ALL; + kibnal_data.kib_init = IBNAL_INIT_ALL; /*****************************************************/ printk(KERN_INFO "Lustre: OpenIB NAL loaded " @@ -1443,44 +1426,44 @@ koibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid, return (PTL_OK); failed: - koibnal_api_shutdown (&koibnal_api); + kibnal_api_shutdown (&kibnal_api); return (PTL_FAIL); } void __exit -koibnal_module_fini (void) +kibnal_module_fini (void) { #ifdef CONFIG_SYSCTL - if (koibnal_tunables.koib_sysctl != NULL) - unregister_sysctl_table (koibnal_tunables.koib_sysctl); + if (kibnal_tunables.kib_sysctl != NULL) + unregister_sysctl_table (kibnal_tunables.kib_sysctl); #endif - PtlNIFini(koibnal_ni); + PtlNIFini(kibnal_ni); ptl_unregister_nal(OPENIBNAL); } int __init -koibnal_module_init (void) +kibnal_module_init (void) { int rc; /* the following must be sizeof(int) for proc_dointvec() */ - LASSERT(sizeof (koibnal_tunables.koib_io_timeout) == sizeof (int)); + LASSERT(sizeof (kibnal_tunables.kib_io_timeout) == sizeof (int)); - koibnal_api.nal_ni_init = koibnal_api_startup; - koibnal_api.nal_ni_fini = koibnal_api_shutdown; + kibnal_api.nal_ni_init = kibnal_api_startup; + kibnal_api.nal_ni_fini = kibnal_api_shutdown; /* Initialise dynamic tunables to defaults once only */ - koibnal_tunables.koib_io_timeout = OPENIBNAL_IO_TIMEOUT; + kibnal_tunables.kib_io_timeout = IBNAL_IO_TIMEOUT; - rc = ptl_register_nal(OPENIBNAL, &koibnal_api); + rc = ptl_register_nal(OPENIBNAL, &kibnal_api); if (rc != PTL_OK) { - CERROR("Can't register OPENIBNAL: %d\n", rc); + CERROR("Can't register IBNAL: %d\n", rc); return (-ENOMEM); /* or something... */ } /* Pure gateways want the NAL started up at module load time... */ - rc = PtlNIInit(OPENIBNAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &koibnal_ni); + rc = PtlNIInit(OPENIBNAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &kibnal_ni); if (rc != PTL_OK && rc != PTL_IFACE_DUP) { ptl_unregister_nal(OPENIBNAL); return (-ENODEV); @@ -1488,8 +1471,8 @@ koibnal_module_init (void) #ifdef CONFIG_SYSCTL /* Press on regardless even if registering sysctl doesn't work */ - koibnal_tunables.koib_sysctl = - register_sysctl_table (koibnal_top_ctl_table, 0); + kibnal_tunables.kib_sysctl = + register_sysctl_table (kibnal_top_ctl_table, 0); #endif return (0); } @@ -1498,6 +1481,6 @@ MODULE_AUTHOR("Cluster File Systems, Inc. "); MODULE_DESCRIPTION("Kernel OpenIB NAL v0.01"); MODULE_LICENSE("GPL"); -module_init(koibnal_module_init); -module_exit(koibnal_module_fini); +module_init(kibnal_module_init); +module_exit(kibnal_module_fini); diff --git a/lnet/klnds/openiblnd/openiblnd.h b/lnet/klnds/openiblnd/openiblnd.h index 301d3ae..f0610f2 100644 --- a/lnet/klnds/openiblnd/openiblnd.h +++ b/lnet/klnds/openiblnd/openiblnd.h @@ -48,7 +48,7 @@ #include #include -#define DEBUG_SUBSYSTEM S_OPENIBNAL +#define DEBUG_SUBSYSTEM S_IBNAL #include #include @@ -59,144 +59,140 @@ #include #include -#define OPENIBNAL_SERVICE_NAME "openibnal" +#define IBNAL_SERVICE_NAME "openibnal" #if CONFIG_SMP -# define OPENIBNAL_N_SCHED num_online_cpus() /* # schedulers */ +# define IBNAL_N_SCHED num_online_cpus() /* # schedulers */ #else -# define OPENIBNAL_N_SCHED 1 /* # schedulers */ +# define IBNAL_N_SCHED 1 /* # schedulers */ #endif -#define OPENIBNAL_MIN_RECONNECT_INTERVAL HZ /* first failed connection retry... */ -#define OPENIBNAL_MAX_RECONNECT_INTERVAL (60*HZ) /* ...exponentially increasing to this */ +#define IBNAL_MIN_RECONNECT_INTERVAL HZ /* first failed connection retry... 
*/ +#define IBNAL_MAX_RECONNECT_INTERVAL (60*HZ) /* ...exponentially increasing to this */ -#define OPENIBNAL_MSG_SIZE (4<<10) /* max size of queued messages (inc hdr) */ +#define IBNAL_MSG_SIZE (4<<10) /* max size of queued messages (inc hdr) */ -#define OPENIBNAL_MSG_QUEUE_SIZE 8 /* # messages in-flight */ -#define OPENIBNAL_CREDIT_HIGHWATER 6 /* when to eagerly return credits */ -#define OPENIBNAL_RETRY 7 /* # times to retry */ -#define OPENIBNAL_RNR_RETRY 7 /* */ -#define OPENIBNAL_CM_RETRY 7 /* # times to retry connection */ -#define OPENIBNAL_FLOW_CONTROL 1 -#define OPENIBNAL_RESPONDER_RESOURCES 8 +#define IBNAL_MSG_QUEUE_SIZE 8 /* # messages/RDMAs in-flight */ +#define IBNAL_CREDIT_HIGHWATER 6 /* when to eagerly return credits */ +#define IBNAL_RETRY 7 /* # times to retry */ +#define IBNAL_RNR_RETRY 7 /* */ +#define IBNAL_CM_RETRY 7 /* # times to retry connection */ +#define IBNAL_FLOW_CONTROL 1 +#define IBNAL_RESPONDER_RESOURCES 8 -#define OPENIBNAL_NTX 64 /* # tx descs */ -#define OPENIBNAL_NTX_NBLK 256 /* # reserved tx descs */ +#define IBNAL_NTX 64 /* # tx descs */ +#define IBNAL_NTX_NBLK 256 /* # reserved tx descs */ -#define OPENIBNAL_PEER_HASH_SIZE 101 /* # peer lists */ +#define IBNAL_PEER_HASH_SIZE 101 /* # peer lists */ -#define OPENIBNAL_RESCHED 100 /* # scheduler loops before reschedule */ +#define IBNAL_RESCHED 100 /* # scheduler loops before reschedule */ -#define OPENIBNAL_CONCURRENT_PEERS 1000 /* # nodes all talking at once to me */ +#define IBNAL_CONCURRENT_PEERS 1000 /* # nodes all talking at once to me */ /* default vals for runtime tunables */ -#define OPENIBNAL_IO_TIMEOUT 50 /* default comms timeout (seconds) */ +#define IBNAL_IO_TIMEOUT 50 /* default comms timeout (seconds) */ /************************/ /* derived constants... 
*/ /* TX messages (shared by all connections) */ -#define OPENIBNAL_TX_MSGS (OPENIBNAL_NTX + OPENIBNAL_NTX_NBLK) -#define OPENIBNAL_TX_MSG_BYTES (OPENIBNAL_TX_MSGS * OPENIBNAL_MSG_SIZE) -#define OPENIBNAL_TX_MSG_PAGES ((OPENIBNAL_TX_MSG_BYTES + PAGE_SIZE - 1)/PAGE_SIZE) - -/* we may have up to 2 completions per transmit */ -#define OPENIBNAL_TX_CQ_ENTRIES (2*OPENIBNAL_TX_MSGS) +#define IBNAL_TX_MSGS (IBNAL_NTX + IBNAL_NTX_NBLK) +#define IBNAL_TX_MSG_BYTES (IBNAL_TX_MSGS * IBNAL_MSG_SIZE) +#define IBNAL_TX_MSG_PAGES ((IBNAL_TX_MSG_BYTES + PAGE_SIZE - 1)/PAGE_SIZE) /* RX messages (per connection) */ -#define OPENIBNAL_RX_MSGS OPENIBNAL_MSG_QUEUE_SIZE -#define OPENIBNAL_RX_MSG_BYTES (OPENIBNAL_RX_MSGS * OPENIBNAL_MSG_SIZE) -#define OPENIBNAL_RX_MSG_PAGES ((OPENIBNAL_RX_MSG_BYTES + PAGE_SIZE - 1)/PAGE_SIZE) +#define IBNAL_RX_MSGS IBNAL_MSG_QUEUE_SIZE +#define IBNAL_RX_MSG_BYTES (IBNAL_RX_MSGS * IBNAL_MSG_SIZE) +#define IBNAL_RX_MSG_PAGES ((IBNAL_RX_MSG_BYTES + PAGE_SIZE - 1)/PAGE_SIZE) -/* 1 completion per receive, per connection */ -#define OPENIBNAL_RX_CQ_ENTRIES (OPENIBNAL_RX_MSGS * OPENIBNAL_CONCURRENT_PEERS) +/* we may have up to 2 completions per transmit + + 1 completion per receive, per connection */ +#define IBNAL_CQ_ENTRIES ((2*IBNAL_TX_MSGS) + \ + (IBNAL_RX_MSGS * IBNAL_CONCURRENT_PEERS)) -#define OPENIBNAL_RDMA_BASE 0x0eeb0000 -#define OPENIBNAL_FMR 1 -#define OPENIBNAL_CKSUM 0 -//#define OPENIBNAL_CALLBACK_CTXT IB_CQ_CALLBACK_PROCESS -#define OPENIBNAL_CALLBACK_CTXT IB_CQ_CALLBACK_INTERRUPT +#define IBNAL_RDMA_BASE 0x0eeb0000 +#define IBNAL_FMR 1 +#define IBNAL_CKSUM 0 +//#define IBNAL_CALLBACK_CTXT IB_CQ_CALLBACK_PROCESS +#define IBNAL_CALLBACK_CTXT IB_CQ_CALLBACK_INTERRUPT typedef struct { - int koib_io_timeout; /* comms timeout (seconds) */ - struct ctl_table_header *koib_sysctl; /* sysctl interface */ -} koib_tunables_t; + int kib_io_timeout; /* comms timeout (seconds) */ + struct ctl_table_header *kib_sysctl; /* sysctl interface */ +} kib_tunables_t; typedef struct { - int oibp_npages; /* # pages */ - int oibp_mapped; /* mapped? */ - __u64 oibp_vaddr; /* mapped region vaddr */ - __u32 oibp_lkey; /* mapped region lkey */ - __u32 oibp_rkey; /* mapped region rkey */ - struct ib_mr *oibp_handle; /* mapped region handle */ - struct page *oibp_pages[0]; -} koib_pages_t; + int ibp_npages; /* # pages */ + int ibp_mapped; /* mapped? */ + __u64 ibp_vaddr; /* mapped region vaddr */ + __u32 ibp_lkey; /* mapped region lkey */ + __u32 ibp_rkey; /* mapped region rkey */ + struct ib_mr *ibp_handle; /* mapped region handle */ + struct page *ibp_pages[0]; +} kib_pages_t; typedef struct { - int koib_init; /* initialisation state */ - __u64 koib_incarnation; /* which one am I */ - int koib_shutdown; /* shut down? 
*/ - atomic_t koib_nthreads; /* # live threads */ - - __u64 koib_cm_service_id; /* service number I listen on */ - ptl_nid_t koib_nid; /* my NID */ - struct semaphore koib_nid_mutex; /* serialise NID ops */ - struct semaphore koib_nid_signal; /* signal completion */ - - rwlock_t koib_global_lock; /* stabilize peer/conn ops */ - - struct list_head *koib_peers; /* hash table of all my known peers */ - int koib_peer_hash_size; /* size of koib_peers */ - atomic_t koib_npeers; /* # peers extant */ - atomic_t koib_nconns; /* # connections extant */ - - struct list_head koib_connd_conns; /* connections to progress */ - struct list_head koib_connd_peers; /* peers waiting for a connection */ - wait_queue_head_t koib_connd_waitq; /* connection daemons sleep here */ - unsigned long koib_connd_waketime; /* when connd will wake */ - spinlock_t koib_connd_lock; /* serialise */ - - wait_queue_head_t koib_sched_waitq; /* schedulers sleep here */ - struct list_head koib_sched_txq; /* tx requiring attention */ - struct list_head koib_sched_rxq; /* rx requiring attention */ - spinlock_t koib_sched_lock; /* serialise */ + int kib_init; /* initialisation state */ + __u64 kib_incarnation; /* which one am I */ + int kib_shutdown; /* shut down? */ + atomic_t kib_nthreads; /* # live threads */ + + __u64 kib_service_id; /* service number I listen on */ + ptl_nid_t kib_nid; /* my NID */ + struct semaphore kib_nid_mutex; /* serialise NID ops */ + struct semaphore kib_nid_signal; /* signal completion */ + + rwlock_t kib_global_lock; /* stabilize peer/conn ops */ + + struct list_head *kib_peers; /* hash table of all my known peers */ + int kib_peer_hash_size; /* size of kib_peers */ + atomic_t kib_npeers; /* # peers extant */ + atomic_t kib_nconns; /* # connections extant */ + + struct list_head kib_connd_conns; /* connections to progress */ + struct list_head kib_connd_peers; /* peers waiting for a connection */ + wait_queue_head_t kib_connd_waitq; /* connection daemons sleep here */ + unsigned long kib_connd_waketime; /* when connd will wake */ + spinlock_t kib_connd_lock; /* serialise */ + + wait_queue_head_t kib_sched_waitq; /* schedulers sleep here */ + struct list_head kib_sched_txq; /* tx requiring attention */ + struct list_head kib_sched_rxq; /* rx requiring attention */ + spinlock_t kib_sched_lock; /* serialise */ - struct koib_tx *koib_tx_descs; /* all the tx descriptors */ - koib_pages_t *koib_tx_pages; /* premapped tx msg pages */ - - struct list_head koib_idle_txs; /* idle tx descriptors */ - struct list_head koib_idle_nblk_txs; /* idle reserved tx descriptors */ - wait_queue_head_t koib_idle_tx_waitq; /* block here for tx descriptor */ - __u64 koib_next_tx_cookie; /* RDMA completion cookie */ - spinlock_t koib_tx_lock; /* serialise */ + struct kib_tx *kib_tx_descs; /* all the tx descriptors */ + kib_pages_t *kib_tx_pages; /* premapped tx msg pages */ + + struct list_head kib_idle_txs; /* idle tx descriptors */ + struct list_head kib_idle_nblk_txs; /* idle reserved tx descriptors */ + wait_queue_head_t kib_idle_tx_waitq; /* block here for tx descriptor */ + __u64 kib_next_tx_cookie; /* RDMA completion cookie */ + spinlock_t kib_tx_lock; /* serialise */ - struct ib_device *koib_device; /* "the" device */ - struct ib_device_properties koib_device_props; /* its properties */ - int koib_port; /* port on the device */ - struct ib_port_properties koib_port_props; /* its properties */ - struct ib_pd *koib_pd; /* protection domain */ -#if OPENIBNAL_FMR - struct ib_fmr_pool *koib_fmr_pool; /* fast memory region 
pool */ + struct ib_device *kib_device; /* "the" device */ + struct ib_device_properties kib_device_props; /* its properties */ + int kib_port; /* port on the device */ + struct ib_port_properties kib_port_props; /* its properties */ + struct ib_pd *kib_pd; /* protection domain */ +#if IBNAL_FMR + struct ib_fmr_pool *kib_fmr_pool; /* fast memory region pool */ #endif - struct ib_cq *koib_rx_cq; /* receive completion queue */ - struct ib_cq *koib_tx_cq; /* transmit completion queue */ - void *koib_listen_handle; /* where I listen for connections */ - struct ib_common_attrib_service koib_service; /* SM service */ + struct ib_cq *kib_cq; /* completion queue */ + void *kib_listen_handle; /* where I listen for connections */ -} koib_data_t; - -#define OPENIBNAL_INIT_NOTHING 0 -#define OPENIBNAL_INIT_DATA 1 -#define OPENIBNAL_INIT_LIB 2 -#define OPENIBNAL_INIT_PD 3 -#define OPENIBNAL_INIT_FMR 4 -#define OPENIBNAL_INIT_TXD 5 -#define OPENIBNAL_INIT_RX_CQ 6 -#define OPENIBNAL_INIT_TX_CQ 7 -#define OPENIBNAL_INIT_ALL 8 +} kib_data_t; + +#define IBNAL_INIT_NOTHING 0 +#define IBNAL_INIT_DATA 1 +#define IBNAL_INIT_LIB 2 +#define IBNAL_INIT_PD 3 +#define IBNAL_INIT_FMR 4 +#define IBNAL_INIT_TXD 5 +#define IBNAL_INIT_CQ 6 +#define IBNAL_INIT_ALL 7 /************************************************************************ * Wire message structs. @@ -214,125 +210,125 @@ typedef struct __u32 md_lkey; __u32 md_rkey; __u64 md_addr; -} koib_md_t; +} kib_md_t; typedef struct { __u32 rd_key; /* remote key */ __u32 rd_nob; /* # of bytes */ __u64 rd_addr; /* remote io vaddr */ -} koib_rdma_desc_t; +} kib_rdma_desc_t; typedef struct { - ptl_hdr_t oibim_hdr; /* portals header */ - char oibim_payload[0]; /* piggy-backed payload */ -} koib_immediate_msg_t; + ptl_hdr_t ibim_hdr; /* portals header */ + char ibim_payload[0]; /* piggy-backed payload */ +} kib_immediate_msg_t; typedef struct { - ptl_hdr_t oibrm_hdr; /* portals header */ - __u64 oibrm_cookie; /* opaque completion cookie */ - koib_rdma_desc_t oibrm_desc; /* where to suck/blow */ -} koib_rdma_msg_t; + ptl_hdr_t ibrm_hdr; /* portals header */ + __u64 ibrm_cookie; /* opaque completion cookie */ + kib_rdma_desc_t ibrm_desc; /* where to suck/blow */ +} kib_rdma_msg_t; typedef struct { - __u64 oibcm_cookie; /* opaque completion cookie */ - __u32 oibcm_status; /* completion status */ -} koib_completion_msg_t; + __u64 ibcm_cookie; /* opaque completion cookie */ + __u32 ibcm_status; /* completion status */ +} kib_completion_msg_t; typedef struct { - __u32 oibm_magic; /* I'm an openibnal message */ - __u16 oibm_version; /* this is my version number */ - __u8 oibm_type; /* msg type */ - __u8 oibm_credits; /* returned credits */ -#if OPENIBNAL_CKSUM - __u32 oibm_nob; - __u32 oibm_cksum; + __u32 ibm_magic; /* I'm an openibnal message */ + __u16 ibm_version; /* this is my version number */ + __u8 ibm_type; /* msg type */ + __u8 ibm_credits; /* returned credits */ +#if IBNAL_CKSUM + __u32 ibm_nob; + __u32 ibm_cksum; #endif union { - koib_immediate_msg_t immediate; - koib_rdma_msg_t rdma; - koib_completion_msg_t completion; - } oibm_u; -} koib_msg_t; - -#define OPENIBNAL_MSG_MAGIC 0x0be91b91 /* unique magic */ -#define OPENIBNAL_MSG_VERSION 1 /* current protocol version */ - -#define OPENIBNAL_MSG_NOOP 0xd0 /* nothing (just credits) */ -#define OPENIBNAL_MSG_IMMEDIATE 0xd1 /* portals hdr + payload */ -#define OPENIBNAL_MSG_PUT_RDMA 0xd2 /* portals PUT hdr + source rdma desc */ -#define OPENIBNAL_MSG_PUT_DONE 0xd3 /* signal PUT rdma completion */ -#define 
OPENIBNAL_MSG_GET_RDMA 0xd4 /* portals GET hdr + sink rdma desc */ -#define OPENIBNAL_MSG_GET_DONE 0xd5 /* signal GET rdma completion */ + kib_immediate_msg_t immediate; + kib_rdma_msg_t rdma; + kib_completion_msg_t completion; + } ibm_u; +} kib_msg_t; + +#define IBNAL_MSG_MAGIC 0x0be91b91 /* unique magic */ +#define IBNAL_MSG_VERSION 1 /* current protocol version */ + +#define IBNAL_MSG_NOOP 0xd0 /* nothing (just credits) */ +#define IBNAL_MSG_IMMEDIATE 0xd1 /* portals hdr + payload */ +#define IBNAL_MSG_PUT_RDMA 0xd2 /* portals PUT hdr + source rdma desc */ +#define IBNAL_MSG_PUT_DONE 0xd3 /* signal PUT rdma completion */ +#define IBNAL_MSG_GET_RDMA 0xd4 /* portals GET hdr + sink rdma desc */ +#define IBNAL_MSG_GET_DONE 0xd5 /* signal GET rdma completion */ /***********************************************************************/ -typedef struct koib_rx /* receive message */ +typedef struct kib_rx /* receive message */ { struct list_head rx_list; /* queue for attention */ - struct koib_conn *rx_conn; /* owning conn */ + struct kib_conn *rx_conn; /* owning conn */ int rx_rdma; /* RDMA completion posted? */ int rx_posted; /* posted? */ __u64 rx_vaddr; /* pre-mapped buffer (hca vaddr) */ - koib_msg_t *rx_msg; /* pre-mapped buffer (host vaddr) */ + kib_msg_t *rx_msg; /* pre-mapped buffer (host vaddr) */ struct ib_receive_param rx_sp; /* receive work item */ struct ib_gather_scatter rx_gl; /* and it's memory */ -} koib_rx_t; +} kib_rx_t; -typedef struct koib_tx /* transmit message */ +typedef struct kib_tx /* transmit message */ { struct list_head tx_list; /* queue on idle_txs ibc_tx_queue etc. */ int tx_isnblk; /* I'm reserved for non-blocking sends */ - struct koib_conn *tx_conn; /* owning conn */ + struct kib_conn *tx_conn; /* owning conn */ int tx_mapped; /* mapped for RDMA? */ int tx_sending; /* # tx callbacks outstanding */ int tx_status; /* completion status */ - int tx_passive_rdma; /* waiting for peer to RDMA? */ - int tx_passive_rdma_wait; /* on ibc_rdma_queue */ - unsigned long tx_passive_rdma_deadline; /* completion deadline */ + unsigned long tx_deadline; /* completion deadline */ + int tx_passive_rdma; /* peer sucks/blows */ + int tx_passive_rdma_wait; /* waiting for peer to complete */ __u64 tx_passive_rdma_cookie; /* completion cookie */ lib_msg_t *tx_libmsg[2]; /* lib msgs to finalize on completion */ - koib_md_t tx_md; /* RDMA mapping (active/passive) */ + kib_md_t tx_md; /* RDMA mapping (active/passive) */ __u64 tx_vaddr; /* pre-mapped buffer (hca vaddr) */ - koib_msg_t *tx_msg; /* pre-mapped buffer (host vaddr) */ + kib_msg_t *tx_msg; /* pre-mapped buffer (host vaddr) */ int tx_nsp; /* # send work items */ struct ib_send_param tx_sp[2]; /* send work items... 
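 *
 * at most two are ever filled per tx: slot 0 may carry an RDMA
 * READ/WRITE work request and the final slot carries the message
 * SEND, hence the invariant relied on for QP sizing elsewhere in
 * this patch:
 *
 *   LASSERT (tx->tx_nsp > 0 && tx->tx_nsp <= 2);
 *
 * which is also why the CQ above budgets two completions per tx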
*/ struct ib_gather_scatter tx_gl[2]; /* ...and their memory */ -} koib_tx_t; +} kib_tx_t; -#define KOIB_TX_UNMAPPED 0 -#define KOIB_TX_MAPPED 1 -#define KOIB_TX_MAPPED_FMR 2 +#define KIB_TX_UNMAPPED 0 +#define KIB_TX_MAPPED 1 +#define KIB_TX_MAPPED_FMR 2 -typedef struct koib_wire_connreq +typedef struct kib_wire_connreq { __u32 wcr_magic; /* I'm an openibnal connreq */ __u16 wcr_version; /* this is my version number */ __u16 wcr_queue_depth; /* this is my receive queue size */ __u64 wcr_nid; /* peer's NID */ __u64 wcr_incarnation; /* peer's incarnation */ -} koib_wire_connreq_t; +} kib_wire_connreq_t; -typedef struct koib_connreq +typedef struct kib_connreq { /* connection-in-progress */ - struct koib_conn *cr_conn; - koib_wire_connreq_t cr_wcr; + struct kib_conn *cr_conn; + kib_wire_connreq_t cr_wcr; __u64 cr_tid; struct ib_common_attrib_service cr_service; tTS_IB_GID cr_gid; struct ib_path_record cr_path; struct ib_cm_active_param cr_connparam; -} koib_connreq_t; +} kib_connreq_t; -typedef struct koib_conn +typedef struct kib_conn { - struct koib_peer *ibc_peer; /* owning peer */ + struct kib_peer *ibc_peer; /* owning peer */ struct list_head ibc_list; /* stash on peer's conn list */ __u64 ibc_incarnation; /* which instance of the peer */ atomic_t ibc_refcount; /* # users */ @@ -342,27 +338,27 @@ typedef struct koib_conn int ibc_credits; /* # credits I have */ int ibc_outstanding_credits; /* # credits to return */ struct list_head ibc_tx_queue; /* send queue */ - struct list_head ibc_rdma_queue; /* tx awaiting RDMA completion */ + struct list_head ibc_active_txs; /* active tx awaiting completion */ spinlock_t ibc_lock; /* serialise */ - koib_rx_t *ibc_rxs; /* the rx descs */ - koib_pages_t *ibc_rx_pages; /* premapped rx msg pages */ + kib_rx_t *ibc_rxs; /* the rx descs */ + kib_pages_t *ibc_rx_pages; /* premapped rx msg pages */ struct ib_qp *ibc_qp; /* queue pair */ __u32 ibc_qpn; /* queue pair number */ tTS_IB_CM_COMM_ID ibc_comm_id; /* connection ID? 
*/ - koib_connreq_t *ibc_connreq; /* connection request state */ -} koib_conn_t; + kib_connreq_t *ibc_connreq; /* connection request state */ +} kib_conn_t; -#define OPENIBNAL_CONN_INIT_NOTHING 0 /* initial state */ -#define OPENIBNAL_CONN_INIT_QP 1 /* ibc_qp set up */ -#define OPENIBNAL_CONN_CONNECTING 2 /* started to connect */ -#define OPENIBNAL_CONN_ESTABLISHED 3 /* connection established */ -#define OPENIBNAL_CONN_DEATHROW 4 /* waiting to be closed */ -#define OPENIBNAL_CONN_ZOMBIE 5 /* waiting to be freed */ +#define IBNAL_CONN_INIT_NOTHING 0 /* initial state */ +#define IBNAL_CONN_INIT_QP 1 /* ibc_qp set up */ +#define IBNAL_CONN_CONNECTING 2 /* started to connect */ +#define IBNAL_CONN_ESTABLISHED 3 /* connection established */ +#define IBNAL_CONN_DEATHROW 4 /* waiting to be closed */ +#define IBNAL_CONN_ZOMBIE 5 /* waiting to be freed */ -typedef struct koib_peer +typedef struct kib_peer { struct list_head ibp_list; /* stash on global peer list */ - struct list_head ibp_connd_list; /* schedule on koib_connd_peers */ + struct list_head ibp_connd_list; /* schedule on kib_connd_peers */ ptl_nid_t ibp_nid; /* who's on the other end(s) */ atomic_t ibp_refcount; /* # users */ int ibp_persistence; /* "known" peer refs */ @@ -371,30 +367,30 @@ typedef struct koib_peer int ibp_connecting; /* connecting+accepting */ unsigned long ibp_reconnect_time; /* when reconnect may be attempted */ unsigned long ibp_reconnect_interval; /* exponential backoff */ -} koib_peer_t; +} kib_peer_t; -extern lib_nal_t koibnal_lib; -extern koib_data_t koibnal_data; -extern koib_tunables_t koibnal_tunables; +extern lib_nal_t kibnal_lib; +extern kib_data_t kibnal_data; +extern kib_tunables_t kibnal_tunables; static inline struct list_head * -koibnal_nid2peerlist (ptl_nid_t nid) +kibnal_nid2peerlist (ptl_nid_t nid) { - unsigned int hash = ((unsigned int)nid) % koibnal_data.koib_peer_hash_size; + unsigned int hash = ((unsigned int)nid) % kibnal_data.kib_peer_hash_size; - return (&koibnal_data.koib_peers [hash]); + return (&kibnal_data.kib_peers [hash]); } static inline int -koibnal_peer_active(koib_peer_t *peer) +kibnal_peer_active(kib_peer_t *peer) { /* Am I in the peer hash table? 
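 *
 * Peers are chained on the kib_peers[] buckets chosen by
 * kibnal_nid2peerlist() above; e.g. with the default
 * IBNAL_PEER_HASH_SIZE of 101, NID 0x12345 (74565 decimal) hashes
 * to bucket 74565 % 101 == 27.  Note the test below assumes that
 * unlinking a peer re-initialises ibp_list, presumably via
 * list_del_init() in kibnal_unlink_peer_locked(), so that
 * kibnal_peer_active() goes false once the peer leaves the table.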
*/ return (!list_empty(&peer->ibp_list)); } static inline void -koibnal_queue_tx_locked (koib_tx_t *tx, koib_conn_t *conn) +kibnal_queue_tx_locked (kib_tx_t *tx, kib_conn_t *conn) { /* CAVEAT EMPTOR: tx takes caller's ref on conn */ @@ -402,40 +398,41 @@ koibnal_queue_tx_locked (koib_tx_t *tx, koib_conn_t *conn) LASSERT (tx->tx_conn == NULL); /* only set here */ tx->tx_conn = conn; + tx->tx_deadline = jiffies + kibnal_tunables.kib_io_timeout * HZ; list_add_tail(&tx->tx_list, &conn->ibc_tx_queue); } -#define KOIBNAL_SERVICE_KEY_MASK (IB_SA_SERVICE_COMP_MASK_NAME | \ - IB_SA_SERVICE_COMP_MASK_DATA8_1 | \ - IB_SA_SERVICE_COMP_MASK_DATA8_2 | \ - IB_SA_SERVICE_COMP_MASK_DATA8_3 | \ - IB_SA_SERVICE_COMP_MASK_DATA8_4 | \ - IB_SA_SERVICE_COMP_MASK_DATA8_5 | \ - IB_SA_SERVICE_COMP_MASK_DATA8_6 | \ - IB_SA_SERVICE_COMP_MASK_DATA8_7 | \ - IB_SA_SERVICE_COMP_MASK_DATA8_8) +#define KIBNAL_SERVICE_KEY_MASK (IB_SA_SERVICE_COMP_MASK_NAME | \ + IB_SA_SERVICE_COMP_MASK_DATA8_1 | \ + IB_SA_SERVICE_COMP_MASK_DATA8_2 | \ + IB_SA_SERVICE_COMP_MASK_DATA8_3 | \ + IB_SA_SERVICE_COMP_MASK_DATA8_4 | \ + IB_SA_SERVICE_COMP_MASK_DATA8_5 | \ + IB_SA_SERVICE_COMP_MASK_DATA8_6 | \ + IB_SA_SERVICE_COMP_MASK_DATA8_7 | \ + IB_SA_SERVICE_COMP_MASK_DATA8_8) static inline __u64* -koibnal_service_nid_field(struct ib_common_attrib_service *srv) +kibnal_service_nid_field(struct ib_common_attrib_service *srv) { - /* must be consistent with KOIBNAL_SERVICE_KEY_MASK */ + /* must be consistent with KIBNAL_SERVICE_KEY_MASK */ return (__u64 *)srv->service_data8; } static inline void -koibnal_set_service_keys(struct ib_common_attrib_service *srv, ptl_nid_t nid) +kibnal_set_service_keys(struct ib_common_attrib_service *srv, ptl_nid_t nid) { - LASSERT (strlen (OPENIBNAL_SERVICE_NAME) < sizeof(srv->service_name)); + LASSERT (strlen (IBNAL_SERVICE_NAME) < sizeof(srv->service_name)); memset (srv->service_name, 0, sizeof(srv->service_name)); - strcpy (srv->service_name, OPENIBNAL_SERVICE_NAME); + strcpy (srv->service_name, IBNAL_SERVICE_NAME); - *koibnal_service_nid_field(srv) = cpu_to_le64(nid); + *kibnal_service_nid_field(srv) = cpu_to_le64(nid); } #if 0 static inline void -koibnal_show_rdma_attr (koib_conn_t *conn) +kibnal_show_rdma_attr (kib_conn_t *conn) { struct ib_qp_attribute qp_attr; int rc; @@ -457,7 +454,7 @@ koibnal_show_rdma_attr (koib_conn_t *conn) #if CONFIG_X86 static inline __u64 -koibnal_page2phys (struct page *p) +kibnal_page2phys (struct page *p) { __u64 page_number = p - mem_map; @@ -467,42 +464,69 @@ koibnal_page2phys (struct page *p) # error "no page->phys" #endif -extern koib_peer_t *koibnal_create_peer (ptl_nid_t nid); -extern void koibnal_put_peer (koib_peer_t *peer); -extern int koibnal_del_peer (ptl_nid_t nid, int single_share); -extern koib_peer_t *koibnal_find_peer_locked (ptl_nid_t nid); -extern void koibnal_unlink_peer_locked (koib_peer_t *peer); -extern int koibnal_close_stale_conns_locked (koib_peer_t *peer, +/* CAVEAT EMPTOR: + * We rely on tx/rx descriptor alignment to allow us to use the lowest bit + * of the work request id as a flag to determine if the completion is for a + * transmit or a receive. It seems that that the CQ entry's 'op' field + * isn't always set correctly on completions that occur after QP teardown. */ + +static inline __u64 +kibnal_ptr2wreqid (void *ptr, int isrx) +{ + unsigned long lptr = (unsigned long)ptr; + + LASSERT ((lptr & 1) == 0); + return (__u64)(lptr | (isrx ? 
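/* the stolen bit records direction; e.g. an
                                  * 8-byte-aligned rx desc at 0xffff8800deadbee0
                                  * (an illustrative address) yields wreqid
                                  * 0xffff8800deadbee1, and kibnal_wreqid2ptr()
                                  * below masks the bit off again */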
1 : 0)); +} + +static inline void * +kibnal_wreqid2ptr (__u64 wreqid) +{ + return (void *)(((unsigned long)wreqid) & ~1UL); +} + +static inline int +kibnal_wreqid_is_rx (__u64 wreqid) +{ + return (wreqid & 1) != 0; +} + +extern kib_peer_t *kibnal_create_peer (ptl_nid_t nid); +extern void kibnal_put_peer (kib_peer_t *peer); +extern int kibnal_del_peer (ptl_nid_t nid, int single_share); +extern kib_peer_t *kibnal_find_peer_locked (ptl_nid_t nid); +extern void kibnal_unlink_peer_locked (kib_peer_t *peer); +extern int kibnal_close_stale_conns_locked (kib_peer_t *peer, __u64 incarnation); -extern koib_conn_t *koibnal_create_conn (void); -extern void koibnal_put_conn (koib_conn_t *conn); -extern void koibnal_destroy_conn (koib_conn_t *conn); -extern int koibnal_alloc_pages (koib_pages_t **pp, int npages, int access); -extern void koibnal_free_pages (koib_pages_t *p); +extern kib_conn_t *kibnal_create_conn (void); +extern void kibnal_put_conn (kib_conn_t *conn); +extern void kibnal_destroy_conn (kib_conn_t *conn); +extern int kibnal_alloc_pages (kib_pages_t **pp, int npages, int access); +extern void kibnal_free_pages (kib_pages_t *p); -extern void koibnal_check_sends (koib_conn_t *conn); +extern void kibnal_check_sends (kib_conn_t *conn); extern tTS_IB_CM_CALLBACK_RETURN -koibnal_conn_callback (tTS_IB_CM_EVENT event, tTS_IB_CM_COMM_ID cid, +kibnal_conn_callback (tTS_IB_CM_EVENT event, tTS_IB_CM_COMM_ID cid, void *param, void *arg); extern tTS_IB_CM_CALLBACK_RETURN -koibnal_passive_conn_callback (tTS_IB_CM_EVENT event, tTS_IB_CM_COMM_ID cid, +kibnal_passive_conn_callback (tTS_IB_CM_EVENT event, tTS_IB_CM_COMM_ID cid, void *param, void *arg); -extern void koibnal_close_conn_locked (koib_conn_t *conn, int error); -extern void koibnal_destroy_conn (koib_conn_t *conn); -extern int koibnal_thread_start (int (*fn)(void *arg), void *arg); -extern int koibnal_scheduler(void *arg); -extern int koibnal_connd (void *arg); -extern void koibnal_rx_callback (struct ib_cq *cq, struct ib_cq_entry *e, void *arg); -extern void koibnal_tx_callback (struct ib_cq *cq, struct ib_cq_entry *e, void *arg); -extern void koibnal_init_tx_msg (koib_tx_t *tx, int type, int body_nob); -extern int koibnal_close_conn (koib_conn_t *conn, int why); -extern void koibnal_start_active_rdma (int type, int status, - koib_rx_t *rx, lib_msg_t *libmsg, - unsigned int niov, - struct iovec *iov, ptl_kiov_t *kiov, - size_t offset, size_t nob); +extern void kibnal_close_conn_locked (kib_conn_t *conn, int error); +extern void kibnal_destroy_conn (kib_conn_t *conn); +extern int kibnal_thread_start (int (*fn)(void *arg), void *arg); +extern int kibnal_scheduler(void *arg); +extern int kibnal_connd (void *arg); +extern void kibnal_callback (struct ib_cq *cq, struct ib_cq_entry *e, void *arg); +extern void kibnal_init_tx_msg (kib_tx_t *tx, int type, int body_nob); +extern int kibnal_close_conn (kib_conn_t *conn, int why); +extern void kibnal_start_active_rdma (int type, int status, + kib_rx_t *rx, lib_msg_t *libmsg, + unsigned int niov, + struct iovec *iov, ptl_kiov_t *kiov, + size_t offset, size_t nob); + diff --git a/lnet/klnds/openiblnd/openiblnd_cb.c b/lnet/klnds/openiblnd/openiblnd_cb.c index 79bf37a..d774853 100644 --- a/lnet/klnds/openiblnd/openiblnd_cb.c +++ b/lnet/klnds/openiblnd/openiblnd_cb.c @@ -28,20 +28,20 @@ * */ void -koibnal_schedule_tx_done (koib_tx_t *tx) +kibnal_schedule_tx_done (kib_tx_t *tx) { unsigned long flags; - spin_lock_irqsave (&koibnal_data.koib_sched_lock, flags); + spin_lock_irqsave (&kibnal_data.kib_sched_lock, 
flags); - list_add_tail(&tx->tx_list, &koibnal_data.koib_sched_txq); - wake_up (&koibnal_data.koib_sched_waitq); + list_add_tail(&tx->tx_list, &kibnal_data.kib_sched_txq); + wake_up (&kibnal_data.kib_sched_waitq); - spin_unlock_irqrestore(&koibnal_data.koib_sched_lock, flags); + spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, flags); } void -koibnal_tx_done (koib_tx_t *tx) +kibnal_tx_done (kib_tx_t *tx) { ptl_err_t ptlrc = (tx->tx_status == 0) ? PTL_OK : PTL_FAIL; unsigned long flags; @@ -49,31 +49,31 @@ koibnal_tx_done (koib_tx_t *tx) int rc; LASSERT (tx->tx_sending == 0); /* mustn't be awaiting callback */ - LASSERT (!tx->tx_passive_rdma_wait); /* mustn't be on ibc_rdma_queue */ + LASSERT (!tx->tx_passive_rdma_wait); /* mustn't be awaiting RDMA */ switch (tx->tx_mapped) { default: LBUG(); - case KOIB_TX_UNMAPPED: + case KIB_TX_UNMAPPED: break; - case KOIB_TX_MAPPED: + case KIB_TX_MAPPED: if (in_interrupt()) { /* can't deregister memory in IRQ context... */ - koibnal_schedule_tx_done(tx); + kibnal_schedule_tx_done(tx); return; } rc = ib_memory_deregister(tx->tx_md.md_handle.mr); LASSERT (rc == 0); - tx->tx_mapped = KOIB_TX_UNMAPPED; + tx->tx_mapped = KIB_TX_UNMAPPED; break; -#if OPENIBNAL_FMR - case KOIB_TX_MAPPED_FMR: +#if IBNAL_FMR + case KIB_TX_MAPPED_FMR: if (in_interrupt() && tx->tx_status != 0) { /* can't flush FMRs in IRQ context... */ - koibnal_schedule_tx_done(tx); + kibnal_schedule_tx_done(tx); return; } @@ -81,8 +81,8 @@ koibnal_tx_done (koib_tx_t *tx) LASSERT (rc == 0); if (tx->tx_status != 0) - ib_fmr_pool_force_flush(koibnal_data.koib_fmr_pool); - tx->tx_mapped = KOIB_TX_UNMAPPED; + ib_fmr_pool_force_flush(kibnal_data.kib_fmr_pool); + tx->tx_mapped = KIB_TX_UNMAPPED; break; #endif } @@ -92,12 +92,12 @@ koibnal_tx_done (koib_tx_t *tx) if (tx->tx_libmsg[i] == NULL) continue; - lib_finalize (&koibnal_lib, NULL, tx->tx_libmsg[i], ptlrc); + lib_finalize (&kibnal_lib, NULL, tx->tx_libmsg[i], ptlrc); tx->tx_libmsg[i] = NULL; } if (tx->tx_conn != NULL) { - koibnal_put_conn (tx->tx_conn); + kibnal_put_conn (tx->tx_conn); tx->tx_conn = NULL; } @@ -105,52 +105,52 @@ koibnal_tx_done (koib_tx_t *tx) tx->tx_passive_rdma = 0; tx->tx_status = 0; - spin_lock_irqsave (&koibnal_data.koib_tx_lock, flags); + spin_lock_irqsave (&kibnal_data.kib_tx_lock, flags); if (tx->tx_isnblk) { - list_add_tail (&tx->tx_list, &koibnal_data.koib_idle_nblk_txs); + list_add_tail (&tx->tx_list, &kibnal_data.kib_idle_nblk_txs); } else { - list_add_tail (&tx->tx_list, &koibnal_data.koib_idle_txs); - wake_up (&koibnal_data.koib_idle_tx_waitq); + list_add_tail (&tx->tx_list, &kibnal_data.kib_idle_txs); + wake_up (&kibnal_data.kib_idle_tx_waitq); } - spin_unlock_irqrestore (&koibnal_data.koib_tx_lock, flags); + spin_unlock_irqrestore (&kibnal_data.kib_tx_lock, flags); } -koib_tx_t * -koibnal_get_idle_tx (int may_block) +kib_tx_t * +kibnal_get_idle_tx (int may_block) { - unsigned long flags; - koib_tx_t *tx = NULL; + unsigned long flags; + kib_tx_t *tx = NULL; for (;;) { - spin_lock_irqsave (&koibnal_data.koib_tx_lock, flags); + spin_lock_irqsave (&kibnal_data.kib_tx_lock, flags); /* "normal" descriptor is free */ - if (!list_empty (&koibnal_data.koib_idle_txs)) { - tx = list_entry (koibnal_data.koib_idle_txs.next, - koib_tx_t, tx_list); + if (!list_empty (&kibnal_data.kib_idle_txs)) { + tx = list_entry (kibnal_data.kib_idle_txs.next, + kib_tx_t, tx_list); break; } if (!may_block) { /* may dip into reserve pool */ - if (list_empty (&koibnal_data.koib_idle_nblk_txs)) { + if (list_empty 
(&kibnal_data.kib_idle_nblk_txs)) { CERROR ("reserved tx desc pool exhausted\n"); break; } - tx = list_entry (koibnal_data.koib_idle_nblk_txs.next, - koib_tx_t, tx_list); + tx = list_entry (kibnal_data.kib_idle_nblk_txs.next, + kib_tx_t, tx_list); break; } /* block for idle tx */ - spin_unlock_irqrestore (&koibnal_data.koib_tx_lock, flags); + spin_unlock_irqrestore (&kibnal_data.kib_tx_lock, flags); - wait_event (koibnal_data.koib_idle_tx_waitq, - !list_empty (&koibnal_data.koib_idle_txs) || - koibnal_data.koib_shutdown); + wait_event (kibnal_data.kib_idle_tx_waitq, + !list_empty (&kibnal_data.kib_idle_txs) || + kibnal_data.kib_shutdown); } if (tx != NULL) { @@ -159,9 +159,9 @@ koibnal_get_idle_tx (int may_block) /* Allocate a new passive RDMA completion cookie. It might * not be needed, but we've got a lock right now and we're * unlikely to wrap... */ - tx->tx_passive_rdma_cookie = koibnal_data.koib_next_tx_cookie++; + tx->tx_passive_rdma_cookie = kibnal_data.kib_next_tx_cookie++; - LASSERT (tx->tx_mapped == KOIB_TX_UNMAPPED); + LASSERT (tx->tx_mapped == KIB_TX_UNMAPPED); LASSERT (tx->tx_nsp == 0); LASSERT (tx->tx_sending == 0); LASSERT (tx->tx_status == 0); @@ -172,15 +172,15 @@ koibnal_get_idle_tx (int may_block) LASSERT (tx->tx_libmsg[1] == NULL); } - spin_unlock_irqrestore (&koibnal_data.koib_tx_lock, flags); + spin_unlock_irqrestore (&kibnal_data.kib_tx_lock, flags); return (tx); } int -koibnal_dist(lib_nal_t *nal, ptl_nid_t nid, unsigned long *dist) +kibnal_dist(lib_nal_t *nal, ptl_nid_t nid, unsigned long *dist) { - /* I would guess that if koibnal_get_peer (nid) == NULL, + /* I would guess that if kibnal_get_peer (nid) == NULL, and we're not routing, then 'nid' is very distant :) */ if ( nal->libnal_ni.ni_pid.nid == nid ) { *dist = 0; @@ -192,7 +192,7 @@ koibnal_dist(lib_nal_t *nal, ptl_nid_t nid, unsigned long *dist) } void -koibnal_complete_passive_rdma(koib_conn_t *conn, __u64 cookie, int status) +kibnal_complete_passive_rdma(kib_conn_t *conn, __u64 cookie, int status) { struct list_head *ttmp; unsigned long flags; @@ -200,30 +200,34 @@ koibnal_complete_passive_rdma(koib_conn_t *conn, __u64 cookie, int status) spin_lock_irqsave (&conn->ibc_lock, flags); - list_for_each (ttmp, &conn->ibc_rdma_queue) { - koib_tx_t *tx = list_entry(ttmp, koib_tx_t, tx_list); - - LASSERT (tx->tx_passive_rdma); - LASSERT (tx->tx_passive_rdma_wait); + list_for_each (ttmp, &conn->ibc_active_txs) { + kib_tx_t *tx = list_entry(ttmp, kib_tx_t, tx_list); - if (tx->tx_passive_rdma_cookie != cookie) - continue; + LASSERT (tx->tx_passive_rdma || + !tx->tx_passive_rdma_wait); - CDEBUG(D_NET, "Complete %p "LPD64"\n", tx, cookie); + LASSERT (tx->tx_passive_rdma_wait || + tx->tx_sending != 0); - list_del (&tx->tx_list); + if (!tx->tx_passive_rdma_wait || + tx->tx_passive_rdma_cookie != cookie) + continue; + + CDEBUG(D_NET, "Complete %p "LPD64": %d\n", tx, cookie, status); + tx->tx_status = status; tx->tx_passive_rdma_wait = 0; idle = (tx->tx_sending == 0); - tx->tx_status = status; + if (idle) + list_del (&tx->tx_list); spin_unlock_irqrestore (&conn->ibc_lock, flags); /* I could be racing with tx callbacks. 
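A tx may
         * simultaneously have send work items outstanding
         * (tx_sending > 0) and be awaiting the peer's RDMA
         * (tx_passive_rdma_wait); it is only idle, and only freeable,
         * once both have cleared.  The tx callback makes the same
         * test under ibc_lock:
         *
         *   idle = (tx->tx_sending == 0) && !tx->tx_passive_rdma_wait;
         *   if (idle)
         *           list_del (&tx->tx_list);
         *
         * 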
It's whoever * _makes_ tx idle that frees it */ if (idle) - koibnal_tx_done (tx); + kibnal_tx_done (tx); return; } @@ -234,32 +238,32 @@ koibnal_complete_passive_rdma(koib_conn_t *conn, __u64 cookie, int status) } void -koibnal_post_rx (koib_rx_t *rx, int do_credits) +kibnal_post_rx (kib_rx_t *rx, int do_credits) { - koib_conn_t *conn = rx->rx_conn; + kib_conn_t *conn = rx->rx_conn; int rc; unsigned long flags; rx->rx_gl = (struct ib_gather_scatter) { .address = rx->rx_vaddr, - .length = OPENIBNAL_MSG_SIZE, - .key = conn->ibc_rx_pages->oibp_lkey, + .length = IBNAL_MSG_SIZE, + .key = conn->ibc_rx_pages->ibp_lkey, }; - + rx->rx_sp = (struct ib_receive_param) { - .work_request_id = (__u64)(unsigned long)rx, + .work_request_id = kibnal_ptr2wreqid(rx, 1), .scatter_list = &rx->rx_gl, .num_scatter_entries = 1, .device_specific = NULL, .signaled = 1, }; - LASSERT (conn->ibc_state >= OPENIBNAL_CONN_ESTABLISHED); + LASSERT (conn->ibc_state >= IBNAL_CONN_ESTABLISHED); LASSERT (!rx->rx_posted); rx->rx_posted = 1; mb(); - if (conn->ibc_state != OPENIBNAL_CONN_ESTABLISHED) + if (conn->ibc_state != IBNAL_CONN_ESTABLISHED) rc = -ECONNABORTED; else rc = ib_receive (conn->ibc_qp, &rx->rx_sp, 1); @@ -270,26 +274,26 @@ koibnal_post_rx (koib_rx_t *rx, int do_credits) conn->ibc_outstanding_credits++; spin_unlock_irqrestore(&conn->ibc_lock, flags); - koibnal_check_sends(conn); + kibnal_check_sends(conn); } return; } - if (conn->ibc_state == OPENIBNAL_CONN_ESTABLISHED) { + if (conn->ibc_state == IBNAL_CONN_ESTABLISHED) { CERROR ("Error posting receive -> "LPX64": %d\n", conn->ibc_peer->ibp_nid, rc); - koibnal_close_conn (rx->rx_conn, rc); + kibnal_close_conn (rx->rx_conn, rc); } else { CDEBUG (D_NET, "Error posting receive -> "LPX64": %d\n", conn->ibc_peer->ibp_nid, rc); } /* Drop rx's ref */ - koibnal_put_conn (conn); + kibnal_put_conn (conn); } -#if OPENIBNAL_CKSUM -__u32 koibnal_cksum (void *ptr, int nob) +#if IBNAL_CKSUM +__u32 kibnal_cksum (void *ptr, int nob) { char *c = ptr; __u32 sum = 0; @@ -302,17 +306,17 @@ __u32 koibnal_cksum (void *ptr, int nob) #endif void -koibnal_rx_callback (struct ib_cq *cq, struct ib_cq_entry *e, void *arg) +kibnal_rx_callback (struct ib_cq_entry *e) { - koib_rx_t *rx = (koib_rx_t *)((unsigned long)e->work_request_id); - koib_msg_t *msg = rx->rx_msg; - koib_conn_t *conn = rx->rx_conn; + kib_rx_t *rx = (kib_rx_t *)kibnal_wreqid2ptr(e->work_request_id); + kib_msg_t *msg = rx->rx_msg; + kib_conn_t *conn = rx->rx_conn; int nob = e->bytes_transferred; - const int base_nob = offsetof(koib_msg_t, oibm_u); + const int base_nob = offsetof(kib_msg_t, ibm_u); int credits; int flipped; unsigned long flags; -#if OPENIBNAL_CKSUM +#if IBNAL_CKSUM __u32 msg_cksum; __u32 computed_cksum; #endif @@ -324,11 +328,11 @@ koibnal_rx_callback (struct ib_cq *cq, struct ib_cq_entry *e, void *arg) /* receives complete with error in any case after we've started * closing the QP */ - if (conn->ibc_state >= OPENIBNAL_CONN_DEATHROW) + if (conn->ibc_state >= IBNAL_CONN_DEATHROW) goto failed; /* We don't post receives until the conn is established */ - LASSERT (conn->ibc_state == OPENIBNAL_CONN_ESTABLISHED); + LASSERT (conn->ibc_state == IBNAL_CONN_ESTABLISHED); if (e->status != IB_COMPLETION_STATUS_SUCCESS) { CERROR("Rx from "LPX64" failed: %d\n", @@ -344,35 +348,35 @@ koibnal_rx_callback (struct ib_cq *cq, struct ib_cq_entry *e, void *arg) /* Receiver does any byte flipping if necessary... 
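 *
         * Endianness is detected from the magic: a same-endian peer's
         * IBNAL_MSG_MAGIC (0x0be91b91) arrives verbatim, while an
         * opposite-endian peer's arrives byte-reversed as
         * __swab32(IBNAL_MSG_MAGIC) == 0x911be90b, in which case every
         * multi-byte field must be swabbed before use; any other value
         * is a protocol error and the conn is torn down.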
*/ - if (msg->oibm_magic == OPENIBNAL_MSG_MAGIC) { + if (msg->ibm_magic == IBNAL_MSG_MAGIC) { flipped = 0; } else { - if (msg->oibm_magic != __swab32(OPENIBNAL_MSG_MAGIC)) { + if (msg->ibm_magic != __swab32(IBNAL_MSG_MAGIC)) { CERROR ("Unrecognised magic: %08x from "LPX64"\n", - msg->oibm_magic, conn->ibc_peer->ibp_nid); + msg->ibm_magic, conn->ibc_peer->ibp_nid); goto failed; } flipped = 1; - __swab16s (&msg->oibm_version); - LASSERT (sizeof(msg->oibm_type) == 1); - LASSERT (sizeof(msg->oibm_credits) == 1); + __swab16s (&msg->ibm_version); + LASSERT (sizeof(msg->ibm_type) == 1); + LASSERT (sizeof(msg->ibm_credits) == 1); } - if (msg->oibm_version != OPENIBNAL_MSG_VERSION) { + if (msg->ibm_version != IBNAL_MSG_VERSION) { CERROR ("Incompatible msg version %d (%d expected)\n", - msg->oibm_version, OPENIBNAL_MSG_VERSION); + msg->ibm_version, IBNAL_MSG_VERSION); goto failed; } -#if OPENIBNAL_CKSUM - if (nob != msg->oibm_nob) { - CERROR ("Unexpected # bytes %d (%d expected)\n", nob, msg->oibm_nob); +#if IBNAL_CKSUM + if (nob != msg->ibm_nob) { + CERROR ("Unexpected # bytes %d (%d expected)\n", nob, msg->ibm_nob); goto failed; } - msg_cksum = le32_to_cpu(msg->oibm_cksum); - msg->oibm_cksum = 0; - computed_cksum = koibnal_cksum (msg, nob); + msg_cksum = le32_to_cpu(msg->ibm_cksum); + msg->ibm_cksum = 0; + computed_cksum = kibnal_cksum (msg, nob); if (msg_cksum != computed_cksum) { CERROR ("Checksum failure %d: (%d expected)\n", @@ -383,101 +387,101 @@ koibnal_rx_callback (struct ib_cq *cq, struct ib_cq_entry *e, void *arg) #endif /* Have I received credits that will let me send? */ - credits = msg->oibm_credits; + credits = msg->ibm_credits; if (credits != 0) { spin_lock_irqsave(&conn->ibc_lock, flags); conn->ibc_credits += credits; spin_unlock_irqrestore(&conn->ibc_lock, flags); - koibnal_check_sends(conn); + kibnal_check_sends(conn); } - switch (msg->oibm_type) { - case OPENIBNAL_MSG_NOOP: - koibnal_post_rx (rx, 1); + switch (msg->ibm_type) { + case IBNAL_MSG_NOOP: + kibnal_post_rx (rx, 1); return; - case OPENIBNAL_MSG_IMMEDIATE: - if (nob < base_nob + sizeof (koib_immediate_msg_t)) { + case IBNAL_MSG_IMMEDIATE: + if (nob < base_nob + sizeof (kib_immediate_msg_t)) { CERROR ("Short IMMEDIATE from "LPX64": %d\n", conn->ibc_peer->ibp_nid, nob); goto failed; } break; - case OPENIBNAL_MSG_PUT_RDMA: - case OPENIBNAL_MSG_GET_RDMA: - if (nob < base_nob + sizeof (koib_rdma_msg_t)) { + case IBNAL_MSG_PUT_RDMA: + case IBNAL_MSG_GET_RDMA: + if (nob < base_nob + sizeof (kib_rdma_msg_t)) { CERROR ("Short RDMA msg from "LPX64": %d\n", conn->ibc_peer->ibp_nid, nob); goto failed; } if (flipped) { - __swab32s(&msg->oibm_u.rdma.oibrm_desc.rd_key); - __swab32s(&msg->oibm_u.rdma.oibrm_desc.rd_nob); - __swab64s(&msg->oibm_u.rdma.oibrm_desc.rd_addr); + __swab32s(&msg->ibm_u.rdma.ibrm_desc.rd_key); + __swab32s(&msg->ibm_u.rdma.ibrm_desc.rd_nob); + __swab64s(&msg->ibm_u.rdma.ibrm_desc.rd_addr); } CDEBUG(D_NET, "%d RDMA: cookie "LPX64", key %x, addr "LPX64", nob %d\n", - msg->oibm_type, msg->oibm_u.rdma.oibrm_cookie, - msg->oibm_u.rdma.oibrm_desc.rd_key, - msg->oibm_u.rdma.oibrm_desc.rd_addr, - msg->oibm_u.rdma.oibrm_desc.rd_nob); + msg->ibm_type, msg->ibm_u.rdma.ibrm_cookie, + msg->ibm_u.rdma.ibrm_desc.rd_key, + msg->ibm_u.rdma.ibrm_desc.rd_addr, + msg->ibm_u.rdma.ibrm_desc.rd_nob); break; - case OPENIBNAL_MSG_PUT_DONE: - case OPENIBNAL_MSG_GET_DONE: - if (nob < base_nob + sizeof (koib_completion_msg_t)) { + case IBNAL_MSG_PUT_DONE: + case IBNAL_MSG_GET_DONE: + if (nob < base_nob + sizeof (kib_completion_msg_t)) { 
CERROR ("Short COMPLETION msg from "LPX64": %d\n", conn->ibc_peer->ibp_nid, nob); goto failed; } if (flipped) - __swab32s(&msg->oibm_u.completion.oibcm_status); + __swab32s(&msg->ibm_u.completion.ibcm_status); CDEBUG(D_NET, "%d DONE: cookie "LPX64", status %d\n", - msg->oibm_type, msg->oibm_u.completion.oibcm_cookie, - msg->oibm_u.completion.oibcm_status); + msg->ibm_type, msg->ibm_u.completion.ibcm_cookie, + msg->ibm_u.completion.ibcm_status); - koibnal_complete_passive_rdma (conn, - msg->oibm_u.completion.oibcm_cookie, - msg->oibm_u.completion.oibcm_status); - koibnal_post_rx (rx, 1); + kibnal_complete_passive_rdma (conn, + msg->ibm_u.completion.ibcm_cookie, + msg->ibm_u.completion.ibcm_status); + kibnal_post_rx (rx, 1); return; default: CERROR ("Can't parse type from "LPX64": %d\n", - conn->ibc_peer->ibp_nid, msg->oibm_type); + conn->ibc_peer->ibp_nid, msg->ibm_type); goto failed; } - /* schedule for koibnal_rx() in thread context */ - spin_lock_irqsave(&koibnal_data.koib_sched_lock, flags); + /* schedule for kibnal_rx() in thread context */ + spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags); - list_add_tail (&rx->rx_list, &koibnal_data.koib_sched_rxq); - wake_up (&koibnal_data.koib_sched_waitq); + list_add_tail (&rx->rx_list, &kibnal_data.kib_sched_rxq); + wake_up (&kibnal_data.kib_sched_waitq); - spin_unlock_irqrestore(&koibnal_data.koib_sched_lock, flags); + spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, flags); return; failed: CDEBUG(D_NET, "rx %p conn %p\n", rx, conn); - koibnal_close_conn(conn, -ECONNABORTED); + kibnal_close_conn(conn, -ECONNABORTED); /* Don't re-post rx & drop its ref on conn */ - koibnal_put_conn(conn); + kibnal_put_conn(conn); } void -koibnal_rx (koib_rx_t *rx) +kibnal_rx (kib_rx_t *rx) { - koib_msg_t *msg = rx->rx_msg; + kib_msg_t *msg = rx->rx_msg; /* Clear flag so I can detect if I've sent an RDMA completion */ rx->rx_rdma = 0; - switch (msg->oibm_type) { - case OPENIBNAL_MSG_GET_RDMA: - lib_parse(&koibnal_lib, &msg->oibm_u.rdma.oibrm_hdr, rx); + switch (msg->ibm_type) { + case IBNAL_MSG_GET_RDMA: + lib_parse(&kibnal_lib, &msg->ibm_u.rdma.ibrm_hdr, rx); /* If the incoming get was matched, I'll have initiated the * RDMA and the completion message... */ if (rx->rx_rdma) @@ -487,12 +491,12 @@ koibnal_rx (koib_rx_t *rx) * the peer's GET blocking for the full timeout. 
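 * Sending IBNAL_MSG_GET_DONE with status -EIO (below) lets the
         * peer fail its GET immediately instead of waiting out the
         * full kib_io_timeout (50 seconds by default).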
*/ CERROR ("Completing unmatched RDMA GET from "LPX64"\n", rx->rx_conn->ibc_peer->ibp_nid); - koibnal_start_active_rdma (OPENIBNAL_MSG_GET_DONE, -EIO, - rx, NULL, 0, NULL, NULL, 0, 0); + kibnal_start_active_rdma (IBNAL_MSG_GET_DONE, -EIO, + rx, NULL, 0, NULL, NULL, 0, 0); break; - case OPENIBNAL_MSG_PUT_RDMA: - lib_parse(&koibnal_lib, &msg->oibm_u.rdma.oibrm_hdr, rx); + case IBNAL_MSG_PUT_RDMA: + lib_parse(&kibnal_lib, &msg->ibm_u.rdma.ibrm_hdr, rx); if (rx->rx_rdma) break; /* This is most unusual, since even if lib_parse() didn't @@ -505,8 +509,8 @@ koibnal_rx (koib_rx_t *rx) rx->rx_conn->ibc_peer->ibp_nid); break; - case OPENIBNAL_MSG_IMMEDIATE: - lib_parse(&koibnal_lib, &msg->oibm_u.immediate.oibim_hdr, rx); + case IBNAL_MSG_IMMEDIATE: + lib_parse(&kibnal_lib, &msg->ibm_u.immediate.ibim_hdr, rx); LASSERT (!rx->rx_rdma); break; @@ -515,12 +519,12 @@ koibnal_rx (koib_rx_t *rx) break; } - koibnal_post_rx (rx, 1); + kibnal_post_rx (rx, 1); } #if 0 int -koibnal_kvaddr_to_phys (unsigned long vaddr, __u64 *physp) +kibnal_kvaddr_to_phys (unsigned long vaddr, __u64 *physp) { struct page *page; @@ -531,7 +535,7 @@ koibnal_kvaddr_to_phys (unsigned long vaddr, __u64 *physp) else if (vaddr >= PKMAP_BASE && vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE)) page = vmalloc_to_page ((void *)vaddr); - /* in 2.4 ^ just walks the page tables */ + /* in 2.4 ^ just walks the page tables */ #endif else page = virt_to_page (vaddr); @@ -540,13 +544,13 @@ koibnal_kvaddr_to_phys (unsigned long vaddr, __u64 *physp) !VALID_PAGE (page)) return (-EFAULT); - *physp = koibnal_page2phys(page) + (vaddr & (PAGE_SIZE - 1)); + *physp = kibnal_page2phys(page) + (vaddr & (PAGE_SIZE - 1)); return (0); } #endif int -koibnal_map_iov (koib_tx_t *tx, enum ib_memory_access access, +kibnal_map_iov (kib_tx_t *tx, enum ib_memory_access access, int niov, struct iovec *iov, int offset, int nob) { @@ -555,7 +559,7 @@ koibnal_map_iov (koib_tx_t *tx, enum ib_memory_access access, LASSERT (nob > 0); LASSERT (niov > 0); - LASSERT (tx->tx_mapped == KOIB_TX_UNMAPPED); + LASSERT (tx->tx_mapped == KIB_TX_UNMAPPED); while (offset >= iov->iov_len) { offset -= iov->iov_len; @@ -572,7 +576,7 @@ koibnal_map_iov (koib_tx_t *tx, enum ib_memory_access access, vaddr = (void *)(((unsigned long)iov->iov_base) + offset); tx->tx_md.md_addr = (__u64)((unsigned long)vaddr); - rc = ib_memory_register (koibnal_data.koib_pd, + rc = ib_memory_register (kibnal_data.kib_pd, vaddr, nob, access, &tx->tx_md.md_handle.mr, @@ -584,21 +588,21 @@ koibnal_map_iov (koib_tx_t *tx, enum ib_memory_access access, return (rc); } - tx->tx_mapped = KOIB_TX_MAPPED; + tx->tx_mapped = KIB_TX_MAPPED; return (0); } int -koibnal_map_kiov (koib_tx_t *tx, enum ib_memory_access access, +kibnal_map_kiov (kib_tx_t *tx, enum ib_memory_access access, int nkiov, ptl_kiov_t *kiov, int offset, int nob) { -#if OPENIBNAL_FMR +#if IBNAL_FMR __u64 *phys; - const int mapped = KOIB_TX_MAPPED_FMR; + const int mapped = KIB_TX_MAPPED_FMR; #else struct ib_physical_buffer *phys; - const int mapped = KOIB_TX_MAPPED; + const int mapped = KIB_TX_MAPPED; #endif int page_offset; int nphys; @@ -610,7 +614,7 @@ koibnal_map_kiov (koib_tx_t *tx, enum ib_memory_access access, LASSERT (nob > 0); LASSERT (nkiov > 0); - LASSERT (tx->tx_mapped == KOIB_TX_UNMAPPED); + LASSERT (tx->tx_mapped == KIB_TX_UNMAPPED); while (offset >= kiov->kiov_len) { offset -= kiov->kiov_len; @@ -627,10 +631,10 @@ koibnal_map_kiov (koib_tx_t *tx, enum ib_memory_access access, } page_offset = kiov->kiov_offset + offset; -#if OPENIBNAL_FMR - phys[0] 
= koibnal_page2phys(kiov->kiov_page); +#if IBNAL_FMR + phys[0] = kibnal_page2phys(kiov->kiov_page); #else - phys[0].address = koibnal_page2phys(kiov->kiov_page); + phys[0].address = kibnal_page2phys(kiov->kiov_page); phys[0].size = PAGE_SIZE; #endif nphys = 1; @@ -667,10 +671,10 @@ koibnal_map_kiov (koib_tx_t *tx, enum ib_memory_access access, } LASSERT (nphys * sizeof (*phys) < phys_size); -#if OPENIBNAL_FMR - phys[nphys] = koibnal_page2phys(kiov->kiov_page); +#if IBNAL_FMR + phys[nphys] = kibnal_page2phys(kiov->kiov_page); #else - phys[nphys].address = koibnal_page2phys(kiov->kiov_page); + phys[nphys].address = kibnal_page2phys(kiov->kiov_page); phys[nphys].size = PAGE_SIZE; #endif nphys++; @@ -683,10 +687,10 @@ koibnal_map_kiov (koib_tx_t *tx, enum ib_memory_access access, for (rc = 0; rc < nphys; rc++) CWARN (" [%d] "LPX64" / %d\n", rc, phys[rc].address, phys[rc].size); #endif - tx->tx_md.md_addr = OPENIBNAL_RDMA_BASE; + tx->tx_md.md_addr = IBNAL_RDMA_BASE; -#if OPENIBNAL_FMR - rc = ib_fmr_register_physical (koibnal_data.koib_fmr_pool, +#if IBNAL_FMR + rc = ib_fmr_register_physical (kibnal_data.kib_fmr_pool, phys, nphys, &tx->tx_md.md_addr, page_offset, @@ -694,7 +698,7 @@ koibnal_map_kiov (koib_tx_t *tx, enum ib_memory_access access, &tx->tx_md.md_lkey, &tx->tx_md.md_rkey); #else - rc = ib_memory_register_physical (koibnal_data.koib_pd, + rc = ib_memory_register_physical (kibnal_data.kib_pd, phys, nphys, &tx->tx_md.md_addr, nob, page_offset, @@ -717,24 +721,24 @@ koibnal_map_kiov (koib_tx_t *tx, enum ib_memory_access access, return (rc); } -koib_conn_t * -koibnal_find_conn_locked (koib_peer_t *peer) +kib_conn_t * +kibnal_find_conn_locked (kib_peer_t *peer) { struct list_head *tmp; /* just return the first connection */ list_for_each (tmp, &peer->ibp_conns) { - return (list_entry(tmp, koib_conn_t, ibc_list)); + return (list_entry(tmp, kib_conn_t, ibc_list)); } return (NULL); } void -koibnal_check_sends (koib_conn_t *conn) +kibnal_check_sends (kib_conn_t *conn) { unsigned long flags; - koib_tx_t *tx; + kib_tx_t *tx; int rc; int i; int done; @@ -742,39 +746,39 @@ koibnal_check_sends (koib_conn_t *conn) spin_lock_irqsave (&conn->ibc_lock, flags); + LASSERT (conn->ibc_nsends_posted <= IBNAL_MSG_QUEUE_SIZE); + if (list_empty(&conn->ibc_tx_queue) && - conn->ibc_outstanding_credits >= OPENIBNAL_CREDIT_HIGHWATER) { + conn->ibc_outstanding_credits >= IBNAL_CREDIT_HIGHWATER) { spin_unlock_irqrestore(&conn->ibc_lock, flags); - - tx = koibnal_get_idle_tx(0); /* don't block */ + + tx = kibnal_get_idle_tx(0); /* don't block */ if (tx != NULL) - koibnal_init_tx_msg(tx, OPENIBNAL_MSG_NOOP, 0); + kibnal_init_tx_msg(tx, IBNAL_MSG_NOOP, 0); spin_lock_irqsave(&conn->ibc_lock, flags); - + if (tx != NULL) { atomic_inc(&conn->ibc_refcount); - koibnal_queue_tx_locked(tx, conn); + kibnal_queue_tx_locked(tx, conn); } } - LASSERT (conn->ibc_nsends_posted <= OPENIBNAL_MSG_QUEUE_SIZE); - while (!list_empty (&conn->ibc_tx_queue)) { - tx = list_entry (conn->ibc_tx_queue.next, koib_tx_t, tx_list); + tx = list_entry (conn->ibc_tx_queue.next, kib_tx_t, tx_list); /* We rely on this for QP sizing */ LASSERT (tx->tx_nsp > 0 && tx->tx_nsp <= 2); LASSERT (conn->ibc_outstanding_credits >= 0); - LASSERT (conn->ibc_outstanding_credits <= OPENIBNAL_MSG_QUEUE_SIZE); + LASSERT (conn->ibc_outstanding_credits <= IBNAL_MSG_QUEUE_SIZE); LASSERT (conn->ibc_credits >= 0); - LASSERT (conn->ibc_credits <= OPENIBNAL_MSG_QUEUE_SIZE); + LASSERT (conn->ibc_credits <= IBNAL_MSG_QUEUE_SIZE); /* Not on ibc_rdma_queue */ LASSERT 
(!tx->tx_passive_rdma_wait); - if (conn->ibc_nsends_posted == OPENIBNAL_MSG_QUEUE_SIZE) + if (conn->ibc_nsends_posted == IBNAL_MSG_QUEUE_SIZE) break; if (conn->ibc_credits == 0) /* no credits */ @@ -786,37 +790,29 @@ koibnal_check_sends (koib_conn_t *conn) list_del (&tx->tx_list); - if (tx->tx_msg->oibm_type == OPENIBNAL_MSG_NOOP && + if (tx->tx_msg->ibm_type == IBNAL_MSG_NOOP && (!list_empty(&conn->ibc_tx_queue) || - conn->ibc_outstanding_credits < OPENIBNAL_CREDIT_HIGHWATER)) { - /* Redundant NOOP */ + conn->ibc_outstanding_credits < IBNAL_CREDIT_HIGHWATER)) { + /* redundant NOOP */ spin_unlock_irqrestore(&conn->ibc_lock, flags); - koibnal_tx_done(tx); + kibnal_tx_done(tx); spin_lock_irqsave(&conn->ibc_lock, flags); continue; } - - /* incoming RDMA completion can find this one now */ - if (tx->tx_passive_rdma) { - list_add (&tx->tx_list, &conn->ibc_rdma_queue); - tx->tx_passive_rdma_wait = 1; - tx->tx_passive_rdma_deadline = - jiffies + koibnal_tunables.koib_io_timeout * HZ; - } - tx->tx_msg->oibm_credits = conn->ibc_outstanding_credits; + tx->tx_msg->ibm_credits = conn->ibc_outstanding_credits; conn->ibc_outstanding_credits = 0; - /* use the free memory barrier when we unlock to ensure - * sending set before we can get the tx callback. */ conn->ibc_nsends_posted++; conn->ibc_credits--; - tx->tx_sending = tx->tx_nsp; -#if OPENIBNAL_CKSUM - tx->tx_msg->oibm_cksum = 0; - tx->tx_msg->oibm_cksum = koibnal_cksum(tx->tx_msg, tx->tx_msg->oibm_nob); - CDEBUG(D_NET, "cksum %x, nob %d\n", tx->tx_msg->oibm_cksum, tx->tx_msg->oibm_nob); + tx->tx_sending = tx->tx_nsp; + tx->tx_passive_rdma_wait = tx->tx_passive_rdma; + list_add (&tx->tx_list, &conn->ibc_active_txs); +#if IBNAL_CKSUM + tx->tx_msg->ibm_cksum = 0; + tx->tx_msg->ibm_cksum = kibnal_cksum(tx->tx_msg, tx->tx_msg->ibm_nob); + CDEBUG(D_NET, "cksum %x, nob %d\n", tx->tx_msg->ibm_cksum, tx->tx_msg->ibm_nob); #endif spin_unlock_irqrestore (&conn->ibc_lock, flags); @@ -827,7 +823,7 @@ koibnal_check_sends (koib_conn_t *conn) rc = -ECONNABORTED; nwork = 0; - if (conn->ibc_state == OPENIBNAL_CONN_ESTABLISHED) { + if (conn->ibc_state == IBNAL_CONN_ESTABLISHED) { tx->tx_status = 0; /* Driver only accepts 1 item at a time */ for (i = 0; i < tx->tx_nsp; i++) { @@ -842,31 +838,31 @@ koibnal_check_sends (koib_conn_t *conn) if (rc != 0) { /* NB credits are transferred in the actual * message, which can only be the last work item */ - conn->ibc_outstanding_credits += tx->tx_msg->oibm_credits; + conn->ibc_outstanding_credits += tx->tx_msg->ibm_credits; conn->ibc_credits++; conn->ibc_nsends_posted--; - tx->tx_sending -= tx->tx_nsp - nwork; + tx->tx_status = rc; + tx->tx_passive_rdma_wait = 0; + tx->tx_sending -= tx->tx_nsp - nwork; + done = (tx->tx_sending == 0); - - if (tx->tx_passive_rdma) { - tx->tx_passive_rdma_wait = 0; + if (done) list_del (&tx->tx_list); - } spin_unlock_irqrestore (&conn->ibc_lock, flags); - if (conn->ibc_state == OPENIBNAL_CONN_ESTABLISHED) + if (conn->ibc_state == IBNAL_CONN_ESTABLISHED) CERROR ("Error %d posting transmit to "LPX64"\n", rc, conn->ibc_peer->ibp_nid); else CDEBUG (D_NET, "Error %d posting transmit to " LPX64"\n", rc, conn->ibc_peer->ibp_nid); - koibnal_close_conn (conn, rc); + kibnal_close_conn (conn, rc); if (done) - koibnal_tx_done (tx); + kibnal_tx_done (tx); return; } @@ -876,10 +872,10 @@ koibnal_check_sends (koib_conn_t *conn) } void -koibnal_tx_callback (struct ib_cq *cq, struct ib_cq_entry *e, void *arg) +kibnal_tx_callback (struct ib_cq_entry *e) { - koib_tx_t *tx = (koib_tx_t *)((unsigned 
long)e->work_request_id); - koib_conn_t *conn; + kib_tx_t *tx = (kib_tx_t *)kibnal_wreqid2ptr(e->work_request_id); + kib_conn_t *conn; unsigned long flags; int idle; @@ -901,6 +897,8 @@ koibnal_tx_callback (struct ib_cq *cq, struct ib_cq_entry *e, void *arg) tx->tx_sending--; idle = (tx->tx_sending == 0) && /* This is the final callback */ (!tx->tx_passive_rdma_wait); /* Not waiting for RDMA completion */ + if (idle) + list_del(&tx->tx_list); CDEBUG(D_NET, "++conn[%p] state %d -> "LPX64" (%d)\n", conn, conn->ibc_state, conn->ibc_peer->ibp_nid, @@ -917,53 +915,62 @@ koibnal_tx_callback (struct ib_cq *cq, struct ib_cq_entry *e, void *arg) spin_unlock_irqrestore(&conn->ibc_lock, flags); if (idle) - koibnal_tx_done (tx); + kibnal_tx_done (tx); if (e->status != IB_COMPLETION_STATUS_SUCCESS) { CERROR ("Tx completion to "LPX64" failed: %d\n", conn->ibc_peer->ibp_nid, e->status); - koibnal_close_conn (conn, -ENETDOWN); + kibnal_close_conn (conn, -ENETDOWN); } else { /* can I shovel some more sends out the door? */ - koibnal_check_sends(conn); + kibnal_check_sends(conn); } - koibnal_put_conn (conn); + kibnal_put_conn (conn); } void -koibnal_init_tx_msg (koib_tx_t *tx, int type, int body_nob) +kibnal_callback (struct ib_cq *cq, struct ib_cq_entry *e, void *arg) +{ + if (kibnal_wreqid_is_rx(e->work_request_id)) + kibnal_rx_callback (e); + else + kibnal_tx_callback (e); +} + +void +kibnal_init_tx_msg (kib_tx_t *tx, int type, int body_nob) { struct ib_gather_scatter *gl = &tx->tx_gl[tx->tx_nsp]; struct ib_send_param *sp = &tx->tx_sp[tx->tx_nsp]; int fence; - int nob = offsetof (koib_msg_t, oibm_u) + body_nob; + int nob = offsetof (kib_msg_t, ibm_u) + body_nob; LASSERT (tx->tx_nsp >= 0 && tx->tx_nsp < sizeof(tx->tx_sp)/sizeof(tx->tx_sp[0])); - LASSERT (nob <= OPENIBNAL_MSG_SIZE); + LASSERT (nob <= IBNAL_MSG_SIZE); - tx->tx_msg->oibm_magic = OPENIBNAL_MSG_MAGIC; - tx->tx_msg->oibm_version = OPENIBNAL_MSG_VERSION; - tx->tx_msg->oibm_type = type; -#if OPENIBNAL_CKSUM - tx->tx_msg->oibm_nob = nob; + tx->tx_msg->ibm_magic = IBNAL_MSG_MAGIC; + tx->tx_msg->ibm_version = IBNAL_MSG_VERSION; + tx->tx_msg->ibm_type = type; +#if IBNAL_CKSUM + tx->tx_msg->ibm_nob = nob; #endif /* Fence the message if it's bundled with an RDMA read */ fence = (tx->tx_nsp > 0) && - (type == OPENIBNAL_MSG_PUT_DONE); + (type == IBNAL_MSG_PUT_DONE); *gl = (struct ib_gather_scatter) { .address = tx->tx_vaddr, .length = nob, - .key = koibnal_data.koib_tx_pages->oibp_lkey, + .key = kibnal_data.kib_tx_pages->ibp_lkey, }; /* NB If this is an RDMA read, the completion message must wait for * the RDMA to complete. Sends wait for previous RDMA writes * anyway... 
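so only
         * IBNAL_MSG_PUT_DONE, which follows an RDMA READ of the
         * source buffer, needs the explicit fence computed above.
         * The IB fence bit stalls this SEND until the preceding READ
         * on the same QP completes, so the peer cannot see the PUT
         * completion before the data has actually landed.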
*/ *sp = (struct ib_send_param) { - .work_request_id = (__u64)((unsigned long)tx), + .work_request_id = kibnal_ptr2wreqid(tx, 0), .op = IB_OP_SEND, .gather_list = gl, .num_gather_entries = 1, @@ -979,26 +986,26 @@ koibnal_init_tx_msg (koib_tx_t *tx, int type, int body_nob) } void -koibnal_queue_tx (koib_tx_t *tx, koib_conn_t *conn) +kibnal_queue_tx (kib_tx_t *tx, kib_conn_t *conn) { unsigned long flags; spin_lock_irqsave(&conn->ibc_lock, flags); - koibnal_queue_tx_locked (tx, conn); + kibnal_queue_tx_locked (tx, conn); spin_unlock_irqrestore(&conn->ibc_lock, flags); - koibnal_check_sends(conn); + kibnal_check_sends(conn); } void -koibnal_launch_tx (koib_tx_t *tx, ptl_nid_t nid) +kibnal_launch_tx (kib_tx_t *tx, ptl_nid_t nid) { unsigned long flags; - koib_peer_t *peer; - koib_conn_t *conn; - rwlock_t *g_lock = &koibnal_data.koib_global_lock; + kib_peer_t *peer; + kib_conn_t *conn; + rwlock_t *g_lock = &kibnal_data.kib_global_lock; /* If I get here, I've committed to send, so I complete the tx with * failure on any problems */ @@ -1008,15 +1015,15 @@ koibnal_launch_tx (koib_tx_t *tx, ptl_nid_t nid) read_lock (g_lock); - peer = koibnal_find_peer_locked (nid); + peer = kibnal_find_peer_locked (nid); if (peer == NULL) { read_unlock (g_lock); tx->tx_status = -EHOSTUNREACH; - koibnal_tx_done (tx); + kibnal_tx_done (tx); return; } - conn = koibnal_find_conn_locked (peer); + conn = kibnal_find_conn_locked (peer); if (conn != NULL) { CDEBUG(D_NET, "++conn[%p] state %d -> "LPX64" (%d)\n", conn, conn->ibc_state, conn->ibc_peer->ibp_nid, @@ -1024,7 +1031,7 @@ koibnal_launch_tx (koib_tx_t *tx, ptl_nid_t nid) atomic_inc (&conn->ibc_refcount); /* 1 ref for the tx */ read_unlock (g_lock); - koibnal_queue_tx (tx, conn); + kibnal_queue_tx (tx, conn); return; } @@ -1032,15 +1039,15 @@ koibnal_launch_tx (koib_tx_t *tx, ptl_nid_t nid) read_unlock (g_lock); write_lock_irqsave (g_lock, flags); - peer = koibnal_find_peer_locked (nid); + peer = kibnal_find_peer_locked (nid); if (peer == NULL) { write_unlock_irqrestore (g_lock, flags); tx->tx_status = -EHOSTUNREACH; - koibnal_tx_done (tx); + kibnal_tx_done (tx); return; } - conn = koibnal_find_conn_locked (peer); + conn = kibnal_find_conn_locked (peer); if (conn != NULL) { /* Connection exists; queue message on it */ CDEBUG(D_NET, "++conn[%p] state %d -> "LPX64" (%d)\n", @@ -1049,7 +1056,7 @@ koibnal_launch_tx (koib_tx_t *tx, ptl_nid_t nid) atomic_inc (&conn->ibc_refcount); /* 1 ref for the tx */ write_unlock_irqrestore (g_lock, flags); - koibnal_queue_tx (tx, conn); + kibnal_queue_tx (tx, conn); return; } @@ -1057,20 +1064,20 @@ koibnal_launch_tx (koib_tx_t *tx, ptl_nid_t nid) if (!time_after_eq(jiffies, peer->ibp_reconnect_time)) { write_unlock_irqrestore (g_lock, flags); tx->tx_status = -EHOSTUNREACH; - koibnal_tx_done (tx); + kibnal_tx_done (tx); return; } peer->ibp_connecting = 1; atomic_inc (&peer->ibp_refcount); /* extra ref for connd */ - spin_lock (&koibnal_data.koib_connd_lock); + spin_lock (&kibnal_data.kib_connd_lock); list_add_tail (&peer->ibp_connd_list, - &koibnal_data.koib_connd_peers); - wake_up (&koibnal_data.koib_connd_waitq); + &kibnal_data.kib_connd_peers); + wake_up (&kibnal_data.kib_connd_waitq); - spin_unlock (&koibnal_data.koib_connd_lock); + spin_unlock (&kibnal_data.kib_connd_lock); } /* A connection is being established; queue the message... 
*/ @@ -1080,49 +1087,49 @@ koibnal_launch_tx (koib_tx_t *tx, ptl_nid_t nid) } ptl_err_t -koibnal_start_passive_rdma (int type, ptl_nid_t nid, +kibnal_start_passive_rdma (int type, ptl_nid_t nid, lib_msg_t *libmsg, ptl_hdr_t *hdr) { int nob = libmsg->md->length; - koib_tx_t *tx; - koib_msg_t *oibmsg; + kib_tx_t *tx; + kib_msg_t *ibmsg; int rc; int access; - LASSERT (type == OPENIBNAL_MSG_PUT_RDMA || - type == OPENIBNAL_MSG_GET_RDMA); + LASSERT (type == IBNAL_MSG_PUT_RDMA || + type == IBNAL_MSG_GET_RDMA); LASSERT (nob > 0); LASSERT (!in_interrupt()); /* Mapping could block */ - if (type == OPENIBNAL_MSG_PUT_RDMA) { + if (type == IBNAL_MSG_PUT_RDMA) { access = IB_ACCESS_REMOTE_READ; } else { access = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE; } - tx = koibnal_get_idle_tx (1); /* May block; caller is an app thread */ + tx = kibnal_get_idle_tx (1); /* May block; caller is an app thread */ LASSERT (tx != NULL); if ((libmsg->md->options & PTL_MD_KIOV) == 0) - rc = koibnal_map_iov (tx, access, - libmsg->md->md_niov, - libmsg->md->md_iov.iov, - 0, nob); + rc = kibnal_map_iov (tx, access, + libmsg->md->md_niov, + libmsg->md->md_iov.iov, + 0, nob); else - rc = koibnal_map_kiov (tx, access, - libmsg->md->md_niov, - libmsg->md->md_iov.kiov, - 0, nob); + rc = kibnal_map_kiov (tx, access, + libmsg->md->md_niov, + libmsg->md->md_iov.kiov, + 0, nob); if (rc != 0) { CERROR ("Can't map RDMA for "LPX64": %d\n", nid, rc); goto failed; } - if (type == OPENIBNAL_MSG_GET_RDMA) { + if (type == IBNAL_MSG_GET_RDMA) { /* reply gets finalized when tx completes */ - tx->tx_libmsg[1] = lib_create_reply_msg(&koibnal_lib, + tx->tx_libmsg[1] = lib_create_reply_msg(&kibnal_lib, nid, libmsg); if (tx->tx_libmsg[1] == NULL) { CERROR ("Can't create reply for GET -> "LPX64"\n", @@ -1134,15 +1141,15 @@ koibnal_start_passive_rdma (int type, ptl_nid_t nid, tx->tx_passive_rdma = 1; - oibmsg = tx->tx_msg; + ibmsg = tx->tx_msg; - oibmsg->oibm_u.rdma.oibrm_hdr = *hdr; - oibmsg->oibm_u.rdma.oibrm_cookie = tx->tx_passive_rdma_cookie; - oibmsg->oibm_u.rdma.oibrm_desc.rd_key = tx->tx_md.md_rkey; - oibmsg->oibm_u.rdma.oibrm_desc.rd_addr = tx->tx_md.md_addr; - oibmsg->oibm_u.rdma.oibrm_desc.rd_nob = nob; + ibmsg->ibm_u.rdma.ibrm_hdr = *hdr; + ibmsg->ibm_u.rdma.ibrm_cookie = tx->tx_passive_rdma_cookie; + ibmsg->ibm_u.rdma.ibrm_desc.rd_key = tx->tx_md.md_rkey; + ibmsg->ibm_u.rdma.ibrm_desc.rd_addr = tx->tx_md.md_addr; + ibmsg->ibm_u.rdma.ibrm_desc.rd_nob = nob; - koibnal_init_tx_msg (tx, type, sizeof (koib_rdma_msg_t)); + kibnal_init_tx_msg (tx, type, sizeof (kib_rdma_msg_t)); CDEBUG(D_NET, "Passive: %p cookie "LPX64", key %x, addr " LPX64", nob %d\n", @@ -1152,25 +1159,25 @@ koibnal_start_passive_rdma (int type, ptl_nid_t nid, /* libmsg gets finalized when tx completes. 
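 * kibnal_tx_done() runs lib_finalize() on every non-NULL
         * tx_libmsg[] slot: slot 0 is the originating message stored
         * below, and slot 1, filled only for a GET, is the reply
         * created by lib_create_reply_msg() earlier in this function.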
*/ tx->tx_libmsg[0] = libmsg; - koibnal_launch_tx(tx, nid); + kibnal_launch_tx(tx, nid); return (PTL_OK); failed: tx->tx_status = rc; - koibnal_tx_done (tx); + kibnal_tx_done (tx); return (PTL_FAIL); } void -koibnal_start_active_rdma (int type, int status, - koib_rx_t *rx, lib_msg_t *libmsg, +kibnal_start_active_rdma (int type, int status, + kib_rx_t *rx, lib_msg_t *libmsg, unsigned int niov, struct iovec *iov, ptl_kiov_t *kiov, size_t offset, size_t nob) { - koib_msg_t *rxmsg = rx->rx_msg; - koib_msg_t *txmsg; - koib_tx_t *tx; + kib_msg_t *rxmsg = rx->rx_msg; + kib_msg_t *txmsg; + kib_tx_t *tx; int access; int rdma_op; int rc; @@ -1187,8 +1194,8 @@ koibnal_start_active_rdma (int type, int status, /* No data if we're completing with failure */ LASSERT (status == 0 || nob == 0); - LASSERT (type == OPENIBNAL_MSG_GET_DONE || - type == OPENIBNAL_MSG_PUT_DONE); + LASSERT (type == IBNAL_MSG_GET_DONE || + type == IBNAL_MSG_PUT_DONE); /* Flag I'm completing the RDMA. Even if I fail to send the * completion message, I will have tried my best so further @@ -1196,22 +1203,22 @@ koibnal_start_active_rdma (int type, int status, LASSERT (!rx->rx_rdma); rx->rx_rdma = 1; - if (type == OPENIBNAL_MSG_GET_DONE) { + if (type == IBNAL_MSG_GET_DONE) { access = 0; rdma_op = IB_OP_RDMA_WRITE; - LASSERT (rxmsg->oibm_type == OPENIBNAL_MSG_GET_RDMA); + LASSERT (rxmsg->ibm_type == IBNAL_MSG_GET_RDMA); } else { access = IB_ACCESS_LOCAL_WRITE; rdma_op = IB_OP_RDMA_READ; - LASSERT (rxmsg->oibm_type == OPENIBNAL_MSG_PUT_RDMA); + LASSERT (rxmsg->ibm_type == IBNAL_MSG_PUT_RDMA); } - tx = koibnal_get_idle_tx (0); /* Mustn't block */ + tx = kibnal_get_idle_tx (0); /* Mustn't block */ if (tx == NULL) { CERROR ("tx descs exhausted on RDMA from "LPX64 " completing locally with failure\n", - rx->rx_conn->ibc_peer->ibp_nid); - lib_finalize (&koibnal_lib, NULL, libmsg, PTL_NO_SPACE); + rx->rx_conn->ibc_peer->ibp_nid); + lib_finalize (&kibnal_lib, NULL, libmsg, PTL_NO_SPACE); return; } LASSERT (tx->tx_nsp == 0); @@ -1222,11 +1229,11 @@ koibnal_start_active_rdma (int type, int status, * message is matched) */ if (kiov != NULL) - rc = koibnal_map_kiov (tx, access, - niov, kiov, offset, nob); + rc = kibnal_map_kiov (tx, access, + niov, kiov, offset, nob); else - rc = koibnal_map_iov (tx, access, - niov, iov, offset, nob); + rc = kibnal_map_iov (tx, access, + niov, iov, offset, nob); if (rc != 0) { CERROR ("Can't map RDMA -> "LPX64": %d\n", @@ -1242,12 +1249,12 @@ koibnal_start_active_rdma (int type, int status, }; tx->tx_sp[0] = (struct ib_send_param) { - .work_request_id = (__u64)((unsigned long)tx), + .work_request_id = kibnal_ptr2wreqid(tx, 0), .op = rdma_op, .gather_list = &tx->tx_gl[0], .num_gather_entries = 1, - .remote_address = rxmsg->oibm_u.rdma.oibrm_desc.rd_addr, - .rkey = rxmsg->oibm_u.rdma.oibrm_desc.rd_key, + .remote_address = rxmsg->ibm_u.rdma.ibrm_desc.rd_addr, + .rkey = rxmsg->ibm_u.rdma.ibrm_desc.rd_key, .device_specific = NULL, .solicited_event = 0, .signaled = 1, @@ -1262,10 +1269,10 @@ koibnal_start_active_rdma (int type, int status, txmsg = tx->tx_msg; - txmsg->oibm_u.completion.oibcm_cookie = rxmsg->oibm_u.rdma.oibrm_cookie; - txmsg->oibm_u.completion.oibcm_status = status; + txmsg->ibm_u.completion.ibcm_cookie = rxmsg->ibm_u.rdma.ibrm_cookie; + txmsg->ibm_u.completion.ibcm_status = status; - koibnal_init_tx_msg(tx, type, sizeof (koib_completion_msg_t)); + kibnal_init_tx_msg(tx, type, sizeof (kib_completion_msg_t)); if (status == 0 && nob != 0) { LASSERT (tx->tx_nsp > 1); @@ -1277,7 +1284,7 @@ 
koibnal_start_active_rdma (int type, int status,
                LASSERT (tx->tx_nsp == 1);
                /* No RDMA: local completion happens now! */
                CDEBUG(D_WARNING,"No data: immediate completion\n");
-               lib_finalize (&koibnal_lib, NULL, libmsg,
+               lib_finalize (&kibnal_lib, NULL, libmsg,
                              status == 0 ? PTL_OK : PTL_FAIL);
        }

@@ -1288,11 +1295,11 @@ koibnal_start_active_rdma (int type, int status,
               atomic_read (&rx->rx_conn->ibc_refcount));
        atomic_inc (&rx->rx_conn->ibc_refcount);
        /* ...and queue it up */
-       koibnal_queue_tx(tx, rx->rx_conn);
+       kibnal_queue_tx(tx, rx->rx_conn);
 }

 ptl_err_t
-koibnal_sendmsg(lib_nal_t *nal,
+kibnal_sendmsg(lib_nal_t *nal,
                void *private,
                lib_msg_t *libmsg,
                ptl_hdr_t *hdr,
@@ -1305,8 +1312,8 @@ koibnal_sendmsg(lib_nal_t *nal,
                size_t payload_offset,
                size_t payload_nob)
 {
-       koib_msg_t *oibmsg;
-       koib_tx_t *tx;
+       kib_msg_t *ibmsg;
+       kib_tx_t *tx;
        int nob;

        /* NB 'private' is different depending on what we're sending.... */
@@ -1329,27 +1336,27 @@ koibnal_sendmsg(lib_nal_t *nal,
        case PTL_MSG_REPLY: {
                /* reply's 'private' is the incoming receive */
-               koib_rx_t *rx = private;
+               kib_rx_t *rx = private;

                /* RDMA reply expected? */
-               if (rx->rx_msg->oibm_type == OPENIBNAL_MSG_GET_RDMA) {
-                       koibnal_start_active_rdma(OPENIBNAL_MSG_GET_DONE, 0,
-                                                 rx, libmsg, payload_niov,
-                                                 payload_iov, payload_kiov,
-                                                 payload_offset, payload_nob);
+               if (rx->rx_msg->ibm_type == IBNAL_MSG_GET_RDMA) {
+                       kibnal_start_active_rdma(IBNAL_MSG_GET_DONE, 0,
+                                                rx, libmsg, payload_niov,
+                                                payload_iov, payload_kiov,
+                                                payload_offset, payload_nob);
                        return (PTL_OK);
                }

                /* Incoming message consistent with immediate reply? */
-               if (rx->rx_msg->oibm_type != OPENIBNAL_MSG_IMMEDIATE) {
+               if (rx->rx_msg->ibm_type != IBNAL_MSG_IMMEDIATE) {
                        CERROR ("REPLY to "LPX64" bad msg type %d!!!\n",
-                               nid, rx->rx_msg->oibm_type);
+                               nid, rx->rx_msg->ibm_type);
                        return (PTL_FAIL);
                }

                /* Will it fit in a message? */
-               nob = offsetof(koib_msg_t, oibm_u.immediate.oibim_payload[payload_nob]);
-               if (nob >= OPENIBNAL_MSG_SIZE) {
+               nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob]);
+               if (nob >= IBNAL_MSG_SIZE) {
                        CERROR("REPLY for "LPX64" too big (RDMA not requested): %d\n",
                               nid, payload_nob);
                        return (PTL_FAIL);
@@ -1359,10 +1366,10 @@ koibnal_sendmsg(lib_nal_t *nal,
        case PTL_MSG_GET:
                /* might the REPLY message be big enough to need RDMA? */
-               nob = offsetof(koib_msg_t, oibm_u.immediate.oibim_payload[libmsg->md->length]);
-               if (nob > OPENIBNAL_MSG_SIZE)
-                       return (koibnal_start_passive_rdma(OPENIBNAL_MSG_GET_RDMA,
-                                                          nid, libmsg, hdr));
+               nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[libmsg->md->length]);
+               if (nob > IBNAL_MSG_SIZE)
+                       return (kibnal_start_passive_rdma(IBNAL_MSG_GET_RDMA,
+                                                         nid, libmsg, hdr));
                break;

        case PTL_MSG_ACK:
@@ -1371,181 +1378,181 @@ koibnal_sendmsg(lib_nal_t *nal,
        case PTL_MSG_PUT:
                /* Is the payload big enough to need RDMA? */
-               nob = offsetof(koib_msg_t, oibm_u.immediate.oibim_payload[payload_nob]);
-               if (nob > OPENIBNAL_MSG_SIZE)
-                       return (koibnal_start_passive_rdma(OPENIBNAL_MSG_PUT_RDMA,
-                                                          nid, libmsg, hdr));
+               nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob]);
+               if (nob > IBNAL_MSG_SIZE)
+                       return (kibnal_start_passive_rdma(IBNAL_MSG_PUT_RDMA,
+                                                         nid, libmsg, hdr));
                break;
        }

-       tx = koibnal_get_idle_tx(!(type == PTL_MSG_ACK ||
-                                  type == PTL_MSG_REPLY ||
-                                  in_interrupt()));
+       tx = kibnal_get_idle_tx(!(type == PTL_MSG_ACK ||
+                                 type == PTL_MSG_REPLY ||
+                                 in_interrupt()));
        if (tx == NULL) {
                CERROR ("Can't send %d to "LPX64": tx descs exhausted%s\n",
                        type, nid, in_interrupt() ?
" (intr)" : ""); return (PTL_NO_SPACE); } - oibmsg = tx->tx_msg; - oibmsg->oibm_u.immediate.oibim_hdr = *hdr; + ibmsg = tx->tx_msg; + ibmsg->ibm_u.immediate.ibim_hdr = *hdr; if (payload_nob > 0) { if (payload_kiov != NULL) - lib_copy_kiov2buf(oibmsg->oibm_u.immediate.oibim_payload, + lib_copy_kiov2buf(ibmsg->ibm_u.immediate.ibim_payload, payload_niov, payload_kiov, payload_offset, payload_nob); else - lib_copy_iov2buf(oibmsg->oibm_u.immediate.oibim_payload, + lib_copy_iov2buf(ibmsg->ibm_u.immediate.ibim_payload, payload_niov, payload_iov, payload_offset, payload_nob); } - koibnal_init_tx_msg (tx, OPENIBNAL_MSG_IMMEDIATE, - offsetof(koib_immediate_msg_t, - oibim_payload[payload_nob])); + kibnal_init_tx_msg (tx, IBNAL_MSG_IMMEDIATE, + offsetof(kib_immediate_msg_t, + ibim_payload[payload_nob])); /* libmsg gets finalized when tx completes */ tx->tx_libmsg[0] = libmsg; - koibnal_launch_tx(tx, nid); + kibnal_launch_tx(tx, nid); return (PTL_OK); } ptl_err_t -koibnal_send (lib_nal_t *nal, void *private, lib_msg_t *cookie, +kibnal_send (lib_nal_t *nal, void *private, lib_msg_t *cookie, ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, unsigned int payload_niov, struct iovec *payload_iov, size_t payload_offset, size_t payload_len) { - return (koibnal_sendmsg(nal, private, cookie, - hdr, type, nid, pid, - payload_niov, payload_iov, NULL, - payload_offset, payload_len)); + return (kibnal_sendmsg(nal, private, cookie, + hdr, type, nid, pid, + payload_niov, payload_iov, NULL, + payload_offset, payload_len)); } ptl_err_t -koibnal_send_pages (lib_nal_t *nal, void *private, lib_msg_t *cookie, +kibnal_send_pages (lib_nal_t *nal, void *private, lib_msg_t *cookie, ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, unsigned int payload_niov, ptl_kiov_t *payload_kiov, size_t payload_offset, size_t payload_len) { - return (koibnal_sendmsg(nal, private, cookie, - hdr, type, nid, pid, - payload_niov, NULL, payload_kiov, - payload_offset, payload_len)); + return (kibnal_sendmsg(nal, private, cookie, + hdr, type, nid, pid, + payload_niov, NULL, payload_kiov, + payload_offset, payload_len)); } ptl_err_t -koibnal_recvmsg (lib_nal_t *nal, void *private, lib_msg_t *libmsg, +kibnal_recvmsg (lib_nal_t *nal, void *private, lib_msg_t *libmsg, unsigned int niov, struct iovec *iov, ptl_kiov_t *kiov, size_t offset, size_t mlen, size_t rlen) { - koib_rx_t *rx = private; - koib_msg_t *rxmsg = rx->rx_msg; - int msg_nob; + kib_rx_t *rx = private; + kib_msg_t *rxmsg = rx->rx_msg; + int msg_nob; LASSERT (mlen <= rlen); LASSERT (!in_interrupt ()); /* Either all pages or all vaddrs */ LASSERT (!(kiov != NULL && iov != NULL)); - switch (rxmsg->oibm_type) { + switch (rxmsg->ibm_type) { default: LBUG(); return (PTL_FAIL); - case OPENIBNAL_MSG_IMMEDIATE: - msg_nob = offsetof(koib_msg_t, oibm_u.immediate.oibim_payload[rlen]); - if (msg_nob > OPENIBNAL_MSG_SIZE) { + case IBNAL_MSG_IMMEDIATE: + msg_nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[rlen]); + if (msg_nob > IBNAL_MSG_SIZE) { CERROR ("Immediate message from "LPX64" too big: %d\n", - rxmsg->oibm_u.immediate.oibim_hdr.src_nid, rlen); + rxmsg->ibm_u.immediate.ibim_hdr.src_nid, rlen); return (PTL_FAIL); } if (kiov != NULL) lib_copy_buf2kiov(niov, kiov, offset, - rxmsg->oibm_u.immediate.oibim_payload, + rxmsg->ibm_u.immediate.ibim_payload, mlen); else lib_copy_buf2iov(niov, iov, offset, - rxmsg->oibm_u.immediate.oibim_payload, + rxmsg->ibm_u.immediate.ibim_payload, mlen); lib_finalize (nal, NULL, libmsg, PTL_OK); return (PTL_OK); - case OPENIBNAL_MSG_GET_RDMA: + case 
IBNAL_MSG_GET_RDMA:
                /* We get called here just to discard any junk after the
                 * GET hdr. */
                LASSERT (libmsg == NULL);
                lib_finalize (nal, NULL, libmsg, PTL_OK);
                return (PTL_OK);

-       case OPENIBNAL_MSG_PUT_RDMA:
-               koibnal_start_active_rdma (OPENIBNAL_MSG_PUT_DONE, 0,
-                                          rx, libmsg,
-                                          niov, iov, kiov, offset, mlen);
+       case IBNAL_MSG_PUT_RDMA:
+               kibnal_start_active_rdma (IBNAL_MSG_PUT_DONE, 0,
+                                         rx, libmsg,
+                                         niov, iov, kiov, offset, mlen);
                return (PTL_OK);
        }
 }

 ptl_err_t
-koibnal_recv (lib_nal_t *nal, void *private, lib_msg_t *msg,
+kibnal_recv (lib_nal_t *nal, void *private, lib_msg_t *msg,
              unsigned int niov, struct iovec *iov,
              size_t offset, size_t mlen, size_t rlen)
 {
-       return (koibnal_recvmsg (nal, private, msg, niov, iov, NULL,
-                                offset, mlen, rlen));
+       return (kibnal_recvmsg (nal, private, msg, niov, iov, NULL,
+                               offset, mlen, rlen));
 }

 ptl_err_t
-koibnal_recv_pages (lib_nal_t *nal, void *private, lib_msg_t *msg,
+kibnal_recv_pages (lib_nal_t *nal, void *private, lib_msg_t *msg,
                    unsigned int niov, ptl_kiov_t *kiov,
                    size_t offset, size_t mlen, size_t rlen)
 {
-       return (koibnal_recvmsg (nal, private, msg, niov, NULL, kiov,
-                                offset, mlen, rlen));
+       return (kibnal_recvmsg (nal, private, msg, niov, NULL, kiov,
+                               offset, mlen, rlen));
 }

 int
-koibnal_thread_start (int (*fn)(void *arg), void *arg)
+kibnal_thread_start (int (*fn)(void *arg), void *arg)
 {
        long pid = kernel_thread (fn, arg, 0);

        if (pid < 0)
                return ((int)pid);

-       atomic_inc (&koibnal_data.koib_nthreads);
+       atomic_inc (&kibnal_data.kib_nthreads);
        return (0);
 }

 void
-koibnal_thread_fini (void)
+kibnal_thread_fini (void)
 {
-       atomic_dec (&koibnal_data.koib_nthreads);
+       atomic_dec (&kibnal_data.kib_nthreads);
 }

 void
-koibnal_close_conn_locked (koib_conn_t *conn, int error)
+kibnal_close_conn_locked (kib_conn_t *conn, int error)
 {
        /* This just does the immediate housekeeping, and schedules the
         * connection for the connd to finish off.
-        * Caller holds koib_global_lock exclusively in irq context */
-       koib_peer_t *peer = conn->ibc_peer;
+        * Caller holds kib_global_lock exclusively in irq context */
+       kib_peer_t *peer = conn->ibc_peer;

        CDEBUG (error == 0 ? D_NET : D_ERROR,
                "closing conn to "LPX64": error %d\n", peer->ibp_nid, error);

-       LASSERT (conn->ibc_state == OPENIBNAL_CONN_ESTABLISHED ||
-                conn->ibc_state == OPENIBNAL_CONN_CONNECTING);
+       LASSERT (conn->ibc_state == IBNAL_CONN_ESTABLISHED ||
+                conn->ibc_state == IBNAL_CONN_CONNECTING);

-       if (conn->ibc_state == OPENIBNAL_CONN_ESTABLISHED) {
-               /* koib_connd_conns takes ibc_list's ref */
+       if (conn->ibc_state == IBNAL_CONN_ESTABLISHED) {
+               /* kib_connd_conns takes ibc_list's ref */
                list_del (&conn->ibc_list);
        } else {
-               /* new ref for koib_connd_conns */
+               /* new ref for kib_connd_conns */
                CDEBUG(D_NET, "++conn[%p] state %d -> "LPX64" (%d)\n",
                       conn, conn->ibc_state, conn->ibc_peer->ibp_nid,
                       atomic_read (&conn->ibc_refcount));
@@ -1555,57 +1562,57 @@ koibnal_close_conn_locked (koib_conn_t *conn, int error)
        if (list_empty (&peer->ibp_conns) &&
            peer->ibp_persistence == 0) {
                /* Non-persistent peer with no more conns...
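
kibnal_thread_start()/kibnal_thread_fini() above only maintain kib_nthreads, but that counter is what lets shutdown synchronize with its daemons. A sketch of the teardown loop that would pair with them (the loop is illustrative, not quoted from this patch; kibnal_data.kib_nthreads is the real counter):

        /* module unload: wait for connd + scheduler threads to call
         * kibnal_thread_fini() before tearing down shared state */
        while (atomic_read (&kibnal_data.kib_nthreads) != 0) {
                set_current_state (TASK_INTERRUPTIBLE);
                schedule_timeout (HZ);
        }
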
*/ - koibnal_unlink_peer_locked (peer); + kibnal_unlink_peer_locked (peer); } - conn->ibc_state = OPENIBNAL_CONN_DEATHROW; + conn->ibc_state = IBNAL_CONN_DEATHROW; /* Schedule conn for closing/destruction */ - spin_lock (&koibnal_data.koib_connd_lock); + spin_lock (&kibnal_data.kib_connd_lock); - list_add_tail (&conn->ibc_list, &koibnal_data.koib_connd_conns); - wake_up (&koibnal_data.koib_connd_waitq); + list_add_tail (&conn->ibc_list, &kibnal_data.kib_connd_conns); + wake_up (&kibnal_data.kib_connd_waitq); - spin_unlock (&koibnal_data.koib_connd_lock); + spin_unlock (&kibnal_data.kib_connd_lock); } int -koibnal_close_conn (koib_conn_t *conn, int why) +kibnal_close_conn (kib_conn_t *conn, int why) { unsigned long flags; int count = 0; - write_lock_irqsave (&koibnal_data.koib_global_lock, flags); + write_lock_irqsave (&kibnal_data.kib_global_lock, flags); - LASSERT (conn->ibc_state >= OPENIBNAL_CONN_CONNECTING); + LASSERT (conn->ibc_state >= IBNAL_CONN_CONNECTING); - if (conn->ibc_state <= OPENIBNAL_CONN_ESTABLISHED) { + if (conn->ibc_state <= IBNAL_CONN_ESTABLISHED) { count = 1; - koibnal_close_conn_locked (conn, why); + kibnal_close_conn_locked (conn, why); } - write_unlock_irqrestore (&koibnal_data.koib_global_lock, flags); + write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); return (count); } void -koibnal_peer_connect_failed (koib_peer_t *peer, int active, int rc) +kibnal_peer_connect_failed (kib_peer_t *peer, int active, int rc) { LIST_HEAD (zombies); - koib_tx_t *tx; + kib_tx_t *tx; unsigned long flags; LASSERT (rc != 0); - LASSERT (peer->ibp_reconnect_interval >= OPENIBNAL_MIN_RECONNECT_INTERVAL); + LASSERT (peer->ibp_reconnect_interval >= IBNAL_MIN_RECONNECT_INTERVAL); - write_lock_irqsave (&koibnal_data.koib_global_lock, flags); + write_lock_irqsave (&kibnal_data.kib_global_lock, flags); LASSERT (peer->ibp_connecting != 0); peer->ibp_connecting--; if (peer->ibp_connecting != 0) { /* another connection attempt under way (loopback?)... 
*/
-               write_unlock_irqrestore (&koibnal_data.koib_global_lock, flags);
+               write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
                return;
        }

@@ -1614,50 +1621,50 @@ koibnal_peer_connect_failed (koib_peer_t *peer, int active, int rc)
                peer->ibp_reconnect_time = jiffies + peer->ibp_reconnect_interval;

                /* Increase reconnection interval */
                peer->ibp_reconnect_interval = MIN (peer->ibp_reconnect_interval * 2,
-                                                   OPENIBNAL_MAX_RECONNECT_INTERVAL);
+                                                   IBNAL_MAX_RECONNECT_INTERVAL);

                /* Take peer's blocked transmits; I'll complete
                 * them with error */
                while (!list_empty (&peer->ibp_tx_queue)) {
                        tx = list_entry (peer->ibp_tx_queue.next,
-                                        koib_tx_t, tx_list);
+                                        kib_tx_t, tx_list);

                        list_del (&tx->tx_list);
                        list_add_tail (&tx->tx_list, &zombies);
                }

-               if (koibnal_peer_active(peer) &&
+               if (kibnal_peer_active(peer) &&
                    (peer->ibp_persistence == 0)) {
                        /* failed connection attempt on non-persistent peer */
-                       koibnal_unlink_peer_locked (peer);
+                       kibnal_unlink_peer_locked (peer);
                }
        } else {
                /* Can't have blocked transmits if there are connections */
                LASSERT (list_empty(&peer->ibp_tx_queue));
        }

-       write_unlock_irqrestore (&koibnal_data.koib_global_lock, flags);
+       write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);

        if (!list_empty (&zombies))
                CERROR ("Deleting messages for "LPX64": connection failed\n",
                        peer->ibp_nid);

        while (!list_empty (&zombies)) {
-               tx = list_entry (zombies.next, koib_tx_t, tx_list);
+               tx = list_entry (zombies.next, kib_tx_t, tx_list);

                list_del (&tx->tx_list);
                /* complete now */
                tx->tx_status = -EHOSTUNREACH;
-               koibnal_tx_done (tx);
+               kibnal_tx_done (tx);
        }
 }

 void
-koibnal_connreq_done (koib_conn_t *conn, int active, int status)
+kibnal_connreq_done (kib_conn_t *conn, int active, int status)
 {
        int state = conn->ibc_state;
-       koib_peer_t *peer = conn->ibc_peer;
-       koib_tx_t *tx;
+       kib_peer_t *peer = conn->ibc_peer;
+       kib_tx_t *tx;
        unsigned long flags;
        int rc;
        int i;

@@ -1669,31 +1676,31 @@ koibnal_connreq_done (koib_conn_t *conn, int active, int status)
                conn->ibc_connreq = NULL;
        }

-       if (state == OPENIBNAL_CONN_CONNECTING) {
+       if (state == IBNAL_CONN_CONNECTING) {
                /* Install common (active/passive) callback for
                 * disconnect/idle notification if I got as far as getting
                 * a CM comm_id */
                rc = tsIbCmCallbackModify(conn->ibc_comm_id,
-                                         koibnal_conn_callback, conn);
+                                         kibnal_conn_callback, conn);
                LASSERT (rc == 0);
        }

-       write_lock_irqsave (&koibnal_data.koib_global_lock, flags);
+       write_lock_irqsave (&kibnal_data.kib_global_lock, flags);

        LASSERT (peer->ibp_connecting != 0);

        if (status == 0) {
                /* connection established...
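
The failure path above is bounded exponential backoff: the next attempt is deferred by ibp_reconnect_interval, which then doubles up to a ceiling. A reduced sketch, with DEMO_* constants standing in for IBNAL_MIN_RECONNECT_INTERVAL/IBNAL_MAX_RECONNECT_INTERVAL (whose real values live in the header):

        #define DEMO_MIN_RECONNECT_INTERVAL HZ          /* assumed floor */
        #define DEMO_MAX_RECONNECT_INTERVAL (60 * HZ)   /* assumed ceiling */

        static void
        demo_schedule_retry (unsigned long *interval, unsigned long *retry_time)
        {
                /* defer the next attempt by the current interval... */
                *retry_time = jiffies + *interval;
                /* ...and double it, capped, for the attempt after that */
                *interval = MIN (*interval * 2, DEMO_MAX_RECONNECT_INTERVAL);
        }
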
*/ - LASSERT (state == OPENIBNAL_CONN_CONNECTING); - conn->ibc_state = OPENIBNAL_CONN_ESTABLISHED; + LASSERT (state == IBNAL_CONN_CONNECTING); + conn->ibc_state = IBNAL_CONN_ESTABLISHED; - if (!koibnal_peer_active(peer)) { + if (!kibnal_peer_active(peer)) { /* ...but peer deleted meantime */ status = -ECONNABORTED; } } else { - LASSERT (state == OPENIBNAL_CONN_INIT_QP || - state == OPENIBNAL_CONN_CONNECTING); + LASSERT (state == IBNAL_CONN_INIT_QP || + state == IBNAL_CONN_CONNECTING); } if (status == 0) { @@ -1710,14 +1717,14 @@ koibnal_connreq_done (koib_conn_t *conn, int active, int status) list_add (&conn->ibc_list, &peer->ibp_conns); /* reset reconnect interval for next attempt */ - peer->ibp_reconnect_interval = OPENIBNAL_MIN_RECONNECT_INTERVAL; + peer->ibp_reconnect_interval = IBNAL_MIN_RECONNECT_INTERVAL; /* post blocked sends to the new connection */ spin_lock (&conn->ibc_lock); while (!list_empty (&peer->ibp_tx_queue)) { tx = list_entry (peer->ibp_tx_queue.next, - koib_tx_t, tx_list); + kib_tx_t, tx_list); list_del (&tx->tx_list); @@ -1726,19 +1733,19 @@ koibnal_connreq_done (koib_conn_t *conn, int active, int status) conn, conn->ibc_state, conn->ibc_peer->ibp_nid, atomic_read (&conn->ibc_refcount)); atomic_inc (&conn->ibc_refcount); - koibnal_queue_tx_locked (tx, conn); + kibnal_queue_tx_locked (tx, conn); } spin_unlock (&conn->ibc_lock); /* Nuke any dangling conns from a different peer instance... */ - koibnal_close_stale_conns_locked (conn->ibc_peer, - conn->ibc_incarnation); + kibnal_close_stale_conns_locked (conn->ibc_peer, + conn->ibc_incarnation); - write_unlock_irqrestore (&koibnal_data.koib_global_lock, flags); + write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); /* queue up all the receives */ - for (i = 0; i < OPENIBNAL_RX_MSGS; i++) { + for (i = 0; i < IBNAL_RX_MSGS; i++) { /* +1 ref for rx desc */ CDEBUG(D_NET, "++conn[%p] state %d -> "LPX64" (%d)\n", conn, conn->ibc_state, conn->ibc_peer->ibp_nid, @@ -1749,71 +1756,71 @@ koibnal_connreq_done (koib_conn_t *conn, int active, int status) i, &conn->ibc_rxs[i], conn->ibc_rxs[i].rx_msg, conn->ibc_rxs[i].rx_vaddr); - koibnal_post_rx (&conn->ibc_rxs[i], 0); + kibnal_post_rx (&conn->ibc_rxs[i], 0); } - koibnal_check_sends (conn); + kibnal_check_sends (conn); return; } /* connection failed */ - if (state == OPENIBNAL_CONN_CONNECTING) { + if (state == IBNAL_CONN_CONNECTING) { /* schedule for connd to close */ - koibnal_close_conn_locked (conn, status); + kibnal_close_conn_locked (conn, status); } else { /* Don't have a CM comm_id; just wait for refs to drain */ - conn->ibc_state = OPENIBNAL_CONN_ZOMBIE; + conn->ibc_state = IBNAL_CONN_ZOMBIE; } - write_unlock_irqrestore (&koibnal_data.koib_global_lock, flags); + write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); - koibnal_peer_connect_failed (conn->ibc_peer, active, status); + kibnal_peer_connect_failed (conn->ibc_peer, active, status); - if (state != OPENIBNAL_CONN_CONNECTING) { + if (state != IBNAL_CONN_CONNECTING) { /* drop caller's ref if we're not waiting for the * IB_CM_IDLE callback */ - koibnal_put_conn (conn); + kibnal_put_conn (conn); } } int -koibnal_accept (koib_conn_t **connp, tTS_IB_CM_COMM_ID cid, +kibnal_accept (kib_conn_t **connp, tTS_IB_CM_COMM_ID cid, ptl_nid_t nid, __u64 incarnation, int queue_depth) { - koib_conn_t *conn = koibnal_create_conn(); - koib_peer_t *peer; - koib_peer_t *peer2; + kib_conn_t *conn = kibnal_create_conn(); + kib_peer_t *peer; + kib_peer_t *peer2; unsigned long flags; if (conn == NULL) return (-ENOMEM); - if 
(queue_depth != OPENIBNAL_MSG_QUEUE_SIZE) { + if (queue_depth != IBNAL_MSG_QUEUE_SIZE) { CERROR("Can't accept "LPX64": bad queue depth %d (%d expected)\n", - nid, queue_depth, OPENIBNAL_MSG_QUEUE_SIZE); + nid, queue_depth, IBNAL_MSG_QUEUE_SIZE); return (-EPROTO); } /* assume 'nid' is a new peer */ - peer = koibnal_create_peer (nid); + peer = kibnal_create_peer (nid); if (peer == NULL) { CDEBUG(D_NET, "--conn[%p] state %d -> "LPX64" (%d)\n", conn, conn->ibc_state, conn->ibc_peer->ibp_nid, atomic_read (&conn->ibc_refcount)); atomic_dec (&conn->ibc_refcount); - koibnal_destroy_conn(conn); + kibnal_destroy_conn(conn); return (-ENOMEM); } - write_lock_irqsave (&koibnal_data.koib_global_lock, flags); + write_lock_irqsave (&kibnal_data.kib_global_lock, flags); - peer2 = koibnal_find_peer_locked(nid); + peer2 = kibnal_find_peer_locked(nid); if (peer2 == NULL) { /* peer table takes my ref on peer */ list_add_tail (&peer->ibp_list, - koibnal_nid2peerlist(nid)); + kibnal_nid2peerlist(nid)); } else { - koibnal_put_peer (peer); + kibnal_put_peer (peer); peer = peer2; } @@ -1821,20 +1828,20 @@ koibnal_accept (koib_conn_t **connp, tTS_IB_CM_COMM_ID cid, atomic_inc (&peer->ibp_refcount); peer->ibp_connecting++; - write_unlock_irqrestore (&koibnal_data.koib_global_lock, flags); + write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); conn->ibc_peer = peer; - conn->ibc_state = OPENIBNAL_CONN_CONNECTING; + conn->ibc_state = IBNAL_CONN_CONNECTING; conn->ibc_comm_id = cid; conn->ibc_incarnation = incarnation; - conn->ibc_credits = OPENIBNAL_MSG_QUEUE_SIZE; + conn->ibc_credits = IBNAL_MSG_QUEUE_SIZE; *connp = conn; return (0); } tTS_IB_CM_CALLBACK_RETURN -koibnal_idle_conn_callback (tTS_IB_CM_EVENT event, +kibnal_idle_conn_callback (tTS_IB_CM_EVENT event, tTS_IB_CM_COMM_ID cid, void *param, void *arg) @@ -1846,13 +1853,19 @@ koibnal_idle_conn_callback (tTS_IB_CM_EVENT event, } tTS_IB_CM_CALLBACK_RETURN -koibnal_conn_callback (tTS_IB_CM_EVENT event, +kibnal_conn_callback (tTS_IB_CM_EVENT event, tTS_IB_CM_COMM_ID cid, void *param, void *arg) { - koib_conn_t *conn = arg; - int rc; + kib_conn_t *conn = arg; + LIST_HEAD (zombies); + struct list_head *tmp; + struct list_head *nxt; + kib_tx_t *tx; + unsigned long flags; + int done; + int rc; /* Established Connection Notifier */ @@ -1860,24 +1873,72 @@ koibnal_conn_callback (tTS_IB_CM_EVENT event, default: CERROR("Connection %p -> "LPX64" ERROR %d\n", conn, conn->ibc_peer->ibp_nid, event); - koibnal_close_conn (conn, -ECONNABORTED); + kibnal_close_conn (conn, -ECONNABORTED); break; case TS_IB_CM_DISCONNECTED: CDEBUG(D_WARNING, "Connection %p -> "LPX64" DISCONNECTED.\n", conn, conn->ibc_peer->ibp_nid); - koibnal_close_conn (conn, 0); + kibnal_close_conn (conn, 0); break; case TS_IB_CM_IDLE: CDEBUG(D_NET, "Connection %p -> "LPX64" IDLE.\n", conn, conn->ibc_peer->ibp_nid); - koibnal_put_conn (conn); /* Lose CM's ref */ + kibnal_put_conn (conn); /* Lose CM's ref */ /* LASSERT (no further callbacks) */ rc = tsIbCmCallbackModify(cid, - koibnal_idle_conn_callback, conn); + kibnal_idle_conn_callback, conn); LASSERT (rc == 0); + + /* NB we wait until the connection has closed before + * completing outstanding passive RDMAs so we can be sure + * the network can't touch the mapped memory any more. 
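
The collection loops just below enforce a two-owner rule: a tx may be handed to kibnal_tx_done() only when no send is still in flight (tx_sending == 0) and no passive-RDMA completion is still expected (tx_passive_rdma_wait cleared); whichever path observes both conditions last frees it. The predicate, stated on its own as a sketch:

        /* a tx is finished with only when both completion sources
         * have let go of it */
        static int
        demo_tx_idle (int tx_sending, int tx_passive_rdma_wait)
        {
                return (tx_sending == 0 && !tx_passive_rdma_wait);
        }
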
*/
+
+               spin_lock_irqsave (&conn->ibc_lock, flags);
+
+               /* grab passive RDMAs not waiting for the tx callback */
+               list_for_each_safe (tmp, nxt, &conn->ibc_active_txs) {
+                       tx = list_entry (tmp, kib_tx_t, tx_list);
+
+                       LASSERT (tx->tx_passive_rdma ||
+                                !tx->tx_passive_rdma_wait);
+
+                       LASSERT (tx->tx_passive_rdma_wait ||
+                                tx->tx_sending != 0);
+
+                       /* still waiting for tx callback? */
+                       if (!tx->tx_passive_rdma_wait)
+                               continue;
+
+                       tx->tx_status = -ECONNABORTED;
+                       tx->tx_passive_rdma_wait = 0;
+                       done = (tx->tx_sending == 0);
+
+                       if (!done)
+                               continue;
+
+                       list_del (&tx->tx_list);
+                       list_add (&tx->tx_list, &zombies);
+               }
+
+               /* grab all blocked transmits */
+               list_for_each_safe (tmp, nxt, &conn->ibc_tx_queue) {
+                       tx = list_entry (tmp, kib_tx_t, tx_list);
+
+                       list_del (&tx->tx_list);
+                       list_add (&tx->tx_list, &zombies);
+               }
+
+               spin_unlock_irqrestore (&conn->ibc_lock, flags);
+
+               while (!list_empty(&zombies)) {
+                       tx = list_entry (zombies.next, kib_tx_t, tx_list);
+
+                       list_del(&tx->tx_list);
+                       kibnal_tx_done (tx);
+               }
                break;
        }

@@ -1885,12 +1946,12 @@ koibnal_conn_callback (tTS_IB_CM_EVENT event,
 }

 tTS_IB_CM_CALLBACK_RETURN
-koibnal_passive_conn_callback (tTS_IB_CM_EVENT event,
+kibnal_passive_conn_callback (tTS_IB_CM_EVENT event,
                               tTS_IB_CM_COMM_ID cid,
                               void *param,
                               void *arg)
 {
-       koib_conn_t *conn = arg;
+       kib_conn_t *conn = arg;
        int rc;

        switch (event) {
@@ -1903,12 +1964,12 @@ koibnal_passive_conn_callback (tTS_IB_CM_EVENT event,
                CERROR ("Unexpected event %p -> "LPX64": %d\n",
                        conn, conn->ibc_peer->ibp_nid, event);
-               koibnal_connreq_done (conn, 0, -ECONNABORTED);
+               kibnal_connreq_done (conn, 0, -ECONNABORTED);
                break;

        case TS_IB_CM_REQ_RECEIVED: {
                struct ib_cm_req_received_param *req = param;
-               koib_wire_connreq_t *wcr = req->remote_private_data;
+               kib_wire_connreq_t *wcr = req->remote_private_data;

                LASSERT (conn == NULL);

@@ -1920,23 +1981,23 @@ koibnal_passive_conn_callback (tTS_IB_CM_EVENT event,
                        return TS_IB_CM_CALLBACK_ABORT;
                }

-               if (wcr->wcr_magic != cpu_to_le32(OPENIBNAL_MSG_MAGIC)) {
+               if (wcr->wcr_magic != cpu_to_le32(IBNAL_MSG_MAGIC)) {
                        CERROR ("Can't accept LID %04x: bad magic %08x\n",
                                req->dlid, le32_to_cpu(wcr->wcr_magic));
                        return TS_IB_CM_CALLBACK_ABORT;
                }

-               if (wcr->wcr_version != cpu_to_le16(OPENIBNAL_MSG_VERSION)) {
+               if (wcr->wcr_version != cpu_to_le16(IBNAL_MSG_VERSION)) {
                        CERROR ("Can't accept LID %04x: bad version %d\n",
                                req->dlid, le16_to_cpu(wcr->wcr_version));
                        return TS_IB_CM_CALLBACK_ABORT;
                }

-               rc = koibnal_accept(&conn,
-                                   cid,
-                                   le64_to_cpu(wcr->wcr_nid),
-                                   le64_to_cpu(wcr->wcr_incarnation),
-                                   le16_to_cpu(wcr->wcr_queue_depth));
+               rc = kibnal_accept(&conn,
+                                  cid,
+                                  le64_to_cpu(wcr->wcr_nid),
+                                  le64_to_cpu(wcr->wcr_incarnation),
+                                  le16_to_cpu(wcr->wcr_queue_depth));
                if (rc != 0) {
                        CERROR ("Can't accept "LPX64": %d\n",
                                le64_to_cpu(wcr->wcr_nid), rc);
@@ -1945,23 +2006,23 @@ koibnal_passive_conn_callback (tTS_IB_CM_EVENT event,
                /* update 'arg' for next callback */
                rc = tsIbCmCallbackModify(cid,
-                                         koibnal_passive_conn_callback, conn);
+                                         kibnal_passive_conn_callback, conn);
                LASSERT (rc == 0);

                req->accept_param.qp = conn->ibc_qp;
-               *((koib_wire_connreq_t *)req->accept_param.reply_private_data)
-                       = (koib_wire_connreq_t) {
-                       .wcr_magic = cpu_to_le32(OPENIBNAL_MSG_MAGIC),
-                       .wcr_version = cpu_to_le16(OPENIBNAL_MSG_VERSION),
-                       .wcr_queue_depth = cpu_to_le32(OPENIBNAL_MSG_QUEUE_SIZE),
-                       .wcr_nid = cpu_to_le64(koibnal_data.koib_nid),
-                       .wcr_incarnation = cpu_to_le64(koibnal_data.koib_incarnation),
+               *((kib_wire_connreq_t *)req->accept_param.reply_private_data)
+                       = (kib_wire_connreq_t) {
+                       .wcr_magic = cpu_to_le32(IBNAL_MSG_MAGIC),
+                       .wcr_version = cpu_to_le16(IBNAL_MSG_VERSION),
+                       .wcr_queue_depth = cpu_to_le16(IBNAL_MSG_QUEUE_SIZE),
+                       .wcr_nid = cpu_to_le64(kibnal_data.kib_nid),
+                       .wcr_incarnation = cpu_to_le64(kibnal_data.kib_incarnation),
                };
-               req->accept_param.reply_private_data_len = sizeof(koib_wire_connreq_t);
-               req->accept_param.responder_resources = OPENIBNAL_RESPONDER_RESOURCES;
-               req->accept_param.initiator_depth = OPENIBNAL_RESPONDER_RESOURCES;
-               req->accept_param.rnr_retry_count = OPENIBNAL_RNR_RETRY;
-               req->accept_param.flow_control = OPENIBNAL_FLOW_CONTROL;
+               req->accept_param.reply_private_data_len = sizeof(kib_wire_connreq_t);
+               req->accept_param.responder_resources = IBNAL_RESPONDER_RESOURCES;
+               req->accept_param.initiator_depth = IBNAL_RESPONDER_RESOURCES;
+               req->accept_param.rnr_retry_count = IBNAL_RNR_RETRY;
+               req->accept_param.flow_control = IBNAL_FLOW_CONTROL;

                CDEBUG(D_NET, "Proceeding\n");
                break;

@@ -1972,60 +2033,60 @@ koibnal_passive_conn_callback (tTS_IB_CM_EVENT event,
        }

-       /* NB if the connreq is done, we switch to koibnal_conn_callback */
+       /* NB if the connreq is done, we switch to kibnal_conn_callback */
        return TS_IB_CM_CALLBACK_PROCEED;
 }

 tTS_IB_CM_CALLBACK_RETURN
-koibnal_active_conn_callback (tTS_IB_CM_EVENT event,
+kibnal_active_conn_callback (tTS_IB_CM_EVENT event,
                              tTS_IB_CM_COMM_ID cid,
                              void *param,
                              void *arg)
 {
-       koib_conn_t *conn = arg;
+       kib_conn_t *conn = arg;

        switch (event) {
        case TS_IB_CM_REP_RECEIVED: {
                struct ib_cm_rep_received_param *rep = param;
-               koib_wire_connreq_t *wcr = rep->remote_private_data;
+               kib_wire_connreq_t *wcr = rep->remote_private_data;

                if (rep->remote_private_data_len < sizeof (*wcr)) {
                        CERROR ("Short reply from "LPX64": %d\n",
                                conn->ibc_peer->ibp_nid,
                                rep->remote_private_data_len);
-                       koibnal_connreq_done (conn, 1, -EPROTO);
+                       kibnal_connreq_done (conn, 1, -EPROTO);
                        break;
                }

-               if (wcr->wcr_magic != cpu_to_le32(OPENIBNAL_MSG_MAGIC)) {
+               if (wcr->wcr_magic != cpu_to_le32(IBNAL_MSG_MAGIC)) {
                        CERROR ("Can't connect "LPX64": bad magic %08x\n",
                                conn->ibc_peer->ibp_nid,
                                le32_to_cpu(wcr->wcr_magic));
-                       koibnal_connreq_done (conn, 1, -EPROTO);
+                       kibnal_connreq_done (conn, 1, -EPROTO);
                        break;
                }

-               if (wcr->wcr_version != cpu_to_le16(OPENIBNAL_MSG_VERSION)) {
+               if (wcr->wcr_version != cpu_to_le16(IBNAL_MSG_VERSION)) {
                        CERROR ("Can't connect "LPX64": bad version %d\n",
                                conn->ibc_peer->ibp_nid,
                                le16_to_cpu(wcr->wcr_version));
-                       koibnal_connreq_done (conn, 1, -EPROTO);
+                       kibnal_connreq_done (conn, 1, -EPROTO);
                        break;
                }

-               if (wcr->wcr_queue_depth != cpu_to_le16(OPENIBNAL_MSG_QUEUE_SIZE)) {
+               if (wcr->wcr_queue_depth != cpu_to_le16(IBNAL_MSG_QUEUE_SIZE)) {
                        CERROR ("Can't connect "LPX64": bad queue depth %d\n",
                                conn->ibc_peer->ibp_nid,
                                le16_to_cpu(wcr->wcr_queue_depth));
-                       koibnal_connreq_done (conn, 1, -EPROTO);
+                       kibnal_connreq_done (conn, 1, -EPROTO);
                        break;
                }

                if (le64_to_cpu(wcr->wcr_nid) != conn->ibc_peer->ibp_nid) {
                        CERROR ("Unexpected NID "LPX64" from "LPX64"\n",
                                le64_to_cpu(wcr->wcr_nid), conn->ibc_peer->ibp_nid);
-                       koibnal_connreq_done (conn, 1, -EPROTO);
+                       kibnal_connreq_done (conn, 1, -EPROTO);
                        break;
                }

@@ -2033,7 +2094,7 @@ koibnal_active_conn_callback (tTS_IB_CM_EVENT event,
                       conn, conn->ibc_peer->ibp_nid);

                conn->ibc_incarnation = le64_to_cpu(wcr->wcr_incarnation);
-               conn->ibc_credits = OPENIBNAL_MSG_QUEUE_SIZE;
+
conn->ibc_credits = IBNAL_MSG_QUEUE_SIZE; break; } @@ -2041,86 +2102,86 @@ koibnal_active_conn_callback (tTS_IB_CM_EVENT event, CDEBUG(D_WARNING, "Connection %p -> "LPX64" Established\n", conn, conn->ibc_peer->ibp_nid); - koibnal_connreq_done (conn, 1, 0); + kibnal_connreq_done (conn, 1, 0); break; case TS_IB_CM_IDLE: CERROR("Connection %p -> "LPX64" IDLE\n", conn, conn->ibc_peer->ibp_nid); /* Back out state change: I'm disengaged from CM */ - conn->ibc_state = OPENIBNAL_CONN_INIT_QP; + conn->ibc_state = IBNAL_CONN_INIT_QP; - koibnal_connreq_done (conn, 1, -ECONNABORTED); + kibnal_connreq_done (conn, 1, -ECONNABORTED); break; default: CERROR("Connection %p -> "LPX64" ERROR %d\n", conn, conn->ibc_peer->ibp_nid, event); - koibnal_connreq_done (conn, 1, -ECONNABORTED); + kibnal_connreq_done (conn, 1, -ECONNABORTED); break; } - /* NB if the connreq is done, we switch to koibnal_conn_callback */ + /* NB if the connreq is done, we switch to kibnal_conn_callback */ return TS_IB_CM_CALLBACK_PROCEED; } int -koibnal_pathreq_callback (tTS_IB_CLIENT_QUERY_TID tid, int status, +kibnal_pathreq_callback (tTS_IB_CLIENT_QUERY_TID tid, int status, struct ib_path_record *resp, int remaining, void *arg) { - koib_conn_t *conn = arg; + kib_conn_t *conn = arg; if (status != 0) { CERROR ("status %d\n", status); - koibnal_connreq_done (conn, 1, status); + kibnal_connreq_done (conn, 1, status); goto out; } conn->ibc_connreq->cr_path = *resp; - conn->ibc_connreq->cr_wcr = (koib_wire_connreq_t) { - .wcr_magic = cpu_to_le32(OPENIBNAL_MSG_MAGIC), - .wcr_version = cpu_to_le16(OPENIBNAL_MSG_VERSION), - .wcr_queue_depth = cpu_to_le16(OPENIBNAL_MSG_QUEUE_SIZE), - .wcr_nid = cpu_to_le64(koibnal_data.koib_nid), - .wcr_incarnation = cpu_to_le64(koibnal_data.koib_incarnation), + conn->ibc_connreq->cr_wcr = (kib_wire_connreq_t) { + .wcr_magic = cpu_to_le32(IBNAL_MSG_MAGIC), + .wcr_version = cpu_to_le16(IBNAL_MSG_VERSION), + .wcr_queue_depth = cpu_to_le16(IBNAL_MSG_QUEUE_SIZE), + .wcr_nid = cpu_to_le64(kibnal_data.kib_nid), + .wcr_incarnation = cpu_to_le64(kibnal_data.kib_incarnation), }; conn->ibc_connreq->cr_connparam = (struct ib_cm_active_param) { .qp = conn->ibc_qp, .req_private_data = &conn->ibc_connreq->cr_wcr, .req_private_data_len = sizeof(conn->ibc_connreq->cr_wcr), - .responder_resources = OPENIBNAL_RESPONDER_RESOURCES, - .initiator_depth = OPENIBNAL_RESPONDER_RESOURCES, - .retry_count = OPENIBNAL_RETRY, - .rnr_retry_count = OPENIBNAL_RNR_RETRY, - .cm_response_timeout = koibnal_tunables.koib_io_timeout, - .max_cm_retries = OPENIBNAL_CM_RETRY, - .flow_control = OPENIBNAL_FLOW_CONTROL, + .responder_resources = IBNAL_RESPONDER_RESOURCES, + .initiator_depth = IBNAL_RESPONDER_RESOURCES, + .retry_count = IBNAL_RETRY, + .rnr_retry_count = IBNAL_RNR_RETRY, + .cm_response_timeout = kibnal_tunables.kib_io_timeout, + .max_cm_retries = IBNAL_CM_RETRY, + .flow_control = IBNAL_FLOW_CONTROL, }; /* XXX set timeout just like SDP!!!*/ conn->ibc_connreq->cr_path.packet_life = 13; /* Flag I'm getting involved with the CM... 
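
Every field of kib_wire_connreq_t is fixed little-endian on the wire, which is why the builder above uses cpu_to_le*() and the CM callbacks check with le*_to_cpu(). A self-contained sketch of the convention, using a made-up struct and values rather than the NAL's real ones:

        #include <linux/types.h>
        #include <asm/byteorder.h>

        typedef struct {                        /* stand-in wire header */
                __u32 magic;
                __u16 version;
                __u16 queue_depth;
        } demo_wire_t;

        static void
        demo_pack (demo_wire_t *w)              /* sender: CPU -> wire */
        {
                w->magic       = cpu_to_le32(0x0be91b91);  /* made-up magic */
                w->version     = cpu_to_le16(1);
                w->queue_depth = cpu_to_le16(8);
        }

        static int
        demo_check (const demo_wire_t *w)       /* receiver: wire -> CPU */
        {
                return (le32_to_cpu(w->magic) == 0x0be91b91 &&
                        le16_to_cpu(w->version) == 1);
        }
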
*/ - conn->ibc_state = OPENIBNAL_CONN_CONNECTING; + conn->ibc_state = IBNAL_CONN_CONNECTING; CDEBUG(D_NET, "Connecting to, service id "LPX64", on "LPX64"\n", conn->ibc_connreq->cr_service.service_id, - *koibnal_service_nid_field(&conn->ibc_connreq->cr_service)); + *kibnal_service_nid_field(&conn->ibc_connreq->cr_service)); - /* koibnal_connect_callback gets my conn ref */ + /* kibnal_connect_callback gets my conn ref */ status = ib_cm_connect (&conn->ibc_connreq->cr_connparam, &conn->ibc_connreq->cr_path, NULL, conn->ibc_connreq->cr_service.service_id, 0, - koibnal_active_conn_callback, conn, + kibnal_active_conn_callback, conn, &conn->ibc_comm_id); if (status != 0) { CERROR ("Connect: %d\n", status); /* Back out state change: I've not got a CM comm_id yet... */ - conn->ibc_state = OPENIBNAL_CONN_INIT_QP; - koibnal_connreq_done (conn, 1, status); + conn->ibc_state = IBNAL_CONN_INIT_QP; + kibnal_connreq_done (conn, 1, status); } out: @@ -2129,58 +2190,58 @@ koibnal_pathreq_callback (tTS_IB_CLIENT_QUERY_TID tid, int status, } void -koibnal_service_get_callback (tTS_IB_CLIENT_QUERY_TID tid, int status, - struct ib_common_attrib_service *resp, void *arg) +kibnal_service_get_callback (tTS_IB_CLIENT_QUERY_TID tid, int status, + struct ib_common_attrib_service *resp, void *arg) { - koib_conn_t *conn = arg; + kib_conn_t *conn = arg; if (status != 0) { CERROR ("status %d\n", status); - koibnal_connreq_done (conn, 1, status); + kibnal_connreq_done (conn, 1, status); return; } CDEBUG(D_NET, "Got status %d, service id "LPX64", on "LPX64"\n", status, resp->service_id, - *koibnal_service_nid_field(resp)); + *kibnal_service_nid_field(resp)); conn->ibc_connreq->cr_service = *resp; - status = ib_cached_gid_get(koibnal_data.koib_device, - koibnal_data.koib_port, 0, + status = ib_cached_gid_get(kibnal_data.kib_device, + kibnal_data.kib_port, 0, conn->ibc_connreq->cr_gid); LASSERT (status == 0); - /* koibnal_pathreq_callback gets my conn ref */ - status = tsIbPathRecordRequest (koibnal_data.koib_device, - koibnal_data.koib_port, + /* kibnal_pathreq_callback gets my conn ref */ + status = tsIbPathRecordRequest (kibnal_data.kib_device, + kibnal_data.kib_port, conn->ibc_connreq->cr_gid, conn->ibc_connreq->cr_service.service_gid, conn->ibc_connreq->cr_service.service_pkey, 0, - koibnal_tunables.koib_io_timeout * HZ, + kibnal_tunables.kib_io_timeout * HZ, 0, - koibnal_pathreq_callback, conn, + kibnal_pathreq_callback, conn, &conn->ibc_connreq->cr_tid); if (status == 0) return; CERROR ("Path record request: %d\n", status); - koibnal_connreq_done (conn, 1, status); + kibnal_connreq_done (conn, 1, status); } void -koibnal_connect_peer (koib_peer_t *peer) +kibnal_connect_peer (kib_peer_t *peer) { - koib_conn_t *conn = koibnal_create_conn(); + kib_conn_t *conn = kibnal_create_conn(); int rc; LASSERT (peer->ibp_connecting != 0); if (conn == NULL) { CERROR ("Can't allocate conn\n"); - koibnal_peer_connect_failed (peer, 1, -ENOMEM); + kibnal_peer_connect_failed (peer, 1, -ENOMEM); return; } @@ -2190,85 +2251,101 @@ koibnal_connect_peer (koib_peer_t *peer) PORTAL_ALLOC (conn->ibc_connreq, sizeof (*conn->ibc_connreq)); if (conn->ibc_connreq == NULL) { CERROR ("Can't allocate connreq\n"); - koibnal_connreq_done (conn, 1, -ENOMEM); + kibnal_connreq_done (conn, 1, -ENOMEM); return; } memset(conn->ibc_connreq, 0, sizeof (*conn->ibc_connreq)); - koibnal_set_service_keys(&conn->ibc_connreq->cr_service, peer->ibp_nid); + kibnal_set_service_keys(&conn->ibc_connreq->cr_service, peer->ibp_nid); - /* koibnal_service_get_callback gets 
my conn ref */ - rc = ib_service_get (koibnal_data.koib_device, - koibnal_data.koib_port, + /* kibnal_service_get_callback gets my conn ref */ + rc = ib_service_get (kibnal_data.kib_device, + kibnal_data.kib_port, &conn->ibc_connreq->cr_service, - KOIBNAL_SERVICE_KEY_MASK, - koibnal_tunables.koib_io_timeout * HZ, - koibnal_service_get_callback, conn, + KIBNAL_SERVICE_KEY_MASK, + kibnal_tunables.kib_io_timeout * HZ, + kibnal_service_get_callback, conn, &conn->ibc_connreq->cr_tid); if (rc == 0) return; CERROR ("ib_service_get: %d\n", rc); - koibnal_connreq_done (conn, 1, rc); + kibnal_connreq_done (conn, 1, rc); } int -koibnal_conn_timed_out (koib_conn_t *conn) +kibnal_conn_timed_out (kib_conn_t *conn) { - koib_tx_t *tx; + kib_tx_t *tx; struct list_head *ttmp; unsigned long flags; - int rc = 0; spin_lock_irqsave (&conn->ibc_lock, flags); - list_for_each (ttmp, &conn->ibc_rdma_queue) { - tx = list_entry (ttmp, koib_tx_t, tx_list); + list_for_each (ttmp, &conn->ibc_tx_queue) { + tx = list_entry (ttmp, kib_tx_t, tx_list); - LASSERT (tx->tx_passive_rdma); - LASSERT (tx->tx_passive_rdma_wait); + LASSERT (!tx->tx_passive_rdma_wait); + LASSERT (tx->tx_sending == 0); - if (time_after_eq (jiffies, tx->tx_passive_rdma_deadline)) { - rc = 1; - break; + if (time_after_eq (jiffies, tx->tx_deadline)) { + spin_unlock_irqrestore (&conn->ibc_lock, flags); + return 1; } } + + list_for_each (ttmp, &conn->ibc_active_txs) { + tx = list_entry (ttmp, kib_tx_t, tx_list); + + LASSERT (tx->tx_passive_rdma || + !tx->tx_passive_rdma_wait); + + LASSERT (tx->tx_passive_rdma_wait || + tx->tx_sending != 0); + + if (time_after_eq (jiffies, tx->tx_deadline)) { + spin_unlock_irqrestore (&conn->ibc_lock, flags); + return 1; + } + } + spin_unlock_irqrestore (&conn->ibc_lock, flags); - return rc; + return 0; } void -koibnal_check_conns (int idx) +kibnal_check_conns (int idx) { - struct list_head *peers = &koibnal_data.koib_peers[idx]; + struct list_head *peers = &kibnal_data.kib_peers[idx]; struct list_head *ptmp; - koib_peer_t *peer; - koib_conn_t *conn; + kib_peer_t *peer; + kib_conn_t *conn; struct list_head *ctmp; again: /* NB. We expect to have a look at all the peers and not find any * rdmas to time out, so we just use a shared lock while we * take a look... */ - read_lock (&koibnal_data.koib_global_lock); + read_lock (&kibnal_data.kib_global_lock); list_for_each (ptmp, peers) { - peer = list_entry (ptmp, koib_peer_t, ibp_list); + peer = list_entry (ptmp, kib_peer_t, ibp_list); list_for_each (ctmp, &peer->ibp_conns) { - conn = list_entry (ctmp, koib_conn_t, ibc_list); + conn = list_entry (ctmp, kib_conn_t, ibc_list); + + LASSERT (conn->ibc_state == IBNAL_CONN_ESTABLISHED); - LASSERT (conn->ibc_state == OPENIBNAL_CONN_ESTABLISHED); /* In case we have enough credits to return via a * NOOP, but there were no non-blocking tx descs * free to do it last time... 
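
Both scans in kibnal_conn_timed_out() above compare tx_deadline against jiffies with time_after_eq(), which stays correct across jiffies wrap-around where a plain >= would not. A sketch of the stamp/check pair (demo_io_timeout is a stand-in for the kib_io_timeout tunable):

        static int demo_io_timeout = 50;        /* seconds; stand-in tunable */

        static void
        demo_stamp_deadline (unsigned long *deadline)   /* when posting */
        {
                *deadline = jiffies + demo_io_timeout * HZ;
        }

        static int
        demo_expired (unsigned long deadline)           /* wrap-safe test */
        {
                return time_after_eq (jiffies, deadline);
        }
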
*/ - koibnal_check_sends(conn); + kibnal_check_sends(conn); - if (!koibnal_conn_timed_out(conn)) + if (!kibnal_conn_timed_out(conn)) continue; CDEBUG(D_NET, "++conn[%p] state %d -> "LPX64" (%d)\n", @@ -2276,108 +2353,76 @@ koibnal_check_conns (int idx) atomic_read (&conn->ibc_refcount)); atomic_inc (&conn->ibc_refcount); - read_unlock (&koibnal_data.koib_global_lock); + read_unlock (&kibnal_data.kib_global_lock); CERROR("Timed out RDMA with "LPX64"\n", peer->ibp_nid); - koibnal_close_conn (conn, -ETIMEDOUT); - koibnal_put_conn (conn); + kibnal_close_conn (conn, -ETIMEDOUT); + kibnal_put_conn (conn); /* start again now I've dropped the lock */ goto again; } } - read_unlock (&koibnal_data.koib_global_lock); + read_unlock (&kibnal_data.kib_global_lock); } void -koibnal_terminate_conn (koib_conn_t *conn) +kibnal_terminate_conn (kib_conn_t *conn) { - unsigned long flags; int rc; - int done; CDEBUG(D_NET, "conn %p\n", conn); - LASSERT (conn->ibc_state == OPENIBNAL_CONN_DEATHROW); - conn->ibc_state = OPENIBNAL_CONN_ZOMBIE; + LASSERT (conn->ibc_state == IBNAL_CONN_DEATHROW); + conn->ibc_state = IBNAL_CONN_ZOMBIE; rc = ib_cm_disconnect (conn->ibc_comm_id); if (rc != 0) CERROR ("Error %d disconnecting conn %p -> "LPX64"\n", rc, conn, conn->ibc_peer->ibp_nid); - - /* complete blocked passive RDMAs */ - spin_lock_irqsave (&conn->ibc_lock, flags); - - while (!list_empty (&conn->ibc_rdma_queue)) { - koib_tx_t *tx = list_entry (conn->ibc_rdma_queue.next, - koib_tx_t, tx_list); - - LASSERT (tx->tx_passive_rdma); - LASSERT (tx->tx_passive_rdma_wait); - - list_del (&tx->tx_list); - - tx->tx_passive_rdma_wait = 0; - done = (tx->tx_sending == 0); - - tx->tx_status = -ECONNABORTED; - - spin_unlock_irqrestore (&conn->ibc_lock, flags); - - if (done) - koibnal_tx_done (tx); - - spin_lock_irqsave (&conn->ibc_lock, flags); - } - - spin_unlock_irqrestore (&conn->ibc_lock, flags); - - /* Complete all blocked transmits */ - koibnal_check_sends(conn); } int -koibnal_connd (void *arg) +kibnal_connd (void *arg) { wait_queue_t wait; unsigned long flags; - koib_conn_t *conn; - koib_peer_t *peer; + kib_conn_t *conn; + kib_peer_t *peer; int timeout; int i; int peer_index = 0; unsigned long deadline = jiffies; - kportal_daemonize ("koibnal_connd"); + kportal_daemonize ("kibnal_connd"); kportal_blockallsigs (); init_waitqueue_entry (&wait, current); - spin_lock_irqsave (&koibnal_data.koib_connd_lock, flags); + spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags); for (;;) { - if (!list_empty (&koibnal_data.koib_connd_conns)) { - conn = list_entry (koibnal_data.koib_connd_conns.next, - koib_conn_t, ibc_list); + if (!list_empty (&kibnal_data.kib_connd_conns)) { + conn = list_entry (kibnal_data.kib_connd_conns.next, + kib_conn_t, ibc_list); list_del (&conn->ibc_list); - spin_unlock_irqrestore (&koibnal_data.koib_connd_lock, flags); + spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags); switch (conn->ibc_state) { - case OPENIBNAL_CONN_DEATHROW: + case IBNAL_CONN_DEATHROW: LASSERT (conn->ibc_comm_id != TS_IB_CM_COMM_ID_INVALID); /* Disconnect: conn becomes a zombie in the * callback and last ref reschedules it * here... 
*/ - koibnal_terminate_conn(conn); - koibnal_put_conn (conn); + kibnal_terminate_conn(conn); + kibnal_put_conn (conn); break; - case OPENIBNAL_CONN_ZOMBIE: - koibnal_destroy_conn (conn); + case IBNAL_CONN_ZOMBIE: + kibnal_destroy_conn (conn); break; default: @@ -2386,35 +2431,35 @@ koibnal_connd (void *arg) LBUG(); } - spin_lock_irqsave (&koibnal_data.koib_connd_lock, flags); + spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags); continue; } - if (!list_empty (&koibnal_data.koib_connd_peers)) { - peer = list_entry (koibnal_data.koib_connd_peers.next, - koib_peer_t, ibp_connd_list); + if (!list_empty (&kibnal_data.kib_connd_peers)) { + peer = list_entry (kibnal_data.kib_connd_peers.next, + kib_peer_t, ibp_connd_list); list_del_init (&peer->ibp_connd_list); - spin_unlock_irqrestore (&koibnal_data.koib_connd_lock, flags); + spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags); - koibnal_connect_peer (peer); - koibnal_put_peer (peer); + kibnal_connect_peer (peer); + kibnal_put_peer (peer); - spin_lock_irqsave (&koibnal_data.koib_connd_lock, flags); + spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags); } /* shut down and nobody left to reap... */ - if (koibnal_data.koib_shutdown && - atomic_read(&koibnal_data.koib_nconns) == 0) + if (kibnal_data.kib_shutdown && + atomic_read(&kibnal_data.kib_nconns) == 0) break; - spin_unlock_irqrestore (&koibnal_data.koib_connd_lock, flags); + spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags); /* careful with the jiffy wrap... */ while ((timeout = (int)(deadline - jiffies)) <= 0) { const int n = 4; const int p = 1; - int chunk = koibnal_data.koib_peer_hash_size; + int chunk = kibnal_data.kib_peer_hash_size; /* Time to check for RDMA timeouts on a few more * peers: I do checks every 'p' seconds on a @@ -2424,129 +2469,129 @@ koibnal_connd (void *arg) * connection within (n+1)/n times the timeout * interval. 
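
Concretely: with a wakeup every p seconds and the goal of sweeping the whole peer table every timeout/n seconds, each wakeup must cover hash_size * n * p / timeout buckets. The computation that follows below, restated as a function; for example, hash_size 101, timeout 50s, n 4, p 1 gives 8 buckets per tick:

        static int
        demo_scan_chunk (int peer_hash_size, int io_timeout, int n, int p)
        {
                int chunk = peer_hash_size;

                /* spread one full sweep over timeout/n seconds of
                 * p-second ticks */
                if (io_timeout > n * p)
                        chunk = (chunk * n * p) / io_timeout;
                if (chunk == 0)
                        chunk = 1;      /* always make some progress */
                return chunk;
        }
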
*/ - if (koibnal_tunables.koib_io_timeout > n * p) + if (kibnal_tunables.kib_io_timeout > n * p) chunk = (chunk * n * p) / - koibnal_tunables.koib_io_timeout; + kibnal_tunables.kib_io_timeout; if (chunk == 0) chunk = 1; for (i = 0; i < chunk; i++) { - koibnal_check_conns (peer_index); + kibnal_check_conns (peer_index); peer_index = (peer_index + 1) % - koibnal_data.koib_peer_hash_size; + kibnal_data.kib_peer_hash_size; } deadline += p * HZ; } - koibnal_data.koib_connd_waketime = jiffies + timeout; + kibnal_data.kib_connd_waketime = jiffies + timeout; set_current_state (TASK_INTERRUPTIBLE); - add_wait_queue (&koibnal_data.koib_connd_waitq, &wait); + add_wait_queue (&kibnal_data.kib_connd_waitq, &wait); - if (!koibnal_data.koib_shutdown && - list_empty (&koibnal_data.koib_connd_conns) && - list_empty (&koibnal_data.koib_connd_peers)) + if (!kibnal_data.kib_shutdown && + list_empty (&kibnal_data.kib_connd_conns) && + list_empty (&kibnal_data.kib_connd_peers)) schedule_timeout (timeout); set_current_state (TASK_RUNNING); - remove_wait_queue (&koibnal_data.koib_connd_waitq, &wait); + remove_wait_queue (&kibnal_data.kib_connd_waitq, &wait); - spin_lock_irqsave (&koibnal_data.koib_connd_lock, flags); + spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags); } - spin_unlock_irqrestore (&koibnal_data.koib_connd_lock, flags); + spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags); - koibnal_thread_fini (); + kibnal_thread_fini (); return (0); } int -koibnal_scheduler(void *arg) +kibnal_scheduler(void *arg) { long id = (long)arg; char name[16]; - koib_rx_t *rx; - koib_tx_t *tx; + kib_rx_t *rx; + kib_tx_t *tx; unsigned long flags; int rc; int counter = 0; int did_something; - snprintf(name, sizeof(name), "koibnal_sd_%02ld", id); + snprintf(name, sizeof(name), "kibnal_sd_%02ld", id); kportal_daemonize(name); kportal_blockallsigs(); - spin_lock_irqsave(&koibnal_data.koib_sched_lock, flags); + spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags); for (;;) { did_something = 0; - while (!list_empty(&koibnal_data.koib_sched_txq)) { - tx = list_entry(koibnal_data.koib_sched_txq.next, - koib_tx_t, tx_list); + while (!list_empty(&kibnal_data.kib_sched_txq)) { + tx = list_entry(kibnal_data.kib_sched_txq.next, + kib_tx_t, tx_list); list_del(&tx->tx_list); - spin_unlock_irqrestore(&koibnal_data.koib_sched_lock, + spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, flags); - koibnal_tx_done(tx); + kibnal_tx_done(tx); - spin_lock_irqsave(&koibnal_data.koib_sched_lock, + spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags); } - if (!list_empty(&koibnal_data.koib_sched_rxq)) { - rx = list_entry(koibnal_data.koib_sched_rxq.next, - koib_rx_t, rx_list); + if (!list_empty(&kibnal_data.kib_sched_rxq)) { + rx = list_entry(kibnal_data.kib_sched_rxq.next, + kib_rx_t, rx_list); list_del(&rx->rx_list); - spin_unlock_irqrestore(&koibnal_data.koib_sched_lock, + spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, flags); - koibnal_rx(rx); + kibnal_rx(rx); did_something = 1; - spin_lock_irqsave(&koibnal_data.koib_sched_lock, + spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags); } /* shut down and no receives to complete... 
*/ - if (koibnal_data.koib_shutdown && - atomic_read(&koibnal_data.koib_nconns) == 0) + if (kibnal_data.kib_shutdown && + atomic_read(&kibnal_data.kib_nconns) == 0) break; /* nothing to do or hogging CPU */ - if (!did_something || counter++ == OPENIBNAL_RESCHED) { - spin_unlock_irqrestore(&koibnal_data.koib_sched_lock, + if (!did_something || counter++ == IBNAL_RESCHED) { + spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, flags); counter = 0; if (!did_something) { rc = wait_event_interruptible( - koibnal_data.koib_sched_waitq, - !list_empty(&koibnal_data.koib_sched_txq) || - !list_empty(&koibnal_data.koib_sched_rxq) || - (koibnal_data.koib_shutdown && - atomic_read (&koibnal_data.koib_nconns) == 0)); + kibnal_data.kib_sched_waitq, + !list_empty(&kibnal_data.kib_sched_txq) || + !list_empty(&kibnal_data.kib_sched_rxq) || + (kibnal_data.kib_shutdown && + atomic_read (&kibnal_data.kib_nconns) == 0)); } else { our_cond_resched(); } - spin_lock_irqsave(&koibnal_data.koib_sched_lock, + spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags); } } - spin_unlock_irqrestore(&koibnal_data.koib_sched_lock, flags); + spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, flags); - koibnal_thread_fini(); + kibnal_thread_fini(); return (0); } -lib_nal_t koibnal_lib = { - libnal_data: &koibnal_data, /* NAL private data */ - libnal_send: koibnal_send, - libnal_send_pages: koibnal_send_pages, - libnal_recv: koibnal_recv, - libnal_recv_pages: koibnal_recv_pages, - libnal_dist: koibnal_dist +lib_nal_t kibnal_lib = { + libnal_data: &kibnal_data, /* NAL private data */ + libnal_send: kibnal_send, + libnal_send_pages: kibnal_send_pages, + libnal_recv: kibnal_recv, + libnal_recv_pages: kibnal_recv_pages, + libnal_dist: kibnal_dist }; -- 1.8.3.1
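
For orientation: the portals library reaches this NAL only through the lib_nal_t method table above. A sketch of a call through that table; the wrapper is illustrative, but the signature matches kibnal_send() as declared in this file:

        static ptl_err_t
        demo_send_via_table (lib_nal_t *nal, void *private, lib_msg_t *cookie,
                             ptl_hdr_t *hdr, int type, ptl_nid_t nid,
                             ptl_pid_t pid, unsigned int niov,
                             struct iovec *iov, size_t offset, size_t len)
        {
                /* indirect through the table rather than calling the
                 * NAL's function directly */
                return (nal->libnal_send (nal, private, cookie, hdr, type,
                                          nid, pid, niov, iov, offset, len));
        }
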