nal_t kibnal_api;
ptl_handle_ni_t kibnal_ni;
+kib_data_t kibnal_data;
kib_tunables_t kibnal_tunables;
-kib_data_t kibnal_data = {
- .kib_service_id = IBNAL_SERVICE_NUMBER,
-};
-
#ifdef CONFIG_SYSCTL
#define IBNAL_SYSCTL 202
};
#endif
-#ifdef unused
void
-print_service(IB_SERVICE_RECORD *service, char *tag, int rc)
+kibnal_pause(int ticks) /* put the calling thread to sleep, uninterruptibly, for a 'ticks' timeout */
{
- char name[32];
+ set_current_state(TASK_UNINTERRUPTIBLE); /* must be set before schedule_timeout() so the thread actually sleeps */
+ schedule_timeout(ticks);
+}
- if (service == NULL)
- {
- CWARN("tag : %s\n"
- "status : %d (NULL)\n", tag, rc);
- return;
- }
- strncpy (name, service->ServiceName, sizeof(name)-1);
- name[sizeof(name)-1] = 0;
-
- CWARN("tag : %s\n"
- "status : %d\n"
- "service id: "LPX64"\n"
- "name : %s\n"
- "NID : "LPX64"\n", tag, rc,
- service->RID.ServiceID, name,
- *kibnal_service_nid_field(service));
+__u32 /* Checksum the 'nob' bytes starting at 'ptr'.
+ * For every byte the running 32-bit sum is rotated left by one bit and the
+ * byte is added.  The result is never 0; a computed 0 is reported as 1.
+ * With nob <= 0 no bytes are read and 1 is returned.
+ * NOTE(review): bytes are read through plain 'char', so whether values
+ * >= 0x80 are added sign-extended depends on the platform's char
+ * signedness -- confirm peers built with different signedness agree. */
+kibnal_cksum (void *ptr, int nob)
+{
+ char *c = ptr;
+ __u32 sum = 0;
+
+ while (nob-- > 0)
+ sum = ((sum << 1) | (sum >> 31)) + *c++;
+
+ /* ensure I don't return 0 (== no checksum) */
+ return (sum == 0) ? 1 : sum;
}
-#endif
-/*
- * method is SUBN_ADM_SET, SUBN_ADM_GET, SUBN_ADM_DELETE. Tables not supported.
- * nid is the nid to advertize/query/unadvertize
- */
-static void fill_sa_request(struct sa_request *request, int method, ptl_nid_t nid)
+void /* Record the message type and total size in 'msg'.
+ * ibm_nob becomes the header size (everything before ibm_u) plus
+ * 'body_nob' bytes of type-specific body.  No other field is touched. */
+kibnal_init_msg(kib_msg_t *msg, int type, int body_nob)
{
- gsi_dtgrm_t *dtgrm = request->dtgrm_req;
- sa_mad_v2_t *mad = (sa_mad_v2_t *) dtgrm->mad;
- ib_service_record_v2_t *sr = (ib_service_record_v2_t *) mad->payload;
-
- memset(mad, 0, MAD_BLOCK_SIZE);
-
- request->mad = mad;
-
- dtgrm->rlid = kibnal_data.kib_port_attr.port_sma_address_info.sm_lid;
- dtgrm->sl = kibnal_data.kib_port_attr.port_sma_address_info.service_level;
-
- mad->hdr.base_ver = MAD_IB_BASE_VERSION;
- mad->hdr.class = MAD_CLASS_SUBN_ADM;
- mad->hdr.class_ver = 2;
- mad->hdr.m.ms.method = method;
- mad->hdr.attrib_id = SA_SERVICE_RECORD; /* something(?) will swap that field */
-
- /* Note: the transaction ID is set by the Voltaire stack if it is 0. */
-
- /* TODO: change the 40 to sizeof(something) */
- mad->payload_len = cpu_to_be32(0x40 /*header size */ +
- sizeof (ib_service_record_v2_t));
-
-
- mad->component_mask = cpu_to_be64(
- (1ull << 0) | /* service_id */
- (1ull << 2) | /* service_pkey */
- (1ull << 6) | /* service_name */
- (1ull << 7) | /* service_data8[0] */
- (1ull << 8) | /* service_data8[1] */
- (1ull << 9) | /* service_data8[2] */
- (1ull << 10) | /* service_data8[3] */
- (1ull << 11) | /* service_data8[4] */
- (1ull << 12) | /* service_data8[5] */
- (1ull << 13) | /* service_data8[6] */
- (1ull << 14) /* service_data8[7] */
- );
-
- sr->service_id = cpu_to_be64(kibnal_data.kib_service_id);
- sr->service_pkey = cpu_to_be16(kibnal_data.kib_port_pkey);
-
- /* Set the service name and the data (bytes 0 to 7) in data8 */
- kibnal_set_service_keys(sr, nid);
-
- if (method == SUBN_ADM_SET) {
- mad->component_mask |= cpu_to_be64(
- (1ull << 1) | /* service_gid */
- (1ull << 4) /* service_lease */
- );
-
- sr->service_gid = kibnal_data.kib_port_gid;
- gid_swap(&sr->service_gid);
- sr->service_lease = cpu_to_be32(0xffffffff);
- }
-
- CDEBUG(D_NET, "SA request %02x for service id "LPX64" %s:"LPX64"\n",
- mad->hdr.m.ms.method,
- sr->service_id,
- sr->service_name,
- *kibnal_service_nid_field(sr));
+ msg->ibm_type = type;
+ msg->ibm_nob = offsetof(kib_msg_t, ibm_u) + body_nob;
}
-/* Do an advertizement operation:
- * SUBN_ADM_GET = 0x01 (i.e. query),
- * SUBN_ADM_SET = 0x02 (i.e. advertize),
- * SUBN_ADM_DELETE = 0x15 (i.e. un-advertize).
- * If callback is NULL, the function is synchronous (and context is ignored).
- */
-int kibnal_advertize_op(ptl_nid_t nid, int op, sa_request_cb_t callback, void *context)
+void /* Complete the header of an outgoing message.
+ * Sets magic, version, credits, the local NID and incarnation as source,
+ * and 'dstnid'/'dststamp' as destination.  ibm_type and ibm_nob are NOT
+ * set here and must already be valid.  When IBNAL_CKSUM is non-zero the
+ * checksum is computed over the first ibm_nob bytes of the message. */
+kibnal_pack_msg(kib_msg_t *msg, int credits, ptl_nid_t dstnid, __u64 dststamp)
{
- struct sa_request *request;
- int ret;
+ /* CAVEAT EMPTOR! all message fields not set here should have been
+ * initialised previously. */
+ msg->ibm_magic = IBNAL_MSG_MAGIC;
+ msg->ibm_version = IBNAL_MSG_VERSION;
+ /* ibm_type */
+ msg->ibm_credits = credits;
+ /* ibm_nob */
+ msg->ibm_cksum = 0; /* stays 0 (== no checksum) unless IBNAL_CKSUM is enabled below */
+ msg->ibm_srcnid = kibnal_lib.libnal_ni.ni_pid.nid;
+ msg->ibm_srcstamp = kibnal_data.kib_incarnation;
+ msg->ibm_dstnid = dstnid;
+ msg->ibm_dststamp = dststamp;
+#if IBNAL_CKSUM
+ /* NB ibm_cksum zero while computing cksum */
+ msg->ibm_cksum = kibnal_cksum(msg, msg->ibm_nob);
+#endif
+}
- LASSERT (kibnal_data.kib_nid != PTL_NID_ANY);
+/* Validate a received message in place and convert it to host byte order.
+ *
+ * msg: buffer holding the message as received; modified in place.
+ * nob: number of bytes actually received.
+ *
+ * A byte-swapped magic marks a peer of opposite endianness; in that case
+ * the header fields (except ibm_magic) and the type-specific fields are
+ * swabbed.  The checksum, if the sender supplied one (non-zero), is
+ * verified before anything is swabbed.
+ *
+ * Returns 0 if the message is well formed, -EPROTO otherwise. */
+int
+kibnal_unpack_msg(kib_msg_t *msg, int nob)
+{
+ const int hdr_size = offsetof(kib_msg_t, ibm_u);
+ __u32 msg_cksum;
+ int flip;
+ int msg_nob;
+ int i;
+ int n;
+
+ /* 6 bytes are enough to have received magic + version */
+ if (nob < 6) {
+ CERROR("Short message: %d\n", nob);
+ return -EPROTO;
+ }
+
+ if (msg->ibm_magic == IBNAL_MSG_MAGIC) {
+ flip = 0;
+ } else if (msg->ibm_magic == __swab32(IBNAL_MSG_MAGIC)) {
+ flip = 1;
+ } else {
+ CERROR("Bad magic: %08x\n", msg->ibm_magic);
+ return -EPROTO;
+ }
- CDEBUG(D_NET, "kibnal_advertize_op: nid="LPX64", op=%d\n", nid, op);
+ if (msg->ibm_version !=
+ (flip ? __swab16(IBNAL_MSG_VERSION) : IBNAL_MSG_VERSION)) {
+ CERROR("Bad version: %d\n", msg->ibm_version);
+ return -EPROTO;
+ }
- request = alloc_sa_request();
- if (request == NULL) {
- CERROR("Cannot allocate a SA request");
- return -ENOMEM;
+ if (nob < hdr_size) {
+ CERROR("Short message: %d\n", nob);
+ return -EPROTO;
}
-
- fill_sa_request(request, op, nid);
- if (callback) {
- request->callback = callback;
- request->context = context;
- } else {
- init_completion(&request->signal);
+ msg_nob = flip ? __swab32(msg->ibm_nob) : msg->ibm_nob;
+ /* msg_nob is a signed int taken from the wire.  Reject anything smaller
+ * than the header here: the per-type checks below compare it against
+ * offsetof()/sizeof results (size_t), so a negative msg_nob would be
+ * converted to a huge unsigned value and pass every one of them. */
+ if (msg_nob < hdr_size) {
+ CERROR("Bad message size: %d, header is %d\n", msg_nob, hdr_size);
+ return -EPROTO;
+ }
+ if (msg_nob > nob) {
+ CERROR("Short message: got %d, wanted %d\n", nob, msg_nob);
+ return -EPROTO;
}
- ret = vibnal_start_sa_request(request);
- if (ret) {
- CERROR("vibnal_send_sa failed: %d\n", ret);
- free_sa_request(request);
- } else {
- if (callback) {
- /* Return. The callback will have to free the SA request. */
- ret = 0;
- } else {
- wait_for_completion(&request->signal);
+ /* checksum must be computed with ibm_cksum zero and BEFORE anything
+ * gets flipped */
+ msg_cksum = flip ? __swab32(msg->ibm_cksum) : msg->ibm_cksum;
+ msg->ibm_cksum = 0;
+ if (msg_cksum != 0 &&
+ msg_cksum != kibnal_cksum(msg, msg_nob)) {
+ CERROR("Bad checksum\n");
+ return -EPROTO;
+ }
+ msg->ibm_cksum = msg_cksum;
+
+ if (flip) {
+ /* leave magic unflipped as a clue to peer endianness */
+ __swab16s(&msg->ibm_version);
+ CLASSERT (sizeof(msg->ibm_type) == 1);
+ CLASSERT (sizeof(msg->ibm_credits) == 1);
+ msg->ibm_nob = msg_nob;
+ __swab64s(&msg->ibm_srcnid);
+ __swab64s(&msg->ibm_srcstamp);
+ __swab64s(&msg->ibm_dstnid);
+ __swab64s(&msg->ibm_dststamp);
+ }
+
+ if (msg->ibm_srcnid == PTL_NID_ANY) {
+ CERROR("Bad src nid: "LPX64"\n", msg->ibm_srcnid);
+ return -EPROTO;
+ }
- ret = request->status;
+ switch (msg->ibm_type) {
+ default:
+ CERROR("Unknown message type %x\n", msg->ibm_type);
+ return -EPROTO;
+
+ case IBNAL_MSG_NOOP:
+ break;
- if (ret != 0) {
- CERROR ("Error %d in advertising operation %d for NID "LPX64"\n",
- ret, op, kibnal_data.kib_nid);
+ case IBNAL_MSG_IMMEDIATE:
+ if (msg_nob < offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0])) {
+ CERROR("Short IMMEDIATE: %d(%d)\n", msg_nob,
+ (int)offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0]));
+ return -EPROTO;
+ }
+ break;
+
+ case IBNAL_MSG_PUT_REQ:
+ /* CAVEAT EMPTOR! We don't actually put ibprm_rd on the wire;
+ * it's just there to remember the source buffers while we wait
+ * for the PUT_ACK */
+ if (msg_nob < offsetof(kib_msg_t, ibm_u.putreq.ibprm_rd)) {
+ /* report the size actually required by the check above */
+ CERROR("Short PUT_REQ: %d(%d)\n", msg_nob,
+ (int)offsetof(kib_msg_t, ibm_u.putreq.ibprm_rd));
+ return -EPROTO;
+ }
+ break;
+
+ case IBNAL_MSG_PUT_ACK:
+ if (msg_nob < offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[0])) {
+ CERROR("Short PUT_ACK: %d(%d)\n", msg_nob,
+ (int)offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[0]));
+ return -EPROTO;
+ }
+
+ if (flip) {
+ __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key);
+ __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nfrag);
+ }
+
+ /* the fragment count must be validated before it is used to size
+ * the fragment array */
+ n = msg->ibm_u.putack.ibpam_rd.rd_nfrag;
+ if (n <= 0 || n > IBNAL_MAX_RDMA_FRAGS) {
+ CERROR("Bad PUT_ACK nfrags: %d, should be 0 < n <= %d\n",
+ n, IBNAL_MAX_RDMA_FRAGS);
+ return -EPROTO;
+ }
+
+ if (msg_nob < offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n])) {
+ CERROR("Short PUT_ACK: %d(%d)\n", msg_nob,
+ (int)offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n]));
+ return -EPROTO;
+ }
+
+ if (flip)
+ for (i = 0; i < n; i++) {
+ __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_nob);
+ __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_addr_lo);
+ __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_addr_hi);
}
-
- free_sa_request(request);
+ break;
+
+ case IBNAL_MSG_GET_REQ:
+ if (msg_nob < hdr_size + sizeof(msg->ibm_u.get)) {
+ CERROR("Short GET_REQ: %d(%d)\n", msg_nob,
+ (int)(hdr_size + sizeof(msg->ibm_u.get)));
+ return -EPROTO;
+ }
+ if (flip) {
+ __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key);
+ __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nfrag);
}
- }
- return ret;
+ n = msg->ibm_u.get.ibgm_rd.rd_nfrag;
+ if (n <= 0 || n > IBNAL_MAX_RDMA_FRAGS) {
+ CERROR("Bad GET_REQ nfrags: %d, should be 0 < n <= %d\n",
+ n, IBNAL_MAX_RDMA_FRAGS);
+ return -EPROTO;
+ }
+
+ if (msg_nob < offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n])) {
+ CERROR("Short GET_REQ: %d(%d)\n", msg_nob,
+ (int)offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n]));
+ return -EPROTO;
+ }
+
+ if (flip)
+ for (i = 0; i < msg->ibm_u.get.ibgm_rd.rd_nfrag; i++) {
+ __swab32s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_nob);
+ __swab32s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_addr_lo);
+ __swab32s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_addr_hi);
+ }
+ break;
+
+ case IBNAL_MSG_PUT_NAK:
+ case IBNAL_MSG_PUT_DONE:
+ case IBNAL_MSG_GET_DONE:
+ if (msg_nob < hdr_size + sizeof(msg->ibm_u.completion)) {
+ CERROR("Short RDMA completion: %d(%d)\n", msg_nob,
+ (int)(hdr_size + sizeof(msg->ibm_u.completion)));
+ return -EPROTO;
+ }
+ if (flip)
+ __swab32s(&msg->ibm_u.completion.ibcm_status);
+ break;
+
+ case IBNAL_MSG_CONNREQ:
+ case IBNAL_MSG_CONNACK:
+ if (msg_nob < hdr_size + sizeof(msg->ibm_u.connparams)) {
+ CERROR("Short connreq/ack: %d(%d)\n", msg_nob,
+ (int)(hdr_size + sizeof(msg->ibm_u.connparams)));
+ return -EPROTO;
+ }
+ if (flip) {
+ __swab32s(&msg->ibm_u.connparams.ibcp_queue_depth);
+ __swab32s(&msg->ibm_u.connparams.ibcp_max_msg_size);
+ __swab32s(&msg->ibm_u.connparams.ibcp_max_frags);
+ }
+ break;
+ }
+ return 0;
}
-static int
+int
kibnal_set_mynid(ptl_nid_t nid)
{
- struct timeval tv;
- lib_ni_t *ni = &kibnal_lib.libnal_ni;
- int rc;
- vv_return_t retval;
+ static cm_listen_data_t info; /* protected by kib_nid_mutex */
+
+ lib_ni_t *ni = &kibnal_lib.libnal_ni;
+ int rc;
+ cm_return_t cmrc;
CDEBUG(D_IOCTL, "setting mynid to "LPX64" (old nid="LPX64")\n",
nid, ni->ni_pid.nid);
- do_gettimeofday(&tv);
-
down (&kibnal_data.kib_nid_mutex);
- if (nid == kibnal_data.kib_nid) {
+ if (nid == ni->ni_pid.nid) {
/* no change of NID */
up (&kibnal_data.kib_nid_mutex);
return (0);
}
- CDEBUG(D_NET, "NID "LPX64"("LPX64")\n",
- kibnal_data.kib_nid, nid);
-
- /* Unsubscribes the current NID */
- if (kibnal_data.kib_nid != PTL_NID_ANY) {
+ CDEBUG(D_NET, "NID "LPX64"("LPX64")\n", ni->ni_pid.nid, nid);
- rc = kibnal_advertize_op(kibnal_data.kib_nid, SUBN_ADM_DELETE, NULL, NULL);
+ if (kibnal_data.kib_listen_handle != NULL) {
+ cmrc = cm_cancel(kibnal_data.kib_listen_handle);
+ if (cmrc != cm_stat_success)
+ CERROR ("Error %d stopping listener\n", cmrc);
- if (rc) {
- CERROR("Error %d unadvertising NID "LPX64"\n",
- rc, kibnal_data.kib_nid);
- }
- }
+ kibnal_pause(HZ/10); /* ensure no more callbacks */
- kibnal_data.kib_nid = ni->ni_pid.nid = nid;
- kibnal_data.kib_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
+ cmrc = cm_destroy_cep(kibnal_data.kib_listen_handle);
+ if (cmrc != vv_return_ok)
+ CERROR ("Error %d destroying CEP\n", cmrc);
- /* Destroys the current endpoint, if any. */
- if (kibnal_data.kib_cep) {
- retval = cm_cancel(kibnal_data.kib_cep);
- if (retval)
- CERROR ("Error %d stopping listener\n", retval);
-
- retval = cm_destroy_cep(kibnal_data.kib_cep);
- if (retval)
- CERROR ("Error %d destroying CEP\n", retval);
-
- kibnal_data.kib_cep = NULL;
+ kibnal_data.kib_listen_handle = NULL;
}
-
+
+ /* Change NID. NB queued passive connection requests (if any) will be
+ * rejected with an incorrect destination NID */
+ ni->ni_pid.nid = nid;
+ kibnal_data.kib_incarnation++;
+ mb();
+
/* Delete all existing peers and their connections after new
* NID/incarnation set to ensure no old connections in our brave
* new world. */
kibnal_del_peer (PTL_NID_ANY, 0);
- if (kibnal_data.kib_nid == PTL_NID_ANY) {
- /* No new NID to install. The driver is shuting down. */
- up (&kibnal_data.kib_nid_mutex);
- return (0);
- }
-
- /* remove any previous advert (crashed node etc) */
- kibnal_advertize_op(kibnal_data.kib_nid, SUBN_ADM_DELETE, NULL, NULL);
-
- kibnal_data.kib_cep = cm_create_cep(cm_cep_transp_rc);
- if (kibnal_data.kib_cep == NULL) {
- CERROR ("Can't create CEP\n");
- rc = -ENOMEM;
- } else {
- cm_return_t cmret;
- cm_listen_data_t info;
+ if (ni->ni_pid.nid != PTL_NID_ANY) { /* got a new NID to install */
+ kibnal_data.kib_listen_handle =
+ cm_create_cep(cm_cep_transp_rc);
+ if (kibnal_data.kib_listen_handle == NULL) {
+ CERROR ("Can't create listen CEP\n");
+ rc = -ENOMEM;
+ goto failed_0;
+ }
- CDEBUG(D_NET, "Created CEP %p for listening\n", kibnal_data.kib_cep);
+ CDEBUG(D_NET, "Created CEP %p for listening\n",
+ kibnal_data.kib_listen_handle);
memset(&info, 0, sizeof(info));
- info.listen_addr.end_pt.sid = kibnal_data.kib_service_id;
+ info.listen_addr.end_pt.sid = kibnal_data.kib_svc_id;
- cmret = cm_listen(kibnal_data.kib_cep, &info,
- kibnal_listen_callback, NULL);
- if (cmret) {
- CERROR ("cm_listen error: %d\n", cmret);
+ cmrc = cm_listen(kibnal_data.kib_listen_handle, &info,
+ kibnal_listen_callback, NULL);
+ if (cmrc != 0) {
+ CERROR ("cm_listen error: %d\n", cmrc);
rc = -EINVAL;
- } else {
- rc = 0;
+ goto failed_1;
}
}
-
- if (rc == 0) {
- rc = kibnal_advertize_op(kibnal_data.kib_nid, SUBN_ADM_SET, NULL, NULL);
- if (rc == 0) {
-#ifdef IBNAL_CHECK_ADVERT
- kibnal_advertize_op(kibnal_data.kib_nid, SUBN_ADM_GET, NULL, NULL);
-#endif
- up (&kibnal_data.kib_nid_mutex);
- return (0);
- }
-
- retval = cm_cancel (kibnal_data.kib_cep);
- if (retval)
- CERROR("cm_cancel failed: %d\n", retval);
- retval = cm_destroy_cep (kibnal_data.kib_cep);
- if (retval)
- CERROR("cm_destroy_cep failed: %d\n", retval);
-
- /* remove any peers that sprung up while I failed to
- * advertise myself */
- kibnal_del_peer (PTL_NID_ANY, 0);
- }
+ up (&kibnal_data.kib_nid_mutex);
+ return (0);
- kibnal_data.kib_nid = PTL_NID_ANY;
+ failed_1:
+ cmrc = cm_destroy_cep(kibnal_data.kib_listen_handle);
+ LASSERT (cmrc == cm_stat_success);
+ kibnal_data.kib_listen_handle = NULL;
+ failed_0:
+ ni->ni_pid.nid = PTL_NID_ANY;
+ kibnal_data.kib_incarnation++;
+ mb();
+ kibnal_del_peer (PTL_NID_ANY, 0);
up (&kibnal_data.kib_nid_mutex);
- return (rc);
+ return rc;
}
kib_peer_t *
peer->ibp_reconnect_interval = IBNAL_MIN_RECONNECT_INTERVAL;
atomic_inc (&kibnal_data.kib_npeers);
- return (peer);
+ if (atomic_read(&kibnal_data.kib_npeers) <= IBNAL_CONCURRENT_PEERS)
+ return peer;
+
+ CERROR("Too many peers: CQ will overflow\n");
+ kibnal_peer_decref(peer);
+ return NULL;
}
void
return (NULL);
}
-kib_peer_t *
-kibnal_get_peer (ptl_nid_t nid)
-{
- kib_peer_t *peer;
- unsigned long flags;
-
- read_lock_irqsave(&kibnal_data.kib_global_lock, flags);
- peer = kibnal_find_peer_locked (nid);
- if (peer != NULL) /* +1 ref for caller? */
- kib_peer_addref(peer);
- read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
- return (peer);
-}
-
void
kibnal_unlink_peer_locked (kib_peer_t *peer)
{ /* Take an active peer off its peer list and drop the reference the list held on it. */
LASSERT (kibnal_peer_active(peer));
list_del_init (&peer->ibp_list);
/* lose peerlist's ref */
- kib_peer_decref(peer);
+ kibnal_peer_decref(peer);
}
-static int
-kibnal_get_peer_info (int index, ptl_nid_t *nidp, int *persistencep)
+int
+kibnal_get_peer_info (int index, ptl_nid_t *nidp, __u32 *ipp,
+ int *persistencep)
{
kib_peer_t *peer;
struct list_head *ptmp;
- unsigned long flags;
int i;
+ unsigned long flags;
read_lock_irqsave(&kibnal_data.kib_global_lock, flags);
continue;
*nidp = peer->ibp_nid;
+ *ipp = peer->ibp_ip;
*persistencep = peer->ibp_persistence;
read_unlock_irqrestore(&kibnal_data.kib_global_lock,
return (-ENOENT);
}
-static int
-kibnal_add_persistent_peer (ptl_nid_t nid)
+int
+kibnal_add_persistent_peer (ptl_nid_t nid, __u32 ip)
{
- unsigned long flags;
kib_peer_t *peer;
kib_peer_t *peer2;
+ unsigned long flags;
+
+ CDEBUG(D_NET, LPX64"@%08x\n", nid, ip);
if (nid == PTL_NID_ANY)
return (-EINVAL);
if (peer == NULL)
return (-ENOMEM);
- write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
+ write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
peer2 = kibnal_find_peer_locked (nid);
if (peer2 != NULL) {
- kib_peer_decref (peer);
+ kibnal_peer_decref (peer);
peer = peer2;
} else {
/* peer table takes existing ref on peer */
kibnal_nid2peerlist (nid));
}
+ peer->ibp_ip = ip;
peer->ibp_persistence++;
- write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
+ write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
return (0);
}
-static void
+void
kibnal_del_peer_locked (kib_peer_t *peer, int single_share)
{
struct list_head *ctmp;
int
kibnal_del_peer (ptl_nid_t nid, int single_share)
{
- unsigned long flags;
struct list_head *ptmp;
struct list_head *pnxt;
kib_peer_t *peer;
int lo;
int hi;
int i;
+ unsigned long flags;
int rc = -ENOENT;
- write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
+ write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
if (nid != PTL_NID_ANY)
lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers;
}
}
out:
- write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
-
+ write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
return (rc);
}
-static kib_conn_t *
+kib_conn_t *
kibnal_get_conn_by_idx (int index)
{
kib_peer_t *peer;
struct list_head *ptmp;
kib_conn_t *conn;
struct list_head *ctmp;
- unsigned long flags;
int i;
+ unsigned long flags;
read_lock_irqsave(&kibnal_data.kib_global_lock, flags);
continue;
conn = list_entry (ctmp, kib_conn_t, ibc_list);
- CDEBUG(D_NET, "++conn[%p] state %d -> "LPX64" (%d)\n",
- conn, conn->ibc_state, conn->ibc_peer->ibp_nid,
- atomic_read (&conn->ibc_refcount));
- atomic_inc (&conn->ibc_refcount);
+ kibnal_conn_addref(conn);
read_unlock_irqrestore(&kibnal_data.kib_global_lock,
flags);
return (conn);
return (NULL);
}
+int /* Move conn's queue pair into 'new_state'.
+ * Builds a vv_qp_attr_t for the requested transition (init, rtr, rts,
+ * error or reset) from the per-connection values in conn->ibc_connvars
+ * and hands it to vv_qp_modify().  Any other state hits LBUG().
+ * Returns 0 on success, -EIO if vv_qp_modify() fails. */
+kibnal_set_qp_state (kib_conn_t *conn, vv_qp_state_t new_state)
+{
+ static vv_qp_attr_t attr;
+
+ kib_connvars_t *cv = conn->ibc_connvars;
+ vv_return_t vvrc;
+
+ /* Only called by connd => static OK
+ * ('attr' is shared by every call; the assertions below check that we
+ * really are running on the connd thread and not in interrupt context) */
+ LASSERT (!in_interrupt());
+ LASSERT (current == kibnal_data.kib_connd);
+
+ memset(&attr, 0, sizeof(attr));
+
+ switch (new_state) {
+ default:
+ LBUG(); /* NOTE(review): no break here -- presumably LBUG() never returns; otherwise control falls into the init case */
+
+ case vv_qp_state_init: {
+ struct vv_qp_modify_init_st *init = &attr.modify.params.init;
+
+ init->p_key_indx = cv->cv_pkey_index;
+ init->phy_port_num = cv->cv_port;
+ init->q_key = IBNAL_QKEY; /* XXX but VV_QP_AT_Q_KEY not set! */
+ init->access_control = vv_acc_r_mem_read |
+ vv_acc_r_mem_write; /* XXX vv_acc_l_mem_write ? */
+
+ attr.modify.vv_qp_attr_mask = VV_QP_AT_P_KEY_IX |
+ VV_QP_AT_PHY_PORT_NUM |
+ VV_QP_AT_ACCESS_CON_F;
+ break;
+ }
+ case vv_qp_state_rtr: {
+ struct vv_qp_modify_rtr_st *rtr = &attr.modify.params.rtr;
+ vv_add_vec_t *av = &rtr->remote_add_vec;
+
+ av->dlid = cv->cv_path.dlid;
+ av->grh_flag = (!IBNAL_LOCAL_SUB);
+ av->max_static_rate = IBNAL_R_2_STATIC_RATE(cv->cv_path.rate);
+ av->service_level = cv->cv_path.sl;
+ av->source_path_bit = IBNAL_SOURCE_PATH_BIT;
+ av->pmtu = cv->cv_path.mtu;
+ av->rnr_retry_count = cv->cv_rnr_count;
+ av->global_dest.traffic_class = cv->cv_path.traffic_class;
+ av->global_dest.hope_limit = cv->cv_path.hop_limut;
+ av->global_dest.flow_lable = cv->cv_path.flow_label;
+ av->global_dest.s_gid_index = cv->cv_sgid_index;
+ // XXX other av fields zero?
+
+ rtr->destanation_qp = cv->cv_remote_qpn;
+ rtr->receive_psn = cv->cv_rxpsn;
+ rtr->responder_rdma_r_atom_num = IBNAL_OUS_DST_RD;
+
+ // XXX ? rtr->opt_min_rnr_nak_timer = 16;
+
+
+ // XXX sdp sets VV_QP_AT_OP_F but no actual optional options
+ attr.modify.vv_qp_attr_mask = VV_QP_AT_ADD_VEC |
+ VV_QP_AT_DEST_QP |
+ VV_QP_AT_R_PSN |
+ VV_QP_AT_MIN_RNR_NAK_T | /* NOTE(review): flag is set but the timer assignment above is commented out, so the value sent is the memset 0 -- confirm intended */
+ VV_QP_AT_RESP_RDMA_ATOM_OUT_NUM |
+ VV_QP_AT_OP_F;
+ break;
+ }
+ case vv_qp_state_rts: {
+ struct vv_qp_modify_rts_st *rts = &attr.modify.params.rts;
+
+ rts->send_psn = cv->cv_txpsn;
+ rts->local_ack_timeout = IBNAL_LOCAL_ACK_TIMEOUT;
+ rts->retry_num = IBNAL_RETRY_CNT;
+ rts->rnr_num = IBNAL_RNR_CNT;
+ rts->dest_out_rdma_r_atom_num = IBNAL_OUS_DST_RD;
+
+ attr.modify.vv_qp_attr_mask = VV_QP_AT_S_PSN |
+ VV_QP_AT_L_ACK_T |
+ VV_QP_AT_RETRY_NUM |
+ VV_QP_AT_RNR_NUM |
+ VV_QP_AT_DEST_RDMA_ATOM_OUT_NUM;
+ break;
+ }
+ case vv_qp_state_error:
+ case vv_qp_state_reset:
+ attr.modify.vv_qp_attr_mask = 0; /* no per-state parameters; 'cv' is not dereferenced on this path */
+ break;
+ }
+
+ attr.modify.qp_modify_into_state = new_state; /* common to every transition */
+ attr.modify.vv_qp_attr_mask |= VV_QP_AT_STATE;
+
+ vvrc = vv_qp_modify(kibnal_data.kib_hca, conn->ibc_qp, &attr, NULL);
+ if (vvrc != vv_return_ok) {
+ CERROR("Can't modify qp -> "LPX64" state to %d: %d\n",
+ conn->ibc_peer->ibp_nid, new_state, vvrc);
+ return -EIO;
+ }
+
+ return 0;
+}
+
kib_conn_t *
-kibnal_create_conn (void)
+kibnal_create_conn (cm_cep_handle_t cep)
{
- kib_conn_t *conn;
- int i;
- __u64 vaddr = 0;
- __u64 vaddr_base;
- int page_offset;
- int ipage;
- vv_qp_attr_t qp_attr;
- vv_return_t retval;
- int rc;
- void *qp_context;
+ kib_conn_t *conn;
+ int i;
+ __u64 vaddr = 0;
+ __u64 vaddr_base;
+ int page_offset;
+ int ipage;
+ vv_return_t vvrc;
+ int rc;
+
+ static vv_qp_attr_t reqattr;
+ static vv_qp_attr_t rspattr;
+
+ /* Only the connd creates conns => single threaded */
+ LASSERT(!in_interrupt());
+ LASSERT(current == kibnal_data.kib_connd);
PORTAL_ALLOC(conn, sizeof (*conn));
if (conn == NULL) {
/* zero flags, NULL pointers etc... */
memset (conn, 0, sizeof (*conn));
+ INIT_LIST_HEAD (&conn->ibc_early_rxs);
INIT_LIST_HEAD (&conn->ibc_tx_queue);
INIT_LIST_HEAD (&conn->ibc_active_txs);
spin_lock_init (&conn->ibc_lock);
atomic_inc (&kibnal_data.kib_nconns);
/* well not really, but I call destroy() on failure, which decrements */
+ conn->ibc_cep = cep;
+
+ PORTAL_ALLOC(conn->ibc_connvars, sizeof(*conn->ibc_connvars));
+ if (conn->ibc_connvars == NULL) {
+ CERROR("Can't allocate in-progress connection state\n");
+ goto failed;
+ }
+ memset (conn->ibc_connvars, 0, sizeof(*conn->ibc_connvars));
+ /* Random seed for QP sequence number */
+ get_random_bytes(&conn->ibc_connvars->cv_rxpsn,
+ sizeof(conn->ibc_connvars->cv_rxpsn));
+
PORTAL_ALLOC(conn->ibc_rxs, IBNAL_RX_MSGS * sizeof (kib_rx_t));
if (conn->ibc_rxs == NULL) {
CERROR("Cannot allocate RX buffers\n");
rx->rx_msg = (kib_msg_t *)(((char *)page_address(page)) +
page_offset);
- if (kibnal_whole_mem()) {
- void *newaddr;
- vv_mem_reg_h_t mem_h;
- vv_r_key_t r_key;
+#if IBNAL_WHOLE_MEM
+ {
+ vv_mem_reg_h_t mem_h;
+ vv_r_key_t r_key;
/* Voltaire stack already registers the whole
* memory, so use that API. */
- retval = vv_get_gen_mr_attrib(kibnal_data.kib_hca,
- rx->rx_msg,
- IBNAL_MSG_SIZE,
- &mem_h,
- &rx->l_key,
- &r_key);
- if (retval) {
- CERROR("vv_get_gen_mr_attrib failed: %d", retval);
- /* TODO: free pages? */
- goto failed;
- }
+ vvrc = vv_get_gen_mr_attrib(kibnal_data.kib_hca,
+ rx->rx_msg,
+ IBNAL_MSG_SIZE,
+ &mem_h,
+ &rx->rx_lkey,
+ &r_key);
+ LASSERT (vvrc == vv_return_ok);
}
-
+#else
+ rx->rx_vaddr = vaddr;
+#endif
+ CDEBUG(D_NET, "Rx[%d] %p->%p[%x:"LPX64"]\n", i, rx,
+ rx->rx_msg, KIBNAL_RX_LKEY(rx), KIBNAL_RX_VADDR(rx));
+
vaddr += IBNAL_MSG_SIZE;
LASSERT (vaddr <= vaddr_base + IBNAL_RX_MSG_BYTES);
}
}
- qp_attr = (vv_qp_attr_t) {
- .create.qp_type = vv_qp_type_r_conn,
- .create.cq_send_h = kibnal_data.kib_cq,
- .create.cq_receive_h = kibnal_data.kib_cq,
- .create.send_max_outstand_wr = IBNAL_TX_MAX_SG *
- IBNAL_MSG_QUEUE_SIZE,
- .create.receive_max_outstand_wr = IBNAL_MSG_QUEUE_SIZE,
- .create.max_scatgat_per_send_wr = 1,
- .create.max_scatgat_per_receive_wr = 1,
- .create.signaling_type = vv_selectable_signaling, /* TODO: correct? */
- .create.pd_h = kibnal_data.kib_pd,
- .create.recv_solicited_events = vv_signal_all,
- };
- retval = vv_qp_create(kibnal_data.kib_hca, &qp_attr, NULL,
- &conn->ibc_qp, &conn->ibc_qp_attrs);
- if (retval != 0) {
- CERROR ("Failed to create queue pair: %d\n", retval);
+ memset(&reqattr, 0, sizeof(reqattr));
+
+ reqattr.create.qp_type = vv_qp_type_r_conn;
+ reqattr.create.cq_send_h = kibnal_data.kib_cq;
+ reqattr.create.cq_receive_h = kibnal_data.kib_cq;
+ reqattr.create.send_max_outstand_wr = (1 + IBNAL_MAX_RDMA_FRAGS) *
+ IBNAL_MSG_QUEUE_SIZE;
+ reqattr.create.receive_max_outstand_wr = IBNAL_RX_MSGS;
+ reqattr.create.max_scatgat_per_send_wr = 1;
+ reqattr.create.max_scatgat_per_receive_wr = 1;
+ reqattr.create.signaling_type = vv_selectable_signaling;
+ reqattr.create.pd_h = kibnal_data.kib_pd;
+ reqattr.create.recv_solicited_events = vv_selectable_signaling; // vv_signal_all;
+
+ vvrc = vv_qp_create(kibnal_data.kib_hca, &reqattr, NULL,
+ &conn->ibc_qp, &rspattr);
+ if (vvrc != vv_return_ok) {
+ CERROR ("Failed to create queue pair: %d\n", vvrc);
goto failed;
}
/* Mark QP created */
- conn->ibc_state = IBNAL_CONN_INIT_QP;
-
- qp_attr = (vv_qp_attr_t) {
- .modify.qp_modify_into_state = vv_qp_state_init,
- .modify.vv_qp_attr_mask = VV_QP_AT_STATE | VV_QP_AT_PHY_PORT_NUM | VV_QP_AT_P_KEY_IX | VV_QP_AT_ACCESS_CON_F,
- .modify.qp_type = vv_qp_type_r_conn,
-
- .modify.params.init.p_key_indx = 0,
- .modify.params.init.phy_port_num = kibnal_data.kib_port,
- .modify.params.init.access_control = vv_acc_r_mem_write | vv_acc_r_mem_read,
- };
- retval = vv_qp_modify(kibnal_data.kib_hca, conn->ibc_qp, &qp_attr, &conn->ibc_qp_attrs);
- if (retval != 0) {
- CERROR ("Failed to modify queue pair: %d\n", retval);
- goto failed;
- }
-
- retval = vv_qp_query(kibnal_data.kib_hca, conn->ibc_qp, &qp_context, &conn->ibc_qp_attrs);
- if (retval) {
- CERROR ("Failed to query queue pair: %d\n", retval);
+ conn->ibc_state = IBNAL_CONN_INIT;
+ conn->ibc_connvars->cv_local_qpn = rspattr.create_return.qp_num;
+
+ if (rspattr.create_return.receive_max_outstand_wr <
+ IBNAL_MSG_QUEUE_SIZE ||
+ rspattr.create_return.send_max_outstand_wr <
+ (1 + IBNAL_MAX_RDMA_FRAGS) * IBNAL_MSG_QUEUE_SIZE) {
+ CERROR("Insufficient rx/tx work items: wanted %d/%d got %d/%d\n",
+ IBNAL_MSG_QUEUE_SIZE,
+ (1 + IBNAL_MAX_RDMA_FRAGS) * IBNAL_MSG_QUEUE_SIZE,
+ rspattr.create_return.receive_max_outstand_wr,
+ rspattr.create_return.send_max_outstand_wr);
goto failed;
}
void
kibnal_destroy_conn (kib_conn_t *conn)
{
- vv_return_t retval;
+ vv_return_t vvrc;
+
+ /* Only the connd does this (i.e. single threaded) */
+ LASSERT (!in_interrupt());
+ LASSERT (current == kibnal_data.kib_connd);
CDEBUG (D_NET, "connection %p\n", conn);
LASSERT (atomic_read (&conn->ibc_refcount) == 0);
+ LASSERT (list_empty(&conn->ibc_early_rxs));
LASSERT (list_empty(&conn->ibc_tx_queue));
LASSERT (list_empty(&conn->ibc_active_txs));
LASSERT (conn->ibc_nsends_posted == 0);
- LASSERT (conn->ibc_connreq == NULL);
switch (conn->ibc_state) {
+ default:
+ /* conn must be completely disengaged from the network */
+ LBUG();
+
case IBNAL_CONN_DISCONNECTED:
- /* called after connection sequence initiated */
+ /* connvars should have been freed already */
+ LASSERT (conn->ibc_connvars == NULL);
/* fall through */
- case IBNAL_CONN_INIT_QP:
- /* _destroy includes an implicit Reset of the QP which
- * discards posted work */
- retval = vv_qp_destroy(kibnal_data.kib_hca, conn->ibc_qp);
- if (retval)
- CERROR("Can't destroy QP: %d\n", retval);
+ case IBNAL_CONN_INIT:
+ kibnal_set_qp_state(conn, vv_qp_state_reset);
+ vvrc = vv_qp_destroy(kibnal_data.kib_hca, conn->ibc_qp);
+ if (vvrc != vv_return_ok)
+ CERROR("Can't destroy QP: %d\n", vvrc);
/* fall through */
case IBNAL_CONN_INIT_NOTHING:
break;
-
- default:
- LASSERT (0);
- }
-
- if (conn->ibc_cep != NULL) {
- retval = cm_destroy_cep(conn->ibc_cep);
- if (retval)
- CERROR("Can't destroy CEP %p: %d\n", conn->ibc_cep,
- retval);
}
if (conn->ibc_rx_pages != NULL)
kibnal_free_pages(conn->ibc_rx_pages);
-
+
if (conn->ibc_rxs != NULL)
PORTAL_FREE(conn->ibc_rxs,
IBNAL_RX_MSGS * sizeof(kib_rx_t));
+ if (conn->ibc_connvars != NULL)
+ PORTAL_FREE(conn->ibc_connvars, sizeof(*conn->ibc_connvars));
+
if (conn->ibc_peer != NULL)
- kib_peer_decref(conn->ibc_peer);
+ kibnal_peer_decref(conn->ibc_peer);
+
+ vvrc = cm_destroy_cep(conn->ibc_cep);
+ LASSERT (vvrc == vv_return_ok);
PORTAL_FREE(conn, sizeof (*conn));
atomic_dec(&kibnal_data.kib_nconns);
-
- if (atomic_read (&kibnal_data.kib_nconns) == 0 &&
- kibnal_data.kib_shutdown) {
- /* I just nuked the last connection on shutdown; wake up
- * everyone so they can exit. */
- wake_up_all(&kibnal_data.kib_sched_waitq);
- wake_up_all(&kibnal_data.kib_connd_waitq);
- }
}
-void
-kibnal_put_conn (kib_conn_t *conn)
-{
- unsigned long flags;
-
- CDEBUG (D_NET, "putting conn[%p] state %d -> "LPX64" (%d)\n",
- conn, conn->ibc_state, conn->ibc_peer->ibp_nid,
- atomic_read (&conn->ibc_refcount));
-
- LASSERT (atomic_read (&conn->ibc_refcount) > 0);
- if (!atomic_dec_and_test (&conn->ibc_refcount))
- return;
-
- /* must disconnect before dropping the final ref */
- LASSERT (conn->ibc_state == IBNAL_CONN_DISCONNECTED);
-
- spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags);
-
- list_add (&conn->ibc_list, &kibnal_data.kib_connd_conns);
- wake_up (&kibnal_data.kib_connd_waitq);
-
- spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags);
-}
-
-static int
+int
kibnal_close_peer_conns_locked (kib_peer_t *peer, int why)
{
kib_conn_t *conn;
return (count);
}
-static int
+int
kibnal_close_matching_conns (ptl_nid_t nid)
{
- unsigned long flags;
kib_peer_t *peer;
struct list_head *ptmp;
struct list_head *pnxt;
int lo;
int hi;
int i;
+ unsigned long flags;
int count = 0;
- write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
+ write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
if (nid != PTL_NID_ANY)
lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers;
}
}
- write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
+ write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
/* wildcards always succeed */
if (nid == PTL_NID_ANY)
return (count == 0 ? -ENOENT : 0);
}
-static int
+int
kibnal_cmd(struct portals_cfg *pcfg, void * private)
{
int rc = -EINVAL;
- ENTRY;
LASSERT (pcfg != NULL);
switch(pcfg->pcfg_command) {
case NAL_CMD_GET_PEER: {
ptl_nid_t nid = 0;
+ __u32 ip = 0;
int share_count = 0;
rc = kibnal_get_peer_info(pcfg->pcfg_count,
- &nid, &share_count);
+ &nid, &ip, &share_count);
pcfg->pcfg_nid = nid;
pcfg->pcfg_size = 0;
- pcfg->pcfg_id = 0;
- pcfg->pcfg_misc = 0;
+ pcfg->pcfg_id = ip;
+ pcfg->pcfg_misc = IBNAL_SERVICE_NUMBER; /* port */
pcfg->pcfg_count = 0;
pcfg->pcfg_wait = share_count;
break;
}
case NAL_CMD_ADD_PEER: {
- rc = kibnal_add_persistent_peer (pcfg->pcfg_nid);
+ rc = kibnal_add_persistent_peer (pcfg->pcfg_nid,
+ pcfg->pcfg_id); /* IP */
break;
}
case NAL_CMD_DEL_PEER: {
pcfg->pcfg_id = 0;
pcfg->pcfg_misc = 0;
pcfg->pcfg_flags = 0;
- kibnal_put_conn (conn);
+ kibnal_conn_decref(conn);
}
break;
}
}
}
- RETURN(rc);
+ return rc;
}
void
kibnal_free_pages (kib_pages_t *p)
{
- int npages = p->ibp_npages;
- vv_return_t retval;
- int i;
+ int npages = p->ibp_npages;
+ vv_return_t vvrc;
+ int i;
if (p->ibp_mapped) {
- retval = vv_mem_region_destroy(kibnal_data.kib_hca, p->ibp_handle);
- if (retval != 0)
- CERROR ("Deregister error: %d\n", retval);
+ vvrc = vv_mem_region_destroy(kibnal_data.kib_hca,
+ p->ibp_handle);
+ if (vvrc != vv_return_ok)
+ CERROR ("Deregister error: %d\n", vvrc);
}
for (i = 0; i < npages; i++)
kibnal_alloc_pages (kib_pages_t **pp, int npages, int allow_write)
{
kib_pages_t *p;
- vv_phy_list_t phys_pages;
- vv_phy_buf_t *phys_buf;
int i;
- vv_return_t retval;
+#if !IBNAL_WHOLE_MEM
+ vv_phy_list_t vv_phys;
+ vv_phy_buf_t *phys_pages;
+ vv_return_t vvrc;
+ vv_access_con_bit_mask_t access;
+#endif
PORTAL_ALLOC(p, offsetof(kib_pages_t, ibp_pages[npages]));
if (p == NULL) {
}
}
- if (kibnal_whole_mem())
- goto out;
-
- PORTAL_ALLOC(phys_buf, npages * sizeof(vv_phy_buf_t));
- if (phys_buf == NULL) {
- CERROR ("Can't allocate phys_buf for %d pages\n", npages);
- /* XXX free ibp_pages? */
+#if !IBNAL_WHOLE_MEM
+ PORTAL_ALLOC(phys_pages, npages * sizeof(*phys_pages));
+ if (phys_pages == NULL) {
+ CERROR ("Can't allocate physarray for %d pages\n", npages);
kibnal_free_pages(p);
return (-ENOMEM);
}
- phys_pages.number_of_buff = npages;
- phys_pages.phy_list = phys_buf;
+ vv_phys.number_of_buff = npages;
+ vv_phys.phy_list = phys_pages;
- /* if we were using the _contig_ registration variant we would have
- * an array of PhysAddr/Length pairs, but the discontiguous variant
- * just takes the PhysAddr */
for (i = 0; i < npages; i++) {
- phys_buf[i].start = kibnal_page2phys(p->ibp_pages[i]);
- phys_buf[i].size = PAGE_SIZE;
- }
-
- retval = vv_phy_mem_region_register(kibnal_data.kib_hca,
- &phys_pages,
- 0, /* requested vaddr */
- npages * PAGE_SIZE,
- 0, /* offset */
- kibnal_data.kib_pd,
- vv_acc_l_mem_write | vv_acc_r_mem_write | vv_acc_r_mem_read | vv_acc_mem_bind, /* TODO: translated as-is, but seems incorrect or too much */
- &p->ibp_handle, &p->ibp_vaddr,
- &p->ibp_lkey, &p->ibp_rkey);
+ phys_pages[i].size = PAGE_SIZE;
+ phys_pages[i].start =
+ kibnal_page2phys(p->ibp_pages[i]);
+ }
+
+ VV_ACCESS_CONTROL_MASK_SET_ALL(access);
+
+ vvrc = vv_phy_mem_region_register(kibnal_data.kib_hca,
+ &vv_phys,
+ 0, /* requested vaddr */
+ npages * PAGE_SIZE, 0, /* offset */
+ kibnal_data.kib_pd,
+ access,
+ &p->ibp_handle,
+ &p->ibp_vaddr,
+ &p->ibp_lkey,
+ &p->ibp_rkey);
- PORTAL_FREE(phys_buf, npages * sizeof(vv_phy_buf_t));
+ PORTAL_FREE(phys_pages, npages * sizeof(*phys_pages));
- if (retval) {
- CERROR ("Error %d mapping %d pages\n", retval, npages);
+ if (vvrc != vv_return_ok) {
+ CERROR ("Error %d mapping %d pages\n", vvrc, npages);
kibnal_free_pages(p);
- return (-ENOMEM);
+ return (-EFAULT);
}
CDEBUG(D_NET, "registered %d pages; handle: %x vaddr "LPX64" "
- "lkey %x rkey %x\n", npages, p->ibp_handle,
- p->ibp_vaddr, p->ibp_lkey, p->ibp_rkey);
+ "lkey %x rkey %x\n", npages, p->ibp_handle,
+ p->ibp_vaddr, p->ibp_lkey, p->ibp_rkey);
p->ibp_mapped = 1;
-out:
+#endif
*pp = p;
return (0);
}
-static int
+/*
+ * Allocate the global array of IBNAL_TX_MSGS tx descriptors and, for every
+ * descriptor, its tx_wrq, tx_gl and tx_rd buffers.
+ *
+ * Returns 0 when everything was allocated, or -ENOMEM as soon as any single
+ * allocation fails.  Nothing is released here on failure: the descriptor
+ * array is zeroed before the per-descriptor buffers are allocated, so any
+ * member that was never reached is left NULL, which is what
+ * kibnal_free_tx_descs() tests for.
+ */
+int
+kibnal_alloc_tx_descs (void)
+{
+        kib_tx_t *tx;
+        int       n;
+
+        PORTAL_ALLOC (kibnal_data.kib_tx_descs,
+                      IBNAL_TX_MSGS * sizeof(kib_tx_t));
+        if (!kibnal_data.kib_tx_descs)
+                goto no_memory;
+
+        /* clear every member pointer before any of them gets allocated */
+        memset(kibnal_data.kib_tx_descs, 0,
+               IBNAL_TX_MSGS * sizeof(kib_tx_t));
+
+        n = 0;
+        while (n < IBNAL_TX_MSGS) {
+                tx = &kibnal_data.kib_tx_descs[n];
+
+                PORTAL_ALLOC(tx->tx_wrq,
+                             (1 + IBNAL_MAX_RDMA_FRAGS) *
+                             sizeof(*tx->tx_wrq));
+                if (!tx->tx_wrq)
+                        goto no_memory;
+
+                PORTAL_ALLOC(tx->tx_gl,
+                             (1 + IBNAL_MAX_RDMA_FRAGS) *
+                             sizeof(*tx->tx_gl));
+                if (!tx->tx_gl)
+                        goto no_memory;
+
+                /* sized to hold IBNAL_MAX_RDMA_FRAGS trailing rd_frags[] */
+                PORTAL_ALLOC(tx->tx_rd,
+                             offsetof(kib_rdma_desc_t,
+                                      rd_frags[IBNAL_MAX_RDMA_FRAGS]));
+                if (!tx->tx_rd)
+                        goto no_memory;
+
+                n++;
+        }
+
+        return 0;
+
+no_memory:
+        return -ENOMEM;
+}
+
+/*
+ * Release everything kibnal_alloc_tx_descs() set up: each descriptor's
+ * tx_wrq, tx_gl and tx_rd buffers (only those that are non-NULL) and then
+ * the descriptor array itself.  Does nothing when the array was never
+ * allocated.  The sizes passed to PORTAL_FREE mirror the ones used at
+ * allocation time.
+ */
+void
+kibnal_free_tx_descs (void)
+{
+        kib_tx_t *tx;
+        int       n;
+
+        if (kibnal_data.kib_tx_descs != NULL) {
+                n = 0;
+                while (n < IBNAL_TX_MSGS) {
+                        tx = &kibnal_data.kib_tx_descs[n];
+                        n++;
+
+                        if (tx->tx_wrq != NULL) {
+                                PORTAL_FREE(tx->tx_wrq,
+                                            (1 + IBNAL_MAX_RDMA_FRAGS) *
+                                            sizeof(*tx->tx_wrq));
+                        }
+
+                        if (tx->tx_gl != NULL) {
+                                PORTAL_FREE(tx->tx_gl,
+                                            (1 + IBNAL_MAX_RDMA_FRAGS) *
+                                            sizeof(*tx->tx_gl));
+                        }
+
+                        if (tx->tx_rd != NULL) {
+                                PORTAL_FREE(tx->tx_rd,
+                                            offsetof(kib_rdma_desc_t,
+                                                     rd_frags[IBNAL_MAX_RDMA_FRAGS]));
+                        }
+                }
+
+                /* the per-descriptor buffers are gone; drop the array */
+                PORTAL_FREE(kibnal_data.kib_tx_descs,
+                            IBNAL_TX_MSGS * sizeof(kib_tx_t));
+        }
+}
+
+int
kibnal_setup_tx_descs (void)
{
int ipage = 0;
int rc;
/* pre-mapped messages are not bigger than 1 page */
- LASSERT (IBNAL_MSG_SIZE <= PAGE_SIZE);
+ CLASSERT (IBNAL_MSG_SIZE <= PAGE_SIZE);
/* No fancy arithmetic when we do the buffer calculations */
- LASSERT (PAGE_SIZE % IBNAL_MSG_SIZE == 0);
+ CLASSERT (PAGE_SIZE % IBNAL_MSG_SIZE == 0);
rc = kibnal_alloc_pages(&kibnal_data.kib_tx_pages, IBNAL_TX_MSG_PAGES,
0);
page = kibnal_data.kib_tx_pages->ibp_pages[ipage];
tx = &kibnal_data.kib_tx_descs[i];
- memset (tx, 0, sizeof(*tx)); /* zero flags etc */
-
tx->tx_msg = (kib_msg_t *)(((char *)page_address(page)) +
page_offset);
-
- if (kibnal_whole_mem()) {
- void *newaddr;
- vv_mem_reg_h_t mem_h;
- vv_return_t retval;
+#if IBNAL_WHOLE_MEM
+ {
+ vv_mem_reg_h_t mem_h;
+ vv_r_key_t rkey;
+ vv_return_t vvrc;
/* Voltaire stack already registers the whole
* memory, so use that API. */
- retval = vv_get_gen_mr_attrib(kibnal_data.kib_hca,
- tx->tx_msg,
- IBNAL_MSG_SIZE,
- &mem_h,
- &tx->l_key,
- &tx->r_key);
- if (retval) {
- CERROR("vv_get_gen_mr_attrib failed: %d", retval);
- /* TODO: free pages? */
- /* TODO: return. */
- }
+ vvrc = vv_get_gen_mr_attrib(kibnal_data.kib_hca,
+ tx->tx_msg,
+ IBNAL_MSG_SIZE,
+ &mem_h,
+ &tx->tx_lkey,
+ &rkey);
+ LASSERT (vvrc == vv_return_ok);
}
-
+#else
+ tx->tx_vaddr = vaddr;
+#endif
tx->tx_isnblk = (i >= IBNAL_NTX);
tx->tx_mapped = KIB_TX_UNMAPPED;
- CDEBUG(D_NET, "Tx[%d] %p->%p\n", i, tx, tx->tx_msg);
+ CDEBUG(D_NET, "Tx[%d] %p->%p[%x:"LPX64"]\n", i, tx,
+ tx->tx_msg, KIBNAL_TX_LKEY(tx), KIBNAL_TX_VADDR(tx));
if (tx->tx_isnblk)
list_add (&tx->tx_list,
return (0);
}
-static void
+void
kibnal_api_shutdown (nal_t *nal)
{
- int i;
- int rc;
- vv_return_t retval;
+ int i;
+ vv_return_t vvrc;
if (nal->nal_refct != 0) {
/* This module got the first ref */
libcfs_nal_cmd_unregister(VIBNAL);
/* No new peers */
- /* resetting my NID to unadvertises me, removes my
- * listener and nukes all current peers */
+ /* resetting my NID removes my listener and nukes all current
+ * peers and their connections */
kibnal_set_mynid (PTL_NID_ANY);
- /* Wait for all peer state to clean up (crazy) */
+ /* Wait for all peer state to clean up */
i = 2;
while (atomic_read (&kibnal_data.kib_npeers) != 0) {
i++;
CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
- "waiting for %d peers to disconnect (can take a few seconds)\n",
+ "waiting for %d peers to disconnect\n",
atomic_read (&kibnal_data.kib_npeers));
set_current_state (TASK_UNINTERRUPTIBLE);
schedule_timeout (HZ);
/* fall through */
case IBNAL_INIT_CQ:
- retval = vv_cq_destroy(kibnal_data.kib_hca, kibnal_data.kib_cq);
- if (retval)
- CERROR ("Destroy CQ error: %d\n", retval);
+ vvrc = vv_cq_destroy(kibnal_data.kib_hca, kibnal_data.kib_cq);
+ if (vvrc != vv_return_ok)
+ CERROR ("Destroy CQ error: %d\n", vvrc);
/* fall through */
case IBNAL_INIT_TXD:
kibnal_free_pages (kibnal_data.kib_tx_pages);
/* fall through */
-#if IBNAL_FMR
- case IBNAL_INIT_FMR:
- rc = ib_fmr_pool_destroy (kibnal_data.kib_fmr_pool);
- if (rc != 0)
- CERROR ("Destroy FMR pool error: %d\n", rc);
- /* fall through */
-#endif
case IBNAL_INIT_PD:
-#if IBNAL_WHOLE_MEM==0
- retval = vv_pd_deallocate(kibnal_data.kib_hca, kibnal_data.kib_pd);
- if (retval != 0)
- CERROR ("Destroy PD error: %d\n", retval);
+#if !IBNAL_WHOLE_MEM
+ vvrc = vv_pd_deallocate(kibnal_data.kib_hca,
+ kibnal_data.kib_pd);
+ if (vvrc != vv_return_ok)
+ CERROR ("Destroy PD error: %d\n", vvrc);
#endif
/* fall through */
- case IBNAL_INIT_GSI:
- retval = gsi_deregister_class(kibnal_data.gsi_handle);
- if (retval != 0)
- CERROR ("GSI deregister failed: %d\n", retval);
- /* fall through */
-
- case IBNAL_INIT_GSI_POOL:
- gsi_dtgrm_pool_destroy(kibnal_data.gsi_pool_handle);
- /* fall through */
-
- case IBNAL_INIT_PORT:
- /* XXX ??? */
- /* fall through */
-
case IBNAL_INIT_ASYNC:
- retval = vv_dell_async_event_cb (kibnal_data.kib_hca,
- kibnal_ca_async_callback);
- if (retval)
- CERROR("deregister asynchronous call back error: %d\n", retval);
+ vvrc = vv_dell_async_event_cb (kibnal_data.kib_hca,
+ kibnal_async_callback);
+ if (vvrc != vv_return_ok)
+ CERROR("vv_dell_async_event_cb error: %d\n", vvrc);
/* fall through */
case IBNAL_INIT_HCA:
- retval = vv_hca_close(kibnal_data.kib_hca);
- if (retval != 0)
- CERROR ("Close HCA error: %d\n", retval);
+ vvrc = vv_hca_close(kibnal_data.kib_hca);
+ if (vvrc != vv_return_ok)
+ CERROR ("Close HCA error: %d\n", vvrc);
/* fall through */
case IBNAL_INIT_LIB:
/* fall through */
case IBNAL_INIT_DATA:
- /* Module refcount only gets to zero when all peers
- * have been closed so all lists must be empty */
LASSERT (atomic_read (&kibnal_data.kib_npeers) == 0);
LASSERT (kibnal_data.kib_peers != NULL);
for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) {
LASSERT (atomic_read (&kibnal_data.kib_nconns) == 0);
LASSERT (list_empty (&kibnal_data.kib_sched_rxq));
LASSERT (list_empty (&kibnal_data.kib_sched_txq));
+ LASSERT (list_empty (&kibnal_data.kib_connd_zombies));
LASSERT (list_empty (&kibnal_data.kib_connd_conns));
+ LASSERT (list_empty (&kibnal_data.kib_connd_pcreqs));
LASSERT (list_empty (&kibnal_data.kib_connd_peers));
/* flag threads to terminate; wake and wait for them to die */
break;
}
- if (kibnal_data.kib_tx_descs != NULL)
- PORTAL_FREE (kibnal_data.kib_tx_descs,
- IBNAL_TX_MSGS * sizeof(kib_tx_t));
+ kibnal_free_tx_descs();
if (kibnal_data.kib_peers != NULL)
PORTAL_FREE (kibnal_data.kib_peers,
kibnal_data.kib_init = IBNAL_INIT_NOTHING;
}
-#define roundup_power(val, power) \
- ( (val + (__u64)(power - 1)) & ~((__u64)(power - 1)) )
-
-/* this isn't very portable or sturdy in the face of funny mem/bus configs */
-static __u64 max_phys_mem(void)
-{
- struct sysinfo si;
- __u64 ret;
-
- si_meminfo(&si);
- ret = (__u64)max(si.totalram, max_mapnr) * si.mem_unit;
- return roundup_power(ret, 128 * 1024 * 1024);
-}
-#undef roundup_power
-
-static int
+int
kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
ptl_ni_limits_t *requested_limits,
ptl_ni_limits_t *actual_limits)
{
- ptl_process_id_t process_id;
- int pkmem = atomic_read(&portal_kmemory);
- int rc;
- int i;
+ struct timeval tv;
+ ptl_process_id_t process_id;
+ int pkmem = atomic_read(&portal_kmemory);
+ int rc;
+ int i;
vv_request_event_record_t req_er;
- vv_return_t retval;
+ vv_return_t vvrc;
LASSERT (nal == &kibnal_api);
}
LASSERT (kibnal_data.kib_init == IBNAL_INIT_NOTHING);
+ memset (&kibnal_data, 0, sizeof (kibnal_data)); /* zero pointers, flags etc */
+
+ do_gettimeofday(&tv);
+ kibnal_data.kib_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
+ kibnal_data.kib_svc_id = IBNAL_SERVICE_NUMBER;
init_MUTEX (&kibnal_data.kib_nid_mutex);
- kibnal_data.kib_nid = PTL_NID_ANY;
rwlock_init(&kibnal_data.kib_global_lock);
spin_lock_init (&kibnal_data.kib_connd_lock);
INIT_LIST_HEAD (&kibnal_data.kib_connd_peers);
+ INIT_LIST_HEAD (&kibnal_data.kib_connd_pcreqs);
INIT_LIST_HEAD (&kibnal_data.kib_connd_conns);
+ INIT_LIST_HEAD (&kibnal_data.kib_connd_zombies);
init_waitqueue_head (&kibnal_data.kib_connd_waitq);
spin_lock_init (&kibnal_data.kib_sched_lock);
INIT_LIST_HEAD (&kibnal_data.kib_idle_nblk_txs);
init_waitqueue_head(&kibnal_data.kib_idle_tx_waitq);
- INIT_LIST_HEAD (&kibnal_data.gsi_pending);
- init_MUTEX (&kibnal_data.gsi_mutex);
-
- PORTAL_ALLOC (kibnal_data.kib_tx_descs,
- IBNAL_TX_MSGS * sizeof(kib_tx_t));
- if (kibnal_data.kib_tx_descs == NULL) {
- CERROR ("Can't allocate tx descs\n");
+ rc = kibnal_alloc_tx_descs();
+ if (rc != 0) {
+ CERROR("Can't allocate tx descs\n");
goto failed;
}
-
+
/* lists/ptrs/locks initialised */
kibnal_data.kib_init = IBNAL_INIT_DATA;
/*****************************************************/
process_id.pid = requested_pid;
- process_id.nid = kibnal_data.kib_nid;
+ process_id.nid = PTL_NID_ANY;
rc = lib_init(&kibnal_lib, nal, process_id,
requested_limits, actual_limits);
/*****************************************************/
for (i = 0; i < IBNAL_N_SCHED; i++) {
- rc = kibnal_thread_start (kibnal_scheduler, (void *)i);
+ rc = kibnal_thread_start (kibnal_scheduler, (void *)((long)i));
if (rc != 0) {
CERROR("Can't spawn vibnal scheduler[%d]: %d\n",
i, rc);
}
/* TODO: apparently only one adapter is supported */
- retval = vv_hca_open("ANY_HCA", NULL, &kibnal_data.kib_hca);
- if (retval) {
- CERROR ("Can't open CA: %d\n", retval);
+ vvrc = vv_hca_open("ANY_HCA", NULL, &kibnal_data.kib_hca);
+ if (vvrc != vv_return_ok) {
+ CERROR ("Can't open CA: %d\n", vvrc);
goto failed;
}
/* register to get HCA's asynchronous events. */
req_er.req_event_type = VV_ASYNC_EVENT_ALL_MASK;
- retval = vv_set_async_event_cb (kibnal_data.kib_hca,
- req_er,
- kibnal_ca_async_callback);
-
- if (retval) {
- CERROR ("Can't open CA: %d\n", retval);
+ vvrc = vv_set_async_event_cb (kibnal_data.kib_hca, req_er,
+ kibnal_async_callback);
+ if (vvrc != vv_return_ok) {
+ CERROR ("Can't open CA: %d\n", vvrc);
goto failed;
}
/*****************************************************/
- retval = vv_hca_query(kibnal_data.kib_hca,
- &kibnal_data.kib_hca_attrs);
- if (retval) {
- CERROR ("Can't size port attrs: %d\n", retval);
+ vvrc = vv_hca_query(kibnal_data.kib_hca, &kibnal_data.kib_hca_attrs);
+ if (vvrc != vv_return_ok) {
+ CERROR ("Can't size port attrs: %d\n", vvrc);
goto failed;
}
u_int32_t tbl_count;
vv_port_attrib_t *pattr = &kibnal_data.kib_port_attr;
- retval = vv_port_query(kibnal_data.kib_hca, port_num, pattr);
- if (retval) {
- CERROR("vv_port_query failed for port %d: %d\n", port_num, retval);
+ vvrc = vv_port_query(kibnal_data.kib_hca, port_num, pattr);
+ if (vvrc != vv_return_ok) {
+ CERROR("vv_port_query failed for port %d: %d\n",
+ port_num, vvrc);
continue;
}
kibnal_data.kib_port = port_num;
tbl_count = 1;
- retval = vv_get_port_gid_tbl(kibnal_data.kib_hca, port_num, &tbl_count, &kibnal_data.kib_port_gid);
- if (retval) {
- CERROR("vv_get_port_gid_tbl failed for port %d: %d\n", port_num, retval);
+ vvrc = vv_get_port_gid_tbl(kibnal_data.kib_hca,
+ port_num, &tbl_count,
+ &kibnal_data.kib_port_gid);
+ if (vvrc != vv_return_ok) {
+ CERROR("vv_get_port_gid_tbl failed "
+ "for port %d: %d\n", port_num, vvrc);
continue;
}
tbl_count = 1;
- retval = vv_get_port_partition_tbl (kibnal_data.kib_hca, port_num, &tbl_count, &kibnal_data.kib_port_pkey);
- if (retval) {
- CERROR("vv_get_port_partition_tbl failed for port %d: %d\n", port_num, retval);
+ vvrc = vv_get_port_partition_tbl(kibnal_data.kib_hca,
+ port_num, &tbl_count,
+ &kibnal_data.kib_port_pkey);
+ if (vvrc != vv_return_ok) {
+ CERROR("vv_get_port_partition_tbl failed "
+ "for port %d: %d\n", port_num, vvrc);
continue;
}
}
CDEBUG(D_NET, "Using port %d - GID="LPX64":"LPX64"\n",
- kibnal_data.kib_port, kibnal_data.kib_port_gid.scope.g.subnet, kibnal_data.kib_port_gid.scope.g.eui64);
- CDEBUG(D_NET, "got guid "LPX64"\n", cpu_to_le64(kibnal_data.kib_port_gid.scope.g.eui64));
+ kibnal_data.kib_port,
+ kibnal_data.kib_port_gid.scope.g.subnet,
+ kibnal_data.kib_port_gid.scope.g.eui64);
- /* Active port found */
- kibnal_data.kib_init = IBNAL_INIT_PORT;
/*****************************************************/
- /* Prepare things to be able to send/receive MADS */
- retval = gsi_dtgrm_pool_create(IBNAL_CONCURRENT_PEERS, &kibnal_data.gsi_pool_handle);
- if (retval) {
- CERROR("Could not create GSI pool: %d\n", retval);
- goto failed;
- }
- kibnal_data.kib_init = IBNAL_INIT_GSI_POOL;
-
- retval = gsi_register_class(MAD_CLASS_SUBN_ADM, /* TODO: correct? */
- 2, /* version */
- "ANY_HCA",
-#ifdef GSI_PASS_PORT_NUM
- kibnal_data.kib_port,
-#endif
- 0, 0,
- vibnal_mad_sent_cb, vibnal_mad_received_cb,
- NULL, &kibnal_data.gsi_handle);
- if (retval) {
- CERROR("Cannot register GSI class: %d\n", retval);
- goto failed;
- }
-
- kibnal_data.kib_init = IBNAL_INIT_GSI;
- /*****************************************************/
-
-#if IBNAL_WHOLE_MEM==0
- retval = vv_pd_allocate(kibnal_data.kib_hca, &kibnal_data.kib_pd);
+#if !IBNAL_WHOLE_MEM
+ vvrc = vv_pd_allocate(kibnal_data.kib_hca, &kibnal_data.kib_pd);
#else
- retval = vv_get_gen_pd_h(kibnal_data.kib_hca, &kibnal_data.kib_pd);
+ vvrc = vv_get_gen_pd_h(kibnal_data.kib_hca, &kibnal_data.kib_pd);
#endif
- if (retval) {
- CERROR ("Can't create PD: %d\n", retval);
+ if (vvrc != 0) {
+ CERROR ("Can't create PD: %d\n", vvrc);
goto failed;
}
kibnal_data.kib_init = IBNAL_INIT_PD;
/*****************************************************/
-#if IBNAL_FMR
- {
- const int pool_size = IBNAL_NTX + IBNAL_NTX_NBLK;
- struct ib_fmr_pool_param params = {
- .max_pages_per_fmr = PTL_MTU/PAGE_SIZE,
- .access = (IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_REMOTE_WRITE |
- IB_ACCESS_REMOTE_READ),
- .pool_size = pool_size,
- .dirty_watermark = (pool_size * 3)/4,
- .flush_function = NULL,
- .flush_arg = NULL,
- .cache = 1,
- };
- rc = ib_fmr_pool_create(kibnal_data.kib_pd, ¶ms,
- &kibnal_data.kib_fmr_pool);
- if (rc != 0) {
- CERROR ("Can't create FMR pool size %d: %d\n",
- pool_size, rc);
- goto failed;
- }
- }
-
- /* flag FMR pool initialised */
- kibnal_data.kib_init = IBNAL_INIT_FMR;
-#endif
-
- /*****************************************************/
-
rc = kibnal_setup_tx_descs();
if (rc != 0) {
CERROR ("Can't register tx descs: %d\n", rc);
{
uint32_t nentries;
- retval = vv_cq_create(kibnal_data.kib_hca, IBNAL_CQ_ENTRIES,
- kibnal_ca_callback,
- NULL, /* context */
- &kibnal_data.kib_cq, &nentries);
- if (retval) {
- CERROR ("Can't create RX CQ: %d\n", retval);
+ vvrc = vv_cq_create(kibnal_data.kib_hca, IBNAL_CQ_ENTRIES,
+ kibnal_cq_callback,
+ NULL, /* context */
+ &kibnal_data.kib_cq, &nentries);
+ if (vvrc != 0) {
+ CERROR ("Can't create RX CQ: %d\n", vvrc);
goto failed;
}
goto failed;
}
- retval = vv_request_completion_notification(kibnal_data.kib_hca, kibnal_data.kib_cq, vv_next_solicit_unsolicit_event);
- if (retval != 0) {
+ vvrc = vv_request_completion_notification(kibnal_data.kib_hca,
+ kibnal_data.kib_cq,
+ vv_next_solicit_unsolicit_event);
+ if (vvrc != 0) {
CERROR ("Failed to re-arm completion queue: %d\n", rc);
goto failed;
}
{
int rc;
- if (sizeof(kib_wire_connreq_t) > cm_REQ_priv_data_len) {
- CERROR("sizeof(kib_wire_connreq_t) > cm_REQ_priv_data_len\n");
- return -EINVAL;
- }
-
+ CLASSERT (offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t)
+ <= cm_REQ_priv_data_len);
+ CLASSERT (offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t)
+ <= cm_REP_priv_data_len);
+ CLASSERT (offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[IBNAL_MAX_RDMA_FRAGS])
+ <= IBNAL_MSG_SIZE);
+ CLASSERT (offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[IBNAL_MAX_RDMA_FRAGS])
+ <= IBNAL_MSG_SIZE);
+
/* the following must be sizeof(int) for proc_dointvec() */
- if (sizeof (kibnal_tunables.kib_io_timeout) != sizeof (int)) {
- CERROR("sizeof (kibnal_tunables.kib_io_timeout) != sizeof (int)\n");
- return -EINVAL;
- }
+ CLASSERT (sizeof (kibnal_tunables.kib_io_timeout) == sizeof (int));
kibnal_api.nal_ni_init = kibnal_api_startup;
kibnal_api.nal_ni_fini = kibnal_api_shutdown;