*
*/
-#include "iibnal.h"
-
-nal_t kibnal_api;
-ptl_handle_ni_t kibnal_ni;
-kib_tunables_t kibnal_tunables;
-
-kib_data_t kibnal_data = {
- .kib_service_id = IBNAL_SERVICE_NUMBER,
+#include "iiblnd.h"
+
+lnd_t the_kiblnd = { /* method table: the LND type tag plus this driver's entry points */
+ .lnd_type = IIBLND,
+ .lnd_startup = kibnal_startup,
+ .lnd_shutdown = kibnal_shutdown,
+ .lnd_ctl = kibnal_ctl,
+ .lnd_send = kibnal_send,
+ .lnd_recv = kibnal_recv,
+ .lnd_eager_recv = kibnal_eager_recv,
};
-#ifdef CONFIG_SYSCTL
-#define IBNAL_SYSCTL 202
+kib_data_t kibnal_data; /* driver-wide state; no longer carries a static initialiser */
-#define IBNAL_SYSCTL_TIMEOUT 1
+__u32
+kibnal_cksum (void *ptr, int nob)
+{ /* Checksum the nob bytes at ptr: for each byte, rotate the running
+ * 32-bit sum left by one bit and add the byte. A result of 0 is
+ * reported as 1. nob <= 0 checksums nothing (and so returns 1). */
+ char *c = ptr; /* NOTE(review): plain char, so the value added per byte follows the platform's char signedness - confirm peers agree */
+ __u32 sum = 0;
-static ctl_table kibnal_ctl_table[] = {
- {IBNAL_SYSCTL_TIMEOUT, "timeout",
- &kibnal_tunables.kib_io_timeout, sizeof (int),
- 0644, NULL, &proc_dointvec},
- { 0 }
-};
+ while (nob-- > 0)
+ sum = ((sum << 1) | (sum >> 31)) + *c++;
+
+ /* ensure I don't return 0 (== no checksum) */
+ return (sum == 0) ? 1 : sum;
+}
-static ctl_table kibnal_top_ctl_table[] = {
- {IBNAL_SYSCTL, "iibnal", NULL, 0, 0555, kibnal_ctl_table},
- { 0 }
-};
-#endif
+void
+kibnal_init_msg(kib_msg_t *msg, int type, int body_nob)
+{ /* Record the message type and its total size: everything that precedes
+ * ibm_u in kib_msg_t (the header) plus body_nob bytes of body. */
+ msg->ibm_type = type;
+ msg->ibm_nob = offsetof(kib_msg_t, ibm_u) + body_nob;
+}
-#ifdef unused
void
-print_service(IB_SERVICE_RECORD *service, char *tag, int rc)
+kibnal_pack_msg(kib_msg_t *msg, __u32 version, int credits,
+ lnet_nid_t dstnid, __u64 dststamp, __u64 seq)
{
- char name[32];
+ /* Fill in the header of an outgoing message: magic, version, credits,
+ * source/destination NID and stamp, sequence number and (optionally)
+ * the checksum.
+ *
+ * CAVEAT EMPTOR! all message fields not set here should have been
+ * initialised previously. ibm_type and ibm_nob, marked by the
+ * placeholder comments below, are the fields kibnal_init_msg() sets. */
+ msg->ibm_magic = IBNAL_MSG_MAGIC;
+ msg->ibm_version = version;
+ /* ibm_type */
+ msg->ibm_credits = credits;
+ /* ibm_nob */
+ msg->ibm_cksum = 0;
+ msg->ibm_srcnid = lnet_ptlcompat_srcnid(kibnal_data.kib_ni->ni_nid,
+ dstnid);
+ msg->ibm_srcstamp = kibnal_data.kib_incarnation;
+ msg->ibm_dstnid = dstnid;
+ msg->ibm_dststamp = dststamp;
+ msg->ibm_seq = seq;
+
+ if (*kibnal_tunables.kib_cksum) { /* checksumming is switched by a tunable; otherwise ibm_cksum stays 0 */
+ /* NB ibm_cksum zero while computing cksum */
+ msg->ibm_cksum = kibnal_cksum(msg, msg->ibm_nob);
+ }
+}
- if (service == NULL)
- {
- CWARN("tag : %s\n"
- "status : %d (NULL)\n", tag, rc);
- return;
+void
+kibnal_pack_connmsg(kib_msg_t *msg, __u32 version, int nob,
+ int type, lnet_nid_t dstnid, __u64 dststamp)
+{ /* Build a connection-parameters message of the given type in the
+ * nob-byte buffer at msg. The buffer must be big enough for the
+ * header plus a kib_connparams_t (asserted). The whole buffer is
+ * zeroed, the local queue depth, maximum message size and maximum
+ * RDMA fragment count are filled in, and the header is packed with
+ * 0 credits and sequence number 0. */
+ LASSERT (nob >= offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t));
+
+ memset(msg, 0, nob);
+ kibnal_init_msg(msg, type, sizeof(kib_connparams_t));
+
+ msg->ibm_u.connparams.ibcp_queue_depth = IBNAL_MSG_QUEUE_SIZE;
+ msg->ibm_u.connparams.ibcp_max_msg_size = IBNAL_MSG_SIZE;
+ msg->ibm_u.connparams.ibcp_max_frags = IBNAL_MAX_RDMA_FRAGS;
+
+ kibnal_pack_msg(msg, version, 0, dstnid, dststamp, 0);
+}
+
+/* Validate a received message in place and convert it to host byte order.
+ *
+ * msg               buffer holding the received message
+ * expected_version  protocol version the message must carry, or 0 to
+ *                   accept either IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD or
+ *                   IBNAL_MSG_VERSION
+ * nob               number of bytes actually received
+ *
+ * Checks, in order: magic (which also decides whether fields need byte
+ * swapping), version, that the header was fully received, the length the
+ * sender claims (ibm_nob), the checksum if one was sent, the source NID,
+ * and finally the per-type minimum body size. Multi-byte header and body
+ * fields are swabbed when the peer has the opposite endianness; the magic
+ * is deliberately left as received.
+ *
+ * Returns 0 on success or -EPROTO on any validation failure. */
+int
+kibnal_unpack_msg(kib_msg_t *msg, __u32 expected_version, int nob)
+{
+ const int hdr_size = offsetof(kib_msg_t, ibm_u);
+ __u32 msg_cksum;
+ __u32 msg_version;
+ int flip;
+ int msg_nob;
+#if !IBNAL_USE_FMR
+ int i;
+ int n;
+#endif
+ /* 6 bytes are enough to have received magic + version */
+ if (nob < 6) {
+ CERROR("Short message: %d\n", nob);
+ return -EPROTO;
+ }
+
+ /* Future protocol version compatibility support!
+ * If the iiblnd-specific protocol changes, or when LNET unifies
+ * protocols over all LNDs, the initial connection will negotiate a
+ * protocol version. If I find this, I avoid any console errors. If
+ * my peer is doing connection establishment, the reject will tell the
+ * peer which version I'm running. */
+
+ if (msg->ibm_magic == IBNAL_MSG_MAGIC) {
+ flip = 0;
+ } else if (msg->ibm_magic == __swab32(IBNAL_MSG_MAGIC)) {
+ flip = 1;
+ } else {
+ if (msg->ibm_magic == LNET_PROTO_MAGIC ||
+ msg->ibm_magic == __swab32(LNET_PROTO_MAGIC))
+ return -EPROTO;
+
+ /* Completely out to lunch */
+ CERROR("Bad magic: %08x\n", msg->ibm_magic);
+ return -EPROTO;
}
- strncpy (name, service->ServiceName, sizeof(name)-1);
- name[sizeof(name)-1] = 0;
+
+ msg_version = flip ? __swab16(msg->ibm_version) : msg->ibm_version;
+ if (expected_version == 0) {
+ if (msg_version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD &&
+ msg_version != IBNAL_MSG_VERSION)
+ return -EPROTO;
+ } else if (msg_version != expected_version) {
+ CERROR("Bad version: %x(%x expected)\n",
+ msg_version, expected_version);
+ return -EPROTO;
+ }
+
+ if (nob < hdr_size) {
+ CERROR("Short message: %d\n", nob);
+ return -EPROTO;
+ }
+
+ msg_nob = flip ? __swab32(msg->ibm_nob) : msg->ibm_nob;
+ if (msg_nob > nob) {
+ CERROR("Short message: got %d, wanted %d\n", nob, msg_nob);
+ return -EPROTO;
+ }
+
+ /* The claimed length comes from the peer and must at least cover the
+ * header. Without this check a value >= 2^31 goes negative in
+ * msg_nob, passes the test above, and then converts to a huge
+ * unsigned value in the size_t comparisons of the per-type checks
+ * below, defeating all of them. */
+ if (msg_nob < hdr_size) {
+ CERROR("Bad message size: %d (header is %d)\n",
+ msg_nob, hdr_size);
+ return -EPROTO;
+ }
+
+ /* checksum must be computed with ibm_cksum zero and BEFORE anything
+ * gets flipped */
+ msg_cksum = flip ? __swab32(msg->ibm_cksum) : msg->ibm_cksum;
+ msg->ibm_cksum = 0;
+ if (msg_cksum != 0 &&
+ msg_cksum != kibnal_cksum(msg, msg_nob)) {
+ CERROR("Bad checksum\n");
+ return -EPROTO;
+ }
+ msg->ibm_cksum = msg_cksum;
- CWARN("tag : %s\n"
- "status : %d\n"
- "service id: "LPX64"\n"
- "name : %s\n"
- "NID : "LPX64"\n", tag, rc,
- service->RID.ServiceID, name,
- *kibnal_service_nid_field(service));
-}
+ if (flip) {
+ /* leave magic unflipped as a clue to peer endianness */
+ msg->ibm_version = msg_version;
+ CLASSERT (sizeof(msg->ibm_type) == 1);
+ CLASSERT (sizeof(msg->ibm_credits) == 1);
+ msg->ibm_nob = msg_nob;
+ __swab64s(&msg->ibm_srcnid);
+ __swab64s(&msg->ibm_srcstamp);
+ __swab64s(&msg->ibm_dstnid);
+ __swab64s(&msg->ibm_dststamp);
+ __swab64s(&msg->ibm_seq);
+ }
+
+ if (msg->ibm_srcnid == LNET_NID_ANY) {
+ CERROR("Bad src nid: %s\n", libcfs_nid2str(msg->ibm_srcnid));
+ return -EPROTO;
+ }
+
+ switch (msg->ibm_type) {
+ default:
+ CERROR("Unknown message type %x\n", msg->ibm_type);
+ return -EPROTO;
+
+ case IBNAL_MSG_NOOP:
+ break;
+
+ case IBNAL_MSG_IMMEDIATE:
+ if (msg_nob < offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0])) {
+ CERROR("Short IMMEDIATE: %d(%d)\n", msg_nob,
+ (int)offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0]));
+ return -EPROTO;
+ }
+ break;
+
+ case IBNAL_MSG_PUT_REQ:
+ if (msg_nob < hdr_size + sizeof(msg->ibm_u.putreq)) {
+ CERROR("Short PUT_REQ: %d(%d)\n", msg_nob,
+ (int)(hdr_size + sizeof(msg->ibm_u.putreq)));
+ return -EPROTO;
+ }
+ break;
+
+ case IBNAL_MSG_PUT_ACK:
+ if (msg_nob < hdr_size + sizeof(msg->ibm_u.putack)) {
+ CERROR("Short PUT_ACK: %d(%d)\n", msg_nob,
+ (int)(hdr_size + sizeof(msg->ibm_u.putack)));
+ return -EPROTO;
+ }
+#if IBNAL_USE_FMR
+ if (flip) {
+ __swab64s(&msg->ibm_u.putack.ibpam_rd.rd_addr);
+ __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nob);
+ __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key);
+ }
+#else
+ if (flip) {
+ __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key);
+ __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nfrag);
+ }
+
+ n = msg->ibm_u.putack.ibpam_rd.rd_nfrag;
+ if (n <= 0 || n > IBNAL_MAX_RDMA_FRAGS) {
+ CERROR("Bad PUT_ACK nfrags: %d, should be 0 < n <= %d\n",
+ n, IBNAL_MAX_RDMA_FRAGS);
+ return -EPROTO;
+ }
+
+ if (msg_nob < offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n])) {
+ CERROR("Short PUT_ACK: %d(%d)\n", msg_nob,
+ (int)offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n]));
+ return -EPROTO;
+ }
+
+ if (flip) {
+ for (i = 0; i < n; i++) {
+ __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_nob);
+ __swab64s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_addr);
+ }
+ }
#endif
+ break;
-static void
-kibnal_service_setunset_done (void *arg, FABRIC_OPERATION_DATA *fod,
- FSTATUS frc, uint32 madrc)
+ case IBNAL_MSG_GET_REQ:
+ if (msg_nob < hdr_size + sizeof(msg->ibm_u.get)) {
+ CERROR("Short GET_REQ: %d(%d)\n", msg_nob,
+ (int)(hdr_size + sizeof(msg->ibm_u.get)));
+ return -EPROTO;
+ }
+#if IBNAL_USE_FMR
+ if (flip) {
+ __swab64s(&msg->ibm_u.get.ibgm_rd.rd_addr);
+ __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nob);
+ __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key);
+ }
+#else
+ if (flip) {
+ __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key);
+ __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nfrag);
+ }
+
+ n = msg->ibm_u.get.ibgm_rd.rd_nfrag;
+ if (n <= 0 || n > IBNAL_MAX_RDMA_FRAGS) {
+ CERROR("Bad GET_REQ nfrags: %d, should be 0 < n <= %d\n",
+ n, IBNAL_MAX_RDMA_FRAGS);
+ return -EPROTO;
+ }
+
+ if (msg_nob < offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n])) {
+ CERROR("Short GET_REQ: %d(%d)\n", msg_nob,
+ (int)offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n]));
+ return -EPROTO;
+ }
+
+ if (flip)
+ for (i = 0; i < msg->ibm_u.get.ibgm_rd.rd_nfrag; i++) {
+ __swab32s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_nob);
+ __swab64s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_addr);
+ }
+#endif
+ break;
+
+ case IBNAL_MSG_PUT_NAK:
+ case IBNAL_MSG_PUT_DONE:
+ case IBNAL_MSG_GET_DONE:
+ if (msg_nob < hdr_size + sizeof(msg->ibm_u.completion)) {
+ CERROR("Short RDMA completion: %d(%d)\n", msg_nob,
+ (int)(hdr_size + sizeof(msg->ibm_u.completion)));
+ return -EPROTO;
+ }
+ if (flip)
+ __swab32s(&msg->ibm_u.completion.ibcm_status);
+ break;
+
+ case IBNAL_MSG_CONNREQ:
+ case IBNAL_MSG_CONNACK:
+ if (msg_nob < hdr_size + sizeof(msg->ibm_u.connparams)) {
+ CERROR("Short connreq/ack: %d(%d)\n", msg_nob,
+ (int)(hdr_size + sizeof(msg->ibm_u.connparams)));
+ return -EPROTO;
+ }
+ if (flip) {
+ __swab32s(&msg->ibm_u.connparams.ibcp_queue_depth);
+ __swab32s(&msg->ibm_u.connparams.ibcp_max_msg_size);
+ __swab32s(&msg->ibm_u.connparams.ibcp_max_frags);
+ }
+ break;
+ }
+ return 0;
+}
+
+IB_HANDLE
+kibnal_create_cep(lnet_nid_t nid) /* Create and configure an RC-type CM endpoint (CEP).
+ * nid == LNET_NID_ANY selects the listener flavour, which additionally
+ * gets async-accept and listen-backlog set; otherwise nid is only used
+ * to label error messages. Every CEP gets the timewait callback flag.
+ * Returns the CEP, or NULL on failure; a CEP that was created but could
+ * not be configured is destroyed before returning. */
{
- *(FSTATUS *)arg = frc;
- up (&kibnal_data.kib_nid_signal);
+ FSTATUS frc;
+ __u32 u32val;
+ IB_HANDLE cep;
+
+ cep = iba_cm_create_cep(CM_RC_TYPE);
+ if (cep == NULL) {
+ CERROR ("Can't create CEP for %s\n",
+ (nid == LNET_NID_ANY) ? "listener" :
+ libcfs_nid2str(nid));
+ return NULL;
+ }
+
+ if (nid == LNET_NID_ANY) {
+ u32val = 1;
+ frc = iba_cm_modify_cep(cep, CM_FLAG_ASYNC_ACCEPT,
+ (char *)&u32val, sizeof(u32val), 0);
+ if (frc != FSUCCESS) {
+ CERROR("Can't set async_accept: %d\n", frc);
+ goto failed;
+ }
+
+ u32val = 0; /* sets system max */
+ frc = iba_cm_modify_cep(cep, CM_FLAG_LISTEN_BACKLOG,
+ (char *)&u32val, sizeof(u32val), 0);
+ if (frc != FSUCCESS) {
+ CERROR("Can't set listen backlog: %d\n", frc);
+ goto failed;
+ }
+ }
+
+ u32val = 1;
+ frc = iba_cm_modify_cep(cep, CM_FLAG_TIMEWAIT_CALLBACK,
+ (char *)&u32val, sizeof(u32val), 0);
+ if (frc != FSUCCESS) {
+ CERROR("Can't set timewait_callback for %s: %d\n",
+ (nid == LNET_NID_ANY) ? "listener" :
+ libcfs_nid2str(nid), frc);
+ goto failed;
+ }
+
+ return cep;
+
+ failed:
+ iba_cm_destroy_cep(cep); /* single cleanup point for every configuration failure */
+ return NULL;
}
+#define IBNAL_CHECK_ADVERT 1
#if IBNAL_CHECK_ADVERT
-static void
+void /* Completion callback for the service-record query that
+ * kibnal_check_advert() issues. arg points at an int that receives 0
+ * when the first returned record matches this node's NID, service
+ * number, port GUID and P_Key, or a negative errno otherwise. The
+ * waiter on kib_listener_signal is woken on every path. */
kibnal_service_query_done (void *arg, QUERY *qry,
QUERY_RESULT_VALUES *qry_result)
{
- FSTATUS frc = qry_result->Status;
+ int *rcp = arg;
+ FSTATUS frc = qry_result->Status;
+ SERVICE_RECORD_RESULTS *svc_rslt;
+ IB_SERVICE_RECORD *svc;
+ lnet_nid_t nid;
+
+ if (frc != FSUCCESS || qry_result->ResultDataSize == 0) {
+ CERROR("Error checking advert: status %d data size %d\n",
+ frc, qry_result->ResultDataSize);
+ *rcp = -EIO;
+ goto out;
+ }
+
+ svc_rslt = (SERVICE_RECORD_RESULTS *)qry_result->QueryResult;
+
+ if (svc_rslt->NumServiceRecords < 1) {
+ CERROR("Check advert: %d records\n",
+ svc_rslt->NumServiceRecords);
+ *rcp = -ENOENT;
+ goto out;
+ }
- if (frc != FSUCCESS &&
- qry_result->ResultDataSize == 0)
- frc = FERROR;
+ svc = &svc_rslt->ServiceRecords[0]; /* only the first record is examined */
+ nid = le64_to_cpu(*kibnal_service_nid_field(svc)); /* NID is read from the record as little-endian */
- *(FSTATUS *)arg = frc;
- up (&kibnal_data.kib_nid_signal);
+ CDEBUG(D_NET, "Check advert: %s "LPX64" "LPX64":%04x\n",
+ libcfs_nid2str(nid), svc->RID.ServiceID,
+ svc->RID.ServiceGID.Type.Global.InterfaceID,
+ svc->RID.ServiceP_Key);
+
+ if (nid != kibnal_data.kib_ni->ni_nid) {
+ CERROR("Check advert: Bad NID %s (%s expected)\n",
+ libcfs_nid2str(nid),
+ libcfs_nid2str(kibnal_data.kib_ni->ni_nid));
+ *rcp = -EINVAL;
+ goto out;
+ }
+
+ if (svc->RID.ServiceID != *kibnal_tunables.kib_service_number) {
+ CERROR("Check advert: Bad ServiceID "LPX64" (%x expected)\n",
+ svc->RID.ServiceID,
+ *kibnal_tunables.kib_service_number);
+ *rcp = -EINVAL;
+ goto out;
+ }
+
+ if (svc->RID.ServiceGID.Type.Global.InterfaceID !=
+ kibnal_data.kib_port_guid) {
+ CERROR("Check advert: Bad GUID "LPX64" ("LPX64" expected)\n",
+ svc->RID.ServiceGID.Type.Global.InterfaceID,
+ kibnal_data.kib_port_guid);
+ *rcp = -EINVAL;
+ goto out;
+ }
+
+ if (svc->RID.ServiceP_Key != kibnal_data.kib_port_pkey) {
+ CERROR("Check advert: Bad PKEY %04x (%04x expected)\n",
+ svc->RID.ServiceP_Key, kibnal_data.kib_port_pkey);
+ *rcp = -EINVAL;
+ goto out;
+ }
+
+ CDEBUG(D_NET, "Check advert OK\n");
+ *rcp = 0;
+
+ out:
+ up (&kibnal_data.kib_listener_signal); /* result is in *rcp; release the waiter */
}
-static void
+int /* Query the fabric for the service record keyed by this node's NID
+ * and wait for kibnal_service_query_done() to validate it. Returns 0
+ * if the advert checks out, -EIO if the query could not be started,
+ * or the negative errno stored by the callback. */
kibnal_check_advert (void)
{
- QUERY *qry;
- IB_SERVICE_RECORD *svc;
- FSTATUS frc;
- FSTATUS frc2;
+ /* single-threaded: qry is static, so it is shared by every call */
+ static QUERY qry;
- PORTAL_ALLOC(qry, sizeof(*qry));
- if (qry == NULL)
- return;
+ FSTATUS frc;
+ int rc;
- memset (qry, 0, sizeof(*qry));
- qry->InputType = InputTypeServiceRecord;
- qry->OutputType = OutputTypeServiceRecord;
- qry->InputValue.ServiceRecordValue.ComponentMask = KIBNAL_SERVICE_KEY_MASK;
- svc = &qry->InputValue.ServiceRecordValue.ServiceRecord;
- kibnal_set_service_keys(svc, kibnal_data.kib_nid);
-
- frc = iibt_sd_query_port_fabric_information(kibnal_data.kib_sd,
- kibnal_data.kib_port_guid,
- qry,
- kibnal_service_query_done,
- NULL, &frc2);
- if (frc != FSUCCESS && frc != FPENDING) {
- CERROR ("Immediate error %d checking SM service\n", frc);
- } else {
- down (&kibnal_data.kib_nid_signal);
- frc = frc2;
+ memset (&qry, 0, sizeof(qry));
+ qry.InputType = InputTypeServiceRecord;
+ qry.OutputType = OutputTypeServiceRecord;
+ kibnal_set_service_keys(&qry.InputValue.ServiceRecordValue.ServiceRecord,
+ kibnal_data.kib_ni->ni_nid);
+ qry.InputValue.ServiceRecordValue.ComponentMask = KIBNAL_SERVICE_KEY_MASK;
- if (frc != 0)
- CERROR ("Error %d checking SM service\n", rc);
+ frc = iba_sd_query_port_fabric_info(kibnal_data.kib_sd,
+ kibnal_data.kib_port_guid,
+ &qry,
+ kibnal_service_query_done,
+ &kibnal_data.kib_sdretry,
+ &rc);
+ if (frc != FPENDING) { /* NB unlike the advertise paths, anything but FPENDING (FSUCCESS included) is treated as failure */
+ CERROR ("Immediate error %d checking SM service\n", frc);
+ return -EIO;
}
-
- return (rc);
+
+ down (&kibnal_data.kib_listener_signal); /* wait for the callback, which fills in rc */
+
+ if (rc != 0)
+ CERROR ("Error %d checking SM service\n", rc);
+ return rc;
+}
+#else
+int /* advert checking compiled out: always report success */
+kibnal_check_advert(void)
+{
+ return 0;
}
#endif
-static void fill_fod(FABRIC_OPERATION_DATA *fod, FABRIC_OPERATION_TYPE type)
+void
+kibnal_fill_fod(FABRIC_OPERATION_DATA *fod, FABRIC_OPERATION_TYPE type) /* Prepare a fabric operation of the
+ * given type on this node's service record: service number, port
+ * GUID, default subnet prefix, P_Key, a lease of 0xffffffff, and the
+ * service keys derived from the local NID. */
{
IB_SERVICE_RECORD *svc;
fod->Type = type;
svc = &fod->Value.ServiceRecordValue.ServiceRecord;
- svc->RID.ServiceID = kibnal_data.kib_service_id;
+ svc->RID.ServiceID = *kibnal_tunables.kib_service_number;
svc->RID.ServiceGID.Type.Global.InterfaceID = kibnal_data.kib_port_guid;
svc->RID.ServiceGID.Type.Global.SubnetPrefix = DEFAULT_SUBNET_PREFIX;
svc->RID.ServiceP_Key = kibnal_data.kib_port_pkey;
svc->ServiceLease = 0xffffffff;
- kibnal_set_service_keys(svc, kibnal_data.kib_nid);
+ kibnal_set_service_keys(svc, kibnal_data.kib_ni->ni_nid);
}
-static int
-kibnal_advertise (void)
+void /* Completion callback for the set/delete service-record operations:
+ * stores the fabric status through arg (an FSTATUS *) and wakes the
+ * thread waiting on kib_listener_signal. fod and madrc are unused. */
+kibnal_service_setunset_done (void *arg, FABRIC_OPERATION_DATA *fod,
+ FSTATUS frc, uint32 madrc)
{
- FABRIC_OPERATION_DATA *fod;
- IB_SERVICE_RECORD *svc;
- FSTATUS frc;
- FSTATUS frc2;
-
- LASSERT (kibnal_data.kib_nid != PTL_NID_ANY);
+ *(FSTATUS *)arg = frc;
+ up (&kibnal_data.kib_listener_signal);
+}
- PORTAL_ALLOC(fod, sizeof(*fod));
- if (fod == NULL)
- return (-ENOMEM);
+int
kibnal_advertise (void)
+{ /* Publish this node's service record on the fabric and wait for the
+ * operation to complete. Returns 0 on success, -EINVAL if the
+ * configured service name does not fit in the record, or -EIO if the
+ * operation could not be started or completed with an error. */
+ /* Single threaded here */
+ static FABRIC_OPERATION_DATA fod;
+
+ IB_SERVICE_RECORD *svc = &fod.Value.ServiceRecordValue.ServiceRecord;
+ FSTATUS frc;
+ FSTATUS frc2;
+
+ if (strlen(*kibnal_tunables.kib_service_name) >=
+ sizeof(svc->ServiceName)) {
+ CERROR("Service name '%s' too long (%d chars max)\n",
+ *kibnal_tunables.kib_service_name,
+ (int)sizeof(svc->ServiceName) - 1);
+ return -EINVAL;
+ }
- fill_fod(fod, FabOpSetServiceRecord);
- svc = &fod->Value.ServiceRecordValue.ServiceRecord;
+ kibnal_fill_fod(&fod, FabOpSetServiceRecord);
- CDEBUG(D_NET, "Advertising service id "LPX64" %s:"LPX64"\n",
- svc->RID.ServiceID,
- svc->ServiceName, *kibnal_service_nid_field(svc));
+ CDEBUG(D_NET, "Advertising service id "LPX64" %s:%s\n",
+ svc->RID.ServiceID, svc->ServiceName,
+ libcfs_nid2str(le64_to_cpu(*kibnal_service_nid_field(svc))));
- frc = iibt_sd_port_fabric_operation(kibnal_data.kib_sd,
- kibnal_data.kib_port_guid,
- fod, kibnal_service_setunset_done,
- NULL, &frc2);
+ frc = iba_sd_port_fabric_operation(kibnal_data.kib_sd,
+ kibnal_data.kib_port_guid,
+ &fod,
+ kibnal_service_setunset_done,
+ &kibnal_data.kib_sdretry,
+ &frc2);
if (frc != FSUCCESS && frc != FPENDING) {
- CERROR ("Immediate error %d advertising NID "LPX64"\n",
- frc, kibnal_data.kib_nid);
- goto out;
+ CERROR ("Immediate error %d advertising NID %s\n",
+ frc, libcfs_nid2str(kibnal_data.kib_ni->ni_nid));
+ return -EIO;
}
- down (&kibnal_data.kib_nid_signal);
+ down (&kibnal_data.kib_listener_signal); /* wait for kibnal_service_setunset_done() to fill in frc2 */
frc = frc2;
- if (frc != FSUCCESS)
- CERROR ("Error %d advertising BUD "LPX64"\n",
- frc, kibnal_data.kib_nid);
-out:
- PORTAL_FREE(fod, sizeof(*fod));
- return (frc == FSUCCESS) ? 0 : -EINVAL;
+ if (frc == FSUCCESS)
+ return 0;
+
+ CERROR ("Error %d advertising %s\n",
+ frc, libcfs_nid2str(kibnal_data.kib_ni->ni_nid));
+ return -EIO;
}
-static void
+void /* Delete this node's service record from the fabric and wait for the
+ * operation to complete. expect_success says which outcome is the
+ * normal one: when non-zero a failed delete is logged as an error;
+ * when zero a successful delete is logged as having removed a
+ * conflicting NID. Nothing is returned to the caller. */
kibnal_unadvertise (int expect_success)
{
- FABRIC_OPERATION_DATA *fod;
- IB_SERVICE_RECORD *svc;
- FSTATUS frc;
- FSTATUS frc2;
+ /* single threaded */
+ static FABRIC_OPERATION_DATA fod;
- LASSERT (kibnal_data.kib_nid != PTL_NID_ANY);
+ IB_SERVICE_RECORD *svc = &fod.Value.ServiceRecordValue.ServiceRecord;
+ FSTATUS frc;
+ FSTATUS frc2;
- PORTAL_ALLOC(fod, sizeof(*fod));
- if (fod == NULL)
- return;
+ LASSERT (kibnal_data.kib_ni->ni_nid != LNET_NID_ANY);
- fill_fod(fod, FabOpDeleteServiceRecord);
- svc = &fod->Value.ServiceRecordValue.ServiceRecord;
+ kibnal_fill_fod(&fod, FabOpDeleteServiceRecord);
- CDEBUG(D_NET, "Unadvertising service %s:"LPX64"\n",
- svc->ServiceName, *kibnal_service_nid_field(svc));
+ CDEBUG(D_NET, "Unadvertising service %s:%s\n",
+ svc->ServiceName,
+ libcfs_nid2str(le64_to_cpu(*kibnal_service_nid_field(svc))));
- frc = iibt_sd_port_fabric_operation(kibnal_data.kib_sd,
- kibnal_data.kib_port_guid,
- fod, kibnal_service_setunset_done,
- NULL, &frc2);
-
+ frc = iba_sd_port_fabric_operation(kibnal_data.kib_sd,
+ kibnal_data.kib_port_guid,
+ &fod,
+ kibnal_service_setunset_done,
+ &kibnal_data.kib_sdretry,
+ &frc2);
if (frc != FSUCCESS && frc != FPENDING) {
- CERROR ("Immediate error %d unadvertising NID "LPX64"\n",
- frc, kibnal_data.kib_nid);
- goto out;
+ CERROR ("Immediate error %d unadvertising NID %s\n",
+ frc, libcfs_nid2str(kibnal_data.kib_ni->ni_nid));
+ return;
}
- down (&kibnal_data.kib_nid_signal);
+ down (&kibnal_data.kib_listener_signal); /* wait for kibnal_service_setunset_done() to fill in frc2 */
+
+ CDEBUG(D_NET, "Unadvertise rc: %d\n", frc2);
if ((frc2 == FSUCCESS) == !!expect_success)
- goto out;
+ return;
if (expect_success)
- CERROR("Error %d unadvertising NID "LPX64"\n",
- frc2, kibnal_data.kib_nid);
+ CERROR("Error %d unadvertising NID %s\n",
+ frc2, libcfs_nid2str(kibnal_data.kib_ni->ni_nid));
else
- CWARN("Removed conflicting NID "LPX64"\n",
- kibnal_data.kib_nid);
- out:
- PORTAL_FREE(fod, sizeof(*fod));
+ CWARN("Removed conflicting NID %s\n",
+ libcfs_nid2str(kibnal_data.kib_ni->ni_nid));
}
-static int
-kibnal_set_mynid(ptl_nid_t nid)
+void
+kibnal_stop_listener(int normal_shutdown)
{
- struct timeval tv;
- lib_ni_t *ni = &kibnal_lib.libnal_ni;
- int rc;
+ /* Withdraw the service advert, cancel and destroy the listener CEP,
+ * then delete every peer. normal_shutdown is handed to
+ * kibnal_unadvertise() as its expect_success flag.
+ *
+ * NB this also disables peer creation and destroys all existing
+ * peers */
+ IB_HANDLE cep = kibnal_data.kib_listener_cep;
+ unsigned long flags;
FSTATUS frc;
- CDEBUG(D_IOCTL, "setting mynid to "LPX64" (old nid="LPX64")\n",
- nid, ni->ni_pid.nid);
+ LASSERT (cep != NULL);
- do_gettimeofday(&tv);
+ kibnal_unadvertise(normal_shutdown);
- down (&kibnal_data.kib_nid_mutex);
+ frc = iba_cm_cancel(cep);
+ if (frc != FSUCCESS && frc != FPENDING)
+ CERROR ("Error %d stopping listener\n", frc);
- if (nid == kibnal_data.kib_nid) {
- /* no change of NID */
- up (&kibnal_data.kib_nid_mutex);
- return (0);
- }
+ down(&kibnal_data.kib_listener_signal); /* NOTE(review): reached even when the cancel above
+ * reported an error - confirm the signal is still raised in
+ * that case, otherwise this blocks forever */
- CDEBUG(D_NET, "NID "LPX64"("LPX64")\n",
- kibnal_data.kib_nid, nid);
-
- if (kibnal_data.kib_nid != PTL_NID_ANY) {
+ frc = iba_cm_destroy_cep(cep);
+ if (frc != FSUCCESS)
+ CERROR ("Error %d destroying listener CEP\n", frc);
- kibnal_unadvertise (1);
+ write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
+ /* This assignment disables peer creation */
+ kibnal_data.kib_listener_cep = NULL;
+ write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
- frc = iibt_cm_cancel(kibnal_data.kib_cep);
- if (frc != FSUCCESS && frc != FPENDING)
- CERROR ("Error %d stopping listener\n", frc);
+ /* Start to tear down any peers created while the listener was
+ * running */
+ kibnal_del_peer(LNET_NID_ANY);
+}
- frc = iibt_cm_destroy_cep(kibnal_data.kib_cep);
- if (frc != FSUCCESS)
- CERROR ("Error %d destroying CEP\n", frc);
+int
+kibnal_start_listener(void)
+{
+ /* Create the listener CEP, start listening on the configured service
+ * number, publish kib_listener_cep, then advertise the service and
+ * verify the advert. Returns 0 on success; on an advertise/check
+ * failure the listener is stopped again and that error is returned.
+ *
+ * NB this also enables peer creation */
- kibnal_data.kib_cep = NULL;
- }
-
- kibnal_data.kib_nid = ni->ni_pid.nid = nid;
- kibnal_data.kib_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
-
- /* Delete all existing peers and their connections after new
- * NID/incarnation set to ensure no old connections in our brave
- * new world. */
- kibnal_del_peer (PTL_NID_ANY, 0);
-
- if (kibnal_data.kib_nid == PTL_NID_ANY) {
- /* No new NID to install */
- up (&kibnal_data.kib_nid_mutex);
- return (0);
- }
+ IB_HANDLE cep;
+ CM_LISTEN_INFO info;
+ unsigned long flags;
+ int rc;
+ FSTATUS frc;
- /* remove any previous advert (crashed node etc) */
- kibnal_unadvertise(0);
+ LASSERT (kibnal_data.kib_listener_cep == NULL);
+ init_MUTEX_LOCKED (&kibnal_data.kib_listener_signal); /* starts locked, so each down() waits for a callback's up() */
- kibnal_data.kib_cep = iibt_cm_create_cep(CM_RC_TYPE);
- if (kibnal_data.kib_cep == NULL) {
- CERROR ("Can't create CEP\n");
- rc = -ENOMEM;
- } else {
- CM_LISTEN_INFO info;
- memset (&info, 0, sizeof(info));
- info.ListenAddr.EndPt.SID = kibnal_data.kib_service_id;
-
- frc = iibt_cm_listen(kibnal_data.kib_cep, &info,
- kibnal_listen_callback, NULL);
- if (frc != FSUCCESS && frc != FPENDING) {
- CERROR ("iibt_cm_listen error: %d\n", frc);
- rc = -EINVAL;
- } else {
- rc = 0;
- }
- }
-
- if (rc == 0) {
- rc = kibnal_advertise();
- if (rc == 0) {
-#if IBNAL_CHECK_ADVERT
- kibnal_check_advert();
-#endif
- up (&kibnal_data.kib_nid_mutex);
- return (0);
- }
-
- iibt_cm_cancel (kibnal_data.kib_cep);
- iibt_cm_destroy_cep (kibnal_data.kib_cep);
- /* remove any peers that sprung up while I failed to
- * advertise myself */
- kibnal_del_peer (PTL_NID_ANY, 0);
+ cep = kibnal_create_cep(LNET_NID_ANY);
+ if (cep == NULL)
+ return -ENOMEM;
+
+ memset (&info, 0, sizeof(info));
+ info.ListenAddr.EndPt.SID = *kibnal_tunables.kib_service_number;
+
+ frc = iba_cm_listen(cep, &info, kibnal_listen_callback, NULL);
+ if (frc != FSUCCESS && frc != FPENDING) {
+ CERROR ("iba_cm_listen error: %d\n", frc);
+
+ iba_cm_destroy_cep(cep);
+ return -EIO;
}
- kibnal_data.kib_nid = PTL_NID_ANY;
- up (&kibnal_data.kib_nid_mutex);
- return (rc);
+ write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
+ /* This assignment enables peer creation */
+ kibnal_data.kib_listener_cep = cep;
+ write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
+
+ rc = kibnal_advertise();
+ if (rc == 0)
+ rc = kibnal_check_advert();
+
+ if (rc == 0)
+ return 0;
+
+ kibnal_stop_listener(0); /* 0: a failing unadvertise is not reported as an error here */
+ return rc;
}
-kib_peer_t *
-kibnal_create_peer (ptl_nid_t nid)
+int
+kibnal_create_peer (kib_peer_t **peerp, lnet_nid_t nid) /* Allocate and initialise a peer.
+ * On success returns 0, stores the new peer in *peerp and counts it in
+ * kib_npeers. Returns -ENOMEM if allocation fails, -EOVERFLOW if the
+ * concurrent-peers limit is already reached, or -ESHUTDOWN if there is
+ * no listener CEP; *peerp is left untouched on failure. */
{
- kib_peer_t *peer;
+ kib_peer_t *peer;
+ unsigned long flags;
+ int rc;
- LASSERT (nid != PTL_NID_ANY);
+ LASSERT (nid != LNET_NID_ANY);
- PORTAL_ALLOC (peer, sizeof (*peer));
- if (peer == NULL)
- return (NULL);
+ LIBCFS_ALLOC (peer, sizeof (*peer));
+ if (peer == NULL) {
+ CERROR("Cannot allocate peer\n");
+ return -ENOMEM;
+ }
memset(peer, 0, sizeof(*peer)); /* zero flags etc */
INIT_LIST_HEAD (&peer->ibp_conns);
INIT_LIST_HEAD (&peer->ibp_tx_queue);
- peer->ibp_reconnect_time = jiffies;
- peer->ibp_reconnect_interval = IBNAL_MIN_RECONNECT_INTERVAL;
+ peer->ibp_error = 0;
+ peer->ibp_last_alive = cfs_time_current();
+ peer->ibp_reconnect_interval = 0; /* OK to connect at any time */
+
+ write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
+
+ if (atomic_read(&kibnal_data.kib_npeers) >=
+ *kibnal_tunables.kib_concurrent_peers) {
+ rc = -EOVERFLOW; /* !! but at least it distinguishes */
+ } else if (kibnal_data.kib_listener_cep == NULL) {
+ rc = -ESHUTDOWN; /* shutdown has started */
+ } else {
+ rc = 0;
+ /* npeers only grows with the global lock held */
+ atomic_inc(&kibnal_data.kib_npeers);
+ }
+
+ write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
- atomic_inc (&kibnal_data.kib_npeers);
- return (peer);
+ if (rc != 0) {
+ CERROR("Can't create peer: %s\n",
+ (rc == -ESHUTDOWN) ? "shutting down" :
+ "too many peers");
+ LIBCFS_FREE(peer, sizeof(*peer)); /* never counted in kib_npeers, so a plain free suffices */
+ } else {
+ *peerp = peer;
+ }
+
+ return rc;
}
void
LASSERT (atomic_read (&peer->ibp_refcount) == 0);
LASSERT (peer->ibp_persistence == 0);
LASSERT (!kibnal_peer_active(peer));
- LASSERT (peer->ibp_connecting == 0);
+ LASSERT (!kibnal_peer_connecting(peer));
LASSERT (list_empty (&peer->ibp_conns));
LASSERT (list_empty (&peer->ibp_tx_queue));
- PORTAL_FREE (peer, sizeof (*peer));
+ LIBCFS_FREE (peer, sizeof (*peer));
/* NB a peer's connections keep a reference on their peer until
* they are destroyed, so we can be assured that _all_ state to do
/* the caller is responsible for accounting for the additional reference
* that this creates */
kib_peer_t *
-kibnal_find_peer_locked (ptl_nid_t nid)
+kibnal_find_peer_locked (lnet_nid_t nid)
{
struct list_head *peer_list = kibnal_nid2peerlist (nid);
struct list_head *tmp;
peer = list_entry (tmp, kib_peer_t, ibp_list);
- LASSERT (peer->ibp_persistence != 0 || /* persistent peer */
- peer->ibp_connecting != 0 || /* creating conns */
- !list_empty (&peer->ibp_conns)); /* active conn */
+ LASSERT (peer->ibp_persistence != 0 ||
+ kibnal_peer_connecting(peer) ||
+ !list_empty (&peer->ibp_conns));
if (peer->ibp_nid != nid)
continue;
- CDEBUG(D_NET, "got peer [%p] -> "LPX64" (%d)\n",
- peer, nid, atomic_read (&peer->ibp_refcount));
+ CDEBUG(D_NET, "got peer %s (%d)\n",
+ libcfs_nid2str(nid), atomic_read (&peer->ibp_refcount));
return (peer);
}
return (NULL);
}
-kib_peer_t *
-kibnal_get_peer (ptl_nid_t nid)
-{
- kib_peer_t *peer;
- unsigned long flags;
-
- read_lock_irqsave(&kibnal_data.kib_global_lock, flags);
- peer = kibnal_find_peer_locked (nid);
- if (peer != NULL) /* +1 ref for caller? */
- kib_peer_addref(peer);
- read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
- return (peer);
-}
-
void
kibnal_unlink_peer_locked (kib_peer_t *peer)
{
LASSERT (kibnal_peer_active(peer));
list_del_init (&peer->ibp_list);
/* lose peerlist's ref */
- kib_peer_decref(peer);
+ kibnal_peer_decref(peer);
}
-static int
-kibnal_get_peer_info (int index, ptl_nid_t *nidp, int *persistencep)
+int
+kibnal_get_peer_info (int index, lnet_nid_t *nidp, int *persistencep)
{
kib_peer_t *peer;
struct list_head *ptmp;
peer = list_entry (ptmp, kib_peer_t, ibp_list);
LASSERT (peer->ibp_persistence != 0 ||
- peer->ibp_connecting != 0 ||
+ kibnal_peer_connecting(peer) ||
!list_empty (&peer->ibp_conns));
if (index-- > 0)
return (-ENOENT);
}
-static int
-kibnal_add_persistent_peer (ptl_nid_t nid)
+int
+kibnal_add_persistent_peer (lnet_nid_t nid)
{
unsigned long flags;
kib_peer_t *peer;
kib_peer_t *peer2;
+ int rc;
- if (nid == PTL_NID_ANY)
+ if (nid == LNET_NID_ANY)
return (-EINVAL);
- peer = kibnal_create_peer (nid);
- if (peer == NULL)
- return (-ENOMEM);
+ rc = kibnal_create_peer(&peer, nid);
+ if (rc != 0)
+ return rc;
write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
peer2 = kibnal_find_peer_locked (nid);
if (peer2 != NULL) {
- kib_peer_decref (peer);
+ kibnal_peer_decref (peer);
peer = peer2;
} else {
/* peer table takes existing ref on peer */
return (0);
}
-static void
-kibnal_del_peer_locked (kib_peer_t *peer, int single_share)
+void
+kibnal_del_peer_locked (kib_peer_t *peer)
{
struct list_head *ctmp;
struct list_head *cnxt;
kib_conn_t *conn;
- if (!single_share)
- peer->ibp_persistence = 0;
- else if (peer->ibp_persistence > 0)
- peer->ibp_persistence--;
-
- if (peer->ibp_persistence != 0)
- return;
+ peer->ibp_persistence = 0;
if (list_empty(&peer->ibp_conns)) {
kibnal_unlink_peer_locked(peer);
}
int
-kibnal_del_peer (ptl_nid_t nid, int single_share)
+kibnal_del_peer (lnet_nid_t nid)
{
unsigned long flags;
+ CFS_LIST_HEAD (zombies);
struct list_head *ptmp;
struct list_head *pnxt;
kib_peer_t *peer;
write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
- if (nid != PTL_NID_ANY)
+ if (nid != LNET_NID_ANY)
lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers;
else {
lo = 0;
list_for_each_safe (ptmp, pnxt, &kibnal_data.kib_peers[i]) {
peer = list_entry (ptmp, kib_peer_t, ibp_list);
LASSERT (peer->ibp_persistence != 0 ||
- peer->ibp_connecting != 0 ||
+ kibnal_peer_connecting(peer) ||
!list_empty (&peer->ibp_conns));
- if (!(nid == PTL_NID_ANY || peer->ibp_nid == nid))
+ if (!(nid == LNET_NID_ANY || peer->ibp_nid == nid))
continue;
- kibnal_del_peer_locked (peer, single_share);
- rc = 0; /* matched something */
+ if (!list_empty(&peer->ibp_tx_queue)) {
+ LASSERT (list_empty(&peer->ibp_conns));
- if (single_share)
- goto out;
+ list_splice_init(&peer->ibp_tx_queue, &zombies);
+ }
+
+ kibnal_del_peer_locked (peer);
+ rc = 0; /* matched something */
}
}
- out:
+
write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
+ kibnal_txlist_done(&zombies, -EIO);
+
return (rc);
}
-static kib_conn_t *
+kib_conn_t *
kibnal_get_conn_by_idx (int index)
{
kib_peer_t *peer;
list_for_each (ptmp, &kibnal_data.kib_peers[i]) {
peer = list_entry (ptmp, kib_peer_t, ibp_list);
- LASSERT (peer->ibp_persistence > 0 ||
- peer->ibp_connecting != 0 ||
+ LASSERT (peer->ibp_persistence != 0 ||
+ kibnal_peer_connecting(peer) ||
!list_empty (&peer->ibp_conns));
list_for_each (ctmp, &peer->ibp_conns) {
if (index-- > 0)
continue;
- conn = list_entry (ctmp, kib_conn_t, ibc_list);
- CDEBUG(D_NET, "++conn[%p] state %d -> "LPX64" (%d)\n",
- conn, conn->ibc_state, conn->ibc_peer->ibp_nid,
- atomic_read (&conn->ibc_refcount));
- atomic_inc (&conn->ibc_refcount);
- read_unlock_irqrestore(&kibnal_data.kib_global_lock,
- flags);
- return (conn);
- }
- }
+ conn = list_entry (ctmp, kib_conn_t, ibc_list);
+ kibnal_conn_addref(conn);
+ read_unlock_irqrestore(&kibnal_data.kib_global_lock,
+ flags);
+ return (conn);
+ }
+ }
+ }
+
+ read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
+ return (NULL);
+}
+
+int
+kibnal_conn_rts(kib_conn_t *conn,
+ __u32 qpn, __u8 resp_res, __u8 init_depth, __u32 psn)
+{ /* Bring the connection's QP up: move it to ready-to-receive (remote QP
+ * number qpn), post the receive buffers, move it to ready-to-send
+ * (starting send PSN psn), then cache the resulting QP attributes in
+ * ibc_cvars->cv_qpattrs. resp_res and init_depth are clamped to the
+ * HCA's limits first. Returns 0, -EIO if an IB call fails, or the
+ * error from kibnal_post_receives(). */
+ IB_PATH_RECORD *path = &conn->ibc_cvars->cv_path;
+ IB_HANDLE qp = conn->ibc_qp;
+ IB_QP_ATTRIBUTES_MODIFY modify_attr;
+ FSTATUS frc;
+ int rc;
+
+ if (resp_res > kibnal_data.kib_hca_attrs.MaxQPResponderResources)
+ resp_res = kibnal_data.kib_hca_attrs.MaxQPResponderResources;
+
+ if (init_depth > kibnal_data.kib_hca_attrs.MaxQPInitiatorDepth)
+ init_depth = kibnal_data.kib_hca_attrs.MaxQPInitiatorDepth;
+
+ modify_attr = (IB_QP_ATTRIBUTES_MODIFY) {
+ .RequestState = QPStateReadyToRecv,
+ .RecvPSN = IBNAL_STARTING_PSN,
+ .DestQPNumber = qpn,
+ .ResponderResources = resp_res,
+ .MinRnrTimer = UsecToRnrNakTimer(2000), /* 2000 usec == 2 ms (NOTE(review): previously labelled "20 ms" - confirm the intended value) */
+ .Attrs = (IB_QP_ATTR_RECVPSN |
+ IB_QP_ATTR_DESTQPNUMBER |
+ IB_QP_ATTR_RESPONDERRESOURCES |
+ IB_QP_ATTR_DESTAV |
+ IB_QP_ATTR_PATHMTU |
+ IB_QP_ATTR_MINRNRTIMER),
+ };
+ GetAVFromPath(0, path, &modify_attr.PathMTU, NULL,
+ &modify_attr.DestAV);
+
+ frc = iba_modify_qp(qp, &modify_attr, NULL);
+ if (frc != FSUCCESS) {
+ CERROR("Can't set QP %s ready to receive: %d\n",
+ libcfs_nid2str(conn->ibc_peer->ibp_nid), frc);
+ return -EIO;
+ }
+
+ rc = kibnal_post_receives(conn); /* receives are posted between the two QP state changes */
+ if (rc != 0) {
+ CERROR("Can't post receives for %s: %d\n",
+ libcfs_nid2str(conn->ibc_peer->ibp_nid), rc);
+ return rc;
+ }
+
+ modify_attr = (IB_QP_ATTRIBUTES_MODIFY) {
+ .RequestState = QPStateReadyToSend,
+ .FlowControl = TRUE,
+ .InitiatorDepth = init_depth,
+ .SendPSN = psn,
+ .LocalAckTimeout = path->PktLifeTime + 2, /* 2 or 1? */
+ .RetryCount = IBNAL_RETRY,
+ .RnrRetryCount = IBNAL_RNR_RETRY,
+ .Attrs = (IB_QP_ATTR_FLOWCONTROL |
+ IB_QP_ATTR_INITIATORDEPTH |
+ IB_QP_ATTR_SENDPSN |
+ IB_QP_ATTR_LOCALACKTIMEOUT |
+ IB_QP_ATTR_RETRYCOUNT |
+ IB_QP_ATTR_RNRRETRYCOUNT),
+ };
+
+ frc = iba_modify_qp(qp, &modify_attr, NULL);
+ if (frc != FSUCCESS) {
+ CERROR("Can't set QP %s ready to send: %d\n",
+ libcfs_nid2str(conn->ibc_peer->ibp_nid), frc);
+ return -EIO;
}
- read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
- return (NULL);
+ frc = iba_query_qp(conn->ibc_qp, &conn->ibc_cvars->cv_qpattrs, NULL);
+ if (frc != FSUCCESS) {
+ CERROR ("Can't query QP %s attributes: %d\n",
+ libcfs_nid2str(conn->ibc_peer->ibp_nid), frc);
+ return -EIO;
+ }
+
+ return 0;
}
kib_conn_t *
-kibnal_create_conn (void)
+kibnal_create_conn (lnet_nid_t nid, int proto_version)
{
kib_conn_t *conn;
int i;
- __u64 vaddr = 0;
- __u64 vaddr_base;
int page_offset;
int ipage;
int rc;
IB_QP_ATTRIBUTES_MODIFY qp_attr;
} params;
- PORTAL_ALLOC (conn, sizeof (*conn));
+ LIBCFS_ALLOC (conn, sizeof (*conn));
if (conn == NULL) {
- CERROR ("Can't allocate connection\n");
+ CERROR ("Can't allocate connection for %s\n",
+ libcfs_nid2str(nid));
return (NULL);
}
/* zero flags, NULL pointers etc... */
memset (conn, 0, sizeof (*conn));
+ conn->ibc_state = IBNAL_CONN_INIT_NOTHING;
+ conn->ibc_version = proto_version;
+ INIT_LIST_HEAD (&conn->ibc_early_rxs);
+ INIT_LIST_HEAD (&conn->ibc_tx_queue_nocred);
INIT_LIST_HEAD (&conn->ibc_tx_queue);
+ INIT_LIST_HEAD (&conn->ibc_tx_queue_rsrvd);
INIT_LIST_HEAD (&conn->ibc_active_txs);
spin_lock_init (&conn->ibc_lock);
atomic_inc (&kibnal_data.kib_nconns);
/* well not really, but I call destroy() on failure, which decrements */
- PORTAL_ALLOC (conn->ibc_rxs, IBNAL_RX_MSGS * sizeof (kib_rx_t));
- if (conn->ibc_rxs == NULL)
+ LIBCFS_ALLOC(conn->ibc_cvars, sizeof (*conn->ibc_cvars));
+ if (conn->ibc_cvars == NULL) {
+ CERROR ("Can't allocate connvars for %s\n",
+ libcfs_nid2str(nid));
goto failed;
- memset (conn->ibc_rxs, 0, IBNAL_RX_MSGS * sizeof(kib_rx_t));
+ }
+ memset(conn->ibc_cvars, 0, sizeof (*conn->ibc_cvars));
- rc = kibnal_alloc_pages(&conn->ibc_rx_pages, IBNAL_RX_MSG_PAGES, 1);
- if (rc != 0)
+ LIBCFS_ALLOC(conn->ibc_rxs, IBNAL_RX_MSGS * sizeof (kib_rx_t));
+ if (conn->ibc_rxs == NULL) {
+ CERROR("Cannot allocate RX descriptors for %s\n",
+ libcfs_nid2str(nid));
goto failed;
+ }
+ memset (conn->ibc_rxs, 0, IBNAL_RX_MSGS * sizeof(kib_rx_t));
- vaddr_base = vaddr = conn->ibc_rx_pages->ibp_vaddr;
-
+ rc = kibnal_alloc_pages(&conn->ibc_rx_pages, IBNAL_RX_MSG_PAGES);
+ if (rc != 0) {
+ CERROR("Can't allocate RX buffers for %s\n",
+ libcfs_nid2str(nid));
+ goto failed;
+ }
+
for (i = ipage = page_offset = 0; i < IBNAL_RX_MSGS; i++) {
struct page *page = conn->ibc_rx_pages->ibp_pages[ipage];
- kib_rx_t *rx = &conn->ibc_rxs[i];
+ kib_rx_t *rx = &conn->ibc_rxs[i];
rx->rx_conn = conn;
rx->rx_msg = (kib_msg_t *)(((char *)page_address(page)) +
page_offset);
- if (kibnal_whole_mem())
- rx->rx_vaddr = kibnal_page2phys(page) +
- page_offset +
- kibnal_data.kib_md.md_addr;
- else
- rx->rx_vaddr = vaddr;
-
- vaddr += IBNAL_MSG_SIZE;
- LASSERT (vaddr <= vaddr_base + IBNAL_RX_MSG_BYTES);
+ rx->rx_hca_msg = kibnal_data.kib_whole_mem.md_addr +
+ lnet_page2phys(page) + page_offset;
page_offset += IBNAL_MSG_SIZE;
LASSERT (page_offset <= PAGE_SIZE);
params.qp_create = (IB_QP_ATTRIBUTES_CREATE) {
.Type = QPTypeReliableConnected,
- .SendQDepth = IBNAL_TX_MAX_SG *
- IBNAL_MSG_QUEUE_SIZE,
- .RecvQDepth = IBNAL_MSG_QUEUE_SIZE,
+ .SendQDepth = (1 + IBNAL_MAX_RDMA_FRAGS) *
+ (*kibnal_tunables.kib_concurrent_sends),
+ .RecvQDepth = IBNAL_RX_MSGS,
.SendDSListDepth = 1,
.RecvDSListDepth = 1,
.SendCQHandle = kibnal_data.kib_cq,
.PDHandle = kibnal_data.kib_pd,
.SendSignaledCompletions = TRUE,
};
- frc = iibt_qp_create(kibnal_data.kib_hca, ¶ms.qp_create, NULL,
- &conn->ibc_qp, &conn->ibc_qp_attrs);
- if (rc != 0) {
- CERROR ("Failed to create queue pair: %d\n", rc);
+ frc = iba_create_qp(kibnal_data.kib_hca, ¶ms.qp_create, NULL,
+ &conn->ibc_qp, &conn->ibc_cvars->cv_qpattrs);
+ if (frc != 0) {
+ CERROR ("Can't create QP %s: %d\n", libcfs_nid2str(nid), frc);
goto failed;
}
/* Mark QP created */
- conn->ibc_state = IBNAL_CONN_INIT_QP;
+ kibnal_set_conn_state(conn, IBNAL_CONN_INIT_QP);
params.qp_attr = (IB_QP_ATTRIBUTES_MODIFY) {
.RequestState = QPStateInit,
IB_QP_ATTR_ACCESSCONTROL),
.PortGUID = kibnal_data.kib_port_guid,
.PkeyIndex = 0,
- .AccessControl = {
+ .AccessControl = {
.s = {
.RdmaWrite = 1,
.RdmaRead = 1,
},
},
};
- rc = iibt_qp_modify(conn->ibc_qp, ¶ms.qp_attr, NULL);
- if (rc != 0) {
- CERROR ("Failed to modify queue pair: %d\n", rc);
+ frc = iba_modify_qp(conn->ibc_qp, ¶ms.qp_attr, NULL);
+ if (frc != 0) {
+ CERROR ("Can't set QP %s state to INIT: %d\n",
+ libcfs_nid2str(nid), frc);
+ goto failed;
+ }
+
+ frc = iba_query_qp(conn->ibc_qp, &conn->ibc_cvars->cv_qpattrs, NULL);
+ if (frc != FSUCCESS) {
+ CERROR ("Can't query QP %s attributes: %d\n",
+ libcfs_nid2str(nid), frc);
goto failed;
}
/* 1 ref for caller */
atomic_set (&conn->ibc_refcount, 1);
+ CDEBUG(D_NET, "New conn %p\n", conn);
return (conn);
failed:
+/*
+ * Release everything a connection owns and free the connection itself.
+ *
+ * Asserts that it is not running in interrupt context, that the last
+ * reference is gone, that every tx/rx list is empty and that no sends
+ * are still posted.  The CEP and the QP are destroyed if they exist;
+ * failures from those calls are logged and otherwise ignored.
+ */
void
kibnal_destroy_conn (kib_conn_t *conn)
{
- int rc;
FSTATUS frc;
+
+ LASSERT (!in_interrupt());
- CDEBUG (D_NET, "connection %p\n", conn);
+ CDEBUG (D_NET, "connection %s\n",
+ (conn->ibc_peer) == NULL ? "<ANON>" :
+ libcfs_nid2str(conn->ibc_peer->ibp_nid));
LASSERT (atomic_read (&conn->ibc_refcount) == 0);
+ LASSERT (list_empty(&conn->ibc_early_rxs));
LASSERT (list_empty(&conn->ibc_tx_queue));
+ LASSERT (list_empty(&conn->ibc_tx_queue_rsrvd));
+ LASSERT (list_empty(&conn->ibc_tx_queue_nocred));
LASSERT (list_empty(&conn->ibc_active_txs));
LASSERT (conn->ibc_nsends_posted == 0);
- LASSERT (conn->ibc_connreq == NULL);
+ /* only these three states may be destroyed; anything else is fatal */
switch (conn->ibc_state) {
- case IBNAL_CONN_DISCONNECTED:
- /* called after connection sequence initiated */
- /* fall through */
-
- case IBNAL_CONN_INIT_QP:
- /* _destroy includes an implicit Reset of the QP which
- * discards posted work */
- rc = iibt_qp_destroy(conn->ibc_qp);
- if (rc != 0)
- CERROR("Can't destroy QP: %d\n", rc);
- /* fall through */
-
case IBNAL_CONN_INIT_NOTHING:
+ case IBNAL_CONN_INIT_QP:
+ case IBNAL_CONN_DISCONNECTED:
break;
default:
- LASSERT (0);
+ /* conn must either have never engaged with the CM, or have
+ * completely disengaged from it */
+ CERROR("Bad conn %s state %d\n",
+ (conn->ibc_peer) == NULL ? "<anon>" :
+ libcfs_nid2str(conn->ibc_peer->ibp_nid), conn->ibc_state);
+ LBUG();
}
+ /* the CEP is destroyed before the QP; both are optional since the
+ * conn may be torn down at any stage of its setup */
if (conn->ibc_cep != NULL) {
- frc = iibt_cm_destroy_cep(conn->ibc_cep);
- if (frc != 0)
- CERROR("Can't destroy CEP %p: %d\n", conn->ibc_cep,
- frc);
+ frc = iba_cm_destroy_cep(conn->ibc_cep);
+ if (frc != FSUCCESS)
+ CERROR("Error destroying CEP %p: %d\n",
+ conn->ibc_cep, frc);
+ }
+
+ if (conn->ibc_qp != NULL) {
+ frc = iba_destroy_qp(conn->ibc_qp);
+ if (frc != FSUCCESS)
+ CERROR("Error destroying QP %p: %d\n",
+ conn->ibc_qp, frc);
}
if (conn->ibc_rx_pages != NULL)
kibnal_free_pages(conn->ibc_rx_pages);
if (conn->ibc_rxs != NULL)
- PORTAL_FREE(conn->ibc_rxs,
+ LIBCFS_FREE(conn->ibc_rxs,
IBNAL_RX_MSGS * sizeof(kib_rx_t));
+ if (conn->ibc_cvars != NULL)
+ LIBCFS_FREE(conn->ibc_cvars, sizeof(*conn->ibc_cvars));
+
if (conn->ibc_peer != NULL)
- kib_peer_decref(conn->ibc_peer);
+ kibnal_peer_decref(conn->ibc_peer);
- PORTAL_FREE(conn, sizeof (*conn));
+ LIBCFS_FREE(conn, sizeof (*conn));
+ /* balances the atomic_inc() of kib_nconns in kibnal_create_conn() */
atomic_dec(&kibnal_data.kib_nconns);
-
- if (atomic_read (&kibnal_data.kib_nconns) == 0 &&
- kibnal_data.kib_shutdown) {
- /* I just nuked the last connection on shutdown; wake up
- * everyone so they can exit. */
- wake_up_all(&kibnal_data.kib_sched_waitq);
- wake_up_all(&kibnal_data.kib_connd_waitq);
- }
-}
-
-void
-kibnal_put_conn (kib_conn_t *conn)
-{
- unsigned long flags;
-
- CDEBUG (D_NET, "putting conn[%p] state %d -> "LPX64" (%d)\n",
- conn, conn->ibc_state, conn->ibc_peer->ibp_nid,
- atomic_read (&conn->ibc_refcount));
-
- LASSERT (atomic_read (&conn->ibc_refcount) > 0);
- if (!atomic_dec_and_test (&conn->ibc_refcount))
- return;
-
- /* must disconnect before dropping the final ref */
- LASSERT (conn->ibc_state == IBNAL_CONN_DISCONNECTED);
-
- spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags);
-
- list_add (&conn->ibc_list, &kibnal_data.kib_connd_conns);
- wake_up (&kibnal_data.kib_connd_waitq);
-
- spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags);
}
-static int
+int
kibnal_close_peer_conns_locked (kib_peer_t *peer, int why)
{
kib_conn_t *conn;
if (conn->ibc_incarnation == incarnation)
continue;
- CDEBUG(D_NET, "Closing stale conn nid:"LPX64" incarnation:"LPX64"("LPX64")\n",
- peer->ibp_nid, conn->ibc_incarnation, incarnation);
+ CDEBUG(D_NET, "Closing stale conn nid:%s incarnation:"LPX64"("LPX64")\n",
+ libcfs_nid2str(peer->ibp_nid),
+ conn->ibc_incarnation, incarnation);
count++;
kibnal_close_conn_locked (conn, -ESTALE);
return (count);
}
-static int
-kibnal_close_matching_conns (ptl_nid_t nid)
+int
+kibnal_close_matching_conns (lnet_nid_t nid)
{
unsigned long flags;
kib_peer_t *peer;
write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
- if (nid != PTL_NID_ANY)
+ if (nid != LNET_NID_ANY)
lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers;
else {
lo = 0;
peer = list_entry (ptmp, kib_peer_t, ibp_list);
LASSERT (peer->ibp_persistence != 0 ||
- peer->ibp_connecting != 0 ||
+ kibnal_peer_connecting(peer) ||
!list_empty (&peer->ibp_conns));
- if (!(nid == PTL_NID_ANY || nid == peer->ibp_nid))
+ if (!(nid == LNET_NID_ANY || nid == peer->ibp_nid))
continue;
count += kibnal_close_peer_conns_locked (peer, 0);
write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
/* wildcards always succeed */
- if (nid == PTL_NID_ANY)
+ if (nid == LNET_NID_ANY)
return (0);
return (count == 0 ? -ENOENT : 0);
}
-static int
-kibnal_cmd(struct portals_cfg *pcfg, void * private)
+int
+kibnal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
{
- int rc = -EINVAL;
+ struct libcfs_ioctl_data *data = arg;
+ int rc = -EINVAL;
ENTRY;
- LASSERT (pcfg != NULL);
+ LASSERT (ni == kibnal_data.kib_ni);
- switch(pcfg->pcfg_command) {
- case NAL_CMD_GET_PEER: {
- ptl_nid_t nid = 0;
- int share_count = 0;
+ switch(cmd) {
+ case IOC_LIBCFS_GET_PEER: {
+ lnet_nid_t nid = 0;
+ int share_count = 0;
- rc = kibnal_get_peer_info(pcfg->pcfg_count,
+ rc = kibnal_get_peer_info(data->ioc_count,
&nid, &share_count);
- pcfg->pcfg_nid = nid;
- pcfg->pcfg_size = 0;
- pcfg->pcfg_id = 0;
- pcfg->pcfg_misc = 0;
- pcfg->pcfg_count = 0;
- pcfg->pcfg_wait = share_count;
+ data->ioc_nid = nid;
+ data->ioc_count = share_count;
break;
}
- case NAL_CMD_ADD_PEER: {
- rc = kibnal_add_persistent_peer (pcfg->pcfg_nid);
+ case IOC_LIBCFS_ADD_PEER: {
+ rc = kibnal_add_persistent_peer (data->ioc_nid);
break;
}
- case NAL_CMD_DEL_PEER: {
- rc = kibnal_del_peer (pcfg->pcfg_nid,
- /* flags == single_share */
- pcfg->pcfg_flags != 0);
+ case IOC_LIBCFS_DEL_PEER: {
+ rc = kibnal_del_peer (data->ioc_nid);
break;
}
- case NAL_CMD_GET_CONN: {
- kib_conn_t *conn = kibnal_get_conn_by_idx (pcfg->pcfg_count);
+ case IOC_LIBCFS_GET_CONN: {
+ kib_conn_t *conn = kibnal_get_conn_by_idx (data->ioc_count);
if (conn == NULL)
rc = -ENOENT;
else {
rc = 0;
- pcfg->pcfg_nid = conn->ibc_peer->ibp_nid;
- pcfg->pcfg_id = 0;
- pcfg->pcfg_misc = 0;
- pcfg->pcfg_flags = 0;
- kibnal_put_conn (conn);
+ data->ioc_nid = conn->ibc_peer->ibp_nid;
+ kibnal_conn_decref(conn);
}
break;
}
- case NAL_CMD_CLOSE_CONNECTION: {
- rc = kibnal_close_matching_conns (pcfg->pcfg_nid);
+ case IOC_LIBCFS_CLOSE_CONNECTION: {
+ rc = kibnal_close_matching_conns (data->ioc_nid);
break;
}
- case NAL_CMD_REGISTER_MYNID: {
- if (pcfg->pcfg_nid == PTL_NID_ANY)
+ case IOC_LIBCFS_REGISTER_MYNID: {
+ if (ni->ni_nid == data->ioc_nid) {
+ rc = 0;
+ } else {
+ CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n",
+ libcfs_nid2str(data->ioc_nid),
+ libcfs_nid2str(ni->ni_nid));
rc = -EINVAL;
- else
- rc = kibnal_set_mynid (pcfg->pcfg_nid);
+ }
break;
}
}
kibnal_free_pages (kib_pages_t *p)
{
int npages = p->ibp_npages;
- int rc;
int i;
- if (p->ibp_mapped) {
- rc = iibt_deregister_memory(p->ibp_handle);
- if (rc != 0)
- CERROR ("Deregister error: %d\n", rc);
- }
-
for (i = 0; i < npages; i++)
if (p->ibp_pages[i] != NULL)
__free_page(p->ibp_pages[i]);
- PORTAL_FREE (p, offsetof(kib_pages_t, ibp_pages[npages]));
+ LIBCFS_FREE (p, offsetof(kib_pages_t, ibp_pages[npages]));
}
int
-kibnal_alloc_pages (kib_pages_t **pp, int npages, int allow_write)
+kibnal_alloc_pages (kib_pages_t **pp, int npages)
{
- kib_pages_t *p;
- __u64 *phys_pages;
- int i;
- FSTATUS frc;
- IB_ACCESS_CONTROL access;
-
- memset(&access, 0, sizeof(access));
- access.s.MWBindable = 1;
- access.s.LocalWrite = 1;
- access.s.RdmaRead = 1;
- access.s.RdmaWrite = 1;
+ kib_pages_t *p;
+ int i;
- PORTAL_ALLOC(p, offsetof(kib_pages_t, ibp_pages[npages]));
+ LIBCFS_ALLOC(p, offsetof(kib_pages_t, ibp_pages[npages]));
if (p == NULL) {
CERROR ("Can't allocate buffer %d\n", npages);
return (-ENOMEM);
}
}
- if (kibnal_whole_mem())
- goto out;
-
- PORTAL_ALLOC(phys_pages, npages * sizeof(*phys_pages));
- if (phys_pages == NULL) {
- CERROR ("Can't allocate physarray for %d pages\n", npages);
- /* XXX free ibp_pages? */
- kibnal_free_pages(p);
- return (-ENOMEM);
- }
+ *pp = p;
+ return (0);
+}
- /* if we were using the _contig_ registration variant we would have
- * an array of PhysAddr/Length pairs, but the discontiguous variant
- * just takes the PhysAddr */
- for (i = 0; i < npages; i++)
- phys_pages[i] = kibnal_page2phys(p->ibp_pages[i]);
-
- frc = iibt_register_physical_memory(kibnal_data.kib_hca,
- 0, /* requested vaddr */
- phys_pages, npages,
- 0, /* offset */
- kibnal_data.kib_pd,
- access,
- &p->ibp_handle, &p->ibp_vaddr,
- &p->ibp_lkey, &p->ibp_rkey);
+/*
+ * Allocate and zero the global array of IBNAL_TX_MSGS() tx descriptors,
+ * then allocate each descriptor's private buffers: tx_pages when
+ * IBNAL_USE_FMR is set, otherwise tx_wrq, tx_gl and tx_rd.
+ *
+ * Returns 0 on success or -ENOMEM on the first failed allocation.
+ * Nothing is freed here on failure; because the array was zeroed, any
+ * member not yet allocated is NULL, which kibnal_free_tx_descs() skips.
+ */
+int
+kibnal_alloc_tx_descs (void)
+{
+ int i;
- PORTAL_FREE(phys_pages, npages * sizeof(*phys_pages));
+ LIBCFS_ALLOC (kibnal_data.kib_tx_descs,
+ IBNAL_TX_MSGS() * sizeof(kib_tx_t));
+ if (kibnal_data.kib_tx_descs == NULL)
+ return -ENOMEM;
- if (frc != FSUCCESS) {
- CERROR ("Error %d mapping %d pages\n", frc, npages);
- kibnal_free_pages(p);
- return (-ENOMEM);
+ /* zero every descriptor so unallocated members read as NULL */
+ memset(kibnal_data.kib_tx_descs, 0,
+ IBNAL_TX_MSGS() * sizeof(kib_tx_t));
+
+ for (i = 0; i < IBNAL_TX_MSGS(); i++) {
+ kib_tx_t *tx = &kibnal_data.kib_tx_descs[i];
+
+#if IBNAL_USE_FMR
+ LIBCFS_ALLOC(tx->tx_pages, LNET_MAX_IOV *
+ sizeof(*tx->tx_pages));
+ if (tx->tx_pages == NULL)
+ return -ENOMEM;
+#else
+ /* work requests and gather list are sized for 1 + IBNAL_MAX_RDMA_FRAGS
+ * entries; the RDMA descriptor for IBNAL_MAX_RDMA_FRAGS fragments */
+ LIBCFS_ALLOC(tx->tx_wrq,
+ (1 + IBNAL_MAX_RDMA_FRAGS) *
+ sizeof(*tx->tx_wrq));
+ if (tx->tx_wrq == NULL)
+ return -ENOMEM;
+
+ LIBCFS_ALLOC(tx->tx_gl,
+ (1 + IBNAL_MAX_RDMA_FRAGS) *
+ sizeof(*tx->tx_gl));
+ if (tx->tx_gl == NULL)
+ return -ENOMEM;
+
+ LIBCFS_ALLOC(tx->tx_rd,
+ offsetof(kib_rdma_desc_t,
+ rd_frags[IBNAL_MAX_RDMA_FRAGS]));
+ if (tx->tx_rd == NULL)
+ return -ENOMEM;
+#endif
}
- CDEBUG(D_NET, "registered %d pages; handle: %p vaddr "LPX64" "
- "lkey %x rkey %x\n", npages, p->ibp_handle,
- p->ibp_vaddr, p->ibp_lkey, p->ibp_rkey);
-
- p->ibp_mapped = 1;
-out:
- *pp = p;
- return (0);
+ return 0;
+}
+
+/*
+ * Free the per-descriptor buffers of every tx descriptor and then the
+ * descriptor array itself.
+ *
+ * Safe to call when the array was never allocated, or when only some
+ * descriptors had their buffers allocated: NULL members are skipped.
+ * The global pointer is reset afterwards so that a repeated call is a
+ * no-op instead of a double free.
+ */
+void
+kibnal_free_tx_descs (void)
+{
+        int    i;
+
+        if (kibnal_data.kib_tx_descs == NULL)
+                return;
+
+        for (i = 0; i < IBNAL_TX_MSGS(); i++) {
+                kib_tx_t *tx = &kibnal_data.kib_tx_descs[i];
+
+#if IBNAL_USE_FMR
+                if (tx->tx_pages != NULL)
+                        LIBCFS_FREE(tx->tx_pages, LNET_MAX_IOV *
+                                    sizeof(*tx->tx_pages));
+#else
+                /* sizes must mirror those used in kibnal_alloc_tx_descs() */
+                if (tx->tx_wrq != NULL)
+                        LIBCFS_FREE(tx->tx_wrq,
+                                    (1 + IBNAL_MAX_RDMA_FRAGS) *
+                                    sizeof(*tx->tx_wrq));
+
+                if (tx->tx_gl != NULL)
+                        LIBCFS_FREE(tx->tx_gl,
+                                    (1 + IBNAL_MAX_RDMA_FRAGS) *
+                                    sizeof(*tx->tx_gl));
+
+                if (tx->tx_rd != NULL)
+                        LIBCFS_FREE(tx->tx_rd,
+                                    offsetof(kib_rdma_desc_t,
+                                             rd_frags[IBNAL_MAX_RDMA_FRAGS]));
+#endif
+        }
+
+        LIBCFS_FREE(kibnal_data.kib_tx_descs,
+                    IBNAL_TX_MSGS() * sizeof(kib_tx_t));
+
+        /* don't leave a dangling pointer behind: the NULL check at the
+         * top is what makes a second call harmless */
+        kibnal_data.kib_tx_descs = NULL;
}
+/*
+ * Allocate the pages backing the pre-mapped tx messages and hand one
+ * IBNAL_MSG_SIZE slot to each of the IBNAL_TX_MSGS() descriptors.
+ *
+ * For every descriptor this records the kernel address of its slot
+ * (tx_msg) and the address the HCA uses for it (tx_hca_msg: base of the
+ * whole-memory region plus the slot's physical address), then puts the
+ * descriptor on kib_idle_txs.  kib_tx_descs is indexed but not allocated
+ * here, so it must already exist.
+ *
+ * Returns 0, or the error from kibnal_alloc_pages().
+ */
-static int
+int
kibnal_setup_tx_descs (void)
{
int ipage = 0;
int page_offset = 0;
- __u64 vaddr;
- __u64 vaddr_base;
struct page *page;
kib_tx_t *tx;
int i;
int rc;
/* pre-mapped messages are not bigger than 1 page */
- LASSERT (IBNAL_MSG_SIZE <= PAGE_SIZE);
+ CLASSERT (IBNAL_MSG_SIZE <= PAGE_SIZE);
/* No fancy arithmetic when we do the buffer calculations */
- LASSERT (PAGE_SIZE % IBNAL_MSG_SIZE == 0);
+ CLASSERT (PAGE_SIZE % IBNAL_MSG_SIZE == 0);
- rc = kibnal_alloc_pages(&kibnal_data.kib_tx_pages, IBNAL_TX_MSG_PAGES,
- 0);
+ rc = kibnal_alloc_pages(&kibnal_data.kib_tx_pages,
+ IBNAL_TX_MSG_PAGES());
if (rc != 0)
return (rc);
- /* ignored for the whole_mem case */
- vaddr = vaddr_base = kibnal_data.kib_tx_pages->ibp_vaddr;
-
- for (i = 0; i < IBNAL_TX_MSGS; i++) {
+ /* walk the pages slot by slot; ipage/page_offset track the next free slot */
+ for (i = 0; i < IBNAL_TX_MSGS(); i++) {
page = kibnal_data.kib_tx_pages->ibp_pages[ipage];
tx = &kibnal_data.kib_tx_descs[i];
- memset (tx, 0, sizeof(*tx)); /* zero flags etc */
-
+#if IBNAL_USE_FMR
+ /* Allocate an FMR for this TX so it can map src/sink buffers
+ * for large transfers */
+#endif
tx->tx_msg = (kib_msg_t *)(((char *)page_address(page)) +
page_offset);
- if (kibnal_whole_mem())
- tx->tx_vaddr = kibnal_page2phys(page) +
- page_offset +
- kibnal_data.kib_md.md_addr;
- else
- tx->tx_vaddr = vaddr;
-
- tx->tx_isnblk = (i >= IBNAL_NTX);
- tx->tx_mapped = KIB_TX_UNMAPPED;
+ tx->tx_hca_msg = kibnal_data.kib_whole_mem.md_addr +
+ lnet_page2phys(page) + page_offset;
CDEBUG(D_NET, "Tx[%d] %p->%p - "LPX64"\n",
- i, tx, tx->tx_msg, tx->tx_vaddr);
+ i, tx, tx->tx_msg, tx->tx_hca_msg);
- if (tx->tx_isnblk)
- list_add (&tx->tx_list,
- &kibnal_data.kib_idle_nblk_txs);
- else
- list_add (&tx->tx_list,
- &kibnal_data.kib_idle_txs);
-
- vaddr += IBNAL_MSG_SIZE;
- LASSERT (vaddr <= vaddr_base + IBNAL_TX_MSG_BYTES);
+ list_add (&tx->tx_list, &kibnal_data.kib_idle_txs);
page_offset += IBNAL_MSG_SIZE;
LASSERT (page_offset <= PAGE_SIZE);
if (page_offset == PAGE_SIZE) {
page_offset = 0;
ipage++;
- LASSERT (ipage <= IBNAL_TX_MSG_PAGES);
+ LASSERT (ipage <= IBNAL_TX_MSG_PAGES());
}
}
return (0);
}
-static void
-kibnal_api_shutdown (nal_t *nal)
+/*
+ * Register one physically contiguous region, starting at physical
+ * address 0 and covering all of memory, with the HCA.  The resulting
+ * handle, HCA address, lkey and rkey are stored in
+ * kibnal_data.kib_whole_mem.
+ *
+ * The region length is the larger of the two memory-size estimates
+ * (si_meminfo() and num_physpages), rounded up to a multiple of 128MB.
+ *
+ * Returns 0 on success, -EINVAL for the HCA vendor/device pair that is
+ * explicitly excluded below, -ENOMEM if no memory size could be
+ * determined and -EIO if the registration call fails.
+ */
+int
+kibnal_register_all_memory(void)
{
- int i;
- int rc;
+        /* CAVEAT EMPTOR: this assumes all physical memory is in 1 contiguous
+         * chunk starting at 0 */
+        struct sysinfo     si;
+        __u64              total;
+        __u64              total2;
+        __u64              roundup = (128<<20); /* round up in big chunks */
+        IB_MR_PHYS_BUFFER  phys;
+        IB_ACCESS_CONTROL  access;
+        FSTATUS            frc;
- if (nal->nal_refct != 0) {
- /* This module got the first ref */
- PORTAL_MODULE_UNUSE;
- return;
+        memset(&access, 0, sizeof(access));
+        access.s.MWBindable = 1;
+        access.s.LocalWrite = 1;
+        access.s.RdmaRead = 1;
+        access.s.RdmaWrite = 1;
+
+        /* XXX we don't bother with first-gen cards */
+        if (kibnal_data.kib_hca_attrs.VendorId == 0xd0b7 &&
+            kibnal_data.kib_hca_attrs.DeviceId == 0x3101) {
+                CERROR("Can't register all memory on first generation HCAs\n");
+                return -EINVAL;
}
- CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
- atomic_read (&portal_kmemory));
+        si_meminfo(&si);
+
+        CDEBUG(D_NET, "si_meminfo: %lu/%u, num_physpages %lu/%lu\n",
+               si.totalram, si.mem_unit, num_physpages, PAGE_SIZE);
+
+        /* widen BEFORE multiplying: both products are otherwise computed
+         * in unsigned long and wrap on 32-bit kernels with >= 4GB */
+        total = ((__u64)si.totalram) * si.mem_unit;
+        total2 = ((__u64)num_physpages) * PAGE_SIZE;
+        if (total < total2)
+                total = total2;
+
+        if (total == 0) {
+                CERROR("Can't determine memory size\n");
+                return -ENOMEM;
+        }
+
+        /* roundup is a power of 2, so masking rounds up to a multiple of it */
+        total = (total + (roundup - 1)) & ~(roundup - 1);
+
+        phys.PhysAddr = 0;
+        phys.Length = total;
- LASSERT(nal == &kibnal_api);
+        frc = iba_register_contig_pmr(kibnal_data.kib_hca, 0, &phys, 1, 0,
+                                      kibnal_data.kib_pd, access,
+                                      &kibnal_data.kib_whole_mem.md_handle,
+                                      &kibnal_data.kib_whole_mem.md_addr,
+                                      &kibnal_data.kib_whole_mem.md_lkey,
+                                      &kibnal_data.kib_whole_mem.md_rkey);
+
+        if (frc != FSUCCESS) {
+                CERROR("registering physical memory failed: %d\n", frc);
+                return -EIO;
+        }
+
+        CDEBUG(D_WARNING, "registered phys mem from 0("LPX64") for "LPU64"("LPU64") -> "LPX64"\n",
+               phys.PhysAddr, total, phys.Length, kibnal_data.kib_whole_mem.md_addr);
+
+        return 0;
+}
+
+void
+kibnal_shutdown (lnet_ni_t *ni)
+{
+ int i;
+ int rc;
+
+ LASSERT (ni == kibnal_data.kib_ni);
+ LASSERT (ni->ni_data == &kibnal_data);
+
+ CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
+ atomic_read (&libcfs_kmemory));
switch (kibnal_data.kib_init) {
default:
LBUG();
case IBNAL_INIT_ALL:
- /* stop calls to nal_cmd */
- libcfs_nal_cmd_unregister(IIBNAL);
- /* No new peers */
+ /* stop accepting connections, prevent new peers and start to
+ * tear down all existing ones... */
+ kibnal_stop_listener(1);
- /* resetting my NID to unadvertises me, removes my
- * listener and nukes all current peers */
- kibnal_set_mynid (PTL_NID_ANY);
-
- /* Wait for all peer state to clean up (crazy) */
+ /* Wait for all peer state to clean up */
i = 2;
while (atomic_read (&kibnal_data.kib_npeers) != 0) {
i++;
CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
- "waiting for %d peers to disconnect (can take a few seconds)\n",
+ "waiting for %d peers to disconnect\n",
atomic_read (&kibnal_data.kib_npeers));
set_current_state (TASK_UNINTERRUPTIBLE);
schedule_timeout (HZ);
/* fall through */
case IBNAL_INIT_CQ:
- rc = iibt_cq_destroy(kibnal_data.kib_cq);
+ rc = iba_destroy_cq(kibnal_data.kib_cq);
if (rc != 0)
CERROR ("Destroy CQ error: %d\n", rc);
/* fall through */
kibnal_free_pages (kibnal_data.kib_tx_pages);
/* fall through */
- case IBNAL_INIT_MR:
- if (kibnal_data.kib_md.md_handle != NULL) {
- rc = iibt_deregister_memory(kibnal_data.kib_md.md_handle);
- if (rc != FSUCCESS)
- CERROR ("Deregister memory: %d\n", rc);
- }
+ case IBNAL_INIT_MD:
+ rc = iba_deregister_mr(kibnal_data.kib_whole_mem.md_handle);
+ if (rc != FSUCCESS)
+ CERROR ("Deregister memory: %d\n", rc);
/* fall through */
-#if IBNAL_FMR
- case IBNAL_INIT_FMR:
- rc = ib_fmr_pool_destroy (kibnal_data.kib_fmr_pool);
- if (rc != 0)
- CERROR ("Destroy FMR pool error: %d\n", rc);
- /* fall through */
-#endif
case IBNAL_INIT_PD:
- rc = iibt_pd_free(kibnal_data.kib_pd);
+ rc = iba_free_pd(kibnal_data.kib_pd);
if (rc != 0)
CERROR ("Destroy PD error: %d\n", rc);
/* fall through */
case IBNAL_INIT_SD:
- rc = iibt_sd_deregister(kibnal_data.kib_sd);
+ rc = iba_sd_deregister(kibnal_data.kib_sd);
if (rc != 0)
CERROR ("Deregister SD error: %d\n", rc);
/* fall through */
- case IBNAL_INIT_PORT:
- /* XXX ??? */
- /* fall through */
-
case IBNAL_INIT_PORTATTRS:
- PORTAL_FREE(kibnal_data.kib_hca_attrs.PortAttributesList,
+ LIBCFS_FREE(kibnal_data.kib_hca_attrs.PortAttributesList,
kibnal_data.kib_hca_attrs.PortAttributesListSize);
/* fall through */
case IBNAL_INIT_HCA:
- rc = iibt_close_hca(kibnal_data.kib_hca);
+ rc = iba_close_ca(kibnal_data.kib_hca);
if (rc != 0)
CERROR ("Close HCA error: %d\n", rc);
/* fall through */
- case IBNAL_INIT_LIB:
- lib_fini(&kibnal_lib);
- /* fall through */
-
case IBNAL_INIT_DATA:
- /* Module refcount only gets to zero when all peers
- * have been closed so all lists must be empty */
LASSERT (atomic_read (&kibnal_data.kib_npeers) == 0);
LASSERT (kibnal_data.kib_peers != NULL);
for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) {
LASSERT (list_empty (&kibnal_data.kib_peers[i]));
}
LASSERT (atomic_read (&kibnal_data.kib_nconns) == 0);
- LASSERT (list_empty (&kibnal_data.kib_sched_rxq));
- LASSERT (list_empty (&kibnal_data.kib_sched_txq));
+ LASSERT (list_empty (&kibnal_data.kib_connd_zombies));
LASSERT (list_empty (&kibnal_data.kib_connd_conns));
LASSERT (list_empty (&kibnal_data.kib_connd_peers));
break;
}
- if (kibnal_data.kib_tx_descs != NULL)
- PORTAL_FREE (kibnal_data.kib_tx_descs,
- IBNAL_TX_MSGS * sizeof(kib_tx_t));
+ kibnal_free_tx_descs();
if (kibnal_data.kib_peers != NULL)
- PORTAL_FREE (kibnal_data.kib_peers,
+ LIBCFS_FREE (kibnal_data.kib_peers,
sizeof (struct list_head) *
kibnal_data.kib_peer_hash_size);
CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
- atomic_read (&portal_kmemory));
- printk(KERN_INFO "Lustre: Infinicon IB NAL unloaded (final mem %d)\n",
- atomic_read(&portal_kmemory));
+ atomic_read (&libcfs_kmemory));
kibnal_data.kib_init = IBNAL_INIT_NOTHING;
+ PORTAL_MODULE_UNUSE;
}
-#define roundup_power(val, power) \
- ( (val + (__u64)(power - 1)) & ~((__u64)(power - 1)) )
-
-/* this isn't very portable or sturdy in the face of funny mem/bus configs */
-static __u64 max_phys_mem(IB_CA_ATTRIBUTES *ca_attr)
+/*
+ * Build the name of the IP interface that corresponds to HCA 'idx' from
+ * the 'ipif_basename' tunable.
+ *
+ * The basename is split into a prefix and its longest trailing number;
+ * the result is the prefix followed by (number + idx), e.g. "ib0" with
+ * idx 1 gives "ib1".  If the basename has no trailing number, or
+ * consists of nothing but a number, the whole basename is used as the
+ * prefix and the number defaults to 1.
+ *
+ * Returns 0 on success.  Returns -EINVAL if the basename is empty or if
+ * the resulting name completely fills 'ifname' (ifname_size - 1
+ * characters), which is treated as "too long".
+ */
+int
+kibnal_get_ipif_name(char *ifname, int ifname_size, int idx)
{
- struct sysinfo si;
- __u64 ret;
+        char *basename = *kibnal_tunables.kib_ipif_basename;
+        int   baselen = strlen(basename);
+        int   prefixlen;
+        int   baseidx;
- /* XXX we don't bother with first-gen cards */
- if (ca_attr->VendorId == 0xd0b7 && ca_attr->DeviceId == 0x3101)
- return 0ULL;
+
+        if (baselen == 0) {
+                CERROR("Empty IP interface basename specified\n");
+                return -EINVAL;
+        }
- si_meminfo(&si);
- ret = (__u64)max(si.totalram, max_mapnr) * si.mem_unit;
- return roundup_power(ret, 128 * 1024 * 1024);
-}
-#undef roundup_power
-
-static int
-kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
- ptl_ni_limits_t *requested_limits,
- ptl_ni_limits_t *actual_limits)
+
+        /* step back from the end for as long as the tail still parses as
+         * a number; baseidx keeps the last value that parsed */
+        prefixlen = baselen;
+        while (prefixlen > 0 &&
+               sscanf(basename + prefixlen - 1, "%d", &baseidx) == 1)
+                prefixlen--;
+
+        if (prefixlen == 0 || prefixlen == baselen) {
+                /* all number, or no number at all: keep the whole
+                 * basename as the prefix and start counting at 1 */
+                prefixlen = baselen;
+                baseidx = 1;
+        }
+
+        /* never copy more prefix than the buffer can hold */
+        if (prefixlen >= ifname_size)
+                prefixlen = ifname_size - 1;
+
+        memcpy(ifname, basename, prefixlen);
+        snprintf(ifname + prefixlen, ifname_size - prefixlen,
+                 "%d", baseidx + idx);
+
+        if (strlen(ifname) == ifname_size - 1) {
+                CERROR("IP interface basename %s too long\n", basename);
+                return -EINVAL;
+        }
+
+        return 0;
+}
+
+int
+kibnal_startup (lnet_ni_t *ni)
{
- ptl_process_id_t process_id;
- int pkmem = atomic_read(&portal_kmemory);
+ char ipif_name[32];
+ __u32 ip;
+ __u32 netmask;
+ int up;
+ int nob;
+ struct timeval tv;
IB_PORT_ATTRIBUTES *pattr;
FSTATUS frc;
int rc;
- int n;
+ __u32 n;
int i;
- LASSERT (nal == &kibnal_api);
+ LASSERT (ni->ni_lnd == &the_kiblnd);
- if (nal->nal_refct != 0) {
- if (actual_limits != NULL)
- *actual_limits = kibnal_lib.libnal_ni.ni_actual_limits;
- /* This module got the first ref */
- PORTAL_MODULE_USE;
- return (PTL_OK);
+ /* Only 1 instance supported */
+ if (kibnal_data.kib_init != IBNAL_INIT_NOTHING) {
+ CERROR ("Only 1 instance supported\n");
+ return -EPERM;
}
- LASSERT (kibnal_data.kib_init == IBNAL_INIT_NOTHING);
+ if (*kibnal_tunables.kib_credits > *kibnal_tunables.kib_ntx) {
+ CERROR ("Can't set credits(%d) > ntx(%d)\n",
+ *kibnal_tunables.kib_credits,
+ *kibnal_tunables.kib_ntx);
+ return -EINVAL;
+ }
- frc = IbtGetInterfaceByVersion(IBT_INTERFACE_VERSION_2,
- &kibnal_data.kib_interfaces);
- if (frc != FSUCCESS) {
- CERROR("IbtGetInterfaceByVersion(IBT_INTERFACE_VERSION_2) = %d\n",
- frc);
- return -ENOSYS;
+ ni->ni_maxtxcredits = *kibnal_tunables.kib_credits;
+ ni->ni_peertxcredits = *kibnal_tunables.kib_peercredits;
+
+ CLASSERT (LNET_MAX_INTERFACES > 1);
+
+ if (ni->ni_interfaces[0] == NULL) {
+ kibnal_data.kib_hca_idx = 0;
+ } else {
+ /* Use the HCA specified in 'networks=' */
+ if (ni->ni_interfaces[1] != NULL) {
+ CERROR("Multiple interfaces not supported\n");
+ return -EPERM;
+ }
+
+ /* Parse <number> into kib_hca_idx */
+ nob = strlen(ni->ni_interfaces[0]);
+ if (sscanf(ni->ni_interfaces[0], "%d%n",
+ &kibnal_data.kib_hca_idx, &nob) < 1 ||
+ nob != strlen(ni->ni_interfaces[0])) {
+ CERROR("Can't parse interface '%s'\n",
+ ni->ni_interfaces[0]);
+ return -EINVAL;
+ }
+ }
+
+ rc = kibnal_get_ipif_name(ipif_name, sizeof(ipif_name),
+ kibnal_data.kib_hca_idx);
+ if (rc != 0)
+ return rc;
+
+ rc = libcfs_ipif_query(ipif_name, &up, &ip, &netmask);
+ if (rc != 0) {
+ CERROR("Can't query IPoIB interface %s: %d\n", ipif_name, rc);
+ return -ENETDOWN;
+ }
+
+ if (!up) {
+ CERROR("Can't query IPoIB interface %s: it's down\n", ipif_name);
+ return -ENETDOWN;
}
+
+ ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ip);
+
+ ni->ni_data = &kibnal_data;
+ kibnal_data.kib_ni = ni;
+
+ do_gettimeofday(&tv);
+ kibnal_data.kib_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
- init_MUTEX (&kibnal_data.kib_nid_mutex);
- init_MUTEX_LOCKED (&kibnal_data.kib_nid_signal);
- kibnal_data.kib_nid = PTL_NID_ANY;
+ PORTAL_MODULE_USE;
rwlock_init(&kibnal_data.kib_global_lock);
kibnal_data.kib_peer_hash_size = IBNAL_PEER_HASH_SIZE;
- PORTAL_ALLOC (kibnal_data.kib_peers,
+ LIBCFS_ALLOC (kibnal_data.kib_peers,
sizeof (struct list_head) * kibnal_data.kib_peer_hash_size);
if (kibnal_data.kib_peers == NULL) {
goto failed;
spin_lock_init (&kibnal_data.kib_connd_lock);
INIT_LIST_HEAD (&kibnal_data.kib_connd_peers);
INIT_LIST_HEAD (&kibnal_data.kib_connd_conns);
+ INIT_LIST_HEAD (&kibnal_data.kib_connd_zombies);
init_waitqueue_head (&kibnal_data.kib_connd_waitq);
spin_lock_init (&kibnal_data.kib_sched_lock);
- INIT_LIST_HEAD (&kibnal_data.kib_sched_txq);
- INIT_LIST_HEAD (&kibnal_data.kib_sched_rxq);
init_waitqueue_head (&kibnal_data.kib_sched_waitq);
spin_lock_init (&kibnal_data.kib_tx_lock);
INIT_LIST_HEAD (&kibnal_data.kib_idle_txs);
- INIT_LIST_HEAD (&kibnal_data.kib_idle_nblk_txs);
- init_waitqueue_head(&kibnal_data.kib_idle_tx_waitq);
- PORTAL_ALLOC (kibnal_data.kib_tx_descs,
- IBNAL_TX_MSGS * sizeof(kib_tx_t));
- if (kibnal_data.kib_tx_descs == NULL) {
- CERROR ("Can't allocate tx descs\n");
+ rc = kibnal_alloc_tx_descs();
+ if (rc != 0) {
+ CERROR("Can't allocate tx descs\n");
goto failed;
}
kibnal_data.kib_init = IBNAL_INIT_DATA;
/*****************************************************/
- process_id.pid = requested_pid;
- process_id.nid = kibnal_data.kib_nid;
-
- rc = lib_init(&kibnal_lib, nal, process_id,
- requested_limits, actual_limits);
- if (rc != PTL_OK) {
- CERROR("lib_init failed: error %d\n", rc);
- goto failed;
- }
-
- /* lib interface initialised */
- kibnal_data.kib_init = IBNAL_INIT_LIB;
- /*****************************************************/
+ kibnal_data.kib_sdretry.RetryCount = *kibnal_tunables.kib_sd_retries;
+ kibnal_data.kib_sdretry.Timeout = (*kibnal_tunables.kib_timeout * 1000)/
+ *kibnal_tunables.kib_sd_retries;
for (i = 0; i < IBNAL_N_SCHED; i++) {
- rc = kibnal_thread_start (kibnal_scheduler, (void *)i);
+ rc = kibnal_thread_start (kibnal_scheduler,
+ (void *)(unsigned long)i);
if (rc != 0) {
- CERROR("Can't spawn iibnal scheduler[%d]: %d\n",
+ CERROR("Can't spawn iib scheduler[%d]: %d\n",
i, rc);
goto failed;
}
rc = kibnal_thread_start (kibnal_connd, NULL);
if (rc != 0) {
- CERROR ("Can't spawn iibnal connd: %d\n", rc);
+ CERROR ("Can't spawn iib connd: %d\n", rc);
goto failed;
}
n = sizeof(kibnal_data.kib_hca_guids) /
sizeof(kibnal_data.kib_hca_guids[0]);
- frc = iibt_get_hca_guids(&n, kibnal_data.kib_hca_guids);
+ frc = iba_get_caguids(&n, kibnal_data.kib_hca_guids);
if (frc != FSUCCESS) {
- CERROR ("Can't get channel adapter guids: %d\n", frc);
+ CERROR ("Can't get HCA guids: %d\n", frc);
goto failed;
}
+
if (n == 0) {
- CERROR ("No channel adapters found\n");
+ CERROR ("No HCAs found\n");
goto failed;
}
- /* Infinicon has per-HCA rather than per CQ completion handlers */
- frc = iibt_open_hca(kibnal_data.kib_hca_guids[0],
- kibnal_ca_callback,
- kibnal_ca_async_callback,
- &kibnal_data.kib_hca,
+ if (n <= kibnal_data.kib_hca_idx) {
+ CERROR("Invalid HCA %d requested: (must be 0 - %d inclusive)\n",
+ kibnal_data.kib_hca_idx, n - 1);
+ goto failed;
+ }
+
+ /* Infinicon has per-HCA notification callbacks */
+ frc = iba_open_ca(kibnal_data.kib_hca_guids[kibnal_data.kib_hca_idx],
+ kibnal_hca_callback,
+ kibnal_hca_async_callback,
+ NULL,
&kibnal_data.kib_hca);
if (frc != FSUCCESS) {
- CERROR ("Can't open CA[0]: %d\n", frc);
+ CERROR ("Can't open HCA[%d]: %d\n",
+ kibnal_data.kib_hca_idx, frc);
goto failed;
}
kibnal_data.kib_hca_attrs.PortAttributesList = NULL;
kibnal_data.kib_hca_attrs.PortAttributesListSize = 0;
- frc = iibt_query_hca(kibnal_data.kib_hca,
- &kibnal_data.kib_hca_attrs, NULL);
+ frc = iba_query_ca(kibnal_data.kib_hca,
+ &kibnal_data.kib_hca_attrs, NULL);
if (frc != FSUCCESS) {
CERROR ("Can't size port attrs: %d\n", frc);
goto failed;
}
- PORTAL_ALLOC(kibnal_data.kib_hca_attrs.PortAttributesList,
+ LIBCFS_ALLOC(kibnal_data.kib_hca_attrs.PortAttributesList,
kibnal_data.kib_hca_attrs.PortAttributesListSize);
if (kibnal_data.kib_hca_attrs.PortAttributesList == NULL)
goto failed;
kibnal_data.kib_init = IBNAL_INIT_PORTATTRS;
/*****************************************************/
- frc = iibt_query_hca(kibnal_data.kib_hca, &kibnal_data.kib_hca_attrs,
- NULL);
+ frc = iba_query_ca(kibnal_data.kib_hca, &kibnal_data.kib_hca_attrs,
+ NULL);
if (frc != FSUCCESS) {
- CERROR ("Can't get port attrs for CA 0: %d\n", frc);
+ CERROR ("Can't get port attrs for HCA %d: %d\n",
+ kibnal_data.kib_hca_idx, frc);
goto failed;
}
CDEBUG(D_NET, "got guid "LPX64"\n", kibnal_data.kib_port_guid);
- /* Active port found */
- kibnal_data.kib_init = IBNAL_INIT_PORT;
- /*****************************************************/
-
- frc = iibt_sd_register(&kibnal_data.kib_sd, NULL);
+ frc = iba_sd_register(&kibnal_data.kib_sd, NULL);
if (frc != FSUCCESS) {
CERROR ("Can't register with SD: %d\n", frc);
goto failed;
kibnal_data.kib_init = IBNAL_INIT_SD;
/*****************************************************/
- frc = iibt_pd_allocate(kibnal_data.kib_hca, 0, &kibnal_data.kib_pd);
+ frc = iba_alloc_pd(kibnal_data.kib_hca, 0, &kibnal_data.kib_pd);
if (frc != FSUCCESS) {
CERROR ("Can't create PD: %d\n", rc);
goto failed;
kibnal_data.kib_init = IBNAL_INIT_PD;
/*****************************************************/
-#if IBNAL_FMR
- {
- const int pool_size = IBNAL_NTX + IBNAL_NTX_NBLK;
- struct ib_fmr_pool_param params = {
- .max_pages_per_fmr = PTL_MTU/PAGE_SIZE,
- .access = (IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_REMOTE_WRITE |
- IB_ACCESS_REMOTE_READ),
- .pool_size = pool_size,
- .dirty_watermark = (pool_size * 3)/4,
- .flush_function = NULL,
- .flush_arg = NULL,
- .cache = 1,
- };
- rc = ib_fmr_pool_create(kibnal_data.kib_pd, &params,
- &kibnal_data.kib_fmr_pool);
- if (rc != 0) {
- CERROR ("Can't create FMR pool size %d: %d\n",
- pool_size, rc);
- goto failed;
- }
- }
-
- /* flag FMR pool initialised */
- kibnal_data.kib_init = IBNAL_INIT_FMR;
-#endif
- /*****************************************************/
- if (IBNAL_WHOLE_MEM) {
- IB_MR_PHYS_BUFFER phys;
- IB_ACCESS_CONTROL access;
- kib_md_t *md = &kibnal_data.kib_md;
-
- memset(&access, 0, sizeof(access));
- access.s.MWBindable = 1;
- access.s.LocalWrite = 1;
- access.s.RdmaRead = 1;
- access.s.RdmaWrite = 1;
-
- phys.PhysAddr = 0;
- phys.Length = max_phys_mem(&kibnal_data.kib_hca_attrs);
- if (phys.Length == 0) {
- CERROR ("couldn't determine the end of phys mem\n");
- goto failed;
- }
-
- rc = iibt_register_contig_physical_memory(kibnal_data.kib_hca,
- 0,
- &phys, 1,
- 0,
- kibnal_data.kib_pd,
- access,
- &md->md_handle,
- &md->md_addr,
- &md->md_lkey,
- &md->md_rkey);
- if (rc != FSUCCESS) {
- CERROR("registering physical memory failed: %d\n",
- rc);
- CERROR("falling back to registration per-rdma\n");
- md->md_handle = NULL;
- } else {
- CDEBUG(D_NET, "registered "LPU64" bytes of mem\n",
- phys.Length);
- kibnal_data.kib_init = IBNAL_INIT_MR;
- }
+ rc = kibnal_register_all_memory();
+ if (rc != 0) {
+ CERROR ("Can't register all memory\n");
+ goto failed;
}
-
+
+ /* flag whole memory MD initialised */
+ kibnal_data.kib_init = IBNAL_INIT_MD;
/*****************************************************/
rc = kibnal_setup_tx_descs();
kibnal_data.kib_init = IBNAL_INIT_TXD;
/*****************************************************/
- {
- uint32 nentries;
-
- frc = iibt_cq_create(kibnal_data.kib_hca, IBNAL_CQ_ENTRIES,
- &kibnal_data.kib_cq, &kibnal_data.kib_cq,
- &nentries);
- if (frc != FSUCCESS) {
- CERROR ("Can't create RX CQ: %d\n", frc);
- goto failed;
- }
-
- /* flag CQ initialised */
- kibnal_data.kib_init = IBNAL_INIT_CQ;
+ frc = iba_create_cq(kibnal_data.kib_hca, IBNAL_CQ_ENTRIES(),
+ &kibnal_data.kib_cq, &kibnal_data.kib_cq,
+ &n);
+ if (frc != FSUCCESS) {
+ CERROR ("Can't create RX CQ: %d\n", frc);
+ goto failed;
+ }
- if (nentries < IBNAL_CQ_ENTRIES) {
- CERROR ("CQ only has %d entries, need %d\n",
- nentries, IBNAL_CQ_ENTRIES);
- goto failed;
- }
+ /* flag CQ initialised */
+ kibnal_data.kib_init = IBNAL_INIT_CQ;
+ /*****************************************************/
+
+ if (n < IBNAL_CQ_ENTRIES()) {
+ CERROR ("CQ only has %d entries: %d needed\n",
+ n, IBNAL_CQ_ENTRIES());
+ goto failed;
+ }
- rc = iibt_cq_rearm(kibnal_data.kib_cq, CQEventSelNextWC);
- if (rc != 0) {
- CERROR ("Failed to re-arm completion queue: %d\n", rc);
- goto failed;
- }
+ rc = iba_rearm_cq(kibnal_data.kib_cq, CQEventSelNextWC);
+ if (rc != 0) {
+ CERROR ("Failed to re-arm completion queue: %d\n", rc);
+ goto failed;
}
- /*****************************************************/
-
- rc = libcfs_nal_cmd_register(IIBNAL, &kibnal_cmd, NULL);
+ rc = kibnal_start_listener();
if (rc != 0) {
- CERROR ("Can't initialise command interface (rc = %d)\n", rc);
+ CERROR("Can't start listener: %d\n", rc);
goto failed;
}
kibnal_data.kib_init = IBNAL_INIT_ALL;
/*****************************************************/
- printk(KERN_INFO "Lustre: Infinicon IB NAL loaded "
- "(initial mem %d)\n", pkmem);
-
- return (PTL_OK);
+ return (0);
failed:
- kibnal_api_shutdown (&kibnal_api);
- return (PTL_FAIL);
+ kibnal_shutdown (ni);
+ return (-ENETDOWN);
}
/* Module unload hook.  After this change it unregisters the_kiblnd and calls
 * kibnal_tunables_fini(); the removed lines used to unregister the sysctl
 * table, call PtlNIFini() and unregister the IIBNAL NAL. */
void __exit
kibnal_module_fini (void)
{
-#ifdef CONFIG_SYSCTL
- if (kibnal_tunables.kib_sysctl != NULL)
- unregister_sysctl_table (kibnal_tunables.kib_sysctl);
-#endif
- PtlNIFini(kibnal_ni);
-
- ptl_unregister_nal(IIBNAL);
+ lnet_unregister_lnd(&the_kiblnd); /* undoes the lnet_register_lnd() done by the module init function */
+ kibnal_tunables_fini(); /* presumably the counterpart of kibnal_tunables_init() -- confirm */
}
/* Module load hook, registered via module_init(kibnal_module_init).
 * NOTE(review): the line carrying the function name is not present between
 * "int __init" and "{" in this excerpt -- confirm against the full file.
 *
 * After this change the function refuses to load when the_lnet.ln_ptlcompat
 * is non-zero, runs kibnal_tunables_init() and registers the_kiblnd.  The
 * removed lines held the old compile-time size checks, the NAL registration,
 * the PtlNIInit() call and the sysctl registration.
 *
 * Returns 0 on success, -ENODEV when ln_ptlcompat is set, or the non-zero
 * result of kibnal_tunables_init(). */
int __init
{
int rc;
- if (sizeof(kib_wire_connreq_t) > CM_REQUEST_INFO_USER_LEN) {
- CERROR("sizeof(kib_wire_connreq_t) > CM_REQUEST_INFO_USER_LEN\n");
- return -EINVAL;
- }
-
- /* the following must be sizeof(int) for proc_dointvec() */
- if (sizeof (kibnal_tunables.kib_io_timeout) != sizeof (int)) {
- CERROR("sizeof (kibnal_tunables.kib_io_timeout) != sizeof (int)\n");
- return -EINVAL;
+ if (the_lnet.ln_ptlcompat != 0) { /* per the message below: portals compatibility mode is unsupported */
+ LCONSOLE_ERROR("IIB does not support portals compatibility mode\n");
+ return -ENODEV;
}
+
+ rc = kibnal_tunables_init(); /* must succeed before the LND is registered */
+ if (rc != 0)
+ return rc;
- kibnal_api.nal_ni_init = kibnal_api_startup;
- kibnal_api.nal_ni_fini = kibnal_api_shutdown;
-
- /* Initialise dynamic tunables to defaults once only */
- kibnal_tunables.kib_io_timeout = IBNAL_IO_TIMEOUT;
-
- rc = ptl_register_nal(IIBNAL, &kibnal_api);
- if (rc != PTL_OK) {
- CERROR("Can't register IBNAL: %d\n", rc);
- return (-ENOMEM); /* or something... */
- }
+ lnet_register_lnd(&the_kiblnd); /* any result of this call is not checked here */
- /* Pure gateways want the NAL started up at module load time... */
- rc = PtlNIInit(IIBNAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &kibnal_ni);
- if (rc != PTL_OK && rc != PTL_IFACE_DUP) {
- ptl_unregister_nal(IIBNAL);
- return (-ENODEV);
- }
-
-#ifdef CONFIG_SYSCTL
- /* Press on regardless even if registering sysctl doesn't work */
- kibnal_tunables.kib_sysctl =
- register_sysctl_table (kibnal_top_ctl_table, 0);
-#endif
- return (0);
+ return 0;
}
MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
-MODULE_DESCRIPTION("Kernel Infinicon IB NAL v0.01");
+MODULE_DESCRIPTION("Kernel Infinicon IB LND v1.00");
MODULE_LICENSE("GPL");
module_init(kibnal_module_init);