Whamcloud - gitweb
Landing b_hd_newconfig on HEAD
[fs/lustre-release.git] / lnet / klnds / iiblnd / iiblnd.c
index 1ecd32d..27b31a5 100644 (file)
  *
  */
 
-#include "iibnal.h"
-
-nal_t                   kibnal_api;
-ptl_handle_ni_t         kibnal_ni;
-kib_tunables_t          kibnal_tunables;
-
-kib_data_t              kibnal_data = {
-        .kib_service_id = IBNAL_SERVICE_NUMBER,
+#include "iiblnd.h"
+
+lnd_t the_kiblnd = {           /* LND descriptor: type IIBLND plus its kibnal_* entry points */
+        .lnd_type          = IIBLND,
+        .lnd_startup       = kibnal_startup,
+        .lnd_shutdown      = kibnal_shutdown,
+        .lnd_ctl           = kibnal_ctl,
+        .lnd_send          = kibnal_send,
+        .lnd_recv          = kibnal_recv,
+        .lnd_eager_recv    = kibnal_eager_recv,
 };
 
-#ifdef CONFIG_SYSCTL
-#define IBNAL_SYSCTL             202
+kib_data_t              kibnal_data;
 
-#define IBNAL_SYSCTL_TIMEOUT     1
+__u32 
+kibnal_cksum (void *ptr, int nob)
+{       /* Checksum the nob bytes at ptr: for each byte, rotate the running
+         * 32-bit sum left by one bit and add the byte.  Never returns 0;
+         * with nob <= 0 no bytes are read and the result is 1. */
+        char  *c  = ptr;        /* NB plain char: whether bytes >= 0x80 add
+                                 * as negative values depends on the
+                                 * platform's char signedness */
+        __u32  sum = 0;
 
-static ctl_table kibnal_ctl_table[] = {
-        {IBNAL_SYSCTL_TIMEOUT, "timeout", 
-         &kibnal_tunables.kib_io_timeout, sizeof (int),
-         0644, NULL, &proc_dointvec},
-        { 0 }
-};
+        while (nob-- > 0)
+                sum = ((sum << 1) | (sum >> 31)) + *c++;  /* rotate left 1, then add the next byte */
+        
+        /* ensure I don't return 0 (== no checksum) */
+        return (sum == 0) ? 1 : sum;
+}
 
-static ctl_table kibnal_top_ctl_table[] = {
-        {IBNAL_SYSCTL, "iibnal", NULL, 0, 0555, kibnal_ctl_table},
-        { 0 }
-};
-#endif
+void
+kibnal_init_msg(kib_msg_t *msg, int type, int body_nob)
+{       /* Set msg's type and total size; no other field is touched. */
+        msg->ibm_type = type;
+        msg->ibm_nob  = offsetof(kib_msg_t, ibm_u) + body_nob; /* bytes preceding ibm_u, plus the body */
+}
 
-#ifdef unused
 void
-print_service(IB_SERVICE_RECORD *service, char *tag, int rc)
+kibnal_pack_msg(kib_msg_t *msg, __u32 version, int credits, 
+                lnet_nid_t dstnid, __u64 dststamp, __u64 seq)
 {
-        char name[32];
+        /* Fill in the header fields assigned below and, when the kib_cksum
+         * tunable is non-zero, store a checksum of the first ibm_nob bytes.
+         * CAVEAT EMPTOR! all message fields not set here should have been
+         * initialised previously. */
+        msg->ibm_magic    = IBNAL_MSG_MAGIC;
+        msg->ibm_version  = version;
+        /*   ibm_type: not set here */
+        msg->ibm_credits  = credits;
+        /*   ibm_nob: not set here, but read below as the checksum length */
+        msg->ibm_cksum    = 0;
+        msg->ibm_srcnid   = lnet_ptlcompat_srcnid(kibnal_data.kib_ni->ni_nid,
+                                                  dstnid);
+        msg->ibm_srcstamp = kibnal_data.kib_incarnation;
+        msg->ibm_dstnid   = dstnid;
+        msg->ibm_dststamp = dststamp;
+        msg->ibm_seq      = seq;
+
+        if (*kibnal_tunables.kib_cksum) {
+                /* NB ibm_cksum zero while computing cksum */
+                msg->ibm_cksum = kibnal_cksum(msg, msg->ibm_nob);
+        }
+}
 
-        if (service == NULL) 
-        {
-                CWARN("tag       : %s\n"
-                      "status    : %d (NULL)\n", tag, rc);
-                return;
+void
+kibnal_pack_connmsg(kib_msg_t *msg, __u32 version, int nob, 
+                    int type, lnet_nid_t dstnid, __u64 dststamp)
+{       /* Build a connection-parameters message of the given type in the
+         * nob-byte buffer at msg and pack its header. */
+        LASSERT (nob >= offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t));
+
+        memset(msg, 0, nob);            /* zero the whole nob-byte buffer */
+        kibnal_init_msg(msg, type, sizeof(kib_connparams_t));
+
+        msg->ibm_u.connparams.ibcp_queue_depth = IBNAL_MSG_QUEUE_SIZE;
+        msg->ibm_u.connparams.ibcp_max_msg_size = IBNAL_MSG_SIZE;
+        msg->ibm_u.connparams.ibcp_max_frags = IBNAL_MAX_RDMA_FRAGS;
+
+        kibnal_pack_msg(msg, version, 0, dstnid, dststamp, 0);  /* credits and seq are both 0 */
+}
+
+int
+kibnal_unpack_msg(kib_msg_t *msg, __u32 expected_version, int nob)
+{       /* Validate the nob-byte message received at msg and, if the sender
+         * has the opposite byte order, byte-swap its fields in place
+         * (ibm_magic is left as received).  expected_version == 0 accepts
+         * IBNAL_MSG_VERSION or IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD.
+         * Returns 0 if the message is acceptable, -EPROTO otherwise. */
+        const int hdr_size = offsetof(kib_msg_t, ibm_u);
+        __u32     msg_cksum;
+        __u32     msg_version;
+        int       flip;
+        int       msg_nob;
+#if !IBNAL_USE_FMR
+        int       i;
+        int       n;
+#endif
+        /* 6 bytes are enough to have received magic + version */
+        if (nob < 6) {
+                CERROR("Short message: %d\n", nob);
+                return -EPROTO;
+        }
+
+        /* Future protocol version compatibility support!
+         * If the iiblnd-specific protocol changes, or when LNET unifies
+         * protocols over all LNDs, the initial connection will negotiate a
+         * protocol version.  If I find this, I avoid any console errors.  If
+         * my peer is doing connection establishment, the reject will tell the
+         * peer which version I'm running. */
+
+        if (msg->ibm_magic == IBNAL_MSG_MAGIC) {
+                flip = 0;
+        } else if (msg->ibm_magic == __swab32(IBNAL_MSG_MAGIC)) {
+                flip = 1;
+        } else {
+                if (msg->ibm_magic == LNET_PROTO_MAGIC ||
+                    msg->ibm_magic == __swab32(LNET_PROTO_MAGIC))
+                        return -EPROTO; /* LNET_PROTO_MAGIC in either byte order: reject without a CERROR */
+
+                /* Completely out to lunch */
+                CERROR("Bad magic: %08x\n", msg->ibm_magic);
+                return -EPROTO;
         }
-        strncpy (name, service->ServiceName, sizeof(name)-1);
-        name[sizeof(name)-1] = 0;
+
+        msg_version = flip ? __swab16(msg->ibm_version) : msg->ibm_version;
+        if (expected_version == 0) {
+                if (msg_version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD &&
+                    msg_version != IBNAL_MSG_VERSION)
+                        return -EPROTO; /* neither known version: reject without a CERROR */
+        } else if (msg_version != expected_version) {
+                CERROR("Bad version: %x(%x expected)\n", 
+                       msg_version, expected_version);
+                return -EPROTO;
+        }
+
+        if (nob < hdr_size) {
+                CERROR("Short message: %d\n", nob);
+                return -EPROTO;
+        }
+
+        msg_nob = flip ? __swab32(msg->ibm_nob) : msg->ibm_nob;
+        if (msg_nob > nob) {
+                CERROR("Short message: got %d, wanted %d\n", nob, msg_nob);
+                return -EPROTO;
+        }
+
+        /* NOTE(review): msg_nob is a signed int and is only bounded above
+         * (by nob); a peer-supplied ibm_nob >= 0x80000000 becomes negative,
+         * passes that check and, being converted to unsigned in the
+         * per-type size comparisons below, passes those too -- confirm
+         * whether a lower bound (>= hdr_size) is wanted.
+         *
+         * checksum must be computed with ibm_cksum zero and BEFORE anything
+         * gets flipped */
+        msg_cksum = flip ? __swab32(msg->ibm_cksum) : msg->ibm_cksum;
+        msg->ibm_cksum = 0;
+        if (msg_cksum != 0 &&           /* a received checksum of 0 is not verified */
+            msg_cksum != kibnal_cksum(msg, msg_nob)) {
+                CERROR("Bad checksum\n");
+                return -EPROTO;
+        }
+        msg->ibm_cksum = msg_cksum;
         
-        CWARN("tag       : %s\n"
-              "status    : %d\n"
-              "service id: "LPX64"\n"
-              "name      : %s\n"
-              "NID       : "LPX64"\n", tag, rc,
-              service->RID.ServiceID, name,
-              *kibnal_service_nid_field(service));
-}
+        if (flip) {
+                /* leave magic unflipped as a clue to peer endianness */
+                msg->ibm_version = msg_version;
+                CLASSERT (sizeof(msg->ibm_type) == 1);
+                CLASSERT (sizeof(msg->ibm_credits) == 1);
+                msg->ibm_nob = msg_nob;
+                __swab64s(&msg->ibm_srcnid);
+                __swab64s(&msg->ibm_srcstamp);
+                __swab64s(&msg->ibm_dstnid);
+                __swab64s(&msg->ibm_dststamp);
+                __swab64s(&msg->ibm_seq);
+        }
+        
+        if (msg->ibm_srcnid == LNET_NID_ANY) {
+                CERROR("Bad src nid: %s\n", libcfs_nid2str(msg->ibm_srcnid));
+                return -EPROTO;
+        }
+
+        switch (msg->ibm_type) {
+        default:
+                CERROR("Unknown message type %x\n", msg->ibm_type);
+                return -EPROTO;
+                
+        case IBNAL_MSG_NOOP:
+                break;          /* no type-specific size check or swabbing */
+
+        case IBNAL_MSG_IMMEDIATE:
+                if (msg_nob < offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0])) {
+                        CERROR("Short IMMEDIATE: %d(%d)\n", msg_nob,
+                               (int)offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0]));
+                        return -EPROTO;
+                }
+                break;
+
+        case IBNAL_MSG_PUT_REQ:
+                if (msg_nob < hdr_size + sizeof(msg->ibm_u.putreq)) {
+                        CERROR("Short PUT_REQ: %d(%d)\n", msg_nob,
+                               (int)(hdr_size + sizeof(msg->ibm_u.putreq)));
+                        return -EPROTO;
+                }
+                break;
+
+        case IBNAL_MSG_PUT_ACK:
+                if (msg_nob < hdr_size + sizeof(msg->ibm_u.putack)) {
+                        CERROR("Short PUT_ACK: %d(%d)\n", msg_nob,
+                               (int)(hdr_size + sizeof(msg->ibm_u.putack)));
+                        return -EPROTO;
+                }
+#if IBNAL_USE_FMR
+                if (flip) {
+                        __swab64s(&msg->ibm_u.putack.ibpam_rd.rd_addr);
+                        __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nob);
+                        __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key);
+                }
+#else
+                if (flip) {
+                        __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key);
+                        __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nfrag);
+                }
+                
+                n = msg->ibm_u.putack.ibpam_rd.rd_nfrag;
+                if (n <= 0 || n > IBNAL_MAX_RDMA_FRAGS) {
+                        CERROR("Bad PUT_ACK nfrags: %d, should be 0 < n <= %d\n", 
+                               n, IBNAL_MAX_RDMA_FRAGS);
+                        return -EPROTO;
+                }
+                
+                if (msg_nob < offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n])) {
+                        CERROR("Short PUT_ACK: %d(%d)\n", msg_nob,
+                               (int)offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n]));
+                        return -EPROTO;
+                }
+
+                if (flip) {
+                        for (i = 0; i < n; i++) {
+                                __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_nob);
+                                __swab64s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_addr);
+                        }
+                }
 #endif
+                break;
 
-static void
-kibnal_service_setunset_done (void *arg, FABRIC_OPERATION_DATA *fod,
-                              FSTATUS frc, uint32 madrc)
+        case IBNAL_MSG_GET_REQ:
+                if (msg_nob < hdr_size + sizeof(msg->ibm_u.get)) {
+                        CERROR("Short GET_REQ: %d(%d)\n", msg_nob,
+                               (int)(hdr_size + sizeof(msg->ibm_u.get)));
+                        return -EPROTO;
+                }
+#if IBNAL_USE_FMR
+                if (flip) {
+                        __swab64s(&msg->ibm_u.get.ibgm_rd.rd_addr);
+                        __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nob);
+                        __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key);
+                }
+#else                
+                if (flip) {
+                        __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key);
+                        __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nfrag);
+                }
+
+                n = msg->ibm_u.get.ibgm_rd.rd_nfrag;
+                if (n <= 0 || n > IBNAL_MAX_RDMA_FRAGS) {
+                        CERROR("Bad GET_REQ nfrags: %d, should be 0 < n <= %d\n", 
+                               n, IBNAL_MAX_RDMA_FRAGS);
+                        return -EPROTO;
+                }
+                
+                if (msg_nob < offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n])) {
+                        CERROR("Short GET_REQ: %d(%d)\n", msg_nob,
+                               (int)offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n]));
+                        return -EPROTO;
+                }
+                
+                if (flip)
+                        for (i = 0; i < msg->ibm_u.get.ibgm_rd.rd_nfrag; i++) {
+                                __swab32s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_nob);
+                                __swab64s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_addr);
+                        }
+#endif
+                break;
+
+        case IBNAL_MSG_PUT_NAK:
+        case IBNAL_MSG_PUT_DONE:
+        case IBNAL_MSG_GET_DONE:
+                if (msg_nob < hdr_size + sizeof(msg->ibm_u.completion)) {
+                        CERROR("Short RDMA completion: %d(%d)\n", msg_nob,
+                               (int)(hdr_size + sizeof(msg->ibm_u.completion)));
+                        return -EPROTO;
+                }
+                if (flip)
+                        __swab32s(&msg->ibm_u.completion.ibcm_status);
+                break;
+
+        case IBNAL_MSG_CONNREQ:
+        case IBNAL_MSG_CONNACK:
+                if (msg_nob < hdr_size + sizeof(msg->ibm_u.connparams)) {
+                        CERROR("Short connreq/ack: %d(%d)\n", msg_nob,
+                               (int)(hdr_size + sizeof(msg->ibm_u.connparams)));
+                        return -EPROTO;
+                }
+                if (flip) {
+                        __swab32s(&msg->ibm_u.connparams.ibcp_queue_depth);
+                        __swab32s(&msg->ibm_u.connparams.ibcp_max_msg_size);
+                        __swab32s(&msg->ibm_u.connparams.ibcp_max_frags);
+                }
+                break;
+        }
+        return 0;
+}
+
+IB_HANDLE
+kibnal_create_cep(lnet_nid_t nid)      /* Create and configure a CM_RC_TYPE CEP; nid == LNET_NID_ANY
+                                        * means the listener.  Returns the CEP, or NULL on failure. */
 {
-        *(FSTATUS *)arg = frc;
-        up (&kibnal_data.kib_nid_signal);
+        FSTATUS        frc;
+        __u32          u32val;
+        IB_HANDLE      cep;
+
+        cep = iba_cm_create_cep(CM_RC_TYPE);
+        if (cep == NULL) {
+                CERROR ("Can't create CEP for %s\n",
+                        (nid == LNET_NID_ANY) ? "listener" :
+                        libcfs_nid2str(nid));
+                return NULL;
+        }
+
+        if (nid == LNET_NID_ANY) {      /* listener-only settings */
+                u32val = 1;
+                frc = iba_cm_modify_cep(cep, CM_FLAG_ASYNC_ACCEPT,
+                                        (char *)&u32val, sizeof(u32val), 0);
+                if (frc != FSUCCESS) {
+                        CERROR("Can't set async_accept: %d\n", frc);
+                        goto failed;
+                }
+
+                u32val = 0;                     /* sets system max */
+                frc = iba_cm_modify_cep(cep, CM_FLAG_LISTEN_BACKLOG,
+                                        (char *)&u32val, sizeof(u32val), 0);
+                if (frc != FSUCCESS) {
+                        CERROR("Can't set listen backlog: %d\n", frc);
+                        goto failed;
+                }
+        }
+        
+        u32val = 1;                     /* applied to listener and peer CEPs alike */
+        frc = iba_cm_modify_cep(cep, CM_FLAG_TIMEWAIT_CALLBACK,
+                                (char *)&u32val, sizeof(u32val), 0);
+        if (frc != FSUCCESS) {
+                CERROR("Can't set timewait_callback for %s: %d\n", 
+                        (nid == LNET_NID_ANY) ? "listener" :
+                        libcfs_nid2str(nid), frc);
+                goto failed;
+        }
+
+        return cep;
+        
+ failed:                                /* destroy the partly configured CEP; the caller gets NULL */
+        iba_cm_destroy_cep(cep);
+        return NULL;
 }
 
+#define IBNAL_CHECK_ADVERT 1
 #if IBNAL_CHECK_ADVERT
-static void
+void
 kibnal_service_query_done (void *arg, QUERY *qry, 
                            QUERY_RESULT_VALUES *qry_result)
 {
-        FSTATUS frc = qry_result->Status;
+        int                    *rcp = arg;      /* result (0 or -errno) is reported through *arg */
+        FSTATUS                 frc = qry_result->Status;
+        SERVICE_RECORD_RESULTS *svc_rslt;
+        IB_SERVICE_RECORD      *svc;
+        lnet_nid_t              nid;
+
+        if (frc != FSUCCESS || qry_result->ResultDataSize == 0) {
+                CERROR("Error checking advert: status %d data size %d\n",
+                       frc, qry_result->ResultDataSize);
+                *rcp = -EIO;
+                goto out;
+        }
+
+        svc_rslt = (SERVICE_RECORD_RESULTS *)qry_result->QueryResult;
+
+        if (svc_rslt->NumServiceRecords < 1) {
+                CERROR("Check advert: %d records\n",
+                       svc_rslt->NumServiceRecords);
+                *rcp = -ENOENT;
+                goto out;
+        }
 
-        if (frc != FSUCCESS &&
-            qry_result->ResultDataSize == 0)
-                frc = FERROR;
+        svc = &svc_rslt->ServiceRecords[0];     /* only the first record is examined */
+        nid = le64_to_cpu(*kibnal_service_nid_field(svc));
         
-        *(FSTATUS *)arg = frc;
-        up (&kibnal_data.kib_nid_signal);
+        CDEBUG(D_NET, "Check advert: %s "LPX64" "LPX64":%04x\n",
+               libcfs_nid2str(nid), svc->RID.ServiceID, 
+               svc->RID.ServiceGID.Type.Global.InterfaceID, 
+               svc->RID.ServiceP_Key);
+
+        /* The record must match my NID, service number, port GUID and
+         * pkey; any mismatch is reported as -EINVAL. */
+        if (nid != kibnal_data.kib_ni->ni_nid) {
+                CERROR("Check advert: Bad NID %s (%s expected)\n",
+                       libcfs_nid2str(nid),
+                       libcfs_nid2str(kibnal_data.kib_ni->ni_nid));
+                *rcp = -EINVAL;
+                goto out;
+        }
+
+        if (svc->RID.ServiceID != *kibnal_tunables.kib_service_number) {
+                CERROR("Check advert: Bad ServiceID "LPX64" (%x expected)\n",
+                       svc->RID.ServiceID,
+                       *kibnal_tunables.kib_service_number);
+                *rcp = -EINVAL;
+                goto out;
+        }
+
+        if (svc->RID.ServiceGID.Type.Global.InterfaceID != 
+            kibnal_data.kib_port_guid) {
+                CERROR("Check advert: Bad GUID "LPX64" ("LPX64" expected)\n",
+                       svc->RID.ServiceGID.Type.Global.InterfaceID,
+                       kibnal_data.kib_port_guid);
+                *rcp = -EINVAL;
+                goto out;
+        }
+
+        if (svc->RID.ServiceP_Key != kibnal_data.kib_port_pkey) {
+                CERROR("Check advert: Bad PKEY %04x (%04x expected)\n",
+                       svc->RID.ServiceP_Key, kibnal_data.kib_port_pkey);
+                *rcp = -EINVAL;
+                goto out;
+        }
+
+        CDEBUG(D_NET, "Check advert OK\n");
+        *rcp = 0;
+                
+ out:
+        up (&kibnal_data.kib_listener_signal);  /* wake the waiter blocked in down() in kibnal_check_advert() */
 }
 
-static void
+int
 kibnal_check_advert (void)
 {
-        QUERY                  *qry;
-        IB_SERVICE_RECORD      *svc;
-        FSTATUS                 frc;
-        FSTATUS                 frc2;
+        /* single-threaded (qry is static, so this function is not
+         * reentrant).  Returns 0 if the advert checks out, -EIO if the query
+         * could not be started, else the error set by
+         * kibnal_service_query_done(). */
+        static QUERY               qry;
 
-        PORTAL_ALLOC(qry, sizeof(*qry));
-        if (qry == NULL)
-                return;
+        FSTATUS                    frc;
+        int                        rc;
 
-        memset (qry, 0, sizeof(*qry));
-        qry->InputType = InputTypeServiceRecord;
-        qry->OutputType = OutputTypeServiceRecord;
-        qry->InputValue.ServiceRecordValue.ComponentMask = KIBNAL_SERVICE_KEY_MASK;
-        svc = &qry->InputValue.ServiceRecordValue.ServiceRecord;
-        kibnal_set_service_keys(svc, kibnal_data.kib_nid);
-
-        frc = iibt_sd_query_port_fabric_information(kibnal_data.kib_sd,
-                                                    kibnal_data.kib_port_guid,
-                                                    qry,
-                                                    kibnal_service_query_done,
-                                                    NULL, &frc2);
-        if (frc != FSUCCESS && frc != FPENDING) {
-                CERROR ("Immediate error %d checking SM service\n", frc);
-        } else {
-                down (&kibnal_data.kib_nid_signal);
-                frc = frc2;
+        memset (&qry, 0, sizeof(qry));
+        qry.InputType = InputTypeServiceRecord;
+        qry.OutputType = OutputTypeServiceRecord;
+        kibnal_set_service_keys(&qry.InputValue.ServiceRecordValue.ServiceRecord,
+                                kibnal_data.kib_ni->ni_nid);
+        qry.InputValue.ServiceRecordValue.ComponentMask = KIBNAL_SERVICE_KEY_MASK;
 
-                if (frc != 0)
-                        CERROR ("Error %d checking SM service\n", rc);
+        frc = iba_sd_query_port_fabric_info(kibnal_data.kib_sd, 
+                                            kibnal_data.kib_port_guid,
+                                            &qry, 
+                                            kibnal_service_query_done,
+                                            &kibnal_data.kib_sdretry, 
+                                            &rc);
+        if (frc != FPENDING) {          /* NB unlike kibnal_advertise(), an immediate FSUCCESS is also treated as an error */
+                CERROR ("Immediate error %d checking SM service\n", frc);
+                return -EIO;
         }
-
-        return (rc);
+        
+        down (&kibnal_data.kib_listener_signal);        /* wait for kibnal_service_query_done() to set rc */
+        
+        if (rc != 0)
+                CERROR ("Error %d checking SM service\n", rc);
+        return rc;
+}
+#else
+int
+kibnal_check_advert(void)
+{
+        return 0;       /* IBNAL_CHECK_ADVERT is 0: no query is made, always report success */
 }
 #endif
 
-static void fill_fod(FABRIC_OPERATION_DATA *fod, FABRIC_OPERATION_TYPE type)
+void 
+kibnal_fill_fod(FABRIC_OPERATION_DATA *fod, FABRIC_OPERATION_TYPE type)
 {
         IB_SERVICE_RECORD     *svc;
 
@@ -143,211 +493,208 @@ static void fill_fod(FABRIC_OPERATION_DATA *fod, FABRIC_OPERATION_TYPE type)
         fod->Type = type;
 
         svc = &fod->Value.ServiceRecordValue.ServiceRecord;
-        svc->RID.ServiceID = kibnal_data.kib_service_id;
+        svc->RID.ServiceID = *kibnal_tunables.kib_service_number;
         svc->RID.ServiceGID.Type.Global.InterfaceID = kibnal_data.kib_port_guid;
         svc->RID.ServiceGID.Type.Global.SubnetPrefix = DEFAULT_SUBNET_PREFIX;
         svc->RID.ServiceP_Key = kibnal_data.kib_port_pkey;
         svc->ServiceLease = 0xffffffff;
 
-        kibnal_set_service_keys(svc, kibnal_data.kib_nid);
+        kibnal_set_service_keys(svc, kibnal_data.kib_ni->ni_nid);
 }
 
-static int
-kibnal_advertise (void)
+void
+kibnal_service_setunset_done (void *arg, FABRIC_OPERATION_DATA *fod,
+                              FSTATUS frc, uint32 madrc)
 {
-        FABRIC_OPERATION_DATA *fod;
-        IB_SERVICE_RECORD     *svc;
-        FSTATUS                frc;
-        FSTATUS                frc2;
-
-        LASSERT (kibnal_data.kib_nid != PTL_NID_ANY);
+        *(FSTATUS *)arg = frc;          /* pass the completion status back through arg; fod and madrc are unused */
+        up (&kibnal_data.kib_listener_signal);  /* release whoever is blocked in down() on this semaphore */
+}
 
-        PORTAL_ALLOC(fod, sizeof(*fod));
-        if (fod == NULL)
-                return (-ENOMEM);
+int
+kibnal_advertise (void)
+{
+        /* Single threaded here (fod is static, so this function is not
+         * reentrant).  Returns 0 on success, -EINVAL if the service name
+         * tunable is too long, -EIO if the fabric operation fails. */
+        static FABRIC_OPERATION_DATA fod;
+
+        IB_SERVICE_RECORD *svc = &fod.Value.ServiceRecordValue.ServiceRecord;
+        FSTATUS            frc;
+        FSTATUS            frc2;
+
+        if (strlen(*kibnal_tunables.kib_service_name) >=
+            sizeof(svc->ServiceName)) {
+                CERROR("Service name '%s' too long (%d chars max)\n",
+                       *kibnal_tunables.kib_service_name,
+                       (int)sizeof(svc->ServiceName) - 1);
+                return -EINVAL;
+        }
 
-        fill_fod(fod, FabOpSetServiceRecord);
-        svc = &fod->Value.ServiceRecordValue.ServiceRecord;
+        kibnal_fill_fod(&fod, FabOpSetServiceRecord);
 
-        CDEBUG(D_NET, "Advertising service id "LPX64" %s:"LPX64"\n", 
-               svc->RID.ServiceID, 
-               svc->ServiceName, *kibnal_service_nid_field(svc));
+        CDEBUG(D_NET, "Advertising service id "LPX64" %s:%s\n", 
+               svc->RID.ServiceID, svc->ServiceName, 
+               libcfs_nid2str(le64_to_cpu(*kibnal_service_nid_field(svc))));
 
-        frc = iibt_sd_port_fabric_operation(kibnal_data.kib_sd,
-                                            kibnal_data.kib_port_guid,
-                                            fod, kibnal_service_setunset_done, 
-                                            NULL, &frc2);
+        frc = iba_sd_port_fabric_operation(kibnal_data.kib_sd,
+                                           kibnal_data.kib_port_guid,
+                                           &fod, 
+                                           kibnal_service_setunset_done, 
+                                           &kibnal_data.kib_sdretry,
+                                           &frc2);
 
         if (frc != FSUCCESS && frc != FPENDING) {
-                CERROR ("Immediate error %d advertising NID "LPX64"\n",
-                        frc, kibnal_data.kib_nid);
-                goto out;
+                CERROR ("Immediate error %d advertising NID %s\n",
+                        frc, libcfs_nid2str(kibnal_data.kib_ni->ni_nid));
+                return -EIO;
         }
 
-        down (&kibnal_data.kib_nid_signal);
+        down (&kibnal_data.kib_listener_signal);        /* wait for kibnal_service_setunset_done() to fill frc2 */
 
         frc = frc2;
-        if (frc != FSUCCESS)
-                CERROR ("Error %d advertising BUD "LPX64"\n",
-                        frc, kibnal_data.kib_nid);
-out:
-        PORTAL_FREE(fod, sizeof(*fod));
-        return (frc == FSUCCESS) ? 0 : -EINVAL;
+        if (frc == FSUCCESS)
+                return 0;
+        
+        CERROR ("Error %d advertising %s\n",
+                frc, libcfs_nid2str(kibnal_data.kib_ni->ni_nid));
+        return -EIO;
 }
 
-static void
+void
 kibnal_unadvertise (int expect_success)
 {
-        FABRIC_OPERATION_DATA *fod;
-        IB_SERVICE_RECORD     *svc;
-        FSTATUS                frc;
-        FSTATUS                frc2;
+        /* single threaded (fod is static, so this function is not
+         * reentrant).  expect_success only selects what gets logged: a
+         * failure when success was expected is a CERROR, a success when
+         * failure was expected is a CWARN; the expected outcome is silent. */
+        static FABRIC_OPERATION_DATA fod;
 
-        LASSERT (kibnal_data.kib_nid != PTL_NID_ANY);
+        IB_SERVICE_RECORD *svc = &fod.Value.ServiceRecordValue.ServiceRecord;
+        FSTATUS            frc;
+        FSTATUS            frc2;
 
-        PORTAL_ALLOC(fod, sizeof(*fod));
-        if (fod == NULL)
-                return;
+        LASSERT (kibnal_data.kib_ni->ni_nid != LNET_NID_ANY);
 
-        fill_fod(fod, FabOpDeleteServiceRecord);
-        svc = &fod->Value.ServiceRecordValue.ServiceRecord;
+        kibnal_fill_fod(&fod, FabOpDeleteServiceRecord);
 
-        CDEBUG(D_NET, "Unadvertising service %s:"LPX64"\n",
-               svc->ServiceName, *kibnal_service_nid_field(svc));
+        CDEBUG(D_NET, "Unadvertising service %s:%s\n",
+               svc->ServiceName, 
+               libcfs_nid2str(le64_to_cpu(*kibnal_service_nid_field(svc))));
         
-        frc = iibt_sd_port_fabric_operation(kibnal_data.kib_sd,
-                                            kibnal_data.kib_port_guid,
-                                            fod, kibnal_service_setunset_done, 
-                                            NULL, &frc2);
-
+        frc = iba_sd_port_fabric_operation(kibnal_data.kib_sd,
+                                           kibnal_data.kib_port_guid,
+                                           &fod, 
+                                           kibnal_service_setunset_done, 
+                                           &kibnal_data.kib_sdretry, 
+                                           &frc2);
         if (frc != FSUCCESS && frc != FPENDING) {
-                CERROR ("Immediate error %d unadvertising NID "LPX64"\n",
-                        frc, kibnal_data.kib_nid);
-                goto out;
+                CERROR ("Immediate error %d unadvertising NID %s\n",
+                        frc, libcfs_nid2str(kibnal_data.kib_ni->ni_nid));
+                return;
         }
 
-        down (&kibnal_data.kib_nid_signal);
+        down (&kibnal_data.kib_listener_signal);        /* wait for kibnal_service_setunset_done() to fill frc2 */
+
+        CDEBUG(D_NET, "Unadvertise rc: %d\n", frc2);
 
         if ((frc2 == FSUCCESS) == !!expect_success)
-                goto out;
+                return;         /* outcome matched the expectation: nothing to report */
 
         if (expect_success)
-                CERROR("Error %d unadvertising NID "LPX64"\n",
-                       frc2, kibnal_data.kib_nid);
+                CERROR("Error %d unadvertising NID %s\n",
+                       frc2, libcfs_nid2str(kibnal_data.kib_ni->ni_nid));
         else
-                CWARN("Removed conflicting NID "LPX64"\n",
-                      kibnal_data.kib_nid);
- out:
-        PORTAL_FREE(fod, sizeof(*fod));
+                CWARN("Removed conflicting NID %s\n",
+                      libcfs_nid2str(kibnal_data.kib_ni->ni_nid));
 }
 
-static int
-kibnal_set_mynid(ptl_nid_t nid)
+void
+kibnal_stop_listener(int normal_shutdown)
 {
-        struct timeval tv;
-        lib_ni_t      *ni = &kibnal_lib.libnal_ni;
-        int            rc;
+        /* NB this also disables peer creation and destroys all existing
+         * peers.  normal_shutdown is passed to kibnal_unadvertise() as its
+         * expect_success argument. */
+        IB_HANDLE      cep = kibnal_data.kib_listener_cep;
+        unsigned long  flags;
         FSTATUS        frc;
 
-        CDEBUG(D_IOCTL, "setting mynid to "LPX64" (old nid="LPX64")\n",
-               nid, ni->ni_pid.nid);
+        LASSERT (cep != NULL);
 
-        do_gettimeofday(&tv);
+        kibnal_unadvertise(normal_shutdown);
 
-        down (&kibnal_data.kib_nid_mutex);
+        frc = iba_cm_cancel(cep);
+        if (frc != FSUCCESS && frc != FPENDING)
+                CERROR ("Error %d stopping listener\n", frc);
 
-        if (nid == kibnal_data.kib_nid) {
-                /* no change of NID */
-                up (&kibnal_data.kib_nid_mutex);
-                return (0);
-        }
+        down(&kibnal_data.kib_listener_signal); /* NOTE(review): presumably upped by the listen callback once the
+                                                 * cancel completes; nothing visible here shows who ups it if the
+                                                 * cancel above failed -- confirm */
 
-        CDEBUG(D_NET, "NID "LPX64"("LPX64")\n",
-               kibnal_data.kib_nid, nid);
-        
-        if (kibnal_data.kib_nid != PTL_NID_ANY) {
+        frc = iba_cm_destroy_cep(cep);
+        if (frc != FSUCCESS)
+                CERROR ("Error %d destroying listener CEP\n", frc);
 
-                kibnal_unadvertise (1);
+        write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
+        /* This assignment disables peer creation */
+        kibnal_data.kib_listener_cep = NULL;
+        write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
 
-                frc = iibt_cm_cancel(kibnal_data.kib_cep);
-                if (frc != FSUCCESS && frc != FPENDING)
-                        CERROR ("Error %d stopping listener\n", frc);
+        /* Start to tear down any peers created while the listener was
+         * running */
+        kibnal_del_peer(LNET_NID_ANY);
+}
 
-                frc = iibt_cm_destroy_cep(kibnal_data.kib_cep);
-                if (frc != FSUCCESS)
-                        CERROR ("Error %d destroying CEP\n", frc);
+int
+kibnal_start_listener(void)
+{
+        /* NB this also enables peer creation.
+         * Returns 0 on success, -ENOMEM if no listener CEP could be
+         * created, -EIO if iba_cm_listen() fails, otherwise the error from
+         * kibnal_advertise()/kibnal_check_advert() (the listener is stopped
+         * again before returning it). */
 
-                kibnal_data.kib_cep = NULL;
-        }
-        
-        kibnal_data.kib_nid = ni->ni_pid.nid = nid;
-        kibnal_data.kib_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
-        
-        /* Delete all existing peers and their connections after new
-         * NID/incarnation set to ensure no old connections in our brave
-         * new world. */
-        kibnal_del_peer (PTL_NID_ANY, 0);
-
-        if (kibnal_data.kib_nid == PTL_NID_ANY) {
-                /* No new NID to install */
-                up (&kibnal_data.kib_nid_mutex);
-                return (0);
-        }
+        IB_HANDLE      cep;
+        CM_LISTEN_INFO info;
+        unsigned long  flags;
+        int            rc;
+        FSTATUS        frc;
 
-        /* remove any previous advert (crashed node etc) */
-        kibnal_unadvertise(0);
+        LASSERT (kibnal_data.kib_listener_cep == NULL);
+        init_MUTEX_LOCKED (&kibnal_data.kib_listener_signal);   /* starts locked: each down() blocks until a callback up()s it */
 
-        kibnal_data.kib_cep = iibt_cm_create_cep(CM_RC_TYPE);
-        if (kibnal_data.kib_cep == NULL) {
-                CERROR ("Can't create CEP\n");
-                rc = -ENOMEM;
-        } else {
-                CM_LISTEN_INFO info;
-                memset (&info, 0, sizeof(info));
-                info.ListenAddr.EndPt.SID = kibnal_data.kib_service_id;
-
-                frc = iibt_cm_listen(kibnal_data.kib_cep, &info,
-                                     kibnal_listen_callback, NULL);
-                if (frc != FSUCCESS && frc != FPENDING) {
-                        CERROR ("iibt_cm_listen error: %d\n", frc);
-                        rc = -EINVAL;
-                } else {
-                        rc = 0;
-                }
-        }
-        
-        if (rc == 0) {
-                rc = kibnal_advertise();
-                if (rc == 0) {
-#if IBNAL_CHECK_ADVERT
-                        kibnal_check_advert();
-#endif
-                        up (&kibnal_data.kib_nid_mutex);
-                        return (0);
-                }
-                
-                iibt_cm_cancel (kibnal_data.kib_cep);
-                iibt_cm_destroy_cep (kibnal_data.kib_cep);
-                /* remove any peers that sprung up while I failed to
-                 * advertise myself */
-                kibnal_del_peer (PTL_NID_ANY, 0);
+        cep = kibnal_create_cep(LNET_NID_ANY);  /* LNET_NID_ANY selects the listener configuration */
+        if (cep == NULL)
+                return -ENOMEM;
+
+        memset (&info, 0, sizeof(info));
+        info.ListenAddr.EndPt.SID = *kibnal_tunables.kib_service_number;
+
+        frc = iba_cm_listen(cep, &info, kibnal_listen_callback, NULL);
+        if (frc != FSUCCESS && frc != FPENDING) {
+                CERROR ("iba_cm_listen error: %d\n", frc);
+
+                iba_cm_destroy_cep(cep);
+                return -EIO;
         }
 
-        kibnal_data.kib_nid = PTL_NID_ANY;
-        up (&kibnal_data.kib_nid_mutex);
-        return (rc);
+        write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
+        /* This assignment enables peer creation */
+        kibnal_data.kib_listener_cep = cep;
+        write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
+
+        rc = kibnal_advertise();
+        if (rc == 0)
+                rc = kibnal_check_advert();
+
+        if (rc == 0)
+                return 0;
+
+        kibnal_stop_listener(0);        /* 0 == not a normal shutdown */
+        return rc;
 }
 
-kib_peer_t *
-kibnal_create_peer (ptl_nid_t nid)
+int
+kibnal_create_peer (kib_peer_t **peerp, lnet_nid_t nid)
 {
-        kib_peer_t *peer;
+        kib_peer_t    *peer;
+        unsigned long  flags;
+        int            rc;
 
-        LASSERT (nid != PTL_NID_ANY);
+        LASSERT (nid != LNET_NID_ANY);
 
-        PORTAL_ALLOC (peer, sizeof (*peer));
-        if (peer == NULL)
-                return (NULL);
+        LIBCFS_ALLOC (peer, sizeof (*peer));
+        if (peer == NULL) {
+                CERROR("Cannot allocate peer\n");
+                return -ENOMEM;
+        }
 
         memset(peer, 0, sizeof(*peer));         /* zero flags etc */
 
@@ -358,11 +705,35 @@ kibnal_create_peer (ptl_nid_t nid)
         INIT_LIST_HEAD (&peer->ibp_conns);
         INIT_LIST_HEAD (&peer->ibp_tx_queue);
 
-        peer->ibp_reconnect_time = jiffies;
-        peer->ibp_reconnect_interval = IBNAL_MIN_RECONNECT_INTERVAL;
+        peer->ibp_error = 0;
+        peer->ibp_last_alive = cfs_time_current();
+        peer->ibp_reconnect_interval = 0;       /* OK to connect at any time */
+
+        write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
+        
+        if (atomic_read(&kibnal_data.kib_npeers) >=
+            *kibnal_tunables.kib_concurrent_peers) {
+                rc = -EOVERFLOW;        /* odd errno, but distinct from -ESHUTDOWN below */
+        } else if (kibnal_data.kib_listener_cep == NULL) {
+                rc = -ESHUTDOWN;        /* shutdown has started */
+        } else {
+                rc = 0;
+                /* npeers only grows with the global lock held */
+                atomic_inc(&kibnal_data.kib_npeers);
+        }
+        
+        write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
 
-        atomic_inc (&kibnal_data.kib_npeers);
-        return (peer);
+        if (rc != 0) {
+                CERROR("Can't create peer: %s\n", 
+                       (rc == -ESHUTDOWN) ? "shutting down" : 
+                       "too many peers");
+                LIBCFS_FREE(peer, sizeof(*peer));
+        } else {
+                *peerp = peer;
+        }
+        
+        return rc;
 }
 
 void
@@ -372,11 +743,11 @@ kibnal_destroy_peer (kib_peer_t *peer)
         LASSERT (atomic_read (&peer->ibp_refcount) == 0);
         LASSERT (peer->ibp_persistence == 0);
         LASSERT (!kibnal_peer_active(peer));
-        LASSERT (peer->ibp_connecting == 0);
+        LASSERT (!kibnal_peer_connecting(peer));
         LASSERT (list_empty (&peer->ibp_conns));
         LASSERT (list_empty (&peer->ibp_tx_queue));
 
-        PORTAL_FREE (peer, sizeof (*peer));
+        LIBCFS_FREE (peer, sizeof (*peer));
 
         /* NB a peer's connections keep a reference on their peer until
          * they are destroyed, so we can be assured that _all_ state to do
@@ -388,7 +759,7 @@ kibnal_destroy_peer (kib_peer_t *peer)
 /* the caller is responsible for accounting for the additional reference
  * that this creates */
 kib_peer_t *
-kibnal_find_peer_locked (ptl_nid_t nid)
+kibnal_find_peer_locked (lnet_nid_t nid)
 {
         struct list_head *peer_list = kibnal_nid2peerlist (nid);
         struct list_head *tmp;
@@ -398,35 +769,20 @@ kibnal_find_peer_locked (ptl_nid_t nid)
 
                 peer = list_entry (tmp, kib_peer_t, ibp_list);
 
-                LASSERT (peer->ibp_persistence != 0 || /* persistent peer */
-                         peer->ibp_connecting != 0 || /* creating conns */
-                         !list_empty (&peer->ibp_conns));  /* active conn */
+                LASSERT (peer->ibp_persistence != 0 ||
+                         kibnal_peer_connecting(peer) ||
+                         !list_empty (&peer->ibp_conns));
 
                 if (peer->ibp_nid != nid)
                         continue;
 
-                CDEBUG(D_NET, "got peer [%p] -> "LPX64" (%d)\n",
-                       peer, nid, atomic_read (&peer->ibp_refcount));
+                CDEBUG(D_NET, "got peer %s (%d)\n",
+                       libcfs_nid2str(nid), atomic_read (&peer->ibp_refcount));
                 return (peer);
         }
         return (NULL);
 }
 
-kib_peer_t *
-kibnal_get_peer (ptl_nid_t nid)
-{
-        kib_peer_t     *peer;
-        unsigned long   flags;
-
-        read_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-        peer = kibnal_find_peer_locked (nid);
-        if (peer != NULL)                       /* +1 ref for caller? */
-                kib_peer_addref(peer);
-        read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
-        return (peer);
-}
-
 void
 kibnal_unlink_peer_locked (kib_peer_t *peer)
 {
@@ -436,11 +792,11 @@ kibnal_unlink_peer_locked (kib_peer_t *peer)
         LASSERT (kibnal_peer_active(peer));
         list_del_init (&peer->ibp_list);
         /* lose peerlist's ref */
-        kib_peer_decref(peer);
+        kibnal_peer_decref(peer);
 }
 
-static int
-kibnal_get_peer_info (int index, ptl_nid_t *nidp, int *persistencep)
+int
+kibnal_get_peer_info (int index, lnet_nid_t *nidp, int *persistencep)
 {
         kib_peer_t        *peer;
         struct list_head  *ptmp;
@@ -455,7 +811,7 @@ kibnal_get_peer_info (int index, ptl_nid_t *nidp, int *persistencep)
 
                         peer = list_entry (ptmp, kib_peer_t, ibp_list);
                         LASSERT (peer->ibp_persistence != 0 ||
-                                 peer->ibp_connecting != 0 ||
+                                 kibnal_peer_connecting(peer) ||
                                  !list_empty (&peer->ibp_conns));
 
                         if (index-- > 0)
@@ -474,25 +830,26 @@ kibnal_get_peer_info (int index, ptl_nid_t *nidp, int *persistencep)
         return (-ENOENT);
 }
 
-static int
-kibnal_add_persistent_peer (ptl_nid_t nid)
+int
+kibnal_add_persistent_peer (lnet_nid_t nid)
 {
         unsigned long      flags;
         kib_peer_t        *peer;
         kib_peer_t        *peer2;
+        int                rc;
         
-        if (nid == PTL_NID_ANY)
+        if (nid == LNET_NID_ANY)
                 return (-EINVAL);
 
-        peer = kibnal_create_peer (nid);
-        if (peer == NULL)
-                return (-ENOMEM);
+        rc = kibnal_create_peer(&peer, nid);
+        if (rc != 0)
+                return rc;
 
         write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
 
         peer2 = kibnal_find_peer_locked (nid);
         if (peer2 != NULL) {
-                kib_peer_decref (peer);
+                kibnal_peer_decref (peer);
                 peer = peer2;
         } else {
                 /* peer table takes existing ref on peer */
@@ -506,20 +863,14 @@ kibnal_add_persistent_peer (ptl_nid_t nid)
         return (0);
 }
 
-static void
-kibnal_del_peer_locked (kib_peer_t *peer, int single_share)
+void
+kibnal_del_peer_locked (kib_peer_t *peer)
 {
         struct list_head *ctmp;
         struct list_head *cnxt;
         kib_conn_t       *conn;
 
-        if (!single_share)
-                peer->ibp_persistence = 0;
-        else if (peer->ibp_persistence > 0)
-                peer->ibp_persistence--;
-
-        if (peer->ibp_persistence != 0)
-                return;
+        peer->ibp_persistence = 0;
 
         if (list_empty(&peer->ibp_conns)) {
                 kibnal_unlink_peer_locked(peer);
@@ -537,9 +888,10 @@ kibnal_del_peer_locked (kib_peer_t *peer, int single_share)
 }
 
 int
-kibnal_del_peer (ptl_nid_t nid, int single_share)
+kibnal_del_peer (lnet_nid_t nid)
 {
         unsigned long      flags;
+        CFS_LIST_HEAD     (zombies);
         struct list_head  *ptmp;
         struct list_head  *pnxt;
         kib_peer_t        *peer;
@@ -550,7 +902,7 @@ kibnal_del_peer (ptl_nid_t nid, int single_share)
 
         write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
 
-        if (nid != PTL_NID_ANY)
+        if (nid != LNET_NID_ANY)
                 lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers;
         else {
                 lo = 0;
@@ -561,26 +913,31 @@ kibnal_del_peer (ptl_nid_t nid, int single_share)
                 list_for_each_safe (ptmp, pnxt, &kibnal_data.kib_peers[i]) {
                         peer = list_entry (ptmp, kib_peer_t, ibp_list);
                         LASSERT (peer->ibp_persistence != 0 ||
-                                 peer->ibp_connecting != 0 ||
+                                 kibnal_peer_connecting(peer) ||
                                  !list_empty (&peer->ibp_conns));
 
-                        if (!(nid == PTL_NID_ANY || peer->ibp_nid == nid))
+                        if (!(nid == LNET_NID_ANY || peer->ibp_nid == nid))
                                 continue;
 
-                        kibnal_del_peer_locked (peer, single_share);
-                        rc = 0;         /* matched something */
+                        if (!list_empty(&peer->ibp_tx_queue)) {
+                                LASSERT (list_empty(&peer->ibp_conns));
 
-                        if (single_share)
-                                goto out;
+                                list_splice_init(&peer->ibp_tx_queue, &zombies);
+                        }
+
+                        kibnal_del_peer_locked (peer);
+                        rc = 0;         /* matched something */
                 }
         }
- out:
+
         write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
 
+        kibnal_txlist_done(&zombies, -EIO);
+
         return (rc);
 }
 
-static kib_conn_t *
+kib_conn_t *
 kibnal_get_conn_by_idx (int index)
 {
         kib_peer_t        *peer;
@@ -596,37 +953,111 @@ kibnal_get_conn_by_idx (int index)
                 list_for_each (ptmp, &kibnal_data.kib_peers[i]) {
 
                         peer = list_entry (ptmp, kib_peer_t, ibp_list);
-                        LASSERT (peer->ibp_persistence > 0 ||
-                                 peer->ibp_connecting != 0 ||
+                        LASSERT (peer->ibp_persistence != 0 ||
+                                 kibnal_peer_connecting(peer) ||
                                  !list_empty (&peer->ibp_conns));
 
                         list_for_each (ctmp, &peer->ibp_conns) {
                                 if (index-- > 0)
                                         continue;
 
-                                conn = list_entry (ctmp, kib_conn_t, ibc_list);
-                                CDEBUG(D_NET, "++conn[%p] state %d -> "LPX64" (%d)\n",
-                                       conn, conn->ibc_state, conn->ibc_peer->ibp_nid,
-                                       atomic_read (&conn->ibc_refcount));
-                                atomic_inc (&conn->ibc_refcount);
-                                read_unlock_irqrestore(&kibnal_data.kib_global_lock,
-                                                       flags);
-                                return (conn);
-                        }
-                }
+                                conn = list_entry (ctmp, kib_conn_t, ibc_list);
+                                kibnal_conn_addref(conn);
+                                read_unlock_irqrestore(&kibnal_data.kib_global_lock,
+                                                       flags);
+                                return (conn);
+                        }
+                }
+        }
+
+        read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
+        return (NULL);
+}
+
+int
+kibnal_conn_rts(kib_conn_t *conn, 
+                __u32 qpn, __u8 resp_res, __u8 init_depth, __u32 psn)
+{
+        IB_PATH_RECORD         *path = &conn->ibc_cvars->cv_path;
+        IB_HANDLE               qp = conn->ibc_qp;
+        IB_QP_ATTRIBUTES_MODIFY modify_attr;
+        FSTATUS                 frc;
+        int                     rc;
+
+        if (resp_res > kibnal_data.kib_hca_attrs.MaxQPResponderResources)
+                resp_res = kibnal_data.kib_hca_attrs.MaxQPResponderResources;
+
+        if (init_depth > kibnal_data.kib_hca_attrs.MaxQPInitiatorDepth)
+                init_depth = kibnal_data.kib_hca_attrs.MaxQPInitiatorDepth;
+
+        modify_attr = (IB_QP_ATTRIBUTES_MODIFY) {
+                .RequestState       = QPStateReadyToRecv,
+                .RecvPSN            = IBNAL_STARTING_PSN,
+                .DestQPNumber       = qpn,
+                .ResponderResources = resp_res,
+                .MinRnrTimer        = UsecToRnrNakTimer(2000), /* 2000 usec = 2 ms (was noted as 20 ms; TODO confirm) */
+                .Attrs              = (IB_QP_ATTR_RECVPSN |
+                                       IB_QP_ATTR_DESTQPNUMBER | 
+                                       IB_QP_ATTR_RESPONDERRESOURCES | 
+                                       IB_QP_ATTR_DESTAV | 
+                                       IB_QP_ATTR_PATHMTU | 
+                                       IB_QP_ATTR_MINRNRTIMER),
+        };
+        GetAVFromPath(0, path, &modify_attr.PathMTU, NULL, 
+                      &modify_attr.DestAV);
+
+        frc = iba_modify_qp(qp, &modify_attr, NULL);
+        if (frc != FSUCCESS) {
+                CERROR("Can't set QP %s ready to receive: %d\n",
+                       libcfs_nid2str(conn->ibc_peer->ibp_nid), frc);
+                return -EIO;
+        }
+
+        rc = kibnal_post_receives(conn);
+        if (rc != 0) {
+                CERROR("Can't post receives for %s: %d\n",
+                       libcfs_nid2str(conn->ibc_peer->ibp_nid), rc);
+                return rc;
+        }
+
+        modify_attr = (IB_QP_ATTRIBUTES_MODIFY) {
+                .RequestState           = QPStateReadyToSend,
+                .FlowControl            = TRUE,
+                .InitiatorDepth         = init_depth,
+                .SendPSN                = psn,
+                .LocalAckTimeout        = path->PktLifeTime + 2, /* 2 or 1? */
+                .RetryCount             = IBNAL_RETRY,
+                .RnrRetryCount          = IBNAL_RNR_RETRY,
+                .Attrs                  = (IB_QP_ATTR_FLOWCONTROL | 
+                                           IB_QP_ATTR_INITIATORDEPTH | 
+                                           IB_QP_ATTR_SENDPSN | 
+                                           IB_QP_ATTR_LOCALACKTIMEOUT | 
+                                           IB_QP_ATTR_RETRYCOUNT | 
+                                           IB_QP_ATTR_RNRRETRYCOUNT),
+        };
+
+        frc = iba_modify_qp(qp, &modify_attr, NULL);
+        if (frc != FSUCCESS) {
+                CERROR("Can't set QP %s ready to send: %d\n",
+                       libcfs_nid2str(conn->ibc_peer->ibp_nid), frc);
+                return -EIO;
         }
 
-        read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-        return (NULL);
+        frc = iba_query_qp(conn->ibc_qp, &conn->ibc_cvars->cv_qpattrs, NULL);
+        if (frc != FSUCCESS) {
+                CERROR ("Can't query QP %s attributes: %d\n",
+                        libcfs_nid2str(conn->ibc_peer->ibp_nid), frc);
+                return -EIO;
+        }
+        
+        return 0;
 }
 
 kib_conn_t *
-kibnal_create_conn (void)
+kibnal_create_conn (lnet_nid_t nid, int proto_version)
 {
         kib_conn_t  *conn;
         int          i;
-        __u64        vaddr = 0;
-        __u64        vaddr_base;
         int          page_offset;
         int          ipage;
         int          rc;
@@ -636,50 +1067,61 @@ kibnal_create_conn (void)
                 IB_QP_ATTRIBUTES_MODIFY    qp_attr;
         } params;
         
-        PORTAL_ALLOC (conn, sizeof (*conn));
+        LIBCFS_ALLOC (conn, sizeof (*conn));
         if (conn == NULL) {
-                CERROR ("Can't allocate connection\n");
+                CERROR ("Can't allocate connection for %s\n",
+                        libcfs_nid2str(nid));
                 return (NULL);
         }
 
         /* zero flags, NULL pointers etc... */
         memset (conn, 0, sizeof (*conn));
+        conn->ibc_state = IBNAL_CONN_INIT_NOTHING;
+        conn->ibc_version = proto_version;
 
+        INIT_LIST_HEAD (&conn->ibc_early_rxs);
+        INIT_LIST_HEAD (&conn->ibc_tx_queue_nocred);
         INIT_LIST_HEAD (&conn->ibc_tx_queue);
+        INIT_LIST_HEAD (&conn->ibc_tx_queue_rsrvd);
         INIT_LIST_HEAD (&conn->ibc_active_txs);
         spin_lock_init (&conn->ibc_lock);
         
         atomic_inc (&kibnal_data.kib_nconns);
         /* well not really, but I call destroy() on failure, which decrements */
 
-        PORTAL_ALLOC (conn->ibc_rxs, IBNAL_RX_MSGS * sizeof (kib_rx_t));
-        if (conn->ibc_rxs == NULL)
+        LIBCFS_ALLOC(conn->ibc_cvars, sizeof (*conn->ibc_cvars));
+        if (conn->ibc_cvars == NULL) {
+                CERROR ("Can't allocate connvars for %s\n", 
+                        libcfs_nid2str(nid));
                 goto failed;
-        memset (conn->ibc_rxs, 0, IBNAL_RX_MSGS * sizeof(kib_rx_t));
+        }
+        memset(conn->ibc_cvars, 0, sizeof (*conn->ibc_cvars));
 
-        rc = kibnal_alloc_pages(&conn->ibc_rx_pages, IBNAL_RX_MSG_PAGES, 1);
-        if (rc != 0)
+        LIBCFS_ALLOC(conn->ibc_rxs, IBNAL_RX_MSGS * sizeof (kib_rx_t));
+        if (conn->ibc_rxs == NULL) {
+                CERROR("Cannot allocate RX descriptors for %s\n",
+                       libcfs_nid2str(nid));
                 goto failed;
+        }
+        memset (conn->ibc_rxs, 0, IBNAL_RX_MSGS * sizeof(kib_rx_t));
 
-        vaddr_base = vaddr = conn->ibc_rx_pages->ibp_vaddr;
-
+        rc = kibnal_alloc_pages(&conn->ibc_rx_pages, IBNAL_RX_MSG_PAGES);
+        if (rc != 0) {
+                CERROR("Can't allocate RX buffers for %s\n",
+                       libcfs_nid2str(nid));
+                goto failed;
+        }
+        
         for (i = ipage = page_offset = 0; i < IBNAL_RX_MSGS; i++) {
                 struct page *page = conn->ibc_rx_pages->ibp_pages[ipage];
-                kib_rx_t   *rx = &conn->ibc_rxs[i];
+                kib_rx_t    *rx = &conn->ibc_rxs[i];
 
                 rx->rx_conn = conn;
                 rx->rx_msg = (kib_msg_t *)(((char *)page_address(page)) + 
                              page_offset);
 
-                if (kibnal_whole_mem()) 
-                        rx->rx_vaddr = kibnal_page2phys(page) + 
-                                       page_offset + 
-                                       kibnal_data.kib_md.md_addr;
-                else
-                        rx->rx_vaddr = vaddr;
-                
-                vaddr += IBNAL_MSG_SIZE;
-                LASSERT (vaddr <= vaddr_base + IBNAL_RX_MSG_BYTES);
+                rx->rx_hca_msg = kibnal_data.kib_whole_mem.md_addr +
+                                 lnet_page2phys(page) + page_offset;
                 
                 page_offset += IBNAL_MSG_SIZE;
                 LASSERT (page_offset <= PAGE_SIZE);
@@ -693,9 +1135,9 @@ kibnal_create_conn (void)
 
         params.qp_create = (IB_QP_ATTRIBUTES_CREATE) {
                 .Type                    = QPTypeReliableConnected,
-                .SendQDepth              = IBNAL_TX_MAX_SG * 
-                                           IBNAL_MSG_QUEUE_SIZE,
-                .RecvQDepth              = IBNAL_MSG_QUEUE_SIZE,
+                .SendQDepth              = (1 + IBNAL_MAX_RDMA_FRAGS) *
+                                           (*kibnal_tunables.kib_concurrent_sends),
+                .RecvQDepth              = IBNAL_RX_MSGS,
                 .SendDSListDepth         = 1,
                 .RecvDSListDepth         = 1,
                 .SendCQHandle            = kibnal_data.kib_cq,
@@ -703,15 +1145,15 @@ kibnal_create_conn (void)
                 .PDHandle                = kibnal_data.kib_pd,
                 .SendSignaledCompletions = TRUE,
         };
-        frc = iibt_qp_create(kibnal_data.kib_hca, &params.qp_create, NULL,
-                             &conn->ibc_qp, &conn->ibc_qp_attrs);
-        if (rc != 0) {
-                CERROR ("Failed to create queue pair: %d\n", rc);
+        frc = iba_create_qp(kibnal_data.kib_hca, &params.qp_create, NULL,
+                            &conn->ibc_qp, &conn->ibc_cvars->cv_qpattrs);
+        if (frc != 0) {
+                CERROR ("Can't create QP %s: %d\n", libcfs_nid2str(nid), frc);
                 goto failed;
         }
 
         /* Mark QP created */
-        conn->ibc_state = IBNAL_CONN_INIT_QP;
+        kibnal_set_conn_state(conn, IBNAL_CONN_INIT_QP);
 
         params.qp_attr = (IB_QP_ATTRIBUTES_MODIFY) {
                 .RequestState             = QPStateInit,
@@ -720,21 +1162,30 @@ kibnal_create_conn (void)
                                              IB_QP_ATTR_ACCESSCONTROL),
                 .PortGUID                 = kibnal_data.kib_port_guid,
                 .PkeyIndex                = 0,
-                .AccessControl = {
+                .AccessControl = { 
                         .s = {
                                 .RdmaWrite = 1,
                                 .RdmaRead  = 1,
                         },
                 },
         };
-        rc = iibt_qp_modify(conn->ibc_qp, &params.qp_attr, NULL);
-        if (rc != 0) {
-                CERROR ("Failed to modify queue pair: %d\n", rc);
+        frc = iba_modify_qp(conn->ibc_qp, &params.qp_attr, NULL);
+        if (frc != 0) {
+                CERROR ("Can't set QP %s state to INIT: %d\n",
+                        libcfs_nid2str(nid), frc);
+                goto failed;
+        }
+
+        frc = iba_query_qp(conn->ibc_qp, &conn->ibc_cvars->cv_qpattrs, NULL);
+        if (frc != FSUCCESS) {
+                CERROR ("Can't query QP %s attributes: %d\n",
+                        libcfs_nid2str(nid), frc);
                 goto failed;
         }
 
         /* 1 ref for caller */
         atomic_set (&conn->ibc_refcount, 1);
+        CDEBUG(D_NET, "New conn %p\n", conn);
         return (conn);
         
  failed:
@@ -745,92 +1196,70 @@ kibnal_create_conn (void)
 void
 kibnal_destroy_conn (kib_conn_t *conn)
 {
-        int    rc;
         FSTATUS frc;
+
+        LASSERT (!in_interrupt());
         
-        CDEBUG (D_NET, "connection %p\n", conn);
+        CDEBUG (D_NET, "connection %s\n", 
+                (conn->ibc_peer) == NULL ? "<ANON>" :
+                libcfs_nid2str(conn->ibc_peer->ibp_nid));
 
         LASSERT (atomic_read (&conn->ibc_refcount) == 0);
+        LASSERT (list_empty(&conn->ibc_early_rxs));
         LASSERT (list_empty(&conn->ibc_tx_queue));
+        LASSERT (list_empty(&conn->ibc_tx_queue_rsrvd));
+        LASSERT (list_empty(&conn->ibc_tx_queue_nocred));
         LASSERT (list_empty(&conn->ibc_active_txs));
         LASSERT (conn->ibc_nsends_posted == 0);
-        LASSERT (conn->ibc_connreq == NULL);
 
         switch (conn->ibc_state) {
-        case IBNAL_CONN_DISCONNECTED:
-                /* called after connection sequence initiated */
-                /* fall through */
-
-        case IBNAL_CONN_INIT_QP:
-                /* _destroy includes an implicit Reset of the QP which 
-                 * discards posted work */
-                rc = iibt_qp_destroy(conn->ibc_qp);
-                if (rc != 0)
-                        CERROR("Can't destroy QP: %d\n", rc);
-                /* fall through */
-                
         case IBNAL_CONN_INIT_NOTHING:
+        case IBNAL_CONN_INIT_QP:
+        case IBNAL_CONN_DISCONNECTED:
                 break;
 
         default:
-                LASSERT (0);
+                /* conn must either have never engaged with the CM, or have
+                 * completely disengaged from it */
+                CERROR("Bad conn %s state %d\n",
+                       (conn->ibc_peer) == NULL ? "<anon>" :
+                       libcfs_nid2str(conn->ibc_peer->ibp_nid), conn->ibc_state);
+                LBUG();
         }
 
         if (conn->ibc_cep != NULL) {
-                frc = iibt_cm_destroy_cep(conn->ibc_cep);
-                if (frc != 0)
-                        CERROR("Can't destroy CEP %p: %d\n", conn->ibc_cep, 
-                               frc);
+                frc = iba_cm_destroy_cep(conn->ibc_cep);
+                if (frc != FSUCCESS)
+                        CERROR("Error destroying CEP %p: %d\n",
+                               conn->ibc_cep, frc);
+        }
+
+        if (conn->ibc_qp != NULL) {
+                frc = iba_destroy_qp(conn->ibc_qp);
+                if (frc != FSUCCESS)
+                        CERROR("Error destroying QP %p: %d\n",
+                               conn->ibc_qp, frc);
         }
 
         if (conn->ibc_rx_pages != NULL) 
                 kibnal_free_pages(conn->ibc_rx_pages);
         
         if (conn->ibc_rxs != NULL)
-                PORTAL_FREE(conn->ibc_rxs, 
+                LIBCFS_FREE(conn->ibc_rxs, 
                             IBNAL_RX_MSGS * sizeof(kib_rx_t));
 
+        if (conn->ibc_cvars != NULL)
+                LIBCFS_FREE(conn->ibc_cvars, sizeof(*conn->ibc_cvars));
+
         if (conn->ibc_peer != NULL)
-                kib_peer_decref(conn->ibc_peer);
+                kibnal_peer_decref(conn->ibc_peer);
 
-        PORTAL_FREE(conn, sizeof (*conn));
+        LIBCFS_FREE(conn, sizeof (*conn));
 
         atomic_dec(&kibnal_data.kib_nconns);
-        
-        if (atomic_read (&kibnal_data.kib_nconns) == 0 &&
-            kibnal_data.kib_shutdown) {
-                /* I just nuked the last connection on shutdown; wake up
-                 * everyone so they can exit. */
-                wake_up_all(&kibnal_data.kib_sched_waitq);
-                wake_up_all(&kibnal_data.kib_connd_waitq);
-        }
-}
-
-void
-kibnal_put_conn (kib_conn_t *conn)
-{
-        unsigned long flags;
-
-        CDEBUG (D_NET, "putting conn[%p] state %d -> "LPX64" (%d)\n",
-                conn, conn->ibc_state, conn->ibc_peer->ibp_nid,
-                atomic_read (&conn->ibc_refcount));
-
-        LASSERT (atomic_read (&conn->ibc_refcount) > 0);
-        if (!atomic_dec_and_test (&conn->ibc_refcount))
-                return;
-
-        /* must disconnect before dropping the final ref */
-        LASSERT (conn->ibc_state == IBNAL_CONN_DISCONNECTED);
-
-        spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags);
-
-        list_add (&conn->ibc_list, &kibnal_data.kib_connd_conns);
-        wake_up (&kibnal_data.kib_connd_waitq);
-
-        spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags);
 }
 
-static int
+int
 kibnal_close_peer_conns_locked (kib_peer_t *peer, int why)
 {
         kib_conn_t         *conn;
@@ -862,8 +1291,9 @@ kibnal_close_stale_conns_locked (kib_peer_t *peer, __u64 incarnation)
                 if (conn->ibc_incarnation == incarnation)
                         continue;
 
-                CDEBUG(D_NET, "Closing stale conn nid:"LPX64" incarnation:"LPX64"("LPX64")\n",
-                       peer->ibp_nid, conn->ibc_incarnation, incarnation);
+                CDEBUG(D_NET, "Closing stale conn nid:%s incarnation:"LPX64"("LPX64")\n",
+                       libcfs_nid2str(peer->ibp_nid), 
+                       conn->ibc_incarnation, incarnation);
                 
                 count++;
                 kibnal_close_conn_locked (conn, -ESTALE);
@@ -872,8 +1302,8 @@ kibnal_close_stale_conns_locked (kib_peer_t *peer, __u64 incarnation)
         return (count);
 }
 
-static int
-kibnal_close_matching_conns (ptl_nid_t nid)
+int
+kibnal_close_matching_conns (lnet_nid_t nid)
 {
         unsigned long       flags;
         kib_peer_t         *peer;
@@ -886,7 +1316,7 @@ kibnal_close_matching_conns (ptl_nid_t nid)
 
         write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
 
-        if (nid != PTL_NID_ANY)
+        if (nid != LNET_NID_ANY)
                 lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers;
         else {
                 lo = 0;
@@ -898,10 +1328,10 @@ kibnal_close_matching_conns (ptl_nid_t nid)
 
                         peer = list_entry (ptmp, kib_peer_t, ibp_list);
                         LASSERT (peer->ibp_persistence != 0 ||
-                                 peer->ibp_connecting != 0 ||
+                                 kibnal_peer_connecting(peer) ||
                                  !list_empty (&peer->ibp_conns));
 
-                        if (!(nid == PTL_NID_ANY || nid == peer->ibp_nid))
+                        if (!(nid == LNET_NID_ANY || nid == peer->ibp_nid))
                                 continue;
 
                         count += kibnal_close_peer_conns_locked (peer, 0);
@@ -911,69 +1341,65 @@ kibnal_close_matching_conns (ptl_nid_t nid)
         write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
 
         /* wildcards always succeed */
-        if (nid == PTL_NID_ANY)
+        if (nid == LNET_NID_ANY)
                 return (0);
         
         return (count == 0 ? -ENOENT : 0);
 }
 
-static int
-kibnal_cmd(struct portals_cfg *pcfg, void * private)
+int
+kibnal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
 {
-        int rc = -EINVAL;
+        struct libcfs_ioctl_data *data = arg;
+        int                       rc = -EINVAL;
         ENTRY;
 
-        LASSERT (pcfg != NULL);
+        LASSERT (ni == kibnal_data.kib_ni);
 
-        switch(pcfg->pcfg_command) {
-        case NAL_CMD_GET_PEER: {
-                ptl_nid_t   nid = 0;
-                int         share_count = 0;
+        switch(cmd) {
+        case IOC_LIBCFS_GET_PEER: {
+                lnet_nid_t   nid = 0;
+                int          share_count = 0;
 
-                rc = kibnal_get_peer_info(pcfg->pcfg_count,
+                rc = kibnal_get_peer_info(data->ioc_count,
                                           &nid, &share_count);
-                pcfg->pcfg_nid   = nid;
-                pcfg->pcfg_size  = 0;
-                pcfg->pcfg_id    = 0;
-                pcfg->pcfg_misc  = 0;
-                pcfg->pcfg_count = 0;
-                pcfg->pcfg_wait  = share_count;
+                data->ioc_nid   = nid;
+                data->ioc_count = share_count;
                 break;
         }
-        case NAL_CMD_ADD_PEER: {
-                rc = kibnal_add_persistent_peer (pcfg->pcfg_nid);
+        case IOC_LIBCFS_ADD_PEER: {
+                rc = kibnal_add_persistent_peer (data->ioc_nid);
                 break;
         }
-        case NAL_CMD_DEL_PEER: {
-                rc = kibnal_del_peer (pcfg->pcfg_nid, 
-                                       /* flags == single_share */
-                                       pcfg->pcfg_flags != 0);
+        case IOC_LIBCFS_DEL_PEER: {
+                rc = kibnal_del_peer (data->ioc_nid);
                 break;
         }
-        case NAL_CMD_GET_CONN: {
-                kib_conn_t *conn = kibnal_get_conn_by_idx (pcfg->pcfg_count);
+        case IOC_LIBCFS_GET_CONN: {
+                kib_conn_t *conn = kibnal_get_conn_by_idx (data->ioc_count);
 
                 if (conn == NULL)
                         rc = -ENOENT;
                 else {
                         rc = 0;
-                        pcfg->pcfg_nid   = conn->ibc_peer->ibp_nid;
-                        pcfg->pcfg_id    = 0;
-                        pcfg->pcfg_misc  = 0;
-                        pcfg->pcfg_flags = 0;
-                        kibnal_put_conn (conn);
+                        data->ioc_nid = conn->ibc_peer->ibp_nid;
+                        kibnal_conn_decref(conn);
                 }
                 break;
         }
-        case NAL_CMD_CLOSE_CONNECTION: {
-                rc = kibnal_close_matching_conns (pcfg->pcfg_nid);
+        case IOC_LIBCFS_CLOSE_CONNECTION: {
+                rc = kibnal_close_matching_conns (data->ioc_nid);
                 break;
         }
-        case NAL_CMD_REGISTER_MYNID: {
-                if (pcfg->pcfg_nid == PTL_NID_ANY)
+        case IOC_LIBCFS_REGISTER_MYNID: {
+                if (ni->ni_nid == data->ioc_nid) {
+                        rc = 0;
+                } else {
+                        CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n",
+                               libcfs_nid2str(data->ioc_nid),
+                               libcfs_nid2str(ni->ni_nid));
                         rc = -EINVAL;
-                else
-                        rc = kibnal_set_mynid (pcfg->pcfg_nid);
+                }
                 break;
         }
         }
@@ -985,38 +1411,22 @@ void
 kibnal_free_pages (kib_pages_t *p)
 {
         int     npages = p->ibp_npages;
-        int     rc;
         int     i;
         
-        if (p->ibp_mapped) {
-                rc = iibt_deregister_memory(p->ibp_handle);
-                if (rc != 0)
-                        CERROR ("Deregister error: %d\n", rc);
-        }
-        
         for (i = 0; i < npages; i++)
                 if (p->ibp_pages[i] != NULL)
                         __free_page(p->ibp_pages[i]);
         
-        PORTAL_FREE (p, offsetof(kib_pages_t, ibp_pages[npages]));
+        LIBCFS_FREE (p, offsetof(kib_pages_t, ibp_pages[npages]));
 }
 
 int
-kibnal_alloc_pages (kib_pages_t **pp, int npages, int allow_write)
+kibnal_alloc_pages (kib_pages_t **pp, int npages)
 {
-        kib_pages_t                *p;
-        __u64                      *phys_pages;
-        int                         i;
-        FSTATUS                     frc;
-        IB_ACCESS_CONTROL           access;
-
-        memset(&access, 0, sizeof(access));
-        access.s.MWBindable = 1;
-        access.s.LocalWrite = 1;
-        access.s.RdmaRead = 1;
-        access.s.RdmaWrite = 1;
+        kib_pages_t   *p;
+        int            i;
 
-        PORTAL_ALLOC(p, offsetof(kib_pages_t, ibp_pages[npages]));
+        LIBCFS_ALLOC(p, offsetof(kib_pages_t, ibp_pages[npages]));
         if (p == NULL) {
                 CERROR ("Can't allocate buffer %d\n", npages);
                 return (-ENOMEM);
@@ -1034,107 +1444,131 @@ kibnal_alloc_pages (kib_pages_t **pp, int npages, int allow_write)
                 }
         }
 
-        if (kibnal_whole_mem())
-                goto out;
-
-        PORTAL_ALLOC(phys_pages, npages * sizeof(*phys_pages));
-        if (phys_pages == NULL) {
-                CERROR ("Can't allocate physarray for %d pages\n", npages);
-                /* XXX free ibp_pages? */
-                kibnal_free_pages(p);
-                return (-ENOMEM);
-        }
+        *pp = p;
+        return (0);
+}
 
-        /* if we were using the _contig_ registration variant we would have
-         * an array of PhysAddr/Length pairs, but the discontiguous variant
-         * just takes the PhysAddr */
-        for (i = 0; i < npages; i++)
-                phys_pages[i] = kibnal_page2phys(p->ibp_pages[i]);
-
-        frc = iibt_register_physical_memory(kibnal_data.kib_hca,
-                                            0,          /* requested vaddr */
-                                            phys_pages, npages,
-                                            0,          /* offset */
-                                            kibnal_data.kib_pd,
-                                            access,
-                                            &p->ibp_handle, &p->ibp_vaddr,
-                                            &p->ibp_lkey, &p->ibp_rkey);
+int     /* 0 on success; -ENOMEM at the first failed allocation */
+kibnal_alloc_tx_descs (void) 
+{       /* allocates kib_tx_descs[] and each tx's buffers; frees nothing on failure */
+        int    i;
         
-        PORTAL_FREE(phys_pages, npages * sizeof(*phys_pages));
+        LIBCFS_ALLOC (kibnal_data.kib_tx_descs,
+                      IBNAL_TX_MSGS() * sizeof(kib_tx_t));
+        if (kibnal_data.kib_tx_descs == NULL)
+                return -ENOMEM;
         
-        if (frc != FSUCCESS) {
-                CERROR ("Error %d mapping %d pages\n", frc, npages);
-                kibnal_free_pages(p);
-                return (-ENOMEM);
+        memset(kibnal_data.kib_tx_descs, 0,
+               IBNAL_TX_MSGS() * sizeof(kib_tx_t));
+        /* NB descs were zeroed above, so every per-tx pointer starts out NULL */
+        for (i = 0; i < IBNAL_TX_MSGS(); i++) {
+                kib_tx_t *tx = &kibnal_data.kib_tx_descs[i];
+                /* which per-tx buffers exist depends on IBNAL_USE_FMR */
+#if IBNAL_USE_FMR
+                LIBCFS_ALLOC(tx->tx_pages, LNET_MAX_IOV *
+                             sizeof(*tx->tx_pages));
+                if (tx->tx_pages == NULL)
+                        return -ENOMEM;
+#else
+                LIBCFS_ALLOC(tx->tx_wrq, 
+                             (1 + IBNAL_MAX_RDMA_FRAGS) * 
+                             sizeof(*tx->tx_wrq));
+                if (tx->tx_wrq == NULL)
+                        return -ENOMEM;
+                
+                LIBCFS_ALLOC(tx->tx_gl, 
+                             (1 + IBNAL_MAX_RDMA_FRAGS) * 
+                             sizeof(*tx->tx_gl));
+                if (tx->tx_gl == NULL)
+                        return -ENOMEM;
+                /* sized to hold IBNAL_MAX_RDMA_FRAGS rd_frags[] entries */
+                LIBCFS_ALLOC(tx->tx_rd, 
+                             offsetof(kib_rdma_desc_t, 
+                                      rd_frags[IBNAL_MAX_RDMA_FRAGS]));
+                if (tx->tx_rd == NULL)
+                        return -ENOMEM;
+#endif
         }
 
-        CDEBUG(D_NET, "registered %d pages; handle: %p vaddr "LPX64" "
-                      "lkey %x rkey %x\n", npages, p->ibp_handle,
-                      p->ibp_vaddr, p->ibp_lkey, p->ibp_rkey);
-        
-        p->ibp_mapped = 1;
-out:
-        *pp = p;
-        return (0);
+        return 0;
+}
+
+void    /* frees every per-tx buffer and the kib_tx_descs array; safe to call twice */
+kibnal_free_tx_descs (void) 
+{
+        int    i;
+
+        if (kibnal_data.kib_tx_descs == NULL)   /* nothing (left) to free */
+                return;
+
+        for (i = 0; i < IBNAL_TX_MSGS(); i++) {
+                kib_tx_t *tx = &kibnal_data.kib_tx_descs[i];
+                /* NULL buffers are skipped; sizes mirror kibnal_alloc_tx_descs() */
+#if IBNAL_USE_FMR
+                if (tx->tx_pages != NULL)
+                        LIBCFS_FREE(tx->tx_pages, LNET_MAX_IOV *
+                                    sizeof(*tx->tx_pages));
+#else
+                if (tx->tx_wrq != NULL)
+                        LIBCFS_FREE(tx->tx_wrq, 
+                                    (1 + IBNAL_MAX_RDMA_FRAGS) * 
+                                    sizeof(*tx->tx_wrq));
+
+                if (tx->tx_gl != NULL)
+                        LIBCFS_FREE(tx->tx_gl, 
+                                    (1 + IBNAL_MAX_RDMA_FRAGS) * 
+                                    sizeof(*tx->tx_gl));
+
+                if (tx->tx_rd != NULL)
+                        LIBCFS_FREE(tx->tx_rd, 
+                                    offsetof(kib_rdma_desc_t, 
+                                             rd_frags[IBNAL_MAX_RDMA_FRAGS]));
+#endif
+        }
+        LIBCFS_FREE(kibnal_data.kib_tx_descs,
+                    IBNAL_TX_MSGS() * sizeof(kib_tx_t));
+        kibnal_data.kib_tx_descs = NULL;        /* no dangling ptr: NULL == "not allocated" */
 }
 
-static int
+int
 kibnal_setup_tx_descs (void)
 {
         int           ipage = 0;
         int           page_offset = 0;
-        __u64         vaddr;
-        __u64         vaddr_base;
         struct page  *page;
         kib_tx_t     *tx;
         int           i;
         int           rc;
 
         /* pre-mapped messages are not bigger than 1 page */
-        LASSERT (IBNAL_MSG_SIZE <= PAGE_SIZE);
+        CLASSERT (IBNAL_MSG_SIZE <= PAGE_SIZE);
 
         /* No fancy arithmetic when we do the buffer calculations */
-        LASSERT (PAGE_SIZE % IBNAL_MSG_SIZE == 0);
+        CLASSERT (PAGE_SIZE % IBNAL_MSG_SIZE == 0);
 
-        rc = kibnal_alloc_pages(&kibnal_data.kib_tx_pages, IBNAL_TX_MSG_PAGES, 
-                                0);
+        rc = kibnal_alloc_pages(&kibnal_data.kib_tx_pages,
+                                IBNAL_TX_MSG_PAGES());
         if (rc != 0)
                 return (rc);
 
-        /* ignored for the whole_mem case */
-        vaddr = vaddr_base = kibnal_data.kib_tx_pages->ibp_vaddr;
-
-        for (i = 0; i < IBNAL_TX_MSGS; i++) {
+        for (i = 0; i < IBNAL_TX_MSGS(); i++) {
                 page = kibnal_data.kib_tx_pages->ibp_pages[ipage];
                 tx = &kibnal_data.kib_tx_descs[i];
 
-                memset (tx, 0, sizeof(*tx));    /* zero flags etc */
-                
+#if IBNAL_USE_FMR
+                /* Allocate an FMR for this TX so it can map src/sink buffers
+                 * for large transfers */
+#endif
                 tx->tx_msg = (kib_msg_t *)(((char *)page_address(page)) + 
                                             page_offset);
 
-                if (kibnal_whole_mem()) 
-                        tx->tx_vaddr = kibnal_page2phys(page) + 
-                                       page_offset + 
-                                       kibnal_data.kib_md.md_addr;
-                else
-                        tx->tx_vaddr = vaddr;
-
-                tx->tx_isnblk = (i >= IBNAL_NTX);
-                tx->tx_mapped = KIB_TX_UNMAPPED;
+                tx->tx_hca_msg = kibnal_data.kib_whole_mem.md_addr +
+                                 lnet_page2phys(page) + page_offset;
 
                 CDEBUG(D_NET, "Tx[%d] %p->%p - "LPX64"\n", 
-                       i, tx, tx->tx_msg, tx->tx_vaddr);
+                       i, tx, tx->tx_msg, tx->tx_hca_msg);
 
-                if (tx->tx_isnblk)
-                        list_add (&tx->tx_list, 
-                                  &kibnal_data.kib_idle_nblk_txs);
-                else
-                        list_add (&tx->tx_list, 
-                                  &kibnal_data.kib_idle_txs);
-
-                vaddr += IBNAL_MSG_SIZE;
-                LASSERT (vaddr <= vaddr_base + IBNAL_TX_MSG_BYTES);
+                list_add (&tx->tx_list, &kibnal_data.kib_idle_txs);
 
                 page_offset += IBNAL_MSG_SIZE;
                 LASSERT (page_offset <= PAGE_SIZE);
@@ -1142,29 +1576,89 @@ kibnal_setup_tx_descs (void)
                 if (page_offset == PAGE_SIZE) {
                         page_offset = 0;
                         ipage++;
-                        LASSERT (ipage <= IBNAL_TX_MSG_PAGES);
+                        LASSERT (ipage <= IBNAL_TX_MSG_PAGES());
                 }
         }
         
         return (0);
 }
 
-static void
-kibnal_api_shutdown (nal_t *nal)
+int     /* 0, or -EINVAL (1st-gen HCA), -ENOMEM (size unknown), -EIO (registration failed) */
+kibnal_register_all_memory(void)
 {
-        int   i;
-        int   rc;
+        /* Register physical memory [0, total) as one region (kib_whole_mem).
+         * CAVEAT EMPTOR: assumes all RAM is 1 contiguous chunk starting at 0 */
+        struct sysinfo     si;
+        __u64              total;
+        __u64              total2;
+        __u64              roundup = (128<<20);     /* round up in big chunks */
+        IB_MR_PHYS_BUFFER  phys;
+        IB_ACCESS_CONTROL  access;
+        FSTATUS            frc;
 
-        if (nal->nal_refct != 0) {
-                /* This module got the first ref */
-                PORTAL_MODULE_UNUSE;
-                return;
+        memset(&access, 0, sizeof(access));
+        access.s.MWBindable = 1;
+        access.s.LocalWrite = 1;
+        access.s.RdmaRead = 1;
+        access.s.RdmaWrite = 1;
+
+        /* XXX we don't bother with first-gen cards */
+        if (kibnal_data.kib_hca_attrs.VendorId == 0xd0b7 && 
+            kibnal_data.kib_hca_attrs.DeviceId == 0x3101) {
+                CERROR("Can't register all memory on first generation HCAs\n");
+                return -EINVAL;
         }
 
-        CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
-               atomic_read (&portal_kmemory));
+        si_meminfo(&si);
+
+        CDEBUG(D_NET, "si_meminfo: %lu/%u, num_physpages %lu/%lu\n",
+               si.totalram, si.mem_unit, num_physpages, PAGE_SIZE);
+        /* use the larger estimate; NB widen to 64 bits before multiplying */
+        total = ((__u64)si.totalram) * si.mem_unit;
+        total2 = ((__u64)num_physpages) * PAGE_SIZE;
+        if (total < total2)
+                total = total2;
+
+        if (total == 0) {
+                CERROR("Can't determine memory size\n");
+                return -ENOMEM;
+        }
+
+        /* round up to a multiple of 'roundup' (a power of 2, set above) */
+        total = (total + (roundup - 1)) & ~(roundup - 1);
+
+        phys.PhysAddr = 0;
+        phys.Length = total;
 
-        LASSERT(nal == &kibnal_api);
+        frc = iba_register_contig_pmr(kibnal_data.kib_hca, 0, &phys, 1, 0,
+                                      kibnal_data.kib_pd, access,
+                                      &kibnal_data.kib_whole_mem.md_handle,
+                                      &kibnal_data.kib_whole_mem.md_addr,
+                                      &kibnal_data.kib_whole_mem.md_lkey,
+                                      &kibnal_data.kib_whole_mem.md_rkey);
+
+        if (frc != FSUCCESS) {
+                CERROR("registering physical memory failed: %d\n", frc);
+                return -EIO;
+        }
+
+        CDEBUG(D_WARNING, "registered phys mem from 0("LPX64") for "LPU64"("LPU64") -> "LPX64"\n",
+               phys.PhysAddr, total, phys.Length, kibnal_data.kib_whole_mem.md_addr);
+
+        return 0;
+}
+
+void
+kibnal_shutdown (lnet_ni_t *ni)
+{
+        int   i;
+        int   rc;
+
+        LASSERT (ni == kibnal_data.kib_ni);
+        LASSERT (ni->ni_data == &kibnal_data);
+       
+        CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
+               atomic_read (&libcfs_kmemory));
 
         switch (kibnal_data.kib_init) {
         default:
@@ -1172,20 +1666,16 @@ kibnal_api_shutdown (nal_t *nal)
                 LBUG();
 
         case IBNAL_INIT_ALL:
-                /* stop calls to nal_cmd */
-                libcfs_nal_cmd_unregister(IIBNAL);
-                /* No new peers */
+                /* stop accepting connections, prevent new peers and start to
+                 * tear down all existing ones... */
+                kibnal_stop_listener(1);
 
-                /* resetting my NID to unadvertises me, removes my
-                 * listener and nukes all current peers */
-                kibnal_set_mynid (PTL_NID_ANY);
-
-                /* Wait for all peer state to clean up (crazy) */
+                /* Wait for all peer state to clean up */
                 i = 2;
                 while (atomic_read (&kibnal_data.kib_npeers) != 0) {
                         i++;
                         CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
-                               "waiting for %d peers to disconnect (can take a few seconds)\n",
+                               "waiting for %d peers to disconnect\n",
                                atomic_read (&kibnal_data.kib_npeers));
                         set_current_state (TASK_UNINTERRUPTIBLE);
                         schedule_timeout (HZ);
@@ -1193,7 +1683,7 @@ kibnal_api_shutdown (nal_t *nal)
                 /* fall through */
 
         case IBNAL_INIT_CQ:
-                rc = iibt_cq_destroy(kibnal_data.kib_cq);
+                rc = iba_destroy_cq(kibnal_data.kib_cq);
                 if (rc != 0)
                         CERROR ("Destroy CQ error: %d\n", rc);
                 /* fall through */
@@ -1202,63 +1692,43 @@ kibnal_api_shutdown (nal_t *nal)
                 kibnal_free_pages (kibnal_data.kib_tx_pages);
                 /* fall through */
 
-        case IBNAL_INIT_MR:
-                if (kibnal_data.kib_md.md_handle != NULL) {
-                        rc = iibt_deregister_memory(kibnal_data.kib_md.md_handle);
-                        if (rc != FSUCCESS)
-                                CERROR ("Deregister memory: %d\n", rc);
-                }
+        case IBNAL_INIT_MD:
+                rc = iba_deregister_mr(kibnal_data.kib_whole_mem.md_handle);
+                if (rc != FSUCCESS)
+                        CERROR ("Deregister memory: %d\n", rc);
                 /* fall through */
 
-#if IBNAL_FMR
-        case IBNAL_INIT_FMR:
-                rc = ib_fmr_pool_destroy (kibnal_data.kib_fmr_pool);
-                if (rc != 0)
-                        CERROR ("Destroy FMR pool error: %d\n", rc);
-                /* fall through */
-#endif
         case IBNAL_INIT_PD:
-                rc = iibt_pd_free(kibnal_data.kib_pd);
+                rc = iba_free_pd(kibnal_data.kib_pd);
                 if (rc != 0)
                         CERROR ("Destroy PD error: %d\n", rc);
                 /* fall through */
 
         case IBNAL_INIT_SD:
-                rc = iibt_sd_deregister(kibnal_data.kib_sd);
+                rc = iba_sd_deregister(kibnal_data.kib_sd);
                 if (rc != 0)
                         CERROR ("Deregister SD error: %d\n", rc);
                 /* fall through */
 
-        case IBNAL_INIT_PORT:
-                /* XXX ??? */
-                /* fall through */
-
         case IBNAL_INIT_PORTATTRS:
-                PORTAL_FREE(kibnal_data.kib_hca_attrs.PortAttributesList,
+                LIBCFS_FREE(kibnal_data.kib_hca_attrs.PortAttributesList,
                             kibnal_data.kib_hca_attrs.PortAttributesListSize);
                 /* fall through */
 
         case IBNAL_INIT_HCA:
-                rc = iibt_close_hca(kibnal_data.kib_hca);
+                rc = iba_close_ca(kibnal_data.kib_hca);
                 if (rc != 0)
                         CERROR ("Close HCA  error: %d\n", rc);
                 /* fall through */
 
-        case IBNAL_INIT_LIB:
-                lib_fini(&kibnal_lib);
-                /* fall through */
-
         case IBNAL_INIT_DATA:
-                /* Module refcount only gets to zero when all peers
-                 * have been closed so all lists must be empty */
                 LASSERT (atomic_read (&kibnal_data.kib_npeers) == 0);
                 LASSERT (kibnal_data.kib_peers != NULL);
                 for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) {
                         LASSERT (list_empty (&kibnal_data.kib_peers[i]));
                 }
                 LASSERT (atomic_read (&kibnal_data.kib_nconns) == 0);
-                LASSERT (list_empty (&kibnal_data.kib_sched_rxq));
-                LASSERT (list_empty (&kibnal_data.kib_sched_txq));
+                LASSERT (list_empty (&kibnal_data.kib_connd_zombies));
                 LASSERT (list_empty (&kibnal_data.kib_connd_conns));
                 LASSERT (list_empty (&kibnal_data.kib_connd_peers));
 
@@ -1282,83 +1752,143 @@ kibnal_api_shutdown (nal_t *nal)
                 break;
         }
 
-        if (kibnal_data.kib_tx_descs != NULL)
-                PORTAL_FREE (kibnal_data.kib_tx_descs,
-                             IBNAL_TX_MSGS * sizeof(kib_tx_t));
+        kibnal_free_tx_descs();
 
         if (kibnal_data.kib_peers != NULL)
-                PORTAL_FREE (kibnal_data.kib_peers,
+                LIBCFS_FREE (kibnal_data.kib_peers,
                              sizeof (struct list_head) * 
                              kibnal_data.kib_peer_hash_size);
 
         CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
-               atomic_read (&portal_kmemory));
-        printk(KERN_INFO "Lustre: Infinicon IB NAL unloaded (final mem %d)\n",
-               atomic_read(&portal_kmemory));
+               atomic_read (&libcfs_kmemory));
 
         kibnal_data.kib_init = IBNAL_INIT_NOTHING;
+        PORTAL_MODULE_UNUSE;
 }
 
-#define roundup_power(val, power) \
-        ( (val + (__u64)(power - 1)) & ~((__u64)(power - 1)) )
-
-/* this isn't very portable or sturdy in the face of funny mem/bus configs */
-static __u64 max_phys_mem(IB_CA_ATTRIBUTES *ca_attr)
+int     /* Build the idx'th IP interface name from the ipif basename; 0 or -EINVAL */
+kibnal_get_ipif_name(char *ifname, int ifname_size, int idx)
 {
-        struct sysinfo si;
-        __u64 ret;
+        char  *basename = *kibnal_tunables.kib_ipif_basename;
+        int    n = strlen(basename);
+        int    baseidx;                         /* numeric postfix of basename */
+        int    m;                               /* length of the prefix to copy */
 
-        /* XXX we don't bother with first-gen cards */
-        if (ca_attr->VendorId == 0xd0b7 && ca_attr->DeviceId == 0x3101)
-                return 0ULL;
+        if (n == 0) {                           /* empty string */
+                CERROR("Empty IP interface basename specified\n");
+                return -EINVAL;
+        }
 
-        si_meminfo(&si);
-        ret = (__u64)max(si.totalram, max_mapnr) * si.mem_unit;
-        return roundup_power(ret, 128 * 1024 * 1024);
-} 
-#undef roundup_power
-
-static int
-kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
-                     ptl_ni_limits_t *requested_limits,
-                     ptl_ni_limits_t *actual_limits)
+        for (m = n; m > 0; m--)                 /* find max numeric postfix */
+                if (sscanf(basename + m - 1, "%d", &baseidx) != 1)
+                        break;
+
+        if (m == 0)                             /* just a number */
+                m = n;
+
+        if (m == n)                             /* no postfix */
+                baseidx = 1;                    /* default to 1 */
+        /* clamp the prefix so there is still room for the terminating NUL */
+        if (m >= ifname_size)
+                m = ifname_size - 1;
+
+        memcpy(ifname, basename, m);            /* copy prefix name */
+        /* ifname = prefix followed by decimal (postfix + idx) */
+        snprintf(ifname + m, ifname_size - m, "%d", baseidx + idx);
+        /* a completely full buffer is reported as too long */
+        if (strlen(ifname) == ifname_size - 1) {
+                CERROR("IP interface basename %s too long\n", basename);
+                return -EINVAL;
+        }
+        
+        return 0;
+}
+
+int
+kibnal_startup (lnet_ni_t *ni)
 {
-        ptl_process_id_t    process_id;
-        int                 pkmem = atomic_read(&portal_kmemory);
+        char                ipif_name[32];
+        __u32               ip;
+        __u32               netmask;
+        int                 up;
+        int                 nob;
+        struct timeval      tv;
         IB_PORT_ATTRIBUTES *pattr;
         FSTATUS             frc;
         int                 rc;
-        int                 n;
+        __u32               n;
         int                 i;
 
-        LASSERT (nal == &kibnal_api);
+        LASSERT (ni->ni_lnd == &the_kiblnd);
 
-        if (nal->nal_refct != 0) {
-                if (actual_limits != NULL)
-                        *actual_limits = kibnal_lib.libnal_ni.ni_actual_limits;
-                /* This module got the first ref */
-                PORTAL_MODULE_USE;
-                return (PTL_OK);
+        /* Only 1 instance supported */
+        if (kibnal_data.kib_init != IBNAL_INIT_NOTHING) {
+                CERROR ("Only 1 instance supported\n");
+                return -EPERM;
         }
 
-        LASSERT (kibnal_data.kib_init == IBNAL_INIT_NOTHING);
+        if (*kibnal_tunables.kib_credits > *kibnal_tunables.kib_ntx) {
+                CERROR ("Can't set credits(%d) > ntx(%d)\n",
+                        *kibnal_tunables.kib_credits,
+                        *kibnal_tunables.kib_ntx);
+                return -EINVAL;
+        }
 
-        frc = IbtGetInterfaceByVersion(IBT_INTERFACE_VERSION_2, 
-                                       &kibnal_data.kib_interfaces);
-        if (frc != FSUCCESS) {
-                CERROR("IbtGetInterfaceByVersion(IBT_INTERFACE_VERSION_2) = %d\n",
-                        frc);
-                return -ENOSYS;
+        ni->ni_maxtxcredits = *kibnal_tunables.kib_credits;
+        ni->ni_peertxcredits = *kibnal_tunables.kib_peercredits;
+
+        CLASSERT (LNET_MAX_INTERFACES > 1);
+
+        if (ni->ni_interfaces[0] == NULL) {
+                kibnal_data.kib_hca_idx = 0;
+        } else {
+                /* Use the HCA specified in 'networks=' */
+                if (ni->ni_interfaces[1] != NULL) {
+                        CERROR("Multiple interfaces not supported\n");
+                        return -EPERM;
+                }
+                
+                /* Parse <number> into kib_hca_idx */
+                nob = strlen(ni->ni_interfaces[0]);
+                if (sscanf(ni->ni_interfaces[0], "%d%n", 
+                           &kibnal_data.kib_hca_idx, &nob) < 1 ||
+                    nob != strlen(ni->ni_interfaces[0])) {
+                        CERROR("Can't parse interface '%s'\n",
+                               ni->ni_interfaces[0]);
+                        return -EINVAL;
+                }
+        }
+
+        rc = kibnal_get_ipif_name(ipif_name, sizeof(ipif_name),
+                                  kibnal_data.kib_hca_idx);
+        if (rc != 0)
+                return rc;
+        
+        rc = libcfs_ipif_query(ipif_name, &up, &ip, &netmask);
+        if (rc != 0) {
+                CERROR("Can't query IPoIB interface %s: %d\n", ipif_name, rc);
+                return -ENETDOWN;
+        }
+        
+        if (!up) {
+                CERROR("Can't query IPoIB interface %s: it's down\n", ipif_name);
+                return -ENETDOWN;
         }
+        
+        ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ip);
+
+        ni->ni_data = &kibnal_data;
+        kibnal_data.kib_ni = ni;
+
+        do_gettimeofday(&tv);
+        kibnal_data.kib_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
 
-        init_MUTEX (&kibnal_data.kib_nid_mutex);
-        init_MUTEX_LOCKED (&kibnal_data.kib_nid_signal);
-        kibnal_data.kib_nid = PTL_NID_ANY;
+        PORTAL_MODULE_USE;
 
         rwlock_init(&kibnal_data.kib_global_lock);
 
         kibnal_data.kib_peer_hash_size = IBNAL_PEER_HASH_SIZE;
-        PORTAL_ALLOC (kibnal_data.kib_peers,
+        LIBCFS_ALLOC (kibnal_data.kib_peers,
                       sizeof (struct list_head) * kibnal_data.kib_peer_hash_size);
         if (kibnal_data.kib_peers == NULL) {
                 goto failed;
@@ -1369,22 +1899,18 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
         spin_lock_init (&kibnal_data.kib_connd_lock);
         INIT_LIST_HEAD (&kibnal_data.kib_connd_peers);
         INIT_LIST_HEAD (&kibnal_data.kib_connd_conns);
+        INIT_LIST_HEAD (&kibnal_data.kib_connd_zombies);
         init_waitqueue_head (&kibnal_data.kib_connd_waitq);
 
         spin_lock_init (&kibnal_data.kib_sched_lock);
-        INIT_LIST_HEAD (&kibnal_data.kib_sched_txq);
-        INIT_LIST_HEAD (&kibnal_data.kib_sched_rxq);
         init_waitqueue_head (&kibnal_data.kib_sched_waitq);
 
         spin_lock_init (&kibnal_data.kib_tx_lock);
         INIT_LIST_HEAD (&kibnal_data.kib_idle_txs);
-        INIT_LIST_HEAD (&kibnal_data.kib_idle_nblk_txs);
-        init_waitqueue_head(&kibnal_data.kib_idle_tx_waitq);
 
-        PORTAL_ALLOC (kibnal_data.kib_tx_descs,
-                      IBNAL_TX_MSGS * sizeof(kib_tx_t));
-        if (kibnal_data.kib_tx_descs == NULL) {
-                CERROR ("Can't allocate tx descs\n");
+        rc = kibnal_alloc_tx_descs();
+        if (rc != 0) {
+                CERROR("Can't allocate tx descs\n");
                 goto failed;
         }
 
@@ -1392,24 +1918,15 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
         kibnal_data.kib_init = IBNAL_INIT_DATA;
         /*****************************************************/
 
-        process_id.pid = requested_pid;
-        process_id.nid = kibnal_data.kib_nid;
-        
-        rc = lib_init(&kibnal_lib, nal, process_id,
-                      requested_limits, actual_limits);
-        if (rc != PTL_OK) {
-                CERROR("lib_init failed: error %d\n", rc);
-                goto failed;
-        }
-
-        /* lib interface initialised */
-        kibnal_data.kib_init = IBNAL_INIT_LIB;
-        /*****************************************************/
+        kibnal_data.kib_sdretry.RetryCount = *kibnal_tunables.kib_sd_retries;
+        kibnal_data.kib_sdretry.Timeout = (*kibnal_tunables.kib_timeout * 1000)/
+                                          *kibnal_tunables.kib_sd_retries;
 
         for (i = 0; i < IBNAL_N_SCHED; i++) {
-                rc = kibnal_thread_start (kibnal_scheduler, (void *)i);
+                rc = kibnal_thread_start (kibnal_scheduler,
+                                          (void *)(unsigned long)i);
                 if (rc != 0) {
-                        CERROR("Can't spawn iibnal scheduler[%d]: %d\n",
+                        CERROR("Can't spawn iib scheduler[%d]: %d\n",
                                i, rc);
                         goto failed;
                 }
@@ -1417,30 +1934,38 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
 
         rc = kibnal_thread_start (kibnal_connd, NULL);
         if (rc != 0) {
-                CERROR ("Can't spawn iibnal connd: %d\n", rc);
+                CERROR ("Can't spawn iib connd: %d\n", rc);
                 goto failed;
         }
 
         n = sizeof(kibnal_data.kib_hca_guids) /
             sizeof(kibnal_data.kib_hca_guids[0]);
-        frc = iibt_get_hca_guids(&n, kibnal_data.kib_hca_guids);
+        frc = iba_get_caguids(&n, kibnal_data.kib_hca_guids);
         if (frc != FSUCCESS) {
-                CERROR ("Can't get channel adapter guids: %d\n", frc);
+                CERROR ("Can't get HCA guids: %d\n", frc);
                 goto failed;
         }
+
         if (n == 0) {
-                CERROR ("No channel adapters found\n");
+                CERROR ("No HCAs found\n");
                 goto failed;
         }
 
-        /* Infinicon has per-HCA rather than per CQ completion handlers */
-        frc = iibt_open_hca(kibnal_data.kib_hca_guids[0],
-                            kibnal_ca_callback,
-                            kibnal_ca_async_callback,
-                            &kibnal_data.kib_hca,
+        if (n <= kibnal_data.kib_hca_idx) {
+                CERROR("Invalid HCA %d requested: (must be 0 - %d inclusive)\n",
+                       kibnal_data.kib_hca_idx, n - 1);
+                goto failed;
+        }
+        
+        /* Infinicon has per-HCA notification callbacks */
+        frc = iba_open_ca(kibnal_data.kib_hca_guids[kibnal_data.kib_hca_idx],
+                            kibnal_hca_callback,
+                            kibnal_hca_async_callback,
+                            NULL,
                             &kibnal_data.kib_hca);
         if (frc != FSUCCESS) {
-                CERROR ("Can't open CA[0]: %d\n", frc);
+                CERROR ("Can't open HCA[%d]: %d\n", 
+                        kibnal_data.kib_hca_idx, frc);
                 goto failed;
         }
         
@@ -1450,14 +1975,14 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
 
         kibnal_data.kib_hca_attrs.PortAttributesList = NULL;
         kibnal_data.kib_hca_attrs.PortAttributesListSize = 0;
-        frc = iibt_query_hca(kibnal_data.kib_hca,
-                             &kibnal_data.kib_hca_attrs, NULL);
+        frc = iba_query_ca(kibnal_data.kib_hca,
+                           &kibnal_data.kib_hca_attrs, NULL);
         if (frc != FSUCCESS) {
                 CERROR ("Can't size port attrs: %d\n", frc);
                 goto failed;
         }
         
-        PORTAL_ALLOC(kibnal_data.kib_hca_attrs.PortAttributesList,
+        LIBCFS_ALLOC(kibnal_data.kib_hca_attrs.PortAttributesList,
                      kibnal_data.kib_hca_attrs.PortAttributesListSize);
         if (kibnal_data.kib_hca_attrs.PortAttributesList == NULL)
                 goto failed;
@@ -1466,10 +1991,11 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
         kibnal_data.kib_init = IBNAL_INIT_PORTATTRS;
         /*****************************************************/
         
-        frc = iibt_query_hca(kibnal_data.kib_hca, &kibnal_data.kib_hca_attrs,
-                             NULL);
+        frc = iba_query_ca(kibnal_data.kib_hca, &kibnal_data.kib_hca_attrs,
+                           NULL);
         if (frc != FSUCCESS) {
-                CERROR ("Can't get port attrs for CA 0: %d\n", frc);
+                CERROR ("Can't get port attrs for HCA %d: %d\n",
+                        kibnal_data.kib_hca_idx, frc);
                 goto failed;
         }
 
@@ -1508,11 +2034,7 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
 
         CDEBUG(D_NET, "got guid "LPX64"\n", kibnal_data.kib_port_guid);
         
-        /* Active port found */
-        kibnal_data.kib_init = IBNAL_INIT_PORT;
-        /*****************************************************/
-
-        frc = iibt_sd_register(&kibnal_data.kib_sd, NULL);
+        frc = iba_sd_register(&kibnal_data.kib_sd, NULL);
         if (frc != FSUCCESS) {
                 CERROR ("Can't register with SD: %d\n", frc);
                 goto failed;
@@ -1522,7 +2044,7 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
         kibnal_data.kib_init = IBNAL_INIT_SD;
         /*****************************************************/
 
-        frc = iibt_pd_allocate(kibnal_data.kib_hca, 0, &kibnal_data.kib_pd);
+        frc = iba_alloc_pd(kibnal_data.kib_hca, 0, &kibnal_data.kib_pd);
         if (frc != FSUCCESS) {
                 CERROR ("Can't create PD: %d\n", rc);
                 goto failed;
@@ -1532,73 +2054,14 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
         kibnal_data.kib_init = IBNAL_INIT_PD;
         /*****************************************************/
 
-#if IBNAL_FMR
-        {
-                const int pool_size = IBNAL_NTX + IBNAL_NTX_NBLK;
-                struct ib_fmr_pool_param params = {
-                        .max_pages_per_fmr = PTL_MTU/PAGE_SIZE,
-                        .access            = (IB_ACCESS_LOCAL_WRITE |
-                                              IB_ACCESS_REMOTE_WRITE |
-                                              IB_ACCESS_REMOTE_READ),
-                        .pool_size         = pool_size,
-                        .dirty_watermark   = (pool_size * 3)/4,
-                        .flush_function    = NULL,
-                        .flush_arg         = NULL,
-                        .cache             = 1,
-                };
-                rc = ib_fmr_pool_create(kibnal_data.kib_pd, &params,
-                                        &kibnal_data.kib_fmr_pool);
-                if (rc != 0) {
-                        CERROR ("Can't create FMR pool size %d: %d\n", 
-                                pool_size, rc);
-                        goto failed;
-                }
-        }
-
-        /* flag FMR pool initialised */
-        kibnal_data.kib_init = IBNAL_INIT_FMR;
-#endif
-        /*****************************************************/
-        if (IBNAL_WHOLE_MEM) {
-                IB_MR_PHYS_BUFFER phys;
-                IB_ACCESS_CONTROL access;
-                kib_md_t *md = &kibnal_data.kib_md;
-
-                memset(&access, 0, sizeof(access));
-                access.s.MWBindable = 1;
-                access.s.LocalWrite = 1;
-                access.s.RdmaRead = 1;
-                access.s.RdmaWrite = 1;
-
-                phys.PhysAddr = 0;
-                phys.Length = max_phys_mem(&kibnal_data.kib_hca_attrs);
-                if (phys.Length == 0) {
-                        CERROR ("couldn't determine the end of phys mem\n");
-                        goto failed;
-                }
-       
-                rc = iibt_register_contig_physical_memory(kibnal_data.kib_hca,
-                                                          0,
-                                                          &phys, 1,
-                                                          0,
-                                                          kibnal_data.kib_pd,
-                                                          access,
-                                                          &md->md_handle,
-                                                          &md->md_addr,
-                                                          &md->md_lkey,
-                                                          &md->md_rkey);
-                if (rc != FSUCCESS) {
-                        CERROR("registering physical memory failed: %d\n", 
-                               rc);
-                        CERROR("falling back to registration per-rdma\n");
-                        md->md_handle = NULL;
-                } else {
-                        CDEBUG(D_NET, "registered "LPU64" bytes of mem\n",
-                               phys.Length);
-                        kibnal_data.kib_init = IBNAL_INIT_MR;
-                }
+        rc = kibnal_register_all_memory();
+        if (rc != 0) {
+                CERROR ("Can't register all memory\n");
+                goto failed;
         }
-
+        
+        /* flag whole memory MD initialised */
+        kibnal_data.kib_init = IBNAL_INIT_MD;
         /*****************************************************/
 
         rc = kibnal_setup_tx_descs();
@@ -1611,38 +2074,33 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
         kibnal_data.kib_init = IBNAL_INIT_TXD;
         /*****************************************************/
         
-        {
-                uint32 nentries;
-
-                frc = iibt_cq_create(kibnal_data.kib_hca, IBNAL_CQ_ENTRIES,
-                                     &kibnal_data.kib_cq, &kibnal_data.kib_cq,
-                                     &nentries);
-                if (frc != FSUCCESS) {
-                        CERROR ("Can't create RX CQ: %d\n", frc);
-                        goto failed;
-                }
-
-                /* flag CQ initialised */
-                kibnal_data.kib_init = IBNAL_INIT_CQ;
+        frc = iba_create_cq(kibnal_data.kib_hca, IBNAL_CQ_ENTRIES(),
+                            &kibnal_data.kib_cq, &kibnal_data.kib_cq,
+                            &n);
+        if (frc != FSUCCESS) {
+                CERROR ("Can't create RX CQ: %d\n", frc);
+                goto failed;
+        }
 
-                if (nentries < IBNAL_CQ_ENTRIES) {
-                        CERROR ("CQ only has %d entries, need %d\n", 
-                                nentries, IBNAL_CQ_ENTRIES);
-                        goto failed;
-                }
+        /* flag CQ initialised */
+        kibnal_data.kib_init = IBNAL_INIT_CQ;
+        /*****************************************************/
+        
+        if (n < IBNAL_CQ_ENTRIES()) {
+                CERROR ("CQ only has %d entries: %d needed\n", 
+                        n, IBNAL_CQ_ENTRIES());
+                goto failed;
+        }
 
-                rc = iibt_cq_rearm(kibnal_data.kib_cq, CQEventSelNextWC);
-                if (rc != 0) {
-                        CERROR ("Failed to re-arm completion queue: %d\n", rc);
-                        goto failed;
-                }
+        rc = iba_rearm_cq(kibnal_data.kib_cq, CQEventSelNextWC);
+        if (rc != 0) {
+                CERROR ("Failed to re-arm completion queue: %d\n", rc);
+                goto failed;
         }
         
-        /*****************************************************/
-
-        rc = libcfs_nal_cmd_register(IIBNAL, &kibnal_cmd, NULL);
+        rc = kibnal_start_listener();
         if (rc != 0) {
-                CERROR ("Can't initialise command interface (rc = %d)\n", rc);
+                CERROR("Can't start listener: %d\n", rc);
                 goto failed;
         }
 
@@ -1650,26 +2108,18 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
         kibnal_data.kib_init = IBNAL_INIT_ALL;
         /*****************************************************/
 
-        printk(KERN_INFO "Lustre: Infinicon IB NAL loaded "
-               "(initial mem %d)\n", pkmem);
-
-        return (PTL_OK);
+        return (0);
 
  failed:
-        kibnal_api_shutdown (&kibnal_api);    
-        return (PTL_FAIL);
+        kibnal_shutdown (ni);    
+        return (-ENETDOWN);
 }
 
 void __exit
 kibnal_module_fini (void)
 {
-#ifdef CONFIG_SYSCTL
-        if (kibnal_tunables.kib_sysctl != NULL)
-                unregister_sysctl_table (kibnal_tunables.kib_sysctl);
-#endif
-        PtlNIFini(kibnal_ni);
-
-        ptl_unregister_nal(IIBNAL);
+        lnet_unregister_lnd(&the_kiblnd);  /* undo kibnal_module_init() */
+        kibnal_tunables_fini();  /* pairs with kibnal_tunables_init() */
 }
 
 int __init
@@ -1677,46 +2127,22 @@ kibnal_module_init (void)
 {
         int    rc;
 
-        if (sizeof(kib_wire_connreq_t) > CM_REQUEST_INFO_USER_LEN) {
-                CERROR("sizeof(kib_wire_connreq_t) > CM_REQUEST_INFO_USER_LEN\n");
-                return -EINVAL;
-        }
-
-        /* the following must be sizeof(int) for proc_dointvec() */
-        if (sizeof (kibnal_tunables.kib_io_timeout) != sizeof (int)) {
-                CERROR("sizeof (kibnal_tunables.kib_io_timeout) != sizeof (int)\n");
-                return -EINVAL;
+        if (the_lnet.ln_ptlcompat != 0) {  /* refuse to load in this mode */
+                LCONSOLE_ERROR("IIB does not support portals compatibility mode\n");
+                return -ENODEV;
         }
+        
+        rc = kibnal_tunables_init();  /* undone in kibnal_module_fini() */
+        if (rc != 0)
+                return rc;  /* hand the tunables error back unchanged */
 
-        kibnal_api.nal_ni_init = kibnal_api_startup;
-        kibnal_api.nal_ni_fini = kibnal_api_shutdown;
-
-        /* Initialise dynamic tunables to defaults once only */
-        kibnal_tunables.kib_io_timeout = IBNAL_IO_TIMEOUT;
-
-        rc = ptl_register_nal(IIBNAL, &kibnal_api);
-        if (rc != PTL_OK) {
-                CERROR("Can't register IBNAL: %d\n", rc);
-                return (-ENOMEM);               /* or something... */
-        }
+        lnet_register_lnd(&the_kiblnd);  /* NOTE(review): result, if any, unchecked */
 
-        /* Pure gateways want the NAL started up at module load time... */
-        rc = PtlNIInit(IIBNAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &kibnal_ni);
-        if (rc != PTL_OK && rc != PTL_IFACE_DUP) {
-                ptl_unregister_nal(IIBNAL);
-                return (-ENODEV);
-        }
-        
-#ifdef CONFIG_SYSCTL
-        /* Press on regardless even if registering sysctl doesn't work */
-        kibnal_tunables.kib_sysctl = 
-                register_sysctl_table (kibnal_top_ctl_table, 0);
-#endif
-        return (0);
+        return 0;
 }
 
 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
-MODULE_DESCRIPTION("Kernel Infinicon IB NAL v0.01");
+MODULE_DESCRIPTION("Kernel Infinicon IB LND v1.00");
 MODULE_LICENSE("GPL");
 
 module_init(kibnal_module_init);