Whamcloud - gitweb
* First cut working vibnal
[fs/lustre-release.git] / lnet / klnds / viblnd / viblnd.c
index 0c0a0e7..2cb4b7d 100644 (file)
 
 nal_t                   kibnal_api;
 ptl_handle_ni_t         kibnal_ni;
+kib_data_t              kibnal_data;
 kib_tunables_t          kibnal_tunables;
 
-kib_data_t              kibnal_data = {
-        .kib_service_id = IBNAL_SERVICE_NUMBER,
-};
-
 #ifdef CONFIG_SYSCTL
 #define IBNAL_SYSCTL             202
 
@@ -50,268 +47,330 @@ static ctl_table kibnal_top_ctl_table[] = {
 };
 #endif
 
-#ifdef unused
 void
-print_service(IB_SERVICE_RECORD *service, char *tag, int rc)
+kibnal_pause(int ticks)         /* put the calling thread to sleep for 'ticks' jiffies */
 {
-        char name[32];
+        set_current_state(TASK_UNINTERRUPTIBLE);  /* set state before scheduling away */
+        schedule_timeout(ticks);
+}
 
-        if (service == NULL) 
-        {
-                CWARN("tag       : %s\n"
-                      "status    : %d (NULL)\n", tag, rc);
-                return;
-        }
-        strncpy (name, service->ServiceName, sizeof(name)-1);
-        name[sizeof(name)-1] = 0;
-        
-        CWARN("tag       : %s\n"
-              "status    : %d\n"
-              "service id: "LPX64"\n"
-              "name      : %s\n"
-              "NID       : "LPX64"\n", tag, rc,
-              service->RID.ServiceID, name,
-              *kibnal_service_nid_field(service));
+__u32 
+kibnal_cksum (void *ptr, int nob)      /* rotate-and-add checksum of 'nob' bytes at 'ptr' */
+{
+        char  *c  = ptr;       /* NOTE(review): plain char - sign-extension below depends on platform char signedness; confirm all peers agree */
+        __u32  sum = 0;
+
+        while (nob-- > 0)      /* nob <= 0 sums nothing */
+                sum = ((sum << 1) | (sum >> 31)) + *c++;  /* rotate left 1 bit, add next byte */
+
+        /* ensure I don't return 0 (== no checksum) */
+        return (sum == 0) ? 1 : sum;
 }
-#endif
 
-/* 
- * method is SUBN_ADM_SET, SUBN_ADM_GET, SUBN_ADM_DELETE. Tables not supported.
- * nid is the nid to advertize/query/unadvertize
- */
-static void fill_sa_request(struct sa_request *request, int method, ptl_nid_t nid)
+void
+kibnal_init_msg(kib_msg_t *msg, int type, int body_nob) /* record message type and total size */
 {
-        gsi_dtgrm_t *dtgrm = request->dtgrm_req;
-        sa_mad_v2_t *mad = (sa_mad_v2_t *) dtgrm->mad;
-        ib_service_record_v2_t *sr = (ib_service_record_v2_t *) mad->payload;
-        
-        memset(mad, 0, MAD_BLOCK_SIZE);
-
-        request->mad = mad;
-
-        dtgrm->rlid = kibnal_data.kib_port_attr.port_sma_address_info.sm_lid;
-        dtgrm->sl = kibnal_data.kib_port_attr.port_sma_address_info.service_level;
-
-        mad->hdr.base_ver = MAD_IB_BASE_VERSION;
-        mad->hdr.class = MAD_CLASS_SUBN_ADM;
-        mad->hdr.class_ver = 2;
-        mad->hdr.m.ms.method = method;
-        mad->hdr.attrib_id = SA_SERVICE_RECORD; /* something(?) will swap that field */
-
-               /* Note: the transaction ID is set by the Voltaire stack if it is 0. */
-
-        /* TODO: change the 40 to sizeof(something) */
-        mad->payload_len = cpu_to_be32(0x40 /*header size */  +
-                                       sizeof (ib_service_record_v2_t));
-
-
-        mad->component_mask = cpu_to_be64(
-                                          (1ull << 0)  |       /* service_id       */
-                                          (1ull << 2)  |       /* service_pkey     */
-                                          (1ull << 6)  |       /* service_name     */
-                                          (1ull << 7)  |       /* service_data8[0] */
-                                          (1ull << 8)  |       /* service_data8[1] */
-                                          (1ull << 9)  |       /* service_data8[2] */
-                                          (1ull << 10) |       /* service_data8[3] */
-                                          (1ull << 11) |       /* service_data8[4] */
-                                          (1ull << 12) |       /* service_data8[5] */
-                                          (1ull << 13) |       /* service_data8[6] */
-                                          (1ull << 14)      /* service_data8[7] */
-                                          );
-
-        sr->service_id = cpu_to_be64(kibnal_data.kib_service_id);
-        sr->service_pkey = cpu_to_be16(kibnal_data.kib_port_pkey);
-
-        /* Set the service name and the data (bytes 0 to 7) in data8 */
-        kibnal_set_service_keys(sr, nid);
-
-        if (method == SUBN_ADM_SET) {
-                mad->component_mask |= cpu_to_be64(
-                                                   (1ull << 1) |       /* service_gid       */
-                                                   (1ull << 4)         /* service_lease     */
-                                                   );
-
-                sr->service_gid = kibnal_data.kib_port_gid;
-                gid_swap(&sr->service_gid);
-                sr->service_lease = cpu_to_be32(0xffffffff);
-        }
-
-        CDEBUG(D_NET, "SA request %02x for service id "LPX64" %s:"LPX64"\n",
-               mad->hdr.m.ms.method,
-               sr->service_id, 
-               sr->service_name,
-               *kibnal_service_nid_field(sr));
+        msg->ibm_type = type;
+        msg->ibm_nob  = offsetof(kib_msg_t, ibm_u) + body_nob;  /* bytes before ibm_u, plus the body */
 }
 
-/* Do an advertizement operation: 
- *   SUBN_ADM_GET = 0x01 (i.e. query),
- *   SUBN_ADM_SET = 0x02 (i.e. advertize),
- *   SUBN_ADM_DELETE = 0x15 (i.e. un-advertize).
- * If callback is NULL, the function is synchronous (and context is ignored).
- */
-int kibnal_advertize_op(ptl_nid_t nid, int op, sa_request_cb_t callback, void *context)
+void
+kibnal_pack_msg(kib_msg_t *msg, int credits, ptl_nid_t dstnid, __u64 dststamp)
 {
-        struct sa_request *request;
-        int ret;
+        /* CAVEAT EMPTOR! all message fields not set here should have been
+         * initialised previously. */
+        msg->ibm_magic    = IBNAL_MSG_MAGIC;    /* kibnal_unpack_msg() infers peer endianness from this */
+        msg->ibm_version  = IBNAL_MSG_VERSION;
+        /*   ibm_type: not set here, see CAVEAT above */
+        msg->ibm_credits  = credits;
+        /*   ibm_nob: not set here, see CAVEAT above */
+        msg->ibm_cksum    = 0;                  /* stays 0 (== no checksum) unless IBNAL_CKSUM */
+        msg->ibm_srcnid   = kibnal_lib.libnal_ni.ni_pid.nid;
+        msg->ibm_srcstamp = kibnal_data.kib_incarnation;
+        msg->ibm_dstnid   = dstnid;
+        msg->ibm_dststamp = dststamp;
+#if IBNAL_CKSUM
+        /* NB ibm_cksum zero while computing cksum */
+        msg->ibm_cksum    = kibnal_cksum(msg, msg->ibm_nob);    /* covers ibm_nob bytes from the start of msg */
+#endif
+}
 
-        LASSERT (kibnal_data.kib_nid != PTL_NID_ANY);
+int
+kibnal_unpack_msg(kib_msg_t *msg, int nob)     /* validate 'nob' received bytes, swab in place; 0 or -EPROTO */
+{
+        const int hdr_size = offsetof(kib_msg_t, ibm_u);
+        __u32     msg_cksum;
+        int       flip;                /* non-zero: sender has the opposite byte order */
+        int       msg_nob;             /* message size claimed by the header, host order */
+        int       i;
+        int       n;
+
+        /* 6 bytes are enough to have received magic + version */
+        if (nob < 6) {
+                CERROR("Short message: %d\n", nob);
+                return -EPROTO;
+        }
+
+        if (msg->ibm_magic == IBNAL_MSG_MAGIC) {
+                flip = 0;
+        } else if (msg->ibm_magic == __swab32(IBNAL_MSG_MAGIC)) {
+                flip = 1;
+        } else {
+                CERROR("Bad magic: %08x\n", msg->ibm_magic);
+                return -EPROTO;
+        }
 
-        CDEBUG(D_NET, "kibnal_advertize_op: nid="LPX64", op=%d\n", nid, op);
+        if (msg->ibm_version != 
+            (flip ? __swab16(IBNAL_MSG_VERSION) : IBNAL_MSG_VERSION)) {
+                CERROR("Bad version: %d\n", msg->ibm_version);
+                return -EPROTO;
+        }
 
-        request = alloc_sa_request();
-        if (request == NULL) {
-                CERROR("Cannot allocate a SA request");
-                return -ENOMEM;
+        if (nob < hdr_size) {
+                CERROR("Short message: %d\n", nob);
+                return -EPROTO;
         }
-                
-        fill_sa_request(request, op, nid);
 
-        if (callback) {
-                request->callback = callback;
-                request->context = context;
-        } else {
-                init_completion(&request->signal);
+        msg_nob = flip ? __swab32(msg->ibm_nob) : msg->ibm_nob;
+        if (msg_nob < hdr_size || msg_nob > nob) {  /* lower bound also rejects negative (wrapped) sizes */
+                CERROR("Bad message size: got %d, header claims %d\n", nob, msg_nob);
+                return -EPROTO;
         }
 
-        ret = vibnal_start_sa_request(request);
-        if (ret) {
-                CERROR("vibnal_send_sa failed: %d\n", ret);
-                free_sa_request(request);
-        } else {
-                if (callback) {
-                        /* Return. The callback will have to free the SA request. */
-                        ret = 0;
-                } else {
-                        wait_for_completion(&request->signal);
+        /* checksum must be computed with ibm_cksum zero and BEFORE anything
+         * gets flipped */
+        msg_cksum = flip ? __swab32(msg->ibm_cksum) : msg->ibm_cksum;
+        msg->ibm_cksum = 0;
+        if (msg_cksum != 0 &&                   /* 0 == sender did not checksum */
+            msg_cksum != kibnal_cksum(msg, msg_nob)) {
+                CERROR("Bad checksum\n");
+                return -EPROTO;
+        }
+        msg->ibm_cksum = msg_cksum;             /* restore, now in host order */
+        
+        if (flip) {
+                /* leave magic unflipped as a clue to peer endianness */
+                __swab16s(&msg->ibm_version);
+                CLASSERT (sizeof(msg->ibm_type) == 1);
+                CLASSERT (sizeof(msg->ibm_credits) == 1);
+                msg->ibm_nob = msg_nob;         /* already swabbed above */
+                __swab64s(&msg->ibm_srcnid);
+                __swab64s(&msg->ibm_srcstamp);
+                __swab64s(&msg->ibm_dstnid);
+                __swab64s(&msg->ibm_dststamp);
+        }
+        
+        if (msg->ibm_srcnid == PTL_NID_ANY) {
+                CERROR("Bad src nid: "LPX64"\n", msg->ibm_srcnid);
+                return -EPROTO;
+        }
 
-                        ret = request->status;
+        switch (msg->ibm_type) {        /* per-type size check, then swab the body */
+        default:
+                CERROR("Unknown message type %x\n", msg->ibm_type);
+                return -EPROTO;
+                
+        case IBNAL_MSG_NOOP:
+                break;
 
-                        if (ret != 0) {
-                                CERROR ("Error %d in advertising operation %d for NID "LPX64"\n",
-                                        ret, op, kibnal_data.kib_nid);
+        case IBNAL_MSG_IMMEDIATE:
+                if (msg_nob < offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0])) {
+                        CERROR("Short IMMEDIATE: %d(%d)\n", msg_nob,
+                               (int)offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0]));
+                        return -EPROTO;
+                }
+                break;
+
+        case IBNAL_MSG_PUT_REQ:
+                /* CAVEAT EMPTOR!  We don't actually put ibprm_rd on the wire;
+                 * it's just there to remember the source buffers while we wait
+                 * for the PUT_ACK */
+                if (msg_nob < offsetof(kib_msg_t, ibm_u.putreq.ibprm_rd)) {
+                        CERROR("Short PUT_REQ: %d(%d)\n", msg_nob,
+                               (int)offsetof(kib_msg_t, ibm_u.putreq.ibprm_rd)); /* report the size actually tested */
+                        return -EPROTO;
+                }
+                break;
+
+        case IBNAL_MSG_PUT_ACK:
+                if (msg_nob < offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[0])) {
+                        CERROR("Short PUT_ACK: %d(%d)\n", msg_nob,
+                               (int)offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[0]));
+                        return -EPROTO;
+                }
+
+                if (flip) {
+                        __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key);
+                        __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nfrag);
+                }
+                
+                n = msg->ibm_u.putack.ibpam_rd.rd_nfrag;
+                if (n <= 0 || n > IBNAL_MAX_RDMA_FRAGS) {
+                        CERROR("Bad PUT_ACK nfrags: %d, should be 0 < n <= %d\n", 
+                               n, IBNAL_MAX_RDMA_FRAGS);
+                        return -EPROTO;
+                }
+                
+                if (msg_nob < offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n])) {
+                        CERROR("Short PUT_ACK: %d(%d)\n", msg_nob,
+                               (int)offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n]));
+                        return -EPROTO;
+                }
+
+                if (flip)
+                        for (i = 0; i < n; i++) {
+                                __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_nob);
+                                __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_addr_lo);
+                                __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_addr_hi);
                         }
-                        
-                        free_sa_request(request);
+                break;
+
+        case IBNAL_MSG_GET_REQ:
+                if (msg_nob < hdr_size + sizeof(msg->ibm_u.get)) {
+                        CERROR("Short GET_REQ: %d(%d)\n", msg_nob,
+                               (int)(hdr_size + sizeof(msg->ibm_u.get)));
+                        return -EPROTO;
+                }
+                if (flip) {
+                        __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key);
+                        __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nfrag);
                 }
-        }
 
-        return ret;
+                n = msg->ibm_u.get.ibgm_rd.rd_nfrag;
+                if (n <= 0 || n > IBNAL_MAX_RDMA_FRAGS) {
+                        CERROR("Bad GET_REQ nfrags: %d, should be 0 < n <= %d\n", 
+                               n, IBNAL_MAX_RDMA_FRAGS);
+                        return -EPROTO;
+                }
+                
+                if (msg_nob < offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n])) {
+                        CERROR("Short GET_REQ: %d(%d)\n", msg_nob,
+                               (int)offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n]));
+                        return -EPROTO;
+                }
+                
+                if (flip)
+                        for (i = 0; i < n; i++) {       /* n is the validated rd_nfrag, as in PUT_ACK */
+                                __swab32s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_nob);
+                                __swab32s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_addr_lo);
+                                __swab32s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_addr_hi);
+                        }
+                break;
+
+        case IBNAL_MSG_PUT_NAK:
+        case IBNAL_MSG_PUT_DONE:
+        case IBNAL_MSG_GET_DONE:
+                if (msg_nob < hdr_size + sizeof(msg->ibm_u.completion)) {
+                        CERROR("Short RDMA completion: %d(%d)\n", msg_nob,
+                               (int)(hdr_size + sizeof(msg->ibm_u.completion)));
+                        return -EPROTO;
+                }
+                if (flip)
+                        __swab32s(&msg->ibm_u.completion.ibcm_status);
+                break;
+
+        case IBNAL_MSG_CONNREQ:
+        case IBNAL_MSG_CONNACK:
+                if (msg_nob < hdr_size + sizeof(msg->ibm_u.connparams)) {
+                        CERROR("Short connreq/ack: %d(%d)\n", msg_nob,
+                               (int)(hdr_size + sizeof(msg->ibm_u.connparams)));
+                        return -EPROTO;
+                }
+                if (flip) {
+                        __swab32s(&msg->ibm_u.connparams.ibcp_queue_depth);
+                        __swab32s(&msg->ibm_u.connparams.ibcp_max_msg_size);
+                        __swab32s(&msg->ibm_u.connparams.ibcp_max_frags);
+                }
+                break;
+        }
+        return 0;
 }
 
-static int
+int     /* install 'nid' as this node's NID and (re)start the listener; 0, -ENOMEM or -EINVAL */
 kibnal_set_mynid(ptl_nid_t nid)
 {
-        struct timeval tv;
-        lib_ni_t      *ni = &kibnal_lib.libnal_ni;
-        int            rc;
-        vv_return_t    retval;
+        static cm_listen_data_t info;           /* protected by kib_nid_mutex */
+
+        lib_ni_t        *ni = &kibnal_lib.libnal_ni;
+        int              rc;
+        cm_return_t      cmrc;
 
         CDEBUG(D_IOCTL, "setting mynid to "LPX64" (old nid="LPX64")\n",
                nid, ni->ni_pid.nid);
 
-        do_gettimeofday(&tv);
-
         down (&kibnal_data.kib_nid_mutex);
 
-        if (nid == kibnal_data.kib_nid) {
+        if (nid == ni->ni_pid.nid) {
                 /* no change of NID */
                 up (&kibnal_data.kib_nid_mutex);
                 return (0);
         }
 
-        CDEBUG(D_NET, "NID "LPX64"("LPX64")\n",
-               kibnal_data.kib_nid, nid);
-
-        /* Unsubscribes the current NID */
-        if (kibnal_data.kib_nid != PTL_NID_ANY) {
+        CDEBUG(D_NET, "NID "LPX64"("LPX64")\n", ni->ni_pid.nid, nid);
 
-                rc = kibnal_advertize_op(kibnal_data.kib_nid, SUBN_ADM_DELETE, NULL, NULL);
+        if (kibnal_data.kib_listen_handle != NULL) {    /* tear down the existing listener first */
+                cmrc = cm_cancel(kibnal_data.kib_listen_handle);
+                if (cmrc != cm_stat_success)
+                        CERROR ("Error %d stopping listener\n", cmrc);
 
-                if (rc) {
-                        CERROR("Error %d unadvertising NID "LPX64"\n",
-                               rc, kibnal_data.kib_nid);
-                }
-        }
+                kibnal_pause(HZ/10);            /* ensure no more callbacks */
         
-        kibnal_data.kib_nid = ni->ni_pid.nid = nid;
-        kibnal_data.kib_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
+                cmrc = cm_destroy_cep(kibnal_data.kib_listen_handle);
+                if (cmrc != cm_stat_success)    /* cmrc is a cm_return_t: compare with the cm_ status code */
+                        CERROR ("Error %d destroying CEP\n", cmrc);
 
-        /* Destroys the current endpoint, if any. */
-        if (kibnal_data.kib_cep) {
-                retval = cm_cancel(kibnal_data.kib_cep);
-                if (retval)
-                        CERROR ("Error %d stopping listener\n", retval);
-        
-                retval = cm_destroy_cep(kibnal_data.kib_cep);
-                if (retval)
-                        CERROR ("Error %d destroying CEP\n", retval);
-        
-                kibnal_data.kib_cep = NULL;
+                kibnal_data.kib_listen_handle = NULL;
         }
-        
+
+        /* Change NID.  NB queued passive connection requests (if any) will be
+         * rejected with an incorrect destination NID */
+        ni->ni_pid.nid = nid;
+        kibnal_data.kib_incarnation++;
+        mb();
+
         /* Delete all existing peers and their connections after new
          * NID/incarnation set to ensure no old connections in our brave
          * new world. */
         kibnal_del_peer (PTL_NID_ANY, 0);
 
-        if (kibnal_data.kib_nid == PTL_NID_ANY) {
-                /* No new NID to install. The driver is shuting down. */
-                up (&kibnal_data.kib_nid_mutex);
-                return (0);
-        }
-
-        /* remove any previous advert (crashed node etc) */
-        kibnal_advertize_op(kibnal_data.kib_nid, SUBN_ADM_DELETE, NULL, NULL);
-
-        kibnal_data.kib_cep = cm_create_cep(cm_cep_transp_rc);
-        if (kibnal_data.kib_cep == NULL) {
-                CERROR ("Can't create CEP\n");
-                rc = -ENOMEM;
-        } else {
-                cm_return_t cmret;
-                cm_listen_data_t info;
+        if (ni->ni_pid.nid != PTL_NID_ANY) {    /* got a new NID to install */
+                kibnal_data.kib_listen_handle = 
+                        cm_create_cep(cm_cep_transp_rc);
+                if (kibnal_data.kib_listen_handle == NULL) {
+                        CERROR ("Can't create listen CEP\n");
+                        rc = -ENOMEM;
+                        goto failed_0;
+                }
 
-                CDEBUG(D_NET, "Created CEP %p for listening\n", kibnal_data.kib_cep);
+                CDEBUG(D_NET, "Created CEP %p for listening\n", 
+                       kibnal_data.kib_listen_handle);
 
                 memset(&info, 0, sizeof(info));
-                info.listen_addr.end_pt.sid = kibnal_data.kib_service_id;
+                info.listen_addr.end_pt.sid = kibnal_data.kib_svc_id;
 
-                cmret = cm_listen(kibnal_data.kib_cep, &info,
-                                  kibnal_listen_callback, NULL);
-                if (cmret) {
-                        CERROR ("cm_listen error: %d\n", cmret);
+                cmrc = cm_listen(kibnal_data.kib_listen_handle, &info,
+                                 kibnal_listen_callback, NULL);
+                if (cmrc != cm_stat_success) {  /* same success test as the other cm_ calls here */
+                        CERROR ("cm_listen error: %d\n", cmrc);
                         rc = -EINVAL;
-                } else {
-                        rc = 0;
+                        goto failed_1;
                 }
         }
-        
-        if (rc == 0) {
-                rc = kibnal_advertize_op(kibnal_data.kib_nid, SUBN_ADM_SET, NULL, NULL);
-                if (rc == 0) {
-#ifdef IBNAL_CHECK_ADVERT
-                        kibnal_advertize_op(kibnal_data.kib_nid, SUBN_ADM_GET, NULL, NULL);
-#endif
-                        up (&kibnal_data.kib_nid_mutex);
-                        return (0);
-                }
-                
-                retval = cm_cancel (kibnal_data.kib_cep);
-                if (retval)
-                        CERROR("cm_cancel failed: %d\n", retval);
 
-                retval = cm_destroy_cep (kibnal_data.kib_cep);
-                if (retval)
-                        CERROR("cm_destroy_cep failed: %d\n", retval);
-
-                /* remove any peers that sprung up while I failed to
-                 * advertise myself */
-                kibnal_del_peer (PTL_NID_ANY, 0);
-        }
+        up (&kibnal_data.kib_nid_mutex);
+        return (0);
 
-        kibnal_data.kib_nid = PTL_NID_ANY;
+ failed_1:
+        cmrc = cm_destroy_cep(kibnal_data.kib_listen_handle);
+        LASSERT (cmrc == cm_stat_success);
+        kibnal_data.kib_listen_handle = NULL;
+ failed_0:
+        ni->ni_pid.nid = PTL_NID_ANY;           /* failure: revert to "no NID" ... */
+        kibnal_data.kib_incarnation++;
+        mb();
+        kibnal_del_peer (PTL_NID_ANY, 0);       /* ... and delete all peers again */
         up (&kibnal_data.kib_nid_mutex);
-        return (rc);
+        return rc;
 }
 
 kib_peer_t *
@@ -340,7 +399,12 @@ kibnal_create_peer (ptl_nid_t nid)
         peer->ibp_reconnect_interval = IBNAL_MIN_RECONNECT_INTERVAL;
 
         atomic_inc (&kibnal_data.kib_npeers);
-        return (peer);
+        if (atomic_read(&kibnal_data.kib_npeers) <= IBNAL_CONCURRENT_PEERS)
+                return peer;
+        
+        CERROR("Too many peers: CQ will overflow\n");
+        kibnal_peer_decref(peer);
+        return NULL;
 }
 
 void
@@ -390,21 +454,6 @@ kibnal_find_peer_locked (ptl_nid_t nid)
         return (NULL);
 }
 
-kib_peer_t *
-kibnal_get_peer (ptl_nid_t nid)
-{
-        kib_peer_t     *peer;
-        unsigned long   flags;
-
-        read_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-        peer = kibnal_find_peer_locked (nid);
-        if (peer != NULL)                       /* +1 ref for caller? */
-                kib_peer_addref(peer);
-        read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
-        return (peer);
-}
-
 void
 kibnal_unlink_peer_locked (kib_peer_t *peer)
 {
@@ -414,16 +463,17 @@ kibnal_unlink_peer_locked (kib_peer_t *peer)
         LASSERT (kibnal_peer_active(peer));
         list_del_init (&peer->ibp_list);
         /* lose peerlist's ref */
-        kib_peer_decref(peer);
+        kibnal_peer_decref(peer);
 }
 
-static int
-kibnal_get_peer_info (int index, ptl_nid_t *nidp, int *persistencep)
+int
+kibnal_get_peer_info (int index, ptl_nid_t *nidp, __u32 *ipp,
+                      int *persistencep)
 {
         kib_peer_t        *peer;
         struct list_head  *ptmp;
-        unsigned long      flags;
         int                i;
+        unsigned long      flags;
 
         read_lock_irqsave(&kibnal_data.kib_global_lock, flags);
 
@@ -440,6 +490,7 @@ kibnal_get_peer_info (int index, ptl_nid_t *nidp, int *persistencep)
                                 continue;
 
                         *nidp = peer->ibp_nid;
+                        *ipp = peer->ibp_ip;
                         *persistencep = peer->ibp_persistence;
 
                         read_unlock_irqrestore(&kibnal_data.kib_global_lock,
@@ -452,12 +503,14 @@ kibnal_get_peer_info (int index, ptl_nid_t *nidp, int *persistencep)
         return (-ENOENT);
 }
 
-static int
-kibnal_add_persistent_peer (ptl_nid_t nid)
+int
+kibnal_add_persistent_peer (ptl_nid_t nid, __u32 ip)
 {
-        unsigned long      flags;
         kib_peer_t        *peer;
         kib_peer_t        *peer2;
+        unsigned long      flags;
+
+        CDEBUG(D_NET, LPX64"@%08x\n", nid, ip);
         
         if (nid == PTL_NID_ANY)
                 return (-EINVAL);
@@ -466,11 +519,11 @@ kibnal_add_persistent_peer (ptl_nid_t nid)
         if (peer == NULL)
                 return (-ENOMEM);
 
-        write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
+        write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
 
         peer2 = kibnal_find_peer_locked (nid);
         if (peer2 != NULL) {
-                kib_peer_decref (peer);
+                kibnal_peer_decref (peer);
                 peer = peer2;
         } else {
                 /* peer table takes existing ref on peer */
@@ -478,13 +531,14 @@ kibnal_add_persistent_peer (ptl_nid_t nid)
                                kibnal_nid2peerlist (nid));
         }
 
+        peer->ibp_ip = ip;
         peer->ibp_persistence++;
         
-        write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
+        write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
         return (0);
 }
 
-static void
+void
 kibnal_del_peer_locked (kib_peer_t *peer, int single_share)
 {
         struct list_head *ctmp;
@@ -517,16 +571,16 @@ kibnal_del_peer_locked (kib_peer_t *peer, int single_share)
 int
 kibnal_del_peer (ptl_nid_t nid, int single_share)
 {
-        unsigned long      flags;
         struct list_head  *ptmp;
         struct list_head  *pnxt;
         kib_peer_t        *peer;
         int                lo;
         int                hi;
         int                i;
+        unsigned long      flags;
         int                rc = -ENOENT;
 
-        write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
+        write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
 
         if (nid != PTL_NID_ANY)
                 lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers;
@@ -553,20 +607,19 @@ kibnal_del_peer (ptl_nid_t nid, int single_share)
                 }
         }
  out:
-        write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
-
+        write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
         return (rc);
 }
 
-static kib_conn_t *
+kib_conn_t *
 kibnal_get_conn_by_idx (int index)
 {
         kib_peer_t        *peer;
         struct list_head  *ptmp;
         kib_conn_t        *conn;
         struct list_head  *ctmp;
-        unsigned long      flags;
         int                i;
+        unsigned long      flags;
 
         read_lock_irqsave(&kibnal_data.kib_global_lock, flags);
 
@@ -583,10 +636,7 @@ kibnal_get_conn_by_idx (int index)
                                         continue;
 
                                 conn = list_entry (ctmp, kib_conn_t, ibc_list);
-                                CDEBUG(D_NET, "++conn[%p] state %d -> "LPX64" (%d)\n",
-                                       conn, conn->ibc_state, conn->ibc_peer->ibp_nid,
-                                       atomic_read (&conn->ibc_refcount));
-                                atomic_inc (&conn->ibc_refcount);
+                                kibnal_conn_addref(conn);
                                 read_unlock_irqrestore(&kibnal_data.kib_global_lock,
                                                        flags);
                                 return (conn);
@@ -598,19 +648,124 @@ kibnal_get_conn_by_idx (int index)
         return (NULL);
 }
 
+int
+kibnal_set_qp_state (kib_conn_t *conn, vv_qp_state_t new_state) /* move conn's QP to new_state; 0 or -EIO */
+{
+        static vv_qp_attr_t attr;
+        
+        kib_connvars_t   *cv = conn->ibc_connvars;      /* source of the per-connection parameters below */
+        vv_return_t       vvrc;
+        
+        /* Only called by connd => static OK */
+        LASSERT (!in_interrupt());
+        LASSERT (current == kibnal_data.kib_connd);
+
+        memset(&attr, 0, sizeof(attr));         /* static buffer: clear whatever the last call left */
+        
+        switch (new_state) {
+        default:
+                LBUG();
+                
+        case vv_qp_state_init: {
+                struct vv_qp_modify_init_st *init = &attr.modify.params.init;
+
+                init->p_key_indx     = cv->cv_pkey_index;
+                init->phy_port_num   = cv->cv_port;
+                init->q_key          = IBNAL_QKEY; /* XXX but VV_QP_AT_Q_KEY not set! */
+                init->access_control = vv_acc_r_mem_read |
+                                       vv_acc_r_mem_write; /* XXX vv_acc_l_mem_write ? */
+
+                attr.modify.vv_qp_attr_mask = VV_QP_AT_P_KEY_IX | 
+                                              VV_QP_AT_PHY_PORT_NUM |
+                                              VV_QP_AT_ACCESS_CON_F;
+                break;
+        }
+        case vv_qp_state_rtr: {
+                struct vv_qp_modify_rtr_st *rtr = &attr.modify.params.rtr;
+                vv_add_vec_t               *av  = &rtr->remote_add_vec;
+
+                av->dlid                      = cv->cv_path.dlid;
+                av->grh_flag                  = (!IBNAL_LOCAL_SUB);     /* 1 iff IBNAL_LOCAL_SUB == 0 */
+                av->max_static_rate           = IBNAL_R_2_STATIC_RATE(cv->cv_path.rate);
+                av->service_level             = cv->cv_path.sl;
+                av->source_path_bit           = IBNAL_SOURCE_PATH_BIT;
+                av->pmtu                      = cv->cv_path.mtu;
+                av->rnr_retry_count           = cv->cv_rnr_count;
+                av->global_dest.traffic_class = cv->cv_path.traffic_class;
+                av->global_dest.hope_limit    = cv->cv_path.hop_limut;  /* (sic) presumably the vendor headers' spelling - confirm */
+                av->global_dest.flow_lable    = cv->cv_path.flow_label;
+                av->global_dest.s_gid_index   = cv->cv_sgid_index;
+                // XXX other av fields zero?
+
+                rtr->destanation_qp            = cv->cv_remote_qpn;
+                rtr->receive_psn               = cv->cv_rxpsn;
+                rtr->responder_rdma_r_atom_num = IBNAL_OUS_DST_RD;
+
+                // XXX ? rtr->opt_min_rnr_nak_timer = 16;
+                /* NOTE(review): the mask below sets VV_QP_AT_MIN_RNR_NAK_T, but the  */
+                /* timer field itself is only zeroed by the memset above - confirm.  */
+                // XXX sdp sets VV_QP_AT_OP_F but no actual optional options
+                attr.modify.vv_qp_attr_mask = VV_QP_AT_ADD_VEC | 
+                                              VV_QP_AT_DEST_QP |
+                                              VV_QP_AT_R_PSN | 
+                                              VV_QP_AT_MIN_RNR_NAK_T |
+                                              VV_QP_AT_RESP_RDMA_ATOM_OUT_NUM |
+                                              VV_QP_AT_OP_F;
+                break;
+        }
+        case vv_qp_state_rts: {
+                struct vv_qp_modify_rts_st *rts = &attr.modify.params.rts;
+
+                rts->send_psn                 = cv->cv_txpsn;
+                rts->local_ack_timeout        = IBNAL_LOCAL_ACK_TIMEOUT;
+                rts->retry_num                = IBNAL_RETRY_CNT;
+                rts->rnr_num                  = IBNAL_RNR_CNT;
+                rts->dest_out_rdma_r_atom_num = IBNAL_OUS_DST_RD;
+                
+                attr.modify.vv_qp_attr_mask = VV_QP_AT_S_PSN |
+                                              VV_QP_AT_L_ACK_T |
+                                              VV_QP_AT_RETRY_NUM |
+                                              VV_QP_AT_RNR_NUM |
+                                              VV_QP_AT_DEST_RDMA_ATOM_OUT_NUM;
+                break;
+        }
+        case vv_qp_state_error:
+        case vv_qp_state_reset:
+                attr.modify.vv_qp_attr_mask = 0;        /* nothing to set except the state itself */
+                break;
+        }
+                
+        attr.modify.qp_modify_into_state = new_state;
+        attr.modify.vv_qp_attr_mask |= VV_QP_AT_STATE;  /* every case also requests the state change */
+        
+        vvrc = vv_qp_modify(kibnal_data.kib_hca, conn->ibc_qp, &attr, NULL);
+        if (vvrc != vv_return_ok) {
+                CERROR("Can't modify qp -> "LPX64" state to %d: %d\n", 
+                       conn->ibc_peer->ibp_nid, new_state, vvrc);
+                return -EIO;
+        }
+        
+        return 0;
+}
+
 kib_conn_t *
-kibnal_create_conn (void)
+kibnal_create_conn (cm_cep_handle_t cep)
 {
-        kib_conn_t  *conn;
-        int          i;
-        __u64        vaddr = 0;
-        __u64        vaddr_base;
-        int          page_offset;
-        int          ipage;
-        vv_qp_attr_t qp_attr;
-        vv_return_t  retval;
-        int          rc;
-        void        *qp_context;
+        kib_conn_t   *conn;
+        int           i;
+        __u64         vaddr = 0;
+        __u64         vaddr_base;
+        int           page_offset;
+        int           ipage;
+        vv_return_t   vvrc;
+        int           rc;
+
+        static vv_qp_attr_t  reqattr;
+        static vv_qp_attr_t  rspattr;
+
+        /* Only the connd creates conns => single threaded */
+        LASSERT(!in_interrupt());
+        LASSERT(current == kibnal_data.kib_connd);
         
         PORTAL_ALLOC(conn, sizeof (*conn));
         if (conn == NULL) {
@@ -621,6 +776,7 @@ kibnal_create_conn (void)
         /* zero flags, NULL pointers etc... */
         memset (conn, 0, sizeof (*conn));
 
+        INIT_LIST_HEAD (&conn->ibc_early_rxs);
         INIT_LIST_HEAD (&conn->ibc_tx_queue);
         INIT_LIST_HEAD (&conn->ibc_active_txs);
         spin_lock_init (&conn->ibc_lock);
@@ -628,6 +784,18 @@ kibnal_create_conn (void)
         atomic_inc (&kibnal_data.kib_nconns);
         /* well not really, but I call destroy() on failure, which decrements */
 
+        conn->ibc_cep = cep;
+
+        PORTAL_ALLOC(conn->ibc_connvars, sizeof(*conn->ibc_connvars));
+        if (conn->ibc_connvars == NULL) {
+                CERROR("Can't allocate in-progress connection state\n");
+                goto failed;
+        }
+        memset (conn->ibc_connvars, 0, sizeof(*conn->ibc_connvars));
+        /* Random seed for QP sequence number */
+        get_random_bytes(&conn->ibc_connvars->cv_rxpsn,
+                         sizeof(conn->ibc_connvars->cv_rxpsn));
+
         PORTAL_ALLOC(conn->ibc_rxs, IBNAL_RX_MSGS * sizeof (kib_rx_t));
         if (conn->ibc_rxs == NULL) {
                 CERROR("Cannot allocate RX buffers\n");
@@ -649,26 +817,27 @@ kibnal_create_conn (void)
                 rx->rx_msg = (kib_msg_t *)(((char *)page_address(page)) + 
                              page_offset);
 
-                if (kibnal_whole_mem()) {
-                        void *newaddr;
-                        vv_mem_reg_h_t mem_h;
-                        vv_r_key_t r_key;
+#if IBNAL_WHOLE_MEM
+                {
+                        vv_mem_reg_h_t  mem_h;
+                        vv_r_key_t      r_key;
 
                         /* Voltaire stack already registers the whole
                          * memory, so use that API. */
-                        retval = vv_get_gen_mr_attrib(kibnal_data.kib_hca,
-                                                      rx->rx_msg,
-                                                      IBNAL_MSG_SIZE,
-                                                      &mem_h,
-                                                      &rx->l_key,
-                                                      &r_key);
-                        if (retval) {
-                                CERROR("vv_get_gen_mr_attrib failed: %d", retval);
-                                /* TODO: free pages? */
-                                goto failed;
-                        }
+                        vvrc = vv_get_gen_mr_attrib(kibnal_data.kib_hca,
+                                                    rx->rx_msg,
+                                                    IBNAL_MSG_SIZE,
+                                                    &mem_h,
+                                                    &rx->rx_lkey,
+                                                    &r_key);
+                        LASSERT (vvrc == vv_return_ok);
                 }
-                
+#else
+                rx->rx_vaddr = vaddr;
+#endif                
+                CDEBUG(D_NET, "Rx[%d] %p->%p[%x:"LPX64"]\n", i, rx, 
+                       rx->rx_msg, KIBNAL_RX_LKEY(rx), KIBNAL_RX_VADDR(rx));
+
                 vaddr += IBNAL_MSG_SIZE;
                 LASSERT (vaddr <= vaddr_base + IBNAL_RX_MSG_BYTES);
                 
@@ -682,47 +851,40 @@ kibnal_create_conn (void)
                 }
         }
 
-        qp_attr = (vv_qp_attr_t) {
-                .create.qp_type          = vv_qp_type_r_conn,
-                .create.cq_send_h        = kibnal_data.kib_cq,
-                .create.cq_receive_h     = kibnal_data.kib_cq,
-                .create.send_max_outstand_wr = IBNAL_TX_MAX_SG * 
-                                           IBNAL_MSG_QUEUE_SIZE,
-                .create.receive_max_outstand_wr = IBNAL_MSG_QUEUE_SIZE,
-                .create.max_scatgat_per_send_wr = 1,
-                .create.max_scatgat_per_receive_wr = 1,
-                .create.signaling_type   = vv_selectable_signaling, /* TODO: correct? */
-                .create.pd_h             = kibnal_data.kib_pd,
-                .create.recv_solicited_events = vv_signal_all,
-        };
-        retval = vv_qp_create(kibnal_data.kib_hca, &qp_attr, NULL,
-                              &conn->ibc_qp, &conn->ibc_qp_attrs);
-        if (retval != 0) {
-                CERROR ("Failed to create queue pair: %d\n", retval);
+        memset(&reqattr, 0, sizeof(reqattr));
+
+        reqattr.create.qp_type                    = vv_qp_type_r_conn;
+        reqattr.create.cq_send_h                  = kibnal_data.kib_cq;
+        reqattr.create.cq_receive_h               = kibnal_data.kib_cq;
+        reqattr.create.send_max_outstand_wr       = (1 + IBNAL_MAX_RDMA_FRAGS) * 
+                                                    IBNAL_MSG_QUEUE_SIZE;
+        reqattr.create.receive_max_outstand_wr    = IBNAL_RX_MSGS;
+        reqattr.create.max_scatgat_per_send_wr    = 1;
+        reqattr.create.max_scatgat_per_receive_wr = 1;
+        reqattr.create.signaling_type             = vv_selectable_signaling;
+        reqattr.create.pd_h                       = kibnal_data.kib_pd;
+        reqattr.create.recv_solicited_events      = vv_selectable_signaling; // vv_signal_all;
+
+        vvrc = vv_qp_create(kibnal_data.kib_hca, &reqattr, NULL,
+                            &conn->ibc_qp, &rspattr);
+        if (vvrc != vv_return_ok) {
+                CERROR ("Failed to create queue pair: %d\n", vvrc);
                 goto failed;
         }
 
         /* Mark QP created */
-        conn->ibc_state = IBNAL_CONN_INIT_QP;
-
-        qp_attr = (vv_qp_attr_t) {
-                .modify.qp_modify_into_state = vv_qp_state_init,
-                .modify.vv_qp_attr_mask      = VV_QP_AT_STATE | VV_QP_AT_PHY_PORT_NUM | VV_QP_AT_P_KEY_IX | VV_QP_AT_ACCESS_CON_F,
-                .modify.qp_type              = vv_qp_type_r_conn,
-
-                .modify.params.init.p_key_indx      = 0,
-                .modify.params.init.phy_port_num    = kibnal_data.kib_port,
-                .modify.params.init.access_control  = vv_acc_r_mem_write | vv_acc_r_mem_read,
-        };
-        retval = vv_qp_modify(kibnal_data.kib_hca, conn->ibc_qp, &qp_attr, &conn->ibc_qp_attrs);
-        if (retval != 0) {
-                CERROR ("Failed to modify queue pair: %d\n", retval);
-                goto failed;
-        }
-
-        retval = vv_qp_query(kibnal_data.kib_hca, conn->ibc_qp, &qp_context, &conn->ibc_qp_attrs);
-        if (retval) {
-                CERROR ("Failed to query queue pair: %d\n", retval);
+        conn->ibc_state = IBNAL_CONN_INIT;
+        conn->ibc_connvars->cv_local_qpn = rspattr.create_return.qp_num;
+
+        if (rspattr.create_return.receive_max_outstand_wr < 
+            IBNAL_MSG_QUEUE_SIZE ||
+            rspattr.create_return.send_max_outstand_wr < 
+            (1 + IBNAL_MAX_RDMA_FRAGS) * IBNAL_MSG_QUEUE_SIZE) {
+                CERROR("Insufficient rx/tx work items: wanted %d/%d got %d/%d\n",
+                       IBNAL_MSG_QUEUE_SIZE, 
+                       (1 + IBNAL_MAX_RDMA_FRAGS) * IBNAL_MSG_QUEUE_SIZE,
+                       rspattr.create_return.receive_max_outstand_wr,
+                       rspattr.create_return.send_max_outstand_wr);
                 goto failed;
         }
 
@@ -738,91 +900,63 @@ kibnal_create_conn (void)
 void
 kibnal_destroy_conn (kib_conn_t *conn)
 {
-        vv_return_t retval;
+        vv_return_t vvrc;
+        /* Free everything conn owns (QP only if created), then conn itself. */
+        /* Only the connd does this (i.e. single threaded) */
+        LASSERT (!in_interrupt());
+        LASSERT (current == kibnal_data.kib_connd);
         
         CDEBUG (D_NET, "connection %p\n", conn);
 
         LASSERT (atomic_read (&conn->ibc_refcount) == 0);
+        LASSERT (list_empty(&conn->ibc_early_rxs));
         LASSERT (list_empty(&conn->ibc_tx_queue));
         LASSERT (list_empty(&conn->ibc_active_txs));
         LASSERT (conn->ibc_nsends_posted == 0);
-        LASSERT (conn->ibc_connreq == NULL);
 
         switch (conn->ibc_state) {
+        default:
+                /* conn must be completely disengaged from the network */
+                LBUG();
+                /* NOTE(review): presumably LBUG() never returns - confirm */
         case IBNAL_CONN_DISCONNECTED:
-                /* called after connection sequence initiated */
+                /* connvars should have been freed already */
+                LASSERT (conn->ibc_connvars == NULL);
                 /* fall through */
 
-        case IBNAL_CONN_INIT_QP:
-                /* _destroy includes an implicit Reset of the QP which 
-                 * discards posted work */
-                retval = vv_qp_destroy(kibnal_data.kib_hca, conn->ibc_qp);
-                if (retval)
-                        CERROR("Can't destroy QP: %d\n", retval);
+        case IBNAL_CONN_INIT:
+                kibnal_set_qp_state(conn, vv_qp_state_reset); /* rc ignored */
+                vvrc = vv_qp_destroy(kibnal_data.kib_hca, conn->ibc_qp);
+                if (vvrc != vv_return_ok)
+                        CERROR("Can't destroy QP: %d\n", vvrc);
                 /* fall through */
                 
         case IBNAL_CONN_INIT_NOTHING:
                 break;
-
-        default:
-                LASSERT (0);
-        }
-
-        if (conn->ibc_cep != NULL) {
-                retval = cm_destroy_cep(conn->ibc_cep);
-                if (retval)
-                        CERROR("Can't destroy CEP %p: %d\n", conn->ibc_cep, 
-                               retval);
         }
 
         if (conn->ibc_rx_pages != NULL) 
                 kibnal_free_pages(conn->ibc_rx_pages);
-        
+
         if (conn->ibc_rxs != NULL)
                 PORTAL_FREE(conn->ibc_rxs, 
                             IBNAL_RX_MSGS * sizeof(kib_rx_t));
 
+        if (conn->ibc_connvars != NULL)
+                PORTAL_FREE(conn->ibc_connvars, sizeof(*conn->ibc_connvars));
+
         if (conn->ibc_peer != NULL)
-                kib_peer_decref(conn->ibc_peer);
+                kibnal_peer_decref(conn->ibc_peer);
+        /* the CEP is destroyed unconditionally; failure trips the LASSERT */
+        vvrc = cm_destroy_cep(conn->ibc_cep);
+        LASSERT (vvrc == vv_return_ok);
 
         PORTAL_FREE(conn, sizeof (*conn));
 
         atomic_dec(&kibnal_data.kib_nconns);
-        
-        if (atomic_read (&kibnal_data.kib_nconns) == 0 &&
-            kibnal_data.kib_shutdown) {
-                /* I just nuked the last connection on shutdown; wake up
-                 * everyone so they can exit. */
-                wake_up_all(&kibnal_data.kib_sched_waitq);
-                wake_up_all(&kibnal_data.kib_connd_waitq);
-        }
 }
 
-void
-kibnal_put_conn (kib_conn_t *conn)
-{
-        unsigned long flags;
-
-        CDEBUG (D_NET, "putting conn[%p] state %d -> "LPX64" (%d)\n",
-                conn, conn->ibc_state, conn->ibc_peer->ibp_nid,
-                atomic_read (&conn->ibc_refcount));
-
-        LASSERT (atomic_read (&conn->ibc_refcount) > 0);
-        if (!atomic_dec_and_test (&conn->ibc_refcount))
-                return;
-
-        /* must disconnect before dropping the final ref */
-        LASSERT (conn->ibc_state == IBNAL_CONN_DISCONNECTED);
-
-        spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags);
-
-        list_add (&conn->ibc_list, &kibnal_data.kib_connd_conns);
-        wake_up (&kibnal_data.kib_connd_waitq);
-
-        spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags);
-}
-
-static int
+int
 kibnal_close_peer_conns_locked (kib_peer_t *peer, int why)
 {
         kib_conn_t         *conn;
@@ -864,19 +998,19 @@ kibnal_close_stale_conns_locked (kib_peer_t *peer, __u64 incarnation)
         return (count);
 }
 
-static int
+int
 kibnal_close_matching_conns (ptl_nid_t nid)
 {
-        unsigned long       flags;
         kib_peer_t         *peer;
         struct list_head   *ptmp;
         struct list_head   *pnxt;
         int                 lo;
         int                 hi;
         int                 i;
+        unsigned long       flags;
         int                 count = 0;
 
-        write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
+        write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
 
         if (nid != PTL_NID_ANY)
                 lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers;
@@ -900,7 +1034,7 @@ kibnal_close_matching_conns (ptl_nid_t nid)
                 }
         }
 
-        write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
+        write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
 
         /* wildcards always succeed */
         if (nid == PTL_NID_ANY)
@@ -909,31 +1043,32 @@ kibnal_close_matching_conns (ptl_nid_t nid)
         return (count == 0 ? -ENOENT : 0);
 }
 
-static int
+int
 kibnal_cmd(struct portals_cfg *pcfg, void * private)
 {
         int rc = -EINVAL;
-        ENTRY;
 
         LASSERT (pcfg != NULL);
 
         switch(pcfg->pcfg_command) {
         case NAL_CMD_GET_PEER: {
                 ptl_nid_t   nid = 0;
+                __u32       ip = 0;
                 int         share_count = 0;
 
                 rc = kibnal_get_peer_info(pcfg->pcfg_count,
-                                          &nid, &share_count);
+                                          &nid, &ip, &share_count);
                 pcfg->pcfg_nid   = nid;
                 pcfg->pcfg_size  = 0;
-                pcfg->pcfg_id    = 0;
-                pcfg->pcfg_misc  = 0;
+                pcfg->pcfg_id    = ip;
+                pcfg->pcfg_misc  = IBNAL_SERVICE_NUMBER; /* port */
                 pcfg->pcfg_count = 0;
                 pcfg->pcfg_wait  = share_count;
                 break;
         }
         case NAL_CMD_ADD_PEER: {
-                rc = kibnal_add_persistent_peer (pcfg->pcfg_nid);
+                rc = kibnal_add_persistent_peer (pcfg->pcfg_nid,
+                                                 pcfg->pcfg_id); /* IP */
                 break;
         }
         case NAL_CMD_DEL_PEER: {
@@ -953,7 +1088,7 @@ kibnal_cmd(struct portals_cfg *pcfg, void * private)
                         pcfg->pcfg_id    = 0;
                         pcfg->pcfg_misc  = 0;
                         pcfg->pcfg_flags = 0;
-                        kibnal_put_conn (conn);
+                        kibnal_conn_decref(conn);
                 }
                 break;
         }
@@ -970,20 +1105,21 @@ kibnal_cmd(struct portals_cfg *pcfg, void * private)
         }
         }
 
-        RETURN(rc);
+        return rc;
 }
 
 void
 kibnal_free_pages (kib_pages_t *p)
 {
-        int     npages = p->ibp_npages;
-        vv_return_t retval;
-        int     i;
+        int         npages = p->ibp_npages;
+        vv_return_t vvrc;
+        int         i;
         
         if (p->ibp_mapped) {
-                retval = vv_mem_region_destroy(kibnal_data.kib_hca, p->ibp_handle);
-                if (retval != 0)
-                        CERROR ("Deregister error: %d\n", retval);
+                vvrc = vv_mem_region_destroy(kibnal_data.kib_hca, 
+                                             p->ibp_handle);
+                if (vvrc != vv_return_ok)
+                        CERROR ("Deregister error: %d\n", vvrc);
         }
         
         for (i = 0; i < npages; i++)
@@ -997,10 +1133,13 @@ int
 kibnal_alloc_pages (kib_pages_t **pp, int npages, int allow_write)
 {
         kib_pages_t   *p;
-        vv_phy_list_t  phys_pages;
-        vv_phy_buf_t  *phys_buf;
         int            i;
-        vv_return_t    retval;
+#if !IBNAL_WHOLE_MEM
+        vv_phy_list_t            vv_phys;
+        vv_phy_buf_t            *phys_pages;
+        vv_return_t              vvrc;
+        vv_access_con_bit_mask_t access;
+#endif
 
         PORTAL_ALLOC(p, offsetof(kib_pages_t, ibp_pages[npages]));
         if (p == NULL) {
@@ -1020,57 +1159,124 @@ kibnal_alloc_pages (kib_pages_t **pp, int npages, int allow_write)
                 }
         }
 
-        if (kibnal_whole_mem())
-                goto out;
-
-        PORTAL_ALLOC(phys_buf, npages * sizeof(vv_phy_buf_t));
-        if (phys_buf == NULL) {
-                CERROR ("Can't allocate phys_buf for %d pages\n", npages);
-                /* XXX free ibp_pages? */
+#if !IBNAL_WHOLE_MEM
+        PORTAL_ALLOC(phys_pages, npages * sizeof(*phys_pages));
+        if (phys_pages == NULL) {
+                CERROR ("Can't allocate physarray for %d pages\n", npages);
                 kibnal_free_pages(p);
                 return (-ENOMEM);
         }
 
-        phys_pages.number_of_buff = npages;
-        phys_pages.phy_list = phys_buf;
+        vv_phys.number_of_buff = npages;
+        vv_phys.phy_list = phys_pages;
 
-        /* if we were using the _contig_ registration variant we would have
-         * an array of PhysAddr/Length pairs, but the discontiguous variant
-         * just takes the PhysAddr */
         for (i = 0; i < npages; i++) {
-                phys_buf[i].start = kibnal_page2phys(p->ibp_pages[i]);
-                phys_buf[i].size = PAGE_SIZE;
-        }
-
-        retval = vv_phy_mem_region_register(kibnal_data.kib_hca,
-                                            &phys_pages,
-                                            0, /* requested vaddr */
-                                            npages * PAGE_SIZE,
-                                            0, /* offset */
-                                            kibnal_data.kib_pd,
-                                            vv_acc_l_mem_write | vv_acc_r_mem_write | vv_acc_r_mem_read | vv_acc_mem_bind, /* TODO: translated as-is, but seems incorrect or too much */
-                                            &p->ibp_handle, &p->ibp_vaddr,                                           
-                                            &p->ibp_lkey, &p->ibp_rkey);
+                phys_pages[i].size = PAGE_SIZE;
+                phys_pages[i].start = 
+                        kibnal_page2phys(p->ibp_pages[i]);
+        }
+
+        VV_ACCESS_CONTROL_MASK_SET_ALL(access);
+        
+        vvrc = vv_phy_mem_region_register(kibnal_data.kib_hca,
+                                          &vv_phys,
+                                          0, /* requested vaddr */
+                                          npages * PAGE_SIZE, 0, /* offset */
+                                          kibnal_data.kib_pd,
+                                          access,
+                                          &p->ibp_handle, 
+                                          &p->ibp_vaddr,                                           
+                                          &p->ibp_lkey, 
+                                          &p->ibp_rkey);
         
-        PORTAL_FREE(phys_buf, npages * sizeof(vv_phy_buf_t));
+        PORTAL_FREE(phys_pages, npages * sizeof(*phys_pages));
         
-        if (retval) {
-                CERROR ("Error %d mapping %d pages\n", retval, npages);
+        if (vvrc != vv_return_ok) {
+                CERROR ("Error %d mapping %d pages\n", vvrc, npages);
                 kibnal_free_pages(p);
-                return (-ENOMEM);
+                return (-EFAULT);
         }
 
         CDEBUG(D_NET, "registered %d pages; handle: %x vaddr "LPX64" "
-                      "lkey %x rkey %x\n", npages, p->ibp_handle,
-                      p->ibp_vaddr, p->ibp_lkey, p->ibp_rkey);
+               "lkey %x rkey %x\n", npages, p->ibp_handle,
+               p->ibp_vaddr, p->ibp_lkey, p->ibp_rkey);
         
         p->ibp_mapped = 1;
-out:
+#endif
         *pp = p;
         return (0);
 }
 
-static int
+int
+kibnal_alloc_tx_descs (void) 
+{
+        int    i;
+        /* Allocate the tx descriptor array, then each tx's wrq/gl/rd. */
+        PORTAL_ALLOC (kibnal_data.kib_tx_descs,
+                      IBNAL_TX_MSGS * sizeof(kib_tx_t));
+        if (kibnal_data.kib_tx_descs == NULL)
+                return -ENOMEM;
+        /* zero the array so members not yet allocated read as NULL */
+        memset(kibnal_data.kib_tx_descs, 0,
+               IBNAL_TX_MSGS * sizeof(kib_tx_t));
+        /* Returns 0, or -ENOMEM at the first allocation that fails. */
+        for (i = 0; i < IBNAL_TX_MSGS; i++) {
+                kib_tx_t *tx = &kibnal_data.kib_tx_descs[i];
+                /* wrq and gl each get (1 + IBNAL_MAX_RDMA_FRAGS) entries */
+                PORTAL_ALLOC(tx->tx_wrq, 
+                             (1 + IBNAL_MAX_RDMA_FRAGS) * 
+                             sizeof(*tx->tx_wrq));
+                if (tx->tx_wrq == NULL)
+                        return -ENOMEM;
+                
+                PORTAL_ALLOC(tx->tx_gl, 
+                             (1 + IBNAL_MAX_RDMA_FRAGS) * 
+                             sizeof(*tx->tx_gl));
+                if (tx->tx_gl == NULL)
+                        return -ENOMEM;
+                /* rd is sized up to rd_frags[IBNAL_MAX_RDMA_FRAGS] */
+                PORTAL_ALLOC(tx->tx_rd, 
+                             offsetof(kib_rdma_desc_t, 
+                                      rd_frags[IBNAL_MAX_RDMA_FRAGS]));
+                if (tx->tx_rd == NULL)
+                        return -ENOMEM; /* nothing allocated so far is freed here */
+        }
+
+        return 0;
+}
+
+void
+kibnal_free_tx_descs (void) 
+{
+        int    i;
+        /* Free each tx's wrq/gl/rd, then the descriptor array itself. */
+        if (kibnal_data.kib_tx_descs == NULL)
+                return;
+        /* NULL members are skipped, so a partly populated array is OK */
+        for (i = 0; i < IBNAL_TX_MSGS; i++) {
+                kib_tx_t *tx = &kibnal_data.kib_tx_descs[i];
+                /* same size expressions as in kibnal_alloc_tx_descs() */
+                if (tx->tx_wrq != NULL)
+                        PORTAL_FREE(tx->tx_wrq, 
+                                    (1 + IBNAL_MAX_RDMA_FRAGS) * 
+                                    sizeof(*tx->tx_wrq));
+
+                if (tx->tx_gl != NULL)
+                        PORTAL_FREE(tx->tx_gl, 
+                                    (1 + IBNAL_MAX_RDMA_FRAGS) * 
+                                    sizeof(*tx->tx_gl));
+
+                if (tx->tx_rd != NULL)
+                        PORTAL_FREE(tx->tx_rd, 
+                                    offsetof(kib_rdma_desc_t, 
+                                             rd_frags[IBNAL_MAX_RDMA_FRAGS]));
+        }
+        /* NOTE(review): kib_tx_descs is not reset to NULL after this free */
+        PORTAL_FREE(kibnal_data.kib_tx_descs,
+                    IBNAL_TX_MSGS * sizeof(kib_tx_t));
+}
+
+int
 kibnal_setup_tx_descs (void)
 {
         int           ipage = 0;
@@ -1083,10 +1289,10 @@ kibnal_setup_tx_descs (void)
         int           rc;
 
         /* pre-mapped messages are not bigger than 1 page */
-        LASSERT (IBNAL_MSG_SIZE <= PAGE_SIZE);
+        CLASSERT (IBNAL_MSG_SIZE <= PAGE_SIZE);
 
         /* No fancy arithmetic when we do the buffer calculations */
-        LASSERT (PAGE_SIZE % IBNAL_MSG_SIZE == 0);
+        CLASSERT (PAGE_SIZE % IBNAL_MSG_SIZE == 0);
 
         rc = kibnal_alloc_pages(&kibnal_data.kib_tx_pages, IBNAL_TX_MSG_PAGES, 
                                 0);
@@ -1100,35 +1306,32 @@ kibnal_setup_tx_descs (void)
                 page = kibnal_data.kib_tx_pages->ibp_pages[ipage];
                 tx = &kibnal_data.kib_tx_descs[i];
 
-                memset (tx, 0, sizeof(*tx));    /* zero flags etc */
-                
                 tx->tx_msg = (kib_msg_t *)(((char *)page_address(page)) + 
                                            page_offset);
-
-                if (kibnal_whole_mem()) {
-                        void *newaddr;
-                        vv_mem_reg_h_t mem_h;
-                        vv_return_t  retval;
+#if IBNAL_WHOLE_MEM
+                {
+                        vv_mem_reg_h_t  mem_h;
+                        vv_r_key_t      rkey;
+                        vv_return_t     vvrc;
 
                         /* Voltaire stack already registers the whole
                          * memory, so use that API. */
-                        retval = vv_get_gen_mr_attrib(kibnal_data.kib_hca,
-                                                      tx->tx_msg,
-                                                      IBNAL_MSG_SIZE,
-                                                      &mem_h,
-                                                      &tx->l_key,
-                                                      &tx->r_key);
-                        if (retval) {
-                                CERROR("vv_get_gen_mr_attrib failed: %d", retval);
-                                /* TODO: free pages? */
-                                /* TODO: return. */
-                        }
+                        vvrc = vv_get_gen_mr_attrib(kibnal_data.kib_hca,
+                                                    tx->tx_msg,
+                                                    IBNAL_MSG_SIZE,
+                                                    &mem_h,
+                                                    &tx->tx_lkey,
+                                                    &rkey);
+                        LASSERT (vvrc == vv_return_ok);
                 }
-
+#else
+                tx->tx_vaddr = vaddr;
+#endif
                 tx->tx_isnblk = (i >= IBNAL_NTX);
                 tx->tx_mapped = KIB_TX_UNMAPPED;
 
-                CDEBUG(D_NET, "Tx[%d] %p->%p\n", i, tx, tx->tx_msg);
+                CDEBUG(D_NET, "Tx[%d] %p->%p[%x:"LPX64"]\n", i, tx, 
+                       tx->tx_msg, KIBNAL_TX_LKEY(tx), KIBNAL_TX_VADDR(tx));
 
                 if (tx->tx_isnblk)
                         list_add (&tx->tx_list, 
@@ -1153,12 +1356,11 @@ kibnal_setup_tx_descs (void)
         return (0);
 }
 
-static void
+void
 kibnal_api_shutdown (nal_t *nal)
 {
-        int   i;
-        int   rc;
-        vv_return_t retval;
+        int         i;
+        vv_return_t vvrc;
 
         if (nal->nal_refct != 0) {
                 /* This module got the first ref */
@@ -1178,16 +1380,16 @@ kibnal_api_shutdown (nal_t *nal)
                 libcfs_nal_cmd_unregister(VIBNAL);
                 /* No new peers */
 
-                /* resetting my NID to unadvertises me, removes my
-                 * listener and nukes all current peers */
+                /* resetting my NID removes my listener and nukes all current
+                 * peers and their connections */
                 kibnal_set_mynid (PTL_NID_ANY);
 
-                /* Wait for all peer state to clean up (crazy) */
+                /* Wait for all peer state to clean up */
                 i = 2;
                 while (atomic_read (&kibnal_data.kib_npeers) != 0) {
                         i++;
                         CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
-                               "waiting for %d peers to disconnect (can take a few seconds)\n",
+                               "waiting for %d peers to disconnect\n",
                                atomic_read (&kibnal_data.kib_npeers));
                         set_current_state (TASK_UNINTERRUPTIBLE);
                         schedule_timeout (HZ);
@@ -1195,56 +1397,36 @@ kibnal_api_shutdown (nal_t *nal)
                 /* fall through */
 
         case IBNAL_INIT_CQ:
-                retval = vv_cq_destroy(kibnal_data.kib_hca, kibnal_data.kib_cq);
-                if (retval)
-                        CERROR ("Destroy CQ error: %d\n", retval);
+                vvrc = vv_cq_destroy(kibnal_data.kib_hca, kibnal_data.kib_cq);
+                if (vvrc != vv_return_ok)
+                        CERROR ("Destroy CQ error: %d\n", vvrc);
                 /* fall through */
 
         case IBNAL_INIT_TXD:
                 kibnal_free_pages (kibnal_data.kib_tx_pages);
                 /* fall through */
 
-#if IBNAL_FMR
-        case IBNAL_INIT_FMR:
-                rc = ib_fmr_pool_destroy (kibnal_data.kib_fmr_pool);
-                if (rc != 0)
-                        CERROR ("Destroy FMR pool error: %d\n", rc);
-                /* fall through */
-#endif
         case IBNAL_INIT_PD:
-#if IBNAL_WHOLE_MEM==0
-                retval = vv_pd_deallocate(kibnal_data.kib_hca, kibnal_data.kib_pd);
-                if (retval != 0)
-                        CERROR ("Destroy PD error: %d\n", retval);
+#if !IBNAL_WHOLE_MEM
+                vvrc = vv_pd_deallocate(kibnal_data.kib_hca,
+                                        kibnal_data.kib_pd);
+                if (vvrc != vv_return_ok)
+                        CERROR ("Destroy PD error: %d\n", vvrc);
 #endif
                 /* fall through */
 
-        case IBNAL_INIT_GSI:
-                retval = gsi_deregister_class(kibnal_data.gsi_handle);
-                if (retval != 0)
-                        CERROR ("GSI deregister failed: %d\n", retval);
-                /* fall through */
-
-        case IBNAL_INIT_GSI_POOL:
-                gsi_dtgrm_pool_destroy(kibnal_data.gsi_pool_handle);
-                /* fall through */
-
-        case IBNAL_INIT_PORT:
-                /* XXX ??? */
-                /* fall through */
-
         case IBNAL_INIT_ASYNC:
-                retval = vv_dell_async_event_cb (kibnal_data.kib_hca,
-                                                 kibnal_ca_async_callback);
-                if (retval)
-                        CERROR("deregister asynchronous call back error: %d\n", retval);
+                vvrc = vv_dell_async_event_cb (kibnal_data.kib_hca,
+                                              kibnal_async_callback);
+                if (vvrc != vv_return_ok)
+                        CERROR("vv_dell_async_event_cb error: %d\n", vvrc);
                         
                 /* fall through */
 
         case IBNAL_INIT_HCA:
-                retval = vv_hca_close(kibnal_data.kib_hca);
-                if (retval != 0)
-                        CERROR ("Close HCA  error: %d\n", retval);
+                vvrc = vv_hca_close(kibnal_data.kib_hca);
+                if (vvrc != vv_return_ok)
+                        CERROR ("Close HCA  error: %d\n", vvrc);
                 /* fall through */
 
         case IBNAL_INIT_LIB:
@@ -1252,8 +1434,6 @@ kibnal_api_shutdown (nal_t *nal)
                 /* fall through */
 
         case IBNAL_INIT_DATA:
-                /* Module refcount only gets to zero when all peers
-                 * have been closed so all lists must be empty */
                 LASSERT (atomic_read (&kibnal_data.kib_npeers) == 0);
                 LASSERT (kibnal_data.kib_peers != NULL);
                 for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) {
@@ -1262,7 +1442,9 @@ kibnal_api_shutdown (nal_t *nal)
                 LASSERT (atomic_read (&kibnal_data.kib_nconns) == 0);
                 LASSERT (list_empty (&kibnal_data.kib_sched_rxq));
                 LASSERT (list_empty (&kibnal_data.kib_sched_txq));
+                LASSERT (list_empty (&kibnal_data.kib_connd_zombies));
                 LASSERT (list_empty (&kibnal_data.kib_connd_conns));
+                LASSERT (list_empty (&kibnal_data.kib_connd_pcreqs));
                 LASSERT (list_empty (&kibnal_data.kib_connd_peers));
 
                 /* flag threads to terminate; wake and wait for them to die */
@@ -1285,9 +1467,7 @@ kibnal_api_shutdown (nal_t *nal)
                 break;
         }
 
-        if (kibnal_data.kib_tx_descs != NULL)
-                PORTAL_FREE (kibnal_data.kib_tx_descs,
-                             IBNAL_TX_MSGS * sizeof(kib_tx_t));
+        kibnal_free_tx_descs();
 
         if (kibnal_data.kib_peers != NULL)
                 PORTAL_FREE (kibnal_data.kib_peers,
@@ -1302,32 +1482,18 @@ kibnal_api_shutdown (nal_t *nal)
         kibnal_data.kib_init = IBNAL_INIT_NOTHING;
 }
 
-#define roundup_power(val, power) \
-        ( (val + (__u64)(power - 1)) & ~((__u64)(power - 1)) )
-
-/* this isn't very portable or sturdy in the face of funny mem/bus configs */
-static __u64 max_phys_mem(void)
-{
-        struct sysinfo si;
-        __u64 ret;
-
-        si_meminfo(&si);
-        ret = (__u64)max(si.totalram, max_mapnr) * si.mem_unit;
-        return roundup_power(ret, 128 * 1024 * 1024);
-} 
-#undef roundup_power
-
-static int
+int
 kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
                      ptl_ni_limits_t *requested_limits,
                      ptl_ni_limits_t *actual_limits)
 {
-        ptl_process_id_t    process_id;
-        int                 pkmem = atomic_read(&portal_kmemory);
-        int                 rc;
-        int                 i;
+        struct timeval            tv;
+        ptl_process_id_t          process_id;
+        int                       pkmem = atomic_read(&portal_kmemory);
+        int                       rc;
+        int                       i;
         vv_request_event_record_t req_er;
-        vv_return_t         retval;
+        vv_return_t               vvrc;
 
         LASSERT (nal == &kibnal_api);
 
@@ -1340,9 +1506,13 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
         }
 
         LASSERT (kibnal_data.kib_init == IBNAL_INIT_NOTHING);
+        memset (&kibnal_data, 0, sizeof (kibnal_data)); /* zero pointers, flags etc */
+        
+        do_gettimeofday(&tv);
+        kibnal_data.kib_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
+        kibnal_data.kib_svc_id = IBNAL_SERVICE_NUMBER;
 
         init_MUTEX (&kibnal_data.kib_nid_mutex);
-        kibnal_data.kib_nid = PTL_NID_ANY;
 
         rwlock_init(&kibnal_data.kib_global_lock);
 
@@ -1357,7 +1527,9 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
 
         spin_lock_init (&kibnal_data.kib_connd_lock);
         INIT_LIST_HEAD (&kibnal_data.kib_connd_peers);
+        INIT_LIST_HEAD (&kibnal_data.kib_connd_pcreqs);
         INIT_LIST_HEAD (&kibnal_data.kib_connd_conns);
+        INIT_LIST_HEAD (&kibnal_data.kib_connd_zombies);
         init_waitqueue_head (&kibnal_data.kib_connd_waitq);
 
         spin_lock_init (&kibnal_data.kib_sched_lock);
@@ -1370,22 +1542,18 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
         INIT_LIST_HEAD (&kibnal_data.kib_idle_nblk_txs);
         init_waitqueue_head(&kibnal_data.kib_idle_tx_waitq);
 
-        INIT_LIST_HEAD (&kibnal_data.gsi_pending);
-        init_MUTEX (&kibnal_data.gsi_mutex);
-
-        PORTAL_ALLOC (kibnal_data.kib_tx_descs,
-                      IBNAL_TX_MSGS * sizeof(kib_tx_t));
-        if (kibnal_data.kib_tx_descs == NULL) {
-                CERROR ("Can't allocate tx descs\n");
+        rc = kibnal_alloc_tx_descs();
+        if (rc != 0) {
+                CERROR("Can't allocate tx descs\n");
                 goto failed;
         }
-
+        
         /* lists/ptrs/locks initialised */
         kibnal_data.kib_init = IBNAL_INIT_DATA;
         /*****************************************************/
 
         process_id.pid = requested_pid;
-        process_id.nid = kibnal_data.kib_nid;
+        process_id.nid = PTL_NID_ANY;
         
         rc = lib_init(&kibnal_lib, nal, process_id,
                       requested_limits, actual_limits);
@@ -1399,7 +1567,7 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
         /*****************************************************/
 
         for (i = 0; i < IBNAL_N_SCHED; i++) {
-                rc = kibnal_thread_start (kibnal_scheduler, (void *)i);
+                rc = kibnal_thread_start (kibnal_scheduler, (void *)((long)i));
                 if (rc != 0) {
                         CERROR("Can't spawn vibnal scheduler[%d]: %d\n",
                                i, rc);
@@ -1414,9 +1582,9 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
         }
 
         /* TODO: apparently only one adapter is supported */
-        retval = vv_hca_open("ANY_HCA", NULL, &kibnal_data.kib_hca);
-        if (retval) {
-                CERROR ("Can't open CA: %d\n", retval);
+        vvrc = vv_hca_open("ANY_HCA", NULL, &kibnal_data.kib_hca);
+        if (vvrc != vv_return_ok) {
+                CERROR ("Can't open CA: %d\n", vvrc);
                 goto failed;
         }
 
@@ -1425,12 +1593,10 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
 
         /* register to get HCA's asynchronous events. */
         req_er.req_event_type = VV_ASYNC_EVENT_ALL_MASK;
-        retval = vv_set_async_event_cb (kibnal_data.kib_hca,
-                                        req_er,
-                                        kibnal_ca_async_callback);
-
-        if (retval) {
-                CERROR ("Can't open CA: %d\n", retval);
+        vvrc = vv_set_async_event_cb (kibnal_data.kib_hca, req_er,
+                                     kibnal_async_callback);
+        if (vvrc != vv_return_ok) {
+                CERROR ("Can't open CA: %d\n", vvrc);
                 goto failed; 
         }
 
@@ -1438,10 +1604,9 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
 
         /*****************************************************/
 
-        retval = vv_hca_query(kibnal_data.kib_hca,
-                             &kibnal_data.kib_hca_attrs);
-        if (retval) {
-                CERROR ("Can't size port attrs: %d\n", retval);
+        vvrc = vv_hca_query(kibnal_data.kib_hca, &kibnal_data.kib_hca_attrs);
+        if (vvrc != vv_return_ok) {
+                CERROR ("Can't size port attrs: %d\n", vvrc);
                 goto failed;
         }
 
@@ -1453,9 +1618,10 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
                 u_int32_t tbl_count;
                 vv_port_attrib_t *pattr = &kibnal_data.kib_port_attr;
 
-                retval = vv_port_query(kibnal_data.kib_hca, port_num, pattr);
-                if (retval) {
-                        CERROR("vv_port_query failed for port %d: %d\n", port_num, retval);
+                vvrc = vv_port_query(kibnal_data.kib_hca, port_num, pattr);
+                if (vvrc != vv_return_ok) {
+                        CERROR("vv_port_query failed for port %d: %d\n",
+                               port_num, vvrc);
                         continue;
                 }
 
@@ -1476,16 +1642,22 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
                         kibnal_data.kib_port = port_num;
                         
                         tbl_count = 1;
-                        retval = vv_get_port_gid_tbl(kibnal_data.kib_hca, port_num, &tbl_count, &kibnal_data.kib_port_gid);
-                        if (retval) {
-                                CERROR("vv_get_port_gid_tbl failed for port %d: %d\n", port_num, retval);
+                        vvrc = vv_get_port_gid_tbl(kibnal_data.kib_hca, 
+                                                   port_num, &tbl_count,
+                                                   &kibnal_data.kib_port_gid);
+                        if (vvrc != vv_return_ok) {
+                                CERROR("vv_get_port_gid_tbl failed "
+                                       "for port %d: %d\n", port_num, vvrc);
                                 continue;
                         }
 
                         tbl_count = 1;
-                        retval = vv_get_port_partition_tbl (kibnal_data.kib_hca, port_num, &tbl_count, &kibnal_data.kib_port_pkey);
-                        if (retval) {
-                                CERROR("vv_get_port_partition_tbl failed for port %d: %d\n", port_num, retval);
+                        vvrc = vv_get_port_partition_tbl(kibnal_data.kib_hca, 
+                                                        port_num, &tbl_count,
+                                                        &kibnal_data.kib_port_pkey);
+                        if (vvrc != vv_return_ok) {
+                                CERROR("vv_get_port_partition_tbl failed "
+                                       "for port %d: %d\n", port_num, vvrc);
                                 continue;
                         }
 
@@ -1505,45 +1677,19 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
         }
 
         CDEBUG(D_NET, "Using port %d - GID="LPX64":"LPX64"\n",
-               kibnal_data.kib_port, kibnal_data.kib_port_gid.scope.g.subnet, kibnal_data.kib_port_gid.scope.g.eui64);
-        CDEBUG(D_NET, "got guid "LPX64"\n", cpu_to_le64(kibnal_data.kib_port_gid.scope.g.eui64));
+               kibnal_data.kib_port, 
+               kibnal_data.kib_port_gid.scope.g.subnet, 
+               kibnal_data.kib_port_gid.scope.g.eui64);
         
-        /* Active port found */
-        kibnal_data.kib_init = IBNAL_INIT_PORT;
         /*****************************************************/
 
-        /* Prepare things to be able to send/receive MADS */
-        retval = gsi_dtgrm_pool_create(IBNAL_CONCURRENT_PEERS, &kibnal_data.gsi_pool_handle);
-        if (retval) {
-                CERROR("Could not create GSI pool: %d\n", retval);
-                goto failed;
-        }
-        kibnal_data.kib_init = IBNAL_INIT_GSI_POOL;
-
-        retval = gsi_register_class(MAD_CLASS_SUBN_ADM, /* TODO: correct? */
-                                2,     /* version */
-                                "ANY_HCA",
-#ifdef GSI_PASS_PORT_NUM
-                                kibnal_data.kib_port,
-#endif                   
-                                0, 0,
-                                vibnal_mad_sent_cb,    vibnal_mad_received_cb,
-                                NULL, &kibnal_data.gsi_handle);
-        if (retval) {
-                CERROR("Cannot register GSI class: %d\n", retval);
-                goto failed;
-        }
-
-        kibnal_data.kib_init = IBNAL_INIT_GSI;
-        /*****************************************************/
-
-#if IBNAL_WHOLE_MEM==0
-        retval = vv_pd_allocate(kibnal_data.kib_hca, &kibnal_data.kib_pd);
+#if !IBNAL_WHOLE_MEM
+        vvrc = vv_pd_allocate(kibnal_data.kib_hca, &kibnal_data.kib_pd);
 #else
-        retval = vv_get_gen_pd_h(kibnal_data.kib_hca, &kibnal_data.kib_pd);
+        vvrc = vv_get_gen_pd_h(kibnal_data.kib_hca, &kibnal_data.kib_pd);
 #endif
-        if (retval) {
-                CERROR ("Can't create PD: %d\n", retval);
+        if (vvrc != 0) {
+                CERROR ("Can't create PD: %d\n", vvrc);
                 goto failed;
         }
         
@@ -1551,35 +1697,6 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
         kibnal_data.kib_init = IBNAL_INIT_PD;
         /*****************************************************/
 
-#if IBNAL_FMR
-        {
-                const int pool_size = IBNAL_NTX + IBNAL_NTX_NBLK;
-                struct ib_fmr_pool_param params = {
-                        .max_pages_per_fmr = PTL_MTU/PAGE_SIZE,
-                        .access            = (IB_ACCESS_LOCAL_WRITE |
-                                              IB_ACCESS_REMOTE_WRITE |
-                                              IB_ACCESS_REMOTE_READ),
-                        .pool_size         = pool_size,
-                        .dirty_watermark   = (pool_size * 3)/4,
-                        .flush_function    = NULL,
-                        .flush_arg         = NULL,
-                        .cache             = 1,
-                };
-                rc = ib_fmr_pool_create(kibnal_data.kib_pd, &params,
-                                        &kibnal_data.kib_fmr_pool);
-                if (rc != 0) {
-                        CERROR ("Can't create FMR pool size %d: %d\n", 
-                                pool_size, rc);
-                        goto failed;
-                }
-        }
-
-        /* flag FMR pool initialised */
-        kibnal_data.kib_init = IBNAL_INIT_FMR;
-#endif
-
-        /*****************************************************/
-
         rc = kibnal_setup_tx_descs();
         if (rc != 0) {
                 CERROR ("Can't register tx descs: %d\n", rc);
@@ -1592,12 +1709,12 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
         {
                 uint32_t nentries;
 
-                retval = vv_cq_create(kibnal_data.kib_hca, IBNAL_CQ_ENTRIES,
-                                      kibnal_ca_callback, 
-                                      NULL, /* context */
-                                      &kibnal_data.kib_cq, &nentries);
-                if (retval) {
-                        CERROR ("Can't create RX CQ: %d\n", retval);
+                vvrc = vv_cq_create(kibnal_data.kib_hca, IBNAL_CQ_ENTRIES,
+                                    kibnal_cq_callback, 
+                                    NULL, /* context */
+                                    &kibnal_data.kib_cq, &nentries);
+                if (vvrc != 0) {
+                        CERROR ("Can't create RX CQ: %d\n", vvrc);
                         goto failed;
                 }
 
@@ -1610,8 +1727,10 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
                         goto failed;
                 }
 
-                retval = vv_request_completion_notification(kibnal_data.kib_hca, kibnal_data.kib_cq, vv_next_solicit_unsolicit_event);
-                if (retval != 0) {
+                vvrc = vv_request_completion_notification(kibnal_data.kib_hca, 
+                                                          kibnal_data.kib_cq, 
+                                                          vv_next_solicit_unsolicit_event);
+                if (vvrc != 0) {
                         CERROR ("Failed to re-arm completion queue: %d\n", rc);
                         goto failed;
                 }
@@ -1657,16 +1776,17 @@ kibnal_module_init (void)
 {
         int    rc;
 
-        if (sizeof(kib_wire_connreq_t) > cm_REQ_priv_data_len) {
-                CERROR("sizeof(kib_wire_connreq_t) > cm_REQ_priv_data_len\n");
-                return -EINVAL;
-        }
-
+        CLASSERT (offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t) 
+                  <= cm_REQ_priv_data_len);
+        CLASSERT (offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t) 
+                  <= cm_REP_priv_data_len);
+        CLASSERT (offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[IBNAL_MAX_RDMA_FRAGS])
+                  <= IBNAL_MSG_SIZE);
+        CLASSERT (offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[IBNAL_MAX_RDMA_FRAGS])
+                  <= IBNAL_MSG_SIZE);
+        
         /* the following must be sizeof(int) for proc_dointvec() */
-        if (sizeof (kibnal_tunables.kib_io_timeout) != sizeof (int)) {
-                CERROR("sizeof (kibnal_tunables.kib_io_timeout) != sizeof (int)\n");
-                return -EINVAL;
-        }
+        CLASSERT (sizeof (kibnal_tunables.kib_io_timeout) == sizeof (int));
 
         kibnal_api.nal_ni_init = kibnal_api_startup;
         kibnal_api.nal_ni_fini = kibnal_api_shutdown;