Whamcloud - gitweb
Revert "b20288 fix a deadlock in kiblnd_check_conns i=isaac i=maxim"
[fs/lustre-release.git] / lnet / klnds / viblnd / viblnd.c
index b89fc11..9d904c4 100644 (file)
@@ -1,62 +1,68 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- *   Author: Eric Barton <eric@bartonsoftware.com>
- *   Author: Frank Zago <fzago@systemfabricworks.com>
+ * GPL HEADER START
  *
- *   This file is part of Lustre, http://www.lustre.org.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
  *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
  *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
  *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/klnds/viblnd/viblnd.c
+ *
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ * Author: Frank Zago <fzago@systemfabricworks.com>
  */
 
-#include "vibnal.h"
-
-nal_t                   kibnal_api;
-ptl_handle_ni_t         kibnal_ni;
-kib_data_t              kibnal_data;
-kib_tunables_t          kibnal_tunables;
-
-#ifdef CONFIG_SYSCTL
-#define IBNAL_SYSCTL             202
-
-#define IBNAL_SYSCTL_TIMEOUT     1
+#include "viblnd.h"
 
-static ctl_table kibnal_ctl_table[] = {
-        {IBNAL_SYSCTL_TIMEOUT, "timeout", 
-         &kibnal_tunables.kib_io_timeout, sizeof (int),
-         0644, NULL, &proc_dointvec},
-        { 0 }
+lnd_t the_kiblnd = {
+        .lnd_type       = VIBLND,
+        .lnd_startup    = kibnal_startup,
+        .lnd_shutdown   = kibnal_shutdown,
+        .lnd_ctl        = kibnal_ctl,
+        .lnd_send       = kibnal_send,
+        .lnd_recv       = kibnal_recv,
+        .lnd_eager_recv = kibnal_eager_recv,
 };
 
-static ctl_table kibnal_top_ctl_table[] = {
-        {IBNAL_SYSCTL, "vibnal", NULL, 0, 0555, kibnal_ctl_table},
-        { 0 }
-};
-#endif
+kib_data_t              kibnal_data;
 
 void vibnal_assert_wire_constants (void)
 {
         /* Wire protocol assertions generated by 'wirecheck'
-         * running on Linux robert.bartonsoftware.com 2.6.5-1.358 #1 Sat May 8 09:04:50 EDT 2004 i686
-         * with gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7) */
+         * running on Linux robert 2.6.11-1.27_FC3 #1 Tue May 17 20:27:37 EDT 2005 i686 athlon i386 G
+         * with gcc version 3.4.3 20050227 (Red Hat 3.4.3-22.fc3) */
 
 
         /* Constants... */
         CLASSERT (IBNAL_MSG_MAGIC == 0x0be91b91);
-        CLASSERT (IBNAL_MSG_VERSION == 5);
+        CLASSERT (IBNAL_MSG_VERSION == 0x11);
         CLASSERT (IBNAL_MSG_CONNREQ == 0xc0);
         CLASSERT (IBNAL_MSG_CONNACK == 0xc1);
         CLASSERT (IBNAL_MSG_NOOP == 0xd0);
@@ -83,24 +89,16 @@ void vibnal_assert_wire_constants (void)
         CLASSERT ((int)sizeof(((kib_immediate_msg_t *)0)->ibim_hdr) == 72);
         CLASSERT ((int)offsetof(kib_immediate_msg_t, ibim_payload[13]) == 85);
         CLASSERT ((int)sizeof(((kib_immediate_msg_t *)0)->ibim_payload[13]) == 1);
-
-        /* Checks for struct kib_rdma_frag_t */
-        CLASSERT ((int)sizeof(kib_rdma_frag_t) == 12);
-        CLASSERT ((int)offsetof(kib_rdma_frag_t, rf_nob) == 0);
-        CLASSERT ((int)sizeof(((kib_rdma_frag_t *)0)->rf_nob) == 4);
-        CLASSERT ((int)offsetof(kib_rdma_frag_t, rf_addr_lo) == 4);
-        CLASSERT ((int)sizeof(((kib_rdma_frag_t *)0)->rf_addr_lo) == 4);
-        CLASSERT ((int)offsetof(kib_rdma_frag_t, rf_addr_hi) == 8);
-        CLASSERT ((int)sizeof(((kib_rdma_frag_t *)0)->rf_addr_hi) == 4);
+        CLASSERT (IBNAL_USE_FMR == 1);
 
         /* Checks for struct kib_rdma_desc_t */
-        CLASSERT ((int)sizeof(kib_rdma_desc_t) == 8);
-        CLASSERT ((int)offsetof(kib_rdma_desc_t, rd_key) == 0);
+        CLASSERT ((int)sizeof(kib_rdma_desc_t) == 16);
+        CLASSERT ((int)offsetof(kib_rdma_desc_t, rd_addr) == 0);
+        CLASSERT ((int)sizeof(((kib_rdma_desc_t *)0)->rd_addr) == 8);
+        CLASSERT ((int)offsetof(kib_rdma_desc_t, rd_nob) == 8);
+        CLASSERT ((int)sizeof(((kib_rdma_desc_t *)0)->rd_nob) == 4);
+        CLASSERT ((int)offsetof(kib_rdma_desc_t, rd_key) == 12);
         CLASSERT ((int)sizeof(((kib_rdma_desc_t *)0)->rd_key) == 4);
-        CLASSERT ((int)offsetof(kib_rdma_desc_t, rd_nfrag) == 4);
-        CLASSERT ((int)sizeof(((kib_rdma_desc_t *)0)->rd_nfrag) == 4);
-        CLASSERT ((int)offsetof(kib_rdma_desc_t, rd_frags[13]) == 164);
-        CLASSERT ((int)sizeof(((kib_rdma_desc_t *)0)->rd_frags[13]) == 12);
 
         /* Checks for struct kib_putreq_msg_t */
         CLASSERT ((int)sizeof(kib_putreq_msg_t) == 80);
@@ -110,22 +108,22 @@ void vibnal_assert_wire_constants (void)
         CLASSERT ((int)sizeof(((kib_putreq_msg_t *)0)->ibprm_cookie) == 8);
 
         /* Checks for struct kib_putack_msg_t */
-        CLASSERT ((int)sizeof(kib_putack_msg_t) == 24);
+        CLASSERT ((int)sizeof(kib_putack_msg_t) == 32);
         CLASSERT ((int)offsetof(kib_putack_msg_t, ibpam_src_cookie) == 0);
         CLASSERT ((int)sizeof(((kib_putack_msg_t *)0)->ibpam_src_cookie) == 8);
         CLASSERT ((int)offsetof(kib_putack_msg_t, ibpam_dst_cookie) == 8);
         CLASSERT ((int)sizeof(((kib_putack_msg_t *)0)->ibpam_dst_cookie) == 8);
         CLASSERT ((int)offsetof(kib_putack_msg_t, ibpam_rd) == 16);
-        CLASSERT ((int)sizeof(((kib_putack_msg_t *)0)->ibpam_rd) == 8);
+        CLASSERT ((int)sizeof(((kib_putack_msg_t *)0)->ibpam_rd) == 16);
 
         /* Checks for struct kib_get_msg_t */
-        CLASSERT ((int)sizeof(kib_get_msg_t) == 88);
+        CLASSERT ((int)sizeof(kib_get_msg_t) == 96);
         CLASSERT ((int)offsetof(kib_get_msg_t, ibgm_hdr) == 0);
         CLASSERT ((int)sizeof(((kib_get_msg_t *)0)->ibgm_hdr) == 72);
         CLASSERT ((int)offsetof(kib_get_msg_t, ibgm_cookie) == 72);
         CLASSERT ((int)sizeof(((kib_get_msg_t *)0)->ibgm_cookie) == 8);
         CLASSERT ((int)offsetof(kib_get_msg_t, ibgm_rd) == 80);
-        CLASSERT ((int)sizeof(((kib_get_msg_t *)0)->ibgm_rd) == 8);
+        CLASSERT ((int)sizeof(((kib_get_msg_t *)0)->ibgm_rd) == 16);
 
         /* Checks for struct kib_completion_msg_t */
         CLASSERT ((int)sizeof(kib_completion_msg_t) == 12);
@@ -135,7 +133,7 @@ void vibnal_assert_wire_constants (void)
         CLASSERT ((int)sizeof(((kib_completion_msg_t *)0)->ibcm_status) == 4);
 
         /* Checks for struct kib_msg_t */
-        CLASSERT ((int)sizeof(kib_msg_t) == 144);
+        CLASSERT ((int)sizeof(kib_msg_t) == 152);
         CLASSERT ((int)offsetof(kib_msg_t, ibm_magic) == 0);
         CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_magic) == 4);
         CLASSERT ((int)offsetof(kib_msg_t, ibm_version) == 4);
@@ -165,20 +163,13 @@ void vibnal_assert_wire_constants (void)
         CLASSERT ((int)offsetof(kib_msg_t, ibm_u.putreq) == 56);
         CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_u.putreq) == 80);
         CLASSERT ((int)offsetof(kib_msg_t, ibm_u.putack) == 56);
-        CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_u.putack) == 24);
+        CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_u.putack) == 32);
         CLASSERT ((int)offsetof(kib_msg_t, ibm_u.get) == 56);
-        CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_u.get) == 88);
+        CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_u.get) == 96);
         CLASSERT ((int)offsetof(kib_msg_t, ibm_u.completion) == 56);
         CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_u.completion) == 12);
 }
 
-void
-kibnal_pause(int ticks)
-{
-        set_current_state(TASK_UNINTERRUPTIBLE);
-        schedule_timeout(ticks);
-}
-
 __u32 
 kibnal_cksum (void *ptr, int nob)
 {
@@ -200,56 +191,76 @@ kibnal_init_msg(kib_msg_t *msg, int type, int body_nob)
 }
 
 void
-kibnal_pack_msg(kib_msg_t *msg, int credits, ptl_nid_t dstnid, 
-                __u64 dststamp, __u64 seq)
+kibnal_pack_msg(kib_msg_t *msg, __u32 version, int credits,
+                lnet_nid_t dstnid, __u64 dststamp, __u64 seq)
 {
         /* CAVEAT EMPTOR! all message fields not set here should have been
          * initialised previously. */
         msg->ibm_magic    = IBNAL_MSG_MAGIC;
-        msg->ibm_version  = IBNAL_MSG_VERSION;
+        msg->ibm_version  = version;
         /*   ibm_type */
         msg->ibm_credits  = credits;
         /*   ibm_nob */
         msg->ibm_cksum    = 0;
-        msg->ibm_srcnid   = kibnal_lib.libnal_ni.ni_pid.nid;
+        msg->ibm_srcnid   = kibnal_data.kib_ni->ni_nid;
         msg->ibm_srcstamp = kibnal_data.kib_incarnation;
         msg->ibm_dstnid   = dstnid;
         msg->ibm_dststamp = dststamp;
         msg->ibm_seq      = seq;
-#if IBNAL_CKSUM
-        /* NB ibm_cksum zero while computing cksum */
-        msg->ibm_cksum    = kibnal_cksum(msg, msg->ibm_nob);
-#endif
+
+        if (*kibnal_tunables.kib_cksum) {
+                /* NB ibm_cksum zero while computing cksum */
+                msg->ibm_cksum = kibnal_cksum(msg, msg->ibm_nob);
+        }
 }
 
 int
-kibnal_unpack_msg(kib_msg_t *msg, int nob)
+kibnal_unpack_msg(kib_msg_t *msg, __u32 expected_version, int nob)
 {
         const int hdr_size = offsetof(kib_msg_t, ibm_u);
         __u32     msg_cksum;
+        __u32     msg_version;
         int       flip;
         int       msg_nob;
+#if !IBNAL_USE_FMR
         int       i;
         int       n;
-
+#endif
         /* 6 bytes are enough to have received magic + version */
         if (nob < 6) {
                 CERROR("Short message: %d\n", nob);
                 return -EPROTO;
         }
 
+        /* Future protocol version compatibility support!
+         * If the viblnd-specific protocol changes, or when LNET unifies
+         * protocols over all LNDs, the initial connection will negotiate a
+         * protocol version.  If I find this, I avoid any console errors.  If
+         * my peer is doing connection establishment, the reject will tell the
+         * peer which version I'm running. */
+
         if (msg->ibm_magic == IBNAL_MSG_MAGIC) {
                 flip = 0;
         } else if (msg->ibm_magic == __swab32(IBNAL_MSG_MAGIC)) {
                 flip = 1;
         } else {
+                if (msg->ibm_magic == LNET_PROTO_MAGIC ||
+                    msg->ibm_magic == __swab32(LNET_PROTO_MAGIC))
+                        return -EPROTO;
+
+                /* Completely out to lunch */
                 CERROR("Bad magic: %08x\n", msg->ibm_magic);
                 return -EPROTO;
         }
 
-        if (msg->ibm_version != 
-            (flip ? __swab16(IBNAL_MSG_VERSION) : IBNAL_MSG_VERSION)) {
-                CERROR("Bad version: %d\n", msg->ibm_version);
+        msg_version = flip ? __swab16(msg->ibm_version) : msg->ibm_version;
+        if (expected_version == 0) {
+                if (msg_version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD &&
+                    msg_version != IBNAL_MSG_VERSION)
+                        return -EPROTO;
+        } else if (msg_version != expected_version) {
+                CERROR("Bad version: %x(%x expected)\n",
+                       msg_version, expected_version);
                 return -EPROTO;
         }
 
@@ -274,10 +285,10 @@ kibnal_unpack_msg(kib_msg_t *msg, int nob)
                 return -EPROTO;
         }
         msg->ibm_cksum = msg_cksum;
-        
+
         if (flip) {
                 /* leave magic unflipped as a clue to peer endianness */
-                __swab16s(&msg->ibm_version);
+                msg->ibm_version = msg_version;
                 CLASSERT (sizeof(msg->ibm_type) == 1);
                 CLASSERT (sizeof(msg->ibm_credits) == 1);
                 msg->ibm_nob = msg_nob;
@@ -287,9 +298,9 @@ kibnal_unpack_msg(kib_msg_t *msg, int nob)
                 __swab64s(&msg->ibm_dststamp);
                 __swab64s(&msg->ibm_seq);
         }
-        
-        if (msg->ibm_srcnid == PTL_NID_ANY) {
-                CERROR("Bad src nid: "LPX64"\n", msg->ibm_srcnid);
+
+        if (msg->ibm_srcnid == LNET_NID_ANY) {
+                CERROR("Bad src nid: %s\n", libcfs_nid2str(msg->ibm_srcnid));
                 return -EPROTO;
         }
 
@@ -297,7 +308,7 @@ kibnal_unpack_msg(kib_msg_t *msg, int nob)
         default:
                 CERROR("Unknown message type %x\n", msg->ibm_type);
                 return -EPROTO;
-                
+
         case IBNAL_MSG_NOOP:
                 break;
 
@@ -310,7 +321,7 @@ kibnal_unpack_msg(kib_msg_t *msg, int nob)
                 break;
 
         case IBNAL_MSG_PUT_REQ:
-                if (msg_nob < sizeof(msg->ibm_u.putreq)) {
+                if (msg_nob < hdr_size + sizeof(msg->ibm_u.putreq)) {
                         CERROR("Short PUT_REQ: %d(%d)\n", msg_nob,
                                (int)(hdr_size + sizeof(msg->ibm_u.putreq)));
                         return -EPROTO;
@@ -318,36 +329,44 @@ kibnal_unpack_msg(kib_msg_t *msg, int nob)
                 break;
 
         case IBNAL_MSG_PUT_ACK:
-                if (msg_nob < offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[0])) {
+                if (msg_nob < hdr_size + sizeof(msg->ibm_u.putack)) {
                         CERROR("Short PUT_ACK: %d(%d)\n", msg_nob,
-                               (int)offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[0]));
+                               (int)(hdr_size + sizeof(msg->ibm_u.putack)));
                         return -EPROTO;
                 }
-
+#if IBNAL_USE_FMR
+                if (flip) {
+                        __swab64s(&msg->ibm_u.putack.ibpam_rd.rd_addr);
+                        __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nob);
+                        __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key);
+                }
+#else
                 if (flip) {
                         __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key);
                         __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nfrag);
                 }
-                
+
                 n = msg->ibm_u.putack.ibpam_rd.rd_nfrag;
                 if (n <= 0 || n > IBNAL_MAX_RDMA_FRAGS) {
-                        CERROR("Bad PUT_ACK nfrags: %d, should be 0 < n <= %d\n", 
+                        CERROR("Bad PUT_ACK nfrags: %d, should be 0 < n <= %d\n",
                                n, IBNAL_MAX_RDMA_FRAGS);
                         return -EPROTO;
                 }
-                
+
                 if (msg_nob < offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n])) {
                         CERROR("Short PUT_ACK: %d(%d)\n", msg_nob,
                                (int)offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n]));
                         return -EPROTO;
                 }
 
-                if (flip)
+                if (flip) {
                         for (i = 0; i < n; i++) {
                                 __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_nob);
                                 __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_addr_lo);
                                 __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_addr_hi);
                         }
+                }
+#endif
                 break;
 
         case IBNAL_MSG_GET_REQ:
@@ -356,6 +375,13 @@ kibnal_unpack_msg(kib_msg_t *msg, int nob)
                                (int)(hdr_size + sizeof(msg->ibm_u.get)));
                         return -EPROTO;
                 }
+#if IBNAL_USE_FMR
+                if (flip) {
+                        __swab64s(&msg->ibm_u.get.ibgm_rd.rd_addr);
+                        __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nob);
+                        __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key);
+                }
+#else
                 if (flip) {
                         __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key);
                         __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nfrag);
@@ -363,23 +389,24 @@ kibnal_unpack_msg(kib_msg_t *msg, int nob)
 
                 n = msg->ibm_u.get.ibgm_rd.rd_nfrag;
                 if (n <= 0 || n > IBNAL_MAX_RDMA_FRAGS) {
-                        CERROR("Bad GET_REQ nfrags: %d, should be 0 < n <= %d\n", 
+                        CERROR("Bad GET_REQ nfrags: %d, should be 0 < n <= %d\n",
                                n, IBNAL_MAX_RDMA_FRAGS);
                         return -EPROTO;
                 }
-                
+
                 if (msg_nob < offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n])) {
                         CERROR("Short GET_REQ: %d(%d)\n", msg_nob,
                                (int)offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n]));
                         return -EPROTO;
                 }
-                
+
                 if (flip)
                         for (i = 0; i < msg->ibm_u.get.ibgm_rd.rd_nfrag; i++) {
                                 __swab32s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_nob);
                                 __swab32s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_addr_lo);
                                 __swab32s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_addr_hi);
                         }
+#endif
                 break;
 
         case IBNAL_MSG_PUT_NAK:
@@ -412,103 +439,75 @@ kibnal_unpack_msg(kib_msg_t *msg, int nob)
 }
 
 int
-kibnal_set_mynid(ptl_nid_t nid)
+kibnal_start_listener (lnet_ni_t *ni)
 {
-        static cm_listen_data_t info;           /* protected by kib_nid_mutex */
+        static cm_listen_data_t info;
 
-        lib_ni_t        *ni = &kibnal_lib.libnal_ni;
-        int              rc;
         cm_return_t      cmrc;
 
-        CDEBUG(D_IOCTL, "setting mynid to "LPX64" (old nid="LPX64")\n",
-               nid, ni->ni_pid.nid);
+        LASSERT (kibnal_data.kib_listen_handle == NULL);
 
-        down (&kibnal_data.kib_nid_mutex);
-
-        if (nid == ni->ni_pid.nid) {
-                /* no change of NID */
-                up (&kibnal_data.kib_nid_mutex);
-                return (0);
+        kibnal_data.kib_listen_handle =
+                cm_create_cep(cm_cep_transp_rc);
+        if (kibnal_data.kib_listen_handle == NULL) {
+                CERROR ("Can't create listen CEP\n");
+                return -ENOMEM;
         }
 
-        CDEBUG(D_NET, "NID "LPX64"("LPX64")\n", ni->ni_pid.nid, nid);
-
-        if (kibnal_data.kib_listen_handle != NULL) {
-                cmrc = cm_cancel(kibnal_data.kib_listen_handle);
-                if (cmrc != cm_stat_success)
-                        CERROR ("Error %d stopping listener\n", cmrc);
+        CDEBUG(D_NET, "Created CEP %p for listening\n",
+               kibnal_data.kib_listen_handle);
 
-                kibnal_pause(HZ/10);            /* ensure no more callbacks */
-        
-                cmrc = cm_destroy_cep(kibnal_data.kib_listen_handle);
-                if (cmrc != vv_return_ok)
-                        CERROR ("Error %d destroying CEP\n", cmrc);
+        memset(&info, 0, sizeof(info));
+        info.listen_addr.end_pt.sid =
+                (__u64)(*kibnal_tunables.kib_service_number);
 
-                kibnal_data.kib_listen_handle = NULL;
-        }
+        cmrc = cm_listen(kibnal_data.kib_listen_handle, &info,
+                         kibnal_listen_callback, NULL);
+        if (cmrc == cm_stat_success)
+                return 0;
 
-        /* Change NID.  NB queued passive connection requests (if any) will be
-         * rejected with an incorrect destination NID */
-        ni->ni_pid.nid = nid;
-        kibnal_data.kib_incarnation++;
-        mb();
+        CERROR ("cm_listen error: %d\n", cmrc);
 
-        /* Delete all existing peers and their connections after new
-         * NID/incarnation set to ensure no old connections in our brave
-         * new world. */
-        kibnal_del_peer (PTL_NID_ANY, 0);
+        cmrc = cm_destroy_cep(kibnal_data.kib_listen_handle);
+        LASSERT (cmrc == cm_stat_success);
 
-        if (ni->ni_pid.nid != PTL_NID_ANY) {    /* got a new NID to install */
-                kibnal_data.kib_listen_handle = 
-                        cm_create_cep(cm_cep_transp_rc);
-                if (kibnal_data.kib_listen_handle == NULL) {
-                        CERROR ("Can't create listen CEP\n");
-                        rc = -ENOMEM;
-                        goto failed_0;
-                }
+        kibnal_data.kib_listen_handle = NULL;
+        return -EINVAL;
+}
 
-                CDEBUG(D_NET, "Created CEP %p for listening\n", 
-                       kibnal_data.kib_listen_handle);
+void
+kibnal_stop_listener(lnet_ni_t *ni)
+{
+        cm_return_t      cmrc;
 
-                memset(&info, 0, sizeof(info));
-                info.listen_addr.end_pt.sid = kibnal_data.kib_svc_id;
+        LASSERT (kibnal_data.kib_listen_handle != NULL);
 
-                cmrc = cm_listen(kibnal_data.kib_listen_handle, &info,
-                                 kibnal_listen_callback, NULL);
-                if (cmrc != 0) {
-                        CERROR ("cm_listen error: %d\n", cmrc);
-                        rc = -EINVAL;
-                        goto failed_1;
-                }
-        }
+        cmrc = cm_cancel(kibnal_data.kib_listen_handle);
+        if (cmrc != cm_stat_success)
+                CERROR ("Error %d stopping listener\n", cmrc);
 
-        up (&kibnal_data.kib_nid_mutex);
-        return (0);
+        cfs_pause(cfs_time_seconds(1)/10);   /* ensure no more callbacks */
 
- failed_1:
         cmrc = cm_destroy_cep(kibnal_data.kib_listen_handle);
-        LASSERT (cmrc == cm_stat_success);
+        if (cmrc != vv_return_ok)
+                CERROR ("Error %d destroying CEP\n", cmrc);
+
         kibnal_data.kib_listen_handle = NULL;
- failed_0:
-        ni->ni_pid.nid = PTL_NID_ANY;
-        kibnal_data.kib_incarnation++;
-        mb();
-        kibnal_del_peer (PTL_NID_ANY, 0);
-        up (&kibnal_data.kib_nid_mutex);
-        return rc;
 }
 
-kib_peer_t *
-kibnal_create_peer (ptl_nid_t nid)
+int
+kibnal_create_peer (kib_peer_t **peerp, lnet_nid_t nid)
 {
-        kib_peer_t *peer;
+        kib_peer_t     *peer;
+        unsigned long   flags;
+        int             rc;
 
-        LASSERT (nid != PTL_NID_ANY);
+        LASSERT (nid != LNET_NID_ANY);
 
-        PORTAL_ALLOC(peer, sizeof (*peer));
+        LIBCFS_ALLOC(peer, sizeof (*peer));
         if (peer == NULL) {
-                CERROR("Canot allocate perr\n");
-                return (NULL);
+                CERROR("Cannot allocate peer\n");
+                return -ENOMEM;
         }
 
         memset(peer, 0, sizeof(*peer));         /* zero flags etc */
@@ -520,43 +519,62 @@ kibnal_create_peer (ptl_nid_t nid)
         INIT_LIST_HEAD (&peer->ibp_conns);
         INIT_LIST_HEAD (&peer->ibp_tx_queue);
 
-        peer->ibp_reconnect_time = jiffies;
-        peer->ibp_reconnect_interval = IBNAL_MIN_RECONNECT_INTERVAL;
+        peer->ibp_error = 0;
+        peer->ibp_last_alive = cfs_time_current();
+        peer->ibp_reconnect_interval = 0;       /* OK to connect at any time */
 
-        atomic_inc (&kibnal_data.kib_npeers);
-        if (atomic_read(&kibnal_data.kib_npeers) <= IBNAL_CONCURRENT_PEERS)
-                return peer;
-        
-        CERROR("Too many peers: CQ will overflow\n");
-        kibnal_peer_decref(peer);
-        return NULL;
+        write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
+
+        if (atomic_read(&kibnal_data.kib_npeers) >=
+            *kibnal_tunables.kib_concurrent_peers) {
+                rc = -EOVERFLOW;        /* !! but at least it distinguishes */
+        } else if (kibnal_data.kib_listen_handle == NULL) {
+                rc = -ESHUTDOWN;        /* shutdown has started */
+        } else {
+                rc = 0;
+                /* npeers only grows with the global lock held */
+                atomic_inc(&kibnal_data.kib_npeers);
+        }
+
+        write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
+
+        if (rc != 0) {
+                CERROR("Can't create peer: %s\n", 
+                       (rc == -ESHUTDOWN) ? "shutting down" :
+                       "too many peers");
+                LIBCFS_FREE(peer, sizeof(*peer));
+        } else {
+                *peerp = peer;
+        }
+
+        return rc;
 }
 
 void
 kibnal_destroy_peer (kib_peer_t *peer)
 {
-
         LASSERT (atomic_read (&peer->ibp_refcount) == 0);
         LASSERT (peer->ibp_persistence == 0);
         LASSERT (!kibnal_peer_active(peer));
         LASSERT (peer->ibp_connecting == 0);
+        LASSERT (peer->ibp_accepting == 0);
         LASSERT (list_empty (&peer->ibp_conns));
         LASSERT (list_empty (&peer->ibp_tx_queue));
-        
-        PORTAL_FREE (peer, sizeof (*peer));
+
+        LIBCFS_FREE (peer, sizeof (*peer));
 
         /* NB a peer's connections keep a reference on their peer until
          * they are destroyed, so we can be assured that _all_ state to do
          * with this peer has been cleaned up when its refcount drops to
          * zero. */
-        atomic_dec (&kibnal_data.kib_npeers);
+        atomic_dec(&kibnal_data.kib_npeers);
 }
 
-/* the caller is responsible for accounting for the additional reference
- * that this creates */
 kib_peer_t *
-kibnal_find_peer_locked (ptl_nid_t nid)
+kibnal_find_peer_locked (lnet_nid_t nid)
 {
+        /* the caller is responsible for accounting for the additional
+         * reference that this creates */
         struct list_head *peer_list = kibnal_nid2peerlist (nid);
         struct list_head *tmp;
         kib_peer_t       *peer;
@@ -567,13 +585,15 @@ kibnal_find_peer_locked (ptl_nid_t nid)
 
                 LASSERT (peer->ibp_persistence != 0 || /* persistent peer */
                          peer->ibp_connecting != 0 || /* creating conns */
+                         peer->ibp_accepting != 0 ||
                          !list_empty (&peer->ibp_conns));  /* active conn */
 
                 if (peer->ibp_nid != nid)
                         continue;
 
-                CDEBUG(D_NET, "got peer [%p] -> "LPX64" (%d)\n",
-                       peer, nid, atomic_read (&peer->ibp_refcount));
+                CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n",
+                       peer, libcfs_nid2str(nid),
+                       atomic_read (&peer->ibp_refcount));
                 return (peer);
         }
         return (NULL);
@@ -592,7 +612,7 @@ kibnal_unlink_peer_locked (kib_peer_t *peer)
 }
 
 int
-kibnal_get_peer_info (int index, ptl_nid_t *nidp, __u32 *ipp,
+kibnal_get_peer_info (int index, lnet_nid_t *nidp, __u32 *ipp,
                       int *persistencep)
 {
         kib_peer_t        *peer;
@@ -609,6 +629,7 @@ kibnal_get_peer_info (int index, ptl_nid_t *nidp, __u32 *ipp,
                         peer = list_entry (ptmp, kib_peer_t, ibp_list);
                         LASSERT (peer->ibp_persistence != 0 ||
                                  peer->ibp_connecting != 0 ||
+                                 peer->ibp_accepting != 0 ||
                                  !list_empty (&peer->ibp_conns));
 
                         if (index-- > 0)
@@ -629,23 +650,29 @@ kibnal_get_peer_info (int index, ptl_nid_t *nidp, __u32 *ipp,
 }
 
 int
-kibnal_add_persistent_peer (ptl_nid_t nid, __u32 ip)
+kibnal_add_persistent_peer (lnet_nid_t nid, __u32 ip)
 {
         kib_peer_t        *peer;
         kib_peer_t        *peer2;
         unsigned long      flags;
+        int                rc;
 
-        CDEBUG(D_NET, LPX64"@%08x\n", nid, ip);
-        
-        if (nid == PTL_NID_ANY)
+        CDEBUG(D_NET, "%s at %u.%u.%u.%u\n",
+               libcfs_nid2str(nid), HIPQUAD(ip));
+
+        if (nid == LNET_NID_ANY)
                 return (-EINVAL);
 
-        peer = kibnal_create_peer (nid);
-        if (peer == NULL)
-                return (-ENOMEM);
+        rc = kibnal_create_peer(&peer, nid);
+        if (rc != 0)
+                return rc;
 
         write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
 
+        /* I'm always called with a reference on kibnal_data.kib_ni
+         * so shutdown can't have started */
+        LASSERT (kibnal_data.kib_listen_handle != NULL);
+
         peer2 = kibnal_find_peer_locked (nid);
         if (peer2 != NULL) {
                 kibnal_peer_decref (peer);
@@ -658,25 +685,19 @@ kibnal_add_persistent_peer (ptl_nid_t nid, __u32 ip)
 
         peer->ibp_ip = ip;
         peer->ibp_persistence++;
-        
+
         write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
         return (0);
 }
 
 void
-kibnal_del_peer_locked (kib_peer_t *peer, int single_share)
+kibnal_del_peer_locked (kib_peer_t *peer)
 {
         struct list_head *ctmp;
         struct list_head *cnxt;
         kib_conn_t       *conn;
 
-        if (!single_share)
-                peer->ibp_persistence = 0;
-        else if (peer->ibp_persistence > 0)
-                peer->ibp_persistence--;
-
-        if (peer->ibp_persistence != 0)
-                return;
+        peer->ibp_persistence = 0;
 
         if (list_empty(&peer->ibp_conns)) {
                 kibnal_unlink_peer_locked(peer);
@@ -694,8 +715,9 @@ kibnal_del_peer_locked (kib_peer_t *peer, int single_share)
 }
 
 int
-kibnal_del_peer (ptl_nid_t nid, int single_share)
+kibnal_del_peer (lnet_nid_t nid)
 {
+        CFS_LIST_HEAD     (zombies);
         struct list_head  *ptmp;
         struct list_head  *pnxt;
         kib_peer_t        *peer;
@@ -707,7 +729,7 @@ kibnal_del_peer (ptl_nid_t nid, int single_share)
 
         write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
 
-        if (nid != PTL_NID_ANY)
+        if (nid != LNET_NID_ANY)
                 lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers;
         else {
                 lo = 0;
@@ -719,20 +741,27 @@ kibnal_del_peer (ptl_nid_t nid, int single_share)
                         peer = list_entry (ptmp, kib_peer_t, ibp_list);
                         LASSERT (peer->ibp_persistence != 0 ||
                                  peer->ibp_connecting != 0 ||
+                                 peer->ibp_accepting != 0 ||
                                  !list_empty (&peer->ibp_conns));
 
-                        if (!(nid == PTL_NID_ANY || peer->ibp_nid == nid))
+                        if (!(nid == LNET_NID_ANY || peer->ibp_nid == nid))
                                 continue;
 
-                        kibnal_del_peer_locked (peer, single_share);
-                        rc = 0;         /* matched something */
+                        if (!list_empty(&peer->ibp_tx_queue)) {
+                                LASSERT (list_empty(&peer->ibp_conns));
 
-                        if (single_share)
-                                goto out;
+                                list_splice_init(&peer->ibp_tx_queue, &zombies);
+                        }
+
+                        kibnal_del_peer_locked (peer);
+                        rc = 0;         /* matched something */
                 }
         }
- out:
+
         write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
+
+        kibnal_txlist_done(&zombies, -EIO);
+
         return (rc);
 }
 
@@ -754,6 +783,7 @@ kibnal_get_conn_by_idx (int index)
                         peer = list_entry (ptmp, kib_peer_t, ibp_list);
                         LASSERT (peer->ibp_persistence > 0 ||
                                  peer->ibp_connecting != 0 ||
+                                 peer->ibp_accepting != 0 ||
                                  !list_empty (&peer->ibp_conns));
 
                         list_for_each (ctmp, &peer->ibp_conns) {
@@ -773,24 +803,92 @@ kibnal_get_conn_by_idx (int index)
         return (NULL);
 }
 
+void
+kibnal_debug_rx (kib_rx_t *rx)  /* log one rx descriptor on a single D_CONSOLE line */
+{
+        CDEBUG(D_CONSOLE, "      %p nob %d msg_type %x "
+               "cred %d seq "LPD64"\n",
+               rx, rx->rx_nob, rx->rx_msg->ibm_type,  /* descriptor address, rx_nob, then fields read through rx_msg */
+               rx->rx_msg->ibm_credits, rx->rx_msg->ibm_seq);
+}
+
+void
+kibnal_debug_tx (kib_tx_t *tx)  /* log one tx descriptor on a single D_CONSOLE line */
+{
+        CDEBUG(D_CONSOLE, "      %p snd %d q %d w %d rc %d dl %lx "
+               "cookie "LPX64" msg %s%s type %x cred %d seq "LPD64"\n",
+               tx, tx->tx_sending, tx->tx_queued, tx->tx_waiting,
+               tx->tx_status, tx->tx_deadline, tx->tx_cookie,
+               tx->tx_lntmsg[0] == NULL ? "-" : "!",  /* one flag per tx_lntmsg slot: - when NULL, ! when set */
+               tx->tx_lntmsg[1] == NULL ? "-" : "!",
+               tx->tx_msg->ibm_type, tx->tx_msg->ibm_credits,
+               tx->tx_msg->ibm_seq);
+}
+
+void
+kibnal_debug_conn (kib_conn_t *conn)  /* dump conn counters, every rx/tx queue and all rx descriptors to D_CONSOLE */
+{
+        struct list_head *tmp;
+        int               i;
+
+        spin_lock(&conn->ibc_lock);  /* ibc_lock is held from here until the end of the dump */
+
+        CDEBUG(D_CONSOLE, "conn[%d] %p -> %s: \n",
+               atomic_read(&conn->ibc_refcount), conn,  /* the [%d] in the heading is the current refcount */
+               libcfs_nid2str(conn->ibc_peer->ibp_nid));
+        CDEBUG(D_CONSOLE, "   txseq "LPD64" rxseq "LPD64" state %d \n",
+               conn->ibc_txseq, conn->ibc_rxseq, conn->ibc_state);
+        CDEBUG(D_CONSOLE, "   nposted %d cred %d o_cred %d r_cred %d\n",
+               conn->ibc_nsends_posted, conn->ibc_credits,
+               conn->ibc_outstanding_credits, conn->ibc_reserved_credits);
+        CDEBUG(D_CONSOLE, "   disc %d comms_err %d\n",
+               conn->ibc_disconnect, conn->ibc_comms_error);
+
+        CDEBUG(D_CONSOLE, "   early_rxs:\n");
+        list_for_each(tmp, &conn->ibc_early_rxs)  /* rx descriptors are linked through rx_list */
+                kibnal_debug_rx(list_entry(tmp, kib_rx_t, rx_list));
+
+        CDEBUG(D_CONSOLE, "   tx_queue_nocred:\n");
+        list_for_each(tmp, &conn->ibc_tx_queue_nocred)  /* all four tx lists below are linked through tx_list */
+                kibnal_debug_tx(list_entry(tmp, kib_tx_t, tx_list));
+
+        CDEBUG(D_CONSOLE, "   tx_queue_rsrvd:\n");
+        list_for_each(tmp, &conn->ibc_tx_queue_rsrvd)
+                kibnal_debug_tx(list_entry(tmp, kib_tx_t, tx_list));
+
+        CDEBUG(D_CONSOLE, "   tx_queue:\n");
+        list_for_each(tmp, &conn->ibc_tx_queue)
+                kibnal_debug_tx(list_entry(tmp, kib_tx_t, tx_list));
+
+        CDEBUG(D_CONSOLE, "   active_txs:\n");
+        list_for_each(tmp, &conn->ibc_active_txs)
+                kibnal_debug_tx(list_entry(tmp, kib_tx_t, tx_list));
+
+        CDEBUG(D_CONSOLE, "   rxs:\n");
+        for (i = 0; i < IBNAL_RX_MSGS; i++)  /* ibc_rxs is indexed as an array of IBNAL_RX_MSGS entries, not walked as a list */
+                kibnal_debug_rx(&conn->ibc_rxs[i]);
+
+        spin_unlock(&conn->ibc_lock);
+}
+
 int
 kibnal_set_qp_state (kib_conn_t *conn, vv_qp_state_t new_state)
 {
         static vv_qp_attr_t attr;
-        
+
         kib_connvars_t   *cv = conn->ibc_connvars;
         vv_return_t       vvrc;
-        
+
         /* Only called by connd => static OK */
         LASSERT (!in_interrupt());
         LASSERT (current == kibnal_data.kib_connd);
 
         memset(&attr, 0, sizeof(attr));
-        
+
         switch (new_state) {
         default:
                 LBUG();
-                
+
         case vv_qp_state_init: {
                 struct vv_qp_modify_init_st *init = &attr.modify.params.init;
 
@@ -800,7 +898,7 @@ kibnal_set_qp_state (kib_conn_t *conn, vv_qp_state_t new_state)
                 init->access_control = vv_acc_r_mem_read |
                                        vv_acc_r_mem_write; /* XXX vv_acc_l_mem_write ? */
 
-                attr.modify.vv_qp_attr_mask = VV_QP_AT_P_KEY_IX | 
+                attr.modify.vv_qp_attr_mask = VV_QP_AT_P_KEY_IX |
                                               VV_QP_AT_PHY_PORT_NUM |
                                               VV_QP_AT_ACCESS_CON_F;
                 break;
@@ -825,14 +923,13 @@ kibnal_set_qp_state (kib_conn_t *conn, vv_qp_state_t new_state)
                 rtr->destanation_qp            = cv->cv_remote_qpn;
                 rtr->receive_psn               = cv->cv_rxpsn;
                 rtr->responder_rdma_r_atom_num = IBNAL_OUS_DST_RD;
-
-                // XXX ? rtr->opt_min_rnr_nak_timer = 16;
+                rtr->opt_min_rnr_nak_timer     = *kibnal_tunables.kib_rnr_nak_timer;
 
 
                 // XXX sdp sets VV_QP_AT_OP_F but no actual optional options
-                attr.modify.vv_qp_attr_mask = VV_QP_AT_ADD_VEC | 
+                attr.modify.vv_qp_attr_mask = VV_QP_AT_ADD_VEC |
                                               VV_QP_AT_DEST_QP |
-                                              VV_QP_AT_R_PSN | 
+                                              VV_QP_AT_R_PSN |
                                               VV_QP_AT_MIN_RNR_NAK_T |
                                               VV_QP_AT_RESP_RDMA_ATOM_OUT_NUM |
                                               VV_QP_AT_OP_F;
@@ -842,11 +939,11 @@ kibnal_set_qp_state (kib_conn_t *conn, vv_qp_state_t new_state)
                 struct vv_qp_modify_rts_st *rts = &attr.modify.params.rts;
 
                 rts->send_psn                 = cv->cv_txpsn;
-                rts->local_ack_timeout        = IBNAL_LOCAL_ACK_TIMEOUT;
-                rts->retry_num                = IBNAL_RETRY_CNT;
-                rts->rnr_num                  = IBNAL_RNR_CNT;
+                rts->local_ack_timeout        = *kibnal_tunables.kib_local_ack_timeout;
+                rts->retry_num                = *kibnal_tunables.kib_retry_cnt;
+                rts->rnr_num                  = *kibnal_tunables.kib_rnr_cnt;
                 rts->dest_out_rdma_r_atom_num = IBNAL_OUS_DST_RD;
-                
+
                 attr.modify.vv_qp_attr_mask = VV_QP_AT_S_PSN |
                                               VV_QP_AT_L_ACK_T |
                                               VV_QP_AT_RETRY_NUM |
@@ -859,17 +956,18 @@ kibnal_set_qp_state (kib_conn_t *conn, vv_qp_state_t new_state)
                 attr.modify.vv_qp_attr_mask = 0;
                 break;
         }
-                
+
         attr.modify.qp_modify_into_state = new_state;
         attr.modify.vv_qp_attr_mask |= VV_QP_AT_STATE;
-        
+
         vvrc = vv_qp_modify(kibnal_data.kib_hca, conn->ibc_qp, &attr, NULL);
         if (vvrc != vv_return_ok) {
-                CERROR("Can't modify qp -> "LPX64" state to %d: %d\n", 
-                       conn->ibc_peer->ibp_nid, new_state, vvrc);
+                CERROR("Can't modify qp -> %s state to %d: %d\n",
+                       libcfs_nid2str(conn->ibc_peer->ibp_nid),
+                       new_state, vvrc);
                 return -EIO;
         }
-        
+
         return 0;
 }
 
@@ -878,8 +976,6 @@ kibnal_create_conn (cm_cep_handle_t cep)
 {
         kib_conn_t   *conn;
         int           i;
-        __u64         vaddr = 0;
-        __u64         vaddr_base;
         int           page_offset;
         int           ipage;
         vv_return_t   vvrc;
@@ -891,8 +987,8 @@ kibnal_create_conn (cm_cep_handle_t cep)
         /* Only the connd creates conns => single threaded */
         LASSERT(!in_interrupt());
         LASSERT(current == kibnal_data.kib_connd);
-        
-        PORTAL_ALLOC(conn, sizeof (*conn));
+
+        LIBCFS_ALLOC(conn, sizeof (*conn));
         if (conn == NULL) {
                 CERROR ("Can't allocate connection\n");
                 return (NULL);
@@ -901,17 +997,21 @@ kibnal_create_conn (cm_cep_handle_t cep)
         /* zero flags, NULL pointers etc... */
         memset (conn, 0, sizeof (*conn));
 
+        conn->ibc_version = IBNAL_MSG_VERSION;  /* Use latest version at first */
+
         INIT_LIST_HEAD (&conn->ibc_early_rxs);
+        INIT_LIST_HEAD (&conn->ibc_tx_queue_nocred);
         INIT_LIST_HEAD (&conn->ibc_tx_queue);
+        INIT_LIST_HEAD (&conn->ibc_tx_queue_rsrvd);
         INIT_LIST_HEAD (&conn->ibc_active_txs);
         spin_lock_init (&conn->ibc_lock);
-        
+
         atomic_inc (&kibnal_data.kib_nconns);
         /* well not really, but I call destroy() on failure, which decrements */
 
         conn->ibc_cep = cep;
 
-        PORTAL_ALLOC(conn->ibc_connvars, sizeof(*conn->ibc_connvars));
+        LIBCFS_ALLOC(conn->ibc_connvars, sizeof(*conn->ibc_connvars));
         if (conn->ibc_connvars == NULL) {
                 CERROR("Can't allocate in-progress connection state\n");
                 goto failed;
@@ -921,7 +1021,7 @@ kibnal_create_conn (cm_cep_handle_t cep)
         get_random_bytes(&conn->ibc_connvars->cv_rxpsn,
                          sizeof(conn->ibc_connvars->cv_rxpsn));
 
-        PORTAL_ALLOC(conn->ibc_rxs, IBNAL_RX_MSGS * sizeof (kib_rx_t));
+        LIBCFS_ALLOC(conn->ibc_rxs, IBNAL_RX_MSGS * sizeof (kib_rx_t));
         if (conn->ibc_rxs == NULL) {
                 CERROR("Cannot allocate RX buffers\n");
                 goto failed;
@@ -932,40 +1032,27 @@ kibnal_create_conn (cm_cep_handle_t cep)
         if (rc != 0)
                 goto failed;
 
-        vaddr_base = vaddr = conn->ibc_rx_pages->ibp_vaddr;
-
         for (i = ipage = page_offset = 0; i < IBNAL_RX_MSGS; i++) {
-                struct page *page = conn->ibc_rx_pages->ibp_pages[ipage];
-                kib_rx_t   *rx = &conn->ibc_rxs[i];
+                struct page    *page = conn->ibc_rx_pages->ibp_pages[ipage];
+                kib_rx_t       *rx = &conn->ibc_rxs[i];
+                vv_mem_reg_h_t  mem_h;
+                vv_r_key_t      r_key;
 
                 rx->rx_conn = conn;
-                rx->rx_msg = (kib_msg_t *)(((char *)page_address(page)) + 
+                rx->rx_msg = (kib_msg_t *)(((char *)page_address(page)) +
                              page_offset);
 
-#if IBNAL_WHOLE_MEM
-                {
-                        vv_mem_reg_h_t  mem_h;
-                        vv_r_key_t      r_key;
-
-                        /* Voltaire stack already registers the whole
-                         * memory, so use that API. */
-                        vvrc = vv_get_gen_mr_attrib(kibnal_data.kib_hca,
-                                                    rx->rx_msg,
-                                                    IBNAL_MSG_SIZE,
-                                                    &mem_h,
-                                                    &rx->rx_lkey,
-                                                    &r_key);
-                        LASSERT (vvrc == vv_return_ok);
-                }
-#else
-                rx->rx_vaddr = vaddr;
-#endif                
-                CDEBUG(D_NET, "Rx[%d] %p->%p[%x:"LPX64"]\n", i, rx, 
-                       rx->rx_msg, KIBNAL_RX_LKEY(rx), KIBNAL_RX_VADDR(rx));
-
-                vaddr += IBNAL_MSG_SIZE;
-                LASSERT (vaddr <= vaddr_base + IBNAL_RX_MSG_BYTES);
-                
+                vvrc = vv_get_gen_mr_attrib(kibnal_data.kib_hca,
+                                            rx->rx_msg,
+                                            IBNAL_MSG_SIZE,
+                                            &mem_h,
+                                            &rx->rx_lkey,
+                                            &r_key);
+                LASSERT (vvrc == vv_return_ok);
+
+                CDEBUG(D_NET, "Rx[%d] %p->%p[%x]\n", i, rx, 
+                       rx->rx_msg, rx->rx_lkey);
+
                 page_offset += IBNAL_MSG_SIZE;
                 LASSERT (page_offset <= PAGE_SIZE);
 
@@ -981,8 +1068,8 @@ kibnal_create_conn (cm_cep_handle_t cep)
         reqattr.create.qp_type                    = vv_qp_type_r_conn;
         reqattr.create.cq_send_h                  = kibnal_data.kib_cq;
         reqattr.create.cq_receive_h               = kibnal_data.kib_cq;
-        reqattr.create.send_max_outstand_wr       = (1 + IBNAL_MAX_RDMA_FRAGS) * 
-                                                    IBNAL_MSG_QUEUE_SIZE;
+        reqattr.create.send_max_outstand_wr       = (1 + IBNAL_MAX_RDMA_FRAGS) *
+                                                    (*kibnal_tunables.kib_concurrent_sends);
         reqattr.create.receive_max_outstand_wr    = IBNAL_RX_MSGS;
         reqattr.create.max_scatgat_per_send_wr    = 1;
         reqattr.create.max_scatgat_per_receive_wr = 1;
@@ -998,25 +1085,29 @@ kibnal_create_conn (cm_cep_handle_t cep)
         }
 
         /* Mark QP created */
-        conn->ibc_state = IBNAL_CONN_INIT;
+        conn->ibc_state = IBNAL_CONN_INIT_QP;
         conn->ibc_connvars->cv_local_qpn = rspattr.create_return.qp_num;
 
-        if (rspattr.create_return.receive_max_outstand_wr < 
-            IBNAL_MSG_QUEUE_SIZE ||
-            rspattr.create_return.send_max_outstand_wr < 
-            (1 + IBNAL_MAX_RDMA_FRAGS) * IBNAL_MSG_QUEUE_SIZE) {
+        if (rspattr.create_return.receive_max_outstand_wr <
+            IBNAL_RX_MSGS ||
+            rspattr.create_return.send_max_outstand_wr <
+            (1 + IBNAL_MAX_RDMA_FRAGS) * (*kibnal_tunables.kib_concurrent_sends)) {
                 CERROR("Insufficient rx/tx work items: wanted %d/%d got %d/%d\n",
-                       IBNAL_MSG_QUEUE_SIZE, 
-                       (1 + IBNAL_MAX_RDMA_FRAGS) * IBNAL_MSG_QUEUE_SIZE,
+                       IBNAL_RX_MSGS,
+                       (1 + IBNAL_MAX_RDMA_FRAGS) *
+                       (*kibnal_tunables.kib_concurrent_sends),
                        rspattr.create_return.receive_max_outstand_wr,
                        rspattr.create_return.send_max_outstand_wr);
                 goto failed;
         }
 
+        /* Mark init complete */
+        conn->ibc_state = IBNAL_CONN_INIT;
+
         /* 1 ref for caller */
         atomic_set (&conn->ibc_refcount, 1);
         return (conn);
-        
+
  failed:
         kibnal_destroy_conn (conn);
         return (NULL);
@@ -1030,12 +1121,14 @@ kibnal_destroy_conn (kib_conn_t *conn)
         /* Only the connd does this (i.e. single threaded) */
         LASSERT (!in_interrupt());
         LASSERT (current == kibnal_data.kib_connd);
-        
+
         CDEBUG (D_NET, "connection %p\n", conn);
 
         LASSERT (atomic_read (&conn->ibc_refcount) == 0);
         LASSERT (list_empty(&conn->ibc_early_rxs));
         LASSERT (list_empty(&conn->ibc_tx_queue));
+        LASSERT (list_empty(&conn->ibc_tx_queue_rsrvd));
+        LASSERT (list_empty(&conn->ibc_tx_queue_nocred));
         LASSERT (list_empty(&conn->ibc_active_txs));
         LASSERT (conn->ibc_nsends_posted == 0);
 
@@ -1050,33 +1143,35 @@ kibnal_destroy_conn (kib_conn_t *conn)
                 /* fall through */
 
         case IBNAL_CONN_INIT:
+                vvrc = cm_destroy_cep(conn->ibc_cep);
+                LASSERT (vvrc == vv_return_ok);
+                /* fall through */
+
+        case IBNAL_CONN_INIT_QP:
                 kibnal_set_qp_state(conn, vv_qp_state_reset);
                 vvrc = vv_qp_destroy(kibnal_data.kib_hca, conn->ibc_qp);
                 if (vvrc != vv_return_ok)
                         CERROR("Can't destroy QP: %d\n", vvrc);
                 /* fall through */
-                
+
         case IBNAL_CONN_INIT_NOTHING:
                 break;
         }
 
-        if (conn->ibc_rx_pages != NULL) 
+        if (conn->ibc_rx_pages != NULL)
                 kibnal_free_pages(conn->ibc_rx_pages);
 
         if (conn->ibc_rxs != NULL)
-                PORTAL_FREE(conn->ibc_rxs, 
+                LIBCFS_FREE(conn->ibc_rxs,
                             IBNAL_RX_MSGS * sizeof(kib_rx_t));
 
         if (conn->ibc_connvars != NULL)
-                PORTAL_FREE(conn->ibc_connvars, sizeof(*conn->ibc_connvars));
+                LIBCFS_FREE(conn->ibc_connvars, sizeof(*conn->ibc_connvars));
 
         if (conn->ibc_peer != NULL)
                 kibnal_peer_decref(conn->ibc_peer);
 
-        vvrc = cm_destroy_cep(conn->ibc_cep);
-        LASSERT (vvrc == vv_return_ok);
-
-        PORTAL_FREE(conn, sizeof (*conn));
+        LIBCFS_FREE(conn, sizeof (*conn));
 
         atomic_dec(&kibnal_data.kib_nconns);
 }
@@ -1113,9 +1208,10 @@ kibnal_close_stale_conns_locked (kib_peer_t *peer, __u64 incarnation)
                 if (conn->ibc_incarnation == incarnation)
                         continue;
 
-                CDEBUG(D_NET, "Closing stale conn nid:"LPX64" incarnation:"LPX64"("LPX64")\n",
-                       peer->ibp_nid, conn->ibc_incarnation, incarnation);
-                
+                CDEBUG(D_NET, "Closing stale conn -> %s incarnation:"LPX64"("LPX64")\n",
+                       libcfs_nid2str(peer->ibp_nid),
+                       conn->ibc_incarnation, incarnation);
+
                 count++;
                 kibnal_close_conn_locked (conn, -ESTALE);
         }
@@ -1124,7 +1220,7 @@ kibnal_close_stale_conns_locked (kib_peer_t *peer, __u64 incarnation)
 }
 
 int
-kibnal_close_matching_conns (ptl_nid_t nid)
+kibnal_close_matching_conns (lnet_nid_t nid)
 {
         kib_peer_t         *peer;
         struct list_head   *ptmp;
@@ -1137,7 +1233,7 @@ kibnal_close_matching_conns (ptl_nid_t nid)
 
         write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
 
-        if (nid != PTL_NID_ANY)
+        if (nid != LNET_NID_ANY)
                 lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers;
         else {
                 lo = 0;
@@ -1150,9 +1246,10 @@ kibnal_close_matching_conns (ptl_nid_t nid)
                         peer = list_entry (ptmp, kib_peer_t, ibp_list);
                         LASSERT (peer->ibp_persistence != 0 ||
                                  peer->ibp_connecting != 0 ||
+                                 peer->ibp_accepting != 0 ||
                                  !list_empty (&peer->ibp_conns));
 
-                        if (!(nid == PTL_NID_ANY || nid == peer->ibp_nid))
+                        if (!(nid == LNET_NID_ANY || nid == peer->ibp_nid))
                                 continue;
 
                         count += kibnal_close_peer_conns_locked (peer, 0);
@@ -1162,70 +1259,69 @@ kibnal_close_matching_conns (ptl_nid_t nid)
         write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
 
         /* wildcards always succeed */
-        if (nid == PTL_NID_ANY)
+        if (nid == LNET_NID_ANY)
                 return (0);
-        
+
         return (count == 0 ? -ENOENT : 0);
 }
 
 int
-kibnal_cmd(struct portals_cfg *pcfg, void * private)
+kibnal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
 {
-        int rc = -EINVAL;
+        struct libcfs_ioctl_data *data = arg;
+        int                       rc = -EINVAL;
 
-        LASSERT (pcfg != NULL);
+        LASSERT (ni == kibnal_data.kib_ni);
 
-        switch(pcfg->pcfg_command) {
-        case NAL_CMD_GET_PEER: {
-                ptl_nid_t   nid = 0;
-                __u32       ip = 0;
-                int         share_count = 0;
+        switch(cmd) {
+        case IOC_LIBCFS_GET_PEER: {
+                lnet_nid_t   nid = 0;
+                __u32        ip = 0;
+                int          share_count = 0;
 
-                rc = kibnal_get_peer_info(pcfg->pcfg_count,
+                rc = kibnal_get_peer_info(data->ioc_count,
                                           &nid, &ip, &share_count);
-                pcfg->pcfg_nid   = nid;
-                pcfg->pcfg_size  = 0;
-                pcfg->pcfg_id    = ip;
-                pcfg->pcfg_misc  = IBNAL_SERVICE_NUMBER; /* port */
-                pcfg->pcfg_count = 0;
-                pcfg->pcfg_wait  = share_count;
+                data->ioc_nid    = nid;
+                data->ioc_count  = share_count;
+                data->ioc_u32[0] = ip;
+                data->ioc_u32[1] = *kibnal_tunables.kib_service_number; /* port */
                 break;
         }
-        case NAL_CMD_ADD_PEER: {
-                rc = kibnal_add_persistent_peer (pcfg->pcfg_nid,
-                                                 pcfg->pcfg_id); /* IP */
+        case IOC_LIBCFS_ADD_PEER: {
+                rc = kibnal_add_persistent_peer (data->ioc_nid,
+                                                 data->ioc_u32[0]); /* IP */
                 break;
         }
-        case NAL_CMD_DEL_PEER: {
-                rc = kibnal_del_peer (pcfg->pcfg_nid, 
-                                       /* flags == single_share */
-                                       pcfg->pcfg_flags != 0);
+        case IOC_LIBCFS_DEL_PEER: {
+                rc = kibnal_del_peer (data->ioc_nid);
                 break;
         }
-        case NAL_CMD_GET_CONN: {
-                kib_conn_t *conn = kibnal_get_conn_by_idx (pcfg->pcfg_count);
+        case IOC_LIBCFS_GET_CONN: {
+                kib_conn_t *conn = kibnal_get_conn_by_idx (data->ioc_count);
 
                 if (conn == NULL)
                         rc = -ENOENT;
                 else {
+                        // kibnal_debug_conn(conn);
                         rc = 0;
-                        pcfg->pcfg_nid   = conn->ibc_peer->ibp_nid;
-                        pcfg->pcfg_id    = 0;
-                        pcfg->pcfg_misc  = 0;
-                        pcfg->pcfg_flags = 0;
+                        data->ioc_nid = conn->ibc_peer->ibp_nid;
                         kibnal_conn_decref(conn);
                 }
                 break;
         }
-        case NAL_CMD_CLOSE_CONNECTION: {
-                rc = kibnal_close_matching_conns (pcfg->pcfg_nid);
+        case IOC_LIBCFS_CLOSE_CONNECTION: {
+                rc = kibnal_close_matching_conns (data->ioc_nid);
                 break;
         }
-        case NAL_CMD_REGISTER_MYNID: {
-                if (pcfg->pcfg_nid == PTL_NID_ANY)
+        case IOC_LIBCFS_REGISTER_MYNID: {
+                if (ni->ni_nid == data->ioc_nid) {
+                        rc = 0;
+                } else {
+                        CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n",
+                               libcfs_nid2str(data->ioc_nid),
+                               libcfs_nid2str(ni->ni_nid));
                         rc = -EINVAL;
-                else
-                        rc = kibnal_set_mynid (pcfg->pcfg_nid);
+                }
                 break;
         }
         }
@@ -1237,21 +1333,13 @@ void
 kibnal_free_pages (kib_pages_t *p)
 {
         int         npages = p->ibp_npages;
-        vv_return_t vvrc;
         int         i;
-        
-        if (p->ibp_mapped) {
-                vvrc = vv_mem_region_destroy(kibnal_data.kib_hca, 
-                                             p->ibp_handle);
-                if (vvrc != vv_return_ok)
-                        CERROR ("Deregister error: %d\n", vvrc);
-        }
-        
+
         for (i = 0; i < npages; i++)
                 if (p->ibp_pages[i] != NULL)
                         __free_page(p->ibp_pages[i]);
-        
-        PORTAL_FREE (p, offsetof(kib_pages_t, ibp_pages[npages]));
+
+        LIBCFS_FREE (p, offsetof(kib_pages_t, ibp_pages[npages]));
 }
 
 int
@@ -1259,14 +1347,8 @@ kibnal_alloc_pages (kib_pages_t **pp, int npages, int allow_write)
 {
         kib_pages_t   *p;
         int            i;
-#if !IBNAL_WHOLE_MEM
-        vv_phy_list_t            vv_phys;
-        vv_phy_buf_t            *phys_pages;
-        vv_return_t              vvrc;
-        vv_access_con_bit_mask_t access;
-#endif
 
-        PORTAL_ALLOC(p, offsetof(kib_pages_t, ibp_pages[npages]));
+        LIBCFS_ALLOC(p, offsetof(kib_pages_t, ibp_pages[npages]));
         if (p == NULL) {
                 CERROR ("Can't allocate buffer %d\n", npages);
                 return (-ENOMEM);
@@ -1274,7 +1356,7 @@ kibnal_alloc_pages (kib_pages_t **pp, int npages, int allow_write)
 
         memset (p, 0, offsetof(kib_pages_t, ibp_pages[npages]));
         p->ibp_npages = npages;
-        
+
         for (i = 0; i < npages; i++) {
                 p->ibp_pages[i] = alloc_page (GFP_KERNEL);
                 if (p->ibp_pages[i] == NULL) {
@@ -1284,133 +1366,126 @@ kibnal_alloc_pages (kib_pages_t **pp, int npages, int allow_write)
                 }
         }
 
-#if !IBNAL_WHOLE_MEM
-        PORTAL_ALLOC(phys_pages, npages * sizeof(*phys_pages));
-        if (phys_pages == NULL) {
-                CERROR ("Can't allocate physarray for %d pages\n", npages);
-                kibnal_free_pages(p);
-                return (-ENOMEM);
-        }
-
-        vv_phys.number_of_buff = npages;
-        vv_phys.phy_list = phys_pages;
-
-        for (i = 0; i < npages; i++) {
-                phys_pages[i].size = PAGE_SIZE;
-                phys_pages[i].start = page_to_phys(p->ibp_pages[i]);
-        }
-
-        VV_ACCESS_CONTROL_MASK_SET_ALL(access);
-        
-        vvrc = vv_phy_mem_region_register(kibnal_data.kib_hca,
-                                          &vv_phys,
-                                          0, /* requested vaddr */
-                                          npages * PAGE_SIZE, 0, /* offset */
-                                          kibnal_data.kib_pd,
-                                          access,
-                                          &p->ibp_handle, 
-                                          &p->ibp_vaddr,                                           
-                                          &p->ibp_lkey, 
-                                          &p->ibp_rkey);
-        
-        PORTAL_FREE(phys_pages, npages * sizeof(*phys_pages));
-        
-        if (vvrc != vv_return_ok) {
-                CERROR ("Error %d mapping %d pages\n", vvrc, npages);
-                kibnal_free_pages(p);
-                return (-EFAULT);
-        }
-
-        CDEBUG(D_NET, "registered %d pages; handle: %x vaddr "LPX64" "
-               "lkey %x rkey %x\n", npages, p->ibp_handle,
-               p->ibp_vaddr, p->ibp_lkey, p->ibp_rkey);
-        
-        p->ibp_mapped = 1;
-#endif
         *pp = p;
         return (0);
 }
 
 int
-kibnal_alloc_tx_descs (void) 
+kibnal_alloc_tx_descs (void)  /* allocate the tx descriptor array plus the per-tx buffers; returns 0 or -ENOMEM */
 {
         int    i;
-        
-        PORTAL_ALLOC (kibnal_data.kib_tx_descs,
-                      IBNAL_TX_MSGS * sizeof(kib_tx_t));
+
+        LIBCFS_ALLOC (kibnal_data.kib_tx_descs,
+                      IBNAL_TX_MSGS() * sizeof(kib_tx_t));  /* IBNAL_TX_MSGS is now invoked with () rather than used as a plain constant */
         if (kibnal_data.kib_tx_descs == NULL)
                 return -ENOMEM;
-        
+
         memset(kibnal_data.kib_tx_descs, 0,
-               IBNAL_TX_MSGS * sizeof(kib_tx_t));
+               IBNAL_TX_MSGS() * sizeof(kib_tx_t));  /* zeroed, so per-tx pointers not yet allocated read as NULL */
 
-        for (i = 0; i < IBNAL_TX_MSGS; i++) {
+        for (i = 0; i < IBNAL_TX_MSGS(); i++) {
                 kib_tx_t *tx = &kibnal_data.kib_tx_descs[i];
 
-                PORTAL_ALLOC(tx->tx_wrq, 
-                             (1 + IBNAL_MAX_RDMA_FRAGS) * 
+#if IBNAL_USE_FMR  /* FMR build: each tx gets only a tx_pages array of LNET_MAX_IOV entries */
+                LIBCFS_ALLOC(tx->tx_pages, LNET_MAX_IOV *
+                             sizeof(*tx->tx_pages));
+                if (tx->tx_pages == NULL)
+                        return -ENOMEM;  /* nothing is freed on this path. NOTE(review): presumably the caller runs kibnal_free_tx_descs; confirm */
+#else  /* non-FMR build: each tx gets tx_wrq, tx_gl and tx_rd */
+                LIBCFS_ALLOC(tx->tx_wrq,
+                             (1 + IBNAL_MAX_RDMA_FRAGS) *
                              sizeof(*tx->tx_wrq));
                 if (tx->tx_wrq == NULL)
                         return -ENOMEM;
-                
-                PORTAL_ALLOC(tx->tx_gl, 
-                             (1 + IBNAL_MAX_RDMA_FRAGS) * 
+
+                LIBCFS_ALLOC(tx->tx_gl,
+                             (1 + IBNAL_MAX_RDMA_FRAGS) *
                              sizeof(*tx->tx_gl));
                 if (tx->tx_gl == NULL)
                         return -ENOMEM;
-                
-                PORTAL_ALLOC(tx->tx_rd, 
-                             offsetof(kib_rdma_desc_t, 
+
+                LIBCFS_ALLOC(tx->tx_rd,
+                             offsetof(kib_rdma_desc_t,
                                       rd_frags[IBNAL_MAX_RDMA_FRAGS]));
                 if (tx->tx_rd == NULL)
                         return -ENOMEM;
+#endif
         }
 
         return 0;
 }
 
 void
-kibnal_free_tx_descs (void) 
+kibnal_free_tx_descs (void)  /* free the per-tx buffers and then the tx descriptor array; does nothing if the array is NULL */
 {
         int    i;
 
         if (kibnal_data.kib_tx_descs == NULL)
                 return;
 
-        for (i = 0; i < IBNAL_TX_MSGS; i++) {
+        for (i = 0; i < IBNAL_TX_MSGS(); i++) {  /* every descriptor is visited; NULL buffers are skipped below */
                 kib_tx_t *tx = &kibnal_data.kib_tx_descs[i];
 
+#if IBNAL_USE_FMR  /* FMR build: only tx_pages exists per tx */
+                if (tx->tx_pages != NULL)
+                        LIBCFS_FREE(tx->tx_pages, LNET_MAX_IOV *
+                                    sizeof(*tx->tx_pages));  /* same size expression as the allocation in kibnal_alloc_tx_descs */
+#else
                 if (tx->tx_wrq != NULL)
-                        PORTAL_FREE(tx->tx_wrq, 
-                                    (1 + IBNAL_MAX_RDMA_FRAGS) * 
+                        LIBCFS_FREE(tx->tx_wrq,
+                                    (1 + IBNAL_MAX_RDMA_FRAGS) *
                                     sizeof(*tx->tx_wrq));
 
                 if (tx->tx_gl != NULL)
-                        PORTAL_FREE(tx->tx_gl, 
-                                    (1 + IBNAL_MAX_RDMA_FRAGS) * 
+                        LIBCFS_FREE(tx->tx_gl,
+                                    (1 + IBNAL_MAX_RDMA_FRAGS) *
                                     sizeof(*tx->tx_gl));
 
                 if (tx->tx_rd != NULL)
-                        PORTAL_FREE(tx->tx_rd, 
-                                    offsetof(kib_rdma_desc_t, 
+                        LIBCFS_FREE(tx->tx_rd,
+                                    offsetof(kib_rdma_desc_t,
                                              rd_frags[IBNAL_MAX_RDMA_FRAGS]));
+#endif
         }
 
-        PORTAL_FREE(kibnal_data.kib_tx_descs,
-                    IBNAL_TX_MSGS * sizeof(kib_tx_t));
+        LIBCFS_FREE(kibnal_data.kib_tx_descs,
+                    IBNAL_TX_MSGS() * sizeof(kib_tx_t));  /* the array itself goes last, after its per-tx buffers */
+}
+
+#if IBNAL_USE_FMR  /* this helper is only compiled in FMR builds */
+void
+kibnal_free_fmrs (int n)  /* call vv_free_fmr on the FMR handle of tx descriptors 0 .. n-1 */
+{
+        int             i;
+        vv_return_t     vvrc;
+        kib_tx_t       *tx;
+
+        for (i = 0; i < n; i++) {  /* NOTE(review): n is presumably the count of FMRs actually allocated; confirm against the caller */
+                tx = &kibnal_data.kib_tx_descs[i];
+
+                vvrc = vv_free_fmr(kibnal_data.kib_hca,
+                                   tx->tx_md.md_fmrhandle);
+                if (vvrc != vv_return_ok)  /* a failure is only logged; the loop carries on with the next descriptor */
+                        CWARN("vv_free_fmr[%d]: %d\n", i, vvrc);
+        }
 }
+#endif
 
 int
 kibnal_setup_tx_descs (void)
 {
-        int           ipage = 0;
-        int           page_offset = 0;
-        __u64         vaddr;
-        __u64         vaddr_base;
-        struct page  *page;
-        kib_tx_t     *tx;
-        int           i;
-        int           rc;
+        int             ipage = 0;
+        int             page_offset = 0;
+        struct page    *page;
+        kib_tx_t       *tx;
+        vv_mem_reg_h_t  mem_h;
+        vv_r_key_t      rkey;
+        vv_return_t     vvrc;
+        int             i;
+        int             rc;
+#if IBNAL_USE_FMR
+        vv_fmr_t        fmr_props;
+#endif
 
         /* pre-mapped messages are not bigger than 1 page */
         CLASSERT (IBNAL_MSG_SIZE <= PAGE_SIZE);
@@ -1418,54 +1493,52 @@ kibnal_setup_tx_descs (void)
         /* No fancy arithmetic when we do the buffer calculations */
         CLASSERT (PAGE_SIZE % IBNAL_MSG_SIZE == 0);
 
-        rc = kibnal_alloc_pages(&kibnal_data.kib_tx_pages, IBNAL_TX_MSG_PAGES, 
-                                0);
+        rc = kibnal_alloc_pages(&kibnal_data.kib_tx_pages,
+                                IBNAL_TX_MSG_PAGES(), 0);
         if (rc != 0)
                 return (rc);
 
-        /* ignored for the whole_mem case */
-        vaddr = vaddr_base = kibnal_data.kib_tx_pages->ibp_vaddr;
-
-        for (i = 0; i < IBNAL_TX_MSGS; i++) {
+        for (i = 0; i < IBNAL_TX_MSGS(); i++) {
                 page = kibnal_data.kib_tx_pages->ibp_pages[ipage];
                 tx = &kibnal_data.kib_tx_descs[i];
 
-                tx->tx_msg = (kib_msg_t *)(((char *)page_address(page)) + 
-                                           page_offset);
-#if IBNAL_WHOLE_MEM
-                {
-                        vv_mem_reg_h_t  mem_h;
-                        vv_r_key_t      rkey;
-                        vv_return_t     vvrc;
-
-                        /* Voltaire stack already registers the whole
-                         * memory, so use that API. */
-                        vvrc = vv_get_gen_mr_attrib(kibnal_data.kib_hca,
-                                                    tx->tx_msg,
-                                                    IBNAL_MSG_SIZE,
-                                                    &mem_h,
-                                                    &tx->tx_lkey,
-                                                    &rkey);
-                        LASSERT (vvrc == vv_return_ok);
+#if IBNAL_USE_FMR
+                memset(&fmr_props, 0, sizeof(fmr_props));
+                fmr_props.pd_hndl              = kibnal_data.kib_pd;
+                fmr_props.acl                  = (vv_acc_r_mem_write |
+                                                  vv_acc_l_mem_write);
+                fmr_props.max_pages            = LNET_MAX_IOV;
+                fmr_props.log2_page_sz         = PAGE_SHIFT;
+                fmr_props.max_outstanding_maps = *kibnal_tunables.kib_fmr_remaps;
+
+                vvrc = vv_alloc_fmr(kibnal_data.kib_hca,
+                                    &fmr_props,
+                                    &tx->tx_md.md_fmrhandle);
+                if (vvrc != vv_return_ok) {
+                        CERROR("Can't allocate fmr %d: %d\n", i, vvrc);
+                        kibnal_free_fmrs(i);
+                        kibnal_free_pages (kibnal_data.kib_tx_pages);
+                        return -ENOMEM;
                 }
-#else
-                tx->tx_vaddr = vaddr;
+
+                tx->tx_md.md_fmrcount = *kibnal_tunables.kib_fmr_remaps;
+                tx->tx_md.md_active   = 0;
 #endif
-                tx->tx_isnblk = (i >= IBNAL_NTX);
-                tx->tx_mapped = KIB_TX_UNMAPPED;
+                tx->tx_msg = (kib_msg_t *)(((char *)page_address(page)) +
+                                           page_offset);
 
-                CDEBUG(D_NET, "Tx[%d] %p->%p[%x:"LPX64"]\n", i, tx, 
-                       tx->tx_msg, KIBNAL_TX_LKEY(tx), KIBNAL_TX_VADDR(tx));
+                vvrc = vv_get_gen_mr_attrib(kibnal_data.kib_hca,
+                                            tx->tx_msg,
+                                            IBNAL_MSG_SIZE,
+                                            &mem_h,
+                                            &tx->tx_lkey,
+                                            &rkey);
+                LASSERT (vvrc == vv_return_ok);
 
-                if (tx->tx_isnblk)
-                        list_add (&tx->tx_list, 
-                                  &kibnal_data.kib_idle_nblk_txs);
-                else
-                        list_add (&tx->tx_list, 
-                                  &kibnal_data.kib_idle_txs);
+                CDEBUG(D_NET, "Tx[%d] %p->%p[%x]\n", i, tx,
+                       tx->tx_msg, tx->tx_lkey);
 
-                vaddr += IBNAL_MSG_SIZE;
-                LASSERT (vaddr <= vaddr_base + IBNAL_TX_MSG_BYTES);
+                list_add (&tx->tx_list, &kibnal_data.kib_idle_txs);
 
                 page_offset += IBNAL_MSG_SIZE;
                 LASSERT (page_offset <= PAGE_SIZE);
@@ -1473,50 +1546,42 @@ kibnal_setup_tx_descs (void)
                 if (page_offset == PAGE_SIZE) {
                         page_offset = 0;
                         ipage++;
-                        LASSERT (ipage <= IBNAL_TX_MSG_PAGES);
+                        LASSERT (ipage <= IBNAL_TX_MSG_PAGES());
                 }
         }
-        
+
         return (0);
 }
 
 void
-kibnal_api_shutdown (nal_t *nal)
+kibnal_shutdown (lnet_ni_t *ni)
 {
-        int         i;
-        vv_return_t vvrc;
+        int           i;
+        vv_return_t   vvrc;
 
-        if (nal->nal_refct != 0) {
-                /* This module got the first ref */
-                PORTAL_MODULE_UNUSE;
-                return;
-        }
+        LASSERT (ni == kibnal_data.kib_ni);
+        LASSERT (ni->ni_data == &kibnal_data);
 
         CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
-               atomic_read (&portal_kmemory));
-
-        LASSERT(nal == &kibnal_api);
+               atomic_read (&libcfs_kmemory));
 
         switch (kibnal_data.kib_init) {
 
         case IBNAL_INIT_ALL:
-                /* stop calls to nal_cmd */
-                libcfs_nal_cmd_unregister(VIBNAL);
-                /* No new peers */
+                /* stop accepting connections and prevent new peers */
+                kibnal_stop_listener(ni);
 
-                /* resetting my NID removes my listener and nukes all current
-                 * peers and their connections */
-                kibnal_set_mynid (PTL_NID_ANY);
+                /* nuke all existing peers */
+                kibnal_del_peer(LNET_NID_ANY);
 
                 /* Wait for all peer state to clean up */
                 i = 2;
-                while (atomic_read (&kibnal_data.kib_npeers) != 0) {
+                while (atomic_read(&kibnal_data.kib_npeers) != 0) {
                         i++;
-                        CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
+                        CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n? */
                                "waiting for %d peers to disconnect\n",
-                               atomic_read (&kibnal_data.kib_npeers));
-                        set_current_state (TASK_UNINTERRUPTIBLE);
-                        schedule_timeout (HZ);
+                               atomic_read(&kibnal_data.kib_npeers));
+                        cfs_pause(cfs_time_seconds(1));
                 }
                 /* fall through */
 
@@ -1528,10 +1593,14 @@ kibnal_api_shutdown (nal_t *nal)
 
         case IBNAL_INIT_TXD:
                 kibnal_free_pages (kibnal_data.kib_tx_pages);
+#if IBNAL_USE_FMR
+                kibnal_free_fmrs(IBNAL_TX_MSGS());
+#endif
                 /* fall through */
 
         case IBNAL_INIT_PD:
-#if !IBNAL_WHOLE_MEM
+#if 0
+                /* Only deallocate a PD if we actually allocated one */
                 vvrc = vv_pd_deallocate(kibnal_data.kib_hca,
                                         kibnal_data.kib_pd);
                 if (vvrc != vv_return_ok)
@@ -1544,7 +1613,7 @@ kibnal_api_shutdown (nal_t *nal)
                                               kibnal_async_callback);
                 if (vvrc != vv_return_ok)
                         CERROR("vv_dell_async_event_cb error: %d\n", vvrc);
-                        
+
                 /* fall through */
 
         case IBNAL_INIT_HCA:
@@ -1553,19 +1622,13 @@ kibnal_api_shutdown (nal_t *nal)
                         CERROR ("Close HCA  error: %d\n", vvrc);
                 /* fall through */
 
-        case IBNAL_INIT_LIB:
-                lib_fini(&kibnal_lib);
-                /* fall through */
-
         case IBNAL_INIT_DATA:
-                LASSERT (atomic_read (&kibnal_data.kib_npeers) == 0);
+                LASSERT (atomic_read(&kibnal_data.kib_npeers) == 0);
                 LASSERT (kibnal_data.kib_peers != NULL);
                 for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) {
                         LASSERT (list_empty (&kibnal_data.kib_peers[i]));
                 }
                 LASSERT (atomic_read (&kibnal_data.kib_nconns) == 0);
-                LASSERT (list_empty (&kibnal_data.kib_sched_rxq));
-                LASSERT (list_empty (&kibnal_data.kib_sched_txq));
                 LASSERT (list_empty (&kibnal_data.kib_connd_zombies));
                 LASSERT (list_empty (&kibnal_data.kib_connd_conns));
                 LASSERT (list_empty (&kibnal_data.kib_connd_pcreqs));
@@ -1582,11 +1645,10 @@ kibnal_api_shutdown (nal_t *nal)
                         CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
                                "Waiting for %d threads to terminate\n",
                                atomic_read (&kibnal_data.kib_nthreads));
-                        set_current_state (TASK_INTERRUPTIBLE);
-                        schedule_timeout (HZ);
+                        cfs_pause(cfs_time_seconds(1));
                 }
                 /* fall through */
-                
+
         case IBNAL_INIT_NOTHING:
                 break;
         }
@@ -1594,54 +1656,119 @@ kibnal_api_shutdown (nal_t *nal)
         kibnal_free_tx_descs();
 
         if (kibnal_data.kib_peers != NULL)
-                PORTAL_FREE (kibnal_data.kib_peers,
-                             sizeof (struct list_head) * 
+                LIBCFS_FREE (kibnal_data.kib_peers,
+                             sizeof (struct list_head) *
                              kibnal_data.kib_peer_hash_size);
 
         CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
-               atomic_read (&portal_kmemory));
-        printk(KERN_INFO "Lustre: Voltaire IB NAL unloaded (final mem %d)\n",
-               atomic_read(&portal_kmemory));
+               atomic_read (&libcfs_kmemory));
 
         kibnal_data.kib_init = IBNAL_INIT_NOTHING;
+        PORTAL_MODULE_UNUSE;
 }
 
 int
-kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
-                     ptl_ni_limits_t *requested_limits,
-                     ptl_ni_limits_t *actual_limits)
+kibnal_startup (lnet_ni_t *ni)
 {
+        char                      scratch[32];
+        char                      ipif_name[32];
+        char                     *hca_name;
+        __u32                     ip;
+        __u32                     netmask;
+        int                       up;
+        int                       nob;
+        int                       devno;
         struct timeval            tv;
-        ptl_process_id_t          process_id;
-        int                       pkmem = atomic_read(&portal_kmemory);
         int                       rc;
         int                       i;
         vv_request_event_record_t req_er;
         vv_return_t               vvrc;
 
-        LASSERT (nal == &kibnal_api);
+        LASSERT (ni->ni_lnd == &the_kiblnd);
+
+        /* Only 1 instance supported */
+        if (kibnal_data.kib_init != IBNAL_INIT_NOTHING) {
+                CERROR ("Only 1 instance supported\n");
+                return -EPERM;
+        }
 
-        if (nal->nal_refct != 0) {
-                if (actual_limits != NULL)
-                        *actual_limits = kibnal_lib.libnal_ni.ni_actual_limits;
-                /* This module got the first ref */
-                PORTAL_MODULE_USE;
-                return (PTL_OK);
+        if (*kibnal_tunables.kib_credits > *kibnal_tunables.kib_ntx) {
+                CERROR ("Can't set credits(%d) > ntx(%d)\n",
+                        *kibnal_tunables.kib_credits,
+                        *kibnal_tunables.kib_ntx);
+                return -EINVAL;
         }
 
-        LASSERT (kibnal_data.kib_init == IBNAL_INIT_NOTHING);
+        ni->ni_maxtxcredits = *kibnal_tunables.kib_credits;
+        ni->ni_peertxcredits = *kibnal_tunables.kib_peercredits;
+
+        CLASSERT (LNET_MAX_INTERFACES > 1);
+
+        if (ni->ni_interfaces[0] != NULL) {
+                /* Use the HCA specified in 'networks=' */
+
+                if (ni->ni_interfaces[1] != NULL) {
+                        CERROR("Multiple interfaces not supported\n");
+                        return -EPERM;
+                }
+
+                /* Parse <hca base name><number> */
+                hca_name = ni->ni_interfaces[0];
+                nob = strlen(*kibnal_tunables.kib_hca_basename);
+
+                if (strncmp(hca_name, *kibnal_tunables.kib_hca_basename, nob) ||
+                    sscanf(hca_name + nob, "%d%n", &devno, &nob) < 1) {
+                        CERROR("Unrecognised HCA %s\n", hca_name);
+                        return -EINVAL;
+                }
+
+        } else {
+                /* Use <hca base name>0 */
+                devno = 0;
+
+                hca_name = scratch;
+                snprintf(hca_name, sizeof(scratch), "%s%d",
+                         *kibnal_tunables.kib_hca_basename, devno);
+                if (strlen(hca_name) == sizeof(scratch) - 1) {
+                        CERROR("HCA name %s truncated\n", hca_name);
+                        return -EINVAL;
+                }
+        }
+
+        /* Find IP address from <ipif base name><hca number> */
+        snprintf(ipif_name, sizeof(ipif_name), "%s%d",
+                 *kibnal_tunables.kib_ipif_basename, devno);
+        if (strlen(ipif_name) == sizeof(ipif_name) - 1) {
+                CERROR("IPoIB interface name %s truncated\n", ipif_name);
+                return -EINVAL;
+        }
+
+        rc = libcfs_ipif_query(ipif_name, &up, &ip, &netmask);
+        if (rc != 0) {
+                CERROR("Can't query IPoIB interface %s: %d\n", ipif_name, rc);
+                return -ENETDOWN;
+        }
+
+        if (!up) {
+                CERROR("Can't query IPoIB interface %s: it's down\n", ipif_name);
+                return -ENETDOWN;
+        }
+
+        ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ip);
+
+        PORTAL_MODULE_USE;
         memset (&kibnal_data, 0, sizeof (kibnal_data)); /* zero pointers, flags etc */
-        
+
+        kibnal_data.kib_ni = ni;
+        ni->ni_data = &kibnal_data;
+
         do_gettimeofday(&tv);
         kibnal_data.kib_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
-        kibnal_data.kib_svc_id = IBNAL_SERVICE_NUMBER;
-
-        init_MUTEX (&kibnal_data.kib_nid_mutex);
 
         rwlock_init(&kibnal_data.kib_global_lock);
 
         kibnal_data.kib_peer_hash_size = IBNAL_PEER_HASH_SIZE;
-        PORTAL_ALLOC (kibnal_data.kib_peers,
+        LIBCFS_ALLOC (kibnal_data.kib_peers,
                       sizeof (struct list_head) * kibnal_data.kib_peer_hash_size);
         if (kibnal_data.kib_peers == NULL) {
                 goto failed;
@@ -1657,39 +1784,21 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
         init_waitqueue_head (&kibnal_data.kib_connd_waitq);
 
         spin_lock_init (&kibnal_data.kib_sched_lock);
-        INIT_LIST_HEAD (&kibnal_data.kib_sched_txq);
-        INIT_LIST_HEAD (&kibnal_data.kib_sched_rxq);
         init_waitqueue_head (&kibnal_data.kib_sched_waitq);
 
         spin_lock_init (&kibnal_data.kib_tx_lock);
         INIT_LIST_HEAD (&kibnal_data.kib_idle_txs);
-        INIT_LIST_HEAD (&kibnal_data.kib_idle_nblk_txs);
-        init_waitqueue_head(&kibnal_data.kib_idle_tx_waitq);
 
         rc = kibnal_alloc_tx_descs();
         if (rc != 0) {
                 CERROR("Can't allocate tx descs\n");
                 goto failed;
         }
-        
+
         /* lists/ptrs/locks initialised */
         kibnal_data.kib_init = IBNAL_INIT_DATA;
         /*****************************************************/
 
-        process_id.pid = requested_pid;
-        process_id.nid = PTL_NID_ANY;
-        
-        rc = lib_init(&kibnal_lib, nal, process_id,
-                      requested_limits, actual_limits);
-        if (rc != PTL_OK) {
-                CERROR("lib_init failed: error %d\n", rc);
-                goto failed;
-        }
-
-        /* lib interface initialised */
-        kibnal_data.kib_init = IBNAL_INIT_LIB;
-        /*****************************************************/
-
         for (i = 0; i < IBNAL_N_SCHED; i++) {
                 rc = kibnal_thread_start (kibnal_scheduler, (void *)((long)i));
                 if (rc != 0) {
@@ -1705,10 +1814,9 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
                 goto failed;
         }
 
-        /* TODO: apparently only one adapter is supported */
-        vvrc = vv_hca_open("ANY_HCA", NULL, &kibnal_data.kib_hca);
+        vvrc = vv_hca_open(hca_name, NULL, &kibnal_data.kib_hca);
         if (vvrc != vv_return_ok) {
-                CERROR ("Can't open CA: %d\n", vvrc);
+                CERROR ("Can't open HCA %s: %d\n", hca_name, vvrc);
                 goto failed;
         }
 
@@ -1720,7 +1828,7 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
         vvrc = vv_set_async_event_cb (kibnal_data.kib_hca, req_er,
                                      kibnal_async_callback);
         if (vvrc != vv_return_ok) {
-                CERROR ("Can't open CA: %d\n", vvrc);
+                CERROR ("Can't set HCA %s callback: %d\n", hca_name, vvrc);
                 goto failed; 
         }
 
@@ -1730,7 +1838,7 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
 
         vvrc = vv_hca_query(kibnal_data.kib_hca, &kibnal_data.kib_hca_attrs);
         if (vvrc != vv_return_ok) {
-                CERROR ("Can't size port attrs: %d\n", vvrc);
+                CERROR ("Can't size port attrs for %s: %d\n", hca_name, vvrc);
                 goto failed;
         }
 
@@ -1744,8 +1852,8 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
 
                 vvrc = vv_port_query(kibnal_data.kib_hca, port_num, pattr);
                 if (vvrc != vv_return_ok) {
-                        CERROR("vv_port_query failed for port %d: %d\n",
-                               port_num, vvrc);
+                        CERROR("vv_port_query failed for %s port %d: %d\n",
+                               hca_name, port_num, vvrc);
                         continue;
                 }
 
@@ -1763,60 +1871,63 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
                         CDEBUG(D_NET, "port[%d] Active\n", port_num);
 
                         /* Found a suitable port. Get its GUID and PKEY. */
-                        kibnal_data.kib_port = port_num;
-                        
                         tbl_count = 1;
-                        vvrc = vv_get_port_gid_tbl(kibnal_data.kib_hca, 
+                        vvrc = vv_get_port_gid_tbl(kibnal_data.kib_hca,
                                                    port_num, &tbl_count,
                                                    &kibnal_data.kib_port_gid);
                         if (vvrc != vv_return_ok) {
                                 CERROR("vv_get_port_gid_tbl failed "
-                                       "for port %d: %d\n", port_num, vvrc);
+                                       "for %s port %d: %d\n",
+                                       hca_name, port_num, vvrc);
                                 continue;
                         }
 
                         tbl_count = 1;
-                        vvrc = vv_get_port_partition_tbl(kibnal_data.kib_hca, 
-                                                        port_num, &tbl_count,
-                                                        &kibnal_data.kib_port_pkey);
+                        vvrc = vv_get_port_partition_tbl(kibnal_data.kib_hca,
+                                                         port_num, &tbl_count,
+                                                         &kibnal_data.kib_port_pkey);
                         if (vvrc != vv_return_ok) {
                                 CERROR("vv_get_port_partition_tbl failed "
-                                       "for port %d: %d\n", port_num, vvrc);
+                                       "for %s port %d: %d\n",
+                                       hca_name, port_num, vvrc);
                                 continue;
                         }
 
+                        kibnal_data.kib_port = port_num;
+
                         break;
                 case vv_state_linkActDefer: /* TODO: correct? */
                 case vv_state_linkNoChange:
-                        CERROR("Unexpected port[%d] state %d\n",
-                               i, pattr->port_state);
+                        CERROR("Unexpected %s port[%d] state %d\n",
+                               hca_name, i, pattr->port_state);
                         continue;
                 }
                 break;
         }
 
         if (kibnal_data.kib_port == -1) {
-                CERROR ("Can't find an active port\n");
+                CERROR ("Can't find an active port on %s\n", hca_name);
                 goto failed;
         }
 
-        CDEBUG(D_NET, "Using port %d - GID="LPX64":"LPX64"\n",
-               kibnal_data.kib_port, 
-               kibnal_data.kib_port_gid.scope.g.subnet, 
+        CDEBUG(D_NET, "Using %s port %d - GID="LPX64":"LPX64"\n",
+               hca_name, kibnal_data.kib_port,
+               kibnal_data.kib_port_gid.scope.g.subnet,
                kibnal_data.kib_port_gid.scope.g.eui64);
-        
+
         /*****************************************************/
 
-#if !IBNAL_WHOLE_MEM
-        vvrc = vv_pd_allocate(kibnal_data.kib_hca, &kibnal_data.kib_pd);
-#else
+#if 1
+        /* We use a pre-allocated PD */
         vvrc = vv_get_gen_pd_h(kibnal_data.kib_hca, &kibnal_data.kib_pd);
+#else
+        vvrc = vv_pd_allocate(kibnal_data.kib_hca, &kibnal_data.kib_pd);
 #endif
-        if (vvrc != 0) {
-                CERROR ("Can't create PD: %d\n", vvrc);
+        if (vvrc != vv_return_ok) {
+                CERROR ("Can't init PD: %d\n", vvrc);
                 goto failed;
         }
-        
+
         /* flag PD initialised */
         kibnal_data.kib_init = IBNAL_INIT_PD;
         /*****************************************************/
@@ -1826,15 +1937,16 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
                 CERROR ("Can't register tx descs: %d\n", rc);
                 goto failed;
         }
-        
+
         /* flag TX descs initialised */
         kibnal_data.kib_init = IBNAL_INIT_TXD;
         /*****************************************************/
+
         {
-                uint32_t nentries;
+                __u32 nentries;
 
-                vvrc = vv_cq_create(kibnal_data.kib_hca, IBNAL_CQ_ENTRIES,
-                                    kibnal_cq_callback, 
+                vvrc = vv_cq_create(kibnal_data.kib_hca, IBNAL_CQ_ENTRIES(),
+                                    kibnal_cq_callback,
                                     NULL, /* context */
                                     &kibnal_data.kib_cq, &nentries);
                 if (vvrc != 0) {
@@ -1845,26 +1957,24 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
                 /* flag CQ initialised */
                 kibnal_data.kib_init = IBNAL_INIT_CQ;
 
-                if (nentries < IBNAL_CQ_ENTRIES) {
-                        CERROR ("CQ only has %d entries, need %d\n", 
-                                nentries, IBNAL_CQ_ENTRIES);
+                if (nentries < IBNAL_CQ_ENTRIES()) {    /* IBNAL_CQ_ENTRIES is now invoked with (); it was a bare name before */
+                        CERROR ("CQ only has %d entries, need %d\n",
+                                nentries, IBNAL_CQ_ENTRIES());
                         goto failed;
                 }
 
-                vvrc = vv_request_completion_notification(kibnal_data.kib_hca, 
-                                                          kibnal_data.kib_cq, 
+                vvrc = vv_request_completion_notification(kibnal_data.kib_hca,
+                                                          kibnal_data.kib_cq,
                                                           vv_next_solicit_unsolicit_event);
                 if (vvrc != 0) {
                         CERROR ("Failed to re-arm completion queue: %d\n", rc); /* NOTE(review): logs rc, but the status tested just above is vvrc -- confirm which was intended */
                         goto failed;
                 }
         }
-        
-        /*****************************************************/
 
-        rc = libcfs_nal_cmd_register(VIBNAL, &kibnal_cmd, NULL);
+        rc = kibnal_start_listener(ni);         /* takes the place of the libcfs_nal_cmd_register() step removed above */
         if (rc != 0) {
-                CERROR ("Can't initialise command interface (rc = %d)\n", rc);
+                CERROR("Can't start listener: %d\n", rc);
                 goto failed;
         }
 
@@ -1872,27 +1982,19 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
         kibnal_data.kib_init = IBNAL_INIT_ALL;
         /*****************************************************/
 
-        printk(KERN_INFO "Lustre: Voltaire IB NAL loaded "
-               "(initial mem %d)\n", pkmem);
-
-        return (PTL_OK);
+        return (0);
 
  failed:
-        CDEBUG(D_NET, "kibnal_api_startup failed\n");
-        kibnal_api_shutdown (&kibnal_api);    
-        return (PTL_FAIL);
+        CDEBUG(D_NET, "kibnal_startup failed\n");
+        kibnal_shutdown (ni);
+        return (-ENETDOWN);
 }
 
 void __exit
 kibnal_module_fini (void)       /* module exit hook: passed to module_exit() at the end of the file */
 {
-#ifdef CONFIG_SYSCTL
-        if (kibnal_tunables.kib_sysctl != NULL)
-                unregister_sysctl_table (kibnal_tunables.kib_sysctl);
-#endif
-        PtlNIFini(kibnal_ni);
-
-        ptl_unregister_nal(VIBNAL);
+        lnet_unregister_lnd(&the_kiblnd);       /* mirrors lnet_register_lnd(&the_kiblnd) in kibnal_module_init */
+        kibnal_tunables_fini();                 /* mirrors kibnal_tunables_init(); teardown runs in reverse order of init */
 }
 
 int __init
@@ -1902,49 +2004,29 @@ kibnal_module_init (void)
 
         vibnal_assert_wire_constants();
 
-        CLASSERT (offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t) 
+        CLASSERT (offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t)
                   <= cm_REQ_priv_data_len);
-        CLASSERT (offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t) 
+        CLASSERT (offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t)
                   <= cm_REP_priv_data_len);
+        CLASSERT (sizeof(kib_msg_t) <= IBNAL_MSG_SIZE);
+#if !IBNAL_USE_FMR
         CLASSERT (offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[IBNAL_MAX_RDMA_FRAGS])
                   <= IBNAL_MSG_SIZE);
         CLASSERT (offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[IBNAL_MAX_RDMA_FRAGS])
                   <= IBNAL_MSG_SIZE);
-        
-        /* the following must be sizeof(int) for proc_dointvec() */
-        CLASSERT (sizeof (kibnal_tunables.kib_io_timeout) == sizeof (int));
-
-        kibnal_api.nal_ni_init = kibnal_api_startup;
-        kibnal_api.nal_ni_fini = kibnal_api_shutdown;
-
-        /* Initialise dynamic tunables to defaults once only */
-        kibnal_tunables.kib_io_timeout = IBNAL_IO_TIMEOUT;
+#endif
+        rc = kibnal_tunables_init();
+        if (rc != 0)
+                return rc;
 
-        rc = ptl_register_nal(VIBNAL, &kibnal_api);
-        if (rc != PTL_OK) {
-                CERROR("Can't register IBNAL: %d\n", rc);
-                return (-ENOMEM);               /* or something... */
-        }
+        lnet_register_lnd(&the_kiblnd);
 
-        /* Pure gateways want the NAL started up at module load time... */
-        rc = PtlNIInit(VIBNAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &kibnal_ni);
-        if (rc != PTL_OK && rc != PTL_IFACE_DUP) {
-                ptl_unregister_nal(VIBNAL);
-                return (-ENODEV);
-        }
-        
-#ifdef CONFIG_SYSCTL
-        /* Press on regardless even if registering sysctl doesn't work */
-        kibnal_tunables.kib_sysctl = 
-                register_sysctl_table (kibnal_top_ctl_table, 0);
-#endif
-        return (0);
+        return 0;
 }
 
-MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
-MODULE_DESCRIPTION("Kernel Voltaire IB NAL v0.01");
+MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
+MODULE_DESCRIPTION("Kernel Voltaire IB LND v1.00");
 MODULE_LICENSE("GPL");
 
 module_init(kibnal_module_init);
 module_exit(kibnal_module_fini);
-